remi 0.2.15 → 0.2.16

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 89aa19a0e9852eb3fcbf63cd3890102501fc27ff
4
- data.tar.gz: ef447ea96c09edd89953aabef5b9e18188d06c50
3
+ metadata.gz: c08e200bea80edb6451804d752e12d3f820211db
4
+ data.tar.gz: af6e06a889c02f13586e61c4c65b09cbbac14bb1
5
5
  SHA512:
6
- metadata.gz: 11db98f16aff0c37c83a02dd44dec3ec6e1f908c61e85aaeb277b789acc58b2eaf0706b10274591004da92e0ea66321b1ee2fe5318422772c251519150d41d80
7
- data.tar.gz: 7639ef4a4b9b3aa03385e270b5aa3ea10796dd9e9250819e706be0fa366d95dcb4c886e6ccab54e5546f0aa7a38ea6adbb61909fa6919be3350aa68dc8d6b0c0
6
+ metadata.gz: fa607c200a4c8f838e31c635fbfcd521ad14c86d734b4176880f1cdc8bf7d8e53dc8c3672af62e735a731ad8d3234f54df9982bd9778efceee1e63e7680d03f5
7
+ data.tar.gz: 18715e8761212d48bf00d32378949044fa9957650b08491f43733bb9902e753d44f2b68a86706b8e00f59c01b4b0fd68a8192ebee4d2d763279bab74a0e3bbe9
@@ -1,11 +1,11 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- remi (0.2.15)
4
+ remi (0.2.16)
5
5
  activesupport (~> 4.2)
6
6
  bond (~> 0.5)
7
7
  cucumber (~> 2.1)
8
- daru (~> 0.1)
8
+ daru (= 0.1.2)
9
9
  docile (~> 1.1)
10
10
  faker (~> 1.6)
11
11
  net-sftp (~> 2.1)
@@ -39,12 +39,12 @@ GEM
39
39
  cucumber-core (1.4.0)
40
40
  gherkin (~> 3.2.0)
41
41
  cucumber-wire (0.0.1)
42
- daru (0.1.1)
42
+ daru (0.1.2)
43
43
  reportbuilder (~> 1.4)
44
- spreadsheet (~> 1.0.3)
44
+ spreadsheet (~> 1.1.1)
45
45
  diff-lcs (1.2.5)
46
46
  docile (1.1.5)
47
- faker (1.6.1)
47
+ faker (1.6.3)
48
48
  i18n (~> 0.5)
49
49
  faraday (0.9.2)
50
50
  multipart-post (>= 1.2, < 3)
@@ -94,7 +94,7 @@ GEM
94
94
  rspec-core (~> 3.4.0)
95
95
  rspec-expectations (~> 3.4.0)
96
96
  rspec-mocks (~> 3.4.0)
97
- rspec-core (3.4.2)
97
+ rspec-core (3.4.3)
98
98
  rspec-support (~> 3.4.0)
99
99
  rspec-expectations (3.4.0)
100
100
  diff-lcs (>= 1.2.0, < 2.0)
@@ -107,7 +107,7 @@ GEM
107
107
  salesforce_bulk_api (0.0.12)
108
108
  json
109
109
  xml-simple
110
- spreadsheet (1.0.9)
110
+ spreadsheet (1.1.1)
111
111
  ruby-ole (>= 1.0)
112
112
  text-table (1.2.4)
113
113
  thread_safe (0.3.5)
@@ -0,0 +1,21 @@
1
+ Feature: Tests targets that are Csv Files.
2
+
3
+ Background:
4
+ Given the job is 'Csv File Target'
5
+ And the job target 'Some Csv File'
6
+
7
+
8
+ Scenario: Defining target csv options.
9
+
10
+ Given the target 'Some Csv File'
11
+ And the target file is delimited with a pipe
12
+ And the target file is encoded using "UTF-8" format
13
+ And the target file uses a double quote to quote embedded delimiters
14
+ And the target file uses a preceding double quote to escape an embedded quoting character
15
+ And the target file uses unix line endings
16
+ And the target file contains a header row
17
+ And the target file contains all of the following headers in this order:
18
+ | header |
19
+ | col3 |
20
+ | col1 |
21
+ | col2 |
@@ -0,0 +1,9 @@
1
+ Feature: Tests targets that are Sftp Files.
2
+
3
+ Background:
4
+ Given the job is 'Sftp File Target'
5
+ And the job target 'Some File'
6
+
7
+ Scenario: Defining the remote path.
8
+ Given the target 'Some File'
9
+ Then the file is uploaded to the remote path "some_file_*Today: %Y%m%d*.csv"
@@ -71,37 +71,52 @@ Then /^no files will be downloaded for processing$/ do
71
71
  expect { @brt.source.extract }.to raise_error Remi::Extractor::SftpFile::FileNotFoundError
72
72
  end
73
73
 
74
+ Then /^the file is uploaded to the remote path "([^"]+)"$/ do |remote_path|
75
+ expect(@brt.target.get_attrib(:remote_path)).to eq Remi::BusinessRules::ParseFormula.parse(remote_path)
76
+ end
77
+
78
+ ## CSV Options
79
+
80
+ Given /^the (source|target) file is delimited with a (\w+)$/ do |st, delimiter|
81
+ expect(@brt.send(st.to_sym).csv_options[:col_sep]).to eq Remi::BusinessRules.csv_opt_map[delimiter]
82
+ end
74
83
 
75
- Given /^the source file is delimited with a (\w+)$/ do |delimiter|
76
- expect(@brt.source.csv_options[:col_sep]).to eq Remi::BusinessRules.csv_opt_map[delimiter]
84
+ Given /^the (source|target) file is encoded using "([^"]+)" format$/ do |st, encoding|
85
+ expect(@brt.send(st.to_sym).csv_options[:encoding].split(':').first).to eq encoding
77
86
  end
78
87
 
79
- Given /^the source file is encoded using "([^"]+)" format$/ do |encoding|
80
- expect(@brt.source.csv_options[:encoding].split(':').first).to eq encoding
88
+ Given /^the (source|target) file uses a ([\w ]+) to quote embedded delimiters$/ do |st, quote_char|
89
+ expect(@brt.send(st.to_sym).csv_options[:quote_char]).to eq Remi::BusinessRules.csv_opt_map[quote_char]
81
90
  end
82
91
 
83
- Given /^the source file uses a ([\w ]+) to quote embedded delimiters$/ do |quote_char|
84
- expect(@brt.source.csv_options[:quote_char]).to eq Remi::BusinessRules.csv_opt_map[quote_char]
92
+ Given /^the (source|target) file uses a preceding ([\w ]+) to escape an embedded quoting character$/ do |st, escape_char|
93
+ expect(@brt.send(st.to_sym).csv_options[:quote_char]).to eq Remi::BusinessRules.csv_opt_map[escape_char]
85
94
  end
86
95
 
87
- Given /^the source file uses a preceding ([\w ]+) to escape an embedded quoting character$/ do |escape_char|
88
- expect(@brt.source.csv_options[:quote_char]).to eq Remi::BusinessRules.csv_opt_map[escape_char]
96
+ Given /^the (source|target) file uses ([\w ]+) line endings$/ do |st, line_endings|
97
+ expect(@brt.send(st.to_sym).csv_options[:row_sep]).to eq Remi::BusinessRules.csv_opt_map[line_endings]
89
98
  end
90
99
 
91
- Given /^the source file uses ([\w ]+) line endings$/ do |line_endings|
92
- expect(@brt.source.csv_options[:row_sep]).to eq Remi::BusinessRules.csv_opt_map[line_endings]
100
+ Given /^the (source|target) file (contains|does not contain) a header row$/ do |st, header|
101
+ expect(@brt.send(st.to_sym).csv_options[:headers]).to eq (header == 'contains')
93
102
  end
94
103
 
95
- Given /^the source file (contains|does not contain) a header row$/ do |header|
96
- expect(@brt.source.csv_options[:headers]).to eq (header == 'contains')
104
+ Given /^the (source|target) file contains at least the following headers in no particular order:$/ do |st, table|
105
+ table.rows.each do |row|
106
+ field = row.first
107
+ step "the #{st} field '#{field}'"
108
+ end
109
+ expect(@brt.send(st.to_sym).data_obj.fields.keys).to include(*@brt.send(st.to_sym).fields.names)
97
110
  end
98
111
 
99
- Given /^the source file contains at least the following headers in no particular order:$/ do |table|
112
+ Given /^the (source|target) file contains all of the following headers in this order:$/ do |st, table|
100
113
  table.rows.each do |row|
101
114
  field = row.first
102
- step "the source field '#{field}'"
115
+ step "the #{st} field '#{field}'"
103
116
  end
104
- expect(@brt.source.data_obj.fields.keys).to include(*@brt.source.fields.names)
117
+
118
+ @brt.run_transforms if st == 'target'
119
+ expect(@brt.send(st.to_sym).data_obj.fields.keys).to eq @brt.send(st.to_sym).fields.names
105
120
  end
106
121
 
107
122
 
@@ -1,5 +1 @@
1
- require_relative '../../jobs/sample_job'
2
- require_relative '../../jobs/copy_source_job'
3
- require_relative '../../jobs/transforms/transform_jobs'
4
- require_relative '../../jobs/aggregate_job'
5
- require_relative '../../jobs/parameters_job'
1
+ Dir["#{File.dirname(__FILE__)}/../../jobs/**/*_job.rb"].each { |f| require(f) }
@@ -0,0 +1,20 @@
1
+ require_relative 'all_jobs_shared'
2
+
3
+ class CsvFileTargetJob
4
+ include AllJobsShared
5
+ using Remi::Refinements::Daru
6
+
7
+ define_target :some_csv_file, Remi::DataTarget::CsvFile,
8
+ path: "#{Remi::Settings.work_dir}/some_file.csv",
9
+ csv_options: {
10
+ col_sep: '|'
11
+ }
12
+
13
+ define_transform :main do
14
+ some_csv_file.df = Daru::DataFrame.new({
15
+ col3: Faker::Hipster.words(10),
16
+ col1: Faker::Hipster.words(10),
17
+ col2: ["uh, \"oh"] + Faker::Hipster.words(9)
18
+ }, order: [:col3, :col1, :col2])
19
+ end
20
+ end
@@ -0,0 +1,19 @@
1
+ require_relative 'all_jobs_shared'
2
+
3
+ class SftpFileTargetJob
4
+ include AllJobsShared
5
+ using Remi::Refinements::Daru
6
+
7
+
8
+ define_target :some_file, Remi::DataTarget::SftpFile,
9
+ credentials: {
10
+ host: 'example.com',
11
+ username: 'user',
12
+ password: 'secret'
13
+ },
14
+ local_path: "#{Remi::Settings.work_dir}/some_file.csv",
15
+ remote_path: "some_file_#{DateTime.current.strftime('%Y%m%d')}.csv"
16
+
17
+ define_transform :main do
18
+ end
19
+ end
@@ -57,5 +57,6 @@ require 'remi/data_target.rb'
57
57
  require 'remi/data_target/data_frame'
58
58
  require 'remi/data_target/salesforce'
59
59
  require 'remi/data_target/csv_file'
60
+ require 'remi/data_target/sftp_file'
60
61
 
61
62
  require 'remi/transform'
@@ -29,9 +29,9 @@ module Remi::BusinessRules
29
29
 
30
30
  def formulas
31
31
  @formulas ||= RegexSieve.new({
32
- /(today|yesterday|tomorrow)/i => [:date_reference, :match_single_day],
33
- /(this|last|previous|next) (day|month|year|week)/i => [:date_reference, :match_single_unit],
34
- /(\d+)\s(day|days|month|months|year|years|week|weeks) (ago|from now)/i => [:date_reference, :match_multiple]
32
+ /(today|yesterday|tomorrow)(|:[^*]+)\*/i => [:date_reference, :match_single_day],
33
+ /(this|last|previous|next) (day|month|year|week)(|:[^*]+)\*/i => [:date_reference, :match_single_unit],
34
+ /(\d+)\s(day|days|month|months|year|years|week|weeks) (ago|from now)(|:[^*]+)\*/i => [:date_reference, :match_multiple]
35
35
  })
36
36
  end
37
37
 
@@ -39,7 +39,7 @@ module Remi::BusinessRules
39
39
  return form unless is_formula?(form)
40
40
 
41
41
  form_opt = formulas[form, :match]
42
- raise "Unknown formula #{form}" unless form_opt
42
+ raise "Unknown formula #{form}" unless form_opt[:match]
43
43
 
44
44
  to_replace = form.match(base_regex)[0]
45
45
  replace_with = if form_opt[:value][0] == :date_reference
@@ -54,30 +54,37 @@ module Remi::BusinessRules
54
54
 
55
55
  def date_reference(formula, captured)
56
56
  parsed = self.send("date_reference_#{formula}", *captured)
57
- Date.current.send("#{parsed[:unit]}_#{parsed[:direction]}", parsed[:quantity]).strftime('%Y-%m-%d')
57
+ Date.current.send("#{parsed[:unit]}_#{parsed[:direction]}", parsed[:quantity]).strftime(parsed[:format])
58
58
  end
59
59
 
60
- def date_reference_match_single_day(form, direction)
60
+ def parse_colon_date_format(str)
61
+ str.blank? ? '%Y-%m-%d' : str.slice(1..-1).strip
62
+ end
63
+
64
+ def date_reference_match_single_day(form, direction, format=nil)
61
65
  {
62
66
  quantity: direction.downcase == 'today' ? 0 : 1,
63
67
  unit: 'days',
64
- direction: { 'today' => 'ago', 'yesterday' => 'ago', 'tomorrow' => 'since' }[direction.downcase]
68
+ direction: { 'today' => 'ago', 'yesterday' => 'ago', 'tomorrow' => 'since' }[direction.downcase],
69
+ format: parse_colon_date_format(format)
65
70
  }
66
71
  end
67
72
 
68
- def date_reference_match_single_unit(form, direction, unit)
73
+ def date_reference_match_single_unit(form, direction, unit, format=nil)
69
74
  {
70
75
  quantity: direction.downcase == 'this' ? 0 : 1,
71
76
  unit: unit.downcase.pluralize,
72
- direction: { 'this' => 'ago', 'last' => 'ago', 'previous' => 'ago', 'next' => 'since' }[direction.downcase]
77
+ direction: { 'this' => 'ago', 'last' => 'ago', 'previous' => 'ago', 'next' => 'since' }[direction.downcase],
78
+ format: parse_colon_date_format(format)
73
79
  }
74
80
  end
75
81
 
76
- def date_reference_match_multiple(form, quantity, unit, direction)
82
+ def date_reference_match_multiple(form, quantity, unit, direction, format=nil)
77
83
  {
78
84
  quantity: quantity.to_i,
79
85
  unit: unit.downcase.pluralize,
80
- direction: { 'ago' => 'ago', 'from now' => 'since' }[direction.downcase]
86
+ direction: { 'ago' => 'ago', 'from now' => 'since' }[direction.downcase],
87
+ format: parse_colon_date_format(format)
81
88
  }
82
89
  end
83
90
  end
@@ -240,6 +247,10 @@ module Remi::BusinessRules
240
247
  @data_obj.df.size
241
248
  end
242
249
 
250
+ def get_attrib(name)
251
+ @data_obj.send(name)
252
+ end
253
+
243
254
  # Public: Converts the data subject to a hash where the keys are the table
244
255
  # columns and the values are an array for the value of column for each row.
245
256
  def column_hash
@@ -1,49 +1,42 @@
1
1
  module Remi
2
2
  module DataTarget
3
- class Salesforce
3
+ class CsvFile
4
4
  include DataTarget
5
5
 
6
- def initialize(object:, operation:, credentials:, api: :bulk, logger: Remi::Settings.logger)
7
- @sfo = object
8
- @operation = operation
9
- @credentials = credentials
10
- @api = api
6
+ def self.default_csv_options
7
+ CSV::DEFAULT_OPTIONS.merge({
8
+ headers: true,
9
+ header_converters: Remi::FieldSymbolizers[:standard],
10
+ col_sep: ',',
11
+ encoding: 'UTF-8',
12
+ quote_char: '"',
13
+ row_sep: "\n"
14
+ })
15
+ end
16
+
17
+ def initialize(path:, csv_options: {}, logger: Remi::Settings.logger)
18
+ @path = path
19
+ @csv_options = self.class.default_csv_options.merge(csv_options)
11
20
  @logger = logger
12
21
  end
13
22
 
23
+ attr_reader :path
24
+ attr_reader :csv_options
25
+
14
26
  def field_symbolizer
15
- Remi::FieldSymbolizers[:salesforce]
27
+ self.class.default_csv_options[:header_converters]
16
28
  end
17
29
 
18
30
  def load
19
31
  return true if @loaded || df.size == 0
20
32
 
21
- @logger.info "Performing Salesforce #{@operation} on object #{@sfo}"
33
+ @logger.info "Writing CSV file #{@path}"
22
34
 
23
- if @operation == :update
24
- Remi::SfBulkHelper::SfBulkUpdate.update(restforce_client, @sfo, df_as_array_of_hashes, logger: @logger)
25
- elsif @operation == :create
26
- Remi::SfBulkHelper::SfBulkCreate.create(restforce_client, @sfo, df_as_array_of_hashes, logger: @logger)
27
- end
35
+ df.write_csv @path, @csv_options
28
36
 
29
37
  @loaded = true
30
38
  end
31
39
 
32
- def restforce_client
33
- @restforce_client ||= begin
34
- client = Restforce.new(@credentials)
35
-
36
- #run a dummy query to initiate a connection. Workaround for Bulk API problem
37
- # https://github.com/yatish27/salesforce_bulk_api/issues/33
38
- client.query('SELECT Id FROM Contact LIMIT 1')
39
- client
40
- end
41
- end
42
-
43
- def df_as_array_of_hashes
44
- df.to_a[0]
45
- end
46
-
47
40
  end
48
41
  end
49
42
  end
@@ -0,0 +1,54 @@
1
+ module Remi
2
+ module DataTarget
3
+ class SftpFile
4
+ include DataTarget
5
+
6
+ def initialize(credentials:, local_path:, remote_path: File.basename(local_path), logger: Remi::Settings.logger)
7
+ @credentials = credentials
8
+ @local_path = local_path
9
+ @remote_path = remote_path
10
+ @logger = logger
11
+ end
12
+
13
+ attr_reader :local_path
14
+ attr_reader :remote_path
15
+
16
+ def load
17
+ return true if @loaded
18
+
19
+ connection do |sftp|
20
+ retry_upload { sftp.upload! @local_path, @remote_path }
21
+ end
22
+
23
+ @loaded = true
24
+ end
25
+
26
+
27
+
28
+ private
29
+
30
+ def connection(&block)
31
+ result = nil
32
+ Net::SFTP.start(@credentials[:host], @credentials[:username], password: @credentials[:password], port: @credentials[:port] || '22') do |sftp|
33
+ result = yield sftp
34
+ end
35
+ result
36
+ end
37
+
38
+ def retry_upload(ntry=2, &block)
39
+ 1.upto(ntry).each do |itry|
40
+ begin
41
+ block.call
42
+ rescue RuntimeError => err
43
+ raise err unless itry < ntry
44
+ @logger.error "Upload failed with error: #{err.message}"
45
+ @logger.error "Retry attempt #{itry}/#{ntry-1}"
46
+ sleep(1)
47
+ end
48
+ end
49
+ end
50
+
51
+
52
+ end
53
+ end
54
+ end
@@ -1,3 +1,3 @@
1
1
  module Remi
2
- VERSION = '0.2.15'
2
+ VERSION = '0.2.16'
3
3
  end
@@ -13,7 +13,7 @@ Gem::Specification.new do |s|
13
13
  s.description = "Data manipulation and ETL in Ruby"
14
14
 
15
15
  s.rubyforge_project = "Remi"
16
- s.add_runtime_dependency "daru", ["~> 0.1"]
16
+ s.add_runtime_dependency "daru", ["0.1.2"]
17
17
 
18
18
  s.add_runtime_dependency 'bond', ['~> 0.5']
19
19
  s.add_runtime_dependency 'docile', ['~> 1.1']
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remi
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.15
4
+ version: 0.2.16
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sterling Paramore
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-23 00:00:00.000000000 Z
11
+ date: 2016-02-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daru
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - '='
18
18
  - !ruby/object:Gem::Version
19
- version: '0.1'
19
+ version: 0.1.2
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - '='
25
25
  - !ruby/object:Gem::Version
26
- version: '0.1'
26
+ version: 0.1.2
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bond
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -225,10 +225,12 @@ files:
225
225
  - bin/remi
226
226
  - doc/install-rbenv-os_x.md
227
227
  - features/aggregate.feature
228
+ - features/csv_file_target_job.feature
228
229
  - features/examples.feature
229
230
  - features/formulas.feature
230
231
  - features/parameters.feature
231
232
  - features/sample_job.feature
233
+ - features/sftp_file_target_job.feature
232
234
  - features/step_definitions/remi_step.rb
233
235
  - features/support/env.rb
234
236
  - features/support/env_app.rb
@@ -239,8 +241,10 @@ files:
239
241
  - jobs/aggregate_job.rb
240
242
  - jobs/all_jobs_shared.rb
241
243
  - jobs/copy_source_job.rb
244
+ - jobs/csv_file_target_job.rb
242
245
  - jobs/parameters_job.rb
243
246
  - jobs/sample_job.rb
247
+ - jobs/sftp_file_target_job.rb
244
248
  - jobs/transforms/date_diff_job.rb
245
249
  - jobs/transforms/nvl_job.rb
246
250
  - jobs/transforms/parse_date_job.rb
@@ -261,6 +265,7 @@ files:
261
265
  - lib/remi/data_target/csv_file.rb
262
266
  - lib/remi/data_target/data_frame.rb
263
267
  - lib/remi/data_target/salesforce.rb
268
+ - lib/remi/data_target/sftp_file.rb
264
269
  - lib/remi/extractor/sftp_file.rb
265
270
  - lib/remi/field_symbolizers.rb
266
271
  - lib/remi/job.rb
@@ -300,10 +305,12 @@ specification_version: 4
300
305
  summary: Remi (Ruby Extract Map Integrate)
301
306
  test_files:
302
307
  - features/aggregate.feature
308
+ - features/csv_file_target_job.feature
303
309
  - features/examples.feature
304
310
  - features/formulas.feature
305
311
  - features/parameters.feature
306
312
  - features/sample_job.feature
313
+ - features/sftp_file_target_job.feature
307
314
  - features/step_definitions/remi_step.rb
308
315
  - features/support/env.rb
309
316
  - features/support/env_app.rb