remi 0.2.15 → 0.2.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 89aa19a0e9852eb3fcbf63cd3890102501fc27ff
4
- data.tar.gz: ef447ea96c09edd89953aabef5b9e18188d06c50
3
+ metadata.gz: c08e200bea80edb6451804d752e12d3f820211db
4
+ data.tar.gz: af6e06a889c02f13586e61c4c65b09cbbac14bb1
5
5
  SHA512:
6
- metadata.gz: 11db98f16aff0c37c83a02dd44dec3ec6e1f908c61e85aaeb277b789acc58b2eaf0706b10274591004da92e0ea66321b1ee2fe5318422772c251519150d41d80
7
- data.tar.gz: 7639ef4a4b9b3aa03385e270b5aa3ea10796dd9e9250819e706be0fa366d95dcb4c886e6ccab54e5546f0aa7a38ea6adbb61909fa6919be3350aa68dc8d6b0c0
6
+ metadata.gz: fa607c200a4c8f838e31c635fbfcd521ad14c86d734b4176880f1cdc8bf7d8e53dc8c3672af62e735a731ad8d3234f54df9982bd9778efceee1e63e7680d03f5
7
+ data.tar.gz: 18715e8761212d48bf00d32378949044fa9957650b08491f43733bb9902e753d44f2b68a86706b8e00f59c01b4b0fd68a8192ebee4d2d763279bab74a0e3bbe9
@@ -1,11 +1,11 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- remi (0.2.15)
4
+ remi (0.2.16)
5
5
  activesupport (~> 4.2)
6
6
  bond (~> 0.5)
7
7
  cucumber (~> 2.1)
8
- daru (~> 0.1)
8
+ daru (= 0.1.2)
9
9
  docile (~> 1.1)
10
10
  faker (~> 1.6)
11
11
  net-sftp (~> 2.1)
@@ -39,12 +39,12 @@ GEM
39
39
  cucumber-core (1.4.0)
40
40
  gherkin (~> 3.2.0)
41
41
  cucumber-wire (0.0.1)
42
- daru (0.1.1)
42
+ daru (0.1.2)
43
43
  reportbuilder (~> 1.4)
44
- spreadsheet (~> 1.0.3)
44
+ spreadsheet (~> 1.1.1)
45
45
  diff-lcs (1.2.5)
46
46
  docile (1.1.5)
47
- faker (1.6.1)
47
+ faker (1.6.3)
48
48
  i18n (~> 0.5)
49
49
  faraday (0.9.2)
50
50
  multipart-post (>= 1.2, < 3)
@@ -94,7 +94,7 @@ GEM
94
94
  rspec-core (~> 3.4.0)
95
95
  rspec-expectations (~> 3.4.0)
96
96
  rspec-mocks (~> 3.4.0)
97
- rspec-core (3.4.2)
97
+ rspec-core (3.4.3)
98
98
  rspec-support (~> 3.4.0)
99
99
  rspec-expectations (3.4.0)
100
100
  diff-lcs (>= 1.2.0, < 2.0)
@@ -107,7 +107,7 @@ GEM
107
107
  salesforce_bulk_api (0.0.12)
108
108
  json
109
109
  xml-simple
110
- spreadsheet (1.0.9)
110
+ spreadsheet (1.1.1)
111
111
  ruby-ole (>= 1.0)
112
112
  text-table (1.2.4)
113
113
  thread_safe (0.3.5)
@@ -0,0 +1,21 @@
1
+ Feature: Tests targets that are Csv Files.
2
+
3
+ Background:
4
+ Given the job is 'Csv File Target'
5
+ And the job target 'Some Csv File'
6
+
7
+
8
+ Scenario: Defining target csv options.
9
+
10
+ Given the target 'Some Csv File'
11
+ And the target file is delimited with a pipe
12
+ And the target file is encoded using "UTF-8" format
13
+ And the target file uses a double quote to quote embedded delimiters
14
+ And the target file uses a preceding double quote to escape an embedded quoting character
15
+ And the target file uses unix line endings
16
+ And the target file contains a header row
17
+ And the target file contains all of the following headers in this order:
18
+ | header |
19
+ | col3 |
20
+ | col1 |
21
+ | col2 |
@@ -0,0 +1,9 @@
1
+ Feature: Tests targets that are Sftp Files.
2
+
3
+ Background:
4
+ Given the job is 'Sftp File Target'
5
+ And the job target 'Some File'
6
+
7
+ Scenario: Defining the remote path.
8
+ Given the target 'Some File'
9
+ Then the file is uploaded to the remote path "some_file_*Today: %Y%m%d*.csv"
@@ -71,37 +71,52 @@ Then /^no files will be downloaded for processing$/ do
71
71
  expect { @brt.source.extract }.to raise_error Remi::Extractor::SftpFile::FileNotFoundError
72
72
  end
73
73
 
74
+ Then /^the file is uploaded to the remote path "([^"]+)"$/ do |remote_path|
75
+ expect(@brt.target.get_attrib(:remote_path)).to eq Remi::BusinessRules::ParseFormula.parse(remote_path)
76
+ end
77
+
78
+ ## CSV Options
79
+
80
+ Given /^the (source|target) file is delimited with a (\w+)$/ do |st, delimiter|
81
+ expect(@brt.send(st.to_sym).csv_options[:col_sep]).to eq Remi::BusinessRules.csv_opt_map[delimiter]
82
+ end
74
83
 
75
- Given /^the source file is delimited with a (\w+)$/ do |delimiter|
76
- expect(@brt.source.csv_options[:col_sep]).to eq Remi::BusinessRules.csv_opt_map[delimiter]
84
+ Given /^the (source|target) file is encoded using "([^"]+)" format$/ do |st, encoding|
85
+ expect(@brt.send(st.to_sym).csv_options[:encoding].split(':').first).to eq encoding
77
86
  end
78
87
 
79
- Given /^the source file is encoded using "([^"]+)" format$/ do |encoding|
80
- expect(@brt.source.csv_options[:encoding].split(':').first).to eq encoding
88
+ Given /^the (source|target) file uses a ([\w ]+) to quote embedded delimiters$/ do |st, quote_char|
89
+ expect(@brt.send(st.to_sym).csv_options[:quote_char]).to eq Remi::BusinessRules.csv_opt_map[quote_char]
81
90
  end
82
91
 
83
- Given /^the source file uses a ([\w ]+) to quote embedded delimiters$/ do |quote_char|
84
- expect(@brt.source.csv_options[:quote_char]).to eq Remi::BusinessRules.csv_opt_map[quote_char]
92
+ Given /^the (source|target) file uses a preceding ([\w ]+) to escape an embedded quoting character$/ do |st, escape_char|
93
+ expect(@brt.send(st.to_sym).csv_options[:quote_char]).to eq Remi::BusinessRules.csv_opt_map[escape_char]
85
94
  end
86
95
 
87
- Given /^the source file uses a preceding ([\w ]+) to escape an embedded quoting character$/ do |escape_char|
88
- expect(@brt.source.csv_options[:quote_char]).to eq Remi::BusinessRules.csv_opt_map[escape_char]
96
+ Given /^the (source|target) file uses ([\w ]+) line endings$/ do |st, line_endings|
97
+ expect(@brt.send(st.to_sym).csv_options[:row_sep]).to eq Remi::BusinessRules.csv_opt_map[line_endings]
89
98
  end
90
99
 
91
- Given /^the source file uses ([\w ]+) line endings$/ do |line_endings|
92
- expect(@brt.source.csv_options[:row_sep]).to eq Remi::BusinessRules.csv_opt_map[line_endings]
100
+ Given /^the (source|target) file (contains|does not contain) a header row$/ do |st, header|
101
+ expect(@brt.send(st.to_sym).csv_options[:headers]).to eq (header == 'contains')
93
102
  end
94
103
 
95
- Given /^the source file (contains|does not contain) a header row$/ do |header|
96
- expect(@brt.source.csv_options[:headers]).to eq (header == 'contains')
104
+ Given /^the (source|target) file contains at least the following headers in no particular order:$/ do |st, table|
105
+ table.rows.each do |row|
106
+ field = row.first
107
+ step "the #{st} field '#{field}'"
108
+ end
109
+ expect(@brt.send(st.to_sym).data_obj.fields.keys).to include(*@brt.send(st.to_sym).fields.names)
97
110
  end
98
111
 
99
- Given /^the source file contains at least the following headers in no particular order:$/ do |table|
112
+ Given /^the (source|target) file contains all of the following headers in this order:$/ do |st, table|
100
113
  table.rows.each do |row|
101
114
  field = row.first
102
- step "the source field '#{field}'"
115
+ step "the #{st} field '#{field}'"
103
116
  end
104
- expect(@brt.source.data_obj.fields.keys).to include(*@brt.source.fields.names)
117
+
118
+ @brt.run_transforms if st == 'target'
119
+ expect(@brt.send(st.to_sym).data_obj.fields.keys).to eq @brt.send(st.to_sym).fields.names
105
120
  end
106
121
 
107
122
 
@@ -1,5 +1 @@
1
- require_relative '../../jobs/sample_job'
2
- require_relative '../../jobs/copy_source_job'
3
- require_relative '../../jobs/transforms/transform_jobs'
4
- require_relative '../../jobs/aggregate_job'
5
- require_relative '../../jobs/parameters_job'
1
+ Dir["#{File.dirname(__FILE__)}/../../jobs/**/*_job.rb"].each { |f| require(f) }
@@ -0,0 +1,20 @@
1
+ require_relative 'all_jobs_shared'
2
+
3
# Sample job used by the "Csv File Target" feature: it declares one
# pipe-delimited CSV target and fills it with three columns of generated words.
class CsvFileTargetJob
  include AllJobsShared
  using Remi::Refinements::Daru

  # Only the column separator is overridden here; every other CSV option is
  # left to the target class.
  define_target(
    :some_csv_file,
    Remi::DataTarget::CsvFile,
    path: "#{Remi::Settings.work_dir}/some_file.csv",
    csv_options: { col_sep: '|' }
  )

  define_transform :main do
    # The first col2 value embeds both a comma and a double quote.
    columns = {
      col3: Faker::Hipster.words(10),
      col1: Faker::Hipster.words(10),
      col2: ["uh, \"oh"] + Faker::Hipster.words(9)
    }

    # Column order is stated explicitly as col3, col1, col2.
    some_csv_file.df = Daru::DataFrame.new(columns, order: %i[col3 col1 col2])
  end
end
@@ -0,0 +1,19 @@
1
+ require_relative 'all_jobs_shared'
2
+
3
# Sample job used by the "Sftp File Target" feature: it declares a single SFTP
# target whose remote file name carries the current date as YYYYMMDD.
class SftpFileTargetJob
  include AllJobsShared
  using Remi::Refinements::Daru

  define_target(
    :some_file,
    Remi::DataTarget::SftpFile,
    credentials: { host: 'example.com', username: 'user', password: 'secret' },
    local_path: "#{Remi::Settings.work_dir}/some_file.csv",
    remote_path: "some_file_#{DateTime.current.strftime('%Y%m%d')}.csv"
  )

  # The main transform is intentionally empty.
  define_transform(:main) {}
end
@@ -57,5 +57,6 @@ require 'remi/data_target.rb'
57
57
  require 'remi/data_target/data_frame'
58
58
  require 'remi/data_target/salesforce'
59
59
  require 'remi/data_target/csv_file'
60
+ require 'remi/data_target/sftp_file'
60
61
 
61
62
  require 'remi/transform'
@@ -29,9 +29,9 @@ module Remi::BusinessRules
29
29
 
30
30
  def formulas
31
31
  @formulas ||= RegexSieve.new({
32
- /(today|yesterday|tomorrow)/i => [:date_reference, :match_single_day],
33
- /(this|last|previous|next) (day|month|year|week)/i => [:date_reference, :match_single_unit],
34
- /(\d+)\s(day|days|month|months|year|years|week|weeks) (ago|from now)/i => [:date_reference, :match_multiple]
32
+ /(today|yesterday|tomorrow)(|:[^*]+)\*/i => [:date_reference, :match_single_day],
33
+ /(this|last|previous|next) (day|month|year|week)(|:[^*]+)\*/i => [:date_reference, :match_single_unit],
34
+ /(\d+)\s(day|days|month|months|year|years|week|weeks) (ago|from now)(|:[^*]+)\*/i => [:date_reference, :match_multiple]
35
35
  })
36
36
  end
37
37
 
@@ -39,7 +39,7 @@ module Remi::BusinessRules
39
39
  return form unless is_formula?(form)
40
40
 
41
41
  form_opt = formulas[form, :match]
42
- raise "Unknown formula #{form}" unless form_opt
42
+ raise "Unknown formula #{form}" unless form_opt[:match]
43
43
 
44
44
  to_replace = form.match(base_regex)[0]
45
45
  replace_with = if form_opt[:value][0] == :date_reference
@@ -54,30 +54,37 @@ module Remi::BusinessRules
54
54
 
55
55
  def date_reference(formula, captured)
56
56
  parsed = self.send("date_reference_#{formula}", *captured)
57
- Date.current.send("#{parsed[:unit]}_#{parsed[:direction]}", parsed[:quantity]).strftime('%Y-%m-%d')
57
+ Date.current.send("#{parsed[:unit]}_#{parsed[:direction]}", parsed[:quantity]).strftime(parsed[:format])
58
58
  end
59
59
 
60
- def date_reference_match_single_day(form, direction)
60
+ def parse_colon_date_format(str)
61
+ str.blank? ? '%Y-%m-%d' : str.slice(1..-1).strip
62
+ end
63
+
64
+ def date_reference_match_single_day(form, direction, format=nil)
61
65
  {
62
66
  quantity: direction.downcase == 'today' ? 0 : 1,
63
67
  unit: 'days',
64
- direction: { 'today' => 'ago', 'yesterday' => 'ago', 'tomorrow' => 'since' }[direction.downcase]
68
+ direction: { 'today' => 'ago', 'yesterday' => 'ago', 'tomorrow' => 'since' }[direction.downcase],
69
+ format: parse_colon_date_format(format)
65
70
  }
66
71
  end
67
72
 
68
- def date_reference_match_single_unit(form, direction, unit)
73
+ def date_reference_match_single_unit(form, direction, unit, format=nil)
69
74
  {
70
75
  quantity: direction.downcase == 'this' ? 0 : 1,
71
76
  unit: unit.downcase.pluralize,
72
- direction: { 'this' => 'ago', 'last' => 'ago', 'previous' => 'ago', 'next' => 'since' }[direction.downcase]
77
+ direction: { 'this' => 'ago', 'last' => 'ago', 'previous' => 'ago', 'next' => 'since' }[direction.downcase],
78
+ format: parse_colon_date_format(format)
73
79
  }
74
80
  end
75
81
 
76
- def date_reference_match_multiple(form, quantity, unit, direction)
82
+ def date_reference_match_multiple(form, quantity, unit, direction, format=nil)
77
83
  {
78
84
  quantity: quantity.to_i,
79
85
  unit: unit.downcase.pluralize,
80
- direction: { 'ago' => 'ago', 'from now' => 'since' }[direction.downcase]
86
+ direction: { 'ago' => 'ago', 'from now' => 'since' }[direction.downcase],
87
+ format: parse_colon_date_format(format)
81
88
  }
82
89
  end
83
90
  end
@@ -240,6 +247,10 @@ module Remi::BusinessRules
240
247
  @data_obj.df.size
241
248
  end
242
249
 
250
+ def get_attrib(name)
251
+ @data_obj.send(name)
252
+ end
253
+
243
254
  # Public: Converts the data subject to a hash where the keys are the table
244
255
  # columns and the values are an array for the value of column for each row.
245
256
  def column_hash
@@ -1,49 +1,42 @@
1
1
  module Remi
2
2
  module DataTarget
3
- class Salesforce
3
+ class CsvFile
4
4
  include DataTarget
5
5
 
6
- def initialize(object:, operation:, credentials:, api: :bulk, logger: Remi::Settings.logger)
7
- @sfo = object
8
- @operation = operation
9
- @credentials = credentials
10
- @api = api
6
+ def self.default_csv_options
7
+ CSV::DEFAULT_OPTIONS.merge({
8
+ headers: true,
9
+ header_converters: Remi::FieldSymbolizers[:standard],
10
+ col_sep: ',',
11
+ encoding: 'UTF-8',
12
+ quote_char: '"',
13
+ row_sep: "\n"
14
+ })
15
+ end
16
+
17
+ def initialize(path:, csv_options: {}, logger: Remi::Settings.logger)
18
+ @path = path
19
+ @csv_options = self.class.default_csv_options.merge(csv_options)
11
20
  @logger = logger
12
21
  end
13
22
 
23
+ attr_reader :path
24
+ attr_reader :csv_options
25
+
14
26
  def field_symbolizer
15
- Remi::FieldSymbolizers[:salesforce]
27
+ self.class.default_csv_options[:header_converters]
16
28
  end
17
29
 
18
30
  def load
19
31
  return true if @loaded || df.size == 0
20
32
 
21
- @logger.info "Performing Salesforce #{@operation} on object #{@sfo}"
33
+ @logger.info "Writing CSV file #{@path}"
22
34
 
23
- if @operation == :update
24
- Remi::SfBulkHelper::SfBulkUpdate.update(restforce_client, @sfo, df_as_array_of_hashes, logger: @logger)
25
- elsif @operation == :create
26
- Remi::SfBulkHelper::SfBulkCreate.create(restforce_client, @sfo, df_as_array_of_hashes, logger: @logger)
27
- end
35
+ df.write_csv @path, @csv_options
28
36
 
29
37
  @loaded = true
30
38
  end
31
39
 
32
- def restforce_client
33
- @restforce_client ||= begin
34
- client = Restforce.new(@credentials)
35
-
36
- #run a dummy query to initiate a connection. Workaround for Bulk API problem
37
- # https://github.com/yatish27/salesforce_bulk_api/issues/33
38
- client.query('SELECT Id FROM Contact LIMIT 1')
39
- client
40
- end
41
- end
42
-
43
- def df_as_array_of_hashes
44
- df.to_a[0]
45
- end
46
-
47
40
  end
48
41
  end
49
42
  end
@@ -0,0 +1,54 @@
1
module Remi
  module DataTarget
    # Data target that uploads a local file to a remote server over SFTP.
    class SftpFile
      include DataTarget

      # credentials - Hash read with the keys :host, :username, :password and,
      #               optionally, :port.
      # local_path  - Path of the local file to upload.
      # remote_path - Destination path on the server
      #               (default: the basename of local_path).
      # logger      - Object that receives #error messages when an upload
      #               attempt fails and is retried.
      def initialize(credentials:, local_path:, remote_path: File.basename(local_path), logger: Remi::Settings.logger)
        @credentials = credentials
        @local_path = local_path
        @remote_path = remote_path
        @logger = logger
      end

      attr_reader :local_path
      attr_reader :remote_path

      # Public: Uploads the local file to the remote path. A second call after
      # a completed upload is a no-op.
      #
      # Returns true.
      def load
        return true if @loaded

        connection do |sftp|
          retry_upload { sftp.upload! @local_path, @remote_path }
        end

        @loaded = true
      end

      private

      # Opens an SFTP session with the stored credentials and yields it.
      #
      # Returns whatever the given block returns.
      def connection(&block)
        result = nil
        Net::SFTP.start(@credentials[:host], @credentials[:username], password: @credentials[:password], port: @credentials[:port] || '22') do |sftp|
          result = yield sftp
        end
        result
      end

      # Calls the block, retrying after a RuntimeError until ntry attempts
      # have been made. Stops at the first successful attempt so the upload is
      # never repeated once it has succeeded.
      #
      # ntry - Maximum number of attempts (default: 2).
      #
      # Returns the block's result from the successful attempt.
      # Raises the RuntimeError of the last attempt when every attempt fails.
      def retry_upload(ntry=2, &block)
        1.upto(ntry) do |itry|
          begin
            # Leave as soon as an attempt succeeds; without this the loop
            # would run the upload again on every remaining iteration.
            return block.call
          rescue RuntimeError => err
            raise unless itry < ntry
            @logger.error "Upload failed with error: #{err.message}"
            @logger.error "Retry attempt #{itry}/#{ntry-1}"
            sleep(1)
          end
        end
      end
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module Remi
2
- VERSION = '0.2.15'
2
+ VERSION = '0.2.16'
3
3
  end
@@ -13,7 +13,7 @@ Gem::Specification.new do |s|
13
13
  s.description = "Data manipulation and ETL in Ruby"
14
14
 
15
15
  s.rubyforge_project = "Remi"
16
- s.add_runtime_dependency "daru", ["~> 0.1"]
16
+ s.add_runtime_dependency "daru", ["0.1.2"]
17
17
 
18
18
  s.add_runtime_dependency 'bond', ['~> 0.5']
19
19
  s.add_runtime_dependency 'docile', ['~> 1.1']
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remi
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.15
4
+ version: 0.2.16
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sterling Paramore
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-23 00:00:00.000000000 Z
11
+ date: 2016-02-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daru
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - '='
18
18
  - !ruby/object:Gem::Version
19
- version: '0.1'
19
+ version: 0.1.2
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - '='
25
25
  - !ruby/object:Gem::Version
26
- version: '0.1'
26
+ version: 0.1.2
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bond
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -225,10 +225,12 @@ files:
225
225
  - bin/remi
226
226
  - doc/install-rbenv-os_x.md
227
227
  - features/aggregate.feature
228
+ - features/csv_file_target_job.feature
228
229
  - features/examples.feature
229
230
  - features/formulas.feature
230
231
  - features/parameters.feature
231
232
  - features/sample_job.feature
233
+ - features/sftp_file_target_job.feature
232
234
  - features/step_definitions/remi_step.rb
233
235
  - features/support/env.rb
234
236
  - features/support/env_app.rb
@@ -239,8 +241,10 @@ files:
239
241
  - jobs/aggregate_job.rb
240
242
  - jobs/all_jobs_shared.rb
241
243
  - jobs/copy_source_job.rb
244
+ - jobs/csv_file_target_job.rb
242
245
  - jobs/parameters_job.rb
243
246
  - jobs/sample_job.rb
247
+ - jobs/sftp_file_target_job.rb
244
248
  - jobs/transforms/date_diff_job.rb
245
249
  - jobs/transforms/nvl_job.rb
246
250
  - jobs/transforms/parse_date_job.rb
@@ -261,6 +265,7 @@ files:
261
265
  - lib/remi/data_target/csv_file.rb
262
266
  - lib/remi/data_target/data_frame.rb
263
267
  - lib/remi/data_target/salesforce.rb
268
+ - lib/remi/data_target/sftp_file.rb
264
269
  - lib/remi/extractor/sftp_file.rb
265
270
  - lib/remi/field_symbolizers.rb
266
271
  - lib/remi/job.rb
@@ -300,10 +305,12 @@ specification_version: 4
300
305
  summary: Remi (Ruby Extract Map Integrate)
301
306
  test_files:
302
307
  - features/aggregate.feature
308
+ - features/csv_file_target_job.feature
303
309
  - features/examples.feature
304
310
  - features/formulas.feature
305
311
  - features/parameters.feature
306
312
  - features/sample_job.feature
313
+ - features/sftp_file_target_job.feature
307
314
  - features/step_definitions/remi_step.rb
308
315
  - features/support/env.rb
309
316
  - features/support/env_app.rb