remi 0.0.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/.bundle/config +2 -0
  3. data/.gitignore +3 -2
  4. data/.rspec +2 -0
  5. data/.ruby-version +1 -0
  6. data/Gemfile +4 -0
  7. data/Gemfile.lock +123 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +94 -3
  10. data/bin/remi +8 -0
  11. data/doc/install-rbenv-os_x.md +47 -0
  12. data/lib/remi.rb +56 -9
  13. data/lib/remi/cli.rb +56 -0
  14. data/lib/remi/core/daru.rb +28 -0
  15. data/lib/remi/core/refinements.rb +21 -0
  16. data/lib/remi/core/string.rb +8 -0
  17. data/lib/remi/cucumber.rb +7 -0
  18. data/lib/remi/cucumber/business_rules.rb +504 -0
  19. data/lib/remi/cucumber/data_source.rb +63 -0
  20. data/lib/remi/data_source.rb +13 -0
  21. data/lib/remi/data_source/csv_file.rb +79 -0
  22. data/lib/remi/data_source/data_frame.rb +10 -0
  23. data/lib/remi/data_source/postgres.rb +58 -0
  24. data/lib/remi/data_source/salesforce.rb +78 -0
  25. data/lib/remi/data_subject.rb +25 -0
  26. data/lib/remi/data_target.rb +15 -0
  27. data/lib/remi/data_target/csv_file.rb +49 -0
  28. data/lib/remi/data_target/data_frame.rb +14 -0
  29. data/lib/remi/data_target/salesforce.rb +49 -0
  30. data/lib/remi/extractor/sftp_file.rb +84 -0
  31. data/lib/remi/field_symbolizers.rb +17 -0
  32. data/lib/remi/job.rb +200 -0
  33. data/lib/remi/lookup/regex_sieve.rb +55 -0
  34. data/lib/remi/project/features/examples.feature +24 -0
  35. data/lib/remi/project/features/formulas.feature +64 -0
  36. data/lib/remi/project/features/sample_job.feature +304 -0
  37. data/lib/remi/project/features/step_definitions/remi_step.rb +310 -0
  38. data/lib/remi/project/features/support/env.rb +10 -0
  39. data/lib/remi/project/features/support/env_app.rb +3 -0
  40. data/lib/remi/project/features/transforms/date_diff.feature +50 -0
  41. data/lib/remi/project/features/transforms/parse_date.feature +34 -0
  42. data/lib/remi/project/features/transforms/prefix.feature +15 -0
  43. data/lib/remi/project/jobs/all_jobs_shared.rb +25 -0
  44. data/lib/remi/project/jobs/copy_source_job.rb +12 -0
  45. data/lib/remi/project/jobs/sample_job.rb +164 -0
  46. data/lib/remi/project/jobs/transforms/date_diff_job.rb +17 -0
  47. data/lib/remi/project/jobs/transforms/parse_date_job.rb +18 -0
  48. data/lib/remi/project/jobs/transforms/prefix_job.rb +16 -0
  49. data/lib/remi/project/jobs/transforms/transform_jobs.rb +3 -0
  50. data/lib/remi/settings.rb +39 -0
  51. data/lib/remi/sf_bulk_helper.rb +265 -0
  52. data/lib/remi/source_to_target_map.rb +93 -0
  53. data/lib/remi/transform.rb +137 -0
  54. data/lib/remi/version.rb +3 -0
  55. data/remi.gemspec +25 -7
  56. data/workbooks/sample_workbook.ipynb +56 -0
  57. data/workbooks/workbook_helper.rb +1 -0
  58. metadata +234 -17
  59. data/lib/noodling.rb +0 -163
  60. data/test/test_NAME.rb +0 -19
@@ -0,0 +1,84 @@
1
+ module Remi
2
+ module Extractor
3
+
4
# Extractor for a file that is already present on the local filesystem.
# Nothing has to be fetched, so "extracting" simply hands back the path that
# was supplied when the extractor was built.
class LocalFile
  # path - The location of the local file; stored as-is.
  def initialize(path)
    @path = path
  end

  # Returns the path given to the constructor, unchanged.
  def extract
    @path
  end
end
13
+
14
# Extractor that downloads one or more files from a remote SFTP server into a
# local folder and returns the local paths of the downloaded files.
class SftpFile

  # Raised by #extract when no remote entry matches the requested file.
  class FileNotFoundError < StandardError; end

  # credentials      - Hash read with the keys :host, :username and :password.
  # remote_file      - Regexp matched against remote entry names, or a String
  #                    that must equal the entry name exactly.
  # remote_folder    - Remote folder to search, resolved relative to "/"
  #                    (default: '').
  # local_folder     - Folder that downloads are written to
  #                    (default: Settings.work_dir).
  # port             - Port handed to Net::SFTP.start (default: '22').
  # most_recent_only - When true, only the newest matching entry is
  #                    downloaded (default: false).
  # logger           - Object receiving #info / #error messages
  #                    (default: Remi::Settings.logger).
  def initialize(credentials:, remote_file:, remote_folder: '', local_folder: Settings.work_dir, port: '22', most_recent_only: false, logger: Remi::Settings.logger)
    @credentials = credentials
    @remote_file = remote_file
    @remote_folder = remote_folder
    @local_folder = local_folder
    @port = port
    @most_recent_only = most_recent_only
    @logger = logger
  end

  attr_reader :logger

  # Public: Downloads the matching remote file(s).
  #
  # Returns an Array of local file paths.
  # Raises FileNotFoundError when nothing on the server matches.
  def extract
    # [x].compact rather than Array(x): Array() would call #to_a on the entry
    # object itself if it happened to respond to it.
    to_download = @most_recent_only ? [most_recent_entry(matching_entries)].compact : matching_entries
    raise FileNotFoundError, "File not found: #{@remote_file}" if to_download.empty?
    download(to_download)
  end

  # Public: Lists the entries of a remote folder. The listing is cached per
  # folder, so asking for a different folder no longer returns the listing of
  # whichever folder happened to be requested first.
  #
  # remote_folder - Folder to list, relative to "/" (default: @remote_folder).
  #
  # Returns whatever sftp.dir.entries returns for that folder.
  def all_entries(remote_folder = @remote_folder)
    @all_entries ||= {}
    @all_entries[remote_folder] ||= connection { |sftp| sftp.dir.entries(remote_path(remote_folder)) }
  end

  # Public: Selects the remote entries whose name matches.
  #
  # match_name - Regexp (or exact String) to compare entry names against
  #              (default: @remote_file).
  #
  # Returns an Array of entries.
  def matching_entries(match_name = @remote_file)
    all_entries.select { |e| entry_name_matches?(match_name, e.name) }
  end

  # Public: Picks the entry with the greatest attributes.createtime.
  # NOTE(review): assumes the server's entry attributes expose #createtime -
  # confirm for the SFTP protocol version in use.
  #
  # Returns a single entry, or nil when entries is empty.
  def most_recent_entry(entries = matching_entries)
    entries.max_by { |e| e.attributes.createtime }
  end

  # Public: Downloads each entry into local_folder, retrying failed transfers.
  #
  # to_download  - Entries to fetch (default: matching_entries).
  # local_folder - Destination folder (default: @local_folder).
  # ntry         - Maximum number of attempts per file (default: 3).
  #
  # Returns an Array of local file paths, one per entry.
  def download(to_download = matching_entries, local_folder: @local_folder, ntry: 3)
    connection do |sftp|
      to_download.map do |entry|
        local_file = File.join(local_folder, entry.name)
        @logger.info "Downloading #{entry.name} to #{local_file}"
        # Fetch from the same folder that was listed; a bare entry name would
        # be resolved against the session's default directory instead.
        retry_download(ntry) { sftp.download!(remote_path(@remote_folder, entry.name), local_file) }
        local_file
      end
    end
  end


  private

  # Builds an absolute remote path rooted at "/", mirroring how the folder
  # listing is addressed.
  def remote_path(*parts)
    File.join("/", *parts)
  end

  # A String must equal the entry name; anything else (normally a Regexp) is
  # asked to #match the entry name.
  def entry_name_matches?(match_name, name)
    match_name.is_a?(String) ? match_name == name : match_name.match(name)
  end

  # Opens an SFTP session, yields it, and returns the block's result.
  def connection(&block)
    result = nil
    Net::SFTP.start(@credentials[:host], @credentials[:username], password: @credentials[:password], port: @port) do |sftp|
      result = yield sftp
    end
    result
  end

  # Calls the block up to ntry times, stopping at the first success.
  # A RuntimeError on the final attempt is re-raised; earlier failures are
  # logged and retried after a one second pause.
  #
  # Returns the block's result.
  def retry_download(ntry=2, &block)
    1.upto(ntry) do |itry|
      begin
        # Leave as soon as the transfer succeeds; without this the loop would
        # repeat the download on every remaining iteration.
        return block.call
      rescue RuntimeError => err
        raise err unless itry < ntry
        @logger.error "Download failed with error: #{err.message}"
        @logger.error "Retry attempt #{itry}/#{ntry-1}"
        sleep(1)
      end
    end
  end
end
83
+ end
84
+ end
@@ -0,0 +1,17 @@
1
+ module Remi
2
# Registry of named callables that turn a raw field/header name into a Symbol.
module FieldSymbolizers
  class << self
    # Looks up a symbolizer by name.
    #
    # symbolizer - Key into the registry (:standard or :salesforce).
    #
    # Returns the callable, or nil when the name is not registered.
    def [](symbolizer)
      symbolizers[symbolizer]
    end

    # Returns the (memoized) Hash of symbolizer name => callable.
    def symbolizers
      @symbolizers ||= {
        # The header converter that the CSV library registers as :symbol.
        standard: CSV::HeaderConverters[:symbol],
        # Keeps the original letter case: trims the name, turns each run of
        # whitespace into "_" and then drops every non-word character.
        salesforce: ->(f) do
          underscored = f.encode(CSV::ConverterEncoding).strip.gsub(/\s+/, "_")
          underscored.gsub(/\W+/, "").to_sym
        end
      }
    end
  end
end
17
+ end
data/lib/remi/job.rb ADDED
@@ -0,0 +1,200 @@
1
+ module Remi
2
# Mixin that turns a class into a job: a set of named params, lookups,
# sources, targets and transforms declared at class level, plus instance
# methods to run the transforms and load the targets.
module Job
  # Class-level DSL. Extended onto every class or module that includes Job.
  module JobClassMethods
    # Only writers are generated here; the readers are defined explicitly
    # further down so that they can fall back to an empty collection.
    attr_writer :params
    attr_writer :lookups
    attr_writer :sources
    attr_writer :targets
    attr_writer :transforms

    # Public: Registers a job parameter.
    #
    # key   - Parameter name.
    # value - Parameter value.
    #
    # Returns the value.
    def define_param(key, value)
      @params ||= {}
      @params[key] = value
    end

    # Public: Registers a lookup and defines an instance method of the same
    # name that builds it on first use and memoizes it.
    #
    # name       - Name of the lookup (and of the generated method).
    # type_class - Hash to use options directly as the lookup; any other
    #              class is instantiated with type_class.new(options).
    # options    - Lookup data / constructor argument.
    def define_lookup(name, type_class, options)
      @lookups ||= []
      @lookups << name

      ivar = "@#{name}"
      define_method(name) do
        instance_variable_get(ivar) ||
          instance_variable_set(ivar, type_class == Hash ? options : type_class.new(options))
      end
    end

    # Public: Registers a source and defines a memoizing instance method of
    # the same name.
    #
    # name       - Name of the source (and of the generated method).
    # type_class - Class instantiated on first access.
    # options    - Keyword options forwarded to type_class.new. They are
    #              passed as keywords (not as one positional Hash) so that
    #              initializers declaring keyword parameters keep working on
    #              Ruby 3, where a positional Hash is no longer converted.
    def define_source(name, type_class, **options)
      @sources ||= []
      @sources << name

      ivar = "@#{name}"
      define_method(name) do
        instance_variable_get(ivar) || instance_variable_set(ivar, type_class.new(**options))
      end
    end

    # Public: Registers a target and defines a memoizing instance method of
    # the same name. Arguments as for define_source.
    def define_target(name, type_class, **options)
      @targets ||= []
      @targets << name

      ivar = "@#{name}"
      define_method(name) do
        instance_variable_get(ivar) || instance_variable_set(ivar, type_class.new(**options))
      end
    end

    # Public: Registers a transform and defines an instance method of the
    # same name that logs and then evaluates the block on the job instance.
    #
    # name    - Name of the transform (and of the generated method).
    # sources - Source name or Array of source names the transform uses.
    # targets - Target name or Array of target names the transform uses.
    # block   - Body of the transform.
    def define_transform(name, sources: [], targets: [], &block)
      @transforms ||= {}
      @transforms[name] = { sources: Array(sources), targets: Array(targets) }

      define_method(name) do
        # The generated method already runs on the job instance, so @logger
        # can be used directly.
        @logger.info "Running transformation #{name}"
        instance_eval(&block)
      end
    end

    # Returns the Hash of defined params (empty when none are defined).
    def params
      @params || {}
    end

    # Returns the Array of defined lookup names.
    def lookups
      @lookups || []
    end

    # Returns the Array of defined source names.
    def sources
      @sources || []
    end

    # Returns the Array of defined target names.
    def targets
      @targets || []
    end

    # Returns the Hash of transform name => { sources:, targets: }.
    def transforms
      @transforms || {}
    end


    # Returns the working directory taken from Settings.
    def work_dir
      Settings.work_dir
    end

    # Intentionally empty hook.
    def self.extended(receiver)
    end

    # Hook that fires when a module that was extended with JobClassMethods
    # is itself included somewhere: the receiver gets the class-level DSL
    # and starts out with this module's definitions merged with its own.
    def included(receiver)
      receiver.extend(JobClassMethods)
      receiver.params     = self.params.merge(receiver.params)
      receiver.lookups    = self.lookups + receiver.lookups
      receiver.sources    = self.sources + receiver.sources
      receiver.targets    = self.targets + receiver.targets
      receiver.transforms = self.transforms.merge(receiver.transforms)
    end
  end

  # Adds the class-level DSL to whatever includes Job.
  def self.included(receiver)
    receiver.extend(JobClassMethods)
  end


  # Instance-level readers; each delegates to the class-level definition.
  def params
    self.class.params
  end

  def lookups
    self.class.lookups
  end

  def sources
    self.class.sources
  end

  def targets
    self.class.targets
  end

  def transforms
    self.class.transforms
  end



  # delete_work_dir - Whether #finalize may remove the working directory
  #                   (default: true).
  # logger          - Object receiving #info / #debug messages
  #                   (default: Settings.logger).
  #
  # Creates the working directory as a side effect.
  def initialize(delete_work_dir: true, logger: Settings.logger)
    @delete_work_dir = delete_work_dir
    @logger = logger
    create_work_dir
  end

  # Returns the working directory defined at class level.
  def work_dir
    self.class.work_dir
  end

  # Public: Cleans up after the job by removing the working directory.
  def finalize
    delete_work_dir
  end

  # Public: Removes the working directory, but only when deletion was
  # requested and the directory lies strictly inside Dir.tmpdir.
  #
  # Returns nil when nothing is deleted.
  def delete_work_dir
    # Plain prefix comparison against "<tmpdir>/": the temp path is not
    # treated as a regular expression, and neither the temp dir itself nor a
    # sibling such as "<tmpdir>foo" qualifies for deletion.
    tmp_root = File.join(Dir.tmpdir, '')
    if @delete_work_dir && work_dir.to_s.start_with?(tmp_root)
      @logger.info "Deleting temporary directory #{work_dir}"
      FileUtils.rm_r work_dir
    else
      @logger.debug "Not going to delete working directory #{work_dir}"
      nil
    end
  end

  # Public: Creates the working directory (and any missing parents).
  def create_work_dir
    @logger.info "Creating working directory #{work_dir}"
    FileUtils.mkdir_p work_dir
  end

  # Public: Runs any transforms that use the sources and targets selected. If
  # source and target is not specified, then all transforms will be run.
  # If only the source is specified, then all transforms that use any of the
  # sources will be run. Same for specified targets.
  #
  # sources - Array of source names
  # targets - Array of target names
  #
  # Returns an array containing the result of each transform (nil for
  # transforms that were not selected).
  def run_transforms_using(sources: nil, targets: nil)
    transforms.map do |t, st|
      selected_sources = !(st[:sources] & Array(sources || st[:sources])).empty?
      selected_targets = !(st[:targets] & Array(targets || st[:targets])).empty?
      self.send(t) if selected_sources && selected_targets
    end
  end

  # Public: Runs every defined transform.
  #
  # Returns an array containing the result of each transform.
  def run_all_transforms
    transforms.map { |t, _st| self.send(t) }
  end

  # Public: Calls #load on every target that responds to it.
  #
  # Returns the Array of target names.
  def load_all_targets
    targets.each do |target|
      @logger.info "Loading target #{target}"
      self.send(target).tap { |t| t.respond_to?(:load) ? t.load : nil }
    end
  end

  # Public: Runs all transforms defined in the job and then loads all targets.
  #
  # Returns the job instance.
  def run
    run_all_transforms
    load_all_targets
    self
  end
end
200
+ end
@@ -0,0 +1,55 @@
1
+ module Remi
2
+ module Lookup
3
+
4
+ # Public: RegexSieve class. The RegexSieve functions in a manner similar to
5
+ # a hash. The regex sieve is initialized with a hash where the keys are
6
+ # regular expressions and the values can be any valid Ruby object. The order
7
+ # of the keys matters. When the regex sieve is accessed using the array
8
+ # accessor [], it returns the first matching record. By default, only
9
+ # the values are returned, but the key and all matching capture groups
10
+ # can optionally be returned.
11
+ #
12
+ # Examples:
13
+ #
14
+ # r = RegexSieve.new({
15
+ # /something/ => 'Something',
16
+ # /something else/ => 'This will never get matched because the one above will match first',
17
+ # /cool$/ => 'Cool',
18
+ # /cool beans/ => 'Really Cool'
19
+ # })
20
+ #
21
+ # r['something else'] # => 'Something'
22
+ # r['cool beans'] # => 'Really Cool'
23
class RegexSieve
  # sieve - Hash whose keys are regular expressions; iteration order decides
  #         which entry wins when several keys match.
  def initialize(sieve)
    @sieve = sieve
  end

  # Public: Array accessor for Regex Sieve.
  #
  # key - The string that will be matched to the keys in the sieve.
  # opt - By default, only the value stored for the first matching key is
  #       returned. Pass :regex and/or :match to additionally get the
  #       matching key and/or the match data.
  #
  # Example:
  #   r['something']                 # => 'Something'
  #   r['something', :regex]         # => { value: 'Something', regex: /something/ }
  #   r['something', :match, :regex] # => { value: 'Something', regex: /something/, match: #<MatchData "something"> }
  #
  # Returns nil when no key matches, the bare value when nothing beyond the
  # value was requested, and otherwise a Hash of the requested parts.
  def [](key, *opt)
    wanted = opt | [:value]

    # Remember the match data of the attempt that stopped the search.
    match_data = nil
    hit = @sieve.find { |pattern, _value| match_data = pattern.match(key) }
    return nil if hit.nil?

    pattern, value = hit
    picked = { value: value, regex: pattern, match: match_data }.select do |part, _|
      wanted.include?(part)
    end

    if picked.size > 1
      picked
    else
      picked.values.first
    end
  end
end
54
+ end
55
+ end
@@ -0,0 +1,24 @@
1
+ Feature: This tests the creation of example records.
2
+
3
+ Background:
4
+ Given the job is 'Copy Source'
5
+ And the job source 'Source Data'
6
+ And the job target 'Target Data'
7
+ And the source 'Source Data'
8
+ And the target 'Target Data'
9
+
10
+ Scenario: Simple example record loads in the source and is directly copied to target.
11
+
12
+ Given the following example record for 'Source Data':
13
+ | MyField | MyOtherField |
14
+ | Remilspot | Niblet |
15
+ Then the target field 'MyField' is set to the value "Remilspot"
16
+ And the target field 'MyOtherField' is set to the value "Niblet"
17
+
18
+ Scenario: Handling date formulas in the example data with day units.
19
+
20
+ Given the following example record for 'Source Data':
21
+ | Yesterday | ThreeDaysFromNow |
22
+ | *Yesterday* | *3 days from now* |
23
+ Then the target field 'Yesterday' is the date 1 day ago
24
+ And the target field 'ThreeDaysFromNow' is the date 3 days from now
@@ -0,0 +1,64 @@
1
+ Feature: This tests the handling of date formulas in example records.
2
+
3
+ Background:
4
+ Given the job is 'Copy Source'
5
+ And the job source 'Source Data'
6
+ And the job target 'Target Data'
7
+ And the source 'Source Data'
8
+ And the target 'Target Data'
9
+
10
+ Scenario: Handling date formulas in the example data with day units.
11
+
12
+ Given the following example record for 'Source Data':
13
+ | Yesterday | Tomorrow | OneDayAgo | SevenDaysAgo | ThreeDaysFromNow |
14
+ | *Yesterday* | *Tomorrow* | *1 day ago* | *7 days ago* | *3 days from now* |
15
+ Then the target field 'Yesterday' is the date 1 day ago
16
+ And the target field 'Tomorrow' is the date 1 day from now
17
+ And the target field 'OneDayAgo' is the date 1 day ago
18
+ And the target field 'SevenDaysAgo' is the date 7 days ago
19
+ And the target field 'ThreeDaysFromNow' is the date 3 days from now
20
+
21
+ Scenario: Handling date formulas in the example data with month units.
22
+
23
+ Given the following example record for 'Source Data':
24
+ | LastMonth | NextMonth | OneMonthAgo | SevenMonthsAgo | ThreeMonthsFromNow |
25
+ | *Last Month* | *Next Month* | *1 month ago* | *7 months ago* | *3 months from now* |
26
+ Then the target field 'LastMonth' is the date 1 month ago
27
+ And the target field 'NextMonth' is the date 1 month from now
28
+ And the target field 'OneMonthAgo' is the date 1 month ago
29
+ And the target field 'SevenMonthsAgo' is the date 7 months ago
30
+ And the target field 'ThreeMonthsFromNow' is the date 3 months from now
31
+
32
+ Scenario: Handling date formulas in the example data with year units.
33
+
34
+ Given the following example record for 'Source Data':
35
+ | LastYear | NextYear | OneYearAgo | SevenYearsAgo | ThreeYearsFromNow |
36
+ | *Last Year* | *Next Year* | *1 year ago* | *7 years ago* | *3 years from now* |
37
+ Then the target field 'LastYear' is the date 1 year ago
38
+ And the target field 'NextYear' is the date 1 year from now
39
+ And the target field 'OneYearAgo' is the date 1 year ago
40
+ And the target field 'SevenYearsAgo' is the date 7 years ago
41
+ And the target field 'ThreeYearsFromNow' is the date 3 years from now
42
+
43
+ Scenario: Handling date formulas in the example data with week units.
44
+
45
+ Given the following example record for 'Source Data':
46
+ | LastWeek | NextWeek | OneWeekAgo | SevenWeeksAgo | ThreeWeeksFromNow |
47
+ | *Last Week* | *Next Week* | *1 week ago* | *7 weeks ago* | *3 weeks from now* |
48
+ Then the target field 'LastWeek' is the date 1 week ago
49
+ And the target field 'NextWeek' is the date 1 week from now
50
+ And the target field 'OneWeekAgo' is the date 1 week ago
51
+ And the target field 'SevenWeeksAgo' is the date 7 weeks ago
52
+ And the target field 'ThreeWeeksFromNow' is the date 3 weeks from now
53
+
54
+ Scenario: Handling date formulas when set explicitly in the source.
55
+
56
+ Given the following example record for 'Source Data':
57
+ | SomeDate |
58
+ | 2015-10-22 |
59
+ And the source field 'SomeDate' is set to the value "*Yesterday*"
60
+ Then the target field 'SomeDate' is the date 1 day ago
61
+
62
+ When the source field 'SomeDate' is set to the value "*2 months from now*"
63
+ Then the target field 'SomeDate' is the date 2 months from now
64
+ Then the target field 'SomeDate' is populated with "*2 months from now*"