remi 0.0.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/.bundle/config +2 -0
  3. data/.gitignore +3 -2
  4. data/.rspec +2 -0
  5. data/.ruby-version +1 -0
  6. data/Gemfile +4 -0
  7. data/Gemfile.lock +123 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +94 -3
  10. data/bin/remi +8 -0
  11. data/doc/install-rbenv-os_x.md +47 -0
  12. data/lib/remi.rb +56 -9
  13. data/lib/remi/cli.rb +56 -0
  14. data/lib/remi/core/daru.rb +28 -0
  15. data/lib/remi/core/refinements.rb +21 -0
  16. data/lib/remi/core/string.rb +8 -0
  17. data/lib/remi/cucumber.rb +7 -0
  18. data/lib/remi/cucumber/business_rules.rb +504 -0
  19. data/lib/remi/cucumber/data_source.rb +63 -0
  20. data/lib/remi/data_source.rb +13 -0
  21. data/lib/remi/data_source/csv_file.rb +79 -0
  22. data/lib/remi/data_source/data_frame.rb +10 -0
  23. data/lib/remi/data_source/postgres.rb +58 -0
  24. data/lib/remi/data_source/salesforce.rb +78 -0
  25. data/lib/remi/data_subject.rb +25 -0
  26. data/lib/remi/data_target.rb +15 -0
  27. data/lib/remi/data_target/csv_file.rb +49 -0
  28. data/lib/remi/data_target/data_frame.rb +14 -0
  29. data/lib/remi/data_target/salesforce.rb +49 -0
  30. data/lib/remi/extractor/sftp_file.rb +84 -0
  31. data/lib/remi/field_symbolizers.rb +17 -0
  32. data/lib/remi/job.rb +200 -0
  33. data/lib/remi/lookup/regex_sieve.rb +55 -0
  34. data/lib/remi/project/features/examples.feature +24 -0
  35. data/lib/remi/project/features/formulas.feature +64 -0
  36. data/lib/remi/project/features/sample_job.feature +304 -0
  37. data/lib/remi/project/features/step_definitions/remi_step.rb +310 -0
  38. data/lib/remi/project/features/support/env.rb +10 -0
  39. data/lib/remi/project/features/support/env_app.rb +3 -0
  40. data/lib/remi/project/features/transforms/date_diff.feature +50 -0
  41. data/lib/remi/project/features/transforms/parse_date.feature +34 -0
  42. data/lib/remi/project/features/transforms/prefix.feature +15 -0
  43. data/lib/remi/project/jobs/all_jobs_shared.rb +25 -0
  44. data/lib/remi/project/jobs/copy_source_job.rb +12 -0
  45. data/lib/remi/project/jobs/sample_job.rb +164 -0
  46. data/lib/remi/project/jobs/transforms/date_diff_job.rb +17 -0
  47. data/lib/remi/project/jobs/transforms/parse_date_job.rb +18 -0
  48. data/lib/remi/project/jobs/transforms/prefix_job.rb +16 -0
  49. data/lib/remi/project/jobs/transforms/transform_jobs.rb +3 -0
  50. data/lib/remi/settings.rb +39 -0
  51. data/lib/remi/sf_bulk_helper.rb +265 -0
  52. data/lib/remi/source_to_target_map.rb +93 -0
  53. data/lib/remi/transform.rb +137 -0
  54. data/lib/remi/version.rb +3 -0
  55. data/remi.gemspec +25 -7
  56. data/workbooks/sample_workbook.ipynb +56 -0
  57. data/workbooks/workbook_helper.rb +1 -0
  58. metadata +234 -17
  59. data/lib/noodling.rb +0 -163
  60. data/test/test_NAME.rb +0 -19
@@ -0,0 +1,84 @@
1
+ module Remi
2
+ module Extractor
3
+
4
+ class LocalFile
5
+ def initialize(path)
6
+ @path = path
7
+ end
8
+
9
+ def extract
10
+ @path
11
+ end
12
+ end
13
+
14
+ class SftpFile
15
+
16
+ class FileNotFoundError < StandardError; end
17
+
18
+ def initialize(credentials:, remote_file:, remote_folder: '', local_folder: Settings.work_dir, port: '22', most_recent_only: false, logger: Remi::Settings.logger)
19
+ @credentials = credentials
20
+ @remote_file = remote_file
21
+ @remote_folder = remote_folder
22
+ @local_folder = local_folder
23
+ @port = port
24
+ @most_recent_only = most_recent_only
25
+ @logger = logger
26
+ end
27
+
28
+ attr_reader :logger
29
+
30
+ def extract
31
+ to_download = @most_recent_only ? Array(most_recent_entry(matching_entries)) : matching_entries
32
+ raise FileNotFoundError, "File not found: #{@remote_file}" if to_download.size == 0
33
+ download(to_download)
34
+ end
35
+
36
+ def all_entries(remote_folder = @remote_folder)
37
+ @all_entries ||= connection { |sftp| sftp.dir.entries(File.join("/", remote_folder)) }
38
+ end
39
+
40
+ def matching_entries(match_name = @remote_file)
41
+ all_entries.select { |e| match_name.match e.name }
42
+ end
43
+
44
+ def most_recent_entry(entries = matching_entries)
45
+ entries.sort_by { |e| e.attributes.createtime }.reverse!.first
46
+ end
47
+
48
+ def download(to_download = matching_entries, local_folder: @local_folder, ntry: 3)
49
+ connection do |sftp|
50
+ to_download.map do |entry|
51
+ local_file = File.join(local_folder, entry.name)
52
+ @logger.info "Downloading #{entry.name} to #{local_file}"
53
+ retry_download(ntry) { sftp.download!(entry.name, local_file) }
54
+ local_file
55
+ end
56
+ end
57
+ end
58
+
59
+
60
+ private
61
+
62
+ def connection(&block)
63
+ result = nil
64
+ Net::SFTP.start(@credentials[:host], @credentials[:username], password: @credentials[:password], port: @port) do |sftp|
65
+ result = yield sftp
66
+ end
67
+ result
68
+ end
69
+
70
+ def retry_download(ntry=2, &block)
71
+ 1.upto(ntry).each do |itry|
72
+ begin
73
+ block.call
74
+ rescue RuntimeError => err
75
+ raise err unless itry < ntry
76
+ @logger.error "Download failed with error: #{err.message}"
77
+ @logger.error "Retry attempt #{itry}/#{ntry-1}"
78
+ sleep(1)
79
+ end
80
+ end
81
+ end
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,17 @@
1
+ module Remi
2
+ module FieldSymbolizers
3
+ def self.[](symbolizer)
4
+ symbolizers[symbolizer]
5
+ end
6
+
7
+ def self.symbolizers
8
+ @symbolizers ||= {
9
+ standard: CSV::HeaderConverters[:symbol],
10
+ salesforce: lambda { |f|
11
+ f.encode(CSV::ConverterEncoding).strip.gsub(/\s+/, "_").
12
+ gsub(/\W+/, "").to_sym
13
+ }
14
+ }
15
+ end
16
+ end
17
+ end
data/lib/remi/job.rb ADDED
@@ -0,0 +1,200 @@
1
+ module Remi
2
+ module Job
3
+ module JobClassMethods
4
+ attr_accessor :params
5
+ attr_accessor :lookups
6
+ attr_accessor :sources
7
+ attr_accessor :targets
8
+ attr_accessor :transforms
9
+
10
+ def define_param(key, value)
11
+ @params ||= {}
12
+ @params[key] = value
13
+ end
14
+
15
+ def define_lookup(name, type_class, options)
16
+ @lookups ||= []
17
+ @lookups << name
18
+
19
+ define_method(name) do
20
+ iv_name = instance_variable_get("@#{name}")
21
+ return iv_name if iv_name
22
+
23
+ if type_class == Hash
24
+ lookup = options
25
+ else
26
+ lookup = type_class.new(options)
27
+ end
28
+ instance_variable_set("@#{name}", lookup)
29
+ end
30
+ end
31
+
32
+ def define_source(name, type_class, **options)
33
+ @sources ||= []
34
+ @sources << name
35
+
36
+ define_method(name) do
37
+ iv_name = instance_variable_get("@#{name}")
38
+ return iv_name if iv_name
39
+
40
+ source = type_class.new(options)
41
+ instance_variable_set("@#{name}", source)
42
+ end
43
+ end
44
+
45
+ def define_target(name, type_class, **options)
46
+ @targets ||= []
47
+ @targets << name
48
+
49
+ define_method(name) do
50
+ iv_name = instance_variable_get("@#{name}")
51
+ return iv_name if iv_name
52
+
53
+ target = type_class.new(options)
54
+ instance_variable_set("@#{name}", target)
55
+ end
56
+ end
57
+
58
+ def define_transform(name, sources: [], targets: [], &block)
59
+ @transforms ||= {}
60
+ @transforms[name] = { sources: Array(sources), targets: Array(targets) }
61
+
62
+ define_method(name) do
63
+ instance_eval { @logger.info "Running transformation #{__method__}" }
64
+ instance_eval(&block)
65
+ end
66
+ end
67
+
68
+ def params
69
+ @params || {}
70
+ end
71
+
72
+ def lookups
73
+ @lookups || []
74
+ end
75
+
76
+ def sources
77
+ @sources || []
78
+ end
79
+
80
+ def targets
81
+ @targets || []
82
+ end
83
+
84
+ def transforms
85
+ @transforms || {}
86
+ end
87
+
88
+
89
+ def work_dir
90
+ Settings.work_dir
91
+ end
92
+
93
+ def self.extended(receiver)
94
+ end
95
+
96
+ def included(receiver)
97
+ receiver.extend(JobClassMethods)
98
+ receiver.params = self.params.merge(receiver.params)
99
+ receiver.lookups = self.lookups + receiver.lookups
100
+ receiver.sources = self.sources + receiver.sources
101
+ receiver.targets = self.targets + receiver.targets
102
+ receiver.transforms = self.transforms.merge(receiver.transforms)
103
+ end
104
+ end
105
+
106
+ def self.included(receiver)
107
+ receiver.extend(JobClassMethods)
108
+ end
109
+
110
+
111
+ def params
112
+ self.class.params
113
+ end
114
+
115
+ def lookups
116
+ self.class.lookups
117
+ end
118
+
119
+ def sources
120
+ self.class.sources
121
+ end
122
+
123
+ def targets
124
+ self.class.targets
125
+ end
126
+
127
+ def transforms
128
+ self.class.transforms
129
+ end
130
+
131
+
132
+
133
+ def initialize(delete_work_dir: true, logger: Settings.logger)
134
+ @delete_work_dir = delete_work_dir
135
+ @logger = logger
136
+ create_work_dir
137
+ end
138
+
139
+ def work_dir
140
+ self.class.work_dir
141
+ end
142
+
143
+ def finalize
144
+ delete_work_dir
145
+ end
146
+
147
+ def delete_work_dir
148
+ if @delete_work_dir && (work_dir.match /^#{Dir.tmpdir}/)
149
+ @logger.info "Deleting temporary directory #{work_dir}"
150
+ FileUtils.rm_r work_dir
151
+ else
152
+ @logger.debug "Not going to delete working directory #{work_dir}"
153
+ nil
154
+ end
155
+ end
156
+
157
+ def create_work_dir
158
+ @logger.info "Creating working directory #{work_dir}"
159
+ FileUtils.mkdir_p work_dir
160
+ end
161
+
162
+ # Public: Runs any transforms that use the sources and targets selected. If
163
+ # neither sources nor targets are specified, then all transforms will be run.
164
+ # If only the source is specified, then all transforms that use any of the
165
+ # sources will be run. The same applies to specified targets.
166
+ #
167
+ # sources - Array of source names
168
+ # targets - Array of target names
169
+ #
170
+ # Returns an array containing the result of each transform.
171
+ def run_transforms_using(sources: nil, targets: nil)
172
+ transforms.map do |t, st|
173
+ selected_sources = (st[:sources] & Array(sources || st[:sources])).size > 0
174
+ selected_targets = (st[:targets] & Array(targets || st[:targets])).size > 0
175
+ self.send(t) if selected_sources && selected_targets
176
+ end
177
+ end
178
+
179
+ def run_all_transforms
180
+ transforms.map { |t, st| self.send(t) }
181
+ end
182
+
183
+ def load_all_targets
184
+ targets.each do |target|
185
+ @logger.info "Loading target #{target}"
186
+ self.send(target).tap { |t| t.respond_to?(:load) ? t.load : nil }
187
+ end
188
+ end
189
+
190
+ # Public: Runs all transforms defined in the job, then loads all targets.
191
+ #
192
+ # Returns the job instance.
193
+ def run
194
+ # Do all of the stuff here
195
+ run_all_transforms
196
+ load_all_targets
197
+ self
198
+ end
199
+ end
200
+ end
@@ -0,0 +1,55 @@
1
+ module Remi
2
+ module Lookup
3
+
4
+ # Public: RegexSieve class. The RegexSieve functions in a manner similar
5
+ # to a hash. The regex sieve is initialized with a hash where the keys are
6
+ # regular expressions and the values can be any valid Ruby object. The order
7
+ # of the keys matters. When the regex sieve is accessed using the array
8
+ # accessor [], it returns the first matching record. By default, only
9
+ # the values are returned, but the key and all matching capture groups
10
+ # can optionally be returned.
11
+ #
12
+ # Examples:
13
+ #
14
+ # r = RegexSieve.new({
15
+ # /something/ => 'Something',
16
+ # /something else/ => 'This will never get matched because the one above will match first',
17
+ # /cool$/ => 'Cool',
18
+ # /cool beans/ => 'Really Cool'
19
+ # })
20
+ #
21
+ # r['something else'] # => 'Something'
22
+ # r['cool beans'] # => 'Really Cool'
23
+ class RegexSieve
24
+ def initialize(sieve)
25
+ @sieve = sieve
26
+ end
27
+
28
+ # Public: Array accessor for Regex Sieve.
29
+ #
30
+ # key - The string that will be matched to the keys in the sieve.
31
+ # opt - By default, only the values in the hash used to initialize the sieve
32
+ # will be returned. However, if you want to return the keys or the
33
+ # capture groups then use :regex, :match, or both, respectively.
34
+ #
35
+ # Example:
36
+ # r['something'] # => 'Something'
37
+ # r['something', :regex] # => { value: 'Something', regex: /something/ }
38
+ # r['something', :match, :regex] # => { value: 'Something', regex: /something/, match: #<MatchData "something"> }
39
+ def [](key, *opt)
40
+ opt = opt | [:value]
41
+
42
+ regex_match = nil
43
+ found = @sieve.find do |regex, v|
44
+ regex_match = regex.match(key)
45
+ end
46
+
47
+ return nil if found.nil?
48
+ full_result = { value: found[1], regex: found[0], match: regex_match }
49
+
50
+ full_result.select! { |k, v| opt.include?(k) }
51
+ full_result.size > 1 ? full_result : full_result.values.first
52
+ end
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,24 @@
1
+ Feature: This tests the creation of example records.
2
+
3
+ Background:
4
+ Given the job is 'Copy Source'
5
+ And the job source 'Source Data'
6
+ And the job target 'Target Data'
7
+ And the source 'Source Data'
8
+ And the target 'Target Data'
9
+
10
+ Scenario: Simple example record loads in the source and is directly copied to target.
11
+
12
+ Given the following example record for 'Source Data':
13
+ | MyField | MyOtherField |
14
+ | Remilspot | Niblet |
15
+ Then the target field 'MyField' is set to the value "Remilspot"
16
+ And the target field 'MyOtherField' is set to the value "Niblet"
17
+
18
+ Scenario: Handling date formulas in the example data with day units.
19
+
20
+ Given the following example record for 'Source Data':
21
+ | Yesterday | ThreeDaysFromNow |
22
+ | *Yesterday* | *3 days from now* |
23
+ Then the target field 'Yesterday' is the date 1 day ago
24
+ And the target field 'ThreeDaysFromNow' is the date 3 days from now
@@ -0,0 +1,64 @@
1
+ Feature: This tests the creation of example records.
2
+
3
+ Background:
4
+ Given the job is 'Copy Source'
5
+ And the job source 'Source Data'
6
+ And the job target 'Target Data'
7
+ And the source 'Source Data'
8
+ And the target 'Target Data'
9
+
10
+ Scenario: Handling date formulas in the example data with day units.
11
+
12
+ Given the following example record for 'Source Data':
13
+ | Yesterday | Tomorrow | OneDayAgo | SevenDaysAgo | ThreeDaysFromNow |
14
+ | *Yesterday* | *Tomorrow* | *1 day ago* | *7 days ago* | *3 days from now* |
15
+ Then the target field 'Yesterday' is the date 1 day ago
16
+ And the target field 'Tomorrow' is the date 1 day from now
17
+ And the target field 'OneDayAgo' is the date 1 day ago
18
+ And the target field 'SevenDaysAgo' is the date 7 days ago
19
+ And the target field 'ThreeDaysFromNow' is the date 3 days from now
20
+
21
+ Scenario: Handling date formulas in the example data with month units.
22
+
23
+ Given the following example record for 'Source Data':
24
+ | LastMonth | NextMonth | OneMonthAgo | SevenMonthsAgo | ThreeMonthsFromNow |
25
+ | *Last Month* | *Next Month* | *1 month ago* | *7 months ago* | *3 months from now* |
26
+ Then the target field 'LastMonth' is the date 1 month ago
27
+ And the target field 'NextMonth' is the date 1 month from now
28
+ And the target field 'OneMonthAgo' is the date 1 month ago
29
+ And the target field 'SevenMonthsAgo' is the date 7 months ago
30
+ And the target field 'ThreeMonthsFromNow' is the date 3 months from now
31
+
32
+ Scenario: Handling date formulas in the example data with year units.
33
+
34
+ Given the following example record for 'Source Data':
35
+ | LastYear | NextYear | OneYearAgo | SevenYearsAgo | ThreeYearsFromNow |
36
+ | *Last Year* | *Next Year* | *1 year ago* | *7 years ago* | *3 years from now* |
37
+ Then the target field 'LastYear' is the date 1 year ago
38
+ And the target field 'NextYear' is the date 1 year from now
39
+ And the target field 'OneYearAgo' is the date 1 year ago
40
+ And the target field 'SevenYearsAgo' is the date 7 years ago
41
+ And the target field 'ThreeYearsFromNow' is the date 3 years from now
42
+
43
+ Scenario: Handling date formulas in the example data with week units.
44
+
45
+ Given the following example record for 'Source Data':
46
+ | LastWeek | NextWeek | OneWeekAgo | SevenWeeksAgo | ThreeWeeksFromNow |
47
+ | *Last Week* | *Next Week* | *1 week ago* | *7 weeks ago* | *3 weeks from now* |
48
+ Then the target field 'LastWeek' is the date 1 week ago
49
+ And the target field 'NextWeek' is the date 1 week from now
50
+ And the target field 'OneWeekAgo' is the date 1 week ago
51
+ And the target field 'SevenWeeksAgo' is the date 7 weeks ago
52
+ And the target field 'ThreeWeeksFromNow' is the date 3 weeks from now
53
+
54
+ Scenario: Handling date formulas when set explicitly in the source.
55
+
56
+ Given the following example record for 'Source Data':
57
+ | SomeDate |
58
+ | 2015-10-22 |
59
+ And the source field 'SomeDate' is set to the value "*Yesterday*"
60
+ Then the target field 'SomeDate' is the date 1 day ago
61
+
62
+ When the source field 'SomeDate' is set to the value "*2 months from now*"
63
+ Then the target field 'SomeDate' is the date 2 months from now
64
+ Then the target field 'SomeDate' is populated with "*2 months from now*"