remi 0.0.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.bundle/config +2 -0
- data/.gitignore +3 -2
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +123 -0
- data/LICENSE.txt +21 -0
- data/README.md +94 -3
- data/bin/remi +8 -0
- data/doc/install-rbenv-os_x.md +47 -0
- data/lib/remi.rb +56 -9
- data/lib/remi/cli.rb +56 -0
- data/lib/remi/core/daru.rb +28 -0
- data/lib/remi/core/refinements.rb +21 -0
- data/lib/remi/core/string.rb +8 -0
- data/lib/remi/cucumber.rb +7 -0
- data/lib/remi/cucumber/business_rules.rb +504 -0
- data/lib/remi/cucumber/data_source.rb +63 -0
- data/lib/remi/data_source.rb +13 -0
- data/lib/remi/data_source/csv_file.rb +79 -0
- data/lib/remi/data_source/data_frame.rb +10 -0
- data/lib/remi/data_source/postgres.rb +58 -0
- data/lib/remi/data_source/salesforce.rb +78 -0
- data/lib/remi/data_subject.rb +25 -0
- data/lib/remi/data_target.rb +15 -0
- data/lib/remi/data_target/csv_file.rb +49 -0
- data/lib/remi/data_target/data_frame.rb +14 -0
- data/lib/remi/data_target/salesforce.rb +49 -0
- data/lib/remi/extractor/sftp_file.rb +84 -0
- data/lib/remi/field_symbolizers.rb +17 -0
- data/lib/remi/job.rb +200 -0
- data/lib/remi/lookup/regex_sieve.rb +55 -0
- data/lib/remi/project/features/examples.feature +24 -0
- data/lib/remi/project/features/formulas.feature +64 -0
- data/lib/remi/project/features/sample_job.feature +304 -0
- data/lib/remi/project/features/step_definitions/remi_step.rb +310 -0
- data/lib/remi/project/features/support/env.rb +10 -0
- data/lib/remi/project/features/support/env_app.rb +3 -0
- data/lib/remi/project/features/transforms/date_diff.feature +50 -0
- data/lib/remi/project/features/transforms/parse_date.feature +34 -0
- data/lib/remi/project/features/transforms/prefix.feature +15 -0
- data/lib/remi/project/jobs/all_jobs_shared.rb +25 -0
- data/lib/remi/project/jobs/copy_source_job.rb +12 -0
- data/lib/remi/project/jobs/sample_job.rb +164 -0
- data/lib/remi/project/jobs/transforms/date_diff_job.rb +17 -0
- data/lib/remi/project/jobs/transforms/parse_date_job.rb +18 -0
- data/lib/remi/project/jobs/transforms/prefix_job.rb +16 -0
- data/lib/remi/project/jobs/transforms/transform_jobs.rb +3 -0
- data/lib/remi/settings.rb +39 -0
- data/lib/remi/sf_bulk_helper.rb +265 -0
- data/lib/remi/source_to_target_map.rb +93 -0
- data/lib/remi/transform.rb +137 -0
- data/lib/remi/version.rb +3 -0
- data/remi.gemspec +25 -7
- data/workbooks/sample_workbook.ipynb +56 -0
- data/workbooks/workbook_helper.rb +1 -0
- metadata +234 -17
- data/lib/noodling.rb +0 -163
- data/test/test_NAME.rb +0 -19
@@ -0,0 +1,84 @@
|
|
1
|
+
module Remi
|
2
|
+
module Extractor
|
3
|
+
|
4
|
+
# Extractor for a file that is already available locally: nothing is
# transferred, the configured path is simply handed back.
class LocalFile
  # path - The location of the local file.
  def initialize(path)
    @path = path
  end

  # Returns the path given at construction, unchanged.
  def extract
    @path
  end
end
|
13
|
+
|
14
|
+
# Downloads files from a folder on an SFTP server into a local folder.
#
# Remote entries are selected by matching their names against +remote_file+;
# optionally only the most recently created match is downloaded.
class SftpFile

  # Raised by #extract when no remote entry matches +remote_file+.
  class FileNotFoundError < StandardError; end

  # credentials      - Hash read with the keys :host, :username and :password.
  # remote_file      - Pattern matched against each remote entry name
  #                    (invoked as remote_file.match(name)).
  # remote_folder    - Remote folder, joined onto "/" (default: '').
  # local_folder     - Folder downloaded files are written to
  #                    (default: Settings.work_dir).
  # port             - Port handed to Net::SFTP.start (default: '22').
  # most_recent_only - When true, only the newest matching entry is fetched.
  # logger           - Receives progress (info) and retry (error) messages.
  def initialize(credentials:, remote_file:, remote_folder: '', local_folder: Settings.work_dir, port: '22', most_recent_only: false, logger: Remi::Settings.logger)
    @credentials = credentials
    @remote_file = remote_file
    @remote_folder = remote_folder
    @local_folder = local_folder
    @port = port
    @most_recent_only = most_recent_only
    @logger = logger
  end

  attr_reader :logger

  # Public: Downloads the matching remote file(s).
  #
  # Returns an Array of local file paths.
  # Raises FileNotFoundError when nothing on the server matches.
  def extract
    to_download =
      if @most_recent_only
        # Wrap explicitly rather than with Array(), which would splat any
        # entry object that happens to respond to to_a.
        [most_recent_entry(matching_entries)].compact
      else
        matching_entries
      end

    raise FileNotFoundError, "File not found: #{@remote_file}" if to_download.empty?
    download(to_download)
  end

  # Public: Lists the entries of a remote folder. The listing is cached per
  # folder, so asking for a different folder triggers a fresh listing instead
  # of returning the first one that was fetched.
  def all_entries(remote_folder = @remote_folder)
    @all_entries ||= {}
    @all_entries[remote_folder] ||= connection { |sftp| sftp.dir.entries(File.join("/", remote_folder)) }
  end

  # Public: Entries of the configured remote folder whose name matches
  # +match_name+.
  def matching_entries(match_name = @remote_file)
    all_entries.select { |e| match_name.match e.name }
  end

  # Public: The entry with the greatest attributes.createtime, or nil when
  # +entries+ is empty.
  # NOTE(review): whether createtime is populated may depend on the SFTP
  # protocol version negotiated with the server - confirm.
  def most_recent_entry(entries = matching_entries)
    entries.max_by { |e| e.attributes.createtime }
  end

  # Public: Downloads each entry into +local_folder+.
  #
  # to_download  - Entries (responding to #name) to fetch.
  # local_folder - Destination folder (default: the configured local folder).
  # ntry         - Maximum number of attempts per file.
  #
  # Returns an Array of the local file paths, in the order given.
  def download(to_download = matching_entries, local_folder: @local_folder, ntry: 3)
    connection do |sftp|
      to_download.map do |entry|
        # Entries were listed from "/<remote_folder>", so fetch them from
        # there as well instead of relying on the session's default directory.
        remote_file = File.join("/", @remote_folder, entry.name)
        local_file = File.join(local_folder, entry.name)
        @logger.info "Downloading #{entry.name} to #{local_file}"
        retry_download(ntry) { sftp.download!(remote_file, local_file) }
        local_file
      end
    end
  end


  private

  # Opens an SFTP session, yields it, and returns the block's result.
  def connection
    result = nil
    Net::SFTP.start(@credentials[:host], @credentials[:username], password: @credentials[:password], port: @port) do |sftp|
      result = yield sftp
    end
    result
  end

  # Runs the block, retrying on RuntimeError up to +ntry+ attempts in total
  # with a one second pause between attempts. The error from the final
  # attempt is re-raised.
  def retry_download(ntry = 2)
    1.upto(ntry) do |itry|
      begin
        # Stop as soon as an attempt succeeds; without this the block would
        # be executed again on every remaining iteration.
        return yield
      rescue RuntimeError => err
        raise err unless itry < ntry
        @logger.error "Download failed with error: #{err.message}"
        @logger.error "Retry attempt #{itry}/#{ntry-1}"
        sleep(1)
      end
    end
  end
end
|
83
|
+
end
|
84
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Remi
|
2
|
+
# Registry of callables that turn a raw field/header name into a Symbol.
module FieldSymbolizers
  class << self
    # Looks up a symbolizer by name (:standard or :salesforce).
    #
    # Returns the callable, or nil when the name is not registered.
    def [](symbolizer)
      symbolizers[symbolizer]
    end

    # Returns the memoized Hash of symbolizer name => callable.
    def symbolizers
      @symbolizers ||= {
        # The stock CSV header converter.
        standard: CSV::HeaderConverters[:symbol],
        # Keeps the original letter case: whitespace runs become a single
        # underscore and every remaining non-word character is dropped.
        salesforce: lambda do |field|
          trimmed = field.encode(CSV::ConverterEncoding).strip
          underscored = trimmed.gsub(/\s+/, "_")
          underscored.gsub(/\W+/, "").to_sym
        end
      }
    end
  end
end
|
17
|
+
end
|
data/lib/remi/job.rb
ADDED
@@ -0,0 +1,200 @@
|
|
1
|
+
module Remi
|
2
|
+
# Mixin that turns a class into a job: class-level declarations of params,
# lookups, sources, targets and transforms, plus instance methods that run
# the transforms and load the targets.
module Job
  # Class-level DSL added to anything that includes Job.
  module JobClassMethods
    # Only writers are generated; the readers below supply empty defaults.
    attr_writer :params, :lookups, :sources, :targets, :transforms

    # Public: Registers a job parameter.
    def define_param(key, value)
      @params ||= {}
      @params[key] = value
    end

    # Public: Declares a lookup and defines a memoized instance reader for it.
    #
    # name       - Name of the lookup (and of the reader method).
    # type_class - Hash to use +options+ directly as the lookup, otherwise a
    #              class instantiated with type_class.new(options).
    # options    - The lookup data / constructor argument.
    def define_lookup(name, type_class, options)
      @lookups ||= []
      @lookups << name

      define_memoized_reader(name) do
        type_class == Hash ? options : type_class.new(options)
      end
    end

    # Public: Declares a data source and defines a memoized instance reader
    # that builds it from +type_class+ on first use.
    def define_source(name, type_class, **options)
      @sources ||= []
      @sources << name

      # Forward as keywords so initializers that declare keyword arguments
      # keep working on Ruby 3, where a positional Hash is no longer
      # converted to keywords.
      define_memoized_reader(name) { type_class.new(**options) }
    end

    # Public: Declares a data target and defines a memoized instance reader
    # that builds it from +type_class+ on first use.
    def define_target(name, type_class, **options)
      @targets ||= []
      @targets << name

      define_memoized_reader(name) { type_class.new(**options) }
    end

    # Public: Declares a transform.
    #
    # name    - Name of the transform (and of the instance method running it).
    # sources - Source name(s) the transform uses.
    # targets - Target name(s) the transform populates.
    # block   - Body of the transform, evaluated in the job instance.
    def define_transform(name, sources: [], targets: [], &block)
      @transforms ||= {}
      @transforms[name] = { sources: Array(sources), targets: Array(targets) }

      define_method(name) do
        @logger.info "Running transformation #{name}"
        instance_eval(&block)
      end
    end

    # Readers return an empty collection until something has been defined.
    def params
      @params || {}
    end

    def lookups
      @lookups || []
    end

    def sources
      @sources || []
    end

    def targets
      @targets || []
    end

    def transforms
      @transforms || {}
    end

    def work_dir
      Settings.work_dir
    end

    # Hook: runs when a module that itself carries this DSL is included
    # somewhere else. The receiver gets the DSL and the including module's
    # definitions, with the receiver's own definitions taking precedence.
    def included(receiver)
      receiver.extend(JobClassMethods)
      receiver.params     = self.params.merge(receiver.params)
      receiver.lookups    = self.lookups + receiver.lookups
      receiver.sources    = self.sources + receiver.sources
      receiver.targets    = self.targets + receiver.targets
      receiver.transforms = self.transforms.merge(receiver.transforms)
    end

    private

    # Defines instance method +name+ that returns @<name>, building it with
    # +builder+ the first time it is needed.
    def define_memoized_reader(name, &builder)
      ivar = "@#{name}"
      define_method(name) do
        instance_variable_get(ivar) || instance_variable_set(ivar, builder.call)
      end
    end
  end

  def self.included(receiver)
    receiver.extend(JobClassMethods)
  end


  # Instance-level views of the class-level definitions.
  def params
    self.class.params
  end

  def lookups
    self.class.lookups
  end

  def sources
    self.class.sources
  end

  def targets
    self.class.targets
  end

  def transforms
    self.class.transforms
  end


  # delete_work_dir - Whether #finalize may remove the working directory.
  # logger          - Logger used for progress messages.
  def initialize(delete_work_dir: true, logger: Settings.logger)
    @delete_work_dir = delete_work_dir
    @logger = logger
    create_work_dir
  end

  def work_dir
    self.class.work_dir
  end

  # Public: Cleans up after the job.
  def finalize
    delete_work_dir
  end

  # Public: Removes the working directory, but only when deletion was
  # requested and the directory lives under the system temp directory.
  #
  # Returns nil when nothing was deleted.
  def delete_work_dir
    # Plain prefix comparison: interpolating Dir.tmpdir into a regex would
    # treat characters such as "." or "+" in the path as metacharacters.
    if @delete_work_dir && work_dir.start_with?(Dir.tmpdir)
      @logger.info "Deleting temporary directory #{work_dir}"
      FileUtils.rm_r work_dir
    else
      @logger.debug "Not going to delete working directory #{work_dir}"
      nil
    end
  end

  def create_work_dir
    @logger.info "Creating working directory #{work_dir}"
    FileUtils.mkdir_p work_dir
  end

  # Public: Runs any transforms that use the sources and targets selected. If
  # source and target is not specified, then all transforms will be run.
  # If only the source is specified, then all transforms that use any of the
  # sources will be run. Same for specified targets.
  #
  # sources - Array of source names
  # targets - Array of target names
  #
  # Returns an array containing the result of each transform (nil for
  # transforms that were not selected).
  def run_transforms_using(sources: nil, targets: nil)
    transforms.map do |t, st|
      # An omitted filter selects everything, including transforms that
      # declare no sources/targets at all.
      selected_sources = sources.nil? || !(st[:sources] & Array(sources)).empty?
      selected_targets = targets.nil? || !(st[:targets] & Array(targets)).empty?
      self.send(t) if selected_sources && selected_targets
    end
  end

  # Public: Runs every transform, in definition order.
  def run_all_transforms
    transforms.map { |t, _st| self.send(t) }
  end

  # Public: Calls #load on every target that responds to it.
  def load_all_targets
    targets.each do |target|
      @logger.info "Loading target #{target}"
      self.send(target).tap { |t| t.load if t.respond_to?(:load) }
    end
  end

  # Public: Runs all transforms defined in the job, then loads all targets.
  #
  # Returns the job instance.
  def run
    run_all_transforms
    load_all_targets
    self
  end
end
|
200
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Remi
|
2
|
+
module Lookup
|
3
|
+
|
4
|
+
# Public: RegexSieve class.  The RegexSieve functions in a manner similar to
# a hash.  The regex sieve is initialized with a hash where the keys are
# regular expressions and the values can be any valid Ruby object.  The order
# of the keys matters.  When the regex sieve is accessed using the array
# accessor [], it returns the first matching record.  By default, only
# the values are returned, but the key and all matching capture groups
# can optionally be returned.
#
# Examples:
#
#   r = RegexSieve.new({
#     /something/ => 'Something',
#     /something else/ => 'This will never get matched because the one above will match first',
#     /cool$/ => 'Cool',
#     /cool beans/ => 'Really Cool'
#   })
#
#   r['something else'] # => 'Something'
#   r['cool beans'] # => 'Really Cool'
class RegexSieve
  def initialize(sieve)
    @sieve = sieve
  end

  # Public: Array accessor for Regex Sieve.
  #
  # key - The string that will be matched to the keys in the sieve.
  # opt - By default, only the value stored for the first matching regex is
  #       returned.  Pass :regex and/or :match to also get the matching
  #       regex and/or the MatchData; the result is then a hash.
  #
  # Example:
  #   r['something'] # => 'Something'
  #   r['something', :regex] # => { value: 'Something', regex: /something/ }
  #   r['something', :match, :regex] # => { value: 'Something', regex: /something/, match: #<MatchData "something"> }
  #
  # Returns nil when no regex in the sieve matches.
  def [](key, *opt)
    wanted = opt | [:value]

    @sieve.each do |regex, value|
      match_data = regex.match(key)
      next unless match_data

      # First hit wins: keep only the requested parts of the record.
      record = { value: value, regex: regex, match: match_data }
      picked = record.select { |part, _| wanted.include?(part) }
      return picked.size > 1 ? picked : picked.values.first
    end

    nil
  end
end
|
54
|
+
end
|
55
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
Feature: This tests the creation of example records.
|
2
|
+
|
3
|
+
Background:
|
4
|
+
Given the job is 'Copy Source'
|
5
|
+
And the job source 'Source Data'
|
6
|
+
And the job target 'Target Data'
|
7
|
+
And the source 'Source Data'
|
8
|
+
And the target 'Target Data'
|
9
|
+
|
10
|
+
Scenario: Simple example record loads in the source and is directly copied to target.
|
11
|
+
|
12
|
+
Given the following example record for 'Source Data':
|
13
|
+
| MyField | MyOtherField |
|
14
|
+
| Remilspot | Niblet |
|
15
|
+
Then the target field 'MyField' is set to the value "Remilspot"
|
16
|
+
And the target field 'MyOtherField' is set to the value "Niblet"
|
17
|
+
|
18
|
+
Scenario: Handling date formulas in the example data with day units.
|
19
|
+
|
20
|
+
Given the following example record for 'Source Data':
|
21
|
+
| Yesterday | ThreeDaysFromNow |
|
22
|
+
| *Yesterday* | *3 days from now* |
|
23
|
+
Then the target field 'Yesterday' is the date 1 day ago
|
24
|
+
And the target field 'ThreeDaysFromNow' is the date 3 days from now
|
@@ -0,0 +1,64 @@
|
|
1
|
+
Feature: This tests the handling of date formulas in example records.
|
2
|
+
|
3
|
+
Background:
|
4
|
+
Given the job is 'Copy Source'
|
5
|
+
And the job source 'Source Data'
|
6
|
+
And the job target 'Target Data'
|
7
|
+
And the source 'Source Data'
|
8
|
+
And the target 'Target Data'
|
9
|
+
|
10
|
+
Scenario: Handling date formulas in the example data with day units.
|
11
|
+
|
12
|
+
Given the following example record for 'Source Data':
|
13
|
+
| Yesterday | Tomorrow | OneDayAgo | SevenDaysAgo | ThreeDaysFromNow |
|
14
|
+
| *Yesterday* | *Tomorrow* | *1 day ago* | *7 days ago* | *3 days from now* |
|
15
|
+
Then the target field 'Yesterday' is the date 1 day ago
|
16
|
+
And the target field 'Tomorrow' is the date 1 day from now
|
17
|
+
And the target field 'OneDayAgo' is the date 1 day ago
|
18
|
+
And the target field 'SevenDaysAgo' is the date 7 days ago
|
19
|
+
And the target field 'ThreeDaysFromNow' is the date 3 days from now
|
20
|
+
|
21
|
+
Scenario: Handling date formulas in the example data with month units.
|
22
|
+
|
23
|
+
Given the following example record for 'Source Data':
|
24
|
+
| LastMonth | NextMonth | OneMonthAgo | SevenMonthsAgo | ThreeMonthsFromNow |
|
25
|
+
| *Last Month* | *Next Month* | *1 month ago* | *7 months ago* | *3 months from now* |
|
26
|
+
Then the target field 'LastMonth' is the date 1 month ago
|
27
|
+
And the target field 'NextMonth' is the date 1 month from now
|
28
|
+
And the target field 'OneMonthAgo' is the date 1 month ago
|
29
|
+
And the target field 'SevenMonthsAgo' is the date 7 months ago
|
30
|
+
And the target field 'ThreeMonthsFromNow' is the date 3 months from now
|
31
|
+
|
32
|
+
Scenario: Handling date formulas in the example data with year units.
|
33
|
+
|
34
|
+
Given the following example record for 'Source Data':
|
35
|
+
| LastYear | NextYear | OneYearAgo | SevenYearsAgo | ThreeYearsFromNow |
|
36
|
+
| *Last Year* | *Next Year* | *1 year ago* | *7 years ago* | *3 years from now* |
|
37
|
+
Then the target field 'LastYear' is the date 1 year ago
|
38
|
+
And the target field 'NextYear' is the date 1 year from now
|
39
|
+
And the target field 'OneYearAgo' is the date 1 year ago
|
40
|
+
And the target field 'SevenYearsAgo' is the date 7 years ago
|
41
|
+
And the target field 'ThreeYearsFromNow' is the date 3 years from now
|
42
|
+
|
43
|
+
Scenario: Handling date formulas in the example data with week units.
|
44
|
+
|
45
|
+
Given the following example record for 'Source Data':
|
46
|
+
| LastWeek | NextWeek | OneWeekAgo | SevenWeeksAgo | ThreeWeeksFromNow |
|
47
|
+
| *Last Week* | *Next Week* | *1 week ago* | *7 weeks ago* | *3 weeks from now* |
|
48
|
+
Then the target field 'LastWeek' is the date 1 week ago
|
49
|
+
And the target field 'NextWeek' is the date 1 week from now
|
50
|
+
And the target field 'OneWeekAgo' is the date 1 week ago
|
51
|
+
And the target field 'SevenWeeksAgo' is the date 7 weeks ago
|
52
|
+
And the target field 'ThreeWeeksFromNow' is the date 3 weeks from now
|
53
|
+
|
54
|
+
Scenario: Handling date formulas when set explicitly in the source.
|
55
|
+
|
56
|
+
Given the following example record for 'Source Data':
|
57
|
+
| SomeDate |
|
58
|
+
| 2015-10-22 |
|
59
|
+
And the source field 'SomeDate' is set to the value "*Yesterday*"
|
60
|
+
Then the target field 'SomeDate' is the date 1 day ago
|
61
|
+
|
62
|
+
When the source field 'SomeDate' is set to the value "*2 months from now*"
|
63
|
+
Then the target field 'SomeDate' is the date 2 months from now
|
64
|
+
Then the target field 'SomeDate' is populated with "*2 months from now*"
|