remi 0.0.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.bundle/config +2 -0
- data/.gitignore +3 -2
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +123 -0
- data/LICENSE.txt +21 -0
- data/README.md +94 -3
- data/bin/remi +8 -0
- data/doc/install-rbenv-os_x.md +47 -0
- data/lib/remi.rb +56 -9
- data/lib/remi/cli.rb +56 -0
- data/lib/remi/core/daru.rb +28 -0
- data/lib/remi/core/refinements.rb +21 -0
- data/lib/remi/core/string.rb +8 -0
- data/lib/remi/cucumber.rb +7 -0
- data/lib/remi/cucumber/business_rules.rb +504 -0
- data/lib/remi/cucumber/data_source.rb +63 -0
- data/lib/remi/data_source.rb +13 -0
- data/lib/remi/data_source/csv_file.rb +79 -0
- data/lib/remi/data_source/data_frame.rb +10 -0
- data/lib/remi/data_source/postgres.rb +58 -0
- data/lib/remi/data_source/salesforce.rb +78 -0
- data/lib/remi/data_subject.rb +25 -0
- data/lib/remi/data_target.rb +15 -0
- data/lib/remi/data_target/csv_file.rb +49 -0
- data/lib/remi/data_target/data_frame.rb +14 -0
- data/lib/remi/data_target/salesforce.rb +49 -0
- data/lib/remi/extractor/sftp_file.rb +84 -0
- data/lib/remi/field_symbolizers.rb +17 -0
- data/lib/remi/job.rb +200 -0
- data/lib/remi/lookup/regex_sieve.rb +55 -0
- data/lib/remi/project/features/examples.feature +24 -0
- data/lib/remi/project/features/formulas.feature +64 -0
- data/lib/remi/project/features/sample_job.feature +304 -0
- data/lib/remi/project/features/step_definitions/remi_step.rb +310 -0
- data/lib/remi/project/features/support/env.rb +10 -0
- data/lib/remi/project/features/support/env_app.rb +3 -0
- data/lib/remi/project/features/transforms/date_diff.feature +50 -0
- data/lib/remi/project/features/transforms/parse_date.feature +34 -0
- data/lib/remi/project/features/transforms/prefix.feature +15 -0
- data/lib/remi/project/jobs/all_jobs_shared.rb +25 -0
- data/lib/remi/project/jobs/copy_source_job.rb +12 -0
- data/lib/remi/project/jobs/sample_job.rb +164 -0
- data/lib/remi/project/jobs/transforms/date_diff_job.rb +17 -0
- data/lib/remi/project/jobs/transforms/parse_date_job.rb +18 -0
- data/lib/remi/project/jobs/transforms/prefix_job.rb +16 -0
- data/lib/remi/project/jobs/transforms/transform_jobs.rb +3 -0
- data/lib/remi/settings.rb +39 -0
- data/lib/remi/sf_bulk_helper.rb +265 -0
- data/lib/remi/source_to_target_map.rb +93 -0
- data/lib/remi/transform.rb +137 -0
- data/lib/remi/version.rb +3 -0
- data/remi.gemspec +25 -7
- data/workbooks/sample_workbook.ipynb +56 -0
- data/workbooks/workbook_helper.rb +1 -0
- metadata +234 -17
- data/lib/noodling.rb +0 -163
- data/test/test_NAME.rb +0 -19
@@ -0,0 +1,84 @@
|
|
1
|
+
module Remi
|
2
|
+
module Extractor
|
3
|
+
|
4
|
+
# Public: Extractor for a file that is already available locally. It exposes
# the same #extract entry point as the other extractors in this module.
class LocalFile

  # Public: Initializes the extractor.
  #
  # path - The path handed back by #extract.
  def initialize(path)
    @path = path
  end

  # Public: "Extracts" the file. Nothing is read or copied here.
  #
  # Returns the path supplied to the constructor, unchanged.
  def extract
    @path
  end
end
|
13
|
+
|
14
|
+
# Public: Extractor that downloads files whose names match a pattern from a
# folder on an SFTP server into a local folder.
class SftpFile

  # Raised by #extract when no remote entry matches the requested file.
  class FileNotFoundError < StandardError; end

  # Public: Initializes the extractor. No connection is opened here.
  #
  # credentials      - Hash read with the keys :host, :username and :password.
  # remote_file      - Pattern matched against each remote entry name
  #                    (anything responding to #match, e.g. a Regexp).
  # remote_folder    - Folder on the server, joined onto "/" (default: '').
  # local_folder     - Folder downloaded files are written to.
  # port             - Port passed to Net::SFTP.start (default: '22').
  # most_recent_only - When true, #extract downloads only the newest match.
  # logger           - Logger receiving info/error messages.
  def initialize(credentials:, remote_file:, remote_folder: '', local_folder: Settings.work_dir, port: '22', most_recent_only: false, logger: Remi::Settings.logger)
    @credentials = credentials
    @remote_file = remote_file
    @remote_folder = remote_folder
    @local_folder = local_folder
    @port = port
    @most_recent_only = most_recent_only
    @logger = logger
  end

  attr_reader :logger

  # Public: Downloads the matching remote file(s).
  #
  # Returns an Array of local file paths.
  # Raises FileNotFoundError if nothing on the server matches.
  def extract
    to_download = @most_recent_only ? Array(most_recent_entry(matching_entries)) : matching_entries
    raise FileNotFoundError, "File not found: #{@remote_file}" if to_download.empty?
    download(to_download)
  end

  # Public: Lists the entries of a remote folder. Listings are cached per
  # folder, so asking for a different folder never returns the listing of a
  # folder requested earlier.
  #
  # remote_folder - Folder to list (default: the configured remote folder).
  #
  # Returns whatever sftp.dir.entries returns for that folder.
  def all_entries(remote_folder = @remote_folder)
    @all_entries ||= {}
    @all_entries[remote_folder] ||= connection { |sftp| sftp.dir.entries(File.join("/", remote_folder)) }
  end

  # Public: Selects the entries of the configured remote folder whose name
  # matches the given pattern.
  #
  # match_name - Pattern to match (default: the configured remote file).
  #
  # Returns an Array of entries.
  def matching_entries(match_name = @remote_file)
    all_entries.select { |e| match_name.match(e.name) }
  end

  # Public: Picks the entry with the greatest attributes.createtime.
  #
  # entries - Entries to choose from (default: #matching_entries).
  #
  # Returns a single entry, or nil when entries is empty.
  def most_recent_entry(entries = matching_entries)
    # NOTE(review): createtime may not be populated by every SFTP server /
    # protocol version - confirm against the servers this is used with.
    entries.max_by { |e| e.attributes.createtime }
  end

  # Public: Downloads the given entries over a single connection.
  #
  # to_download  - Entries to download (default: #matching_entries).
  # local_folder - Destination folder (default: the configured local folder).
  # ntry         - Maximum number of attempts per file (default: 3).
  #
  # Returns an Array of the local file paths written.
  def download(to_download = matching_entries, local_folder: @local_folder, ntry: 3)
    connection do |sftp|
      to_download.map do |entry|
        local_file = File.join(local_folder, entry.name)
        # Entries are listed from "/<remote_folder>", so the file has to be
        # requested from that same folder rather than by its bare name.
        remote_file = File.join("/", @remote_folder, entry.name)
        @logger.info "Downloading #{entry.name} to #{local_file}"
        retry_download(ntry) { sftp.download!(remote_file, local_file) }
        local_file
      end
    end
  end


  private

  # Internal: Opens an SFTP session, yields it, and closes it afterwards.
  #
  # Returns the value of the block.
  def connection(&block)
    result = nil
    Net::SFTP.start(@credentials[:host], @credentials[:username], password: @credentials[:password], port: @port) do |sftp|
      result = yield sftp
    end
    result
  end

  # Internal: Runs the block, retrying after a RuntimeError up to ntry
  # attempts in total, with a one second pause between attempts.
  #
  # ntry - Maximum number of attempts (default: 2).
  #
  # Returns the value of the first successful attempt.
  # Raises the last RuntimeError once all attempts are used up.
  def retry_download(ntry=2, &block)
    1.upto(ntry) do |itry|
      begin
        # Leave on the first success; otherwise the loop would run the
        # download again for every remaining attempt.
        return block.call
      rescue RuntimeError => err
        raise err unless itry < ntry
        @logger.error "Download failed with error: #{err.message}"
        @logger.error "Retry attempt #{itry}/#{ntry-1}"
        sleep(1)
      end
    end
  end
end
|
83
|
+
end
|
84
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Remi
|
2
|
+
# Public: Registry of callables that turn a field (header) name into a Symbol.
module FieldSymbolizers
  class << self
    # Public: Looks up a symbolizer by name.
    #
    # symbolizer - Registry key, :standard or :salesforce.
    #
    # Returns the callable registered under that key, or nil if there is none.
    def [](symbolizer)
      symbolizers[symbolizer]
    end

    # Public: The registry itself, built on first use and then reused.
    #
    # Returns a Hash of name => callable.
    def symbolizers
      @symbolizers ||= {
        # The stock CSV :symbol header converter.
        standard: CSV::HeaderConverters[:symbol],
        # Keeps the original letter case: trims the name, turns each run of
        # whitespace into "_" and removes every remaining non-word character.
        salesforce: ->(f) {
          trimmed = f.encode(CSV::ConverterEncoding).strip
          trimmed.gsub(/\s+/, "_").gsub(/\W+/, "").to_sym
        }
      }
    end
  end
end
|
17
|
+
end
|
data/lib/remi/job.rb
ADDED
@@ -0,0 +1,200 @@
|
|
1
|
+
module Remi
|
2
|
+
# Public: Mixin that turns a class into a job. Including it adds a class-level
# DSL (define_param, define_lookup, define_source, define_target,
# define_transform) plus instance methods that run the transforms and load the
# targets.
module Job

  # Class-level DSL. It is added with `extend` to anything that includes Job.
  module JobClassMethods
    # The readers are defined explicitly further down so that they can fall
    # back to an empty collection; only the writers are generated here.
    attr_writer :params
    attr_writer :lookups
    attr_writer :sources
    attr_writer :targets
    attr_writer :transforms

    # Public: Registers a job parameter.
    #
    # Returns the value.
    def define_param(key, value)
      @params ||= {}
      @params[key] = value
    end

    # Public: Registers a lookup and defines an instance method `name` that
    # builds it on first use and caches it in @<name>.
    #
    # name       - Name of the lookup and of the generated method.
    # type_class - Hash to use options itself as the lookup; any other class
    #              is instantiated with type_class.new(options).
    # options    - The lookup data / constructor argument.
    def define_lookup(name, type_class, options)
      @lookups ||= []
      @lookups << name

      define_memoized_reader(name) do
        type_class == Hash ? options : type_class.new(options)
      end
    end

    # Public: Registers a data source and defines an instance method `name`
    # that builds type_class.new(options) on first use and caches it.
    def define_source(name, type_class, **options)
      @sources ||= []
      @sources << name

      define_memoized_reader(name) { type_class.new(options) }
    end

    # Public: Registers a data target and defines an instance method `name`
    # that builds type_class.new(options) on first use and caches it.
    def define_target(name, type_class, **options)
      @targets ||= []
      @targets << name

      define_memoized_reader(name) { type_class.new(options) }
    end

    # Public: Registers a transform and defines an instance method `name`
    # that logs and then evaluates the block in the context of the job
    # instance.
    #
    # name    - Name of the transform and of the generated method.
    # sources - Source name(s) the transform uses (default: none).
    # targets - Target name(s) the transform populates (default: none).
    def define_transform(name, sources: [], targets: [], &block)
      @transforms ||= {}
      @transforms[name] = { sources: Array(sources), targets: Array(targets) }

      define_method(name) do
        @logger.info "Running transformation #{name}"
        instance_eval(&block)
      end
    end

    # Public: Returns the Hash of registered params ({} when none).
    def params
      @params || {}
    end

    # Public: Returns the Array of registered lookup names ([] when none).
    def lookups
      @lookups || []
    end

    # Public: Returns the Array of registered source names ([] when none).
    def sources
      @sources || []
    end

    # Public: Returns the Array of registered target names ([] when none).
    def targets
      @targets || []
    end

    # Public: Returns the Hash of registered transforms ({} when none).
    def transforms
      @transforms || {}
    end


    # Public: Returns the working directory configured in Settings.
    def work_dir
      Settings.work_dir
    end

    # Hook kept as an explicit no-op.
    def self.extended(receiver)
    end

    # Hook that runs when a module carrying this DSL is itself included
    # somewhere: the receiver gets the DSL too and inherits everything
    # registered so far. On conflicting param/transform keys the receiver's
    # own entries win.
    def included(receiver)
      receiver.extend(JobClassMethods)
      receiver.params     = self.params.merge(receiver.params)
      receiver.lookups    = self.lookups + receiver.lookups
      receiver.sources    = self.sources + receiver.sources
      receiver.targets    = self.targets + receiver.targets
      receiver.transforms = self.transforms.merge(receiver.transforms)
    end

    private

    # Internal: Defines instance method `name` returning the value cached in
    # @<name>, calling builder to create and cache it when the cached value
    # is nil or false.
    def define_memoized_reader(name, &builder)
      ivar = "@#{name}"
      define_method(name) do
        instance_variable_get(ivar) || instance_variable_set(ivar, builder.call)
      end
    end
  end

  # Hook: gives the including class or module the class-level DSL.
  def self.included(receiver)
    receiver.extend(JobClassMethods)
  end


  # Public: Params registered on the job class.
  def params
    self.class.params
  end

  # Public: Lookup names registered on the job class.
  def lookups
    self.class.lookups
  end

  # Public: Source names registered on the job class.
  def sources
    self.class.sources
  end

  # Public: Target names registered on the job class.
  def targets
    self.class.targets
  end

  # Public: Transforms registered on the job class.
  def transforms
    self.class.transforms
  end



  # Public: Initializes the job and creates its working directory.
  #
  # delete_work_dir - Whether #finalize may remove the working directory.
  # logger          - Logger used by the job.
  def initialize(delete_work_dir: true, logger: Settings.logger)
    @delete_work_dir = delete_work_dir
    @logger = logger
    create_work_dir
  end

  # Public: Returns the working directory of the job class.
  def work_dir
    self.class.work_dir
  end

  # Public: Cleans up after the job by removing the working directory
  # (subject to the checks in #delete_work_dir).
  def finalize
    delete_work_dir
  end

  # Public: Removes the working directory, but only when deletion was
  # requested at construction and the directory lives under Dir.tmpdir.
  #
  # Returns the result of FileUtils.rm_r, or nil when nothing was removed.
  def delete_work_dir
    # Plain prefix comparison: the temp dir path must not be interpreted as
    # a regular expression.
    if @delete_work_dir && work_dir.start_with?(Dir.tmpdir)
      @logger.info "Deleting temporary directory #{work_dir}"
      FileUtils.rm_r work_dir
    else
      @logger.debug "Not going to delete working directory #{work_dir}"
      nil
    end
  end

  # Public: Creates the working directory, including missing parents.
  def create_work_dir
    @logger.info "Creating working directory #{work_dir}"
    FileUtils.mkdir_p work_dir
  end

  # Public: Runs any transforms that use the sources and targets selected. If
  # source and target is not specified, then all transforms will be run.
  # If only the source is specified, then all transforms that use any of the
  # sources will be run. Same for specified targets.
  #
  # sources - Array of source names
  # targets - Array of target names
  #
  # Returns an array with one element per registered transform: the result of
  # the transform if it was run, nil if it was not selected.
  def run_transforms_using(sources: nil, targets: nil)
    transforms.map do |t, st|
      # An omitted filter selects everything, including transforms that
      # were declared without any sources or targets.
      selected_sources = sources.nil? || (st[:sources] & Array(sources)).any?
      selected_targets = targets.nil? || (st[:targets] & Array(targets)).any?
      send(t) if selected_sources && selected_targets
    end
  end

  # Public: Runs every registered transform in registration order.
  #
  # Returns an array containing the result of each transform.
  def run_all_transforms
    transforms.keys.map { |t| send(t) }
  end

  # Public: Calls #load on every registered target that responds to it.
  #
  # Returns the Array of target names.
  def load_all_targets
    targets.each do |target|
      @logger.info "Loading target #{target}"
      loadable = send(target)
      loadable.load if loadable.respond_to?(:load)
    end
  end

  # Public: Runs all transforms defined in the job, then loads all targets.
  #
  # Returns the job instance.
  def run
    run_all_transforms
    load_all_targets
    self
  end
end
|
200
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Remi
|
2
|
+
module Lookup
|
3
|
+
|
4
|
+
# Public: RegexSieve class. The RegexSieve functions in a manner similar to
# a hash. The regex sieve is initialized with a hash where the keys are
# regular expressions and the values can be any valid Ruby object. The order
# of the keys matters. When the regex sieve is accessed using the array
# accessor [], it returns the first matching record. By default, only
# the values are returned, but the key and all matching capture groups
# can optionally be returned.
#
# Examples:
#
#   r = RegexSieve.new({
#     /something/ => 'Something',
#     /something else/ => 'This will never get matched because the one above will match first',
#     /cool$/ => 'Cool',
#     /cool beans/ => 'Really Cool'
#   })
#
#   r['something else'] # => 'Something'
#   r['cool beans']     # => 'Really Cool'
class RegexSieve
  # Public: Wraps the given sieve.
  #
  # sieve - Hash of Regexp => value, tried in insertion order.
  def initialize(sieve)
    @sieve = sieve
  end

  # Public: Array accessor for Regex Sieve.
  #
  # key - The string that will be matched to the keys in the sieve.
  # opt - By default, only the values in the hash used to initialize the sieve
  #       will be returned. To also get the matching key or the capture
  #       groups, pass :regex, :match, or both.
  #
  # Example:
  #   r['something']                 # => 'Something'
  #   r['something', :regex]         # => { value: 'Something', regex: /something/ }
  #   r['something', :match, :regex] # => { value: 'Something', regex: /something/, match: #<MatchData "something"> }
  #
  # Returns the value alone when nothing extra was requested, a Hash when
  # :regex and/or :match were requested, and nil if no regex matches.
  def [](key, *opt)
    # :value is always part of the answer.
    wanted = opt | [:value]

    @sieve.each do |regex, value|
      match_data = regex.match(key)
      next unless match_data

      # First hit wins; keep only the parts that were asked for.
      hit = { value: value, regex: regex, match: match_data }
      picked = hit.select { |part, _| wanted.include?(part) }
      return picked.size > 1 ? picked : picked.values.first
    end

    nil
  end
end
|
54
|
+
end
|
55
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
Feature: This tests the creation of example records.
|
2
|
+
|
3
|
+
Background:
|
4
|
+
Given the job is 'Copy Source'
|
5
|
+
And the job source 'Source Data'
|
6
|
+
And the job target 'Target Data'
|
7
|
+
And the source 'Source Data'
|
8
|
+
And the target 'Target Data'
|
9
|
+
|
10
|
+
Scenario: Simple example record loads in the source and is directly copied to target.
|
11
|
+
|
12
|
+
Given the following example record for 'Source Data':
|
13
|
+
| MyField | MyOtherField |
|
14
|
+
| Remilspot | Niblet |
|
15
|
+
Then the target field 'MyField' is set to the value "Remilspot"
|
16
|
+
And the target field 'MyOtherField' is set to the value "Niblet"
|
17
|
+
|
18
|
+
Scenario: Handling date formulas in the example data with day units.
|
19
|
+
|
20
|
+
Given the following example record for 'Source Data':
|
21
|
+
| Yesterday | ThreeDaysFromNow |
|
22
|
+
| *Yesterday* | *3 days from now* |
|
23
|
+
Then the target field 'Yesterday' is the date 1 day ago
|
24
|
+
And the target field 'ThreeDaysFromNow' is the date 3 days from now
|
@@ -0,0 +1,64 @@
|
|
1
|
+
Feature: This tests the handling of date formulas in example records.
|
2
|
+
|
3
|
+
Background:
|
4
|
+
Given the job is 'Copy Source'
|
5
|
+
And the job source 'Source Data'
|
6
|
+
And the job target 'Target Data'
|
7
|
+
And the source 'Source Data'
|
8
|
+
And the target 'Target Data'
|
9
|
+
|
10
|
+
Scenario: Handling date formulas in the example data with day units.
|
11
|
+
|
12
|
+
Given the following example record for 'Source Data':
|
13
|
+
| Yesterday | Tomorrow | OneDayAgo | SevenDaysAgo | ThreeDaysFromNow |
|
14
|
+
| *Yesterday* | *Tomorrow* | *1 day ago* | *7 days ago* | *3 days from now* |
|
15
|
+
Then the target field 'Yesterday' is the date 1 day ago
|
16
|
+
And the target field 'Tomorrow' is the date 1 day from now
|
17
|
+
And the target field 'OneDayAgo' is the date 1 day ago
|
18
|
+
And the target field 'SevenDaysAgo' is the date 7 days ago
|
19
|
+
And the target field 'ThreeDaysFromNow' is the date 3 days from now
|
20
|
+
|
21
|
+
Scenario: Handling date formulas in the example data with month units.
|
22
|
+
|
23
|
+
Given the following example record for 'Source Data':
|
24
|
+
| LastMonth | NextMonth | OneMonthAgo | SevenMonthsAgo | ThreeMonthsFromNow |
|
25
|
+
| *Last Month* | *Next Month* | *1 month ago* | *7 months ago* | *3 months from now* |
|
26
|
+
Then the target field 'LastMonth' is the date 1 month ago
|
27
|
+
And the target field 'NextMonth' is the date 1 month from now
|
28
|
+
And the target field 'OneMonthAgo' is the date 1 month ago
|
29
|
+
And the target field 'SevenMonthsAgo' is the date 7 months ago
|
30
|
+
And the target field 'ThreeMonthsFromNow' is the date 3 months from now
|
31
|
+
|
32
|
+
Scenario: Handling date formulas in the example data with year units.
|
33
|
+
|
34
|
+
Given the following example record for 'Source Data':
|
35
|
+
| LastYear | NextYear | OneYearAgo | SevenYearsAgo | ThreeYearsFromNow |
|
36
|
+
| *Last Year* | *Next Year* | *1 year ago* | *7 years ago* | *3 years from now* |
|
37
|
+
Then the target field 'LastYear' is the date 1 year ago
|
38
|
+
And the target field 'NextYear' is the date 1 year from now
|
39
|
+
And the target field 'OneYearAgo' is the date 1 year ago
|
40
|
+
And the target field 'SevenYearsAgo' is the date 7 years ago
|
41
|
+
And the target field 'ThreeYearsFromNow' is the date 3 years from now
|
42
|
+
|
43
|
+
Scenario: Handling date formulas in the example data with week units.
|
44
|
+
|
45
|
+
Given the following example record for 'Source Data':
|
46
|
+
| LastWeek | NextWeek | OneWeekAgo | SevenWeeksAgo | ThreeWeeksFromNow |
|
47
|
+
| *Last Week* | *Next Week* | *1 week ago* | *7 weeks ago* | *3 weeks from now* |
|
48
|
+
Then the target field 'LastWeek' is the date 1 week ago
|
49
|
+
And the target field 'NextWeek' is the date 1 week from now
|
50
|
+
And the target field 'OneWeekAgo' is the date 1 week ago
|
51
|
+
And the target field 'SevenWeeksAgo' is the date 7 weeks ago
|
52
|
+
And the target field 'ThreeWeeksFromNow' is the date 3 weeks from now
|
53
|
+
|
54
|
+
Scenario: Handling date formulas when set explicitly in the source.
|
55
|
+
|
56
|
+
Given the following example record for 'Source Data':
|
57
|
+
| SomeDate |
|
58
|
+
| 2015-10-22 |
|
59
|
+
And the source field 'SomeDate' is set to the value "*Yesterday*"
|
60
|
+
Then the target field 'SomeDate' is the date 1 day ago
|
61
|
+
|
62
|
+
When the source field 'SomeDate' is set to the value "*2 months from now*"
|
63
|
+
Then the target field 'SomeDate' is the date 2 months from now
|
64
|
+
Then the target field 'SomeDate' is populated with "*2 months from now*"
|