cure 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +132 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/Dockerfile +10 -0
- data/Gemfile +12 -0
- data/Gemfile.lock +71 -0
- data/LICENSE.txt +21 -0
- data/README.md +129 -0
- data/Rakefile +12 -0
- data/exe/cure +42 -0
- data/lib/cure/config.rb +56 -0
- data/lib/cure/csv_helpers.rb +6 -0
- data/lib/cure/export/exporter.rb +49 -0
- data/lib/cure/file_helpers.rb +38 -0
- data/lib/cure/generator/base.rb +148 -0
- data/lib/cure/log.rb +29 -0
- data/lib/cure/main.rb +63 -0
- data/lib/cure/object_helpers.rb +27 -0
- data/lib/cure/strategy/base.rb +223 -0
- data/lib/cure/transformation/candidate.rb +86 -0
- data/lib/cure/transformation/transform.rb +105 -0
- data/lib/cure/version.rb +5 -0
- data/lib/cure.rb +26 -0
- data/templates/aws_cur_template.json +143 -0
- data/templates/example_template.json +38 -0
- metadata +111 -0
@@ -0,0 +1,148 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Cure
|
4
|
+
module Generator
|
5
|
+
# Shared behaviour for value generators. Subclasses implement the private
# +_generate+ hook; this class wraps its result with the optional "prefix"
# and "suffix" options.
class Base
  # Generator options, looked up by string key ("prefix", "suffix", "length", "name").
  # @return [Hash]
  attr_accessor :options

  # @param [Hash/Nil] options nil is treated the same as an empty hash
  def initialize(options={})
    # Guard against an explicit nil so later option lookups never hit nil.fetch.
    @options = options || {}
  end

  # Produces a generated value and decorates it with the configured
  # prefix/suffix. When neither is configured, the raw result of
  # +_generate+ is returned untouched.
  #
  # @param [Object/Nil] source_value
  # @return [String]
  def generate(source_value=nil)
    translated = _generate(source_value)
    translated = "#{prefix}#{translated}" if prefix
    translated = "#{translated}#{suffix}" if suffix
    translated
  end

  private

  # Hook for subclasses; the base implementation always raises.
  #
  # @param [Object/Nil] _source_value
  # @return [String]
  # @raise [NotImplementedError] when a subclass has not overridden it
  def _generate(_source_value)
    raise NotImplementedError, "#{self.class} has not implemented method '#{__method__}'"
  end

  # @param [Object/Nil] default value returned when "prefix" is not configured
  def prefix(default=nil)
    extract_property("prefix", default)
  end

  # @param [Object/Nil] default value returned when "suffix" is not configured
  def suffix(default=nil)
    extract_property("suffix", default)
  end

  # @param [Object/Nil] default value returned when "length" is not configured
  def length(default=nil)
    extract_property("length", default)
  end

  # @param [Object/Nil] default value returned when "name" is not configured
  def property_name(default=nil)
    extract_property("name", default)
  end

  # Reads a single option, falling back to +default_val+ when the key is absent.
  #
  # @param [String] property
  # @param [Object/Nil] default_val
  def extract_property(property, default_val)
    @options.fetch(property, default_val)
  end
end
|
50
|
+
|
51
|
+
# Generates a string of random lowercase hexadecimal digits.
class HexGenerator < Base
  private

  # The digit count is the "length" option; when that option is absent a
  # random count between 0 and 9 is used instead.
  #
  # @param [Object] _source_value ignored
  # @return [String]
  def _generate(_source_value)
    digit_count = length(rand(0..9))

    1.upto(digit_count).each_with_object(+"") do |_position, hex|
      hex << rand(0..15).to_s(16)
    end
  end
end
|
59
|
+
|
60
|
+
# Generates a random integer built from the digits 1-9.
class NumberGenerator < Base
  private

  # The number of digits is the "length" option; when that option is absent
  # a random count between 0 and 9 is used. Zero digits yields 0.
  #
  # @param [Object] _source_value ignored
  # @return [Integer]
  def _generate(_source_value)
    digit_count = length(rand(0..9))
    digits = 1.upto(digit_count).map { rand(1..9) }

    # Fold the digits into a base-10 number, most significant digit first.
    digits.reduce(0) { |number, digit| (number * 10) + digit }
  end
end
|
68
|
+
|
69
|
+
# Generates a mask made only of "X" characters.
class RedactGenerator < Base
  private

  # The mask size is the "length" option when configured; otherwise it is
  # the source value's length, or 5 when there is no source value.
  #
  # @param [Object] source_value
  # @return [String]
  def _generate(source_value)
    mask_size = length(source_value&.length || 5)

    1.upto(mask_size).reduce(+"") { |mask, _position| mask << "X" }
  end
end
|
77
|
+
|
78
|
+
# Returns a pre-configured placeholder value selected by the "name" option.
class PlaceholderGenerator < Base
  include Configuration

  private

  # Looks the placeholder up in +config.placeholders+ using the "name"
  # option as the key.
  #
  # @param [Object] _source_value ignored
  # @return [Object] the configured placeholder value
  # @raise [RuntimeError] when no placeholder is stored under that name
  def _generate(_source_value)
    placeholders = config.placeholders
    value = placeholders[property_name]
    return value if value

    # The collection is indexed by name, i.e. used like a Hash, and Hash has
    # no #join; list its keys so the intended message is actually raised.
    # NOTE(review): assumes config.placeholders is a Hash - confirm in config.rb.
    candidates = placeholders.respond_to?(:keys) ? placeholders.keys : Array(placeholders)
    raise("Missing placeholder value. Available candidates: [#{candidates.join(", ")}]")
  end
end
|
89
|
+
|
90
|
+
require "securerandom"
|
91
|
+
|
92
|
+
# Generates a random UUID string via SecureRandom.
class GuidGenerator < Base
  private

  # @param [Object] _source_value ignored
  # @return [String]
  def _generate(_source_value)
    uuid = SecureRandom.uuid
    uuid.to_s
  end
end
|
100
|
+
|
101
|
+
require "faker"
|
102
|
+
|
103
|
+
# Delegates value generation to a Faker module/method pair named by the
# "module" and "method" options.
class FakerGenerator < Base
  private

  # @param [Object] _source_value ignored
  # @return [Object] whatever the selected Faker method returns
  # @raise [RuntimeError] when the module or the method cannot be found
  def _generate(_source_value)
    mod = faker_module(extract_property("module", nil))

    meth_code = extract_property("method", nil)&.to_sym
    raise "No Faker method found for [#{meth_code}]" unless mod.methods.include?(meth_code)

    mod.send(meth_code)
  end

  # Resolves the constant under Faker. const_get raises NameError for an
  # unknown or malformed name rather than returning nil, so translate that
  # into the descriptive error instead of relying on a nil check.
  #
  # @param [String/Nil] mod_code
  # @return [Module]
  def faker_module(mod_code)
    Faker.const_get(mod_code.to_s)
  rescue NameError
    raise "No Faker module found for [#{mod_code}]"
  end
end
|
119
|
+
|
120
|
+
# Generates a random string drawn from configurable character classes.
class CharacterGenerator < Base
  # @param [Hash/Nil] options nil is treated as an empty hash
  def initialize(options=nil)
    # The default used to reach @options as nil and crash on @options.key?.
    super(options || {})
  end

  private

  # The result length is the "length" option when configured; otherwise the
  # source value's length, or 5 when there is no source value.
  #
  # @param [Object] source_value
  # @return [String]
  def _generate(source_value)
    pool = build_options.flat_map(&:to_a)
    char_count = length(source_value&.length || 5)

    # An empty pool (no recognised type) samples nil, which joins to "".
    (0...char_count).map { pool.sample }.join
  end

  # Builds the list of character ranges to draw from. Without a "types"
  # option, lowercase letters, uppercase letters and digits are all used.
  #
  # @return [Array<Range>]
  def build_options
    return [("a".."z"), ("A".."Z"), (0..9)] unless @options.key?("types")

    type_array = @options["types"]

    arr = []
    arr << ("a".."z") if type_array.include? "lowercase"
    arr << ("A".."Z") if type_array.include? "uppercase"
    arr << (0..9) if type_array.include? "number"
    arr << ("!".."+") if type_array.include? "symbol"
    arr
  end
end
|
147
|
+
end
|
148
|
+
end
|
data/lib/cure/log.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "cure"
|
4
|
+
|
5
|
+
module Cure
|
6
|
+
# Mixin with thin helpers that forward messages to Cure.logger.
module Log
  # @param [String] message
  def log_debug(message)
    Cure.logger.debug(message)
  end

  # @param [String] message
  def log_info(message)
    Cure.logger.info(message)
  end

  # @param [String] message
  def log_warn(message)
    Cure.logger.warn(message)
  end

  # Logs the message at error level, followed by the exception backtrace
  # when one is available.
  #
  # @param [String] message
  # @param [Exception/Nil] ex
  def log_error(message, ex=nil)
    Cure.logger.error(message)

    # Exception#backtrace is nil for an exception that was never raised.
    backtrace = ex&.backtrace
    Cure.logger.error(backtrace.join("\n")) if backtrace
  end
end
|
29
|
+
end
|
data/lib/cure/main.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "cure"
|
4
|
+
require "json"
|
5
|
+
require "cure/transformation/candidate"
|
6
|
+
require "cure/transformation/transform"
|
7
|
+
require "cure/export/exporter"
|
8
|
+
|
9
|
+
module Cure
|
10
|
+
|
11
|
+
# Entry point that wires a template, a source CSV and an output directory
# together, then runs the transformation and export.
class Main
  include Configuration
  include FileHelpers

  # Creates a Main instance and runs #setup on it.
  #
  # @return [Cure::Main]
  def self.init(template_file, csv_file, output_dir)
    Main.new.tap { |instance| instance.setup(template_file, csv_file, output_dir) }
  end

  # @return [Cure::Transformation::Transform]
  attr_accessor :transformer

  # @return [Boolean]
  attr_reader :is_initialised

  def initialize
    @is_initialised = false
  end

  # Builds the transform context and exports it.
  #
  # @raise [RuntimeError] when no transformer has been set up yet
  def run
    raise "Not init" unless @transformer

    export(build_ctx)
  end

  # @return [Cure::Transform::TransformContext]
  def build_ctx
    @transformer.extract_from_file(config.source_file_location)
  end

  # Parses the template file as JSON, registers the resulting configuration
  # and builds a transformer from the template's "candidates" entries.
  def setup(template_file, csv_file, output_dir)
    template = JSON.parse(read_file(template_file))
    settings = create_config(csv_file, template, output_dir)
    register_config(settings)

    candidates = settings.template["candidates"].map do |definition|
      Cure::Transformation::Candidate.new.from_json(definition)
    end

    @transformer = Cure::Transformation::Transform.new(candidates)
    @is_initialised = true
  end

  private

  # @param [Cure::Transform::TransformContext] ctx
  def export(ctx)
    Cure::Export::Exporter.export_ctx(ctx, config.output_dir, "csv_file")
  end
end
|
63
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "cure"
|
4
|
+
require "json"
|
5
|
+
|
6
|
+
module Cure
|
7
|
+
# Mixin that populates an object from a Hash or JSON document by calling
# the "<key>=" setter for every entry.
module ObjectHelpers
  # Assigns each key/value pair through the matching setter. Keys without a
  # setter are logged as a warning and skipped.
  #
  # Errors raised inside a setter propagate to the caller; they are no
  # longer reported as a missing property.
  #
  # @param [Hash] hash
  def attributes=(hash)
    hash.each do |key, value|
      setter = "#{key}="

      # Second argument true: send can reach private setters, so check them too.
      if respond_to?(setter, true)
        send(setter, value)
      else
        Cure.logger.warn("Error deserializing object: No property for #{key}")
      end
    end
  end

  # @param [String/Hash] json a JSON document, or an already-parsed Hash
  # @return [self]
  def from_json(json)
    return from_hash(json) if json.is_a?(Hash) # Just a guard in case serialisation is done

    from_hash(JSON.parse(json))
  end

  # @param [Hash] hash
  # @return [self]
  def from_hash(hash)
    self.attributes = hash
    self
  end
end
|
27
|
+
end
|
@@ -0,0 +1,223 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "singleton"
|
4
|
+
|
5
|
+
module Cure
|
6
|
+
module Strategy
|
7
|
+
# Singleton Strategy for storing data across all processes
|
8
|
+
# Mixin giving access to one shared source-value => generated-value map,
# held by the HistoryCache singleton.
module History
  # @return [Hash]
  def history
    HistoryCache.instance.history_cache
  end

  # Looks up a previously stored value. nil and "" are never looked up.
  #
  # @param [String] source_value
  # @return [String]
  def retrieve_history(source_value)
    return if source_value.nil? || source_value == ""

    history[source_value]
  end

  # Remembers +value+ under +source_value+. nil and "" keys are ignored.
  #
  # @param [String] source_value
  # @param [String] value
  def store_history(source_value, value)
    return if source_value.nil? || source_value == ""

    history[source_value] = value
  end

  # Replaces the shared map with a new empty one.
  def reset_history
    HistoryCache.instance.reset
  end
  alias_method :clear_history, :reset_history

  # Singleton holder of the shared history map.
  class HistoryCache
    include Singleton

    attr_reader :history_cache

    def initialize
      reset
    end

    # @return [Hash] the new, empty map
    def reset
      @history_cache = {}
    end
  end
end
|
44
|
+
|
45
|
+
# Template for replacement strategies. A strategy decides which part of a
# source value is looked up in the history and how a generated value is
# merged back into the source value.
class Base
  include History

  # Additional details needed to make substitution.
  # @return [Hash]
  attr_accessor :options

  def initialize(options)
    @options = options
  end

  # Retrieves the (partial) value, reuses the replacement recorded for it in
  # the history when there is one, and otherwise asks the generator for a
  # new replacement and records it.
  #
  # @param [String] source_value
  # @param [Generator::Base] generator
  # @return [String]
  def extract(source_value, generator)
    lookup_key = _retrieve_value(source_value)

    remembered = retrieve_history(lookup_key)
    return _replace_value(source_value, remembered) if remembered

    fresh = generator.generate(source_value).to_s

    # Replace first, then record, so a failing replacement stores nothing.
    _replace_value(source_value, fresh).tap { store_history(lookup_key, fresh) }
  end

  private

  # Hook: selects the part of the source value used as the history key.
  #
  # @param [String] _source_value
  def _retrieve_value(_source_value)
    raise NotImplementedError, "#{self.class} has not implemented method '#{__method__}'"
  end

  # Hook: merges the generated value into the source value.
  #
  # @param [String] _source_value
  # @param [String] _generated_value
  # @return [String]
  def _replace_value(_source_value, _generated_value)
    raise NotImplementedError, "#{self.class} has not implemented method '#{__method__}'"
  end

  # Reads the "replace_partial" option. A String counts as enabled only when
  # it equals "true"; any other value is returned as-is, with nil and false
  # both becoming false.
  def replace_partial_record
    flag = @options["replace_partial"]

    if flag.instance_of?(String)
      flag == "true"
    else
      flag || false
    end
  end

  # @return [Boolean] true unless the value is nil or the empty string
  def value?(value)
    !(value.nil? || value == "")
  end
end
|
100
|
+
|
101
|
+
# Strategy that treats the whole source value as the history key and
# substitutes it entirely with the generated value.
class FullStrategy < Base
  private

  # The complete source value is the lookup key.
  #
  # @param [String] source_value
  # @return [String]
  def _retrieve_value(source_value)
    source_value
  end

  # The generated value replaces the source value outright.
  #
  # @param [String] _source_value unused
  # @param [String] generated_value
  # @return [String]
  def _replace_value(_source_value, generated_value)
    generated_value
  end
end
|
117
|
+
|
118
|
+
# Strategy that targets the first capture group of the "regex_cg" option.
class RegexStrategy < Base
  # Returns the text of the first capture group, or nil when the regex does
  # not match or the group is empty.
  #
  # @param [String] source_value
  # @return [String/Nil]
  def _retrieve_value(source_value)
    captured_group(source_value)
  end

  # Returns nil when there is nothing to replace. Otherwise returns the
  # generated value alone unless "replace_partial" is enabled, in which case
  # the captured text inside the source value is substituted.
  #
  # @param [String] source_value
  # @param [String] generated_value
  # @return [String/Nil]
  def _replace_value(source_value, generated_value)
    captured = captured_group(source_value)
    return unless captured

    # This line previously lacked `return`, so it had no effect.
    return generated_value unless replace_partial_record

    # Block form inserts the replacement literally; a plain replacement
    # string would interpret backslash sequences in the generated value.
    source_value.gsub(captured) { generated_value }
  end

  private

  # @param [String] source_value
  # @return [String/Nil] first capture group, nil when absent or empty
  def captured_group(source_value)
    m = /#{@options["regex_cg"]}/.match(source_value)
    return unless m

    group = m[1]
    group unless group.nil? || group == ""
  end
end
|
140
|
+
|
141
|
+
# Strategy that targets every occurrence of the "match" option text.
class MatchStrategy < Base
  # Returns the "match" text when the source value contains it, else nil.
  #
  # @param [String] source_value
  def _retrieve_value(source_value)
    needle = @options["match"]
    return unless source_value.include?(needle)

    needle
  end

  # Substitutes every occurrence of the "match" text with the generated
  # value; returns nil when the source value does not contain it.
  #
  # @param [String] source_value
  # @param [String] generated_value
  # @return [String]
  def _replace_value(source_value, generated_value)
    needle = @options["match"]

    source_value.gsub(needle, generated_value) if source_value.include?(needle)
  end
end
|
157
|
+
|
158
|
+
# Strategy for source values that begin with the "match" option text.
class StartWithStrategy < Base
  # Returns the "match" text when the source value starts with it, else nil.
  #
  # @param [String] source_value
  def _retrieve_value(source_value)
    leading = @options["match"]
    return unless source_value.start_with?(leading)

    leading
  end

  # Returns nil when the source value does not start with the "match" text.
  # With "replace_partial" enabled the result is the match text followed by
  # the generated value; otherwise the generated value alone.
  #
  # @param [String] source_value
  # @param [String] generated_value
  # @return [String]
  def _replace_value(source_value, generated_value)
    leading = @options["match"]
    return unless source_value.start_with?(leading)

    replace_partial_record ? leading + generated_value : generated_value
  end
end
|
177
|
+
|
178
|
+
# Strategy for source values that end with the "match" option text.
class EndWithStrategy < Base
  # Returns the "match" text when the source value ends with it, else nil.
  #
  # @param [String] source_value
  def _retrieve_value(source_value)
    trailing = @options["match"]
    return unless source_value.end_with?(trailing)

    trailing
  end

  # Returns nil when the source value does not end with the "match" text.
  # With "replace_partial" enabled the result is the generated value
  # followed by the match text; otherwise the generated value alone.
  #
  # @param [String] source_value
  # @param [String] generated_value
  # @return [String]
  def _replace_value(source_value, generated_value)
    trailing = @options["match"]
    return unless source_value.end_with?(trailing)

    replace_partial_record ? generated_value + trailing : generated_value
  end
end
|
197
|
+
|
198
|
+
# Strategy that splits the source value on the "token" option and targets
# the field at the "index" option.
class SplitStrategy < Base
  # Returns the field at "index", or nil when the source value does not
  # contain the token.
  #
  # @param [String] source_value
  # @return [String/Nil]
  def _retrieve_value(source_value)
    split_token = @options["token"]

    return unless source_value.include?(split_token)

    split_fields(source_value, split_token)[@options["index"]]
  end

  # Replaces the field at "index" with the generated value when that field
  # is non-empty, then re-joins all fields with the token. Returns nil when
  # the source value does not contain the token.
  #
  # @param [String] source_value
  # @param [String] generated_value
  # @return [String/Nil]
  def _replace_value(source_value, generated_value)
    split_token = @options["token"]

    return unless source_value.include?(split_token)

    index = @options["index"]
    result_arr = split_fields(source_value, split_token)
    result_arr[index] = generated_value if value?(result_arr[index])
    result_arr.join(split_token)
  end

  private

  # Splits with a negative limit so trailing empty fields are kept. Without
  # it "a,b,," splits to ["a", "b"], and re-joining drops the trailing
  # tokens from the output.
  #
  # @param [String] source_value
  # @param [String] split_token
  # @return [Array<String>]
  def split_fields(source_value, split_token)
    source_value.split(split_token, -1)
  end
end
|
222
|
+
end
|
223
|
+
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "cure/object_helpers"
|
4
|
+
require "cure/strategy/base"
|
5
|
+
require "cure/generator/base"
|
6
|
+
require "cure/log"
|
7
|
+
|
8
|
+
module Cure
|
9
|
+
module Transformation
|
10
|
+
# Per row, we will have a candidate for each transformation that needs to be made
|
11
|
+
# Describes the transformation of one CSV column: an ordered list of
# translations plus an optional fallback used when none of them changes
# the value.
class Candidate
  include ObjectHelpers
  include Log

  # Lookup column name for CSV.
  # @return [String]
  attr_accessor :column

  # What sort of data needs to be generated.
  # @return [List<Translation>]
  attr_reader :translations

  # Fallback translation applied when the value is left unchanged.
  # @return [Translation/Nil]
  attr_reader :no_match_translation

  # Transforms the existing value by applying each translation in order;
  # a translation that returns nil leaves the running value untouched.
  #
  # When the value is still equal to the source afterwards, the result is
  # whatever the fallback translation returns, which is nil when no
  # fallback is configured.
  #
  # @param [String] source_value
  # @return [String]
  def perform(source_value)
    # log_debug("Performing substitution for [#{@column}] with [#{@translations.length}] translations")
    value = source_value

    # Array() tolerates a candidate configured without any "translations".
    Array(@translations).each do |translation|
      temp = translation.extract(value)
      value = temp if temp
    end

    if value == source_value
      log_debug("No translation made for #{value} [#{source_value}]")
      value = @no_match_translation&.extract(source_value)
      log_debug("Translated to #{value} from [#{source_value}]")
    end

    value
  end

  # @param [Array<Hash>] opts one hash per translation
  def translations=(opts)
    @translations = opts.map { |o| Translation.new.from_hash(o) }
  end

  # @param [Hash] opts
  def no_match_translation=(opts)
    @no_match_translation = Translation.new.from_hash(opts)
  end
end
|
54
|
+
|
55
|
+
# Pairs a replacement strategy with the generator that supplies new values.
class Translation
  include ObjectHelpers

  # What sort of replacement is done, full/random/lookup/partial.
  # @return [Strategy::Base]
  attr_reader :strategy

  # What sort of data needs to be generated.
  # @return [Generator::Base]
  attr_reader :generator

  # @param [String] source_value
  # @return [String]
  def extract(source_value)
    @strategy.extract(source_value, @generator)
  end

  # Instantiates the strategy class named by opts["name"], passing it
  # opts["options"] (or an empty hash).
  #
  # @param [Hash] opts
  def strategy=(opts)
    @strategy = component_class("Strategy", opts["name"]).new(opts["options"] || {})
  end

  # Instantiates the generator class named by opts["name"], passing it
  # opts["options"] (or an empty hash).
  #
  # @param [Hash] opts
  def generator=(opts)
    @generator = component_class("Generator", opts["name"]).new(opts["options"] || {})
  end

  private

  # Resolves "Cure::<kind>::<Name><kind>". Each underscore-separated part of
  # the name is capitalised on its own, so "start_with" becomes "StartWith".
  # A single String#capitalize gives "Start_with", which made
  # StartWithStrategy and EndWithStrategy unreachable by name.
  #
  # @param [String] kind "Strategy" or "Generator"
  # @param [Object] name
  # @return [Class]
  def component_class(kind, name)
    camel_name = name.to_s.split("_").map(&:capitalize).join
    Kernel.const_get("Cure::#{kind}::#{camel_name}#{kind}")
  end
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
@@ -0,0 +1,105 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "cure/log"
|
4
|
+
require "cure/file_helpers"
|
5
|
+
require "rcsv"
|
6
|
+
|
7
|
+
module Cure
|
8
|
+
module Transformation
|
9
|
+
# Reads CSV content and runs every candidate over each data row.
class Transform
  include Log
  include FileHelpers

  # @return [Array<Candidate>]
  attr_accessor :candidates

  # @param [Array<Candidate>] candidates
  def initialize(candidates)
    @candidates = candidates
  end

  # Reads the file, then delegates to #extract_from_contents.
  #
  # @param [String] csv_file_location
  # @return [TransformContext]
  def extract_from_file(csv_file_location)
    extract_from_contents(read_file(csv_file_location))
  end

  # Parses the CSV text. The first parsed row supplies the column headers;
  # every later row is transformed and collected in the context.
  #
  # @param [String] file_contents
  # @return [TransformContext]
  def extract_from_contents(file_contents)
    ctx = TransformContext.new

    parse_content(ctx, file_contents, header: :none) do |row|
      header_row = ctx.row_count == 1
      ctx.extract_column_headers(row) if header_row
      next if header_row

      ctx.add_transformed_row(transform(ctx.column_headers, row))
    end

    ctx
  end

  private

  # Feeds each parsed row to the given block, incrementing the context's
  # row counter before each yield. Returns nil when no block is given.
  #
  # @param [TransformContext] ctx
  # @param [String] file_contents
  # @param [Hash] opts passed through to Rcsv.parse
  # @param [Proc] _block
  # @yield [Array] row
  def parse_content(ctx, file_contents, opts={}, &_block)
    return nil unless block_given?

    Rcsv.parse(file_contents, opts) do |parsed_row|
      ctx.row_count += 1
      yield parsed_row
    end
  end

  # Applies each candidate to its column, in place. Candidates whose column
  # is not among the headers, or whose cell is empty (nil), are skipped.
  #
  # @param [Hash] column_headers column name => index
  # @param [Array] row
  # @return [Array] the same row object
  def transform(column_headers, row)
    @candidates.each_with_object(row) do |candidate, cells|
      position = column_headers[candidate.column]
      current = position && cells[position]
      next unless current

      cells[position] = candidate.perform(current) # transform value
    end
  end
end
|
81
|
+
|
82
|
+
# Mutable state collected while a CSV is processed: the number of rows
# seen, the header-name => index map and the transformed rows.
class TransformContext
  include FileHelpers

  attr_accessor :row_count, :transformed_rows, :column_headers

  def initialize
    @column_headers = {}
    @transformed_rows = []
    @row_count = 0
  end

  # Records the position of every header cell under its name.
  #
  # @param [Array<String>] row
  def extract_column_headers(row)
    row.each.with_index do |header, position|
      @column_headers[header] = position
    end
  end

  # Appends a row to the collected output.
  #
  # @param [Array] row
  def add_transformed_row(row)
    @transformed_rows.push(row)
  end
end
|
104
|
+
end
|
105
|
+
end
|