cure 0.1.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +13 -3
- data/.tool-versions +1 -0
- data/Dockerfile +1 -1
- data/Gemfile +1 -0
- data/Gemfile.lock +25 -6
- data/README.md +61 -93
- data/docs/README.md +33 -0
- data/docs/about.md +219 -0
- data/docs/builder/add.md +52 -0
- data/docs/builder/black_white_list.md +83 -0
- data/docs/builder/copy.md +48 -0
- data/docs/builder/explode.md +70 -0
- data/docs/builder/main.md +43 -0
- data/docs/builder/remove.md +46 -0
- data/docs/examples/examples.md +164 -0
- data/docs/export/main.md +37 -0
- data/docs/extract/main.md +89 -0
- data/docs/metadata/main.md +29 -0
- data/docs/query/main.md +45 -0
- data/docs/sources/main.md +36 -0
- data/docs/transform/main.md +53 -0
- data/docs/validate/main.md +42 -0
- data/exe/cure +12 -41
- data/exe/cure.old +59 -0
- data/lib/cure/builder/base_builder.rb +151 -0
- data/lib/cure/builder/candidate.rb +56 -0
- data/lib/cure/cli/command.rb +105 -0
- data/lib/cure/cli/generate_command.rb +54 -0
- data/lib/cure/cli/new_command.rb +52 -0
- data/lib/cure/cli/run_command.rb +19 -0
- data/lib/cure/cli/templates/README.md.erb +1 -0
- data/lib/cure/cli/templates/gemfile.erb +5 -0
- data/lib/cure/cli/templates/gitignore.erb +181 -0
- data/lib/cure/cli/templates/new_template.rb.erb +31 -0
- data/lib/cure/cli/templates/tool-versions.erb +1 -0
- data/lib/cure/config.rb +142 -18
- data/lib/cure/coordinator.rb +61 -25
- data/lib/cure/database.rb +191 -0
- data/lib/cure/dsl/builder.rb +26 -0
- data/lib/cure/dsl/exporters.rb +45 -0
- data/lib/cure/dsl/extraction.rb +60 -0
- data/lib/cure/dsl/metadata.rb +33 -0
- data/lib/cure/dsl/queries.rb +36 -0
- data/lib/cure/dsl/source_files.rb +36 -0
- data/lib/cure/dsl/template.rb +131 -0
- data/lib/cure/dsl/transformations.rb +95 -0
- data/lib/cure/dsl/validator.rb +22 -0
- data/lib/cure/export/base_processor.rb +194 -0
- data/lib/cure/export/manager.rb +24 -0
- data/lib/cure/extract/base_processor.rb +47 -0
- data/lib/cure/extract/csv_lookup.rb +14 -3
- data/lib/cure/extract/extractor.rb +41 -84
- data/lib/cure/extract/filter.rb +118 -0
- data/lib/cure/extract/named_range.rb +94 -0
- data/lib/cure/extract/named_range_processor.rb +128 -0
- data/lib/cure/extract/variable.rb +25 -0
- data/lib/cure/extract/variable_processor.rb +57 -0
- data/lib/cure/generator/base_generator.rb +14 -4
- data/lib/cure/generator/case_generator.rb +10 -3
- data/lib/cure/generator/character_generator.rb +9 -3
- data/lib/cure/generator/erb_generator.rb +21 -0
- data/lib/cure/generator/eval_generator.rb +34 -0
- data/lib/cure/generator/faker_generator.rb +7 -1
- data/lib/cure/generator/guid_generator.rb +7 -2
- data/lib/cure/generator/hex_generator.rb +6 -1
- data/lib/cure/generator/imports.rb +4 -0
- data/lib/cure/generator/number_generator.rb +6 -1
- data/lib/cure/generator/placeholder_generator.rb +7 -1
- data/lib/cure/generator/proc_generator.rb +21 -0
- data/lib/cure/generator/redact_generator.rb +9 -3
- data/lib/cure/generator/static_generator.rb +21 -0
- data/lib/cure/generator/variable_generator.rb +11 -5
- data/lib/cure/helpers/file_helpers.rb +12 -2
- data/lib/cure/helpers/object_helpers.rb +5 -17
- data/lib/cure/helpers/perf_helpers.rb +30 -0
- data/lib/cure/helpers/string.rb +54 -0
- data/lib/cure/launcher.rb +125 -0
- data/lib/cure/log.rb +7 -0
- data/lib/cure/planner.rb +136 -0
- data/lib/cure/strategy/append_strategy.rb +4 -0
- data/lib/cure/strategy/base_strategy.rb +19 -44
- data/lib/cure/strategy/contain_strategy.rb +51 -0
- data/lib/cure/strategy/end_with_strategy.rb +7 -1
- data/lib/cure/strategy/full_strategy.rb +4 -0
- data/lib/cure/strategy/history/history_cache.rb +82 -0
- data/lib/cure/strategy/imports.rb +2 -0
- data/lib/cure/strategy/match_strategy.rb +7 -2
- data/lib/cure/strategy/prepend_strategy.rb +28 -0
- data/lib/cure/strategy/regex_strategy.rb +7 -1
- data/lib/cure/strategy/split_strategy.rb +8 -3
- data/lib/cure/strategy/start_with_strategy.rb +7 -1
- data/lib/cure/transformation/candidate.rb +32 -35
- data/lib/cure/transformation/transform.rb +22 -56
- data/lib/cure/validator/base_rule.rb +78 -0
- data/lib/cure/validator/candidate.rb +54 -0
- data/lib/cure/validator/manager.rb +21 -0
- data/lib/cure/validators.rb +3 -3
- data/lib/cure/version.rb +1 -1
- data/lib/cure.rb +19 -11
- data/templates/dsl_example.rb +48 -0
- data/templates/empty_template.rb +31 -0
- metadata +132 -21
- data/lib/cure/export/exporter.rb +0 -74
- data/lib/cure/extract/builder.rb +0 -27
- data/lib/cure/main.rb +0 -72
- data/lib/cure/template/dispatch.rb +0 -30
- data/lib/cure/template/extraction.rb +0 -38
- data/lib/cure/template/template.rb +0 -28
- data/lib/cure/template/transformations.rb +0 -26
- data/templates/aws_cur_template.json +0 -145
- data/templates/example_template.json +0 -54
@@ -0,0 +1,194 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "csv"
|
4
|
+
require "cure/log"
|
5
|
+
require "cure/helpers/file_helpers"
|
6
|
+
|
7
|
+
module Cure
|
8
|
+
module Export
|
9
|
+
# Abstract base class for export processors.
#
# Rows are fed in one at a time through #process_row, which applies the
# optional :limit_rows cap and keeps a running count of rows seen.
# Subclasses are expected to provide #process, #setup and #cleanup.
class BaseProcessor
  include Log

  attr_reader :named_range

  # @param [Object] named_range - exposed through #named_range
  # @param [Hash] opts - processor options; only :limit_rows is read here
  def initialize(named_range, opts)
    @named_range = named_range
    @opts = opts
    @processed = 0
    @limit_rows = opts.fetch(:limit_rows, nil)
  end

  # Passes the row to #process unless the row limit has already been reached.
  # The processed counter is incremented for every row, skipped or not.
  #
  # @param [Hash] row
  # @return [Integer] number of rows seen so far
  def process_row(row)
    limit_reached = @limit_rows && @limit_rows <= @processed
    process(row) unless limit_reached

    @processed += 1
  end

  # Writes a single row to the destination. Must be overridden.
  #
  # @param [Hash] _row
  # @raise [NotImplementedError] always, in the base class
  def process(_row)
    raise NotImplementedError, "#{self.class} has not implemented method '#{__method__}'"
  end

  # Prepares the destination before any row is processed. Must be overridden.
  #
  # @raise [NotImplementedError] always, in the base class
  def setup
    raise NotImplementedError, "#{self.class} has not implemented method '#{__method__}'"
  end

  # Releases whatever #setup acquired. Must be overridden.
  #
  # @raise [NotImplementedError] always, in the base class
  def cleanup
    raise NotImplementedError, "#{self.class} has not implemented method '#{__method__}'"
  end
end
|
42
|
+
|
43
|
+
require "terminal-table"
|
44
|
+
|
45
|
+
# Export processor that collects rows into a Terminal::Table and prints the
# table to stdout during cleanup.
class TerminalProcessor < BaseProcessor
  attr_reader :table, :limit_rows, :processed

  # Adds the row to the table; the first row also supplies the headings.
  #
  # @param [Hash] row
  def process(row)
    first_row = @processed.zero?
    @table.headings = row.keys if first_row

    @table.add_row(row.values)
  end

  # Switches terminal-table's default style to a markdown-like layout and
  # creates the table that rows will be added to.
  def setup
    Terminal::Table::Style.defaults = markdown_style

    log_info "Exporting [#{@named_range}] to terminal."

    title = @opts[:title] || "<No Title Set>"
    @table = Terminal::Table.new(title: title)
  end

  # Prints the accumulated table.
  def cleanup
    puts @table
  end

  private

  # Style options that make the table render in markdown mode.
  #
  # @return [Hash]
  def markdown_style
    {
      border_top: false,
      border_bottom: false,
      border_x: "-",
      border_y: "|",
      border_i: "|"
    }
  end
end
|
72
|
+
|
73
|
+
# Export processor that writes every row into a single CSV file located at
# "<opts[:directory]>/<opts[:file_name]>.csv".
class CsvProcessor < BaseProcessor
  include Helpers::FileHelpers

  attr_reader :csv_file

  # Appends the row to the CSV file; the first row also emits the header line.
  #
  # @param [Hash] row
  def process(row)
    @csv_file.write(row.keys.to_csv) if @processed.zero?
    @csv_file.write(row.values.to_csv)
  end

  # Creates the target directory if needed and opens the output file for writing.
  # The output directory is not cleaned beforehand.
  def setup
    log_info "Exporting [#{@named_range}] to CSV..."

    output_dir = @opts[:directory]
    file_name = @opts[:file_name]

    log_info("Exporting file to [#{output_dir}/#{file_name}]")

    path = "#{output_dir}/#{file_name}"

    # file_name may itself contain sub-directories, so derive the directory
    # from the combined path rather than from output_dir alone.
    dir = File.dirname(path)
    FileUtils.mkdir_p(dir) unless File.directory?(dir)

    @csv_file = File.open("#{path}.csv", "w")
    @processed = 0
  end

  # Logs the name of the written file and closes it.
  # Does nothing when #setup never opened a file, and still closes the file
  # if logging fails.
  def cleanup
    return unless @csv_file

    log_info File.basename(@csv_file)
  ensure
    @csv_file&.close
  end
end
|
110
|
+
|
111
|
+
# Export processor that splits the output across several CSV files, starting
# a new file every opts[:chunk_size] rows. Files are named
# "<directory>/<chunk number>-<file_name_prefix>.csv".
class ChunkCsvProcessor < BaseProcessor
  include Helpers::FileHelpers

  attr_reader :current_csv_file,
              :file_name_prefix,
              :directory,
              :chunk_size,
              :include_headers,
              :row_count

  # Writes the row to the current chunk file. A header line is written at the
  # top of every chunk unless opts[:include_headers] is false.
  #
  # @param [Hash] row
  def process(row)
    chunked_file_handler do |csv_file|
      csv_file.write(row.keys.to_csv) if @include_headers && chunk_boundary?

      csv_file.write(row.values.to_csv)
    end
  end

  # Reads the options, cleans the output directory, makes sure it exists and
  # resets the row and chunk counters.
  def setup
    log_info "Exporting [#{@named_range}] to CSV..."

    extract_opts

    log_info("Exporting file to [#{@output_dir}/#{@file_name_prefix}]")

    clean_dir(@output_dir)

    dir = File.dirname("#{@output_dir}/#{@file_name_prefix}")
    FileUtils.mkdir_p(dir) unless File.directory?(dir)

    @processed = 0
    @current_chunk = 0
  end

  # Closes the chunk file that is currently open. No file is open when no
  # row was ever processed, in which case this is a no-op.
  def cleanup
    @current_csv_file&.close
  end

  # Copies the relevant entries of @opts into instance variables.
  def extract_opts
    # TODO: Add offset? pick a slice?
    @output_dir = @opts[:directory]
    @file_name_prefix = @opts[:file_name_prefix]
    @directory = @opts[:directory]
    @chunk_size = @opts[:chunk_size]
    @include_headers = @opts.fetch(:include_headers, true)
  end

  # Yields the file the current row belongs to, rolling over to a new chunk
  # file first when the row starts a new chunk.
  #
  # @yieldparam [File] csv_file - the currently open chunk file
  # @raise [RuntimeError] when called without a block
  def chunked_file_handler(&block)
    raise "No block" unless block

    if chunk_boundary?
      @current_csv_file&.close

      @current_chunk += 1
      log_info "Writing file to #{current_file_path}"
      @current_csv_file = File.open(current_file_path, "w")
    end

    yield @current_csv_file
  end

  # @return [String] path of the chunk file for the current chunk number
  def current_file_path
    "#{@output_dir}/#{@current_chunk}-#{@file_name_prefix}.csv"
  end

  private

  # @return [Boolean] true when the row about to be written is the first of a chunk
  def chunk_boundary?
    @processed.zero? || (@processed % @chunk_size).zero?
  end
end
|
179
|
+
|
180
|
+
# Export processor that hands every row to the callable supplied as opts[:proc].
#
# It overrides #process_row itself, so the row limit and processed counter
# kept by BaseProcessor#process_row are not applied here.
class YieldRowProcessor < BaseProcessor
  attr_reader :proc

  # @param [Hash] row
  # @return [Object] whatever the callable returns
  def process_row(row)
    callback = @proc
    callback.call(row)
  end

  # Reads the callable from the options.
  #
  # @raise [KeyError] when opts has no :proc entry
  def setup
    @proc = @opts.fetch(:proc)
  end

  # Nothing to release.
  def cleanup
    nil
  end
end
|
193
|
+
end
|
194
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Cure
|
4
|
+
module Export
|
5
|
+
# Drives a group of export processors through their lifecycle:
# setup, caller-supplied work, cleanup.
class Manager
  # @return [Array<Cure::Export::BaseProcessor>] processors managed by this instance
  attr_reader :processors

  # @param [Object] named_range - stored for this manager; not otherwise used here
  # @param [Array<Cure::Export::BaseProcessor>] processors
  def initialize(named_range, processors)
    @named_range = named_range
    @processors = processors
  end

  # Sets up every processor, yields the processors to the caller, then cleans
  # every processor up again.
  #
  # Cleanup runs even when the block raises, so resources opened during setup
  # (for example output files) are released. Setup stays outside the protected
  # section: if it fails, cleanup is not attempted on processors that may
  # never have been set up.
  #
  # @yieldparam [Array<Cure::Export::BaseProcessor>] processors
  # @return [Array<Cure::Export::BaseProcessor>] the processors
  def with_processors
    @processors.each(&:setup)

    begin
      yield @processors
    ensure
      @processors.each(&:cleanup)
    end

    @processors
  end
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "cure/log"
|
4
|
+
require "cure/config"
|
5
|
+
require "cure/extract/csv_lookup"
|
6
|
+
|
7
|
+
require "csv"
|
8
|
+
|
9
|
+
module Cure
|
10
|
+
module Extract
|
11
|
+
# Shared base for extraction processors: holds the database service and
# offers thin, protected helpers for creating tables and inserting rows.
class BaseProcessor
  # @return [Cure::DatabaseService]
  attr_reader :database_service

  # @param [Cure::DatabaseService] database_service
  def initialize(database_service)
    @database_service = database_service
  end

  protected

  # Creates a table, substituting "col_<index>" for any column whose name is
  # nil or false.
  #
  # @param [String] tbl_name
  # @param [Array<Object>] columns
  def create_table(tbl_name, columns)
    column_names = columns.each_with_index.map do |column, position|
      column || "col_#{position}"
    end

    @database_service.create_table(tbl_name.to_sym, column_names)
  end

  # Inserts a single row.
  #
  # @param [String] tbl_name
  # @param [Array<Object>] values
  def insert_record(tbl_name, values)
    table = tbl_name.to_sym
    @database_service.insert_row(table, values)
  end

  # Inserts several rows in one call.
  #
  # @param [String] tbl_name
  # @param [Array<Array<Object>>] values
  def insert_batched_rows(tbl_name, values)
    table = tbl_name.to_sym
    @database_service.insert_batched_rows(table, values)
  end
end
|
46
|
+
end
|
47
|
+
end
|
@@ -4,10 +4,12 @@ module Cure
|
|
4
4
|
module Extract
|
5
5
|
class CsvLookup
|
6
6
|
|
7
|
-
# @param [String] position - [Ex A1:B1, A1:B1,A2:B2]
|
7
|
+
# @param [String,Integer] position - [Ex A1:B1, A1:B1,A2:B2]
|
8
8
|
# @return [Array] [column_start_idx, column_end_idx, row_start_idx, row_end_idx]
|
9
9
|
def self.array_position_lookup(position)
|
10
|
-
|
10
|
+
# Returning [0, -1, 0, -1] would be the cleaner way to mean "whole sheet", but -1 doesn't work as a range bound, so fixed upper limits are used for now.
|
11
|
+
# return [0, -1, 0, -1] if position.is_a?(Integer) && position == -1
|
12
|
+
return [0, 1_023, 0, 10_000_000] if position.is_a?(Integer) && position == -1 # Whole sheet
|
11
13
|
|
12
14
|
start, finish, *_excess = position.split(":")
|
13
15
|
raise "Invalid format" unless start || finish
|
@@ -20,10 +22,19 @@ module Cure
|
|
20
22
|
]
|
21
23
|
end
|
22
24
|
|
25
|
+
# Converts the column letters of a cell reference into a zero-based column index.
# Only the first run of letters is considered; the reference is upcased first.
#
# @param [String] range - cell reference such as "A1" or "AB12"
# @return [Integer] zero-based column index ("A" => 0, "AA" => 26), or -1 when
#   the reference contains no letters
def self.position_for_letter(range)
  letters = range.upcase[/[A-Z]+/].to_s

  # Base-26 accumulation where "A" counts as 1 ("A".ord is 65).
  one_based = letters.each_char.reduce(0) do |total, char|
    (total * 26) + (char.ord - 64)
  end

  # Excel columns are not 0th indexed, so shift to a zero-based index.
  one_based - 1
end
|
26
36
|
|
37
|
+
# Converts the row number of a cell reference into a zero-based row index.
# Only the first run of digits is considered.
#
# @param [String] range - cell reference such as "A1" or "AB12"
# @return [Integer] zero-based row index ("A1" => 0), or -1 when the
#   reference contains no digits
def self.position_for_digit(range)
  first_number = range.upcase[/\d+/]
  first_number.to_i - 1
end
|
@@ -2,17 +2,26 @@
|
|
2
2
|
|
3
3
|
require "cure/log"
|
4
4
|
require "cure/config"
|
5
|
+
require "cure/database"
|
5
6
|
require "cure/extract/csv_lookup"
|
6
7
|
require "cure/helpers/file_helpers"
|
8
|
+
require "cure/helpers/perf_helpers"
|
9
|
+
require "cure/extract/named_range_processor"
|
10
|
+
require "cure/extract/variable_processor"
|
11
|
+
|
12
|
+
require "csv"
|
13
|
+
require "objspace"
|
7
14
|
|
8
15
|
module Cure
|
9
16
|
module Extract
|
10
17
|
class Extractor
|
11
18
|
include Log
|
19
|
+
include Database
|
12
20
|
include Configuration
|
13
21
|
include Helpers::FileHelpers
|
22
|
+
include Helpers::PerfHelpers
|
14
23
|
|
15
|
-
# @
|
24
|
+
# @return [Hash] opts
|
16
25
|
attr_reader :opts
|
17
26
|
|
18
27
|
# @param [Hash] opts
|
@@ -20,103 +29,51 @@ module Cure
|
|
20
29
|
@opts = opts
|
21
30
|
end
|
22
31
|
|
23
|
-
# @param [String]
|
24
|
-
# @
|
25
|
-
def
|
26
|
-
|
27
|
-
|
28
|
-
end
|
29
|
-
|
30
|
-
# @param [String] file_contents
|
31
|
-
# @return [WrappedCSV]
|
32
|
-
def extract_from_contents(file_contents)
|
33
|
-
parsed_content = parse_csv(file_contents, header: :none)
|
34
|
-
log_info("Parsed CSV into #{parsed_content.content.length} sections.")
|
35
|
-
parsed_content
|
36
|
-
end
|
37
|
-
|
38
|
-
# private
|
32
|
+
# @param [Pathname,String] file - location of file
|
33
|
+
# @param [String] ref_name - name of reference file
|
34
|
+
def parse_csv(file, ref_name:)
|
35
|
+
nr_processor = named_range_processor(ref_name: ref_name)
|
36
|
+
v_processor = variable_processor(ref_name: ref_name)
|
39
37
|
|
40
|
-
|
41
|
-
|
42
|
-
# @return [WrappedCSV]
|
43
|
-
def parse_csv(file_contents, opts={})
|
44
|
-
csv_rows = []
|
38
|
+
sample_rows = config.template.extraction.sample_rows
|
39
|
+
row_count = 0
|
45
40
|
|
46
|
-
|
41
|
+
database_service.with_transaction do
|
42
|
+
CSV.foreach(file, liberal_parsing: true) do |row|
|
43
|
+
next if sample_rows && row_count >= sample_rows
|
47
44
|
|
48
|
-
|
49
|
-
|
50
|
-
|
45
|
+
nr_processor.process_row(row_count, row)
|
46
|
+
v_processor.process_row(row_count, row)
|
47
|
+
row_count += 1
|
51
48
|
|
52
|
-
|
53
|
-
|
49
|
+
log_info "#{row_count} rows processed [#{Time.now}]" if (row_count % 1_000).zero?
|
50
|
+
end
|
54
51
|
|
55
|
-
|
56
|
-
# @return [Array<Hash>]
|
57
|
-
def extract_named_ranges(csv_rows)
|
58
|
-
# Use only the NR's that are defined from the candidates list
|
59
|
-
candidates = config.template.transformations.candidates
|
60
|
-
candidate_nrs = config.template.extraction.required_named_ranges(candidates.map(&:named_range).uniq)
|
61
|
-
candidate_nrs.map do |nr|
|
62
|
-
{
|
63
|
-
"rows" => extract_from_rows(csv_rows, nr["section"]),
|
64
|
-
"name" => nr["name"]
|
65
|
-
}
|
52
|
+
nr_processor.after_process
|
66
53
|
end
|
67
|
-
end
|
68
54
|
|
69
|
-
|
70
|
-
# @return [Hash]
|
71
|
-
def extract_variables(csv_rows)
|
72
|
-
config.template.extraction.variables.each_with_object({}) do |variable, hash|
|
73
|
-
hash[variable["name"]] = lookup_location(csv_rows, variable["location"])
|
74
|
-
end
|
55
|
+
log_info "[#{row_count}] total rows parsed from CSV"
|
75
56
|
end
|
76
57
|
|
77
|
-
|
78
|
-
def extract_from_rows(rows, named_range)
|
79
|
-
psx = CsvLookup.array_position_lookup(named_range)
|
58
|
+
private
|
80
59
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
ret_val << row[psx[0]..psx[1]] if psx[3] == -1 || (idx >= psx[2] && idx <= psx[3])
|
86
|
-
end
|
87
|
-
|
88
|
-
ret_val
|
89
|
-
end
|
90
|
-
|
91
|
-
# @param [Array<Array>] rows
|
92
|
-
# @param [String] variable_location
|
93
|
-
def lookup_location(rows, variable_location)
|
94
|
-
psx = [CsvLookup.position_for_letter(variable_location),
|
95
|
-
CsvLookup.position_for_digit(variable_location)]
|
96
|
-
rows[psx[1]][psx[0]]
|
97
|
-
end
|
60
|
+
# @param [String] ref_name - name of reference file
|
61
|
+
# @return [Cure::Extract::NamedRangeProcessor]
|
62
|
+
def named_range_processor(ref_name:)
|
63
|
+
candidate_nrs = config.template.extraction.required_named_ranges(ref_name: ref_name)
|
98
64
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
# @return [Array, nil]
|
103
|
-
def handle_row(row_idx, row, psx)
|
104
|
-
return nil unless psx[3] == -1 || (row_idx >= psx[2] && row_idx <= psx[3])
|
65
|
+
if candidate_nrs.empty?
|
66
|
+
candidate_nrs = [NamedRange.default_named_range(name: ref_name)]
|
67
|
+
end
|
105
68
|
|
106
|
-
|
69
|
+
NamedRangeProcessor.new(database_service, candidate_nrs)
|
107
70
|
end
|
108
|
-
end
|
109
|
-
|
110
|
-
class WrappedCSV
|
111
|
-
# @return [Array<Hash>]
|
112
|
-
attr_accessor :content
|
113
|
-
|
114
|
-
# @return [Hash]
|
115
|
-
attr_accessor :variables
|
116
71
|
|
117
|
-
|
118
|
-
|
119
|
-
|
72
|
+
# @param [String] ref_name - name of reference file
|
73
|
+
# @return [Cure::Extract::VariableProcessor]
|
74
|
+
def variable_processor(ref_name:)
|
75
|
+
variables = config.template.extraction.required_variables(ref_name: ref_name)
|
76
|
+
VariableProcessor.new(database_service, variables || [])
|
120
77
|
end
|
121
78
|
end
|
122
79
|
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Cure
|
4
|
+
module Extract
|
5
|
+
# Holds the column and row filter definitions for a named range. Both kinds
# are configured through blocks evaluated against the matching handler.
class Filter
  # @return [Filter::RowHandler] row_handler
  attr_reader :row_handler

  # @return [Filter::ColumnHandler] col_handler
  attr_reader :col_handler

  def initialize
    @col_handler = ColumnHandler.new
    @row_handler = RowHandler.new
  end

  # Evaluates the block in the context of the column handler.
  # Returns nil when no block is given.
  def columns(&block)
    @col_handler.instance_eval(&block) if block
  end

  # Evaluates the block in the context of the row handler.
  # Returns nil when no block is given.
  def rows(&block)
    @row_handler.instance_eval(&block) if block
  end

  # Collects column selections (source header plus optional new name) and
  # applies them to header and data rows.
  class ColumnHandler
    attr_reader :definitions, :source_col_positions

    def initialize
      @source_col_positions = nil
      @definitions = []
    end

    # Registers a column to keep, optionally under a different name.
    #
    # @param [String] source
    # @param [String] as - defaults to the source name
    # @return [ColumnHandler] self, to allow chaining
    def with(source:, as: nil)
      definition = { source: source, as: as || source }
      @definitions << definition

      self
    end

    # Records, for each definition, the index of its source header in the
    # given header row. A header that is not found is stored under nil.
    #
    # @param [Array<String>] columns_arr
    # @return [Hash] index => definition
    def set_col_positions(columns_arr)
      @source_col_positions = @definitions.to_h do |definition|
        [columns_arr.index(definition[:source]), definition]
      end
    end

    # Renames the selected headers in place and returns the new names in
    # definition order. Without definitions the input is returned untouched.
    #
    # @param [Array<String>] columns_arr
    # @raise [RuntimeError] when a source header was not found
    def translate_headers(columns_arr)
      return columns_arr unless has_content?

      @source_col_positions.map do |position, definition|
        raise "Cannot find header position for #{definition[:source]}. Please check it exists." if position.nil?

        columns_arr[position] = definition[:as]
      end
    end

    # Picks the selected columns out of a data row, in definition order.
    # Without definitions the input is returned untouched.
    #
    # @param [Array<String>] columns_arr
    def filter_row(columns_arr)
      return columns_arr unless has_content?

      @source_col_positions.each_key.map { |position| columns_arr[position] }
    end

    # @return [TrueClass, FalseClass]
    def has_content?
      !@definitions.empty?
    end
  end

  # Stores the start / finish / including row conditions as plain hashes.
  class RowHandler
    attr_accessor :start_proc, :finish_proc, :including_proc

    # @param [String] where
    # @param [Hash] options
    # @return [RowHandler] self, to allow chaining
    def start(where:, options: {})
      @start_proc = { where: where, options: options }

      self
    end

    # @param [String] where
    # @param [Hash] options
    # @return [RowHandler] self, to allow chaining
    def finish(where:, options: {})
      @finish_proc = { where: where, options: options }

      self
    end

    # @param [String] where
    # @param [Hash] options
    # @return [RowHandler] self, to allow chaining
    def including(where:, options: {})
      @including_proc = { where: where, options: options }

      self
    end

    # @return [TrueClass, FalseClass]
    def has_content?
      [@start_proc, @finish_proc, @including_proc].any?
    end
  end
end
|
117
|
+
end
|
118
|
+
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "cure/extract/filter"
|
4
|
+
|
5
|
+
module Cure
|
6
|
+
module Extract
|
7
|
+
# Describes a rectangular region of a sheet (a "named range"), together with
# the location of its header row and the filter applied to it.
#
# Positions are the arrays returned by Extract::CsvLookup.array_position_lookup:
# [column_start_idx, column_end_idx, row_start_idx, row_end_idx].
class NamedRange
  # Builds a named range spanning the whole sheet (section -1).
  #
  # @param [String, nil] name - defaults to "_default"
  # @return [NamedRange]
  def self.default_named_range(name: nil)
    name ||= "_default"

    new(name, -1)
  end

  attr_accessor :filter, :row_count
  attr_reader :name, :section, :headers, :ref_name, :placeholder

  # This is complex purely to support headers not being the 0th row.
  # A template can specify that the headers row be completely disconnected
  # from the content, thus we have three bounds:
  # - Content bounds
  # - Header bounds
  # - Sheet bounds (headers AND content)

  # @param [String] name
  # @param [String,Integer] section - position passed to CsvLookup.array_position_lookup
  # @param [String,Integer,nil] headers - position of the header row; when
  #   omitted, the first row of the section is used
  # @param [String] ref_name - file reference (for multiple files)
  # @param [Boolean] placeholder
  def initialize(name, section, headers: nil, ref_name: nil, placeholder: false)
    @name = name
    @filter = Filter.new
    @section = Extract::CsvLookup.array_position_lookup(section)
    @headers = calculate_headers(headers)
    @row_count = 0
    @placeholder = placeholder
    @ref_name = ref_name || "_default"
  end

  # @param [Integer] row_idx
  # @return [TrueClass, FalseClass] whether the row lies within headers or content
  def row_in_bounds?(row_idx)
    row_bounds_range.cover?(row_idx)
  end

  # @param [Integer] row_idx
  # @return [TrueClass, FalseClass] whether the row lies within the header rows
  def header_in_bounds?(row_idx)
    header_bounds_range.cover?(row_idx)
  end

  # @param [Integer] row_idx
  # @return [TrueClass, FalseClass] whether the row lies within the content rows
  def content_in_bounds?(row_idx)
    content_bounds_range.cover?(row_idx)
  end

  # @return [Range]
  def row_bounds_range
    @row_bounds_range ||= (row_bounds.first..row_bounds.last)
  end

  # Smallest and largest row index across content and header bounds.
  #
  # @return [Array<Integer>] [min_row_idx, max_row_idx]
  def row_bounds
    # Array#+ builds a new array. Array#concat would append the header bounds
    # to the memoized content_bounds array and corrupt later calls to it.
    @row_bounds ||= (content_bounds + header_bounds).minmax
  end

  # @return [Range]
  def content_bounds_range
    @content_bounds_range ||= (content_bounds[0]..content_bounds[1])
  end

  # @return [Array<Integer>] [row_start_idx, row_end_idx] of the content
  def content_bounds
    @content_bounds ||= @section[2..3]
  end

  # @return [Range]
  def header_bounds_range
    @header_bounds_range ||= (header_bounds&.first..header_bounds&.last)
  end

  # @return [Array<Integer>] [row_start_idx, row_end_idx] of the headers
  def header_bounds
    @header_bounds ||= @headers[2..3]
  end

  # @param [Integer] row_idx
  # @return [Integer] row_idx reduced by the current row_count
  def active_row_count(row_idx)
    row_idx - @row_count
  end

  private

  # Resolves the header position. Without an explicit position the header is
  # taken to be the first row of the section, across the section's columns.
  def calculate_headers(headers)
    return Extract::CsvLookup.array_position_lookup(headers) if headers

    [@section[0], @section[1], @section[2], @section[2]]
  end
end
|
93
|
+
end
|
94
|
+
end
|