cure 0.1.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +16 -3
- data/.tool-versions +1 -0
- data/Dockerfile +1 -1
- data/Gemfile +1 -0
- data/Gemfile.lock +25 -6
- data/README.md +59 -81
- data/docs/README.md +33 -0
- data/docs/about.md +219 -0
- data/docs/builder/add.md +52 -0
- data/docs/builder/black_white_list.md +83 -0
- data/docs/builder/copy.md +48 -0
- data/docs/builder/explode.md +70 -0
- data/docs/builder/main.md +43 -0
- data/docs/builder/remove.md +46 -0
- data/docs/examples/examples.md +164 -0
- data/docs/export/main.md +37 -0
- data/docs/extract/main.md +89 -0
- data/docs/metadata/main.md +29 -0
- data/docs/query/main.md +45 -0
- data/docs/sources/main.md +36 -0
- data/docs/transform/main.md +53 -0
- data/docs/validate/main.md +42 -0
- data/exe/cure +12 -37
- data/exe/cure.old +59 -0
- data/lib/cure/builder/base_builder.rb +151 -0
- data/lib/cure/builder/candidate.rb +56 -0
- data/lib/cure/cli/command.rb +105 -0
- data/lib/cure/cli/generate_command.rb +54 -0
- data/lib/cure/cli/new_command.rb +52 -0
- data/lib/cure/cli/run_command.rb +19 -0
- data/lib/cure/cli/templates/README.md.erb +1 -0
- data/lib/cure/cli/templates/gemfile.erb +5 -0
- data/lib/cure/cli/templates/gitignore.erb +181 -0
- data/lib/cure/cli/templates/new_template.rb.erb +31 -0
- data/lib/cure/cli/templates/tool-versions.erb +1 -0
- data/lib/cure/config.rb +151 -13
- data/lib/cure/coordinator.rb +108 -0
- data/lib/cure/database.rb +191 -0
- data/lib/cure/dsl/builder.rb +26 -0
- data/lib/cure/dsl/exporters.rb +45 -0
- data/lib/cure/dsl/extraction.rb +60 -0
- data/lib/cure/dsl/metadata.rb +33 -0
- data/lib/cure/dsl/queries.rb +36 -0
- data/lib/cure/dsl/source_files.rb +36 -0
- data/lib/cure/dsl/template.rb +131 -0
- data/lib/cure/dsl/transformations.rb +95 -0
- data/lib/cure/dsl/validator.rb +22 -0
- data/lib/cure/export/base_processor.rb +194 -0
- data/lib/cure/export/manager.rb +24 -0
- data/lib/cure/extract/base_processor.rb +47 -0
- data/lib/cure/extract/csv_lookup.rb +43 -0
- data/lib/cure/extract/extractor.rb +80 -0
- data/lib/cure/extract/filter.rb +118 -0
- data/lib/cure/extract/named_range.rb +94 -0
- data/lib/cure/extract/named_range_processor.rb +128 -0
- data/lib/cure/extract/variable.rb +25 -0
- data/lib/cure/extract/variable_processor.rb +57 -0
- data/lib/cure/generator/base_generator.rb +61 -0
- data/lib/cure/generator/case_generator.rb +32 -0
- data/lib/cure/generator/character_generator.rb +41 -0
- data/lib/cure/generator/erb_generator.rb +21 -0
- data/lib/cure/generator/eval_generator.rb +34 -0
- data/lib/cure/generator/faker_generator.rb +31 -0
- data/lib/cure/generator/guid_generator.rb +21 -0
- data/lib/cure/generator/hex_generator.rb +21 -0
- data/lib/cure/generator/imports.rb +16 -0
- data/lib/cure/generator/number_generator.rb +21 -0
- data/lib/cure/generator/placeholder_generator.rb +26 -0
- data/lib/cure/generator/proc_generator.rb +21 -0
- data/lib/cure/generator/redact_generator.rb +22 -0
- data/lib/cure/generator/static_generator.rb +21 -0
- data/lib/cure/generator/variable_generator.rb +26 -0
- data/lib/cure/helpers/file_helpers.rb +50 -0
- data/lib/cure/helpers/object_helpers.rb +17 -0
- data/lib/cure/helpers/perf_helpers.rb +30 -0
- data/lib/cure/helpers/string.rb +54 -0
- data/lib/cure/launcher.rb +125 -0
- data/lib/cure/log.rb +10 -3
- data/lib/cure/planner.rb +136 -0
- data/lib/cure/strategy/append_strategy.rb +28 -0
- data/lib/cure/strategy/base_strategy.rb +98 -0
- data/lib/cure/strategy/contain_strategy.rb +51 -0
- data/lib/cure/strategy/end_with_strategy.rb +52 -0
- data/lib/cure/strategy/full_strategy.rb +28 -0
- data/lib/cure/strategy/history/history_cache.rb +82 -0
- data/lib/cure/strategy/imports.rb +12 -0
- data/lib/cure/strategy/match_strategy.rb +48 -0
- data/lib/cure/strategy/prepend_strategy.rb +28 -0
- data/lib/cure/strategy/regex_strategy.rb +55 -0
- data/lib/cure/strategy/split_strategy.rb +58 -0
- data/lib/cure/strategy/start_with_strategy.rb +53 -0
- data/lib/cure/transformation/candidate.rb +47 -36
- data/lib/cure/transformation/transform.rb +29 -71
- data/lib/cure/validator/base_rule.rb +78 -0
- data/lib/cure/validator/candidate.rb +54 -0
- data/lib/cure/validator/manager.rb +21 -0
- data/lib/cure/validators.rb +71 -0
- data/lib/cure/version.rb +1 -1
- data/lib/cure.rb +19 -6
- data/templates/dsl_example.rb +48 -0
- data/templates/empty_template.rb +31 -0
- metadata +161 -23
- data/lib/cure/csv_helpers.rb +0 -6
- data/lib/cure/export/exporter.rb +0 -49
- data/lib/cure/file_helpers.rb +0 -38
- data/lib/cure/generator/base.rb +0 -148
- data/lib/cure/main.rb +0 -63
- data/lib/cure/object_helpers.rb +0 -27
- data/lib/cure/strategy/base.rb +0 -223
- data/templates/aws_cur_template.json +0 -143
- data/templates/example_template.json +0 -38
@@ -0,0 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "cure/log"
|
4
|
+
require "cure/config"
|
5
|
+
require "cure/extract/csv_lookup"
|
6
|
+
|
7
|
+
require "csv"
|
8
|
+
|
9
|
+
module Cure
|
10
|
+
module Extract
|
11
|
+
class BaseProcessor
  # Service object that every table/row operation below is delegated to.
  # @return [Cure::DatabaseService]
  attr_reader :database_service

  # @param [Cure::DatabaseService] database_service
  def initialize(database_service)
    @database_service = database_service
  end

  protected

  # Asks the database service to create a table. Any falsy column name is
  # replaced by "col_<index>", where index is the column's position.
  #
  # @param [String] tbl_name
  # @param [Array<Object>] columns
  def create_table(tbl_name, columns)
    resolved_names = columns.each_with_index.map do |column, position|
      column || "col_#{position}"
    end

    @database_service.create_table(tbl_name.to_sym, resolved_names)
  end

  # Forwards a single row to the database service.
  #
  # @param [String] tbl_name
  # @param [Array<Object>] values
  def insert_record(tbl_name, values)
    table = tbl_name.to_sym
    @database_service.insert_row(table, values)
  end

  # Forwards a batch of rows to the database service.
  #
  # @param [String] tbl_name
  # @param [Array<Array<Object>>] values
  def insert_batched_rows(tbl_name, values)
    table = tbl_name.to_sym
    @database_service.insert_batched_rows(table, values)
  end
end
|
46
|
+
end
|
47
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Cure
|
4
|
+
module Extract
|
5
|
+
class CsvLookup

  # Converts a spreadsheet-style range into zero-based index bounds.
  #
  # The integer -1 means "whole sheet". An open-ended [0, -1, 0, -1] result
  # would be preferable, but -1 does not work as a range end, so fixed upper
  # bounds are returned instead.
  #
  # @param [String,Integer] position - [Ex A1:B1, A1:B1,A2:B2]
  # @return [Array] [column_start_idx, column_end_idx, row_start_idx, row_end_idx]
  # @raise [RuntimeError] "Invalid format" when position is not a "start:finish" pair
  def self.array_position_lookup(position)
    return [0, 1_023, 0, 10_000_000] if position.is_a?(Integer) && position == -1 # Whole sheet

    raise "Invalid format" unless position.respond_to?(:split)

    start, finish, *_excess = position.split(":")
    # Both halves are required: a missing finish would otherwise surface as a
    # NoMethodError on nil inside the helpers below.
    raise "Invalid format" unless start && finish

    [
      position_for_letter(start),
      position_for_letter(finish),
      position_for_digit(start),
      position_for_digit(finish)
    ]
  end

  # Zero-based column index for the first run of letters in the reference
  # (A => 0, Z => 25, AA => 26). Returns -1 when there are no letters.
  #
  # @param [String] range
  # @return [Integer]
  def self.position_for_letter(range)
    letters = range.upcase[/[A-Z]+/].to_s
    one_based = letters.each_char.reduce(0) do |total, char|
      (total * 26) + (char.ord - "A".ord + 1)
    end

    # Excel columns are not 0th indexed.
    one_based - 1
  end

  # Zero-based row index for the first run of digits in the reference
  # ("B4" => 3). Returns -1 when there are no digits.
  #
  # @param [String] range
  # @return [Integer]
  def self.position_for_digit(range)
    range.upcase.scan(/\d+/).first.to_i - 1
  end
end
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "cure/log"
|
4
|
+
require "cure/config"
|
5
|
+
require "cure/database"
|
6
|
+
require "cure/extract/csv_lookup"
|
7
|
+
require "cure/helpers/file_helpers"
|
8
|
+
require "cure/helpers/perf_helpers"
|
9
|
+
require "cure/extract/named_range_processor"
|
10
|
+
require "cure/extract/variable_processor"
|
11
|
+
|
12
|
+
require "csv"
|
13
|
+
require "objspace"
|
14
|
+
|
15
|
+
module Cure
|
16
|
+
module Extract
|
17
|
+
class Extractor
  include Log
  include Database
  include Configuration
  include Helpers::FileHelpers
  include Helpers::PerfHelpers

  # @return [Hash] opts
  attr_reader :opts

  # @param [Hash] opts
  def initialize(opts)
    @opts = opts
  end

  # Streams a CSV file row by row through the named-range and variable
  # processors, inside a single database transaction.
  #
  # When the template's extraction config defines sample_rows, reading stops
  # as soon as that many rows have been processed.
  #
  # @param [Pathname,String] file - location of file
  # @param [String] ref_name - name of reference file
  def parse_csv(file, ref_name:)
    nr_processor = named_range_processor(ref_name: ref_name)
    v_processor = variable_processor(ref_name: ref_name)

    sample_rows = config.template.extraction.sample_rows
    row_count = 0

    database_service.with_transaction do
      CSV.foreach(file, liberal_parsing: true) do |row|
        # Stop reading once the sample limit is reached; nothing after this
        # point is processed, so parsing the rest of the file is wasted work.
        break if sample_rows && row_count >= sample_rows

        nr_processor.process_row(row_count, row)
        v_processor.process_row(row_count, row)
        row_count += 1

        log_info "#{row_count} rows processed [#{Time.now}]" if (row_count % 1_000).zero?
      end

      # Flush whatever the named range processor still holds in its cache.
      nr_processor.after_process
    end

    log_info "[#{row_count}] total rows parsed from CSV"
  end

  private

  # Builds the processor for the named ranges required for this file. Falls
  # back to a single default named range when the template requires none.
  #
  # @param [String] ref_name - name of reference file
  # @return [Cure::Extract::NamedRangeProcessor]
  def named_range_processor(ref_name:)
    candidate_nrs = config.template.extraction.required_named_ranges(ref_name: ref_name)
    candidate_nrs = [NamedRange.default_named_range(name: ref_name)] if candidate_nrs.empty?

    NamedRangeProcessor.new(database_service, candidate_nrs)
  end

  # Builds the processor for the variables required for this file; a nil
  # result from the template is treated as "no variables".
  #
  # @param [String] ref_name - name of reference file
  # @return [Cure::Extract::VariableProcessor]
  def variable_processor(ref_name:)
    variables = config.template.extraction.required_variables(ref_name: ref_name)
    VariableProcessor.new(database_service, variables || [])
  end
end
|
79
|
+
end
|
80
|
+
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Cure
|
4
|
+
module Extract
|
5
|
+
class Filter

  # @return [Filter::RowHandler] row_handler
  attr_reader :row_handler

  # @return [Filter::ColumnHandler] col_handler
  attr_reader :col_handler

  def initialize
    @row_handler = RowHandler.new
    @col_handler = ColumnHandler.new
  end

  # Evaluates the block in the context of the column handler.
  # Does nothing (returns nil) when no block is given.
  def columns(&block)
    @col_handler.instance_eval(&block) if block
  end

  # Evaluates the block in the context of the row handler.
  # Does nothing (returns nil) when no block is given.
  def rows(&block)
    @row_handler.instance_eval(&block) if block
  end

  # Collects column selections/renames and applies them to header and data rows.
  class ColumnHandler

    attr_reader :definitions, :source_col_positions

    def initialize
      @definitions = []
      @source_col_positions = nil
    end

    # Registers a column to keep; it keeps its source name unless `as` is given.
    #
    # @param [String] source
    # @param [String] as
    # @return [ColumnHandler] self
    def with(source:, as: nil)
      @definitions.push({source: source, as: as || source})
      self
    end

    # Maps each definition to the index of its source header in columns_arr.
    # A header that is not present is stored under a nil key.
    #
    # @param [Array<String>] columns_arr
    def set_col_positions(columns_arr)
      @source_col_positions = @definitions.to_h do |definition|
        [columns_arr.index(definition[:source]), definition]
      end
    end

    # Writes the `as` name into columns_arr at each recorded position and
    # returns the list of those names. Returns columns_arr untouched when
    # no definitions exist.
    #
    # @param [Array<String>] columns_arr
    def translate_headers(columns_arr)
      return columns_arr unless has_content?

      @source_col_positions.map do |position, definition|
        raise "Cannot find header position for #{definition[:source]}. Please check it exists." if position.nil?

        columns_arr[position] = definition[:as]
      end
    end

    # Picks the values at the recorded positions, in definition order.
    # Returns columns_arr untouched when no definitions exist.
    #
    # @param [Array<String>] columns_arr
    def filter_row(columns_arr)
      return columns_arr unless has_content?

      @source_col_positions.each_key.map { |position| columns_arr[position] }
    end

    # @return [TrueClass, FalseClass]
    def has_content?
      !@definitions.empty?
    end
  end

  # Holds the row-level filter settings (start / finish / including).
  class RowHandler

    attr_accessor :start_proc, :finish_proc, :including_proc

    # @param [String] where
    # @param [Hash] options
    # @return [RowHandler] self
    def start(where:, options: {})
      @start_proc = {where: where, options: options}
      self
    end

    # @param [String] where
    # @param [Hash] options
    # @return [RowHandler] self
    def finish(where:, options: {})
      @finish_proc = {where: where, options: options}
      self
    end

    # @param [String] where
    # @param [Hash] options
    # @return [RowHandler] self
    def including(where:, options: {})
      @including_proc = {where: where, options: options}
      self
    end

    # @return [TrueClass, FalseClass]
    def has_content?
      [@start_proc, @finish_proc, @including_proc].any?
    end
  end
end
|
117
|
+
end
|
118
|
+
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "cure/extract/filter"
|
4
|
+
|
5
|
+
module Cure
|
6
|
+
module Extract
|
7
|
+
class NamedRange

  # Builds a named range covering the whole sheet (section -1).
  #
  # @param [String,nil] name - defaults to "_default"
  # @return [NamedRange]
  def self.default_named_range(name: nil)
    new(name || "_default", -1)
  end

  attr_accessor :filter, :row_count
  attr_reader :name, :section, :headers, :ref_name, :placeholder

  # This is complex purely to support headers not being the 0th row.
  # A template can specify that the headers row be completely disconnected
  # from the content, thus we have three bounds:
  # - Content bounds
  # - Header bounds
  # - Sheet bounds (headers AND content)

  # @param [String] name
  # @param [String,Integer] section - range handed to CsvLookup.array_position_lookup
  # @param [String,nil] headers - optional header range; defaults to the first row of section
  # @param [String] ref_name - file reference (for multiple files)
  # @param [Boolean] placeholder
  def initialize(name, section, headers: nil, ref_name: nil, placeholder: false)
    @name = name
    @filter = Filter.new
    @section = Extract::CsvLookup.array_position_lookup(section)
    @headers = calculate_headers(headers)
    @row_count = 0
    @placeholder = placeholder
    @ref_name = ref_name || "_default"
  end

  # @param [Integer] row_idx
  # @return [TrueClass, FalseClass]
  def row_in_bounds?(row_idx)
    row_bounds_range.cover?(row_idx)
  end

  # @param [Integer] row_idx
  # @return [TrueClass, FalseClass]
  def header_in_bounds?(row_idx)
    header_bounds_range.cover?(row_idx)
  end

  # @param [Integer] row_idx
  # @return [TrueClass, FalseClass]
  def content_in_bounds?(row_idx)
    content_bounds_range.cover?(row_idx)
  end

  # @return [Range] rows spanned by headers and content together
  def row_bounds_range
    @row_bounds_range ||= (row_bounds.first..row_bounds.last)
  end

  # Smallest and largest row index across content and header bounds.
  #
  # @return [Array<Integer>] [min_row, max_row]
  def row_bounds
    # Array#+ builds a new array; Array#concat would append the header
    # bounds to the memoized @content_bounds array itself.
    @row_bounds ||= (content_bounds + header_bounds).minmax
  end

  # @return [Range]
  def content_bounds_range
    @content_bounds_range ||= (content_bounds[0]..content_bounds[1])
  end

  # @return [Array<Integer>] [row_start_idx, row_end_idx] of the content section
  def content_bounds
    @content_bounds ||= @section[2..3]
  end

  # @return [Range]
  def header_bounds_range
    @header_bounds_range ||= (header_bounds.first..header_bounds.last)
  end

  # @return [Array<Integer>] [row_start_idx, row_end_idx] of the header section
  def header_bounds
    @header_bounds ||= @headers[2..3]
  end

  # Row index offset by the current row_count.
  #
  # @param [Integer] row_idx
  # @return [Integer]
  def active_row_count(row_idx)
    row_idx - @row_count
  end

  private

  # Resolves the header position. Without an explicit header range, the
  # header is the first row of the section across the section's columns.
  #
  # @param [String,nil] headers
  # @return [Array<Integer>]
  def calculate_headers(headers)
    return Extract::CsvLookup.array_position_lookup(headers) if headers

    [@section[0], @section[1], @section[2], @section[2]]
  end
end
|
93
|
+
end
|
94
|
+
end
|
@@ -0,0 +1,128 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "cure/log"
|
4
|
+
require "cure/config"
|
5
|
+
require "cure/extract/csv_lookup"
|
6
|
+
require "cure/extract/base_processor"
|
7
|
+
|
8
|
+
require "csv"
|
9
|
+
|
10
|
+
module Cure
|
11
|
+
module Extract
|
12
|
+
class NamedRangeProcessor < BaseProcessor

  # @return [Array<Extraction::NamedRange>] named_ranges
  attr_reader :candidate_nrs

  # @param [Cure::DatabaseService] database_service
  # @param [Array<Extraction::NamedRange>] candidate_nrs
  def initialize(database_service, candidate_nrs)
    @candidate_nrs = candidate_nrs
    @cache = init_cache # named range name => rows waiting to be inserted

    @tables_created = []
    super database_service
  end

  # Routes one CSV row to every named range whose bounds contain it.
  # A header row creates the table; content rows are cached and inserted
  # in batches once the table exists.
  #
  # @param [Integer] row_idx
  # @param [Array] csv_row
  def process_row(row_idx, csv_row) # rubocop:disable all
    # Return if row is not in any named range
    return unless row_bounds.cover?(row_idx)

    # Iterate over the NR's, if its inside those bounds, add it
    @candidate_nrs.each do |nr|
      next unless nr.row_in_bounds?(row_idx)

      # The row handler reports content when only start/finish is set, so
      # the including filter may be absent and must be checked before use.
      row_handler = nr.filter.row_handler
      including = row_handler.including_proc if row_handler.has_content?
      if including && !including[:where].call(csv_row)
        # Rejected rows are counted on the named range (see active_row_count below).
        nr.row_count += 1
        next
      end

      # Header row: work out the column names, create the table, flush cache
      if nr.header_in_bounds?(nr.active_row_count(row_idx))
        column_headers = csv_row[nr.section[0]..nr.section[1]]

        if nr.filter.col_handler.has_content?
          nr.filter.col_handler.set_col_positions(column_headers)
          column_headers = nr.filter.col_handler.translate_headers(column_headers)
        end

        create_table(nr.name, column_headers)
        @tables_created << nr.name

        # Insert any rows that were cached before the table was created.
        @cache[nr.name].each do |row|
          insert_record(
            nr.name,
            nr.filter.col_handler.filter_row(row)
          )
        end

        @cache[nr.name] = [] # Evict cache

        next
      end

      next unless nr.content_in_bounds?(row_idx)

      # Content row: keep only the wanted columns, prefix the row index.
      filtered_row = nr.filter.col_handler.filter_row(
        csv_row[nr.section[0]..nr.section[1]]
      )
      @cache[nr.name] << filtered_row.unshift(row_idx)

      # Flush once the cache holds 10 rows, but only if the table exists;
      # otherwise rows stay cached until the header row is processed.
      insert_cache(nr.name) if @tables_created.include?(nr.name) && @cache[nr.name].size >= 10
    end
  end

  # Inserts whatever rows are still cached for each named range.
  def after_process
    @cache.each do |named_range, cache|
      insert_cache(named_range) if cache.size.positive?
    end
  end

  # @return [Range]
  # This covers the max size of all named ranges
  def row_bounds
    @row_bounds ||= calculate_row_bounds
  end

  # Range from the lowest to the highest row index of any candidate named range.
  #
  # @return [Range]
  def calculate_row_bounds
    lowest, highest = @candidate_nrs.flat_map(&:row_bounds).minmax
    (lowest..highest)
  end

  private

  # @return [Hash] an empty row list for every candidate named range name
  def init_cache
    @candidate_nrs.each_with_object({}) do |nr, cache|
      cache[nr.name] = []
    end
  end

  # Inserts the cached rows of one named range as a batch, then empties its cache.
  def insert_cache(named_range)
    insert_batched_rows(named_range, @cache[named_range])
    @cache[named_range] = []
  end
end
|
127
|
+
end
|
128
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Cure
|
4
|
+
module Extract
|
5
|
+
class Variable
  attr_reader :name, :location, :ref_name

  # @param [String] name
  # @param [String] location - cell reference passed to CsvLookup; stored as
  #   [column_idx, row_idx]
  # @param [String] ref_name - file reference
  def initialize(name, location, ref_name: "_default")
    @name = name
    column_idx = Extract::CsvLookup.position_for_letter(location)
    row_idx = Extract::CsvLookup.position_for_digit(location)
    @location = [column_idx, row_idx]
    @ref_name = ref_name
  end

  # @param [Integer] row_idx
  # @return [TrueClass, FalseClass] whether row_idx is the variable's row
  def row_in_bounds?(row_idx)
    row_bounds_range.cover?(row_idx)
  end

  # Single-row range built from the row part of the location.
  #
  # @return [Range]
  def row_bounds_range
    @row_bounds_range ||= Range.new(@location&.last, @location&.last)
  end
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "cure/log"
|
4
|
+
require "cure/config"
|
5
|
+
require "cure/extract/base_processor"
|
6
|
+
|
7
|
+
require "csv"
|
8
|
+
|
9
|
+
module Cure
|
10
|
+
module Extract
|
11
|
+
class VariableProcessor < BaseProcessor

  # @return [Array<Extraction::Variable>] variables
  attr_reader :candidate_variables

  # @param [Cure::DatabaseService] database_service
  # @param [Array<Extraction::Variable>] candidate_variables
  def initialize(database_service, candidate_variables)
    super(database_service)

    @candidate_variables = candidate_variables
    @candidate_count = candidate_variables.length
    @processed = 0

    init_db
  end

  # Stores the value of every variable located on the given row.
  #
  # @param [Integer] row_idx
  # @param [Array] csv_row
  def process_row(row_idx, csv_row)
    # Nothing to do once every candidate has been stored, or when no
    # variable is located on this row.
    return if @processed == @candidate_count
    return unless candidate_rows.include?(row_idx)

    @candidate_variables.each do |variable|
      if variable.row_in_bounds?(row_idx)
        column_idx = variable.location.first
        insert_record(:variables, [nil, variable.name, csv_row[column_idx]])
        @processed += 1
      end
    end
  end

  # Row index of each candidate variable (last element of its location).
  #
  # @return [Array]
  def candidate_rows
    @candidate_rows ||= @candidate_variables.map(&:location).map(&:last)
  end

  private

  # Creates the :variables table unless the database service reports it exists.
  def init_db
    create_table(:variables, %w[name value]) unless @database_service.table_exist?(:variables)
  end
end
|
56
|
+
end
|
57
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Cure
|
4
|
+
module Generator
|
5
|
+
class BaseGenerator
|
6
|
+
# @return [Hash]
|
7
|
+
attr_accessor :options
|
8
|
+
|
9
|
+
def initialize(options={})
|
10
|
+
@options = options
|
11
|
+
end
|
12
|
+
|
13
|
+
# @param [Object/Nil] source_value
|
14
|
+
# @param [RowCtx/Nil] row_ctx
|
15
|
+
# @return [String]
|
16
|
+
def generate(source_value, row_ctx)
|
17
|
+
translated = _generate(source_value, row_ctx)
|
18
|
+
translated = "#{prefix}#{translated}" if prefix
|
19
|
+
translated = "#{translated}#{suffix}" if suffix
|
20
|
+
translated
|
21
|
+
end
|
22
|
+
|
23
|
+
def describe
|
24
|
+
_describe
|
25
|
+
end
|
26
|
+
|
27
|
+
private
|
28
|
+
|
29
|
+
# @param [RowCtx] _row_ctx
|
30
|
+
# @param [Object/Nil] _source_value
|
31
|
+
# @return [String]
|
32
|
+
def _generate(_source_value, _row_ctx)
|
33
|
+
raise NotImplementedError, "#{self.class} has not implemented method '#{__method__}'"
|
34
|
+
end
|
35
|
+
|
36
|
+
def _describe
|
37
|
+
raise NotImplementedError, "#{self.class} has not implemented method '#{__method__}'"
|
38
|
+
end
|
39
|
+
|
40
|
+
def prefix(default=nil)
|
41
|
+
extract_property("prefix", default)
|
42
|
+
end
|
43
|
+
|
44
|
+
def suffix(default=nil)
|
45
|
+
extract_property("suffix", default)
|
46
|
+
end
|
47
|
+
|
48
|
+
def length(default=nil)
|
49
|
+
extract_property("length", default)
|
50
|
+
end
|
51
|
+
|
52
|
+
def property_name(default=nil)
|
53
|
+
extract_property("name", default)
|
54
|
+
end
|
55
|
+
|
56
|
+
def extract_property(property, default_val)
|
57
|
+
@options.fetch(property.to_sym, @options.fetch(property, default_val))
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "cure/generator/base_generator"
|
4
|
+
|
5
|
+
module Cure
|
6
|
+
module Generator
|
7
|
+
class CaseGenerator < BaseGenerator
  private

  # Returns the :return_value of the first :switch entry whose :case equals
  # the source value. When nothing matches, or the matched value is falsy,
  # the :return_value of the :else entry is returned (nil if absent).
  #
  # @param [Object] source_value
  # @param [RowCtx] _row_ctx
  def _generate(source_value, _row_ctx)
    matched = case_options.fetch(:switch).find do |candidate|
      candidate[:case] == source_value
    end
    value = matched&.fetch(:return_value, nil)

    value || case_options.fetch(:else, {}).fetch(:return_value, nil)
  end

  # The "statement" option, memoized.
  #
  # @return [Hash]
  def case_options
    @case_options ||= extract_property("statement", nil)
  end

  def _describe
    "Will match source value against a value included in #{case_options}"
  end
end
|
31
|
+
end
|
32
|
+
end
|