normalizator 0.0.0 → 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/lib/normalizator/normalize.rb +30 -10
- data/lib/normalizator/version.rb +1 -1
- data/lib/rules/base_rule.rb +4 -1
- data/lib/rules/enum_rule.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 672c1219025fa763f47905ac4d2493d664d336d8cf903a0fc29b27a3048f7942
|
4
|
+
data.tar.gz: 6e9d98d3c34cd74ea9036eb69b17f610ac418e21f0a1facb0e5969de19ae22d2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a7097277a6e49c229fc94374170f229ea07b138bc9d39f032de914882f885c3c8f53fdd577f5f84feeb83b8940774fec620a820bde13eb5ea92ac3adaecc395a
|
7
|
+
data.tar.gz: 763d26a3923f7deaad959330542f6c15df1eb96effb6f7eff45631bfcd53811a19748cbf13d8fadacf06f0200664fc4218d151807c136214b1d2ac3658d47795
|
data/.gitignore
CHANGED
data/lib/normalizator/normalize.rb
CHANGED
@@ -30,13 +30,13 @@ module Normalizator
|
|
30
30
|
def run_rules_on_row(original_row)
|
31
31
|
new_row = @options[:exclude_fileds_without_rule] ? {} : original_row.clone
|
32
32
|
|
33
|
-
@rules_keys.each do |
|
34
|
-
if
|
35
|
-
next if should_skip_mutly_field_rule(
|
36
|
-
run_rules_on_multy_value(
|
33
|
+
@rules_keys.each do |rule_field|
|
34
|
+
if rule_field.instance_of? Array
|
35
|
+
next if should_skip_mutly_field_rule?(rule_field, original_row)
|
36
|
+
run_rules_on_multy_value(rule_field, new_row, original_row)
|
37
37
|
else
|
38
|
-
next if should_skip_rule(
|
39
|
-
|
38
|
+
next if should_skip_rule?(rule_field, new_row, original_row)
|
39
|
+
run_rules_on_single_value(rule_field, new_row, original_row)
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
@@ -44,30 +44,42 @@ module Normalizator
|
|
44
44
|
end
|
45
45
|
|
46
46
|
def run_rules_on_multy_value(multy_field, new_row, original_row)
|
47
|
-
|
47
|
+
rules = @rules[multy_field]
|
48
|
+
runs_on_derived_value = should_provide_derived_value?(rules)
|
49
|
+
values = multy_field.map { |sub_field| runs_on_derived_value ? new_row[sub_field] : original_row[sub_field] }
|
48
50
|
|
49
|
-
normalized_values = run_rules_on_value(
|
51
|
+
normalized_values = run_rules_on_value(rules, values, original_row)
|
50
52
|
|
51
53
|
multy_field.each_with_index do |sub_field, index|
|
52
54
|
new_row[sub_field] = normalized_values[index]
|
53
55
|
end
|
54
56
|
end
|
55
57
|
|
58
|
+
def run_rules_on_single_value(field, new_row, original_row)
|
59
|
+
rules = @rules[field]
|
60
|
+
runs_on_derived_value = should_provide_derived_value?(rules)
|
61
|
+
value = runs_on_derived_value ? new_row[field] : original_row[field]
|
62
|
+
|
63
|
+
new_row[field] = run_rules_on_value(rules, value, original_row)
|
64
|
+
end
|
65
|
+
|
56
66
|
def run_rules_on_value(rules, values, original_row)
|
57
67
|
if rules.instance_of? Array
|
58
68
|
ruled_field = values
|
59
69
|
|
60
70
|
rules.each do |rule|
|
71
|
+
raise(RuleError, 'Custom rules should implement .apply method') unless rule.respond_to?(:apply)
|
61
72
|
ruled_field = rule.apply(ruled_field, original_row)
|
62
73
|
end
|
63
74
|
|
64
75
|
ruled_field
|
65
76
|
else
|
77
|
+
raise(RuleError, 'Custom rules should implement .apply method') unless rules.respond_to?(:apply)
|
66
78
|
rules.apply(values, original_row)
|
67
79
|
end
|
68
80
|
end
|
69
81
|
|
70
|
-
def should_skip_rule(field, new_row, original_row)
|
82
|
+
def should_skip_rule?(field, new_row, original_row)
|
71
83
|
unless original_row.key? field
|
72
84
|
return true if @options[:ignore_unmatched_rules]
|
73
85
|
|
@@ -76,8 +88,16 @@ module Normalizator
|
|
76
88
|
end
|
77
89
|
end
|
78
90
|
|
79
|
-
def should_skip_mutly_field_rule(multy_field, original_row)
|
91
|
+
def should_skip_mutly_field_rule?(multy_field, original_row)
|
80
92
|
multy_field.any? { |sub_field| !original_row.key?(sub_field) }
|
81
93
|
end
|
94
|
+
|
95
|
+
def should_provide_derived_value?(rules)
|
96
|
+
if rules.instance_of? Array
|
97
|
+
rules[0].options[:runs_on_derived_value]
|
98
|
+
else
|
99
|
+
rules.options[:runs_on_derived_value]
|
100
|
+
end
|
101
|
+
end
|
82
102
|
end
|
83
103
|
end
|
data/lib/normalizator/version.rb
CHANGED
data/lib/rules/base_rule.rb
CHANGED
@@ -1,9 +1,12 @@
|
|
1
1
|
# Normalizator::BaseRule module
|
2
2
|
module Normalizator
|
3
3
|
class BaseRule
|
4
|
+
attr_reader :options
|
5
|
+
|
4
6
|
DEFAULT_OPTIONS = {
|
5
7
|
return_original_on_failure: true,
|
6
|
-
default_value_on_failure: nil
|
8
|
+
default_value_on_failure: nil,
|
9
|
+
runs_on_derived_value: false
|
7
10
|
}.freeze
|
8
11
|
|
9
12
|
def initialize(options = {})
|
data/lib/rules/enum_rule.rb
CHANGED
@@ -40,7 +40,7 @@ module Normalizator
|
|
40
40
|
sanitized_enum = enum.downcase.strip
|
41
41
|
|
42
42
|
return [enum, true] if sanitized_value == sanitized_enum
|
43
|
-
return [enum, false] if @options[:diffuse] && sanitized_enum.include?(sanitized_value)
|
43
|
+
return [enum, false] if @options[:diffuse] && !sanitized_value.empty? && sanitized_enum.include?(sanitized_value)
|
44
44
|
|
45
45
|
[best_match, false]
|
46
46
|
end
|