data_collector 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +32 -0
- data/lib/data_collector/core.rb +27 -0
- data/lib/data_collector/rules.rb +1 -1
- data/lib/data_collector/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b38644344f6bcfa74d68a2281b83c803879334fbd0f281fde4de8f92b180c5ec
|
4
|
+
data.tar.gz: 582650f6f0237f4a45c45ab137b6eac0110117b77a7b917eae448d17285ae999
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 550f42750545164f6e7f47347144023be9c1bb58ff661886e1a5ef51c2804fcea8d75b87c65ce29b6a288a3e24bcbfce26f5987c17424cdb1023fda510675b49
|
7
|
+
data.tar.gz: ee0d60db7824fda4688c0c41aed8f8e2884aeb5d9e0b4178c53685a4fcc44e3ef93f848f3b597d6c7b8fdb85a64ffe155f03e9e6bc59fcf54fc109b662a5d892
|
data/README.md
CHANGED
@@ -73,6 +73,38 @@ filter data from a hash using [JsonPath](http://goessner.net/articles/JsonPath/i
|
|
73
73
|
filtered_data = filter(data, "$..metadata.record")
|
74
74
|
```
|
75
75
|
|
76
|
+
#### rules
|
77
|
+
|
78
|
+
rules allows you to define a simple structure to run against a JSONPath filter
|
79
|
+
|
80
|
+
A rule is made up of a Hash: the key is the map key field, and its value is a Hash with a JSONPath filter and options to apply a convert method on the filtered results.
|
81
|
+
Available convert methods are: time, map, each, call, suffix
|
82
|
+
- time: Parses a given time/date string into a Time object
|
83
|
+
- map: applies a mapping to a filter
|
84
|
+
- suffix: adds a suffix to a result
|
85
|
+
- call: executes a lambda on the filter
|
86
|
+
- each: runs a lambda on each row of a filter
|
87
|
+
|
88
|
+
example:
|
89
|
+
```ruby
|
90
|
+
my_rules = {
|
91
|
+
'identifier' => {"filter" => '$..id'},
|
92
|
+
'language' => {'filter' => '$..lang',
|
93
|
+
'options' => {'convert' => 'map',
|
94
|
+
'map' => {'nl' => 'dut', 'fr' => 'fre', 'de' => 'ger', 'en' => 'eng'}
|
95
|
+
}
|
96
|
+
},
|
97
|
+
'subject' => {'filter' => '$..keywords',
|
98
|
+
'options' => {'convert' => 'each',
|
99
|
+
'lambda' => lambda {|d| d.split(',')}
|
100
|
+
}
|
101
|
+
},
|
102
|
+
'creationdate' => {'filter' => '$..published_date', 'options' => {'convert' => 'time'}}
|
103
|
+
}
|
104
|
+
|
105
|
+
rules.run(my_rules, record, output)
|
106
|
+
```
|
107
|
+
|
76
108
|
#### config
|
77
109
|
config is an object that points to "config.yml"; you can read data from and/or store data to this object.
|
78
110
|
|
data/lib/data_collector/core.rb
CHANGED
@@ -47,6 +47,33 @@ module DataCollector
|
|
47
47
|
@output ||= Output.new
|
48
48
|
end
|
49
49
|
|
50
|
+
# You can apply rules to input.
|
51
|
+
# A rule is made up of a Hash: the key is the map key field, and its value is a Hash with a JSONPath filter and
|
52
|
+
# options to apply a convert method on the filtered results.
|
53
|
+
#
|
54
|
+
# available convert methods are: time, map, each, call, suffix
|
55
|
+
# - time: Parses a given time/date string into a Time object
|
56
|
+
# - map: applies a mapping to a filter
|
57
|
+
# - suffix: adds a suffix to a result
|
58
|
+
# - call: executes a lambda on the filter
|
59
|
+
# - each: runs a lambda on each row of a filter
|
60
|
+
#
|
61
|
+
# example:
|
62
|
+
# my_rules = {
|
63
|
+
# 'identifier' => {"filter" => '$..id'},
|
64
|
+
# 'language' => {'filter' => '$..lang',
|
65
|
+
# 'options' => {'convert' => 'map',
|
66
|
+
# 'map' => {'nl' => 'dut', 'fr' => 'fre', 'de' => 'ger', 'en' => 'eng'}
|
67
|
+
# }
|
68
|
+
# },
|
69
|
+
# 'subject' => {'filter' => '$..keywords',
|
70
|
+
# 'options' => {'convert' => 'each',
|
71
|
+
# 'lambda' => lambda {|d| d.split(',')}
|
72
|
+
# }
|
73
|
+
# },
|
74
|
+
# 'creationdate' => {'filter' => '$..published_date', 'options' => {'convert' => 'time'}}
|
75
|
+
# }
|
76
|
+
# rules.run(my_rules, input, output)
|
50
77
|
def rules
|
51
78
|
@rules ||= Rules.new
|
52
79
|
end
|
data/lib/data_collector/rules.rb
CHANGED
@@ -25,7 +25,7 @@ module DataCollector
|
|
25
25
|
|
26
26
|
private
|
27
27
|
def apply_rule(map_to_key, rule, from_record, to_record)
|
28
|
-
if
|
28
|
+
if rule.has_key?('options') && rule['options'].has_key?('convert') && rule['options']['convert'].eql?('each')
|
29
29
|
result = get_value_for(map_to_key, rule['filter'], from_record, rule['options'])
|
30
30
|
|
31
31
|
if result.is_a?(Array)
|