data_collector 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +93 -4
- data/lib/data_collector/rules_ng.rb +2 -1
- data/lib/data_collector/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 78a054b825d99611c8f5070120c11b478975152433b3c76b9a63a6694b7c8cd0
|
4
|
+
data.tar.gz: f11143808a8e1647a791b0e12e1c5855dc1589de37f7ea50c0e8789f88538774
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0b3396b16e5500add7dbb8e2883e5bfd8e14ba898ce7291ed876ed1a423a59a2dc9bba42e1c96150a7119284398c58c18daf8ea7dc82844e3cee55d9ea17fe7f
|
7
|
+
data.tar.gz: a7f7821eb19e50918321b8d1b47caa4ab9d04d40b2918a502f86512fbe25b8c35b4bee0705eb1044ade83cdb7f0c0f45a4023363e14c8153283ed9bdfec32f1a
|
data/README.md
CHANGED
@@ -1,10 +1,15 @@
|
|
1
1
|
# DataCollector
|
2
|
-
Convenience module to Extract, Transform and Load your data.
|
3
|
-
You have
|
4
|
-
|
2
|
+
Convenience module to Extract, Transform and Load your data.
|
3
|
+
You have main objects that help you to 'INPUT', 'OUTPUT' and 'FILTER' data: the basic ETL components.
|
4
|
+
Support objects like CONFIG, LOG, RULES and the new RULES_NG just to make life easier.
|
5
5
|
|
6
6
|
Including the DataCollector::Core module into your application gives you access to these objects.
|
7
7
|
|
8
|
+
The RULES and RULES_NG objects are built on a very simple concept. A rule consists of 3 components:
|
9
|
+
- a destination tag
|
10
|
+
- a jsonpath filter to get the data
|
11
|
+
- a lambda to execute on every filter hit
|
12
|
+
|
8
13
|
|
9
14
|
#### input
|
10
15
|
Read input from a URI. This URI can have an http, https or file scheme.
|
@@ -92,7 +97,7 @@ filter data from a hash using [JSONPath](http://goessner.net/articles/JsonPath/i
|
|
92
97
|
filtered_data = filter(data, "$..metadata.record")
|
93
98
|
```
|
94
99
|
|
95
|
-
#### rules
|
100
|
+
#### rules (deprecated)
|
96
101
|
See newer rules_ng object
|
97
102
|
Allows you to define a simple lambda structure to run against a JSONPath filter
|
98
103
|
|
@@ -138,6 +143,90 @@ RULE_SET
|
|
138
143
|
SUFFIX
|
139
144
|
```
|
140
145
|
|
146
|
+
##### Examples
|
147
|
+
|
148
|
+
Here you will find the different rule combinations that are possible.
|
149
|
+
|
150
|
+
``` ruby
|
151
|
+
RULE_SETS = {
|
152
|
+
'rs_only_filter' => {
|
153
|
+
'only_filter' => "$.title"
|
154
|
+
},
|
155
|
+
'rs_only_text' => {
|
156
|
+
'plain_text_tag' => {
|
157
|
+
'text' => 'hello world'
|
158
|
+
}
|
159
|
+
},
|
160
|
+
'rs_text_with_suffix' => {
|
161
|
+
'text_tag_with_suffix' => {
|
162
|
+
'text' => ['hello_world', {'suffix' => '-suffix'}]
|
163
|
+
}
|
164
|
+
},
|
165
|
+
'rs_map_with_json_filter' => {
|
166
|
+
'language' => {
|
167
|
+
'@' => {'nl' => 'dut', 'fr' => 'fre', 'de' => 'ger', 'en' => 'eng'}
|
168
|
+
}
|
169
|
+
},
|
170
|
+
'rs_hash_with_json_filter' => {
|
171
|
+
'multiple_of_2' => {
|
172
|
+
'@' => lambda { |d| d.to_i * 2 }
|
173
|
+
}
|
174
|
+
},
|
175
|
+
'rs_hash_with_multiple_json_filter' => {
|
176
|
+
'multiple_of' => [
|
177
|
+
{'@' => lambda { |d| d.to_i * 2 }},
|
178
|
+
{'@' => lambda { |d| d.to_i * 3 }}
|
179
|
+
]
|
180
|
+
},
|
181
|
+
'rs_hash_with_json_filter_and_suffix' => {
|
182
|
+
'multiple_of_with_suffix' => {
|
183
|
+
'@' => [lambda {|d| d.to_i*2}, 'suffix' => '-multiple_of_2']
|
184
|
+
}
|
185
|
+
},
|
186
|
+
'rs_hash_with_json_filter_and_multiple_lambdas' => {
|
187
|
+
'multiple_lambdas' => {
|
188
|
+
'@' => [lambda {|d| d.to_i*2}, lambda {|d| Math.sqrt(d.to_i) }]
|
189
|
+
}
|
190
|
+
},
|
191
|
+
'rs_hash_with_json_filter_and_option' => {
|
192
|
+
'subjects' => {
|
193
|
+
'$..subject' => [
|
194
|
+
lambda {|d,o|
|
195
|
+
{
|
196
|
+
doc_id: o['id'],
|
197
|
+
subject: d
|
198
|
+
}
|
199
|
+
}
|
200
|
+
]
|
201
|
+
}
|
202
|
+
}
|
203
|
+
```
|
204
|
+
|
205
|
+
Here is an example of how to call the last rule set, "rs_hash_with_json_filter_and_option".
|
206
|
+
|
207
|
+
***rules_ng.run*** takes up to 4 parameters. The first 3 are mandatory. The last one, ***options***, can hold data that is static to a rule set.
|
208
|
+
|
209
|
+
```ruby
|
210
|
+
include DataCollector::Core
|
211
|
+
output.clear
|
212
|
+
data = {'subject' => ['water', 'thermodynamics']}
|
213
|
+
|
214
|
+
rules_ng.run(RULE_SETS['rs_hash_with_json_filter_and_option'], data, output, {'id' => 1})
|
215
|
+
|
216
|
+
```
|
217
|
+
|
218
|
+
Results in:
|
219
|
+
```json
|
220
|
+
{
|
221
|
+
"subjects":[
|
222
|
+
{"doc_id":1,"subject":"water"},
|
223
|
+
{"doc_id":1,"subject":"thermodynamics"}
|
224
|
+
]
|
225
|
+
}
|
226
|
+
```
|
227
|
+
|
228
|
+
|
229
|
+
|
141
230
|
#### config
|
142
231
|
config is an object that points to "config.yml". You can read data from and/or store data to this object.
|
143
232
|
|
@@ -41,6 +41,7 @@ module DataCollector
|
|
41
41
|
else
|
42
42
|
data = rule_payload.select { |s| s.is_a?(String) }
|
43
43
|
rule_payload = rule_payload.delete_if { |s| s.is_a?(String) }
|
44
|
+
rule_payload = "@" if rule_payload.empty?
|
44
45
|
# if rule_payload.size == 1
|
45
46
|
# rule_payload = rule_payload.first
|
46
47
|
# end
|
@@ -88,9 +89,9 @@ module DataCollector
|
|
88
89
|
output_data = [input_data]
|
89
90
|
end
|
90
91
|
|
91
|
-
output_data = output_data.first if output_data.is_a?(Array) && output_data.size == 1
|
92
92
|
output_data.compact! if output_data.is_a?(Array)
|
93
93
|
output_data.flatten! if output_data.is_a?(Array)# || output_data.is_a?(Hash)
|
94
|
+
output_data = output_data.first if output_data.is_a?(Array) && output_data.size == 1
|
94
95
|
output_data
|
95
96
|
end
|
96
97
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_collector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mehmet Celik
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-04-
|
11
|
+
date: 2020-04-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|