data_collector 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f8b064bcc0483a98014157586cb18473d240486f3e4c1550b9eeba0cc026af6b
4
- data.tar.gz: 503c959ca4d3ac06e65e41f0f721aed795b27871d220d676eeb6bc7b81a169d1
3
+ metadata.gz: 78a054b825d99611c8f5070120c11b478975152433b3c76b9a63a6694b7c8cd0
4
+ data.tar.gz: f11143808a8e1647a791b0e12e1c5855dc1589de37f7ea50c0e8789f88538774
5
5
  SHA512:
6
- metadata.gz: 34b2b890765598022e97ee914d6159bd6741ac6d2f711189c81144e880a6590b74183355417d32c3cf39197aad0be914c5d6c6c0089379beac3e14c4bf0b1b01
7
- data.tar.gz: 860aa1bab4a0a0c580840f12bf42f77b76598a7433b033211d200647117c91754b49c76540cb52bf58a96184498293c8c0e73659481a7a938277dd53cbd9f7e1
6
+ metadata.gz: 0b3396b16e5500add7dbb8e2883e5bfd8e14ba898ce7291ed876ed1a423a59a2dc9bba42e1c96150a7119284398c58c18daf8ea7dc82844e3cee55d9ea17fe7f
7
+ data.tar.gz: a7f7821eb19e50918321b8d1b47caa4ab9d04d40b2918a502f86512fbe25b8c35b4bee0705eb1044ade83cdb7f0c0f45a4023363e14c8153283ed9bdfec32f1a
data/README.md CHANGED
@@ -1,10 +1,15 @@
1
1
  # DataCollector
2
- Convenience module to Extract, Transform and Load your data.
3
- You have 3 main objects that you can use for ETL => INPUT, OUTPUT and FILTER
4
- and support objects like CONFIG, LOG, RULES and the new RULES_NG
2
+ Convenience module to Extract, Transform and Load your data.
3
+ You have main objects that help you to 'INPUT', 'OUTPUT' and 'FILTER' data: the basic ETL components.
4
+ Support objects like CONFIG, LOG, RULES and the new RULES_NG are there just to make life easier.
5
5
 
6
6
  Including the DataCollector::Core module into your application gives you access to these objects.
7
7
 
8
+ The RULES and RULES_NG objects are built on a very simple concept. Rules consist of 3 components:
9
+ - a destination tag
10
+ - a jsonpath filter to get the data
11
+ - a lambda to execute on every filter hit
12
+
8
13
 
9
14
  #### input
10
15
  Read input from a URI. This URI can have an http, https or file scheme
@@ -92,7 +97,7 @@ filter data from a hash using [JSONPath](http://goessner.net/articles/JsonPath/i
92
97
  filtered_data = filter(data, "$..metadata.record")
93
98
  ```
94
99
 
95
- #### rules
100
+ #### rules (deprecated)
96
101
  See newer rules_ng object
97
102
  Allows you to define a simple lambda structure to run against a JSONPath filter
98
103
 
@@ -138,6 +143,90 @@ RULE_SET
138
143
  SUFFIX
139
144
  ```
140
145
 
146
+ ##### Examples
147
+
148
+ Here you find the different rule combinations that are possible
149
+
150
+ ``` ruby
151
+ RULE_SETS = {
152
+ 'rs_only_filter' => {
153
+ 'only_filter' => "$.title"
154
+ },
155
+ 'rs_only_text' => {
156
+ 'plain_text_tag' => {
157
+ 'text' => 'hello world'
158
+ }
159
+ },
160
+ 'rs_text_with_suffix' => {
161
+ 'text_tag_with_suffix' => {
162
+ 'text' => ['hello_world', {'suffix' => '-suffix'}]
163
+ }
164
+ },
165
+ 'rs_map_with_json_filter' => {
166
+ 'language' => {
167
+ '@' => {'nl' => 'dut', 'fr' => 'fre', 'de' => 'ger', 'en' => 'eng'}
168
+ }
169
+ },
170
+ 'rs_hash_with_json_filter' => {
171
+ 'multiple_of_2' => {
172
+ '@' => lambda { |d| d.to_i * 2 }
173
+ }
174
+ },
175
+ 'rs_hash_with_multiple_json_filter' => {
176
+ 'multiple_of' => [
177
+ {'@' => lambda { |d| d.to_i * 2 }},
178
+ {'@' => lambda { |d| d.to_i * 3 }}
179
+ ]
180
+ },
181
+ 'rs_hash_with_json_filter_and_suffix' => {
182
+ 'multiple_of_with_suffix' => {
183
+ '@' => [lambda {|d| d.to_i*2}, 'suffix' => '-multiple_of_2']
184
+ }
185
+ },
186
+ 'rs_hash_with_json_filter_and_multiple_lambdas' => {
187
+ 'multiple_lambdas' => {
188
+ '@' => [lambda {|d| d.to_i*2}, lambda {|d| Math.sqrt(d.to_i) }]
189
+ }
190
+ },
191
+ 'rs_hash_with_json_filter_and_option' => {
192
+ 'subjects' => {
193
+ '$..subject' => [
194
+ lambda {|d,o|
195
+ {
196
+ doc_id: o['id'],
197
+ subject: d
198
+ }
199
+ }
200
+ ]
201
+ }
202
+ }
203
+ ```
204
+
205
+ Here is an example of how to call the last rule set, "rs_hash_with_json_filter_and_option".
206
+
207
+ ***rules_ng.run*** accepts up to 4 parameters. The first 3 are mandatory. The last one, ***options***, can hold data that is static to a rule set.
208
+
209
+ ```ruby
210
+ include DataCollector::Core
211
+ output.clear
212
+ data = {'subject' => ['water', 'thermodynamics']}
213
+
214
+ rules_ng.run(RULE_SETS['rs_hash_with_json_filter_and_option'], data, output, {'id' => 1})
215
+
216
+ ```
217
+
218
+ Results in:
219
+ ```json
220
+ {
221
+ "subjects":[
222
+ {"doc_id":1,"subject":"water"},
223
+ {"doc_id":1,"subject":"thermodynamics"}
224
+ ]
225
+ }
226
+ ```
227
+
228
+
229
+
141
230
  #### config
142
231
  config is an object that points to "config.yml"; you can read and/or store data to this object.
143
232
 
@@ -41,6 +41,7 @@ module DataCollector
41
41
  else
42
42
  data = rule_payload.select { |s| s.is_a?(String) }
43
43
  rule_payload = rule_payload.delete_if { |s| s.is_a?(String) }
44
+ rule_payload = "@" if rule_payload.empty?
44
45
  # if rule_payload.size == 1
45
46
  # rule_payload = rule_payload.first
46
47
  # end
@@ -88,9 +89,9 @@ module DataCollector
88
89
  output_data = [input_data]
89
90
  end
90
91
 
91
- output_data = output_data.first if output_data.is_a?(Array) && output_data.size == 1
92
92
  output_data.compact! if output_data.is_a?(Array)
93
93
  output_data.flatten! if output_data.is_a?(Array)# || output_data.is_a?(Hash)
94
+ output_data = output_data.first if output_data.is_a?(Array) && output_data.size == 1
94
95
  output_data
95
96
  end
96
97
 
@@ -1,4 +1,4 @@
1
1
  # encoding: utf-8
2
2
  module DataCollector
3
- VERSION = "0.5.0"
3
+ VERSION = "0.6.0"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_collector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mehmet Celik
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-04-01 00:00:00.000000000 Z
11
+ date: 2020-04-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri