data_collector 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f8b064bcc0483a98014157586cb18473d240486f3e4c1550b9eeba0cc026af6b
4
- data.tar.gz: 503c959ca4d3ac06e65e41f0f721aed795b27871d220d676eeb6bc7b81a169d1
3
+ metadata.gz: 78a054b825d99611c8f5070120c11b478975152433b3c76b9a63a6694b7c8cd0
4
+ data.tar.gz: f11143808a8e1647a791b0e12e1c5855dc1589de37f7ea50c0e8789f88538774
5
5
  SHA512:
6
- metadata.gz: 34b2b890765598022e97ee914d6159bd6741ac6d2f711189c81144e880a6590b74183355417d32c3cf39197aad0be914c5d6c6c0089379beac3e14c4bf0b1b01
7
- data.tar.gz: 860aa1bab4a0a0c580840f12bf42f77b76598a7433b033211d200647117c91754b49c76540cb52bf58a96184498293c8c0e73659481a7a938277dd53cbd9f7e1
6
+ metadata.gz: 0b3396b16e5500add7dbb8e2883e5bfd8e14ba898ce7291ed876ed1a423a59a2dc9bba42e1c96150a7119284398c58c18daf8ea7dc82844e3cee55d9ea17fe7f
7
+ data.tar.gz: a7f7821eb19e50918321b8d1b47caa4ab9d04d40b2918a502f86512fbe25b8c35b4bee0705eb1044ade83cdb7f0c0f45a4023363e14c8153283ed9bdfec32f1a
data/README.md CHANGED
@@ -1,10 +1,15 @@
1
1
  # DataCollector
2
- Convenience module to Extract, Transform and Load your data.
3
- You have 3 main objects that you can use for ETL => INPUT, OUTPUT and FILTER
4
- and support objects like CONFIG, LOG, RULES and the new RULES_NG
2
+ Convenience module to Extract, Transform and Load your data.
3
+ The main objects 'INPUT', 'OUTPUT' and 'FILTER' help you read, write and filter data. They are the basic ETL components.
4
+ Support objects like CONFIG, LOG, RULES and the new RULES_NG are there just to make life easier.
5
5
 
6
6
  Including the DataCollector::Core module into your application gives you access to these objects.
7
7
 
8
+ The RULES and RULES_NG objects are built on a very simple concept. A rule consists of 3 components:
9
+ - a destination tag
10
+ - a jsonpath filter to get the data
11
+ - a lambda to execute on every filter hit
12
+
8
13
 
9
14
  #### input
10
15
  Read input from a URI. This URI can have an http, https or file scheme
@@ -92,7 +97,7 @@ filter data from a hash using [JSONPath](http://goessner.net/articles/JsonPath/i
92
97
  filtered_data = filter(data, "$..metadata.record")
93
98
  ```
94
99
 
95
- #### rules
100
+ #### rules (deprecated)
96
101
  See newer rules_ng object
97
102
  Allows you to define a simple lambda structure to run against a JSONPath filter
98
103
 
@@ -138,6 +143,90 @@ RULE_SET
138
143
  SUFFIX
139
144
  ```
140
145
 
146
+ ##### Examples
147
+
148
+ Here you find the different rule combinations that are possible
149
+
150
+ ``` ruby
151
+ RULE_SETS = {
152
+ 'rs_only_filter' => {
153
+ 'only_filter' => "$.title"
154
+ },
155
+ 'rs_only_text' => {
156
+ 'plain_text_tag' => {
157
+ 'text' => 'hello world'
158
+ }
159
+ },
160
+ 'rs_text_with_suffix' => {
161
+ 'text_tag_with_suffix' => {
162
+ 'text' => ['hello_world', {'suffix' => '-suffix'}]
163
+ }
164
+ },
165
+ 'rs_map_with_json_filter' => {
166
+ 'language' => {
167
+ '@' => {'nl' => 'dut', 'fr' => 'fre', 'de' => 'ger', 'en' => 'eng'}
168
+ }
169
+ },
170
+ 'rs_hash_with_json_filter' => {
171
+ 'multiple_of_2' => {
172
+ '@' => lambda { |d| d.to_i * 2 }
173
+ }
174
+ },
175
+ 'rs_hash_with_multiple_json_filter' => {
176
+ 'multiple_of' => [
177
+ {'@' => lambda { |d| d.to_i * 2 }},
178
+ {'@' => lambda { |d| d.to_i * 3 }}
179
+ ]
180
+ },
181
+ 'rs_hash_with_json_filter_and_suffix' => {
182
+ 'multiple_of_with_suffix' => {
183
+ '@' => [lambda {|d| d.to_i*2}, 'suffix' => '-multiple_of_2']
184
+ }
185
+ },
186
+ 'rs_hash_with_json_filter_and_multiple_lambdas' => {
187
+ 'multiple_lambdas' => {
188
+ '@' => [lambda {|d| d.to_i*2}, lambda {|d| Math.sqrt(d.to_i) }]
189
+ }
190
+ },
191
+ 'rs_hash_with_json_filter_and_option' => {
192
+ 'subjects' => {
193
+ '$..subject' => [
194
+ lambda {|d,o|
195
+ {
196
+ doc_id: o['id'],
197
+ subject: d
198
+ }
199
+ }
200
+ ]
201
+ }
202
+ } }
203
+ ```
204
+
205
+ Here is an example of how to call the last rule set, "rs_hash_with_json_filter_and_option".
206
+
207
+ ***rules_ng.run*** accepts 4 parameters. The first 3 are mandatory. The last one, ***options***, can hold data that is static for a rule set.
208
+
209
+ ```ruby
210
+ include DataCollector::Core
211
+ output.clear
212
+ data = {'subject' => ['water', 'thermodynamics']}
213
+
214
+ rules_ng.run(RULE_SETS['rs_hash_with_json_filter_and_option'], data, output, {'id' => 1})
215
+
216
+ ```
217
+
218
+ Results in:
219
+ ```json
220
+ {
221
+ "subjects":[
222
+ {"doc_id":1,"subject":"water"},
223
+ {"doc_id":1,"subject":"thermodynamics"}
224
+ ]
225
+ }
226
+ ```
227
+
228
+
229
+
141
230
  #### config
142
231
  config is an object that points to "config.yml"; you can read data from and/or store data in this object.
143
232
 
@@ -41,6 +41,7 @@ module DataCollector
41
41
  else
42
42
  data = rule_payload.select { |s| s.is_a?(String) }
43
43
  rule_payload = rule_payload.delete_if { |s| s.is_a?(String) }
44
+ rule_payload = "@" if rule_payload.empty?
44
45
  # if rule_payload.size == 1
45
46
  # rule_payload = rule_payload.first
46
47
  # end
@@ -88,9 +89,9 @@ module DataCollector
88
89
  output_data = [input_data]
89
90
  end
90
91
 
91
- output_data = output_data.first if output_data.is_a?(Array) && output_data.size == 1
92
92
  output_data.compact! if output_data.is_a?(Array)
93
93
  output_data.flatten! if output_data.is_a?(Array)# || output_data.is_a?(Hash)
94
+ output_data = output_data.first if output_data.is_a?(Array) && output_data.size == 1
94
95
  output_data
95
96
  end
96
97
 
@@ -1,4 +1,4 @@
1
1
  # encoding: utf-8
2
2
  module DataCollector
3
- VERSION = "0.5.0"
3
+ VERSION = "0.6.0"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_collector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mehmet Celik
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-04-01 00:00:00.000000000 Z
11
+ date: 2020-04-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri