data_collector 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +23 -3
- data/data_collector.gemspec +1 -1
- data/lib/data_collector/core.rb +6 -0
- data/lib/data_collector/rules.rb +55 -22
- data/lib/data_collector/rules_ng.rb +88 -0
- data/lib/data_collector/version.rb +1 -1
- metadata +8 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5fb3cc5c42f536a8bdc99421eb6f46972c6c97597f5c0af63e96463f320837ef
|
4
|
+
data.tar.gz: d377133bf042985f6d68b2bb2960d9c4ddae169a39d4602df4a3f4003d6b6434
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cd48af243e1615e8cafaf8ed1ab4d9019661e3205286fbd32c9ec4bb3e2faa7e07025a62b378062f10dfc0f1775acb401ad4cb901851cbbaf4cb78a2e7c6474a
|
7
|
+
data.tar.gz: c92eee26040f89ab0b691ba6bf4ad9792222e545e82bb9ebea8b4a4941831615a74d9f389e58d6feccf89ba42c6dd5a02c55bfed996fc670cd1d9600d5023476
|
data/README.md
CHANGED
@@ -1,8 +1,13 @@
|
|
1
1
|
# DataCollector
|
2
2
|
Convenience module to Extract, Transform and Load your data.
|
3
|
+
You have 3 main objects that you can use for ETL => INPUT, OUTPUT and FILTER
|
4
|
+
and support objects like CONFIG, LOG, RULES and the new RULES_NG
|
5
|
+
|
6
|
+
Including the DataCollector::Core module into your application gives you access to these objects.
|
7
|
+
|
3
8
|
|
4
9
|
#### input
|
5
|
-
Read input from an URI
|
10
|
+
Read input from a URI. This URI can have an http, https or file scheme.
|
6
11
|
|
7
12
|
**Public methods**
|
8
13
|
```ruby
|
@@ -26,7 +31,7 @@ example:
|
|
26
31
|
Inputs can be JSON, XML, CSV, or XML in a TAR.GZ file
|
27
32
|
|
28
33
|
### output
|
29
|
-
Output is an object you can store
|
34
|
+
Output is an object in which you can store key/value pairs that need to be written to an output stream.
|
30
35
|
```ruby
|
31
36
|
output[:name] = 'John'
|
32
37
|
output[:last_name] = 'Doe'
|
@@ -88,15 +93,17 @@ filter data from a hash using [JSONPath](http://goessner.net/articles/JsonPath/i
|
|
88
93
|
```
|
89
94
|
|
90
95
|
#### rules
|
96
|
+
See newer rules_ng object
|
91
97
|
Allows you to define a simple lambda structure to run against a JSONPath filter
|
92
98
|
|
93
99
|
A rule is made up of a Hash: the key is the map key field, and its value is a Hash with a JSONPath filter and options to apply a convert method on the filtered results.
|
94
|
-
Available convert methods are: time, map, each, call, suffix
|
100
|
+
Available convert methods are: time, map, each, call, suffix, text
|
95
101
|
- time: parses a given time/date string into a Time object
|
96
102
|
- map: applies a mapping to a filter
|
97
103
|
- suffix: adds a suffix to a result
|
98
104
|
- call: executes a lambda on the filter
|
99
105
|
- each: runs a lambda on each row of a filter
|
106
|
+
- text: passthrough method. Returns value unchanged
|
100
107
|
|
101
108
|
example:
|
102
109
|
```ruby
|
@@ -118,6 +125,19 @@ Available convert methods are: time, map, each, call, suffix
|
|
118
125
|
rules.run(my_rules, record, output)
|
119
126
|
```
|
120
127
|
|
128
|
+
#### rules_ng
|
129
|
+
!!! not compatible with RULES object
|
130
|
+
|
131
|
+
TODO: work in progress; see the tests for examples of how to use it.
|
132
|
+
|
133
|
+
```
|
134
|
+
RULE_SET
|
135
|
+
RULES*
|
136
|
+
FILTERS*
|
137
|
+
LAMBDA*
|
138
|
+
SUFFIX
|
139
|
+
```
|
140
|
+
|
121
141
|
#### config
|
122
142
|
config is an object that points to "config.yml"; you can read data from and/or store data to this object.
|
123
143
|
|
data/data_collector.gemspec
CHANGED
@@ -47,6 +47,6 @@ Gem::Specification.new do |spec|
|
|
47
47
|
spec.add_runtime_dependency "json-ld", "~> 3.1"
|
48
48
|
|
49
49
|
spec.add_development_dependency "bundler", "~> 2.0"
|
50
|
-
spec.add_development_dependency "rake", "
|
50
|
+
spec.add_development_dependency "rake", ">= 12.3"
|
51
51
|
spec.add_development_dependency "minitest", "~> 5.0"
|
52
52
|
end
|
data/lib/data_collector/core.rb
CHANGED
@@ -5,6 +5,7 @@ require 'logger'
|
|
5
5
|
require_relative 'input'
|
6
6
|
require_relative 'output'
|
7
7
|
require_relative 'rules'
|
8
|
+
require_relative 'rules_ng'
|
8
9
|
require_relative 'config_file'
|
9
10
|
|
10
11
|
module DataCollector
|
@@ -79,6 +80,11 @@ module DataCollector
|
|
79
80
|
@rules ||= Rules.new
|
80
81
|
end
|
81
82
|
|
83
|
+
# New rules runner
|
84
|
+
def rules_ng
|
85
|
+
@rules_ng ||= RulesNg.new
|
86
|
+
end
|
87
|
+
|
82
88
|
# evaluator http://jsonpath.com/
|
83
89
|
# uitleg http://goessner.net/articles/JsonPath/index.html
|
84
90
|
def filter(data, filter_path)
|
data/lib/data_collector/rules.rb
CHANGED
@@ -6,31 +6,32 @@ module DataCollector
|
|
6
6
|
@logger = Logger.new(STDOUT)
|
7
7
|
end
|
8
8
|
|
9
|
-
def run(rule_map, from_record, to_record)
|
9
|
+
def run(rule_map, from_record, to_record, options = {})
|
10
10
|
rule_map.each do |map_to_key, rule|
|
11
11
|
if rule.is_a?(Array)
|
12
12
|
rule.each do |sub_rule|
|
13
|
-
apply_rule(map_to_key, sub_rule, from_record, to_record)
|
13
|
+
apply_rule(map_to_key, sub_rule, from_record, to_record, options)
|
14
14
|
end
|
15
15
|
else
|
16
|
-
apply_rule(map_to_key, rule, from_record, to_record)
|
16
|
+
apply_rule(map_to_key, rule, from_record, to_record, options)
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
20
20
|
to_record.each do |element|
|
21
21
|
element = element.delete_if do |k, v|
|
22
|
-
v != false && (v.nil?
|
22
|
+
v != false && (v.nil?)
|
23
23
|
end
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
27
|
private
|
28
28
|
|
29
|
-
def apply_rule(map_to_key, rule, from_record, to_record)
|
29
|
+
def apply_rule(map_to_key, rule, from_record, to_record, options = {})
|
30
30
|
if rule.has_key?('text')
|
31
|
-
|
31
|
+
suffix = (rule && rule.key?('options') && rule['options'].key?('suffix')) ? rule['options']['suffix'] : ''
|
32
|
+
to_record << { map_to_key.to_sym => add_suffix(rule['text'], suffix) }
|
32
33
|
elsif rule.has_key?('options') && rule['options'].has_key?('convert') && rule['options']['convert'].eql?('each')
|
33
|
-
result = get_value_for(map_to_key, rule['filter'], from_record, rule['options'])
|
34
|
+
result = get_value_for(map_to_key, rule['filter'], from_record, rule['options'], options)
|
34
35
|
|
35
36
|
if result.is_a?(Array)
|
36
37
|
result.each do |m|
|
@@ -40,14 +41,14 @@ module DataCollector
|
|
40
41
|
to_record << {map_to_key.to_sym => result}
|
41
42
|
end
|
42
43
|
else
|
43
|
-
result = get_value_for(map_to_key, rule['filter'], from_record, rule['options'])
|
44
|
+
result = get_value_for(map_to_key, rule['filter'], from_record, rule['options'], options)
|
44
45
|
return if result && result.empty?
|
45
46
|
|
46
47
|
to_record << {map_to_key.to_sym => result}
|
47
48
|
end
|
48
49
|
end
|
49
50
|
|
50
|
-
def get_value_for(tag_key, filter_path, record, options = {})
|
51
|
+
def get_value_for(tag_key, filter_path, record, rule_options = {}, options = {})
|
51
52
|
data = nil
|
52
53
|
if record
|
53
54
|
if filter_path.is_a?(Array) && !record.is_a?(Array)
|
@@ -56,34 +57,47 @@ module DataCollector
|
|
56
57
|
|
57
58
|
data = Core::filter(record, filter_path)
|
58
59
|
|
59
|
-
if data &&
|
60
|
-
if
|
61
|
-
case
|
60
|
+
if data && rule_options
|
61
|
+
if rule_options.key?('convert')
|
62
|
+
case rule_options['convert']
|
62
63
|
when 'time'
|
63
|
-
|
64
|
+
result = []
|
65
|
+
data = [data] unless data.is_a?(Array)
|
66
|
+
data.each do |d|
|
67
|
+
result << Time.parse(d)
|
68
|
+
end
|
69
|
+
data = result
|
64
70
|
when 'map'
|
65
71
|
if data.is_a?(Array)
|
66
72
|
data = data.map do |r|
|
67
|
-
|
73
|
+
rule_options['map'][r] if rule_options['map'].key?(r)
|
68
74
|
end
|
69
75
|
|
70
76
|
data.compact!
|
71
|
-
data.flatten! if
|
77
|
+
data.flatten! if rule_options.key?('flatten') && rule_options['flatten']
|
72
78
|
else
|
73
|
-
return
|
79
|
+
return rule_options['map'][data] if rule_options['map'].key?(data)
|
74
80
|
end
|
75
81
|
when 'each'
|
76
82
|
data = [data] unless data.is_a?(Array)
|
77
|
-
|
78
|
-
|
83
|
+
if options.empty?
|
84
|
+
data = data.map { |d| rule_options['lambda'].call(d) }
|
85
|
+
else
|
86
|
+
data = data.map { |d| rule_options['lambda'].call(d, options) }
|
87
|
+
end
|
88
|
+
data.flatten! if rule_options.key?('flatten') && rule_options['flatten']
|
79
89
|
when 'call'
|
80
|
-
|
90
|
+
if options.empty?
|
91
|
+
data = rule_options['lambda'].call(data)
|
92
|
+
else
|
93
|
+
data = rule_options['lambda'].call(data, options)
|
94
|
+
end
|
95
|
+
return data
|
81
96
|
end
|
82
97
|
end
|
83
98
|
|
84
|
-
if
|
85
|
-
data = data
|
86
|
-
data += options['suffix']
|
99
|
+
if rule_options.key?('suffix')
|
100
|
+
data = add_suffix(data, rule_options['suffix'])
|
87
101
|
end
|
88
102
|
|
89
103
|
end
|
@@ -93,5 +107,24 @@ module DataCollector
|
|
93
107
|
return data
|
94
108
|
end
|
95
109
|
|
110
|
+
def add_suffix(data, suffix)
|
111
|
+
case data.class.name
|
112
|
+
when 'Array'
|
113
|
+
result = []
|
114
|
+
data.each do |d|
|
115
|
+
result << add_suffix(d, suffix)
|
116
|
+
end
|
117
|
+
data = result
|
118
|
+
when 'Hash'
|
119
|
+
data.each do |k, v|
|
120
|
+
data[k] = add_suffix(v, suffix)
|
121
|
+
end
|
122
|
+
else
|
123
|
+
data = data.to_s
|
124
|
+
data += suffix
|
125
|
+
end
|
126
|
+
data
|
127
|
+
end
|
128
|
+
|
96
129
|
end
|
97
130
|
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
require 'logger'
|
2
|
+
|
3
|
+
module DataCollector
|
4
|
+
class RulesNg
|
5
|
+
def initialize(logger = Logger.new(STDOUT))
|
6
|
+
@logger = logger
|
7
|
+
end
|
8
|
+
|
9
|
+
def run(rules, input_data, output_data, options = {})
|
10
|
+
rules.each do |tag, rule|
|
11
|
+
apply_rule(tag, rule, input_data, output_data, options)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
private
|
16
|
+
def apply_rule(tag, rule, input_data, output_data, options = {})
|
17
|
+
if rule.is_a?(Array)
|
18
|
+
rule.each do |sub_rule|
|
19
|
+
apply_rule(tag, sub_rule, input_data, output_data, options)
|
20
|
+
end
|
21
|
+
return output_data
|
22
|
+
end
|
23
|
+
|
24
|
+
rule_filter = rule.keys.first
|
25
|
+
rule_payload = rule.values.first
|
26
|
+
case rule_filter
|
27
|
+
when 'text'
|
28
|
+
if rule_payload.is_a?(String)
|
29
|
+
data = rule_payload
|
30
|
+
else
|
31
|
+
data = rule_payload.select{|s| s.is_a?(String)}
|
32
|
+
rule_payload = rule_payload.delete_if{|s| s.is_a?(String)}
|
33
|
+
if rule_payload.size == 1
|
34
|
+
rule_payload = rule_payload.first
|
35
|
+
end
|
36
|
+
end
|
37
|
+
when /json_path\:/
|
38
|
+
data = json_path_filter(rule_filter.gsub(/^json_path\:/), input_data)
|
39
|
+
else
|
40
|
+
data = json_path_filter(rule_filter, input_data)
|
41
|
+
end
|
42
|
+
|
43
|
+
data = apply_filtered_data_on_payload(data, rule_payload, options)
|
44
|
+
|
45
|
+
output_data << {tag.to_sym => data}
|
46
|
+
end
|
47
|
+
|
48
|
+
def apply_filtered_data_on_payload(input_data, payload, options = {})
|
49
|
+
output_data = nil
|
50
|
+
case payload.class.name
|
51
|
+
when 'Proc'
|
52
|
+
if options && options.empty?
|
53
|
+
output_data = payload.call(input_data)
|
54
|
+
else
|
55
|
+
output_data = payload.call(input_data, options)
|
56
|
+
end
|
57
|
+
when 'Hash'
|
58
|
+
input_data = [input_data] unless input_data.is_a?(Array)
|
59
|
+
output_data = input_data.map do |m|
|
60
|
+
if payload.key?('suffix')
|
61
|
+
"#{m}#{payload['suffix']}"
|
62
|
+
else
|
63
|
+
payload[m]
|
64
|
+
end
|
65
|
+
end if input_data.is_a?(Array)
|
66
|
+
when 'Array'
|
67
|
+
output_data = input_data
|
68
|
+
payload.each do |p|
|
69
|
+
output_data = apply_filtered_data_on_payload(output_data, p, options)
|
70
|
+
end
|
71
|
+
else
|
72
|
+
output_data = input_data
|
73
|
+
end
|
74
|
+
|
75
|
+
output_data.compact! if output_data.is_a?(Array)
|
76
|
+
output_data
|
77
|
+
end
|
78
|
+
|
79
|
+
def json_path_filter(filter, input_data)
|
80
|
+
data = nil
|
81
|
+
return data if input_data.nil? || input_data.empty?
|
82
|
+
return input_data if input_data.is_a?(String)
|
83
|
+
Core::filter(input_data, filter)
|
84
|
+
end
|
85
|
+
|
86
|
+
|
87
|
+
end
|
88
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_collector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mehmet Celik
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-03-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -154,16 +154,16 @@ dependencies:
|
|
154
154
|
name: rake
|
155
155
|
requirement: !ruby/object:Gem::Requirement
|
156
156
|
requirements:
|
157
|
-
- - "
|
157
|
+
- - ">="
|
158
158
|
- !ruby/object:Gem::Version
|
159
|
-
version: '
|
159
|
+
version: '12.3'
|
160
160
|
type: :development
|
161
161
|
prerelease: false
|
162
162
|
version_requirements: !ruby/object:Gem::Requirement
|
163
163
|
requirements:
|
164
|
-
- - "
|
164
|
+
- - ">="
|
165
165
|
- !ruby/object:Gem::Version
|
166
|
-
version: '
|
166
|
+
version: '12.3'
|
167
167
|
- !ruby/object:Gem::Dependency
|
168
168
|
name: minitest
|
169
169
|
requirement: !ruby/object:Gem::Requirement
|
@@ -201,6 +201,7 @@ files:
|
|
201
201
|
- lib/data_collector/input.rb
|
202
202
|
- lib/data_collector/output.rb
|
203
203
|
- lib/data_collector/rules.rb
|
204
|
+
- lib/data_collector/rules_ng.rb
|
204
205
|
- lib/data_collector/runner.rb
|
205
206
|
- lib/data_collector/version.rb
|
206
207
|
homepage: https://github.com/mehmetc/data_collector
|
@@ -225,7 +226,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
225
226
|
- !ruby/object:Gem::Version
|
226
227
|
version: '0'
|
227
228
|
requirements: []
|
228
|
-
rubygems_version: 3.0.
|
229
|
+
rubygems_version: 3.0.2
|
229
230
|
signing_key:
|
230
231
|
specification_version: 4
|
231
232
|
summary: ETL helper library
|