data_collector 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +23 -3
- data/data_collector.gemspec +1 -1
- data/lib/data_collector/core.rb +6 -0
- data/lib/data_collector/rules.rb +55 -22
- data/lib/data_collector/rules_ng.rb +88 -0
- data/lib/data_collector/version.rb +1 -1
- metadata +8 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5fb3cc5c42f536a8bdc99421eb6f46972c6c97597f5c0af63e96463f320837ef
|
4
|
+
data.tar.gz: d377133bf042985f6d68b2bb2960d9c4ddae169a39d4602df4a3f4003d6b6434
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cd48af243e1615e8cafaf8ed1ab4d9019661e3205286fbd32c9ec4bb3e2faa7e07025a62b378062f10dfc0f1775acb401ad4cb901851cbbaf4cb78a2e7c6474a
|
7
|
+
data.tar.gz: c92eee26040f89ab0b691ba6bf4ad9792222e545e82bb9ebea8b4a4941831615a74d9f389e58d6feccf89ba42c6dd5a02c55bfed996fc670cd1d9600d5023476
|
data/README.md
CHANGED
@@ -1,8 +1,13 @@
|
|
1
1
|
# DataCollector
|
2
2
|
Convenience module to Extract, Transform and Load your data.
|
3
|
+
You have 3 main objects that you can use for ETL => INPUT, OUTPUT and FILTER
|
4
|
+
and support objects like CONFIG, LOG, RULES and the new RULES_NG
|
5
|
+
|
6
|
+
Including the DataCollector::Core module into your application gives you access to these objects.
|
7
|
+
|
3
8
|
|
4
9
|
#### input
|
5
|
-
Read input from an URI
|
10
|
+
Read input from a URI. The URI can use the http, https or file scheme.
|
6
11
|
|
7
12
|
**Public methods**
|
8
13
|
```ruby
|
@@ -26,7 +31,7 @@ example:
|
|
26
31
|
Inputs can be JSON, XML or CSV or XML in a TAR.GZ file
|
27
32
|
|
28
33
|
### output
|
29
|
-
Output is an object you can store
|
34
|
+
Output is an object in which you can store key/value pairs that need to be written to an output stream.
|
30
35
|
```ruby
|
31
36
|
output[:name] = 'John'
|
32
37
|
output[:last_name] = 'Doe'
|
@@ -88,15 +93,17 @@ filter data from a hash using [JSONPath](http://goessner.net/articles/JsonPath/i
|
|
88
93
|
```
|
89
94
|
|
90
95
|
#### rules
|
96
|
+
See newer rules_ng object
|
91
97
|
Allows you to define a simple lambda structure to run against a JSONPath filter
|
92
98
|
|
93
99
|
A rule is a Hash: the key is the map key field, and its value is a Hash with a JSONPath filter and options to apply a convert method to the filtered results.
|
94
|
-
Available convert methods are: time, map, each, call, suffix
|
100
|
+
Available convert methods are: time, map, each, call, suffix, text
|
95
101
|
- time: parses a given time/date string into a Time object
|
96
102
|
- map: applies a mapping to a filter
|
97
103
|
- suffix: adds a suffix to a result
|
98
104
|
- call: executes a lambda on the filter
|
99
105
|
- each: runs a lambda on each row of a filter
|
106
|
+
- text: passthrough method. Returns value unchanged
|
100
107
|
|
101
108
|
example:
|
102
109
|
```ruby
|
@@ -118,6 +125,19 @@ Available convert methods are: time, map, each, call, suffix
|
|
118
125
|
rules.run(my_rules, record, output)
|
119
126
|
```
|
120
127
|
|
128
|
+
#### rules_ng
|
129
|
+
!!! not compatible with RULES object
|
130
|
+
|
131
|
+
TODO: work in progress; see the tests for examples of how to use it.
|
132
|
+
|
133
|
+
```
|
134
|
+
RULE_SET
|
135
|
+
RULES*
|
136
|
+
FILTERS*
|
137
|
+
LAMBDA*
|
138
|
+
SUFFIX
|
139
|
+
```
|
140
|
+
|
121
141
|
#### config
|
122
142
|
config is an object that points to "config.yml"; you can read data from and/or store data in this object.
|
123
143
|
|
data/data_collector.gemspec
CHANGED
@@ -47,6 +47,6 @@ Gem::Specification.new do |spec|
|
|
47
47
|
spec.add_runtime_dependency "json-ld", "~> 3.1"
|
48
48
|
|
49
49
|
spec.add_development_dependency "bundler", "~> 2.0"
|
50
|
-
spec.add_development_dependency "rake", "
|
50
|
+
spec.add_development_dependency "rake", ">= 12.3"
|
51
51
|
spec.add_development_dependency "minitest", "~> 5.0"
|
52
52
|
end
|
data/lib/data_collector/core.rb
CHANGED
@@ -5,6 +5,7 @@ require 'logger'
|
|
5
5
|
require_relative 'input'
|
6
6
|
require_relative 'output'
|
7
7
|
require_relative 'rules'
|
8
|
+
require_relative 'rules_ng'
|
8
9
|
require_relative 'config_file'
|
9
10
|
|
10
11
|
module DataCollector
|
@@ -79,6 +80,11 @@ module DataCollector
|
|
79
80
|
@rules ||= Rules.new
|
80
81
|
end
|
81
82
|
|
83
|
+
# New rules runner.
# Builds the RulesNg instance on first use and returns the same instance on
# every later call.
def rules_ng
  return @rules_ng if @rules_ng

  @rules_ng = RulesNg.new
end
|
87
|
+
|
82
88
|
# evaluator http://jsonpath.com/
|
83
89
|
# uitleg http://goessner.net/articles/JsonPath/index.html
|
84
90
|
def filter(data, filter_path)
|
data/lib/data_collector/rules.rb
CHANGED
@@ -6,31 +6,32 @@ module DataCollector
|
|
6
6
|
@logger = Logger.new(STDOUT)
|
7
7
|
end
|
8
8
|
|
9
|
-
def run(rule_map, from_record, to_record)
|
9
|
+
def run(rule_map, from_record, to_record, options = {})
|
10
10
|
rule_map.each do |map_to_key, rule|
|
11
11
|
if rule.is_a?(Array)
|
12
12
|
rule.each do |sub_rule|
|
13
|
-
apply_rule(map_to_key, sub_rule, from_record, to_record)
|
13
|
+
apply_rule(map_to_key, sub_rule, from_record, to_record, options)
|
14
14
|
end
|
15
15
|
else
|
16
|
-
apply_rule(map_to_key, rule, from_record, to_record)
|
16
|
+
apply_rule(map_to_key, rule, from_record, to_record, options)
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
20
20
|
to_record.each do |element|
|
21
21
|
element = element.delete_if do |k, v|
|
22
|
-
v != false && (v.nil?
|
22
|
+
v != false && (v.nil?)
|
23
23
|
end
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
27
|
private
|
28
28
|
|
29
|
-
def apply_rule(map_to_key, rule, from_record, to_record)
|
29
|
+
def apply_rule(map_to_key, rule, from_record, to_record, options = {})
|
30
30
|
if rule.has_key?('text')
|
31
|
-
|
31
|
+
suffix = (rule && rule.key?('options') && rule['options'].key?('suffix')) ? rule['options']['suffix'] : ''
|
32
|
+
to_record << { map_to_key.to_sym => add_suffix(rule['text'], suffix) }
|
32
33
|
elsif rule.has_key?('options') && rule['options'].has_key?('convert') && rule['options']['convert'].eql?('each')
|
33
|
-
result = get_value_for(map_to_key, rule['filter'], from_record, rule['options'])
|
34
|
+
result = get_value_for(map_to_key, rule['filter'], from_record, rule['options'], options)
|
34
35
|
|
35
36
|
if result.is_a?(Array)
|
36
37
|
result.each do |m|
|
@@ -40,14 +41,14 @@ module DataCollector
|
|
40
41
|
to_record << {map_to_key.to_sym => result}
|
41
42
|
end
|
42
43
|
else
|
43
|
-
result = get_value_for(map_to_key, rule['filter'], from_record, rule['options'])
|
44
|
+
result = get_value_for(map_to_key, rule['filter'], from_record, rule['options'], options)
|
44
45
|
return if result && result.empty?
|
45
46
|
|
46
47
|
to_record << {map_to_key.to_sym => result}
|
47
48
|
end
|
48
49
|
end
|
49
50
|
|
50
|
-
def get_value_for(tag_key, filter_path, record, options = {})
|
51
|
+
def get_value_for(tag_key, filter_path, record, rule_options = {}, options = {})
|
51
52
|
data = nil
|
52
53
|
if record
|
53
54
|
if filter_path.is_a?(Array) && !record.is_a?(Array)
|
@@ -56,34 +57,47 @@ module DataCollector
|
|
56
57
|
|
57
58
|
data = Core::filter(record, filter_path)
|
58
59
|
|
59
|
-
if data &&
|
60
|
-
if
|
61
|
-
case
|
60
|
+
if data && rule_options
|
61
|
+
if rule_options.key?('convert')
|
62
|
+
case rule_options['convert']
|
62
63
|
when 'time'
|
63
|
-
|
64
|
+
result = []
|
65
|
+
data = [data] unless data.is_a?(Array)
|
66
|
+
data.each do |d|
|
67
|
+
result << Time.parse(d)
|
68
|
+
end
|
69
|
+
data = result
|
64
70
|
when 'map'
|
65
71
|
if data.is_a?(Array)
|
66
72
|
data = data.map do |r|
|
67
|
-
|
73
|
+
rule_options['map'][r] if rule_options['map'].key?(r)
|
68
74
|
end
|
69
75
|
|
70
76
|
data.compact!
|
71
|
-
data.flatten! if
|
77
|
+
data.flatten! if rule_options.key?('flatten') && rule_options['flatten']
|
72
78
|
else
|
73
|
-
return
|
79
|
+
return rule_options['map'][data] if rule_options['map'].key?(data)
|
74
80
|
end
|
75
81
|
when 'each'
|
76
82
|
data = [data] unless data.is_a?(Array)
|
77
|
-
|
78
|
-
|
83
|
+
if options.empty?
|
84
|
+
data = data.map { |d| rule_options['lambda'].call(d) }
|
85
|
+
else
|
86
|
+
data = data.map { |d| rule_options['lambda'].call(d, options) }
|
87
|
+
end
|
88
|
+
data.flatten! if rule_options.key?('flatten') && rule_options['flatten']
|
79
89
|
when 'call'
|
80
|
-
|
90
|
+
if options.empty?
|
91
|
+
data = rule_options['lambda'].call(data)
|
92
|
+
else
|
93
|
+
data = rule_options['lambda'].call(data, options)
|
94
|
+
end
|
95
|
+
return data
|
81
96
|
end
|
82
97
|
end
|
83
98
|
|
84
|
-
if
|
85
|
-
data = data
|
86
|
-
data += options['suffix']
|
99
|
+
if rule_options.key?('suffix')
|
100
|
+
data = add_suffix(data, rule_options['suffix'])
|
87
101
|
end
|
88
102
|
|
89
103
|
end
|
@@ -93,5 +107,24 @@ module DataCollector
|
|
93
107
|
return data
|
94
108
|
end
|
95
109
|
|
110
|
+
# Appends +suffix+ to every scalar value found in +data+.
#
# Arrays and Hashes (including their subclasses) are walked recursively; any
# other value is converted with #to_s and the suffix is appended to it.
#
# The input is never modified: a new Array/Hash/String is returned. The
# previous implementation rewrote Hash values in place, which altered the
# caller's record (or rule definition) and appended the suffix again every
# time the same object was processed.
#
# @param data [Array, Hash, Object] value, or nested structure of values
# @param suffix [#to_s] text to append; nil appends nothing
# @return [Array, Hash, String] structure of the same shape with suffixed
#   string values
def add_suffix(data, suffix)
  case data
  when Array
    data.map { |item| add_suffix(item, suffix) }
  when Hash
    data.each_with_object({}) do |(key, value), suffixed|
      suffixed[key] = add_suffix(value, suffix)
    end
  else
    "#{data}#{suffix}"
  end
end
|
128
|
+
|
96
129
|
end
|
97
130
|
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
require 'logger'
|
2
|
+
|
3
|
+
module DataCollector
  # Rule runner that maps input data onto an output object.
  #
  # A rule set is a Hash of tag => rule. A rule is a Hash whose first key is
  # the filter and whose first value is the payload:
  #
  # * filter 'text': the payload is a String, or an Array mixing literal
  #   Strings (the data) with payloads to apply to them;
  # * any other filter is handed to Core.filter as a JSONPath expression,
  #   after an optional leading 'json_path:' prefix is removed.
  #
  # A payload is a Proc (called with the data), a Hash (either
  # {'suffix' => '...'} or a value mapping), or an Array of payloads applied
  # in order. A rule may also be an Array of rules, all written to the same
  # tag.
  class RulesNg
    # Optional prefix marking a filter as a JSONPath expression.
    JSON_PATH_PREFIX = /\Ajson_path:/.freeze

    # @param logger [Logger] logger kept on the instance
    def initialize(logger = Logger.new(STDOUT))
      @logger = logger
    end

    # Applies every rule in +rules+ to +input_data+ and appends the results
    # to +output_data+.
    #
    # @param rules [Hash] tag => rule (or Array of rules)
    # @param input_data [Object] data the filters are evaluated against
    # @param output_data [#<<] receives one { tag: value } Hash per rule
    # @param options [Hash] when not empty, passed as second argument to
    #   every Proc payload
    # @return [Hash] the +rules+ argument (result of #each)
    def run(rules, input_data, output_data, options = {})
      rules.each do |tag, rule|
        apply_rule(tag, rule, input_data, output_data, options)
      end
    end

    private

    # Evaluates a single rule (or each rule of an Array of rules) and appends
    # { tag.to_sym => result } to +output_data+.
    def apply_rule(tag, rule, input_data, output_data, options = {})
      if rule.is_a?(Array)
        rule.each { |sub_rule| apply_rule(tag, sub_rule, input_data, output_data, options) }
        return output_data
      end

      rule_filter, rule_payload = rule.first

      data =
        case rule_filter
        when 'text'
          texts, rule_payload = split_text_payload(rule_payload)
          texts
        when JSON_PATH_PREFIX
          # String#gsub without a replacement returns an Enumerator, so the
          # prefix has to be replaced explicitly with an empty string.
          json_path_filter(rule_filter.sub(JSON_PATH_PREFIX, ''), input_data)
        else
          json_path_filter(rule_filter, input_data)
        end

      data = apply_filtered_data_on_payload(data, rule_payload, options)

      output_data << { tag.to_sym => data }
    end

    # Splits a 'text' payload into its literal strings and the remaining
    # payload(s) without touching the rule definition, so that the same rule
    # set can be run again for the next record.
    #
    # @return [Array] [data, payload]; a single remaining payload is unwrapped
    def split_text_payload(payload)
      return [payload, payload] if payload.is_a?(String)

      texts, remaining = payload.partition { |item| item.is_a?(String) }
      [texts, remaining.size == 1 ? remaining.first : remaining]
    end

    # Applies +payload+ to already filtered +input_data+.
    #
    # * Proc:  called with the data (and +options+ unless they are empty)
    # * Hash:  {'suffix' => s} appends s to each value; any other Hash is
    #          used as a lookup table for each value
    # * Array: each entry is applied in turn to the previous result
    # * other: the data is passed through
    #
    # nil entries are removed from Array results.
    def apply_filtered_data_on_payload(input_data, payload, options = {})
      output_data =
        case payload
        when Proc
          if options && options.empty?
            payload.call(input_data)
          else
            payload.call(input_data, options)
          end
        when Hash
          values = input_data.is_a?(Array) ? input_data : [input_data]
          values.map do |value|
            payload.key?('suffix') ? "#{value}#{payload['suffix']}" : payload[value]
          end
        when Array
          payload.reduce(input_data) do |result, step|
            apply_filtered_data_on_payload(result, step, options)
          end
        else
          input_data
        end

      # Non-destructive compact: the result may be the very object that was
      # passed in, which must not be altered.
      output_data.is_a?(Array) ? output_data.compact : output_data
    end

    # Runs +filter+ against +input_data+ through Core.filter.
    # Returns nil for nil/empty input and returns String input unchanged.
    def json_path_filter(filter, input_data)
      return nil if input_data.nil? || input_data.empty?
      return input_data if input_data.is_a?(String)

      Core.filter(input_data, filter)
    end
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_collector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mehmet Celik
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-03-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -154,16 +154,16 @@ dependencies:
|
|
154
154
|
name: rake
|
155
155
|
requirement: !ruby/object:Gem::Requirement
|
156
156
|
requirements:
|
157
|
-
- - "
|
157
|
+
- - ">="
|
158
158
|
- !ruby/object:Gem::Version
|
159
|
-
version: '
|
159
|
+
version: '12.3'
|
160
160
|
type: :development
|
161
161
|
prerelease: false
|
162
162
|
version_requirements: !ruby/object:Gem::Requirement
|
163
163
|
requirements:
|
164
|
-
- - "
|
164
|
+
- - ">="
|
165
165
|
- !ruby/object:Gem::Version
|
166
|
-
version: '
|
166
|
+
version: '12.3'
|
167
167
|
- !ruby/object:Gem::Dependency
|
168
168
|
name: minitest
|
169
169
|
requirement: !ruby/object:Gem::Requirement
|
@@ -201,6 +201,7 @@ files:
|
|
201
201
|
- lib/data_collector/input.rb
|
202
202
|
- lib/data_collector/output.rb
|
203
203
|
- lib/data_collector/rules.rb
|
204
|
+
- lib/data_collector/rules_ng.rb
|
204
205
|
- lib/data_collector/runner.rb
|
205
206
|
- lib/data_collector/version.rb
|
206
207
|
homepage: https://github.com/mehmetc/data_collector
|
@@ -225,7 +226,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
225
226
|
- !ruby/object:Gem::Version
|
226
227
|
version: '0'
|
227
228
|
requirements: []
|
228
|
-
rubygems_version: 3.0.
|
229
|
+
rubygems_version: 3.0.2
|
229
230
|
signing_key:
|
230
231
|
specification_version: 4
|
231
232
|
summary: ETL helper library
|