data_collector 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 54fed64f173cd683fa66d514e65c74d42bb992965fdb85ca15df823e97e4ded1
4
- data.tar.gz: 18170f3fd1660fef26448961ab5e5af22c42af405750f54873e6cb650718f3b2
3
+ metadata.gz: 5fb3cc5c42f536a8bdc99421eb6f46972c6c97597f5c0af63e96463f320837ef
4
+ data.tar.gz: d377133bf042985f6d68b2bb2960d9c4ddae169a39d4602df4a3f4003d6b6434
5
5
  SHA512:
6
- metadata.gz: ddb5f0e7b5619cbf29ba7a396381ac0bdba315ac94aef9b6d210f4184f47e028f47e4d96608268c83dcbb8c3d291df3bc23493c76ea11a57f33802d1c32e4903
7
- data.tar.gz: 805a7b7775abab148ed13be3e70bd7dd16e8de4f9335b7b109e38b99e4298008ea05e093df835bf519e886de9b134ed43a50a6dde462d5c2f2623bfa9a591213
6
+ metadata.gz: cd48af243e1615e8cafaf8ed1ab4d9019661e3205286fbd32c9ec4bb3e2faa7e07025a62b378062f10dfc0f1775acb401ad4cb901851cbbaf4cb78a2e7c6474a
7
+ data.tar.gz: c92eee26040f89ab0b691ba6bf4ad9792222e545e82bb9ebea8b4a4941831615a74d9f389e58d6feccf89ba42c6dd5a02c55bfed996fc670cd1d9600d5023476
data/README.md CHANGED
@@ -1,8 +1,13 @@
1
1
  # DataCollector
2
2
  Convenience module to Extract, Transform and Load your data.
3
+ You have 3 main objects that you can use for ETL => INPUT, OUTPUT and FILTER
4
+ and support objects like CONFIG, LOG, RULES and the new RULES_NG
5
+
6
+ Including the DataCollector::Core module into your application gives you access to these objects.
7
+
3
8
 
4
9
  #### input
5
- Read input from an URI
10
+ Read input from a URI. The URI can have an http, https or file scheme.
6
11
 
7
12
  **Public methods**
8
13
  ```ruby
@@ -26,7 +31,7 @@ example:
26
31
  Inputs can be JSON, XML or CSV, or XML inside a TAR.GZ file
27
32
 
28
33
  ### output
29
- Output is an object you can store data that needs to be written to an output stream.
34
+ Output is an object in which you can store key/value pairs that need to be written to an output stream.
30
35
  ```ruby
31
36
  output[:name] = 'John'
32
37
  output[:last_name] = 'Doe'
@@ -88,15 +93,17 @@ filter data from a hash using [JSONPath](http://goessner.net/articles/JsonPath/i
88
93
  ```
89
94
 
90
95
  #### rules
96
+ See newer rules_ng object
91
97
  Allows you to define a simple lambda structure to run against a JSONPath filter
92
98
 
93
99
  A rule is made up of a Hash: the key is the map key field, and its value is a Hash with a JSONPath filter and options to apply a convert method on the filtered results.
94
- Available convert methods are: time, map, each, call, suffix
100
+ Available convert methods are: time, map, each, call, suffix, text
95
101
  - time: parses a given time/date string into a Time object
96
102
  - map: applies a mapping to a filter
97
103
  - suffix: adds a suffix to a result
98
104
  - call: executes a lambda on the filter
99
105
  - each: runs a lambda on each row of a filter
106
+ - text: passthrough method. Returns value unchanged
100
107
 
101
108
  example:
102
109
  ```ruby
@@ -118,6 +125,19 @@ Available convert methods are: time, map, each, call, suffix
118
125
  rules.run(my_rules, record, output)
119
126
  ```
120
127
 
128
+ #### rules_ng
129
+ !!! not compatible with RULES object
130
+
131
+ TODO: work in progress; see the tests for examples of how to use it
132
+
133
+ ```
134
+ RULE_SET
135
+ RULES*
136
+ FILTERS*
137
+ LAMBDA*
138
+ SUFFIX
139
+ ```
140
+
121
141
  #### config
122
142
  config is an object that points to "config.yml"; you can read data from and/or store data in this object.
123
143
 
@@ -47,6 +47,6 @@ Gem::Specification.new do |spec|
47
47
  spec.add_runtime_dependency "json-ld", "~> 3.1"
48
48
 
49
49
  spec.add_development_dependency "bundler", "~> 2.0"
50
- spec.add_development_dependency "rake", "~> 10.0"
50
+ spec.add_development_dependency "rake", ">= 12.3"
51
51
  spec.add_development_dependency "minitest", "~> 5.0"
52
52
  end
@@ -5,6 +5,7 @@ require 'logger'
5
5
  require_relative 'input'
6
6
  require_relative 'output'
7
7
  require_relative 'rules'
8
+ require_relative 'rules_ng'
8
9
  require_relative 'config_file'
9
10
 
10
11
  module DataCollector
@@ -79,6 +80,11 @@ module DataCollector
79
80
  @rules ||= Rules.new
80
81
  end
81
82
 
83
+ # New rules runner
84
+ def rules_ng
85
+ @rules_ng ||= RulesNg.new
86
+ end
87
+
82
88
  # evaluator http://jsonpath.com/
83
89
  # explanation: http://goessner.net/articles/JsonPath/index.html
84
90
  def filter(data, filter_path)
@@ -6,31 +6,32 @@ module DataCollector
6
6
  @logger = Logger.new(STDOUT)
7
7
  end
8
8
 
9
- def run(rule_map, from_record, to_record)
9
+ def run(rule_map, from_record, to_record, options = {})
10
10
  rule_map.each do |map_to_key, rule|
11
11
  if rule.is_a?(Array)
12
12
  rule.each do |sub_rule|
13
- apply_rule(map_to_key, sub_rule, from_record, to_record)
13
+ apply_rule(map_to_key, sub_rule, from_record, to_record, options)
14
14
  end
15
15
  else
16
- apply_rule(map_to_key, rule, from_record, to_record)
16
+ apply_rule(map_to_key, rule, from_record, to_record, options)
17
17
  end
18
18
  end
19
19
 
20
20
  to_record.each do |element|
21
21
  element = element.delete_if do |k, v|
22
- v != false && (v.nil? || v.empty?)
22
+ v != false && (v.nil?)
23
23
  end
24
24
  end
25
25
  end
26
26
 
27
27
  private
28
28
 
29
- def apply_rule(map_to_key, rule, from_record, to_record)
29
+ def apply_rule(map_to_key, rule, from_record, to_record, options = {})
30
30
  if rule.has_key?('text')
31
- to_record << { map_to_key.to_sym => rule['text'] }
31
+ suffix = (rule && rule.key?('options') && rule['options'].key?('suffix')) ? rule['options']['suffix'] : ''
32
+ to_record << { map_to_key.to_sym => add_suffix(rule['text'], suffix) }
32
33
  elsif rule.has_key?('options') && rule['options'].has_key?('convert') && rule['options']['convert'].eql?('each')
33
- result = get_value_for(map_to_key, rule['filter'], from_record, rule['options'])
34
+ result = get_value_for(map_to_key, rule['filter'], from_record, rule['options'], options)
34
35
 
35
36
  if result.is_a?(Array)
36
37
  result.each do |m|
@@ -40,14 +41,14 @@ module DataCollector
40
41
  to_record << {map_to_key.to_sym => result}
41
42
  end
42
43
  else
43
- result = get_value_for(map_to_key, rule['filter'], from_record, rule['options'])
44
+ result = get_value_for(map_to_key, rule['filter'], from_record, rule['options'], options)
44
45
  return if result && result.empty?
45
46
 
46
47
  to_record << {map_to_key.to_sym => result}
47
48
  end
48
49
  end
49
50
 
50
- def get_value_for(tag_key, filter_path, record, options = {})
51
+ def get_value_for(tag_key, filter_path, record, rule_options = {}, options = {})
51
52
  data = nil
52
53
  if record
53
54
  if filter_path.is_a?(Array) && !record.is_a?(Array)
@@ -56,34 +57,47 @@ module DataCollector
56
57
 
57
58
  data = Core::filter(record, filter_path)
58
59
 
59
- if data && options
60
- if options.key?('convert')
61
- case options['convert']
60
+ if data && rule_options
61
+ if rule_options.key?('convert')
62
+ case rule_options['convert']
62
63
  when 'time'
63
- data = Time.parse(data).strftime('%Y-%m-%d')
64
+ result = []
65
+ data = [data] unless data.is_a?(Array)
66
+ data.each do |d|
67
+ result << Time.parse(d)
68
+ end
69
+ data = result
64
70
  when 'map'
65
71
  if data.is_a?(Array)
66
72
  data = data.map do |r|
67
- return options['map'][r] if options['map'].key?(r)
73
+ rule_options['map'][r] if rule_options['map'].key?(r)
68
74
  end
69
75
 
70
76
  data.compact!
71
- data.flatten! if options.key?('flatten') && options['flatten']
77
+ data.flatten! if rule_options.key?('flatten') && rule_options['flatten']
72
78
  else
73
- return options['map'][data] if options['map'].key?(data)
79
+ return rule_options['map'][data] if rule_options['map'].key?(data)
74
80
  end
75
81
  when 'each'
76
82
  data = [data] unless data.is_a?(Array)
77
- data = data.map { |d| options['lambda'].call(d) }
78
- data.flatten! if options.key?('flatten') && options['flatten']
83
+ if options.empty?
84
+ data = data.map { |d| rule_options['lambda'].call(d) }
85
+ else
86
+ data = data.map { |d| rule_options['lambda'].call(d, options) }
87
+ end
88
+ data.flatten! if rule_options.key?('flatten') && rule_options['flatten']
79
89
  when 'call'
80
- return options['lambda'].call(data)
90
+ if options.empty?
91
+ data = rule_options['lambda'].call(data)
92
+ else
93
+ data = rule_options['lambda'].call(data, options)
94
+ end
95
+ return data
81
96
  end
82
97
  end
83
98
 
84
- if options.key?('suffix')
85
- data = data.to_s
86
- data += options['suffix']
99
+ if rule_options.key?('suffix')
100
+ data = add_suffix(data, rule_options['suffix'])
87
101
  end
88
102
 
89
103
  end
@@ -93,5 +107,24 @@ module DataCollector
93
107
  return data
94
108
  end
95
109
 
110
+ def add_suffix(data, suffix)
111
+ case data.class.name
112
+ when 'Array'
113
+ result = []
114
+ data.each do |d|
115
+ result << add_suffix(d, suffix)
116
+ end
117
+ data = result
118
+ when 'Hash'
119
+ data.each do |k, v|
120
+ data[k] = add_suffix(v, suffix)
121
+ end
122
+ else
123
+ data = data.to_s
124
+ data += suffix
125
+ end
126
+ data
127
+ end
128
+
96
129
  end
97
130
  end
@@ -0,0 +1,88 @@
1
+ require 'logger'
2
+
3
+ module DataCollector
4
+ class RulesNg
5
+ def initialize(logger = Logger.new(STDOUT))
6
+ @logger = logger
7
+ end
8
+
9
+ def run(rules, input_data, output_data, options = {})
10
+ rules.each do |tag, rule|
11
+ apply_rule(tag, rule, input_data, output_data, options)
12
+ end
13
+ end
14
+
15
+ private
16
+ def apply_rule(tag, rule, input_data, output_data, options = {})
17
+ if rule.is_a?(Array)
18
+ rule.each do |sub_rule|
19
+ apply_rule(tag, sub_rule, input_data, output_data, options)
20
+ end
21
+ return output_data
22
+ end
23
+
24
+ rule_filter = rule.keys.first
25
+ rule_payload = rule.values.first
26
+ case rule_filter
27
+ when 'text'
28
+ if rule_payload.is_a?(String)
29
+ data = rule_payload
30
+ else
31
+ data = rule_payload.select{|s| s.is_a?(String)}
32
+ rule_payload = rule_payload.delete_if{|s| s.is_a?(String)}
33
+ if rule_payload.size == 1
34
+ rule_payload = rule_payload.first
35
+ end
36
+ end
37
+ when /json_path\:/
38
+ data = json_path_filter(rule_filter.gsub(/^json_path\:/), input_data)
39
+ else
40
+ data = json_path_filter(rule_filter, input_data)
41
+ end
42
+
43
+ data = apply_filtered_data_on_payload(data, rule_payload, options)
44
+
45
+ output_data << {tag.to_sym => data}
46
+ end
47
+
48
+ def apply_filtered_data_on_payload(input_data, payload, options = {})
49
+ output_data = nil
50
+ case payload.class.name
51
+ when 'Proc'
52
+ if options && options.empty?
53
+ output_data = payload.call(input_data)
54
+ else
55
+ output_data = payload.call(input_data, options)
56
+ end
57
+ when 'Hash'
58
+ input_data = [input_data] unless input_data.is_a?(Array)
59
+ output_data = input_data.map do |m|
60
+ if payload.key?('suffix')
61
+ "#{m}#{payload['suffix']}"
62
+ else
63
+ payload[m]
64
+ end
65
+ end if input_data.is_a?(Array)
66
+ when 'Array'
67
+ output_data = input_data
68
+ payload.each do |p|
69
+ output_data = apply_filtered_data_on_payload(output_data, p, options)
70
+ end
71
+ else
72
+ output_data = input_data
73
+ end
74
+
75
+ output_data.compact! if output_data.is_a?(Array)
76
+ output_data
77
+ end
78
+
79
+ def json_path_filter(filter, input_data)
80
+ data = nil
81
+ return data if input_data.nil? || input_data.empty?
82
+ return input_data if input_data.is_a?(String)
83
+ Core::filter(input_data, filter)
84
+ end
85
+
86
+
87
+ end
88
+ end
@@ -1,4 +1,4 @@
1
1
  # encoding: utf-8
2
2
  module DataCollector
3
- VERSION = "0.3.0"
3
+ VERSION = "0.4.0"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_collector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mehmet Celik
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-02-11 00:00:00.000000000 Z
11
+ date: 2020-03-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -154,16 +154,16 @@ dependencies:
154
154
  name: rake
155
155
  requirement: !ruby/object:Gem::Requirement
156
156
  requirements:
157
- - - "~>"
157
+ - - ">="
158
158
  - !ruby/object:Gem::Version
159
- version: '10.0'
159
+ version: '12.3'
160
160
  type: :development
161
161
  prerelease: false
162
162
  version_requirements: !ruby/object:Gem::Requirement
163
163
  requirements:
164
- - - "~>"
164
+ - - ">="
165
165
  - !ruby/object:Gem::Version
166
- version: '10.0'
166
+ version: '12.3'
167
167
  - !ruby/object:Gem::Dependency
168
168
  name: minitest
169
169
  requirement: !ruby/object:Gem::Requirement
@@ -201,6 +201,7 @@ files:
201
201
  - lib/data_collector/input.rb
202
202
  - lib/data_collector/output.rb
203
203
  - lib/data_collector/rules.rb
204
+ - lib/data_collector/rules_ng.rb
204
205
  - lib/data_collector/runner.rb
205
206
  - lib/data_collector/version.rb
206
207
  homepage: https://github.com/mehmetc/data_collector
@@ -225,7 +226,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
225
226
  - !ruby/object:Gem::Version
226
227
  version: '0'
227
228
  requirements: []
228
- rubygems_version: 3.0.6
229
+ rubygems_version: 3.0.2
229
230
  signing_key:
230
231
  specification_version: 4
231
232
  summary: ETL helper library