data_collector 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 54fed64f173cd683fa66d514e65c74d42bb992965fdb85ca15df823e97e4ded1
4
- data.tar.gz: 18170f3fd1660fef26448961ab5e5af22c42af405750f54873e6cb650718f3b2
3
+ metadata.gz: 5fb3cc5c42f536a8bdc99421eb6f46972c6c97597f5c0af63e96463f320837ef
4
+ data.tar.gz: d377133bf042985f6d68b2bb2960d9c4ddae169a39d4602df4a3f4003d6b6434
5
5
  SHA512:
6
- metadata.gz: ddb5f0e7b5619cbf29ba7a396381ac0bdba315ac94aef9b6d210f4184f47e028f47e4d96608268c83dcbb8c3d291df3bc23493c76ea11a57f33802d1c32e4903
7
- data.tar.gz: 805a7b7775abab148ed13be3e70bd7dd16e8de4f9335b7b109e38b99e4298008ea05e093df835bf519e886de9b134ed43a50a6dde462d5c2f2623bfa9a591213
6
+ metadata.gz: cd48af243e1615e8cafaf8ed1ab4d9019661e3205286fbd32c9ec4bb3e2faa7e07025a62b378062f10dfc0f1775acb401ad4cb901851cbbaf4cb78a2e7c6474a
7
+ data.tar.gz: c92eee26040f89ab0b691ba6bf4ad9792222e545e82bb9ebea8b4a4941831615a74d9f389e58d6feccf89ba42c6dd5a02c55bfed996fc670cd1d9600d5023476
data/README.md CHANGED
@@ -1,8 +1,13 @@
1
1
  # DataCollector
2
2
  Convenience module to Extract, Transform and Load your data.
3
+ You have 3 main objects that you can use for ETL => INPUT, OUTPUT and FILTER
4
+ and support objects like CONFIG, LOG, RULES and the new RULES_NG
5
+
6
+ Including the DataCollector::Core module into your application gives you access to these objects.
7
+
3
8
 
4
9
  #### input
5
- Read input from an URI
10
+ Read input from a URI. The URI can have an http, https or file scheme.
6
11
 
7
12
  **Public methods**
8
13
  ```ruby
@@ -26,7 +31,7 @@ example:
26
31
  Inputs can be JSON, XML or CSV or XML in a TAR.GZ file
27
32
 
28
33
  ### output
29
- Output is an object you can store data that needs to be written to an output stream.
34
+ Output is an object in which you can store key/value pairs that need to be written to an output stream.
30
35
  ```ruby
31
36
  output[:name] = 'John'
32
37
  output[:last_name] = 'Doe'
@@ -88,15 +93,17 @@ filter data from a hash using [JSONPath](http://goessner.net/articles/JsonPath/i
88
93
  ```
89
94
 
90
95
  #### rules
96
+ See the newer rules_ng object.
91
97
  Allows you to define a simple lambda structure to run against a JSONPath filter
92
98
 
93
99
  A rule is a Hash: the key is the map key field, and its value is a Hash with a JSONPath filter and options to apply a convert method to the filtered results.
94
- Available convert methods are: time, map, each, call, suffix
100
+ Available convert methods are: time, map, each, call, suffix, text
95
101
  - time: parses a given time/date string into a Time object
96
102
  - map: applies a mapping to a filter
97
103
  - suffix: adds a suffix to a result
98
104
  - call: executes a lambda on the filter
99
105
  - each: runs a lambda on each row of a filter
106
+ - text: passthrough method. Returns value unchanged
100
107
 
101
108
  example:
102
109
  ```ruby
@@ -118,6 +125,19 @@ Available convert methods are: time, map, each, call, suffix
118
125
  rules.run(my_rules, record, output)
119
126
  ```
120
127
 
128
+ #### rules_ng
129
+ !!! not compatible with RULES object
130
+
131
+ TODO: work in progress; see the tests for examples of how to use it.
132
+
133
+ ```
134
+ RULE_SET
135
+ RULES*
136
+ FILTERS*
137
+ LAMBDA*
138
+ SUFFIX
139
+ ```
140
+
121
141
  #### config
122
142
  config is an object that points to "config.yml"; you can read data from and/or store data to this object.
123
143
 
@@ -47,6 +47,6 @@ Gem::Specification.new do |spec|
47
47
  spec.add_runtime_dependency "json-ld", "~> 3.1"
48
48
 
49
49
  spec.add_development_dependency "bundler", "~> 2.0"
50
- spec.add_development_dependency "rake", "~> 10.0"
50
+ spec.add_development_dependency "rake", ">= 12.3"
51
51
  spec.add_development_dependency "minitest", "~> 5.0"
52
52
  end
@@ -5,6 +5,7 @@ require 'logger'
5
5
  require_relative 'input'
6
6
  require_relative 'output'
7
7
  require_relative 'rules'
8
+ require_relative 'rules_ng'
8
9
  require_relative 'config_file'
9
10
 
10
11
  module DataCollector
@@ -79,6 +80,11 @@ module DataCollector
79
80
  @rules ||= Rules.new
80
81
  end
81
82
 
83
+ # New rules runner
84
+ def rules_ng
85
+ @rules_ng ||= RulesNg.new
86
+ end
87
+
82
88
  # evaluator http://jsonpath.com/
83
89
  # explanation: http://goessner.net/articles/JsonPath/index.html
84
90
  def filter(data, filter_path)
@@ -6,31 +6,32 @@ module DataCollector
6
6
  @logger = Logger.new(STDOUT)
7
7
  end
8
8
 
9
- def run(rule_map, from_record, to_record)
9
+ def run(rule_map, from_record, to_record, options = {})
10
10
  rule_map.each do |map_to_key, rule|
11
11
  if rule.is_a?(Array)
12
12
  rule.each do |sub_rule|
13
- apply_rule(map_to_key, sub_rule, from_record, to_record)
13
+ apply_rule(map_to_key, sub_rule, from_record, to_record, options)
14
14
  end
15
15
  else
16
- apply_rule(map_to_key, rule, from_record, to_record)
16
+ apply_rule(map_to_key, rule, from_record, to_record, options)
17
17
  end
18
18
  end
19
19
 
20
20
  to_record.each do |element|
21
21
  element = element.delete_if do |k, v|
22
- v != false && (v.nil? || v.empty?)
22
+ v != false && (v.nil?)
23
23
  end
24
24
  end
25
25
  end
26
26
 
27
27
  private
28
28
 
29
- def apply_rule(map_to_key, rule, from_record, to_record)
29
+ def apply_rule(map_to_key, rule, from_record, to_record, options = {})
30
30
  if rule.has_key?('text')
31
- to_record << { map_to_key.to_sym => rule['text'] }
31
+ suffix = (rule && rule.key?('options') && rule['options'].key?('suffix')) ? rule['options']['suffix'] : ''
32
+ to_record << { map_to_key.to_sym => add_suffix(rule['text'], suffix) }
32
33
  elsif rule.has_key?('options') && rule['options'].has_key?('convert') && rule['options']['convert'].eql?('each')
33
- result = get_value_for(map_to_key, rule['filter'], from_record, rule['options'])
34
+ result = get_value_for(map_to_key, rule['filter'], from_record, rule['options'], options)
34
35
 
35
36
  if result.is_a?(Array)
36
37
  result.each do |m|
@@ -40,14 +41,14 @@ module DataCollector
40
41
  to_record << {map_to_key.to_sym => result}
41
42
  end
42
43
  else
43
- result = get_value_for(map_to_key, rule['filter'], from_record, rule['options'])
44
+ result = get_value_for(map_to_key, rule['filter'], from_record, rule['options'], options)
44
45
  return if result && result.empty?
45
46
 
46
47
  to_record << {map_to_key.to_sym => result}
47
48
  end
48
49
  end
49
50
 
50
- def get_value_for(tag_key, filter_path, record, options = {})
51
+ def get_value_for(tag_key, filter_path, record, rule_options = {}, options = {})
51
52
  data = nil
52
53
  if record
53
54
  if filter_path.is_a?(Array) && !record.is_a?(Array)
@@ -56,34 +57,47 @@ module DataCollector
56
57
 
57
58
  data = Core::filter(record, filter_path)
58
59
 
59
- if data && options
60
- if options.key?('convert')
61
- case options['convert']
60
+ if data && rule_options
61
+ if rule_options.key?('convert')
62
+ case rule_options['convert']
62
63
  when 'time'
63
- data = Time.parse(data).strftime('%Y-%m-%d')
64
+ result = []
65
+ data = [data] unless data.is_a?(Array)
66
+ data.each do |d|
67
+ result << Time.parse(d)
68
+ end
69
+ data = result
64
70
  when 'map'
65
71
  if data.is_a?(Array)
66
72
  data = data.map do |r|
67
- return options['map'][r] if options['map'].key?(r)
73
+ rule_options['map'][r] if rule_options['map'].key?(r)
68
74
  end
69
75
 
70
76
  data.compact!
71
- data.flatten! if options.key?('flatten') && options['flatten']
77
+ data.flatten! if rule_options.key?('flatten') && rule_options['flatten']
72
78
  else
73
- return options['map'][data] if options['map'].key?(data)
79
+ return rule_options['map'][data] if rule_options['map'].key?(data)
74
80
  end
75
81
  when 'each'
76
82
  data = [data] unless data.is_a?(Array)
77
- data = data.map { |d| options['lambda'].call(d) }
78
- data.flatten! if options.key?('flatten') && options['flatten']
83
+ if options.empty?
84
+ data = data.map { |d| rule_options['lambda'].call(d) }
85
+ else
86
+ data = data.map { |d| rule_options['lambda'].call(d, options) }
87
+ end
88
+ data.flatten! if rule_options.key?('flatten') && rule_options['flatten']
79
89
  when 'call'
80
- return options['lambda'].call(data)
90
+ if options.empty?
91
+ data = rule_options['lambda'].call(data)
92
+ else
93
+ data = rule_options['lambda'].call(data, options)
94
+ end
95
+ return data
81
96
  end
82
97
  end
83
98
 
84
- if options.key?('suffix')
85
- data = data.to_s
86
- data += options['suffix']
99
+ if rule_options.key?('suffix')
100
+ data = add_suffix(data, rule_options['suffix'])
87
101
  end
88
102
 
89
103
  end
@@ -93,5 +107,24 @@ module DataCollector
93
107
  return data
94
108
  end
95
109
 
110
+ def add_suffix(data, suffix)
111
+ case data.class.name
112
+ when 'Array'
113
+ result = []
114
+ data.each do |d|
115
+ result << add_suffix(d, suffix)
116
+ end
117
+ data = result
118
+ when 'Hash'
119
+ data.each do |k, v|
120
+ data[k] = add_suffix(v, suffix)
121
+ end
122
+ else
123
+ data = data.to_s
124
+ data += suffix
125
+ end
126
+ data
127
+ end
128
+
96
129
  end
97
130
  end
@@ -0,0 +1,88 @@
1
+ require 'logger'
2
+
3
+ module DataCollector
4
+ class RulesNg
5
+ def initialize(logger = Logger.new(STDOUT))
6
+ @logger = logger
7
+ end
8
+
9
+ def run(rules, input_data, output_data, options = {})
10
+ rules.each do |tag, rule|
11
+ apply_rule(tag, rule, input_data, output_data, options)
12
+ end
13
+ end
14
+
15
+ private
16
+ def apply_rule(tag, rule, input_data, output_data, options = {})
17
+ if rule.is_a?(Array)
18
+ rule.each do |sub_rule|
19
+ apply_rule(tag, sub_rule, input_data, output_data, options)
20
+ end
21
+ return output_data
22
+ end
23
+
24
+ rule_filter = rule.keys.first
25
+ rule_payload = rule.values.first
26
+ case rule_filter
27
+ when 'text'
28
+ if rule_payload.is_a?(String)
29
+ data = rule_payload
30
+ else
31
+ data = rule_payload.select{|s| s.is_a?(String)}
32
+ rule_payload = rule_payload.delete_if{|s| s.is_a?(String)}
33
+ if rule_payload.size == 1
34
+ rule_payload = rule_payload.first
35
+ end
36
+ end
37
+ when /json_path\:/
38
+ data = json_path_filter(rule_filter.gsub(/^json_path\:/), input_data)
39
+ else
40
+ data = json_path_filter(rule_filter, input_data)
41
+ end
42
+
43
+ data = apply_filtered_data_on_payload(data, rule_payload, options)
44
+
45
+ output_data << {tag.to_sym => data}
46
+ end
47
+
48
+ def apply_filtered_data_on_payload(input_data, payload, options = {})
49
+ output_data = nil
50
+ case payload.class.name
51
+ when 'Proc'
52
+ if options && options.empty?
53
+ output_data = payload.call(input_data)
54
+ else
55
+ output_data = payload.call(input_data, options)
56
+ end
57
+ when 'Hash'
58
+ input_data = [input_data] unless input_data.is_a?(Array)
59
+ output_data = input_data.map do |m|
60
+ if payload.key?('suffix')
61
+ "#{m}#{payload['suffix']}"
62
+ else
63
+ payload[m]
64
+ end
65
+ end if input_data.is_a?(Array)
66
+ when 'Array'
67
+ output_data = input_data
68
+ payload.each do |p|
69
+ output_data = apply_filtered_data_on_payload(output_data, p, options)
70
+ end
71
+ else
72
+ output_data = input_data
73
+ end
74
+
75
+ output_data.compact! if output_data.is_a?(Array)
76
+ output_data
77
+ end
78
+
79
+ def json_path_filter(filter, input_data)
80
+ data = nil
81
+ return data if input_data.nil? || input_data.empty?
82
+ return input_data if input_data.is_a?(String)
83
+ Core::filter(input_data, filter)
84
+ end
85
+
86
+
87
+ end
88
+ end
@@ -1,4 +1,4 @@
1
1
  # encoding: utf-8
2
2
  module DataCollector
3
- VERSION = "0.3.0"
3
+ VERSION = "0.4.0"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_collector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mehmet Celik
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-02-11 00:00:00.000000000 Z
11
+ date: 2020-03-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -154,16 +154,16 @@ dependencies:
154
154
  name: rake
155
155
  requirement: !ruby/object:Gem::Requirement
156
156
  requirements:
157
- - - "~>"
157
+ - - ">="
158
158
  - !ruby/object:Gem::Version
159
- version: '10.0'
159
+ version: '12.3'
160
160
  type: :development
161
161
  prerelease: false
162
162
  version_requirements: !ruby/object:Gem::Requirement
163
163
  requirements:
164
- - - "~>"
164
+ - - ">="
165
165
  - !ruby/object:Gem::Version
166
- version: '10.0'
166
+ version: '12.3'
167
167
  - !ruby/object:Gem::Dependency
168
168
  name: minitest
169
169
  requirement: !ruby/object:Gem::Requirement
@@ -201,6 +201,7 @@ files:
201
201
  - lib/data_collector/input.rb
202
202
  - lib/data_collector/output.rb
203
203
  - lib/data_collector/rules.rb
204
+ - lib/data_collector/rules_ng.rb
204
205
  - lib/data_collector/runner.rb
205
206
  - lib/data_collector/version.rb
206
207
  homepage: https://github.com/mehmetc/data_collector
@@ -225,7 +226,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
225
226
  - !ruby/object:Gem::Version
226
227
  version: '0'
227
228
  requirements: []
228
- rubygems_version: 3.0.6
229
+ rubygems_version: 3.0.2
229
230
  signing_key:
230
231
  specification_version: 4
231
232
  summary: ETL helper library