data_collector 0.31.0 → 0.33.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +14 -2
- data/lib/data_collector/config_file.rb +31 -8
- data/lib/data_collector/core.rb +12 -6
- data/lib/data_collector/input/rpc.rb +15 -2
- data/lib/data_collector/output.rb +9 -5
- data/lib/data_collector/version.rb +1 -1
- data/lib/proxy_logger.rb +16 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 38cc404597c0961fb94fae9421a00b6e6a18de26f2e1ce568928eb2e46d2395a
|
4
|
+
data.tar.gz: 849195568397a3a553d14e882bdb18662eddc0f55c6af0a16d5983c4f68979bd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b63028d59533473eb875184049f98137147db027d837729f1de5a203a41d6c12631264e17ddc626f7366d32e8183adc81bb9036c931fd39f8c77398394e59ce3
|
7
|
+
data.tar.gz: 922f2cba4f14321eee7cff191c9e775cde4595e78a97515dde6583a2d25b84b52a0c4b13db7304c40b0f2d50a80c1fd0a003b991dbb899395ee776a6b00bb52d
|
data/README.md
CHANGED
@@ -46,7 +46,7 @@ pipeline.run
|
|
46
46
|
|
47
47
|
```ruby
|
48
48
|
#create a pipeline scheduled to run every morning at 06:00 am
|
49
|
-
pipeline = Pipeline.new(
|
49
|
+
pipeline = Pipeline.new(cron: '0 6 * * *')
|
50
50
|
|
51
51
|
pipeline.on_message do |input, output|
|
52
52
|
data = input.from_uri("https://dummyjson.com/comments?limit=10")
|
@@ -336,10 +336,22 @@ Log to stdout
|
|
336
336
|
log("hello world")
|
337
337
|
```
|
338
338
|
#### error
|
339
|
-
Log an error
|
339
|
+
Log an error to stdout
|
340
340
|
```ruby
|
341
341
|
error("if you have an issue take a tissue")
|
342
342
|
```
|
343
|
+
### logger
|
344
|
+
Logs are written to standard output (STDOUT) by default. To log somewhere else, pass one or more destinations to `logger`:
|
345
|
+
```ruby
|
346
|
+
f = File.open('/tmp/data.log', 'w')
|
347
|
+
f.sync = true # do not buffer
|
348
|
+
# add multiple log outputs
|
349
|
+
logger(STDOUT, f)
|
350
|
+
|
351
|
+
#write to both STDOUT and /tmp/data.log
|
352
|
+
log('Hello world')
|
353
|
+
```
|
354
|
+
|
343
355
|
## Example
|
344
356
|
Input data ___test.csv___
|
345
357
|
```csv
|
data/lib/data_collector/config_file.rb
CHANGED
@@ -6,9 +6,19 @@ module DataCollector
|
|
6
6
|
class ConfigFile
|
7
7
|
@config = {}
|
8
8
|
@config_file_path = ''
|
9
|
+
@config_file_name = 'config.yml'
|
10
|
+
@mtime = nil
|
9
11
|
|
10
12
|
def self.version
|
11
|
-
'0.0.
|
13
|
+
'0.0.3'
|
14
|
+
end
|
15
|
+
|
16
|
+
def self.name
|
17
|
+
@config_file_name
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.name=(config_file_name)
|
21
|
+
@config_file_name = config_file_name
|
12
22
|
end
|
13
23
|
|
14
24
|
def self.path
|
@@ -37,19 +47,27 @@ module DataCollector
|
|
37
47
|
@config.include?(key)
|
38
48
|
end
|
39
49
|
|
50
|
+
def self.keys
|
51
|
+
init
|
52
|
+
@config.keys
|
53
|
+
end
|
40
54
|
|
41
|
-
|
55
|
+
def self.init
|
42
56
|
discover_config_file_path
|
43
|
-
|
44
|
-
|
57
|
+
raise Errno::ENOENT, "#{@config_file_path}/config.yml Not Found. Set path to config.yml" unless File.exist?("#{@config_file_path}/config.yml")
|
58
|
+
|
59
|
+
ftime = File.exist?("#{@config_file_path}/config.yml") ? File.mtime("#{@config_file_path}/config.yml") : nil
|
60
|
+
if @config.empty? || @mtime != ftime
|
61
|
+
config = YAML::load_file("#{@config_file_path}/config.yml")
|
45
62
|
@config = process(config)
|
46
63
|
end
|
47
64
|
end
|
48
65
|
|
49
|
-
|
50
|
-
private_class_method def self.discover_config_file_path
|
66
|
+
def self.discover_config_file_path
|
51
67
|
if @config_file_path.nil? || @config_file_path.empty?
|
52
|
-
if
|
68
|
+
if ENV.key?('CONFIG_FILE_PATH')
|
69
|
+
@config_file_path = ENV['CONFIG_FILE_PATH']
|
70
|
+
elsif File.exist?('config.yml')
|
53
71
|
@config_file_path = '.'
|
54
72
|
elsif File.exist?("config/config.yml")
|
55
73
|
@config_file_path = 'config'
|
@@ -57,7 +75,7 @@ module DataCollector
|
|
57
75
|
end
|
58
76
|
end
|
59
77
|
|
60
|
-
|
78
|
+
def self.process(config)
|
61
79
|
new_config = {}
|
62
80
|
config.each do |k, v|
|
63
81
|
if config[k].is_a?(Hash)
|
@@ -68,5 +86,10 @@ module DataCollector
|
|
68
86
|
|
69
87
|
new_config
|
70
88
|
end
|
89
|
+
|
90
|
+
private_class_method :new
|
91
|
+
private_class_method :init
|
92
|
+
private_class_method :discover_config_file_path
|
93
|
+
private_class_method :process
|
71
94
|
end
|
72
95
|
end
|
data/lib/data_collector/core.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
require 'jsonpath'
|
3
|
-
require '
|
3
|
+
require 'proxy_logger'
|
4
4
|
|
5
5
|
require_relative 'input'
|
6
6
|
require_relative 'output'
|
@@ -114,7 +114,7 @@ module DataCollector
|
|
114
114
|
|
115
115
|
filtered
|
116
116
|
rescue StandardError => e
|
117
|
-
@logger ||=
|
117
|
+
@logger ||= self.logger
|
118
118
|
@logger.error("#{filter_path} failed: #{e.message}")
|
119
119
|
[]
|
120
120
|
end
|
@@ -126,19 +126,25 @@ module DataCollector
|
|
126
126
|
module_function :config
|
127
127
|
|
128
128
|
def log(message)
|
129
|
-
@logger ||=
|
129
|
+
@logger ||= self.logger
|
130
130
|
@logger.info(message)
|
131
131
|
end
|
132
132
|
module_function :log
|
133
133
|
|
134
134
|
def error(message)
|
135
|
-
@logger ||=
|
135
|
+
@logger ||= self.logger
|
136
136
|
@logger.error(message)
|
137
137
|
end
|
138
138
|
module_function :error
|
139
139
|
|
140
|
-
def logger
|
141
|
-
@logger ||=
|
140
|
+
def logger(*destinations)
|
141
|
+
@logger ||= begin
|
142
|
+
destinations = STDOUT if destinations.nil? || destinations.empty?
|
143
|
+
Logger.new(ProxyLogger.new(destinations))
|
144
|
+
rescue StandardError => e
|
145
|
+
puts "Unable to instantiate ProxyLogger: #{e.message}"
|
146
|
+
Logger.new(STDOUT)
|
147
|
+
end
|
142
148
|
end
|
143
149
|
module_function :logger
|
144
150
|
|
data/lib/data_collector/input/rpc.rb
CHANGED
@@ -14,6 +14,8 @@ module DataCollector
|
|
14
14
|
|
15
15
|
def running?
|
16
16
|
@running
|
17
|
+
rescue StandardError => e
|
18
|
+
DataCollector::Core.error(e.message)
|
17
19
|
end
|
18
20
|
|
19
21
|
def stop
|
@@ -21,10 +23,14 @@ module DataCollector
|
|
21
23
|
@listener.shutdown
|
22
24
|
@running = false
|
23
25
|
end
|
26
|
+
rescue StandardError => e
|
27
|
+
DataCollector::Core.error(e.message)
|
24
28
|
end
|
25
29
|
|
26
30
|
def pause
|
27
31
|
raise "PAUSE not implemented."
|
32
|
+
rescue StandardError => e
|
33
|
+
DataCollector::Core.error(e.message)
|
28
34
|
end
|
29
35
|
|
30
36
|
|
@@ -47,16 +53,23 @@ module DataCollector
|
|
47
53
|
else
|
48
54
|
yield block if block_given?
|
49
55
|
end
|
56
|
+
rescue StandardError => e
|
57
|
+
DataCollector::Core.error(e.message)
|
50
58
|
end
|
51
59
|
|
52
60
|
private
|
53
|
-
def create_listener
|
61
|
+
def create_listener(log = false)
|
54
62
|
@listener ||= BunnyBurrow::Server.new do |server|
|
55
63
|
parse_uri
|
56
64
|
server.rabbitmq_url = @bunny_uri.to_s
|
57
65
|
server.rabbitmq_exchange = @bunny_channel
|
58
|
-
|
66
|
+
|
67
|
+
server.logger = DataCollector::Core.logger if log
|
59
68
|
end
|
69
|
+
|
70
|
+
@listener
|
71
|
+
rescue StandardError => e
|
72
|
+
DataCollector::Core.error(e.message)
|
60
73
|
end
|
61
74
|
|
62
75
|
def parse_uri
|
data/lib/data_collector/output.rb
CHANGED
@@ -6,9 +6,11 @@ require 'minitar'
|
|
6
6
|
require 'zlib'
|
7
7
|
require 'cgi'
|
8
8
|
require 'active_support/core_ext/hash'
|
9
|
+
require 'active_support/core_ext/array'
|
9
10
|
require "active_support/isolated_execution_state"
|
10
11
|
require 'active_support/xml_mini'
|
11
12
|
require 'fileutils'
|
13
|
+
|
12
14
|
require_relative './output/rpc'
|
13
15
|
|
14
16
|
module DataCollector
|
@@ -43,10 +45,11 @@ module DataCollector
|
|
43
45
|
@data[k] << v
|
44
46
|
end
|
45
47
|
else
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
48
|
+
if v.is_a?(Array) # merge with array
|
49
|
+
@data[k] = [@data[k]] + v
|
50
|
+
else
|
51
|
+
@data[k] = v
|
52
|
+
end
|
50
53
|
end
|
51
54
|
else
|
52
55
|
@data[k] = v
|
@@ -251,7 +254,7 @@ module DataCollector
|
|
251
254
|
DataCollector::Output::Rpc.new(uri, options)
|
252
255
|
end
|
253
256
|
|
254
|
-
def
|
257
|
+
def to_queue(uri, options = {})
|
255
258
|
raise "to be implemented"
|
256
259
|
end
|
257
260
|
|
@@ -263,6 +266,7 @@ module DataCollector
|
|
263
266
|
data.compact!
|
264
267
|
data.each { |k, v| data[k] = deep_compact(v) }
|
265
268
|
data.compact!
|
269
|
+
data
|
266
270
|
elsif data.is_a?(Array)
|
267
271
|
# puts " - Array - #{data}"
|
268
272
|
data.map! { |v| deep_compact(v) }
|
data/lib/proxy_logger.rb
ADDED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_collector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.33.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mehmet Celik
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-11-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -311,6 +311,7 @@ files:
|
|
311
311
|
- lib/data_collector/rules_ng.rb
|
312
312
|
- lib/data_collector/runner.rb
|
313
313
|
- lib/data_collector/version.rb
|
314
|
+
- lib/proxy_logger.rb
|
314
315
|
homepage: https://github.com/mehmetc/data_collector
|
315
316
|
licenses:
|
316
317
|
- MIT
|
@@ -333,7 +334,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
333
334
|
- !ruby/object:Gem::Version
|
334
335
|
version: '0'
|
335
336
|
requirements: []
|
336
|
-
rubygems_version: 3.4.
|
337
|
+
rubygems_version: 3.4.10
|
337
338
|
signing_key:
|
338
339
|
specification_version: 4
|
339
340
|
summary: ETL helper library
|