data_collector 0.18.0 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -2
- data/lib/data_collector/input.rb +2 -0
- data/lib/data_collector/pipeline.rb +30 -5
- data/lib/data_collector/rules_ng.rb +19 -8
- data/lib/data_collector/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 35d57ff2998ab1343a4d6e906bcd76bd67951a0eae9a6db69387e4de7dbba285
|
4
|
+
data.tar.gz: 702a6447c28533d2dcdce237cc209417963ea2827eaed4f4ad0ab56a62c42783
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 80e487e0d8bfa19cec43a607b3c58698c37e23fd6385be3102d3ca87584348d585241f1794f848c460c02751f29f3a28d1365c472b8e3a532a922f9104fb2e06
|
7
|
+
data.tar.gz: 0366f4350e54e1bf985f68d3d0532b6fb00394aad23a3641b07fd65594e7e0b16ddf5da90574251c75b83c63fe09455aa988b7395706fe76e995b17fc79cb2fe
|
data/README.md
CHANGED
data/lib/data_collector/input.rb
CHANGED
@@ -37,8 +37,10 @@ module DataCollector
|
|
37
37
|
data = from_https(uri, options)
|
38
38
|
when 'file'
|
39
39
|
if File.directory?("#{uri.host}/#{uri.path}")
|
40
|
+
raise DataCollector::Error, "#{uri.host}/#{uri.path} not found" unless File.exist?("#{uri.host}/#{uri.path}")
|
40
41
|
return from_dir(uri, options)
|
41
42
|
else
|
43
|
+
raise DataCollector::Error, "#{uri.host}/#{uri.path} not found" unless File.exist?("#{uri.host}/#{uri.path}")
|
42
44
|
data = from_file(uri, options)
|
43
45
|
end
|
44
46
|
when 'amqp'
|
data/lib/data_collector/pipeline.rb
CHANGED
@@ -13,6 +13,8 @@ module DataCollector
|
|
13
13
|
|
14
14
|
@schedule = options[:schedule] || {}
|
15
15
|
@name = options[:name] || "#{Time.now.to_i}-#{rand(10000)}"
|
16
|
+
@options = options
|
17
|
+
@listeners = []
|
16
18
|
end
|
17
19
|
|
18
20
|
def on_message(&block)
|
@@ -22,6 +24,9 @@ module DataCollector
|
|
22
24
|
def run
|
23
25
|
if paused? && @running
|
24
26
|
@paused = false
|
27
|
+
@listeners.each do |listener|
|
28
|
+
listener.run if listener.paused?
|
29
|
+
end
|
25
30
|
end
|
26
31
|
|
27
32
|
@running = true
|
@@ -42,8 +47,20 @@ module DataCollector
|
|
42
47
|
end
|
43
48
|
else # run once
|
44
49
|
@run_count += 1
|
45
|
-
|
46
|
-
|
50
|
+
if @options.key?(:uri)
|
51
|
+
listener = Input.new.from_uri(@options[:uri], @options)
|
52
|
+
listener.on_message do |input, output, filename|
|
53
|
+
DataCollector::Core.log("PIPELINE triggered by #{filename}")
|
54
|
+
handle_on_message(@input, @output, filename)
|
55
|
+
end
|
56
|
+
@listeners << listener
|
57
|
+
|
58
|
+
listener.run(true)
|
59
|
+
|
60
|
+
else
|
61
|
+
DataCollector::Core.log("PIPELINE running once")
|
62
|
+
handle_on_message(@input, @output)
|
63
|
+
end
|
47
64
|
end
|
48
65
|
rescue StandardError => e
|
49
66
|
DataCollector::Core.error("PIPELINE run failed: #{e.message}")
|
@@ -54,10 +71,18 @@ module DataCollector
|
|
54
71
|
def stop
|
55
72
|
@running = false
|
56
73
|
@paused = false
|
74
|
+
@listeners.each do |listener|
|
75
|
+
listener.stop if listener.running?
|
76
|
+
end
|
57
77
|
end
|
58
78
|
|
59
79
|
def pause
|
60
|
-
|
80
|
+
if @running
|
81
|
+
@paused = !@paused
|
82
|
+
@listeners.each do |listener|
|
83
|
+
listener.pause if listener.running?
|
84
|
+
end
|
85
|
+
end
|
61
86
|
end
|
62
87
|
|
63
88
|
def running?
|
@@ -74,11 +99,11 @@ module DataCollector
|
|
74
99
|
|
75
100
|
private
|
76
101
|
|
77
|
-
def handle_on_message(input, output)
|
102
|
+
def handle_on_message(input, output, filename = nil)
|
78
103
|
if (callback = @on_message_callback)
|
79
104
|
timing = Time.now
|
80
105
|
begin
|
81
|
-
callback.call(input, output)
|
106
|
+
callback.call(input, output, filename)
|
82
107
|
rescue StandardError => e
|
83
108
|
DataCollector::Core.error("PIPELINE #{e.message}")
|
84
109
|
ensure
|
data/lib/data_collector/rules_ng.rb
CHANGED
@@ -51,7 +51,7 @@ module DataCollector
|
|
51
51
|
|
52
52
|
data = apply_filtered_data_on_payload(data, rule_payload, options)
|
53
53
|
|
54
|
-
output_data << {tag.to_sym => data} unless data.nil? || (data.is_a?(Array) && data.empty?)
|
54
|
+
output_data << { tag.to_sym => data } unless data.nil? || (data.is_a?(Array) && data.empty?)
|
55
55
|
rescue StandardError => e
|
56
56
|
# puts "error running rule '#{tag}'\n\t#{e.message}"
|
57
57
|
# puts e.backtrace.join("\n")
|
@@ -61,15 +61,26 @@ module DataCollector
|
|
61
61
|
def apply_filtered_data_on_payload(input_data, payload, options = {})
|
62
62
|
return nil if input_data.nil?
|
63
63
|
|
64
|
-
normalized_options = options.select{|k,v| k !~ /^_/ }.with_indifferent_access
|
64
|
+
normalized_options = options.select { |k, v| k !~ /^_/ }.with_indifferent_access
|
65
65
|
output_data = nil
|
66
66
|
case payload.class.name
|
67
67
|
when 'Proc'
|
68
68
|
data = input_data.is_a?(Array) ? input_data : [input_data]
|
69
69
|
output_data = if normalized_options.empty?
|
70
|
-
data.map { |d| payload.call(d) }
|
70
|
+
# data.map { |d| payload.curry.call(d).call(d) }
|
71
|
+
data.map { |d|
|
72
|
+
loop do
|
73
|
+
payload_result = payload.curry.call(d)
|
74
|
+
break payload_result unless payload_result.is_a?(Proc)
|
75
|
+
end
|
76
|
+
}
|
71
77
|
else
|
72
|
-
data.map { |d|
|
78
|
+
data.map { |d|
|
79
|
+
loop do
|
80
|
+
payload_result = payload.curry.call(d, normalized_options)
|
81
|
+
break payload_result unless payload_result.is_a?(Proc)
|
82
|
+
end
|
83
|
+
}
|
73
84
|
end
|
74
85
|
when 'Hash'
|
75
86
|
input_data = [input_data] unless input_data.is_a?(Array)
|
@@ -77,9 +88,9 @@ module DataCollector
|
|
77
88
|
output_data = input_data.map do |m|
|
78
89
|
if payload.key?('suffix')
|
79
90
|
if (m.is_a?(Hash))
|
80
|
-
m.transform_values{|v| v.is_a?(String) ? "#{v}#{payload['suffix']}" : v}
|
91
|
+
m.transform_values { |v| v.is_a?(String) ? "#{v}#{payload['suffix']}" : v }
|
81
92
|
elsif m.is_a?(Array)
|
82
|
-
m.map{|n| n.is_a?(String) ? "#{n}#{payload['suffix']}": n}
|
93
|
+
m.map { |n| n.is_a?(String) ? "#{n}#{payload['suffix']}" : n }
|
83
94
|
elsif m.methods.include?(:to_s)
|
84
95
|
"#{m}#{payload['suffix']}"
|
85
96
|
else
|
@@ -102,8 +113,8 @@ module DataCollector
|
|
102
113
|
output_data.compact! if output_data.is_a?(Array)
|
103
114
|
output_data.flatten! if output_data.is_a?(Array)
|
104
115
|
if output_data.is_a?(Array) &&
|
105
|
-
|
106
|
-
|
116
|
+
output_data.size == 1 &&
|
117
|
+
(output_data.first.is_a?(Array) || output_data.first.is_a?(Hash))
|
107
118
|
output_data = output_data.first
|
108
119
|
end
|
109
120
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_collector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.18.0
|
4
|
+
version: 0.19.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mehmet Celik
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-05-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|