data_collector 0.52.0 → 0.53.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/data_collector/input.rb +68 -47
- data/lib/data_collector/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 749914ea43bad2734c79a131eb5db2f1ef2b953fa386a9cb3eb50698e7d2f1f4
|
4
|
+
data.tar.gz: 0402100bbfe402ad1c96cc0b767c608a917402e748ea4f57b9bf298ca9510dba
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b00c092e0670a26a85bbe1299c687a132c65af7950c5b0f74ff537a8ba948af27617164bd8beebc3b072d424fafdd58865e8cafbaa03d5e6aafb8e682806ba43
|
7
|
+
data.tar.gz: 4072e26cfd9a2ef57b094b3ea7cc340f0d735e8ca4356d6e346f725b2486e7284b624936724c0545677571d7ff2a0c447785374778cb55080e69cf54bc7df1a3
|
data/lib/data_collector/input.rb
CHANGED
@@ -29,60 +29,64 @@ module DataCollector
|
|
29
29
|
|
30
30
|
# Read data from +source+ and return it, or yield it to the given block.
#
# +source+ is either a StringIO (handed to from_stringio) or a String of
# the form "<scheme>://<rest>". The scheme selects the reader:
# http -> from_http, https -> from_https, file -> from_dir / from_file,
# any scheme matching /amqp/ -> from_rpc (scheme starts with "rpc") or
# from_queue.
#
# @param source [String, StringIO] where to read from
# @param options [Hash] passed through unchanged to the selected reader
# @yield [data] the data that was read, unless from_file already consumed
#   the block (signalled by from_file returning +true+)
# @return [Object, nil] the data, the block's result, 'no data found' when
#   the reader returned nil, or nil when an error was logged and swallowed
# @raise [DataCollector::InputError] logged and re-raised; a
#   DataCollector::Error or any other StandardError is logged and turned
#   into a nil return value
def from_uri(source, options = {}, &block)
  block_consumed = false
  data = nil

  if source.is_a?(StringIO)
    data = from_stringio(source, options)
  else
    source = CGI.unescapeHTML(source)
    @logger.info("Reading #{source}")
    raise DataCollector::Error, 'from_uri expects a scheme like file:// or https://' unless source.include?('://')

    # Split on the FIRST "://" only. Without the limit, a source that
    # carries another "://" further on (for example a URL inside a query
    # string) would silently lose everything from that point onwards.
    scheme, path = source.split('://', 2)
    source = "#{scheme}://#{URI.encode_www_form_component(path)}"
    uri = URI(source)

    case uri.scheme
    when 'http'
      data = from_http(uri, options)
    when 'https'
      data = from_https(uri, options)
    when 'file'
      # The path was form-encoded above, so decode it before touching the
      # file system.
      absolute_path = File.absolute_path(URI.decode_www_form_component("#{uri.host}#{uri.path}"))
      # Directories return straight away: no 'no data found' substitution
      # and no yield to the caller's block in this method.
      return from_dir(uri, options) if File.directory?(absolute_path)

      if block_given?
        data = from_file(uri, options, &block)
        # from_file returning true means it already ran the block itself.
        block_consumed = true if data.is_a?(TrueClass)
      else
        data = from_file(uri, options)
      end
    when /amqp/
      data = if uri.scheme =~ /^rpc/
               from_rpc(uri, options)
             else
               from_queue(uri, options)
             end
    else
      raise "Do not know how to process #{source}"
    end
  end

  data = 'no data found' if data.nil?

  if block_given? && !block_consumed
    yield data
  else
    data
  end
rescue DataCollector::InputError => e
  @logger.error(e.message)
  raise
rescue DataCollector::Error => e
  @logger.error(e.message)
  nil
rescue StandardError => e
  @logger.error(e.message)
  puts e.backtrace.join("\n")
  nil
end
|
87
91
|
|
88
92
|
private
|
@@ -204,6 +208,23 @@ module DataCollector
|
|
204
208
|
data
|
205
209
|
end
|
206
210
|
|
211
|
+
# Read data held in a StringIO by spooling it to a temporary file and
# handing that file to from_file.
#
# The temp file's extension is taken from the first extension registered
# for options[:content_type] in MIME::Types; 'txt' is used when the
# content type is unknown or has no extension.
#
# @param sio [StringIO] the in-memory content; it is rewound before reading
# @param options [Hash] must contain the key :content_type
# @return [Object] whatever from_file returns for the temporary file
# @raise [DataCollector::InputError] if +sio+ is not a StringIO or
#   :content_type is missing from +options+
#
# NOTE(review): +options+ and +block+ are accepted but not forwarded to
# from_file — confirm whether that is intentional.
def from_stringio(sio, options = {}, &block)
  raise DataCollector::InputError, "No IO input" unless sio.is_a?(StringIO)
  raise DataCollector::InputError, "content_type option not supplied" unless options.key?(:content_type)

  # MIME::Types[] yields nothing for an unregistered content type, so
  # navigate safely instead of calling #extensions on nil.
  preferred_extension = MIME::Types[options[:content_type]].first&.extensions&.first || 'txt'

  file = Tempfile.new(["dc_", ".#{preferred_extension}"])
  begin
    sio.rewind
    file.write(sio.read)
    # Close before reading so the written content is flushed to disk.
    file.close
    from_file(URI("file://#{file.path}"))
  ensure
    # Also release the handle when rewind/write raised before the close above.
    file.close unless file.closed?
    file.unlink
  end
end
|
227
|
+
|
207
228
|
def from_file(uri, options = {}, &block)
|
208
229
|
data = nil
|
209
230
|
uri = normalize_uri(uri)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_collector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.52.0
|
4
|
+
version: 0.53.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mehmet Celik
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-07-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|