data_collector 0.29.0 → 0.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/data_collector/input.rb +52 -29
- data/lib/data_collector/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 47a88bcc23a8fdc3922f22b3cb984eed3ea7b725d139e72b1f6f669d70ebb8e6
|
4
|
+
data.tar.gz: 62a4c5775dca4cdd6c6776c6d8628a717d1eb021a314b663ac7f0b6c95e317ac
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 95cff72022a43f18c9495cfe6b1f3a66a1f086e196dcb03528152c5a3c4dc34b8e551d9f6fb2d576e758a9c61b34693632990342488b2406fcd3b44a003e4381
|
7
|
+
data.tar.gz: 71abb93a64fbb419b5eb4f5e8e0ba6221f69b289fe8b133bbc5b208446e7b4b132d4199f769775170d69f2e5fb1051836ce9ce68222ca4a199eb0a0854978826
|
data/lib/data_collector/input.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#encoding: UTF-8
|
1
|
+
# encoding: UTF-8
|
2
2
|
require 'http'
|
3
3
|
require 'open-uri'
|
4
4
|
require 'nokogiri'
|
@@ -16,7 +16,7 @@ require_relative 'input/dir'
|
|
16
16
|
require_relative 'input/queue'
|
17
17
|
require_relative 'input/rpc'
|
18
18
|
|
19
|
-
#require_relative 'ext/xml_utility_node'
|
19
|
+
# require_relative 'ext/xml_utility_node'
|
20
20
|
module DataCollector
|
21
21
|
class Input
|
22
22
|
attr_reader :raw
|
@@ -25,13 +25,14 @@ module DataCollector
|
|
25
25
|
@logger = Logger.new(STDOUT)
|
26
26
|
end
|
27
27
|
|
28
|
-
def from_uri(source, options = {})
|
28
|
+
def from_uri(source, options = {}, &block)
|
29
|
+
block_consumed = false
|
29
30
|
source = CGI.unescapeHTML(source)
|
30
31
|
@logger.info("Reading #{source}")
|
31
32
|
raise DataCollector::Error, 'from_uri expects a scheme like file:// of https://' unless source =~ /:\/\//
|
32
33
|
|
33
34
|
scheme, path = source.split('://')
|
34
|
-
source="#{scheme}://#{URI.encode_www_form_component(path)}"
|
35
|
+
source = "#{scheme}://#{URI.encode_www_form_component(path)}"
|
35
36
|
uri = URI(source)
|
36
37
|
begin
|
37
38
|
data = nil
|
@@ -43,11 +44,14 @@ module DataCollector
|
|
43
44
|
when 'file'
|
44
45
|
absolute_path = File.absolute_path("#{URI.decode_www_form_component("#{uri.host}#{uri.path}")}")
|
45
46
|
if File.directory?(absolute_path)
|
46
|
-
#raise DataCollector::Error, "#{uri.host}/#{uri.path} not found" unless File.exist?("#{uri.host}/#{uri.path}")
|
47
47
|
return from_dir(uri, options)
|
48
48
|
else
|
49
|
-
|
50
|
-
|
49
|
+
if block_given?
|
50
|
+
data = from_file(uri, options, &block)
|
51
|
+
block_consumed = true if data.is_a?(TrueClass)
|
52
|
+
else
|
53
|
+
data = from_file(uri, options)
|
54
|
+
end
|
51
55
|
end
|
52
56
|
when /amqp/
|
53
57
|
if uri.scheme =~ /^rpc/
|
@@ -61,7 +65,7 @@ module DataCollector
|
|
61
65
|
|
62
66
|
data = data.nil? ? 'no data found' : data
|
63
67
|
|
64
|
-
if block_given?
|
68
|
+
if block_given? && !block_consumed
|
65
69
|
yield data
|
66
70
|
else
|
67
71
|
data
|
@@ -94,7 +98,7 @@ module DataCollector
|
|
94
98
|
|
95
99
|
http = HTTP
|
96
100
|
|
97
|
-
#http.use(logging: {logger: @logger})
|
101
|
+
# http.use(logging: {logger: @logger})
|
98
102
|
|
99
103
|
if options.key?(:user) && options.key?(:password)
|
100
104
|
@logger.debug "Set Basic_auth"
|
@@ -102,34 +106,33 @@ module DataCollector
|
|
102
106
|
password = options[:password]
|
103
107
|
http = HTTP.basic_auth(user: user, pass: password)
|
104
108
|
elsif options.key?(:bearer_token)
|
105
|
-
@logger.debug
|
109
|
+
@logger.debug "Set authorization bearer token"
|
106
110
|
bearer = options[:bearer_token]
|
107
111
|
bearer = "Bearer #{bearer}" unless bearer =~ /^Bearer /i
|
108
112
|
http = HTTP.auth(bearer)
|
109
113
|
end
|
110
114
|
|
111
|
-
if options.key?(:cookies)
|
112
|
-
@logger.debug
|
113
|
-
http = http.cookies(
|
115
|
+
if options.key?(:cookies)
|
116
|
+
@logger.debug "Set cookies"
|
117
|
+
http = http.cookies(options[:cookies])
|
114
118
|
end
|
115
119
|
|
116
|
-
if options.key?(:headers)
|
117
|
-
@logger.debug
|
118
|
-
http = http.headers(
|
120
|
+
if options.key?(:headers)
|
121
|
+
@logger.debug "Set http headers"
|
122
|
+
http = http.headers(options[:headers])
|
119
123
|
end
|
120
|
-
|
124
|
+
|
121
125
|
if options.key?(:verify_ssl) && uri.scheme.eql?('https')
|
122
126
|
@logger.warn "Disabling SSL verification. "
|
123
|
-
#shouldn't use this but we all do ...
|
127
|
+
# shouldn't use this but we all do ...
|
124
128
|
ctx = OpenSSL::SSL::SSLContext.new
|
125
129
|
ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
126
130
|
|
127
131
|
http_response = http.follow.get(escape_uri(uri), ssl_context: ctx)
|
128
|
-
|
129
132
|
else
|
130
133
|
http_response = http.follow.get(escape_uri(uri))
|
131
134
|
end
|
132
|
-
|
135
|
+
|
133
136
|
case http_response.code
|
134
137
|
when 200..299
|
135
138
|
@raw = data = http_response.body.to_s
|
@@ -159,7 +162,7 @@ module DataCollector
|
|
159
162
|
end
|
160
163
|
end
|
161
164
|
|
162
|
-
raise '206 Partial Content' if http_response.code ==206
|
165
|
+
raise '206 Partial Content' if http_response.code == 206
|
163
166
|
|
164
167
|
when 401
|
165
168
|
raise DataCollector::InputError, 'Unauthorized'
|
@@ -175,7 +178,7 @@ module DataCollector
|
|
175
178
|
data
|
176
179
|
end
|
177
180
|
|
178
|
-
def from_file(uri, options = {})
|
181
|
+
def from_file(uri, options = {}, &block)
|
179
182
|
data = nil
|
180
183
|
uri = normalize_uri(uri)
|
181
184
|
absolute_path = File.absolute_path(uri)
|
@@ -190,12 +193,29 @@ module DataCollector
|
|
190
193
|
when '.xml'
|
191
194
|
data = xml_to_hash(data, options)
|
192
195
|
when '.gz'
|
196
|
+
tar_data = []
|
193
197
|
Minitar.open(Zlib::GzipReader.new(File.open("#{absolute_path}", 'rb'))) do |i|
|
194
198
|
i.each do |entry|
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
+
next unless entry.typeflag.eql?('0')
|
200
|
+
if block_given?
|
201
|
+
data = xml_to_hash(entry.read, options)
|
202
|
+
yield data
|
203
|
+
|
204
|
+
data = true
|
205
|
+
else
|
206
|
+
tar_data << entry.read
|
207
|
+
|
208
|
+
if tar_data.length == 1
|
209
|
+
data = xml_to_hash(tar_data.first, options)
|
210
|
+
else
|
211
|
+
data = []
|
212
|
+
tar_data.each do |d|
|
213
|
+
data << xml_to_hash(d, options)
|
214
|
+
end
|
215
|
+
end
|
216
|
+
end #block
|
217
|
+
end #entry
|
218
|
+
end #tar
|
199
219
|
when '.csv'
|
200
220
|
data = csv_to_hash(data)
|
201
221
|
else
|
@@ -219,9 +239,12 @@ module DataCollector
|
|
219
239
|
end
|
220
240
|
|
221
241
|
def xml_to_hash(data, options = {})
|
222
|
-
#gsub('<\/', '< /') outherwise wrong XML-parsing (see records lirias1729192 )
|
223
|
-
data.
|
242
|
+
# gsub('<\/', '< /') outherwise wrong XML-parsing (see records lirias1729192 )
|
243
|
+
return unless data.is_a?(String)
|
244
|
+
data.force_encoding('UTF-8')
|
245
|
+
data = data.encode("UTF-8", invalid: :replace, replace: "")
|
224
246
|
data = data.gsub /</, '< /'
|
247
|
+
|
225
248
|
xml_typecast = options.with_indifferent_access.key?('xml_typecast') ? options.with_indifferent_access['xml_typecast'] : true
|
226
249
|
nori = Nori.new(parser: :nokogiri, advanced_typecasting: xml_typecast, strip_namespaces: true, convert_tags_to: lambda { |tag| tag.gsub(/^@/, '_') })
|
227
250
|
nori.parse(data)
|
@@ -245,7 +268,7 @@ module DataCollector
|
|
245
268
|
file_type = if headers.include?('Content-Type')
|
246
269
|
headers['Content-Type'].split(';').first
|
247
270
|
else
|
248
|
-
@logger.debug
|
271
|
+
@logger.debug "No Header content-type available"
|
249
272
|
MIME::Types.of(filename_from(headers)).first.content_type
|
250
273
|
end
|
251
274
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_collector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.29.0
|
4
|
+
version: 0.30.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mehmet Celik
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-08-
|
11
|
+
date: 2023-08-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|