data_collector 0.29.0 → 0.30.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d09248a6f7a15b7c6ad783502506e9cdc3ba5010070a810b9bc8c803b15fc862
4
- data.tar.gz: d47019eddd26810162229555997d957f35823e45b105036cbd1e4f90d361be81
3
+ metadata.gz: 47a88bcc23a8fdc3922f22b3cb984eed3ea7b725d139e72b1f6f669d70ebb8e6
4
+ data.tar.gz: 62a4c5775dca4cdd6c6776c6d8628a717d1eb021a314b663ac7f0b6c95e317ac
5
5
  SHA512:
6
- metadata.gz: 8f508edbc7ad18d413bf6deac82c0848ab04955edd347b2a749b0ecf8baf3caef4052fdcd8b424d6b17e82b8da18e4f9cc8b40160d1b22566241986a4d5c6e7c
7
- data.tar.gz: 4c339eb2adc53e2afe615a9b1ebca1649c0466c51366babd574bf7bdfbb4ef0c8df121308561514c69dc6fcd31cd6c4fa6e8d689ab0cff0e9bcff9554f96f85a
6
+ metadata.gz: 95cff72022a43f18c9495cfe6b1f3a66a1f086e196dcb03528152c5a3c4dc34b8e551d9f6fb2d576e758a9c61b34693632990342488b2406fcd3b44a003e4381
7
+ data.tar.gz: 71abb93a64fbb419b5eb4f5e8e0ba6221f69b289fe8b133bbc5b208446e7b4b132d4199f769775170d69f2e5fb1051836ce9ce68222ca4a199eb0a0854978826
@@ -1,4 +1,4 @@
1
- #encoding: UTF-8
1
+ # encoding: UTF-8
2
2
  require 'http'
3
3
  require 'open-uri'
4
4
  require 'nokogiri'
@@ -16,7 +16,7 @@ require_relative 'input/dir'
16
16
  require_relative 'input/queue'
17
17
  require_relative 'input/rpc'
18
18
 
19
- #require_relative 'ext/xml_utility_node'
19
+ # require_relative 'ext/xml_utility_node'
20
20
  module DataCollector
21
21
  class Input
22
22
  attr_reader :raw
@@ -25,13 +25,14 @@ module DataCollector
25
25
  @logger = Logger.new(STDOUT)
26
26
  end
27
27
 
28
- def from_uri(source, options = {})
28
+ def from_uri(source, options = {}, &block)
29
+ block_consumed = false
29
30
  source = CGI.unescapeHTML(source)
30
31
  @logger.info("Reading #{source}")
31
32
  raise DataCollector::Error, 'from_uri expects a scheme like file:// of https://' unless source =~ /:\/\//
32
33
 
33
34
  scheme, path = source.split('://')
34
- source="#{scheme}://#{URI.encode_www_form_component(path)}"
35
+ source = "#{scheme}://#{URI.encode_www_form_component(path)}"
35
36
  uri = URI(source)
36
37
  begin
37
38
  data = nil
@@ -43,11 +44,14 @@ module DataCollector
43
44
  when 'file'
44
45
  absolute_path = File.absolute_path("#{URI.decode_www_form_component("#{uri.host}#{uri.path}")}")
45
46
  if File.directory?(absolute_path)
46
- #raise DataCollector::Error, "#{uri.host}/#{uri.path} not found" unless File.exist?("#{uri.host}/#{uri.path}")
47
47
  return from_dir(uri, options)
48
48
  else
49
- # raise DataCollector::Error, "#{uri.host}/#{uri.path} not found" unless File.exist?("#{uri.host}/#{uri.path}")
50
- data = from_file(uri, options)
49
+ if block_given?
50
+ data = from_file(uri, options, &block)
51
+ block_consumed = true if data.is_a?(TrueClass)
52
+ else
53
+ data = from_file(uri, options)
54
+ end
51
55
  end
52
56
  when /amqp/
53
57
  if uri.scheme =~ /^rpc/
@@ -61,7 +65,7 @@ module DataCollector
61
65
 
62
66
  data = data.nil? ? 'no data found' : data
63
67
 
64
- if block_given?
68
+ if block_given? && !block_consumed
65
69
  yield data
66
70
  else
67
71
  data
@@ -94,7 +98,7 @@ module DataCollector
94
98
 
95
99
  http = HTTP
96
100
 
97
- #http.use(logging: {logger: @logger})
101
+ # http.use(logging: {logger: @logger})
98
102
 
99
103
  if options.key?(:user) && options.key?(:password)
100
104
  @logger.debug "Set Basic_auth"
@@ -102,34 +106,33 @@ module DataCollector
102
106
  password = options[:password]
103
107
  http = HTTP.basic_auth(user: user, pass: password)
104
108
  elsif options.key?(:bearer_token)
105
- @logger.debug "Set authorization bearer token"
109
+ @logger.debug "Set authorization bearer token"
106
110
  bearer = options[:bearer_token]
107
111
  bearer = "Bearer #{bearer}" unless bearer =~ /^Bearer /i
108
112
  http = HTTP.auth(bearer)
109
113
  end
110
114
 
111
- if options.key?(:cookies)
112
- @logger.debug "Set cookies"
113
- http = http.cookies( options[:cookies] )
115
+ if options.key?(:cookies)
116
+ @logger.debug "Set cookies"
117
+ http = http.cookies(options[:cookies])
114
118
  end
115
119
 
116
- if options.key?(:headers)
117
- @logger.debug "Set http headers"
118
- http = http.headers( options[:headers] )
120
+ if options.key?(:headers)
121
+ @logger.debug "Set http headers"
122
+ http = http.headers(options[:headers])
119
123
  end
120
-
124
+
121
125
  if options.key?(:verify_ssl) && uri.scheme.eql?('https')
122
126
  @logger.warn "Disabling SSL verification. "
123
- #shouldn't use this but we all do ...
127
+ # shouldn't use this but we all do ...
124
128
  ctx = OpenSSL::SSL::SSLContext.new
125
129
  ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
126
130
 
127
131
  http_response = http.follow.get(escape_uri(uri), ssl_context: ctx)
128
-
129
132
  else
130
133
  http_response = http.follow.get(escape_uri(uri))
131
134
  end
132
-
135
+
133
136
  case http_response.code
134
137
  when 200..299
135
138
  @raw = data = http_response.body.to_s
@@ -159,7 +162,7 @@ module DataCollector
159
162
  end
160
163
  end
161
164
 
162
- raise '206 Partial Content' if http_response.code ==206
165
+ raise '206 Partial Content' if http_response.code == 206
163
166
 
164
167
  when 401
165
168
  raise DataCollector::InputError, 'Unauthorized'
@@ -175,7 +178,7 @@ module DataCollector
175
178
  data
176
179
  end
177
180
 
178
- def from_file(uri, options = {})
181
+ def from_file(uri, options = {}, &block)
179
182
  data = nil
180
183
  uri = normalize_uri(uri)
181
184
  absolute_path = File.absolute_path(uri)
@@ -190,12 +193,29 @@ module DataCollector
190
193
  when '.xml'
191
194
  data = xml_to_hash(data, options)
192
195
  when '.gz'
196
+ tar_data = []
193
197
  Minitar.open(Zlib::GzipReader.new(File.open("#{absolute_path}", 'rb'))) do |i|
194
198
  i.each do |entry|
195
- data = entry.read
196
- end
197
- end
198
- data = xml_to_hash(data, options)
199
+ next unless entry.typeflag.eql?('0')
200
+ if block_given?
201
+ data = xml_to_hash(entry.read, options)
202
+ yield data
203
+
204
+ data = true
205
+ else
206
+ tar_data << entry.read
207
+
208
+ if tar_data.length == 1
209
+ data = xml_to_hash(tar_data.first, options)
210
+ else
211
+ data = []
212
+ tar_data.each do |d|
213
+ data << xml_to_hash(d, options)
214
+ end
215
+ end
216
+ end #block
217
+ end #entry
218
+ end #tar
199
219
  when '.csv'
200
220
  data = csv_to_hash(data)
201
221
  else
@@ -219,9 +239,12 @@ module DataCollector
219
239
  end
220
240
 
221
241
  def xml_to_hash(data, options = {})
222
- #gsub('&lt;\/', '&lt; /') outherwise wrong XML-parsing (see records lirias1729192 )
223
- data.force_encoding('UTF-8') #encode("UTF-8", invalid: :replace, replace: "")
242
+ # gsub('&lt;\/', '&lt; /') outherwise wrong XML-parsing (see records lirias1729192 )
243
+ return unless data.is_a?(String)
244
+ data.force_encoding('UTF-8')
245
+ data = data.encode("UTF-8", invalid: :replace, replace: "")
224
246
  data = data.gsub /&lt;/, '&lt; /'
247
+
225
248
  xml_typecast = options.with_indifferent_access.key?('xml_typecast') ? options.with_indifferent_access['xml_typecast'] : true
226
249
  nori = Nori.new(parser: :nokogiri, advanced_typecasting: xml_typecast, strip_namespaces: true, convert_tags_to: lambda { |tag| tag.gsub(/^@/, '_') })
227
250
  nori.parse(data)
@@ -245,7 +268,7 @@ module DataCollector
245
268
  file_type = if headers.include?('Content-Type')
246
269
  headers['Content-Type'].split(';').first
247
270
  else
248
- @logger.debug "No Header content-type available"
271
+ @logger.debug "No Header content-type available"
249
272
  MIME::Types.of(filename_from(headers)).first.content_type
250
273
  end
251
274
 
@@ -1,4 +1,4 @@
1
1
  # encoding: utf-8
2
2
  module DataCollector
3
- VERSION = "0.29.0"
3
+ VERSION = "0.30.0"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_collector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.29.0
4
+ version: 0.30.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mehmet Celik
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-08-08 00:00:00.000000000 Z
11
+ date: 2023-08-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport