data_collector 0.29.0 → 0.30.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d09248a6f7a15b7c6ad783502506e9cdc3ba5010070a810b9bc8c803b15fc862
4
- data.tar.gz: d47019eddd26810162229555997d957f35823e45b105036cbd1e4f90d361be81
3
+ metadata.gz: 47a88bcc23a8fdc3922f22b3cb984eed3ea7b725d139e72b1f6f669d70ebb8e6
4
+ data.tar.gz: 62a4c5775dca4cdd6c6776c6d8628a717d1eb021a314b663ac7f0b6c95e317ac
5
5
  SHA512:
6
- metadata.gz: 8f508edbc7ad18d413bf6deac82c0848ab04955edd347b2a749b0ecf8baf3caef4052fdcd8b424d6b17e82b8da18e4f9cc8b40160d1b22566241986a4d5c6e7c
7
- data.tar.gz: 4c339eb2adc53e2afe615a9b1ebca1649c0466c51366babd574bf7bdfbb4ef0c8df121308561514c69dc6fcd31cd6c4fa6e8d689ab0cff0e9bcff9554f96f85a
6
+ metadata.gz: 95cff72022a43f18c9495cfe6b1f3a66a1f086e196dcb03528152c5a3c4dc34b8e551d9f6fb2d576e758a9c61b34693632990342488b2406fcd3b44a003e4381
7
+ data.tar.gz: 71abb93a64fbb419b5eb4f5e8e0ba6221f69b289fe8b133bbc5b208446e7b4b132d4199f769775170d69f2e5fb1051836ce9ce68222ca4a199eb0a0854978826
@@ -1,4 +1,4 @@
1
- #encoding: UTF-8
1
+ # encoding: UTF-8
2
2
  require 'http'
3
3
  require 'open-uri'
4
4
  require 'nokogiri'
@@ -16,7 +16,7 @@ require_relative 'input/dir'
16
16
  require_relative 'input/queue'
17
17
  require_relative 'input/rpc'
18
18
 
19
- #require_relative 'ext/xml_utility_node'
19
+ # require_relative 'ext/xml_utility_node'
20
20
  module DataCollector
21
21
  class Input
22
22
  attr_reader :raw
@@ -25,13 +25,14 @@ module DataCollector
25
25
  @logger = Logger.new(STDOUT)
26
26
  end
27
27
 
28
- def from_uri(source, options = {})
28
+ def from_uri(source, options = {}, &block)
29
+ block_consumed = false
29
30
  source = CGI.unescapeHTML(source)
30
31
  @logger.info("Reading #{source}")
31
32
  raise DataCollector::Error, 'from_uri expects a scheme like file:// of https://' unless source =~ /:\/\//
32
33
 
33
34
  scheme, path = source.split('://')
34
- source="#{scheme}://#{URI.encode_www_form_component(path)}"
35
+ source = "#{scheme}://#{URI.encode_www_form_component(path)}"
35
36
  uri = URI(source)
36
37
  begin
37
38
  data = nil
@@ -43,11 +44,14 @@ module DataCollector
43
44
  when 'file'
44
45
  absolute_path = File.absolute_path("#{URI.decode_www_form_component("#{uri.host}#{uri.path}")}")
45
46
  if File.directory?(absolute_path)
46
- #raise DataCollector::Error, "#{uri.host}/#{uri.path} not found" unless File.exist?("#{uri.host}/#{uri.path}")
47
47
  return from_dir(uri, options)
48
48
  else
49
- # raise DataCollector::Error, "#{uri.host}/#{uri.path} not found" unless File.exist?("#{uri.host}/#{uri.path}")
50
- data = from_file(uri, options)
49
+ if block_given?
50
+ data = from_file(uri, options, &block)
51
+ block_consumed = true if data.is_a?(TrueClass)
52
+ else
53
+ data = from_file(uri, options)
54
+ end
51
55
  end
52
56
  when /amqp/
53
57
  if uri.scheme =~ /^rpc/
@@ -61,7 +65,7 @@ module DataCollector
61
65
 
62
66
  data = data.nil? ? 'no data found' : data
63
67
 
64
- if block_given?
68
+ if block_given? && !block_consumed
65
69
  yield data
66
70
  else
67
71
  data
@@ -94,7 +98,7 @@ module DataCollector
94
98
 
95
99
  http = HTTP
96
100
 
97
- #http.use(logging: {logger: @logger})
101
+ # http.use(logging: {logger: @logger})
98
102
 
99
103
  if options.key?(:user) && options.key?(:password)
100
104
  @logger.debug "Set Basic_auth"
@@ -102,34 +106,33 @@ module DataCollector
102
106
  password = options[:password]
103
107
  http = HTTP.basic_auth(user: user, pass: password)
104
108
  elsif options.key?(:bearer_token)
105
- @logger.debug "Set authorization bearer token"
109
+ @logger.debug "Set authorization bearer token"
106
110
  bearer = options[:bearer_token]
107
111
  bearer = "Bearer #{bearer}" unless bearer =~ /^Bearer /i
108
112
  http = HTTP.auth(bearer)
109
113
  end
110
114
 
111
- if options.key?(:cookies)
112
- @logger.debug "Set cookies"
113
- http = http.cookies( options[:cookies] )
115
+ if options.key?(:cookies)
116
+ @logger.debug "Set cookies"
117
+ http = http.cookies(options[:cookies])
114
118
  end
115
119
 
116
- if options.key?(:headers)
117
- @logger.debug "Set http headers"
118
- http = http.headers( options[:headers] )
120
+ if options.key?(:headers)
121
+ @logger.debug "Set http headers"
122
+ http = http.headers(options[:headers])
119
123
  end
120
-
124
+
121
125
  if options.key?(:verify_ssl) && uri.scheme.eql?('https')
122
126
  @logger.warn "Disabling SSL verification. "
123
- #shouldn't use this but we all do ...
127
+ # shouldn't use this but we all do ...
124
128
  ctx = OpenSSL::SSL::SSLContext.new
125
129
  ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
126
130
 
127
131
  http_response = http.follow.get(escape_uri(uri), ssl_context: ctx)
128
-
129
132
  else
130
133
  http_response = http.follow.get(escape_uri(uri))
131
134
  end
132
-
135
+
133
136
  case http_response.code
134
137
  when 200..299
135
138
  @raw = data = http_response.body.to_s
@@ -159,7 +162,7 @@ module DataCollector
159
162
  end
160
163
  end
161
164
 
162
- raise '206 Partial Content' if http_response.code ==206
165
+ raise '206 Partial Content' if http_response.code == 206
163
166
 
164
167
  when 401
165
168
  raise DataCollector::InputError, 'Unauthorized'
@@ -175,7 +178,7 @@ module DataCollector
175
178
  data
176
179
  end
177
180
 
178
- def from_file(uri, options = {})
181
+ def from_file(uri, options = {}, &block)
179
182
  data = nil
180
183
  uri = normalize_uri(uri)
181
184
  absolute_path = File.absolute_path(uri)
@@ -190,12 +193,29 @@ module DataCollector
190
193
  when '.xml'
191
194
  data = xml_to_hash(data, options)
192
195
  when '.gz'
196
+ tar_data = []
193
197
  Minitar.open(Zlib::GzipReader.new(File.open("#{absolute_path}", 'rb'))) do |i|
194
198
  i.each do |entry|
195
- data = entry.read
196
- end
197
- end
198
- data = xml_to_hash(data, options)
199
+ next unless entry.typeflag.eql?('0')
200
+ if block_given?
201
+ data = xml_to_hash(entry.read, options)
202
+ yield data
203
+
204
+ data = true
205
+ else
206
+ tar_data << entry.read
207
+
208
+ if tar_data.length == 1
209
+ data = xml_to_hash(tar_data.first, options)
210
+ else
211
+ data = []
212
+ tar_data.each do |d|
213
+ data << xml_to_hash(d, options)
214
+ end
215
+ end
216
+ end #block
217
+ end #entry
218
+ end #tar
199
219
  when '.csv'
200
220
  data = csv_to_hash(data)
201
221
  else
@@ -219,9 +239,12 @@ module DataCollector
219
239
  end
220
240
 
221
241
  def xml_to_hash(data, options = {})
222
- #gsub('&lt;\/', '&lt; /') outherwise wrong XML-parsing (see records lirias1729192 )
223
- data.force_encoding('UTF-8') #encode("UTF-8", invalid: :replace, replace: "")
242
+ # gsub('&lt;\/', '&lt; /') outherwise wrong XML-parsing (see records lirias1729192 )
243
+ return unless data.is_a?(String)
244
+ data.force_encoding('UTF-8')
245
+ data = data.encode("UTF-8", invalid: :replace, replace: "")
224
246
  data = data.gsub /&lt;/, '&lt; /'
247
+
225
248
  xml_typecast = options.with_indifferent_access.key?('xml_typecast') ? options.with_indifferent_access['xml_typecast'] : true
226
249
  nori = Nori.new(parser: :nokogiri, advanced_typecasting: xml_typecast, strip_namespaces: true, convert_tags_to: lambda { |tag| tag.gsub(/^@/, '_') })
227
250
  nori.parse(data)
@@ -245,7 +268,7 @@ module DataCollector
245
268
  file_type = if headers.include?('Content-Type')
246
269
  headers['Content-Type'].split(';').first
247
270
  else
248
- @logger.debug "No Header content-type available"
271
+ @logger.debug "No Header content-type available"
249
272
  MIME::Types.of(filename_from(headers)).first.content_type
250
273
  end
251
274
 
@@ -1,4 +1,4 @@
1
1
  # encoding: utf-8
2
2
  module DataCollector
3
- VERSION = "0.29.0"
3
+ VERSION = "0.30.0"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_collector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.29.0
4
+ version: 0.30.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mehmet Celik
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-08-08 00:00:00.000000000 Z
11
+ date: 2023-08-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport