data_collector 0.27.0 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b001584d7cb8f525f22370a79a4921104358f9de512d81da6236f65a590220bc
4
- data.tar.gz: 773bf24b624e28e056a82f0c665e603a119830c9cc872d1938be02ab7251ad03
3
+ metadata.gz: d09248a6f7a15b7c6ad783502506e9cdc3ba5010070a810b9bc8c803b15fc862
4
+ data.tar.gz: d47019eddd26810162229555997d957f35823e45b105036cbd1e4f90d361be81
5
5
  SHA512:
6
- metadata.gz: 8d0a62ddf8408c72e6277080f7105bd8d12f39c2f30e4a44910d890932a50ef9399d096d98b990b87db422a6af7557821e45045a049c64ebbdd90d8cd811062c
7
- data.tar.gz: 53e2290744797a8763ccd0491a02c9dd1ee4e73919f95c20d66bb03889cca18f31cb4090728f1ef3acee6b76ef9e062f4196d17b90b5bf0ec3d2132e74543c2a
6
+ metadata.gz: 8f508edbc7ad18d413bf6deac82c0848ab04955edd347b2a749b0ecf8baf3caef4052fdcd8b424d6b17e82b8da18e4f9cc8b40160d1b22566241986a4d5c6e7c
7
+ data.tar.gz: 4c339eb2adc53e2afe615a9b1ebca1649c0466c51366babd574bf7bdfbb4ef0c8df121308561514c69dc6fcd31cd6c4fa6e8d689ab0cff0e9bcff9554f96f85a
@@ -28,7 +28,7 @@ module DataCollector
28
28
  def from_uri(source, options = {})
29
29
  source = CGI.unescapeHTML(source)
30
30
  @logger.info("Reading #{source}")
31
- raise DataCollector::Error 'from_uri expects a scheme like file:// of https://' unless source =~ /:\/\//
31
+ raise DataCollector::Error, 'from_uri expects a scheme like file:// of https://' unless source =~ /:\/\//
32
32
 
33
33
  scheme, path = source.split('://')
34
34
  source="#{scheme}://#{URI.encode_www_form_component(path)}"
@@ -66,8 +66,14 @@ module DataCollector
66
66
  else
67
67
  data
68
68
  end
69
- rescue => e
70
- @logger.info(e.message)
69
+ rescue DataCollector::InputError => e
70
+ @logger.error(e.message)
71
+ raise e
72
+ rescue DataCollector::Error => e
73
+ @logger.error(e.message)
74
+ nil
75
+ rescue StandardError => e
76
+ @logger.error(e.message)
71
77
  puts e.backtrace.join("\n")
72
78
  nil
73
79
  end
@@ -156,13 +162,13 @@ module DataCollector
156
162
  raise '206 Partial Content' if http_response.code ==206
157
163
 
158
164
  when 401
159
- raise 'Unauthorized'
165
+ raise DataCollector::InputError, 'Unauthorized'
160
166
  when 403
161
- raise 'Forbidden'
167
+ raise DataCollector::InputError, 'Forbidden'
162
168
  when 404
163
- raise 'Not found'
169
+ raise DataCollector::InputError, 'Not found'
164
170
  else
165
- raise "Unable to process received status code = #{http_response.code}"
171
+ raise DataCollector::InputError, "Unable to process received status code = #{http_response.code}"
166
172
  end
167
173
 
168
174
  #[data, http_response.code]
@@ -214,11 +220,11 @@ module DataCollector
214
220
 
215
221
  def xml_to_hash(data, options = {})
216
222
  #gsub('<\/', '< /') outherwise wrong XML-parsing (see records lirias1729192 )
223
+ data.force_encoding('UTF-8') #encode("UTF-8", invalid: :replace, replace: "")
217
224
  data = data.gsub /</, '< /'
218
225
  xml_typecast = options.with_indifferent_access.key?('xml_typecast') ? options.with_indifferent_access['xml_typecast'] : true
219
226
  nori = Nori.new(parser: :nokogiri, advanced_typecasting: xml_typecast, strip_namespaces: true, convert_tags_to: lambda { |tag| tag.gsub(/^@/, '_') })
220
227
  nori.parse(data)
221
- #JSON.parse(nori.parse(data).to_json)
222
228
  end
223
229
 
224
230
  def csv_to_hash(data)
@@ -1,4 +1,4 @@
1
1
  # encoding: utf-8
2
2
  module DataCollector
3
- VERSION = "0.27.0"
3
+ VERSION = "0.29.0"
4
4
  end
@@ -8,5 +8,9 @@ require 'data_collector/pipeline'
8
8
  require 'data_collector/ext/xml_utility_node'
9
9
 
10
10
  module DataCollector
11
- class Error < StandardError; end
11
+ class Error < StandardError
12
+ end
13
+
14
+ class InputError < Error
15
+ end
12
16
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_collector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.27.0
4
+ version: 0.29.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mehmet Celik
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-08-04 00:00:00.000000000 Z
11
+ date: 2023-08-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport