data_collector 0.51.0 → 0.53.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c14c5e51a7f7eb0afc8b3c9e8580dd6f76324f08ee1fd06c73e58df68e368bd7
4
- data.tar.gz: f2c856272c5feea8a432055b8d4c3220bddadde7fe66d1d0602e408f7686f082
3
+ metadata.gz: 749914ea43bad2734c79a131eb5db2f1ef2b953fa386a9cb3eb50698e7d2f1f4
4
+ data.tar.gz: 0402100bbfe402ad1c96cc0b767c608a917402e748ea4f57b9bf298ca9510dba
5
5
  SHA512:
6
- metadata.gz: 2d8324edc28e85ddb5641da200d584b4d4a59d5ac7a8686fe297ca52ef326eccba9d183b3971f27070dd5779bac9a371999336f0b8f20d36b237fa3ced7feaaa
7
- data.tar.gz: 462d507afd995e24b98fdd98c2aecd1325d60eb61d582c34b0b6590cdd181bab6d499cae15cb1895d1fb2863ebc4fc60328907ff6b83a10e9bc8d2ba77dad1f0
6
+ metadata.gz: b00c092e0670a26a85bbe1299c687a132c65af7950c5b0f74ff537a8ba948af27617164bd8beebc3b072d424fafdd58865e8cafbaa03d5e6aafb8e682806ba43
7
+ data.tar.gz: 4072e26cfd9a2ef57b094b3ea7cc340f0d735e8ca4356d6e346f725b2486e7284b624936724c0545677571d7ff2a0c447785374778cb55080e69cf54bc7df1a3
@@ -29,60 +29,64 @@ module DataCollector
29
29
 
30
30
  def from_uri(source, options = {}, &block)
31
31
  block_consumed = false
32
- source = CGI.unescapeHTML(source)
33
- @logger.info("Reading #{source}")
34
- raise DataCollector::Error, 'from_uri expects a scheme like file:// of https://' unless source =~ /:\/\//
35
-
36
- scheme, path = source.split('://')
37
- source = "#{scheme}://#{URI.encode_www_form_component(path)}"
38
- uri = URI(source)
39
- begin
40
- data = nil
41
- case uri.scheme
42
- when 'http'
43
- data = from_http(uri, options)
44
- when 'https'
45
- data = from_https(uri, options)
46
- when 'file'
47
- absolute_path = File.absolute_path("#{URI.decode_www_form_component("#{uri.host}#{uri.path}")}")
48
- if File.directory?(absolute_path)
49
- return from_dir(uri, options)
50
- else
51
- if block_given?
52
- data = from_file(uri, options, &block)
53
- block_consumed = true if data.is_a?(TrueClass)
32
+ data = nil
33
+ if source.is_a?(StringIO)
34
+ data = from_stringio(source, options)
35
+ else
36
+ source = CGI.unescapeHTML(source)
37
+ @logger.info("Reading #{source}")
38
+ raise DataCollector::Error, 'from_uri expects a scheme like file:// of https://' unless source =~ /:\/\//
39
+
40
+ scheme, path = source.split('://')
41
+ source = "#{scheme}://#{URI.encode_www_form_component(path)}"
42
+ uri = URI(source)
43
+ begin
44
+ case uri.scheme
45
+ when 'http'
46
+ data = from_http(uri, options)
47
+ when 'https'
48
+ data = from_https(uri, options)
49
+ when 'file'
50
+ absolute_path = File.absolute_path("#{URI.decode_www_form_component("#{uri.host}#{uri.path}")}")
51
+ if File.directory?(absolute_path)
52
+ return from_dir(uri, options)
53
+ else
54
+ if block_given?
55
+ data = from_file(uri, options, &block)
56
+ block_consumed = true if data.is_a?(TrueClass)
57
+ else
58
+ data = from_file(uri, options)
59
+ end
60
+ end
61
+ when /amqp/
62
+ if uri.scheme =~ /^rpc/
63
+ data = from_rpc(uri, options)
54
64
  else
55
- data = from_file(uri, options)
65
+ data = from_queue(uri, options)
56
66
  end
57
- end
58
- when /amqp/
59
- if uri.scheme =~ /^rpc/
60
- data = from_rpc(uri, options)
61
67
  else
62
- data = from_queue(uri, options)
68
+ raise "Do not know how to process #{source}"
63
69
  end
64
- else
65
- raise "Do not know how to process #{source}"
66
- end
67
70
 
68
- data = data.nil? ? 'no data found' : data
69
-
70
- if block_given? && !block_consumed
71
- yield data
72
- else
73
- data
74
71
  end
75
- rescue DataCollector::InputError => e
76
- @logger.error(e.message)
77
- raise e
78
- rescue DataCollector::Error => e
79
- @logger.error(e.message)
80
- nil
81
- rescue StandardError => e
82
- @logger.error(e.message)
83
- puts e.backtrace.join("\n")
84
- nil
85
72
  end
73
+ data = data.nil? ? 'no data found' : data
74
+
75
+ if block_given? && !block_consumed
76
+ yield data
77
+ else
78
+ data
79
+ end
80
+ rescue DataCollector::InputError => e
81
+ @logger.error(e.message)
82
+ raise e
83
+ rescue DataCollector::Error => e
84
+ @logger.error(e.message)
85
+ nil
86
+ rescue StandardError => e
87
+ @logger.error(e.message)
88
+ puts e.backtrace.join("\n")
89
+ nil
86
90
  end
87
91
 
88
92
  private
@@ -204,6 +208,23 @@ module DataCollector
204
208
  data
205
209
  end
206
210
 
211
+ def from_stringio(sio, options = {}, &block)
212
+ raise DataCollector::InputError, "No IO input" unless sio.is_a?(StringIO)
213
+ raise DataCollector::InputError, "content_type option not supplied" unless options.key?(:content_type)
214
+
215
+ preferred_extension = MIME::Types[options[:content_type]].first.extensions.first || 'txt'
216
+
217
+ file = Tempfile.new(["dc_", ".#{preferred_extension}"])
218
+ begin
219
+ sio.rewind
220
+ file.write(sio.read)
221
+ file.close
222
+ from_file(URI("file://#{file.path}"))
223
+ ensure
224
+ file.unlink
225
+ end
226
+ end
227
+
207
228
  def from_file(uri, options = {}, &block)
208
229
  data = nil
209
230
  uri = normalize_uri(uri)
@@ -167,7 +167,8 @@ module DataCollector
167
167
 
168
168
  data[:response_date] = DateTime.now.xmlschema
169
169
 
170
- ERB.new(File.read(erb_file), 0, '>').result(binding)
170
+ #ERB.new(File.read(erb_file), 0, '>').result(binding)
171
+ ERB.new(File.read(erb_file), trim_mode: '-').result(binding)
171
172
  rescue Exception => e
172
173
  raise "unable to transform to text: #{e.message}"
173
174
  end
@@ -1,4 +1,4 @@
1
1
  # encoding: utf-8
2
2
  module DataCollector
3
- VERSION = "0.51.0"
3
+ VERSION = "0.53.0"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_collector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.51.0
4
+ version: 0.53.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mehmet Celik
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-04-17 00:00:00.000000000 Z
11
+ date: 2024-07-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -351,7 +351,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
351
351
  - !ruby/object:Gem::Version
352
352
  version: '0'
353
353
  requirements: []
354
- rubygems_version: 3.5.6
354
+ rubygems_version: 3.5.10
355
355
  signing_key:
356
356
  specification_version: 4
357
357
  summary: ETL helper library