data_collector 0.25.0 → 0.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8065c0d1b54cf1c39be5cfcb70a4fae1f33b02021182c75538ed03e73393d35a
4
- data.tar.gz: 879054ad24b178f08bfea7914c5fbc593c32c611bd4e3ad833869a3e5b36d5b1
3
+ metadata.gz: b001584d7cb8f525f22370a79a4921104358f9de512d81da6236f65a590220bc
4
+ data.tar.gz: 773bf24b624e28e056a82f0c665e603a119830c9cc872d1938be02ab7251ad03
5
5
  SHA512:
6
- metadata.gz: 390ac889c52055cfd8f5326c6e7c1549faee6b8c41af4535b9fc5d3038701f62c441caaf41895d7ad64b4941e609fdee32ac745255b9cb2fbc0981d787b00847
7
- data.tar.gz: dbc57c97f30e5659ccfebba0850e99eee24ecada4a5e9a136ae6b036f19b587a4d66460e509c32d36be13cd4d745ed89f9a73e11a32b1b15fe800036c1836bea
6
+ metadata.gz: 8d0a62ddf8408c72e6277080f7105bd8d12f39c2f30e4a44910d890932a50ef9399d096d98b990b87db422a6af7557821e45045a049c64ebbdd90d8cd811062c
7
+ data.tar.gz: 53e2290744797a8763ccd0491a02c9dd1ee4e73919f95c20d66bb03889cca18f31cb4090728f1ef3acee6b76ef9e062f4196d17b90b5bf0ec3d2132e74543c2a
data/README.md CHANGED
@@ -8,6 +8,7 @@ include DataCollector::Core
8
8
  ```
9
9
  Every object can be used on its own.
10
10
 
11
+ ### DataCollector Objects
11
12
  #### Pipeline
12
13
  Allows you to create a simple pipeline of operations to process data. With a data pipeline, you can collect, process, and transform data, and then transfer it to various systems and applications.
13
14
 
@@ -20,6 +21,7 @@ executed in the [ISO8601 duration format](https://www.digi.com/resources/documen
20
21
  - schedule: [ISO8601 duration format](https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm)
21
22
  - cron: in cron format ex. '1 12 * * *' intervals are not supported
22
23
  - uri: a directory/file to watch
24
+ - xml_typecast: true/false -> convert string values to TrueClass, FalseClass, Time, Date, and DateTime
23
25
  - .run: start the pipeline. blocking if a schedule is supplied
24
26
  - .stop: stop the pipeline
25
27
  - .pause: pause the pipeline. Restart using .run
@@ -74,12 +76,14 @@ For a push input component, a listener is created with a processing logic block
74
76
  A push happens when new data is created in a directory, message queue, ...
75
77
 
76
78
  ```ruby
77
- from_uri(source, options = {:raw, :content_type})
79
+ from_uri(source, options = {:raw, :content_type, :headers, :cookies})
78
80
  ```
79
81
  - source: an uri with a scheme of http, https, file, amqp
80
82
  - options:
81
83
  - raw: _boolean_ do not parse
82
84
  - content_type: _string_ force a content_type if the 'Content-Type' returned by the http server is incorrect
85
+ - headers: request headers
86
+ - cookies: session cookies etc.
83
87
 
84
88
  ###### example:
85
89
  ```ruby
@@ -55,7 +55,7 @@ module DataCollector
55
55
  parse_uri
56
56
  server.rabbitmq_url = @bunny_uri.to_s
57
57
  server.rabbitmq_exchange = @bunny_channel
58
- server.logger = DataCollector::Core.logger
58
+ #server.logger = DataCollector::Core.logger
59
59
  end
60
60
  end
61
61
 
@@ -31,7 +31,7 @@ module DataCollector
31
31
  raise DataCollector::Error 'from_uri expects a scheme like file:// of https://' unless source =~ /:\/\//
32
32
 
33
33
  scheme, path = source.split('://')
34
- source="#{scheme}://#{URI.encode_uri_component(path)}"
34
+ source="#{scheme}://#{URI.encode_www_form_component(path)}"
35
35
  uri = URI(source)
36
36
  begin
37
37
  data = nil
@@ -41,7 +41,7 @@ module DataCollector
41
41
  when 'https'
42
42
  data = from_https(uri, options)
43
43
  when 'file'
44
- absolute_path = File.absolute_path("#{URI.decode_uri_component("#{uri.host}#{uri.path}")}")
44
+ absolute_path = File.absolute_path("#{URI.decode_www_form_component("#{uri.host}#{uri.path}")}")
45
45
  if File.directory?(absolute_path)
46
46
  #raise DataCollector::Error, "#{uri.host}/#{uri.path} not found" unless File.exist?("#{uri.host}/#{uri.path}")
47
47
  return from_dir(uri, options)
@@ -80,7 +80,7 @@ module DataCollector
80
80
  end
81
81
 
82
82
  def from_https(uri, options = {})
83
- uri = URI.decode_uri_component("#{uri.to_s}")
83
+ uri = URI.decode_www_form_component("#{uri.to_s}")
84
84
  data = nil
85
85
  if options.with_indifferent_access.include?(:logging) && options.with_indifferent_access[:logging]
86
86
  HTTP.default_options = HTTP::Options.new(features: { logging: { logger: @logger } })
@@ -102,6 +102,16 @@ module DataCollector
102
102
  http = HTTP.auth(bearer)
103
103
  end
104
104
 
105
+ if options.key?(:cookies)
106
+ @logger.debug "Set cookies"
107
+ http = http.cookies( options[:cookies] )
108
+ end
109
+
110
+ if options.key?(:headers)
111
+ @logger.debug "Set http headers"
112
+ http = http.headers( options[:headers] )
113
+ end
114
+
105
115
  if options.key?(:verify_ssl) && uri.scheme.eql?('https')
106
116
  @logger.warn "Disabling SSL verification. "
107
117
  #shouldn't use this but we all do ...
@@ -109,10 +119,11 @@ module DataCollector
109
119
  ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
110
120
 
111
121
  http_response = http.follow.get(escape_uri(uri), ssl_context: ctx)
122
+
112
123
  else
113
124
  http_response = http.follow.get(escape_uri(uri))
114
125
  end
115
-
126
+
116
127
  case http_response.code
117
128
  when 200..299
118
129
  @raw = data = http_response.body.to_s
@@ -130,15 +141,15 @@ module DataCollector
130
141
  when 'application/json'
131
142
  data = JSON.parse(data)
132
143
  when 'application/atom+xml'
133
- data = xml_to_hash(data)
144
+ data = xml_to_hash(data, options)
134
145
  when 'text/csv'
135
146
  data = csv_to_hash(data)
136
147
  when 'application/xml'
137
- data = xml_to_hash(data)
148
+ data = xml_to_hash(data, options)
138
149
  when 'text/xml'
139
- data = xml_to_hash(data)
150
+ data = xml_to_hash(data, options)
140
151
  else
141
- data = xml_to_hash(data)
152
+ data = xml_to_hash(data, options)
142
153
  end
143
154
  end
144
155
 
@@ -171,14 +182,14 @@ module DataCollector
171
182
  when '.json'
172
183
  data = JSON.parse(data)
173
184
  when '.xml'
174
- data = xml_to_hash(data)
185
+ data = xml_to_hash(data, options)
175
186
  when '.gz'
176
187
  Minitar.open(Zlib::GzipReader.new(File.open("#{absolute_path}", 'rb'))) do |i|
177
188
  i.each do |entry|
178
189
  data = entry.read
179
190
  end
180
191
  end
181
- data = xml_to_hash(data)
192
+ data = xml_to_hash(data, options)
182
193
  when '.csv'
183
194
  data = csv_to_hash(data)
184
195
  else
@@ -201,10 +212,11 @@ module DataCollector
201
212
  DataCollector::Input::Rpc.new(uri, options)
202
213
  end
203
214
 
204
- def xml_to_hash(data)
215
+ def xml_to_hash(data, options = {})
205
216
  #gsub('<\/', '< /') outherwise wrong XML-parsing (see records lirias1729192 )
206
217
  data = data.gsub /</, '< /'
207
- nori = Nori.new(parser: :nokogiri, strip_namespaces: true, convert_tags_to: lambda { |tag| tag.gsub(/^@/, '_') })
218
+ xml_typecast = options.with_indifferent_access.key?('xml_typecast') ? options.with_indifferent_access['xml_typecast'] : true
219
+ nori = Nori.new(parser: :nokogiri, advanced_typecasting: xml_typecast, strip_namespaces: true, convert_tags_to: lambda { |tag| tag.gsub(/^@/, '_') })
208
220
  nori.parse(data)
209
221
  #JSON.parse(nori.parse(data).to_json)
210
222
  end
@@ -235,7 +247,7 @@ module DataCollector
235
247
  end
236
248
 
237
249
  def normalize_uri(uri)
238
- "#{URI.decode_uri_component(uri.host)}#{URI.decode_uri_component(uri.path)}"
250
+ "#{URI.decode_www_form_component(uri.host)}#{URI.decode_www_form_component(uri.path)}"
239
251
  end
240
252
  end
241
253
  end
@@ -1,4 +1,4 @@
1
1
  # encoding: utf-8
2
2
  module DataCollector
3
- VERSION = "0.25.0"
3
+ VERSION = "0.27.0"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_collector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.25.0
4
+ version: 0.27.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mehmet Celik
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-07-31 00:00:00.000000000 Z
11
+ date: 2023-08-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport