data_collector 0.25.0 → 0.27.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8065c0d1b54cf1c39be5cfcb70a4fae1f33b02021182c75538ed03e73393d35a
4
- data.tar.gz: 879054ad24b178f08bfea7914c5fbc593c32c611bd4e3ad833869a3e5b36d5b1
3
+ metadata.gz: b001584d7cb8f525f22370a79a4921104358f9de512d81da6236f65a590220bc
4
+ data.tar.gz: 773bf24b624e28e056a82f0c665e603a119830c9cc872d1938be02ab7251ad03
5
5
  SHA512:
6
- metadata.gz: 390ac889c52055cfd8f5326c6e7c1549faee6b8c41af4535b9fc5d3038701f62c441caaf41895d7ad64b4941e609fdee32ac745255b9cb2fbc0981d787b00847
7
- data.tar.gz: dbc57c97f30e5659ccfebba0850e99eee24ecada4a5e9a136ae6b036f19b587a4d66460e509c32d36be13cd4d745ed89f9a73e11a32b1b15fe800036c1836bea
6
+ metadata.gz: 8d0a62ddf8408c72e6277080f7105bd8d12f39c2f30e4a44910d890932a50ef9399d096d98b990b87db422a6af7557821e45045a049c64ebbdd90d8cd811062c
7
+ data.tar.gz: 53e2290744797a8763ccd0491a02c9dd1ee4e73919f95c20d66bb03889cca18f31cb4090728f1ef3acee6b76ef9e062f4196d17b90b5bf0ec3d2132e74543c2a
data/README.md CHANGED
@@ -8,6 +8,7 @@ include DataCollector::Core
8
8
  ```
9
9
  Every object can be used on its own.
10
10
 
11
+ ### DataCollector Objects
11
12
  #### Pipeline
12
13
  Allows you to create a simple pipeline of operations to process data. With a data pipeline, you can collect, process, and transform data, and then transfer it to various systems and applications.
13
14
 
@@ -20,6 +21,7 @@ executed in the [ISO8601 duration format](https://www.digi.com/resources/documen
20
21
  - schedule: [ISO8601 duration format](https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm)
21
22
  - cron: in cron format, e.g. '1 12 * * *'; intervals are not supported
22
23
  - uri: a directory/file to watch
24
+ - xml_typecast: true/false -> convert string values to TrueClass, FalseClass, Time, Date, and DateTime
23
25
  - .run: start the pipeline. blocking if a schedule is supplied
24
26
  - .stop: stop the pipeline
25
27
  - .pause: pause the pipeline. Restart using .run
@@ -74,12 +76,14 @@ For a push input component, a listener is created with a processing logic block
74
76
  A push happens when new data is created in a directory, message queue, ...
75
77
 
76
78
  ```ruby
77
- from_uri(source, options = {:raw, :content_type})
79
+ from_uri(source, options = {:raw, :content_type, :headers, :cookies})
78
80
  ```
79
81
  - source: a URI with a scheme of http, https, file, or amqp
80
82
  - options:
81
83
  - raw: _boolean_ do not parse
82
84
  - content_type: _string_ force a content_type if the 'Content-Type' returned by the http server is incorrect
85
+ - headers: request headers
86
+ - cookies: session cookies etc.
83
87
 
84
88
  ###### example:
85
89
  ```ruby
@@ -55,7 +55,7 @@ module DataCollector
55
55
  parse_uri
56
56
  server.rabbitmq_url = @bunny_uri.to_s
57
57
  server.rabbitmq_exchange = @bunny_channel
58
- server.logger = DataCollector::Core.logger
58
+ #server.logger = DataCollector::Core.logger
59
59
  end
60
60
  end
61
61
 
@@ -31,7 +31,7 @@ module DataCollector
31
31
  raise DataCollector::Error 'from_uri expects a scheme like file:// of https://' unless source =~ /:\/\//
32
32
 
33
33
  scheme, path = source.split('://')
34
- source="#{scheme}://#{URI.encode_uri_component(path)}"
34
+ source="#{scheme}://#{URI.encode_www_form_component(path)}"
35
35
  uri = URI(source)
36
36
  begin
37
37
  data = nil
@@ -41,7 +41,7 @@ module DataCollector
41
41
  when 'https'
42
42
  data = from_https(uri, options)
43
43
  when 'file'
44
- absolute_path = File.absolute_path("#{URI.decode_uri_component("#{uri.host}#{uri.path}")}")
44
+ absolute_path = File.absolute_path("#{URI.decode_www_form_component("#{uri.host}#{uri.path}")}")
45
45
  if File.directory?(absolute_path)
46
46
  #raise DataCollector::Error, "#{uri.host}/#{uri.path} not found" unless File.exist?("#{uri.host}/#{uri.path}")
47
47
  return from_dir(uri, options)
@@ -80,7 +80,7 @@ module DataCollector
80
80
  end
81
81
 
82
82
  def from_https(uri, options = {})
83
- uri = URI.decode_uri_component("#{uri.to_s}")
83
+ uri = URI.decode_www_form_component("#{uri.to_s}")
84
84
  data = nil
85
85
  if options.with_indifferent_access.include?(:logging) && options.with_indifferent_access[:logging]
86
86
  HTTP.default_options = HTTP::Options.new(features: { logging: { logger: @logger } })
@@ -102,6 +102,16 @@ module DataCollector
102
102
  http = HTTP.auth(bearer)
103
103
  end
104
104
 
105
+ if options.key?(:cookies)
106
+ @logger.debug "Set cookies"
107
+ http = http.cookies( options[:cookies] )
108
+ end
109
+
110
+ if options.key?(:headers)
111
+ @logger.debug "Set http headers"
112
+ http = http.headers( options[:headers] )
113
+ end
114
+
105
115
  if options.key?(:verify_ssl) && uri.scheme.eql?('https')
106
116
  @logger.warn "Disabling SSL verification. "
107
117
  #shouldn't use this but we all do ...
@@ -109,10 +119,11 @@ module DataCollector
109
119
  ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
110
120
 
111
121
  http_response = http.follow.get(escape_uri(uri), ssl_context: ctx)
122
+
112
123
  else
113
124
  http_response = http.follow.get(escape_uri(uri))
114
125
  end
115
-
126
+
116
127
  case http_response.code
117
128
  when 200..299
118
129
  @raw = data = http_response.body.to_s
@@ -130,15 +141,15 @@ module DataCollector
130
141
  when 'application/json'
131
142
  data = JSON.parse(data)
132
143
  when 'application/atom+xml'
133
- data = xml_to_hash(data)
144
+ data = xml_to_hash(data, options)
134
145
  when 'text/csv'
135
146
  data = csv_to_hash(data)
136
147
  when 'application/xml'
137
- data = xml_to_hash(data)
148
+ data = xml_to_hash(data, options)
138
149
  when 'text/xml'
139
- data = xml_to_hash(data)
150
+ data = xml_to_hash(data, options)
140
151
  else
141
- data = xml_to_hash(data)
152
+ data = xml_to_hash(data, options)
142
153
  end
143
154
  end
144
155
 
@@ -171,14 +182,14 @@ module DataCollector
171
182
  when '.json'
172
183
  data = JSON.parse(data)
173
184
  when '.xml'
174
- data = xml_to_hash(data)
185
+ data = xml_to_hash(data, options)
175
186
  when '.gz'
176
187
  Minitar.open(Zlib::GzipReader.new(File.open("#{absolute_path}", 'rb'))) do |i|
177
188
  i.each do |entry|
178
189
  data = entry.read
179
190
  end
180
191
  end
181
- data = xml_to_hash(data)
192
+ data = xml_to_hash(data, options)
182
193
  when '.csv'
183
194
  data = csv_to_hash(data)
184
195
  else
@@ -201,10 +212,11 @@ module DataCollector
201
212
  DataCollector::Input::Rpc.new(uri, options)
202
213
  end
203
214
 
204
- def xml_to_hash(data)
215
+ def xml_to_hash(data, options = {})
205
216
  #gsub('<\/', '< /') outherwise wrong XML-parsing (see records lirias1729192 )
206
217
  data = data.gsub /</, '< /'
207
- nori = Nori.new(parser: :nokogiri, strip_namespaces: true, convert_tags_to: lambda { |tag| tag.gsub(/^@/, '_') })
218
+ xml_typecast = options.with_indifferent_access.key?('xml_typecast') ? options.with_indifferent_access['xml_typecast'] : true
219
+ nori = Nori.new(parser: :nokogiri, advanced_typecasting: xml_typecast, strip_namespaces: true, convert_tags_to: lambda { |tag| tag.gsub(/^@/, '_') })
208
220
  nori.parse(data)
209
221
  #JSON.parse(nori.parse(data).to_json)
210
222
  end
@@ -235,7 +247,7 @@ module DataCollector
235
247
  end
236
248
 
237
249
  def normalize_uri(uri)
238
- "#{URI.decode_uri_component(uri.host)}#{URI.decode_uri_component(uri.path)}"
250
+ "#{URI.decode_www_form_component(uri.host)}#{URI.decode_www_form_component(uri.path)}"
239
251
  end
240
252
  end
241
253
  end
@@ -1,4 +1,4 @@
1
1
  # encoding: utf-8
2
2
  module DataCollector
3
- VERSION = "0.25.0"
3
+ VERSION = "0.27.0"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_collector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.25.0
4
+ version: 0.27.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mehmet Celik
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-07-31 00:00:00.000000000 Z
11
+ date: 2023-08-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport