data_collector 0.26.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 057dac92f5a83458b244a962ef0518fa093365a38c2388582de0556ac7b884ca
4
- data.tar.gz: 9aebb908d52dda01dc3efce7dcda3e40c8b4f45e718cef94052c9d37fca604eb
3
+ metadata.gz: 71b519b75704a9bb6e46601c8195ab69608f19fae12ecde7c6c57869b8fca450
4
+ data.tar.gz: aa4a0697ffb9f6bc21c45f2ecdfa13c9deca7c8b69f2e8159f4901ab6be8d646
5
5
  SHA512:
6
- metadata.gz: aa91e78ec3aef0010553886539284af4aafb3ed7014ef35e6f7094377bf9ad2efb7f6025bc7f8715d4558d8a5a1242325761a12a6e696590828fa1752c579198
7
- data.tar.gz: 7309cc34f3d022d212ab9870558909ab84be3c6550160ce822808f98edb259c96669899247a7faefec6365068ac5aa68a6d5b43f279591156f47c9f02d0e182c
6
+ metadata.gz: 44dbd4e953260ef2c66b31f8a2914f4378bbeb969c670207bfa2272c4c2d5096a8ec5b3352b913093fb601b6d6c15ebc14d029a936e88c65ddf3d6eabe32e4d2
7
+ data.tar.gz: ee74ae98b99ab8b88e81d1c2c5437d860c314683e517858dc739924049c55ce8a4614cbfa40a97ac8bcd619cd678b6121e2158f45813370ee2c36de185a7244e
data/README.md CHANGED
@@ -8,6 +8,7 @@ include DataCollector::Core
8
8
  ```
9
9
  Every object can be used on its own.
10
10
 
11
+ ### DataCollector Objects
11
12
  #### Pipeline
12
13
  Allows you to create a simple pipeline of operations to process data. With a data pipeline, you can collect, process, and transform data, and then transfer it to various systems and applications.
13
14
 
@@ -20,6 +21,7 @@ executed in the [ISO8601 duration format](https://www.digi.com/resources/documen
20
21
  - schedule: [ISO8601 duration format](https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm)
21
22
  - cron: in cron format, e.g. '1 12 * * *'; intervals are not supported
22
23
  - uri: a directory/file to watch
24
+ - xml_typecast: true/false -> convert string values to TrueClass, FalseClass, Time, Date, and DateTime
23
25
  - .run: start the pipeline. blocking if a schedule is supplied
24
26
  - .stop: stop the pipeline
25
27
  - .pause: pause the pipeline. Restart using .run
@@ -74,12 +76,14 @@ For a push input component, a listener is created with a processing logic block
74
76
  A push happens when new data is created in a directory, message queue, ...
75
77
 
76
78
  ```ruby
77
- from_uri(source, options = {:raw, :content_type})
79
+ from_uri(source, options = {:raw, :content_type, :headers, :cookies})
78
80
  ```
79
81
  - source: a URI with a scheme of http, https, file, or amqp
80
82
  - options:
81
83
  - raw: _boolean_ do not parse
82
84
  - content_type: _string_ force a content_type if the 'Content-Type' returned by the http server is incorrect
85
+ - headers: request headers
86
+ - cookies: session cookies etc.
83
87
 
84
88
  ###### example:
85
89
  ```ruby
@@ -66,7 +66,13 @@ module DataCollector
66
66
  else
67
67
  data
68
68
  end
69
- rescue => e
69
+ rescue DataCollector::InputError => e
70
+ @logger.info(e.message)
71
+ raise e
72
+ rescue DataCollector::Error => e
73
+ @logger.info(e.message)
74
+ nil
75
+ rescue StandardError => e
70
76
  @logger.info(e.message)
71
77
  puts e.backtrace.join("\n")
72
78
  nil
@@ -141,28 +147,28 @@ module DataCollector
141
147
  when 'application/json'
142
148
  data = JSON.parse(data)
143
149
  when 'application/atom+xml'
144
- data = xml_to_hash(data)
150
+ data = xml_to_hash(data, options)
145
151
  when 'text/csv'
146
152
  data = csv_to_hash(data)
147
153
  when 'application/xml'
148
- data = xml_to_hash(data)
154
+ data = xml_to_hash(data, options)
149
155
  when 'text/xml'
150
- data = xml_to_hash(data)
156
+ data = xml_to_hash(data, options)
151
157
  else
152
- data = xml_to_hash(data)
158
+ data = xml_to_hash(data, options)
153
159
  end
154
160
  end
155
161
 
156
162
  raise '206 Partial Content' if http_response.code ==206
157
163
 
158
164
  when 401
159
- raise 'Unauthorized'
165
+ raise DataCollector::InputError, 'Unauthorized'
160
166
  when 403
161
- raise 'Forbidden'
167
+ raise DataCollector::InputError, 'Forbidden'
162
168
  when 404
163
- raise 'Not found'
169
+ raise DataCollector::InputError, 'Not found'
164
170
  else
165
- raise "Unable to process received status code = #{http_response.code}"
171
+ raise DataCollector::InputError, "Unable to process received status code = #{http_response.code}"
166
172
  end
167
173
 
168
174
  #[data, http_response.code]
@@ -182,14 +188,14 @@ module DataCollector
182
188
  when '.json'
183
189
  data = JSON.parse(data)
184
190
  when '.xml'
185
- data = xml_to_hash(data)
191
+ data = xml_to_hash(data, options)
186
192
  when '.gz'
187
193
  Minitar.open(Zlib::GzipReader.new(File.open("#{absolute_path}", 'rb'))) do |i|
188
194
  i.each do |entry|
189
195
  data = entry.read
190
196
  end
191
197
  end
192
- data = xml_to_hash(data)
198
+ data = xml_to_hash(data, options)
193
199
  when '.csv'
194
200
  data = csv_to_hash(data)
195
201
  else
@@ -212,10 +218,11 @@ module DataCollector
212
218
  DataCollector::Input::Rpc.new(uri, options)
213
219
  end
214
220
 
215
- def xml_to_hash(data)
221
+ def xml_to_hash(data, options = {})
216
222
  #gsub('<\/', '< /') otherwise wrong XML-parsing (see records lirias1729192 )
217
223
  data = data.gsub /</, '< /'
218
- nori = Nori.new(parser: :nokogiri, strip_namespaces: true, convert_tags_to: lambda { |tag| tag.gsub(/^@/, '_') })
224
+ xml_typecast = options.with_indifferent_access.key?('xml_typecast') ? options.with_indifferent_access['xml_typecast'] : true
225
+ nori = Nori.new(parser: :nokogiri, advanced_typecasting: xml_typecast, strip_namespaces: true, convert_tags_to: lambda { |tag| tag.gsub(/^@/, '_') })
219
226
  nori.parse(data)
220
227
  #JSON.parse(nori.parse(data).to_json)
221
228
  end
@@ -1,4 +1,4 @@
1
1
  # encoding: utf-8
2
2
  module DataCollector
3
- VERSION = "0.26.0"
3
+ VERSION = "0.28.0"
4
4
  end
@@ -9,4 +9,6 @@ require 'data_collector/ext/xml_utility_node'
9
9
 
10
10
  module DataCollector
11
11
  class Error < StandardError; end
12
+
13
+ class InputError < StandardError; end
12
14
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_collector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.26.0
4
+ version: 0.28.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mehmet Celik
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-08-02 00:00:00.000000000 Z
11
+ date: 2023-08-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport