data_collector 0.26.0 → 0.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -1
- data/lib/data_collector/input.rb +9 -8
- data/lib/data_collector/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b001584d7cb8f525f22370a79a4921104358f9de512d81da6236f65a590220bc
|
4
|
+
data.tar.gz: 773bf24b624e28e056a82f0c665e603a119830c9cc872d1938be02ab7251ad03
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8d0a62ddf8408c72e6277080f7105bd8d12f39c2f30e4a44910d890932a50ef9399d096d98b990b87db422a6af7557821e45045a049c64ebbdd90d8cd811062c
|
7
|
+
data.tar.gz: 53e2290744797a8763ccd0491a02c9dd1ee4e73919f95c20d66bb03889cca18f31cb4090728f1ef3acee6b76ef9e062f4196d17b90b5bf0ec3d2132e74543c2a
|
data/README.md
CHANGED
@@ -8,6 +8,7 @@ include DataCollector::Core
|
|
8
8
|
```
|
9
9
|
Every object can be used on its own.
|
10
10
|
|
11
|
+
### DataCollector Objects
|
11
12
|
#### Pipeline
|
12
13
|
Allows you to create a simple pipeline of operations to process data. With a data pipeline, you can collect, process, and transform data, and then transfer it to various systems and applications.
|
13
14
|
|
@@ -20,6 +21,7 @@ executed in the [ISO8601 duration format](https://www.digi.com/resources/documen
|
|
20
21
|
- schedule: [ISO8601 duration format](https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm)
|
21
22
|
- cron: in cron format ex. '1 12 * * *' intervals are not supported
|
22
23
|
- uri: a directory/file to watch
|
24
|
+
- xml_typecast: true/false -> convert string values to TrueClass, FalseClass, Time, Date, and DateTime
|
23
25
|
- .run: start the pipeline. blocking if a schedule is supplied
|
24
26
|
- .stop: stop the pipeline
|
25
27
|
- .pause: pause the pipeline. Restart using .run
|
@@ -74,12 +76,14 @@ For a push input component, a listener is created with a processing logic block
|
|
74
76
|
A push happens when new data is created in a directory, message queue, ...
|
75
77
|
|
76
78
|
```ruby
|
77
|
-
from_uri(source, options = {:raw, :content_type})
|
79
|
+
from_uri(source, options = {:raw, :content_type, :headers, :cookies})
|
78
80
|
```
|
79
81
|
- source: a URI with a scheme of http, https, file, amqp
|
80
82
|
- options:
|
81
83
|
- raw: _boolean_ do not parse
|
82
84
|
- content_type: _string_ force a content_type if the 'Content-Type' returned by the http server is incorrect
|
85
|
+
- headers: request headers
|
86
|
+
- cookies: session cookies etc.
|
83
87
|
|
84
88
|
###### example:
|
85
89
|
```ruby
|
data/lib/data_collector/input.rb
CHANGED
@@ -141,15 +141,15 @@ module DataCollector
|
|
141
141
|
when 'application/json'
|
142
142
|
data = JSON.parse(data)
|
143
143
|
when 'application/atom+xml'
|
144
|
-
data = xml_to_hash(data)
|
144
|
+
data = xml_to_hash(data, options)
|
145
145
|
when 'text/csv'
|
146
146
|
data = csv_to_hash(data)
|
147
147
|
when 'application/xml'
|
148
|
-
data = xml_to_hash(data)
|
148
|
+
data = xml_to_hash(data, options)
|
149
149
|
when 'text/xml'
|
150
|
-
data = xml_to_hash(data)
|
150
|
+
data = xml_to_hash(data, options)
|
151
151
|
else
|
152
|
-
data = xml_to_hash(data)
|
152
|
+
data = xml_to_hash(data, options)
|
153
153
|
end
|
154
154
|
end
|
155
155
|
|
@@ -182,14 +182,14 @@ module DataCollector
|
|
182
182
|
when '.json'
|
183
183
|
data = JSON.parse(data)
|
184
184
|
when '.xml'
|
185
|
-
data = xml_to_hash(data)
|
185
|
+
data = xml_to_hash(data, options)
|
186
186
|
when '.gz'
|
187
187
|
Minitar.open(Zlib::GzipReader.new(File.open("#{absolute_path}", 'rb'))) do |i|
|
188
188
|
i.each do |entry|
|
189
189
|
data = entry.read
|
190
190
|
end
|
191
191
|
end
|
192
|
-
data = xml_to_hash(data)
|
192
|
+
data = xml_to_hash(data, options)
|
193
193
|
when '.csv'
|
194
194
|
data = csv_to_hash(data)
|
195
195
|
else
|
@@ -212,10 +212,11 @@ module DataCollector
|
|
212
212
|
DataCollector::Input::Rpc.new(uri, options)
|
213
213
|
end
|
214
214
|
|
215
|
-
def xml_to_hash(data)
|
215
|
+
def xml_to_hash(data, options = {})
|
216
216
|
#gsub('<\/', '< /') otherwise wrong XML-parsing (see records lirias1729192 )
|
217
217
|
data = data.gsub /</, '< /'
|
218
|
-
|
218
|
+
xml_typecast = options.with_indifferent_access.key?('xml_typecast') ? options.with_indifferent_access['xml_typecast'] : true
|
219
|
+
nori = Nori.new(parser: :nokogiri, advanced_typecasting: xml_typecast, strip_namespaces: true, convert_tags_to: lambda { |tag| tag.gsub(/^@/, '_') })
|
219
220
|
nori.parse(data)
|
220
221
|
#JSON.parse(nori.parse(data).to_json)
|
221
222
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_collector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.26.0
|
4
|
+
version: 0.27.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mehmet Celik
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-08-
|
11
|
+
date: 2023-08-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|