data_collector 0.26.0 → 0.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -1
- data/lib/data_collector/input.rb +20 -13
- data/lib/data_collector/version.rb +1 -1
- data/lib/data_collector.rb +2 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 71b519b75704a9bb6e46601c8195ab69608f19fae12ecde7c6c57869b8fca450
|
4
|
+
data.tar.gz: aa4a0697ffb9f6bc21c45f2ecdfa13c9deca7c8b69f2e8159f4901ab6be8d646
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 44dbd4e953260ef2c66b31f8a2914f4378bbeb969c670207bfa2272c4c2d5096a8ec5b3352b913093fb601b6d6c15ebc14d029a936e88c65ddf3d6eabe32e4d2
|
7
|
+
data.tar.gz: ee74ae98b99ab8b88e81d1c2c5437d860c314683e517858dc739924049c55ce8a4614cbfa40a97ac8bcd619cd678b6121e2158f45813370ee2c36de185a7244e
|
data/README.md
CHANGED
@@ -8,6 +8,7 @@ include DataCollector::Core
|
|
8
8
|
```
|
9
9
|
Every object can be used on its own.
|
10
10
|
|
11
|
+
### DataCollector Objects
|
11
12
|
#### Pipeline
|
12
13
|
Allows you to create a simple pipeline of operations to process data. With a data pipeline, you can collect, process, and transform data, and then transfer it to various systems and applications.
|
13
14
|
|
@@ -20,6 +21,7 @@ executed in the [ISO8601 duration format](https://www.digi.com/resources/documen
|
|
20
21
|
- schedule: [ISO8601 duration format](https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm)
|
21
22
|
- cron: in cron format, e.g. '1 12 * * *'; intervals are not supported
|
22
23
|
- uri: a directory/file to watch
|
24
|
+
- xml_typecast: true/false -> convert string values to TrueClass, FalseClass, Time, Date, and DateTime
|
23
25
|
- .run: start the pipeline. blocking if a schedule is supplied
|
24
26
|
- .stop: stop the pipeline
|
25
27
|
- .pause: pause the pipeline. Restart using .run
|
@@ -74,12 +76,14 @@ For a push input component, a listener is created with a processing logic block
|
|
74
76
|
A push happens when new data is created in a directory, message queue, ...
|
75
77
|
|
76
78
|
```ruby
|
77
|
-
from_uri(source, options = {:raw, :content_type})
|
79
|
+
from_uri(source, options = {:raw, :content_type, :headers, :cookies})
|
78
80
|
```
|
79
81
|
- source: a URI with a scheme of http, https, file, amqp
|
80
82
|
- options:
|
81
83
|
- raw: _boolean_ do not parse
|
82
84
|
- content_type: _string_ force a content_type if the 'Content-Type' returned by the http server is incorrect
|
85
|
+
- headers: request headers
|
86
|
+
- cookies: session cookies etc.
|
83
87
|
|
84
88
|
###### example:
|
85
89
|
```ruby
|
data/lib/data_collector/input.rb
CHANGED
@@ -66,7 +66,13 @@ module DataCollector
|
|
66
66
|
else
|
67
67
|
data
|
68
68
|
end
|
69
|
-
rescue => e
|
69
|
+
rescue DataCollector::InputError => e
|
70
|
+
@logger.info(e.message)
|
71
|
+
raise e
|
72
|
+
rescue DataCollector::Error => e
|
73
|
+
@logger.info(e.message)
|
74
|
+
nil
|
75
|
+
rescue StandardError => e
|
70
76
|
@logger.info(e.message)
|
71
77
|
puts e.backtrace.join("\n")
|
72
78
|
nil
|
@@ -141,28 +147,28 @@ module DataCollector
|
|
141
147
|
when 'application/json'
|
142
148
|
data = JSON.parse(data)
|
143
149
|
when 'application/atom+xml'
|
144
|
-
data = xml_to_hash(data)
|
150
|
+
data = xml_to_hash(data, options)
|
145
151
|
when 'text/csv'
|
146
152
|
data = csv_to_hash(data)
|
147
153
|
when 'application/xml'
|
148
|
-
data = xml_to_hash(data)
|
154
|
+
data = xml_to_hash(data, options)
|
149
155
|
when 'text/xml'
|
150
|
-
data = xml_to_hash(data)
|
156
|
+
data = xml_to_hash(data, options)
|
151
157
|
else
|
152
|
-
data = xml_to_hash(data)
|
158
|
+
data = xml_to_hash(data, options)
|
153
159
|
end
|
154
160
|
end
|
155
161
|
|
156
162
|
raise '206 Partial Content' if http_response.code ==206
|
157
163
|
|
158
164
|
when 401
|
159
|
-
raise 'Unauthorized'
|
165
|
+
raise DataCollector::InputError, 'Unauthorized'
|
160
166
|
when 403
|
161
|
-
raise 'Forbidden'
|
167
|
+
raise DataCollector::InputError, 'Forbidden'
|
162
168
|
when 404
|
163
|
-
raise 'Not found'
|
169
|
+
raise DataCollector::InputError, 'Not found'
|
164
170
|
else
|
165
|
-
raise "Unable to process received status code = #{http_response.code}"
|
171
|
+
raise DataCollector::InputError, "Unable to process received status code = #{http_response.code}"
|
166
172
|
end
|
167
173
|
|
168
174
|
#[data, http_response.code]
|
@@ -182,14 +188,14 @@ module DataCollector
|
|
182
188
|
when '.json'
|
183
189
|
data = JSON.parse(data)
|
184
190
|
when '.xml'
|
185
|
-
data = xml_to_hash(data)
|
191
|
+
data = xml_to_hash(data, options)
|
186
192
|
when '.gz'
|
187
193
|
Minitar.open(Zlib::GzipReader.new(File.open("#{absolute_path}", 'rb'))) do |i|
|
188
194
|
i.each do |entry|
|
189
195
|
data = entry.read
|
190
196
|
end
|
191
197
|
end
|
192
|
-
data = xml_to_hash(data)
|
198
|
+
data = xml_to_hash(data, options)
|
193
199
|
when '.csv'
|
194
200
|
data = csv_to_hash(data)
|
195
201
|
else
|
@@ -212,10 +218,11 @@ module DataCollector
|
|
212
218
|
DataCollector::Input::Rpc.new(uri, options)
|
213
219
|
end
|
214
220
|
|
215
|
-
def xml_to_hash(data)
|
221
|
+
def xml_to_hash(data, options = {})
|
216
222
|
#gsub('<\/', '< /') otherwise wrong XML-parsing (see records lirias1729192 )
|
217
223
|
data = data.gsub /</, '< /'
|
218
|
-
|
224
|
+
xml_typecast = options.with_indifferent_access.key?('xml_typecast') ? options.with_indifferent_access['xml_typecast'] : true
|
225
|
+
nori = Nori.new(parser: :nokogiri, advanced_typecasting: xml_typecast, strip_namespaces: true, convert_tags_to: lambda { |tag| tag.gsub(/^@/, '_') })
|
219
226
|
nori.parse(data)
|
220
227
|
#JSON.parse(nori.parse(data).to_json)
|
221
228
|
end
|
data/lib/data_collector.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_collector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.26.0
|
4
|
+
version: 0.28.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mehmet Celik
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-08-
|
11
|
+
date: 2023-08-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|