data_collector 0.26.0 → 0.28.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +5 -1
- data/lib/data_collector/input.rb +20 -13
- data/lib/data_collector/version.rb +1 -1
- data/lib/data_collector.rb +2 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 71b519b75704a9bb6e46601c8195ab69608f19fae12ecde7c6c57869b8fca450
|
4
|
+
data.tar.gz: aa4a0697ffb9f6bc21c45f2ecdfa13c9deca7c8b69f2e8159f4901ab6be8d646
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 44dbd4e953260ef2c66b31f8a2914f4378bbeb969c670207bfa2272c4c2d5096a8ec5b3352b913093fb601b6d6c15ebc14d029a936e88c65ddf3d6eabe32e4d2
|
7
|
+
data.tar.gz: ee74ae98b99ab8b88e81d1c2c5437d860c314683e517858dc739924049c55ce8a4614cbfa40a97ac8bcd619cd678b6121e2158f45813370ee2c36de185a7244e
|
data/README.md
CHANGED
@@ -8,6 +8,7 @@ include DataCollector::Core
|
|
8
8
|
```
|
9
9
|
Every object can be used on its own.
|
10
10
|
|
11
|
+
### DataCollector Objects
|
11
12
|
#### Pipeline
|
12
13
|
Allows you to create a simple pipeline of operations to process data. With a data pipeline, you can collect, process, and transform data, and then transfer it to various systems and applications.
|
13
14
|
|
@@ -20,6 +21,7 @@ executed in the [ISO8601 duration format](https://www.digi.com/resources/documen
|
|
20
21
|
- schedule: [ISO8601 duration format](https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm)
|
21
22
|
- cron: in cron format, e.g. '1 12 * * *'; intervals are not supported
|
22
23
|
- uri: a directory/file to watch
|
24
|
+
- xml_typecast: true/false -> convert string values to TrueClass, FalseClass, Time, Date, and DateTime
|
23
25
|
- .run: start the pipeline. blocking if a schedule is supplied
|
24
26
|
- .stop: stop the pipeline
|
25
27
|
- .pause: pause the pipeline. Restart using .run
|
@@ -74,12 +76,14 @@ For a push input component, a listener is created with a processing logic block
|
|
74
76
|
A push happens when new data is created in a directory, message queue, ...
|
75
77
|
|
76
78
|
```ruby
|
77
|
-
from_uri(source, options = {:raw, :content_type})
|
79
|
+
from_uri(source, options = {:raw, :content_type, :headers, :cookies})
|
78
80
|
```
|
79
81
|
- source: a URI with a scheme of http, https, file, or amqp
|
80
82
|
- options:
|
81
83
|
- raw: _boolean_ do not parse
|
82
84
|
- content_type: _string_ force a content_type if the 'Content-Type' returned by the http server is incorrect
|
85
|
+
- headers: request headers
|
86
|
+
- cookies: session cookies etc.
|
83
87
|
|
84
88
|
###### example:
|
85
89
|
```ruby
|
data/lib/data_collector/input.rb
CHANGED
@@ -66,7 +66,13 @@ module DataCollector
|
|
66
66
|
else
|
67
67
|
data
|
68
68
|
end
|
69
|
-
rescue => e
|
69
|
+
rescue DataCollector::InputError => e
|
70
|
+
@logger.info(e.message)
|
71
|
+
raise e
|
72
|
+
rescue DataCollector::Error => e
|
73
|
+
@logger.info(e.message)
|
74
|
+
nil
|
75
|
+
rescue StandardError => e
|
70
76
|
@logger.info(e.message)
|
71
77
|
puts e.backtrace.join("\n")
|
72
78
|
nil
|
@@ -141,28 +147,28 @@ module DataCollector
|
|
141
147
|
when 'application/json'
|
142
148
|
data = JSON.parse(data)
|
143
149
|
when 'application/atom+xml'
|
144
|
-
data = xml_to_hash(data)
|
150
|
+
data = xml_to_hash(data, options)
|
145
151
|
when 'text/csv'
|
146
152
|
data = csv_to_hash(data)
|
147
153
|
when 'application/xml'
|
148
|
-
data = xml_to_hash(data)
|
154
|
+
data = xml_to_hash(data, options)
|
149
155
|
when 'text/xml'
|
150
|
-
data = xml_to_hash(data)
|
156
|
+
data = xml_to_hash(data, options)
|
151
157
|
else
|
152
|
-
data = xml_to_hash(data)
|
158
|
+
data = xml_to_hash(data, options)
|
153
159
|
end
|
154
160
|
end
|
155
161
|
|
156
162
|
raise '206 Partial Content' if http_response.code ==206
|
157
163
|
|
158
164
|
when 401
|
159
|
-
raise 'Unauthorized'
|
165
|
+
raise DataCollector::InputError, 'Unauthorized'
|
160
166
|
when 403
|
161
|
-
raise 'Forbidden'
|
167
|
+
raise DataCollector::InputError, 'Forbidden'
|
162
168
|
when 404
|
163
|
-
raise 'Not found'
|
169
|
+
raise DataCollector::InputError, 'Not found'
|
164
170
|
else
|
165
|
-
raise "Unable to process received status code = #{http_response.code}"
|
171
|
+
raise DataCollector::InputError, "Unable to process received status code = #{http_response.code}"
|
166
172
|
end
|
167
173
|
|
168
174
|
#[data, http_response.code]
|
@@ -182,14 +188,14 @@ module DataCollector
|
|
182
188
|
when '.json'
|
183
189
|
data = JSON.parse(data)
|
184
190
|
when '.xml'
|
185
|
-
data = xml_to_hash(data)
|
191
|
+
data = xml_to_hash(data, options)
|
186
192
|
when '.gz'
|
187
193
|
Minitar.open(Zlib::GzipReader.new(File.open("#{absolute_path}", 'rb'))) do |i|
|
188
194
|
i.each do |entry|
|
189
195
|
data = entry.read
|
190
196
|
end
|
191
197
|
end
|
192
|
-
data = xml_to_hash(data)
|
198
|
+
data = xml_to_hash(data, options)
|
193
199
|
when '.csv'
|
194
200
|
data = csv_to_hash(data)
|
195
201
|
else
|
@@ -212,10 +218,11 @@ module DataCollector
|
|
212
218
|
DataCollector::Input::Rpc.new(uri, options)
|
213
219
|
end
|
214
220
|
|
215
|
-
def xml_to_hash(data)
|
221
|
+
def xml_to_hash(data, options = {})
|
216
222
|
#gsub('&lt;\/', '&lt; /') otherwise wrong XML-parsing (see records lirias1729192 )
|
217
223
|
data = data.gsub /&lt;/, '&lt; /'
|
218
|
-
|
224
|
+
xml_typecast = options.with_indifferent_access.key?('xml_typecast') ? options.with_indifferent_access['xml_typecast'] : true
|
225
|
+
nori = Nori.new(parser: :nokogiri, advanced_typecasting: xml_typecast, strip_namespaces: true, convert_tags_to: lambda { |tag| tag.gsub(/^@/, '_') })
|
219
226
|
nori.parse(data)
|
220
227
|
#JSON.parse(nori.parse(data).to_json)
|
221
228
|
end
|
data/lib/data_collector.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_collector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.26.0
|
4
|
+
version: 0.28.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mehmet Celik
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-08-
|
11
|
+
date: 2023-08-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|