data_collector 0.25.0 → 0.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -1
- data/lib/data_collector/input/rpc.rb +1 -1
- data/lib/data_collector/input.rb +25 -13
- data/lib/data_collector/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b001584d7cb8f525f22370a79a4921104358f9de512d81da6236f65a590220bc
|
4
|
+
data.tar.gz: 773bf24b624e28e056a82f0c665e603a119830c9cc872d1938be02ab7251ad03
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8d0a62ddf8408c72e6277080f7105bd8d12f39c2f30e4a44910d890932a50ef9399d096d98b990b87db422a6af7557821e45045a049c64ebbdd90d8cd811062c
|
7
|
+
data.tar.gz: 53e2290744797a8763ccd0491a02c9dd1ee4e73919f95c20d66bb03889cca18f31cb4090728f1ef3acee6b76ef9e062f4196d17b90b5bf0ec3d2132e74543c2a
|
data/README.md
CHANGED
@@ -8,6 +8,7 @@ include DataCollector::Core
|
|
8
8
|
```
|
9
9
|
Every object can be used on its own.
|
10
10
|
|
11
|
+
### DataCollector Objects
|
11
12
|
#### Pipeline
|
12
13
|
Allows you to create a simple pipeline of operations to process data. With a data pipeline, you can collect, process, and transform data, and then transfer it to various systems and applications.
|
13
14
|
|
@@ -20,6 +21,7 @@ executed in the [ISO8601 duration format](https://www.digi.com/resources/documen
|
|
20
21
|
- schedule: [ISO8601 duration format](https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm)
|
21
22
|
- cron: in cron format, e.g. '1 12 * * *'; intervals are not supported
|
22
23
|
- uri: a directory/file to watch
|
24
|
+
- xml_typecast: true/false -> convert string values to TrueClass, FalseClass, Time, Date, and DateTime
|
23
25
|
- .run: start the pipeline. blocking if a schedule is supplied
|
24
26
|
- .stop: stop the pipeline
|
25
27
|
- .pause: pause the pipeline. Restart using .run
|
@@ -74,12 +76,14 @@ For a push input component, a listener is created with a processing logic block
|
|
74
76
|
A push happens when new data is created in a directory, message queue, ...
|
75
77
|
|
76
78
|
```ruby
|
77
|
-
from_uri(source, options = {:raw, :content_type})
|
79
|
+
from_uri(source, options = {:raw, :content_type, :headers, :cookies})
|
78
80
|
```
|
79
81
|
- source: a URI with a scheme of http, https, file, amqp
|
80
82
|
- options:
|
81
83
|
- raw: _boolean_ do not parse
|
82
84
|
- content_type: _string_ force a content_type if the 'Content-Type' returned by the http server is incorrect
|
85
|
+
- headers: request headers
|
86
|
+
- cookies: session cookies etc.
|
83
87
|
|
84
88
|
###### example:
|
85
89
|
```ruby
|
data/lib/data_collector/input.rb
CHANGED
@@ -31,7 +31,7 @@ module DataCollector
|
|
31
31
|
raise DataCollector::Error 'from_uri expects a scheme like file:// of https://' unless source =~ /:\/\//
|
32
32
|
|
33
33
|
scheme, path = source.split('://')
|
34
|
-
source="#{scheme}://#{URI.
|
34
|
+
source="#{scheme}://#{URI.encode_www_form_component(path)}"
|
35
35
|
uri = URI(source)
|
36
36
|
begin
|
37
37
|
data = nil
|
@@ -41,7 +41,7 @@ module DataCollector
|
|
41
41
|
when 'https'
|
42
42
|
data = from_https(uri, options)
|
43
43
|
when 'file'
|
44
|
-
absolute_path = File.absolute_path("#{URI.
|
44
|
+
absolute_path = File.absolute_path("#{URI.decode_www_form_component("#{uri.host}#{uri.path}")}")
|
45
45
|
if File.directory?(absolute_path)
|
46
46
|
#raise DataCollector::Error, "#{uri.host}/#{uri.path} not found" unless File.exist?("#{uri.host}/#{uri.path}")
|
47
47
|
return from_dir(uri, options)
|
@@ -80,7 +80,7 @@ module DataCollector
|
|
80
80
|
end
|
81
81
|
|
82
82
|
def from_https(uri, options = {})
|
83
|
-
uri = URI.
|
83
|
+
uri = URI.decode_www_form_component("#{uri.to_s}")
|
84
84
|
data = nil
|
85
85
|
if options.with_indifferent_access.include?(:logging) && options.with_indifferent_access[:logging]
|
86
86
|
HTTP.default_options = HTTP::Options.new(features: { logging: { logger: @logger } })
|
@@ -102,6 +102,16 @@ module DataCollector
|
|
102
102
|
http = HTTP.auth(bearer)
|
103
103
|
end
|
104
104
|
|
105
|
+
if options.key?(:cookies)
|
106
|
+
@logger.debug "Set cookies"
|
107
|
+
http = http.cookies( options[:cookies] )
|
108
|
+
end
|
109
|
+
|
110
|
+
if options.key?(:headers)
|
111
|
+
@logger.debug "Set http headers"
|
112
|
+
http = http.headers( options[:headers] )
|
113
|
+
end
|
114
|
+
|
105
115
|
if options.key?(:verify_ssl) && uri.scheme.eql?('https')
|
106
116
|
@logger.warn "Disabling SSL verification. "
|
107
117
|
#shouldn't use this but we all do ...
|
@@ -109,10 +119,11 @@ module DataCollector
|
|
109
119
|
ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
110
120
|
|
111
121
|
http_response = http.follow.get(escape_uri(uri), ssl_context: ctx)
|
122
|
+
|
112
123
|
else
|
113
124
|
http_response = http.follow.get(escape_uri(uri))
|
114
125
|
end
|
115
|
-
|
126
|
+
|
116
127
|
case http_response.code
|
117
128
|
when 200..299
|
118
129
|
@raw = data = http_response.body.to_s
|
@@ -130,15 +141,15 @@ module DataCollector
|
|
130
141
|
when 'application/json'
|
131
142
|
data = JSON.parse(data)
|
132
143
|
when 'application/atom+xml'
|
133
|
-
data = xml_to_hash(data)
|
144
|
+
data = xml_to_hash(data, options)
|
134
145
|
when 'text/csv'
|
135
146
|
data = csv_to_hash(data)
|
136
147
|
when 'application/xml'
|
137
|
-
data = xml_to_hash(data)
|
148
|
+
data = xml_to_hash(data, options)
|
138
149
|
when 'text/xml'
|
139
|
-
data = xml_to_hash(data)
|
150
|
+
data = xml_to_hash(data, options)
|
140
151
|
else
|
141
|
-
data = xml_to_hash(data)
|
152
|
+
data = xml_to_hash(data, options)
|
142
153
|
end
|
143
154
|
end
|
144
155
|
|
@@ -171,14 +182,14 @@ module DataCollector
|
|
171
182
|
when '.json'
|
172
183
|
data = JSON.parse(data)
|
173
184
|
when '.xml'
|
174
|
-
data = xml_to_hash(data)
|
185
|
+
data = xml_to_hash(data, options)
|
175
186
|
when '.gz'
|
176
187
|
Minitar.open(Zlib::GzipReader.new(File.open("#{absolute_path}", 'rb'))) do |i|
|
177
188
|
i.each do |entry|
|
178
189
|
data = entry.read
|
179
190
|
end
|
180
191
|
end
|
181
|
-
data = xml_to_hash(data)
|
192
|
+
data = xml_to_hash(data, options)
|
182
193
|
when '.csv'
|
183
194
|
data = csv_to_hash(data)
|
184
195
|
else
|
@@ -201,10 +212,11 @@ module DataCollector
|
|
201
212
|
DataCollector::Input::Rpc.new(uri, options)
|
202
213
|
end
|
203
214
|
|
204
|
-
def xml_to_hash(data)
|
215
|
+
def xml_to_hash(data, options = {})
|
205
216
|
#gsub('<\/', '< /') outherwise wrong XML-parsing (see records lirias1729192 )
|
206
217
|
data = data.gsub /</, '< /'
|
207
|
-
|
218
|
+
xml_typecast = options.with_indifferent_access.key?('xml_typecast') ? options.with_indifferent_access['xml_typecast'] : true
|
219
|
+
nori = Nori.new(parser: :nokogiri, advanced_typecasting: xml_typecast, strip_namespaces: true, convert_tags_to: lambda { |tag| tag.gsub(/^@/, '_') })
|
208
220
|
nori.parse(data)
|
209
221
|
#JSON.parse(nori.parse(data).to_json)
|
210
222
|
end
|
@@ -235,7 +247,7 @@ module DataCollector
|
|
235
247
|
end
|
236
248
|
|
237
249
|
def normalize_uri(uri)
|
238
|
-
"#{URI.
|
250
|
+
"#{URI.decode_www_form_component(uri.host)}#{URI.decode_www_form_component(uri.path)}"
|
239
251
|
end
|
240
252
|
end
|
241
253
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_collector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.27.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mehmet Celik
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-08-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|