data_collector 0.34.0 → 0.35.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/data_collector.gemspec +1 -1
- data/lib/data_collector/input.rb +18 -4
- data/lib/data_collector/version.rb +1 -1
- metadata +17 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1b9d99a5bf660ca9feb5bea5dd13f12c2539dcb224d6689b7a1181af32d0ab20
|
4
|
+
data.tar.gz: 84fa315a8e2fa932caf84cad75d9c86c643c8d3b1faa389b383125de23022630
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 82cb1c183a2d7a8ea950a181f2ae9571720533b6defd378bc0cab91fc84d876c75fc71029cd1f46c993c0b004eee7ecf32635bff808cda4419b3236ea7206c42
|
7
|
+
data.tar.gz: 3bf761a214120ed1d12aad599e8137ce9a9b2028c757a7e59117d0ac3300e307237160cfb359032d8506db8efc37aee3669604bba0cbac59b60b51e064fc79e8
|
data/README.md
CHANGED
@@ -84,6 +84,8 @@ A push happens when new data is created in a directory, message queue, ...
|
|
84
84
|
- content_type: _string_ force a content_type if the 'Content-Type' returned by the http server is incorrect
|
85
85
|
- headers: request headers
|
86
86
|
- cookies: session cookies etc.
|
87
|
+
- method: http verb, one of [GET, POST] (default: 'GET')
|
88
|
+
- body: http post body
|
87
89
|
|
88
90
|
###### example:
|
89
91
|
```ruby
|
@@ -91,6 +93,8 @@ A push happens when new data is created in a directory, message queue, ...
|
|
91
93
|
input.from_uri("http://www.libis.be")
|
92
94
|
input.from_uri("file://hello.txt")
|
93
95
|
input.from_uri("http://www.libis.be/record.jsonld", content_type: 'application/ld+json')
|
96
|
+
input.from_uri("https://www.w3.org/TR/rdf12-turtle/examples/example1.ttl")
|
97
|
+
input.from_uri("https://dbpedia.org/sparql", body: "query=SELECT * WHERE {?sub ?pred ?obj} LIMIT 10", method:"POST", headers: {accept: "text/turtle"})
|
94
98
|
|
95
99
|
# read data from a RabbitMQ queue
|
96
100
|
listener = input.from_uri('amqp://user:password@localhost?channel=hello&queue=world')
|
data/data_collector.gemspec
CHANGED
@@ -39,7 +39,6 @@ Gem::Specification.new do |spec|
|
|
39
39
|
spec.add_runtime_dependency 'activesupport', '~> 7.0'
|
40
40
|
spec.add_runtime_dependency 'http', '~> 5.1'
|
41
41
|
spec.add_runtime_dependency 'json', '~> 2.6'
|
42
|
-
spec.add_runtime_dependency 'json-ld', '~> 3.3'
|
43
42
|
spec.add_runtime_dependency 'jsonpath', '~> 1.1'
|
44
43
|
spec.add_runtime_dependency 'mime-types', '~> 3.5'
|
45
44
|
spec.add_runtime_dependency 'minitar', '= 0.9'
|
@@ -51,6 +50,7 @@ Gem::Specification.new do |spec|
|
|
51
50
|
spec.add_runtime_dependency 'bunny_burrow', '~> 1.5'
|
52
51
|
spec.add_runtime_dependency 'builder', '~> 3.2'
|
53
52
|
spec.add_runtime_dependency 'parse-cron', '~> 0.1'
|
53
|
+
spec.add_runtime_dependency 'linkeddata', '~> 3.3'
|
54
54
|
|
55
55
|
spec.add_development_dependency 'bundler', '~> 2.3'
|
56
56
|
spec.add_development_dependency 'minitest', '~> 5.18'
|
data/lib/data_collector/input.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
require 'http'
|
3
3
|
require 'open-uri'
|
4
4
|
require 'nokogiri'
|
5
|
-
require 'json/ld'
|
5
|
+
require 'linkeddata'
|
6
6
|
require 'nori'
|
7
7
|
require 'uri'
|
8
8
|
require 'logger'
|
@@ -122,15 +122,24 @@ module DataCollector
|
|
122
122
|
http = http.headers(options[:headers])
|
123
123
|
end
|
124
124
|
|
125
|
+
ctx = nil
|
126
|
+
http_query_options = {}
|
125
127
|
if options.key?(:verify_ssl) && uri.scheme.eql?('https')
|
126
128
|
@logger.warn "Disabling SSL verification. "
|
127
129
|
# shouldn't use this but we all do ...
|
128
130
|
ctx = OpenSSL::SSL::SSLContext.new
|
129
131
|
ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
130
132
|
|
131
|
-
|
133
|
+
http_query_options[:ssl_context] = ctx
|
134
|
+
end
|
135
|
+
|
136
|
+
if options.key?(:method) && options[:method].downcase.eql?('post')
|
137
|
+
raise DataCollector::InputError, "No body found, a POST request needs a body" unless options.key?(:body)
|
138
|
+
http_query_options[:body] = options[:body]
|
139
|
+
|
140
|
+
http_response = http.follow.post(escape_uri(uri), http_query_options)
|
132
141
|
else
|
133
|
-
http_response = http.follow.get(escape_uri(uri))
|
142
|
+
http_response = http.follow.get(escape_uri(uri), http_query_options)
|
134
143
|
end
|
135
144
|
|
136
145
|
case http_response.code
|
@@ -157,6 +166,11 @@ module DataCollector
|
|
157
166
|
data = xml_to_hash(data, options)
|
158
167
|
when 'text/xml'
|
159
168
|
data = xml_to_hash(data, options)
|
169
|
+
when 'text/turtle'
|
170
|
+
graph = RDF::Graph.new do |graph|
|
171
|
+
RDF::Turtle::Reader.new(data) {|reader| graph << reader}
|
172
|
+
end
|
173
|
+
data = JSON.parse(graph.dump(:jsonld, validate: false, standard_prefixes: true))
|
160
174
|
else
|
161
175
|
data = xml_to_hash(data, options)
|
162
176
|
end
|
@@ -171,7 +185,7 @@ module DataCollector
|
|
171
185
|
when 404
|
172
186
|
raise DataCollector::InputError, 'Not found'
|
173
187
|
else
|
174
|
-
raise DataCollector::InputError, "Unable to process received status code = #{http_response.code}"
|
188
|
+
raise DataCollector::InputError, "Unable to process received status code = #{http_response.code} error= #{http_response.body.to_s}"
|
175
189
|
end
|
176
190
|
|
177
191
|
#[data, http_response.code]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_collector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.34.0
|
4
|
+
version: 0.35.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mehmet Celik
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-11-
|
11
|
+
date: 2023-11-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -52,20 +52,6 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '2.6'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: json-ld
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - "~>"
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '3.3'
|
62
|
-
type: :runtime
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - "~>"
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '3.3'
|
69
55
|
- !ruby/object:Gem::Dependency
|
70
56
|
name: jsonpath
|
71
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -220,6 +206,20 @@ dependencies:
|
|
220
206
|
- - "~>"
|
221
207
|
- !ruby/object:Gem::Version
|
222
208
|
version: '0.1'
|
209
|
+
- !ruby/object:Gem::Dependency
|
210
|
+
name: linkeddata
|
211
|
+
requirement: !ruby/object:Gem::Requirement
|
212
|
+
requirements:
|
213
|
+
- - "~>"
|
214
|
+
- !ruby/object:Gem::Version
|
215
|
+
version: '3.3'
|
216
|
+
type: :runtime
|
217
|
+
prerelease: false
|
218
|
+
version_requirements: !ruby/object:Gem::Requirement
|
219
|
+
requirements:
|
220
|
+
- - "~>"
|
221
|
+
- !ruby/object:Gem::Version
|
222
|
+
version: '3.3'
|
223
223
|
- !ruby/object:Gem::Dependency
|
224
224
|
name: bundler
|
225
225
|
requirement: !ruby/object:Gem::Requirement
|
@@ -334,7 +334,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
334
334
|
- !ruby/object:Gem::Version
|
335
335
|
version: '0'
|
336
336
|
requirements: []
|
337
|
-
rubygems_version: 3.4.
|
337
|
+
rubygems_version: 3.4.21
|
338
338
|
signing_key:
|
339
339
|
specification_version: 4
|
340
340
|
summary: ETL helper library
|