data_collector 0.34.0 → 0.35.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/data_collector.gemspec +1 -1
- data/lib/data_collector/input.rb +18 -4
- data/lib/data_collector/version.rb +1 -1
- metadata +17 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1b9d99a5bf660ca9feb5bea5dd13f12c2539dcb224d6689b7a1181af32d0ab20
|
4
|
+
data.tar.gz: 84fa315a8e2fa932caf84cad75d9c86c643c8d3b1faa389b383125de23022630
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 82cb1c183a2d7a8ea950a181f2ae9571720533b6defd378bc0cab91fc84d876c75fc71029cd1f46c993c0b004eee7ecf32635bff808cda4419b3236ea7206c42
|
7
|
+
data.tar.gz: 3bf761a214120ed1d12aad599e8137ce9a9b2028c757a7e59117d0ac3300e307237160cfb359032d8506db8efc37aee3669604bba0cbac59b60b51e064fc79e8
|
data/README.md
CHANGED
@@ -84,6 +84,8 @@ A push happens when new data is created in a directory, message queue, ...
|
|
84
84
|
- content_type: _string_ force a content_type if the 'Content-Type' returned by the http server is incorrect
|
85
85
|
- headers: request headers
|
86
86
|
- cookies: session cookies etc.
|
87
|
+
- method: http verb, one of [GET, POST], default('GET')
|
88
|
+
- body: http post body
|
87
89
|
|
88
90
|
###### example:
|
89
91
|
```ruby
|
@@ -91,6 +93,8 @@ A push happens when new data is created in a directory, message queue, ...
|
|
91
93
|
input.from_uri("http://www.libis.be")
|
92
94
|
input.from_uri("file://hello.txt")
|
93
95
|
input.from_uri("http://www.libis.be/record.jsonld", content_type: 'application/ld+json')
|
96
|
+
input.from_uri("https://www.w3.org/TR/rdf12-turtle/examples/example1.ttl")
|
97
|
+
input.from_uri("https://dbpedia.org/sparql", body: "query=SELECT * WHERE {?sub ?pred ?obj} LIMIT 10", method:"POST", headers: {accept: "text/turtle"})
|
94
98
|
|
95
99
|
# read data from a RabbitMQ queue
|
96
100
|
listener = input.from_uri('amqp://user:password@localhost?channel=hello&queue=world')
|
data/data_collector.gemspec
CHANGED
@@ -39,7 +39,6 @@ Gem::Specification.new do |spec|
|
|
39
39
|
spec.add_runtime_dependency 'activesupport', '~> 7.0'
|
40
40
|
spec.add_runtime_dependency 'http', '~> 5.1'
|
41
41
|
spec.add_runtime_dependency 'json', '~> 2.6'
|
42
|
-
spec.add_runtime_dependency 'json-ld', '~> 3.3'
|
43
42
|
spec.add_runtime_dependency 'jsonpath', '~> 1.1'
|
44
43
|
spec.add_runtime_dependency 'mime-types', '~> 3.5'
|
45
44
|
spec.add_runtime_dependency 'minitar', '= 0.9'
|
@@ -51,6 +50,7 @@ Gem::Specification.new do |spec|
|
|
51
50
|
spec.add_runtime_dependency 'bunny_burrow', '~> 1.5'
|
52
51
|
spec.add_runtime_dependency 'builder', '~> 3.2'
|
53
52
|
spec.add_runtime_dependency 'parse-cron', '~> 0.1'
|
53
|
+
spec.add_runtime_dependency 'linkeddata', '~> 3.3'
|
54
54
|
|
55
55
|
spec.add_development_dependency 'bundler', '~> 2.3'
|
56
56
|
spec.add_development_dependency 'minitest', '~> 5.18'
|
data/lib/data_collector/input.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
require 'http'
|
3
3
|
require 'open-uri'
|
4
4
|
require 'nokogiri'
|
5
|
-
require '
|
5
|
+
require 'linkeddata'
|
6
6
|
require 'nori'
|
7
7
|
require 'uri'
|
8
8
|
require 'logger'
|
@@ -122,15 +122,24 @@ module DataCollector
|
|
122
122
|
http = http.headers(options[:headers])
|
123
123
|
end
|
124
124
|
|
125
|
+
ctx = nil
|
126
|
+
http_query_options = {}
|
125
127
|
if options.key?(:verify_ssl) && uri.scheme.eql?('https')
|
126
128
|
@logger.warn "Disabling SSL verification. "
|
127
129
|
# shouldn't use this but we all do ...
|
128
130
|
ctx = OpenSSL::SSL::SSLContext.new
|
129
131
|
ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
130
132
|
|
131
|
-
|
133
|
+
http_query_options[:ssl_context] = ctx
|
134
|
+
end
|
135
|
+
|
136
|
+
if options.key?(:method) && options[:method].downcase.eql?('post')
|
137
|
+
raise DataCollector::InputError, "No body found, a POST request needs a body" unless options.key?(:body)
|
138
|
+
http_query_options[:body] = options[:body]
|
139
|
+
|
140
|
+
http_response = http.follow.post(escape_uri(uri), http_query_options)
|
132
141
|
else
|
133
|
-
http_response = http.follow.get(escape_uri(uri))
|
142
|
+
http_response = http.follow.get(escape_uri(uri), http_query_options)
|
134
143
|
end
|
135
144
|
|
136
145
|
case http_response.code
|
@@ -157,6 +166,11 @@ module DataCollector
|
|
157
166
|
data = xml_to_hash(data, options)
|
158
167
|
when 'text/xml'
|
159
168
|
data = xml_to_hash(data, options)
|
169
|
+
when 'text/turtle'
|
170
|
+
graph = RDF::Graph.new do |graph|
|
171
|
+
RDF::Turtle::Reader.new(data) {|reader| graph << reader}
|
172
|
+
end
|
173
|
+
data = JSON.parse(graph.dump(:jsonld, validate: false, standard_prefixes: true))
|
160
174
|
else
|
161
175
|
data = xml_to_hash(data, options)
|
162
176
|
end
|
@@ -171,7 +185,7 @@ module DataCollector
|
|
171
185
|
when 404
|
172
186
|
raise DataCollector::InputError, 'Not found'
|
173
187
|
else
|
174
|
-
raise DataCollector::InputError, "Unable to process received status code = #{http_response.code}"
|
188
|
+
raise DataCollector::InputError, "Unable to process received status code = #{http_response.code} error= #{http_response.body.to_s}"
|
175
189
|
end
|
176
190
|
|
177
191
|
#[data, http_response.code]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_collector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.34.0
|
4
|
+
version: 0.35.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mehmet Celik
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-11-
|
11
|
+
date: 2023-11-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -52,20 +52,6 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '2.6'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: json-ld
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - "~>"
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '3.3'
|
62
|
-
type: :runtime
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - "~>"
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '3.3'
|
69
55
|
- !ruby/object:Gem::Dependency
|
70
56
|
name: jsonpath
|
71
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -220,6 +206,20 @@ dependencies:
|
|
220
206
|
- - "~>"
|
221
207
|
- !ruby/object:Gem::Version
|
222
208
|
version: '0.1'
|
209
|
+
- !ruby/object:Gem::Dependency
|
210
|
+
name: linkeddata
|
211
|
+
requirement: !ruby/object:Gem::Requirement
|
212
|
+
requirements:
|
213
|
+
- - "~>"
|
214
|
+
- !ruby/object:Gem::Version
|
215
|
+
version: '3.3'
|
216
|
+
type: :runtime
|
217
|
+
prerelease: false
|
218
|
+
version_requirements: !ruby/object:Gem::Requirement
|
219
|
+
requirements:
|
220
|
+
- - "~>"
|
221
|
+
- !ruby/object:Gem::Version
|
222
|
+
version: '3.3'
|
223
223
|
- !ruby/object:Gem::Dependency
|
224
224
|
name: bundler
|
225
225
|
requirement: !ruby/object:Gem::Requirement
|
@@ -334,7 +334,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
334
334
|
- !ruby/object:Gem::Version
|
335
335
|
version: '0'
|
336
336
|
requirements: []
|
337
|
-
rubygems_version: 3.4.
|
337
|
+
rubygems_version: 3.4.21
|
338
338
|
signing_key:
|
339
339
|
specification_version: 4
|
340
340
|
summary: ETL helper library
|