data_collector 0.53.0 → 0.55.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/data_collector.gemspec +1 -0
- data/lib/data_collector/ext/file.rb +60 -0
- data/lib/data_collector/ext/x-open-uri-and-write.rb +25 -0
- data/lib/data_collector/input.rb +10 -4
- data/lib/data_collector/version.rb +1 -1
- data/lib/proxy_logger.rb +2 -0
- metadata +18 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a5cc4cf83290713c75860b1cf70e90ae019e423b53f6909b425ba0f134af9672
|
4
|
+
data.tar.gz: eb8d383233e5d8492c3112582cb820bc4b28eeed0bea33d0bd715532456c24c1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 14b5ef4e812c16fb05a1ac75ab38d365eed90ce9883d09053cfe656df8d2d3c5101bc81e8d439a7ceaa65de1552b451d64bdfd5c614844502ee6841ef25c3ef2
|
7
|
+
data.tar.gz: ecc4b0d595804845ec423f4270578f053ffc5c987ed6c082ad9a1bc068f2a4ebe9ae0b5eb36b55826bf82a0caf7f2e8797b2ff7d9a9a597848357c29912c6bd7
|
data/README.md
CHANGED
@@ -95,6 +95,7 @@ A push happens when new data is created in a directory, message queue, ...
|
|
95
95
|
input.from_uri("http://www.libis.be/record.jsonld", content_type: 'application/ld+json')
|
96
96
|
input.from_uri("https://www.w3.org/TR/rdf12-turtle/examples/example1.ttl")
|
97
97
|
input.from_uri("https://dbpedia.org/sparql", body: "query=SELECT * WHERE {?sub ?pred ?obj} LIMIT 10", method:"POST", headers: {accept: "text/turtle"})
|
98
|
+
input.from_uri(StringIO.new(File.read('myrecords.xml')), content_type: 'application/xml' )
|
98
99
|
|
99
100
|
# read data from a RabbitMQ queue
|
100
101
|
listener = input.from_uri('amqp://user:password@localhost?channel=hello&queue=world')
|
data/data_collector.gemspec
CHANGED
@@ -52,6 +52,7 @@ Gem::Specification.new do |spec|
|
|
52
52
|
spec.add_runtime_dependency 'parse-cron', '~> 0.1'
|
53
53
|
spec.add_runtime_dependency 'linkeddata', '~> 3.3'
|
54
54
|
spec.add_runtime_dependency 'connection_pool', '~> 2.4'
|
55
|
+
spec.add_runtime_dependency 'open-uri-and-write', '~> 0.1.0'
|
55
56
|
#spec.add_runtime_dependency 'grpc', '~> 1.61'
|
56
57
|
|
57
58
|
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# Extensions and modifications (monkeypatching) to the File class:
|
2
|
+
|
3
|
+
# Monkeypatches File.exist?, File.delete and File.open so that http(s) URLs
# are routed to a WebDAV connection (via OpenUriAndWrite) while every other
# argument is delegated to the core implementation.
class File
  class << self
    # Keep handles to the core implementations so the overrides below can
    # delegate to them. The guards make loading this file twice harmless:
    # without them the second load would alias the overrides onto themselves
    # and every call would recurse forever.
    alias_method :original_delete, :delete unless method_defined?(:original_delete)
    alias_method :original_open, :open unless method_defined?(:original_open)
    alias_method :original_exist?, :exist? unless method_defined?(:original_exist?)
  end

  # Tells whether +name+ exists.
  #
  # @param name [String, #to_path, IO] path, IO, or an http(s) URL
  # @return [Boolean] for local paths the result of the core File.exist?;
  #   for URLs whatever the WebDAV connection's #exist? returns
  def self.exist?(name)
    if webdav_url?(name)
      dav = OpenUriAndWrite::CredentialsStore.get_connection_for_url(name)
      dav.exist?(name)
    else
      original_exist?(name)
    end
  end

  # Deletes the given files. Accepts the core variadic form
  # (File.delete(a, b)) as well as a single Array of names.
  #
  # @param names [Array<String, #to_path, Array>] paths and/or http(s) URLs
  # @return [Integer] number of names processed, like the core File.delete
  def self.delete(*names)
    filenames = names.flatten
    filenames.each do |filename|
      if webdav_url?(filename)
        dav = OpenUriAndWrite::CredentialsStore.get_connection_for_url(filename)
        dav.delete(filename)
      else
        original_delete(filename)
      end
    end
    filenames.size
  end

  # Opens +name+.
  #
  # * Objects that respond to #open (e.g. Pathname, URI) open themselves.
  # * An http(s) URL opened with a mode starting with "w" yields/returns an
  #   OpenUriAndWrite::Handle.
  # * Everything else goes to the core File.open.
  #
  # @param name [String, #open, Integer] what to open
  # @param rest [Array] mode, permissions and an optional trailing options Hash
  # @return [Object] the block's result when a block is given, otherwise the
  #   opened handle
  def self.open(name, *rest, &block)
    if name.respond_to?(:open)
      name.open(*rest, &block)
    elsif webdav_url?(name) && rest.first.to_s.match?(/\Aw/)
      webdav_agent = OpenUriAndWrite::Handle.new(name, rest)
      return webdav_agent unless block_given?

      yield webdav_agent
    else
      # This method has no **kwargs, so options given as keywords arrive as a
      # trailing Hash in +rest+. Symbolize its keys and hand it back to the
      # core method as real keywords; Ruby 3 no longer converts a positional
      # Hash into keyword arguments.
      positional = rest.dup
      keywords = positional.last.is_a?(Hash) ? positional.pop : {}
      keywords = keywords.transform_keys { |key| key.to_s.to_sym }

      original_open(name, *positional, **keywords, &block)
    end
  end

  # True only for Strings that start with http:// or https://. Pathname, IO
  # and Integer arguments are never treated as URLs, so they reach the core
  # implementation untouched.
  def self.webdav_url?(name)
    name.is_a?(String) && name.match?(%r{\Ahttps?://})
  end
  private_class_method :webdav_url?
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
require 'uri'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'net/dav'
|
5
|
+
require 'highline/import'
|
6
|
+
|
7
|
+
require 'open-uri-and-write/handle'
|
8
|
+
require 'open-uri-and-write/usernames'
|
9
|
+
require 'open-uri-and-write/credentials_store'
|
10
|
+
require_relative 'file'
|
11
|
+
require 'open-uri-and-write/dir_extensions'
|
12
|
+
require 'open-uri-and-write/kernel_extensions'
|
13
|
+
|
14
|
+
module OpenUriAndWrite
  # Reopens Handle (a StringIO subclass) to override #write.
  class Handle < StringIO
    # Appends +string+ to the in-memory buffer and then sends +string+ to
    # @url through @dav.put_string.
    #
    # @param string [String] the data to write
    # @return [Object] whatever @dav.put_string returns
    # @raise [IOError] when @filemode starts with "r"
    def write(string)
      raise IOError, "not opened for writing" if @filemode[/^r/]

      super(string)
      # NOTE(review): only the chunk just written is sent, not the whole
      # buffer - confirm this is what the remote side expects.
      @dav.put_string(@url, string)
    end
  end
end
|
data/lib/data_collector/input.rb
CHANGED
@@ -180,10 +180,7 @@ module DataCollector
|
|
180
180
|
when 'text/html'
|
181
181
|
data = html_to_hash(data, options)
|
182
182
|
when 'text/turtle'
|
183
|
-
|
184
|
-
RDF::Turtle::Reader.new(data) { |reader| graph << reader }
|
185
|
-
end
|
186
|
-
data = JSON.parse(graph.dump(:jsonld, validate: false, standard_prefixes: true))
|
183
|
+
data = rdf_to_hash(data, options)
|
187
184
|
when /^image/
|
188
185
|
options['file_type'] = file_type
|
189
186
|
data = image_to_data(data, options)
|
@@ -208,6 +205,13 @@ module DataCollector
|
|
208
205
|
data
|
209
206
|
end
|
210
207
|
|
208
|
+
# Reads +data+ with RDF::Turtle::Reader into an RDF::Graph and returns that
# graph dumped as JSON-LD and parsed back with JSON.parse.
#
# @param data the input handed to RDF::Turtle::Reader.new
# @param options [Hash] accepted but not used by this method
# @return the result of JSON.parse on the graph's JSON-LD dump
def rdf_to_hash(data, options = {})
  turtle_graph = RDF::Graph.new do |target|
    RDF::Turtle::Reader.new(data) { |reader| target << reader }
  end
  jsonld = turtle_graph.dump(:jsonld, validate: false, standard_prefixes: true)
  JSON.parse(jsonld)
end
|
214
|
+
|
211
215
|
def from_stringio(sio, options = {}, &block)
|
212
216
|
raise DataCollector::InputError, "No IO input" unless sio.is_a?(StringIO)
|
213
217
|
raise DataCollector::InputError, "content_type option not supplied" unless options.key?(:content_type)
|
@@ -271,6 +275,8 @@ module DataCollector
|
|
271
275
|
data = csv_to_hash(data, options)
|
272
276
|
when '.jpg', '.png', '.gif'
|
273
277
|
data = image_to_data(data, options)
|
278
|
+
when '.ttl'
|
279
|
+
data = rdf_to_hash(data, options)
|
274
280
|
else
|
275
281
|
raise "Do not know how to process #{uri.to_s}"
|
276
282
|
end
|
data/lib/proxy_logger.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_collector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.55.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mehmet Celik
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-01-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -234,6 +234,20 @@ dependencies:
|
|
234
234
|
- - "~>"
|
235
235
|
- !ruby/object:Gem::Version
|
236
236
|
version: '2.4'
|
237
|
+
- !ruby/object:Gem::Dependency
|
238
|
+
name: open-uri-and-write
|
239
|
+
requirement: !ruby/object:Gem::Requirement
|
240
|
+
requirements:
|
241
|
+
- - "~>"
|
242
|
+
- !ruby/object:Gem::Version
|
243
|
+
version: 0.1.0
|
244
|
+
type: :runtime
|
245
|
+
prerelease: false
|
246
|
+
version_requirements: !ruby/object:Gem::Requirement
|
247
|
+
requirements:
|
248
|
+
- - "~>"
|
249
|
+
- !ruby/object:Gem::Version
|
250
|
+
version: 0.1.0
|
237
251
|
- !ruby/object:Gem::Dependency
|
238
252
|
name: bundler
|
239
253
|
requirement: !ruby/object:Gem::Requirement
|
@@ -311,7 +325,9 @@ files:
|
|
311
325
|
- lib/data_collector/config_file.rb
|
312
326
|
- lib/data_collector/core.rb
|
313
327
|
- lib/data_collector/ext/base.rb
|
328
|
+
- lib/data_collector/ext/file.rb
|
314
329
|
- lib/data_collector/ext/nokogiri.rb
|
330
|
+
- lib/data_collector/ext/x-open-uri-and-write.rb
|
315
331
|
- lib/data_collector/ext/xml_utility_node.rb
|
316
332
|
- lib/data_collector/input.rb
|
317
333
|
- lib/data_collector/input/dir.rb
|