data_collector 0.53.0 → 0.55.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 749914ea43bad2734c79a131eb5db2f1ef2b953fa386a9cb3eb50698e7d2f1f4
4
- data.tar.gz: 0402100bbfe402ad1c96cc0b767c608a917402e748ea4f57b9bf298ca9510dba
3
+ metadata.gz: a5cc4cf83290713c75860b1cf70e90ae019e423b53f6909b425ba0f134af9672
4
+ data.tar.gz: eb8d383233e5d8492c3112582cb820bc4b28eeed0bea33d0bd715532456c24c1
5
5
  SHA512:
6
- metadata.gz: b00c092e0670a26a85bbe1299c687a132c65af7950c5b0f74ff537a8ba948af27617164bd8beebc3b072d424fafdd58865e8cafbaa03d5e6aafb8e682806ba43
7
- data.tar.gz: 4072e26cfd9a2ef57b094b3ea7cc340f0d735e8ca4356d6e346f725b2486e7284b624936724c0545677571d7ff2a0c447785374778cb55080e69cf54bc7df1a3
6
+ metadata.gz: 14b5ef4e812c16fb05a1ac75ab38d365eed90ce9883d09053cfe656df8d2d3c5101bc81e8d439a7ceaa65de1552b451d64bdfd5c614844502ee6841ef25c3ef2
7
+ data.tar.gz: ecc4b0d595804845ec423f4270578f053ffc5c987ed6c082ad9a1bc068f2a4ebe9ae0b5eb36b55826bf82a0caf7f2e8797b2ff7d9a9a597848357c29912c6bd7
data/README.md CHANGED
@@ -95,6 +95,7 @@ A push happens when new data is created in a directory, message queue, ...
95
95
  input.from_uri("http://www.libis.be/record.jsonld", content_type: 'application/ld+json')
96
96
  input.from_uri("https://www.w3.org/TR/rdf12-turtle/examples/example1.ttl")
97
97
  input.from_uri("https://dbpedia.org/sparql", body: "query=SELECT * WHERE {?sub ?pred ?obj} LIMIT 10", method:"POST", headers: {accept: "text/turtle"})
98
+ input.from_uri(StringIO.new(File.read('myrecords.xml')), content_type: 'application/xml' )
98
99
 
99
100
  # read data from a RabbitMQ queue
100
101
  listener = input.from_uri('amqp://user:password@localhost?channel=hello&queue=world')
@@ -52,6 +52,7 @@ Gem::Specification.new do |spec|
52
52
  spec.add_runtime_dependency 'parse-cron', '~> 0.1'
53
53
  spec.add_runtime_dependency 'linkeddata', '~> 3.3'
54
54
  spec.add_runtime_dependency 'connection_pool', '~> 2.4'
55
+ spec.add_runtime_dependency 'open-uri-and-write', '~> 0.1.0'
55
56
  #spec.add_runtime_dependency 'grpc', '~> 1.61'
56
57
 
57
58
 
@@ -0,0 +1,60 @@
1
+ # Extensions and modifications (monkeypatching) to the File class:
2
+
3
# Extensions and modifications (monkeypatching) to the File class.
#
# File.exist?, File.delete and File.open are overridden so that names starting
# with http:// or https:// are routed to OpenUriAndWrite (WebDAV), while every
# other name is passed on to the original core implementation, which stays
# reachable as original_exist?, original_delete and original_open.
class File
  class << self
    # Alias only once: if this file were loaded a second time the aliases
    # would otherwise point at the overrides below and recurse forever.
    unless method_defined?(:original_open)
      alias original_delete delete
      alias original_open open
      alias original_exist? exist?
    end
  end

  # True when +name+ is a String starting with http:// or https://.
  # Non-String names (Pathname, IO, Integer, ...) are never treated as remote,
  # so they can be handed to the core implementation untouched.
  def self.webdav_url?(name)
    name.is_a?(String) && name.match?(%r{\Ahttps?://})
  end

  # Splits the arguments of an open call into [positional, keywords].
  # A trailing Hash is taken to be the options hash; its keys are converted
  # to symbols so it can be forwarded as keyword arguments.
  def self.split_open_options(args)
    return [args, {}] unless args.last.is_a?(Hash)

    [args[0...-1], args.last.transform_keys { |key| key.to_s.to_sym }]
  end

  private_class_method :webdav_url?, :split_open_options

  # Checks whether +name+ exists.
  #
  # @param name [String, #to_path] local path or http(s) URL
  # @return whatever the WebDAV connection's exist? returns for URLs,
  #   otherwise the result of the core File.exist?
  def self.exist?(name)
    if webdav_url?(name)
      dav = OpenUriAndWrite::CredentialsStore.get_connection_for_url(name)
      dav.exist?(name)
    else
      original_exist?(name)
    end
  end

  # Deletes local files and/or remote (http/https) resources.
  #
  # Accepts a single name, an Array of names, or several names as separate
  # arguments (the signature of the core File.delete).
  #
  # @param names [String, Array<String>] paths and/or URLs to delete
  # @return [Integer] number of names processed, like the core File.delete
  def self.delete(*names)
    targets = names.flatten
    targets.each do |target|
      if webdav_url?(target)
        dav = OpenUriAndWrite::CredentialsStore.get_connection_for_url(target)
        dav.delete(target)
      else
        original_delete(target)
      end
    end
    targets.size
  end

  # Opens +name+.
  #
  # * An http(s) URL opened with a mode starting with "w" yields (or returns,
  #   when no block is given) an OpenUriAndWrite::Handle.
  # * An object that responds to #open (e.g. Pathname) is asked to open itself.
  # * Everything else goes to the core implementation.
  #
  # A trailing options Hash is forwarded as keyword arguments (keys converted
  # to symbols); passing it positionally would not reach the core method as
  # options on Ruby 3.
  #
  # @param name [String, #open] path, URL or openable object
  # @param rest [Array] mode, permissions and/or an options Hash
  # @return the block's value when a block is given, otherwise the opened object
  def self.open(name, *rest, &block)
    if name.respond_to?(:open)
      positional, keywords = split_open_options(rest)
      name.open(*positional, **keywords, &block)
    elsif webdav_url?(name) && rest.first.to_s.start_with?('w')
      # Handle receives the untouched argument list, exactly as before.
      webdav_agent = OpenUriAndWrite::Handle.new(name, rest)
      block ? yield(webdav_agent) : webdav_agent
    else
      positional, keywords = split_open_options(rest)
      original_open(name, *positional, **keywords, &block)
    end
  end
end
@@ -0,0 +1,25 @@
1
+ require 'stringio'
2
+ require 'uri'
3
+ require 'open-uri'
4
+ require 'net/dav'
5
+ require 'highline/import'
6
+
7
+ require 'open-uri-and-write/handle'
8
+ require 'open-uri-and-write/usernames'
9
+ require 'open-uri-and-write/credentials_store'
10
+ require_relative 'file'
11
+ require 'open-uri-and-write/dir_extensions'
12
+ require 'open-uri-and-write/kernel_extensions'
13
+
14
module OpenUriAndWrite
  # Reopens Handle (a StringIO subclass) to override #write.
  #
  # The instance variables @filemode, @dav and @url are read here but not set
  # in this file; presumably the gem's own Handle#initialize sets them
  # (required above via 'open-uri-and-write/handle') -- TODO confirm.
  class Handle < StringIO
    # Appends +string+ to the in-memory buffer and uploads it through @dav.
    #
    # Note that only the +string+ given to this call is passed to put_string,
    # not the accumulated buffer contents.
    # NOTE(review): any mode starting with "r" is rejected, which includes
    # "r+" -- confirm that is intended.
    #
    # @param string [String] data to write
    # @return the result of @dav.put_string
    # @raise [IOError] when the handle's file mode starts with "r"
    def write(string)
      raise IOError, "not opened for writing" if @filemode[/^r/]

      super(string)
      @dav.put_string(@url, string)
    end
  end
end
@@ -180,10 +180,7 @@ module DataCollector
180
180
  when 'text/html'
181
181
  data = html_to_hash(data, options)
182
182
  when 'text/turtle'
183
- graph = RDF::Graph.new do |graph|
184
- RDF::Turtle::Reader.new(data) { |reader| graph << reader }
185
- end
186
- data = JSON.parse(graph.dump(:jsonld, validate: false, standard_prefixes: true))
183
+ data = rdf_to_hash(data, options)
187
184
  when /^image/
188
185
  options['file_type'] = file_type
189
186
  data = image_to_data(data, options)
@@ -208,6 +205,13 @@ module DataCollector
208
205
  data
209
206
  end
210
207
 
208
# Parses Turtle-serialised RDF and returns it as parsed JSON-LD.
#
# The input is read with RDF::Turtle::Reader into an RDF::Graph, the graph is
# dumped as JSON-LD (validation off, standard prefixes on) and that JSON text
# is parsed with JSON.parse.
#
# @param data Turtle input handed to RDF::Turtle::Reader.new
# @param options [Hash] accepted for parity with the other *_to_hash
#   converters; not used by this method
# @return the result of JSON.parse on the JSON-LD dump
def rdf_to_hash(data, options = {})
  # The block parameter gets its own name so it no longer shadows the local.
  graph = RDF::Graph.new do |target|
    RDF::Turtle::Reader.new(data) { |reader| target << reader }
  end
  JSON.parse(graph.dump(:jsonld, validate: false, standard_prefixes: true))
end
214
+
211
215
  def from_stringio(sio, options = {}, &block)
212
216
  raise DataCollector::InputError, "No IO input" unless sio.is_a?(StringIO)
213
217
  raise DataCollector::InputError, "content_type option not supplied" unless options.key?(:content_type)
@@ -271,6 +275,8 @@ module DataCollector
271
275
  data = csv_to_hash(data, options)
272
276
  when '.jpg', '.png', '.gif'
273
277
  data = image_to_data(data, options)
278
+ when '.ttl'
279
+ data = rdf_to_hash(data, options)
274
280
  else
275
281
  raise "Do not know how to process #{uri.to_s}"
276
282
  end
@@ -1,4 +1,4 @@
1
1
  # encoding: utf-8
2
2
  module DataCollector
3
- VERSION = "0.53.0"
3
+ VERSION = "0.55.0"
4
4
  end
data/lib/proxy_logger.rb CHANGED
@@ -1,3 +1,5 @@
1
+ require_relative 'data_collector/ext/x-open-uri-and-write'
2
+
1
3
  class ProxyLogger
2
4
  attr_reader :targets
3
5
  def initialize(*targets)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_collector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.53.0
4
+ version: 0.55.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mehmet Celik
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-07-05 00:00:00.000000000 Z
11
+ date: 2025-01-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -234,6 +234,20 @@ dependencies:
234
234
  - - "~>"
235
235
  - !ruby/object:Gem::Version
236
236
  version: '2.4'
237
+ - !ruby/object:Gem::Dependency
238
+ name: open-uri-and-write
239
+ requirement: !ruby/object:Gem::Requirement
240
+ requirements:
241
+ - - "~>"
242
+ - !ruby/object:Gem::Version
243
+ version: 0.1.0
244
+ type: :runtime
245
+ prerelease: false
246
+ version_requirements: !ruby/object:Gem::Requirement
247
+ requirements:
248
+ - - "~>"
249
+ - !ruby/object:Gem::Version
250
+ version: 0.1.0
237
251
  - !ruby/object:Gem::Dependency
238
252
  name: bundler
239
253
  requirement: !ruby/object:Gem::Requirement
@@ -311,7 +325,9 @@ files:
311
325
  - lib/data_collector/config_file.rb
312
326
  - lib/data_collector/core.rb
313
327
  - lib/data_collector/ext/base.rb
328
+ - lib/data_collector/ext/file.rb
314
329
  - lib/data_collector/ext/nokogiri.rb
330
+ - lib/data_collector/ext/x-open-uri-and-write.rb
315
331
  - lib/data_collector/ext/xml_utility_node.rb
316
332
  - lib/data_collector/input.rb
317
333
  - lib/data_collector/input/dir.rb