data_collector 0.60.0 → 0.61.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/data_collector.gemspec +2 -0
- data/lib/data_collector/input.rb +34 -5
- data/lib/data_collector/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ef5644ca619ea4368ca7e6620fd5b692995b3f81206837e91a7f8b576799014e
|
4
|
+
data.tar.gz: 1c064d51a23891829cc376822de08368e1129cc5891a13a47e3c66e893a75e11
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 524275f37414a0a67566ae4d942ffee36de24980c99138c87947f71c995adf57924a2d5e34de5e7e0b3b313e043132c7dbe76eb364ecd0bc014e3e5eca120466
|
7
|
+
data.tar.gz: 5f54cc53b400f1778edf35b3e9ba5b9a981b17a688467ba05fd7f2e18449002c10ea55bc40d6c152d12481f7957ec17baf9ebf278a1bcab0b990000d1ab3873f
|
data/data_collector.gemspec
CHANGED
@@ -14,6 +14,8 @@ Gem::Specification.new do |spec|
|
|
14
14
|
spec.homepage = 'https://github.com/mehmetc/data_collector'
|
15
15
|
spec.license = 'MIT'
|
16
16
|
|
17
|
+
spec.required_ruby_version = ">= 3.4"
|
18
|
+
|
17
19
|
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
18
20
|
# to allow pushing to a single host or delete this section to allow pushing to any host.
|
19
21
|
if spec.respond_to?(:metadata)
|
data/lib/data_collector/input.rb
CHANGED
@@ -32,6 +32,8 @@ module DataCollector
|
|
32
32
|
data = nil
|
33
33
|
if source.is_a?(StringIO)
|
34
34
|
data = from_stringio(source, options)
|
35
|
+
elsif source.respond_to?(:read)
|
36
|
+
data = from_tempfile(source, options)
|
35
37
|
else
|
36
38
|
source = CGI.unescapeHTML(source)
|
37
39
|
@logger.info("Reading #{source}")
|
@@ -153,7 +155,21 @@ module DataCollector
|
|
153
155
|
|
154
156
|
case http_response.code
|
155
157
|
when 200..299
|
156
|
-
|
158
|
+
|
159
|
+
if http_response.code == 206
|
160
|
+
@logger.debug "HTTP response 206 Partial Content"
|
161
|
+
data = http_response.body.readpartial
|
162
|
+
loop do
|
163
|
+
partial_data = http_response.body.readpartial
|
164
|
+
if partial_data.nil? || partial_data.empty?
|
165
|
+
break
|
166
|
+
end
|
167
|
+
data = data + partial_data.to_s
|
168
|
+
end
|
169
|
+
@raw = data
|
170
|
+
else
|
171
|
+
@raw = data = http_response.body.to_s
|
172
|
+
end
|
157
173
|
|
158
174
|
file_type = options.with_indifferent_access.has_key?(:content_type) ? options.with_indifferent_access[:content_type] : file_type_from(http_response.headers)
|
159
175
|
|
@@ -185,8 +201,6 @@ module DataCollector
|
|
185
201
|
end
|
186
202
|
end
|
187
203
|
|
188
|
-
raise '206 Partial Content' if http_response.code == 206
|
189
|
-
|
190
204
|
when 401
|
191
205
|
raise DataCollector::InputError, 'Unauthorized'
|
192
206
|
when 403
|
@@ -210,6 +224,17 @@ module DataCollector
|
|
210
224
|
data = JSON.parse(graph.dump(:jsonld, validate: false, standard_prefixes: true))
|
211
225
|
end
|
212
226
|
|
227
|
+
def from_tempfile(tempfile, options = {}, &block)
|
228
|
+
#file = Tempfile.new(["dc_", ".#{preferred_extension}"])
|
229
|
+
begin
|
230
|
+
tempfile.rewind
|
231
|
+
#file.write(temp_input_file.read)
|
232
|
+
#file.close
|
233
|
+
from_file(URI("file://#{tempfile.path}"), options)
|
234
|
+
#ensure
|
235
|
+
#file.unlink
|
236
|
+
end
|
237
|
+
end
|
213
238
|
def from_stringio(sio, options = {}, &block)
|
214
239
|
raise DataCollector::InputError, "No IO input" unless sio.is_a?(StringIO)
|
215
240
|
raise DataCollector::InputError, "content_type option not supplied" unless options.key?(:content_type)
|
@@ -232,11 +257,15 @@ module DataCollector
|
|
232
257
|
uri = normalize_uri(uri)
|
233
258
|
absolute_path = File.absolute_path(uri)
|
234
259
|
file_type = MIME::Types.type_for(uri).first.to_s
|
235
|
-
|
260
|
+
file_type = File.extname(absolute_path) if file_type.empty?
|
261
|
+
options['file_type'] = MIME::Types[(options[:content_type] || file_type)].first.preferred_extension
|
262
|
+
|
263
|
+
options['file_extention'] = ".#{options['file_type']}"
|
236
264
|
raise DataCollector::Error, "#{uri.to_s} not found" unless File.exist?("#{absolute_path}")
|
237
265
|
unless options.has_key?('raw') && options['raw'] == true
|
238
266
|
@raw = data = File.read("#{absolute_path}")
|
239
|
-
|
267
|
+
|
268
|
+
case options['file_extention']
|
240
269
|
when '.jsonld'
|
241
270
|
data = JSON.parse(data)
|
242
271
|
when '.json'
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_collector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.60.0
|
4
|
+
version: 0.61.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mehmet Celik
|
8
8
|
bindir: exe
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-
|
10
|
+
date: 2025-03-12 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: activesupport
|
@@ -400,7 +400,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
400
400
|
requirements:
|
401
401
|
- - ">="
|
402
402
|
- !ruby/object:Gem::Version
|
403
|
-
version: '
|
403
|
+
version: '3.4'
|
404
404
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
405
405
|
requirements:
|
406
406
|
- - ">="
|