data_collector 0.60.0 → 0.62.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/data_collector.gemspec +2 -0
- data/lib/data_collector/config_file.rb +34 -2
- data/lib/data_collector/core.rb +3 -1
- data/lib/data_collector/input.rb +34 -5
- data/lib/data_collector/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '0923a758b6a621afd2d2fb4aa85f580cb757ee3ca1150e7175827a9bf617e07b'
|
4
|
+
data.tar.gz: 1a83fecc0244088447747f4c196ffcfddc6cb70e5ab8f40232a0401ac9912fc3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d6d75c0bd763bbf2c107ebe7e5bc2c5c813454c6ad496eb234d38c79fae01525969e5d8b6abd601cf3103f8363f3f7965957cb54103eb10aaa80b65f80aa3073
|
7
|
+
data.tar.gz: '029d8bbf6819f90b955f373d8ee8e795fca2b130fc1900b487ef692a1b56cf7f7a8d02b222637bf83361354f1e795713c731eba90fb446f2906a758986c99af1'
|
data/data_collector.gemspec
CHANGED
@@ -14,6 +14,8 @@ Gem::Specification.new do |spec|
|
|
14
14
|
spec.homepage = 'https://github.com/mehmetc/data_collector'
|
15
15
|
spec.license = 'MIT'
|
16
16
|
|
17
|
+
spec.required_ruby_version = ">= 3.4"
|
18
|
+
|
17
19
|
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
18
20
|
# to allow pushing to a single host or delete this section to allow pushing to any host.
|
19
21
|
if spec.respond_to?(:metadata)
|
data/lib/data_collector/config_file.rb
CHANGED
@@ -10,7 +10,7 @@ module DataCollector
|
|
10
10
|
@mtime = nil
|
11
11
|
|
12
12
|
def self.version
|
13
|
-
'0.0.
|
13
|
+
'0.0.4'
|
14
14
|
end
|
15
15
|
|
16
16
|
def self.name
|
@@ -59,7 +59,8 @@ module DataCollector
|
|
59
59
|
|
60
60
|
ftime = File.exist?("#{@config_file_path}/#{@config_file_name}") ? File.mtime("#{@config_file_path}/#{@config_file_name}") : nil
|
61
61
|
if @config.nil? || @config.empty? || @mtime != ftime
|
62
|
-
config = YAML::load_file("#{@config_file_path}/#{@config_file_name}", aliases: true, permitted_classes: [Time, Symbol])
|
62
|
+
# config = YAML::load_file("#{@config_file_path}/#{@config_file_name}", aliases: true, permitted_classes: [Time, Symbol])
|
63
|
+
config = interpret_yaml_with_expressions("#{@config_file_path}/#{@config_file_name}", ENV)
|
63
64
|
@config = process(config)
|
64
65
|
end
|
65
66
|
end
|
@@ -92,5 +93,36 @@ module DataCollector
|
|
92
93
|
private_class_method :init
|
93
94
|
private_class_method :discover_config_file_path
|
94
95
|
private_class_method :process
|
96
|
+
|
97
|
+
private
|
98
|
+
def self.interpret_yaml_with_expressions(yaml_file, variables = {})
|
99
|
+
# Read the YAML file content
|
100
|
+
content = File.read(yaml_file)
|
101
|
+
|
102
|
+
# Process any :key: "${expression}" patterns
|
103
|
+
processed_content = content.gsub(/:key:\s*"?\$\{([^}]+)\}"?/) do |match|
|
104
|
+
expression = $1.strip
|
105
|
+
|
106
|
+
# Evaluate the expression using the provided variables
|
107
|
+
if variables.key?(expression)
|
108
|
+
# Replace with the variable value
|
109
|
+
match.gsub("${#{expression}}", variables[expression].to_s)
|
110
|
+
elsif expression.include?('.')
|
111
|
+
# Handle dot notation for nested variables
|
112
|
+
parts = expression.split('.')
|
113
|
+
value = variables
|
114
|
+
parts.each do |part|
|
115
|
+
value = value[part.to_sym] if value.is_a?(Hash) && value.key?(part.to_sym)
|
116
|
+
end
|
117
|
+
value.to_s
|
118
|
+
else
|
119
|
+
# Keep the original if we can't evaluate
|
120
|
+
match
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
# Parse the processed YAML content
|
125
|
+
YAML.load(processed_content, aliases: true, permitted_classes: [Time, Symbol])
|
126
|
+
end
|
95
127
|
end
|
96
128
|
end
|
data/lib/data_collector/core.rb
CHANGED
@@ -138,7 +138,9 @@ module DataCollector
|
|
138
138
|
module_function :error
|
139
139
|
|
140
140
|
def logger(*destinations)
|
141
|
-
@logger
|
141
|
+
@logger if @logger && (@log_destination.eql?(destinations.flatten) || destinations.empty? || destinations.nil?)
|
142
|
+
@log_destination = destinations.flatten
|
143
|
+
@logger = begin
|
142
144
|
destinations = STDOUT if destinations.nil? || destinations.empty?
|
143
145
|
Logger.new(ProxyLogger.new(destinations))
|
144
146
|
rescue StandardError => e
|
data/lib/data_collector/input.rb
CHANGED
@@ -32,6 +32,8 @@ module DataCollector
|
|
32
32
|
data = nil
|
33
33
|
if source.is_a?(StringIO)
|
34
34
|
data = from_stringio(source, options)
|
35
|
+
elsif source.respond_to?(:read)
|
36
|
+
data = from_tempfile(source, options)
|
35
37
|
else
|
36
38
|
source = CGI.unescapeHTML(source)
|
37
39
|
@logger.info("Reading #{source}")
|
@@ -153,7 +155,21 @@ module DataCollector
|
|
153
155
|
|
154
156
|
case http_response.code
|
155
157
|
when 200..299
|
156
|
-
|
158
|
+
|
159
|
+
if http_response.code == 206
|
160
|
+
@logger.debug "HTTP response 206 Partial Content"
|
161
|
+
data = http_response.body.readpartial
|
162
|
+
loop do
|
163
|
+
partial_data = http_response.body.readpartial
|
164
|
+
if partial_data.nil? || partial_data.empty?
|
165
|
+
break
|
166
|
+
end
|
167
|
+
data = data + partial_data.to_s
|
168
|
+
end
|
169
|
+
@raw = data
|
170
|
+
else
|
171
|
+
@raw = data = http_response.body.to_s
|
172
|
+
end
|
157
173
|
|
158
174
|
file_type = options.with_indifferent_access.has_key?(:content_type) ? options.with_indifferent_access[:content_type] : file_type_from(http_response.headers)
|
159
175
|
|
@@ -185,8 +201,6 @@ module DataCollector
|
|
185
201
|
end
|
186
202
|
end
|
187
203
|
|
188
|
-
raise '206 Partial Content' if http_response.code == 206
|
189
|
-
|
190
204
|
when 401
|
191
205
|
raise DataCollector::InputError, 'Unauthorized'
|
192
206
|
when 403
|
@@ -210,6 +224,17 @@ module DataCollector
|
|
210
224
|
data = JSON.parse(graph.dump(:jsonld, validate: false, standard_prefixes: true))
|
211
225
|
end
|
212
226
|
|
227
|
+
def from_tempfile(tempfile, options = {}, &block)
|
228
|
+
#file = Tempfile.new(["dc_", ".#{preferred_extension}"])
|
229
|
+
begin
|
230
|
+
tempfile.rewind
|
231
|
+
#file.write(temp_input_file.read)
|
232
|
+
#file.close
|
233
|
+
from_file(URI("file://#{tempfile.path}"), options)
|
234
|
+
#ensure
|
235
|
+
#file.unlink
|
236
|
+
end
|
237
|
+
end
|
213
238
|
def from_stringio(sio, options = {}, &block)
|
214
239
|
raise DataCollector::InputError, "No IO input" unless sio.is_a?(StringIO)
|
215
240
|
raise DataCollector::InputError, "content_type option not supplied" unless options.key?(:content_type)
|
@@ -232,11 +257,15 @@ module DataCollector
|
|
232
257
|
uri = normalize_uri(uri)
|
233
258
|
absolute_path = File.absolute_path(uri)
|
234
259
|
file_type = MIME::Types.type_for(uri).first.to_s
|
235
|
-
|
260
|
+
file_type = File.extname(absolute_path) if file_type.empty?
|
261
|
+
options['file_type'] = MIME::Types[(options[:content_type] || file_type)].first.preferred_extension
|
262
|
+
|
263
|
+
options['file_extention'] = ".#{options['file_type']}"
|
236
264
|
raise DataCollector::Error, "#{uri.to_s} not found" unless File.exist?("#{absolute_path}")
|
237
265
|
unless options.has_key?('raw') && options['raw'] == true
|
238
266
|
@raw = data = File.read("#{absolute_path}")
|
239
|
-
|
267
|
+
|
268
|
+
case options['file_extention']
|
240
269
|
when '.jsonld'
|
241
270
|
data = JSON.parse(data)
|
242
271
|
when '.json'
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_collector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.60.0
|
4
|
+
version: 0.62.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mehmet Celik
|
8
8
|
bindir: exe
|
9
9
|
cert_chain: []
|
10
|
-
date:
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: activesupport
|
@@ -400,14 +400,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
400
400
|
requirements:
|
401
401
|
- - ">="
|
402
402
|
- !ruby/object:Gem::Version
|
403
|
-
version: '
|
403
|
+
version: '3.4'
|
404
404
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
405
405
|
requirements:
|
406
406
|
- - ">="
|
407
407
|
- !ruby/object:Gem::Version
|
408
408
|
version: '0'
|
409
409
|
requirements: []
|
410
|
-
rubygems_version: 3.6.
|
410
|
+
rubygems_version: 3.6.8
|
411
411
|
specification_version: 4
|
412
412
|
summary: ETL helper library
|
413
413
|
test_files: []
|