data_collector 0.60.0 → 0.62.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a929b89fe8361f9efa13b102aea2b7792ee4c524948978cb0da4521797d45489
4
- data.tar.gz: 0ab8859d19d8ba21ac72648fa845d18aeb5ef8af058dca4a5f2b417ec5826997
3
+ metadata.gz: '0923a758b6a621afd2d2fb4aa85f580cb757ee3ca1150e7175827a9bf617e07b'
4
+ data.tar.gz: 1a83fecc0244088447747f4c196ffcfddc6cb70e5ab8f40232a0401ac9912fc3
5
5
  SHA512:
6
- metadata.gz: cc2112dc0b352d19c3950c0a05c96905eb9646f687a305e12af8e930262271800e5db41d22578e2246c663ddcc901df7b439e781b61a8dd09873a8cb8d442e41
7
- data.tar.gz: d18ce58fe0f3d6544f054c8a389fb30700d76c60e597ce8b973e2f72c34a9bc21c6ee62cebf606b2d1de9b0249e91d5d9bc9afff4e97c12386d0b2eaf3c0a59c
6
+ metadata.gz: d6d75c0bd763bbf2c107ebe7e5bc2c5c813454c6ad496eb234d38c79fae01525969e5d8b6abd601cf3103f8363f3f7965957cb54103eb10aaa80b65f80aa3073
7
+ data.tar.gz: '029d8bbf6819f90b955f373d8ee8e795fca2b130fc1900b487ef692a1b56cf7f7a8d02b222637bf83361354f1e795713c731eba90fb446f2906a758986c99af1'
@@ -14,6 +14,8 @@ Gem::Specification.new do |spec|
14
14
  spec.homepage = 'https://github.com/mehmetc/data_collector'
15
15
  spec.license = 'MIT'
16
16
 
17
+ spec.required_ruby_version = ">= 3.4"
18
+
17
19
  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
18
20
  # to allow pushing to a single host or delete this section to allow pushing to any host.
19
21
  if spec.respond_to?(:metadata)
@@ -10,7 +10,7 @@ module DataCollector
10
10
  @mtime = nil
11
11
 
12
12
  def self.version
13
- '0.0.3'
13
+ '0.0.4'
14
14
  end
15
15
 
16
16
  def self.name
@@ -59,7 +59,8 @@ module DataCollector
59
59
 
60
60
  ftime = File.exist?("#{@config_file_path}/#{@config_file_name}") ? File.mtime("#{@config_file_path}/#{@config_file_name}") : nil
61
61
  if @config.nil? || @config.empty? || @mtime != ftime
62
- config = YAML::load_file("#{@config_file_path}/#{@config_file_name}", aliases: true, permitted_classes: [Time, Symbol])
62
+ # config = YAML::load_file("#{@config_file_path}/#{@config_file_name}", aliases: true, permitted_classes: [Time, Symbol])
63
+ config = interpret_yaml_with_expressions("#{@config_file_path}/#{@config_file_name}", ENV)
63
64
  @config = process(config)
64
65
  end
65
66
  end
@@ -92,5 +93,36 @@ module DataCollector
92
93
  private_class_method :init
93
94
  private_class_method :discover_config_file_path
94
95
  private_class_method :process
96
+
97
+ private
98
+ def self.interpret_yaml_with_expressions(yaml_file, variables = {})
99
+ # Read the YAML file content
100
+ content = File.read(yaml_file)
101
+
102
+ # Process any :key: "${expression}" patterns
103
+ processed_content = content.gsub(/:key:\s*"?\$\{([^}]+)\}"?/) do |match|
104
+ expression = $1.strip
105
+
106
+ # Evaluate the expression using the provided variables
107
+ if variables.key?(expression)
108
+ # Replace with the variable value
109
+ match.gsub("${#{expression}}", variables[expression].to_s)
110
+ elsif expression.include?('.')
111
+ # Handle dot notation for nested variables
112
+ parts = expression.split('.')
113
+ value = variables
114
+ parts.each do |part|
115
+ value = value[part.to_sym] if value.is_a?(Hash) && value.key?(part.to_sym)
116
+ end
117
+ value.to_s
118
+ else
119
+ # Keep the original if we can't evaluate
120
+ match
121
+ end
122
+ end
123
+
124
+ # Parse the processed YAML content
125
+ YAML.load(processed_content, aliases: true, permitted_classes: [Time, Symbol])
126
+ end
95
127
  end
96
128
  end
@@ -138,7 +138,9 @@ module DataCollector
138
138
  module_function :error
139
139
 
140
140
  def logger(*destinations)
141
- @logger ||= begin
141
+ @logger if @logger && (@log_destination.eql?(destinations.flatten) || destinations.empty? || destinations.nil?)
142
+ @log_destination = destinations.flatten
143
+ @logger = begin
142
144
  destinations = STDOUT if destinations.nil? || destinations.empty?
143
145
  Logger.new(ProxyLogger.new(destinations))
144
146
  rescue StandardError => e
@@ -32,6 +32,8 @@ module DataCollector
32
32
  data = nil
33
33
  if source.is_a?(StringIO)
34
34
  data = from_stringio(source, options)
35
+ elsif source.respond_to?(:read)
36
+ data = from_tempfile(source, options)
35
37
  else
36
38
  source = CGI.unescapeHTML(source)
37
39
  @logger.info("Reading #{source}")
@@ -153,7 +155,21 @@ module DataCollector
153
155
 
154
156
  case http_response.code
155
157
  when 200..299
156
- @raw = data = http_response.body.to_s
158
+
159
+ if http_response.code == 206
160
+ @logger.debug "HTTP response 206 Partial Content"
161
+ data = http_response.body.readpartial
162
+ loop do
163
+ partial_data = http_response.body.readpartial
164
+ if partial_data.nil? || partial_data.empty?
165
+ break
166
+ end
167
+ data = data + partial_data.to_s
168
+ end
169
+ @raw = data
170
+ else
171
+ @raw = data = http_response.body.to_s
172
+ end
157
173
 
158
174
  file_type = options.with_indifferent_access.has_key?(:content_type) ? options.with_indifferent_access[:content_type] : file_type_from(http_response.headers)
159
175
 
@@ -185,8 +201,6 @@ module DataCollector
185
201
  end
186
202
  end
187
203
 
188
- raise '206 Partial Content' if http_response.code == 206
189
-
190
204
  when 401
191
205
  raise DataCollector::InputError, 'Unauthorized'
192
206
  when 403
@@ -210,6 +224,17 @@ module DataCollector
210
224
  data = JSON.parse(graph.dump(:jsonld, validate: false, standard_prefixes: true))
211
225
  end
212
226
 
227
+ def from_tempfile(tempfile, options = {}, &block)
228
+ #file = Tempfile.new(["dc_", ".#{preferred_extension}"])
229
+ begin
230
+ tempfile.rewind
231
+ #file.write(temp_input_file.read)
232
+ #file.close
233
+ from_file(URI("file://#{tempfile.path}"), options)
234
+ #ensure
235
+ #file.unlink
236
+ end
237
+ end
213
238
  def from_stringio(sio, options = {}, &block)
214
239
  raise DataCollector::InputError, "No IO input" unless sio.is_a?(StringIO)
215
240
  raise DataCollector::InputError, "content_type option not supplied" unless options.key?(:content_type)
@@ -232,11 +257,15 @@ module DataCollector
232
257
  uri = normalize_uri(uri)
233
258
  absolute_path = File.absolute_path(uri)
234
259
  file_type = MIME::Types.type_for(uri).first.to_s
235
- options['file_type'] = options[:content_type] || file_type
260
+ file_type = File.extname(absolute_path) if file_type.empty?
261
+ options['file_type'] = MIME::Types[(options[:content_type] || file_type)].first.preferred_extension
262
+
263
+ options['file_extention'] = ".#{options['file_type']}"
236
264
  raise DataCollector::Error, "#{uri.to_s} not found" unless File.exist?("#{absolute_path}")
237
265
  unless options.has_key?('raw') && options['raw'] == true
238
266
  @raw = data = File.read("#{absolute_path}")
239
- case File.extname(absolute_path)
267
+
268
+ case options['file_extention']
240
269
  when '.jsonld'
241
270
  data = JSON.parse(data)
242
271
  when '.json'
@@ -1,4 +1,4 @@
1
1
  # encoding: utf-8
2
2
  module DataCollector
3
- VERSION = "0.60.0"
3
+ VERSION = "0.62.0"
4
4
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_collector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.60.0
4
+ version: 0.62.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mehmet Celik
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-02-24 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: activesupport
@@ -400,14 +400,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
400
400
  requirements:
401
401
  - - ">="
402
402
  - !ruby/object:Gem::Version
403
- version: '0'
403
+ version: '3.4'
404
404
  required_rubygems_version: !ruby/object:Gem::Requirement
405
405
  requirements:
406
406
  - - ">="
407
407
  - !ruby/object:Gem::Version
408
408
  version: '0'
409
409
  requirements: []
410
- rubygems_version: 3.6.2
410
+ rubygems_version: 3.6.8
411
411
  specification_version: 4
412
412
  summary: ETL helper library
413
413
  test_files: []