data_collector 0.23.0 → 0.25.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1a6db4367b12a41101620cc818a1995868aab20b65157de8c19ea60c68aa90a0
4
- data.tar.gz: 39a2cbcd665f85d71bda741a131c54d4f2871ae83ae63b86c9eed9f0d84e405a
3
+ metadata.gz: 8065c0d1b54cf1c39be5cfcb70a4fae1f33b02021182c75538ed03e73393d35a
4
+ data.tar.gz: 879054ad24b178f08bfea7914c5fbc593c32c611bd4e3ad833869a3e5b36d5b1
5
5
  SHA512:
6
- metadata.gz: 5d7e6e243f9693603c4502b2f1bbe2fb64b8cf95718be257066940d6fafed24a8352ded805f285b617e25f5fbd703bf6e812dbd9f2b85d5c22cc143e7cdfdc1e
7
- data.tar.gz: 73b0c03ef6ccb7855cfc1b6f424c543cc00fc79e182ccc2525c068f3b8d97535e4eaaaa66b0fd59225049d6a0dfb3dc55861319638dfb4c6df489ede6c1f9077
6
+ metadata.gz: 390ac889c52055cfd8f5326c6e7c1549faee6b8c41af4535b9fc5d3038701f62c441caaf41895d7ad64b4941e609fdee32ac745255b9cb2fbc0981d787b00847
7
+ data.tar.gz: dbc57c97f30e5659ccfebba0850e99eee24ecada4a5e9a136ae6b036f19b587a4d66460e509c32d36be13cd4d745ed89f9a73e11a32b1b15fe800036c1836bea
data/README.md CHANGED
@@ -15,6 +15,11 @@ You can set a schedule for pipelines that are triggered by new data, specifying
15
15
  executed in the [ISO8601 duration format](https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm). The processing logic is then executed.
16
16
  ###### methods:
17
17
  - .new(options): options can be a schedule in [ISO8601 duration format](https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm) and a name
18
+ - options:
19
+ - name: pipeline name
20
+ - schedule: [ISO8601 duration format](https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm)
21
+ - cron: a schedule in cron format, e.g. '1 12 * * *'; intervals are not supported
22
+ - uri: a directory/file to watch
18
23
  - .run: start the pipeline. blocking if a schedule is supplied
19
24
  - .stop: stop the pipeline
20
25
  - .pause: pause the pipeline. Restart using .run
@@ -37,6 +42,19 @@ end
37
42
  pipeline.run
38
43
  ```
39
44
 
45
+ ```ruby
46
+ #create a pipeline scheduled to run every morning at 06:00 am
47
+ pipeline = Pipeline.new(cron: '0 6 * * *')
48
+
49
+ pipeline.on_message do |input, output|
50
+ data = input.from_uri("https://dummyjson.com/comments?limit=10")
51
+ # process data
52
+ end
53
+
54
+ pipeline.run
55
+ ```
56
+
57
+
40
58
  ```ruby
41
59
  #create a pipeline to listen and process files in a directory
42
60
  extract = DataCollector::Pipeline.new(name: 'extract', uri: 'file://./data/in')
@@ -50,6 +50,7 @@ Gem::Specification.new do |spec|
50
50
  spec.add_runtime_dependency 'bunny', '~> 2.20'
51
51
  spec.add_runtime_dependency 'bunny_burrow', '~> 1.5'
52
52
  spec.add_runtime_dependency 'builder', '~> 3.2'
53
+ spec.add_runtime_dependency 'parse-cron', '~> 0.1'
53
54
 
54
55
  spec.add_development_dependency 'bundler', '~> 2.3'
55
56
  spec.add_development_dependency 'minitest', '~> 5.18'
@@ -26,7 +26,7 @@ module DataCollector
26
26
  private
27
27
 
28
28
  def create_listener
29
- absolute_path = File.absolute_path("#{URI.decode_uri_component(@uri.to_s)}")
29
+ absolute_path = File.absolute_path("#{@uri.host}#{@uri.path}")
30
30
  raise DataCollector::Error, "#{@uri.to_s} not found" unless File.exist?(absolute_path)
31
31
 
32
32
  @listener ||= Listen.to(absolute_path, @options) do |modified, added, _|
@@ -4,7 +4,7 @@ module DataCollector
4
4
  class Input
5
5
  class Generic
6
6
  def initialize(uri, options = {})
7
- @uri = URI(URI.decode_uri_component(uri.to_s))
7
+ @uri = URI(URI.decode_uri_component(uri.to_s)) #"#{uri.scheme}://#{URI.decode_uri_component(uri.host)}#{URI.decode_uri_component(uri.path)}"
8
8
  @options = options
9
9
  @running = false
10
10
 
@@ -160,7 +160,8 @@ module DataCollector
160
160
 
161
161
  def from_file(uri, options = {})
162
162
  data = nil
163
- absolute_path = File.absolute_path("#{URI.decode_uri_component(uri.to_s)}")
163
+ uri = normalize_uri(uri)
164
+ absolute_path = File.absolute_path(uri)
164
165
  raise DataCollector::Error, "#{uri.to_s} not found" unless File.exist?("#{absolute_path}")
165
166
  unless options.has_key?('raw') && options['raw'] == true
166
167
  @raw = data = File.read("#{absolute_path}")
@@ -233,5 +234,8 @@ module DataCollector
233
234
  return file_type
234
235
  end
235
236
 
237
+ def normalize_uri(uri)
238
+ "#{URI.decode_uri_component(uri.host)}#{URI.decode_uri_component(uri.path)}"
239
+ end
236
240
  end
237
241
  end
@@ -1,4 +1,5 @@
1
1
  require 'iso8601'
2
+ require 'parse-cron'
2
3
 
3
4
  module DataCollector
4
5
  class Pipeline
@@ -12,6 +13,7 @@ module DataCollector
12
13
  @run_count = 0
13
14
 
14
15
  @schedule = options[:schedule] || {}
16
+ @cron = options[:cron || '']
15
17
  @name = options[:name] || "pipeline-#{Time.now.to_i}-#{rand(10000)}"
16
18
  @options = options
17
19
  @listeners = []
@@ -43,6 +45,20 @@ module DataCollector
43
45
 
44
46
  DataCollector::Core.log("PIPELINE running in #{interval.size} seconds")
45
47
  sleep interval.size
48
+ handle_on_message(@input, @output) unless paused?
49
+ end
50
+ elsif @cron && !@cron.empty?
51
+ cron_parser = CronParser.new(@cron)
52
+ while running?
53
+ @run_count += 1
54
+ start_time = ISO8601::DateTime.new(Time.now.to_datetime.to_s)
55
+ next_run = cron_parser.next(start_time.to_time)
56
+
57
+ interval = ISO8601::TimeInterval.from_datetimes(start_time, ISO8601::DateTime.new(next_run.to_datetime.to_s))
58
+
59
+ DataCollector::Core.log("PIPELINE running at #{next_run.to_datetime.strftime('%Y-%m-%dT%H:%M:%S')} or in #{interval.size} seconds")
60
+ sleep interval.size
61
+
46
62
  handle_on_message(@input, @output) unless paused?
47
63
  end
48
64
  else # run once
@@ -1,4 +1,4 @@
1
1
  # encoding: utf-8
2
2
  module DataCollector
3
- VERSION = "0.23.0"
3
+ VERSION = "0.25.0"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_collector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.23.0
4
+ version: 0.25.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mehmet Celik
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-07-24 00:00:00.000000000 Z
11
+ date: 2023-07-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -206,6 +206,20 @@ dependencies:
206
206
  - - "~>"
207
207
  - !ruby/object:Gem::Version
208
208
  version: '3.2'
209
+ - !ruby/object:Gem::Dependency
210
+ name: parse-cron
211
+ requirement: !ruby/object:Gem::Requirement
212
+ requirements:
213
+ - - "~>"
214
+ - !ruby/object:Gem::Version
215
+ version: '0.1'
216
+ type: :runtime
217
+ prerelease: false
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ requirements:
220
+ - - "~>"
221
+ - !ruby/object:Gem::Version
222
+ version: '0.1'
209
223
  - !ruby/object:Gem::Dependency
210
224
  name: bundler
211
225
  requirement: !ruby/object:Gem::Requirement
@@ -319,7 +333,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
319
333
  - !ruby/object:Gem::Version
320
334
  version: '0'
321
335
  requirements: []
322
- rubygems_version: 3.4.10
336
+ rubygems_version: 3.4.13
323
337
  signing_key:
324
338
  specification_version: 4
325
339
  summary: ETL helper library