data_collector 0.24.0 → 0.25.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7745e79eb3836ab3c469cc5da39f395d42c144940b98115c96058fb01f8a629c
4
- data.tar.gz: 77aeb246a6a23477d07195c020091999825af21c9639c0a9679017daefeea9e9
3
+ metadata.gz: 8065c0d1b54cf1c39be5cfcb70a4fae1f33b02021182c75538ed03e73393d35a
4
+ data.tar.gz: 879054ad24b178f08bfea7914c5fbc593c32c611bd4e3ad833869a3e5b36d5b1
5
5
  SHA512:
6
- metadata.gz: e48596ac6e5fc14be89c2aadc50416558fa4a08594d28cccee630c1157dc365a556999c82d303e4669c94e1db88d6b0cf3f044730d0057586220a6c3172b72a6
7
- data.tar.gz: 2112dfa9191e8aa948317a16d581a9ea487327030cb013b43937757c144c33ca4fc975faee91fd6731d8f043ed0cefd5ecc28a562351c5de9b96196167871e36
6
+ metadata.gz: 390ac889c52055cfd8f5326c6e7c1549faee6b8c41af4535b9fc5d3038701f62c441caaf41895d7ad64b4941e609fdee32ac745255b9cb2fbc0981d787b00847
7
+ data.tar.gz: dbc57c97f30e5659ccfebba0850e99eee24ecada4a5e9a136ae6b036f19b587a4d66460e509c32d36be13cd4d745ed89f9a73e11a32b1b15fe800036c1836bea
data/README.md CHANGED
@@ -15,6 +15,11 @@ You can set a schedule for pipelines that are triggered by new data, specifying
15
15
  executed in the [ISO8601 duration format](https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm). The processing logic is then executed.
16
16
  ###### methods:
17
17
  - .new(options): options can be schedule in [ISO8601 duration format](https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm) and name
18
+ - options:
19
+ - name: pipeline name
20
+ - schedule: [ISO8601 duration format](https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm)
21
+ - cron: a schedule in cron format, e.g. '1 12 * * *'. Intervals are not supported.
22
+ - uri: a directory/file to watch
18
23
  - .run: start the pipeline. blocking if a schedule is supplied
19
24
  - .stop: stop the pipeline
20
25
  - .pause: pause the pipeline. Restart using .run
@@ -37,6 +42,19 @@ end
37
42
  pipeline.run
38
43
  ```
39
44
 
45
+ ```ruby
46
+ #create a pipeline scheduled to run every morning at 06:00 am
47
+ pipeline = Pipeline.new(cron: '0 6 * * *')
48
+
49
+ pipeline.on_message do |input, output|
50
+ data = input.from_uri("https://dummyjson.com/comments?limit=10")
51
+ # process data
52
+ end
53
+
54
+ pipeline.run
55
+ ```
56
+
57
+
40
58
  ```ruby
41
59
  #create a pipeline to listen and process files in a directory
42
60
  extract = DataCollector::Pipeline.new(name: 'extract', uri: 'file://./data/in')
@@ -50,6 +50,7 @@ Gem::Specification.new do |spec|
50
50
  spec.add_runtime_dependency 'bunny', '~> 2.20'
51
51
  spec.add_runtime_dependency 'bunny_burrow', '~> 1.5'
52
52
  spec.add_runtime_dependency 'builder', '~> 3.2'
53
+ spec.add_runtime_dependency 'parse-cron', '~> 0.1'
53
54
 
54
55
  spec.add_development_dependency 'bundler', '~> 2.3'
55
56
  spec.add_development_dependency 'minitest', '~> 5.18'
@@ -1,4 +1,5 @@
1
1
  require 'iso8601'
2
+ require 'parse-cron'
2
3
 
3
4
  module DataCollector
4
5
  class Pipeline
@@ -12,6 +13,7 @@ module DataCollector
12
13
  @run_count = 0
13
14
 
14
15
  @schedule = options[:schedule] || {}
16
+ @cron = options[:cron || '']
15
17
  @name = options[:name] || "pipeline-#{Time.now.to_i}-#{rand(10000)}"
16
18
  @options = options
17
19
  @listeners = []
@@ -43,6 +45,20 @@ module DataCollector
43
45
 
44
46
  DataCollector::Core.log("PIPELINE running in #{interval.size} seconds")
45
47
  sleep interval.size
48
+ handle_on_message(@input, @output) unless paused?
49
+ end
50
+ elsif @cron && !@cron.empty?
51
+ cron_parser = CronParser.new(@cron)
52
+ while running?
53
+ @run_count += 1
54
+ start_time = ISO8601::DateTime.new(Time.now.to_datetime.to_s)
55
+ next_run = cron_parser.next(start_time.to_time)
56
+
57
+ interval = ISO8601::TimeInterval.from_datetimes(start_time, ISO8601::DateTime.new(next_run.to_datetime.to_s))
58
+
59
+ DataCollector::Core.log("PIPELINE running at #{next_run.to_datetime.strftime('%Y-%m-%dT%H:%M:%S')} or in #{interval.size} seconds")
60
+ sleep interval.size
61
+
46
62
  handle_on_message(@input, @output) unless paused?
47
63
  end
48
64
  else # run once
@@ -1,4 +1,4 @@
1
1
  # encoding: utf-8
2
2
  module DataCollector
3
- VERSION = "0.24.0"
3
+ VERSION = "0.25.0"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_collector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.24.0
4
+ version: 0.25.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mehmet Celik
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-07-24 00:00:00.000000000 Z
11
+ date: 2023-07-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -206,6 +206,20 @@ dependencies:
206
206
  - - "~>"
207
207
  - !ruby/object:Gem::Version
208
208
  version: '3.2'
209
+ - !ruby/object:Gem::Dependency
210
+ name: parse-cron
211
+ requirement: !ruby/object:Gem::Requirement
212
+ requirements:
213
+ - - "~>"
214
+ - !ruby/object:Gem::Version
215
+ version: '0.1'
216
+ type: :runtime
217
+ prerelease: false
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ requirements:
220
+ - - "~>"
221
+ - !ruby/object:Gem::Version
222
+ version: '0.1'
209
223
  - !ruby/object:Gem::Dependency
210
224
  name: bundler
211
225
  requirement: !ruby/object:Gem::Requirement
@@ -319,7 +333,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
319
333
  - !ruby/object:Gem::Version
320
334
  version: '0'
321
335
  requirements: []
322
- rubygems_version: 3.4.10
336
+ rubygems_version: 3.4.13
323
337
  signing_key:
324
338
  specification_version: 4
325
339
  summary: ETL helper library