data_collector 0.24.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7745e79eb3836ab3c469cc5da39f395d42c144940b98115c96058fb01f8a629c
4
- data.tar.gz: 77aeb246a6a23477d07195c020091999825af21c9639c0a9679017daefeea9e9
3
+ metadata.gz: 8065c0d1b54cf1c39be5cfcb70a4fae1f33b02021182c75538ed03e73393d35a
4
+ data.tar.gz: 879054ad24b178f08bfea7914c5fbc593c32c611bd4e3ad833869a3e5b36d5b1
5
5
  SHA512:
6
- metadata.gz: e48596ac6e5fc14be89c2aadc50416558fa4a08594d28cccee630c1157dc365a556999c82d303e4669c94e1db88d6b0cf3f044730d0057586220a6c3172b72a6
7
- data.tar.gz: 2112dfa9191e8aa948317a16d581a9ea487327030cb013b43937757c144c33ca4fc975faee91fd6731d8f043ed0cefd5ecc28a562351c5de9b96196167871e36
6
+ metadata.gz: 390ac889c52055cfd8f5326c6e7c1549faee6b8c41af4535b9fc5d3038701f62c441caaf41895d7ad64b4941e609fdee32ac745255b9cb2fbc0981d787b00847
7
+ data.tar.gz: dbc57c97f30e5659ccfebba0850e99eee24ecada4a5e9a136ae6b036f19b587a4d66460e509c32d36be13cd4d745ed89f9a73e11a32b1b15fe800036c1836bea
data/README.md CHANGED
@@ -15,6 +15,11 @@ You can set a schedule for pipelines that are triggered by new data, specifying
15
15
  executed in the [ISO8601 duration format](https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm). The processing logic is then executed.
16
16
  ###### methods:
17
17
  - .new(options): options can be schedule in [ISO8601 duration format](https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm) and name
18
+ - options:
19
+ - name: pipeline name
20
+ - schedule: [ISO8601 duration format](https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm)
21
+ - cron: schedule in cron format, e.g. '1 12 * * *'; intervals are not supported
22
+ - uri: a directory/file to watch
18
23
  - .run: start the pipeline. blocking if a schedule is supplied
19
24
  - .stop: stop the pipeline
20
25
  - .pause: pause the pipeline. Restart using .run
@@ -37,6 +42,19 @@ end
37
42
  pipeline.run
38
43
  ```
39
44
 
45
+ ```ruby
46
+ #create a pipeline scheduled to run every morning at 06:00 am
47
+ pipeline = Pipeline.new(cron: '0 6 * * *')
48
+
49
+ pipeline.on_message do |input, output|
50
+ data = input.from_uri("https://dummyjson.com/comments?limit=10")
51
+ # process data
52
+ end
53
+
54
+ pipeline.run
55
+ ```
56
+
57
+
40
58
  ```ruby
41
59
  #create a pipeline to listen and process files in a directory
42
60
  extract = DataCollector::Pipeline.new(name: 'extract', uri: 'file://./data/in')
@@ -50,6 +50,7 @@ Gem::Specification.new do |spec|
50
50
  spec.add_runtime_dependency 'bunny', '~> 2.20'
51
51
  spec.add_runtime_dependency 'bunny_burrow', '~> 1.5'
52
52
  spec.add_runtime_dependency 'builder', '~> 3.2'
53
+ spec.add_runtime_dependency 'parse-cron', '~> 0.1'
53
54
 
54
55
  spec.add_development_dependency 'bundler', '~> 2.3'
55
56
  spec.add_development_dependency 'minitest', '~> 5.18'
@@ -1,4 +1,5 @@
1
1
  require 'iso8601'
2
+ require 'parse-cron'
2
3
 
3
4
  module DataCollector
4
5
  class Pipeline
@@ -12,6 +13,7 @@ module DataCollector
12
13
  @run_count = 0
13
14
 
14
15
  @schedule = options[:schedule] || {}
16
+ @cron = options[:cron || '']
15
17
  @name = options[:name] || "pipeline-#{Time.now.to_i}-#{rand(10000)}"
16
18
  @options = options
17
19
  @listeners = []
@@ -43,6 +45,20 @@ module DataCollector
43
45
 
44
46
  DataCollector::Core.log("PIPELINE running in #{interval.size} seconds")
45
47
  sleep interval.size
48
+ handle_on_message(@input, @output) unless paused?
49
+ end
50
+ elsif @cron && !@cron.empty?
51
+ cron_parser = CronParser.new(@cron)
52
+ while running?
53
+ @run_count += 1
54
+ start_time = ISO8601::DateTime.new(Time.now.to_datetime.to_s)
55
+ next_run = cron_parser.next(start_time.to_time)
56
+
57
+ interval = ISO8601::TimeInterval.from_datetimes(start_time, ISO8601::DateTime.new(next_run.to_datetime.to_s))
58
+
59
+ DataCollector::Core.log("PIPELINE running at #{next_run.to_datetime.strftime('%Y-%m-%dT%H:%M:%S')} or in #{interval.size} seconds")
60
+ sleep interval.size
61
+
46
62
  handle_on_message(@input, @output) unless paused?
47
63
  end
48
64
  else # run once
@@ -1,4 +1,4 @@
1
1
  # encoding: utf-8
2
2
  module DataCollector
3
- VERSION = "0.24.0"
3
+ VERSION = "0.25.0"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_collector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.24.0
4
+ version: 0.25.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mehmet Celik
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-07-24 00:00:00.000000000 Z
11
+ date: 2023-07-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -206,6 +206,20 @@ dependencies:
206
206
  - - "~>"
207
207
  - !ruby/object:Gem::Version
208
208
  version: '3.2'
209
+ - !ruby/object:Gem::Dependency
210
+ name: parse-cron
211
+ requirement: !ruby/object:Gem::Requirement
212
+ requirements:
213
+ - - "~>"
214
+ - !ruby/object:Gem::Version
215
+ version: '0.1'
216
+ type: :runtime
217
+ prerelease: false
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ requirements:
220
+ - - "~>"
221
+ - !ruby/object:Gem::Version
222
+ version: '0.1'
209
223
  - !ruby/object:Gem::Dependency
210
224
  name: bundler
211
225
  requirement: !ruby/object:Gem::Requirement
@@ -319,7 +333,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
319
333
  - !ruby/object:Gem::Version
320
334
  version: '0'
321
335
  requirements: []
322
- rubygems_version: 3.4.10
336
+ rubygems_version: 3.4.13
323
337
  signing_key:
324
338
  specification_version: 4
325
339
  summary: ETL helper library