data_collector 0.24.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +18 -0
- data/data_collector.gemspec +1 -0
- data/lib/data_collector/pipeline.rb +16 -0
- data/lib/data_collector/version.rb +1 -1
- metadata +17 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8065c0d1b54cf1c39be5cfcb70a4fae1f33b02021182c75538ed03e73393d35a
|
4
|
+
data.tar.gz: 879054ad24b178f08bfea7914c5fbc593c32c611bd4e3ad833869a3e5b36d5b1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 390ac889c52055cfd8f5326c6e7c1549faee6b8c41af4535b9fc5d3038701f62c441caaf41895d7ad64b4941e609fdee32ac745255b9cb2fbc0981d787b00847
|
7
|
+
data.tar.gz: dbc57c97f30e5659ccfebba0850e99eee24ecada4a5e9a136ae6b036f19b587a4d66460e509c32d36be13cd4d745ed89f9a73e11a32b1b15fe800036c1836bea
|
data/README.md
CHANGED
@@ -15,6 +15,11 @@ You can set a schedule for pipelines that are triggered by new data, specifying
|
|
15
15
|
executed in the [ISO8601 duration format](https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm). The processing logic is then executed.
|
16
16
|
###### methods:
|
17
17
|
- .new(options): options can be schedule in [ISO8601 duration format](https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm) and name
|
18
|
+
- options:
|
19
|
+
- name: pipeline name
|
20
|
+
- schedule: [ISO8601 duration format](https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm)
|
21
|
+
- cron: a schedule in cron format, e.g. '1 12 * * *'. Intervals are not supported.
|
22
|
+
- uri: a directory/file to watch
|
18
23
|
- .run: start the pipeline. blocking if a schedule is supplied
|
19
24
|
- .stop: stop the pipeline
|
20
25
|
- .pause: pause the pipeline. Restart using .run
|
@@ -37,6 +42,19 @@ end
|
|
37
42
|
pipeline.run
|
38
43
|
```
|
39
44
|
|
45
|
+
```ruby
|
46
|
+
#create a pipeline scheduled to run every morning at 06:00 am
|
47
|
+
pipeline = Pipeline.new(cron: '0 6 * * *')
|
48
|
+
|
49
|
+
pipeline.on_message do |input, output|
|
50
|
+
data = input.from_uri("https://dummyjson.com/comments?limit=10")
|
51
|
+
# process data
|
52
|
+
end
|
53
|
+
|
54
|
+
pipeline.run
|
55
|
+
```
|
56
|
+
|
57
|
+
|
40
58
|
```ruby
|
41
59
|
#create a pipeline to listen and process files in a directory
|
42
60
|
extract = DataCollector::Pipeline.new(name: 'extract', uri: 'file://./data/in')
|
data/data_collector.gemspec
CHANGED
@@ -50,6 +50,7 @@ Gem::Specification.new do |spec|
|
|
50
50
|
spec.add_runtime_dependency 'bunny', '~> 2.20'
|
51
51
|
spec.add_runtime_dependency 'bunny_burrow', '~> 1.5'
|
52
52
|
spec.add_runtime_dependency 'builder', '~> 3.2'
|
53
|
+
spec.add_runtime_dependency 'parse-cron', '~> 0.1'
|
53
54
|
|
54
55
|
spec.add_development_dependency 'bundler', '~> 2.3'
|
55
56
|
spec.add_development_dependency 'minitest', '~> 5.18'
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'iso8601'
|
2
|
+
require 'parse-cron'
|
2
3
|
|
3
4
|
module DataCollector
|
4
5
|
class Pipeline
|
@@ -12,6 +13,7 @@ module DataCollector
|
|
12
13
|
@run_count = 0
|
13
14
|
|
14
15
|
@schedule = options[:schedule] || {}
|
16
|
+
@cron = options[:cron || '']
|
15
17
|
@name = options[:name] || "pipeline-#{Time.now.to_i}-#{rand(10000)}"
|
16
18
|
@options = options
|
17
19
|
@listeners = []
|
@@ -43,6 +45,20 @@ module DataCollector
|
|
43
45
|
|
44
46
|
DataCollector::Core.log("PIPELINE running in #{interval.size} seconds")
|
45
47
|
sleep interval.size
|
48
|
+
handle_on_message(@input, @output) unless paused?
|
49
|
+
end
|
50
|
+
elsif @cron && !@cron.empty?
|
51
|
+
cron_parser = CronParser.new(@cron)
|
52
|
+
while running?
|
53
|
+
@run_count += 1
|
54
|
+
start_time = ISO8601::DateTime.new(Time.now.to_datetime.to_s)
|
55
|
+
next_run = cron_parser.next(start_time.to_time)
|
56
|
+
|
57
|
+
interval = ISO8601::TimeInterval.from_datetimes(start_time, ISO8601::DateTime.new(next_run.to_datetime.to_s))
|
58
|
+
|
59
|
+
DataCollector::Core.log("PIPELINE running at #{next_run.to_datetime.strftime('%Y-%m-%dT%H:%M:%S')} or in #{interval.size} seconds")
|
60
|
+
sleep interval.size
|
61
|
+
|
46
62
|
handle_on_message(@input, @output) unless paused?
|
47
63
|
end
|
48
64
|
else # run once
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_collector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.24.0
|
4
|
+
version: 0.25.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mehmet Celik
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-07-
|
11
|
+
date: 2023-07-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -206,6 +206,20 @@ dependencies:
|
|
206
206
|
- - "~>"
|
207
207
|
- !ruby/object:Gem::Version
|
208
208
|
version: '3.2'
|
209
|
+
- !ruby/object:Gem::Dependency
|
210
|
+
name: parse-cron
|
211
|
+
requirement: !ruby/object:Gem::Requirement
|
212
|
+
requirements:
|
213
|
+
- - "~>"
|
214
|
+
- !ruby/object:Gem::Version
|
215
|
+
version: '0.1'
|
216
|
+
type: :runtime
|
217
|
+
prerelease: false
|
218
|
+
version_requirements: !ruby/object:Gem::Requirement
|
219
|
+
requirements:
|
220
|
+
- - "~>"
|
221
|
+
- !ruby/object:Gem::Version
|
222
|
+
version: '0.1'
|
209
223
|
- !ruby/object:Gem::Dependency
|
210
224
|
name: bundler
|
211
225
|
requirement: !ruby/object:Gem::Requirement
|
@@ -319,7 +333,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
319
333
|
- !ruby/object:Gem::Version
|
320
334
|
version: '0'
|
321
335
|
requirements: []
|
322
|
-
rubygems_version: 3.4.
|
336
|
+
rubygems_version: 3.4.13
|
323
337
|
signing_key:
|
324
338
|
specification_version: 4
|
325
339
|
summary: ETL helper library
|