ductr 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c0d2e2a83217838bd12d781d75ddf8d2ceee68a276f25250033cfd88ec5fe03c
4
- data.tar.gz: 4e694303f68f0603ad5e3053cfcab2d1320687e19b7d2eb3923f15d68c4dd901
3
+ metadata.gz: 8ba418763bfafcbc8f42d9f78c3277ab24d7feac82f77aad21ddef34d9967c4a
4
+ data.tar.gz: 3a05f4e233052037ca21c0b8e4dfe374a09f0745cc1b07128f6111b55107088f
5
5
  SHA512:
6
- metadata.gz: 63f82f601d39aeda829260eafdc390ad0ea506c6a9510876ed5703266cd0496b24942a18337807f52c55a1df3617c5fff84476ed4e5ce35d18a6c396cc5431cd
7
- data.tar.gz: 0da4ed8bfc3272d8ee4e0a542a60b6bb0266f44fee2c59f2318f93b1c4a759579707e78ebe637bd70c6b40f6e806159cf3cecd4b423c5efe4e4a8e0ef91b1fc8
6
+ metadata.gz: 14f51aca717bf8d342f384756ed496261ca8626e3a010eb9790292a34f05e1419ee7e200618eb7e3d3a6bae8dfeb547a04fe7a2a69fa28a5d435a6af2e9d20df
7
+ data.tar.gz: bf8e6b8e2ab6941b66db0bb17a3360c44dd797ad04dab0ebbf245335090fe61d5b90b78fa6fe6266d4efd6a05ab11bf13eac7488b11583c3bb13ea7066275c75
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- ductr (0.1.2)
4
+ ductr (0.2.0)
5
5
  activejob (~> 7.0)
6
6
  annotable (~> 0.1)
7
7
  colorize (~> 0.8)
@@ -27,8 +27,8 @@ GEM
27
27
  commander (4.6.0)
28
28
  highline (~> 2.0.0)
29
29
  concurrent-ruby (1.2.2)
30
- debug (1.6.3)
31
- irb (>= 1.3.6)
30
+ debug (1.7.2)
31
+ irb (>= 1.5.0)
32
32
  reline (>= 0.3.1)
33
33
  diff-lcs (1.5.0)
34
34
  et-orbi (1.2.7)
@@ -41,10 +41,10 @@ GEM
41
41
  highline (2.0.3)
42
42
  i18n (1.12.0)
43
43
  concurrent-ruby (~> 1.0)
44
- io-console (0.5.11)
45
- irb (1.4.2)
44
+ io-console (0.6.0)
45
+ irb (1.6.3)
46
46
  reline (>= 0.3.0)
47
- json (2.6.2)
47
+ json (2.6.3)
48
48
  kiba (4.0.0)
49
49
  minitest (5.18.0)
50
50
  parallel (1.22.1)
@@ -53,44 +53,44 @@ GEM
53
53
  parser
54
54
  rainbow (~> 3.0)
55
55
  sorbet-runtime (>= 0.5)
56
- parser (3.1.2.1)
56
+ parser (3.2.2.0)
57
57
  ast (~> 2.4.1)
58
58
  raabro (1.4.0)
59
59
  rainbow (3.1.1)
60
60
  rake (13.0.6)
61
- regexp_parser (2.6.0)
62
- reline (0.3.1)
61
+ regexp_parser (2.7.0)
62
+ reline (0.3.3)
63
63
  io-console (~> 0.5)
64
64
  rexml (3.2.5)
65
65
  rspec (3.12.0)
66
66
  rspec-core (~> 3.12.0)
67
67
  rspec-expectations (~> 3.12.0)
68
68
  rspec-mocks (~> 3.12.0)
69
- rspec-core (3.12.0)
69
+ rspec-core (3.12.1)
70
70
  rspec-support (~> 3.12.0)
71
- rspec-expectations (3.12.0)
71
+ rspec-expectations (3.12.2)
72
72
  diff-lcs (>= 1.2.0, < 2.0)
73
73
  rspec-support (~> 3.12.0)
74
- rspec-mocks (3.12.0)
74
+ rspec-mocks (3.12.5)
75
75
  diff-lcs (>= 1.2.0, < 2.0)
76
76
  rspec-support (~> 3.12.0)
77
77
  rspec-support (3.12.0)
78
- rubocop (1.38.0)
78
+ rubocop (1.48.1)
79
79
  json (~> 2.3)
80
80
  parallel (~> 1.10)
81
- parser (>= 3.1.2.1)
81
+ parser (>= 3.2.0.0)
82
82
  rainbow (>= 2.2.2, < 4.0)
83
83
  regexp_parser (>= 1.8, < 3.0)
84
84
  rexml (>= 3.2.5, < 4.0)
85
- rubocop-ast (>= 1.23.0, < 2.0)
85
+ rubocop-ast (>= 1.26.0, < 2.0)
86
86
  ruby-progressbar (~> 1.7)
87
- unicode-display_width (>= 1.4.0, < 3.0)
88
- rubocop-ast (1.23.0)
89
- parser (>= 3.1.1.0)
90
- ruby-progressbar (1.11.0)
87
+ unicode-display_width (>= 2.4.0, < 3.0)
88
+ rubocop-ast (1.28.0)
89
+ parser (>= 3.2.1.0)
90
+ ruby-progressbar (1.13.0)
91
91
  rufus-scheduler (3.8.2)
92
92
  fugit (~> 1.1, >= 1.1.6)
93
- sorbet-runtime (0.5.10712)
93
+ sorbet-runtime (0.5.10746)
94
94
  sord (4.0.0)
95
95
  commander (~> 4.5)
96
96
  parlour (~> 5.0)
@@ -99,7 +99,7 @@ GEM
99
99
  thor (1.2.1)
100
100
  tzinfo (2.0.6)
101
101
  concurrent-ruby (~> 1.0)
102
- unicode-display_width (2.3.0)
102
+ unicode-display_width (2.4.2)
103
103
  webrick (1.7.0)
104
104
  yard (0.9.28)
105
105
  webrick (~> 1.7.0)
@@ -118,4 +118,4 @@ DEPENDENCIES
118
118
  yard (~> 0.9)
119
119
 
120
120
  BUNDLED WITH
121
- 2.3.7
121
+ 2.4.8
@@ -42,6 +42,8 @@ module Ductr
42
42
  copy_file "gemfile.rb", "Gemfile"
43
43
  copy_file "rubocop.yml", ".rubocop.yml"
44
44
 
45
+ create_file "config/initializers/.gitkeep"
46
+ create_file "lib/.gitkeep"
45
47
  create_file "app/jobs/.gitkeep"
46
48
  create_file "app/pipelines/.gitkeep"
47
49
  create_file "app/schedulers/.gitkeep"
@@ -3,3 +3,5 @@
3
3
  require "ductr"
4
4
 
5
5
  require_relative "environment/#{Ductr.env}"
6
+
7
+ Dir[File.join(__dir__, "initializers", "*.rb")].each { |file| require file }
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ module Sequel
5
+ #
6
+ # The base class of all sequel-based adapters.
7
+ #
8
+ class Adapter < Ductr::Adapter
9
+ # @return [Sequel::Database, nil] The database connection instance
10
+ attr_reader :db
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ module Sequel
5
+ #
6
+ # A destination control that writes rows one by one.
7
+ #
8
+ class BasicDestination < Ductr::ETL::Destination
9
+ #
10
+ # Opens the database if needed and calls the job's method to insert one row at a time.
11
+ #
12
+ # @param [Hash<Symbol, Object>] row The row to insert, preferably a Hash
13
+ #
14
+ # @return [void]
15
+ #
16
+ def write(row)
17
+ call_method(adapter.db, row)
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ module Sequel
5
+ #
6
+ # A lookup control that executes one query per row.
7
+ #
8
+ class BasicLookup < Ductr::ETL::Transform
9
+ #
10
+ # Calls the job's method to merge its result with the current row.
11
+ #
12
+ # @param [Hash<Symbol, Object>] row The current row, preferably a Hash
13
+ #
14
+ # @return [Hash<Symbol, Object>] The row merged with looked up row or the untouched row if nothing was found
15
+ #
16
+ def process(row)
17
+ matching_row = call_method(adapter.db, row).first
18
+ return row unless matching_row
19
+
20
+ row.merge matching_row
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ module Sequel
5
+ #
6
+ # A source control that yields rows one by one.
7
+ #
8
+ class BasicSource < Ductr::ETL::Source
9
+ #
10
+ # Opens the database, calls the job's method and iterates over the query results.
11
+ #
12
+ # @yield The each block
13
+ #
14
+ # @return [void]
15
+ #
16
+ def each(&)
17
+ call_method(adapter.db).each(&)
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ module Sequel
5
+ #
6
+ # A destination control that accumulates rows in a buffer to write them by batch.
7
+ #
8
+ class BufferedDestination < Ductr::ETL::BufferedDestination
9
+ #
10
+ # Opens the database if needed and calls the job's method to run the query.
11
+ #
12
+ # @return [void]
13
+ #
14
+ def on_flush
15
+ call_method(adapter.db, buffer)
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ module Sequel
5
+ #
6
+ # A lookup control that executes the query for a batch of rows.
7
+ #
8
+ class BufferedLookup < Ductr::ETL::BufferedTransform
9
+ #
10
+ # Opens the database if needed, calls the job's method and passes the each block to it.
11
+ #
12
+ # @yield The each block
13
+ #
14
+ # @return [void]
15
+ #
16
+ def on_flush(&)
17
+ call_method(adapter.db, buffer, &)
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ module Sequel
5
+ #
6
+ # A destination control that accumulates rows in a buffer to upsert them by batch.
7
+ #
8
+ class BufferedUpsertDestination < Ductr::ETL::BufferedDestination
9
+ #
10
+ # Opens the database if needed and calls the job's method to run the query.
11
+ #
12
+ # @return [void]
13
+ #
14
+ def on_flush
15
+ call_method(adapter.db, excluded, buffer)
16
+ end
17
+
18
+ private
19
+
20
+ #
21
+ # Generate the excluded keys hash e.g.
22
+ #
23
+ # ```ruby
24
+ # {a: Sequel[:excluded][:a]}
25
+ # ```
26
+ #
27
+ # @return [Hash<Symbol, Sequel::SQL::QualifiedIdentifier>] The excluded keys hash
28
+ #
29
+ def excluded
30
+ keys = buffer.first.keys
31
+
32
+ excluded_keys = keys.map do |key|
33
+ Sequel[:excluded][key]
34
+ end
35
+
36
+ keys.zip(excluded_keys).to_h
37
+ end
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ module SQLite
5
+ #
6
+ # A lookup control that executes the query for a batch of rows and merges them with the buffer's rows.
7
+ #
8
+ class MatchLookup < Ductr::ETL::BufferedTransform
9
+ #
10
+ # The buffer row key to match.
11
+ #
12
+ # @return [Symbol] The column name
13
+ #
14
+ def from_key
15
+ @options[:merge].first
16
+ end
17
+
18
+ #
19
+ # The looked up row key to match.
20
+ #
21
+ # @return [Symbol] The column name
22
+ #
23
+ def to_key
24
+ @options[:merge].last
25
+ end
26
+
27
+ #
28
+ # Opens the database if needed, calls the job's method and merges
29
+ # the looked up rows with corresponding buffer rows.
30
+ #
31
+ # @yield [row] The each block
32
+ # @yieldparam [Hash<Symbol, Object>] row The merged row
33
+ #
34
+ # @return [void]
35
+ #
36
+ def on_flush(&)
37
+ call_method(adapter.db, buffer_keys).each do |row|
38
+ match = buffer_find(row)
39
+ next yield(row) unless match
40
+
41
+ yield(row.merge match)
42
+ end
43
+ end
44
+
45
+ private
46
+
47
+ #
48
+ # Finds the corresponding row in the buffer.
49
+ #
50
+ # @param [Hash<Symbol, Object>] row The looked up row
51
+ #
52
+ # @return [Hash<Symbol, Object>, nil] The matching row, or nil if none exists
53
+ #
54
+ def buffer_find(row)
55
+ buffer.find { |r| r[from_key] == row[to_key] }
56
+ end
57
+
58
+ #
59
+ # Maps the buffer keys into an array.
60
+ #
61
+ # @return [Array<Integer, String>] The keys array
62
+ #
63
+ def buffer_keys
64
+ buffer.map { |row| row[from_key] }
65
+ end
66
+ end
67
+ end
68
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ module Sequel
5
+ #
6
+ # A source control that allows selecting a large number of rows by relying on pagination.
7
+ #
8
+ class PaginatedSource < Ductr::ETL::PaginatedSource
9
+ #
10
+ # Calls the job's method and iterates over the query result.
11
+ # Returns true if the page is full, false otherwise.
12
+ #
13
+ # @yield The each block
14
+ #
15
+ # @raise [InconsistentPaginationError] When the query returns more rows than the page size
16
+ # @return [Boolean] True if the page is full, false otherwise.
17
+ #
18
+ def each_page(&)
19
+ rows_count = 0
20
+
21
+ call_method(adapter.db, @offset, page_size).each do |row|
22
+ yield(row)
23
+ rows_count += 1
24
+ end
25
+
26
+ if rows_count > page_size
27
+ raise InconsistentPaginationError,
28
+ "The query returned #{rows_count} rows but the page size is #{page_size} rows"
29
+ end
30
+
31
+ rows_count == page_size
32
+ end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ module Sequel
5
+ #
6
+ # The rufus-scheduler handler class.
7
+ # @see https://github.com/jmettraux/rufus-scheduler#scheduling-handler-instances
8
+ # For further information
9
+ #
10
+ class PollingHandler
11
+ #
12
+ # Creates the handler based on the given scheduler's method and the trigger's adapter instance.
13
+ #
14
+ # @param [Method] method The scheduler's method
15
+ # @param [Ductr::Adapter] adapter The trigger's adapter
16
+ #
17
+ def initialize(method, adapter)
18
+ @method = method
19
+ @adapter = adapter
20
+ @last_triggering_key = nil
21
+ end
22
+
23
+ #
24
+ # The callable method used by the trigger, actually calls the scheduler's method.
25
+ #
26
+ # @return [void]
27
+ #
28
+ def call
29
+ @adapter.open do |db|
30
+ @method.call(db) do |triggering_key|
31
+ return false if triggering_key == @last_triggering_key
32
+
33
+ @last_triggering_key = triggering_key
34
+ true
35
+ end
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ module Sequel
5
+ #
6
+ # A trigger based on the RufusTrigger, runs the PollingHandler at the given timing.
7
+ #
8
+ class PollingTrigger < Ductr::RufusTrigger
9
+ #
10
+ # Closes the connection if the scheduler is stopped.
11
+ #
12
+ # @return [void]
13
+ #
14
+ def stop
15
+ super
16
+ adapter.close!
17
+ end
18
+
19
+ private
20
+
21
+ #
22
+ # Returns a callable object, allowing rufus-scheduler to call it.
23
+ #
24
+ # @param [Ductr::Scheduler] scheduler The scheduler instance
25
+ # @param [Method] method The scheduler's method
26
+ # @param [Hash] ** The options passed to the trigger annotation
27
+ #
28
+ # @return [#call] A callable object
29
+ #
30
+ def callable(method, **)
31
+ PollingHandler.new(method, adapter)
32
+ end
33
+ end
34
+ end
35
+ end
data/lib/ductr/version.rb CHANGED
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Ductr
4
4
  # @return [String] The ductr's version number
5
- VERSION = "0.1.2"
5
+ VERSION = "0.2.0"
6
6
  end
data/lib/ductr.rb CHANGED
@@ -133,10 +133,10 @@ end
133
133
  if File.directory?("#{pwd = Dir.pwd}/app")
134
134
  Zeitwerk::Loader.new.tap do |loader|
135
135
  loader.push_dir "#{pwd}/app"
136
+ loader.push_dir "#{pwd}/lib"
136
137
 
137
- loader.collapse "#{pwd}/app/jobs"
138
- loader.collapse "#{pwd}/app/pipelines"
139
- loader.collapse "#{pwd}/app/schedulers"
138
+ app_paths = Dir.glob("#{pwd}/app/**/**").select { |f| File.directory? f }
139
+ loader.collapse(app_paths)
140
140
 
141
141
  loader.setup
142
142
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ductr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mathieu Morel
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-03-26 00:00:00.000000000 Z
11
+ date: 2023-04-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: debug
@@ -252,6 +252,17 @@ files:
252
252
  - lib/ductr/registry.rb
253
253
  - lib/ductr/rufus_trigger.rb
254
254
  - lib/ductr/scheduler.rb
255
+ - lib/ductr/sequel/adapter.rb
256
+ - lib/ductr/sequel/basic_destination.rb
257
+ - lib/ductr/sequel/basic_lookup.rb
258
+ - lib/ductr/sequel/basic_source.rb
259
+ - lib/ductr/sequel/buffered_destination.rb
260
+ - lib/ductr/sequel/buffered_lookup.rb
261
+ - lib/ductr/sequel/buffered_upsert_destination.rb
262
+ - lib/ductr/sequel/match_lookup.rb
263
+ - lib/ductr/sequel/paginated_source.rb
264
+ - lib/ductr/sequel/polling_handler.rb
265
+ - lib/ductr/sequel/polling_trigger.rb
255
266
  - lib/ductr/store.rb
256
267
  - lib/ductr/store/job_serializer.rb
257
268
  - lib/ductr/store/job_store.rb