ductr 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c0d2e2a83217838bd12d781d75ddf8d2ceee68a276f25250033cfd88ec5fe03c
4
- data.tar.gz: 4e694303f68f0603ad5e3053cfcab2d1320687e19b7d2eb3923f15d68c4dd901
3
+ metadata.gz: 8ba418763bfafcbc8f42d9f78c3277ab24d7feac82f77aad21ddef34d9967c4a
4
+ data.tar.gz: 3a05f4e233052037ca21c0b8e4dfe374a09f0745cc1b07128f6111b55107088f
5
5
  SHA512:
6
- metadata.gz: 63f82f601d39aeda829260eafdc390ad0ea506c6a9510876ed5703266cd0496b24942a18337807f52c55a1df3617c5fff84476ed4e5ce35d18a6c396cc5431cd
7
- data.tar.gz: 0da4ed8bfc3272d8ee4e0a542a60b6bb0266f44fee2c59f2318f93b1c4a759579707e78ebe637bd70c6b40f6e806159cf3cecd4b423c5efe4e4a8e0ef91b1fc8
6
+ metadata.gz: 14f51aca717bf8d342f384756ed496261ca8626e3a010eb9790292a34f05e1419ee7e200618eb7e3d3a6bae8dfeb547a04fe7a2a69fa28a5d435a6af2e9d20df
7
+ data.tar.gz: bf8e6b8e2ab6941b66db0bb17a3360c44dd797ad04dab0ebbf245335090fe61d5b90b78fa6fe6266d4efd6a05ab11bf13eac7488b11583c3bb13ea7066275c75
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- ductr (0.1.2)
4
+ ductr (0.2.0)
5
5
  activejob (~> 7.0)
6
6
  annotable (~> 0.1)
7
7
  colorize (~> 0.8)
@@ -27,8 +27,8 @@ GEM
27
27
  commander (4.6.0)
28
28
  highline (~> 2.0.0)
29
29
  concurrent-ruby (1.2.2)
30
- debug (1.6.3)
31
- irb (>= 1.3.6)
30
+ debug (1.7.2)
31
+ irb (>= 1.5.0)
32
32
  reline (>= 0.3.1)
33
33
  diff-lcs (1.5.0)
34
34
  et-orbi (1.2.7)
@@ -41,10 +41,10 @@ GEM
41
41
  highline (2.0.3)
42
42
  i18n (1.12.0)
43
43
  concurrent-ruby (~> 1.0)
44
- io-console (0.5.11)
45
- irb (1.4.2)
44
+ io-console (0.6.0)
45
+ irb (1.6.3)
46
46
  reline (>= 0.3.0)
47
- json (2.6.2)
47
+ json (2.6.3)
48
48
  kiba (4.0.0)
49
49
  minitest (5.18.0)
50
50
  parallel (1.22.1)
@@ -53,44 +53,44 @@ GEM
53
53
  parser
54
54
  rainbow (~> 3.0)
55
55
  sorbet-runtime (>= 0.5)
56
- parser (3.1.2.1)
56
+ parser (3.2.2.0)
57
57
  ast (~> 2.4.1)
58
58
  raabro (1.4.0)
59
59
  rainbow (3.1.1)
60
60
  rake (13.0.6)
61
- regexp_parser (2.6.0)
62
- reline (0.3.1)
61
+ regexp_parser (2.7.0)
62
+ reline (0.3.3)
63
63
  io-console (~> 0.5)
64
64
  rexml (3.2.5)
65
65
  rspec (3.12.0)
66
66
  rspec-core (~> 3.12.0)
67
67
  rspec-expectations (~> 3.12.0)
68
68
  rspec-mocks (~> 3.12.0)
69
- rspec-core (3.12.0)
69
+ rspec-core (3.12.1)
70
70
  rspec-support (~> 3.12.0)
71
- rspec-expectations (3.12.0)
71
+ rspec-expectations (3.12.2)
72
72
  diff-lcs (>= 1.2.0, < 2.0)
73
73
  rspec-support (~> 3.12.0)
74
- rspec-mocks (3.12.0)
74
+ rspec-mocks (3.12.5)
75
75
  diff-lcs (>= 1.2.0, < 2.0)
76
76
  rspec-support (~> 3.12.0)
77
77
  rspec-support (3.12.0)
78
- rubocop (1.38.0)
78
+ rubocop (1.48.1)
79
79
  json (~> 2.3)
80
80
  parallel (~> 1.10)
81
- parser (>= 3.1.2.1)
81
+ parser (>= 3.2.0.0)
82
82
  rainbow (>= 2.2.2, < 4.0)
83
83
  regexp_parser (>= 1.8, < 3.0)
84
84
  rexml (>= 3.2.5, < 4.0)
85
- rubocop-ast (>= 1.23.0, < 2.0)
85
+ rubocop-ast (>= 1.26.0, < 2.0)
86
86
  ruby-progressbar (~> 1.7)
87
- unicode-display_width (>= 1.4.0, < 3.0)
88
- rubocop-ast (1.23.0)
89
- parser (>= 3.1.1.0)
90
- ruby-progressbar (1.11.0)
87
+ unicode-display_width (>= 2.4.0, < 3.0)
88
+ rubocop-ast (1.28.0)
89
+ parser (>= 3.2.1.0)
90
+ ruby-progressbar (1.13.0)
91
91
  rufus-scheduler (3.8.2)
92
92
  fugit (~> 1.1, >= 1.1.6)
93
- sorbet-runtime (0.5.10712)
93
+ sorbet-runtime (0.5.10746)
94
94
  sord (4.0.0)
95
95
  commander (~> 4.5)
96
96
  parlour (~> 5.0)
@@ -99,7 +99,7 @@ GEM
99
99
  thor (1.2.1)
100
100
  tzinfo (2.0.6)
101
101
  concurrent-ruby (~> 1.0)
102
- unicode-display_width (2.3.0)
102
+ unicode-display_width (2.4.2)
103
103
  webrick (1.7.0)
104
104
  yard (0.9.28)
105
105
  webrick (~> 1.7.0)
@@ -118,4 +118,4 @@ DEPENDENCIES
118
118
  yard (~> 0.9)
119
119
 
120
120
  BUNDLED WITH
121
- 2.3.7
121
+ 2.4.8
@@ -42,6 +42,8 @@ module Ductr
42
42
  copy_file "gemfile.rb", "Gemfile"
43
43
  copy_file "rubocop.yml", ".rubocop.yml"
44
44
 
45
+ create_file "config/initializers/.gitkeep"
46
+ create_file "lib/.gitkeep"
45
47
  create_file "app/jobs/.gitkeep"
46
48
  create_file "app/pipelines/.gitkeep"
47
49
  create_file "app/schedulers/.gitkeep"
@@ -3,3 +3,5 @@
3
3
  require "ductr"
4
4
 
5
5
  require_relative "environment/#{Ductr.env}"
6
+
7
+ Dir[File.join(__dir__, "initializers", "*.rb")].each { |file| require file }
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ module Sequel
5
+ #
6
+ # The base class of all sequel-based adapters.
7
+ #
8
+ class Adapter < Ductr::Adapter
9
+ # @return [Sequel::Database, nil] The database connection instance
10
+ attr_reader :db
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ module Sequel
5
+ #
6
+ # A destination control that writes rows one by one.
7
+ #
8
+ class BasicDestination < Ductr::ETL::Destination
9
+ #
10
+ # Opens the database if needed and calls the job's method to insert one row at a time.
11
+ #
12
+ # @param [Hash<Symbol, Object>] row The row to insert, preferably a Hash
13
+ #
14
+ # @return [void]
15
+ #
16
+ def write(row)
17
+ call_method(adapter.db, row)
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ module Sequel
5
+ #
6
+ # A lookup control that executes one query per row.
7
+ #
8
+ class BasicLookup < Ductr::ETL::Transform
9
+ #
10
+ # Calls the job's method to merge its result with the current row.
11
+ #
12
+ # @param [Hash<Symbol, Object>] row The current row, preferably a Hash
13
+ #
14
+ # @return [Hash<Symbol, Object>] The row merged with looked up row or the untouched row if nothing was found
15
+ #
16
+ def process(row)
17
+ matching_row = call_method(adapter.db, row).first
18
+ return row unless matching_row
19
+
20
+ row.merge matching_row
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ module Sequel
5
+ #
6
+ # A source control that yields rows one by one.
7
+ #
8
+ class BasicSource < Ductr::ETL::Source
9
+ #
10
+ # Opens the database, calls the job's method and iterates over the query results.
11
+ #
12
+ # @yield The each block
13
+ #
14
+ # @return [void]
15
+ #
16
+ def each(&)
17
+ call_method(adapter.db).each(&)
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ module Sequel
5
+ #
6
+ # A destination control that accumulates rows in a buffer to write them by batch.
7
+ #
8
+ class BufferedDestination < Ductr::ETL::BufferedDestination
9
+ #
10
+ # Opens the database if needed and calls the job's method to run the query.
11
+ #
12
+ # @return [void]
13
+ #
14
+ def on_flush
15
+ call_method(adapter.db, buffer)
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ module Sequel
5
+ #
6
+ # A lookup control that executes the query for a bunch of rows.
7
+ #
8
+ class BufferedLookup < Ductr::ETL::BufferedTransform
9
+ #
10
+ # Opens the database if needed, calls the job's method and passes the each block to it.
11
+ #
12
+ # @yield The each block
13
+ #
14
+ # @return [void]
15
+ #
16
+ def on_flush(&)
17
+ call_method(adapter.db, buffer, &)
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ module Sequel
5
+ #
6
+ # A destination control that accumulates rows in a buffer to upsert them by batch.
7
+ #
8
+ class BufferedUpsertDestination < Ductr::ETL::BufferedDestination
9
+ #
10
+ # Opens the database if needed and calls the job's method to run the query.
11
+ #
12
+ # @return [void]
13
+ #
14
+ def on_flush
15
+ call_method(adapter.db, excluded, buffer)
16
+ end
17
+
18
+ private
19
+
20
+ #
21
+ # Generate the excluded keys hash e.g.
22
+ #
23
+ # ```ruby
24
+ # {a: Sequel[:excluded][:a]}
25
+ # ```
26
+ #
27
+ # @return [Hash<Symbol, Sequel::SQL::QualifiedIdentifier>] The excluded keys hash
28
+ #
29
+ def excluded
30
+ keys = buffer.first.keys
31
+
32
+ excluded_keys = keys.map do |key|
33
+ Sequel[:excluded][key]
34
+ end
35
+
36
+ keys.zip(excluded_keys).to_h
37
+ end
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ module SQLite
5
+ #
6
+ # A lookup control that executes the query for a bunch of rows and merges them with the buffer's rows.
7
+ #
8
+ class MatchLookup < Ductr::ETL::BufferedTransform
9
+ #
10
+ # The key read from the buffer rows when matching them against the looked up rows.
11
+ #
12
+ # @return [Symbol] The column name
13
+ #
14
+ def from_key
15
+ @options[:merge].first
16
+ end
17
+
18
+ #
19
+ # The key read from the looked up rows when matching them against the buffer rows.
20
+ #
21
+ # @return [Symbol] The column name
22
+ #
23
+ def to_key
24
+ @options[:merge].last
25
+ end
26
+
27
+ #
28
+ # Opens the database if needed, calls the job's method and merges
29
+ # the looked up rows with corresponding buffer rows.
30
+ #
31
+ # @yield [row] The each block
32
+ # @yieldparam [Hash<Symbol, Object>] row The merged row
33
+ #
34
+ # @return [void]
35
+ #
36
+ def on_flush(&)
37
+ call_method(adapter.db, buffer_keys).each do |row|
38
+ match = buffer_find(row)
39
+ next yield(row) unless match
40
+
41
+ yield(row.merge match)
42
+ end
43
+ end
44
+
45
+ private
46
+
47
+ #
48
+ # Finds the corresponding row in the buffer.
49
+ #
50
+ # @param [Hash<Symbol, Object>] row The looked up row
51
+ #
52
+ # @return [Hash<Symbol, Object>, nil] The matching row if it exists
53
+ #
54
+ def buffer_find(row)
55
+ buffer.find { |r| r[from_key] == row[to_key] }
56
+ end
57
+
58
+ #
59
+ # Maps the buffer keys into an array.
60
+ #
61
+ # @return [Array<Integer, String>] The keys array
62
+ #
63
+ def buffer_keys
64
+ buffer.map { |row| row[from_key] }
65
+ end
66
+ end
67
+ end
68
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ module Sequel
5
+ #
6
+ # A source control that allows selecting a large number of rows by relying on pagination.
7
+ #
8
+ class PaginatedSource < Ductr::ETL::PaginatedSource
9
+ #
10
+ # Calls the job's method and iterates over the query result.
11
+ # Returns true if the page is full, false otherwise.
12
+ #
13
+ # @yield The each block
14
+ #
15
+ # @raise [InconsistentPaginationError] When the query returns more rows than the page size
16
+ # @return [Boolean] True if the page is full, false otherwise.
17
+ #
18
+ def each_page(&)
19
+ rows_count = 0
20
+
21
+ call_method(adapter.db, @offset, page_size).each do |row|
22
+ yield(row)
23
+ rows_count += 1
24
+ end
25
+
26
+ if rows_count > page_size
27
+ raise InconsistentPaginationError,
28
+ "The query returned #{rows_count} rows but the page size is #{page_size} rows"
29
+ end
30
+
31
+ rows_count == page_size
32
+ end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ module Sequel
5
+ #
6
+ # The rufus-scheduler handler class.
7
+ # @see https://github.com/jmettraux/rufus-scheduler#scheduling-handler-instances
8
+ # For further information
9
+ #
10
+ class PollingHandler
11
+ #
12
+ # Creates the handler based on the given scheduler's method and the trigger's adapter instance.
13
+ #
14
+ # @param [Method] method The scheduler's method
15
+ # @param [Ductr::Adapter] adapter The trigger's adapter
16
+ #
17
+ def initialize(method, adapter)
18
+ @method = method
19
+ @adapter = adapter
20
+ @last_triggering_key = nil
21
+ end
22
+
23
+ #
24
+ # The callable method used by the trigger, actually calls the scheduler's method.
25
+ #
26
+ # @return [void]
27
+ #
28
+ def call
29
+ @adapter.open do |db|
30
+ @method.call(db) do |triggering_key|
31
+ return false if triggering_key == @last_triggering_key
32
+
33
+ @last_triggering_key = triggering_key
34
+ true
35
+ end
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ductr
4
+ module Sequel
5
+ #
6
+ # A trigger based on the RufusTrigger, runs the PollingHandler at the given timing.
7
+ #
8
+ class PollingTrigger < Ductr::RufusTrigger
9
+ #
10
+ # Closes the connection if the scheduler is stopped.
11
+ #
12
+ # @return [void]
13
+ #
14
+ def stop
15
+ super
16
+ adapter.close!
17
+ end
18
+
19
+ private
20
+
21
+ #
22
+ # Returns a callable object, allowing rufus-scheduler to call it.
23
+ #
24
+ # @param [Ductr::Scheduler] scheduler The scheduler instance
25
+ # @param [Method] method The scheduler's method
26
+ # @param [Hash] ** The options passed to the trigger annotation
27
+ #
28
+ # @return [#call] A callable object
29
+ #
30
+ def callable(method, **)
31
+ PollingHandler.new(method, adapter)
32
+ end
33
+ end
34
+ end
35
+ end
data/lib/ductr/version.rb CHANGED
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Ductr
4
4
  # @return [String] The ductr's version number
5
- VERSION = "0.1.2"
5
+ VERSION = "0.2.0"
6
6
  end
data/lib/ductr.rb CHANGED
@@ -133,10 +133,10 @@ end
133
133
  if File.directory?("#{pwd = Dir.pwd}/app")
134
134
  Zeitwerk::Loader.new.tap do |loader|
135
135
  loader.push_dir "#{pwd}/app"
136
+ loader.push_dir "#{pwd}/lib"
136
137
 
137
- loader.collapse "#{pwd}/app/jobs"
138
- loader.collapse "#{pwd}/app/pipelines"
139
- loader.collapse "#{pwd}/app/schedulers"
138
+ app_paths = Dir.glob("#{pwd}/app/**/**").select { |f| File.directory? f }
139
+ loader.collapse(app_paths)
140
140
 
141
141
  loader.setup
142
142
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ductr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mathieu Morel
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-03-26 00:00:00.000000000 Z
11
+ date: 2023-04-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: debug
@@ -252,6 +252,17 @@ files:
252
252
  - lib/ductr/registry.rb
253
253
  - lib/ductr/rufus_trigger.rb
254
254
  - lib/ductr/scheduler.rb
255
+ - lib/ductr/sequel/adapter.rb
256
+ - lib/ductr/sequel/basic_destination.rb
257
+ - lib/ductr/sequel/basic_lookup.rb
258
+ - lib/ductr/sequel/basic_source.rb
259
+ - lib/ductr/sequel/buffered_destination.rb
260
+ - lib/ductr/sequel/buffered_lookup.rb
261
+ - lib/ductr/sequel/buffered_upsert_destination.rb
262
+ - lib/ductr/sequel/match_lookup.rb
263
+ - lib/ductr/sequel/paginated_source.rb
264
+ - lib/ductr/sequel/polling_handler.rb
265
+ - lib/ductr/sequel/polling_trigger.rb
255
266
  - lib/ductr/store.rb
256
267
  - lib/ductr/store/job_serializer.rb
257
268
  - lib/ductr/store/job_store.rb