ductr-sqlite 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,50 @@
+ # frozen_string_literal: true
+
+ module Ductr
+   module SQLite
+     #
+     # A destination control that accumulates rows in a buffer to upsert them by batch, registered as `:buffered_upsert`.
+     # Accepts the `:buffer_size` option; the default value is 10 000:
+     #
+     #   destination :some_sqlite_database, :buffered_upsert, buffer_size: 42
+     #   def my_destination(buffer, excluded, db)
+     #     db[:items].insert_conflict(target: :id, update: excluded).multi_insert(buffer)
+     #   end
+     #
+     # @see Ductr::ETL::BufferedDestination
+     #
+     class BufferedUpsertDestination < Ductr::ETL::BufferedDestination
+       Adapter.destination_registry.add(self, as: :buffered_upsert)
+
+       #
+       # Opens the database if needed and calls the job's method to run the query.
+       #
+       # @return [void]
+       #
+       def on_flush
+         call_method(buffer, excluded, adapter.db)
+       end
+
+       private
+
+       #
+       # Generates the excluded keys hash, e.g.
+       #
+       # ```ruby
+       # {a: Sequel[:excluded][:a]}
+       # ```
+       #
+       # @return [Hash<Symbol, Sequel::SQL::QualifiedIdentifier>] The excluded keys hash
+       #
+       def excluded
+         keys = buffer.first.keys
+
+         excluded_keys = keys.map do |key|
+           Sequel[:excluded][key]
+         end
+
+         keys.zip(excluded_keys).to_h
+       end
+     end
+   end
+ end
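
The `excluded` hash built above is what Sequel's `insert_conflict` expects for its `update:` argument (SQLite's `ON CONFLICT ... DO UPDATE`, available since SQLite 3.24). A minimal, self-contained sketch of the same upsert pattern against an in-memory database; the table and rows are made up for illustration:

```ruby
require "sequel"

db = Sequel.sqlite # in-memory SQLite database
db.create_table(:items) do
  primary_key :id
  String :name
end

buffer = [{ id: 1, name: "a" }, { id: 2, name: "b" }]

# Same construction as BufferedUpsertDestination#excluded:
keys = buffer.first.keys
excluded = keys.zip(keys.map { |key| Sequel[:excluded][key] }).to_h

# Rows that hit an existing :id are updated, the others are inserted.
db[:items].insert_conflict(target: :id, update: excluded).multi_insert(buffer)
```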
@@ -0,0 +1,83 @@
+ # frozen_string_literal: true
+
+ module Ductr
+   module SQLite
+     #
+     # A lookup control that executes the query for a batch of rows, registered as `:match`.
+     #
+     # Accepts the `:buffer_size` option; the default value is 10 000.
+     # Accepts the mandatory `:merge` option, an array with two entries:
+     # - The first one is the looked up row key to match.
+     # - The second one is the buffer row key to match.
+     #
+     # Unlike the `:buffered` lookup, this one abstracts the row matching logic by assuming that
+     # you want to merge rows based on a key pair, e.g. primary / foreign keys:
+     #
+     #   lookup :some_sqlite_database, :match, merge: [:id, :item], buffer_size: 42
+     #   def merge_with_stuff(db, ids)
+     #     db[:items_bis].where(item: ids)
+     #   end
+     #
+     class MatchLookup < Ductr::ETL::BufferedTransform
+       Adapter.lookup_registry.add(self, as: :match)
+
+       #
+       # The looked up row key to match.
+       #
+       # @return [Symbol] The column name
+       #
+       def from_key
+         @options[:merge].first
+       end
+
+       #
+       # The buffer row key to match.
+       #
+       # @return [Symbol] The column name
+       #
+       def to_key
+         @options[:merge].last
+       end
+
+       #
+       # Opens the database if needed, calls the job's method and merges
+       # the looked up rows with corresponding buffer rows.
+       #
+       # @yield [row] The each block
+       # @yieldparam [Hash<Symbol, Object>] row The merged row
+       #
+       # @return [void]
+       #
+       def on_flush(&)
+         call_method(adapter.db, buffer_keys).each do |row|
+           match = buffer_find(row)
+           next yield(row) unless match
+
+           yield(row.merge match)
+         end
+       end
+
+       private
+
+       #
+       # Finds the corresponding row in the buffer.
+       #
+       # @param [Hash<Symbol, Object>] row The looked up row
+       #
+       # @return [Hash<Symbol, Object>, nil] The matching row if it exists, nil otherwise
+       #
+       def buffer_find(row)
+         buffer.find { |r| r[from_key] == row[to_key] }
+       end
+
+       #
+       # Maps the buffer keys into an array.
+       #
+       # @return [Array<Integer, String>] The keys array
+       #
+       def buffer_keys
+         buffer.map { |row| row[from_key] }
+       end
+     end
+   end
+ end
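
To make the matching concrete, here is a plain-Ruby sketch (no database involved) of what `on_flush` does with `merge: [:id, :item]`; the sample rows are invented for illustration:

```ruby
buffer    = [{ id: 1, name: "a" }, { id: 2, name: "b" }]
looked_up = [{ item: 1, extra: "x" }, { item: 3, extra: "y" }]

looked_up.each do |row|
  # buffer_find: from_key (:id) on the buffer side, to_key (:item) on the looked up side
  match = buffer.find { |r| r[:id] == row[:item] }

  # Matched rows get the buffer columns merged in; unmatched rows pass through untouched.
  p match ? row.merge(match) : row
end
```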
@@ -0,0 +1,46 @@
+ # frozen_string_literal: true
+
+ module Ductr
+   module SQLite
+     #
+     # A source control that allows selecting a large number of rows by relying on pagination, registered as `:paginated`.
+     # Accepts the `:page_size` option; the default value is 10 000.
+     #
+     #   source :some_sqlite_database, :paginated, page_size: 4
+     #   def my_source(db, offset, limit)
+     #     db[:items].offset(offset).limit(limit)
+     #   end
+     #
+     # Make sure not to select more rows than the configured page size,
+     # otherwise an `InconsistentPaginationError` will be raised.
+     #
+     class PaginatedSource < Ductr::ETL::PaginatedSource
+       Adapter.source_registry.add(self, as: :paginated)
+
+       #
+       # Calls the job's method and iterates over the query result.
+       # Returns true if the page is full, false otherwise.
+       #
+       # @yield The each block
+       #
+       # @raise [InconsistentPaginationError] When the query returns more rows than the page size
+       # @return [Boolean] True if the page is full, false otherwise.
+       #
+       def each_page(&)
+         rows_count = 0
+
+         call_method(adapter.db, @offset, page_size).each do |row|
+           yield(row)
+           rows_count += 1
+         end
+
+         if rows_count > page_size
+           raise InconsistentPaginationError,
+                 "The query returned #{rows_count} rows but the page size is #{page_size} rows"
+         end
+
+         rows_count == page_size
+       end
+     end
+   end
+ end
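
The boolean returned by `each_page` is what drives the next fetch: as long as a page comes back full, the caller advances the offset and runs the query again. A standalone Sequel sketch of that loop against an in-memory database; the table, column and page size are made up for illustration:

```ruby
require "sequel"

db = Sequel.sqlite # in-memory SQLite database
db.create_table(:items) { Integer :n }
db[:items].multi_insert((1..10).map { |n| { n: n } })

page_size = 4
offset = 0

loop do
  rows = db[:items].offset(offset).limit(page_size).all
  rows.each { |row| p row }

  break if rows.size < page_size # a partial (or empty) page means we are done

  offset += page_size
end
```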
@@ -0,0 +1,40 @@
+ # frozen_string_literal: true
+
+ module Ductr
+   module SQLite
+     #
+     # The rufus-scheduler handler class.
+     # @see https://github.com/jmettraux/rufus-scheduler#scheduling-handler-instances
+     #   For further information
+     #
+     class PollingHandler
+       #
+       # Creates the handler based on the scheduler's method and the trigger's adapter instance.
+       #
+       # @param [Method] method The scheduler's method
+       # @param [Ductr::Adapter] adapter The trigger's adapter
+       #
+       def initialize(method, adapter)
+         @method = method
+         @adapter = adapter
+         @last_triggering_key = nil
+       end
+
+       #
+       # The callable method used by the trigger, actually calls the scheduler's method.
+       #
+       # @return [void]
+       #
+       def call
+         @adapter.open do |db|
+           @method.call(db) do |triggering_key|
+             return false if triggering_key == @last_triggering_key
+
+             @last_triggering_key = triggering_key
+             true
+           end
+         end
+       end
+     end
+   end
+ end
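
The block passed to the scheduler's method is what implements the deduplication: it returns true only when the yielded triggering key differs from the one seen on the previous poll. A toy illustration with a stub adapter and a lambda standing in for the scheduler's method; everything here except `PollingHandler` is invented, and the require path is assumed from the gem name:

```ruby
require "ductr/sqlite" # assumed require path

# Stub adapter: just yields something playing the role of the database handle.
class FakeAdapter
  def open
    yield :fake_db
  end
end

# Stands in for the scheduler's method: always reports the same triggering key.
check = ->(_db, &block) { block.call(:same_key) }

handler = Ductr::SQLite::PollingHandler.new(check, FakeAdapter.new)
handler.call # => true, the key is seen for the first time
handler.call # => false, the key has not changed since the previous poll
```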
@@ -0,0 +1,46 @@
+ # frozen_string_literal: true
+
+ module Ductr
+   module SQLite
+     #
+     # A trigger based on the RufusTrigger, runs the PollingHandler at the given timing.
+     # The handler calls the scheduler's method with a block which compares the yield result with the previous one.
+     # If they are different, yield returns true:
+     #
+     #   trigger :my_database, :polling, interval: "1min"
+     #   def check_timestamp(db) # will perform MyJob if the name has changed
+     #     return unless yield(db[:items].select(:name).first)
+     #
+     #     MyJob.perform_later
+     #   end
+     #
+     class PollingTrigger < Ductr::RufusTrigger
+       Adapter.trigger_registry.add(self, as: :polling)
+
+       #
+       # Closes the connection if the scheduler is stopped.
+       #
+       # @return [void]
+       #
+       def stop
+         super
+         adapter.close!
+       end
+
+       private
+
+       #
+       # Returns a callable object, allowing rufus-scheduler to call it.
+       #
+       # @param [Ductr::Scheduler] scheduler The scheduler instance
+       # @param [Symbol] method_name The scheduler's method name
+       # @param [Hash] ** The options passed to the trigger annotation
+       #
+       # @return [#call] A callable object
+       #
+       def callable(scheduler, method_name, **)
+         PollingHandler.new(scheduler.method(method_name), adapter)
+       end
+     end
+   end
+ end
@@ -0,0 +1,8 @@
+ # frozen_string_literal: true
+
+ module Ductr
+   module SQLite
+     # @return [String] The gem's version
+     VERSION = "0.1.1"
+   end
+ end
@@ -0,0 +1,31 @@
+ # frozen_string_literal: true
+
+ require "ductr"
+ require "sequel"
+
+ Dir[File.join(__dir__, "sqlite", "*.rb")].each { |file| require file }
+
+ # :nodoc:
+ module Ductr
+   #
+   # ## SQLite adapter for Ductr ETL
+   # This gem provides useful controls to operate Ductr ETL with SQLite databases.
+   #
+   # To get details about the database connection handling, check out the {Ductr::SQLite::Adapter} class.
+   #
+   # ### Sources
+   # - {Ductr::SQLite::BasicSource} Yields rows one by one.
+   # - {Ductr::SQLite::PaginatedSource} Allows selecting a large number of rows by relying on pagination.
+   #
+   # ### Lookups
+   # - {Ductr::SQLite::BasicLookup} Executes one query per row and merges the looked up row with the current row.
+   # - {Ductr::SQLite::BufferedLookup} Executes one query for a batch of rows and lets you implement the matching logic.
+   # - {Ductr::SQLite::MatchLookup} Executes one query for a batch of rows and abstracts the matching logic.
+   #
+   # ### Destinations
+   # - {Ductr::SQLite::BasicDestination} Writes rows one by one.
+   # - {Ductr::SQLite::BufferedDestination} Accumulates rows in a buffer to write them by batch.
+   # - {Ductr::SQLite::BufferedUpsertDestination} Accumulates rows in a buffer to upsert them by batch.
+   #
+   module SQLite; end
+ end
@@ -0,0 +1,271 @@
+ # :nodoc:
+ module Ductr
+   #
+   # ## SQLite adapter for Ductr ETL
+   # This gem provides useful controls to operate Ductr ETL with SQLite databases.
+   #
+   # To get details about the database connection handling, check out the {Ductr::SQLite::Adapter} class.
+   #
+   # ### Sources
+   # - {Ductr::SQLite::BasicSource} Yields rows one by one.
+   # - {Ductr::SQLite::PaginatedSource} Allows selecting a large number of rows by relying on pagination.
+   #
+   # ### Lookups
+   # - {Ductr::SQLite::BasicLookup} Executes one query per row and merges the looked up row with the current row.
+   # - {Ductr::SQLite::BufferedLookup} Executes one query for a batch of rows and lets you implement the matching logic.
+   # - {Ductr::SQLite::MatchLookup} Executes one query for a batch of rows and abstracts the matching logic.
+   #
+   # ### Destinations
+   # - {Ductr::SQLite::BasicDestination} Writes rows one by one.
+   # - {Ductr::SQLite::BufferedDestination} Accumulates rows in a buffer to write them by batch.
+   # - {Ductr::SQLite::BufferedUpsertDestination} Accumulates rows in a buffer to upsert them by batch.
+   module SQLite
+     VERSION: String
+
+     #
+     # The SQLite adapter class implements the required #open! and #close! methods to handle the database connection.
+     # The adapter is registered as `:sqlite`; to use it, add `adapter: sqlite` to the YAML configuration e.g.:
+     #
+     # ```yml
+     # # config/development.yml
+     # adapters:
+     #   some_sqlite_database:
+     #     adapter: "sqlite"
+     #     database: "example.db"
+     # ```
+     class Adapter < Ductr::Adapter
+       # sord warn - Sequel::Database wasn't able to be resolved to a constant in this project
+       # Opens the database connection with the adapter's configuration.
+       #
+       # _@return_ — The database connection instance
+       def open!: () -> Sequel::Database
+
+       # Closes the database connection.
+       # In the specific case of SQLite, we just destroy the connection instance.
+       def close!: () -> void
+
+       # sord warn - Sequel::Database wasn't able to be resolved to a constant in this project
+       # _@return_ — The database connection instance
+       attr_reader db: Sequel::Database?
+     end
+
+     #
+     # A lookup control that executes one query per row, registered as `:basic`.
+     # The job's method must return a row which will be merged with the current row:
+     #
+     #   lookup :some_sqlite_database, :basic
+     #   def my_lookup(row, db)
+     #     db[:items_bis].where(item: row[:id]).limit(1)
+     #   end
+     #
+     # As the control merges the looked up row with the current row,
+     # ensure that column names are different or they will be overwritten.
+     #
+     # If the lookup returns a falsy value, nothing will be merged with the current row.
+     class BasicLookup < Ductr::ETL::Transform
+       # Calls the job's method to merge its result with the current row.
+       #
+       # _@param_ `row` — The current row, preferably a Hash
+       #
+       # _@return_ — The row merged with the looked up row, or the untouched row if nothing was found
+       def process: (::Hash[Symbol, Object] row) -> ::Hash[Symbol, Object]
+     end
+
+     #
+     # A source control that yields rows one by one, registered as `:basic`:
+     #
+     #   source :some_sqlite_database, :basic
+     #   def select_some_stuff(db)
+     #     db[:items].limit(42)
+     #   end
+     #
+     # Do not try to select a large number of rows, as they will all be loaded into memory.
+     class BasicSource < Ductr::ETL::Source
+       # Opens the database, calls the job's method and iterates over the query results.
+       def each: () -> void
+     end
+
+     #
+     # A lookup control that executes the query for a batch of rows, registered as `:match`.
+     #
+     # Accepts the `:buffer_size` option; the default value is 10 000.
+     # Accepts the mandatory `:merge` option, an array with two entries:
+     # - The first one is the looked up row key to match.
+     # - The second one is the buffer row key to match.
+     #
+     # Unlike the `:buffered` lookup, this one abstracts the row matching logic by assuming that
+     # you want to merge rows based on a key pair, e.g. primary / foreign keys:
+     #
+     #   lookup :some_sqlite_database, :match, merge: [:id, :item], buffer_size: 42
+     #   def merge_with_stuff(db, ids)
+     #     db[:items_bis].where(item: ids)
+     #   end
+     class MatchLookup < Ductr::ETL::BufferedTransform
+       # The looked up row key to match.
+       #
+       # _@return_ — The column name
+       def from_key: () -> Symbol
+
+       # The buffer row key to match.
+       #
+       # _@return_ — The column name
+       def to_key: () -> Symbol
+
+       # Opens the database if needed, calls the job's method and merges
+       # the looked up rows with corresponding buffer rows.
+       def on_flush: () ?{ (::Hash[Symbol, Object] row) -> void } -> void
+
+       # Finds the corresponding row in the buffer.
+       #
+       # _@param_ `row` — The looked up row
+       #
+       # _@return_ — The matching row if it exists
+       def buffer_find: (::Hash[Symbol, Object] row) -> ::Hash[Symbol, Object]?
+
+       # Maps the buffer keys into an array.
+       #
+       # _@return_ — The keys array
+       def buffer_keys: () -> ::Array[(Integer | String)]
+     end
+
+     #
+     # A lookup control that executes the query for a batch of rows, registered as `:buffered`.
+     # Accepts the `:buffer_size` option; the default value is 10 000.
+     # You have to implement your own row matching logic:
+     #
+     #   lookup :some_sqlite_database, :buffered, buffer_size: 42
+     #   def my_lookup(db, buffer, &)
+     #     ids = buffer.map { |row| row[:id] }
+     #     db[:items].where(item: ids).each do |row|
+     #       match = buffer.find { |r| r[:id] == row[:item] }
+     #
+     #       next yield(row) unless match
+     #
+     #       yield(row.merge match)
+     #     end
+     #   end
+     class BufferedLookup < Ductr::ETL::BufferedTransform
+       # Opens the database if needed, calls the job's method and passes the each block to it.
+       def on_flush: () -> void
+     end
+
+     #
+     # The rufus-scheduler handler class.
+     # @see https://github.com/jmettraux/rufus-scheduler#scheduling-handler-instances
+     #   For further information
+     class PollingHandler
+       # sord warn - Ductr::Adapter wasn't able to be resolved to a constant in this project
+       # Creates the handler based on the scheduler's method and the trigger's adapter instance.
+       #
+       # _@param_ `method` — The scheduler's method
+       #
+       # _@param_ `adapter` — The trigger's adapter
+       def initialize: (Method method, Ductr::Adapter adapter) -> void
+
+       # The callable method used by the trigger, actually calls the scheduler's method.
+       def call: () -> void
+     end
+
+     #
+     # A trigger based on the RufusTrigger, runs the PollingHandler at the given timing.
+     # The handler calls the scheduler's method with a block which compares the yield result with the previous one.
+     # If they are different, yield returns true:
+     #
+     #   trigger :my_database, :polling, interval: "1min"
+     #   def check_timestamp(db) # will perform MyJob if the name has changed
+     #     return unless yield(db[:items].select(:name).first)
+     #
+     #     MyJob.perform_later
+     #   end
+     class PollingTrigger < Ductr::RufusTrigger
+       # Closes the connection if the scheduler is stopped.
+       def stop: () -> void
+
+       # sord warn - Ductr::Scheduler wasn't able to be resolved to a constant in this project
+       # sord duck - #call looks like a duck type, replacing with untyped
+       # Returns a callable object, allowing rufus-scheduler to call it.
+       #
+       # _@param_ `scheduler` — The scheduler instance
+       #
+       # _@param_ `method_name` — The scheduler's method name
+       #
+       # _@param_ `**` — The options passed to the trigger annotation
+       #
+       # _@return_ — A callable object
+       def callable: (Ductr::Scheduler scheduler, Symbol method_name) -> untyped
+     end
+
+     #
+     # A source control that allows selecting a large number of rows by relying on pagination, registered as `:paginated`.
+     # Accepts the `:page_size` option; the default value is 10 000.
+     #
+     #   source :some_sqlite_database, :paginated, page_size: 4
+     #   def my_source(db, offset, limit)
+     #     db[:items].offset(offset).limit(limit)
+     #   end
+     #
+     # Make sure not to select more rows than the configured page size,
+     # otherwise an `InconsistentPaginationError` will be raised.
+     class PaginatedSource < Ductr::ETL::PaginatedSource
+       # Calls the job's method and iterates over the query result.
+       # Returns true if the page is full, false otherwise.
+       #
+       # _@return_ — True if the page is full, false otherwise.
+       def each_page: () -> bool
+     end
+
+     #
+     # A destination control that writes rows one by one, registered as `:basic`:
+     #
+     #   destination :some_sqlite_database, :basic
+     #   def my_destination(row, db)
+     #     db[:items].insert(row)
+     #   end
+     class BasicDestination < Ductr::ETL::Destination
+       # Opens the database if needed and calls the job's method to insert one row at a time.
+       #
+       # _@param_ `row` — The row to insert, preferably a Hash
+       def write: (::Hash[Symbol, Object] row) -> void
+     end
+
+     #
+     # A destination control that accumulates rows in a buffer to write them by batch, registered as `:buffered`.
+     # Accepts the `:buffer_size` option; the default value is 10 000:
+     #
+     #   destination :some_sqlite_database, :buffered, buffer_size: 42
+     #   def my_destination(buffer, db)
+     #     db[:items].multi_insert(buffer)
+     #   end
+     #
+     # @see Ductr::ETL::BufferedDestination
+     class BufferedDestination < Ductr::ETL::BufferedDestination
+       # Opens the database if needed and calls the job's method to run the query.
+       def on_flush: () -> void
+     end
+
+     #
+     # A destination control that accumulates rows in a buffer to upsert them by batch, registered as `:buffered_upsert`.
+     # Accepts the `:buffer_size` option; the default value is 10 000:
+     #
+     #   destination :some_sqlite_database, :buffered_upsert, buffer_size: 42
+     #   def my_destination(buffer, excluded, db)
+     #     db[:items].insert_conflict(target: :id, update: excluded).multi_insert(buffer)
+     #   end
+     #
+     # @see Ductr::ETL::BufferedDestination
+     class BufferedUpsertDestination < Ductr::ETL::BufferedDestination
+       # Opens the database if needed and calls the job's method to run the query.
+       def on_flush: () -> void
+
+       # sord warn - Sequel::SQL::QualifiedIdentifier wasn't able to be resolved to a constant in this project
+       # Generates the excluded keys hash, e.g.
+       #
+       # ```ruby
+       # {a: Sequel[:excluded][:a]}
+       # ```
+       #
+       # _@return_ — The excluded keys hash
+       def excluded: () -> ::Hash[Symbol, Sequel::SQL::QualifiedIdentifier]
+     end
+   end
+ end