ductr-sqlite 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
module Ductr
  module SQLite
    #
    # A destination control that accumulates rows in a buffer to upsert them by batch, registered as `:buffered_upsert`.
    # Accept the `:buffer_size` option, default value is 10 000:
    #
    #   destination :some_sqlite_database, :buffered_upsert, buffer_size: 42
    #   def my_destination(buffer, excluded, db)
    #     db[:items].insert_conflict(target: :id, update: excluded).multi_insert(buffer)
    #   end
    #
    # @see more Ductr::ETL::BufferedDestination
    #
    class BufferedUpsertDestination < Ductr::ETL::BufferedDestination
      Adapter.destination_registry.add(self, as: :buffered_upsert)

      #
      # Open the database if needed and call the job's method to run the upsert
      # query with the buffered rows and the generated excluded-keys mapping.
      #
      # @return [void]
      #
      def on_flush
        call_method(buffer, excluded, adapter.db)
      end

      private

      #
      # Generate the excluded keys hash e.g.
      #
      # ```ruby
      # {a: Sequel[:excluded][:a]}
      # ```
      #
      # Column names come from the first buffered row, so every row in the
      # buffer is expected to share the same keys.
      #
      # @return [Hash<Symbol, Sequel::SQL::QualifiedIdentifier>] The excluded keys hash
      #
      def excluded
        buffer.first.keys.to_h { |key| [key, Sequel[:excluded][key]] }
      end
    end
  end
end
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
module Ductr
  module SQLite
    #
    # A lookup control that execute the query for a bunch of rows, registered as `:match`.
    #
    # Accept the `:buffer_size` option, default value is 10 000.
    # Accept the `:merge` option, mandatory an array with two entries:
    # - The first one is the looked up row key to match.
    # - The second one is the buffer row key to match.
    #
    # Unless the `:buffered` lookup, this one abstracts the row matching logic by assuming that
    # you want to merge rows based on a key couple e.g. primary / foreign keys:
    #
    #   lookup :some_sqlite_database, :match, merge: [:id, :item], buffer_size: 42
    #   def merge_with_stuff(db, ids)
    #     db[:items_bis].where(item: ids)
    #   end
    #
    class MatchLookup < Ductr::ETL::BufferedTransform
      Adapter.lookup_registry.add(self, as: :match)

      #
      # The looked up row key to match (first entry of the `:merge` option).
      #
      # @return [Symbol] The column name
      #
      def from_key
        looked_up_key, = @options[:merge]
        looked_up_key
      end

      #
      # The buffer row key to match (last entry of the `:merge` option).
      #
      # @return [Symbol] The column name
      #
      def to_key
        *, buffer_key = @options[:merge]
        buffer_key
      end

      #
      # Opens the database if needed, calls the job's method and merges
      # the looked up rows with corresponding buffer rows.
      # Rows without a buffered match are yielded untouched.
      #
      # @yield [row] The each block
      # @yieldparam [Hash<Symbol, Object>] row The merged row
      #
      # @return [void]
      #
      def on_flush(&)
        call_method(adapter.db, buffer_keys).each do |looked_up_row|
          buffered_row = buffer_find(looked_up_row)

          if buffered_row
            yield(looked_up_row.merge(buffered_row))
          else
            yield(looked_up_row)
          end
        end
      end

      private

      #
      # Find the corresponding row into the buffer.
      #
      # @param [Hash<Symbol, Object>] row The looked up row
      #
      # @return [Hash<Symbol, Object>, nil] the matching row if exists
      #
      def buffer_find(row)
        match_value = row[to_key]
        buffer.find { |buffered| buffered[from_key] == match_value }
      end

      #
      # Maps the buffer keys into an array.
      #
      # @return [Array<Integer, String>] The keys array
      #
      def buffer_keys
        buffer.map { |buffered| buffered[from_key] }
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
module Ductr
  module SQLite
    #
    # A source control that allows to select a big number of rows by relying on pagination, registered as `:paginated`.
    # Accept the `:page_size` option, default value is 10 000.
    #
    #   source :some_sqlite_database, :paginated, page_size: 4
    #   def my_source(db, offset, limit)
    #     db[:items].offset(offset).limit(limit)
    #   end
    #
    # Ensure to not select more rows than the configured page size,
    # otherwise it will raise an `InconsistentPaginationError`.
    #
    class PaginatedSource < Ductr::ETL::PaginatedSource
      Adapter.source_registry.add(self, as: :paginated)

      #
      # Calls the job's method and iterate on the query result.
      # Returns true if the page is full, false otherwise.
      #
      # @yield The each block
      #
      # @raise [InconsistentPaginationError] When the query return more rows than the page size
      # @return [Boolean] True if the page is full, false otherwise.
      #
      def each_page(&)
        fetched_rows = 0

        call_method(adapter.db, @offset, page_size).each do |row|
          yield(row)
          fetched_rows += 1
        end

        # The check runs after iteration, so the count reflects what the
        # job's query actually returned for this page.
        if fetched_rows > page_size
          message = "The query returned #{fetched_rows} rows but the page size is #{page_size} rows"
          raise InconsistentPaginationError, message
        end

        fetched_rows == page_size
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
module Ductr
  module SQLite
    #
    # The rufus-scheduler handler class.
    # @see https://github.com/jmettraux/rufus-scheduler#scheduling-handler-instances
    # For further information
    #
    class PollingHandler
      #
      # Creates the handler based on the given scheduler method and the trigger's adapter instance.
      #
      # @param [Method] method The scheduler's method
      # @param [Ductr::Adapter] adapter The trigger's adapter
      #
      def initialize(method, adapter)
        @method = method
        @adapter = adapter
        # Memoizes the last yielded value so that consecutive identical polls
        # are ignored.
        @last_triggering_key = nil
      end

      #
      # The callable method used by the trigger, actually calls the scheduler's method.
      # The block handed to the scheduler's method yields true only when the polled
      # value differs from the one seen on the previous call.
      #
      # @return [void]
      #
      def call
        @adapter.open do |db|
          @method.call(db) do |triggering_key|
            key_changed = triggering_key != @last_triggering_key
            # Early return exits #call entirely: the scheduler's method is not
            # resumed when the key is unchanged.
            return false unless key_changed

            @last_triggering_key = triggering_key
            key_changed
          end
        end
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
module Ductr
  module SQLite
    #
    # A trigger based on the RufusTrigger, runs the PollingHandler at the given timing.
    # The handler calls the scheduler's method with a block which compares the yield result with the previous one.
    # If they are different, yield returns true:
    #
    #   trigger :my_database, :polling, interval: "1min"
    #   def check_timestamp(db) # will perform MyJob if the name have changed
    #     return unless yield(db[:items].select(:name).first)
    #
    #     MyJob.perform_later
    #   end
    #
    class PollingTrigger < Ductr::RufusTrigger
      Adapter.trigger_registry.add(self, as: :polling)

      #
      # Closes the connection if the scheduler is stopped.
      #
      # @return [void]
      #
      def stop
        super
        adapter.close!
      end

      private

      #
      # Returns a callable object, allowing rufus-scheduler to call it.
      #
      # @param [Ductr::Scheduler] scheduler The scheduler instance
      # @param [Symbol] method_name The scheduler's method name
      # @param [Hash] ** The option passed to the trigger annotation (unused here)
      #
      # @return [#call] A callable object
      #
      def callable(scheduler, method_name, **)
        # BUGFIX: PollingHandler#initialize accepts exactly (method, adapter);
        # passing (scheduler, method_name, adapter) raised an ArgumentError.
        # Hand it the bound Method object instead.
        PollingHandler.new(scheduler.method(method_name), adapter)
      end
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
module Ductr
  module SQLite
    # Current release of the ductr-sqlite gem (semantic versioning).
    # @return [String] VERSION Gem's version
    VERSION = "0.1.1"
  end
end
@@ -0,0 +1,31 @@
1
# frozen_string_literal: true

require "ductr"
require "sequel"

# Eagerly require every component of the adapter (adapter, sources, lookups,
# destinations, triggers) living under lib/ductr/sqlite/.
Dir[File.join(__dir__, "sqlite", "*.rb")].each { |file| require file }

# :nodoc:
module Ductr
  #
  # ## SQLite adapter for Ductr ETL
  # This gem provides useful controls to operate Ductr ETL with SQLite databases.
  #
  # To get details about the database connection handling, checkout the {Ductr::SQLite::Adapter} class.
  #
  # ### Sources
  # - {Ductr::SQLite::BasicSource} Yields rows one by one.
  # - {Ductr::SQLite::PaginatedSource} Allows to select a big number of rows by relying on pagination.
  #
  # ### Lookups
  # - {Ductr::SQLite::BasicLookup} Executes one query per row and merge the looked up row with the current row.
  # - {Ductr::SQLite::BufferedLookup} Executes one query for a bunch of rows and let you implement the matching logic.
  # - {Ductr::SQLite::MatchLookup} Executes one query for a bunch of rows and abstracts the matching logic.
  #
  # ### Destinations
  # - {Ductr::SQLite::BasicDestination} Writes rows one by one.
  # - {Ductr::SQLite::BufferedDestination} Accumulates rows in a buffer to write them by batch.
  # - {Ductr::SQLite::BufferedUpsertDestination} Accumulates rows in a buffer to upsert them by batch.
  #
  module SQLite; end
end
@@ -0,0 +1,271 @@
1
# Type signatures for the ductr-sqlite gem.
# NOTE(review): this file appears to be generated with sord from the YARD docs
# (see the "sord warn" annotations below) — presumably it should be regenerated
# rather than hand-edited when the Ruby sources change; confirm with the build.
# :nodoc:
module Ductr
  #
  # ## SQLite adapter for Ductr ETL
  # This gem provides useful controls to operate Ductr ETL with SQLite databases.
  #
  # To get details about the database connection handling, checkout the {Ductr::SQLite::Adapter} class.
  #
  # ### Sources
  # - {Ductr::SQLite::BasicSource} Yields rows one by one.
  # - {Ductr::SQLite::PaginatedSource} Allows to select a big number of rows by relying on pagination.
  #
  # ### Lookups
  # - {Ductr::SQLite::BasicLookup} Executes one query per row and merge the looked up row with the current row.
  # - {Ductr::SQLite::BufferedLookup} Executes one query for a bunch of rows and let you implement the matching logic.
  # - {Ductr::SQLite::MatchLookup} Executes one query for a bunch of rows and abstracts the matching logic.
  #
  # ### Destinations
  # - {Ductr::SQLite::BasicDestination} Writes rows one by one.
  # - {Ductr::SQLite::BufferedDestination} Accumulates rows in a buffer to write them by batch.
  # - {Ductr::SQLite::BufferedUpsertDestination} Accumulates rows in a buffer to upsert them by batch.
  module SQLite
    VERSION: String

    #
    # The SQLite adapter class implement the required #open! and #close! methods to handle the database connection.
    # The adapter is registered as `:sqlite` to use it, add `adapter: sqlite` to the YAML configuration e.g.:
    #
    # ```yml
    # # config/development.yml
    # adapters:
    #   some_sqlite_database:
    #     adapter: "sqlite"
    #     database: "example.db"
    # ```
    class Adapter < Ductr::Adapter
      # sord warn - Sequel::Database wasn't able to be resolved to a constant in this project
      # Opens the database connection with the adapter's configuration.
      #
      # _@return_ — The database connection instance
      def open!: () -> Sequel::Database

      # Closes the database connection.
      # In the specific case of SQLite, we just destroy the connection instance.
      def close!: () -> void

      # sord warn - Sequel::Database wasn't able to be resolved to a constant in this project
      # _@return_ — The database connection instance
      attr_reader db: Sequel::Database?
    end

    #
    # A lookup control that execute one query per row, registered as `:basic`.
    # The job's method must return a row which will merged with the current row:
    #
    #   lookup :some_sqlite_database, :basic
    #   def my_lookup(row, db)
    #     db[:items_bis].where(item: row[:id]).limit(1)
    #   end
    #
    # As the control merge the looked up row with the current row,
    # ensure that column names are different or they will be overwritten.
    #
    # If the lookup returns a falsy value, nothing won't be merged with the current row.
    class BasicLookup < Ductr::ETL::Transform
      # Calls the job's method to merge its result with the current row.
      #
      # _@param_ `row` — The current row, preferably a Hash
      #
      # _@return_ — The row merged with looked up row or the untouched row if nothing was found
      def process: (::Hash[Symbol, Object] row) -> ::Hash[Symbol, Object]
    end

    #
    # A source control that yields rows one by one, registered as `:basic`:
    #
    #   source :some_sqlite_database, :basic
    #   def select_some_stuff(db)
    #     db[:items].limit(42)
    #   end
    #
    # Do not try to select a large number of rows, as they will all be loaded into memory.
    class BasicSource < Ductr::ETL::Source
      # Opens the database, calls the job's method and iterate over the query results.
      def each: () -> void
    end

    #
    # A lookup control that execute the query for a bunch of rows, registered as `:match`.
    #
    # Accept the `:buffer_size` option, default value is 10 000.
    # Accept the `:merge` option, mandatory an array with two entries:
    # - The first one is the looked up row key to match.
    # - The second one is the buffer row key to match.
    #
    # Unless the `:buffered` lookup, this one abstracts the row matching logic by assuming that
    # you want to merge rows based on a key couple e.g. primary / foreign keys:
    #
    #   lookup :some_sqlite_database, :match, merge: [:id, :item], buffer_size: 42
    #   def merge_with_stuff(db, ids)
    #     db[:items_bis].where(item: ids)
    #   end
    class MatchLookup < Ductr::ETL::BufferedTransform
      # The looked up row key to match.
      #
      # _@return_ — The column name
      def from_key: () -> Symbol

      # The buffer row key to match.
      #
      # _@return_ — The column name
      def to_key: () -> Symbol

      # Opens the database if needed, calls the job's method and merges
      # the looked up rows with corresponding buffer rows.
      def on_flush: () ?{ (::Hash[Symbol, Object] row) -> void } -> void

      # Find the corresponding row into the buffer.
      #
      # _@param_ `row` — The looked up row
      #
      # _@return_ — the matching row if exists
      def buffer_find: (::Hash[Symbol, Object] row) -> ::Hash[Symbol, Object]?

      # Maps the buffer keys into an array.
      #
      # _@return_ — The keys array
      def buffer_keys: () -> ::Array[(Integer | String)]
    end

    #
    # A lookup control that execute the query for a bunch of rows, registered as `:buffered`.
    # Accept the `:buffer_size` option, default value is 10 000.
    # You have to implement your own row matching logic:
    #
    #   lookup :some_sqlite_database, :buffered, buffer_size: 42
    #   def my_lookup(db, buffer, &)
    #     ids = buffer.map {|row| row[:id]}
    #     db[:items].where(item: ids).each do |row|
    #       match = buffer.find { |r| r[:id] == row[:item] }
    #
    #       next yield(row) unless match
    #
    #       yield(row.merge match)
    #     end
    #   end
    class BufferedLookup < Ductr::ETL::BufferedTransform
      # Opens the database if needed, calls the job's method and pass the each block to it.
      def on_flush: () -> void
    end

    #
    # The rufus-scheduler handler class.
    # @see https://github.com/jmettraux/rufus-scheduler#scheduling-handler-instances
    # For further information
    class PollingHandler
      # sord warn - Ductr::Adapter wasn't able to be resolved to a constant in this project
      # Creates the handler based on the given scheduler, its method name and the trigger's adapter instance.
      #
      # _@param_ `method` — The scheduler's method
      #
      # _@param_ `adapter` — The trigger's adapter
      def initialize: (Method method, Ductr::Adapter adapter) -> void

      # The callable method used by the trigger, actually calls the scheduler's method.
      def call: () -> void
    end

    #
    # A trigger based on the RufusTrigger, runs the PollingHandler at the given timing.
    # The handler calls the scheduler's method with a block which compares the yield result with the previous one.
    # If they are different, yield returns true:
    #
    #   trigger :my_database, :polling, interval: "1min"
    #   def check_timestamp(db) # will perform MyJob if the name have changed
    #     return unless yield(db[:items].select(:name).first)
    #
    #     MyJob.perform_later
    #   end
    class PollingTrigger < Ductr::RufusTrigger
      # Closes the connection if the scheduler is stopped.
      def stop: () -> void

      # sord warn - Ductr::Scheduler wasn't able to be resolved to a constant in this project
      # sord duck - #call looks like a duck type, replacing with untyped
      # Returns a callable object, allowing rufus-scheduler to call it.
      #
      # _@param_ `scheduler` — The scheduler instance
      #
      # _@param_ `method_name` — The scheduler's method name
      #
      # _@param_ `**` — The option passed to the trigger annotation
      #
      # _@return_ — A callable object
      def callable: (Ductr::Scheduler scheduler, Symbol method_name) -> untyped
    end

    #
    # A source control that allows to select a big number of rows by relying on pagination, registered as `:paginated`.
    # Accept the `:page_size` option, default value is 10 000.
    #
    #   source :some_sqlite_database, :paginated, page_size: 4
    #   def my_source(db, offset, limit)
    #     db[:items].offset(offset).limit(limit)
    #   end
    #
    # Ensure to not select more rows than the configured page size,
    # otherwise it will raise an `InconsistentPaginationError`.
    class PaginatedSource < Ductr::ETL::PaginatedSource
      # Calls the job's method and iterate on the query result.
      # Returns true if the page is full, false otherwise.
      #
      # _@return_ — True if the page is full, false otherwise.
      def each_page: () -> bool
    end

    #
    # A destination control that write rows one by one, registered as `:basic`:
    #
    #   destination :some_sqlite_database, :basic
    #   def my_destination(row, db)
    #     db[:items].insert(row)
    #   end
    class BasicDestination < Ductr::ETL::Destination
      # Opens the database if needed and call the job's method to insert one row at time.
      #
      # _@param_ `row` — The row to insert, preferably a Hash
      def write: (::Hash[Symbol, Object] row) -> void
    end

    #
    # A destination control that accumulates rows in a buffer to write them by batch, registered as `:buffered`.
    # Accept the `:buffer_size` option, default value is 10 000:
    #
    #   destination :some_sqlite_database, :buffered, buffer_size: 42
    #   def my_destination(buffer, db)
    #     db[:items].multi_insert(buffer)
    #   end
    #
    # @see more Ductr::ETL::BufferedDestination
    class BufferedDestination < Ductr::ETL::BufferedDestination
      # Open the database if needed and call the job's method to run the query.
      def on_flush: () -> void
    end

    #
    # A destination control that accumulates rows in a buffer to upsert them by batch, registered as `:buffered_upsert`.
    # Accept the `:buffer_size` option, default value is 10 000:
    #
    #   destination :some_sqlite_database, :buffered_upsert, buffer_size: 42
    #   def my_destination(buffer, excluded, db)
    #     db[:items].insert_conflict(target: :id, update: excluded).multi_insert(buffer)
    #   end
    #
    # @see more Ductr::ETL::BufferedDestination
    class BufferedUpsertDestination < Ductr::ETL::BufferedDestination
      # Open the database if needed and call the job's method to run the query.
      def on_flush: () -> void

      # sord warn - Sequel::SQL::QualifiedIdentifier wasn't able to be resolved to a constant in this project
      # Generate the excluded keys hash e.g.
      #
      # ```ruby
      # {a: Sequel[:excluded][:a]}
      # ```
      #
      # _@return_ — The excluded keys hash
      def excluded: () -> ::Hash[Symbol, Sequel::SQL::QualifiedIdentifier]
    end
  end
end