ductr-sqlite 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,50 @@
+ # frozen_string_literal: true
+
+ module Ductr
+   module SQLite
+     #
+     # A destination control that accumulates rows in a buffer to upsert them by batch, registered as `:buffered_upsert`.
+     # Accepts the `:buffer_size` option; the default value is 10 000:
+     #
+     #   destination :some_sqlite_database, :buffered_upsert, buffer_size: 42
+     #   def my_destination(buffer, excluded, db)
+     #     db[:items].insert_conflict(target: :id, update: excluded).multi_insert(buffer)
+     #   end
+     #
+     # @see Ductr::ETL::BufferedDestination
+     #
+     class BufferedUpsertDestination < Ductr::ETL::BufferedDestination
+       Adapter.destination_registry.add(self, as: :buffered_upsert)
+
+       #
+       # Opens the database if needed and calls the job's method to run the query.
+       #
+       # @return [void]
+       #
+       def on_flush
+         call_method(buffer, excluded, adapter.db)
+       end
+
+       private
+
+       #
+       # Generates the excluded keys hash, e.g.
+       #
+       # ```ruby
+       # {a: Sequel[:excluded][:a]}
+       # ```
+       #
+       # @return [Hash<Symbol, Sequel::SQL::QualifiedIdentifier>] The excluded keys hash
+       #
+       def excluded
+         keys = buffer.first.keys
+
+         excluded_keys = keys.map do |key|
+           Sequel[:excluded][key]
+         end
+
+         keys.zip(excluded_keys).to_h
+       end
+     end
+   end
+ end
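
The `excluded` hash built above is what Sequel's `insert_conflict` expects for its `update:` argument (SQLite's `ON CONFLICT ... DO UPDATE`, available since SQLite 3.24). A minimal, self-contained sketch of the same upsert pattern against an in-memory database; the table and rows are made up for illustration:

```ruby
require "sequel"

db = Sequel.sqlite # in-memory SQLite database
db.create_table(:items) do
  primary_key :id
  String :name
end

buffer = [{ id: 1, name: "a" }, { id: 2, name: "b" }]

# Same construction as BufferedUpsertDestination#excluded:
keys = buffer.first.keys
excluded = keys.zip(keys.map { |key| Sequel[:excluded][key] }).to_h

# Rows that hit an existing :id are updated, the others are inserted.
db[:items].insert_conflict(target: :id, update: excluded).multi_insert(buffer)
```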
@@ -0,0 +1,83 @@
+ # frozen_string_literal: true
+
+ module Ductr
+   module SQLite
+     #
+     # A lookup control that executes the query for a batch of rows, registered as `:match`.
+     #
+     # Accepts the `:buffer_size` option; the default value is 10 000.
+     # Accepts the mandatory `:merge` option, an array with two entries:
+     # - The first one is the looked up row key to match.
+     # - The second one is the buffer row key to match.
+     #
+     # Unlike the `:buffered` lookup, this one abstracts the row matching logic by assuming that
+     # you want to merge rows based on a key pair, e.g. primary / foreign keys:
+     #
+     #   lookup :some_sqlite_database, :match, merge: [:id, :item], buffer_size: 42
+     #   def merge_with_stuff(db, ids)
+     #     db[:items_bis].where(item: ids)
+     #   end
+     #
+     class MatchLookup < Ductr::ETL::BufferedTransform
+       Adapter.lookup_registry.add(self, as: :match)
+
+       #
+       # The looked up row key to match.
+       #
+       # @return [Symbol] The column name
+       #
+       def from_key
+         @options[:merge].first
+       end
+
+       #
+       # The buffer row key to match.
+       #
+       # @return [Symbol] The column name
+       #
+       def to_key
+         @options[:merge].last
+       end
+
+       #
+       # Opens the database if needed, calls the job's method and merges
+       # the looked up rows with corresponding buffer rows.
+       #
+       # @yield [row] The each block
+       # @yieldparam [Hash<Symbol, Object>] row The merged row
+       #
+       # @return [void]
+       #
+       def on_flush(&)
+         call_method(adapter.db, buffer_keys).each do |row|
+           match = buffer_find(row)
+           next yield(row) unless match
+
+           yield(row.merge match)
+         end
+       end
+
+       private
+
+       #
+       # Finds the corresponding row in the buffer.
+       #
+       # @param [Hash<Symbol, Object>] row The looked up row
+       #
+       # @return [Hash<Symbol, Object>, nil] The matching row if it exists, nil otherwise
+       #
+       def buffer_find(row)
+         buffer.find { |r| r[from_key] == row[to_key] }
+       end
+
+       #
+       # Maps the buffer keys into an array.
+       #
+       # @return [Array<Integer, String>] The keys array
+       #
+       def buffer_keys
+         buffer.map { |row| row[from_key] }
+       end
+     end
+   end
+ end
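
To make the matching concrete, here is a plain-Ruby sketch (no database involved) of what `on_flush` does with `merge: [:id, :item]`; the sample rows are invented for illustration:

```ruby
buffer    = [{ id: 1, name: "a" }, { id: 2, name: "b" }]
looked_up = [{ item: 1, extra: "x" }, { item: 3, extra: "y" }]

looked_up.each do |row|
  # buffer_find: from_key (:id) on the buffer side, to_key (:item) on the looked up side
  match = buffer.find { |r| r[:id] == row[:item] }

  # Matched rows get the buffer columns merged in; unmatched rows pass through untouched.
  p match ? row.merge(match) : row
end
```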
@@ -0,0 +1,46 @@
+ # frozen_string_literal: true
+
+ module Ductr
+   module SQLite
+     #
+     # A source control that allows selecting a large number of rows by relying on pagination, registered as `:paginated`.
+     # Accepts the `:page_size` option; the default value is 10 000.
+     #
+     #   source :some_sqlite_database, :paginated, page_size: 4
+     #   def my_source(db, offset, limit)
+     #     db[:items].offset(offset).limit(limit)
+     #   end
+     #
+     # Make sure not to select more rows than the configured page size,
+     # otherwise an `InconsistentPaginationError` will be raised.
+     #
+     class PaginatedSource < Ductr::ETL::PaginatedSource
+       Adapter.source_registry.add(self, as: :paginated)
+
+       #
+       # Calls the job's method and iterates over the query result.
+       # Returns true if the page is full, false otherwise.
+       #
+       # @yield The each block
+       #
+       # @raise [InconsistentPaginationError] When the query returns more rows than the page size
+       # @return [Boolean] True if the page is full, false otherwise.
+       #
+       def each_page(&)
+         rows_count = 0
+
+         call_method(adapter.db, @offset, page_size).each do |row|
+           yield(row)
+           rows_count += 1
+         end
+
+         if rows_count > page_size
+           raise InconsistentPaginationError,
+                 "The query returned #{rows_count} rows but the page size is #{page_size} rows"
+         end
+
+         rows_count == page_size
+       end
+     end
+   end
+ end
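
The boolean returned by `each_page` is what drives the next fetch: as long as a page comes back full, the caller advances the offset and runs the query again. A standalone Sequel sketch of that loop against an in-memory database; the table, column and page size are made up for illustration:

```ruby
require "sequel"

db = Sequel.sqlite # in-memory SQLite database
db.create_table(:items) { Integer :n }
db[:items].multi_insert((1..10).map { |n| { n: n } })

page_size = 4
offset = 0

loop do
  rows = db[:items].offset(offset).limit(page_size).all
  rows.each { |row| p row }

  break if rows.size < page_size # a partial (or empty) page means we are done

  offset += page_size
end
```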
@@ -0,0 +1,40 @@
+ # frozen_string_literal: true
+
+ module Ductr
+   module SQLite
+     #
+     # The rufus-scheduler handler class.
+     # @see https://github.com/jmettraux/rufus-scheduler#scheduling-handler-instances
+     #   For further information
+     #
+     class PollingHandler
+       #
+       # Creates the handler based on the scheduler's method and the trigger's adapter instance.
+       #
+       # @param [Method] method The scheduler's method
+       # @param [Ductr::Adapter] adapter The trigger's adapter
+       #
+       def initialize(method, adapter)
+         @method = method
+         @adapter = adapter
+         @last_triggering_key = nil
+       end
+
+       #
+       # The callable method used by the trigger, actually calls the scheduler's method.
+       #
+       # @return [void]
+       #
+       def call
+         @adapter.open do |db|
+           @method.call(db) do |triggering_key|
+             return false if triggering_key == @last_triggering_key
+
+             @last_triggering_key = triggering_key
+             true
+           end
+         end
+       end
+     end
+   end
+ end
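
The block passed to the scheduler's method is what implements the deduplication: it returns true only when the yielded triggering key differs from the one seen on the previous poll. A toy illustration with a stub adapter and a lambda standing in for the scheduler's method; everything here except `PollingHandler` is invented, and the require path is assumed from the gem name:

```ruby
require "ductr/sqlite" # assumed require path

# Stub adapter: just yields something playing the role of the database handle.
class FakeAdapter
  def open
    yield :fake_db
  end
end

# Stands in for the scheduler's method: always reports the same triggering key.
check = ->(_db, &block) { block.call(:same_key) }

handler = Ductr::SQLite::PollingHandler.new(check, FakeAdapter.new)
handler.call # => true, the key is seen for the first time
handler.call # => false, the key has not changed since the previous poll
```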
@@ -0,0 +1,46 @@
+ # frozen_string_literal: true
+
+ module Ductr
+   module SQLite
+     #
+     # A trigger based on the RufusTrigger, runs the PollingHandler at the given timing.
+     # The handler calls the scheduler's method with a block which compares the yield result with the previous one.
+     # If they are different, yield returns true:
+     #
+     #   trigger :my_database, :polling, interval: "1min"
+     #   def check_timestamp(db) # will perform MyJob if the name has changed
+     #     return unless yield(db[:items].select(:name).first)
+     #
+     #     MyJob.perform_later
+     #   end
+     #
+     class PollingTrigger < Ductr::RufusTrigger
+       Adapter.trigger_registry.add(self, as: :polling)
+
+       #
+       # Closes the connection if the scheduler is stopped.
+       #
+       # @return [void]
+       #
+       def stop
+         super
+         adapter.close!
+       end
+
+       private
+
+       #
+       # Returns a callable object, allowing rufus-scheduler to call it.
+       #
+       # @param [Ductr::Scheduler] scheduler The scheduler instance
+       # @param [Symbol] method_name The scheduler's method name
+       # @param [Hash] ** The options passed to the trigger annotation
+       #
+       # @return [#call] A callable object
+       #
+       def callable(scheduler, method_name, **)
+         PollingHandler.new(scheduler.method(method_name), adapter)
+       end
+     end
+   end
+ end
@@ -0,0 +1,8 @@
+ # frozen_string_literal: true
+
+ module Ductr
+   module SQLite
+     # @return [String] The gem's version
+     VERSION = "0.1.1"
+   end
+ end
@@ -0,0 +1,31 @@
+ # frozen_string_literal: true
+
+ require "ductr"
+ require "sequel"
+
+ Dir[File.join(__dir__, "sqlite", "*.rb")].each { |file| require file }
+
+ # :nodoc:
+ module Ductr
+   #
+   # ## SQLite adapter for Ductr ETL
+   # This gem provides useful controls to operate Ductr ETL with SQLite databases.
+   #
+   # To get details about the database connection handling, check out the {Ductr::SQLite::Adapter} class.
+   #
+   # ### Sources
+   # - {Ductr::SQLite::BasicSource} Yields rows one by one.
+   # - {Ductr::SQLite::PaginatedSource} Allows selecting a large number of rows by relying on pagination.
+   #
+   # ### Lookups
+   # - {Ductr::SQLite::BasicLookup} Executes one query per row and merges the looked up row with the current row.
+   # - {Ductr::SQLite::BufferedLookup} Executes one query for a batch of rows and lets you implement the matching logic.
+   # - {Ductr::SQLite::MatchLookup} Executes one query for a batch of rows and abstracts the matching logic.
+   #
+   # ### Destinations
+   # - {Ductr::SQLite::BasicDestination} Writes rows one by one.
+   # - {Ductr::SQLite::BufferedDestination} Accumulates rows in a buffer to write them by batch.
+   # - {Ductr::SQLite::BufferedUpsertDestination} Accumulates rows in a buffer to upsert them by batch.
+   #
+   module SQLite; end
+ end
@@ -0,0 +1,271 @@
+ # :nodoc:
+ module Ductr
+   #
+   # ## SQLite adapter for Ductr ETL
+   # This gem provides useful controls to operate Ductr ETL with SQLite databases.
+   #
+   # To get details about the database connection handling, check out the {Ductr::SQLite::Adapter} class.
+   #
+   # ### Sources
+   # - {Ductr::SQLite::BasicSource} Yields rows one by one.
+   # - {Ductr::SQLite::PaginatedSource} Allows selecting a large number of rows by relying on pagination.
+   #
+   # ### Lookups
+   # - {Ductr::SQLite::BasicLookup} Executes one query per row and merges the looked up row with the current row.
+   # - {Ductr::SQLite::BufferedLookup} Executes one query for a batch of rows and lets you implement the matching logic.
+   # - {Ductr::SQLite::MatchLookup} Executes one query for a batch of rows and abstracts the matching logic.
+   #
+   # ### Destinations
+   # - {Ductr::SQLite::BasicDestination} Writes rows one by one.
+   # - {Ductr::SQLite::BufferedDestination} Accumulates rows in a buffer to write them by batch.
+   # - {Ductr::SQLite::BufferedUpsertDestination} Accumulates rows in a buffer to upsert them by batch.
+   module SQLite
+     VERSION: String
+
+     #
+     # The SQLite adapter class implements the required #open! and #close! methods to handle the database connection.
+     # The adapter is registered as `:sqlite`; to use it, add `adapter: sqlite` to the YAML configuration e.g.:
+     #
+     # ```yml
+     # # config/development.yml
+     # adapters:
+     #   some_sqlite_database:
+     #     adapter: "sqlite"
+     #     database: "example.db"
+     # ```
+     class Adapter < Ductr::Adapter
+       # sord warn - Sequel::Database wasn't able to be resolved to a constant in this project
+       # Opens the database connection with the adapter's configuration.
+       #
+       # _@return_ — The database connection instance
+       def open!: () -> Sequel::Database
+
+       # Closes the database connection.
+       # In the specific case of SQLite, we just destroy the connection instance.
+       def close!: () -> void
+
+       # sord warn - Sequel::Database wasn't able to be resolved to a constant in this project
+       # _@return_ — The database connection instance
+       attr_reader db: Sequel::Database?
+     end
+
+     #
+     # A lookup control that executes one query per row, registered as `:basic`.
+     # The job's method must return a row which will be merged with the current row:
+     #
+     #   lookup :some_sqlite_database, :basic
+     #   def my_lookup(row, db)
+     #     db[:items_bis].where(item: row[:id]).limit(1)
+     #   end
+     #
+     # As the control merges the looked up row with the current row,
+     # ensure that column names are different or they will be overwritten.
+     #
+     # If the lookup returns a falsy value, nothing will be merged with the current row.
+     class BasicLookup < Ductr::ETL::Transform
+       # Calls the job's method to merge its result with the current row.
+       #
+       # _@param_ `row` — The current row, preferably a Hash
+       #
+       # _@return_ — The row merged with the looked up row, or the untouched row if nothing was found
+       def process: (::Hash[Symbol, Object] row) -> ::Hash[Symbol, Object]
+     end
+
+     #
+     # A source control that yields rows one by one, registered as `:basic`:
+     #
+     #   source :some_sqlite_database, :basic
+     #   def select_some_stuff(db)
+     #     db[:items].limit(42)
+     #   end
+     #
+     # Do not try to select a large number of rows, as they will all be loaded into memory.
+     class BasicSource < Ductr::ETL::Source
+       # Opens the database, calls the job's method and iterates over the query results.
+       def each: () -> void
+     end
+
+     #
+     # A lookup control that executes the query for a batch of rows, registered as `:match`.
+     #
+     # Accepts the `:buffer_size` option; the default value is 10 000.
+     # Accepts the mandatory `:merge` option, an array with two entries:
+     # - The first one is the looked up row key to match.
+     # - The second one is the buffer row key to match.
+     #
+     # Unlike the `:buffered` lookup, this one abstracts the row matching logic by assuming that
+     # you want to merge rows based on a key pair, e.g. primary / foreign keys:
+     #
+     #   lookup :some_sqlite_database, :match, merge: [:id, :item], buffer_size: 42
+     #   def merge_with_stuff(db, ids)
+     #     db[:items_bis].where(item: ids)
+     #   end
+     class MatchLookup < Ductr::ETL::BufferedTransform
+       # The looked up row key to match.
+       #
+       # _@return_ — The column name
+       def from_key: () -> Symbol
+
+       # The buffer row key to match.
+       #
+       # _@return_ — The column name
+       def to_key: () -> Symbol
+
+       # Opens the database if needed, calls the job's method and merges
+       # the looked up rows with corresponding buffer rows.
+       def on_flush: () ?{ (::Hash[Symbol, Object] row) -> void } -> void
+
+       # Finds the corresponding row in the buffer.
+       #
+       # _@param_ `row` — The looked up row
+       #
+       # _@return_ — The matching row if it exists
+       def buffer_find: (::Hash[Symbol, Object] row) -> ::Hash[Symbol, Object]?
+
+       # Maps the buffer keys into an array.
+       #
+       # _@return_ — The keys array
+       def buffer_keys: () -> ::Array[(Integer | String)]
+     end
+
+     #
+     # A lookup control that executes the query for a batch of rows, registered as `:buffered`.
+     # Accepts the `:buffer_size` option; the default value is 10 000.
+     # You have to implement your own row matching logic:
+     #
+     #   lookup :some_sqlite_database, :buffered, buffer_size: 42
+     #   def my_lookup(db, buffer, &)
+     #     ids = buffer.map { |row| row[:id] }
+     #     db[:items].where(item: ids).each do |row|
+     #       match = buffer.find { |r| r[:id] == row[:item] }
+     #
+     #       next yield(row) unless match
+     #
+     #       yield(row.merge match)
+     #     end
+     #   end
+     class BufferedLookup < Ductr::ETL::BufferedTransform
+       # Opens the database if needed, calls the job's method and passes the each block to it.
+       def on_flush: () -> void
+     end
+
+     #
+     # The rufus-scheduler handler class.
+     # @see https://github.com/jmettraux/rufus-scheduler#scheduling-handler-instances
+     #   For further information
+     class PollingHandler
+       # sord warn - Ductr::Adapter wasn't able to be resolved to a constant in this project
+       # Creates the handler based on the scheduler's method and the trigger's adapter instance.
+       #
+       # _@param_ `method` — The scheduler's method
+       #
+       # _@param_ `adapter` — The trigger's adapter
+       def initialize: (Method method, Ductr::Adapter adapter) -> void
+
+       # The callable method used by the trigger, actually calls the scheduler's method.
+       def call: () -> void
+     end
+
+     #
+     # A trigger based on the RufusTrigger, runs the PollingHandler at the given timing.
+     # The handler calls the scheduler's method with a block which compares the yield result with the previous one.
+     # If they are different, yield returns true:
+     #
+     #   trigger :my_database, :polling, interval: "1min"
+     #   def check_timestamp(db) # will perform MyJob if the name has changed
+     #     return unless yield(db[:items].select(:name).first)
+     #
+     #     MyJob.perform_later
+     #   end
+     class PollingTrigger < Ductr::RufusTrigger
+       # Closes the connection if the scheduler is stopped.
+       def stop: () -> void
+
+       # sord warn - Ductr::Scheduler wasn't able to be resolved to a constant in this project
+       # sord duck - #call looks like a duck type, replacing with untyped
+       # Returns a callable object, allowing rufus-scheduler to call it.
+       #
+       # _@param_ `scheduler` — The scheduler instance
+       #
+       # _@param_ `method_name` — The scheduler's method name
+       #
+       # _@param_ `**` — The options passed to the trigger annotation
+       #
+       # _@return_ — A callable object
+       def callable: (Ductr::Scheduler scheduler, Symbol method_name) -> untyped
+     end
+
+     #
+     # A source control that allows selecting a large number of rows by relying on pagination, registered as `:paginated`.
+     # Accepts the `:page_size` option; the default value is 10 000.
+     #
+     #   source :some_sqlite_database, :paginated, page_size: 4
+     #   def my_source(db, offset, limit)
+     #     db[:items].offset(offset).limit(limit)
+     #   end
+     #
+     # Make sure not to select more rows than the configured page size,
+     # otherwise an `InconsistentPaginationError` will be raised.
+     class PaginatedSource < Ductr::ETL::PaginatedSource
+       # Calls the job's method and iterates over the query result.
+       # Returns true if the page is full, false otherwise.
+       #
+       # _@return_ — True if the page is full, false otherwise.
+       def each_page: () -> bool
+     end
+
+     #
+     # A destination control that writes rows one by one, registered as `:basic`:
+     #
+     #   destination :some_sqlite_database, :basic
+     #   def my_destination(row, db)
+     #     db[:items].insert(row)
+     #   end
+     class BasicDestination < Ductr::ETL::Destination
+       # Opens the database if needed and calls the job's method to insert one row at a time.
+       #
+       # _@param_ `row` — The row to insert, preferably a Hash
+       def write: (::Hash[Symbol, Object] row) -> void
+     end
+
+     #
+     # A destination control that accumulates rows in a buffer to write them by batch, registered as `:buffered`.
+     # Accepts the `:buffer_size` option; the default value is 10 000:
+     #
+     #   destination :some_sqlite_database, :buffered, buffer_size: 42
+     #   def my_destination(buffer, db)
+     #     db[:items].multi_insert(buffer)
+     #   end
+     #
+     # @see Ductr::ETL::BufferedDestination
+     class BufferedDestination < Ductr::ETL::BufferedDestination
+       # Opens the database if needed and calls the job's method to run the query.
+       def on_flush: () -> void
+     end
+
+     #
+     # A destination control that accumulates rows in a buffer to upsert them by batch, registered as `:buffered_upsert`.
+     # Accepts the `:buffer_size` option; the default value is 10 000:
+     #
+     #   destination :some_sqlite_database, :buffered_upsert, buffer_size: 42
+     #   def my_destination(buffer, excluded, db)
+     #     db[:items].insert_conflict(target: :id, update: excluded).multi_insert(buffer)
+     #   end
+     #
+     # @see Ductr::ETL::BufferedDestination
+     class BufferedUpsertDestination < Ductr::ETL::BufferedDestination
+       # Opens the database if needed and calls the job's method to run the query.
+       def on_flush: () -> void
+
+       # sord warn - Sequel::SQL::QualifiedIdentifier wasn't able to be resolved to a constant in this project
+       # Generates the excluded keys hash, e.g.
+       #
+       # ```ruby
+       # {a: Sequel[:excluded][:a]}
+       # ```
+       #
+       # _@return_ — The excluded keys hash
+       def excluded: () -> ::Hash[Symbol, Sequel::SQL::QualifiedIdentifier]
+     end
+   end
+ end