RubyGems - sequel-hexspace - Versions diffs - 1.0.0 - Mend

sequel-hexspace 1.0.0

Files changed (6) hide show

checksums.yaml +7 -0
data/LICENSE +18 -0
data/README +3 -0
data/lib/sequel/adapters/hexspace.rb +92 -0
data/lib/sequel/adapters/shared/spark.rb +467 -0
metadata +141 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: b2949511f39f19c48b5120965d8b236fe93215c07ee9cf987d7b80f81d649b81
+  data.tar.gz: 68b65e86ccc1ad3cc1fef989eb1d79b7927f96c4f4b5a60abb9699c7b425f4a4
+SHA512:
+  metadata.gz: 021db16089f80aee9a8398b54747265e6eedb59713b7fac36316d4623a324ce96036e532df0ac75171ec47a04ade882ee168c5c773e5487a7591567bfaab707c
+  data.tar.gz: 2767b9661167bc70fb775513f4abcb0465a77cc19c16f9bdb94972acd30896a310c3daeb6d8a34cfdd089ccf6c8bd6558bc459a9ae2eb3415c7cfd35b5d7273a

data/LICENSE ADDED Viewed

@@ -0,0 +1,18 @@
+Copyright (c) 2023 Jeremy Evans
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to
+deal in the Software without restriction, including without limitation the
+rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README ADDED Viewed

@@ -0,0 +1,3 @@
+This is a hexspace adapter for Sequel, designed to be used with Spark (not
+Hive). You can use the hexspace:// protocol in the Sequel connection URL
+to use this adapter.

data/lib/sequel/adapters/hexspace.rb ADDED Viewed

@@ -0,0 +1,92 @@
+require 'hexspace'
+require_relative 'shared/spark'
+module Sequel
+  module Hexspace
+    class Database < Sequel::Database
+      include Spark::DatabaseMethods
+      set_adapter_scheme :hexspace
+      ALLOWED_CLIENT_KEYWORDS = ::Hexspace::Client.instance_method(:initialize).parameters.map(&:last).freeze
+      def connect(server)
+        opts = server_opts(server)
+        opts[:username] = opts[:user]
+        opts.select!{|k,v| v.to_s != '' && ALLOWED_CLIENT_KEYWORDS.include?(k)}
+        ::Hexspace::Client.new(**opts)
+      end
+      def dataset_class_default
+        Dataset
+      end
+      def disconnect_connection(conn)
+        # Hexspace does not appear to support a disconnection method
+        # To keep tests happy, mark the connection as invalid
+        conn.instance_variable_set(:@sequel_invalid, true)
+      end
+      def execute(sql, opts=OPTS)
+        synchronize(opts[:server]) do |conn|
+          res = log_connection_yield(sql, conn){conn.execute(sql, result_object: true)}
+        rescue => e
+          raise_error(e)
+        else
+          yield res if defined?(yield)
+        end
+      end
+      def execute_insert(sql, opts=OPTS)
+        execute(sql, opts)
+        # Return nil instead of empty array.
+        # Spark does not support primary keys nor autoincrementing values
+        nil
+      end
+      def valid_connection?(conn)
+        !conn.instance_variable_get(:@sequel_invalid)
+      end
+    end
+    class Dataset < Sequel::Dataset
+      include Spark::DatasetMethods
+      def fetch_rows(sql)
+        execute(sql) do |result|
+          columns = result.columns.map(&:to_sym)
+          self.columns = columns
+          next if result.rows.empty?
+          types = result.column_types
+          column_info = columns.map.with_index do |name, i|
+            conversion_proc = case types[i]
+            when 'binary'
+              Sequel.method(:blob)
+            when 'timestamp'
+              db.method(:to_application_timestamp)
+            end
+            [i, name, conversion_proc]
+          end
+          result.rows.each do |row|
+            h = {}
+            column_info.each do |i, name, conversion_proc|
+              value = row[i]
+              h[name] = if value.nil?
+                nil
+              elsif conversion_proc
+                conversion_proc.call(value)
+              else
+                value
+              end
+            end
+            yield h
+          end
+        end
+      end
+    end
+  end
+end

data/lib/sequel/adapters/shared/spark.rb ADDED Viewed

@@ -0,0 +1,467 @@
+# frozen-string-literal: true
+require 'sequel/adapters/utils/unmodified_identifiers'
+module Sequel
+  module Spark
+    Sequel::Database.set_shared_adapter_scheme(:spark, self)
+    module DatabaseMethods
+      include UnmodifiedIdentifiers::DatabaseMethods
+      def create_schema(schema_name, opts=OPTS)
+        run(create_schema_sql(schema_name, opts))
+      end
+      def database_type
+        :spark
+      end
+      def drop_schema(schema_name, opts=OPTS)
+        run(drop_schema_sql(schema_name, opts))
+      end
+      # Spark does not support primary keys, so do not
+      # add any options
+      def serial_primary_key_options
+        # We could raise an exception here instead of just
+        # ignoring the primary key setting.
+        {:type=>Integer}
+      end
+      def supports_create_table_if_not_exists?
+        true
+      end
+      def tables(opts=OPTS)
+        _mangle_tables(_tables("TABLES", :tableName, opts) - _views(opts), opts)
+      end
+      # Spark does not support transactions.
+      def transaction(opts=nil)
+        yield
+      end
+      # Use an inline VALUES table.
+      def values(v)
+        @default_dataset.clone(:values=>v)
+      end
+      def views(opts=OPTS)
+        _mangle_tables(_views(opts), opts)
+      end
+      private
+      def _tables(type, column, opts)
+        sql = String.new
+        sql << "SHOW " << type
+        if schema = opts[:schema]
+          sql << " IN " << literal(schema)
+        end
+        if like = opts[:like]
+          sql << " LIKE " << literal(like)
+        end
+        ds = dataset.with_sql(sql)
+        # Always internally qualify, so that if a table name in a schema
+        # has the same name as a temporary view, it will not exclude
+        # the table name.
+        ds.map([:namespace, column]).map do |ns, name|
+          if ns && !ns.empty?
+            Sequel::SQL::QualifiedIdentifier.new(ns, name)
+          else
+            name.to_sym
+          end
+        end
+      end
+      def _views(opts)
+        _tables("VIEWS", :viewName, opts)
+      end
+      def _mangle_tables(tables, opts)
+        if opts[:qualify]
+          tables
+        else
+          tables.map{|t| t.is_a?(Sequel::SQL::QualifiedIdentifier) ? t.column.to_sym : t}
+        end
+      end
+      def create_schema_sql(schema_name, opts)
+        sql = String.new
+        sql << 'CREATE SCHEMA '
+        sql << 'IF NOT EXISTS ' if opts[:if_not_exists]
+        sql << literal(schema_name)
+        if comment = opts[:comment]
+          sql << ' COMMENT '
+          sql << literal(comment)
+        end
+        if location = opts[:location]
+          sql << ' LOCATION '
+          sql << literal(location)
+        end
+        if properties = opts[:properties]
+          sql << ' WITH DBPROPERTIES ('
+          properties.each do |k, v|
+            sql << literal(k.to_s) << "=" << literal(v.to_s)
+          end
+          sql << ')'
+        end
+        sql
+      end
+      def create_table_sql(name, generator, options)
+        _append_table_view_options_sql(super, options)
+      end
+      def create_table_as_sql(name, sql, options)
+        _append_table_view_options_sql(create_table_prefix_sql(name, options), options) << " AS #{sql}"
+      end
+      def create_view_sql(name, source, options)
+        if source.is_a?(Hash)
+          options = source
+          source = nil
+        end
+        sql = String.new
+        sql << create_view_sql_append_columns("CREATE #{'OR REPLACE 'if options[:replace]}#{'TEMPORARY ' if options[:temp]}VIEW#{' IF NOT EXISTS' if options[:if_not_exists]} #{quote_schema_table(name)}", options[:columns])
+        if source
+          source = source.sql if source.is_a?(Dataset)
+          sql << " AS " << source
+        end
+        _append_table_view_options_sql(sql, options)
+      end
+      def _append_table_view_options_sql(sql, options)
+        if options[:using]
+          sql << " USING " << options[:using].to_s
+        end
+        if options[:partitioned_by]
+          sql << " PARTITIONED BY "
+          _append_column_list_sql(sql, options[:partitioned_by])
+        end
+        if options[:clustered_by]
+          sql << " CLUSTERED BY "
+          _append_column_list_sql(sql, options[:clustered_by])
+          if options[:sorted_by]
+            sql << " SORTED BY "
+            _append_column_list_sql(sql, options[:sorted_by])
+          end
+          raise "Must specify :num_buckets when :clustered_by is used" unless options[:num_buckets]
+          sql << " INTO " << literal(options[:num_buckets]) << " BUCKETS"
+        end
+        if options[:options]
+          sql << ' OPTIONS ('
+          options[:options].each do |k, v|
+            sql << literal(k.to_s) << "=" << literal(v.to_s)
+          end
+          sql << ')'
+        end
+        sql
+      end
+      def _append_column_list_sql(sql, columns)
+        sql << '('
+        schema_utility_dataset.send(:identifier_list_append, sql, Array(columns))
+        sql << ')'
+      end
+      def drop_schema_sql(schema_name, opts)
+        sql = String.new
+        sql << 'DROP SCHEMA '
+        sql << 'IF EXISTS ' if opts[:if_exists]
+        sql << literal(schema_name)
+        sql << ' CASCADE' if opts[:cascade]
+        sql
+      end
+      def schema_parse_table(table, opts)
+        m = output_identifier_meth(opts[:dataset])
+        im = input_identifier_meth(opts[:dataset])
+        metadata_dataset.with_sql("DESCRIBE #{"#{im.call(opts[:schema])}." if opts[:schema]}#{im.call(table)}").map do |row|
+          [m.call(row[:col_name]), {:db_type=>row[:data_type], :type=>schema_column_type(row[:data_type])}]
+        end
+      end
+      def supports_create_or_replace_view?
+        true
+      end
+      def type_literal_generic_file(column)
+        'binary'
+      end
+      def type_literal_generic_float(column)
+        'float'
+      end
+      def type_literal_generic_string(column)
+        'string'
+      end
+    end
+    module DatasetMethods
+      include UnmodifiedIdentifiers::DatasetMethods
+      Dataset.def_sql_method(self, :select, [['if opts[:values]', %w'values'], ['else', %w'with select distinct columns from join where group having compounds order limit']])
+      def date_add_sql_append(sql, da)
+        expr = da.expr
+        cast_type = da.cast_type || Time
+        h = Hash.new(0)
+        da.interval.each do |k, v|
+          h[k] = v || 0
+        end
+        if h[:weeks]
+          h[:days] += h[:weeks] * 7
+        end
+        if h[:years] != 0 || h[:months] != 0
+          expr = Sequel.+(expr, Sequel.function(:make_ym_interval, h[:years], h[:months]))
+        end
+        if h[:days] != 0 || h[:hours] != 0 || h[:minutes] != 0 || h[:seconds] != 0
+          expr = Sequel.+(expr, Sequel.function(:make_dt_interval, h[:days], h[:hours], h[:minutes], h[:seconds]))
+        end
+        literal_append(sql, expr)
+      end
+      # Emulate delete by selecting all rows except the ones being deleted
+      # into a new table, drop the current table, and rename the new
+      # table to the current table name.
+      #
+      # This is designed to minimize the changes to the tests, and is
+      # not recommended for production use.
+      def delete
+        _with_temp_table
+      end
+      def update(columns)
+        updated_cols = columns.keys
+        other_cols = db.from(first_source_table).columns - updated_cols
+        updated_vals = columns.values
+        _with_temp_table do |tmp_name|
+          db.from(tmp_name).insert([*updated_cols, *other_cols], select(*updated_vals, *other_cols))
+        end
+      end
+      private def _with_temp_table
+        n = count
+        table_name = first_source_table
+        tmp_name = literal(table_name).gsub('`', '') + "__sequel_delete_emulate"
+        db.create_table(tmp_name, :as=>select_all.invert)
+        yield tmp_name if defined?(yield)
+        db.drop_table(table_name)
+        db.rename_table(tmp_name, table_name)
+        n
+      end
+      protected def compound_clone(type, dataset, opts)
+        dataset = dataset.from_self if dataset.opts[:with]
+        super
+      end
+      def complex_expression_sql_append(sql, op, args)
+        case op
+        when :<<
+          literal_append(sql, Sequel.function(:shiftleft, *args))
+        when :>>
+          literal_append(sql, Sequel.function(:shiftright, *args))
+        when :~
+          literal_append(sql, Sequel.function(:regexp, *args))
+        when :'!~'
+          literal_append(sql, ~Sequel.function(:regexp, *args))
+        when :'~*'
+          literal_append(sql, Sequel.function(:regexp, Sequel.function(:lower, args[0]), Sequel.function(:lower, args[1])))
+        when :'!~*'
+          literal_append(sql, ~Sequel.function(:regexp, Sequel.function(:lower, args[0]), Sequel.function(:lower, args[1])))
+        else
+          super
+        end
+      end
+      def multi_insert_sql_strategy
+        :values
+      end
+      def quoted_identifier_append(sql, name)
+        sql << '`' << name.to_s.gsub('`', '``') << '`'
+      end
+      def requires_sql_standard_datetimes?
+        true
+      end
+      def insert_supports_empty_values?
+        false
+      end
+      def literal_blob_append(sql, v)
+        sql << "to_binary('" << [v].pack("m*").gsub("\n", "") << "', 'base64')"
+      end
+      def literal_false
+        "false"
+      end
+      def literal_string_append(sql, v)
+        sql << "'" << v.gsub(/(['\\])/, '\\\\\1') << "'"
+      end
+      def literal_true
+        "true"
+      end
+      def supports_cte?(type=:select)
+        type == :select
+      end
+      def supports_cte_in_subqueries?
+        true
+      end
+      def supports_group_cube?
+        true
+      end
+      def supports_group_rollup?
+        true
+      end
+      def supports_grouping_sets?
+        true
+      end
+      def supports_regexp?
+        true
+      end
+      def supports_window_functions?
+        true
+      end
+      # Handle forward references in existing CTEs in the dataset by inserting this
+      # dataset before any dataset that would reference it.
+      def with(name, dataset, opts=OPTS)
+        opts = Hash[opts].merge!(:name=>name, :dataset=>dataset).freeze
+        references = ReferenceExtractor.references(dataset)
+        if with = @opts[:with]
+          with = with.dup
+          existing_references = @opts[:with_references]
+          if referencing_dataset = existing_references[literal(name)]
+            unless i = with.find_index{|o| o[:dataset].equal?(referencing_dataset)}
+              raise Sequel::Error, "internal error finding referencing dataset"
+            end
+            with.insert(i, opts)
+            # When not inserting dataset at the end, if both the new dataset and the
+            # dataset right after it refer to the same reference, keep the reference
+            # to the new dataset, so that that dataset is inserted before the new dataset
+            # dataset
+            existing_references = existing_references.reject do |k, v|
+              references[k] && v.equal?(referencing_dataset)
+            end
+          else
+            with << opts
+          end
+          # Assume we will insert the dataset at the end, so existing references have priority
+          references = references.merge(existing_references)
+        else
+          with = [opts]
+        end
+        clone(:with=>with.freeze, :with_references=>references.freeze)
+      end
+      private def select_values_sql(sql)
+        sql << 'VALUES '
+        expression_list_append(sql, opts[:values])
+      end
+    end
+    # ReferenceExtractor extracts references from datasets that will be used as CTEs.
+    class ReferenceExtractor < ASTTransformer
+      TABLE_IDENTIFIER_KEYS = [:from, :join].freeze
+      COLUMN_IDENTIFIER_KEYS = [:select, :where, :having, :order, :group, :compounds].freeze
+      # Returns a hash of literal string identifier keys referenced by the given
+      # dataset with the given dataset as the value for each key.
+      def self.references(dataset)
+        new(dataset).tap{|ext| ext.transform(dataset)}.references
+      end
+      attr_reader :references
+      def initialize(dataset)
+        @dataset = dataset
+        @references = {}
+      end
+      private
+      # Extract references from FROM/JOIN, where bare identifiers represent tables.
+      def table_identifier_extract(o)
+        case o
+        when String
+          @references[@dataset.literal(Sequel.identifier(o))] = @dataset
+        when Symbol, SQL::Identifier
+          @references[@dataset.literal(o)] = @dataset
+        when SQL::AliasedExpression
+          table_identifier_extract(o.expression)
+        when SQL::JoinOnClause
+          table_identifier_extract(o.table_expr)
+          v(o.on)
+        when SQL::JoinClause
+          table_identifier_extract(o.table_expr)
+        else
+          v(o)
+        end
+      end
+      # Extract references from datasets, where bare identifiers in most case represent columns,
+      # and only qualified identifiers include a table reference.
+      def v(o)
+        case o
+        when Sequel::Dataset
+          # Special case FROM/JOIN, because identifiers inside refer to tables and not columns
+          TABLE_IDENTIFIER_KEYS.each{|k| o.opts[k]&.each{|jc| table_identifier_extract(jc)}}
+          # Look in other keys that may have qualified references or subqueries
+          COLUMN_IDENTIFIER_KEYS.each{|k| v(o.opts[k])}
+        when SQL::QualifiedIdentifier
+          # If a qualified identifier has a qualified identifier as a key,
+          # such as schema.table.column, ignore it, because CTE identifiers shouldn't
+          # be schema qualified.
+          unless o.table.is_a?(SQL::QualifiedIdentifier)
+            @references[@dataset.literal(Sequel.identifier(o.table))] = @dataset
+          end
+        else
+          super
+        end
+      end
+    end
+    private_constant :ReferenceExtractor
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,141 @@
+--- !ruby/object:Gem::Specification
+name: sequel-hexspace
+version: !ruby/object:Gem::Version
+  version: 1.0.0
+platform: ruby
+authors:
+- Jeremy Evans
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2024-04-03 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: sequel
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '5.0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '5.0'
+- !ruby/object:Gem::Dependency
+  name: hexspace
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.2.1
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.2.1
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: minitest
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '5.7'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '5.7'
+- !ruby/object:Gem::Dependency
+  name: minitest-hooks
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: minitest-global_expectations
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+description: |
+  This is a hexspace adapter for Sequel, designed to be used with Spark (not
+  Hive). You can use the hexspace:// protocol in the Sequel connection URL
+  to use this adapter.
+email: code@jeremyevans.net
+executables: []
+extensions: []
+extra_rdoc_files:
+- LICENSE
+files:
+- LICENSE
+- README
+- lib/sequel/adapters/hexspace.rb
+- lib/sequel/adapters/shared/spark.rb
+homepage: https://github.com/jeremyevans/sequel-hexspace.git
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options:
+- "--quiet"
+- "--line-numbers"
+- "--inline-source"
+- "--title"
+- 'sequel-hexspace: Sequel adapter for hexspace driver and Apache Spark database'
+- "--main"
+- README
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubygems_version: 3.4.6
+signing_key:
+specification_version: 4
+summary: Sequel adapter for hexspace driver and Apache Spark database
+test_files: []