RubyGems - partitioned - Versions diffs - 0.8.0 - Mend

partitioned 0.8.0

Files changed (95) hide show

data/Gemfile +17 -0
data/LICENSE +30 -0
data/PARTITIONING_EXPLAINED.txt +351 -0
data/README +111 -0
data/Rakefile +27 -0
data/examples/README +23 -0
data/examples/company_id.rb +417 -0
data/examples/company_id_and_created_at.rb +689 -0
data/examples/created_at.rb +590 -0
data/examples/created_at_referencing_awards.rb +1000 -0
data/examples/id.rb +475 -0
data/examples/lib/by_company_id.rb +11 -0
data/examples/lib/command_line_tool_mixin.rb +71 -0
data/examples/lib/company.rb +29 -0
data/examples/lib/get_options.rb +44 -0
data/examples/lib/roman.rb +41 -0
data/examples/start_date.rb +621 -0
data/init.rb +1 -0
data/lib/monkey_patch_activerecord.rb +92 -0
data/lib/monkey_patch_postgres.rb +73 -0
data/lib/partitioned.rb +26 -0
data/lib/partitioned/active_record_overrides.rb +34 -0
data/lib/partitioned/bulk_methods_mixin.rb +288 -0
data/lib/partitioned/by_created_at.rb +13 -0
data/lib/partitioned/by_foreign_key.rb +21 -0
data/lib/partitioned/by_id.rb +35 -0
data/lib/partitioned/by_integer_field.rb +32 -0
data/lib/partitioned/by_monthly_time_field.rb +23 -0
data/lib/partitioned/by_time_field.rb +65 -0
data/lib/partitioned/by_weekly_time_field.rb +30 -0
data/lib/partitioned/multi_level.rb +20 -0
data/lib/partitioned/multi_level/configurator/data.rb +14 -0
data/lib/partitioned/multi_level/configurator/dsl.rb +32 -0
data/lib/partitioned/multi_level/configurator/reader.rb +162 -0
data/lib/partitioned/multi_level/partition_manager.rb +47 -0
data/lib/partitioned/partitioned_base.rb +354 -0
data/lib/partitioned/partitioned_base/configurator.rb +6 -0
data/lib/partitioned/partitioned_base/configurator/data.rb +62 -0
data/lib/partitioned/partitioned_base/configurator/dsl.rb +628 -0
data/lib/partitioned/partitioned_base/configurator/reader.rb +209 -0
data/lib/partitioned/partitioned_base/partition_manager.rb +138 -0
data/lib/partitioned/partitioned_base/sql_adapter.rb +286 -0
data/lib/partitioned/version.rb +3 -0
data/lib/tasks/desirable_tasks.rake +4 -0
data/partitioned.gemspec +21 -0
data/spec/dummy/.rspec +1 -0
data/spec/dummy/README.rdoc +261 -0
data/spec/dummy/Rakefile +7 -0
data/spec/dummy/app/assets/javascripts/application.js +9 -0
data/spec/dummy/app/assets/stylesheets/application.css +7 -0
data/spec/dummy/app/controllers/application_controller.rb +3 -0
data/spec/dummy/app/helpers/application_helper.rb +2 -0
data/spec/dummy/app/views/layouts/application.html.erb +14 -0
data/spec/dummy/config.ru +4 -0
data/spec/dummy/config/application.rb +51 -0
data/spec/dummy/config/boot.rb +10 -0
data/spec/dummy/config/database.yml +32 -0
data/spec/dummy/config/environment.rb +5 -0
data/spec/dummy/config/environments/development.rb +30 -0
data/spec/dummy/config/environments/production.rb +60 -0
data/spec/dummy/config/environments/test.rb +39 -0
data/spec/dummy/config/initializers/backtrace_silencers.rb +7 -0
data/spec/dummy/config/initializers/inflections.rb +10 -0
data/spec/dummy/config/initializers/mime_types.rb +5 -0
data/spec/dummy/config/initializers/secret_token.rb +7 -0
data/spec/dummy/config/initializers/session_store.rb +8 -0
data/spec/dummy/config/initializers/wrap_parameters.rb +14 -0
data/spec/dummy/config/locales/en.yml +5 -0
data/spec/dummy/config/routes.rb +58 -0
data/spec/dummy/public/404.html +26 -0
data/spec/dummy/public/422.html +26 -0
data/spec/dummy/public/500.html +26 -0
data/spec/dummy/public/favicon.ico +0 -0
data/spec/dummy/script/rails +6 -0
data/spec/dummy/spec/spec_helper.rb +27 -0
data/spec/monkey_patch_posgres_spec.rb +176 -0
data/spec/partitioned/bulk_methods_mixin_spec.rb +512 -0
data/spec/partitioned/by_created_at_spec.rb +62 -0
data/spec/partitioned/by_foreign_key_spec.rb +95 -0
data/spec/partitioned/by_id_spec.rb +97 -0
data/spec/partitioned/by_integer_field_spec.rb +143 -0
data/spec/partitioned/by_monthly_time_field_spec.rb +100 -0
data/spec/partitioned/by_time_field_spec.rb +182 -0
data/spec/partitioned/by_weekly_time_field_spec.rb +100 -0
data/spec/partitioned/multi_level/configurator/dsl_spec.rb +88 -0
data/spec/partitioned/multi_level/configurator/reader_spec.rb +147 -0
data/spec/partitioned/partitioned_base/configurator/dsl_spec.rb +459 -0
data/spec/partitioned/partitioned_base/configurator/reader_spec.rb +513 -0
data/spec/partitioned/partitioned_base/sql_adapter_spec.rb +204 -0
data/spec/partitioned/partitioned_base_spec.rb +173 -0
data/spec/spec_helper.rb +32 -0
data/spec/support/shared_example_spec_helper_for_integer_key.rb +137 -0
data/spec/support/shared_example_spec_helper_for_time_key.rb +147 -0
data/spec/support/tables_spec_helper.rb +47 -0
metadata +250 -0

data/init.rb ADDED Viewed

	@@ -0,0 +1 @@
1	+ require 'partitioned.rb'

data/lib/monkey_patch_activerecord.rb ADDED Viewed

@@ -0,0 +1,92 @@
+require 'active_record'
+require 'active_record/base'
+require 'active_record/connection_adapters/abstract_adapter'
+require 'active_record/relation.rb'
+require 'active_record/persistence.rb'
+#
+# patching activerecord to allow specifying the table name as a function of
+# attributes
+#
+module ActiveRecord
+  module Persistence
+    def create
+      if self.id.nil? && self.class.respond_to?(:prefetch_primary_key?) && self.class.prefetch_primary_key?
+        self.id = connection.next_sequence_value(self.class.sequence_name)
+      end
+      attributes_values = arel_attributes_values(!id.nil?)
+      new_id = self.class.unscoped.insert attributes_values
+      self.id ||= new_id
+      IdentityMap.add(self) if IdentityMap.enabled?
+      @new_record = false
+      id
+    end
+  end
+  #
+  # patches for relation to allow back hooks into the activerecord
+  # requesting name of table as a function of attributes
+  #
+  class Relation
+    #
+    # patches activerecord's building of an insert statement to request
+    # of the model a table name with respect to attribute values being
+    # inserted
+    #
+    # the differences between this and the original code are small and marked
+    # with PARTITIONED comment
+    def insert(values)
+      primary_key_value = nil
+      if primary_key && Hash === values
+        primary_key_value = values[values.keys.find { |k|
+          k.name == primary_key
+        }]
+        if !primary_key_value && connection.prefetch_primary_key?(klass.table_name)
+          primary_key_value = connection.next_sequence_value(klass.sequence_name)
+          values[klass.arel_table[klass.primary_key]] = primary_key_value
+        end
+      end
+      im = arel.create_insert
+      #
+      # PARTITIONED ADDITION. get arel_table from class with respect to the
+      # current values to placed in the table (which hopefully hold the values
+      # that are used to determine the child table this insert should be
+      # redirected to)
+      #
+      actual_arel_table = @klass.dynamic_arel_table(Hash[*values.map{|k,v| [k.name,v]}.flatten]) if @klass.respond_to? :dynamic_arel_table
+      actual_arel_table = @table unless actual_arel_table
+      im.into actual_arel_table
+      conn = @klass.connection
+      substitutes = values.sort_by { |arel_attr,_| arel_attr.name }
+      binds       = substitutes.map do |arel_attr, value|
+        [@klass.columns_hash[arel_attr.name], value]
+      end
+      substitutes.each_with_index do |tuple, i|
+        tuple[1] = conn.substitute_at(binds[i][0], i)
+      end
+      if values.empty? # empty insert
+        im.values = Arel.sql(connection.empty_insert_statement_value)
+      else
+        im.insert substitutes
+      end
+      conn.insert(
+        im,
+        'SQL',
+        primary_key,
+        primary_key_value,
+        nil,
+        binds)
+    end
+  end
+end

data/lib/monkey_patch_postgres.rb ADDED Viewed

@@ -0,0 +1,73 @@
+require 'active_record'
+require 'active_record/base'
+require 'active_record/connection_adapters/abstract_adapter'
+module ActiveRecord::ConnectionAdapters
+  class TableDefinition
+    def check_constraint(constraint)
+      @columns << Struct.new(:to_sql).new("CHECK (#{constraint})")
+    end
+  end
+  class PostgreSQLAdapter < AbstractAdapter
+    #
+    # get the next value in a sequence.  used on INSERT operation for
+    # partitioning like by_id because the ID is required before the insert
+    # so that the specific child table is known ahead of time.
+    #
+    def next_sequence_value(sequence_name)
+      return execute("select nextval('#{sequence_name}')").field_values("nextval").first
+    end
+    #
+    # get the next several values in a sequence.
+    # batch_size - count of values
+    #
+    def next_sequence_values(sequence_name, batch_size)
+      result = execute("select nextval('#{sequence_name}') from generate_series(1, #{batch_size})")
+      return result.field_values("nextval").map(&:to_i)
+    end
+    #
+    # tells ActiveRecord whether to fetch the primary key for the table (using next_sequence_value())
+    # just before an insert.  We need the prefetch to happen but we don't have enough information
+    # here to determine if it should happen, so Persistence#create has been modified to ask
+    # the ActiveRecord::Base derived class if it requires a prefetch.
+    #
+    def prefetch_primary_key?(table_name)
+      return false
+    end
+    #
+    # creates a schema given a name.
+    # options:
+    #   :unless_exists - check if schema exists.
+    #
+    def create_schema(name, options = {})
+      if options[:unless_exists]
+        return if execute("select count(*) from pg_namespace where nspname = '#{name}'").getvalue(0,0).to_i > 0
+      end
+      execute("CREATE SCHEMA #{name}")
+    end
+    #
+    # drop a schema given a name.
+    # options:
+    #   :if_exists - check if schema exists.
+    #   :cascade - cascade drop to dependent objects
+    #
+    def drop_schema(name, options = {})
+      if options[:if_exists]
+        return if execute("select count(*) from pg_namespace where nspname = '#{name}'").getvalue(0,0).to_i == 0
+      end
+      execute("DROP SCHEMA #{name}#{' cascade' if options[:cascade]}")
+    end
+    #
+    # add foreign key constraint to table.
+    #
+    def add_foreign_key(referencing_table_name, referencing_field_name, referenced_table_name, referenced_field_name = :id)
+      execute("ALTER TABLE #{referencing_table_name} add foreign key (#{referencing_field_name}) references #{referenced_table_name}(#{referenced_field_name})")
+    end
+  end
+end

data/lib/partitioned.rb ADDED Viewed

@@ -0,0 +1,26 @@
+require 'monkey_patch_activerecord'
+require 'monkey_patch_postgres'
+require 'partitioned/bulk_methods_mixin'
+require 'partitioned/active_record_overrides'
+require 'partitioned/partitioned_base/configurator.rb'
+require 'partitioned/partitioned_base/configurator/data'
+require 'partitioned/partitioned_base/configurator/dsl'
+require 'partitioned/partitioned_base.rb'
+require 'partitioned/partitioned_base/configurator/reader'
+require 'partitioned/partitioned_base/partition_manager'
+require 'partitioned/partitioned_base/sql_adapter'
+require 'partitioned/by_time_field'
+require 'partitioned/by_monthly_time_field'
+require 'partitioned/by_weekly_time_field'
+require 'partitioned/by_created_at'
+require 'partitioned/by_integer_field'
+require 'partitioned/by_id'
+require 'partitioned/by_foreign_key'
+require 'partitioned/multi_level'
+require 'partitioned/multi_level/configurator/data'
+require 'partitioned/multi_level/configurator/dsl'
+require 'partitioned/multi_level/configurator/reader'
+require 'partitioned/multi_level/partition_manager'

data/lib/partitioned/active_record_overrides.rb ADDED Viewed

@@ -0,0 +1,34 @@
+#
+# these are things our base class must fix in ActiveRecord::Base
+#
+# no need to monkey patch these, just override them.
+#
+module Partitioned
+  module ActiveRecordOverrides
+    #
+    # arel_attributes_values needs to return attributes (and their values) associated with the dynamic_arel_table instead of the
+    # static arel_table provided by ActiveRecord.
+    #
+    # the standard release of this function gathers a collection of attributes and creates a wrapper function around them
+    # that names the table they are associated with. that naming is incorrect for partitioned tables.
+    #
+    # we call the standard release's method then retrofit our partitioned table into the hash that is returned.
+    #
+    def arel_attributes_values(include_primary_key = true, include_readonly_attributes = true, attribute_names = @attributes.keys)
+      attrs = super
+      actual_arel_table = dynamic_arel_table(self.class.table_name)
+      return Hash[*attrs.map{|k,v| [actual_arel_table[k.name], v]}.flatten]
+    end
+    #
+    # delete just needs a wrapper around it to specify the specific partition.
+    #
+    def delete
+      if persisted?
+        self.class.from_partition(*self.class.partition_key_values(attributes)).delete(id)
+      end
+      @destroyed = true
+      freeze
+    end
+  end
+end

data/lib/partitioned/bulk_methods_mixin.rb ADDED Viewed

@@ -0,0 +1,288 @@
+module Partitioned
+  module BulkMethodsMixin
+    class BulkUploadDataInconsistent < StandardError
+      def initialize(model, table_name, expected_columns, found_columns, while_doing)
+        super("#{model.name}: for table: #{table_name}; #{expected_columns} != #{found_columns}; #{while_doing}")
+      end
+    end
+    #
+    # BULK creation of many rows
+    #
+    # rows: an array of hashtables of data to insert into the database
+    #       each hashtable must have the same number of keys (and same
+    #       names for each key).
+    #
+    # options:
+    #   :slice_size = 1000
+    #   :returning = nil
+    #   :check_consistency = true
+    #
+    # examples:
+    #  the first example uses no options.
+    #
+    # rows = [{
+    #   :name => 'Keith',
+    #   :salary => 1000,
+    # },
+    # {
+    #   :name => 'Alex',
+    #   :salary => 2000,
+    # }]
+    #
+    # Employee.create_many(rows)
+    #
+    #  the second example uses the :returning option
+    #  to return key values
+    #
+    # rows = [{
+    #   :name => 'Keith',
+    #   :salary => 1000,
+    # },
+    # {
+    #   :name => 'Alex',
+    #   :salary => 2000,
+    # }]
+    #
+    # options = {
+    #   :returning => [:id]
+    # }
+    #
+    # Employee.create_many(rows, options) returns [#<Employee id: 1>, #<Employee id: 2>]
+    #
+    #  the third example uses the :slice_size option.
+    #  Slice_size - is an integer that specifies how many
+    #  records will be created in a single SQL query.
+    #
+    # rows = [{
+    #   :name => 'Keith',
+    #   :salary => 1000,
+    # },
+    # {
+    #   :name => 'Alex',
+    #   :salary => 2000,
+    # },
+    # {
+    #   :name => 'Mark',
+    #   :salary => 3000,
+    # }]
+    #
+    # options = {
+    #   :slice_size => 2
+    # }
+    #
+    # Employee.create_many(rows, options) will generate two insert queries
+    #
+    def create_many(rows, options = {})
+      return [] if rows.blank?
+      options[:slice_size] = 1000 unless options.has_key?(:slice_size)
+      options[:check_consistency] = true unless options.has_key?(:check_consistency)
+      returning_clause = ""
+      if options[:returning]
+        if options[:returning].is_a? Array
+          returning_list = options[:returning].join(',')
+        else
+          returning_list = options[:returning]
+        end
+        returning_clause = " returning #{returning_list}"
+      end
+      returning = []
+      created_at_value = Time.zone.now
+      num_sequences_needed = rows.reject{|r| r[:id].present?}.length
+      if num_sequences_needed > 0
+        row_ids = connection.next_sequence_values(sequence_name, num_sequences_needed)
+      else
+        row_ids = []
+      end
+      rows.each do |row|
+        # set the primary key if it needs to be set
+        row[:id] ||= row_ids.shift
+      end.each do |row|
+        # set :created_at if need be
+        row[:created_at] ||= created_at_value
+      end.group_by do |row|
+        respond_to?(:partition_name) ? partition_name(*partition_key_values(row)) : table_name
+      end.each do |table_name, rows_for_table|
+        column_names = rows_for_table[0].keys.sort{|a,b| a.to_s <=> b.to_s}
+        sql_insert_string = "insert into #{table_name} (#{column_names.join(',')}) values "
+        rows_for_table.map do |row|
+          if options[:check_consistency]
+            row_column_names = row.keys.sort{|a,b| a.to_s <=> b.to_s}
+            if column_names != row_column_names
+              raise BulkUploadDataInconsistent.new(self, table_name, column_names, row_column_names, "while attempting to build insert statement")
+            end
+          end
+          column_values = column_names.map do |column_name|
+            quote_value(row[column_name], columns_hash[column_name.to_s])
+          end.join(',')
+          "(#{column_values})"
+        end.each_slice(options[:slice_size]) do |insert_slice|
+          returning += find_by_sql(sql_insert_string + insert_slice.join(',') + returning_clause)
+        end
+      end
+      return returning
+    end
+    #
+    # BULK updates of many rows
+    #
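+    # rows: an array of hashtables of data to update in the database
+    #       each hashtable must have the same number of keys (and same
+    #       names for each key). a single hashtable mapping where-keys
+    #       to new values is also accepted (see the last example below).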
+    #
+    # options:
+    #   :slice_size = 1000
+    #   :returning = nil
+    #   :set_array = from first row passed in
+    #   :check_consistency = true
+    #   :where = '"#{table_name}.id = datatable.id"'
+    #
+    # examples:
+    #  this first example uses "set_array" to add the value of "salary"
+    #  to the specific employee's salary
+    #  the default where clause is to match IDs so, it works here.
+    # rows = [{
+    #   :id => 1,
+    #   :salary => 1000,
+    # },
+    # {
+    #   :id => 10,
+    #   :salary => 2000,
+    # },
+    # {
+    #   :id => 23,
+    #   :salary => 2500,
+    # }]
+    #
+    # options = {
+    #   :set_array => '"salary = datatable.salary"'
+    # }
+    #
+    # Employee.update_many(rows, options)
+    #
+    #
+    #  this version sets the where clause to match salaries.
+    # rows = [{
+    #   :id => 1,
+    #   :salary => 1000,
+    #   :company_id => 10
+    # },
+    # {
+    #   :id => 10,
+    #   :salary => 2000,
+    #   :company_id => 12
+    # },
+    # {
+    #   :id => 23,
+    #   :salary => 2500,
+    #   :company_id => 5
+    # }]
+    #
+    # options = {
+    #   :set_array => '"company_id = datatable.company_id"',
+    #   :where => '"#{table_name}.salary = datatable.salary"'
+    # }
+    #
+    # Employee.update_many(rows, options)
+    #
+    #
+    #  this version sets the where clause to the KEY of the hash passed in
+    # and the set_array is generated from the VALUES
+    #
+    # rows = {
+    #   { :id => 1 } => {
+    #     :salary => 100000,
+    #     :company_id => 10
+    #   },
+    #   { :id => 10 } => {
+    #     :salary => 110000,
+    #     :company_id => 12
+    #   },
+    #   { :id => 23 } => {
+    #     :salary => 90000,
+    #     :company_id => 5
+    #   }
+    # }
+    #
+    # Employee.update_many(rows)
+    #
+    # Remember that you should probably set updated_at using "updated_at = datatable.updated_at"
+    # or "updated_at = now()" in the set_array if you want to follow
+    # the standard active record model for time columns (and you have an updated_at column)
+    def update_many(rows, options = {})
+      return [] if rows.blank?
+      if rows.is_a?(Hash)
+        options[:where] = '"' + rows.keys[0].keys.map{|key| '#{table_name}.' + "#{key} = datatable.#{key}"}.join(' and ') + '"'
+        options[:set_array] = '"' + rows.values[0].keys.map{|key| "#{key} = datatable.#{key}"}.join(',') + '"' unless options[:set_array]
+        r = []
+        rows.each do |key,value|
+          r << key.merge(value)
+        end
+        rows = r
+      end
+      unless options[:set_array]
+        column_names =  rows[0].keys
+        columns_to_remove = [:id]
+        columns_to_remove += [partition_keys].map{|k| k.to_sym} if respond_to?(:partition_keys)
+        options[:set_array] = '"' + (column_names - columns_to_remove.flatten).map{|cn| "#{cn} = datatable.#{cn}"}.join(',') + '"'
+      end
+      options[:slice_size] = 1000 unless options[:slice_size]
+      options[:check_consistency] = true unless options.has_key?(:check_consistency)
+      returning_clause = ""
+      if options[:returning]
+        if options[:returning].is_a?(Array)
+          returning_list = options[:returning].map{|r| '#{table_name}.' + r.to_s}.join(',')
+        else
+          returning_list = options[:returning]
+        end
+        returning_clause = "\" returning #{returning_list}\""
+      end
+      options[:where] = '"#{table_name}.id = datatable.id"' unless options[:where]
+      returning = []
+      rows.group_by do |row|
+        respond_to?(:partition_name) ? partition_name(*partition_key_values(row)) : table_name
+      end.each do |table_name, rows_for_table|
+        column_names = rows_for_table[0].keys.sort{|a,b| a.to_s <=> b.to_s}
+        rows_for_table.each_slice(options[:slice_size]) do |update_slice|
+          datatable_rows = []
+          update_slice.each_with_index do |row,i|
+            if options[:check_consistency]
+              row_column_names = row.keys.sort{|a,b| a.to_s <=> b.to_s}
+              if column_names != row_column_names
+                raise BulkUploadDataInconsistent.new(self, table_name, column_names, row_column_names, "while attempting to build update statement")
+              end
+            end
+            datatable_rows << row.map do |column_name,column_value|
+              column_name = column_name.to_s
+              columns_hash_value = columns_hash[column_name]
+              if i == 0
+                "#{quote_value(column_value, columns_hash_value)}::#{columns_hash_value.sql_type} as #{column_name}"
+              else
+                quote_value(column_value, columns_hash_value)
+              end
+            end.join(',')
+          end
+          datatable = datatable_rows.join(' union select ')
+          sql_update_string = <<-SQL
+            update #{table_name} set
+              #{eval(options[:set_array])}
+            from
+            (select
+              #{datatable}
+            ) as datatable
+            where
+              #{eval(options[:where])}
+            #{eval(returning_clause)}
+          SQL
+          returning += find_by_sql(sql_update_string)
+        end
+      end
+      return returning
+    end
+  end
+end