partitioned 0.8.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -1,29 +1,30 @@
1
1
  Partitioned
2
2
  ===========
3
3
 
4
- Partitioned adds assistance to ActiveRecord for manipulating
5
- (reading, creating, updating) an activerecord model that represents
6
- data that may be in one of many database tables (determined by the Models data).
4
+ Partitioned adds assistance to ActiveRecord for manipulating (reading,
5
+ creating, updating) an activerecord model that represents data that
6
+ may be in one of many database tables (determined by the Model's data).
7
7
 
8
- It also has features that support the creation and deleting of child tables and
9
- partitioning support infrastructure.
8
+ It also has features that support the creation and deleting of child
9
+ tables and partitioning support infrastructure.
10
10
 
11
- It supports Postgres partitioning and has specific features to overcome basic
12
- failings of Postgres's implementation of partitioning.
11
+ It supports Postgres partitioning and has specific features to
12
+ overcome basic failings of Postgres's implementation of partitioning.
13
13
 
14
14
  Basics:
15
- A parent table can be inherited by many child tables that inherit most of the
16
- attributes of the parent table including its columns. child tables typically
17
- (and for the uses of this plugin must) have a unique check constraint the
18
- defines which data should be located in that specific child table.
15
+ A parent table can be inherited by many child tables that inherit most
16
+ of the attributes of the parent table including its columns. child
17
+ tables typically (and for the uses of this plugin must) have a unique
18
+ check constraint that defines which data should be located in that
19
+ specific child table.
19
20
 
20
- Such a constraint allows for the SQL planner to ignore most child tables and target
21
- the (hopefully) one child table that contains the records interested. This splits
22
- data, and meta-data (indexes) which provides streamlined targeted access to the
23
- desired data.
21
+ Such a constraint allows for the SQL planner to ignore most child
22
+ tables and target the (hopefully) one child table that contains the
23
+ records of interest. This splits data, and meta-data (indexes) which
24
+ provides streamlined targeted access to the desired data.
24
25
 
25
- Support for bulk inserts and bulk updates is also provided via Partitioned::Base.create_many and
26
- Partitioned::Base.update_many.
26
+ Support for bulk inserts and bulk updates is also provided via
27
+ Partitioned::Base.create_many and Partitioned::Base.update_many.
27
28
 
28
29
  Example
29
30
  =======
@@ -33,7 +34,21 @@ Given the following models:
33
34
  class Company < ActiveRecord::Base
34
35
  end
35
36
 
36
- class Employee < Partitioned::ByCompanyId
37
+ class ByCompanyId < Partitioned::ByForeignKey
38
+ self.abstract_class = true
39
+
40
+ belongs_to :company
41
+
42
+ def self.partition_foreign_key
43
+ return :company_id
44
+ end
45
+
46
+ partitioned do |partition|
47
+ partition.index :id, :unique => true
48
+ end
49
+ end
50
+
51
+ class Employee < ByCompanyId
37
52
  end
38
53
 
39
54
  and the following tables:
@@ -47,6 +62,10 @@ and the following tables:
47
62
  name text null
48
63
  );
49
64
 
65
+ -- add some companies
66
+ insert into companies (name) values
67
+ ('company 1'),('company 2'),('company 3');
68
+
50
69
  -- this is the parent table
51
70
  create table employees
52
71
  (
@@ -57,18 +76,45 @@ and the following tables:
57
76
  company_id integer not null references companies
58
77
  );
59
78
 
79
+ We now need to create some infrastructure for partitioned tables,
80
+ in particular, we create a schema to hold the child partition
81
+ tables of employees.
82
+
83
+ Employee.create_infrastructure
84
+
85
+ Which creates the employees_partitions schema using the following SQL:
86
+
60
87
  create schema employees_partitions;
61
88
 
62
- create table companies (name) values ('company 1'),('company 2'),('company 2');
89
+ NOTE: We also install protections on the employees table so it isn't
90
+ used as a data table (this SQL is not presented for simplicity but is
91
+ a part of the create_infrastructure call).
92
+
93
+ To add child tables we use the create_new_partition_tables method:
94
+
95
+ company_ids = Company.all.map(&:id)
96
+ Employee.create_new_partition_tables(company_ids)
97
+
98
+ which results in the following SQL:
63
99
 
64
- -- some children
65
- create table employees_partitions.p1 ( CHECK ( company_id = 1 ) ) INHERITS (employees);
66
- create table employees_partitions.p2 ( CHECK ( company_id = 2 ) ) INHERITS (employees);
67
- create table employees_partitions.p3 ( CHECK ( company_id = 3 ) ) INHERITS (employees);
100
+ create table employees_partitions.p1
101
+ ( CHECK ( company_id = 1 ) ) INHERITS (employees);
102
+ create table employees_partitions.p2
103
+ ( CHECK ( company_id = 2 ) ) INHERITS (employees);
104
+ create table employees_partitions.p3
105
+ ( CHECK ( company_id = 3 ) ) INHERITS (employees);
68
106
 
69
- since database records exist in a specific child table dependant on the field "company_id"
70
- we need to have creates that turn into database inserts of the EMPLOYEES table redirect
71
- the record insert into the specific child table determined by the value of COMPANY_ID
107
+ NOTE: Some other SQL is generated in the above example, specifically
108
+ the reference to the companies table needs to be explicitly created
109
+ for postgres child tables AND the unique index on 'id' is created.
110
+ These are not shown for simplicity.
111
+
112
+ Now we can do operations involving the child partitions.
113
+
114
+ Since database records exist in a specific child table dependent on
115
+ the field "company_id" we need to have creates that turn into database
116
+ inserts of the EMPLOYEES table redirect the record insert into the
117
+ specific child table determined by the value of COMPANY_ID
72
118
 
73
119
  eg:
74
120
  employee = Employee.create(:name => 'Keith', :company_id => 1)
@@ -79,12 +125,12 @@ this would normally produce the following:
79
125
  but with Partitioned we see:
80
126
  INSERT INTO employees_partitions.p1 ('name', company_id) values ('Keith', 1);
81
127
 
82
- reads of such a table need some assistance to find the specific child table the
83
- record exists in.
128
+ reads of such a table need some assistance to find the specific child
129
+ table the record exists in.
84
130
 
85
- Since we are partitioned by company_id the programmer needs to provide that information
86
- when fetching data, or the database will need to search all child table for the
87
- specific record we are looking for.
131
+ Since we are partitioned by company_id the programmer needs to provide
132
+ that information when fetching data, or the database will need to
133
+ search all child tables for the specific record we are looking for.
88
134
 
89
135
  This is no longer valid (well, doesn't perform well):
90
136
 
@@ -93,11 +139,14 @@ This is no longer valid (well, doesn't perform well):
93
139
  instead, do one of the following:
94
140
 
95
141
  employee = Employee.from_partition(1).find(1)
96
- employee = Employee.find(:first, :conditions => {:name => 'Keith', :company_id => 1})
97
- employee = Employee.find(:first, :conditions => {:id => 1, :company_id => 1})
98
-
99
- an update (employee.save where the record already exists in the database) will take
100
- advantage of knowing which child table the record exists in so it can do some optimization.
142
+ employee = Employee.find(:first,
143
+ :conditions => {:name => 'Keith', :company_id => 1})
144
+ employee = Employee.find(:first,
145
+ :conditions => {:id => 1, :company_id => 1})
146
+
147
+ an update (employee.save where the record already exists in the
148
+ database) will take advantage of knowing which child table the record
149
+ exists in so it can do some optimization.
101
150
 
102
151
  so, the following works as expected:
103
152
 
data/Rakefile CHANGED
@@ -4,6 +4,9 @@ begin
4
4
  rescue LoadError
5
5
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
6
6
  end
7
+
8
+ task :default => :spec
9
+
7
10
  begin
8
11
  require 'rdoc/task'
9
12
  rescue LoadError
data/examples/README CHANGED
@@ -1,23 +1,51 @@
1
1
  The directory holds examples of how to use the partitioned gem.
2
+ These rails scripts will create and populate partitioned tables. The
3
+ scripts accept the following parameters:
4
+
5
+ --? list available options
6
+ --force delete tables before starting
7
+ default: false
8
+ --cleanup delete tables and exit
9
+ default: false
10
+ --create-many how many objects to create via create_many
11
+ default: 3000
12
+ --create-individual how many objects to create via create
13
+ default: 1000
14
+ --new-individual how many objects to create via new.save
15
+ default: 1000
16
+ --update-individual how many objects to update individually
17
+ default: 1000
18
+ --update-many how many objects to update via update_many
19
+ default: 1000
20
+
21
+ The scripts are:
22
+
23
+ company_id.rb: table 'employees' partitioned by company_id
24
+
25
+ company_id_and_created_at.rb: table 'employees' has multi-level
26
+ partitioning by company_id then created_at. created_at is grouped by
27
+ week where weeks start on Monday.
28
+
29
+ created_at.rb: table 'employees' partitioned by created_at
30
+ created_at is grouped by week where weeks start on Monday.
31
+
32
+ created_at_referencing_awards.rb: table 'employees' partitioned by
33
+ created_at and table 'awards' is partitioned by created_at with a
34
+ reference to specific child table of employees with the created_at
35
+ range.
36
+
37
+ id.rb: partitioned on 'id' grouping each 10 records into separate
38
+ child tables.
39
+
40
+ start_date.rb: grouped by column start_date which is a date grouped
41
+ by month.
2
42
 
3
43
  The lib directory contains:
4
- by_company_id.rb - a partitioned model where the partition's key is the column company_id that references companies.
5
- company.rb - an ActiveRecord model for the table companies.
6
- roman.rb - some helper routines for generating roman numerals.
7
44
 
8
- This directory holds executable rails scripts that create and populate partitioned tables. The scripts accept the following
9
- parameters:
10
- --force delete used tables before starting
11
- --cleanup delete used tables and exit
45
+ by_company_id.rb: a partitioned model where the partition's key is
46
+ the column company_id that references companies.
47
+
48
+ company.rb: an ActiveRecord model for the table companies.
49
+
50
+ roman.rb: some helper routines for generating roman numerals.
12
51
 
13
- The scripts are:
14
- company_id.rb - table 'employees' partitioned by company_id
15
- company_id_and_created_at.rb - table 'employees' has multi-level partitioning by company_id then created_at
16
- created_at is grouped by week where weeks start on Monday.
17
- created_at.rb - table 'employees' partitioned by created_at
18
- created_at is grouped by week where weeks start on Monday.
19
- created_at_referencing_awards.rb - table 'employees' partitioned by created_at and table 'awards' is partitioned
20
- by created_at which a reference to specific child table of employees with the
21
- created_at range.
22
- id.rb - partitioned on 'id' grouping each 10 records into separate child tables.
23
- start_date.rb - grouped by column start_date which is a date grouped by month.
@@ -5,11 +5,17 @@ require 'active_record/relation.rb'
5
5
  require 'active_record/persistence.rb'
6
6
 
7
7
  #
8
- # patching activerecord to allow specifying the table name as a function of
9
- # attributes
8
+ # Patching {ActiveRecord} to allow specifying the table name as a function of
9
+ # attributes.
10
10
  #
11
11
  module ActiveRecord
12
+ #
13
+ # Patches for Persistence to allow certain partitioning (that which relates to the primary key) to work.
14
+ #
12
15
  module Persistence
16
+ #
17
+ # patch the create method to prefetch the primary key if needed
18
+ #
13
19
  def create
14
20
  if self.id.nil? && self.class.respond_to?(:prefetch_primary_key?) && self.class.prefetch_primary_key?
15
21
  self.id = connection.next_sequence_value(self.class.sequence_name)
@@ -27,17 +33,17 @@ module ActiveRecord
27
33
  end
28
34
  end
29
35
  #
30
- # patches for relation to allow back hooks into the activerecord
31
- # requesting name of table as a function of attributes
36
+ # Patches for relation to allow back hooks into the {ActiveRecord}
37
+ # requesting name of table as a function of attributes.
32
38
  #
33
39
  class Relation
34
40
  #
35
- # patches activerecord's building of an insert statement to request
41
+ # Patches {ActiveRecord}'s building of an insert statement to request
36
42
  # of the model a table name with respect to attribute values being
37
- # inserted
43
+ # inserted.
38
44
  #
39
- # the differences between this and the original code are small and marked
40
- # with PARTITIONED comment
45
+ # The differences between this and the original code are small and marked
46
+ # with PARTITIONED comment.
41
47
  def insert(values)
42
48
  primary_key_value = nil
43
49
 
@@ -2,47 +2,73 @@ require 'active_record'
2
2
  require 'active_record/base'
3
3
  require 'active_record/connection_adapters/abstract_adapter'
4
4
 
5
+ #
6
+ # Patching {ActiveRecord::ConnectionAdapters::TableDefinition} and
7
+ # {ActiveRecord::ConnectionAdapters::PostgreSQLAdapter} to add functionality
8
+ # needed to abstract partition specific SQL statements.
9
+ #
5
10
  module ActiveRecord::ConnectionAdapters
11
+ #
12
+ # Patches associated with building check constraints.
13
+ #
6
14
  class TableDefinition
15
+ #
16
+ # Builds a SQL check constraint
17
+ #
18
+ # @param [String] constraint a SQL constraint
7
19
  def check_constraint(constraint)
8
20
  @columns << Struct.new(:to_sql).new("CHECK (#{constraint})")
9
21
  end
10
22
  end
11
23
 
24
+ #
25
+ # Patches extending the postgres adapter with new operations for managing
26
+ # sequences (and sets of sequence values), schemas and foreign keys.
27
+ # These should go into AbstractAdapter allowing any database adapter
28
+ # to take advantage of these SQL builders.
29
+ #
12
30
  class PostgreSQLAdapter < AbstractAdapter
13
31
  #
14
- # get the next value in a sequence. used on INSERT operation for
32
+ # Get the next value in a sequence. Used on INSERT operation for
15
33
  # partitioning like by_id because the ID is required before the insert
16
34
  # so that the specific child table is known ahead of time.
17
35
  #
36
+ # @param [String] sequence_name the name of the sequence to fetch the next value from
37
+ # @return [Integer] the value from the sequence
18
38
  def next_sequence_value(sequence_name)
19
39
  return execute("select nextval('#{sequence_name}')").field_values("nextval").first
20
40
  end
21
41
 
22
42
  #
23
- # get the some next values in a sequence.
24
- # batch_size - count of values
43
+ # Get the next several values in a sequence.
25
44
  #
45
+ # @param [String] sequence_name the name of the sequence to fetch the next values from
46
+ # @param [Integer] batch_size count of values.
47
+ # @return [Array<Integer>] an array of values from the sequence
26
48
  def next_sequence_values(sequence_name, batch_size)
27
49
  result = execute("select nextval('#{sequence_name}') from generate_series(1, #{batch_size})")
28
50
  return result.field_values("nextval").map(&:to_i)
29
51
  end
30
52
 
31
53
  #
32
- # causes active resource to fetch the primary key for the table (using next_sequence_value())
33
- # just before an insert. We need the prefetch to happen but we don't have enough information
54
+ # Causes ActiveRecord to fetch the primary key for the table (using next_sequence_value())
55
+ # just before an insert. We need the prefetch to happen but we don't have enough information
34
56
  # here to determine if it should happen, so Relation::insert has been modified to request of
35
57
  # the ActiveRecord::Base derived class if it requires a prefetch.
36
58
  #
59
+ # @param [String] table_name the table name to query
60
+ # @return [Boolean] returns true if the table should have its primary key prefetched.
37
61
  def prefetch_primary_key?(table_name)
38
62
  return false
39
63
  end
40
64
 
41
65
  #
42
- # creates a schema given a name.
43
- # options:
44
- # :unless_exists - check if schema exists.
66
+ # Creates a schema given a name.
45
67
  #
68
+ # @param [String] name the name of the schema.
69
+ # @param [Hash] options ({}) options for creating a schema
70
+ # @option options [Boolean] :unless_exists (false) check if schema exists.
71
+ # @return [optional] undefined
46
72
  def create_schema(name, options = {})
47
73
  if options[:unless_exists]
48
74
  return if execute("select count(*) from pg_namespace where nspname = '#{name}'").getvalue(0,0).to_i > 0
@@ -51,11 +77,13 @@ module ActiveRecord::ConnectionAdapters
51
77
  end
52
78
 
53
79
  #
54
- # drop a schema given a name.
55
- # options:
56
- # :if_exists - check if schema exists.
57
- # :cascade - cascade drop to dependant objects
80
+ # Drop a schema given a name.
58
81
  #
82
+ # @param [String] name the name of the schema.
83
+ # @param [Hash] options ({}) options for dropping a schema
84
+ # @option options [Boolean] :if_exists (false) check if schema exists.
85
+ # @option options [Boolean] :cascade (false) drop dependent objects
86
+ # @return [optional] undefined
59
87
  def drop_schema(name, options = {})
60
88
  if options[:if_exists]
61
89
  return if execute("select count(*) from pg_namespace where nspname = '#{name}'").getvalue(0,0).to_i == 0
@@ -64,8 +92,13 @@ module ActiveRecord::ConnectionAdapters
64
92
  end
65
93
 
66
94
  #
67
- # add foreign key constraint to table.
95
+ # Add foreign key constraint to table.
68
96
  #
97
+ # @param [String] referencing_table_name the name of the table containing the foreign key
98
+ # @param [String] referencing_field_name the name of foreign key column
99
+ # @param [String] referenced_table_name the name of the table referenced by the foreign key
100
+ # @param [String] referenced_field_name (:id) the name of the column referenced by the foreign key
101
+ # @return [optional] undefined
69
102
  def add_foreign_key(referencing_table_name, referencing_field_name, referenced_table_name, referenced_field_name = :id)
70
103
  execute("ALTER TABLE #{referencing_table_name} add foreign key (#{referencing_field_name}) references #{referenced_table_name}(#{referenced_field_name})")
71
104
  end
@@ -1,19 +1,26 @@
1
1
  #
2
- # these are things our base class must fix in ActiveRecord::Base
2
+ # These are things our base class must fix in ActiveRecord::Base
3
3
  #
4
- # no need to monkey patch these, just override them.
4
+ # No need to monkey patch these, just override them.
5
5
  #
6
6
  module Partitioned
7
+ #
8
+ # methods that need to be overridden in an ActiveRecord::Base derived class so that we can support partitioning
9
+ #
7
10
  module ActiveRecordOverrides
8
11
  #
9
12
  # arel_attribute_values needs to return attributes (and their values) associated with the dynamic_arel_table instead of the
10
13
  # static arel_table provided by ActiveRecord.
11
14
  #
12
- # the standard release of this function gathers a collection of attributes and creates a wrapper function around them
15
+ # The standard release of this function gathers a collection of attributes and creates a wrapper function around them
13
16
  # that names the table they are associated with. that naming is incorrect for partitioned tables.
14
17
  #
15
- # we call the standard release's method then retrofit our partitioned table into the hash that is returned.
18
+ # We call the standard release's method then retrofit our partitioned table into the hash that is returned.
16
19
  #
20
+ # @param [Boolean] include_primary_key (true)
21
+ # @param [Boolean] include_readonly_attributes (true)
22
+ # @param [Array] attribute_names (@attributes.keys)
23
+ # @return [Hash] hash of key value pairs associated with persistent attributes
17
24
  def arel_attributes_values(include_primary_key = true, include_readonly_attributes = true, attribute_names = @attributes.keys)
18
25
  attrs = super
19
26
  actual_arel_table = dynamic_arel_table(self.class.table_name)
@@ -21,8 +28,9 @@ module Partitioned
21
28
  end
22
29
 
23
30
  #
24
- # delete just needs a wrapper around it to specify the specific partition.
31
+ # Delete just needs a wrapper around it to specify the specific partition.
25
32
  #
33
+ # @return [optional] undefined
26
34
  def delete
27
35
  if persisted?
28
36
  self.class.from_partition(*self.class.partition_key_values(attributes)).delete(id)