partitioned 0.8.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README +85 -36
- data/Rakefile +3 -0
- data/examples/README +46 -18
- data/lib/monkey_patch_activerecord.rb +14 -8
- data/lib/monkey_patch_postgres.rb +46 -13
- data/lib/partitioned/active_record_overrides.rb +13 -5
- data/lib/partitioned/bulk_methods_mixin.rb +91 -146
- data/lib/partitioned/by_created_at.rb +3 -1
- data/lib/partitioned/by_foreign_key.rb +5 -0
- data/lib/partitioned/by_id.rb +10 -4
- data/lib/partitioned/by_integer_field.rb +9 -0
- data/lib/partitioned/by_monthly_time_field.rb +8 -1
- data/lib/partitioned/by_time_field.rb +16 -8
- data/lib/partitioned/by_weekly_time_field.rb +6 -3
- data/lib/partitioned/multi_level/configurator/data.rb +1 -0
- data/lib/partitioned/multi_level/configurator/dsl.rb +11 -0
- data/lib/partitioned/multi_level/configurator/reader.rb +18 -0
- data/lib/partitioned/multi_level/partition_manager.rb +13 -4
- data/lib/partitioned/multi_level.rb +3 -1
- data/lib/partitioned/partitioned_base/configurator/data.rb +10 -1
- data/lib/partitioned/partitioned_base/configurator/dsl.rb +20 -15
- data/lib/partitioned/partitioned_base/configurator/reader.rb +3 -0
- data/lib/partitioned/partitioned_base/configurator.rb +4 -0
- data/lib/partitioned/partitioned_base/partition_manager.rb +17 -15
- data/lib/partitioned/partitioned_base/sql_adapter.rb +25 -23
- data/lib/partitioned/partitioned_base.rb +112 -41
- data/lib/partitioned/version.rb +2 -1
- data/partitioned.gemspec +3 -2
- metadata +68 -73
data/README
CHANGED
@@ -1,29 +1,30 @@
|
|
1
1
|
Partitioned
|
2
2
|
===========
|
3
3
|
|
4
|
-
Partitioned adds assistance to ActiveRecord for manipulating
|
5
|
-
|
6
|
-
|
4
|
+
Partitioned adds assistance to ActiveRecord for manipulating (reading,
|
5
|
+
creating, updating) an activerecord model that represents data that
|
6
|
+
may be in one of many database tables (determined by the model's data).
|
7
7
|
|
8
|
-
It also has features that support the creation and deleting of child
|
9
|
-
partitioning support infrastructure.
|
8
|
+
It also has features that support the creation and deleting of child
|
9
|
+
tables and partitioning support infrastructure.
|
10
10
|
|
11
|
-
It supports Postgres partitioning and has specific features to
|
12
|
-
failings of Postgres's implementation of partitioning.
|
11
|
+
It supports Postgres partitioning and has specific features to
|
12
|
+
overcome basic failings of Postgres's implementation of partitioning.
|
13
13
|
|
14
14
|
Basics:
|
15
|
-
A parent table can be inherited by many child tables that inherit most
|
16
|
-
attributes of the parent table including its columns. child
|
17
|
-
(and for the uses of this plugin must) have a unique
|
18
|
-
defines which data should be located in that
|
15
|
+
A parent table can be inherited by many child tables that inherit most
|
16
|
+
of the attributes of the parent table including its columns. child
|
17
|
+
tables typically (and for the uses of this plugin must) have a unique
|
18
|
+
check constraint that defines which data should be located in that
|
19
|
+
specific child table.
|
19
20
|
|
20
|
-
Such a constraint allows for the SQL planner to ignore most child
|
21
|
-
the (hopefully) one child table that contains the
|
22
|
-
data, and meta-data (indexes) which
|
23
|
-
desired data.
|
21
|
+
Such a constraint allows for the SQL planner to ignore most child
|
22
|
+
tables and target the (hopefully) one child table that contains the
|
23
|
+
records of interest. This splits data, and meta-data (indexes) which
|
24
|
+
provides streamlined targeted access to the desired data.
|
24
25
|
|
25
|
-
Support for bulk inserts and bulk updates is also provided via
|
26
|
-
Partitioned::Base.update_many.
|
26
|
+
Support for bulk inserts and bulk updates is also provided via
|
27
|
+
Partitioned::Base.create_many and Partitioned::Base.update_many.
|
27
28
|
|
28
29
|
Example
|
29
30
|
=======
|
@@ -33,7 +34,21 @@ Given the following models:
|
|
33
34
|
class Company < ActiveRecord::Base
|
34
35
|
end
|
35
36
|
|
36
|
-
class
|
37
|
+
class ByCompanyId < Partitioned::ByForeignKey
|
38
|
+
self.abstract_class = true
|
39
|
+
|
40
|
+
belongs_to :company
|
41
|
+
|
42
|
+
def self.partition_foreign_key
|
43
|
+
return :company_id
|
44
|
+
end
|
45
|
+
|
46
|
+
partitioned do |partition|
|
47
|
+
partition.index :id, :unique => true
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
class Employee < ByCompanyId
|
37
52
|
end
|
38
53
|
|
39
54
|
and the following tables:
|
@@ -47,6 +62,10 @@ and the following tables:
|
|
47
62
|
name text null
|
48
63
|
);
|
49
64
|
|
65
|
+
-- add some companies
|
66
|
+
insert into companies (name) values
|
67
|
+
('company 1'),('company 2'),('company 3');
|
68
|
+
|
50
69
|
-- this is the parent table
|
51
70
|
create table employees
|
52
71
|
(
|
@@ -57,18 +76,45 @@ and the following tables:
|
|
57
76
|
company_id integer not null references companies
|
58
77
|
);
|
59
78
|
|
79
|
+
We now need to create some infrastructure for partitioned tables,
|
80
|
+
in particular, we create a schema to hold the child partition
|
81
|
+
tables of employees.
|
82
|
+
|
83
|
+
Employee.create_infrastructure
|
84
|
+
|
85
|
+
Which creates the employees_partitions schema using the following SQL:
|
86
|
+
|
60
87
|
create schema employees_partitions;
|
61
88
|
|
62
|
-
|
89
|
+
NOTE: We also install protections on the employees table so it isn't
|
90
|
+
used as a data table (this SQL is not presented for simplicity but is
|
91
|
+
a part of the create_infrastructure call).
|
92
|
+
|
93
|
+
To add child tables we use the create_new_partition_tables method:
|
94
|
+
|
95
|
+
company_ids = Company.all.map(&:id)
|
96
|
+
Employee.create_new_partition_tables(company_ids)
|
97
|
+
|
98
|
+
which results in the following SQL:
|
63
99
|
|
64
|
-
|
65
|
-
|
66
|
-
create table employees_partitions.p2
|
67
|
-
|
100
|
+
create table employees_partitions.p1
|
101
|
+
( CHECK ( company_id = 1 ) ) INHERITS (employees);
|
102
|
+
create table employees_partitions.p2
|
103
|
+
( CHECK ( company_id = 2 ) ) INHERITS (employees);
|
104
|
+
create table employees_partitions.p3
|
105
|
+
( CHECK ( company_id = 3 ) ) INHERITS (employees);
|
68
106
|
|
69
|
-
|
70
|
-
|
71
|
-
|
107
|
+
NOTE: Some other SQL is generated in the above example, specifically
|
108
|
+
the reference to the companies table needs to be explicitly created
|
109
|
+
for postgres child tables AND the unique index on 'id' is created.
|
110
|
+
These are not shown for simplicity.
|
111
|
+
|
112
|
+
Now we can do operations involving the child partitions.
|
113
|
+
|
114
|
+
Since database records exist in a specific child table dependent on
|
115
|
+
the field "company_id" we need to have creates that turn into database
|
116
|
+
inserts of the EMPLOYEES table redirect the record insert into the
|
117
|
+
specific child table determined by the value of COMPANY_ID
|
72
118
|
|
73
119
|
eg:
|
74
120
|
employee = Employee.create(:name => 'Keith', :company_id => 1)
|
@@ -79,12 +125,12 @@ this would normally produce the following:
|
|
79
125
|
but with Partitioned we see:
|
80
126
|
INSERT INTO employees_partitions.p1 (name, company_id) values ('Keith', 1);
|
81
127
|
|
82
|
-
reads of such a table need some assistance to find the specific child
|
83
|
-
record exists in.
|
128
|
+
reads of such a table need some assistance to find the specific child
|
129
|
+
table the record exists in.
|
84
130
|
|
85
|
-
Since we are partitioned by company_id the programmer needs to provide
|
86
|
-
when fetching data, or the database will need to
|
87
|
-
specific record we are looking for.
|
131
|
+
Since we are partitioned by company_id the programmer needs to provide
|
132
|
+
that information when fetching data, or the database will need to
|
133
|
+
search all child tables for the specific record we are looking for.
|
88
134
|
|
89
135
|
This is no longer valid (well, doesn't perform well):
|
90
136
|
|
@@ -93,11 +139,14 @@ This is no longer valid (well, doesn't perform well):
|
|
93
139
|
instead, do one of the following:
|
94
140
|
|
95
141
|
employee = Employee.from_partition(1).find(1)
|
96
|
-
employee = Employee.find(:first,
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
142
|
+
employee = Employee.find(:first,
|
143
|
+
:conditions => {:name => 'Keith', :company_id => 1})
|
144
|
+
employee = Employee.find(:first,
|
145
|
+
:conditions => {:id => 1, :company_id => 1})
|
146
|
+
|
147
|
+
an update (employee.save where the record already exists in the
|
148
|
+
database) will take advantage of knowing which child table the record
|
149
|
+
exists in so it can do some optimization.
|
101
150
|
|
102
151
|
so, the following works as expected:
|
103
152
|
|
data/Rakefile
CHANGED
data/examples/README
CHANGED
@@ -1,23 +1,51 @@
|
|
1
1
|
The directory holds examples of how to use the partitioned gem.
|
2
|
+
These rails scripts will create and populate partitioned tables. The
|
3
|
+
scripts accept the following parameters:
|
4
|
+
|
5
|
+
--? list available options
|
6
|
+
--force delete tables before starting
|
7
|
+
default: false
|
8
|
+
--cleanup delete tables and exit
|
9
|
+
default: false
|
10
|
+
--create-many how many objects to create via create_many
|
11
|
+
default: 3000
|
12
|
+
--create-individual how many objects to create via create
|
13
|
+
default: 1000
|
14
|
+
--new-individual how many objects to create via new.save
|
15
|
+
default: 1000
|
16
|
+
--update-individual how many objects to update individually
|
17
|
+
default: 1000
|
18
|
+
--update-many how many objects to update via update_many
|
19
|
+
default: 1000
|
20
|
+
|
21
|
+
The scripts are:
|
22
|
+
|
23
|
+
company_id.rb: table 'employees' partitioned by company_id
|
24
|
+
|
25
|
+
company_id_and_created_at.rb: table 'employees' has multi-level
|
26
|
+
partitioning by company_id then created_at; created_at is grouped by
|
27
|
+
week where weeks start on Monday.
|
28
|
+
|
29
|
+
created_at.rb: table 'employees' partitioned by created_at
|
30
|
+
created_at is grouped by week where weeks start on Monday.
|
31
|
+
|
32
|
+
created_at_referencing_awards.rb: table 'employees' partitioned by
|
33
|
+
created_at and table 'awards' is partitioned by created_at with a
|
34
|
+
reference to specific child table of employees with the created_at
|
35
|
+
range.
|
36
|
+
|
37
|
+
id.rb: partitioned on 'id' grouping each 10 records into separate
|
38
|
+
child tables.
|
39
|
+
|
40
|
+
start_date.rb: grouped by column start_date which is a date grouped
|
41
|
+
by month.
|
2
42
|
|
3
43
|
The lib directory contains:
|
4
|
-
by_company_id.rb - a partitioned model where the partition's key is the column company_id that references companies.
|
5
|
-
company.rb - an ActiveRecord model for the table companies.
|
6
|
-
roman.rb - some helper routines for generating roman numerals.
|
7
44
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
45
|
+
by_company_id.rb: a partitioned model where the partition's key is
|
46
|
+
the column company_id that references companies.
|
47
|
+
|
48
|
+
company.rb: an ActiveRecord model for the table companies.
|
49
|
+
|
50
|
+
roman.rb: some helper routines for generating roman numerals.
|
12
51
|
|
13
|
-
The scripts are:
|
14
|
-
company_id.rb - table 'employees' partitioned by company_id
|
15
|
-
company_id_and_created_at.rb - table 'employees' has multi-level partitioning by company_id then created_at
|
16
|
-
created_at is grouped by week where weeks start on Monday.
|
17
|
-
created_at.rb - table 'employees' partitioned by created_at
|
18
|
-
created_at is grouped by week where weeks start on Monday.
|
19
|
-
created_at_referencing_awards.rb - table 'employees' partitioned by created_at and table 'awards' is partitioned
|
20
|
-
by created_at which a reference to specific child table of employees with the
|
21
|
-
created_at range.
|
22
|
-
id.rb - partitioned on 'id' grouping each 10 records into separate child tables.
|
23
|
-
start_date.rb - grouped by column start_date which is a date grouped by month.
|
@@ -5,11 +5,17 @@ require 'active_record/relation.rb'
|
|
5
5
|
require 'active_record/persistence.rb'
|
6
6
|
|
7
7
|
#
|
8
|
-
#
|
9
|
-
# attributes
|
8
|
+
# Patching {ActiveRecord} to allow specifying the table name as a function of
|
9
|
+
# attributes.
|
10
10
|
#
|
11
11
|
module ActiveRecord
|
12
|
+
#
|
13
|
+
# Patches for Persistence to allow certain partitioning (those related to the primary key) to work.
|
14
|
+
#
|
12
15
|
module Persistence
|
16
|
+
#
|
17
|
+
# patch the create method to prefetch the primary key if needed
|
18
|
+
#
|
13
19
|
def create
|
14
20
|
if self.id.nil? && self.class.respond_to?(:prefetch_primary_key?) && self.class.prefetch_primary_key?
|
15
21
|
self.id = connection.next_sequence_value(self.class.sequence_name)
|
@@ -27,17 +33,17 @@ module ActiveRecord
|
|
27
33
|
end
|
28
34
|
end
|
29
35
|
#
|
30
|
-
#
|
31
|
-
# requesting name of table as a function of attributes
|
36
|
+
# Patches for relation to allow back hooks into the {ActiveRecord}
|
37
|
+
# requesting name of table as a function of attributes.
|
32
38
|
#
|
33
39
|
class Relation
|
34
40
|
#
|
35
|
-
#
|
41
|
+
# Patches {ActiveRecord}'s building of an insert statement to request
|
36
42
|
# of the model a table name with respect to attribute values being
|
37
|
-
# inserted
|
43
|
+
# inserted.
|
38
44
|
#
|
39
|
-
#
|
40
|
-
# with PARTITIONED comment
|
45
|
+
# The differences between this and the original code are small and marked
|
46
|
+
# with PARTITIONED comment.
|
41
47
|
def insert(values)
|
42
48
|
primary_key_value = nil
|
43
49
|
|
@@ -2,47 +2,73 @@ require 'active_record'
|
|
2
2
|
require 'active_record/base'
|
3
3
|
require 'active_record/connection_adapters/abstract_adapter'
|
4
4
|
|
5
|
+
#
|
6
|
+
# Patching {ActiveRecord::ConnectionAdapters::TableDefinition} and
|
7
|
+
# {ActiveRecord::ConnectionAdapters::PostgreSQLAdapter} to add functionality
|
8
|
+
# needed to abstract partition specific SQL statements.
|
9
|
+
#
|
5
10
|
module ActiveRecord::ConnectionAdapters
|
11
|
+
#
|
12
|
+
# Patches associated with building check constraints.
|
13
|
+
#
|
6
14
|
class TableDefinition
|
15
|
+
#
|
16
|
+
# Builds a SQL check constraint
|
17
|
+
#
|
18
|
+
# @param [String] constraint a SQL constraint
|
7
19
|
def check_constraint(constraint)
|
8
20
|
@columns << Struct.new(:to_sql).new("CHECK (#{constraint})")
|
9
21
|
end
|
10
22
|
end
|
11
23
|
|
24
|
+
#
|
25
|
+
# Patches extending the postgres adapter with new operations for managing
|
26
|
+
# sequences (and sets of sequence values), schemas and foreign keys.
|
27
|
+
# These should go into AbstractAdapter allowing any database adapter
|
28
|
+
# to take advantage of these SQL builders.
|
29
|
+
#
|
12
30
|
class PostgreSQLAdapter < AbstractAdapter
|
13
31
|
#
|
14
|
-
#
|
32
|
+
# Get the next value in a sequence. Used on INSERT operation for
|
15
33
|
# partitioning like by_id because the ID is required before the insert
|
16
34
|
# so that the specific child table is known ahead of time.
|
17
35
|
#
|
36
|
+
# @param [String] sequence_name the name of the sequence to fetch the next value from
|
37
|
+
# @return [Integer] the value from the sequence
|
18
38
|
def next_sequence_value(sequence_name)
|
19
39
|
return execute("select nextval('#{sequence_name}')").field_values("nextval").first
|
20
40
|
end
|
21
41
|
|
22
42
|
#
|
23
|
-
#
|
24
|
-
# batch_size - count of values
|
43
|
+
# Get the next several values in a sequence.
|
25
44
|
#
|
45
|
+
# @param [String] sequence_name the name of the sequence to fetch the next values from
|
46
|
+
# @param [Integer] batch_size count of values.
|
47
|
+
# @return [Array<Integer>] an array of values from the sequence
|
26
48
|
def next_sequence_values(sequence_name, batch_size)
|
27
49
|
result = execute("select nextval('#{sequence_name}') from generate_series(1, #{batch_size})")
|
28
50
|
return result.field_values("nextval").map(&:to_i)
|
29
51
|
end
|
30
52
|
|
31
53
|
#
|
32
|
-
#
|
33
|
-
# just before an insert.
|
54
|
+
# Causes ActiveRecord to fetch the primary key for the table (using next_sequence_value())
|
55
|
+
# just before an insert. We need the prefetch to happen but we don't have enough information
|
34
56
|
# here to determine if it should happen, so Relation::insert has been modified to request of
|
35
57
|
# the ActiveRecord::Base derived class if it requires a prefetch.
|
36
58
|
#
|
59
|
+
# @param [String] table_name the table name to query
|
60
|
+
# @return [Boolean] returns true if the table should have its primary key prefetched.
|
37
61
|
def prefetch_primary_key?(table_name)
|
38
62
|
return false
|
39
63
|
end
|
40
64
|
|
41
65
|
#
|
42
|
-
#
|
43
|
-
# options:
|
44
|
-
# :unless_exists - check if schema exists.
|
66
|
+
# Creates a schema given a name.
|
45
67
|
#
|
68
|
+
# @param [String] name the name of the schema.
|
69
|
+
# @param [Hash] options ({}) options for creating a schema
|
70
|
+
# @option options [Boolean] :unless_exists (false) check if schema exists.
|
71
|
+
# @return [optional] undefined
|
46
72
|
def create_schema(name, options = {})
|
47
73
|
if options[:unless_exists]
|
48
74
|
return if execute("select count(*) from pg_namespace where nspname = '#{name}'").getvalue(0,0).to_i > 0
|
@@ -51,11 +77,13 @@ module ActiveRecord::ConnectionAdapters
|
|
51
77
|
end
|
52
78
|
|
53
79
|
#
|
54
|
-
#
|
55
|
-
# options:
|
56
|
-
# :if_exists - check if schema exists.
|
57
|
-
# :cascade - cascade drop to dependant objects
|
80
|
+
# Drop a schema given a name.
|
58
81
|
#
|
82
|
+
# @param [String] name the name of the schema.
|
83
|
+
# @param [Hash] options ({}) options for dropping a schema
|
84
|
+
# @option options [Boolean] :if_exists (false) check if schema exists.
|
85
|
+
# @option options [Boolean] :cascade (false) drop dependent objects
|
86
|
+
# @return [optional] undefined
|
59
87
|
def drop_schema(name, options = {})
|
60
88
|
if options[:if_exists]
|
61
89
|
return if execute("select count(*) from pg_namespace where nspname = '#{name}'").getvalue(0,0).to_i == 0
|
@@ -64,8 +92,13 @@ module ActiveRecord::ConnectionAdapters
|
|
64
92
|
end
|
65
93
|
|
66
94
|
#
|
67
|
-
#
|
95
|
+
# Add foreign key constraint to table.
|
68
96
|
#
|
97
|
+
# @param [String] referencing_table_name the name of the table containing the foreign key
|
98
|
+
# @param [String] referencing_field_name the name of foreign key column
|
99
|
+
# @param [String] referenced_table_name the name of the table referenced by the foreign key
|
100
|
+
# @param [String] referenced_field_name (:id) the name of the column referenced by the foreign key
|
101
|
+
# @return [optional] undefined
|
69
102
|
def add_foreign_key(referencing_table_name, referencing_field_name, referenced_table_name, referenced_field_name = :id)
|
70
103
|
execute("ALTER TABLE #{referencing_table_name} add foreign key (#{referencing_field_name}) references #{referenced_table_name}(#{referenced_field_name})")
|
71
104
|
end
|
@@ -1,19 +1,26 @@
|
|
1
1
|
#
|
2
|
-
#
|
2
|
+
# These are things our base class must fix in ActiveRecord::Base
|
3
3
|
#
|
4
|
-
#
|
4
|
+
# No need to monkey patch these, just override them.
|
5
5
|
#
|
6
6
|
module Partitioned
|
7
|
+
#
|
8
|
+
# methods that need to be overridden in an ActiveRecord::Base derived class so that we can support partitioning
|
9
|
+
#
|
7
10
|
module ActiveRecordOverrides
|
8
11
|
#
|
9
12
|
# arel_attribute_values needs to return attributes (and their values) associated with the dynamic_arel_table instead of the
|
10
13
|
# static arel_table provided by ActiveRecord.
|
11
14
|
#
|
12
|
-
#
|
15
|
+
# The standard release of this function gathers a collection of attributes and creates a wrapper function around them
|
13
16
|
# that names the table they are associated with. that naming is incorrect for partitioned tables.
|
14
17
|
#
|
15
|
-
#
|
18
|
+
# We call the standard release's method then retrofit our partitioned table into the hash that is returned.
|
16
19
|
#
|
20
|
+
# @param [Boolean] include_primary_key (true)
|
21
|
+
# @param [Boolean] include_readonly_attributes (true)
|
22
|
+
# @param [Array<String>] attribute_names (@attributes.keys)
|
23
|
+
# @return [Hash] hash of key value pairs associated with persistent attributes
|
17
24
|
def arel_attributes_values(include_primary_key = true, include_readonly_attributes = true, attribute_names = @attributes.keys)
|
18
25
|
attrs = super
|
19
26
|
actual_arel_table = dynamic_arel_table(self.class.table_name)
|
@@ -21,8 +28,9 @@ module Partitioned
|
|
21
28
|
end
|
22
29
|
|
23
30
|
#
|
24
|
-
#
|
31
|
+
# Delete just needs a wrapper around it to specify the specific partition.
|
25
32
|
#
|
33
|
+
# @return [optional] undefined
|
26
34
|
def delete
|
27
35
|
if persisted?
|
28
36
|
self.class.from_partition(*self.class.partition_key_values(attributes)).delete(id)
|