partitioned 0.8.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +85 -36
- data/Rakefile +3 -0
- data/examples/README +46 -18
- data/lib/monkey_patch_activerecord.rb +14 -8
- data/lib/monkey_patch_postgres.rb +46 -13
- data/lib/partitioned/active_record_overrides.rb +13 -5
- data/lib/partitioned/bulk_methods_mixin.rb +91 -146
- data/lib/partitioned/by_created_at.rb +3 -1
- data/lib/partitioned/by_foreign_key.rb +5 -0
- data/lib/partitioned/by_id.rb +10 -4
- data/lib/partitioned/by_integer_field.rb +9 -0
- data/lib/partitioned/by_monthly_time_field.rb +8 -1
- data/lib/partitioned/by_time_field.rb +16 -8
- data/lib/partitioned/by_weekly_time_field.rb +6 -3
- data/lib/partitioned/multi_level/configurator/data.rb +1 -0
- data/lib/partitioned/multi_level/configurator/dsl.rb +11 -0
- data/lib/partitioned/multi_level/configurator/reader.rb +18 -0
- data/lib/partitioned/multi_level/partition_manager.rb +13 -4
- data/lib/partitioned/multi_level.rb +3 -1
- data/lib/partitioned/partitioned_base/configurator/data.rb +10 -1
- data/lib/partitioned/partitioned_base/configurator/dsl.rb +20 -15
- data/lib/partitioned/partitioned_base/configurator/reader.rb +3 -0
- data/lib/partitioned/partitioned_base/configurator.rb +4 -0
- data/lib/partitioned/partitioned_base/partition_manager.rb +17 -15
- data/lib/partitioned/partitioned_base/sql_adapter.rb +25 -23
- data/lib/partitioned/partitioned_base.rb +112 -41
- data/lib/partitioned/version.rb +2 -1
- data/partitioned.gemspec +3 -2
- metadata +68 -73
data/README
CHANGED
@@ -1,29 +1,30 @@
|
|
1
1
|
Partitioned
|
2
2
|
===========
|
3
3
|
|
4
|
-
Partitioned adds assistance to ActiveRecord for manipulating
|
5
|
-
|
6
|
-
|
4
|
+
Partitioned adds assistance to ActiveRecord for manipulating (reading,
|
5
|
+
creating, updating) an activerecord model that represents data that
|
6
|
+
may be in one of many database tables (determined by the Model's data).
|
7
7
|
|
8
|
-
It also has features that support the creation and deleting of child
|
9
|
-
partitioning support infrastructure.
|
8
|
+
It also has features that support the creation and deleting of child
|
9
|
+
tables and partitioning support infrastructure.
|
10
10
|
|
11
|
-
It supports Postgres partitioning and has specific features to
|
12
|
-
failings of Postgres's implementation of partitioning.
|
11
|
+
It supports Postgres partitioning and has specific features to
|
12
|
+
overcome basic failings of Postgres's implementation of partitioning.
|
13
13
|
|
14
14
|
Basics:
|
15
|
-
A parent table can be inherited by many child tables that inherit most
|
16
|
-
attributes of the parent table including its columns. child
|
17
|
-
(and for the uses of this plugin must) have a unique
|
18
|
-
defines which data should be located in that
|
15
|
+
A parent table can be inherited by many child tables that inherit most
|
16
|
+
of the attributes of the parent table including its columns. child
|
17
|
+
tables typically (and for the uses of this plugin must) have a unique
|
18
|
+
check constraint that defines which data should be located in that
|
19
|
+
specific child table.
|
19
20
|
|
20
|
-
Such a constraint allows for the SQL planner to ignore most child
|
21
|
-
the (hopefully) one child table that contains the
|
22
|
-
data, and meta-data (indexes) which
|
23
|
-
desired data.
|
21
|
+
Such a constraint allows for the SQL planner to ignore most child
|
22
|
+
tables and target the (hopefully) one child table that contains the
|
23
|
+
records of interest. This splits data and meta-data (indexes), which
|
24
|
+
provides streamlined targeted access to the desired data.
|
24
25
|
|
25
|
-
Support for bulk inserts and bulk updates is also provided via
|
26
|
-
Partitioned::Base.update_many.
|
26
|
+
Support for bulk inserts and bulk updates is also provided via
|
27
|
+
Partitioned::Base.create_many and Partitioned::Base.update_many.
|
27
28
|
|
28
29
|
Example
|
29
30
|
=======
|
@@ -33,7 +34,21 @@ Given the following models:
|
|
33
34
|
class Company < ActiveRecord::Base
|
34
35
|
end
|
35
36
|
|
36
|
-
class
|
37
|
+
class ByCompanyId < Partitioned::ByForeignKey
|
38
|
+
self.abstract_class = true
|
39
|
+
|
40
|
+
belongs_to :company
|
41
|
+
|
42
|
+
def self.partition_foreign_key
|
43
|
+
return :company_id
|
44
|
+
end
|
45
|
+
|
46
|
+
partitioned do |partition|
|
47
|
+
partition.index :id, :unique => true
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
class Employee < ByCompanyId
|
37
52
|
end
|
38
53
|
|
39
54
|
and the following tables:
|
@@ -47,6 +62,10 @@ and the following tables:
|
|
47
62
|
name text null
|
48
63
|
);
|
49
64
|
|
65
|
+
-- add some companies
|
66
|
+
insert into companies (name) values
|
67
|
+
('company 1'),('company 2'),('company 3');
|
68
|
+
|
50
69
|
-- this is the parent table
|
51
70
|
create table employees
|
52
71
|
(
|
@@ -57,18 +76,45 @@ and the following tables:
|
|
57
76
|
company_id integer not null references companies
|
58
77
|
);
|
59
78
|
|
79
|
+
We now need to create some infrastructure for partitioned tables,
|
80
|
+
in particular, we create a schema to hold the child partition
|
81
|
+
tables of employees.
|
82
|
+
|
83
|
+
Employee.create_infrastructure
|
84
|
+
|
85
|
+
Which creates the employees_partitions schema using the following SQL:
|
86
|
+
|
60
87
|
create schema employees_partitions;
|
61
88
|
|
62
|
-
|
89
|
+
NOTE: We also install protections on the employees table so it isn't
|
90
|
+
used as a data table (this SQL is not presented for simplicity but is
|
91
|
+
a part of the create_infrastructure call).
|
92
|
+
|
93
|
+
To add child tables we use the create_new_partition_tables method:
|
94
|
+
|
95
|
+
company_ids = Company.all.map(&:id)
|
96
|
+
Employee.create_new_partition_tables(company_ids)
|
97
|
+
|
98
|
+
which results in the following SQL:
|
63
99
|
|
64
|
-
|
65
|
-
|
66
|
-
create table employees_partitions.p2
|
67
|
-
|
100
|
+
create table employees_partitions.p1
|
101
|
+
( CHECK ( company_id = 1 ) ) INHERITS (employees);
|
102
|
+
create table employees_partitions.p2
|
103
|
+
( CHECK ( company_id = 2 ) ) INHERITS (employees);
|
104
|
+
create table employees_partitions.p3
|
105
|
+
( CHECK ( company_id = 3 ) ) INHERITS (employees);
|
68
106
|
|
69
|
-
|
70
|
-
|
71
|
-
|
107
|
+
NOTE: Some other SQL is generated in the above example, specifically
|
108
|
+
the reference to the companies table needs to be explicitly created
|
109
|
+
for postgres child tables AND the unique index on 'id' is created.
|
110
|
+
These are not shown for simplicity.
|
111
|
+
|
112
|
+
Now we can do operations involving the child partitions.
|
113
|
+
|
114
|
+
Since database records exist in a specific child table dependent on
|
115
|
+
the field "company_id" we need to have creates that turn into database
|
116
|
+
inserts of the EMPLOYEES table redirect the record insert into the
|
117
|
+
specific child table determined by the value of COMPANY_ID
|
72
118
|
|
73
119
|
eg:
|
74
120
|
employee = Employee.create(:name => 'Keith', :company_id => 1)
|
@@ -79,12 +125,12 @@ this would normally produce the following:
|
|
79
125
|
but with Partitioned we see:
|
80
126
|
INSERT INTO employees_partitions.p1 ('name', company_id) values ('Keith', 1);
|
81
127
|
|
82
|
-
reads of such a table need some assistance to find the specific child
|
83
|
-
record exists in.
|
128
|
+
reads of such a table need some assistance to find the specific child
|
129
|
+
table the record exists in.
|
84
130
|
|
85
|
-
Since we are partitioned by company_id the programmer needs to provide
|
86
|
-
when fetching data, or the database will need to
|
87
|
-
specific record we are looking for.
|
131
|
+
Since we are partitioned by company_id the programmer needs to provide
|
132
|
+
that information when fetching data, or the database will need to
|
133
|
+
search all child tables for the specific record we are looking for.
|
88
134
|
|
89
135
|
This is no longer valid (well, doesn't perform well):
|
90
136
|
|
@@ -93,11 +139,14 @@ This is no longer valid (well, doesn't perform well):
|
|
93
139
|
instead, do one of the following:
|
94
140
|
|
95
141
|
employee = Employee.from_partition(1).find(1)
|
96
|
-
employee = Employee.find(:first,
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
142
|
+
employee = Employee.find(:first,
|
143
|
+
:conditions => {:name => 'Keith', :company_id => 1})
|
144
|
+
employee = Employee.find(:first,
|
145
|
+
:conditions => {:id => 1, :company_id => 1})
|
146
|
+
|
147
|
+
an update (employee.save where the record already exists in the
|
148
|
+
database) will take advantage of knowing which child table the record
|
149
|
+
exists in so it can do some optimization.
|
101
150
|
|
102
151
|
so, the following works as expected:
|
103
152
|
|
data/Rakefile
CHANGED
data/examples/README
CHANGED
@@ -1,23 +1,51 @@
|
|
1
1
|
The directory holds examples of how to use the partitioned gem.
|
2
|
+
These rails scripts will create and populate partitioned tables. The
|
3
|
+
scripts accept the following parameters:
|
4
|
+
|
5
|
+
--? list available options
|
6
|
+
--force delete tables before starting
|
7
|
+
default: false
|
8
|
+
--cleanup delete tables and exit
|
9
|
+
default: false
|
10
|
+
--create-many how many objects to create via create_many
|
11
|
+
default: 3000
|
12
|
+
--create-individual how many objects to create via create
|
13
|
+
default: 1000
|
14
|
+
--new-individual how many objects to create via new.save
|
15
|
+
default: 1000
|
16
|
+
--update-individual how many objects to update individually
|
17
|
+
default: 1000
|
18
|
+
--update-many how many objects to update via update_many
|
19
|
+
default: 1000
|
20
|
+
|
21
|
+
The scripts are:
|
22
|
+
|
23
|
+
company_id.rb: table 'employees' partitioned by company_id
|
24
|
+
|
25
|
+
company_id_and_created_at.rb: table 'employees' has multi-level
|
26
|
+
partitioning by company_id then created_at. created_at is grouped by
|
27
|
+
week where weeks start on Monday.
|
28
|
+
|
29
|
+
created_at.rb: table 'employees' partitioned by created_at
|
30
|
+
created_at is grouped by week where weeks start on Monday.
|
31
|
+
|
32
|
+
created_at_referencing_awards.rb: table 'employees' partitioned by
|
33
|
+
created_at and table 'awards' is partitioned by created_at with a
|
34
|
+
reference to specific child table of employees with the created_at
|
35
|
+
range.
|
36
|
+
|
37
|
+
id.rb: partitioned on 'id' grouping each 10 records into separate
|
38
|
+
child tables.
|
39
|
+
|
40
|
+
start_date.rb: grouped by column start_date which is a date grouped
|
41
|
+
by month.
|
2
42
|
|
3
43
|
The lib directory contains:
|
4
|
-
by_company_id.rb - a partitioned model where the partition's key is the column company_id that references companies.
|
5
|
-
company.rb - an ActiveRecord model for the table companies.
|
6
|
-
roman.rb - some helper routines for generating roman numerals.
|
7
44
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
45
|
+
by_company_id.rb: a partitioned model where the partition's key is
|
46
|
+
the column company_id that references companies.
|
47
|
+
|
48
|
+
company.rb: an ActiveRecord model for the table companies.
|
49
|
+
|
50
|
+
roman.rb: some helper routines for generating roman numerals.
|
12
51
|
|
13
|
-
The scripts are:
|
14
|
-
company_id.rb - table 'employees' partitioned by company_id
|
15
|
-
company_id_and_created_at.rb - table 'employees' has multi-level partitioning by company_id then created_at
|
16
|
-
created_at is grouped by week where weeks start on Monday.
|
17
|
-
created_at.rb - table 'employees' partitioned by created_at
|
18
|
-
created_at is grouped by week where weeks start on Monday.
|
19
|
-
created_at_referencing_awards.rb - table 'employees' partitioned by created_at and table 'awards' is partitioned
|
20
|
-
by created_at which a reference to specific child table of employees with the
|
21
|
-
created_at range.
|
22
|
-
id.rb - partitioned on 'id' grouping each 10 records into separate child tables.
|
23
|
-
start_date.rb - grouped by column start_date which is a date grouped by month.
|
@@ -5,11 +5,17 @@ require 'active_record/relation.rb'
|
|
5
5
|
require 'active_record/persistence.rb'
|
6
6
|
|
7
7
|
#
|
8
|
-
#
|
9
|
-
# attributes
|
8
|
+
# Patching {ActiveRecord} to allow specifying the table name as a function of
|
9
|
+
# attributes.
|
10
10
|
#
|
11
11
|
module ActiveRecord
|
12
|
+
#
|
13
|
+
# Patches for Persistence to allow certain partitioning (those related to the primary key) to work.
|
14
|
+
#
|
12
15
|
module Persistence
|
16
|
+
#
|
17
|
+
# patch the create method to prefetch the primary key if needed
|
18
|
+
#
|
13
19
|
def create
|
14
20
|
if self.id.nil? && self.class.respond_to?(:prefetch_primary_key?) && self.class.prefetch_primary_key?
|
15
21
|
self.id = connection.next_sequence_value(self.class.sequence_name)
|
@@ -27,17 +33,17 @@ module ActiveRecord
|
|
27
33
|
end
|
28
34
|
end
|
29
35
|
#
|
30
|
-
#
|
31
|
-
# requesting name of table as a function of attributes
|
36
|
+
# Patches for relation to allow back hooks into the {ActiveRecord}
|
37
|
+
# requesting name of table as a function of attributes.
|
32
38
|
#
|
33
39
|
class Relation
|
34
40
|
#
|
35
|
-
#
|
41
|
+
# Patches {ActiveRecord}'s building of an insert statement to request
|
36
42
|
# of the model a table name with respect to attribute values being
|
37
|
-
# inserted
|
43
|
+
# inserted.
|
38
44
|
#
|
39
|
-
#
|
40
|
-
# with PARTITIONED comment
|
45
|
+
# The differences between this and the original code are small and marked
|
46
|
+
# with PARTITIONED comment.
|
41
47
|
def insert(values)
|
42
48
|
primary_key_value = nil
|
43
49
|
|
@@ -2,47 +2,73 @@ require 'active_record'
|
|
2
2
|
require 'active_record/base'
|
3
3
|
require 'active_record/connection_adapters/abstract_adapter'
|
4
4
|
|
5
|
+
#
|
6
|
+
# Patching {ActiveRecord::ConnectionAdapters::TableDefinition} and
|
7
|
+
# {ActiveRecord::ConnectionAdapters::PostgreSQLAdapter} to add functionality
|
8
|
+
# needed to abstract partition specific SQL statements.
|
9
|
+
#
|
5
10
|
module ActiveRecord::ConnectionAdapters
|
11
|
+
#
|
12
|
+
# Patches associated with building check constraints.
|
13
|
+
#
|
6
14
|
class TableDefinition
|
15
|
+
#
|
16
|
+
# Builds a SQL check constraint
|
17
|
+
#
|
18
|
+
# @param [String] constraint a SQL constraint
|
7
19
|
def check_constraint(constraint)
|
8
20
|
@columns << Struct.new(:to_sql).new("CHECK (#{constraint})")
|
9
21
|
end
|
10
22
|
end
|
11
23
|
|
24
|
+
#
|
25
|
+
# Patches extending the postgres adapter with new operations for managing
|
26
|
+
# sequences (and sets of sequence values), schemas and foreign keys.
|
27
|
+
# These should go into AbstractAdapter allowing any database adapter
|
28
|
+
# to take advantage of these SQL builders.
|
29
|
+
#
|
12
30
|
class PostgreSQLAdapter < AbstractAdapter
|
13
31
|
#
|
14
|
-
#
|
32
|
+
# Get the next value in a sequence. Used on INSERT operation for
|
15
33
|
# partitioning like by_id because the ID is required before the insert
|
16
34
|
# so that the specific child table is known ahead of time.
|
17
35
|
#
|
36
|
+
# @param [String] sequence_name the name of the sequence to fetch the next value from
|
37
|
+
# @return [Integer] the value from the sequence
|
18
38
|
def next_sequence_value(sequence_name)
|
19
39
|
return execute("select nextval('#{sequence_name}')").field_values("nextval").first
|
20
40
|
end
|
21
41
|
|
22
42
|
#
|
23
|
-
#
|
24
|
-
# batch_size - count of values
|
43
|
+
# Get the next several values in a sequence.
|
25
44
|
#
|
45
|
+
# @param [String] sequence_name the name of the sequence to fetch the next values from
|
46
|
+
# @param [Integer] batch_size count of values.
|
47
|
+
# @return [Array<Integer>] an array of values from the sequence
|
26
48
|
def next_sequence_values(sequence_name, batch_size)
|
27
49
|
result = execute("select nextval('#{sequence_name}') from generate_series(1, #{batch_size})")
|
28
50
|
return result.field_values("nextval").map(&:to_i)
|
29
51
|
end
|
30
52
|
|
31
53
|
#
|
32
|
-
#
|
33
|
-
# just before an insert.
|
54
|
+
# Causes ActiveRecord to fetch the primary key for the table (using next_sequence_value())
|
55
|
+
# just before an insert. We need the prefetch to happen but we don't have enough information
|
34
56
|
# here to determine if it should happen, so Relation::insert has been modified to request of
|
35
57
|
# the ActiveRecord::Base derived class if it requires a prefetch.
|
36
58
|
#
|
59
|
+
# @param [String] table_name the table name to query
|
60
|
+
# @return [Boolean] returns true if the table should have its primary key prefetched.
|
37
61
|
def prefetch_primary_key?(table_name)
|
38
62
|
return false
|
39
63
|
end
|
40
64
|
|
41
65
|
#
|
42
|
-
#
|
43
|
-
# options:
|
44
|
-
# :unless_exists - check if schema exists.
|
66
|
+
# Creates a schema given a name.
|
45
67
|
#
|
68
|
+
# @param [String] name the name of the schema.
|
69
|
+
# @param [Hash] options ({}) options for creating a schema
|
70
|
+
# @option options [Boolean] :unless_exists (false) check if schema exists.
|
71
|
+
# @return [optional] undefined
|
46
72
|
def create_schema(name, options = {})
|
47
73
|
if options[:unless_exists]
|
48
74
|
return if execute("select count(*) from pg_namespace where nspname = '#{name}'").getvalue(0,0).to_i > 0
|
@@ -51,11 +77,13 @@ module ActiveRecord::ConnectionAdapters
|
|
51
77
|
end
|
52
78
|
|
53
79
|
#
|
54
|
-
#
|
55
|
-
# options:
|
56
|
-
# :if_exists - check if schema exists.
|
57
|
-
# :cascade - cascade drop to dependant objects
|
80
|
+
# Drop a schema given a name.
|
58
81
|
#
|
82
|
+
# @param [String] name the name of the schema.
|
83
|
+
# @param [Hash] options ({}) options for dropping a schema
|
84
|
+
# @option options [Boolean] :if_exists (false) check if schema exists.
|
85
|
+
# @option options [Boolean] :cascade (false) drop dependent objects
|
86
|
+
# @return [optional] undefined
|
59
87
|
def drop_schema(name, options = {})
|
60
88
|
if options[:if_exists]
|
61
89
|
return if execute("select count(*) from pg_namespace where nspname = '#{name}'").getvalue(0,0).to_i == 0
|
@@ -64,8 +92,13 @@ module ActiveRecord::ConnectionAdapters
|
|
64
92
|
end
|
65
93
|
|
66
94
|
#
|
67
|
-
#
|
95
|
+
# Add foreign key constraint to table.
|
68
96
|
#
|
97
|
+
# @param [String] referencing_table_name the name of the table containing the foreign key
|
98
|
+
# @param [String] referencing_field_name the name of foreign key column
|
99
|
+
# @param [String] referenced_table_name the name of the table referenced by the foreign key
|
100
|
+
# @param [String] referenced_field_name (:id) the name of the column referenced by the foreign key
|
101
|
+
# @return [optional] undefined
|
69
102
|
def add_foreign_key(referencing_table_name, referencing_field_name, referenced_table_name, referenced_field_name = :id)
|
70
103
|
execute("ALTER TABLE #{referencing_table_name} add foreign key (#{referencing_field_name}) references #{referenced_table_name}(#{referenced_field_name})")
|
71
104
|
end
|
@@ -1,19 +1,26 @@
|
|
1
1
|
#
|
2
|
-
#
|
2
|
+
# These are things our base class must fix in ActiveRecord::Base
|
3
3
|
#
|
4
|
-
#
|
4
|
+
# No need to monkey patch these, just override them.
|
5
5
|
#
|
6
6
|
module Partitioned
|
7
|
+
#
|
8
|
+
# methods that need to be overridden in an ActiveRecord::Base derived class so that we can support partitioning
|
9
|
+
#
|
7
10
|
module ActiveRecordOverrides
|
8
11
|
#
|
9
12
|
# arel_attribute_values needs to return attributes (and their values) associated with the dynamic_arel_table instead of the
|
10
13
|
# static arel_table provided by ActiveRecord.
|
11
14
|
#
|
12
|
-
#
|
15
|
+
# The standard release of this function gathers a collection of attributes and creates a wrapper function around them
|
13
16
|
# that names the table they are associated with. that naming is incorrect for partitioned tables.
|
14
17
|
#
|
15
|
-
#
|
18
|
+
# We call the standard release's method then retrofit our partitioned table into the hash that is returned.
|
16
19
|
#
|
20
|
+
# @param [Boolean] include_primary_key (true)
|
21
|
+
# @param [Boolean] include_readonly_attributes (true)
|
22
|
+
# @param [Array] attribute_names (@attributes.keys)
|
23
|
+
# @return [Hash] hash of key value pairs associated with persistent attributes
|
17
24
|
def arel_attributes_values(include_primary_key = true, include_readonly_attributes = true, attribute_names = @attributes.keys)
|
18
25
|
attrs = super
|
19
26
|
actual_arel_table = dynamic_arel_table(self.class.table_name)
|
@@ -21,8 +28,9 @@ module Partitioned
|
|
21
28
|
end
|
22
29
|
|
23
30
|
#
|
24
|
-
#
|
31
|
+
# Delete just needs a wrapper around it to specify the specific partition.
|
25
32
|
#
|
33
|
+
# @return [optional] undefined
|
26
34
|
def delete
|
27
35
|
if persisted?
|
28
36
|
self.class.from_partition(*self.class.partition_key_values(attributes)).delete(id)
|