partitioned 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +17 -0
- data/LICENSE +30 -0
- data/PARTITIONING_EXPLAINED.txt +351 -0
- data/README +111 -0
- data/Rakefile +27 -0
- data/examples/README +23 -0
- data/examples/company_id.rb +417 -0
- data/examples/company_id_and_created_at.rb +689 -0
- data/examples/created_at.rb +590 -0
- data/examples/created_at_referencing_awards.rb +1000 -0
- data/examples/id.rb +475 -0
- data/examples/lib/by_company_id.rb +11 -0
- data/examples/lib/command_line_tool_mixin.rb +71 -0
- data/examples/lib/company.rb +29 -0
- data/examples/lib/get_options.rb +44 -0
- data/examples/lib/roman.rb +41 -0
- data/examples/start_date.rb +621 -0
- data/init.rb +1 -0
- data/lib/monkey_patch_activerecord.rb +92 -0
- data/lib/monkey_patch_postgres.rb +73 -0
- data/lib/partitioned.rb +26 -0
- data/lib/partitioned/active_record_overrides.rb +34 -0
- data/lib/partitioned/bulk_methods_mixin.rb +288 -0
- data/lib/partitioned/by_created_at.rb +13 -0
- data/lib/partitioned/by_foreign_key.rb +21 -0
- data/lib/partitioned/by_id.rb +35 -0
- data/lib/partitioned/by_integer_field.rb +32 -0
- data/lib/partitioned/by_monthly_time_field.rb +23 -0
- data/lib/partitioned/by_time_field.rb +65 -0
- data/lib/partitioned/by_weekly_time_field.rb +30 -0
- data/lib/partitioned/multi_level.rb +20 -0
- data/lib/partitioned/multi_level/configurator/data.rb +14 -0
- data/lib/partitioned/multi_level/configurator/dsl.rb +32 -0
- data/lib/partitioned/multi_level/configurator/reader.rb +162 -0
- data/lib/partitioned/multi_level/partition_manager.rb +47 -0
- data/lib/partitioned/partitioned_base.rb +354 -0
- data/lib/partitioned/partitioned_base/configurator.rb +6 -0
- data/lib/partitioned/partitioned_base/configurator/data.rb +62 -0
- data/lib/partitioned/partitioned_base/configurator/dsl.rb +628 -0
- data/lib/partitioned/partitioned_base/configurator/reader.rb +209 -0
- data/lib/partitioned/partitioned_base/partition_manager.rb +138 -0
- data/lib/partitioned/partitioned_base/sql_adapter.rb +286 -0
- data/lib/partitioned/version.rb +3 -0
- data/lib/tasks/desirable_tasks.rake +4 -0
- data/partitioned.gemspec +21 -0
- data/spec/dummy/.rspec +1 -0
- data/spec/dummy/README.rdoc +261 -0
- data/spec/dummy/Rakefile +7 -0
- data/spec/dummy/app/assets/javascripts/application.js +9 -0
- data/spec/dummy/app/assets/stylesheets/application.css +7 -0
- data/spec/dummy/app/controllers/application_controller.rb +3 -0
- data/spec/dummy/app/helpers/application_helper.rb +2 -0
- data/spec/dummy/app/views/layouts/application.html.erb +14 -0
- data/spec/dummy/config.ru +4 -0
- data/spec/dummy/config/application.rb +51 -0
- data/spec/dummy/config/boot.rb +10 -0
- data/spec/dummy/config/database.yml +32 -0
- data/spec/dummy/config/environment.rb +5 -0
- data/spec/dummy/config/environments/development.rb +30 -0
- data/spec/dummy/config/environments/production.rb +60 -0
- data/spec/dummy/config/environments/test.rb +39 -0
- data/spec/dummy/config/initializers/backtrace_silencers.rb +7 -0
- data/spec/dummy/config/initializers/inflections.rb +10 -0
- data/spec/dummy/config/initializers/mime_types.rb +5 -0
- data/spec/dummy/config/initializers/secret_token.rb +7 -0
- data/spec/dummy/config/initializers/session_store.rb +8 -0
- data/spec/dummy/config/initializers/wrap_parameters.rb +14 -0
- data/spec/dummy/config/locales/en.yml +5 -0
- data/spec/dummy/config/routes.rb +58 -0
- data/spec/dummy/public/404.html +26 -0
- data/spec/dummy/public/422.html +26 -0
- data/spec/dummy/public/500.html +26 -0
- data/spec/dummy/public/favicon.ico +0 -0
- data/spec/dummy/script/rails +6 -0
- data/spec/dummy/spec/spec_helper.rb +27 -0
- data/spec/monkey_patch_posgres_spec.rb +176 -0
- data/spec/partitioned/bulk_methods_mixin_spec.rb +512 -0
- data/spec/partitioned/by_created_at_spec.rb +62 -0
- data/spec/partitioned/by_foreign_key_spec.rb +95 -0
- data/spec/partitioned/by_id_spec.rb +97 -0
- data/spec/partitioned/by_integer_field_spec.rb +143 -0
- data/spec/partitioned/by_monthly_time_field_spec.rb +100 -0
- data/spec/partitioned/by_time_field_spec.rb +182 -0
- data/spec/partitioned/by_weekly_time_field_spec.rb +100 -0
- data/spec/partitioned/multi_level/configurator/dsl_spec.rb +88 -0
- data/spec/partitioned/multi_level/configurator/reader_spec.rb +147 -0
- data/spec/partitioned/partitioned_base/configurator/dsl_spec.rb +459 -0
- data/spec/partitioned/partitioned_base/configurator/reader_spec.rb +513 -0
- data/spec/partitioned/partitioned_base/sql_adapter_spec.rb +204 -0
- data/spec/partitioned/partitioned_base_spec.rb +173 -0
- data/spec/spec_helper.rb +32 -0
- data/spec/support/shared_example_spec_helper_for_integer_key.rb +137 -0
- data/spec/support/shared_example_spec_helper_for_time_key.rb +147 -0
- data/spec/support/tables_spec_helper.rb +47 -0
- metadata +250 -0
data/Gemfile
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
source "http://rubygems.org"
|
|
2
|
+
|
|
3
|
+
# Declare your gem's dependencies in partitioned.gemspec.
|
|
4
|
+
# Bundler will treat runtime dependencies like base dependencies, and
|
|
5
|
+
# development dependencies will be added by default to the :development group.
|
|
6
|
+
gemspec
|
|
7
|
+
|
|
8
|
+
# jquery-rails is used by the dummy application
|
|
9
|
+
gem "jquery-rails"
|
|
10
|
+
|
|
11
|
+
# Declare any dependencies that are still in development here instead of in
|
|
12
|
+
# your gemspec. These might include edge Rails or gems from your path or
|
|
13
|
+
# Git. Remember to move these dependencies to your gemspec before releasing
|
|
14
|
+
# your gem to rubygems.org.
|
|
15
|
+
|
|
16
|
+
# To use debugger
|
|
17
|
+
# gem 'ruby-debug'
|
data/LICENSE
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
Copyright (c) 2010-2012, Fiksu, Inc.
|
|
2
|
+
All rights reserved.
|
|
3
|
+
|
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
|
5
|
+
modification, are permitted provided that the following conditions are
|
|
6
|
+
met:
|
|
7
|
+
|
|
8
|
+
o Redistributions of source code must retain the above copyright
|
|
9
|
+
notice, this list of conditions and the following disclaimer.
|
|
10
|
+
|
|
11
|
+
o Redistributions in binary form must reproduce the above copyright
|
|
12
|
+
notice, this list of conditions and the following disclaimer in the
|
|
13
|
+
documentation and/or other materials provided with the
|
|
14
|
+
distribution.
|
|
15
|
+
|
|
16
|
+
o Fiksu, Inc. nor the names of its contributors may be used to
|
|
17
|
+
endorse or promote products derived from this software without
|
|
18
|
+
specific prior written permission.
|
|
19
|
+
|
|
20
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
21
|
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
22
|
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
23
|
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
24
|
+
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
25
|
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
26
|
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
27
|
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
28
|
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
29
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
30
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
This was combed from instant messaging session Keith had with Aleksandr.
|
|
2
|
+
|
|
3
|
+
It describes partitioning as it relates to Postgres and how Postgres implements partitioning
|
|
4
|
+
(from a user's perspective) using table inheritance.
|
|
5
|
+
|
|
6
|
+
Keith: do you know what a primary key is?
|
|
7
|
+
me: yes
|
|
8
|
+
Keith: ok… do you know what a check constraint is
|
|
9
|
+
me: yes
|
|
10
|
+
Keith: great, do you know what an index is?
|
|
11
|
+
me: yes
|
|
12
|
+
Keith: excellent. Do you know what table inheritance means with respect to postgres?
|
|
13
|
+
me: Do you mean partitioning ?
|
|
14
|
+
Keith:
|
|
15
|
+
no, they are two different things, let me explain:
|
|
16
|
+
a very simple table:
|
|
17
|
+
create table a (a1 integer, a2 integer);
|
|
18
|
+
and a child of it:
|
|
19
|
+
create table b () inherits (a);
|
|
20
|
+
that is table inheritance. table 'b' has all the columns of table 'a'
|
|
21
|
+
me: Yes, it is clear
|
|
22
|
+
Keith:
|
|
23
|
+
if you go to a psql prompt you can type those in and see what happens…. but let me explain further,
|
|
24
|
+
you can do something like this:
|
|
25
|
+
create table c (c1 text) inherits (a);
|
|
26
|
+
table 'c' has all columns table 'a' has AND it has 'c1' (a text column), cool?
|
|
27
|
+
(look at the difference between the create table 'c' and create table 'b')
|
|
28
|
+
me: b has only columns a1 and a2
|
|
29
|
+
Keith:
|
|
30
|
+
correct, that is table inheritance. no strings between the tables except the schemas are shared.
|
|
31
|
+
well.. there is one string. child tables will be searched for data when the parent table is queried, example:
|
|
32
|
+
|
|
33
|
+
psql=# create table a (a1 integer, a2 integer);
|
|
34
|
+
psql=# create table b () inherits (a);
|
|
35
|
+
psql=# create table c (c1 text) inherits (a);
|
|
36
|
+
psql=# insert into c (a1,a2,c1) values (1,2,'three');
|
|
37
|
+
psql=# insert into b (a1,a2) values (11,22);
|
|
38
|
+
psql=# insert into a (a1,a2) values (111,222);
|
|
39
|
+
psql=# select * from a;
|
|
40
|
+
a1 | a2
|
|
41
|
+
-----+-----
|
|
42
|
+
111 | 222
|
|
43
|
+
11 | 22
|
|
44
|
+
1 | 2
|
|
45
|
+
(3 rows)
|
|
46
|
+
|
|
47
|
+
psql=# select * from b;
|
|
48
|
+
a1 | a2
|
|
49
|
+
----+----
|
|
50
|
+
11 | 22
|
|
51
|
+
(1 row)
|
|
52
|
+
|
|
53
|
+
psql=# select * from c;
|
|
54
|
+
a1 | a2 | c1
|
|
55
|
+
----+----+-------
|
|
56
|
+
1 | 2 | three
|
|
57
|
+
(1 row)
|
|
58
|
+
|
|
59
|
+
does this make sense?
|
|
60
|
+
me: cool
|
|
61
|
+
Keith:
|
|
62
|
+
notice that you can insert values in the parent table and the child's schema can be different.. no problems.
|
|
63
|
+
pretty slick,eh? this is table inheritance. partitioning is built on top of it.
|
|
64
|
+
Keith:
|
|
65
|
+
so, postgres can handle large tables. millions of rows. but indexes can get really large.
|
|
66
|
+
Especially if you have an index on a text column, but even indexes on integer fields can be large.
|
|
67
|
+
tens of millions of rows ... billions of rows… at some point the indexes take up more space than ready memory allows
|
|
68
|
+
if that happens Postgres partially swaps in indexes as it can... works on them, then swaps in other parts. This is very slow,
|
|
69
|
+
understand?
|
|
70
|
+
me: yes I do
|
|
71
|
+
Keith:
|
|
72
|
+
We'll work with two major tables for the rest of the examples. COMPANIES representing a business and EMPLOYEES representing
|
|
73
|
+
all known employees for all known COMPANIES.
|
|
74
|
+
create table companies
|
|
75
|
+
(
|
|
76
|
+
id serial not null primary key,
|
|
77
|
+
created_at timestamp not null default now(),
|
|
78
|
+
updated_at timestamp,
|
|
79
|
+
name text null
|
|
80
|
+
);
|
|
81
|
+
create table employees
|
|
82
|
+
(
|
|
83
|
+
id serial not null primary key,
|
|
84
|
+
created_at timestamp not null default now(),
|
|
85
|
+
updated_at timestamp,
|
|
86
|
+
name text not null,
|
|
87
|
+
salary money not null,
|
|
88
|
+
company_id integer not null references companies
|
|
89
|
+
);
|
|
90
|
+
does this make sense?
|
|
91
|
+
me: yes it does
|
|
92
|
+
Keith:
|
|
93
|
+
let's say our job is to track every employee for 4 very large companies. one might just put them all in the employees table...
|
|
94
|
+
insert into companies (name) values ('Fluent Mobile, Inc.'),('Fiksu, Inc.'),('AppExchanger.com, Inc.'),('FreeMyApps.com, Inc.');
|
|
95
|
+
four companies -- got it?
|
|
96
|
+
me: i see
|
|
97
|
+
Keith:
|
|
98
|
+
but, let's say that each company has 5 million employees. that is a large amount of data
|
|
99
|
+
and doing a search on their name field would be slow even with an index on some machines.
|
|
100
|
+
to solve that problem we partition the employees table on "company_id"… and here is how we do that:
|
|
101
|
+
create table employees_1 (check (company_id = 1)) inherits (employees);
|
|
102
|
+
create table employees_2 (check (company_id = 2)) inherits (employees);
|
|
103
|
+
create table employees_3 (check (company_id = 3)) inherits (employees);
|
|
104
|
+
create table employees_4 (check (company_id = 4)) inherits (employees);
|
|
105
|
+
the check constraint is the key to partitioning...
|
|
106
|
+
so, employees_1 inherits from employees (has all its columns) AND it adds one thing — a check constraint which forces any row in
|
|
107
|
+
its table to have a company_id value = 1, make sense?
|
|
108
|
+
me: yes
|
|
109
|
+
Keith:
|
|
110
|
+
Then to insert records into the table:
|
|
111
|
+
insert into employees_1 (name, salary, company_id) values ('keith', '100', 1);
|
|
112
|
+
does work, but
|
|
113
|
+
insert into employees_1 (name, salary, company_id) values ('keith', '100', 2);
|
|
114
|
+
will fail
|
|
115
|
+
me: check constraint processed data. it is clear
|
|
116
|
+
Keith:
|
|
117
|
+
Exactly. Great. Now i'll add some data to the tables.
|
|
118
|
+
notice this:
|
|
119
|
+
psql=# \d employees
|
|
120
|
+
Table "public.employees"
|
|
121
|
+
Column | Type | Modifiers
|
|
122
|
+
------------+-----------------------------+--------------------------------------------------------
|
|
123
|
+
id | integer | not null default nextval('employees_id_seq'::regclass)
|
|
124
|
+
created_at | timestamp without time zone | not null default now()
|
|
125
|
+
updated_at | timestamp without time zone |
|
|
126
|
+
name | text | not null
|
|
127
|
+
salary | money | not null
|
|
128
|
+
company_id | integer | not null
|
|
129
|
+
Indexes:
|
|
130
|
+
"employees_pkey" PRIMARY KEY, btree (id)
|
|
131
|
+
Foreign-key constraints:
|
|
132
|
+
"employees_company_id_fkey" FOREIGN KEY (company_id) REFERENCES companies(id)
|
|
133
|
+
Number of child tables: 4 (Use \d+ to list them.)
|
|
134
|
+
see that there are child tables and PSQL tells you about them
|
|
135
|
+
me: i see all childrens
|
|
136
|
+
Keith:
|
|
137
|
+
now for data.
|
|
138
|
+
insert into employees_1 (name, salary, company_id) values ('keith', '100', 1), ('k2', '101', 1),('k3', '105', 1),('k4', '110', 1);
|
|
139
|
+
insert into employees_2 (name, salary, company_id) values ('sally', '100', 2), ('s2', '101', 2),('s3', '105', 2),('s4', '110', 2);
|
|
140
|
+
insert into employees_3 (name, salary, company_id) values ('william', '100', 3), ('w2', '101', 3),('w3', '105', 3),('w4', '110', 3);
|
|
141
|
+
insert into employees_4 (name, salary, company_id) values ('laura', '100', 4), ('l2', '101', 4),('l3', '105', 4),('l4', '110', 4);
|
|
142
|
+
note that we don't insert any data into the parent table… i will explain that soon.
|
|
143
|
+
(you can insert data into the parent table… it's legal.. but not logical for our purposes).
|
|
144
|
+
you can delete all rows in all tables by saying:
|
|
145
|
+
delete from employees;
|
|
146
|
+
but.. insert that data.. and let's talk about query planners. the query planner is the actual machine
|
|
147
|
+
in the database that figures out how to execute the query.
|
|
148
|
+
'explain' shows you what the query planner is doing/would do.
|
|
149
|
+
so… we'll use explain to figure out how partitioning helps us, try
|
|
150
|
+
psql=# explain select * from employees where name = 'keith';
|
|
151
|
+
QUERY PLAN
|
|
152
|
+
-----------------------------------------------------------------------------------
|
|
153
|
+
Result (cost=0.00..103.75 rows=20 width=64)
|
|
154
|
+
-> Append (cost=0.00..103.75 rows=20 width=64)
|
|
155
|
+
-> Seq Scan on employees (cost=0.00..20.75 rows=4 width=64)
|
|
156
|
+
Filter: (name = 'keith'::text)
|
|
157
|
+
-> Seq Scan on employees_1 employees (cost=0.00..20.75 rows=4 width=64)
|
|
158
|
+
Filter: (name = 'keith'::text)
|
|
159
|
+
-> Seq Scan on employees_2 employees (cost=0.00..20.75 rows=4 width=64)
|
|
160
|
+
Filter: (name = 'keith'::text)
|
|
161
|
+
-> Seq Scan on employees_3 employees (cost=0.00..20.75 rows=4 width=64)
|
|
162
|
+
Filter: (name = 'keith'::text)
|
|
163
|
+
-> Seq Scan on employees_4 employees (cost=0.00..20.75 rows=4 width=64)
|
|
164
|
+
Filter: (name = 'keith'::text)
|
|
165
|
+
(12 rows)
|
|
166
|
+
which is the worst of all possibilities. it checks every child table for name 'keith' then consolidates the information and returns the one row.
|
|
167
|
+
but.. if we do this
|
|
168
|
+
explain select * from employees where name = 'keith' and company_id = 1;
|
|
169
|
+
we get
|
|
170
|
+
psql=# explain select * from employees where name = 'keith' and company_id = 1;
|
|
171
|
+
QUERY PLAN
|
|
172
|
+
-----------------------------------------------------------------------------------
|
|
173
|
+
Result (cost=0.00..45.80 rows=2 width=64)
|
|
174
|
+
-> Append (cost=0.00..45.80 rows=2 width=64)
|
|
175
|
+
-> Seq Scan on employees (cost=0.00..22.90 rows=1 width=64)
|
|
176
|
+
Filter: ((name = 'keith'::text) AND (company_id = 1))
|
|
177
|
+
-> Seq Scan on employees_1 employees (cost=0.00..22.90 rows=1 width=64)
|
|
178
|
+
Filter: ((name = 'keith'::text) AND (company_id = 1))
|
|
179
|
+
(6 rows)
|
|
180
|
+
so much win
|
|
181
|
+
me: cost is better, and plan too
|
|
182
|
+
Keith:
|
|
183
|
+
Correct, because the planner could tell (using knowledge from the check constraint) that employees_1 was the only table it needed to look at.
|
|
184
|
+
equally as valid and even faster is:
|
|
185
|
+
explain select * from employees_1 where name = 'keith';
|
|
186
|
+
psql=# explain select * from employees_1 where name = 'keith';
|
|
187
|
+
QUERY PLAN
|
|
188
|
+
-------------------------------------------------------------
|
|
189
|
+
Seq Scan on employees_1 (cost=0.00..20.75 rows=4 width=64)
|
|
190
|
+
Filter: (name = 'keith'::text)
|
|
191
|
+
(2 rows)
|
|
192
|
+
you can imagine that an update will work in the same way. if the company_id is not specified it will check all tables.
|
|
193
|
+
me: i see
|
|
194
|
+
Keith:
|
|
195
|
+
great, so… that is partitioning. there are other forms... but before we go on, try this:
|
|
196
|
+
create table employees_5 (check (company_id = 5)) inherits (employees);
|
|
197
|
+
insert into employees_5 (name, salary, company_id) values ('vicky', '100', 5), ('v2', '101', 5),('v3', '105', 5),('v4', '110', 5);
|
|
198
|
+
well, that works, why? how could we insert a value 5 into company_id if employees has a reference to companies and there is not
|
|
199
|
+
a record with company.id = 5. Because postgres does not propagate indexes and referential integrity to child tables.
|
|
200
|
+
So, even though employees has the reference, employees_5 does not and the only constraint is that company_id = 5,
|
|
201
|
+
but not if company.id = 5 exists in the companies table does that make sense?
|
|
202
|
+
me: yes it is clear
|
|
203
|
+
Keith: so, to alleviate that problem we need to add foreign key constraints to each of the child tables
|
|
204
|
+
me: Yes it can resolve our inconsistency problem
|
|
205
|
+
Keith:
|
|
206
|
+
ALTER TABLE employees_1 ADD CONSTRAINT e1cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
207
|
+
ALTER TABLE employees_2 ADD CONSTRAINT e2cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
208
|
+
ALTER TABLE employees_3 ADD CONSTRAINT e3cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
209
|
+
ALTER TABLE employees_4 ADD CONSTRAINT e4cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
210
|
+
so, those should work.. but we need one more for employees_5, but of course:
|
|
211
|
+
psql=# ALTER TABLE employees_5 ADD CONSTRAINT e5cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
212
|
+
ERROR: insert or update on table "employees_5" violates foreign key constraint "e5cfk"
|
|
213
|
+
DETAIL: Key (company_id)=(5) is not present in table "companies".
|
|
214
|
+
make sense?
|
|
215
|
+
me: yes
|
|
216
|
+
Keith: to fix this we need to add one more row to companies...
|
|
217
|
+
Keith:
|
|
218
|
+
INSERT INTO companies (name) values ('gga');
|
|
219
|
+
and now ALTER TABLE employees_5 ADD CONSTRAINT e5cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
220
|
+
works, ok?
|
|
221
|
+
me: ok
|
|
222
|
+
Keith:
|
|
223
|
+
so, you can partition in many things… not just company_id = 1
|
|
224
|
+
you could say "check (company_id in (1,2,3))" for one child table
|
|
225
|
+
and "check (company_id = 4)" for another
|
|
226
|
+
and "check (company_id >= 5)" in yet another
|
|
227
|
+
if that made sense, you can even have check constraints overlap (although you shouldn't do that)
|
|
228
|
+
but one table could have CHECK (company_id <= 3)
|
|
229
|
+
and another could have CHECK (company_id >= 3)
|
|
230
|
+
so both child tables would be searched when company_id = 3. is that a problem? well.. that is up to you to decide with your data.
|
|
231
|
+
but we will, from now on, deal only with child tables that are mutually exclusive to fully optimize the query planner
|
|
232
|
+
does that make sense?
|
|
233
|
+
me: yes that does
|
|
234
|
+
Keith: Great, you can also partition on created_at… one minute.
|
|
235
|
+
Keith:
|
|
236
|
+
drop table employees_1;
|
|
237
|
+
drop table employees_2;
|
|
238
|
+
drop table employees_3;
|
|
239
|
+
drop table employees_4;
|
|
240
|
+
drop table employees_5;
|
|
241
|
+
create table employees_2000 (check (created_at >= '2000-01-01' and created_at < '2001-01-01')) inherits (employees);
|
|
242
|
+
create table employees_2001 (check (created_at >= '2001-01-01' and created_at < '2002-01-01')) inherits (employees);
|
|
243
|
+
create table employees_2002 (check (created_at >= '2002-01-01' and created_at < '2003-01-01')) inherits (employees);
|
|
244
|
+
create table employees_2003 (check (created_at >= '2003-01-01' and created_at < '2004-01-01')) inherits (employees);
|
|
245
|
+
create table employees_2004 (check (created_at >= '2004-01-01' and created_at < '2005-01-01')) inherits (employees);
|
|
246
|
+
is that obvious what it does?
|
|
247
|
+
me: yes, we create partition for each year between 2000-2005 years
|
|
248
|
+
Keith:
|
|
249
|
+
yes, and only created_at is looked at to determine where the query planner will look for records
|
|
250
|
+
make sense? company_id is not in the check constraint so the planner will not use it for queries on these tables
|
|
251
|
+
me: to have the excellent performance we need to add two partition for company_id and created_at
|
|
252
|
+
Keith:
|
|
253
|
+
it depends on how we access the data. but YES you are right
|
|
254
|
+
BUT if we only cared about when the employee record was created and NEVER cared about the company_id… then this schema
|
|
255
|
+
fits our needs, correct?
|
|
256
|
+
select distinct company_id from employees where created_at = '2001-06-14';
|
|
257
|
+
something like that is still efficient, for this schema, right?
|
|
258
|
+
or we can think about a schema of reports.. which we only care about reports on a year by year basis.
|
|
259
|
+
employees might not be the best example of usage for this… but the logic should be sane.
|
|
260
|
+
me: yes it very helpfull for reporting statistics and we have the best performance
|
|
261
|
+
Keith:
|
|
262
|
+
great, now.. let me blow your mind. is your mind ready to be blown?
|
|
263
|
+
me: I'm fine. yet...
|
|
264
|
+
Keith:
|
|
265
|
+
drop table employees_1;
|
|
266
|
+
drop table employees_2;
|
|
267
|
+
drop table employees_3;
|
|
268
|
+
drop table employees_4;
|
|
269
|
+
drop table employees_5;
|
|
270
|
+
drop table employees_2000;
|
|
271
|
+
drop table employees_2001;
|
|
272
|
+
drop table employees_2002;
|
|
273
|
+
drop table employees_2003;
|
|
274
|
+
drop table employees_2004;
|
|
275
|
+
create table employees_1 (check (company_id = 1)) inherits (employees);
|
|
276
|
+
create table employees_2 (check (company_id = 2)) inherits (employees);
|
|
277
|
+
create table employees_3 (check (company_id = 3)) inherits (employees);
|
|
278
|
+
create table employees_4 (check (company_id = 4)) inherits (employees);
|
|
279
|
+
create table employees_5 (check (company_id = 5)) inherits (employees);
|
|
280
|
+
create table employees_1_2000 (check (created_at >= '2000-01-01' and created_at < '2001-01-01')) inherits (employees_1);
|
|
281
|
+
create table employees_1_2001 (check (created_at >= '2001-01-01' and created_at < '2002-01-01')) inherits (employees_1);
|
|
282
|
+
create table employees_1_2002 (check (created_at >= '2002-01-01' and created_at < '2003-01-01')) inherits (employees_1);
|
|
283
|
+
create table employees_1_2003 (check (created_at >= '2003-01-01' and created_at < '2004-01-01')) inherits (employees_1);
|
|
284
|
+
create table employees_1_2004 (check (created_at >= '2004-01-01' and created_at < '2005-01-01')) inherits (employees_1);
|
|
285
|
+
|
|
286
|
+
create table employees_2_2000 (check (created_at >= '2000-01-01' and created_at < '2001-01-01')) inherits (employees_2);
|
|
287
|
+
create table employees_2_2001 (check (created_at >= '2001-01-01' and created_at < '2002-01-01')) inherits (employees_2);
|
|
288
|
+
create table employees_2_2002 (check (created_at >= '2002-01-01' and created_at < '2003-01-01')) inherits (employees_2);
|
|
289
|
+
create table employees_2_2003 (check (created_at >= '2003-01-01' and created_at < '2004-01-01')) inherits (employees_2);
|
|
290
|
+
create table employees_2_2004 (check (created_at >= '2004-01-01' and created_at < '2005-01-01')) inherits (employees_2);
|
|
291
|
+
|
|
292
|
+
create table employees_3_2000 (check (created_at >= '2000-01-01' and created_at < '2001-01-01')) inherits (employees_3);
|
|
293
|
+
create table employees_3_2001 (check (created_at >= '2001-01-01' and created_at < '2002-01-01')) inherits (employees_3);
|
|
294
|
+
create table employees_3_2002 (check (created_at >= '2002-01-01' and created_at < '2003-01-01')) inherits (employees_3);
|
|
295
|
+
create table employees_3_2003 (check (created_at >= '2003-01-01' and created_at < '2004-01-01')) inherits (employees_3);
|
|
296
|
+
create table employees_3_2004 (check (created_at >= '2004-01-01' and created_at < '2005-01-01')) inherits (employees_3);
|
|
297
|
+
|
|
298
|
+
create table employees_4_2000 (check (created_at >= '2000-01-01' and created_at < '2001-01-01')) inherits (employees_4);
|
|
299
|
+
create table employees_4_2001 (check (created_at >= '2001-01-01' and created_at < '2002-01-01')) inherits (employees_4);
|
|
300
|
+
create table employees_4_2002 (check (created_at >= '2002-01-01' and created_at < '2003-01-01')) inherits (employees_4);
|
|
301
|
+
create table employees_4_2003 (check (created_at >= '2003-01-01' and created_at < '2004-01-01')) inherits (employees_4);
|
|
302
|
+
create table employees_4_2004 (check (created_at >= '2004-01-01' and created_at < '2005-01-01')) inherits (employees_4);
|
|
303
|
+
|
|
304
|
+
create table employees_5_2000 (check (created_at >= '2000-01-01' and created_at < '2001-01-01')) inherits (employees_5);
|
|
305
|
+
create table employees_5_2001 (check (created_at >= '2001-01-01' and created_at < '2002-01-01')) inherits (employees_5);
|
|
306
|
+
create table employees_5_2002 (check (created_at >= '2002-01-01' and created_at < '2003-01-01')) inherits (employees_5);
|
|
307
|
+
create table employees_5_2003 (check (created_at >= '2003-01-01' and created_at < '2004-01-01')) inherits (employees_5);
|
|
308
|
+
create table employees_5_2004 (check (created_at >= '2004-01-01' and created_at < '2005-01-01')) inherits (employees_5);
|
|
309
|
+
|
|
310
|
+
we can have multi level partitioning. in this case… the first level inherits from employees
|
|
311
|
+
but each employees_X table has 5 children that inherit from it and put the check constraint on created_at.
|
|
312
|
+
so, now a query:
|
|
313
|
+
select * from employees where created_at = '2001-07-04' and company_id = 5;
|
|
314
|
+
wins big. the referential integrity problem with company_id => companies still exists. we must apply that
|
|
315
|
+
ALTER TABLE employees_1_2000 ADD CONSTRAINT e1cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
316
|
+
ALTER TABLE employees_1_2001 ADD CONSTRAINT e2cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
317
|
+
ALTER TABLE employees_1_2002 ADD CONSTRAINT e3cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
318
|
+
ALTER TABLE employees_1_2003 ADD CONSTRAINT e4cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
319
|
+
ALTER TABLE employees_1_2004 ADD CONSTRAINT e5cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
320
|
+
|
|
321
|
+
ALTER TABLE employees_2_2000 ADD CONSTRAINT e1cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
322
|
+
ALTER TABLE employees_2_2001 ADD CONSTRAINT e2cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
323
|
+
ALTER TABLE employees_2_2002 ADD CONSTRAINT e3cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
324
|
+
ALTER TABLE employees_2_2003 ADD CONSTRAINT e4cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
325
|
+
ALTER TABLE employees_2_2004 ADD CONSTRAINT e5cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
326
|
+
|
|
327
|
+
ALTER TABLE employees_3_2000 ADD CONSTRAINT e1cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
328
|
+
ALTER TABLE employees_3_2001 ADD CONSTRAINT e2cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
329
|
+
ALTER TABLE employees_3_2002 ADD CONSTRAINT e3cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
330
|
+
ALTER TABLE employees_3_2003 ADD CONSTRAINT e4cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
331
|
+
ALTER TABLE employees_3_2004 ADD CONSTRAINT e5cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
332
|
+
|
|
333
|
+
ALTER TABLE employees_4_2000 ADD CONSTRAINT e1cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
334
|
+
ALTER TABLE employees_4_2001 ADD CONSTRAINT e2cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
335
|
+
ALTER TABLE employees_4_2002 ADD CONSTRAINT e3cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
336
|
+
ALTER TABLE employees_4_2003 ADD CONSTRAINT e4cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
337
|
+
ALTER TABLE employees_4_2004 ADD CONSTRAINT e5cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
338
|
+
|
|
339
|
+
ALTER TABLE employees_5_2000 ADD CONSTRAINT e1cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
340
|
+
ALTER TABLE employees_5_2001 ADD CONSTRAINT e2cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
341
|
+
ALTER TABLE employees_5_2002 ADD CONSTRAINT e3cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
342
|
+
ALTER TABLE employees_5_2003 ADD CONSTRAINT e4cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
343
|
+
ALTER TABLE employees_5_2004 ADD CONSTRAINT e5cfk FOREIGN KEY (company_id) REFERENCES companies (id);
|
|
344
|
+
|
|
345
|
+
and that is a multi level partitioned table. you only stick data in leaf tables.. that is EMPLOYEES_4_2000 gets data.
|
|
346
|
+
EMPLOYEES and EMPLOYEES_4 do not get any data (or you lose some benefit from the query planner)
|
|
347
|
+
so… that is partitioning.
|
|
348
|
+
me: cool!
|
|
349
|
+
Keith:
|
|
350
|
+
you now know as much about partitioning as I do. read this sometime:
|
|
351
|
+
http://www.postgresql.org/docs/9.1/interactive/ddl-partitioning.html
|
data/README
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
Partitioned
|
|
2
|
+
===========
|
|
3
|
+
|
|
4
|
+
Partitioned adds assistance to ActiveRecord for manipulating
|
|
5
|
+
(reading, creating, updating) an activerecord model that represents
|
|
6
|
+
data that may be in one of many database tables (determined by the model's data).
|
|
7
|
+
|
|
8
|
+
It also has features that support the creation and deletion of child tables and
|
|
9
|
+
partitioning support infrastructure.
|
|
10
|
+
|
|
11
|
+
It supports Postgres partitioning and has specific features to overcome basic
|
|
12
|
+
failings of Postgres's implementation of partitioning.
|
|
13
|
+
|
|
14
|
+
Basics:
|
|
15
|
+
A parent table can be inherited by many child tables that inherit most of the
|
|
16
|
+
attributes of the parent table including its columns. child tables typically
|
|
17
|
+
(and for the uses of this plugin must) have a unique check constraint that
|
|
18
|
+
defines which data should be located in that specific child table.
|
|
19
|
+
|
|
20
|
+
Such a constraint allows for the SQL planner to ignore most child tables and target
|
|
21
|
+
the (hopefully) one child table that contains the records of interest. This splits
|
|
22
|
+
data, and meta-data (indexes) which provides streamlined targeted access to the
|
|
23
|
+
desired data.
|
|
24
|
+
|
|
25
|
+
Support for bulk inserts and bulk updates is also provided via Partitioned::Base.create_many and
|
|
26
|
+
Partitioned::Base.update_many.
|
|
27
|
+
|
|
28
|
+
Example
|
|
29
|
+
=======
|
|
30
|
+
|
|
31
|
+
Given the following models:
|
|
32
|
+
|
|
33
|
+
class Company < ActiveRecord::Base
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
class Employee < Partitioned::ByCompanyId
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
and the following tables:
|
|
40
|
+
|
|
41
|
+
-- this is the referenced table
|
|
42
|
+
create table companies
|
|
43
|
+
(
|
|
44
|
+
id serial not null primary key,
|
|
45
|
+
created_at timestamp not null default now(),
|
|
46
|
+
updated_at timestamp,
|
|
47
|
+
name text null
|
|
48
|
+
);
|
|
49
|
+
|
|
50
|
+
-- this is the parent table
|
|
51
|
+
create table employees
|
|
52
|
+
(
|
|
53
|
+
id serial not null primary key,
|
|
54
|
+
created_at timestamp not null default now(),
|
|
55
|
+
updated_at timestamp,
|
|
56
|
+
name text null,
|
|
57
|
+
company_id integer not null references companies
|
|
58
|
+
);
|
|
59
|
+
|
|
60
|
+
create schema employees_partitions;
|
|
61
|
+
|
|
62
|
+
insert into companies (name) values ('company 1'),('company 2'),('company 3');
|
|
63
|
+
|
|
64
|
+
-- some children
|
|
65
|
+
create table employees_partitions.p1 ( CHECK ( company_id = 1 ) ) INHERITS (employees);
|
|
66
|
+
create table employees_partitions.p2 ( CHECK ( company_id = 2 ) ) INHERITS (employees);
|
|
67
|
+
create table employees_partitions.p3 ( CHECK ( company_id = 3 ) ) INHERITS (employees);
|
|
68
|
+
|
|
69
|
+
since database records exist in a specific child table dependent on the field "company_id"
|
|
70
|
+
we need creates, which turn into database inserts into the EMPLOYEES table, to redirect
|
|
71
|
+
the record insert into the specific child table determined by the value of COMPANY_ID
|
|
72
|
+
|
|
73
|
+
eg:
|
|
74
|
+
employee = Employee.create(:name => 'Keith', :company_id => 1)
|
|
75
|
+
|
|
76
|
+
this would normally produce the following:
|
|
77
|
+
INSERT INTO employees (name, company_id) values ('Keith', 1);
|
|
78
|
+
|
|
79
|
+
but with Partitioned we see:
|
|
80
|
+
INSERT INTO employees_partitions.p1 (name, company_id) values ('Keith', 1);
|
|
81
|
+
|
|
82
|
+
reads of such a table need some assistance to find the specific child table the
|
|
83
|
+
record exists in.
|
|
84
|
+
|
|
85
|
+
Since we are partitioned by company_id the programmer needs to provide that information
|
|
86
|
+
when fetching data, or the database will need to search all child tables for the
|
|
87
|
+
specific record we are looking for.
|
|
88
|
+
|
|
89
|
+
This is no longer valid (well, doesn't perform well):
|
|
90
|
+
|
|
91
|
+
employee = Employee.find(1)
|
|
92
|
+
|
|
93
|
+
instead, do one of the following:
|
|
94
|
+
|
|
95
|
+
employee = Employee.from_partition(1).find(1)
|
|
96
|
+
employee = Employee.find(:first, :conditions => {:name => 'Keith', :company_id => 1})
|
|
97
|
+
employee = Employee.find(:first, :conditions => {:id => 1, :company_id => 1})
|
|
98
|
+
|
|
99
|
+
an update (employee.save where the record already exists in the database) will take
|
|
100
|
+
advantage of knowing which child table the record exists in so it can do some optimization.
|
|
101
|
+
|
|
102
|
+
so, the following works as expected:
|
|
103
|
+
|
|
104
|
+
employee.name = "Not Keith"
|
|
105
|
+
employee.save
|
|
106
|
+
|
|
107
|
+
turns into the following SQL:
|
|
108
|
+
|
|
109
|
+
update employees_partitions.p1 set name = 'Not Keith' where id = 1;
|
|
110
|
+
|
|
111
|
+
Copyright 2010-2012 fiksu.com, inc, all rights reserved
|