partitioned 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. data/Gemfile +17 -0
  2. data/LICENSE +30 -0
  3. data/PARTITIONING_EXPLAINED.txt +351 -0
  4. data/README +111 -0
  5. data/Rakefile +27 -0
  6. data/examples/README +23 -0
  7. data/examples/company_id.rb +417 -0
  8. data/examples/company_id_and_created_at.rb +689 -0
  9. data/examples/created_at.rb +590 -0
  10. data/examples/created_at_referencing_awards.rb +1000 -0
  11. data/examples/id.rb +475 -0
  12. data/examples/lib/by_company_id.rb +11 -0
  13. data/examples/lib/command_line_tool_mixin.rb +71 -0
  14. data/examples/lib/company.rb +29 -0
  15. data/examples/lib/get_options.rb +44 -0
  16. data/examples/lib/roman.rb +41 -0
  17. data/examples/start_date.rb +621 -0
  18. data/init.rb +1 -0
  19. data/lib/monkey_patch_activerecord.rb +92 -0
  20. data/lib/monkey_patch_postgres.rb +73 -0
  21. data/lib/partitioned.rb +26 -0
  22. data/lib/partitioned/active_record_overrides.rb +34 -0
  23. data/lib/partitioned/bulk_methods_mixin.rb +288 -0
  24. data/lib/partitioned/by_created_at.rb +13 -0
  25. data/lib/partitioned/by_foreign_key.rb +21 -0
  26. data/lib/partitioned/by_id.rb +35 -0
  27. data/lib/partitioned/by_integer_field.rb +32 -0
  28. data/lib/partitioned/by_monthly_time_field.rb +23 -0
  29. data/lib/partitioned/by_time_field.rb +65 -0
  30. data/lib/partitioned/by_weekly_time_field.rb +30 -0
  31. data/lib/partitioned/multi_level.rb +20 -0
  32. data/lib/partitioned/multi_level/configurator/data.rb +14 -0
  33. data/lib/partitioned/multi_level/configurator/dsl.rb +32 -0
  34. data/lib/partitioned/multi_level/configurator/reader.rb +162 -0
  35. data/lib/partitioned/multi_level/partition_manager.rb +47 -0
  36. data/lib/partitioned/partitioned_base.rb +354 -0
  37. data/lib/partitioned/partitioned_base/configurator.rb +6 -0
  38. data/lib/partitioned/partitioned_base/configurator/data.rb +62 -0
  39. data/lib/partitioned/partitioned_base/configurator/dsl.rb +628 -0
  40. data/lib/partitioned/partitioned_base/configurator/reader.rb +209 -0
  41. data/lib/partitioned/partitioned_base/partition_manager.rb +138 -0
  42. data/lib/partitioned/partitioned_base/sql_adapter.rb +286 -0
  43. data/lib/partitioned/version.rb +3 -0
  44. data/lib/tasks/desirable_tasks.rake +4 -0
  45. data/partitioned.gemspec +21 -0
  46. data/spec/dummy/.rspec +1 -0
  47. data/spec/dummy/README.rdoc +261 -0
  48. data/spec/dummy/Rakefile +7 -0
  49. data/spec/dummy/app/assets/javascripts/application.js +9 -0
  50. data/spec/dummy/app/assets/stylesheets/application.css +7 -0
  51. data/spec/dummy/app/controllers/application_controller.rb +3 -0
  52. data/spec/dummy/app/helpers/application_helper.rb +2 -0
  53. data/spec/dummy/app/views/layouts/application.html.erb +14 -0
  54. data/spec/dummy/config.ru +4 -0
  55. data/spec/dummy/config/application.rb +51 -0
  56. data/spec/dummy/config/boot.rb +10 -0
  57. data/spec/dummy/config/database.yml +32 -0
  58. data/spec/dummy/config/environment.rb +5 -0
  59. data/spec/dummy/config/environments/development.rb +30 -0
  60. data/spec/dummy/config/environments/production.rb +60 -0
  61. data/spec/dummy/config/environments/test.rb +39 -0
  62. data/spec/dummy/config/initializers/backtrace_silencers.rb +7 -0
  63. data/spec/dummy/config/initializers/inflections.rb +10 -0
  64. data/spec/dummy/config/initializers/mime_types.rb +5 -0
  65. data/spec/dummy/config/initializers/secret_token.rb +7 -0
  66. data/spec/dummy/config/initializers/session_store.rb +8 -0
  67. data/spec/dummy/config/initializers/wrap_parameters.rb +14 -0
  68. data/spec/dummy/config/locales/en.yml +5 -0
  69. data/spec/dummy/config/routes.rb +58 -0
  70. data/spec/dummy/public/404.html +26 -0
  71. data/spec/dummy/public/422.html +26 -0
  72. data/spec/dummy/public/500.html +26 -0
  73. data/spec/dummy/public/favicon.ico +0 -0
  74. data/spec/dummy/script/rails +6 -0
  75. data/spec/dummy/spec/spec_helper.rb +27 -0
  76. data/spec/monkey_patch_posgres_spec.rb +176 -0
  77. data/spec/partitioned/bulk_methods_mixin_spec.rb +512 -0
  78. data/spec/partitioned/by_created_at_spec.rb +62 -0
  79. data/spec/partitioned/by_foreign_key_spec.rb +95 -0
  80. data/spec/partitioned/by_id_spec.rb +97 -0
  81. data/spec/partitioned/by_integer_field_spec.rb +143 -0
  82. data/spec/partitioned/by_monthly_time_field_spec.rb +100 -0
  83. data/spec/partitioned/by_time_field_spec.rb +182 -0
  84. data/spec/partitioned/by_weekly_time_field_spec.rb +100 -0
  85. data/spec/partitioned/multi_level/configurator/dsl_spec.rb +88 -0
  86. data/spec/partitioned/multi_level/configurator/reader_spec.rb +147 -0
  87. data/spec/partitioned/partitioned_base/configurator/dsl_spec.rb +459 -0
  88. data/spec/partitioned/partitioned_base/configurator/reader_spec.rb +513 -0
  89. data/spec/partitioned/partitioned_base/sql_adapter_spec.rb +204 -0
  90. data/spec/partitioned/partitioned_base_spec.rb +173 -0
  91. data/spec/spec_helper.rb +32 -0
  92. data/spec/support/shared_example_spec_helper_for_integer_key.rb +137 -0
  93. data/spec/support/shared_example_spec_helper_for_time_key.rb +147 -0
  94. data/spec/support/tables_spec_helper.rb +47 -0
  95. metadata +250 -0
data/Gemfile ADDED
@@ -0,0 +1,17 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Declare your gem's dependencies in partitioned.gemspec.
4
+ # Bundler will treat runtime dependencies like base dependencies, and
5
+ # development dependencies will be added by default to the :development group.
6
+ gemspec
7
+
8
+ # jquery-rails is used by the dummy application
9
+ gem "jquery-rails"
10
+
11
+ # Declare any dependencies that are still in development here instead of in
12
+ # your gemspec. These might include edge Rails or gems from your path or
13
+ # Git. Remember to move these dependencies to your gemspec before releasing
14
+ # your gem to rubygems.org.
15
+
16
+ # To use debugger
17
+ # gem 'ruby-debug'
data/LICENSE ADDED
@@ -0,0 +1,30 @@
1
+ Copyright (c) 2010-2012, Fiksu, Inc.
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are
6
+ met:
7
+
8
+ o Redistributions of source code must retain the above copyright
9
+ notice, this list of conditions and the following disclaimer.
10
+
11
+ o Redistributions in binary form must reproduce the above copyright
12
+ notice, this list of conditions and the following disclaimer in the
13
+ documentation and/or other materials provided with the
14
+ distribution.
15
+
16
+ o Fiksu, Inc. nor the names of its contributors may be used to
17
+ endorse or promote products derived from this software without
18
+ specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,351 @@
1
+ This was combed from instant messaging session Keith had with Aleksandr.
2
+
3
+ It describes partitioning as it relates to Postgres and how Postgres implements partitioning
4
+ (from a user's perspective) using table inheritance.
5
+
6
+ Keith: do you know what a primary key is?
7
+ me: yes
8
+ Keith: ok… do you know what a check constraint is
9
+ me: yes
10
+ Keith: great, do you know what an index is?
11
+ me: yes
12
+ Keith: excellent. Do you know what table inheritance means with respect to postgres?
13
+ me: Do you mean partitioning ?
14
+ Keith:
15
+ no, they are two different things, let me explain:
16
+ a very simple table:
17
+ create table a (a1 integer, a2 integer);
18
+ and a child of it:
19
+ create table b () inherits (a);
20
+ that is table inheritance. table 'b' has all the columns of table 'a'
21
+ me: Yes, it is clear
22
+ Keith:
23
+ if you go to a psql prompt you can type those in and see what happens…. but let me explain further,
24
+ you can do something like this:
25
+ create table c (c1 text) inherits (a);
26
+ table 'c' has all columns table 'a' has AND it has 'c1' (a text column), cool?
27
+ (look at the difference between the create table 'c' and create table 'b')
28
+ me: b has only columns a1 and a2
29
+ Keith:
30
+ correct, that is table inheritance. no strings between the tables except the schemas are shared.
31
+ well.. there is one string. child tables will be searched for data when the parent table is queried, example:
32
+
33
+ psql=# create table a (a1 integer, a2 integer);
34
+ psql=# create table b () inherits (a);
35
+ psql=# create table c (c1 text) inherits (a);
36
+ psql=# insert into c (a1,a2,c1) values (1,2,'three');
37
+ psql=# insert into b (a1,a2) values (11,22);
38
+ psql=# insert into a (a1,a2) values (111,222);
39
+ psql=# select * from a;
40
+ a1 | a2
41
+ -----+-----
42
+ 111 | 222
43
+ 11 | 22
44
+ 1 | 2
45
+ (3 rows)
46
+
47
+ psql=# select * from b;
48
+ a1 | a2
49
+ ----+----
50
+ 11 | 22
51
+ (1 row)
52
+
53
+ psql=# select * from c;
54
+ a1 | a2 | c1
55
+ ----+----+-------
56
+ 1 | 2 | three
57
+ (1 row)
58
+
59
+ does this make sense?
60
+ me: cool
61
+ Keith:
62
+ notice that you can insert values in the parent table and the child's schema can be different.. no problems.
63
+ pretty slick,eh? this is table inheritance. partitioning is built on top of it.
64
+ Keith:
65
+ so, postgres can handle large tables. millions of rows. but indexes can get really large.
66
+ Especially if you have an index on a text column, but even indexes on integer fields can be large.
67
+ tens of millions of rows ... billions of rows… at some point the indexes take up more space than available memory allows
68
+ if that happens Postgres partially swaps in indexes as it can... works on them, then swaps in others parts. This is very slow,
69
+ understand?
70
+ me: yes I do
71
+ Keith:
72
+ We'll work with two major tables for the rest of the examples. COMPANIES representing a business and EMPLOYEES representing
73
+ all known employees for all known COMPANIES.
74
+ create table companies
75
+ (
76
+ id serial not null primary key,
77
+ created_at timestamp not null default now(),
78
+ updated_at timestamp,
79
+ name text null
80
+ );
81
+ create table employees
82
+ (
83
+ id serial not null primary key,
84
+ created_at timestamp not null default now(),
85
+ updated_at timestamp,
86
+ name text not null,
87
+ salary money not null,
88
+ company_id integer not null references companies
89
+ );
90
+ does this make sense?
91
+ me: yes it does
92
+ Keith:
93
+ let's say our job is to track every employee for 4 very large companies. one might just put them all in the employees table...
94
+ insert into companies (name) values ('Fluent Mobile, Inc.'),('Fiksu, Inc.'),('AppExchanger.com, Inc.'),('FreeMyApps.com, Inc.');
95
+ four companies -- got it?
96
+ me: i see
97
+ Keith:
98
+ but, let's say that each company has 5 million employees. that is a large amount of data
99
+ and doing a search on their name field would be slow even with an index on some machines.
100
+ to solve that problem we partition the employees table on "company_id"… and here is how we do that:
101
+ create table employees_1 (check (company_id = 1)) inherits (employees);
102
+ create table employees_2 (check (company_id = 2)) inherits (employees);
103
+ create table employees_3 (check (company_id = 3)) inherits (employees);
104
+ create table employees_4 (check (company_id = 4)) inherits (employees);
105
+ the check constraint is the key to partitioning...
106
+ so, employees_1 inherits from employees (has all its columns) AND it adds one thing — a check constraint which forces any row in
107
+ its table to have a company_id value = 1, make sense?
108
+ me: yes
109
+ Keith:
110
+ Then to insert records into the table:
111
+ insert into employees_1 (name, salary, company_id) values ('keith', '100', 1);
112
+ does work, but
113
+ insert into employees_1 (name, salary, company_id) values ('keith', '100', 2);
114
+ will fail
115
+ me: check constraint processed data. it is clear
116
+ Keith:
117
+ Exactly. Great. Now i'll add some data to the tables.
118
+ notice this:
119
+ psql=# \d employees
120
+ Table "public.employees"
121
+ Column | Type | Modifiers
122
+ ------------+-----------------------------+--------------------------------------------------------
123
+ id | integer | not null default nextval('employees_id_seq'::regclass)
124
+ created_at | timestamp without time zone | not null default now()
125
+ updated_at | timestamp without time zone |
126
+ name | text | not null
127
+ salary | money | not null
128
+ company_id | integer | not null
129
+ Indexes:
130
+ "employees_pkey" PRIMARY KEY, btree (id)
131
+ Foreign-key constraints:
132
+ "employees_company_id_fkey" FOREIGN KEY (company_id) REFERENCES companies(id)
133
+ Number of child tables: 4 (Use \d+ to list them.)
134
+ see that there are child tables and PSQL tells you about them
135
+ me: i see all childrens
136
+ Keith:
137
+ now for data.
138
+ insert into employees_1 (name, salary, company_id) values ('keith', '100', 1), ('k2', '101', 1),('k3', '105', 1),('k4', '110', 1);
139
+ insert into employees_2 (name, salary, company_id) values ('sally', '100', 2), ('s2', '101', 2),('s3', '105', 2),('s4', '110', 2);
140
+ insert into employees_3 (name, salary, company_id) values ('william', '100', 3), ('w2', '101', 3),('w3', '105', 3),('w4', '110', 3);
141
+ insert into employees_4 (name, salary, company_id) values ('laura', '100', 4), ('l2', '101', 4),('l3', '105', 4),('l4', '110', 4);
142
+ note that we don't insert any data into the parent table… i will explain that soon.
143
+ (you can insert data into the parent table… it's legal.. but not logical for our purposes).
144
+ you can delete all rows in all tables by saying:
145
+ delete from employees;
146
+ but.. insert that data.. and let's talk about query planners. the query planner is the actual machine
147
+ in the database that figures out how to execute the query.
148
+ 'explain' shows you what the query planner is doing/would do.
149
+ so… we'll use explain to figure out how partitioning helps us, try
150
+ psql=# explain select * from employees where name = 'keith';
151
+ QUERY PLAN
152
+ -----------------------------------------------------------------------------------
153
+ Result (cost=0.00..103.75 rows=20 width=64)
154
+ -> Append (cost=0.00..103.75 rows=20 width=64)
155
+ -> Seq Scan on employees (cost=0.00..20.75 rows=4 width=64)
156
+ Filter: (name = 'keith'::text)
157
+ -> Seq Scan on employees_1 employees (cost=0.00..20.75 rows=4 width=64)
158
+ Filter: (name = 'keith'::text)
159
+ -> Seq Scan on employees_2 employees (cost=0.00..20.75 rows=4 width=64)
160
+ Filter: (name = 'keith'::text)
161
+ -> Seq Scan on employees_3 employees (cost=0.00..20.75 rows=4 width=64)
162
+ Filter: (name = 'keith'::text)
163
+ -> Seq Scan on employees_4 employees (cost=0.00..20.75 rows=4 width=64)
164
+ Filter: (name = 'keith'::text)
165
+ (12 rows)
166
+ which is the worst of all possibilities. it checks every child table for name 'keith' then consolidates the information and returns the one row.
167
+ but.. if we do this
168
+ explain select * from employees where name = 'keith' and company_id = 1;
169
+ we get
170
+ psql=# explain select * from employees where name = 'keith' and company_id = 1;
171
+ QUERY PLAN
172
+ -----------------------------------------------------------------------------------
173
+ Result (cost=0.00..45.80 rows=2 width=64)
174
+ -> Append (cost=0.00..45.80 rows=2 width=64)
175
+ -> Seq Scan on employees (cost=0.00..22.90 rows=1 width=64)
176
+ Filter: ((name = 'keith'::text) AND (company_id = 1))
177
+ -> Seq Scan on employees_1 employees (cost=0.00..22.90 rows=1 width=64)
178
+ Filter: ((name = 'keith'::text) AND (company_id = 1))
179
+ (6 rows)
180
+ so much win
181
+ me: cost is better, and plan too
182
+ Keith:
183
+ Correct, because the planner could tell (using knowledge from the check constraint) that employees_1 was the only table it needed to look at.
184
+ equally as valid and even faster is:
185
+ explain select * from employees_1 where name = 'keith';
186
+ psql=# explain select * from employees_1 where name = 'keith';
187
+ QUERY PLAN
188
+ -------------------------------------------------------------
189
+ Seq Scan on employees_1 (cost=0.00..20.75 rows=4 width=64)
190
+ Filter: (name = 'keith'::text)
191
+ (2 rows)
192
+ you can imagine that an update will work in the same way. if the company_id is not specified it will check all tables.
193
+ me: i see
194
+ Keith:
195
+ great, so… that is partitioning. there are other forms... but before we go on, try this:
196
+ create table employees_5 (check (company_id = 5)) inherits (employees);
197
+ insert into employees_5 (name, salary, company_id) values ('vicky', '100', 5), ('v2', '101', 5),('v3', '105', 5),('v4', '110', 5);
198
+ well, that works, why? how could we insert a value 5 into company_id if employees has a reference to companies and there is not
199
+ a record with company.id = 5. Because postgres does not propagate indexes and referential integrity to child tables.
200
+ So, even though employees has the reference, employees_5 does not and the only constraint is that company_id = 5,
201
+ but not if company.id = 5 exists in the companies table does that make sense?
202
+ me: yes it is clear
203
+ Keith: so, to alleviate that problem we need to add foreign key constraints to each of the child tables
204
+ me: Yes it can resolve our inconsistency problem
205
+ Keith:
206
+ ALTER TABLE employees_1 ADD CONSTRAINT e1cfk FOREIGN KEY (company_id) REFERENCES companies (id);
207
+ ALTER TABLE employees_2 ADD CONSTRAINT e2cfk FOREIGN KEY (company_id) REFERENCES companies (id);
208
+ ALTER TABLE employees_3 ADD CONSTRAINT e3cfk FOREIGN KEY (company_id) REFERENCES companies (id);
209
+ ALTER TABLE employees_4 ADD CONSTRAINT e4cfk FOREIGN KEY (company_id) REFERENCES companies (id);
210
+ so, those should work.. but we need one more for employees_5, but of course:
211
+ psql=# ALTER TABLE employees_5 ADD CONSTRAINT e5cfk FOREIGN KEY (company_id) REFERENCES companies (id);
212
+ ERROR: insert or update on table "employees_5" violates foreign key constraint "e5cfk"
213
+ DETAIL: Key (company_id)=(5) is not present in table "companies".
214
+ make sense?
215
+ me: yes
216
+ Keith: to fix this we need to add one more row to companies...
217
+ Keith:
218
+ INSERT INTO companies (name) values ('gga');
219
+ and now ALTER TABLE employees_5 ADD CONSTRAINT e5cfk FOREIGN KEY (company_id) REFERENCES companies (id);
220
+ works, ok?
221
+ me: ok
222
+ Keith:
223
+ so, you can partition in many things… not just company_id = 1
224
+ you could say "check (company_id in (1,2,3))" for one child table
225
+ and "check (company_id = 4)" for another
226
+ and "check (company_id >= 5)" in yet another
227
+ if that made sense, you can even have check constraints overlap (although you shouldn't do that)
228
+ but one table could have CHECK (company_id <= 3)
229
+ and another could have CHECK (company_id >= 3)
230
+ so both child tables would be searched when company_id = 3. is that a problem? well.. that is up to you to decide with your data.
231
+ but we will, from now on, deal only with child tables that are mutually exclusive to fully optimize the query planner
232
+ does that make sense?
233
+ me: yes that does
234
+ Keith: Great, you can also partition on created_at… one minute.
235
+ Keith:
236
+ drop table employees_1;
237
+ drop table employees_2;
238
+ drop table employees_3;
239
+ drop table employees_4;
240
+ drop table employees_5;
241
+ create table employees_2000 (check (created_at >= '2000-01-01' and created_at < '2001-01-01')) inherits (employees);
242
+ create table employees_2001 (check (created_at >= '2001-01-01' and created_at < '2002-01-01')) inherits (employees);
243
+ create table employees_2002 (check (created_at >= '2002-01-01' and created_at < '2003-01-01')) inherits (employees);
244
+ create table employees_2003 (check (created_at >= '2003-01-01' and created_at < '2004-01-01')) inherits (employees);
245
+ create table employees_2004 (check (created_at >= '2004-01-01' and created_at < '2005-01-01')) inherits (employees);
246
+ is that obvious what it does?
247
+ me: yes, we create partition for each year between 2000-2005 years
248
+ Keith:
249
+ yes, and only created_at is looked at to determine where the query planner will look for records
250
+ make sense? company_id is not in the check constraint so the planner will not use it for queries on these tables
251
+ me: to have the excellent performance we need to add two partition for company_id and created_at
252
+ Keith:
253
+ it depends on how we access the data. but YES you are right
254
+ BUT if we only cared about when the employee record was created and NEVER cared about the company_id… then this schema
255
+ fits our needs, correct?
256
+ select distinct company_id from employees where created_at = '2001-06-14';
257
+ something like that is still efficient, for this schema, right?
258
+ or we can think about a schema of reports.. which we only care about reports on a year by year basis.
259
+ employees might not be the best example of usage for this… but the logic should be sane.
260
+ me: yes it very helpfull for reporting statistics and we have the best performance
261
+ Keith:
262
+ great, now.. let me blow your mind. is your mind ready to be blown?
263
+ me: I'm fine. yet...
264
+ Keith:
265
+ drop table employees_1;
266
+ drop table employees_2;
267
+ drop table employees_3;
268
+ drop table employees_4;
269
+ drop table employees_5;
270
+ drop table employees_2000;
271
+ drop table employees_2001;
272
+ drop table employees_2002;
273
+ drop table employees_2003;
274
+ drop table employees_2004;
275
+ create table employees_1 (check (company_id = 1)) inherits (employees);
276
+ create table employees_2 (check (company_id = 2)) inherits (employees);
277
+ create table employees_3 (check (company_id = 3)) inherits (employees);
278
+ create table employees_4 (check (company_id = 4)) inherits (employees);
279
+ create table employees_5 (check (company_id = 5)) inherits (employees);
280
+ create table employees_1_2000 (check (created_at >= '2000-01-01' and created_at < '2001-01-01')) inherits (employees_1);
281
+ create table employees_1_2001 (check (created_at >= '2001-01-01' and created_at < '2002-01-01')) inherits (employees_1);
282
+ create table employees_1_2002 (check (created_at >= '2002-01-01' and created_at < '2003-01-01')) inherits (employees_1);
283
+ create table employees_1_2003 (check (created_at >= '2003-01-01' and created_at < '2004-01-01')) inherits (employees_1);
284
+ create table employees_1_2004 (check (created_at >= '2004-01-01' and created_at < '2005-01-01')) inherits (employees_1);
285
+
286
+ create table employees_2_2000 (check (created_at >= '2000-01-01' and created_at < '2001-01-01')) inherits (employees_2);
287
+ create table employees_2_2001 (check (created_at >= '2001-01-01' and created_at < '2002-01-01')) inherits (employees_2);
288
+ create table employees_2_2002 (check (created_at >= '2002-01-01' and created_at < '2003-01-01')) inherits (employees_2);
289
+ create table employees_2_2003 (check (created_at >= '2003-01-01' and created_at < '2004-01-01')) inherits (employees_2);
290
+ create table employees_2_2004 (check (created_at >= '2004-01-01' and created_at < '2005-01-01')) inherits (employees_2);
291
+
292
+ create table employees_3_2000 (check (created_at >= '2000-01-01' and created_at < '2001-01-01')) inherits (employees_3);
293
+ create table employees_3_2001 (check (created_at >= '2001-01-01' and created_at < '2002-01-01')) inherits (employees_3);
294
+ create table employees_3_2002 (check (created_at >= '2002-01-01' and created_at < '2003-01-01')) inherits (employees_3);
295
+ create table employees_3_2003 (check (created_at >= '2003-01-01' and created_at < '2004-01-01')) inherits (employees_3);
296
+ create table employees_3_2004 (check (created_at >= '2004-01-01' and created_at < '2005-01-01')) inherits (employees_3);
297
+
298
+ create table employees_4_2000 (check (created_at >= '2000-01-01' and created_at < '2001-01-01')) inherits (employees_4);
299
+ create table employees_4_2001 (check (created_at >= '2001-01-01' and created_at < '2002-01-01')) inherits (employees_4);
300
+ create table employees_4_2002 (check (created_at >= '2002-01-01' and created_at < '2003-01-01')) inherits (employees_4);
301
+ create table employees_4_2003 (check (created_at >= '2003-01-01' and created_at < '2004-01-01')) inherits (employees_4);
302
+ create table employees_4_2004 (check (created_at >= '2004-01-01' and created_at < '2005-01-01')) inherits (employees_4);
303
+
304
+ create table employees_5_2000 (check (created_at >= '2000-01-01' and created_at < '2001-01-01')) inherits (employees_5);
305
+ create table employees_5_2001 (check (created_at >= '2001-01-01' and created_at < '2002-01-01')) inherits (employees_5);
306
+ create table employees_5_2002 (check (created_at >= '2002-01-01' and created_at < '2003-01-01')) inherits (employees_5);
307
+ create table employees_5_2003 (check (created_at >= '2003-01-01' and created_at < '2004-01-01')) inherits (employees_5);
308
+ create table employees_5_2004 (check (created_at >= '2004-01-01' and created_at < '2005-01-01')) inherits (employees_5);
309
+
310
+ we can have multi level partitioning. in this case… the first level inherits from employees
311
+ but each employees_X table has 5 children that inherit from it and put the check constraint on created_at.
312
+ so, now a query:
313
+ select * from employees where created_at = '2001-07-04' and company_id = 5;
314
+ wins big. the referential integrity problem with company_id => companies still exists. we must apply that
315
+ ALTER TABLE employees_1_2000 ADD CONSTRAINT e1cfk FOREIGN KEY (company_id) REFERENCES companies (id);
316
+ ALTER TABLE employees_1_2001 ADD CONSTRAINT e2cfk FOREIGN KEY (company_id) REFERENCES companies (id);
317
+ ALTER TABLE employees_1_2002 ADD CONSTRAINT e3cfk FOREIGN KEY (company_id) REFERENCES companies (id);
318
+ ALTER TABLE employees_1_2003 ADD CONSTRAINT e4cfk FOREIGN KEY (company_id) REFERENCES companies (id);
319
+ ALTER TABLE employees_1_2004 ADD CONSTRAINT e5cfk FOREIGN KEY (company_id) REFERENCES companies (id);
320
+
321
+ ALTER TABLE employees_2_2000 ADD CONSTRAINT e1cfk FOREIGN KEY (company_id) REFERENCES companies (id);
322
+ ALTER TABLE employees_2_2001 ADD CONSTRAINT e2cfk FOREIGN KEY (company_id) REFERENCES companies (id);
323
+ ALTER TABLE employees_2_2002 ADD CONSTRAINT e3cfk FOREIGN KEY (company_id) REFERENCES companies (id);
324
+ ALTER TABLE employees_2_2003 ADD CONSTRAINT e4cfk FOREIGN KEY (company_id) REFERENCES companies (id);
325
+ ALTER TABLE employees_2_2004 ADD CONSTRAINT e5cfk FOREIGN KEY (company_id) REFERENCES companies (id);
326
+
327
+ ALTER TABLE employees_3_2000 ADD CONSTRAINT e1cfk FOREIGN KEY (company_id) REFERENCES companies (id);
328
+ ALTER TABLE employees_3_2001 ADD CONSTRAINT e2cfk FOREIGN KEY (company_id) REFERENCES companies (id);
329
+ ALTER TABLE employees_3_2002 ADD CONSTRAINT e3cfk FOREIGN KEY (company_id) REFERENCES companies (id);
330
+ ALTER TABLE employees_3_2003 ADD CONSTRAINT e4cfk FOREIGN KEY (company_id) REFERENCES companies (id);
331
+ ALTER TABLE employees_3_2004 ADD CONSTRAINT e5cfk FOREIGN KEY (company_id) REFERENCES companies (id);
332
+
333
+ ALTER TABLE employees_4_2000 ADD CONSTRAINT e1cfk FOREIGN KEY (company_id) REFERENCES companies (id);
334
+ ALTER TABLE employees_4_2001 ADD CONSTRAINT e2cfk FOREIGN KEY (company_id) REFERENCES companies (id);
335
+ ALTER TABLE employees_4_2002 ADD CONSTRAINT e3cfk FOREIGN KEY (company_id) REFERENCES companies (id);
336
+ ALTER TABLE employees_4_2003 ADD CONSTRAINT e4cfk FOREIGN KEY (company_id) REFERENCES companies (id);
337
+ ALTER TABLE employees_4_2004 ADD CONSTRAINT e5cfk FOREIGN KEY (company_id) REFERENCES companies (id);
338
+
339
+ ALTER TABLE employees_5_2000 ADD CONSTRAINT e1cfk FOREIGN KEY (company_id) REFERENCES companies (id);
340
+ ALTER TABLE employees_5_2001 ADD CONSTRAINT e2cfk FOREIGN KEY (company_id) REFERENCES companies (id);
341
+ ALTER TABLE employees_5_2002 ADD CONSTRAINT e3cfk FOREIGN KEY (company_id) REFERENCES companies (id);
342
+ ALTER TABLE employees_5_2003 ADD CONSTRAINT e4cfk FOREIGN KEY (company_id) REFERENCES companies (id);
343
+ ALTER TABLE employees_5_2004 ADD CONSTRAINT e5cfk FOREIGN KEY (company_id) REFERENCES companies (id);
344
+
345
+ and that is a multi level partitioned table. you only stick data in leaf tables.. that is EMPLOYEES_4_2000 gets data.
346
+ EMPLOYEES and EMPLOYEES_4 do not get any data (or you lose some benefit from the query planner)
347
+ so… that is partitioning.
348
+ me: cool!
349
+ Keith:
350
+ you now know as much about partitioning as I do. read this sometime:
351
+ http://www.postgresql.org/docs/9.1/interactive/ddl-partitioning.html
data/README ADDED
@@ -0,0 +1,111 @@
1
+ Partitioned
2
+ ===========
3
+
4
+ Partitioned adds assistance to ActiveRecord for manipulating
5
+ (reading, creating, updating) an activerecord model that represents
6
+ data that may be in one of many database tables (determined by the model's data).
7
+
8
+ It also has features that support the creation and deleting of child tables and
9
+ partitioning support infrastructure.
10
+
11
+ It supports Postgres partitioning and has specific features to overcome basic
12
+ failings of Postgres's implementation of partitioning.
13
+
14
+ Basics:
15
+ A parent table can be inherited by many child tables that inherit most of the
16
+ attributes of the parent table including its columns. child tables typically
17
+ (and for the uses of this plugin must) have a unique check constraint that
18
+ defines which data should be located in that specific child table.
19
+
20
+ Such a constraint allows for the SQL planner to ignore most child tables and target
21
+ the (hopefully) one child table that contains the records of interest. This splits
22
+ data, and meta-data (indexes) which provides streamlined targeted access to the
23
+ desired data.
24
+
25
+ Support for bulk inserts and bulk updates is also provided via Partitioned::Base.create_many and
26
+ Partitioned::Base.update_many.
27
+
28
+ Example
29
+ =======
30
+
31
+ Given the following models:
32
+
33
+ class Company < ActiveRecord::Base
34
+ end
35
+
36
+ class Employee < Partitioned::ByCompanyId
37
+ end
38
+
39
+ and the following tables:
40
+
41
+ -- this is the referenced table
42
+ create table companies
43
+ (
44
+ id serial not null primary key,
45
+ created_at timestamp not null default now(),
46
+ updated_at timestamp,
47
+ name text null
48
+ );
49
+
50
+ -- this is the parent table
51
+ create table employees
52
+ (
53
+ id serial not null primary key,
54
+ created_at timestamp not null default now(),
55
+ updated_at timestamp,
56
+ name text null,
57
+ company_id integer not null references companies
58
+ );
59
+
60
+ create schema employees_partitions;
61
+
62
+ insert into companies (name) values ('company 1'),('company 2'),('company 3');
63
+
64
+ -- some children
65
+ create table employees_partitions.p1 ( CHECK ( company_id = 1 ) ) INHERITS (employees);
66
+ create table employees_partitions.p2 ( CHECK ( company_id = 2 ) ) INHERITS (employees);
67
+ create table employees_partitions.p3 ( CHECK ( company_id = 3 ) ) INHERITS (employees);
68
+
69
+ since database records exist in a specific child table dependent on the field "company_id"
70
+ we need to have creates that turn into database inserts of the EMPLOYEES table redirect
71
+ the record insert into the specific child table determined by the value of COMPANY_ID
72
+
73
+ eg:
74
+ employee = Employee.create(:name => 'Keith', :company_id => 1)
75
+
76
+ this would normally produce the following:
77
+ INSERT INTO employees (name, company_id) values ('Keith', 1);
78
+
79
+ but with Partitioned we see:
80
+ INSERT INTO employees_partitions.p1 (name, company_id) values ('Keith', 1);
81
+
82
+ reads of such a table need some assistance to find the specific child table the
83
+ record exists in.
84
+
85
+ Since we are partitioned by company_id the programmer needs to provide that information
86
+ when fetching data, or the database will need to search all child tables for the
87
+ specific record we are looking for.
88
+
89
+ This is no longer valid (well, doesn't perform well):
90
+
91
+ employee = Employee.find(1)
92
+
93
+ instead, do one of the following:
94
+
95
+ employee = Employee.from_partition(1).find(1)
96
+ employee = Employee.find(:first, :conditions => {:name => 'Keith', :company_id => 1})
97
+ employee = Employee.find(:first, :conditions => {:id => 1, :company_id => 1})
98
+
99
+ an update (employee.save where the record already exists in the database) will take
100
+ advantage of knowing which child table the record exists in so it can do some optimization.
101
+
102
+ so, the following works as expected:
103
+
104
+ employee.name = "Not Keith"
105
+ employee.save
106
+
107
+ turns into the following SQL:
108
+
109
+ update employees_partitions.p1 set name = 'Not Keith' where id = 1;
110
+
111
+ Copyright 2010-2012 fiksu.com, inc, all rights reserved