partitioned 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95) hide show
  1. data/Gemfile +17 -0
  2. data/LICENSE +30 -0
  3. data/PARTITIONING_EXPLAINED.txt +351 -0
  4. data/README +111 -0
  5. data/Rakefile +27 -0
  6. data/examples/README +23 -0
  7. data/examples/company_id.rb +417 -0
  8. data/examples/company_id_and_created_at.rb +689 -0
  9. data/examples/created_at.rb +590 -0
  10. data/examples/created_at_referencing_awards.rb +1000 -0
  11. data/examples/id.rb +475 -0
  12. data/examples/lib/by_company_id.rb +11 -0
  13. data/examples/lib/command_line_tool_mixin.rb +71 -0
  14. data/examples/lib/company.rb +29 -0
  15. data/examples/lib/get_options.rb +44 -0
  16. data/examples/lib/roman.rb +41 -0
  17. data/examples/start_date.rb +621 -0
  18. data/init.rb +1 -0
  19. data/lib/monkey_patch_activerecord.rb +92 -0
  20. data/lib/monkey_patch_postgres.rb +73 -0
  21. data/lib/partitioned.rb +26 -0
  22. data/lib/partitioned/active_record_overrides.rb +34 -0
  23. data/lib/partitioned/bulk_methods_mixin.rb +288 -0
  24. data/lib/partitioned/by_created_at.rb +13 -0
  25. data/lib/partitioned/by_foreign_key.rb +21 -0
  26. data/lib/partitioned/by_id.rb +35 -0
  27. data/lib/partitioned/by_integer_field.rb +32 -0
  28. data/lib/partitioned/by_monthly_time_field.rb +23 -0
  29. data/lib/partitioned/by_time_field.rb +65 -0
  30. data/lib/partitioned/by_weekly_time_field.rb +30 -0
  31. data/lib/partitioned/multi_level.rb +20 -0
  32. data/lib/partitioned/multi_level/configurator/data.rb +14 -0
  33. data/lib/partitioned/multi_level/configurator/dsl.rb +32 -0
  34. data/lib/partitioned/multi_level/configurator/reader.rb +162 -0
  35. data/lib/partitioned/multi_level/partition_manager.rb +47 -0
  36. data/lib/partitioned/partitioned_base.rb +354 -0
  37. data/lib/partitioned/partitioned_base/configurator.rb +6 -0
  38. data/lib/partitioned/partitioned_base/configurator/data.rb +62 -0
  39. data/lib/partitioned/partitioned_base/configurator/dsl.rb +628 -0
  40. data/lib/partitioned/partitioned_base/configurator/reader.rb +209 -0
  41. data/lib/partitioned/partitioned_base/partition_manager.rb +138 -0
  42. data/lib/partitioned/partitioned_base/sql_adapter.rb +286 -0
  43. data/lib/partitioned/version.rb +3 -0
  44. data/lib/tasks/desirable_tasks.rake +4 -0
  45. data/partitioned.gemspec +21 -0
  46. data/spec/dummy/.rspec +1 -0
  47. data/spec/dummy/README.rdoc +261 -0
  48. data/spec/dummy/Rakefile +7 -0
  49. data/spec/dummy/app/assets/javascripts/application.js +9 -0
  50. data/spec/dummy/app/assets/stylesheets/application.css +7 -0
  51. data/spec/dummy/app/controllers/application_controller.rb +3 -0
  52. data/spec/dummy/app/helpers/application_helper.rb +2 -0
  53. data/spec/dummy/app/views/layouts/application.html.erb +14 -0
  54. data/spec/dummy/config.ru +4 -0
  55. data/spec/dummy/config/application.rb +51 -0
  56. data/spec/dummy/config/boot.rb +10 -0
  57. data/spec/dummy/config/database.yml +32 -0
  58. data/spec/dummy/config/environment.rb +5 -0
  59. data/spec/dummy/config/environments/development.rb +30 -0
  60. data/spec/dummy/config/environments/production.rb +60 -0
  61. data/spec/dummy/config/environments/test.rb +39 -0
  62. data/spec/dummy/config/initializers/backtrace_silencers.rb +7 -0
  63. data/spec/dummy/config/initializers/inflections.rb +10 -0
  64. data/spec/dummy/config/initializers/mime_types.rb +5 -0
  65. data/spec/dummy/config/initializers/secret_token.rb +7 -0
  66. data/spec/dummy/config/initializers/session_store.rb +8 -0
  67. data/spec/dummy/config/initializers/wrap_parameters.rb +14 -0
  68. data/spec/dummy/config/locales/en.yml +5 -0
  69. data/spec/dummy/config/routes.rb +58 -0
  70. data/spec/dummy/public/404.html +26 -0
  71. data/spec/dummy/public/422.html +26 -0
  72. data/spec/dummy/public/500.html +26 -0
  73. data/spec/dummy/public/favicon.ico +0 -0
  74. data/spec/dummy/script/rails +6 -0
  75. data/spec/dummy/spec/spec_helper.rb +27 -0
  76. data/spec/monkey_patch_posgres_spec.rb +176 -0
  77. data/spec/partitioned/bulk_methods_mixin_spec.rb +512 -0
  78. data/spec/partitioned/by_created_at_spec.rb +62 -0
  79. data/spec/partitioned/by_foreign_key_spec.rb +95 -0
  80. data/spec/partitioned/by_id_spec.rb +97 -0
  81. data/spec/partitioned/by_integer_field_spec.rb +143 -0
  82. data/spec/partitioned/by_monthly_time_field_spec.rb +100 -0
  83. data/spec/partitioned/by_time_field_spec.rb +182 -0
  84. data/spec/partitioned/by_weekly_time_field_spec.rb +100 -0
  85. data/spec/partitioned/multi_level/configurator/dsl_spec.rb +88 -0
  86. data/spec/partitioned/multi_level/configurator/reader_spec.rb +147 -0
  87. data/spec/partitioned/partitioned_base/configurator/dsl_spec.rb +459 -0
  88. data/spec/partitioned/partitioned_base/configurator/reader_spec.rb +513 -0
  89. data/spec/partitioned/partitioned_base/sql_adapter_spec.rb +204 -0
  90. data/spec/partitioned/partitioned_base_spec.rb +173 -0
  91. data/spec/spec_helper.rb +32 -0
  92. data/spec/support/shared_example_spec_helper_for_integer_key.rb +137 -0
  93. data/spec/support/shared_example_spec_helper_for_time_key.rb +147 -0
  94. data/spec/support/tables_spec_helper.rb +47 -0
  95. metadata +250 -0
@@ -0,0 +1,13 @@
1
+ module Partitioned
2
+ #
3
+ # partition tables by created_at grouping them by week, with
4
+ # a week defined as seven days starting on Monday.
5
+ #
6
+ class ByCreatedAt < ByWeeklyTimeField
7
+ self.abstract_class = true
8
+
9
+ def self.partition_time_field
10
+ return :created_at
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,21 @@
1
+ module Partitioned
2
+ class ByForeignKey < ByIntegerField
3
+ self.abstract_class = true
4
+
5
+ def self.partition_integer_field
6
+ return partition_foreign_key
7
+ end
8
+
9
+ def self.partition_foreign_key
10
+ raise MethodNotImplemented.new(self, :partition_foreign_key)
11
+ end
12
+
13
+ partitioned do |partition|
14
+ partition.foreign_key lambda {|model, foreign_key_value|
15
+ return Configurator::Data::ForeignKey.new(model.partition_foreign_key,
16
+ ActiveSupport::Inflector::pluralize(model.partition_foreign_key.to_s.sub(/_id$/,'')),
17
+ :id)
18
+ }
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,35 @@
1
+ module Partitioned
2
+ #
3
+ # table partitioning by id. this partitioning breaks up data by
4
+ # the value of its primary key. a specific record's child table
5
+ # is determined by the number resulting from the integer math:
6
+ # ID / ById::partition_table_size * ById::partition_table_size
7
+ #
8
+ class ById < ByIntegerField
9
+ self.abstract_class = true
10
+
11
+ #
12
+ # specific to this partitioning, we need to prefetch the primary key (id)
13
+ # before we attempt to do the insert because the insert wants to know the
14
+ # name of the specific child table to access.
15
+ #
16
+ def self.prefetch_primary_key?
17
+ return true
18
+ end
19
+
20
+ #
21
+ # the number of records in each child table.
22
+ #
23
+ def self.partition_table_size
24
+ return 10000000
25
+ end
26
+
27
+ def self.partition_integer_field
28
+ return :id
29
+ end
30
+
31
+ partitioned do |partition|
32
+ partition.index :id, :unique => true
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,32 @@
1
+ module Partitioned
2
+ class ByIntegerField < PartitionedBase
3
+ self.abstract_class = true
4
+
5
+ def self.partition_table_size
6
+ return 1
7
+ end
8
+
9
+ def self.partition_integer_field
10
+ raise MethodNotImplemented.new(self, :partition_integer_field)
11
+ end
12
+
13
+ def self.partition_normalize_key_value(integer_field_value)
14
+ return integer_field_value / partition_table_size * partition_table_size
15
+ end
16
+
17
+ partitioned do |partition|
18
+ partition.on lambda {|model| return model.partition_integer_field }
19
+
20
+ partition.order "substring(tablename, 2)::integer desc"
21
+
22
+ partition.check_constraint lambda { |model, id|
23
+ value = model.partition_normalize_key_value(id)
24
+ if model.partition_table_size == 1
25
+ return "( #{model.partition_integer_field} = #{value} )"
26
+ else
27
+ return "( #{model.partition_integer_field} >= #{value} and #{model.partition_integer_field} < #{value + model.partition_table_size} )"
28
+ end
29
+ }
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,23 @@
1
+ module Partitioned
2
+ #
3
+ # partition tables by a time field grouping them by month, with
4
+ # a month starting on the first day of the calendar month.
5
+ #
6
+ class ByMonthlyTimeField < ByTimeField
7
+ self.abstract_class = true
8
+
9
+ def self.partition_normalize_key_value(time_value)
10
+ return time_value.at_beginning_of_month
11
+ end
12
+
13
+ def self.partition_table_size
14
+ return 1.month
15
+ end
16
+
17
+ partitioned do |partition|
18
+ partition.base_name lambda { |model, time_field|
19
+ return model.partition_normalize_key_value(time_field).strftime('%Y%m')
20
+ }
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,65 @@
1
+ module Partitioned
2
+ #
3
+ # partition tables by a time field grouping them by day
4
+ #
5
+ class ByTimeField < PartitionedBase
6
+ self.abstract_class = true
7
+
8
+ #
9
+ # generate an enumerable that represents all the dates between
10
+ # start_date and end_date, advancing by step each time
11
+ #
12
+ # this can be passed to calls that take an enumerable, like create_infrastructure
13
+ #
14
+ def self.partition_generate_range(start_date, end_date, step = :default)
15
+ step = partition_table_size if step == :default
16
+ current_date = partition_normalize_key_value(start_date)
17
+ dates = []
18
+ while current_date <= end_date
19
+ dates << current_date
20
+ current_date += step
21
+ end
22
+ return dates
23
+ end
24
+
25
+ #
26
+ # normalize the value to the current day
27
+ #
28
+ def self.partition_normalize_key_value(time_value)
29
+ return time_value.to_date
30
+ end
31
+
32
+ #
33
+ # the size of the partition, 1.day
34
+ #
35
+ def self.partition_table_size
36
+ return 1.day
37
+ end
38
+
39
+ #
40
+ # abstract -- implement in a derived class.
41
+ # the name of the time-related field we will use to partition child tables
42
+ #
43
+ def self.partition_time_field
44
+ raise MethodNotImplemented.new(self, :partition_time_field)
45
+ end
46
+
47
+ partitioned do |partition|
48
+ partition.on lambda {|model| model.partition_time_field}
49
+
50
+ partition.index lambda {|model, time_field|
51
+ return Configurator::Data::Index.new(model.partition_time_field, {})
52
+ }
53
+
54
+ partition.order 'tablename desc'
55
+
56
+ partition.base_name lambda { |model, time_field|
57
+ return model.partition_normalize_key_value(time_field).strftime('%Y%m%d')
58
+ }
59
+ partition.check_constraint lambda { |model, time_field|
60
+ date = model.partition_normalize_key_value(time_field)
61
+ return "#{model.partition_time_field} >= '#{date.strftime}' AND #{model.partition_time_field} < '#{(date + model.partition_table_size).strftime}'"
62
+ }
63
+ end
64
+ end
65
+ end
@@ -0,0 +1,30 @@
1
+ module Partitioned
2
+ #
3
+ # partition tables by a time field grouping them by week, with
4
+ # a week defined as seven days starting on Monday.
5
+ #
6
+ class ByWeeklyTimeField < ByTimeField
7
+ self.abstract_class = true
8
+
9
+ #
10
+ # normalize a partition key value by week. We've picked
11
+ # the beginning of the week to key on, which is Monday.
12
+ #
13
+ def self.partition_normalize_key_value(time_value)
14
+ return time_value.at_beginning_of_week
15
+ end
16
+
17
+ #
18
+ # The size of the partition table, 7 days (1.week)
19
+ #
20
+ def self.partition_table_size
21
+ return 1.week
22
+ end
23
+
24
+ partitioned do |partition|
25
+ partition.base_name lambda { |model, time_field|
26
+ return model.partition_normalize_key_value(time_field).strftime('%Y%m%d')
27
+ }
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,20 @@
1
+ module Partitioned
2
+ #
3
+ # table partitioning by a referenced id column which itself is partitioned
4
+ # further weekly by a date column.
5
+ #
6
+ class MultiLevel < PartitionedBase
7
+ self.abstract_class = true
8
+
9
+ #
10
+ # Normalize each value with the using class at the same index.
11
+ #
12
+ def self.partition_normalize_key_value(values)
13
+ normalized_values = []
14
+ [*values].each_with_index do |value,index|
15
+ normalized_values << configurator.using_class(index).partition_normalize_key_value(value)
16
+ end
17
+ return normalized_values
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,14 @@
1
+ module Partitioned
2
+ class MultiLevel
3
+ module Configurator
4
+ class Data < Partitioned::PartitionedBase::Configurator::Data
5
+ attr_accessor :using_classes
6
+
7
+ def initialize
8
+ super
9
+ @using_classes = []
10
+ end
11
+ end
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,32 @@
1
+ module Partitioned
2
+ class MultiLevel
3
+ module Configurator
4
+ class Dsl < Partitioned::PartitionedBase::Configurator::Dsl
5
+ class InvalidForMultiLevelPartitioning < StandardError
6
+ def initialize(model, dsl_key, remedy)
7
+ super("#{model.name}: '#{dsl_key}' is not valid for multi-level partitioning. #{remedy}")
8
+ end
9
+ end
10
+
11
+ attr_reader :data, :model
12
+
13
+ def initialize(most_derived_activerecord_class)
14
+ super(most_derived_activerecord_class, Partitioned::MultiLevel::Configurator::Data)
15
+ @using_classes = []
16
+ end
17
+
18
+ #
19
+ # Adds to the list of classes used for multi-level partitioning.
20
+ #
21
+ def using_classes(*classes)
22
+ data.using_classes += [*classes]
23
+ end
24
+
25
+ def on(*ignored)
26
+ raise InvalidForMultiLevelPartitioning.new(model, :on, "the partitioned keyword 'using' is used to define multi-level partitioned tables.")
27
+ end
28
+
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,162 @@
1
+ module Partitioned
2
+ class MultiLevel
3
+ module Configurator
4
+ class Reader < Partitioned::PartitionedBase::Configurator::Reader
5
+ UsingConfigurator = Struct.new(:model, :sliced_class, :dsl)
6
+ def initialize(most_derived_activerecord_class)
7
+ super
8
+ @using_classes = nil
9
+ @using_configurators = nil
10
+ end
11
+
12
+ #
13
+ # The fields used to partition child tables.
14
+ #
15
+ def on_fields
16
+ unless @on_fields
17
+ @on_fields = using_collect(&:on_field).map(&:to_sym)
18
+ end
19
+ return @on_fields
20
+ end
21
+
22
+ #
23
+ # The schema name of the table that is the direct ancestor of a child table.
24
+ #
25
+ def parent_table_schema_name(*partition_key_values)
26
+ if partition_key_values.length <= 1
27
+ return super
28
+ end
29
+
30
+ return schema_name
31
+ end
32
+
33
+ #
34
+ # The table name of the table that is the direct ancestor of a child table.
35
+ #
36
+ def parent_table_name(*partition_key_values)
37
+ if partition_key_values.length <= 1
38
+ return super
39
+ end
40
+
41
+ # [0...-1] is here because the base name for this parent table is derived by removing the leaf key value
42
+ # that is:
43
+ # current top level table name: public.foos
44
+ # child schema area: foos_partitions
45
+ # current partition classes: ByCompanyId then ByCreatedAt
46
+ # current key values:
47
+ # company_id: 42
48
+ # created_at: 2011-01-03
49
+ # child table name: foos_partitions.p42_20110103
50
+ # parent table: foos_partitions.p42
51
+ # grand parent table: public.foos
52
+ return parent_table_schema_name(*partition_key_values) + '.p' + base_name(*partition_key_values[0...-1])
53
+ end
54
+
55
+ #
56
+ # Define the check constraint for a given child table.
57
+ #
58
+ def check_constraint(*partition_key_values)
59
+ index = partition_key_values.length - 1
60
+ value = partition_key_values[index]
61
+ return using_configurator(index).check_constraint(value)
62
+ end
63
+
64
+ #
65
+ # The name of the child table without the schema name or name prefix.
66
+ #
67
+ def base_name(*partition_key_values)
68
+ parts = []
69
+ partition_key_values.each_with_index do |value,index|
70
+ parts << using_configurator(index).base_name(value)
71
+ end
72
+ return parts.join('_')
73
+ end
74
+
75
+ def using_configurator(index)
76
+ return using_class(index).configurator
77
+ end
78
+
79
+ def using_class(index)
80
+ return using_classes[index]
81
+ end
82
+
83
+
84
+ protected
85
+
86
+ def using_configurators
87
+ unless @using_configurators
88
+ @using_configurators = []
89
+ using_classes.each do |using_class|
90
+ using_class.ancestors.each do |ancestor|
91
+ next if ancestor.class == Module
92
+ @using_configurators << UsingConfigurator.new(using_class, ancestor, ancestor::configurator_dsl) if ancestor::configurator_dsl
93
+ break if ancestor == Partitioned::PartitionedBase
94
+ end
95
+ end
96
+ end
97
+ return @using_configurators
98
+ end
99
+
100
+ def using_classes
101
+ unless @using_classes
102
+ @using_classes = collect_from_collection(&:using_classes).inject([]) do |array,new_items|
103
+ array += [*new_items]
104
+ end.to_a
105
+ end
106
+ return @using_classes
107
+ end
108
+
109
+ def using_collect(*partition_key_values, &block)
110
+ values = []
111
+ using_configurators.each do |using_configurator|
112
+ data = using_configurator.dsl.data
113
+ intermediate_value = block.call(data) rescue nil
114
+ if intermediate_value.is_a? Proc
115
+ values << intermediate_value.call(using_configurator.model, *partition_key_values)
116
+ elsif intermediate_value.is_a? String
117
+ values << eval("\"#{intermediate_value}\"")
118
+ else
119
+ values << intermediate_value unless intermediate_value.blank?
120
+ end
121
+ end
122
+ return values
123
+ end
124
+
125
+ def using_collect_first(*partition_key_values, &block)
126
+ using_configurators.each do |using_configurator|
127
+ data = using_configurator.dsl.data
128
+ intermediate_value = block.call(data) rescue nil
129
+ if intermediate_value.is_a? Proc
130
+ return intermediate_value.call(using_configurator.model, *partition_key_values)
131
+ elsif intermediate_value.is_a? String
132
+ return eval("\"#{intermediate_value}\"")
133
+ else
134
+ return intermediate_value unless intermediate_value.nil?
135
+ end
136
+ end
137
+ return nil
138
+ end
139
+
140
+ def using_collect_from_collection(*partition_key_values, &block)
141
+ values = []
142
+ using_configurators.each do |using_configurator|
143
+ data = using_configurator.dsl.data
144
+ intermediate_values = []
145
+ intermediate_values = block.call(data) rescue nil
146
+ [*intermediate_values].each do |intermediate_value|
147
+ if intermediate_value.is_a? Proc
148
+ values << intermediate_value.call(using_configurator.model, *partition_key_values)
149
+ elsif intermediate_value.is_a? String
150
+ values << eval("\"#{intermediate_value}\"")
151
+ else
152
+ values << intermediate_value unless intermediate_value.blank?
153
+ end
154
+ end
155
+ end
156
+ return values
157
+ end
158
+
159
+ end
160
+ end
161
+ end
162
+ end