massive_record 0.1.1 → 0.2.0.beta

This diff covers the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (83)
  1. data/CHANGELOG.md +28 -5
  2. data/Gemfile.lock +12 -12
  3. data/README.md +29 -1
  4. data/lib/massive_record/adapters/initialize.rb +18 -0
  5. data/lib/massive_record/adapters/thrift/adapter.rb +25 -0
  6. data/lib/massive_record/adapters/thrift/column_family.rb +24 -0
  7. data/lib/massive_record/adapters/thrift/connection.rb +73 -0
  8. data/lib/massive_record/{thrift → adapters/thrift/hbase}/hbase.rb +0 -0
  9. data/lib/massive_record/{thrift → adapters/thrift/hbase}/hbase_constants.rb +0 -0
  10. data/lib/massive_record/{thrift → adapters/thrift/hbase}/hbase_types.rb +0 -0
  11. data/lib/massive_record/adapters/thrift/row.rb +150 -0
  12. data/lib/massive_record/adapters/thrift/scanner.rb +59 -0
  13. data/lib/massive_record/adapters/thrift/table.rb +169 -0
  14. data/lib/massive_record/orm/attribute_methods/read.rb +2 -1
  15. data/lib/massive_record/orm/base.rb +61 -3
  16. data/lib/massive_record/orm/coders/chained.rb +71 -0
  17. data/lib/massive_record/orm/coders/json.rb +17 -0
  18. data/lib/massive_record/orm/coders/yaml.rb +15 -0
  19. data/lib/massive_record/orm/coders.rb +3 -0
  20. data/lib/massive_record/orm/errors.rb +15 -2
  21. data/lib/massive_record/orm/finders/scope.rb +166 -0
  22. data/lib/massive_record/orm/finders.rb +45 -24
  23. data/lib/massive_record/orm/persistence.rb +4 -4
  24. data/lib/massive_record/orm/relations/interface.rb +170 -0
  25. data/lib/massive_record/orm/relations/metadata.rb +150 -0
  26. data/lib/massive_record/orm/relations/proxy/references_many.rb +229 -0
  27. data/lib/massive_record/orm/relations/proxy/references_one.rb +40 -0
  28. data/lib/massive_record/orm/relations/proxy/references_one_polymorphic.rb +49 -0
  29. data/lib/massive_record/orm/relations/proxy.rb +174 -0
  30. data/lib/massive_record/orm/relations.rb +6 -0
  31. data/lib/massive_record/orm/schema/column_interface.rb +1 -1
  32. data/lib/massive_record/orm/schema/field.rb +62 -27
  33. data/lib/massive_record/orm/single_table_inheritance.rb +21 -0
  34. data/lib/massive_record/version.rb +1 -1
  35. data/lib/massive_record/wrapper/adapter.rb +6 -0
  36. data/lib/massive_record/wrapper/base.rb +6 -7
  37. data/lib/massive_record/wrapper/cell.rb +9 -32
  38. data/lib/massive_record/wrapper/column_families_collection.rb +2 -2
  39. data/lib/massive_record/wrapper/errors.rb +10 -0
  40. data/lib/massive_record/wrapper/tables_collection.rb +1 -1
  41. data/lib/massive_record.rb +5 -12
  42. data/spec/orm/cases/attribute_methods_spec.rb +5 -1
  43. data/spec/orm/cases/base_spec.rb +77 -4
  44. data/spec/orm/cases/column_spec.rb +1 -1
  45. data/spec/orm/cases/finder_default_scope.rb +53 -0
  46. data/spec/orm/cases/finder_scope_spec.rb +288 -0
  47. data/spec/orm/cases/finders_spec.rb +56 -13
  48. data/spec/orm/cases/persistence_spec.rb +20 -5
  49. data/spec/orm/cases/single_table_inheritance_spec.rb +26 -0
  50. data/spec/orm/cases/table_spec.rb +1 -1
  51. data/spec/orm/cases/timestamps_spec.rb +16 -16
  52. data/spec/orm/coders/chained_spec.rb +73 -0
  53. data/spec/orm/coders/json_spec.rb +6 -0
  54. data/spec/orm/coders/yaml_spec.rb +6 -0
  55. data/spec/orm/models/best_friend.rb +7 -0
  56. data/spec/orm/models/friend.rb +4 -0
  57. data/spec/orm/models/person.rb +20 -6
  58. data/spec/orm/models/{person_with_timestamps.rb → person_with_timestamp.rb} +1 -1
  59. data/spec/orm/models/test_class.rb +3 -0
  60. data/spec/orm/relations/interface_spec.rb +207 -0
  61. data/spec/orm/relations/metadata_spec.rb +202 -0
  62. data/spec/orm/relations/proxy/references_many_spec.rb +624 -0
  63. data/spec/orm/relations/proxy/references_one_polymorphic_spec.rb +106 -0
  64. data/spec/orm/relations/proxy/references_one_spec.rb +111 -0
  65. data/spec/orm/relations/proxy_spec.rb +13 -0
  66. data/spec/orm/schema/field_spec.rb +101 -2
  67. data/spec/shared/orm/coders/an_orm_coder.rb +14 -0
  68. data/spec/shared/orm/relations/proxy.rb +154 -0
  69. data/spec/shared/orm/relations/singular_proxy.rb +68 -0
  70. data/spec/spec_helper.rb +1 -0
  71. data/spec/thrift/cases/encoding_spec.rb +28 -7
  72. data/spec/wrapper/cases/adapter_spec.rb +9 -0
  73. data/spec/wrapper/cases/connection_spec.rb +13 -10
  74. data/spec/wrapper/cases/table_spec.rb +85 -85
  75. metadata +74 -22
  76. data/TODO.md +0 -8
  77. data/lib/massive_record/exceptions.rb +0 -11
  78. data/lib/massive_record/wrapper/column_family.rb +0 -22
  79. data/lib/massive_record/wrapper/connection.rb +0 -71
  80. data/lib/massive_record/wrapper/row.rb +0 -173
  81. data/lib/massive_record/wrapper/scanner.rb +0 -61
  82. data/lib/massive_record/wrapper/table.rb +0 -149
  83. data/spec/orm/cases/hbase/connection_spec.rb +0 -13
data/TODO.md DELETED
@@ -1,8 +0,0 @@
- # TODO
-
- * code documentation
- * Rails ORM (ActiveModel etc.)
- * write tests
- * add generators for hbase.yml etc.
- * add migration tools
- * ...
data/lib/massive_record/exceptions.rb DELETED
@@ -1,11 +0,0 @@
- module MassiveRecord
-
-   class ConnectionException < Exception
-
-     def initialize
-
-     end
-
-   end
-
- end
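Note: `ConnectionException` was raised by the old `Wrapper::Connection#open` (see the connection.rb hunk below) when the Thrift transport could not be opened. Its removal lines up with the new `data/lib/massive_record/wrapper/errors.rb` (+10 lines in the list above), which presumably takes over the wrapper's error types in 0.2.0.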
data/lib/massive_record/wrapper/column_family.rb DELETED
@@ -1,22 +0,0 @@
- module MassiveRecord
-   module Wrapper
-     class ColumnFamily
-
-       attr_accessor :name, :max_versions, :columns
-
-       def initialize(column_name, opts = {})
-         @name = column_name
-         @max_versions = opts[:max_versions] || 10
-         @columns = opts[:columns] || []
-       end
-
-       def descriptor
-         Apache::Hadoop::Hbase::Thrift::ColumnDescriptor.new do |col|
-           col.name = "#{name}:"
-           col.maxVersions = max_versions
-         end
-       end
-
-     end
-   end
- end
```
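For context, a minimal sketch of how this removed wrapper was used: it held a family name plus options and rendered itself as the Thrift `ColumnDescriptor` that table creation expects. The `:info` family name and version count are illustrative, not from the source.

```ruby
# Illustrative usage only -- exercising the removed ColumnFamily wrapper.
family = MassiveRecord::Wrapper::ColumnFamily.new(:info, :max_versions => 3)

descriptor = family.descriptor  # an Apache::Hadoop::Hbase::Thrift::ColumnDescriptor
descriptor.name                 # => "info:" (HBase Thrift expects the trailing colon)
descriptor.maxVersions          # => 3
```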
data/lib/massive_record/wrapper/connection.rb DELETED
@@ -1,71 +0,0 @@
- module MassiveRecord
-   module Wrapper
-     class Connection
-
-       attr_accessor :host, :port, :timeout
-
-       def initialize(opts = {})
-         @timeout = 4000
-         @host = opts[:host]
-         @port = opts[:port] || 9090
-       end
-
-       def transport
-         @transport ||= Thrift::BufferedTransport.new(Thrift::Socket.new(@host, @port, @timeout))
-       end
-
-       def open
-         protocol = Thrift::BinaryProtocol.new(transport)
-         @client = Apache::Hadoop::Hbase::Thrift::Hbase::Client.new(protocol)
-
-         begin
-           transport.open()
-           true
-         rescue
-           raise MassiveRecord::ConnectionException.new, "Unable to connect to HBase on #{@host}, port #{@port}"
-         end
-       end
-
-       def close
-         @transport.close.nil?
-       end
-
-       def client
-         @client
-       end
-
-       def active?
-         @transport.open?
-       end
-
-       def tables
-         collection = TablesCollection.new
-         collection.connection = self
-         getTableNames().each{|table_name| collection.push(table_name)}
-         collection
-       end
-
-       def load_table(table_name)
-         MassiveRecord::Wrapper::Table.new(self, table_name)
-       end
-
-       # Wrapp HBase API to be able to catch errors and try reconnect
-       def method_missing(method, *args)
-         begin
-           open if not @client
-           client.send(method, *args) if @client
-         rescue IOError
-           @client = nil
-           open
-           client.send(method, *args) if @client
-         rescue Thrift::TransportException
-           @transport = nil
-           @client = nil
-           open
-           client.send(method, *args) if @client
-         end
-       end
-
-     end
-   end
- end
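The interesting part of this removed class is the `method_missing` wrapper: any call the connection does not define itself is forwarded to the Thrift client, and on `IOError` or `Thrift::TransportException` the connection is reopened and the call retried once. A hedged usage sketch; host and port values are illustrative:

```ruby
# Illustrative usage only -- driving the removed wrapper connection.
connection = MassiveRecord::Wrapper::Connection.new(:host => "localhost", :port => 9090)
connection.open    # raises MassiveRecord::ConnectionException if HBase is unreachable

connection.tables  # TablesCollection built via getTableNames()

# Any other method (getTableNames, mutateRow, scannerOpen, ...) hits
# method_missing, which forwards it to the Thrift client, reconnecting
# once if the transport has dropped in the meantime.
connection.getTableNames

connection.close
```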
data/lib/massive_record/wrapper/row.rb DELETED
@@ -1,173 +0,0 @@
- require 'json'
-
- module MassiveRecord
-   module Wrapper
-     class Row
-
-       attr_accessor :id, :column_families, :columns, :new_record, :table
-
-       def initialize(opts = {})
-         @id = opts[:id]
-         self.values = opts[:values] || {}
-         @table = opts[:table]
-         @column_families = opts[:column_families] || []
-         @columns = opts[:columns] || {}
-         @new_record = true
-       end
-
-       def column_names
-         columns.keys
-       end
-
-       def fetch_all_column_families
-         @table.fetch_column_family
-         fetch_column_families(@table.column_family_names)
-       end
-
-       def fetch_column_families(list)
-         @column_families = table.column_families.collect do |column_name, description|
-           ColumnFamily.new(column_name.split(":").first, {
-             :row => self,
-             :name => description.name,
-             :max_versions => description.maxVersions,
-             :compression => description.compression,
-             :in_memory => description.inMemory
-             # bloomFilterType, bloomFilterVectorSize, bloomFilterNbHashes, blockCacheEnabled, timeToLive
-           })
-         end
-       end
-
-       # = Parse columns / cells and create a Hash from them
-       def values
-         @columns.inject({"id" => id}) {|h, (column_name, cell)| h[column_name] = cell.deserialize_value; h}
-       end
-
-       def values=(data)
-         @values = {}
-         update_columns(data)
-       end
-
-       def update_columns(data = {})
-         data.each do |column_family_name, columns|
-           columns.each do |column_name, values|
-             update_column(column_family_name, column_name, values)
-           end
-         end
-       end
-
-       def update_column(column_family_name, column_name, value)
-         column = "#{column_family_name}:#{column_name}"
-
-         if @columns[column].nil?
-           @columns[column] = Cell.new({ :value => Cell.serialize_value(value), :created_at => Time.now })
-         else
-           @columns[column].serialize_value(value)
-         end
-       end
-
-       # = Merge column values with new data : it implies that column values is a JSON encoded string
-       def merge_columns(data)
-         data.each do |column_family_name, columns|
-           columns.each do |column_name, values|
-             if values.is_a?(Hash)
-               unless @columns["#{column_family_name}:#{column_name}"].nil?
-                 column_value = @columns["#{column_family_name}:#{column_name}"].deserialize_value.merge(values)
-               else
-                 column_value = values
-               end
-             elsif values.is_a?(Array)
-               unless @columns["#{column_family_name}:#{column_name}"].nil?
-                 column_value = @columns["#{column_family_name}:#{column_name}"].deserialize_value | values
-               else
-                 column_value = values
-               end
-             else
-               column_value = values
-             end
-             update_column(column_family_name, column_name, column_value)
-           end
-         end
-       end
-
-       # = Parse columns cells and save them
-       def save
-         mutations = []
-
-         @columns.each do |column_name, cell|
-           m = Apache::Hadoop::Hbase::Thrift::Mutation.new
-           m.column = column_name
-           m.value = cell.serialized_value
-
-           mutations.push(m)
-         end
-
-         @table.client.mutateRow(@table.name, id.to_s, mutations).nil?
-       end
-
-
-       #
-       # FIXME
-       #
-       # The thrift wrapper is only working with strings as far as I can see,
-       # and the atomicIncrement call on strings kinda doesn't make sense on strings
-       #
-       # For now I'll implement this without atomicIncrement, to get the behaviour we want.
-       # Guess this in time will either be fixed or raised an not-supported-error. If the
-       # latter is the case I guess we'll need to shift over to a jruby adapter and use the
-       # java api instead of thrift.
-       #
-       def atomic_increment(column_name, by = 1)
-         # @table.client.atomicIncrement(@table.name, id.to_s, column_name, by)
-         value_to_increment = @columns[column_name.to_s].value
-
-         raise "Value to increment (#{value_to_increment}) doesnt seem to be a number!" unless value_to_increment =~ /^\d+$/
-         raise "Argument by must be an integer" unless by.is_a? Fixnum
-
-         value_to_increment = value_to_increment.to_i
-         value_to_increment += by
-         value_to_increment = value_to_increment.to_s
-
-         mutation = Apache::Hadoop::Hbase::Thrift::Mutation.new
-         mutation.column = column_name
-         mutation.value = value_to_increment
-
-         if @table.client.mutateRow(@table.name, id.to_s, [mutation]).nil?
-           value_to_increment
-         end
-       end
-
-       def self.populate_from_trow_result(result, connection, table_name, column_families = [])
-         row = self.new
-         row.id = result.row
-         row.new_record = false
-         row.table = Table.new(connection, table_name)
-         row.column_families = column_families
-
-         result.columns.each do |name, value|
-           row.columns[name] = Cell.new({
-             :value => value.value,
-             :created_at => Time.at(value.timestamp / 1000, (value.timestamp % 1000) * 1000)
-           })
-         end
-
-         row
-       end
-
-       def destroy
-         @table.client.deleteAllRow(@table.name, @id).nil?
-       end
-
-       def new_record?
-         @new_record
-       end
-
-       def prev
-         self
-       end
-
-       def updated_at
-         columns.values.collect(&:created_at).max
-       end
-     end
-   end
- end
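Two details worth noting in the removed `Row`: `merge_columns` merges Hashes and unions Arrays against the decoded current cell value (the `require 'json'` at the top suggests cells round-trip through JSON), and `atomic_increment` is, per its own FIXME, a non-atomic read-modify-write rather than a real Thrift `atomicIncrement`. A sketch of the merge semantics; the family, column names, and values are made up:

```ruby
# Illustrative only -- assumes `row` is a Row whose cells round-trip
# through the JSON coder, as the `require 'json'` above suggests.
row.update_columns(:info => { :tags => ["a"], :meta => { "x" => 1 } })
row.merge_columns(:info => { :tags => ["b"], :meta => { "y" => 2 } })

row.values["info:tags"]  # => ["a", "b"]              (Array | Array, a union)
row.values["info:meta"]  # => { "x" => 1, "y" => 2 }  (Hash#merge)
# Any other value type simply replaces the stored cell value.
```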
data/lib/massive_record/wrapper/scanner.rb DELETED
@@ -1,61 +0,0 @@
- module MassiveRecord
-   module Wrapper
-     class Scanner
-
-       attr_accessor :connection, :table_name, :column_family_names, :opened_scanner
-       attr_accessor :start_key, :offset_key, :created_at, :limit
-       attr_accessor :formatted_column_family_names, :column_family_names
-
-       def initialize(connection, table_name, column_family_names, opts = {})
-         @connection = connection
-         @table_name = table_name
-         @column_family_names = column_family_names.collect{|n| n.split(":").first}
-         @column_family_names = opts[:columns] unless opts[:columns].nil?
-         @formatted_column_family_names = column_family_names.collect{|n| "#{n.split(":").first}:"}
-         @start_key = opts[:start_key].to_s
-         @offset_key = opts[:offset_key].to_s
-         @created_at = opts[:created_at].to_s
-         @limit = opts[:limit] || 10
-       end
-
-       def key
-         start_key.empty? ? offset_key : start_key
-       end
-
-       def open
-         if created_at.empty?
-           self.opened_scanner = connection.scannerOpen(table_name, key, formatted_column_family_names)
-         else
-           self.opened_scanner = connection.scannerOpenTs(table_name, key, formatted_column_family_names, created_at)
-         end
-       end
-
-       def close
-         connection.scannerClose(opened_scanner)
-       end
-
-       def fetch_trows(opts = {})
-         connection.scannerGetList(opened_scanner, limit)
-       end
-
-       def fetch_rows(opts = {})
-         populate_rows(fetch_trows(opts))
-       end
-
-       def populate_rows(results)
-         results.collect do |result|
-           if offset_key.empty?
-             populate_row(result) unless result.row.match(/^#{start_key}/).nil?
-           else
-             populate_row(result)
-           end
-         end.select{|r| !r.nil?}
-       end
-
-       def populate_row(result)
-         Row.populate_from_trow_result(result, connection, table_name, column_family_names)
-       end
-
-     end
-   end
- end
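The removed `Scanner` wraps Thrift's scannerOpen / scannerGetList / scannerClose lifecycle; note that when no `:offset_key` is given, `start_key` also acts as a client-side row-prefix filter in `populate_rows`. A minimal sketch; the table name, family, and keys are illustrative:

```ruby
# Illustrative only -- lifecycle of the removed Scanner.
scanner = MassiveRecord::Wrapper::Scanner.new(connection, "people", ["info:"],
                                              :start_key => "person-", :limit => 50)
scanner.open               # scannerOpen, or scannerOpenTs when :created_at is given
rows = scanner.fetch_rows  # up to 50 Rows, filtered against /^person-/
scanner.close
```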
data/lib/massive_record/wrapper/table.rb DELETED
@@ -1,149 +0,0 @@
- module MassiveRecord
-   module Wrapper
-     class Table
-
-       attr_accessor :connection, :name, :column_families
-
-       def initialize(connection, table_name)
-         @connection = connection
-         @name = table_name.to_s
-         init_column_families
-       end
-
-       def init_column_families
-         @column_families = ColumnFamiliesCollection.new
-         @column_families.table = self
-       end
-
-       def self.create(connection, table_name, column_families = [])
-         table = self.new(connection, table_name)
-         table.column_families = column_families
-         table.save
-       end
-
-       def save
-         begin
-           client.createTable(name, @column_families.collect{|cf| cf.descriptor}).nil?
-         rescue Apache::Hadoop::Hbase::Thrift::AlreadyExists => ex
-           "The table already exists."
-         rescue => ex
-           raise ex
-         end
-       end
-
-       def client
-         connection
-       end
-
-       def disable
-         client.disableTable(name).nil?
-       end
-
-       def destroy
-         disable
-         client.deleteTable(name).nil?
-       end
-
-       def create_column_families(column_family_names)
-         column_family_names.each{|name| @column_families.push(ColumnFamily.new(name))}
-       end
-
-       def fetch_column_families
-         @column_families.clear
-         client.getColumnDescriptors(name).each do |column_name, description|
-           @column_families.push(ColumnFamily.new(column_name.split(":").first))
-         end
-         @column_families
-       end
-
-       def column_family_names
-         @column_families.collect{|column_family| column_family.name.to_s}
-       end
-
-       def fetch_column_family_names
-         fetch_column_families
-         column_family_names
-       end
-
-       def column_names
-         first.column_names
-       end
-
-       def scanner(opts = {})
-         scanner = Scanner.new(connection, name, column_family_names, format_options_for_scanner(opts))
-
-         if block_given?
-           begin
-             scanner.open
-             yield scanner
-           ensure
-             scanner.close
-           end
-         else
-           scanner
-         end
-       end
-
-       def format_options_for_scanner(opts = {})
-         {
-           :start_key => opts[:start],
-           :offset_key => opts[:offset],
-           :created_at => opts[:created_at],
-           :columns => opts[:select], # list of column families to fetch from hbase
-           :limit => opts[:limit] || opts[:batch_size]
-         }
-       end
-
-       def all(opts = {})
-         scanner(opts) do |s|
-           s.fetch_rows(opts)
-         end
-       end
-
-       def first(opts = {})
-         all(opts.merge(:limit => 1)).first
-       end
-
-       def find(*args)
-         arg = args[0]
-         opts = args[1] || {}
-         arg.is_a?(Array) ? arg.collect{|id| first(opts.merge(:start => id))} : first(opts.merge(:start => arg))
-       end
-
-       def find_in_batches(opts = {})
-         results_limit = opts.delete(:limit)
-         results_found = 0
-
-         scanner(opts) do |s|
-           while (true) do
-             s.limit = results_limit - results_found if !results_limit.nil? && results_limit <= results_found + s.limit
-             rows = s.fetch_rows
-             if rows.empty?
-               break
-             else
-               results_found += rows.size
-               yield rows
-             end
-           end
-         end
-       end
-
-       def exists?
-         connection.tables.include?(name)
-       end
-
-       def regions
-         connection.getTableRegions(name).collect do |r|
-           {
-             :start_key => r.startKey,
-             :end_key => r.endKey,
-             :id => r.id,
-             :name => r.name,
-             :version => r.version
-           }
-         end
-       end
-
-     end
-   end
- end
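Finally, the removed `Table` exposed an ActiveRecord-flavoured API on top of the scanner; `find_in_batches` pulls `:limit` out of the options (so the scanner's per-fetch limit falls back to `:batch_size`) and shrinks the last fetch so the total never exceeds it. A minimal sketch; the table name and numbers are illustrative:

```ruby
# Illustrative only -- batch iteration on the removed Table wrapper.
table = MassiveRecord::Wrapper::Table.new(connection, :people)

table.find_in_batches(:batch_size => 100, :limit => 250, :select => ["info"]) do |rows|
  # Yields 100, 100, then 50 rows: the last batch is trimmed so no more
  # than :limit rows are yielded in total.
  rows.each { |row| puts row.id }
end
```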