massive_record 0.1.1 → 0.2.0.beta

Files changed (83)
  1. data/CHANGELOG.md +28 -5
  2. data/Gemfile.lock +12 -12
  3. data/README.md +29 -1
  4. data/lib/massive_record/adapters/initialize.rb +18 -0
  5. data/lib/massive_record/adapters/thrift/adapter.rb +25 -0
  6. data/lib/massive_record/adapters/thrift/column_family.rb +24 -0
  7. data/lib/massive_record/adapters/thrift/connection.rb +73 -0
  8. data/lib/massive_record/{thrift → adapters/thrift/hbase}/hbase.rb +0 -0
  9. data/lib/massive_record/{thrift → adapters/thrift/hbase}/hbase_constants.rb +0 -0
  10. data/lib/massive_record/{thrift → adapters/thrift/hbase}/hbase_types.rb +0 -0
  11. data/lib/massive_record/adapters/thrift/row.rb +150 -0
  12. data/lib/massive_record/adapters/thrift/scanner.rb +59 -0
  13. data/lib/massive_record/adapters/thrift/table.rb +169 -0
  14. data/lib/massive_record/orm/attribute_methods/read.rb +2 -1
  15. data/lib/massive_record/orm/base.rb +61 -3
  16. data/lib/massive_record/orm/coders/chained.rb +71 -0
  17. data/lib/massive_record/orm/coders/json.rb +17 -0
  18. data/lib/massive_record/orm/coders/yaml.rb +15 -0
  19. data/lib/massive_record/orm/coders.rb +3 -0
  20. data/lib/massive_record/orm/errors.rb +15 -2
  21. data/lib/massive_record/orm/finders/scope.rb +166 -0
  22. data/lib/massive_record/orm/finders.rb +45 -24
  23. data/lib/massive_record/orm/persistence.rb +4 -4
  24. data/lib/massive_record/orm/relations/interface.rb +170 -0
  25. data/lib/massive_record/orm/relations/metadata.rb +150 -0
  26. data/lib/massive_record/orm/relations/proxy/references_many.rb +229 -0
  27. data/lib/massive_record/orm/relations/proxy/references_one.rb +40 -0
  28. data/lib/massive_record/orm/relations/proxy/references_one_polymorphic.rb +49 -0
  29. data/lib/massive_record/orm/relations/proxy.rb +174 -0
  30. data/lib/massive_record/orm/relations.rb +6 -0
  31. data/lib/massive_record/orm/schema/column_interface.rb +1 -1
  32. data/lib/massive_record/orm/schema/field.rb +62 -27
  33. data/lib/massive_record/orm/single_table_inheritance.rb +21 -0
  34. data/lib/massive_record/version.rb +1 -1
  35. data/lib/massive_record/wrapper/adapter.rb +6 -0
  36. data/lib/massive_record/wrapper/base.rb +6 -7
  37. data/lib/massive_record/wrapper/cell.rb +9 -32
  38. data/lib/massive_record/wrapper/column_families_collection.rb +2 -2
  39. data/lib/massive_record/wrapper/errors.rb +10 -0
  40. data/lib/massive_record/wrapper/tables_collection.rb +1 -1
  41. data/lib/massive_record.rb +5 -12
  42. data/spec/orm/cases/attribute_methods_spec.rb +5 -1
  43. data/spec/orm/cases/base_spec.rb +77 -4
  44. data/spec/orm/cases/column_spec.rb +1 -1
  45. data/spec/orm/cases/finder_default_scope.rb +53 -0
  46. data/spec/orm/cases/finder_scope_spec.rb +288 -0
  47. data/spec/orm/cases/finders_spec.rb +56 -13
  48. data/spec/orm/cases/persistence_spec.rb +20 -5
  49. data/spec/orm/cases/single_table_inheritance_spec.rb +26 -0
  50. data/spec/orm/cases/table_spec.rb +1 -1
  51. data/spec/orm/cases/timestamps_spec.rb +16 -16
  52. data/spec/orm/coders/chained_spec.rb +73 -0
  53. data/spec/orm/coders/json_spec.rb +6 -0
  54. data/spec/orm/coders/yaml_spec.rb +6 -0
  55. data/spec/orm/models/best_friend.rb +7 -0
  56. data/spec/orm/models/friend.rb +4 -0
  57. data/spec/orm/models/person.rb +20 -6
  58. data/spec/orm/models/{person_with_timestamps.rb → person_with_timestamp.rb} +1 -1
  59. data/spec/orm/models/test_class.rb +3 -0
  60. data/spec/orm/relations/interface_spec.rb +207 -0
  61. data/spec/orm/relations/metadata_spec.rb +202 -0
  62. data/spec/orm/relations/proxy/references_many_spec.rb +624 -0
  63. data/spec/orm/relations/proxy/references_one_polymorphic_spec.rb +106 -0
  64. data/spec/orm/relations/proxy/references_one_spec.rb +111 -0
  65. data/spec/orm/relations/proxy_spec.rb +13 -0
  66. data/spec/orm/schema/field_spec.rb +101 -2
  67. data/spec/shared/orm/coders/an_orm_coder.rb +14 -0
  68. data/spec/shared/orm/relations/proxy.rb +154 -0
  69. data/spec/shared/orm/relations/singular_proxy.rb +68 -0
  70. data/spec/spec_helper.rb +1 -0
  71. data/spec/thrift/cases/encoding_spec.rb +28 -7
  72. data/spec/wrapper/cases/adapter_spec.rb +9 -0
  73. data/spec/wrapper/cases/connection_spec.rb +13 -10
  74. data/spec/wrapper/cases/table_spec.rb +85 -85
  75. metadata +74 -22
  76. data/TODO.md +0 -8
  77. data/lib/massive_record/exceptions.rb +0 -11
  78. data/lib/massive_record/wrapper/column_family.rb +0 -22
  79. data/lib/massive_record/wrapper/connection.rb +0 -71
  80. data/lib/massive_record/wrapper/row.rb +0 -173
  81. data/lib/massive_record/wrapper/scanner.rb +0 -61
  82. data/lib/massive_record/wrapper/table.rb +0 -149
  83. data/spec/orm/cases/hbase/connection_spec.rb +0 -13
data/TODO.md DELETED
@@ -1,8 +0,0 @@
-# TODO
-
-* code documentation
-* Rails ORM (ActiveModel etc.)
-* write tests
-* add generators for hbase.yml etc.
-* add migration tools
-* ...
data/lib/massive_record/exceptions.rb DELETED
@@ -1,11 +0,0 @@
-module MassiveRecord
-
-  class ConnectionException < Exception
-
-    def initialize
-
-    end
-
-  end
-
-end
data/lib/massive_record/wrapper/column_family.rb DELETED
@@ -1,22 +0,0 @@
-module MassiveRecord
-  module Wrapper
-    class ColumnFamily
-
-      attr_accessor :name, :max_versions, :columns
-
-      def initialize(column_name, opts = {})
-        @name = column_name
-        @max_versions = opts[:max_versions] || 10
-        @columns = opts[:columns] || []
-      end
-
-      def descriptor
-        Apache::Hadoop::Hbase::Thrift::ColumnDescriptor.new do |col|
-          col.name = "#{name}:"
-          col.maxVersions = max_versions
-        end
-      end
-
-    end
-  end
-end
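
The removed ColumnFamily mapped a family name and a :max_versions option onto a Thrift ColumnDescriptor, appending the trailing colon HBase's Thrift API expects on family names. A minimal sketch against that removed API (the family name and version count are illustrative, not from the source):

# Illustrative only: exercises the removed Wrapper::ColumnFamily from the diff above.
family = MassiveRecord::Wrapper::ColumnFamily.new(:info, :max_versions => 3)
descriptor = family.descriptor   # Apache::Hadoop::Hbase::Thrift::ColumnDescriptor
descriptor.name                  # => "info:" (trailing colon added by #descriptor)
descriptor.maxVersions           # => 3 (defaults to 10 when not given)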
data/lib/massive_record/wrapper/connection.rb DELETED
@@ -1,71 +0,0 @@
-module MassiveRecord
-  module Wrapper
-    class Connection
-
-      attr_accessor :host, :port, :timeout
-
-      def initialize(opts = {})
-        @timeout = 4000
-        @host = opts[:host]
-        @port = opts[:port] || 9090
-      end
-
-      def transport
-        @transport ||= Thrift::BufferedTransport.new(Thrift::Socket.new(@host, @port, @timeout))
-      end
-
-      def open
-        protocol = Thrift::BinaryProtocol.new(transport)
-        @client = Apache::Hadoop::Hbase::Thrift::Hbase::Client.new(protocol)
-
-        begin
-          transport.open()
-          true
-        rescue
-          raise MassiveRecord::ConnectionException.new, "Unable to connect to HBase on #{@host}, port #{@port}"
-        end
-      end
-
-      def close
-        @transport.close.nil?
-      end
-
-      def client
-        @client
-      end
-
-      def active?
-        @transport.open?
-      end
-
-      def tables
-        collection = TablesCollection.new
-        collection.connection = self
-        getTableNames().each{|table_name| collection.push(table_name)}
-        collection
-      end
-
-      def load_table(table_name)
-        MassiveRecord::Wrapper::Table.new(self, table_name)
-      end
-
-      # Wrapp HBase API to be able to catch errors and try reconnect
-      def method_missing(method, *args)
-        begin
-          open if not @client
-          client.send(method, *args) if @client
-        rescue IOError
-          @client = nil
-          open
-          client.send(method, *args) if @client
-        rescue Thrift::TransportException
-          @transport = nil
-          @client = nil
-          open
-          client.send(method, *args) if @client
-        end
-      end
-
-    end
-  end
-end
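
The removed Connection is worth a second look: method_missing proxies any unknown call to the underlying Thrift client, lazily opening the connection and retrying once after an IOError or Thrift::TransportException. A usage sketch under those semantics (host and port values are illustrative, not from the source):

# Illustrative only: exercises the removed Wrapper::Connection API from the diff above.
connection = MassiveRecord::Wrapper::Connection.new(:host => "localhost", :port => 9090)
connection.open             # opens the buffered Thrift transport, or raises ConnectionException
connection.tables           # TablesCollection built from getTableNames()
connection.getTableNames    # any other Thrift call is proxied via method_missing,
                            # with one reconnect attempt if the transport dropped
connection.close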
data/lib/massive_record/wrapper/row.rb DELETED
@@ -1,173 +0,0 @@
-require 'json'
-
-module MassiveRecord
-  module Wrapper
-    class Row
-
-      attr_accessor :id, :column_families, :columns, :new_record, :table
-
-      def initialize(opts = {})
-        @id = opts[:id]
-        self.values = opts[:values] || {}
-        @table = opts[:table]
-        @column_families = opts[:column_families] || []
-        @columns = opts[:columns] || {}
-        @new_record = true
-      end
-
-      def column_names
-        columns.keys
-      end
-
-      def fetch_all_column_families
-        @table.fetch_column_family
-        fetch_column_families(@table.column_family_names)
-      end
-
-      def fetch_column_families(list)
-        @column_families = table.column_families.collect do |column_name, description|
-          ColumnFamily.new(column_name.split(":").first, {
-            :row          => self,
-            :name         => description.name,
-            :max_versions => description.maxVersions,
-            :compression  => description.compression,
-            :in_memory    => description.inMemory
-            # bloomFilterType, bloomFilterVectorSize, bloomFilterNbHashes, blockCacheEnabled, timeToLive
-          })
-        end
-      end
-
-      # = Parse columns / cells and create a Hash from them
-      def values
-        @columns.inject({"id" => id}) {|h, (column_name, cell)| h[column_name] = cell.deserialize_value; h}
-      end
-
-      def values=(data)
-        @values = {}
-        update_columns(data)
-      end
-
-      def update_columns(data = {})
-        data.each do |column_family_name, columns|
-          columns.each do |column_name, values|
-            update_column(column_family_name, column_name, values)
-          end
-        end
-      end
-
-      def update_column(column_family_name, column_name, value)
-        column = "#{column_family_name}:#{column_name}"
-
-        if @columns[column].nil?
-          @columns[column] = Cell.new({ :value => Cell.serialize_value(value), :created_at => Time.now })
-        else
-          @columns[column].serialize_value(value)
-        end
-      end
-
-      # = Merge column values with new data : it implies that column values is a JSON encoded string
-      def merge_columns(data)
-        data.each do |column_family_name, columns|
-          columns.each do |column_name, values|
-            if values.is_a?(Hash)
-              unless @columns["#{column_family_name}:#{column_name}"].nil?
-                column_value = @columns["#{column_family_name}:#{column_name}"].deserialize_value.merge(values)
-              else
-                column_value = values
-              end
-            elsif values.is_a?(Array)
-              unless @columns["#{column_family_name}:#{column_name}"].nil?
-                column_value = @columns["#{column_family_name}:#{column_name}"].deserialize_value | values
-              else
-                column_value = values
-              end
-            else
-              column_value = values
-            end
-            update_column(column_family_name, column_name, column_value)
-          end
-        end
-      end
-
-      # = Parse columns cells and save them
-      def save
-        mutations = []
-
-        @columns.each do |column_name, cell|
-          m = Apache::Hadoop::Hbase::Thrift::Mutation.new
-          m.column = column_name
-          m.value = cell.serialized_value
-
-          mutations.push(m)
-        end
-
-        @table.client.mutateRow(@table.name, id.to_s, mutations).nil?
-      end
-
-
-      #
-      # FIXME
-      #
-      # The thrift wrapper is only working with strings as far as I can see,
-      # and the atomicIncrement call on strings kinda doesn't make sense on strings
-      #
-      # For now I'll implement this without atomicIncrement, to get the behaviour we want.
-      # Guess this in time will either be fixed or raised an not-supported-error. If the
-      # latter is the case I guess we'll need to shift over to a jruby adapter and use the
-      # java api instead of thrift.
-      #
-      def atomic_increment(column_name, by = 1)
-        # @table.client.atomicIncrement(@table.name, id.to_s, column_name, by)
-        value_to_increment = @columns[column_name.to_s].value
-
-        raise "Value to increment (#{value_to_increment}) doesnt seem to be a number!" unless value_to_increment =~ /^\d+$/
-        raise "Argument by must be an integer" unless by.is_a? Fixnum
-
-        value_to_increment = value_to_increment.to_i
-        value_to_increment += by
-        value_to_increment = value_to_increment.to_s
-
-        mutation = Apache::Hadoop::Hbase::Thrift::Mutation.new
-        mutation.column = column_name
-        mutation.value = value_to_increment
-
-        if @table.client.mutateRow(@table.name, id.to_s, [mutation]).nil?
-          value_to_increment
-        end
-      end
-
-      def self.populate_from_trow_result(result, connection, table_name, column_families = [])
-        row = self.new
-        row.id = result.row
-        row.new_record = false
-        row.table = Table.new(connection, table_name)
-        row.column_families = column_families
-
-        result.columns.each do |name, value|
-          row.columns[name] = Cell.new({
-            :value      => value.value,
-            :created_at => Time.at(value.timestamp / 1000, (value.timestamp % 1000) * 1000)
-          })
-        end
-
-        row
-      end
-
-      def destroy
-        @table.client.deleteAllRow(@table.name, @id).nil?
-      end
-
-      def new_record?
-        @new_record
-      end
-
-      def prev
-        self
-      end
-
-      def updated_at
-        columns.values.collect(&:created_at).max
-      end
-    end
-  end
-end
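
The FIXME above deserves spelling out: since the Thrift interface only carries strings, atomic_increment is a read-modify-write rather than a true atomicIncrement, so concurrent writers can lose updates. A sketch of the hazard (the column name and stored value are hypothetical):

# Two clients that both read "info:counter" as "5" will both write back "6",
# losing one increment; the commented-out atomicIncrement call would avoid this.
row = table.first
row.atomic_increment("info:counter")   # reads the cell, adds 1, writes it back via mutateRow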
data/lib/massive_record/wrapper/scanner.rb DELETED
@@ -1,61 +0,0 @@
-module MassiveRecord
-  module Wrapper
-    class Scanner
-
-      attr_accessor :connection, :table_name, :column_family_names, :opened_scanner
-      attr_accessor :start_key, :offset_key, :created_at, :limit
-      attr_accessor :formatted_column_family_names, :column_family_names
-
-      def initialize(connection, table_name, column_family_names, opts = {})
-        @connection = connection
-        @table_name = table_name
-        @column_family_names = column_family_names.collect{|n| n.split(":").first}
-        @column_family_names = opts[:columns] unless opts[:columns].nil?
-        @formatted_column_family_names = column_family_names.collect{|n| "#{n.split(":").first}:"}
-        @start_key = opts[:start_key].to_s
-        @offset_key = opts[:offset_key].to_s
-        @created_at = opts[:created_at].to_s
-        @limit = opts[:limit] || 10
-      end
-
-      def key
-        start_key.empty? ? offset_key : start_key
-      end
-
-      def open
-        if created_at.empty?
-          self.opened_scanner = connection.scannerOpen(table_name, key, formatted_column_family_names)
-        else
-          self.opened_scanner = connection.scannerOpenTs(table_name, key, formatted_column_family_names, created_at)
-        end
-      end
-
-      def close
-        connection.scannerClose(opened_scanner)
-      end
-
-      def fetch_trows(opts = {})
-        connection.scannerGetList(opened_scanner, limit)
-      end
-
-      def fetch_rows(opts = {})
-        populate_rows(fetch_trows(opts))
-      end
-
-      def populate_rows(results)
-        results.collect do |result|
-          if offset_key.empty?
-            populate_row(result) unless result.row.match(/^#{start_key}/).nil?
-          else
-            populate_row(result)
-          end
-        end.select{|r| !r.nil?}
-      end
-
-      def populate_row(result)
-        Row.populate_from_trow_result(result, connection, table_name, column_family_names)
-      end
-
-    end
-  end
-end
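
Note the start_key/offset_key split in populate_rows: with a start_key, fetched rows are post-filtered to ids matching /^start_key/, while an offset_key scans onward from that key without filtering. A sketch against the removed API (table name and keys are hypothetical):

# Illustrative only: follows the removed Wrapper::Scanner API from the diff above.
scanner = MassiveRecord::Wrapper::Scanner.new(connection, "people", ["info:"],
                                              :start_key => "person-", :limit => 25)
scanner.open                 # scannerOpen, or scannerOpenTs when :created_at is given
rows = scanner.fetch_rows    # up to 25 rows whose ids match /^person-/
scanner.close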
data/lib/massive_record/wrapper/table.rb DELETED
@@ -1,149 +0,0 @@
-module MassiveRecord
-  module Wrapper
-    class Table
-
-      attr_accessor :connection, :name, :column_families
-
-      def initialize(connection, table_name)
-        @connection = connection
-        @name = table_name.to_s
-        init_column_families
-      end
-
-      def init_column_families
-        @column_families = ColumnFamiliesCollection.new
-        @column_families.table = self
-      end
-
-      def self.create(connection, table_name, column_families = [])
-        table = self.new(connection, table_name)
-        table.column_families = column_families
-        table.save
-      end
-
-      def save
-        begin
-          client.createTable(name, @column_families.collect{|cf| cf.descriptor}).nil?
-        rescue Apache::Hadoop::Hbase::Thrift::AlreadyExists => ex
-          "The table already exists."
-        rescue => ex
-          raise ex
-        end
-      end
-
-      def client
-        connection
-      end
-
-      def disable
-        client.disableTable(name).nil?
-      end
-
-      def destroy
-        disable
-        client.deleteTable(name).nil?
-      end
-
-      def create_column_families(column_family_names)
-        column_family_names.each{|name| @column_families.push(ColumnFamily.new(name))}
-      end
-
-      def fetch_column_families
-        @column_families.clear
-        client.getColumnDescriptors(name).each do |column_name, description|
-          @column_families.push(ColumnFamily.new(column_name.split(":").first))
-        end
-        @column_families
-      end
-
-      def column_family_names
-        @column_families.collect{|column_family| column_family.name.to_s}
-      end
-
-      def fetch_column_family_names
-        fetch_column_families
-        column_family_names
-      end
-
-      def column_names
-        first.column_names
-      end
-
-      def scanner(opts = {})
-        scanner = Scanner.new(connection, name, column_family_names, format_options_for_scanner(opts))
-
-        if block_given?
-          begin
-            scanner.open
-            yield scanner
-          ensure
-            scanner.close
-          end
-        else
-          scanner
-        end
-      end
-
-      def format_options_for_scanner(opts = {})
-        {
-          :start_key  => opts[:start],
-          :offset_key => opts[:offset],
-          :created_at => opts[:created_at],
-          :columns    => opts[:select], # list of column families to fetch from hbase
-          :limit      => opts[:limit] || opts[:batch_size]
-        }
-      end
-
-      def all(opts = {})
-        scanner(opts) do |s|
-          s.fetch_rows(opts)
-        end
-      end
-
-      def first(opts = {})
-        all(opts.merge(:limit => 1)).first
-      end
-
-      def find(*args)
-        arg = args[0]
-        opts = args[1] || {}
-        arg.is_a?(Array) ? arg.collect{|id| first(opts.merge(:start => id))} : first(opts.merge(:start => arg))
-      end
-
-      def find_in_batches(opts = {})
-        results_limit = opts.delete(:limit)
-        results_found = 0
-
-        scanner(opts) do |s|
-          while (true) do
-            s.limit = results_limit - results_found if !results_limit.nil? && results_limit <= results_found + s.limit
-            rows = s.fetch_rows
-            if rows.empty?
-              break
-            else
-              results_found += rows.size
-              yield rows
-            end
-          end
-        end
-      end
-
-      def exists?
-        connection.tables.include?(name)
-      end
-
-      def regions
-        connection.getTableRegions(name).collect do |r|
-          {
-            :start_key => r.startKey,
-            :end_key   => r.endKey,
-            :id        => r.id,
-            :name      => r.name,
-            :version   => r.version
-          }
-        end
-      end
-
-    end
-  end
-end
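
Table#scanner opens the scanner, yields it, and guarantees the close in an ensure block; find_in_batches builds on that, shrinking the final batch so an overall :limit is honoured. A usage sketch against the removed API (table name and column are hypothetical):

# Illustrative only: exercises the removed Wrapper::Table API from the diff above.
table = MassiveRecord::Wrapper::Table.new(connection, :people)
table.find_in_batches(:batch_size => 100, :limit => 450) do |rows|
  rows.each { |row| puts row.values["info:name"] }   # batches of 100; the last is trimmed to 50
end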