massive_record 0.1.1 → 0.2.0.beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. data/CHANGELOG.md +28 -5
  2. data/Gemfile.lock +12 -12
  3. data/README.md +29 -1
  4. data/lib/massive_record/adapters/initialize.rb +18 -0
  5. data/lib/massive_record/adapters/thrift/adapter.rb +25 -0
  6. data/lib/massive_record/adapters/thrift/column_family.rb +24 -0
  7. data/lib/massive_record/adapters/thrift/connection.rb +73 -0
  8. data/lib/massive_record/{thrift → adapters/thrift/hbase}/hbase.rb +0 -0
  9. data/lib/massive_record/{thrift → adapters/thrift/hbase}/hbase_constants.rb +0 -0
  10. data/lib/massive_record/{thrift → adapters/thrift/hbase}/hbase_types.rb +0 -0
  11. data/lib/massive_record/adapters/thrift/row.rb +150 -0
  12. data/lib/massive_record/adapters/thrift/scanner.rb +59 -0
  13. data/lib/massive_record/adapters/thrift/table.rb +169 -0
  14. data/lib/massive_record/orm/attribute_methods/read.rb +2 -1
  15. data/lib/massive_record/orm/base.rb +61 -3
  16. data/lib/massive_record/orm/coders/chained.rb +71 -0
  17. data/lib/massive_record/orm/coders/json.rb +17 -0
  18. data/lib/massive_record/orm/coders/yaml.rb +15 -0
  19. data/lib/massive_record/orm/coders.rb +3 -0
  20. data/lib/massive_record/orm/errors.rb +15 -2
  21. data/lib/massive_record/orm/finders/scope.rb +166 -0
  22. data/lib/massive_record/orm/finders.rb +45 -24
  23. data/lib/massive_record/orm/persistence.rb +4 -4
  24. data/lib/massive_record/orm/relations/interface.rb +170 -0
  25. data/lib/massive_record/orm/relations/metadata.rb +150 -0
  26. data/lib/massive_record/orm/relations/proxy/references_many.rb +229 -0
  27. data/lib/massive_record/orm/relations/proxy/references_one.rb +40 -0
  28. data/lib/massive_record/orm/relations/proxy/references_one_polymorphic.rb +49 -0
  29. data/lib/massive_record/orm/relations/proxy.rb +174 -0
  30. data/lib/massive_record/orm/relations.rb +6 -0
  31. data/lib/massive_record/orm/schema/column_interface.rb +1 -1
  32. data/lib/massive_record/orm/schema/field.rb +62 -27
  33. data/lib/massive_record/orm/single_table_inheritance.rb +21 -0
  34. data/lib/massive_record/version.rb +1 -1
  35. data/lib/massive_record/wrapper/adapter.rb +6 -0
  36. data/lib/massive_record/wrapper/base.rb +6 -7
  37. data/lib/massive_record/wrapper/cell.rb +9 -32
  38. data/lib/massive_record/wrapper/column_families_collection.rb +2 -2
  39. data/lib/massive_record/wrapper/errors.rb +10 -0
  40. data/lib/massive_record/wrapper/tables_collection.rb +1 -1
  41. data/lib/massive_record.rb +5 -12
  42. data/spec/orm/cases/attribute_methods_spec.rb +5 -1
  43. data/spec/orm/cases/base_spec.rb +77 -4
  44. data/spec/orm/cases/column_spec.rb +1 -1
  45. data/spec/orm/cases/finder_default_scope.rb +53 -0
  46. data/spec/orm/cases/finder_scope_spec.rb +288 -0
  47. data/spec/orm/cases/finders_spec.rb +56 -13
  48. data/spec/orm/cases/persistence_spec.rb +20 -5
  49. data/spec/orm/cases/single_table_inheritance_spec.rb +26 -0
  50. data/spec/orm/cases/table_spec.rb +1 -1
  51. data/spec/orm/cases/timestamps_spec.rb +16 -16
  52. data/spec/orm/coders/chained_spec.rb +73 -0
  53. data/spec/orm/coders/json_spec.rb +6 -0
  54. data/spec/orm/coders/yaml_spec.rb +6 -0
  55. data/spec/orm/models/best_friend.rb +7 -0
  56. data/spec/orm/models/friend.rb +4 -0
  57. data/spec/orm/models/person.rb +20 -6
  58. data/spec/orm/models/{person_with_timestamps.rb → person_with_timestamp.rb} +1 -1
  59. data/spec/orm/models/test_class.rb +3 -0
  60. data/spec/orm/relations/interface_spec.rb +207 -0
  61. data/spec/orm/relations/metadata_spec.rb +202 -0
  62. data/spec/orm/relations/proxy/references_many_spec.rb +624 -0
  63. data/spec/orm/relations/proxy/references_one_polymorphic_spec.rb +106 -0
  64. data/spec/orm/relations/proxy/references_one_spec.rb +111 -0
  65. data/spec/orm/relations/proxy_spec.rb +13 -0
  66. data/spec/orm/schema/field_spec.rb +101 -2
  67. data/spec/shared/orm/coders/an_orm_coder.rb +14 -0
  68. data/spec/shared/orm/relations/proxy.rb +154 -0
  69. data/spec/shared/orm/relations/singular_proxy.rb +68 -0
  70. data/spec/spec_helper.rb +1 -0
  71. data/spec/thrift/cases/encoding_spec.rb +28 -7
  72. data/spec/wrapper/cases/adapter_spec.rb +9 -0
  73. data/spec/wrapper/cases/connection_spec.rb +13 -10
  74. data/spec/wrapper/cases/table_spec.rb +85 -85
  75. metadata +74 -22
  76. data/TODO.md +0 -8
  77. data/lib/massive_record/exceptions.rb +0 -11
  78. data/lib/massive_record/wrapper/column_family.rb +0 -22
  79. data/lib/massive_record/wrapper/connection.rb +0 -71
  80. data/lib/massive_record/wrapper/row.rb +0 -173
  81. data/lib/massive_record/wrapper/scanner.rb +0 -61
  82. data/lib/massive_record/wrapper/table.rb +0 -149
  83. data/spec/orm/cases/hbase/connection_spec.rb +0 -13
data/CHANGELOG.md CHANGED
@@ -1,10 +1,34 @@
1
1
  # v0.2.0 (git develop)
2
2
 
3
3
 
4
- # v0.1.2 (git master)
5
-
6
-
7
-
4
+ # v0.2.0.beta (git master)
5
+
6
+ - ORM will now take care of serializing and de-serializing attributes like arrays, hashes etc. It does so
7
+ based on the type of your fields. You can select either JSON or YAML serialization for your data. As a default it
8
+ will use JSON. You can also, by chaining multiple coders together add support for multiple serialization types
9
+ when reading data from the database.
10
+ - Thrift-adapter will no longer auto-serialize objects like hashes and arrays. Its values must now be strings, and it
11
+ will only take care of encoding/decoding it to and from what Thrift expects (binary encoding).
12
+ - Compare Person === proxy_targeting_a_person will now be true. Makes case-when-constructions doable.
13
+ - Single table inheritance is supported. By default you can have an attribute called type to give you support for it in a table.
14
+ - A default_scope is possible to set on classes. For instance: Calling default_scope select(:only_this_column_family)
15
+ inside of a class will execute finder operations with this as default scope. If you need to fetch records of class
16
+ without your preset default scope you can use Model.unscoped.
17
+ - We now have some ActiveRecord like chaining of method calls when we do find-operations. Like Person.select(:column_family).limit(2)
18
+ is the same as Person.all(:select => ['column_family'], :limit => 2)
19
+ - references_many has first() and limit() which use the target array if loaded, or load only what they need from the database.
20
+ - Wrapper::Thrift has been moved into Adapter::Thrift. Adding more adapters should not be that hard now.
21
+ - References many is now possible. We have two strategies: Store an array of foreign keys in the proxy_owner,
22
+ or supply a ids-starts-with and open up a scanner and read from that point.
23
+ - Setting a non-parsable value on date/time field will no longer raise an error.
24
+ - Scanner no longer fetches with a limit of 10 by default. It is set to 100000000.
25
+ - References one relations support polymorphic relations.
26
+ - Simple implementation of references_one relation. This is where you have a foreign key you will look up in a different table.
27
+
28
+
29
+ # v0.1.2
30
+ - Fixed, or at least made better, the is_yaml? method in Wrapper::Cell. This functionality of serializing/de-serializing
31
+ should be moved up into the ORM asap, but for now a hot fix has been applied.
8
32
 
9
33
 
10
34
  # v0.1.1
@@ -18,7 +42,6 @@
18
42
  - Bugfix: Database cleaner no longer tries to remove tables with same name twice.
19
43
 
20
44
 
21
-
22
45
  # v0.1.0
23
46
 
24
47
  - Communication with Hbase via Thrift.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- massive_record (0.1.1)
4
+ massive_record (0.2.0.beta)
5
5
  activemodel
6
6
  activesupport
7
7
  thrift (>= 0.5.0)
@@ -9,23 +9,23 @@ PATH
9
9
  GEM
10
10
  remote: http://rubygems.org/
11
11
  specs:
12
- activemodel (3.0.3)
13
- activesupport (= 3.0.3)
12
+ activemodel (3.0.5)
13
+ activesupport (= 3.0.5)
14
14
  builder (~> 2.1.2)
15
15
  i18n (~> 0.4)
16
- activesupport (3.0.3)
16
+ activesupport (3.0.5)
17
17
  builder (2.1.2)
18
18
  diff-lcs (1.1.2)
19
19
  i18n (0.5.0)
20
- rspec (2.4.0)
21
- rspec-core (~> 2.4.0)
22
- rspec-expectations (~> 2.4.0)
23
- rspec-mocks (~> 2.4.0)
24
- rspec-core (2.4.0)
25
- rspec-expectations (2.4.0)
20
+ rspec (2.5.0)
21
+ rspec-core (~> 2.5.0)
22
+ rspec-expectations (~> 2.5.0)
23
+ rspec-mocks (~> 2.5.0)
24
+ rspec-core (2.5.1)
25
+ rspec-expectations (2.5.0)
26
26
  diff-lcs (~> 1.1.2)
27
- rspec-mocks (2.4.0)
28
- thrift (0.5.0)
27
+ rspec-mocks (2.5.0)
28
+ thrift (0.6.0)
29
29
 
30
30
  PLATFORMS
31
31
  ruby
data/README.md CHANGED
@@ -54,6 +54,8 @@ Both MassiveRecord::ORM::Table and MassiveRecord::ORM::Column do now have some f
54
54
  - Casting of attributes
55
55
  - Serialization of array / hashes
56
56
  - Timestamps like created_at and updated_at. Updated at will always be available, created_at must be defined. See example down:
57
+ - Finder scopes. Like: Person.select(:only_columns_from_this_family).limit(10).collect(&:name)
58
+ - Ability to set a default scope.
57
59
 
58
60
  Tables also have:
59
61
  - Persistence method calls like create, save and destroy (but they do not actually save things to hbase)
@@ -63,11 +65,19 @@ Tables also have:
63
65
  - Save / update methods
64
66
  - Auto-creation of table and column families on save if table does not exist.
65
67
  - Destroy records
68
+ - Relations: See MassiveRecord::ORM::Relations::Interface ClassMethods for documentation
66
69
 
67
70
 
68
71
  Here is an example of usage, both for Table and Column:
69
72
 
70
73
  class Person < MassiveRecord::ORM::Table
74
+ references_one :boss, :class_name => "Person", :store_in => :info
75
+ references_one :attachment, :polymorphic => true
76
+ references_many :friends, :store_in => :info
77
+ references_many :cars, :records_starts_from => :cars_start_id
78
+
79
+ default_scope select(:info)
80
+
71
81
  column_family :info do
72
82
  field :name
73
83
  field :email
@@ -75,12 +85,29 @@ Here is an example of usage, both for Table and Column:
75
85
  field :points, :integer, :default => 0
76
86
  field :date_of_birth, :date
77
87
  field :newsletter, :boolean, :default => false
88
+ field :type # Used for single table inheritance
78
89
 
79
90
  timestamps # ..or field :created_at, :time
80
91
  end
81
92
 
93
+ column_family :misc do
94
+ field :with_a_lot_of_uninteresting_data
95
+ end
96
+
97
+
82
98
  validates_presence_of :name, :email
83
99
  validates_format_of :email, :with => /\A([^@\s]+)@((?:[-a-z0-9]+\.)+[a-z]{2,})\Z/i
100
+
101
+ # Returns the id the scanner should start from in the Car table
102
+ # to fetch cars related to this person
103
+ def cars_start_id
104
+ id+'-'
105
+ end
106
+ end
107
+
108
+ class Friend < Person
109
+ # This one will be stored in Person's table with its type set to Friend.
110
+ # Calling Person.all will return object back as a Friend.
84
111
  end
85
112
 
86
113
 
@@ -148,7 +175,8 @@ You can, if you'd like, work directly against the adapter.
148
175
  ## Planned work
149
176
 
150
177
  - Rename Wrapper to Adapter, and make it easy to switch from Thrift to another way of communicating with Hbase.
151
- - Associations and embedded objects.
178
+ - Embedded objects.
179
+ - Cache the decoded values of attributes instead of using value_is_already_decoded?. This will fix a possible problem with YAML as coder backend.
152
180
  - Implement other Adapters, for instance using jruby and the Java API.
153
181
 
154
182
 
@@ -0,0 +1,18 @@
1
# Adapter selection for MassiveRecord (added hunk, 18 lines).
# Defines a module-level reader/writer pair (MassiveRecord.adapter / MassiveRecord.adapter=) backed by
# the module instance variable @adapter, then at load time: sets the adapter to :thrift, raises a
# RuntimeError unless the current value is :thrift, and requires
# "massive_record/adapters/<adapter>/adapter".
# NOTE(review): because the default is assigned unconditionally right before the check, the check can
# only fail if the setter is changed or the default is edited - confirm that is the intent.
+ module MassiveRecord
2
+ def self.adapter=(name)
3
+ @adapter = name
4
+ end
5
+
6
+ def self.adapter
7
+ @adapter
8
+ end
9
+ end
10
+
11
+ # Default adapter is set to thrift
12
+ MassiveRecord.adapter = :thrift
13
+
14
+ # Check the adapter is valid
15
+ raise "The adapter can only be 'thrift'." unless [:thrift].include?(MassiveRecord.adapter)
16
+
17
+ # Load specific adapter
18
+ require "massive_record/adapters/#{MassiveRecord.adapter}/adapter"
@@ -0,0 +1,25 @@
1
# Entry point of the Thrift adapter (added hunk, 25 lines).
# Declares the (still empty) namespace MassiveRecord::Adapters::Thrift and binds the top-level
# constant ADAPTER to it. The require lines after this block load the thrift gem, the generated
# HBase classes and the adapter's own column_family/connection/row/scanner/table files.
# NOTE(review): ADAPTER is a global (top-level) constant, not namespaced under MassiveRecord.
+ module MassiveRecord
2
+ module Adapters
3
+ module Thrift
4
+ end
5
+ end
6
+ end
7
+
8
+ ADAPTER = MassiveRecord::Adapters::Thrift
9
+
10
+ # Thrift Gems
11
+ require 'thrift'
12
+ require 'thrift/transport/socket'
13
+ require 'thrift/protocol/binary_protocol'
14
+
15
+ # Generated Ruby classes from Thrift for HBase
16
+ require 'massive_record/adapters/thrift/hbase/hbase_constants'
17
+ require 'massive_record/adapters/thrift/hbase/hbase_types'
18
+ require 'massive_record/adapters/thrift/hbase/hbase'
19
+
20
+ # Adapter
21
+ require 'massive_record/adapters/thrift/column_family'
22
+ require 'massive_record/adapters/thrift/connection'
23
+ require 'massive_record/adapters/thrift/row'
24
+ require 'massive_record/adapters/thrift/scanner'
25
+ require 'massive_record/adapters/thrift/table'
@@ -0,0 +1,24 @@
1
module MassiveRecord
  module Adapters
    module Thrift
      # Value object describing one HBase column family: its name, how many
      # versions to keep and the columns known for it.
      class ColumnFamily

        attr_accessor :name, :max_versions, :columns

        # column_name - name of the column family (String or Symbol).
        # opts        - optional Hash:
        #               :max_versions - versions to keep (defaults to 10)
        #               :columns      - list of columns (defaults to [])
        # Any other keys in opts are ignored.
        def initialize(column_name, opts = {})
          @name         = column_name
          @max_versions = opts[:max_versions] || 10
          @columns      = opts[:columns] || []
        end

        # Builds the Thrift ColumnDescriptor for this family.
        #
        # The descriptor name is the family name followed by exactly one ":".
        # A trailing ":" already present on the name (e.g. "info:") is dropped
        # first, so the result is never "info::".
        #
        # Returns an Apache::Hadoop::Hbase::Thrift::ColumnDescriptor.
        def descriptor
          family_name = "#{name.to_s.chomp(':')}:"

          Apache::Hadoop::Hbase::Thrift::ColumnDescriptor.new do |col|
            col.name = family_name
            col.maxVersions = max_versions
          end
        end

      end
    end
  end
end
@@ -0,0 +1,73 @@
1
module MassiveRecord
  module Adapters
    module Thrift
      # Connection to HBase through the Thrift gateway.
      #
      # Any method this class does not define itself is forwarded to the
      # generated HBase Thrift client (see method_missing), opening the
      # connection lazily and reconnecting once on IO/transport errors.
      class Connection

        attr_accessor :host, :port, :timeout

        # opts - optional Hash:
        #        :host    - HBase Thrift host
        #        :port    - port (defaults to 9090)
        #        :timeout - socket timeout (defaults to 4000)
        # NOTE(review): timeout is handed unchanged to ::Thrift::Socket;
        # confirm which unit that class expects.
        def initialize(opts = {})
          @timeout = opts[:timeout] || 4000
          @host    = opts[:host]
          @port    = opts[:port] || 9090
        end

        # Lazily built, memoized buffered socket transport.
        def transport
          @transport ||= ::Thrift::BufferedTransport.new(::Thrift::Socket.new(@host, @port, @timeout))
        end

        # Creates the Thrift client and opens the transport.
        #
        # Returns true on success.
        # Raises MassiveRecord::Wrapper::Errors::ConnectionException if the
        # transport could not be opened.
        def open
          protocol = ::Thrift::BinaryProtocol.new(transport)
          @client = Apache::Hadoop::Hbase::Thrift::Hbase::Client.new(protocol)

          begin
            transport.open
            true
          rescue
            raise MassiveRecord::Wrapper::Errors::ConnectionException, "Unable to connect to HBase on #{@host}, port #{@port}"
          end
        end

        # Closes the transport. Returns true when the transport's close
        # returned nil, and also when no transport was ever created
        # (nothing to close) instead of failing on nil.
        def close
          return true unless @transport

          @transport.close.nil?
        end

        def client
          @client
        end

        # True only if a transport exists and reports itself open. Always a
        # real boolean; does not rely on ActiveSupport's Object#try.
        def open?
          !!(@transport && @transport.open?)
        end

        # Collection of table names known to HBase, bound to this connection.
        def tables
          collection = MassiveRecord::Wrapper::TablesCollection.new
          collection.connection = self
          getTableNames().each { |table_name| collection.push(table_name) }
          collection
        end

        def load_table(table_name)
          MassiveRecord::Wrapper::Table.new(self, table_name)
        end

        # Wrap the HBase API to be able to catch errors and try to reconnect.
        #
        # The call is forwarded to the Thrift client, opening the connection
        # first if there is no client yet. On IOError the client is rebuilt
        # and the call retried once; on ::Thrift::TransportException the
        # transport is discarded as well before retrying.
        def method_missing(method, *args)
          open unless @client
          client.send(method, *args) if @client
        rescue IOError
          reconnect_and_retry(method, args)
        rescue ::Thrift::TransportException
          @transport = nil
          reconnect_and_retry(method, args)
        end

        # Keeps respond_to? in line with method_missing once a client exists.
        def respond_to_missing?(method, include_private = false)
          (@client && @client.respond_to?(method, include_private)) || super
        end

        private

        # Drops the current client, reopens the connection and replays the call.
        def reconnect_and_retry(method, args)
          @client = nil
          open
          client.send(method, *args) if @client
        end

      end
    end
  end
end
@@ -0,0 +1,150 @@
1
module MassiveRecord
  module Adapters
    module Thrift
      # One HBase row: its id, the table it belongs to and its cells, keyed
      # by "column_family:column_name".
      class Row

        attr_accessor :id, :column_families, :columns, :new_record, :table

        # opts - optional Hash:
        #        :id              - row key
        #        :table           - table object the row belongs to
        #        :column_families - defaults to []
        #        :columns         - Hash "family:column" => cell, defaults to {}
        #        :values          - nested Hash {family => {column => value}}
        #                           merged into the columns
        def initialize(opts = {})
          @id = opts[:id]
          @table = opts[:table]
          @column_families = opts[:column_families] || []
          # @columns has to exist before values= runs: update_column writes
          # into it, and assigning it afterwards would throw the values away.
          @columns = opts[:columns] || {}
          @new_record = true
          self.values = opts[:values] || {}
        end

        # Names ("family:column") of all columns held by this row.
        def column_names
          columns.keys
        end

        def fetch_all_column_families
          @table.fetch_column_family
          fetch_column_families(@table.column_family_names)
        end

        # Rebuilds @column_families from the table's column family descriptions.
        # NOTE(review): the list argument is not used; every family of the
        # table is loaded regardless of what is passed in.
        def fetch_column_families(list)
          @column_families = table.column_families.collect do |column_name, description|
            MassiveRecord::Wrapper::ColumnFamily.new(column_name.split(":").first, {
              :row          => self,
              :name         => description.name,
              :max_versions => description.maxVersions,
              :compression  => description.compression,
              :in_memory    => description.inMemory
              # bloomFilterType, bloomFilterVectorSize, bloomFilterNbHashes, blockCacheEnabled, timeToLive
            })
          end
        end

        # = Parse columns / cells and create a Hash from them
        #
        # Returns {"id" => id, "family:column" => cell value, ...}.
        def values
          @columns.inject({"id" => id}) { |h, (column_name, cell)| h[column_name] = cell.value; h }
        end

        # Merges a nested Hash {family => {column => value}} into the columns.
        def values=(data)
          @values = {} # kept from the original; nothing in this class reads it
          update_columns(data)
        end

        def update_columns(data = {})
          data.each do |column_family_name, columns|
            columns.each do |column_name, values|
              update_column(column_family_name, column_name, values)
            end
          end
        end

        # Sets the value of one cell, creating the cell if it does not exist yet.
        def update_column(column_family_name, column_name, value)
          column = "#{column_family_name}:#{column_name}"

          if @columns[column].nil?
            @columns[column] = MassiveRecord::Wrapper::Cell.new({:value => value, :created_at => Time.now})
          else
            @columns[column].value = value
          end
        end

        # = Parse columns cells and save them
        #
        # Sends one mutation per column in a single mutateRow call.
        # Returns true when mutateRow returned nil.
        def save
          mutations = @columns.collect do |column_name, cell|
            m = Apache::Hadoop::Hbase::Thrift::Mutation.new
            m.column = column_name
            m.value = cell.value_to_thrift
            m
          end

          @table.client.mutateRow(@table.name, id.to_s, mutations).nil?
        end


        #
        # FIXME
        #
        # The thrift wrapper is only working with strings as far as I can see,
        # and the atomicIncrement call on strings kinda doesn't make sense on strings
        #
        # For now I'll implement this without atomicIncrement, to get the behaviour we want.
        # Guess this in time will either be fixed or raised an not-supported-error. If the
        # latter is the case I guess we'll need to shift over to a jruby adapter and use the
        # java api instead of thrift.
        #
        # Reads the current value of column_name from this row's cells, adds
        # by and writes the result back with mutateRow.
        #
        # column_name - "family:column" (String or Symbol)
        # by          - Integer to add (may be negative), defaults to 1
        #
        # Returns the new value as a String, or nil if mutateRow returned non-nil.
        # Raises RuntimeError if the stored value is missing or not a whole
        # number, or if by is not an Integer.
        def atomic_increment(column_name, by = 1)
          # @table.client.atomicIncrement(@table.name, id.to_s, column_name, by)
          column = column_name.to_s
          cell = @columns[column]
          value_to_increment = cell && cell.value

          # \A / \z anchor the whole string; the optional "-" lets a counter
          # that has gone negative be incremented again.
          raise "Value to increment (#{value_to_increment}) doesnt seem to be a number!" unless value_to_increment.to_s =~ /\A-?\d+\z/
          # Integer instead of Fixnum: Fixnum is gone in current Ruby versions.
          raise "Argument by must be an integer" unless by.is_a? Integer

          value_to_increment = (value_to_increment.to_i + by).to_s

          mutation = Apache::Hadoop::Hbase::Thrift::Mutation.new
          mutation.column = column
          mutation.value = value_to_increment

          if @table.client.mutateRow(@table.name, id.to_s, [mutation]).nil?
            value_to_increment
          end
        end

        # Builds a persisted (new_record == false) Row from a Thrift row result.
        # Cell timestamps are read as milliseconds and split into
        # seconds + microseconds for Time.at.
        def self.populate_from_trow_result(result, connection, table_name, column_families = [])
          row = self.new
          row.id = result.row
          row.new_record = false
          row.table = Table.new(connection, table_name)
          row.column_families = column_families

          result.columns.each do |name, value|
            row.columns[name] = MassiveRecord::Wrapper::Cell.new({
              :value      => value.value,
              :created_at => Time.at(value.timestamp / 1000, (value.timestamp % 1000) * 1000)
            })
          end

          row
        end

        # Deletes the whole row. Returns true when deleteAllRow returned nil.
        def destroy
          @table.client.deleteAllRow(@table.name, @id).nil?
        end

        def new_record?
          @new_record
        end

        # NOTE(review): simply returns self; looks like a placeholder.
        def prev
          self
        end

        # Most recent created_at among this row's cells (nil without cells).
        def updated_at
          columns.values.collect(&:created_at).max
        end

      end
    end
  end
end