massive_record 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. data/.autotest +15 -0
  2. data/.gitignore +6 -0
  3. data/.rspec +2 -0
  4. data/Gemfile +4 -0
  5. data/Gemfile.lock +38 -0
  6. data/Manifest +24 -0
  7. data/README.md +225 -0
  8. data/Rakefile +16 -0
  9. data/TODO.md +8 -0
  10. data/autotest/discover.rb +1 -0
  11. data/lib/massive_record.rb +18 -0
  12. data/lib/massive_record/exceptions.rb +11 -0
  13. data/lib/massive_record/orm/attribute_methods.rb +61 -0
  14. data/lib/massive_record/orm/attribute_methods/dirty.rb +80 -0
  15. data/lib/massive_record/orm/attribute_methods/read.rb +23 -0
  16. data/lib/massive_record/orm/attribute_methods/write.rb +24 -0
  17. data/lib/massive_record/orm/base.rb +176 -0
  18. data/lib/massive_record/orm/callbacks.rb +52 -0
  19. data/lib/massive_record/orm/column.rb +18 -0
  20. data/lib/massive_record/orm/config.rb +47 -0
  21. data/lib/massive_record/orm/errors.rb +47 -0
  22. data/lib/massive_record/orm/finders.rb +125 -0
  23. data/lib/massive_record/orm/id_factory.rb +133 -0
  24. data/lib/massive_record/orm/persistence.rb +199 -0
  25. data/lib/massive_record/orm/schema.rb +4 -0
  26. data/lib/massive_record/orm/schema/column_families.rb +48 -0
  27. data/lib/massive_record/orm/schema/column_family.rb +102 -0
  28. data/lib/massive_record/orm/schema/column_interface.rb +91 -0
  29. data/lib/massive_record/orm/schema/common_interface.rb +48 -0
  30. data/lib/massive_record/orm/schema/field.rb +128 -0
  31. data/lib/massive_record/orm/schema/fields.rb +37 -0
  32. data/lib/massive_record/orm/schema/table_interface.rb +96 -0
  33. data/lib/massive_record/orm/table.rb +9 -0
  34. data/lib/massive_record/orm/validations.rb +52 -0
  35. data/lib/massive_record/spec/support/simple_database_cleaner.rb +52 -0
  36. data/lib/massive_record/thrift/hbase.rb +2307 -0
  37. data/lib/massive_record/thrift/hbase_constants.rb +14 -0
  38. data/lib/massive_record/thrift/hbase_types.rb +225 -0
  39. data/lib/massive_record/version.rb +3 -0
  40. data/lib/massive_record/wrapper/base.rb +28 -0
  41. data/lib/massive_record/wrapper/cell.rb +45 -0
  42. data/lib/massive_record/wrapper/column_families_collection.rb +19 -0
  43. data/lib/massive_record/wrapper/column_family.rb +22 -0
  44. data/lib/massive_record/wrapper/connection.rb +71 -0
  45. data/lib/massive_record/wrapper/row.rb +170 -0
  46. data/lib/massive_record/wrapper/scanner.rb +50 -0
  47. data/lib/massive_record/wrapper/table.rb +148 -0
  48. data/lib/massive_record/wrapper/tables_collection.rb +13 -0
  49. data/massive_record.gemspec +28 -0
  50. data/spec/config.yml.example +4 -0
  51. data/spec/orm/cases/attribute_methods_spec.rb +47 -0
  52. data/spec/orm/cases/auto_generate_id_spec.rb +54 -0
  53. data/spec/orm/cases/base_spec.rb +176 -0
  54. data/spec/orm/cases/callbacks_spec.rb +309 -0
  55. data/spec/orm/cases/column_spec.rb +49 -0
  56. data/spec/orm/cases/config_spec.rb +103 -0
  57. data/spec/orm/cases/dirty_spec.rb +129 -0
  58. data/spec/orm/cases/encoding_spec.rb +49 -0
  59. data/spec/orm/cases/finders_spec.rb +208 -0
  60. data/spec/orm/cases/hbase/connection_spec.rb +13 -0
  61. data/spec/orm/cases/i18n_spec.rb +32 -0
  62. data/spec/orm/cases/id_factory_spec.rb +75 -0
  63. data/spec/orm/cases/persistence_spec.rb +479 -0
  64. data/spec/orm/cases/table_spec.rb +81 -0
  65. data/spec/orm/cases/validation_spec.rb +92 -0
  66. data/spec/orm/models/address.rb +7 -0
  67. data/spec/orm/models/person.rb +15 -0
  68. data/spec/orm/models/test_class.rb +5 -0
  69. data/spec/orm/schema/column_families_spec.rb +186 -0
  70. data/spec/orm/schema/column_family_spec.rb +131 -0
  71. data/spec/orm/schema/column_interface_spec.rb +115 -0
  72. data/spec/orm/schema/field_spec.rb +196 -0
  73. data/spec/orm/schema/fields_spec.rb +126 -0
  74. data/spec/orm/schema/table_interface_spec.rb +171 -0
  75. data/spec/spec_helper.rb +15 -0
  76. data/spec/support/connection_helpers.rb +76 -0
  77. data/spec/support/mock_massive_record_connection.rb +80 -0
  78. data/spec/thrift/cases/encoding_spec.rb +48 -0
  79. data/spec/wrapper/cases/connection_spec.rb +53 -0
  80. data/spec/wrapper/cases/table_spec.rb +231 -0
  81. metadata +228 -0
data/.autotest ADDED
@@ -0,0 +1,15 @@
# Autotest configuration: loads the restart, growl and fsevent extensions and
# installs a hook that runs when autotest initializes.
require 'autotest/restart'
require 'autotest/growl'
require 'autotest/fsevent'

Autotest.add_hook(:initialize) do |autotest|
  # Register each of these patterns as an autotest exception.
  excluded = %w{.git .svn .hg .DS_Store ._* vendor tmp log doc}
  excluded.each { |pattern| autotest.add_exception(pattern) }

  # take out the default (test/test*rb)
  autotest.clear_mappings

  # A change to any Ruby file under lib/ maps to every Ruby file under spec/.
  autotest.add_mapping(%r{^lib/.*\.rb$}) { |_file, _match| Dir['spec/**/*.rb'] }

  # The hook block deliberately evaluates to nil.
  nil
end
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ pkg/*
2
+ *.gem
3
+ .bundle
4
+ .DS_Store
5
+ spec/config.yml
6
+ rdoc/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format p
2
+ --color
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in massive_record.gemspec
4
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,38 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ massive_record (0.1.0)
5
+ activemodel
6
+ activesupport
7
+ thrift (>= 0.5.0)
8
+
9
+ GEM
10
+ remote: http://rubygems.org/
11
+ specs:
12
+ activemodel (3.0.3)
13
+ activesupport (= 3.0.3)
14
+ builder (~> 2.1.2)
15
+ i18n (~> 0.4)
16
+ activesupport (3.0.3)
17
+ builder (2.1.2)
18
+ diff-lcs (1.1.2)
19
+ i18n (0.5.0)
20
+ rspec (2.2.0)
21
+ rspec-core (~> 2.2)
22
+ rspec-expectations (~> 2.2)
23
+ rspec-mocks (~> 2.2)
24
+ rspec-core (2.2.1)
25
+ rspec-expectations (2.2.0)
26
+ diff-lcs (~> 1.1.2)
27
+ rspec-mocks (2.2.0)
28
+ thrift (0.5.0)
29
+
30
+ PLATFORMS
31
+ ruby
32
+
33
+ DEPENDENCIES
34
+ activemodel
35
+ activesupport
36
+ massive_record!
37
+ rspec (>= 2.1.0)
38
+ thrift (>= 0.5.0)
data/Manifest ADDED
@@ -0,0 +1,24 @@
1
+ Manifest
2
+ README.md
3
+ Rakefile
4
+ autotest/discover.rb
5
+ lib/massive_record.rb
6
+ lib/massive_record/base.rb
7
+ lib/massive_record/cell.rb
8
+ lib/massive_record/column_families_collection.rb
9
+ lib/massive_record/column_family.rb
10
+ lib/massive_record/connection.rb
11
+ lib/massive_record/migration.rb
12
+ lib/massive_record/row.rb
13
+ lib/massive_record/scanner.rb
14
+ lib/massive_record/table.rb
15
+ lib/massive_record/tables_collection.rb
16
+ lib/massive_record/thrift/hbase.rb
17
+ lib/massive_record/thrift/hbase_constants.rb
18
+ lib/massive_record/thrift/hbase_types.rb
19
+ massive_record.gemspec
20
+ spec/README.md
21
+ spec/config.yml.example
22
+ spec/connection_spec.rb
23
+ spec/spec_helper.rb
24
+ spec/table_spec.rb
data/README.md ADDED
@@ -0,0 +1,225 @@
1
+ # Massive Record
2
+
3
+ Massive Record is an ORM for HBase. It currently uses
4
+ Thrift to communicate with HBase and will in time support
5
+ other ways of communicating with HBase.
6
+
7
+
8
+ See introduction to HBase model architecture:
9
+ http://wiki.apache.org/hadoop/Hbase/HbaseArchitecture
10
+ Understanding terminology of Table / Row / Column family / Column / Cell:
11
+ http://jimbojw.com/wiki/index.php?title=Understanding_Hbase_and_BigTable
12
+
13
+
14
+ ## Installation
15
+
16
+ gem install massive_record
17
+
18
+ ### Ruby on Rails
19
+
20
+ Add the following gem to your Gemfile:
21
+
22
+ gem 'massive_record'
23
+
24
+ Create a config/hbase.yml file with the following content:
25
+
26
+ defaults: &defaults
27
+ host: somewhere.compute.amazonaws.com # No 'http', it's a Thrift connection
28
+ port: 9090
29
+
30
+ development:
31
+ <<: *defaults
32
+
33
+ test:
34
+ <<: *defaults
35
+
36
+ production:
37
+ <<: *defaults
38
+
39
+
40
+ ## Usage
41
+
42
+ There are two ways of using the Massive Record library. At the highest level there is the ORM, which is Active Model compliant and makes
43
+ it easy to use. The second way of doing things is working directly against the wrapper (soon to be called adapter).
44
+
45
+
46
+ ### ORM
47
+
48
+ Both MassiveRecord::ORM::Table and MassiveRecord::ORM::Column now provide some of the functionality you would expect from an ORM. This includes:
49
+ - An initializer which takes attribute hash and assigns them to your object.
50
+ - Write and read methods for the attributes
51
+ - Validations, as you expect from an ActiveRecord.
52
+ - Callbacks, as you expect from an ActiveRecord.
53
+ - Information about changes on attributes.
54
+ - Casting of attributes
55
+ - Serialization of array / hashes
56
+
57
+ Tables also have:
58
+ - Persistence method calls like create, save and destroy (but they do not actually save things to HBase)
59
+ - Easy access to adapter's connection via Person.connection
60
+ - Easy access to adapter's hbase table via Person.table
61
+ - Finder method, like Person.find("an_id"), Person.find("id1", "id2"), Person.all etc
62
+ - Save / update methods
63
+ - Auto-creation of table and column families on save if the table does not exist.
64
+ - Destroy records
65
+
66
+
67
+ Here is an example of usage, both for Table and Column:
68
+
69
+ class Person < MassiveRecord::ORM::Table
70
+ column_family :info do
71
+ field :name
72
+ field :email
73
+ field :phone_number
74
+ field :points, :integer, :default => 0
75
+ field :date_of_birth, :date
76
+ field :newsletter, :boolean, :default => false
77
+ end
78
+
79
+ validates_presence_of :name, :email
80
+ validates_format_of :email, :with => /\A([^@\s]+)@((?:[-a-z0-9]+\.)+[a-z]{2,})\Z/i
81
+ end
82
+
83
+
84
+
85
+ class Address < MassiveRecord::ORM::Column
86
+ field :street
87
+ field :number, :integer
88
+ field :nice_place, :boolean, :default => true
89
+ end
90
+
91
+
92
+ ## Wrapper (adapter) API
93
+
94
+ You can, if you'd like, work directly against the adapter.
95
+
96
+ # Init a new connection with HBase
97
+ conn = MassiveRecord::Wrapper::Connection.new(:host => 'localhost', :port => 9090)
98
+ conn.open
99
+
100
+ # OR init a connection using the config/hbase.yml file with Rails
101
+ conn = MassiveRecord::Wrapper::Base.connection
102
+
103
+ # Fetch tables name
104
+ conn.tables # => ["companies", "news", "webpages"]
105
+
106
+ # Init a table
107
+ table = MassiveRecord::Wrapper::Table.new(conn, :people)
108
+
109
+ # Add a column family
110
+ column = MassiveRecord::Wrapper::ColumnFamily.new(:info)
111
+ table.column_families.push(column)
112
+
113
+ # Or bulk add column families
114
+ table.create_column_families([:friends, :misc])
115
+
116
+ # Create the table
117
+ table.save # will raise an exception if the table already exists
118
+
119
+ # Fetch column families from the database
120
+ table.fetch_column_families # => [ColumnFamily#RTY4424, ColumnFamily#R475424, ColumnFamily#GHJ9424]
121
+ table.column_families.collect(&:name) # => ["info", "friends", "misc"]
122
+
123
+ # Add a new row
124
+ row = MassiveRecord::Wrapper::Row.new
125
+ row.id = "my_unique_id"
126
+ row.values = { :info => { :first_name => "H", :last_name => "Base", :email => "h@base.com" } }
127
+ row.table = table
128
+ row.save
129
+
130
+ # Fetch rows
131
+ table.first # => MassiveRecord#ID1
132
+ table.all(:limit => 10) # => [MassiveRecord#ID1, MassiveRecord#ID2, ...]
133
+ table.find("ID2") # => MassiveRecord#ID2
134
+ table.find(["ID1", "ID2"]) # => [MassiveRecord#ID1, MassiveRecord#ID2]
135
+ table.all(:limit => 3, :start => "ID2") # => [MassiveRecord#ID2, MassiveRecord#ID3, MassiveRecord#ID4]
136
+
137
+ # Manipulate rows
138
+ table.first.destroy # => true
139
+
140
+ # Remove the table
141
+ table.destroy
142
+
143
+
144
+
145
+ ## Planned work
146
+
147
+ - Rename Wrapper to Adapter, and make it easy to switch from Thrift to another way of communicating with Hbase.
148
+ - Automatically handling time stamps like created_at and updated_at.
149
+ - Associations and embedded objects.
150
+ - Implement other Adapters, for instance using jruby and the Java API.
151
+
152
+
153
+
154
+ ## Contribute
155
+
156
+ If you want to contribute feel free to fork this project :-)
157
+ Make a feature branch, write tests, implement and make a pull request.
158
+
159
+ ### Getting started
160
+
161
+ git clone git://github.com/CompanyBook/massive_record.git (or the address to your fork)
162
+ cd massive_record
163
+ bundle install
164
+
165
+ Next up you need to add a config.yml file inside of spec/ which contains something like:
166
+ host: url.to-a.thrift.server
167
+ port: 9090
168
+ table: massive_record_test_table
169
+
170
+ You should now be able to run `rspec spec/`
171
+
172
+ ### Play with it in the console
173
+
174
+ Check out the massive_record project and install it as a gem:
175
+
176
+ cd massive_record/
177
+ bundle console
178
+ ruby-1.9.2-p0 > Bundler.require
179
+ => [
180
+ <Bundler::Dependency type=:runtime name="massive_record" requirements=">= 0">,
181
+ <Bundler::Dependency type=:runtime name="thrift" requirements=">= 0.5.0">,
182
+ <Bundler::Dependency type=:runtime name="activesupport" requirements=">= 0">,
183
+ <Bundler::Dependency type=:runtime name="activemodel" requirements=">= 0">,
184
+ <Bundler::Dependency type=:runtime name="rspec" requirements=">= 2.1.0">
185
+ ]
186
+ ruby-1.9.2-p0 > MassiveRecord::VERSION
187
+ => "0.1.0"
188
+
189
+ ### Clean HBase database between each test
190
+
191
+ We have created a helper module MassiveRecord::Rspec::SimpleDatabaseCleaner which, when included into rspec tests, will clean
192
+ the database for ORM records between each test case. You can also take a look into spec/support/mock_massive_record_connection.rb
193
+ for some functionality which will mock a hbase connection making it easier (faster) to test code where no real database is needed.
194
+
195
+
196
+
197
+
198
+ ## More Information and Resources
199
+
200
+ ### Thrift API
201
+
202
+ Ruby Library using the HBase Thrift API.
203
+ http://wiki.apache.org/hadoop/Hbase/ThriftApi
204
+
205
+ The generated Ruby files can be found under lib/massive_record/thrift/
206
+ The whole API (CRUD and more) is present in the Client object (Apache::Hadoop::Hbase::Thrift::Hbase::Client).
207
+ The client can be easily initialized using the MassiveRecord connection:
208
+
209
+ conn = MassiveRecord::Wrapper::Connection.new(:host => 'localhost', :port => 9090)
210
+ conn.open
211
+
212
+ client = conn.client
213
+ # Do whatever you want with the client object
214
+
215
+ ### Q&A
216
+
217
+ How to add a new column family to an existing table?
218
+
219
+ # Connect to the HBase console on the server itself and enter the following code :
220
+ disable 'companies'
221
+ alter 'companies', { NAME => 'new_column_family' }
222
+ enable 'companies'
223
+
224
+
225
+ Copyright (c) 2011 Companybook, released under the MIT license
data/Rakefile ADDED
@@ -0,0 +1,16 @@
# Put lib/ on the load path so the gem's version file can be required below.
$LOAD_PATH.push File.expand_path("../lib", __FILE__)
require 'bundler'
require "massive_record/version"

# Install the rake tasks provided by Bundler's gem helper.
Bundler::GemHelper.install_tasks

# Define the :spec task.
require "rspec/core/rake_task"
RSpec::Core::RakeTask.new(:spec)

# RDoc task: writes to rdoc/, titled with the current gem version, and
# includes the README plus every Ruby file under lib/.
# NOTE(review): rake/rdoctask was deprecated in later Rake releases in favour
# of rdoc/task -- confirm against the Rake version this project targets.
require 'rake/rdoctask'
Rake::RDocTask.new do |task|
  task.rdoc_dir = 'rdoc'
  task.title = "MassiveRecord #{MassiveRecord::VERSION}"
  ['README*', 'lib/**/*.rb'].each { |pattern| task.rdoc_files.include(pattern) }
end
data/TODO.md ADDED
@@ -0,0 +1,8 @@
1
+ # TODO
2
+
3
+ * code documentation
4
+ * Rails ORM (ActiveModel etc.)
5
+ * write tests
6
+ * add generators for hbase.yml etc.
7
+ * add migration tools
8
+ * ...
@@ -0,0 +1 @@
1
+ Autotest.add_discovery { "rspec2" }
@@ -0,0 +1,18 @@
1
+ # Thrift Gems
2
+ require 'thrift'
3
+ require 'thrift/transport/socket'
4
+ require 'thrift/protocol/binary_protocol'
5
+
6
+ # Exceptions
7
+ require 'massive_record/exceptions'
8
+
9
+ # Generated Ruby classes from Thrift for HBase
10
+ require 'massive_record/thrift/hbase_constants'
11
+ require 'massive_record/thrift/hbase_types'
12
+ require 'massive_record/thrift/hbase'
13
+
14
+ # Wrapper
15
+ require 'massive_record/wrapper/base'
16
+
17
+ # ORM
18
+ require 'massive_record/orm/base'
@@ -0,0 +1,11 @@
module MassiveRecord
  # Connection-related error for MassiveRecord.
  #
  # Inherits from StandardError (not Exception) so that a plain `rescue`,
  # which only handles StandardError and its subclasses, catches it.
  # Code that rescues MassiveRecord::ConnectionException or Exception
  # explicitly keeps working unchanged.
  #
  # No custom initializer is defined: the inherited one accepts an optional
  # message, so both `ConnectionException.new` and
  # `raise ConnectionException, "some message"` work.
  class ConnectionException < StandardError
  end
end
@@ -0,0 +1,61 @@
module MassiveRecord
  module ORM
    # Attribute handling for ORM objects, built on ActiveModel::AttributeMethods.
    #
    # Provides the attributes hash, mass assignment through writer methods and
    # lazy generation of the attribute methods the first time one is needed.
    module AttributeMethods
      extend ActiveSupport::Concern
      include ActiveModel::AttributeMethods

      module ClassMethods
        # Generates attribute methods for every name returned by
        # known_attribute_names (defined outside this module).
        def define_attribute_methods
          super(known_attribute_names)
        end
      end

      # Returns the attributes hash, creating an empty one on first access.
      def attributes
        @attributes ||= {}
      end

      # Mass-assigns attributes through their writer methods.
      #
      # Anything that is not a Hash is silently ignored. Raises
      # UnkownAttributeError for a key the object has no "<key>=" writer for.
      def attributes=(new_attributes)
        return unless new_attributes.is_a?(Hash)

        new_attributes.each do |name, value|
          setter = "#{name}="
          raise UnkownAttributeError.new("Unkown attribute: #{name}") unless respond_to? setter

          send(setter, value)
        end
      end

      # If the class has not generated its attribute methods yet, generate them
      # and dispatch the call again; otherwise defer to the normal lookup.
      def method_missing(method, *args, &block)
        return super if self.class.attribute_methods_generated?

        self.class.define_attribute_methods
        send(method, *args, &block)
      end

      # Makes sure the attribute methods are generated before answering, so the
      # generated readers and writers are taken into account.
      def respond_to?(*args)
        klass = self.class
        klass.define_attribute_methods unless klass.attribute_methods_generated?
        super
      end

      private

      # Replaces the attributes hash without going through the writer methods.
      # Keys are stringified and an 'id' key (nil unless supplied) is always
      # present. Anything that is not a Hash is ignored.
      def attributes_raw=(new_attributes)
        return unless new_attributes.is_a?(Hash)

        @attributes = {'id' => nil}.merge(new_attributes.stringify_keys)
      end

      # Builds the initial attributes: 'id' => nil, plus the class's schema
      # defaults when the class responds to default_attributes_from_schema.
      def attributes_from_field_definition
        defaults = {'id' => nil}
        return defaults unless self.class.respond_to? :default_attributes_from_schema

        defaults.merge! self.class.default_attributes_from_schema
      end
    end
  end
end