massive_record 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +15 -0
- data/.gitignore +6 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +38 -0
- data/Manifest +24 -0
- data/README.md +225 -0
- data/Rakefile +16 -0
- data/TODO.md +8 -0
- data/autotest/discover.rb +1 -0
- data/lib/massive_record.rb +18 -0
- data/lib/massive_record/exceptions.rb +11 -0
- data/lib/massive_record/orm/attribute_methods.rb +61 -0
- data/lib/massive_record/orm/attribute_methods/dirty.rb +80 -0
- data/lib/massive_record/orm/attribute_methods/read.rb +23 -0
- data/lib/massive_record/orm/attribute_methods/write.rb +24 -0
- data/lib/massive_record/orm/base.rb +176 -0
- data/lib/massive_record/orm/callbacks.rb +52 -0
- data/lib/massive_record/orm/column.rb +18 -0
- data/lib/massive_record/orm/config.rb +47 -0
- data/lib/massive_record/orm/errors.rb +47 -0
- data/lib/massive_record/orm/finders.rb +125 -0
- data/lib/massive_record/orm/id_factory.rb +133 -0
- data/lib/massive_record/orm/persistence.rb +199 -0
- data/lib/massive_record/orm/schema.rb +4 -0
- data/lib/massive_record/orm/schema/column_families.rb +48 -0
- data/lib/massive_record/orm/schema/column_family.rb +102 -0
- data/lib/massive_record/orm/schema/column_interface.rb +91 -0
- data/lib/massive_record/orm/schema/common_interface.rb +48 -0
- data/lib/massive_record/orm/schema/field.rb +128 -0
- data/lib/massive_record/orm/schema/fields.rb +37 -0
- data/lib/massive_record/orm/schema/table_interface.rb +96 -0
- data/lib/massive_record/orm/table.rb +9 -0
- data/lib/massive_record/orm/validations.rb +52 -0
- data/lib/massive_record/spec/support/simple_database_cleaner.rb +52 -0
- data/lib/massive_record/thrift/hbase.rb +2307 -0
- data/lib/massive_record/thrift/hbase_constants.rb +14 -0
- data/lib/massive_record/thrift/hbase_types.rb +225 -0
- data/lib/massive_record/version.rb +3 -0
- data/lib/massive_record/wrapper/base.rb +28 -0
- data/lib/massive_record/wrapper/cell.rb +45 -0
- data/lib/massive_record/wrapper/column_families_collection.rb +19 -0
- data/lib/massive_record/wrapper/column_family.rb +22 -0
- data/lib/massive_record/wrapper/connection.rb +71 -0
- data/lib/massive_record/wrapper/row.rb +170 -0
- data/lib/massive_record/wrapper/scanner.rb +50 -0
- data/lib/massive_record/wrapper/table.rb +148 -0
- data/lib/massive_record/wrapper/tables_collection.rb +13 -0
- data/massive_record.gemspec +28 -0
- data/spec/config.yml.example +4 -0
- data/spec/orm/cases/attribute_methods_spec.rb +47 -0
- data/spec/orm/cases/auto_generate_id_spec.rb +54 -0
- data/spec/orm/cases/base_spec.rb +176 -0
- data/spec/orm/cases/callbacks_spec.rb +309 -0
- data/spec/orm/cases/column_spec.rb +49 -0
- data/spec/orm/cases/config_spec.rb +103 -0
- data/spec/orm/cases/dirty_spec.rb +129 -0
- data/spec/orm/cases/encoding_spec.rb +49 -0
- data/spec/orm/cases/finders_spec.rb +208 -0
- data/spec/orm/cases/hbase/connection_spec.rb +13 -0
- data/spec/orm/cases/i18n_spec.rb +32 -0
- data/spec/orm/cases/id_factory_spec.rb +75 -0
- data/spec/orm/cases/persistence_spec.rb +479 -0
- data/spec/orm/cases/table_spec.rb +81 -0
- data/spec/orm/cases/validation_spec.rb +92 -0
- data/spec/orm/models/address.rb +7 -0
- data/spec/orm/models/person.rb +15 -0
- data/spec/orm/models/test_class.rb +5 -0
- data/spec/orm/schema/column_families_spec.rb +186 -0
- data/spec/orm/schema/column_family_spec.rb +131 -0
- data/spec/orm/schema/column_interface_spec.rb +115 -0
- data/spec/orm/schema/field_spec.rb +196 -0
- data/spec/orm/schema/fields_spec.rb +126 -0
- data/spec/orm/schema/table_interface_spec.rb +171 -0
- data/spec/spec_helper.rb +15 -0
- data/spec/support/connection_helpers.rb +76 -0
- data/spec/support/mock_massive_record_connection.rb +80 -0
- data/spec/thrift/cases/encoding_spec.rb +48 -0
- data/spec/wrapper/cases/connection_spec.rb +53 -0
- data/spec/wrapper/cases/table_spec.rb +231 -0
- metadata +228 -0
data/.autotest
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'autotest/restart'
|
2
|
+
require 'autotest/growl'
|
3
|
+
require 'autotest/fsevent'
|
4
|
+
|
5
|
+
# Autotest :initialize hook: configures which paths are ignored and which
# files are re-run when library code changes.
Autotest.add_hook(:initialize) do |autotest|
  # Paths and patterns registered as exceptions with autotest.
  ignored_paths = %w[.git .svn .hg .DS_Store ._* vendor tmp log doc]
  ignored_paths.each { |pattern| autotest.add_exception(pattern) }

  # Take out the default mappings (test/test*rb).
  autotest.clear_mappings

  # Any Ruby file under lib/ maps to every Ruby file under spec/.
  autotest.add_mapping(%r{^lib/.*\.rb$}) { |_file, _match| Dir['spec/**/*.rb'] }

  # NOTE(review): the hook explicitly evaluates to nil; presumably so that
  # other :initialize hooks still get to run -- confirm against Autotest docs.
  nil
end
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
massive_record (0.1.0)
|
5
|
+
activemodel
|
6
|
+
activesupport
|
7
|
+
thrift (>= 0.5.0)
|
8
|
+
|
9
|
+
GEM
|
10
|
+
remote: http://rubygems.org/
|
11
|
+
specs:
|
12
|
+
activemodel (3.0.3)
|
13
|
+
activesupport (= 3.0.3)
|
14
|
+
builder (~> 2.1.2)
|
15
|
+
i18n (~> 0.4)
|
16
|
+
activesupport (3.0.3)
|
17
|
+
builder (2.1.2)
|
18
|
+
diff-lcs (1.1.2)
|
19
|
+
i18n (0.5.0)
|
20
|
+
rspec (2.2.0)
|
21
|
+
rspec-core (~> 2.2)
|
22
|
+
rspec-expectations (~> 2.2)
|
23
|
+
rspec-mocks (~> 2.2)
|
24
|
+
rspec-core (2.2.1)
|
25
|
+
rspec-expectations (2.2.0)
|
26
|
+
diff-lcs (~> 1.1.2)
|
27
|
+
rspec-mocks (2.2.0)
|
28
|
+
thrift (0.5.0)
|
29
|
+
|
30
|
+
PLATFORMS
|
31
|
+
ruby
|
32
|
+
|
33
|
+
DEPENDENCIES
|
34
|
+
activemodel
|
35
|
+
activesupport
|
36
|
+
massive_record!
|
37
|
+
rspec (>= 2.1.0)
|
38
|
+
thrift (>= 0.5.0)
|
data/Manifest
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
Manifest
|
2
|
+
README.md
|
3
|
+
Rakefile
|
4
|
+
autotest/discover.rb
|
5
|
+
lib/massive_record.rb
|
6
|
+
lib/massive_record/base.rb
|
7
|
+
lib/massive_record/cell.rb
|
8
|
+
lib/massive_record/column_families_collection.rb
|
9
|
+
lib/massive_record/column_family.rb
|
10
|
+
lib/massive_record/connection.rb
|
11
|
+
lib/massive_record/migration.rb
|
12
|
+
lib/massive_record/row.rb
|
13
|
+
lib/massive_record/scanner.rb
|
14
|
+
lib/massive_record/table.rb
|
15
|
+
lib/massive_record/tables_collection.rb
|
16
|
+
lib/massive_record/thrift/hbase.rb
|
17
|
+
lib/massive_record/thrift/hbase_constants.rb
|
18
|
+
lib/massive_record/thrift/hbase_types.rb
|
19
|
+
massive_record.gemspec
|
20
|
+
spec/README.md
|
21
|
+
spec/config.yml.example
|
22
|
+
spec/connection_spec.rb
|
23
|
+
spec/spec_helper.rb
|
24
|
+
spec/table_spec.rb
|
data/README.md
ADDED
@@ -0,0 +1,225 @@
|
|
1
|
+
# Massive Record
|
2
|
+
|
3
|
+
Massive Record is an ORM for HBase. It currently uses
|
4
|
+
Thrift to communicate with HBase and will in time support
|
5
|
+
other ways of communicating with HBase.
|
6
|
+
|
7
|
+
|
8
|
+
See introduction to HBase model architecture:
|
9
|
+
http://wiki.apache.org/hadoop/Hbase/HbaseArchitecture
|
10
|
+
Understanding terminology of Table / Row / Column family / Column / Cell:
|
11
|
+
http://jimbojw.com/wiki/index.php?title=Understanding_Hbase_and_BigTable
|
12
|
+
|
13
|
+
|
14
|
+
## Installation
|
15
|
+
|
16
|
+
gem install massive_record
|
17
|
+
|
18
|
+
### Ruby on Rails
|
19
|
+
|
20
|
+
Add the following gem to your Gemfile:
|
21
|
+
|
22
|
+
gem 'massive_record'
|
23
|
+
|
24
|
+
Create a config/hbase.yml file with the following content:
|
25
|
+
|
26
|
+
defaults: &defaults
|
27
|
+
host: somewhere.compute.amazonaws.com # No 'http', it's a Thrift connection
|
28
|
+
port: 9090
|
29
|
+
|
30
|
+
development:
|
31
|
+
<<: *defaults
|
32
|
+
|
33
|
+
test:
|
34
|
+
<<: *defaults
|
35
|
+
|
36
|
+
production:
|
37
|
+
<<: *defaults
|
38
|
+
|
39
|
+
|
40
|
+
## Usage
|
41
|
+
|
42
|
+
There are two ways of using the Massive Record library. At the highest level we have the ORM. It is Active Model compliant, which makes
|
43
|
+
it easy to use. The second way of doing things is working directly against the wrapper (soon to be called adapter).
|
44
|
+
|
45
|
+
|
46
|
+
### ORM
|
47
|
+
|
48
|
+
Both MassiveRecord::ORM::Table and MassiveRecord::ORM::Column now have some of the functionality you would expect from an ORM. This includes:
|
49
|
+
- An initializer which takes attribute hash and assigns them to your object.
|
50
|
+
- Write and read methods for the attributes
|
51
|
+
- Validations, as you expect from an ActiveRecord.
|
52
|
+
- Callbacks, as you expect from an ActiveRecord.
|
53
|
+
- Information about changes on attributes.
|
54
|
+
- Casting of attributes
|
55
|
+
- Serialization of array / hashes
|
56
|
+
|
57
|
+
Tables also have:
|
58
|
+
- Persistence method calls like create, save and destroy (but they do not actually save things to HBase)
|
59
|
+
- Easy access to adapter's connection via Person.connection
|
60
|
+
- Easy access to adapter's hbase table via Person.table
|
61
|
+
- Finder method, like Person.find("an_id"), Person.find("id1", "id2"), Person.all etc
|
62
|
+
- Save / update methods
|
63
|
+
- Auto-creation of table and column families on save if the table does not exist.
|
64
|
+
- Destroy records
|
65
|
+
|
66
|
+
|
67
|
+
Here is an example of usage, both for Table and Column:
|
68
|
+
|
69
|
+
class Person < MassiveRecord::ORM::Table
|
70
|
+
column_family :info do
|
71
|
+
field :name
|
72
|
+
field :email
|
73
|
+
field :phone_number
|
74
|
+
field :points, :integer, :default => 0
|
75
|
+
field :date_of_birth, :date
|
76
|
+
field :newsletter, :boolean, :default => false
|
77
|
+
end
|
78
|
+
|
79
|
+
validates_presence_of :name, :email
|
80
|
+
validates_format_of :email, :with => /\A([^@\s]+)@((?:[-a-z0-9]+\.)+[a-z]{2,})\Z/i
|
81
|
+
end
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
class Address < MassiveRecord::ORM::Column
|
86
|
+
field :street
|
87
|
+
field :number, :integer
|
88
|
+
field :nice_place, :boolean, :default => true
|
89
|
+
end
|
90
|
+
|
91
|
+
|
92
|
+
## Wrapper (adapter) API
|
93
|
+
|
94
|
+
You can, if you'd like, work directly against the adapter.
|
95
|
+
|
96
|
+
# Init a new connection with HBase
|
97
|
+
conn = MassiveRecord::Wrapper::Connection.new(:host => 'localhost', :port => 9090)
|
98
|
+
conn.open
|
99
|
+
|
100
|
+
# OR init a connection using the config/hbase.yml file with Rails
|
101
|
+
conn = MassiveRecord::Wrapper::Base.connection
|
102
|
+
|
103
|
+
# Fetch table names
|
104
|
+
conn.tables # => ["companies", "news", "webpages"]
|
105
|
+
|
106
|
+
# Init a table
|
107
|
+
table = MassiveRecord::Wrapper::Table.new(conn, :people)
|
108
|
+
|
109
|
+
# Add a column family
|
110
|
+
column = MassiveRecord::Wrapper::ColumnFamily.new(:info)
|
111
|
+
table.column_families.push(column)
|
112
|
+
|
113
|
+
# Or bulk add column families
|
114
|
+
table.create_column_families([:friends, :misc])
|
115
|
+
|
116
|
+
# Create the table
|
117
|
+
table.save # will raise an exception if the table already exists
|
118
|
+
|
119
|
+
# Fetch column families from the database
|
120
|
+
table.fetch_column_families # => [ColumnFamily#RTY4424, ColumnFamily#R475424, ColumnFamily#GHJ9424]
|
121
|
+
table.column_families.collect(&:name) # => ["info", "friends", "misc"]
|
122
|
+
|
123
|
+
# Add a new row
|
124
|
+
row = MassiveRecord::Wrapper::Row.new
|
125
|
+
row.id = "my_unique_id"
|
126
|
+
row.values = { :info => { :first_name => "H", :last_name => "Base", :email => "h@base.com" } }
|
127
|
+
row.table = table
|
128
|
+
row.save
|
129
|
+
|
130
|
+
# Fetch rows
|
131
|
+
table.first # => MassiveRecord#ID1
|
132
|
+
table.all(:limit => 10) # => [MassiveRecord#ID1, MassiveRecord#ID2, ...]
|
133
|
+
table.find("ID2") # => MassiveRecord#ID2
|
134
|
+
table.find(["ID1", "ID2"]) # => [MassiveRecord#ID1, MassiveRecord#ID2]
|
135
|
+
table.all(:limit => 3, :start => "ID2") # => [MassiveRecord#ID2, MassiveRecord#ID3, MassiveRecord#ID4]
|
136
|
+
|
137
|
+
# Manipulate rows
|
138
|
+
table.first.destroy # => true
|
139
|
+
|
140
|
+
# Remove the table
|
141
|
+
table.destroy
|
142
|
+
|
143
|
+
|
144
|
+
|
145
|
+
## Planned work
|
146
|
+
|
147
|
+
- Rename Wrapper to Adapter, and make it easy to switch from Thrift to another way of communicating with Hbase.
|
148
|
+
- Automatically handling time stamps like created_at and updated_at.
|
149
|
+
- Associations and embedded objects.
|
150
|
+
- Implement other Adapters, for instance using jruby and the Java API.
|
151
|
+
|
152
|
+
|
153
|
+
|
154
|
+
## Contribute
|
155
|
+
|
156
|
+
If you want to contribute feel free to fork this project :-)
|
157
|
+
Make a feature branch, write tests, implement your change and open a pull request.
|
158
|
+
|
159
|
+
### Getting started
|
160
|
+
|
161
|
+
git clone git://github.com/CompanyBook/massive_record.git (or the address to your fork)
|
162
|
+
cd massive_record
|
163
|
+
bundle install
|
164
|
+
|
165
|
+
Next up you need to add a config.yml file inside of spec/ which contains something like:
|
166
|
+
host: url.to-a.thrift.server
|
167
|
+
port: 9090
|
168
|
+
table: massive_record_test_table
|
169
|
+
|
170
|
+
You should now be able to run `rspec spec/`
|
171
|
+
|
172
|
+
### Play with it in the console
|
173
|
+
|
174
|
+
Check out the massive_record project and load it in a console:
|
175
|
+
|
176
|
+
cd massive_record/
|
177
|
+
bundle console
|
178
|
+
ruby-1.9.2-p0 > Bundler.require
|
179
|
+
=> [
|
180
|
+
<Bundler::Dependency type=:runtime name="massive_record" requirements=">= 0">,
|
181
|
+
<Bundler::Dependency type=:runtime name="thrift" requirements=">= 0.5.0">,
|
182
|
+
<Bundler::Dependency type=:runtime name="activesupport" requirements=">= 0">,
|
183
|
+
<Bundler::Dependency type=:runtime name="activemodel" requirements=">= 0">,
|
184
|
+
<Bundler::Dependency type=:runtime name="rspec" requirements=">= 2.1.0">
|
185
|
+
]
|
186
|
+
ruby-1.9.2-p0 > MassiveRecord::VERSION
|
187
|
+
=> "0.1.0"
|
188
|
+
|
189
|
+
### Clean HBase database between each test
|
190
|
+
|
191
|
+
We have created a helper module MassiveRecord::Rspec::SimpleDatabaseCleaner which, when included into rspec tests, will clean
|
192
|
+
the database for ORM records between each test case. You can also take a look into spec/support/mock_massive_record_connection.rb
|
193
|
+
for some functionality which will mock a hbase connection making it easier (faster) to test code where no real database is needed.
|
194
|
+
|
195
|
+
|
196
|
+
|
197
|
+
|
198
|
+
## More Information and Resources
|
199
|
+
|
200
|
+
### Thrift API
|
201
|
+
|
202
|
+
Ruby Library using the HBase Thrift API.
|
203
|
+
http://wiki.apache.org/hadoop/Hbase/ThriftApi
|
204
|
+
|
205
|
+
The generated Ruby files can be found under lib/massive_record/thrift/
|
206
|
+
The whole API (CRUD and more) is present in the Client object (Apache::Hadoop::Hbase::Thrift::Hbase::Client).
|
207
|
+
The client can easily be initialized using the MassiveRecord connection:
|
208
|
+
|
209
|
+
conn = MassiveRecord::Wrapper::Connection.new(:host => 'localhost', :port => 9090)
|
210
|
+
conn.open
|
211
|
+
|
212
|
+
client = conn.client
|
213
|
+
# Do whatever you want with the client object
|
214
|
+
|
215
|
+
### Q&A
|
216
|
+
|
217
|
+
How to add a new column family to an existing table?
|
218
|
+
|
219
|
+
# Connect to the HBase console on the server itself and enter the following code :
|
220
|
+
disable 'companies'
|
221
|
+
alter 'companies', { NAME => 'new_column_family' }
|
222
|
+
enable 'companies'
|
223
|
+
|
224
|
+
|
225
|
+
Copyright (c) 2011 Companybook, released under the MIT license
|
data/Rakefile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Rakefile for the massive_record gem: wires up Bundler's gem helper tasks,
# the RSpec :spec task and an RDoc generation task.

# Make lib/ loadable so the gem's version file can be required below.
$:.push File.expand_path("../lib", __FILE__)
require 'bundler'
require "massive_record/version"

Bundler::GemHelper.install_tasks

require "rspec/core/rake_task"
RSpec::Core::RakeTask.new(:spec)

# 'rake/rdoctask' (Rake::RDocTask) was deprecated and later removed from Rake,
# which makes the whole Rakefile fail to load on current versions. Prefer the
# replacement shipped with RDoc and only fall back to the legacy file when
# 'rdoc/task' is not available.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

# Generates API documentation into ./rdoc from the README and all library files.
rdoc_task_class.new do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "MassiveRecord #{MassiveRecord::VERSION}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/TODO.md
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Autotest.add_discovery { "rspec2" }
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Thrift Gems
|
2
|
+
require 'thrift'
|
3
|
+
require 'thrift/transport/socket'
|
4
|
+
require 'thrift/protocol/binary_protocol'
|
5
|
+
|
6
|
+
# Exceptions
|
7
|
+
require 'massive_record/exceptions'
|
8
|
+
|
9
|
+
# Generated Ruby classes from Thrift for HBase
|
10
|
+
require 'massive_record/thrift/hbase_constants'
|
11
|
+
require 'massive_record/thrift/hbase_types'
|
12
|
+
require 'massive_record/thrift/hbase'
|
13
|
+
|
14
|
+
# Wrapper
|
15
|
+
require 'massive_record/wrapper/base'
|
16
|
+
|
17
|
+
# ORM
|
18
|
+
require 'massive_record/orm/base'
|
@@ -0,0 +1,61 @@
|
|
1
|
+
module MassiveRecord
  module ORM
    #
    # Attribute handling mixed into ORM classes: stores attributes in a hash,
    # supports mass assignment through writer methods and lazily generates
    # the attribute methods the first time they are needed.
    #
    module AttributeMethods
      extend ActiveSupport::Concern
      include ActiveModel::AttributeMethods

      module ClassMethods
        #
        # Generates attribute methods for every name returned by
        # known_attribute_names.
        # NOTE(review): known_attribute_names is not defined in this module;
        # presumably supplied by the including class -- confirm.
        #
        def define_attribute_methods
          super(known_attribute_names)
        end
      end


      #
      # Returns the attribute hash, creating an empty one on first access.
      #
      def attributes
        @attributes ||= {}
      end

      #
      # Mass-assigns attributes by calling the "<name>=" writer for each pair.
      # Anything which is not a Hash is silently ignored.
      #
      # Raises UnkownAttributeError when the object does not respond to the
      # writer for one of the given names.
      #
      def attributes=(new_attributes)
        return unless new_attributes.is_a?(Hash)

        new_attributes.each_pair do |name, value|
          setter = "#{name}="
          raise UnkownAttributeError.new("Unkown attribute: #{name}") unless respond_to?(setter)

          send(setter, value)
        end
      end


      #
      # On the first unknown method call, generates the attribute methods and
      # retries the call. Once they have been generated, it defers to super.
      #
      def method_missing(method, *args, &block)
        if self.class.attribute_methods_generated?
          super
        else
          self.class.define_attribute_methods
          send(method, *args, &block)
        end
      end

      #
      # Makes sure attribute methods are generated before answering, so the
      # generated readers and writers are taken into account.
      #
      def respond_to?(*args)
        klass = self.class
        klass.define_attribute_methods unless klass.attribute_methods_generated?
        super
      end

      private

      #
      # Replaces the attribute hash directly, bypassing the writer methods.
      # Keys are stringified and an 'id' key (nil by default) is always
      # present. Anything which is not a Hash is ignored.
      #
      def attributes_raw=(new_attributes)
        return unless new_attributes.is_a?(Hash)

        @attributes = {'id' => nil}.merge(new_attributes.stringify_keys)
      end

      #
      # Builds the initial attribute hash: an 'id' set to nil, plus the
      # schema defaults when the class offers default_attributes_from_schema.
      #
      def attributes_from_field_definition
        klass = self.class
        defaults = {'id' => nil}
        return defaults unless klass.respond_to?(:default_attributes_from_schema)

        defaults.merge!(klass.default_attributes_from_schema)
      end
    end
  end
end
|