massive_record 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. data/.autotest +15 -0
  2. data/.gitignore +6 -0
  3. data/.rspec +2 -0
  4. data/Gemfile +4 -0
  5. data/Gemfile.lock +38 -0
  6. data/Manifest +24 -0
  7. data/README.md +225 -0
  8. data/Rakefile +16 -0
  9. data/TODO.md +8 -0
  10. data/autotest/discover.rb +1 -0
  11. data/lib/massive_record.rb +18 -0
  12. data/lib/massive_record/exceptions.rb +11 -0
  13. data/lib/massive_record/orm/attribute_methods.rb +61 -0
  14. data/lib/massive_record/orm/attribute_methods/dirty.rb +80 -0
  15. data/lib/massive_record/orm/attribute_methods/read.rb +23 -0
  16. data/lib/massive_record/orm/attribute_methods/write.rb +24 -0
  17. data/lib/massive_record/orm/base.rb +176 -0
  18. data/lib/massive_record/orm/callbacks.rb +52 -0
  19. data/lib/massive_record/orm/column.rb +18 -0
  20. data/lib/massive_record/orm/config.rb +47 -0
  21. data/lib/massive_record/orm/errors.rb +47 -0
  22. data/lib/massive_record/orm/finders.rb +125 -0
  23. data/lib/massive_record/orm/id_factory.rb +133 -0
  24. data/lib/massive_record/orm/persistence.rb +199 -0
  25. data/lib/massive_record/orm/schema.rb +4 -0
  26. data/lib/massive_record/orm/schema/column_families.rb +48 -0
  27. data/lib/massive_record/orm/schema/column_family.rb +102 -0
  28. data/lib/massive_record/orm/schema/column_interface.rb +91 -0
  29. data/lib/massive_record/orm/schema/common_interface.rb +48 -0
  30. data/lib/massive_record/orm/schema/field.rb +128 -0
  31. data/lib/massive_record/orm/schema/fields.rb +37 -0
  32. data/lib/massive_record/orm/schema/table_interface.rb +96 -0
  33. data/lib/massive_record/orm/table.rb +9 -0
  34. data/lib/massive_record/orm/validations.rb +52 -0
  35. data/lib/massive_record/spec/support/simple_database_cleaner.rb +52 -0
  36. data/lib/massive_record/thrift/hbase.rb +2307 -0
  37. data/lib/massive_record/thrift/hbase_constants.rb +14 -0
  38. data/lib/massive_record/thrift/hbase_types.rb +225 -0
  39. data/lib/massive_record/version.rb +3 -0
  40. data/lib/massive_record/wrapper/base.rb +28 -0
  41. data/lib/massive_record/wrapper/cell.rb +45 -0
  42. data/lib/massive_record/wrapper/column_families_collection.rb +19 -0
  43. data/lib/massive_record/wrapper/column_family.rb +22 -0
  44. data/lib/massive_record/wrapper/connection.rb +71 -0
  45. data/lib/massive_record/wrapper/row.rb +170 -0
  46. data/lib/massive_record/wrapper/scanner.rb +50 -0
  47. data/lib/massive_record/wrapper/table.rb +148 -0
  48. data/lib/massive_record/wrapper/tables_collection.rb +13 -0
  49. data/massive_record.gemspec +28 -0
  50. data/spec/config.yml.example +4 -0
  51. data/spec/orm/cases/attribute_methods_spec.rb +47 -0
  52. data/spec/orm/cases/auto_generate_id_spec.rb +54 -0
  53. data/spec/orm/cases/base_spec.rb +176 -0
  54. data/spec/orm/cases/callbacks_spec.rb +309 -0
  55. data/spec/orm/cases/column_spec.rb +49 -0
  56. data/spec/orm/cases/config_spec.rb +103 -0
  57. data/spec/orm/cases/dirty_spec.rb +129 -0
  58. data/spec/orm/cases/encoding_spec.rb +49 -0
  59. data/spec/orm/cases/finders_spec.rb +208 -0
  60. data/spec/orm/cases/hbase/connection_spec.rb +13 -0
  61. data/spec/orm/cases/i18n_spec.rb +32 -0
  62. data/spec/orm/cases/id_factory_spec.rb +75 -0
  63. data/spec/orm/cases/persistence_spec.rb +479 -0
  64. data/spec/orm/cases/table_spec.rb +81 -0
  65. data/spec/orm/cases/validation_spec.rb +92 -0
  66. data/spec/orm/models/address.rb +7 -0
  67. data/spec/orm/models/person.rb +15 -0
  68. data/spec/orm/models/test_class.rb +5 -0
  69. data/spec/orm/schema/column_families_spec.rb +186 -0
  70. data/spec/orm/schema/column_family_spec.rb +131 -0
  71. data/spec/orm/schema/column_interface_spec.rb +115 -0
  72. data/spec/orm/schema/field_spec.rb +196 -0
  73. data/spec/orm/schema/fields_spec.rb +126 -0
  74. data/spec/orm/schema/table_interface_spec.rb +171 -0
  75. data/spec/spec_helper.rb +15 -0
  76. data/spec/support/connection_helpers.rb +76 -0
  77. data/spec/support/mock_massive_record_connection.rb +80 -0
  78. data/spec/thrift/cases/encoding_spec.rb +48 -0
  79. data/spec/wrapper/cases/connection_spec.rb +53 -0
  80. data/spec/wrapper/cases/table_spec.rb +231 -0
  81. metadata +228 -0
data/.autotest ADDED
@@ -0,0 +1,15 @@
1
+ require 'autotest/restart'
2
+ require 'autotest/growl'
3
+ require 'autotest/fsevent'
4
+
5
+ Autotest.add_hook(:initialize) {|at|
6
+ %w{.git .svn .hg .DS_Store ._* vendor tmp log doc}.each do |exception|
7
+ at.add_exception(exception)
8
+ end
9
+ at.clear_mappings
10
+ # take out the default (test/test*rb)
11
+ at.add_mapping(%r{^lib/.*\.rb$}) {|f, _|
12
+ Dir['spec/**/*.rb']
13
+ }
14
+ nil
15
+ }
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ pkg/*
2
+ *.gem
3
+ .bundle
4
+ .DS_Store
5
+ spec/config.yml
6
+ rdoc/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format p
2
+ --color
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in massive_record.gemspec
4
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,38 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ massive_record (0.1.0)
5
+ activemodel
6
+ activesupport
7
+ thrift (>= 0.5.0)
8
+
9
+ GEM
10
+ remote: http://rubygems.org/
11
+ specs:
12
+ activemodel (3.0.3)
13
+ activesupport (= 3.0.3)
14
+ builder (~> 2.1.2)
15
+ i18n (~> 0.4)
16
+ activesupport (3.0.3)
17
+ builder (2.1.2)
18
+ diff-lcs (1.1.2)
19
+ i18n (0.5.0)
20
+ rspec (2.2.0)
21
+ rspec-core (~> 2.2)
22
+ rspec-expectations (~> 2.2)
23
+ rspec-mocks (~> 2.2)
24
+ rspec-core (2.2.1)
25
+ rspec-expectations (2.2.0)
26
+ diff-lcs (~> 1.1.2)
27
+ rspec-mocks (2.2.0)
28
+ thrift (0.5.0)
29
+
30
+ PLATFORMS
31
+ ruby
32
+
33
+ DEPENDENCIES
34
+ activemodel
35
+ activesupport
36
+ massive_record!
37
+ rspec (>= 2.1.0)
38
+ thrift (>= 0.5.0)
data/Manifest ADDED
@@ -0,0 +1,24 @@
1
+ Manifest
2
+ README.md
3
+ Rakefile
4
+ autotest/discover.rb
5
+ lib/massive_record.rb
6
+ lib/massive_record/base.rb
7
+ lib/massive_record/cell.rb
8
+ lib/massive_record/column_families_collection.rb
9
+ lib/massive_record/column_family.rb
10
+ lib/massive_record/connection.rb
11
+ lib/massive_record/migration.rb
12
+ lib/massive_record/row.rb
13
+ lib/massive_record/scanner.rb
14
+ lib/massive_record/table.rb
15
+ lib/massive_record/tables_collection.rb
16
+ lib/massive_record/thrift/hbase.rb
17
+ lib/massive_record/thrift/hbase_constants.rb
18
+ lib/massive_record/thrift/hbase_types.rb
19
+ massive_record.gemspec
20
+ spec/README.md
21
+ spec/config.yml.example
22
+ spec/connection_spec.rb
23
+ spec/spec_helper.rb
24
+ spec/table_spec.rb
data/README.md ADDED
@@ -0,0 +1,225 @@
1
+ # Massive Record
2
+
3
+ Massive Record is an ORM for HBase. It currently uses
4
+ Thrift to communicate with HBase and will in time support
5
+ other ways of communicating with HBase.
6
+
7
+
8
+ See introduction to HBase model architecture:
9
+ http://wiki.apache.org/hadoop/Hbase/HbaseArchitecture
10
+ Understanding terminology of Table / Row / Column family / Column / Cell:
11
+ http://jimbojw.com/wiki/index.php?title=Understanding_Hbase_and_BigTable
12
+
13
+
14
+ ## Installation
15
+
16
+ gem install massive_record
17
+
18
+ ### Ruby on Rails
19
+
20
+ Add the following gem to your Gemfile:
21
+
22
+ gem 'massive_record'
23
+
24
+ Create a config/hbase.yml file with the following content:
25
+
26
+ defaults: &defaults
27
+ host: somewhere.compute.amazonaws.com # No 'http', it's a Thrift connection
28
+ port: 9090
29
+
30
+ development:
31
+ <<: *defaults
32
+
33
+ test:
34
+ <<: *defaults
35
+
36
+ production:
37
+ <<: *defaults
38
+
39
+
40
+ ## Usage
41
+
42
+ There are two ways for using the Massive Record library. At the highest level we have ORM. This is Active Model compliant and makes
43
+ it easy to use. The second way of doing things is working directly against the wrapper (soon to be called adapter).
44
+
45
+
46
+ ### ORM
47
+
48
+ Both MassiveRecord::ORM::Table and MassiveRecord::ORM::Column now have some of the functionality you can expect from an ORM. This includes:
49
+ - An initializer which takes attribute hash and assigns them to your object.
50
+ - Write and read methods for the attributes
51
+ - Validations, as you expect from an ActiveRecord.
52
+ - Callbacks, as you expect from an ActiveRecord.
53
+ - Information about changes on attributes.
54
+ - Casting of attributes
55
+ - Serialization of array / hashes
56
+
57
+ Tables also have:
58
+ - Persistence method calls like create, save and destroy (but they do not actually save things to hbase)
59
+ - Easy access to adapter's connection via Person.connection
60
+ - Easy access to adapter's hbase table via Person.table
61
+ - Finder method, like Person.find("an_id"), Person.find("id1", "id2"), Person.all etc
62
+ - Save / update methods
63
+ - Auto-creation of table and column families on save if the table does not exist.
64
+ - Destroy records
65
+
66
+
67
+ Here is an example of usage, both for Table and Column:
68
+
69
+ class Person < MassiveRecord::ORM::Table
70
+ column_family :info do
71
+ field :name
72
+ field :email
73
+ field :phone_number
74
+ field :points, :integer, :default => 0
75
+ field :date_of_birth, :date
76
+ field :newsletter, :boolean, :default => false
77
+ end
78
+
79
+ validates_presence_of :name, :email
80
+ validates_format_of :email, :with => /\A([^@\s]+)@((?:[-a-z0-9]+\.)+[a-z]{2,})\Z/i
81
+ end
82
+
83
+
84
+
85
+ class Address < MassiveRecord::ORM::Column
86
+ field :street
87
+ field :number, :integer
88
+ field :nice_place, :boolean, :default => true
89
+ end
90
+
91
+
92
+ ## Wrapper (adapter) API
93
+
94
+ You can, if you'd like, work directly against the adapter.
95
+
96
+ # Init a new connection with HBase
97
+ conn = MassiveRecord::Wrapper::Connection.new(:host => 'localhost', :port => 9090)
98
+ conn.open
99
+
100
+ # OR init a connection using the config/hbase.yml file with Rails
101
+ conn = MassiveRecord::Wrapper::Base.connection
102
+
103
+ # Fetch table names
104
+ conn.tables # => ["companies", "news", "webpages"]
105
+
106
+ # Init a table
107
+ table = MassiveRecord::Wrapper::Table.new(conn, :people)
108
+
109
+ # Add a column family
110
+ column = MassiveRecord::Wrapper::ColumnFamily.new(:info)
111
+ table.column_families.push(column)
112
+
113
+ # Or bulk add column families
114
+ table.create_column_families([:friends, :misc])
115
+
116
+ # Create the table
117
+ table.save # will raise an exception if the table already exists
118
+
119
+ # Fetch column families from the database
120
+ table.fetch_column_families # => [ColumnFamily#RTY4424, ColumnFamily#R475424, ColumnFamily#GHJ9424]
121
+ table.column_families.collect(&:name) # => ["info", "friends", "misc"]
122
+
123
+ # Add a new row
124
+ row = MassiveRecord::Wrapper::Row.new
125
+ row.id = "my_unique_id"
126
+ row.values = { :info => { :first_name => "H", :last_name => "Base", :email => "h@base.com" } }
127
+ row.table = table
128
+ row.save
129
+
130
+ # Fetch rows
131
+ table.first # => MassiveRecord#ID1
132
+ table.all(:limit => 10) # => [MassiveRecord#ID1, MassiveRecord#ID2, ...]
133
+ table.find("ID2") # => MassiveRecord#ID2
134
+ table.find(["ID1", "ID2"]) # => [MassiveRecord#ID1, MassiveRecord#ID2]
135
+ table.all(:limit => 3, :start => "ID2") # => [MassiveRecord#ID2, MassiveRecord#ID3, MassiveRecord#ID4]
136
+
137
+ # Manipulate rows
138
+ table.first.destroy # => true
139
+
140
+ # Remove the table
141
+ table.destroy
142
+
143
+
144
+
145
+ ## Planned work
146
+
147
+ - Rename Wrapper to Adapter, and make it easy to switch from Thrift to another way of communicating with Hbase.
148
+ - Automatically handling time stamps like created_at and updated_at.
149
+ - Associations and embedded objects.
150
+ - Implement other Adapters, for instance using jruby and the Java API.
151
+
152
+
153
+
154
+ ## Contribute
155
+
156
+ If you want to contribute feel free to fork this project :-)
157
+ Make a feature branch, write tests, implement, and open a pull request.
158
+
159
+ ### Getting started
160
+
161
+ git clone git://github.com/CompanyBook/massive_record.git (or the address to your fork)
162
+ cd massive_record
163
+ bundle install
164
+
165
+ Next up you need to add a config.yml file inside of spec/ which contains something like:
166
+ host: url.to-a.thrift.server
167
+ port: 9090
168
+ table: massive_record_test_table
169
+
170
+ You should now be able to run `rspec spec/`
171
+
172
+ ### Play with it in the console
173
+
174
+ Check out the massive_record project and install it as a gem:
175
+
176
+ cd massive_record/
177
+ bundle console
178
+ ruby-1.9.2-p0 > Bundler.require
179
+ => [
180
+ <Bundler::Dependency type=:runtime name="massive_record" requirements=">= 0">,
181
+ <Bundler::Dependency type=:runtime name="thrift" requirements=">= 0.5.0">,
182
+ <Bundler::Dependency type=:runtime name="activesupport" requirements=">= 0">,
183
+ <Bundler::Dependency type=:runtime name="activemodel" requirements=">= 0">,
184
+ <Bundler::Dependency type=:runtime name="rspec" requirements=">= 2.1.0">
185
+ ]
186
+ ruby-1.9.2-p0 > MassiveRecord::VERSION
187
+ => "0.0.1"
188
+
189
+ ### Clean HBase database between each test
190
+
191
+ We have created a helper module MassiveRecord::Rspec::SimpleDatabaseCleaner which, when included into rspec tests, will clean
192
+ the database for ORM records between each test case. You can also take a look into spec/support/mock_massive_record_connection.rb
193
+ for some functionality which will mock a hbase connection making it easier (faster) to test code where no real database is needed.
194
+
195
+
196
+
197
+
198
+ ## More Information and Resources
199
+
200
+ ### Thrift API
201
+
202
+ Ruby Library using the HBase Thrift API.
203
+ http://wiki.apache.org/hadoop/Hbase/ThriftApi
204
+
205
+ The generated Ruby files can be found under lib/massive_record/thrift/
206
+ The whole API (CRUD and more) is present in the Client object (Apache::Hadoop::Hbase::Thrift::Hbase::Client).
207
+ The client can be easily initialized using the MassiveRecord connection :
208
+
209
+ conn = MassiveRecord::Wrapper::Connection.new(:host => 'localhost', :port => 9090)
210
+ conn.open
211
+
212
+ client = conn.client
213
+ # Do whatever you want with the client object
214
+
215
+ ### Q&A
216
+
217
+ How to add a new column family to an existing table?
218
+
219
+ # Connect to the HBase console on the server itself and enter the following code :
220
+ disable 'companies'
221
+ alter 'companies', { NAME => 'new_collumn_familiy' }
222
+ enable 'companies'
223
+
224
+
225
+ Copyright (c) 2011 Companybook, released under the MIT license
data/Rakefile ADDED
@@ -0,0 +1,16 @@
1
+ $:.push File.expand_path("../lib", __FILE__)
2
+ require 'bundler'
3
+ require "massive_record/version"
4
+
5
+ Bundler::GemHelper.install_tasks
6
+
7
+ require "rspec/core/rake_task"
8
+ RSpec::Core::RakeTask.new(:spec)
9
+
10
+ require 'rake/rdoctask'
11
+ Rake::RDocTask.new do |rdoc|
12
+ rdoc.rdoc_dir = 'rdoc'
13
+ rdoc.title = "MassiveRecord #{MassiveRecord::VERSION}"
14
+ rdoc.rdoc_files.include('README*')
15
+ rdoc.rdoc_files.include('lib/**/*.rb')
16
+ end
data/TODO.md ADDED
@@ -0,0 +1,8 @@
1
+ # TODO
2
+
3
+ * code documentation
4
+ * Rails ORM (ActiveModel etc.)
5
+ * write tests
6
+ * add generators for hbase.yml etc.
7
+ * add migration tools
8
+ * ...
@@ -0,0 +1 @@
1
+ Autotest.add_discovery { "rspec2" }
@@ -0,0 +1,18 @@
1
+ # Thrift Gems
2
+ require 'thrift'
3
+ require 'thrift/transport/socket'
4
+ require 'thrift/protocol/binary_protocol'
5
+
6
+ # Exceptions
7
+ require 'massive_record/exceptions'
8
+
9
+ # Generated Ruby classes from Thrift for HBase
10
+ require 'massive_record/thrift/hbase_constants'
11
+ require 'massive_record/thrift/hbase_types'
12
+ require 'massive_record/thrift/hbase'
13
+
14
+ # Wrapper
15
+ require 'massive_record/wrapper/base'
16
+
17
+ # ORM
18
+ require 'massive_record/orm/base'
@@ -0,0 +1,11 @@
1
+ module MassiveRecord
2
+
3
+ class ConnectionException < Exception
4
+
5
+ def initialize
6
+
7
+ end
8
+
9
+ end
10
+
11
+ end
@@ -0,0 +1,61 @@
1
+ module MassiveRecord
2
+ module ORM
3
+ module AttributeMethods
4
+ extend ActiveSupport::Concern
5
+ include ActiveModel::AttributeMethods
6
+
7
+ module ClassMethods
8
+ def define_attribute_methods
9
+ super(known_attribute_names)
10
+ end
11
+ end
12
+
13
+
14
+ def attributes
15
+ @attributes ||= {}
16
+ end
17
+
18
+ def attributes=(new_attributes)
19
+ return unless new_attributes.is_a?(Hash)
20
+
21
+ new_attributes.each do |attr, value|
22
+ writer_method = "#{attr}="
23
+ if respond_to? writer_method
24
+ send(writer_method, value)
25
+ else
26
+ raise UnkownAttributeError.new("Unkown attribute: #{attr}")
27
+ end
28
+ end
29
+ end
30
+
31
+
32
+ def method_missing(method, *args, &block)
33
+ unless self.class.attribute_methods_generated?
34
+ self.class.define_attribute_methods
35
+ send(method, *args, &block)
36
+ else
37
+ super
38
+ end
39
+ end
40
+
41
+ def respond_to?(*args)
42
+ self.class.define_attribute_methods unless self.class.attribute_methods_generated?
43
+ super
44
+ end
45
+
46
+ private
47
+
48
+ def attributes_raw=(new_attributes)
49
+ return unless new_attributes.is_a?(Hash)
50
+ attributes = new_attributes.stringify_keys
51
+ @attributes = {'id' => nil}.merge(attributes)
52
+ end
53
+
54
+ def attributes_from_field_definition
55
+ attributes = {'id' => nil}
56
+ attributes.merge! self.class.default_attributes_from_schema if self.class.respond_to? :default_attributes_from_schema
57
+ attributes
58
+ end
59
+ end
60
+ end
61
+ end