cassandra_datum 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,52 @@
1
+ .idea
2
+ atlassian*
3
+
4
+ # rcov generated
5
+ coverage
6
+ coverage.data
7
+
8
+ # rdoc generated
9
+ rdoc
10
+
11
+ # yard generated
12
+ doc
13
+ .yardoc
14
+
15
+ # bundler
16
+ .bundle
17
+
18
+ # jeweler generated
19
+ pkg
20
+
21
+ # Have editor/IDE/OS specific files you need to ignore? Consider using a global gitignore:
22
+ #
23
+ # * Create a file at ~/.gitignore
24
+ # * Include files you want ignored
25
+ # * Run: git config --global core.excludesfile ~/.gitignore
26
+ #
27
+ # After doing this, these files will be ignored in all your git projects,
28
+ # saving you from having to 'pollute' every project you touch with them
29
+ #
30
+ # Not sure what needs to be ignored for particular editors/OSes? Here are some ideas to get you started. (Remember to remove the leading # from the line.)
31
+ #
32
+ # For MacOS:
33
+ #
34
+ #.DS_Store
35
+
36
+ # For TextMate
37
+ #*.tmproj
38
+ #tmtags
39
+
40
+ # For emacs:
41
+ #*~
42
+ #\#*
43
+ #.\#*
44
+
45
+ # For vim:
46
+ #*.swp
47
+
48
+ # For redcar:
49
+ #.redcar
50
+
51
+ # For rubinius:
52
+ #*.rbc
data/Gemfile ADDED
@@ -0,0 +1,20 @@
1
# NOTE(review): the gem source and both git remotes use plain http://.
# The lockfile pins the same http:// remotes, so a move to https:// should be
# made together with regenerating the lockfile.
source 'http://rubygems.org'

# Runtime dependencies, all held to the 3.x series.
gem 'activesupport', '~> 3'
gem 'activemodel', '~> 3'
gem 'activerecord', '~> 3'
gem 'exception_helper'

# Backupify forks, resolved straight from git rather than from rubygems.
gem 'cassandra', :git => 'http://github.com/backupify/cassandra.git'
gem 'active_attr', :git => 'http://github.com/backupify/active_attr.git'

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem 'shoulda'
  gem 'factory_girl'
  gem 'rdoc'
  gem 'bundler'
  gem 'jeweler'
  gem 'thin'
end
@@ -0,0 +1,113 @@
1
+ GIT
2
+ remote: http://github.com/backupify/active_attr.git
3
+ revision: 69951a36e62bc348b6d2c86ce50c1251ad709e34
4
+ specs:
5
+ active_attr (0.8.2)
6
+ activemodel (>= 3.0.2, < 4.1)
7
+ activesupport (>= 3.0.2, < 4.1)
8
+
9
+ GIT
10
+ remote: http://github.com/backupify/cassandra.git
11
+ revision: e66c55ebb61422db96546f2052c5701f6e8e5343
12
+ specs:
13
+ cassandra (0.12.2)
14
+ json
15
+ rake
16
+ simple_uuid (~> 0.2.0)
17
+ thrift_client (>= 0.7.0)
18
+
19
+ GEM
20
+ remote: http://rubygems.org/
21
+ specs:
22
+ activemodel (3.2.14)
23
+ activesupport (= 3.2.14)
24
+ builder (~> 3.0.0)
25
+ activerecord (3.2.14)
26
+ activemodel (= 3.2.14)
27
+ activesupport (= 3.2.14)
28
+ arel (~> 3.0.2)
29
+ tzinfo (~> 0.3.29)
30
+ activesupport (3.2.14)
31
+ i18n (~> 0.6, >= 0.6.4)
32
+ multi_json (~> 1.0)
33
+ addressable (2.3.5)
34
+ arel (3.0.2)
35
+ builder (3.0.4)
36
+ daemons (1.1.9)
37
+ eventmachine (1.0.3)
38
+ exception_helper (0.1.2)
39
+ factory_girl (4.2.0)
40
+ activesupport (>= 3.0.0)
41
+ faraday (0.8.8)
42
+ multipart-post (~> 1.2.0)
43
+ git (1.2.6)
44
+ github_api (0.10.1)
45
+ addressable
46
+ faraday (~> 0.8.1)
47
+ hashie (>= 1.2)
48
+ multi_json (~> 1.4)
49
+ nokogiri (~> 1.5.2)
50
+ oauth2
51
+ hashie (2.0.5)
52
+ highline (1.6.19)
53
+ httpauth (0.2.0)
54
+ i18n (0.6.5)
55
+ jeweler (1.8.7)
56
+ builder
57
+ bundler (~> 1.0)
58
+ git (>= 1.2.5)
59
+ github_api (= 0.10.1)
60
+ highline (>= 1.6.15)
61
+ nokogiri (= 1.5.10)
62
+ rake
63
+ rdoc
64
+ json (1.8.0)
65
+ jwt (0.1.8)
66
+ multi_json (>= 1.5)
67
+ multi_json (1.7.9)
68
+ multi_xml (0.5.5)
69
+ multipart-post (1.2.0)
70
+ nokogiri (1.5.10)
71
+ oauth2 (0.9.2)
72
+ faraday (~> 0.8)
73
+ httpauth (~> 0.2)
74
+ jwt (~> 0.1.4)
75
+ multi_json (~> 1.0)
76
+ multi_xml (~> 0.5)
77
+ rack (~> 1.2)
78
+ rack (1.5.2)
79
+ rake (10.1.0)
80
+ rdoc (4.0.1)
81
+ json (~> 1.4)
82
+ shoulda (3.5.0)
83
+ shoulda-context (~> 1.0, >= 1.0.1)
84
+ shoulda-matchers (>= 1.4.1, < 3.0)
85
+ shoulda-context (1.1.5)
86
+ shoulda-matchers (2.3.0)
87
+ activesupport (>= 3.0.0)
88
+ simple_uuid (0.2.0)
89
+ thin (1.5.1)
90
+ daemons (>= 1.0.9)
91
+ eventmachine (>= 0.12.6)
92
+ rack (>= 1.0.0)
93
+ thrift (0.9.1)
94
+ thrift_client (0.9.2)
95
+ thrift (~> 0.9.0)
96
+ tzinfo (0.3.37)
97
+
98
+ PLATFORMS
99
+ ruby
100
+
101
+ DEPENDENCIES
102
+ active_attr!
103
+ activemodel (~> 3)
104
+ activerecord (~> 3)
105
+ activesupport (~> 3)
106
+ bundler
107
+ cassandra!
108
+ exception_helper
109
+ factory_girl
110
+ jeweler
111
+ rdoc
112
+ shoulda
113
+ thin
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Jason Haruska
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,14 @@
1
+ = cassandra_datum
2
+
3
+ == Test setup
4
+
5
+ Do this in cassandra-cli:
6
+
7
+ use BackupifyMetadata_test; # create it first if it doesn't exist
8
+ create column family MockCassandraData with column_type='Super' and comparator='com.backupify.db.DatumType' and subcomparator='UTF8Type';
9
+
10
+ == Copyright
11
+
12
+ Copyright (c) 2012 Jason Haruska. See LICENSE.txt for
13
+ further details.
14
+
@@ -0,0 +1,24 @@
1
# encoding: utf-8

require 'bundler'
require 'bundler/gem_tasks'
require 'rake'
require 'rake/testtask'

# Put the gems of the :default and :development groups on the load path.
# When Bundler cannot do so, print its message plus a hint and exit with the
# status code Bundler attached to the error.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts 'Run `bundle install` to install missing gems'
  exit e.status_code
end

# `rake test` runs every file matching test/**/*_test.rb with lib/ and test/
# added to the load path.
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# A bare `rake` runs the test suite.
task :default => :test
24
+
@@ -0,0 +1,32 @@
1
# -*- encoding: utf-8 -*-

# Make lib/ loadable so the version constant can be read without the gem
# being installed.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'cassandra_datum/version'

Gem::Specification.new do |gem|
  gem.name        = 'cassandra_datum'
  gem.version     = CassandraDatum::VERSION
  gem.authors     = ['Jason Haruska']
  gem.email       = ['jason@backupify.com']
  gem.description = 'Cassandra backed ORM'
  gem.summary     = 'An active record like object base that is backed by Cassandra'
  gem.homepage    = 'http://github.com/backupify/cassandra_datum'
  gem.license     = 'MIT'

  # Package exactly what git tracks; executables and test files are derived
  # from that list by path prefix.
  tracked_files = `git ls-files`.split($/)
  gem.files         = tracked_files
  gem.executables   = tracked_files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = tracked_files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # NOTE(review): the Rails component gems are declared as '>= 2.3.5' here
  # while the project's Gemfile holds them to '~> 3' -- confirm which range
  # is actually supported.
  [
    ['activesupport',    '>= 2.3.5'],
    ['activemodel',      '>= 2.3.5'],
    ['activerecord',     '>= 2.3.5'],
    ['cassandra',        '>= 0'],
    ['active_attr',      '>= 0'],
    ['exception_helper', '>= 0']
  ].each do |dependency_name, requirement|
    gem.add_runtime_dependency(dependency_name, [requirement])
  end

  [
    ['shoulda',      '>= 0'],
    ['factory_girl', '>= 0']
  ].each do |dependency_name, requirement|
    gem.add_development_dependency(dependency_name, [requirement])
  end
end
32
+
@@ -0,0 +1,19 @@
1
+ require 'cassandra_datum/version'
2
+ require 'cassandra_datum/base'
3
+ require 'cassandra_datum/railtie' if defined?(Rails)
4
+
5
module CassandraDatum

  # Returns the Cassandra settings for the current environment.
  #
  # The settings file is ENV['CONFIG_FILE'] when that is set, otherwise
  # "#{Rails.root}/config/cassandra.yml" when Rails is loaded. The file is
  # rendered through ERB, parsed as YAML, and the entry stored under the
  # environment name (Rails.env, or 'development' outside Rails) is returned.
  #
  # The result is memoized with ||=, so the file is read once per process;
  # a nil/false result (no entry for the environment) is not cached and the
  # file is read again on the next call.
  #
  # @return [Object, nil] the parsed entry for the environment, nil when absent
  # @raise [RuntimeError] when neither CONFIG_FILE nor Rails is available
  def self.configuration
    @@configuration ||= begin
      require 'erb'
      require 'yaml' # YAML is needed below and is not loaded by default outside Rails

      env = defined?(Rails) ? Rails.env : 'development'
      config_file = ENV['CONFIG_FILE'] || (defined?(Rails) ? "#{Rails.root}/config/cassandra.yml" : nil)
      fail "No CONFIG_FILE or Rails.root defined" unless config_file

      config_file = File.expand_path(config_file)
      config = YAML.load(ERB.new(File.read(config_file)).result)
      config[env]
    end
  end

end
@@ -0,0 +1,272 @@
1
require 'base64'

require 'active_attr/model'
require 'active_model/observing'
require 'active_model/callbacks'
require 'exception_helper/retry'
require 'active_record/errors'
require 'active_record/validations'
require 'cassandra'
8
+
9
module CassandraDatum
  # Base class for models persisted through the cassandra client.
  #
  # A datum is addressed by (column family, row_id, column_name); its
  # attributes are written as a hash stored under that column name. The
  # default row_id / document_id / timestamp all derive from SINGLETON and are
  # presumably meant to be overridden by subclasses.
  #
  # The client is taken from the top-level ::CASSANDRA_CLIENT constant.
  class Base
    include ActiveAttr::Model

    include ActiveModel::Observing
    extend ActiveModel::Callbacks

    include ExceptionHelper::Retry

    define_model_callbacks :save
    define_model_callbacks :destroy

    # Set from the column write timestamps when loaded from cassandra, and
    # from the local clock after a successful save!.
    attr_reader :updated_at

    FIRST_KEY = ''
    LAST_KEY = 'a~0'
    DEFAULT_ALL_COUNT = 50
    DEFAULT_WALK_ROW_COUNT = 1000
    SINGLETON = 1

    before_save :populate_type_if_exists

    # Initializer layer that derives updated_at from cassandra write timestamps.
    #
    # @param attr [Array] constructor arguments; only the first one is inspected
    def initialize_with_updated_at(*attr)
      # the OrderedHash returned by the cassandra client has a timestamps method which contains the write date of each column
      if attr.size > 0 && attr.first.respond_to?(:timestamps)
        timestamps = attr.first.timestamps
        timestamp_in_microseconds = timestamps.values.max if timestamps

        # a missing or empty timestamps hash has no maximum; leave updated_at unset instead of failing on nil
        unless timestamp_in_microseconds.nil?
          @updated_at = Time.at(timestamp_in_microseconds / 1000000, timestamp_in_microseconds % 1000000).to_datetime
        end
      end

      initialize_without_updated_at(*attr)
    end

    # Initializer layer that turns every non-blank value of an attributes hash
    # into a UTF-8 tagged string. The hash passed in is modified in place.
    #
    # @param attr [Array] constructor arguments; only a leading Hash is touched
    def initialize_with_utf8_encoding(*attr)
      if attr.size > 0 && attr.first.is_a?(Hash)
        #careful not to trounce timestamps in Cassandra::OrderedHash
        timestamps = attr.first.is_a?(Cassandra::OrderedHash) ? attr.first.timestamps : nil
        attr.first.each { |k, v| attr.first[k] = "#{v}".force_encoding('UTF-8') unless v.blank? }
        attr.first.instance_variable_set(:@timestamps, timestamps) if timestamps.present?
      end

      initialize_without_utf8_encoding(*attr)
    end

    # Resulting call order: utf8_encoding layer -> updated_at layer -> original initialize.
    alias_method_chain :initialize, :updated_at
    alias_method_chain :initialize, :utf8_encoding

    # Builds a datum from the given arguments and persists it with save!,
    # so validation or write failures raise.
    #
    # @return [CassandraDatum::Base] the new, saved instance
    def self.create(*attr)
      new(*attr).tap(&:save!)
    end

    # Reads or sets the column family used by this class.
    #
    # With an argument, stores it for the receiving class. Without one,
    # returns the name configured on this class or its nearest ancestor (up to
    # CassandraDatum::Base), falling back to model_name.plural.camelize.
    #
    # The name is kept per class (not in a class variable shared by the whole
    # hierarchy), so configuring one subclass does not change its siblings.
    #
    # @param name [Array] optional single column family name
    # @return [Object] the column family name
    def self.column_family(*name)
      if name.present?
        @column_family = name.first
      else
        configured_column_family || model_name.plural.camelize
      end
    end

    # Walks from the receiver up to CassandraDatum::Base and returns the first
    # explicitly configured column family, or nil when none was set.
    def self.configured_column_family
      klass = self
      loop do
        configured = klass.instance_variable_get(:@column_family)
        return configured if configured
        return nil if klass.equal?(CassandraDatum::Base)
        klass = klass.superclass
      end
    end
    private_class_method :configured_column_family

    #key can be from to_param as well as just the key. This function handles both
    #
    # The key is URL-safe Base64 that decodes to "row_id:column_name".
    #
    # @param key [String] value produced by #key / #to_param
    # @return [CassandraDatum::Base] the loaded datum
    # @raise [ActiveRecord::RecordNotFound] when cassandra returns nothing for the key
    def self.find(key)
      row_id, column_name = Base64.decode64(key.tr('-_', '+/')).split(':', 2)

      res = cassandra_client.get(column_family, row_id, column_name)

      raise ActiveRecord::RecordNotFound.new if res.blank?

      initialize_datum res
    end

    # Same as find, but returns nil instead of raising when nothing is found.
    #
    # @param key [String] value produced by #key / #to_param
    # @return [CassandraDatum::Base, nil]
    def self.find_by_key(key)
      find(key)
    rescue ActiveRecord::RecordNotFound
      nil
    end

    #will always return data in reverse chronological order
    # @option[row_id] the row_id to paginate through (optional if passing in a before_id or after_id)
    # @option[before_id] return a page of data that occurs before this key (exclusive)
    # @option[after_id] return a page of data that occurs after this key (exclusive)
    # @option[count] limit the number of data returned (default 50)
    #
    # When both before_id and after_id are given, before_id wins. The options
    # hash is symbolized in place when it supports symbolize_keys!.
    #
    # @return [Array<CassandraDatum::Base>]
    def self.all(options={})
      options.symbolize_keys! if options.respond_to?(:symbolize_keys!)

      cass_options = {}
      cass_options[:count] = (options[:count] || DEFAULT_ALL_COUNT).to_i

      cursor = options[:before_id] || options[:after_id]

      if cursor
        row_id, cass_options[:start] = Base64.decode64(cursor.tr('-_', '+/')).split(':', 2)
        cass_options[:reversed] = true if options[:before_id]
        # the slice starts at the cursor column itself, so fetch one extra and drop it below
        cass_options[:count] += 1
      elsif options[:row_id]
        row_id = options[:row_id].to_s
      end

      result = cassandra_client.get(column_family, row_id, cass_options).collect do |_column_name, columns|
        initialize_datum columns
      end

      if cursor
        result.delete_at(0) if result.size > 0 && result[0].key == cursor
        # a before_id page was fetched reversed; flip it back
        result.reverse! if options[:before_id]
      end

      result
    end

    # don't overuse this. it crawls an entire row
    #
    # Yields one datum per column found by walk_row.
    #
    # @param row_id [Object] row to crawl
    # @param options [Hash] passed to walk_row
    def self.find_each(row_id, options = {})
      walk_row(row_id, options) do |k, v|
        yield initialize_datum(v)
      end
    end

    # don't overuse this. it crawls an entire row
    #
    # Yields the key (same encoding as #key) of each column found by walk_row.
    #
    # @param row_id [Object] row to crawl
    # @param options [Hash] passed to walk_row
    def self.find_each_key(row_id, options = {})
      walk_row(row_id, options) { |k, v| yield Base64.strict_encode64([row_id, k].join(':')).tr('+/', '-_') }
    end

    # @return [String] row this datum is stored in; SINGLETON as a string by default
    def row_id
      SINGLETON.to_s
    end

    # @return [Object] first component of the column name; SINGLETON by default
    def document_id
      SINGLETON
    end

    # @return [DateTime] second component of the column name; Time.at(SINGLETON) by default
    def timestamp
      Time.at(SINGLETON).to_datetime
    end

    # @return [String] "#{document_id}~#{timestamp.to_i}", encoded for cassandra
    def column_name
      encode_for_cassandra("#{document_id}~#{timestamp.to_i}")
    end

    # @return [String] Base64 of "row_id:column_name" with '+' and '/' replaced by '-' and '_'
    def key
      Base64.strict_encode64([row_id, column_name].join(':')).tr('+/', '-_')
    end

    # Instance-level shortcut for CassandraDatum::Base.encode_for_cassandra.
    def encode_for_cassandra(str, opts = {})
      CassandraDatum::Base.encode_for_cassandra(str, opts)
    end

    # Transcodes str to UTF-8 and re-tags the result as ASCII-8BIT.
    #
    # By default invalid and undefined characters are replaced with the empty
    # string; opts is merged over these defaults and passed to String#encode.
    #
    # @param str [String]
    # @param opts [Hash] String#encode options overriding the defaults
    # @return [String] a new ASCII-8BIT tagged string
    def self.encode_for_cassandra(str, opts = {})
      encode_opts = {
          :invalid => :replace,
          :undef => :replace,
          :replace => ''
      }.merge(opts)

      str.encode('UTF-8', encode_opts).force_encoding('ASCII-8BIT')
    end


    # @return [String] same value as #key
    def to_param
      self.key
    end

    # Non-raising variant of save!.
    #
    # Only StandardError is rescued, so interrupts and exit requests still
    # propagate to the caller.
    #
    # @return [Object, false] the result of save!, or false when it raised
    def save
      save!
    rescue StandardError
      false
    end

    # Serializes the non-nil attributes and writes them to cassandra inside
    # the save callbacks. Arrays and hashes are stored as JSON, everything else
    # via string interpolation; every value is passed through encode_for_cassandra.
    #
    # @raise [ActiveRecord::RecordInvalid] when the datum is not valid
    def save!
      _run_save_callbacks do
        attrs = {}

        attributes.reject { |k, v| v.nil? }.each do |k, v|
          attrs[k] = [Array, Hash].any?{ |collection_class| v.is_a?(collection_class) } ? v.to_json : "#{v}"
          attrs[k] = encode_for_cassandra(attrs[k])
        end

        raise ActiveRecord::RecordInvalid.new(self) unless self.valid?

        self.class.cassandra_client.insert(self.class.column_family, self.row_id, {self.column_name => attrs})

        # this value might be a tad different from the value in cassandra. the only way to get the true updated_at value is to reload the datum
        @updated_at = DateTime.now
      end
    end

    # Looks this datum up again by key.
    #
    # @return [CassandraDatum::Base] a freshly loaded instance; the receiver is not modified
    def reload
      self.class.find(self.key)
    end

    # Deletes this datum, wrapped in the destroy callbacks.
    def destroy
      _run_destroy_callbacks { self.delete }
    end

    # Removes the whole row from this class's column family.
    #
    # @param row_id [Object] row to remove
    def self.delete_all(row_id)
      cassandra_client.remove column_family, row_id
    end

    # Removes the given columns from a row.
    #
    # Goes through cassandra_client like every other operation in this class
    # instead of referencing the CASSANDRA_CLIENT constant directly.
    #
    # @param row_id [Object] row holding the columns
    # @param column_names [Array] column names; nested arrays are flattened
    def self.delete(row_id, *column_names)
      column_names.flatten.each { |column_name| cassandra_client.remove column_family, row_id, column_name }
    end

    # Removes this datum's column without running callbacks.
    def delete
      self.class.delete(self.row_id, self.column_name)
    end

    # Returns the top-level ::CASSANDRA_CLIENT when that constant is defined;
    # otherwise logs an error and returns nil.
    #
    # NOTE(review): `logger` is not defined in this file -- presumably supplied
    # by one of the included modules; confirm.
    #
    # @return [Object, nil]
    def self.cassandra_client
      if defined?(::CASSANDRA_CLIENT)
        ::CASSANDRA_CLIENT
      else
        logger.error("No cassandra client defined. Please set CASSANDRA_CLIENT")
        nil
      end
    end


    # JSON representation limited, by default, to self.class.accessor_names
    # and without a root element. Caller options override the :only default.
    #
    # @param options [Hash]
    def as_json(options={})
      options = {:only => self.class.accessor_names}.merge(options)
      self.include_root_in_json = false
      super(options)
    end

    # @return [Boolean] true while updated_at is blank, i.e. the datum was
    #   neither built from data carrying timestamps nor saved through save!
    def new_record?
      self.updated_at.blank?
    end

    # NOTE: `protected` only applies to instance methods. The `def self.`
    # methods below remain publicly callable class methods.
    protected

    # don't overuse this. it crawls an entire row
    #
    # Pages through a row :count columns at a time (default
    # DEFAULT_WALK_ROW_COUNT) and yields [column_name, value] for each column.
    # Each page starts at the last column of the previous page, which is
    # skipped; a caller-supplied :start column is therefore skipped as well.
    # Iteration stops when a page yields no new column. Each page fetch is
    # wrapped in retry_on_failure for ::Thrift::Exception.
    #
    # @param row_id [Object] row to crawl
    # @param options [Hash] options for the client get call (:count, :start, :reversed, ...)
    def self.walk_row(row_id, options = {})
      options = {:count => DEFAULT_WALK_ROW_COUNT}.merge(options)

      start = options[:start] || (options[:reversed] ? LAST_KEY : FIRST_KEY)
      last_start = nil

      loop do
        retry_on_failure(::Thrift::Exception, :retry_count => 5, :retry_sleep => 10) do
          last_start = start

          res = cassandra_client.get(column_family, row_id, options.merge(:start => start))

          res.each do |k, v|
            next if k == last_start # ignore the first result we get back. since start is the last record in the previous get, it'll always be off by 1
            start = k
            yield [k, v]
          end
        end

        break if last_start == start
      end
    end

    # before_save hook: records the class name in `type` when the model has a
    # type= writer.
    def populate_type_if_exists
      self.type = self.class.name if self.respond_to?(:type=)
    end

    # Instantiates a datum from stored columns. When the columns carry a
    # 'type' value, that class name is constantized and used; otherwise the
    # receiving class is used.
    #
    # NOTE(review): the class name comes from stored data and is constantized
    # as-is.
    #
    # @param res [Hash] columns as returned by the cassandra client
    # @return [CassandraDatum::Base]
    def self.initialize_datum(res)
      datum_class = res['type'].present? ? res['type'].constantize : self
      datum_class.new res
    end
  end
end