cassandra_datum 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,52 @@
1
+ .idea
2
+ atlassian*
3
+
4
+ # rcov generated
5
+ coverage
6
+ coverage.data
7
+
8
+ # rdoc generated
9
+ rdoc
10
+
11
+ # yard generated
12
+ doc
13
+ .yardoc
14
+
15
+ # bundler
16
+ .bundle
17
+
18
+ # jeweler generated
19
+ pkg
20
+
21
+ # Have editor/IDE/OS specific files you need to ignore? Consider using a global gitignore:
22
+ #
23
+ # * Create a file at ~/.gitignore
24
+ # * Include files you want ignored
25
+ # * Run: git config --global core.excludesfile ~/.gitignore
26
+ #
27
+ # After doing this, these files will be ignored in all your git projects,
28
+ # saving you from having to 'pollute' every project you touch with them
29
+ #
30
+ # Not sure what needs to be ignored for particular editors/OSes? Here are some ideas to get you started. (Remember to remove the leading # from the line.)
31
+ #
32
+ # For MacOS:
33
+ #
34
+ #.DS_Store
35
+
36
+ # For TextMate
37
+ #*.tmproj
38
+ #tmtags
39
+
40
+ # For emacs:
41
+ #*~
42
+ #\#*
43
+ #.\#*
44
+
45
+ # For vim:
46
+ #*.swp
47
+
48
+ # For redcar:
49
+ #.redcar
50
+
51
+ # For rubinius:
52
+ #*.rbc
data/Gemfile ADDED
@@ -0,0 +1,20 @@
# Bundler manifest for working on cassandra_datum.
source "http://rubygems.org"

# Rails components, each held to the 3.x series.
%w[activesupport activemodel activerecord].each { |name| gem name, '~> 3' }
gem 'exception_helper'

# Taken from the backupify GitHub repositories instead of rubygems.
gem 'cassandra',   :git => "http://github.com/backupify/cassandra.git"
gem 'active_attr', :git => "http://github.com/backupify/active_attr.git"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  %w[shoulda factory_girl rdoc bundler jeweler thin].each { |name| gem name }
end
@@ -0,0 +1,113 @@
1
+ GIT
2
+ remote: http://github.com/backupify/active_attr.git
3
+ revision: 69951a36e62bc348b6d2c86ce50c1251ad709e34
4
+ specs:
5
+ active_attr (0.8.2)
6
+ activemodel (>= 3.0.2, < 4.1)
7
+ activesupport (>= 3.0.2, < 4.1)
8
+
9
+ GIT
10
+ remote: http://github.com/backupify/cassandra.git
11
+ revision: e66c55ebb61422db96546f2052c5701f6e8e5343
12
+ specs:
13
+ cassandra (0.12.2)
14
+ json
15
+ rake
16
+ simple_uuid (~> 0.2.0)
17
+ thrift_client (>= 0.7.0)
18
+
19
+ GEM
20
+ remote: http://rubygems.org/
21
+ specs:
22
+ activemodel (3.2.14)
23
+ activesupport (= 3.2.14)
24
+ builder (~> 3.0.0)
25
+ activerecord (3.2.14)
26
+ activemodel (= 3.2.14)
27
+ activesupport (= 3.2.14)
28
+ arel (~> 3.0.2)
29
+ tzinfo (~> 0.3.29)
30
+ activesupport (3.2.14)
31
+ i18n (~> 0.6, >= 0.6.4)
32
+ multi_json (~> 1.0)
33
+ addressable (2.3.5)
34
+ arel (3.0.2)
35
+ builder (3.0.4)
36
+ daemons (1.1.9)
37
+ eventmachine (1.0.3)
38
+ exception_helper (0.1.2)
39
+ factory_girl (4.2.0)
40
+ activesupport (>= 3.0.0)
41
+ faraday (0.8.8)
42
+ multipart-post (~> 1.2.0)
43
+ git (1.2.6)
44
+ github_api (0.10.1)
45
+ addressable
46
+ faraday (~> 0.8.1)
47
+ hashie (>= 1.2)
48
+ multi_json (~> 1.4)
49
+ nokogiri (~> 1.5.2)
50
+ oauth2
51
+ hashie (2.0.5)
52
+ highline (1.6.19)
53
+ httpauth (0.2.0)
54
+ i18n (0.6.5)
55
+ jeweler (1.8.7)
56
+ builder
57
+ bundler (~> 1.0)
58
+ git (>= 1.2.5)
59
+ github_api (= 0.10.1)
60
+ highline (>= 1.6.15)
61
+ nokogiri (= 1.5.10)
62
+ rake
63
+ rdoc
64
+ json (1.8.0)
65
+ jwt (0.1.8)
66
+ multi_json (>= 1.5)
67
+ multi_json (1.7.9)
68
+ multi_xml (0.5.5)
69
+ multipart-post (1.2.0)
70
+ nokogiri (1.5.10)
71
+ oauth2 (0.9.2)
72
+ faraday (~> 0.8)
73
+ httpauth (~> 0.2)
74
+ jwt (~> 0.1.4)
75
+ multi_json (~> 1.0)
76
+ multi_xml (~> 0.5)
77
+ rack (~> 1.2)
78
+ rack (1.5.2)
79
+ rake (10.1.0)
80
+ rdoc (4.0.1)
81
+ json (~> 1.4)
82
+ shoulda (3.5.0)
83
+ shoulda-context (~> 1.0, >= 1.0.1)
84
+ shoulda-matchers (>= 1.4.1, < 3.0)
85
+ shoulda-context (1.1.5)
86
+ shoulda-matchers (2.3.0)
87
+ activesupport (>= 3.0.0)
88
+ simple_uuid (0.2.0)
89
+ thin (1.5.1)
90
+ daemons (>= 1.0.9)
91
+ eventmachine (>= 0.12.6)
92
+ rack (>= 1.0.0)
93
+ thrift (0.9.1)
94
+ thrift_client (0.9.2)
95
+ thrift (~> 0.9.0)
96
+ tzinfo (0.3.37)
97
+
98
+ PLATFORMS
99
+ ruby
100
+
101
+ DEPENDENCIES
102
+ active_attr!
103
+ activemodel (~> 3)
104
+ activerecord (~> 3)
105
+ activesupport (~> 3)
106
+ bundler
107
+ cassandra!
108
+ exception_helper
109
+ factory_girl
110
+ jeweler
111
+ rdoc
112
+ shoulda
113
+ thin
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Jason Haruska
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,14 @@
1
+ = cassandra_datum
2
+
3
+ == Test setup
4
+
5
+ Before running the tests, run the following in cassandra-cli:
6
+
7
+ use BackupifyMetadata_test; # create it first if it doesn't exist
8
+ create column family MockCassandraData with column_type='Super' and comparator='com.backupify.db.DatumType' and subcomparator='UTF8Type';
9
+
10
+ == Copyright
11
+
12
+ Copyright (c) 2012 Jason Haruska. See LICENSE.txt for
13
+ further details.
14
+
@@ -0,0 +1,24 @@
# encoding: utf-8

# Rake entry point for cassandra_datum: loads Bundler's gem tasks and defines
# the test task, which is also the default task.

require 'bundler'
require 'bundler/gem_tasks'
require 'rake'
require 'rake/testtask'

# Activate the :default and :development groups of the bundle. When Bundler
# cannot do so, print its message plus a hint and exit with Bundler's own
# status code instead of failing later with a LoadError.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end

# `rake test` runs every test/**/*_test.rb with lib/ and test/ on the load path.
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

task :default => :test
@@ -0,0 +1,32 @@
# -*- encoding: utf-8 -*-
# Gem specification for cassandra_datum.

# Put this checkout's lib/ on the load path so the version file below resolves.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'cassandra_datum/version'

Gem::Specification.new do |spec|
  spec.name        = "cassandra_datum"
  spec.version     = CassandraDatum::VERSION
  spec.authors     = ["Jason Haruska"]
  spec.email       = ["jason@backupify.com"]
  spec.description = "Cassandra backed ORM"
  spec.summary     = "An active record like object base that is backed by Cassandra"
  spec.homepage    = "http://github.com/backupify/cassandra_datum"
  spec.license     = "MIT"

  # Package whatever git tracks; executables and test files are picked out of
  # that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Runtime dependencies: the Rails components share one minimum version,
  # the remaining gems accept any version.
  %w[activesupport activemodel activerecord].each do |name|
    spec.add_runtime_dependency(name, ">= 2.3.5")
  end
  %w[cassandra active_attr exception_helper].each do |name|
    spec.add_runtime_dependency(name, ">= 0")
  end

  %w[shoulda factory_girl].each do |name|
    spec.add_development_dependency(name, ">= 0")
  end
end
@@ -0,0 +1,19 @@
1
+ require 'cassandra_datum/version'
2
+ require 'cassandra_datum/base'
3
+ require 'cassandra_datum/railtie' if defined?(Rails)
4
+
module CassandraDatum

  # Returns the Cassandra settings for the current environment.
  #
  # The settings file is ENV['CONFIG_FILE'] when that is set, otherwise
  # "#{Rails.root}/config/cassandra.yml" when Rails is loaded. The file is
  # rendered through ERB, parsed as YAML, and the section keyed by Rails.env
  # ('development' when Rails is not loaded) is returned. A truthy result is
  # memoized; a missing section yields nil and the file is read again on the
  # next call.
  #
  # @return [Object, nil] whatever the YAML file stores under the environment key
  # @raise [RuntimeError] when neither CONFIG_FILE nor Rails is available
  def self.configuration
    @@configuration ||= begin
      # Loaded lazily, only when configuration is first requested. YAML must be
      # required here too: nothing else in this file loads it.
      require 'erb'
      require 'yaml'

      env = defined?(Rails) ? Rails.env : 'development'
      config_file = ENV['CONFIG_FILE'] || (defined?(Rails) ? "#{Rails.root}/config/cassandra.yml" : nil)
      fail "No CONFIG_FILE or Rails.root defined" unless config_file

      template = File.read(File.expand_path(config_file))
      YAML.load(ERB.new(template).result)[env]
    end
  end

end
@@ -0,0 +1,272 @@
require 'base64'

require 'active_attr/model'
require 'active_model/observing'
require 'active_model/callbacks'
require 'exception_helper/retry'
require 'active_record/errors'
require 'active_record/validations'
require 'cassandra'

module CassandraDatum
  # ActiveModel-style base class for records kept in Cassandra.
  #
  # A datum is written with
  # `insert(column_family, row_id, {column_name => attributes})` and is
  # addressed by a URL-safe Base64 key built from "row_id:column_name".
  # The default #row_id, #document_id and #timestamp all derive from SINGLETON;
  # subclasses presumably override them (NOTE(review): confirm against the models).
  class Base
    include ActiveAttr::Model

    include ActiveModel::Observing
    extend ActiveModel::Callbacks

    include ExceptionHelper::Retry

    define_model_callbacks :save
    define_model_callbacks :destroy

    # Set from the column write timestamps when a datum is loaded, and to the
    # current time after a successful save!.
    attr_reader :updated_at

    # Default :start column for walk_row when walking forwards / reversed.
    FIRST_KEY = ''
    LAST_KEY = 'a~0'
    # Page size used by .all when no :count option is given.
    DEFAULT_ALL_COUNT = 50
    # Page size used by .walk_row when no :count option is given.
    DEFAULT_WALK_ROW_COUNT = 1000
    # Value behind the default row_id, document_id and timestamp.
    SINGLETON = 1

    before_save :populate_type_if_exists

    # Initializer layer that derives updated_at from the newest column timestamp.
    def initialize_with_updated_at(*attr)
      # the OrderedHash returned by the cassandra client has a timestamps method which contains the write date of each column
      if attr.size > 0 && attr.first.respond_to?(:timestamps)
        timestamp_in_microseconds = attr.first.timestamps.values.max

        # an attribute hash with no recorded timestamps leaves updated_at unset
        # instead of failing on nil arithmetic
        if timestamp_in_microseconds
          @updated_at = Time.at(timestamp_in_microseconds / 1000000, timestamp_in_microseconds % 1000000).to_datetime
        end
      end

      initialize_without_updated_at(*attr)
    end

    # Initializer layer that turns every non-blank value of the attribute hash
    # into a UTF-8 tagged string. The hash passed in is modified in place.
    def initialize_with_utf8_encoding(*attr)
      if attr.size > 0 && attr.first.is_a?(Hash)
        #careful not to trounce timestamps in Cassandra::OrderedHash
        timestamps = attr.first.is_a?(Cassandra::OrderedHash) ? attr.first.timestamps : nil
        attr.first.each { |k, v| attr.first[k] = "#{v}".force_encoding('UTF-8') unless v.blank? }
        attr.first.instance_variable_set(:@timestamps, timestamps) if timestamps.present?
      end

      initialize_without_utf8_encoding(*attr)
    end

    # Resulting call order: utf8_encoding layer -> updated_at layer -> original initialize.
    alias_method_chain :initialize, :updated_at
    alias_method_chain :initialize, :utf8_encoding

    # Builds a datum from the given arguments and saves it.
    #
    # @return [CassandraDatum::Base] the saved datum
    # @raise whatever save! raises
    def self.create(*attr)
      new(*attr).tap(&:save!)
    end

    # With an argument, sets the column family for this class. Without one,
    # returns the column family set on this class or its nearest ancestor,
    # falling back to the pluralized, camelized model name.
    #
    # The value is stored per class: a class variable here would be shared by
    # every subclass, so configuring one model would silently change them all.
    def self.column_family(*name)
      if name.present?
        @column_family = name.first
      else
        configured_column_family || model_name.plural.camelize
      end
    end

    # Column family explicitly set on this class or the closest ancestor, or nil.
    def self.configured_column_family
      return @column_family if @column_family

      superclass.configured_column_family if superclass.respond_to?(:configured_column_family)
    end

    # Builds the URL-safe Base64 key for a row id / column name pair.
    def self.encode_key(row_id, column_name)
      Base64.strict_encode64([row_id, column_name].join(':')).tr('+/', '-_')
    end

    # Splits a key produced by encode_key back into [row_id, column_name].
    def self.decode_key(key)
      Base64.decode64(key.tr('-_', '+/')).split(':', 2)
    end

    #key can be from to_param as well as just the key. This function handles both
    #
    # @raise [ActiveRecord::RecordNotFound] when cassandra returns nothing for the key
    def self.find(key)
      row_id, column_name = decode_key(key)

      res = cassandra_client.get(column_family, row_id, column_name)

      raise ActiveRecord::RecordNotFound if res.blank?

      initialize_datum res
    end

    # Same as find, but returns nil instead of raising when nothing is found.
    def self.find_by_key(key)
      find(key)
    rescue ActiveRecord::RecordNotFound
      nil
    end

    #will always return data in reverse chronological order
    # @option[row_id] the row_id to paginate through (optional if passing in a before_id or after_id)
    # @option[before_id] return a page of data that occurs before this key (exclusive)
    # @option[after_id] return a page of data that occurs after this key (exclusive)
    # @option[count] limit the number of data returned (default 50)
    def self.all(options={})
      # work on a symbolized copy so the caller's hash is left untouched
      options = options.symbolize_keys if options.respond_to?(:symbolize_keys)

      cass_options = {}
      cass_options[:count] = (options[:count] || DEFAULT_ALL_COUNT).to_i

      # before_id / after_id are exclusive bounds, but cassandra includes the
      # start column, so one extra column is requested and dropped afterwards
      if options[:before_id]
        row_id, cass_options[:start] = decode_key(options[:before_id])
        cass_options[:reversed] = true
        cass_options[:count] += 1
      elsif options[:after_id]
        row_id, cass_options[:start] = decode_key(options[:after_id])
        cass_options[:count] += 1
      elsif options[:row_id]
        row_id = options[:row_id].to_s
      end

      result = cassandra_client.get(column_family, row_id, cass_options).map do |_column, attrs|
        initialize_datum attrs
      end

      if options[:before_id]
        result.shift if result.first && result.first.key == options[:before_id]
        result.reverse!
      elsif options[:after_id]
        result.shift if result.first && result.first.key == options[:after_id]
      end

      result
    end

    # don't overuse this. it crawls an entire row
    # Yields a datum for every column walk_row returns for row_id.
    def self.find_each(row_id, options = {})
      walk_row(row_id, options) do |_column, attrs|
        yield initialize_datum(attrs)
      end
    end

    # don't overuse this. it crawls an entire row
    # Yields the encoded key of every column walk_row returns for row_id.
    def self.find_each_key(row_id, options = {})
      walk_row(row_id, options) { |column, _attrs| yield encode_key(row_id, column) }
    end

    def row_id
      SINGLETON.to_s
    end

    def document_id
      SINGLETON
    end

    def timestamp
      Time.at(SINGLETON).to_datetime
    end

    # Column name of this datum: "<document_id>~<timestamp in seconds>", encoded for cassandra.
    def column_name
      encode_for_cassandra("#{document_id}~#{timestamp.to_i}")
    end

    # URL-safe Base64 key for this datum, accepted by .find.
    def key
      self.class.encode_key(row_id, column_name)
    end

    # Instance-level shortcut for CassandraDatum::Base.encode_for_cassandra.
    def encode_for_cassandra(str, opts = {})
      CassandraDatum::Base.encode_for_cassandra(str, opts)
    end

    # Encodes str to UTF-8 and tags the result ASCII-8BIT.
    #
    # @param str [String] text to encode
    # @param opts [Hash] String#encode options; by default invalid and
    #   undefined characters are replaced with the empty string
    # @return [String] a new string; str itself is not modified
    def self.encode_for_cassandra(str, opts = {})
      encode_opts = {
          :invalid => :replace,
          :undef => :replace,
          :replace => ''
      }.merge(opts)

      str.encode('UTF-8', encode_opts).force_encoding('ASCII-8BIT')
    end

    def to_param
      key
    end

    # Saves the datum, returning false instead of raising when save! fails.
    #
    # Only StandardError is rescued: signals, SystemExit and similar must keep
    # propagating rather than being reported as a failed save.
    def save
      save!
    rescue StandardError
      false
    end

    # Runs the save callbacks, validates, and inserts every non-nil attribute
    # under row_id / column_name.
    #
    # @raise [ActiveRecord::RecordInvalid] when the datum is not valid
    def save!
      _run_save_callbacks do
        attrs = {}

        attributes.each do |name, value|
          next if value.nil?

          # collections are stored as JSON, everything else as its string form
          serialized = (value.is_a?(Array) || value.is_a?(Hash)) ? value.to_json : "#{value}"
          attrs[name] = encode_for_cassandra(serialized)
        end

        raise ActiveRecord::RecordInvalid.new(self) unless valid?

        self.class.cassandra_client.insert(self.class.column_family, row_id, {column_name => attrs})

        # this value might be a tad different from the value in cassandra. the only way to get the true updated_at value is to reload the datum
        @updated_at = DateTime.now
      end
    end

    # Fetches this datum again by key. Returns the freshly loaded object; the
    # receiver itself is not modified.
    def reload
      self.class.find(key)
    end

    # Deletes the datum inside the destroy callbacks.
    def destroy
      _run_destroy_callbacks { delete }
    end

    # Removes the whole row from the column family.
    def self.delete_all(row_id)
      cassandra_client.remove column_family, row_id
    end

    # Removes the given columns (names or arrays of names) from row_id.
    def self.delete(row_id, *column_names)
      # go through cassandra_client like every other class method does, rather
      # than referencing the CASSANDRA_CLIENT constant directly
      column_names.flatten.each { |column_name| cassandra_client.remove column_family, row_id, column_name }
    end

    # Removes this datum's column from its row, without running callbacks.
    def delete
      self.class.delete(row_id, column_name)
    end

    # Returns the global ::CASSANDRA_CLIENT, or logs an error and returns nil
    # when that constant is not defined.
    def self.cassandra_client
      if defined?(::CASSANDRA_CLIENT)
        ::CASSANDRA_CLIENT
      else
        logger.error("No cassandra client defined. Please set CASSANDRA_CLIENT")
        nil
      end
    end

    # JSON representation without a root element, limited to
    # self.class.accessor_names unless the caller passes its own :only.
    def as_json(options={})
      options = {:only => self.class.accessor_names}.merge(options)
      self.include_root_in_json = false
      super(options)
    end

    # True while updated_at is blank, i.e. the datum was neither built from
    # timestamped data nor saved.
    def new_record?
      updated_at.blank?
    end

    protected

    # NOTE: `protected` only affects instance methods. The `def self.` methods
    # below are public class methods, exactly as they were before.

    # don't overuse this. it crawls an entire row
    #
    # Fetches the row page by page (:count columns each, DEFAULT_WALK_ROW_COUNT
    # by default) and yields [column_name, attributes] for every column.
    # options may carry :start, :reversed and :count; they are forwarded to the
    # cassandra client. Each fetch runs inside retry_on_failure for
    # ::Thrift::Exception (presumably 5 retries with a sleep of 10 between
    # them; see ExceptionHelper::Retry).
    def self.walk_row(row_id, options = {})
      options = {:count => DEFAULT_WALK_ROW_COUNT}.merge(options)

      start = options[:start] || (options[:reversed] ? LAST_KEY : FIRST_KEY)
      last_start = nil

      loop do
        retry_on_failure(::Thrift::Exception, :retry_count => 5, :retry_sleep => 10) do
          last_start = start

          res = cassandra_client.get(column_family, row_id, options.merge(:start => start))

          res.each do |k, v|
            next if k == last_start # ignore the first result we get back. since start is the last record in the previous get, it'll always be off by 1
            start = k
            yield [k, v]
          end
        end

        # a page that yielded nothing new means the end of the row was reached
        break if last_start == start
      end
    end

    # before_save hook: records the class name in `type` when the model has such an attribute.
    def populate_type_if_exists
      self.type = self.class.name if respond_to?(:type=)
    end

    # Instantiates the class named by the stored 'type' value, or the receiver
    # when no type is stored.
    # NOTE(review): 'type' is constantized straight from stored data; confirm
    # that only trusted writers can populate the column family.
    def self.initialize_datum(res)
      datum_class = res['type'].present? ? res['type'].constantize : self
      datum_class.new res
    end
  end
end