whi-cassie 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/HISTORY.txt +3 -0
- data/MIT-LICENSE +20 -0
- data/README.md +213 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/cassie/config.rb +62 -0
- data/lib/cassie/model.rb +483 -0
- data/lib/cassie/railtie.rb +20 -0
- data/lib/cassie/schema.rb +129 -0
- data/lib/cassie/testing.rb +46 -0
- data/lib/cassie.rb +317 -0
- data/lib/whi-cassie.rb +1 -0
- data/spec/cassie/config_spec.rb +56 -0
- data/spec/cassie/model_spec.rb +349 -0
- data/spec/cassie_spec.rb +147 -0
- data/spec/models/thing.rb +35 -0
- data/spec/models/type_tester.rb +23 -0
- data/spec/schema/test.cql +6 -0
- data/spec/spec_helper.rb +33 -0
- data/whi-cassie.gemspec +26 -0
- metadata +144 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: b5b36ebf78a28b63c80478d20c672bef7a411250
|
4
|
+
data.tar.gz: ffaeaaebd95342e98c91d04840934cfbd2540bb4
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 81b226510b09bfddf2f9fcd07910a97deecacb668964676ebc1393637807173c551e84a173706dee4994473858fd1a03ea00292601ed79712be514dcae1a75fd
|
7
|
+
data.tar.gz: 48a2d80f9737f34b6850be20eff36f179e81a751558b58e9dcff80b6a54627dd5874ec299ab095679689318fe67824b18afb9b2b94d751c46162d5fd2269fc82
|
data/.gitignore
ADDED
data/HISTORY.txt
ADDED
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2015 WHI, Inc.
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,213 @@
|
|
1
|
+
# Cassie
|
2
|
+
|
3
|
+
The short and sweet Cassandra object mapper from [We Heart It](http://weheartit.com/)
|
4
|
+
|
5
|
+
## Usage
|
6
|
+
|
7
|
+
```ruby
|
8
|
+
class Thing
|
9
|
+
# Your model must include this
|
10
|
+
include Cassie::Model
|
11
|
+
|
12
|
+
# Set the table name where the data lives.
|
13
|
+
self.table_name = "things"
|
14
|
+
|
15
|
+
# Set the keyspace where the table lives. Keyspaces can be defined abstractly and mapped
|
16
|
+
# in a configuration file. This can allow you to have different keyspace names
|
17
|
+
# between different environments and still use the same code.
|
18
|
+
self.keyspace = "default"
|
19
|
+
|
20
|
+
# You must define the primary key. The columns must be listed in the order that they appear
|
21
|
+
# in the Cassandra CQL PRIMARY KEY clause defining the table.
|
22
|
+
self.primary_key = [:owner, :id]
|
23
|
+
|
24
|
+
# All columns are explicitly defined with their name and data type and an optional
|
25
|
+
# alias name.
|
26
|
+
column :owner, :int
|
27
|
+
column :id, :int, :as => :identifier
|
28
|
+
column :val, :varchar, :as => :value
|
29
|
+
|
30
|
+
# The ordering keys should also be defined along with how they are ordered.
|
31
|
+
ordering_key :id, :desc
|
32
|
+
|
33
|
+
# You can use all the standard ActiveModel validations.
|
34
|
+
validates_presence_of :owner, :id
|
35
|
+
|
36
|
+
# You also get before and after callbacks for create, update, save, and destroy.
|
37
|
+
before_save :some_callback_method
|
38
|
+
|
39
|
+
...
|
40
|
+
end
|
41
|
+
|
42
|
+
Cassie.configure!(:host => "localhost")
|
43
|
+
|
44
|
+
Thing.create(:owner => 1, :identifier => 2, :value => "woot")
|
45
|
+
|
46
|
+
owner_records = Thing.find_all(where: {:owner => 1})
|
47
|
+
|
48
|
+
record = Thing.find(:owner => 1, :identifier => 2)
|
49
|
+
record.value = "woot"
|
50
|
+
record.save
|
51
|
+
|
52
|
+
record.destroy
|
53
|
+
```
|
54
|
+
|
55
|
+
## Features
|
56
|
+
|
57
|
+
To add the Cassie behaviors to your model you just need to include `Cassie::Model` in your class.
|
58
|
+
|
59
|
+
### Configuration
|
60
|
+
|
61
|
+
You need to configure Cassie before using it. Cassie can only connect to a single Cassandra cluster. To configure Cassie you need to call the Cassie.configure! method with a hash. The options for the hash can be either strings or symbols.
|
62
|
+
|
63
|
+
The options should contain all the options to define your cluster connection (see http://datastax.github.io/ruby-driver/api/#cluster-class_method)
|
64
|
+
|
65
|
+
You can also pass the following options:
|
66
|
+
|
67
|
+
* :cluster - Options to connect to the cluster. See http://datastax.github.io/ruby-driver/api/#cluster-class_method
|
68
|
+
* :keyspaces - Map of abstract keyspace names to actual keyspace names. This is provided so that you can have a handle to reference keyspaces where the actual keyspace names change from environment to environment.
|
69
|
+
* :default_keyspace - The default keyspace to be used for the connection. All tables will be assumed to exist in this keyspace. (optional)
|
70
|
+
* :max_prepared_statements - The limit of the number of prepared statements that will be saved on the client. (default 1000)
|
71
|
+
* :schema_directory - Path to a directory where the schema files are kept. This value should only be set in development and testing environments. (optional)
|
72
|
+
|
73
|
+
### Explicitly defined data structure
|
74
|
+
|
75
|
+
Since all aspects of working with Cassandra tables are very tightly tied to their data structures, we make you explicitly define it in your Ruby objects. That way it's all there where the developer can see it and the code can enforce certain things if it needs to.
|
76
|
+
|
77
|
+
At a minimum you need to define the table, keyspace, primary key, and columns. For each column you need to define both the name and data type.
|
78
|
+
|
79
|
+
### ActiveModel validations and callbacks
|
80
|
+
|
81
|
+
You can use all the standard ActiveModel validation methods on your models. You can also define before or after callbacks for create, update, save, and destroy actions.
|
82
|
+
|
83
|
+
Note that one difference between Cassandra and other data stores is that data is only eventually consistent. Some subtle results of this:
|
84
|
+
|
85
|
+
1. You won't get an error if you try to create a record with the same primary key twice. Cassandra will simply use the second insert as an update statement.
|
86
|
+
2. If you perform any queries in your validation logic (e.g. to ensure a value is unique), you really need to use a high consistency level like quorum. If you use a low consistency level, there is a chance that your query can hit a node in the cluster that hasn't been replicated to and your validation could make decisions based on the wrong data. You can control the consistency level on most of the query methods or you can use `Cassie.consistency` to set a default consistency within a block.
|
87
|
+
|
88
|
+
### Prepared statements
|
89
|
+
|
90
|
+
For the best Cassandra performance you need to prepare all your CQL statements on the client. Cassie will handle doing that for you where possible.
|
91
|
+
|
92
|
+
Cassie will only prepare a statement if you call a method with value parameters. For instance, in the Cassie::Model#find_all method, you can pass the where clause as either a CQL string, a Hash of values, or an Array in the form [CQL, value, value, ...]. If you pass a CQL string, the statement will not be prepared. If you pass a Hash or an Array, the statement will be prepared and cached locally. If you do have a hard coded CQL string that you will execute multiple times, you can pass it to Cassie.prepare.
|
93
|
+
|
94
|
+
Examples:
|
95
|
+
|
96
|
+
```ruby
|
97
|
+
Thing.find_all(where: "owner = 1") # Will not use prepared statement
|
98
|
+
Thing.find_all(where: {:owner => 1}) # Will use prepared statement
|
99
|
+
Thing.find_all(where: ["owner = ?", 1]) # Will use prepared statement
|
100
|
+
Thing.find_all(where: Cassie.prepare("owner = 1")) # Will use prepared statement
|
101
|
+
```
|
102
|
+
|
103
|
+
The prepared statement cache is capped to 1000 statements by default. For best performance you should ensure that you aren't preparing statements with arbitrary values interpolated into the CQL; otherwise your prepared statement cache will turn over frequently and you'll lose the performance advantages it provides.
|
104
|
+
|
105
|
+
```ruby
|
106
|
+
Thing.find_all(where: ["owner = #{user.id} AND id > ?", id]) # This type of thing is very bad for the statement cache.
|
107
|
+
```
|
108
|
+
|
109
|
+
If necessary you can increase the prepared statement cache in the configuration with the max_prepared_statements option.
|
110
|
+
|
111
|
+
### Batches
|
112
|
+
|
113
|
+
You can send all insert, update, and delete statements as a batch to Cassandra by wrapping them with a `batch` block:
|
114
|
+
|
115
|
+
```ruby
|
116
|
+
Thing.batch do
|
117
|
+
Thing.delete_all(:owner => 1)
|
118
|
+
Thing.create(:owner => 1, :identifier => 2, :value => 'foo')
|
119
|
+
end
|
120
|
+
```
|
121
|
+
|
122
|
+
Cassandra can perform better when sending statements as a batch. Batching also ensures that all the statements are received by the cluster. They won't all be persisted as a unit like in a relational database transaction, but they will all be eventually persisted.
|
123
|
+
|
124
|
+
### Support for short column names
|
125
|
+
|
126
|
+
Because Cassandra stores the column name with each value, using descriptive column names is a bad idea if you have a lot of data and small column types (see https://issues.apache.org/jira/browse/CASSANDRA-4175). For instance, if you have an integer column to hold user ids the normal thing to do is name it "user_id". However, in Cassandra, this will result in each column using 7 bytes for the name and only 4 bytes for the value. If your table has billions of rows this can add up pretty quickly. As such, it's best to use very short column names. However, this can make your code pretty unreadable.
|
127
|
+
|
128
|
+
Cassie solves this problem by allowing you to provide aliases for columns when you define them on your models.
|
129
|
+
|
130
|
+
```ruby
|
131
|
+
class Data
|
132
|
+
include Cassie::Model
|
133
|
+
...
|
134
|
+
column :u, :int, as: :user_id
|
135
|
+
...
|
136
|
+
end
|
137
|
+
```
|
138
|
+
|
139
|
+
This will let you use the more descriptive `user_id` instead of `u` almost everywhere within your Ruby code.
|
140
|
+
|
141
|
+
* You can initialize records like `Data.new(:user_id => id)`
|
142
|
+
* You can find records like `Data.find(:user_id => id)`
|
143
|
+
* You can access the value like `data.user_id`
|
144
|
+
* You can set the value like `data.user_id = id`
|
145
|
+
* etc.
|
146
|
+
|
147
|
+
The exceptions are:
|
148
|
+
|
149
|
+
* If you call `data.attributes` the keys in the returned hash will be :u instead of :user_id
|
150
|
+
* If you need to query with raw CQL you'll need to use the actual column name instead of the alias
|
151
|
+
|
152
|
+
### Schema Definitions
|
153
|
+
|
154
|
+
For development and testing environments you should create a directory that defines your Cassandra schema. Each keyspace should be defined in a file named "#{keyspace}.cql" where keyspace is the abstract name defined for the keyspace in the keyspaces configuration. Setting up schemas is required for using the testing integration (see below) and is very useful for keeping development environments in sync.
|
155
|
+
|
156
|
+
### Testing
|
157
|
+
|
158
|
+
Cassie has built-in support for testing environments to efficiently clean up data between test cases with the Cassie::Testing module.
|
159
|
+
|
160
|
+
To use it with rspec you should add this code to your spec_helper.rb file:
|
161
|
+
|
162
|
+
```ruby
|
163
|
+
config.before(:suite) do
|
164
|
+
Cassie::Schema.all do |keyspace|
|
165
|
+
Cassie::Schema.load!(keyspace)
|
166
|
+
end
|
167
|
+
Cassie::Testing.prepare!
|
168
|
+
end
|
169
|
+
|
170
|
+
config.after(:suite) do
|
171
|
+
Cassie::Schema.all do |keyspace|
|
172
|
+
Cassie::Schema.drop!(keyspace)
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
176
|
+
config.around(:each) do |example|
|
177
|
+
Cassie::Testing.cleanup! do
|
178
|
+
example.run
|
179
|
+
end
|
180
|
+
end
|
181
|
+
```
|
182
|
+
|
183
|
+
### Using with Rails
|
184
|
+
|
185
|
+
If you're using Rails, Cassie will automatically initialize itself with the configuration file found in config/cassie.yml. You can put ERB code into the configuration if desired.
|
186
|
+
|
187
|
+
In development and test environments it will look for the schema definitions in db/cassandra.
|
188
|
+
|
189
|
+
If you're using a forking web server (i.e. passenger or unicorn) you will need to handle disconnecting and reconnecting the Cassandra connection after forking. For passenger you should include a file in config/initializers/cassie.rb with:
|
190
|
+
|
191
|
+
```ruby
|
192
|
+
if defined?(PhusionPassenger)
|
193
|
+
PhusionPassenger.on_event(:starting_worker_process) do |forked|
|
194
|
+
if forked
|
195
|
+
# Disconnect if already connected so we don't share connections with other processes.
|
196
|
+
if Cassie.instance.connected?
|
197
|
+
Cassie.instance.disconnect
|
198
|
+
end
|
199
|
+
Cassie.instance.connect
|
200
|
+
end
|
201
|
+
end
|
202
|
+
end
|
203
|
+
```
|
204
|
+
|
205
|
+
You'll need something similar on other web servers. In any case, you'll want to make sure that you call Cassie.instance.connect in an initializer. It can take several seconds to establish the connection so you really want the connection to be created before your server starts accepting traffic.
|
206
|
+
|
207
|
+
### Limitations
|
208
|
+
|
209
|
+
Ruby 2.0 (or compatible) required.
|
210
|
+
|
211
|
+
You can only use one Cassandra cluster with Cassie since it only maintains a single connection. You can, however, use multiple keyspaces within the cluster.
|
212
|
+
|
213
|
+
Query methods will not gracefully handle querying records by values other than primary keys. Even though Cassandra will let you do this by passing extra options, Cassie doesn't handle it since it just encourages bad practices. If you need to perform such queries you can always send raw CQL to Cassie#execute.
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require "bundler/gem_tasks"

desc "Default: run unit tests."
task default: :test

desc "RVM likes to call it tests"
task tests: :test

# Define the :test task through RSpec when it can be loaded; otherwise fall
# back to a task that only reports that rspec is missing.
begin
  require "rspec"
  require "rspec/core/rake_task"

  desc "Run the unit tests"
  RSpec::Core::RakeTask.new(:test)
rescue LoadError
  task(:test) { STDERR.puts "You must have rspec 2.0 installed to run the tests" }
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.0.0
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# Simple configuration for connecting to Cassandra.
|
2
|
+
#
|
3
|
+
# :cluster should be a Hash of the options to initialize the Cassandra cluster.
|
4
|
+
# See http://datastax.github.io/ruby-driver/api/#cluster-class_method for details.
|
5
|
+
#
|
6
|
+
# :keyspaces are a map of abstract keyspace names to actual names. These can be used in lieu of hard
|
7
|
+
# coding keyspace names and can be especially useful if keyspaces differ between environments. The
|
8
|
+
# abstract names can then be used when defining the keyspace for a model.
|
9
|
+
#
|
10
|
+
# :default_keyspace is an optional keyspace name to use as the default. It can be either the actual
|
11
|
+
# name or an abstract name mapped to an actual name in the keyspaces map.
|
12
|
+
#
|
13
|
+
# :max_prepared_statements is the maximum number of prepared statements that will be kept cached on
|
14
|
+
# the client (default 1000).
|
15
|
+
#
|
16
|
+
# :schema_directory is an optional path to the location where your schema files are stored. This should
|
17
|
+
# only be set in development and test environments since schema statements can be destructive in
|
18
|
+
# production.
|
19
|
+
class Cassie::Config
  attr_reader :cluster
  attr_accessor :max_prepared_statements, :schema_directory, :default_keyspace

  # Build the configuration from an options hash whose keys may be strings or
  # symbols. Reads :cluster, :keyspaces, :max_prepared_statements (defaults to
  # 1000 when not given), :schema_directory and :default_keyspace.
  def initialize(options = {})
    opts = options.symbolize_keys
    cluster_options = opts[:cluster] || {}
    keyspace_map = opts[:keyspaces] || {}

    @cluster = cluster_options.symbolize_keys
    # Keyspace names are stored under string keys so lookups can normalize
    # whatever name they are given with to_s.
    @keyspaces = keyspace_map.stringify_keys
    @max_prepared_statements = opts[:max_prepared_statements] || 1000
    @schema_directory = opts[:schema_directory]
    @default_keyspace = opts[:default_keyspace]
  end

  # Resolve an abstract keyspace name to the actual keyspace name. A name with
  # no mapping is returned as a string unchanged.
  def keyspace(name)
    key = name.to_s
    @keyspaces[key] || key
  end

  # The actual keyspace names from the keyspaces map.
  def keyspaces
    @keyspaces.values
  end

  # The abstract names from the keyspaces map.
  def keyspace_names
    @keyspaces.keys
  end

  # Map an abstract name to an actual keyspace name.
  def add_keyspace(name, value)
    @keyspaces[name.to_s] = value
  end

  # A copy of the cluster options with the values of any password,
  # passphrase, credentials or auth provider entries replaced by a
  # placeholder. Used for logging.
  def sanitized_cluster
    sensitive_keys = [:password, :passphrase, :credentials, :auth_provider]
    cluster.dup.tap do |sanitized|
      sensitive_keys.each do |key|
        sanitized[key] = "SUPPRESSED" if sanitized.include?(key)
      end
    end
  end
end
|