cassandra-model 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,49 @@
+ module CassandraModel
+   class StringType
+     def self.load(v)
+       v && v.to_s
+     end
+   end
+
+   class IntegerType
+     def self.load(v)
+       v && v.to_i
+     end
+   end
+
+   class FloatType
+     def self.load(v)
+       v && v.to_f
+     end
+   end
+
+   class DatetimeType
+     def self.dump(v)
+       v && v.strftime('%FT%T%z')
+     end
+
+     def self.load(v)
+       v && ::DateTime.strptime(v, '%FT%T%z')
+     end
+   end
+
+   class JsonType
+     def self.dump(v)
+       v && ::JSON.dump(v)
+     end
+
+     def self.load(v)
+       v && ::JSON.load(v)
+     end
+   end
+
+   class BooleanType
+     def self.dump(v)
+       v ? '1' : '0'
+     end
+
+     def self.load(v)
+       v == '1'
+     end
+   end
+ end
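
Each converter above follows one contract: `dump` serializes a Ruby value into the string Cassandra stores, and `load` turns a stored string back into a Ruby value (note the boolean converter maps `true`/`false` to `'1'`/`'0'`). A minimal round-trip sketch, assuming the gem's entry point handles the stdlib `require`s; output comments are illustrative:

    require 'json'
    require 'date'

    stored = CassandraModel::DatetimeType.dump(DateTime.now)   # e.g. "2010-05-01T12:00:00+0000"
    CassandraModel::DatetimeType.load(stored)                  # => a DateTime

    CassandraModel::JsonType.load(CassandraModel::JsonType.dump({:a => 1}))    # => {"a" => 1}
    CassandraModel::BooleanType.load(CassandraModel::BooleanType.dump(true))   # => true

Note that JSON round-trips turn symbol keys into strings, as the first example shows.
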
@@ -0,0 +1,45 @@
+ require File.expand_path(File.join(File.dirname(__FILE__), 'test_helper'))
+
+ class CassandraModelBaseTest < Test::Unit::TestCase
+   context "CassandraModel::Base" do
+     setup do
+       @klass = Class.new(CassandraModel::Base) do
+         key :name
+         column :age, :integer
+         column :dob, :datetime
+         column :note, :json
+
+         validate do
+           self.errors << "dob required" if dob.nil?
+         end
+       end
+
+       @klass.establish_connection 'CassandraModel'
+     end
+
+     should "connect to cassandra" do
+       assert_kind_of Cassandra, @klass.connection
+     end
+
+     should "store all defined columns" do
+       assert_equal({:age  => :integer,
+                     :dob  => :datetime,
+                     :note => :json}, @klass.columns)
+     end
+
+     should "validate the model with the provided block" do
+       assert_kind_of Proc, @klass.validation
+
+       model = @klass.new
+       assert !model.valid?
+
+       model = @klass.new(:name => "tl")
+       assert !model.valid?
+
+       model = @klass.new(:name => "tl", :dob => DateTime.now)
+       assert model.valid?
+       assert_equal "tl", model.key
+       assert_kind_of DateTime, model.dob
+     end
+   end
+ end
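
The `columns` assertion above suggests the `column` class macro records each name/type pair in a class-level hash. A sketch of one way that could look; the names are inferred from the test, not taken from the gem's source, and the `:string` default is an assumption (the `User` model later declares `column :full_name` with no type):

    module CassandraModel
      class Base
        # Class-level registry that the test's @klass.columns call would read.
        def self.columns
          @columns ||= {}
        end

        # `column :age, :integer` records the pair and adds an accessor.
        def self.column(name, type = :string)
          columns[name] = type
          attr_accessor name
        end
      end
    end

Because `@columns` lives on each class, an anonymous subclass such as the test's `@klass` gets its own registry.
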
@@ -0,0 +1,43 @@
+ require File.expand_path(File.join(File.dirname(__FILE__), 'test_helper'))
+
+ class CassandraModelCallbacksTest < Test::Unit::TestCase
+   context "CassandraModel::Callbacks" do
+     setup do
+       @base = Class.new(Object) do
+         include CassandraModel::Callbacks
+         define_callbacks :foo
+       end
+
+       @klass = Class.new(@base) do
+         def bar; @n = [:bar]; end
+
+         def foo
+           run_callbacks(:foo) { @n << :foo }
+         end
+
+         def baz(v)
+           @n << :baz if v == [:bar, :foo]
+         end
+
+         def quux; @n << :quux; end
+       end
+     end
+
+     should "provide before and after callbacks for the foo method" do
+       assert @klass.respond_to?(:define_callbacks)
+       assert @klass.respond_to?(:callbacks)
+       assert @klass.respond_to?(:before_foo)
+       assert @klass.respond_to?(:after_foo)
+       assert_equal Hash.new, @klass.callbacks
+     end
+
+     should "invoke callback methods when foo is executed" do
+       @klass.send(:before_foo, :bar)
+       @klass.send(:after_foo, :baz, :quux)
+       assert_equal 2, @klass.callbacks.length
+       assert_equal [:bar], @klass.callbacks[:before_foo]
+       assert_equal [:baz, :quux], @klass.callbacks[:after_foo]
+       assert_equal [:bar, :foo, :baz, :quux], @klass.new.foo
+     end
+   end
+ end
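
Reading the assertions: `define_callbacks :foo` generates `before_foo`/`after_foo` class methods that append method names to a class-level `callbacks` hash, and `run_callbacks` invokes the before-hooks, yields, then invokes the after-hooks, passing the block's result to hooks that accept an argument (`baz(v)`) but not to zero-arity ones (`quux`). A rough sketch consistent with that behaviour; it is a guess at the mechanism, not the gem's actual code:

    module CassandraModel
      module Callbacks
        def self.included(base)
          base.extend(ClassMethods)
        end

        module ClassMethods
          def callbacks
            @callbacks ||= {}
          end

          def define_callbacks(*names)
            names.each do |name|
              [:before, :after].each do |kind|
                hook = "#{kind}_#{name}".to_sym
                # Defines e.g. before_foo(:bar) to register :bar under :before_foo.
                (class << self; self; end).send(:define_method, hook) do |*methods|
                  (callbacks[hook] ||= []).concat(methods)
                end
              end
            end
          end
        end

        def run_callbacks(name)
          hooks = self.class.callbacks
          (hooks["before_#{name}".to_sym] || []).each { |m| send(m) }
          result = yield
          (hooks["after_#{name}".to_sym] || []).each do |m|
            # After-hooks like baz(v) receive the block's result; quux takes none.
            method(m).arity.zero? ? send(m) : send(m, result)
          end
          result
        end
      end
    end

With this sketch, `@klass.new.foo` returns the mutated `@n` array, `[:bar, :foo, :baz, :quux]`, matching the final assertion.
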
@@ -0,0 +1,81 @@
+
+ require File.expand_path(File.join(File.dirname(__FILE__), 'test_helper'))
+
+ class User < CassandraModel::Base
+   column_family :Users
+
+   key :username
+   column :full_name
+   column :created_at, :datetime
+
+   write_consistency_level Cassandra::Consistency::ALL
+
+   before_save :set_default_time
+
+   validate do
+     errors << "full name required" if full_name.nil? || full_name.empty?
+   end
+
+   private
+
+   def set_default_time
+     self.created_at = Time.now
+   end
+ end
+
+ class CassandraModelTest < Test::Unit::TestCase
+   context "CassandraModel" do
+     setup do
+       @connection = CassandraModel::Base.establish_connection("CassandraModel")
+       @connection.clear_keyspace!
+
+       @user = User.create(:username => "tl", :full_name => "tien le")
+     end
+
+     should "be able to connect to Cassandra" do
+       assert_kind_of Cassandra, @connection
+       assert_equal "CassandraModel", @connection.keyspace
+     end
+
+     should "not create a new user when validation fails" do
+       user = User.create(:username => "tl")
+       assert !user.valid?
+       assert user.new_record?
+
+       user = User.new(:username => "tl").save
+       assert user.new_record?
+       assert_equal "full name required", user.errors.first
+
+       user = User.new(:full_name => "tl").save
+       assert_equal "key required", user.errors.first
+     end
+
+     should "create a new user when validation passes" do
+       assert !@user.new_record?
+       assert @user.eql?(User.get("tl"))
+       assert_equal @user, User.get("tl")
+       assert_equal "tien le", User.get("tl").full_name
+
+       user = User.new(:username => "abc", :full_name => "Foo")
+       user.save
+       assert_equal ["created_at", "full_name"], @connection.get(:Users, "abc").keys
+     end
+
+     should "destroy a record" do
+       @user.destroy
+       assert User.get("tl").nil?
+       assert_raise(CassandraModel::RecordNotFound) { User["tl"] }
+     end
+
+     should "return whether or not a record exists" do
+       assert User.exists?("tl")
+       assert !User.exists?("foo")
+     end
+
+     should "only take defined attributes" do
+       user = User.new(:username => "abc", :full_name => "Foo", :hachiko => 'dog')
+       user.save
+       assert_equal ["created_at", "full_name"], @connection.get(:Users, "abc").keys
+     end
+   end
+ end
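
Read against the callbacks module, `User.create` presumably builds an instance, validates it, and only persists when validation passes, firing `before_save` hooks (here `set_default_time`) around the write. A hedged sketch of that flow; the method bodies and the `to_hash` helper are guesses, and only `connection.insert` is a real call from the underlying cassandra gem:

    # Illustrative only -- how Base#create/#save could behave given these tests.
    class CassandraModel::Base
      def self.create(attributes = {})
        new(attributes).save
      end

      def save
        return self unless valid?        # a failed save leaves new_record? true
        run_callbacks(:save) do          # fires before_save hooks, e.g. set_default_time
          # to_hash (hypothetical) would serialize attributes via the Types above,
          # dropping anything not declared with `column` (the :hachiko case).
          self.class.connection.insert(self.class.column_family, key.to_s, to_hash)
          @new_record = false
        end
        self
      end
    end
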
@@ -0,0 +1,47 @@
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ # The directory where Cassandra's configs live (required)
+ CASSANDRA_CONF=$CASSANDRA_CONF
+
+ # This can be the path to a jar file, or a directory containing the
+ # compiled classes. NOTE: This isn't needed by the startup script,
+ # it's just used here in constructing the classpath.
+ cassandra_bin=$CASSANDRA_HOME/build/classes
+
+ # The java classpath (required)
+ CLASSPATH=$CASSANDRA_CONF:$cassandra_bin
+
+ for jar in $CASSANDRA_HOME/lib/*.jar $CASSANDRA_HOME/build/lib/jars/*.jar; do
+   CLASSPATH=$CLASSPATH:$jar
+ done
+
+ # Arguments to pass to the JVM
+ JVM_OPTS=" \
+   -ea \
+   -Xms128M \
+   -Xmx1G \
+   -XX:TargetSurvivorRatio=90 \
+   -XX:+AggressiveOpts \
+   -XX:+UseParNewGC \
+   -XX:+UseConcMarkSweepGC \
+   -XX:+CMSParallelRemarkEnabled \
+   -XX:+HeapDumpOnOutOfMemoryError \
+   -XX:SurvivorRatio=128 \
+   -XX:MaxTenuringThreshold=0 \
+   -Dcom.sun.management.jmxremote.port=8080 \
+   -Dcom.sun.management.jmxremote.ssl=false \
+   -Dcom.sun.management.jmxremote.authenticate=false"
@@ -0,0 +1,27 @@
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ # for production, you should probably set the root to INFO
+ # and the pattern to %c instead of %l. (%l is slower.)
+
+ # output messages to stderr only
+ log4j.rootLogger=WARN,stderr
+
+ # stderr
+ log4j.appender.stderr=org.apache.log4j.ConsoleAppender
+ log4j.appender.stderr.target=System.err
+ log4j.appender.stderr.layout=org.apache.log4j.PatternLayout
+ log4j.appender.stderr.layout.ConversionPattern=%5p %d{HH:mm:ss,SSS} %m%n
@@ -0,0 +1,40 @@
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ # for production, you should probably set the root to INFO
+ # and the pattern to %c instead of %l. (%l is slower.)
+
+ # output messages into a rolling log file as well as stdout
+ log4j.rootLogger=INFO,stdout,R
+
+ # stdout
+ log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+ log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+ log4j.appender.stdout.layout.ConversionPattern=%5p %d{HH:mm:ss,SSS} %m%n
+
+ # rolling log file
+ log4j.appender.R=org.apache.log4j.RollingFileAppender
+ log4j.appender.R.maxFileSize=20MB
+ log4j.appender.R.maxBackupIndex=50
+ log4j.appender.R.layout=org.apache.log4j.PatternLayout
+ log4j.appender.R.layout.ConversionPattern=%5p [%t] %d{ISO8601} %F (line %L) %m%n
+ # Edit the next line to point to your logs directory
+ log4j.appender.R.File=data/logs/system.log
+
+ # Application logging options
+ #log4j.logger.com.facebook=DEBUG
+ #log4j.logger.com.facebook.infrastructure.gms=DEBUG
+ #log4j.logger.com.facebook.infrastructure.db=DEBUG
@@ -0,0 +1,368 @@
+ <!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one
+  ~ or more contributor license agreements. See the NOTICE file
+  ~ distributed with this work for additional information
+  ~ regarding copyright ownership. The ASF licenses this file
+  ~ to you under the Apache License, Version 2.0 (the
+  ~ "License"); you may not use this file except in compliance
+  ~ with the License. You may obtain a copy of the License at
+  ~
+  ~     http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing,
+  ~ software distributed under the License is distributed on an
+  ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  ~ KIND, either express or implied. See the License for the
+  ~ specific language governing permissions and limitations
+  ~ under the License.
+  -->
+ <Storage>
+   <!--======================================================================-->
+   <!-- Basic Configurations                                                 -->
+   <!--======================================================================-->
+
+   <!--
+    ~ The name of this cluster. This is mainly used to prevent machines in
+    ~ one logical cluster from joining another.
+   -->
+   <ClusterName>Test Cluster</ClusterName>
+
+   <!--
+    ~ Turn on to make new [non-seed] nodes automatically migrate the right data
+    ~ to themselves. (If no InitialToken is specified, they will pick one
+    ~ such that they will get half the range of the most-loaded node.)
+    ~ If a node starts up without bootstrapping, it will mark itself bootstrapped
+    ~ so that you can't subsequently accidentally bootstrap a node with
+    ~ data on it. (You can reset this by wiping your data and commitlog
+    ~ directories.)
+    ~
+    ~ Off by default so that new clusters and upgraders from 0.4 don't
+    ~ bootstrap immediately. You should turn this on when you start adding
+    ~ new nodes to a cluster that already has data on it. (If you are upgrading
+    ~ from 0.4, start your cluster with it off once before changing it to true.
+    ~ Otherwise, no data will be lost but you will incur a lot of unnecessary
+    ~ I/O before your cluster starts up.)
+   -->
+   <AutoBootstrap>false</AutoBootstrap>
+
+   <!--
+    ~ Keyspaces and ColumnFamilies:
+    ~ A ColumnFamily is the Cassandra concept closest to a relational
+    ~ table. Keyspaces are separate groups of ColumnFamilies. Except in
+    ~ very unusual circumstances you will have one Keyspace per application.
+
+    ~ There is an implicit keyspace named 'system' for Cassandra internals.
+   -->
+   <Keyspaces>
+     <Keyspace Name="CassandraModel">
+       <!--
+        ~ ColumnFamily definitions have one required attribute (Name)
+        ~ and several optional ones.
+        ~
+        ~ The CompareWith attribute tells Cassandra how to sort the columns
+        ~ for slicing operations. The default is BytesType, which is a
+        ~ straightforward lexical comparison of the bytes in each column.
+        ~ Other options are AsciiType, UTF8Type, LexicalUUIDType, TimeUUIDType,
+        ~ and LongType. You can also specify the fully-qualified class
+        ~ name to a class of your choice extending
+        ~ org.apache.cassandra.db.marshal.AbstractType.
+        ~
+        ~ SuperColumns have a similar CompareSubcolumnsWith attribute.
+        ~
+        ~ BytesType: Simple sort by byte value. No validation is performed.
+        ~ AsciiType: Like BytesType, but validates that the input can be
+        ~            parsed as US-ASCII.
+        ~ UTF8Type: A string encoded as UTF8
+        ~ LongType: A 64bit long
+        ~ LexicalUUIDType: A 128bit UUID, compared lexically (by byte value)
+        ~ TimeUUIDType: a 128bit version 1 UUID, compared by timestamp
+        ~
+        ~ (To get the closest approximation to 0.3-style supercolumns, you
+        ~ would use CompareWith=UTF8Type CompareSubcolumnsWith=LongType.)
+        ~
+        ~ An optional `Comment` attribute may be used to attach additional
+        ~ human-readable information about the column family to its definition.
+        ~
+        ~ The optional KeysCached attribute specifies
+        ~ the number of keys per sstable whose locations we keep in
+        ~ memory in "mostly LRU" order. (JUST the key locations, NOT any
+        ~ column values.) Specify a fraction (value less than 1), a percentage
+        ~ (ending in a % sign) or an absolute number of keys to cache.
+        ~ KeysCached defaults to 200000 keys.
+        ~
+        ~ The optional RowsCached attribute specifies the number of rows
+        ~ whose entire contents we cache in memory. Do not use this on
+        ~ ColumnFamilies with large rows, or ColumnFamilies with high write:read
+        ~ ratios. Specify a fraction (value less than 1), a percentage (ending in
+        ~ a % sign) or an absolute number of rows to cache.
+        ~ RowsCached defaults to 0, i.e., row cache is off by default.
+        ~
+        ~ Remember, when using caches as a percentage, they WILL grow with
+        ~ your data set!
+       -->
+       <ColumnFamily Name="Users" CompareWith="BytesType" KeysCached="100000" RowsCached="1000"/>
+       <ColumnFamily Name="Posts" CompareWith="BytesType" />
+       <ColumnFamily Name="Comments" CompareWith="TimeUUIDType" CompareSubcolumnsWith="BytesType" ColumnType="Super" />
+
+       <!--
+        ~ Strategy: Setting this to the class that implements
+        ~ IReplicaPlacementStrategy will change the way the node picker works.
+        ~ Out of the box, Cassandra provides
+        ~ org.apache.cassandra.locator.RackUnawareStrategy and
+        ~ org.apache.cassandra.locator.RackAwareStrategy (place one replica in
+        ~ a different datacenter, and the others on different racks in the same
+        ~ one.)
+       -->
+       <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
+
+       <!-- Number of replicas of the data -->
+       <ReplicationFactor>1</ReplicationFactor>
+
+       <!--
+        ~ EndPointSnitch: Setting this to the class that implements
+        ~ AbstractEndpointSnitch, which lets Cassandra know enough
+        ~ about your network topology to route requests efficiently.
+        ~ Out of the box, Cassandra provides org.apache.cassandra.locator.EndPointSnitch,
+        ~ and PropertyFileEndPointSnitch is available in contrib/.
+       -->
+       <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
+
+     </Keyspace>
+   </Keyspaces>
+
+   <!--
+    ~ Authenticator: any IAuthenticator may be used, including your own as long
+    ~ as it is on the classpath. Out of the box, Cassandra provides
+    ~ org.apache.cassandra.auth.AllowAllAuthenticator and
+    ~ org.apache.cassandra.auth.SimpleAuthenticator
+    ~ (SimpleAuthenticator uses access.properties and passwd.properties by
+    ~ default).
+    ~
+    ~ If you don't specify an authenticator, AllowAllAuthenticator is used.
+   -->
+   <Authenticator>org.apache.cassandra.auth.AllowAllAuthenticator</Authenticator>
+
+   <!--
+    ~ Partitioner: any IPartitioner may be used, including your own as long
+    ~ as it is on the classpath. Out of the box, Cassandra provides
+    ~ org.apache.cassandra.dht.RandomPartitioner,
+    ~ org.apache.cassandra.dht.OrderPreservingPartitioner, and
+    ~ org.apache.cassandra.dht.CollatingOrderPreservingPartitioner.
+    ~ (CollatingOPP collates according to EN,US rules, not naive byte
+    ~ ordering. Use this as an example if you need locale-aware collation.)
+    ~ Range queries require using an order-preserving partitioner.
+    ~
+    ~ Achtung! Changing this parameter requires wiping your data
+    ~ directories, since the partitioner can modify the sstable on-disk
+    ~ format.
+   -->
+   <Partitioner>org.apache.cassandra.dht.RandomPartitioner</Partitioner>
+
+   <!--
+    ~ If you are using an order-preserving partitioner and you know your key
+    ~ distribution, you can specify the token for this node to use. (Keys
+    ~ are sent to the node with the "closest" token, so distributing your
+    ~ tokens equally along the key distribution space will spread keys
+    ~ evenly across your cluster.) This setting is only checked the first
+    ~ time a node is started.
+
+    ~ This can also be useful with RandomPartitioner to force equal spacing
+    ~ of tokens around the hash space, especially for clusters with a small
+    ~ number of nodes.
+   -->
+   <InitialToken></InitialToken>
+
+   <!--
+    ~ Directories: Specify where Cassandra should store different data on
+    ~ disk. Keep the data disks and the CommitLog disks separate for best
+    ~ performance
+   -->
+   <CommitLogDirectory>data/cassandra/commitlog</CommitLogDirectory>
+   <DataFileDirectories>
+     <DataFileDirectory>data/cassandra/data</DataFileDirectory>
+   </DataFileDirectories>
+
+
+   <!--
+    ~ Addresses of hosts that are deemed contact points. Cassandra nodes
+    ~ use this list of hosts to find each other and learn the topology of
+    ~ the ring. You must change this if you are running multiple nodes!
+   -->
+   <Seeds>
+     <Seed>127.0.0.1</Seed>
+   </Seeds>
+
+
+   <!-- Miscellaneous -->
+
+   <!-- Time to wait for a reply from other nodes before failing the command -->
+   <RpcTimeoutInMillis>10000</RpcTimeoutInMillis>
+   <!-- Size to allow commitlog to grow to before creating a new segment -->
+   <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>
+
+
+   <!-- Local hosts and ports -->
+
+   <!--
+    ~ Address to bind to and tell other nodes to connect to. You _must_
+    ~ change this if you want multiple nodes to be able to communicate!
+    ~
+    ~ Leaving it blank leaves it up to InetAddress.getLocalHost(). This
+    ~ will always do the Right Thing *if* the node is properly configured
+    ~ (hostname, name resolution, etc), and the Right Thing is to use the
+    ~ address associated with the hostname (it might not be).
+   -->
+   <ListenAddress>localhost</ListenAddress>
+   <!-- internal communications port -->
+   <StoragePort>7000</StoragePort>
+
+   <!--
+    ~ The address to bind the Thrift RPC service to. Unlike ListenAddress
+    ~ above, you *can* specify 0.0.0.0 here if you want Thrift to listen on
+    ~ all interfaces.
+    ~
+    ~ Leaving this blank has the same effect it does for ListenAddress,
+    ~ (i.e. it will be based on the configured hostname of the node).
+   -->
+   <ThriftAddress>localhost</ThriftAddress>
+   <!-- Thrift RPC port (the port clients connect to). -->
+   <ThriftPort>9160</ThriftPort>
+   <!--
+    ~ Whether or not to use a framed transport for Thrift. If this option
+    ~ is set to true then you must also use a framed transport on the
+    ~ client-side, (framed and non-framed transports are not compatible).
+   -->
+   <ThriftFramedTransport>false</ThriftFramedTransport>
+
+
+   <!--======================================================================-->
+   <!-- Memory, Disk, and Performance                                        -->
+   <!--======================================================================-->
+
+   <!--
+    ~ Access mode. mmapped i/o is substantially faster, but only practical on
+    ~ a 64bit machine (which notably does not include EC2 "small" instances)
+    ~ or relatively small datasets. "auto", the safe choice, will enable
+    ~ mmapping on a 64bit JVM. Other values are "mmap", "mmap_index_only"
+    ~ (which may allow you to get part of the benefits of mmap on a 32bit
+    ~ machine by mmapping only index files) and "standard".
+    ~ (The buffer size settings that follow only apply to standard,
+    ~ non-mmapped i/o.)
+   -->
+   <DiskAccessMode>auto</DiskAccessMode>
+
+   <!--
+    ~ Size of compacted row above which to log a warning. (If compacted
+    ~ rows do not fit in memory, Cassandra will crash. This is explained
+    ~ in http://wiki.apache.org/cassandra/CassandraLimitations and is
+    ~ scheduled to be fixed in 0.7.)
+   -->
+   <RowWarningThresholdInMB>512</RowWarningThresholdInMB>
+
+   <!--
+    ~ Buffer size to use when performing contiguous column slices. Increase
+    ~ this to the size of the column slices you typically perform.
+    ~ (Name-based queries are performed with a buffer size of
+    ~ ColumnIndexSizeInKB.)
+   -->
+   <SlicedBufferSizeInKB>64</SlicedBufferSizeInKB>
+
+   <!--
+    ~ Buffer size to use when flushing memtables to disk. (Only one
+    ~ memtable is ever flushed at a time.) Increase (decrease) the index
+    ~ buffer size relative to the data buffer if you have few (many)
+    ~ columns per key. Bigger is only better _if_ your memtables get large
+    ~ enough to use the space. (Check in your data directory after your
+    ~ app has been running long enough.) -->
+   <FlushDataBufferSizeInMB>32</FlushDataBufferSizeInMB>
+   <FlushIndexBufferSizeInMB>8</FlushIndexBufferSizeInMB>
+
+   <!--
+    ~ Add column indexes to a row after its contents reach this size.
+    ~ Increase if your column values are large, or if you have a very large
+    ~ number of columns. The competing causes are, Cassandra has to
+    ~ deserialize this much of the row to read a single column, so you want
+    ~ it to be small - at least if you do many partial-row reads - but all
+    ~ the index data is read for each access, so you don't want to generate
+    ~ that wastefully either.
+   -->
+   <ColumnIndexSizeInKB>64</ColumnIndexSizeInKB>
+
+   <!--
+    ~ Flush memtable after this much data has been inserted, including
+    ~ overwritten data. There is one memtable per column family, and
+    ~ this threshold is based solely on the amount of data stored, not
+    ~ actual heap memory usage (there is some overhead in indexing the
+    ~ columns).
+   -->
+   <MemtableThroughputInMB>64</MemtableThroughputInMB>
+   <!--
+    ~ Throughput setting for Binary Memtables. Typically these are
+    ~ used for bulk load so you want them to be larger.
+   -->
+   <BinaryMemtableThroughputInMB>256</BinaryMemtableThroughputInMB>
+   <!--
+    ~ The maximum number of columns in millions to store in memory per
+    ~ ColumnFamily before flushing to disk. This is also a per-memtable
+    ~ setting. Use with MemtableThroughputInMB to tune memory usage.
+   -->
+   <MemtableOperationsInMillions>0.3</MemtableOperationsInMillions>
+   <!--
+    ~ The maximum time to leave a dirty memtable unflushed.
+    ~ (While any affected columnfamilies have unflushed data from a
+    ~ commit log segment, that segment cannot be deleted.)
+    ~ This needs to be large enough that it won't cause a flush storm
+    ~ of all your memtables flushing at once because none has hit
+    ~ the size or count thresholds yet. For production, a larger
+    ~ value such as 1440 is recommended.
+   -->
+   <MemtableFlushAfterMinutes>60</MemtableFlushAfterMinutes>
+
+   <!--
+    ~ Unlike most systems, in Cassandra writes are faster than reads, so
+    ~ you can afford more of those in parallel. A good rule of thumb is 2
+    ~ concurrent reads per processor core. Increase ConcurrentWrites to
+    ~ the number of clients writing at once if you enable CommitLogSync +
+    ~ CommitLogSyncDelay. -->
+   <ConcurrentReads>8</ConcurrentReads>
+   <ConcurrentWrites>32</ConcurrentWrites>
+
+   <!--
+    ~ CommitLogSync may be either "periodic" or "batch." When in batch
+    ~ mode, Cassandra won't ack writes until the commit log has been
+    ~ fsynced to disk. It will wait up to CommitLogSyncBatchWindowInMS
+    ~ milliseconds for other writes, before performing the sync.
+
+    ~ This is less necessary in Cassandra than in traditional databases
+    ~ since replication reduces the odds of losing data from a failure
+    ~ after writing the log entry but before it actually reaches the disk.
+    ~ So the other option is "periodic," where writes may be acked immediately
+    ~ and the CommitLog is simply synced every CommitLogSyncPeriodInMS
+    ~ milliseconds.
+   -->
+   <CommitLogSync>periodic</CommitLogSync>
+   <!--
+    ~ Interval at which to perform syncs of the CommitLog in periodic mode.
+    ~ Usually the default of 10000ms is fine; increase it if your i/o
+    ~ load is such that syncs are taking excessively long times.
+   -->
+   <CommitLogSyncPeriodInMS>10000</CommitLogSyncPeriodInMS>
+   <!--
+    ~ Delay (in milliseconds) during which additional commit log entries
+    ~ may be written before fsync in batch mode. This will increase
+    ~ latency slightly, but can vastly improve throughput where there are
+    ~ many writers. Set to zero to disable (each entry will be synced
+    ~ individually). Reasonable values range from a minimal 0.1 to 10 or
+    ~ even more if throughput matters more than latency.
+   -->
+   <!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> -->
+
+   <!--
+    ~ Time to wait before garbage-collecting deletion markers. Set this to
+    ~ a large enough value that you are confident that the deletion marker
+    ~ will be propagated to all replicas by the time this many seconds has
+    ~ elapsed, even in the face of hardware failures. The default value is
+    ~ ten days.
+   -->
+   <GCGraceSeconds>864000</GCGraceSeconds>
+ </Storage>
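
For reference, the keyspace and Thrift settings above ("CassandraModel", localhost:9160, unframed transport) are what `establish_connection` in the tests ultimately targets. With the underlying cassandra gem of that era, the equivalent raw connection would look roughly like this; the column values shown are illustrative:

    require 'cassandra'

    # Keyspace name and address match the storage-conf.xml above.
    client = Cassandra.new('CassandraModel', '127.0.0.1:9160')
    client.insert(:Users, 'tl', 'full_name' => 'tien le')
    client.get(:Users, 'tl')    # => {"full_name" => "tien le"}
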