cassandra-model 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,49 @@
1
module CassandraModel
  # Column type converters.
  #
  # Each converter maps between the Ruby value held by a model attribute
  # and its serialized form in Cassandra: `dump` serializes a Ruby value
  # for storage, `load` deserializes a stored value back into Ruby.
  # Every method passes nil through unchanged (the `v && ...` guard).

  # Plain string column (load only; strings are stored as-is).
  class StringType
    def self.load(v)
      v && v.to_s
    end
  end

  # Integer column.
  class IntegerType
    def self.load(v)
      v && v.to_i
    end
  end

  # Float column.
  class FloatType
    def self.load(v)
      v && v.to_f
    end
  end

  # DateTime column, stored as an ISO-8601 string with zone offset
  # (e.g. "2010-01-02T03:04:05+0700", the %FT%T%z pattern).
  class DatetimeType
    def self.dump(v)
      v && v.strftime('%FT%T%z')
    end

    def self.load(v)
      v && ::DateTime.strptime(v, '%FT%T%z')
    end
  end

  # Arbitrary structured data, stored as a JSON string.
  class JsonType
    def self.dump(v)
      v && ::JSON.dump(v)
    end

    def self.load(v)
      v && ::JSON.load(v)
    end
  end

  # Boolean column, stored as the string '1' (true) or '0' (false).
  #
  # BUG FIX: dump and load were swapped relative to every other converter
  # in this module (dump must go Ruby -> stored string and load stored
  # string -> Ruby, as DatetimeType and JsonType show). With the old code,
  # saving `true` wrote the boolean result of (v == '1') instead of '1',
  # and a stored '1' was "loaded" back as the string '1' rather than true.
  class BooleanType
    def self.dump(v)
      v ? '1' : '0'
    end

    def self.load(v)
      v == '1'
    end
  end
end
@@ -0,0 +1,45 @@
1
require File.expand_path(File.join(File.dirname(__FILE__), 'test_helper'))

# Tests for CassandraModel::Base: key/column declarations, connection
# setup, and block-based validation.
#
# BUG FIX: this class was named CassandraModelCallbacksTest, which collides
# with the class of the same name in the callbacks test file. When the test
# suite loads both files, the second definition reopens the first and their
# setup blocks and test methods merge, so one file's tests silently run
# against the other's fixtures. Renamed to match what it actually tests.
class CassandraModelBaseTest < Test::Unit::TestCase
  context "CassandraModel::Base" do
    setup do
      # Anonymous subclass so each run starts from a clean model definition.
      @klass = Class.new(CassandraModel::Base) do
        key :name
        column :age, :integer
        column :dob, :datetime
        column :note, :json

        validate do
          self.errors << "dob required" if dob.nil?
        end
      end

      @klass.establish_connection 'cassandra-model'
    end

    should "connect to cassandra" do
      assert_kind_of Cassandra, @klass.connection
    end

    should "store all defined columns" do
      assert_equal({ :age  => :integer,
                     :dob  => :datetime,
                     :note => :json }, @klass.columns)
    end

    should "validate model by provided block" do
      assert_kind_of Proc, @klass.validation

      model = @klass.new
      assert !model.valid?

      model = @klass.new(:name => "tl")
      assert !model.valid?

      model = @klass.new(:name => "tl", :dob => DateTime.now)
      assert model.valid?
      assert_equal "tl", model.key
      assert_kind_of DateTime, model.dob
    end
  end
end
@@ -0,0 +1,43 @@
1
require File.expand_path(File.join(File.dirname(__FILE__), 'test_helper'))

# Exercises CassandraModel::Callbacks: declaring before/after hooks for a
# method and running them, in order, around the wrapped call.
class CassandraModelCallbacksTest < Test::Unit::TestCase
  context "CassandraModel::Callbacks" do
    setup do
      # The parent mixes in the callback machinery and declares hooks
      # around a method named :foo.
      @base = Class.new(Object) do
        include CassandraModel::Callbacks
        define_callbacks :foo
      end

      # The subclass records the order in which hooks and the wrapped
      # body execute.
      @klass = Class.new(@base) do
        def bar
          @trace = [:bar]
        end

        def foo
          run_callbacks(:foo) { @trace << :foo }
        end

        # After-hooks receive the value accumulated so far.
        def baz(v)
          @trace << :baz if v == [:bar, :foo]
        end

        def quux
          @trace << :quux
        end
      end
    end

    should "provide before and after callbacks for foo function" do
      assert @klass.respond_to?(:define_callbacks)
      assert @klass.respond_to?(:callbacks)
      assert @klass.respond_to?(:before_foo)
      assert @klass.respond_to?(:after_foo)
      assert_equal Hash.new, @klass.callbacks
    end

    should "invoke callback functions when foo executed" do
      @klass.send(:before_foo, :bar)
      @klass.send(:after_foo, :baz, :quux)
      assert_equal 2, @klass.callbacks.length
      assert_equal [:bar], @klass.callbacks[:before_foo]
      assert_equal [:baz, :quux], @klass.callbacks[:after_foo]
      assert_equal [:bar, :foo, :baz, :quux], @klass.new.foo
    end
  end
end
@@ -0,0 +1,81 @@
1
+
2
+ require File.expand_path(File.join(File.dirname(__FILE__), 'test_helper'))
3
+
4
# Example model used by the integration tests: one row per user in the
# :Users column family, keyed by username.
class User < CassandraModel::Base
  column_family :Users

  key :username
  column :full_name
  column :created_at, :datetime

  # Wait for every replica to acknowledge writes.
  write_consistency_level Cassandra::Consistency::ALL

  # Stamp the creation time on every save.
  before_save :set_default_time

  validate do
    errors << "full name required" if full_name.nil? || full_name.empty?
  end

  private

  def set_default_time
    self.created_at = Time.now
  end
end
25
+
26
# Integration tests for the User model against a running Cassandra
# instance. The "CassandraModel" keyspace is wiped before every test.
class CassandraModelTest < Test::Unit::TestCase
  context "CassandraModel" do
    setup do
      @connection = CassandraModel::Base.establish_connection("CassandraModel")
      @connection.clear_keyspace!

      # One persisted, valid row that the read/destroy tests build on.
      @user = User.create(:username => "tl", :full_name => "tien le")
    end

    should "be able to connect to Cassandra" do
      assert_kind_of Cassandra, @connection
      assert_equal "CassandraModel", @connection.keyspace
    end

    should "not create a new user when validation fails" do
      record = User.create(:username => "tl")
      assert !record.valid?
      assert record.new_record?

      record = User.new(:username => "tl").save
      assert record.new_record?
      assert_equal "full name required", record.errors.first

      record = User.new(:full_name => "tl").save
      assert_equal "key required", record.errors.first
    end

    should "create a new user when validation passed" do
      assert !@user.new_record?
      assert @user.eql?(User.get("tl"))
      assert_equal @user, User.get("tl")
      assert_equal "tien le", User.get("tl").full_name

      record = User.new(:username => "abc", :full_name => "Foo")
      record.save
      assert_equal ["created_at", "full_name"], @connection.get(:Users, "abc").keys
    end

    should "destroy a record" do
      @user.destroy
      assert_nil User.get("tl")
      assert_raise(CassandraModel::RecordNotFound) { User["tl"] }
    end

    should "return true if record exists and otherwise" do
      assert User.exists?("tl")
      assert !User.exists?("foo")
    end

    should "only take defined attributes" do
      # :hachiko is not a declared column and must be dropped on save.
      record = User.new(:username => "abc", :full_name => "Foo", :hachiko => 'dog')
      record.save
      assert_equal ["created_at", "full_name"], @connection.get(:Users, "abc").keys
    end
  end
end
@@ -0,0 +1,47 @@
1
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# The directory where Cassandra's configs live (required)
CASSANDRA_CONF=$CASSANDRA_CONF

# This can be the path to a jar file, or a directory containing the
# compiled classes. NOTE: This isn't needed by the startup script,
# it's just used here in constructing the classpath.
cassandra_bin=$CASSANDRA_HOME/build/classes

# The java classpath (required).
# BUG FIX: this previously appended $CASSANDRA_BIN, which is never set
# (shell variables are case-sensitive; the classes directory is assigned
# to the lowercase cassandra_bin above), so the compiled classes were
# silently missing from the classpath.
CLASSPATH=$CASSANDRA_CONF:$cassandra_bin

# Append every bundled jar to the classpath.
for jar in $CASSANDRA_HOME/lib/*.jar $CASSANDRA_HOME/build/lib/jars/*.jar; do
    CLASSPATH=$CLASSPATH:$jar
done

# Arguments to pass to the JVM
JVM_OPTS=" \
        -ea \
        -Xms128M \
        -Xmx1G \
        -XX:TargetSurvivorRatio=90 \
        -XX:+AggressiveOpts \
        -XX:+UseParNewGC \
        -XX:+UseConcMarkSweepGC \
        -XX:+CMSParallelRemarkEnabled \
        -XX:+HeapDumpOnOutOfMemoryError \
        -XX:SurvivorRatio=128 \
        -XX:MaxTenuringThreshold=0 \
        -Dcom.sun.management.jmxremote.port=8080 \
        -Dcom.sun.management.jmxremote.ssl=false \
        -Dcom.sun.management.jmxremote.authenticate=false"
@@ -0,0 +1,27 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ # for production, you should probably set the root to INFO
18
+ # and the pattern to %c instead of %l. (%l is slower.)
19
+
20
+ # output messages into a rolling log file as well as stdout
21
+ log4j.rootLogger=WARN,stderr
22
+
23
+ # stderr
24
+ log4j.appender.stderr=org.apache.log4j.ConsoleAppender
25
+ log4j.appender.stderr.target=System.err
26
+ log4j.appender.stderr.layout=org.apache.log4j.PatternLayout
27
+ log4j.appender.stderr.layout.ConversionPattern=%5p %d{HH:mm:ss,SSS} %m%n
@@ -0,0 +1,40 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ # for production, you should probably set the root to INFO
18
+ # and the pattern to %c instead of %l. (%l is slower.)
19
+
20
+ # output messages into a rolling log file as well as stdout
21
+ log4j.rootLogger=INFO,stdout,R
22
+
23
+ # stdout
24
+ log4j.appender.stdout=org.apache.log4j.ConsoleAppender
25
+ log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
26
+ log4j.appender.stdout.layout.ConversionPattern=%5p %d{HH:mm:ss,SSS} %m%n
27
+
28
+ # rolling log file
29
+ log4j.appender.R=org.apache.log4j.RollingFileAppender
30
+ log4j.appender.R.maxFileSize=20MB
31
+ log4j.appender.R.maxBackupIndex=50
32
+ log4j.appender.R.layout=org.apache.log4j.PatternLayout
33
+ log4j.appender.R.layout.ConversionPattern=%5p [%t] %d{ISO8601} %F (line %L) %m%n
34
+ # Edit the next line to point to your logs directory
35
+ log4j.appender.R.File=data/logs/system.log
36
+
37
+ # Application logging options
38
+ #log4j.logger.com.facebook=DEBUG
39
+ #log4j.logger.com.facebook.infrastructure.gms=DEBUG
40
+ #log4j.logger.com.facebook.infrastructure.db=DEBUG
@@ -0,0 +1,368 @@
1
+ <!--
2
+ ~ Licensed to the Apache Software Foundation (ASF) under one
3
+ ~ or more contributor license agreements. See the NOTICE file
4
+ ~ distributed with this work for additional information
5
+ ~ regarding copyright ownership. The ASF licenses this file
6
+ ~ to you under the Apache License, Version 2.0 (the
7
+ ~ "License"); you may not use this file except in compliance
8
+ ~ with the License. You may obtain a copy of the License at
9
+ ~
10
+ ~ http://www.apache.org/licenses/LICENSE-2.0
11
+ ~
12
+ ~ Unless required by applicable law or agreed to in writing,
13
+ ~ software distributed under the License is distributed on an
14
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ ~ KIND, either express or implied. See the License for the
16
+ ~ specific language governing permissions and limitations
17
+ ~ under the License.
18
+ -->
19
+ <Storage>
20
+ <!--======================================================================-->
21
+ <!-- Basic Configurations -->
22
+ <!--======================================================================-->
23
+
24
+ <!--
25
+ ~ The name of this cluster. This is mainly used to prevent machines in
26
+ ~ one logical cluster from joining another.
27
+ -->
28
+ <ClusterName>Test Cluster</ClusterName>
29
+
30
+ <!--
31
+ ~ Turn on to make new [non-seed] nodes automatically migrate the right data
32
+ ~ to themselves. (If no InitialToken is specified, they will pick one
33
+ ~ such that they will get half the range of the most-loaded node.)
34
+ ~ If a node starts up without bootstrapping, it will mark itself bootstrapped
35
+ ~ so that you can't subsequently accidently bootstrap a node with
36
+ ~ data on it. (You can reset this by wiping your data and commitlog
37
+ ~ directories.)
38
+ ~
39
+ ~ Off by default so that new clusters and upgraders from 0.4 don't
40
+ ~ bootstrap immediately. You should turn this on when you start adding
41
+ ~ new nodes to a cluster that already has data on it. (If you are upgrading
42
+ ~ from 0.4, start your cluster with it off once before changing it to true.
43
+ ~ Otherwise, no data will be lost but you will incur a lot of unnecessary
44
+ ~ I/O before your cluster starts up.)
45
+ -->
46
+ <AutoBootstrap>false</AutoBootstrap>
47
+
48
+ <!--
49
+ ~ Keyspaces and ColumnFamilies:
50
+ ~ A ColumnFamily is the Cassandra concept closest to a relational
51
+ ~ table. Keyspaces are separate groups of ColumnFamilies. Except in
52
+ ~ very unusual circumstances you will have one Keyspace per application.
53
+
54
+ ~ There is an implicit keyspace named 'system' for Cassandra internals.
55
+ -->
56
+ <Keyspaces>
57
+ <Keyspace Name="CassandraModel">
58
+ <!--
59
+ ~ ColumnFamily definitions have one required attribute (Name)
60
+ ~ and several optional ones.
61
+ ~
62
+ ~ The CompareWith attribute tells Cassandra how to sort the columns
63
+ ~ for slicing operations. The default is BytesType, which is a
64
+ ~ straightforward lexical comparison of the bytes in each column.
65
+ ~ Other options are AsciiType, UTF8Type, LexicalUUIDType, TimeUUIDType,
66
+ ~ and LongType. You can also specify the fully-qualified class
67
+ ~ name to a class of your choice extending
68
+ ~ org.apache.cassandra.db.marshal.AbstractType.
69
+ ~
70
+ ~ SuperColumns have a similar CompareSubcolumnsWith attribute.
71
+ ~
72
+ ~ BytesType: Simple sort by byte value. No validation is performed.
73
+ ~ AsciiType: Like BytesType, but validates that the input can be
74
+ ~ parsed as US-ASCII.
75
+ ~ UTF8Type: A string encoded as UTF8
76
+ ~ LongType: A 64bit long
77
+ ~ LexicalUUIDType: A 128bit UUID, compared lexically (by byte value)
78
+ ~ TimeUUIDType: a 128bit version 1 UUID, compared by timestamp
79
+ ~
80
+ ~ (To get the closest approximation to 0.3-style supercolumns, you
81
+ ~ would use CompareWith=UTF8Type CompareSubcolumnsWith=LongType.)
82
+ ~
83
+ ~ An optional `Comment` attribute may be used to attach additional
84
+ ~ human-readable information about the column family to its definition.
85
+ ~
86
+ ~ The optional KeysCached attribute specifies
87
+ ~ the number of keys per sstable whose locations we keep in
88
+ ~ memory in "mostly LRU" order. (JUST the key locations, NOT any
89
+ ~ column values.) Specify a fraction (value less than 1), a percentage
90
+ ~ (ending in a % sign) or an absolute number of keys to cache.
91
+ ~ KeysCached defaults to 200000 keys.
92
+ ~
93
+ ~ The optional RowsCached attribute specifies the number of rows
94
+ ~ whose entire contents we cache in memory. Do not use this on
95
+ ~ ColumnFamilies with large rows, or ColumnFamilies with high write:read
96
+ ~ ratios. Specify a fraction (value less than 1), a percentage (ending in
97
+ ~ a % sign) or an absolute number of rows to cache.
98
+ ~ RowsCached defaults to 0, i.e., row cache is off by default.
99
+ ~
100
+ ~ Remember, when using caches as a percentage, they WILL grow with
101
+ ~ your data set!
102
+ -->
103
+ <ColumnFamily Name="Users" CompareWith="BytesType" KeysCached="100000" RowsCached="1000"/>
104
+ <ColumnFamily Name="Posts" CompareWith="BytesType" />
105
+ <ColumnFamily Name="Comments" CompareWith="TimeUUIDType" CompareSubcolumnsWith="BytesType" ColumnType="Super" />
106
+
107
+ <!--
108
+ ~ Strategy: Setting this to the class that implements
109
+ ~ IReplicaPlacementStrategy will change the way the node picker works.
110
+ ~ Out of the box, Cassandra provides
111
+ ~ org.apache.cassandra.locator.RackUnawareStrategy and
112
+ ~ org.apache.cassandra.locator.RackAwareStrategy (place one replica in
113
+ ~ a different datacenter, and the others on different racks in the same
114
+ ~ one.)
115
+ -->
116
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
117
+
118
+ <!-- Number of replicas of the data -->
119
+ <ReplicationFactor>1</ReplicationFactor>
120
+
121
+ <!--
122
+ ~ EndPointSnitch: Setting this to the class that implements
123
+ ~ AbstractEndpointSnitch, which lets Cassandra know enough
124
+ ~ about your network topology to route requests efficiently.
125
+ ~ Out of the box, Cassandra provides org.apache.cassandra.locator.EndPointSnitch,
126
+ ~ and PropertyFileEndPointSnitch is available in contrib/.
127
+ -->
128
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
129
+
130
+ </Keyspace>
131
+ </Keyspaces>
132
+
133
+ <!--
134
+ ~ Authenticator: any IAuthenticator may be used, including your own as long
135
+ ~ as it is on the classpath. Out of the box, Cassandra provides
136
+ ~ org.apache.cassandra.auth.AllowAllAuthenticator and,
137
+ ~ org.apache.cassandra.auth.SimpleAuthenticator
138
+ ~ (SimpleAuthenticator uses access.properties and passwd.properties by
139
+ ~ default).
140
+ ~
141
+ ~ If you don't specify an authenticator, AllowAllAuthenticator is used.
142
+ -->
143
+ <Authenticator>org.apache.cassandra.auth.AllowAllAuthenticator</Authenticator>
144
+
145
+ <!--
146
+ ~ Partitioner: any IPartitioner may be used, including your own as long
147
+ ~ as it is on the classpath. Out of the box, Cassandra provides
148
+ ~ org.apache.cassandra.dht.RandomPartitioner,
149
+ ~ org.apache.cassandra.dht.OrderPreservingPartitioner, and
150
+ ~ org.apache.cassandra.dht.CollatingOrderPreservingPartitioner.
151
+ ~ (CollatingOPP colates according to EN,US rules, not naive byte
152
+ ~ ordering. Use this as an example if you need locale-aware collation.)
153
+ ~ Range queries require using an order-preserving partitioner.
154
+ ~
155
+ ~ Achtung! Changing this parameter requires wiping your data
156
+ ~ directories, since the partitioner can modify the sstable on-disk
157
+ ~ format.
158
+ -->
159
+ <Partitioner>org.apache.cassandra.dht.RandomPartitioner</Partitioner>
160
+
161
+ <!--
162
+ ~ If you are using an order-preserving partitioner and you know your key
163
+ ~ distribution, you can specify the token for this node to use. (Keys
164
+ ~ are sent to the node with the "closest" token, so distributing your
165
+ ~ tokens equally along the key distribution space will spread keys
166
+ ~ evenly across your cluster.) This setting is only checked the first
167
+ ~ time a node is started.
168
+
169
+ ~ This can also be useful with RandomPartitioner to force equal spacing
170
+ ~ of tokens around the hash space, especially for clusters with a small
171
+ ~ number of nodes.
172
+ -->
173
+ <InitialToken></InitialToken>
174
+
175
+ <!--
176
+ ~ Directories: Specify where Cassandra should store different data on
177
+ ~ disk. Keep the data disks and the CommitLog disks separate for best
178
+ ~ performance
179
+ -->
180
+ <CommitLogDirectory>data/cassandra/commitlog</CommitLogDirectory>
181
+ <DataFileDirectories>
182
+ <DataFileDirectory>data/cassandra/data</DataFileDirectory>
183
+ </DataFileDirectories>
184
+
185
+
186
+ <!--
187
+ ~ Addresses of hosts that are deemed contact points. Cassandra nodes
188
+ ~ use this list of hosts to find each other and learn the topology of
189
+ ~ the ring. You must change this if you are running multiple nodes!
190
+ -->
191
+ <Seeds>
192
+ <Seed>127.0.0.1</Seed>
193
+ </Seeds>
194
+
195
+
196
+ <!-- Miscellaneous -->
197
+
198
+ <!-- Time to wait for a reply from other nodes before failing the command -->
199
+ <RpcTimeoutInMillis>10000</RpcTimeoutInMillis>
200
+ <!-- Size to allow commitlog to grow to before creating a new segment -->
201
+ <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>
202
+
203
+
204
+ <!-- Local hosts and ports -->
205
+
206
+ <!--
207
+ ~ Address to bind to and tell other nodes to connect to. You _must_
208
+ ~ change this if you want multiple nodes to be able to communicate!
209
+ ~
210
+ ~ Leaving it blank leaves it up to InetAddress.getLocalHost(). This
211
+ ~ will always do the Right Thing *if* the node is properly configured
212
+ ~ (hostname, name resolution, etc), and the Right Thing is to use the
213
+ ~ address associated with the hostname (it might not be).
214
+ -->
215
+ <ListenAddress>localhost</ListenAddress>
216
+ <!-- internal communications port -->
217
+ <StoragePort>7000</StoragePort>
218
+
219
+ <!--
220
+ ~ The address to bind the Thrift RPC service to. Unlike ListenAddress
221
+ ~ above, you *can* specify 0.0.0.0 here if you want Thrift to listen on
222
+ ~ all interfaces.
223
+ ~
224
+ ~ Leaving this blank has the same effect it does for ListenAddress,
225
+ ~ (i.e. it will be based on the configured hostname of the node).
226
+ -->
227
+ <ThriftAddress>localhost</ThriftAddress>
228
+ <!-- Thrift RPC port (the port clients connect to). -->
229
+ <ThriftPort>9160</ThriftPort>
230
+ <!--
231
+ ~ Whether or not to use a framed transport for Thrift. If this option
232
+ ~ is set to true then you must also use a framed transport on the
233
+ ~ client-side, (framed and non-framed transports are not compatible).
234
+ -->
235
+ <ThriftFramedTransport>false</ThriftFramedTransport>
236
+
237
+
238
+ <!--======================================================================-->
239
+ <!-- Memory, Disk, and Performance -->
240
+ <!--======================================================================-->
241
+
242
+ <!--
243
+ ~ Access mode. mmapped i/o is substantially faster, but only practical on
244
+ ~ a 64bit machine (which notably does not include EC2 "small" instances)
245
+ ~ or relatively small datasets. "auto", the safe choice, will enable
246
+ ~ mmapping on a 64bit JVM. Other values are "mmap", "mmap_index_only"
247
+ ~ (which may allow you to get part of the benefits of mmap on a 32bit
248
+ ~ machine by mmapping only index files) and "standard".
249
+ ~ (The buffer size settings that follow only apply to standard,
250
+ ~ non-mmapped i/o.)
251
+ -->
252
+ <DiskAccessMode>auto</DiskAccessMode>
253
+
254
+ <!--
255
+ ~ Size of compacted row above which to log a warning. (If compacted
256
+ ~ rows do not fit in memory, Cassandra will crash. This is explained
257
+ ~ in http://wiki.apache.org/cassandra/CassandraLimitations and is
258
+ ~ scheduled to be fixed in 0.7.)
259
+ -->
260
+ <RowWarningThresholdInMB>512</RowWarningThresholdInMB>
261
+
262
+ <!--
263
+ ~ Buffer size to use when performing contiguous column slices. Increase
264
+ ~ this to the size of the column slices you typically perform.
265
+ ~ (Name-based queries are performed with a buffer size of
266
+ ~ ColumnIndexSizeInKB.)
267
+ -->
268
+ <SlicedBufferSizeInKB>64</SlicedBufferSizeInKB>
269
+
270
+ <!--
271
+ ~ Buffer size to use when flushing memtables to disk. (Only one
272
+ ~ memtable is ever flushed at a time.) Increase (decrease) the index
273
+ ~ buffer size relative to the data buffer if you have few (many)
274
+ ~ columns per key. Bigger is only better _if_ your memtables get large
275
+ ~ enough to use the space. (Check in your data directory after your
276
+ ~ app has been running long enough.) -->
277
+ <FlushDataBufferSizeInMB>32</FlushDataBufferSizeInMB>
278
+ <FlushIndexBufferSizeInMB>8</FlushIndexBufferSizeInMB>
279
+
280
+ <!--
281
+ ~ Add column indexes to a row after its contents reach this size.
282
+ ~ Increase if your column values are large, or if you have a very large
283
+ ~ number of columns. The competing causes are, Cassandra has to
284
+ ~ deserialize this much of the row to read a single column, so you want
285
+ ~ it to be small - at least if you do many partial-row reads - but all
286
+ ~ the index data is read for each access, so you don't want to generate
287
+ ~ that wastefully either.
288
+ -->
289
+ <ColumnIndexSizeInKB>64</ColumnIndexSizeInKB>
290
+
291
+ <!--
292
+ ~ Flush memtable after this much data has been inserted, including
293
+ ~ overwritten data. There is one memtable per column family, and
294
+ ~ this threshold is based solely on the amount of data stored, not
295
+ ~ actual heap memory usage (there is some overhead in indexing the
296
+ ~ columns).
297
+ -->
298
+ <MemtableThroughputInMB>64</MemtableThroughputInMB>
299
+ <!--
300
+ ~ Throughput setting for Binary Memtables. Typically these are
301
+ ~ used for bulk load so you want them to be larger.
302
+ -->
303
+ <BinaryMemtableThroughputInMB>256</BinaryMemtableThroughputInMB>
304
+ <!--
305
+ ~ The maximum number of columns in millions to store in memory per
306
+ ~ ColumnFamily before flushing to disk. This is also a per-memtable
307
+ ~ setting. Use with MemtableThroughputInMB to tune memory usage.
308
+ -->
309
+ <MemtableOperationsInMillions>0.3</MemtableOperationsInMillions>
310
+ <!--
311
+ ~ The maximum time to leave a dirty memtable unflushed.
312
+ ~ (While any affected columnfamilies have unflushed data from a
313
+ ~ commit log segment, that segment cannot be deleted.)
314
+ ~ This needs to be large enough that it won't cause a flush storm
315
+ ~ of all your memtables flushing at once because none has hit
316
+ ~ the size or count thresholds yet. For production, a larger
317
+ ~ value such as 1440 is recommended.
318
+ -->
319
+ <MemtableFlushAfterMinutes>60</MemtableFlushAfterMinutes>
320
+
321
+ <!--
322
+ ~ Unlike most systems, in Cassandra writes are faster than reads, so
323
+ ~ you can afford more of those in parallel. A good rule of thumb is 2
324
+ ~ concurrent reads per processor core. Increase ConcurrentWrites to
325
+ ~ the number of clients writing at once if you enable CommitLogSync +
326
+ ~ CommitLogSyncDelay. -->
327
+ <ConcurrentReads>8</ConcurrentReads>
328
+ <ConcurrentWrites>32</ConcurrentWrites>
329
+
330
+ <!--
331
+ ~ CommitLogSync may be either "periodic" or "batch." When in batch
332
+ ~ mode, Cassandra won't ack writes until the commit log has been
333
+ ~ fsynced to disk. It will wait up to CommitLogSyncBatchWindowInMS
334
+ ~ milliseconds for other writes, before performing the sync.
335
+
336
+ ~ This is less necessary in Cassandra than in traditional databases
337
+ ~ since replication reduces the odds of losing data from a failure
338
+ ~ after writing the log entry but before it actually reaches the disk.
339
+ ~ So the other option is "periodic," where writes may be acked immediately
340
+ ~ and the CommitLog is simply synced every CommitLogSyncPeriodInMS
341
+ ~ milliseconds.
342
+ -->
343
+ <CommitLogSync>periodic</CommitLogSync>
344
+ <!--
345
+ ~ Interval at which to perform syncs of the CommitLog in periodic mode.
346
+ ~ Usually the default of 10000ms is fine; increase it if your i/o
347
+ ~ load is such that syncs are taking excessively long times.
348
+ -->
349
+ <CommitLogSyncPeriodInMS>10000</CommitLogSyncPeriodInMS>
350
+ <!--
351
+ ~ Delay (in milliseconds) during which additional commit log entries
352
+ ~ may be written before fsync in batch mode. This will increase
353
+ ~ latency slightly, but can vastly improve throughput where there are
354
+ ~ many writers. Set to zero to disable (each entry will be synced
355
+ ~ individually). Reasonable values range from a minimal 0.1 to 10 or
356
+ ~ even more if throughput matters more than latency.
357
+ -->
358
+ <!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> -->
359
+
360
+ <!--
361
+ ~ Time to wait before garbage-collection deletion markers. Set this to
362
+ ~ a large enough value that you are confident that the deletion marker
363
+ ~ will be propagated to all replicas by the time this many seconds has
364
+ ~ elapsed, even in the face of hardware failures. The default value is
365
+ ~ ten days.
366
+ -->
367
+ <GCGraceSeconds>864000</GCGraceSeconds>
368
+ </Storage>