cassandra-cql 1.1.5 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,44 @@
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ # for production, you should probably set pattern to %c instead of %l.
+ # (%l is slower.)
+
+ # output messages into a rolling log file as well as stdout
+ log4j.rootLogger=INFO,stdout,R
+
+ # stdout
+ log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+ log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+ log4j.appender.stdout.layout.ConversionPattern=%5p %d{HH:mm:ss,SSS} %m%n
+
+ # rolling log file
+ log4j.appender.R=org.apache.log4j.RollingFileAppender
+ log4j.appender.R.maxFileSize=20MB
+ log4j.appender.R.maxBackupIndex=50
+ log4j.appender.R.layout=org.apache.log4j.PatternLayout
+ log4j.appender.R.layout.ConversionPattern=%5p [%t] %d{ISO8601} %F (line %L) %m%n
+ # Edit the next line to point to your logs directory
+ log4j.appender.R.File=/var/log/cassandra/system.log
+
+ # Application logging options
+ #log4j.logger.org.apache.cassandra=DEBUG
+ #log4j.logger.org.apache.cassandra.db=DEBUG
+ #log4j.logger.org.apache.cassandra.service.StorageProxy=DEBUG
+
+ # Adding this to avoid thrift logging disconnect errors.
+ log4j.logger.org.apache.thrift.server.TNonblockingServer=ERROR
+
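The file above is a plain key=value Java properties file for log4j. As a quick way to check where logs will land, it can be read with a few lines of Python; this is a minimal sketch, and the local filename "log4j-server.properties" is an assumption, since the diff does not show file paths:

    # Minimal key=value properties reader -- ignores the escaping and line
    # continuations of the full java.util.Properties format, which this
    # particular file does not use.
    props = {}
    with open("log4j-server.properties") as f:  # hypothetical local filename
        for line in f:
            line = line.strip()
            if line and not line.startswith("#") and "=" in line:
                key, value = line.split("=", 1)
                props[key] = value

    print(props["log4j.rootLogger"])        # INFO,stdout,R
    print(props["log4j.appender.R.File"])   # /var/log/cassandra/system.log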
@@ -0,0 +1,72 @@
+ {"Twitter":{
+   "Users":{
+     "comparator_type":"org.apache.cassandra.db.marshal.UTF8Type",
+     "column_type":"Standard"},
+   "UserAudits":{
+     "comparator_type":"org.apache.cassandra.db.marshal.UTF8Type",
+     "column_type":"Standard"},
+   "UserCounters":{
+     "comparator_type":"org.apache.cassandra.db.marshal.UTF8Type",
+     "column_type":"Standard",
+     "default_validation_class":"CounterColumnType"},
+   "UserCounterAggregates":{
+     "subcomparator_type":"org.apache.cassandra.db.marshal.UTF8Type",
+     "comparator_type":"org.apache.cassandra.db.marshal.UTF8Type",
+     "column_type":"Super",
+     "default_validation_class":"CounterColumnType"},
+   "UserRelationships":{
+     "subcomparator_type":"org.apache.cassandra.db.marshal.TimeUUIDType",
+     "comparator_type":"org.apache.cassandra.db.marshal.UTF8Type",
+     "column_type":"Super"},
+   "Usernames":{
+     "comparator_type":"org.apache.cassandra.db.marshal.UTF8Type",
+     "column_type":"Standard"},
+   "Statuses":{
+     "comparator_type":"org.apache.cassandra.db.marshal.UTF8Type",
+     "column_type":"Standard"},
+   "StatusAudits":{
+     "comparator_type":"org.apache.cassandra.db.marshal.UTF8Type",
+     "column_type":"Standard"},
+   "StatusRelationships":{
+     "subcomparator_type":"org.apache.cassandra.db.marshal.TimeUUIDType",
+     "comparator_type":"org.apache.cassandra.db.marshal.UTF8Type",
+     "column_type":"Super"},
+   "Indexes":{
+     "comparator_type":"org.apache.cassandra.db.marshal.UTF8Type",
+     "column_type":"Super"},
+   "TimelinishThings":{
+     "comparator_type":"org.apache.cassandra.db.marshal.BytesType",
+     "column_type":"Standard"}
+   },
+ "Multiblog":{
+   "Blogs":{
+     "comparator_type":"org.apache.cassandra.db.marshal.TimeUUIDType",
+     "column_type":"Standard"},
+   "Comments":{
+     "comparator_type":"org.apache.cassandra.db.marshal.TimeUUIDType",
+     "column_type":"Standard"}
+   },
+ "MultiblogLong":{
+   "Blogs":{
+     "comparator_type":"org.apache.cassandra.db.marshal.LongType",
+     "column_type":"Standard"},
+   "Comments":{
+     "comparator_type":"org.apache.cassandra.db.marshal.LongType",
+     "column_type":"Standard"}
+   },
+ "TypeConversions":{
+   "UUIDColumnConversion":{
+     "comparator_type":"org.apache.cassandra.db.marshal.TimeUUIDType",
+     "column_type":"Standard"},
+   "SuperUUID":{
+     "subcomparator_type":"org.apache.cassandra.db.marshal.TimeUUIDType",
+     "comparator_type":"org.apache.cassandra.db.marshal.TimeUUIDType",
+     "column_type":"Super"},
+   "CompositeColumnConversion":{
+     "comparator_type":"org.apache.cassandra.db.marshal.CompositeType(org.apache.cassandra.db.marshal.IntegerType,org.apache.cassandra.db.marshal.UTF8Type)",
+     "column_type":"Standard"},
+   "DynamicComposite":{
+     "comparator_type":"org.apache.cassandra.db.marshal.DynamicCompositeType(u=>org.apache.cassandra.db.marshal.UUIDType,t=>org.apache.cassandra.db.marshal.TimeUUIDType,s=>org.apache.cassandra.db.marshal.UTF8Type,b=>org.apache.cassandra.db.marshal.BytesType,a=>org.apache.cassandra.db.marshal.AsciiType,l=>org.apache.cassandra.db.marshal.LongType,x=>org.apache.cassandra.db.marshal.LexicalUUIDType,i=>org.apache.cassandra.db.marshal.IntegerType)",
+     "column_type":"Standard"}
+   }
+ }
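The block above is a JSON description of the test schema: a keyspace -> column family -> options mapping, where Super column families additionally carry a subcomparator_type. A minimal sketch of walking it in Python, assuming the document is saved locally as "schema.json" (the diff does not show file paths):

    import json

    with open("schema.json") as f:  # hypothetical local filename
        schema = json.load(f)

    # Print each column family with its type and comparator.
    for keyspace, column_families in schema.items():
        for name, options in column_families.items():
            print(keyspace, name,
                  options.get("column_type", "Standard"),
                  options.get("comparator_type"))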
@@ -0,0 +1,57 @@
+ create keyspace Twitter with
+   placement_strategy = 'org.apache.cassandra.locator.SimpleStrategy' AND
+   strategy_options = {replication_factor:1};
+ use Twitter;
+ create column family Users with comparator = 'UTF8Type';
+ create column family UserAudits with comparator = 'UTF8Type';
+ create column family UserCounters with comparator = 'UTF8Type' and
+   default_validation_class = CounterColumnType;
+ create column family UserCounterAggregates with column_type = 'Super'
+   and comparator = 'UTF8Type' and
+   subcomparator = 'UTF8Type' and
+   default_validation_class = CounterColumnType;
+ create column family UserRelationships with
+   comparator = 'UTF8Type' and
+   column_type = 'Super' and
+   subcomparator = 'TimeUUIDType';
+ create column family Usernames with comparator = 'UTF8Type';
+ create column family Statuses
+   with comparator = 'UTF8Type'
+   and column_metadata = [
+     {column_name: 'tags', validation_class: 'BytesType', index_type: 'KEYS'}
+   ];
+ create column family StatusAudits with comparator = 'UTF8Type';
+ create column family StatusRelationships with
+   comparator = 'UTF8Type' and
+   column_type = 'Super' and
+   subcomparator = 'TimeUUIDType';
+ create column family Indexes with
+   comparator = 'UTF8Type' and
+   column_type = 'Super';
+ create column family TimelinishThings with
+   comparator = 'BytesType';
+
+ create keyspace Multiblog with
+   placement_strategy = 'org.apache.cassandra.locator.SimpleStrategy' AND
+   strategy_options = {replication_factor:1};
+ use Multiblog;
+ create column family Blogs with comparator = 'TimeUUIDType';
+ create column family Comments with comparator = 'TimeUUIDType';
+
+
+ create keyspace MultiblogLong with
+   placement_strategy = 'org.apache.cassandra.locator.SimpleStrategy' AND
+   strategy_options = {replication_factor:1};
+ use MultiblogLong;
+ create column family Blogs with comparator = 'LongType';
+ create column family Comments with comparator = 'LongType';
+
+ create keyspace TypeConversions with
+   placement_strategy = 'org.apache.cassandra.locator.SimpleStrategy' AND
+   strategy_options = {replication_factor:1};
+ use TypeConversions;
+ create column family UUIDColumnConversion with comparator = TimeUUIDType;
+ create column family SuperUUID with comparator = TimeUUIDType and column_type = Super;
+ create column family CompositeColumnConversion with comparator = 'CompositeType(IntegerType, UTF8Type)';
+ create column family DynamicComposite with comparator = 'DynamicCompositeType
+   (a=>AsciiType,b=>BytesType,i=>IntegerType,x=>LexicalUUIDType,l=>LongType,t=>TimeUUIDType,s=>UTF8Type,u=>UUIDType)';
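This script defines the same keyspaces as the JSON above, in cassandra-cli statement form; each statement executes when its terminating ";" is read. One way to load it is to feed it to the cassandra-cli tool bundled with Cassandra -- a minimal sketch, where the local filename "schema.txt" and the exact flag spellings are assumptions to verify against cassandra-cli --help for your version:

    import subprocess

    # Run the schema script against a local node's Thrift port (9160).
    subprocess.run(
        ["cassandra-cli", "--host", "localhost", "--port", "9160",
         "--file", "schema.txt"],  # hypothetical filename and flags
        check=True,
    )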
@@ -0,0 +1,41 @@
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ if [ "x$CASSANDRA_HOME" = "x" ]; then
+     CASSANDRA_HOME=`dirname $0`/..
+ fi
+
+ # The directory where Cassandra's configs live (required)
+ if [ "x$CASSANDRA_CONF" = "x" ]; then
+     CASSANDRA_CONF=$CASSANDRA_HOME/conf
+ fi
+
+ # This can be the path to a jar file, or a directory containing the
+ # compiled classes. NOTE: This isn't needed by the startup script,
+ # it's just used here in constructing the classpath.
+ cassandra_bin=$CASSANDRA_HOME/build/classes/main
+ cassandra_bin=$cassandra_bin:$CASSANDRA_HOME/build/classes/thrift
+ #cassandra_bin=$cassandra_home/build/cassandra.jar
+
+ # JAVA_HOME can optionally be set here
+ #JAVA_HOME=/usr/local/jdk6
+
+ # The java classpath (required)
+ CLASSPATH=$CASSANDRA_CONF:$cassandra_bin
+
+ for jar in $CASSANDRA_HOME/lib/*.jar; do
+     CLASSPATH=$CLASSPATH:$jar
+ done
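This is a classpath helper in the style of Cassandra's cassandra.in.sh: because each assignment is wrapped in an if [ "x$VAR" = "x" ] guard, pre-set CASSANDRA_HOME and CASSANDRA_CONF values win over the computed defaults. A minimal sketch of exercising that from Python -- the script name "cassandra.in.sh" and the path /opt/cassandra are assumptions:

    import os
    import subprocess

    # Pre-set CASSANDRA_HOME so the script's guard leaves it alone, then
    # source the script and print the CLASSPATH it builds.
    env = dict(os.environ, CASSANDRA_HOME="/opt/cassandra")  # hypothetical path
    subprocess.run(
        ["bash", "-c", '. ./cassandra.in.sh && echo "$CLASSPATH"'],
        env=env,
        check=True,
    )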
@@ -0,0 +1,643 @@
+ # Cassandra storage config YAML
+
+ # NOTE:
+ #   See http://wiki.apache.org/cassandra/StorageConfiguration for
+ #   full explanations of configuration directives
+ # /NOTE
+
+ # The name of the cluster. This is mainly used to prevent machines in
+ # one logical cluster from joining another.
+ cluster_name: 'Test Cluster'
+
+ # This defines the number of tokens randomly assigned to this node on the ring
+ # The more tokens, relative to other nodes, the larger the proportion of data
+ # that this node will store. You probably want all nodes to have the same number
+ # of tokens assuming they have equal hardware capability.
+ #
+ # If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility,
+ # and will use the initial_token as described below.
+ #
+ # Specifying initial_token will override this setting.
+ #
+ # If you already have a cluster with 1 token per node, and wish to migrate to
+ # multiple tokens per node, see http://wiki.apache.org/cassandra/Operations
+ # num_tokens: 256
+
+ # If you haven't specified num_tokens, or have set it to the default of 1, then
+ # you should always specify InitialToken when setting up a production
+ # cluster for the first time, and often when adding capacity later.
+ # The principle is that each node should be given an equal slice of
+ # the token ring; see http://wiki.apache.org/cassandra/Operations
+ # for more details.
+ #
+ # If blank, Cassandra will request a token bisecting the range of
+ # the heaviest-loaded existing node. If there is no load information
+ # available, such as is the case with a new cluster, it will pick
+ # a random token, which will lead to hot spots.
+ initial_token:
+
+ # See http://wiki.apache.org/cassandra/HintedHandoff
+ hinted_handoff_enabled: true
+ # this defines the maximum amount of time a dead host will have hints
+ # generated. After it has been dead this long, hints will be dropped.
+ max_hint_window_in_ms: 3600000 # 1 hour
+ # throttle in KB per second, per delivery thread
+ hinted_handoff_throttle_in_kb: 1024
+ # Number of threads with which to deliver hints;
+ # consider increasing this number when you have multi-dc deployments, since
+ # cross-dc handoff tends to be slower
+ max_hints_delivery_threads: 2
+
+ # The following setting populates the page cache on memtable flush and compaction
+ # WARNING: Enable this setting only when the whole node's data fits in memory.
+ # Defaults to: false
+ # populate_io_cache_on_flush: false
+
+ # authentication backend, implementing IAuthenticator; used to identify users
+ authenticator: org.apache.cassandra.auth.AllowAllAuthenticator
+
+ # authorization backend, implementing IAuthorizer; used to limit access/provide permissions
+ authorizer: org.apache.cassandra.auth.AllowAllAuthorizer
+
+ # The partitioner is responsible for distributing rows (by key) across
+ # nodes in the cluster. Any IPartitioner may be used, including your
+ # own as long as it is on the classpath. Out of the box, Cassandra
+ # provides org.apache.cassandra.dht.{Murmur3Partitioner, RandomPartitioner,
+ # ByteOrderedPartitioner, OrderPreservingPartitioner (deprecated)}.
+ #
+ # - RandomPartitioner distributes rows across the cluster evenly by md5.
+ #   This is the default prior to 1.2 and is retained for compatibility.
+ # - Murmur3Partitioner is similar to RandomPartitioner but uses the Murmur3_128
+ #   hash function instead of md5. When in doubt, this is the best option.
+ # - ByteOrderedPartitioner orders rows lexically by key bytes. BOP allows
+ #   scanning rows in key order, but the ordering can generate hot spots
+ #   for sequential insertion workloads.
+ # - OrderPreservingPartitioner is an obsolete form of BOP that stores
+ #   keys in a less-efficient format and only works with keys that are
+ #   UTF8-encoded Strings.
+ # - CollatingOPP collates according to EN,US rules rather than lexical byte
+ #   ordering. Use this as an example if you need custom collation.
+ #
+ # See http://wiki.apache.org/cassandra/Operations for more on
+ # partitioners and token selection.
+ partitioner: org.apache.cassandra.dht.Murmur3Partitioner
+
+ # directories where Cassandra should store data on disk.
+ data_file_directories:
+     - data/data
+
+ # commit log
+ commitlog_directory: data/commitlog
+
+ # policy for data disk failures:
+ # stop: shut down gossip and Thrift, leaving the node effectively dead, but
+ #       still inspectable via JMX.
+ # best_effort: stop using the failed disk and respond to requests based on
+ #              remaining available sstables. This means you WILL see obsolete
+ #              data at CL.ONE!
+ # ignore: ignore fatal errors and let requests fail, as in pre-1.2 Cassandra
+ disk_failure_policy: stop
+
+ # Maximum size of the key cache in memory.
+ #
+ # Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the
+ # minimum, sometimes more. The key cache is fairly tiny for the amount of
+ # time it saves, so it's worthwhile to use it at large numbers.
+ # The row cache saves even more time, but must store the whole values of
+ # its rows, so it is extremely space-intensive. It's best to only use the
+ # row cache if you have hot rows or static rows.
+ #
+ # NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
+ #
+ # Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache.
+ key_cache_size_in_mb:
+
+ # Duration in seconds after which Cassandra should
+ # save the key cache. Caches are saved to saved_caches_directory as
+ # specified in this configuration file.
+ #
+ # Saved caches greatly improve cold-start speeds, and are relatively cheap in
+ # terms of I/O for the key cache. Row cache saving is much more expensive and
+ # has limited use.
+ #
+ # Default is 14400 or 4 hours.
+ key_cache_save_period: 14400
+
+ # Number of keys from the key cache to save
+ # Disabled by default, meaning all keys are going to be saved
+ # key_cache_keys_to_save: 100
+
+ # Maximum size of the row cache in memory.
+ # NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
+ #
+ # Default value is 0, to disable row caching.
+ row_cache_size_in_mb: 0
+
+ # Duration in seconds after which Cassandra should
+ # save the row cache. Caches are saved to saved_caches_directory as specified
+ # in this configuration file.
+ #
+ # Saved caches greatly improve cold-start speeds, and are relatively cheap in
+ # terms of I/O for the key cache. Row cache saving is much more expensive and
+ # has limited use.
+ #
+ # Default is 0 to disable saving the row cache.
+ row_cache_save_period: 0
+
+ # Number of keys from the row cache to save
+ # Disabled by default, meaning all keys are going to be saved
+ # row_cache_keys_to_save: 100
+
+ # The provider for the row cache to use.
+ #
+ # Supported values are: ConcurrentLinkedHashCacheProvider, SerializingCacheProvider
+ #
+ # SerializingCacheProvider serialises the contents of the row and stores
+ # it in native memory, i.e., off the JVM Heap. Serialized rows take
+ # significantly less memory than "live" rows in the JVM, so you can cache
+ # more rows in a given memory footprint. And storing the cache off-heap
+ # means you can use smaller heap sizes, reducing the impact of GC pauses.
+ #
+ # It is also valid to specify the fully-qualified class name of a class
+ # that implements org.apache.cassandra.cache.IRowCacheProvider.
+ #
+ # Defaults to SerializingCacheProvider
+ row_cache_provider: SerializingCacheProvider
+
+ # saved caches
+ saved_caches_directory: data/saved_caches
+
+ # commitlog_sync may be either "periodic" or "batch."
+ # When in batch mode, Cassandra won't ack writes until the commit log
+ # has been fsynced to disk. It will wait up to
+ # commitlog_sync_batch_window_in_ms milliseconds for other writes, before
+ # performing the sync.
+ #
+ # commitlog_sync: batch
+ # commitlog_sync_batch_window_in_ms: 50
+ #
+ # the other option is "periodic" where writes may be acked immediately
+ # and the CommitLog is simply synced every commitlog_sync_period_in_ms
+ # milliseconds.
+ commitlog_sync: periodic
+ commitlog_sync_period_in_ms: 10000
+
+ # The size of the individual commitlog file segments. A commitlog
+ # segment may be archived, deleted, or recycled once all the data
+ # in it (potentially from each columnfamily in the system) has been
+ # flushed to sstables.
+ #
+ # The default size is 32, which is almost always fine, but if you are
+ # archiving commitlog segments (see commitlog_archiving.properties),
+ # then you probably want a finer granularity of archiving; 8 or 16 MB
+ # is reasonable.
+ commitlog_segment_size_in_mb: 32
+
+ # any class that implements the SeedProvider interface and has a
+ # constructor that takes a Map<String, String> of parameters will do.
+ seed_provider:
+     # Addresses of hosts that are deemed contact points.
+     # Cassandra nodes use this list of hosts to find each other and learn
+     # the topology of the ring. You must change this if you are running
+     # multiple nodes!
+     - class_name: org.apache.cassandra.locator.SimpleSeedProvider
+       parameters:
+           # seeds is actually a comma-delimited list of addresses.
+           # Ex: "<ip1>,<ip2>,<ip3>"
+           - seeds: "127.0.0.1"
+
+ # emergency pressure valve: each time heap usage after a full (CMS)
+ # garbage collection is above this fraction of the max, Cassandra will
+ # flush the largest memtables.
+ #
+ # Set to 1.0 to disable. Setting this lower than
+ # CMSInitiatingOccupancyFraction is not likely to be useful.
+ #
+ # RELYING ON THIS AS YOUR PRIMARY TUNING MECHANISM WILL WORK POORLY:
+ # it is most effective under light to moderate load, or read-heavy
+ # workloads; under truly massive write load, it will often be too
+ # little, too late.
+ flush_largest_memtables_at: 0.75
+
+ # emergency pressure valve #2: the first time heap usage after a full
+ # (CMS) garbage collection is above this fraction of the max,
+ # Cassandra will reduce cache maximum _capacity_ to the given fraction
+ # of the current _size_. Should usually be set substantially above
+ # flush_largest_memtables_at, since that will have less long-term
+ # impact on the system.
+ #
+ # Set to 1.0 to disable. Setting this lower than
+ # CMSInitiatingOccupancyFraction is not likely to be useful.
+ reduce_cache_sizes_at: 0.85
+ reduce_cache_capacity_to: 0.6
+
+ # For workloads with more data than can fit in memory, Cassandra's
+ # bottleneck will be reads that need to fetch data from
+ # disk. "concurrent_reads" should be set to (16 * number_of_drives) in
+ # order to allow the operations to enqueue low enough in the stack
+ # that the OS and drives can reorder them.
+ #
+ # On the other hand, since writes are almost never IO bound, the ideal
+ # number of "concurrent_writes" is dependent on the number of cores in
+ # your system; (8 * number_of_cores) is a good rule of thumb.
+ concurrent_reads: 32
+ concurrent_writes: 32
+
+ # Total memory to use for memtables. Cassandra will flush the largest
+ # memtable when this much memory is used.
+ # If omitted, Cassandra will set it to 1/3 of the heap.
+ # memtable_total_space_in_mb: 2048
+
+ # Total space to use for commitlogs. Since commitlog segments are
+ # mmapped, and hence use up address space, the default size is 32
+ # on 32-bit JVMs, and 1024 on 64-bit JVMs.
+ #
+ # If space gets above this value (it will round up to the next nearest
+ # segment multiple), Cassandra will flush every dirty CF in the oldest
+ # segment and remove it. So a small total commitlog space will tend
+ # to cause more flush activity on less-active columnfamilies.
+ # commitlog_total_space_in_mb: 4096
+
+ # This sets the number of memtable flush writer threads. These will
+ # be blocked by disk I/O, and each one will hold a memtable in memory
+ # while blocked. If you have a large heap and many data directories,
+ # you can increase this value for better flush performance.
+ # By default this will be set to the number of data directories defined.
+ #memtable_flush_writers: 1
+
+ # the number of full memtables to allow pending flush, that is,
+ # waiting for a writer thread. At a minimum, this should be set to
+ # the maximum number of secondary indexes created on a single CF.
+ memtable_flush_queue_size: 4
+
+ # Whether to, when doing sequential writing, fsync() at intervals in
+ # order to force the operating system to flush the dirty
+ # buffers. Enable this to avoid sudden dirty buffer flushing from
+ # impacting read latencies. Almost always a good idea on SSDs; not
+ # necessarily on platters.
+ trickle_fsync: false
+ trickle_fsync_interval_in_kb: 10240
+
+ # TCP port, for commands and data
+ storage_port: 7000
+
+ # SSL port, for encrypted communication. Unused unless enabled in
+ # encryption_options
+ ssl_storage_port: 7001
+
+ # Address to bind to and tell other Cassandra nodes to connect to. You
+ # _must_ change this if you want multiple nodes to be able to
+ # communicate!
+ #
+ # Leaving it blank leaves it up to InetAddress.getLocalHost(). This
+ # will always do the Right Thing *if* the node is properly configured
+ # (hostname, name resolution, etc), and the Right Thing is to use the
+ # address associated with the hostname (it might not be).
+ #
+ # Setting this to 0.0.0.0 is always wrong.
+ listen_address: localhost
+
+ # Address to broadcast to other Cassandra nodes
+ # Leaving this blank will set it to the same value as listen_address
+ # broadcast_address: 1.2.3.4
+
+
+ # Whether to start the native transport server.
+ # Currently, only the thrift server is started by default because the native
+ # transport is considered beta.
+ # Please note that the address on which the native transport is bound is the
+ # same as the rpc_address. The port however is different and specified below.
+ start_native_transport: false
+ # port for the CQL native transport to listen for clients on
+ native_transport_port: 9042
+ # The minimum and maximum threads for handling requests when the native
+ # transport is used. Their meaning is similar to that of
+ # rpc_min_threads and rpc_max_threads, though the defaults differ slightly and
+ # are the ones below:
+ # native_transport_min_threads: 16
+ # native_transport_max_threads: 128
+
+
+ # Whether to start the thrift rpc server.
+ start_rpc: true
+ # The address to bind the Thrift RPC service to -- clients connect
+ # here. Unlike ListenAddress above, you *can* specify 0.0.0.0 here if
+ # you want Thrift to listen on all interfaces.
+ #
+ # Leaving this blank has the same effect it does for ListenAddress,
+ # (i.e. it will be based on the configured hostname of the node).
+ rpc_address: localhost
+ # port for Thrift to listen for clients on
+ rpc_port: 9160
+
+ # enable or disable keepalive on rpc connections
+ rpc_keepalive: true
+
+ # Cassandra provides two out-of-the-box options for the RPC Server:
+ #
+ # sync -> One thread per thrift connection. For a very large number of clients, memory
+ #         will be your limiting factor. On a 64 bit JVM, 128KB is the minimum stack size
+ #         per thread, and that will correspond to your use of virtual memory (but physical memory
+ #         may be limited depending on use of stack space).
+ #
+ # hsha -> Stands for "half synchronous, half asynchronous." All thrift clients are handled
+ #         asynchronously using a small number of threads that does not vary with the number
+ #         of thrift clients (and thus scales well to many clients). The rpc requests are still
+ #         synchronous (one thread per active request).
+ #
+ # The default is sync because on Windows hsha is about 30% slower. On Linux,
+ # sync/hsha performance is about the same, with hsha of course using less memory.
+ #
+ # Alternatively, you can provide your own RPC server by providing the fully-qualified class name
+ # of an o.a.c.t.TServerFactory that can create an instance of it.
+ rpc_server_type: sync
+
+ # Uncomment rpc_min|max_threads to set request pool size limits.
+ #
+ # Regardless of your choice of RPC server (see above), the number of maximum requests in the
+ # RPC thread pool dictates how many concurrent requests are possible (but if you are using the sync
+ # RPC server, it also dictates the number of clients that can be connected at all).
+ #
+ # The default is unlimited and thus provides no protection against clients overwhelming the server. You are
+ # encouraged to set a maximum that makes sense for you in production, but do keep in mind that
+ # rpc_max_threads represents the maximum number of client requests this server may execute concurrently.
+ #
+ # rpc_min_threads: 16
+ # rpc_max_threads: 2048
+
+ # uncomment to set socket buffer sizes on rpc connections
+ # rpc_send_buff_size_in_bytes:
+ # rpc_recv_buff_size_in_bytes:
+
+ # Frame size for thrift (maximum field length).
+ thrift_framed_transport_size_in_mb: 15
+
+ # The max length of a thrift message, including all fields and
+ # internal thrift overhead.
+ thrift_max_message_length_in_mb: 16
+
+ # Set to true to have Cassandra create a hard link to each sstable
+ # flushed or streamed locally in a backups/ subdirectory of the
+ # keyspace data. Removing these links is the operator's
+ # responsibility.
+ incremental_backups: false
+
+ # Whether or not to take a snapshot before each compaction. Be
+ # careful using this option, since Cassandra won't clean up the
+ # snapshots for you. Mostly useful if you're paranoid when there
+ # is a data format change.
+ snapshot_before_compaction: false
+
+ # Whether or not a snapshot is taken of the data before keyspace truncation
+ # or dropping of column families. The STRONGLY advised default of true
+ # should be used to provide data safety. If you set this flag to false, you will
+ # lose data on truncation or drop.
+ auto_snapshot: true
+
+ # Add column indexes to a row after its contents reach this size.
+ # Increase if your column values are large, or if you have a very large
+ # number of columns. The competing causes are: Cassandra has to
+ # deserialize this much of the row to read a single column, so you want
+ # it to be small - at least if you do many partial-row reads - but all
+ # the index data is read for each access, so you don't want to generate
+ # that wastefully either.
+ column_index_size_in_kb: 64
+
+ # Size limit for rows being compacted in memory. Larger rows will spill
+ # over to disk and use a slower two-pass compaction process. A message
+ # will be logged specifying the row key.
+ in_memory_compaction_limit_in_mb: 64
+
+ # Number of simultaneous compactions to allow, NOT including
+ # validation "compactions" for anti-entropy repair. Simultaneous
+ # compactions can help preserve read performance in a mixed read/write
+ # workload, by mitigating the tendency of small sstables to accumulate
+ # during a single long-running compaction. The default is usually
+ # fine and if you experience problems with compaction running too
+ # slowly or too fast, you should look at
+ # compaction_throughput_mb_per_sec first.
+ #
+ # concurrent_compactors defaults to the number of cores.
+ # Uncomment to make compaction mono-threaded, the pre-0.8 default.
+ #concurrent_compactors: 1
+
+ # Multi-threaded compaction. When enabled, each compaction will use
+ # up to one thread per core, plus one thread per sstable being merged.
+ # This is usually only useful for SSD-based hardware: otherwise,
+ # your concern is usually to get compaction to do LESS i/o (see:
+ # compaction_throughput_mb_per_sec), not more.
+ multithreaded_compaction: false
+
+ # Throttles compaction to the given total throughput across the entire
+ # system. The faster you insert data, the faster you need to compact in
+ # order to keep the sstable count down, but in general, setting this to
+ # 16 to 32 times the rate you are inserting data is more than sufficient.
+ # Setting this to 0 disables throttling. Note that this accounts for all types
+ # of compaction, including validation compaction.
+ compaction_throughput_mb_per_sec: 16
+
+ # Track cached row keys during compaction, and re-cache their new
+ # positions in the compacted sstable. Disable if you use really large
+ # key caches.
+ compaction_preheat_key_cache: true
+
+ # Throttles all outbound streaming file transfers on this node to the
+ # given total throughput in Mbps. This is necessary because Cassandra does
+ # mostly sequential IO when streaming data during bootstrap or repair, which
+ # can lead to saturating the network connection and degrading rpc performance.
+ # When unset, the default is 400 Mbps or 50 MB/s.
+ # stream_throughput_outbound_megabits_per_sec: 400
+
+ # How long the coordinator should wait for read operations to complete
+ read_request_timeout_in_ms: 10000
+ # How long the coordinator should wait for seq or index scans to complete
+ range_request_timeout_in_ms: 10000
+ # How long the coordinator should wait for writes to complete
+ write_request_timeout_in_ms: 10000
+ # How long the coordinator should wait for truncates to complete
+ # (This can be much longer, because unless auto_snapshot is disabled
+ # we need to flush first so we can snapshot before removing the data.)
+ truncate_request_timeout_in_ms: 60000
+ # The default timeout for other, miscellaneous operations
+ request_timeout_in_ms: 10000
+
+ # Enable operation timeout information exchange between nodes to accurately
+ # measure request timeouts. If disabled, Cassandra will assume the request
+ # was forwarded to the replica instantly by the coordinator.
+ #
+ # Warning: before enabling this property make sure NTP is installed
+ # and the times are synchronized between the nodes.
+ cross_node_timeout: false
+
+ # Enable socket timeout for streaming operations.
+ # When a timeout occurs during streaming, streaming is retried from the start
+ # of the current file. This *can* involve re-streaming a significant amount of
+ # data, so you should avoid setting the value too low.
+ # Default value is 0, which never times out streams.
+ # streaming_socket_timeout_in_ms: 0
+
+ # phi value that must be reached for a host to be marked down.
+ # most users should never need to adjust this.
+ # phi_convict_threshold: 8
+
+ # endpoint_snitch -- Set this to a class that implements
+ # IEndpointSnitch. The snitch has two functions:
+ # - it teaches Cassandra enough about your network topology to route
+ #   requests efficiently
+ # - it allows Cassandra to spread replicas around your cluster to avoid
+ #   correlated failures. It does this by grouping machines into
+ #   "datacenters" and "racks." Cassandra will do its best not to have
+ #   more than one replica on the same "rack" (which may not actually
+ #   be a physical location)
+ #
+ # IF YOU CHANGE THE SNITCH AFTER DATA IS INSERTED INTO THE CLUSTER,
+ # YOU MUST RUN A FULL REPAIR, SINCE THE SNITCH AFFECTS WHERE REPLICAS
+ # ARE PLACED.
+ #
+ # Out of the box, Cassandra provides
+ # - SimpleSnitch:
+ #   Treats Strategy order as proximity. This improves cache locality
+ #   when disabling read repair, which can further improve throughput.
+ #   Only appropriate for single-datacenter deployments.
+ # - PropertyFileSnitch:
+ #   Proximity is determined by rack and data center, which are
+ #   explicitly configured in cassandra-topology.properties.
+ # - GossipingPropertyFileSnitch:
+ #   The rack and datacenter for the local node are defined in
+ #   cassandra-rackdc.properties and propagated to other nodes via gossip. If
+ #   cassandra-topology.properties exists, it is used as a fallback, allowing
+ #   migration from the PropertyFileSnitch.
+ # - RackInferringSnitch:
+ #   Proximity is determined by rack and data center, which are
+ #   assumed to correspond to the 3rd and 2nd octet of each node's
+ #   IP address, respectively. Unless this happens to match your
+ #   deployment conventions (as it did Facebook's), this is best used
+ #   as an example of writing a custom Snitch class.
+ # - Ec2Snitch:
+ #   Appropriate for EC2 deployments in a single Region. Loads Region
+ #   and Availability Zone information from the EC2 API. The Region is
+ #   treated as the Datacenter, and the Availability Zone as the rack.
+ #   Only private IPs are used, so this will not work across multiple
+ #   Regions.
+ # - Ec2MultiRegionSnitch:
+ #   Uses public IPs as broadcast_address to allow cross-region
+ #   connectivity. (Thus, you should set seed addresses to the public
+ #   IP as well.) You will need to open the storage_port or
+ #   ssl_storage_port on the public IP firewall. (For intra-Region
+ #   traffic, Cassandra will switch to the private IP after
+ #   establishing a connection.)
+ #
+ # You can use a custom Snitch by setting this to the full class name
+ # of the snitch, which will be assumed to be on your classpath.
+ endpoint_snitch: SimpleSnitch
+
+ # controls how often to perform the more expensive part of host score
+ # calculation
+ dynamic_snitch_update_interval_in_ms: 100
+ # controls how often to reset all host scores, allowing a bad host to
+ # possibly recover
+ dynamic_snitch_reset_interval_in_ms: 600000
+ # if set greater than zero and read_repair_chance is < 1.0, this will allow
+ # 'pinning' of replicas to hosts in order to increase cache capacity.
+ # The badness threshold will control how much worse the pinned host has to be
+ # before the dynamic snitch will prefer other replicas over it. This is
+ # expressed as a double which represents a percentage. Thus, a value of
+ # 0.2 means Cassandra would continue to prefer the static snitch values
+ # until the pinned host was 20% worse than the fastest.
+ dynamic_snitch_badness_threshold: 0.1
+
+ # request_scheduler -- Set this to a class that implements
+ # RequestScheduler, which will schedule incoming client requests
+ # according to the specific policy. This is useful for multi-tenancy
+ # with a single Cassandra cluster.
+ # NOTE: This is specifically for requests from the client and does
+ # not affect inter node communication.
+ # org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place
+ # org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of
+ # client requests to a node with a separate queue for each
+ # request_scheduler_id. The scheduler is further customized by
+ # request_scheduler_options as described below.
+ request_scheduler: org.apache.cassandra.scheduler.NoScheduler
+
+ # Scheduler Options vary based on the type of scheduler
+ # NoScheduler - Has no options
+ # RoundRobin
+ #  - throttle_limit -- The throttle_limit is the number of in-flight
+ #                      requests per client. Requests beyond
+ #                      that limit are queued up until
+ #                      running requests can complete.
+ #                      The value of 80 here is twice the number of
+ #                      concurrent_reads + concurrent_writes.
+ #  - default_weight -- default_weight is optional and allows for
+ #                      overriding the default which is 1.
+ #  - weights -- Weights are optional and will default to 1 or the
+ #               overridden default_weight. The weight translates into how
+ #               many requests are handled during each turn of the
+ #               RoundRobin, based on the scheduler id.
+ #
+ # request_scheduler_options:
+ #    throttle_limit: 80
+ #    default_weight: 5
+ #    weights:
+ #      Keyspace1: 1
+ #      Keyspace2: 5
+
+ # request_scheduler_id -- An identifier based on which to perform
+ # the request scheduling. Currently the only valid option is keyspace.
+ # request_scheduler_id: keyspace
+
+ # index_interval controls the sampling of entries from the primary
+ # row index in terms of space versus time. The larger the interval,
+ # the smaller and less effective the sampling will be. In technical
+ # terms, the interval corresponds to the number of index entries that
+ # are skipped between taking each sample. All the sampled entries
+ # must fit in memory. Generally, a value between 128 and 512 here
+ # coupled with a large key cache size on CFs results in the best trade-offs.
+ # This value is not often changed; however, if you have many
+ # very small rows (many to an OS page), then increasing this will
+ # often lower memory usage without an impact on performance.
+ index_interval: 128
+
+ # Enable or disable inter-node encryption
+ # Default settings are TLS v1, RSA 1024-bit keys (it is imperative that
+ # users generate their own keys) and TLS_RSA_WITH_AES_128_CBC_SHA as the cipher
+ # suite for authentication, key exchange and encryption of the actual data transfers.
+ # NOTE: No custom encryption options are enabled at the moment
+ # The available internode options are: all, none, dc, rack
+ #
+ # If set to dc, Cassandra will encrypt the traffic between the DCs
+ # If set to rack, Cassandra will encrypt the traffic between the racks
+ #
+ # The passwords used in these options must match the passwords used when generating
+ # the keystore and truststore. For instructions on generating these files, see:
+ # http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore
+ #
+ server_encryption_options:
+     internode_encryption: none
+     keystore: conf/.keystore
+     keystore_password: cassandra
+     truststore: conf/.truststore
+     truststore_password: cassandra
+     # More advanced defaults below:
+     # protocol: TLS
+     # algorithm: SunX509
+     # store_type: JKS
+     # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA]
+
+ # enable or disable client/server encryption.
+ client_encryption_options:
+     enabled: false
+     keystore: conf/.keystore
+     keystore_password: cassandra
+     # More advanced defaults below:
+     # protocol: TLS
+     # algorithm: SunX509
+     # store_type: JKS
+     # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA]
+
+ # internode_compression controls whether traffic between nodes is
+ # compressed.
+ # can be:  all  - all traffic is compressed
+ #          dc   - traffic between different datacenters is compressed
+ #          none - nothing is compressed.
+ internode_compression: all
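As shipped, this config only opens the Thrift port (rpc_port: 9160) that the cassandra-cql gem itself speaks to; the CQL native transport on 9042 is present but disabled (start_native_transport: false). If you flip that flag to true, a native-protocol client can connect directly -- a minimal sketch using the DataStax Python driver (the choice of client is an assumption; a 1.2-era node requires protocol version 1):

    from cassandra.cluster import Cluster  # pip install cassandra-driver

    # Host and port mirror the listen_address/native_transport_port defaults
    # above; protocol_version=1 matches what Cassandra 1.2 supports.
    cluster = Cluster(["127.0.0.1"], port=9042, protocol_version=1)
    session = cluster.connect()
    row = session.execute("SELECT cluster_name FROM system.local").one()
    print(row.cluster_name)  # 'Test Cluster' with the config above
    cluster.shutdown()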