cassandra 0.7.6 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data.tar.gz.sig CHANGED
Binary file
data/CHANGELOG CHANGED
@@ -1,3 +1,5 @@
1
+ v0.8.0 Compatibility with Cassandra 0.6 betas (no longer compatible with 0.5); assorted bugfixes.
2
+
1
3
  v0.7.6 Bugfixes.
2
4
 
3
5
  v0.7.5 Another packaging error.
data/Manifest CHANGED
@@ -20,6 +20,7 @@ lib/cassandra/mock.rb
20
20
  lib/cassandra/ordered_hash.rb
21
21
  lib/cassandra/protocol.rb
22
22
  lib/cassandra/time.rb
23
+ test/cassandra_client_test.rb
23
24
  test/cassandra_mock_test.rb
24
25
  test/cassandra_test.rb
25
26
  test/comparable_types_test.rb
data/Rakefile CHANGED
@@ -7,7 +7,7 @@ unless ENV['FROM_BIN_CASSANDRA_HELPER']
7
7
  p.project = "fauna"
8
8
  p.summary = "A Ruby client for the Cassandra distributed database."
9
9
  p.rubygems_version = ">= 0.8"
10
- p.dependencies = ['thrift_client >= 0.4.0', 'json', 'rake', 'simple_uuid >= 0.1.0']
10
+ p.dependencies = ['thrift_client >=0.4.0', 'json', 'rake', 'simple_uuid >=0.1.0']
11
11
  p.ignore_pattern = /^(data|vendor\/cassandra|cassandra|vendor\/thrift)/
12
12
  p.rdoc_pattern = /^(lib|bin|tasks|ext)|^README|^CHANGELOG|^TODO|^LICENSE|^COPYING$/
13
13
  p.url = "http://blog.evanweaver.com/files/doc/fauna/cassandra/"
@@ -15,8 +15,9 @@ unless ENV['FROM_BIN_CASSANDRA_HELPER']
15
15
  end
16
16
  end
17
17
 
18
- CASSANDRA_HOME = "#{ENV['HOME']}/cassandra"
19
- DIST_URL = "http://github.com/downloads/ryanking/cassandra/apache-cassandra-incubating-0.5.0.2010-02-21-bin.tar.gz"
18
+ CASSANDRA_HOME = ENV['CASSANDRA_HOME'] || "#{ENV['HOME']}/cassandra"
19
+ DOWNLOAD_DIR = "/tmp"
20
+ DIST_URL = "http://apache.osuosl.org/incubator/cassandra/0.6.0/apache-cassandra-0.6.0-beta2-bin.tar.gz"
20
21
  DIST_FILE = DIST_URL.split('/').last
21
22
 
22
23
  directory CASSANDRA_HOME
@@ -26,9 +27,9 @@ desc "Start Cassandra"
26
27
  task :cassandra => [:java, File.join(CASSANDRA_HOME, 'server'), File.join(CASSANDRA_HOME, 'test', 'data')] do
27
28
  env = ""
28
29
  if !ENV["CASSANDRA_INCLUDE"]
29
- env << "CASSANDRA_INCLUDE=#{Dir.pwd}/conf/cassandra.in.sh "
30
+ env << "CASSANDRA_INCLUDE=#{File.expand_path(Dir.pwd)}/conf/cassandra.in.sh "
30
31
  env << "CASSANDRA_HOME=#{CASSANDRA_HOME}/server "
31
- env << "CASSANDRA_CONF=#{Dir.pwd}/conf"
32
+ env << "CASSANDRA_CONF=#{File.expand_path(Dir.pwd)}/conf"
32
33
  end
33
34
 
34
35
  Dir.chdir(File.join(CASSANDRA_HOME, 'server')) do
@@ -36,16 +37,19 @@ task :cassandra => [:java, File.join(CASSANDRA_HOME, 'server'), File.join(CASSAN
36
37
  end
37
38
  end
38
39
 
39
- file File.join(CASSANDRA_HOME, 'server') => File.join(CASSANDRA_HOME, DIST_FILE) do
40
+ file File.join(CASSANDRA_HOME, 'server') => File.join(DOWNLOAD_DIR, DIST_FILE) do
40
41
  Dir.chdir(CASSANDRA_HOME) do
41
- sh "tar xzvf #{DIST_FILE}"
42
- sh "mv #{DIST_FILE.split('.')[0..2].join('.')} server"
42
+ sh "tar xzf #{DIST_FILE}"
43
+ sh "mv #{DIST_FILE.split('.')[0..2].join('.').sub('-bin', '')} server"
44
+ Dir.chdir('server') do
45
+ sh "ant ivy-retrieve"
46
+ end
43
47
  end
44
48
  end
45
49
 
46
- file File.join(CASSANDRA_HOME, DIST_FILE) => CASSANDRA_HOME do
50
+ file File.join(DOWNLOAD_DIR, DIST_FILE) => CASSANDRA_HOME do
47
51
  puts "downloading"
48
- cmd = "curl -L -o #{File.join(CASSANDRA_HOME, DIST_FILE)} #{DIST_URL}"
52
+ cmd = "curl -L -o #{File.join(DOWNLOAD_DIR, DIST_FILE)} #{DIST_URL}"
49
53
  sh cmd
50
54
  end
51
55
 
@@ -74,5 +78,5 @@ task :thrift do
74
78
  system(
75
79
  "cd vendor &&
76
80
  rm -rf gen-rb &&
77
- thrift -gen rb #{CASSANDRA_HOME}/interface/cassandra.thrift")
81
+ thrift -gen rb #{CASSANDRA_HOME}/server/interface/cassandra.thrift")
78
82
  end
data/cassandra.gemspec CHANGED
@@ -2,18 +2,18 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{cassandra}
5
- s.version = "0.7.6"
5
+ s.version = "0.8.0"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0.8") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Evan Weaver, Ryan King"]
9
9
  s.cert_chain = ["/Users/ryan/.gemkeys/gem-public_cert.pem"]
10
- s.date = %q{2010-03-01}
10
+ s.date = %q{2010-03-23}
11
11
  s.default_executable = %q{cassandra_helper}
12
12
  s.description = %q{A Ruby client for the Cassandra distributed database.}
13
13
  s.email = %q{}
14
14
  s.executables = ["cassandra_helper"]
15
15
  s.extra_rdoc_files = ["CHANGELOG", "LICENSE", "README.rdoc", "bin/cassandra_helper", "lib/cassandra.rb", "lib/cassandra/array.rb", "lib/cassandra/cassandra.rb", "lib/cassandra/columns.rb", "lib/cassandra/comparable.rb", "lib/cassandra/constants.rb", "lib/cassandra/debug.rb", "lib/cassandra/helpers.rb", "lib/cassandra/long.rb", "lib/cassandra/mock.rb", "lib/cassandra/ordered_hash.rb", "lib/cassandra/protocol.rb", "lib/cassandra/time.rb"]
16
- s.files = ["CHANGELOG", "LICENSE", "Manifest", "README.rdoc", "Rakefile", "bin/cassandra_helper", "conf/cassandra.in.sh", "conf/log4j.properties", "conf/storage-conf.xml", "lib/cassandra.rb", "lib/cassandra/array.rb", "lib/cassandra/cassandra.rb", "lib/cassandra/columns.rb", "lib/cassandra/comparable.rb", "lib/cassandra/constants.rb", "lib/cassandra/debug.rb", "lib/cassandra/helpers.rb", "lib/cassandra/long.rb", "lib/cassandra/mock.rb", "lib/cassandra/ordered_hash.rb", "lib/cassandra/protocol.rb", "lib/cassandra/time.rb", "test/cassandra_mock_test.rb", "test/cassandra_test.rb", "test/comparable_types_test.rb", "test/ordered_hash_test.rb", "test/test_helper.rb", "vendor/gen-rb/cassandra.rb", "vendor/gen-rb/cassandra_constants.rb", "vendor/gen-rb/cassandra_types.rb", "cassandra.gemspec"]
16
+ s.files = ["CHANGELOG", "LICENSE", "Manifest", "README.rdoc", "Rakefile", "bin/cassandra_helper", "conf/cassandra.in.sh", "conf/log4j.properties", "conf/storage-conf.xml", "lib/cassandra.rb", "lib/cassandra/array.rb", "lib/cassandra/cassandra.rb", "lib/cassandra/columns.rb", "lib/cassandra/comparable.rb", "lib/cassandra/constants.rb", "lib/cassandra/debug.rb", "lib/cassandra/helpers.rb", "lib/cassandra/long.rb", "lib/cassandra/mock.rb", "lib/cassandra/ordered_hash.rb", "lib/cassandra/protocol.rb", "lib/cassandra/time.rb", "test/cassandra_client_test.rb", "test/cassandra_mock_test.rb", "test/cassandra_test.rb", "test/comparable_types_test.rb", "test/ordered_hash_test.rb", "test/test_helper.rb", "vendor/gen-rb/cassandra.rb", "vendor/gen-rb/cassandra_constants.rb", "vendor/gen-rb/cassandra_types.rb", "cassandra.gemspec"]
17
17
  s.homepage = %q{http://blog.evanweaver.com/files/doc/fauna/cassandra/}
18
18
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Cassandra", "--main", "README.rdoc"]
19
19
  s.require_paths = ["lib"]
@@ -21,27 +21,27 @@ Gem::Specification.new do |s|
21
21
  s.rubygems_version = %q{1.3.5}
22
22
  s.signing_key = %q{/Users/ryan/.gemkeys/gem-private_key.pem}
23
23
  s.summary = %q{A Ruby client for the Cassandra distributed database.}
24
- s.test_files = ["test/cassandra_mock_test.rb", "test/cassandra_test.rb", "test/comparable_types_test.rb", "test/ordered_hash_test.rb", "test/test_helper.rb"]
24
+ s.test_files = ["test/cassandra_client_test.rb", "test/cassandra_mock_test.rb", "test/cassandra_test.rb", "test/comparable_types_test.rb", "test/ordered_hash_test.rb", "test/test_helper.rb"]
25
25
 
26
26
  if s.respond_to? :specification_version then
27
27
  current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
28
28
  s.specification_version = 3
29
29
 
30
30
  if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
31
- s.add_runtime_dependency(%q<thrift_client>, [">= 0", "= 0.4.0"])
31
+ s.add_runtime_dependency(%q<thrift_client>, [">= 0.4.0"])
32
32
  s.add_runtime_dependency(%q<json>, [">= 0"])
33
33
  s.add_runtime_dependency(%q<rake>, [">= 0"])
34
- s.add_runtime_dependency(%q<simple_uuid>, [">= 0", "= 0.1.0"])
34
+ s.add_runtime_dependency(%q<simple_uuid>, [">= 0.1.0"])
35
35
  else
36
- s.add_dependency(%q<thrift_client>, [">= 0", "= 0.4.0"])
36
+ s.add_dependency(%q<thrift_client>, [">= 0.4.0"])
37
37
  s.add_dependency(%q<json>, [">= 0"])
38
38
  s.add_dependency(%q<rake>, [">= 0"])
39
- s.add_dependency(%q<simple_uuid>, [">= 0", "= 0.1.0"])
39
+ s.add_dependency(%q<simple_uuid>, [">= 0.1.0"])
40
40
  end
41
41
  else
42
- s.add_dependency(%q<thrift_client>, [">= 0", "= 0.4.0"])
42
+ s.add_dependency(%q<thrift_client>, [">= 0.4.0"])
43
43
  s.add_dependency(%q<json>, [">= 0"])
44
44
  s.add_dependency(%q<rake>, [">= 0"])
45
- s.add_dependency(%q<simple_uuid>, [">= 0", "= 0.1.0"])
45
+ s.add_dependency(%q<simple_uuid>, [">= 0.1.0"])
46
46
  end
47
47
  end
data/conf/cassandra.in.sh CHANGED
@@ -18,34 +18,30 @@
18
18
  CASSANDRA_CONF=$CASSANDRA_CONF
19
19
 
20
20
  # This can be the path to a jar file, or a directory containing the
21
- # compiled classes.
21
+ # compiled classes. NOTE: This isn't needed by the startup script,
22
+ # it's just used here in constructing the classpath.
22
23
  cassandra_bin=$CASSANDRA_HOME/build/classes
23
24
 
24
25
  # The java classpath (required)
25
- CLASSPATH=$CASSANDRA_CONF:$cassandra_bin
26
+ CLASSPATH=$CASSANDRA_CONF:$CASSANDRA_BIN
26
27
 
27
- for jar in $CASSANDRA_HOME/lib/*.jar; do
28
+ for jar in $CASSANDRA_HOME/lib/*.jar $CASSANDRA_HOME/build/lib/jars/*.jar; do
28
29
  CLASSPATH=$CLASSPATH:$jar
29
30
  done
30
31
 
31
- echo "CASSANDRA_HOME: $CASSANDRA_HOME"
32
- echo "CASSANDRA_CONF: $CASSANDRA_CONF"
33
-
34
32
  # Arguments to pass to the JVM
35
33
  JVM_OPTS=" \
36
34
  -ea \
37
- -Xdebug \
38
- -Xrunjdwp:transport=dt_socket,server=y,address=8888,suspend=n \
39
- -Xms512M \
35
+ -Xms128M \
40
36
  -Xmx1G \
41
- -XX:SurvivorRatio=8 \
42
37
  -XX:TargetSurvivorRatio=90 \
43
38
  -XX:+AggressiveOpts \
44
39
  -XX:+UseParNewGC \
45
40
  -XX:+UseConcMarkSweepGC \
46
- -XX:CMSInitiatingOccupancyFraction=1 \
47
41
  -XX:+CMSParallelRemarkEnabled \
48
42
  -XX:+HeapDumpOnOutOfMemoryError \
43
+ -XX:SurvivorRatio=128 \
44
+ -XX:MaxTenuringThreshold=0 \
49
45
  -Dcom.sun.management.jmxremote.port=8080 \
50
46
  -Dcom.sun.management.jmxremote.ssl=false \
51
47
  -Dcom.sun.management.jmxremote.authenticate=false"
@@ -7,7 +7,7 @@
7
7
  ~ "License"); you may not use this file except in compliance
8
8
  ~ with the License. You may obtain a copy of the License at
9
9
  ~
10
- ~ http:/www.apache.org/licenses/LICENSE-2.0
10
+ ~ http://www.apache.org/licenses/LICENSE-2.0
11
11
  ~
12
12
  ~ Unless required by applicable law or agreed to in writing,
13
13
  ~ software distributed under the License is distributed on an
@@ -15,224 +15,325 @@
15
15
  ~ KIND, either express or implied. See the License for the
16
16
  ~ specific language governing permissions and limitations
17
17
  ~ under the License.
18
- -->
18
+ -->
19
19
  <Storage>
20
- <!--======================================================================-->
21
- <!-- Basic Configuration -->
22
- <!--======================================================================-->
23
- <ClusterName>Test</ClusterName>
24
-
25
- <!-- Tables and ColumnFamilies
26
- Think of a table as a namespace, not a relational table.
27
- (ColumnFamilies are closer in meaning to those.)
28
-
29
- There is an implicit table named 'system' for Cassandra internals.
30
- -->
31
- <Keyspaces>
32
- <Keyspace Name="Twitter">
33
- <KeysCachedFraction>0.01</KeysCachedFraction>
34
- <ColumnFamily CompareWith="UTF8Type" Name="Users" />
35
- <ColumnFamily CompareWith="UTF8Type" Name="UserAudits" />
36
- <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="UserRelationships" />
37
- <ColumnFamily CompareWith="UTF8Type" Name="Usernames" />
38
- <ColumnFamily CompareWith="UTF8Type" Name="Statuses" />
39
- <ColumnFamily CompareWith="UTF8Type" Name="StatusAudits" />
40
- <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="StatusRelationships" />
41
- <ColumnFamily CompareWith="UTF8Type" ColumnType="Super" Name="Index" />
42
- </Keyspace>
43
-
44
- <Keyspace Name="Multiblog">
45
- <KeysCachedFraction>0.01</KeysCachedFraction>
46
- <ColumnFamily CompareWith="TimeUUIDType" Name="Blogs"/>
47
- <ColumnFamily CompareWith="TimeUUIDType" Name="Comments"/>
48
- </Keyspace>
20
+ <!--======================================================================-->
21
+ <!-- Basic Configuration -->
22
+ <!--======================================================================-->
49
23
 
50
- <Keyspace Name="MultiblogLong">
51
- <KeysCachedFraction>0.01</KeysCachedFraction>
52
- <ColumnFamily CompareWith="LongType" Name="Blogs"/>
53
- <ColumnFamily CompareWith="LongType" Name="Comments"/>
54
- </Keyspace>
55
- <Keyspace Name="CassandraObject">
24
+ <!--
25
+ ~ The name of this cluster. This is mainly used to prevent machines in
26
+ ~ one logical cluster from joining another.
27
+ -->
28
+ <ClusterName>Test</ClusterName>
29
+
30
+ <!--
31
+ ~ Turn on to make new [non-seed] nodes automatically migrate the right data
32
+ ~ to themselves. (If no InitialToken is specified, they will pick one
33
+ ~ such that they will get half the range of the most-loaded node.)
34
+ ~ If a node starts up without bootstrapping, it will mark itself bootstrapped
35
+ ~ so that you can't subsequently accidently bootstrap a node with
36
+ ~ data on it. (You can reset this by wiping your data and commitlog
37
+ ~ directories.)
38
+ ~
39
+ ~ Off by default so that new clusters and upgraders from 0.4 don't
40
+ ~ bootstrap immediately. You should turn this on when you start adding
41
+ ~ new nodes to a cluster that already has data on it. (If you are upgrading
42
+ ~ from 0.4, start your cluster with it off once before changing it to true.
43
+ ~ Otherwise, no data will be lost but you will incur a lot of unnecessary
44
+ ~ I/O before your cluster starts up.)
45
+ -->
46
+ <AutoBootstrap>false</AutoBootstrap>
47
+
48
+ <!--
49
+ ~ Keyspaces and ColumnFamilies:
50
+ ~ A ColumnFamily is the Cassandra concept closest to a relational
51
+ ~ table. Keyspaces are separate groups of ColumnFamilies. Except in
52
+ ~ very unusual circumstances you will have one Keyspace per application.
53
+
54
+ ~ There is an implicit keyspace named 'system' for Cassandra internals.
55
+ -->
56
+ <Keyspaces>
57
+ <Keyspace Name="Twitter">
56
58
  <KeysCachedFraction>0.01</KeysCachedFraction>
57
- <ColumnFamily CompareWith="UTF8Type" Name="Customers" />
58
- <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="CustomerRelationships" />
59
- <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="CustomersByLastName" />
60
- <ColumnFamily CompareWith="UTF8Type" Name="Invoices" />
61
- <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="InvoiceRelationships" />
62
- <ColumnFamily CompareWith="UTF8Type" Name="InvoicesByNumber" />
63
- <ColumnFamily CompareWith="UTF8Type" Name="Payments" />
64
- <ColumnFamily CompareWith="UTF8Type" Name="Appointments" />
65
- </Keyspace>
66
- </Keyspaces>
67
-
68
- <!-- Partitioner: any IPartitioner may be used, including your own
69
- as long as it is on the classpath. Out of the box,
70
- Cassandra provides
71
- org.apache.cassandra.dht.RandomPartitioner and
72
- org.apache.cassandra.dht.OrderPreservingPartitioner.
73
- Range queries require using OrderPreservingPartitioner or a subclass.
74
-
75
- Achtung! Changing this parameter requires wiping your data directories,
76
- since the partitioner can modify the sstable on-disk format.
77
- -->
78
- <Partitioner>org.apache.cassandra.dht.OrderPreservingPartitioner</Partitioner>
79
-
80
- <!-- If you are using the OrderPreservingPartitioner and you know your key
81
- distribution, you can specify the token for this node to use.
82
- (Keys are sent to the node with the "closest" token, so distributing
83
- your tokens equally along the key distribution space will spread
84
- keys evenly across your cluster.) This setting is only checked the
85
- first time a node is started.
86
-
87
- This can also be useful with RandomPartitioner to force equal
88
- spacing of tokens around the hash space, especially for
89
- clusters with a small number of nodes. -->
90
- <InitialToken></InitialToken>
91
-
92
-
93
- <!-- EndPointSnitch: Setting this to the class that implements IEndPointSnitch
94
- which will see if two endpoints are in the same data center or on the same rack.
95
- Out of the box, Cassandra provides
96
- org.apache.cassandra.locator.EndPointSnitch
97
- -->
98
- <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
99
-
100
- <!-- Strategy: Setting this to the class that implements IReplicaPlacementStrategy
101
- will change the way the node picker works.
102
- Out of the box, Cassandra provides
103
- org.apache.cassandra.locator.RackUnawareStrategy
104
- org.apache.cassandra.locator.RackAwareStrategy
105
- (place one replica in a different datacenter, and the
106
- others on different racks in the same one.)
107
- -->
108
- <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
109
-
110
- <!-- Number of replicas of the data-->
111
- <ReplicationFactor>1</ReplicationFactor>
112
-
113
- <!-- Directories: Specify where Cassandra should store different data on disk
114
- Keep the data disks and the CommitLog disks separate for best performance
115
- -->
116
- <CommitLogDirectory>data/commitlog</CommitLogDirectory>
117
- <DataFileDirectories>
118
- <DataFileDirectory>data/data</DataFileDirectory>
119
- </DataFileDirectories>
120
- <CalloutLocation>data/callouts</CalloutLocation>
121
- <BootstrapFileDirectory>data/bootstrap</BootstrapFileDirectory>
122
- <StagingFileDirectory>data/staging</StagingFileDirectory>
123
-
124
- <!-- Addresses of hosts that are deemed contact points. Cassandra nodes use
125
- this list of hosts to find each other and learn the topology of the ring.
126
- You must change this if you are running multiple nodes!
127
- -->
128
- <Seeds>
129
- <Seed>127.0.0.1</Seed>
130
- </Seeds>
131
-
132
-
133
- <!-- Miscellaneous -->
134
-
135
- <!-- time to wait for a reply from other nodes before failing the command -->
136
- <RpcTimeoutInMillis>5000</RpcTimeoutInMillis>
137
- <!-- size to allow commitlog to grow to before creating a new segment -->
138
- <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>
139
-
140
-
141
- <!-- Local hosts and ports -->
142
-
143
- <!-- Address to bind to and tell other nodes to connect to.
144
- You _must_ change this if you want multiple nodes to be able
145
- to communicate!
146
-
147
- Leaving it blank leaves it up to InetAddress.getLocalHost().
148
- This will always do the Right Thing *if* the node is properly
149
- configured (hostname, name resolution, etc), and the Right
150
- Thing is to use the address associated with the hostname (it
151
- might not be). -->
152
- <ListenAddress>localhost</ListenAddress>
153
- <!-- TCP port, for commands and data -->
154
- <StoragePort>7000</StoragePort>
155
- <!-- UDP port, for membership communications (gossip) -->
156
- <ControlPort>7001</ControlPort>
157
-
158
- <!-- The address to bind the Thrift RPC service to. Unlike
159
- ListenAddress above, you *can* specify 0.0.0.0 here if you want
160
- Thrift to listen on all interfaces.
161
-
162
- Leaving this blank has the same effect it does for ListenAddress,
163
- (i.e. it will be based on the configured hostname of the node).
164
- -->
165
- <ThriftAddress>localhost</ThriftAddress>
166
- <!-- Thrift RPC port (the port clients connect to). -->
167
- <ThriftPort>9160</ThriftPort>
168
-
169
-
170
- <!--======================================================================-->
171
- <!-- Memory, Disk, and Performance -->
172
- <!--======================================================================-->
173
-
174
- <!-- Add column indexes to a row after its contents reach this size -->
175
- <ColumnIndexSizeInKB>256</ColumnIndexSizeInKB>
176
-
177
- <!--
178
- The maximum amount of data to store in memory before flushing to
179
- disk. Note: There is one memtable per column family, and this threshold
180
- is based solely on the amount of data stored, not actual heap memory
181
- usage (there is some overhead in indexing the columns).
182
- -->
183
- <MemtableSizeInMB>32</MemtableSizeInMB>
184
-
185
- <!--
186
- The maximum number of columns in millions to store in memory
187
- before flushing to disk. This is also a per-memtable setting.
188
- Use with MemtableSizeInMB to tune memory usage.
189
- -->
190
- <MemtableObjectCountInMillions>0.01</MemtableObjectCountInMillions>
191
-
192
- <!-- Unlike most systems, in Cassandra writes are faster than
193
- reads, so you can afford more of those in parallel.
194
- A good rule of thumb is 2 concurrent reads per processor core.
195
- You especially want more concurrentwrites if you are using
196
- CommitLogSync + CommitLogSyncDelay. -->
197
- <ConcurrentReads>8</ConcurrentReads>
198
- <ConcurrentWrites>32</ConcurrentWrites>
199
-
200
- <!-- CommitLogSync may be either "periodic" or "batch."
201
- When in batch mode, Cassandra won't ack writes until the commit log
202
- has been fsynced to disk. It will wait up to CommitLogSyncBatchWindowInMS
203
- milliseconds for other writes, before performing the sync.
204
-
205
- This is less necessary in Cassandra
206
- than in traditional databases since replication reduces the
207
- odds of losing data from a failure after writing the log
208
- entry but before it actually reaches the disk. So the other
209
- option is "timed," where wirtes may be acked immediately
210
- and the CommitLog is simply synced every CommitLogSyncPeriodInMS
211
- milliseconds.
212
- -->
213
- <CommitLogSync>periodic</CommitLogSync>
214
- <!-- Interval at which to perform syncs of the CommitLog in periodic
215
- mode. Usually the default of 1000ms is fine; increase it
216
- only if the CommitLog PendingTasks backlog in jmx shows that
217
- you are frequently scheduling a second sync while the first
218
- has not yet been processed.
219
- -->
220
- <CommitLogSyncPeriodInMS>1000</CommitLogSyncPeriodInMS>
221
- <!-- Delay (in microseconds) during which additional commit log
222
- entries may be written before fsync in batch mode. This will increase
223
- latency slightly, but can vastly improve throughput where
224
- there are many writers. Set to zero to disable
225
- (each entry will be synced individually).
226
- Reasonable values range from a minimal 0.1 to 10 or even more
227
- if throughput matters more than latency.
228
- -->
229
- <!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> -->
230
-
231
- <!-- Time to wait before garbage-collection deletion markers.
232
- Set this to a large enough value that you are confident
233
- that the deletion marker will be propagated to all replicas
234
- by the time this many seconds has elapsed, even in the
235
- face of hardware failures. The default value is ten days.
236
- -->
237
- <GCGraceSeconds>864000</GCGraceSeconds>
59
+ <ColumnFamily CompareWith="UTF8Type" Name="Users" />
60
+ <ColumnFamily CompareWith="UTF8Type" Name="UserAudits" />
61
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="UserRelationships" />
62
+ <ColumnFamily CompareWith="UTF8Type" Name="Usernames" />
63
+ <ColumnFamily CompareWith="UTF8Type" Name="Statuses" />
64
+ <ColumnFamily CompareWith="UTF8Type" Name="StatusAudits" />
65
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="StatusRelationships" />
66
+ <ColumnFamily CompareWith="UTF8Type" ColumnType="Super" Name="Index" />
67
+
68
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
69
+ <ReplicationFactor>1</ReplicationFactor>
70
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
71
+ </Keyspace>
72
+
73
+ <Keyspace Name="Multiblog">
74
+ <KeysCachedFraction>0.01</KeysCachedFraction>
75
+ <ColumnFamily CompareWith="TimeUUIDType" Name="Blogs"/>
76
+ <ColumnFamily CompareWith="TimeUUIDType" Name="Comments"/>
77
+
78
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
79
+ <ReplicationFactor>1</ReplicationFactor>
80
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
81
+ </Keyspace>
82
+
83
+ <Keyspace Name="MultiblogLong">
84
+ <KeysCachedFraction>0.01</KeysCachedFraction>
85
+ <ColumnFamily CompareWith="LongType" Name="Blogs"/>
86
+ <ColumnFamily CompareWith="LongType" Name="Comments"/>
87
+
88
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
89
+ <ReplicationFactor>1</ReplicationFactor>
90
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
91
+ </Keyspace>
92
+
93
+ <Keyspace Name="CassandraObject">
94
+ <KeysCachedFraction>0.01</KeysCachedFraction>
95
+ <ColumnFamily CompareWith="UTF8Type" Name="Customers" />
96
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="CustomerRelationships" />
97
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="CustomersByLastName" />
98
+ <ColumnFamily CompareWith="UTF8Type" Name="Invoices" />
99
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="InvoiceRelationships" />
100
+ <ColumnFamily CompareWith="UTF8Type" Name="InvoicesByNumber" />
101
+ <ColumnFamily CompareWith="UTF8Type" Name="Payments" />
102
+ <ColumnFamily CompareWith="UTF8Type" Name="Appointments" />
103
+
104
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
105
+ <ReplicationFactor>1</ReplicationFactor>
106
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
107
+ </Keyspace>
108
+ </Keyspaces>
109
+
110
+ <!--
111
+ ~ Authenticator: any IAuthenticator may be used, including your own as long
112
+ ~ as it is on the classpath. Out of the box, Cassandra provides
113
+ ~ org.apache.cassandra.auth.AllowAllAuthenticator and,
114
+ ~ org.apache.cassandra.auth.SimpleAuthenticator
115
+ ~ (SimpleAuthenticator uses access.properties and passwd.properties by
116
+ ~ default).
117
+ ~
118
+ ~ If you don't specify an authenticator, AllowAllAuthenticator is used.
119
+ -->
120
+ <Authenticator>org.apache.cassandra.auth.AllowAllAuthenticator</Authenticator>
121
+
122
+ <!--
123
+ ~ Partitioner: any IPartitioner may be used, including your own as long
124
+ ~ as it is on the classpath. Out of the box, Cassandra provides
125
+ ~ org.apache.cassandra.dht.RandomPartitioner,
126
+ ~ org.apache.cassandra.dht.OrderPreservingPartitioner, and
127
+ ~ org.apache.cassandra.dht.CollatingOrderPreservingPartitioner.
128
+ ~ (CollatingOPP collates according to EN,US rules, not naive byte
129
+ ~ ordering. Use this as an example if you need locale-aware collation.)
130
+ ~ Range queries require using an order-preserving partitioner.
131
+ ~
132
+ ~ Achtung! Changing this parameter requires wiping your data
133
+ ~ directories, since the partitioner can modify the sstable on-disk
134
+ ~ format.
135
+ -->
136
+ <Partitioner>org.apache.cassandra.dht.RandomPartitioner</Partitioner>
137
+
138
+ <!--
139
+ ~ If you are using an order-preserving partitioner and you know your key
140
+ ~ distribution, you can specify the token for this node to use. (Keys
141
+ ~ are sent to the node with the "closest" token, so distributing your
142
+ ~ tokens equally along the key distribution space will spread keys
143
+ ~ evenly across your cluster.) This setting is only checked the first
144
+ ~ time a node is started.
145
+
146
+ ~ This can also be useful with RandomPartitioner to force equal spacing
147
+ ~ of tokens around the hash space, especially for clusters with a small
148
+ ~ number of nodes.
149
+ -->
150
+ <InitialToken></InitialToken>
151
+
152
+ <!--
153
+ ~ Directories: Specify where Cassandra should store different data on
154
+ ~ disk. Keep the data disks and the CommitLog disks separate for best
155
+ ~ performance
156
+ -->
157
+ <CommitLogDirectory>/var/lib/cassandra/commitlog</CommitLogDirectory>
158
+ <DataFileDirectories>
159
+ <DataFileDirectory>/var/lib/cassandra/data</DataFileDirectory>
160
+ </DataFileDirectories>
161
+ <CalloutLocation>/var/lib/cassandra/callouts</CalloutLocation>
162
+ <StagingFileDirectory>/var/lib/cassandra/staging</StagingFileDirectory>
163
+
164
+
165
+ <!--
166
+ ~ Addresses of hosts that are deemed contact points. Cassandra nodes
167
+ ~ use this list of hosts to find each other and learn the topology of
168
+ ~ the ring. You must change this if you are running multiple nodes!
169
+ -->
170
+ <Seeds>
171
+ <Seed>127.0.0.1</Seed>
172
+ </Seeds>
173
+
174
+
175
+ <!-- Miscellaneous -->
176
+
177
+ <!-- Time to wait for a reply from other nodes before failing the command -->
178
+ <RpcTimeoutInMillis>5000</RpcTimeoutInMillis>
179
+ <!-- Size to allow commitlog to grow to before creating a new segment -->
180
+ <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>
181
+
182
+
183
+ <!-- Local hosts and ports -->
184
+
185
+ <!--
186
+ ~ Address to bind to and tell other nodes to connect to. You _must_
187
+ ~ change this if you want multiple nodes to be able to communicate!
188
+ ~
189
+ ~ Leaving it blank leaves it up to InetAddress.getLocalHost(). This
190
+ ~ will always do the Right Thing *if* the node is properly configured
191
+ ~ (hostname, name resolution, etc), and the Right Thing is to use the
192
+ ~ address associated with the hostname (it might not be).
193
+ -->
194
+ <ListenAddress>localhost</ListenAddress>
195
+ <!-- internal communications port -->
196
+ <StoragePort>7000</StoragePort>
197
+
198
+ <!--
199
+ ~ The address to bind the Thrift RPC service to. Unlike ListenAddress
200
+ ~ above, you *can* specify 0.0.0.0 here if you want Thrift to listen on
201
+ ~ all interfaces.
202
+ ~
203
+ ~ Leaving this blank has the same effect it does for ListenAddress,
204
+ ~ (i.e. it will be based on the configured hostname of the node).
205
+ -->
206
+ <ThriftAddress>localhost</ThriftAddress>
207
+ <!-- Thrift RPC port (the port clients connect to). -->
208
+ <ThriftPort>9160</ThriftPort>
209
+ <!--
210
+ ~ Whether or not to use a framed transport for Thrift. If this option
211
+ ~ is set to true then you must also use a framed transport on the
212
+ ~ client-side, (framed and non-framed transports are not compatible).
213
+ -->
214
+ <ThriftFramedTransport>false</ThriftFramedTransport>
215
+
216
+
217
+ <!--======================================================================-->
218
+ <!-- Memory, Disk, and Performance -->
219
+ <!--======================================================================-->
220
+
221
+ <!--
222
+ ~ Access mode. mmapped i/o is substantially faster, but only practical on
223
+ ~ a 64bit machine (which notably does not include EC2 "small" instances)
224
+ ~ or relatively small datasets. "auto", the safe choice, will enable
225
+ ~ mmapping on a 64bit JVM. Other values are "mmap", "mmap_index_only"
226
+ ~ (which may allow you to get part of the benefits of mmap on a 32bit
227
+ ~ machine by mmapping only index files) and "standard".
228
+ ~ (The buffer size settings that follow only apply to standard,
229
+ ~ non-mmapped i/o.)
230
+ -->
231
+ <DiskAccessMode>auto</DiskAccessMode>
232
+
233
+ <!--
234
+ ~ Buffer size to use when performing contiguous column slices. Increase
235
+ ~ this to the size of the column slices you typically perform.
236
+ ~ (Name-based queries are performed with a buffer size of
237
+ ~ ColumnIndexSizeInKB.)
238
+ -->
239
+ <SlicedBufferSizeInKB>64</SlicedBufferSizeInKB>
240
+
241
+ <!--
242
+ ~ Buffer size to use when flushing memtables to disk. (Only one
243
+ ~ memtable is ever flushed at a time.) Increase (decrease) the index
244
+ ~ buffer size relative to the data buffer if you have few (many)
245
+ ~ columns per key. Bigger is only better _if_ your memtables get large
246
+ ~ enough to use the space. (Check in your data directory after your
247
+ ~ app has been running long enough.) -->
248
+ <FlushDataBufferSizeInMB>32</FlushDataBufferSizeInMB>
249
+ <FlushIndexBufferSizeInMB>8</FlushIndexBufferSizeInMB>
250
+
251
+ <!--
252
+ ~ Add column indexes to a row after its contents reach this size.
253
+ ~ Increase if your column values are large, or if you have a very large
254
+ ~ number of columns. The competing causes are, Cassandra has to
255
+ ~ deserialize this much of the row to read a single column, so you want
256
+ ~ it to be small - at least if you do many partial-row reads - but all
257
+ ~ the index data is read for each access, so you don't want to generate
258
+ ~ that wastefully either.
259
+ -->
260
+ <ColumnIndexSizeInKB>64</ColumnIndexSizeInKB>
261
+
262
+ <!--
263
+ ~ Flush memtable after this much data has been inserted, including
264
+ ~ overwritten data. There is one memtable per column family, and
265
+ ~ this threshold is based solely on the amount of data stored, not
266
+ ~ actual heap memory usage (there is some overhead in indexing the
267
+ ~ columns).
268
+ -->
269
+ <MemtableThroughputInMB>64</MemtableThroughputInMB>
270
+ <!--
271
+ ~ Throughput setting for Binary Memtables. Typically these are
272
+ ~ used for bulk load so you want them to be larger.
273
+ -->
274
+ <BinaryMemtableThroughputInMB>256</BinaryMemtableThroughputInMB>
275
+ <!--
276
+ ~ The maximum number of columns in millions to store in memory per
277
+ ~ ColumnFamily before flushing to disk. This is also a per-memtable
278
+ ~ setting. Use with MemtableThroughputInMB to tune memory usage.
279
+ -->
280
+ <MemtableOperationsInMillions>0.3</MemtableOperationsInMillions>
281
+ <!--
282
+ ~ The maximum time to leave a dirty memtable unflushed.
283
+ ~ (While any affected columnfamilies have unflushed data from a
284
+ ~ commit log segment, that segment cannot be deleted.)
285
+ ~ This needs to be large enough that it won't cause a flush storm
286
+ ~ of all your memtables flushing at once because none has hit
287
+ ~ the size or count thresholds yet. For production, a larger
288
+ ~ value such as 1440 is recommended.
289
+ -->
290
+ <MemtableFlushAfterMinutes>60</MemtableFlushAfterMinutes>
291
+
292
+ <!--
293
+ ~ Unlike most systems, in Cassandra writes are faster than reads, so
294
+ ~ you can afford more of those in parallel. A good rule of thumb is 2
295
+ ~ concurrent reads per processor core. Increase ConcurrentWrites to
296
+ ~ the number of clients writing at once if you enable batch
296
+ ~ CommitLogSync + CommitLogSyncBatchWindowInMS. -->
298
+ <ConcurrentReads>8</ConcurrentReads>
299
+ <ConcurrentWrites>32</ConcurrentWrites>
300
+
301
+ <!--
302
+ ~ CommitLogSync may be either "periodic" or "batch." When in batch
303
+ ~ mode, Cassandra won't ack writes until the commit log has been
304
+ ~ fsynced to disk. It will wait up to CommitLogSyncBatchWindowInMS
305
+ ~ milliseconds for other writes, before performing the sync.
306
+
307
+ ~ This is less necessary in Cassandra than in traditional databases
308
+ ~ since replication reduces the odds of losing data from a failure
309
+ ~ after writing the log entry but before it actually reaches the disk.
310
+ ~ So the other option is "periodic," where writes may be acked immediately
311
+ ~ and the CommitLog is simply synced every CommitLogSyncPeriodInMS
312
+ ~ milliseconds.
313
+ -->
314
+ <CommitLogSync>periodic</CommitLogSync>
315
+ <!--
316
+ ~ Interval at which to perform syncs of the CommitLog in periodic mode.
317
+ ~ Usually the default of 10000ms is fine; increase it if your i/o
318
+ ~ load is such that syncs are taking excessively long times.
319
+ -->
320
+ <CommitLogSyncPeriodInMS>10000</CommitLogSyncPeriodInMS>
321
+ <!--
322
+ ~ Delay (in milliseconds) during which additional commit log entries
323
+ ~ may be written before fsync in batch mode. This will increase
324
+ ~ latency slightly, but can vastly improve throughput where there are
325
+ ~ many writers. Set to zero to disable (each entry will be synced
326
+ ~ individually). Reasonable values range from a minimal 0.1 to 10 or
327
+ ~ even more if throughput matters more than latency.
328
+ -->
329
+ <!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> -->
330
+
331
+ <!--
332
+ ~ Time to wait before garbage-collecting deletion markers. Set this to
333
+ ~ a large enough value that you are confident that the deletion marker
334
+ ~ will be propagated to all replicas by the time this many seconds has
335
+ ~ elapsed, even in the face of hardware failures. The default value is
336
+ ~ ten days.
337
+ -->
338
+ <GCGraceSeconds>864000</GCGraceSeconds>
238
339
  </Storage>