cassandra 0.7.6 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
data.tar.gz.sig CHANGED
Binary file
data/CHANGELOG CHANGED
@@ -1,3 +1,5 @@
1
+ v0.8.0 Compatibility with Cassandra 0.6 betas (no longer compatible with 0.5); assorted bugfixes.
2
+
1
3
  v0.7.6 Bugfixes.
2
4
 
3
5
  v0.7.5 Another packaging error.
data/Manifest CHANGED
@@ -20,6 +20,7 @@ lib/cassandra/mock.rb
20
20
  lib/cassandra/ordered_hash.rb
21
21
  lib/cassandra/protocol.rb
22
22
  lib/cassandra/time.rb
23
+ test/cassandra_client_test.rb
23
24
  test/cassandra_mock_test.rb
24
25
  test/cassandra_test.rb
25
26
  test/comparable_types_test.rb
data/Rakefile CHANGED
@@ -7,7 +7,7 @@ unless ENV['FROM_BIN_CASSANDRA_HELPER']
7
7
  p.project = "fauna"
8
8
  p.summary = "A Ruby client for the Cassandra distributed database."
9
9
  p.rubygems_version = ">= 0.8"
10
- p.dependencies = ['thrift_client >= 0.4.0', 'json', 'rake', 'simple_uuid >= 0.1.0']
10
+ p.dependencies = ['thrift_client >=0.4.0', 'json', 'rake', 'simple_uuid >=0.1.0']
11
11
  p.ignore_pattern = /^(data|vendor\/cassandra|cassandra|vendor\/thrift)/
12
12
  p.rdoc_pattern = /^(lib|bin|tasks|ext)|^README|^CHANGELOG|^TODO|^LICENSE|^COPYING$/
13
13
  p.url = "http://blog.evanweaver.com/files/doc/fauna/cassandra/"
@@ -15,8 +15,9 @@ unless ENV['FROM_BIN_CASSANDRA_HELPER']
15
15
  end
16
16
  end
17
17
 
18
- CASSANDRA_HOME = "#{ENV['HOME']}/cassandra"
19
- DIST_URL = "http://github.com/downloads/ryanking/cassandra/apache-cassandra-incubating-0.5.0.2010-02-21-bin.tar.gz"
18
+ CASSANDRA_HOME = ENV['CASSANDRA_HOME'] || "#{ENV['HOME']}/cassandra"
19
+ DOWNLOAD_DIR = "/tmp"
20
+ DIST_URL = "http://apache.osuosl.org/incubator/cassandra/0.6.0/apache-cassandra-0.6.0-beta2-bin.tar.gz"
20
21
  DIST_FILE = DIST_URL.split('/').last
21
22
 
22
23
  directory CASSANDRA_HOME
@@ -26,9 +27,9 @@ desc "Start Cassandra"
26
27
  task :cassandra => [:java, File.join(CASSANDRA_HOME, 'server'), File.join(CASSANDRA_HOME, 'test', 'data')] do
27
28
  env = ""
28
29
  if !ENV["CASSANDRA_INCLUDE"]
29
- env << "CASSANDRA_INCLUDE=#{Dir.pwd}/conf/cassandra.in.sh "
30
+ env << "CASSANDRA_INCLUDE=#{File.expand_path(Dir.pwd)}/conf/cassandra.in.sh "
30
31
  env << "CASSANDRA_HOME=#{CASSANDRA_HOME}/server "
31
- env << "CASSANDRA_CONF=#{Dir.pwd}/conf"
32
+ env << "CASSANDRA_CONF=#{File.expand_path(Dir.pwd)}/conf"
32
33
  end
33
34
 
34
35
  Dir.chdir(File.join(CASSANDRA_HOME, 'server')) do
@@ -36,16 +37,19 @@ task :cassandra => [:java, File.join(CASSANDRA_HOME, 'server'), File.join(CASSAN
36
37
  end
37
38
  end
38
39
 
39
- file File.join(CASSANDRA_HOME, 'server') => File.join(CASSANDRA_HOME, DIST_FILE) do
40
+ file File.join(CASSANDRA_HOME, 'server') => File.join(DOWNLOAD_DIR, DIST_FILE) do
40
41
  Dir.chdir(CASSANDRA_HOME) do
41
- sh "tar xzvf #{DIST_FILE}"
42
- sh "mv #{DIST_FILE.split('.')[0..2].join('.')} server"
42
+ sh "tar xzf #{DIST_FILE}"
43
+ sh "mv #{DIST_FILE.split('.')[0..2].join('.').sub('-bin', '')} server"
44
+ Dir.chdir('server') do
45
+ sh "ant ivy-retrieve"
46
+ end
43
47
  end
44
48
  end
45
49
 
46
- file File.join(CASSANDRA_HOME, DIST_FILE) => CASSANDRA_HOME do
50
+ file File.join(DOWNLOAD_DIR, DIST_FILE) => CASSANDRA_HOME do
47
51
  puts "downloading"
48
- cmd = "curl -L -o #{File.join(CASSANDRA_HOME, DIST_FILE)} #{DIST_URL}"
52
+ cmd = "curl -L -o #{File.join(DOWNLOAD_DIR, DIST_FILE)} #{DIST_URL}"
49
53
  sh cmd
50
54
  end
51
55
 
@@ -74,5 +78,5 @@ task :thrift do
74
78
  system(
75
79
  "cd vendor &&
76
80
  rm -rf gen-rb &&
77
- thrift -gen rb #{CASSANDRA_HOME}/interface/cassandra.thrift")
81
+ thrift -gen rb #{CASSANDRA_HOME}/server/interface/cassandra.thrift")
78
82
  end
data/cassandra.gemspec CHANGED
@@ -2,18 +2,18 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{cassandra}
5
- s.version = "0.7.6"
5
+ s.version = "0.8.0"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0.8") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Evan Weaver, Ryan King"]
9
9
  s.cert_chain = ["/Users/ryan/.gemkeys/gem-public_cert.pem"]
10
- s.date = %q{2010-03-01}
10
+ s.date = %q{2010-03-23}
11
11
  s.default_executable = %q{cassandra_helper}
12
12
  s.description = %q{A Ruby client for the Cassandra distributed database.}
13
13
  s.email = %q{}
14
14
  s.executables = ["cassandra_helper"]
15
15
  s.extra_rdoc_files = ["CHANGELOG", "LICENSE", "README.rdoc", "bin/cassandra_helper", "lib/cassandra.rb", "lib/cassandra/array.rb", "lib/cassandra/cassandra.rb", "lib/cassandra/columns.rb", "lib/cassandra/comparable.rb", "lib/cassandra/constants.rb", "lib/cassandra/debug.rb", "lib/cassandra/helpers.rb", "lib/cassandra/long.rb", "lib/cassandra/mock.rb", "lib/cassandra/ordered_hash.rb", "lib/cassandra/protocol.rb", "lib/cassandra/time.rb"]
16
- s.files = ["CHANGELOG", "LICENSE", "Manifest", "README.rdoc", "Rakefile", "bin/cassandra_helper", "conf/cassandra.in.sh", "conf/log4j.properties", "conf/storage-conf.xml", "lib/cassandra.rb", "lib/cassandra/array.rb", "lib/cassandra/cassandra.rb", "lib/cassandra/columns.rb", "lib/cassandra/comparable.rb", "lib/cassandra/constants.rb", "lib/cassandra/debug.rb", "lib/cassandra/helpers.rb", "lib/cassandra/long.rb", "lib/cassandra/mock.rb", "lib/cassandra/ordered_hash.rb", "lib/cassandra/protocol.rb", "lib/cassandra/time.rb", "test/cassandra_mock_test.rb", "test/cassandra_test.rb", "test/comparable_types_test.rb", "test/ordered_hash_test.rb", "test/test_helper.rb", "vendor/gen-rb/cassandra.rb", "vendor/gen-rb/cassandra_constants.rb", "vendor/gen-rb/cassandra_types.rb", "cassandra.gemspec"]
16
+ s.files = ["CHANGELOG", "LICENSE", "Manifest", "README.rdoc", "Rakefile", "bin/cassandra_helper", "conf/cassandra.in.sh", "conf/log4j.properties", "conf/storage-conf.xml", "lib/cassandra.rb", "lib/cassandra/array.rb", "lib/cassandra/cassandra.rb", "lib/cassandra/columns.rb", "lib/cassandra/comparable.rb", "lib/cassandra/constants.rb", "lib/cassandra/debug.rb", "lib/cassandra/helpers.rb", "lib/cassandra/long.rb", "lib/cassandra/mock.rb", "lib/cassandra/ordered_hash.rb", "lib/cassandra/protocol.rb", "lib/cassandra/time.rb", "test/cassandra_client_test.rb", "test/cassandra_mock_test.rb", "test/cassandra_test.rb", "test/comparable_types_test.rb", "test/ordered_hash_test.rb", "test/test_helper.rb", "vendor/gen-rb/cassandra.rb", "vendor/gen-rb/cassandra_constants.rb", "vendor/gen-rb/cassandra_types.rb", "cassandra.gemspec"]
17
17
  s.homepage = %q{http://blog.evanweaver.com/files/doc/fauna/cassandra/}
18
18
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Cassandra", "--main", "README.rdoc"]
19
19
  s.require_paths = ["lib"]
@@ -21,27 +21,27 @@ Gem::Specification.new do |s|
21
21
  s.rubygems_version = %q{1.3.5}
22
22
  s.signing_key = %q{/Users/ryan/.gemkeys/gem-private_key.pem}
23
23
  s.summary = %q{A Ruby client for the Cassandra distributed database.}
24
- s.test_files = ["test/cassandra_mock_test.rb", "test/cassandra_test.rb", "test/comparable_types_test.rb", "test/ordered_hash_test.rb", "test/test_helper.rb"]
24
+ s.test_files = ["test/cassandra_client_test.rb", "test/cassandra_mock_test.rb", "test/cassandra_test.rb", "test/comparable_types_test.rb", "test/ordered_hash_test.rb", "test/test_helper.rb"]
25
25
 
26
26
  if s.respond_to? :specification_version then
27
27
  current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
28
28
  s.specification_version = 3
29
29
 
30
30
  if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
31
- s.add_runtime_dependency(%q<thrift_client>, [">= 0", "= 0.4.0"])
31
+ s.add_runtime_dependency(%q<thrift_client>, [">= 0.4.0"])
32
32
  s.add_runtime_dependency(%q<json>, [">= 0"])
33
33
  s.add_runtime_dependency(%q<rake>, [">= 0"])
34
- s.add_runtime_dependency(%q<simple_uuid>, [">= 0", "= 0.1.0"])
34
+ s.add_runtime_dependency(%q<simple_uuid>, [">= 0.1.0"])
35
35
  else
36
- s.add_dependency(%q<thrift_client>, [">= 0", "= 0.4.0"])
36
+ s.add_dependency(%q<thrift_client>, [">= 0.4.0"])
37
37
  s.add_dependency(%q<json>, [">= 0"])
38
38
  s.add_dependency(%q<rake>, [">= 0"])
39
- s.add_dependency(%q<simple_uuid>, [">= 0", "= 0.1.0"])
39
+ s.add_dependency(%q<simple_uuid>, [">= 0.1.0"])
40
40
  end
41
41
  else
42
- s.add_dependency(%q<thrift_client>, [">= 0", "= 0.4.0"])
42
+ s.add_dependency(%q<thrift_client>, [">= 0.4.0"])
43
43
  s.add_dependency(%q<json>, [">= 0"])
44
44
  s.add_dependency(%q<rake>, [">= 0"])
45
- s.add_dependency(%q<simple_uuid>, [">= 0", "= 0.1.0"])
45
+ s.add_dependency(%q<simple_uuid>, [">= 0.1.0"])
46
46
  end
47
47
  end
data/conf/cassandra.in.sh CHANGED
@@ -18,34 +18,30 @@
18
18
  CASSANDRA_CONF=$CASSANDRA_CONF
19
19
 
20
20
  # This can be the path to a jar file, or a directory containing the
21
- # compiled classes.
21
+ # compiled classes. NOTE: This isn't needed by the startup script,
22
+ # it's just used here in constructing the classpath.
22
23
  cassandra_bin=$CASSANDRA_HOME/build/classes
23
24
 
24
25
  # The java classpath (required)
25
- CLASSPATH=$CASSANDRA_CONF:$cassandra_bin
26
+ CLASSPATH=$CASSANDRA_CONF:$CASSANDRA_BIN
26
27
 
27
- for jar in $CASSANDRA_HOME/lib/*.jar; do
28
+ for jar in $CASSANDRA_HOME/lib/*.jar $CASSANDRA_HOME/build/lib/jars/*.jar; do
28
29
  CLASSPATH=$CLASSPATH:$jar
29
30
  done
30
31
 
31
- echo "CASSANDRA_HOME: $CASSANDRA_HOME"
32
- echo "CASSANDRA_CONF: $CASSANDRA_CONF"
33
-
34
32
  # Arguments to pass to the JVM
35
33
  JVM_OPTS=" \
36
34
  -ea \
37
- -Xdebug \
38
- -Xrunjdwp:transport=dt_socket,server=y,address=8888,suspend=n \
39
- -Xms512M \
35
+ -Xms128M \
40
36
  -Xmx1G \
41
- -XX:SurvivorRatio=8 \
42
37
  -XX:TargetSurvivorRatio=90 \
43
38
  -XX:+AggressiveOpts \
44
39
  -XX:+UseParNewGC \
45
40
  -XX:+UseConcMarkSweepGC \
46
- -XX:CMSInitiatingOccupancyFraction=1 \
47
41
  -XX:+CMSParallelRemarkEnabled \
48
42
  -XX:+HeapDumpOnOutOfMemoryError \
43
+ -XX:SurvivorRatio=128 \
44
+ -XX:MaxTenuringThreshold=0 \
49
45
  -Dcom.sun.management.jmxremote.port=8080 \
50
46
  -Dcom.sun.management.jmxremote.ssl=false \
51
47
  -Dcom.sun.management.jmxremote.authenticate=false"
@@ -7,7 +7,7 @@
7
7
  ~ "License"); you may not use this file except in compliance
8
8
  ~ with the License. You may obtain a copy of the License at
9
9
  ~
10
- ~ http:/www.apache.org/licenses/LICENSE-2.0
10
+ ~ http://www.apache.org/licenses/LICENSE-2.0
11
11
  ~
12
12
  ~ Unless required by applicable law or agreed to in writing,
13
13
  ~ software distributed under the License is distributed on an
@@ -15,224 +15,325 @@
15
15
  ~ KIND, either express or implied. See the License for the
16
16
  ~ specific language governing permissions and limitations
17
17
  ~ under the License.
18
- -->
18
+ -->
19
19
  <Storage>
20
- <!--======================================================================-->
21
- <!-- Basic Configuration -->
22
- <!--======================================================================-->
23
- <ClusterName>Test</ClusterName>
24
-
25
- <!-- Tables and ColumnFamilies
26
- Think of a table as a namespace, not a relational table.
27
- (ColumnFamilies are closer in meaning to those.)
28
-
29
- There is an implicit table named 'system' for Cassandra internals.
30
- -->
31
- <Keyspaces>
32
- <Keyspace Name="Twitter">
33
- <KeysCachedFraction>0.01</KeysCachedFraction>
34
- <ColumnFamily CompareWith="UTF8Type" Name="Users" />
35
- <ColumnFamily CompareWith="UTF8Type" Name="UserAudits" />
36
- <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="UserRelationships" />
37
- <ColumnFamily CompareWith="UTF8Type" Name="Usernames" />
38
- <ColumnFamily CompareWith="UTF8Type" Name="Statuses" />
39
- <ColumnFamily CompareWith="UTF8Type" Name="StatusAudits" />
40
- <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="StatusRelationships" />
41
- <ColumnFamily CompareWith="UTF8Type" ColumnType="Super" Name="Index" />
42
- </Keyspace>
43
-
44
- <Keyspace Name="Multiblog">
45
- <KeysCachedFraction>0.01</KeysCachedFraction>
46
- <ColumnFamily CompareWith="TimeUUIDType" Name="Blogs"/>
47
- <ColumnFamily CompareWith="TimeUUIDType" Name="Comments"/>
48
- </Keyspace>
20
+ <!--======================================================================-->
21
+ <!-- Basic Configuration -->
22
+ <!--======================================================================-->
49
23
 
50
- <Keyspace Name="MultiblogLong">
51
- <KeysCachedFraction>0.01</KeysCachedFraction>
52
- <ColumnFamily CompareWith="LongType" Name="Blogs"/>
53
- <ColumnFamily CompareWith="LongType" Name="Comments"/>
54
- </Keyspace>
55
- <Keyspace Name="CassandraObject">
24
+ <!--
25
+ ~ The name of this cluster. This is mainly used to prevent machines in
26
+ ~ one logical cluster from joining another.
27
+ -->
28
+ <ClusterName>Test</ClusterName>
29
+
30
+ <!--
31
+ ~ Turn on to make new [non-seed] nodes automatically migrate the right data
32
+ ~ to themselves. (If no InitialToken is specified, they will pick one
33
+ ~ such that they will get half the range of the most-loaded node.)
34
+ ~ If a node starts up without bootstrapping, it will mark itself bootstrapped
35
+ ~ so that you can't subsequently accidentally bootstrap a node with
36
+ ~ data on it. (You can reset this by wiping your data and commitlog
37
+ ~ directories.)
38
+ ~
39
+ ~ Off by default so that new clusters and upgraders from 0.4 don't
40
+ ~ bootstrap immediately. You should turn this on when you start adding
41
+ ~ new nodes to a cluster that already has data on it. (If you are upgrading
42
+ ~ from 0.4, start your cluster with it off once before changing it to true.
43
+ ~ Otherwise, no data will be lost but you will incur a lot of unnecessary
44
+ ~ I/O before your cluster starts up.)
45
+ -->
46
+ <AutoBootstrap>false</AutoBootstrap>
47
+
48
+ <!--
49
+ ~ Keyspaces and ColumnFamilies:
50
+ ~ A ColumnFamily is the Cassandra concept closest to a relational
51
+ ~ table. Keyspaces are separate groups of ColumnFamilies. Except in
52
+ ~ very unusual circumstances you will have one Keyspace per application.
53
+
54
+ ~ There is an implicit keyspace named 'system' for Cassandra internals.
55
+ -->
56
+ <Keyspaces>
57
+ <Keyspace Name="Twitter">
56
58
  <KeysCachedFraction>0.01</KeysCachedFraction>
57
- <ColumnFamily CompareWith="UTF8Type" Name="Customers" />
58
- <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="CustomerRelationships" />
59
- <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="CustomersByLastName" />
60
- <ColumnFamily CompareWith="UTF8Type" Name="Invoices" />
61
- <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="InvoiceRelationships" />
62
- <ColumnFamily CompareWith="UTF8Type" Name="InvoicesByNumber" />
63
- <ColumnFamily CompareWith="UTF8Type" Name="Payments" />
64
- <ColumnFamily CompareWith="UTF8Type" Name="Appointments" />
65
- </Keyspace>
66
- </Keyspaces>
67
-
68
- <!-- Partitioner: any IPartitioner may be used, including your own
69
- as long as it is on the classpath. Out of the box,
70
- Cassandra provides
71
- org.apache.cassandra.dht.RandomPartitioner and
72
- org.apache.cassandra.dht.OrderPreservingPartitioner.
73
- Range queries require using OrderPreservingPartitioner or a subclass.
74
-
75
- Achtung! Changing this parameter requires wiping your data directories,
76
- since the partitioner can modify the sstable on-disk format.
77
- -->
78
- <Partitioner>org.apache.cassandra.dht.OrderPreservingPartitioner</Partitioner>
79
-
80
- <!-- If you are using the OrderPreservingPartitioner and you know your key
81
- distribution, you can specify the token for this node to use.
82
- (Keys are sent to the node with the "closest" token, so distributing
83
- your tokens equally along the key distribution space will spread
84
- keys evenly across your cluster.) This setting is only checked the
85
- first time a node is started.
86
-
87
- This can also be useful with RandomPartitioner to force equal
88
- spacing of tokens around the hash space, especially for
89
- clusters with a small number of nodes. -->
90
- <InitialToken></InitialToken>
91
-
92
-
93
- <!-- EndPointSnitch: Setting this to the class that implements IEndPointSnitch
94
- which will see if two endpoints are in the same data center or on the same rack.
95
- Out of the box, Cassandra provides
96
- org.apache.cassandra.locator.EndPointSnitch
97
- -->
98
- <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
99
-
100
- <!-- Strategy: Setting this to the class that implements IReplicaPlacementStrategy
101
- will change the way the node picker works.
102
- Out of the box, Cassandra provides
103
- org.apache.cassandra.locator.RackUnawareStrategy
104
- org.apache.cassandra.locator.RackAwareStrategy
105
- (place one replica in a different datacenter, and the
106
- others on different racks in the same one.)
107
- -->
108
- <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
109
-
110
- <!-- Number of replicas of the data-->
111
- <ReplicationFactor>1</ReplicationFactor>
112
-
113
- <!-- Directories: Specify where Cassandra should store different data on disk
114
- Keep the data disks and the CommitLog disks separate for best performance
115
- -->
116
- <CommitLogDirectory>data/commitlog</CommitLogDirectory>
117
- <DataFileDirectories>
118
- <DataFileDirectory>data/data</DataFileDirectory>
119
- </DataFileDirectories>
120
- <CalloutLocation>data/callouts</CalloutLocation>
121
- <BootstrapFileDirectory>data/bootstrap</BootstrapFileDirectory>
122
- <StagingFileDirectory>data/staging</StagingFileDirectory>
123
-
124
- <!-- Addresses of hosts that are deemed contact points. Cassandra nodes use
125
- this list of hosts to find each other and learn the topology of the ring.
126
- You must change this if you are running multiple nodes!
127
- -->
128
- <Seeds>
129
- <Seed>127.0.0.1</Seed>
130
- </Seeds>
131
-
132
-
133
- <!-- Miscellaneous -->
134
-
135
- <!-- time to wait for a reply from other nodes before failing the command -->
136
- <RpcTimeoutInMillis>5000</RpcTimeoutInMillis>
137
- <!-- size to allow commitlog to grow to before creating a new segment -->
138
- <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>
139
-
140
-
141
- <!-- Local hosts and ports -->
142
-
143
- <!-- Address to bind to and tell other nodes to connect to.
144
- You _must_ change this if you want multiple nodes to be able
145
- to communicate!
146
-
147
- Leaving it blank leaves it up to InetAddress.getLocalHost().
148
- This will always do the Right Thing *if* the node is properly
149
- configured (hostname, name resolution, etc), and the Right
150
- Thing is to use the address associated with the hostname (it
151
- might not be). -->
152
- <ListenAddress>localhost</ListenAddress>
153
- <!-- TCP port, for commands and data -->
154
- <StoragePort>7000</StoragePort>
155
- <!-- UDP port, for membership communications (gossip) -->
156
- <ControlPort>7001</ControlPort>
157
-
158
- <!-- The address to bind the Thrift RPC service to. Unlike
159
- ListenAddress above, you *can* specify 0.0.0.0 here if you want
160
- Thrift to listen on all interfaces.
161
-
162
- Leaving this blank has the same effect it does for ListenAddress,
163
- (i.e. it will be based on the configured hostname of the node).
164
- -->
165
- <ThriftAddress>localhost</ThriftAddress>
166
- <!-- Thrift RPC port (the port clients connect to). -->
167
- <ThriftPort>9160</ThriftPort>
168
-
169
-
170
- <!--======================================================================-->
171
- <!-- Memory, Disk, and Performance -->
172
- <!--======================================================================-->
173
-
174
- <!-- Add column indexes to a row after its contents reach this size -->
175
- <ColumnIndexSizeInKB>256</ColumnIndexSizeInKB>
176
-
177
- <!--
178
- The maximum amount of data to store in memory before flushing to
179
- disk. Note: There is one memtable per column family, and this threshold
180
- is based solely on the amount of data stored, not actual heap memory
181
- usage (there is some overhead in indexing the columns).
182
- -->
183
- <MemtableSizeInMB>32</MemtableSizeInMB>
184
-
185
- <!--
186
- The maximum number of columns in millions to store in memory
187
- before flushing to disk. This is also a per-memtable setting.
188
- Use with MemtableSizeInMB to tune memory usage.
189
- -->
190
- <MemtableObjectCountInMillions>0.01</MemtableObjectCountInMillions>
191
-
192
- <!-- Unlike most systems, in Cassandra writes are faster than
193
- reads, so you can afford more of those in parallel.
194
- A good rule of thumb is 2 concurrent reads per processor core.
195
- You especially want more concurrent writes if you are using
196
- CommitLogSync + CommitLogSyncDelay. -->
197
- <ConcurrentReads>8</ConcurrentReads>
198
- <ConcurrentWrites>32</ConcurrentWrites>
199
-
200
- <!-- CommitLogSync may be either "periodic" or "batch."
201
- When in batch mode, Cassandra won't ack writes until the commit log
202
- has been fsynced to disk. It will wait up to CommitLogSyncBatchWindowInMS
203
- milliseconds for other writes, before performing the sync.
204
-
205
- This is less necessary in Cassandra
206
- than in traditional databases since replication reduces the
207
- odds of losing data from a failure after writing the log
208
- entry but before it actually reaches the disk. So the other
209
- option is "timed," where writes may be acked immediately
210
- and the CommitLog is simply synced every CommitLogSyncPeriodInMS
211
- milliseconds.
212
- -->
213
- <CommitLogSync>periodic</CommitLogSync>
214
- <!-- Interval at which to perform syncs of the CommitLog in periodic
215
- mode. Usually the default of 1000ms is fine; increase it
216
- only if the CommitLog PendingTasks backlog in jmx shows that
217
- you are frequently scheduling a second sync while the first
218
- has not yet been processed.
219
- -->
220
- <CommitLogSyncPeriodInMS>1000</CommitLogSyncPeriodInMS>
221
- <!-- Delay (in microseconds) during which additional commit log
222
- entries may be written before fsync in batch mode. This will increase
223
- latency slightly, but can vastly improve throughput where
224
- there are many writers. Set to zero to disable
225
- (each entry will be synced individually).
226
- Reasonable values range from a minimal 0.1 to 10 or even more
227
- if throughput matters more than latency.
228
- -->
229
- <!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> -->
230
-
231
- <!-- Time to wait before garbage-collection deletion markers.
232
- Set this to a large enough value that you are confident
233
- that the deletion marker will be propagated to all replicas
234
- by the time this many seconds has elapsed, even in the
235
- face of hardware failures. The default value is ten days.
236
- -->
237
- <GCGraceSeconds>864000</GCGraceSeconds>
59
+ <ColumnFamily CompareWith="UTF8Type" Name="Users" />
60
+ <ColumnFamily CompareWith="UTF8Type" Name="UserAudits" />
61
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="UserRelationships" />
62
+ <ColumnFamily CompareWith="UTF8Type" Name="Usernames" />
63
+ <ColumnFamily CompareWith="UTF8Type" Name="Statuses" />
64
+ <ColumnFamily CompareWith="UTF8Type" Name="StatusAudits" />
65
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="StatusRelationships" />
66
+ <ColumnFamily CompareWith="UTF8Type" ColumnType="Super" Name="Index" />
67
+
68
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
69
+ <ReplicationFactor>1</ReplicationFactor>
70
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
71
+ </Keyspace>
72
+
73
+ <Keyspace Name="Multiblog">
74
+ <KeysCachedFraction>0.01</KeysCachedFraction>
75
+ <ColumnFamily CompareWith="TimeUUIDType" Name="Blogs"/>
76
+ <ColumnFamily CompareWith="TimeUUIDType" Name="Comments"/>
77
+
78
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
79
+ <ReplicationFactor>1</ReplicationFactor>
80
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
81
+ </Keyspace>
82
+
83
+ <Keyspace Name="MultiblogLong">
84
+ <KeysCachedFraction>0.01</KeysCachedFraction>
85
+ <ColumnFamily CompareWith="LongType" Name="Blogs"/>
86
+ <ColumnFamily CompareWith="LongType" Name="Comments"/>
87
+
88
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
89
+ <ReplicationFactor>1</ReplicationFactor>
90
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
91
+ </Keyspace>
92
+
93
+ <Keyspace Name="CassandraObject">
94
+ <KeysCachedFraction>0.01</KeysCachedFraction>
95
+ <ColumnFamily CompareWith="UTF8Type" Name="Customers" />
96
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="CustomerRelationships" />
97
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="CustomersByLastName" />
98
+ <ColumnFamily CompareWith="UTF8Type" Name="Invoices" />
99
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="InvoiceRelationships" />
100
+ <ColumnFamily CompareWith="UTF8Type" Name="InvoicesByNumber" />
101
+ <ColumnFamily CompareWith="UTF8Type" Name="Payments" />
102
+ <ColumnFamily CompareWith="UTF8Type" Name="Appointments" />
103
+
104
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
105
+ <ReplicationFactor>1</ReplicationFactor>
106
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
107
+ </Keyspace>
108
+ </Keyspaces>
109
+
110
+ <!--
111
+ ~ Authenticator: any IAuthenticator may be used, including your own as long
112
+ ~ as it is on the classpath. Out of the box, Cassandra provides
113
+ ~ org.apache.cassandra.auth.AllowAllAuthenticator and,
114
+ ~ org.apache.cassandra.auth.SimpleAuthenticator
115
+ ~ (SimpleAuthenticator uses access.properties and passwd.properties by
116
+ ~ default).
117
+ ~
118
+ ~ If you don't specify an authenticator, AllowAllAuthenticator is used.
119
+ -->
120
+ <Authenticator>org.apache.cassandra.auth.AllowAllAuthenticator</Authenticator>
121
+
122
+ <!--
123
+ ~ Partitioner: any IPartitioner may be used, including your own as long
124
+ ~ as it is on the classpath. Out of the box, Cassandra provides
125
+ ~ org.apache.cassandra.dht.RandomPartitioner,
126
+ ~ org.apache.cassandra.dht.OrderPreservingPartitioner, and
127
+ ~ org.apache.cassandra.dht.CollatingOrderPreservingPartitioner.
128
+ ~ (CollatingOPP colates according to EN,US rules, not naive byte
129
+ ~ ordering. Use this as an example if you need locale-aware collation.)
130
+ ~ Range queries require using an order-preserving partitioner.
131
+ ~
132
+ ~ Achtung! Changing this parameter requires wiping your data
133
+ ~ directories, since the partitioner can modify the sstable on-disk
134
+ ~ format.
135
+ -->
136
+ <Partitioner>org.apache.cassandra.dht.RandomPartitioner</Partitioner>
137
+
138
+ <!--
139
+ ~ If you are using an order-preserving partitioner and you know your key
140
+ ~ distribution, you can specify the token for this node to use. (Keys
141
+ ~ are sent to the node with the "closest" token, so distributing your
142
+ ~ tokens equally along the key distribution space will spread keys
143
+ ~ evenly across your cluster.) This setting is only checked the first
144
+ ~ time a node is started.
145
+
146
+ ~ This can also be useful with RandomPartitioner to force equal spacing
147
+ ~ of tokens around the hash space, especially for clusters with a small
148
+ ~ number of nodes.
149
+ -->
150
+ <InitialToken></InitialToken>
151
+
152
+ <!--
153
+ ~ Directories: Specify where Cassandra should store different data on
154
+ ~ disk. Keep the data disks and the CommitLog disks separate for best
155
+ ~ performance
156
+ -->
157
+ <CommitLogDirectory>/var/lib/cassandra/commitlog</CommitLogDirectory>
158
+ <DataFileDirectories>
159
+ <DataFileDirectory>/var/lib/cassandra/data</DataFileDirectory>
160
+ </DataFileDirectories>
161
+ <CalloutLocation>/var/lib/cassandra/callouts</CalloutLocation>
162
+ <StagingFileDirectory>/var/lib/cassandra/staging</StagingFileDirectory>
163
+
164
+
165
+ <!--
166
+ ~ Addresses of hosts that are deemed contact points. Cassandra nodes
167
+ ~ use this list of hosts to find each other and learn the topology of
168
+ ~ the ring. You must change this if you are running multiple nodes!
169
+ -->
170
+ <Seeds>
171
+ <Seed>127.0.0.1</Seed>
172
+ </Seeds>
173
+
174
+
175
+ <!-- Miscellaneous -->
176
+
177
+ <!-- Time to wait for a reply from other nodes before failing the command -->
178
+ <RpcTimeoutInMillis>5000</RpcTimeoutInMillis>
179
+ <!-- Size to allow commitlog to grow to before creating a new segment -->
180
+ <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>
181
+
182
+
183
+ <!-- Local hosts and ports -->
184
+
185
+ <!--
186
+ ~ Address to bind to and tell other nodes to connect to. You _must_
187
+ ~ change this if you want multiple nodes to be able to communicate!
188
+ ~
189
+ ~ Leaving it blank leaves it up to InetAddress.getLocalHost(). This
190
+ ~ will always do the Right Thing *if* the node is properly configured
191
+ ~ (hostname, name resolution, etc), and the Right Thing is to use the
192
+ ~ address associated with the hostname (it might not be).
193
+ -->
194
+ <ListenAddress>localhost</ListenAddress>
195
+ <!-- internal communications port -->
196
+ <StoragePort>7000</StoragePort>
197
+
198
+ <!--
199
+ ~ The address to bind the Thrift RPC service to. Unlike ListenAddress
200
+ ~ above, you *can* specify 0.0.0.0 here if you want Thrift to listen on
201
+ ~ all interfaces.
202
+ ~
203
+ ~ Leaving this blank has the same effect it does for ListenAddress,
204
+ ~ (i.e. it will be based on the configured hostname of the node).
205
+ -->
206
+ <ThriftAddress>localhost</ThriftAddress>
207
+ <!-- Thrift RPC port (the port clients connect to). -->
208
+ <ThriftPort>9160</ThriftPort>
209
+ <!--
210
+ ~ Whether or not to use a framed transport for Thrift. If this option
211
+ ~ is set to true then you must also use a framed transport on the
212
+ ~ client-side, (framed and non-framed transports are not compatible).
213
+ -->
214
+ <ThriftFramedTransport>false</ThriftFramedTransport>
215
+
216
+
217
+ <!--======================================================================-->
218
+ <!-- Memory, Disk, and Performance -->
219
+ <!--======================================================================-->
220
+
221
+ <!--
222
+ ~ Access mode. mmapped i/o is substantially faster, but only practical on
223
+ ~ a 64bit machine (which notably does not include EC2 "small" instances)
224
+ ~ or relatively small datasets. "auto", the safe choice, will enable
225
+ ~ mmapping on a 64bit JVM. Other values are "mmap", "mmap_index_only"
226
+ ~ (which may allow you to get part of the benefits of mmap on a 32bit
227
+ ~ machine by mmapping only index files) and "standard".
228
+ ~ (The buffer size settings that follow only apply to standard,
229
+ ~ non-mmapped i/o.)
230
+ -->
231
+ <DiskAccessMode>auto</DiskAccessMode>
232
+
233
+ <!--
234
+ ~ Buffer size to use when performing contiguous column slices. Increase
235
+ ~ this to the size of the column slices you typically perform.
236
+ ~ (Name-based queries are performed with a buffer size of
237
+ ~ ColumnIndexSizeInKB.)
238
+ -->
239
+ <SlicedBufferSizeInKB>64</SlicedBufferSizeInKB>
240
+
241
+ <!--
242
+ ~ Buffer size to use when flushing memtables to disk. (Only one
243
+ ~ memtable is ever flushed at a time.) Increase (decrease) the index
244
+ ~ buffer size relative to the data buffer if you have few (many)
245
+ ~ columns per key. Bigger is only better _if_ your memtables get large
246
+ ~ enough to use the space. (Check in your data directory after your
247
+ ~ app has been running long enough.) -->
248
+ <FlushDataBufferSizeInMB>32</FlushDataBufferSizeInMB>
249
+ <FlushIndexBufferSizeInMB>8</FlushIndexBufferSizeInMB>
250
+
251
+ <!--
252
+ ~ Add column indexes to a row after its contents reach this size.
253
+ ~ Increase if your column values are large, or if you have a very large
254
+ ~ number of columns. The competing concerns are: Cassandra has to
255
+ ~ deserialize this much of the row to read a single column, so you want
256
+ ~ it to be small - at least if you do many partial-row reads - but all
257
+ ~ the index data is read for each access, so you don't want to generate
258
+ ~ that wastefully either.
259
+ -->
260
+ <ColumnIndexSizeInKB>64</ColumnIndexSizeInKB>
261
+
262
+ <!--
263
+ ~ Flush memtable after this much data has been inserted, including
264
+ ~ overwritten data. There is one memtable per column family, and
265
+ ~ this threshold is based solely on the amount of data stored, not
266
+ ~ actual heap memory usage (there is some overhead in indexing the
267
+ ~ columns).
268
+ -->
269
+ <MemtableThroughputInMB>64</MemtableThroughputInMB>
270
+ <!--
271
+ ~ Throughput setting for Binary Memtables. Typically these are
272
+ ~ used for bulk load so you want them to be larger.
273
+ -->
274
+ <BinaryMemtableThroughputInMB>256</BinaryMemtableThroughputInMB>
275
+ <!--
276
+ ~ The maximum number of columns in millions to store in memory per
277
+ ~ ColumnFamily before flushing to disk. This is also a per-memtable
278
+ ~ setting. Use with MemtableThroughputInMB to tune memory usage.
279
+ -->
280
+ <MemtableOperationsInMillions>0.3</MemtableOperationsInMillions>
281
+ <!--
282
+ ~ The maximum time to leave a dirty memtable unflushed.
283
+ ~ (While any affected columnfamilies have unflushed data from a
284
+ ~ commit log segment, that segment cannot be deleted.)
285
+ ~ This needs to be large enough that it won't cause a flush storm
286
+ ~ of all your memtables flushing at once because none has hit
287
+ ~ the size or count thresholds yet. For production, a larger
288
+ ~ value such as 1440 is recommended.
289
+ -->
290
+ <MemtableFlushAfterMinutes>60</MemtableFlushAfterMinutes>
291
+
292
+ <!--
293
+ ~ Unlike most systems, in Cassandra writes are faster than reads, so
294
+ ~ you can afford more of those in parallel. A good rule of thumb is 2
295
+ ~ concurrent reads per processor core. Increase ConcurrentWrites to
296
+ ~ the number of clients writing at once if you use batch CommitLogSync
297
+ ~ with CommitLogSyncBatchWindowInMS. -->
298
+ <ConcurrentReads>8</ConcurrentReads>
299
+ <ConcurrentWrites>32</ConcurrentWrites>
300
+
301
+ <!--
302
+ ~ CommitLogSync may be either "periodic" or "batch." When in batch
303
+ ~ mode, Cassandra won't ack writes until the commit log has been
304
+ ~ fsynced to disk. It will wait up to CommitLogSyncBatchWindowInMS
305
+ ~ milliseconds for other writes, before performing the sync.
306
+
307
+ ~ This is less necessary in Cassandra than in traditional databases
308
+ ~ since replication reduces the odds of losing data from a failure
309
+ ~ after writing the log entry but before it actually reaches the disk.
310
+ ~ So the other option is "periodic," where writes may be acked immediately
311
+ ~ and the CommitLog is simply synced every CommitLogSyncPeriodInMS
312
+ ~ milliseconds.
313
+ -->
314
+ <CommitLogSync>periodic</CommitLogSync>
315
+ <!--
316
+ ~ Interval at which to perform syncs of the CommitLog in periodic mode.
317
+ ~ Usually the default of 10000ms is fine; increase it if your i/o
318
+ ~ load is such that syncs are taking excessively long times.
319
+ -->
320
+ <CommitLogSyncPeriodInMS>10000</CommitLogSyncPeriodInMS>
321
+ <!--
322
+ ~ Delay (in milliseconds) during which additional commit log entries
323
+ ~ may be written before fsync in batch mode. This will increase
324
+ ~ latency slightly, but can vastly improve throughput where there are
325
+ ~ many writers. Set to zero to disable (each entry will be synced
326
+ ~ individually). Reasonable values range from a minimal 0.1 to 10 or
327
+ ~ even more if throughput matters more than latency.
328
+ -->
329
+ <!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> -->
330
+
331
+ <!--
332
+ ~ Time to wait before garbage-collecting deletion markers. Set this to
333
+ ~ a large enough value that you are confident that the deletion marker
334
+ ~ will be propagated to all replicas by the time this many seconds have
335
+ ~ elapsed, even in the face of hardware failures. The default value is
336
+ ~ ten days.
337
+ -->
338
+ <GCGraceSeconds>864000</GCGraceSeconds>
238
339
  </Storage>