hallelujah-cassandra 0.12.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. data/CHANGELOG +111 -0
  2. data/Gemfile +8 -0
  3. data/LICENSE +202 -0
  4. data/Manifest +91 -0
  5. data/README.md +352 -0
  6. data/Rakefile +171 -0
  7. data/bin/cassandra_helper +16 -0
  8. data/conf/0.6/cassandra.in.sh +47 -0
  9. data/conf/0.6/log4j.properties +38 -0
  10. data/conf/0.6/schema.json +57 -0
  11. data/conf/0.6/storage-conf.xml +352 -0
  12. data/conf/0.7/cassandra.in.sh +46 -0
  13. data/conf/0.7/cassandra.yaml +336 -0
  14. data/conf/0.7/log4j-server.properties +41 -0
  15. data/conf/0.7/schema.json +57 -0
  16. data/conf/0.7/schema.txt +45 -0
  17. data/conf/0.8/cassandra.in.sh +41 -0
  18. data/conf/0.8/cassandra.yaml +61 -0
  19. data/conf/0.8/log4j-server.properties +40 -0
  20. data/conf/0.8/schema.json +69 -0
  21. data/conf/0.8/schema.txt +51 -0
  22. data/conf/1.0/cassandra.in.sh +41 -0
  23. data/conf/1.0/cassandra.yaml +415 -0
  24. data/conf/1.0/log4j-server.properties +40 -0
  25. data/conf/1.0/schema.json +69 -0
  26. data/conf/1.0/schema.txt +51 -0
  27. data/conf/1.1/cassandra.in.sh +41 -0
  28. data/conf/1.1/cassandra.yaml +560 -0
  29. data/conf/1.1/log4j-server.properties +44 -0
  30. data/conf/1.1/schema.json +69 -0
  31. data/conf/1.1/schema.txt +51 -0
  32. data/hallelujah-cassandra.gemspec +43 -0
  33. data/lib/cassandra.rb +39 -0
  34. data/lib/cassandra/0.6.rb +7 -0
  35. data/lib/cassandra/0.6/cassandra.rb +113 -0
  36. data/lib/cassandra/0.6/columns.rb +78 -0
  37. data/lib/cassandra/0.6/protocol.rb +90 -0
  38. data/lib/cassandra/0.7.rb +7 -0
  39. data/lib/cassandra/0.7/cassandra.rb +2 -0
  40. data/lib/cassandra/0.7/columns.rb +4 -0
  41. data/lib/cassandra/0.7/protocol.rb +5 -0
  42. data/lib/cassandra/0.8.rb +7 -0
  43. data/lib/cassandra/0.8/cassandra.rb +25 -0
  44. data/lib/cassandra/0.8/columns.rb +28 -0
  45. data/lib/cassandra/0.8/protocol.rb +10 -0
  46. data/lib/cassandra/1.0.rb +7 -0
  47. data/lib/cassandra/1.0/cassandra.rb +25 -0
  48. data/lib/cassandra/1.0/columns.rb +28 -0
  49. data/lib/cassandra/1.0/protocol.rb +12 -0
  50. data/lib/cassandra/1.1.rb +6 -0
  51. data/lib/cassandra/1.1/cassandra.rb +25 -0
  52. data/lib/cassandra/1.1/columns.rb +28 -0
  53. data/lib/cassandra/1.1/protocol.rb +12 -0
  54. data/lib/cassandra/array.rb +8 -0
  55. data/lib/cassandra/cassandra.rb +1070 -0
  56. data/lib/cassandra/column_family.rb +3 -0
  57. data/lib/cassandra/columns.rb +147 -0
  58. data/lib/cassandra/comparable.rb +28 -0
  59. data/lib/cassandra/composite.rb +118 -0
  60. data/lib/cassandra/constants.rb +11 -0
  61. data/lib/cassandra/debug.rb +9 -0
  62. data/lib/cassandra/helpers.rb +41 -0
  63. data/lib/cassandra/keyspace.rb +3 -0
  64. data/lib/cassandra/long.rb +58 -0
  65. data/lib/cassandra/mock.rb +511 -0
  66. data/lib/cassandra/ordered_hash.rb +192 -0
  67. data/lib/cassandra/protocol.rb +120 -0
  68. data/lib/cassandra/time.rb +11 -0
  69. data/test/cassandra_client_test.rb +20 -0
  70. data/test/cassandra_mock_test.rb +122 -0
  71. data/test/cassandra_test.rb +922 -0
  72. data/test/comparable_types_test.rb +45 -0
  73. data/test/composite_type_test.rb +29 -0
  74. data/test/eventmachine_test.rb +42 -0
  75. data/test/ordered_hash_test.rb +386 -0
  76. data/test/test_helper.rb +15 -0
  77. data/vendor/0.6/gen-rb/cassandra.rb +1481 -0
  78. data/vendor/0.6/gen-rb/cassandra_constants.rb +12 -0
  79. data/vendor/0.6/gen-rb/cassandra_types.rb +482 -0
  80. data/vendor/0.7/gen-rb/cassandra.rb +1936 -0
  81. data/vendor/0.7/gen-rb/cassandra_constants.rb +12 -0
  82. data/vendor/0.7/gen-rb/cassandra_types.rb +681 -0
  83. data/vendor/0.8/gen-rb/cassandra.rb +2215 -0
  84. data/vendor/0.8/gen-rb/cassandra_constants.rb +12 -0
  85. data/vendor/0.8/gen-rb/cassandra_types.rb +824 -0
  86. data/vendor/1.0/gen-rb/cassandra.rb +2215 -0
  87. data/vendor/1.0/gen-rb/cassandra_constants.rb +12 -0
  88. data/vendor/1.0/gen-rb/cassandra_types.rb +857 -0
  89. data/vendor/1.1/gen-rb/cassandra.rb +2571 -0
  90. data/vendor/1.1/gen-rb/cassandra_constants.rb +12 -0
  91. data/vendor/1.1/gen-rb/cassandra_types.rb +928 -0
  92. metadata +260 -0
@@ -0,0 +1,171 @@
1
+ require 'fileutils'
2
+
3
+ unless ENV['FROM_BIN_CASSANDRA_HELPER']
4
+ require 'rubygems'
5
+ require 'echoe'
6
+
7
+ Echoe.new("hallelujah-cassandra") do |p|
8
+ p.author = "Evan Weaver, Ryan King"
9
+ p.project = "fauna"
10
+ p.summary = "A Ruby client for the Cassandra distributed database."
11
+ p.rubygems_version = ">= 0.8"
12
+ p.dependencies = ['thrift_client >=0.7.0 <0.9', 'json', 'rake', 'simple_uuid ~>0.2.0']
13
+ p.ignore_pattern = /^(data|vendor\/cassandra|cassandra|vendor\/thrift|.*\.rbc)/
14
+ p.rdoc_pattern = /^(lib|bin|tasks|ext)|^README|^CHANGELOG|^TODO|^LICENSE|^COPYING$/
15
+ p.retain_gemspec = true
16
+ end
17
+ end
18
+
19
+ CassandraBinaries = {
20
+ '0.6' => 'http://archive.apache.org/dist/cassandra/0.6.13/apache-cassandra-0.6.13-bin.tar.gz',
21
+ '0.7' => 'http://archive.apache.org/dist/cassandra/0.7.9/apache-cassandra-0.7.9-bin.tar.gz',
22
+ '0.8' => 'http://archive.apache.org/dist/cassandra/0.8.7/apache-cassandra-0.8.7-bin.tar.gz',
23
+ '1.0' => 'http://archive.apache.org/dist/cassandra/1.0.6/apache-cassandra-1.0.6-bin.tar.gz',
24
+ '1.1' => 'http://archive.apache.org/dist/cassandra/1.1.2/apache-cassandra-1.1.2-bin.tar.gz'
25
+
26
+ }
27
+
28
+ CASSANDRA_HOME = ENV['CASSANDRA_HOME'] || "#{ENV['HOME']}/cassandra"
29
+ CASSANDRA_VERSION = ENV['CASSANDRA_VERSION'] || '0.8'
30
+ CASSANDRA_PIDFILE = ENV['CASSANDRA_PIDFILE'] || "#{CASSANDRA_HOME}/cassandra.pid"
31
+
32
+ def setup_cassandra_version(version = CASSANDRA_VERSION)
33
+ FileUtils.mkdir_p CASSANDRA_HOME
34
+
35
+ destination_directory = File.join(CASSANDRA_HOME, 'cassandra-' + CASSANDRA_VERSION)
36
+
37
+ unless File.exists?(File.join(destination_directory, 'bin','cassandra'))
38
+ download_source = CassandraBinaries[CASSANDRA_VERSION]
39
+ download_destination = File.join("/tmp", File.basename(download_source))
40
+ untar_directory = File.join(CASSANDRA_HOME, File.basename(download_source,'-bin.tar.gz'))
41
+
42
+ puts "downloading cassandra"
43
+ sh "curl -L -o #{download_destination} #{download_source}"
44
+
45
+ sh "tar xzf #{download_destination} -C #{CASSANDRA_HOME}"
46
+ sh "mv #{untar_directory} #{destination_directory}"
47
+ end
48
+ end
49
+
50
+ def setup_environment
51
+ env = ""
52
+ if !ENV["CASSANDRA_INCLUDE"]
53
+ env << "CASSANDRA_INCLUDE=#{File.expand_path(Dir.pwd)}/conf/#{CASSANDRA_VERSION}/cassandra.in.sh "
54
+ env << "CASSANDRA_HOME=#{CASSANDRA_HOME}/cassandra-#{CASSANDRA_VERSION} "
55
+ env << "CASSANDRA_CONF=#{File.expand_path(Dir.pwd)}/conf/#{CASSANDRA_VERSION}"
56
+ else
57
+ env << "CASSANDRA_INCLUDE=#{ENV['CASSANDRA_INCLUDE']} "
58
+ env << "CASSANDRA_HOME=#{ENV['CASSANDRA_HOME']} "
59
+ env << "CASSANDRA_CONF=#{ENV['CASSANDRA_CONF']}"
60
+ end
61
+
62
+ env
63
+ end
64
+
65
+ def running?(pid_file = nil)
66
+ pid_file ||= CASSANDRA_PIDFILE
67
+
68
+ if File.exists?(pid_file)
69
+ pid = File.new(pid_file).read.to_i
70
+ begin
71
+ Process.kill(0, pid)
72
+ return true
73
+ rescue
74
+ File.delete(pid_file)
75
+ end
76
+ end
77
+
78
+ false
79
+ end
80
+
81
+ namespace :cassandra do
82
+ desc "Start Cassandra"
83
+ task :start, [:daemonize] => :java do |t, args|
84
+ args.with_defaults(:daemonize => true)
85
+
86
+ setup_cassandra_version
87
+
88
+ env = setup_environment
89
+
90
+ Dir.chdir(File.join(CASSANDRA_HOME, "cassandra-#{CASSANDRA_VERSION}")) do
91
+ sh("env #{env} bin/cassandra #{'-f' unless args.daemonize} -p #{CASSANDRA_PIDFILE}")
92
+ end
93
+ end
94
+
95
+ desc "Stop Cassandra"
96
+ task :stop => :java do
97
+ setup_cassandra_version
98
+ env = setup_environment
99
+ sh("kill $(cat #{CASSANDRA_PIDFILE})")
100
+ end
101
+ end
102
+
103
+ desc "Start Cassandra"
104
+ task :cassandra => :java do
105
+ begin
106
+ Rake::Task["cassandra:start"].invoke(false)
107
+ rescue RuntimeError => e
108
+ raise e unless e.message =~ /Command failed with status \(130\)/ # handle keyboard interrupt errors
109
+ end
110
+ end
111
+
112
+ desc "Run the Cassandra CLI"
113
+ task :cli do
114
+ Dir.chdir(File.join(CASSANDRA_HOME, "cassandra-#{CASSANDRA_VERSION}")) do
115
+ sh("bin/cassandra-cli -host localhost -port 9160")
116
+ end
117
+ end
118
+
119
+ desc "Check Java version"
120
+ task :java do
121
+ unless `java -version 2>&1`.split("\n").first =~ /java version "1.6/ #"
122
+ puts "You need to configure your environment for Java 1.6."
123
+ puts "If you're on OS X, just export the following environment variables:"
124
+ puts ' JAVA_HOME="/System/Library/Frameworks/JavaVM.framework/Versions/1.6/Home"'
125
+ puts ' PATH="/System/Library/Frameworks/JavaVM.framework/Versions/1.6/Home/bin:$PATH"'
126
+ exit(1)
127
+ end
128
+ end
129
+
130
+ namespace :data do
131
+ desc "Reset test data"
132
+ task :reset do
133
+ puts "Resetting test data"
134
+ sh("rm -rf #{File.join(CASSANDRA_HOME, "cassandra-#{CASSANDRA_VERSION}", 'data')}")
135
+ end
136
+
137
+ desc "Load test data structures."
138
+ task :load do
139
+ unless CASSANDRA_VERSION == '0.6'
140
+
141
+ schema_path = "#{File.expand_path(Dir.pwd)}/conf/#{CASSANDRA_VERSION}/schema.txt"
142
+ puts "Loading test data structures."
143
+ Dir.chdir(File.join(CASSANDRA_HOME, "cassandra-#{CASSANDRA_VERSION}")) do
144
+ begin
145
+ sh("bin/cassandra-cli --host localhost --batch < #{schema_path}")
146
+ rescue
147
+ puts "Schema already loaded."
148
+ end
149
+ end
150
+ end
151
+ end
152
+ end
153
+
154
+ task :test => 'data:load'
155
+
156
+ # desc "Regenerate thrift bindings for Cassandra" # Dev only
157
+ task :thrift do
158
+ puts "Generating Thrift bindings"
159
+ FileUtils.mkdir_p "vendor/#{CASSANDRA_VERSION}"
160
+
161
+ system(
162
+ "cd vendor/#{CASSANDRA_VERSION} &&
163
+ rm -rf gen-rb &&
164
+ thrift -gen rb #{File.join(CASSANDRA_HOME, "cassandra-#{CASSANDRA_VERSION}")}/interface/cassandra.thrift")
165
+ end
166
+
167
+ task :fix_perms do
168
+ chmod_R 0755, './'
169
+ end
170
+
171
+ task :pkg => [:fix_perms]
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'rake'
5
+ require 'cassandra'
6
+
7
+ gem_path = $LOAD_PATH.last.sub(/lib$/, "")
8
+
9
+ Dir.chdir(gem_path) do
10
+ if !ENV["CASSANDRA_INCLUDE"]
11
+ puts "Set the CASSANDRA_INCLUDE environment variable to use a non-default cassandra.in.sh and friends."
12
+ end
13
+
14
+ ARGV << "-T" if ARGV.empty?
15
+ exec("env FROM_BIN_CASSANDRA_HELPER=1 rake #{ARGV.join(' ')}")
16
+ end
@@ -0,0 +1,47 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ # The directory where Cassandra's configs live (required)
18
+ CASSANDRA_CONF=$CASSANDRA_CONF
19
+
20
+ # This can be the path to a jar file, or a directory containing the
21
+ # compiled classes. NOTE: This isn't needed by the startup script,
22
+ # it's just used here in constructing the classpath.
23
+ cassandra_bin=$CASSANDRA_HOME/build/classes
24
+
25
+ # The java classpath (required)
26
+ CLASSPATH=$CASSANDRA_CONF:$CASSANDRA_BIN
27
+
28
+ for jar in $CASSANDRA_HOME/lib/*.jar $CASSANDRA_HOME/build/lib/jars/*.jar; do
29
+ CLASSPATH=$CLASSPATH:$jar
30
+ done
31
+
32
+ # Arguments to pass to the JVM
33
+ JVM_OPTS=" \
34
+ -ea \
35
+ -Xms128M \
36
+ -Xmx1G \
37
+ -XX:TargetSurvivorRatio=90 \
38
+ -XX:+AggressiveOpts \
39
+ -XX:+UseParNewGC \
40
+ -XX:+UseConcMarkSweepGC \
41
+ -XX:+CMSParallelRemarkEnabled \
42
+ -XX:+HeapDumpOnOutOfMemoryError \
43
+ -XX:SurvivorRatio=128 \
44
+ -XX:MaxTenuringThreshold=0 \
45
+ -Dcom.sun.management.jmxremote.port=8080 \
46
+ -Dcom.sun.management.jmxremote.ssl=false \
47
+ -Dcom.sun.management.jmxremote.authenticate=false"
@@ -0,0 +1,38 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ # for production, you should probably set the root to INFO
18
+ # and the pattern to %c instead of %l. (%l is slower.)
19
+
20
+ # output messages into a rolling log file as well as stdout
21
+ log4j.rootLogger=DEBUG,stdout,R
22
+
23
+ # stdout
24
+ log4j.appender.stdout=org.apache.log4j.ConsoleAppender
25
+ log4j.appender.stdout.layout=org.apache.log4j.SimpleLayout
26
+
27
+ # rolling log file ("system.log")
28
+ log4j.appender.R=org.apache.log4j.DailyRollingFileAppender
29
+ log4j.appender.R.DatePattern='.'yyyy-MM-dd-HH
30
+ log4j.appender.R.layout=org.apache.log4j.PatternLayout
31
+ log4j.appender.R.layout.ConversionPattern=%5p [%t] %d{ISO8601} %F (line %L) %m%n
32
+ # Edit the next line to point to your logs directory
33
+ log4j.appender.R.File=data/logs/system.log
34
+
35
+ # Application logging options
36
+ #log4j.logger.com.facebook=DEBUG
37
+ #log4j.logger.com.facebook.infrastructure.gms=DEBUG
38
+ #log4j.logger.com.facebook.infrastructure.db=DEBUG
@@ -0,0 +1,57 @@
1
+ {"Twitter":{
2
+ "Users":{
3
+ "CompareWith":"org.apache.cassandra.db.marshal.UTF8Type",
4
+ "Type":"Standard"},
5
+ "UserAudits":{
6
+ "CompareWith":"org.apache.cassandra.db.marshal.UTF8Type",
7
+ "Type":"Standard"},
8
+ "UserRelationships":{
9
+ "CompareSubcolumnsWith":"org.apache.cassandra.db.marshal.TimeUUIDType",
10
+ "CompareWith":"org.apache.cassandra.db.marshal.UTF8Type",
11
+ "Type":"Super"},
12
+ "Usernames":{
13
+ "CompareWith":"org.apache.cassandra.db.marshal.UTF8Type",
14
+ "Type":"Standard"},
15
+ "Statuses":{
16
+ "CompareWith":"org.apache.cassandra.db.marshal.UTF8Type",
17
+ "Type":"Standard"},
18
+ "StatusAudits":{
19
+ "CompareWith":"org.apache.cassandra.db.marshal.UTF8Type",
20
+ "Type":"Standard"},
21
+ "StatusRelationships":{
22
+ "CompareSubcolumnsWith":"org.apache.cassandra.db.marshal.TimeUUIDType",
23
+ "CompareWith":"org.apache.cassandra.db.marshal.UTF8Type",
24
+ "Type":"Super"},
25
+ "Index":{
26
+ "CompareWith":"org.apache.cassandra.db.marshal.UTF8Type",
27
+ "Type":"Super"},
28
+ "TimelinishThings":{
29
+ "CompareWith":"org.apache.cassandra.db.marshal.BytesType",
30
+ "Type":"Standard"}
31
+ },
32
+ "Multiblog":{
33
+ "Blogs":{
34
+ "CompareWith":"org.apache.cassandra.db.marshal.TimeUUIDType",
35
+ "Type":"Standard"},
36
+ "Comments":{
37
+ "CompareWith":"org.apache.cassandra.db.marshal.TimeUUIDType",
38
+ "Type":"Standard"}
39
+ },
40
+ "MultiblogLong":{
41
+ "Blogs":{
42
+ "CompareWith":"org.apache.cassandra.db.marshal.LongType",
43
+ "Type":"Standard"},
44
+ "Comments":{
45
+ "CompareWith":"org.apache.cassandra.db.marshal.LongType",
46
+ "Type":"Standard"}
47
+ },
48
+ "TypeConversions":{
49
+ "UUIDColumnConversion":{
50
+ "CompareWith":"org.apache.cassandra.db.marshal.TimeUUIDType",
51
+ "Type":"Standard"},
52
+ "SuperUUID":{
53
+ "CompareSubcolumnsWith":"org.apache.cassandra.db.marshal.TimeUUIDType",
54
+ "CompareWith":"org.apache.cassandra.db.marshal.TimeUUIDType",
55
+ "Type":"Super"}
56
+ }
57
+ }
@@ -0,0 +1,352 @@
1
+ <!--
2
+ ~ Licensed to the Apache Software Foundation (ASF) under one
3
+ ~ or more contributor license agreements. See the NOTICE file
4
+ ~ distributed with this work for additional information
5
+ ~ regarding copyright ownership. The ASF licenses this file
6
+ ~ to you under the Apache License, Version 2.0 (the
7
+ ~ "License"); you may not use this file except in compliance
8
+ ~ with the License. You may obtain a copy of the License at
9
+ ~
10
+ ~ http://www.apache.org/licenses/LICENSE-2.0
11
+ ~
12
+ ~ Unless required by applicable law or agreed to in writing,
13
+ ~ software distributed under the License is distributed on an
14
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ ~ KIND, either express or implied. See the License for the
16
+ ~ specific language governing permissions and limitations
17
+ ~ under the License.
18
+ -->
19
+ <Storage>
20
+ <!--======================================================================-->
21
+ <!-- Basic Configuration -->
22
+ <!--======================================================================-->
23
+
24
+ <!--
25
+ ~ The name of this cluster. This is mainly used to prevent machines in
26
+ ~ one logical cluster from joining another.
27
+ -->
28
+ <ClusterName>Test</ClusterName>
29
+
30
+ <!--
31
+ ~ Turn on to make new [non-seed] nodes automatically migrate the right data
32
+ ~ to themselves. (If no InitialToken is specified, they will pick one
33
+ ~ such that they will get half the range of the most-loaded node.)
34
+ ~ If a node starts up without bootstrapping, it will mark itself bootstrapped
35
+ ~ so that you can't subsequently accidentally bootstrap a node with
36
+ ~ data on it. (You can reset this by wiping your data and commitlog
37
+ ~ directories.)
38
+ ~
39
+ ~ Off by default so that new clusters and upgraders from 0.4 don't
40
+ ~ bootstrap immediately. You should turn this on when you start adding
41
+ ~ new nodes to a cluster that already has data on it. (If you are upgrading
42
+ ~ from 0.4, start your cluster with it off once before changing it to true.
43
+ ~ Otherwise, no data will be lost but you will incur a lot of unnecessary
44
+ ~ I/O before your cluster starts up.)
45
+ -->
46
+ <AutoBootstrap>false</AutoBootstrap>
47
+
48
+ <!--
49
+ ~ Keyspaces and ColumnFamilies:
50
+ ~ A ColumnFamily is the Cassandra concept closest to a relational
51
+ ~ table. Keyspaces are separate groups of ColumnFamilies. Except in
52
+ ~ very unusual circumstances you will have one Keyspace per application.
53
+
54
+ ~ There is an implicit keyspace named 'system' for Cassandra internals.
55
+ -->
56
+ <Keyspaces>
57
+ <Keyspace Name="Twitter">
58
+ <KeysCachedFraction>0.01</KeysCachedFraction>
59
+ <ColumnFamily CompareWith="UTF8Type" Name="Users" />
60
+ <ColumnFamily CompareWith="UTF8Type" Name="UserAudits" />
61
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="UserRelationships" />
62
+ <ColumnFamily CompareWith="UTF8Type" Name="Usernames" />
63
+ <ColumnFamily CompareWith="UTF8Type" Name="Statuses" />
64
+ <ColumnFamily CompareWith="UTF8Type" Name="StatusAudits" />
65
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="StatusRelationships" />
66
+ <ColumnFamily CompareWith="UTF8Type" ColumnType="Super" Name="Index" />
67
+ <ColumnFamily CompareWith="BytesType" ColumnType="Standard" Name="TimelinishThings" />
68
+
69
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
70
+ <ReplicationFactor>1</ReplicationFactor>
71
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
72
+ </Keyspace>
73
+
74
+ <Keyspace Name="Multiblog">
75
+ <KeysCachedFraction>0.01</KeysCachedFraction>
76
+ <ColumnFamily CompareWith="TimeUUIDType" Name="Blogs"/>
77
+ <ColumnFamily CompareWith="TimeUUIDType" Name="Comments"/>
78
+
79
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
80
+ <ReplicationFactor>1</ReplicationFactor>
81
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
82
+ </Keyspace>
83
+
84
+ <Keyspace Name="MultiblogLong">
85
+ <KeysCachedFraction>0.01</KeysCachedFraction>
86
+ <ColumnFamily CompareWith="LongType" Name="Blogs"/>
87
+ <ColumnFamily CompareWith="LongType" Name="Comments"/>
88
+
89
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
90
+ <ReplicationFactor>1</ReplicationFactor>
91
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
92
+ </Keyspace>
93
+
94
+ <Keyspace Name="TypeConversions">
95
+ <KeysCachedFraction>0.01</KeysCachedFraction>
96
+ <ColumnFamily CompareWith="TimeUUIDType" Name="UUIDColumnConversion"/>
97
+ <ColumnFamily CompareWith="TimeUUIDType" CompareSubcolumnsWith="UTF8Type" ColumnType="Super" Name="SuperUUID"/>
98
+
99
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
100
+ <ReplicationFactor>1</ReplicationFactor>
101
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
102
+ </Keyspace>
103
+
104
+ <Keyspace Name="CassandraObject">
105
+ <KeysCachedFraction>0.01</KeysCachedFraction>
106
+ <ColumnFamily CompareWith="UTF8Type" Name="Customers" />
107
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="CustomerRelationships" />
108
+ <ColumnFamily CompareWith="TimeUUIDType" Name="CustomersByLastName" />
109
+ <ColumnFamily CompareWith="UTF8Type" Name="Invoices" />
110
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="InvoiceRelationships" />
111
+ <ColumnFamily CompareWith="UTF8Type" Name="InvoicesByNumber" />
112
+ <ColumnFamily CompareWith="UTF8Type" Name="Payments" />
113
+ <ColumnFamily CompareWith="UTF8Type" Name="Appointments" />
114
+ <!-- <ColumnFamily CompareWith="UTF8Type" Name="FirstNames" /> -->
115
+
116
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
117
+ <ReplicationFactor>1</ReplicationFactor>
118
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
119
+ </Keyspace>
120
+ </Keyspaces>
121
+
122
+ <!--
123
+ ~ Authenticator: any IAuthenticator may be used, including your own as long
124
+ ~ as it is on the classpath. Out of the box, Cassandra provides
125
+ ~ org.apache.cassandra.auth.AllowAllAuthenticator and,
126
+ ~ org.apache.cassandra.auth.SimpleAuthenticator
127
+ ~ (SimpleAuthenticator uses access.properties and passwd.properties by
128
+ ~ default).
129
+ ~
130
+ ~ If you don't specify an authenticator, AllowAllAuthenticator is used.
131
+ -->
132
+ <Authenticator>org.apache.cassandra.auth.AllowAllAuthenticator</Authenticator>
133
+
134
+ <!--
135
+ ~ Partitioner: any IPartitioner may be used, including your own as long
136
+ ~ as it is on the classpath. Out of the box, Cassandra provides
137
+ ~ org.apache.cassandra.dht.RandomPartitioner,
138
+ ~ org.apache.cassandra.dht.OrderPreservingPartitioner, and
139
+ ~ org.apache.cassandra.dht.CollatingOrderPreservingPartitioner.
140
+ ~ (CollatingOPP collates according to EN,US rules, not naive byte
141
+ ~ ordering. Use this as an example if you need locale-aware collation.)
142
+ ~ Range queries require using an order-preserving partitioner.
143
+ ~
144
+ ~ Achtung! Changing this parameter requires wiping your data
145
+ ~ directories, since the partitioner can modify the sstable on-disk
146
+ ~ format.
147
+ -->
148
+ <Partitioner>org.apache.cassandra.dht.RandomPartitioner</Partitioner>
149
+
150
+ <!--
151
+ ~ If you are using an order-preserving partitioner and you know your key
152
+ ~ distribution, you can specify the token for this node to use. (Keys
153
+ ~ are sent to the node with the "closest" token, so distributing your
154
+ ~ tokens equally along the key distribution space will spread keys
155
+ ~ evenly across your cluster.) This setting is only checked the first
156
+ ~ time a node is started.
157
+
158
+ ~ This can also be useful with RandomPartitioner to force equal spacing
159
+ ~ of tokens around the hash space, especially for clusters with a small
160
+ ~ number of nodes.
161
+ -->
162
+ <InitialToken></InitialToken>
163
+
164
+ <!--
165
+ ~ Directories: Specify where Cassandra should store different data on
166
+ ~ disk. Keep the data disks and the CommitLog disks separate for best
167
+ ~ performance
168
+ -->
169
+ <CommitLogDirectory>data/commitlog</CommitLogDirectory>
170
+ <DataFileDirectories>
171
+ <DataFileDirectory>data/data</DataFileDirectory>
172
+ </DataFileDirectories>
173
+ <CalloutLocation>data/callouts</CalloutLocation>
174
+ <StagingFileDirectory>data/staging</StagingFileDirectory>
175
+ <SavedCachesDirectory>data/saved_caches</SavedCachesDirectory>
176
+
177
+
178
+ <!--
179
+ ~ Addresses of hosts that are deemed contact points. Cassandra nodes
180
+ ~ use this list of hosts to find each other and learn the topology of
181
+ ~ the ring. You must change this if you are running multiple nodes!
182
+ -->
183
+ <Seeds>
184
+ <Seed>127.0.0.1</Seed>
185
+ </Seeds>
186
+
187
+
188
+ <!-- Miscellaneous -->
189
+
190
+ <!-- Time to wait for a reply from other nodes before failing the command -->
191
+ <RpcTimeoutInMillis>5000</RpcTimeoutInMillis>
192
+ <!-- Size to allow commitlog to grow to before creating a new segment -->
193
+ <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>
194
+
195
+
196
+ <!-- Local hosts and ports -->
197
+
198
+ <!--
199
+ ~ Address to bind to and tell other nodes to connect to. You _must_
200
+ ~ change this if you want multiple nodes to be able to communicate!
201
+ ~
202
+ ~ Leaving it blank leaves it up to InetAddress.getLocalHost(). This
203
+ ~ will always do the Right Thing *if* the node is properly configured
204
+ ~ (hostname, name resolution, etc), and the Right Thing is to use the
205
+ ~ address associated with the hostname (it might not be).
206
+ -->
207
+ <ListenAddress>localhost</ListenAddress>
208
+ <!-- internal communications port -->
209
+ <StoragePort>7000</StoragePort>
210
+
211
+ <!--
212
+ ~ The address to bind the Thrift RPC service to. Unlike ListenAddress
213
+ ~ above, you *can* specify 0.0.0.0 here if you want Thrift to listen on
214
+ ~ all interfaces.
215
+ ~
216
+ ~ Leaving this blank has the same effect it does for ListenAddress,
217
+ ~ (i.e. it will be based on the configured hostname of the node).
218
+ -->
219
+ <ThriftAddress>localhost</ThriftAddress>
220
+ <!-- Thrift RPC port (the port clients connect to). -->
221
+ <ThriftPort>9160</ThriftPort>
222
+ <!--
223
+ ~ Whether or not to use a framed transport for Thrift. If this option
224
+ ~ is set to true then you must also use a framed transport on the
225
+ ~ client-side, (framed and non-framed transports are not compatible).
226
+ -->
227
+ <ThriftFramedTransport>false</ThriftFramedTransport>
228
+
229
+
230
+ <!--======================================================================-->
231
+ <!-- Memory, Disk, and Performance -->
232
+ <!--======================================================================-->
233
+
234
+ <!--
235
+ ~ Access mode. mmapped i/o is substantially faster, but only practical on
236
+ ~ a 64bit machine (which notably does not include EC2 "small" instances)
237
+ ~ or relatively small datasets. "auto", the safe choice, will enable
238
+ ~ mmapping on a 64bit JVM. Other values are "mmap", "mmap_index_only"
239
+ ~ (which may allow you to get part of the benefits of mmap on a 32bit
240
+ ~ machine by mmapping only index files) and "standard".
241
+ ~ (The buffer size settings that follow only apply to standard,
242
+ ~ non-mmapped i/o.)
243
+ -->
244
+ <DiskAccessMode>auto</DiskAccessMode>
245
+
246
+ <!--
247
+ ~ Buffer size to use when performing contiguous column slices. Increase
248
+ ~ this to the size of the column slices you typically perform.
249
+ ~ (Name-based queries are performed with a buffer size of
250
+ ~ ColumnIndexSizeInKB.)
251
+ -->
252
+ <SlicedBufferSizeInKB>64</SlicedBufferSizeInKB>
253
+
254
+ <!--
255
+ ~ Buffer size to use when flushing memtables to disk. (Only one
256
+ ~ memtable is ever flushed at a time.) Increase (decrease) the index
257
+ ~ buffer size relative to the data buffer if you have few (many)
258
+ ~ columns per key. Bigger is only better _if_ your memtables get large
259
+ ~ enough to use the space. (Check in your data directory after your
260
+ ~ app has been running long enough.) -->
261
+ <FlushDataBufferSizeInMB>32</FlushDataBufferSizeInMB>
262
+ <FlushIndexBufferSizeInMB>8</FlushIndexBufferSizeInMB>
263
+
264
+ <!--
265
+ ~ Add column indexes to a row after its contents reach this size.
266
+ ~ Increase if your column values are large, or if you have a very large
267
+ ~ number of columns. The competing causes are, Cassandra has to
268
+ ~ deserialize this much of the row to read a single column, so you want
269
+ ~ it to be small - at least if you do many partial-row reads - but all
270
+ ~ the index data is read for each access, so you don't want to generate
271
+ ~ that wastefully either.
272
+ -->
273
+ <ColumnIndexSizeInKB>64</ColumnIndexSizeInKB>
274
+
275
+ <!--
276
+ ~ Flush memtable after this much data has been inserted, including
277
+ ~ overwritten data. There is one memtable per column family, and
278
+ ~ this threshold is based solely on the amount of data stored, not
279
+ ~ actual heap memory usage (there is some overhead in indexing the
280
+ ~ columns).
281
+ -->
282
+ <MemtableThroughputInMB>64</MemtableThroughputInMB>
283
+ <!--
284
+ ~ Throughput setting for Binary Memtables. Typically these are
285
+ ~ used for bulk load so you want them to be larger.
286
+ -->
287
+ <BinaryMemtableThroughputInMB>256</BinaryMemtableThroughputInMB>
288
+ <!--
289
+ ~ The maximum number of columns in millions to store in memory per
290
+ ~ ColumnFamily before flushing to disk. This is also a per-memtable
291
+ ~ setting. Use with MemtableThroughputInMB to tune memory usage.
292
+ -->
293
+ <MemtableOperationsInMillions>0.3</MemtableOperationsInMillions>
294
+ <!--
295
+ ~ The maximum time to leave a dirty memtable unflushed.
296
+ ~ (While any affected columnfamilies have unflushed data from a
297
+ ~ commit log segment, that segment cannot be deleted.)
298
+ ~ This needs to be large enough that it won't cause a flush storm
299
+ ~ of all your memtables flushing at once because none has hit
300
+ ~ the size or count thresholds yet. For production, a larger
301
+ ~ value such as 1440 is recommended.
302
+ -->
303
+ <MemtableFlushAfterMinutes>60</MemtableFlushAfterMinutes>
304
+
305
+ <!--
306
+ ~ Unlike most systems, in Cassandra writes are faster than reads, so
307
+ ~ you can afford more of those in parallel. A good rule of thumb is 2
308
+ ~ concurrent reads per processor core. Increase ConcurrentWrites to
309
+ ~ the number of clients writing at once if you enable CommitLogSync +
310
+ ~ CommitLogSyncDelay. -->
311
+ <ConcurrentReads>8</ConcurrentReads>
312
+ <ConcurrentWrites>32</ConcurrentWrites>
313
+
314
+ <!--
315
+ ~ CommitLogSync may be either "periodic" or "batch." When in batch
316
+ ~ mode, Cassandra won't ack writes until the commit log has been
317
+ ~ fsynced to disk. It will wait up to CommitLogSyncBatchWindowInMS
318
+ ~ milliseconds for other writes, before performing the sync.
319
+
320
+ ~ This is less necessary in Cassandra than in traditional databases
321
+ ~ since replication reduces the odds of losing data from a failure
322
+ ~ after writing the log entry but before it actually reaches the disk.
323
+ ~ So the other option is "periodic," where writes may be acked immediately
324
+ ~ and the CommitLog is simply synced every CommitLogSyncPeriodInMS
325
+ ~ milliseconds.
326
+ -->
327
+ <CommitLogSync>periodic</CommitLogSync>
328
+ <!--
329
+ ~ Interval at which to perform syncs of the CommitLog in periodic mode.
330
+ ~ Usually the default of 10000ms is fine; increase it if your i/o
331
+ ~ load is such that syncs are taking excessively long times.
332
+ -->
333
+ <CommitLogSyncPeriodInMS>10000</CommitLogSyncPeriodInMS>
334
+ <!--
335
+ ~ Delay (in milliseconds) during which additional commit log entries
336
+ ~ may be written before fsync in batch mode. This will increase
337
+ ~ latency slightly, but can vastly improve throughput where there are
338
+ ~ many writers. Set to zero to disable (each entry will be synced
339
+ ~ individually). Reasonable values range from a minimal 0.1 to 10 or
340
+ ~ even more if throughput matters more than latency.
341
+ -->
342
+ <!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> -->
343
+
344
+ <!--
345
+ ~ Time to wait before garbage-collection deletion markers. Set this to
346
+ ~ a large enough value that you are confident that the deletion marker
347
+ ~ will be propagated to all replicas by the time this many seconds has
348
+ ~ elapsed, even in the face of hardware failures. The default value is
349
+ ~ ten days.
350
+ -->
351
+ <GCGraceSeconds>864000</GCGraceSeconds>
352
+ </Storage>