RubyGems - cassandra-model - Versions diffs - 0.1.1 → 0.2.0 - Mend

cassandra-model 0.1.1 → 0.2.0

Files changed (10) hide show

data/Rakefile +1 -1
data/VERSION +1 -1
data/cassandra-model.gemspec +4 -3
data/lib/cassandra-model.rb +1 -1
data/lib/cassandra-model/base.rb +2 -2
data/lib/cassandra-model/persistence.rb +4 -0
data/test/cassandra_model_test.rb +6 -0
data/test/config/cassandra.yaml +427 -0
data/test/config/{log4j.properties → log4j-server.properties} +5 -5
metadata +6 -5

data/Rakefile CHANGED Viewed

@@ -54,7 +54,7 @@ Rake::RDocTask.new do |rdoc|
 end
-CASSANDRA_HOME = ENV["CASSANDRA_HOME"] || "#{ENV["HOME"]}/apache-cassandra-0.6.0"
+CASSANDRA_HOME = ENV["CASSANDRA_HOME"] || "#{ENV["HOME"]}/apache-cassandra"
 CASSANDRA_PID  = ENV["CASSANDRA_PID"] || "/tmp/cassandra.pid".freeze
 cassandra_env = ""

data/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.1.1
1	+ 0.2.0

data/cassandra-model.gemspec CHANGED Viewed

@@ -5,11 +5,11 @@
 Gem::Specification.new do |s|
   s.name = %q{cassandra-model}
-  s.version = "0.1.1"
+  s.version = "0.2.0"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["Tien Le"]
-  s.date = %q{2010-12-22}
+  s.date = %q{2011-01-23}
   s.description = %q{Cassandra-model allows you to map ColumnFamily/SuperColumnFamily in Cassandra to Ruby objects. It was designed to be fast and simple.}
   s.email = %q{tienlx /at/ gmail /dot/ com}
   s.extra_rdoc_files = [
@@ -33,8 +33,9 @@ Gem::Specification.new do |s|
      "test/callbacks_test.rb",
      "test/cassandra_model_test.rb",
      "test/config/cassandra.in.sh",
+     "test/config/cassandra.yaml",
+     "test/config/log4j-server.properties",
      "test/config/log4j-tools.properties",
-     "test/config/log4j.properties",
      "test/config/storage-conf.xml",
      "test/test_helper.rb"
   ]

data/lib/cassandra-model.rb CHANGED Viewed

@@ -1,4 +1,4 @@
-require 'cassandra'
+require 'cassandra/0.7'
 require 'forwardable'
 require 'date'

data/lib/cassandra-model/base.rb CHANGED Viewed

@@ -59,8 +59,8 @@ module CassandraModel
       end
     end
-    attr_accessor :key, :new_record
-    attr_reader :attributes, :errors
+    attr_accessor :key, :new_record, :errors
+    attr_reader :attributes
     def initialize(attrs = {}, convert = true)
       @new_record = true

data/lib/cassandra-model/persistence.rb CHANGED Viewed

@@ -114,6 +114,10 @@ module CassandraModel
         connection.remove(column_family, key, column,
                           :consistency => @write_consistency_level || Cassandra::Consistency::QUORUM)
       end
+      def truncate!
+        connection.truncate!(column_family.to_s)
+      end
     end
   end
 end

data/test/cassandra_model_test.rb CHANGED Viewed

@@ -80,5 +80,11 @@ class CassandraModelTest < Test::Unit::TestCase
       user.save
       assert_equal ["created_at", "full_name"], @connection.get(:Users, "abc").keys
     end
+    should "truncate the column family" do
+      assert !User.all.empty?
+      User.truncate!
+      assert User.all.empty?
+    end
   end
 end

data/test/config/cassandra.yaml ADDED Viewed

@@ -0,0 +1,427 @@
+# Cassandra storage config YAML
+# NOTE:
+#   See http://wiki.apache.org/cassandra/StorageConfiguration for
+#   full explanations of configuration directives
+# /NOTE
+# The name of the cluster. This is mainly used to prevent machines in
+# one logical cluster from joining another.
+cluster_name: 'Test Cluster'
+# You should always specify InitialToken when setting up a production
+# cluster for the first time, and often when adding capacity later.
+# The principle is that each node should be given an equal slice of
+# the token ring; see http://wiki.apache.org/cassandra/Operations
+# for more details.
+#
+# If blank, Cassandra will request a token bisecting the range of
+# the heaviest-loaded existing node.  If there is no load information
+# available, such as is the case with a new cluster, it will pick
+# a random token, which will lead to hot spots.
+initial_token:
+# Set to true to make new [non-seed] nodes automatically migrate data
+# to themselves from the pre-existing nodes in the cluster.  Defaults
+# to false because you can only bootstrap N machines at a time from
+# an existing cluster of N, so if you are bringing up a cluster of
+# 10 machines with 3 seeds you would have to do it in stages.  Leaving
+# this off for the initial start simplifies that.
+auto_bootstrap: false
+# See http://wiki.apache.org/cassandra/HintedHandoff
+hinted_handoff_enabled: true
+# authentication backend, implementing IAuthenticator; used to identify users
+authenticator: org.apache.cassandra.auth.AllowAllAuthenticator
+# authorization backend, implementing IAuthority; used to limit access/provide permissions
+authority: org.apache.cassandra.auth.AllowAllAuthority
+# The partitioner is responsible for distributing rows (by key) across
+# nodes in the cluster.  Any IPartitioner may be used, including your
+# own as long as it is on the classpath.  Out of the box, Cassandra
+# provides org.apache.cassandra.dht.RandomPartitioner
+# org.apache.cassandra.dht.ByteOrderedPartitioner,
+# org.apache.cassandra.dht.OrderPreservingPartitioner (deprecated),
+# and org.apache.cassandra.dht.CollatingOrderPreservingPartitioner
+# (deprecated).
+#
+# - RandomPartitioner distributes rows across the cluster evenly by md5.
+#   When in doubt, this is the best option.
+# - ByteOrderedPartitioner orders rows lexically by key bytes.  BOP allows
+#   scanning rows in key order, but the ordering can generate hot spots
+#   for sequential insertion workloads.
+# - OrderPreservingPartitioner is an obsolete form of BOP, that stores
+# - keys in a less-efficient format and only works with keys that are
+#   UTF8-encoded Strings.
+# - CollatingOPP colates according to EN,US rules rather than lexical byte
+#   ordering.  Use this as an example if you need custom collation.
+#
+# See http://wiki.apache.org/cassandra/Operations for more on
+# partitioners and token selection.
+partitioner: org.apache.cassandra.dht.RandomPartitioner
+# directories where Cassandra should store data on disk.
+data_file_directories:
+    - /var/lib/cassandra/data
+# commit log
+commitlog_directory: /var/lib/cassandra/commitlog
+# saved caches
+saved_caches_directory: /var/lib/cassandra/saved_caches
+# Size to allow commitlog to grow to before creating a new segment
+commitlog_rotation_threshold_in_mb: 128
+# commitlog_sync may be either "periodic" or "batch."
+# When in batch mode, Cassandra won't ack writes until the commit log
+# has been fsynced to disk.  It will wait up to
+# CommitLogSyncBatchWindowInMS milliseconds for other writes, before
+# performing the sync.
+commitlog_sync: periodic
+# the other option is "timed," where writes may be acked immediately
+# and the CommitLog is simply synced every commitlog_sync_period_in_ms
+# milliseconds.
+commitlog_sync_period_in_ms: 10000
+# Addresses of hosts that are deemed contact points.
+# Cassandra nodes use this list of hosts to find each other and learn
+# the topology of the ring.  You must change this if you are running
+# multiple nodes!
+seeds:
+    - 127.0.0.1
+# Access mode.  mmapped i/o is substantially faster, but only practical on
+# a 64bit machine (which notably does not include EC2 "small" instances)
+# or relatively small datasets.  "auto", the safe choice, will enable
+# mmapping on a 64bit JVM.  Other values are "mmap", "mmap_index_only"
+# (which may allow you to get part of the benefits of mmap on a 32bit
+# machine by mmapping only index files) and "standard".
+# (The buffer size settings that follow only apply to standard,
+# non-mmapped i/o.)
+disk_access_mode: auto
+# Unlike most systems, in Cassandra writes are faster than reads, so
+# you can afford more of those in parallel.  A good rule of thumb is 2
+# concurrent reads per processor core.  Increase ConcurrentWrites to
+# the number of clients writing at once if you enable CommitLogSync +
+# CommitLogSyncDelay. -->
+concurrent_reads: 8
+concurrent_writes: 32
+# This sets the amount of memtable flush writer threads.  These will
+# be blocked by disk io, and each one will hold a memtable in memory
+# while blocked. If you have a large heap and many data directories,
+# you can increase this value for better flush performance.
+# By default this will be set to the amount of data directories defined.
+#memtable_flush_writers: 1
+# Buffer size to use when performing contiguous column slices.
+# Increase this to the size of the column slices you typically perform
+sliced_buffer_size_in_kb: 64
+# TCP port, for commands and data
+storage_port: 7000
+# Address to bind to and tell other Cassandra nodes to connect to. You
+# _must_ change this if you want multiple nodes to be able to
+# communicate!
+#
+# Leaving it blank leaves it up to InetAddress.getLocalHost(). This
+# will always do the Right Thing *if* the node is properly configured
+# (hostname, name resolution, etc), and the Right Thing is to use the
+# address associated with the hostname (it might not be).
+#
+# Setting this to 0.0.0.0 is always wrong.
+listen_address: localhost
+# The address to bind the Thrift RPC service to -- clients connect
+# here. Unlike ListenAddress above, you *can* specify 0.0.0.0 here if
+# you want Thrift to listen on all interfaces.
+#
+# Leaving this blank has the same effect it does for ListenAddress,
+# (i.e. it will be based on the configured hostname of the node).
+rpc_address: localhost
+# port for Thrift to listen for clients on
+rpc_port: 9160
+# enable or disable keepalive on rpc connections
+rpc_keepalive: true
+# uncomment to set socket buffer sizes on rpc connections
+# rpc_send_buff_size_in_bytes:
+# rpc_recv_buff_size_in_bytes:
+# Frame size for thrift (maximum field length).
+# 0 disables TFramedTransport in favor of TSocket. This option
+# is deprecated; we strongly recommend using Framed mode.
+thrift_framed_transport_size_in_mb: 15
+# The max length of a thrift message, including all fields and
+# internal thrift overhead.
+thrift_max_message_length_in_mb: 16
+# Whether or not to take a snapshot before each compaction.  Be
+# careful using this option, since Cassandra won't clean up the
+# snapshots for you.  Mostly useful if you're paranoid when there
+# is a data format change.
+snapshot_before_compaction: false
+# change this to increase the compaction thread's priority.  In java, 1 is the
+# lowest priority and that is our default.
+# compaction_thread_priority: 1
+# The threshold size in megabytes the binary memtable must grow to,
+# before it's submitted for flushing to disk.
+binary_memtable_throughput_in_mb: 256
+# Add column indexes to a row after its contents reach this size.
+# Increase if your column values are large, or if you have a very large
+# number of columns.  The competing causes are, Cassandra has to
+# deserialize this much of the row to read a single column, so you want
+# it to be small - at least if you do many partial-row reads - but all
+# the index data is read for each access, so you don't want to generate
+# that wastefully either.
+column_index_size_in_kb: 64
+# Size limit for rows being compacted in memory.  Larger rows will spill
+# over to disk and use a slower two-pass compaction process.  A message
+# will be logged specifying the row key.
+in_memory_compaction_limit_in_mb: 64
+# Time to wait for a reply from other nodes before failing the command
+rpc_timeout_in_ms: 10000
+# phi value that must be reached for a host to be marked down.
+# most users should never need to adjust this.
+# phi_convict_threshold: 8
+# endpoint_snitch -- Set this to a class that implements
+# IEndpointSnitch, which will let Cassandra know enough
+# about your network topology to route requests efficiently.
+# Out of the box, Cassandra provides
+#  - org.apache.cassandra.locator.SimpleSnitch:
+#    Treats Strategy order as proximity. This improves cache locality
+#    when disabling read repair, which can further improve throughput.
+#  - org.apache.cassandra.locator.RackInferringSnitch:
+#    Proximity is determined by rack and data center, which are
+#    assumed to correspond to the 3rd and 2nd octet of each node's
+#    IP address, respectively
+# org.apache.cassandra.locator.PropertyFileSnitch:
+#  - Proximity is determined by rack and data center, which are
+#    explicitly configured in cassandra-topology.properties.
+endpoint_snitch: org.apache.cassandra.locator.SimpleSnitch
+# dynamic_snitch -- This boolean controls whether the above snitch is
+# wrapped with a dynamic snitch, which will monitor read latencies
+# and avoid reading from hosts that have slowed (due to compaction,
+# for instance)
+dynamic_snitch: true
+# controls how often to perform the more expensive part of host score
+# calculation
+dynamic_snitch_update_interval_in_ms: 100
+# controls how often to reset all host scores, allowing a bad host to
+# possibly recover
+dynamic_snitch_reset_interval_in_ms: 600000
+# if set greater than zero and read_repair_chance is < 1.0, this will allow
+# 'pinning' of replicas to hosts in order to increase cache capacity.
+# The badness threshold will control how much worse the pinned host has to be
+# before the dynamic snitch will prefer other replicas over it.  This is
+# expressed as a double which represents a percentage.  Thus, a value of
+# 0.2 means Cassandra would continue to prefer the static snitch values
+# until the pinned host was 20% worse than the fastest.
+dynamic_snitch_badness_threshold: 0.0
+# request_scheduler -- Set this to a class that implements
+# RequestScheduler, which will schedule incoming client requests
+# according to the specific policy. This is useful for multi-tenancy
+# with a single Cassandra cluster.
+# NOTE: This is specifically for requests from the client and does
+# not affect inter node communication.
+# org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place
+# org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of
+# client requests to a node with a separate queue for each
+# request_scheduler_id. The scheduler is further customized by
+# request_scheduler_options as described below.
+request_scheduler: org.apache.cassandra.scheduler.NoScheduler
+# Scheduler Options vary based on the type of scheduler
+# NoScheduler - Has no options
+# RoundRobin
+#  - throttle_limit -- The throttle_limit is the number of in-flight
+#                      requests per client.  Requests beyond
+#                      that limit are queued up until
+#                      running requests can complete.
+#                      The value of 80 here is twice the number of
+#                      concurrent_reads + concurrent_writes.
+#  - default_weight -- default_weight is optional and allows for
+#                      overriding the default which is 1.
+#  - weights -- Weights are optional and will default to 1 or the
+#               overridden default_weight. The weight translates into how
+#               many requests are handled during each turn of the
+#               RoundRobin, based on the scheduler id.
+#
+# request_scheduler_options:
+#    throttle_limit: 80
+#    default_weight: 5
+#    weights:
+#      Keyspace1: 1
+#      Keyspace2: 5
+# request_scheduler_id -- An identifer based on which to perform
+# the request scheduling. Currently the only valid option is keyspace.
+# request_scheduler_id: keyspace
+# The Index Interval determines how large the sampling of row keys
+#  is for a given SSTable. The larger the sampling, the more effective
+#  the index is at the cost of space.
+index_interval: 128
+# Keyspaces have ColumnFamilies.        (Usually 1 KS per application.)
+# ColumnFamilies have Rows.             (Dozens of CFs per KS.)
+# Rows contain Columns.                 (Many per CF.)
+# Columns contain name:value:timestamp. (Many per Row.)
+#
+# A KS is most similar to a schema, and a CF is most similar to a relational table.
+#
+# Keyspaces, ColumnFamilies, and Columns may carry additional
+# metadata that change their behavior. These are as follows:
+#
+# Keyspace required parameters:
+# - name: name of the keyspace; "system" is
+#   reserved for Cassandra Internals.
+# - replica_placement_strategy: the class that determines how replicas
+#   are distributed among nodes. Contains both the class as well as
+#   configuration information.  Must extend AbstractReplicationStrategy.
+#   Out of the box, Cassandra provides
+#     * org.apache.cassandra.locator.SimpleStrategy
+#     * org.apache.cassandra.locator.NetworkTopologyStrategy
+#     * org.apache.cassandra.locator.OldNetworkTopologyStrategy
+#
+#   SimpleStrategy merely places the first
+#   replica at the node whose token is closest to the key (as determined
+#   by the Partitioner), and additional replicas on subsequent nodes
+#   along the ring in increasing Token order.
+#
+#   With NetworkTopologyStrategy,
+#   for each datacenter, you can specify how many replicas you want
+#   on a per-keyspace basis.  Replicas are placed on different racks
+#   within each DC, if possible. This strategy also requires rack aware
+#   snitch, such as RackInferringSnitch or PropertyFileSnitch.
+#   An example:
+#    - name: Keyspace1
+#      replica_placement_strategy: org.apache.cassandra.locator.NetworkTopologyStrategy
+#      strategy_options:
+#        DC1 : 3
+#        DC2 : 2
+#        DC3 : 1
+#
+#   OldNetworkToplogyStrategy [formerly RackAwareStrategy]
+#   places one replica in each of two datacenters, and the third on a
+#   different rack in in the first.  Additional datacenters are not
+#   guaranteed to get a replica.  Additional replicas after three are placed
+#   in ring order after the third without regard to rack or datacenter.
+# - replication_factor: Number of replicas of each row
+# Keyspace optional paramaters:
+# - strategy_options: Additional information for the replication strategy.
+# - column_families:
+#     ColumnFamily required parameters:
+#     - name: name of the ColumnFamily.  Must not contain the character "-".
+#     - compare_with: tells Cassandra how to sort the columns for slicing
+#       operations. The default is BytesType, which is a straightforward
+#       lexical comparison of the bytes in each column.  Other options are
+#       AsciiType, UTF8Type, LexicalUUIDType, TimeUUIDType, LongType,
+#       and IntegerType (a generic variable-length integer type).
+#       You can also specify the fully-qualified class name to a class of
+#       your choice extending org.apache.cassandra.db.marshal.AbstractType.
+#
+#     ColumnFamily optional parameters:
+#     - keys_cached: specifies the number of keys per sstable whose
+#        locations we keep in memory in "mostly LRU" order.  (JUST the key
+#        locations, NOT any column values.) Specify a fraction (value less
+#        than 1) or an absolute number of keys to cache.  Defaults to 200000
+#        keys.
+#     - rows_cached: specifies the number of rows whose entire contents we
+#        cache in memory. Do not use this on ColumnFamilies with large rows,
+#        or ColumnFamilies with high write:read ratios. Specify a fraction
+#        (value less than 1) or an absolute number of rows to cache.
+#        Defaults to 0. (i.e. row caching is off by default)
+#     - comment: used to attach additional human-readable information about
+#        the column family to its definition.
+#     - read_repair_chance: specifies the probability with which read
+#        repairs should be invoked on non-quorum reads.  must be between 0
+#        and 1. defaults to 1.0 (always read repair).
+#     - gc_grace_seconds: specifies the time to wait before garbage
+#        collecting tombstones (deletion markers). defaults to 864000 (10
+#        days). See http://wiki.apache.org/cassandra/DistributedDeletes
+#     - default_validation_class: specifies a validator class to use for
+#        validating all the column values in the CF.
+#     NOTE:
+#     min_ must be less than max_compaction_threshold!
+#     - min_compaction_threshold: the minimum number of SSTables needed
+#        to start a minor compaction.  increasing this will cause minor
+#        compactions to start less frequently and be more intensive. setting
+#        this to 0 disables minor compactions.  defaults to 4.
+#     - max_compaction_threshold: the maximum number of SSTables allowed
+#        before a minor compaction is forced.  decreasing this will cause
+#        minor compactions to start more frequently and be less intensive.
+#        setting this to 0 disables minor compactions.  defaults to 32.
+#     /NOTE
+#     - row_cache_save_period_in_seconds: number of seconds between saving
+#        row caches.  The row caches can be saved periodically and if one
+#        exists on startup it will be loaded.
+#     - key_cache_save_period_in_seconds: number of seconds between saving
+#        key caches.  The key caches can be saved periodically and if one
+#        exists on startup it will be loaded.
+#     - memtable_flush_after_mins: The maximum time to leave a dirty table
+#        unflushed.  This should be large enough that it won't cause a flush
+#        storm of all memtables during periods of inactivity.
+#     - memtable_throughput_in_mb: The maximum size of the memtable before
+#        it is flushed.  If undefined, 1/8 * heapsize will be used.
+#     - memtable_operations_in_millions: Number of operations in millions
+#        before the memtable is flushed. If undefined, throughput / 64 * 0.3
+#        will be used.
+#     - column_metadata:
+#         Column required parameters:
+#         - name: binds a validator (and optionally an indexer) to columns
+#            with this name in any row of the enclosing column family.
+#         - validator: like cf.compare_with, an AbstractType that checks
+#            that the value of the column is well-defined.
+#         Column optional parameters:
+#         NOTE:
+#         index_name cannot be set if index_type is not also set!
+#         - index_name: User-friendly name for the index.
+#         - index_type: The type of index to be created. Currently only
+#            KEYS is supported.
+#         /NOTE
+#
+# NOTE:
+#   this keyspace definition is for demonstration purposes only.
+#   Cassandra will not load these definitions during startup. See
+#   http://wiki.apache.org/cassandra/FAQ#no_keyspaces for an explanation.
+# /NOTE
+keyspaces:
+    - name: CassandraModel
+      replica_placement_strategy: org.apache.cassandra.locator.SimpleStrategy
+      replication_factor: 1
+      column_families:
+        - name: Users
+          compare_with: BytesType
+          keys_cached: 10000
+          rows_cached: 1000
+          row_cache_save_period_in_seconds: 0
+          key_cache_save_period_in_seconds: 3600
+          memtable_flush_after_mins: 59
+          memtable_throughput_in_mb: 255
+          memtable_operations_in_millions: 0.29
+        - name: Posts
+          compare_with: BytesType
+        - name: Comments
+          column_type: Super
+          compare_with: TimeUUIDType
+          compare_subcolumns_with: BytesType

data/test/config/{log4j.properties → log4j-server.properties} RENAMED Viewed

@@ -14,8 +14,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# for production, you should probably set the root to INFO
-# and the pattern to %c instead of %l.  (%l is slower.)
+# for production, you should probably set pattern to %c instead of %l.
+# (%l is slower.)
 # output messages into a rolling log file as well as stdout
 log4j.rootLogger=INFO,stdout,R
@@ -27,12 +27,12 @@ log4j.appender.stdout.layout.ConversionPattern=%5p %d{HH:mm:ss,SSS} %m%n
 # rolling log file
 log4j.appender.R=org.apache.log4j.RollingFileAppender
-log4j.appender.file.maxFileSize=20MB
-log4j.appender.file.maxBackupIndex=50
+log4j.appender.R.maxFileSize=20MB
+log4j.appender.R.maxBackupIndex=50
 log4j.appender.R.layout=org.apache.log4j.PatternLayout
 log4j.appender.R.layout.ConversionPattern=%5p [%t] %d{ISO8601} %F (line %L) %m%n
 # Edit the next line to point to your logs directory
-log4j.appender.R.File=data/logs/system.log
+log4j.appender.R.File=/var/log/cassandra/system.log
 # Application logging options
 #log4j.logger.com.facebook=DEBUG

metadata CHANGED Viewed

@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
   prerelease: false
   segments:
   - 0
-  - 1
-  - 1
-  version: 0.1.1
+  - 2
+  - 0
+  version: 0.2.0
 platform: ruby
 authors:
 - Tien Le
@@ -14,7 +14,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-12-22 00:00:00 +07:00
+date: 2011-01-23 00:00:00 +07:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -69,8 +69,9 @@ files:
 - test/callbacks_test.rb
 - test/cassandra_model_test.rb
 - test/config/cassandra.in.sh
+- test/config/cassandra.yaml
+- test/config/log4j-server.properties
 - test/config/log4j-tools.properties
-- test/config/log4j.properties
 - test/config/storage-conf.xml
 - test/test_helper.rb
 has_rdoc: true