right_support 2.8.12 → 2.8.15

Sign up to get free protection for your applications and to get access to all the features.
data/.rspec CHANGED
@@ -1,4 +1,3 @@
1
1
  --colour
2
2
  --format=nested
3
3
  --backtrace
4
- --debugger
data/Gemfile CHANGED
@@ -1,30 +1,24 @@
1
- source "http://rubygems.org"
1
+ source 'https://rubygems.org'
2
2
 
3
3
  # Gems that RightSupport can optionally make use of, but which it does
4
- # not require to be installed. These would be "optional dependencies"
4
+ # not require to be installed. These would be 'optional dependencies'
5
5
  # if gemspecs allowed for them.
6
6
  group :optional do
7
- gem "net-ssh", "~> 2.0"
8
- gem "rest-client", "~> 1.6"
9
- gem "addressable", "~> 2.2.7"
10
- gem "uuidtools", "~> 2.0", :require => nil
11
- gem "simple_uuid", "~> 0.2", :require => nil
12
- gem "uuid", "~> 2.3", :require => nil
13
- gem "yajl-ruby", "~> 1.1"
14
- gem "iconv", :platforms => :ruby_18
7
+ gem 'net-ssh', '~> 2.0'
8
+ gem 'mime-types', '~> 1.0' # mime-types 2.x is no longer compatible with Ruby 1.8 and mime-types 1.x is only being maintained for security issues
9
+ gem 'rest-client', '~> 1.6'
10
+ gem 'addressable', '~> 2.2.7'
11
+ gem 'uuidtools', '~> 2.0', :require => nil
12
+ gem 'simple_uuid', '~> 0.2', :require => nil
13
+ gem 'uuid', '~> 2.3', :require => nil
14
+ gem 'yajl-ruby', '~> 1.1'
15
+ gem 'iconv', :platforms => :ruby_18
15
16
  end
16
17
 
17
18
  # Gems used during test and development of RightSupport.
18
19
  group :development do
19
- gem "rake", "~> 0.9"
20
- gem "jeweler", "~> 1.8.3"
21
- gem "right_develop", "~> 1.0",
22
- :git => "git@github.com:rightscale/right_develop.git",
23
- :branch => "master"
24
- gem "ruby-debug", ">= 0.10", :platforms => :ruby_18
25
- gem "ruby-debug19", ">= 0.11.6", :platforms => :ruby_19
26
- gem "rdoc", ">= 2.4.2"
27
- gem "flexmock", "~> 1.0"
28
- gem "syntax", "~> 1.0.0" #rspec will syntax-highlight code snippets if this gem is available
29
- gem "nokogiri", "~> 1.5"
20
+ gem 'jeweler', '~> 2.0'
21
+ gem 'flexmock', '~> 1.0'
22
+ gem 'rspec', '~> 2.13.0'
23
+ gem 'cucumber', ['~> 1.0', '< 1.3.3'] # Cuke >= 1.3.3 depends on RubyGems > 2.0 without specifying that in its gemspec
30
24
  end
data/Gemfile.lock CHANGED
@@ -1,115 +1,91 @@
1
- GIT
2
- remote: git@github.com:rightscale/right_develop.git
3
- revision: dedba69a68e8b56cc88db7d6dc518d42747b0586
4
- branch: master
5
- specs:
6
- right_develop (1.2.2)
7
- actionpack (>= 2.3.0, < 4.0)
8
- builder (~> 3.0)
9
- cucumber (~> 1.0)
10
- rake (>= 0.8.7, < 0.10)
11
- right_support (~> 2.0)
12
- rspec (>= 1.3, < 3.0)
13
- trollop (>= 1.0, < 3.0)
14
-
15
1
  GEM
16
- remote: http://rubygems.org/
2
+ remote: https://rubygems.org/
17
3
  specs:
18
- actionpack (2.3.18)
19
- activesupport (= 2.3.18)
20
- rack (~> 1.1.0)
21
- activesupport (2.3.18)
22
4
  addressable (2.2.8)
23
- archive-tar-minitar (0.5.2)
24
5
  builder (3.2.2)
25
- columnize (0.3.6)
26
- cucumber (1.3.3)
6
+ cucumber (1.3.2)
27
7
  builder (>= 2.1.2)
28
8
  diff-lcs (>= 1.1.3)
29
9
  gherkin (~> 2.12.0)
30
- multi_json (~> 1.7.5)
31
- multi_test (~> 0.0.1)
32
- diff-lcs (1.2.4)
33
- flexmock (1.3.2)
34
- gherkin (2.12.0)
35
10
  multi_json (~> 1.3)
36
- git (1.2.5)
11
+ diff-lcs (1.2.5)
12
+ faraday (0.8.9)
13
+ multipart-post (~> 1.2.0)
14
+ flexmock (1.3.3)
15
+ gherkin (2.12.2)
16
+ multi_json (~> 1.3)
17
+ git (1.2.6)
18
+ github_api (0.10.1)
19
+ addressable
20
+ faraday (~> 0.8.1)
21
+ hashie (>= 1.2)
22
+ multi_json (~> 1.4)
23
+ nokogiri (~> 1.5.2)
24
+ oauth2
25
+ hashie (2.0.5)
26
+ highline (1.6.20)
37
27
  iconv (1.0.4)
38
- jeweler (1.8.4)
39
- bundler (~> 1.0)
28
+ jeweler (2.0.1)
29
+ builder
30
+ bundler (>= 1.0)
40
31
  git (>= 1.2.5)
32
+ github_api
33
+ highline (>= 1.6.15)
34
+ nokogiri (>= 1.5.10)
41
35
  rake
42
36
  rdoc
43
- json (1.7.7)
44
- linecache (0.46)
45
- rbx-require-relative (> 0.0.4)
46
- linecache19 (0.5.12)
47
- ruby_core_source (>= 0.1.4)
48
- macaddr (1.6.1)
49
- systemu (~> 2.5.0)
50
- mime-types (1.22)
51
- multi_json (1.7.7)
52
- multi_test (0.0.1)
53
- net-ssh (2.6.6)
54
- nokogiri (1.5.9)
55
- rack (1.1.6)
56
- rake (0.9.6)
57
- rbx-require-relative (0.0.9)
58
- rdoc (4.0.1)
37
+ json (1.8.1)
38
+ jwt (0.1.11)
39
+ multi_json (>= 1.5)
40
+ macaddr (1.6.5)
41
+ systemu (~> 2.6.2)
42
+ mime-types (1.25.1)
43
+ multi_json (1.8.4)
44
+ multi_xml (0.5.5)
45
+ multipart-post (1.2.0)
46
+ net-ssh (2.8.0)
47
+ nokogiri (1.5.10)
48
+ oauth2 (0.9.3)
49
+ faraday (>= 0.8, < 0.10)
50
+ jwt (~> 0.1.8)
51
+ multi_json (~> 1.3)
52
+ multi_xml (~> 0.5)
53
+ rack (~> 1.2)
54
+ rack (1.5.2)
55
+ rake (10.1.1)
56
+ rdoc (4.1.1)
59
57
  json (~> 1.4)
60
58
  rest-client (1.6.7)
61
59
  mime-types (>= 1.16)
62
- right_support (2.7.0)
63
- rspec (2.14.0)
64
- rspec-core (~> 2.14.0)
65
- rspec-expectations (~> 2.14.0)
66
- rspec-mocks (~> 2.14.0)
67
- rspec-core (2.14.0)
68
- rspec-expectations (2.14.0)
60
+ rspec (2.13.0)
61
+ rspec-core (~> 2.13.0)
62
+ rspec-expectations (~> 2.13.0)
63
+ rspec-mocks (~> 2.13.0)
64
+ rspec-core (2.13.1)
65
+ rspec-expectations (2.13.0)
69
66
  diff-lcs (>= 1.1.3, < 2.0)
70
- rspec-mocks (2.14.1)
71
- ruby-debug (0.10.4)
72
- columnize (>= 0.1)
73
- ruby-debug-base (~> 0.10.4.0)
74
- ruby-debug-base (0.10.4)
75
- linecache (>= 0.3)
76
- ruby-debug-base19 (0.11.25)
77
- columnize (>= 0.3.1)
78
- linecache19 (>= 0.5.11)
79
- ruby_core_source (>= 0.1.4)
80
- ruby-debug19 (0.11.6)
81
- columnize (>= 0.3.1)
82
- linecache19 (>= 0.5.11)
83
- ruby-debug-base19 (>= 0.11.19)
84
- ruby_core_source (0.1.5)
85
- archive-tar-minitar (>= 0.5.2)
86
- simple_uuid (0.3.0)
87
- syntax (1.0.0)
88
- systemu (2.5.2)
89
- trollop (2.0)
67
+ rspec-mocks (2.13.1)
68
+ simple_uuid (0.4.0)
69
+ systemu (2.6.3)
90
70
  uuid (2.3.7)
91
71
  macaddr (~> 1.0)
92
- uuidtools (2.1.3)
93
- yajl-ruby (1.1.0)
72
+ uuidtools (2.1.4)
73
+ yajl-ruby (1.2.0)
94
74
 
95
75
  PLATFORMS
96
76
  ruby
97
77
 
98
78
  DEPENDENCIES
99
79
  addressable (~> 2.2.7)
80
+ cucumber (~> 1.0, < 1.3.3)
100
81
  flexmock (~> 1.0)
101
82
  iconv
102
- jeweler (~> 1.8.3)
83
+ jeweler (~> 2.0)
84
+ mime-types (~> 1.0)
103
85
  net-ssh (~> 2.0)
104
- nokogiri (~> 1.5)
105
- rake (~> 0.9)
106
- rdoc (>= 2.4.2)
107
86
  rest-client (~> 1.6)
108
- right_develop (~> 1.0)!
109
- ruby-debug (>= 0.10)
110
- ruby-debug19 (>= 0.11.6)
87
+ rspec (~> 2.13.0)
111
88
  simple_uuid (~> 0.2)
112
- syntax (~> 1.0.0)
113
89
  uuid (~> 2.3)
114
90
  uuidtools (~> 2.0)
115
91
  yajl-ruby (~> 1.1)
data/Rakefile CHANGED
@@ -10,8 +10,6 @@ require 'rake/clean'
10
10
  require 'rspec/core/rake_task'
11
11
  require 'cucumber/rake/task'
12
12
 
13
- require 'right_develop/ci/rake_task'
14
-
15
13
  # But, we have a very special need, because OUR Cucumbers need to run with a pristine
16
14
  # environment that isn't polluted by RVM or RubyGems or anyone else, in order to validate
17
15
  # that RightSupport's CI harness doesn't break your app if those gems are unavailable.
@@ -63,5 +61,3 @@ tasks.jeweler.commit = false
63
61
  Jeweler::RubygemsDotOrgTasks.new
64
62
 
65
63
  CLEAN.include('pkg')
66
-
67
- RightDevelop::CI::RakeTask.new
data/VERSION CHANGED
@@ -1 +1 @@
1
- 2.8.12
1
+ 2.8.15
@@ -0,0 +1,27 @@
1
+ Feature: tools for hash manipulation
2
+ In order to successfully work with hashes
3
+ RightSupport should have a tested set of tools
4
+ So the developers can trust each method
5
+
6
+ Scenario: getting a value from a path
7
+ Given a hash map with the form {"key1":"val1", "key2":{"key3":"val2"}}
8
+ And a path array ["key2", "key3"]
9
+ When I call the deep_get method
10
+ Then The deep_get method response should be: val2
11
+
12
+ Scenario: setting a value on a path
13
+ Given a hash map with the form {"key1":{"key4":{"key5":"no"}}, "key2":{"key3":"val2"}}
14
+ And a path array ["key1", "key4", "key5"]
15
+ And a value val3
16
+ When I call the deep_set! method
17
+ Then The hash should be: {"key1":{"key4":{"key5":"val3"}}, "key2":{"key3":"val2"}}
18
+
19
+ Scenario: merging two hashes
20
+ Given a hash map with the form {"key1":"val1", "key2":{"key3":"val2"}}
21
+ When I call the deep_merge! method with the source: {"key1":{"key4":"val3"}}
22
+ Then The hash should be: {"key1":{"key4":"val3"}, "key2":{"key3":"val2"}}
23
+
24
+ Scenario: subtracting a hash
25
+ Given a hash map with the form {"key1":"val1", "key2":{"key3":"val2"}}
26
+ When I call the deep_remove! method with the source: {"key1":"val1"}
27
+ Then The hash should be: {"key2":{"key3":"val2"}}
@@ -0,0 +1,41 @@
1
+ require 'json'
2
+
3
+ Given /^a hash map with the form (.*)$/ do |hash|
4
+ @hash = JSON.parse(hash)
5
+ end
6
+
7
+ Given /^a path array (.*)$/ do |path|
8
+ @path = JSON.parse(path)
9
+ end
10
+
11
+ Given /^a value (.*)$/ do |value|
12
+ @value = value
13
+ end
14
+
15
+ When /^I call the deep_get method$/ do
16
+ @deep_get_value = RightSupport::Data::HashTools.deep_get(@hash, @path)
17
+ end
18
+
19
+ When /^I call the deep_set! method$/ do
20
+ RightSupport::Data::HashTools.deep_set!(@hash, @path, @value)
21
+ end
22
+
23
+ When /^I call the deep_merge! method with the source: (.*)$/ do |source|
24
+ RightSupport::Data::HashTools.deep_merge!(@hash, JSON.parse(source))
25
+ end
26
+
27
+ When /^I call the deep_remove! method with the source: (.*)$/ do |source|
28
+ RightSupport::Data::HashTools.deep_remove!(@hash, JSON.parse(source))
29
+ end
30
+
31
+ Then /^The hash should be: (.*)$/ do |hash|
32
+ @hash == JSON.parse(hash)
33
+ end
34
+
35
+ Then /^The deep_get method response should be: (.*)$/ do |expression|
36
+ @deep_get_value == expression
37
+ end
38
+
39
+ Then /^The generate method response should be: (.*)$/ do |expression|
40
+ @generated_value == expression
41
+ end
@@ -34,11 +34,11 @@ if require_succeeds?('cassandra/0.8')
34
34
  predicate = CassandraThrift::SlicePredicate.new(:column_names => [columns].flatten)
35
35
  else
36
36
  predicate = CassandraThrift::SlicePredicate.new(:slice_range =>
37
- CassandraThrift::SliceRange.new(
38
- :reversed => reversed,
39
- :count => count,
40
- :start => start,
41
- :finish => finish))
37
+ CassandraThrift::SliceRange.new(
38
+ :reversed => reversed,
39
+ :count => count,
40
+ :start => start,
41
+ :finish => finish))
42
42
  end
43
43
  client.get_indexed_slices(column_parent, index_clause, predicate, consistency)
44
44
  end
@@ -48,7 +48,7 @@ if require_succeeds?('cassandra/0.8')
48
48
  # rather than converting it to a Hash
49
49
  def get_range_single(column_family, options = {})
50
50
  return_empty_rows = options.delete(:return_empty_rows) || false
51
- slices_please = options.delete(:slices_not_hash) || false
51
+ slices_please = options.delete(:slices_not_hash) || false
52
52
 
53
53
  column_family, _, _, options =
54
54
  extract_and_validate_params(column_family, "", [options],
@@ -57,19 +57,19 @@ if require_succeeds?('cassandra/0.8')
57
57
  :key_count => 100,
58
58
  :columns => nil,
59
59
  :reversed => false
60
- )
61
- )
62
-
63
- results = _get_range( column_family,
64
- options[:start_key].to_s,
65
- options[:finish_key].to_s,
66
- options[:key_count],
67
- options[:columns],
68
- options[:start].to_s,
69
- options[:finish].to_s,
70
- options[:count],
71
- options[:consistency],
72
- options[:reversed] )
60
+ )
61
+ )
62
+
63
+ results = _get_range(column_family,
64
+ options[:start_key].to_s,
65
+ options[:finish_key].to_s,
66
+ options[:key_count],
67
+ options[:columns],
68
+ options[:start].to_s,
69
+ options[:finish].to_s,
70
+ options[:count],
71
+ options[:consistency],
72
+ options[:reversed])
73
73
 
74
74
  unless slices_please
75
75
  multi_key_slices_to_hash(column_family, results, return_empty_rows)
@@ -95,9 +95,9 @@ if require_succeeds?('cassandra/0.8')
95
95
  end
96
96
 
97
97
  key_slices = _get_indexed_slices(column_family, index_clause, columns, options[:count], options[:start],
98
- options[:finish], options[:reversed], options[:consistency])
98
+ options[:finish], options[:reversed], options[:consistency])
99
99
 
100
- key_slices.inject(OrderedHash.new){|h, key_slice| h[key_slice.key] = key_slice.columns; h}
100
+ key_slices.inject(OrderedHash.new) { |h, key_slice| h[key_slice.key] = key_slice.columns; h }
101
101
  end
102
102
  end
103
103
  end
@@ -110,12 +110,12 @@ if (RUBY_PLATFORM =~ /java/) && require_succeeds?('thrift')
110
110
  def open
111
111
  begin
112
112
  addrinfo = ::Socket::getaddrinfo(@host, @port).first
113
- @handle = ::Socket.new(addrinfo[4], ::Socket::SOCK_STREAM, 0)
113
+ @handle = ::Socket.new(addrinfo[4], ::Socket::SOCK_STREAM, 0)
114
114
  sockaddr = ::Socket.sockaddr_in(addrinfo[1], addrinfo[3])
115
115
  begin
116
116
  @handle.connect_nonblock(sockaddr)
117
117
  rescue Errno::EINPROGRESS
118
- resp = IO.select(nil, [ @handle ], nil, @timeout) # 3 lines removed here, 1 line added
118
+ resp = IO.select(nil, [@handle], nil, @timeout) # 3 lines removed here, 1 line added
119
119
  begin
120
120
  @handle.connect_nonblock(sockaddr)
121
121
  rescue Errno::EISCONN
@@ -132,8 +132,10 @@ end
132
132
 
133
133
  module RightSupport::DB
134
134
  # Exception that indicates database configuration info is missing.
135
- class MissingConfiguration < Exception; end
136
- class UnsupportedRubyVersion < Exception; end
135
+ class MissingConfiguration < Exception;
136
+ end
137
+ class UnsupportedRubyVersion < Exception;
138
+ end
137
139
  # Base class for a column family in a keyspace
138
140
  # Used to access data persisted in Cassandra
139
141
  # Provides wrappers for Cassandra client methods
@@ -144,12 +146,12 @@ module RightSupport::DB
144
146
  DEFAULT_TIMEOUT = 20
145
147
 
146
148
  # Default maximum number of rows to retrieve in one chunk
147
- DEFAULT_COUNT = 100
149
+ DEFAULT_COUNT = 100
148
150
 
149
- # Wrappers for Cassandra client
151
+ # Wrappers for Cassandra client
150
152
  class << self
151
153
 
152
- attr_reader :default_keyspace
154
+ attr_reader :default_keyspace
153
155
  attr_accessor :column_family
154
156
 
155
157
  @@current_keyspace = nil
@@ -242,8 +244,8 @@ module RightSupport::DB
242
244
  # keyspace(String):: Set the default keyspace
243
245
 
244
246
  def keyspace=(kyspc)
245
- env = ENV['RACK_ENV'] || 'development'
246
- nspace = namespace(env)
247
+ env = ENV['RACK_ENV'] || 'development'
248
+ nspace = namespace(env)
247
249
  @@default_keyspace = "#{kyspc}_#{env}"
248
250
  @@default_keyspace += "_#{nspace}" if nspace
249
251
  end
@@ -258,8 +260,8 @@ module RightSupport::DB
258
260
  # block(Proc):: Code that will be called in keyspace context
259
261
  def with_keyspace(keyspace, append_env=true, &block)
260
262
  @@current_keyspace = keyspace
261
- env = ENV['RACK_ENV'] || 'development'
262
- nspace = namespace(env)
263
+ env = ENV['RACK_ENV'] || 'development'
264
+ nspace = namespace(env)
263
265
  if append_env
264
266
  if nspace
265
267
  tail = "_#{env}_#{nspace}"
@@ -269,14 +271,14 @@ module RightSupport::DB
269
271
  @@current_keyspace += tail unless @@current_keyspace.end_with?(tail)
270
272
  end
271
273
  block.call
272
- ensure
273
- @@current_keyspace = nil
274
+ ensure
275
+ @@current_keyspace = nil
274
276
  end
275
277
 
276
278
  def get_connection(current=nil)
277
- config = env_config
279
+ config = env_config
278
280
  thrift_client_options = {
279
- :timeout => RightSupport::DB::CassandraModel::DEFAULT_TIMEOUT,
281
+ :timeout => RightSupport::DB::CassandraModel::DEFAULT_TIMEOUT,
280
282
  :server_retry_period => nil,
281
283
  }
282
284
 
@@ -289,14 +291,17 @@ module RightSupport::DB
289
291
  current
290
292
  end
291
293
 
292
- # Client connected to Cassandra server
293
- # Create connection if does not already exist
294
- # Use BinaryProtocolAccelerated if it available
294
+ # Return a Cassandra client object connected to a server and authorized to a suitable keyspace.
295
+ # Create a connection if one does not already exist; use BinaryProtocolAccelerated if available.
295
296
  #
296
- # === Return
297
- # (Cassandra):: Client connected to server
297
+ # This method determines the current keyspace based on the return value of self.keyspace
298
+ # which looks at the value of @@current_keyspace or @@default_keyspace to determine the keyspace it is operating
299
+ # under. If a connection already exists for the keyspace it will re-use it. If a connection does not exist,
300
+ # it will create a new persistent connection for that keyspace that can be re-used with future requests
301
+ #
302
+ # @return [Cassandra] a client object that can be used to send requests to the ring
298
303
  def conn()
299
- @@connections ||= {}
304
+ @@connections ||= {}
300
305
  @@connections[self.keyspace] = get_connection(@@connections[self.keyspace])
301
306
  @@connections[self.keyspace]
302
307
  end
@@ -351,9 +356,9 @@ module RightSupport::DB
351
356
  elsif opt[:count]
352
357
  do_op(:get, column_family, k, opt)
353
358
  else
354
- opt = opt.clone
359
+ opt = opt.clone
355
360
  opt[:count] = DEFAULT_COUNT
356
- columns = Cassandra::OrderedHash.new
361
+ columns = Cassandra::OrderedHash.new
357
362
  loop do
358
363
  chunk = do_op(:get, column_family, k, opt)
359
364
  columns.merge!(chunk)
@@ -375,7 +380,7 @@ module RightSupport::DB
375
380
  # index(String):: Name of secondary index
376
381
  # key(String):: Index value that each selected row is required to match
377
382
  # columns(Array|nil):: Names of columns to be retrieved, defaults to all
378
- # opt(Hash):: Request options with only :consistency used
383
+ # opt(Hash):: Request options with only :consistency and :count used
379
384
  #
380
385
  # === Block
381
386
  # Optional block that is yielded each chunk as it is retrieved as an array
@@ -384,20 +389,20 @@ module RightSupport::DB
384
389
  # === Return
385
390
  # (OrderedHash):: Rows retrieved with each key, value is columns
386
391
  def get_all_indexed_slices(index, key, columns = nil, opt = {})
387
- rows = Cassandra::OrderedHash.new
392
+ rows = Cassandra::OrderedHash.new
388
393
  start = ""
389
394
  count = opt.delete(:count) || DEFAULT_COUNT
390
- expr = do_op(:create_idx_expr, index, key, "EQ")
391
- opt = opt[:consistency] ? {:consistency => opt[:consistency]} : {}
395
+ expr = do_op(:create_idx_expr, index, key, "EQ")
396
+ opt = opt[:consistency] ? {:consistency => opt[:consistency]} : {}
392
397
  while true
393
398
  clause = do_op(:create_idx_clause, [expr], start, count)
394
- chunk = self.conn.get_indexed_slices(column_family, clause, columns, opt)
399
+ chunk = self.conn.get_indexed_slices(column_family, clause, columns, opt)
395
400
  rows.merge!(chunk)
396
401
  if chunk.size == count
397
402
  # Assume there are more chunks, use last key as start of next get
398
403
  start = chunk.keys.last
399
404
  else
400
- # This must be the last chunk
405
+ # This must be the last chunk
401
406
  break
402
407
  end
403
408
  end
@@ -407,88 +412,97 @@ module RightSupport::DB
407
412
  # This method is an attempt to circumvent the Cassandra gem limitation of returning only 100 columns for wide rows,
408
413
  # and also to help reliably iterate through a column family when the node is busy and experiencing many timeouts.
409
414
  #
410
- # Internally, it uses Cassandra#get_indexed_slices to find rows that match your index constraint; when it finds
411
- # a wide row (with more than 1,000 columns), it continues to iterate through the columns of that row
415
+ # Internally, it uses Cassandra#get_indexed_slices to find rows that match your index constraint; for each matching
416
+ # row key, it iterates through all columns of that row, in chunks, using Cassandra#get_range. This approach is less
417
+ # efficient than grabbing some column values in the initial #get_indexed_slices, but it allows us to preserve the
418
+ # natural ordering of the columns we yield, and prevents us from yielding any column twice.
419
+ #
420
+ # A row key may be yielded more than once as each "chunk" of columns from that row is
421
+ # read from the ring, but each column will be yielded exactly once.
412
422
  #
413
423
  # == Parameters:
414
424
  # @param [String] index column name
415
425
  # @param [String] index column value
416
426
  #
417
427
  # == Yields:
418
- # @yield [Array<String, Array<CassandraThrift::ColumnOrSuperColumn>>] array containing index column value passed in and an array of columns matching the index query
428
+ # @yield [row_key, columns] yields one or more times for every row that contains a matching index column, ultimately yielding EVERY column in that row
429
+ # @yieldparam [String] row_key the row key currently being processed
430
+ # @yieldparam [Array] columns an array of Cassandra CassandraThrift::ColumnOrSuperColumn objects
419
431
  def stream_all_indexed_slices(index, key)
420
- expr = do_op(:create_idx_expr, index, key, "EQ")
421
- start_row = ''
432
+ expr = do_op(:create_idx_expr, index, key, "EQ")
433
+ start_row = ''
422
434
 
423
435
  # Loop over all CF rows, with batches of X
424
436
  while (start_row != nil)
425
- # Reset these to their initieral values on every iteration thru the loop, in case
437
+ # Reset these to their initial values on every iteration thru the loop, in case
426
438
  # we backed off due to timeouts (see rescue clauses below)
427
- max_row_count = 100 # how many rows to grab at once
428
- max_initial_column_count = 1000 # how much to grab at first for each row
429
- max_additional_column_count = 1000 # how much to grab in each chunk of a long row
439
+ row_count = 100 # how many rows to grab at once
440
+ column_count = 1000 # how much to grab in each chunk of a long row
430
441
 
431
- clause = do_op(:create_idx_clause, [expr], start_row, max_row_count)
442
+ clause = do_op(:create_idx_clause, [expr], start_row, row_count)
432
443
 
433
- # Now, for each batch of rows, make sure don't ask for "ALL" columns of each row, to avoid hitting rows with a huge amount of columns,
434
- # which would cause large memory pressure here in the client, but more specially might cause long wait times and possible timeouts.
444
+ # Ask for a single column from each row, because we don't care about the column values
445
+ # in this step; we just want the row keys that contain a matching index column.
435
446
  begin
436
- rows = self.conn.get_indexed_slices(column_family, clause, :count => max_initial_column_count)
447
+ row_keys = self.conn.get_indexed_slices(column_family, clause, :count => 1).keys
437
448
  rescue Exception => e
438
449
  if retryable_read_timeout?(e)
439
- logger.error "CassandraModel#stream_all_indexed_slices retrying get_indexed_slices with fewer rows/cols due to a %s: %s @ %s (cf='%s' start_row='%s' count=%d)" %
440
- [e.class.name, e.message, e.backtrace.first, column_family, start_row, max_row_count]
441
- max_row_count /= 10 if max_row_count > 1
442
- max_initial_column_count /= 10 if max_initial_column_count > 1
450
+ logger.error "CassandraModel#stream_all_indexed_slices retrying get_indexed_slices with fewer rows due to a %s: %s @ %s (cf='%s' start_row='%s' row_count=%d)" %
451
+ [e.class.name, e.message, e.backtrace.first, column_family, start_row, row_count]
452
+ row_count /= 10 if row_count > 1
443
453
  retry
444
454
  else
445
455
  raise
446
456
  end
447
457
  end
448
458
 
449
- rows.each_pair do |row_key, columns|
450
- # We already processed this row the previous iteration
459
+ row_keys.each do |row_key|
460
+ # We already processed this row the previous iteration; skip it
451
461
  next if row_key == start_row
452
462
 
453
- yield(row_key, columns)
454
-
455
- if columns.size >= max_initial_column_count
456
- # Loop over all columns of the row (1000 at a time) starting at the last column name
457
- last_column_name = columns.last.column.name
458
- while( last_column_name != nil )
459
- begin
460
- # Retrieve a slice of this row excluding the first column
461
- # as it's already been processed.
462
- more_cols = self.conn.get_range(
463
- column_family,
464
- :start_key => row_key,
465
- :finish_key => row_key,
466
- :count => max_additional_column_count,
467
- :start => last_column_name,
468
- :slices_not_hash => true ).first.columns[1..-1]
469
- rescue Exception => e
470
- if retryable_read_timeout?(e)
471
- logger.error "CassandraModel#stream_all_indexed_slices retrying get_range with fewer rows/cols due to a %s: %s @ %s (cf='%s' row='%s' start='%s' count=%d)" %
472
- [e.class.name, e.message, e.backtrace.first, column_family, row_key, last_column_name, max_additional_column_count]
473
- max_additional_column_count /= 10 if max_additional_column_count > 1
474
- retry
475
- else
476
- raise
477
- end
463
+ start_column = ''
464
+
465
+ while start_column != nil
466
+ begin
467
+ options = {
468
+ :start_key => row_key,
469
+ :finish_key => row_key,
470
+ :start => start_column,
471
+ :count => column_count,
472
+ :slices_not_hash => true
473
+ }
474
+
475
+ columns = self.conn.get_range(column_family, options).first.columns
476
+
477
+ if columns[0].column.name == start_column
478
+ yield(row_key, columns[1..-1])
479
+ else
480
+ yield(row_key, columns)
478
481
  end
479
482
 
480
- yield(row_key, more_cols)
481
- if more_cols.size < max_additional_column_count
482
- last_column_name = nil
483
+ if columns.size >= column_count
484
+ start_column = columns.last.column.name
485
+ else
486
+ start_column = nil
487
+ end
488
+ rescue Exception => e
489
+ if retryable_read_timeout?(e)
490
+ logger.error "CassandraModel#stream_all_indexed_slices retrying get_range with fewer cols due to a %s: %s @ %s (cf='%s' row='%s' start='%s' count=%d)" %
491
+ [e.class.name, e.message, e.backtrace.first, column_family, row_key, last_column_name, column_count]
492
+ column_count /= 10 if column_count > 1
493
+ retry
483
494
  else
484
- last_column_name = more_cols.last.column.name
495
+ raise
485
496
  end
486
497
  end
487
498
  end
488
499
  end
489
500
 
490
- break if rows.size < max_row_count
491
- start_row = rows.keys.last
501
+ if row_keys.size >= row_count
502
+ start_row = row_keys.last
503
+ else
504
+ start_row = nil
505
+ end
492
506
  end
493
507
  end
494
508
 
@@ -505,14 +519,14 @@ module RightSupport::DB
505
519
  # given class as value, but object only contains values for the columns retrieved;
506
520
  # array is always empty if a block is given
507
521
  def get_indexed(index, key, columns = nil, opt = {})
508
- rows = []
522
+ rows = []
509
523
  start = ""
510
524
  count = DEFAULT_COUNT
511
- expr = do_op(:create_idx_expr, index, key, "EQ")
512
- opt = opt[:consistency] ? {:consistency => opt[:consistency]} : {}
525
+ expr = do_op(:create_idx_expr, index, key, "EQ")
526
+ opt = opt[:consistency] ? {:consistency => opt[:consistency]} : {}
513
527
  loop do
514
- clause = do_op(:create_idx_clause, [expr], start, count)
515
- chunk = do_op(:get_indexed_slices, column_family, clause, columns, opt)
528
+ clause = do_op(:create_idx_clause, [expr], start, count)
529
+ chunk = do_op(:get_indexed_slices, column_family, clause, columns, opt)
516
530
  chunk_rows = []
517
531
  chunk.each do |row_key, row_columns|
518
532
  if row_columns && row_key != start
@@ -610,10 +624,10 @@ module RightSupport::DB
610
624
  def do_op(meth, *args, &block)
611
625
  first_started_at ||= Time.now
612
626
  retries ||= 0
613
- started_at = Time.now
627
+ started_at = Time.now
614
628
 
615
629
  # cassandra functionality
616
- result = conn.send(meth, *args, &block)
630
+ result = conn.send(meth, *args, &block)
617
631
 
618
632
  # log functionality
619
633
  do_op_log(first_started_at, started_at, retries, meth, args[0], args[1])
@@ -635,7 +649,7 @@ module RightSupport::DB
635
649
  log_string = sprintf("CassandraModel %s, cf=%s, keys=%d, time=%.1fms", meth, cf, key_count, attempt_time*1000)
636
650
 
637
651
  if retries && retries > 0
638
- total_time = now - first_started_at
652
+ total_time = now - first_started_at
639
653
  log_string += sprintf(", retries=%d, total_time=%.1fms", retries, total_time*1000)
640
654
  end
641
655
 
@@ -724,7 +738,7 @@ module RightSupport::DB
724
738
  # attrs(Hash):: Attributes for object which form Cassandra row
725
739
  # with column name as key and column value as value
726
740
  def initialize(key, attrs = {})
727
- self.key = key
741
+ self.key = key
728
742
  self.attributes = attrs
729
743
  end
730
744