couchproxy 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README CHANGED
@@ -21,10 +21,11 @@ memory.
21
21
 
22
22
  == Dependencies
23
23
 
24
- * em-http-request >= 0.2.11
24
+ * em-http-request >= 0.2.15
25
25
  * json >= 1.4.6
26
26
  * json-stream >= 0.1.0
27
27
  * thin >= 1.2.7
28
+ * rbtree >= 0.3.0
28
29
  * ruby >= 1.9.1
29
30
 
30
31
  == Contact
data/Rakefile CHANGED
@@ -19,7 +19,7 @@ returning the results to the client."
19
19
  s.homepage = "http://github.com/dgraham/couchproxy"
20
20
  s.authors = ["David Graham"]
21
21
  s.files = FileList['[A-Z]*', '{bin,lib,conf}/**/*']
22
- s.test_files = FileList["test/**/*test.rb"]
22
+ s.test_files = FileList["test/**/*"]
23
23
  s.executables = %w[couchproxy]
24
24
  s.require_path = "lib"
25
25
  s.has_rdoc = true
@@ -27,6 +27,7 @@ returning the results to the client."
27
27
  s.add_dependency('json', '~> 1.4')
28
28
  s.add_dependency('json-stream', '~> 0.1')
29
29
  s.add_dependency('thin', '~> 1.2')
30
+ s.add_dependency('rbtree', '~> 0.3')
30
31
  s.required_ruby_version = '>= 1.9.1'
31
32
  end
32
33
 
@@ -35,6 +36,7 @@ Rake::GemPackageTask.new(spec) do |pkg|
35
36
  end
36
37
 
37
38
  Rake::TestTask.new(:test) do |test|
39
+ test.libs << 'test'
38
40
  test.pattern = 'test/**/*_test.rb'
39
41
  test.warning = true
40
42
  end
@@ -3,10 +3,16 @@
3
3
  $:.unshift File.dirname(__FILE__) unless
4
4
  $:.include?(File.dirname(__FILE__))
5
5
 
6
+ module CouchProxy
7
+ VERSION = '0.2.0'
8
+ end
9
+
6
10
  %w[
11
+ digest
7
12
  em-http
8
13
  json
9
14
  json/stream
15
+ rbtree
10
16
  thin
11
17
  time
12
18
  uri
@@ -18,9 +24,14 @@ $:.unshift File.dirname(__FILE__) unless
18
24
  couchproxy/node
19
25
  couchproxy/partition
20
26
  couchproxy/deferrable_body
21
- couchproxy/reducer
22
27
  couchproxy/request
23
28
  couchproxy/router
29
+ couchproxy/row_filter
30
+
31
+ couchproxy/reducer
32
+ couchproxy/reduce/base_reducer
33
+ couchproxy/reduce/map_reducer
34
+ couchproxy/reduce/reduce_reducer
24
35
 
25
36
  couchproxy/rack/base
26
37
  couchproxy/rack/all_databases
@@ -42,7 +53,3 @@ $:.unshift File.dirname(__FILE__) unless
42
53
  couchproxy/rack/uuids
43
54
  couchproxy/rack/view_cleanup
44
55
  ].each {|f| require f }
45
-
46
- module CouchProxy
47
- VERSION = '0.1.0'
48
- end
@@ -1,3 +1,5 @@
1
+ # encoding: UTF-8
2
+
1
3
  $:.unshift File.dirname(__FILE__) unless
2
4
  $:.include?(File.dirname(__FILE__))
3
5
 
@@ -1,8 +1,10 @@
1
+ # encoding: UTF-8
2
+
1
3
  module CouchProxy
4
+
2
5
  # Implements the JSON sorting rules defined at
3
6
  # http://wiki.apache.org/couchdb/View_collation.
4
7
  class Collator
5
- CLASSES = [NilClass, FalseClass, TrueClass, Numeric, String, Array, Hash]
6
8
 
7
9
  def initialize(reverse=false)
8
10
  @reverse = reverse
@@ -27,11 +29,24 @@ module CouchProxy
27
29
  private
28
30
 
29
31
  def compare_class(a, b)
30
- aix = CLASSES.find_index {|c| a.is_a?(c) }
31
- bix = CLASSES.find_index {|c| b.is_a?(c) }
32
+ # optimize common case
33
+ return 0 if a.class == b.class
34
+ aix, bix = class_index(a), class_index(b)
32
35
  aix == bix ? 0 : aix < bix ? -1 : 1
33
36
  end
34
37
 
38
+ def class_index(value)
39
+ case value
40
+ when NilClass then 0
41
+ when FalseClass then 1
42
+ when TrueClass then 2
43
+ when Numeric then 3
44
+ when String then 4
45
+ when Array then 5
46
+ when Hash then 6
47
+ end
48
+ end
49
+
35
50
  # FIXME Implement UCA sorting with ICU
36
51
  def compare_string(a, b)
37
52
  a <=> b
@@ -1,9 +1,14 @@
1
+ # encoding: UTF-8
2
+
1
3
  module CouchProxy
2
4
  module Rack
3
5
  class Base
6
+ APPLICATION_JSON = "application/json".freeze
7
+ TEXT_PLAIN = "text/plain;charset=utf-8".freeze
4
8
  DESIGN_ID = /^_design\/.+/
5
9
  METHODS = [:get, :put, :post, :delete, :head].freeze
6
10
  INVALID_JSON = '{"error":"bad_request","reason":"invalid UTF-8 JSON"}'.freeze
11
+ SERVER_VERSION = "CouchProxy/#{CouchProxy::VERSION}".freeze
7
12
 
8
13
  attr_reader :request, :cluster
9
14
 
@@ -12,14 +17,14 @@ module CouchProxy
12
17
  end
13
18
 
14
19
  def method_missing(name)
15
- allowed = methods.map {|m| m.to_sym } & METHODS
16
- allowed = allowed.map {|m| m.to_s.upcase }.join(',')
20
+ allowed = (methods & METHODS).map {|m| m.to_s.upcase }.sort.join(',')
17
21
  body = "{\"error\:\"method_not_allowed\",\"reason\":\"Only #{allowed} allowed\"}"
18
- send_response(405, response_headers, [body])
22
+ headers = response_headers.tap {|h| h['Allow'] = allowed }
23
+ send_response(405, headers, [body])
19
24
  end
20
25
 
21
26
  def proxy_to(node, &finish)
22
- head_proxy_to(node, finish) if @request.request_method == 'HEAD'
27
+ head_proxy_to(node, &finish) if @request.head?
23
28
 
24
29
  body, started = DeferrableBody.new, false
25
30
  uri = "#{node.uri}#{@request.fullpath}"
@@ -30,7 +35,7 @@ module CouchProxy
30
35
  unless started
31
36
  started = true
32
37
  head = normalize(res.response_header).tap do |h|
33
- h['Server'] = "CouchProxy/#{CouchProxy::VERSION}"
38
+ h['Server'] = SERVER_VERSION
34
39
  if res.response_header.location
35
40
  h['Location'] = rewrite_location(res.response_header.location)
36
41
  end
@@ -59,7 +64,7 @@ module CouchProxy
59
64
  end
60
65
 
61
66
  def proxy_to_any_partition
62
- partition = cluster.any_partition
67
+ partition = @cluster.any_partition
63
68
  request.rewrite_proxy_url!(partition.num)
64
69
  proxy_to(partition.node)
65
70
  end
@@ -67,7 +72,7 @@ module CouchProxy
67
72
  def proxy_to_all_nodes(&callback)
68
73
  method = request.request_method.downcase
69
74
  multi = EM::MultiRequest.new
70
- cluster.nodes.each do |n|
75
+ @cluster.nodes.each do |n|
71
76
  uri = "#{n.uri}#{@request.fullpath}"
72
77
  req = EM::HttpRequest.new(uri).send(method,
73
78
  :head => proxy_headers, :body => @request.content)
@@ -79,9 +84,8 @@ module CouchProxy
79
84
  def proxy_to_all_partitions(&callback)
80
85
  method = request.request_method.downcase
81
86
  multi = EM::MultiRequest.new
82
- cluster.partitions.each do |p|
83
- uri = "#{p.node.uri}#{@request.rewrite_proxy_url(p.num)}"
84
- uri << "?#{@request.query_string}" unless @request.query_string.empty?
87
+ @cluster.partitions.each do |p|
88
+ uri = "#{p.node.uri}#{@request.rewrite_proxy_url(p.num)}#{query_string}"
85
89
  multi.add EM::HttpRequest.new(uri).send(method,
86
90
  :head => proxy_headers, :body => @request.content)
87
91
  end
@@ -102,7 +106,7 @@ module CouchProxy
102
106
  end
103
107
 
104
108
  def uuids(count, &callback)
105
- http = EM::HttpRequest.new("#{cluster.any_node.uri}/_uuids?count=#{count}").get
109
+ http = EM::HttpRequest.new("#{@cluster.any_node.uri}/_uuids?count=#{count}").get
106
110
  http.errback { callback.call(nil) }
107
111
  http.callback do |res|
108
112
  if res.response_header.status == 200
@@ -147,7 +151,9 @@ module CouchProxy
147
151
  http = EM::HttpRequest.new(uri).head(:head => proxy_headers)
148
152
  http.callback do
149
153
  status = http.response_header.status
150
- headers = normalize(http.response_header)
154
+ headers = normalize(http.response_header).tap do |h|
155
+ h['Server'] = SERVER_VERSION
156
+ end
151
157
  send_response(status, headers, [])
152
158
  finish.call if finish
153
159
  end
@@ -172,11 +178,10 @@ module CouchProxy
172
178
  end
173
179
 
174
180
  def response_headers
175
- type = @request.json? ? "application/json" : "text/plain;charset=utf-8"
176
181
  {
177
- "Server" => "CouchProxy/#{CouchProxy::VERSION}",
182
+ "Server" => SERVER_VERSION,
178
183
  "Date" => Time.now.httpdate,
179
- "Content-Type" => type,
184
+ "Content-Type" => @request.json? ? APPLICATION_JSON : TEXT_PLAIN,
180
185
  "Cache-Control" => "must-revalidate"
181
186
  }
182
187
  end
@@ -186,11 +191,18 @@ module CouchProxy
186
191
  end
187
192
 
188
193
  def delete_query_param(param)
189
- value = @request.GET.delete(param)
190
- if value
194
+ @request.GET.delete(param).tap do |value|
191
195
  @request.env['QUERY_STRING'] = ::Rack::Utils.build_query(@request.GET)
192
196
  end
193
- value
197
+ end
198
+
199
+ def update_query_param(param, value)
200
+ @request[param] = value
201
+ @request.env['QUERY_STRING'] = ::Rack::Utils.build_query(@request.GET)
202
+ end
203
+
204
+ def query_string
205
+ @request.query_string.empty? ? '' : "?#{@request.query_string}"
194
206
  end
195
207
  end
196
208
  end
@@ -6,9 +6,6 @@ module CouchProxy
6
6
  QUERY = /_view\/.+$/
7
7
  INFO = /\/_info$/
8
8
  VIEW_NAME = /_view\/(.*)$/
9
- COUNT = '_count'.freeze
10
- SUM = '_sum'.freeze
11
- STATS = '_stats'.freeze
12
9
  REDUCE_ERROR = '{"error":"query_parse_error","reason":"Invalid URL parameter `reduce` for map view."}'.freeze
13
10
 
14
11
  def get
@@ -19,6 +16,20 @@ module CouchProxy
19
16
  end
20
17
  end
21
18
 
19
+ def head
20
+ case request.path_info
21
+ when QUERY
22
+ proxy_to_all_partitions do |responses|
23
+ etags = responses.map {|r| r.response_header.etag }
24
+ head = response_headers.tap do |h|
25
+ h['ETag'] = etag(etags)
26
+ end
27
+ send_response(responses.first.response_header.status, head, [])
28
+ end
29
+ else proxy_to_any_partition
30
+ end
31
+ end
32
+
22
33
  def post
23
34
  # FIXME same as get, but body can have keys in it
24
35
  end
@@ -34,7 +45,7 @@ module CouchProxy
34
45
  sender = proc do
35
46
  send_response(res.response_header.status, head, [res.response])
36
47
  end
37
- if (200...300).include?(res.response_header.status)
48
+ if success?(res)
38
49
  head.tap do |h|
39
50
  h['ETag'] = res.response_header.etag
40
51
  h['Location'] = rewrite_location(res.response_header.location)
@@ -57,10 +68,6 @@ module CouchProxy
57
68
  end
58
69
  end
59
70
 
60
- def head
61
- # FIXME
62
- end
63
-
64
71
  private
65
72
 
66
73
  def query_params
@@ -70,115 +77,118 @@ module CouchProxy
70
77
  params[:descending] = (request['descending'] == 'true')
71
78
  params[:limit] = request['limit'] || ''
72
79
  params[:limit] = params[:limit].empty? ? nil : params[:limit].to_i
73
- params[:skip] = (params[:limit] == 0) ? 0 : delete_query_param('skip').to_i
74
- delete_query_param('limit') if params[:skip] > (params[:limit] || 0)
80
+ params[:skip] = delete_query_param('skip').to_i
81
+ params[:skip] = 0 if params[:limit] == 0
82
+ update_query_param('limit', params[:limit] + params[:skip]) if params[:limit]
75
83
  params[:collator] = CouchProxy::Collator.new(params[:descending])
76
84
  end
77
85
  end
78
86
 
87
+ def send_chunk(body, chunk)
88
+ body.call(["%s\r\n%s\r\n" % [chunk.bytesize.to_s(16), chunk]])
89
+ end
90
+
79
91
  def query
80
92
  params = query_params
81
- proxy_to_all_partitions do |responses|
82
- view_doc do |doc|
83
- if doc
84
- fn = doc['views'][view_name]['reduce']
85
- if request['reduce'] && fn.nil?
86
- send_response(400, response_headers, [REDUCE_ERROR])
87
- elsif params[:reduce] && fn
88
- reduce(params, responses, fn)
89
- else
90
- map(params, responses)
91
- end
93
+ view_doc do |doc|
94
+ if doc
95
+ fn = doc['views'][view_name]['reduce']
96
+ if request['reduce'] && fn.nil?
97
+ send_response(400, response_headers, [REDUCE_ERROR])
98
+ elsif params[:reduce] && fn
99
+ reduce(params, fn)
92
100
  else
93
- send_error_response
101
+ map(params)
94
102
  end
103
+ else
104
+ send_error_response
95
105
  end
96
106
  end
97
107
  end
98
108
 
99
- def map(params, responses)
100
- total = {:total_rows => 0, :offset => 0, :rows =>[]}
101
- responses.each do |res|
102
- result = JSON.parse(res.response)
103
- %w[total_rows rows].each {|k| total[k.to_sym] += result[k] }
104
- end
105
- total[:rows].sort! do |a, b|
106
- key = params[:collator].compare(a['key'], b['key'])
107
- (key == 0) ? params[:collator].compare(a['id'], b['id']) : key
108
- end
109
- total[:rows].slice!(0, params[:skip])
110
- total[:rows].slice!(params[:limit], total[:rows].size) if params[:limit]
111
- total[:offset] = [params[:skip], total[:total_rows]].min
112
- send_response(responses.first.response_header.status,
113
- response_headers, [total.to_json])
114
- end
115
-
116
- def reduce(params, responses, fn)
117
- total = {:rows =>[]}
118
- responses.each do |res|
119
- result = JSON.parse(res.response)
120
- total[:rows] += result['rows']
121
- end
122
- groups = total[:rows].group_by {|row| row['key'] }
123
- case fn
124
- when SUM, COUNT
125
- sum(params, groups)
126
- when STATS
127
- stats(params, groups)
128
- else
129
- view_server(params, fn, groups)
130
- end
131
- end
132
-
133
- def view_server(params, fn, groups)
134
- reduced = {:rows => []}
135
- groups.each do |key, rows|
136
- values = rows.map {|row| row['value'] }
137
- cluster.reducer.rereduce(fn, values) do |result|
138
- success, value = result.flatten
139
- if success
140
- reduced[:rows] << {:key => key, :value => value}
141
- if reduced[:rows].size == groups.size
142
- reduced[:rows].sort! do |a, b|
143
- params[:collator].compare(a[:key], b[:key])
144
- end
145
- send_response(200, response_headers, [reduced.to_json])
146
- end
147
- else
148
- send_error_response
149
- end
150
- end
109
+ def map(params)
110
+ reducer = proc do |sources|
111
+ args = params.merge({:sources => sources})
112
+ CouchProxy::Reduce::MapReducer.new(args)
113
+ end
114
+ spray(reducer) do |total_rows|
115
+ offset = [params[:skip], total_rows].min
116
+ "\n],\"total_rows\":%s,\"offset\":%s}" % [total_rows, offset]
151
117
  end
152
118
  end
153
119
 
154
- def sum(params, groups)
155
- reduced = {:rows => []}
156
- groups.each do |key, rows|
157
- value = rows.map {|row| row['value'] }.inject(:+)
158
- reduced[:rows] << {:key => key, :value => value}
120
+ def reduce(params, fn)
121
+ reducer = proc do |sources|
122
+ args = params.merge({:sources => sources, :fn => fn,
123
+ :reducers => cluster.method(:reducer)})
124
+ CouchProxy::Reduce::ReduceReducer.new(args)
159
125
  end
160
- reduced[:rows].sort! do |a, b|
161
- params[:collator].compare(a[:key], b[:key])
162
- end
163
- send_response(200, response_headers, [reduced.to_json])
126
+ spray(reducer) {|total_rows| "\n]}" }
164
127
  end
165
128
 
166
- def stats(groups)
167
- reduced = {:rows => []}
168
- groups.each do |key, rows|
169
- values = rows.map {|row| row['value'] }
170
- min, max = values.map {|v| [v['min'], v['max']] }.flatten.minmax
171
- sum, count, sumsqr = %w[sum count sumsqr].map do |k|
172
- values.map {|v| v[k] }.inject(:+)
129
+ def spray(reducer, &finish)
130
+ body, etags = DeferrableBody.new, []
131
+
132
+ requests = cluster.partitions.map do |p|
133
+ uri = "#{p.node.uri}#{request.rewrite_proxy_url(p.num)}#{query_string}"
134
+ EM::HttpRequest.new(uri).send(request.request_method.downcase,
135
+ :head => proxy_headers, :body => request.content, :timeout => 300)
136
+ end
137
+
138
+ started = false
139
+ start = proc do
140
+ started = true
141
+ headers = response_headers.tap do |h|
142
+ h['Transfer-Encoding'] = 'chunked'
143
+ h['ETag'] = etag(etags)
144
+ end
145
+ send_response(200, headers, body)
146
+ send_chunk(body, "{\"rows\":[\n")
147
+ end
148
+
149
+ closed = false
150
+ close = proc do
151
+ unless closed
152
+ closed = true
153
+ requests.each {|req| req.close_connection }
154
+ send_error_response
173
155
  end
174
- value = {:sum => sum, :count => count, :min => min, :max => max,
175
- :sumsqr => sumsqr}
176
- reduced[:rows] << {:key => key, :value => value}
177
156
  end
178
- reduced[:rows].sort! do |a, b|
179
- params[:collator].compare(a[:key], b[:key])
157
+
158
+ total_rows = 0
159
+ reducer = reducer.call(requests)
160
+ reducer.error(&close)
161
+ reducer.results do |results|
162
+ start.call unless started
163
+ json = results.map {|row| row.to_json }.join(",\n")
164
+ json << ",\n" unless reducer.complete?
165
+ send_chunk(body, json)
166
+ end
167
+ reducer.complete do
168
+ start.call unless started
169
+ requests.each {|req| req.close_connection }
170
+ chunk = finish.call(total_rows)
171
+ [chunk, ''].each {|c| send_chunk(body, c) }
172
+ body.succeed
173
+ end
174
+
175
+ multi = EM::MultiRequest.new
176
+ requests.each do |req|
177
+ parser = JSON::Stream::Parser.new
178
+ CouchProxy::RowFilter.new(parser) do
179
+ total_rows {|total| total_rows += total }
180
+ rows do |rows, complete|
181
+ reducer.reduce(rows, req, complete)
182
+ end
183
+ end
184
+ req.stream {|chunk| parser << chunk unless closed }
185
+ req.errback(&close)
186
+ req.headers do |h|
187
+ etags << h['ETAG']
188
+ close.call unless success?(req)
189
+ end
190
+ multi.add(req)
180
191
  end
181
- send_response(200, response_headers, [reduced.to_json])
182
192
  end
183
193
 
184
194
  def info
@@ -202,6 +212,15 @@ module CouchProxy
202
212
  end
203
213
  end
204
214
 
215
+ def success?(response)
216
+ (200...300).include?(response.response_header.status)
217
+ end
218
+
219
+ def etag(etags)
220
+ etags = etags.map {|etag| etag || '' }.sort.join
221
+ '"%s"' % Digest::SHA256.hexdigest(etags)
222
+ end
223
+
205
224
  def view_doc_id
206
225
  request.doc_id.split('/')[0..1].join('/')
207
226
  end
@@ -8,8 +8,9 @@ module CouchProxy
8
8
  request.rewrite_proxy_url!(partition.num)
9
9
  proxy_to(partition.node)
10
10
  end
11
- alias :put :get
11
+ alias :put :get
12
12
  alias :delete :get
13
+ alias :head :get
13
14
  end
14
15
  end
15
16
  end
@@ -31,6 +31,10 @@ module CouchProxy
31
31
  end
32
32
  end
33
33
 
34
+ def head
35
+ # FIXME
36
+ end
37
+
34
38
  private
35
39
 
36
40
  def parse(body)
@@ -3,7 +3,8 @@
3
3
  module CouchProxy
4
4
  module Rack
5
5
  class Uuids < Base
6
- alias :get :proxy_to_any_node
6
+ alias :get :proxy_to_any_node
7
+ alias :head :proxy_to_any_node
7
8
  end
8
9
  end
9
10
  end
@@ -0,0 +1,121 @@
1
+ # encoding: UTF-8
2
+
3
+ module CouchProxy
4
+ module Reduce
5
+
6
+ # Sorts and merges results from many different source streams as the data
7
+ # arrives from CouchDB over the network. This uses constant memory space to
8
+ # do the merge so we can handle huge datasets streaming back from the
9
+ # databases. Subclasses must provide a @sorter member variable, used to
10
+ # sort streaming rows before they're processed.
11
+ class BaseReducer
12
+ KEY = 'key'.freeze
13
+ ID = 'id'.freeze
14
+
15
+ # Args should contain the following keys:
16
+ # sources: List of stream sources used to identify from where
17
+ # streaming rows are arriving.
18
+ # limit: Maximum number of rows to return. If not specified, all
19
+ # rows are returned.
20
+ # skip: Number of rows at the start of the stream to skip before
21
+ # returning the rest. If not specified, no rows are skipped.
22
+ def initialize(args)
23
+ @sources, @limit, @skip = args.values_at(:sources, :limit, :skip)
24
+ @sources = Hash[@sources.map {|s| [s, 0] }]
25
+ @listeners = Hash.new {|h, k| h[k] = [] }
26
+ @skip ||= 0
27
+ @returned, @skipped_rows = 0, 0
28
+ @rows = MultiRBTree.new.tap {|t| t.readjust(@sorter) }
29
+ end
30
+
31
+ %w[results complete error].each do |name|
32
+ define_method(name) do |&block|
33
+ @listeners[name] << block
34
+ end
35
+
36
+ define_method("notify_#{name}") do |*args|
37
+ @listeners[name].each do |block|
38
+ block.call(*args)
39
+ end
40
+ end
41
+ private "notify_#{name}"
42
+ end
43
+
44
+ # Gives the reducer more rows to process with their source connection.
45
+ # Complete must be a boolean, signaling whether this stream of rows has
46
+ # finished.
47
+ def reduce(rows, source, complete)
48
+ return if complete?
49
+ rows.each do |row|
50
+ row[:proxy_source] = source
51
+ key = [row[KEY], row[ID]]
52
+ @rows[key] = row
53
+ end
54
+ @sources[source] += rows.size
55
+ @sources.delete(source) if complete
56
+ source.pause unless complete
57
+ process do |results|
58
+ if results
59
+ results = limit(skip(results))
60
+ notify_results(results) if results.any?
61
+ notify_complete if complete?
62
+ resume_streams unless complete?
63
+ else
64
+ notify_error
65
+ end
66
+ end if process?
67
+ end
68
+
69
+ # Returns true if all streams of rows have arrived and the reduce
70
+ # processing is complete.
71
+ def complete?
72
+ @sources.empty?
73
+ end
74
+
75
+ private
76
+
77
+ def resume_streams
78
+ paused = @sources.select {|k, v| k.paused? }.keys
79
+ empty = @sources.select {|k, v| k.paused? && v == 0 }.keys
80
+ (empty.any? ? empty : paused).each {|source| source.resume }
81
+ end
82
+
83
+ def skip(sorted)
84
+ if @skip > @skipped_rows
85
+ @skipped_rows += sorted.slice!(0, @skip - @skipped_rows).size
86
+ end
87
+ sorted
88
+ end
89
+
90
+ def limit(sorted)
91
+ return sorted unless @limit
92
+ if @returned + sorted.size > @limit
93
+ sorted = sorted[0, @limit - @returned]
94
+ end
95
+ @returned += sorted.size
96
+ if @returned == @limit
97
+ [@sources, @rows].each {|arr| arr.clear }
98
+ end
99
+ sorted
100
+ end
101
+
102
+ def process(&callback)
103
+ sorted = [].tap do |rows|
104
+ rows << shift while @rows.any? && process?
105
+ end
106
+ callback.call(sorted)
107
+ end
108
+
109
+ def shift
110
+ @rows.shift.tap do |key, row|
111
+ source = row.delete(:proxy_source)
112
+ @sources[source] -= 1 if @sources.key?(source)
113
+ end[1]
114
+ end
115
+
116
+ def process?
117
+ !@sources.values.include?(0)
118
+ end
119
+ end
120
+ end
121
+ end
@@ -0,0 +1,28 @@
1
+ # encoding: UTF-8
2
+
3
+ module CouchProxy
4
+ module Reduce
5
+
6
+ # Sorts and merges map query results from many different source streams.
7
+ class MapReducer < BaseReducer
8
+
9
+ # Args should contain the following keys:
10
+ # sources: List of stream sources used to identify from where
11
+ # streaming rows are arriving.
12
+ # limit: Maximum number of rows to return. If not specified, all
13
+ # rows are returned.
14
+ # skip: Number of rows at the start of the stream to skip before
15
+ # returning the rest. If not specified, no rows are skipped.
16
+ # collator: A CouchProxy::Collator instance used to sort rows.
17
+ def initialize(args)
18
+ collator = args[:collator]
19
+ # key = 0, id = 1
20
+ @sorter = proc do |a, b|
21
+ key = collator.compare(a[0], b[0])
22
+ (key == 0) ? collator.compare(a[1], b[1]) : key
23
+ end
24
+ super(args)
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,95 @@
1
+ # encoding: UTF-8
2
+
3
+ module CouchProxy
4
+ module Reduce
5
+
6
+ # Sorts and merges reduce query results from many different source streams.
7
+ class ReduceReducer < BaseReducer
8
+ KEY = 'key'.freeze
9
+ VALUE = 'value'.freeze
10
+ COUNT = '_count'.freeze
11
+ SUM = '_sum'.freeze
12
+ STATS = '_stats'.freeze
13
+ BUILT_INS = [COUNT, SUM, STATS]
14
+ NONE = Struct.new(:none)
15
+
16
+ # Args should contain the following keys:
17
+ # sources: List of stream sources used to identify from where
18
+ # streaming rows are arriving.
19
+ # limit: Maximum number of rows to return. If not specified, all
20
+ # rows are returned.
21
+ # skip: Number of rows at the start of the stream to skip before
22
+ # returning the rest. If not specified, no rows are skipped.
23
+ # collator: A CouchProxy::Collator instance used to sort rows.
24
+ # fn: The JavaScript reduce function to apply to the rows.
25
+ # reducers: A block that, when called, returns a CouchProxy::Reducer
26
+ # instance.
27
+ def initialize(args)
28
+ @fn, @reducers, collator = args.values_at(:fn, :reducers, :collator)
29
+ # key = 0, id = 1
30
+ @sorter = proc {|a, b| collator.compare(a[0], b[0]) }
31
+ @processes = []
32
+ super(args)
33
+ end
34
+
35
+ def complete?
36
+ super && @processes.empty?
37
+ end
38
+
39
+ private
40
+
41
+ def process(&callback)
42
+ sorted = [].tap do |rows|
43
+ while @rows.any? && process?
44
+ case @fn
45
+ when SUM, COUNT then rows << sum(next_group)
46
+ when STATS then rows << stats(next_group)
47
+ else view_server(next_group, callback)
48
+ end
49
+ end
50
+ end
51
+ callback.call(sorted) if built_in?
52
+ end
53
+
54
+ def next_group
55
+ key, row = @rows.first
56
+ @rows.bound(key, key).map { shift }
57
+ end
58
+
59
+ def view_server(rows, callback)
60
+ tracker = (@processes << {:value => NONE}).last
61
+ values = rows.map {|row| row[VALUE] }
62
+ @reducers.call.rereduce(@fn, values) do |result|
63
+ success, value = result.flatten
64
+ if success
65
+ tracker[:value] = {:key => rows.first[KEY], :value => value}
66
+ ix = @processes.index {|t| t[:value] == NONE } || @processes.size
67
+ finished = @processes.slice!(0, ix).map {|t| t[:value] }
68
+ callback.call(finished)
69
+ else
70
+ callback.call(nil)
71
+ end
72
+ end
73
+ end
74
+
75
+ def sum(rows)
76
+ value = rows.map {|row| row[VALUE] }.inject(:+)
77
+ {:key => rows.first[KEY], :value => value}
78
+ end
79
+
80
+ def stats(rows)
81
+ values = rows.map {|row| row[VALUE] }
82
+ min, max = values.map {|v| [v['min'], v['max']] }.flatten.minmax
83
+ sum, count, sumsqr = %w[sum count sumsqr].map do |k|
84
+ values.map {|v| v[k] }.inject(:+)
85
+ end
86
+ value = {:sum => sum, :count => count, :min => min, :max => max, :sumsqr => sumsqr}
87
+ {:key => rows.first[KEY], :value => value}
88
+ end
89
+
90
+ def built_in?
91
+ BUILT_INS.include?(@fn)
92
+ end
93
+ end
94
+ end
95
+ end
@@ -23,7 +23,7 @@ module CouchProxy
23
23
  end
24
24
 
25
25
  class ReduceProcess < EventMachine::Connection
26
- def initialize(unbind)
26
+ def initialize(unbind=nil)
27
27
  @unbind, @connected, @callbacks, @deferred = unbind, false, [], []
28
28
  end
29
29
 
@@ -51,7 +51,7 @@ module CouchProxy
51
51
 
52
52
  def unbind
53
53
  @connected = false
54
- @unbind.call
54
+ @unbind.call if @unbind
55
55
  end
56
56
  end
57
57
  end
@@ -1,4 +1,7 @@
1
+ # encoding: UTF-8
2
+
1
3
  module Rack
4
+
2
5
  # Add a few helper methods to Rack's Request class.
3
6
  class Request
4
7
  def json?
@@ -0,0 +1,69 @@
1
+ # encoding: UTF-8
2
+
3
+ module CouchProxy
4
+
5
+ # A JSON::Stream::Parser listener that listens for the 'rows' key
6
+ # in a CouchDB map/reduce result stream. As row objects are parsed
7
+ # they are sent to callbacks for processing. Typically the callback
8
+ # will perform some kind of reduce on the rows before sending them
9
+ # to the client.
10
+ #
11
+ # Example usage:
12
+ # parser = JSON::Stream::Parser.new
13
+ # filter = CouchProxy::RowFilter.new(parser) do
14
+ # total_rows {|total| puts total }
15
+ # rows do |rows, complete|
16
+ # # process rows, complete tells us if this is the last row
17
+ # end
18
+ # end
19
+ class RowFilter < JSON::Stream::Builder
20
+ TOTAL_ROWS = 'total_rows'.freeze
21
+ MAX_ROWS = 100
22
+
23
+ def initialize(parser, &block)
24
+ @listeners = Hash.new {|h, k| h[k] = [] }
25
+ @total_rows_key = false
26
+ super(parser)
27
+ instance_eval(&block) if block_given?
28
+ end
29
+
30
+ %w[total_rows rows].each do |name|
31
+ define_method(name) do |&block|
32
+ @listeners[name] << block
33
+ end
34
+
35
+ define_method("notify_#{name}") do |*args|
36
+ @listeners[name].each do |block|
37
+ block.call(*args)
38
+ end
39
+ end
40
+ private "notify_#{name}"
41
+ end
42
+
43
+ def key(key)
44
+ super
45
+ @total_rows_key = (@stack.size == 1 && key == TOTAL_ROWS)
46
+ end
47
+
48
+ def value(value)
49
+ super
50
+ if @total_rows_key
51
+ @total_rows_key = false
52
+ notify_total_rows(value)
53
+ end
54
+ end
55
+
56
+ def end_document
57
+ notify_rows(@stack.pop.obj['rows'], true)
58
+ end
59
+
60
+ def end_object
61
+ super
62
+ # row object complete
63
+ if @stack.size == 2 && @stack[-1].obj.size >= MAX_ROWS
64
+ notify_rows(@stack.pop.obj, false)
65
+ @stack.push(JSON::Stream::ArrayNode.new)
66
+ end
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,93 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'couchproxy'
4
+ require 'mock_source'
5
+ require 'test/unit'
6
+
7
+ class MapReducerTest < Test::Unit::TestCase
8
+ def setup
9
+ @collator = CouchProxy::Collator.new
10
+ end
11
+
12
+ def test_no_rows_from_one_source
13
+ source = MockSource.new('1')
14
+ reducer = CouchProxy::Reduce::MapReducer.new(
15
+ :sources => [source], :collator => @collator)
16
+ reducer.results do |results|
17
+ flunk("No results expected")
18
+ end
19
+ complete = []
20
+ reducer.complete { complete << 1 }
21
+ reducer.reduce([], source, true)
22
+ assert_equal(1, complete.inject(:+))
23
+ assert(reducer.complete?)
24
+ end
25
+
26
+ def test_no_rows_from_two_sources
27
+ sources = %w[1 2].map {|uri| MockSource.new(uri) }
28
+ reducer = CouchProxy::Reduce::MapReducer.new(
29
+ :sources => sources, :collator => @collator)
30
+ reducer.results do |results|
31
+ flunk("No results expected")
32
+ end
33
+ complete = []
34
+ reducer.complete { complete << 1 }
35
+ reducer.reduce([], sources[0], true)
36
+ assert(!reducer.complete?)
37
+ reducer.reduce([], sources[1], true)
38
+ assert_equal(1, complete.inject(:+))
39
+ assert(reducer.complete?)
40
+ end
41
+
42
+ def test_row_sorting
43
+ sources = %w[1 2].map {|uri| MockSource.new(uri) }
44
+ reducer = CouchProxy::Reduce::MapReducer.new(
45
+ :sources => sources, :collator => @collator)
46
+ rows, complete = [], []
47
+ reducer.results {|results| rows += results }
48
+ reducer.complete { complete << 1 }
49
+ reducer.reduce([{'id' => '2', 'key' => 'a', 'value' => 'v2'}], sources[1], false)
50
+ reducer.reduce([{'id' => '1', 'key' => 'a', 'value' => 'v1'}], sources[0], false)
51
+ reducer.reduce([{'id' => '3', 'key' => 'c', 'value' => 'v4'}], sources[0], true)
52
+ reducer.reduce([{'id' => '4', 'key' => 'b', 'value' => 'v3'}], sources[1], true)
53
+ assert_equal(1, complete.inject(:+))
54
+ assert(reducer.complete?)
55
+ assert_equal(4, rows.size)
56
+ assert_equal(%w[v1 v2 v3 v4], rows.map {|r| r['value'] })
57
+ end
58
+
59
+ def test_limit
60
+ sources = %w[1 2].map {|uri| MockSource.new(uri) }
61
+ reducer = CouchProxy::Reduce::MapReducer.new(
62
+ :sources => sources, :collator => @collator, :limit => 2)
63
+ rows, complete = [], []
64
+ reducer.results {|results| rows += results }
65
+ reducer.complete { complete << 1 }
66
+ reducer.reduce([{'id' => '1', 'key' => 'a', 'value' => 'v1'}], sources[0], false)
67
+ reducer.reduce([{'id' => '3', 'key' => 'c', 'value' => 'v4'}], sources[0], false)
68
+ assert(!reducer.complete?)
69
+ reducer.reduce([{'id' => '2', 'key' => 'a', 'value' => 'v2'}], sources[1], false)
70
+ assert(reducer.complete?)
71
+ reducer.reduce([{'id' => '4', 'key' => 'b', 'value' => 'v3'}], sources[1], false)
72
+ assert_equal(1, complete.inject(:+))
73
+ assert_equal(2, rows.size)
74
+ assert_equal(%w[v1 v2], rows.map {|r| r['value'] })
75
+ end
76
+
77
+ def test_skip
78
+ sources = %w[1 2].map {|uri| MockSource.new(uri) }
79
+ reducer = CouchProxy::Reduce::MapReducer.new(
80
+ :sources => sources, :collator => @collator, :skip => 2)
81
+ rows, complete = [], []
82
+ reducer.results {|results| rows += results }
83
+ reducer.complete { complete << 1 }
84
+ reducer.reduce([{'id' => '1', 'key' => 'a', 'value' => 'v1'}], sources[0], false)
85
+ reducer.reduce([{'id' => '3', 'key' => 'c', 'value' => 'v4'}], sources[0], true)
86
+ reducer.reduce([{'id' => '2', 'key' => 'a', 'value' => 'v2'}], sources[1], false)
87
+ reducer.reduce([{'id' => '4', 'key' => 'b', 'value' => 'v3'}], sources[1], true)
88
+ assert(reducer.complete?)
89
+ assert_equal(1, complete.inject(:+))
90
+ assert_equal(2, rows.size)
91
+ assert_equal(%w[v3 v4], rows.map {|r| r['value'] })
92
+ end
93
+ end
@@ -0,0 +1,32 @@
1
+ # encoding: UTF-8
2
+
3
+ # A source of rows that responds to EventMachine::Connection pause and resume
4
+ # methods. The reducer pauses sources to allow rows from slower connections
5
+ # to be read.
6
+ class MockSource
7
+ attr_reader :uri
8
+
9
+ def initialize(uri)
10
+ @uri, @paused = uri, false
11
+ end
12
+
13
+ def pause
14
+ @paused = true
15
+ end
16
+
17
+ def paused?
18
+ @paused
19
+ end
20
+
21
+ def resume
22
+ @paused = false
23
+ end
24
+
25
+ def ==(source)
26
+ @uri == source.uri
27
+ end
28
+
29
+ def <=>(source)
30
+ @uri <=> source.uri
31
+ end
32
+ end
@@ -0,0 +1,60 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'couchproxy'
4
+ require 'mock_source'
5
+ require 'test/unit'
6
+
7
# Drives CouchProxy::Reduce::ReduceReducer with reduced rows from two mock
# sources and checks the rows it emits for the '_sum' and '_stats' functions.
class ReduceReducerTest < Test::Unit::TestCase
  def setup
    @collator = CouchProxy::Collator.new
  end

  # Both sources report key 'a'; the expected output has one row per key with
  # the 'a' values added together.
  def test_sum
    sources = mock_sources
    first, second = sources
    reducer = reducer_for(sources, '_sum')

    emitted, finished = [], []
    reducer.results { |batch| emitted += batch }
    reducer.complete { finished << 1 }

    reducer.reduce([{'key' => 'a', 'value' => 2}], second, false)
    reducer.reduce([{'key' => 'a', 'value' => 4}], first, false)
    reducer.reduce([{'key' => 'c', 'value' => 6}], first, true)
    reducer.reduce([{'key' => 'b', 'value' => 8}], second, true)

    assert_equal(1, finished.inject(:+))
    assert(reducer.complete?)
    assert_equal(3, emitted.size)
    flattened = emitted.map { |row| row.values_at(:key, :value) }.flatten
    assert_equal(['a', 6, 'b', 8, 'c', 6], flattened)
  end

  # Same delivery pattern as test_sum, but with '_stats' style values. The two
  # 'a' rows are expected to be combined into a single stats hash; the 'b' and
  # 'c' values are expected back unchanged apart from symbolized keys.
  def test_stats
    sources = mock_sources
    first, second = sources
    reducer = reducer_for(sources, '_stats')

    emitted, finished = [], []
    reducer.results { |batch| emitted += batch }
    reducer.complete { finished << 1 }

    stats = [
      {'sum' => 2, 'count' => 3, 'min' => 0, 'max' => 2, 'sumsqr' => 1},
      {'sum' => 4, 'count' => 6, 'min' => 1, 'max' => 3, 'sumsqr' => 2},
      {'sum' => 2, 'count' => 3, 'min' => 0, 'max' => 2, 'sumsqr' => 3},
      {'sum' => 2, 'count' => 3, 'min' => 0, 'max' => 2, 'sumsqr' => 4}
    ]
    reducer.reduce([{'key' => 'a', 'value' => stats[0]}], second, false)
    reducer.reduce([{'key' => 'a', 'value' => stats[1]}], first, false)
    reducer.reduce([{'key' => 'c', 'value' => stats[2]}], first, true)
    reducer.reduce([{'key' => 'b', 'value' => stats[3]}], second, true)

    assert_equal(1, finished.inject(:+))
    assert(reducer.complete?)
    assert_equal(3, emitted.size)

    flattened = emitted.map { |row| row.values_at(:key, :value) }.flatten
    combined = {:sum => 6, :count => 9, :min => 0, :max => 3, :sumsqr => 3}
    expected = ['a', combined, 'b', to_sym(stats[3]), 'c', to_sym(stats[2])]
    assert_equal(expected, flattened)
  end

  private

  # Builds the pair of mock sources, with uris '1' and '2', used by each test.
  def mock_sources
    %w[1 2].map { |uri| MockSource.new(uri) }
  end

  # Creates a reducer over the given sources for the named reduce function.
  def reducer_for(sources, fn)
    CouchProxy::Reduce::ReduceReducer.new(
      :sources => sources, :collator => @collator, :fn => fn)
  end

  # Returns a copy of the hash with every key converted to a symbol.
  def to_sym(hash)
    hash.each_with_object({}) do |(key, value), symbolized|
      symbolized[key.to_sym] = value
    end
  end
end
@@ -0,0 +1,51 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'couchproxy'
4
+ require 'test/unit'
5
+
6
# Streams JSON view responses through a JSON::Stream::Parser wrapped by
# CouchProxy::RowFilter and checks the total_rows and rows callbacks.
class RowFilterTest < Test::Unit::TestCase
  def setup
    @parser = JSON::Stream::Parser.new
    @filter = CouchProxy::RowFilter.new(@parser)
  end

  # Only the top-level total_rows should be reported; the nested total_rows
  # keys inside each row's key and value are decoys.
  def test_total_rows
    reported = -1
    @filter.total_rows { |total| reported = total }
    nested = {:total_rows => 42}
    document = {:total_rows => 2, :offset => 0, :rows => [
      {:id => "1", :key => nested, :value => nested},
      {:id => "2", :key => nested, :value => nested}
    ]}
    @parser << document.to_json
    assert_equal(2, reported)
  end

  # With no top-level total_rows the callback must not fire, so the sentinel
  # value is left untouched even though a row value contains that key.
  def test_total_rows_missing
    reported = -1
    @filter.total_rows { |total| reported = total }
    document = {:offset => 0, :rows => [
      {:id => "1", :key => nil, :value => {:total_rows => 42}}
    ]}
    @parser << document.to_json
    assert_equal(-1, reported)
  end

  def test_rows_with_small_dataset
    assert_rows_streamed(3)
  end

  def test_rows_with_large_dataset
    assert_rows_streamed(5003)
  end

  private

  # Sends a response containing `count` rows through the parser and asserts
  # that the rows callback delivered every row, in order. Each row's key and
  # value contain a nested :rows array as a decoy.
  def assert_rows_streamed(count)
    received = []
    @filter.rows { |batch| received += batch }
    rows = (0...count).map do |index|
      {:id => index.to_s, :key => {:rows => [42]}, :value => {:rows => [42]}}
    end
    document = {:total_rows => 2, :offset => 0, :rows => rows}
    @parser << document.to_json
    assert_equal(count, received.size)
    expected_ids = (0...count).map { |index| index.to_s }
    assert_equal(expected_ids, received.map { |row| row['id'] })
  end
end
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
- - 1
7
+ - 2
8
8
  - 0
9
- version: 0.1.0
9
+ version: 0.2.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - David Graham
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-09-06 00:00:00 -06:00
17
+ date: 2011-01-03 00:00:00 -07:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -73,6 +73,20 @@ dependencies:
73
73
  version: "1.2"
74
74
  type: :runtime
75
75
  version_requirements: *id004
76
+ - !ruby/object:Gem::Dependency
77
+ name: rbtree
78
+ prerelease: false
79
+ requirement: &id005 !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ~>
83
+ - !ruby/object:Gem::Version
84
+ segments:
85
+ - 0
86
+ - 3
87
+ version: "0.3"
88
+ type: :runtime
89
+ version_requirements: *id005
76
90
  description: |-
77
91
  CouchProxy is a simple proxy server that distributes reads and writes to a
78
92
  cluster of Apache CouchDB servers so they appear to be a single huge database.
@@ -119,13 +133,21 @@ files:
119
133
  - lib/couchproxy/rack/users.rb
120
134
  - lib/couchproxy/rack/uuids.rb
121
135
  - lib/couchproxy/rack/view_cleanup.rb
136
+ - lib/couchproxy/reduce/base_reducer.rb
137
+ - lib/couchproxy/reduce/map_reducer.rb
138
+ - lib/couchproxy/reduce/reduce_reducer.rb
122
139
  - lib/couchproxy/reducer.rb
123
140
  - lib/couchproxy/request.rb
124
141
  - lib/couchproxy/router.rb
142
+ - lib/couchproxy/row_filter.rb
125
143
  - lib/couchproxy.rb
126
144
  - lib/couchproxy.ru
127
145
  - conf/couchproxy.yml
128
146
  - test/collator_test.rb
147
+ - test/map_reducer_test.rb
148
+ - test/mock_source.rb
149
+ - test/reduce_reducer_test.rb
150
+ - test/row_filter_test.rb
129
151
  has_rdoc: true
130
152
  homepage: http://github.com/dgraham/couchproxy
131
153
  licenses: []
@@ -162,3 +184,7 @@ specification_version: 3
162
184
  summary: A proxy server for Apache CouchDB clusters.
163
185
  test_files:
164
186
  - test/collator_test.rb
187
+ - test/map_reducer_test.rb
188
+ - test/mock_source.rb
189
+ - test/reduce_reducer_test.rb
190
+ - test/row_filter_test.rb