couchproxy 0.1.0 → 0.2.0

data/README CHANGED
@@ -21,10 +21,11 @@ memory.
 
  == Dependencies
 
- * em-http-request >= 0.2.11
+ * em-http-request >= 0.2.15
  * json >= 1.4.6
  * json-stream >= 0.1.0
  * thin >= 1.2.7
+ * rbtree >= 0.3.0
  * ruby >= 1.9.1
 
  == Contact
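
For an application that embeds couchproxy, the updated dependency list above translates into a Gemfile along these lines. This is only an illustrative sketch; the gem itself declares its requirements in the gemspec and metadata shown further down.

# Illustrative Gemfile only; couchproxy declares these versions in its gemspec.
source 'http://rubygems.org'

gem 'couchproxy',      '~> 0.2.0'
gem 'em-http-request', '>= 0.2.15'
gem 'json',            '>= 1.4.6'
gem 'json-stream',     '>= 0.1.0'
gem 'thin',            '>= 1.2.7'
gem 'rbtree',          '>= 0.3.0'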
data/Rakefile CHANGED
@@ -19,7 +19,7 @@ returning the results to the client."
  s.homepage = "http://github.com/dgraham/couchproxy"
  s.authors = ["David Graham"]
  s.files = FileList['[A-Z]*', '{bin,lib,conf}/**/*']
- s.test_files = FileList["test/**/*test.rb"]
+ s.test_files = FileList["test/**/*"]
  s.executables = %w[couchproxy]
  s.require_path = "lib"
  s.has_rdoc = true
@@ -27,6 +27,7 @@ returning the results to the client."
  s.add_dependency('json', '~> 1.4')
  s.add_dependency('json-stream', '~> 0.1')
  s.add_dependency('thin', '~> 1.2')
+ s.add_dependency('rbtree', '~> 0.3')
  s.required_ruby_version = '>= 1.9.1'
  end
 
@@ -35,6 +36,7 @@ Rake::GemPackageTask.new(spec) do |pkg|
  end
 
  Rake::TestTask.new(:test) do |test|
+ test.libs << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.warning = true
  end
@@ -3,10 +3,16 @@
  $:.unshift File.dirname(__FILE__) unless
  $:.include?(File.dirname(__FILE__))
 
+ module CouchProxy
+ VERSION = '0.2.0'
+ end
+
  %w[
+ digest
  em-http
  json
  json/stream
+ rbtree
  thin
  time
  uri
@@ -18,9 +24,14 @@ $:.unshift File.dirname(__FILE__) unless
  couchproxy/node
  couchproxy/partition
  couchproxy/deferrable_body
- couchproxy/reducer
  couchproxy/request
  couchproxy/router
+ couchproxy/row_filter
+
+ couchproxy/reducer
+ couchproxy/reduce/base_reducer
+ couchproxy/reduce/map_reducer
+ couchproxy/reduce/reduce_reducer
 
  couchproxy/rack/base
  couchproxy/rack/all_databases
@@ -42,7 +53,3 @@ $:.unshift File.dirname(__FILE__) unless
  couchproxy/rack/uuids
  couchproxy/rack/view_cleanup
  ].each {|f| require f }
-
- module CouchProxy
- VERSION = '0.1.0'
- end
@@ -1,3 +1,5 @@
+ # encoding: UTF-8
+
  $:.unshift File.dirname(__FILE__) unless
  $:.include?(File.dirname(__FILE__))
 
@@ -1,8 +1,10 @@
+ # encoding: UTF-8
+
  module CouchProxy
+
  # Implements the JSON sorting rules defined at
  # http://wiki.apache.org/couchdb/View_collation.
  class Collator
- CLASSES = [NilClass, FalseClass, TrueClass, Numeric, String, Array, Hash]
 
  def initialize(reverse=false)
  @reverse = reverse
@@ -27,11 +29,24 @@ module CouchProxy
  private
 
  def compare_class(a, b)
- aix = CLASSES.find_index {|c| a.is_a?(c) }
- bix = CLASSES.find_index {|c| b.is_a?(c) }
+ # optimize common case
+ return 0 if a.class == b.class
+ aix, bix = class_index(a), class_index(b)
  aix == bix ? 0 : aix < bix ? -1 : 1
  end
 
+ def class_index(value)
+ case value
+ when NilClass then 0
+ when FalseClass then 1
+ when TrueClass then 2
+ when Numeric then 3
+ when String then 4
+ when Array then 5
+ when Hash then 6
+ end
+ end
+
  # FIXME Implement UCA sorting with ICU
  def compare_string(a, b)
  a <=> b
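
The class_index ranking above follows CouchDB's view collation order: null sorts before false, then true, numbers, strings, arrays, and objects. A small sketch of that ordering, assuming Collator#compare is the public comparison entry point (as the reducers later in this diff use it):

# Illustrative only, not part of the diff.
require 'couchproxy'

collator = CouchProxy::Collator.new
values = [{'a' => 1}, 'apple', [1, 2], 42, true, nil, false]
sorted = values.sort {|a, b| collator.compare(a, b) }
# Expected CouchDB collation order:
# nil, false, true, 42, "apple", [1, 2], {"a"=>1}
p sorted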
@@ -1,9 +1,14 @@
+ # encoding: UTF-8
+
  module CouchProxy
  module Rack
  class Base
+ APPLICATION_JSON = "application/json".freeze
+ TEXT_PLAIN = "text/plain;charset=utf-8".freeze
  DESIGN_ID = /^_design\/.+/
  METHODS = [:get, :put, :post, :delete, :head].freeze
  INVALID_JSON = '{"error":"bad_request","reason":"invalid UTF-8 JSON"}'.freeze
+ SERVER_VERSION = "CouchProxy/#{CouchProxy::VERSION}".freeze
 
  attr_reader :request, :cluster
 
@@ -12,14 +17,14 @@ module CouchProxy
  end
 
  def method_missing(name)
- allowed = methods.map {|m| m.to_sym } & METHODS
- allowed = allowed.map {|m| m.to_s.upcase }.join(',')
+ allowed = (methods & METHODS).map {|m| m.to_s.upcase }.sort.join(',')
  body = "{\"error\:\"method_not_allowed\",\"reason\":\"Only #{allowed} allowed\"}"
- send_response(405, response_headers, [body])
+ headers = response_headers.tap {|h| h['Allow'] = allowed }
+ send_response(405, headers, [body])
  end
 
  def proxy_to(node, &finish)
- head_proxy_to(node, finish) if @request.request_method == 'HEAD'
+ head_proxy_to(node, &finish) if @request.head?
 
  body, started = DeferrableBody.new, false
  uri = "#{node.uri}#{@request.fullpath}"
@@ -30,7 +35,7 @@
  unless started
  started = true
  head = normalize(res.response_header).tap do |h|
- h['Server'] = "CouchProxy/#{CouchProxy::VERSION}"
+ h['Server'] = SERVER_VERSION
  if res.response_header.location
  h['Location'] = rewrite_location(res.response_header.location)
  end
@@ -59,7 +64,7 @@
  end
 
  def proxy_to_any_partition
- partition = cluster.any_partition
+ partition = @cluster.any_partition
  request.rewrite_proxy_url!(partition.num)
  proxy_to(partition.node)
  end
@@ -67,7 +72,7 @@
  def proxy_to_all_nodes(&callback)
  method = request.request_method.downcase
  multi = EM::MultiRequest.new
- cluster.nodes.each do |n|
+ @cluster.nodes.each do |n|
  uri = "#{n.uri}#{@request.fullpath}"
  req = EM::HttpRequest.new(uri).send(method,
  :head => proxy_headers, :body => @request.content)
@@ -79,9 +84,8 @@
  def proxy_to_all_partitions(&callback)
  method = request.request_method.downcase
  multi = EM::MultiRequest.new
- cluster.partitions.each do |p|
- uri = "#{p.node.uri}#{@request.rewrite_proxy_url(p.num)}"
- uri << "?#{@request.query_string}" unless @request.query_string.empty?
+ @cluster.partitions.each do |p|
+ uri = "#{p.node.uri}#{@request.rewrite_proxy_url(p.num)}#{query_string}"
  multi.add EM::HttpRequest.new(uri).send(method,
  :head => proxy_headers, :body => @request.content)
  end
@@ -102,7 +106,7 @@
  end
 
  def uuids(count, &callback)
- http = EM::HttpRequest.new("#{cluster.any_node.uri}/_uuids?count=#{count}").get
+ http = EM::HttpRequest.new("#{@cluster.any_node.uri}/_uuids?count=#{count}").get
  http.errback { callback.call(nil) }
  http.callback do |res|
  if res.response_header.status == 200
@@ -147,7 +151,9 @@
  http = EM::HttpRequest.new(uri).head(:head => proxy_headers)
  http.callback do
  status = http.response_header.status
- headers = normalize(http.response_header)
+ headers = normalize(http.response_header).tap do |h|
+ h['Server'] = SERVER_VERSION
+ end
  send_response(status, headers, [])
  finish.call if finish
  end
@@ -172,11 +178,10 @@
  end
 
  def response_headers
- type = @request.json? ? "application/json" : "text/plain;charset=utf-8"
  {
- "Server" => "CouchProxy/#{CouchProxy::VERSION}",
+ "Server" => SERVER_VERSION,
  "Date" => Time.now.httpdate,
- "Content-Type" => type,
+ "Content-Type" => @request.json? ? APPLICATION_JSON : TEXT_PLAIN,
  "Cache-Control" => "must-revalidate"
  }
  end
@@ -186,11 +191,18 @@
  end
 
  def delete_query_param(param)
- value = @request.GET.delete(param)
- if value
+ @request.GET.delete(param).tap do |value|
  @request.env['QUERY_STRING'] = ::Rack::Utils.build_query(@request.GET)
  end
- value
+ end
+
+ def update_query_param(param, value)
+ @request[param] = value
+ @request.env['QUERY_STRING'] = ::Rack::Utils.build_query(@request.GET)
+ end
+
+ def query_string
+ @request.query_string.empty? ? '' : "?#{@request.query_string}"
  end
  end
  end
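
The new delete_query_param, update_query_param, and query_string helpers keep env['QUERY_STRING'] in sync with Rack's parsed GET hash, so rewritten parameters (for example, folding skip into limit before fanning out to partitions) reach the CouchDB nodes. A standalone sketch of that round trip using only Rack::Utils, with made-up parameter values:

require 'rack/utils'

# Parse a hypothetical incoming query, drop the proxy-only 'skip' parameter,
# fold it into 'limit', and rebuild the string, mirroring what the helpers
# above do against @request.GET and env['QUERY_STRING'].
params = Rack::Utils.parse_query('skip=5&limit=10&descending=true')
skip = params.delete('skip').to_i                 # => 5
params['limit'] = (params['limit'].to_i + skip).to_s
query = Rack::Utils.build_query(params)
puts query  # => "limit=15&descending=true"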
@@ -6,9 +6,6 @@ module CouchProxy
  QUERY = /_view\/.+$/
  INFO = /\/_info$/
  VIEW_NAME = /_view\/(.*)$/
- COUNT = '_count'.freeze
- SUM = '_sum'.freeze
- STATS = '_stats'.freeze
  REDUCE_ERROR = '{"error":"query_parse_error","reason":"Invalid URL parameter `reduce` for map view."}'.freeze
 
  def get
@@ -19,6 +16,20 @@
  end
  end
 
+ def head
+ case request.path_info
+ when QUERY
+ proxy_to_all_partitions do |responses|
+ etags = responses.map {|r| r.response_header.etag }
+ head = response_headers.tap do |h|
+ h['ETag'] = etag(etags)
+ end
+ send_response(responses.first.response_header.status, head, [])
+ end
+ else proxy_to_any_partition
+ end
+ end
+
  def post
  # FIXME same as get, but body can have keys in it
  end
@@ -34,7 +45,7 @@
  sender = proc do
  send_response(res.response_header.status, head, [res.response])
  end
- if (200...300).include?(res.response_header.status)
+ if success?(res)
  head.tap do |h|
  h['ETag'] = res.response_header.etag
  h['Location'] = rewrite_location(res.response_header.location)
@@ -57,10 +68,6 @@
  end
  end
 
- def head
- # FIXME
- end
-
  private
 
  def query_params
@@ -70,115 +77,118 @@
  params[:descending] = (request['descending'] == 'true')
  params[:limit] = request['limit'] || ''
  params[:limit] = params[:limit].empty? ? nil : params[:limit].to_i
- params[:skip] = (params[:limit] == 0) ? 0 : delete_query_param('skip').to_i
- delete_query_param('limit') if params[:skip] > (params[:limit] || 0)
+ params[:skip] = delete_query_param('skip').to_i
+ params[:skip] = 0 if params[:limit] == 0
+ update_query_param('limit', params[:limit] + params[:skip]) if params[:limit]
  params[:collator] = CouchProxy::Collator.new(params[:descending])
  end
  end
 
+ def send_chunk(body, chunk)
+ body.call(["%s\r\n%s\r\n" % [chunk.bytesize.to_s(16), chunk]])
+ end
+
  def query
  params = query_params
- proxy_to_all_partitions do |responses|
- view_doc do |doc|
- if doc
- fn = doc['views'][view_name]['reduce']
- if request['reduce'] && fn.nil?
- send_response(400, response_headers, [REDUCE_ERROR])
- elsif params[:reduce] && fn
- reduce(params, responses, fn)
- else
- map(params, responses)
- end
+ view_doc do |doc|
+ if doc
+ fn = doc['views'][view_name]['reduce']
+ if request['reduce'] && fn.nil?
+ send_response(400, response_headers, [REDUCE_ERROR])
+ elsif params[:reduce] && fn
+ reduce(params, fn)
  else
- send_error_response
+ map(params)
  end
+ else
+ send_error_response
  end
  end
  end
 
- def map(params, responses)
- total = {:total_rows => 0, :offset => 0, :rows =>[]}
- responses.each do |res|
- result = JSON.parse(res.response)
- %w[total_rows rows].each {|k| total[k.to_sym] += result[k] }
- end
- total[:rows].sort! do |a, b|
- key = params[:collator].compare(a['key'], b['key'])
- (key == 0) ? params[:collator].compare(a['id'], b['id']) : key
- end
- total[:rows].slice!(0, params[:skip])
- total[:rows].slice!(params[:limit], total[:rows].size) if params[:limit]
- total[:offset] = [params[:skip], total[:total_rows]].min
- send_response(responses.first.response_header.status,
- response_headers, [total.to_json])
- end
-
- def reduce(params, responses, fn)
- total = {:rows =>[]}
- responses.each do |res|
- result = JSON.parse(res.response)
- total[:rows] += result['rows']
- end
- groups = total[:rows].group_by {|row| row['key'] }
- case fn
- when SUM, COUNT
- sum(params, groups)
- when STATS
- stats(params, groups)
- else
- view_server(params, fn, groups)
- end
- end
-
- def view_server(params, fn, groups)
- reduced = {:rows => []}
- groups.each do |key, rows|
- values = rows.map {|row| row['value'] }
- cluster.reducer.rereduce(fn, values) do |result|
- success, value = result.flatten
- if success
- reduced[:rows] << {:key => key, :value => value}
- if reduced[:rows].size == groups.size
- reduced[:rows].sort! do |a, b|
- params[:collator].compare(a[:key], b[:key])
- end
- send_response(200, response_headers, [reduced.to_json])
- end
- else
- send_error_response
- end
- end
+ def map(params)
+ reducer = proc do |sources|
+ args = params.merge({:sources => sources})
+ CouchProxy::Reduce::MapReducer.new(args)
+ end
+ spray(reducer) do |total_rows|
+ offset = [params[:skip], total_rows].min
+ "\n],\"total_rows\":%s,\"offset\":%s}" % [total_rows, offset]
  end
  end
 
- def sum(params, groups)
- reduced = {:rows => []}
- groups.each do |key, rows|
- value = rows.map {|row| row['value'] }.inject(:+)
- reduced[:rows] << {:key => key, :value => value}
+ def reduce(params, fn)
+ reducer = proc do |sources|
+ args = params.merge({:sources => sources, :fn => fn,
+ :reducers => cluster.method(:reducer)})
+ CouchProxy::Reduce::ReduceReducer.new(args)
  end
- reduced[:rows].sort! do |a, b|
- params[:collator].compare(a[:key], b[:key])
- end
- send_response(200, response_headers, [reduced.to_json])
+ spray(reducer) {|total_rows| "\n]}" }
  end
 
- def stats(groups)
- reduced = {:rows => []}
- groups.each do |key, rows|
- values = rows.map {|row| row['value'] }
- min, max = values.map {|v| [v['min'], v['max']] }.flatten.minmax
- sum, count, sumsqr = %w[sum count sumsqr].map do |k|
- values.map {|v| v[k] }.inject(:+)
+ def spray(reducer, &finish)
+ body, etags = DeferrableBody.new, []
+
+ requests = cluster.partitions.map do |p|
+ uri = "#{p.node.uri}#{request.rewrite_proxy_url(p.num)}#{query_string}"
+ EM::HttpRequest.new(uri).send(request.request_method.downcase,
+ :head => proxy_headers, :body => request.content, :timeout => 300)
+ end
+
+ started = false
+ start = proc do
+ started = true
+ headers = response_headers.tap do |h|
+ h['Transfer-Encoding'] = 'chunked'
+ h['ETag'] = etag(etags)
+ end
+ send_response(200, headers, body)
+ send_chunk(body, "{\"rows\":[\n")
+ end
+
+ closed = false
+ close = proc do
+ unless closed
+ closed = true
+ requests.each {|req| req.close_connection }
+ send_error_response
  end
- value = {:sum => sum, :count => count, :min => min, :max => max,
- :sumsqr => sumsqr}
- reduced[:rows] << {:key => key, :value => value}
  end
- reduced[:rows].sort! do |a, b|
- params[:collator].compare(a[:key], b[:key])
+
+ total_rows = 0
+ reducer = reducer.call(requests)
+ reducer.error(&close)
+ reducer.results do |results|
+ start.call unless started
+ json = results.map {|row| row.to_json }.join(",\n")
+ json << ",\n" unless reducer.complete?
+ send_chunk(body, json)
+ end
+ reducer.complete do
+ start.call unless started
+ requests.each {|req| req.close_connection }
+ chunk = finish.call(total_rows)
+ [chunk, ''].each {|c| send_chunk(body, c) }
+ body.succeed
+ end
+
+ multi = EM::MultiRequest.new
+ requests.each do |req|
+ parser = JSON::Stream::Parser.new
+ CouchProxy::RowFilter.new(parser) do
+ total_rows {|total| total_rows += total }
+ rows do |rows, complete|
+ reducer.reduce(rows, req, complete)
+ end
+ end
+ req.stream {|chunk| parser << chunk unless closed }
+ req.errback(&close)
+ req.headers do |h|
+ etags << h['ETAG']
+ close.call unless success?(req)
+ end
+ multi.add(req)
  end
- send_response(200, response_headers, [reduced.to_json])
  end
 
  def info
@@ -202,6 +212,15 @@
  end
  end
 
+ def success?(response)
+ (200...300).include?(response.response_header.status)
+ end
+
+ def etag(etags)
+ etags = etags.map {|etag| etag || '' }.sort.join
+ '"%s"' % Digest::SHA256.hexdigest(etags)
+ end
+
  def view_doc_id
  request.doc_id.split('/')[0..1].join('/')
  end
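
send_chunk frames every piece of the streamed view result as an HTTP chunked-transfer chunk: the payload size in hex, CRLF, the payload, CRLF; the empty string pushed by spray's complete callback produces the zero-length chunk that terminates the stream. A tiny sketch of that framing on its own:

# Illustrative only: the same framing send_chunk applies to each payload.
def frame(chunk)
  "%s\r\n%s\r\n" % [chunk.bytesize.to_s(16), chunk]
end

frame("{\"rows\":[\n")  # => "a\r\n{\"rows\":[\n\r\n"  (10 bytes => hex "a")
frame('')               # => "0\r\n\r\n"               (terminating chunk)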
@@ -8,8 +8,9 @@ module CouchProxy
  request.rewrite_proxy_url!(partition.num)
  proxy_to(partition.node)
  end
- alias :put :get
+ alias :put :get
  alias :delete :get
+ alias :head :get
  end
  end
  end
@@ -31,6 +31,10 @@ module CouchProxy
  end
  end
 
+ def head
+ # FIXME
+ end
+
  private
 
  def parse(body)
@@ -3,7 +3,8 @@
  module CouchProxy
  module Rack
  class Uuids < Base
- alias :get :proxy_to_any_node
+ alias :get :proxy_to_any_node
+ alias :head :proxy_to_any_node
  end
  end
  end
@@ -0,0 +1,121 @@
+ # encoding: UTF-8
+
+ module CouchProxy
+ module Reduce
+
+ # Sorts and merges results from many different source streams as the data
+ # arrives from CouchDB over the network. This uses constant memory space to
+ # do the merge so we can handle huge datasets streaming back from the
+ # databases. Subclasses must provide a @sorter member variable, used to
+ # sort streaming rows before they're processed.
+ class BaseReducer
+ KEY = 'key'.freeze
+ ID = 'id'.freeze
+
+ # Args should contain the following keys:
+ # sources: List of stream sources used to identify from where
+ # streaming rows are arriving.
+ # limit: Maximum number of rows to return. If not specified, all
+ # rows are returned.
+ # skip: Number of rows at the start of the stream to skip before
+ # returning the rest. If not specified, no rows are skipped.
+ def initialize(args)
+ @sources, @limit, @skip = args.values_at(:sources, :limit, :skip)
+ @sources = Hash[@sources.map {|s| [s, 0] }]
+ @listeners = Hash.new {|h, k| h[k] = [] }
+ @skip ||= 0
+ @returned, @skipped_rows = 0, 0
+ @rows = MultiRBTree.new.tap {|t| t.readjust(@sorter) }
+ end
+
+ %w[results complete error].each do |name|
+ define_method(name) do |&block|
+ @listeners[name] << block
+ end
+
+ define_method("notify_#{name}") do |*args|
+ @listeners[name].each do |block|
+ block.call(*args)
+ end
+ end
+ private "notify_#{name}"
+ end
+
+ # Gives the reducer more rows to process with their source connection.
+ # Complete must be a boolean, signaling whether this stream of rows has
+ # finished.
+ def reduce(rows, source, complete)
+ return if complete?
+ rows.each do |row|
+ row[:proxy_source] = source
+ key = [row[KEY], row[ID]]
+ @rows[key] = row
+ end
+ @sources[source] += rows.size
+ @sources.delete(source) if complete
+ source.pause unless complete
+ process do |results|
+ if results
+ results = limit(skip(results))
+ notify_results(results) if results.any?
+ notify_complete if complete?
+ resume_streams unless complete?
+ else
+ notify_error
+ end
+ end if process?
+ end
+
+ # Returns true if all streams of rows have arrived and the reduce
+ # processing is complete.
+ def complete?
+ @sources.empty?
+ end
+
+ private
+
+ def resume_streams
+ paused = @sources.select {|k, v| k.paused? }.keys
+ empty = @sources.select {|k, v| k.paused? && v == 0 }.keys
+ (empty.any? ? empty : paused).each {|source| source.resume }
+ end
+
+ def skip(sorted)
+ if @skip > @skipped_rows
+ @skipped_rows += sorted.slice!(0, @skip - @skipped_rows).size
+ end
+ sorted
+ end
+
+ def limit(sorted)
+ return sorted unless @limit
+ if @returned + sorted.size > @limit
+ sorted = sorted[0, @limit - @returned]
+ end
+ @returned += sorted.size
+ if @returned == @limit
+ [@sources, @rows].each {|arr| arr.clear }
+ end
+ sorted
+ end
+
+ def process(&callback)
+ sorted = [].tap do |rows|
+ rows << shift while @rows.any? && process?
+ end
+ callback.call(sorted)
+ end
+
+ def shift
+ @rows.shift.tap do |key, row|
+ source = row.delete(:proxy_source)
+ @sources[source] -= 1 if @sources.key?(source)
+ end[1]
+ end
+
+ def process?
+ !@sources.values.include?(0)
+ end
+ end
+ end
+ end
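
The @rows buffer above is a MultiRBTree from the new rbtree dependency, reordered with the subclass's @sorter via readjust so rows stay sorted by [key, id] while duplicate keys are allowed. A minimal standalone sketch of that usage with made-up rows:

require 'rbtree'

# Keep streamed rows ordered by [key, id], allowing duplicate keys,
# the way BaseReducer's @rows buffer does.
tree = MultiRBTree.new
tree.readjust {|a, b| a <=> b }   # BaseReducer passes its @sorter proc here
tree[['a', '2']] = {'id' => '2', 'key' => 'a'}
tree[['a', '1']] = {'id' => '1', 'key' => 'a'}
tree[['b', '3']] = {'id' => '3', 'key' => 'b'}
p tree.shift  # => [["a", "1"], {"id"=>"1", "key"=>"a"}]  (smallest entry first)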
@@ -0,0 +1,28 @@
+ # encoding: UTF-8
+
+ module CouchProxy
+ module Reduce
+
+ # Sorts and merges map query results from many different source streams.
+ class MapReducer < BaseReducer
+
+ # Args should contain the following keys:
+ # sources: List of stream sources used to identify from where
+ # streaming rows are arriving.
+ # limit: Maximum number of rows to return. If not specified, all
+ # rows are returned.
+ # skip: Number of rows at the start of the stream to skip before
+ # returning the rest. If not specified, no rows are skipped.
+ # collator: A CouchProxy::Collator instance used to sort rows.
+ def initialize(args)
+ collator = args[:collator]
+ # key = 0, id = 1
+ @sorter = proc do |a, b|
+ key = collator.compare(a[0], b[0])
+ (key == 0) ? collator.compare(a[1], b[1]) : key
+ end
+ super(args)
+ end
+ end
+ end
+ end
@@ -0,0 +1,95 @@
+ # encoding: UTF-8
+
+ module CouchProxy
+ module Reduce
+
+ # Sorts and merges reduce query results from many different source streams.
+ class ReduceReducer < BaseReducer
+ KEY = 'key'.freeze
+ VALUE = 'value'.freeze
+ COUNT = '_count'.freeze
+ SUM = '_sum'.freeze
+ STATS = '_stats'.freeze
+ BUILT_INS = [COUNT, SUM, STATS]
+ NONE = Struct.new(:none)
+
+ # Args should contain the following keys:
+ # sources: List of stream sources used to identify from where
+ # streaming rows are arriving.
+ # limit: Maximum number of rows to return. If not specified, all
+ # rows are returned.
+ # skip: Number of rows at the start of the stream to skip before
+ # returning the rest. If not specified, no rows are skipped.
+ # collator: A CouchProxy::Collator instance used to sort rows.
+ # fn: The JavaScript reduce function to apply to the rows.
+ # reducers: A block that, when called, returns a CouchProxy::Reducer
+ # instance.
+ def initialize(args)
+ @fn, @reducers, collator = args.values_at(:fn, :reducers, :collator)
+ # key = 0, id = 1
+ @sorter = proc {|a, b| collator.compare(a[0], b[0]) }
+ @processes = []
+ super(args)
+ end
+
+ def complete?
+ super && @processes.empty?
+ end
+
+ private
+
+ def process(&callback)
+ sorted = [].tap do |rows|
+ while @rows.any? && process?
+ case @fn
+ when SUM, COUNT then rows << sum(next_group)
+ when STATS then rows << stats(next_group)
+ else view_server(next_group, callback)
+ end
+ end
+ end
+ callback.call(sorted) if built_in?
+ end
+
+ def next_group
+ key, row = @rows.first
+ @rows.bound(key, key).map { shift }
+ end
+
+ def view_server(rows, callback)
+ tracker = (@processes << {:value => NONE}).last
+ values = rows.map {|row| row[VALUE] }
+ @reducers.call.rereduce(@fn, values) do |result|
+ success, value = result.flatten
+ if success
+ tracker[:value] = {:key => rows.first[KEY], :value => value}
+ ix = @processes.index {|t| t[:value] == NONE } || @processes.size
+ finished = @processes.slice!(0, ix).map {|t| t[:value] }
+ callback.call(finished)
+ else
+ callback.call(nil)
+ end
+ end
+ end
+
+ def sum(rows)
+ value = rows.map {|row| row[VALUE] }.inject(:+)
+ {:key => rows.first[KEY], :value => value}
+ end
+
+ def stats(rows)
+ values = rows.map {|row| row[VALUE] }
+ min, max = values.map {|v| [v['min'], v['max']] }.flatten.minmax
+ sum, count, sumsqr = %w[sum count sumsqr].map do |k|
+ values.map {|v| v[k] }.inject(:+)
+ end
+ value = {:sum => sum, :count => count, :min => min, :max => max, :sumsqr => sumsqr}
+ {:key => rows.first[KEY], :value => value}
+ end
+
+ def built_in?
+ BUILT_INS.include?(@fn)
+ end
+ end
+ end
+ end
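
For the built-in _stats function, stats above merges partial results by taking the overall min and max and summing sum, count, and sumsqr. A worked example using the same two partial values the reduce_reducer test below feeds in for key 'a':

# Two partial _stats values for the same key, as they might arrive from
# two partitions (same figures as in test/reduce_reducer_test.rb).
values = [
  {'sum' => 2, 'count' => 3, 'min' => 0, 'max' => 2, 'sumsqr' => 1},
  {'sum' => 4, 'count' => 6, 'min' => 1, 'max' => 3, 'sumsqr' => 2}
]
min, max = values.map {|v| [v['min'], v['max']] }.flatten.minmax
sum, count, sumsqr = %w[sum count sumsqr].map {|k| values.map {|v| v[k] }.inject(:+) }
p(:sum => sum, :count => count, :min => min, :max => max, :sumsqr => sumsqr)
# => {:sum=>6, :count=>9, :min=>0, :max=>3, :sumsqr=>3}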
@@ -23,7 +23,7 @@ module CouchProxy
  end
 
  class ReduceProcess < EventMachine::Connection
- def initialize(unbind)
+ def initialize(unbind=nil)
  @unbind, @connected, @callbacks, @deferred = unbind, false, [], []
  end
 
@@ -51,7 +51,7 @@ module CouchProxy
 
  def unbind
  @connected = false
- @unbind.call
+ @unbind.call if @unbind
  end
  end
  end
@@ -1,4 +1,7 @@
+ # encoding: UTF-8
+
  module Rack
+
  # Add a few helper methods to Rack's Request class.
  class Request
  def json?
@@ -0,0 +1,69 @@
+ # encoding: UTF-8
+
+ module CouchProxy
+
+ # A JSON::Stream::Parser listener that listens for the 'rows' key
+ # in a CouchDB map/reduce result stream. As row objects are parsed
+ # they are sent to callbacks for processing. Typically the callback
+ # will perform some kind of reduce on the rows before sending them
+ # to the client.
+ #
+ # Example usage:
+ # parser = JSON::Stream::Parser.new
+ # filter = CouchProxy::RowFilter.new(parser) do
+ # total_rows {|total| puts total }
+ # rows do |rows, complete|
+ # # process rows, complete tells us if this is the last row
+ # end
+ # end
+ class RowFilter < JSON::Stream::Builder
+ TOTAL_ROWS = 'total_rows'.freeze
+ MAX_ROWS = 100
+
+ def initialize(parser, &block)
+ @listeners = Hash.new {|h, k| h[k] = [] }
+ @total_rows_key = false
+ super(parser)
+ instance_eval(&block) if block_given?
+ end
+
+ %w[total_rows rows].each do |name|
+ define_method(name) do |&block|
+ @listeners[name] << block
+ end
+
+ define_method("notify_#{name}") do |*args|
+ @listeners[name].each do |block|
+ block.call(*args)
+ end
+ end
+ private "notify_#{name}"
+ end
+
+ def key(key)
+ super
+ @total_rows_key = (@stack.size == 1 && key == TOTAL_ROWS)
+ end
+
+ def value(value)
+ super
+ if @total_rows_key
+ @total_rows_key = false
+ notify_total_rows(value)
+ end
+ end
+
+ def end_document
+ notify_rows(@stack.pop.obj['rows'], true)
+ end
+
+ def end_object
+ super
+ # row object complete
+ if @stack.size == 2 && @stack[-1].obj.size >= MAX_ROWS
+ notify_rows(@stack.pop.obj, false)
+ @stack.push(JSON::Stream::ArrayNode.new)
+ end
+ end
+ end
+ end
@@ -0,0 +1,93 @@
+ # encoding: UTF-8
+
+ require 'couchproxy'
+ require 'mock_source'
+ require 'test/unit'
+
+ class MapReducerTest < Test::Unit::TestCase
+ def setup
+ @collator = CouchProxy::Collator.new
+ end
+
+ def test_no_rows_from_one_source
+ source = MockSource.new('1')
+ reducer = CouchProxy::Reduce::MapReducer.new(
+ :sources => [source], :collator => @collator)
+ reducer.results do |results|
+ flunk("No results expected")
+ end
+ complete = []
+ reducer.complete { complete << 1 }
+ reducer.reduce([], source, true)
+ assert_equal(1, complete.inject(:+))
+ assert(reducer.complete?)
+ end
+
+ def test_no_rows_from_two_sources
+ sources = %w[1 2].map {|uri| MockSource.new(uri) }
+ reducer = CouchProxy::Reduce::MapReducer.new(
+ :sources => sources, :collator => @collator)
+ reducer.results do |results|
+ flunk("No results expected")
+ end
+ complete = []
+ reducer.complete { complete << 1 }
+ reducer.reduce([], sources[0], true)
+ assert(!reducer.complete?)
+ reducer.reduce([], sources[1], true)
+ assert_equal(1, complete.inject(:+))
+ assert(reducer.complete?)
+ end
+
+ def test_row_sorting
+ sources = %w[1 2].map {|uri| MockSource.new(uri) }
+ reducer = CouchProxy::Reduce::MapReducer.new(
+ :sources => sources, :collator => @collator)
+ rows, complete = [], []
+ reducer.results {|results| rows += results }
+ reducer.complete { complete << 1 }
+ reducer.reduce([{'id' => '2', 'key' => 'a', 'value' => 'v2'}], sources[1], false)
+ reducer.reduce([{'id' => '1', 'key' => 'a', 'value' => 'v1'}], sources[0], false)
+ reducer.reduce([{'id' => '3', 'key' => 'c', 'value' => 'v4'}], sources[0], true)
+ reducer.reduce([{'id' => '4', 'key' => 'b', 'value' => 'v3'}], sources[1], true)
+ assert_equal(1, complete.inject(:+))
+ assert(reducer.complete?)
+ assert_equal(4, rows.size)
+ assert_equal(%w[v1 v2 v3 v4], rows.map {|r| r['value'] })
+ end
+
+ def test_limit
+ sources = %w[1 2].map {|uri| MockSource.new(uri) }
+ reducer = CouchProxy::Reduce::MapReducer.new(
+ :sources => sources, :collator => @collator, :limit => 2)
+ rows, complete = [], []
+ reducer.results {|results| rows += results }
+ reducer.complete { complete << 1 }
+ reducer.reduce([{'id' => '1', 'key' => 'a', 'value' => 'v1'}], sources[0], false)
+ reducer.reduce([{'id' => '3', 'key' => 'c', 'value' => 'v4'}], sources[0], false)
+ assert(!reducer.complete?)
+ reducer.reduce([{'id' => '2', 'key' => 'a', 'value' => 'v2'}], sources[1], false)
+ assert(reducer.complete?)
+ reducer.reduce([{'id' => '4', 'key' => 'b', 'value' => 'v3'}], sources[1], false)
+ assert_equal(1, complete.inject(:+))
+ assert_equal(2, rows.size)
+ assert_equal(%w[v1 v2], rows.map {|r| r['value'] })
+ end
+
+ def test_skip
+ sources = %w[1 2].map {|uri| MockSource.new(uri) }
+ reducer = CouchProxy::Reduce::MapReducer.new(
+ :sources => sources, :collator => @collator, :skip => 2)
+ rows, complete = [], []
+ reducer.results {|results| rows += results }
+ reducer.complete { complete << 1 }
+ reducer.reduce([{'id' => '1', 'key' => 'a', 'value' => 'v1'}], sources[0], false)
+ reducer.reduce([{'id' => '3', 'key' => 'c', 'value' => 'v4'}], sources[0], true)
+ reducer.reduce([{'id' => '2', 'key' => 'a', 'value' => 'v2'}], sources[1], false)
+ reducer.reduce([{'id' => '4', 'key' => 'b', 'value' => 'v3'}], sources[1], true)
+ assert(reducer.complete?)
+ assert_equal(1, complete.inject(:+))
+ assert_equal(2, rows.size)
+ assert_equal(%w[v3 v4], rows.map {|r| r['value'] })
+ end
+ end
@@ -0,0 +1,32 @@
+ # encoding: UTF-8
+
+ # A source of rows that responds to EventMachine::Connection pause and resume
+ # methods. The reducer pauses sources to allow rows from slower connections
+ # to be read.
+ class MockSource
+ attr_reader :uri
+
+ def initialize(uri)
+ @uri, @paused = uri, false
+ end
+
+ def pause
+ @paused = true
+ end
+
+ def paused?
+ @paused
+ end
+
+ def resume
+ @paused = false
+ end
+
+ def ==(source)
+ @uri == source.uri
+ end
+
+ def <=>(source)
+ @uri <=> source.uri
+ end
+ end
@@ -0,0 +1,60 @@
+ # encoding: UTF-8
+
+ require 'couchproxy'
+ require 'mock_source'
+ require 'test/unit'
+
+ class ReduceReducerTest < Test::Unit::TestCase
+ def setup
+ @collator = CouchProxy::Collator.new
+ end
+
+ def test_sum
+ sources = %w[1 2].map {|uri| MockSource.new(uri) }
+ reducer = CouchProxy::Reduce::ReduceReducer.new(
+ :sources => sources, :collator => @collator, :fn => '_sum')
+ rows, complete = [], []
+ reducer.results {|results| rows += results }
+ reducer.complete { complete << 1 }
+ reducer.reduce([{'key' => 'a', 'value' => 2}], sources[1], false)
+ reducer.reduce([{'key' => 'a', 'value' => 4}], sources[0], false)
+ reducer.reduce([{'key' => 'c', 'value' => 6}], sources[0], true)
+ reducer.reduce([{'key' => 'b', 'value' => 8}], sources[1], true)
+ assert_equal(1, complete.inject(:+))
+ assert(reducer.complete?)
+ assert_equal(3, rows.size)
+ results = rows.map {|r| r.values_at(:key, :value) }.flatten
+ assert_equal(['a', 6, 'b', 8, 'c', 6], results)
+ end
+
+ def test_stats
+ sources = %w[1 2].map {|uri| MockSource.new(uri) }
+ reducer = CouchProxy::Reduce::ReduceReducer.new(
+ :sources => sources, :collator => @collator, :fn => '_stats')
+ rows, complete = [], []
+ reducer.results {|results| rows += results }
+ reducer.complete { complete << 1 }
+ values = [
+ {'sum' => 2, 'count' => 3, 'min' => 0, 'max' => 2, 'sumsqr' => 1},
+ {'sum' => 4, 'count' => 6, 'min' => 1, 'max' => 3, 'sumsqr' => 2},
+ {'sum' => 2, 'count' => 3, 'min' => 0, 'max' => 2, 'sumsqr' => 3},
+ {'sum' => 2, 'count' => 3, 'min' => 0, 'max' => 2, 'sumsqr' => 4}
+ ]
+ reducer.reduce([{'key' => 'a', 'value' => values[0]}], sources[1], false)
+ reducer.reduce([{'key' => 'a', 'value' => values[1]}], sources[0], false)
+ reducer.reduce([{'key' => 'c', 'value' => values[2]}], sources[0], true)
+ reducer.reduce([{'key' => 'b', 'value' => values[3]}], sources[1], true)
+ assert_equal(1, complete.inject(:+))
+ assert(reducer.complete?)
+ assert_equal(3, rows.size)
+ results = rows.map {|r| r.values_at(:key, :value) }.flatten
+ combined = {:sum => 6, :count => 9, :min => 0, :max => 3, :sumsqr => 3}
+ assert_equal(['a', combined, 'b', to_sym(values[3]), 'c', to_sym(values[2])], results)
+ end
+
+ private
+
+ def to_sym(hash)
+ Hash[hash.map {|k,v| [k.to_sym, v]}]
+ end
+ end
@@ -0,0 +1,51 @@
+ # encoding: UTF-8
+
+ require 'couchproxy'
+ require 'test/unit'
+
+ class RowFilterTest < Test::Unit::TestCase
+ def setup
+ @parser = JSON::Stream::Parser.new
+ @filter = CouchProxy::RowFilter.new(@parser)
+ end
+
+ def test_total_rows
+ total_rows = -1
+ @filter.total_rows {|total| total_rows = total }
+ @parser << {:total_rows => 2, :offset => 0, :rows => [
+ {:id => "1", :key => {:total_rows => 42}, :value => {:total_rows => 42}},
+ {:id => "2", :key => {:total_rows => 42}, :value => {:total_rows => 42}}
+ ]}.to_json
+ assert_equal(2, total_rows)
+ end
+
+ def test_total_rows_missing
+ total_rows = -1
+ @filter.total_rows {|total| total_rows = total }
+ @parser << {:offset => 0, :rows => [
+ {:id => "1", :key => nil, :value => {:total_rows => 42}}
+ ]}.to_json
+ assert_equal(-1, total_rows)
+ end
+
+ def test_rows_with_small_dataset
+ test_rows(3)
+ end
+
+ def test_rows_with_large_dataset
+ test_rows(5003)
+ end
+
+ private
+
+ def test_rows(count)
+ all_rows = []
+ @filter.rows {|rows| all_rows += rows }
+ rows = Array.new(count) do |i|
+ {:id => i.to_s, :key => {:rows => [42]}, :value => {:rows => [42]}}
+ end
+ @parser << {:total_rows => 2, :offset => 0, :rows => rows}.to_json
+ assert_equal(count, all_rows.size)
+ assert_equal(Array.new(count) {|i| i.to_s }, all_rows.map {|r| r['id'] })
+ end
+ end
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
  prerelease: false
  segments:
  - 0
- - 1
+ - 2
  - 0
- version: 0.1.0
+ version: 0.2.0
  platform: ruby
  authors:
  - David Graham
@@ -14,7 +14,7 @@ autorequire:
  bindir: bin
  cert_chain: []
 
- date: 2010-09-06 00:00:00 -06:00
+ date: 2011-01-03 00:00:00 -07:00
  default_executable:
  dependencies:
  - !ruby/object:Gem::Dependency
@@ -73,6 +73,20 @@ dependencies:
  version: "1.2"
  type: :runtime
  version_requirements: *id004
+ - !ruby/object:Gem::Dependency
+ name: rbtree
+ prerelease: false
+ requirement: &id005 !ruby/object:Gem::Requirement
+ none: false
+ requirements:
+ - - ~>
+ - !ruby/object:Gem::Version
+ segments:
+ - 0
+ - 3
+ version: "0.3"
+ type: :runtime
+ version_requirements: *id005
  description: |-
  CouchProxy is a simple proxy server that distributes reads and writes to a
  cluster of Apache CouchDB servers so they appear to be a single huge database.
@@ -119,13 +133,21 @@ files:
  - lib/couchproxy/rack/users.rb
  - lib/couchproxy/rack/uuids.rb
  - lib/couchproxy/rack/view_cleanup.rb
+ - lib/couchproxy/reduce/base_reducer.rb
+ - lib/couchproxy/reduce/map_reducer.rb
+ - lib/couchproxy/reduce/reduce_reducer.rb
  - lib/couchproxy/reducer.rb
  - lib/couchproxy/request.rb
  - lib/couchproxy/router.rb
+ - lib/couchproxy/row_filter.rb
  - lib/couchproxy.rb
  - lib/couchproxy.ru
  - conf/couchproxy.yml
  - test/collator_test.rb
+ - test/map_reducer_test.rb
+ - test/mock_source.rb
+ - test/reduce_reducer_test.rb
+ - test/row_filter_test.rb
  has_rdoc: true
  homepage: http://github.com/dgraham/couchproxy
  licenses: []
@@ -162,3 +184,7 @@ specification_version: 3
  summary: A proxy server for Apache CouchDB clusters.
  test_files:
  - test/collator_test.rb
+ - test/map_reducer_test.rb
+ - test/mock_source.rb
+ - test/reduce_reducer_test.rb
+ - test/row_filter_test.rb