conveyor 0.1.1 → 0.1.2

data.tar.gz.sig CHANGED
Binary file
data/History.txt CHANGED
@@ -1,3 +1,6 @@
+== 0.1.2 / 2008-02-05
+* various bugfixes
+
 == 0.1.1 / 2008-01-31
 
 * Fix TCP binding.
data/Manifest.txt CHANGED
@@ -9,6 +9,7 @@ docs/protocol.mkd
 lib/conveyor.rb
 lib/conveyor/base_channel.rb
 lib/conveyor/channel.rb
+lib/conveyor/client.rb
 lib/conveyor/server.rb
 lib/priority_queue.rb
 test/rand.rb
data/Rakefile CHANGED
@@ -8,8 +8,8 @@ Hoe.new('conveyor', Conveyor::VERSION) do |p|
   p.rubyforge_name = 'conveyor'
   p.author = 'Ryan King'
   p.email = 'ryan@theryanking.com'
+  p.remote_rdoc_dir = ''
   p.extra_deps << ['mongrel']
-  p.extra_deps << ['activesupport']
   p.extra_deps << ['json']
 end
 
data/bin/conveyor CHANGED
@@ -1,11 +1,42 @@
 #!/usr/bin/env ruby -KU
 
-unless ARGV.length == 2
-  puts "usage: #{$0} <port> <data directory>"
+require 'optparse'
+require 'rubygems'
+require 'daemons/daemonize'
+
+options = {:port => 8011, :daemonize => false}
+opts = OptionParser.new do |opts|
+  opts.summary_indent = " "
+  opts.banner = "usage: #{$0} [options] <data directory>"
+
+  opts.separator ""
+  opts.separator "Options:"
+  opts.on("-p PORT", Integer, "HTTP port number. Default: 8011") do |port|
+    options[:port] = port
+  end
+
+  opts.on("-l LOGDIRECTORY", "Directory for log files.") do |log_dir|
+    options[:log_directory] = log_dir
+  end
+
+  opts.on("-d", "Daemonize.") do |d|
+    options[:daemonize] = d
+  end
+end
+
+opts.parse!
+p options
+unless ARGV.length == 1
+  puts opts.help
   exit
 end
 
 $: << 'lib'
 
 require 'conveyor/server'
-Conveyor::Server.new('0.0.0.0', ARGV[0].to_i, ARGV[1]).run.join
+
+if options[:daemonize]
+  Daemonize.daemonize
+end
+
+Conveyor::Server.new('0.0.0.0', options[:port], ARGV[0], options[:log_directory]).run.join
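For reference, a rough sketch of how the reworked script above is invoked; the directory names are placeholders, not taken from the gem:

    # run against ./data on the default port (8011)
    conveyor data

    # explicit port, request logs written to ./log, detach as a daemon
    conveyor -p 8011 -l log -d data

The -p, -l, and -d switches map straight onto the OptionParser definitions shown in the diff.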
@@ -27,3 +27,13 @@ notes:
 * 1 is the filename
 * assuming a lucene-style directory of datafiles + ToC/index
 * given that the files are written sequentially we can avoid writing every entry to the index file (as long as you write the first and last entry to the index). At most this means you have to read n entries, where n is the gap between index entries. Given that most clients will have persistent connections and be reading sequentially, we can do some clever things on the server side to make this really efficient (basically meaning you'll only have to pay that penalty on the first item you read).
+
+== LOG FILES
+
+When given -l LOGDIR, conveyor will write a log file with this format:
+
+  IP HTTP_VERB REQUEST_URI STATUS ID LENGTH HASH
+
+(In addition to the formatting that Logger adds.)
+
+Not every operation has every bit of that data, but optional bits are on the right.
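As a purely hypothetical illustration of that format (none of these values come from a real log), a successful GET might be recorded as:

    127.0.0.1 GET /channels/foo 200 12 74 d1c06600a823aa8a10f28d90c26932de

with the trailing id, length, and hash being the optional fields mentioned above.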
data/docs/protocol.mkd CHANGED
@@ -55,9 +55,17 @@ If this is called for the first time, it will return the first item in the channel.
 ### Get Next by Group (Multi-consumer queue) ###
 
 Request
-: GET /channels/{channel name}?next&group=foo
+: GET /channels/{channel name}?next&group={group name}
 
 Response
 : success: 200
 
-If this is called for the first time, it will return the first item in the channel. Otherwise it will return the next item.
+If this is called for the first time, it will return the first item in the channel. Otherwise it will return the next item.
+
+### Rewinding to id ###
+
+Request
+: POST /channels/{channel name}?rewind_id={id}
+
+Response
+: success: 200
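A minimal sketch of the new rewind call from Ruby, assuming a channel named foo already exists and holds at least one entry (the channel name, port, and id here are illustrative):

    require 'net/http'

    Net::HTTP.start('localhost', 8011) do |h|
      h.get('/channels/foo?next')                    # consume the first item
      res = h.post('/channels/foo?rewind_id=1', nil) # move the iterator back to id 1
      res.body                                       # => "iterator rewound to 1"
      h.get('/channels/foo?next').body               # the same item comes back again
    end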
data/lib/conveyor.rb CHANGED
@@ -1,4 +1,4 @@
 module Conveyor
-  VERSION = '0.1.1'
+  VERSION = '0.1.2'
   QUALITY = 'alpha'
 end
@@ -1,6 +1,6 @@
 require 'digest/md5'
 require 'rubygems'
-require 'active_support/core_ext/date/conversions'
+require 'time'
 require 'priority_queue'
 
 module Conveyor
@@ -16,7 +16,7 @@ module Conveyor
       @directory = directory
       @data_files = []
       @index = []
-      @iterator = 0
+      @iterator = 1
 
       if File.exists?(@directory)
         if !File.directory?(@directory)
@@ -45,7 +45,7 @@ module Conveyor
     end
 
     def inspect
-      "<#{self.class} dir:'#{@directory.to_s}' last_id:#{@last_id}>"
+      "<#{self.class} dir:'#{@directory.to_s}' last_id:#{@last_id} iterator:#{@iterator}>"
     end
 
     def pick_bucket i
@@ -29,7 +29,7 @@ module Conveyor
       Dir.glob(File.join(@directory, 'iterator-*')) do |i|
         g = i.split(%r{/}).last.match(%r{iterator-(.*)}).captures[0]
         @group_iterators_files[g] = File.open(i, 'r+')
-        @group_iterators[g] = 0
+        @group_iterators[g] = 1
         @group_iterators_files[g].each_line do |line|
           @group_iterators[g] = line.to_i
         end
@@ -46,20 +46,24 @@ module Conveyor
     def get_next
       r = nil
       Thread.exclusive do
-        @iterator += 1 # TODO make sure this is lower than @last_id
-        r = get(@iterator)
-        @iterator_file.write("#{@iterator}\n")
+        if @iterator <= @last_id
+          r = get(@iterator)
+          @iterator += 1
+          @iterator_file.write("#{@iterator}\n")
+          r
+        else
+          nil
+        end
       end
-      r
     end
 
     # Returns the next item for +group+. If +group+ hasn't been seen before, the first item is returned.
     def get_next_by_group group
       r = nil
       Thread.exclusive do
-        @group_iterators[group] = 0 unless @group_iterators.key?(group)
-        @group_iterators[group] += 1
+        @group_iterators[group] = 1 unless @group_iterators.key?(group)
         r = get(@group_iterators[group])
+        @group_iterators[group] += 1
         group_iterators_file(group) do |f|
           f.write("#{@group_iterators[group]}\n")
         end
@@ -79,6 +83,25 @@ module Conveyor
       }
     end
 
+    def rewind *opts
+      opts = opts.first
+      if opts.key?(:id)
+        if opts.key?(:group)
+          Thread.exclusive do
+            @group_iterators[opts[:group]] = opts[:id].to_i
+            group_iterators_file(opts[:group]) do |f|
+              f.write("#{@group_iterators[opts[:group]]}\n")
+            end
+          end
+        else
+          Thread.exclusive do
+            @iterator = opts[:id].to_i
+            @iterator_file.write("#{@iterator}\n")
+          end
+        end
+      end
+    end
+
     private
 
     def group_iterators_file group
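A small usage sketch of the rewind method added above, mirroring the new channel tests further down (the /tmp path and the group name are placeholders):

    c = Conveyor::Channel.new('/tmp/example')
    c.post 'foo'

    c.get_next[1]                            # => "foo"
    c.get_next                               # => nil, iterator is past the last id
    c.rewind(:id => 1)                       # reset the channel-wide iterator
    c.get_next[1]                            # => "foo" again

    c.get_next_by_group('workers')[1]        # => "foo"
    c.rewind(:id => 1, :group => 'workers')  # reset only that group's iterator
    c.get_next_by_group('workers')[1]        # => "foo" again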
@@ -0,0 +1,35 @@
+require 'net/http'
+
+module Conveyor
+  class Client
+    def initialize host, port = 8011
+      @host = host
+      @port = port
+      connect!
+    end
+
+    def connect!
+      @conn = Net::HTTP.start(@host, @port)
+    end
+
+    def create_channel channel_name
+      @conn.put("/channels/#{channel_name}", nil, {'Content-Type' => 'application/octet-stream'})
+    end
+
+    def post channel_name, content
+      @conn.post("/channels/#{channel_name}", content, {'Content-Type' => 'application/octet-stream', 'Date' => Time.now.to_s})
+    end
+
+    def get channel_name, id
+      @conn.get("/channels/#{channel_name}/#{id}").body
+    end
+
+    def get_next channel_name, group=nil
+      if group
+        @conn.get("/channels/#{channel_name}?next&group=#{group}").body
+      else
+        @conn.get("/channels/#{channel_name}?next").body
+      end
+    end
+  end
+end
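The new client wraps the HTTP interface in a persistent Net::HTTP connection. A brief usage sketch, assuming the gem is on the load path; the channel name, group, and payload are placeholders:

    require 'rubygems'
    require 'conveyor/client'

    client = Conveyor::Client.new('localhost')   # port defaults to 8011
    client.create_channel 'events'
    client.post 'events', 'hello world'
    client.get 'events', 1                 # fetch a specific entry by id
    client.get_next 'events'               # single-consumer iteration
    client.get_next 'events', 'workers'    # iteration for the group 'workers'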
@@ -3,6 +3,7 @@ require 'mongrel'
 require 'conveyor/channel'
 require 'fileutils'
 require 'json'
+require 'logger'
 
 class Mongrel::HttpRequest
   def put?
@@ -30,8 +31,14 @@ module Conveyor
   # A Mongrel handler for multiple Conveyor Channels.
   class ChannelsHandler < Mongrel::HttpHandler
 
-    def initialize data_directory
+    def initialize data_directory, log_directory=nil
      @data_directory = data_directory
+      if log_directory
+        @logger = Logger.new File.join(log_directory, 'conveyor.log')
+      else
+        @logger = Logger.new '/dev/null'
+      end
+
       @channels = {}
       Dir.entries(@data_directory).each do |e|
         if !['.', '..'].include?(e) && File.directory?(File.join(@data_directory, e))
@@ -44,28 +51,51 @@ module Conveyor
       @channels[channel_name] = Conveyor::Channel.new(File.join(@data_directory, channel_name))
     end
 
+    def i str
+      @logger.info str
+    end
+
     def process request, response
       if request.put? && m = request.path_match(%r{/channels/(.*)})
         if Channel.valid_channel_name?(m.captures[0])
-          create_new_channel m.captures[0]
-          response.start(201) do |head, out|
-            out.write("created channel #{m.captures[0]}")
+          if !@channels.key?(m.captures[0])
+            create_new_channel m.captures[0]
+            response.start(201) do |head, out|
+              out.write("created channel #{m.captures[0]}")
+            end
+            i "#{request.params["REMOTE_ADDR"]} PUT #{request.params["REQUEST_PATH"]} 201"
+          else
+            response.start(202) do |head, out|
+              out.write("channel already exists. didn't do anything")
+            end
+            i "#{request.params["REMOTE_ADDR"]} PUT #{request.params["REQUEST_PATH"]} 202"
           end
         else
           response.start(406) do |head, out|
            out.write("invalid channel name. must match #{Channel::NAME_PATTERN}")
+            i "#{request.params["REMOTE_ADDR"]} GET #{request.params["REQUEST_PATH"]} 406"
          end
        end
      elsif request.post? && m = request.path_match(%r{/channels/(.*)})
        if @channels.key?(m.captures[0])
-          if request.params.include?('HTTP_DATE') && d = Time.parse(request.params['HTTP_DATE'])
-            id = @channels[m.captures[0]].post(request.body.read)
-            response.start(202) do |head, out|
-              head["Location"] = "/channels/#{m.captures[0]}/#{id}"
+          params = Mongrel::HttpRequest.query_parse(request.params['QUERY_STRING'])
+          if params.include?('rewind_id')
+            @channels[m.captures[0]].rewind(:id => params['rewind_id']).to_i # TODO make sure this is an integer
+            response.start(200) do |head, out|
+              out.write "iterator rewound to #{params['rewind_id']}"
            end
          else
-            response.start(400) do |head, out|
-              out.write "A valid Date header is required for all POSTs."
+            if request.params.include?('HTTP_DATE') && d = Time.parse(request.params['HTTP_DATE'])
+              id = @channels[m.captures[0]].post(request.body.read)
+              response.start(202) do |head, out|
+                head["Location"] = "/channels/#{m.captures[0]}/#{id}"
+              end
+              i "#{request.params["REMOTE_ADDR"]} GET #{request.params["REQUEST_PATH"]} 202"
+            else
+              response.start(400) do |head, out|
+                out.write "A valid Date header is required for all POSTs."
+              end
+              i "#{request.params["REMOTE_ADDR"]} GET #{request.params["REQUEST_PATH"]} 400"
            end
          end
        end
@@ -105,6 +135,7 @@ module Conveyor
         head['Last-Modified'] = Time.parse(headers[:time]).gmtime.to_s
         out.write content
       end
+      i "#{request.params["REMOTE_ADDR"]} GET #{request.params["REQUEST_PATH"]} 200 #{headers[:id]} #{headers[:length]} #{headers[:hash]}"
     end
 
   end
@@ -113,9 +144,9 @@ module Conveyor
 
     # +host+ and +port+ are passed along to Mongrel::HttpServer for TCP binding. +data_directory+ is used to store
     # all channel data and should be created before intializing a Server.
-    def initialize(host, port, data_directory)
+    def initialize(host, port, data_directory, log_directory = nil)
       super(host, port)
-      ch = ChannelsHandler.new(data_directory)
+      ch = ChannelsHandler.new(data_directory, log_directory)
       register("/channels", ch)
     end
   end
data/test/test_channel.rb CHANGED
@@ -66,6 +66,7 @@ class TestConveyorChannel < Test::Unit::TestCase
     assert_equal 'bar', c.get_next[1]
     assert_equal 'bam', c.get_next[1]
     assert_equal nil, c.get_next
+    assert_equal 4, c.status[:iterator][:position]
   end
 
   def test_get_next_interupted
@@ -137,10 +138,40 @@ class TestConveyorChannel < Test::Unit::TestCase
       :data_files => [
         {:path => '/tmp/bar/0', :bytes => 210}
       ],
-      :iterator => {:position => 0},
+      :iterator => {:position => 1},
       :iterator_groups => {}
     }
 
     assert_equal(status, c.status)
   end
+
+  def test_rewind
+    FileUtils.rm_r('/tmp/bar') rescue nil
+    c = Channel.new('/tmp/bar')
+    c.post 'foo'
+
+    assert_equal 'foo', c.get_next[1]
+    c.rewind(:id => 1)
+    assert_equal 'foo', c.get_next[1]
+    c.rewind(:id => 1)
+
+    d = Channel.new('/tmp/bar')
+    assert_equal 'foo', d.get_next[1]
+  end
+
+  def test_group_rewind
+    FileUtils.rm_r('/tmp/bar') rescue nil
+    c = Channel.new('/tmp/bar')
+    c.post 'foo'
+
+    assert_equal 'foo', c.get_next_by_group('bar')[1]
+    c.rewind(:id => 1, :group => 'bar')
+    assert_equal 'foo', c.get_next_by_group('bar')[1]
+    c.rewind(:id => 1, :group => 'bar')
+
+    d = Channel.new('/tmp/bar')
+    assert_equal 'foo', d.get_next_by_group('bar')[1]
+  end
+
+
 end
data/test/test_server.rb CHANGED
@@ -6,7 +6,7 @@ class TestConveyorServer < Test::Unit::TestCase
   def setup
     FileUtils.rm_r('/tmp/asdf') rescue nil
     FileUtils.mkdir('/tmp/asdf')
-    @server = Conveyor::Server.new("127.0.0.1", 8888, '/tmp/asdf')
+    @server = Conveyor::Server.new("127.0.0.1", 8011, '/tmp/asdf')
     @server.run
   end
 
@@ -15,14 +15,14 @@ class TestConveyorServer < Test::Unit::TestCase
   end
 
   def test_channels
-    Net::HTTP.start("localhost", 8888) do |h|
+    Net::HTTP.start("localhost", 8011) do |h|
       req = h.get('/channels')
       assert_equal Net::HTTPOK, req.class
     end
   end
 
   def test_create_channel
-    Net::HTTP.start('localhost', 8888) do |h|
+    Net::HTTP.start('localhost', 8011) do |h|
       req = h.put('/channels/foo', '', {'Content-Type' => 'application/octet-stream'})
       assert_equal Net::HTTPCreated, req.class
 
@@ -32,7 +32,7 @@ class TestConveyorServer < Test::Unit::TestCase
   end
 
   def test_post
-    Net::HTTP.start('localhost', 8888) do |h|
+    Net::HTTP.start('localhost', 8011) do |h|
       req = h.put('/channels/bar', '', {'Content-Type' => 'application/octet-stream'})
       assert_equal Net::HTTPCreated, req.class
 
@@ -58,7 +58,7 @@ class TestConveyorServer < Test::Unit::TestCase
   end
 
   def test_invalid_channel
-    Net::HTTP.start('localhost', 8888) do |h|
+    Net::HTTP.start('localhost', 8011) do |h|
       req = h.put('/channels/|', '', {'Content-Type' => 'application/octet-stream'})
       assert_equal Net::HTTPNotAcceptable, req.class
     end
@@ -66,7 +66,7 @@ class TestConveyorServer < Test::Unit::TestCase
   end
 
   def test_get_next
-    Net::HTTP.start('localhost', 8888) do |h|
+    Net::HTTP.start('localhost', 8011) do |h|
       req = h.put('/channels/bar', '', {'Content-Type' => 'application/octet-stream'})
       assert_equal Net::HTTPCreated, req.class
 
@@ -92,7 +92,7 @@ class TestConveyorServer < Test::Unit::TestCase
   end
 
   def test_status
-    Net::HTTP.start('localhost', 8888) do |h|
+    Net::HTTP.start('localhost', 8011) do |h|
       req = h.put('/channels/bar', '', {'Content-Type' => 'application/octet-stream'})
       assert_equal Net::HTTPCreated, req.class
 
@@ -111,12 +111,45 @@ class TestConveyorServer < Test::Unit::TestCase
         "index"=>{"size"=>1},
         "directory"=>"/tmp/asdf/bar",
         "data_files"=>[{"path"=>"/tmp/asdf/bar/0","bytes"=>139}],
-        "iterator"=>{"position"=>0}
+        "iterator"=>{"position"=>1}
       }
       assert_equal json, JSON::parse(req.body)
 
     end
+  end
+
+  def test_rewinding
+    Net::HTTP.start('localhost', 8011) do |h|
+      req = h.put('/channels/bar', '', {'Content-Type' => 'application/octet-stream'})
+      assert_equal Net::HTTPCreated, req.class
+
+      data =
+        ["ZqZyDN2SouQCYEHYS0LuM1XeqsF0MKIbFEBE6xQ972VqEcjs21wJSosvZMWEH1lq5ukTq4Ze"]
+
+      data.each do |d|
+        req = h.post('/channels/bar', d, {'Content-Type' => 'application/octet-stream', 'Date' => Time.now.to_s})
+        assert_equal Net::HTTPAccepted, req.class
+      end
+
+      req = h.get('/channels/bar?next')
+
+      assert_kind_of Net::HTTPOK, req
+      assert_equal data[0], req.body
+
+      req = h.get('/channels/bar?next')
 
+      assert_kind_of Net::HTTPNotFound, req
 
+      req = h.post('/channels/bar?rewind_id=1', nil)
+      assert_kind_of Net::HTTPOK, req
+
+      req = h.get('/channels/bar?next')
+
+      assert_kind_of Net::HTTPOK, req
+      assert_equal data[0], req.body
+
+      req = h.get('/channels/bar?next')
+      assert_kind_of Net::HTTPNotFound, req
+    end
   end
 end
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: conveyor
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - Ryan King
@@ -30,7 +30,7 @@ cert_chain:
   Zls3y84CmyAEGg==
   -----END CERTIFICATE-----
 
-date: 2008-01-31 00:00:00 -08:00
+date: 2008-02-05 00:00:00 -08:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -42,15 +42,6 @@ dependencies:
       - !ruby/object:Gem::Version
         version: "0"
   version:
-- !ruby/object:Gem::Dependency
-  name: activesupport
-  version_requirement:
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: "0"
-  version:
 - !ruby/object:Gem::Dependency
   name: json
   version_requirement:
@@ -91,6 +82,7 @@ files:
 - lib/conveyor.rb
 - lib/conveyor/base_channel.rb
 - lib/conveyor/channel.rb
+- lib/conveyor/client.rb
 - lib/conveyor/server.rb
 - lib/priority_queue.rb
 - test/rand.rb
metadata.gz.sig CHANGED
Binary file