regurgitator 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/GIT-VERSION-FILE ADDED
@@ -0,0 +1 @@
1
+ GIT_VERSION = 0.0.0
data/GIT-VERSION-GEN ADDED
@@ -0,0 +1,40 @@
1
#!/bin/sh
# Computes the project version string and records it in $GVF as
# "GIT_VERSION = <version>".  $GVF is rewritten only when the computed
# value differs from the one already stored there.

GVF=GIT-VERSION-FILE
DEF_VER=v0.0.0.GIT

# a literal newline, used below to reject multi-line "git describe" output
LF='
'

# First see if there is a version file (included in release tarballs),
# then try git-describe, then default.
if test -f version
then
	VN=$(cat version) || VN="$DEF_VER"
elif { test -d .git || test -f .git; } &&
	VN=$(git describe --abbrev=4 HEAD 2>/dev/null) &&
	case "$VN" in
	*$LF*) (exit 1) ;;
	v[0-9]*)
		# refresh the index, then mark the version "-dirty" when
		# diff-index reports paths that differ from HEAD
		git update-index -q --refresh
		test -z "$(git diff-index --name-only HEAD --)" ||
		VN="$VN-dirty" ;;
	esac
then
	VN=$(echo "$VN" | sed -e 's/-/./g');
else
	VN="$DEF_VER"
fi

# strip any leading "v"; the whole pattern is quoted so the shell can
# never apply pathname expansion to it
VN=$(expr "$VN" : 'v*\(.*\)')

if test -r "$GVF"
then
	VC=$(sed -e 's/^GIT_VERSION = //' <"$GVF")
else
	VC=unset
fi
# only touch $GVF (and announce the version on stderr) when it changed
test "$VN" = "$VC" || {
	echo >&2 "GIT_VERSION = $VN"
	echo "GIT_VERSION = $VN" >"$GVF"
}
data/GNUmakefile ADDED
@@ -0,0 +1,8 @@
1
+ all::
2
+ RSYNC_DEST := bogomips.org:/srv/bogomips/regurgitator
3
+ rfproject := rainbows
4
+ rfpackage := regurgitator
5
+ include pkg.mk
6
+ all:: test-integration
7
+ test-integration:
8
+ $(MAKE) -C t
data/LATEST ADDED
@@ -0,0 +1 @@
1
+ Currently unreleased
data/LICENSE ADDED
@@ -0,0 +1,18 @@
1
+ regurgitator is copyrighted Free Software by all contributors, see logs
2
+ in revision control for names and email addresses of all of them.
3
+
4
+ You can redistribute it and/or modify it under the terms of the GNU
5
+ General Public License (GPL) as published by the Free Software
6
+ Foundation, version {2}[http://www.gnu.org/licenses/gpl-2.0.txt] or
7
+ {3}[http://www.gnu.org/licenses/gpl-3.0.txt] (see link:COPYING).
8
+ The regurgitator project leader (Eric Wong) reserves the right to
9
+ relicense regurgitator under future versions of the GPL.
10
+
11
+ regurgitator is distributed in the hope that it will be useful, but
12
+ WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with this library; if not, write to the Free Software Foundation,
18
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
data/NEWS ADDED
@@ -0,0 +1 @@
1
+ No news yet.
data/README ADDED
@@ -0,0 +1,49 @@
1
+ = \Regurgitator - read-only Rack endpoints for MogileFS
2
+
3
+ \Regurgitator is a GPL-licensed library and Rack middleware for
4
+ serving files stored in MogileFS. It can be embedded inside
5
+ any existing Rack application or be used as a standalone Rack app.
6
+
7
+ \Regurgitator talks directly to the underlying RDBMS powering MogileFS
8
+ to avoid extra layers and latency. \Regurgitator only needs
9
+ read-only access to the RDBMS and can connect to slave databases.
10
+
11
+ == Rack Endpoints
12
+
13
+ \Regurgitator comes with your choice of three standard Rack endpoints for
14
+ serving files stored in MogileFS. Pick the one best suited for your
15
+ application:
16
+
17
+ * {DomainPath}[link:Regurgitator/DomainPath.html]
18
+ * {DomainHost}[link:Regurgitator/DomainHost.html]
19
+ * {OneDomain}[link:Regurgitator/OneDomain.html]
20
+
21
+ It should also be easy to roll your own based on your needs by reusing
22
+ our library code.
23
+
24
+ == Hacking
25
+
26
+ You can get the latest source via git from the following locations:
27
+
28
+ git://bogomips.org/regurgitator.git
29
+ git://repo.or.cz/regurgitator.git (mirror)
30
+
31
+ You may browse the code from the web and download the latest snapshot
32
+ tarballs here:
33
+
34
+ * http://bogomips.org/regurgitator.git (cgit)
35
+ * http://repo.or.cz/w/regurgitator.git (gitweb)
36
+
37
+ Inline patches (from "git format-patch") to the mailing list are
38
+ preferred because they allow code review and comments in the reply to
39
+ the patch.
40
+
41
+ We will adhere to mostly the same conventions for patch submissions as
42
+ git itself. See the Documentation/SubmittingPatches document
43
+ distributed with git on patch submission guidelines to follow. Just
44
+ don't email the git mailing list or maintainer with \Regurgitator patches.
45
+
46
+ == Contact
47
+
48
+ All feedback (bug reports, user/development discussion, patches, pull
49
+ requests) goes to the mailing list: mailto:barfs@librelist.org
@@ -0,0 +1,6 @@
1
# Standalone rackup example: every request is handed to
# Regurgitator::DomainHost, which is configured with the ".example.com"
# host suffix and the Sequel database handle below.
require 'regurgitator'
db = Sequel.connect 'mysql2://user@example.com/mogilefs'
use Rack::ContentLength
use Rack::ContentType, 'text/plain'
# Fallback app given to the endpoint.  The response headers are a Hash
# (not an Array) so the triplet is valid on every Rack version.
err = lambda { |env| [ 404, {}, [] ] }
run Regurgitator::DomainHost.new(err, :db => db, :suffix => '.example.com')
@@ -0,0 +1,5 @@
1
# Standalone rackup example: every request is handed to
# Regurgitator::DomainPath, backed by the Sequel database handle below.
require 'regurgitator'
db = Sequel.connect 'mysql2://user@example.com/mogilefs'
use Rack::ContentLength
use Rack::ContentType, 'text/plain'
# Fallback app given to the endpoint.  The response headers are a Hash
# (not an Array) so the triplet is valid on every Rack version.
err = lambda { |env| [ 404, {}, [] ] }
run Regurgitator::DomainPath.new(err, db)
@@ -0,0 +1,6 @@
1
# Standalone rackup example: every request is handed to
# Regurgitator::OneDomain, configured with the "my.domain" domain and
# the Sequel database handle below.
require 'regurgitator'
db = Sequel.connect 'mysql2://user@example.com/mogilefs'
use Rack::ContentLength
use Rack::ContentType, 'text/plain'
# Fallback app given to the endpoint.  The response headers are a Hash
# (not an Array) so the triplet is valid on every Rack version.
err = lambda { |env| [ 404, {}, [] ] }
run Regurgitator::OneDomain.new(err, :db => db, :domain => 'my.domain')
@@ -0,0 +1,43 @@
1
+ # -*- encoding: binary -*-
2
+ require 'uri'
3
+ require 'rack'
4
+ require 'thread'
5
+ require 'sequel'
6
+ require 'http_spew'
7
+ require 'rpatricia'
8
+
9
+ # The Regurgitator main module, serving as a namespace for all
10
+ # modules and classes.
11
+ #
12
+ # All modules meant for use in applications are autoload-ed,
13
+ # so just "require 'regurgitator'" in your code.
14
module Regurgitator

  # Rack env key (an HTTP request header) that is read to determine a
  # location to reproxy to.  Add this header in your nginx config.
  REPROXY_KEY = 'HTTP_X_REPROXY_PATH'

  # Constant name => file to load on first reference.  The last three
  # entries are the Rack middlewares/apps.
  {
    :ServerSettings => 'regurgitator/server_settings',
    :Domain => 'regurgitator/domain',
    :Device => 'regurgitator/device',
    :FileInfo => 'regurgitator/file_info',
    :Endpoint => 'regurgitator/endpoint',
    :FileRequest => 'regurgitator/file_request',
    :Local => 'regurgitator/local',
    :LocalFile => 'regurgitator/local_file',
    :DomainPath => 'regurgitator/domain_path',
    :DomainHost => 'regurgitator/domain_host',
    :OneDomain => 'regurgitator/one_domain',
  }.each do |const_name, feature|
    autoload(const_name, feature)
  end

  # used to wrap up all Regurgitator-specific extensions
  class Error < StandardError
  end

  # raised when there are no readable devices
  class NoDevices < Error
  end

  # raised by FileRequest when the HTTP status code
  # is outside of (200-299, 304)
  class BadResponse < Error
  end
end
@@ -0,0 +1,79 @@
1
+ # -*- encoding: binary -*-
2
+ # helpers for device lookups
3
module Regurgitator::Device
  include Regurgitator::ServerSettings

  # :stopdoc:
  # d.status is selected as d_status because refresh_device_unlocked
  # reads x[:d_status] to decide whether a device is preferred; without
  # that column every device was flagged as not preferred.
  REFRESH_DEVICE = <<-EOS
    SELECT d.devid, d.status AS d_status,
           h.hostip, h.altip, h.altmask, h.http_port, h.http_get_port
    FROM device d
    LEFT JOIN host h ON d.hostid = h.hostid
    WHERE d.status IN ('readonly','alive','drain') AND h.status = 'alive'
  EOS

  DEVICES_ON = 'SELECT devid FROM file_on WHERE fid = ?'
  # :startdoc:

  def initialize(*args) # :nodoc:
    super
    init_device_ivars
  end

  # Sets up an empty device cache.  The cache mtime starts at the epoch
  # so the first refresh_device call always queries the database.
  def init_device_ivars
    @device_cache_mtime = Time.at(0)
    @device_cache = nil
    @device_cache_lock = Mutex.new
  end

  # Objects that +extend+ this module never run our initialize, so the
  # cache ivars are set up here instead.
  def self.extended(obj)
    obj.init_device_ivars
  end

  # Builds an Array of one or two URI::HTTP objects from +opts+.
  # The second URI is only added when +get_port+ is set and uses
  # +get_port+ as its port.  Note: +opts+ is modified in that case
  # (opts[:port] is overwritten with +get_port+).
  def device_uri_pair!(opts, get_port)
    rv = [ URI::HTTP.build(opts) ]
    if get_port
      opts[:port] = get_port
      rv << URI::HTTP.build(opts)
    end
    rv
  end

  # Returns a hash of device info with the Integer +devid+
  # as the hash key.  Unless +force+ is true, a cached hash that is
  # less than 60 seconds old is returned instead of querying again.
  def refresh_device(force = false) # :nodoc:
    @device_cache_lock.synchronize { refresh_device_unlocked(force) }
  end

  # Does the work for refresh_device; callers must hold
  # @device_cache_lock.
  def refresh_device_unlocked(force) # :nodoc:
    return @device_cache if ! force && ((Time.now - @device_cache_mtime) < 60)
    tmp = {}.compare_by_identity
    # refresh_zone and zone_for are not defined in this module
    # (presumably provided by ServerSettings)
    refresh_zone(force)
    @db[REFRESH_DEVICE].each do |x|
      # devices in "drain" status may hit raciness; try those as a
      # last resort
      x[:preferred] = !!(x[:d_status] =~ %r{\A(?:readonly|alive)\z})
      hostip = x[:hostip]
      port = x[:http_port] || 80
      get_port = x[:http_get_port]
      x[:ipaddr] = { :hostip => hostip }
      x[:zone] = zone_for(hostip)
      devid = x[:devid]
      o = { :path => "/dev#{devid}", :port => port, :host => hostip }

      x[:uris] = { :pri => device_uri_pair!(o, get_port) }
      x[:ipaddr][:altmask] = pat = Patricia.new
      altmask = x[:altmask] and pat.add(altmask)

      if altip = x[:altip]
        x[:ipaddr][:altip] = altip
        o[:host] = altip
        # device_uri_pair! may have replaced o[:port] with get_port above
        o[:port] = port
        x[:uris][:alt] = device_uri_pair!(o, get_port)
      end

      tmp[devid] = x
    end
    Regurgitator::Local.refresh_addrs!
    @device_cache_mtime = Time.now
    @device_cache = tmp
  end
end
@@ -0,0 +1,37 @@
1
+ # -*- encoding: binary -*-
2
+ # helpers for domain lookups
3
module Regurgitator::Domain

  REFRESH_DOMAIN = 'SELECT dmid,namespace FROM domain' # :nodoc:

  # Starts with an empty cache whose mtime is the epoch, so the first
  # lookup always loads the domain list from @db.
  def initialize(*args) # :nodoc:
    super
    @domain_lock = Mutex.new
    @domain_cache_mtime = Time.at(0)
    @domain_cache = nil
  end

  # returns the +dmid+ (domain identifier/primary key) for a string +domain+
  #
  # An unknown +domain+ triggers one forced reload of the domain list;
  # if it is still unknown, +false+ is stored in the cache for it and
  # returned.
  def get_dmid(domain)
    dmid = refresh_domain[domain]
    return dmid if dmid
    # false (as opposed to nil) means a previous lookup already missed
    return false if false == dmid

    dmid = refresh_domain(true)[domain]
    return dmid if dmid
    @domain_lock.synchronize do
      @domain_cache[domain] = false
    end
  end

  # The list of all available domains is kept in memory; it is not
  # expected to be huge.
  #
  # Returns a hash with string namespaces as keys and dmids as values
  def refresh_domain(force = false) # :nodoc:
    @domain_lock.synchronize do
      refresh_domain_unlocked(force)
    end
  end

  # Reloads the cache from @db unless it is younger than 60 seconds and
  # +force+ is false.  Callers must hold @domain_lock.
  def refresh_domain_unlocked(force) # :nodoc:
    unless force
      age = Time.now - @domain_cache_mtime
      return @domain_cache if age < 60
    end

    fresh = {}
    @db[REFRESH_DOMAIN].each do |row|
      fresh[row[:namespace].freeze] = row[:dmid]
    end
    @domain_cache_mtime = Time.now
    @domain_cache = fresh
  end
end
@@ -0,0 +1,43 @@
1
+ # -*- encoding: binary -*-
2
+
3
+ # Matches GET and HEAD requests in the format of ":domain.example.com/:dkey"
4
+ # where domain is read from the Host: header.
5
+ #
6
+ # If a client were to make a request to "http://foo.example.com/bar",
7
+ # this endpoint would respond with the file from the "foo" domain
8
+ # with the key "bar".
9
+ # To use as middleware:
10
+ # require 'regurgitator'
11
+ # db = Sequel.connect('mysql2://user@example.com/mogilefs')
12
+ # use Regurgitator::DomainHost, :suffix => '.example.com', :db => db
13
+ #
14
+ # See the {domain_host.ru}[link:examples/domain_host.ru]
15
+ # example for a standalone app.
16
+
17
class Regurgitator::DomainHost
  include Regurgitator::Endpoint

  # +opts+ must contain :suffix (a String or a Regexp) and :db.
  # A String suffix is turned into an anchored Regexp whose first
  # capture group is everything in front of the suffix; a Regexp is
  # used as given.
  def initialize(app, opts) # :nodoc:
    suffix = opts[:suffix]
    pattern =
      if String === suffix
        %r{\A(.*)#{Regexp.escape(suffix)}\z}
      elsif Regexp === suffix
        suffix
      else
        raise TypeError, ":suffix must be a String or Regexp #{suffix.inspect}"
      end
    @domain_regexp = pattern
    super(app, opts[:db])
  end

  # Serves GET and HEAD requests that carry a Host: header; everything
  # else is passed on to the wrapped app.
  def call(env) # :nodoc:
    verb = env['REQUEST_METHOD']
    return @app.call(env) unless 'GET' == verb || 'HEAD' == verb

    host = env['HTTP_HOST']
    return @app.call(env) unless host

    # fall back to the full Host: value when the suffix does not match
    matched = @domain_regexp.match(host)
    domain = matched ? matched[1] : host
    serve_file(env, domain, env['PATH_INFO'][1..-1])
  end
end
@@ -0,0 +1,33 @@
1
+ # -*- encoding: binary -*-
2
+
3
+ # matches GET and HEAD requests in the format of "/:namespace/:dkey"
4
+ # where +:dkey+ may contain multiple slashes
5
+ #
6
+ # If a client were to make a request to "http://example.com/d/foo/bar",
7
+ # this endpoint would respond with the file from the "d" domain
8
+ # with the key "foo/bar".
9
+ #
10
+ # To use as middleware:
11
+ # require 'regurgitator'
12
+ # db = Sequel.connect('mysql2://user@example.com/mogilefs')
13
+ # use Regurgitator::DomainPath, db
14
+ #
15
+ # See the {domain_path.ru}[link:examples/domain_path.ru]
16
+ # example for a standalone app.
17
class Regurgitator::DomainPath
  include Regurgitator::Endpoint

  # +db+ may be the database handle itself or a Hash holding it
  # under the :db key.
  def initialize(app, db) # :nodoc:
    handle = Hash === db ? db[:db] : db
    super(app, handle)
  end

  # Serves GET and HEAD requests whose path looks like
  # "/:namespace/:dkey"; everything else is passed on to the wrapped app.
  def call(env) # :nodoc:
    verb = env['REQUEST_METHOD']
    return @app.call(env) unless 'GET' == verb || 'HEAD' == verb

    parts = %r{\A/([^/]+)/(.+)\z}.match(env['PATH_INFO'])
    return @app.call(env) unless parts

    serve_file(env, parts[1], parts[2])
  end
end
@@ -0,0 +1,97 @@
1
+ # -*- encoding: binary -*-
2
+ require "socket"
3
module Regurgitator::Endpoint
  include Regurgitator::FileInfo
  include Rack::Utils
  include Rack::Mime

  REPROXY_KEY = Regurgitator::REPROXY_KEY # :nodoc:

  # +app+ is the Rack app that requests are passed to when they cannot
  # be served here; +db+ is the database handle stored in @db.
  def initialize(app, db)
    @app = app
    @db = db
    super
  end

  # Returns REMOTE_ADDR when the reproxy header is present in +env+,
  # otherwise Regurgitator::Local.addr.
  def best_addr(env)
    env.include?(REPROXY_KEY) ? env['REMOTE_ADDR'] : Regurgitator::Local.addr
  end

  # see nginx config examples for reproxy
  def reproxy_path(env)
    env[REPROXY_KEY]
  end

  # allow users to specify desired "Save As" filenames in the query string
  #
  # Recognizes the +inline+, +attachment+ and +filename+ query
  # parameters.  A requested name is only honored when escaping it
  # leaves it unchanged.  Sets Content-Disposition in +headers+ when a
  # name is honored, and always sets Content-Type from the extension of
  # that name (or of +dkey+ when no name is honored).
  def filename!(env, headers, dkey)
    params = parse_query(env['QUERY_STRING'])
    cd = nil
    if fn = params['inline'] and escape(fn) == fn
      cd, dkey = 'inline', fn
    elsif fn = (params['attachment'] || params['filename']) and
          escape(fn) == fn
      cd, dkey = 'attachment', fn
    end
    headers['Content-Disposition'] = "#{cd}; filename=#{fn}" if cd
    headers['Content-Type'] = mime_type(File.extname(dkey))
  end

  # Builds a 200 response with an empty body for zero-length files.
  def empty_file(env, dkey)
    headers = { 'Content-Length' => '0' }
    filename!(env, headers, dkey)
    [ 200, headers, [] ]
  end

  # Looks up +dkey+ in +domain+ and serves it; requests for which
  # file_info returns nothing are passed on to the wrapped app.
  def serve_file(env, domain, dkey)
    env['regurgitator.zone'] ||= best_addr(env)
    info = file_info(env, domain, dkey) or return @app.call(env)
    serve_info(env, info, dkey)
  end

  # Builds the Rack response for the file described by +info+.
  #
  # URIs are tried one group at a time: first those stored under
  # env['regurgitator.zone'], then those under the zone of
  # Regurgitator::Local.addr, then all remaining ones at once.  When a
  # group raises Regurgitator::NoDevices the next group is tried; once
  # all groups are used up the request goes to the wrapped app.
  #
  # Raises Regurgitator::Error for request methods other than GET/HEAD.
  def serve_info(env, info, dkey)
    info[:length] == 0 and return empty_file(env, dkey)
    zone_uris = info[:uris]

    zone = env['regurgitator.zone']
    begin
      uris = zone_uris.delete(zone)
      uris ||= zone_uris.delete(zone_for(Regurgitator::Local.addr))

      # no nearby zones? try all of them at once!
      if uris.nil?
        uris = []
        zone_uris.each_value { |v| uris.concat v }
        zone_uris.clear
      end

      status, headers, body = r = Regurgitator::FileRequest.run(env, uris)
      filename!(env, headers, dkey)
      headers['ETag'] = %("#{info[:fid]}")

      case env["REQUEST_METHOD"]
      when "GET"
        if body.respond_to?(:uri) && path = reproxy_path(env)
          body.close if body.respond_to?(:close)
          headers['Content-Length'] = '0'
          headers['Location'] = body.uri.to_s
          headers['X-Accel-Redirect'] = path
          # yes we violate Rack::Lint here
          %w(Content-Type Last-Modified).each do |key|
            # skip headers that are absent so no nil value is emitted
            value = headers.delete(key) or next
            headers["X-Reproxy-#{key}"] = value
          end
          return [ 302, headers, [] ]
        end
        return r
      when "HEAD"
        body.close if body.respond_to?(:close)
        return [ status, headers, [] ]
      else
        # raise instead of Kernel#abort: one bad request must not
        # terminate the whole server process
        body.close if body.respond_to?(:close)
        raise Regurgitator::Error,
              "Unexpected REQUEST_METHOD=#{env['REQUEST_METHOD']}"
      end
    rescue Regurgitator::NoDevices
      retry unless zone_uris.empty?
    end until zone_uris.empty?
    @app.call(env)
  end
end