regurgitator 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/GIT-VERSION-FILE ADDED
@@ -0,0 +1 @@
1
+ GIT_VERSION = 0.0.0
data/GIT-VERSION-GEN ADDED
@@ -0,0 +1,40 @@
1
+ #!/bin/sh
2
+ # Computes the version string and records it in GIT-VERSION-FILE.
3
+ GVF=GIT-VERSION-FILE
4
+ DEF_VER=v0.0.0.GIT
5
+ # LF holds a single newline; it is used below to reject multi-line output.
6
+ LF='
7
+ '
8
+
9
+ # First see if there is a version file (included in release tarballs),
10
+ # then try git-describe, then fall back to DEF_VER.
11
+ if test -f version
12
+ then
13
+ VN=$(cat version) || VN="$DEF_VER"
14
+ elif test -d .git -o -f .git &&
15
+ VN=$(git describe --abbrev=4 HEAD 2>/dev/null) &&
16
+ case "$VN" in
17
+ *$LF*) (exit 1) ;; # multi-line describe output fails the condition
18
+ v[0-9]*)
19
+ git update-index -q --refresh
20
+ test -z "$(git diff-index --name-only HEAD --)" ||
21
+ VN="$VN-dirty" ;;
22
+ esac
23
+ then
24
+ VN=$(echo "$VN" | sed -e 's/-/./g');
25
+ else
26
+ VN="$DEF_VER"
27
+ fi
28
+ # Strip any leading "v" so only the bare version string remains.
29
+ VN=$(expr "$VN" : v*'\(.*\)')
30
+ # Rewrite $GVF only when its recorded version differs from $VN.
31
+ if test -r $GVF
32
+ then
33
+ VC=$(sed -e 's/^GIT_VERSION = //' <$GVF)
34
+ else
35
+ VC=unset
36
+ fi
37
+ test "$VN" = "$VC" || {
38
+ echo >&2 "GIT_VERSION = $VN"
39
+ echo "GIT_VERSION = $VN" >$GVF
40
+ }
data/GNUmakefile ADDED
@@ -0,0 +1,8 @@
1
+ all:: # first rule in this file; more prerequisites are attached below
2
+ RSYNC_DEST := bogomips.org:/srv/bogomips/regurgitator
3
+ rfproject := rainbows
4
+ rfpackage := regurgitator
5
+ include pkg.mk # not shown here; presumably consumes the variables set above
6
+ all:: test-integration # building "all" also runs the integration tests
7
+ test-integration: # delegates to the makefile in the t directory
8
+ $(MAKE) -C t
data/LATEST ADDED
@@ -0,0 +1 @@
1
+ Currently unreleased
data/LICENSE ADDED
@@ -0,0 +1,18 @@
1
+ regurgitator is copyrighted Free Software by all contributors, see logs
2
+ in revision control for names and email addresses of all of them.
3
+
4
+ You can redistribute it and/or modify it under the terms of the GNU
5
+ General Public License (GPL) as published by the Free Software
6
+ Foundation, version {2}[http://www.gnu.org/licenses/gpl-2.0.txt] or
7
+ {3}[http://www.gnu.org/licenses/gpl-3.0.txt] (see link:COPYING).
8
+ The regurgitator project leader (Eric Wong) reserves the right to
9
+ relicense regurgitator under future versions of the GPL.
10
+
11
+ regurgitator is distributed in the hope that it will be useful, but
12
+ WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with this library; if not, write to the Free Software Foundation,
18
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
data/NEWS ADDED
@@ -0,0 +1 @@
1
+ No news yet.
data/README ADDED
@@ -0,0 +1,49 @@
1
+ = \Regurgitator - read-only Rack endpoints for MogileFS
2
+
3
+ \Regurgitator is a GPL-licensed library and Rack middleware for
4
+ serving files stored in MogileFS. It can be embedded inside
5
+ any existing Rack application or be used as a standalone Rack app.
6
+
7
+ \Regurgitator talks directly to the underlying RDBMS powering MogileFS
8
+ to avoid extra layers and latency. \Regurgitator only needs
9
+ read-only access to the RDBMS and can connect to slave databases.
10
+
11
+ == Rack Endpoints
12
+
13
+ \Regurgitator comes with your choice of three standard Rack endpoints for
14
+ serving files stored in MogileFS. Pick the one best suited for your
15
+ application:
16
+
17
+ * {DomainPath}[link:Regurgitator/DomainPath.html]
18
+ * {DomainHost}[link:Regurgitator/DomainHost.html]
19
+ * {OneDomain}[link:Regurgitator/OneDomain.html]
20
+
21
+ It should also be easy to roll your own based on your needs by reusing
22
+ our library code.
23
+
24
+ == Hacking
25
+
26
+ You can get the latest source via git from the following locations:
27
+
28
+ git://bogomips.org/regurgitator.git
29
+ git://repo.or.cz/regurgitator.git (mirror)
30
+
31
+ You may browse the code from the web and download the latest snapshot
32
+ tarballs here:
33
+
34
+ * http://bogomips.org/regurgitator.git (cgit)
35
+ * http://repo.or.cz/w/regurgitator.git (gitweb)
36
+
37
+ Inline patches (from "git format-patch") to the mailing list are
38
+ preferred because they allow code review and comments in the reply to
39
+ the patch.
40
+
41
+ We will adhere to mostly the same conventions for patch submissions as
42
+ git itself. See the Documentation/SubmittingPatches document
43
+ distributed with git for patch submission guidelines to follow. Just
44
+ don't email the git mailing list or maintainer with \Regurgitator patches.
45
+
46
+ == Contact
47
+
48
+ All feedback (bug reports, user/development discussion, patches, pull
49
+ requests) goes to the mailing list: mailto:barfs@librelist.org
@@ -0,0 +1,6 @@
1
+ require 'regurgitator'
2
+ db = Sequel.connect 'mysql2://user@example.com/mogilefs'
3
+ use Rack::ContentLength
4
+ use Rack::ContentType, 'text/plain'
5
+ err = lambda { |env| [ 404, [], [] ] }
6
+ run Regurgitator::DomainHost.new(err, :db => db, :suffix => '.example.com')
@@ -0,0 +1,5 @@
1
+ require 'regurgitator'
2
+ db = Sequel.connect 'mysql2://user@example.com/mogilefs'
3
+ use Rack::ContentLength
4
+ use Rack::ContentType, 'text/plain'
5
+ run Regurgitator::DomainPath.new(lambda { |env| [ 404, [], [] ] }, db)
@@ -0,0 +1,6 @@
1
+ require 'regurgitator'
2
+ db = Sequel.connect 'mysql2://user@example.com/mogilefs'
3
+ use Rack::ContentLength
4
+ use Rack::ContentType, 'text/plain'
5
+ err = lambda { |env| [ 404, [], [] ] }
6
+ run Regurgitator::OneDomain.new(err, :db => db, :domain => 'my.domain')
@@ -0,0 +1,43 @@
1
+ # -*- encoding: binary -*-
2
+ require 'uri'
3
+ require 'rack'
4
+ require 'thread'
5
+ require 'sequel'
6
+ require 'http_spew'
7
+ require 'rpatricia'
8
+
9
+ # The Regurgitator main module, serving as a namespace for all
10
+ # modules and classes.
11
+ #
12
+ # All modules meant for use in applications are autoload-ed,
13
+ # so just "require 'regurgitator'" in your code.
14
+ module Regurgitator
15
+
16
+ # Name (as it appears in the Rack env) of the request header that is read
17
+ # to determine a location to reproxy to. Add this header in your nginx config.
18
+ REPROXY_KEY = 'HTTP_X_REPROXY_PATH'
19
+ # internal helpers, loaded lazily via autoload
20
+ autoload :ServerSettings, 'regurgitator/server_settings'
21
+ autoload :Domain, 'regurgitator/domain'
22
+ autoload :Device, 'regurgitator/device'
23
+ autoload :FileInfo, 'regurgitator/file_info'
24
+ autoload :Endpoint, 'regurgitator/endpoint'
25
+ autoload :FileRequest, 'regurgitator/file_request'
26
+ autoload :Local, 'regurgitator/local'
27
+ autoload :LocalFile, 'regurgitator/local_file'
28
+
29
+ # Rack middlewares/apps
30
+ autoload :DomainPath, 'regurgitator/domain_path'
31
+ autoload :DomainHost, 'regurgitator/domain_host'
32
+ autoload :OneDomain, 'regurgitator/one_domain'
33
+
34
+ # base class used to wrap up all Regurgitator-specific exceptions
35
+ class Error < StandardError; end
36
+
37
+ # raised when there are no readable devices
38
+ class NoDevices < Error; end
39
+
40
+ # raised by FileRequest when the HTTP status code
41
+ # is outside of (200-299, 304)
42
+ class BadResponse < Error; end
43
+ end
@@ -0,0 +1,79 @@
1
+ # -*- encoding: binary -*-
2
+ # helpers for device lookups
3
+ module Regurgitator::Device
4
+ include Regurgitator::ServerSettings
5
+
6
+ # :stopdoc:
7
+ REFRESH_DEVICE = <<-EOS
8
+ SELECT d.devid, h.hostip, h.altip, h.altmask, h.http_port, h.http_get_port
9
+ FROM device d
10
+ LEFT JOIN host h ON d.hostid = h.hostid
11
+ WHERE d.status IN ('readonly','alive','drain') AND h.status = 'alive'
12
+ EOS
13
+
14
+ DEVICES_ON = 'SELECT devid FROM file_on WHERE fid = ?'
15
+ # :startdoc:
16
+
17
+ def initialize(*args) # :nodoc:
18
+ super
19
+ init_device_ivars
20
+ end
21
+
22
+ def init_device_ivars
23
+ @device_cache_mtime = Time.at(0)
24
+ @device_cache = nil
25
+ @device_cache_lock = Mutex.new
26
+ end
27
+
28
+ def self.extended(obj)
29
+ obj.init_device_ivars
30
+ end
31
+
32
+ def device_uri_pair!(opts, get_port)
33
+ rv = [ URI::HTTP.build(opts) ]
34
+ if get_port
35
+ opts[:port] = get_port
36
+ rv << URI::HTTP.build(opts)
37
+ end
38
+ rv
39
+ end
40
+
41
+ # Returns a hash of device info with the Integer +devid+
42
+ # as the hash key.
43
+ def refresh_device(force = false) # :nodoc:
44
+ @device_cache_lock.synchronize { refresh_device_unlocked(force) }
45
+ end
46
+
47
+ def refresh_device_unlocked(force) # :nodoc:
48
+ return @device_cache if ! force && ((Time.now - @device_cache_mtime) < 60)
49
+ tmp = {}.compare_by_identity
50
+ refresh_zone(force)
51
+ @db[REFRESH_DEVICE].each do |x|
52
+ # devices in "drain" status may hit raciness try those as a last resort
53
+ x[:preferred] = !!(x[:d_status] =~ %r{\A(?:readonly|alive)\z})
54
+ hostip = x[:hostip]
55
+ port = x[:http_port] || 80
56
+ get_port = x[:http_get_port]
57
+ x[:ipaddr] = { :hostip => hostip }
58
+ x[:zone] = zone_for(hostip)
59
+ devid = x[:devid]
60
+ o = { :path => "/dev#{devid}", :port => port, :host => hostip }
61
+
62
+ x[:uris] = { :pri => device_uri_pair!(o, get_port) }
63
+ x[:ipaddr][:altmask] = pat = Patricia.new
64
+ altmask = x[:altmask] and pat.add(altmask)
65
+
66
+ if altip = x[:altip]
67
+ x[:ipaddr][:altip] = altip
68
+ o[:host] = altip
69
+ o[:port] = port
70
+ x[:uris][:alt] = device_uri_pair!(o, get_port)
71
+ end
72
+
73
+ tmp[devid] = x
74
+ end
75
+ Regurgitator::Local.refresh_addrs!
76
+ @device_cache_mtime = Time.now
77
+ @device_cache = tmp
78
+ end
79
+ end
@@ -0,0 +1,37 @@
1
+ # -*- encoding: binary -*-
2
+ # helpers for domain lookups
3
+ module Regurgitator::Domain
4
+
5
+ REFRESH_DOMAIN = 'SELECT dmid,namespace FROM domain' # :nodoc:
6
+
7
+ def initialize(*args) # :nodoc:
8
+ super
9
+ @domain_lock = Mutex.new
10
+ @domain_cache_mtime = Time.at(0)
11
+ @domain_cache = nil
12
+ end
13
+
14
+ # returns the +dmid+ (domain identifier/primary key) for a string +domain+
15
+ def get_dmid(domain)
16
+ rv = refresh_domain[domain] and return rv
17
+ return false if false == rv
18
+ rv = refresh_domain(true)[domain] and return rv
19
+ @domain_lock.synchronize { @domain_cache[domain] = false }
20
+ end
21
+
22
+ # We cache the list of all available domains in memory, this shouldn't
23
+ # be too huge, though...
24
+ #
25
+ # Returns a hash with string namespaces as keys and dmids as values
26
+ def refresh_domain(force = false) # :nodoc:
27
+ @domain_lock.synchronize { refresh_domain_unlocked(force) }
28
+ end
29
+
30
+ def refresh_domain_unlocked(force) # :nodoc:
31
+ return @domain_cache if ! force && ((Time.now - @domain_cache_mtime) < 60)
32
+ tmp = {}
33
+ @db[REFRESH_DOMAIN].each { |x| tmp[x[:namespace].freeze] = x[:dmid] }
34
+ @domain_cache_mtime = Time.now
35
+ @domain_cache = tmp
36
+ end
37
+ end
@@ -0,0 +1,43 @@
1
+ # -*- encoding: binary -*-
2
+
3
+ # Matches GET and HEAD requests in the format of ":domain.example.com/:dkey"
4
+ # where domain is read from the Host: header.
5
+ #
6
+ # If a client were to make a request to "http://foo.example.com/bar",
7
+ # this endpoint would respond with the file from the "foo" domain
8
+ # with the key "bar".
9
+ # To use as middleware:
10
+ # require 'regurgitator'
11
+ # db = Sequel.connect('mysql2://user@example.com/mogilefs')
12
+ # use Regurgitator::DomainHost, :suffix => '.example.com', :db => db
13
+ #
14
+ # See the {domain_host.ru}[link:examples/domain_host.ru]
15
+ # example for a standalone app.
16
+
17
+ class Regurgitator::DomainHost
18
+ include Regurgitator::Endpoint
19
+
20
+ def initialize(app, opts) # :nodoc:
21
+ case suffix = opts[:suffix]
22
+ when String
23
+ suffix = Regexp.quote(suffix)
24
+ suffix = %r{\A(.*)#{suffix}\z}
25
+ when Regexp
26
+ else
27
+ raise TypeError, ":suffix must be a String or Regexp #{suffix.inspect}"
28
+ end
29
+ @domain_regexp = suffix
30
+ super(app, opts[:db])
31
+ end
32
+
33
+ def call(env) # :nodoc:
34
+ case env['REQUEST_METHOD']
35
+ when 'GET', 'HEAD'
36
+ host = env['HTTP_HOST'] or return @app.call(env)
37
+ domain = @domain_regexp =~ host ? $1 : host
38
+ serve_file(env, domain, env['PATH_INFO'][1..-1])
39
+ else
40
+ @app.call(env)
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,33 @@
1
+ # -*- encoding: binary -*-
2
+
3
+ # matches GET and HEAD requests in the format of "/:namespace/:dkey"
4
+ # where +:dkey+ may contain multiple slashes
5
+ #
6
+ # If a client were to make a request to "http://example.com/d/foo/bar",
7
+ # this endpoint would respond with the file from the "d" domain
8
+ # with the key "foo/bar".
9
+ #
10
+ # To use as middleware:
11
+ # require 'regurgitator'
12
+ # db = Sequel.connect('mysql2://user@example.com/mogilefs')
13
+ # use Regurgitator::DomainPath, db
14
+ #
15
+ # See the {domain_path.ru}[link:examples/domain_path.ru]
16
+ # example for a standalone app.
17
+ class Regurgitator::DomainPath
18
+ include Regurgitator::Endpoint
19
+
20
+ def call(env) # :nodoc:
21
+ case env['REQUEST_METHOD']
22
+ when 'GET', 'HEAD'
23
+ env['PATH_INFO'] =~ %r{\A/([^/]+)/(.+)\z} or return @app.call(env)
24
+ serve_file(env, $1, $2)
25
+ else
26
+ @app.call(env)
27
+ end
28
+ end
29
+
30
+ def initialize(app, db) # :nodoc:
31
+ super(app, Hash === db ? db[:db] : db)
32
+ end
33
+ end
@@ -0,0 +1,97 @@
1
+ # -*- encoding: binary -*-
2
+ require "socket"
3
+ module Regurgitator::Endpoint
4
+ include Regurgitator::FileInfo
5
+ include Rack::Utils
6
+ include Rack::Mime
7
+
8
+ REPROXY_KEY = Regurgitator::REPROXY_KEY # :nodoc:
9
+ # Stores the downstream Rack app (+app+) and the database handle (+db+).
10
+ def initialize(app, db)
11
+ @app = app
12
+ @db = db
13
+ super
14
+ end
15
+ # Address used to pick a zone: REMOTE_ADDR when the reproxy header is set, else the local address.
16
+ def best_addr(env)
17
+ env.include?(REPROXY_KEY) ? env['REMOTE_ADDR'] : Regurgitator::Local.addr
18
+ end
19
+
20
+ # returns the value of the reproxy header; see nginx config examples for reproxy
21
+ def reproxy_path(env)
22
+ env[REPROXY_KEY]
23
+ end
24
+
25
+ # allow users to specify desired "Save As" filenames in the query string
26
+ def filename!(env, headers, dkey)
27
+ params = parse_query(env['QUERY_STRING'])
28
+ cd = nil
29
+ if fn = params['inline'] and escape(fn) == fn # only names that need no escaping are accepted
30
+ cd, dkey = 'inline', fn
31
+ elsif fn = (params['attachment'] || params['filename']) and
32
+ escape(fn) == fn
33
+ cd, dkey = 'attachment', fn
34
+ end
35
+ headers['Content-Disposition'] = "#{cd}; filename=#{fn}" if cd
36
+ headers['Content-Type'] = mime_type(File.extname(dkey)) # dkey is the requested filename if one was accepted
37
+ end
38
+ # Builds the 200 response for a zero-length file without running a FileRequest.
39
+ def empty_file(env, dkey)
40
+ headers = { 'Content-Length' => '0' }
41
+ filename!(env, headers, dkey)
42
+ [ 200, headers, [] ]
43
+ end
44
+ # Looks up +dkey+ in +domain+ and serves it; falls back to @app when file_info returns nothing.
45
+ def serve_file(env, domain, dkey)
46
+ env['regurgitator.zone'] ||= best_addr(env)
47
+ info = file_info(env, domain, dkey) or return @app.call(env)
48
+ serve_info(env, info, dkey)
49
+ end
50
+ # Tries the URIs of the request's zone, then the local zone, then all remaining zones at once.
51
+ def serve_info(env, info, dkey)
52
+ info[:length] == 0 and return empty_file(env, dkey)
53
+ zone_uris = info[:uris]
54
+
55
+ zone = env['regurgitator.zone']
56
+ begin
57
+ uris = zone_uris.delete(zone)
58
+ uris ||= zone_uris.delete(zone_for(Regurgitator::Local.addr))
59
+
60
+ # no nearby zones? try all of them at once!
61
+ if uris.nil?
62
+ uris = []
63
+ zone_uris.each_value { |v| uris.concat v }
64
+ zone_uris.clear
65
+ end
66
+ uris or next
67
+
68
+ status, headers, body = r = Regurgitator::FileRequest.run(env, uris)
69
+ filename!(env, headers, dkey)
70
+ headers['ETag'] = %("#{info[:fid]}") # the ETag is the quoted fid
71
+
72
+ case env["REQUEST_METHOD"]
73
+ when "GET"
74
+ if body.respond_to?(:uri) && path = reproxy_path(env)
75
+ body.close if body.respond_to?(:close)
76
+ headers['Content-Length'] = '0'
77
+ headers['Location'] = body.uri.to_s
78
+ headers['X-Accel-Redirect'] = path
79
+ # yes we violate Rack::Lint here
80
+ %w(Content-Type Last-Modified).each do |key|
81
+ headers["X-Reproxy-#{key}"] = headers.delete(key)
82
+ end
83
+ return [ 302, headers, [] ]
84
+ end
85
+ return r
86
+ when "HEAD"
87
+ body.close if body.respond_to?(:close)
88
+ return [ status, headers, [] ]
89
+ else
90
+ abort "Unexpected REQUEST_METHOD=#{env['REQUEST_METHOD']}" # NOTE(review): Kernel#abort exits the process
91
+ end
92
+ rescue Regurgitator::NoDevices
93
+ retry unless zone_uris.empty? # zones are still left, try those next
94
+ end until zone_uris.empty?
95
+ @app.call(env) # nothing could be served, defer to the downstream app
96
+ end
97
+ end