regurgitator 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,53 @@
1
+ # -*- encoding: binary -*-
2
# Mixin that resolves a (domain, key) pair into file metadata and the
# per-zone URIs the file can be fetched from.
#
# Relies on +get_dmid+, +refresh_device+, +DEVICES_ON+ and an +@db+ handle
# being available in the including object (presumably supplied by
# Regurgitator::Domain / Regurgitator::Device and the host class -- confirm).
module Regurgitator::FileInfo
  include Regurgitator::Domain
  include Regurgitator::Device

  # :stopdoc:
  FILE_INFO = 'SELECT fid,length FROM file WHERE dmid = ? AND dkey = ? LIMIT 1'
  # :startdoc:

  # returns a hash with file information and URIs for accessing the file:
  #   {
  #     :fid => 3149113,
  #     :length => 17,
  #     :uris => {
  #       "east" => [ [read_write,read_only], [read_write,read_only] ],
  #       "west" => [ [read_write,read_only], [read_write,read_only] ],
  #       nil => [ [read_write,read_only], [read_write,read_only] ],
  #     }
  #   }
  #
  # +env+ is only consulted for the 'regurgitator.zone' key.
  # +update+ is passed through to +refresh_device+.
  #
  # Returns +nil+ when the domain or the key cannot be found.
  # Zero-length files are returned immediately, without a :uris entry.
  # URIs on devices flagged :preferred are returned when any exist,
  # otherwise the URIs of the remaining devices are returned.
  # If no URI was found at all and +update+ is false, the lookup is
  # retried once with +update+ set to true.
  def file_info(env, domain, dkey, update = false)
    dmid = get_dmid(domain) or return
    devices = refresh_device(update)
    info = @db[FILE_INFO, dmid, dkey].first or return
    0 == info[:length] and return info
    fid = info[:fid]
    read_uris = Hash.new { |h,k| h[k] = [] }
    drain_uris = Hash.new { |h,k| h[k] = [] }

    zone = env['regurgitator.zone']

    # The path only depends on the FID, so it is computed once here
    # instead of once per device row.
    if fid < 10_000_000_000 # TODO: support larger FIDs
      nfid = sprintf('%010u', fid)
      /\A(\d)(\d{3})(\d{3})(?:\d{3})\z/ =~ nfid
      fid_path = "/#$1/#$2/#$3/#{nfid}.fid"

      @db[DEVICES_ON, fid].each do |x|
        devinfo = devices[x[:devid]] or next

        uris = devinfo[:uris][alt_zone?(zone, devinfo) ? :alt : :pri]
        uris = uris.map do |u|
          u = u.dup
          u.path += fid_path # must be a copy
          u
        end
        (devinfo[:preferred] ? read_uris : drain_uris)[devinfo[:zone]] << uris
      end
    end
    uris = info[:uris] = read_uris.empty? ? drain_uris : read_uris
    (uris.empty? && ! update) ? file_info(env, domain, dkey, true) : info
  end

  # Returns true when the :alt URIs of a device should be used for +zone+:
  # either +zone+ is the Symbol :alt, or the device's
  # devinfo[:ipaddr][:altmask] reports that it includes +zone+.
  def alt_zone?(zone, devinfo)
    zone == :alt || devinfo[:ipaddr][:altmask].include?(zone)
  end
end
@@ -0,0 +1,48 @@
1
+ # -*- encoding: binary -*-
2
+
3
+ # Opens an IO.select-able object to read a (potentially large) file from a
4
+ # backend. We don't bother with keepalive since we want send buffers
5
+ # to be empty, so make sure you have enough local ports. You may want
6
+ # to enable TIME-WAIT reuse/recycling in your TCP stack if your
7
+ # network supports it. This object is used as a Rack response
8
+ # body.
9
# HTTP request for a single file on a backend device; instances are
# created by FileRequest.run, which returns the Rack response of the
# first backend that answers.
class Regurgitator::FileRequest < HTTP_Spew::Request

  # used for doing redirects or reproxying with nginx
  attr_reader :uri

  extend Regurgitator::Local

  # :stopdoc:
  USER_AGENT = name.dup

  # Rack env keys of the client request headers forwarded to the backend.
  # Non-destructive String#tr is used on purpose: String#tr! returns nil
  # when nothing was replaced, which would turn an already upper-case
  # header name into a bare "HTTP_".
  PASS_HEADERS = %w(Range If-Modified-Since Accept-Encoding).map do |x|
    "HTTP_#{x.tr('a-z-', 'A-Z_')}".freeze
  end
  # :startdoc:

  # +uris+ may be an array, read-only URIs are at the end
  #
  # +env+ is merged on top of the REQUEST_URI/HTTP_HOST values derived
  # from the chosen URI, so keys present in +env+ take precedence.
  def initialize(env, uris)
    uri = uris[-1] # a subclass of this may call multiple URIs
    addr = Socket.pack_sockaddr_in(uri.port, uri.host)
    req = {
      "REQUEST_URI" => uri.request_uri,
      "HTTP_HOST" => "#{uri.host}:#{uri.port}",
    }.merge!(env)
    @uri = uri
    super req, nil, addr
  end

  # Returns a Rack response for one entry of +uri_group+.
  #
  # A file reachable on the local filesystem (see +trylocal+) is served
  # directly.  Otherwise one request per URI list is started and handed
  # to HTTP_Spew.wait together with +timeout+; the first request without
  # an error supplies the response and the remaining ones are closed.
  #
  # Raises Regurgitator::NoDevices if no request is usable.
  def self.run(env, uri_group, timeout = env["regurgitator.timeout"] || 5)
    local_file = trylocal(env, uri_group) and return local_file.response

    req = {
      "REQUEST_METHOD" => env["REQUEST_METHOD"],
      "HTTP_USER_AGENT" => USER_AGENT,
    }
    # only forward headers the client actually sent
    PASS_HEADERS.each { |k| pass = env[k] and req[k] = pass }
    tmp = HTTP_Spew.wait(1, uri_group.map { |uris| new(req, uris) }, timeout)
    tmp.delete_if { |t| t.error }
    ready = tmp.shift or raise Regurgitator::NoDevices, "no readable devices"
    tmp.each { |t| t.close }
    ready.response
  end
end
@@ -0,0 +1,89 @@
1
+ # -*- encoding: binary -*-
2
+ require "socket"
3
+ require "set"
4
+
5
# Helpers for detecting files that live on the local filesystem so they
# can be served without an HTTP request to the storage device.
module Regurgitator::Local

  # This is used to register local storage endpoints so we can
  # short-circuit and avoid making HTTP requests to devices
  # Must be configured by the user (see Regurgitator::Local.register)
  #
  # key: TCP port, value: Hash of
  # directory root path => Set of "devN" directory names under that root
  STORE_PATHS = Hash.new do |h,port|
    h[port] = {}
  end.compare_by_identity

  @local_addrs = {}
  @local_addrs_lock = Mutex.new

  # Returns true if +addr+ (an IP address String) is one of the known
  # local addresses
  def self.include?(addr)
    @local_addrs_lock.synchronize { @local_addrs.include?(addr) }
  end

  # Returns the first known local address, or +nil+ if there is none
  def self.addr
    @local_addrs_lock.synchronize { @local_addrs.keys.first }
  end

  # Returns an Array of all known local addresses
  def self.addrs
    @local_addrs_lock.synchronize { @local_addrs.keys }
  end

  # Normally not needed unless you dynamically bring up/down network devices.
  # It may be useful to call this periodically or to integrate with
  # some network device state monitoring system.
  #
  # Only non-multicast IPv4 addresses are recorded.
  def self.refresh_addrs!
    @local_addrs_lock.synchronize do
      tmp = {}
      Socket.ip_address_list.keep_if do |ip|
        ip.ipv4? && ! ip.ipv4_multicast?
      end.each { |ip| tmp[ip.ip_address.freeze] = true }
      @local_addrs = tmp
    end
  end
  refresh_addrs!

  # registers a local path for a given +tcp_port+ and +directory_root+
  #
  # Raises ArgumentError if +directory_root+ has no "devN" entries.
  def self.register(tcp_port, directory_root)
    directory_root = directory_root.gsub(%r{/+\z}, "")
    dev_dirs = Dir.foreach(directory_root).grep(/\Adev\d+\z/)
    raise ArgumentError, 'no /dev\d+/ directories found' if dev_dirs.empty?
    STORE_PATHS[tcp_port][directory_root] = Set.new(dev_dirs)
  end

  # returns +nil+ if nothing was found
  # returns a path and associated File::Stat to the local FS if a
  # matching path is possible
  def device_path_stat(uri)
    Regurgitator::Local.include?(uri.host) or return
    # fetch instead of [] so that looking up an unregistered port does
    # not run the default block and add an empty entry to STORE_PATHS
    roots = STORE_PATHS.fetch(uri.port, nil)
    (roots.nil? || roots.empty?) and return
    uri.path =~ %r{\A/(dev\d+)/} or
      abort "BUG: path needs to match '\\A/dev\\d+/' (#{uri})"
    devN = $1
    rv = nil
    roots.each do |root, dev_dirs|
      dev_dirs.include?(devN) or next
      begin
        path = "#{root}#{uri.path}"
        stat = File.stat(path)

        # multiple "/devN" paths for the same ports would be ambiguous,
        # so we cannot optimize away the HTTP request for those
        return nil if rv

        rv = [ path, stat ]
      rescue => e
        # a failed stat only disqualifies this root
        warn "E: #{e.message}, root=#{root} failed for #{uri}"
      end
    end

    rv
  end

  # Returns a Regurgitator::LocalFile for the first URI in +uri_group+
  # that maps to a local file, or +nil+ if none does (or if no local
  # store was ever registered).
  def trylocal(env, uri_group)
    STORE_PATHS.empty? and return
    uri_group.flatten.each do |uri|
      path_stat = device_path_stat(uri) or next
      path, stat = path_stat
      return Regurgitator::LocalFile.new(env, path, uri, stat)
    end
    nil
  end
end
@@ -0,0 +1,61 @@
1
+ # -*- encoding: binary -*-
2
+ require "time"
3
+
4
+ # used to serve local files, bypassing HTTP to the backend device
5
# Rack response body for a file on the local filesystem; building one
# also prepares the complete Rack response (see +response+).
class Regurgitator::LocalFile

  # may be called by some Rack web servers to use sendfile(2)
  attr_reader :to_path

  # used to provide redirects to clients
  attr_reader :uri

  # Stores the Rack response (a 3-element Array) on success to simulate
  # HTTP_Spew::Request objects
  attr_reader :response

  # +env+ is the Rack environment, +path+ the local file, +uri+ the
  # backend URI the file corresponds to and +stat+ its File::Stat.
  #
  # If the request carries If-Modified-Since, +response+ may become a
  # 304 or 400 instead of a 200 (see +modified_since?+).  Any request
  # method other than GET or HEAD aborts the process.
  def initialize(env, path, uri, stat)
    if modified_since = env["HTTP_IF_MODIFIED_SINCE"]
      modified_since?(modified_since, stat) or return
    end

    @response = [ 200, {
        "Content-Type" => "application/octet-stream", # always ".fid"
        "Content-Length" => stat.size.to_s,
        "Last-Modified" => stat.mtime.httpdate,
      } ]
    case env["REQUEST_METHOD"]
    when "GET"
      @to_path, @uri = path, uri
      @response << self
    when "HEAD"
      @response << []
    else
      abort "Unexpected REQUEST_METHOD=#{env['REQUEST_METHOD']}"
    end
  end

  # normal Rack HTTP server endpoint, used if the server can't handle
  # +to_path+
  #
  # The same String is reused as the read buffer for every chunk yielded.
  def each
    buf = ""
    File.open(@to_path) do |fp|
      while fp.read(0x4000, buf)
        yield buf
      end
    end
  end

  # Returns true if the file described by +stat+ changed after the
  # +modified_since+ header value.  Otherwise sets +response+ to a 304
  # (or a 400 if the header cannot be parsed) and returns false.
  def modified_since?(modified_since, stat)
    begin
      modified_since = Time.httpdate(modified_since)
    rescue ArgumentError
      h = { "Content-Type" => "text/plain", "Content-Length" => "0" }
      @response = [ 400, h, [] ]
      return false
    end
    # HTTP dates only carry whole seconds while File::Stat#mtime may have
    # a fractional part; compare whole seconds so that a client echoing
    # our own Last-Modified value gets a 304.
    stat.mtime.to_i > modified_since.to_i and return true
    @response = [ 304, {}, [] ]
    false
  end
end
@@ -0,0 +1,33 @@
1
+ # -*- encoding: binary -*-
2
+
3
+ # matches GET and HEAD requests in the format of "/:dkey" from a
4
+ # single, preconfigured domain.
5
+ #
6
+ # If a client were to make a request to "http://example.com/bar",
7
+ # this endpoint would respond with the file with the key "bar"
8
+ # from the preconfigured domain.
9
+ #
10
+ # To use as middleware:
11
+ # require 'regurgitator'
12
+ # db = Sequel.connect('mysql2://user@example.com/mogilefs')
13
+ # use Regurgitator::OneDomain, :domain => 'foo', :db => db
14
+ #
15
+ # See the {one_domain.ru}[link:examples/one_domain.ru]
16
+ # example for a standalone app.
17
class Regurgitator::OneDomain
  include Regurgitator::Endpoint

  # +opts+ supplies the domain to serve (:domain) and the database
  # handle (:db) that is handed to the endpoint initializer.
  def initialize(app, opts) # :nodoc:
    @domain = opts[:domain]
    super(app, opts[:db])
  end

  # GET and HEAD requests are answered with the file whose key is
  # PATH_INFO minus its first character; every other request method
  # is delegated to the wrapped app.
  def call(env) # :nodoc:
    verb = env['REQUEST_METHOD']
    return @app.call(env) unless verb == 'GET' || verb == 'HEAD'

    dkey = env['PATH_INFO'][1..-1]
    serve_file(env, @domain, dkey)
  end
end
@@ -0,0 +1,55 @@
1
+ # -*- encoding: binary -*-
2
+ # helpers for server_settings lookups
3
# Mixin providing a cached network-to-zone lookup built from the
# server_settings table.  Expects an +@db+ handle in the host object.
module Regurgitator::ServerSettings

  # :stopdoc:
  SETTINGS_LOOKUP = 'SELECT value FROM server_settings WHERE field = ? LIMIT 1'
  # :startdoc:

  def initialize(*args) # :nodoc:
    super
    init_zone_ivars
  end

  # Resets the zone cache state; invoked from +initialize+ and when the
  # module is used via +extend+.
  def init_zone_ivars
    @zone_cache_mtime = Time.at(0)
    @zone_cache = nil
    @zone_cache_lock = Mutex.new
  end

  def self.extended(obj)
    obj.init_zone_ivars
  end

  # Returns the zone lookup table, rebuilding it under the cache lock
  # when it is stale or when +force+ is true.
  def refresh_zone(force = false) # :nodoc:
    @zone_cache_lock.synchronize { refresh_zone_unlocked(force) }
  end

  # Rebuilds the zone table unless the cached one is less than 60
  # seconds old (and +force+ is false).  The caller must hold
  # +@zone_cache_lock+.
  #
  # The 'network_zones' setting is a comma-separated list of zone names;
  # for each name the value of the "zone_<name>" setting is added to a
  # Patricia object with the zone name as its data.
  #
  # This is best-effort: a StandardError raised while loading leaves the
  # (possibly partial) table in place and is not propagated.  Whatever
  # was built is always cached, with the cache time updated.  Unlike a
  # +return+ inside +ensure+, exceptions outside of StandardError
  # (Interrupt, SystemExit, ...) still reach the caller.
  def refresh_zone_unlocked(force)
    return @zone_cache if ! force && ((Time.now - @zone_cache_mtime) < 60)
    tmp = Patricia.new
    begin
      if row = @db[SETTINGS_LOOKUP, 'network_zones'].first
        row[:value].split(/\s*,\s*/).each do |zone|
          zone_row = @db[SETTINGS_LOOKUP, "zone_#{zone}"].first or next
          begin
            tmp.add(zone_row[:value], zone)
          rescue ArgumentError
            # skip values Patricia#add rejects
          end
        end
      end
    rescue StandardError
      # deliberate best-effort, see above
    ensure
      @zone_cache_mtime = Time.now
      @zone_cache = tmp
    end
    tmp
  end

  # If +skip_local+ is true, it may return +:local+ if +addr+ is the address
  # of the host this method is running on
  # Returns the zone (a String) for a given +addr+, if there is one
  # Returns +nil+ if zone information is unknown.
  def zone_for(addr, skip_local = false)
    return :local if skip_local && Regurgitator::Local.include?(addr)
    zone = refresh_zone.search_best(addr) and return zone.data # String
    nil
  end
end
data/pkg.mk ADDED
@@ -0,0 +1,175 @@
1
+ RUBY = ruby
2
+ RAKE = rake
3
+ RSYNC = rsync
4
+ WRONGDOC = wrongdoc
5
+
6
+ GIT-VERSION-FILE: .FORCE-GIT-VERSION-FILE
7
+ @./GIT-VERSION-GEN
8
+ -include GIT-VERSION-FILE
9
+ -include local.mk
10
+ DLEXT := $(shell $(RUBY) -rrbconfig -e 'puts RbConfig::CONFIG["DLEXT"]')
11
+ RUBY_VERSION := $(shell $(RUBY) -e 'puts RUBY_VERSION')
12
+ RUBY_ENGINE := $(shell $(RUBY) -e 'puts((RUBY_ENGINE rescue "ruby"))')
13
+ lib := lib
14
+
15
+ ifeq ($(shell test -f script/isolate_for_tests && echo t),t)
16
+ isolate_libs := tmp/isolate/$(RUBY_ENGINE)-$(RUBY_VERSION)/isolate.mk
17
+ $(isolate_libs): script/isolate_for_tests
18
+ @$(RUBY) script/isolate_for_tests
19
+ -include $(isolate_libs)
20
+ lib := $(lib):$(ISOLATE_LIBS)
21
+ endif
22
+
23
+ ext := $(firstword $(wildcard ext/*))
24
+ ifneq ($(ext),)
25
+ ext_pfx := tmp/ext/$(RUBY_ENGINE)-$(RUBY_VERSION)
26
+ ext_h := $(wildcard $(ext)/*/*.h $(ext)/*.h)
27
+ ext_src := $(wildcard $(ext)/*.c $(ext_h))
28
+ ext_pfx_src := $(addprefix $(ext_pfx)/,$(ext_src))
29
+ ext_d := $(ext_pfx)/$(ext)/.d
30
+ $(ext)/extconf.rb: $(wildcard $(ext)/*.h)
31
+ @>> $@
32
+ $(ext_d):
33
+ @mkdir -p $(@D)
34
+ @> $@
35
+ $(ext_pfx)/$(ext)/%: $(ext)/% $(ext_d)
36
+ install -m 644 $< $@
37
+ $(ext_pfx)/$(ext)/Makefile: $(ext)/extconf.rb $(ext_d) $(ext_h)
38
+ $(RM) -f $(@D)/*.o
39
+ cd $(@D) && $(RUBY) $(CURDIR)/$(ext)/extconf.rb
40
+ ext_sfx := _ext.$(DLEXT)
41
+ ext_dl := $(ext_pfx)/$(ext)/$(notdir $(ext)_ext.$(DLEXT))
42
+ $(ext_dl): $(ext_src) $(ext_pfx_src) $(ext_pfx)/$(ext)/Makefile
43
+ @echo $^ == $@
44
+ $(MAKE) -C $(@D)
45
+ lib := $(lib):$(ext_pfx)/$(ext)
46
+ build: $(ext_dl)
47
+ else
48
+ build:
49
+ endif
50
+
51
+ pkg_extra += GIT-VERSION-FILE NEWS ChangeLog LATEST
52
+ ChangeLog: GIT-VERSION-FILE .wrongdoc.yml
53
+ $(WRONGDOC) prepare
54
+ NEWS LATEST: ChangeLog
55
+
56
+ manifest:
57
+ $(RM) .manifest
58
+ $(MAKE) .manifest
59
+
60
+ .manifest: $(pkg_extra)
61
+ (git ls-files && for i in $@ $(pkg_extra); do echo $$i; done) | \
62
+ LC_ALL=C sort > $@+
63
+ cmp $@+ $@ || mv $@+ $@
64
+ $(RM) $@+
65
+
66
+ doc:: .document .wrongdoc.yml $(pkg_extra)
67
+ -find lib -type f -name '*.rbc' -exec rm -f '{}' ';'
68
+ -find ext -type f -name '*.rbc' -exec rm -f '{}' ';'
69
+ $(RM) -r doc
70
+ $(WRONGDOC) all
71
+ install -m644 COPYING doc/COPYING
72
+ install -m644 $(shell grep '^[A-Z]' .document) doc/
73
+
74
+ ifneq ($(VERSION),)
75
+ pkggem := pkg/$(rfpackage)-$(VERSION).gem
76
+ pkgtgz := pkg/$(rfpackage)-$(VERSION).tgz
77
+ release_notes := release_notes-$(VERSION)
78
+ release_changes := release_changes-$(VERSION)
79
+
80
+ release-notes: $(release_notes)
81
+ release-changes: $(release_changes)
82
+ $(release_changes):
83
+ $(WRONGDOC) release_changes > $@+
84
+ $(VISUAL) $@+ && test -s $@+ && mv $@+ $@
85
+ $(release_notes):
86
+ $(WRONGDOC) release_notes > $@+
87
+ $(VISUAL) $@+ && test -s $@+ && mv $@+ $@
88
+
89
+ # ensures we're actually on the tagged $(VERSION), only used for release
90
+ verify:
91
+ test x"$(shell umask)" = x0022
92
+ git rev-parse --verify refs/tags/v$(VERSION)^{}
93
+ git diff-index --quiet HEAD^0
94
+ test $$(git rev-parse --verify HEAD^0) = \
95
+ $$(git rev-parse --verify refs/tags/v$(VERSION)^{})
96
+
97
+ fix-perms:
98
+ -git ls-tree -r HEAD | awk '/^100644 / {print $$NF}' | xargs chmod 644
99
+ -git ls-tree -r HEAD | awk '/^100755 / {print $$NF}' | xargs chmod 755
100
+
101
+ gem: $(pkggem)
102
+
103
+ install-gem: $(pkggem)
104
+ gem install $(CURDIR)/$<
105
+
106
+ $(pkggem): manifest fix-perms
107
+ gem build $(rfpackage).gemspec
108
+ mkdir -p pkg
109
+ mv $(@F) $@
110
+
111
+ $(pkgtgz): distdir = $(basename $@)
112
+ $(pkgtgz): HEAD = v$(VERSION)
113
+ $(pkgtgz): manifest fix-perms
114
+ @test -n "$(distdir)"
115
+ $(RM) -r $(distdir)
116
+ mkdir -p $(distdir)
117
+ tar cf - $$(cat .manifest) | (cd $(distdir) && tar xf -)
118
+ cd pkg && tar cf - $(basename $(@F)) | gzip -9 > $(@F)+
119
+ mv $@+ $@
120
+
121
+ package: $(pkgtgz) $(pkggem)
122
+
123
+ test-release:: verify package $(release_notes) $(release_changes)
124
+ # make tgz release on RubyForge
125
+ @echo rubyforge add_release -f \
126
+ -n $(release_notes) -a $(release_changes) \
127
+ $(rfproject) $(rfpackage) $(VERSION) $(pkgtgz)
128
+ @echo gem push $(pkggem)
129
+ @echo rubyforge add_file \
130
+ $(rfproject) $(rfpackage) $(VERSION) $(pkggem)
131
+ release:: verify package $(release_notes) $(release_changes)
132
+ # make tgz release on RubyForge
133
+ rubyforge add_release -f -n $(release_notes) -a $(release_changes) \
134
+ $(rfproject) $(rfpackage) $(VERSION) $(pkgtgz)
135
+ # push gem to RubyGems.org
136
+ gem push $(pkggem)
137
+ # in case of gem downloads from RubyForge releases page
138
+ rubyforge add_file \
139
+ $(rfproject) $(rfpackage) $(VERSION) $(pkggem)
140
+ else
141
+ gem install-gem: GIT-VERSION-FILE
142
+ $(MAKE) $@ VERSION=$(GIT_VERSION)
143
+ endif
144
+
145
+ all:: test
146
+ test_units := $(wildcard test/test_*.rb)
147
+ test: test-unit
148
+ test-unit: $(test_units)
149
+ $(test_units): build
150
+ $(RUBY) -I $(lib) $@ $(RUBY_TEST_OPTS)
151
+
152
+ # this requires GNU coreutils variants
153
+ ifneq ($(RSYNC_DEST),)
154
+ publish_doc:
155
+ -git set-file-times
156
+ $(MAKE) doc
157
+ find doc/images -type f | \
158
+ TZ=UTC xargs touch -d '1970-01-01 00:00:06' doc/rdoc.css
159
+ $(MAKE) doc_gz
160
+ $(RSYNC) -av doc/ $(RSYNC_DEST)/
161
+ git ls-files | xargs touch
162
+ endif
163
+
164
+ # Create gzip variants of the same timestamp as the original so nginx
165
+ # "gzip_static on" can serve the gzipped versions directly.
166
+ doc_gz: docs = $(shell find doc -type f ! -regex '^.*\.\(gif\|jpg\|png\|gz\)$$')
167
+ doc_gz:
168
+ for i in $(docs); do \
169
+ gzip --rsyncable -9 < $$i > $$i.gz; touch -r $$i $$i.gz; done
170
+ check-warnings:
171
+ @(for i in $$(git ls-files '*.rb'|grep -v '^setup\.rb$$'); \
172
+ do $(RUBY) -d -W2 -c $$i; done) | grep -v '^Syntax OK$$' || :
173
+
174
.PHONY: all .FORCE-GIT-VERSION-FILE doc test $(test_units) manifest
.PHONY: check-warnings
# These targets are commands rather than files; declaring them phony keeps
# a stray file or directory of the same name from suppressing the recipe.
.PHONY: build test-unit gem install-gem package verify fix-perms
.PHONY: release test-release release-notes release-changes publish_doc doc_gz