regurgitator 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,53 @@
1
+ # -*- encoding: binary -*-
2
# Mixin that looks up a file row in the database and builds the URIs
# that can be used to read it from the devices holding a copy.
#
# Relies on +get_dmid+, +refresh_device+ and +DEVICES_ON+ (presumably
# supplied by Regurgitator::Domain and Regurgitator::Device) and on
# the including object providing a <tt>@db</tt> handle.
module Regurgitator::FileInfo
  include Regurgitator::Domain
  include Regurgitator::Device

  # :stopdoc:
  FILE_INFO = 'SELECT fid,length FROM file WHERE dmid = ? AND dkey = ? LIMIT 1'
  # :startdoc:

  # returns a hash with file information and URIs for accessing the file:
  #   {
  #     :fid => 3149113,
  #     :length => 17,
  #     :uris => {
  #        "east" => [ [read_write,read_only], [read_write,read_only] ],
  #        "west" => [ [read_write,read_only], [read_write,read_only] ],
  #        nil => [ [read_write,read_only], [read_write,read_only] ],
  #     }
  #   }
  #
  # Returns +nil+ if +domain+ or +dkey+ is unknown.  Zero-length files
  # are returned without a +:uris+ entry.  +env+ is the Rack environment;
  # its 'regurgitator.zone' value selects between the primary and
  # alternate URIs of each device.  If no URIs were found and +update+ is
  # false, the lookup is retried once with +update+ set to true.
  def file_info(env, domain, dkey, update = false)
    dmid = get_dmid(domain) or return
    devices = refresh_device(update)
    info = @db[FILE_INFO, dmid, dkey].first or return
    0 == info[:length] and return info
    fid = info[:fid]
    read_uris = Hash.new { |h,k| h[k] = [] }
    drain_uris = Hash.new { |h,k| h[k] = [] }

    zone = env['regurgitator.zone']

    # The on-disk path only depends on the FID, so it is computed once
    # here instead of once per device row.
    # TODO: support larger FIDs
    fid_path = nil
    if fid < 10_000_000_000
      nfid = sprintf('%010u', fid)
      # "0003149113" => "/0/003/149/0003149113.fid"
      fid_path = "/#{nfid[0, 1]}/#{nfid[1, 3]}/#{nfid[4, 3]}/#{nfid}.fid"
    end

    # unsupported FIDs never yield a URI, so the device query is skipped
    if fid_path
      @db[DEVICES_ON, fid].each do |row|
        devinfo = devices[row[:devid]] or next

        base = devinfo[:uris][alt_zone?(zone, devinfo) ? :alt : :pri]
        uris = base.map do |u|
          u = u.dup # must be a copy, the URI held by devinfo stays untouched
          u.path += fid_path
          u
        end
        (devinfo[:preferred] ? read_uris : drain_uris)[devinfo[:zone]] << uris
      end
    end

    # non-preferred devices are only used when no preferred one has the file
    uris = info[:uris] = read_uris.empty? ? drain_uris : read_uris
    (uris.empty? && ! update) ? file_info(env, domain, dkey, true) : info
  end

  # returns true if the alternate URIs of the device described by
  # +devinfo+ should be used for a client in +zone+: either +zone+ is
  # +:alt+ or it is included in the +:altmask+ of the device address
  def alt_zone?(zone, devinfo)
    zone == :alt || devinfo[:ipaddr][:altmask].include?(zone)
  end
end
@@ -0,0 +1,48 @@
1
+ # -*- encoding: binary -*-
2
+
3
+ # Opens a IO.select-able object to read a (potentially large) file from a
4
+ # backend. We don't bother with keepalive since we want send buffers
5
+ # to be empty, so make sure you have enough local ports. You may want
6
+ # to enable TIME-WAIT reuse/recycling in your TCP stack if your
7
+ # network supports it. This object is used as a Rack response
8
+ # body.
9
class Regurgitator::FileRequest < HTTP_Spew::Request

  # used for doing redirects or reproxying with nginx
  attr_reader :uri

  extend Regurgitator::Local

  # :stopdoc:
  USER_AGENT = name.dup

  # Rack env keys of the client request headers that are forwarded to
  # the backend.  String#tr is used instead of String#tr! because the
  # latter returns nil when nothing was replaced, which would silently
  # produce a bare "HTTP_" key.
  PASS_HEADERS = %w(Range If-Modified-Since Accept-Encoding).map { |x|
    "HTTP_#{x.tr('a-z-', 'A-Z_')}".freeze
  }
  # :startdoc:

  # +uris+ may be an array, read-only URIs are at the end
  #
  # +env+ holds the request fields to send; it is merged over the
  # REQUEST_URI and HTTP_HOST values derived from the chosen URI, so
  # keys present in +env+ take precedence.
  def initialize(env, uris)
    uri = uris[-1] # a subclass of this may call multiple URIs
    addr = Socket.pack_sockaddr_in(uri.port, uri.host)
    req = {
      "REQUEST_URI" => uri.request_uri,
      "HTTP_HOST" => "#{uri.host}:#{uri.port}",
    }.merge!(env)
    @uri = uri
    super req, nil, addr
  end

  # Returns a Rack response for the first usable source in +uri_group+
  # (an array of URI arrays, one per device).
  #
  # A locally registered path is tried first to avoid HTTP entirely.
  # Otherwise one request per device is started and the first one that
  # becomes ready within +timeout+ without an error is used; the other
  # error-free requests are closed.
  # Raises Regurgitator::NoDevices if no request succeeded.
  def self.run(env, uri_group, timeout = env["regurgitator.timeout"] || 5)
    local_file = trylocal(env, uri_group) and return local_file.response

    req = {
      "REQUEST_METHOD" => env["REQUEST_METHOD"],
      "HTTP_USER_AGENT" => USER_AGENT,
    }
    # only forward the headers the client actually sent
    PASS_HEADERS.each { |k| pass = env[k] and req[k] = pass }
    tmp = HTTP_Spew.wait(1, uri_group.map { |uris| new(req, uris) }, timeout)
    tmp.delete_if { |t| t.error }
    ready = tmp.shift or raise Regurgitator::NoDevices, "no readable devices"
    tmp.each { |t| t.close }
    ready.response
  end
end
@@ -0,0 +1,89 @@
1
+ # -*- encoding: binary -*-
2
+ require "socket"
3
+ require "set"
4
+
5
# Helpers for serving files straight from the local filesystem when a
# storage device lives on the host this code is running on.
module Regurgitator::Local

  # This is used to register local storage endpoints so we can
  # short-circuit and avoid making HTTP requests to devices
  # Must be configured by the user
  STORE_PATHS = Hash.new do |h,port|
    h[port] = {} # key: directory root path, value: Set of "devN" names
  end.compare_by_identity

  @local_addrs = {}
  @local_addrs_lock = Mutex.new

  # returns true if +addr+ is one of the addresses recorded by the
  # last call to refresh_addrs!
  def self.include?(addr)
    @local_addrs_lock.synchronize { @local_addrs.include?(addr) }
  end

  # returns the first recorded local address,
  # or +nil+ if no local address is known
  def self.addr
    pair = @local_addrs_lock.synchronize { @local_addrs.first }
    pair && pair[0]
  end

  # returns an array of all recorded local addresses
  def self.addrs
    @local_addrs_lock.synchronize { @local_addrs.keys }
  end

  # Normally not needed unless you dynamically bring up/down network devices.
  # It may be useful to call this periodically or to integrate with
  # some network device state monitoring system.
  #
  # Only non-multicast IPv4 addresses are recorded.
  def self.refresh_addrs!
    @local_addrs_lock.synchronize do
      tmp = {}
      Socket.ip_address_list.each do |ip|
        next unless ip.ipv4? && ! ip.ipv4_multicast?
        tmp[ip.ip_address.freeze] = true
      end
      @local_addrs = tmp
    end
  end
  refresh_addrs!

  # registers a local path for a given +tcp_port+ and +directory_root+
  #
  # Trailing slashes of +directory_root+ are ignored.
  # Raises ArgumentError if +directory_root+ has no "devN" entries.
  def self.register(tcp_port, directory_root)
    directory_root = directory_root.gsub(%r{/+\z}, "")
    dev_dirs = Dir.foreach(directory_root).grep(/\Adev\d+\z/)
    raise ArgumentError, 'no /dev\d+/ directories found' if dev_dirs.empty?
    STORE_PATHS[tcp_port][directory_root] = Set.new(dev_dirs)
  end

  # returns +nil+ if nothing was found
  # returns a path and associated File::Stat to the local FS if a
  # matching path is possible
  def device_path_stat(uri)
    Regurgitator::Local.include?(uri.host) or return

    # Hash#fetch bypasses the default block, so merely looking up an
    # unregistered port does not add an empty entry to STORE_PATHS
    roots = STORE_PATHS.fetch(uri.port, nil)
    (roots.nil? || roots.empty?) and return

    dev_name = uri.path[%r{\A/(dev\d+)/}, 1] or
      abort "BUG: path needs to match '\\A/dev\\d+/' (#{uri})"
    rv = nil
    roots.each do |root, dev_dirs|
      dev_dirs.include?(dev_name) or next
      begin
        path = "#{root}#{uri.path}"
        stat = File.stat(path)

        # multiple "/devN" paths for the same ports would be ambiguous,
        # so we cannot optimize away the HTTP request for those
        return nil if rv

        rv = [ path, stat ]
      rescue => e
        # best effort: a root that cannot be stat-ed is reported and skipped
        warn "E: #{e.message}, root=#{root} failed for #{uri}"
      end
    end

    rv
  end

  # Returns a Regurgitator::LocalFile for the first URI in +uri_group+
  # that maps to a file on the local filesystem.
  # Returns +nil+ if nothing is registered or no URI matched.
  def trylocal(env, uri_group)
    STORE_PATHS.empty? and return
    uri_group.flatten.each do |uri|
      path_stat = device_path_stat(uri) or next
      path, stat = path_stat
      return Regurgitator::LocalFile.new(env, path, uri, stat)
    end
    nil
  end
end
@@ -0,0 +1,61 @@
1
+ # -*- encoding: binary -*-
2
+ require "time"
3
+
4
+ # used to serve local files, bypassing HTTP to the backend device
5
class Regurgitator::LocalFile

  # may be called by some Rack web servers to use sendfile(2)
  attr_reader :to_path

  # used to provide redirects to clients
  attr_reader :uri

  # Stores the Rack response (a 3-element Array) on success to simulate
  # HTTP_Spew::Request objects
  attr_reader :response

  # +env+ is the Rack environment of the client request, +path+ the
  # local file to serve, +uri+ the device URI it corresponds to and
  # +stat+ the File::Stat of +path+.
  #
  # +response+ is a 200 for GET and HEAD requests, unless an
  # If-Modified-Since header leads to a 304 (not modified) or
  # 400 (unparseable date) response.  +to_path+ and +uri+ are only
  # set for a 200 response to a GET request.
  def initialize(env, path, uri, stat)
    if modified_since = env["HTTP_IF_MODIFIED_SINCE"]
      # @response was already set to a 304 or 400 on failure
      modified_since?(modified_since, stat) or return
    end

    @response = [ 200, {
      "Content-Type" => "application/octet-stream", # always ".fid"
      "Content-Length" => stat.size.to_s,
      "Last-Modified" => stat.mtime.httpdate,
    } ]
    case env["REQUEST_METHOD"]
    when "GET"
      @to_path, @uri = path, uri
      @response << self
    when "HEAD"
      @response << []
    else
      abort "Unexpected REQUEST_METHOD=#{env['REQUEST_METHOD']}"
    end
  end

  # normal Rack HTTP server endpoint, used if the server can't handle
  # +to_path+
  #
  # Yields the file in chunks of up to 16384 bytes.  The same String
  # object is reused for every chunk.
  def each
    buf = String.new # empty, mutable, binary
    File.open(@to_path, "rb") do |fp|
      while fp.read(0x4000, buf)
        yield buf
      end
    end
  end

  # Returns true if the file described by +stat+ was modified after
  # the HTTP date given in +modified_since+.
  #
  # Otherwise returns false and sets +response+ to a 304, or to a 400
  # if +modified_since+ is not a valid HTTP date.
  def modified_since?(modified_since, stat)
    begin
      modified_since = Time.httpdate(modified_since)
    rescue ArgumentError
      h = { "Content-Type" => "text/plain", "Content-Length" => "0" }
      @response = [ 400, h, [] ]
      return false
    end

    # HTTP dates only have a resolution of one second while File::Stat
    # may carry fractional seconds.  Compare whole seconds so a client
    # echoing our Last-Modified value back gets a 304.
    stat.mtime.to_i > modified_since.to_i and return true
    @response = [ 304, {}, [] ]
    false
  end
end
@@ -0,0 +1,33 @@
1
+ # -*- encoding: binary -*-
2
+
3
+ # matches GET and HEAD requests in the format of "/:dkey" from a
4
+ # single, preconfigured domain.
5
+ #
6
+ # If a client were to make a request to "http://example.com/bar",
7
+ # this endpoint would respond with the file with the key "bar"
8
+ # from the preconfigured domain.
9
+ #
10
+ # To use as middleware:
11
+ # require 'regurgitator'
12
+ # db = Sequel.connect('mysql2://user@example.com/mogilefs')
13
+ # use Regurgitator::OneDomain, :domain => 'foo', :db => db
14
+ #
15
+ # See the {one_domain.ru}[link:examples/one_domain.ru]
16
+ # example for a standalone app.
17
class Regurgitator::OneDomain
  include Regurgitator::Endpoint

  # +opts+ must provide the +:domain+ to serve and the +:db+ handle
  def initialize(app, opts) # :nodoc:
    @domain = opts[:domain]
    db = opts[:db]
    super(app, db)
  end

  # GET and HEAD requests are served from the configured domain using
  # PATH_INFO without its first character as the key, everything else
  # is passed on to the next app
  def call(env) # :nodoc:
    return @app.call(env) unless %w(GET HEAD).include?(env['REQUEST_METHOD'])

    dkey = env['PATH_INFO'][1..-1]
    serve_file(env, @domain, dkey)
  end
end
@@ -0,0 +1,55 @@
1
+ # -*- encoding: binary -*-
2
+ # helpers for server_settings lookups
3
module Regurgitator::ServerSettings

  # :stopdoc:
  SETTINGS_LOOKUP = 'SELECT value FROM server_settings WHERE field = ? LIMIT 1'
  # :startdoc:

  def initialize(*args) # :nodoc:
    super
    init_zone_ivars
  end

  # resets the zone cache so the next lookup reloads it from the database
  def init_zone_ivars
    @zone_cache_mtime = Time.at(0)
    @zone_cache = nil
    @zone_cache_lock = Mutex.new
  end

  # objects extended with this module never run our initialize,
  # so the cache state is set up here instead
  def self.extended(obj)
    obj.init_zone_ivars
  end

  # returns the (possibly cached) zone lookup table, see
  # refresh_zone_unlocked for details
  def refresh_zone(force = false) # :nodoc:
    @zone_cache_lock.synchronize { refresh_zone_unlocked(force) }
  end

  # Returns a Patricia object mapping the value of each "zone_NAME"
  # setting to NAME for every NAME listed in the "network_zones" setting.
  #
  # The result is cached for 60 seconds unless +force+ is true.
  # This is a best-effort lookup: if the database raises a
  # StandardError, the error is reported via +warn+ and whatever was
  # loaded so far is cached and returned.  Other exceptions
  # (Interrupt, SystemExit, ...) are propagated to the caller.
  #
  # Callers must hold <tt>@zone_cache_lock</tt>.
  def refresh_zone_unlocked(force)
    return @zone_cache if ! force && ((Time.now - @zone_cache_mtime) < 60)
    tmp = Patricia.new
    begin
      if zones = @db[SETTINGS_LOOKUP, 'network_zones'].first
        zones[:value].split(/\s*,\s*/).each do |zone|
          row = @db[SETTINGS_LOOKUP, "zone_#{zone}"].first or next
          begin
            tmp.add(row[:value], zone)
          rescue ArgumentError
            # presumably an unparseable network value; skip this zone
          end
        end
      end
    rescue StandardError => e
      warn "E: #{e.message} (#{e.class}) while loading network zones"
    end
    # failures are cached, too, so a broken database is not hit for
    # every single request
    @zone_cache_mtime = Time.now
    @zone_cache = tmp
  end

  # Returns the zone (a String) for a given +addr+, if there is one
  # Returns +nil+ if zone information is unknown.
  # If +skip_local+ is true, it may return +:local+ if +addr+ is the address
  # of the host this method is running on
  def zone_for(addr, skip_local = false)
    return :local if skip_local && Regurgitator::Local.include?(addr)
    node = refresh_zone.search_best(addr)
    node ? node.data : nil # String
  end
end
data/pkg.mk ADDED
@@ -0,0 +1,175 @@
1
# Generic packaging, documentation, test and release rules for a Ruby
# project (GNU make).  Project specific settings such as rfproject,
# rfpackage, pkg_extra or RSYNC_DEST are expected to be set by the
# including makefile or by the optional local.mk.

# tools, may be overridden on the command-line or in local.mk
RUBY = ruby
RAKE = rake
RSYNC = rsync
WRONGDOC = wrongdoc

# GIT-VERSION-GEN is run on every invocation to keep the file up-to-date
GIT-VERSION-FILE: .FORCE-GIT-VERSION-FILE
	@./GIT-VERSION-GEN
-include GIT-VERSION-FILE
-include local.mk
DLEXT := $(shell $(RUBY) -rrbconfig -e 'puts RbConfig::CONFIG["DLEXT"]')
RUBY_VERSION := $(shell $(RUBY) -e 'puts RUBY_VERSION')
RUBY_ENGINE := $(shell $(RUBY) -e 'puts((RUBY_ENGINE rescue "ruby"))')
lib := lib

# optional: isolated gem dependencies for running the tests
ifeq ($(shell test -f script/isolate_for_tests && echo t),t)
isolate_libs := tmp/isolate/$(RUBY_ENGINE)-$(RUBY_VERSION)/isolate.mk
$(isolate_libs): script/isolate_for_tests
	@$(RUBY) script/isolate_for_tests
-include $(isolate_libs)
lib := $(lib):$(ISOLATE_LIBS)
endif

# optional: build the first C extension found under ext/ in a
# per-interpreter directory
ext := $(firstword $(wildcard ext/*))
ifneq ($(ext),)
ext_pfx := tmp/ext/$(RUBY_ENGINE)-$(RUBY_VERSION)
ext_h := $(wildcard $(ext)/*/*.h $(ext)/*.h)
ext_src := $(wildcard $(ext)/*.c $(ext_h))
ext_pfx_src := $(addprefix $(ext_pfx)/,$(ext_src))
ext_d := $(ext_pfx)/$(ext)/.d
$(ext)/extconf.rb: $(wildcard $(ext)/*.h)
	@>> $@
$(ext_d):
	@mkdir -p $(@D)
	@> $@
$(ext_pfx)/$(ext)/%: $(ext)/% $(ext_d)
	install -m 644 $< $@
$(ext_pfx)/$(ext)/Makefile: $(ext)/extconf.rb $(ext_d) $(ext_h)
	$(RM) -f $(@D)/*.o
	cd $(@D) && $(RUBY) $(CURDIR)/$(ext)/extconf.rb
ext_sfx := _ext.$(DLEXT)
ext_dl := $(ext_pfx)/$(ext)/$(notdir $(ext)_ext.$(DLEXT))
$(ext_dl): $(ext_src) $(ext_pfx_src) $(ext_pfx)/$(ext)/Makefile
	@echo $^ == $@
	$(MAKE) -C $(@D)
lib := $(lib):$(ext_pfx)/$(ext)
build: $(ext_dl)
else
build:
endif

# generated files that are shipped in packages but not tracked by git
pkg_extra += GIT-VERSION-FILE NEWS ChangeLog LATEST
ChangeLog: GIT-VERSION-FILE .wrongdoc.yml
	$(WRONGDOC) prepare
NEWS LATEST: ChangeLog

manifest:
	$(RM) .manifest
	$(MAKE) .manifest

# .manifest is only replaced when its content changes
.manifest: $(pkg_extra)
	(git ls-files && for i in $@ $(pkg_extra); do echo $$i; done) | \
		LC_ALL=C sort > $@+
	cmp $@+ $@ || mv $@+ $@
	$(RM) $@+

doc:: .document .wrongdoc.yml $(pkg_extra)
	-find lib -type f -name '*.rbc' -exec rm -f '{}' ';'
	-find ext -type f -name '*.rbc' -exec rm -f '{}' ';'
	$(RM) -r doc
	$(WRONGDOC) all
	install -m644 COPYING doc/COPYING
	install -m644 $(shell grep '^[A-Z]' .document) doc/

# packaging and release rules need a known VERSION; without one, "gem"
# and "install-gem" re-invoke make with VERSION taken from GIT-VERSION-FILE
ifneq ($(VERSION),)
pkggem := pkg/$(rfpackage)-$(VERSION).gem
pkgtgz := pkg/$(rfpackage)-$(VERSION).tgz
release_notes := release_notes-$(VERSION)
release_changes := release_changes-$(VERSION)

release-notes: $(release_notes)
release-changes: $(release_changes)
$(release_changes):
	$(WRONGDOC) release_changes > $@+
	$(VISUAL) $@+ && test -s $@+ && mv $@+ $@
$(release_notes):
	$(WRONGDOC) release_notes > $@+
	$(VISUAL) $@+ && test -s $@+ && mv $@+ $@

# ensures we're actually on the tagged $(VERSION), only used for release
verify:
	test x"$(shell umask)" = x0022
	git rev-parse --verify refs/tags/v$(VERSION)^{}
	git diff-index --quiet HEAD^0
	test $$(git rev-parse --verify HEAD^0) = \
	     $$(git rev-parse --verify refs/tags/v$(VERSION)^{})

fix-perms:
	-git ls-tree -r HEAD | awk '/^100644 / {print $$NF}' | xargs chmod 644
	-git ls-tree -r HEAD | awk '/^100755 / {print $$NF}' | xargs chmod 755

gem: $(pkggem)

install-gem: $(pkggem)
	gem install $(CURDIR)/$<

$(pkggem): manifest fix-perms
	gem build $(rfpackage).gemspec
	mkdir -p pkg
	mv $(@F) $@

$(pkgtgz): distdir = $(basename $@)
$(pkgtgz): HEAD = v$(VERSION)
$(pkgtgz): manifest fix-perms
	@test -n "$(distdir)"
	$(RM) -r $(distdir)
	mkdir -p $(distdir)
	tar cf - $$(cat .manifest) | (cd $(distdir) && tar xf -)
	cd pkg && tar cf - $(basename $(@F)) | gzip -9 > $(@F)+
	mv $@+ $@

package: $(pkgtgz) $(pkggem)

# dry run of "release": only prints the commands
test-release:: verify package $(release_notes) $(release_changes)
	# make tgz release on RubyForge
	@echo rubyforge add_release -f \
	  -n $(release_notes) -a $(release_changes) \
	  $(rfproject) $(rfpackage) $(VERSION) $(pkgtgz)
	@echo gem push $(pkggem)
	@echo rubyforge add_file \
	  $(rfproject) $(rfpackage) $(VERSION) $(pkggem)
release:: verify package $(release_notes) $(release_changes)
	# make tgz release on RubyForge
	rubyforge add_release -f -n $(release_notes) -a $(release_changes) \
	  $(rfproject) $(rfpackage) $(VERSION) $(pkgtgz)
	# push gem to RubyGems.org
	gem push $(pkggem)
	# in case of gem downloads from RubyForge releases page
	rubyforge add_file \
	  $(rfproject) $(rfpackage) $(VERSION) $(pkggem)
else
gem install-gem: GIT-VERSION-FILE
	$(MAKE) $@ VERSION=$(GIT_VERSION)
endif

# every test/test_*.rb file is a phony target that runs itself
all:: test
test_units := $(wildcard test/test_*.rb)
test: test-unit
test-unit: $(test_units)
$(test_units): build
	$(RUBY) -I $(lib) $@ $(RUBY_TEST_OPTS)

# this requires GNU coreutils variants
ifneq ($(RSYNC_DEST),)
publish_doc:
	-git set-file-times
	$(MAKE) doc
	find doc/images -type f | \
		TZ=UTC xargs touch -d '1970-01-01 00:00:06' doc/rdoc.css
	$(MAKE) doc_gz
	$(RSYNC) -av doc/ $(RSYNC_DEST)/
	git ls-files | xargs touch
endif

# Create gzip variants of the same timestamp as the original so nginx
# "gzip_static on" can serve the gzipped versions directly.
doc_gz: docs = $(shell find doc -type f ! -regex '^.*\.\(gif\|jpg\|png\|gz\)$$')
doc_gz:
	for i in $(docs); do \
	  gzip --rsyncable -9 < $$i > $$i.gz; touch -r $$i $$i.gz; done
check-warnings:
	@(for i in $$(git ls-files '*.rb'|grep -v '^setup\.rb$$'); \
	  do $(RUBY) -d -W2 -c $$i; done) | grep -v '^Syntax OK$$' || :

# Targets that only run commands and never create a file of their own
# name; declaring them phony keeps them working even if a file or
# directory with the same name shows up in the tree.
.PHONY: all .FORCE-GIT-VERSION-FILE doc test $(test_units) manifest
.PHONY: check-warnings
.PHONY: build test-unit verify fix-perms gem install-gem package
.PHONY: release test-release release-notes release-changes
.PHONY: publish_doc doc_gz