mogilefs-client 2.2.0 → 3.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. data/.document +11 -0
  2. data/.gemtest +0 -0
  3. data/.gitignore +4 -0
  4. data/.wrongdoc.yml +5 -0
  5. data/GIT-VERSION-GEN +28 -0
  6. data/GNUmakefile +44 -0
  7. data/HACKING +33 -0
  8. data/{History.txt → History} +0 -1
  9. data/{LICENSE.txt → LICENSE} +0 -1
  10. data/Manifest.txt +34 -7
  11. data/README +51 -0
  12. data/Rakefile +11 -11
  13. data/TODO +10 -0
  14. data/bin/mog +109 -68
  15. data/examples/mogstored_rack.rb +189 -0
  16. data/lib/mogilefs.rb +56 -17
  17. data/lib/mogilefs/admin.rb +128 -62
  18. data/lib/mogilefs/backend.rb +205 -95
  19. data/lib/mogilefs/bigfile.rb +54 -70
  20. data/lib/mogilefs/bigfile/filter.rb +58 -0
  21. data/lib/mogilefs/chunker.rb +30 -0
  22. data/lib/mogilefs/client.rb +0 -2
  23. data/lib/mogilefs/copy_stream.rb +30 -0
  24. data/lib/mogilefs/http_file.rb +175 -0
  25. data/lib/mogilefs/http_reader.rb +79 -0
  26. data/lib/mogilefs/mogilefs.rb +242 -148
  27. data/lib/mogilefs/mysql.rb +3 -4
  28. data/lib/mogilefs/paths_size.rb +24 -0
  29. data/lib/mogilefs/pool.rb +0 -1
  30. data/lib/mogilefs/socket.rb +9 -0
  31. data/lib/mogilefs/socket/kgio.rb +55 -0
  32. data/lib/mogilefs/socket/pure_ruby.rb +70 -0
  33. data/lib/mogilefs/socket_common.rb +58 -0
  34. data/lib/mogilefs/util.rb +6 -169
  35. data/test/aggregate.rb +11 -11
  36. data/test/exec.rb +72 -0
  37. data/test/fresh.rb +222 -0
  38. data/test/integration.rb +43 -0
  39. data/test/setup.rb +1 -0
  40. data/test/socket_test.rb +98 -0
  41. data/test/test_admin.rb +14 -37
  42. data/test/test_backend.rb +50 -107
  43. data/test/test_bigfile.rb +2 -2
  44. data/test/test_db_backend.rb +1 -2
  45. data/test/test_fresh.rb +8 -0
  46. data/test/test_http_reader.rb +34 -0
  47. data/test/test_mogilefs.rb +278 -98
  48. data/test/test_mogilefs_integration.rb +174 -0
  49. data/test/test_mogilefs_integration_large_pipe.rb +62 -0
  50. data/test/test_mogilefs_integration_list_keys.rb +40 -0
  51. data/test/test_mogilefs_socket_kgio.rb +11 -0
  52. data/test/test_mogilefs_socket_pure.rb +7 -0
  53. data/test/test_mogstored_rack.rb +89 -0
  54. data/test/test_mogtool_bigfile.rb +116 -0
  55. data/test/test_mysql.rb +1 -2
  56. data/test/test_pool.rb +1 -1
  57. data/test/test_unit_mogstored_rack.rb +72 -0
  58. metadata +76 -54
  59. data/README.txt +0 -80
  60. data/lib/mogilefs/httpfile.rb +0 -157
  61. data/lib/mogilefs/network.rb +0 -107
  62. data/test/test_network.rb +0 -56
  63. data/test/test_util.rb +0 -121
@@ -0,0 +1,58 @@
1
+ # -*- encoding: binary -*-
2
+ require 'zlib'
3
+ require 'digest/md5'
4
+
5
+ # Filter class to wrap IO objects and uncompress DEFLATE'd files
6
+ #
7
+ # This is used for reading "bigfile" objects generated by the
8
+ # (deprecated) mogtool(1)
9
+ class MogileFS::Bigfile::Filter
10
+ GZIP_HEADER = "\x1f\x8b"
11
+ INFLATABLE_TYPES = { "file" => true }
12
+ attr_reader :flushed_bytes
13
+
14
+ def initialize(io, info, opts)
15
+ @io = io
16
+ @info = info
17
+ @md5 = opts[:verify] ? Digest::MD5.new : nil
18
+ @zi = nil
19
+ @flushed_bytes = 0
20
+ end
21
+
22
+ def md5_check!(expect)
23
+ return unless @md5
24
+ current = @md5.hexdigest
25
+ current == expect or
26
+ raise MogileFS::ChecksumMismatchError, "#{current} != #{expect}"
27
+ @md5.reset
28
+ end
29
+
30
+ def flush
31
+ @flushed_bytes = @io.write(@zi.finish) if @zi
32
+ @io.flush
33
+ end
34
+
35
+ def write(buf)
36
+ if nil == @zi
37
+ if @info[:compressed] &&
38
+ INFLATABLE_TYPES.include?(@info[:type]) &&
39
+ buf.bytesize >= 2 &&
40
+ buf[0,2] != GZIP_HEADER
41
+
42
+ @zi = Zlib::Inflate.new
43
+
44
+ # mogtool(1) seems to have a bug that causes it to generate bogus
45
+ # MD5s if zlib deflate is used. Don't trust those MD5s for now...
46
+ @md5 = nil
47
+ else
48
+ @zi = false
49
+ end
50
+ end
51
+ if @zi
52
+ buf = @zi.inflate(buf)
53
+ else
54
+ @md5 << buf
55
+ end
56
+ @io.write(buf)
57
+ end
58
+ end
@@ -0,0 +1,30 @@
1
+ # -*- encoding: binary -*-
2
+ require "digest/md5"
3
+ class MogileFS::Chunker
4
+ CRLF = "\r\n"
5
+ attr_reader :io
6
+
7
+ def initialize(io, md5)
8
+ @io = io
9
+ @md5 = md5 ? Digest::MD5.new : nil
10
+ end
11
+
12
+ def write(buf)
13
+ rv = buf.bytesize
14
+ @io.write("#{rv.to_s(16)}\r\n")
15
+ @io.write(buf)
16
+ @md5.update(buf) if @md5
17
+ @io.write(CRLF)
18
+ rv
19
+ end
20
+
21
+ def flush
22
+ if @md5
23
+ content_md5 = [ @md5.digest ].pack('m').strip
24
+ warn "Content-MD5: #{content_md5}\r\n" if $DEBUG
25
+ @io.write("0\r\nContent-MD5: #{content_md5}\r\n\r\n")
26
+ else
27
+ @io.write("0\r\n\r\n")
28
+ end
29
+ end
30
+ end
@@ -1,11 +1,9 @@
1
1
  # -*- encoding: binary -*-
2
- require 'mogilefs/backend'
3
2
 
4
3
  ##
5
4
  # MogileFS::Client is the MogileFS client base class. Concrete clients like
6
5
  # MogileFS::MogileFS and MogileFS::Admin are implemented atop this one to do
7
6
  # real work.
8
-
9
7
  class MogileFS::Client
10
8
 
11
9
  ##
@@ -0,0 +1,30 @@
1
+ # -*- encoding: binary -*-
2
+
3
+ # internal compatibility class for older Rubies
4
+ module MogileFS::CopyStream # :nodoc:
5
+ @r_args = IO::RDONLY | IO::NOCTTY
6
+ @w_args = [ IO::WRONLY|IO::CREAT|IO::NOCTTY|IO::TRUNC, 0600 ]
7
+ def self.copy_stream(src, dst)
8
+ src_io = src.respond_to?(:to_str) ? File.open(src, @r_args) : src
9
+ dst_io = dst.respond_to?(:to_str) ? File.open(dst, *@w_args) : dst
10
+ buf = ""
11
+ written = 0
12
+ if src_io.respond_to?(:readpartial)
13
+ begin
14
+ src_io.readpartial(0x4000, buf)
15
+ written += dst_io.write(buf)
16
+ rescue EOFError
17
+ break
18
+ end while true
19
+ else
20
+ while src_io.read(0x4000, buf)
21
+ written += dst_io.write(buf)
22
+ end
23
+ end
24
+ dst_io.flush if dst_io.respond_to?(:flush)
25
+ written
26
+ ensure
27
+ src_io.close if src.respond_to?(:to_str)
28
+ dst_io.close if dst.respond_to?(:to_str)
29
+ end
30
+ end
@@ -0,0 +1,175 @@
1
+ # -*- encoding: binary -*-
2
+ # here are internal implementation details, do not use them in your code
3
+ require 'stringio'
4
+ require 'uri'
5
+ require 'mogilefs/chunker'
6
+
7
+ ##
8
+ # HTTPFile wraps up the new file operations for storing files onto an HTTP
9
+ # storage node.
10
+ #
11
+ # You really don't want to create an HTTPFile by hand. Instead you want to
12
+ # create a new file using MogileFS::MogileFS.new_file.
13
+ #
14
+ class MogileFS::HTTPFile < StringIO
15
+ class EmptyResponseError < MogileFS::Error; end
16
+ class BadResponseError < MogileFS::Error; end
17
+ class UnparseableResponseError < MogileFS::Error; end
18
+ class NoStorageNodesError < MogileFS::Error
19
+ def message; 'Unable to open socket to storage node'; end
20
+ end
21
+ class NonRetryableError < MogileFS::Error; end
22
+
23
+ # :stopdoc:
24
+ MD5_TRAILER_NODES = {} # :nodoc: # EXPERIMENTAL
25
+ class << self
26
+ attr_accessor :response_timeout_cb
27
+ end
28
+
29
+ @response_timeout_cb = lambda do |elapsed_time, bytes_uploaded|
30
+ mbytes_uploaded = bytes_uploaded / (1024.0 * 1024.0)
31
+ # assumes worst case is 10M/s on the remote storage disk
32
+ t = mbytes_uploaded * 10 + elapsed_time
33
+ t < 5 ? 5 : t
34
+ end
35
+ # :startdoc:
36
+
37
+ ##
38
+ # The URI this file will be stored to.
39
+
40
+ attr_reader :uri
41
+
42
+ attr_reader :devid
43
+
44
+ ##
45
+ # The big_io name in case we have file > 256M
46
+
47
+ attr_accessor :big_io
48
+
49
+ attr_accessor :streaming_io
50
+
51
+ ##
52
+ # Creates a new HTTPFile with MogileFS-specific data. Use
53
+ # MogileFS::MogileFS#new_file instead of this method.
54
+
55
+ def initialize(dests, content_length)
56
+ super ""
57
+ @streaming_io = @big_io = @uri = @devid = @active = nil
58
+ @dests = dests
59
+ end
60
+
61
+ def request_put(sock, uri, file_size, input = nil)
62
+ host_with_port = "#{uri.host}:#{uri.port}"
63
+ md5 = false
64
+ if MD5_TRAILER_NODES[host_with_port]
65
+ file_size = nil
66
+ md5 = true
67
+ end
68
+
69
+ if file_size
70
+ sock.write("PUT #{uri.request_uri} HTTP/1.0\r\n" \
71
+ "Content-Length: #{file_size}\r\n\r\n")
72
+ input ? MogileFS.io.copy_stream(@active = input, sock) : yield(sock)
73
+ else
74
+ trailers = md5 ? "Trailer: Content-MD5\r\n" : ""
75
+ sock.write("PUT #{uri.request_uri} HTTP/1.1\r\n" \
76
+ "Host: #{host_with_port}\r\n#{trailers}" \
77
+ "Transfer-Encoding: chunked\r\n\r\n")
78
+ tmp = MogileFS::Chunker.new(sock, md5)
79
+ rv = input ? MogileFS.io.copy_stream(@active = input, tmp) : yield(tmp)
80
+ tmp.flush
81
+ rv
82
+ end
83
+ end
84
+
85
+ def put_streaming_io(sock, uri) # unlikely to be used
86
+ file_size = @streaming_io.length
87
+ written = 0
88
+ request_put(sock, uri, file_size) do |wr|
89
+ @streaming_io.call(Proc.new do |data_to_write|
90
+ written += wr.write(data_to_write)
91
+ end)
92
+ end
93
+ file_size ? file_size : written
94
+ end
95
+
96
+ def rewind_or_raise!(uri, err)
97
+ @active.rewind if @active
98
+ rescue => e
99
+ msg = "#{uri} failed with #{err.message} (#{err.class}) and " \
100
+ "retrying is impossible as rewind on " \
101
+ "#{@active.inspect} failed with: #{e.message} (#{e.class})"
102
+ raise NonRetryableError, msg, e.backtrace
103
+ end
104
+
105
+ ##
106
+ # Writes an HTTP PUT request to +sock+ to upload the file and
107
+ # returns file size if the socket finished writing
108
+ def upload(devid, uri) # :nodoc:
109
+ start = Time.now
110
+ sock = MogileFS::Socket.tcp(uri.host, uri.port)
111
+ file_size = length
112
+
113
+ if @streaming_io
114
+ file_size = put_streaming_io(sock, uri)
115
+ elsif @big_io
116
+ if String === @big_io || @big_io.respond_to?(:to_path)
117
+ file = File.open(@big_io)
118
+ stat = file.stat
119
+ file_size = request_put(sock, uri, stat.file? ? stat.size : nil, file)
120
+ else
121
+ size = nil
122
+ if @big_io.respond_to?(:stat)
123
+ stat = @big_io.stat
124
+ size = stat.size if stat.file?
125
+ elsif @big_io.respond_to?(:size)
126
+ size = @big_io.size
127
+ end
128
+ file_size = request_put(sock, uri, size, @big_io)
129
+ end
130
+ else
131
+ rewind
132
+ request_put(sock, uri, file_size, self)
133
+ end
134
+
135
+ tout = self.class.response_timeout_cb.call(Time.now - start, file_size)
136
+
137
+ case line = sock.timed_read(23, "", tout)
138
+ when %r{^HTTP/\d\.\d\s+(2\d\d)\s} # success!
139
+ file_size
140
+ when nil
141
+ raise EmptyResponseError, 'Unable to read response line from server'
142
+ when %r{^HTTP/\d\.\d\s+(\d+)}
143
+ raise BadResponseError, "HTTP response status from upload: #$1"
144
+ else
145
+ raise UnparseableResponseError,
146
+ "Response line not understood: #{line.inspect}"
147
+ end
148
+ rescue => err
149
+ rewind_or_raise!(uri, err)
150
+ raise
151
+ ensure
152
+ file.close if file
153
+ sock.close if sock
154
+ end
155
+
156
+ def commit
157
+ errors = nil
158
+ @dests.each do |devid, path|
159
+ begin
160
+ uri = URI.parse(path)
161
+ bytes_uploaded = upload(devid, uri)
162
+ @devid, @uri = devid, uri
163
+ return bytes_uploaded
164
+ rescue NonRetryableError
165
+ raise
166
+ rescue => e
167
+ errors ||= []
168
+ errors << "#{path} - #{e.message} (#{e.class})"
169
+ end
170
+ end
171
+
172
+ raise NoStorageNodesError,
173
+ "all paths failed with PUT: #{errors.join(', ')}", []
174
+ end
175
+ end
@@ -0,0 +1,79 @@
1
+ # -*- encoding: binary -*-
2
+ # internal implementation details here, do not rely on them in your code
3
+
4
+ # This class is needed because Net::HTTP streaming is still inefficient
5
+ # for reading huge response bodies over fast LANs.
6
+ class MogileFS::HTTPReader < MogileFS::Socket
7
+ attr_accessor :content_length, :uri
8
+
9
+ # backwards compat, if anybody cares
10
+ alias mogilefs_size content_length # :nodoc:
11
+
12
+ # this may OOM your system on large files
13
+ def to_s
14
+ buf = ""
15
+ read(@content_length, buf)
16
+ return buf if buf.size == @content_length
17
+
18
+ raise MogileFS::SizeMismatchError,
19
+ "read=#{buf.size} bytes, expected=#@content_length from #@uri", []
20
+ end
21
+
22
+ def stream_to(dest)
23
+ rv = MogileFS.io.copy_stream(self, dest)
24
+ return rv if rv == @content_length
25
+ raise MogileFS::SizeMismatchError,
26
+ "read=#{rv} bytes, expected=#@content_length from #@uri", []
27
+ end
28
+
29
+ def self.first(paths, timeout, count = nil, offset = nil)
30
+ errors = nil
31
+ if offset || count
32
+ offset ||= 0
33
+ range_end = count ? offset + count - 1 : ""
34
+ range = "Range: bytes=#{offset}-#{range_end}\r\n"
35
+ end
36
+
37
+ paths.each do |path|
38
+ begin
39
+ sock = try(path, timeout, range) and return sock
40
+ rescue => e
41
+ errors ||= []
42
+ errors << "#{path} - #{e.message} (#{e.class})"
43
+ end
44
+ end
45
+ raise MogileFS::Error,
46
+ "all paths failed with GET: #{errors.join(', ')}", []
47
+ end
48
+
49
+ # given a path, this returns a readable socket with ready data from the
50
+ # body of the response.
51
+ def self.try(path, timeout, range) # :nodoc:
52
+ uri = URI.parse(path)
53
+ sock = tcp(uri.host, uri.port, timeout)
54
+ buf = "GET #{uri.request_uri} HTTP/1.0\r\n#{range}\r\n" # no chunking
55
+ sock.timed_write(buf, timeout)
56
+
57
+ sock.timed_peek(2048, buf, timeout) or
58
+ raise MogileFS::InvalidResponseError, "EOF while reading header", []
59
+
60
+ head, _ = buf.split(/\r\n\r\n/, 2)
61
+
62
+ # we're dealing with a seriously slow/stupid HTTP server if we can't
63
+ # get the header in a single recv(2) syscall.
64
+ if ((range && head =~ %r{\AHTTP/\d+\.\d+\s+206\s*}) ||
65
+ (!range && head =~ %r{\AHTTP/\d+\.\d+\s+200\s*})) &&
66
+ head =~ %r{^Content-Length:\s*(\d+)}i
67
+ sock.content_length = $1.to_i
68
+ sock.uri = uri
69
+ sock.timed_read(head.bytesize + 4, buf, 0)
70
+ return sock
71
+ end
72
+ msg = range ? "Expected 206 w/#{range.strip}: " : "header="
73
+ msg << head.inspect
74
+ raise MogileFS::InvalidResponseError, msg, []
75
+ rescue
76
+ sock.close if sock
77
+ raise
78
+ end
79
+ end
@@ -1,29 +1,44 @@
1
1
  # -*- encoding: binary -*-
2
- require 'mogilefs/client'
3
- require 'mogilefs/util'
4
-
5
- ##
6
- # MogileFS File manipulation client.
7
2
 
3
+ # \MogileFS file manipulation client.
4
+ #
5
+ # Create a new instance that will communicate with these trackers:
6
+ # hosts = %w[192.168.1.69:6001 192.168.1.70:6001]
7
+ # mg = MogileFS::MogileFS.new(:domain => 'test', :hosts => hosts)
8
+ #
9
+ # # Stores "A bunch of text to store" into 'some_key' with a class of 'text'.
10
+ # mg.store_content('some_key', 'text', "A bunch of text to store")
11
+ #
12
+ # # Retrieve data from 'some_key' as a string
13
+ # data = mg.get_file_data('some_key')
14
+ #
15
+ # # Store the contents of 'image.jpeg' into the key 'my_image' with a
16
+ # # class of 'image'.
17
+ # mg.store_file('my_image', 'image', 'image.jpeg')
18
+ #
19
+ # # Store the contents of 'image.jpeg' into the key 'my_image' with a
20
+ # # class of 'image' using an open IO object.
21
+ # File.open('image.jpeg') { |fp| mg.store_file('my_image', 'image', fp) }
22
+ #
23
+ # # Retrieve the contents of 'my_image' into '/path/to/huge_file'
24
+ # # without slurping the entire contents into memory:
25
+ # mg.get_file_data('my_image', '/path/to/huge_file')
26
+ #
27
+ # # Remove the key 'my_image' and 'some_key'.
28
+ # mg.delete('my_image')
29
+ # mg.delete('some_key')
30
+ #
8
31
  class MogileFS::MogileFS < MogileFS::Client
9
-
10
- include MogileFS::Util
11
32
  include MogileFS::Bigfile
12
33
 
13
- ##
14
34
  # The domain of keys for this MogileFS client.
35
+ attr_accessor :domain
15
36
 
16
- attr_reader :domain
17
-
18
- ##
19
37
  # The timeout for get_file_data. Defaults to five seconds.
20
-
21
38
  attr_accessor :get_file_data_timeout
22
39
 
23
- ##
24
40
  # Creates a new MogileFS::MogileFS instance. +args+ must include a key
25
41
  # :domain specifying the domain of this client.
26
-
27
42
  def initialize(args = {})
28
43
  @domain = args[:domain]
29
44
 
@@ -38,70 +53,86 @@ class MogileFS::MogileFS < MogileFS::Client
38
53
  end
39
54
  end
40
55
 
41
- ##
42
- # Enumerates keys starting with +key+.
43
-
44
- def each_key(prefix)
56
+ # Enumerates keys, limited by optional +prefix+
57
+ def each_key(prefix = "", &block)
45
58
  after = nil
46
-
47
- keys, after = list_keys prefix
48
-
49
- until keys.nil? or keys.empty? do
50
- keys.each { |k| yield k }
51
- keys, after = list_keys prefix, after
52
- end
53
-
59
+ begin
60
+ keys, after = list_keys(prefix, after, 1000, &block)
61
+ end while keys && keys[0]
54
62
  nil
55
63
  end
56
64
 
57
- ##
58
- # Retrieves the contents of +key+.
59
-
60
- def get_file_data(key, &block)
61
- paths = get_paths(key) or return nil
62
- paths.each do |path|
63
- begin
64
- sock = http_read_sock(URI.parse(path))
65
- begin
66
- return yield(sock) if block_given?
67
- return sysread_full(sock, sock.mogilefs_size, @get_file_data_timeout)
68
- ensure
69
- sock.close rescue nil
70
- end
71
- rescue MogileFS::Timeout, MogileFS::InvalidResponseError,
72
- Errno::ECONNREFUSED, EOFError, SystemCallError
73
- end
65
+ # Retrieves the contents of +key+. If +dst+ is specified, +dst+
66
+ # should be an IO-like object capable of receiving the +write+ method
67
+ # or a path name. +copy_length+ may be specified to limit the number of
68
+ # bytes to retrieve, and +src_offset+ can be specified to specify the
69
+ # start position of the copy.
70
+ def get_file_data(key, dst = nil, copy_length = nil, src_offset = nil)
71
+ paths = get_paths(key)
72
+ sock = MogileFS::HTTPReader.first(paths, @get_file_data_timeout,
73
+ copy_length, src_offset)
74
+ if dst
75
+ sock.stream_to(dst)
76
+ elsif block_given?
77
+ yield(sock)
78
+ else
79
+ sock.to_s
74
80
  end
75
- nil
81
+ ensure
82
+ sock.close if sock && ! sock.closed?
76
83
  end
77
84
 
78
- ##
79
- # Get the paths for +key+.
85
+ # Get the paths (URLs as strings) for +key+. If +args+ is specified,
86
+ # it may contain:
87
+ # - :noverify -> boolean, whether or not the tracker checks (default: true)
88
+ # - :pathcount -> a positive integer of URLs to retrieve (default: 2)
89
+ # - :zone -> "alt" or nil (default: nil)
90
+ #
91
+ # :noverify defaults to false because this client library is capable of
92
+ # verifying paths for readability itself. It is also faster and more
93
+ # reliable to verify paths on the client.
94
+ def get_paths(key, *args)
95
+ opts = {
96
+ :domain => @domain,
97
+ :key => key,
98
+ :noverify => args[0],
99
+ :zone => args[1],
100
+ }
101
+ if Hash === args[0]
102
+ args = args[0]
103
+ opts[:noverify] = args[:noverify]
104
+ opts[:zone] = args[:zone]
105
+ pathcount = args[:pathcount] and opts[:pathcount] = pathcount.to_i
106
+ end
80
107
 
81
- def get_paths(key, noverify = true, zone = nil)
82
- opts = { :domain => @domain, :key => key,
83
- :noverify => noverify ? 1 : 0, :zone => zone }
108
+ opts[:noverify] = false == opts[:noverify] ? 0 : 1
84
109
  @backend.respond_to?(:_get_paths) and return @backend._get_paths(opts)
85
110
  res = @backend.get_paths(opts)
86
- (1..res['paths'].to_i).map { |i| res["path#{i}"] }.compact
111
+ (1..res['paths'].to_i).map { |i| res["path#{i}"] }
87
112
  end
88
113
 
89
- ##
90
- # Get the URIs for +key+.
114
+ # Returns +true+ if +key+ exists, +false+ if not
115
+ def exist?(key)
116
+ rv = nil
117
+ args = { :key => key, :domain => @domain }
118
+ @backend.pipeline_dispatch(:get_paths, args) { |x| rv = (Hash === x) }
119
+ @backend.pipeline_wait(1)
120
+ rv
121
+ end
91
122
 
92
- def get_uris(key, noverify = true, zone = nil)
93
- get_paths(key, noverify, zone).map { |path| URI.parse(path) }
123
+ # Get the URIs for +key+ (paths) as URI::HTTP objects
124
+ def get_uris(key, *args)
125
+ get_paths(key, *args).map! { |path| URI.parse(path) }
94
126
  end
95
127
 
96
- ##
97
128
  # Creates a new file +key+ in +klass+. +bytes+ is currently unused.
98
- #
99
- # The +block+ operates like File.open.
100
-
101
- def new_file(key, klass = nil, bytes = 0, &block) # :yields: file
129
+ # Consider using store_file instead of this method for large files.
130
+ # This requires a block passed to it and operates like File.open.
131
+ # This atomically replaces existing data stored as +key+ when the block completes.
132
+ def new_file(key, klass = nil, bytes = 0) # :yields: file
102
133
  raise MogileFS::ReadOnlyError if readonly?
103
134
  opts = { :domain => @domain, :key => key, :multi_dest => 1 }
104
- opts[:class] = klass if klass
135
+ opts[:class] = klass if klass && klass != "default"
105
136
  res = @backend.create_open(opts)
106
137
 
107
138
  dests = if dev_count = res['dev_count'] # multi_dest succeeded
@@ -119,43 +150,38 @@ class MogileFS::MogileFS < MogileFS::Client
119
150
  end
120
151
 
121
152
  case (dests[0][1] rescue nil)
122
- when nil, '' then
123
- raise MogileFS::EmptyPathError
124
153
  when /^http:\/\// then
125
- MogileFS::HTTPFile.open(self, res['fid'], klass, key,
126
- dests, bytes, &block)
154
+ http_file = MogileFS::HTTPFile.new(dests, bytes)
155
+ yield http_file
156
+ rv = http_file.commit
157
+ @backend.create_close(:fid => res['fid'],
158
+ :devid => http_file.devid,
159
+ :domain => @domain,
160
+ :key => key,
161
+ :path => http_file.uri.to_s,
162
+ :size => rv)
163
+ rv
164
+ when nil, '' then
165
+ raise MogileFS::EmptyPathError,
166
+ "Empty path for mogile upload res=#{res.inspect}"
127
167
  else
128
168
  raise MogileFS::UnsupportedPathError,
129
169
  "paths '#{dests.inspect}' returned by backend is not supported"
130
170
  end
131
171
  end
132
172
 
133
- ##
134
173
  # Copies the contents of +file+ into +key+ in class +klass+. +file+ can be
135
- # either a file name or an object that responds to #sysread.
136
- # Returns size of +file+ stored
137
-
174
+ # either a path name (String or Pathname object) or an IO-like object that
175
+ # responds to #read or #readpartial. Returns size of +file+ stored.
176
+ # This atomically replaces existing data stored as +key+
138
177
  def store_file(key, klass, file)
139
178
  raise MogileFS::ReadOnlyError if readonly?
140
179
 
141
- new_file key, klass do |mfp|
142
- if file.respond_to? :sysread then
143
- sysrwloop(file, mfp)
144
- else
145
- size = File.size(file)
146
- if size > 0x10000 # Bigass file, handle differently
147
- mfp.big_io = file
148
- size
149
- else
150
- File.open(file, "rb") { |fp| sysrwloop(fp, mfp) }
151
- end
152
- end
153
- end
180
+ new_file(key, klass) { |mfp| mfp.big_io = file }
154
181
  end
155
182
 
156
- ##
157
- # Stores +content+ into +key+ in class +klass+.
158
-
183
+ # Stores +content+ into +key+ in class +klass+, where +content+ is a String
184
+ # This atomically replaces existing data stored as +key+
159
185
  def store_content(key, klass, content)
160
186
  raise MogileFS::ReadOnlyError if readonly?
161
187
 
@@ -166,29 +192,22 @@ class MogileFS::MogileFS < MogileFS::Client
166
192
  mfp << content
167
193
  end
168
194
  end
169
-
170
- content.length
171
195
  end
172
196
 
173
- ##
174
197
  # Removes +key+.
175
-
176
198
  def delete(key)
177
199
  raise MogileFS::ReadOnlyError if readonly?
178
200
 
179
201
  @backend.delete :domain => @domain, :key => key
202
+ true
180
203
  end
181
204
 
182
- ##
183
- # Sleeps +duration+.
184
-
185
- def sleep(duration)
205
+ # Sleeps +duration+, only used for testing
206
+ def sleep(duration) # :nodoc:
186
207
  @backend.sleep :duration => duration
187
208
  end
188
209
 
189
- ##
190
210
  # Renames a key +from+ to key +to+.
191
-
192
211
  def rename(from, to)
193
212
  raise MogileFS::ReadOnlyError if readonly?
194
213
 
@@ -196,80 +215,155 @@ class MogileFS::MogileFS < MogileFS::Client
196
215
  nil
197
216
  end
198
217
 
199
- ##
200
218
  # Returns the size of +key+.
201
219
  def size(key)
202
220
  @backend.respond_to?(:_size) and return @backend._size(domain, key)
203
- paths = get_paths(key) or return nil
204
- paths_size(paths)
221
+ begin
222
+ file_info(key)["length"].to_i
223
+ rescue MogileFS::Backend::UnknownCommandError
224
+ paths_size(get_paths(key))
225
+ end
205
226
  end
206
227
 
207
- def paths_size(paths)
208
- paths.each do |path|
209
- begin
210
- return http_read_sock(URI.parse(path), "HEAD").mogilefs_size
211
- rescue MogileFS::InvalidResponseError, MogileFS::Timeout,
212
- Errno::ECONNREFUSED, EOFError, SystemCallError => err
213
- next
214
- end
215
- end
216
- nil
228
+ def paths_size(paths) # :nodoc:
229
+ require "mogilefs/paths_size"
230
+ MogileFS::PathsSize.call(paths)
217
231
  end
218
232
 
219
- ##
220
- # Lists keys starting with +prefix+ follwing +after+ up to +limit+. If
233
+ # Lists keys starting with +prefix+ following +after+ up to +limit+. If
221
234
  # +after+ is nil the list starts at the beginning.
222
-
223
- def list_keys(prefix, after = nil, limit = 1000, &block)
224
- if @backend.respond_to?(:_list_keys)
235
+ def list_keys(prefix = "", after = nil, limit = 1000, &block)
236
+ @backend.respond_to?(:_list_keys) and
225
237
  return @backend._list_keys(domain, prefix, after, limit, &block)
226
- end
227
238
 
228
- res = begin
229
- @backend.list_keys(:domain => domain, :prefix => prefix,
230
- :after => after, :limit => limit)
239
+ begin
240
+ res = @backend.list_keys(:domain => domain, :prefix => prefix,
241
+ :after => after, :limit => limit)
231
242
  rescue MogileFS::Backend::NoneMatchError
232
- return nil
243
+ return
233
244
  end
234
245
 
235
246
  keys = (1..res['key_count'].to_i).map { |i| res["key_#{i}"] }
236
- if block_given?
237
- # emulate the MogileFS::Mysql interface, slowly...
238
- keys.each do |key|
239
- paths = get_paths(key) or next
240
- length = paths_size(paths) or next
241
- yield key, length, paths.size
247
+ if block
248
+ if 1 == block.arity
249
+ keys.each { |key| block.call(key) }
250
+ else
251
+ list_keys_verbose(keys, block)
242
252
  end
243
253
  end
244
254
 
245
255
  [ keys, res['next_after'] ]
246
256
  end
247
257
 
248
- protected
249
-
250
- # given a URI, this returns a readable socket with ready data from the
251
- # body of the response.
252
- def http_read_sock(uri, http_method = "GET")
253
- sock = Socket.mogilefs_new_request(uri.host, uri.port,
254
- "#{http_method} #{uri.request_uri} HTTP/1.0\r\n\r\n",
255
- @get_file_data_timeout)
256
- buf = sock.recv_nonblock(4096, Socket::MSG_PEEK)
257
- head, body = buf.split(/\r\n\r\n/, 2)
258
-
259
- # we're dealing with a seriously slow/stupid HTTP server if we can't
260
- # get the header in a single read(2) syscall.
261
- if head =~ %r{\AHTTP/\d+\.\d+\s+200\s*} &&
262
- head =~ %r{^Content-Length:\s*(\d+)}i
263
- sock.mogilefs_size = $1.to_i
264
- case http_method
265
- when "HEAD" then sock.close
266
- when "GET" then sock.recv(head.size + 4, 0)
267
- end
268
- return sock
258
+ def list_keys_verbose(keys, block) # :nodoc:
259
+ # emulate the MogileFS::Mysql interface, slowly...
260
+ ordered = keys.dup
261
+ ready = {}
262
+ on_file_info = lambda do |info|
263
+ Hash === info or raise info
264
+ file_info_cleanup(info)
265
+
266
+ # deal with trackers with multiple queryworkers responding out-of-order
267
+ ready[info["key"]] = info
268
+ while info = ready.delete(ordered[0])
269
+ block.call(ordered.shift, info["length"], info["devcount"])
270
+ end
271
+ end
272
+ opts = { :domain => @domain }
273
+ begin
274
+ keys.each do |key|
275
+ opts[:key] = key
276
+ @backend.pipeline_dispatch(:file_info, opts, &on_file_info)
269
277
  end
270
- sock.close rescue nil
271
- raise MogileFS::InvalidResponseError,
272
- "#{http_method} on #{uri} returned: #{head.inspect}"
273
- end # def http_read_sock
278
+ @backend.pipeline_wait
279
+ rescue MogileFS::Backend::UnknownCommandError # MogileFS < 2.45
280
+ @backend.shutdown # reset the socket
281
+ args = { :pathcount => 0x7fffffff }
282
+ keys.each do |key|
283
+ paths = get_paths(key, args)
284
+ block.call(key, paths_size(paths), paths.size)
285
+ end
286
+ rescue MogileFS::PipelineError, SystemCallError,
287
+ MogileFS::RequestTruncatedError,
288
+ MogileFS::UnreadableSocketError,
289
+ MogileFS::InvalidResponseError, # truncated response
290
+ MogileFS::Timeout
291
+ @backend.shutdown
292
+ keys = ordered - ready.keys
293
+ retry
294
+ rescue
295
+ @backend.shutdown
296
+ raise
297
+ end
298
+ end
299
+
300
+ # Return metadata about a file as a hash.
301
+ # Returns the domain, class, length, devcount, etc. as keys.
302
+ # Optionally, device ids (not paths) can be returned as
303
+ # well if :devices is specified and +true+.
304
+ #
305
+ # This should only be used for informational purposes, and not usually
306
+ # for dynamically serving files.
307
+ #
308
+ # mg.file_info("bar")
309
+ #
310
+ # Returns:
311
+ #
312
+ # {
313
+ # "domain" => "foo",
314
+ # "key" => "bar",
315
+ # "class" => "default",
316
+ # "devcount" => 2,
317
+ # "length => 666
318
+ # }
319
+ def file_info(key, args = nil)
320
+ opts = { :domain => @domain, :key => key }
321
+ args and devices = args[:devices] and opts[:devices] = devices ? 1 : 0
322
+ file_info_cleanup(@backend.file_info(opts))
323
+ end
324
+
325
+ def file_info_cleanup(rv) # :nodoc:
326
+ %w(fid length devcount).each { |f| rv[f] = rv[f].to_i }
327
+ devids = rv["devids"] and
328
+ rv["devids"] = devids.split(/,/).map! { |x| x.to_i }
329
+ rv
330
+ end
274
331
 
332
+ # Given an Integer +fid+ or String +key+ and domain, thoroughly search
333
+ # the database for all occurrences of a particular fid.
334
+ #
335
+ # Use this sparingly, this command hits the master database numerous
336
+ # times and is very expensive. This is not for production use, only
337
+ # troubleshooting and debugging.
338
+ #
339
+ # Searches for fid=666:
340
+ #
341
+ # client.file_debug(666)
342
+ #
343
+ # Search for key=foo using the default domain for this object:
344
+ #
345
+ # client.file_debug("foo")
346
+ #
347
+ # Search for key=foo in domain="bar":
348
+ #
349
+ # client.file_debug(:key => "foo", :domain => "bar")
350
+ #
351
+ def file_debug(args)
352
+ case args
353
+ when Integer then args = { "fid" => args }
354
+ when String then args = { "key" => args }
355
+ end
356
+ opts = { :domain => args[:domain] || @domain }.merge!(args)
357
+
358
+ rv = @backend.file_debug(opts)
359
+ rv.each do |k,v|
360
+ case k
361
+ when /_(?:classid|devcount|dmid|fid|length|
362
+ nexttry|fromdevid|failcount|flags|devid|type)\z/x
363
+ rv[k] = v.to_i
364
+ when /devids\z/
365
+ rv[k] = v.split(/,/).map! { |x| x.to_i }
366
+ end
367
+ end
368
+ end
275
369
  end