httpdisk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/test.yml +26 -0
- data/.gitignore +3 -0
- data/Gemfile +10 -0
- data/Gemfile.lock +69 -0
- data/LICENSE +21 -0
- data/README.md +179 -0
- data/Rakefile +47 -0
- data/bin/httpdisk +41 -0
- data/examples.rb +117 -0
- data/httpdisk.gemspec +28 -0
- data/lib/httpdisk.rb +12 -0
- data/lib/httpdisk/cache.rb +80 -0
- data/lib/httpdisk/cache_key.rb +100 -0
- data/lib/httpdisk/cli.rb +218 -0
- data/lib/httpdisk/cli_slop.rb +54 -0
- data/lib/httpdisk/client.rb +102 -0
- data/lib/httpdisk/error.rb +3 -0
- data/lib/httpdisk/payload.rb +61 -0
- data/lib/httpdisk/version.rb +3 -0
- data/logo.svg +12 -0
- metadata +119 -0
data/httpdisk.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require_relative 'lib/httpdisk/version'
|
2
|
+
|
3
|
+
Gem::Specification.new do |spec|
  # identity
  spec.name = 'httpdisk'
  spec.version = HTTPDisk::VERSION
  spec.authors = [ 'Adam Doppelt' ]
  spec.email = 'amd@gurge.com'

  # description shown on the registry
  spec.summary = 'httpdisk - disk cache for faraday'
  spec.description = 'httpdisk works with faraday to aggressively cache responses on disk.'
  spec.homepage = 'http://github.com/gurgeous/httpdisk'
  spec.license = 'MIT'
  spec.required_ruby_version = '>= 2.7.0'

  # Package every file tracked by git except those under test/. The listing
  # is taken from the directory that contains this gemspec.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.reject { |path| path.match(%r{^test/}) }
  end

  # Everything under bin/ becomes an executable (by basename).
  spec.bindir = 'bin'
  spec.executables = s_executables = spec.files.grep(%r{^#{spec.bindir}/}).map { |path| File.basename(path) }
  spec.require_paths = [ 'lib' ]

  # runtime gem dependencies, in declaration order
  {
    'faraday' => '~> 1.4',
    'faraday-cookie_jar' => '~> 0.0',
    'faraday_middleware' => '~> 1.0',
    'slop' => '~> 4.8',
  }.each do |gem_name, requirement|
    spec.add_dependency gem_name, requirement
  end
end
|
data/lib/httpdisk.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'httpdisk/cache_key'
|
2
|
+
require 'httpdisk/cache'
|
3
|
+
require 'httpdisk/cli_slop'
|
4
|
+
require 'httpdisk/cli'
|
5
|
+
require 'httpdisk/client'
|
6
|
+
require 'httpdisk/error'
|
7
|
+
require 'httpdisk/payload'
|
8
|
+
require 'httpdisk/version'
|
9
|
+
|
10
|
+
module HTTPDisk
  # Numeric status code exposed as HTTPDisk::ERROR_STATUS.
  # NOTE(review): presumably the synthetic status recorded for requests that
  # failed without an HTTP response (the cache checks `error_999?` on
  # payloads) - confirm against the client/payload code.
  ERROR_STATUS = 999
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
require 'fileutils'
require 'zlib'
|
2
|
+
|
3
|
+
module HTTPDisk
  # Disk cache for cache_keys => response. Files are gzip-compressed.
  #
  # Recognized options (read from the hash passed to #initialize):
  #   :dir          - String, root directory of the cache (required)
  #   :expires_in   - Integer, seconds after which a cached file is stale
  #   :force        - true/false/nil, when true nothing is read from the cache
  #   :force_errors - true/false/nil, when true cached errors are not read
  class Cache
    attr_reader :options

    # Store options and validate them. Raises ArgumentError when :dir is not
    # a String, :expires_in is present but not an Integer, or :force /
    # :force_errors is anything other than nil, true or false.
    def initialize(options)
      @options = options

      # heavy sanity checking on arguments here
      if !dir.is_a?(String)
        raise ArgumentError, "expected :dir to be a string, not #{dir.inspect}"
      end
      if expires_in && !expires_in.is_a?(Integer)
        raise ArgumentError, "expected :expires_in to be an integer, not #{expires_in.inspect}"
      end
      %i[force force_errors].each do
        value = send(_1)
        if ![ nil, true, false ].include?(value)
          raise ArgumentError, "expected #{_1} to be a boolean, not #{value.inspect}"
        end
      end
    end

    # Option readers, each returns the matching entry of #options.
    %i[dir expires_in force force_errors].each do |method|
      define_method(method) do
        options[method]
      end
    end
    alias force? force
    alias force_errors? force_errors

    # Get the cached payload for cache_key. Returns nil when there is nothing
    # usable in the cache (miss, stale, or reads disabled by :force /
    # :force_errors). Cached error payloads are returned like any other
    # payload; nothing is raised on their behalf.
    def read(cache_key)
      payload_or_status = read0(cache_key)
      payload_or_status.is_a?(Symbol) ? nil : payload_or_status
    end

    # Cache status for a cache_key, %i[error force hit miss stale]
    def status(cache_key)
      payload_or_status = read0(cache_key)
      return payload_or_status if payload_or_status.is_a?(Symbol)

      payload_or_status.error_999? ? :error : :hit
    end

    # Write payload to the disk cache.
    #
    # The gzip stream goes to a temporary sibling file which is renamed over
    # the final path only after it was written completely. A failed or
    # interrupted write therefore never leaves a truncated file that later
    # reads would mistake for a valid cache entry.
    def write(cache_key, payload)
      path = diskpath(cache_key)
      FileUtils.mkdir_p(File.dirname(path))

      tmp = "#{path}.#{Process.pid}.tmp"
      begin
        written = Zlib::GzipWriter.open(tmp) { payload.write(_1) }
        File.rename(tmp, path)
        written
      ensure
        # no-op after a successful rename, cleanup after a failure
        FileUtils.rm_f(tmp)
      end
    end

    # Path on disk for this cache_key: the cache :dir joined with the
    # key's own relative diskpath.
    def diskpath(cache_key)
      File.join(dir, cache_key.diskpath)
    end

    protected

    # low level read, returns payload or status (:miss, :stale or :force)
    def read0(cache_key)
      path = diskpath(cache_key)

      return :miss if !File.exist?(path)
      return :stale if expired?(path)
      return :force if force?

      payload = Zlib::GzipReader.open(path) { Payload.read(_1) }
      # with :force_errors a cached error is treated as if it was not cached
      return :force if force_errors? && payload.error_999?

      payload
    end

    # Is this path expired? Compares the file's mtime against :expires_in.
    # Falsy when :expires_in is not set.
    def expired?(path)
      expires_in && File.stat(path).mtime < Time.now - expires_in
    end
  end
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
require 'digest/md5'
|
3
|
+
require 'uri'
|
4
|
+
|
5
|
+
module HTTPDisk
  # Canonical cache key for a request env. The env must respond to #method,
  # #url (a URI), #request_body and #request_headers.
  class CacheKey
    attr_reader :env

    # Raises (RuntimeError) unless the env url is http/https and has a host.
    def initialize(env)
      @env = env

      # sanity checks
      if env.url.scheme !~ /^https?$/
        raise 'http/https required'
      end
      if !env.url.host
        raise 'hostname required'
      end
    end

    # The url of the underlying env
    def url
      env.url
    end

    # Canonical key string, computed once and then reused
    def key
      @key ||= calculate_key
    end

    # Hex md5 of #key, computed once and then reused
    def digest
      @digest ||= Digest::MD5.hexdigest(key)
    end

    # Relative path for this key: hostdir / first 3 digest chars / the rest
    def diskpath
      @diskpath ||= begin
        prefix = digest[0, 3]
        remainder = digest[3..]
        File.join(hostdir, prefix, remainder)
      end
    end

    # String form is the key itself
    def to_s
      key
    end

    protected

    # Build the key: METHOD scheme://host[:port][path][?sorted query][ body]
    def calculate_key
      parts = [ env.method.upcase, ' ', url.scheme, '://', url.host.downcase ]

      # port only when it is not the scheme's default
      parts.push(':', url.port) if !default_port?

      # a bare "/" path is left out
      parts.push(url.path) if url.path != '/'

      query = url.query
      parts.push('?', querykey(query)) if query && query != ''

      parts.push(' ', bodykey) if env.request_body

      parts.join
    end

    # Key segment for the request body: form bodies are canonicalized like a
    # query, short bodies are used verbatim, anything else is md5'd.
    def bodykey
      body = env.request_body.to_s
      if env.request_headers['Content-Type'] == 'application/x-www-form-urlencoded'
        querykey(body)
      elsif body.length < 50
        body
      else
        Digest::MD5.hexdigest(body)
      end
    end

    # Canonical form of a query string: its &-separated pieces, sorted
    def querykey(q)
      pieces = q.split('&')
      pieces.sort.join('&')
    end

    # Is the url using the default port for its scheme?
    def default_port?
      url.port == url.default_port
    end

    # Directory name derived from url.host: lowercased, leading "www."
    # dropped, restricted to [a-z0-9._-], runs of dots collapsed. Falls back
    # to 'any' when nothing is left.
    def hostdir
      name = url.host.downcase
                .gsub(/^www\./, '')
                .gsub(/[^a-z0-9._-]/, '')
                .squeeze('.')
      name.empty? ? 'any' : name
    end
  end
end
|
data/lib/httpdisk/cli.rb
ADDED
@@ -0,0 +1,218 @@
|
|
1
|
+
require 'faraday-cookie_jar'
|
2
|
+
require 'faraday_middleware'
|
3
|
+
require 'ostruct'
|
4
|
+
|
5
|
+
module HTTPDisk
  # Command line httpdisk command. Takes the parsed option hash (see
  # CliSlop) and either performs the request or prints the cache status.
  class Cli
    attr_reader :options

    # for --expires: number of seconds per unit suffix
    UNITS = {
      s: 1,
      m: 60,
      h: 60 * 60,
      d: 24 * 60 * 60,
      w: 7 * 24 * 60 * 60,
      y: 365 * 24 * 60 * 60, # one year is 365 days
    }.freeze

    def initialize(options)
      @options = options
    end

    # we have a very liberal retry policy
    RETRY_OPTIONS = {
      methods: %w[delete get head options patch post put trace],
      retry_statuses: (400..600).to_a,
      retry_if: ->(_env, _err) { true },
    }.freeze

    # Make the request (or print status). Raises CliError when the response
    # status is >= 400. The response is written to --output when given,
    # otherwise to $stdout.
    def run
      # short circuit --status
      if options[:status]
        status
        return
      end

      # create Faraday client
      faraday = create_faraday

      # run request
      response = faraday.run_request(request_method, request_url, request_body, request_headers)
      if response.status >= 400
        raise CliError, "the requested URL returned error: #{response.status} #{response.reason_phrase}"
      end

      # output
      if options[:output]
        File.open(options[:output], 'w') { output(response, _1) }
      else
        output(response, $stdout)
      end
    end

    # Build the Faraday connection used by #run
    def create_faraday
      Faraday.new do
        # connection settings
        _1.proxy = proxy if options[:proxy]
        _1.options.timeout = options[:max_time] if options[:max_time]

        # cookie middleware
        _1.use :cookie_jar

        # BEFORE httpdisk so each redirect segment is cached
        _1.response :follow_redirects

        # httpdisk
        _1.use :httpdisk, client_options

        # AFTER httpdisk so transient failures are not cached
        if options[:retry]
          _1.request :retry, RETRY_OPTIONS.merge(max: options[:retry])
        end
      end
    end

    # Support for --status: print one "name: value" line for each entry
    # reported by HTTPDisk::Client#status for this request.
    def status
      # build env
      env = Faraday::Env.new.tap do
        _1.method = request_method
        _1.request_body = request_body
        _1.request_headers = request_headers
        _1.url = request_url
      end

      # now print status
      client = HTTPDisk::Client.new(nil, client_options)
      client.status(env).each do
        puts "#{_1}: #{_2.inspect}"
      end
    end

    # Output response to f. With --include a status line and the response
    # headers are written before the body.
    def output(response, f)
      if options[:include]
        f.puts "HTTPDISK #{response.status} #{response.reason_phrase}"
        response.headers.each { f.puts("#{_1}: #{_2}") }
        f.puts
      end
      f.write(response.body)
    end

    #
    # request_XXX
    #

    # HTTP method (get, post, etc.) as a symbol. Uses --request when given,
    # otherwise post when --data is present, otherwise get. Raises CliError
    # for methods Faraday does not know.
    def request_method
      method = if options[:request]
        options[:request]
      elsif options[:data]
        'post'
      end
      method ||= 'get'
      method = method.downcase.to_sym

      if !Faraday::Connection::METHODS.include?(method)
        raise CliError, "invalid --request #{method.inspect}"
      end
      method
    end

    # Request url as a URI. A url without a scheme gets http:// prepended.
    # Raises CliError for schemes other than http/https and for urls that
    # cannot be parsed.
    def request_url
      url = options[:url]
      # recover from missing http:
      if url !~ %r{^https?://}i
        if url =~ %r{^\w+://}
          raise CliError, 'only http/https supported'
        end
        url = "http://#{url}"
      end
      URI.parse(url)
    rescue URI::InvalidURIError
      raise CliError, "invalid url #{url.inspect}"
    end

    # Request body (the --data flag, may be nil)
    def request_body
      options[:data]
    end

    # Request headers built from --user-agent and each "Key: value" --header.
    # Raises CliError for a --header that lacks a key or a value.
    def request_headers
      {}.tap do |headers|
        if options[:user_agent]
          headers['User-Agent'] = options[:user_agent]
        end

        options[:header].each do |header|
          key, value = header.split(': ', 2)
          if !key || !value || key.empty? || value.empty?
            raise CliError, "invalid --header #{header.inspect}"
          end
          headers[key] = value
        end
      end
    end

    #
    # helpers
    #

    # Options to HTTPDisk::Client. Raises CliError for an unparseable
    # --expires value.
    def client_options
      {}.tap do |client_options|
        client_options[:dir] = options[:dir]
        if options[:expires]
          seconds = parse_expires(options[:expires])
          if !seconds
            raise CliError, "invalid --expires #{options[:expires].inspect}"
          end
          client_options[:expires_in] = seconds
        end
        client_options[:force] = options[:force]
        client_options[:force_errors] = options[:force_errors]
      end
    end

    # Return validated --proxy flag if present (nil when the flag is absent).
    # Raises CliError when the flag is not host[:port].
    def proxy
      return if !options[:proxy]

      proxy = parse_proxy(options[:proxy])
      raise CliError, "--proxy should be host[:port], not #{options[:proxy].inspect}" if !proxy
      proxy
    end

    # Parse --expires flag: digits with an optional unit from UNITS (seconds
    # when the unit is omitted). Returns the number of seconds, or nil when
    # the value is not valid.
    def parse_expires(s)
      m = s.match(/^(\d+)([smhdwy])?$/)
      return if !m

      num, unit = m[1].to_i, (m[2] || 's').to_sym
      return if !UNITS.key?(unit)

      num * UNITS[unit]
    end

    # Parse --proxy flag (host[:port]). Returns an "http://host[:port]"
    # string, or nil when host or port is missing/invalid.
    def parse_proxy(proxy_flag)
      host, port = proxy_flag.split(':', 2)
      return if !host || host.empty?
      return if port&.empty?

      URI.parse('http://placeholder').tap do
        begin
          _1.host = host
          _1.port = port if port
        rescue URI::InvalidComponentError
          # bare return leaves parse_proxy itself with nil
          return
        end
      end.to_s
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# manually load dependencies here since this is loaded standalone by bin
|
2
|
+
require 'httpdisk/error'
|
3
|
+
require 'httpdisk/version'
|
4
|
+
require 'slop'
|
5
|
+
|
6
|
+
module HTTPDisk
  # Slop parsing. This is broken out so we can run without require 'httpdisk'.
  module CliSlop
    # Parse command line args into an options hash. The single positional
    # argument is stored under :url. Raises Slop::Error when args is empty
    # (with an empty message) or when the number of urls is not exactly one.
    # --version and --help print and exit.
    def self.slop(args)
      parsed = Slop.parse(args) do |opts|
        opts.banner = 'httpdisk [options] [url]'

        # similar to curl
        opts.separator 'Similar to curl:'
        opts.string '-d', '--data', 'HTTP POST data'
        opts.array '-H', '--header', 'pass custom header(s) to server', delimiter: nil
        opts.boolean '-i', '--include', 'include response headers in the output'
        opts.integer '-m', '--max-time', 'maximum time allowed for the transfer'
        opts.string '-o', '--output', 'write to file instead of stdout'
        opts.string '-x', '--proxy', 'use host[:port] as proxy'
        opts.string '-X', '--request', 'HTTP method to use'
        opts.integer '--retry', 'retry request if problems occur'
        opts.boolean '-s', '--silent', "silent mode (don't print errors)"
        opts.string '-A', '--user-agent', 'send User-Agent to server'

        # from httpdisk
        opts.separator 'Specific to httpdisk:'
        opts.string '--dir', 'httpdisk cache directory (defaults to ~/httpdisk)'
        opts.string '--expires', 'when to expire cached requests (ex: 1h, 2d, 3w)'
        opts.boolean '--force', "don't read anything from cache (but still write)"
        opts.boolean '--force-errors', "don't read errors from cache (but still write)"
        opts.boolean '--status', 'show status for a url in the cache'

        # generic
        opts.boolean '--version', 'show version' do
          puts "httpdisk #{HTTPDisk::VERSION}"
          exit
        end
        opts.on '--help', 'show this help' do
          puts opts
          exit
        end
      end

      # exactly one url is required
      if args.empty?
        raise Slop::Error, ''
      end
      if parsed.args.empty?
        raise Slop::Error, 'no URL specified'
      end
      if parsed.args.length > 1
        raise Slop::Error, 'more than one URL specified'
      end

      result = parsed.to_h
      result[:url] = parsed.args.first
      result
    end
  end
end
|