httpdisk 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/workflows/test.yml +26 -0
- data/.gitignore +3 -0
- data/Gemfile +10 -0
- data/Gemfile.lock +69 -0
- data/LICENSE +21 -0
- data/README.md +179 -0
- data/Rakefile +47 -0
- data/bin/httpdisk +41 -0
- data/examples.rb +117 -0
- data/httpdisk.gemspec +28 -0
- data/lib/httpdisk.rb +12 -0
- data/lib/httpdisk/cache.rb +80 -0
- data/lib/httpdisk/cache_key.rb +100 -0
- data/lib/httpdisk/cli.rb +218 -0
- data/lib/httpdisk/cli_slop.rb +54 -0
- data/lib/httpdisk/client.rb +102 -0
- data/lib/httpdisk/error.rb +3 -0
- data/lib/httpdisk/payload.rb +61 -0
- data/lib/httpdisk/version.rb +3 -0
- data/logo.svg +12 -0
- metadata +119 -0
data/httpdisk.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require_relative 'lib/httpdisk/version'
|
2
|
+
|
3
|
+
# Gem specification for httpdisk. The version comes from HTTPDisk::VERSION.
Gem::Specification.new do |spec|
  # identity
  spec.name = 'httpdisk'
  spec.version = HTTPDisk::VERSION
  spec.authors = [ 'Adam Doppelt' ]
  spec.email = 'amd@gurge.com'

  # description & requirements
  spec.summary = 'httpdisk - disk cache for faraday'
  spec.description = 'httpdisk works with faraday to aggressively cache responses on disk.'
  spec.homepage = 'http://github.com/gurgeous/httpdisk'
  spec.license = 'MIT'
  spec.required_ruby_version = '>= 2.7.0'

  # what's in the gem? Everything tracked by git (NUL-separated listing),
  # minus anything under test/.
  tracked = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0")
  end
  spec.files = tracked.reject { |path| path.match(%r{^test/}) }
  spec.bindir = 'bin'
  spec.executables = spec.files.grep(%r{^#{spec.bindir}/}) { |path| File.basename(path) }
  spec.require_paths = [ 'lib' ]

  # gem dependencies (name => version requirement)
  {
    'faraday' => '~> 1.4',
    'faraday-cookie_jar' => '~> 0.0',
    'faraday_middleware' => '~> 1.0',
    'slop' => '~> 4.8',
  }.each do |gem_name, requirement|
    spec.add_dependency gem_name, requirement
  end
end
|
data/lib/httpdisk.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'httpdisk/cache_key'
|
2
|
+
require 'httpdisk/cache'
|
3
|
+
require 'httpdisk/cli_slop'
|
4
|
+
require 'httpdisk/cli'
|
5
|
+
require 'httpdisk/client'
|
6
|
+
require 'httpdisk/error'
|
7
|
+
require 'httpdisk/payload'
|
8
|
+
require 'httpdisk/version'
|
9
|
+
|
10
|
+
# Top-level namespace for the httpdisk library.
module HTTPDisk
  # Special status code used by the library.
  # NOTE(review): presumably the synthetic HTTP status recorded for cached
  # errors (compare Payload#error_999?) - confirm against client.rb/payload.rb.
  ERROR_STATUS = 999
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
require 'fileutils'
require 'zlib'
|
2
|
+
|
3
|
+
module HTTPDisk
  # Disk cache for cache_keys => response. Files are gzip-compressed and live
  # under options[:dir], at the path given by each cache key's #diskpath.
  class Cache
    attr_reader :options

    # options is a Hash with these keys:
    #
    #   :dir          - (String, required) root directory of the cache
    #   :expires_in   - (Integer, optional) max age in seconds before an entry
    #                   is considered stale
    #   :force        - (true/false/nil) never read from the cache
    #   :force_errors - (true/false/nil) never read cached errors
    #
    # Raises ArgumentError if any option has the wrong type.
    def initialize(options)
      @options = options

      # heavy sanity checking on arguments here
      if !dir.is_a?(String)
        raise ArgumentError, "expected :dir to be a string, not #{dir.inspect}"
      end
      if expires_in && !expires_in.is_a?(Integer)
        raise ArgumentError, "expected :expires_in to be an integer, not #{expires_in.inspect}"
      end
      %i[force force_errors].each do
        value = send(_1)
        if ![ nil, true, false ].include?(value)
          raise ArgumentError, "expected #{_1} to be a boolean, not #{value.inspect}"
        end
      end
    end

    # Option readers: dir, expires_in, force, force_errors.
    %i[dir expires_in force force_errors].each do |method|
      define_method(method) do
        options[method]
      end
    end
    alias force? force
    alias force_errors? force_errors

    # Get the cached payload for cache_key. Returns nil when there is nothing
    # usable in the cache (miss, stale or forced). Cached errors are returned
    # like any other payload; nothing is raised here.
    def read(cache_key)
      payload_or_status = read0(cache_key)
      payload_or_status.is_a?(Symbol) ? nil : payload_or_status
    end

    # Cache status for a cache_key, one of %i[error force hit miss stale].
    def status(cache_key)
      payload_or_status = read0(cache_key)
      return payload_or_status if payload_or_status.is_a?(Symbol)

      payload_or_status.error_999? ? :error : :hit
    end

    # Write payload to the disk cache for cache_key.
    #
    # The payload is gzipped into a temporary file next to the destination and
    # then renamed into place. If payload.write raises (or the process dies
    # mid-write) the cache never contains a truncated entry, and any existing
    # entry is left untouched. Returns whatever payload.write returned.
    def write(cache_key, payload)
      path = diskpath(cache_key)
      FileUtils.mkdir_p(File.dirname(path))

      # same directory as path so the rename stays on one filesystem
      tmp = "#{path}.#{Process.pid}.#{Thread.current.object_id}.tmp"
      begin
        result = Zlib::GzipWriter.open(tmp) { payload.write(_1) }
        File.rename(tmp, path)
        result
      ensure
        # no-op after a successful rename, cleanup after a failure
        FileUtils.rm_f(tmp)
      end
    end

    # Path on disk for this cache_key: dir joined with the key's own diskpath.
    def diskpath(cache_key)
      File.join(dir, cache_key.diskpath)
    end

    protected

    # Low level read. Returns the payload, or a status Symbol (:miss, :stale
    # or :force) explaining why no payload is being returned.
    def read0(cache_key)
      path = diskpath(cache_key)

      return :miss if !File.exist?(path)
      return :stale if expired?(path)
      return :force if force?

      payload = Zlib::GzipReader.open(path) { Payload.read(_1) }
      # with force_errors the entry must be read before we can tell it is an error
      return :force if force_errors? && payload.error_999?

      payload
    end

    # Is this path expired? Based on file mtime; always falsy without :expires_in.
    def expired?(path)
      expires_in && File.stat(path).mtime < Time.now - expires_in
    end
  end
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
require 'digest/md5'
|
3
|
+
require 'uri'
|
4
|
+
|
5
|
+
module HTTPDisk
  # Canonical cache key for a request env. The key is a string built from the
  # method, scheme, host, (non-default) port, path, sorted query and request
  # body; its md5 digest determines where the entry lives on disk.
  class CacheKey
    attr_reader :env

    # env must respond to url, method, request_body and request_headers.
    # Raises RuntimeError unless the url is http/https and has a host.
    def initialize(env)
      @env = env

      # sanity checks
      raise 'http/https required' unless env.url.scheme =~ /^https?$/
      raise 'hostname required' unless env.url.host
    end

    # The request url, straight from env.
    def url
      env.url
    end

    # Cache key (memoized)
    def key
      @key ||= calculate_key
    end

    # md5(key) as hex (memoized)
    def digest
      @digest ||= Digest::MD5.hexdigest(key)
    end

    # Relative path for this cache key: hostdir, then the first three hex
    # digits of the digest, then the remainder of the digest (memoized).
    def diskpath
      @diskpath ||= begin
        prefix = digest[0, 3]
        remainder = digest[3..]
        File.join(hostdir, prefix, remainder)
      end
    end

    # String form is the cache key itself.
    def to_s
      key
    end

    protected

    # Calculate cache key for url, e.g. "GET http://example.com/path?a=1&b=2".
    def calculate_key
      parts = [ env.method.upcase, ' ', url.scheme, '://', url.host.downcase ]
      parts.push(':', url.port) unless default_port?
      # a bare "/" path is dropped so it keys the same as an empty path
      parts << url.path unless url.path == '/'
      query = url.query
      parts.push('?', querykey(query)) if query && query != ''
      parts.push(' ', bodykey) if env.request_body
      parts.join
    end

    # Calculate cache key segment for body: form bodies are canonicalized like
    # a query, short bodies are used verbatim, anything else is md5'd.
    def bodykey
      body = env.request_body.to_s
      if env.request_headers['Content-Type'] == 'application/x-www-form-urlencoded'
        querykey(body)
      elsif body.length < 50
        body
      else
        Digest::MD5.hexdigest(body)
      end
    end

    # Calculate canonical key for a query by sorting its "&" separated pieces.
    def querykey(q)
      pieces = q.split('&')
      pieces.sort.join('&')
    end

    # Is the url using the default port for its scheme?
    def default_port?
      url.port == url.default_port
    end

    # Calculate nice directory name from url.host: lowercase, leading "www."
    # removed, unexpected characters stripped, runs of dots collapsed.
    def hostdir
      name = url.host.downcase
               .gsub(/^www\./, '')
               .gsub(/[^a-z0-9._-]/, '')
               .squeeze('.')
      name.empty? ? 'any' : name
    end
  end
end
|
data/lib/httpdisk/cli.rb
ADDED
@@ -0,0 +1,218 @@
|
|
1
|
+
require 'faraday-cookie_jar'
|
2
|
+
require 'faraday_middleware'
|
3
|
+
require 'ostruct'
|
4
|
+
|
5
|
+
module HTTPDisk
  # Command line httpdisk command. Built from the options hash produced by the
  # command line parser; #run performs the request (or prints cache status).
  class Cli
    attr_reader :options

    # for --expires: number of seconds in each unit suffix
    UNITS = {
      s: 1,
      m: 60,
      h: 60 * 60,
      d: 24 * 60 * 60,
      w: 7 * 24 * 60 * 60,
      y: 365 * 24 * 60 * 60, # one 365-day year
    }.freeze

    # we have a very liberal retry policy
    RETRY_OPTIONS = {
      methods: %w[delete get head options patch post put trace],
      retry_statuses: (400..600).to_a,
      retry_if: ->(_env, _err) { true },
    }.freeze

    # options is the Hash of parsed command line flags (plus :url).
    def initialize(options)
      @options = options
    end

    # Make the request (or print status). Raises CliError if the response
    # status is >= 400 or if any flag turns out to be invalid.
    def run
      # short circuit --status
      if options[:status]
        status
        return
      end

      # create Faraday client
      faraday = create_faraday

      # run request
      response = faraday.run_request(request_method, request_url, request_body, request_headers)
      if response.status >= 400
        raise CliError, "the requested URL returned error: #{response.status} #{response.reason_phrase}"
      end

      # output
      if options[:output]
        File.open(options[:output], 'w') { output(response, _1) }
      else
        output(response, $stdout)
      end
    end

    # Build the Faraday connection. Middleware order matters, see comments.
    def create_faraday
      Faraday.new do |conn|
        # connection settings
        conn.proxy = proxy if options[:proxy]
        conn.options.timeout = options[:max_time] if options[:max_time]

        # cookie middleware
        conn.use :cookie_jar

        # BEFORE httpdisk so each redirect segment is cached
        conn.response :follow_redirects

        # httpdisk
        conn.use :httpdisk, client_options

        # AFTER httpdisk so transient failures are not cached
        if options[:retry]
          conn.request :retry, RETRY_OPTIONS.merge(max: options[:retry])
        end
      end
    end

    # Support for --status. Prints one "name: value" line for each pair
    # yielded by HTTPDisk::Client#status for this request.
    def status
      # build env
      env = Faraday::Env.new.tap do |e|
        e.method = request_method
        e.request_body = request_body
        e.request_headers = request_headers
        e.url = request_url
      end

      # now print status
      client = HTTPDisk::Client.new(nil, client_options)
      client.status(env).each do |name, value|
        puts "#{name}: #{value.inspect}"
      end
    end

    # Output response to f (an IO). With --include, a status line and the
    # response headers are written before the body.
    def output(response, f)
      if options[:include]
        f.puts "HTTPDISK #{response.status} #{response.reason_phrase}"
        response.headers.each { |name, value| f.puts("#{name}: #{value}") }
        f.puts
      end
      f.write(response.body)
    end

    #
    # request_XXX
    #

    # HTTP method (get, post, etc.) as a Symbol. Uses --request if present,
    # otherwise post when --data was given, otherwise get. Raises CliError if
    # the method is not one Faraday knows about.
    def request_method
      method = if options[:request]
        options[:request]
      elsif options[:data]
        'post'
      end
      method ||= 'get'
      method = method.downcase.to_sym

      if !Faraday::Connection::METHODS.include?(method)
        raise CliError, "invalid --request #{method.inspect}"
      end
      method
    end

    # Request url as a URI. A missing scheme is assumed to be http://, any
    # other scheme is rejected. Raises CliError for unsupported or invalid urls.
    def request_url
      url = options[:url]
      # recover from missing http:
      if url !~ %r{^https?://}i
        if url =~ %r{^\w+://}
          raise CliError, 'only http/https supported'
        end
        url = "http://#{url}"
      end
      URI.parse(url)
    rescue URI::InvalidURIError
      raise CliError, "invalid url #{url.inspect}"
    end

    # Request body (from --data), may be nil
    def request_body
      options[:data]
    end

    # Request headers as a Hash, built from --user-agent and each --header
    # "Key: value" string. Raises CliError for a malformed --header.
    def request_headers
      {}.tap do |headers|
        if options[:user_agent]
          headers['User-Agent'] = options[:user_agent]
        end

        # Array() so a missing :header (Cli built from a plain hash) is fine
        Array(options[:header]).each do |header|
          key, value = header.split(': ', 2)
          if !key || !value || key.empty? || value.empty?
            raise CliError, "invalid --header #{header.inspect}"
          end
          headers[key] = value
        end
      end
    end

    #
    # helpers
    #

    # Options to HTTPDisk::Client (:dir, :expires_in, :force, :force_errors).
    # Raises CliError if --expires can't be parsed.
    def client_options
      {}.tap do |client_options|
        client_options[:dir] = options[:dir]
        if options[:expires]
          seconds = parse_expires(options[:expires])
          if !seconds
            raise CliError, "invalid --expires #{options[:expires].inspect}"
          end
          client_options[:expires_in] = seconds
        end
        client_options[:force] = options[:force]
        client_options[:force_errors] = options[:force_errors]
      end
    end

    # Return validated --proxy flag (as a url string) if present, nil if the
    # flag is absent. Raises CliError if the flag is malformed.
    def proxy
      return if !options[:proxy]

      proxy = parse_proxy(options[:proxy])
      raise CliError, "--proxy should be host[:port], not #{options[:proxy].inspect}" if !proxy
      proxy
    end

    # Parse --expires flag, a number with an optional unit suffix from UNITS
    # (default is seconds). Returns the number of seconds, or nil if s is
    # malformed. Examples: "90" => 90, "1h" => 3600, "1y" => 31536000.
    def parse_expires(s)
      m = s.match(/^(\d+)([smhdwy])?$/)
      return if !m

      num, unit = m[1].to_i, (m[2] || 's').to_sym
      return if !UNITS.key?(unit)

      num * UNITS[unit]
    end

    # Parse --proxy flag of the form host[:port]. Returns an http:// url
    # string, or nil if the host or port is missing/invalid.
    def parse_proxy(proxy_flag)
      host, port = proxy_flag.split(':', 2)
      return if !host || host.empty?
      return if port&.empty?

      # let URI validate the pieces for us
      uri = URI.parse('http://placeholder')
      uri.host = host
      uri.port = port if port
      uri.to_s
    rescue URI::InvalidComponentError
      nil
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# manually load dependencies here since this is loaded standalone by bin
|
2
|
+
require 'httpdisk/error'
|
3
|
+
require 'httpdisk/version'
|
4
|
+
require 'slop'
|
5
|
+
|
6
|
+
module HTTPDisk
  # Slop parsing. This is broken out so we can run without require 'httpdisk'.
  module CliSlop
    # Parse command line args with Slop and return the options as a Hash with
    # the single positional url added under :url. --version and --help print
    # and exit. Raises Slop::Error if args is empty or if the number of
    # positional urls is not exactly one.
    def self.slop(args)
      parsed = Slop.parse(args) do |opts|
        opts.banner = 'httpdisk [options] [url]'

        # similar to curl
        opts.separator 'Similar to curl:'
        opts.string '-d', '--data', 'HTTP POST data'
        opts.array '-H', '--header', 'pass custom header(s) to server', delimiter: nil
        opts.boolean '-i', '--include', 'include response headers in the output'
        opts.integer '-m', '--max-time', 'maximum time allowed for the transfer'
        opts.string '-o', '--output', 'write to file instead of stdout'
        opts.string '-x', '--proxy', 'use host[:port] as proxy'
        opts.string '-X', '--request', 'HTTP method to use'
        opts.integer '--retry', 'retry request if problems occur'
        opts.boolean '-s', '--silent', "silent mode (don't print errors)"
        opts.string '-A', '--user-agent', 'send User-Agent to server'

        # from httpdisk
        opts.separator 'Specific to httpdisk:'
        opts.string '--dir', 'httpdisk cache directory (defaults to ~/httpdisk)'
        opts.string '--expires', 'when to expire cached requests (ex: 1h, 2d, 3w)'
        opts.boolean '--force', "don't read anything from cache (but still write)"
        opts.boolean '--force-errors', "don't read errors from cache (but still write)"
        opts.boolean '--status', 'show status for a url in the cache'

        # generic
        opts.boolean '--version', 'show version' do
          puts "httpdisk #{HTTPDisk::VERSION}"
          exit
        end
        opts.on '--help', 'show this help' do
          puts opts
          exit
        end
      end

      # an empty message when there are no args at all
      raise Slop::Error, '' if args.empty?

      urls = parsed.args
      raise Slop::Error, 'no URL specified' if urls.empty?
      raise Slop::Error, 'more than one URL specified' if urls.length > 1

      parsed.to_h.merge(url: urls.first)
    end
  end
end
|