httpdisk 0.1.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +28 -0
- data/Gemfile +1 -0
- data/Gemfile.lock +36 -4
- data/README.md +36 -4
- data/Rakefile +21 -10
- data/bin/httpdisk +10 -8
- data/bin/httpdisk-grep +46 -0
- data/examples.rb +1 -2
- data/httpdisk.gemspec +3 -2
- data/lib/httpdisk.rb +10 -5
- data/lib/httpdisk/cache.rb +33 -22
- data/lib/httpdisk/cache_key.rb +17 -9
- data/lib/httpdisk/cli/args.rb +57 -0
- data/lib/httpdisk/cli/main.rb +169 -0
- data/lib/httpdisk/client.rb +94 -18
- data/lib/httpdisk/error.rb +4 -0
- data/lib/httpdisk/grep/args.rb +35 -0
- data/lib/httpdisk/grep/main.rb +112 -0
- data/lib/httpdisk/grep/printer.rb +99 -0
- data/lib/httpdisk/payload.rb +11 -9
- data/lib/httpdisk/slop_duration.rb +24 -0
- data/lib/httpdisk/sloptions.rb +105 -0
- data/lib/httpdisk/version.rb +1 -1
- metadata +26 -4
- data/lib/httpdisk/cli.rb +0 -218
- data/lib/httpdisk/cli_slop.rb +0 -54
data/lib/httpdisk/cache_key.rb
CHANGED
@@ -4,14 +4,14 @@ require 'uri'
|
|
4
4
|
|
5
5
|
module HTTPDisk
|
6
6
|
class CacheKey
|
7
|
-
attr_reader :env
|
7
|
+
attr_reader :env, :ignore_params
|
8
8
|
|
9
|
-
def initialize(env)
|
10
|
-
@env = env
|
9
|
+
def initialize(env, ignore_params: [])
|
10
|
+
@env, @ignore_params = env, ignore_params
|
11
11
|
|
12
12
|
# sanity checks
|
13
|
-
raise
|
14
|
-
raise
|
13
|
+
raise InvalidUrl, "http/https required #{env.url.inspect}" if env.url.scheme !~ /^https?$/
|
14
|
+
raise InvalidUrl, "hostname required #{env.url.inspect}" if !env.url.host
|
15
15
|
end
|
16
16
|
|
17
17
|
def url
|
@@ -68,10 +68,9 @@ module HTTPDisk
|
|
68
68
|
# Calculate cache key segment for body
|
69
69
|
def bodykey
|
70
70
|
body = env.request_body.to_s
|
71
|
-
|
72
|
-
when env.request_headers['Content-Type'] == 'application/x-www-form-urlencoded'
|
71
|
+
if env.request_headers['Content-Type'] == 'application/x-www-form-urlencoded'
|
73
72
|
querykey(body)
|
74
|
-
|
73
|
+
elsif body.length < 50
|
75
74
|
body
|
76
75
|
else
|
77
76
|
Digest::MD5.hexdigest(body)
|
@@ -80,7 +79,16 @@ module HTTPDisk
|
|
80
79
|
|
81
80
|
# Calculate canonical key for a query
|
82
81
|
def querykey(q)
|
83
|
-
q.split('&').sort
|
82
|
+
parts = q.split('&').sort
|
83
|
+
if !ignore_params.empty?
|
84
|
+
parts = parts.map do |part|
|
85
|
+
key, value = part.split('=', 2)
|
86
|
+
next if ignore_params.include?(key)
|
87
|
+
|
88
|
+
"#{key}=#{value}"
|
89
|
+
end.compact
|
90
|
+
end
|
91
|
+
parts.join('&')
|
84
92
|
end
|
85
93
|
|
86
94
|
def default_port?
|
data/lib/httpdisk/cli/args.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
# manually load dependencies here since this is loaded standalone by bin
|
2
|
+
require 'httpdisk/error'
|
3
|
+
require 'httpdisk/slop_duration'
|
4
|
+
require 'httpdisk/version'
|
5
|
+
require 'slop'
|
6
|
+
|
7
|
+
module HTTPDisk
|
8
|
+
module Cli
|
9
|
+
# Slop parsing. This is broken out so we can run without require 'httpdisk'.
|
10
|
+
module Args
|
11
|
+
def self.slop(args)
|
12
|
+
slop = Slop.parse(args) do |o|
|
13
|
+
o.banner = 'httpdisk [options] [url]'
|
14
|
+
|
15
|
+
# similar to curl
|
16
|
+
o.separator 'Similar to curl:'
|
17
|
+
o.string '-d', '--data', 'HTTP POST data'
|
18
|
+
o.array '-H', '--header', 'pass custom header(s) to server', delimiter: nil
|
19
|
+
o.boolean '-i', '--include', 'include response headers in the output'
|
20
|
+
o.integer '-m', '--max-time', 'maximum time allowed for the transfer'
|
21
|
+
o.string '-o', '--output', 'write to file instead of stdout'
|
22
|
+
o.string '-x', '--proxy', 'use host[:port] as proxy'
|
23
|
+
o.string '-X', '--request', 'HTTP method to use'
|
24
|
+
o.integer '--retry', 'retry request if problems occur'
|
25
|
+
o.boolean '-s', '--silent', "silent mode (don't print errors)"
|
26
|
+
o.string '-A', '--user-agent', 'send User-Agent to server'
|
27
|
+
|
28
|
+
# from httpdisk
|
29
|
+
o.separator 'Specific to httpdisk:'
|
30
|
+
o.string '--dir', 'httpdisk cache directory (defaults to ~/httpdisk)'
|
31
|
+
o.duration '--expires', 'when to expire cached requests (ex: 1h, 2d, 3w)'
|
32
|
+
o.boolean '--force', "don't read anything from cache (but still write)"
|
33
|
+
o.boolean '--force-errors', "don't read errors from cache (but still write)"
|
34
|
+
o.boolean '--status', 'show status for a url in the cache'
|
35
|
+
|
36
|
+
# generic
|
37
|
+
o.boolean '--version', 'show version' do
|
38
|
+
puts "httpdisk #{HTTPDisk::VERSION}"
|
39
|
+
exit
|
40
|
+
end
|
41
|
+
o.on '--help', 'show this help' do
|
42
|
+
puts o
|
43
|
+
exit
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
raise Slop::Error, '' if args.empty?
|
48
|
+
raise Slop::Error, 'no URL specified' if slop.args.empty?
|
49
|
+
raise Slop::Error, 'more than one URL specified' if slop.args.length > 1
|
50
|
+
|
51
|
+
slop.to_h.tap do
|
52
|
+
_1[:url] = slop.args.first
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
data/lib/httpdisk/cli/main.rb
ADDED
@@ -0,0 +1,169 @@
|
|
1
|
+
require 'faraday-cookie_jar'
|
2
|
+
require 'faraday_middleware'
|
3
|
+
require 'ostruct'
|
4
|
+
|
5
|
+
module HTTPDisk
|
6
|
+
module Cli
|
7
|
+
# Command line httpdisk command.
|
8
|
+
class Main
|
9
|
+
attr_reader :options
|
10
|
+
|
11
|
+
def initialize(options)
|
12
|
+
@options = options
|
13
|
+
end
|
14
|
+
|
15
|
+
# Make the request (or print status)
|
16
|
+
def run
|
17
|
+
# short circuit --status
|
18
|
+
if options[:status]
|
19
|
+
status
|
20
|
+
return
|
21
|
+
end
|
22
|
+
|
23
|
+
# create Faraday client
|
24
|
+
faraday = create_faraday
|
25
|
+
|
26
|
+
# run request
|
27
|
+
response = faraday.run_request(request_method, request_url, request_body, request_headers)
|
28
|
+
if response.status >= 400
|
29
|
+
raise CliError, "the requested URL returned error: #{response.status} #{response.reason_phrase}"
|
30
|
+
end
|
31
|
+
|
32
|
+
# output
|
33
|
+
if options[:output]
|
34
|
+
File.open(options[:output], 'w') { output(response, _1) }
|
35
|
+
else
|
36
|
+
output(response, $stdout)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def create_faraday
|
41
|
+
Faraday.new do
|
42
|
+
# connection settings
|
43
|
+
_1.proxy = options[:proxy] if options[:proxy]
|
44
|
+
_1.options.timeout = options[:max_time] if options[:max_time]
|
45
|
+
|
46
|
+
# cookie middleware
|
47
|
+
_1.use :cookie_jar
|
48
|
+
|
49
|
+
# BEFORE httpdisk so each redirect segment is cached
|
50
|
+
_1.response :follow_redirects
|
51
|
+
|
52
|
+
# httpdisk
|
53
|
+
_1.use :httpdisk, client_options
|
54
|
+
|
55
|
+
# AFTER httpdisk so transient failures are not cached
|
56
|
+
if options[:retry]
|
57
|
+
# we have a very liberal retry policy
|
58
|
+
retry_options = {
|
59
|
+
max: options[:retry],
|
60
|
+
methods: %w[delete get head options patch post put trace],
|
61
|
+
retry_statuses: (500..600).to_a,
|
62
|
+
retry_if: ->(_env, _err) { true },
|
63
|
+
}
|
64
|
+
_1.request :retry, retry_options
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
# Support for --status
|
70
|
+
def status
|
71
|
+
# build env
|
72
|
+
env = Faraday::Env.new.tap do
|
73
|
+
_1.method = request_method
|
74
|
+
_1.request_body = request_body
|
75
|
+
_1.request_headers = request_headers
|
76
|
+
# Run the url through Faraday to make sure we see the same stuff as middleware.
|
77
|
+
_1.url = Faraday.new.build_url(request_url)
|
78
|
+
end
|
79
|
+
|
80
|
+
# now print status
|
81
|
+
client = HTTPDisk::Client.new(nil, client_options)
|
82
|
+
client.status(env).each do
|
83
|
+
puts "#{_1}: #{_2.inspect}"
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
# Output response to f
|
88
|
+
def output(response, f)
|
89
|
+
if options[:include]
|
90
|
+
f.puts "HTTPDISK #{response.status} #{response.reason_phrase}"
|
91
|
+
response.headers.each { f.puts("#{_1}: #{_2}") }
|
92
|
+
f.puts
|
93
|
+
end
|
94
|
+
f.write(response.body)
|
95
|
+
end
|
96
|
+
|
97
|
+
#
|
98
|
+
# request_XXX
|
99
|
+
#
|
100
|
+
|
101
|
+
# HTTP method (get, post, etc.)
|
102
|
+
def request_method
|
103
|
+
method = if options[:request]
|
104
|
+
options[:request]
|
105
|
+
elsif options[:data]
|
106
|
+
'post'
|
107
|
+
end
|
108
|
+
method ||= 'get'
|
109
|
+
method = method.downcase.to_sym
|
110
|
+
|
111
|
+
if !Faraday::Connection::METHODS.include?(method)
|
112
|
+
raise CliError, "invalid --request #{method.inspect}"
|
113
|
+
end
|
114
|
+
|
115
|
+
method
|
116
|
+
end
|
117
|
+
|
118
|
+
# Request url
|
119
|
+
def request_url
|
120
|
+
url = options[:url]
|
121
|
+
# recover from missing http:
|
122
|
+
if url !~ %r{^https?://}i
|
123
|
+
if url =~ %r{^\w+://}
|
124
|
+
raise CliError, 'only http/https supported'
|
125
|
+
end
|
126
|
+
|
127
|
+
url = "http://#{url}"
|
128
|
+
end
|
129
|
+
URI.parse(url)
|
130
|
+
rescue URI::InvalidURIError
|
131
|
+
raise CliError, "invalid url #{url.inspect}"
|
132
|
+
end
|
133
|
+
|
134
|
+
# Request body
|
135
|
+
def request_body
|
136
|
+
options[:data]
|
137
|
+
end
|
138
|
+
|
139
|
+
# Request headers
|
140
|
+
def request_headers
|
141
|
+
{}.tap do |headers|
|
142
|
+
if options[:user_agent]
|
143
|
+
headers['User-Agent'] = options[:user_agent]
|
144
|
+
end
|
145
|
+
|
146
|
+
options[:header].each do |header|
|
147
|
+
key, value = header.split(': ', 2)
|
148
|
+
if !key || !value || key.empty? || value.empty?
|
149
|
+
raise CliError, "invalid --header #{header.inspect}"
|
150
|
+
end
|
151
|
+
|
152
|
+
headers[key] = value
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
#
|
158
|
+
# helpers
|
159
|
+
#
|
160
|
+
|
161
|
+
# Options to HTTPDisk::Client
|
162
|
+
def client_options
|
163
|
+
client_options = options.slice(:dir, :expires, :force, :force_errors)
|
164
|
+
client_options[:utf8] = true
|
165
|
+
client_options
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
169
|
+
end
|
data/lib/httpdisk/client.rb
CHANGED
@@ -1,34 +1,43 @@
|
|
1
|
+
require 'content-type'
|
1
2
|
require 'faraday'
|
3
|
+
require 'logger'
|
2
4
|
|
3
5
|
module HTTPDisk
|
4
|
-
OPTIONS = {
|
5
|
-
dir: File.join(ENV['HOME'], 'httpdisk'),
|
6
|
-
expires_in: nil,
|
7
|
-
force: false,
|
8
|
-
force_errors: false,
|
9
|
-
}.freeze
|
10
|
-
|
11
6
|
# Middleware and main entry point.
|
12
7
|
class Client < Faraday::Middleware
|
13
8
|
attr_reader :cache, :options
|
14
9
|
|
15
10
|
def initialize(app, options = {})
|
16
|
-
|
11
|
+
options = Sloptions.parse(options) do
|
12
|
+
_1.string :dir, default: File.join(ENV['HOME'], 'httpdisk')
|
13
|
+
_1.integer :expires
|
14
|
+
_1.boolean :force
|
15
|
+
_1.boolean :force_errors
|
16
|
+
_1.array :ignore_params, default: []
|
17
|
+
_1.on :logger, type: [:boolean, Logger]
|
18
|
+
_1.boolean :utf8
|
19
|
+
end
|
20
|
+
|
21
|
+
super(app, options)
|
17
22
|
@cache = Cache.new(options)
|
18
23
|
end
|
19
24
|
|
20
25
|
def call(env)
|
21
|
-
cache_key = CacheKey.new(env)
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
26
|
+
cache_key = CacheKey.new(env, ignore_params: ignore_params)
|
27
|
+
logger&.info("#{env.method.upcase} #{env.url} (#{cache.status(cache_key)})")
|
28
|
+
env[:httpdisk_diskpath] = cache.diskpath(cache_key)
|
29
|
+
|
30
|
+
# check cache, fallback to network
|
31
|
+
if response = read(cache_key, env)
|
32
|
+
response.env[:httpdisk] = true
|
33
|
+
else
|
34
|
+
response = perform(env)
|
35
|
+
response.env[:httpdisk] = false
|
30
36
|
write(cache_key, env, response)
|
31
37
|
end
|
38
|
+
|
39
|
+
encode_body(response)
|
40
|
+
response
|
32
41
|
end
|
33
42
|
|
34
43
|
# Returns cache status for this request
|
@@ -46,7 +55,7 @@ module HTTPDisk
|
|
46
55
|
protected
|
47
56
|
|
48
57
|
# perform the request, return Faraday::Response
|
49
|
-
def perform(
|
58
|
+
def perform(env)
|
50
59
|
app.call(env)
|
51
60
|
rescue Faraday::ConnectionFailed, Faraday::SSLError, Faraday::TimeoutError => e
|
52
61
|
# try to avoid caching proxy errors
|
@@ -95,6 +104,73 @@ module HTTPDisk
|
|
95
104
|
|
96
105
|
err.to_s =~ /#{proxy.host}.*#{proxy.port}/
|
97
106
|
end
|
107
|
+
|
108
|
+
# Set string encoding for response body. The cache always returns
|
109
|
+
# ASCII-8BIT, but we have no idea what the encoding will be from the
|
110
|
+
# network. Not all adapters honor Content-Type (including the default
|
111
|
+
# adapter).
|
112
|
+
def encode_body(response)
|
113
|
+
body = response.body || ''
|
114
|
+
|
115
|
+
# parse Content-Type
|
116
|
+
begin
|
117
|
+
content_type = response['Content-Type'] && ContentType.parse(response['Content-Type'])
|
118
|
+
rescue Parslet::ParseFailed
|
119
|
+
# unparsable
|
120
|
+
end
|
121
|
+
|
122
|
+
# look at charset and set body encoding if necessary
|
123
|
+
encoding = encoding_for(content_type)
|
124
|
+
if body.encoding != encoding
|
125
|
+
body = body.dup if body.frozen?
|
126
|
+
body.force_encoding(encoding)
|
127
|
+
end
|
128
|
+
|
129
|
+
# if :utf8, force body to UTF-8
|
130
|
+
if options[:utf8] && content_type && response_text?(content_type)
|
131
|
+
body = body.dup if body.frozen?
|
132
|
+
begin
|
133
|
+
body.encode!('UTF-8', invalid: :replace, undef: :replace, replace: '?')
|
134
|
+
rescue Encoding::ConverterNotFoundError
|
135
|
+
# rare, can't do anything here
|
136
|
+
body = "httpdisk could not convert from #{body.encoding.name} to UTF-8"
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
response.env[:body] = body
|
141
|
+
end
|
142
|
+
|
143
|
+
def encoding_for(content_type)
|
144
|
+
if content_type&.charset
|
145
|
+
begin
|
146
|
+
return Encoding.find(content_type.charset)
|
147
|
+
rescue ArgumentError
|
148
|
+
# unknown charset
|
149
|
+
end
|
150
|
+
end
|
151
|
+
Encoding::ASCII_8BIT
|
152
|
+
end
|
153
|
+
|
154
|
+
def response_text?(content_type)
|
155
|
+
content_type.type == 'text' || content_type.mime_type == 'application/json'
|
156
|
+
end
|
157
|
+
|
158
|
+
#
|
159
|
+
# options
|
160
|
+
#
|
161
|
+
|
162
|
+
def ignore_params
|
163
|
+
@ignore_params ||= options[:ignore_params].map { CGI.escape(_1.to_s) }.to_set
|
164
|
+
end
|
165
|
+
|
166
|
+
def logger
|
167
|
+
return if !options[:logger]
|
168
|
+
|
169
|
+
@logger ||= case options[:logger]
|
170
|
+
when true then Logger.new($stderr)
|
171
|
+
when Logger then options[:logger]
|
172
|
+
end
|
173
|
+
end
|
98
174
|
end
|
99
175
|
end
|
100
176
|
|
data/lib/httpdisk/error.rb
CHANGED
data/lib/httpdisk/grep/args.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# manually load dependencies here since this is loaded standalone by bin
|
2
|
+
require 'httpdisk/version'
|
3
|
+
require 'slop'
|
4
|
+
|
5
|
+
module HTTPDisk
|
6
|
+
module Grep
|
7
|
+
module Args
|
8
|
+
# Slop parsing. This is broken out so we can run without require 'httpdisk'.
|
9
|
+
def self.slop(args)
|
10
|
+
slop = Slop.parse(args) do |o|
|
11
|
+
o.banner = 'httpdisk-grep [options] pattern [path ...]'
|
12
|
+
o.boolean '-c', '--count', 'suppress normal output and show count'
|
13
|
+
o.boolean '-h', '--head', 'show req headers before each match'
|
14
|
+
o.boolean '-s', '--silent', 'do not print anything to stdout'
|
15
|
+
o.boolean '--version', 'show version' do
|
16
|
+
puts "httpdisk-grep #{HTTPDisk::VERSION}"
|
17
|
+
exit
|
18
|
+
end
|
19
|
+
o.on '--help', 'show this help' do
|
20
|
+
puts o
|
21
|
+
exit
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
raise Slop::Error, '' if args.empty?
|
26
|
+
raise Slop::Error, 'no PATTERN specified' if slop.args.empty?
|
27
|
+
|
28
|
+
slop.to_h.tap do
|
29
|
+
_1[:pattern] = slop.args.shift
|
30
|
+
_1[:roots] = slop.args
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|