httpdisk 0.2.0 → 0.5.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +9 -0
- data/Gemfile.lock +16 -4
- data/README.md +32 -6
- data/Rakefile +14 -11
- data/bin/httpdisk +9 -7
- data/bin/httpdisk-grep +46 -0
- data/httpdisk.gemspec +1 -0
- data/lib/httpdisk.rb +10 -5
- data/lib/httpdisk/cache.rb +31 -21
- data/lib/httpdisk/cache_key.rb +15 -6
- data/lib/httpdisk/cli/args.rb +57 -0
- data/lib/httpdisk/cli/main.rb +169 -0
- data/lib/httpdisk/client.rb +82 -19
- data/lib/httpdisk/error.rb +4 -0
- data/lib/httpdisk/grep/args.rb +35 -0
- data/lib/httpdisk/grep/main.rb +112 -0
- data/lib/httpdisk/grep/printer.rb +99 -0
- data/lib/httpdisk/payload.rb +7 -5
- data/lib/httpdisk/slop_duration.rb +24 -0
- data/lib/httpdisk/sloptions.rb +105 -0
- data/lib/httpdisk/version.rb +1 -1
- metadata +25 -4
- data/lib/httpdisk/cli.rb +0 -223
- data/lib/httpdisk/cli_slop.rb +0 -54
data/lib/httpdisk/cache_key.rb
CHANGED
@@ -4,14 +4,14 @@ require 'uri'
|
|
4
4
|
|
5
5
|
module HTTPDisk
|
6
6
|
class CacheKey
|
7
|
-
attr_reader :env
|
7
|
+
attr_reader :env, :ignore_params
|
8
8
|
|
9
|
-
def initialize(env)
|
10
|
-
@env = env
|
9
|
+
def initialize(env, ignore_params: [])
|
10
|
+
@env, @ignore_params = env, ignore_params
|
11
11
|
|
12
12
|
# sanity checks
|
13
|
-
raise
|
14
|
-
raise
|
13
|
+
raise InvalidUrl, "http/https required #{env.url.inspect}" if env.url.scheme !~ /^https?$/
|
14
|
+
raise InvalidUrl, "hostname required #{env.url.inspect}" if !env.url.host
|
15
15
|
end
|
16
16
|
|
17
17
|
def url
|
@@ -79,7 +79,16 @@ module HTTPDisk
|
|
79
79
|
|
80
80
|
# Calculate canonical key for a query
|
81
81
|
def querykey(q)
|
82
|
-
q.split('&').sort
|
82
|
+
parts = q.split('&').sort
|
83
|
+
if !ignore_params.empty?
|
84
|
+
parts = parts.map do |part|
|
85
|
+
key, value = part.split('=', 2)
|
86
|
+
next if ignore_params.include?(key)
|
87
|
+
|
88
|
+
"#{key}=#{value}"
|
89
|
+
end.compact
|
90
|
+
end
|
91
|
+
parts.join('&')
|
83
92
|
end
|
84
93
|
|
85
94
|
def default_port?
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# manually load dependencies here since this is loaded standalone by bin
|
2
|
+
require 'httpdisk/error'
|
3
|
+
require 'httpdisk/slop_duration'
|
4
|
+
require 'httpdisk/version'
|
5
|
+
require 'slop'
|
6
|
+
|
7
|
+
module HTTPDisk
  module Cli
    # Argument parsing for the httpdisk executable, built on Slop. Lives in its
    # own module so bin can parse arguments without require 'httpdisk'.
    module Args
      # Parse the command line in args and return the options as a Hash.
      #
      # The single positional argument is stored under :url. Raises
      # Slop::Error with an empty message when args is empty, and with a
      # descriptive message when zero or several URLs are given. The --version
      # and --help handlers print to stdout and exit.
      def self.slop(args)
        parsed = Slop.parse(args) do |opts|
          opts.banner = 'httpdisk [options] [url]'

          # options that mirror curl
          opts.separator 'Similar to curl:'
          opts.string '-d', '--data', 'HTTP POST data'
          opts.array '-H', '--header', 'pass custom header(s) to server', delimiter: nil
          opts.boolean '-i', '--include', 'include response headers in the output'
          opts.integer '-m', '--max-time', 'maximum time allowed for the transfer'
          opts.string '-o', '--output', 'write to file instead of stdout'
          opts.string '-x', '--proxy', 'use host[:port] as proxy'
          opts.string '-X', '--request', 'HTTP method to use'
          opts.integer '--retry', 'retry request if problems occur'
          opts.boolean '-s', '--silent', "silent mode (don't print errors)"
          opts.string '-A', '--user-agent', 'send User-Agent to server'

          # options that only httpdisk understands
          opts.separator 'Specific to httpdisk:'
          opts.string '--dir', 'httpdisk cache directory (defaults to ~/httpdisk)'
          opts.duration '--expires', 'when to expire cached requests (ex: 1h, 2d, 3w)'
          opts.boolean '--force', "don't read anything from cache (but still write)"
          opts.boolean '--force-errors', "don't read errors from cache (but still write)"
          opts.boolean '--status', 'show status for a url in the cache'

          # generic
          opts.boolean '--version', 'show version' do
            puts "httpdisk #{HTTPDisk::VERSION}"
            exit
          end
          opts.on '--help', 'show this help' do
            puts opts
            exit
          end
        end

        # the empty message is deliberate; the checks run in this order
        raise Slop::Error, '' if args.empty?

        urls = parsed.args
        raise Slop::Error, 'no URL specified' if urls.empty?
        raise Slop::Error, 'more than one URL specified' if urls.length > 1

        options = parsed.to_h
        options[:url] = urls.first
        options
      end
    end
  end
end
|
@@ -0,0 +1,169 @@
|
|
1
|
+
require 'faraday-cookie_jar'
|
2
|
+
require 'faraday_middleware'
|
3
|
+
require 'ostruct'
|
4
|
+
|
5
|
+
module HTTPDisk
  module Cli
    # Command line httpdisk command. Given the parsed options hash it either
    # performs one request through a Faraday stack that includes the httpdisk
    # middleware, or (with --status) prints the cache status for the request.
    class Main
      attr_reader :options

      # options - Hash of parsed command line options with symbol keys
      #           (:url, :data, :header, :output, :status, ...).
      def initialize(options)
        @options = options
      end

      # Make the request (or print status).
      #
      # Raises CliError when the response status is 400 or above.
      def run
        # short circuit --status
        if options[:status]
          status
          return
        end

        # create Faraday client
        faraday = create_faraday

        # run request
        response = faraday.run_request(request_method, request_url, request_body, request_headers)
        if response.status >= 400
          raise CliError, "the requested URL returned error: #{response.status} #{response.reason_phrase}"
        end

        # output. The file is opened in binary mode so response bodies are
        # written byte-for-byte (no newline or encoding translation).
        if options[:output]
          File.open(options[:output], 'wb') { |f| output(response, f) }
        else
          output(response, $stdout)
        end
      end

      # Build the Faraday connection used by #run. Middleware order matters,
      # see the inline comments.
      def create_faraday
        Faraday.new do |conn|
          # connection settings
          conn.proxy = options[:proxy] if options[:proxy]
          conn.options.timeout = options[:max_time] if options[:max_time]

          # cookie middleware
          conn.use :cookie_jar

          # BEFORE httpdisk so each redirect segment is cached
          conn.response :follow_redirects

          # httpdisk
          conn.use :httpdisk, client_options

          # AFTER httpdisk so transient failures are not cached
          if options[:retry]
            # we have a very liberal retry policy
            retry_options = {
              max: options[:retry],
              methods: %w[delete get head options patch post put trace],
              retry_statuses: (500..600).to_a,
              retry_if: ->(_env, _err) { true },
            }
            conn.request :retry, retry_options
          end
        end
      end

      # Support for --status. Builds a Faraday::Env for the request and prints
      # each key/value pair reported by HTTPDisk::Client#status.
      def status
        # build env
        env = Faraday::Env.new.tap do |e|
          e.method = request_method
          e.request_body = request_body
          e.request_headers = request_headers
          # Run the url through Faraday to make sure we see the same stuff as middleware.
          e.url = Faraday.new.build_url(request_url)
        end

        # now print status
        client = HTTPDisk::Client.new(nil, client_options)
        client.status(env).each do |key, value|
          puts "#{key}: #{value.inspect}"
        end
      end

      # Output response to f. With --include the status line and headers are
      # written first, followed by a blank line.
      def output(response, f)
        if options[:include]
          f.puts "HTTPDISK #{response.status} #{response.reason_phrase}"
          response.headers.each { |name, value| f.puts("#{name}: #{value}") }
          f.puts
        end
        f.write(response.body)
      end

      #
      # request_XXX
      #

      # HTTP method (get, post, etc.) as a lowercase symbol. Uses --request if
      # given, otherwise post when --data is present, otherwise get.
      #
      # Raises CliError if the method is not in Faraday::Connection::METHODS.
      def request_method
        method = options[:request] || (options[:data] ? 'post' : 'get')
        method = method.downcase.to_sym

        unless Faraday::Connection::METHODS.include?(method)
          raise CliError, "invalid --request #{method.inspect}"
        end

        method
      end

      # Request url as a URI. A url without a scheme gets http:// prepended.
      #
      # Raises CliError for a non-http(s) scheme or an unparsable url.
      def request_url
        url = options[:url]
        # recover from missing http:. \A anchors the check to the start of the
        # string; ^ would also match after an embedded newline.
        unless %r{\Ahttps?://}i.match?(url)
          raise CliError, 'only http/https supported' if %r{\A\w+://}.match?(url)

          url = "http://#{url}"
        end
        URI.parse(url)
      rescue URI::InvalidURIError
        raise CliError, "invalid url #{url.inspect}"
      end

      # Request body
      def request_body
        options[:data]
      end

      # Request headers as a Hash, built from --user-agent and each --header
      # value of the form "Key: value".
      #
      # Raises CliError for a --header without a key or a value.
      def request_headers
        {}.tap do |headers|
          headers['User-Agent'] = options[:user_agent] if options[:user_agent]

          # Array() tolerates an options hash without a :header entry
          Array(options[:header]).each do |header|
            key, value = header.split(': ', 2)
            if !key || !value || key.empty? || value.empty?
              raise CliError, "invalid --header #{header.inspect}"
            end

            headers[key] = value
          end
        end
      end

      #
      # helpers
      #

      # Options to HTTPDisk::Client: the cache-related command line options
      # plus utf8: true.
      def client_options
        options.slice(:dir, :expires, :force, :force_errors).merge(utf8: true)
      end
    end
  end
end
|
data/lib/httpdisk/client.rb
CHANGED
@@ -1,38 +1,43 @@
|
|
1
|
+
require 'content-type'
|
1
2
|
require 'faraday'
|
2
3
|
require 'logger'
|
3
4
|
|
4
5
|
module HTTPDisk
|
5
|
-
OPTIONS = {
|
6
|
-
dir: File.join(ENV['HOME'], 'httpdisk'),
|
7
|
-
expires_in: nil,
|
8
|
-
force: false,
|
9
|
-
force_errors: false,
|
10
|
-
logger: false,
|
11
|
-
}.freeze
|
12
|
-
|
13
6
|
# Middleware and main entry point.
|
14
7
|
class Client < Faraday::Middleware
|
15
8
|
attr_reader :cache, :options
|
16
9
|
|
17
10
|
def initialize(app, options = {})
|
18
|
-
|
11
|
+
options = Sloptions.parse(options) do
|
12
|
+
_1.string :dir, default: File.join(ENV['HOME'], 'httpdisk')
|
13
|
+
_1.integer :expires
|
14
|
+
_1.boolean :force
|
15
|
+
_1.boolean :force_errors
|
16
|
+
_1.array :ignore_params, default: []
|
17
|
+
_1.on :logger, type: [:boolean, Logger]
|
18
|
+
_1.boolean :utf8
|
19
|
+
end
|
20
|
+
|
21
|
+
super(app, options)
|
19
22
|
@cache = Cache.new(options)
|
20
23
|
end
|
21
24
|
|
22
25
|
def call(env)
|
23
|
-
cache_key = CacheKey.new(env)
|
26
|
+
cache_key = CacheKey.new(env, ignore_params: ignore_params)
|
24
27
|
logger&.info("#{env.method.upcase} #{env.url} (#{cache.status(cache_key)})")
|
28
|
+
env[:httpdisk_diskpath] = cache.diskpath(cache_key)
|
25
29
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
# miss
|
32
|
-
perform(env).tap do |response|
|
30
|
+
# check cache, fallback to network
|
31
|
+
if response = read(cache_key, env)
|
32
|
+
response.env[:httpdisk] = true
|
33
|
+
else
|
34
|
+
response = perform(env)
|
33
35
|
response.env[:httpdisk] = false
|
34
36
|
write(cache_key, env, response)
|
35
37
|
end
|
38
|
+
|
39
|
+
encode_body(response)
|
40
|
+
response
|
36
41
|
end
|
37
42
|
|
38
43
|
# Returns cache status for this request
|
@@ -100,10 +105,68 @@ module HTTPDisk
|
|
100
105
|
err.to_s =~ /#{proxy.host}.*#{proxy.port}/
|
101
106
|
end
|
102
107
|
|
108
|
+
# Set string encoding for response body. The cache always returns
|
109
|
+
# ASCII-8BIT, but we have no idea what the encoding will be from the
|
110
|
+
# network. Not all adapters honor Content-Type (including the default
|
111
|
+
# adapter).
|
112
|
+
# Aligns the encoding of response.body with the charset declared in
# Content-Type (binary when there is none), optionally converts text
# responses to UTF-8, and stores the result in response.env[:body].
def encode_body(response)
  body = response.body || ''

  # parse Content-Type; a missing or unparsable header leaves content_type nil
  header = response['Content-Type']
  content_type = begin
    header && ContentType.parse(header)
  rescue Parslet::ParseFailed
    nil
  end

  # relabel the bytes with the declared charset if they are tagged differently
  declared = encoding_for(content_type)
  unless body.encoding == declared
    body = body.dup if body.frozen?
    body.force_encoding(declared)
  end

  # with :utf8, text-like bodies are transcoded to UTF-8
  wants_utf8 = options[:utf8] && content_type && response_text?(content_type)
  if wants_utf8
    body = body.dup if body.frozen?
    begin
      body.encode!('UTF-8', invalid: :replace, undef: :replace, replace: '?')
    rescue Encoding::ConverterNotFoundError
      # rare, can't do anything here
      body = "httpdisk could not convert from #{body.encoding.name} to UTF-8"
    end
  end

  response.env[:body] = body
end
|
142
|
+
|
143
|
+
# Returns the Encoding named by content_type's charset. Falls back to
# Encoding::ASCII_8BIT when content_type is nil, has no charset, or names a
# charset Ruby does not know.
def encoding_for(content_type)
  return Encoding::ASCII_8BIT unless content_type&.charset

  begin
    Encoding.find(content_type.charset)
  rescue ArgumentError
    # unknown charset
    Encoding::ASCII_8BIT
  end
end
|
153
|
+
|
154
|
+
# True when content_type describes text: any text/* type, or exactly
# application/json.
def response_text?(content_type)
  return true if content_type.type == 'text'

  content_type.mime_type == 'application/json'
end
|
157
|
+
|
158
|
+
#
|
159
|
+
# options
|
160
|
+
#
|
161
|
+
|
162
|
+
# Set of CGI-escaped names built from options[:ignore_params]; computed once
# and memoized in @ignore_params.
def ignore_params
  return @ignore_params if @ignore_params

  escaped = options[:ignore_params].map { |name| CGI.escape(name.to_s) }
  @ignore_params = escaped.to_set
end
|
165
|
+
|
103
166
|
def logger
|
104
|
-
return
|
167
|
+
return if !options[:logger]
|
105
168
|
|
106
|
-
@logger
|
169
|
+
@logger ||= case options[:logger]
|
107
170
|
when true then Logger.new($stderr)
|
108
171
|
when Logger then options[:logger]
|
109
172
|
end
|
data/lib/httpdisk/error.rb
CHANGED
@@ -0,0 +1,35 @@
|
|
1
|
+
# manually load dependencies here since this is loaded standalone by bin
|
2
|
+
require 'httpdisk/version'
|
3
|
+
require 'slop'
|
4
|
+
|
5
|
+
module HTTPDisk
  module Grep
    # Argument parsing for the httpdisk-grep executable, built on Slop.
    module Args
      # Slop parsing. This is broken out so we can run without require 'httpdisk'.
      #
      # Returns the Slop options as a Hash with the first positional argument
      # under :pattern and the remaining ones (possibly none) under :roots.
      # Raises Slop::Error with an empty message when args is empty, and with
      # 'no PATTERN specified' when no positional argument is given. The
      # --version and --help handlers print to stdout and exit.
      def self.slop(args)
        parsed = Slop.parse(args) do |o|
          o.banner = 'httpdisk-grep [options] pattern [path ...]'
          o.boolean '-c', '--count', 'suppress normal output and show count'
          o.boolean '-h', '--head', 'show req headers before each match'
          o.boolean '-s', '--silent', 'do not print anything to stdout'
          o.boolean '--version', 'show version' do
            puts "httpdisk-grep #{HTTPDisk::VERSION}"
            exit
          end
          o.on '--help', 'show this help' do
            puts o
            exit
          end
        end

        raise Slop::Error, '' if args.empty?
        raise Slop::Error, 'no PATTERN specified' if parsed.args.empty?

        # Destructure instead of shift so the parser's own argument array is
        # left untouched and :roots is an independent array.
        pattern, *roots = parsed.args
        parsed.to_h.merge(pattern: pattern, roots: roots)
      end
    end
  end
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
require 'find'
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
module HTTPDisk
  module Grep
    # Command line httpdisk-grep command: searches the bodies of gzipped
    # httpdisk payload files for a regex and prints matches via a printer.
    class Main
      attr_reader :options, :success

      # options - Hash of parsed command line options (:pattern, :roots,
      #           :silent, :count, :head).
      def initialize(options)
        @options = options
        # explicit false so #run returns a real boolean when nothing matches
        @success = false
      end

      # Enumerate file paths one at a time. Returns true if matches were found.
      #
      # Any StandardError raised while processing a file is re-raised as a
      # CliError naming the file; with HTTPDISK_DEBUG set, the error class and
      # backtrace are printed to stderr first.
      def run
        paths.each do |path|
          run_one(path)
        rescue StandardError => e
          if ENV['HTTPDISK_DEBUG']
            $stderr.puts
            $stderr.puts e.class
            $stderr.puts e.backtrace.join("\n")
          end
          raise CliError, "#{e.message[0, 70]} (#{path})"
        end
        success
      end

      # Search a single payload file and print its matches, if any.
      def run_one(path)
        # read payload & body
        payload = Zlib::GzipReader.open(path, encoding: 'ASCII-8BIT') do |gz|
          Payload.read(gz)
        end
        body = prepare_body(payload)

        # collect all_matches: one array of MatchData per matching line
        all_matches = body.each_line.map do |line|
          [].tap do |matches|
            line.scan(pattern) { matches << Regexp.last_match }
          end
        end.reject(&:empty?)
        return if all_matches.empty?

        # print
        @success = true
        printer.print(path, payload, all_matches)
      end

      # file paths to be searched: every regular file under the given roots
      # (or under '.' when no roots were given), sorted.
      def paths
        roots = Array(options[:roots])
        default_root = roots.empty?
        roots = ['.'] if default_root

        # find files in roots
        files = roots.flat_map { |root| Find.find(root).to_a }.sort
        files = files.select { |file| File.file?(file) }

        # strip default './' - only the leading prefix, never a './' that
        # happens to follow a newline inside a file name
        files = files.map { |file| file.delete_prefix('./') } if default_root
        files
      end

      # convert raw body into something palatable for pattern matching
      def prepare_body(payload)
        body = payload.body
        content_type = payload.headers['Content-Type']
        return body unless content_type

        # Mismatches between Content-Type and body.encoding are fatal, so make
        # an effort to align them.
        encoding = charset_encoding(content_type)
        body.force_encoding(encoding) if encoding && body.encoding != encoding

        # pretty print json for easier searching
        return body unless content_type.match?(/\bjson\b/)

        begin
          JSON.pretty_generate(JSON.parse(body))
        rescue JSON::ParserError
          # Labelled as JSON but not parsable (an HTML error page, say):
          # search the raw body instead of aborting the whole grep.
          body
        end
      end

      # regex pattern from options (always case-insensitive)
      def pattern
        @pattern ||= Regexp.new(options[:pattern], Regexp::IGNORECASE)
      end

      # printer for output, chosen from the options and whether stdout is a tty
      def printer
        @printer ||= if options[:silent]
          Grep::SilentPrinter.new
        elsif options[:count]
          Grep::CountPrinter.new($stdout)
        elsif options[:head] || $stdout.tty?
          Grep::HeaderPrinter.new($stdout, options[:head])
        else
          Grep::TersePrinter.new($stdout)
        end
      end

      private

      # Encoding named by the charset parameter of a Content-Type value, or
      # nil when there is no charset or Ruby does not know it. The parameter
      # name is matched case-insensitively and surrounding quotes are dropped.
      def charset_encoding(content_type)
        charset = content_type[/charset=([^;]+)/i, 1]
        return unless charset

        Encoding.find(charset.delete('"').strip)
      rescue ArgumentError
        nil
      end
    end
  end
end
|