httpdisk 0.2.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,14 +4,14 @@ require 'uri'
4
4
 
5
5
  module HTTPDisk
6
6
  class CacheKey
7
- attr_reader :env
7
+ attr_reader :env, :ignore_params
8
8
 
9
- def initialize(env)
10
- @env = env
9
+ def initialize(env, ignore_params: [])
10
+ @env, @ignore_params = env, ignore_params
11
11
 
12
12
  # sanity checks
13
- raise 'http/https required' if env.url.scheme !~ /^https?$/
14
- raise 'hostname required' if !env.url.host
13
+ raise InvalidUrl, "http/https required #{env.url.inspect}" if env.url.scheme !~ /^https?$/
14
+ raise InvalidUrl, "hostname required #{env.url.inspect}" if !env.url.host
15
15
  end
16
16
 
17
17
  def url
@@ -79,7 +79,16 @@ module HTTPDisk
79
79
 
80
80
  # Calculate canonical key for a query
81
81
  def querykey(q)
82
- q.split('&').sort.join('&')
82
+ parts = q.split('&').sort
83
+ if !ignore_params.empty?
84
+ parts = parts.map do |part|
85
+ key, value = part.split('=', 2)
86
+ next if ignore_params.include?(key)
87
+
88
+ "#{key}=#{value}"
89
+ end.compact
90
+ end
91
+ parts.join('&')
83
92
  end
84
93
 
85
94
  def default_port?
@@ -0,0 +1,57 @@
1
+ # manually load dependencies here since this is loaded standalone by bin
2
+ require 'httpdisk/error'
3
+ require 'httpdisk/slop_duration'
4
+ require 'httpdisk/version'
5
+ require 'slop'
6
+
7
module HTTPDisk
  module Cli
    # Slop parsing. This is broken out so we can run without require 'httpdisk'.
    module Args
      # Parse command line args for the httpdisk command.
      #
      # Returns the parsed options as a Hash, with the single positional
      # argument added under :url. Raises Slop::Error when args is empty, or
      # when zero or more than one positional argument remains after parsing.
      # --version and --help print and exit from inside the parser.
      def self.slop(args)
        parsed = Slop.parse(args) do |opts|
          opts.banner = 'httpdisk [options] [url]'

          # similar to curl
          opts.separator 'Similar to curl:'
          opts.string '-d', '--data', 'HTTP POST data'
          opts.array '-H', '--header', 'pass custom header(s) to server', delimiter: nil
          opts.boolean '-i', '--include', 'include response headers in the output'
          opts.integer '-m', '--max-time', 'maximum time allowed for the transfer'
          opts.string '-o', '--output', 'write to file instead of stdout'
          opts.string '-x', '--proxy', 'use host[:port] as proxy'
          opts.string '-X', '--request', 'HTTP method to use'
          opts.integer '--retry', 'retry request if problems occur'
          opts.boolean '-s', '--silent', "silent mode (don't print errors)"
          opts.string '-A', '--user-agent', 'send User-Agent to server'

          # from httpdisk
          opts.separator 'Specific to httpdisk:'
          opts.string '--dir', 'httpdisk cache directory (defaults to ~/httpdisk)'
          opts.duration '--expires', 'when to expire cached requests (ex: 1h, 2d, 3w)'
          opts.boolean '--force', "don't read anything from cache (but still write)"
          opts.boolean '--force-errors', "don't read errors from cache (but still write)"
          opts.boolean '--status', 'show status for a url in the cache'

          # generic
          opts.boolean '--version', 'show version' do
            puts "httpdisk #{HTTPDisk::VERSION}"
            exit
          end
          opts.on '--help', 'show this help' do
            puts opts
            exit
          end
        end

        # an empty message is raised when there were no args at all
        raise Slop::Error, '' if args.empty?

        # exactly one positional argument (the url) is required
        raise Slop::Error, 'no URL specified' if parsed.args.empty?
        raise Slop::Error, 'more than one URL specified' if parsed.args.length > 1

        result = parsed.to_h
        result[:url] = parsed.args.first
        result
      end
    end
  end
end
@@ -0,0 +1,169 @@
1
+ require 'faraday-cookie_jar'
2
+ require 'faraday_middleware'
3
+ require 'ostruct'
4
+
5
+ module HTTPDisk
6
+ module Cli
7
+ # Command line httpdisk command.
8
+ class Main
9
+ attr_reader :options
10
+
11
+ def initialize(options)
12
+ @options = options
13
+ end
14
+
15
+ # Make the request (or print status)
16
+ def run
17
+ # short circuit --status
18
+ if options[:status]
19
+ status
20
+ return
21
+ end
22
+
23
+ # create Faraday client
24
+ faraday = create_faraday
25
+
26
+ # run request
27
+ response = faraday.run_request(request_method, request_url, request_body, request_headers)
28
+ if response.status >= 400
29
+ raise CliError, "the requested URL returned error: #{response.status} #{response.reason_phrase}"
30
+ end
31
+
32
+ # output
33
+ if options[:output]
34
+ File.open(options[:output], 'w') { output(response, _1) }
35
+ else
36
+ output(response, $stdout)
37
+ end
38
+ end
39
+
40
+ def create_faraday
41
+ Faraday.new do
42
+ # connection settings
43
+ _1.proxy = options[:proxy] if options[:proxy]
44
+ _1.options.timeout = options[:max_time] if options[:max_time]
45
+
46
+ # cookie middleware
47
+ _1.use :cookie_jar
48
+
49
+ # BEFORE httpdisk so each redirect segment is cached
50
+ _1.response :follow_redirects
51
+
52
+ # httpdisk
53
+ _1.use :httpdisk, client_options
54
+
55
+ # AFTER httpdisk so transient failures are not cached
56
+ if options[:retry]
57
+ # we have a very liberal retry policy
58
+ retry_options = {
59
+ max: options[:retry],
60
+ methods: %w[delete get head options patch post put trace],
61
+ retry_statuses: (500..600).to_a,
62
+ retry_if: ->(_env, _err) { true },
63
+ }
64
+ _1.request :retry, retry_options
65
+ end
66
+ end
67
+ end
68
+
69
+ # Support for --status
70
+ def status
71
+ # build env
72
+ env = Faraday::Env.new.tap do
73
+ _1.method = request_method
74
+ _1.request_body = request_body
75
+ _1.request_headers = request_headers
76
+ # Run the url through Faraday to make sure we see the same stuff as middleware.
77
+ _1.url = Faraday.new.build_url(request_url)
78
+ end
79
+
80
+ # now print status
81
+ client = HTTPDisk::Client.new(nil, client_options)
82
+ client.status(env).each do
83
+ puts "#{_1}: #{_2.inspect}"
84
+ end
85
+ end
86
+
87
+ # Output response to f
88
+ def output(response, f)
89
+ if options[:include]
90
+ f.puts "HTTPDISK #{response.status} #{response.reason_phrase}"
91
+ response.headers.each { f.puts("#{_1}: #{_2}") }
92
+ f.puts
93
+ end
94
+ f.write(response.body)
95
+ end
96
+
97
+ #
98
+ # request_XXX
99
+ #
100
+
101
+ # HTTP method (get, post, etc.)
102
+ def request_method
103
+ method = if options[:request]
104
+ options[:request]
105
+ elsif options[:data]
106
+ 'post'
107
+ end
108
+ method ||= 'get'
109
+ method = method.downcase.to_sym
110
+
111
+ if !Faraday::Connection::METHODS.include?(method)
112
+ raise CliError, "invalid --request #{method.inspect}"
113
+ end
114
+
115
+ method
116
+ end
117
+
118
+ # Request url
119
+ def request_url
120
+ url = options[:url]
121
+ # recover from missing http:
122
+ if url !~ %r{^https?://}i
123
+ if url =~ %r{^\w+://}
124
+ raise CliError, 'only http/https supported'
125
+ end
126
+
127
+ url = "http://#{url}"
128
+ end
129
+ URI.parse(url)
130
+ rescue URI::InvalidURIError
131
+ raise CliError, "invalid url #{url.inspect}"
132
+ end
133
+
134
+ # Request body
135
+ def request_body
136
+ options[:data]
137
+ end
138
+
139
+ # Request headers
140
+ def request_headers
141
+ {}.tap do |headers|
142
+ if options[:user_agent]
143
+ headers['User-Agent'] = options[:user_agent]
144
+ end
145
+
146
+ options[:header].each do |header|
147
+ key, value = header.split(': ', 2)
148
+ if !key || !value || key.empty? || value.empty?
149
+ raise CliError, "invalid --header #{header.inspect}"
150
+ end
151
+
152
+ headers[key] = value
153
+ end
154
+ end
155
+ end
156
+
157
+ #
158
+ # helpers
159
+ #
160
+
161
+ # Options to HTTPDisk::Client
162
+ def client_options
163
+ client_options = options.slice(:dir, :expires, :force, :force_errors)
164
+ client_options[:utf8] = true
165
+ client_options
166
+ end
167
+ end
168
+ end
169
+ end
@@ -1,38 +1,43 @@
1
+ require 'content-type'
1
2
  require 'faraday'
2
3
  require 'logger'
3
4
 
4
5
  module HTTPDisk
5
- OPTIONS = {
6
- dir: File.join(ENV['HOME'], 'httpdisk'),
7
- expires_in: nil,
8
- force: false,
9
- force_errors: false,
10
- logger: false,
11
- }.freeze
12
-
13
6
  # Middleware and main entry point.
14
7
  class Client < Faraday::Middleware
15
8
  attr_reader :cache, :options
16
9
 
17
10
  def initialize(app, options = {})
18
- super(app, options = OPTIONS.merge(options.compact))
11
+ options = Sloptions.parse(options) do
12
+ _1.string :dir, default: File.join(ENV['HOME'], 'httpdisk')
13
+ _1.integer :expires
14
+ _1.boolean :force
15
+ _1.boolean :force_errors
16
+ _1.array :ignore_params, default: []
17
+ _1.on :logger, type: [:boolean, Logger]
18
+ _1.boolean :utf8
19
+ end
20
+
21
+ super(app, options)
19
22
  @cache = Cache.new(options)
20
23
  end
21
24
 
22
25
  def call(env)
23
- cache_key = CacheKey.new(env)
26
+ cache_key = CacheKey.new(env, ignore_params: ignore_params)
24
27
  logger&.info("#{env.method.upcase} #{env.url} (#{cache.status(cache_key)})")
28
+ env[:httpdisk_diskpath] = cache.diskpath(cache_key)
25
29
 
26
- if cached_response = read(cache_key, env)
27
- cached_response.env[:httpdisk] = true
28
- return cached_response
29
- end
30
-
31
- # miss
32
- perform(env).tap do |response|
30
+ # check cache, fallback to network
31
+ if response = read(cache_key, env)
32
+ response.env[:httpdisk] = true
33
+ else
34
+ response = perform(env)
33
35
  response.env[:httpdisk] = false
34
36
  write(cache_key, env, response)
35
37
  end
38
+
39
+ encode_body(response)
40
+ response
36
41
  end
37
42
 
38
43
  # Returns cache status for this request
@@ -100,10 +105,68 @@ module HTTPDisk
100
105
  err.to_s =~ /#{proxy.host}.*#{proxy.port}/
101
106
  end
102
107
 
108
+ # Set string encoding for response body. The cache always returns
109
+ # ASCII-8BIT, but we have no idea what the encoding will be from the
110
+ # network. Not all adapters honor Content-Type (including the default
111
+ # adapter).
112
+ def encode_body(response)
113
+ body = response.body || ''
114
+
115
+ # parse Content-Type
116
+ begin
117
+ content_type = response['Content-Type'] && ContentType.parse(response['Content-Type'])
118
+ rescue Parslet::ParseFailed
119
+ # unparsable
120
+ end
121
+
122
+ # look at charset and set body encoding if necessary
123
+ encoding = encoding_for(content_type)
124
+ if body.encoding != encoding
125
+ body = body.dup if body.frozen?
126
+ body.force_encoding(encoding)
127
+ end
128
+
129
+ # if :utf8, force body to UTF-8
130
+ if options[:utf8] && content_type && response_text?(content_type)
131
+ body = body.dup if body.frozen?
132
+ begin
133
+ body.encode!('UTF-8', invalid: :replace, undef: :replace, replace: '?')
134
+ rescue Encoding::ConverterNotFoundError
135
+ # rare, can't do anything here
136
+ body = "httpdisk could not convert from #{body.encoding.name} to UTF-8"
137
+ end
138
+ end
139
+
140
+ response.env[:body] = body
141
+ end
142
+
143
+ def encoding_for(content_type)
144
+ if content_type&.charset
145
+ begin
146
+ return Encoding.find(content_type.charset)
147
+ rescue ArgumentError
148
+ # unknown charset
149
+ end
150
+ end
151
+ Encoding::ASCII_8BIT
152
+ end
153
+
154
+ def response_text?(content_type)
155
+ content_type.type == 'text' || content_type.mime_type == 'application/json'
156
+ end
157
+
158
+ #
159
+ # options
160
+ #
161
+
162
+ def ignore_params
163
+ @ignore_params ||= options[:ignore_params].map { CGI.escape(_1.to_s) }.to_set
164
+ end
165
+
103
166
  def logger
104
- return @logger if defined?(@logger)
167
+ return if !options[:logger]
105
168
 
106
- @logger = case options[:logger]
169
+ @logger ||= case options[:logger]
107
170
  when true then Logger.new($stderr)
108
171
  when Logger then options[:logger]
109
172
  end
@@ -1,3 +1,7 @@
1
1
module HTTPDisk
  # Status value used for errors.
  # NOTE(review): no use of this constant is visible in this diff — confirm
  # how it is used before relying on this description.
  ERROR_STATUS = 999

  # Error raised by the command line tools (bad options, failed requests,
  # failures while searching a file).
  class CliError < StandardError
  end

  # Error raised by CacheKey when a url is not http/https or has no host.
  class InvalidUrl < StandardError
  end
end
@@ -0,0 +1,35 @@
1
+ # manually load dependencies here since this is loaded standalone by bin
2
+ require 'httpdisk/version'
3
+ require 'slop'
4
+
5
module HTTPDisk
  module Grep
    module Args
      # Slop parsing. This is broken out so we can run without require 'httpdisk'.
      #
      # Returns the parsed options as a Hash, with the first positional
      # argument added under :pattern and the remaining ones under :roots.
      # Raises Slop::Error when args is empty or no pattern is left after
      # parsing. --version and --help print and exit from inside the parser.
      def self.slop(args)
        parsed = Slop.parse(args) do |opts|
          opts.banner = 'httpdisk-grep [options] pattern [path ...]'
          opts.boolean '-c', '--count', 'suppress normal output and show count'
          opts.boolean '-h', '--head', 'show req headers before each match'
          opts.boolean '-s', '--silent', 'do not print anything to stdout'
          opts.boolean '--version', 'show version' do
            puts "httpdisk-grep #{HTTPDisk::VERSION}"
            exit
          end
          opts.on '--help', 'show this help' do
            puts opts
            exit
          end
        end

        # an empty message is raised when there were no args at all
        raise Slop::Error, '' if args.empty?
        raise Slop::Error, 'no PATTERN specified' if parsed.args.empty?

        result = parsed.to_h
        # the first positional arg is the pattern, whatever is left are roots
        result[:pattern] = parsed.args.shift
        result[:roots] = parsed.args
        result
      end
    end
  end
end
@@ -0,0 +1,112 @@
1
+ require 'find'
2
+ require 'json'
3
+
4
module HTTPDisk
  module Grep
    # Command line httpdisk-grep command. Reads gzipped payload files found
    # under the given roots, matches each body line against a
    # case-insensitive regex and hands the matches to a printer.
    class Main
      attr_reader :options, :success

      # options - Hash with :pattern and :roots, plus the :silent, :count and
      #           :head flags read by #printer.
      def initialize(options)
        @options = options
        # start out false so #run returns a real boolean when nothing matches
        @success = false
      end

      # Enumerate file paths one at a time. Returns true if matches were found.
      # Any StandardError raised for a file is re-raised as CliError carrying
      # the first 70 characters of the message and the path.
      def run
        paths.each do |path|
          begin
            run_one(path)
          rescue StandardError => e
            if ENV['HTTPDISK_DEBUG']
              $stderr.puts
              $stderr.puts e.class
              $stderr.puts e.backtrace.join("\n")
            end
            raise CliError, "#{e.message[0, 70]} (#{path})"
          end
        end
        success
      end

      # Search a single file, printing matches (if any) through the printer.
      def run_one(path)
        # read payload & body
        payload = Zlib::GzipReader.open(path, encoding: 'ASCII-8BIT') do |gz|
          Payload.read(gz)
        end
        body = prepare_body(payload)

        # collect all_matches: one array of MatchData per matching line
        all_matches = body.each_line.map do |line|
          [].tap do |matches|
            line.scan(pattern) { matches << Regexp.last_match }
          end
        end.reject(&:empty?)
        return if all_matches.empty?

        # print
        @success = true
        printer.print(path, payload, all_matches)
      end

      # file paths to be searched, sorted. Defaults to searching '.' when no
      # roots were given, in which case the leading './' is stripped.
      def paths
        # roots
        roots = options[:roots]
        default_root = roots.empty?
        roots = ['.'] if default_root

        # find files in roots
        found = roots.flat_map { Find.find(_1).to_a }.sort
        found = found.select { File.file?(_1) }

        # strip default './' (prefix only, never the middle of a path)
        found = found.map { _1.delete_prefix('./') } if default_root
        found
      end

      # convert raw body into something palatable for pattern matching
      def prepare_body(payload)
        body = payload.body
        content_type = payload.headers['Content-Type']
        return body if !content_type

        # Mismatches between Content-Type and body.encoding are fatal, so make
        # an effort to align them.
        if charset = content_type[/charset=([^;]+)/, 1]
          encoding = begin
            Encoding.find(charset)
          rescue StandardError
            nil
          end
          if encoding && body.encoding != encoding
            # force_encoding mutates, which a frozen body would reject
            body = body.dup if body.frozen?
            body.force_encoding(encoding)
          end
        end

        # pretty print json for easier searching
        if content_type =~ /\bjson\b/
          begin
            body = JSON.pretty_generate(JSON.parse(body))
          rescue JSON::ParserError
            # Empty or malformed body served with a JSON Content-Type: search
            # the raw body instead of aborting the whole run.
          end
        end

        body
      end

      # regex pattern from options (always case-insensitive)
      def pattern
        @pattern ||= Regexp.new(options[:pattern], Regexp::IGNORECASE)
      end

      # printer for output, chosen from the --silent / --count / --head
      # options and whether $stdout is a tty
      def printer
        @printer ||= case
        when options[:silent]
          Grep::SilentPrinter.new
        when options[:count]
          Grep::CountPrinter.new($stdout)
        when options[:head] || $stdout.tty?
          Grep::HeaderPrinter.new($stdout, options[:head])
        else
          Grep::TersePrinter.new($stdout)
        end
      end
    end
  end
end