httpdisk 0.2.0 → 0.5.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,14 +4,14 @@ require 'uri'
4
4
 
5
5
  module HTTPDisk
6
6
  class CacheKey
7
- attr_reader :env
7
+ attr_reader :env, :ignore_params
8
8
 
9
# Build a cache key for a request.
#
# env           - request env; must respond to #url, and that url must
#                 respond to #scheme and #host.
# ignore_params - collection of query parameter names, stored as-is and
#                 exposed through the ignore_params reader.
#
# Raises InvalidUrl if the url is not http/https or has no host.
def initialize(env, ignore_params: [])
  @env = env
  @ignore_params = ignore_params

  # sanity checks
  url = env.url
  raise InvalidUrl, "http/https required #{url.inspect}" unless url.scheme =~ /^https?$/
  raise InvalidUrl, "hostname required #{url.inspect}" unless url.host
end
16
16
 
17
17
  def url
@@ -79,7 +79,16 @@ module HTTPDisk
79
79
 
80
80
  # Calculate canonical key for a query
81
81
  def querykey(q)
82
- q.split('&').sort.join('&')
82
+ parts = q.split('&').sort
83
+ if !ignore_params.empty?
84
+ parts = parts.map do |part|
85
+ key, value = part.split('=', 2)
86
+ next if ignore_params.include?(key)
87
+
88
+ "#{key}=#{value}"
89
+ end.compact
90
+ end
91
+ parts.join('&')
83
92
  end
84
93
 
85
94
  def default_port?
@@ -0,0 +1,57 @@
1
+ # manually load dependencies here since this is loaded standalone by bin
2
+ require 'httpdisk/error'
3
+ require 'httpdisk/slop_duration'
4
+ require 'httpdisk/version'
5
+ require 'slop'
6
+
7
module HTTPDisk
  module Cli
    # Slop parsing. This is broken out so we can run without require 'httpdisk'.
    module Args
      # Parse the httpdisk command line.
      #
      # args - Array of command line argument Strings.
      #
      # Returns the parsed options as a Hash (Slop's to_h) with the single
      # positional argument added under :url.
      # Raises Slop::Error if args is empty (with an empty message), if no
      # url was given, or if more than one url was given. --version and
      # --help print and then exit.
      def self.slop(args)
        parsed = Slop.parse(args) do |opts|
          opts.banner = 'httpdisk [options] [url]'

          # similar to curl
          opts.separator 'Similar to curl:'
          opts.string '-d', '--data', 'HTTP POST data'
          opts.array '-H', '--header', 'pass custom header(s) to server', delimiter: nil
          opts.boolean '-i', '--include', 'include response headers in the output'
          opts.integer '-m', '--max-time', 'maximum time allowed for the transfer'
          opts.string '-o', '--output', 'write to file instead of stdout'
          opts.string '-x', '--proxy', 'use host[:port] as proxy'
          opts.string '-X', '--request', 'HTTP method to use'
          opts.integer '--retry', 'retry request if problems occur'
          opts.boolean '-s', '--silent', "silent mode (don't print errors)"
          opts.string '-A', '--user-agent', 'send User-Agent to server'

          # from httpdisk
          opts.separator 'Specific to httpdisk:'
          opts.string '--dir', 'httpdisk cache directory (defaults to ~/httpdisk)'
          opts.duration '--expires', 'when to expire cached requests (ex: 1h, 2d, 3w)'
          opts.boolean '--force', "don't read anything from cache (but still write)"
          opts.boolean '--force-errors', "don't read errors from cache (but still write)"
          opts.boolean '--status', 'show status for a url in the cache'

          # generic
          opts.boolean '--version', 'show version' do
            puts "httpdisk #{HTTPDisk::VERSION}"
            exit
          end
          opts.on '--help', 'show this help' do
            puts opts
            exit
          end
        end

        # an empty command line is reported with an empty message
        raise Slop::Error, '' if args.empty?

        urls = parsed.args
        raise Slop::Error, 'no URL specified' if urls.empty?
        raise Slop::Error, 'more than one URL specified' if urls.length > 1

        result = parsed.to_h
        result[:url] = urls.first
        result
      end
    end
  end
end
@@ -0,0 +1,169 @@
1
+ require 'faraday-cookie_jar'
2
+ require 'faraday_middleware'
3
+ require 'ostruct'
4
+
5
module HTTPDisk
  module Cli
    # Command line httpdisk command. Turns a Hash of parsed command line
    # options into a Faraday request that goes through the httpdisk
    # middleware, or prints cache status for the url when :status is set.
    class Main
      attr_reader :options

      # options - Hash of command line options with Symbol keys. Keys read
      #           by this class: :url, :status, :output, :include, :proxy,
      #           :max_time, :retry, :request, :data, :user_agent, :header,
      #           :dir, :expires, :force, :force_errors.
      def initialize(options)
        @options = options
      end

      # Make the request (or print status)
      #
      # Raises CliError if the response status is >= 400, or if the
      # method, url or headers taken from options are invalid.
      def run
        # short circuit --status
        if options[:status]
          status
          return
        end

        # create Faraday client
        faraday = create_faraday

        # run request
        response = faraday.run_request(request_method, request_url, request_body, request_headers)
        if response.status >= 400
          raise CliError, "the requested URL returned error: #{response.status} #{response.reason_phrase}"
        end

        # output
        if options[:output]
          File.open(options[:output], 'w') { |f| output(response, f) }
        else
          output(response, $stdout)
        end
      end

      # Build the Faraday connection: proxy and timeout from options, then
      # cookie jar, redirect following, httpdisk and (optionally) retry.
      def create_faraday
        Faraday.new do |conn|
          # connection settings
          conn.proxy = options[:proxy] if options[:proxy]
          conn.options.timeout = options[:max_time] if options[:max_time]

          # cookie middleware
          conn.use :cookie_jar

          # BEFORE httpdisk so each redirect segment is cached
          conn.response :follow_redirects

          # httpdisk
          conn.use :httpdisk, client_options

          # AFTER httpdisk so transient failures are not cached
          if options[:retry]
            # we have a very liberal retry policy
            retry_options = {
              max: options[:retry],
              methods: %w[delete get head options patch post put trace],
              retry_statuses: (500..600).to_a,
              retry_if: ->(_env, _err) { true },
            }
            conn.request :retry, retry_options
          end
        end
      end

      # Support for --status. Builds a Faraday::Env for the request without
      # sending it and prints each "key: value" pair reported by
      # HTTPDisk::Client#status.
      def status
        # build env
        env = Faraday::Env.new.tap do |e|
          e.method = request_method
          e.request_body = request_body
          e.request_headers = request_headers
          # Run the url through Faraday to make sure we see the same stuff as middleware.
          e.url = Faraday.new.build_url(request_url)
        end

        # now print status
        client = HTTPDisk::Client.new(nil, client_options)
        client.status(env).each do |key, value|
          puts "#{key}: #{value.inspect}"
        end
      end

      # Output response to f. With :include, a status line and the response
      # headers (followed by a blank line) are written before the body.
      def output(response, f)
        if options[:include]
          f.puts "HTTPDISK #{response.status} #{response.reason_phrase}"
          response.headers.each { |name, value| f.puts("#{name}: #{value}") }
          f.puts
        end
        f.write(response.body)
      end

      #
      # request_XXX
      #

      # HTTP method (get, post, etc.) as a lowercase Symbol. Uses
      # options[:request] when given, otherwise :post if there is
      # options[:data], otherwise :get.
      #
      # Raises CliError if the method is not in Faraday::Connection::METHODS.
      def request_method
        method = if options[:request]
          options[:request]
        elsif options[:data]
          'post'
        end
        method ||= 'get'
        method = method.downcase.to_sym

        if !Faraday::Connection::METHODS.include?(method)
          raise CliError, "invalid --request #{method.inspect}"
        end

        method
      end

      # Request url, parsed into a URI. "http://" is prepended when the
      # url has no scheme at all.
      #
      # Raises CliError for a scheme other than http/https or for a url
      # that URI cannot parse.
      def request_url
        url = options[:url]
        # recover from missing http:
        if url !~ %r{^https?://}i
          if url =~ %r{^\w+://}
            raise CliError, 'only http/https supported'
          end

          url = "http://#{url}"
        end
        URI.parse(url)
      rescue URI::InvalidURIError
        raise CliError, "invalid url #{url.inspect}"
      end

      # Request body
      def request_body
        options[:data]
      end

      # Request headers as a Hash of name => value, built from
      # options[:user_agent] and each "Name: value" entry in
      # options[:header].
      #
      # Like curl, whitespace after the colon is optional ("Name:value" is
      # accepted). A missing options[:header] is treated as no headers.
      #
      # Raises CliError if an entry has no colon, an empty name or an
      # empty value.
      def request_headers
        {}.tap do |headers|
          if options[:user_agent]
            headers['User-Agent'] = options[:user_agent]
          end

          # Array() so that nil (option never set) means "no extra headers"
          Array(options[:header]).each do |header|
            key, value = header.split(/:\s*/, 2)
            if !key || !value || key.empty? || value.empty?
              raise CliError, "invalid --header #{header.inspect}"
            end

            headers[key] = value
          end
        end
      end

      #
      # helpers
      #

      # Options to HTTPDisk::Client: the cache related subset of our own
      # options, plus utf8: true.
      def client_options
        client_options = options.slice(:dir, :expires, :force, :force_errors)
        client_options[:utf8] = true
        client_options
      end
    end
  end
end
@@ -1,38 +1,43 @@
1
+ require 'content-type'
1
2
  require 'faraday'
2
3
  require 'logger'
3
4
 
4
5
  module HTTPDisk
5
- OPTIONS = {
6
- dir: File.join(ENV['HOME'], 'httpdisk'),
7
- expires_in: nil,
8
- force: false,
9
- force_errors: false,
10
- logger: false,
11
- }.freeze
12
-
13
6
  # Middleware and main entry point.
14
7
  class Client < Faraday::Middleware
15
8
  attr_reader :cache, :options
16
9
 
17
10
  def initialize(app, options = {})
18
- super(app, options = OPTIONS.merge(options.compact))
11
+ options = Sloptions.parse(options) do
12
+ _1.string :dir, default: File.join(ENV['HOME'], 'httpdisk')
13
+ _1.integer :expires
14
+ _1.boolean :force
15
+ _1.boolean :force_errors
16
+ _1.array :ignore_params, default: []
17
+ _1.on :logger, type: [:boolean, Logger]
18
+ _1.boolean :utf8
19
+ end
20
+
21
+ super(app, options)
19
22
  @cache = Cache.new(options)
20
23
  end
21
24
 
22
25
  def call(env)
23
- cache_key = CacheKey.new(env)
26
+ cache_key = CacheKey.new(env, ignore_params: ignore_params)
24
27
  logger&.info("#{env.method.upcase} #{env.url} (#{cache.status(cache_key)})")
28
+ env[:httpdisk_diskpath] = cache.diskpath(cache_key)
25
29
 
26
- if cached_response = read(cache_key, env)
27
- cached_response.env[:httpdisk] = true
28
- return cached_response
29
- end
30
-
31
- # miss
32
- perform(env).tap do |response|
30
+ # check cache, fallback to network
31
+ if response = read(cache_key, env)
32
+ response.env[:httpdisk] = true
33
+ else
34
+ response = perform(env)
33
35
  response.env[:httpdisk] = false
34
36
  write(cache_key, env, response)
35
37
  end
38
+
39
+ encode_body(response)
40
+ response
36
41
  end
37
42
 
38
43
  # Returns cache status for this request
@@ -100,10 +105,68 @@ module HTTPDisk
100
105
  err.to_s =~ /#{proxy.host}.*#{proxy.port}/
101
106
  end
102
107
 
108
+ # Set string encoding for response body. The cache always returns
109
+ # ASCII-8BIT, but we have no idea what the encoding will be from the
110
+ # network. Not all adapters honor Content-Type (including the default
111
+ # adapter).
112
# response - response whose body should be (re)tagged. Read via #body and
#            #[] ('Content-Type'); the result is stored in
#            response.env[:body].
#
# The body is tagged with the encoding named by the Content-Type charset
# (binary when there is no usable charset). When options[:utf8] is set
# and the Content-Type is text or application/json, the body is then
# converted to UTF-8, replacing invalid/undefined characters with '?'.
#
# Returns the body String that was stored.
def encode_body(response)
  text = response.body || ''

  # parse Content-Type, an unparsable header is treated like a missing one
  content_type = begin
    response['Content-Type'] && ContentType.parse(response['Content-Type'])
  rescue Parslet::ParseFailed
    nil
  end

  # look at charset and set body encoding if necessary
  target = encoding_for(content_type)
  unless text.encoding == target
    text = text.dup if text.frozen?
    text.force_encoding(target)
  end

  # if :utf8, force body to UTF-8
  wants_utf8 = options[:utf8] && content_type && response_text?(content_type)
  if wants_utf8
    text = text.dup if text.frozen?
    begin
      text.encode!('UTF-8', invalid: :replace, undef: :replace, replace: '?')
    rescue Encoding::ConverterNotFoundError
      # rare, can't do anything here
      text = "httpdisk could not convert from #{text.encoding.name} to UTF-8"
    end
  end

  response.env[:body] = text
end
142
+
143
# Pick the Encoding for a parsed Content-Type.
#
# content_type - object responding to #charset, or nil.
#
# Returns the Encoding named by the charset, or Encoding::ASCII_8BIT when
# there is no content type, no charset, or the charset is unknown to Ruby.
def encoding_for(content_type)
  charset = content_type&.charset
  return Encoding::ASCII_8BIT unless charset

  begin
    Encoding.find(charset)
  rescue ArgumentError
    # unknown charset
    Encoding::ASCII_8BIT
  end
end
153
+
154
# Is this a textual response? True when the content type's #type is
# 'text' or its #mime_type is 'application/json'.
def response_text?(content_type)
  return true if content_type.type == 'text'

  content_type.mime_type == 'application/json'
end
157
+
158
+ #
159
+ # options
160
+ #
161
+
162
# The :ignore_params option as a Set of CGI-escaped Strings. Built on
# first use and memoized.
def ignore_params
  @ignore_params ||= begin
    escaped = options[:ignore_params].map { |name| CGI.escape(name.to_s) }
    escaped.to_set
  end
end
165
+
103
166
  def logger
104
- return @logger if defined?(@logger)
167
+ return if !options[:logger]
105
168
 
106
- @logger = case options[:logger]
169
+ @logger ||= case options[:logger]
107
170
  when true then Logger.new($stderr)
108
171
  when Logger then options[:logger]
109
172
  end
@@ -1,3 +1,7 @@
1
1
  module HTTPDisk
2
+ ERROR_STATUS = 999
3
+
2
4
  class CliError < StandardError; end
5
+
6
+ class InvalidUrl < StandardError; end
3
7
  end
@@ -0,0 +1,35 @@
1
+ # manually load dependencies here since this is loaded standalone by bin
2
+ require 'httpdisk/version'
3
+ require 'slop'
4
+
5
module HTTPDisk
  module Grep
    module Args
      # Slop parsing. This is broken out so we can run without require 'httpdisk'.
      #
      # args - Array of command line argument Strings.
      #
      # Returns the parsed options as a Hash (Slop's to_h) with the first
      # positional argument under :pattern and the remaining ones under
      # :roots.
      # Raises Slop::Error if args is empty (with an empty message) or if
      # no pattern was given. --version and --help print and then exit.
      def self.slop(args)
        parsed = Slop.parse(args) do |opts|
          opts.banner = 'httpdisk-grep [options] pattern [path ...]'
          opts.boolean '-c', '--count', 'suppress normal output and show count'
          opts.boolean '-h', '--head', 'show req headers before each match'
          opts.boolean '-s', '--silent', 'do not print anything to stdout'
          opts.boolean '--version', 'show version' do
            puts "httpdisk-grep #{HTTPDisk::VERSION}"
            exit
          end
          opts.on '--help', 'show this help' do
            puts opts
            exit
          end
        end

        # an empty command line is reported with an empty message
        raise Slop::Error, '' if args.empty?
        raise Slop::Error, 'no PATTERN specified' if parsed.args.empty?

        result = parsed.to_h
        # first positional is the pattern, whatever is left are the roots
        result[:pattern] = parsed.args.shift
        result[:roots] = parsed.args
        result
      end
    end
  end
end
@@ -0,0 +1,112 @@
1
+ require 'find'
2
+ require 'json'
3
+
4
module HTTPDisk
  module Grep
    # Command line httpdisk-grep command. Searches the bodies of cached
    # responses found under one or more root paths for a regex pattern.
    class Main
      attr_reader :options, :success

      # options - Hash of command line options with Symbol keys. Keys read
      #           by this class: :pattern, :roots, :silent, :count, :head.
      def initialize(options)
        @options = options
        # false (not nil) until the first match is printed
        @success = false
      end

      # Enumerate file paths one at a time. Returns true if matches were found.
      #
      # Raises CliError, with a shortened message and the offending path,
      # if any file fails to process.
      def run
        paths.each do |path|
          begin
            run_one(path)
          rescue StandardError => e
            if ENV['HTTPDISK_DEBUG']
              $stderr.puts
              $stderr.puts e.class
              $stderr.puts e.backtrace.join("\n")
            end
            raise CliError, "#{e.message[0, 70]} (#{path})"
          end
        end
        success
      end

      # Search a single cache file and hand any matches to the printer.
      #
      # path - path to a gzipped payload file.
      def run_one(path)
        # read payload & body
        payload = Zlib::GzipReader.open(path, encoding: 'ASCII-8BIT') do |gz|
          Payload.read(gz)
        end
        body = prepare_body(payload)

        # collect all_matches, one Array of MatchData per matching line
        all_matches = body.each_line.map do |line|
          [].tap do |matches|
            line.scan(pattern) { matches << Regexp.last_match }
          end
        end.reject(&:empty?)
        return if all_matches.empty?

        # print
        @success = true
        printer.print(path, payload, all_matches)
      end

      # file paths to be searched: every regular file under options[:roots]
      # (or under '.' when no roots were given), sorted.
      def paths
        # roots
        roots = options[:roots]
        roots = ['.'] if roots.empty?

        # find files in roots
        paths = roots.flat_map { |root| Find.find(root).to_a }.sort
        paths = paths.select { |path| File.file?(path) }

        # strip default './'
        paths = paths.map { |path| path.delete_prefix('./') } if options[:roots].empty?
        paths
      end

      # convert raw body into something palatable for pattern matching
      #
      # payload - object responding to #body (String) and #headers (looked
      #           up with the 'Content-Type' key).
      #
      # Returns the body, tagged with the Content-Type charset when Ruby
      # knows that encoding, and pretty printed when the Content-Type
      # mentions json and the body really is JSON.
      def prepare_body(payload)
        body = payload.body

        if content_type = payload.headers['Content-Type']
          # Mismatches between Content-Type and body.encoding are fatal, so make
          # an effort to align them. The charset value may be quoted and the
          # parameter name is matched case-insensitively.
          if charset = content_type[/charset="?([^";]+)/i, 1]
            encoding = begin
              Encoding.find(charset.strip)
            rescue StandardError
              nil
            end
            if encoding && body.encoding != encoding
              body = body.dup if body.frozen?
              body.force_encoding(encoding)
            end
          end

          # pretty print json for easier searching
          if content_type =~ /\bjson\b/
            begin
              body = JSON.pretty_generate(JSON.parse(body))
            rescue JSON::JSONError
              # Labelled as json but not valid JSON (empty body, html error
              # page, ...). Search the raw body instead of aborting the run.
            end
          end
        end

        body
      end

      # regex pattern from options (case insensitive)
      def pattern
        @pattern ||= Regexp.new(options[:pattern], Regexp::IGNORECASE)
      end

      # printer for output, picked from the :silent, :count and :head
      # options (in that order) and from whether stdout is a tty
      def printer
        @printer ||= case
        when options[:silent]
          Grep::SilentPrinter.new
        when options[:count]
          Grep::CountPrinter.new($stdout)
        when options[:head] || $stdout.tty?
          Grep::HeaderPrinter.new($stdout, options[:head])
        else
          Grep::TersePrinter.new($stdout)
        end
      end
    end
  end
end