httpdisk 0.1.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,14 +4,14 @@ require 'uri'
4
4
 
5
5
  module HTTPDisk
6
6
  class CacheKey
7
- attr_reader :env
7
+ attr_reader :env, :ignore_params
8
8
 
9
# Build a cache key for a request env.
#
# env           - request env; env.url.scheme and env.url.host are read here
# ignore_params - stored as-is and exposed through attr_reader (defaults to [])
#
# Raises InvalidUrl unless the url scheme is http/https and a host is present.
def initialize(env, ignore_params: [])
  @env = env
  @ignore_params = ignore_params

  # sanity checks
  url = env.url
  raise InvalidUrl, "http/https required #{url.inspect}" unless url.scheme =~ /^https?$/
  raise InvalidUrl, "hostname required #{url.inspect}" unless url.host
end
16
16
 
17
17
  def url
@@ -68,10 +68,9 @@ module HTTPDisk
68
68
  # Calculate cache key segment for body
69
69
  def bodykey
70
70
  body = env.request_body.to_s
71
- case
72
- when env.request_headers['Content-Type'] == 'application/x-www-form-urlencoded'
71
+ if env.request_headers['Content-Type'] == 'application/x-www-form-urlencoded'
73
72
  querykey(body)
74
- when body.length < 50
73
+ elsif body.length < 50
75
74
  body
76
75
  else
77
76
  Digest::MD5.hexdigest(body)
@@ -80,7 +79,16 @@ module HTTPDisk
80
79
 
81
80
  # Calculate canonical key for a query
82
81
  def querykey(q)
83
- q.split('&').sort.join('&')
82
+ parts = q.split('&').sort
83
+ if !ignore_params.empty?
84
+ parts = parts.map do |part|
85
+ key, value = part.split('=', 2)
86
+ next if ignore_params.include?(key)
87
+
88
+ "#{key}=#{value}"
89
+ end.compact
90
+ end
91
+ parts.join('&')
84
92
  end
85
93
 
86
94
  def default_port?
@@ -0,0 +1,57 @@
1
+ # manually load dependencies here since this is loaded standalone by bin
2
+ require 'httpdisk/error'
3
+ require 'httpdisk/slop_duration'
4
+ require 'httpdisk/version'
5
+ require 'slop'
6
+
7
module HTTPDisk
  module Cli
    # Slop parsing. This is broken out so we can run without require 'httpdisk'.
    module Args
      # Parse command line arguments for the httpdisk command.
      #
      # args - array of command line arguments
      #
      # Returns the parsed options as a hash, with the single positional
      # argument stored under :url.
      # Raises Slop::Error (with an empty message) when args is empty, and with
      # a descriptive message when zero or more than one URL is given.
      def self.slop(args)
        parsed = Slop.parse(args) do |opts|
          opts.banner = 'httpdisk [options] [url]'

          # similar to curl
          opts.separator 'Similar to curl:'
          opts.string '-d', '--data', 'HTTP POST data'
          opts.array '-H', '--header', 'pass custom header(s) to server', delimiter: nil
          opts.boolean '-i', '--include', 'include response headers in the output'
          opts.integer '-m', '--max-time', 'maximum time allowed for the transfer'
          opts.string '-o', '--output', 'write to file instead of stdout'
          opts.string '-x', '--proxy', 'use host[:port] as proxy'
          opts.string '-X', '--request', 'HTTP method to use'
          opts.integer '--retry', 'retry request if problems occur'
          opts.boolean '-s', '--silent', "silent mode (don't print errors)"
          opts.string '-A', '--user-agent', 'send User-Agent to server'

          # from httpdisk
          opts.separator 'Specific to httpdisk:'
          opts.string '--dir', 'httpdisk cache directory (defaults to ~/httpdisk)'
          opts.duration '--expires', 'when to expire cached requests (ex: 1h, 2d, 3w)'
          opts.boolean '--force', "don't read anything from cache (but still write)"
          opts.boolean '--force-errors', "don't read errors from cache (but still write)"
          opts.boolean '--status', 'show status for a url in the cache'

          # generic
          opts.boolean '--version', 'show version' do
            puts "httpdisk #{HTTPDisk::VERSION}"
            exit
          end
          opts.on '--help', 'show this help' do
            puts opts
            exit
          end
        end

        # validate positional arguments: exactly one URL is expected
        raise Slop::Error, '' if args.empty?
        raise Slop::Error, 'no URL specified' if parsed.args.empty?
        raise Slop::Error, 'more than one URL specified' if parsed.args.length > 1

        result = parsed.to_h
        result[:url] = parsed.args.first
        result
      end
    end
  end
end
@@ -0,0 +1,169 @@
1
+ require 'faraday-cookie_jar'
2
+ require 'faraday_middleware'
3
+ require 'ostruct'
4
+
5
module HTTPDisk
  module Cli
    # Command line httpdisk command.
    #
    # Driven by an options hash. Keys read by this class: :status, :output,
    # :include, :proxy, :max_time, :retry, :request, :data, :url, :user_agent,
    # :header, :dir, :expires, :force and :force_errors.
    class Main
      attr_reader :options

      # options - hash of command line options (see class comment)
      def initialize(options)
        @options = options
      end

      # Make the request (or print status).
      #
      # Raises CliError when the response status is >= 400.
      def run
        # short circuit --status
        if options[:status]
          status
          return
        end

        # create Faraday client
        faraday = create_faraday

        # run request
        response = faraday.run_request(request_method, request_url, request_body, request_headers)
        if response.status >= 400
          raise CliError, "the requested URL returned error: #{response.status} #{response.reason_phrase}"
        end

        # output
        if options[:output]
          # binary mode so response bodies are written byte-for-byte (no
          # newline translation on platforms that distinguish text mode)
          File.open(options[:output], 'wb') { |f| output(response, f) }
        else
          output(response, $stdout)
        end
      end

      # Build the Faraday connection used by #run. Middleware order matters,
      # see the inline comments.
      def create_faraday
        Faraday.new do |conn|
          # connection settings
          conn.proxy = options[:proxy] if options[:proxy]
          conn.options.timeout = options[:max_time] if options[:max_time]

          # cookie middleware
          conn.use :cookie_jar

          # BEFORE httpdisk so each redirect segment is cached
          conn.response :follow_redirects

          # httpdisk
          conn.use :httpdisk, client_options

          # AFTER httpdisk so transient failures are not cached
          if options[:retry]
            # we have a very liberal retry policy
            retry_options = {
              max: options[:retry],
              methods: %w[delete get head options patch post put trace],
              retry_statuses: (500..600).to_a,
              retry_if: ->(_env, _err) { true },
            }
            conn.request :retry, retry_options
          end
        end
      end

      # Support for --status. Prints one "key: value" line per entry returned
      # by HTTPDisk::Client#status for this request.
      def status
        # build env
        env = Faraday::Env.new.tap do |e|
          e.method = request_method
          e.request_body = request_body
          e.request_headers = request_headers
          # Run the url through Faraday to make sure we see the same stuff as middleware.
          e.url = Faraday.new.build_url(request_url)
        end

        # now print status
        client = HTTPDisk::Client.new(nil, client_options)
        client.status(env).each do |key, value|
          puts "#{key}: #{value.inspect}"
        end
      end

      # Output response to f.
      #
      # response - object responding to status, reason_phrase, headers, body
      # f        - IO-like object (puts/write)
      #
      # With options[:include], a status line and the response headers are
      # written before the body, followed by a blank line.
      def output(response, f)
        if options[:include]
          f.puts "HTTPDISK #{response.status} #{response.reason_phrase}"
          response.headers.each { |name, value| f.puts("#{name}: #{value}") }
          f.puts
        end
        f.write(response.body)
      end

      #
      # request_XXX
      #

      # HTTP method (get, post, etc.) as a lowercase symbol. Uses --request if
      # given, otherwise post when --data is present, otherwise get.
      #
      # Raises CliError if the method is not in Faraday::Connection::METHODS.
      def request_method
        method = options[:request] || (options[:data] ? 'post' : 'get')
        method = method.downcase.to_sym

        if !Faraday::Connection::METHODS.include?(method)
          raise CliError, "invalid --request #{method.inspect}"
        end

        method
      end

      # Request url as a parsed URI. A url without a scheme gets "http://"
      # prepended.
      #
      # Raises CliError for any scheme other than http/https, or when the url
      # cannot be parsed.
      def request_url
        url = options[:url]
        # recover from missing http:
        if url !~ %r{\Ahttps?://}i
          # scheme names may contain '+', '.' and '-' (e.g. svn+ssh://), so
          # match those too rather than silently prefixing http://
          if url =~ %r{\A[\w+.-]+://}
            raise CliError, 'only http/https supported'
          end

          url = "http://#{url}"
        end
        URI.parse(url)
      rescue URI::InvalidURIError
        raise CliError, "invalid url #{url.inspect}"
      end

      # Request body
      def request_body
        options[:data]
      end

      # Request headers as a hash, built from --user-agent and each --header
      # entry of the form "Key: Value".
      #
      # Raises CliError for a --header entry without a non-empty key and value.
      def request_headers
        {}.tap do |headers|
          if options[:user_agent]
            headers['User-Agent'] = options[:user_agent]
          end

          # Array() tolerates an options hash without a :header entry
          Array(options[:header]).each do |header|
            key, value = header.split(': ', 2)
            if !key || !value || key.empty? || value.empty?
              raise CliError, "invalid --header #{header.inspect}"
            end

            headers[key] = value
          end
        end
      end

      #
      # helpers
      #

      # Options to HTTPDisk::Client: the cache-related subset of the command
      # line options, with :utf8 always set to true.
      def client_options
        client_options = options.slice(:dir, :expires, :force, :force_errors)
        client_options[:utf8] = true
        client_options
      end
    end
  end
end
@@ -1,34 +1,43 @@
1
+ require 'content-type'
1
2
  require 'faraday'
3
+ require 'logger'
2
4
 
3
5
  module HTTPDisk
4
- OPTIONS = {
5
- dir: File.join(ENV['HOME'], 'httpdisk'),
6
- expires_in: nil,
7
- force: false,
8
- force_errors: false,
9
- }.freeze
10
-
11
6
  # Middleware and main entry point.
12
7
  class Client < Faraday::Middleware
13
8
  attr_reader :cache, :options
14
9
 
15
10
  def initialize(app, options = {})
16
- super(app, options = OPTIONS.merge(options.compact))
11
+ options = Sloptions.parse(options) do
12
+ _1.string :dir, default: File.join(ENV['HOME'], 'httpdisk')
13
+ _1.integer :expires
14
+ _1.boolean :force
15
+ _1.boolean :force_errors
16
+ _1.array :ignore_params, default: []
17
+ _1.on :logger, type: [:boolean, Logger]
18
+ _1.boolean :utf8
19
+ end
20
+
21
+ super(app, options)
17
22
  @cache = Cache.new(options)
18
23
  end
19
24
 
20
25
  def call(env)
21
- cache_key = CacheKey.new(env)
22
-
23
- # hit?
24
- if cached_response = read(cache_key, env)
25
- return cached_response
26
- end
27
-
28
- # miss
29
- perform(cache_key, env).tap do |response|
26
+ cache_key = CacheKey.new(env, ignore_params: ignore_params)
27
+ logger&.info("#{env.method.upcase} #{env.url} (#{cache.status(cache_key)})")
28
+ env[:httpdisk_diskpath] = cache.diskpath(cache_key)
29
+
30
+ # check cache, fallback to network
31
+ if response = read(cache_key, env)
32
+ response.env[:httpdisk] = true
33
+ else
34
+ response = perform(env)
35
+ response.env[:httpdisk] = false
30
36
  write(cache_key, env, response)
31
37
  end
38
+
39
+ encode_body(response)
40
+ response
32
41
  end
33
42
 
34
43
  # Returns cache status for this request
@@ -46,7 +55,7 @@ module HTTPDisk
46
55
  protected
47
56
 
48
57
  # perform the request, return Faraday::Response
49
- def perform(cache_key, env)
58
+ def perform(env)
50
59
  app.call(env)
51
60
  rescue Faraday::ConnectionFailed, Faraday::SSLError, Faraday::TimeoutError => e
52
61
  # try to avoid caching proxy errors
@@ -95,6 +104,73 @@ module HTTPDisk
95
104
 
96
105
  err.to_s =~ /#{proxy.host}.*#{proxy.port}/
97
106
  end
107
+
108
# Set string encoding for response body. The cache always returns
# ASCII-8BIT, but we have no idea what the encoding will be from the
# network. Not all adapters honor Content-Type (including the default
# adapter).
#
# response - response object; reads response.body and
#            response['Content-Type'], writes response.env[:body]
#
# With options[:utf8], text responses (see response_text?) are converted to
# UTF-8, replacing unconvertible bytes with '?'. A body with no usable
# charset whose bytes are already valid UTF-8 is relabelled as UTF-8 instead
# of being transcoded from binary, which would turn every non-ASCII byte
# into '?'.
def encode_body(response)
  body = response.body || ''

  # parse Content-Type
  begin
    content_type = response['Content-Type'] && ContentType.parse(response['Content-Type'])
  rescue Parslet::ParseFailed
    # unparsable
  end

  # look at charset and set body encoding if necessary
  encoding = encoding_for(content_type)
  if body.encoding != encoding
    body = body.dup if body.frozen?
    body.force_encoding(encoding)
  end

  # if :utf8, force body to UTF-8
  if options[:utf8] && content_type && response_text?(content_type)
    body = body.dup if body.frozen?
    if body.encoding == Encoding::ASCII_8BIT && body.dup.force_encoding(Encoding::UTF_8).valid_encoding?
      # no declared charset, but the bytes are valid UTF-8: just relabel
      body.force_encoding(Encoding::UTF_8)
    else
      begin
        body.encode!('UTF-8', invalid: :replace, undef: :replace, replace: '?')
      rescue Encoding::ConverterNotFoundError
        # rare, can't do anything here
        body = "httpdisk could not convert from #{body.encoding.name} to UTF-8"
      end
    end
  end

  response.env[:body] = body
end
142
+
143
# Pick the Encoding for a parsed Content-Type.
#
# content_type - object responding to #charset, or nil
#
# Returns the Encoding named by the charset, or Encoding::ASCII_8BIT when
# there is no content type, no charset, or the charset is unknown.
def encoding_for(content_type)
  charset = content_type&.charset
  return Encoding::ASCII_8BIT unless charset

  begin
    Encoding.find(charset)
  rescue ArgumentError
    # unknown charset
    Encoding::ASCII_8BIT
  end
end
153
+
154
# Is this a textual response? True when the content type's type is 'text'
# or its mime type is 'application/json'.
def response_text?(content_type)
  return true if content_type.type == 'text'

  content_type.mime_type == 'application/json'
end
157
+
158
+ #
159
+ # options
160
+ #
161
+
162
# Set of query parameter names to ignore, built from options[:ignore_params]
# with each entry converted to a string and CGI-escaped. Memoized.
def ignore_params
  @ignore_params ||= options[:ignore_params].each_with_object(Set.new) do |param, escaped|
    escaped << CGI.escape(param.to_s)
  end
end
165
+
166
# Logger for this client, or nil when options[:logger] is not set. When the
# option is true a Logger writing to $stderr is created; when it is a Logger
# instance that instance is used. Memoized once resolved.
def logger
  setting = options[:logger]
  return if !setting

  @logger ||= if setting.equal?(true)
    Logger.new($stderr)
  elsif setting.is_a?(Logger)
    setting
  end
end
98
174
  end
99
175
  end
100
176
 
@@ -1,3 +1,7 @@
1
1
  module HTTPDisk
2
+ ERROR_STATUS = 999
3
+
2
4
  class CliError < StandardError; end
5
+
6
+ class InvalidUrl < StandardError; end
3
7
  end
@@ -0,0 +1,35 @@
1
+ # manually load dependencies here since this is loaded standalone by bin
2
+ require 'httpdisk/version'
3
+ require 'slop'
4
+
5
module HTTPDisk
  module Grep
    module Args
      # Slop parsing. This is broken out so we can run without require 'httpdisk'.
      #
      # args - array of command line arguments
      #
      # Returns the parsed options as a hash, with the first positional
      # argument under :pattern and the remaining ones under :roots.
      # Raises Slop::Error (with an empty message) when args is empty, and
      # 'no PATTERN specified' when there are no positional arguments.
      def self.slop(args)
        parsed = Slop.parse(args) do |opts|
          opts.banner = 'httpdisk-grep [options] pattern [path ...]'
          opts.boolean '-c', '--count', 'suppress normal output and show count'
          opts.boolean '-h', '--head', 'show req headers before each match'
          opts.boolean '-s', '--silent', 'do not print anything to stdout'
          opts.boolean '--version', 'show version' do
            puts "httpdisk-grep #{HTTPDisk::VERSION}"
            exit
          end
          opts.on '--help', 'show this help' do
            puts opts
            exit
          end
        end

        raise Slop::Error, '' if args.empty?
        raise Slop::Error, 'no PATTERN specified' if parsed.args.empty?

        # the pattern is shifted off first, so :roots holds what remains
        result = parsed.to_h
        result[:pattern] = parsed.args.shift
        result[:roots] = parsed.args
        result
      end
    end
  end
end