httpdisk 0.5.2 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,9 @@
1
- require 'fileutils'
2
- require 'tempfile'
1
+ require "fileutils"
2
+ require "tempfile"
3
+ require "zlib"
3
4
 
4
5
  module HTTPDisk
5
- # Disk cache for cache_keys => response. Files are compressed.
6
+ # Disk cache for cache_keys => response. Files may be compressed or plain.
6
7
  class Cache
7
8
  attr_reader :options
8
9
 
@@ -10,13 +11,14 @@ module HTTPDisk
10
11
  @options = options
11
12
  end
12
13
 
13
- %i[dir expires force force_errors].each do |method|
14
+ %i[compress dir expires force force_errors].each do |method|
14
15
  define_method(method) do
15
16
  options[method]
16
17
  end
17
18
  end
18
- alias force? force
19
- alias force_errors? force_errors
19
+ alias_method :compress?, :compress
20
+ alias_method :force?, :force
21
+ alias_method :force_errors?, :force_errors
20
22
 
21
23
  # Get cached response. If there is a cached error it will be raised.
22
24
  def read(cache_key)
@@ -37,15 +39,11 @@ module HTTPDisk
37
39
  path = diskpath(cache_key)
38
40
  FileUtils.mkdir_p(File.dirname(path))
39
41
 
40
- # Atomically write gzipped payload. Put our underlying Tempfile into
41
- # binmode to avoid accidental newline conversion or string encoding. Not
42
- # required for *nix systems, but I've heard rumors it's helpful for
43
- # Windows.
42
+ # Atomically write payload. Put our underlying Tempfile into binmode to
43
+ # avoid accidental newline conversion or string encoding. Not required for
44
+ # *nix systems, but I've heard rumors it's helpful for Windows.
44
45
  Tempfile.new(binmode: true).tap do |tmp|
45
- Zlib::GzipWriter.new(tmp).tap do |gzip|
46
- payload.write(gzip)
47
- gzip.close
48
- end
46
+ write_payload(tmp, payload)
49
47
  tmp.close
50
48
  FileUtils.mv(tmp.path, path)
51
49
  end
@@ -73,10 +71,8 @@ module HTTPDisk
73
71
  return :force if force?
74
72
 
75
73
  begin
76
- payload = Zlib::GzipReader.open(path, encoding: 'ASCII-8BIT') do
77
- Payload.read(_1, peek: peek)
78
- end
79
- rescue StandardError => e
74
+ payload = read_payload(path, peek:)
75
+ rescue => e
80
76
  raise "#{path}: #{e}"
81
77
  end
82
78
  return :force if force_errors? && payload.error?
@@ -88,5 +84,27 @@ module HTTPDisk
88
84
  def expired?(path)
89
85
  expires && File.stat(path).mtime < Time.now - expires
90
86
  end
87
+
88
+ def read_payload(path, peek:)
89
+ if compressed?(path)
90
+ Zlib::GzipReader.open(path, encoding: "ASCII-8BIT") { Payload.read(_1, peek:) }
91
+ else
92
+ File.open(path, "rb") { Payload.read(_1, peek:) }
93
+ end
94
+ end
95
+
96
+ def write_payload(tmp, payload)
97
+ if compress?
98
+ Zlib::GzipWriter.new(tmp).tap do |gzip|
99
+ payload.write(gzip)
100
+ gzip.close
101
+ end
102
+ else
103
+ payload.write(tmp)
104
+ end
105
+ end
106
+
107
+ # check got gz magic
108
+ def compressed?(path) = File.binread(path, 2) == "\x1f\x8b".b
91
109
  end
92
110
  end
@@ -1,66 +1,89 @@
1
- require 'cgi'
2
- require 'digest/md5'
3
- require 'uri'
1
+ require "cgi"
2
+ require "digest/md5"
3
+ require "uri"
4
4
 
5
5
  module HTTPDisk
6
6
  class CacheKey
7
- attr_reader :env, :ignore_params
7
+ PARTS = %i[http_method scheme host port path query body].freeze
8
+
9
+ attr_accessor :env, :ignore_params
10
+ attr_reader(*PARTS)
8
11
 
9
12
  def initialize(env, ignore_params: [])
10
13
  @env, @ignore_params = env, ignore_params
11
14
 
15
+ # setup defaults, user can override
16
+ @http_method = env.method
17
+ @scheme = url.scheme
18
+ @host = url.host
19
+ @port = default_port? ? nil : url.port
20
+ @path = (url.path == "/") ? nil : url.path
21
+ @query = url.query
22
+ @body = env.request_body ? bodykey : nil
23
+
12
24
  # sanity checks
13
- raise InvalidUrl, "http/https required #{env.url.inspect}" if env.url.scheme !~ /^https?$/
14
- raise InvalidUrl, "hostname required #{env.url.inspect}" if !env.url.host
25
+ raise InvalidUrl, "http/https required #{env.url.inspect}" if !/^https?$/.match?(scheme)
26
+ raise InvalidUrl, "hostname required #{env.url.inspect}" if !host
15
27
  end
16
28
 
17
- def url
18
- env.url
19
- end
29
+ def query_params
30
+ return {} if blank?(query)
20
31
 
21
- # Cache key (memoized)
22
- def key
23
- @key ||= calculate_key
32
+ CGI.parse(query).transform_values do
33
+ (_1.length == 1) ? _1.first : _1
34
+ end
24
35
  end
25
36
 
26
- # md5(key) (memoized)
27
- def digest
28
- @digest ||= Digest::MD5.hexdigest(key)
29
- end
37
+ # one-liners
38
+ def blank?(value) = value.nil? || value == ""
39
+ def default_port? = url.default_port == url.port
40
+ def digest = @digest ||= Digest::MD5.hexdigest(key)
41
+ def diskpath = @diskpath ||= File.join(hostdir, digest[0, 3], digest[3..])
42
+ def invalidate! = @digest = @key = @diskpath = nil
43
+ def key = @key ||= key0
44
+ def to_s = key
45
+ def url = env.url
30
46
 
31
- # Relative path for this cache key based on hostdir & digest.
32
- def diskpath
33
- @diskpath ||= File.join(hostdir, digest[0, 3], digest[3..])
47
+ def query_params=(value)
48
+ self.query = URI.encode_www_form(flatten_query_params(value))
34
49
  end
35
50
 
36
- def to_s
37
- key
51
+ # the parts that feed into `key` invalidate memoized stuff
52
+ PARTS.each do |part|
53
+ define_method("#{part}=") do |value|
54
+ invalidate!
55
+ instance_variable_set(:"@#{part}", value)
56
+ end
38
57
  end
39
58
 
40
59
  protected
41
60
 
42
- # Calculate cache key for url
43
- def calculate_key
61
+ def key0
44
62
  key = []
45
- key << env.method.upcase
46
- key << ' '
47
- key << url.scheme
48
- key << '://'
49
- key << url.host.downcase
50
- if !default_port?
51
- key << ':'
52
- key << url.port
63
+ key << http_method.to_s.upcase
64
+ key << " "
65
+ key << scheme.to_s
66
+ key << "://"
67
+ key << host.downcase
68
+ if port
69
+ key << ":"
70
+ key << port
71
+ end
72
+ if (path = (blank?(self.path) || self.path == "/") ? nil : self.path)
73
+ key << path
53
74
  end
54
- if url.path != '/'
55
- key << url.path
75
+ if (query = canonical_query(self.query))
76
+ key << "?"
77
+ key << query
56
78
  end
57
- if (q = url.query) && q != ''
58
- key << '?'
59
- key << querykey(q)
79
+ body = if env.request_headers["Content-Type"] == "application/x-www-form-urlencoded"
80
+ canonical_query(self.body)
81
+ else
82
+ self.body
60
83
  end
61
- if env.request_body
62
- key << ' '
63
- key << bodykey
84
+ if body
85
+ key << " "
86
+ key << body
64
87
  end
65
88
  key.join
66
89
  end
@@ -68,8 +91,8 @@ module HTTPDisk
68
91
  # Calculate cache key segment for body
69
92
  def bodykey
70
93
  body = env.request_body.to_s
71
- if env.request_headers['Content-Type'] == 'application/x-www-form-urlencoded'
72
- querykey(body)
94
+ if env.request_headers["Content-Type"] == "application/x-www-form-urlencoded"
95
+ canonical_query(body)
73
96
  elsif body.length < 50
74
97
  body
75
98
  else
@@ -77,32 +100,40 @@ module HTTPDisk
77
100
  end
78
101
  end
79
102
 
80
- # Calculate canonical key for a query
81
- def querykey(q)
82
- parts = q.split('&').sort
103
+ def canonical_query(q)
104
+ return if blank?(q)
105
+
106
+ parts = q.split("&").sort
83
107
  if !ignore_params.empty?
84
- parts = parts.map do |part|
85
- key, value = part.split('=', 2)
108
+ parts = parts.filter_map do |part|
109
+ key, value = part.split("=", 2)
86
110
  next if ignore_params.include?(key)
87
111
 
88
112
  "#{key}=#{value}"
89
- end.compact
113
+ end
90
114
  end
91
- parts.join('&')
92
- end
93
-
94
- def default_port?
95
- url.default_port == url.port
115
+ query = parts.join("&")
116
+ (query == "") ? nil : query
96
117
  end
97
118
 
98
119
  # Calculate nice directory name from url.host
99
120
  def hostdir
100
- hostdir = url.host.downcase
101
- hostdir = hostdir.gsub(/^www\./, '')
102
- hostdir = hostdir.gsub(/[^a-z0-9._-]/, '')
103
- hostdir = hostdir.squeeze('.')
104
- hostdir = 'any' if hostdir.empty?
121
+ hostdir = host.downcase
122
+ hostdir = hostdir.gsub(/^www\./, "")
123
+ hostdir = hostdir.gsub(/[^a-z0-9._-]/, "")
124
+ hostdir = hostdir.squeeze(".")
125
+ hostdir = "any" if hostdir.empty?
105
126
  hostdir
106
127
  end
128
+
129
+ def flatten_query_params(value)
130
+ value.flat_map do |k, v|
131
+ if v.is_a?(Array)
132
+ v.map { [k, _1] }
133
+ else
134
+ [[k, v]]
135
+ end
136
+ end
137
+ end
107
138
  end
108
139
  end
@@ -1,8 +1,8 @@
1
1
  # manually load dependencies here since this is loaded standalone by bin
2
- require 'httpdisk/error'
3
- require 'httpdisk/slop_duration'
4
- require 'httpdisk/version'
5
- require 'slop'
2
+ require "httpdisk/error"
3
+ require "httpdisk/slop_duration"
4
+ require "httpdisk/version"
5
+ require "slop"
6
6
 
7
7
  module HTTPDisk
8
8
  module Cli
@@ -10,43 +10,43 @@ module HTTPDisk
10
10
  module Args
11
11
  def self.slop(args)
12
12
  slop = Slop.parse(args) do |o|
13
- o.banner = 'httpdisk [options] [url]'
13
+ o.banner = "httpdisk [options] [url]"
14
14
 
15
15
  # similar to curl
16
- o.separator 'Similar to curl:'
17
- o.string '-d', '--data', 'HTTP POST data'
18
- o.array '-H', '--header', 'pass custom header(s) to server', delimiter: nil
19
- o.boolean '-i', '--include', 'include response headers in the output'
20
- o.integer '-m', '--max-time', 'maximum time allowed for the transfer'
21
- o.string '-o', '--output', 'write to file instead of stdout'
22
- o.string '-x', '--proxy', 'use host[:port] as proxy'
23
- o.string '-X', '--request', 'HTTP method to use'
24
- o.integer '--retry', 'retry request if problems occur'
25
- o.boolean '-s', '--silent', "silent mode (don't print errors)"
26
- o.string '-A', '--user-agent', 'send User-Agent to server'
16
+ o.separator "Similar to curl:"
17
+ o.string "-d", "--data", "HTTP POST data"
18
+ o.array "-H", "--header", "pass custom header(s) to server", delimiter: nil
19
+ o.boolean "-i", "--include", "include response headers in the output"
20
+ o.integer "-m", "--max-time", "maximum time allowed for the transfer"
21
+ o.string "-o", "--output", "write to file instead of stdout"
22
+ o.string "-x", "--proxy", "use host[:port] as proxy"
23
+ o.string "-X", "--request", "HTTP method to use"
24
+ o.integer "--retry", "retry request if problems occur"
25
+ o.boolean "-s", "--silent", "silent mode (don't print errors)"
26
+ o.string "-A", "--user-agent", "send User-Agent to server"
27
27
 
28
28
  # from httpdisk
29
- o.separator 'Specific to httpdisk:'
30
- o.string '--dir', 'httpdisk cache directory (defaults to ~/httpdisk)'
31
- o.duration '--expires', 'when to expire cached requests (ex: 1h, 2d, 3w)'
32
- o.boolean '--force', "don't read anything from cache (but still write)"
33
- o.boolean '--force-errors', "don't read errors from cache (but still write)"
34
- o.boolean '--status', 'show status for a url in the cache'
29
+ o.separator "Specific to httpdisk:"
30
+ o.string "--dir", "httpdisk cache directory (defaults to ~/httpdisk)"
31
+ o.duration "--expires", "when to expire cached requests (ex: 1h, 2d, 3w)"
32
+ o.boolean "--force", "don't read anything from cache (but still write)"
33
+ o.boolean "--force-errors", "don't read errors from cache (but still write)"
34
+ o.boolean "--status", "show status for a url in the cache"
35
35
 
36
36
  # generic
37
- o.boolean '--version', 'show version' do
37
+ o.boolean "--version", "show version" do
38
38
  puts "httpdisk #{HTTPDisk::VERSION}"
39
39
  exit
40
40
  end
41
- o.on '--help', 'show this help' do
41
+ o.on "--help", "show this help" do
42
42
  puts o
43
43
  exit
44
44
  end
45
45
  end
46
46
 
47
- raise Slop::Error, '' if args.empty?
48
- raise Slop::Error, 'no URL specified' if slop.args.empty?
49
- raise Slop::Error, 'more than one URL specified' if slop.args.length > 1
47
+ raise Slop::Error, "" if args.empty?
48
+ raise Slop::Error, "no URL specified" if slop.args.empty?
49
+ raise Slop::Error, "more than one URL specified" if slop.args.length > 1
50
50
 
51
51
  slop.to_h.tap do
52
52
  _1[:url] = slop.args.first
@@ -1,6 +1,6 @@
1
- require 'faraday-cookie_jar'
2
- require 'faraday_middleware'
3
- require 'ostruct'
1
+ require "faraday-cookie_jar"
2
+ require "faraday/follow_redirects"
3
+ require "ostruct"
4
4
 
5
5
  module HTTPDisk
6
6
  module Cli
@@ -31,7 +31,7 @@ module HTTPDisk
31
31
 
32
32
  # output
33
33
  if options[:output]
34
- File.open(options[:output], 'w') { output(response, _1) }
34
+ File.open(options[:output], "w") { output(response, _1) }
35
35
  else
36
36
  output(response, $stdout)
37
37
  end
@@ -103,9 +103,9 @@ module HTTPDisk
103
103
  method = if options[:request]
104
104
  options[:request]
105
105
  elsif options[:data]
106
- 'post'
106
+ "post"
107
107
  end
108
- method ||= 'get'
108
+ method ||= "get"
109
109
  method = method.downcase.to_sym
110
110
 
111
111
  if !Faraday::Connection::METHODS.include?(method)
@@ -119,9 +119,9 @@ module HTTPDisk
119
119
  def request_url
120
120
  url = options[:url]
121
121
  # recover from missing http:
122
- if url !~ %r{^https?://}i
123
- if url =~ %r{^\w+://}
124
- raise CliError, 'only http/https supported'
122
+ if !%r{^https?://}i.match?(url)
123
+ if %r{^\w+://}.match?(url)
124
+ raise CliError, "only http/https supported"
125
125
  end
126
126
 
127
127
  url = "http://#{url}"
@@ -140,11 +140,11 @@ module HTTPDisk
140
140
  def request_headers
141
141
  {}.tap do |headers|
142
142
  if options[:user_agent]
143
- headers['User-Agent'] = options[:user_agent]
143
+ headers["User-Agent"] = options[:user_agent]
144
144
  end
145
145
 
146
146
  options[:header].each do |header|
147
- key, value = header.split(': ', 2)
147
+ key, value = header.split(": ", 2)
148
148
  if !key || !value || key.empty? || value.empty?
149
149
  raise CliError, "invalid --header #{header.inspect}"
150
150
  end
@@ -1,6 +1,6 @@
1
- require 'content-type'
2
- require 'faraday'
3
- require 'logger'
1
+ require "content-type"
2
+ require "faraday"
3
+ require "logger"
4
4
 
5
5
  module HTTPDisk
6
6
  # Middleware and main entry point.
@@ -9,26 +9,28 @@ module HTTPDisk
9
9
 
10
10
  def initialize(app, options = {})
11
11
  options = Sloptions.parse(options) do
12
- _1.string :dir, default: File.join(ENV['HOME'], 'httpdisk')
12
+ _1.string :dir, default: File.join(ENV["HOME"], "httpdisk")
13
+ _1.boolean :compress, default: true
13
14
  _1.integer :expires
14
15
  _1.boolean :force
15
16
  _1.boolean :force_errors
16
17
  _1.array :ignore_params, default: []
18
+ _1.on :key_transform, type: Proc
17
19
  _1.on :logger, type: [:boolean, Logger]
18
20
  _1.boolean :utf8
19
21
  end
20
22
 
21
- super(app, options)
23
+ super
22
24
  @cache = Cache.new(options)
23
25
  end
24
26
 
25
27
  def call(env)
26
- cache_key = CacheKey.new(env, ignore_params: ignore_params)
28
+ cache_key = build_cache_key(env)
27
29
  logger&.info("#{env.method.upcase} #{env.url} (#{cache.status(cache_key)})")
28
30
  env[:httpdisk_diskpath] = cache.diskpath(cache_key)
29
31
 
30
32
  # check cache, fallback to network
31
- if response = read(cache_key, env)
33
+ if (response = read(cache_key, env))
32
34
  response.env[:httpdisk] = true
33
35
  else
34
36
  response = perform(env)
@@ -42,7 +44,7 @@ module HTTPDisk
42
44
 
43
45
  # Returns cache status for this request
44
46
  def status(env)
45
- cache_key = CacheKey.new(env)
47
+ cache_key = build_cache_key(env)
46
48
  {
47
49
  url: env.url.to_s,
48
50
  status: cache.status(cache_key).to_s,
@@ -90,7 +92,7 @@ module HTTPDisk
90
92
  def stuff_999_response(env, err)
91
93
  env.tap do
92
94
  _1.reason_phrase = "#{err.class} #{err.message}"
93
- _1.response_body = ''
95
+ _1.response_body = ""
94
96
  _1.response_headers = Faraday::Utils::Headers.new
95
97
  _1.status = HTTPDisk::ERROR_STATUS
96
98
  end
@@ -110,11 +112,11 @@ module HTTPDisk
110
112
  # network. Not all adapters honor Content-Type (including the default
111
113
  # adapter).
112
114
  def encode_body(response)
113
- body = response.body || ''
115
+ body = response.body || ""
114
116
 
115
117
  # parse Content-Type
116
118
  begin
117
- content_type = response['Content-Type'] && ContentType.parse(response['Content-Type'])
119
+ content_type = response["Content-Type"] && ContentType.parse(response["Content-Type"])
118
120
  rescue Parslet::ParseFailed
119
121
  # unparsable
120
122
  end
@@ -122,15 +124,14 @@ module HTTPDisk
122
124
  # look at charset and set body encoding if necessary
123
125
  encoding = encoding_for(content_type)
124
126
  if body.encoding != encoding
125
- body = body.dup if body.frozen?
126
- body.force_encoding(encoding)
127
+ body = body.dup.force_encoding(encoding)
127
128
  end
128
129
 
129
130
  # if :utf8, force body to UTF-8
130
131
  if options[:utf8] && content_type && response_text?(content_type)
131
132
  body = body.dup if body.frozen?
132
133
  begin
133
- body.encode!('UTF-8', invalid: :replace, undef: :replace, replace: '?')
134
+ body.encode!("UTF-8", invalid: :replace, undef: :replace, replace: "?")
134
135
  rescue Encoding::ConverterNotFoundError
135
136
  # rare, can't do anything here
136
137
  body = "httpdisk could not convert from #{body.encoding.name} to UTF-8"
@@ -152,7 +153,7 @@ module HTTPDisk
152
153
  end
153
154
 
154
155
  def response_text?(content_type)
155
- content_type.type == 'text' || content_type.mime_type == 'application/json'
156
+ content_type.type == "text" || content_type.mime_type == "application/json"
156
157
  end
157
158
 
158
159
  #
@@ -163,6 +164,12 @@ module HTTPDisk
163
164
  @ignore_params ||= options[:ignore_params].map { CGI.escape(_1.to_s) }.to_set
164
165
  end
165
166
 
167
+ def build_cache_key(env)
168
+ CacheKey.new(env, ignore_params:).tap do
169
+ options[:key_transform]&.call(_1)
170
+ end
171
+ end
172
+
166
173
  def logger
167
174
  return if !options[:logger]
168
175
 
@@ -1,6 +1,6 @@
1
1
  # manually load dependencies here since this is loaded standalone by bin
2
- require 'httpdisk/version'
3
- require 'slop'
2
+ require "httpdisk/version"
3
+ require "slop"
4
4
 
5
5
  module HTTPDisk
6
6
  module Grep
@@ -8,22 +8,22 @@ module HTTPDisk
8
8
  # Slop parsing. This is broken out so we can run without require 'httpdisk'.
9
9
  def self.slop(args)
10
10
  slop = Slop.parse(args) do |o|
11
- o.banner = 'httpdisk-grep [options] pattern [path ...]'
12
- o.boolean '-c', '--count', 'suppress normal output and show count'
13
- o.boolean '-h', '--head', 'show req headers before each match'
14
- o.boolean '-s', '--silent', 'do not print anything to stdout'
15
- o.boolean '--version', 'show version' do
11
+ o.banner = "httpdisk-grep [options] pattern [path ...]"
12
+ o.boolean "-c", "--count", "suppress normal output and show count"
13
+ o.boolean "-h", "--head", "show req headers before each match"
14
+ o.boolean "-s", "--silent", "do not print anything to stdout"
15
+ o.boolean "--version", "show version" do
16
16
  puts "httpdisk-grep #{HTTPDisk::VERSION}"
17
17
  exit
18
18
  end
19
- o.on '--help', 'show this help' do
19
+ o.on "--help", "show this help" do
20
20
  puts o
21
21
  exit
22
22
  end
23
23
  end
24
24
 
25
- raise Slop::Error, '' if args.empty?
26
- raise Slop::Error, 'no PATTERN specified' if slop.args.empty?
25
+ raise Slop::Error, "" if args.empty?
26
+ raise Slop::Error, "no PATTERN specified" if slop.args.empty?
27
27
 
28
28
  slop.to_h.tap do
29
29
  _1[:pattern] = slop.args.shift