httpdisk 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7b1e66859e069e390390dd87fac3753ee43501613884bfca2d7f40f70a224274
4
- data.tar.gz: 8de194055b6fc7ac858305eb739dc25494b139413d36decbbbaeaa00ce8f1da0
3
+ metadata.gz: fece53d7b5f7694a0f533797d119495ad2e8bc79667560f458aa23c0fb6eebf5
4
+ data.tar.gz: 2cb035d54e62cb9195959797fbfb712aabab9fc78db2ddfaf59e714c1da66ba5
5
5
  SHA512:
6
- metadata.gz: 4594b3eb07cd683a901883484ce88ed3be63f4a65a1c2d9c1a2eb80c9791606ceaaf7287ca74ae35d5fa5015e3218133a7e1199111233841f58c67696edc3f0e
7
- data.tar.gz: 5d9f0dd6f3d8407e4b5133ec407d4aa56b6c442d42679c7715f9519b60a791ebb212e11c3c8474ad92bf4dd9e2d5d8daaded2eadcff4b210a6fa5b1e0245677b
6
+ metadata.gz: '09ace3872dfbe2764e42eff568576f44ee5581c750dfbb5a907bd26a6581e047d1529c8928e2f430d804e3d2669eac6c8ca3307c47e34bab8674e810550a4aa8'
7
+ data.tar.gz: 70ea93666f7dff7283fc1b6ef3860e52a2309828874a157e7fdd29cc48cef67f814907e2be18c87c9404c33b65b0fed2f9156b31cf747d3bb9398c60e8f11335
data/.rubocop.yml CHANGED
@@ -17,10 +17,12 @@ Style/Documentation: { Enabled: false }
17
17
  Style/DoubleNegation: { Enabled: false }
18
18
  Style/EmptyCaseCondition: { Enabled: false }
19
19
  Style/FrozenStringLiteralComment: { Enabled: false }
20
+ Style/GuardClause: { Enabled: false }
20
21
  Style/IfUnlessModifier: { Enabled: false }
21
22
  Style/NegatedIf: { Enabled: false }
22
23
  Style/NumericPredicate: { Enabled: false }
23
24
  Style/ParallelAssignment: { Enabled: false }
25
+ Style/SoleNestedConditional: { Enabled: false }
24
26
  Style/StderrPuts: { Enabled: false }
25
27
  Style/TrailingCommaInArrayLiteral: { EnforcedStyleForMultiline: consistent_comma }
26
28
  Style/TrailingCommaInHashLiteral: { EnforcedStyleForMultiline: consistent_comma }
data/Gemfile.lock CHANGED
@@ -1,7 +1,8 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- httpdisk (0.4.0)
4
+ httpdisk (0.5.0)
5
+ content-type (~> 0.0)
5
6
  faraday (~> 1.4)
6
7
  faraday-cookie_jar (~> 0.0)
7
8
  faraday_middleware (~> 1.0)
@@ -14,11 +15,13 @@ GEM
14
15
  public_suffix (>= 2.0.2, < 5.0)
15
16
  ast (2.4.2)
16
17
  coderay (1.1.3)
18
+ content-type (0.0.1)
19
+ parslet (~> 1.5)
17
20
  crack (0.4.5)
18
21
  rexml
19
22
  domain_name (0.5.20190701)
20
23
  unf (>= 0.0.5, < 1.0.0)
21
- faraday (1.4.2)
24
+ faraday (1.4.3)
22
25
  faraday-em_http (~> 1.0)
23
26
  faraday-em_synchrony (~> 1.0)
24
27
  faraday-excon (~> 1.1)
@@ -37,7 +40,7 @@ GEM
37
40
  faraday_middleware (1.0.0)
38
41
  faraday (~> 1.0)
39
42
  hashdiff (1.0.1)
40
- http-cookie (1.0.3)
43
+ http-cookie (1.0.4)
41
44
  domain_name (~> 0.5)
42
45
  method_source (1.0.0)
43
46
  minitest (5.14.4)
@@ -46,6 +49,7 @@ GEM
46
49
  parallel (1.20.1)
47
50
  parser (3.0.1.1)
48
51
  ast (~> 2.4.1)
52
+ parslet (1.8.2)
49
53
  pry (0.13.1)
50
54
  coderay (~> 1.1)
51
55
  method_source (~> 1.0)
@@ -67,7 +71,7 @@ GEM
67
71
  parser (>= 3.0.1.1)
68
72
  ruby-progressbar (1.11.0)
69
73
  ruby2_keywords (0.0.4)
70
- slop (4.9.0)
74
+ slop (4.9.1)
71
75
  unf (0.1.4)
72
76
  unf_ext
73
77
  unf_ext (0.0.7.7)
data/README.md CHANGED
@@ -65,7 +65,6 @@ faraday = Faraday.new do
65
65
  _1.request :url_encoded # auto-encode form bodies
66
66
  _1.response :json # auto-decode JSON responses
67
67
  _1.response :follow_redirects # follow redirects (should be above httpdisk)
68
- _1.response :encoding # set Ruby string encoding based on Content-Type (should be above httpdisk)
69
68
  _1.use :httpdisk
70
69
  _1.request :retry # retry failed responses (should be below httpdisk)
71
70
  end
@@ -120,6 +119,10 @@ httpdisk caches all responses. POST responses are cached, along with 500 respons
120
119
 
121
120
  In general, if you make a request it will be cached regardless of the outcome.
122
121
 
122
+ ## String Encoding
123
+
124
+ httpdisk will honor the `Content-Type` from responses. Unfortunately, a body can contain bytes that are invalid for its declared encoding if the `Content-Type` doesn't match the actual bytes. To work around this, httpdisk provides a `utf8:` option that forces text response bodies to valid UTF-8.
125
+
123
126
  ## Configuration
124
127
 
125
128
  httpdisk supports a few options:
@@ -130,6 +133,7 @@ httpdisk supports a few options:
130
133
  - `force_errors:` don't read errors from cache (but still write)
131
134
  - `ignore_params:` array of query params to ignore when calculating cache_key
132
135
  - `logger:` log requests to stderr, or pass your own logger
136
+ - `utf8:` if true, force text response bodies to valid UTF-8
133
137
 
134
138
  Pass these in when setting up Faraday:
135
139
 
@@ -180,6 +184,11 @@ It can be challenging to use grep/ripgrep because cache files are compressed and
180
184
 
181
185
  ## Changelog
182
186
 
187
+ #### 0.5
188
+
189
+ - honor Content-Type
190
+ - added `:utf8` option to force text-like response bodies to UTF-8
191
+
183
192
  #### 0.4
184
193
 
185
194
  - added httpdisk-grep for searching cache files
data/httpdisk.gemspec CHANGED
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
21
21
  s.require_paths = ['lib']
22
22
 
23
23
  # gem dependencies
24
+ s.add_dependency 'content-type', '~> 0.0'
24
25
  s.add_dependency 'faraday', '~> 1.4'
25
26
  s.add_dependency 'faraday-cookie_jar', '~> 0.0'
26
27
  s.add_dependency 'faraday_middleware', '~> 1.0'
@@ -35,7 +35,19 @@ module HTTPDisk
35
35
  def write(cache_key, payload)
36
36
  path = diskpath(cache_key)
37
37
  FileUtils.mkdir_p(File.dirname(path))
38
- Zlib::GzipWriter.open(path) { payload.write(_1) }
38
+
39
+ # Atomically write gzipped payload. Put our underlying Tempfile into
40
+ # binmode to avoid accidental newline conversion or string encoding. Not
41
+ # required for *nix systems, but I've heard rumors it's helpful for
42
+ # Windows.
43
+ Tempfile.new(binmode: true).tap do |tmp|
44
+ Zlib::GzipWriter.new(tmp).tap do |gzip|
45
+ payload.write(gzip)
46
+ gzip.close
47
+ end
48
+ tmp.close
49
+ FileUtils.mv(tmp.path, path)
50
+ end
39
51
  end
40
52
 
41
53
  # Delete existing response, if any
@@ -59,7 +71,13 @@ module HTTPDisk
59
71
  return :stale if expired?(path)
60
72
  return :force if force?
61
73
 
62
- payload = Zlib::GzipReader.open(path) { Payload.read(_1, peek: peek) }
74
+ begin
75
+ payload = Zlib::GzipReader.open(path, encoding: 'ASCII-8BIT') do
76
+ Payload.read(_1, peek: peek)
77
+ end
78
+ rescue StandardError => e
79
+ raise "#{path}: #{e}"
80
+ end
63
81
  return :force if force_errors? && payload.error?
64
82
 
65
83
  payload
@@ -10,8 +10,8 @@ module HTTPDisk
10
10
  @env, @ignore_params = env, ignore_params
11
11
 
12
12
  # sanity checks
13
- raise 'http/https required' if env.url.scheme !~ /^https?$/
14
- raise 'hostname required' if !env.url.host
13
+ raise InvalidUrl, "http/https required #{env.url.inspect}" if env.url.scheme !~ /^https?$/
14
+ raise InvalidUrl, "hostname required #{env.url.inspect}" if !env.url.host
15
15
  end
16
16
 
17
17
  def url
@@ -73,7 +73,8 @@ module HTTPDisk
73
73
  _1.method = request_method
74
74
  _1.request_body = request_body
75
75
  _1.request_headers = request_headers
76
- _1.url = request_url
76
+ # Run the url through Faraday to make sure we see the same stuff as middleware.
77
+ _1.url = Faraday.new.build_url(request_url)
77
78
  end
78
79
 
79
80
  # now print status
@@ -159,7 +160,9 @@ module HTTPDisk
159
160
 
160
161
  # Options to HTTPDisk::Client
161
162
  def client_options
162
- options.slice(:dir, :expires, :force, :force_errors)
163
+ client_options = options.slice(:dir, :expires, :force, :force_errors)
164
+ client_options[:utf8] = true
165
+ client_options
163
166
  end
164
167
  end
165
168
  end
@@ -1,3 +1,4 @@
1
+ require 'content-type'
1
2
  require 'faraday'
2
3
  require 'logger'
3
4
 
@@ -14,6 +15,7 @@ module HTTPDisk
14
15
  _1.boolean :force_errors
15
16
  _1.array :ignore_params, default: []
16
17
  _1.on :logger, type: [:boolean, Logger]
18
+ _1.boolean :utf8
17
19
  end
18
20
 
19
21
  super(app, options)
@@ -23,17 +25,19 @@ module HTTPDisk
23
25
  def call(env)
24
26
  cache_key = CacheKey.new(env, ignore_params: ignore_params)
25
27
  logger&.info("#{env.method.upcase} #{env.url} (#{cache.status(cache_key)})")
28
+ env[:httpdisk_diskpath] = cache.diskpath(cache_key)
26
29
 
27
- if cached_response = read(cache_key, env)
28
- cached_response.env[:httpdisk] = true
29
- return cached_response
30
- end
31
-
32
- # miss
33
- perform(env).tap do |response|
30
+ # check cache, fallback to network
31
+ if response = read(cache_key, env)
32
+ response.env[:httpdisk] = true
33
+ else
34
+ response = perform(env)
34
35
  response.env[:httpdisk] = false
35
36
  write(cache_key, env, response)
36
37
  end
38
+
39
+ encode_body(response)
40
+ response
37
41
  end
38
42
 
39
43
  # Returns cache status for this request
@@ -101,6 +105,54 @@ module HTTPDisk
101
105
  err.to_s =~ /#{proxy.host}.*#{proxy.port}/
102
106
  end
103
107
 
108
+ # Set string encoding for response body. The cache always returns
109
+ # ASCII-8BIT, but we have no idea what the encoding will be from the
110
+ # network. Not all adapters honor Content-Type (including the default
111
+ # adapter).
112
+ def encode_body(response)
113
+ body = response.body || ''
114
+
115
+ # parse Content-Type
116
+ begin
117
+ content_type = response['Content-Type'] && ContentType.parse(response['Content-Type'])
118
+ rescue Parslet::ParseFailed
119
+ # unparsable
120
+ end
121
+
122
+ # look at charset and set body encoding if necessary
123
+ encoding = encoding_for(content_type)
124
+ if body.encoding != encoding
125
+ body = body.dup if body.frozen?
126
+ body.force_encoding(encoding)
127
+ end
128
+
129
+ # if :utf8, force body to UTF-8
130
+ if options[:utf8] && content_type && response_text?(content_type)
131
+ body = body.dup if body.frozen?
132
+ begin
133
+ body.encode!('UTF-8', invalid: :replace, undef: :replace, replace: '?')
134
+ rescue Encoding::ConverterNotFoundError
135
+ # rare, can't do anything here
136
+ body = "httpdisk could not convert from #{body.encoding.name} to UTF-8"
137
+ end
138
+ end
139
+
140
+ response.env[:body] = body
141
+ end
142
+
143
+ def encoding_for(content_type)
144
+ begin
145
+ return Encoding.find(content_type.charset) if content_type
146
+ rescue ArgumentError
147
+ # unknown charset
148
+ end
149
+ Encoding::ASCII_8BIT
150
+ end
151
+
152
+ def response_text?(content_type)
153
+ content_type.type == 'text' || content_type.mime_type == 'application/json'
154
+ end
155
+
104
156
  #
105
157
  # options
106
158
  #
@@ -2,4 +2,6 @@ module HTTPDisk
2
2
  ERROR_STATUS = 999
3
3
 
4
4
  class CliError < StandardError; end
5
+
6
+ class InvalidUrl < StandardError; end
5
7
  end
@@ -29,7 +29,9 @@ module HTTPDisk
29
29
 
30
30
  def run_one(path)
31
31
  # read payload & body
32
- payload = Zlib::GzipReader.open(path) { Payload.read(_1) }
32
+ payload = Zlib::GzipReader.open(path, encoding: 'ASCII-8BIT') do
33
+ Payload.read(_1)
34
+ end
33
35
  body = prepare_body(payload)
34
36
 
35
37
  # collect all_matches
@@ -1,3 +1,3 @@
1
1
  module HTTPDisk
2
- VERSION = '0.4.0'.freeze
2
+ VERSION = '0.5.0'.freeze
3
3
  end
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: httpdisk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Doppelt
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-06-05 00:00:00.000000000 Z
11
+ date: 2021-06-30 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: content-type
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.0'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: faraday
15
29
  requirement: !ruby/object:Gem::Requirement