httpdisk 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7b1e66859e069e390390dd87fac3753ee43501613884bfca2d7f40f70a224274
4
- data.tar.gz: 8de194055b6fc7ac858305eb739dc25494b139413d36decbbbaeaa00ce8f1da0
3
+ metadata.gz: fece53d7b5f7694a0f533797d119495ad2e8bc79667560f458aa23c0fb6eebf5
4
+ data.tar.gz: 2cb035d54e62cb9195959797fbfb712aabab9fc78db2ddfaf59e714c1da66ba5
5
5
  SHA512:
6
- metadata.gz: 4594b3eb07cd683a901883484ce88ed3be63f4a65a1c2d9c1a2eb80c9791606ceaaf7287ca74ae35d5fa5015e3218133a7e1199111233841f58c67696edc3f0e
7
- data.tar.gz: 5d9f0dd6f3d8407e4b5133ec407d4aa56b6c442d42679c7715f9519b60a791ebb212e11c3c8474ad92bf4dd9e2d5d8daaded2eadcff4b210a6fa5b1e0245677b
6
+ metadata.gz: '09ace3872dfbe2764e42eff568576f44ee5581c750dfbb5a907bd26a6581e047d1529c8928e2f430d804e3d2669eac6c8ca3307c47e34bab8674e810550a4aa8'
7
+ data.tar.gz: 70ea93666f7dff7283fc1b6ef3860e52a2309828874a157e7fdd29cc48cef67f814907e2be18c87c9404c33b65b0fed2f9156b31cf747d3bb9398c60e8f11335
data/.rubocop.yml CHANGED
@@ -17,10 +17,12 @@ Style/Documentation: { Enabled: false }
17
17
  Style/DoubleNegation: { Enabled: false }
18
18
  Style/EmptyCaseCondition: { Enabled: false }
19
19
  Style/FrozenStringLiteralComment: { Enabled: false }
20
+ Style/GuardClause: { Enabled: false }
20
21
  Style/IfUnlessModifier: { Enabled: false }
21
22
  Style/NegatedIf: { Enabled: false }
22
23
  Style/NumericPredicate: { Enabled: false }
23
24
  Style/ParallelAssignment: { Enabled: false }
25
+ Style/SoleNestedConditional: { Enabled: false }
24
26
  Style/StderrPuts: { Enabled: false }
25
27
  Style/TrailingCommaInArrayLiteral: { EnforcedStyleForMultiline: consistent_comma }
26
28
  Style/TrailingCommaInHashLiteral: { EnforcedStyleForMultiline: consistent_comma }
data/Gemfile.lock CHANGED
@@ -1,7 +1,8 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- httpdisk (0.4.0)
4
+ httpdisk (0.5.0)
5
+ content-type (~> 0.0)
5
6
  faraday (~> 1.4)
6
7
  faraday-cookie_jar (~> 0.0)
7
8
  faraday_middleware (~> 1.0)
@@ -14,11 +15,13 @@ GEM
14
15
  public_suffix (>= 2.0.2, < 5.0)
15
16
  ast (2.4.2)
16
17
  coderay (1.1.3)
18
+ content-type (0.0.1)
19
+ parslet (~> 1.5)
17
20
  crack (0.4.5)
18
21
  rexml
19
22
  domain_name (0.5.20190701)
20
23
  unf (>= 0.0.5, < 1.0.0)
21
- faraday (1.4.2)
24
+ faraday (1.4.3)
22
25
  faraday-em_http (~> 1.0)
23
26
  faraday-em_synchrony (~> 1.0)
24
27
  faraday-excon (~> 1.1)
@@ -37,7 +40,7 @@ GEM
37
40
  faraday_middleware (1.0.0)
38
41
  faraday (~> 1.0)
39
42
  hashdiff (1.0.1)
40
- http-cookie (1.0.3)
43
+ http-cookie (1.0.4)
41
44
  domain_name (~> 0.5)
42
45
  method_source (1.0.0)
43
46
  minitest (5.14.4)
@@ -46,6 +49,7 @@ GEM
46
49
  parallel (1.20.1)
47
50
  parser (3.0.1.1)
48
51
  ast (~> 2.4.1)
52
+ parslet (1.8.2)
49
53
  pry (0.13.1)
50
54
  coderay (~> 1.1)
51
55
  method_source (~> 1.0)
@@ -67,7 +71,7 @@ GEM
67
71
  parser (>= 3.0.1.1)
68
72
  ruby-progressbar (1.11.0)
69
73
  ruby2_keywords (0.0.4)
70
- slop (4.9.0)
74
+ slop (4.9.1)
71
75
  unf (0.1.4)
72
76
  unf_ext
73
77
  unf_ext (0.0.7.7)
data/README.md CHANGED
@@ -65,7 +65,6 @@ faraday = Faraday.new do
65
65
  _1.request :url_encoded # auto-encode form bodies
66
66
  _1.response :json # auto-decode JSON responses
67
67
  _1.response :follow_redirects # follow redirects (should be above httpdisk)
68
- _1.response :encoding # set Ruby string encoding based on Content-Type (should be above httpdisk)
69
68
  _1.use :httpdisk
70
69
  _1.request :retry # retry failed responses (should be below httpdisk)
71
70
  end
@@ -120,6 +119,10 @@ httpdisk caches all responses. POST responses are cached, along with 500 respons
120
119
 
121
120
  In general, if you make a request it will be cached regardless of the outcome.
122
121
 
122
+ ## String Encoding
123
+
124
+ httpdisk honors the `Content-Type` of each response when setting the string encoding of the body. Unfortunately, a body can still be invalid if the declared `Content-Type` doesn't match the actual bytes. To guard against this, httpdisk provides a `utf8:` option that forces text response bodies to valid UTF-8.
125
+
123
126
  ## Configuration
124
127
 
125
128
  httpdisk supports a few options:
@@ -130,6 +133,7 @@ httpdisk supports a few options:
130
133
  - `force_errors:` don't read errors from cache (but still write)
131
134
  - `ignore_params:` array of query params to ignore when calculating cache_key
132
135
  - `logger:` log requests to stderr, or pass your own logger
136
+ - `utf8:` if true, force text response bodies to valid UTF-8
133
137
 
134
138
  Pass these in when setting up Faraday:
135
139
 
@@ -180,6 +184,11 @@ It can be challenging to use grep/ripgrep because cache files are compressed and
180
184
 
181
185
  ## Changelog
182
186
 
187
+ #### 0.5
188
+
189
+ - honor Content-Type
190
+ - added `:utf8` option to force text-like response bodies to UTF-8
191
+
183
192
  #### 0.4
184
193
 
185
194
  - added httpdisk-grep for searching cache files
data/httpdisk.gemspec CHANGED
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
21
21
  s.require_paths = ['lib']
22
22
 
23
23
  # gem dependencies
24
+ s.add_dependency 'content-type', '~> 0.0'
24
25
  s.add_dependency 'faraday', '~> 1.4'
25
26
  s.add_dependency 'faraday-cookie_jar', '~> 0.0'
26
27
  s.add_dependency 'faraday_middleware', '~> 1.0'
@@ -35,7 +35,19 @@ module HTTPDisk
35
35
  def write(cache_key, payload)
36
36
  path = diskpath(cache_key)
37
37
  FileUtils.mkdir_p(File.dirname(path))
38
- Zlib::GzipWriter.open(path) { payload.write(_1) }
38
+
39
+ # Atomically write gzipped payload. Put our underlying Tempfile into
40
+ # binmode to avoid accidental newline conversion or string encoding. Not
41
+ # required for *nix systems, but I've heard rumors it's helpful for
42
+ # Windows.
43
+ Tempfile.new(binmode: true).tap do |tmp|
44
+ Zlib::GzipWriter.new(tmp).tap do |gzip|
45
+ payload.write(gzip)
46
+ gzip.close
47
+ end
48
+ tmp.close
49
+ FileUtils.mv(tmp.path, path)
50
+ end
39
51
  end
40
52
 
41
53
  # Delete existing response, if any
@@ -59,7 +71,13 @@ module HTTPDisk
59
71
  return :stale if expired?(path)
60
72
  return :force if force?
61
73
 
62
- payload = Zlib::GzipReader.open(path) { Payload.read(_1, peek: peek) }
74
+ begin
75
+ payload = Zlib::GzipReader.open(path, encoding: 'ASCII-8BIT') do
76
+ Payload.read(_1, peek: peek)
77
+ end
78
+ rescue StandardError => e
79
+ raise "#{path}: #{e}"
80
+ end
63
81
  return :force if force_errors? && payload.error?
64
82
 
65
83
  payload
@@ -10,8 +10,8 @@ module HTTPDisk
10
10
  @env, @ignore_params = env, ignore_params
11
11
 
12
12
  # sanity checks
13
- raise 'http/https required' if env.url.scheme !~ /^https?$/
14
- raise 'hostname required' if !env.url.host
13
+ raise InvalidUrl, "http/https required #{env.url.inspect}" if env.url.scheme !~ /^https?$/
14
+ raise InvalidUrl, "hostname required #{env.url.inspect}" if !env.url.host
15
15
  end
16
16
 
17
17
  def url
@@ -73,7 +73,8 @@ module HTTPDisk
73
73
  _1.method = request_method
74
74
  _1.request_body = request_body
75
75
  _1.request_headers = request_headers
76
- _1.url = request_url
76
+ # Run the url through Faraday to make sure we see the same stuff as middleware.
77
+ _1.url = Faraday.new.build_url(request_url)
77
78
  end
78
79
 
79
80
  # now print status
@@ -159,7 +160,9 @@ module HTTPDisk
159
160
 
160
161
  # Options to HTTPDisk::Client
161
162
  def client_options
162
- options.slice(:dir, :expires, :force, :force_errors)
163
+ client_options = options.slice(:dir, :expires, :force, :force_errors)
164
+ client_options[:utf8] = true
165
+ client_options
163
166
  end
164
167
  end
165
168
  end
@@ -1,3 +1,4 @@
1
+ require 'content-type'
1
2
  require 'faraday'
2
3
  require 'logger'
3
4
 
@@ -14,6 +15,7 @@ module HTTPDisk
14
15
  _1.boolean :force_errors
15
16
  _1.array :ignore_params, default: []
16
17
  _1.on :logger, type: [:boolean, Logger]
18
+ _1.boolean :utf8
17
19
  end
18
20
 
19
21
  super(app, options)
@@ -23,17 +25,19 @@ module HTTPDisk
23
25
  def call(env)
24
26
  cache_key = CacheKey.new(env, ignore_params: ignore_params)
25
27
  logger&.info("#{env.method.upcase} #{env.url} (#{cache.status(cache_key)})")
28
+ env[:httpdisk_diskpath] = cache.diskpath(cache_key)
26
29
 
27
- if cached_response = read(cache_key, env)
28
- cached_response.env[:httpdisk] = true
29
- return cached_response
30
- end
31
-
32
- # miss
33
- perform(env).tap do |response|
30
+ # check cache, fallback to network
31
+ if response = read(cache_key, env)
32
+ response.env[:httpdisk] = true
33
+ else
34
+ response = perform(env)
34
35
  response.env[:httpdisk] = false
35
36
  write(cache_key, env, response)
36
37
  end
38
+
39
+ encode_body(response)
40
+ response
37
41
  end
38
42
 
39
43
  # Returns cache status for this request
@@ -101,6 +105,54 @@ module HTTPDisk
101
105
  err.to_s =~ /#{proxy.host}.*#{proxy.port}/
102
106
  end
103
107
 
108
+ # Set string encoding for response body. The cache always returns
109
+ # ASCII-8BIT, but we have no idea what the encoding will be from the
110
+ # network. Not all adapters honor Content-Type (including the default
111
+ # adapter).
112
+ def encode_body(response)
113
+ body = response.body || ''
114
+
115
+ # parse Content-Type
116
+ begin
117
+ content_type = response['Content-Type'] && ContentType.parse(response['Content-Type'])
118
+ rescue Parslet::ParseFailed
119
+ # unparsable
120
+ end
121
+
122
+ # look at charset and set body encoding if necessary
123
+ encoding = encoding_for(content_type)
124
+ if body.encoding != encoding
125
+ body = body.dup if body.frozen?
126
+ body.force_encoding(encoding)
127
+ end
128
+
129
+ # if :utf8, force body to UTF-8
130
+ if options[:utf8] && content_type && response_text?(content_type)
131
+ body = body.dup if body.frozen?
132
+ begin
133
+ body.encode!('UTF-8', invalid: :replace, undef: :replace, replace: '?')
134
+ rescue Encoding::ConverterNotFoundError
135
+ # rare, can't do anything here
136
+ body = "httpdisk could not convert from #{body.encoding.name} to UTF-8"
137
+ end
138
+ end
139
+
140
+ response.env[:body] = body
141
+ end
142
+
143
+ def encoding_for(content_type)
144
+ begin
145
+ return Encoding.find(content_type.charset) if content_type
146
+ rescue ArgumentError
147
+ # unknown charset
148
+ end
149
+ Encoding::ASCII_8BIT
150
+ end
151
+
152
+ def response_text?(content_type)
153
+ content_type.type == 'text' || content_type.mime_type == 'application/json'
154
+ end
155
+
104
156
  #
105
157
  # options
106
158
  #
@@ -2,4 +2,6 @@ module HTTPDisk
2
2
  ERROR_STATUS = 999
3
3
 
4
4
  class CliError < StandardError; end
5
+
6
+ class InvalidUrl < StandardError; end
5
7
  end
@@ -29,7 +29,9 @@ module HTTPDisk
29
29
 
30
30
  def run_one(path)
31
31
  # read payload & body
32
- payload = Zlib::GzipReader.open(path) { Payload.read(_1) }
32
+ payload = Zlib::GzipReader.open(path, encoding: 'ASCII-8BIT') do
33
+ Payload.read(_1)
34
+ end
33
35
  body = prepare_body(payload)
34
36
 
35
37
  # collect all_matches
@@ -1,3 +1,3 @@
1
1
  module HTTPDisk
2
- VERSION = '0.4.0'.freeze
2
+ VERSION = '0.5.0'.freeze
3
3
  end
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: httpdisk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Doppelt
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-06-05 00:00:00.000000000 Z
11
+ date: 2021-06-30 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: content-type
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.0'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: faraday
15
29
  requirement: !ruby/object:Gem::Requirement