httpdisk 0.2.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +9 -0
- data/Gemfile.lock +16 -4
- data/README.md +32 -6
- data/Rakefile +14 -11
- data/bin/httpdisk +9 -7
- data/bin/httpdisk-grep +46 -0
- data/httpdisk.gemspec +1 -0
- data/lib/httpdisk.rb +10 -5
- data/lib/httpdisk/cache.rb +31 -21
- data/lib/httpdisk/cache_key.rb +15 -6
- data/lib/httpdisk/cli/args.rb +57 -0
- data/lib/httpdisk/cli/main.rb +169 -0
- data/lib/httpdisk/client.rb +82 -19
- data/lib/httpdisk/error.rb +4 -0
- data/lib/httpdisk/grep/args.rb +35 -0
- data/lib/httpdisk/grep/main.rb +112 -0
- data/lib/httpdisk/grep/printer.rb +99 -0
- data/lib/httpdisk/payload.rb +7 -5
- data/lib/httpdisk/slop_duration.rb +24 -0
- data/lib/httpdisk/sloptions.rb +105 -0
- data/lib/httpdisk/version.rb +1 -1
- metadata +25 -4
- data/lib/httpdisk/cli.rb +0 -223
- data/lib/httpdisk/cli_slop.rb +0 -54
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 534d055da653551f2dbf23db1c323c6858fcaa0bab09354491c175bffadf6d54
|
4
|
+
data.tar.gz: 825a6779fec3c5ce2828363e9022da231017ae2d13aa8fa88acff9692d8cd7fe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 750eaaca6bba23b1a8af351d916c79e470928c23cb3f6517662d6e99a7278c963cfe9dbbde564282d1367dd0f6bb34c166066b2babdb29d1c29f398fe9498245
|
7
|
+
data.tar.gz: 951f2f668018872bd4a970be506e4fc4c9dce302b4fa03e310eb0269dba428216bdd23a4da1a6fa86ad75f2e75b5d3fbe9988e1018cd328c6bbf6eb790d3406c
|
data/.rubocop.yml
CHANGED
@@ -2,18 +2,27 @@ AllCops:
|
|
2
2
|
NewCops: enable
|
3
3
|
SuggestExtensions: false
|
4
4
|
|
5
|
+
# this is buggy in 2.7.0
|
6
|
+
Style/HashTransformValues: { Enabled: false }
|
7
|
+
|
5
8
|
# minimal personal preference
|
6
9
|
Layout/CaseIndentation: { Enabled: false }
|
7
10
|
Layout/EndAlignment: { EnforcedStyleAlignWith: variable }
|
8
11
|
Lint/AssignmentInCondition: { Enabled: false }
|
12
|
+
Lint/NonLocalExitFromIterator: { Enabled: false }
|
9
13
|
Metrics: { Enabled: false }
|
10
14
|
Naming/MethodParameterName: { Enabled: false }
|
11
15
|
Naming/VariableNumber: { Enabled: false }
|
12
16
|
Style/Documentation: { Enabled: false }
|
17
|
+
Style/DoubleNegation: { Enabled: false }
|
18
|
+
Style/EmptyCaseCondition: { Enabled: false }
|
13
19
|
Style/FrozenStringLiteralComment: { Enabled: false }
|
20
|
+
Style/GuardClause: { Enabled: false }
|
14
21
|
Style/IfUnlessModifier: { Enabled: false }
|
15
22
|
Style/NegatedIf: { Enabled: false }
|
23
|
+
Style/NumericPredicate: { Enabled: false }
|
16
24
|
Style/ParallelAssignment: { Enabled: false }
|
25
|
+
Style/SoleNestedConditional: { Enabled: false }
|
17
26
|
Style/StderrPuts: { Enabled: false }
|
18
27
|
Style/TrailingCommaInArrayLiteral: { EnforcedStyleForMultiline: consistent_comma }
|
19
28
|
Style/TrailingCommaInHashLiteral: { EnforcedStyleForMultiline: consistent_comma }
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
httpdisk (0.2
|
4
|
+
httpdisk (0.5.2)
|
5
|
+
content-type (~> 0.0)
|
5
6
|
faraday (~> 1.4)
|
6
7
|
faraday-cookie_jar (~> 0.0)
|
7
8
|
faraday_middleware (~> 1.0)
|
@@ -14,26 +15,36 @@ GEM
|
|
14
15
|
public_suffix (>= 2.0.2, < 5.0)
|
15
16
|
ast (2.4.2)
|
16
17
|
coderay (1.1.3)
|
18
|
+
content-type (0.0.1)
|
19
|
+
parslet (~> 1.5)
|
17
20
|
crack (0.4.5)
|
18
21
|
rexml
|
19
22
|
domain_name (0.5.20190701)
|
20
23
|
unf (>= 0.0.5, < 1.0.0)
|
21
|
-
faraday (1.
|
24
|
+
faraday (1.5.0)
|
25
|
+
faraday-em_http (~> 1.0)
|
26
|
+
faraday-em_synchrony (~> 1.0)
|
22
27
|
faraday-excon (~> 1.1)
|
28
|
+
faraday-httpclient (~> 1.0.1)
|
23
29
|
faraday-net_http (~> 1.0)
|
24
30
|
faraday-net_http_persistent (~> 1.1)
|
31
|
+
faraday-patron (~> 1.0)
|
25
32
|
multipart-post (>= 1.2, < 3)
|
26
33
|
ruby2_keywords (>= 0.0.4)
|
27
34
|
faraday-cookie_jar (0.0.7)
|
28
35
|
faraday (>= 0.8.0)
|
29
36
|
http-cookie (~> 1.0.0)
|
37
|
+
faraday-em_http (1.0.0)
|
38
|
+
faraday-em_synchrony (1.0.0)
|
30
39
|
faraday-excon (1.1.0)
|
40
|
+
faraday-httpclient (1.0.1)
|
31
41
|
faraday-net_http (1.0.1)
|
32
42
|
faraday-net_http_persistent (1.1.0)
|
43
|
+
faraday-patron (1.0.0)
|
33
44
|
faraday_middleware (1.0.0)
|
34
45
|
faraday (~> 1.0)
|
35
46
|
hashdiff (1.0.1)
|
36
|
-
http-cookie (1.0.
|
47
|
+
http-cookie (1.0.4)
|
37
48
|
domain_name (~> 0.5)
|
38
49
|
method_source (1.0.0)
|
39
50
|
minitest (5.14.4)
|
@@ -42,6 +53,7 @@ GEM
|
|
42
53
|
parallel (1.20.1)
|
43
54
|
parser (3.0.1.1)
|
44
55
|
ast (~> 2.4.1)
|
56
|
+
parslet (1.8.2)
|
45
57
|
pry (0.13.1)
|
46
58
|
coderay (~> 1.1)
|
47
59
|
method_source (~> 1.0)
|
@@ -63,7 +75,7 @@ GEM
|
|
63
75
|
parser (>= 3.0.1.1)
|
64
76
|
ruby-progressbar (1.11.0)
|
65
77
|
ruby2_keywords (0.0.4)
|
66
|
-
slop (4.
|
78
|
+
slop (4.9.1)
|
67
79
|
unf (0.1.4)
|
68
80
|
unf_ext
|
69
81
|
unf_ext (0.0.7.7)
|
data/README.md
CHANGED
@@ -65,7 +65,6 @@ faraday = Faraday.new do
|
|
65
65
|
_1.request :url_encoded # auto-encode form bodies
|
66
66
|
_1.response :json # auto-decode JSON responses
|
67
67
|
_1.response :follow_redirects # follow redirects (should be above httpdisk)
|
68
|
-
_1.response :encoding # set Ruby string encoding based on Content-Type (should be above httpdisk)
|
69
68
|
_1.use :httpdisk
|
70
69
|
_1.request :retry # retry failed responses (should be below httpdisk)
|
71
70
|
end
|
@@ -120,21 +119,27 @@ httpdisk caches all responses. POST responses are cached, along with 500 respons
|
|
120
119
|
|
121
120
|
In general, if you make a request it will be cached regardless of the outcome.
|
122
121
|
|
122
|
+
## String Encoding
|
123
|
+
|
124
|
+
httpdisk will honor the `Content-Type` from responses. Unfortunately, it is entirely possible to get invalid bodies if the `Content-Type` doesn't match the bytes. This is a major bummer, so httpdisk provides a `utf8:` option that forces text response bodies to UTF-8.
|
125
|
+
|
123
126
|
## Configuration
|
124
127
|
|
125
128
|
httpdisk supports a few options:
|
126
129
|
|
127
130
|
- `dir:` location for disk cache, defaults to `~/httpdisk`
|
128
|
-
- `
|
131
|
+
- `expires:` when to expire cached requests, default is nil (never expire)
|
129
132
|
- `force:` don't read anything from cache (but still write)
|
130
133
|
- `force_errors:` don't read errors from cache (but still write)
|
134
|
+
- `ignore_params:` array of query params to ignore when calculating cache_key
|
131
135
|
- `logger:` log requests to stderr, or pass your own logger
|
136
|
+
- `utf8:` if true, force text response bodies to valid UTF-8
|
132
137
|
|
133
138
|
Pass these in when setting up Faraday:
|
134
139
|
|
135
140
|
```ruby
|
136
141
|
faraday = Faraday.new do
|
137
|
-
_1.use :httpdisk,
|
142
|
+
_1.use :httpdisk, expires: 7*24*60*60, force: true
|
138
143
|
end
|
139
144
|
```
|
140
145
|
|
@@ -162,10 +167,13 @@ Specific to httpdisk:
|
|
162
167
|
--force don't read anything from cache (but still write)
|
163
168
|
--force-errors don't read errors from cache (but still write)
|
164
169
|
--status show status for a url in the cache
|
165
|
-
--version show version
|
166
|
-
--help show this help
|
167
170
|
```
|
168
171
|
|
172
|
+
## Goodies: httpdisk-grep
|
173
|
+
|
174
|
+
The `httpdisk-grep` command makes it easy to search your cache directory.
|
175
|
+
It can be challenging to use grep/ripgrep because cache files are compressed and JSON bodies often lack newlines. httpdisk-grep is the right tool for the job. See `httpdisk-grep --help`.
|
176
|
+
|
169
177
|
## Limitations & Gotchas
|
170
178
|
|
171
179
|
- Transient errors are cached. This is appropriate for many use cases (like crawling) but can be confusing. Use `httpdisk --status` to debug.
|
@@ -176,10 +184,28 @@ Specific to httpdisk:
|
|
176
184
|
|
177
185
|
## Changelog
|
178
186
|
|
187
|
+
#### 0.5
|
188
|
+
|
189
|
+
- honor Content-Type
|
190
|
+
- added `:utf8` option to force text-like response bodies to UTF-8
|
191
|
+
|
192
|
+
#### 0.4
|
193
|
+
|
194
|
+
- added httpdisk-grep for searching cache files
|
195
|
+
- added HTTPDisk::Cache#delete
|
196
|
+
- rename `:expires_in` to `:expires`
|
197
|
+
|
198
|
+
#### 0.3
|
199
|
+
|
200
|
+
- added `:ignore_params`, for ignoring query params when generating cache keys
|
201
|
+
- HTTP 40x & 50x responses return :error status (and respond to `force_errors`)
|
202
|
+
|
179
203
|
#### 0.2 - May 2020
|
204
|
+
|
180
205
|
- added `response.env[:httpdisk]`, which will be true if the response came from the cache
|
181
|
-
- `:logger` option
|
206
|
+
- added `:logger` option
|
182
207
|
- rake rubocop
|
183
208
|
|
184
209
|
#### 0.1 - April 2020
|
210
|
+
|
185
211
|
- Original release
|
data/Rakefile
CHANGED
@@ -10,12 +10,15 @@ spec = Gem::Specification.load('httpdisk.gemspec')
|
|
10
10
|
#
|
11
11
|
|
12
12
|
# test (default)
|
13
|
-
Rake::TestTask.new
|
13
|
+
Rake::TestTask.new do
|
14
|
+
_1.libs << 'test'
|
15
|
+
_1.warning = false # https://github.com/lostisland/faraday/issues/1285
|
16
|
+
end
|
14
17
|
task default: :test
|
15
18
|
|
16
|
-
# Watch files, run tests whenever something changes
|
19
|
+
# Watch rb files, run tests whenever something changes
|
17
20
|
task :watch do
|
18
|
-
|
21
|
+
sh "find . -name '*.rb' | entr -c rake"
|
19
22
|
end
|
20
23
|
|
21
24
|
#
|
@@ -23,7 +26,7 @@ end
|
|
23
26
|
#
|
24
27
|
|
25
28
|
task :pry do
|
26
|
-
|
29
|
+
sh 'pry -I lib -r httpdisk.rb'
|
27
30
|
end
|
28
31
|
|
29
32
|
#
|
@@ -31,7 +34,7 @@ end
|
|
31
34
|
#
|
32
35
|
|
33
36
|
task :rubocop do
|
34
|
-
|
37
|
+
sh 'bundle exec rubocop -A .'
|
35
38
|
end
|
36
39
|
|
37
40
|
#
|
@@ -39,17 +42,17 @@ end
|
|
39
42
|
#
|
40
43
|
|
41
44
|
task :build do
|
42
|
-
|
45
|
+
sh 'gem build --quiet httpdisk.gemspec'
|
43
46
|
end
|
44
47
|
|
45
48
|
task install: :build do
|
46
|
-
|
49
|
+
sh "gem install --quiet httpdisk-#{spec.version}.gem"
|
47
50
|
end
|
48
51
|
|
49
|
-
task release: %i[test build] do
|
52
|
+
task release: %i[rubocop test build] do
|
50
53
|
raise "looks like git isn't clean" unless `git status --porcelain`.empty?
|
51
54
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
+
sh "git tag -a #{spec.version} -m 'Tagging #{spec.version}'"
|
56
|
+
sh 'git push --tags'
|
57
|
+
sh "gem push httpdisk-#{spec.version}.gem"
|
55
58
|
end
|
data/bin/httpdisk
CHANGED
@@ -6,20 +6,22 @@
|
|
6
6
|
|
7
7
|
$LOAD_PATH.unshift(File.join(__dir__, '../lib'))
|
8
8
|
|
9
|
+
BIN = File.basename($PROGRAM_NAME)
|
10
|
+
|
9
11
|
def puts_error(s)
|
10
|
-
$stderr.puts "
|
12
|
+
$stderr.puts "#{BIN}: #{s}"
|
11
13
|
end
|
12
14
|
|
13
15
|
#
|
14
16
|
# Load the bare minimum and parse args with slop. We do this separately for speed.
|
15
17
|
#
|
16
18
|
|
17
|
-
require 'httpdisk/
|
19
|
+
require 'httpdisk/cli/args'
|
18
20
|
begin
|
19
|
-
slop = HTTPDisk::
|
21
|
+
slop = HTTPDisk::Cli::Args.slop(ARGV)
|
20
22
|
rescue Slop::Error => e
|
21
23
|
puts_error(e) if e.message != ''
|
22
|
-
puts_error("try '
|
24
|
+
puts_error("try '#{BIN} --help' for more information")
|
23
25
|
exit 1
|
24
26
|
end
|
25
27
|
|
@@ -28,11 +30,11 @@ end
|
|
28
30
|
#
|
29
31
|
|
30
32
|
require 'httpdisk'
|
31
|
-
|
33
|
+
main = HTTPDisk::Cli::Main.new(slop)
|
32
34
|
begin
|
33
|
-
|
35
|
+
main.run
|
34
36
|
rescue StandardError => e
|
35
|
-
puts_error(e) if !
|
37
|
+
puts_error(e) if !main.options[:silent]
|
36
38
|
if ENV['HTTPDISK_DEBUG']
|
37
39
|
$stderr.puts
|
38
40
|
$stderr.puts e.backtrace.join("\n")
|
data/bin/httpdisk-grep
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
#
|
4
|
+
# Search an HTTPDisk cache, similar to grep.
|
5
|
+
#
|
6
|
+
|
7
|
+
$LOAD_PATH.unshift(File.join(__dir__, '../lib'))
|
8
|
+
|
9
|
+
BIN = File.basename($PROGRAM_NAME)
|
10
|
+
|
11
|
+
def puts_error(s)
|
12
|
+
$stderr.puts "#{BIN}: #{s}"
|
13
|
+
end
|
14
|
+
|
15
|
+
#
|
16
|
+
# Load the bare minimum and parse args with slop. We do this separately for speed.
|
17
|
+
#
|
18
|
+
|
19
|
+
require 'httpdisk/grep/args'
|
20
|
+
begin
|
21
|
+
slop = HTTPDisk::Grep::Args.slop(ARGV)
|
22
|
+
rescue Slop::Error => e
|
23
|
+
puts_error(e) if e.message != ''
|
24
|
+
puts_error("try '#{BIN} --help' for more information")
|
25
|
+
exit 1
|
26
|
+
end
|
27
|
+
|
28
|
+
#
|
29
|
+
# now load everything and run
|
30
|
+
#
|
31
|
+
|
32
|
+
require 'httpdisk'
|
33
|
+
|
34
|
+
main = HTTPDisk::Grep::Main.new(slop)
|
35
|
+
begin
|
36
|
+
success = main.run
|
37
|
+
exit 1 if !success
|
38
|
+
rescue StandardError => e
|
39
|
+
puts_error(e)
|
40
|
+
if ENV['HTTPDISK_DEBUG']
|
41
|
+
$stderr.puts
|
42
|
+
$stderr.puts e.class
|
43
|
+
$stderr.puts e.backtrace.join("\n")
|
44
|
+
end
|
45
|
+
exit 2
|
46
|
+
end
|
data/httpdisk.gemspec
CHANGED
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
|
|
21
21
|
s.require_paths = ['lib']
|
22
22
|
|
23
23
|
# gem dependencies
|
24
|
+
s.add_dependency 'content-type', '~> 0.0'
|
24
25
|
s.add_dependency 'faraday', '~> 1.4'
|
25
26
|
s.add_dependency 'faraday-cookie_jar', '~> 0.0'
|
26
27
|
s.add_dependency 'faraday_middleware', '~> 1.0'
|
data/lib/httpdisk.rb
CHANGED
@@ -1,12 +1,17 @@
|
|
1
1
|
require 'httpdisk/cache_key'
|
2
2
|
require 'httpdisk/cache'
|
3
|
-
require 'httpdisk/cli_slop'
|
4
|
-
require 'httpdisk/cli'
|
5
3
|
require 'httpdisk/client'
|
6
4
|
require 'httpdisk/error'
|
7
5
|
require 'httpdisk/payload'
|
6
|
+
require 'httpdisk/slop_duration'
|
7
|
+
require 'httpdisk/sloptions'
|
8
8
|
require 'httpdisk/version'
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
10
|
+
# cli
|
11
|
+
require 'httpdisk/cli/args'
|
12
|
+
require 'httpdisk/cli/main'
|
13
|
+
|
14
|
+
# grep
|
15
|
+
require 'httpdisk/grep/args'
|
16
|
+
require 'httpdisk/grep/main'
|
17
|
+
require 'httpdisk/grep/printer'
|
data/lib/httpdisk/cache.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'fileutils'
|
2
|
+
require 'tempfile'
|
2
3
|
|
3
4
|
module HTTPDisk
|
4
5
|
# Disk cache for cache_keys => response. Files are compressed.
|
@@ -7,24 +8,9 @@ module HTTPDisk
|
|
7
8
|
|
8
9
|
def initialize(options)
|
9
10
|
@options = options
|
10
|
-
|
11
|
-
# heavy sanity checking on arguments here
|
12
|
-
if !dir.is_a?(String)
|
13
|
-
raise ArgumentError, "expected :dir to be a string, not #{dir.inspect}"
|
14
|
-
end
|
15
|
-
if expires_in && !expires_in.is_a?(Integer)
|
16
|
-
raise ArgumentError, "expected :expires_in to be an integer, not #{expires_in.inspect}"
|
17
|
-
end
|
18
|
-
|
19
|
-
%i[force force_errors].each do
|
20
|
-
value = send(_1)
|
21
|
-
if ![nil, true, false].include?(value)
|
22
|
-
raise ArgumentError, "expected #{_1} to be a boolean, not #{value.inspect}"
|
23
|
-
end
|
24
|
-
end
|
25
11
|
end
|
26
12
|
|
27
|
-
%i[dir
|
13
|
+
%i[dir expires force force_errors].each do |method|
|
28
14
|
define_method(method) do
|
29
15
|
options[method]
|
30
16
|
end
|
@@ -43,14 +29,32 @@ module HTTPDisk
|
|
43
29
|
payload_or_status = read0(cache_key, peek: true)
|
44
30
|
return payload_or_status if payload_or_status.is_a?(Symbol)
|
45
31
|
|
46
|
-
payload_or_status.
|
32
|
+
payload_or_status.error? ? :error : :hit
|
47
33
|
end
|
48
34
|
|
49
35
|
# Write response to the disk cache
|
50
36
|
def write(cache_key, payload)
|
51
37
|
path = diskpath(cache_key)
|
52
38
|
FileUtils.mkdir_p(File.dirname(path))
|
53
|
-
|
39
|
+
|
40
|
+
# Atomically write gzipped payload. Put our underlying Tempfile into
|
41
|
+
# binmode to avoid accidental newline conversion or string encoding. Not
|
42
|
+
# required for *nix systems, but I've heard rumors it's helpful for
|
43
|
+
# Windows.
|
44
|
+
Tempfile.new(binmode: true).tap do |tmp|
|
45
|
+
Zlib::GzipWriter.new(tmp).tap do |gzip|
|
46
|
+
payload.write(gzip)
|
47
|
+
gzip.close
|
48
|
+
end
|
49
|
+
tmp.close
|
50
|
+
FileUtils.mv(tmp.path, path)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
# Delete existing response, if any
|
55
|
+
def delete(cache_key)
|
56
|
+
path = diskpath(cache_key)
|
57
|
+
FileUtils.rm(path) if File.exist?(path)
|
54
58
|
end
|
55
59
|
|
56
60
|
# Relative path for this cache_key based on the cache key
|
@@ -68,15 +72,21 @@ module HTTPDisk
|
|
68
72
|
return :stale if expired?(path)
|
69
73
|
return :force if force?
|
70
74
|
|
71
|
-
|
72
|
-
|
75
|
+
begin
|
76
|
+
payload = Zlib::GzipReader.open(path, encoding: 'ASCII-8BIT') do
|
77
|
+
Payload.read(_1, peek: peek)
|
78
|
+
end
|
79
|
+
rescue StandardError => e
|
80
|
+
raise "#{path}: #{e}"
|
81
|
+
end
|
82
|
+
return :force if force_errors? && payload.error?
|
73
83
|
|
74
84
|
payload
|
75
85
|
end
|
76
86
|
|
77
87
|
# Is this path expired?
|
78
88
|
def expired?(path)
|
79
|
-
|
89
|
+
expires && File.stat(path).mtime < Time.now - expires
|
80
90
|
end
|
81
91
|
end
|
82
92
|
end
|