httpdisk 0.2.0 → 0.5.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +9 -0
- data/Gemfile.lock +16 -4
- data/README.md +32 -6
- data/Rakefile +14 -11
- data/bin/httpdisk +9 -7
- data/bin/httpdisk-grep +46 -0
- data/httpdisk.gemspec +1 -0
- data/lib/httpdisk.rb +10 -5
- data/lib/httpdisk/cache.rb +31 -21
- data/lib/httpdisk/cache_key.rb +15 -6
- data/lib/httpdisk/cli/args.rb +57 -0
- data/lib/httpdisk/cli/main.rb +169 -0
- data/lib/httpdisk/client.rb +82 -19
- data/lib/httpdisk/error.rb +4 -0
- data/lib/httpdisk/grep/args.rb +35 -0
- data/lib/httpdisk/grep/main.rb +112 -0
- data/lib/httpdisk/grep/printer.rb +99 -0
- data/lib/httpdisk/payload.rb +7 -5
- data/lib/httpdisk/slop_duration.rb +24 -0
- data/lib/httpdisk/sloptions.rb +105 -0
- data/lib/httpdisk/version.rb +1 -1
- metadata +25 -4
- data/lib/httpdisk/cli.rb +0 -223
- data/lib/httpdisk/cli_slop.rb +0 -54
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 534d055da653551f2dbf23db1c323c6858fcaa0bab09354491c175bffadf6d54
|
4
|
+
data.tar.gz: 825a6779fec3c5ce2828363e9022da231017ae2d13aa8fa88acff9692d8cd7fe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 750eaaca6bba23b1a8af351d916c79e470928c23cb3f6517662d6e99a7278c963cfe9dbbde564282d1367dd0f6bb34c166066b2babdb29d1c29f398fe9498245
|
7
|
+
data.tar.gz: 951f2f668018872bd4a970be506e4fc4c9dce302b4fa03e310eb0269dba428216bdd23a4da1a6fa86ad75f2e75b5d3fbe9988e1018cd328c6bbf6eb790d3406c
|
data/.rubocop.yml
CHANGED
@@ -2,18 +2,27 @@ AllCops:
|
|
2
2
|
NewCops: enable
|
3
3
|
SuggestExtensions: false
|
4
4
|
|
5
|
+
# this is buggy in 2.7.0
|
6
|
+
Style/HashTransformValues: { Enabled: false }
|
7
|
+
|
5
8
|
# minimal personal preference
|
6
9
|
Layout/CaseIndentation: { Enabled: false }
|
7
10
|
Layout/EndAlignment: { EnforcedStyleAlignWith: variable }
|
8
11
|
Lint/AssignmentInCondition: { Enabled: false }
|
12
|
+
Lint/NonLocalExitFromIterator: { Enabled: false }
|
9
13
|
Metrics: { Enabled: false }
|
10
14
|
Naming/MethodParameterName: { Enabled: false }
|
11
15
|
Naming/VariableNumber: { Enabled: false }
|
12
16
|
Style/Documentation: { Enabled: false }
|
17
|
+
Style/DoubleNegation: { Enabled: false }
|
18
|
+
Style/EmptyCaseCondition: { Enabled: false }
|
13
19
|
Style/FrozenStringLiteralComment: { Enabled: false }
|
20
|
+
Style/GuardClause: { Enabled: false }
|
14
21
|
Style/IfUnlessModifier: { Enabled: false }
|
15
22
|
Style/NegatedIf: { Enabled: false }
|
23
|
+
Style/NumericPredicate: { Enabled: false }
|
16
24
|
Style/ParallelAssignment: { Enabled: false }
|
25
|
+
Style/SoleNestedConditional: { Enabled: false }
|
17
26
|
Style/StderrPuts: { Enabled: false }
|
18
27
|
Style/TrailingCommaInArrayLiteral: { EnforcedStyleForMultiline: consistent_comma }
|
19
28
|
Style/TrailingCommaInHashLiteral: { EnforcedStyleForMultiline: consistent_comma }
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
httpdisk (0.2
|
4
|
+
httpdisk (0.5.2)
|
5
|
+
content-type (~> 0.0)
|
5
6
|
faraday (~> 1.4)
|
6
7
|
faraday-cookie_jar (~> 0.0)
|
7
8
|
faraday_middleware (~> 1.0)
|
@@ -14,26 +15,36 @@ GEM
|
|
14
15
|
public_suffix (>= 2.0.2, < 5.0)
|
15
16
|
ast (2.4.2)
|
16
17
|
coderay (1.1.3)
|
18
|
+
content-type (0.0.1)
|
19
|
+
parslet (~> 1.5)
|
17
20
|
crack (0.4.5)
|
18
21
|
rexml
|
19
22
|
domain_name (0.5.20190701)
|
20
23
|
unf (>= 0.0.5, < 1.0.0)
|
21
|
-
faraday (1.
|
24
|
+
faraday (1.5.0)
|
25
|
+
faraday-em_http (~> 1.0)
|
26
|
+
faraday-em_synchrony (~> 1.0)
|
22
27
|
faraday-excon (~> 1.1)
|
28
|
+
faraday-httpclient (~> 1.0.1)
|
23
29
|
faraday-net_http (~> 1.0)
|
24
30
|
faraday-net_http_persistent (~> 1.1)
|
31
|
+
faraday-patron (~> 1.0)
|
25
32
|
multipart-post (>= 1.2, < 3)
|
26
33
|
ruby2_keywords (>= 0.0.4)
|
27
34
|
faraday-cookie_jar (0.0.7)
|
28
35
|
faraday (>= 0.8.0)
|
29
36
|
http-cookie (~> 1.0.0)
|
37
|
+
faraday-em_http (1.0.0)
|
38
|
+
faraday-em_synchrony (1.0.0)
|
30
39
|
faraday-excon (1.1.0)
|
40
|
+
faraday-httpclient (1.0.1)
|
31
41
|
faraday-net_http (1.0.1)
|
32
42
|
faraday-net_http_persistent (1.1.0)
|
43
|
+
faraday-patron (1.0.0)
|
33
44
|
faraday_middleware (1.0.0)
|
34
45
|
faraday (~> 1.0)
|
35
46
|
hashdiff (1.0.1)
|
36
|
-
http-cookie (1.0.
|
47
|
+
http-cookie (1.0.4)
|
37
48
|
domain_name (~> 0.5)
|
38
49
|
method_source (1.0.0)
|
39
50
|
minitest (5.14.4)
|
@@ -42,6 +53,7 @@ GEM
|
|
42
53
|
parallel (1.20.1)
|
43
54
|
parser (3.0.1.1)
|
44
55
|
ast (~> 2.4.1)
|
56
|
+
parslet (1.8.2)
|
45
57
|
pry (0.13.1)
|
46
58
|
coderay (~> 1.1)
|
47
59
|
method_source (~> 1.0)
|
@@ -63,7 +75,7 @@ GEM
|
|
63
75
|
parser (>= 3.0.1.1)
|
64
76
|
ruby-progressbar (1.11.0)
|
65
77
|
ruby2_keywords (0.0.4)
|
66
|
-
slop (4.
|
78
|
+
slop (4.9.1)
|
67
79
|
unf (0.1.4)
|
68
80
|
unf_ext
|
69
81
|
unf_ext (0.0.7.7)
|
data/README.md
CHANGED
@@ -65,7 +65,6 @@ faraday = Faraday.new do
|
|
65
65
|
_1.request :url_encoded # auto-encode form bodies
|
66
66
|
_1.response :json # auto-decode JSON responses
|
67
67
|
_1.response :follow_redirects # follow redirects (should be above httpdisk)
|
68
|
-
_1.response :encoding # set Ruby string encoding based on Content-Type (should be above httpdisk)
|
69
68
|
_1.use :httpdisk
|
70
69
|
_1.request :retry # retry failed responses (should be below httpdisk)
|
71
70
|
end
|
@@ -120,21 +119,27 @@ httpdisk caches all responses. POST responses are cached, along with 500 respons
|
|
120
119
|
|
121
120
|
In general, if you make a request it will be cached regardless of the outcome.
|
122
121
|
|
122
|
+
## String Encoding
|
123
|
+
|
124
|
+
httpdisk will honor the `Content-Type` from responses. Unfortunately, it is entirely possible to get invalid bodies if the `Content-Type` doesn't match the bytes. This is a major bummer, so httpdisk provides a `utf8:` option that forces text response bodies to UTF-8.
|
125
|
+
|
123
126
|
## Configuration
|
124
127
|
|
125
128
|
httpdisk supports a few options:
|
126
129
|
|
127
130
|
- `dir:` location for disk cache, defaults to `~/httpdisk`
|
128
|
-
- `
|
131
|
+
- `expires:` when to expire cached requests, default is nil (never expire)
|
129
132
|
- `force:` don't read anything from cache (but still write)
|
130
133
|
- `force_errors:` don't read errors from cache (but still write)
|
134
|
+
- `ignore_params:` array of query params to ignore when calculating cache_key
|
131
135
|
- `logger:` log requests to stderr, or pass your own logger
|
136
|
+
- `utf8:` if true, force text response bodies to valid UTF-8
|
132
137
|
|
133
138
|
Pass these in when setting up Faraday:
|
134
139
|
|
135
140
|
```ruby
|
136
141
|
faraday = Faraday.new do
|
137
|
-
_1.use :httpdisk,
|
142
|
+
_1.use :httpdisk, expires: 7*24*60*60, force: true
|
138
143
|
end
|
139
144
|
```
|
140
145
|
|
@@ -162,10 +167,13 @@ Specific to httpdisk:
|
|
162
167
|
--force don't read anything from cache (but still write)
|
163
168
|
--force-errors don't read errors from cache (but still write)
|
164
169
|
--status show status for a url in the cache
|
165
|
-
--version show version
|
166
|
-
--help show this help
|
167
170
|
```
|
168
171
|
|
172
|
+
## Goodies: httpdisk-grep
|
173
|
+
|
174
|
+
The `httpdisk-grep` command makes it easy to search your cache directory.
|
175
|
+
It can be challenging to use grep/ripgrep because cache files are compressed and JSON bodies often lack newlines. httpdisk-grep is the right tool for the job. See `httpdisk-grep --help`.
|
176
|
+
|
169
177
|
## Limitations & Gotchas
|
170
178
|
|
171
179
|
- Transient errors are cached. This is appropriate for many use cases (like crawling) but can be confusing. Use `httpdisk --status` to debug.
|
@@ -176,10 +184,28 @@ Specific to httpdisk:
|
|
176
184
|
|
177
185
|
## Changelog
|
178
186
|
|
187
|
+
#### 0.5
|
188
|
+
|
189
|
+
- honor Content-Type
|
190
|
+
- added `:utf8` option to force text-like response bodies to UTF-8
|
191
|
+
|
192
|
+
#### 0.4
|
193
|
+
|
194
|
+
- added httpdisk-grep for searching cache files
|
195
|
+
- added HTTPDisk::Cache#delete
|
196
|
+
- rename `:expires_in` to `:expires`
|
197
|
+
|
198
|
+
#### 0.3
|
199
|
+
|
200
|
+
- added :ignore_params, for ignoring query params when generating cache keys
|
201
|
+
- HTTP 40x & 50x responses return :error status (and respond to `force_errors`)
|
202
|
+
|
179
203
|
#### 0.2 - May 2020
|
204
|
+
|
180
205
|
- added `response.env[:httpdisk]`, which will be true if the response came from the cache
|
181
|
-
- `:logger` option
|
206
|
+
- added `:logger` option
|
182
207
|
- rake rubocop
|
183
208
|
|
184
209
|
#### 0.1 - April 2020
|
210
|
+
|
185
211
|
- Original release
|
data/Rakefile
CHANGED
@@ -10,12 +10,15 @@ spec = Gem::Specification.load('httpdisk.gemspec')
|
|
10
10
|
#
|
11
11
|
|
12
12
|
# test (default)
|
13
|
-
Rake::TestTask.new
|
13
|
+
Rake::TestTask.new do
|
14
|
+
_1.libs << 'test'
|
15
|
+
_1.warning = false # https://github.com/lostisland/faraday/issues/1285
|
16
|
+
end
|
14
17
|
task default: :test
|
15
18
|
|
16
|
-
# Watch files, run tests whenever something changes
|
19
|
+
# Watch rb files, run tests whenever something changes
|
17
20
|
task :watch do
|
18
|
-
|
21
|
+
sh "find . -name '*.rb' | entr -c rake"
|
19
22
|
end
|
20
23
|
|
21
24
|
#
|
@@ -23,7 +26,7 @@ end
|
|
23
26
|
#
|
24
27
|
|
25
28
|
task :pry do
|
26
|
-
|
29
|
+
sh 'pry -I lib -r httpdisk.rb'
|
27
30
|
end
|
28
31
|
|
29
32
|
#
|
@@ -31,7 +34,7 @@ end
|
|
31
34
|
#
|
32
35
|
|
33
36
|
task :rubocop do
|
34
|
-
|
37
|
+
sh 'bundle exec rubocop -A .'
|
35
38
|
end
|
36
39
|
|
37
40
|
#
|
@@ -39,17 +42,17 @@ end
|
|
39
42
|
#
|
40
43
|
|
41
44
|
task :build do
|
42
|
-
|
45
|
+
sh 'gem build --quiet httpdisk.gemspec'
|
43
46
|
end
|
44
47
|
|
45
48
|
task install: :build do
|
46
|
-
|
49
|
+
sh "gem install --quiet httpdisk-#{spec.version}.gem"
|
47
50
|
end
|
48
51
|
|
49
|
-
task release: %i[test build] do
|
52
|
+
task release: %i[rubocop test build] do
|
50
53
|
raise "looks like git isn't clean" unless `git status --porcelain`.empty?
|
51
54
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
+
sh "git tag -a #{spec.version} -m 'Tagging #{spec.version}'"
|
56
|
+
sh 'git push --tags'
|
57
|
+
sh "gem push httpdisk-#{spec.version}.gem"
|
55
58
|
end
|
data/bin/httpdisk
CHANGED
@@ -6,20 +6,22 @@
|
|
6
6
|
|
7
7
|
$LOAD_PATH.unshift(File.join(__dir__, '../lib'))
|
8
8
|
|
9
|
+
BIN = File.basename($PROGRAM_NAME)
|
10
|
+
|
9
11
|
def puts_error(s)
|
10
|
-
$stderr.puts "
|
12
|
+
$stderr.puts "#{BIN}: #{s}"
|
11
13
|
end
|
12
14
|
|
13
15
|
#
|
14
16
|
# Load the bare minimum and parse args with slop. We do this separately for speed.
|
15
17
|
#
|
16
18
|
|
17
|
-
require 'httpdisk/
|
19
|
+
require 'httpdisk/cli/args'
|
18
20
|
begin
|
19
|
-
slop = HTTPDisk::
|
21
|
+
slop = HTTPDisk::Cli::Args.slop(ARGV)
|
20
22
|
rescue Slop::Error => e
|
21
23
|
puts_error(e) if e.message != ''
|
22
|
-
puts_error("try '
|
24
|
+
puts_error("try '#{BIN} --help' for more information")
|
23
25
|
exit 1
|
24
26
|
end
|
25
27
|
|
@@ -28,11 +30,11 @@ end
|
|
28
30
|
#
|
29
31
|
|
30
32
|
require 'httpdisk'
|
31
|
-
|
33
|
+
main = HTTPDisk::Cli::Main.new(slop)
|
32
34
|
begin
|
33
|
-
|
35
|
+
main.run
|
34
36
|
rescue StandardError => e
|
35
|
-
puts_error(e) if !
|
37
|
+
puts_error(e) if !main.options[:silent]
|
36
38
|
if ENV['HTTPDISK_DEBUG']
|
37
39
|
$stderr.puts
|
38
40
|
$stderr.puts e.backtrace.join("\n")
|
data/bin/httpdisk-grep
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
#
|
4
|
+
# Search an HTTPDisk cache, similar to grep.
|
5
|
+
#
|
6
|
+
|
7
|
+
$LOAD_PATH.unshift(File.join(__dir__, '../lib'))
|
8
|
+
|
9
|
+
BIN = File.basename($PROGRAM_NAME)
|
10
|
+
|
11
|
+
def puts_error(s)
|
12
|
+
$stderr.puts "#{BIN}: #{s}"
|
13
|
+
end
|
14
|
+
|
15
|
+
#
|
16
|
+
# Load the bare minimum and parse args with slop. We do this separately for speed.
|
17
|
+
#
|
18
|
+
|
19
|
+
require 'httpdisk/grep/args'
|
20
|
+
begin
|
21
|
+
slop = HTTPDisk::Grep::Args.slop(ARGV)
|
22
|
+
rescue Slop::Error => e
|
23
|
+
puts_error(e) if e.message != ''
|
24
|
+
puts_error("try '#{BIN} --help' for more information")
|
25
|
+
exit 1
|
26
|
+
end
|
27
|
+
|
28
|
+
#
|
29
|
+
# now load everything and run
|
30
|
+
#
|
31
|
+
|
32
|
+
require 'httpdisk'
|
33
|
+
|
34
|
+
main = HTTPDisk::Grep::Main.new(slop)
|
35
|
+
begin
|
36
|
+
success = main.run
|
37
|
+
exit 1 if !success
|
38
|
+
rescue StandardError => e
|
39
|
+
puts_error(e)
|
40
|
+
if ENV['HTTPDISK_DEBUG']
|
41
|
+
$stderr.puts
|
42
|
+
$stderr.puts e.class
|
43
|
+
$stderr.puts e.backtrace.join("\n")
|
44
|
+
end
|
45
|
+
exit 2
|
46
|
+
end
|
data/httpdisk.gemspec
CHANGED
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
|
|
21
21
|
s.require_paths = ['lib']
|
22
22
|
|
23
23
|
# gem dependencies
|
24
|
+
s.add_dependency 'content-type', '~> 0.0'
|
24
25
|
s.add_dependency 'faraday', '~> 1.4'
|
25
26
|
s.add_dependency 'faraday-cookie_jar', '~> 0.0'
|
26
27
|
s.add_dependency 'faraday_middleware', '~> 1.0'
|
data/lib/httpdisk.rb
CHANGED
@@ -1,12 +1,17 @@
|
|
1
1
|
require 'httpdisk/cache_key'
|
2
2
|
require 'httpdisk/cache'
|
3
|
-
require 'httpdisk/cli_slop'
|
4
|
-
require 'httpdisk/cli'
|
5
3
|
require 'httpdisk/client'
|
6
4
|
require 'httpdisk/error'
|
7
5
|
require 'httpdisk/payload'
|
6
|
+
require 'httpdisk/slop_duration'
|
7
|
+
require 'httpdisk/sloptions'
|
8
8
|
require 'httpdisk/version'
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
10
|
+
# cli
|
11
|
+
require 'httpdisk/cli/args'
|
12
|
+
require 'httpdisk/cli/main'
|
13
|
+
|
14
|
+
# grep
|
15
|
+
require 'httpdisk/grep/args'
|
16
|
+
require 'httpdisk/grep/main'
|
17
|
+
require 'httpdisk/grep/printer'
|
data/lib/httpdisk/cache.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'fileutils'
|
2
|
+
require 'tempfile'
|
2
3
|
|
3
4
|
module HTTPDisk
|
4
5
|
# Disk cache for cache_keys => response. Files are compressed.
|
@@ -7,24 +8,9 @@ module HTTPDisk
|
|
7
8
|
|
8
9
|
def initialize(options)
|
9
10
|
@options = options
|
10
|
-
|
11
|
-
# heavy sanity checking on arguments here
|
12
|
-
if !dir.is_a?(String)
|
13
|
-
raise ArgumentError, "expected :dir to be a string, not #{dir.inspect}"
|
14
|
-
end
|
15
|
-
if expires_in && !expires_in.is_a?(Integer)
|
16
|
-
raise ArgumentError, "expected :expires_in to be an integer, not #{expires_in.inspect}"
|
17
|
-
end
|
18
|
-
|
19
|
-
%i[force force_errors].each do
|
20
|
-
value = send(_1)
|
21
|
-
if ![nil, true, false].include?(value)
|
22
|
-
raise ArgumentError, "expected #{_1} to be a boolean, not #{value.inspect}"
|
23
|
-
end
|
24
|
-
end
|
25
11
|
end
|
26
12
|
|
27
|
-
%i[dir
|
13
|
+
%i[dir expires force force_errors].each do |method|
|
28
14
|
define_method(method) do
|
29
15
|
options[method]
|
30
16
|
end
|
@@ -43,14 +29,32 @@ module HTTPDisk
|
|
43
29
|
payload_or_status = read0(cache_key, peek: true)
|
44
30
|
return payload_or_status if payload_or_status.is_a?(Symbol)
|
45
31
|
|
46
|
-
payload_or_status.
|
32
|
+
payload_or_status.error? ? :error : :hit
|
47
33
|
end
|
48
34
|
|
49
35
|
# Write response to the disk cache
|
50
36
|
def write(cache_key, payload)
|
51
37
|
path = diskpath(cache_key)
|
52
38
|
FileUtils.mkdir_p(File.dirname(path))
|
53
|
-
|
39
|
+
|
40
|
+
# Atomically write gzipped payload. Put our underlying Tempfile into
|
41
|
+
# binmode to avoid accidental newline conversion or string encoding. Not
|
42
|
+
# required for *nix systems, but I've heard rumors it's helpful for
|
43
|
+
# Windows.
|
44
|
+
Tempfile.new(binmode: true).tap do |tmp|
|
45
|
+
Zlib::GzipWriter.new(tmp).tap do |gzip|
|
46
|
+
payload.write(gzip)
|
47
|
+
gzip.close
|
48
|
+
end
|
49
|
+
tmp.close
|
50
|
+
FileUtils.mv(tmp.path, path)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
# Delete existing response, if any
|
55
|
+
def delete(cache_key)
|
56
|
+
path = diskpath(cache_key)
|
57
|
+
FileUtils.rm(path) if File.exist?(path)
|
54
58
|
end
|
55
59
|
|
56
60
|
# Relative path for this cache_key based on the cache key
|
@@ -68,15 +72,21 @@ module HTTPDisk
|
|
68
72
|
return :stale if expired?(path)
|
69
73
|
return :force if force?
|
70
74
|
|
71
|
-
|
72
|
-
|
75
|
+
begin
|
76
|
+
payload = Zlib::GzipReader.open(path, encoding: 'ASCII-8BIT') do
|
77
|
+
Payload.read(_1, peek: peek)
|
78
|
+
end
|
79
|
+
rescue StandardError => e
|
80
|
+
raise "#{path}: #{e}"
|
81
|
+
end
|
82
|
+
return :force if force_errors? && payload.error?
|
73
83
|
|
74
84
|
payload
|
75
85
|
end
|
76
86
|
|
77
87
|
# Is this path expired?
|
78
88
|
def expired?(path)
|
79
|
-
|
89
|
+
expires && File.stat(path).mtime < Time.now - expires
|
80
90
|
end
|
81
91
|
end
|
82
92
|
end
|