httpdisk 0.2.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d311812fa4d8d034c9eda6b9b18df36e21eb14ebc9d632cd1fe071268ca77786
4
- data.tar.gz: 90e891451c1805d6d8ba5ad268bc2682528954a2a8acd37546f69ab43d493311
3
+ metadata.gz: 534d055da653551f2dbf23db1c323c6858fcaa0bab09354491c175bffadf6d54
4
+ data.tar.gz: 825a6779fec3c5ce2828363e9022da231017ae2d13aa8fa88acff9692d8cd7fe
5
5
  SHA512:
6
- metadata.gz: c61e162e26d8a7b86fe00165e095d72f99415e8e5a4245513758731a009186999d5cf171db45c8b83115dae8c376e8391aa0bfbd00d3a2fd086c0451f77269ad
7
- data.tar.gz: 1ad5bdb0a51f2b84822a6164379a724a7b03338b30f9c427b956bbe4263b0326720aea3bcb02b1fa37110a8aa5adc24dc8f3f257733b11667df913b863da0d52
6
+ metadata.gz: 750eaaca6bba23b1a8af351d916c79e470928c23cb3f6517662d6e99a7278c963cfe9dbbde564282d1367dd0f6bb34c166066b2babdb29d1c29f398fe9498245
7
+ data.tar.gz: 951f2f668018872bd4a970be506e4fc4c9dce302b4fa03e310eb0269dba428216bdd23a4da1a6fa86ad75f2e75b5d3fbe9988e1018cd328c6bbf6eb790d3406c
data/.rubocop.yml CHANGED
@@ -2,18 +2,27 @@ AllCops:
2
2
  NewCops: enable
3
3
  SuggestExtensions: false
4
4
 
5
+ # this is buggy in 2.7.0
6
+ Style/HashTransformValues: { Enabled: false }
7
+
5
8
  # minimal personal preference
6
9
  Layout/CaseIndentation: { Enabled: false }
7
10
  Layout/EndAlignment: { EnforcedStyleAlignWith: variable }
8
11
  Lint/AssignmentInCondition: { Enabled: false }
12
+ Lint/NonLocalExitFromIterator: { Enabled: false }
9
13
  Metrics: { Enabled: false }
10
14
  Naming/MethodParameterName: { Enabled: false }
11
15
  Naming/VariableNumber: { Enabled: false }
12
16
  Style/Documentation: { Enabled: false }
17
+ Style/DoubleNegation: { Enabled: false }
18
+ Style/EmptyCaseCondition: { Enabled: false }
13
19
  Style/FrozenStringLiteralComment: { Enabled: false }
20
+ Style/GuardClause: { Enabled: false }
14
21
  Style/IfUnlessModifier: { Enabled: false }
15
22
  Style/NegatedIf: { Enabled: false }
23
+ Style/NumericPredicate: { Enabled: false }
16
24
  Style/ParallelAssignment: { Enabled: false }
25
+ Style/SoleNestedConditional: { Enabled: false }
17
26
  Style/StderrPuts: { Enabled: false }
18
27
  Style/TrailingCommaInArrayLiteral: { EnforcedStyleForMultiline: consistent_comma }
19
28
  Style/TrailingCommaInHashLiteral: { EnforcedStyleForMultiline: consistent_comma }
data/Gemfile.lock CHANGED
@@ -1,7 +1,8 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- httpdisk (0.2.0)
4
+ httpdisk (0.5.2)
5
+ content-type (~> 0.0)
5
6
  faraday (~> 1.4)
6
7
  faraday-cookie_jar (~> 0.0)
7
8
  faraday_middleware (~> 1.0)
@@ -14,26 +15,36 @@ GEM
14
15
  public_suffix (>= 2.0.2, < 5.0)
15
16
  ast (2.4.2)
16
17
  coderay (1.1.3)
18
+ content-type (0.0.1)
19
+ parslet (~> 1.5)
17
20
  crack (0.4.5)
18
21
  rexml
19
22
  domain_name (0.5.20190701)
20
23
  unf (>= 0.0.5, < 1.0.0)
21
- faraday (1.4.1)
24
+ faraday (1.5.0)
25
+ faraday-em_http (~> 1.0)
26
+ faraday-em_synchrony (~> 1.0)
22
27
  faraday-excon (~> 1.1)
28
+ faraday-httpclient (~> 1.0.1)
23
29
  faraday-net_http (~> 1.0)
24
30
  faraday-net_http_persistent (~> 1.1)
31
+ faraday-patron (~> 1.0)
25
32
  multipart-post (>= 1.2, < 3)
26
33
  ruby2_keywords (>= 0.0.4)
27
34
  faraday-cookie_jar (0.0.7)
28
35
  faraday (>= 0.8.0)
29
36
  http-cookie (~> 1.0.0)
37
+ faraday-em_http (1.0.0)
38
+ faraday-em_synchrony (1.0.0)
30
39
  faraday-excon (1.1.0)
40
+ faraday-httpclient (1.0.1)
31
41
  faraday-net_http (1.0.1)
32
42
  faraday-net_http_persistent (1.1.0)
43
+ faraday-patron (1.0.0)
33
44
  faraday_middleware (1.0.0)
34
45
  faraday (~> 1.0)
35
46
  hashdiff (1.0.1)
36
- http-cookie (1.0.3)
47
+ http-cookie (1.0.4)
37
48
  domain_name (~> 0.5)
38
49
  method_source (1.0.0)
39
50
  minitest (5.14.4)
@@ -42,6 +53,7 @@ GEM
42
53
  parallel (1.20.1)
43
54
  parser (3.0.1.1)
44
55
  ast (~> 2.4.1)
56
+ parslet (1.8.2)
45
57
  pry (0.13.1)
46
58
  coderay (~> 1.1)
47
59
  method_source (~> 1.0)
@@ -63,7 +75,7 @@ GEM
63
75
  parser (>= 3.0.1.1)
64
76
  ruby-progressbar (1.11.0)
65
77
  ruby2_keywords (0.0.4)
66
- slop (4.8.2)
78
+ slop (4.9.1)
67
79
  unf (0.1.4)
68
80
  unf_ext
69
81
  unf_ext (0.0.7.7)
data/README.md CHANGED
@@ -65,7 +65,6 @@ faraday = Faraday.new do
65
65
  _1.request :url_encoded # auto-encode form bodies
66
66
  _1.response :json # auto-decode JSON responses
67
67
  _1.response :follow_redirects # follow redirects (should be above httpdisk)
68
- _1.response :encoding # set Ruby string encoding based on Content-Type (should be above httpdisk)
69
68
  _1.use :httpdisk
70
69
  _1.request :retry # retry failed responses (should be below httpdisk)
71
70
  end
@@ -120,21 +119,27 @@ httpdisk caches all responses. POST responses are cached, along with 500 respons
120
119
 
121
120
  In general, if you make a request it will be cached regardless of the outcome.
122
121
 
122
+ ## String Encoding
123
+
124
+ httpdisk will honor the `Content-Type` from responses. Unfortunately, it is entirely possible to get invalid bodies if the `Content-Type` doesn't match the bytes. This is a major bummer, so httpdisk provides a `utf8:` option that forces text response bodies to UTF-8.
125
+
123
126
  ## Configuration
124
127
 
125
128
  httpdisk supports a few options:
126
129
 
127
130
  - `dir:` location for disk cache, defaults to `~/httpdisk`
128
- - `expires_in:` when to expire cached requests, default is nil (never expire)
131
+ - `expires:` when to expire cached requests, default is nil (never expire)
129
132
  - `force:` don't read anything from cache (but still write)
130
133
  - `force_errors:` don't read errors from cache (but still write)
134
+ - `ignore_params:` array of query params to ignore when calculating cache_key
131
135
  - `logger:` log requests to stderr, or pass your own logger
136
+ - `utf8:` if true, force text response bodies to valid UTF-8
132
137
 
133
138
  Pass these in when setting up Faraday:
134
139
 
135
140
  ```ruby
136
141
  faraday = Faraday.new do
137
- _1.use :httpdisk, expires_in: 7*24*60*60, force: true
142
+ _1.use :httpdisk, expires: 7*24*60*60, force: true
138
143
  end
139
144
  ```
140
145
 
@@ -162,10 +167,13 @@ Specific to httpdisk:
162
167
  --force don't read anything from cache (but still write)
163
168
  --force-errors don't read errors from cache (but still write)
164
169
  --status show status for a url in the cache
165
- --version show version
166
- --help show this help
167
170
  ```
168
171
 
172
+ ## Goodies: httpdisk-grep
173
+
174
+ The `httpdisk-grep` command makes it easy to search your cache directory.
175
+ It can be challenging to use grep/ripgrep because cache files are compressed and JSON bodies often lack newlines. httpdisk-grep is the right tool for the job. See `httpdisk-grep --help`.
176
+
169
177
  ## Limitations & Gotchas
170
178
 
171
179
  - Transient errors are cached. This is appropriate for many use cases (like crawling) but can be confusing. Use `httpdisk --status` to debug.
@@ -176,10 +184,28 @@ Specific to httpdisk:
176
184
 
177
185
  ## Changelog
178
186
 
187
+ #### 0.5
188
+
189
+ - honor Content-Type
190
+ - added `:utf8` option to force text-like response bodies to UTF-8
191
+
192
+ #### 0.4
193
+
194
+ - added httpdisk-grep for searching cache files
195
+ - added HTTPDisk::Cache#delete
196
+ - rename `:expires_in` to `:expires`
197
+
198
+ #### 0.3
199
+
200
+ - added :ignore_params, for ignoring query params when generating cache keys
201
+ - HTTP 40x & 50x responses return :error status (and respond to `force_errors`)
202
+
179
203
  #### 0.2 - May 2020
204
+
180
205
  - added `response.env[:httpdisk]`, which will be true if the response came from the cache
181
- - `:logger` option
206
+ - added `:logger` option
182
207
  - rake rubocop
183
208
 
184
209
  #### 0.1 - April 2020
210
+
185
211
  - Original release
data/Rakefile CHANGED
@@ -10,12 +10,15 @@ spec = Gem::Specification.load('httpdisk.gemspec')
10
10
  #
11
11
 
12
12
  # test (default)
13
- Rake::TestTask.new { _1.libs << 'test' }
13
+ Rake::TestTask.new do
14
+ _1.libs << 'test'
15
+ _1.warning = false # https://github.com/lostisland/faraday/issues/1285
16
+ end
14
17
  task default: :test
15
18
 
16
- # Watch files, run tests whenever something changes
19
+ # Watch rb files, run tests whenever something changes
17
20
  task :watch do
18
- system('find . | entr -c rake test')
21
+ sh "find . -name '*.rb' | entr -c rake"
19
22
  end
20
23
 
21
24
  #
@@ -23,7 +26,7 @@ end
23
26
  #
24
27
 
25
28
  task :pry do
26
- system 'pry -I lib -r httpdisk.rb'
29
+ sh 'pry -I lib -r httpdisk.rb'
27
30
  end
28
31
 
29
32
  #
@@ -31,7 +34,7 @@ end
31
34
  #
32
35
 
33
36
  task :rubocop do
34
- system('bundle exec rubocop -A .', exception: true)
37
+ sh 'bundle exec rubocop -A .'
35
38
  end
36
39
 
37
40
  #
@@ -39,17 +42,17 @@ end
39
42
  #
40
43
 
41
44
  task :build do
42
- system('gem build --quiet httpdisk.gemspec', exception: true)
45
+ sh 'gem build --quiet httpdisk.gemspec'
43
46
  end
44
47
 
45
48
  task install: :build do
46
- system("gem install --quiet httpdisk-#{spec.version}.gem", exception: true)
49
+ sh "gem install --quiet httpdisk-#{spec.version}.gem"
47
50
  end
48
51
 
49
- task release: %i[test build] do
52
+ task release: %i[rubocop test build] do
50
53
  raise "looks like git isn't clean" unless `git status --porcelain`.empty?
51
54
 
52
- system("git tag -a #{spec.version} -m 'Tagging #{spec.version}'", exception: true)
53
- system('git push --tags', exception: true)
54
- system("gem push httpdisk-#{spec.version}.gem", exception: true)
55
+ sh "git tag -a #{spec.version} -m 'Tagging #{spec.version}'"
56
+ sh 'git push --tags'
57
+ sh "gem push httpdisk-#{spec.version}.gem"
55
58
  end
data/bin/httpdisk CHANGED
@@ -6,20 +6,22 @@
6
6
 
7
7
  $LOAD_PATH.unshift(File.join(__dir__, '../lib'))
8
8
 
9
+ BIN = File.basename($PROGRAM_NAME)
10
+
9
11
  def puts_error(s)
10
- $stderr.puts "httpdisk: #{s}"
12
+ $stderr.puts "#{BIN}: #{s}"
11
13
  end
12
14
 
13
15
  #
14
16
  # Load the bare minimum and parse args with slop. We do this separately for speed.
15
17
  #
16
18
 
17
- require 'httpdisk/cli_slop'
19
+ require 'httpdisk/cli/args'
18
20
  begin
19
- slop = HTTPDisk::CliSlop.slop(ARGV)
21
+ slop = HTTPDisk::Cli::Args.slop(ARGV)
20
22
  rescue Slop::Error => e
21
23
  puts_error(e) if e.message != ''
22
- puts_error("try 'httpdisk --help' for more information")
24
+ puts_error("try '#{BIN} --help' for more information")
23
25
  exit 1
24
26
  end
25
27
 
@@ -28,11 +30,11 @@ end
28
30
  #
29
31
 
30
32
  require 'httpdisk'
31
- cli = HTTPDisk::Cli.new(slop)
33
+ main = HTTPDisk::Cli::Main.new(slop)
32
34
  begin
33
- cli.run
35
+ main.run
34
36
  rescue StandardError => e
35
- puts_error(e) if !cli.options[:silent]
37
+ puts_error(e) if !main.options[:silent]
36
38
  if ENV['HTTPDISK_DEBUG']
37
39
  $stderr.puts
38
40
  $stderr.puts e.backtrace.join("\n")
data/bin/httpdisk-grep ADDED
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ #
4
+ # Search an HTTPDisk cache, similar to grep.
5
+ #
6
+
7
+ $LOAD_PATH.unshift(File.join(__dir__, '../lib'))
8
+
9
+ BIN = File.basename($PROGRAM_NAME)
10
+
11
+ def puts_error(s)
12
+ $stderr.puts "#{BIN}: #{s}"
13
+ end
14
+
15
+ #
16
+ # Load the bare minimum and parse args with slop. We do this separately for speed.
17
+ #
18
+
19
+ require 'httpdisk/grep/args'
20
+ begin
21
+ slop = HTTPDisk::Grep::Args.slop(ARGV)
22
+ rescue Slop::Error => e
23
+ puts_error(e) if e.message != ''
24
+ puts_error("try '#{BIN} --help' for more information")
25
+ exit 1
26
+ end
27
+
28
+ #
29
+ # now load everything and run
30
+ #
31
+
32
+ require 'httpdisk'
33
+
34
+ main = HTTPDisk::Grep::Main.new(slop)
35
+ begin
36
+ success = main.run
37
+ exit 1 if !success
38
+ rescue StandardError => e
39
+ puts_error(e)
40
+ if ENV['HTTPDISK_DEBUG']
41
+ $stderr.puts
42
+ $stderr.puts e.class
43
+ $stderr.puts e.backtrace.join("\n")
44
+ end
45
+ exit 2
46
+ end
data/httpdisk.gemspec CHANGED
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
21
21
  s.require_paths = ['lib']
22
22
 
23
23
  # gem dependencies
24
+ s.add_dependency 'content-type', '~> 0.0'
24
25
  s.add_dependency 'faraday', '~> 1.4'
25
26
  s.add_dependency 'faraday-cookie_jar', '~> 0.0'
26
27
  s.add_dependency 'faraday_middleware', '~> 1.0'
data/lib/httpdisk.rb CHANGED
@@ -1,12 +1,17 @@
1
1
  require 'httpdisk/cache_key'
2
2
  require 'httpdisk/cache'
3
- require 'httpdisk/cli_slop'
4
- require 'httpdisk/cli'
5
3
  require 'httpdisk/client'
6
4
  require 'httpdisk/error'
7
5
  require 'httpdisk/payload'
6
+ require 'httpdisk/slop_duration'
7
+ require 'httpdisk/sloptions'
8
8
  require 'httpdisk/version'
9
9
 
10
- module HTTPDisk
11
- ERROR_STATUS = 999
12
- end
10
+ # cli
11
+ require 'httpdisk/cli/args'
12
+ require 'httpdisk/cli/main'
13
+
14
+ # grep
15
+ require 'httpdisk/grep/args'
16
+ require 'httpdisk/grep/main'
17
+ require 'httpdisk/grep/printer'
@@ -1,4 +1,5 @@
1
1
  require 'fileutils'
2
+ require 'tempfile'
2
3
 
3
4
  module HTTPDisk
4
5
  # Disk cache for cache_keys => response. Files are compressed.
@@ -7,24 +8,9 @@ module HTTPDisk
7
8
 
8
9
  def initialize(options)
9
10
  @options = options
10
-
11
- # heavy sanity checking on arguments here
12
- if !dir.is_a?(String)
13
- raise ArgumentError, "expected :dir to be a string, not #{dir.inspect}"
14
- end
15
- if expires_in && !expires_in.is_a?(Integer)
16
- raise ArgumentError, "expected :expires_in to be an integer, not #{expires_in.inspect}"
17
- end
18
-
19
- %i[force force_errors].each do
20
- value = send(_1)
21
- if ![nil, true, false].include?(value)
22
- raise ArgumentError, "expected #{_1} to be a boolean, not #{value.inspect}"
23
- end
24
- end
25
11
  end
26
12
 
27
- %i[dir expires_in force force_errors].each do |method|
13
+ %i[dir expires force force_errors].each do |method|
28
14
  define_method(method) do
29
15
  options[method]
30
16
  end
@@ -43,14 +29,32 @@ module HTTPDisk
43
29
  payload_or_status = read0(cache_key, peek: true)
44
30
  return payload_or_status if payload_or_status.is_a?(Symbol)
45
31
 
46
- payload_or_status.error_999? ? :error : :hit
32
+ payload_or_status.error? ? :error : :hit
47
33
  end
48
34
 
49
35
  # Write response to the disk cache
50
36
  def write(cache_key, payload)
51
37
  path = diskpath(cache_key)
52
38
  FileUtils.mkdir_p(File.dirname(path))
53
- Zlib::GzipWriter.open(path) { payload.write(_1) }
39
+
40
+ # Atomically write gzipped payload. Put our underlying Tempfile into
41
+ # binmode to avoid accidental newline conversion or string encoding. Not
42
+ # required for *nix systems, but I've heard rumors it's helpful for
43
+ # Windows.
44
+ Tempfile.new(binmode: true).tap do |tmp|
45
+ Zlib::GzipWriter.new(tmp).tap do |gzip|
46
+ payload.write(gzip)
47
+ gzip.close
48
+ end
49
+ tmp.close
50
+ FileUtils.mv(tmp.path, path)
51
+ end
52
+ end
53
+
54
+ # Delete existing response, if any
55
+ def delete(cache_key)
56
+ path = diskpath(cache_key)
57
+ FileUtils.rm(path) if File.exist?(path)
54
58
  end
55
59
 
56
60
  # Relative path for this cache_key based on the cache key
@@ -68,15 +72,21 @@ module HTTPDisk
68
72
  return :stale if expired?(path)
69
73
  return :force if force?
70
74
 
71
- payload = Zlib::GzipReader.open(path) { Payload.read(_1, peek: peek) }
72
- return :force if force_errors? && payload.error_999?
75
+ begin
76
+ payload = Zlib::GzipReader.open(path, encoding: 'ASCII-8BIT') do
77
+ Payload.read(_1, peek: peek)
78
+ end
79
+ rescue StandardError => e
80
+ raise "#{path}: #{e}"
81
+ end
82
+ return :force if force_errors? && payload.error?
73
83
 
74
84
  payload
75
85
  end
76
86
 
77
87
  # Is this path expired?
78
88
  def expired?(path)
79
- expires_in && File.stat(path).mtime < Time.now - expires_in
89
+ expires && File.stat(path).mtime < Time.now - expires
80
90
  end
81
91
  end
82
92
  end