httpdisk 0.2.0 → 0.5.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d311812fa4d8d034c9eda6b9b18df36e21eb14ebc9d632cd1fe071268ca77786
4
- data.tar.gz: 90e891451c1805d6d8ba5ad268bc2682528954a2a8acd37546f69ab43d493311
3
+ metadata.gz: 534d055da653551f2dbf23db1c323c6858fcaa0bab09354491c175bffadf6d54
4
+ data.tar.gz: 825a6779fec3c5ce2828363e9022da231017ae2d13aa8fa88acff9692d8cd7fe
5
5
  SHA512:
6
- metadata.gz: c61e162e26d8a7b86fe00165e095d72f99415e8e5a4245513758731a009186999d5cf171db45c8b83115dae8c376e8391aa0bfbd00d3a2fd086c0451f77269ad
7
- data.tar.gz: 1ad5bdb0a51f2b84822a6164379a724a7b03338b30f9c427b956bbe4263b0326720aea3bcb02b1fa37110a8aa5adc24dc8f3f257733b11667df913b863da0d52
6
+ metadata.gz: 750eaaca6bba23b1a8af351d916c79e470928c23cb3f6517662d6e99a7278c963cfe9dbbde564282d1367dd0f6bb34c166066b2babdb29d1c29f398fe9498245
7
+ data.tar.gz: 951f2f668018872bd4a970be506e4fc4c9dce302b4fa03e310eb0269dba428216bdd23a4da1a6fa86ad75f2e75b5d3fbe9988e1018cd328c6bbf6eb790d3406c
data/.rubocop.yml CHANGED
@@ -2,18 +2,27 @@ AllCops:
2
2
  NewCops: enable
3
3
  SuggestExtensions: false
4
4
 
5
+ # this is buggy in 2.7.0
6
+ Style/HashTransformValues: { Enabled: false }
7
+
5
8
  # minimal personal preference
6
9
  Layout/CaseIndentation: { Enabled: false }
7
10
  Layout/EndAlignment: { EnforcedStyleAlignWith: variable }
8
11
  Lint/AssignmentInCondition: { Enabled: false }
12
+ Lint/NonLocalExitFromIterator: { Enabled: false }
9
13
  Metrics: { Enabled: false }
10
14
  Naming/MethodParameterName: { Enabled: false }
11
15
  Naming/VariableNumber: { Enabled: false }
12
16
  Style/Documentation: { Enabled: false }
17
+ Style/DoubleNegation: { Enabled: false }
18
+ Style/EmptyCaseCondition: { Enabled: false }
13
19
  Style/FrozenStringLiteralComment: { Enabled: false }
20
+ Style/GuardClause: { Enabled: false }
14
21
  Style/IfUnlessModifier: { Enabled: false }
15
22
  Style/NegatedIf: { Enabled: false }
23
+ Style/NumericPredicate: { Enabled: false }
16
24
  Style/ParallelAssignment: { Enabled: false }
25
+ Style/SoleNestedConditional: { Enabled: false }
17
26
  Style/StderrPuts: { Enabled: false }
18
27
  Style/TrailingCommaInArrayLiteral: { EnforcedStyleForMultiline: consistent_comma }
19
28
  Style/TrailingCommaInHashLiteral: { EnforcedStyleForMultiline: consistent_comma }
data/Gemfile.lock CHANGED
@@ -1,7 +1,8 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- httpdisk (0.2.0)
4
+ httpdisk (0.5.2)
5
+ content-type (~> 0.0)
5
6
  faraday (~> 1.4)
6
7
  faraday-cookie_jar (~> 0.0)
7
8
  faraday_middleware (~> 1.0)
@@ -14,26 +15,36 @@ GEM
14
15
  public_suffix (>= 2.0.2, < 5.0)
15
16
  ast (2.4.2)
16
17
  coderay (1.1.3)
18
+ content-type (0.0.1)
19
+ parslet (~> 1.5)
17
20
  crack (0.4.5)
18
21
  rexml
19
22
  domain_name (0.5.20190701)
20
23
  unf (>= 0.0.5, < 1.0.0)
21
- faraday (1.4.1)
24
+ faraday (1.5.0)
25
+ faraday-em_http (~> 1.0)
26
+ faraday-em_synchrony (~> 1.0)
22
27
  faraday-excon (~> 1.1)
28
+ faraday-httpclient (~> 1.0.1)
23
29
  faraday-net_http (~> 1.0)
24
30
  faraday-net_http_persistent (~> 1.1)
31
+ faraday-patron (~> 1.0)
25
32
  multipart-post (>= 1.2, < 3)
26
33
  ruby2_keywords (>= 0.0.4)
27
34
  faraday-cookie_jar (0.0.7)
28
35
  faraday (>= 0.8.0)
29
36
  http-cookie (~> 1.0.0)
37
+ faraday-em_http (1.0.0)
38
+ faraday-em_synchrony (1.0.0)
30
39
  faraday-excon (1.1.0)
40
+ faraday-httpclient (1.0.1)
31
41
  faraday-net_http (1.0.1)
32
42
  faraday-net_http_persistent (1.1.0)
43
+ faraday-patron (1.0.0)
33
44
  faraday_middleware (1.0.0)
34
45
  faraday (~> 1.0)
35
46
  hashdiff (1.0.1)
36
- http-cookie (1.0.3)
47
+ http-cookie (1.0.4)
37
48
  domain_name (~> 0.5)
38
49
  method_source (1.0.0)
39
50
  minitest (5.14.4)
@@ -42,6 +53,7 @@ GEM
42
53
  parallel (1.20.1)
43
54
  parser (3.0.1.1)
44
55
  ast (~> 2.4.1)
56
+ parslet (1.8.2)
45
57
  pry (0.13.1)
46
58
  coderay (~> 1.1)
47
59
  method_source (~> 1.0)
@@ -63,7 +75,7 @@ GEM
63
75
  parser (>= 3.0.1.1)
64
76
  ruby-progressbar (1.11.0)
65
77
  ruby2_keywords (0.0.4)
66
- slop (4.8.2)
78
+ slop (4.9.1)
67
79
  unf (0.1.4)
68
80
  unf_ext
69
81
  unf_ext (0.0.7.7)
data/README.md CHANGED
@@ -65,7 +65,6 @@ faraday = Faraday.new do
65
65
  _1.request :url_encoded # auto-encode form bodies
66
66
  _1.response :json # auto-decode JSON responses
67
67
  _1.response :follow_redirects # follow redirects (should be above httpdisk)
68
- _1.response :encoding # set Ruby string encoding based on Content-Type (should be above httpdisk)
69
68
  _1.use :httpdisk
70
69
  _1.request :retry # retry failed responses (should be below httpdisk)
71
70
  end
@@ -120,21 +119,27 @@ httpdisk caches all responses. POST responses are cached, along with 500 respons
120
119
 
121
120
  In general, if you make a request it will be cached regardless of the outcome.
122
121
 
122
+ ## String Encoding
123
+
124
+ httpdisk will honor the `Content-Type` from responses. Unfortunately, it is entirely possible to get invalid bodies if the `Content-Type` doesn't match the bytes. This is a major bummer, so httpdisk provides a `utf8:` option that forces text response bodies to UTF-8.
125
+
123
126
  ## Configuration
124
127
 
125
128
  httpdisk supports a few options:
126
129
 
127
130
  - `dir:` location for disk cache, defaults to `~/httpdisk`
128
- - `expires_in:` when to expire cached requests, default is nil (never expire)
131
+ - `expires:` when to expire cached requests, default is nil (never expire)
129
132
  - `force:` don't read anything from cache (but still write)
130
133
  - `force_errors:` don't read errors from cache (but still write)
134
+ - `ignore_params:` array of query params to ignore when calculating cache_key
131
135
  - `logger:` log requests to stderr, or pass your own logger
136
+ - `utf8:` if true, force text response bodies to valid UTF-8
132
137
 
133
138
  Pass these in when setting up Faraday:
134
139
 
135
140
  ```ruby
136
141
  faraday = Faraday.new do
137
- _1.use :httpdisk, expires_in: 7*24*60*60, force: true
142
+ _1.use :httpdisk, expires: 7*24*60*60, force: true
138
143
  end
139
144
  ```
140
145
 
@@ -162,10 +167,13 @@ Specific to httpdisk:
162
167
  --force don't read anything from cache (but still write)
163
168
  --force-errors don't read errors from cache (but still write)
164
169
  --status show status for a url in the cache
165
- --version show version
166
- --help show this help
167
170
  ```
168
171
 
172
+ ## Goodies: httpdisk-grep
173
+
174
+ The `httpdisk-grep` command makes it easy to search your cache directory.
175
+ It can be challenging to use grep/ripgrep because cache files are compressed and JSON bodies often lack newlines. httpdisk-grep is the right tool for the job. See `httpdisk-grep --help`.
176
+
169
177
  ## Limitations & Gotchas
170
178
 
171
179
  - Transient errors are cached. This is appropriate for many use cases (like crawling) but can be confusing. Use `httpdisk --status` to debug.
@@ -176,10 +184,28 @@ Specific to httpdisk:
176
184
 
177
185
  ## Changelog
178
186
 
187
+ #### 0.5
188
+
189
+ - honor Content-Type
190
+ - added `:utf8` option to force text-like response bodies to UTF-8
191
+
192
+ #### 0.4
193
+
194
+ - added httpdisk-grep for searching cache files
195
+ - added HTTPDisk::Cache#delete
196
+ - rename `:expires_in` to `:expires`
197
+
198
+ #### 0.3
199
+
200
+ - added :ignore_params, for ignoring query params when generating cache keys
201
+ - HTTP 40x & 50x responses return :error status (and respond to `force_errors`)
202
+
179
203
  #### 0.2 - May 2020
204
+
180
205
  - added `response.env[:httpdisk]`, which will be true if the response came from the cache
181
- - `:logger` option
206
+ - added `:logger` option
182
207
  - rake rubocop
183
208
 
184
209
  #### 0.1 - April 2020
210
+
185
211
  - Original release
data/Rakefile CHANGED
@@ -10,12 +10,15 @@ spec = Gem::Specification.load('httpdisk.gemspec')
10
10
  #
11
11
 
12
12
  # test (default)
13
- Rake::TestTask.new { _1.libs << 'test' }
13
+ Rake::TestTask.new do
14
+ _1.libs << 'test'
15
+ _1.warning = false # https://github.com/lostisland/faraday/issues/1285
16
+ end
14
17
  task default: :test
15
18
 
16
- # Watch files, run tests whenever something changes
19
+ # Watch rb files, run tests whenever something changes
17
20
  task :watch do
18
- system('find . | entr -c rake test')
21
+ sh "find . -name '*.rb' | entr -c rake"
19
22
  end
20
23
 
21
24
  #
@@ -23,7 +26,7 @@ end
23
26
  #
24
27
 
25
28
  task :pry do
26
- system 'pry -I lib -r httpdisk.rb'
29
+ sh 'pry -I lib -r httpdisk.rb'
27
30
  end
28
31
 
29
32
  #
@@ -31,7 +34,7 @@ end
31
34
  #
32
35
 
33
36
  task :rubocop do
34
- system('bundle exec rubocop -A .', exception: true)
37
+ sh 'bundle exec rubocop -A .'
35
38
  end
36
39
 
37
40
  #
@@ -39,17 +42,17 @@ end
39
42
  #
40
43
 
41
44
  task :build do
42
- system('gem build --quiet httpdisk.gemspec', exception: true)
45
+ sh 'gem build --quiet httpdisk.gemspec'
43
46
  end
44
47
 
45
48
  task install: :build do
46
- system("gem install --quiet httpdisk-#{spec.version}.gem", exception: true)
49
+ sh "gem install --quiet httpdisk-#{spec.version}.gem"
47
50
  end
48
51
 
49
- task release: %i[test build] do
52
+ task release: %i[rubocop test build] do
50
53
  raise "looks like git isn't clean" unless `git status --porcelain`.empty?
51
54
 
52
- system("git tag -a #{spec.version} -m 'Tagging #{spec.version}'", exception: true)
53
- system('git push --tags', exception: true)
54
- system("gem push httpdisk-#{spec.version}.gem", exception: true)
55
+ sh "git tag -a #{spec.version} -m 'Tagging #{spec.version}'"
56
+ sh 'git push --tags'
57
+ sh "gem push httpdisk-#{spec.version}.gem"
55
58
  end
data/bin/httpdisk CHANGED
@@ -6,20 +6,22 @@
6
6
 
7
7
  $LOAD_PATH.unshift(File.join(__dir__, '../lib'))
8
8
 
9
+ BIN = File.basename($PROGRAM_NAME)
10
+
9
11
  def puts_error(s)
10
- $stderr.puts "httpdisk: #{s}"
12
+ $stderr.puts "#{BIN}: #{s}"
11
13
  end
12
14
 
13
15
  #
14
16
  # Load the bare minimum and parse args with slop. We do this separately for speed.
15
17
  #
16
18
 
17
- require 'httpdisk/cli_slop'
19
+ require 'httpdisk/cli/args'
18
20
  begin
19
- slop = HTTPDisk::CliSlop.slop(ARGV)
21
+ slop = HTTPDisk::Cli::Args.slop(ARGV)
20
22
  rescue Slop::Error => e
21
23
  puts_error(e) if e.message != ''
22
- puts_error("try 'httpdisk --help' for more information")
24
+ puts_error("try '#{BIN} --help' for more information")
23
25
  exit 1
24
26
  end
25
27
 
@@ -28,11 +30,11 @@ end
28
30
  #
29
31
 
30
32
  require 'httpdisk'
31
- cli = HTTPDisk::Cli.new(slop)
33
+ main = HTTPDisk::Cli::Main.new(slop)
32
34
  begin
33
- cli.run
35
+ main.run
34
36
  rescue StandardError => e
35
- puts_error(e) if !cli.options[:silent]
37
+ puts_error(e) if !main.options[:silent]
36
38
  if ENV['HTTPDISK_DEBUG']
37
39
  $stderr.puts
38
40
  $stderr.puts e.backtrace.join("\n")
data/bin/httpdisk-grep ADDED
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ #
4
+ # Search an HTTPDisk cache, similar to grep.
5
+ #
6
+
7
+ $LOAD_PATH.unshift(File.join(__dir__, '../lib'))
8
+
9
+ BIN = File.basename($PROGRAM_NAME)
10
+
11
+ def puts_error(s)
12
+ $stderr.puts "#{BIN}: #{s}"
13
+ end
14
+
15
+ #
16
+ # Load the bare minimum and parse args with slop. We do this separately for speed.
17
+ #
18
+
19
+ require 'httpdisk/grep/args'
20
+ begin
21
+ slop = HTTPDisk::Grep::Args.slop(ARGV)
22
+ rescue Slop::Error => e
23
+ puts_error(e) if e.message != ''
24
+ puts_error("try '#{BIN} --help' for more information")
25
+ exit 1
26
+ end
27
+
28
+ #
29
+ # now load everything and run
30
+ #
31
+
32
+ require 'httpdisk'
33
+
34
+ main = HTTPDisk::Grep::Main.new(slop)
35
+ begin
36
+ success = main.run
37
+ exit 1 if !success
38
+ rescue StandardError => e
39
+ puts_error(e)
40
+ if ENV['HTTPDISK_DEBUG']
41
+ $stderr.puts
42
+ $stderr.puts e.class
43
+ $stderr.puts e.backtrace.join("\n")
44
+ end
45
+ exit 2
46
+ end
data/httpdisk.gemspec CHANGED
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
21
21
  s.require_paths = ['lib']
22
22
 
23
23
  # gem dependencies
24
+ s.add_dependency 'content-type', '~> 0.0'
24
25
  s.add_dependency 'faraday', '~> 1.4'
25
26
  s.add_dependency 'faraday-cookie_jar', '~> 0.0'
26
27
  s.add_dependency 'faraday_middleware', '~> 1.0'
data/lib/httpdisk.rb CHANGED
@@ -1,12 +1,17 @@
1
1
  require 'httpdisk/cache_key'
2
2
  require 'httpdisk/cache'
3
- require 'httpdisk/cli_slop'
4
- require 'httpdisk/cli'
5
3
  require 'httpdisk/client'
6
4
  require 'httpdisk/error'
7
5
  require 'httpdisk/payload'
6
+ require 'httpdisk/slop_duration'
7
+ require 'httpdisk/sloptions'
8
8
  require 'httpdisk/version'
9
9
 
10
- module HTTPDisk
11
- ERROR_STATUS = 999
12
- end
10
+ # cli
11
+ require 'httpdisk/cli/args'
12
+ require 'httpdisk/cli/main'
13
+
14
+ # grep
15
+ require 'httpdisk/grep/args'
16
+ require 'httpdisk/grep/main'
17
+ require 'httpdisk/grep/printer'
@@ -1,4 +1,5 @@
1
1
  require 'fileutils'
2
+ require 'tempfile'
2
3
 
3
4
  module HTTPDisk
4
5
  # Disk cache for cache_keys => response. Files are compressed.
@@ -7,24 +8,9 @@ module HTTPDisk
7
8
 
8
9
  def initialize(options)
9
10
  @options = options
10
-
11
- # heavy sanity checking on arguments here
12
- if !dir.is_a?(String)
13
- raise ArgumentError, "expected :dir to be a string, not #{dir.inspect}"
14
- end
15
- if expires_in && !expires_in.is_a?(Integer)
16
- raise ArgumentError, "expected :expires_in to be an integer, not #{expires_in.inspect}"
17
- end
18
-
19
- %i[force force_errors].each do
20
- value = send(_1)
21
- if ![nil, true, false].include?(value)
22
- raise ArgumentError, "expected #{_1} to be a boolean, not #{value.inspect}"
23
- end
24
- end
25
11
  end
26
12
 
27
- %i[dir expires_in force force_errors].each do |method|
13
+ %i[dir expires force force_errors].each do |method|
28
14
  define_method(method) do
29
15
  options[method]
30
16
  end
@@ -43,14 +29,32 @@ module HTTPDisk
43
29
  payload_or_status = read0(cache_key, peek: true)
44
30
  return payload_or_status if payload_or_status.is_a?(Symbol)
45
31
 
46
- payload_or_status.error_999? ? :error : :hit
32
+ payload_or_status.error? ? :error : :hit
47
33
  end
48
34
 
49
35
  # Write response to the disk cache
50
36
  def write(cache_key, payload)
51
37
  path = diskpath(cache_key)
52
38
  FileUtils.mkdir_p(File.dirname(path))
53
- Zlib::GzipWriter.open(path) { payload.write(_1) }
39
+
40
+ # Atomically write gzipped payload. Put our underlying Tempfile into
41
+ # binmode to avoid accidental newline conversion or string encoding. Not
42
+ # required for *nix systems, but I've heard rumors it's helpful for
43
+ # Windows.
44
+ Tempfile.new(binmode: true).tap do |tmp|
45
+ Zlib::GzipWriter.new(tmp).tap do |gzip|
46
+ payload.write(gzip)
47
+ gzip.close
48
+ end
49
+ tmp.close
50
+ FileUtils.mv(tmp.path, path)
51
+ end
52
+ end
53
+
54
+ # Delete existing response, if any
55
+ def delete(cache_key)
56
+ path = diskpath(cache_key)
57
+ FileUtils.rm(path) if File.exist?(path)
54
58
  end
55
59
 
56
60
  # Relative path for this cache_key based on the cache key
@@ -68,15 +72,21 @@ module HTTPDisk
68
72
  return :stale if expired?(path)
69
73
  return :force if force?
70
74
 
71
- payload = Zlib::GzipReader.open(path) { Payload.read(_1, peek: peek) }
72
- return :force if force_errors? && payload.error_999?
75
+ begin
76
+ payload = Zlib::GzipReader.open(path, encoding: 'ASCII-8BIT') do
77
+ Payload.read(_1, peek: peek)
78
+ end
79
+ rescue StandardError => e
80
+ raise "#{path}: #{e}"
81
+ end
82
+ return :force if force_errors? && payload.error?
73
83
 
74
84
  payload
75
85
  end
76
86
 
77
87
  # Is this path expired?
78
88
  def expired?(path)
79
- expires_in && File.stat(path).mtime < Time.now - expires_in
89
+ expires && File.stat(path).mtime < Time.now - expires
80
90
  end
81
91
  end
82
92
  end