httpdisk 0.1.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0c47ec4fda68047f57e8348746cf7e08467151a4b3d193cce16652980b5b8a47
4
- data.tar.gz: 158e71dc98ba8a954eb3e744140a6e1f468de06e54a482690e7347d4b8750153
3
+ metadata.gz: 994e5e02b7c4cdbfbfcef40099558c8fa86a97538da74065a76321c8ee25e129
4
+ data.tar.gz: 97ae05c7a08e2c9b3bdccc01a9679d63ab35ce4fabc6307087fbfa7946fbecef
5
5
  SHA512:
6
- metadata.gz: 4e261b58f3c1246dec8ab9ab5732c06c595af86b27c14866045fb9992983b1f8c89d93f788272c2331ded4f5424e424ebdf5ae0bdc886e95537e5696ad2ea04b
7
- data.tar.gz: 9947c8fd27c4f7dbb98b9eb9c3aca086d7c0041ec51353b8163371d89b98c745592f764cfb05074daff0c8e9a6e9500863bf16d5f200b54e92e61ccc9006eaca
6
+ metadata.gz: '09d938bd3b32d743aab858883c94a760a27c40762a98d792b4135df29625995582aac5571aca0b510d0e85adc8b99436f2ede87196581544ae5759de35df9cfb'
7
+ data.tar.gz: 18c4f281814896c8d8a9e8d513416538770263b4b17dea5fbc5a5e5612af9f3b0f39d86b5e54919df6466a2e18e73f52e125ad9d167b1912c4f3acc2cb8b8a27
data/.rubocop.yml ADDED
@@ -0,0 +1,28 @@
1
+ AllCops:
2
+ NewCops: enable
3
+ SuggestExtensions: false
4
+
5
+ # this is buggy in 2.7.0
6
+ Style/HashTransformValues: { Enabled: false }
7
+
8
+ # minimal personal preference
9
+ Layout/CaseIndentation: { Enabled: false }
10
+ Layout/EndAlignment: { EnforcedStyleAlignWith: variable }
11
+ Lint/AssignmentInCondition: { Enabled: false }
12
+ Lint/NonLocalExitFromIterator: { Enabled: false }
13
+ Metrics: { Enabled: false }
14
+ Naming/MethodParameterName: { Enabled: false }
15
+ Naming/VariableNumber: { Enabled: false }
16
+ Style/Documentation: { Enabled: false }
17
+ Style/DoubleNegation: { Enabled: false }
18
+ Style/EmptyCaseCondition: { Enabled: false }
19
+ Style/FrozenStringLiteralComment: { Enabled: false }
20
+ Style/GuardClause: { Enabled: false }
21
+ Style/IfUnlessModifier: { Enabled: false }
22
+ Style/NegatedIf: { Enabled: false }
23
+ Style/NumericPredicate: { Enabled: false }
24
+ Style/ParallelAssignment: { Enabled: false }
25
+ Style/SoleNestedConditional: { Enabled: false }
26
+ Style/StderrPuts: { Enabled: false }
27
+ Style/TrailingCommaInArrayLiteral: { EnforcedStyleForMultiline: consistent_comma }
28
+ Style/TrailingCommaInHashLiteral: { EnforcedStyleForMultiline: consistent_comma }
data/Gemfile CHANGED
@@ -6,5 +6,6 @@ group :development, :test do
6
6
  gem 'mocha'
7
7
  gem 'pry'
8
8
  gem 'rake'
9
+ gem 'rubocop', '~> 1.13.0'
9
10
  gem 'webmock'
10
11
  end
data/Gemfile.lock CHANGED
@@ -1,7 +1,8 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- httpdisk (0.1.0)
4
+ httpdisk (0.5.1)
5
+ content-type (~> 0.0)
5
6
  faraday (~> 1.4)
6
7
  faraday-cookie_jar (~> 0.0)
7
8
  faraday_middleware (~> 1.0)
@@ -12,43 +13,73 @@ GEM
12
13
  specs:
13
14
  addressable (2.7.0)
14
15
  public_suffix (>= 2.0.2, < 5.0)
16
+ ast (2.4.2)
15
17
  coderay (1.1.3)
18
+ content-type (0.0.1)
19
+ parslet (~> 1.5)
16
20
  crack (0.4.5)
17
21
  rexml
18
22
  domain_name (0.5.20190701)
19
23
  unf (>= 0.0.5, < 1.0.0)
20
- faraday (1.4.1)
24
+ faraday (1.5.0)
25
+ faraday-em_http (~> 1.0)
26
+ faraday-em_synchrony (~> 1.0)
21
27
  faraday-excon (~> 1.1)
28
+ faraday-httpclient (~> 1.0.1)
22
29
  faraday-net_http (~> 1.0)
23
30
  faraday-net_http_persistent (~> 1.1)
31
+ faraday-patron (~> 1.0)
24
32
  multipart-post (>= 1.2, < 3)
25
33
  ruby2_keywords (>= 0.0.4)
26
34
  faraday-cookie_jar (0.0.7)
27
35
  faraday (>= 0.8.0)
28
36
  http-cookie (~> 1.0.0)
37
+ faraday-em_http (1.0.0)
38
+ faraday-em_synchrony (1.0.0)
29
39
  faraday-excon (1.1.0)
40
+ faraday-httpclient (1.0.1)
30
41
  faraday-net_http (1.0.1)
31
42
  faraday-net_http_persistent (1.1.0)
43
+ faraday-patron (1.0.0)
32
44
  faraday_middleware (1.0.0)
33
45
  faraday (~> 1.0)
34
46
  hashdiff (1.0.1)
35
- http-cookie (1.0.3)
47
+ http-cookie (1.0.4)
36
48
  domain_name (~> 0.5)
37
49
  method_source (1.0.0)
38
50
  minitest (5.14.4)
39
51
  mocha (1.11.2)
40
52
  multipart-post (2.1.1)
53
+ parallel (1.20.1)
54
+ parser (3.0.1.1)
55
+ ast (~> 2.4.1)
56
+ parslet (1.8.2)
41
57
  pry (0.13.1)
42
58
  coderay (~> 1.1)
43
59
  method_source (~> 1.0)
44
60
  public_suffix (4.0.6)
61
+ rainbow (3.0.0)
45
62
  rake (13.0.3)
63
+ regexp_parser (2.1.1)
46
64
  rexml (3.2.5)
65
+ rubocop (1.13.0)
66
+ parallel (~> 1.10)
67
+ parser (>= 3.0.0.0)
68
+ rainbow (>= 2.2.2, < 4.0)
69
+ regexp_parser (>= 1.8, < 3.0)
70
+ rexml
71
+ rubocop-ast (>= 1.2.0, < 2.0)
72
+ ruby-progressbar (~> 1.7)
73
+ unicode-display_width (>= 1.4.0, < 3.0)
74
+ rubocop-ast (1.5.0)
75
+ parser (>= 3.0.1.1)
76
+ ruby-progressbar (1.11.0)
47
77
  ruby2_keywords (0.0.4)
48
- slop (4.8.2)
78
+ slop (4.9.1)
49
79
  unf (0.1.4)
50
80
  unf_ext
51
81
  unf_ext (0.0.7.7)
82
+ unicode-display_width (2.0.0)
52
83
  webmock (3.12.2)
53
84
  addressable (>= 2.3.6)
54
85
  crack (>= 0.3.2)
@@ -63,6 +94,7 @@ DEPENDENCIES
63
94
  mocha
64
95
  pry
65
96
  rake
97
+ rubocop (~> 1.13.0)
66
98
  webmock
67
99
 
68
100
  BUNDLED WITH
data/README.md CHANGED
@@ -119,20 +119,27 @@ httpdisk caches all responses. POST responses are cached, along with 500 respons
119
119
 
120
120
  In general, if you make a request it will be cached regardless of the outcome.
121
121
 
122
+ ## String Encoding
123
+
124
+ httpdisk will honor the `Content-Type` from responses. Unfortunately, it is entirely possible to get invalid bodies if the `Content-Type` doesn't match the bytes. This is a major bummer, so httpdisk provides a `utf8:` option that forces text response bodies to UTF-8.
125
+
122
126
  ## Configuration
123
127
 
124
128
  httpdisk supports a few options:
125
129
 
126
130
  - `dir:` location for disk cache, defaults to `~/httpdisk`
127
- - `expires_in:` when to expire cached requests, default is nil (never expire)
131
+ - `expires:` when to expire cached requests, default is nil (never expire)
128
132
  - `force:` don't read anything from cache (but still write)
129
133
  - `force_errors:` don't read errors from cache (but still write)
134
+ - `ignore_params:` array of query params to ignore when calculating cache_key
135
+ - `logger:` log requests to stderr, or pass your own logger
136
+ - `utf8:` if true, force text response bodies to valid UTF-8
130
137
 
131
138
  Pass these in when setting up Faraday:
132
139
 
133
140
  ```ruby
134
141
  faraday = Faraday.new do
135
- _1.use :httpdisk, expires_in: 7*24*60*60, force: true
142
+ _1.use :httpdisk, expires: 7*24*60*60, force: true
136
143
  end
137
144
  ```
138
145
 
@@ -160,10 +167,13 @@ Specific to httpdisk:
160
167
  --force don't read anything from cache (but still write)
161
168
  --force-errors don't read errors from cache (but still write)
162
169
  --status show status for a url in the cache
163
- --version show version
164
- --help show this help
165
170
  ```
166
171
 
172
+ ## Goodies: httpdisk-grep
173
+
174
+ The `httpdisk-grep` command makes it easy to search your cache directory.
175
+ It can be challenging to use grep/ripgrep because cache files are compressed and JSON bodies often lack newlines. httpdisk-grep is the right tool for the job. See `httpdisk-grep --help`.
176
+
167
177
  ## Limitations & Gotchas
168
178
 
169
179
  - Transient errors are cached. This is appropriate for many use cases (like crawling) but can be confusing. Use `httpdisk --status` to debug.
@@ -174,6 +184,28 @@ Specific to httpdisk:
174
184
 
175
185
  ## Changelog
176
186
 
187
+ #### 0.5
188
+
189
+ - honor Content-Type
190
+ - added `:utf8` option to force text-like response bodies to UTF-8
191
+
192
+ #### 0.4
193
+
194
+ - added httpdisk-grep for searching cache files
195
+ - added HTTPDisk::Cache#delete
196
+ - rename `:expires_in` to `:expires`
197
+
198
+ #### 0.3
199
+
200
+ - added :ignore_params, for ignoring query params when generating cache keys
201
+ - HTTP 40x & 50x responses return :error status (and respond to `force_errors`)
202
+
203
+ #### 0.2 - May 2020
204
+
205
+ - added `response.env[:httpdisk]`, which will be true if the response came from the cache
206
+ - added `:logger` option
207
+ - rake rubocop
208
+
177
209
  #### 0.1 - April 2020
178
210
 
179
211
  - Original release
data/Rakefile CHANGED
@@ -10,12 +10,15 @@ spec = Gem::Specification.load('httpdisk.gemspec')
10
10
  #
11
11
 
12
12
  # test (default)
13
- Rake::TestTask.new { _1.libs << 'test' }
13
+ Rake::TestTask.new do
14
+ _1.libs << 'test'
15
+ _1.warning = false # https://github.com/lostisland/faraday/issues/1285
16
+ end
14
17
  task default: :test
15
18
 
16
- # Watch files, run tests whenever something changes
19
+ # Watch rb files, run tests whenever something changes
17
20
  task :watch do
18
- system('find . | entr -c rake test')
21
+ sh "find . -name '*.rb' | entr -c rake"
19
22
  end
20
23
 
21
24
  #
@@ -23,7 +26,15 @@ end
23
26
  #
24
27
 
25
28
  task :pry do
26
- system 'pry -I lib -r httpdisk.rb'
29
+ sh 'pry -I lib -r httpdisk.rb'
30
+ end
31
+
32
+ #
33
+ # rubocop
34
+ #
35
+
36
+ task :rubocop do
37
+ sh 'bundle exec rubocop -A .'
27
38
  end
28
39
 
29
40
  #
@@ -31,17 +42,17 @@ end
31
42
  #
32
43
 
33
44
  task :build do
34
- system('gem build --quiet httpdisk.gemspec', exception: true)
45
+ sh 'gem build --quiet httpdisk.gemspec'
35
46
  end
36
47
 
37
48
  task install: :build do
38
- system("gem install --quiet httpdisk-#{spec.version}.gem", exception: true)
49
+ sh "gem install --quiet httpdisk-#{spec.version}.gem"
39
50
  end
40
51
 
41
- task release: :build do
52
+ task release: %i[rubocop test build] do
42
53
  raise "looks like git isn't clean" unless `git status --porcelain`.empty?
43
54
 
44
- system("git tag -a #{spec.version} -m 'Tagging #{spec.version}'", exception: true)
45
- system('git push --tags', exception: true)
46
- system("gem push httpdisk-#{spec.version}.gem", exception: true)
55
+ sh "git tag -a #{spec.version} -m 'Tagging #{spec.version}'"
56
+ sh 'git push --tags'
57
+ sh "gem push httpdisk-#{spec.version}.gem"
47
58
  end
data/bin/httpdisk CHANGED
@@ -6,20 +6,22 @@
6
6
 
7
7
  $LOAD_PATH.unshift(File.join(__dir__, '../lib'))
8
8
 
9
- def puts_error(s, exit: false)
10
- $stderr.puts "httpdisk: #{s}"
9
+ BIN = File.basename($PROGRAM_NAME)
10
+
11
+ def puts_error(s)
12
+ $stderr.puts "#{BIN}: #{s}"
11
13
  end
12
14
 
13
15
  #
14
16
  # Load the bare minimum and parse args with slop. We do this separately for speed.
15
17
  #
16
18
 
17
- require 'httpdisk/cli_slop'
19
+ require 'httpdisk/cli/args'
18
20
  begin
19
- slop = HTTPDisk::CliSlop.slop(ARGV)
21
+ slop = HTTPDisk::Cli::Args.slop(ARGV)
20
22
  rescue Slop::Error => e
21
23
  puts_error(e) if e.message != ''
22
- puts_error("try 'httpdisk --help' for more information")
24
+ puts_error("try '#{BIN} --help' for more information")
23
25
  exit 1
24
26
  end
25
27
 
@@ -28,11 +30,11 @@ end
28
30
  #
29
31
 
30
32
  require 'httpdisk'
31
- cli = HTTPDisk::Cli.new(slop)
33
+ main = HTTPDisk::Cli::Main.new(slop)
32
34
  begin
33
- cli.run
35
+ main.run
34
36
  rescue StandardError => e
35
- puts_error(e) if !cli.options[:silent]
37
+ puts_error(e) if !main.options[:silent]
36
38
  if ENV['HTTPDISK_DEBUG']
37
39
  $stderr.puts
38
40
  $stderr.puts e.backtrace.join("\n")
data/bin/httpdisk-grep ADDED
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ #
4
+ # Search an HTTPDisk cache, similar to grep.
5
+ #
6
+
7
+ $LOAD_PATH.unshift(File.join(__dir__, '../lib'))
8
+
9
+ BIN = File.basename($PROGRAM_NAME)
10
+
11
+ def puts_error(s)
12
+ $stderr.puts "#{BIN}: #{s}"
13
+ end
14
+
15
+ #
16
+ # Load the bare minimum and parse args with slop. We do this separately for speed.
17
+ #
18
+
19
+ require 'httpdisk/grep/args'
20
+ begin
21
+ slop = HTTPDisk::Grep::Args.slop(ARGV)
22
+ rescue Slop::Error => e
23
+ puts_error(e) if e.message != ''
24
+ puts_error("try '#{BIN} --help' for more information")
25
+ exit 1
26
+ end
27
+
28
+ #
29
+ # now load everything and run
30
+ #
31
+
32
+ require 'httpdisk'
33
+
34
+ main = HTTPDisk::Grep::Main.new(slop)
35
+ begin
36
+ success = main.run
37
+ exit 1 if !success
38
+ rescue StandardError => e
39
+ puts_error(e)
40
+ if ENV['HTTPDISK_DEBUG']
41
+ $stderr.puts
42
+ $stderr.puts e.class
43
+ $stderr.puts e.backtrace.join("\n")
44
+ end
45
+ exit 2
46
+ end
data/examples.rb CHANGED
@@ -18,7 +18,6 @@ class Examples
18
18
  faraday.get('http://www.google.com', nil, { "User-Agent": 'test-agent' })
19
19
  faraday.get('http://www.google.com', { q: 'ruby' })
20
20
  faraday.post('http://httpbin.org/post', 'name=hello')
21
- exit
22
21
 
23
22
  3.times { puts }
24
23
  response = faraday.get('http://httpbingo.org/get')
@@ -106,7 +105,7 @@ class Examples
106
105
  end
107
106
 
108
107
  3.times { puts }
109
- response = faraday.post('http://httpbingo.org/post', { this_is: [ 'json' ] })
108
+ response = faraday.post('http://httpbingo.org/post', { this_is: ['json'] })
110
109
  puts response.env.url
111
110
  puts JSON.pretty_generate(response.body)
112
111
  end
data/httpdisk.gemspec CHANGED
@@ -3,7 +3,7 @@ require_relative 'lib/httpdisk/version'
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'httpdisk'
5
5
  s.version = HTTPDisk::VERSION
6
- s.authors = [ 'Adam Doppelt' ]
6
+ s.authors = ['Adam Doppelt']
7
7
  s.email = 'amd@gurge.com'
8
8
 
9
9
  s.summary = 'httpdisk - disk cache for faraday'
@@ -18,9 +18,10 @@ Gem::Specification.new do |s|
18
18
  end
19
19
  s.bindir = 'bin'
20
20
  s.executables = s.files.grep(%r{^#{s.bindir}/}) { File.basename(_1) }
21
- s.require_paths = [ 'lib' ]
21
+ s.require_paths = ['lib']
22
22
 
23
23
  # gem dependencies
24
+ s.add_dependency 'content-type', '~> 0.0'
24
25
  s.add_dependency 'faraday', '~> 1.4'
25
26
  s.add_dependency 'faraday-cookie_jar', '~> 0.0'
26
27
  s.add_dependency 'faraday_middleware', '~> 1.0'
data/lib/httpdisk.rb CHANGED
@@ -1,12 +1,17 @@
1
1
  require 'httpdisk/cache_key'
2
2
  require 'httpdisk/cache'
3
- require 'httpdisk/cli_slop'
4
- require 'httpdisk/cli'
5
3
  require 'httpdisk/client'
6
4
  require 'httpdisk/error'
7
5
  require 'httpdisk/payload'
6
+ require 'httpdisk/slop_duration'
7
+ require 'httpdisk/sloptions'
8
8
  require 'httpdisk/version'
9
9
 
10
- module HTTPDisk
11
- ERROR_STATUS = 999
12
- end
10
+ # cli
11
+ require 'httpdisk/cli/args'
12
+ require 'httpdisk/cli/main'
13
+
14
+ # grep
15
+ require 'httpdisk/grep/args'
16
+ require 'httpdisk/grep/main'
17
+ require 'httpdisk/grep/printer'
@@ -7,23 +7,9 @@ module HTTPDisk
7
7
 
8
8
  def initialize(options)
9
9
  @options = options
10
-
11
- # heavy sanity checking on arguments here
12
- if !dir.is_a?(String)
13
- raise ArgumentError, "expected :dir to be a string, not #{dir.inspect}"
14
- end
15
- if expires_in && !expires_in.is_a?(Integer)
16
- raise ArgumentError, "expected :expires_in to be an integer, not #{expires_in.inspect}"
17
- end
18
- %i[force force_errors].each do
19
- value = send(_1)
20
- if ![ nil, true, false ].include?(value)
21
- raise ArgumentError, "expected #{_1} to be a boolean, not #{value.inspect}"
22
- end
23
- end
24
10
  end
25
11
 
26
- %i[dir expires_in force force_errors].each do |method|
12
+ %i[dir expires force force_errors].each do |method|
27
13
  define_method(method) do
28
14
  options[method]
29
15
  end
@@ -39,16 +25,35 @@ module HTTPDisk
39
25
 
40
26
  # Cache status for a cache_key, %i[error force hit miss stale]
41
27
  def status(cache_key)
42
- payload_or_status = read0(cache_key)
28
+ payload_or_status = read0(cache_key, peek: true)
43
29
  return payload_or_status if payload_or_status.is_a?(Symbol)
44
- payload_or_status.error_999? ? :error : :hit
30
+
31
+ payload_or_status.error? ? :error : :hit
45
32
  end
46
33
 
47
34
  # Write response to the disk cache
48
35
  def write(cache_key, payload)
49
36
  path = diskpath(cache_key)
50
37
  FileUtils.mkdir_p(File.dirname(path))
51
- Zlib::GzipWriter.open(path) { payload.write(_1) }
38
+
39
+ # Atomically write gzipped payload. Put our underlying Tempfile into
40
+ # binmode to avoid accidental newline conversion or string encoding. Not
41
+ # required for *nix systems, but I've heard rumors it's helpful for
42
+ # Windows.
43
+ Tempfile.new(binmode: true).tap do |tmp|
44
+ Zlib::GzipWriter.new(tmp).tap do |gzip|
45
+ payload.write(gzip)
46
+ gzip.close
47
+ end
48
+ tmp.close
49
+ FileUtils.mv(tmp.path, path)
50
+ end
51
+ end
52
+
53
+ # Delete existing response, if any
54
+ def delete(cache_key)
55
+ path = diskpath(cache_key)
56
+ FileUtils.rm(path) if File.exist?(path)
52
57
  end
53
58
 
54
59
  # Relative path for this cache_key based on the cache key
@@ -59,22 +64,28 @@ module HTTPDisk
59
64
  protected
60
65
 
61
66
  # low level read, returns payload or status
62
- def read0(cache_key)
67
+ def read0(cache_key, peek: false)
63
68
  path = diskpath(cache_key)
64
69
 
65
70
  return :miss if !File.exist?(path)
66
71
  return :stale if expired?(path)
67
72
  return :force if force?
68
73
 
69
- payload = Zlib::GzipReader.open(path) { Payload.read(_1) }
70
- return :force if force_errors? && payload.error_999?
74
+ begin
75
+ payload = Zlib::GzipReader.open(path, encoding: 'ASCII-8BIT') do
76
+ Payload.read(_1, peek: peek)
77
+ end
78
+ rescue StandardError => e
79
+ raise "#{path}: #{e}"
80
+ end
81
+ return :force if force_errors? && payload.error?
71
82
 
72
83
  payload
73
84
  end
74
85
 
75
86
  # Is this path expired?
76
87
  def expired?(path)
77
- expires_in && File.stat(path).mtime < Time.now - expires_in
88
+ expires && File.stat(path).mtime < Time.now - expires
78
89
  end
79
90
  end
80
91
  end