httpdisk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/httpdisk.gemspec ADDED
@@ -0,0 +1,28 @@
1
+ require_relative 'lib/httpdisk/version'
2
+
3
Gem::Specification.new do |spec|
  # identity
  spec.name = 'httpdisk'
  spec.version = HTTPDisk::VERSION
  spec.authors = [ 'Adam Doppelt' ]
  spec.email = 'amd@gurge.com'

  # descriptive metadata
  spec.summary = 'httpdisk - disk cache for faraday'
  spec.description = 'httpdisk works with faraday to aggressively cache responses on disk.'
  spec.homepage = 'http://github.com/gurgeous/httpdisk'
  spec.license = 'MIT'
  spec.required_ruby_version = '>= 2.7.0'

  # what's in the gem? every file tracked by git, minus anything under test/
  tracked = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0")
  end
  spec.files = tracked.reject { |path| path.match(%r{^test/}) }

  # executables are the basenames of the packaged files that live in bindir
  spec.bindir = 'bin'
  bin_pattern = %r{^#{spec.bindir}/}
  spec.executables = spec.files.grep(bin_pattern).map { |path| File.basename(path) }
  spec.require_paths = [ 'lib' ]

  # gem dependencies
  spec.add_dependency 'faraday', '~> 1.4'
  spec.add_dependency 'faraday-cookie_jar', '~> 0.0'
  spec.add_dependency 'faraday_middleware', '~> 1.0'
  spec.add_dependency 'slop', '~> 4.8'
end
data/lib/httpdisk.rb ADDED
@@ -0,0 +1,12 @@
1
+ require 'httpdisk/cache_key'
2
+ require 'httpdisk/cache'
3
+ require 'httpdisk/cli_slop'
4
+ require 'httpdisk/cli'
5
+ require 'httpdisk/client'
6
+ require 'httpdisk/error'
7
+ require 'httpdisk/payload'
8
+ require 'httpdisk/version'
9
+
10
module HTTPDisk
  # NOTE(review): presumably the placeholder HTTP status recorded for requests
  # that failed without a real response (Cache checks `error_999?` on cached
  # payloads) - confirm against the client/payload code.
  ERROR_STATUS = 999
end
@@ -0,0 +1,80 @@
1
+ require 'fileutils'
2
+
3
# Cache reads and writes gzip files, so load zlib explicitly instead of
# relying on another library having required it first.
require 'zlib'

module HTTPDisk
  # Disk cache for cache_keys => response. Each entry is a gzip-compressed
  # file stored under the :dir option.
  class Cache
    attr_reader :options

    # options - Hash of settings:
    #   :dir          - String, root directory of the cache (required)
    #   :expires_in   - Integer seconds after which an entry is stale, or nil
    #   :force        - true/false/nil, when truthy existing entries are not read
    #   :force_errors - true/false/nil, when truthy cached errors are not read
    #
    # Raises ArgumentError if any option has an unexpected type.
    def initialize(options)
      @options = options

      # heavy sanity checking on arguments here
      if !dir.is_a?(String)
        raise ArgumentError, "expected :dir to be a string, not #{dir.inspect}"
      end
      if expires_in && !expires_in.is_a?(Integer)
        raise ArgumentError, "expected :expires_in to be an integer, not #{expires_in.inspect}"
      end
      %i[force force_errors].each do
        value = send(_1)
        if ![ nil, true, false ].include?(value)
          raise ArgumentError, "expected #{_1} to be a boolean, not #{value.inspect}"
        end
      end
    end

    # One reader per option, each simply looks the value up in options.
    %i[dir expires_in force force_errors].each do |method|
      define_method(method) do
        options[method]
      end
    end
    alias force? force
    alias force_errors? force_errors

    # Get cached response. Returns the payload, or nil when there is no usable
    # entry (miss, stale or forced). A cached error is returned like any other
    # payload; nothing is raised here on its account.
    def read(cache_key)
      payload_or_status = read0(cache_key)
      payload_or_status.is_a?(Symbol) ? nil : payload_or_status
    end

    # Cache status for a cache_key, %i[error force hit miss stale]
    def status(cache_key)
      payload_or_status = read0(cache_key)
      return payload_or_status if payload_or_status.is_a?(Symbol)
      payload_or_status.error_999? ? :error : :hit
    end

    # Write response to the disk cache, creating parent directories as needed.
    def write(cache_key, payload)
      path = diskpath(cache_key)
      FileUtils.mkdir_p(File.dirname(path))
      Zlib::GzipWriter.open(path) { payload.write(_1) }
    end

    # Path for this cache_key: dir joined with the key's own relative diskpath.
    def diskpath(cache_key)
      File.join(dir, cache_key.diskpath)
    end

    protected

    # low level read, returns payload or status (:miss, :stale or :force)
    def read0(cache_key)
      path = diskpath(cache_key)

      return :miss if !File.exist?(path)
      return :stale if expired?(path)
      return :force if force?

      # the payload has to be loaded before we can tell whether it is an error
      payload = Zlib::GzipReader.open(path) { Payload.read(_1) }
      return :force if force_errors? && payload.error_999?

      payload
    end

    # Is this path expired? Compares the file mtime against expires_in seconds
    # ago. Never expired when expires_in is not set.
    def expired?(path)
      expires_in && File.stat(path).mtime < Time.now - expires_in
    end
  end
end
@@ -0,0 +1,100 @@
1
+ require 'cgi'
2
+ require 'digest/md5'
3
+ require 'uri'
4
+
5
module HTTPDisk
  # Canonical cache key for a request env, plus the md5 digest and relative
  # disk path derived from that key.
  class CacheKey
    attr_reader :env

    # env must respond to url, method, request_body and request_headers.
    # Raises (RuntimeError) unless the url is http/https and has a host.
    def initialize(env)
      @env = env

      # sanity checks
      raise 'http/https required' unless env.url.scheme =~ /^https?$/
      raise 'hostname required' unless env.url.host
    end

    # The url of the underlying env
    def url
      env.url
    end

    # Cache key (memoized)
    def key
      return @key if @key

      @key = calculate_key
    end

    # md5(key) (memoized)
    def digest
      return @digest if @digest

      @digest = Digest::MD5.hexdigest(key)
    end

    # Relative path for this cache key: hostdir, then the first three chars
    # of the digest, then the rest of the digest. Memoized.
    def diskpath
      return @diskpath if @diskpath

      prefix = digest[0...3]
      remainder = digest[3..-1]
      @diskpath = File.join(hostdir, prefix, remainder)
    end

    # String form is the key itself
    def to_s
      key
    end

    protected

    # Calculate cache key for url:
    # METHOD scheme://host[:port][path][?sorted-query][ body-segment]
    def calculate_key
      parts = [ env.method.upcase, ' ', url.scheme, '://', url.host.downcase ]

      # the port is only part of the key when it is not the scheme default
      parts.push(':', url.port) unless default_port?

      # a bare "/" path is left out
      parts.push(url.path) unless url.path == '/'

      query = url.query
      parts.push('?', querykey(query)) if query && query != ''

      parts.push(' ', bodykey) if env.request_body

      parts.join
    end

    # Calculate cache key segment for body. Form bodies are canonicalized
    # like a query, short bodies are used verbatim and anything else is
    # replaced by its md5.
    def bodykey
      body = env.request_body.to_s
      form = env.request_headers['Content-Type'] == 'application/x-www-form-urlencoded'
      return querykey(body) if form
      return body if body.length < 50

      Digest::MD5.hexdigest(body)
    end

    # Calculate canonical key for a query by sorting its &-separated pieces
    def querykey(q)
      pieces = q.split('&')
      pieces.sort.join('&')
    end

    # Is the url using the default port for its scheme?
    def default_port?
      url.port == url.default_port
    end

    # Calculate nice directory name from url.host: lowercased, leading "www."
    # dropped, unexpected characters removed and runs of dots collapsed.
    def hostdir
      name = url.host.downcase
                .gsub(/^www\./, '')
                .gsub(/[^a-z0-9._-]/, '')
                .squeeze('.')
      name.empty? ? 'any' : name
    end
  end
end
@@ -0,0 +1,218 @@
1
+ require 'faraday-cookie_jar'
2
+ require 'faraday_middleware'
3
+ require 'ostruct'
4
+
5
module HTTPDisk
  # Command line httpdisk command. Wraps a Hash of parsed command line
  # options and either performs the request or, for --status, prints the
  # cache status of the request.
  class Cli
    # for --expires: number of seconds in each unit suffix
    UNITS = {
      s: 1,
      m: 60,
      h: 60 * 60,
      d: 24 * 60 * 60,
      w: 7 * 24 * 60 * 60,
      y: 365 * 24 * 60 * 60, # a year is 365 days (not 365 weeks)
    }.freeze

    # we have a very liberal retry policy
    RETRY_OPTIONS = {
      methods: %w[delete get head options patch post put trace],
      retry_statuses: (400..600).to_a,
      retry_if: ->(_env, _err) { true },
    }.freeze

    attr_reader :options

    # options - Hash of parsed command line options. Keys read by this class:
    # :data, :dir, :expires, :force, :force_errors, :header, :include,
    # :max_time, :output, :proxy, :request, :retry, :status, :url, :user_agent
    def initialize(options)
      @options = options
    end

    # Make the request (or print status). The response is written to
    # options[:output] if set, otherwise to $stdout.
    #
    # Raises CliError if the response status is >= 400, or if one of the
    # options turns out to be invalid.
    def run
      # short circuit --status
      if options[:status]
        status
        return
      end

      # create Faraday client
      faraday = create_faraday

      # run request
      response = faraday.run_request(request_method, request_url, request_body, request_headers)
      if response.status >= 400
        raise CliError, "the requested URL returned error: #{response.status} #{response.reason_phrase}"
      end

      # output
      if options[:output]
        File.open(options[:output], 'w') { output(response, _1) }
      else
        output(response, $stdout)
      end
    end

    # Build the Faraday connection used by run. Middleware order matters, see
    # the comments below.
    def create_faraday
      Faraday.new do
        # connection settings
        _1.proxy = proxy if options[:proxy]
        _1.options.timeout = options[:max_time] if options[:max_time]

        # cookie middleware
        _1.use :cookie_jar

        # BEFORE httpdisk so each redirect segment is cached
        _1.response :follow_redirects

        # httpdisk
        _1.use :httpdisk, client_options

        # AFTER httpdisk so transient failures are not cached
        if options[:retry]
          _1.request :retry, RETRY_OPTIONS.merge(max: options[:retry])
        end
      end
    end

    # Support for --status. Builds an env for the request without sending it
    # and prints each key/value pair reported by HTTPDisk::Client#status.
    def status
      # build env
      env = Faraday::Env.new.tap do
        _1.method = request_method
        _1.request_body = request_body
        _1.request_headers = request_headers
        _1.url = request_url
      end

      # now print status
      client = HTTPDisk::Client.new(nil, client_options)
      client.status(env).each do
        puts "#{_1}: #{_2.inspect}"
      end
    end

    # Output response to f. With --include a status line and the response
    # headers are written before the body.
    def output(response, f)
      if options[:include]
        f.puts "HTTPDISK #{response.status} #{response.reason_phrase}"
        response.headers.each { f.puts("#{_1}: #{_2}") }
        f.puts
      end
      f.write(response.body)
    end

    #
    # request_XXX
    #

    # HTTP method (get, post, etc.) as a lowercase Symbol. Uses --request if
    # given, otherwise post when there is --data, otherwise get.
    #
    # Raises CliError if the method is not in Faraday::Connection::METHODS.
    def request_method
      method = if options[:request]
        options[:request]
      elsif options[:data]
        'post'
      end
      method ||= 'get'
      method = method.downcase.to_sym

      if !Faraday::Connection::METHODS.include?(method)
        raise CliError, "invalid --request #{method.inspect}"
      end
      method
    end

    # Request url as a URI. A url without a scheme gets http:// prepended.
    #
    # Raises CliError for schemes other than http/https, or if the url
    # cannot be parsed.
    def request_url
      url = options[:url]
      # recover from missing http:
      if url !~ %r{^https?://}i
        if url =~ %r{^\w+://}
          raise CliError, 'only http/https supported'
        end
        url = "http://#{url}"
      end
      URI.parse(url)
    rescue URI::InvalidURIError
      raise CliError, "invalid url #{url.inspect}"
    end

    # Request body
    def request_body
      options[:data]
    end

    # Request headers as a Hash, built from --user-agent and each --header
    # ("Name: value").
    #
    # Raises CliError if a --header does not have a name and a value.
    def request_headers
      {}.tap do |headers|
        if options[:user_agent]
          headers['User-Agent'] = options[:user_agent]
        end

        # Array() so a missing :header option behaves like "no headers"
        Array(options[:header]).each do |header|
          key, value = header.split(': ', 2)
          if !key || !value || key.empty? || value.empty?
            raise CliError, "invalid --header #{header.inspect}"
          end
          headers[key] = value
        end
      end
    end

    #
    # helpers
    #

    # Options to HTTPDisk::Client. --expires is converted to :expires_in
    # seconds.
    #
    # Raises CliError if --expires cannot be parsed.
    def client_options
      {}.tap do |client_options|
        client_options[:dir] = options[:dir]
        if options[:expires]
          seconds = parse_expires(options[:expires])
          if !seconds
            raise CliError, "invalid --expires #{options[:expires].inspect}"
          end
          client_options[:expires_in] = seconds
        end
        client_options[:force] = options[:force]
        client_options[:force_errors] = options[:force_errors]
      end
    end

    # Return validated --proxy flag if present (as a url string), nil if the
    # flag is absent.
    #
    # Raises CliError if the flag is not host[:port].
    def proxy
      return if !options[:proxy]

      proxy = parse_proxy(options[:proxy])
      raise CliError, "--proxy should be host[:port], not #{options[:proxy].inspect}" if !proxy
      proxy
    end

    # Parse --expires flag: digits with an optional unit suffix from UNITS
    # (seconds when there is no suffix). Returns the number of seconds, or nil
    # if s is not valid.
    def parse_expires(s)
      # \A and \z anchor the whole string; ^ and $ would accept "1h\nfoo"
      m = s.match(/\A(\d+)([smhdwy])?\z/)
      return if !m

      num, unit = m[1].to_i, (m[2] || 's').to_sym
      return if !UNITS.key?(unit)

      num * UNITS[unit]
    end

    # Parse --proxy flag (host[:port]). Returns an http:// url string for the
    # proxy, or nil if the flag is not valid.
    def parse_proxy(proxy_flag)
      host, port = proxy_flag.split(':', 2)
      return if !host || host.empty?
      return if port&.empty?

      # let URI validate the components by assigning them to a dummy url
      uri = URI.parse('http://placeholder')
      uri.host = host
      uri.port = port if port
      uri.to_s
    rescue URI::InvalidComponentError
      nil
    end
  end
end
@@ -0,0 +1,54 @@
1
+ # manually load dependencies here since this is loaded standalone by bin
2
+ require 'httpdisk/error'
3
+ require 'httpdisk/version'
4
+ require 'slop'
5
+
6
module HTTPDisk
  # Slop parsing. This is broken out so we can run without require 'httpdisk'.
  module CliSlop
    # Parse command line args with Slop and return the options as a Hash,
    # with the single positional argument stored under :url.
    #
    # Raises Slop::Error (with an empty message) when args is empty, and with
    # a descriptive message when there is not exactly one URL. --version and
    # --help print and exit.
    def self.slop(args)
      parsed = Slop.parse(args) do |opts|
        opts.banner = 'httpdisk [options] [url]'

        # similar to curl
        opts.separator 'Similar to curl:'
        opts.string '-d', '--data', 'HTTP POST data'
        opts.array '-H', '--header', 'pass custom header(s) to server', delimiter: nil
        opts.boolean '-i', '--include', 'include response headers in the output'
        opts.integer '-m', '--max-time', 'maximum time allowed for the transfer'
        opts.string '-o', '--output', 'write to file instead of stdout'
        opts.string '-x', '--proxy', 'use host[:port] as proxy'
        opts.string '-X', '--request', 'HTTP method to use'
        opts.integer '--retry', 'retry request if problems occur'
        opts.boolean '-s', '--silent', "silent mode (don't print errors)"
        opts.string '-A', '--user-agent', 'send User-Agent to server'

        # from httpdisk
        opts.separator 'Specific to httpdisk:'
        opts.string '--dir', 'httpdisk cache directory (defaults to ~/httpdisk)'
        opts.string '--expires', 'when to expire cached requests (ex: 1h, 2d, 3w)'
        opts.boolean '--force', "don't read anything from cache (but still write)"
        opts.boolean '--force-errors', "don't read errors from cache (but still write)"
        opts.boolean '--status', 'show status for a url in the cache'

        # generic
        opts.boolean '--version', 'show version' do
          puts "httpdisk #{HTTPDisk::VERSION}"
          exit
        end
        opts.on '--help', 'show this help' do
          puts opts
          exit
        end
      end

      # no arguments at all: error with an empty message
      raise Slop::Error, '' if args.empty?

      # exactly one positional argument (the url) is required
      urls = parsed.args
      raise Slop::Error, 'no URL specified' if urls.empty?
      raise Slop::Error, 'more than one URL specified' if urls.length > 1

      result = parsed.to_h
      result[:url] = urls.first
      result
    end
  end
end