httpdisk 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/httpdisk.gemspec ADDED
@@ -0,0 +1,28 @@
1
+ require_relative 'lib/httpdisk/version'
2
+
3
# Gem specification for httpdisk: identity, metadata, packaged files and
# runtime dependencies.
Gem::Specification.new do |spec|
  # identity
  spec.name = 'httpdisk'
  spec.version = HTTPDisk::VERSION
  spec.authors = [ 'Adam Doppelt' ]
  spec.email = 'amd@gurge.com'

  # descriptive metadata
  spec.summary = 'httpdisk - disk cache for faraday'
  spec.description = 'httpdisk works with faraday to aggressively cache responses on disk.'
  spec.homepage = 'http://github.com/gurgeous/httpdisk'
  spec.license = 'MIT'
  spec.required_ruby_version = '>= 2.7.0'

  # what's in the gem? Everything tracked by git except files under test/.
  # The chdir makes `git ls-files` run from the directory holding this gemspec.
  tracked = Dir.chdir(File.expand_path(__dir__)) { `git ls-files -z`.split("\x0") }
  spec.files = tracked.reject { |path| path.match(%r{^test/}) }

  # executables are the basenames of packaged files that live under bindir
  spec.bindir = 'bin'
  spec.executables = spec.files.grep(%r{^#{spec.bindir}/}) { |path| File.basename(path) }
  spec.require_paths = [ 'lib' ]

  # gem dependencies
  spec.add_dependency 'faraday', '~> 1.4'
  spec.add_dependency 'faraday-cookie_jar', '~> 0.0'
  spec.add_dependency 'faraday_middleware', '~> 1.0'
  spec.add_dependency 'slop', '~> 4.8'
end
data/lib/httpdisk.rb ADDED
@@ -0,0 +1,12 @@
1
+ require 'httpdisk/cache_key'
2
+ require 'httpdisk/cache'
3
+ require 'httpdisk/cli_slop'
4
+ require 'httpdisk/cli'
5
+ require 'httpdisk/client'
6
+ require 'httpdisk/error'
7
+ require 'httpdisk/payload'
8
+ require 'httpdisk/version'
9
+
10
# Top-level namespace for the httpdisk gem.
module HTTPDisk
  # Status code httpdisk reserves for errors.
  # NOTE(review): presumably the status stored on payloads for failed
  # requests (Cache calls payload.error_999?) - confirm against the client.
  ERROR_STATUS = 999
end
@@ -0,0 +1,80 @@
1
require 'fileutils'
require 'zlib' # GzipReader/GzipWriter are used below; don't rely on someone else loading zlib

module HTTPDisk
  # Disk cache for cache_keys => response. Files are compressed.
  class Cache
    attr_reader :options

    # Validate and store the options hash.
    #
    # @param options [Hash] :dir (String, required), :expires_in (Integer
    #   number of seconds, optional), :force and :force_errors (true, false
    #   or nil)
    # @raise [ArgumentError] if an option has an unexpected type
    def initialize(options)
      @options = options

      # heavy sanity checking on arguments here
      if !dir.is_a?(String)
        raise ArgumentError, "expected :dir to be a string, not #{dir.inspect}"
      end
      if expires_in && !expires_in.is_a?(Integer)
        raise ArgumentError, "expected :expires_in to be an integer, not #{expires_in.inspect}"
      end
      %i[force force_errors].each do
        value = send(_1)
        if ![ nil, true, false ].include?(value)
          raise ArgumentError, "expected #{_1} to be a boolean, not #{value.inspect}"
        end
      end
    end

    # One reader per option, each returning options[name].
    %i[dir expires_in force force_errors].each do |method|
      define_method(method) do
        options[method]
      end
    end
    alias force? force
    alias force_errors? force_errors

    # Get cached response, or nil when there is nothing usable on disk (miss,
    # stale or force). Cached errors are returned like any other payload;
    # nothing is raised here.
    #
    # @param cache_key [#diskpath] key for the request
    # @return [Payload, nil]
    def read(cache_key)
      payload_or_status = read0(cache_key)
      payload_or_status.is_a?(Symbol) ? nil : payload_or_status
    end

    # Cache status for a cache_key, %i[error force hit miss stale]
    #
    # @param cache_key [#diskpath] key for the request
    # @return [Symbol]
    def status(cache_key)
      payload_or_status = read0(cache_key)
      return payload_or_status if payload_or_status.is_a?(Symbol)

      payload_or_status.error_999? ? :error : :hit
    end

    # Write response to the disk cache, creating parent directories as needed.
    #
    # @param cache_key [#diskpath] key for the request
    # @param payload [#write] object that serializes itself to the given io
    def write(cache_key, payload)
      path = diskpath(cache_key)
      FileUtils.mkdir_p(File.dirname(path))
      Zlib::GzipWriter.open(path) { payload.write(_1) }
    end

    # Path of the cache file for this cache_key: the key's own relative
    # diskpath placed underneath dir.
    #
    # @param cache_key [#diskpath]
    # @return [String]
    def diskpath(cache_key)
      File.join(dir, cache_key.diskpath)
    end

    protected

    # low level read, returns payload or status. The checks are ordered:
    # missing file, expired file, force, then force_errors (which needs the
    # payload to be loaded first).
    def read0(cache_key)
      path = diskpath(cache_key)

      return :miss if !File.exist?(path)
      return :stale if expired?(path)
      return :force if force?

      payload = Zlib::GzipReader.open(path) { Payload.read(_1) }
      return :force if force_errors? && payload.error_999?

      payload
    end

    # Is this path expired? Always falsy when expires_in is not set;
    # otherwise compares the file's mtime against now - expires_in seconds.
    def expired?(path)
      expires_in && File.stat(path).mtime < Time.now - expires_in
    end
  end
end
@@ -0,0 +1,100 @@
1
+ require 'cgi'
2
+ require 'digest/md5'
3
+ require 'uri'
4
+
5
module HTTPDisk
  # Canonical cache key for a request env, together with the md5 digest and
  # relative disk path derived from that key.
  class CacheKey
    attr_reader :env

    # @param env [#url, #method, #request_body, #request_headers] request env
    # @raise [RuntimeError] unless env.url is http/https and has a host
    def initialize(env)
      @env = env

      # sanity checks
      raise 'http/https required' unless env.url.scheme =~ /^https?$/
      raise 'hostname required' unless env.url.host
    end

    # The request url, straight from env.
    def url
      env.url
    end

    # Cache key (memoized)
    def key
      @key ||= calculate_key
    end

    # md5(key) (memoized)
    def digest
      @digest ||= Digest::MD5.hexdigest(key)
    end

    # Relative path for this cache key (memoized): hostdir, then the first
    # three characters of the digest, then the remainder of the digest.
    def diskpath
      @diskpath ||= begin
        prefix = digest[0, 3]
        remainder = digest[3..]
        File.join(hostdir, prefix, remainder)
      end
    end

    # The key doubles as the string representation.
    def to_s
      key
    end

    protected

    # Calculate cache key for url. Layout is
    # "METHOD scheme://host[:port][path][?query][ body]"; the port appears
    # only when it is not the scheme default and a path of '/' is dropped.
    def calculate_key
      parts = [ env.method.upcase, ' ', url.scheme, '://', url.host.downcase ]
      parts.push(':', url.port) unless default_port?
      parts << url.path unless url.path == '/'

      query = url.query
      parts.push('?', querykey(query)) if query && query != ''
      parts.push(' ', bodykey) if env.request_body

      parts.join
    end

    # Calculate cache key segment for body: form bodies are canonicalized
    # like a query, bodies shorter than 50 characters are used verbatim and
    # anything else is replaced by its md5.
    def bodykey
      text = env.request_body.to_s
      if env.request_headers['Content-Type'] == 'application/x-www-form-urlencoded'
        querykey(text)
      elsif text.length < 50
        text
      else
        Digest::MD5.hexdigest(text)
      end
    end

    # Calculate canonical key for a query by sorting its '&' separated pairs
    def querykey(q)
      pairs = q.split('&')
      pairs.sort.join('&')
    end

    # Is the url using the default port for its scheme?
    def default_port?
      url.port == url.default_port
    end

    # Calculate nice directory name from url.host: lowercased, without a
    # leading "www.", limited to [a-z0-9._-], with runs of '.' collapsed.
    # Falls back to 'any' when nothing is left.
    def hostdir
      name = url.host.downcase
                .gsub(/^www\./, '')
                .gsub(/[^a-z0-9._-]/, '')
                .squeeze('.')
      name.empty? ? 'any' : name
    end
  end
end
@@ -0,0 +1,218 @@
1
+ require 'faraday-cookie_jar'
2
+ require 'faraday_middleware'
3
+ require 'ostruct'
4
+
5
module HTTPDisk
  # Command line httpdisk command.
  class Cli
    attr_reader :options

    # for --expires: number of seconds in each supported unit suffix
    UNITS = {
      s: 1,
      m: 60,
      h: 60 * 60,
      d: 24 * 60 * 60,
      w: 7 * 24 * 60 * 60,
      y: 365 * 24 * 60 * 60,
    }.freeze

    # we have a very liberal retry policy
    RETRY_OPTIONS = {
      methods: %w[delete get head options patch post put trace],
      retry_statuses: (400..600).to_a,
      retry_if: ->(_env, _err) { true },
    }.freeze

    # @param options [Hash] parsed command line options. Keys read by this
    #   class: :url, :request, :data, :header, :user_agent, :proxy,
    #   :max_time, :retry, :output, :include, :status, :dir, :expires,
    #   :force, :force_errors
    def initialize(options)
      @options = options
    end

    # Make the request (or print status)
    #
    # @raise [CliError] if the response status is >= 400, or an option is
    #   invalid
    def run
      # short circuit --status
      if options[:status]
        status
        return
      end

      # create Faraday client
      faraday = create_faraday

      # run request
      response = faraday.run_request(request_method, request_url, request_body, request_headers)
      if response.status >= 400
        raise CliError, "the requested URL returned error: #{response.status} #{response.reason_phrase}"
      end

      # output
      if options[:output]
        File.open(options[:output], 'w') { output(response, _1) }
      else
        output(response, $stdout)
      end
    end

    # Build the Faraday connection used by #run. Middleware order matters,
    # see the comments inline.
    def create_faraday
      Faraday.new do
        # connection settings
        _1.proxy = proxy if options[:proxy]
        _1.options.timeout = options[:max_time] if options[:max_time]

        # cookie middleware
        _1.use :cookie_jar

        # BEFORE httpdisk so each redirect segment is cached
        _1.response :follow_redirects

        # httpdisk
        _1.use :httpdisk, client_options

        # AFTER httpdisk so transient failures are not cached
        if options[:retry]
          _1.request :retry, RETRY_OPTIONS.merge(max: options[:retry])
        end
      end
    end

    # Support for --status. Builds an env for the request without sending
    # it, then prints each pair yielded by HTTPDisk::Client#status.
    def status
      # build env
      env = Faraday::Env.new.tap do
        _1.method = request_method
        _1.request_body = request_body
        _1.request_headers = request_headers
        _1.url = request_url
      end

      # now print status
      client = HTTPDisk::Client.new(nil, client_options)
      client.status(env).each do
        puts "#{_1}: #{_2.inspect}"
      end
    end

    # Output response to f. With --include, a status line and the response
    # headers are written before the body.
    def output(response, f)
      if options[:include]
        f.puts "HTTPDISK #{response.status} #{response.reason_phrase}"
        response.headers.each { f.puts("#{_1}: #{_2}") }
        f.puts
      end
      f.write(response.body)
    end

    #
    # request_XXX
    #

    # HTTP method (get, post, etc.) as a downcased symbol. --request wins,
    # otherwise 'post' when --data is present, otherwise 'get'.
    #
    # @raise [CliError] if Faraday::Connection::METHODS does not include it
    def request_method
      method = options[:request] || (options[:data] ? 'post' : 'get')
      method = method.downcase.to_sym

      if !Faraday::Connection::METHODS.include?(method)
        raise CliError, "invalid --request #{method.inspect}"
      end
      method
    end

    # Request url, parsed into a URI. A url without a scheme gets "http://"
    # prepended.
    #
    # @raise [CliError] for a non-http(s) scheme or an unparseable url
    def request_url
      url = options[:url]
      # recover from missing http:
      if url !~ %r{^https?://}i
        if url =~ %r{^\w+://}
          raise CliError, 'only http/https supported'
        end
        url = "http://#{url}"
      end
      URI.parse(url)
    rescue URI::InvalidURIError
      raise CliError, "invalid url #{url.inspect}"
    end

    # Request body
    def request_body
      options[:data]
    end

    # Request headers built from --user-agent and each "Key: value" --header.
    #
    # @raise [CliError] if a --header is not of the form "Key: value"
    def request_headers
      {}.tap do |headers|
        if options[:user_agent]
          headers['User-Agent'] = options[:user_agent]
        end

        # Array() so that a missing :header option means "no custom headers"
        Array(options[:header]).each do |header|
          key, value = header.split(': ', 2)
          if !key || !value || key.empty? || value.empty?
            raise CliError, "invalid --header #{header.inspect}"
          end
          headers[key] = value
        end
      end
    end

    #
    # helpers
    #

    # Options to HTTPDisk::Client
    #
    # @raise [CliError] if --expires cannot be parsed
    def client_options
      {}.tap do |client_options|
        client_options[:dir] = options[:dir]
        if options[:expires]
          seconds = parse_expires(options[:expires])
          if !seconds
            raise CliError, "invalid --expires #{options[:expires].inspect}"
          end
          client_options[:expires_in] = seconds
        end
        client_options[:force] = options[:force]
        client_options[:force_errors] = options[:force_errors]
      end
    end

    # Return validated --proxy flag if present
    #
    # @raise [CliError] if --proxy is not host[:port]
    def proxy
      return if !options[:proxy]

      proxy = parse_proxy(options[:proxy])
      raise CliError, "--proxy should be host[:port], not #{options[:proxy].inspect}" if !proxy
      proxy
    end

    # Parse --expires flag: digits followed by an optional unit from UNITS
    # (seconds when the unit is omitted).
    #
    # @param s [String] e.g. "90", "1h", "2d", "3w"
    # @return [Integer, nil] number of seconds, or nil if s is malformed
    def parse_expires(s)
      # \A/\z anchor the whole string; ^/$ would accept extra lines
      m = s.match(/\A(\d+)([smhdwy])?\z/)
      return if !m

      m[1].to_i * UNITS.fetch((m[2] || 's').to_sym)
    end

    # Parse --proxy flag
    #
    # @param proxy_flag [String] host[:port]
    # @return [String, nil] "http://host[:port]", or nil if host or port is
    #   empty or rejected by URI
    def parse_proxy(proxy_flag)
      host, port = proxy_flag.split(':', 2)
      return if !host || host.empty?
      return if port&.empty?

      # let URI validate the components for us
      uri = URI.parse('http://placeholder')
      uri.host = host
      uri.port = port if port
      uri.to_s
    rescue URI::InvalidComponentError
      nil
    end
  end
end
@@ -0,0 +1,54 @@
1
+ # manually load dependencies here since this is loaded standalone by bin
2
+ require 'httpdisk/error'
3
+ require 'httpdisk/version'
4
+ require 'slop'
5
+
6
module HTTPDisk
  # Slop parsing. This is broken out so we can run without require 'httpdisk'.
  module CliSlop
    # Parse command line arguments.
    #
    # @param args [Array<String>] raw command line arguments
    # @return [Hash] the parsed options plus :url, the single positional
    #   argument
    # @raise [Slop::Error] when args is empty (blank message), when no URL is
    #   given, or when more than one URL is given
    def self.slop(args)
      parsed = Slop.parse(args) do |opts|
        opts.banner = 'httpdisk [options] [url]'

        # similar to curl
        opts.separator 'Similar to curl:'
        opts.string '-d', '--data', 'HTTP POST data'
        opts.array '-H', '--header', 'pass custom header(s) to server', delimiter: nil
        opts.boolean '-i', '--include', 'include response headers in the output'
        opts.integer '-m', '--max-time', 'maximum time allowed for the transfer'
        opts.string '-o', '--output', 'write to file instead of stdout'
        opts.string '-x', '--proxy', 'use host[:port] as proxy'
        opts.string '-X', '--request', 'HTTP method to use'
        opts.integer '--retry', 'retry request if problems occur'
        opts.boolean '-s', '--silent', "silent mode (don't print errors)"
        opts.string '-A', '--user-agent', 'send User-Agent to server'

        # from httpdisk
        opts.separator 'Specific to httpdisk:'
        opts.string '--dir', 'httpdisk cache directory (defaults to ~/httpdisk)'
        opts.string '--expires', 'when to expire cached requests (ex: 1h, 2d, 3w)'
        opts.boolean '--force', "don't read anything from cache (but still write)"
        opts.boolean '--force-errors', "don't read errors from cache (but still write)"
        opts.boolean '--status', 'show status for a url in the cache'

        # generic: both of these print and then exit the process
        opts.boolean '--version', 'show version' do
          puts "httpdisk #{HTTPDisk::VERSION}"
          exit
        end
        opts.on '--help', 'show this help' do
          puts opts
          exit
        end
      end

      # exactly one positional argument (the url) is required
      raise Slop::Error, '' if args.empty?

      urls = parsed.args
      raise Slop::Error, 'no URL specified' if urls.empty?
      raise Slop::Error, 'more than one URL specified' if urls.length > 1

      parsed.to_h.merge(url: urls.first)
    end
  end
end