web_fetch 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.strong_versions.yml +6 -0
- data/Makefile +11 -2
- data/README.md +25 -6
- data/Rakefile +5 -3
- data/bin/strong_versions +29 -0
- data/config/locales/en.yml +0 -3
- data/docker/Dockerfile +2 -2
- data/lib/web_fetch.rb +4 -1
- data/lib/web_fetch/client.rb +28 -29
- data/lib/web_fetch/concerns/http_helpers.rb +8 -35
- data/lib/web_fetch/gatherer.rb +65 -3
- data/lib/web_fetch/logger.rb +1 -2
- data/lib/web_fetch/promise.rb +3 -3
- data/lib/web_fetch/request.rb +8 -15
- data/lib/web_fetch/resources.rb +12 -22
- data/lib/web_fetch/response.rb +15 -10
- data/lib/web_fetch/retriever.rb +6 -25
- data/lib/web_fetch/server.rb +15 -28
- data/lib/web_fetch/storage.rb +17 -13
- data/lib/web_fetch/storage/memcached.rb +45 -0
- data/lib/web_fetch/storage/memory.rb +35 -0
- data/lib/web_fetch/storage/redis.rb +45 -0
- data/lib/web_fetch/version.rb +1 -1
- data/manifest +64 -0
- data/spec/client_spec.rb +3 -6
- data/spec/gatherer_spec.rb +38 -21
- data/spec/promise_spec.rb +49 -11
- data/spec/resources_spec.rb +14 -17
- data/spec/response_spec.rb +13 -9
- data/spec/retriever_spec.rb +16 -17
- data/spec/router_spec.rb +6 -4
- data/spec/spec_helper.rb +15 -7
- data/spec/storage/memcached_spec.rb +27 -0
- data/spec/storage/memory_spec.rb +5 -0
- data/spec/storage/redis_spec.rb +27 -0
- data/spec/storage/shared_examples.rb +27 -0
- data/web_fetch.gemspec +9 -5
- metadata +75 -11
- data/lib/web_fetch/concerns/event_machine_helpers.rb +0 -57
- data/spec/storage_spec.rb +0 -27
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3ffbcc60483ca651ca9e7ca3a4ebf945bebb26e7e464ad0dbb6fde65aadd9772
|
4
|
+
data.tar.gz: 809d8962eb85f784844d38e8f57f30fd33e21c393bc71765ad36181f6ebd0d04
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 62186119c071a4345ad28dae952084b982327de94b5dbbaa8e8f01982e5e3a88278c877780afd4385b827a6c316d280a939476adb34e43a003d0d64844dd7cb8
|
7
|
+
data.tar.gz: 2f116096a7b94477abfefdf2dfa3ad64ae69898791f58a6010689c248d98298c4511b23ea252ff5a0a9833bb445bb5bd1cd739a7564bc7163623120b678e17d7
|
data/.gitignore
CHANGED
data/Makefile
CHANGED
@@ -1,6 +1,15 @@
|
|
1
|
-
.PHONY: docker
|
1
|
+
.PHONY: docker manifest
|
2
2
|
|
3
|
+
manifest:
|
4
|
+
git ls-files > manifest
|
5
|
+
|
6
|
+
test:
|
7
|
+
bin/rspec
|
8
|
+
bin/rubocop
|
9
|
+
bin/strong_versions
|
10
|
+
|
11
|
+
docker: version := $(shell bundle exec ruby -e "require 'web_fetch'; puts WebFetch::VERSION")
|
3
12
|
docker:
|
4
13
|
mkdir -p docker/.build
|
5
14
|
git archive --format tar.gz -o docker/.build/web_fetch.tar.gz master
|
6
|
-
docker build -t webfetch/webfetch docker
|
15
|
+
docker build -t webfetch/webfetch:${version} docker
|
data/README.md
CHANGED
@@ -18,7 +18,7 @@ This permits issuing multiple HTTP requests in parallel, in a fully encapsulated
|
|
18
18
|
In your `Gemfile`, add:
|
19
19
|
|
20
20
|
``` ruby
|
21
|
-
gem 'web_fetch'
|
21
|
+
gem 'web_fetch', '~> 0.5.0'
|
22
22
|
```
|
23
23
|
|
24
24
|
and update your bundle:
|
@@ -33,10 +33,31 @@ Require WebFetch in your application:
|
|
33
33
|
require 'web_fetch'
|
34
34
|
```
|
35
35
|
|
36
|
+
### Memcached
|
37
|
+
|
38
|
+
It is highly recommended to use _Memcached_ or _Redis_ as a back end to _WebFetch_ in production.
|
39
|
+
|
40
|
+
The following environment variables can be used to select and configure the back end:
|
41
|
+
|
42
|
+
```bash
|
43
|
+
# Memcached
|
44
|
+
WEB_FETCH_BACK_END=memcached
|
45
|
+
WEB_FETCH_MEMCACHED_HOST=localhost
|
46
|
+
WEB_FETCH_MEMCACHED_PORT=11211
|
47
|
+
WEB_FETCH_MEMCACHED_TTL=60
|
48
|
+
|
49
|
+
# Redis
|
50
|
+
WEB_FETCH_BACK_END=redis
|
51
|
+
WEB_FETCH_REDIS_HOST=localhost
|
52
|
+
WEB_FETCH_REDIS_PORT=6379
|
53
|
+
WEB_FETCH_REDIS_TTL=60
|
54
|
+
```
|
55
|
+
|
56
|
+
Note that _WebFetch_ is intended to function as a fast proxy server so a low TTL (default 60 seconds) is recommended. With a _Memcached_ back end multiple instances of _WebFetch_ can operate on the same memory store. This provides good options for containerisation, scaling, and high availability.
|
57
|
+
|
36
58
|
### Launch or connect to a server
|
37
59
|
|
38
|
-
Launch the server from your application (recommended for familiarising yourself
|
39
|
-
with WebFetch):
|
60
|
+
Launch the server from your application (recommended for familiarising yourself with WebFetch):
|
40
61
|
|
41
62
|
``` ruby
|
42
63
|
client = WebFetch::Client.create('localhost', 8077)
|
@@ -105,7 +126,7 @@ response.response_time
|
|
105
126
|
response.request # The original request, provided as a `WebFetch::Request` object
|
106
127
|
```
|
107
128
|
|
108
|
-
Note that `
|
129
|
+
Note that `WebFetch::Promise#fetch` will block until the response is complete by default. If you want to continue executing other code if the response is not ready (e.g. to see if any other responses are ready), you can pass `wait: false`
|
109
130
|
|
110
131
|
``` ruby
|
111
132
|
response = promises.first.fetch(wait: false)
|
@@ -198,8 +219,6 @@ web_fetch_control run -- --port 8000 --host 0.0.0.0
|
|
198
219
|
|
199
220
|
No pid file will be created unless the `--pidfile` parameter is passed. It is recommended to use a process monitoring tool (e.g. `monit` or `systemd`) to monitor the WebFetch process.
|
200
221
|
|
201
|
-
When running as a daemon, WebFetch will log to the null device so it is advised to always pass `--log` in this case.
|
202
|
-
|
203
222
|
## Docker
|
204
223
|
|
205
224
|
To use WebFetch in Docker you can either use the provided [`Dockerfile`](docker/Dockerfile) or the public image [`web_fetch/web_fetch`](https://hub.docker.com/r/webfetch/webfetch/)
|
data/Rakefile
CHANGED
data/bin/strong_versions
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'strong_versions' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
+
Pathname.new(__FILE__).realpath)
|
14
|
+
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
+
|
17
|
+
if File.file?(bundle_binstub)
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
+
load(bundle_binstub)
|
20
|
+
else
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
load Gem.bin_path("strong_versions", "strong_versions")
|
data/config/locales/en.yml
CHANGED
@@ -8,9 +8,6 @@ en:
|
|
8
8
|
hash_or_uid_but_not_both: "Cannot retrieve by both `uid` and `hash`"
|
9
9
|
missing_hash_and_uid: "Must pass either `uid` or `hash` to retrieve"
|
10
10
|
|
11
|
-
uid_not_found: "Provided `uid` has not yet been requested"
|
12
|
-
hash_not_found: "Provided `hash` has not yet been requested"
|
13
|
-
|
14
11
|
pending: "Your request is still being processed"
|
15
12
|
|
16
13
|
no_request: "No active request found for UID: %{uid}"
|
data/docker/Dockerfile
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
FROM library/ruby
|
1
|
+
FROM library/ruby:2.5.3
|
2
2
|
ADD .build/web_fetch.tar.gz .
|
3
3
|
WORKDIR web_fetch
|
4
4
|
RUN bundle && bundle exec rake install
|
5
|
-
CMD ["web_fetch_control", "run", "--", "--port", "8077", "--host", "0.0.0.0"]
|
5
|
+
CMD ["bundle", "exec", "web_fetch_control", "run", "--", "--port", "8077", "--host", "0.0.0.0"]
|
data/lib/web_fetch.rb
CHANGED
@@ -6,6 +6,7 @@ require 'em-http'
|
|
6
6
|
require 'em-logger'
|
7
7
|
require 'i18n'
|
8
8
|
require 'logger'
|
9
|
+
require 'base64'
|
9
10
|
require 'json'
|
10
11
|
require 'digest'
|
11
12
|
require 'securerandom'
|
@@ -24,9 +25,11 @@ unless Gem.loaded_specs.key?('rails')
|
|
24
25
|
I18n.config.available_locales = :en
|
25
26
|
end
|
26
27
|
|
28
|
+
module WebFetch
|
29
|
+
end
|
30
|
+
|
27
31
|
require 'web_fetch/logger'
|
28
32
|
require 'web_fetch/helpers'
|
29
|
-
require 'web_fetch/concerns/event_machine_helpers'
|
30
33
|
require 'web_fetch/concerns/http_helpers'
|
31
34
|
require 'web_fetch/concerns/validatable'
|
32
35
|
require 'web_fetch/concerns/client_http'
|
data/lib/web_fetch/client.rb
CHANGED
@@ -48,30 +48,25 @@ module WebFetch
|
|
48
48
|
handle_error(JSON.parse(response.body)['error']) unless response.success?
|
49
49
|
|
50
50
|
requests = JSON.parse(response.body, symbolize_names: true)[:requests]
|
51
|
+
|
51
52
|
promises(requests)
|
52
53
|
end
|
53
54
|
|
54
55
|
def fetch(uid, options = {})
|
55
56
|
block = options.fetch(:wait, true)
|
56
57
|
|
57
|
-
outcome = block ?
|
58
|
+
outcome = block ? fetch_blocking(uid) : fetch_nonblocking(uid)
|
58
59
|
no_request_error(uid) if outcome.nil?
|
59
60
|
|
60
|
-
|
61
|
+
Response.new(outcome.merge(uid: uid))
|
61
62
|
end
|
62
63
|
|
63
64
|
def retrieve_by_uid(uid)
|
64
|
-
|
65
|
-
return nil unless response.success?
|
66
|
-
|
67
|
-
JSON.parse(response.body, symbolize_names: true)
|
65
|
+
fetch_blocking(uid)[:request]
|
68
66
|
end
|
69
67
|
|
70
68
|
def find_by_uid(uid)
|
71
|
-
|
72
|
-
return nil unless response.success?
|
73
|
-
|
74
|
-
JSON.parse(response.body, symbolize_names: true)
|
69
|
+
fetch_nonblocking(uid)[:request]
|
75
70
|
end
|
76
71
|
|
77
72
|
class << self
|
@@ -94,6 +89,29 @@ module WebFetch
|
|
94
89
|
|
95
90
|
private
|
96
91
|
|
92
|
+
def decode_response(response)
|
93
|
+
return response unless response[:command] == 'retrieve'
|
94
|
+
|
95
|
+
response[:request][:response][:body] = Base64.decode64(
|
96
|
+
response[:request][:response][:body]
|
97
|
+
)
|
98
|
+
response
|
99
|
+
end
|
100
|
+
|
101
|
+
def fetch_blocking(uid)
|
102
|
+
response = get("retrieve/#{uid}")
|
103
|
+
return nil unless response.success?
|
104
|
+
|
105
|
+
decode_response(JSON.parse(response.body, symbolize_names: true))
|
106
|
+
end
|
107
|
+
|
108
|
+
def fetch_nonblocking(uid)
|
109
|
+
response = get("find/#{uid}")
|
110
|
+
return nil unless response.success?
|
111
|
+
|
112
|
+
decode_response(JSON.parse(response.body, symbolize_names: true))
|
113
|
+
end
|
114
|
+
|
97
115
|
def handle_error(error)
|
98
116
|
raise WebFetch::ClientError, error
|
99
117
|
end
|
@@ -102,25 +120,6 @@ module WebFetch
|
|
102
120
|
raise RequestNotFoundError, [I18n.t('no_request', uid: uid)]
|
103
121
|
end
|
104
122
|
|
105
|
-
def new_response(outcome)
|
106
|
-
response = outcome[:response] || {}
|
107
|
-
# FIXME: This is sort-of duplicated from `Promise#new_response` but we
|
108
|
-
# build it very slightly differently. This means we have to update in
|
109
|
-
# both places if we change the structure. Not quite sure how to unify
|
110
|
-
# this and ensure the same structure in both places.
|
111
|
-
Response.new(
|
112
|
-
pending: outcome[:pending],
|
113
|
-
body: response[:body],
|
114
|
-
headers: response[:headers],
|
115
|
-
status: response[:status],
|
116
|
-
success: response[:success],
|
117
|
-
error: response[:error],
|
118
|
-
uid: outcome[:uid],
|
119
|
-
response_time: response[:response_time],
|
120
|
-
request: outcome[:request]
|
121
|
-
)
|
122
|
-
end
|
123
|
-
|
124
123
|
def promises(requests)
|
125
124
|
requests.map do |request|
|
126
125
|
Promise.new(self, uid: request[:uid], request: request[:request])
|
@@ -9,10 +9,10 @@ module WebFetch
|
|
9
9
|
response.send_response
|
10
10
|
end
|
11
11
|
|
12
|
-
def pending(
|
12
|
+
def pending(uid, response)
|
13
13
|
respond_immediately({
|
14
14
|
payload: {
|
15
|
-
uid:
|
15
|
+
uid: uid,
|
16
16
|
pending: true,
|
17
17
|
message: I18n.t(:pending)
|
18
18
|
}
|
@@ -45,45 +45,18 @@ module WebFetch
|
|
45
45
|
JSON.parse(@http_post_content, symbolize_names: true)
|
46
46
|
end
|
47
47
|
|
48
|
-
def succeed(
|
48
|
+
def succeed(resource, response)
|
49
49
|
response.status = 200
|
50
|
-
response.content = compress(JSON.dump(
|
50
|
+
response.content = compress(JSON.dump(resource))
|
51
|
+
storage.delete(resource[:request][:uid])
|
51
52
|
response.send_response
|
52
|
-
storage.delete(request[:uid])
|
53
53
|
end
|
54
54
|
|
55
|
-
def
|
56
|
-
result = request[:deferred]
|
57
|
-
{ response: {
|
58
|
-
success: true,
|
59
|
-
body: result.response,
|
60
|
-
headers: result.headers,
|
61
|
-
status: result.response_header.status,
|
62
|
-
response_time: request[:response_time]
|
63
|
-
},
|
64
|
-
request: request[:request],
|
65
|
-
uid: request[:uid] }
|
66
|
-
end
|
67
|
-
|
68
|
-
def fail_(request, response)
|
55
|
+
def fail_(resource, response)
|
69
56
|
response.status = 200
|
70
|
-
response.content = compress(JSON.dump(
|
57
|
+
response.content = compress(JSON.dump(resource))
|
58
|
+
storage.delete(resource[:request][:uid])
|
71
59
|
response.send_response
|
72
|
-
storage.delete(request[:uid])
|
73
|
-
end
|
74
|
-
|
75
|
-
def failure(request)
|
76
|
-
result = request[:deferred]
|
77
|
-
{ response: {
|
78
|
-
success: false,
|
79
|
-
body: result.response,
|
80
|
-
headers: result.headers,
|
81
|
-
status: result.response_header.status,
|
82
|
-
response_time: request[:response_time],
|
83
|
-
error: (result.error&.inspect)
|
84
|
-
},
|
85
|
-
request: request[:request],
|
86
|
-
uid: request[:uid] }
|
87
60
|
end
|
88
61
|
|
89
62
|
def accept_gzip?
|
data/lib/web_fetch/gatherer.rb
CHANGED
@@ -8,19 +8,59 @@ module WebFetch
|
|
8
8
|
|
9
9
|
HASHABLE_KEYS = %i[url query_string headers method].freeze
|
10
10
|
|
11
|
-
def initialize(
|
11
|
+
def initialize(storage, params, logger = Logger, http = EM::HttpRequest)
|
12
12
|
@requests = params[:requests]
|
13
|
-
@
|
13
|
+
@storage = storage
|
14
|
+
@logger = logger
|
15
|
+
@http = http
|
14
16
|
end
|
15
17
|
|
16
18
|
def start
|
17
19
|
tagged = { requests: tag_requests }
|
18
|
-
|
20
|
+
gather(tagged[:requests])
|
19
21
|
tagged
|
20
22
|
end
|
21
23
|
|
22
24
|
private
|
23
25
|
|
26
|
+
def gather(targets)
|
27
|
+
targets.each do |target|
|
28
|
+
uid = target[:uid]
|
29
|
+
@logger.debug("Initialising async for uid: #{uid}")
|
30
|
+
deferred = request_async(target)
|
31
|
+
request = { uid: uid, start_time: target[:start_time],
|
32
|
+
request: target[:request] }
|
33
|
+
apply_callbacks(request, deferred)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def apply_callbacks(request, deferred)
|
38
|
+
uid = request[:uid]
|
39
|
+
deferred.callback do
|
40
|
+
@logger.debug("HTTP fetch successful for uid: #{uid}")
|
41
|
+
@storage.store(uid, response(request, deferred, success: true))
|
42
|
+
end
|
43
|
+
|
44
|
+
deferred.errback do
|
45
|
+
@logger.debug("HTTP fetch failure for uid: #{uid}")
|
46
|
+
@storage.store(uid, response(request, deferred, success: false))
|
47
|
+
end
|
48
|
+
|
49
|
+
@logger.debug("HTTP fetch started for uid: #{uid}")
|
50
|
+
end
|
51
|
+
|
52
|
+
def request_async(target)
|
53
|
+
request = target[:request]
|
54
|
+
target[:start_time] = Time.now.utc
|
55
|
+
async_request = @http.new(request[:url])
|
56
|
+
method = request.fetch(:method, 'GET').downcase.to_sym
|
57
|
+
async_request.public_send(
|
58
|
+
method, head: request[:headers],
|
59
|
+
query: request.fetch(:query, {}),
|
60
|
+
body: request.fetch(:body, nil)
|
61
|
+
)
|
62
|
+
end
|
63
|
+
|
24
64
|
def validate
|
25
65
|
error(:requests_missing) if requests_missing?
|
26
66
|
error(:requests_not_array) if requests_not_array?
|
@@ -58,5 +98,27 @@ module WebFetch
|
|
58
98
|
def uid
|
59
99
|
SecureRandom.uuid
|
60
100
|
end
|
101
|
+
|
102
|
+
def response_time(request)
|
103
|
+
Time.now.utc - request[:start_time]
|
104
|
+
end
|
105
|
+
|
106
|
+
def response(request, result, options = {})
|
107
|
+
{
|
108
|
+
response: result(request, result, options.fetch(:success)),
|
109
|
+
request: request,
|
110
|
+
uid: request[:uid]
|
111
|
+
}
|
112
|
+
end
|
113
|
+
|
114
|
+
def result(request, result, success)
|
115
|
+
{
|
116
|
+
success: success,
|
117
|
+
body: Base64.encode64(result.response),
|
118
|
+
headers: result.headers,
|
119
|
+
status: result.response_header.status,
|
120
|
+
response_time: response_time(request)
|
121
|
+
}.merge(success ? {} : { error: (result.error&.inspect) })
|
122
|
+
end
|
61
123
|
end
|
62
124
|
end
|
data/lib/web_fetch/logger.rb
CHANGED