web_fetch 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +3 -0
- data/.rubocop.yml +10 -0
- data/.ruby-version +1 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +120 -0
- data/LICENSE +7 -0
- data/README.md +149 -0
- data/TODO +0 -0
- data/bin/rspec +29 -0
- data/bin/rubocop +29 -0
- data/bin/web_fetch_control +6 -0
- data/bin/web_fetch_server +30 -0
- data/config/locales/en.yml +12 -0
- data/doc/client_example.rb +19 -0
- data/doc/web_fetch_architecture.png +0 -0
- data/lib/web_fetch/client.rb +101 -0
- data/lib/web_fetch/concerns/http_helpers.rb +64 -0
- data/lib/web_fetch/concerns/validatable.rb +31 -0
- data/lib/web_fetch/event_machine_helpers.rb +36 -0
- data/lib/web_fetch/gatherer.rb +62 -0
- data/lib/web_fetch/helpers.rb +11 -0
- data/lib/web_fetch/http_helpers.rb +71 -0
- data/lib/web_fetch/logger.rb +29 -0
- data/lib/web_fetch/resources.rb +59 -0
- data/lib/web_fetch/retriever.rb +39 -0
- data/lib/web_fetch/router.rb +71 -0
- data/lib/web_fetch/server.rb +49 -0
- data/lib/web_fetch/storage.rb +16 -0
- data/lib/web_fetch/version.rb +5 -0
- data/lib/web_fetch.rb +40 -0
- data/spec/client_spec.rb +63 -0
- data/spec/concerns/validatable_spec.rb +53 -0
- data/spec/features/http_fetching_spec.rb +0 -0
- data/spec/gatherer_spec.rb +109 -0
- data/spec/helpers_spec.rb +18 -0
- data/spec/i18n_spec.rb +8 -0
- data/spec/resources_spec.rb +42 -0
- data/spec/retriever_spec.rb +68 -0
- data/spec/router_spec.rb +43 -0
- data/spec/server_spec.rb +96 -0
- data/spec/spec_helper.rb +55 -0
- data/spec/storage_spec.rb +24 -0
- data/swagger.yaml +115 -0
- data/web_fetch.gemspec +41 -0
- metadata +314 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: dc2d09b5ad2253bdc730796a48ce5c6d08adacf91bee689aeb6dc50602199c63
|
4
|
+
data.tar.gz: 9ec7709c430d9a9b64f1de87301ee674f85f91412f2f2d108fba5a6179d031c8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7120f22d17fda9952b486b894315068504a99124e6a048908a0fc233c309edca4a4edaad5980e03166bf01f59bcaac415fda56b461f467164fc216272fbbfc7e
|
7
|
+
data.tar.gz: 2f9c1cdee57f1c9ccf219c619b78f4fa8d3fd6dbb058d9e8374fd41c5b7b74c10a99e173d6bda8373e5e181694793f8eab763c378ff6fc15cb25728115c77782
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rubocop.yml
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.5.0
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
web_fetch (0.1.0)
|
5
|
+
activesupport (~> 4.0)
|
6
|
+
childprocess (~> 0.5)
|
7
|
+
daemons (~> 1.2)
|
8
|
+
em-http-request (~> 1.1)
|
9
|
+
em-logger (~> 0.1)
|
10
|
+
eventmachine (~> 1.0)
|
11
|
+
eventmachine_httpserver (~> 0.2)
|
12
|
+
faraday (~> 0.9)
|
13
|
+
hanami-router (~> 0.7)
|
14
|
+
hanami-utils (= 0.8.0)
|
15
|
+
i18n (~> 0.7)
|
16
|
+
rack (~> 1.6)
|
17
|
+
|
18
|
+
GEM
|
19
|
+
remote: https://rubygems.org/
|
20
|
+
specs:
|
21
|
+
activesupport (4.2.10)
|
22
|
+
i18n (~> 0.7)
|
23
|
+
minitest (~> 5.1)
|
24
|
+
thread_safe (~> 0.3, >= 0.3.4)
|
25
|
+
tzinfo (~> 1.1)
|
26
|
+
addressable (2.5.2)
|
27
|
+
public_suffix (>= 2.0.2, < 4.0)
|
28
|
+
ast (2.4.0)
|
29
|
+
byebug (9.1.0)
|
30
|
+
childprocess (0.9.0)
|
31
|
+
ffi (~> 1.0, >= 1.0.11)
|
32
|
+
concurrent-ruby (1.0.5)
|
33
|
+
cookiejar (0.3.3)
|
34
|
+
crack (0.4.3)
|
35
|
+
safe_yaml (~> 1.0.0)
|
36
|
+
daemons (1.2.6)
|
37
|
+
diff-lcs (1.3)
|
38
|
+
em-http-request (1.1.5)
|
39
|
+
addressable (>= 2.3.4)
|
40
|
+
cookiejar (!= 0.3.1)
|
41
|
+
em-socksify (>= 0.3)
|
42
|
+
eventmachine (>= 1.0.3)
|
43
|
+
http_parser.rb (>= 0.6.0)
|
44
|
+
em-logger (0.1.0)
|
45
|
+
eventmachine (>= 0.12.10)
|
46
|
+
em-socksify (0.3.2)
|
47
|
+
eventmachine (>= 1.0.0.beta.4)
|
48
|
+
eventmachine (1.2.7)
|
49
|
+
eventmachine_httpserver (0.2.1)
|
50
|
+
faraday (0.15.3)
|
51
|
+
multipart-post (>= 1.2, < 3)
|
52
|
+
ffi (1.9.25)
|
53
|
+
hanami-router (0.7.0)
|
54
|
+
hanami-utils (~> 0.8)
|
55
|
+
http_router (~> 0.11)
|
56
|
+
rack (~> 1.6)
|
57
|
+
hanami-utils (0.8.0)
|
58
|
+
hashdiff (0.3.7)
|
59
|
+
http_parser.rb (0.6.0)
|
60
|
+
http_router (0.11.2)
|
61
|
+
rack (>= 1.0.0)
|
62
|
+
url_mount (~> 0.2.1)
|
63
|
+
i18n (0.9.5)
|
64
|
+
concurrent-ruby (~> 1.0)
|
65
|
+
jaro_winkler (1.5.1)
|
66
|
+
minitest (5.11.3)
|
67
|
+
multipart-post (2.0.0)
|
68
|
+
parallel (1.12.1)
|
69
|
+
parser (2.5.1.2)
|
70
|
+
ast (~> 2.4.0)
|
71
|
+
powerpack (0.1.2)
|
72
|
+
public_suffix (3.0.3)
|
73
|
+
rack (1.6.10)
|
74
|
+
rainbow (3.0.0)
|
75
|
+
rspec (3.8.0)
|
76
|
+
rspec-core (~> 3.8.0)
|
77
|
+
rspec-expectations (~> 3.8.0)
|
78
|
+
rspec-mocks (~> 3.8.0)
|
79
|
+
rspec-core (3.8.0)
|
80
|
+
rspec-support (~> 3.8.0)
|
81
|
+
rspec-expectations (3.8.1)
|
82
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
83
|
+
rspec-support (~> 3.8.0)
|
84
|
+
rspec-mocks (3.8.0)
|
85
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
86
|
+
rspec-support (~> 3.8.0)
|
87
|
+
rspec-support (3.8.0)
|
88
|
+
rubocop (0.59.2)
|
89
|
+
jaro_winkler (~> 1.5.1)
|
90
|
+
parallel (~> 1.10)
|
91
|
+
parser (>= 2.5, != 2.5.1.1)
|
92
|
+
powerpack (~> 0.1)
|
93
|
+
rainbow (>= 2.2.2, < 4.0)
|
94
|
+
ruby-progressbar (~> 1.7)
|
95
|
+
unicode-display_width (~> 1.0, >= 1.0.1)
|
96
|
+
ruby-progressbar (1.10.0)
|
97
|
+
safe_yaml (1.0.4)
|
98
|
+
thread_safe (0.3.6)
|
99
|
+
tzinfo (1.2.5)
|
100
|
+
thread_safe (~> 0.1)
|
101
|
+
unicode-display_width (1.4.0)
|
102
|
+
url_mount (0.2.1)
|
103
|
+
rack
|
104
|
+
webmock (3.4.2)
|
105
|
+
addressable (>= 2.3.6)
|
106
|
+
crack (>= 0.3.2)
|
107
|
+
hashdiff
|
108
|
+
|
109
|
+
PLATFORMS
|
110
|
+
ruby
|
111
|
+
|
112
|
+
DEPENDENCIES
|
113
|
+
byebug (~> 9.0)
|
114
|
+
rspec (~> 3.5)
|
115
|
+
rubocop (~> 0.59.2)
|
116
|
+
web_fetch!
|
117
|
+
webmock (~> 3.4)
|
118
|
+
|
119
|
+
BUNDLED WITH
|
120
|
+
1.16.5
|
data/LICENSE
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
Copyright 2018 Robert Farrell
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
4
|
+
|
5
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
6
|
+
|
7
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,149 @@
|
|
1
|
+
# WebFetch
|
2
|
+
|
3
|
+
## Overview
|
4
|
+
|
5
|
+
WebFetch is an asynchronous HTTP proxy server that accepts multiple requests for HTTP retrieval, immediately returning a token for each request, and then allowing that token to be redeemed later when the entity has fully responded.
|
6
|
+
|
7
|
+
This permits issuing multiple HTTP requests in parallel, in a fully encapsulated and external process, without having to resort to multi-threading, multi-processing, or complex non-blocking IO implementations. [EventMachine][1] is used to handle the heavy lifting.
|
8
|
+
|
9
|
+
![WebFetch architecture][2]
|
10
|
+
|
11
|
+
## Getting Started
|
12
|
+
|
13
|
+
Although WebFetch runs as a web server and provides all functionality over a RESTful API (see below), the simplest way to use it is with its Ruby client implementation, which wraps the HTTP API for you, using [Faraday][3]. This also serves as a [reference][4] for writing WebFetch clients in other languages.
|
14
|
+
|
15
|
+
In your `Gemfile`, add:
|
16
|
+
|
17
|
+
``` ruby
|
18
|
+
gem 'web_fetch', git: 'https://github.com/bobf/web_fetch.git'
|
19
|
+
```
|
20
|
+
|
21
|
+
and update your bundle:
|
22
|
+
|
23
|
+
``` sh
|
24
|
+
bundle install
|
25
|
+
```
|
26
|
+
|
27
|
+
Create, connect to, and wrap a Ruby client object around a new WebFetch server instance, listening as `localhost` on port `8077`:
|
28
|
+
|
29
|
+
``` ruby
|
30
|
+
require 'web_fetch'
|
31
|
+
client = WebFetch::Client.create('localhost', 8077)
|
32
|
+
```
|
33
|
+
|
34
|
+
Issue some requests [asynchronously]:
|
35
|
+
|
36
|
+
``` ruby
|
37
|
+
requests = [{ url: 'http://foobar.baz/' },
|
38
|
+
{ url: 'http://barfoo.baz/foobar',
|
39
|
+
headers: { 'User-Agent' => 'Foo Browser' },
|
40
|
+
query: { foo: 'what is foo', bar: 'what is baz' } }]
|
41
|
+
jobs = client.gather(requests)
|
42
|
+
```
|
43
|
+
|
44
|
+
Retrieve the responses [synchronously - *any result that has not yet arrived will block until it has arrived while other requests continue to run in parallel*]:
|
45
|
+
|
46
|
+
``` ruby
|
47
|
+
responses = []
|
48
|
+
jobs.each do |job|
|
49
|
+
response = client.retrieve_by_uid(job[:uid])
|
50
|
+
responses.push(response)
|
51
|
+
end
|
52
|
+
```
|
53
|
+
|
54
|
+
See [a working example][5]
|
55
|
+
|
56
|
+
## HTTP API
|
57
|
+
|
58
|
+
If you need to use the WebFetch server's HTTP API directly, refer to the [Swagger API Reference][6].
|
59
|
+
|
60
|
+
## Managing the WebFetch process yourself
|
61
|
+
|
62
|
+
You may want to run the WebFetch server yourself rather than instantiate it via the client. For this case, the executable `bin/web_fetch_control` is provided.
|
63
|
+
|
64
|
+
WebFetch can be started in the terminal with output going to STDOUT or as a daemon.
|
65
|
+
|
66
|
+
Run the server as a daemon:
|
67
|
+
|
68
|
+
```
|
69
|
+
$ bundle exec bin/web_fetch_control start -- --log /tmp/web_fetch.log
|
70
|
+
```
|
71
|
+
|
72
|
+
**Note that you should always pass `--log` when running as a daemon; otherwise, all output will go to the null device.**
|
73
|
+
|
74
|
+
Run the server in the terminal:
|
75
|
+
|
76
|
+
```
|
77
|
+
$ bundle exec bin/web_fetch_control run -- --port 8080
|
78
|
+
```
|
79
|
+
|
80
|
+
It is further recommended to use a process management tool to monitor the pidfile (pass `--pidfile /path/to/file.pid` to specify an explicit location).
|
81
|
+
|
82
|
+
To connect to an existing process, use `WebFetch::Client.new` rather than `WebFetch::Client.create`. For example:
|
83
|
+
|
84
|
+
``` ruby
|
85
|
+
WebFetch::Client.new('localhost', 8087)
|
86
|
+
```
|
87
|
+
|
88
|
+
## WebFetch Client request options
|
89
|
+
|
90
|
+
`WebFetch::Client#gather` accepts an array of hashes which may contain the following parameters:
|
91
|
+
|
92
|
+
* `url`: The target URL [string]
|
93
|
+
* `headers`: HTTP headers [hash]
|
94
|
+
* `query`: Query parameters [hash]
|
95
|
+
* `method`: HTTP method (default: `"GET"`) [string]
|
96
|
+
* `body`: HTTP body [string]
|
97
|
+
|
98
|
+
These parameters will all be used (where provided) when initiating the HTTP request on the target.
|
99
|
+
|
100
|
+
Arbitrary parameters can also be passed and will be returned by `#gather` (though they will not be used to construct the HTTP request). This allows tagging requests with arbitrary information if you need to identify them in a particular way. For example, you may want to generate your own unique identifier for a request, in which case you could do:
|
101
|
+
|
102
|
+
``` ruby
|
103
|
+
client.gather([{ url: 'http://foobar.baz', my_unique_id: '123-456-789' }])
|
104
|
+
# [{:request=>{:url=>"http://foobar.baz", :my_unique_id=>"123-456-789"}, :hash=>"7c511911d16e1072363fa1653bdd93df65208901", :uid=>"1fb4ee7a-9fc0-4896-9af2-7cbdf234a468"}]
|
105
|
+
```
|
106
|
+
|
107
|
+
## Logging
|
108
|
+
|
109
|
+
WebFetch logs to STDOUT by default. An alternative log file can be set either
|
110
|
+
by passing `--log /path/to/logfile` to the command line server, or by passing
|
111
|
+
`log: '/path/to/logfile'` to `WebFetch::Client.create`:
|
112
|
+
|
113
|
+
```
|
114
|
+
$ bundle exec bin/web_fetch_server --log /tmp/web_fetch.log
|
115
|
+
```
|
116
|
+
|
117
|
+
```
|
118
|
+
client = WebFetch::Client.create('localhost', 8077, log: '/tmp/web_fetch.log')
|
119
|
+
```
|
120
|
+
|
121
|
+
## Contributing
|
122
|
+
|
123
|
+
WebFetch uses `rspec` for testing:
|
124
|
+
|
125
|
+
```
|
126
|
+
bin/rspec
|
127
|
+
```
|
128
|
+
|
129
|
+
Rubocop is used for code style governance:
|
130
|
+
|
131
|
+
```
|
132
|
+
bin/rubocop
|
133
|
+
```
|
134
|
+
|
135
|
+
Make sure that any new code you write has an appropriate test and that all Rubocop checks pass.
|
136
|
+
|
137
|
+
Feel free to fork and create a pull request if you would like to make any changes.
|
138
|
+
|
139
|
+
## License
|
140
|
+
|
141
|
+
WebFetch is licensed under the [MIT License][7]. You are encouraged to re-use the code in any way you see fit as long as you give credit to the original author. If you do use the code for any other projects then feel free to let me know but, of course, this is not required.
|
142
|
+
|
143
|
+
[1]: https://github.com/eventmachine/eventmachine
|
144
|
+
[2]: doc/web_fetch_architecture.png
|
145
|
+
[3]: https://github.com/lostisland/faraday
|
146
|
+
[4]: lib/web_fetch/client.rb
|
147
|
+
[5]: doc/client_example.rb
|
148
|
+
[6]: swagger.yaml
|
149
|
+
[7]: LICENSE
|
data/TODO
ADDED
File without changes
|
data/bin/rspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'rspec' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
+
Pathname.new(__FILE__).realpath)
|
14
|
+
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
+
|
17
|
+
if File.file?(bundle_binstub)
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
+
load(bundle_binstub)
|
20
|
+
else
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
load Gem.bin_path("rspec-core", "rspec")
|
data/bin/rubocop
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'rubocop' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
+
Pathname.new(__FILE__).realpath)
|
14
|
+
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
+
|
17
|
+
if File.file?(bundle_binstub)
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
+
load(bundle_binstub)
|
20
|
+
else
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
load Gem.bin_path("rubocop", "rubocop")
|
@@ -0,0 +1,30 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
|
5
|
+
# Defaults for the listening address; both can be overridden on the command
# line. The port is kept as a string here and converted with `to_i` when the
# server is started.
options = { host: 'localhost', port: '60089' }

parser = OptionParser.new do |opt|
  opt.on('--host localhost') { |value| options[:host] = value }
  opt.on('--port 60089') { |value| options[:port] = value }
  opt.on('--pidfile tmp/web_fetch.pid') { |value| options[:pidfile] = value }
  opt.on('--log web_fetch.log') { |value| options[:log] = value }
end
parser.parse!

# Loaded only after the command line has been parsed.
require 'web_fetch'
# `options[:log]` is nil when `--log` was not given.
WebFetch::Logger.logger(options[:log])
WebFetch::Logger.info("Loading WebFetch server: #{options}")

# A pidfile is written only when `--pidfile` was supplied.
pid_path = File.expand_path(options[:pidfile]) unless options[:pidfile].nil?
if pid_path
  pid = Process.pid
  File.open(pid_path, 'w+') { |file| file.write(pid) }
  WebFetch::Logger.info("Pidfile created: #{pid_path} (#{pid})")
end

EM.run do
  EM.start_server(options[:host], options[:port].to_i, WebFetch::Server)
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
en:
|
2
|
+
requests_missing: "`requests` parameter missing"
|
3
|
+
requests_not_array: "`requests` parameter must be an array"
|
4
|
+
requests_empty: "`requests` parameter must not be empty"
|
5
|
+
missing_url: "All `requests` parameters must contain a URL"
|
6
|
+
bad_json: "`json` parameter could not be parsed as valid JSON"
|
7
|
+
|
8
|
+
hash_or_uid_but_not_both: "Cannot retrieve by both `uid` and `hash`"
|
9
|
+
missing_hash_and_uid: "Must pass either `uid` or `hash` to retrieve"
|
10
|
+
|
11
|
+
uid_not_found: "Provided `uid` has not yet been requested"
|
12
|
+
hash_not_found: "Provided `hash` has not yet been requested"
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Run me with:
|
4
|
+
# bundle exec ruby doc/client_example.rb
|
5
|
+
require 'web_fetch'
|
6
|
+
begin
  # Spawns a WebFetch server and returns a client connected to it.
  cli = WebFetch::Client.create('localhost', 8077)
  results = cli.gather([
    { url: 'http://localhost:8077/' },
    { url: 'http://yahoo.com/' },
    { url: 'http://lycos.com/' },
    { url: 'http://google.com/' }
  ])
  # Redeem each job's uid for its response and print it.
  results.each do |res|
    p cli.retrieve_by_uid(res[:uid])
  end
ensure
  # `cli` is still nil if `create` raised; the safe navigation keeps the
  # original error from being masked by a NoMethodError on nil.
  cli&.stop
end
|
Binary file
|
@@ -0,0 +1,101 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module WebFetch
  # Client to be used in application code. Capable of spawning a server and
  # interacting with it to gather requests and retrieve them
  class Client
    # @param host [String] host name used to build the server's base URI
    # @param port [Integer, String] port used to build the server's base URI
    # @param options [Hash] `:process` is the handle of a server process
    #   spawned by `.create`; when absent, `#stop` is a no-op
    def initialize(host, port, options = {})
      @host = host
      @port = port
      @process = options[:process]
    end

    # Spawns a server process and returns a client bound to it.
    #
    # Will block until process is responsive.
    # NOTE(review): the polling loop has no timeout, so this never returns if
    # the spawned process never starts answering.
    #
    # @param options [Hash] `:start_command`, `:log` and `:daemonize` are
    #   forwarded to the spawned command (see `build_process`)
    # @return [WebFetch::Client]
    def self.create(host, port, options = {})
      process = spawn(host, port, options)
      client = new(host, port, process: process)
      sleep 0.1 until client.alive?
      client
    end

    # Stops the process given at construction time, if any.
    #
    # Will block until process dies.
    def stop
      return if @process.nil?

      @process.stop
      @process.wait
    end

    # Checks whether a WebFetch server answers at the configured address.
    #
    # Returns false when the connection fails, when the response is not
    # successful, or when the body is not a JSON object identifying itself
    # as WebFetch (e.g. some other service is listening on the port).
    #
    # @return [Boolean]
    def alive?
      response = get('')
      return false unless response.success?

      payload = JSON.parse(response.body.to_s)
      payload.is_a?(Hash) && payload['application'] == 'WebFetch'
    rescue Faraday::ConnectionFailed, JSON::ParserError
      false
    end

    # Posts the requests to the server's `gather` endpoint.
    #
    # @param requests [Array<Hash>] request descriptions
    # @return [Array<Hash>, nil] the `requests` member of the parsed response
    #   (symbol keys), or nil when the response is not successful
    def gather(requests)
      json = JSON.dump(requests: requests)
      response = post('gather', json)
      return nil unless response.success?

      JSON.parse(response.body, symbolize_names: true)[:requests]
    end

    # Fetches the result for a previously gathered request.
    #
    # @param uid [String] identifier passed as the `uid` query parameter
    # @return [Hash, nil] parsed response (symbol keys), or nil when the
    #   response is not successful
    def retrieve_by_uid(uid)
      response = get('retrieve', uid: uid)
      return nil unless response.success?

      JSON.parse(response.body, symbolize_names: true)
    end

    class << self
      # Builds and starts the server process.
      #
      # @return [Object] the started process handle from `ChildProcess.build`
      def spawn(host, port, options)
        process = build_process(host, port, options)
        # Two directory levels above this file's directory; the default start
        # command is a path relative to this working directory.
        process.cwd = File.join(File.dirname(__dir__), '..')
        process.io.inherit!
        process.start
        process
      end

      private

      # Assembles the command line: start command, then host/port and the
      # optional `--log` / `--daemonize` arguments.
      def build_process(host, port, options)
        command = options.fetch(:start_command, standard_start_command)
        args = ['--host', host, '--port', port.to_s]
        args += ['--log', options[:log]] unless options[:log].nil?
        args.push('--daemonize') if options[:daemonize]
        ChildProcess.build(*command, *args)
      end

      # Command used when no `:start_command` option is given.
      def standard_start_command
        %w[bundle exec ./bin/web_fetch_control run --]
      end
    end

    private

    def base_uri
      "http://#{@host}:#{@port}"
    end

    # Issues a GET to `/<endpoint>` with the given query parameters.
    def get(endpoint, params = {})
      conn = Faraday.new(url: base_uri)
      conn.get do |request|
        request.url "/#{endpoint}"
        request.params.merge!(params)
      end
    end

    # Issues a POST to `/<endpoint>` with the given raw body.
    def post(endpoint, body)
      conn = Faraday.new(url: base_uri)
      conn.post do |request|
        request.url "/#{endpoint}"
        request.body = body
      end
    end
  end
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module WebFetch
  # Helpers for reading the incoming request and for serialising the result
  # of a retrieval into a gzipped JSON response.
  module HttpHelpers
    # Gzips the given string.
    def compress(string)
      ActiveSupport::Gzip.compress(string)
    end

    # Sets the headers used for every response; the body is declared as
    # gzip-encoded JSON.
    def default_headers(response)
      response.headers['Content-Type'] = 'application/json; charset=utf-8'
      response.headers['Cache-Control'] = 'max-age=0, private, must-revalidate'
      response.headers['Content-Encoding'] = 'gzip'
      response.headers['Vary'] = 'Accept-Encoding'
    end

    # Describes the current request as a hash: HTTP method, raw query string,
    # parsed post data and the object handling the request.
    def request_params
      { method: @http_request_method,
        query_string: @http_query_string,
        post_data: post_data,
        server: self }
    end

    # Parses the request body as JSON with symbol keys.
    #
    # @return [Object, nil] nil when there is no post content
    def post_data
      return nil unless @http_post_content

      JSON.parse(@http_post_content, symbolize_names: true)
    end

    # Sends the success payload for `deferred` with status 200.
    def succeed(deferred, response)
      respond_with(response) { success(deferred) }
    end

    # Builds the payload for a retrieval that succeeded.
    #
    # @param deferred [Hash] must provide `:http` and `:uid`
    # @return [Hash] `{ response: { success:, body:, headers:, status: }, uid: }`
    def success(deferred)
      result_payload(deferred, succeeded: true)
    end

    # Sends the failure payload for `deferred`. The status is 200 here as
    # well; the failure is reported inside the payload.
    def fail_(deferred, response)
      respond_with(response) { failure(deferred) }
    end

    # Builds the payload for a retrieval that failed; same shape as
    # `success` plus an `:error` entry (nil when the result has no error).
    def failure(deferred)
      result_payload(deferred, succeeded: false)
    end

    private

    # Sets status 200, serialises the block's value as gzipped JSON and
    # sends the response. The status is assigned before the payload is built.
    def respond_with(response)
      response.status = 200
      response.content = compress(JSON.dump(yield))
      response.send_response
    end

    # Shared payload builder for `success` and `failure`; key order matches
    # the serialised output of both.
    def result_payload(deferred, succeeded:)
      result = deferred[:http]
      response = { success: succeeded,
                   body: result.response,
                   headers: result.headers,
                   status: result.response_header.status }
      response[:error] = result.error&.inspect unless succeeded
      { response: response, uid: deferred[:uid] }
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module WebFetch
  # Provides boilerplate for a validatable model.
  #
  # Including classes override the private `validate` hook and call
  # `error(:i18n_key)` once per failure; `valid?` runs the hook and reports
  # whether any error was recorded.
  module Validatable
    # Messages recorded by the most recent `valid?` call (nil until `valid?`
    # has been called).
    attr_reader :errors

    # Clears previous errors, runs `validate` and returns true when no error
    # was recorded.
    #
    # @return [Boolean]
    # @raise [NotImplementedError] when the including class does not override
    #   `validate`
    def valid?
      @errors = []
      validate
      @errors.empty?
    end

    private

    # Default hook; must be overridden by the including class.
    def validate
      # Collapse the heredoc into a single line without stray leading or
      # trailing whitespace.
      message = <<~MSG.gsub(/\s+/, ' ').strip
        Override and call `error(:i18n_key)` as many times as required for each
        validation failure.
        Supplementary text can be added to the error by passing as the second
        parameter to `error`
      MSG
      raise NotImplementedError, message
    end

    # Records the translated message for `name`, followed by a space and
    # `aux` when `aux` is not empty.
    def error(name, aux = '')
      aux = " #{aux}" unless aux.empty?
      @errors.push(I18n.t(name) + aux)
    end
  end
end
|