async-http 0.9.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -3
- data/async-http.gemspec +1 -1
- data/lib/async/http/client.rb +8 -3
- data/lib/async/http/headers.rb +85 -0
- data/lib/async/http/pool.rb +4 -2
- data/lib/async/http/protocol/http10.rb +2 -2
- data/lib/async/http/protocol/http11.rb +16 -12
- data/lib/async/http/protocol/http2.rb +32 -20
- data/lib/async/http/protocol/request.rb +1 -1
- data/lib/async/http/server.rb +1 -1
- data/lib/async/http/url_endpoint.rb +7 -4
- data/lib/async/http/version.rb +1 -1
- metadata +5 -5
- data/examples/spider.rb +0 -129
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c74bbff1b6216d57e8d60e5a54700f9461cef5aff2436db4abbf07c300a1af09
|
4
|
+
data.tar.gz: 1354a61e69f69706688f112fef4c958af285bec43172f00d7993bff7fcb9dc7b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d62959c37bc323c82d57ca4e745ef4d2a715f3ec0e580e0d4a1e5cbdb65766f6b4c671efab4f8e85fb0fabe6a5cac99be1de1aa2cc92c30bc39e5e51f9f40d51
|
7
|
+
data.tar.gz: 75b83edd5bd767e638ebeacbd3446e317b24b49eba17628af95e32e78690cc0b26c8c998ed1aaed7248c8e796c7e77fd43a8c80f391fcc5876a8cd84533bdd1d
|
data/Gemfile
CHANGED
data/async-http.gemspec
CHANGED
@@ -17,7 +17,7 @@ Gem::Specification.new do |spec|
|
|
17
17
|
spec.require_paths = ["lib"]
|
18
18
|
|
19
19
|
spec.add_dependency("async", "~> 1.4")
|
20
|
-
spec.add_dependency("async-io", "~> 1.
|
20
|
+
spec.add_dependency("async-io", "~> 1.6")
|
21
21
|
|
22
22
|
spec.add_dependency("http-2", "~> 0.8")
|
23
23
|
# spec.add_dependency("openssl")
|
data/lib/async/http/client.rb
CHANGED
@@ -25,14 +25,19 @@ require_relative 'protocol'
|
|
25
25
|
module Async
|
26
26
|
module HTTP
|
27
27
|
class Client
|
28
|
-
def initialize(endpoint, protocol =
|
28
|
+
def initialize(endpoint, protocol = nil, authority = nil, **options)
|
29
29
|
@endpoint = endpoint
|
30
30
|
|
31
|
-
@protocol = protocol
|
31
|
+
@protocol = protocol || endpoint.protocol
|
32
|
+
@authority = authority || endpoint.hostname
|
32
33
|
|
33
34
|
@connections = connect(**options)
|
34
35
|
end
|
35
36
|
|
37
|
+
attr :endpoint
|
38
|
+
attr :protocol
|
39
|
+
attr :authority
|
40
|
+
|
36
41
|
def self.open(*args, &block)
|
37
42
|
client = self.new(*args)
|
38
43
|
|
@@ -59,7 +64,7 @@ module Async
|
|
59
64
|
|
60
65
|
def request(*args)
|
61
66
|
@connections.acquire do |connection|
|
62
|
-
connection.send_request(*args)
|
67
|
+
connection.send_request(@authority, *args)
|
63
68
|
end
|
64
69
|
end
|
65
70
|
|
data/lib/async/http/headers.rb
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
# Copyright, 2017, by Samuel G. D. Williams. <http://www.codeotaku.com>
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
# of this software and associated documentation files (the "Software"), to deal
|
5
|
+
# in the Software without restriction, including without limitation the rights
|
6
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
# copies of the Software, and to permit persons to whom the Software is
|
8
|
+
# furnished to do so, subject to the following conditions:
|
9
|
+
#
|
10
|
+
# The above copyright notice and this permission notice shall be included in
|
11
|
+
# all copies or substantial portions of the Software.
|
12
|
+
#
|
13
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
# THE SOFTWARE.
|
20
|
+
|
21
|
+
module Async
|
22
|
+
module HTTP
|
23
|
+
# Case-insensitive collection of HTTP header fields.
#
# Field names are normalized for storage into lower-case symbols with
# underscores instead of hyphens ("Content-Length" => :content_length), and
# are turned back into lower-case, hyphen-separated strings when enumerated.
# Reads, writes and deletes all apply the same normalization, so a value can
# be retrieved with any spelling of the name it was stored under.
class Headers
  def initialize
    @hash = {}
  end

  # Freeze this object together with its underlying storage.
  #
  # @return [Headers] self, so the call can be chained like Object#freeze.
  def freeze
    # Already frozen: nothing left to do, but keep the Object#freeze contract.
    return self if frozen?

    # Freeze the storage as well, otherwise it could still be mutated
    # through the Hash returned by #to_hash.
    @hash.freeze

    super
  end

  # @return [String] the inspection string of the underlying Hash.
  def inspect
    @hash.inspect
  end

  # Store a header value.
  #
  # @param key [String, Symbol] the field name, in any case/separator style.
  # @param value [Object] the field value.
  def []=(key, value)
    @hash[symbolize(key)] = value
  end

  # Look up a header value.
  #
  # @param key [String, Symbol] the field name, in any case/separator style.
  # @return [Object, nil] the stored value, or nil when the field is absent.
  def [](key)
    @hash[symbolize(key)]
  end

  # Compare against anything that converts to a Hash of normalized keys.
  #
  # @param other [Object] the object to compare with.
  # @return [Boolean] false (rather than raising) when other has no #to_hash.
  def ==(other)
    other.respond_to?(:to_hash) && @hash == other.to_hash
  end

  # Remove a header.
  #
  # @param key [String, Symbol] the field name, in any case/separator style.
  # @return [Object, nil] the removed value, or nil when the field was absent.
  def delete(key)
    @hash.delete(symbolize(key))
  end

  # Enumerate the headers as (name, value) pairs, with each name rendered as
  # a lower-case, hyphen-separated String.
  #
  # @yield [name, value]
  # @return [Enumerator] when no block is given.
  def each
    return to_enum unless block_given?

    @hash.each do |key, value|
      yield stringify(key), value
    end
  end

  # @param value [String, Symbol] a field name.
  # @return [Symbol] the normalized storage key, see {Headers.[]}.
  def symbolize(value)
    Headers[value]
  end

  # @param key [Symbol, String] a normalized storage key.
  # @return [String] the key with underscores replaced by hyphens.
  def stringify(key)
    key.to_s.tr('_', '-')
  end

  # @return [Hash] the underlying storage, keyed by normalized symbols.
  def to_hash
    @hash
  end

  # @return [Hash] a new Hash whose keys are "HTTP_" followed by the
  #   upper-cased storage key, e.g. :content_length => "HTTP_CONTENT_LENGTH".
  def to_http_hash
    Hash[@hash.map { |key, value| ["HTTP_#{key.to_s.upcase}", value] }]
  end

  # Normalize a field name into the symbol used as the storage key.
  #
  # @param value [String, Symbol] the field name, e.g. "Content-Length".
  # @return [Symbol] the normalized key, e.g. :content_length.
  def self.[](value)
    # to_s first: Symbol has no #tr, so symbol keys would otherwise raise.
    value.to_s.downcase.tr('-', '_').to_sym
  end
end
|
84
|
+
end
|
85
|
+
end
|
data/lib/async/http/pool.rb
CHANGED
@@ -112,8 +112,10 @@ module Async
|
|
112
112
|
end
|
113
113
|
end
|
114
114
|
|
115
|
-
|
116
|
-
|
115
|
+
if !@limit or @available.count < @limit
|
116
|
+
Async.logger.debug(self) {"No available resources, allocating new one..."}
|
117
|
+
return create_resource
|
118
|
+
end
|
117
119
|
end
|
118
120
|
end
|
119
121
|
end
|
data/lib/async/http/protocol/http10.rb
CHANGED
@@ -35,7 +35,7 @@ module Async
|
|
35
35
|
end
|
36
36
|
|
37
37
|
def keep_alive?(headers)
|
38
|
-
headers[
|
38
|
+
headers[:connection] == KEEP_ALIVE
|
39
39
|
end
|
40
40
|
|
41
41
|
# Server loop.
|
@@ -64,7 +64,7 @@ module Async
|
|
64
64
|
end
|
65
65
|
|
66
66
|
def read_body(headers)
|
67
|
-
if content_length = headers[
|
67
|
+
if content_length = headers[:content_length]
|
68
68
|
return @stream.read(Integer(content_length))
|
69
69
|
# elsif !keep_alive?(headers)
|
70
70
|
# return @stream.read
|
data/lib/async/http/protocol/http11.rb
CHANGED
@@ -22,14 +22,15 @@ require 'async/io/protocol/line'
|
|
22
22
|
|
23
23
|
require_relative 'request'
|
24
24
|
require_relative 'response'
|
25
|
+
require_relative '../headers'
|
25
26
|
|
26
27
|
module Async
|
27
28
|
module HTTP
|
28
29
|
module Protocol
|
29
30
|
# Implements basic HTTP/1.1 request/response.
|
30
31
|
class HTTP11 < Async::IO::Protocol::Line
|
31
|
-
|
32
|
-
|
32
|
+
CONTENT_LENGTH = Headers['Content-Length']
|
33
|
+
TRANSFER_ENCODING = Headers['Transfer-Encoding']
|
33
34
|
|
34
35
|
CRLF = "\r\n".freeze
|
35
36
|
|
@@ -53,7 +54,6 @@ module Async
|
|
53
54
|
alias client new
|
54
55
|
end
|
55
56
|
|
56
|
-
HTTP_CONNECTION = 'HTTP_CONNECTION'.freeze
|
57
57
|
KEEP_ALIVE = 'keep-alive'.freeze
|
58
58
|
CLOSE = 'close'.freeze
|
59
59
|
|
@@ -64,7 +64,7 @@ module Async
|
|
64
64
|
end
|
65
65
|
|
66
66
|
def keep_alive?(headers)
|
67
|
-
headers[
|
67
|
+
headers[:connection] != CLOSE
|
68
68
|
end
|
69
69
|
|
70
70
|
# Server loop.
|
@@ -88,16 +88,20 @@ module Async
|
|
88
88
|
end
|
89
89
|
|
90
90
|
# Client request.
|
91
|
-
def send_request(method, path, headers = {}, body = [])
|
92
|
-
|
91
|
+
def send_request(authority, method, path, headers = {}, body = [])
|
92
|
+
Async.logger.debug(self) {"#{method} #{path} #{headers.inspect}"}
|
93
|
+
|
94
|
+
write_request(authority, method, path, version, headers, body)
|
93
95
|
|
94
96
|
return Response.new(*read_response)
|
95
97
|
rescue EOFError
|
96
98
|
return nil
|
97
99
|
end
|
98
100
|
|
99
|
-
def write_request(method, path, version, headers, body)
|
101
|
+
def write_request(authority, method, path, version, headers, body)
|
100
102
|
@stream.write("#{method} #{path} #{version}\r\n")
|
103
|
+
@stream.write("Host: #{authority}\r\n")
|
104
|
+
|
101
105
|
write_headers(headers)
|
102
106
|
write_body(body)
|
103
107
|
|
@@ -121,7 +125,7 @@ module Async
|
|
121
125
|
headers = read_headers
|
122
126
|
body = read_body(headers)
|
123
127
|
|
124
|
-
return method, path, version, headers, body
|
128
|
+
return headers.delete(:host), method, path, version, headers, body
|
125
129
|
end
|
126
130
|
|
127
131
|
def write_response(version, status, headers, body)
|
@@ -142,11 +146,11 @@ module Async
|
|
142
146
|
end
|
143
147
|
end
|
144
148
|
|
145
|
-
def read_headers(headers =
|
149
|
+
def read_headers(headers = Headers.new)
|
146
150
|
# Parsing headers:
|
147
151
|
each_line do |line|
|
148
152
|
if line =~ /^([a-zA-Z\-]+):\s*(.+?)\s*$/
|
149
|
-
headers[
|
153
|
+
headers[$1] = $2
|
150
154
|
else
|
151
155
|
break
|
152
156
|
end
|
@@ -178,7 +182,7 @@ module Async
|
|
178
182
|
end
|
179
183
|
|
180
184
|
def read_body(headers)
|
181
|
-
if headers[
|
185
|
+
if headers[:transfer_encoding] == 'chunked'
|
182
186
|
buffer = Async::IO::BinaryString.new
|
183
187
|
|
184
188
|
while true
|
@@ -195,7 +199,7 @@ module Async
|
|
195
199
|
end
|
196
200
|
|
197
201
|
return buffer
|
198
|
-
elsif content_length = headers[
|
202
|
+
elsif content_length = headers[:content_length]
|
199
203
|
return @stream.read(Integer(content_length))
|
200
204
|
end
|
201
205
|
end
|
data/lib/async/http/protocol/http2.rb
CHANGED
@@ -20,6 +20,7 @@
|
|
20
20
|
|
21
21
|
require_relative 'request'
|
22
22
|
require_relative 'response'
|
23
|
+
require_relative '../headers'
|
23
24
|
|
24
25
|
require 'async/notification'
|
25
26
|
|
@@ -38,6 +39,14 @@ module Async
|
|
38
39
|
self.new(::HTTP2::Server.new, stream)
|
39
40
|
end
|
40
41
|
|
42
|
+
HTTPS = 'https'.freeze
|
43
|
+
SCHEME = ':scheme'.freeze
|
44
|
+
METHOD = ':method'.freeze
|
45
|
+
PATH = ':path'.freeze
|
46
|
+
AUTHORITY = ':authority'.freeze
|
47
|
+
REASON = ':reason'.freeze
|
48
|
+
STATUS = ':status'.freeze
|
49
|
+
|
41
50
|
def initialize(controller, stream)
|
42
51
|
@controller = controller
|
43
52
|
@stream = stream
|
@@ -72,15 +81,13 @@ module Async
|
|
72
81
|
|
73
82
|
def read_in_background(task: Task.current)
|
74
83
|
task.async do |nested_task|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
else
|
80
|
-
Async.logger.debug(self) {"Connection reset by peer!"}
|
81
|
-
break
|
82
|
-
end
|
84
|
+
buffer = Async::IO::BinaryString.new
|
85
|
+
|
86
|
+
while data = @stream.io.read(1024*8, buffer)
|
87
|
+
@controller << data
|
83
88
|
end
|
89
|
+
|
90
|
+
Async.logger.debug(self) {"Connection reset by peer!"}
|
84
91
|
end
|
85
92
|
end
|
86
93
|
|
@@ -95,17 +102,19 @@ module Async
|
|
95
102
|
@controller.on(:stream) do |stream|
|
96
103
|
request = Request.new
|
97
104
|
request.version = "HTTP/2.0"
|
98
|
-
request.headers =
|
105
|
+
request.headers = Headers.new
|
99
106
|
|
100
107
|
# stream.on(:active) { } # fires when stream transitions to open state
|
101
108
|
# stream.on(:close) { } # stream is closed by client and server
|
102
109
|
|
103
110
|
stream.on(:headers) do |headers|
|
104
111
|
headers.each do |key, value|
|
105
|
-
if key ==
|
112
|
+
if key == METHOD
|
106
113
|
request.method = value
|
107
|
-
elsif key ==
|
114
|
+
elsif key == PATH
|
108
115
|
request.path = value
|
116
|
+
elsif key == AUTHORITY
|
117
|
+
request.authority = value
|
109
118
|
else
|
110
119
|
request.headers[key] = value
|
111
120
|
end
|
@@ -120,7 +129,7 @@ module Async
|
|
120
129
|
response = yield request
|
121
130
|
|
122
131
|
# send response
|
123
|
-
stream.headers(
|
132
|
+
stream.headers(STATUS => response[0].to_s)
|
124
133
|
|
125
134
|
stream.headers(response[1]) unless response[1].empty?
|
126
135
|
|
@@ -137,13 +146,16 @@ module Async
|
|
137
146
|
end
|
138
147
|
end
|
139
148
|
|
140
|
-
|
149
|
+
RESPONSE_VERSION = 'HTTP/2'.freeze
|
150
|
+
|
151
|
+
def send_request(authority, method, path, headers = {}, body = nil)
|
141
152
|
stream = @controller.new_stream
|
142
153
|
|
143
154
|
internal_headers = {
|
144
|
-
|
145
|
-
|
146
|
-
|
155
|
+
SCHEME => HTTPS,
|
156
|
+
METHOD => method,
|
157
|
+
PATH => path,
|
158
|
+
AUTHORITY => authority,
|
147
159
|
}.merge(headers)
|
148
160
|
|
149
161
|
stream.headers(internal_headers, end_stream: true)
|
@@ -157,17 +169,17 @@ module Async
|
|
157
169
|
# end
|
158
170
|
|
159
171
|
response = Response.new
|
160
|
-
response.version =
|
161
|
-
response.headers =
|
172
|
+
response.version = RESPONSE_VERSION
|
173
|
+
response.headers = Headers.new
|
162
174
|
response.body = Async::IO::BinaryString.new
|
163
175
|
|
164
176
|
stream.on(:headers) do |headers|
|
165
177
|
# Async.logger.debug(self) {"Stream headers: #{headers.inspect}"}
|
166
178
|
|
167
179
|
headers.each do |key, value|
|
168
|
-
if key ==
|
180
|
+
if key == STATUS
|
169
181
|
response.status = value.to_i
|
170
|
-
elsif key ==
|
182
|
+
elsif key == REASON
|
171
183
|
response.reason = value
|
172
184
|
else
|
173
185
|
response.headers[key] = value
|
data/lib/async/http/server.rb
CHANGED
data/lib/async/http/url_endpoint.rb
CHANGED
@@ -31,9 +31,14 @@ module Async
|
|
31
31
|
end
|
32
32
|
|
33
33
|
def initialize(url, endpoint = nil, **options)
|
34
|
+
super(**options)
|
35
|
+
|
34
36
|
@url = url
|
35
37
|
@endpoint = endpoint
|
36
|
-
|
38
|
+
end
|
39
|
+
|
40
|
+
def to_s
|
41
|
+
"\#<#{self.class} #{@url} #{@options.inspect}>"
|
37
42
|
end
|
38
43
|
|
39
44
|
attr :url
|
@@ -64,7 +69,7 @@ module Async
|
|
64
69
|
end
|
65
70
|
|
66
71
|
def hostname
|
67
|
-
@url.hostname
|
72
|
+
@options.fetch(:hostname, @url.hostname)
|
68
73
|
end
|
69
74
|
|
70
75
|
def ssl_context
|
@@ -79,8 +84,6 @@ module Async
|
|
79
84
|
@endpoint = Async::IO::Endpoint.tcp(hostname, port)
|
80
85
|
|
81
86
|
if secure?
|
82
|
-
Async.logger.debug(self) {"Setting hostname: #{self.hostname}"}
|
83
|
-
|
84
87
|
# Wrap it in SSL:
|
85
88
|
@endpoint = Async::IO::SecureEndpoint.new(@endpoint,
|
86
89
|
ssl_context: ssl_context,
|
data/lib/async/http/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: async-http
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Samuel Williams
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-03-
|
11
|
+
date: 2018-03-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: async
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '1.
|
33
|
+
version: '1.6'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '1.
|
40
|
+
version: '1.6'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: http-2
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -123,9 +123,9 @@ files:
|
|
123
123
|
- README.md
|
124
124
|
- Rakefile
|
125
125
|
- async-http.gemspec
|
126
|
-
- examples/spider.rb
|
127
126
|
- lib/async/http.rb
|
128
127
|
- lib/async/http/client.rb
|
128
|
+
- lib/async/http/headers.rb
|
129
129
|
- lib/async/http/pool.rb
|
130
130
|
- lib/async/http/protocol.rb
|
131
131
|
- lib/async/http/protocol/http1.rb
|
data/examples/spider.rb
DELETED
@@ -1,129 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'async/await'
|
4
|
-
|
5
|
-
require 'pry'
|
6
|
-
|
7
|
-
require_relative '../lib/async/http/client'
|
8
|
-
require '../lib/async/http/url_endpoint'
|
9
|
-
require '../lib/async/http/protocol/https'
|
10
|
-
|
11
|
-
require 'trenni/sanitize'
|
12
|
-
require 'set'
|
13
|
-
|
14
|
-
Async.logger.level = Logger::DEBUG
|
15
|
-
|
16
|
-
class HTML < Trenni::Sanitize::Filter
|
17
|
-
def initialize(*)
|
18
|
-
super
|
19
|
-
|
20
|
-
@base = nil
|
21
|
-
@links = []
|
22
|
-
end
|
23
|
-
|
24
|
-
attr :base
|
25
|
-
attr :links
|
26
|
-
|
27
|
-
def filter(node)
|
28
|
-
if node.name == 'base'
|
29
|
-
@base = node['href']
|
30
|
-
elsif node.name == 'a'
|
31
|
-
@links << node['href']
|
32
|
-
end
|
33
|
-
|
34
|
-
node.skip!(TAG)
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
class Cache
|
39
|
-
def initialize
|
40
|
-
@clients = {}
|
41
|
-
end
|
42
|
-
|
43
|
-
def close
|
44
|
-
@clients.each(&:close)
|
45
|
-
@clients.clear
|
46
|
-
end
|
47
|
-
|
48
|
-
def [] endpoint
|
49
|
-
url = endpoint.specification
|
50
|
-
key = "#{url.scheme}://#{url.userinfo}@#{url.hostname}"
|
51
|
-
|
52
|
-
@clients[key] ||= Async::HTTP::Client.new(endpoint, endpoint.secure? ? Async::HTTP::Protocol::HTTPS : Async::HTTP::Protocol::HTTP1)
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
class << self
|
57
|
-
include Async::Await
|
58
|
-
|
59
|
-
async def fetch(url, depth = 4, fetched = Set.new, clients = Cache.new)
|
60
|
-
return if fetched.include?(url) or depth == 0 or url.host != "www.codeotaku.com"
|
61
|
-
fetched << url
|
62
|
-
|
63
|
-
endpoint = Async::HTTP::URLEndpoint.new(url)
|
64
|
-
client = clients[endpoint]
|
65
|
-
|
66
|
-
request_uri = endpoint.specification.request_uri
|
67
|
-
puts "GET #{url} (depth = #{depth})"
|
68
|
-
|
69
|
-
response = timeout(10) do
|
70
|
-
client.get(request_uri, {
|
71
|
-
':authority' => endpoint.specification.hostname,
|
72
|
-
'accept' => '*/*',
|
73
|
-
'user-agent' => 'spider',
|
74
|
-
})
|
75
|
-
end
|
76
|
-
|
77
|
-
if response.status >= 300 && response.status < 400
|
78
|
-
location = url + response.headers['location']
|
79
|
-
# puts "Following redirect to #{location}"
|
80
|
-
return fetch(location, depth-1, fetched)
|
81
|
-
end
|
82
|
-
|
83
|
-
content_type = response.headers['content-type']
|
84
|
-
unless content_type&.start_with? 'text/html'
|
85
|
-
# puts "Unsupported content type: #{response.headers['content-type']}"
|
86
|
-
return
|
87
|
-
end
|
88
|
-
|
89
|
-
base = endpoint.specification
|
90
|
-
|
91
|
-
begin
|
92
|
-
html = HTML.parse(response.body)
|
93
|
-
rescue
|
94
|
-
# Async.logger.error($!)
|
95
|
-
return
|
96
|
-
end
|
97
|
-
|
98
|
-
if html.base
|
99
|
-
base = base + html.base
|
100
|
-
end
|
101
|
-
|
102
|
-
html.links.each do |href|
|
103
|
-
begin
|
104
|
-
full_url = base + href
|
105
|
-
|
106
|
-
fetch(full_url, depth - 1, fetched) if full_url.kind_of? URI::HTTP
|
107
|
-
rescue ArgumentError, URI::InvalidURIError
|
108
|
-
# puts "Could not fetch #{href}, relative to #{base}."
|
109
|
-
end
|
110
|
-
end
|
111
|
-
rescue Async::TimeoutError
|
112
|
-
Async.logger.error("Timeout while fetching #{url}")
|
113
|
-
rescue StandardError
|
114
|
-
Async.logger.error($!)
|
115
|
-
ensure
|
116
|
-
puts "Closing client from spider..."
|
117
|
-
client.close if client
|
118
|
-
end
|
119
|
-
|
120
|
-
async def fetch_one(url)
|
121
|
-
endpoint = Async::HTTP::URLEndpoint.new(url)
|
122
|
-
client = Async::HTTP::Client.new(endpoint, endpoint.secure? ? Async::HTTP::Protocol::HTTPS : Async::HTTP::Protocol::HTTP1)
|
123
|
-
|
124
|
-
binding.pry
|
125
|
-
end
|
126
|
-
end
|
127
|
-
|
128
|
-
fetch_one(URI.parse("https://www.codeotaku.com"))
|
129
|
-
#puts "Finished."
|