nokogumbo 1.0 → 1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/gumbo-parser/src/tokenizer.c +1 -1
- data/lib/nokogumbo.rb +27 -5
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MDM0N2MwNTE2MzI0MmFjNTZmYzQ5YTk2MDMwNWQ1YzVlNTk5NTMzYQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
OTNlOWZkMDc0NTc3N2U5MjY1ODhlNTEwMmMxMmMxOTBiYTA0NTViZA==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
YjI2NzhjYzE0ZjQ1OTA3MDRiNWJjOGZkNzRlZDNhOTQ2N2ViODZjNDFlOWE3
|
10
|
+
Zjk1MTY0MmQ2ZDgwMTQxYzczNDQxYTFmYjdiNGY3ZjAwZDYxNzVlNjE2MWJh
|
11
|
+
NDQyYzk5MGU5ZTZlNDdkZWUxMjE3ZjhmNWIxZWIzYzFkZDRkYTA=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
YjE4NDhmMTIxNTI5MThlM2Q1MmRkNmVkNWRmYmFjNjllYzQ3YTFkMWJmNWI1
|
14
|
+
MWRkNDI2NzY0MWRhNjVlMjNjNmQ2NWZjOTQ3YzUwNzc3ZmNiMDBjNTBlZTBk
|
15
|
+
YTZkYzU2NTQ3NDNmMzIxNzA4NzQ3ZDI2NmFjY2Y3YWU3MmFmNTY=
|
@@ -442,7 +442,7 @@ static void finish_token(GumboParser* parser, GumboToken* token) {
|
|
442
442
|
reset_token_start_point(tokenizer);
|
443
443
|
token->original_text.length =
|
444
444
|
tokenizer->_token_start - token->original_text.data;
|
445
|
-
if (token->original_text.data[token->original_text.length - 1] == '\r') {
|
445
|
+
if (token->original_text.length > 0 && token->original_text.data[token->original_text.length - 1] == '\r') {
|
446
446
|
// The UTF8 iterator will ignore carriage returns in the input stream, which
|
447
447
|
// means that the next token may start one past a \r character. The pointer
|
448
448
|
// arithmetic above results in that \r being appended to the original text
|
data/lib/nokogumbo.rb
CHANGED
@@ -26,26 +26,48 @@ module Nokogiri
|
|
26
26
|
|
27
27
|
# Fetch and parse a HTML document from the web, following redirects,
|
28
28
|
# handling https, and determining the character encoding using HTML5
|
29
|
-
# rules. +uri+ may be a +String+ or a +URI+. +limit+ controls the
|
30
|
-
# number of redirects that will be followed.
|
31
|
-
|
29
|
+
# rules. +uri+ may be a +String+ or a +URI+. +options+ contains
|
30
|
+
# http headers and special options. Everything which is not a
|
31
|
+
# special option is considered a header. Special options include:
|
32
|
+
# * :follow_limit => number of redirects which are followed
|
33
|
+
# * :basic_auth => [username, password]
|
34
|
+
def self.get(uri, options={})
|
35
|
+
headers = options.clone
|
36
|
+
headers = {:follow_limit => headers} if Numeric === headers # deprecated
|
37
|
+
limit=headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10
|
38
|
+
|
32
39
|
require 'net/http'
|
33
40
|
uri = URI(uri) unless URI === uri
|
34
41
|
|
35
42
|
http = Net::HTTP.new(uri.host, uri.port)
|
43
|
+
|
44
|
+
# TLS / SSL support
|
36
45
|
if uri.scheme == 'https'
|
37
46
|
http.use_ssl = true
|
38
47
|
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
39
48
|
end
|
49
|
+
|
40
50
|
request = Net::HTTP::Get.new(uri.request_uri)
|
51
|
+
|
52
|
+
# basic authentication
|
53
|
+
auth = headers.delete(:basic_auth)
|
54
|
+
auth ||= [uri.user, uri.password] if uri.user and uri.password
|
55
|
+
request.basic_auth auth.first, auth.last if auth
|
56
|
+
|
57
|
+
# remaining options are treated as headers
|
58
|
+
headers.each {|key, value| request[key.to_s] = value.to_s}
|
59
|
+
|
41
60
|
response = http.request(request)
|
42
61
|
|
43
62
|
case response
|
44
63
|
when Net::HTTPSuccess
|
45
|
-
parse(reencode(response.body, response['content-type']))
|
64
|
+
doc = parse(reencode(response.body, response['content-type']))
|
65
|
+
doc.instance_variable_set('@response', response)
|
66
|
+
doc.class.send(:attr_reader, :response)
|
67
|
+
doc
|
46
68
|
when Net::HTTPRedirection
|
47
69
|
response.value if limit <= 1
|
48
|
-
get(response['location'], limit-1)
|
70
|
+
get(response['location'], options.merge(:follow_limit => limit-1))
|
49
71
|
else
|
50
72
|
response.value
|
51
73
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogumbo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '1.0'
|
4
|
+
version: '1.1'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sam Ruby
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-09-
|
11
|
+
date: 2013-09-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|