nokogumbo 1.0 → 1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/gumbo-parser/src/tokenizer.c +1 -1
- data/lib/nokogumbo.rb +27 -5
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MDM0N2MwNTE2MzI0MmFjNTZmYzQ5YTk2MDMwNWQ1YzVlNTk5NTMzYQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
OTNlOWZkMDc0NTc3N2U5MjY1ODhlNTEwMmMxMmMxOTBiYTA0NTViZA==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
YjI2NzhjYzE0ZjQ1OTA3MDRiNWJjOGZkNzRlZDNhOTQ2N2ViODZjNDFlOWE3
|
10
|
+
Zjk1MTY0MmQ2ZDgwMTQxYzczNDQxYTFmYjdiNGY3ZjAwZDYxNzVlNjE2MWJh
|
11
|
+
NDQyYzk5MGU5ZTZlNDdkZWUxMjE3ZjhmNWIxZWIzYzFkZDRkYTA=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
YjE4NDhmMTIxNTI5MThlM2Q1MmRkNmVkNWRmYmFjNjllYzQ3YTFkMWJmNWI1
|
14
|
+
MWRkNDI2NzY0MWRhNjVlMjNjNmQ2NWZjOTQ3YzUwNzc3ZmNiMDBjNTBlZTBk
|
15
|
+
YTZkYzU2NTQ3NDNmMzIxNzA4NzQ3ZDI2NmFjY2Y3YWU3MmFmNTY=
|
data/gumbo-parser/src/tokenizer.c
CHANGED
@@ -442,7 +442,7 @@ static void finish_token(GumboParser* parser, GumboToken* token) {
|
|
442
442
|
reset_token_start_point(tokenizer);
|
443
443
|
token->original_text.length =
|
444
444
|
tokenizer->_token_start - token->original_text.data;
|
445
|
-
if (token->original_text.data[token->original_text.length - 1] == '\r') {
|
445
|
+
if (token->original_text.length > 0 && token->original_text.data[token->original_text.length - 1] == '\r') {
|
446
446
|
// The UTF8 iterator will ignore carriage returns in the input stream, which
|
447
447
|
// means that the next token may start one past a \r character. The pointer
|
448
448
|
// arithmetic above results in that \r being appended to the original text
|
data/lib/nokogumbo.rb
CHANGED
@@ -26,26 +26,48 @@ module Nokogiri
|
|
26
26
|
|
27
27
|
# Fetch and parse a HTML document from the web, following redirects,
|
28
28
|
# handling https, and determining the character encoding using HTML5
|
29
|
-
# rules. +uri+ may be a +String+ or a +URI+. +
|
30
|
-
#
|
31
|
-
|
29
|
+
# rules. +uri+ may be a +String+ or a +URI+. +options+ contains
|
30
|
+
# http headers and special options. Everything which is not a
|
31
|
+
# special option is considered a header. Special options include:
|
32
|
+
# * :follow_limit => number of redirects which are followed
|
33
|
+
# * :basic_auth => [username, password]
|
34
|
+
def self.get(uri, options={})
|
35
|
+
headers = options.clone
|
36
|
+
headers = {:follow_limit => headers} if Numeric === headers # deprecated
|
37
|
+
limit=headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10
|
38
|
+
|
32
39
|
require 'net/http'
|
33
40
|
uri = URI(uri) unless URI === uri
|
34
41
|
|
35
42
|
http = Net::HTTP.new(uri.host, uri.port)
|
43
|
+
|
44
|
+
# TLS / SSL support
|
36
45
|
if uri.scheme == 'https'
|
37
46
|
http.use_ssl = true
|
38
47
|
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
39
48
|
end
|
49
|
+
|
40
50
|
request = Net::HTTP::Get.new(uri.request_uri)
|
51
|
+
|
52
|
+
# basic authentication
|
53
|
+
auth = headers.delete(:basic_auth)
|
54
|
+
auth ||= [uri.user, uri.password] if uri.user and uri.password
|
55
|
+
request.basic_auth auth.first, auth.last if auth
|
56
|
+
|
57
|
+
# remaining options are treated as headers
|
58
|
+
headers.each {|key, value| request[key.to_s] = value.to_s}
|
59
|
+
|
41
60
|
response = http.request(request)
|
42
61
|
|
43
62
|
case response
|
44
63
|
when Net::HTTPSuccess
|
45
|
-
parse(reencode(response.body, response['content-type']))
|
64
|
+
doc = parse(reencode(response.body, response['content-type']))
|
65
|
+
doc.instance_variable_set('@response', response)
|
66
|
+
doc.class.send(:attr_reader, :response)
|
67
|
+
doc
|
46
68
|
when Net::HTTPRedirection
|
47
69
|
response.value if limit <= 1
|
48
|
-
get(response['location'], limit-1)
|
70
|
+
get(response['location'], options.merge(:follow_limit => limit-1))
|
49
71
|
else
|
50
72
|
response.value
|
51
73
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogumbo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '1.0'
|
4
|
+
version: '1.1'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sam Ruby
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-09-
|
11
|
+
date: 2013-09-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|