nokogumbo 1.0 → 1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- YzMwY2Q3N2Y1YWRmNDhiYzYzNzdmMDZmOGNkYzU3YmE4YThmMGMwMg==
4
+ MDM0N2MwNTE2MzI0MmFjNTZmYzQ5YTk2MDMwNWQ1YzVlNTk5NTMzYQ==
5
5
  data.tar.gz: !binary |-
6
- YzZlNDYxNzNhNzA3NzQ0OGE4ZTAyZTYxYWM2MWVkNGU0MzZjMjU1Yw==
6
+ OTNlOWZkMDc0NTc3N2U5MjY1ODhlNTEwMmMxMmMxOTBiYTA0NTViZA==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- NDdmOGQ5Nzc0M2FkYTM4NzMwYmNkMjk0ZTMyMzYzYTYzMDBmNzMxNTczNTFl
10
- ZjIwMjAwNmRmM2EyZGRkNmU1NDM5Y2M0MjNkM2ZiZjk5YzgwNDc1YWZiNTQ2
11
- MDk2MTBlN2QzZGNjNjAyZWExODZlNGY1YTE3Y2JjYjY1NTQwNDQ=
9
+ YjI2NzhjYzE0ZjQ1OTA3MDRiNWJjOGZkNzRlZDNhOTQ2N2ViODZjNDFlOWE3
10
+ Zjk1MTY0MmQ2ZDgwMTQxYzczNDQxYTFmYjdiNGY3ZjAwZDYxNzVlNjE2MWJh
11
+ NDQyYzk5MGU5ZTZlNDdkZWUxMjE3ZjhmNWIxZWIzYzFkZDRkYTA=
12
12
  data.tar.gz: !binary |-
13
- NTgwODgxYWU4NzMyMDk1M2VkZjEyZTAzMTBiODQ3N2I1OTUxYzBkNTQyMGQx
14
- MWUwYTI3OGZhMmNhOTU0MTBiMmZlMDBlZjg4NDgyODc2ODJkMzkyNDJhZmMx
15
- ZmYzODA0YTI2Njk4MjQxNGI1NDAyMzdiN2UyZWEzMzBkMWJkZTg=
13
+ YjE4NDhmMTIxNTI5MThlM2Q1MmRkNmVkNWRmYmFjNjllYzQ3YTFkMWJmNWI1
14
+ MWRkNDI2NzY0MWRhNjVlMjNjNmQ2NWZjOTQ3YzUwNzc3ZmNiMDBjNTBlZTBk
15
+ YTZkYzU2NTQ3NDNmMzIxNzA4NzQ3ZDI2NmFjY2Y3YWU3MmFmNTY=
@@ -442,7 +442,7 @@ static void finish_token(GumboParser* parser, GumboToken* token) {
442
442
  reset_token_start_point(tokenizer);
443
443
  token->original_text.length =
444
444
  tokenizer->_token_start - token->original_text.data;
445
- if (token->original_text.data[token->original_text.length - 1] == '\r') {
445
+ if (token->original_text.length > 0 && token->original_text.data[token->original_text.length - 1] == '\r') {
446
446
  // The UTF8 iterator will ignore carriage returns in the input stream, which
447
447
  // means that the next token may start one past a \r character. The pointer
448
448
  // arithmetic above results in that \r being appended to the original text
data/lib/nokogumbo.rb CHANGED
@@ -26,26 +26,48 @@ module Nokogiri
26
26
 
27
27
  # Fetch and parse a HTML document from the web, following redirects,
28
28
  # handling https, and determining the character encoding using HTML5
29
- # rules. +uri+ may be a +String+ or a +URI+. +limit+ controls the
30
- # number of redirects that will be followed.
31
- def self.get(uri, limit=10)
29
+ # rules. +uri+ may be a +String+ or a +URI+. +options+ contains
30
+ # http headers and special options. Everything which is not a
31
+ # special option is considered a header. Special options include:
32
+ # * :follow_limit => number of redirects which are followed
33
+ # * :basic_auth => [username, password]
34
+ def self.get(uri, options={})
35
+ headers = options.clone
36
+ headers = {:follow_limit => headers} if Numeric === headers # deprecated
37
+ limit=headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10
38
+
32
39
  require 'net/http'
33
40
  uri = URI(uri) unless URI === uri
34
41
 
35
42
  http = Net::HTTP.new(uri.host, uri.port)
43
+
44
+ # TLS / SSL support
36
45
  if uri.scheme == 'https'
37
46
  http.use_ssl = true
38
47
  http.verify_mode = OpenSSL::SSL::VERIFY_NONE
39
48
  end
49
+
40
50
  request = Net::HTTP::Get.new(uri.request_uri)
51
+
52
+ # basic authentication
53
+ auth = headers.delete(:basic_auth)
54
+ auth ||= [uri.user, uri.password] if uri.user and uri.password
55
+ request.basic_auth auth.first, auth.last if auth
56
+
57
+ # remaining options are treated as headers
58
+ headers.each {|key, value| request[key.to_s] = value.to_s}
59
+
41
60
  response = http.request(request)
42
61
 
43
62
  case response
44
63
  when Net::HTTPSuccess
45
- parse(reencode(response.body, response['content-type']))
64
+ doc = parse(reencode(response.body, response['content-type']))
65
+ doc.instance_variable_set('@response', response)
66
+ doc.class.send(:attr_reader, :response)
67
+ doc
46
68
  when Net::HTTPRedirection
47
69
  response.value if limit <= 1
48
- get(response['location'], limit-1)
70
+ get(response['location'], options.merge(:follow_limit => limit-1))
49
71
  else
50
72
  response.value
51
73
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogumbo
3
3
  version: !ruby/object:Gem::Version
4
- version: '1.0'
4
+ version: '1.1'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam Ruby
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-09-10 00:00:00.000000000 Z
11
+ date: 2013-09-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri