nokogumbo 1.0 → 1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- YzMwY2Q3N2Y1YWRmNDhiYzYzNzdmMDZmOGNkYzU3YmE4YThmMGMwMg==
4
+ MDM0N2MwNTE2MzI0MmFjNTZmYzQ5YTk2MDMwNWQ1YzVlNTk5NTMzYQ==
5
5
  data.tar.gz: !binary |-
6
- YzZlNDYxNzNhNzA3NzQ0OGE4ZTAyZTYxYWM2MWVkNGU0MzZjMjU1Yw==
6
+ OTNlOWZkMDc0NTc3N2U5MjY1ODhlNTEwMmMxMmMxOTBiYTA0NTViZA==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- NDdmOGQ5Nzc0M2FkYTM4NzMwYmNkMjk0ZTMyMzYzYTYzMDBmNzMxNTczNTFl
10
- ZjIwMjAwNmRmM2EyZGRkNmU1NDM5Y2M0MjNkM2ZiZjk5YzgwNDc1YWZiNTQ2
11
- MDk2MTBlN2QzZGNjNjAyZWExODZlNGY1YTE3Y2JjYjY1NTQwNDQ=
9
+ YjI2NzhjYzE0ZjQ1OTA3MDRiNWJjOGZkNzRlZDNhOTQ2N2ViODZjNDFlOWE3
10
+ Zjk1MTY0MmQ2ZDgwMTQxYzczNDQxYTFmYjdiNGY3ZjAwZDYxNzVlNjE2MWJh
11
+ NDQyYzk5MGU5ZTZlNDdkZWUxMjE3ZjhmNWIxZWIzYzFkZDRkYTA=
12
12
  data.tar.gz: !binary |-
13
- NTgwODgxYWU4NzMyMDk1M2VkZjEyZTAzMTBiODQ3N2I1OTUxYzBkNTQyMGQx
14
- MWUwYTI3OGZhMmNhOTU0MTBiMmZlMDBlZjg4NDgyODc2ODJkMzkyNDJhZmMx
15
- ZmYzODA0YTI2Njk4MjQxNGI1NDAyMzdiN2UyZWEzMzBkMWJkZTg=
13
+ YjE4NDhmMTIxNTI5MThlM2Q1MmRkNmVkNWRmYmFjNjllYzQ3YTFkMWJmNWI1
14
+ MWRkNDI2NzY0MWRhNjVlMjNjNmQ2NWZjOTQ3YzUwNzc3ZmNiMDBjNTBlZTBk
15
+ YTZkYzU2NTQ3NDNmMzIxNzA4NzQ3ZDI2NmFjY2Y3YWU3MmFmNTY=
@@ -442,7 +442,7 @@ static void finish_token(GumboParser* parser, GumboToken* token) {
442
442
  reset_token_start_point(tokenizer);
443
443
  token->original_text.length =
444
444
  tokenizer->_token_start - token->original_text.data;
445
- if (token->original_text.data[token->original_text.length - 1] == '\r') {
445
+ if (token->original_text.length > 0 && token->original_text.data[token->original_text.length - 1] == '\r') {
446
446
  // The UTF8 iterator will ignore carriage returns in the input stream, which
447
447
  // means that the next token may start one past a \r character. The pointer
448
448
  // arithmetic above results in that \r being appended to the original text
data/lib/nokogumbo.rb CHANGED
@@ -26,26 +26,48 @@ module Nokogiri
26
26
 
27
27
  # Fetch and parse a HTML document from the web, following redirects,
28
28
  # handling https, and determining the character encoding using HTML5
29
- # rules. +uri+ may be a +String+ or a +URI+. +limit+ controls the
30
- # number of redirects that will be followed.
31
- def self.get(uri, limit=10)
29
+ # rules. +uri+ may be a +String+ or a +URI+. +options+ contains
30
+ # http headers and special options. Everything which is not a
31
+ # special option is considered a header. Special options include:
32
+ # * :follow_limit => number of redirects which are followed
33
+ # * :basic_auth => [username, password]
34
+ def self.get(uri, options={})
35
+ headers = options.clone
36
+ headers = {:follow_limit => headers} if Numeric === headers # deprecated
37
+ limit=headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10
38
+
32
39
  require 'net/http'
33
40
  uri = URI(uri) unless URI === uri
34
41
 
35
42
  http = Net::HTTP.new(uri.host, uri.port)
43
+
44
+ # TLS / SSL support
36
45
  if uri.scheme == 'https'
37
46
  http.use_ssl = true
38
47
  http.verify_mode = OpenSSL::SSL::VERIFY_NONE
39
48
  end
49
+
40
50
  request = Net::HTTP::Get.new(uri.request_uri)
51
+
52
+ # basic authentication
53
+ auth = headers.delete(:basic_auth)
54
+ auth ||= [uri.user, uri.password] if uri.user and uri.password
55
+ request.basic_auth auth.first, auth.last if auth
56
+
57
+ # remaining options are treated as headers
58
+ headers.each {|key, value| request[key.to_s] = value.to_s}
59
+
41
60
  response = http.request(request)
42
61
 
43
62
  case response
44
63
  when Net::HTTPSuccess
45
- parse(reencode(response.body, response['content-type']))
64
+ doc = parse(reencode(response.body, response['content-type']))
65
+ doc.instance_variable_set('@response', response)
66
+ doc.class.send(:attr_reader, :response)
67
+ doc
46
68
  when Net::HTTPRedirection
47
69
  response.value if limit <= 1
48
- get(response['location'], limit-1)
70
+ get(response['location'], options.merge(:follow_limit => limit-1))
49
71
  else
50
72
  response.value
51
73
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogumbo
3
3
  version: !ruby/object:Gem::Version
4
- version: '1.0'
4
+ version: '1.1'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam Ruby
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-09-10 00:00:00.000000000 Z
11
+ date: 2013-09-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri