nokogumbo 0.7 → 0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +16 -7
- data/{work → gumbo-parser/src}/attribute.c +0 -0
- data/{work → gumbo-parser/src}/attribute.h +0 -0
- data/{work → gumbo-parser/src}/char_ref.c +0 -0
- data/{work → gumbo-parser/src}/char_ref.h +0 -0
- data/{work → gumbo-parser/src}/error.c +0 -0
- data/{work → gumbo-parser/src}/error.h +0 -0
- data/{work → gumbo-parser/src}/gumbo.h +0 -0
- data/{work → gumbo-parser/src}/insertion_mode.h +0 -0
- data/{work → gumbo-parser/src}/parser.c +0 -0
- data/{work → gumbo-parser/src}/parser.h +0 -0
- data/{work → gumbo-parser/src}/string_buffer.c +0 -0
- data/{work → gumbo-parser/src}/string_buffer.h +0 -0
- data/{work → gumbo-parser/src}/string_piece.c +0 -0
- data/{work → gumbo-parser/src}/string_piece.h +0 -0
- data/{work → gumbo-parser/src}/tag.c +0 -0
- data/{work → gumbo-parser/src}/token_type.h +0 -0
- data/{work → gumbo-parser/src}/tokenizer.c +0 -0
- data/{work → gumbo-parser/src}/tokenizer.h +0 -0
- data/{work → gumbo-parser/src}/tokenizer_states.h +0 -0
- data/{work → gumbo-parser/src}/utf8.c +0 -0
- data/{work → gumbo-parser/src}/utf8.h +0 -0
- data/{work → gumbo-parser/src}/util.c +0 -0
- data/{work → gumbo-parser/src}/util.h +0 -0
- data/{work → gumbo-parser/src}/vector.c +0 -0
- data/{work → gumbo-parser/src}/vector.h +0 -0
- data/lib/nokogumbo.rb +5 -1
- data/work/extconf.rb +12 -1
- metadata +27 -27
data/README.md
CHANGED
@@ -6,7 +6,7 @@ Nokogumbo provides the ability for a Ruby program to invoke the
|
|
6
6
|
and to access the result as a
|
7
7
|
[Nokogiri::HTML::Document](http://nokogiri.org/Nokogiri/HTML/Document.html).
|
8
8
|
|
9
|
-
Usage
|
9
|
+
Usage
|
10
10
|
-----
|
11
11
|
|
12
12
|
```ruby
|
@@ -14,14 +14,22 @@ require 'nokogumbo'
|
|
14
14
|
doc = Nokogiri::HTML5(string)
|
15
15
|
```
|
16
16
|
|
17
|
-
Because HTML is often fetched via the web, a convenience interface
|
18
|
-
provided:
|
17
|
+
Because HTML is often fetched via the web, a convenience interface to
|
18
|
+
HTTP get is also provided:
|
19
19
|
|
20
20
|
```ruby
|
21
21
|
require 'nokogumbo'
|
22
22
|
doc = Nokogiri::HTML5.get(uri)
|
23
23
|
```
|
24
|
-
|
24
|
+
|
25
|
+
Example
|
26
|
+
-----
|
27
|
+
```ruby
|
28
|
+
require 'nokogumbo'
|
29
|
+
puts Nokogiri::HTML5.get('http://nokogiri.org').at('h1 abbr')['title']
|
30
|
+
```
|
31
|
+
|
32
|
+
Notes
|
25
33
|
-----
|
26
34
|
|
27
35
|
* The `Nokogiri::HTML5.parse` function takes a string and passes it to the
|
@@ -43,16 +51,17 @@ rules defined in the HTML5 specification for doing so.
|
|
43
51
|
* Instead of returning `unknown` as the element name for unknown tags, the
|
44
52
|
original tag name is returned verbatim.
|
45
53
|
|
46
|
-
*
|
54
|
+
* If the Gumbo HTML5 parser is not already installed, the source for the
|
55
|
+
parser will be downloaded and compiled into the Gem itself.
|
47
56
|
|
48
|
-
Installation
|
57
|
+
Installation
|
49
58
|
============
|
50
59
|
|
51
60
|
* Execute `rake gem`
|
52
61
|
|
53
62
|
* [sudo] gem install pkg/nokogumbo*.gem
|
54
63
|
|
55
|
-
Related efforts
|
64
|
+
Related efforts
|
56
65
|
============
|
57
66
|
|
58
67
|
* [ruby-gumbo](https://github.com/galdor/ruby-gumbo#readme) - a ruby binding
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
data/lib/nokogumbo.rb
CHANGED
@@ -98,7 +98,11 @@ module Nokogiri
|
|
98
98
|
encoding ||= Encoding::ISO_8859_1
|
99
99
|
|
100
100
|
# change the encoding to match the detected or inferred encoding
|
101
|
-
|
101
|
+
begin
|
102
|
+
body.force_encoding(encoding)
|
103
|
+
rescue ArgumentError
|
104
|
+
body.force_encoding(Encoding::ISO_8859_1)
|
105
|
+
end
|
102
106
|
end
|
103
107
|
|
104
108
|
body.encode(Encoding::UTF_8)
|
data/work/extconf.rb
CHANGED
@@ -1,7 +1,18 @@
|
|
1
1
|
require 'mkmf'
|
2
2
|
$CFLAGS = " -std=c99"
|
3
|
+
|
4
|
+
# libxml2 libraries from http://www.xmlsoft.org/
|
3
5
|
pkg_config('libxml-2.0')
|
4
|
-
|
6
|
+
|
7
|
+
# nokogiri headers from gem install
|
8
|
+
nokogiri_lib = Gem.find_files('nokogiri').first or gem 'nokogiri'
|
5
9
|
nokogiri_ext = nokogiri_lib.sub(%r(lib/nokogiri$), 'ext/nokogiri')
|
6
10
|
find_header('nokogiri.h', nokogiri_ext)
|
11
|
+
|
12
|
+
# add in gumbo-parser source from github if not already installed
|
13
|
+
unless have_library('gumbo', 'gumbo_parse') or File.exist? 'work/gumbo.h'
|
14
|
+
require 'fileutils'
|
15
|
+
FileUtils.cp Dir['../gumbo-parser/src/*'], '.'
|
16
|
+
end
|
17
|
+
|
7
18
|
create_makefile('nokogumboc')
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogumbo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.7'
|
4
|
+
version: '0.8'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-08-
|
12
|
+
date: 2013-08-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -40,31 +40,31 @@ files:
|
|
40
40
|
- README.md
|
41
41
|
- work/extconf.rb
|
42
42
|
- work/nokogumbo.c
|
43
|
-
-
|
44
|
-
-
|
45
|
-
-
|
46
|
-
-
|
47
|
-
-
|
48
|
-
-
|
49
|
-
-
|
50
|
-
-
|
51
|
-
-
|
52
|
-
-
|
53
|
-
-
|
54
|
-
-
|
55
|
-
-
|
56
|
-
-
|
57
|
-
-
|
58
|
-
-
|
59
|
-
-
|
60
|
-
-
|
61
|
-
-
|
62
|
-
-
|
63
|
-
-
|
64
|
-
-
|
65
|
-
-
|
66
|
-
-
|
67
|
-
-
|
43
|
+
- gumbo-parser/src/utf8.h
|
44
|
+
- gumbo-parser/src/token_type.h
|
45
|
+
- gumbo-parser/src/util.h
|
46
|
+
- gumbo-parser/src/vector.c
|
47
|
+
- gumbo-parser/src/string_buffer.c
|
48
|
+
- gumbo-parser/src/tokenizer_states.h
|
49
|
+
- gumbo-parser/src/error.h
|
50
|
+
- gumbo-parser/src/parser.h
|
51
|
+
- gumbo-parser/src/error.c
|
52
|
+
- gumbo-parser/src/tokenizer.h
|
53
|
+
- gumbo-parser/src/string_buffer.h
|
54
|
+
- gumbo-parser/src/vector.h
|
55
|
+
- gumbo-parser/src/string_piece.h
|
56
|
+
- gumbo-parser/src/attribute.c
|
57
|
+
- gumbo-parser/src/char_ref.c
|
58
|
+
- gumbo-parser/src/string_piece.c
|
59
|
+
- gumbo-parser/src/gumbo.h
|
60
|
+
- gumbo-parser/src/tag.c
|
61
|
+
- gumbo-parser/src/util.c
|
62
|
+
- gumbo-parser/src/parser.c
|
63
|
+
- gumbo-parser/src/utf8.c
|
64
|
+
- gumbo-parser/src/attribute.h
|
65
|
+
- gumbo-parser/src/char_ref.h
|
66
|
+
- gumbo-parser/src/insertion_mode.h
|
67
|
+
- gumbo-parser/src/tokenizer.c
|
68
68
|
homepage: https://github.com/rubys/nokogumbo/#readme
|
69
69
|
licenses:
|
70
70
|
- Apache 2.0
|