nokogumbo 1.5.0 → 2.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57)
  1. checksums.yaml +4 -4
  2. data/README.md +237 -26
  3. data/ext/nokogumbo/extconf.rb +144 -0
  4. data/ext/nokogumbo/nokogumbo.c +793 -0
  5. data/gumbo-parser/src/ascii.c +75 -0
  6. data/gumbo-parser/src/ascii.h +115 -0
  7. data/gumbo-parser/src/attribute.c +26 -28
  8. data/gumbo-parser/src/attribute.h +3 -23
  9. data/gumbo-parser/src/char_ref.c +5972 -6816
  10. data/gumbo-parser/src/char_ref.h +14 -45
  11. data/gumbo-parser/src/error.c +510 -163
  12. data/gumbo-parser/src/error.h +70 -147
  13. data/gumbo-parser/src/foreign_attrs.c +104 -0
  14. data/gumbo-parser/src/gumbo.h +577 -305
  15. data/gumbo-parser/src/insertion_mode.h +4 -28
  16. data/gumbo-parser/src/macros.h +91 -0
  17. data/gumbo-parser/src/parser.c +2922 -2228
  18. data/gumbo-parser/src/parser.h +6 -22
  19. data/gumbo-parser/src/replacement.h +33 -0
  20. data/gumbo-parser/src/string_buffer.c +43 -50
  21. data/gumbo-parser/src/string_buffer.h +24 -40
  22. data/gumbo-parser/src/string_piece.c +39 -39
  23. data/gumbo-parser/src/svg_attrs.c +174 -0
  24. data/gumbo-parser/src/svg_tags.c +137 -0
  25. data/gumbo-parser/src/tag.c +186 -59
  26. data/gumbo-parser/src/tag_lookup.c +382 -0
  27. data/gumbo-parser/src/tag_lookup.h +13 -0
  28. data/gumbo-parser/src/token_buffer.c +79 -0
  29. data/gumbo-parser/src/token_buffer.h +71 -0
  30. data/gumbo-parser/src/token_type.h +1 -25
  31. data/gumbo-parser/src/tokenizer.c +2128 -1562
  32. data/gumbo-parser/src/tokenizer.h +41 -52
  33. data/gumbo-parser/src/tokenizer_states.h +281 -45
  34. data/gumbo-parser/src/utf8.c +98 -123
  35. data/gumbo-parser/src/utf8.h +84 -52
  36. data/gumbo-parser/src/util.c +48 -38
  37. data/gumbo-parser/src/util.h +10 -40
  38. data/gumbo-parser/src/vector.c +45 -57
  39. data/gumbo-parser/src/vector.h +17 -39
  40. data/lib/nokogumbo.rb +18 -170
  41. data/lib/nokogumbo/html5.rb +252 -0
  42. data/lib/nokogumbo/html5/document.rb +53 -0
  43. data/lib/nokogumbo/html5/document_fragment.rb +62 -0
  44. data/lib/nokogumbo/html5/node.rb +72 -0
  45. data/lib/nokogumbo/version.rb +3 -0
  46. metadata +40 -21
  47. data/ext/nokogumboc/extconf.rb +0 -60
  48. data/ext/nokogumboc/nokogumbo.c +0 -295
  49. data/gumbo-parser/src/char_ref.rl +0 -2554
  50. data/gumbo-parser/src/string_piece.h +0 -38
  51. data/gumbo-parser/src/tag.in +0 -150
  52. data/gumbo-parser/src/tag_enum.h +0 -153
  53. data/gumbo-parser/src/tag_gperf.h +0 -105
  54. data/gumbo-parser/src/tag_sizes.h +0 -4
  55. data/gumbo-parser/src/tag_strings.h +0 -153
  56. data/gumbo-parser/visualc/include/strings.h +0 -4
  57. data/test-nokogumbo.rb +0 -190
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 96fa61565f78d5491e0b6d5b505cf936524745eb848b8b6584fc15e20c7ae35b
4
- data.tar.gz: e5416f71bbe90323f04b8aad4dc48b28947e43a9eb46f446f8ca1444f519a07b
3
+ metadata.gz: a84b367d94046358f7844781b0f92cea51a75e052d54e35b53ab03602743f1b8
4
+ data.tar.gz: 8d96a5adfa701f658f7ba193ee96bb8a7e6901c1ff4d3fb2dad6f3e372ce66d2
5
5
  SHA512:
6
- metadata.gz: 676bf3585d38cd4ad5c72b8b3afd4952e248c747683ae1072dd43f6ce1ccd279177e4d0c75a9821ed76d32806333128152231349d8d113ae5d81279580b13004
7
- data.tar.gz: 3459078d96977399e75551c4a3ee5623091f48569984b771e540ec111125f5af91e39a8d78cbd3ce9280326b1b9395dc4a0b0d7f0a72294876682cb9fe35e3d9
6
+ metadata.gz: de2472c6ff89e3f0076a44ac13fa67688e82f909b265a2b70fe45225daf01aaf6059c6ca94f06e10ff94e10ac8a8f42b685e63f494849f04f3af56f337a73382
7
+ data.tar.gz: 3880defdaa15cb278236cf170d5727d1d73b14698f1ea41e7a7141da7a2fe8c3bafea19367196214c0dc0c1c27854602714d80abd30ecfd6be90f4277f3e33d7
data/README.md CHANGED
@@ -1,23 +1,21 @@
1
- Nokogumbo - a Nokogiri interface to the Gumbo HTML5 parser.
2
- ===========
1
+ # Nokogumbo - a Nokogiri interface to the Gumbo HTML5 parser.
3
2
 
4
- Nokogumbo provides the ability for a Ruby program to invoke the
5
- [Gumbo HTML5 parser](https://github.com/google/gumbo-parser#readme)
3
+ Nokogumbo provides the ability for a Ruby program to invoke
4
+ [our version of the Gumbo HTML5 parser](https://github.com/rubys/nokogumbo/tree/master/gumbo-parser/src)
6
5
  and to access the result as a
7
6
  [Nokogiri::HTML::Document](http://rdoc.info/github/sparklemotion/nokogiri/Nokogiri/HTML/Document).
8
7
 
9
- [![Build Status](https://travis-ci.org/rubys/nokogumbo.svg)](https://travis-ci.org/rubys/nokogumbo)
8
+ [![Travis-CI Build Status](https://travis-ci.org/rubys/nokogumbo.svg)](https://travis-ci.org/rubys/nokogumbo)
9
+ [![Appveyor Build Status](https://ci.appveyor.com/api/projects/status/github/rubys/nokogumbo)](https://ci.appveyor.com/project/rubys/nokogumbo/branch/master)
10
10
 
11
- Usage
12
- -----
11
+ ## Usage
13
12
 
14
13
  ```ruby
15
14
  require 'nokogumbo'
16
- doc = Nokogiri::HTML5(string)
15
+ doc = Nokogiri.HTML5(string)
17
16
  ```
18
17
 
19
- An experimental _fragment_ method is also provided. While not HTML5
20
- compliant, it may be useful:
18
+ To parse an HTML fragment, a `fragment` method is provided.
21
19
 
22
20
  ```ruby
23
21
  require 'nokogumbo'
@@ -32,21 +30,207 @@ require 'nokogumbo'
32
30
  doc = Nokogiri::HTML5.get(uri)
33
31
  ```
34
32
 
35
- Example
36
- -----
33
+ ## Parsing options
34
+ The document and fragment parsing methods,
35
+ - `Nokogiri.HTML5(html, url = nil, encoding = nil, options = {})`
36
+ - `Nokogiri::HTML5.parse(html, url = nil, encoding = nil, options = {})`
37
+ - `Nokogiri::HTML5::Document.parse(html, url = nil, encoding = nil, options = {})`
38
+ - `Nokogiri::HTML5.fragment(html, encoding = nil, options = {})`
39
+ - `Nokogiri::HTML5::DocumentFragment.parse(html, encoding = nil, options = {})`
40
+ support options that are different from Nokogiri's.
41
+
42
+ The three currently supported options are `:max_errors`, `:max_tree_depth` and
43
+ `:max_attributes`, described below.
44
+
45
+ ### Error reporting
46
+ Nokogumbo contains an experimental parse error reporting facility. By default,
47
+ no parse errors are reported but this can be configured by passing the
48
+ `:max_errors` option to `::parse` or `::fragment`.
49
+
37
50
  ```ruby
38
51
  require 'nokogumbo'
39
- puts Nokogiri::HTML5.get('http://nokogiri.org').search('ol li')[2].text
52
+ doc = Nokogiri::HTML5.parse('<span/>Hi there!</span foo=bar />', max_errors: 10)
53
+ doc.errors.each do |err|
54
+ puts(err)
55
+ end
40
56
  ```
41
57
 
42
- Use `.to_html` instead of `.to_s` when parsing and serializing multiple times
58
+ This prints the following.
43
59
  ```
60
+ 1:1: ERROR: Expected a doctype token
61
+ <span/>Hi there!</span foo=bar />
62
+ ^
63
+ 1:1: ERROR: Start tag of nonvoid HTML element ends with '/>', use '>'.
64
+ <span/>Hi there!</span foo=bar />
65
+ ^
66
+ 1:17: ERROR: End tag ends with '/>', use '>'.
67
+ <span/>Hi there!</span foo=bar />
68
+ ^
69
+ 1:17: ERROR: End tag contains attributes.
70
+ <span/>Hi there!</span foo=bar />
71
+ ^
72
+ ```
73
+
74
+ Using `max_errors: -1` results in an unlimited number of errors being
75
+ returned.
76
+
77
+ The errors returned by `#errors` are instances of
78
+ [`Nokogiri::XML::SyntaxError`](https://www.rubydoc.info/github/sparklemotion/nokogiri/Nokogiri/XML/SyntaxError).
79
+
80
+ The [HTML
81
+ standard](https://html.spec.whatwg.org/multipage/parsing.html#parse-errors)
82
+ defines a number of standard parse error codes. These error codes only cover
83
+ the "tokenization" stage of parsing HTML. The parse errors in the
84
+ "tree construction" stage do not have standardized error codes (yet).
85
+
86
+ As a convenience to Nokogumbo users, the defined error codes are available
87
+ via the
88
+ [`Nokogiri::XML::SyntaxError#str1`](https://www.rubydoc.info/github/sparklemotion/nokogiri/Nokogiri/XML/SyntaxError#str1-instance_method)
89
+ method.
90
+
91
+ ```ruby
44
92
  require 'nokogumbo'
45
- Nokogiri::HTML5.parse(Nokogiri::HTML5.parse('<div></div> a').to_html).to_html
93
+ doc = Nokogiri::HTML5.parse('<span/>Hi there!</span foo=bar />', max_errors: 10)
94
+ doc.errors.each do |err|
95
+ puts("#{err.line}:#{err.column}: #{err.str1}")
96
+ end
97
+ ```
98
+
99
+ This prints the following.
100
+ ```
101
+ 1:1: generic-parser
102
+ 1:1: non-void-html-element-start-tag-with-trailing-solidus
103
+ 1:17: end-tag-with-trailing-solidus
104
+ 1:17: end-tag-with-attributes
105
+ ```
106
+
107
+ Note that the first error is `generic-parser` because it's an error from the
108
+ tree construction stage and doesn't have a standardized error code.
109
+
110
+ For the purposes of semantic versioning, the error messages, error locations,
111
+ and error codes are not part of Nokogumbo's public API. That is, these are
112
+ subject to change without Nokogumbo's major version number changing. These may
113
+ be stabilized in the future.
114
+
115
+ ### Maximum tree depth
116
+ The maximum depth of the DOM tree parsed by the various parsing methods is
117
+ configurable by the `:max_tree_depth` option. If the depth of the tree would
118
+ exceed this limit, then an
119
+ [ArgumentError](https://ruby-doc.org/core-2.5.0/ArgumentError.html) is raised.
120
+
121
+ This limit (which defaults to `Nokogumbo::DEFAULT_MAX_TREE_DEPTH = 400`) can
122
+ be removed by giving the option `max_tree_depth: -1`.
123
+
124
+ ``` ruby
125
+ html = '<!DOCTYPE html>' + '<div>' * 1000
126
+ doc = Nokogiri.HTML5(html)
127
+ # raises ArgumentError: Document tree depth limit exceeded
128
+ doc = Nokogiri.HTML5(html, max_tree_depth: -1)
46
129
  ```
47
130
 
48
- Notes
49
- -----
131
+ ### Attribute limit per element
132
+ The maximum number of attributes per DOM element is configurable by the
133
+ `:max_attributes` option. If a given element would exceed this limit, then an
134
+ [ArgumentError](https://ruby-doc.org/core-2.5.0/ArgumentError.html) is raised.
135
+
136
+ This limit (which defaults to `Nokogumbo::DEFAULT_MAX_ATTRIBUTES = 400`) can
137
+ be removed by giving the option `max_attributes: -1`.
138
+
139
+ ``` ruby
140
+ html = '<!DOCTYPE html><div ' + (1..1000).map { |x| "attr-#{x}" }.join(' ') + '>'
141
+ # "<!DOCTYPE html><div attr-1 attr-2 attr-3 ... attr-1000>"
142
+ doc = Nokogiri.HTML5(html)
143
+ # raises ArgumentError: Attributes per element limit exceeded
144
+ doc = Nokogiri.HTML5(html, max_attributes: -1)
145
+ ```
146
+
147
+ ## HTML Serialization
148
+
149
+ After parsing HTML, it may be serialized using any of the Nokogiri
150
+ [serialization
151
+ methods](https://www.rubydoc.info/gems/nokogiri/Nokogiri/XML/Node). In
152
+ particular, `#serialize`, `#to_html`, and `#to_s` will serialize a given node
153
+ and its children. (This is the equivalent of JavaScript's
154
+ `Element.outerHTML`.) Similarly, `#inner_html` will serialize the children of
155
+ a given node. (This is the equivalent of JavaScript's `Element.innerHTML`.)
156
+
157
+ ``` ruby
158
+ doc = Nokogiri::HTML5("<!DOCTYPE html><span>Hello world!</span>")
159
+ puts doc.serialize
160
+ # Prints: <!DOCTYPE html><html><head></head><body><span>Hello world!</span></body></html>
161
+ ```
162
+
163
+ Due to quirks in how HTML is parsed and serialized, it's possible for a DOM
164
+ tree to be serialized and then re-parsed, resulting in a different DOM.
165
+ Mostly, this happens with DOMs produced from invalid HTML. Unfortunately, even
166
+ valid HTML may not survive serialization and re-parsing.
167
+
168
+ In particular, a newline at the start of `pre`, `listing`, and `textarea`
169
+ elements is ignored by the parser.
170
+
171
+ ``` ruby
172
+ doc = Nokogiri::HTML5(<<-EOF)
173
+ <!DOCTYPE html>
174
+ <pre>
175
+ Content</pre>
176
+ EOF
177
+ puts doc.at('/html/body/pre').serialize
178
+ # Prints: <pre>Content</pre>
179
+ ```
180
+
181
+ In this case, the original HTML is semantically equivalent to the serialized
182
+ version. If the `pre`, `listing`, or `textarea` content starts with two
183
+ newlines, the first newline will be stripped on the first parse and the second
184
+ newline will be stripped on the second, leading to semantically different
185
+ DOMs. Passing the parameter `preserve_newline: true` will cause two or more
186
+ newlines to be preserved. (A single leading newline will still be removed.)
187
+
188
+ ``` ruby
189
+ doc = Nokogiri::HTML5(<<-EOF)
190
+ <!DOCTYPE html>
191
+ <listing>
192
+
193
+ Content</listing>
194
+ EOF
195
+ puts doc.at('/html/body/listing').serialize(preserve_newline: true)
196
+ # Prints: <listing>
197
+ #
198
+ # Content</listing>
199
+ ```
200
+
201
+ ## Encodings
202
+ Nokogumbo always parses HTML using
203
+ [UTF-8](https://en.wikipedia.org/wiki/UTF-8); however, the encoding of the
204
+ input can be explicitly selected via the optional `encoding` parameter. This
205
+ is most useful when the input comes not from a string but from an IO object.
206
+
207
+ When serializing a document or node, the encoding of the output string can be
208
+ specified via the `:encoding` option. Characters that cannot be encoded in
209
+ the selected encoding will be encoded as [HTML numeric
210
+ entities](https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references).
211
+
212
+ ``` ruby
213
+ frag = Nokogiri::HTML5.fragment('<span>아는 길도 물어가라</span>')
214
+ html = frag.serialize(encoding: 'US-ASCII')
215
+ puts html
216
+ # Prints: <span>&#xc544;&#xb294; &#xae38;&#xb3c4; &#xbb3c;&#xc5b4;&#xac00;&#xb77c;</span>
217
+ frag = Nokogiri::HTML5.fragment(html)
218
+ puts frag.serialize
219
+ # Prints: <span>아는 길도 물어가라</span>
220
+ ```
221
+
222
+ (There's a [bug](https://bugs.ruby-lang.org/issues/15033) in all current
223
+ versions of Ruby that can cause the entity encoding to fail. Of the mandated
224
+ supported encodings for HTML, the only encoding I'm aware of that has this bug
225
+ is `'ISO-2022-JP'`. I recommend avoiding this encoding.)
226
+
227
+ ## Examples
228
+ ```ruby
229
+ require 'nokogumbo'
230
+ puts Nokogiri::HTML5.get('http://nokogiri.org').search('ol li')[2].text
231
+ ```
232
+
233
+ ## Notes
50
234
 
51
235
  * The `Nokogiri::HTML5.fragment` function takes a string and parses it
52
236
  as an HTML5 document. The `<html>`, `<head>`, and `<body>` elements are
@@ -74,20 +258,47 @@ rules defined in the HTML5 specification for doing so.
74
258
  * Instead of returning `unknown` as the element name for unknown tags, the
75
259
  original tag name is returned verbatim.
76
260
 
77
- * If the Gumbo HTML5 parser is not already installed, the source for the
78
- parser will be downloaded and compiled into the Gem itself.
261
+ # Flavors of Nokogumbo
262
+ Nokogumbo uses libxml2, the XML library underlying Nokogiri, to speed up
263
+ parsing. If the libxml2 headers are not available, then Nokogumbo resorts to
264
+ using Nokogiri's Ruby API to construct the DOM tree.
265
+
266
+ Nokogiri can be configured to either use the system library version of libxml2
267
+ or use a bundled version. By default (as of Nokogiri version 1.8.4), Nokogiri
268
+ will use a bundled version.
269
+
270
+ To prevent differences between versions of libxml2, Nokogumbo will only use
271
+ libxml2 if the build process can find the exact same version used by Nokogiri.
272
+ This leads to three possibilities:
273
+
274
+ 1. Nokogiri is compiled with the bundled libxml2. In this case, Nokogumbo will
275
+ (by default) use the same version of libxml2.
276
+ 2. Nokogiri is compiled with the system libxml2. In this case, if the libxml2
277
+ headers are available, then Nokogumbo will (by default) use the system
278
+ version and headers.
279
+ 3. Nokogiri is compiled with the system libxml2 but its headers aren't
280
+ available at build time for Nokogumbo. In this case, Nokogumbo will use the
281
+ slower Ruby API.
282
+
283
+ Using libxml2 can be required by passing `-- --with-libxml2` to `bundle exec
284
+ rake` or to `gem install`. Using libxml2 can be prohibited by instead passing
285
+ `-- --without-libxml2`.
286
+
287
+ Functionally, the only difference between using libxml2 or not is in the
288
+ behavior of `Nokogiri::XML::Node#line`. If it is used, then `#line` will
289
+ return the line number of the corresponding node. Otherwise, it will return 0.
79
290
 
80
- Installation
81
- ============
291
+ # Installation
82
292
 
83
- git clone --recursive https://github.com/rubys/nokogumbo.git
293
+ git clone https://github.com/rubys/nokogumbo.git
84
294
  cd nokogumbo
85
295
  bundle install
86
296
  rake gem
87
297
  gem install pkg/nokogumbo*.gem
88
298
 
89
- Related efforts
90
- ============
299
+ # Related efforts
91
300
 
92
- * [ruby-gumbo](https://github.com/nevir/ruby-gumbo#readme) - a ruby binding
93
- for the Gumbo HTML5 parser.
301
+ * [ruby-gumbo](https://github.com/nevir/ruby-gumbo#readme) -- a ruby binding
302
+ for the Gumbo HTML5 parser.
303
+ * [lua-gumbo](https://gitlab.com/craigbarnes/lua-gumbo) -- a lua binding for
304
+ the Gumbo HTML5 parser.
data/ext/nokogumbo/extconf.rb ADDED
@@ -0,0 +1,144 @@
1
+ require 'rubygems'
2
+ require 'fileutils'
3
+ require 'mkmf'
4
+ require 'nokogiri'
5
+
6
+ $CFLAGS += " -std=c99"
7
+ $LDFLAGS.gsub!('-Wl,--no-undefined', '')
8
+ $DLDFLAGS.gsub!('-Wl,--no-undefined', '')
9
+ $warnflags = CONFIG['warnflags'] = '-Wall'
10
+
11
+ NG_SPEC = Gem::Specification.find_by_name('nokogiri', "= #{Nokogiri::VERSION}")
12
+
13
+ def download_headers
14
+ begin
15
+ require 'yaml'
16
+
17
+ dependencies = YAML.load_file(File.join(NG_SPEC.gem_dir, 'dependencies.yml'))
18
+ version = dependencies['libxml2']['version']
19
+ host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"]
20
+ path = File.join('ports', host, 'libxml2', version, 'include/libxml2')
21
+ return path if File.directory?(path)
22
+
23
+ # Make sure we're using the same version Nokogiri uses
24
+ dep_index = NG_SPEC.dependencies.index { |dep| dep.name == 'mini_portile2' and dep.type == :runtime }
25
+ return nil if dep_index.nil?
26
+ requirement = NG_SPEC.dependencies[dep_index].requirement.to_s
27
+
28
+ gem 'mini_portile2', requirement
29
+ require 'mini_portile2'
30
+ p = MiniPortile::new('libxml2', version).tap do |r|
31
+ r.host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"]
32
+ r.files = [{
33
+ url: "http://xmlsoft.org/sources/libxml2-#{r.version}.tar.gz",
34
+ sha256: dependencies['libxml2']['sha256']
35
+ }]
36
+ r.configure_options += [
37
+ "--without-python",
38
+ "--without-readline",
39
+ "--with-c14n",
40
+ "--with-debug",
41
+ "--with-threads"
42
+ ]
43
+ end
44
+ p.download unless p.downloaded?
45
+ p.extract
46
+ p.configure unless p.configured?
47
+ system('make', '-C', "tmp/#{p.host}/ports/libxml2/#{version}/libxml2-#{version}/include/libxml", 'install-xmlincHEADERS')
48
+ path
49
+ rescue
50
+ puts 'failed to download/install headers'
51
+ nil
52
+ end
53
+ end
54
+
55
+ required = arg_config('--with-libxml2')
56
+ prohibited = arg_config('--without-libxml2')
57
+ if required and prohibited
58
+ abort "cannot use both --with-libxml2 and --without-libxml2"
59
+ end
60
+
61
+ have_libxml2 = false
62
+ have_ng = false
63
+
64
+ def windows?
65
+ ::RUBY_PLATFORM =~ /mingw|mswin/
66
+ end
67
+
68
+ def modern_nokogiri?
69
+ nokogiri_version = Gem::Version.new(Nokogiri::VERSION)
70
+ requirement = windows? ? ">= 1.11.2" : ">= 1.11.0.rc4"
71
+ Gem::Requirement.new(requirement).satisfied_by?(nokogiri_version)
72
+ end
73
+
74
+ if !prohibited
75
+ if modern_nokogiri?
76
+ append_cflags(Nokogiri::VERSION_INFO["nokogiri"]["cppflags"])
77
+ append_ldflags(Nokogiri::VERSION_INFO["nokogiri"]["ldflags"]) # may be nil for nokogiri pre-1.11.2
78
+ have_libxml2 = if Nokogiri::VERSION_INFO["nokogiri"]["ldflags"].empty?
79
+ have_header('libxml/tree.h')
80
+ else
81
+ have_func("xmlNewDoc", "libxml/tree.h")
82
+ end
83
+ end
84
+
85
+ if !have_libxml2
86
+ if Nokogiri::VERSION_INFO.include?('libxml') and
87
+ Nokogiri::VERSION_INFO['libxml']['source'] == 'packaged'
88
+ # Nokogiri has libxml2 built in. Find the headers.
89
+ libxml2_path = File.join(Nokogiri::VERSION_INFO['libxml']['libxml2_path'],
90
+ 'include/libxml2')
91
+ if find_header('libxml/tree.h', libxml2_path)
92
+ have_libxml2 = true
93
+ else
94
+ # Unfortunately, some versions of Nokogiri delete these files.
95
+ # https://github.com/sparklemotion/nokogiri/pull/1788
96
+ # Try to download them
97
+ libxml2_path = download_headers
98
+ unless libxml2_path.nil?
99
+ have_libxml2 = find_header('libxml/tree.h', libxml2_path)
100
+ end
101
+ end
102
+ else
103
+ # Nokogiri is compiled with system headers.
104
+ # Hack to work around broken mkmf on macOS
105
+ # (https://bugs.ruby-lang.org/issues/14992 fixed now)
106
+ if RbConfig::MAKEFILE_CONFIG['LIBPATHENV'] == 'DYLD_LIBRARY_PATH'
107
+ RbConfig::MAKEFILE_CONFIG['LIBPATHENV'] = 'DYLD_FALLBACK_LIBRARY_PATH'
108
+ end
109
+
110
+ pkg_config('libxml-2.0')
111
+ have_libxml2 = have_library('xml2', 'xmlNewDoc')
112
+ end
113
+ end
114
+
115
+ if required and !have_libxml2
116
+ abort "libxml2 required but could not be located"
117
+ end
118
+
119
+
120
+ if have_libxml2
121
+ have_ng = have_header('nokogiri.h') || find_header('nokogiri.h', File.join(NG_SPEC.gem_dir, 'ext/nokogiri'))
122
+ end
123
+ end
124
+
125
+ if have_libxml2 and have_ng
126
+ $CFLAGS += " -DNGLIB=1"
127
+ end
128
+
129
+ # Symlink gumbo-parser source files.
130
+ ext_dir = File.dirname(__FILE__)
131
+
132
+ Dir.chdir(ext_dir) do
133
+ $srcs = Dir['*.c', '../../gumbo-parser/src/*.c']
134
+ $hdrs = Dir['*.h', '../../gumbo-parser/src/*.h']
135
+ end
136
+ $INCFLAGS << ' -I$(srcdir)/../../gumbo-parser/src'
137
+ $VPATH << '$(srcdir)/../../gumbo-parser/src'
138
+
139
+ create_makefile('nokogumbo/nokogumbo') do |conf|
140
+ conf.map! do |chunk|
141
+ chunk.gsub(/^HDRS = .*$/, "HDRS = #{$hdrs.map { |h| File.join('$(srcdir)', h)}.join(' ')}")
142
+ end
143
+ end
144
+ # vim: set sw=2 sts=2 ts=8 et: