nokogiri 1.10.9 → 1.11.0.rc4
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.md +1015 -947
- data/README.md +24 -22
- data/ext/nokogiri/depend +476 -357
- data/ext/nokogiri/extconf.rb +485 -352
- data/ext/nokogiri/html_document.c +79 -78
- data/ext/nokogiri/html_sax_parser_context.c +2 -2
- data/ext/nokogiri/nokogiri.c +34 -40
- data/ext/nokogiri/nokogiri.h +26 -17
- data/ext/nokogiri/xml_document.c +18 -4
- data/ext/nokogiri/xml_io.c +8 -6
- data/ext/nokogiri/xml_node.c +21 -1
- data/ext/nokogiri/xml_node_set.c +1 -1
- data/ext/nokogiri/xml_reader.c +6 -17
- data/ext/nokogiri/xml_relax_ng.c +29 -11
- data/ext/nokogiri/xml_sax_parser.c +2 -7
- data/ext/nokogiri/xml_sax_parser_context.c +2 -2
- data/ext/nokogiri/xml_schema.c +55 -13
- data/ext/nokogiri/xml_xpath_context.c +80 -4
- data/ext/nokogiri/xslt_stylesheet.c +1 -8
- data/lib/nokogiri.rb +4 -21
- data/lib/nokogiri/css.rb +1 -0
- data/lib/nokogiri/css/node.rb +1 -0
- data/lib/nokogiri/css/parser.rb +63 -62
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +39 -36
- data/lib/nokogiri/css/syntax_error.rb +1 -0
- data/lib/nokogiri/css/tokenizer.rb +1 -0
- data/lib/nokogiri/css/xpath_visitor.rb +73 -43
- data/lib/nokogiri/decorators/slop.rb +1 -0
- data/lib/nokogiri/html.rb +1 -0
- data/lib/nokogiri/html/builder.rb +1 -0
- data/lib/nokogiri/html/document.rb +13 -26
- data/lib/nokogiri/html/document_fragment.rb +1 -0
- data/lib/nokogiri/html/element_description.rb +1 -0
- data/lib/nokogiri/html/element_description_defaults.rb +1 -0
- data/lib/nokogiri/html/entity_lookup.rb +1 -0
- data/lib/nokogiri/html/sax/parser.rb +1 -0
- data/lib/nokogiri/html/sax/parser_context.rb +1 -0
- data/lib/nokogiri/html/sax/push_parser.rb +1 -0
- data/lib/nokogiri/jruby/dependencies.rb +20 -0
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version.rb +3 -109
- data/lib/nokogiri/version/constant.rb +5 -0
- data/lib/nokogiri/version/info.rb +182 -0
- data/lib/nokogiri/xml.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +1 -0
- data/lib/nokogiri/xml/attribute_decl.rb +1 -0
- data/lib/nokogiri/xml/builder.rb +3 -2
- data/lib/nokogiri/xml/cdata.rb +1 -0
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +20 -15
- data/lib/nokogiri/xml/document_fragment.rb +5 -6
- data/lib/nokogiri/xml/dtd.rb +1 -0
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +1 -0
- data/lib/nokogiri/xml/node.rb +587 -249
- data/lib/nokogiri/xml/node/save_options.rb +1 -0
- data/lib/nokogiri/xml/node_set.rb +1 -0
- data/lib/nokogiri/xml/notation.rb +1 -0
- data/lib/nokogiri/xml/parse_options.rb +10 -3
- data/lib/nokogiri/xml/pp.rb +1 -0
- data/lib/nokogiri/xml/pp/character_data.rb +1 -0
- data/lib/nokogiri/xml/pp/node.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +1 -0
- data/lib/nokogiri/xml/reader.rb +7 -3
- data/lib/nokogiri/xml/relax_ng.rb +7 -2
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +1 -0
- data/lib/nokogiri/xml/sax/parser.rb +1 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +13 -4
- data/lib/nokogiri/xml/searchable.rb +25 -16
- data/lib/nokogiri/xml/syntax_error.rb +1 -0
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -0
- data/lib/nokogiri/xml/xpath_context.rb +1 -0
- data/lib/nokogiri/xslt.rb +1 -0
- data/lib/nokogiri/xslt/stylesheet.rb +1 -0
- data/lib/xsd/xmlparser/nokogiri.rb +1 -0
- data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +73 -0
- data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +103 -0
- data/patches/libxml2/0008-use-glibc-strlen.patch +53 -0
- metadata +81 -48
@@ -0,0 +1,53 @@
|
|
1
|
+
From c94172d2a4451368530db2186190d70be8a1d9e5 Mon Sep 17 00:00:00 2001
|
2
|
+
From: Ilya Zub <ilya@serpapi.com>
|
3
|
+
Date: Wed, 23 Dec 2020 12:45:29 +0200
|
4
|
+
Subject: Use glibc strlen to speed up xmlStrlen
|
5
|
+
MIME-Version: 1.0
|
6
|
+
Content-Type: text/plain; charset=UTF-8
|
7
|
+
Content-Transfer-Encoding: 8bit
|
8
|
+
|
9
|
+
xmlStrlen (entire HTML file): 926171.936981 μs
|
10
|
+
glibc_xmlStrlen (entire HTML file): 36905.903992 μs
|
11
|
+
delta (xmlStrlen ÷ glibc_xmlStrlen): 25.094584 times
|
12
|
+
|
13
|
+
xmlStrlen (average string): 57479.204010 μs
|
14
|
+
glibc_xmlStrlen (average string): 5802.069000 μs
|
15
|
+
delta (xmlStrlen ÷ glibc_xmlStrlen): 9.905937 times
|
16
|
+
|
17
|
+
xmlStrlen (bigger string): 388056.315979 μs
|
18
|
+
glibc_xmlStrlen (bigger string): 12797.856995 μs
|
19
|
+
delta (xmlStrlen ÷ glibc_xmlStrlen): 30.318382 times
|
20
|
+
|
21
|
+
xmlStrlen (smallest string): 15870.046021 μs
|
22
|
+
glibc_xmlStrlen (smallest string): 6282.208984 μs
|
23
|
+
delta (xmlStrlen ÷ glibc_xmlStrlen): 2.527903 times
|
24
|
+
|
25
|
+
See https://gitlab.gnome.org/GNOME/libxml2/-/issues/212 for reference.
|
26
|
+
---
|
27
|
+
xmlstring.c | 9 ++-------
|
28
|
+
1 file changed, 2 insertions(+), 7 deletions(-)
|
29
|
+
|
30
|
+
diff --git a/xmlstring.c b/xmlstring.c
|
31
|
+
index e8a1e45d..df247dff 100644
|
32
|
+
--- a/xmlstring.c
|
33
|
+
+++ b/xmlstring.c
|
34
|
+
@@ -423,14 +423,9 @@ xmlStrsub(const xmlChar *str, int start, int len) {
|
35
|
+
|
36
|
+
int
|
37
|
+
xmlStrlen(const xmlChar *str) {
|
38
|
+
- int len = 0;
|
39
|
+
-
|
40
|
+
if (str == NULL) return(0);
|
41
|
+
- while (*str != 0) { /* non input consuming */
|
42
|
+
- str++;
|
43
|
+
- len++;
|
44
|
+
- }
|
45
|
+
- return(len);
|
46
|
+
+
|
47
|
+
+ return strlen((const char*)str);
|
48
|
+
}
|
49
|
+
|
50
|
+
/**
|
51
|
+
--
|
52
|
+
2.29.2
|
53
|
+
|
metadata
CHANGED
@@ -1,49 +1,83 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogiri
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.11.0.rc4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
-
- Aaron Patterson
|
8
7
|
- Mike Dalessio
|
8
|
+
- Aaron Patterson
|
9
|
+
- John Shahid
|
9
10
|
- Yoko Harada
|
10
|
-
- Tim Elliott
|
11
11
|
- Akinori MUSHA
|
12
|
-
- John Shahid
|
13
12
|
- Lars Kanis
|
13
|
+
- Tim Elliott
|
14
14
|
autorequire:
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
|
-
date: 2020-
|
17
|
+
date: 2020-12-29 00:00:00.000000000 Z
|
18
18
|
dependencies:
|
19
|
+
- !ruby/object:Gem::Dependency
|
20
|
+
name: racc
|
21
|
+
requirement: !ruby/object:Gem::Requirement
|
22
|
+
requirements:
|
23
|
+
- - "~>"
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: '1.4'
|
26
|
+
type: :runtime
|
27
|
+
prerelease: false
|
28
|
+
version_requirements: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - "~>"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '1.4'
|
19
33
|
- !ruby/object:Gem::Dependency
|
20
34
|
name: mini_portile2
|
21
35
|
requirement: !ruby/object:Gem::Requirement
|
22
36
|
requirements:
|
23
37
|
- - "~>"
|
24
38
|
- !ruby/object:Gem::Version
|
25
|
-
version: 2.
|
39
|
+
version: 2.5.0
|
26
40
|
type: :runtime
|
27
41
|
prerelease: false
|
28
42
|
version_requirements: !ruby/object:Gem::Requirement
|
29
43
|
requirements:
|
30
44
|
- - "~>"
|
31
45
|
- !ruby/object:Gem::Version
|
32
|
-
version: 2.
|
46
|
+
version: 2.5.0
|
33
47
|
- !ruby/object:Gem::Dependency
|
34
48
|
name: concourse
|
35
49
|
requirement: !ruby/object:Gem::Requirement
|
36
50
|
requirements:
|
37
51
|
- - "~>"
|
38
52
|
- !ruby/object:Gem::Version
|
39
|
-
version: '0.
|
53
|
+
version: '0.41'
|
40
54
|
type: :development
|
41
55
|
prerelease: false
|
42
56
|
version_requirements: !ruby/object:Gem::Requirement
|
43
57
|
requirements:
|
44
58
|
- - "~>"
|
45
59
|
- !ruby/object:Gem::Version
|
46
|
-
version: '0.
|
60
|
+
version: '0.41'
|
61
|
+
- !ruby/object:Gem::Dependency
|
62
|
+
name: hoe
|
63
|
+
requirement: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - "~>"
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '3.22'
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: 3.22.1
|
71
|
+
type: :development
|
72
|
+
prerelease: false
|
73
|
+
version_requirements: !ruby/object:Gem::Requirement
|
74
|
+
requirements:
|
75
|
+
- - "~>"
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '3.22'
|
78
|
+
- - ">="
|
79
|
+
- !ruby/object:Gem::Version
|
80
|
+
version: 3.22.1
|
47
81
|
- !ruby/object:Gem::Dependency
|
48
82
|
name: hoe-bundler
|
49
83
|
requirement: !ruby/object:Gem::Requirement
|
@@ -100,6 +134,20 @@ dependencies:
|
|
100
134
|
- - "~>"
|
101
135
|
- !ruby/object:Gem::Version
|
102
136
|
version: '1.6'
|
137
|
+
- !ruby/object:Gem::Dependency
|
138
|
+
name: hoe-markdown
|
139
|
+
requirement: !ruby/object:Gem::Requirement
|
140
|
+
requirements:
|
141
|
+
- - "~>"
|
142
|
+
- !ruby/object:Gem::Version
|
143
|
+
version: '1.1'
|
144
|
+
type: :development
|
145
|
+
prerelease: false
|
146
|
+
version_requirements: !ruby/object:Gem::Requirement
|
147
|
+
requirements:
|
148
|
+
- - "~>"
|
149
|
+
- !ruby/object:Gem::Version
|
150
|
+
version: '1.1'
|
103
151
|
- !ruby/object:Gem::Dependency
|
104
152
|
name: minitest
|
105
153
|
requirement: !ruby/object:Gem::Requirement
|
@@ -115,61 +163,61 @@ dependencies:
|
|
115
163
|
- !ruby/object:Gem::Version
|
116
164
|
version: '5.8'
|
117
165
|
- !ruby/object:Gem::Dependency
|
118
|
-
name:
|
166
|
+
name: minitest-reporters
|
119
167
|
requirement: !ruby/object:Gem::Requirement
|
120
168
|
requirements:
|
121
169
|
- - "~>"
|
122
170
|
- !ruby/object:Gem::Version
|
123
|
-
version: 1.4
|
171
|
+
version: '1.4'
|
124
172
|
type: :development
|
125
173
|
prerelease: false
|
126
174
|
version_requirements: !ruby/object:Gem::Requirement
|
127
175
|
requirements:
|
128
176
|
- - "~>"
|
129
177
|
- !ruby/object:Gem::Version
|
130
|
-
version: 1.4
|
178
|
+
version: '1.4'
|
131
179
|
- !ruby/object:Gem::Dependency
|
132
180
|
name: rake
|
133
181
|
requirement: !ruby/object:Gem::Requirement
|
134
182
|
requirements:
|
135
183
|
- - "~>"
|
136
184
|
- !ruby/object:Gem::Version
|
137
|
-
version: '
|
185
|
+
version: '13.0'
|
138
186
|
type: :development
|
139
187
|
prerelease: false
|
140
188
|
version_requirements: !ruby/object:Gem::Requirement
|
141
189
|
requirements:
|
142
190
|
- - "~>"
|
143
191
|
- !ruby/object:Gem::Version
|
144
|
-
version: '
|
192
|
+
version: '13.0'
|
145
193
|
- !ruby/object:Gem::Dependency
|
146
194
|
name: rake-compiler
|
147
195
|
requirement: !ruby/object:Gem::Requirement
|
148
196
|
requirements:
|
149
197
|
- - "~>"
|
150
198
|
- !ruby/object:Gem::Version
|
151
|
-
version: 1.1
|
199
|
+
version: '1.1'
|
152
200
|
type: :development
|
153
201
|
prerelease: false
|
154
202
|
version_requirements: !ruby/object:Gem::Requirement
|
155
203
|
requirements:
|
156
204
|
- - "~>"
|
157
205
|
- !ruby/object:Gem::Version
|
158
|
-
version: 1.1
|
206
|
+
version: '1.1'
|
159
207
|
- !ruby/object:Gem::Dependency
|
160
208
|
name: rake-compiler-dock
|
161
209
|
requirement: !ruby/object:Gem::Requirement
|
162
210
|
requirements:
|
163
211
|
- - "~>"
|
164
212
|
- !ruby/object:Gem::Version
|
165
|
-
version:
|
213
|
+
version: '1.1'
|
166
214
|
type: :development
|
167
215
|
prerelease: false
|
168
216
|
version_requirements: !ruby/object:Gem::Requirement
|
169
217
|
requirements:
|
170
218
|
- - "~>"
|
171
219
|
- !ruby/object:Gem::Version
|
172
|
-
version:
|
220
|
+
version: '1.1'
|
173
221
|
- !ruby/object:Gem::Dependency
|
174
222
|
name: rexical
|
175
223
|
requirement: !ruby/object:Gem::Requirement
|
@@ -190,28 +238,28 @@ dependencies:
|
|
190
238
|
requirements:
|
191
239
|
- - "~>"
|
192
240
|
- !ruby/object:Gem::Version
|
193
|
-
version: '0.
|
241
|
+
version: '0.88'
|
194
242
|
type: :development
|
195
243
|
prerelease: false
|
196
244
|
version_requirements: !ruby/object:Gem::Requirement
|
197
245
|
requirements:
|
198
246
|
- - "~>"
|
199
247
|
- !ruby/object:Gem::Version
|
200
|
-
version: '0.
|
248
|
+
version: '0.88'
|
201
249
|
- !ruby/object:Gem::Dependency
|
202
250
|
name: simplecov
|
203
251
|
requirement: !ruby/object:Gem::Requirement
|
204
252
|
requirements:
|
205
253
|
- - "~>"
|
206
254
|
- !ruby/object:Gem::Version
|
207
|
-
version:
|
255
|
+
version: 0.17.0
|
208
256
|
type: :development
|
209
257
|
prerelease: false
|
210
258
|
version_requirements: !ruby/object:Gem::Requirement
|
211
259
|
requirements:
|
212
260
|
- - "~>"
|
213
261
|
- !ruby/object:Gem::Version
|
214
|
-
version:
|
262
|
+
version: 0.17.0
|
215
263
|
- !ruby/object:Gem::Dependency
|
216
264
|
name: rdoc
|
217
265
|
requirement: !ruby/object:Gem::Requirement
|
@@ -232,32 +280,11 @@ dependencies:
|
|
232
280
|
- - "<"
|
233
281
|
- !ruby/object:Gem::Version
|
234
282
|
version: '7'
|
235
|
-
- !ruby/object:Gem::Dependency
|
236
|
-
name: hoe
|
237
|
-
requirement: !ruby/object:Gem::Requirement
|
238
|
-
requirements:
|
239
|
-
- - "~>"
|
240
|
-
- !ruby/object:Gem::Version
|
241
|
-
version: '3.22'
|
242
|
-
type: :development
|
243
|
-
prerelease: false
|
244
|
-
version_requirements: !ruby/object:Gem::Requirement
|
245
|
-
requirements:
|
246
|
-
- - "~>"
|
247
|
-
- !ruby/object:Gem::Version
|
248
|
-
version: '3.22'
|
249
283
|
description: |-
|
250
284
|
Nokogiri (鋸) is an HTML, XML, SAX, and Reader parser. Among
|
251
285
|
Nokogiri's many features is the ability to search documents via XPath
|
252
286
|
or CSS3 selectors.
|
253
|
-
email:
|
254
|
-
- aaronp@rubyforge.org
|
255
|
-
- mike.dalessio@gmail.com
|
256
|
-
- yokolet@gmail.com
|
257
|
-
- tle@holymonkey.com
|
258
|
-
- knu@idaemons.org
|
259
|
-
- jvshahid@gmail.com
|
260
|
-
- lars@greiz-reinsdorf.de
|
287
|
+
email: nokogiri-talk@googlegroups.com
|
261
288
|
executables:
|
262
289
|
- nokogiri
|
263
290
|
extensions:
|
@@ -397,8 +424,11 @@ files:
|
|
397
424
|
- lib/nokogiri/html/sax/parser.rb
|
398
425
|
- lib/nokogiri/html/sax/parser_context.rb
|
399
426
|
- lib/nokogiri/html/sax/push_parser.rb
|
427
|
+
- lib/nokogiri/jruby/dependencies.rb
|
400
428
|
- lib/nokogiri/syntax_error.rb
|
401
429
|
- lib/nokogiri/version.rb
|
430
|
+
- lib/nokogiri/version/constant.rb
|
431
|
+
- lib/nokogiri/version/info.rb
|
402
432
|
- lib/nokogiri/xml.rb
|
403
433
|
- lib/nokogiri/xml/attr.rb
|
404
434
|
- lib/nokogiri/xml/attribute_decl.rb
|
@@ -444,6 +474,9 @@ files:
|
|
444
474
|
- patches/libxml2/0003-Update-entities-to-remove-handling-of-ssi.patch
|
445
475
|
- patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch
|
446
476
|
- patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch
|
477
|
+
- patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch
|
478
|
+
- patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch
|
479
|
+
- patches/libxml2/0008-use-glibc-strlen.patch
|
447
480
|
- ports/archives/libxml2-2.9.10.tar.gz
|
448
481
|
- ports/archives/libxslt-1.1.34.tar.gz
|
449
482
|
homepage: https://nokogiri.org
|
@@ -465,14 +498,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
465
498
|
requirements:
|
466
499
|
- - ">="
|
467
500
|
- !ruby/object:Gem::Version
|
468
|
-
version: 2.
|
501
|
+
version: 2.5.0
|
469
502
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
470
503
|
requirements:
|
471
|
-
- - "
|
504
|
+
- - ">"
|
472
505
|
- !ruby/object:Gem::Version
|
473
|
-
version:
|
506
|
+
version: 1.3.1
|
474
507
|
requirements: []
|
475
|
-
rubygems_version: 3.1.
|
508
|
+
rubygems_version: 3.1.4
|
476
509
|
signing_key:
|
477
510
|
specification_version: 4
|
478
511
|
summary: Nokogiri (鋸) is an HTML, XML, SAX, and Reader parser
|