nokogiri 1.10.9 → 1.11.0.rc4

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (89)
  1. checksums.yaml +4 -4
  2. data/LICENSE-DEPENDENCIES.md +1015 -947
  3. data/README.md +24 -22
  4. data/ext/nokogiri/depend +476 -357
  5. data/ext/nokogiri/extconf.rb +485 -352
  6. data/ext/nokogiri/html_document.c +79 -78
  7. data/ext/nokogiri/html_sax_parser_context.c +2 -2
  8. data/ext/nokogiri/nokogiri.c +34 -40
  9. data/ext/nokogiri/nokogiri.h +26 -17
  10. data/ext/nokogiri/xml_document.c +18 -4
  11. data/ext/nokogiri/xml_io.c +8 -6
  12. data/ext/nokogiri/xml_node.c +21 -1
  13. data/ext/nokogiri/xml_node_set.c +1 -1
  14. data/ext/nokogiri/xml_reader.c +6 -17
  15. data/ext/nokogiri/xml_relax_ng.c +29 -11
  16. data/ext/nokogiri/xml_sax_parser.c +2 -7
  17. data/ext/nokogiri/xml_sax_parser_context.c +2 -2
  18. data/ext/nokogiri/xml_schema.c +55 -13
  19. data/ext/nokogiri/xml_xpath_context.c +80 -4
  20. data/ext/nokogiri/xslt_stylesheet.c +1 -8
  21. data/lib/nokogiri.rb +4 -21
  22. data/lib/nokogiri/css.rb +1 -0
  23. data/lib/nokogiri/css/node.rb +1 -0
  24. data/lib/nokogiri/css/parser.rb +63 -62
  25. data/lib/nokogiri/css/parser.y +2 -2
  26. data/lib/nokogiri/css/parser_extras.rb +39 -36
  27. data/lib/nokogiri/css/syntax_error.rb +1 -0
  28. data/lib/nokogiri/css/tokenizer.rb +1 -0
  29. data/lib/nokogiri/css/xpath_visitor.rb +73 -43
  30. data/lib/nokogiri/decorators/slop.rb +1 -0
  31. data/lib/nokogiri/html.rb +1 -0
  32. data/lib/nokogiri/html/builder.rb +1 -0
  33. data/lib/nokogiri/html/document.rb +13 -26
  34. data/lib/nokogiri/html/document_fragment.rb +1 -0
  35. data/lib/nokogiri/html/element_description.rb +1 -0
  36. data/lib/nokogiri/html/element_description_defaults.rb +1 -0
  37. data/lib/nokogiri/html/entity_lookup.rb +1 -0
  38. data/lib/nokogiri/html/sax/parser.rb +1 -0
  39. data/lib/nokogiri/html/sax/parser_context.rb +1 -0
  40. data/lib/nokogiri/html/sax/push_parser.rb +1 -0
  41. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  42. data/lib/nokogiri/syntax_error.rb +1 -0
  43. data/lib/nokogiri/version.rb +3 -109
  44. data/lib/nokogiri/version/constant.rb +5 -0
  45. data/lib/nokogiri/version/info.rb +182 -0
  46. data/lib/nokogiri/xml.rb +1 -0
  47. data/lib/nokogiri/xml/attr.rb +1 -0
  48. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  49. data/lib/nokogiri/xml/builder.rb +3 -2
  50. data/lib/nokogiri/xml/cdata.rb +1 -0
  51. data/lib/nokogiri/xml/character_data.rb +1 -0
  52. data/lib/nokogiri/xml/document.rb +20 -15
  53. data/lib/nokogiri/xml/document_fragment.rb +5 -6
  54. data/lib/nokogiri/xml/dtd.rb +1 -0
  55. data/lib/nokogiri/xml/element_content.rb +1 -0
  56. data/lib/nokogiri/xml/element_decl.rb +1 -0
  57. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  58. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  59. data/lib/nokogiri/xml/namespace.rb +1 -0
  60. data/lib/nokogiri/xml/node.rb +587 -249
  61. data/lib/nokogiri/xml/node/save_options.rb +1 -0
  62. data/lib/nokogiri/xml/node_set.rb +1 -0
  63. data/lib/nokogiri/xml/notation.rb +1 -0
  64. data/lib/nokogiri/xml/parse_options.rb +10 -3
  65. data/lib/nokogiri/xml/pp.rb +1 -0
  66. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  67. data/lib/nokogiri/xml/pp/node.rb +1 -0
  68. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  69. data/lib/nokogiri/xml/reader.rb +7 -3
  70. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  71. data/lib/nokogiri/xml/sax.rb +1 -0
  72. data/lib/nokogiri/xml/sax/document.rb +1 -0
  73. data/lib/nokogiri/xml/sax/parser.rb +1 -0
  74. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  75. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  76. data/lib/nokogiri/xml/schema.rb +13 -4
  77. data/lib/nokogiri/xml/searchable.rb +25 -16
  78. data/lib/nokogiri/xml/syntax_error.rb +1 -0
  79. data/lib/nokogiri/xml/text.rb +1 -0
  80. data/lib/nokogiri/xml/xpath.rb +1 -0
  81. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -0
  82. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  83. data/lib/nokogiri/xslt.rb +1 -0
  84. data/lib/nokogiri/xslt/stylesheet.rb +1 -0
  85. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  86. data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +73 -0
  87. data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +103 -0
  88. data/patches/libxml2/0008-use-glibc-strlen.patch +53 -0
  89. metadata +81 -48
@@ -0,0 +1,53 @@
1
+ From c94172d2a4451368530db2186190d70be8a1d9e5 Mon Sep 17 00:00:00 2001
2
+ From: Ilya Zub <ilya@serpapi.com>
3
+ Date: Wed, 23 Dec 2020 12:45:29 +0200
4
+ Subject: Use glibc strlen to speed up xmlStrlen
5
+ MIME-Version: 1.0
6
+ Content-Type: text/plain; charset=UTF-8
7
+ Content-Transfer-Encoding: 8bit
8
+
9
+ xmlStrlen (entire HTML file): 926171.936981 μs
10
+ glibc_xmlStrlen (entire HTML file): 36905.903992 μs
11
+ delta (xmlStrlen ÷ glibc_xmlStrlen): 25.094584 times
12
+
13
+ xmlStrlen (average string): 57479.204010 μs
14
+ glibc_xmlStrlen (average string): 5802.069000 μs
15
+ delta (xmlStrlen ÷ glibc_xmlStrlen): 9.905937 times
16
+
17
+ xmlStrlen (bigger string): 388056.315979 μs
18
+ glibc_xmlStrlen (bigger string): 12797.856995 μs
19
+ delta (xmlStrlen ÷ glibc_xmlStrlen): 30.318382 times
20
+
21
+ xmlStrlen (smallest string): 15870.046021 μs
22
+ glibc_xmlStrlen (smallest string): 6282.208984 μs
23
+ delta (xmlStrlen ÷ glibc_xmlStrlen): 2.527903 times
24
+
25
+ See https://gitlab.gnome.org/GNOME/libxml2/-/issues/212 for reference.
26
+ ---
27
+ xmlstring.c | 9 ++-------
28
+ 1 file changed, 2 insertions(+), 7 deletions(-)
29
+
30
+ diff --git a/xmlstring.c b/xmlstring.c
31
+ index e8a1e45d..df247dff 100644
32
+ --- a/xmlstring.c
33
+ +++ b/xmlstring.c
34
+ @@ -423,14 +423,9 @@ xmlStrsub(const xmlChar *str, int start, int len) {
35
+
36
+ int
37
+ xmlStrlen(const xmlChar *str) {
38
+ - int len = 0;
39
+ -
40
+ if (str == NULL) return(0);
41
+ - while (*str != 0) { /* non input consuming */
42
+ - str++;
43
+ - len++;
44
+ - }
45
+ - return(len);
46
+ +
47
+ + return strlen((const char*)str);
48
+ }
49
+
50
+ /**
51
+ --
52
+ 2.29.2
53
+
metadata CHANGED
@@ -1,49 +1,83 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogiri
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.10.9
4
+ version: 1.11.0.rc4
5
5
  platform: ruby
6
6
  authors:
7
- - Aaron Patterson
8
7
  - Mike Dalessio
8
+ - Aaron Patterson
9
+ - John Shahid
9
10
  - Yoko Harada
10
- - Tim Elliott
11
11
  - Akinori MUSHA
12
- - John Shahid
13
12
  - Lars Kanis
13
+ - Tim Elliott
14
14
  autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
- date: 2020-03-01 00:00:00.000000000 Z
17
+ date: 2020-12-29 00:00:00.000000000 Z
18
18
  dependencies:
19
+ - !ruby/object:Gem::Dependency
20
+ name: racc
21
+ requirement: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '1.4'
26
+ type: :runtime
27
+ prerelease: false
28
+ version_requirements: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '1.4'
19
33
  - !ruby/object:Gem::Dependency
20
34
  name: mini_portile2
21
35
  requirement: !ruby/object:Gem::Requirement
22
36
  requirements:
23
37
  - - "~>"
24
38
  - !ruby/object:Gem::Version
25
- version: 2.4.0
39
+ version: 2.5.0
26
40
  type: :runtime
27
41
  prerelease: false
28
42
  version_requirements: !ruby/object:Gem::Requirement
29
43
  requirements:
30
44
  - - "~>"
31
45
  - !ruby/object:Gem::Version
32
- version: 2.4.0
46
+ version: 2.5.0
33
47
  - !ruby/object:Gem::Dependency
34
48
  name: concourse
35
49
  requirement: !ruby/object:Gem::Requirement
36
50
  requirements:
37
51
  - - "~>"
38
52
  - !ruby/object:Gem::Version
39
- version: '0.24'
53
+ version: '0.41'
40
54
  type: :development
41
55
  prerelease: false
42
56
  version_requirements: !ruby/object:Gem::Requirement
43
57
  requirements:
44
58
  - - "~>"
45
59
  - !ruby/object:Gem::Version
46
- version: '0.24'
60
+ version: '0.41'
61
+ - !ruby/object:Gem::Dependency
62
+ name: hoe
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '3.22'
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: 3.22.1
71
+ type: :development
72
+ prerelease: false
73
+ version_requirements: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - "~>"
76
+ - !ruby/object:Gem::Version
77
+ version: '3.22'
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ version: 3.22.1
47
81
  - !ruby/object:Gem::Dependency
48
82
  name: hoe-bundler
49
83
  requirement: !ruby/object:Gem::Requirement
@@ -100,6 +134,20 @@ dependencies:
100
134
  - - "~>"
101
135
  - !ruby/object:Gem::Version
102
136
  version: '1.6'
137
+ - !ruby/object:Gem::Dependency
138
+ name: hoe-markdown
139
+ requirement: !ruby/object:Gem::Requirement
140
+ requirements:
141
+ - - "~>"
142
+ - !ruby/object:Gem::Version
143
+ version: '1.1'
144
+ type: :development
145
+ prerelease: false
146
+ version_requirements: !ruby/object:Gem::Requirement
147
+ requirements:
148
+ - - "~>"
149
+ - !ruby/object:Gem::Version
150
+ version: '1.1'
103
151
  - !ruby/object:Gem::Dependency
104
152
  name: minitest
105
153
  requirement: !ruby/object:Gem::Requirement
@@ -115,61 +163,61 @@ dependencies:
115
163
  - !ruby/object:Gem::Version
116
164
  version: '5.8'
117
165
  - !ruby/object:Gem::Dependency
118
- name: racc
166
+ name: minitest-reporters
119
167
  requirement: !ruby/object:Gem::Requirement
120
168
  requirements:
121
169
  - - "~>"
122
170
  - !ruby/object:Gem::Version
123
- version: 1.4.14
171
+ version: '1.4'
124
172
  type: :development
125
173
  prerelease: false
126
174
  version_requirements: !ruby/object:Gem::Requirement
127
175
  requirements:
128
176
  - - "~>"
129
177
  - !ruby/object:Gem::Version
130
- version: 1.4.14
178
+ version: '1.4'
131
179
  - !ruby/object:Gem::Dependency
132
180
  name: rake
133
181
  requirement: !ruby/object:Gem::Requirement
134
182
  requirements:
135
183
  - - "~>"
136
184
  - !ruby/object:Gem::Version
137
- version: '12.0'
185
+ version: '13.0'
138
186
  type: :development
139
187
  prerelease: false
140
188
  version_requirements: !ruby/object:Gem::Requirement
141
189
  requirements:
142
190
  - - "~>"
143
191
  - !ruby/object:Gem::Version
144
- version: '12.0'
192
+ version: '13.0'
145
193
  - !ruby/object:Gem::Dependency
146
194
  name: rake-compiler
147
195
  requirement: !ruby/object:Gem::Requirement
148
196
  requirements:
149
197
  - - "~>"
150
198
  - !ruby/object:Gem::Version
151
- version: 1.1.0
199
+ version: '1.1'
152
200
  type: :development
153
201
  prerelease: false
154
202
  version_requirements: !ruby/object:Gem::Requirement
155
203
  requirements:
156
204
  - - "~>"
157
205
  - !ruby/object:Gem::Version
158
- version: 1.1.0
206
+ version: '1.1'
159
207
  - !ruby/object:Gem::Dependency
160
208
  name: rake-compiler-dock
161
209
  requirement: !ruby/object:Gem::Requirement
162
210
  requirements:
163
211
  - - "~>"
164
212
  - !ruby/object:Gem::Version
165
- version: 0.7.0
213
+ version: '1.1'
166
214
  type: :development
167
215
  prerelease: false
168
216
  version_requirements: !ruby/object:Gem::Requirement
169
217
  requirements:
170
218
  - - "~>"
171
219
  - !ruby/object:Gem::Version
172
- version: 0.7.0
220
+ version: '1.1'
173
221
  - !ruby/object:Gem::Dependency
174
222
  name: rexical
175
223
  requirement: !ruby/object:Gem::Requirement
@@ -190,28 +238,28 @@ dependencies:
190
238
  requirements:
191
239
  - - "~>"
192
240
  - !ruby/object:Gem::Version
193
- version: '0.73'
241
+ version: '0.88'
194
242
  type: :development
195
243
  prerelease: false
196
244
  version_requirements: !ruby/object:Gem::Requirement
197
245
  requirements:
198
246
  - - "~>"
199
247
  - !ruby/object:Gem::Version
200
- version: '0.73'
248
+ version: '0.88'
201
249
  - !ruby/object:Gem::Dependency
202
250
  name: simplecov
203
251
  requirement: !ruby/object:Gem::Requirement
204
252
  requirements:
205
253
  - - "~>"
206
254
  - !ruby/object:Gem::Version
207
- version: '0.16'
255
+ version: 0.17.0
208
256
  type: :development
209
257
  prerelease: false
210
258
  version_requirements: !ruby/object:Gem::Requirement
211
259
  requirements:
212
260
  - - "~>"
213
261
  - !ruby/object:Gem::Version
214
- version: '0.16'
262
+ version: 0.17.0
215
263
  - !ruby/object:Gem::Dependency
216
264
  name: rdoc
217
265
  requirement: !ruby/object:Gem::Requirement
@@ -232,32 +280,11 @@ dependencies:
232
280
  - - "<"
233
281
  - !ruby/object:Gem::Version
234
282
  version: '7'
235
- - !ruby/object:Gem::Dependency
236
- name: hoe
237
- requirement: !ruby/object:Gem::Requirement
238
- requirements:
239
- - - "~>"
240
- - !ruby/object:Gem::Version
241
- version: '3.22'
242
- type: :development
243
- prerelease: false
244
- version_requirements: !ruby/object:Gem::Requirement
245
- requirements:
246
- - - "~>"
247
- - !ruby/object:Gem::Version
248
- version: '3.22'
249
283
  description: |-
250
284
  Nokogiri (鋸) is an HTML, XML, SAX, and Reader parser. Among
251
285
  Nokogiri's many features is the ability to search documents via XPath
252
286
  or CSS3 selectors.
253
- email:
254
- - aaronp@rubyforge.org
255
- - mike.dalessio@gmail.com
256
- - yokolet@gmail.com
257
- - tle@holymonkey.com
258
- - knu@idaemons.org
259
- - jvshahid@gmail.com
260
- - lars@greiz-reinsdorf.de
287
+ email: nokogiri-talk@googlegroups.com
261
288
  executables:
262
289
  - nokogiri
263
290
  extensions:
@@ -397,8 +424,11 @@ files:
397
424
  - lib/nokogiri/html/sax/parser.rb
398
425
  - lib/nokogiri/html/sax/parser_context.rb
399
426
  - lib/nokogiri/html/sax/push_parser.rb
427
+ - lib/nokogiri/jruby/dependencies.rb
400
428
  - lib/nokogiri/syntax_error.rb
401
429
  - lib/nokogiri/version.rb
430
+ - lib/nokogiri/version/constant.rb
431
+ - lib/nokogiri/version/info.rb
402
432
  - lib/nokogiri/xml.rb
403
433
  - lib/nokogiri/xml/attr.rb
404
434
  - lib/nokogiri/xml/attribute_decl.rb
@@ -444,6 +474,9 @@ files:
444
474
  - patches/libxml2/0003-Update-entities-to-remove-handling-of-ssi.patch
445
475
  - patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch
446
476
  - patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch
477
+ - patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch
478
+ - patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch
479
+ - patches/libxml2/0008-use-glibc-strlen.patch
447
480
  - ports/archives/libxml2-2.9.10.tar.gz
448
481
  - ports/archives/libxslt-1.1.34.tar.gz
449
482
  homepage: https://nokogiri.org
@@ -465,14 +498,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
465
498
  requirements:
466
499
  - - ">="
467
500
  - !ruby/object:Gem::Version
468
- version: 2.3.0
501
+ version: 2.5.0
469
502
  required_rubygems_version: !ruby/object:Gem::Requirement
470
503
  requirements:
471
- - - ">="
504
+ - - ">"
472
505
  - !ruby/object:Gem::Version
473
- version: '0'
506
+ version: 1.3.1
474
507
  requirements: []
475
- rubygems_version: 3.1.2
508
+ rubygems_version: 3.1.4
476
509
  signing_key:
477
510
  specification_version: 4
478
511
  summary: Nokogiri (鋸) is an HTML, XML, SAX, and Reader parser