nokogiri 1.6.1 → 1.6.2.rc1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (93) hide show
  1. checksums.yaml +7 -7
  2. data/.editorconfig +17 -0
  3. data/.travis.yml +4 -6
  4. data/CHANGELOG.ja.rdoc +37 -8
  5. data/CHANGELOG.rdoc +48 -3
  6. data/Gemfile +3 -3
  7. data/Manifest.txt +57 -1
  8. data/README.ja.rdoc +22 -16
  9. data/README.rdoc +24 -19
  10. data/ROADMAP.md +1 -2
  11. data/Rakefile +161 -58
  12. data/build_all +56 -31
  13. data/dependencies.yml +3 -3
  14. data/ext/nokogiri/extconf.rb +379 -121
  15. data/ext/nokogiri/html_document.c +2 -2
  16. data/ext/nokogiri/nokogiri.c +6 -1
  17. data/ext/nokogiri/xml_document.c +5 -4
  18. data/ext/nokogiri/xml_node.c +11 -4
  19. data/ext/nokogiri/xml_reader.c +1 -1
  20. data/ext/nokogiri/xml_sax_parser_context.c +40 -0
  21. data/ext/nokogiri/xml_syntax_error.c +10 -5
  22. data/ext/nokogiri/xml_syntax_error.h +1 -1
  23. data/ext/nokogiri/xml_xpath_context.c +2 -14
  24. data/ext/nokogiri/xslt_stylesheet.c +1 -1
  25. data/lib/nokogiri.rb +31 -22
  26. data/lib/nokogiri/css/node.rb +0 -50
  27. data/lib/nokogiri/css/parser.rb +213 -218
  28. data/lib/nokogiri/css/parser.y +21 -30
  29. data/lib/nokogiri/css/xpath_visitor.rb +62 -14
  30. data/lib/nokogiri/html/document.rb +97 -18
  31. data/lib/nokogiri/html/sax/parser.rb +2 -2
  32. data/lib/nokogiri/version.rb +1 -1
  33. data/lib/nokogiri/xml/builder.rb +1 -1
  34. data/lib/nokogiri/xml/document.rb +2 -2
  35. data/lib/nokogiri/xml/dtd.rb +10 -0
  36. data/lib/nokogiri/xml/node.rb +26 -1
  37. data/lib/nokogiri/xml/sax/parser.rb +1 -1
  38. data/ports/archives/libxslt-1.1.28.tar.gz +0 -0
  39. data/ports/patches/libxml2/0001-Fix-parser-local-buffers-size-problems.patch +265 -0
  40. data/ports/patches/libxml2/0002-Fix-entities-local-buffers-size-problems.patch +102 -0
  41. data/ports/patches/libxml2/0003-Fix-an-error-in-previous-commit.patch +26 -0
  42. data/ports/patches/libxml2/0004-Fix-potential-out-of-bound-access.patch +26 -0
  43. data/ports/patches/libxml2/0005-Detect-excessive-entities-expansion-upon-replacement.patch +158 -0
  44. data/ports/patches/libxml2/0006-Do-not-fetch-external-parsed-entities.patch +78 -0
  45. data/ports/patches/libxml2/0007-Enforce-XML_PARSER_EOF-state-handling-through-the-pa.patch +480 -0
  46. data/ports/patches/libxml2/0008-Improve-handling-of-xmlStopParser.patch +315 -0
  47. data/ports/patches/libxml2/0009-Fix-a-couple-of-return-without-value.patch +37 -0
  48. data/ports/patches/libxslt/0001-Adding-doc-update-related-to-1.1.28.patch +222 -0
  49. data/ports/patches/libxslt/0002-Fix-a-couple-of-places-where-f-printf-parameters-wer.patch +53 -0
  50. data/ports/patches/libxslt/0003-Initialize-pseudo-random-number-generator-with-curre.patch +60 -0
  51. data/ports/patches/libxslt/0004-EXSLT-function-str-replace-is-broken-as-is.patch +42 -0
  52. data/ports/patches/libxslt/0006-Fix-str-padding-to-work-with-UTF-8-strings.patch +164 -0
  53. data/ports/patches/libxslt/0007-Separate-function-for-predicate-matching-in-patterns.patch +587 -0
  54. data/ports/patches/libxslt/0008-Fix-direct-pattern-matching.patch +80 -0
  55. data/ports/patches/libxslt/0009-Fix-certain-patterns-with-predicates.patch +185 -0
  56. data/ports/patches/libxslt/0010-Fix-handling-of-UTF-8-strings-in-EXSLT-crypto-module.patch +126 -0
  57. data/ports/patches/libxslt/0013-Memory-leak-in-xsltCompileIdKeyPattern-error-path.patch +25 -0
  58. data/ports/patches/libxslt/0014-Fix-for-bug-436589.patch +43 -0
  59. data/ports/patches/libxslt/0015-Fix-mkdir-for-mingw.patch +41 -0
  60. data/suppressions/README.txt +1 -0
  61. data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
  62. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  63. data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
  64. data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
  65. data/test/css/test_nthiness.rb +65 -2
  66. data/test/css/test_parser.rb +27 -10
  67. data/test/css/test_tokenizer.rb +1 -1
  68. data/test/css/test_xpath_visitor.rb +6 -1
  69. data/test/files/atom.xml +344 -0
  70. data/test/files/shift_jis_no_charset.html +9 -0
  71. data/test/helper.rb +10 -0
  72. data/test/html/test_document.rb +74 -7
  73. data/test/html/test_document_encoding.rb +10 -0
  74. data/test/html/test_document_fragment.rb +3 -3
  75. data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
  76. data/test/test_nokogiri.rb +6 -0
  77. data/test/test_reader.rb +7 -4
  78. data/test/test_xslt_transforms.rb +25 -0
  79. data/test/xml/sax/test_parser.rb +16 -0
  80. data/test/xml/sax/test_parser_context.rb +9 -0
  81. data/test/xml/test_builder.rb +9 -0
  82. data/test/xml/test_c14n.rb +12 -2
  83. data/test/xml/test_document.rb +66 -0
  84. data/test/xml/test_document_fragment.rb +5 -0
  85. data/test/xml/test_dtd.rb +84 -0
  86. data/test/xml/test_entity_reference.rb +3 -3
  87. data/test/xml/test_node.rb +21 -3
  88. data/test/xml/test_node_attributes.rb +17 -0
  89. data/test/xml/test_schema.rb +26 -0
  90. data/test/xml/test_xpath.rb +81 -0
  91. metadata +254 -174
  92. data/ports/archives/libxslt-1.1.26.tar.gz +0 -0
  93. data/tasks/cross_compile.rb +0 -134
@@ -0,0 +1,26 @@
1
+ From c8385ccac9e9723a1f87da1c29da56d97df4af85 Mon Sep 17 00:00:00 2001
2
+ From: Daniel Veillard <veillard@redhat.com>
3
+ Date: Mon, 29 Oct 2012 10:39:55 +0800
4
+ Subject: [PATCH 4/9] Fix potential out of bound access
5
+
6
+ [Origin: 6a36fbe3b3e001a8a840b5c1fdd81cefc9947f0d]
7
+ ---
8
+ parser.c | 2 +-
9
+ 1 file changed, 1 insertion(+), 1 deletion(-)
10
+
11
+ diff --git a/parser.c b/parser.c
12
+ index 9863275..e1b0364 100644
13
+ --- a/parser.c
14
+ +++ b/parser.c
15
+ @@ -3932,7 +3932,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
16
+ c = CUR_CHAR(l);
17
+ }
18
+ if ((in_space) && (normalize)) {
19
+ - while (buf[len - 1] == 0x20) len--;
20
+ + while ((len > 0) && (buf[len - 1] == 0x20)) len--;
21
+ }
22
+ buf[len] = 0;
23
+ if (RAW == '<') {
24
+ --
25
+ 1.8.4.1
26
+
@@ -0,0 +1,158 @@
1
+ From e8b2f1774cfffce792f38eb1116ea1104758cfc5 Mon Sep 17 00:00:00 2001
2
+ From: Daniel Veillard <veillard@redhat.com>
3
+ Date: Tue, 19 Feb 2013 10:21:49 +0800
4
+ Subject: [PATCH 5/9] Detect excessive entities expansion upon replacement
5
+
6
+ If entities expansion in the XML parser is asked for,
7
+ it is possible to craft a relatively small input document leading
8
+ to excessive on-the-fly content generation.
9
+ This patch accounts for those replacements and stops parsing
10
+ after a given threshold. It can be bypassed as usual with the
11
+ HUGE parser option.
12
+
13
+ [Origin: 23f05e0c33987d6605387b300c4be5da2120a7ab]
14
+ ---
15
+ include/libxml/parser.h | 1 +
16
+ parser.c | 44 ++++++++++++++++++++++++++++++++++++++------
17
+ parserInternals.c | 2 ++
18
+ 3 files changed, 41 insertions(+), 6 deletions(-)
19
+
20
+ diff --git a/include/libxml/parser.h b/include/libxml/parser.h
21
+ index 04edb9d..5b36584 100644
22
+ --- a/include/libxml/parser.h
23
+ +++ b/include/libxml/parser.h
24
+ @@ -310,6 +310,7 @@ struct _xmlParserCtxt {
25
+ xmlParserNodeInfo *nodeInfoTab; /* array of nodeInfos */
26
+
27
+ int input_id; /* we need to label inputs */
28
+ + unsigned long sizeentcopy; /* volume of entity copy */
29
+ };
30
+
31
+ /**
32
+ diff --git a/parser.c b/parser.c
33
+ index e1b0364..b206f05 100644
34
+ --- a/parser.c
35
+ +++ b/parser.c
36
+ @@ -119,7 +119,7 @@ xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
37
+ */
38
+ static int
39
+ xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
40
+ - xmlEntityPtr ent)
41
+ + xmlEntityPtr ent, size_t replacement)
42
+ {
43
+ size_t consumed = 0;
44
+
45
+ @@ -127,7 +127,24 @@ xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
46
+ return (0);
47
+ if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
48
+ return (1);
49
+ - if (size != 0) {
50
+ + if (replacement != 0) {
51
+ + if (replacement < XML_MAX_TEXT_LENGTH)
52
+ + return(0);
53
+ +
54
+ + /*
55
+ + * If the volume of entity copy reaches 10 times the
56
+ + * amount of parsed data and over the large text threshold
57
+ + * then that's very likely to be an abuse.
58
+ + */
59
+ + if (ctxt->input != NULL) {
60
+ + consumed = ctxt->input->consumed +
61
+ + (ctxt->input->cur - ctxt->input->base);
62
+ + }
63
+ + consumed += ctxt->sizeentities;
64
+ +
65
+ + if (replacement < XML_PARSER_NON_LINEAR * consumed)
66
+ + return(0);
67
+ + } else if (size != 0) {
68
+ /*
69
+ * Do the check based on the replacement size of the entity
70
+ */
71
+ @@ -173,7 +190,6 @@ xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
72
+ */
73
+ return (0);
74
+ }
75
+ -
76
+ xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
77
+ return (1);
78
+ }
79
+ @@ -2706,7 +2722,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
80
+ while (*current != 0) { /* non input consuming loop */
81
+ buffer[nbchars++] = *current++;
82
+ if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
83
+ - if (xmlParserEntityCheck(ctxt, nbchars, ent))
84
+ + if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
85
+ goto int_error;
86
+ growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
87
+ }
88
+ @@ -2748,7 +2764,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
89
+ while (*current != 0) { /* non input consuming loop */
90
+ buffer[nbchars++] = *current++;
91
+ if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
92
+ - if (xmlParserEntityCheck(ctxt, nbchars, ent))
93
+ + if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
94
+ goto int_error;
95
+ growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
96
+ }
97
+ @@ -6976,7 +6992,7 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
98
+ xmlFreeNodeList(list);
99
+ return;
100
+ }
101
+ - if (xmlParserEntityCheck(ctxt, 0, ent)) {
102
+ + if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
103
+ xmlFreeNodeList(list);
104
+ return;
105
+ }
106
+ @@ -7136,6 +7152,13 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
107
+ xmlNodePtr nw = NULL, cur, firstChild = NULL;
108
+
109
+ /*
110
+ + * We are copying here, make sure there is no abuse
111
+ + */
112
+ + ctxt->sizeentcopy += ent->length;
113
+ + if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
114
+ + return;
115
+ +
116
+ + /*
117
+ * when operating on a reader, the entities definitions
118
+ * are always owning the entities subtree.
119
+ if (ctxt->parseMode == XML_PARSE_READER)
120
+ @@ -7175,6 +7198,14 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
121
+ } else if (list == NULL) {
122
+ xmlNodePtr nw = NULL, cur, next, last,
123
+ firstChild = NULL;
124
+ +
125
+ + /*
126
+ + * We are copying here, make sure there is no abuse
127
+ + */
128
+ + ctxt->sizeentcopy += ent->length;
129
+ + if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
130
+ + return;
131
+ +
132
+ /*
133
+ * Copy the entity child list and make it the new
134
+ * entity child list. The goal is to make sure any
135
+ @@ -14355,6 +14386,7 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt)
136
+ ctxt->catalogs = NULL;
137
+ ctxt->nbentities = 0;
138
+ ctxt->sizeentities = 0;
139
+ + ctxt->sizeentcopy = 0;
140
+ xmlInitNodeInfoSeq(&ctxt->node_seq);
141
+
142
+ if (ctxt->attsDefault != NULL) {
143
+ diff --git a/parserInternals.c b/parserInternals.c
144
+ index 746b7fd..d7e320c 100644
145
+ --- a/parserInternals.c
146
+ +++ b/parserInternals.c
147
+ @@ -1761,6 +1761,8 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
148
+ ctxt->charset = XML_CHAR_ENCODING_UTF8;
149
+ ctxt->catalogs = NULL;
150
+ ctxt->nbentities = 0;
151
+ + ctxt->sizeentities = 0;
152
+ + ctxt->sizeentcopy = 0;
153
+ ctxt->input_id = 1;
154
+ xmlInitNodeInfoSeq(&ctxt->node_seq);
155
+ return(0);
156
+ --
157
+ 1.8.4.1
158
+
@@ -0,0 +1,78 @@
1
+ From 8c222b89f6f68157283498d1bc6e0cce568c977b Mon Sep 17 00:00:00 2001
2
+ From: Daniel Veillard <veillard@redhat.com>
3
+ Date: Mon, 23 Jul 2012 14:15:40 +0800
4
+ Subject: [PATCH 6/9] Do not fetch external parsed entities
5
+
6
+ Unless explicitly asked for when validating or replacing entities
7
+ with their value. Problem pointed out by Tom Lane <tgl@redhat.com>
8
+
9
+ * parser.c: do not load external parsed entities unless needed
10
+ * test/errors/extparsedent.xml result/errors/extparsedent.xml*:
11
+ add a regression test to avoid change of the behaviour in the future
12
+
13
+ [Origin: 4629ee02ac649c27f9c0cf98ba017c6b5526070f]
14
+ ---
15
+ parser.c | 11 +++++++++--
16
+ result/errors/extparsedent.xml | 5 +++++
17
+ result/errors/extparsedent.xml.err | 0
18
+ result/errors/extparsedent.xml.str | 0
19
+ test/errors/extparsedent.xml | 5 +++++
20
+ 5 files changed, 19 insertions(+), 2 deletions(-)
21
+ create mode 100644 result/errors/extparsedent.xml
22
+ create mode 100644 result/errors/extparsedent.xml.err
23
+ create mode 100644 result/errors/extparsedent.xml.str
24
+ create mode 100644 test/errors/extparsedent.xml
25
+
26
+ diff --git a/parser.c b/parser.c
27
+ index b206f05..8fb16af 100644
28
+ --- a/parser.c
29
+ +++ b/parser.c
30
+ @@ -6943,8 +6943,15 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
31
+ * The first reference to the entity trigger a parsing phase
32
+ * where the ent->children is filled with the result from
33
+ * the parsing.
34
+ - */
35
+ - if (ent->checked == 0) {
36
+ + * Note: external parsed entities will not be loaded, it is not
37
+ + * required for a non-validating parser, unless the parsing option
38
+ + * of validating, or substituting entities were given. Doing so is
39
+ + * far more secure as the parser will only process data coming from
40
+ + * the document entity by default.
41
+ + */
42
+ + if ((ent->checked == 0) &&
43
+ + ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
44
+ + (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
45
+ unsigned long oldnbent = ctxt->nbentities;
46
+
47
+ /*
48
+ diff --git a/result/errors/extparsedent.xml b/result/errors/extparsedent.xml
49
+ new file mode 100644
50
+ index 0000000..07e4c54
51
+ --- /dev/null
52
+ +++ b/result/errors/extparsedent.xml
53
+ @@ -0,0 +1,5 @@
54
+ +<?xml version="1.0"?>
55
+ +<!DOCTYPE foo [
56
+ +<!ENTITY c PUBLIC "bar" "/etc/doesnotexist">
57
+ +]>
58
+ +<root>&c;</root>
59
+ diff --git a/result/errors/extparsedent.xml.err b/result/errors/extparsedent.xml.err
60
+ new file mode 100644
61
+ index 0000000..e69de29
62
+ diff --git a/result/errors/extparsedent.xml.str b/result/errors/extparsedent.xml.str
63
+ new file mode 100644
64
+ index 0000000..e69de29
65
+ diff --git a/test/errors/extparsedent.xml b/test/errors/extparsedent.xml
66
+ new file mode 100644
67
+ index 0000000..07e4c54
68
+ --- /dev/null
69
+ +++ b/test/errors/extparsedent.xml
70
+ @@ -0,0 +1,5 @@
71
+ +<?xml version="1.0"?>
72
+ +<!DOCTYPE foo [
73
+ +<!ENTITY c PUBLIC "bar" "/etc/doesnotexist">
74
+ +]>
75
+ +<root>&c;</root>
76
+ --
77
+ 1.8.4.1
78
+
@@ -0,0 +1,480 @@
1
+ From c27670420c22b2d64da7d44e266a73bb4e66c2cc Mon Sep 17 00:00:00 2001
2
+ From: Daniel Veillard <veillard@redhat.com>
3
+ Date: Mon, 30 Jul 2012 16:16:04 +0800
4
+ Subject: [PATCH 7/9] Enforce XML_PARSER_EOF state handling through the parser
5
+
6
+ That condition is one raised when the parser should positively stop
7
+ processing further even to report errors. Best is to test it after
8
+ most GROW calls, especially within loops
9
+
10
+ [Origin: 48b4cdde3483e054af8ea02e0cd7ee467b0e9a50]
11
+ ---
12
+ parser.c | 131 +++++++++++++++++++++++++++++++++++++++++++++++++++++----------
13
+ 1 file changed, 110 insertions(+), 21 deletions(-)
14
+
15
+ diff --git a/parser.c b/parser.c
16
+ index 8fb16af..409cde8 100644
17
+ --- a/parser.c
18
+ +++ b/parser.c
19
+ @@ -2161,6 +2161,8 @@ xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
20
+ "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
21
+ }
22
+ ret = inputPush(ctxt, input);
23
+ + if (ctxt->instate == XML_PARSER_EOF)
24
+ + return(-1);
25
+ GROW;
26
+ return(ret);
27
+ }
28
+ @@ -2197,6 +2199,8 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
29
+ if (count++ > 20) {
30
+ count = 0;
31
+ GROW;
32
+ + if (ctxt->instate == XML_PARSER_EOF)
33
+ + return(0);
34
+ }
35
+ if ((RAW >= '0') && (RAW <= '9'))
36
+ val = val * 16 + (CUR - '0');
37
+ @@ -2228,6 +2232,8 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
38
+ if (count++ > 20) {
39
+ count = 0;
40
+ GROW;
41
+ + if (ctxt->instate == XML_PARSER_EOF)
42
+ + return(0);
43
+ }
44
+ if ((RAW >= '0') && (RAW <= '9'))
45
+ val = val * 10 + (CUR - '0');
46
+ @@ -2576,6 +2582,8 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
47
+ * the amount of data in the buffer.
48
+ */
49
+ GROW
50
+ + if (ctxt->instate == XML_PARSER_EOF)
51
+ + return;
52
+ if ((ctxt->input->end - ctxt->input->cur)>=4) {
53
+ start[0] = RAW;
54
+ start[1] = NXT(1);
55
+ @@ -3194,6 +3202,8 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
56
+ * Handler for more complex cases
57
+ */
58
+ GROW;
59
+ + if (ctxt->instate == XML_PARSER_EOF)
60
+ + return(NULL);
61
+ c = CUR_CHAR(l);
62
+ if ((ctxt->options & XML_PARSE_OLD10) == 0) {
63
+ /*
64
+ @@ -3245,6 +3255,8 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
65
+ if (count++ > 100) {
66
+ count = 0;
67
+ GROW;
68
+ + if (ctxt->instate == XML_PARSER_EOF)
69
+ + return(NULL);
70
+ }
71
+ len += l;
72
+ NEXTL(l);
73
+ @@ -3269,6 +3281,8 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
74
+ if (count++ > 100) {
75
+ count = 0;
76
+ GROW;
77
+ + if (ctxt->instate == XML_PARSER_EOF)
78
+ + return(NULL);
79
+ }
80
+ len += l;
81
+ NEXTL(l);
82
+ @@ -3362,6 +3376,8 @@ xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
83
+ if (count++ > 100) {
84
+ count = 0;
85
+ GROW;
86
+ + if (ctxt->instate == XML_PARSER_EOF)
87
+ + return(NULL);
88
+ }
89
+ len += l;
90
+ NEXTL(l);
91
+ @@ -3442,6 +3458,8 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
92
+ const xmlChar *ret;
93
+
94
+ GROW;
95
+ + if (ctxt->instate == XML_PARSER_EOF)
96
+ + return(NULL);
97
+
98
+ in = ctxt->input->cur;
99
+ while (*in != 0 && *in == *cmp) {
100
+ @@ -3569,6 +3587,8 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
101
+ #endif
102
+
103
+ GROW;
104
+ + if (ctxt->instate == XML_PARSER_EOF)
105
+ + return(NULL);
106
+ c = CUR_CHAR(l);
107
+
108
+ while (xmlIsNameChar(ctxt, c)) {
109
+ @@ -3597,6 +3617,10 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
110
+ if (count++ > 100) {
111
+ count = 0;
112
+ GROW;
113
+ + if (ctxt->instate == XML_PARSER_EOF) {
114
+ + xmlFree(buffer);
115
+ + return(NULL);
116
+ + }
117
+ }
118
+ if (len + 10 > max) {
119
+ xmlChar *tmp;
120
+ @@ -3667,6 +3691,10 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
121
+ ctxt->instate = XML_PARSER_ENTITY_VALUE;
122
+ input = ctxt->input;
123
+ GROW;
124
+ + if (ctxt->instate == XML_PARSER_EOF) {
125
+ + xmlFree(buf);
126
+ + return(NULL);
127
+ + }
128
+ NEXT;
129
+ c = CUR_CHAR(l);
130
+ /*
131
+ @@ -3678,8 +3706,8 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
132
+ * In practice it means we stop the loop only when back at parsing
133
+ * the initial entity and the quote is found
134
+ */
135
+ - while ((IS_CHAR(c)) && ((c != stop) || /* checked */
136
+ - (ctxt->input != input))) {
137
+ + while (((IS_CHAR(c)) && ((c != stop) || /* checked */
138
+ + (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
139
+ if (len + 5 >= size) {
140
+ xmlChar *tmp;
141
+
142
+ @@ -3708,6 +3736,10 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
143
+ }
144
+ }
145
+ buf[len] = 0;
146
+ + if (ctxt->instate == XML_PARSER_EOF) {
147
+ + xmlFree(buf);
148
+ + return(NULL);
149
+ + }
150
+
151
+ /*
152
+ * Raise problem w.r.t. '&' and '%' being used in non-entities
153
+ @@ -3755,12 +3787,12 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
154
+ */
155
+ ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
156
+ 0, 0, 0);
157
+ - if (orig != NULL)
158
+ + if (orig != NULL)
159
+ *orig = buf;
160
+ else
161
+ xmlFree(buf);
162
+ }
163
+ -
164
+ +
165
+ return(ret);
166
+ }
167
+
168
+ @@ -3811,8 +3843,9 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
169
+ * OK loop until we reach one of the ending char or a size limit.
170
+ */
171
+ c = CUR_CHAR(l);
172
+ - while ((NXT(0) != limit) && /* checked */
173
+ - (IS_CHAR(c)) && (c != '<')) {
174
+ + while (((NXT(0) != limit) && /* checked */
175
+ + (IS_CHAR(c)) && (c != '<')) &&
176
+ + (ctxt->instate != XML_PARSER_EOF)) {
177
+ if (c == 0) break;
178
+ if (c == '&') {
179
+ in_space = 0;
180
+ @@ -3947,6 +3980,9 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
181
+ GROW;
182
+ c = CUR_CHAR(l);
183
+ }
184
+ + if (ctxt->instate == XML_PARSER_EOF)
185
+ + goto error;
186
+ +
187
+ if ((in_space) && (normalize)) {
188
+ while ((len > 0) && (buf[len - 1] == 0x20)) len--;
189
+ }
190
+ @@ -3979,6 +4015,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
191
+
192
+ mem_error:
193
+ xmlErrMemory(ctxt, NULL);
194
+ +error:
195
+ if (buf != NULL)
196
+ xmlFree(buf);
197
+ if (rep != NULL)
198
+ @@ -4084,6 +4121,10 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
199
+ if (count > 50) {
200
+ GROW;
201
+ count = 0;
202
+ + if (ctxt->instate == XML_PARSER_EOF) {
203
+ + xmlFree(buf);
204
+ + return(NULL);
205
+ + }
206
+ }
207
+ COPY_BUF(l,buf,len,cur);
208
+ NEXTL(l);
209
+ @@ -4161,6 +4202,10 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
210
+ if (count > 50) {
211
+ GROW;
212
+ count = 0;
213
+ + if (ctxt->instate == XML_PARSER_EOF) {
214
+ + xmlFree(buf);
215
+ + return(NULL);
216
+ + }
217
+ }
218
+ NEXT;
219
+ cur = CUR;
220
+ @@ -4367,6 +4412,8 @@ get_more:
221
+ }
222
+ SHRINK;
223
+ GROW;
224
+ + if (ctxt->instate == XML_PARSER_EOF)
225
+ + return;
226
+ in = ctxt->input->cur;
227
+ } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
228
+ nbchar = 0;
229
+ @@ -4435,6 +4482,8 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
230
+ if (count > 50) {
231
+ GROW;
232
+ count = 0;
233
+ + if (ctxt->instate == XML_PARSER_EOF)
234
+ + return;
235
+ }
236
+ NEXTL(l);
237
+ cur = CUR_CHAR(l);
238
+ @@ -4635,6 +4684,10 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
239
+ if (count > 50) {
240
+ GROW;
241
+ count = 0;
242
+ + if (ctxt->instate == XML_PARSER_EOF) {
243
+ + xmlFree(buf);
244
+ + return;
245
+ + }
246
+ }
247
+ NEXTL(l);
248
+ cur = CUR_CHAR(l);
249
+ @@ -4785,6 +4838,10 @@ get_more:
250
+ }
251
+ SHRINK;
252
+ GROW;
253
+ + if (ctxt->instate == XML_PARSER_EOF) {
254
+ + xmlFree(buf);
255
+ + return;
256
+ + }
257
+ in = ctxt->input->cur;
258
+ if (*in == '-') {
259
+ if (in[1] == '-') {
260
+ @@ -5022,6 +5079,10 @@ xmlParsePI(xmlParserCtxtPtr ctxt) {
261
+ count++;
262
+ if (count > 50) {
263
+ GROW;
264
+ + if (ctxt->instate == XML_PARSER_EOF) {
265
+ + xmlFree(buf);
266
+ + return;
267
+ + }
268
+ count = 0;
269
+ }
270
+ COPY_BUF(l,buf,len,cur);
271
+ @@ -5762,7 +5823,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
272
+ }
273
+ SKIP_BLANKS;
274
+ GROW;
275
+ - while (RAW != '>') {
276
+ + while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
277
+ const xmlChar *check = CUR_PTR;
278
+ int type;
279
+ int def;
280
+ @@ -5911,7 +5972,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
281
+ ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
282
+ if (ret == NULL) return(NULL);
283
+ }
284
+ - while (RAW == '|') {
285
+ + while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
286
+ NEXT;
287
+ if (elem == NULL) {
288
+ ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
289
+ @@ -6055,7 +6116,7 @@ xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
290
+ }
291
+ SKIP_BLANKS;
292
+ SHRINK;
293
+ - while (RAW != ')') {
294
+ + while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
295
+ /*
296
+ * Each loop we parse one separator and one element.
297
+ */
298
+ @@ -6334,6 +6395,8 @@ xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
299
+ }
300
+ NEXT;
301
+ GROW;
302
+ + if (ctxt->instate == XML_PARSER_EOF)
303
+ + return(-1);
304
+ SKIP_BLANKS;
305
+ if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
306
+ tree = xmlParseElementMixedContentDecl(ctxt, inputid);
307
+ @@ -6501,8 +6564,8 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
308
+ "Entering INCLUDE Conditional Section\n");
309
+ }
310
+
311
+ - while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
312
+ - (NXT(2) != '>'))) {
313
+ + while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
314
+ + (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
315
+ const xmlChar *check = CUR_PTR;
316
+ unsigned int cons = ctxt->input->consumed;
317
+
318
+ @@ -6570,7 +6633,8 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
319
+ if (ctxt->recovery == 0) ctxt->disableSAX = 1;
320
+ ctxt->instate = XML_PARSER_IGNORE;
321
+
322
+ - while ((depth >= 0) && (RAW != 0)) {
323
+ + while (((depth >= 0) && (RAW != 0)) &&
324
+ + (ctxt->instate != XML_PARSER_EOF)) {
325
+ if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
326
+ depth++;
327
+ SKIP(3);
328
+ @@ -6841,7 +6905,7 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
329
+ break;
330
+ }
331
+ }
332
+ -
333
+ +
334
+ if (RAW != 0) {
335
+ xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
336
+ }
337
+ @@ -7310,6 +7374,8 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
338
+ xmlEntityPtr ent = NULL;
339
+
340
+ GROW;
341
+ + if (ctxt->instate == XML_PARSER_EOF)
342
+ + return(NULL);
343
+
344
+ if (RAW != '&')
345
+ return(NULL);
346
+ @@ -7840,6 +7906,10 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
347
+ if (count++ > 100) {
348
+ count = 0;
349
+ GROW;
350
+ + if (ctxt->instate == XML_PARSER_EOF) {
351
+ + xmlBufferFree(buf);
352
+ + return(-1);
353
+ + }
354
+ }
355
+ NEXTL(l);
356
+ c = CUR_CHAR(l);
357
+ @@ -8073,7 +8143,7 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
358
+ * PEReferences.
359
+ * Subsequence (markupdecl | PEReference | S)*
360
+ */
361
+ - while (RAW != ']') {
362
+ + while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
363
+ const xmlChar *check = CUR_PTR;
364
+ unsigned int cons = ctxt->input->consumed;
365
+
366
+ @@ -8259,9 +8329,9 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) {
367
+ SKIP_BLANKS;
368
+ GROW;
369
+
370
+ - while ((RAW != '>') &&
371
+ + while (((RAW != '>') &&
372
+ ((RAW != '/') || (NXT(1) != '>')) &&
373
+ - (IS_BYTE_CHAR(RAW))) {
374
+ + (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
375
+ const xmlChar *q = CUR_PTR;
376
+ unsigned int cons = ctxt->input->consumed;
377
+
378
+ @@ -8685,6 +8755,8 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
379
+ if (in >= end) {
380
+ const xmlChar *oldbase = ctxt->input->base;
381
+ GROW;
382
+ + if (ctxt->instate == XML_PARSER_EOF)
383
+ + return(NULL);
384
+ if (oldbase != ctxt->input->base) {
385
+ long delta = ctxt->input->base - oldbase;
386
+ start = start + delta;
387
+ @@ -8699,6 +8771,8 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
388
+ if (in >= end) {
389
+ const xmlChar *oldbase = ctxt->input->base;
390
+ GROW;
391
+ + if (ctxt->instate == XML_PARSER_EOF)
392
+ + return(NULL);
393
+ if (oldbase != ctxt->input->base) {
394
+ long delta = ctxt->input->base - oldbase;
395
+ start = start + delta;
396
+ @@ -8719,6 +8793,8 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
397
+ if (in >= end) {
398
+ const xmlChar *oldbase = ctxt->input->base;
399
+ GROW;
400
+ + if (ctxt->instate == XML_PARSER_EOF)
401
+ + return(NULL);
402
+ if (oldbase != ctxt->input->base) {
403
+ long delta = ctxt->input->base - oldbase;
404
+ start = start + delta;
405
+ @@ -8736,6 +8812,8 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
406
+ if (in >= end) {
407
+ const xmlChar *oldbase = ctxt->input->base;
408
+ GROW;
409
+ + if (ctxt->instate == XML_PARSER_EOF)
410
+ + return(NULL);
411
+ if (oldbase != ctxt->input->base) {
412
+ long delta = ctxt->input->base - oldbase;
413
+ start = start + delta;
414
+ @@ -8967,9 +9045,9 @@ reparse:
415
+ GROW;
416
+ if (ctxt->input->base != base) goto base_changed;
417
+
418
+ - while ((RAW != '>') &&
419
+ + while (((RAW != '>') &&
420
+ ((RAW != '/') || (NXT(1) != '>')) &&
421
+ - (IS_BYTE_CHAR(RAW))) {
422
+ + (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
423
+ const xmlChar *q = CUR_PTR;
424
+ unsigned int cons = ctxt->input->consumed;
425
+ int len = -1, alloc = 0;
426
+ @@ -9140,6 +9218,8 @@ skip_ns:
427
+ failed:
428
+
429
+ GROW
430
+ + if (ctxt->instate == XML_PARSER_EOF)
431
+ + break;
432
+ if (ctxt->input->base != base) goto base_changed;
433
+ if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
434
+ break;
435
+ @@ -9377,6 +9457,8 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
436
+ * We should definitely be at the ending "S? '>'" part
437
+ */
438
+ GROW;
439
+ + if (ctxt->instate == XML_PARSER_EOF)
440
+ + return;
441
+ SKIP_BLANKS;
442
+ if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
443
+ xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
444
+ @@ -9485,6 +9567,10 @@ xmlParseCDSect(xmlParserCtxtPtr ctxt) {
445
+ count++;
446
+ if (count > 50) {
447
+ GROW;
448
+ + if (ctxt->instate == XML_PARSER_EOF) {
449
+ + xmlFree(buf);
450
+ + return;
451
+ + }
452
+ count = 0;
453
+ }
454
+ NEXTL(l);
455
+ @@ -10255,9 +10341,10 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
456
+
457
+ void
458
+ xmlParseMisc(xmlParserCtxtPtr ctxt) {
459
+ - while (((RAW == '<') && (NXT(1) == '?')) ||
460
+ - (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
461
+ - IS_BLANK_CH(CUR)) {
462
+ + while ((ctxt->instate != XML_PARSER_EOF) &&
463
+ + (((RAW == '<') && (NXT(1) == '?')) ||
464
+ + (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
465
+ + IS_BLANK_CH(CUR))) {
466
+ if ((RAW == '<') && (NXT(1) == '?')) {
467
+ xmlParsePI(ctxt);
468
+ } else if (IS_BLANK_CH(CUR)) {
469
+ @@ -11727,6 +11814,8 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
470
+ return(XML_ERR_INTERNAL_ERROR);
471
+ if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
472
+ return(ctxt->errNo);
473
+ + if (ctxt->instate == XML_PARSER_EOF)
474
+ + return(-1);
475
+ if (ctxt->instate == XML_PARSER_START)
476
+ xmlDetectSAX2(ctxt);
477
+ if ((size > 0) && (chunk != NULL) && (!terminate) &&
478
+ --
479
+ 1.8.4.1
480
+