nokogiri 1.10.10-x86-mingw32 → 1.11.0-x86-mingw32
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/LICENSE-DEPENDENCIES.md +1015 -947
- data/README.md +165 -91
- data/ext/nokogiri/depend +476 -357
- data/ext/nokogiri/extconf.rb +507 -357
- data/ext/nokogiri/html_document.c +79 -78
- data/ext/nokogiri/html_sax_parser_context.c +2 -2
- data/ext/nokogiri/include/libexslt/exslt.h +102 -0
- data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
- data/ext/nokogiri/include/libexslt/exsltexports.h +140 -0
- data/ext/nokogiri/include/libxml2/libxml/DOCBparser.h +96 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +306 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX.h +173 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX2.h +178 -0
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +126 -0
- data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
- data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
- data/ext/nokogiri/include/libxml2/libxml/dict.h +79 -0
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +245 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +151 -0
- data/ext/nokogiri/include/libxml2/libxml/globals.h +508 -0
- data/ext/nokogiri/include/libxml2/libxml/hash.h +236 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
- data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +163 -0
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +81 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +1241 -0
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +644 -0
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +100 -0
- data/ext/nokogiri/include/libxml2/libxml/relaxng.h +217 -0
- data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +958 -0
- data/ext/nokogiri/include/libxml2/libxml/schematron.h +142 -0
- data/ext/nokogiri/include/libxml2/libxml/threads.h +89 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +1311 -0
- data/ext/nokogiri/include/libxml2/libxml/uri.h +94 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +458 -0
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +129 -0
- data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +366 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +945 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +153 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +224 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +428 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +222 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +88 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +246 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +151 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +202 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +485 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +488 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +566 -0
- data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +632 -0
- data/ext/nokogiri/include/libxml2/libxml/xpointer.h +114 -0
- data/ext/nokogiri/include/libxslt/attributes.h +38 -0
- data/ext/nokogiri/include/libxslt/documents.h +93 -0
- data/ext/nokogiri/include/libxslt/extensions.h +262 -0
- data/ext/nokogiri/include/libxslt/extra.h +72 -0
- data/ext/nokogiri/include/libxslt/functions.h +78 -0
- data/ext/nokogiri/include/libxslt/imports.h +75 -0
- data/ext/nokogiri/include/libxslt/keys.h +53 -0
- data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
- data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
- data/ext/nokogiri/include/libxslt/pattern.h +84 -0
- data/ext/nokogiri/include/libxslt/preproc.h +43 -0
- data/ext/nokogiri/include/libxslt/security.h +104 -0
- data/ext/nokogiri/include/libxslt/templates.h +77 -0
- data/ext/nokogiri/include/libxslt/transform.h +207 -0
- data/ext/nokogiri/include/libxslt/variables.h +118 -0
- data/ext/nokogiri/include/libxslt/xslt.h +110 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +1978 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +180 -0
- data/ext/nokogiri/include/libxslt/xsltexports.h +142 -0
- data/ext/nokogiri/include/libxslt/xsltlocale.h +76 -0
- data/ext/nokogiri/include/libxslt/xsltutils.h +313 -0
- data/ext/nokogiri/nokogiri.c +34 -40
- data/ext/nokogiri/nokogiri.h +26 -17
- data/ext/nokogiri/xml_document.c +18 -4
- data/ext/nokogiri/xml_io.c +8 -6
- data/ext/nokogiri/xml_node.c +21 -1
- data/ext/nokogiri/xml_node_set.c +1 -1
- data/ext/nokogiri/xml_reader.c +6 -17
- data/ext/nokogiri/xml_relax_ng.c +29 -11
- data/ext/nokogiri/xml_sax_parser.c +2 -7
- data/ext/nokogiri/xml_sax_parser_context.c +2 -2
- data/ext/nokogiri/xml_schema.c +55 -13
- data/ext/nokogiri/xml_xpath_context.c +80 -4
- data/ext/nokogiri/xslt_stylesheet.c +1 -8
- data/lib/nokogiri.rb +22 -22
- data/lib/nokogiri/2.5/nokogiri.so +0 -0
- data/lib/nokogiri/2.6/nokogiri.so +0 -0
- data/lib/nokogiri/2.7/nokogiri.so +0 -0
- data/lib/nokogiri/3.0/nokogiri.so +0 -0
- data/lib/nokogiri/css.rb +1 -0
- data/lib/nokogiri/css/node.rb +1 -0
- data/lib/nokogiri/css/parser.rb +63 -62
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +39 -36
- data/lib/nokogiri/css/syntax_error.rb +1 -0
- data/lib/nokogiri/css/tokenizer.rb +1 -0
- data/lib/nokogiri/css/xpath_visitor.rb +73 -43
- data/lib/nokogiri/decorators/slop.rb +1 -0
- data/lib/nokogiri/html.rb +1 -0
- data/lib/nokogiri/html/builder.rb +1 -0
- data/lib/nokogiri/html/document.rb +13 -26
- data/lib/nokogiri/html/document_fragment.rb +1 -0
- data/lib/nokogiri/html/element_description.rb +1 -0
- data/lib/nokogiri/html/element_description_defaults.rb +1 -0
- data/lib/nokogiri/html/entity_lookup.rb +1 -0
- data/lib/nokogiri/html/sax/parser.rb +1 -0
- data/lib/nokogiri/html/sax/parser_context.rb +1 -0
- data/lib/nokogiri/html/sax/push_parser.rb +1 -0
- data/lib/nokogiri/jruby/dependencies.rb +20 -0
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version.rb +3 -109
- data/lib/nokogiri/version/constant.rb +5 -0
- data/lib/nokogiri/version/info.rb +182 -0
- data/lib/nokogiri/xml.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +1 -0
- data/lib/nokogiri/xml/attribute_decl.rb +1 -0
- data/lib/nokogiri/xml/builder.rb +3 -2
- data/lib/nokogiri/xml/cdata.rb +1 -0
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +20 -15
- data/lib/nokogiri/xml/document_fragment.rb +5 -6
- data/lib/nokogiri/xml/dtd.rb +1 -0
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +1 -0
- data/lib/nokogiri/xml/node.rb +587 -249
- data/lib/nokogiri/xml/node/save_options.rb +1 -0
- data/lib/nokogiri/xml/node_set.rb +1 -0
- data/lib/nokogiri/xml/notation.rb +1 -0
- data/lib/nokogiri/xml/parse_options.rb +10 -3
- data/lib/nokogiri/xml/pp.rb +1 -0
- data/lib/nokogiri/xml/pp/character_data.rb +1 -0
- data/lib/nokogiri/xml/pp/node.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +1 -0
- data/lib/nokogiri/xml/reader.rb +7 -3
- data/lib/nokogiri/xml/relax_ng.rb +7 -2
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +1 -0
- data/lib/nokogiri/xml/sax/parser.rb +1 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +13 -4
- data/lib/nokogiri/xml/searchable.rb +25 -16
- data/lib/nokogiri/xml/syntax_error.rb +1 -0
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -0
- data/lib/nokogiri/xml/xpath_context.rb +1 -0
- data/lib/nokogiri/xslt.rb +1 -0
- data/lib/nokogiri/xslt/stylesheet.rb +1 -0
- data/lib/xsd/xmlparser/nokogiri.rb +1 -0
- metadata +148 -131
- data/lib/nokogiri/2.3/nokogiri.so +0 -0
- data/lib/nokogiri/2.4/nokogiri.so +0 -0
@@ -0,0 +1,96 @@
|
|
1
|
+
/*
|
2
|
+
* Summary: old DocBook SGML parser
|
3
|
+
* Description: interface for a DocBook SGML non-verifying parser
|
4
|
+
* This code is DEPRECATED, and should not be used anymore.
|
5
|
+
*
|
6
|
+
* Copy: See Copyright for the status of this software.
|
7
|
+
*
|
8
|
+
* Author: Daniel Veillard
|
9
|
+
*/
|
10
|
+
|
11
|
+
#ifndef __DOCB_PARSER_H__
|
12
|
+
#define __DOCB_PARSER_H__
|
13
|
+
#include <libxml/xmlversion.h>
|
14
|
+
|
15
|
+
#ifdef LIBXML_DOCB_ENABLED
|
16
|
+
|
17
|
+
#include <libxml/parser.h>
|
18
|
+
#include <libxml/parserInternals.h>
|
19
|
+
|
20
|
+
#ifndef IN_LIBXML
|
21
|
+
#ifdef __GNUC__
|
22
|
+
#warning "The DOCBparser module has been deprecated in libxml2-2.6.0"
|
23
|
+
#endif
|
24
|
+
#endif
|
25
|
+
|
26
|
+
#ifdef __cplusplus
|
27
|
+
extern "C" {
|
28
|
+
#endif
|
29
|
+
|
30
|
+
/*
|
31
|
+
* Most of the back-end structures from XML and SGML are shared.
|
32
|
+
*/
|
33
|
+
typedef xmlParserCtxt docbParserCtxt;
|
34
|
+
typedef xmlParserCtxtPtr docbParserCtxtPtr;
|
35
|
+
typedef xmlSAXHandler docbSAXHandler;
|
36
|
+
typedef xmlSAXHandlerPtr docbSAXHandlerPtr;
|
37
|
+
typedef xmlParserInput docbParserInput;
|
38
|
+
typedef xmlParserInputPtr docbParserInputPtr;
|
39
|
+
typedef xmlDocPtr docbDocPtr;
|
40
|
+
|
41
|
+
/*
|
42
|
+
* There are only a few public functions.
|
43
|
+
*/
|
44
|
+
XMLPUBFUN int XMLCALL
|
45
|
+
docbEncodeEntities(unsigned char *out,
|
46
|
+
int *outlen,
|
47
|
+
const unsigned char *in,
|
48
|
+
int *inlen, int quoteChar);
|
49
|
+
|
50
|
+
XMLPUBFUN docbDocPtr XMLCALL
|
51
|
+
docbSAXParseDoc (xmlChar *cur,
|
52
|
+
const char *encoding,
|
53
|
+
docbSAXHandlerPtr sax,
|
54
|
+
void *userData);
|
55
|
+
XMLPUBFUN docbDocPtr XMLCALL
|
56
|
+
docbParseDoc (xmlChar *cur,
|
57
|
+
const char *encoding);
|
58
|
+
XMLPUBFUN docbDocPtr XMLCALL
|
59
|
+
docbSAXParseFile (const char *filename,
|
60
|
+
const char *encoding,
|
61
|
+
docbSAXHandlerPtr sax,
|
62
|
+
void *userData);
|
63
|
+
XMLPUBFUN docbDocPtr XMLCALL
|
64
|
+
docbParseFile (const char *filename,
|
65
|
+
const char *encoding);
|
66
|
+
|
67
|
+
/**
|
68
|
+
* Interfaces for the Push mode.
|
69
|
+
*/
|
70
|
+
XMLPUBFUN void XMLCALL
|
71
|
+
docbFreeParserCtxt (docbParserCtxtPtr ctxt);
|
72
|
+
XMLPUBFUN docbParserCtxtPtr XMLCALL
|
73
|
+
docbCreatePushParserCtxt(docbSAXHandlerPtr sax,
|
74
|
+
void *user_data,
|
75
|
+
const char *chunk,
|
76
|
+
int size,
|
77
|
+
const char *filename,
|
78
|
+
xmlCharEncoding enc);
|
79
|
+
XMLPUBFUN int XMLCALL
|
80
|
+
docbParseChunk (docbParserCtxtPtr ctxt,
|
81
|
+
const char *chunk,
|
82
|
+
int size,
|
83
|
+
int terminate);
|
84
|
+
XMLPUBFUN docbParserCtxtPtr XMLCALL
|
85
|
+
docbCreateFileParserCtxt(const char *filename,
|
86
|
+
const char *encoding);
|
87
|
+
XMLPUBFUN int XMLCALL
|
88
|
+
docbParseDocument (docbParserCtxtPtr ctxt);
|
89
|
+
|
90
|
+
#ifdef __cplusplus
|
91
|
+
}
|
92
|
+
#endif
|
93
|
+
|
94
|
+
#endif /* LIBXML_DOCB_ENABLED */
|
95
|
+
|
96
|
+
#endif /* __DOCB_PARSER_H__ */
|
@@ -0,0 +1,306 @@
|
|
1
|
+
/*
|
2
|
+
* Summary: interface for an HTML 4.0 non-verifying parser
|
3
|
+
* Description: this module implements an HTML 4.0 non-verifying parser
|
4
|
+
* with API compatible with the XML parser ones. It should
|
5
|
+
* be able to parse "real world" HTML, even if severely
|
6
|
+
* broken from a specification point of view.
|
7
|
+
*
|
8
|
+
* Copy: See Copyright for the status of this software.
|
9
|
+
*
|
10
|
+
* Author: Daniel Veillard
|
11
|
+
*/
|
12
|
+
|
13
|
+
#ifndef __HTML_PARSER_H__
|
14
|
+
#define __HTML_PARSER_H__
|
15
|
+
#include <libxml/xmlversion.h>
|
16
|
+
#include <libxml/parser.h>
|
17
|
+
|
18
|
+
#ifdef LIBXML_HTML_ENABLED
|
19
|
+
|
20
|
+
#ifdef __cplusplus
|
21
|
+
extern "C" {
|
22
|
+
#endif
|
23
|
+
|
24
|
+
/*
|
25
|
+
* Most of the back-end structures from XML and HTML are shared.
|
26
|
+
*/
|
27
|
+
typedef xmlParserCtxt htmlParserCtxt;
|
28
|
+
typedef xmlParserCtxtPtr htmlParserCtxtPtr;
|
29
|
+
typedef xmlParserNodeInfo htmlParserNodeInfo;
|
30
|
+
typedef xmlSAXHandler htmlSAXHandler;
|
31
|
+
typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
|
32
|
+
typedef xmlParserInput htmlParserInput;
|
33
|
+
typedef xmlParserInputPtr htmlParserInputPtr;
|
34
|
+
typedef xmlDocPtr htmlDocPtr;
|
35
|
+
typedef xmlNodePtr htmlNodePtr;
|
36
|
+
|
37
|
+
/*
|
38
|
+
* Internal description of an HTML element, representing HTML 4.01
|
39
|
+
* and XHTML 1.0 (which share the same structure).
|
40
|
+
*/
|
41
|
+
typedef struct _htmlElemDesc htmlElemDesc;
|
42
|
+
typedef htmlElemDesc *htmlElemDescPtr;
|
43
|
+
struct _htmlElemDesc {
|
44
|
+
const char *name; /* The tag name */
|
45
|
+
char startTag; /* Whether the start tag can be implied */
|
46
|
+
char endTag; /* Whether the end tag can be implied */
|
47
|
+
char saveEndTag; /* Whether the end tag should be saved */
|
48
|
+
char empty; /* Is this an empty element ? */
|
49
|
+
char depr; /* Is this a deprecated element ? */
|
50
|
+
char dtd; /* 1: only in Loose DTD, 2: only Frameset one */
|
51
|
+
char isinline; /* is this a block 0 or inline 1 element */
|
52
|
+
const char *desc; /* the description */
|
53
|
+
|
54
|
+
/* NRK Jan.2003
|
55
|
+
* New fields encapsulating HTML structure
|
56
|
+
*
|
57
|
+
* Bugs:
|
58
|
+
* This is a very limited representation. It fails to tell us when
|
59
|
+
* an element *requires* subelements (we only have whether they're
|
60
|
+
* allowed or not), and it doesn't tell us where CDATA and PCDATA
|
61
|
+
* are allowed. Some element relationships are not fully represented:
|
62
|
+
* these are flagged with the word MODIFIER
|
63
|
+
*/
|
64
|
+
const char** subelts; /* allowed sub-elements of this element */
|
65
|
+
const char* defaultsubelt; /* subelement for suggested auto-repair
|
66
|
+
if necessary or NULL */
|
67
|
+
const char** attrs_opt; /* Optional Attributes */
|
68
|
+
const char** attrs_depr; /* Additional deprecated attributes */
|
69
|
+
const char** attrs_req; /* Required attributes */
|
70
|
+
};
|
71
|
+
|
72
|
+
/*
|
73
|
+
* Internal description of an HTML entity.
|
74
|
+
*/
|
75
|
+
typedef struct _htmlEntityDesc htmlEntityDesc;
|
76
|
+
typedef htmlEntityDesc *htmlEntityDescPtr;
|
77
|
+
struct _htmlEntityDesc {
|
78
|
+
unsigned int value; /* the UNICODE value for the character */
|
79
|
+
const char *name; /* The entity name */
|
80
|
+
const char *desc; /* the description */
|
81
|
+
};
|
82
|
+
|
83
|
+
/*
|
84
|
+
* There are only a few public functions.
|
85
|
+
*/
|
86
|
+
XMLPUBFUN const htmlElemDesc * XMLCALL
|
87
|
+
htmlTagLookup (const xmlChar *tag);
|
88
|
+
XMLPUBFUN const htmlEntityDesc * XMLCALL
|
89
|
+
htmlEntityLookup(const xmlChar *name);
|
90
|
+
XMLPUBFUN const htmlEntityDesc * XMLCALL
|
91
|
+
htmlEntityValueLookup(unsigned int value);
|
92
|
+
|
93
|
+
XMLPUBFUN int XMLCALL
|
94
|
+
htmlIsAutoClosed(htmlDocPtr doc,
|
95
|
+
htmlNodePtr elem);
|
96
|
+
XMLPUBFUN int XMLCALL
|
97
|
+
htmlAutoCloseTag(htmlDocPtr doc,
|
98
|
+
const xmlChar *name,
|
99
|
+
htmlNodePtr elem);
|
100
|
+
XMLPUBFUN const htmlEntityDesc * XMLCALL
|
101
|
+
htmlParseEntityRef(htmlParserCtxtPtr ctxt,
|
102
|
+
const xmlChar **str);
|
103
|
+
XMLPUBFUN int XMLCALL
|
104
|
+
htmlParseCharRef(htmlParserCtxtPtr ctxt);
|
105
|
+
XMLPUBFUN void XMLCALL
|
106
|
+
htmlParseElement(htmlParserCtxtPtr ctxt);
|
107
|
+
|
108
|
+
XMLPUBFUN htmlParserCtxtPtr XMLCALL
|
109
|
+
htmlNewParserCtxt(void);
|
110
|
+
|
111
|
+
XMLPUBFUN htmlParserCtxtPtr XMLCALL
|
112
|
+
htmlCreateMemoryParserCtxt(const char *buffer,
|
113
|
+
int size);
|
114
|
+
|
115
|
+
XMLPUBFUN int XMLCALL
|
116
|
+
htmlParseDocument(htmlParserCtxtPtr ctxt);
|
117
|
+
XMLPUBFUN htmlDocPtr XMLCALL
|
118
|
+
htmlSAXParseDoc (const xmlChar *cur,
|
119
|
+
const char *encoding,
|
120
|
+
htmlSAXHandlerPtr sax,
|
121
|
+
void *userData);
|
122
|
+
XMLPUBFUN htmlDocPtr XMLCALL
|
123
|
+
htmlParseDoc (const xmlChar *cur,
|
124
|
+
const char *encoding);
|
125
|
+
XMLPUBFUN htmlDocPtr XMLCALL
|
126
|
+
htmlSAXParseFile(const char *filename,
|
127
|
+
const char *encoding,
|
128
|
+
htmlSAXHandlerPtr sax,
|
129
|
+
void *userData);
|
130
|
+
XMLPUBFUN htmlDocPtr XMLCALL
|
131
|
+
htmlParseFile (const char *filename,
|
132
|
+
const char *encoding);
|
133
|
+
XMLPUBFUN int XMLCALL
|
134
|
+
UTF8ToHtml (unsigned char *out,
|
135
|
+
int *outlen,
|
136
|
+
const unsigned char *in,
|
137
|
+
int *inlen);
|
138
|
+
XMLPUBFUN int XMLCALL
|
139
|
+
htmlEncodeEntities(unsigned char *out,
|
140
|
+
int *outlen,
|
141
|
+
const unsigned char *in,
|
142
|
+
int *inlen, int quoteChar);
|
143
|
+
XMLPUBFUN int XMLCALL
|
144
|
+
htmlIsScriptAttribute(const xmlChar *name);
|
145
|
+
XMLPUBFUN int XMLCALL
|
146
|
+
htmlHandleOmittedElem(int val);
|
147
|
+
|
148
|
+
#ifdef LIBXML_PUSH_ENABLED
|
149
|
+
/**
|
150
|
+
* Interfaces for the Push mode.
|
151
|
+
*/
|
152
|
+
XMLPUBFUN htmlParserCtxtPtr XMLCALL
|
153
|
+
htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax,
|
154
|
+
void *user_data,
|
155
|
+
const char *chunk,
|
156
|
+
int size,
|
157
|
+
const char *filename,
|
158
|
+
xmlCharEncoding enc);
|
159
|
+
XMLPUBFUN int XMLCALL
|
160
|
+
htmlParseChunk (htmlParserCtxtPtr ctxt,
|
161
|
+
const char *chunk,
|
162
|
+
int size,
|
163
|
+
int terminate);
|
164
|
+
#endif /* LIBXML_PUSH_ENABLED */
|
165
|
+
|
166
|
+
XMLPUBFUN void XMLCALL
|
167
|
+
htmlFreeParserCtxt (htmlParserCtxtPtr ctxt);
|
168
|
+
|
169
|
+
/*
|
170
|
+
* New set of simpler/more flexible APIs
|
171
|
+
*/
|
172
|
+
/**
|
173
|
+
* htmlParserOption:
|
174
|
+
*
|
175
|
+
* This is the set of HTML parser options that can be passed down
|
176
|
+
* to the htmlReadDoc() and similar calls.
|
177
|
+
*/
|
178
|
+
typedef enum {
|
179
|
+
HTML_PARSE_RECOVER = 1<<0, /* Relaxed parsing */
|
180
|
+
HTML_PARSE_NODEFDTD = 1<<2, /* do not default a doctype if not found */
|
181
|
+
HTML_PARSE_NOERROR = 1<<5, /* suppress error reports */
|
182
|
+
HTML_PARSE_NOWARNING= 1<<6, /* suppress warning reports */
|
183
|
+
HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */
|
184
|
+
HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */
|
185
|
+
HTML_PARSE_NONET = 1<<11,/* Forbid network access */
|
186
|
+
HTML_PARSE_NOIMPLIED= 1<<13,/* Do not add implied html/body... elements */
|
187
|
+
HTML_PARSE_COMPACT = 1<<16,/* compact small text nodes */
|
188
|
+
HTML_PARSE_IGNORE_ENC=1<<21 /* ignore internal document encoding hint */
|
189
|
+
} htmlParserOption;
|
190
|
+
|
191
|
+
XMLPUBFUN void XMLCALL
|
192
|
+
htmlCtxtReset (htmlParserCtxtPtr ctxt);
|
193
|
+
XMLPUBFUN int XMLCALL
|
194
|
+
htmlCtxtUseOptions (htmlParserCtxtPtr ctxt,
|
195
|
+
int options);
|
196
|
+
XMLPUBFUN htmlDocPtr XMLCALL
|
197
|
+
htmlReadDoc (const xmlChar *cur,
|
198
|
+
const char *URL,
|
199
|
+
const char *encoding,
|
200
|
+
int options);
|
201
|
+
XMLPUBFUN htmlDocPtr XMLCALL
|
202
|
+
htmlReadFile (const char *URL,
|
203
|
+
const char *encoding,
|
204
|
+
int options);
|
205
|
+
XMLPUBFUN htmlDocPtr XMLCALL
|
206
|
+
htmlReadMemory (const char *buffer,
|
207
|
+
int size,
|
208
|
+
const char *URL,
|
209
|
+
const char *encoding,
|
210
|
+
int options);
|
211
|
+
XMLPUBFUN htmlDocPtr XMLCALL
|
212
|
+
htmlReadFd (int fd,
|
213
|
+
const char *URL,
|
214
|
+
const char *encoding,
|
215
|
+
int options);
|
216
|
+
XMLPUBFUN htmlDocPtr XMLCALL
|
217
|
+
htmlReadIO (xmlInputReadCallback ioread,
|
218
|
+
xmlInputCloseCallback ioclose,
|
219
|
+
void *ioctx,
|
220
|
+
const char *URL,
|
221
|
+
const char *encoding,
|
222
|
+
int options);
|
223
|
+
XMLPUBFUN htmlDocPtr XMLCALL
|
224
|
+
htmlCtxtReadDoc (xmlParserCtxtPtr ctxt,
|
225
|
+
const xmlChar *cur,
|
226
|
+
const char *URL,
|
227
|
+
const char *encoding,
|
228
|
+
int options);
|
229
|
+
XMLPUBFUN htmlDocPtr XMLCALL
|
230
|
+
htmlCtxtReadFile (xmlParserCtxtPtr ctxt,
|
231
|
+
const char *filename,
|
232
|
+
const char *encoding,
|
233
|
+
int options);
|
234
|
+
XMLPUBFUN htmlDocPtr XMLCALL
|
235
|
+
htmlCtxtReadMemory (xmlParserCtxtPtr ctxt,
|
236
|
+
const char *buffer,
|
237
|
+
int size,
|
238
|
+
const char *URL,
|
239
|
+
const char *encoding,
|
240
|
+
int options);
|
241
|
+
XMLPUBFUN htmlDocPtr XMLCALL
|
242
|
+
htmlCtxtReadFd (xmlParserCtxtPtr ctxt,
|
243
|
+
int fd,
|
244
|
+
const char *URL,
|
245
|
+
const char *encoding,
|
246
|
+
int options);
|
247
|
+
XMLPUBFUN htmlDocPtr XMLCALL
|
248
|
+
htmlCtxtReadIO (xmlParserCtxtPtr ctxt,
|
249
|
+
xmlInputReadCallback ioread,
|
250
|
+
xmlInputCloseCallback ioclose,
|
251
|
+
void *ioctx,
|
252
|
+
const char *URL,
|
253
|
+
const char *encoding,
|
254
|
+
int options);
|
255
|
+
|
256
|
+
/* NRK/Jan2003: further knowledge of HTML structure
|
257
|
+
*/
|
258
|
+
typedef enum {
|
259
|
+
HTML_NA = 0 , /* something we don't check at all */
|
260
|
+
HTML_INVALID = 0x1 ,
|
261
|
+
HTML_DEPRECATED = 0x2 ,
|
262
|
+
HTML_VALID = 0x4 ,
|
263
|
+
HTML_REQUIRED = 0xc /* VALID bit set so ( & HTML_VALID ) is TRUE */
|
264
|
+
} htmlStatus ;
|
265
|
+
|
266
|
+
/* Using htmlElemDesc rather than name here, to emphasise the fact
|
267
|
+
that otherwise there's a lookup overhead
|
268
|
+
*/
|
269
|
+
XMLPUBFUN htmlStatus XMLCALL htmlAttrAllowed(const htmlElemDesc*, const xmlChar*, int) ;
|
270
|
+
XMLPUBFUN int XMLCALL htmlElementAllowedHere(const htmlElemDesc*, const xmlChar*) ;
|
271
|
+
XMLPUBFUN htmlStatus XMLCALL htmlElementStatusHere(const htmlElemDesc*, const htmlElemDesc*) ;
|
272
|
+
XMLPUBFUN htmlStatus XMLCALL htmlNodeStatus(const htmlNodePtr, int) ;
|
273
|
+
/**
|
274
|
+
* htmlDefaultSubelement:
|
275
|
+
* @elt: HTML element
|
276
|
+
*
|
277
|
+
* Returns the default subelement for this element
|
278
|
+
*/
|
279
|
+
/* The argument and the whole expansion are parenthesized so that expression
 * arguments (e.g. `desc + 1` or `&desc`) bind before `->` is applied. */
#define htmlDefaultSubelement(elt) ((elt)->defaultsubelt)
|
280
|
+
/**
|
281
|
+
* htmlElementAllowedHereDesc:
|
282
|
+
* @parent: HTML parent element
|
283
|
+
* @elt: HTML element
|
284
|
+
*
|
285
|
+
* Checks whether an HTML element description may be a
|
286
|
+
* direct child of the specified element.
|
287
|
+
*
|
288
|
+
* Returns 1 if allowed; 0 otherwise.
|
289
|
+
*/
|
290
|
+
#define htmlElementAllowedHereDesc(parent,elt) \
|
291
|
+
htmlElementAllowedHere((parent), (elt)->name)
|
292
|
+
/**
|
293
|
+
* htmlRequiredAttrs:
|
294
|
+
* @elt: HTML element
|
295
|
+
*
|
296
|
+
* Returns the attributes required for the specified element.
|
297
|
+
*/
|
298
|
+
#define htmlRequiredAttrs(elt) (elt)->attrs_req
|
299
|
+
|
300
|
+
|
301
|
+
#ifdef __cplusplus
|
302
|
+
}
|
303
|
+
#endif
|
304
|
+
|
305
|
+
#endif /* LIBXML_HTML_ENABLED */
|
306
|
+
#endif /* __HTML_PARSER_H__ */
|
@@ -0,0 +1,147 @@
|
|
1
|
+
/*
|
2
|
+
* Summary: specific APIs to process HTML tree, especially serialization
|
3
|
+
* Description: this module implements a few functions needed to process
|
4
|
+
* tree in an HTML specific way.
|
5
|
+
*
|
6
|
+
* Copy: See Copyright for the status of this software.
|
7
|
+
*
|
8
|
+
* Author: Daniel Veillard
|
9
|
+
*/
|
10
|
+
|
11
|
+
#ifndef __HTML_TREE_H__
|
12
|
+
#define __HTML_TREE_H__
|
13
|
+
|
14
|
+
#include <stdio.h>
|
15
|
+
#include <libxml/xmlversion.h>
|
16
|
+
#include <libxml/tree.h>
|
17
|
+
#include <libxml/HTMLparser.h>
|
18
|
+
|
19
|
+
#ifdef LIBXML_HTML_ENABLED
|
20
|
+
|
21
|
+
#ifdef __cplusplus
|
22
|
+
extern "C" {
|
23
|
+
#endif
|
24
|
+
|
25
|
+
|
26
|
+
/**
|
27
|
+
* HTML_TEXT_NODE:
|
28
|
+
*
|
29
|
+
* Macro. A text node in an HTML document is really implemented
|
30
|
+
* the same way as a text node in an XML document.
|
31
|
+
*/
|
32
|
+
#define HTML_TEXT_NODE XML_TEXT_NODE
|
33
|
+
/**
|
34
|
+
* HTML_ENTITY_REF_NODE:
|
35
|
+
*
|
36
|
+
* Macro. An entity reference in an HTML document is really implemented
|
37
|
+
* the same way as an entity reference in an XML document.
|
38
|
+
*/
|
39
|
+
#define HTML_ENTITY_REF_NODE XML_ENTITY_REF_NODE
|
40
|
+
/**
|
41
|
+
* HTML_COMMENT_NODE:
|
42
|
+
*
|
43
|
+
* Macro. A comment in an HTML document is really implemented
|
44
|
+
* the same way as a comment in an XML document.
|
45
|
+
*/
|
46
|
+
#define HTML_COMMENT_NODE XML_COMMENT_NODE
|
47
|
+
/**
|
48
|
+
* HTML_PRESERVE_NODE:
|
49
|
+
*
|
50
|
+
* Macro. A preserved node in an HTML document is really implemented
|
51
|
+
* the same way as a CDATA section in an XML document.
|
52
|
+
*/
|
53
|
+
#define HTML_PRESERVE_NODE XML_CDATA_SECTION_NODE
|
54
|
+
/**
|
55
|
+
* HTML_PI_NODE:
|
56
|
+
*
|
57
|
+
* Macro. A processing instruction in an HTML document is really implemented
|
58
|
+
* the same way as a processing instruction in an XML document.
|
59
|
+
*/
|
60
|
+
#define HTML_PI_NODE XML_PI_NODE
|
61
|
+
|
62
|
+
XMLPUBFUN htmlDocPtr XMLCALL
|
63
|
+
htmlNewDoc (const xmlChar *URI,
|
64
|
+
const xmlChar *ExternalID);
|
65
|
+
XMLPUBFUN htmlDocPtr XMLCALL
|
66
|
+
htmlNewDocNoDtD (const xmlChar *URI,
|
67
|
+
const xmlChar *ExternalID);
|
68
|
+
XMLPUBFUN const xmlChar * XMLCALL
|
69
|
+
htmlGetMetaEncoding (htmlDocPtr doc);
|
70
|
+
XMLPUBFUN int XMLCALL
|
71
|
+
htmlSetMetaEncoding (htmlDocPtr doc,
|
72
|
+
const xmlChar *encoding);
|
73
|
+
#ifdef LIBXML_OUTPUT_ENABLED
|
74
|
+
XMLPUBFUN void XMLCALL
|
75
|
+
htmlDocDumpMemory (xmlDocPtr cur,
|
76
|
+
xmlChar **mem,
|
77
|
+
int *size);
|
78
|
+
XMLPUBFUN void XMLCALL
|
79
|
+
htmlDocDumpMemoryFormat (xmlDocPtr cur,
|
80
|
+
xmlChar **mem,
|
81
|
+
int *size,
|
82
|
+
int format);
|
83
|
+
XMLPUBFUN int XMLCALL
|
84
|
+
htmlDocDump (FILE *f,
|
85
|
+
xmlDocPtr cur);
|
86
|
+
XMLPUBFUN int XMLCALL
|
87
|
+
htmlSaveFile (const char *filename,
|
88
|
+
xmlDocPtr cur);
|
89
|
+
XMLPUBFUN int XMLCALL
|
90
|
+
htmlNodeDump (xmlBufferPtr buf,
|
91
|
+
xmlDocPtr doc,
|
92
|
+
xmlNodePtr cur);
|
93
|
+
XMLPUBFUN void XMLCALL
|
94
|
+
htmlNodeDumpFile (FILE *out,
|
95
|
+
xmlDocPtr doc,
|
96
|
+
xmlNodePtr cur);
|
97
|
+
XMLPUBFUN int XMLCALL
|
98
|
+
htmlNodeDumpFileFormat (FILE *out,
|
99
|
+
xmlDocPtr doc,
|
100
|
+
xmlNodePtr cur,
|
101
|
+
const char *encoding,
|
102
|
+
int format);
|
103
|
+
XMLPUBFUN int XMLCALL
|
104
|
+
htmlSaveFileEnc (const char *filename,
|
105
|
+
xmlDocPtr cur,
|
106
|
+
const char *encoding);
|
107
|
+
XMLPUBFUN int XMLCALL
|
108
|
+
htmlSaveFileFormat (const char *filename,
|
109
|
+
xmlDocPtr cur,
|
110
|
+
const char *encoding,
|
111
|
+
int format);
|
112
|
+
|
113
|
+
XMLPUBFUN void XMLCALL
|
114
|
+
htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf,
|
115
|
+
xmlDocPtr doc,
|
116
|
+
xmlNodePtr cur,
|
117
|
+
const char *encoding,
|
118
|
+
int format);
|
119
|
+
XMLPUBFUN void XMLCALL
|
120
|
+
htmlDocContentDumpOutput(xmlOutputBufferPtr buf,
|
121
|
+
xmlDocPtr cur,
|
122
|
+
const char *encoding);
|
123
|
+
XMLPUBFUN void XMLCALL
|
124
|
+
htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf,
|
125
|
+
xmlDocPtr cur,
|
126
|
+
const char *encoding,
|
127
|
+
int format);
|
128
|
+
XMLPUBFUN void XMLCALL
|
129
|
+
htmlNodeDumpOutput (xmlOutputBufferPtr buf,
|
130
|
+
xmlDocPtr doc,
|
131
|
+
xmlNodePtr cur,
|
132
|
+
const char *encoding);
|
133
|
+
|
134
|
+
#endif /* LIBXML_OUTPUT_ENABLED */
|
135
|
+
|
136
|
+
XMLPUBFUN int XMLCALL
|
137
|
+
htmlIsBooleanAttr (const xmlChar *name);
|
138
|
+
|
139
|
+
|
140
|
+
#ifdef __cplusplus
|
141
|
+
}
|
142
|
+
#endif
|
143
|
+
|
144
|
+
#endif /* LIBXML_HTML_ENABLED */
|
145
|
+
|
146
|
+
#endif /* __HTML_TREE_H__ */
|
147
|
+
|