nokogiri 1.14.0-arm-linux

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (200) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +2224 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +287 -0
  6. data/bin/nokogiri +131 -0
  7. data/dependencies.yml +41 -0
  8. data/ext/nokogiri/depend +38 -0
  9. data/ext/nokogiri/extconf.rb +1082 -0
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +114 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/include/libexslt/exslt.h +108 -0
  17. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  18. data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
  19. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +306 -0
  20. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  21. data/ext/nokogiri/include/libxml2/libxml/SAX.h +204 -0
  22. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +172 -0
  23. data/ext/nokogiri/include/libxml2/libxml/c14n.h +128 -0
  24. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  25. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  26. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  27. data/ext/nokogiri/include/libxml2/libxml/dict.h +81 -0
  28. data/ext/nokogiri/include/libxml2/libxml/encoding.h +232 -0
  29. data/ext/nokogiri/include/libxml2/libxml/entities.h +153 -0
  30. data/ext/nokogiri/include/libxml2/libxml/globals.h +499 -0
  31. data/ext/nokogiri/include/libxml2/libxml/hash.h +236 -0
  32. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  33. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
  34. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +81 -0
  35. data/ext/nokogiri/include/libxml2/libxml/parser.h +1244 -0
  36. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +656 -0
  37. data/ext/nokogiri/include/libxml2/libxml/pattern.h +100 -0
  38. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +218 -0
  39. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +958 -0
  40. data/ext/nokogiri/include/libxml2/libxml/schematron.h +142 -0
  41. data/ext/nokogiri/include/libxml2/libxml/threads.h +91 -0
  42. data/ext/nokogiri/include/libxml2/libxml/tree.h +1312 -0
  43. data/ext/nokogiri/include/libxml2/libxml/uri.h +94 -0
  44. data/ext/nokogiri/include/libxml2/libxml/valid.h +463 -0
  45. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +129 -0
  46. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  47. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +368 -0
  48. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +947 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +77 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +226 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +428 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +222 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +88 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +246 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +202 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +503 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +488 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xpath.h +575 -0
  63. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +632 -0
  64. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +137 -0
  65. data/ext/nokogiri/include/libxslt/attributes.h +38 -0
  66. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  67. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  68. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  69. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  70. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  71. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  72. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  73. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  74. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  75. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  76. data/ext/nokogiri/include/libxslt/security.h +104 -0
  77. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  78. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  79. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  80. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  81. data/ext/nokogiri/include/libxslt/xsltInternals.h +1982 -0
  82. data/ext/nokogiri/include/libxslt/xsltconfig.h +179 -0
  83. data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
  84. data/ext/nokogiri/include/libxslt/xsltlocale.h +76 -0
  85. data/ext/nokogiri/include/libxslt/xsltutils.h +310 -0
  86. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  87. data/ext/nokogiri/nokogiri.c +260 -0
  88. data/ext/nokogiri/nokogiri.h +235 -0
  89. data/ext/nokogiri/test_global_handlers.c +40 -0
  90. data/ext/nokogiri/xml_attr.c +103 -0
  91. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  92. data/ext/nokogiri/xml_cdata.c +57 -0
  93. data/ext/nokogiri/xml_comment.c +62 -0
  94. data/ext/nokogiri/xml_document.c +689 -0
  95. data/ext/nokogiri/xml_document_fragment.c +44 -0
  96. data/ext/nokogiri/xml_dtd.c +208 -0
  97. data/ext/nokogiri/xml_element_content.c +128 -0
  98. data/ext/nokogiri/xml_element_decl.c +69 -0
  99. data/ext/nokogiri/xml_encoding_handler.c +104 -0
  100. data/ext/nokogiri/xml_entity_decl.c +112 -0
  101. data/ext/nokogiri/xml_entity_reference.c +50 -0
  102. data/ext/nokogiri/xml_namespace.c +186 -0
  103. data/ext/nokogiri/xml_node.c +2425 -0
  104. data/ext/nokogiri/xml_node_set.c +496 -0
  105. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  106. data/ext/nokogiri/xml_reader.c +794 -0
  107. data/ext/nokogiri/xml_relax_ng.c +183 -0
  108. data/ext/nokogiri/xml_sax_parser.c +316 -0
  109. data/ext/nokogiri/xml_sax_parser_context.c +283 -0
  110. data/ext/nokogiri/xml_sax_push_parser.c +166 -0
  111. data/ext/nokogiri/xml_schema.c +282 -0
  112. data/ext/nokogiri/xml_syntax_error.c +85 -0
  113. data/ext/nokogiri/xml_text.c +48 -0
  114. data/ext/nokogiri/xml_xpath_context.c +413 -0
  115. data/ext/nokogiri/xslt_stylesheet.c +363 -0
  116. data/gumbo-parser/CHANGES.md +63 -0
  117. data/gumbo-parser/Makefile +111 -0
  118. data/gumbo-parser/THANKS +27 -0
  119. data/lib/nokogiri/2.7/nokogiri.so +0 -0
  120. data/lib/nokogiri/3.0/nokogiri.so +0 -0
  121. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  122. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  123. data/lib/nokogiri/class_resolver.rb +67 -0
  124. data/lib/nokogiri/css/node.rb +54 -0
  125. data/lib/nokogiri/css/parser.rb +770 -0
  126. data/lib/nokogiri/css/parser.y +277 -0
  127. data/lib/nokogiri/css/parser_extras.rb +96 -0
  128. data/lib/nokogiri/css/syntax_error.rb +9 -0
  129. data/lib/nokogiri/css/tokenizer.rb +155 -0
  130. data/lib/nokogiri/css/tokenizer.rex +56 -0
  131. data/lib/nokogiri/css/xpath_visitor.rb +359 -0
  132. data/lib/nokogiri/css.rb +66 -0
  133. data/lib/nokogiri/decorators/slop.rb +44 -0
  134. data/lib/nokogiri/encoding_handler.rb +57 -0
  135. data/lib/nokogiri/extension.rb +32 -0
  136. data/lib/nokogiri/gumbo.rb +15 -0
  137. data/lib/nokogiri/html.rb +48 -0
  138. data/lib/nokogiri/html4/builder.rb +37 -0
  139. data/lib/nokogiri/html4/document.rb +214 -0
  140. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  141. data/lib/nokogiri/html4/element_description.rb +25 -0
  142. data/lib/nokogiri/html4/element_description_defaults.rb +572 -0
  143. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  144. data/lib/nokogiri/html4/entity_lookup.rb +15 -0
  145. data/lib/nokogiri/html4/sax/parser.rb +63 -0
  146. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  147. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  148. data/lib/nokogiri/html4.rb +47 -0
  149. data/lib/nokogiri/html5/document.rb +168 -0
  150. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  151. data/lib/nokogiri/html5/node.rb +98 -0
  152. data/lib/nokogiri/html5.rb +389 -0
  153. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  154. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  155. data/lib/nokogiri/syntax_error.rb +6 -0
  156. data/lib/nokogiri/version/constant.rb +6 -0
  157. data/lib/nokogiri/version/info.rb +223 -0
  158. data/lib/nokogiri/version.rb +4 -0
  159. data/lib/nokogiri/xml/attr.rb +66 -0
  160. data/lib/nokogiri/xml/attribute_decl.rb +20 -0
  161. data/lib/nokogiri/xml/builder.rb +487 -0
  162. data/lib/nokogiri/xml/cdata.rb +13 -0
  163. data/lib/nokogiri/xml/character_data.rb +9 -0
  164. data/lib/nokogiri/xml/document.rb +471 -0
  165. data/lib/nokogiri/xml/document_fragment.rb +205 -0
  166. data/lib/nokogiri/xml/dtd.rb +34 -0
  167. data/lib/nokogiri/xml/element_content.rb +38 -0
  168. data/lib/nokogiri/xml/element_decl.rb +15 -0
  169. data/lib/nokogiri/xml/entity_decl.rb +21 -0
  170. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  171. data/lib/nokogiri/xml/namespace.rb +58 -0
  172. data/lib/nokogiri/xml/node/save_options.rb +68 -0
  173. data/lib/nokogiri/xml/node.rb +1563 -0
  174. data/lib/nokogiri/xml/node_set.rb +446 -0
  175. data/lib/nokogiri/xml/notation.rb +19 -0
  176. data/lib/nokogiri/xml/parse_options.rb +213 -0
  177. data/lib/nokogiri/xml/pp/character_data.rb +21 -0
  178. data/lib/nokogiri/xml/pp/node.rb +57 -0
  179. data/lib/nokogiri/xml/pp.rb +4 -0
  180. data/lib/nokogiri/xml/processing_instruction.rb +11 -0
  181. data/lib/nokogiri/xml/reader.rb +105 -0
  182. data/lib/nokogiri/xml/relax_ng.rb +38 -0
  183. data/lib/nokogiri/xml/sax/document.rb +167 -0
  184. data/lib/nokogiri/xml/sax/parser.rb +125 -0
  185. data/lib/nokogiri/xml/sax/parser_context.rb +21 -0
  186. data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
  187. data/lib/nokogiri/xml/sax.rb +6 -0
  188. data/lib/nokogiri/xml/schema.rb +73 -0
  189. data/lib/nokogiri/xml/searchable.rb +270 -0
  190. data/lib/nokogiri/xml/syntax_error.rb +72 -0
  191. data/lib/nokogiri/xml/text.rb +11 -0
  192. data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
  193. data/lib/nokogiri/xml/xpath.rb +21 -0
  194. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  195. data/lib/nokogiri/xml.rb +76 -0
  196. data/lib/nokogiri/xslt/stylesheet.rb +27 -0
  197. data/lib/nokogiri/xslt.rb +65 -0
  198. data/lib/nokogiri.rb +120 -0
  199. data/lib/xsd/xmlparser/nokogiri.rb +106 -0
  200. metadata +317 -0
@@ -0,0 +1,108 @@
1
+ /*
2
+ * Summary: main header file
3
+ *
4
+ * Copy: See Copyright for the status of this software.
5
+ */
6
+
7
+
8
+ #ifndef __EXSLT_H__
9
+ #define __EXSLT_H__
10
+
11
+ #include <libxml/tree.h>
12
+ #include <libxml/xpath.h>
13
+ #include "exsltexports.h"
14
+ #include <libexslt/exsltconfig.h>
15
+
16
+ #ifdef __cplusplus
17
+ extern "C" {
18
+ #endif
19
+
20
+ EXSLTPUBVAR const char *exsltLibraryVersion;
21
+ EXSLTPUBVAR const int exsltLibexsltVersion;
22
+ EXSLTPUBVAR const int exsltLibxsltVersion;
23
+ EXSLTPUBVAR const int exsltLibxmlVersion;
24
+
25
+ /**
26
+ * EXSLT_COMMON_NAMESPACE:
27
+ *
28
+ * Namespace for EXSLT common functions
29
+ */
30
+ #define EXSLT_COMMON_NAMESPACE ((const xmlChar *) "http://exslt.org/common")
31
+ /**
32
+ * EXSLT_CRYPTO_NAMESPACE:
33
+ *
34
+ * Namespace for EXSLT crypto functions
35
+ */
36
+ #define EXSLT_CRYPTO_NAMESPACE ((const xmlChar *) "http://exslt.org/crypto")
37
+ /**
38
+ * EXSLT_MATH_NAMESPACE:
39
+ *
40
+ * Namespace for EXSLT math functions
41
+ */
42
+ #define EXSLT_MATH_NAMESPACE ((const xmlChar *) "http://exslt.org/math")
43
+ /**
44
+ * EXSLT_SETS_NAMESPACE:
45
+ *
46
+ * Namespace for EXSLT set functions
47
+ */
48
+ #define EXSLT_SETS_NAMESPACE ((const xmlChar *) "http://exslt.org/sets")
49
+ /**
50
+ * EXSLT_FUNCTIONS_NAMESPACE:
51
+ *
52
+ * Namespace for EXSLT functions extension functions
53
+ */
54
+ #define EXSLT_FUNCTIONS_NAMESPACE ((const xmlChar *) "http://exslt.org/functions")
55
+ /**
56
+ * EXSLT_STRINGS_NAMESPACE:
57
+ *
58
+ * Namespace for EXSLT strings functions
59
+ */
60
+ #define EXSLT_STRINGS_NAMESPACE ((const xmlChar *) "http://exslt.org/strings")
61
+ /**
62
+ * EXSLT_DATE_NAMESPACE:
63
+ *
64
+ * Namespace for EXSLT date functions
65
+ */
66
+ #define EXSLT_DATE_NAMESPACE ((const xmlChar *) "http://exslt.org/dates-and-times")
67
+ /**
68
+ * EXSLT_DYNAMIC_NAMESPACE:
69
+ *
70
+ * Namespace for EXSLT dynamic functions
71
+ */
72
+ #define EXSLT_DYNAMIC_NAMESPACE ((const xmlChar *) "http://exslt.org/dynamic")
73
+
74
+ /**
75
+ * SAXON_NAMESPACE:
76
+ *
77
+ * Namespace for SAXON extensions functions
78
+ */
79
+ #define SAXON_NAMESPACE ((const xmlChar *) "http://icl.com/saxon")
80
+
81
+ EXSLTPUBFUN void EXSLTCALL exsltCommonRegister (void);
82
+ #ifdef EXSLT_CRYPTO_ENABLED
83
+ EXSLTPUBFUN void EXSLTCALL exsltCryptoRegister (void);
84
+ #endif
85
+ EXSLTPUBFUN void EXSLTCALL exsltMathRegister (void);
86
+ EXSLTPUBFUN void EXSLTCALL exsltSetsRegister (void);
87
+ EXSLTPUBFUN void EXSLTCALL exsltFuncRegister (void);
88
+ EXSLTPUBFUN void EXSLTCALL exsltStrRegister (void);
89
+ EXSLTPUBFUN void EXSLTCALL exsltDateRegister (void);
90
+ EXSLTPUBFUN void EXSLTCALL exsltSaxonRegister (void);
91
+ EXSLTPUBFUN void EXSLTCALL exsltDynRegister(void);
92
+
93
+ EXSLTPUBFUN void EXSLTCALL exsltRegisterAll (void);
94
+
95
+ EXSLTPUBFUN int EXSLTCALL exsltDateXpathCtxtRegister (xmlXPathContextPtr ctxt,
96
+ const xmlChar *prefix);
97
+ EXSLTPUBFUN int EXSLTCALL exsltMathXpathCtxtRegister (xmlXPathContextPtr ctxt,
98
+ const xmlChar *prefix);
99
+ EXSLTPUBFUN int EXSLTCALL exsltSetsXpathCtxtRegister (xmlXPathContextPtr ctxt,
100
+ const xmlChar *prefix);
101
+ EXSLTPUBFUN int EXSLTCALL exsltStrXpathCtxtRegister (xmlXPathContextPtr ctxt,
102
+ const xmlChar *prefix);
103
+
104
+ #ifdef __cplusplus
105
+ }
106
+ #endif
107
+ #endif /* __EXSLT_H__ */
108
+
@@ -0,0 +1,70 @@
1
+ /*
2
+ * exsltconfig.h: compile-time version information for the EXSLT library
3
+ *
4
+ * See Copyright for the status of this software.
5
+ *
6
+ * daniel@veillard.com
7
+ */
8
+
9
+ #ifndef __XML_EXSLTCONFIG_H__
10
+ #define __XML_EXSLTCONFIG_H__
11
+
12
+ #ifdef __cplusplus
13
+ extern "C" {
14
+ #endif
15
+
16
+ /**
17
+ * LIBEXSLT_DOTTED_VERSION:
18
+ *
19
+ * the version string like "1.2.3"
20
+ */
21
+ #define LIBEXSLT_DOTTED_VERSION "0.8.20"
22
+
23
+ /**
24
+ * LIBEXSLT_VERSION:
25
+ *
26
+ * the version number: 1.2.3 value is 10203
27
+ */
28
+ #define LIBEXSLT_VERSION 820
29
+
30
+ /**
31
+ * LIBEXSLT_VERSION_STRING:
32
+ *
33
+ * the version number string, 1.2.3 value is "10203"
34
+ */
35
+ #define LIBEXSLT_VERSION_STRING "820"
36
+
37
+ /**
38
+ * LIBEXSLT_VERSION_EXTRA:
39
+ *
40
+ * extra version information, used to show a Git commit description
41
+ */
42
+ #define LIBEXSLT_VERSION_EXTRA ""
43
+
44
+ /**
45
+ * EXSLT_CRYPTO_ENABLED:
46
+ *
47
+ * Whether crypto support is configured into exslt
48
+ */
49
+ #if 0
50
+ #define EXSLT_CRYPTO_ENABLED
51
+ #endif
52
+
53
+ /**
54
+ * ATTRIBUTE_UNUSED:
55
+ *
56
+ * This macro is used to flag unused function parameters to GCC
57
+ */
58
+ #ifdef __GNUC__
59
+ #ifndef ATTRIBUTE_UNUSED
60
+ #define ATTRIBUTE_UNUSED __attribute__((unused))
61
+ #endif
62
+ #else
63
+ #define ATTRIBUTE_UNUSED
64
+ #endif
65
+
66
+ #ifdef __cplusplus
67
+ }
68
+ #endif
69
+
70
+ #endif /* __XML_EXSLTCONFIG_H__ */
@@ -0,0 +1,63 @@
1
+ /*
2
+ * Summary: macros for marking symbols as exportable/importable.
3
+ *
4
+ * Copy: See Copyright for the status of this software.
5
+ */
6
+
7
+ #ifndef __EXSLT_EXPORTS_H__
8
+ #define __EXSLT_EXPORTS_H__
9
+
10
+ #if defined(_WIN32) || defined(__CYGWIN__)
11
+ /** DOC_DISABLE */
12
+
13
+ #ifdef LIBEXSLT_STATIC
14
+ #define EXSLTPUBLIC
15
+ #elif defined(IN_LIBEXSLT)
16
+ #define EXSLTPUBLIC __declspec(dllexport)
17
+ #else
18
+ #define EXSLTPUBLIC __declspec(dllimport)
19
+ #endif
20
+
21
+ #define EXSLTCALL __cdecl
22
+
23
+ /** DOC_ENABLE */
24
+ #else /* not Windows */
25
+
26
+ /**
27
+ * EXSLTPUBLIC:
28
+ *
29
+ * Macro which declares a public symbol
30
+ */
31
+ #define EXSLTPUBLIC
32
+
33
+ /**
34
+ * EXSLTCALL:
35
+ *
36
+ * Macro which declares the calling convention for exported functions
37
+ */
38
+ #define EXSLTCALL
39
+
40
+ #endif /* platform switch */
41
+
42
+ /**
43
+ * EXSLTPUBFUN:
44
+ *
45
+ * Macro which declares an exportable function
46
+ */
47
+ #define EXSLTPUBFUN EXSLTPUBLIC
48
+
49
+ /**
50
+ * EXSLTPUBVAR:
51
+ *
52
+ * Macro which declares an exportable variable
53
+ */
54
+ #define EXSLTPUBVAR EXSLTPUBLIC extern
55
+
56
+ /* Compatibility */
57
+ #if !defined(LIBEXSLT_PUBLIC)
58
+ #define LIBEXSLT_PUBLIC EXSLTPUBVAR
59
+ #endif
60
+
61
+ #endif /* __EXSLT_EXPORTS_H__ */
62
+
63
+
@@ -0,0 +1,306 @@
1
+ /*
2
+ * Summary: interface for an HTML 4.0 non-verifying parser
3
+ * Description: this module implements an HTML 4.0 non-verifying parser
4
+ * with an API compatible with that of the XML parser. It should
5
+ * be able to parse "real world" HTML, even if severely
6
+ * broken from a specification point of view.
7
+ *
8
+ * Copy: See Copyright for the status of this software.
9
+ *
10
+ * Author: Daniel Veillard
11
+ */
12
+
13
+ #ifndef __HTML_PARSER_H__
14
+ #define __HTML_PARSER_H__
15
+ #include <libxml/xmlversion.h>
16
+ #include <libxml/parser.h>
17
+
18
+ #ifdef LIBXML_HTML_ENABLED
19
+
20
+ #ifdef __cplusplus
21
+ extern "C" {
22
+ #endif
23
+
24
+ /*
25
+ * Most of the back-end structures from XML and HTML are shared.
26
+ */
27
+ typedef xmlParserCtxt htmlParserCtxt;
28
+ typedef xmlParserCtxtPtr htmlParserCtxtPtr;
29
+ typedef xmlParserNodeInfo htmlParserNodeInfo;
30
+ typedef xmlSAXHandler htmlSAXHandler;
31
+ typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
32
+ typedef xmlParserInput htmlParserInput;
33
+ typedef xmlParserInputPtr htmlParserInputPtr;
34
+ typedef xmlDocPtr htmlDocPtr;
35
+ typedef xmlNodePtr htmlNodePtr;
36
+
37
+ /*
38
+ * Internal description of an HTML element, representing HTML 4.01
39
+ * and XHTML 1.0 (which share the same structure).
40
+ */
41
+ typedef struct _htmlElemDesc htmlElemDesc;
42
+ typedef htmlElemDesc *htmlElemDescPtr;
43
+ struct _htmlElemDesc {
44
+ const char *name; /* The tag name */
45
+ char startTag; /* Whether the start tag can be implied */
46
+ char endTag; /* Whether the end tag can be implied */
47
+ char saveEndTag; /* Whether the end tag should be saved */
48
+ char empty; /* Is this an empty element ? */
49
+ char depr; /* Is this a deprecated element ? */
50
+ char dtd; /* 1: only in Loose DTD, 2: only Frameset one */
51
+ char isinline; /* is this a block 0 or inline 1 element */
52
+ const char *desc; /* the description */
53
+
54
+ /* NRK Jan.2003
55
+ * New fields encapsulating HTML structure
56
+ *
57
+ * Bugs:
58
+ * This is a very limited representation. It fails to tell us when
59
+ * an element *requires* subelements (we only have whether they're
60
+ * allowed or not), and it doesn't tell us where CDATA and PCDATA
61
+ * are allowed. Some element relationships are not fully represented:
62
+ * these are flagged with the word MODIFIER
63
+ */
64
+ const char** subelts; /* allowed sub-elements of this element */
65
+ const char* defaultsubelt; /* subelement for suggested auto-repair
66
+ if necessary or NULL */
67
+ const char** attrs_opt; /* Optional Attributes */
68
+ const char** attrs_depr; /* Additional deprecated attributes */
69
+ const char** attrs_req; /* Required attributes */
70
+ };
71
+
72
+ /*
73
+ * Internal description of an HTML entity.
74
+ */
75
+ typedef struct _htmlEntityDesc htmlEntityDesc;
76
+ typedef htmlEntityDesc *htmlEntityDescPtr;
77
+ struct _htmlEntityDesc {
78
+ unsigned int value; /* the UNICODE value for the character */
79
+ const char *name; /* The entity name */
80
+ const char *desc; /* the description */
81
+ };
82
+
83
+ /*
84
+ * There are only a few public functions.
85
+ */
86
+ XMLPUBFUN const htmlElemDesc * XMLCALL
87
+ htmlTagLookup (const xmlChar *tag);
88
+ XMLPUBFUN const htmlEntityDesc * XMLCALL
89
+ htmlEntityLookup(const xmlChar *name);
90
+ XMLPUBFUN const htmlEntityDesc * XMLCALL
91
+ htmlEntityValueLookup(unsigned int value);
92
+
93
+ XMLPUBFUN int XMLCALL
94
+ htmlIsAutoClosed(htmlDocPtr doc,
95
+ htmlNodePtr elem);
96
+ XMLPUBFUN int XMLCALL
97
+ htmlAutoCloseTag(htmlDocPtr doc,
98
+ const xmlChar *name,
99
+ htmlNodePtr elem);
100
+ XMLPUBFUN const htmlEntityDesc * XMLCALL
101
+ htmlParseEntityRef(htmlParserCtxtPtr ctxt,
102
+ const xmlChar **str);
103
+ XMLPUBFUN int XMLCALL
104
+ htmlParseCharRef(htmlParserCtxtPtr ctxt);
105
+ XMLPUBFUN void XMLCALL
106
+ htmlParseElement(htmlParserCtxtPtr ctxt);
107
+
108
+ XMLPUBFUN htmlParserCtxtPtr XMLCALL
109
+ htmlNewParserCtxt(void);
110
+
111
+ XMLPUBFUN htmlParserCtxtPtr XMLCALL
112
+ htmlCreateMemoryParserCtxt(const char *buffer,
113
+ int size);
114
+
115
+ XMLPUBFUN int XMLCALL
116
+ htmlParseDocument(htmlParserCtxtPtr ctxt);
117
+ XMLPUBFUN htmlDocPtr XMLCALL
118
+ htmlSAXParseDoc (const xmlChar *cur,
119
+ const char *encoding,
120
+ htmlSAXHandlerPtr sax,
121
+ void *userData);
122
+ XMLPUBFUN htmlDocPtr XMLCALL
123
+ htmlParseDoc (const xmlChar *cur,
124
+ const char *encoding);
125
+ XMLPUBFUN htmlDocPtr XMLCALL
126
+ htmlSAXParseFile(const char *filename,
127
+ const char *encoding,
128
+ htmlSAXHandlerPtr sax,
129
+ void *userData);
130
+ XMLPUBFUN htmlDocPtr XMLCALL
131
+ htmlParseFile (const char *filename,
132
+ const char *encoding);
133
+ XMLPUBFUN int XMLCALL
134
+ UTF8ToHtml (unsigned char *out,
135
+ int *outlen,
136
+ const unsigned char *in,
137
+ int *inlen);
138
+ XMLPUBFUN int XMLCALL
139
+ htmlEncodeEntities(unsigned char *out,
140
+ int *outlen,
141
+ const unsigned char *in,
142
+ int *inlen, int quoteChar);
143
+ XMLPUBFUN int XMLCALL
144
+ htmlIsScriptAttribute(const xmlChar *name);
145
+ XMLPUBFUN int XMLCALL
146
+ htmlHandleOmittedElem(int val);
147
+
148
+ #ifdef LIBXML_PUSH_ENABLED
149
+ /**
150
+ * Interfaces for the Push mode.
151
+ */
152
+ XMLPUBFUN htmlParserCtxtPtr XMLCALL
153
+ htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax,
154
+ void *user_data,
155
+ const char *chunk,
156
+ int size,
157
+ const char *filename,
158
+ xmlCharEncoding enc);
159
+ XMLPUBFUN int XMLCALL
160
+ htmlParseChunk (htmlParserCtxtPtr ctxt,
161
+ const char *chunk,
162
+ int size,
163
+ int terminate);
164
+ #endif /* LIBXML_PUSH_ENABLED */
165
+
166
+ XMLPUBFUN void XMLCALL
167
+ htmlFreeParserCtxt (htmlParserCtxtPtr ctxt);
168
+
169
+ /*
170
+ * New set of simpler/more flexible APIs
171
+ */
172
+ /**
173
+ * htmlParserOption:
174
+ *
175
+ * This is the set of HTML parser options that can be passed down
176
+ * to the htmlReadDoc() and similar calls.
177
+ */
178
+ typedef enum {
179
+ HTML_PARSE_RECOVER = 1<<0, /* Relaxed parsing */
180
+ HTML_PARSE_NODEFDTD = 1<<2, /* do not default a doctype if not found */
181
+ HTML_PARSE_NOERROR = 1<<5, /* suppress error reports */
182
+ HTML_PARSE_NOWARNING= 1<<6, /* suppress warning reports */
183
+ HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */
184
+ HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */
185
+ HTML_PARSE_NONET = 1<<11,/* Forbid network access */
186
+ HTML_PARSE_NOIMPLIED= 1<<13,/* Do not add implied html/body... elements */
187
+ HTML_PARSE_COMPACT = 1<<16,/* compact small text nodes */
188
+ HTML_PARSE_IGNORE_ENC=1<<21 /* ignore internal document encoding hint */
189
+ } htmlParserOption;
190
+
191
+ XMLPUBFUN void XMLCALL
192
+ htmlCtxtReset (htmlParserCtxtPtr ctxt);
193
+ XMLPUBFUN int XMLCALL
194
+ htmlCtxtUseOptions (htmlParserCtxtPtr ctxt,
195
+ int options);
196
+ XMLPUBFUN htmlDocPtr XMLCALL
197
+ htmlReadDoc (const xmlChar *cur,
198
+ const char *URL,
199
+ const char *encoding,
200
+ int options);
201
+ XMLPUBFUN htmlDocPtr XMLCALL
202
+ htmlReadFile (const char *URL,
203
+ const char *encoding,
204
+ int options);
205
+ XMLPUBFUN htmlDocPtr XMLCALL
206
+ htmlReadMemory (const char *buffer,
207
+ int size,
208
+ const char *URL,
209
+ const char *encoding,
210
+ int options);
211
+ XMLPUBFUN htmlDocPtr XMLCALL
212
+ htmlReadFd (int fd,
213
+ const char *URL,
214
+ const char *encoding,
215
+ int options);
216
+ XMLPUBFUN htmlDocPtr XMLCALL
217
+ htmlReadIO (xmlInputReadCallback ioread,
218
+ xmlInputCloseCallback ioclose,
219
+ void *ioctx,
220
+ const char *URL,
221
+ const char *encoding,
222
+ int options);
223
+ XMLPUBFUN htmlDocPtr XMLCALL
224
+ htmlCtxtReadDoc (xmlParserCtxtPtr ctxt,
225
+ const xmlChar *cur,
226
+ const char *URL,
227
+ const char *encoding,
228
+ int options);
229
+ XMLPUBFUN htmlDocPtr XMLCALL
230
+ htmlCtxtReadFile (xmlParserCtxtPtr ctxt,
231
+ const char *filename,
232
+ const char *encoding,
233
+ int options);
234
+ XMLPUBFUN htmlDocPtr XMLCALL
235
+ htmlCtxtReadMemory (xmlParserCtxtPtr ctxt,
236
+ const char *buffer,
237
+ int size,
238
+ const char *URL,
239
+ const char *encoding,
240
+ int options);
241
+ XMLPUBFUN htmlDocPtr XMLCALL
242
+ htmlCtxtReadFd (xmlParserCtxtPtr ctxt,
243
+ int fd,
244
+ const char *URL,
245
+ const char *encoding,
246
+ int options);
247
+ XMLPUBFUN htmlDocPtr XMLCALL
248
+ htmlCtxtReadIO (xmlParserCtxtPtr ctxt,
249
+ xmlInputReadCallback ioread,
250
+ xmlInputCloseCallback ioclose,
251
+ void *ioctx,
252
+ const char *URL,
253
+ const char *encoding,
254
+ int options);
255
+
256
+ /* NRK/Jan2003: further knowledge of HTML structure
257
+ */
258
+ typedef enum {
259
+ HTML_NA = 0 , /* something we don't check at all */
260
+ HTML_INVALID = 0x1 ,
261
+ HTML_DEPRECATED = 0x2 ,
262
+ HTML_VALID = 0x4 ,
263
+ HTML_REQUIRED = 0xc /* VALID bit set so ( & HTML_VALID ) is TRUE */
264
+ } htmlStatus ;
265
+
266
+ /* Using htmlElemDesc rather than name here, to emphasise the fact
267
+ that otherwise there's a lookup overhead
268
+ */
269
+ XMLPUBFUN htmlStatus XMLCALL htmlAttrAllowed(const htmlElemDesc*, const xmlChar*, int) ;
270
+ XMLPUBFUN int XMLCALL htmlElementAllowedHere(const htmlElemDesc*, const xmlChar*) ;
271
+ XMLPUBFUN htmlStatus XMLCALL htmlElementStatusHere(const htmlElemDesc*, const htmlElemDesc*) ;
272
+ XMLPUBFUN htmlStatus XMLCALL htmlNodeStatus(const htmlNodePtr, int) ;
273
+ /**
274
+ * htmlDefaultSubelement:
275
+ * @elt: HTML element
276
+ *
277
+ * Returns the default subelement for this element
278
+ */
279
+ #define htmlDefaultSubelement(elt) elt->defaultsubelt
280
+ /**
281
+ * htmlElementAllowedHereDesc:
282
+ * @parent: HTML parent element
283
+ * @elt: HTML element
284
+ *
285
+ * Checks whether an HTML element description may be a
286
+ * direct child of the specified element.
287
+ *
288
+ * Returns 1 if allowed; 0 otherwise.
289
+ */
290
+ #define htmlElementAllowedHereDesc(parent,elt) \
291
+ htmlElementAllowedHere((parent), (elt)->name)
292
+ /**
293
+ * htmlRequiredAttrs:
294
+ * @elt: HTML element
295
+ *
296
+ * Returns the attributes required for the specified element.
297
+ */
298
+ #define htmlRequiredAttrs(elt) (elt)->attrs_req
299
+
300
+
301
+ #ifdef __cplusplus
302
+ }
303
+ #endif
304
+
305
+ #endif /* LIBXML_HTML_ENABLED */
306
+ #endif /* __HTML_PARSER_H__ */
@@ -0,0 +1,147 @@
1
+ /*
2
+ * Summary: specific APIs to process HTML tree, especially serialization
3
+ * Description: this module implements a few functions needed to process
4
+ * tree in an HTML specific way.
5
+ *
6
+ * Copy: See Copyright for the status of this software.
7
+ *
8
+ * Author: Daniel Veillard
9
+ */
10
+
11
+ #ifndef __HTML_TREE_H__
12
+ #define __HTML_TREE_H__
13
+
14
+ #include <stdio.h>
15
+ #include <libxml/xmlversion.h>
16
+ #include <libxml/tree.h>
17
+ #include <libxml/HTMLparser.h>
18
+
19
+ #ifdef LIBXML_HTML_ENABLED
20
+
21
+ #ifdef __cplusplus
22
+ extern "C" {
23
+ #endif
24
+
25
+
26
+ /**
27
+ * HTML_TEXT_NODE:
28
+ *
29
+ * Macro. A text node in a HTML document is really implemented
30
+ * the same way as a text node in an XML document.
31
+ */
32
+ #define HTML_TEXT_NODE XML_TEXT_NODE
33
+ /**
34
+ * HTML_ENTITY_REF_NODE:
35
+ *
36
+ * Macro. An entity reference in a HTML document is really implemented
37
+ * the same way as an entity reference in an XML document.
38
+ */
39
+ #define HTML_ENTITY_REF_NODE XML_ENTITY_REF_NODE
40
+ /**
41
+ * HTML_COMMENT_NODE:
42
+ *
43
+ * Macro. A comment in a HTML document is really implemented
44
+ * the same way as a comment in an XML document.
45
+ */
46
+ #define HTML_COMMENT_NODE XML_COMMENT_NODE
47
+ /**
48
+ * HTML_PRESERVE_NODE:
49
+ *
50
+ * Macro. A preserved node in a HTML document is really implemented
51
+ * the same way as a CDATA section in an XML document.
52
+ */
53
+ #define HTML_PRESERVE_NODE XML_CDATA_SECTION_NODE
54
+ /**
55
+ * HTML_PI_NODE:
56
+ *
57
+ * Macro. A processing instruction in a HTML document is really implemented
58
+ * the same way as a processing instruction in an XML document.
59
+ */
60
+ #define HTML_PI_NODE XML_PI_NODE
61
+
62
+ XMLPUBFUN htmlDocPtr XMLCALL
63
+ htmlNewDoc (const xmlChar *URI,
64
+ const xmlChar *ExternalID);
65
+ XMLPUBFUN htmlDocPtr XMLCALL
66
+ htmlNewDocNoDtD (const xmlChar *URI,
67
+ const xmlChar *ExternalID);
68
+ XMLPUBFUN const xmlChar * XMLCALL
69
+ htmlGetMetaEncoding (htmlDocPtr doc);
70
+ XMLPUBFUN int XMLCALL
71
+ htmlSetMetaEncoding (htmlDocPtr doc,
72
+ const xmlChar *encoding);
73
+ #ifdef LIBXML_OUTPUT_ENABLED
74
+ XMLPUBFUN void XMLCALL
75
+ htmlDocDumpMemory (xmlDocPtr cur,
76
+ xmlChar **mem,
77
+ int *size);
78
+ XMLPUBFUN void XMLCALL
79
+ htmlDocDumpMemoryFormat (xmlDocPtr cur,
80
+ xmlChar **mem,
81
+ int *size,
82
+ int format);
83
+ XMLPUBFUN int XMLCALL
84
+ htmlDocDump (FILE *f,
85
+ xmlDocPtr cur);
86
+ XMLPUBFUN int XMLCALL
87
+ htmlSaveFile (const char *filename,
88
+ xmlDocPtr cur);
89
+ XMLPUBFUN int XMLCALL
90
+ htmlNodeDump (xmlBufferPtr buf,
91
+ xmlDocPtr doc,
92
+ xmlNodePtr cur);
93
+ XMLPUBFUN void XMLCALL
94
+ htmlNodeDumpFile (FILE *out,
95
+ xmlDocPtr doc,
96
+ xmlNodePtr cur);
97
+ XMLPUBFUN int XMLCALL
98
+ htmlNodeDumpFileFormat (FILE *out,
99
+ xmlDocPtr doc,
100
+ xmlNodePtr cur,
101
+ const char *encoding,
102
+ int format);
103
+ XMLPUBFUN int XMLCALL
104
+ htmlSaveFileEnc (const char *filename,
105
+ xmlDocPtr cur,
106
+ const char *encoding);
107
+ XMLPUBFUN int XMLCALL
108
+ htmlSaveFileFormat (const char *filename,
109
+ xmlDocPtr cur,
110
+ const char *encoding,
111
+ int format);
112
+
113
+ XMLPUBFUN void XMLCALL
114
+ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf,
115
+ xmlDocPtr doc,
116
+ xmlNodePtr cur,
117
+ const char *encoding,
118
+ int format);
119
+ XMLPUBFUN void XMLCALL
120
+ htmlDocContentDumpOutput(xmlOutputBufferPtr buf,
121
+ xmlDocPtr cur,
122
+ const char *encoding);
123
+ XMLPUBFUN void XMLCALL
124
+ htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf,
125
+ xmlDocPtr cur,
126
+ const char *encoding,
127
+ int format);
128
+ XMLPUBFUN void XMLCALL
129
+ htmlNodeDumpOutput (xmlOutputBufferPtr buf,
130
+ xmlDocPtr doc,
131
+ xmlNodePtr cur,
132
+ const char *encoding);
133
+
134
+ #endif /* LIBXML_OUTPUT_ENABLED */
135
+
136
+ XMLPUBFUN int XMLCALL
137
+ htmlIsBooleanAttr (const xmlChar *name);
138
+
139
+
140
+ #ifdef __cplusplus
141
+ }
142
+ #endif
143
+
144
+ #endif /* LIBXML_HTML_ENABLED */
145
+
146
+ #endif /* __HTML_TREE_H__ */
147
+