nokolexbor 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ext/nokolexbor/config.h +186 -0
- data/ext/nokolexbor/extconf.rb +131 -0
- data/ext/nokolexbor/libxml/HTMLparser.h +320 -0
- data/ext/nokolexbor/libxml/SAX2.h +173 -0
- data/ext/nokolexbor/libxml/chvalid.h +230 -0
- data/ext/nokolexbor/libxml/debugXML.h +217 -0
- data/ext/nokolexbor/libxml/dict.h +81 -0
- data/ext/nokolexbor/libxml/encoding.h +232 -0
- data/ext/nokolexbor/libxml/entities.h +153 -0
- data/ext/nokolexbor/libxml/globals.h +529 -0
- data/ext/nokolexbor/libxml/hash.h +236 -0
- data/ext/nokolexbor/libxml/list.h +137 -0
- data/ext/nokolexbor/libxml/parser.h +1264 -0
- data/ext/nokolexbor/libxml/parserInternals.h +641 -0
- data/ext/nokolexbor/libxml/pattern.h +100 -0
- data/ext/nokolexbor/libxml/threads.h +94 -0
- data/ext/nokolexbor/libxml/tree.h +1315 -0
- data/ext/nokolexbor/libxml/uri.h +94 -0
- data/ext/nokolexbor/libxml/valid.h +448 -0
- data/ext/nokolexbor/libxml/xmlIO.h +369 -0
- data/ext/nokolexbor/libxml/xmlautomata.h +146 -0
- data/ext/nokolexbor/libxml/xmlerror.h +919 -0
- data/ext/nokolexbor/libxml/xmlexports.h +79 -0
- data/ext/nokolexbor/libxml/xmlmemory.h +226 -0
- data/ext/nokolexbor/libxml/xmlregexp.h +222 -0
- data/ext/nokolexbor/libxml/xmlstring.h +140 -0
- data/ext/nokolexbor/libxml/xmlversion.h +526 -0
- data/ext/nokolexbor/libxml/xpath.h +575 -0
- data/ext/nokolexbor/libxml/xpathInternals.h +632 -0
- data/ext/nokolexbor/libxml/xpointer.h +137 -0
- data/ext/nokolexbor/libxml.h +76 -0
- data/ext/nokolexbor/memory.c +39 -0
- data/ext/nokolexbor/nl_document.c +51 -0
- data/ext/nokolexbor/nl_node.c +790 -0
- data/ext/nokolexbor/nl_node_set.c +368 -0
- data/ext/nokolexbor/nl_xpath_context.c +200 -0
- data/ext/nokolexbor/nokolexbor.c +63 -0
- data/ext/nokolexbor/nokolexbor.h +37 -0
- data/ext/nokolexbor/private/buf.h +70 -0
- data/ext/nokolexbor/private/dict.h +11 -0
- data/ext/nokolexbor/private/enc.h +17 -0
- data/ext/nokolexbor/private/error.h +21 -0
- data/ext/nokolexbor/private/globals.h +9 -0
- data/ext/nokolexbor/private/memory.h +9 -0
- data/ext/nokolexbor/private/parser.h +27 -0
- data/ext/nokolexbor/private/string.h +9 -0
- data/ext/nokolexbor/private/threads.h +50 -0
- data/ext/nokolexbor/private/tree.h +18 -0
- data/ext/nokolexbor/private/xpath.h +7 -0
- data/ext/nokolexbor/timsort.h +601 -0
- data/ext/nokolexbor/xml_SAX2.c +80 -0
- data/ext/nokolexbor/xml_buf.c +363 -0
- data/ext/nokolexbor/xml_chvalid.c +334 -0
- data/ext/nokolexbor/xml_dict.c +1264 -0
- data/ext/nokolexbor/xml_encoding.c +124 -0
- data/ext/nokolexbor/xml_error.c +134 -0
- data/ext/nokolexbor/xml_globals.c +1085 -0
- data/ext/nokolexbor/xml_hash.c +1141 -0
- data/ext/nokolexbor/xml_memory.c +203 -0
- data/ext/nokolexbor/xml_parser.c +127 -0
- data/ext/nokolexbor/xml_parserInternals.c +338 -0
- data/ext/nokolexbor/xml_pattern.c +2375 -0
- data/ext/nokolexbor/xml_string.c +1051 -0
- data/ext/nokolexbor/xml_threads.c +881 -0
- data/ext/nokolexbor/xml_tree.c +148 -0
- data/ext/nokolexbor/xml_xpath.c +14743 -0
- data/lib/nokolexbor/attribute.rb +18 -0
- data/lib/nokolexbor/document.rb +6 -0
- data/lib/nokolexbor/node.rb +264 -0
- data/lib/nokolexbor/node_set.rb +124 -0
- data/lib/nokolexbor/version.rb +5 -0
- data/lib/nokolexbor/xpath_context.rb +14 -0
- data/lib/nokolexbor.rb +17 -0
- data/patches/0001-lexbor-support-text-pseudo-element.patch +137 -0
- data/patches/0002-lexbor-match-id-class-case-sensitive.patch +22 -0
- data/patches/0003-lexbor-attach-template-content-to-self.patch +13 -0
- data/vendor/lexbor/CMakeLists.txt +331 -0
- data/vendor/lexbor/config.cmake +890 -0
- data/vendor/lexbor/feature.cmake +134 -0
- data/vendor/lexbor/source/lexbor/core/array.c +208 -0
- data/vendor/lexbor/source/lexbor/core/array.h +100 -0
- data/vendor/lexbor/source/lexbor/core/array_obj.c +216 -0
- data/vendor/lexbor/source/lexbor/core/array_obj.h +134 -0
- data/vendor/lexbor/source/lexbor/core/avl.c +442 -0
- data/vendor/lexbor/source/lexbor/core/avl.h +82 -0
- data/vendor/lexbor/source/lexbor/core/base.h +86 -0
- data/vendor/lexbor/source/lexbor/core/bst.c +468 -0
- data/vendor/lexbor/source/lexbor/core/bst.h +108 -0
- data/vendor/lexbor/source/lexbor/core/bst_map.c +238 -0
- data/vendor/lexbor/source/lexbor/core/bst_map.h +87 -0
- data/vendor/lexbor/source/lexbor/core/config.cmake +12 -0
- data/vendor/lexbor/source/lexbor/core/conv.c +203 -0
- data/vendor/lexbor/source/lexbor/core/conv.h +53 -0
- data/vendor/lexbor/source/lexbor/core/core.h +35 -0
- data/vendor/lexbor/source/lexbor/core/def.h +57 -0
- data/vendor/lexbor/source/lexbor/core/diyfp.c +153 -0
- data/vendor/lexbor/source/lexbor/core/diyfp.h +258 -0
- data/vendor/lexbor/source/lexbor/core/dobject.c +187 -0
- data/vendor/lexbor/source/lexbor/core/dobject.h +92 -0
- data/vendor/lexbor/source/lexbor/core/dtoa.c +404 -0
- data/vendor/lexbor/source/lexbor/core/dtoa.h +28 -0
- data/vendor/lexbor/source/lexbor/core/fs.h +60 -0
- data/vendor/lexbor/source/lexbor/core/hash.c +476 -0
- data/vendor/lexbor/source/lexbor/core/hash.h +218 -0
- data/vendor/lexbor/source/lexbor/core/in.c +267 -0
- data/vendor/lexbor/source/lexbor/core/in.h +172 -0
- data/vendor/lexbor/source/lexbor/core/lexbor.h +35 -0
- data/vendor/lexbor/source/lexbor/core/mem.c +228 -0
- data/vendor/lexbor/source/lexbor/core/mem.h +141 -0
- data/vendor/lexbor/source/lexbor/core/mraw.c +428 -0
- data/vendor/lexbor/source/lexbor/core/mraw.h +114 -0
- data/vendor/lexbor/source/lexbor/core/perf.h +45 -0
- data/vendor/lexbor/source/lexbor/core/plog.c +73 -0
- data/vendor/lexbor/source/lexbor/core/plog.h +102 -0
- data/vendor/lexbor/source/lexbor/core/print.c +168 -0
- data/vendor/lexbor/source/lexbor/core/print.h +39 -0
- data/vendor/lexbor/source/lexbor/core/sbst.h +59 -0
- data/vendor/lexbor/source/lexbor/core/serialize.c +27 -0
- data/vendor/lexbor/source/lexbor/core/serialize.h +32 -0
- data/vendor/lexbor/source/lexbor/core/shs.c +118 -0
- data/vendor/lexbor/source/lexbor/core/shs.h +82 -0
- data/vendor/lexbor/source/lexbor/core/str.c +617 -0
- data/vendor/lexbor/source/lexbor/core/str.h +247 -0
- data/vendor/lexbor/source/lexbor/core/str_res.h +369 -0
- data/vendor/lexbor/source/lexbor/core/strtod.c +326 -0
- data/vendor/lexbor/source/lexbor/core/strtod.h +28 -0
- data/vendor/lexbor/source/lexbor/core/types.h +39 -0
- data/vendor/lexbor/source/lexbor/core/utils.c +43 -0
- data/vendor/lexbor/source/lexbor/core/utils.h +36 -0
- data/vendor/lexbor/source/lexbor/css/base.h +44 -0
- data/vendor/lexbor/source/lexbor/css/config.cmake +2 -0
- data/vendor/lexbor/source/lexbor/css/css.h +25 -0
- data/vendor/lexbor/source/lexbor/css/log.c +336 -0
- data/vendor/lexbor/source/lexbor/css/log.h +103 -0
- data/vendor/lexbor/source/lexbor/css/node.h +29 -0
- data/vendor/lexbor/source/lexbor/css/parser.c +473 -0
- data/vendor/lexbor/source/lexbor/css/parser.h +368 -0
- data/vendor/lexbor/source/lexbor/css/selectors/base.h +48 -0
- data/vendor/lexbor/source/lexbor/css/selectors/pseudo.c +91 -0
- data/vendor/lexbor/source/lexbor/css/selectors/pseudo.h +66 -0
- data/vendor/lexbor/source/lexbor/css/selectors/pseudo_const.h +109 -0
- data/vendor/lexbor/source/lexbor/css/selectors/pseudo_res.h +302 -0
- data/vendor/lexbor/source/lexbor/css/selectors/pseudo_state.c +279 -0
- data/vendor/lexbor/source/lexbor/css/selectors/pseudo_state.h +85 -0
- data/vendor/lexbor/source/lexbor/css/selectors/selector.c +927 -0
- data/vendor/lexbor/source/lexbor/css/selectors/selector.h +200 -0
- data/vendor/lexbor/source/lexbor/css/selectors/selectors.c +340 -0
- data/vendor/lexbor/source/lexbor/css/selectors/selectors.h +137 -0
- data/vendor/lexbor/source/lexbor/css/selectors/state.c +1718 -0
- data/vendor/lexbor/source/lexbor/css/selectors/state.h +79 -0
- data/vendor/lexbor/source/lexbor/css/stylesheet.h +37 -0
- data/vendor/lexbor/source/lexbor/css/syntax/anb.c +443 -0
- data/vendor/lexbor/source/lexbor/css/syntax/anb.h +45 -0
- data/vendor/lexbor/source/lexbor/css/syntax/base.h +33 -0
- data/vendor/lexbor/source/lexbor/css/syntax/parser.c +9 -0
- data/vendor/lexbor/source/lexbor/css/syntax/parser.h +25 -0
- data/vendor/lexbor/source/lexbor/css/syntax/res.h +48 -0
- data/vendor/lexbor/source/lexbor/css/syntax/state.c +2603 -0
- data/vendor/lexbor/source/lexbor/css/syntax/state.h +140 -0
- data/vendor/lexbor/source/lexbor/css/syntax/state_res.h +273 -0
- data/vendor/lexbor/source/lexbor/css/syntax/syntax.c +67 -0
- data/vendor/lexbor/source/lexbor/css/syntax/token.c +618 -0
- data/vendor/lexbor/source/lexbor/css/syntax/token.h +298 -0
- data/vendor/lexbor/source/lexbor/css/syntax/token_res.h +68 -0
- data/vendor/lexbor/source/lexbor/css/syntax/tokenizer/error.c +30 -0
- data/vendor/lexbor/source/lexbor/css/syntax/tokenizer/error.h +58 -0
- data/vendor/lexbor/source/lexbor/css/syntax/tokenizer.c +278 -0
- data/vendor/lexbor/source/lexbor/css/syntax/tokenizer.h +121 -0
- data/vendor/lexbor/source/lexbor/dom/base.h +32 -0
- data/vendor/lexbor/source/lexbor/dom/collection.c +97 -0
- data/vendor/lexbor/source/lexbor/dom/collection.h +112 -0
- data/vendor/lexbor/source/lexbor/dom/config.cmake +3 -0
- data/vendor/lexbor/source/lexbor/dom/dom.h +29 -0
- data/vendor/lexbor/source/lexbor/dom/exception.c +18 -0
- data/vendor/lexbor/source/lexbor/dom/exception.h +73 -0
- data/vendor/lexbor/source/lexbor/dom/interface.c +110 -0
- data/vendor/lexbor/source/lexbor/dom/interface.h +88 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/attr.c +445 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/attr.h +152 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/attr_const.h +62 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/attr_res.h +143 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/cdata_section.c +55 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/cdata_section.h +38 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/character_data.c +110 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/character_data.h +51 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/comment.c +64 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/comment.h +42 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/document.c +536 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/document.h +243 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/document_fragment.c +36 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/document_fragment.h +36 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/document_type.c +125 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/document_type.h +108 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/element.c +1411 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/element.h +319 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/event_target.c +32 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/event_target.h +34 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/node.c +661 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/node.h +192 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/processing_instruction.c +87 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/processing_instruction.h +66 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/shadow_root.c +36 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/shadow_root.h +44 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/text.c +63 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/text.h +42 -0
- data/vendor/lexbor/source/lexbor/encoding/base.h +218 -0
- data/vendor/lexbor/source/lexbor/encoding/big5.c +42839 -0
- data/vendor/lexbor/source/lexbor/encoding/config.cmake +12 -0
- data/vendor/lexbor/source/lexbor/encoding/const.h +65 -0
- data/vendor/lexbor/source/lexbor/encoding/decode.c +3193 -0
- data/vendor/lexbor/source/lexbor/encoding/decode.h +370 -0
- data/vendor/lexbor/source/lexbor/encoding/encode.c +1931 -0
- data/vendor/lexbor/source/lexbor/encoding/encode.h +377 -0
- data/vendor/lexbor/source/lexbor/encoding/encoding.c +252 -0
- data/vendor/lexbor/source/lexbor/encoding/encoding.h +475 -0
- data/vendor/lexbor/source/lexbor/encoding/euc_kr.c +53883 -0
- data/vendor/lexbor/source/lexbor/encoding/gb18030.c +47905 -0
- data/vendor/lexbor/source/lexbor/encoding/iso_2022_jp_katakana.c +159 -0
- data/vendor/lexbor/source/lexbor/encoding/jis0208.c +22477 -0
- data/vendor/lexbor/source/lexbor/encoding/jis0212.c +15787 -0
- data/vendor/lexbor/source/lexbor/encoding/multi.h +53 -0
- data/vendor/lexbor/source/lexbor/encoding/range.c +71 -0
- data/vendor/lexbor/source/lexbor/encoding/range.h +34 -0
- data/vendor/lexbor/source/lexbor/encoding/res.c +222 -0
- data/vendor/lexbor/source/lexbor/encoding/res.h +34 -0
- data/vendor/lexbor/source/lexbor/encoding/single.c +13748 -0
- data/vendor/lexbor/source/lexbor/encoding/single.h +116 -0
- data/vendor/lexbor/source/lexbor/html/base.h +44 -0
- data/vendor/lexbor/source/lexbor/html/config.cmake +3 -0
- data/vendor/lexbor/source/lexbor/html/encoding.c +574 -0
- data/vendor/lexbor/source/lexbor/html/encoding.h +106 -0
- data/vendor/lexbor/source/lexbor/html/html.h +107 -0
- data/vendor/lexbor/source/lexbor/html/interface.c +165 -0
- data/vendor/lexbor/source/lexbor/html/interface.h +186 -0
- data/vendor/lexbor/source/lexbor/html/interface_res.h +4449 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/anchor_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/anchor_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/area_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/area_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/audio_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/audio_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/base_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/base_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/body_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/body_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/br_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/br_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/button_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/button_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/canvas_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/canvas_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/d_list_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/d_list_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/data_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/data_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/data_list_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/data_list_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/details_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/details_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/dialog_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/dialog_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/directory_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/directory_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/div_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/div_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/document.c +444 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/document.h +256 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/element.c +64 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/element.h +54 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/embed_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/embed_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/field_set_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/field_set_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/font_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/font_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/form_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/form_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/frame_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/frame_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/frame_set_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/frame_set_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/head_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/head_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/heading_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/heading_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/hr_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/hr_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/html_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/html_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/iframe_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/iframe_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/image_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/image_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/input_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/input_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/label_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/label_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/legend_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/legend_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/li_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/li_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/link_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/link_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/map_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/map_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/marquee_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/marquee_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/media_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/media_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/menu_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/menu_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/meta_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/meta_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/meter_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/meter_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/mod_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/mod_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/o_list_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/o_list_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/object_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/object_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/opt_group_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/opt_group_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/option_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/option_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/output_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/output_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/paragraph_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/paragraph_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/param_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/param_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/picture_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/picture_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/pre_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/pre_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/progress_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/progress_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/quote_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/quote_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/script_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/script_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/select_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/select_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/slot_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/slot_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/source_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/source_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/span_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/span_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/style_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/style_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/table_caption_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/table_caption_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/table_cell_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/table_cell_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/table_col_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/table_col_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/table_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/table_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/table_row_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/table_row_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/table_section_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/table_section_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/template_element.c +46 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/template_element.h +38 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/text_area_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/text_area_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/time_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/time_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/title_element.c +133 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/title_element.h +42 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/track_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/track_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/u_list_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/u_list_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/unknown_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/unknown_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/video_element.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/video_element.h +34 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/window.c +36 -0
- data/vendor/lexbor/source/lexbor/html/interfaces/window.h +34 -0
- data/vendor/lexbor/source/lexbor/html/node.c +14 -0
- data/vendor/lexbor/source/lexbor/html/node.h +67 -0
- data/vendor/lexbor/source/lexbor/html/parser.c +469 -0
- data/vendor/lexbor/source/lexbor/html/parser.h +170 -0
- data/vendor/lexbor/source/lexbor/html/serialize.c +1510 -0
- data/vendor/lexbor/source/lexbor/html/serialize.h +93 -0
- data/vendor/lexbor/source/lexbor/html/tag.h +103 -0
- data/vendor/lexbor/source/lexbor/html/tag_res.h +2262 -0
- data/vendor/lexbor/source/lexbor/html/token.c +386 -0
- data/vendor/lexbor/source/lexbor/html/token.h +130 -0
- data/vendor/lexbor/source/lexbor/html/token_attr.c +44 -0
- data/vendor/lexbor/source/lexbor/html/token_attr.h +67 -0
- data/vendor/lexbor/source/lexbor/html/tokenizer/error.c +28 -0
- data/vendor/lexbor/source/lexbor/html/tokenizer/error.h +141 -0
- data/vendor/lexbor/source/lexbor/html/tokenizer/res.h +4956 -0
- data/vendor/lexbor/source/lexbor/html/tokenizer/state.c +2171 -0
- data/vendor/lexbor/source/lexbor/html/tokenizer/state.h +225 -0
- data/vendor/lexbor/source/lexbor/html/tokenizer/state_comment.c +489 -0
- data/vendor/lexbor/source/lexbor/html/tokenizer/state_comment.h +27 -0
- data/vendor/lexbor/source/lexbor/html/tokenizer/state_doctype.c +1654 -0
- data/vendor/lexbor/source/lexbor/html/tokenizer/state_doctype.h +27 -0
- data/vendor/lexbor/source/lexbor/html/tokenizer/state_rawtext.c +303 -0
- data/vendor/lexbor/source/lexbor/html/tokenizer/state_rawtext.h +32 -0
- data/vendor/lexbor/source/lexbor/html/tokenizer/state_rcdata.c +311 -0
- data/vendor/lexbor/source/lexbor/html/tokenizer/state_rcdata.h +32 -0
- data/vendor/lexbor/source/lexbor/html/tokenizer/state_script.c +1209 -0
- data/vendor/lexbor/source/lexbor/html/tokenizer/state_script.h +32 -0
- data/vendor/lexbor/source/lexbor/html/tokenizer.c +499 -0
- data/vendor/lexbor/source/lexbor/html/tokenizer.h +343 -0
- data/vendor/lexbor/source/lexbor/html/tree/active_formatting.c +241 -0
- data/vendor/lexbor/source/lexbor/html/tree/active_formatting.h +117 -0
- data/vendor/lexbor/source/lexbor/html/tree/error.c +26 -0
- data/vendor/lexbor/source/lexbor/html/tree/error.h +114 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_after_body.c +62 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_after_frameset.c +63 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_body.c +82 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_frameset.c +88 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_head.c +222 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/before_head.c +144 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/before_html.c +166 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/foreign_content.c +358 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_body.c +1974 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_caption.c +158 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_cell.c +187 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_column_group.c +194 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_frameset.c +149 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_head.c +374 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_head_noscript.c +121 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_row.c +211 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_select.c +341 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_select_in_table.c +115 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_table.c +451 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_table_body.c +208 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_table_text.c +127 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_template.c +189 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/initial.c +411 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/text.c +61 -0
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode.h +135 -0
- data/vendor/lexbor/source/lexbor/html/tree/open_elements.c +251 -0
- data/vendor/lexbor/source/lexbor/html/tree/open_elements.h +105 -0
- data/vendor/lexbor/source/lexbor/html/tree/template_insertion.c +10 -0
- data/vendor/lexbor/source/lexbor/html/tree/template_insertion.h +100 -0
- data/vendor/lexbor/source/lexbor/html/tree.c +1726 -0
- data/vendor/lexbor/source/lexbor/html/tree.h +431 -0
- data/vendor/lexbor/source/lexbor/html/tree_res.h +111 -0
- data/vendor/lexbor/source/lexbor/ns/base.h +32 -0
- data/vendor/lexbor/source/lexbor/ns/config.cmake +2 -0
- data/vendor/lexbor/source/lexbor/ns/const.h +37 -0
- data/vendor/lexbor/source/lexbor/ns/ns.c +154 -0
- data/vendor/lexbor/source/lexbor/ns/ns.h +66 -0
- data/vendor/lexbor/source/lexbor/ns/res.h +97 -0
- data/vendor/lexbor/source/lexbor/ports/posix/config.cmake +11 -0
- data/vendor/lexbor/source/lexbor/ports/posix/lexbor/core/fs.c +236 -0
- data/vendor/lexbor/source/lexbor/ports/posix/lexbor/core/memory.c +33 -0
- data/vendor/lexbor/source/lexbor/ports/posix/lexbor/core/perf.c +158 -0
- data/vendor/lexbor/source/lexbor/ports/windows_nt/config.cmake +18 -0
- data/vendor/lexbor/source/lexbor/ports/windows_nt/lexbor/core/fs.c +239 -0
- data/vendor/lexbor/source/lexbor/ports/windows_nt/lexbor/core/memory.c +33 -0
- data/vendor/lexbor/source/lexbor/ports/windows_nt/lexbor/core/perf.c +81 -0
- data/vendor/lexbor/source/lexbor/selectors/base.h +30 -0
- data/vendor/lexbor/source/lexbor/selectors/config.cmake +2 -0
- data/vendor/lexbor/source/lexbor/selectors/selectors.c +1591 -0
- data/vendor/lexbor/source/lexbor/selectors/selectors.h +71 -0
- data/vendor/lexbor/source/lexbor/tag/base.h +32 -0
- data/vendor/lexbor/source/lexbor/tag/config.cmake +2 -0
- data/vendor/lexbor/source/lexbor/tag/const.h +225 -0
- data/vendor/lexbor/source/lexbor/tag/res.h +562 -0
- data/vendor/lexbor/source/lexbor/tag/tag.c +144 -0
- data/vendor/lexbor/source/lexbor/tag/tag.h +123 -0
- data/vendor/lexbor/source/lexbor/utils/base.h +32 -0
- data/vendor/lexbor/source/lexbor/utils/config.cmake +2 -0
- data/vendor/lexbor/source/lexbor/utils/http.c +534 -0
- data/vendor/lexbor/source/lexbor/utils/http.h +90 -0
- data/vendor/lexbor/source/lexbor/utils/utils.h +15 -0
- data/vendor/lexbor/source/lexbor/utils/warc.c +817 -0
- data/vendor/lexbor/source/lexbor/utils/warc.h +126 -0
- data/vendor/lexbor/utils/lexbor/css/selectors/pseudo.py +231 -0
- data/vendor/lexbor/utils/lexbor/css/selectors/tmp/const.h +21 -0
- data/vendor/lexbor/utils/lexbor/css/selectors/tmp/res.h +26 -0
- data/vendor/lexbor/utils/lexbor/css/syntax/definitions.py +49 -0
- data/vendor/lexbor/utils/lexbor/css/syntax/token_res.py +54 -0
- data/vendor/lexbor/utils/lexbor/css/syntax/tokenizer_code_map.py +36 -0
- data/vendor/lexbor/version +1 -0
- metadata +542 -0
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* xmlmemory.c: libxml memory allocator wrapper.
|
|
3
|
+
*
|
|
4
|
+
* daniel@veillard.com
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
#define IN_LIBXML
|
|
8
|
+
#include "libxml.h"
|
|
9
|
+
|
|
10
|
+
#include <string.h>
|
|
11
|
+
#include <stdlib.h>
|
|
12
|
+
#include <ctype.h>
|
|
13
|
+
#include <time.h>
|
|
14
|
+
|
|
15
|
+
/* #define DEBUG_MEMORY */
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* MEM_LIST:
|
|
19
|
+
*
|
|
20
|
+
* keep track of all allocated blocks for error reporting
|
|
21
|
+
* Always build the memory list!
|
|
22
|
+
*/
|
|
23
|
+
#ifdef DEBUG_MEMORY_LOCATION
|
|
24
|
+
#ifndef MEM_LIST
|
|
25
|
+
#define MEM_LIST /* keep a list of all the allocated memory blocks */
|
|
26
|
+
#endif
|
|
27
|
+
#endif
|
|
28
|
+
|
|
29
|
+
#include "libxml/globals.h" /* must come before xmlmemory.h */
|
|
30
|
+
#include "libxml/xmlmemory.h"
|
|
31
|
+
#include "libxml/xmlerror.h"
|
|
32
|
+
#include "libxml/threads.h"
|
|
33
|
+
|
|
34
|
+
#include "private/memory.h"
|
|
35
|
+
#include "private/threads.h"
|
|
36
|
+
|
|
37
|
+
static unsigned long debugMemSize = 0;
|
|
38
|
+
static unsigned long debugMemBlocks = 0;
|
|
39
|
+
static unsigned long debugMaxMemSize = 0;
|
|
40
|
+
static xmlMutex xmlMemMutex;
|
|
41
|
+
|
|
42
|
+
void xmlMallocBreakpoint(void);
|
|
43
|
+
|
|
44
|
+
/************************************************************************
|
|
45
|
+
* *
|
|
46
|
+
* Macros, variables and associated types *
|
|
47
|
+
* *
|
|
48
|
+
************************************************************************/
|
|
49
|
+
|
|
50
|
+
#if !defined(LIBXML_THREAD_ENABLED) && !defined(LIBXML_THREAD_ALLOC_ENABLED)
|
|
51
|
+
#ifdef xmlMalloc
|
|
52
|
+
#undef xmlMalloc
|
|
53
|
+
#endif
|
|
54
|
+
#ifdef xmlRealloc
|
|
55
|
+
#undef xmlRealloc
|
|
56
|
+
#endif
|
|
57
|
+
#ifdef xmlMemStrdup
|
|
58
|
+
#undef xmlMemStrdup
|
|
59
|
+
#endif
|
|
60
|
+
#endif
|
|
61
|
+
|
|
62
|
+
/*
|
|
63
|
+
* Each allocated block begins with a header containing information
|
|
64
|
+
*/
|
|
65
|
+
|
|
66
|
+
#define MEMTAG 0x5aa5U
|
|
67
|
+
|
|
68
|
+
#define MALLOC_TYPE 1
|
|
69
|
+
#define REALLOC_TYPE 2
|
|
70
|
+
#define STRDUP_TYPE 3
|
|
71
|
+
#define MALLOC_ATOMIC_TYPE 4
|
|
72
|
+
#define REALLOC_ATOMIC_TYPE 5
|
|
73
|
+
|
|
74
|
+
/*
 * MEMHDR: per-block header record (see RESERVE_SIZE / CLIENT_2_HDR /
 * HDR_2_CLIENT, which are derived from sizeof(MEMHDR)).
 * Member order is significant for the layout and must not be changed.
 */
typedef struct memnod {
    unsigned int   mh_tag;     /* presumably set to MEMTAG to mark a valid header -- TODO confirm */
    unsigned int   mh_type;    /* presumably one of the *_TYPE constants -- TODO confirm */
    unsigned long  mh_number;  /* NOTE(review): looks like the allocation sequence number -- confirm */
    size_t         mh_size;    /* NOTE(review): looks like the client-visible size -- confirm */
#ifdef MEM_LIST
    /* List links exist only when the list of allocated blocks is built. */
    struct memnod *mh_next;
    struct memnod *mh_prev;
#endif
    const char    *mh_file;    /* NOTE(review): presumably the allocating source file -- confirm */
    unsigned int   mh_line;    /* NOTE(review): presumably the allocating source line -- confirm */
} MEMHDR;
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
#ifdef SUN4
|
|
89
|
+
#define ALIGN_SIZE 16
|
|
90
|
+
#else
|
|
91
|
+
#define ALIGN_SIZE sizeof(double)
|
|
92
|
+
#endif
|
|
93
|
+
#define HDR_SIZE sizeof(MEMHDR)
|
|
94
|
+
#define RESERVE_SIZE (((HDR_SIZE + (ALIGN_SIZE-1)) \
|
|
95
|
+
/ ALIGN_SIZE ) * ALIGN_SIZE)
|
|
96
|
+
|
|
97
|
+
#define MAX_SIZE_T ((size_t)-1)
|
|
98
|
+
|
|
99
|
+
#define CLIENT_2_HDR(a) ((void *) (((char *) (a)) - RESERVE_SIZE))
|
|
100
|
+
#define HDR_2_CLIENT(a) ((void *) (((char *) (a)) + RESERVE_SIZE))
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
static unsigned int block=0;
|
|
104
|
+
static unsigned int xmlMemStopAtBlock = 0;
|
|
105
|
+
static void *xmlMemTraceBlockAt = NULL;
|
|
106
|
+
#ifdef MEM_LIST
|
|
107
|
+
static MEMHDR *memlist = NULL;
|
|
108
|
+
#endif
|
|
109
|
+
|
|
110
|
+
static void debugmem_tag_error(void *addr);
|
|
111
|
+
#ifdef MEM_LIST
|
|
112
|
+
static void debugmem_list_add(MEMHDR *);
|
|
113
|
+
static void debugmem_list_delete(MEMHDR *);
|
|
114
|
+
#endif
|
|
115
|
+
#define Mem_Tag_Err(a) debugmem_tag_error(a);
|
|
116
|
+
|
|
117
|
+
#ifndef TEST_POINT
|
|
118
|
+
#define TEST_POINT
|
|
119
|
+
#endif
|
|
120
|
+
|
|
121
|
+
/**
 * xmlInitMemory:
 *
 * DEPRECATED: Alias for xmlInitParser.
 *
 * Forwards to xmlInitParser() and reports success unconditionally.
 *
 * Returns 0 in all cases
 */
int
xmlInitMemory(void) {
    xmlInitParser();
    return 0;
}
|
|
131
|
+
|
|
132
|
+
/**
|
|
133
|
+
* xmlInitMemoryInternal:
|
|
134
|
+
*
|
|
135
|
+
* Initialize the memory layer.
|
|
136
|
+
*
|
|
137
|
+
* Returns 0 on success
|
|
138
|
+
*/
|
|
139
|
+
void
|
|
140
|
+
xmlInitMemoryInternal(void) {
|
|
141
|
+
char *breakpoint;
|
|
142
|
+
#ifdef DEBUG_MEMORY
|
|
143
|
+
xmlGenericError(xmlGenericErrorContext,
|
|
144
|
+
"xmlInitMemory()\n");
|
|
145
|
+
#endif
|
|
146
|
+
xmlInitMutex(&xmlMemMutex);
|
|
147
|
+
|
|
148
|
+
breakpoint = getenv("XML_MEM_BREAKPOINT");
|
|
149
|
+
if (breakpoint != NULL) {
|
|
150
|
+
sscanf(breakpoint, "%ud", &xmlMemStopAtBlock);
|
|
151
|
+
}
|
|
152
|
+
breakpoint = getenv("XML_MEM_TRACE");
|
|
153
|
+
if (breakpoint != NULL) {
|
|
154
|
+
sscanf(breakpoint, "%p", &xmlMemTraceBlockAt);
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
#ifdef DEBUG_MEMORY
|
|
158
|
+
xmlGenericError(xmlGenericErrorContext,
|
|
159
|
+
"xmlInitMemory() Ok\n");
|
|
160
|
+
#endif
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
/**
|
|
164
|
+
* xmlMemSetup:
|
|
165
|
+
* @freeFunc: the free() function to use
|
|
166
|
+
* @mallocFunc: the malloc() function to use
|
|
167
|
+
* @reallocFunc: the realloc() function to use
|
|
168
|
+
* @strdupFunc: the strdup() function to use
|
|
169
|
+
*
|
|
170
|
+
* Override the default memory access functions with a new set
|
|
171
|
+
* This has to be called before any other libxml routines !
|
|
172
|
+
*
|
|
173
|
+
* Should this be blocked if there was already some allocations
|
|
174
|
+
* done ?
|
|
175
|
+
*
|
|
176
|
+
* Returns 0 on success
|
|
177
|
+
*/
|
|
178
|
+
int
|
|
179
|
+
xmlMemSetup(xmlFreeFunc freeFunc, xmlMallocFunc mallocFunc,
|
|
180
|
+
xmlReallocFunc reallocFunc, xmlStrdupFunc strdupFunc) {
|
|
181
|
+
#ifdef DEBUG_MEMORY
|
|
182
|
+
xmlGenericError(xmlGenericErrorContext,
|
|
183
|
+
"xmlMemSetup()\n");
|
|
184
|
+
#endif
|
|
185
|
+
if (freeFunc == NULL)
|
|
186
|
+
return(-1);
|
|
187
|
+
if (mallocFunc == NULL)
|
|
188
|
+
return(-1);
|
|
189
|
+
if (reallocFunc == NULL)
|
|
190
|
+
return(-1);
|
|
191
|
+
if (strdupFunc == NULL)
|
|
192
|
+
return(-1);
|
|
193
|
+
xmlFree = freeFunc;
|
|
194
|
+
xmlMalloc = mallocFunc;
|
|
195
|
+
xmlMallocAtomic = mallocFunc;
|
|
196
|
+
xmlRealloc = reallocFunc;
|
|
197
|
+
xmlMemStrdup = strdupFunc;
|
|
198
|
+
#ifdef DEBUG_MEMORY
|
|
199
|
+
xmlGenericError(xmlGenericErrorContext,
|
|
200
|
+
"xmlMemSetup() Ok\n");
|
|
201
|
+
#endif
|
|
202
|
+
return(0);
|
|
203
|
+
}
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* parser.c : an XML 1.0 parser, namespaces and validity support are mostly
|
|
3
|
+
* implemented on top of the SAX interfaces
|
|
4
|
+
*
|
|
5
|
+
* References:
|
|
6
|
+
* The XML specification:
|
|
7
|
+
* http://www.w3.org/TR/REC-xml
|
|
8
|
+
* Original 1.0 version:
|
|
9
|
+
* http://www.w3.org/TR/1998/REC-xml-19980210
|
|
10
|
+
* XML second edition working draft
|
|
11
|
+
* http://www.w3.org/TR/2000/WD-xml-2e-20000814
|
|
12
|
+
*
|
|
13
|
+
* Okay this is a big file, the parser core is around 7000 lines, then it
|
|
14
|
+
* is followed by the progressive parser top routines, then the various
|
|
15
|
+
* high level APIs to call the parser and a few miscellaneous functions.
|
|
16
|
+
* A number of helper functions and deprecated ones have been moved to
|
|
17
|
+
* parserInternals.c to reduce this file size.
|
|
18
|
+
* As much as possible the functions are associated with their relative
|
|
19
|
+
* production in the XML specification. A few productions defining the
|
|
20
|
+
* different ranges of character are actually implemented either in
|
|
21
|
+
* parserInternals.h or parserInternals.c
|
|
22
|
+
* The DOM tree build is realized from the default SAX callbacks in
|
|
23
|
+
* the module SAX.c.
|
|
24
|
+
* The routines doing the validation checks are in valid.c and called either
|
|
25
|
+
* from the SAX callbacks or as standalone functions using a preparsed
|
|
26
|
+
* document.
|
|
27
|
+
*
|
|
28
|
+
* See Copyright for the status of this software.
|
|
29
|
+
*
|
|
30
|
+
* daniel@veillard.com
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
/* To avoid EBCDIC trouble when parsing on zOS */
|
|
34
|
+
#if defined(__MVS__)
|
|
35
|
+
#pragma convert("ISO8859-1")
|
|
36
|
+
#endif
|
|
37
|
+
|
|
38
|
+
#define IN_LIBXML
|
|
39
|
+
#include "libxml.h"
|
|
40
|
+
|
|
41
|
+
#if defined(_WIN32)
|
|
42
|
+
#define XML_DIR_SEP '\\'
|
|
43
|
+
#else
|
|
44
|
+
#define XML_DIR_SEP '/'
|
|
45
|
+
#endif
|
|
46
|
+
|
|
47
|
+
#include <stdlib.h>
|
|
48
|
+
#include <limits.h>
|
|
49
|
+
#include <string.h>
|
|
50
|
+
#include <stdarg.h>
|
|
51
|
+
#include <stddef.h>
|
|
52
|
+
#include <ctype.h>
|
|
53
|
+
#include <stdlib.h>
|
|
54
|
+
#include "libxml/xmlmemory.h"
|
|
55
|
+
#include "libxml/threads.h"
|
|
56
|
+
#include "libxml/globals.h"
|
|
57
|
+
#include "libxml/tree.h"
|
|
58
|
+
#include "libxml/parser.h"
|
|
59
|
+
#include "libxml/parserInternals.h"
|
|
60
|
+
#include "libxml/HTMLparser.h"
|
|
61
|
+
#include "libxml/valid.h"
|
|
62
|
+
#include "libxml/entities.h"
|
|
63
|
+
#include "libxml/xmlerror.h"
|
|
64
|
+
#include "libxml/encoding.h"
|
|
65
|
+
#include "libxml/xmlIO.h"
|
|
66
|
+
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
|
|
67
|
+
#include "libxml/xpath.h"
|
|
68
|
+
#endif
|
|
69
|
+
|
|
70
|
+
#include "private/threads.h"
|
|
71
|
+
#include "private/enc.h"
|
|
72
|
+
#include "private/xpath.h"
|
|
73
|
+
#include "private/dict.h"
|
|
74
|
+
#include "private/memory.h"
|
|
75
|
+
#include "private/globals.h"
|
|
76
|
+
|
|
77
|
+
/************************************************************************
|
|
78
|
+
* *
|
|
79
|
+
* Miscellaneous *
|
|
80
|
+
* *
|
|
81
|
+
************************************************************************/
|
|
82
|
+
|
|
83
|
+
static int xmlParserInitialized = 0;
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* xmlInitParser:
|
|
87
|
+
*
|
|
88
|
+
* Initialization function for the XML parser.
|
|
89
|
+
* This is not reentrant. Call once before processing in case of
|
|
90
|
+
* use in multithreaded programs.
|
|
91
|
+
*/
|
|
92
|
+
|
|
93
|
+
void
|
|
94
|
+
xmlInitParser(void) {
|
|
95
|
+
/*
|
|
96
|
+
* Note that the initialization code must not make memory allocations.
|
|
97
|
+
*/
|
|
98
|
+
if (xmlParserInitialized != 0)
|
|
99
|
+
return;
|
|
100
|
+
|
|
101
|
+
#ifdef LIBXML_THREAD_ENABLED
|
|
102
|
+
__xmlGlobalInitMutexLock();
|
|
103
|
+
if (xmlParserInitialized == 0) {
|
|
104
|
+
#endif
|
|
105
|
+
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
|
|
106
|
+
if (xmlFree == free)
|
|
107
|
+
atexit(xmlCleanupParser);
|
|
108
|
+
#endif
|
|
109
|
+
|
|
110
|
+
xmlInitThreadsInternal();
|
|
111
|
+
xmlInitGlobalsInternal();
|
|
112
|
+
xmlInitMemoryInternal();
|
|
113
|
+
__xmlInitializeDict();
|
|
114
|
+
xmlInitEncodingInternal();
|
|
115
|
+
// xmlRegisterDefaultInputCallbacks();
|
|
116
|
+
// #ifdef LIBXML_OUTPUT_ENABLED
|
|
117
|
+
// xmlRegisterDefaultOutputCallbacks();
|
|
118
|
+
// #endif /* LIBXML_OUTPUT_ENABLED */
|
|
119
|
+
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
|
|
120
|
+
xmlInitXPathInternal();
|
|
121
|
+
#endif
|
|
122
|
+
xmlParserInitialized = 1;
|
|
123
|
+
#ifdef LIBXML_THREAD_ENABLED
|
|
124
|
+
}
|
|
125
|
+
__xmlGlobalInitMutexUnlock();
|
|
126
|
+
#endif
|
|
127
|
+
}
|
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* parserInternals.c : Internal routines (and obsolete ones) needed for the
|
|
3
|
+
* XML and HTML parsers.
|
|
4
|
+
*
|
|
5
|
+
* See Copyright for the status of this software.
|
|
6
|
+
*
|
|
7
|
+
* daniel@veillard.com
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
#define IN_LIBXML
|
|
11
|
+
#include "libxml.h"
|
|
12
|
+
|
|
13
|
+
#if defined(_WIN32)
|
|
14
|
+
#define XML_DIR_SEP '\\'
|
|
15
|
+
#else
|
|
16
|
+
#define XML_DIR_SEP '/'
|
|
17
|
+
#endif
|
|
18
|
+
|
|
19
|
+
#include <string.h>
|
|
20
|
+
#include <ctype.h>
|
|
21
|
+
#include <stdlib.h>
|
|
22
|
+
|
|
23
|
+
#include "libxml/xmlmemory.h"
|
|
24
|
+
#include "libxml/parser.h"
|
|
25
|
+
#include "libxml/parserInternals.h"
|
|
26
|
+
#include "libxml/globals.h"
|
|
27
|
+
#include "libxml/chvalid.h"
|
|
28
|
+
|
|
29
|
+
#define CUR(ctxt) ctxt->input->cur
|
|
30
|
+
#define END(ctxt) ctxt->input->end
|
|
31
|
+
#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
|
|
32
|
+
|
|
33
|
+
#include "private/buf.h"
|
|
34
|
+
#include "private/error.h"
|
|
35
|
+
#include "private/parser.h"
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* xmlErrMemory:
|
|
39
|
+
* @ctxt: an XML parser context
|
|
40
|
+
* @extra: extra information
|
|
41
|
+
*
|
|
42
|
+
* Handle a redefinition of attribute error
|
|
43
|
+
*/
|
|
44
|
+
void
|
|
45
|
+
xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
|
|
46
|
+
{
|
|
47
|
+
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
|
|
48
|
+
(ctxt->instate == XML_PARSER_EOF))
|
|
49
|
+
return;
|
|
50
|
+
if (ctxt != NULL) {
|
|
51
|
+
ctxt->errNo = XML_ERR_NO_MEMORY;
|
|
52
|
+
ctxt->instate = XML_PARSER_EOF;
|
|
53
|
+
ctxt->disableSAX = 1;
|
|
54
|
+
}
|
|
55
|
+
if (extra)
|
|
56
|
+
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
|
|
57
|
+
XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
|
|
58
|
+
NULL, NULL, 0, 0,
|
|
59
|
+
"Memory allocation failed : %s\n", extra);
|
|
60
|
+
else
|
|
61
|
+
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
|
|
62
|
+
XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
|
|
63
|
+
NULL, NULL, 0, 0, "Memory allocation failed\n");
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
/**
|
|
67
|
+
* __xmlErrEncoding:
|
|
68
|
+
* @ctxt: an XML parser context
|
|
69
|
+
* @xmlerr: the error number
|
|
70
|
+
* @msg: the error message
|
|
71
|
+
* @str1: an string info
|
|
72
|
+
* @str2: an string info
|
|
73
|
+
*
|
|
74
|
+
* Handle an encoding error
|
|
75
|
+
*/
|
|
76
|
+
void
|
|
77
|
+
__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
|
|
78
|
+
const char *msg, const xmlChar * str1, const xmlChar * str2)
|
|
79
|
+
{
|
|
80
|
+
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
|
|
81
|
+
(ctxt->instate == XML_PARSER_EOF))
|
|
82
|
+
return;
|
|
83
|
+
if (ctxt != NULL)
|
|
84
|
+
ctxt->errNo = xmlerr;
|
|
85
|
+
__xmlRaiseError(NULL, NULL, NULL,
|
|
86
|
+
ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
|
|
87
|
+
NULL, 0, (const char *) str1, (const char *) str2,
|
|
88
|
+
NULL, 0, 0, msg, str1, str2);
|
|
89
|
+
if (ctxt != NULL) {
|
|
90
|
+
ctxt->wellFormed = 0;
|
|
91
|
+
if (ctxt->recovery == 0)
|
|
92
|
+
ctxt->disableSAX = 1;
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
/**
|
|
97
|
+
* xmlErrInternal:
|
|
98
|
+
* @ctxt: an XML parser context
|
|
99
|
+
* @msg: the error message
|
|
100
|
+
* @str: error information
|
|
101
|
+
*
|
|
102
|
+
* Handle an internal error
|
|
103
|
+
*/
|
|
104
|
+
static void LIBXML_ATTR_FORMAT(2,0)
|
|
105
|
+
xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
|
|
106
|
+
{
|
|
107
|
+
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
|
|
108
|
+
(ctxt->instate == XML_PARSER_EOF))
|
|
109
|
+
return;
|
|
110
|
+
if (ctxt != NULL)
|
|
111
|
+
ctxt->errNo = XML_ERR_INTERNAL_ERROR;
|
|
112
|
+
__xmlRaiseError(NULL, NULL, NULL,
|
|
113
|
+
ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
|
|
114
|
+
XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
|
|
115
|
+
0, 0, msg, str);
|
|
116
|
+
if (ctxt != NULL) {
|
|
117
|
+
ctxt->wellFormed = 0;
|
|
118
|
+
if (ctxt->recovery == 0)
|
|
119
|
+
ctxt->disableSAX = 1;
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
/**
|
|
124
|
+
* xmlErrEncodingInt:
|
|
125
|
+
* @ctxt: an XML parser context
|
|
126
|
+
* @error: the error number
|
|
127
|
+
* @msg: the error message
|
|
128
|
+
* @val: an integer value
|
|
129
|
+
*
|
|
130
|
+
* n encoding error
|
|
131
|
+
*/
|
|
132
|
+
static void LIBXML_ATTR_FORMAT(3,0)
|
|
133
|
+
xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
|
|
134
|
+
const char *msg, int val)
|
|
135
|
+
{
|
|
136
|
+
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
|
|
137
|
+
(ctxt->instate == XML_PARSER_EOF))
|
|
138
|
+
return;
|
|
139
|
+
if (ctxt != NULL)
|
|
140
|
+
ctxt->errNo = error;
|
|
141
|
+
__xmlRaiseError(NULL, NULL, NULL,
|
|
142
|
+
ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
|
|
143
|
+
NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
|
|
144
|
+
if (ctxt != NULL) {
|
|
145
|
+
ctxt->wellFormed = 0;
|
|
146
|
+
if (ctxt->recovery == 0)
|
|
147
|
+
ctxt->disableSAX = 1;
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 1 if @c is a BaseChar or an Ideographic, 0 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
|
|
164
|
+
|
|
165
|
+
/**
|
|
166
|
+
* xmlCopyCharMultiByte:
|
|
167
|
+
* @out: pointer to an array of xmlChar
|
|
168
|
+
* @val: the char value
|
|
169
|
+
*
|
|
170
|
+
* append the char value in the array
|
|
171
|
+
*
|
|
172
|
+
* Returns the number of xmlChar written
|
|
173
|
+
*/
|
|
174
|
+
int
|
|
175
|
+
xmlCopyCharMultiByte(xmlChar *out, int val) {
|
|
176
|
+
if ((out == NULL) || (val < 0)) return(0);
|
|
177
|
+
/*
|
|
178
|
+
* We are supposed to handle UTF8, check it's valid
|
|
179
|
+
* From rfc2044: encoding of the Unicode values on UTF-8:
|
|
180
|
+
*
|
|
181
|
+
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
|
|
182
|
+
* 0000 0000-0000 007F 0xxxxxxx
|
|
183
|
+
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
|
|
184
|
+
* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
|
|
185
|
+
*/
|
|
186
|
+
if (val >= 0x80) {
|
|
187
|
+
xmlChar *savedout = out;
|
|
188
|
+
int bits;
|
|
189
|
+
if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
|
|
190
|
+
else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
|
|
191
|
+
else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
|
|
192
|
+
else {
|
|
193
|
+
xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
|
|
194
|
+
"Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
|
|
195
|
+
val);
|
|
196
|
+
return(0);
|
|
197
|
+
}
|
|
198
|
+
for ( ; bits >= 0; bits-= 6)
|
|
199
|
+
*out++= ((val >> bits) & 0x3F) | 0x80 ;
|
|
200
|
+
return (out - savedout);
|
|
201
|
+
}
|
|
202
|
+
*out = val;
|
|
203
|
+
return 1;
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
/**
|
|
207
|
+
* xmlCopyChar:
|
|
208
|
+
* @len: Ignored, compatibility
|
|
209
|
+
* @out: pointer to an array of xmlChar
|
|
210
|
+
* @val: the char value
|
|
211
|
+
*
|
|
212
|
+
* append the char value in the array
|
|
213
|
+
*
|
|
214
|
+
* Returns the number of xmlChar written
|
|
215
|
+
*/
|
|
216
|
+
|
|
217
|
+
int
|
|
218
|
+
xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
|
|
219
|
+
if ((out == NULL) || (val < 0)) return(0);
|
|
220
|
+
/* the len parameter is ignored */
|
|
221
|
+
if (val >= 0x80) {
|
|
222
|
+
return(xmlCopyCharMultiByte (out, val));
|
|
223
|
+
}
|
|
224
|
+
*out = val;
|
|
225
|
+
return 1;
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
/**
|
|
229
|
+
* xmlStringCurrentChar:
|
|
230
|
+
* @ctxt: the XML parser context
|
|
231
|
+
* @cur: pointer to the beginning of the char
|
|
232
|
+
* @len: pointer to the length of the char read
|
|
233
|
+
*
|
|
234
|
+
* The current char value, if using UTF-8 this may actually span multiple
|
|
235
|
+
* bytes in the input buffer.
|
|
236
|
+
*
|
|
237
|
+
* Returns the current char value and its length
|
|
238
|
+
*/
|
|
239
|
+
|
|
240
|
+
int
|
|
241
|
+
xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
|
|
242
|
+
{
|
|
243
|
+
if ((len == NULL) || (cur == NULL)) return(0);
|
|
244
|
+
if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
|
|
245
|
+
/*
|
|
246
|
+
* We are supposed to handle UTF8, check it's valid
|
|
247
|
+
* From rfc2044: encoding of the Unicode values on UTF-8:
|
|
248
|
+
*
|
|
249
|
+
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
|
|
250
|
+
* 0000 0000-0000 007F 0xxxxxxx
|
|
251
|
+
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
|
|
252
|
+
* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
|
|
253
|
+
*
|
|
254
|
+
* Check for the 0x110000 limit too
|
|
255
|
+
*/
|
|
256
|
+
unsigned char c;
|
|
257
|
+
unsigned int val;
|
|
258
|
+
|
|
259
|
+
c = *cur;
|
|
260
|
+
if (c & 0x80) {
|
|
261
|
+
if ((cur[1] & 0xc0) != 0x80)
|
|
262
|
+
goto encoding_error;
|
|
263
|
+
if ((c & 0xe0) == 0xe0) {
|
|
264
|
+
|
|
265
|
+
if ((cur[2] & 0xc0) != 0x80)
|
|
266
|
+
goto encoding_error;
|
|
267
|
+
if ((c & 0xf0) == 0xf0) {
|
|
268
|
+
if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
|
|
269
|
+
goto encoding_error;
|
|
270
|
+
/* 4-byte code */
|
|
271
|
+
*len = 4;
|
|
272
|
+
val = (cur[0] & 0x7) << 18;
|
|
273
|
+
val |= (cur[1] & 0x3f) << 12;
|
|
274
|
+
val |= (cur[2] & 0x3f) << 6;
|
|
275
|
+
val |= cur[3] & 0x3f;
|
|
276
|
+
} else {
|
|
277
|
+
/* 3-byte code */
|
|
278
|
+
*len = 3;
|
|
279
|
+
val = (cur[0] & 0xf) << 12;
|
|
280
|
+
val |= (cur[1] & 0x3f) << 6;
|
|
281
|
+
val |= cur[2] & 0x3f;
|
|
282
|
+
}
|
|
283
|
+
} else {
|
|
284
|
+
/* 2-byte code */
|
|
285
|
+
*len = 2;
|
|
286
|
+
val = (cur[0] & 0x1f) << 6;
|
|
287
|
+
val |= cur[1] & 0x3f;
|
|
288
|
+
}
|
|
289
|
+
if (!IS_CHAR(val)) {
|
|
290
|
+
xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
|
|
291
|
+
"Char 0x%X out of allowed range\n", val);
|
|
292
|
+
}
|
|
293
|
+
return (val);
|
|
294
|
+
} else {
|
|
295
|
+
/* 1-byte code */
|
|
296
|
+
*len = 1;
|
|
297
|
+
return (*cur);
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
/*
|
|
301
|
+
* Assume it's a fixed length encoding (1) with
|
|
302
|
+
* a compatible encoding for the ASCII set, since
|
|
303
|
+
* XML constructs only use < 128 chars
|
|
304
|
+
*/
|
|
305
|
+
*len = 1;
|
|
306
|
+
return (*cur);
|
|
307
|
+
encoding_error:
|
|
308
|
+
|
|
309
|
+
/*
|
|
310
|
+
* An encoding problem may arise from a truncated input buffer
|
|
311
|
+
* splitting a character in the middle. In that case do not raise
|
|
312
|
+
* an error but return 0 to indicate an end of stream problem
|
|
313
|
+
*/
|
|
314
|
+
if ((ctxt == NULL) || (ctxt->input == NULL) ||
|
|
315
|
+
(ctxt->input->end - ctxt->input->cur < 4)) {
|
|
316
|
+
*len = 0;
|
|
317
|
+
return(0);
|
|
318
|
+
}
|
|
319
|
+
/*
|
|
320
|
+
* If we detect an UTF8 error that probably mean that the
|
|
321
|
+
* input encoding didn't get properly advertised in the
|
|
322
|
+
* declaration header. Report the error and switch the encoding
|
|
323
|
+
* to ISO-Latin-1 (if you don't like this policy, just declare the
|
|
324
|
+
* encoding !)
|
|
325
|
+
*/
|
|
326
|
+
{
|
|
327
|
+
char buffer[150];
|
|
328
|
+
|
|
329
|
+
snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
|
|
330
|
+
ctxt->input->cur[0], ctxt->input->cur[1],
|
|
331
|
+
ctxt->input->cur[2], ctxt->input->cur[3]);
|
|
332
|
+
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
|
|
333
|
+
"Input is not proper UTF-8, indicate encoding !\n%s",
|
|
334
|
+
BAD_CAST buffer, NULL);
|
|
335
|
+
}
|
|
336
|
+
*len = 1;
|
|
337
|
+
return (*cur);
|
|
338
|
+
}
|