makiri 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/release.yml +11 -5
- data/CHANGELOG.md +65 -1
- data/ext/makiri/glue/glue.h +6 -0
- data/ext/makiri/glue/ruby_doc.c +56 -0
- data/ext/makiri/glue/ruby_mutate.c +37 -0
- data/ext/makiri/glue/ruby_node.c +197 -4
- data/ext/makiri/glue/ruby_xpath.c +1 -1
- data/lib/makiri/version.rb +1 -1
- metadata +1 -557
- data/vendor/lexbor/.github/FUNDING.yml +0 -12
- data/vendor/lexbor/.github/workflows/cmake.yml +0 -37
- data/vendor/lexbor/benchmarks/CMakeLists.txt +0 -22
- data/vendor/lexbor/benchmarks/benchmark.h +0 -101
- data/vendor/lexbor/benchmarks/lexbor/html/CMakeLists.txt +0 -16
- data/vendor/lexbor/benchmarks/lexbor/html/tokenizer/input_validation.c +0 -100
- data/vendor/lexbor/benchmarks/lexbor/html/tokenizer/parse.c +0 -95
- data/vendor/lexbor/benchmarks/lexbor/selectors/CMakeLists.txt +0 -16
- data/vendor/lexbor/benchmarks/lexbor/selectors/files/average.html +0 -41
- data/vendor/lexbor/benchmarks/lexbor/selectors/selectors.c +0 -144
- data/vendor/lexbor/examples/CMakeLists.txt +0 -17
- data/vendor/lexbor/examples/lexbor/css/CMakeLists.txt +0 -25
- data/vendor/lexbor/examples/lexbor/css/StyleSheet.c +0 -70
- data/vendor/lexbor/examples/lexbor/css/base.h +0 -34
- data/vendor/lexbor/examples/lexbor/css/selectors/list_easy_way.c +0 -74
- data/vendor/lexbor/examples/lexbor/css/selectors/list_fast_way.c +0 -149
- data/vendor/lexbor/examples/lexbor/css/syntax/structure_parse_file.c +0 -467
- data/vendor/lexbor/examples/lexbor/css/syntax/tokenizer/from_file.c +0 -87
- data/vendor/lexbor/examples/lexbor/css/syntax/tokenizer/print_raw.c +0 -100
- data/vendor/lexbor/examples/lexbor/encoding/CMakeLists.txt +0 -11
- data/vendor/lexbor/examples/lexbor/encoding/buffer/decode/decode.c +0 -58
- data/vendor/lexbor/examples/lexbor/encoding/buffer/decode/decoder.c +0 -140
- data/vendor/lexbor/examples/lexbor/encoding/buffer/decode/validate.c +0 -65
- data/vendor/lexbor/examples/lexbor/encoding/buffer/encode/encode.c +0 -67
- data/vendor/lexbor/examples/lexbor/encoding/buffer/encode/encoder.c +0 -262
- data/vendor/lexbor/examples/lexbor/encoding/buffer/encode/validate.c +0 -77
- data/vendor/lexbor/examples/lexbor/encoding/buffer/from_to.c +0 -193
- data/vendor/lexbor/examples/lexbor/encoding/data_by_name.c +0 -23
- data/vendor/lexbor/examples/lexbor/encoding/single/decode/decode.c +0 -55
- data/vendor/lexbor/examples/lexbor/encoding/single/decode/decoder.c +0 -115
- data/vendor/lexbor/examples/lexbor/encoding/single/decode/validate.c +0 -59
- data/vendor/lexbor/examples/lexbor/encoding/single/encode/encode.c +0 -65
- data/vendor/lexbor/examples/lexbor/encoding/single/encode/encoder.c +0 -241
- data/vendor/lexbor/examples/lexbor/encoding/single/encode/validate.c +0 -85
- data/vendor/lexbor/examples/lexbor/encoding/single/from_to.c +0 -156
- data/vendor/lexbor/examples/lexbor/html/CMakeLists.txt +0 -21
- data/vendor/lexbor/examples/lexbor/html/base.h +0 -98
- data/vendor/lexbor/examples/lexbor/html/document_parse.c +0 -43
- data/vendor/lexbor/examples/lexbor/html/document_parse_chunk.c +0 -72
- data/vendor/lexbor/examples/lexbor/html/document_title.c +0 -84
- data/vendor/lexbor/examples/lexbor/html/element_attributes.c +0 -134
- data/vendor/lexbor/examples/lexbor/html/element_create.c +0 -84
- data/vendor/lexbor/examples/lexbor/html/element_innerHTML.c +0 -52
- data/vendor/lexbor/examples/lexbor/html/elements_by_attr.c +0 -106
- data/vendor/lexbor/examples/lexbor/html/elements_by_class_name.c +0 -55
- data/vendor/lexbor/examples/lexbor/html/elements_by_tag_name.c +0 -51
- data/vendor/lexbor/examples/lexbor/html/encoding.c +0 -95
- data/vendor/lexbor/examples/lexbor/html/html2sexpr.c +0 -231
- data/vendor/lexbor/examples/lexbor/html/parse.c +0 -69
- data/vendor/lexbor/examples/lexbor/html/parse_chunk.c +0 -77
- data/vendor/lexbor/examples/lexbor/html/tokenizer/callback.c +0 -78
- data/vendor/lexbor/examples/lexbor/html/tokenizer/simple.c +0 -118
- data/vendor/lexbor/examples/lexbor/html/tokenizer/tag_attributes.c +0 -106
- data/vendor/lexbor/examples/lexbor/html/tokenizer/text.c +0 -75
- data/vendor/lexbor/examples/lexbor/punycode/CMakeLists.txt +0 -11
- data/vendor/lexbor/examples/lexbor/punycode/decode.c +0 -102
- data/vendor/lexbor/examples/lexbor/punycode/encode.c +0 -102
- data/vendor/lexbor/examples/lexbor/selectors/CMakeLists.txt +0 -15
- data/vendor/lexbor/examples/lexbor/selectors/easy_way.c +0 -120
- data/vendor/lexbor/examples/lexbor/selectors/normal_way.c +0 -172
- data/vendor/lexbor/examples/lexbor/selectors/unique_nodes.c +0 -142
- data/vendor/lexbor/examples/lexbor/styles/CMakeLists.txt +0 -15
- data/vendor/lexbor/examples/lexbor/styles/attribute_style.c +0 -110
- data/vendor/lexbor/examples/lexbor/styles/base.h +0 -34
- data/vendor/lexbor/examples/lexbor/styles/events_insert.c +0 -199
- data/vendor/lexbor/examples/lexbor/styles/stylesheet.c +0 -141
- data/vendor/lexbor/examples/lexbor/styles/walk.c +0 -170
- data/vendor/lexbor/examples/lexbor/unicode/CMakeLists.txt +0 -17
- data/vendor/lexbor/examples/lexbor/unicode/idna_to_ascii.c +0 -115
- data/vendor/lexbor/examples/lexbor/unicode/normalization_form.c +0 -99
- data/vendor/lexbor/examples/lexbor/unicode/normalization_form_stdin.c +0 -99
- data/vendor/lexbor/examples/lexbor/url/CMakeLists.txt +0 -15
- data/vendor/lexbor/examples/lexbor/url/parse.c +0 -101
- data/vendor/lexbor/examples/lexbor/url/relative.c +0 -112
- data/vendor/lexbor/images/SerpApi-logo.png +0 -0
- data/vendor/lexbor/images/neural-logo.png +0 -0
- data/vendor/lexbor/packaging/Makefile +0 -26
- data/vendor/lexbor/packaging/README.md +0 -17
- data/vendor/lexbor/packaging/deb/Makefile.in +0 -40
- data/vendor/lexbor/packaging/deb/Makefile.module.in +0 -15
- data/vendor/lexbor/packaging/deb/debian_in/changelog +0 -6
- data/vendor/lexbor/packaging/deb/debian_in/control +0 -25
- data/vendor/lexbor/packaging/deb/debian_in/copyright +0 -29
- data/vendor/lexbor/packaging/deb/debian_in/dev.dirs +0 -2
- data/vendor/lexbor/packaging/deb/debian_in/dev.install +0 -3
- data/vendor/lexbor/packaging/deb/debian_in/dirs +0 -1
- data/vendor/lexbor/packaging/deb/debian_in/docs +0 -2
- data/vendor/lexbor/packaging/deb/debian_in/install +0 -1
- data/vendor/lexbor/packaging/deb/debian_in/not-installed +0 -4
- data/vendor/lexbor/packaging/deb/debian_in/rules +0 -15
- data/vendor/lexbor/packaging/deb/debian_in/source/format +0 -1
- data/vendor/lexbor/packaging/deb/debian_main_in/changelog +0 -6
- data/vendor/lexbor/packaging/deb/debian_main_in/control +0 -33
- data/vendor/lexbor/packaging/deb/debian_main_in/copyright +0 -29
- data/vendor/lexbor/packaging/deb/debian_main_in/dev.dirs +0 -3
- data/vendor/lexbor/packaging/deb/debian_main_in/dev.install +0 -5
- data/vendor/lexbor/packaging/deb/debian_main_in/dirs +0 -1
- data/vendor/lexbor/packaging/deb/debian_main_in/docs +0 -2
- data/vendor/lexbor/packaging/deb/debian_main_in/install +0 -1
- data/vendor/lexbor/packaging/deb/debian_main_in/rules +0 -15
- data/vendor/lexbor/packaging/deb/debian_main_in/source/format +0 -1
- data/vendor/lexbor/packaging/rpm/Makefile +0 -14
- data/vendor/lexbor/packaging/rpm/build.sh +0 -105
- data/vendor/lexbor/packaging/rpm/liblexbor-module.spec.in +0 -31
- data/vendor/lexbor/packaging/rpm/liblexbor.spec.in +0 -62
- data/vendor/lexbor/test/CMakeLists.txt +0 -44
- data/vendor/lexbor/test/amalgamation/code/_base.h +0 -33
- data/vendor/lexbor/test/amalgamation/code/html.c +0 -35
- data/vendor/lexbor/test/amalgamation/generate_and_compile.sh +0 -130
- data/vendor/lexbor/test/external/commoncrawl.py +0 -110
- data/vendor/lexbor/test/files/lexbor/css/declarations/display.ton +0 -801
- data/vendor/lexbor/test/files/lexbor/css/declarations/height.ton +0 -367
- data/vendor/lexbor/test/files/lexbor/css/declarations/syntax.ton +0 -189
- data/vendor/lexbor/test/files/lexbor/css/declarations/width.ton +0 -367
- data/vendor/lexbor/test/files/lexbor/css/lexbor.css +0 -205
- data/vendor/lexbor/test/files/lexbor/css/syntax/parser/at.ton +0 -518
- data/vendor/lexbor/test/files/lexbor/css/syntax/parser/other.ton +0 -80
- data/vendor/lexbor/test/files/lexbor/css/syntax/parser/qualified.ton +0 -799
- data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/CDO-CDC.ton +0 -226
- data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/at.ton +0 -170
- data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/broken-utf-8.ton +0 -101
- data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/comment.ton +0 -95
- data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/hash.ton +0 -181
- data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/ident.ton +0 -245
- data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/number.ton +0 -694
- data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/other.ton +0 -16
- data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/reverse-solidus.ton +0 -111
- data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/single-tokens.ton +0 -66
- data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/string.ton +0 -303
- data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/unicode_range.ton +0 -139
- data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/url-function.ton +0 -229
- data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/whitespace.ton +0 -45
- data/vendor/lexbor/test/files/lexbor/encoding/big5_map_decode.txt +0 -14699
- data/vendor/lexbor/test/files/lexbor/encoding/euc_jp_map_decode.txt +0 -7737
- data/vendor/lexbor/test/files/lexbor/encoding/euc_kr_map_decode.txt +0 -17189
- data/vendor/lexbor/test/files/lexbor/encoding/gb18030_map_decode.txt +0 -27672
- data/vendor/lexbor/test/files/lexbor/encoding/iso_2022_jp_map_decode.txt +0 -7928
- data/vendor/lexbor/test/files/lexbor/encoding/shift_jis_map_decode.txt +0 -5138
- data/vendor/lexbor/test/files/lexbor/html/html5_test/README.md +0 -12
- data/vendor/lexbor/test/files/lexbor/html/html5_test/adoption01.ton +0 -442
- data/vendor/lexbor/test/files/lexbor/html/html5_test/adoption02.ton +0 -53
- data/vendor/lexbor/test/files/lexbor/html/html5_test/attributes.ton +0 -29
- data/vendor/lexbor/test/files/lexbor/html/html5_test/blocks.ton +0 -891
- data/vendor/lexbor/test/files/lexbor/html/html5_test/char_ref.ton +0 -51
- data/vendor/lexbor/test/files/lexbor/html/html5_test/comments01.ton +0 -290
- data/vendor/lexbor/test/files/lexbor/html/html5_test/doctype01.ton +0 -637
- data/vendor/lexbor/test/files/lexbor/html/html5_test/domjs-unsafe.ton +0 -822
- data/vendor/lexbor/test/files/lexbor/html/html5_test/entities01.ton +0 -1262
- data/vendor/lexbor/test/files/lexbor/html/html5_test/entities02.ton +0 -416
- data/vendor/lexbor/test/files/lexbor/html/html5_test/foreign-fragment.ton +0 -859
- data/vendor/lexbor/test/files/lexbor/html/html5_test/html5test-com.ton +0 -414
- data/vendor/lexbor/test/files/lexbor/html/html5_test/inbody01.ton +0 -78
- data/vendor/lexbor/test/files/lexbor/html/html5_test/isindex.ton +0 -67
- data/vendor/lexbor/test/files/lexbor/html/html5_test/main-element.ton +0 -63
- data/vendor/lexbor/test/files/lexbor/html/html5_test/math.ton +0 -140
- data/vendor/lexbor/test/files/lexbor/html/html5_test/menuitem-element.ton +0 -345
- data/vendor/lexbor/test/files/lexbor/html/html5_test/namespace-sensitivity.ton +0 -31
- data/vendor/lexbor/test/files/lexbor/html/html5_test/noscript01.ton +0 -344
- data/vendor/lexbor/test/files/lexbor/html/html5_test/pending-spec-changes-plain-text-unsafe.ton +0 -39
- data/vendor/lexbor/test/files/lexbor/html/html5_test/pending-spec-changes.ton +0 -65
- data/vendor/lexbor/test/files/lexbor/html/html5_test/plain-text-unsafe.ton +0 -657
- data/vendor/lexbor/test/files/lexbor/html/html5_test/quirks01.ton +0 -77
- data/vendor/lexbor/test/files/lexbor/html/html5_test/ruby.ton +0 -411
- data/vendor/lexbor/test/files/lexbor/html/html5_test/scriptdata01.ton +0 -499
- data/vendor/lexbor/test/files/lexbor/html/html5_test/search-element.ton +0 -63
- data/vendor/lexbor/test/files/lexbor/html/html5_test/svg.ton +0 -140
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tables01.ton +0 -421
- data/vendor/lexbor/test/files/lexbor/html/html5_test/template.ton +0 -2199
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests1.ton +0 -2486
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests10.ton +0 -1090
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests11.ton +0 -317
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests12.ton +0 -72
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests14.ton +0 -100
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests15.ton +0 -290
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests16.ton +0 -3471
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests17.ton +0 -244
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests18.ton +0 -752
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests19.ton +0 -1889
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests2.ton +0 -1093
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests20.ton +0 -1158
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests21.ton +0 -416
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests22.ton +0 -192
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests23.ton +0 -148
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests24.ton +0 -107
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests25.ton +0 -390
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests26.ton +0 -546
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests3.ton +0 -407
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests4.ton +0 -96
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests5.ton +0 -299
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests6.ton +0 -908
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests7.ton +0 -597
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests8.ton +0 -219
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests9.ton +0 -585
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tests_innerHTML_1.ton +0 -1164
- data/vendor/lexbor/test/files/lexbor/html/html5_test/tricky01.ton +0 -378
- data/vendor/lexbor/test/files/lexbor/html/html5_test/webkit01.ton +0 -1022
- data/vendor/lexbor/test/files/lexbor/html/html5_test/webkit02.ton +0 -996
- data/vendor/lexbor/test/files/lexbor/html/html5lib_encoding/README.md +0 -12
- data/vendor/lexbor/test/files/lexbor/html/html5lib_encoding/test-yahoo-jp.dat +0 -10
- data/vendor/lexbor/test/files/lexbor/html/html5lib_encoding/tests1.dat +0 -388
- data/vendor/lexbor/test/files/lexbor/html/html5lib_encoding/tests2.dat +0 -115
- data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/README.md +0 -12
- data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/contentModelFlags.test +0 -93
- data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/domjs.test +0 -335
- data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/entities.test +0 -542
- data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/escapeFlag.test +0 -36
- data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/namedEntities.test +0 -42422
- data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/numericEntities.test +0 -1677
- data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/pendingSpecChanges.test +0 -9
- data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/test1.test +0 -353
- data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/test2.test +0 -275
- data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/test3.test +0 -11233
- data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/test4.test +0 -532
- data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/unicodeChars.test +0 -1577
- data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/unicodeCharsProblematic.test +0 -41
- data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/xmlViolation.test +0 -20
- data/vendor/lexbor/test/files/lexbor/html/lexbor.html +0 -150
- data/vendor/lexbor/test/files/lexbor/html/serialize_ext/attributes.ton +0 -167
- data/vendor/lexbor/test/files/lexbor/html/serialize_ext/comment.ton +0 -218
- data/vendor/lexbor/test/files/lexbor/html/serialize_ext/document_type.ton +0 -180
- data/vendor/lexbor/test/files/lexbor/html/serialize_ext/element.ton +0 -392
- data/vendor/lexbor/test/files/lexbor/html/serialize_ext/processing_instruction.ton +0 -45
- data/vendor/lexbor/test/files/lexbor/html/serialize_ext/serialize_ext.ton +0 -277
- data/vendor/lexbor/test/files/lexbor/html/serialize_ext/text.ton +0 -308
- data/vendor/lexbor/test/files/lexbor/html/tokenizer/char_ref.ton +0 -563
- data/vendor/lexbor/test/files/lexbor/html/tokenizer/comment.ton +0 -28
- data/vendor/lexbor/test/files/lexbor/html/tokenizer/doctype.ton +0 -257
- data/vendor/lexbor/test/files/lexbor/html/tokenizer/tag_attr.ton +0 -107
- data/vendor/lexbor/test/files/lexbor/html/tokenizer/tag_name.ton +0 -51
- data/vendor/lexbor/test/files/lexbor/url/changes.ton +0 -1005
- data/vendor/lexbor/test/files/lexbor/url/domain.ton +0 -93
- data/vendor/lexbor/test/files/lexbor/url/file.ton +0 -29
- data/vendor/lexbor/test/files/lexbor/url/fragment.ton +0 -47
- data/vendor/lexbor/test/files/lexbor/url/ipv4.ton +0 -221
- data/vendor/lexbor/test/files/lexbor/url/ipv6.ton +0 -197
- data/vendor/lexbor/test/files/lexbor/url/path.ton +0 -510
- data/vendor/lexbor/test/files/lexbor/url/query.ton +0 -135
- data/vendor/lexbor/test/files/lexbor/url/scheme.ton +0 -139
- data/vendor/lexbor/test/files/lexbor/url/slow_path.ton +0 -460
- data/vendor/lexbor/test/files/lexbor/url/url.ton +0 -78
- data/vendor/lexbor/test/files/lexbor/url/username_password.ton +0 -127
- data/vendor/lexbor/test/fuzzers/lexbor/css/CMakeLists.txt +0 -16
- data/vendor/lexbor/test/fuzzers/lexbor/css/css.dict +0 -307
- data/vendor/lexbor/test/fuzzers/lexbor/css/stylesheet.c +0 -55
- data/vendor/lexbor/test/fuzzers/lexbor/css/syntax/syntax.dict +0 -41
- data/vendor/lexbor/test/fuzzers/lexbor/css/syntax/tokenizer.c +0 -99
- data/vendor/lexbor/test/fuzzers/lexbor/encoding/CMakeLists.txt +0 -16
- data/vendor/lexbor/test/fuzzers/lexbor/encoding/decode.c +0 -29
- data/vendor/lexbor/test/fuzzers/lexbor/html/CMakeLists.txt +0 -16
- data/vendor/lexbor/test/fuzzers/lexbor/html/document_parse.c +0 -23
- data/vendor/lexbor/test/fuzzers/lexbor/punycode/CMakeLists.txt +0 -16
- data/vendor/lexbor/test/fuzzers/lexbor/punycode/base.c +0 -89
- data/vendor/lexbor/test/fuzzers/lexbor/selectors/CMakeLists.txt +0 -16
- data/vendor/lexbor/test/fuzzers/lexbor/selectors/find.c +0 -146
- data/vendor/lexbor/test/fuzzers/lexbor/selectors/selectors.dict +0 -71
- data/vendor/lexbor/test/fuzzers/lexbor/unicode/CMakeLists.txt +0 -16
- data/vendor/lexbor/test/fuzzers/lexbor/unicode/idna_to_ascii.c +0 -40
- data/vendor/lexbor/test/fuzzers/lexbor/unicode/normalization_forms.c +0 -41
- data/vendor/lexbor/test/fuzzers/lexbor/url/CMakeLists.txt +0 -16
- data/vendor/lexbor/test/fuzzers/lexbor/url/modify.c +0 -117
- data/vendor/lexbor/test/fuzzers/lexbor/url/parser.c +0 -132
- data/vendor/lexbor/test/fuzzers/lexbor/url/url.dict +0 -13
- data/vendor/lexbor/test/lexbor/core/CMakeLists.txt +0 -17
- data/vendor/lexbor/test/lexbor/core/array.c +0 -443
- data/vendor/lexbor/test/lexbor/core/array_obj.c +0 -306
- data/vendor/lexbor/test/lexbor/core/avl.c +0 -1588
- data/vendor/lexbor/test/lexbor/core/bst.c +0 -388
- data/vendor/lexbor/test/lexbor/core/bst_map.c +0 -209
- data/vendor/lexbor/test/lexbor/core/dobject.c +0 -322
- data/vendor/lexbor/test/lexbor/core/hash.c +0 -171
- data/vendor/lexbor/test/lexbor/core/in.c +0 -356
- data/vendor/lexbor/test/lexbor/core/mem.c +0 -332
- data/vendor/lexbor/test/lexbor/core/mraw.c +0 -612
- data/vendor/lexbor/test/lexbor/core/str.c +0 -433
- data/vendor/lexbor/test/lexbor/css/CMakeLists.txt +0 -25
- data/vendor/lexbor/test/lexbor/css/declarations.c +0 -571
- data/vendor/lexbor/test/lexbor/css/selectors/selectors.c +0 -894
- data/vendor/lexbor/test/lexbor/css/selectors/specificity.c +0 -177
- data/vendor/lexbor/test/lexbor/css/stylesheet.c +0 -196
- data/vendor/lexbor/test/lexbor/css/syntax/an_plus_b.c +0 -233
- data/vendor/lexbor/test/lexbor/css/syntax/parser.c +0 -1134
- data/vendor/lexbor/test/lexbor/css/syntax/style.c +0 -67
- data/vendor/lexbor/test/lexbor/css/syntax/tokenizer.c +0 -485
- data/vendor/lexbor/test/lexbor/css/syntax/tokenizer_queue.c +0 -92
- data/vendor/lexbor/test/lexbor/dom/CMakeLists.txt +0 -17
- data/vendor/lexbor/test/lexbor/dom/exception.c +0 -210
- data/vendor/lexbor/test/lexbor/dom/node.c +0 -441
- data/vendor/lexbor/test/lexbor/encoding/CMakeLists.txt +0 -42
- data/vendor/lexbor/test/lexbor/encoding/buffer/big5.c +0 -210
- data/vendor/lexbor/test/lexbor/encoding/buffer/encoding.h +0 -243
- data/vendor/lexbor/test/lexbor/encoding/buffer/euc_jp.c +0 -228
- data/vendor/lexbor/test/lexbor/encoding/buffer/euc_kr.c +0 -172
- data/vendor/lexbor/test/lexbor/encoding/buffer/gb18030.c +0 -297
- data/vendor/lexbor/test/lexbor/encoding/buffer/ibm866.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/iso_2022_jp.c +0 -403
- data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_10.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_13.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_14.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_15.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_16.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_2.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_3.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_4.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_5.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_6.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_7.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_8.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/koi8_r.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/koi8_u.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/macintosh.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/shift_jis.c +0 -230
- data/vendor/lexbor/test/lexbor/encoding/buffer/utf-16.c +0 -230
- data/vendor/lexbor/test/lexbor/encoding/buffer/utf-8.c +0 -282
- data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1250.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1251.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1252.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1253.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1254.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1255.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1256.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1257.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1258.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/windows_874.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/buffer/x_mac_cyrillic.c +0 -123
- data/vendor/lexbor/test/lexbor/encoding/encoding.c +0 -97
- data/vendor/lexbor/test/lexbor/encoding/parser.h +0 -225
- data/vendor/lexbor/test/lexbor/encoding/single/big5.c +0 -203
- data/vendor/lexbor/test/lexbor/encoding/single/encoding.h +0 -227
- data/vendor/lexbor/test/lexbor/encoding/single/euc_jp.c +0 -220
- data/vendor/lexbor/test/lexbor/encoding/single/euc_kr.c +0 -162
- data/vendor/lexbor/test/lexbor/encoding/single/gb18030.c +0 -277
- data/vendor/lexbor/test/lexbor/encoding/single/ibm866.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/iso_2022_jp.c +0 -342
- data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_10.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_13.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_14.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_15.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_16.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_2.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_3.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_4.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_5.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_6.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_7.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_8.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/koi8_r.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/koi8_u.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/macintosh.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/shift_jis.c +0 -203
- data/vendor/lexbor/test/lexbor/encoding/single/utf-16.c +0 -216
- data/vendor/lexbor/test/lexbor/encoding/single/utf-8.c +0 -227
- data/vendor/lexbor/test/lexbor/encoding/single/windows_1250.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/windows_1251.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/windows_1252.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/windows_1253.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/windows_1254.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/windows_1255.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/windows_1256.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/windows_1257.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/windows_1258.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/windows_874.c +0 -114
- data/vendor/lexbor/test/lexbor/encoding/single/x_mac_cyrillic.c +0 -114
- data/vendor/lexbor/test/lexbor/html/CMakeLists.txt +0 -35
- data/vendor/lexbor/test/lexbor/html/attributes.c +0 -105
- data/vendor/lexbor/test/lexbor/html/build-cpp.cpp +0 -68
- data/vendor/lexbor/test/lexbor/html/clone.c +0 -356
- data/vendor/lexbor/test/lexbor/html/dom/document_type.c +0 -125
- data/vendor/lexbor/test/lexbor/html/element_by.c +0 -147
- data/vendor/lexbor/test/lexbor/html/encoding.c +0 -228
- data/vendor/lexbor/test/lexbor/html/encoding_html5lib_tests.c +0 -308
- data/vendor/lexbor/test/lexbor/html/encoding_prescan.c +0 -1686
- data/vendor/lexbor/test/lexbor/html/inner.c +0 -103
- data/vendor/lexbor/test/lexbor/html/other.c +0 -139
- data/vendor/lexbor/test/lexbor/html/parse.c +0 -380
- data/vendor/lexbor/test/lexbor/html/perf.c +0 -161
- data/vendor/lexbor/test/lexbor/html/serialize.c +0 -56
- data/vendor/lexbor/test/lexbor/html/serialize_ext.c +0 -461
- data/vendor/lexbor/test/lexbor/html/tags.c +0 -140
- data/vendor/lexbor/test/lexbor/html/tokenizer/errors.c +0 -34
- data/vendor/lexbor/test/lexbor/html/tokenizer/html5lib_tests.c +0 -1168
- data/vendor/lexbor/test/lexbor/html/tokenizer_helper.h +0 -403
- data/vendor/lexbor/test/lexbor/html/tokenizer_tokens.c +0 -754
- data/vendor/lexbor/test/lexbor/html/tree/errors.c +0 -34
- data/vendor/lexbor/test/lexbor/html/tree/open_elements.c +0 -99
- data/vendor/lexbor/test/lexbor/html/tree_builder.c +0 -536
- data/vendor/lexbor/test/lexbor/ns/CMakeLists.txt +0 -17
- data/vendor/lexbor/test/lexbor/ns/res.c +0 -55
- data/vendor/lexbor/test/lexbor/punycode/CMakeLists.txt +0 -17
- data/vendor/lexbor/test/lexbor/punycode/base.c +0 -240
- data/vendor/lexbor/test/lexbor/selectors/CMakeLists.txt +0 -17
- data/vendor/lexbor/test/lexbor/selectors/selectors.c +0 -911
- data/vendor/lexbor/test/lexbor/style/CMakeLists.txt +0 -17
- data/vendor/lexbor/test/lexbor/style/element_events.c +0 -291
- data/vendor/lexbor/test/lexbor/style/element_style_steps.c +0 -5035
- data/vendor/lexbor/test/lexbor/style/not_html_namespace.c +0 -87
- data/vendor/lexbor/test/lexbor/style/style_tag.c +0 -184
- data/vendor/lexbor/test/lexbor/style/stylesheet.c +0 -51
- data/vendor/lexbor/test/lexbor/style/wo_events.c +0 -351
- data/vendor/lexbor/test/lexbor/tag/CMakeLists.txt +0 -17
- data/vendor/lexbor/test/lexbor/tag/res.c +0 -440
- data/vendor/lexbor/test/lexbor/unicode/CMakeLists.txt +0 -17
- data/vendor/lexbor/test/lexbor/unicode/composition_test.c +0 -1095
- data/vendor/lexbor/test/lexbor/unicode/edges_normalization_forms.c +0 -220
- data/vendor/lexbor/test/lexbor/unicode/idna.c +0 -98
- data/vendor/lexbor/test/lexbor/unicode/idna_codepoints.c +0 -110
- data/vendor/lexbor/test/lexbor/unicode/idna_type.c +0 -31
- data/vendor/lexbor/test/lexbor/unicode/normalization_forms.c +0 -205
- data/vendor/lexbor/test/lexbor/unicode/normalization_forms_code_points.c +0 -214
- data/vendor/lexbor/test/lexbor/unicode/unicode_idna_test_res.h +0 -6423
- data/vendor/lexbor/test/lexbor/unicode/unicode_normalization_test_res.h +0 -120229
- data/vendor/lexbor/test/lexbor/url/CMakeLists.txt +0 -22
- data/vendor/lexbor/test/lexbor/url/errors.c +0 -41
- data/vendor/lexbor/test/lexbor/url/other.c +0 -134
- data/vendor/lexbor/test/lexbor/url/parser.c +0 -872
- data/vendor/lexbor/test/lexbor/url/search_params.c +0 -616
- data/vendor/lexbor/test/lexbor/url/validation.c +0 -185
- data/vendor/lexbor/test/unit/CMakeLists.txt +0 -49
- data/vendor/lexbor/test/unit/kv.c +0 -538
- data/vendor/lexbor/test/unit/kv.h +0 -301
- data/vendor/lexbor/test/unit/kv_rules.c +0 -609
- data/vendor/lexbor/test/unit/kv_state.c +0 -1470
- data/vendor/lexbor/test/unit/test.c +0 -131
- data/vendor/lexbor/test/unit/test.h +0 -410
- data/vendor/lexbor/utils/CMakeLists.txt +0 -11
- data/vendor/lexbor/utils/lexbor/css/grammar.txt +0 -263
- data/vendor/lexbor/utils/lexbor/css/names.py +0 -768
- data/vendor/lexbor/utils/lexbor/css/selectors/pseudo.py +0 -234
- data/vendor/lexbor/utils/lexbor/css/selectors/tmp/const.h +0 -21
- data/vendor/lexbor/utils/lexbor/css/selectors/tmp/res.h +0 -26
- data/vendor/lexbor/utils/lexbor/css/syntax/definitions.py +0 -62
- data/vendor/lexbor/utils/lexbor/css/syntax/non_ascii.pl +0 -77
- data/vendor/lexbor/utils/lexbor/css/syntax/token_res.py +0 -55
- data/vendor/lexbor/utils/lexbor/css/syntax/tokenizer_code_map.py +0 -36
- data/vendor/lexbor/utils/lexbor/css/tmp/const.h +0 -24
- data/vendor/lexbor/utils/lexbor/css/tmp/res.h +0 -26
- data/vendor/lexbor/utils/lexbor/css/tmp/types.h +0 -21
- data/vendor/lexbor/utils/lexbor/css/tmp/value_const.h +0 -21
- data/vendor/lexbor/utils/lexbor/css/tmp/value_res.h +0 -25
- data/vendor/lexbor/utils/lexbor/dom/attr.py +0 -129
- data/vendor/lexbor/utils/lexbor/dom/tmp/const.h +0 -23
- data/vendor/lexbor/utils/lexbor/dom/tmp/res.h +0 -27
- data/vendor/lexbor/utils/lexbor/encoding/CMakeLists.txt +0 -32
- data/vendor/lexbor/utils/lexbor/encoding/big5_map_decode.c +0 -93
- data/vendor/lexbor/utils/lexbor/encoding/buffer-single-byte.py +0 -95
- data/vendor/lexbor/utils/lexbor/encoding/encodings.json +0 -456
- data/vendor/lexbor/utils/lexbor/encoding/euc_jp_map_decode.c +0 -83
- data/vendor/lexbor/utils/lexbor/encoding/euc_kr_map_decode.c +0 -89
- data/vendor/lexbor/utils/lexbor/encoding/gb18030_map_decode.c +0 -170
- data/vendor/lexbor/utils/lexbor/encoding/iso_2022_jp_map_decode.c +0 -120
- data/vendor/lexbor/utils/lexbor/encoding/multi-byte/index-big5.txt +0 -18596
- data/vendor/lexbor/utils/lexbor/encoding/multi-byte/index-euc-kr.txt +0 -17054
- data/vendor/lexbor/utils/lexbor/encoding/multi-byte/index-gb18030.txt +0 -23946
- data/vendor/lexbor/utils/lexbor/encoding/multi-byte/index-iso-2022-jp-katakana.txt +0 -69
- data/vendor/lexbor/utils/lexbor/encoding/multi-byte/index-jis0208.txt +0 -7730
- data/vendor/lexbor/utils/lexbor/encoding/multi-byte/index-jis0212.txt +0 -6073
- data/vendor/lexbor/utils/lexbor/encoding/multi-byte.pl +0 -424
- data/vendor/lexbor/utils/lexbor/encoding/range-byte.py +0 -118
- data/vendor/lexbor/utils/lexbor/encoding/ranges/index-gb18030-ranges.txt +0 -213
- data/vendor/lexbor/utils/lexbor/encoding/res.py +0 -231
- data/vendor/lexbor/utils/lexbor/encoding/shift_jis_map_decode.c +0 -102
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-ibm866.txt +0 -134
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-10.txt +0 -134
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-13.txt +0 -134
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-14.txt +0 -134
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-15.txt +0 -134
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-16.txt +0 -134
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-2.txt +0 -134
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-3.txt +0 -127
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-4.txt +0 -134
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-5.txt +0 -134
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-6.txt +0 -89
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-7.txt +0 -131
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-8.txt +0 -98
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-koi8-r.txt +0 -134
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-koi8-u.txt +0 -134
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-macintosh.txt +0 -134
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1250.txt +0 -134
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1251.txt +0 -134
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1252.txt +0 -134
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1253.txt +0 -131
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1254.txt +0 -134
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1255.txt +0 -124
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1256.txt +0 -134
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1257.txt +0 -132
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1258.txt +0 -134
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-874.txt +0 -126
- data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-x-mac-cyrillic.txt +0 -134
- data/vendor/lexbor/utils/lexbor/encoding/single-byte.py +0 -179
- data/vendor/lexbor/utils/lexbor/encoding/tmp/buffer_single_byte_test.c +0 -123
- data/vendor/lexbor/utils/lexbor/encoding/tmp/const.h +0 -19
- data/vendor/lexbor/utils/lexbor/encoding/tmp/multi.c +0 -20
- data/vendor/lexbor/utils/lexbor/encoding/tmp/multi.h +0 -37
- data/vendor/lexbor/utils/lexbor/encoding/tmp/range.c +0 -17
- data/vendor/lexbor/utils/lexbor/encoding/tmp/range.h +0 -35
- data/vendor/lexbor/utils/lexbor/encoding/tmp/res.c +0 -22
- data/vendor/lexbor/utils/lexbor/encoding/tmp/res.h +0 -34
- data/vendor/lexbor/utils/lexbor/encoding/tmp/single.c +0 -20
- data/vendor/lexbor/utils/lexbor/encoding/tmp/single.h +0 -37
- data/vendor/lexbor/utils/lexbor/encoding/tmp/single_byte_test.c +0 -114
- data/vendor/lexbor/utils/lexbor/grammar/CMakeLists.txt +0 -63
- data/vendor/lexbor/utils/lexbor/grammar/base.h +0 -89
- data/vendor/lexbor/utils/lexbor/grammar/document.h +0 -34
- data/vendor/lexbor/utils/lexbor/grammar/grammar.c +0 -243
- data/vendor/lexbor/utils/lexbor/grammar/json.c +0 -368
- data/vendor/lexbor/utils/lexbor/grammar/json.h +0 -48
- data/vendor/lexbor/utils/lexbor/grammar/node.c +0 -653
- data/vendor/lexbor/utils/lexbor/grammar/node.h +0 -120
- data/vendor/lexbor/utils/lexbor/grammar/parser.c +0 -724
- data/vendor/lexbor/utils/lexbor/grammar/parser.h +0 -75
- data/vendor/lexbor/utils/lexbor/grammar/test.c +0 -1762
- data/vendor/lexbor/utils/lexbor/grammar/test.h +0 -35
- data/vendor/lexbor/utils/lexbor/grammar/token.c +0 -258
- data/vendor/lexbor/utils/lexbor/grammar/token.h +0 -91
- data/vendor/lexbor/utils/lexbor/grammar/tokenizer.c +0 -706
- data/vendor/lexbor/utils/lexbor/grammar/tokenizer.h +0 -73
- data/vendor/lexbor/utils/lexbor/html/convert_html5_tests.py +0 -162
- data/vendor/lexbor/utils/lexbor/html/data/entities.json +0 -2233
- data/vendor/lexbor/utils/lexbor/html/insertion_mode.py +0 -61
- data/vendor/lexbor/utils/lexbor/html/reorder_html5_tests_tokenizer_errors.py +0 -137
- data/vendor/lexbor/utils/lexbor/html/tmp/insertion_mode.c +0 -53
- data/vendor/lexbor/utils/lexbor/html/tmp/insertion_mode.h +0 -18
- data/vendor/lexbor/utils/lexbor/html/tmp/tokenizer_res.h +0 -20
- data/vendor/lexbor/utils/lexbor/html/tokenizer_entities_bst.py +0 -209
- data/vendor/lexbor/utils/lexbor/html/tokenizer_entities_switch.py +0 -162
- data/vendor/lexbor/utils/lexbor/html/tokenizer_parse_error.pl +0 -97
- data/vendor/lexbor/utils/lexbor/lexbor/LXB.py +0 -498
- data/vendor/lexbor/utils/lexbor/lexbor/res.py +0 -130
- data/vendor/lexbor/utils/lexbor/tag_ns/data/interfaces.json +0 -98
- data/vendor/lexbor/utils/lexbor/tag_ns/data/tags.json +0 -371
- data/vendor/lexbor/utils/lexbor/tag_ns/interfaces.py +0 -175
- data/vendor/lexbor/utils/lexbor/tag_ns/tags.py +0 -808
- data/vendor/lexbor/utils/lexbor/tag_ns/tmp/html_attribute_steps_res.h +0 -21
- data/vendor/lexbor/utils/lexbor/tag_ns/tmp/html_element_steps_res.h +0 -21
- data/vendor/lexbor/utils/lexbor/tag_ns/tmp/html_interface_res.h +0 -29
- data/vendor/lexbor/utils/lexbor/tag_ns/tmp/html_open_elements_res.h +0 -21
- data/vendor/lexbor/utils/lexbor/tag_ns/tmp/html_tag_res.h +0 -25
- data/vendor/lexbor/utils/lexbor/tag_ns/tmp/interface.c +0 -36
- data/vendor/lexbor/utils/lexbor/tag_ns/tmp/interface.h +0 -33
- data/vendor/lexbor/utils/lexbor/tag_ns/tmp/ns_const.h +0 -26
- data/vendor/lexbor/utils/lexbor/tag_ns/tmp/ns_res.h +0 -29
- data/vendor/lexbor/utils/lexbor/tag_ns/tmp/open_elements_res.h +0 -21
- data/vendor/lexbor/utils/lexbor/tag_ns/tmp/steps_res.h +0 -23
- data/vendor/lexbor/utils/lexbor/tag_ns/tmp/tag_const.h +0 -26
- data/vendor/lexbor/utils/lexbor/tag_ns/tmp/tag_res.h +0 -26
- data/vendor/lexbor/utils/lexbor/tag_ns/tmp/test/ns_res.c +0 -44
- data/vendor/lexbor/utils/lexbor/tag_ns/tmp/test/tag_res.c +0 -45
- data/vendor/lexbor/utils/lexbor/unicode/build.pl +0 -1323
- data/vendor/lexbor/utils/lexbor/unicode/idna_test.pl +0 -398
- data/vendor/lexbor/utils/lexbor/unicode/normalization_test.pl +0 -157
- data/vendor/lexbor/utils/wasm/gen_constants.py +0 -186
- data/vendor/lexbor/wasm/CMakeLists.txt +0 -18
- data/vendor/lexbor/wasm/lexbor/engine/CMakeLists.txt +0 -21
- data/vendor/lexbor/wasm/lexbor/engine/index.html +0 -406
- data/vendor/lexbor/wasm/lexbor/engine/lexbor.c +0 -1340
- data/vendor/lexbor/wasm/lexbor/html/CMakeLists.txt +0 -11
- data/vendor/lexbor/wasm/lexbor/html/parse.c +0 -58
|
@@ -1,220 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright (C) 2019 Alexander Borisov
|
|
3
|
-
*
|
|
4
|
-
* Author: Alexander Borisov <borisov@lexbor.com>
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
#include <unit/test.h>
|
|
8
|
-
|
|
9
|
-
#include "encoding.h"
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
static const char *lxb_filepath_test;
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
TEST_BEGIN(decode)
|
|
16
|
-
{
|
|
17
|
-
lxb_char_t *buf, *end;
|
|
18
|
-
const lxb_encoding_data_t *enc_data;
|
|
19
|
-
|
|
20
|
-
size_t size;
|
|
21
|
-
lxb_codepoint_t cps_buffer[1024];
|
|
22
|
-
|
|
23
|
-
enc_data = lxb_encoding_data(LXB_ENCODING_EUC_JP);
|
|
24
|
-
test_ne(enc_data, NULL);
|
|
25
|
-
|
|
26
|
-
/* UTF-8: \x58; Unicode: \x00\x58; Code point: 88 */
|
|
27
|
-
to_update_buffer("\x58");
|
|
28
|
-
test_buffer(test_decode_chunks, 1, 88);
|
|
29
|
-
test_buffer(test_decode_full, 1, 88);
|
|
30
|
-
|
|
31
|
-
/* LXB_ENCODING_DECODE_ERROR */
|
|
32
|
-
to_update_buffer("\x8D");
|
|
33
|
-
test_buffer(test_decode_chunks, 1, LXB_ENCODING_REPLACEMENT_CODEPOINT);
|
|
34
|
-
test_buffer(test_decode_full, 1, LXB_ENCODING_REPLACEMENT_CODEPOINT);
|
|
35
|
-
|
|
36
|
-
/* LXB_ENCODING_DECODE_ERROR */
|
|
37
|
-
to_update_buffer("\x90");
|
|
38
|
-
test_buffer(test_decode_chunks, 1, LXB_ENCODING_REPLACEMENT_CODEPOINT);
|
|
39
|
-
test_buffer(test_decode_full, 1, LXB_ENCODING_REPLACEMENT_CODEPOINT);
|
|
40
|
-
|
|
41
|
-
/* LXB_ENCODING_DECODE_ERROR */
|
|
42
|
-
to_update_buffer("\xA0");
|
|
43
|
-
test_buffer(test_decode_chunks, 1, LXB_ENCODING_REPLACEMENT_CODEPOINT);
|
|
44
|
-
test_buffer(test_decode_full, 1, LXB_ENCODING_REPLACEMENT_CODEPOINT);
|
|
45
|
-
|
|
46
|
-
/* LXB_ENCODING_DECODE_ERROR */
|
|
47
|
-
to_update_buffer("\xFF");
|
|
48
|
-
test_buffer(test_decode_chunks, 1, LXB_ENCODING_REPLACEMENT_CODEPOINT);
|
|
49
|
-
test_buffer(test_decode_full, 1, LXB_ENCODING_REPLACEMENT_CODEPOINT);
|
|
50
|
-
|
|
51
|
-
/* UTF-8: \xEF\xBD\xA1; Unicode: \xFF\x61; Code point: 65377 */
|
|
52
|
-
to_update_buffer("\x8E\xA1");
|
|
53
|
-
test_buffer(test_decode_chunks, 1, 0xFF61);
|
|
54
|
-
test_buffer(test_decode_full, 1, 0xFF61);
|
|
55
|
-
|
|
56
|
-
/* UTF-8: \xEF\xBE\x9F; Unicode: \xFF\x9F; Code point: 65439 */
|
|
57
|
-
to_update_buffer("\x8E\xDF");
|
|
58
|
-
test_buffer(test_decode_chunks, 1, 0xFF9F);
|
|
59
|
-
test_buffer(test_decode_full, 1, 0xFF9F);
|
|
60
|
-
|
|
61
|
-
/* LXB_ENCODING_DECODE_ERROR */
|
|
62
|
-
to_update_buffer("\x8E\xA0");
|
|
63
|
-
test_buffer(test_decode_chunks, 1, LXB_ENCODING_REPLACEMENT_CODEPOINT);
|
|
64
|
-
test_buffer(test_decode_full, 1, LXB_ENCODING_REPLACEMENT_CODEPOINT);
|
|
65
|
-
|
|
66
|
-
/* LXB_ENCODING_DECODE_ERROR */
|
|
67
|
-
to_update_buffer("\x8E\xE0");
|
|
68
|
-
test_buffer(test_decode_chunks, 1, LXB_ENCODING_REPLACEMENT_CODEPOINT);
|
|
69
|
-
test_buffer(test_decode_full, 1, LXB_ENCODING_REPLACEMENT_CODEPOINT);
|
|
70
|
-
|
|
71
|
-
/* LXB_ENCODING_DECODE_ERROR */
|
|
72
|
-
to_update_buffer("\x8F\xA1\xA1");
|
|
73
|
-
test_buffer(test_decode_chunks, 1, LXB_ENCODING_REPLACEMENT_CODEPOINT);
|
|
74
|
-
test_buffer(test_decode_full, 1, LXB_ENCODING_REPLACEMENT_CODEPOINT);
|
|
75
|
-
|
|
76
|
-
/* UTF-8: \xCB\x98; Unicode: \x02\xD8; Code point: 728 */
|
|
77
|
-
to_update_buffer("\x8F\xA2\xAF");
|
|
78
|
-
test_buffer(test_decode_chunks, 1, 0x02D8);
|
|
79
|
-
test_buffer(test_decode_full, 1, 0x02D8);
|
|
80
|
-
|
|
81
|
-
/* UTF-8: \xE7\x92\xAF; Unicode: \x74\xAF; Code point: 29871 */
|
|
82
|
-
to_update_buffer("\x8F\xCC\xE3");
|
|
83
|
-
test_buffer(test_decode_chunks, 1, 0x74af);
|
|
84
|
-
test_buffer(test_decode_full, 1, 0x74af);
|
|
85
|
-
|
|
86
|
-
/* UTF-8: \xE7\x92\xAF; Unicode: \x9F\xA5; Code point: 40869 */
|
|
87
|
-
to_update_buffer("\x8F\xED\xE3");
|
|
88
|
-
test_buffer(test_decode_chunks, 1, 0x9FA5);
|
|
89
|
-
test_buffer(test_decode_full, 1, 0x9FA5);
|
|
90
|
-
|
|
91
|
-
/* LXB_ENCODING_DECODE_ERROR */
|
|
92
|
-
to_update_buffer("\x8F\xFE\xFE");
|
|
93
|
-
test_buffer(test_decode_chunks, 1, LXB_ENCODING_REPLACEMENT_CODEPOINT);
|
|
94
|
-
test_buffer(test_decode_full, 1, LXB_ENCODING_REPLACEMENT_CODEPOINT);
|
|
95
|
-
|
|
96
|
-
/* UTF-8: \xEF\xBC\x82; Unicode: \xFF\x02; Code point: 65282 */
|
|
97
|
-
to_update_buffer("\xFC\xFE");
|
|
98
|
-
test_buffer(test_decode_chunks, 1, 0xFF02);
|
|
99
|
-
test_buffer(test_decode_full, 1, 0xFF02);
|
|
100
|
-
|
|
101
|
-
/* LXB_ENCODING_DECODE_ERROR */
|
|
102
|
-
to_update_buffer("\xFE\xFE");
|
|
103
|
-
test_buffer(test_decode_chunks, 1, LXB_ENCODING_REPLACEMENT_CODEPOINT);
|
|
104
|
-
test_buffer(test_decode_full, 1, LXB_ENCODING_REPLACEMENT_CODEPOINT);
|
|
105
|
-
|
|
106
|
-
to_update_buffer("\xFC\xFE\xFC\xFE");
|
|
107
|
-
test_buffer(test_decode_chunks, 2, 0xFF02, 0xFF02);
|
|
108
|
-
test_buffer(test_decode_full, 2, 0xFF02, 0xFF02);
|
|
109
|
-
}
|
|
110
|
-
TEST_END
|
|
111
|
-
|
|
112
|
-
/* Broken encoding. Prepend to stream test. */
|
|
113
|
-
TEST_BEGIN(decode_prepend)
|
|
114
|
-
{
|
|
115
|
-
lxb_char_t *buf, *end;
|
|
116
|
-
const lxb_encoding_data_t *enc_data;
|
|
117
|
-
|
|
118
|
-
size_t size;
|
|
119
|
-
lxb_codepoint_t rp_cp = LXB_ENCODING_REPLACEMENT_CODEPOINT;
|
|
120
|
-
lxb_codepoint_t cps_buffer[1024];
|
|
121
|
-
|
|
122
|
-
enc_data = lxb_encoding_data(LXB_ENCODING_EUC_JP);
|
|
123
|
-
test_ne(enc_data, NULL);
|
|
124
|
-
|
|
125
|
-
to_update_buffer("\xFF\xFC\xFE");
|
|
126
|
-
test_buffer(test_decode_chunks, 2, rp_cp, 0xFF02);
|
|
127
|
-
test_buffer(test_decode_full, 2, rp_cp, 0xFF02);
|
|
128
|
-
|
|
129
|
-
to_update_buffer("\xFF\x8F\xA2\xAF");
|
|
130
|
-
test_buffer(test_decode_chunks, 2, rp_cp, 0x02D8);
|
|
131
|
-
test_buffer(test_decode_full, 2, rp_cp, 0x02D8);
|
|
132
|
-
|
|
133
|
-
to_update_buffer("\x8F\xA2\xFF\xAF");
|
|
134
|
-
test_buffer(test_decode_chunks, 2, rp_cp, LXB_ENCODING_DECODE_CONTINUE);
|
|
135
|
-
test_buffer(test_decode_full, 2, rp_cp, LXB_ENCODING_DECODE_CONTINUE);
|
|
136
|
-
|
|
137
|
-
to_update_buffer("\xA2\x32\xFC\xFE");
|
|
138
|
-
test_buffer(test_decode_chunks, 3, rp_cp, 0x32, 0xFF02);
|
|
139
|
-
test_buffer(test_decode_full, 3, rp_cp, 0x32, 0xFF02);
|
|
140
|
-
|
|
141
|
-
to_update_buffer("\x8F\xED\x32\xFC\xFE");
|
|
142
|
-
test_buffer(test_decode_chunks, 3, rp_cp, 0x32, 0xFF02);
|
|
143
|
-
test_buffer(test_decode_full, 3, rp_cp, 0x32, 0xFF02);
|
|
144
|
-
}
|
|
145
|
-
TEST_END
|
|
146
|
-
|
|
147
|
-
TEST_BEGIN(decode_map)
|
|
148
|
-
{
|
|
149
|
-
size_t line;
|
|
150
|
-
lxb_status_t status;
|
|
151
|
-
const lxb_encoding_data_t *enc_data;
|
|
152
|
-
|
|
153
|
-
enc_data = lxb_encoding_data(LXB_ENCODING_EUC_JP);
|
|
154
|
-
|
|
155
|
-
status = test_encoding_process_file(lxb_filepath_test, test_decode_process_file,
|
|
156
|
-
(void *) enc_data, &line);
|
|
157
|
-
if (status != LXB_STATUS_OK) {
|
|
158
|
-
failed_and_exit(line);
|
|
159
|
-
}
|
|
160
|
-
}
|
|
161
|
-
TEST_END
|
|
162
|
-
|
|
163
|
-
TEST_BEGIN(encode_map)
|
|
164
|
-
{
|
|
165
|
-
size_t line;
|
|
166
|
-
lxb_status_t status;
|
|
167
|
-
const lxb_encoding_data_t *enc_data;
|
|
168
|
-
|
|
169
|
-
enc_data = lxb_encoding_data(LXB_ENCODING_EUC_JP);
|
|
170
|
-
|
|
171
|
-
status = test_encoding_process_file(lxb_filepath_test, test_encode_process_file,
|
|
172
|
-
(void *) enc_data, &line);
|
|
173
|
-
if (status != LXB_STATUS_OK) {
|
|
174
|
-
failed_and_exit(line);
|
|
175
|
-
}
|
|
176
|
-
}
|
|
177
|
-
TEST_END
|
|
178
|
-
|
|
179
|
-
TEST_BEGIN(encode_buffer_check)
|
|
180
|
-
{
|
|
181
|
-
int8_t size;
|
|
182
|
-
lxb_char_t ch1, ch2[2];
|
|
183
|
-
lxb_char_t *ref;
|
|
184
|
-
lxb_encoding_encode_t ctx = {0};
|
|
185
|
-
const lxb_encoding_data_t *enc_data;
|
|
186
|
-
|
|
187
|
-
enc_data = lxb_encoding_data(LXB_ENCODING_EUC_JP);
|
|
188
|
-
|
|
189
|
-
/* 2 */
|
|
190
|
-
ref = &ch1;
|
|
191
|
-
size = enc_data->encode_single(&ctx, &ref, ref + 1, 0xFA1F);
|
|
192
|
-
test_eq(size, LXB_ENCODING_ENCODE_SMALL_BUFFER);
|
|
193
|
-
|
|
194
|
-
ref = ch2;
|
|
195
|
-
size = enc_data->encode_single(&ctx, &ref, ref + 2, 0xFA1F);
|
|
196
|
-
test_eq(size, 2);
|
|
197
|
-
}
|
|
198
|
-
TEST_END
|
|
199
|
-
|
|
200
|
-
int
|
|
201
|
-
main(int argc, const char * argv[])
|
|
202
|
-
{
|
|
203
|
-
if (argc != 2) {
|
|
204
|
-
printf("Usage:\n\teuc_jp <filepath>\n");
|
|
205
|
-
return EXIT_FAILURE;
|
|
206
|
-
}
|
|
207
|
-
|
|
208
|
-
lxb_filepath_test = argv[1];
|
|
209
|
-
|
|
210
|
-
TEST_INIT();
|
|
211
|
-
|
|
212
|
-
TEST_ADD(decode);
|
|
213
|
-
TEST_ADD(decode_prepend);
|
|
214
|
-
TEST_ADD(decode_map);
|
|
215
|
-
TEST_ADD(encode_map);
|
|
216
|
-
TEST_ADD(encode_buffer_check);
|
|
217
|
-
|
|
218
|
-
TEST_RUN("lexbor/encoding/euc_jp");
|
|
219
|
-
TEST_RELEASE();
|
|
220
|
-
}
|
|
@@ -1,162 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright (C) 2019 Alexander Borisov
|
|
3
|
-
*
|
|
4
|
-
* Author: Alexander Borisov <borisov@lexbor.com>
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
#include <unit/test.h>
|
|
8
|
-
|
|
9
|
-
#include "encoding.h"
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
static const char *lxb_filepath_test;
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
TEST_BEGIN(decode)
|
|
16
|
-
{
|
|
17
|
-
lxb_char_t *buf, *end;
|
|
18
|
-
const lxb_encoding_data_t *enc_data;
|
|
19
|
-
|
|
20
|
-
size_t size;
|
|
21
|
-
lxb_codepoint_t rp_cp = LXB_ENCODING_REPLACEMENT_CODEPOINT;
|
|
22
|
-
lxb_codepoint_t cps_buffer[1024];
|
|
23
|
-
|
|
24
|
-
enc_data = lxb_encoding_data(LXB_ENCODING_EUC_KR);
|
|
25
|
-
test_ne(enc_data, NULL);
|
|
26
|
-
|
|
27
|
-
/* UTF-8: \x58; Unicode: \x00\x58; Code point: 88 */
|
|
28
|
-
to_update_buffer("\x58");
|
|
29
|
-
test_buffer(test_decode_chunks, 1, 88);
|
|
30
|
-
test_buffer(test_decode_full, 1, 88);
|
|
31
|
-
|
|
32
|
-
to_update_buffer("\x80");
|
|
33
|
-
test_buffer(test_decode_chunks, 1, rp_cp);
|
|
34
|
-
test_buffer(test_decode_full, 1, rp_cp);
|
|
35
|
-
|
|
36
|
-
to_update_buffer("\xFF");
|
|
37
|
-
test_buffer(test_decode_chunks, 1, rp_cp);
|
|
38
|
-
test_buffer(test_decode_full, 1, rp_cp);
|
|
39
|
-
|
|
40
|
-
to_update_buffer("\x81\x40");
|
|
41
|
-
test_buffer(test_decode_chunks, 2, rp_cp, 0x40);
|
|
42
|
-
test_buffer(test_decode_full, 2, rp_cp, 0x40);
|
|
43
|
-
|
|
44
|
-
to_update_buffer("\x81\xFF");
|
|
45
|
-
test_buffer(test_decode_chunks, 1, rp_cp);
|
|
46
|
-
test_buffer(test_decode_full, 1, rp_cp);
|
|
47
|
-
|
|
48
|
-
to_update_buffer("\x81\x41");
|
|
49
|
-
test_buffer(test_decode_chunks, 1, 0xAC02);
|
|
50
|
-
test_buffer(test_decode_full, 1, 0xAC02);
|
|
51
|
-
|
|
52
|
-
to_update_buffer("\xFD\xFE");
|
|
53
|
-
test_buffer(test_decode_chunks, 1, 0x8A70);
|
|
54
|
-
test_buffer(test_decode_full, 1, 0x8A70);
|
|
55
|
-
|
|
56
|
-
to_update_buffer("\xFE\xFE");
|
|
57
|
-
test_buffer(test_decode_chunks, 1, rp_cp);
|
|
58
|
-
test_buffer(test_decode_full, 1, rp_cp);
|
|
59
|
-
|
|
60
|
-
to_update_buffer("\xFD\xFE\xFD\xFE");
|
|
61
|
-
test_buffer(test_decode_chunks, 2, 0x8A70, 0x8A70);
|
|
62
|
-
test_buffer(test_decode_full, 2, 0x8A70, 0x8A70);
|
|
63
|
-
}
|
|
64
|
-
TEST_END
|
|
65
|
-
|
|
66
|
-
/* Broken encoding. Prepend to stream test. */
|
|
67
|
-
TEST_BEGIN(decode_prepend)
|
|
68
|
-
{
|
|
69
|
-
lxb_char_t *buf, *end;
|
|
70
|
-
const lxb_encoding_data_t *enc_data;
|
|
71
|
-
|
|
72
|
-
size_t size;
|
|
73
|
-
lxb_codepoint_t rp_cp = LXB_ENCODING_REPLACEMENT_CODEPOINT;
|
|
74
|
-
lxb_codepoint_t cps_buffer[1024];
|
|
75
|
-
|
|
76
|
-
enc_data = lxb_encoding_data(LXB_ENCODING_EUC_KR);
|
|
77
|
-
test_ne(enc_data, NULL);
|
|
78
|
-
|
|
79
|
-
to_update_buffer("\xFE\x41");
|
|
80
|
-
test_buffer(test_decode_chunks, 2, rp_cp, 0x41);
|
|
81
|
-
test_buffer(test_decode_full, 2, rp_cp, 0x41);
|
|
82
|
-
|
|
83
|
-
to_update_buffer("\xFE\xFE");
|
|
84
|
-
test_buffer(test_decode_chunks, 1, rp_cp);
|
|
85
|
-
test_buffer(test_decode_full, 1, rp_cp);
|
|
86
|
-
}
|
|
87
|
-
TEST_END
|
|
88
|
-
|
|
89
|
-
TEST_BEGIN(decode_map)
|
|
90
|
-
{
|
|
91
|
-
size_t line;
|
|
92
|
-
lxb_status_t status;
|
|
93
|
-
const lxb_encoding_data_t *enc_data;
|
|
94
|
-
|
|
95
|
-
enc_data = lxb_encoding_data(LXB_ENCODING_EUC_KR);
|
|
96
|
-
|
|
97
|
-
status = test_encoding_process_file(lxb_filepath_test, test_decode_process_file,
|
|
98
|
-
(void *) enc_data, &line);
|
|
99
|
-
if (status != LXB_STATUS_OK) {
|
|
100
|
-
failed_and_exit(line);
|
|
101
|
-
}
|
|
102
|
-
}
|
|
103
|
-
TEST_END
|
|
104
|
-
|
|
105
|
-
TEST_BEGIN(encode_map)
|
|
106
|
-
{
|
|
107
|
-
size_t line;
|
|
108
|
-
lxb_status_t status;
|
|
109
|
-
const lxb_encoding_data_t *enc_data;
|
|
110
|
-
|
|
111
|
-
enc_data = lxb_encoding_data(LXB_ENCODING_EUC_KR);
|
|
112
|
-
|
|
113
|
-
status = test_encoding_process_file(lxb_filepath_test, test_encode_process_file,
|
|
114
|
-
(void *) enc_data, &line);
|
|
115
|
-
if (status != LXB_STATUS_OK) {
|
|
116
|
-
failed_and_exit(line);
|
|
117
|
-
}
|
|
118
|
-
}
|
|
119
|
-
TEST_END
|
|
120
|
-
|
|
121
|
-
TEST_BEGIN(encode_buffer_check)
|
|
122
|
-
{
|
|
123
|
-
int8_t size;
|
|
124
|
-
lxb_char_t ch1, ch2[2];
|
|
125
|
-
lxb_char_t *ref;
|
|
126
|
-
lxb_encoding_encode_t ctx = {0};
|
|
127
|
-
const lxb_encoding_data_t *enc_data;
|
|
128
|
-
|
|
129
|
-
enc_data = lxb_encoding_data(LXB_ENCODING_EUC_KR);
|
|
130
|
-
|
|
131
|
-
/* 2 */
|
|
132
|
-
ref = &ch1;
|
|
133
|
-
size = enc_data->encode_single(&ctx, &ref, ref + 1, 0x8A70);
|
|
134
|
-
test_eq(size, LXB_ENCODING_ENCODE_SMALL_BUFFER);
|
|
135
|
-
|
|
136
|
-
ref = ch2;
|
|
137
|
-
size = enc_data->encode_single(&ctx, &ref, ref + 2, 0x8A70);
|
|
138
|
-
test_eq(size, 2);
|
|
139
|
-
}
|
|
140
|
-
TEST_END
|
|
141
|
-
|
|
142
|
-
int
|
|
143
|
-
main(int argc, const char * argv[])
|
|
144
|
-
{
|
|
145
|
-
if (argc != 2) {
|
|
146
|
-
printf("Usage:\n\teuc_kr <filepath>\n");
|
|
147
|
-
return EXIT_FAILURE;
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
lxb_filepath_test = argv[1];
|
|
151
|
-
|
|
152
|
-
TEST_INIT();
|
|
153
|
-
|
|
154
|
-
TEST_ADD(decode);
|
|
155
|
-
TEST_ADD(decode_prepend);
|
|
156
|
-
TEST_ADD(decode_map);
|
|
157
|
-
TEST_ADD(encode_map);
|
|
158
|
-
TEST_ADD(encode_buffer_check);
|
|
159
|
-
|
|
160
|
-
TEST_RUN("lexbor/encoding/euc_kr");
|
|
161
|
-
TEST_RELEASE();
|
|
162
|
-
}
|
|
@@ -1,277 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright (C) 2019 Alexander Borisov
|
|
3
|
-
*
|
|
4
|
-
* Author: Alexander Borisov <borisov@lexbor.com>
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
#include <unit/test.h>
|
|
8
|
-
|
|
9
|
-
#include "encoding.h"
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
static const char *lxb_filepath_test;
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
TEST_BEGIN(decode)
|
|
16
|
-
{
|
|
17
|
-
lxb_char_t *buf, *end;
|
|
18
|
-
const lxb_encoding_data_t *enc_data;
|
|
19
|
-
|
|
20
|
-
size_t size;
|
|
21
|
-
lxb_codepoint_t rp_cp = LXB_ENCODING_REPLACEMENT_CODEPOINT;
|
|
22
|
-
lxb_codepoint_t cps_buffer[1024];
|
|
23
|
-
|
|
24
|
-
enc_data = lxb_encoding_data(LXB_ENCODING_GB18030);
|
|
25
|
-
test_ne(enc_data, NULL);
|
|
26
|
-
|
|
27
|
-
/* UTF-8: \x58; Unicode: \x58\x00; Code point: 88 */
|
|
28
|
-
to_update_buffer("\x58");
|
|
29
|
-
test_buffer(test_decode_chunks, 1, 88);
|
|
30
|
-
test_buffer(test_decode_full, 1, 88);
|
|
31
|
-
|
|
32
|
-
/* UTF-8: \xE2\x82\xAC; Unicode: \x20\xAC; Code point: 8364 */
|
|
33
|
-
to_update_buffer("\x80");
|
|
34
|
-
test_buffer(test_decode_chunks, 1, 8364);
|
|
35
|
-
test_buffer(test_decode_full, 1, 8364);
|
|
36
|
-
|
|
37
|
-
/* LXB_ENCODING_DECODE_ERROR */
|
|
38
|
-
to_update_buffer("\xFF");
|
|
39
|
-
test_buffer(test_decode_chunks, 1, rp_cp);
|
|
40
|
-
test_buffer(test_decode_full, 1, rp_cp);
|
|
41
|
-
|
|
42
|
-
/* UTF-8: \xE4\xBA\x8A; Unicode: \x4E\x8A; Code point: 20106 */
|
|
43
|
-
to_update_buffer("\x81\x7E");
|
|
44
|
-
test_buffer(test_decode_chunks, 1, 20106);
|
|
45
|
-
test_buffer(test_decode_full, 1, 20106);
|
|
46
|
-
|
|
47
|
-
/* UTF-8: \xE4\xBA\xB8; Unicode: \x4E\xB8; Code point: 20152 */
|
|
48
|
-
to_update_buffer("\x81\x8F");
|
|
49
|
-
test_buffer(test_decode_chunks, 1, 20152);
|
|
50
|
-
test_buffer(test_decode_full, 1, 20152);
|
|
51
|
-
|
|
52
|
-
/* LXB_ENCODING_DECODE_ERROR */
|
|
53
|
-
to_update_buffer("\x81\x29");
|
|
54
|
-
test_buffer(test_decode_chunks, 2, rp_cp, 41);
|
|
55
|
-
test_buffer(test_decode_full, 2, rp_cp, 41);
|
|
56
|
-
|
|
57
|
-
/* UTF-8: \xE4\xB8\x82; Unicode: \x4E\x02; Code point: 19970 */
|
|
58
|
-
to_update_buffer("\x81\x40");
|
|
59
|
-
test_buffer(test_decode_chunks, 1, 19970);
|
|
60
|
-
test_buffer(test_decode_full, 1, 19970);
|
|
61
|
-
|
|
62
|
-
/* LXB_ENCODING_DECODE_ERROR */
|
|
63
|
-
to_update_buffer("\x81\x30\x80");
|
|
64
|
-
test_buffer(test_decode_chunks, 3, rp_cp, 0x30, 0x20AC);
|
|
65
|
-
test_buffer(test_decode_full, 3, rp_cp, 0x30, 0x20AC);
|
|
66
|
-
|
|
67
|
-
/* LXB_ENCODING_DECODE_ERROR */
|
|
68
|
-
to_update_buffer("\x81\x30\x81\x29");
|
|
69
|
-
test_buffer(test_decode_chunks, 4, rp_cp, 0x30, rp_cp, 0x29);
|
|
70
|
-
test_buffer(test_decode_full, 4, rp_cp, 0x30, rp_cp, 0x29);
|
|
71
|
-
|
|
72
|
-
/* LXB_ENCODING_DECODE_ERROR */
|
|
73
|
-
to_update_buffer("\x81\xFF");
|
|
74
|
-
test_buffer(test_decode_chunks, 1, rp_cp);
|
|
75
|
-
test_buffer(test_decode_full, 1, rp_cp);
|
|
76
|
-
|
|
77
|
-
/* UTF-8: \xE2\x8F\xAD; Unicode: \x23\xED; Code point: 9197 */
|
|
78
|
-
to_update_buffer("\x81\x37\x81\x31");
|
|
79
|
-
test_buffer(test_decode_chunks, 1, 9197);
|
|
80
|
-
test_buffer(test_decode_full, 1, 9197);
|
|
81
|
-
|
|
82
|
-
/* UTF-8: \xC2\x80; Unicode: \x80; Code point: 128 */
|
|
83
|
-
to_update_buffer("\x81\x30\x81\x30");
|
|
84
|
-
test_buffer(test_decode_chunks, 1, 128);
|
|
85
|
-
test_buffer(test_decode_full, 1, 128);
|
|
86
|
-
|
|
87
|
-
/* UTF-8: \xC2\x80; Unicode: \x80; Code point: 128 */
|
|
88
|
-
to_update_buffer("\x81\x30\x81\x30");
|
|
89
|
-
test_buffer(test_decode_chunks, 1, 128);
|
|
90
|
-
test_buffer(test_decode_full, 1, 128);
|
|
91
|
-
|
|
92
|
-
/* UTF-8: \xE2\xBA\x9B; Unicode: \x2E\x9B; Code point: 11931 */
|
|
93
|
-
to_update_buffer("\x81\x39\x81\x39");
|
|
94
|
-
test_buffer(test_decode_chunks, 1, 11931);
|
|
95
|
-
test_buffer(test_decode_full, 1, 11931);
|
|
96
|
-
|
|
97
|
-
/* UTF-8: \xE2\xBA\x9B; Unicode: \x2E\x9B; Code point: 11931 */
|
|
98
|
-
to_update_buffer("\x81\x39\x81\x39");
|
|
99
|
-
test_buffer(test_decode_chunks, 1, 11931);
|
|
100
|
-
test_buffer(test_decode_full, 1, 11931);
|
|
101
|
-
|
|
102
|
-
/* LXB_ENCODING_DECODE_ERROR */
|
|
103
|
-
to_update_buffer("\xFE\x30\x81\x30");
|
|
104
|
-
test_buffer(test_decode_chunks, 1, rp_cp);
|
|
105
|
-
test_buffer(test_decode_full, 1, rp_cp);
|
|
106
|
-
|
|
107
|
-
/* LXB_ENCODING_DECODE_ERROR */
|
|
108
|
-
to_update_buffer("\xFE\x30\xFE\x30");
|
|
109
|
-
test_buffer(test_decode_chunks, 1, rp_cp);
|
|
110
|
-
test_buffer(test_decode_full, 1, rp_cp);
|
|
111
|
-
|
|
112
|
-
/* LXB_ENCODING_DECODE_ERROR */
|
|
113
|
-
to_update_buffer("\xFE\x39\xFE\x39");
|
|
114
|
-
test_buffer(test_decode_chunks, 1, rp_cp);
|
|
115
|
-
test_buffer(test_decode_full, 1, rp_cp);
|
|
116
|
-
|
|
117
|
-
/* UTF-8: \xD8\x80; Unicode: \x06\x00; Code point: 1536 */
|
|
118
|
-
to_update_buffer("\x81\x30\xFE\x30");
|
|
119
|
-
test_buffer(test_decode_chunks, 1, 1536);
|
|
120
|
-
test_buffer(test_decode_full, 1, 1536);
|
|
121
|
-
|
|
122
|
-
/* UTF-8: \xE3\x92\xA2; Unicode: \x34\xA2; Code point: 13474 */
|
|
123
|
-
to_update_buffer("\x81\x39\xFE\x39");
|
|
124
|
-
test_buffer(test_decode_chunks, 1, 13474);
|
|
125
|
-
test_buffer(test_decode_full, 1, 13474);
|
|
126
|
-
|
|
127
|
-
to_update_buffer("\x81\x39\xFE\x39\x81\x39\xFE\x39");
|
|
128
|
-
test_buffer(test_decode_chunks, 2, 13474, 13474);
|
|
129
|
-
test_buffer(test_decode_full, 2, 13474, 13474);
|
|
130
|
-
}
|
|
131
|
-
TEST_END
|
|
132
|
-
|
|
133
|
-
/* Broken encoding. Prepend to stream test. */
|
|
134
|
-
TEST_BEGIN(decode_prepend)
|
|
135
|
-
{
|
|
136
|
-
lxb_char_t *buf, *end;
|
|
137
|
-
const lxb_encoding_data_t *enc_data;
|
|
138
|
-
|
|
139
|
-
size_t size;
|
|
140
|
-
lxb_codepoint_t rp_cp = LXB_ENCODING_REPLACEMENT_CODEPOINT;
|
|
141
|
-
lxb_codepoint_t cps_buffer[1024];
|
|
142
|
-
|
|
143
|
-
enc_data = lxb_encoding_data(LXB_ENCODING_GB18030);
|
|
144
|
-
test_ne(enc_data, NULL);
|
|
145
|
-
|
|
146
|
-
/* Fisrt */
|
|
147
|
-
to_update_buffer("\x81\x7F");
|
|
148
|
-
test_buffer(test_decode_chunks, 2, rp_cp, 0x7F);
|
|
149
|
-
test_buffer(test_decode_full, 2, rp_cp, 0x7F);
|
|
150
|
-
|
|
151
|
-
/* Second */
|
|
152
|
-
to_update_buffer("\x81\x30\x20");
|
|
153
|
-
test_buffer(test_decode_chunks, 3, rp_cp, 0x30, 0x20);
|
|
154
|
-
test_buffer(test_decode_full, 3, rp_cp, 0x30, 0x20);
|
|
155
|
-
|
|
156
|
-
to_update_buffer("\x81\x30\x80");
|
|
157
|
-
test_buffer(test_decode_chunks, 3, rp_cp, 0x30, 0x20AC);
|
|
158
|
-
test_buffer(test_decode_full, 3, rp_cp, 0x30, 0x20AC);
|
|
159
|
-
|
|
160
|
-
to_update_buffer("\x81\x30\xFF");
|
|
161
|
-
test_buffer(test_decode_chunks, 3, rp_cp, 0x30, rp_cp);
|
|
162
|
-
test_buffer(test_decode_full, 3, rp_cp, 0x30, rp_cp);
|
|
163
|
-
|
|
164
|
-
to_update_buffer("\x81\x30\xFF\x81\x37\x81\x31");
|
|
165
|
-
test_buffer(test_decode_chunks, 4, rp_cp, 0x30, rp_cp, 0x23ED);
|
|
166
|
-
test_buffer(test_decode_full, 4, rp_cp, 0x30, rp_cp, 0x23ED);
|
|
167
|
-
|
|
168
|
-
to_update_buffer("\x81\xFF\x81\x81\x37\x81\x31");
|
|
169
|
-
test_buffer(test_decode_chunks, 4, rp_cp, 0x4E96, 0x37, LXB_ENCODING_DECODE_CONTINUE);
|
|
170
|
-
test_buffer(test_decode_full, 4, rp_cp, 0x4E96, 0x37, LXB_ENCODING_DECODE_CONTINUE);
|
|
171
|
-
|
|
172
|
-
to_update_buffer("\x81\xFF\x81\x81\x37\x81\x31\x81\x31");
|
|
173
|
-
test_buffer(test_decode_chunks, 4, rp_cp, 0x4E96, 0x37, 0x060B);
|
|
174
|
-
test_buffer(test_decode_full, 4, rp_cp, 0x4E96, 0x37, 0x060B);
|
|
175
|
-
|
|
176
|
-
/* Third */
|
|
177
|
-
to_update_buffer("\x81\x30\x81\x81\x37\x81\x31");
|
|
178
|
-
test_buffer(test_decode_full, 5, rp_cp, 0x30, 0x4E96, 0x37, LXB_ENCODING_DECODE_CONTINUE);
|
|
179
|
-
test_buffer(test_decode_chunks, 5, rp_cp, 0x30, 0x4E96, 0x37, LXB_ENCODING_DECODE_CONTINUE);
|
|
180
|
-
|
|
181
|
-
to_update_buffer("\x81\x30\x81\x40");
|
|
182
|
-
test_buffer(test_decode_chunks, 3, rp_cp, 0x30, 0x4E02);
|
|
183
|
-
test_buffer(test_decode_full, 3, rp_cp, 0x30, 0x4E02);
|
|
184
|
-
}
|
|
185
|
-
TEST_END
|
|
186
|
-
|
|
187
|
-
TEST_BEGIN(decode_map)
|
|
188
|
-
{
|
|
189
|
-
size_t line;
|
|
190
|
-
lxb_status_t status;
|
|
191
|
-
const lxb_encoding_data_t *enc_data;
|
|
192
|
-
|
|
193
|
-
enc_data = lxb_encoding_data(LXB_ENCODING_GB18030);
|
|
194
|
-
|
|
195
|
-
status = test_encoding_process_file(lxb_filepath_test, test_decode_process_file,
|
|
196
|
-
(void *) enc_data, &line);
|
|
197
|
-
if (status != LXB_STATUS_OK) {
|
|
198
|
-
failed_and_exit(line);
|
|
199
|
-
}
|
|
200
|
-
}
|
|
201
|
-
TEST_END
|
|
202
|
-
|
|
203
|
-
TEST_BEGIN(encode_map)
|
|
204
|
-
{
|
|
205
|
-
size_t line;
|
|
206
|
-
lxb_status_t status;
|
|
207
|
-
const lxb_encoding_data_t *enc_data;
|
|
208
|
-
|
|
209
|
-
enc_data = lxb_encoding_data(LXB_ENCODING_GB18030);
|
|
210
|
-
|
|
211
|
-
status = test_encoding_process_file(lxb_filepath_test, test_encode_process_file,
|
|
212
|
-
(void *) enc_data, &line);
|
|
213
|
-
if (status != LXB_STATUS_OK) {
|
|
214
|
-
failed_and_exit(line);
|
|
215
|
-
}
|
|
216
|
-
}
|
|
217
|
-
TEST_END
|
|
218
|
-
|
|
219
|
-
TEST_BEGIN(encode_buffer_check)
|
|
220
|
-
{
|
|
221
|
-
int8_t size;
|
|
222
|
-
lxb_char_t ch1, ch2[2], ch3[3], ch4[4];
|
|
223
|
-
lxb_char_t *ref;
|
|
224
|
-
lxb_encoding_encode_t ctx = {0};
|
|
225
|
-
const lxb_encoding_data_t *enc_data;
|
|
226
|
-
|
|
227
|
-
enc_data = lxb_encoding_data(LXB_ENCODING_GB18030);
|
|
228
|
-
|
|
229
|
-
/* 4 */
|
|
230
|
-
ref = &ch1;
|
|
231
|
-
size = enc_data->encode_single(&ctx, &ref, ref + 1, 0x022E);
|
|
232
|
-
test_eq(size, LXB_ENCODING_ENCODE_SMALL_BUFFER);
|
|
233
|
-
|
|
234
|
-
ref = ch2;
|
|
235
|
-
size = enc_data->encode_single(&ctx, &ref, ref + 2, 0x022E);
|
|
236
|
-
test_eq(size, LXB_ENCODING_ENCODE_SMALL_BUFFER);
|
|
237
|
-
|
|
238
|
-
ref = ch3;
|
|
239
|
-
size = enc_data->encode_single(&ctx, &ref, ref + 3, 0x022E);
|
|
240
|
-
test_eq(size, LXB_ENCODING_ENCODE_SMALL_BUFFER);
|
|
241
|
-
|
|
242
|
-
ref = ch4;
|
|
243
|
-
size = enc_data->encode_single(&ctx, &ref, ref + 4, 0x022E);
|
|
244
|
-
test_eq(size, 4);
|
|
245
|
-
|
|
246
|
-
/* 2 */
|
|
247
|
-
ref = &ch1;
|
|
248
|
-
size = enc_data->encode_single(&ctx, &ref, ref + 1, 0x5ABE);
|
|
249
|
-
test_eq(size, LXB_ENCODING_ENCODE_SMALL_BUFFER);
|
|
250
|
-
|
|
251
|
-
ref = ch2;
|
|
252
|
-
size = enc_data->encode_single(&ctx, &ref, ref + 2, 0x5ABE);
|
|
253
|
-
test_eq(size, 2);
|
|
254
|
-
}
|
|
255
|
-
TEST_END
|
|
256
|
-
|
|
257
|
-
int
|
|
258
|
-
main(int argc, const char * argv[])
|
|
259
|
-
{
|
|
260
|
-
if (argc != 2) {
|
|
261
|
-
printf("Usage:\n\tgb18030 <filepath>\n");
|
|
262
|
-
return EXIT_FAILURE;
|
|
263
|
-
}
|
|
264
|
-
|
|
265
|
-
lxb_filepath_test = argv[1];
|
|
266
|
-
|
|
267
|
-
TEST_INIT();
|
|
268
|
-
|
|
269
|
-
TEST_ADD(decode);
|
|
270
|
-
TEST_ADD(decode_prepend);
|
|
271
|
-
TEST_ADD(decode_map);
|
|
272
|
-
TEST_ADD(encode_map);
|
|
273
|
-
TEST_ADD(encode_buffer_check);
|
|
274
|
-
|
|
275
|
-
TEST_RUN("lexbor/encoding/gb18030");
|
|
276
|
-
TEST_RELEASE();
|
|
277
|
-
}
|