makiri 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/conformance.yml +22 -0
- data/.github/workflows/libfuzzer.yml +83 -0
- data/.github/workflows/release.yml +12 -7
- data/.github/workflows/security.yml +88 -3
- data/.github/workflows/valgrind.yml +135 -0
- data/CHANGELOG.md +152 -15
- data/README.md +183 -13
- data/Rakefile +294 -7
- data/ext/makiri/bridge/bridge.h +28 -0
- data/ext/makiri/bridge/ruby_string.c +282 -12
- data/ext/makiri/core/mkr_alloc.c +40 -3
- data/ext/makiri/core/mkr_alloc.h +28 -5
- data/ext/makiri/core/mkr_buf.c +47 -3
- data/ext/makiri/core/mkr_buf.h +112 -3
- data/ext/makiri/core/mkr_core.c +143 -0
- data/ext/makiri/core/mkr_core.h +11 -2
- data/ext/makiri/core/mkr_hash.h +1 -1
- data/ext/makiri/core/mkr_span.h +186 -0
- data/ext/makiri/core/mkr_text.h +8 -8
- data/ext/makiri/core/mkr_utf8.c +101 -0
- data/ext/makiri/core/mkr_utf8.h +88 -0
- data/ext/makiri/extconf.rb +123 -10
- data/ext/makiri/fuzz/Makefile +95 -0
- data/ext/makiri/fuzz/check_fuzzer.cc +4 -0
- data/ext/makiri/fuzz/xml_fuzz.c +24 -0
- data/ext/makiri/fuzz/xpath_fuzz.c +109 -0
- data/ext/makiri/glue/glue.h +55 -11
- data/ext/makiri/glue/ruby_doc.c +129 -59
- data/ext/makiri/glue/ruby_html_css.c +292 -0
- data/ext/makiri/glue/{ruby_mutate.c → ruby_html_mutate.c} +248 -52
- data/ext/makiri/glue/ruby_html_node.c +859 -0
- data/ext/makiri/glue/ruby_html_serialize.c +154 -0
- data/ext/makiri/glue/ruby_node.c +74 -729
- data/ext/makiri/glue/ruby_node_set.c +167 -32
- data/ext/makiri/glue/ruby_xml.c +602 -0
- data/ext/makiri/glue/ruby_xml_node.c +1373 -0
- data/ext/makiri/glue/ruby_xpath.c +63 -30
- data/ext/makiri/glue/ruby_xpath.h +19 -0
- data/ext/makiri/lexbor_compat/compat.h +42 -9
- data/ext/makiri/lexbor_compat/compat_internal.h +1 -1
- data/ext/makiri/lexbor_compat/dom_index.c +2 -2
- data/ext/makiri/lexbor_compat/post_parse.c +100 -10
- data/ext/makiri/lexbor_compat/source_loc.c +15 -13
- data/ext/makiri/lexbor_compat/text_index.c +14 -8
- data/ext/makiri/lexbor_compat/utf8_input.c +19 -33
- data/ext/makiri/makiri.c +184 -6
- data/ext/makiri/makiri.h +43 -2
- data/ext/makiri/xml/mkr_xml.h +125 -0
- data/ext/makiri/xml/mkr_xml_chars.c +195 -0
- data/ext/makiri/xml/mkr_xml_index.c +169 -0
- data/ext/makiri/xml/mkr_xml_index.h +48 -0
- data/ext/makiri/xml/mkr_xml_mutate.c +817 -0
- data/ext/makiri/xml/mkr_xml_mutate.h +139 -0
- data/ext/makiri/xml/mkr_xml_node.c +399 -0
- data/ext/makiri/xml/mkr_xml_node.h +184 -0
- data/ext/makiri/xml/mkr_xml_tree.c +1515 -0
- data/ext/makiri/xpath/mkr_css.c +1023 -0
- data/ext/makiri/xpath/mkr_css.h +65 -0
- data/ext/makiri/xpath/mkr_xpath.c +96 -32
- data/ext/makiri/xpath/mkr_xpath.h +109 -4
- data/ext/makiri/xpath/mkr_xpath_engine_html.c +17 -0
- data/ext/makiri/xpath/mkr_xpath_engine_xml.c +12 -0
- data/ext/makiri/xpath/{mkr_xpath_eval.c → mkr_xpath_eval_body.h} +551 -241
- data/ext/makiri/xpath/{mkr_xpath_funcs.c → mkr_xpath_funcs_body.h} +318 -276
- data/ext/makiri/xpath/mkr_xpath_internal.h +177 -206
- data/ext/makiri/xpath/mkr_xpath_lex.c +95 -125
- data/ext/makiri/xpath/mkr_xpath_node_access_html.h +138 -0
- data/ext/makiri/xpath/mkr_xpath_node_access_xml.h +145 -0
- data/ext/makiri/xpath/mkr_xpath_number.c +109 -0
- data/ext/makiri/xpath/mkr_xpath_parse.c +83 -94
- data/ext/makiri/xpath/mkr_xpath_prelude_html.h +30 -0
- data/ext/makiri/xpath/mkr_xpath_prelude_xml.h +28 -0
- data/ext/makiri/xpath/mkr_xpath_shared.c +609 -0
- data/ext/makiri/xpath/mkr_xpath_value_body.h +801 -0
- data/ext/makiri/xpath/mkr_xpath_xml_selftest.c +76 -0
- data/lib/makiri/{attribute.rb → attr.rb} +7 -3
- data/lib/makiri/cdata_section.rb +19 -0
- data/lib/makiri/comment.rb +10 -0
- data/lib/makiri/compat_aliases.rb +30 -0
- data/lib/makiri/document.rb +9 -73
- data/lib/makiri/document_fragment.rb +14 -9
- data/lib/makiri/element.rb +4 -4
- data/lib/makiri/html/document.rb +106 -0
- data/lib/makiri/html/node_methods.rb +19 -0
- data/lib/makiri/html.rb +12 -0
- data/lib/makiri/node.rb +58 -15
- data/lib/makiri/node_set.rb +8 -0
- data/lib/makiri/processing_instruction.rb +10 -0
- data/lib/makiri/text.rb +1 -1
- data/lib/makiri/version.rb +1 -1
- data/lib/makiri/xml/builder.rb +263 -0
- data/lib/makiri/xml/document.rb +24 -0
- data/lib/makiri/xml/node_methods.rb +84 -0
- data/lib/makiri/xml.rb +10 -0
- data/lib/makiri/xpath_context.rb +1 -1
- data/lib/makiri.rb +24 -5
- data/script/build_native_gem.rb +2 -2
- data/script/check_alloc_failures.rb +266 -0
- data/script/check_c_safety.rb +77 -2
- data/script/check_c_safety_allowlist.yml +102 -0
- data/script/check_leaks.rb +64 -0
- data/script/leaks_harness.rb +64 -0
- data/vendor/lexbor/CMakeLists.txt +6 -0
- data/vendor/lexbor/README.md +12 -0
- data/vendor/lexbor/config.cmake +1 -1
- data/vendor/lexbor/source/lexbor/core/base.h +1 -1
- data/vendor/lexbor/source/lexbor/core/config.cmake +9 -1
- data/vendor/lexbor/source/lexbor/css/selectors/pseudo_state.c +2 -3
- data/vendor/lexbor/source/lexbor/css/selectors/state.c +3 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/element.c +21 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/element.h +5 -0
- data/vendor/lexbor/source/lexbor/encoding/decode.c +33 -4
- data/vendor/lexbor/source/lexbor/html/base.h +1 -1
- data/vendor/lexbor/source/lexbor/html/interfaces/select_element.c +4 -0
- data/vendor/lexbor/source/lexbor/html/serialize.c +545 -41
- data/vendor/lexbor/source/lexbor/html/serialize.h +2 -1
- data/vendor/lexbor/source/lexbor/html/tokenizer.h +2 -2
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_body.c +1 -1
- data/vendor/lexbor/source/lexbor/html/tree.c +6 -6
- data/vendor/lexbor/source/lexbor/selectors/selectors.c +12 -3
- data/vendor/lexbor/source/lexbor/url/base.h +1 -1
- data/vendor/lexbor/source/lexbor/url/url.c +5 -2
- data/vendor/lexbor/source/lexbor/url/url.h +9 -0
- data/vendor/lexbor/version +1 -1
- metadata +53 -9
- data/ext/makiri/glue/ruby_css.c +0 -185
- data/ext/makiri/glue/ruby_serialize.c +0 -92
- data/ext/makiri/xpath/mkr_xpath_value.c +0 -1286
- data/lib/makiri/cdata.rb +0 -6
|
@@ -42,6 +42,13 @@ typedef struct {
|
|
|
42
42
|
}
|
|
43
43
|
lxb_html_serialize_ctx_t;
|
|
44
44
|
|
|
45
|
+
/*
 * One attribute queued for name-ordered output: the attribute itself plus
 * the location of its sort key inside a shared buffer of attribute names.
 */
typedef struct {
|
|
46
|
+
lxb_dom_attr_t *attr; /* attribute this entry refers to */
|
|
47
|
+
size_t offset; /* start of this attribute's name within the names buffer */
|
|
48
|
+
size_t length; /* number of name bytes stored at that offset */
|
|
49
|
+
}
|
|
50
|
+
lxb_html_serialize_attr_entry_t;
|
|
51
|
+
|
|
45
52
|
|
|
46
53
|
static lxb_status_t
|
|
47
54
|
lxb_html_serialize_str_callback(const lxb_char_t *data, size_t len, void *ctx);
|
|
@@ -77,6 +84,7 @@ lxb_html_serialize_document_type_cb(lxb_dom_document_type_t *doctype,
|
|
|
77
84
|
|
|
78
85
|
static lxb_status_t
|
|
79
86
|
lxb_html_serialize_document_type_full_cb(lxb_dom_document_type_t *doctype,
|
|
87
|
+
lxb_html_serialize_opt_t opt,
|
|
80
88
|
lxb_html_serialize_cb_f cb, void *ctx);
|
|
81
89
|
|
|
82
90
|
static lxb_status_t
|
|
@@ -94,7 +102,7 @@ lxb_html_serialize_send_escaping_string(const lxb_char_t *data, size_t len,
|
|
|
94
102
|
lxb_html_serialize_cb_f cb, void *ctx);
|
|
95
103
|
|
|
96
104
|
static lxb_status_t
|
|
97
|
-
lxb_html_serialize_attribute_cb(lxb_dom_attr_t *attr,
|
|
105
|
+
lxb_html_serialize_attribute_cb(lxb_dom_attr_t *attr,
|
|
98
106
|
lxb_html_serialize_cb_f cb, void *ctx);
|
|
99
107
|
|
|
100
108
|
static lxb_status_t
|
|
@@ -107,6 +115,34 @@ lxb_html_serialize_pretty_element_cb(lxb_dom_element_t *element,
|
|
|
107
115
|
lxb_html_serialize_opt_t opt, size_t indent,
|
|
108
116
|
lxb_html_serialize_cb_f cb, void *ctx);
|
|
109
117
|
|
|
118
|
+
static lxb_status_t
|
|
119
|
+
lxb_html_serialize_pretty_attribute_cb(lxb_dom_attr_t *attr,
|
|
120
|
+
lxb_html_serialize_opt_t opt, bool has_raw,
|
|
121
|
+
lxb_html_serialize_cb_f cb, void *ctx);
|
|
122
|
+
|
|
123
|
+
static lxb_status_t
|
|
124
|
+
lxb_html_serialize_pretty_attributes_sorted(lxb_dom_element_t *element,
|
|
125
|
+
lxb_html_serialize_opt_t opt,
|
|
126
|
+
size_t indent,
|
|
127
|
+
lxb_html_serialize_cb_f cb,
|
|
128
|
+
void *ctx);
|
|
129
|
+
|
|
130
|
+
static size_t
|
|
131
|
+
lxb_html_serialize_attr_name_size(const lxb_dom_attr_t *attr);
|
|
132
|
+
|
|
133
|
+
static size_t
|
|
134
|
+
lxb_html_serialize_attr_name_build(const lxb_dom_attr_t *attr,
|
|
135
|
+
lxb_char_t *buf, size_t cap);
|
|
136
|
+
|
|
137
|
+
static int
|
|
138
|
+
lxb_html_serialize_attr_entry_cmp(const lxb_html_serialize_attr_entry_t *a,
|
|
139
|
+
const lxb_html_serialize_attr_entry_t *b,
|
|
140
|
+
const lxb_char_t *names);
|
|
141
|
+
|
|
142
|
+
static void
|
|
143
|
+
lxb_html_serialize_attr_sort(lxb_html_serialize_attr_entry_t *entries,
|
|
144
|
+
size_t n, const lxb_char_t *names);
|
|
145
|
+
|
|
110
146
|
static lxb_status_t
|
|
111
147
|
lxb_html_serialize_pretty_text_cb(lxb_dom_text_t *text,
|
|
112
148
|
lxb_html_serialize_opt_t opt, size_t indent,
|
|
@@ -354,7 +390,7 @@ lxb_html_serialize_element_cb(lxb_dom_element_t *element,
|
|
|
354
390
|
while (attr != NULL) {
|
|
355
391
|
lxb_html_serialize_send(" ", 1, ctx);
|
|
356
392
|
|
|
357
|
-
status = lxb_html_serialize_attribute_cb(attr,
|
|
393
|
+
status = lxb_html_serialize_attribute_cb(attr, cb, ctx);
|
|
358
394
|
if (status != LXB_STATUS_OK) {
|
|
359
395
|
return status;
|
|
360
396
|
}
|
|
@@ -483,8 +519,10 @@ lxb_html_serialize_document_type_cb(lxb_dom_document_type_t *doctype,
|
|
|
483
519
|
|
|
484
520
|
static lxb_status_t
|
|
485
521
|
lxb_html_serialize_document_type_full_cb(lxb_dom_document_type_t *doctype,
|
|
522
|
+
lxb_html_serialize_opt_t opt,
|
|
486
523
|
lxb_html_serialize_cb_f cb, void *ctx)
|
|
487
524
|
{
|
|
525
|
+
bool have_pub, have_sys;
|
|
488
526
|
size_t length;
|
|
489
527
|
const lxb_char_t *name;
|
|
490
528
|
lxb_status_t status;
|
|
@@ -498,27 +536,54 @@ lxb_html_serialize_document_type_full_cb(lxb_dom_document_type_t *doctype,
|
|
|
498
536
|
lxb_html_serialize_send(name, length, ctx);
|
|
499
537
|
}
|
|
500
538
|
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
lxb_html_serialize_send("\"", 1, ctx);
|
|
539
|
+
have_pub = doctype->public_id.length != 0;
|
|
540
|
+
have_sys = doctype->system_id.length != 0;
|
|
504
541
|
|
|
505
|
-
|
|
506
|
-
|
|
542
|
+
if (opt & LXB_HTML_SERIALIZE_OPT_HTML5TEST) {
|
|
543
|
+
/*
|
|
544
|
+
* html5lib-tests format: when either PUBLIC or SYSTEM identifier
|
|
545
|
+
* is present, emit both slots. A missing identifier is shown as "".
|
|
546
|
+
*/
|
|
547
|
+
if (have_pub || have_sys) {
|
|
548
|
+
lxb_html_serialize_send(" \"", 2, ctx);
|
|
549
|
+
|
|
550
|
+
if (have_pub) {
|
|
551
|
+
lxb_html_serialize_send(doctype->public_id.data,
|
|
552
|
+
doctype->public_id.length, ctx);
|
|
553
|
+
}
|
|
507
554
|
|
|
508
|
-
|
|
555
|
+
lxb_html_serialize_send("\" \"", 3, ctx);
|
|
556
|
+
|
|
557
|
+
if (have_sys) {
|
|
558
|
+
lxb_html_serialize_send(doctype->system_id.data,
|
|
559
|
+
doctype->system_id.length, ctx);
|
|
560
|
+
}
|
|
561
|
+
|
|
562
|
+
lxb_html_serialize_send("\"", 1, ctx);
|
|
563
|
+
}
|
|
509
564
|
}
|
|
565
|
+
else {
|
|
566
|
+
if (have_pub) {
|
|
567
|
+
lxb_html_serialize_send(" PUBLIC \"", 9, ctx);
|
|
568
|
+
|
|
569
|
+
lxb_html_serialize_send(doctype->public_id.data,
|
|
570
|
+
doctype->public_id.length, ctx);
|
|
510
571
|
|
|
511
|
-
|
|
512
|
-
if (doctype->public_id.length == 0) {
|
|
513
|
-
lxb_html_serialize_send(" SYSTEM", 7, ctx);
|
|
572
|
+
lxb_html_serialize_send("\"", 1, ctx);
|
|
514
573
|
}
|
|
515
574
|
|
|
516
|
-
|
|
575
|
+
if (have_sys) {
|
|
576
|
+
if (!have_pub) {
|
|
577
|
+
lxb_html_serialize_send(" SYSTEM", 7, ctx);
|
|
578
|
+
}
|
|
517
579
|
|
|
518
|
-
|
|
519
|
-
doctype->system_id.length, ctx);
|
|
580
|
+
lxb_html_serialize_send(" \"", 2, ctx);
|
|
520
581
|
|
|
521
|
-
|
|
582
|
+
lxb_html_serialize_send(doctype->system_id.data,
|
|
583
|
+
doctype->system_id.length, ctx);
|
|
584
|
+
|
|
585
|
+
lxb_html_serialize_send("\"", 1, ctx);
|
|
586
|
+
}
|
|
522
587
|
}
|
|
523
588
|
|
|
524
589
|
lxb_html_serialize_send(">", 1, ctx);
|
|
@@ -727,7 +792,7 @@ lxb_html_serialize_send_escaping_string(const lxb_char_t *data, size_t len,
|
|
|
727
792
|
}
|
|
728
793
|
|
|
729
794
|
static lxb_status_t
|
|
730
|
-
lxb_html_serialize_attribute_cb(lxb_dom_attr_t *attr,
|
|
795
|
+
lxb_html_serialize_attribute_cb(lxb_dom_attr_t *attr,
|
|
731
796
|
lxb_html_serialize_cb_f cb, void *ctx)
|
|
732
797
|
{
|
|
733
798
|
size_t length;
|
|
@@ -796,16 +861,11 @@ value:
|
|
|
796
861
|
|
|
797
862
|
lxb_html_serialize_send("=\"", 2, ctx);
|
|
798
863
|
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
status
|
|
804
|
-
attr->value->length,
|
|
805
|
-
cb, ctx);
|
|
806
|
-
if (status != LXB_STATUS_OK) {
|
|
807
|
-
return status;
|
|
808
|
-
}
|
|
864
|
+
status = lxb_html_serialize_send_escaping_attribute_string(attr->value->data,
|
|
865
|
+
attr->value->length,
|
|
866
|
+
cb, ctx);
|
|
867
|
+
if (status != LXB_STATUS_OK) {
|
|
868
|
+
return status;
|
|
809
869
|
}
|
|
810
870
|
|
|
811
871
|
lxb_html_serialize_send("\"", 1, ctx);
|
|
@@ -820,6 +880,14 @@ lxb_html_serialize_pretty_cb(lxb_dom_node_t *node,
|
|
|
820
880
|
{
|
|
821
881
|
lxb_status_t status;
|
|
822
882
|
|
|
883
|
+
if (opt & LXB_HTML_SERIALIZE_OPT_HTML5TEST) {
|
|
884
|
+
opt |= LXB_HTML_SERIALIZE_OPT_WITHOUT_CLOSING
|
|
885
|
+
| LXB_HTML_SERIALIZE_OPT_TAG_WITH_NS
|
|
886
|
+
| LXB_HTML_SERIALIZE_OPT_WITHOUT_TEXT_INDENT
|
|
887
|
+
| LXB_HTML_SERIALIZE_OPT_FULL_DOCTYPE
|
|
888
|
+
| LXB_HTML_SERIALIZE_OPT_RAW;
|
|
889
|
+
}
|
|
890
|
+
|
|
823
891
|
switch (node->type) {
|
|
824
892
|
case LXB_DOM_NODE_TYPE_ELEMENT:
|
|
825
893
|
lxb_html_serialize_send_indent(indent, ctx);
|
|
@@ -861,7 +929,7 @@ lxb_html_serialize_pretty_cb(lxb_dom_node_t *node,
|
|
|
861
929
|
|
|
862
930
|
if (opt & LXB_HTML_SERIALIZE_OPT_FULL_DOCTYPE) {
|
|
863
931
|
status = lxb_html_serialize_document_type_full_cb(lxb_dom_interface_document_type(node),
|
|
864
|
-
|
|
932
|
+
opt, cb, ctx);
|
|
865
933
|
}
|
|
866
934
|
else {
|
|
867
935
|
status = lxb_html_serialize_document_type_cb(lxb_dom_interface_document_type(node),
|
|
@@ -920,6 +988,14 @@ lxb_html_serialize_pretty_deep_cb(lxb_dom_node_t *node,
|
|
|
920
988
|
{
|
|
921
989
|
lxb_status_t status;
|
|
922
990
|
|
|
991
|
+
if (opt & LXB_HTML_SERIALIZE_OPT_HTML5TEST) {
|
|
992
|
+
opt |= LXB_HTML_SERIALIZE_OPT_WITHOUT_CLOSING
|
|
993
|
+
| LXB_HTML_SERIALIZE_OPT_TAG_WITH_NS
|
|
994
|
+
| LXB_HTML_SERIALIZE_OPT_WITHOUT_TEXT_INDENT
|
|
995
|
+
| LXB_HTML_SERIALIZE_OPT_FULL_DOCTYPE
|
|
996
|
+
| LXB_HTML_SERIALIZE_OPT_RAW;
|
|
997
|
+
}
|
|
998
|
+
|
|
923
999
|
node = node->first_child;
|
|
924
1000
|
|
|
925
1001
|
while (node != NULL) {
|
|
@@ -977,12 +1053,20 @@ lxb_html_serialize_pretty_node_cb(lxb_dom_node_t *node,
|
|
|
977
1053
|
|
|
978
1054
|
temp = lxb_html_interface_template(node);
|
|
979
1055
|
|
|
1056
|
+
if (opt & LXB_HTML_SERIALIZE_OPT_HTML5TEST) {
|
|
1057
|
+
lxb_html_serialize_send_indent((deep + 1), ctx);
|
|
1058
|
+
lxb_html_serialize_send("content", 7, ctx);
|
|
1059
|
+
lxb_html_serialize_send("\n", 1, ctx);
|
|
1060
|
+
}
|
|
1061
|
+
|
|
980
1062
|
if (temp->content != NULL) {
|
|
981
1063
|
if (temp->content->node.first_child != NULL)
|
|
982
1064
|
{
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
1065
|
+
if ((opt & LXB_HTML_SERIALIZE_OPT_HTML5TEST) == 0) {
|
|
1066
|
+
lxb_html_serialize_send_indent((deep + 1), ctx);
|
|
1067
|
+
lxb_html_serialize_send("#document-fragment", 18, ctx);
|
|
1068
|
+
lxb_html_serialize_send("\n", 1, ctx);
|
|
1069
|
+
}
|
|
986
1070
|
|
|
987
1071
|
status = lxb_html_serialize_pretty_deep_cb(&temp->content->node,
|
|
988
1072
|
opt, (deep + 2),
|
|
@@ -1088,7 +1172,13 @@ lxb_html_serialize_pretty_element_cb(lxb_dom_element_t *element,
|
|
|
1088
1172
|
if (data != NULL) {
|
|
1089
1173
|
lxb_html_serialize_send(lexbor_hash_entry_str(&data->entry),
|
|
1090
1174
|
data->entry.length, ctx);
|
|
1091
|
-
|
|
1175
|
+
|
|
1176
|
+
if (opt & LXB_HTML_SERIALIZE_OPT_HTML5TEST) {
|
|
1177
|
+
lxb_html_serialize_send(" ", 1, ctx);
|
|
1178
|
+
}
|
|
1179
|
+
else {
|
|
1180
|
+
lxb_html_serialize_send(":", 1, ctx);
|
|
1181
|
+
}
|
|
1092
1182
|
}
|
|
1093
1183
|
}
|
|
1094
1184
|
|
|
@@ -1117,26 +1207,430 @@ lxb_html_serialize_pretty_element_cb(lxb_dom_element_t *element,
|
|
|
1117
1207
|
}
|
|
1118
1208
|
}
|
|
1119
1209
|
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
while (attr != NULL) {
|
|
1123
|
-
lxb_html_serialize_send(" ", 1, ctx);
|
|
1210
|
+
if (opt & LXB_HTML_SERIALIZE_OPT_HTML5TEST) {
|
|
1211
|
+
lxb_html_serialize_send(">", 1, ctx);
|
|
1124
1212
|
|
|
1125
|
-
status =
|
|
1126
|
-
|
|
1127
|
-
cb, ctx);
|
|
1213
|
+
status = lxb_html_serialize_pretty_attributes_sorted(element, opt,
|
|
1214
|
+
indent, cb, ctx);
|
|
1128
1215
|
if (status != LXB_STATUS_OK) {
|
|
1129
1216
|
return status;
|
|
1130
1217
|
}
|
|
1218
|
+
}
|
|
1219
|
+
else {
|
|
1220
|
+
attr = element->first_attr;
|
|
1131
1221
|
|
|
1132
|
-
attr
|
|
1222
|
+
while (attr != NULL) {
|
|
1223
|
+
lxb_html_serialize_send(" ", 1, ctx);
|
|
1224
|
+
|
|
1225
|
+
status = lxb_html_serialize_pretty_attribute_cb(attr, opt,
|
|
1226
|
+
(opt & LXB_HTML_SERIALIZE_OPT_RAW),
|
|
1227
|
+
cb, ctx);
|
|
1228
|
+
if (status != LXB_STATUS_OK) {
|
|
1229
|
+
return status;
|
|
1230
|
+
}
|
|
1231
|
+
|
|
1232
|
+
attr = attr->next;
|
|
1233
|
+
}
|
|
1234
|
+
|
|
1235
|
+
lxb_html_serialize_send(">", 1, ctx);
|
|
1133
1236
|
}
|
|
1134
1237
|
|
|
1135
|
-
|
|
1238
|
+
return LXB_STATUS_OK;
|
|
1239
|
+
}
|
|
1240
|
+
|
|
1241
|
+
static lxb_status_t
|
|
1242
|
+
lxb_html_serialize_pretty_attribute_cb(lxb_dom_attr_t *attr,
|
|
1243
|
+
lxb_html_serialize_opt_t opt, bool has_raw,
|
|
1244
|
+
lxb_html_serialize_cb_f cb, void *ctx)
|
|
1245
|
+
{
|
|
1246
|
+
size_t length;
|
|
1247
|
+
lxb_status_t status;
|
|
1248
|
+
const lxb_char_t *str;
|
|
1249
|
+
const lxb_dom_attr_data_t *data;
|
|
1250
|
+
lxb_char_t spliter;
|
|
1251
|
+
|
|
1252
|
+
if (opt & LXB_HTML_SERIALIZE_OPT_HTML5TEST) {
|
|
1253
|
+
spliter = ' ';
|
|
1254
|
+
}
|
|
1255
|
+
else {
|
|
1256
|
+
spliter = ':';
|
|
1257
|
+
}
|
|
1258
|
+
|
|
1259
|
+
data = lxb_dom_attr_data_by_id(attr->node.owner_document->attrs,
|
|
1260
|
+
attr->node.local_name);
|
|
1261
|
+
if (data == NULL) {
|
|
1262
|
+
return LXB_STATUS_ERROR;
|
|
1263
|
+
}
|
|
1264
|
+
|
|
1265
|
+
if (attr->node.ns == LXB_NS__UNDEF) {
|
|
1266
|
+
lxb_html_serialize_send(lexbor_hash_entry_str(&data->entry),
|
|
1267
|
+
data->entry.length, ctx);
|
|
1268
|
+
goto value;
|
|
1269
|
+
}
|
|
1270
|
+
|
|
1271
|
+
if (attr->node.ns == LXB_NS_XML) {
|
|
1272
|
+
lxb_html_serialize_send((const lxb_char_t *) "xml", 3, ctx);
|
|
1273
|
+
lxb_html_serialize_send(&spliter, 1, ctx);
|
|
1274
|
+
lxb_html_serialize_send(lexbor_hash_entry_str(&data->entry),
|
|
1275
|
+
data->entry.length, ctx);
|
|
1276
|
+
|
|
1277
|
+
goto value;
|
|
1278
|
+
}
|
|
1279
|
+
|
|
1280
|
+
if (attr->node.ns == LXB_NS_XMLNS)
|
|
1281
|
+
{
|
|
1282
|
+
if (data->entry.length == 5
|
|
1283
|
+
&& lexbor_str_data_cmp(lexbor_hash_entry_str(&data->entry),
|
|
1284
|
+
(const lxb_char_t *) "xmlns"))
|
|
1285
|
+
{
|
|
1286
|
+
lxb_html_serialize_send((const lxb_char_t *) "xmlns", 5, ctx);
|
|
1287
|
+
}
|
|
1288
|
+
else {
|
|
1289
|
+
lxb_html_serialize_send((const lxb_char_t *) "xmlns", 5, ctx);
|
|
1290
|
+
lxb_html_serialize_send(&spliter, 1, ctx);
|
|
1291
|
+
lxb_html_serialize_send(lexbor_hash_entry_str(&data->entry),
|
|
1292
|
+
data->entry.length, ctx);
|
|
1293
|
+
}
|
|
1294
|
+
|
|
1295
|
+
goto value;
|
|
1296
|
+
}
|
|
1297
|
+
|
|
1298
|
+
if (attr->node.ns == LXB_NS_XLINK) {
|
|
1299
|
+
lxb_html_serialize_send((const lxb_char_t *) "xlink", 5, ctx);
|
|
1300
|
+
lxb_html_serialize_send(&spliter, 1, ctx);
|
|
1301
|
+
lxb_html_serialize_send(lexbor_hash_entry_str(&data->entry),
|
|
1302
|
+
data->entry.length, ctx);
|
|
1303
|
+
|
|
1304
|
+
goto value;
|
|
1305
|
+
}
|
|
1306
|
+
|
|
1307
|
+
str = lxb_dom_attr_qualified_name(attr, &length);
|
|
1308
|
+
if (str == NULL) {
|
|
1309
|
+
return LXB_STATUS_ERROR;
|
|
1310
|
+
}
|
|
1311
|
+
|
|
1312
|
+
lxb_html_serialize_send(str, length, ctx);
|
|
1313
|
+
|
|
1314
|
+
value:
|
|
1315
|
+
|
|
1316
|
+
if (attr->value == NULL) {
|
|
1317
|
+
lxb_html_serialize_send("=\"\"", 3, ctx);
|
|
1318
|
+
return LXB_STATUS_OK;
|
|
1319
|
+
}
|
|
1320
|
+
|
|
1321
|
+
lxb_html_serialize_send("=\"", 2, ctx);
|
|
1322
|
+
|
|
1323
|
+
if (has_raw) {
|
|
1324
|
+
lxb_html_serialize_send(attr->value->data, attr->value->length, ctx);
|
|
1325
|
+
}
|
|
1326
|
+
else {
|
|
1327
|
+
status = lxb_html_serialize_send_escaping_attribute_string(attr->value->data,
|
|
1328
|
+
attr->value->length,
|
|
1329
|
+
cb, ctx);
|
|
1330
|
+
if (status != LXB_STATUS_OK) {
|
|
1331
|
+
return status;
|
|
1332
|
+
}
|
|
1333
|
+
}
|
|
1334
|
+
|
|
1335
|
+
lxb_html_serialize_send("\"", 1, ctx);
|
|
1136
1336
|
|
|
1137
1337
|
return LXB_STATUS_OK;
|
|
1138
1338
|
}
|
|
1139
1339
|
|
|
1340
|
+
static size_t
|
|
1341
|
+
lxb_html_serialize_attr_name_build(const lxb_dom_attr_t *attr,
|
|
1342
|
+
lxb_char_t *buf, size_t cap)
|
|
1343
|
+
{
|
|
1344
|
+
size_t length, xmlns_len, pos;
|
|
1345
|
+
const lxb_char_t *str;
|
|
1346
|
+
const lexbor_str_t *ns;
|
|
1347
|
+
const lxb_dom_attr_data_t *data;
|
|
1348
|
+
|
|
1349
|
+
static const lexbor_str_t str_xml = lexbor_str("xml ");
|
|
1350
|
+
static const lexbor_str_t str_xmlns = lexbor_str("xmlns ");
|
|
1351
|
+
static const lexbor_str_t str_xlink = lexbor_str("xlink ");
|
|
1352
|
+
|
|
1353
|
+
data = lxb_dom_attr_data_by_id(attr->node.owner_document->attrs,
|
|
1354
|
+
attr->node.local_name);
|
|
1355
|
+
if (data == NULL) {
|
|
1356
|
+
return 0;
|
|
1357
|
+
}
|
|
1358
|
+
|
|
1359
|
+
pos = 0;
|
|
1360
|
+
str = lexbor_hash_entry_str(&data->entry);
|
|
1361
|
+
length = data->entry.length;
|
|
1362
|
+
|
|
1363
|
+
switch (attr->node.ns) {
|
|
1364
|
+
case LXB_NS_XML:
|
|
1365
|
+
if (str_xml.length + length > cap) {
|
|
1366
|
+
return 0;
|
|
1367
|
+
}
|
|
1368
|
+
|
|
1369
|
+
ns = &str_xml;
|
|
1370
|
+
goto done;
|
|
1371
|
+
|
|
1372
|
+
case LXB_NS_XMLNS:
|
|
1373
|
+
xmlns_len = str_xmlns.length - 1;
|
|
1374
|
+
|
|
1375
|
+
if (length == xmlns_len
|
|
1376
|
+
&& lexbor_str_data_ncmp(str, str_xmlns.data, xmlns_len))
|
|
1377
|
+
{
|
|
1378
|
+
if (xmlns_len > cap) {
|
|
1379
|
+
return 0;
|
|
1380
|
+
}
|
|
1381
|
+
|
|
1382
|
+
memcpy(buf, str_xmlns.data, xmlns_len);
|
|
1383
|
+
return xmlns_len;
|
|
1384
|
+
}
|
|
1385
|
+
|
|
1386
|
+
if (str_xmlns.length + length > cap) {
|
|
1387
|
+
return 0;
|
|
1388
|
+
}
|
|
1389
|
+
|
|
1390
|
+
ns = &str_xmlns;
|
|
1391
|
+
goto done;
|
|
1392
|
+
|
|
1393
|
+
case LXB_NS_XLINK:
|
|
1394
|
+
if (str_xlink.length + length > cap) {
|
|
1395
|
+
return 0;
|
|
1396
|
+
}
|
|
1397
|
+
|
|
1398
|
+
ns = &str_xlink;
|
|
1399
|
+
goto done;
|
|
1400
|
+
|
|
1401
|
+
case LXB_NS__UNDEF:
|
|
1402
|
+
if (length > cap) {
|
|
1403
|
+
return 0;
|
|
1404
|
+
}
|
|
1405
|
+
|
|
1406
|
+
memcpy(buf, str, length);
|
|
1407
|
+
return length;
|
|
1408
|
+
|
|
1409
|
+
default:
|
|
1410
|
+
if (attr->qualified_name != 0) {
|
|
1411
|
+
data = lxb_dom_attr_data_by_id(attr->node.owner_document->attrs,
|
|
1412
|
+
attr->qualified_name);
|
|
1413
|
+
if (data == NULL) {
|
|
1414
|
+
return 0;
|
|
1415
|
+
}
|
|
1416
|
+
|
|
1417
|
+
str = lexbor_hash_entry_str(&data->entry);
|
|
1418
|
+
length = data->entry.length;
|
|
1419
|
+
}
|
|
1420
|
+
|
|
1421
|
+
if (length > cap) {
|
|
1422
|
+
return 0;
|
|
1423
|
+
}
|
|
1424
|
+
|
|
1425
|
+
memcpy(buf, str, length);
|
|
1426
|
+
return length;
|
|
1427
|
+
}
|
|
1428
|
+
|
|
1429
|
+
done:
|
|
1430
|
+
|
|
1431
|
+
memcpy(buf, ns->data, ns->length);
|
|
1432
|
+
pos = ns->length;
|
|
1433
|
+
|
|
1434
|
+
memcpy(buf + pos, str, length);
|
|
1435
|
+
pos += length;
|
|
1436
|
+
|
|
1437
|
+
return pos;
|
|
1438
|
+
}
|
|
1439
|
+
|
|
1440
|
+
static size_t
|
|
1441
|
+
lxb_html_serialize_attr_name_size(const lxb_dom_attr_t *attr)
|
|
1442
|
+
{
|
|
1443
|
+
size_t length;
|
|
1444
|
+
const lxb_dom_attr_data_t *data;
|
|
1445
|
+
|
|
1446
|
+
static const lexbor_str_t str_xml = lexbor_str("xml ");
|
|
1447
|
+
static const lexbor_str_t str_xmlns = lexbor_str("xmlns ");
|
|
1448
|
+
static const lexbor_str_t str_xlink = lexbor_str("xlink ");
|
|
1449
|
+
|
|
1450
|
+
data = lxb_dom_attr_data_by_id(attr->node.owner_document->attrs,
|
|
1451
|
+
attr->node.local_name);
|
|
1452
|
+
if (data == NULL) {
|
|
1453
|
+
return 0;
|
|
1454
|
+
}
|
|
1455
|
+
|
|
1456
|
+
length = data->entry.length;
|
|
1457
|
+
|
|
1458
|
+
switch (attr->node.ns) {
|
|
1459
|
+
case LXB_NS_XML:
|
|
1460
|
+
return str_xml.length + length;
|
|
1461
|
+
|
|
1462
|
+
case LXB_NS_XMLNS:
|
|
1463
|
+
if (length == str_xmlns.length - 1
|
|
1464
|
+
&& lexbor_str_data_ncmp(lexbor_hash_entry_str(&data->entry),
|
|
1465
|
+
str_xmlns.data, str_xmlns.length - 1))
|
|
1466
|
+
{
|
|
1467
|
+
return str_xmlns.length - 1;
|
|
1468
|
+
}
|
|
1469
|
+
|
|
1470
|
+
return str_xmlns.length + length;
|
|
1471
|
+
|
|
1472
|
+
case LXB_NS_XLINK:
|
|
1473
|
+
return str_xlink.length + length;
|
|
1474
|
+
|
|
1475
|
+
case LXB_NS__UNDEF:
|
|
1476
|
+
return length;
|
|
1477
|
+
|
|
1478
|
+
default:
|
|
1479
|
+
if (attr->qualified_name != 0) {
|
|
1480
|
+
data = lxb_dom_attr_data_by_id(attr->node.owner_document->attrs,
|
|
1481
|
+
attr->qualified_name);
|
|
1482
|
+
if (data == NULL) {
|
|
1483
|
+
return 0;
|
|
1484
|
+
}
|
|
1485
|
+
|
|
1486
|
+
length = data->entry.length;
|
|
1487
|
+
}
|
|
1488
|
+
|
|
1489
|
+
return length;
|
|
1490
|
+
}
|
|
1491
|
+
}
|
|
1492
|
+
|
|
1493
|
+
static int
|
|
1494
|
+
lxb_html_serialize_attr_entry_cmp(const lxb_html_serialize_attr_entry_t *a,
|
|
1495
|
+
const lxb_html_serialize_attr_entry_t *b,
|
|
1496
|
+
const lxb_char_t *names)
|
|
1497
|
+
{
|
|
1498
|
+
int c;
|
|
1499
|
+
size_t min;
|
|
1500
|
+
|
|
1501
|
+
min = (a->length < b->length) ? a->length : b->length;
|
|
1502
|
+
|
|
1503
|
+
c = memcmp(names + a->offset, names + b->offset, min);
|
|
1504
|
+
if (c != 0) {
|
|
1505
|
+
return c;
|
|
1506
|
+
}
|
|
1507
|
+
|
|
1508
|
+
if (a->length < b->length) return -1;
|
|
1509
|
+
if (a->length > b->length) return 1;
|
|
1510
|
+
return 0;
|
|
1511
|
+
}
|
|
1512
|
+
|
|
1513
|
+
static void
|
|
1514
|
+
lxb_html_serialize_attr_sort(lxb_html_serialize_attr_entry_t *entries,
|
|
1515
|
+
size_t n, const lxb_char_t *names)
|
|
1516
|
+
{
|
|
1517
|
+
size_t i, j;
|
|
1518
|
+
lxb_html_serialize_attr_entry_t cur;
|
|
1519
|
+
|
|
1520
|
+
for (i = 1; i < n; i++) {
|
|
1521
|
+
cur = entries[i];
|
|
1522
|
+
j = i;
|
|
1523
|
+
|
|
1524
|
+
while (j > 0
|
|
1525
|
+
&& lxb_html_serialize_attr_entry_cmp(&entries[j - 1], &cur,
|
|
1526
|
+
names) > 0)
|
|
1527
|
+
{
|
|
1528
|
+
entries[j] = entries[j - 1];
|
|
1529
|
+
j--;
|
|
1530
|
+
}
|
|
1531
|
+
|
|
1532
|
+
entries[j] = cur;
|
|
1533
|
+
}
|
|
1534
|
+
}
|
|
1535
|
+
|
|
1536
|
+
static lxb_status_t
|
|
1537
|
+
lxb_html_serialize_pretty_attributes_sorted(lxb_dom_element_t *element,
|
|
1538
|
+
lxb_html_serialize_opt_t opt,
|
|
1539
|
+
size_t indent,
|
|
1540
|
+
lxb_html_serialize_cb_f cb,
|
|
1541
|
+
void *ctx)
|
|
1542
|
+
{
|
|
1543
|
+
size_t i, k, off, len, count, total;
|
|
1544
|
+
lxb_status_t status;
|
|
1545
|
+
lxb_dom_attr_t *attr;
|
|
1546
|
+
lxb_html_serialize_attr_entry_t *entries;
|
|
1547
|
+
lxb_char_t *names;
|
|
1548
|
+
lxb_char_t stack_names[256];
|
|
1549
|
+
lxb_html_serialize_attr_entry_t stack_entries[16];
|
|
1550
|
+
|
|
1551
|
+
count = 0;
|
|
1552
|
+
total = 0;
|
|
1553
|
+
entries = stack_entries;
|
|
1554
|
+
names = stack_names;
|
|
1555
|
+
|
|
1556
|
+
for (attr = element->first_attr; attr != NULL; attr = attr->next) {
|
|
1557
|
+
count += 1;
|
|
1558
|
+
total += lxb_html_serialize_attr_name_size(attr);
|
|
1559
|
+
}
|
|
1560
|
+
|
|
1561
|
+
if (count == 0) {
|
|
1562
|
+
return LXB_STATUS_OK;
|
|
1563
|
+
}
|
|
1564
|
+
|
|
1565
|
+
if (count > sizeof(stack_entries) / sizeof(stack_entries[0])) {
|
|
1566
|
+
entries = lexbor_malloc(count * sizeof(lxb_html_serialize_attr_entry_t));
|
|
1567
|
+
if (entries == NULL) {
|
|
1568
|
+
return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
|
1569
|
+
}
|
|
1570
|
+
}
|
|
1571
|
+
|
|
1572
|
+
if (total > sizeof(stack_names)) {
|
|
1573
|
+
names = lexbor_malloc(total);
|
|
1574
|
+
if (names == NULL) {
|
|
1575
|
+
if (entries != stack_entries) {
|
|
1576
|
+
lexbor_free(entries);
|
|
1577
|
+
}
|
|
1578
|
+
|
|
1579
|
+
return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
|
1580
|
+
}
|
|
1581
|
+
}
|
|
1582
|
+
|
|
1583
|
+
i = 0;
|
|
1584
|
+
off = 0;
|
|
1585
|
+
for (attr = element->first_attr; attr != NULL; attr = attr->next) {
|
|
1586
|
+
len = lxb_html_serialize_attr_name_build(attr, names + off,
|
|
1587
|
+
total - off);
|
|
1588
|
+
entries[i].attr = attr;
|
|
1589
|
+
entries[i].offset = off;
|
|
1590
|
+
entries[i].length = len;
|
|
1591
|
+
|
|
1592
|
+
off += len;
|
|
1593
|
+
i += 1;
|
|
1594
|
+
}
|
|
1595
|
+
|
|
1596
|
+
lxb_html_serialize_attr_sort(entries, count, names);
|
|
1597
|
+
|
|
1598
|
+
status = LXB_STATUS_OK;
|
|
1599
|
+
|
|
1600
|
+
for (i = 0; i < count; i++) {
|
|
1601
|
+
status = cb((const lxb_char_t *) "\n", 1, ctx);
|
|
1602
|
+
if (status != LXB_STATUS_OK) {
|
|
1603
|
+
goto done;
|
|
1604
|
+
}
|
|
1605
|
+
|
|
1606
|
+
for (k = 0; k < indent + 1; k++) {
|
|
1607
|
+
status = cb((const lxb_char_t *) " ", 2, ctx);
|
|
1608
|
+
if (status != LXB_STATUS_OK) {
|
|
1609
|
+
goto done;
|
|
1610
|
+
}
|
|
1611
|
+
}
|
|
1612
|
+
|
|
1613
|
+
status = lxb_html_serialize_pretty_attribute_cb(entries[i].attr, opt,
|
|
1614
|
+
(opt & LXB_HTML_SERIALIZE_OPT_RAW),
|
|
1615
|
+
cb, ctx);
|
|
1616
|
+
if (status != LXB_STATUS_OK) {
|
|
1617
|
+
goto done;
|
|
1618
|
+
}
|
|
1619
|
+
}
|
|
1620
|
+
|
|
1621
|
+
done:
|
|
1622
|
+
|
|
1623
|
+
if (names != stack_names) {
|
|
1624
|
+
lexbor_free(names);
|
|
1625
|
+
}
|
|
1626
|
+
|
|
1627
|
+
if (entries != stack_entries) {
|
|
1628
|
+
lexbor_free(entries);
|
|
1629
|
+
}
|
|
1630
|
+
|
|
1631
|
+
return status;
|
|
1632
|
+
}
|
|
1633
|
+
|
|
1140
1634
|
static lxb_status_t
|
|
1141
1635
|
lxb_html_serialize_pretty_text_cb(lxb_dom_text_t *text,
|
|
1142
1636
|
lxb_html_serialize_opt_t opt, size_t indent,
|
|
@@ -1163,7 +1657,9 @@ lxb_html_serialize_pretty_text_cb(lxb_dom_text_t *text,
|
|
|
1163
1657
|
pos++;
|
|
1164
1658
|
}
|
|
1165
1659
|
|
|
1166
|
-
|
|
1660
|
+
if (pos >= end) {
|
|
1661
|
+
return LXB_STATUS_OK;
|
|
1662
|
+
}
|
|
1167
1663
|
}
|
|
1168
1664
|
|
|
1169
1665
|
if (node->parent != NULL) {
|
|
@@ -1331,6 +1827,14 @@ lxb_html_serialize_pretty_tree_cb(lxb_dom_node_t *node,
|
|
|
1331
1827
|
lxb_html_serialize_opt_t opt, size_t indent,
|
|
1332
1828
|
lxb_html_serialize_cb_f cb, void *ctx)
|
|
1333
1829
|
{
|
|
1830
|
+
if (opt & LXB_HTML_SERIALIZE_OPT_HTML5TEST) {
|
|
1831
|
+
opt |= LXB_HTML_SERIALIZE_OPT_WITHOUT_CLOSING
|
|
1832
|
+
| LXB_HTML_SERIALIZE_OPT_TAG_WITH_NS
|
|
1833
|
+
| LXB_HTML_SERIALIZE_OPT_WITHOUT_TEXT_INDENT
|
|
1834
|
+
| LXB_HTML_SERIALIZE_OPT_FULL_DOCTYPE
|
|
1835
|
+
| LXB_HTML_SERIALIZE_OPT_RAW;
|
|
1836
|
+
}
|
|
1837
|
+
|
|
1334
1838
|
/* For a document we must serialize all children without document node. */
|
|
1335
1839
|
if (node->local_name == LXB_TAG__DOCUMENT) {
|
|
1336
1840
|
node = node->first_child;
|