makiri 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/conformance.yml +22 -0
  3. data/.github/workflows/libfuzzer.yml +83 -0
  4. data/.github/workflows/release.yml +12 -7
  5. data/.github/workflows/security.yml +88 -3
  6. data/.github/workflows/valgrind.yml +135 -0
  7. data/CHANGELOG.md +152 -15
  8. data/README.md +183 -13
  9. data/Rakefile +294 -7
  10. data/ext/makiri/bridge/bridge.h +28 -0
  11. data/ext/makiri/bridge/ruby_string.c +282 -12
  12. data/ext/makiri/core/mkr_alloc.c +40 -3
  13. data/ext/makiri/core/mkr_alloc.h +28 -5
  14. data/ext/makiri/core/mkr_buf.c +47 -3
  15. data/ext/makiri/core/mkr_buf.h +112 -3
  16. data/ext/makiri/core/mkr_core.c +143 -0
  17. data/ext/makiri/core/mkr_core.h +11 -2
  18. data/ext/makiri/core/mkr_hash.h +1 -1
  19. data/ext/makiri/core/mkr_span.h +186 -0
  20. data/ext/makiri/core/mkr_text.h +8 -8
  21. data/ext/makiri/core/mkr_utf8.c +101 -0
  22. data/ext/makiri/core/mkr_utf8.h +88 -0
  23. data/ext/makiri/extconf.rb +123 -10
  24. data/ext/makiri/fuzz/Makefile +95 -0
  25. data/ext/makiri/fuzz/check_fuzzer.cc +4 -0
  26. data/ext/makiri/fuzz/xml_fuzz.c +24 -0
  27. data/ext/makiri/fuzz/xpath_fuzz.c +109 -0
  28. data/ext/makiri/glue/glue.h +55 -11
  29. data/ext/makiri/glue/ruby_doc.c +129 -59
  30. data/ext/makiri/glue/ruby_html_css.c +292 -0
  31. data/ext/makiri/glue/{ruby_mutate.c → ruby_html_mutate.c} +248 -52
  32. data/ext/makiri/glue/ruby_html_node.c +859 -0
  33. data/ext/makiri/glue/ruby_html_serialize.c +154 -0
  34. data/ext/makiri/glue/ruby_node.c +74 -729
  35. data/ext/makiri/glue/ruby_node_set.c +167 -32
  36. data/ext/makiri/glue/ruby_xml.c +602 -0
  37. data/ext/makiri/glue/ruby_xml_node.c +1373 -0
  38. data/ext/makiri/glue/ruby_xpath.c +63 -30
  39. data/ext/makiri/glue/ruby_xpath.h +19 -0
  40. data/ext/makiri/lexbor_compat/compat.h +42 -9
  41. data/ext/makiri/lexbor_compat/compat_internal.h +1 -1
  42. data/ext/makiri/lexbor_compat/dom_index.c +2 -2
  43. data/ext/makiri/lexbor_compat/post_parse.c +100 -10
  44. data/ext/makiri/lexbor_compat/source_loc.c +15 -13
  45. data/ext/makiri/lexbor_compat/text_index.c +14 -8
  46. data/ext/makiri/lexbor_compat/utf8_input.c +19 -33
  47. data/ext/makiri/makiri.c +184 -6
  48. data/ext/makiri/makiri.h +43 -2
  49. data/ext/makiri/xml/mkr_xml.h +125 -0
  50. data/ext/makiri/xml/mkr_xml_chars.c +195 -0
  51. data/ext/makiri/xml/mkr_xml_index.c +169 -0
  52. data/ext/makiri/xml/mkr_xml_index.h +48 -0
  53. data/ext/makiri/xml/mkr_xml_mutate.c +817 -0
  54. data/ext/makiri/xml/mkr_xml_mutate.h +139 -0
  55. data/ext/makiri/xml/mkr_xml_node.c +399 -0
  56. data/ext/makiri/xml/mkr_xml_node.h +184 -0
  57. data/ext/makiri/xml/mkr_xml_tree.c +1515 -0
  58. data/ext/makiri/xpath/mkr_css.c +1023 -0
  59. data/ext/makiri/xpath/mkr_css.h +65 -0
  60. data/ext/makiri/xpath/mkr_xpath.c +96 -32
  61. data/ext/makiri/xpath/mkr_xpath.h +109 -4
  62. data/ext/makiri/xpath/mkr_xpath_engine_html.c +17 -0
  63. data/ext/makiri/xpath/mkr_xpath_engine_xml.c +12 -0
  64. data/ext/makiri/xpath/{mkr_xpath_eval.c → mkr_xpath_eval_body.h} +551 -241
  65. data/ext/makiri/xpath/{mkr_xpath_funcs.c → mkr_xpath_funcs_body.h} +318 -276
  66. data/ext/makiri/xpath/mkr_xpath_internal.h +177 -206
  67. data/ext/makiri/xpath/mkr_xpath_lex.c +95 -125
  68. data/ext/makiri/xpath/mkr_xpath_node_access_html.h +138 -0
  69. data/ext/makiri/xpath/mkr_xpath_node_access_xml.h +145 -0
  70. data/ext/makiri/xpath/mkr_xpath_number.c +109 -0
  71. data/ext/makiri/xpath/mkr_xpath_parse.c +83 -94
  72. data/ext/makiri/xpath/mkr_xpath_prelude_html.h +30 -0
  73. data/ext/makiri/xpath/mkr_xpath_prelude_xml.h +28 -0
  74. data/ext/makiri/xpath/mkr_xpath_shared.c +609 -0
  75. data/ext/makiri/xpath/mkr_xpath_value_body.h +801 -0
  76. data/ext/makiri/xpath/mkr_xpath_xml_selftest.c +76 -0
  77. data/lib/makiri/{attribute.rb → attr.rb} +7 -3
  78. data/lib/makiri/cdata_section.rb +19 -0
  79. data/lib/makiri/comment.rb +10 -0
  80. data/lib/makiri/compat_aliases.rb +30 -0
  81. data/lib/makiri/document.rb +9 -73
  82. data/lib/makiri/document_fragment.rb +14 -9
  83. data/lib/makiri/element.rb +4 -4
  84. data/lib/makiri/html/document.rb +106 -0
  85. data/lib/makiri/html/node_methods.rb +19 -0
  86. data/lib/makiri/html.rb +12 -0
  87. data/lib/makiri/node.rb +58 -15
  88. data/lib/makiri/node_set.rb +8 -0
  89. data/lib/makiri/processing_instruction.rb +10 -0
  90. data/lib/makiri/text.rb +1 -1
  91. data/lib/makiri/version.rb +1 -1
  92. data/lib/makiri/xml/builder.rb +263 -0
  93. data/lib/makiri/xml/document.rb +24 -0
  94. data/lib/makiri/xml/node_methods.rb +84 -0
  95. data/lib/makiri/xml.rb +10 -0
  96. data/lib/makiri/xpath_context.rb +1 -1
  97. data/lib/makiri.rb +24 -5
  98. data/script/build_native_gem.rb +2 -2
  99. data/script/check_alloc_failures.rb +266 -0
  100. data/script/check_c_safety.rb +77 -2
  101. data/script/check_c_safety_allowlist.yml +102 -0
  102. data/script/check_leaks.rb +64 -0
  103. data/script/leaks_harness.rb +64 -0
  104. data/vendor/lexbor/CMakeLists.txt +6 -0
  105. data/vendor/lexbor/README.md +12 -0
  106. data/vendor/lexbor/config.cmake +1 -1
  107. data/vendor/lexbor/source/lexbor/core/base.h +1 -1
  108. data/vendor/lexbor/source/lexbor/core/config.cmake +9 -1
  109. data/vendor/lexbor/source/lexbor/css/selectors/pseudo_state.c +2 -3
  110. data/vendor/lexbor/source/lexbor/css/selectors/state.c +3 -0
  111. data/vendor/lexbor/source/lexbor/dom/interfaces/element.c +21 -0
  112. data/vendor/lexbor/source/lexbor/dom/interfaces/element.h +5 -0
  113. data/vendor/lexbor/source/lexbor/encoding/decode.c +33 -4
  114. data/vendor/lexbor/source/lexbor/html/base.h +1 -1
  115. data/vendor/lexbor/source/lexbor/html/interfaces/select_element.c +4 -0
  116. data/vendor/lexbor/source/lexbor/html/serialize.c +545 -41
  117. data/vendor/lexbor/source/lexbor/html/serialize.h +2 -1
  118. data/vendor/lexbor/source/lexbor/html/tokenizer.h +2 -2
  119. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_body.c +1 -1
  120. data/vendor/lexbor/source/lexbor/html/tree.c +6 -6
  121. data/vendor/lexbor/source/lexbor/selectors/selectors.c +12 -3
  122. data/vendor/lexbor/source/lexbor/url/base.h +1 -1
  123. data/vendor/lexbor/source/lexbor/url/url.c +5 -2
  124. data/vendor/lexbor/source/lexbor/url/url.h +9 -0
  125. data/vendor/lexbor/version +1 -1
  126. metadata +53 -9
  127. data/ext/makiri/glue/ruby_css.c +0 -185
  128. data/ext/makiri/glue/ruby_serialize.c +0 -92
  129. data/ext/makiri/xpath/mkr_xpath_value.c +0 -1286
  130. data/lib/makiri/cdata.rb +0 -6
@@ -42,6 +42,13 @@ typedef struct {
42
42
  }
43
43
  lxb_html_serialize_ctx_t;
44
44
 
45
+ typedef struct {
46
+ lxb_dom_attr_t *attr;
47
+ size_t offset;
48
+ size_t length;
49
+ }
50
+ lxb_html_serialize_attr_entry_t;
51
+
45
52
 
46
53
  static lxb_status_t
47
54
  lxb_html_serialize_str_callback(const lxb_char_t *data, size_t len, void *ctx);
@@ -77,6 +84,7 @@ lxb_html_serialize_document_type_cb(lxb_dom_document_type_t *doctype,
77
84
 
78
85
  static lxb_status_t
79
86
  lxb_html_serialize_document_type_full_cb(lxb_dom_document_type_t *doctype,
87
+ lxb_html_serialize_opt_t opt,
80
88
  lxb_html_serialize_cb_f cb, void *ctx);
81
89
 
82
90
  static lxb_status_t
@@ -94,7 +102,7 @@ lxb_html_serialize_send_escaping_string(const lxb_char_t *data, size_t len,
94
102
  lxb_html_serialize_cb_f cb, void *ctx);
95
103
 
96
104
  static lxb_status_t
97
- lxb_html_serialize_attribute_cb(lxb_dom_attr_t *attr, bool has_raw,
105
+ lxb_html_serialize_attribute_cb(lxb_dom_attr_t *attr,
98
106
  lxb_html_serialize_cb_f cb, void *ctx);
99
107
 
100
108
  static lxb_status_t
@@ -107,6 +115,34 @@ lxb_html_serialize_pretty_element_cb(lxb_dom_element_t *element,
107
115
  lxb_html_serialize_opt_t opt, size_t indent,
108
116
  lxb_html_serialize_cb_f cb, void *ctx);
109
117
 
118
+ static lxb_status_t
119
+ lxb_html_serialize_pretty_attribute_cb(lxb_dom_attr_t *attr,
120
+ lxb_html_serialize_opt_t opt, bool has_raw,
121
+ lxb_html_serialize_cb_f cb, void *ctx);
122
+
123
+ static lxb_status_t
124
+ lxb_html_serialize_pretty_attributes_sorted(lxb_dom_element_t *element,
125
+ lxb_html_serialize_opt_t opt,
126
+ size_t indent,
127
+ lxb_html_serialize_cb_f cb,
128
+ void *ctx);
129
+
130
+ static size_t
131
+ lxb_html_serialize_attr_name_size(const lxb_dom_attr_t *attr);
132
+
133
+ static size_t
134
+ lxb_html_serialize_attr_name_build(const lxb_dom_attr_t *attr,
135
+ lxb_char_t *buf, size_t cap);
136
+
137
+ static int
138
+ lxb_html_serialize_attr_entry_cmp(const lxb_html_serialize_attr_entry_t *a,
139
+ const lxb_html_serialize_attr_entry_t *b,
140
+ const lxb_char_t *names);
141
+
142
+ static void
143
+ lxb_html_serialize_attr_sort(lxb_html_serialize_attr_entry_t *entries,
144
+ size_t n, const lxb_char_t *names);
145
+
110
146
  static lxb_status_t
111
147
  lxb_html_serialize_pretty_text_cb(lxb_dom_text_t *text,
112
148
  lxb_html_serialize_opt_t opt, size_t indent,
@@ -354,7 +390,7 @@ lxb_html_serialize_element_cb(lxb_dom_element_t *element,
354
390
  while (attr != NULL) {
355
391
  lxb_html_serialize_send(" ", 1, ctx);
356
392
 
357
- status = lxb_html_serialize_attribute_cb(attr, false, cb, ctx);
393
+ status = lxb_html_serialize_attribute_cb(attr, cb, ctx);
358
394
  if (status != LXB_STATUS_OK) {
359
395
  return status;
360
396
  }
@@ -483,8 +519,10 @@ lxb_html_serialize_document_type_cb(lxb_dom_document_type_t *doctype,
483
519
 
484
520
  static lxb_status_t
485
521
  lxb_html_serialize_document_type_full_cb(lxb_dom_document_type_t *doctype,
522
+ lxb_html_serialize_opt_t opt,
486
523
  lxb_html_serialize_cb_f cb, void *ctx)
487
524
  {
525
+ bool have_pub, have_sys;
488
526
  size_t length;
489
527
  const lxb_char_t *name;
490
528
  lxb_status_t status;
@@ -498,27 +536,54 @@ lxb_html_serialize_document_type_full_cb(lxb_dom_document_type_t *doctype,
498
536
  lxb_html_serialize_send(name, length, ctx);
499
537
  }
500
538
 
501
- if (doctype->public_id.data != NULL && doctype->public_id.length != 0) {
502
- lxb_html_serialize_send(" PUBLIC ", 8, ctx);
503
- lxb_html_serialize_send("\"", 1, ctx);
539
+ have_pub = doctype->public_id.length != 0;
540
+ have_sys = doctype->system_id.length != 0;
504
541
 
505
- lxb_html_serialize_send(doctype->public_id.data,
506
- doctype->public_id.length, ctx);
542
+ if (opt & LXB_HTML_SERIALIZE_OPT_HTML5TEST) {
543
+ /*
544
+ * html5lib-tests format: when either PUBLIC or SYSTEM identifier
545
+ * is present, emit both slots. A missing identifier is shown as "".
546
+ */
547
+ if (have_pub || have_sys) {
548
+ lxb_html_serialize_send(" \"", 2, ctx);
549
+
550
+ if (have_pub) {
551
+ lxb_html_serialize_send(doctype->public_id.data,
552
+ doctype->public_id.length, ctx);
553
+ }
507
554
 
508
- lxb_html_serialize_send("\"", 1, ctx);
555
+ lxb_html_serialize_send("\" \"", 3, ctx);
556
+
557
+ if (have_sys) {
558
+ lxb_html_serialize_send(doctype->system_id.data,
559
+ doctype->system_id.length, ctx);
560
+ }
561
+
562
+ lxb_html_serialize_send("\"", 1, ctx);
563
+ }
509
564
  }
565
+ else {
566
+ if (have_pub) {
567
+ lxb_html_serialize_send(" PUBLIC \"", 9, ctx);
568
+
569
+ lxb_html_serialize_send(doctype->public_id.data,
570
+ doctype->public_id.length, ctx);
510
571
 
511
- if (doctype->system_id.data != NULL && doctype->system_id.length != 0) {
512
- if (doctype->public_id.length == 0) {
513
- lxb_html_serialize_send(" SYSTEM", 7, ctx);
572
+ lxb_html_serialize_send("\"", 1, ctx);
514
573
  }
515
574
 
516
- lxb_html_serialize_send(" \"", 2, ctx);
575
+ if (have_sys) {
576
+ if (!have_pub) {
577
+ lxb_html_serialize_send(" SYSTEM", 7, ctx);
578
+ }
517
579
 
518
- lxb_html_serialize_send(doctype->system_id.data,
519
- doctype->system_id.length, ctx);
580
+ lxb_html_serialize_send(" \"", 2, ctx);
520
581
 
521
- lxb_html_serialize_send("\"", 1, ctx);
582
+ lxb_html_serialize_send(doctype->system_id.data,
583
+ doctype->system_id.length, ctx);
584
+
585
+ lxb_html_serialize_send("\"", 1, ctx);
586
+ }
522
587
  }
523
588
 
524
589
  lxb_html_serialize_send(">", 1, ctx);
@@ -727,7 +792,7 @@ lxb_html_serialize_send_escaping_string(const lxb_char_t *data, size_t len,
727
792
  }
728
793
 
729
794
  static lxb_status_t
730
- lxb_html_serialize_attribute_cb(lxb_dom_attr_t *attr, bool has_raw,
795
+ lxb_html_serialize_attribute_cb(lxb_dom_attr_t *attr,
731
796
  lxb_html_serialize_cb_f cb, void *ctx)
732
797
  {
733
798
  size_t length;
@@ -796,16 +861,11 @@ value:
796
861
 
797
862
  lxb_html_serialize_send("=\"", 2, ctx);
798
863
 
799
- if (has_raw) {
800
- lxb_html_serialize_send(attr->value->data, attr->value->length, ctx);
801
- }
802
- else {
803
- status = lxb_html_serialize_send_escaping_attribute_string(attr->value->data,
804
- attr->value->length,
805
- cb, ctx);
806
- if (status != LXB_STATUS_OK) {
807
- return status;
808
- }
864
+ status = lxb_html_serialize_send_escaping_attribute_string(attr->value->data,
865
+ attr->value->length,
866
+ cb, ctx);
867
+ if (status != LXB_STATUS_OK) {
868
+ return status;
809
869
  }
810
870
 
811
871
  lxb_html_serialize_send("\"", 1, ctx);
@@ -820,6 +880,14 @@ lxb_html_serialize_pretty_cb(lxb_dom_node_t *node,
820
880
  {
821
881
  lxb_status_t status;
822
882
 
883
+ if (opt & LXB_HTML_SERIALIZE_OPT_HTML5TEST) {
884
+ opt |= LXB_HTML_SERIALIZE_OPT_WITHOUT_CLOSING
885
+ | LXB_HTML_SERIALIZE_OPT_TAG_WITH_NS
886
+ | LXB_HTML_SERIALIZE_OPT_WITHOUT_TEXT_INDENT
887
+ | LXB_HTML_SERIALIZE_OPT_FULL_DOCTYPE
888
+ | LXB_HTML_SERIALIZE_OPT_RAW;
889
+ }
890
+
823
891
  switch (node->type) {
824
892
  case LXB_DOM_NODE_TYPE_ELEMENT:
825
893
  lxb_html_serialize_send_indent(indent, ctx);
@@ -861,7 +929,7 @@ lxb_html_serialize_pretty_cb(lxb_dom_node_t *node,
861
929
 
862
930
  if (opt & LXB_HTML_SERIALIZE_OPT_FULL_DOCTYPE) {
863
931
  status = lxb_html_serialize_document_type_full_cb(lxb_dom_interface_document_type(node),
864
- cb, ctx);
932
+ opt, cb, ctx);
865
933
  }
866
934
  else {
867
935
  status = lxb_html_serialize_document_type_cb(lxb_dom_interface_document_type(node),
@@ -920,6 +988,14 @@ lxb_html_serialize_pretty_deep_cb(lxb_dom_node_t *node,
920
988
  {
921
989
  lxb_status_t status;
922
990
 
991
+ if (opt & LXB_HTML_SERIALIZE_OPT_HTML5TEST) {
992
+ opt |= LXB_HTML_SERIALIZE_OPT_WITHOUT_CLOSING
993
+ | LXB_HTML_SERIALIZE_OPT_TAG_WITH_NS
994
+ | LXB_HTML_SERIALIZE_OPT_WITHOUT_TEXT_INDENT
995
+ | LXB_HTML_SERIALIZE_OPT_FULL_DOCTYPE
996
+ | LXB_HTML_SERIALIZE_OPT_RAW;
997
+ }
998
+
923
999
  node = node->first_child;
924
1000
 
925
1001
  while (node != NULL) {
@@ -977,12 +1053,20 @@ lxb_html_serialize_pretty_node_cb(lxb_dom_node_t *node,
977
1053
 
978
1054
  temp = lxb_html_interface_template(node);
979
1055
 
1056
+ if (opt & LXB_HTML_SERIALIZE_OPT_HTML5TEST) {
1057
+ lxb_html_serialize_send_indent((deep + 1), ctx);
1058
+ lxb_html_serialize_send("content", 7, ctx);
1059
+ lxb_html_serialize_send("\n", 1, ctx);
1060
+ }
1061
+
980
1062
  if (temp->content != NULL) {
981
1063
  if (temp->content->node.first_child != NULL)
982
1064
  {
983
- lxb_html_serialize_send_indent((deep + 1), ctx);
984
- lxb_html_serialize_send("#document-fragment", 18, ctx);
985
- lxb_html_serialize_send("\n", 1, ctx);
1065
+ if ((opt & LXB_HTML_SERIALIZE_OPT_HTML5TEST) == 0) {
1066
+ lxb_html_serialize_send_indent((deep + 1), ctx);
1067
+ lxb_html_serialize_send("#document-fragment", 18, ctx);
1068
+ lxb_html_serialize_send("\n", 1, ctx);
1069
+ }
986
1070
 
987
1071
  status = lxb_html_serialize_pretty_deep_cb(&temp->content->node,
988
1072
  opt, (deep + 2),
@@ -1088,7 +1172,13 @@ lxb_html_serialize_pretty_element_cb(lxb_dom_element_t *element,
1088
1172
  if (data != NULL) {
1089
1173
  lxb_html_serialize_send(lexbor_hash_entry_str(&data->entry),
1090
1174
  data->entry.length, ctx);
1091
- lxb_html_serialize_send(":", 1, ctx);
1175
+
1176
+ if (opt & LXB_HTML_SERIALIZE_OPT_HTML5TEST) {
1177
+ lxb_html_serialize_send(" ", 1, ctx);
1178
+ }
1179
+ else {
1180
+ lxb_html_serialize_send(":", 1, ctx);
1181
+ }
1092
1182
  }
1093
1183
  }
1094
1184
 
@@ -1117,26 +1207,430 @@ lxb_html_serialize_pretty_element_cb(lxb_dom_element_t *element,
1117
1207
  }
1118
1208
  }
1119
1209
 
1120
- attr = element->first_attr;
1121
-
1122
- while (attr != NULL) {
1123
- lxb_html_serialize_send(" ", 1, ctx);
1210
+ if (opt & LXB_HTML_SERIALIZE_OPT_HTML5TEST) {
1211
+ lxb_html_serialize_send(">", 1, ctx);
1124
1212
 
1125
- status = lxb_html_serialize_attribute_cb(attr,
1126
- (opt & LXB_HTML_SERIALIZE_OPT_RAW),
1127
- cb, ctx);
1213
+ status = lxb_html_serialize_pretty_attributes_sorted(element, opt,
1214
+ indent, cb, ctx);
1128
1215
  if (status != LXB_STATUS_OK) {
1129
1216
  return status;
1130
1217
  }
1218
+ }
1219
+ else {
1220
+ attr = element->first_attr;
1131
1221
 
1132
- attr = attr->next;
1222
+ while (attr != NULL) {
1223
+ lxb_html_serialize_send(" ", 1, ctx);
1224
+
1225
+ status = lxb_html_serialize_pretty_attribute_cb(attr, opt,
1226
+ (opt & LXB_HTML_SERIALIZE_OPT_RAW),
1227
+ cb, ctx);
1228
+ if (status != LXB_STATUS_OK) {
1229
+ return status;
1230
+ }
1231
+
1232
+ attr = attr->next;
1233
+ }
1234
+
1235
+ lxb_html_serialize_send(">", 1, ctx);
1133
1236
  }
1134
1237
 
1135
- lxb_html_serialize_send(">", 1, ctx);
1238
+ return LXB_STATUS_OK;
1239
+ }
1240
+
1241
+ static lxb_status_t
1242
+ lxb_html_serialize_pretty_attribute_cb(lxb_dom_attr_t *attr,
1243
+ lxb_html_serialize_opt_t opt, bool has_raw,
1244
+ lxb_html_serialize_cb_f cb, void *ctx)
1245
+ {
1246
+ size_t length;
1247
+ lxb_status_t status;
1248
+ const lxb_char_t *str;
1249
+ const lxb_dom_attr_data_t *data;
1250
+ lxb_char_t spliter;
1251
+
1252
+ if (opt & LXB_HTML_SERIALIZE_OPT_HTML5TEST) {
1253
+ spliter = ' ';
1254
+ }
1255
+ else {
1256
+ spliter = ':';
1257
+ }
1258
+
1259
+ data = lxb_dom_attr_data_by_id(attr->node.owner_document->attrs,
1260
+ attr->node.local_name);
1261
+ if (data == NULL) {
1262
+ return LXB_STATUS_ERROR;
1263
+ }
1264
+
1265
+ if (attr->node.ns == LXB_NS__UNDEF) {
1266
+ lxb_html_serialize_send(lexbor_hash_entry_str(&data->entry),
1267
+ data->entry.length, ctx);
1268
+ goto value;
1269
+ }
1270
+
1271
+ if (attr->node.ns == LXB_NS_XML) {
1272
+ lxb_html_serialize_send((const lxb_char_t *) "xml", 3, ctx);
1273
+ lxb_html_serialize_send(&spliter, 1, ctx);
1274
+ lxb_html_serialize_send(lexbor_hash_entry_str(&data->entry),
1275
+ data->entry.length, ctx);
1276
+
1277
+ goto value;
1278
+ }
1279
+
1280
+ if (attr->node.ns == LXB_NS_XMLNS)
1281
+ {
1282
+ if (data->entry.length == 5
1283
+ && lexbor_str_data_cmp(lexbor_hash_entry_str(&data->entry),
1284
+ (const lxb_char_t *) "xmlns"))
1285
+ {
1286
+ lxb_html_serialize_send((const lxb_char_t *) "xmlns", 5, ctx);
1287
+ }
1288
+ else {
1289
+ lxb_html_serialize_send((const lxb_char_t *) "xmlns", 5, ctx);
1290
+ lxb_html_serialize_send(&spliter, 1, ctx);
1291
+ lxb_html_serialize_send(lexbor_hash_entry_str(&data->entry),
1292
+ data->entry.length, ctx);
1293
+ }
1294
+
1295
+ goto value;
1296
+ }
1297
+
1298
+ if (attr->node.ns == LXB_NS_XLINK) {
1299
+ lxb_html_serialize_send((const lxb_char_t *) "xlink", 5, ctx);
1300
+ lxb_html_serialize_send(&spliter, 1, ctx);
1301
+ lxb_html_serialize_send(lexbor_hash_entry_str(&data->entry),
1302
+ data->entry.length, ctx);
1303
+
1304
+ goto value;
1305
+ }
1306
+
1307
+ str = lxb_dom_attr_qualified_name(attr, &length);
1308
+ if (str == NULL) {
1309
+ return LXB_STATUS_ERROR;
1310
+ }
1311
+
1312
+ lxb_html_serialize_send(str, length, ctx);
1313
+
1314
+ value:
1315
+
1316
+ if (attr->value == NULL) {
1317
+ lxb_html_serialize_send("=\"\"", 3, ctx);
1318
+ return LXB_STATUS_OK;
1319
+ }
1320
+
1321
+ lxb_html_serialize_send("=\"", 2, ctx);
1322
+
1323
+ if (has_raw) {
1324
+ lxb_html_serialize_send(attr->value->data, attr->value->length, ctx);
1325
+ }
1326
+ else {
1327
+ status = lxb_html_serialize_send_escaping_attribute_string(attr->value->data,
1328
+ attr->value->length,
1329
+ cb, ctx);
1330
+ if (status != LXB_STATUS_OK) {
1331
+ return status;
1332
+ }
1333
+ }
1334
+
1335
+ lxb_html_serialize_send("\"", 1, ctx);
1136
1336
 
1137
1337
  return LXB_STATUS_OK;
1138
1338
  }
1139
1339
 
1340
+ static size_t
1341
+ lxb_html_serialize_attr_name_build(const lxb_dom_attr_t *attr,
1342
+ lxb_char_t *buf, size_t cap)
1343
+ {
1344
+ size_t length, xmlns_len, pos;
1345
+ const lxb_char_t *str;
1346
+ const lexbor_str_t *ns;
1347
+ const lxb_dom_attr_data_t *data;
1348
+
1349
+ static const lexbor_str_t str_xml = lexbor_str("xml ");
1350
+ static const lexbor_str_t str_xmlns = lexbor_str("xmlns ");
1351
+ static const lexbor_str_t str_xlink = lexbor_str("xlink ");
1352
+
1353
+ data = lxb_dom_attr_data_by_id(attr->node.owner_document->attrs,
1354
+ attr->node.local_name);
1355
+ if (data == NULL) {
1356
+ return 0;
1357
+ }
1358
+
1359
+ pos = 0;
1360
+ str = lexbor_hash_entry_str(&data->entry);
1361
+ length = data->entry.length;
1362
+
1363
+ switch (attr->node.ns) {
1364
+ case LXB_NS_XML:
1365
+ if (str_xml.length + length > cap) {
1366
+ return 0;
1367
+ }
1368
+
1369
+ ns = &str_xml;
1370
+ goto done;
1371
+
1372
+ case LXB_NS_XMLNS:
1373
+ xmlns_len = str_xmlns.length - 1;
1374
+
1375
+ if (length == xmlns_len
1376
+ && lexbor_str_data_ncmp(str, str_xmlns.data, xmlns_len))
1377
+ {
1378
+ if (xmlns_len > cap) {
1379
+ return 0;
1380
+ }
1381
+
1382
+ memcpy(buf, str_xmlns.data, xmlns_len);
1383
+ return xmlns_len;
1384
+ }
1385
+
1386
+ if (str_xmlns.length + length > cap) {
1387
+ return 0;
1388
+ }
1389
+
1390
+ ns = &str_xmlns;
1391
+ goto done;
1392
+
1393
+ case LXB_NS_XLINK:
1394
+ if (str_xlink.length + length > cap) {
1395
+ return 0;
1396
+ }
1397
+
1398
+ ns = &str_xlink;
1399
+ goto done;
1400
+
1401
+ case LXB_NS__UNDEF:
1402
+ if (length > cap) {
1403
+ return 0;
1404
+ }
1405
+
1406
+ memcpy(buf, str, length);
1407
+ return length;
1408
+
1409
+ default:
1410
+ if (attr->qualified_name != 0) {
1411
+ data = lxb_dom_attr_data_by_id(attr->node.owner_document->attrs,
1412
+ attr->qualified_name);
1413
+ if (data == NULL) {
1414
+ return 0;
1415
+ }
1416
+
1417
+ str = lexbor_hash_entry_str(&data->entry);
1418
+ length = data->entry.length;
1419
+ }
1420
+
1421
+ if (length > cap) {
1422
+ return 0;
1423
+ }
1424
+
1425
+ memcpy(buf, str, length);
1426
+ return length;
1427
+ }
1428
+
1429
+ done:
1430
+
1431
+ memcpy(buf, ns->data, ns->length);
1432
+ pos = ns->length;
1433
+
1434
+ memcpy(buf + pos, str, length);
1435
+ pos += length;
1436
+
1437
+ return pos;
1438
+ }
1439
+
1440
+ static size_t
1441
+ lxb_html_serialize_attr_name_size(const lxb_dom_attr_t *attr)
1442
+ {
1443
+ size_t length;
1444
+ const lxb_dom_attr_data_t *data;
1445
+
1446
+ static const lexbor_str_t str_xml = lexbor_str("xml ");
1447
+ static const lexbor_str_t str_xmlns = lexbor_str("xmlns ");
1448
+ static const lexbor_str_t str_xlink = lexbor_str("xlink ");
1449
+
1450
+ data = lxb_dom_attr_data_by_id(attr->node.owner_document->attrs,
1451
+ attr->node.local_name);
1452
+ if (data == NULL) {
1453
+ return 0;
1454
+ }
1455
+
1456
+ length = data->entry.length;
1457
+
1458
+ switch (attr->node.ns) {
1459
+ case LXB_NS_XML:
1460
+ return str_xml.length + length;
1461
+
1462
+ case LXB_NS_XMLNS:
1463
+ if (length == str_xmlns.length - 1
1464
+ && lexbor_str_data_ncmp(lexbor_hash_entry_str(&data->entry),
1465
+ str_xmlns.data, str_xmlns.length - 1))
1466
+ {
1467
+ return str_xmlns.length - 1;
1468
+ }
1469
+
1470
+ return str_xmlns.length + length;
1471
+
1472
+ case LXB_NS_XLINK:
1473
+ return str_xlink.length + length;
1474
+
1475
+ case LXB_NS__UNDEF:
1476
+ return length;
1477
+
1478
+ default:
1479
+ if (attr->qualified_name != 0) {
1480
+ data = lxb_dom_attr_data_by_id(attr->node.owner_document->attrs,
1481
+ attr->qualified_name);
1482
+ if (data == NULL) {
1483
+ return 0;
1484
+ }
1485
+
1486
+ length = data->entry.length;
1487
+ }
1488
+
1489
+ return length;
1490
+ }
1491
+ }
1492
+
1493
+ static int
1494
+ lxb_html_serialize_attr_entry_cmp(const lxb_html_serialize_attr_entry_t *a,
1495
+ const lxb_html_serialize_attr_entry_t *b,
1496
+ const lxb_char_t *names)
1497
+ {
1498
+ int c;
1499
+ size_t min;
1500
+
1501
+ min = (a->length < b->length) ? a->length : b->length;
1502
+
1503
+ c = memcmp(names + a->offset, names + b->offset, min);
1504
+ if (c != 0) {
1505
+ return c;
1506
+ }
1507
+
1508
+ if (a->length < b->length) return -1;
1509
+ if (a->length > b->length) return 1;
1510
+ return 0;
1511
+ }
1512
+
1513
+ static void
1514
+ lxb_html_serialize_attr_sort(lxb_html_serialize_attr_entry_t *entries,
1515
+ size_t n, const lxb_char_t *names)
1516
+ {
1517
+ size_t i, j;
1518
+ lxb_html_serialize_attr_entry_t cur;
1519
+
1520
+ for (i = 1; i < n; i++) {
1521
+ cur = entries[i];
1522
+ j = i;
1523
+
1524
+ while (j > 0
1525
+ && lxb_html_serialize_attr_entry_cmp(&entries[j - 1], &cur,
1526
+ names) > 0)
1527
+ {
1528
+ entries[j] = entries[j - 1];
1529
+ j--;
1530
+ }
1531
+
1532
+ entries[j] = cur;
1533
+ }
1534
+ }
1535
+
1536
+ static lxb_status_t
1537
+ lxb_html_serialize_pretty_attributes_sorted(lxb_dom_element_t *element,
1538
+ lxb_html_serialize_opt_t opt,
1539
+ size_t indent,
1540
+ lxb_html_serialize_cb_f cb,
1541
+ void *ctx)
1542
+ {
1543
+ size_t i, k, off, len, count, total;
1544
+ lxb_status_t status;
1545
+ lxb_dom_attr_t *attr;
1546
+ lxb_html_serialize_attr_entry_t *entries;
1547
+ lxb_char_t *names;
1548
+ lxb_char_t stack_names[256];
1549
+ lxb_html_serialize_attr_entry_t stack_entries[16];
1550
+
1551
+ count = 0;
1552
+ total = 0;
1553
+ entries = stack_entries;
1554
+ names = stack_names;
1555
+
1556
+ for (attr = element->first_attr; attr != NULL; attr = attr->next) {
1557
+ count += 1;
1558
+ total += lxb_html_serialize_attr_name_size(attr);
1559
+ }
1560
+
1561
+ if (count == 0) {
1562
+ return LXB_STATUS_OK;
1563
+ }
1564
+
1565
+ if (count > sizeof(stack_entries) / sizeof(stack_entries[0])) {
1566
+ entries = lexbor_malloc(count * sizeof(lxb_html_serialize_attr_entry_t));
1567
+ if (entries == NULL) {
1568
+ return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
1569
+ }
1570
+ }
1571
+
1572
+ if (total > sizeof(stack_names)) {
1573
+ names = lexbor_malloc(total);
1574
+ if (names == NULL) {
1575
+ if (entries != stack_entries) {
1576
+ lexbor_free(entries);
1577
+ }
1578
+
1579
+ return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
1580
+ }
1581
+ }
1582
+
1583
+ i = 0;
1584
+ off = 0;
1585
+ for (attr = element->first_attr; attr != NULL; attr = attr->next) {
1586
+ len = lxb_html_serialize_attr_name_build(attr, names + off,
1587
+ total - off);
1588
+ entries[i].attr = attr;
1589
+ entries[i].offset = off;
1590
+ entries[i].length = len;
1591
+
1592
+ off += len;
1593
+ i += 1;
1594
+ }
1595
+
1596
+ lxb_html_serialize_attr_sort(entries, count, names);
1597
+
1598
+ status = LXB_STATUS_OK;
1599
+
1600
+ for (i = 0; i < count; i++) {
1601
+ status = cb((const lxb_char_t *) "\n", 1, ctx);
1602
+ if (status != LXB_STATUS_OK) {
1603
+ goto done;
1604
+ }
1605
+
1606
+ for (k = 0; k < indent + 1; k++) {
1607
+ status = cb((const lxb_char_t *) " ", 2, ctx);
1608
+ if (status != LXB_STATUS_OK) {
1609
+ goto done;
1610
+ }
1611
+ }
1612
+
1613
+ status = lxb_html_serialize_pretty_attribute_cb(entries[i].attr, opt,
1614
+ (opt & LXB_HTML_SERIALIZE_OPT_RAW),
1615
+ cb, ctx);
1616
+ if (status != LXB_STATUS_OK) {
1617
+ goto done;
1618
+ }
1619
+ }
1620
+
1621
+ done:
1622
+
1623
+ if (names != stack_names) {
1624
+ lexbor_free(names);
1625
+ }
1626
+
1627
+ if (entries != stack_entries) {
1628
+ lexbor_free(entries);
1629
+ }
1630
+
1631
+ return status;
1632
+ }
1633
+
1140
1634
  static lxb_status_t
1141
1635
  lxb_html_serialize_pretty_text_cb(lxb_dom_text_t *text,
1142
1636
  lxb_html_serialize_opt_t opt, size_t indent,
@@ -1163,7 +1657,9 @@ lxb_html_serialize_pretty_text_cb(lxb_dom_text_t *text,
1163
1657
  pos++;
1164
1658
  }
1165
1659
 
1166
- return LXB_STATUS_OK;
1660
+ if (pos >= end) {
1661
+ return LXB_STATUS_OK;
1662
+ }
1167
1663
  }
1168
1664
 
1169
1665
  if (node->parent != NULL) {
@@ -1331,6 +1827,14 @@ lxb_html_serialize_pretty_tree_cb(lxb_dom_node_t *node,
1331
1827
  lxb_html_serialize_opt_t opt, size_t indent,
1332
1828
  lxb_html_serialize_cb_f cb, void *ctx)
1333
1829
  {
1830
+ if (opt & LXB_HTML_SERIALIZE_OPT_HTML5TEST) {
1831
+ opt |= LXB_HTML_SERIALIZE_OPT_WITHOUT_CLOSING
1832
+ | LXB_HTML_SERIALIZE_OPT_TAG_WITH_NS
1833
+ | LXB_HTML_SERIALIZE_OPT_WITHOUT_TEXT_INDENT
1834
+ | LXB_HTML_SERIALIZE_OPT_FULL_DOCTYPE
1835
+ | LXB_HTML_SERIALIZE_OPT_RAW;
1836
+ }
1837
+
1334
1838
  /* For a document we must serialize all children without document node. */
1335
1839
  if (node->local_name == LXB_TAG__DOCUMENT) {
1336
1840
  node = node->first_child;