jsoncons 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. checksums.yaml +7 -0
  2. data/ext/jsoncons/extconf.rb +43 -0
  3. data/ext/jsoncons/jsoncons.cpp +161 -0
  4. data/ext/jsoncons/jsoncons.h +10 -0
  5. data/jsoncons.gemspec +44 -0
  6. data/lib/jsoncons/jsoncons/examples/input/address-book.json +13 -0
  7. data/lib/jsoncons/jsoncons/examples/input/books.json +28 -0
  8. data/lib/jsoncons/jsoncons/examples/input/countries.json +7 -0
  9. data/lib/jsoncons/jsoncons/examples/input/employees.json +30 -0
  10. data/lib/jsoncons/jsoncons/examples/input/jsonschema/name.json +15 -0
  11. data/lib/jsoncons/jsoncons/examples/input/multiple-json-objects.json +3 -0
  12. data/lib/jsoncons/jsoncons/examples/input/sales.csv +6 -0
  13. data/lib/jsoncons/jsoncons/examples/input/store.json +28 -0
  14. data/lib/jsoncons/jsoncons/examples/input/tasks.csv +6 -0
  15. data/lib/jsoncons/jsoncons/include/jsoncons/allocator_holder.hpp +38 -0
  16. data/lib/jsoncons/jsoncons/include/jsoncons/basic_json.hpp +5905 -0
  17. data/lib/jsoncons/jsoncons/include/jsoncons/bigint.hpp +1611 -0
  18. data/lib/jsoncons/jsoncons/include/jsoncons/byte_string.hpp +820 -0
  19. data/lib/jsoncons/jsoncons/include/jsoncons/config/binary_config.hpp +226 -0
  20. data/lib/jsoncons/jsoncons/include/jsoncons/config/compiler_support.hpp +375 -0
  21. data/lib/jsoncons/jsoncons/include/jsoncons/config/jsoncons_config.hpp +309 -0
  22. data/lib/jsoncons/jsoncons/include/jsoncons/config/version.hpp +40 -0
  23. data/lib/jsoncons/jsoncons/include/jsoncons/conv_error.hpp +218 -0
  24. data/lib/jsoncons/jsoncons/include/jsoncons/decode_json.hpp +209 -0
  25. data/lib/jsoncons/jsoncons/include/jsoncons/decode_traits.hpp +651 -0
  26. data/lib/jsoncons/jsoncons/include/jsoncons/detail/endian.hpp +44 -0
  27. data/lib/jsoncons/jsoncons/include/jsoncons/detail/grisu3.hpp +312 -0
  28. data/lib/jsoncons/jsoncons/include/jsoncons/detail/optional.hpp +483 -0
  29. data/lib/jsoncons/jsoncons/include/jsoncons/detail/parse_number.hpp +1133 -0
  30. data/lib/jsoncons/jsoncons/include/jsoncons/detail/span.hpp +188 -0
  31. data/lib/jsoncons/jsoncons/include/jsoncons/detail/string_view.hpp +537 -0
  32. data/lib/jsoncons/jsoncons/include/jsoncons/detail/string_wrapper.hpp +370 -0
  33. data/lib/jsoncons/jsoncons/include/jsoncons/detail/write_number.hpp +567 -0
  34. data/lib/jsoncons/jsoncons/include/jsoncons/encode_json.hpp +315 -0
  35. data/lib/jsoncons/jsoncons/include/jsoncons/encode_traits.hpp +378 -0
  36. data/lib/jsoncons/jsoncons/include/jsoncons/json.hpp +18 -0
  37. data/lib/jsoncons/jsoncons/include/jsoncons/json_array.hpp +324 -0
  38. data/lib/jsoncons/jsoncons/include/jsoncons/json_content_handler.hpp +12 -0
  39. data/lib/jsoncons/jsoncons/include/jsoncons/json_cursor.hpp +448 -0
  40. data/lib/jsoncons/jsoncons/include/jsoncons/json_decoder.hpp +420 -0
  41. data/lib/jsoncons/jsoncons/include/jsoncons/json_encoder.hpp +1587 -0
  42. data/lib/jsoncons/jsoncons/include/jsoncons/json_error.hpp +156 -0
  43. data/lib/jsoncons/jsoncons/include/jsoncons/json_exception.hpp +241 -0
  44. data/lib/jsoncons/jsoncons/include/jsoncons/json_filter.hpp +653 -0
  45. data/lib/jsoncons/jsoncons/include/jsoncons/json_fwd.hpp +23 -0
  46. data/lib/jsoncons/jsoncons/include/jsoncons/json_object.hpp +1772 -0
  47. data/lib/jsoncons/jsoncons/include/jsoncons/json_options.hpp +862 -0
  48. data/lib/jsoncons/jsoncons/include/jsoncons/json_parser.hpp +2900 -0
  49. data/lib/jsoncons/jsoncons/include/jsoncons/json_reader.hpp +731 -0
  50. data/lib/jsoncons/jsoncons/include/jsoncons/json_traits_macros.hpp +1072 -0
  51. data/lib/jsoncons/jsoncons/include/jsoncons/json_traits_macros_deprecated.hpp +144 -0
  52. data/lib/jsoncons/jsoncons/include/jsoncons/json_type.hpp +206 -0
  53. data/lib/jsoncons/jsoncons/include/jsoncons/json_type_traits.hpp +1830 -0
  54. data/lib/jsoncons/jsoncons/include/jsoncons/json_visitor.hpp +1560 -0
  55. data/lib/jsoncons/jsoncons/include/jsoncons/json_visitor2.hpp +2079 -0
  56. data/lib/jsoncons/jsoncons/include/jsoncons/pretty_print.hpp +89 -0
  57. data/lib/jsoncons/jsoncons/include/jsoncons/ser_context.hpp +62 -0
  58. data/lib/jsoncons/jsoncons/include/jsoncons/sink.hpp +289 -0
  59. data/lib/jsoncons/jsoncons/include/jsoncons/source.hpp +777 -0
  60. data/lib/jsoncons/jsoncons/include/jsoncons/source_adaptor.hpp +148 -0
  61. data/lib/jsoncons/jsoncons/include/jsoncons/staj2_cursor.hpp +1189 -0
  62. data/lib/jsoncons/jsoncons/include/jsoncons/staj_cursor.hpp +1254 -0
  63. data/lib/jsoncons/jsoncons/include/jsoncons/staj_iterator.hpp +449 -0
  64. data/lib/jsoncons/jsoncons/include/jsoncons/tag_type.hpp +245 -0
  65. data/lib/jsoncons/jsoncons/include/jsoncons/text_source_adaptor.hpp +144 -0
  66. data/lib/jsoncons/jsoncons/include/jsoncons/traits_extension.hpp +884 -0
  67. data/lib/jsoncons/jsoncons/include/jsoncons/typed_array_view.hpp +250 -0
  68. data/lib/jsoncons/jsoncons/include/jsoncons/unicode_traits.hpp +1330 -0
  69. data/lib/jsoncons/jsoncons/include/jsoncons/uri.hpp +635 -0
  70. data/lib/jsoncons/jsoncons/include/jsoncons/value_converter.hpp +340 -0
  71. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson.hpp +23 -0
  72. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_cursor.hpp +320 -0
  73. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_decimal128.hpp +865 -0
  74. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_encoder.hpp +585 -0
  75. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_error.hpp +103 -0
  76. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_oid.hpp +245 -0
  77. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_options.hpp +75 -0
  78. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_parser.hpp +645 -0
  79. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_reader.hpp +92 -0
  80. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_type.hpp +44 -0
  81. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/decode_bson.hpp +201 -0
  82. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/encode_bson.hpp +144 -0
  83. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor.hpp +26 -0
  84. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_cursor.hpp +351 -0
  85. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_cursor2.hpp +265 -0
  86. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_detail.hpp +93 -0
  87. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_encoder.hpp +1766 -0
  88. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_error.hpp +105 -0
  89. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_options.hpp +113 -0
  90. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_parser.hpp +1942 -0
  91. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_reader.hpp +116 -0
  92. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/decode_cbor.hpp +203 -0
  93. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/encode_cbor.hpp +151 -0
  94. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv.hpp +17 -0
  95. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_cursor.hpp +358 -0
  96. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_encoder.hpp +954 -0
  97. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_error.hpp +85 -0
  98. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_options.hpp +973 -0
  99. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_parser.hpp +2099 -0
  100. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_reader.hpp +348 -0
  101. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_serializer.hpp +12 -0
  102. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/decode_csv.hpp +208 -0
  103. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/encode_csv.hpp +122 -0
  104. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jmespath/jmespath.hpp +5215 -0
  105. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jmespath/jmespath_error.hpp +215 -0
  106. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpatch/jsonpatch.hpp +579 -0
  107. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpatch/jsonpatch_error.hpp +121 -0
  108. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/expression.hpp +3329 -0
  109. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/flatten.hpp +432 -0
  110. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/json_location.hpp +445 -0
  111. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/json_query.hpp +115 -0
  112. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/jsonpath.hpp +13 -0
  113. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/jsonpath_error.hpp +240 -0
  114. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/jsonpath_expression.hpp +2612 -0
  115. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/jsonpath_selector.hpp +1322 -0
  116. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpointer/jsonpointer.hpp +1577 -0
  117. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpointer/jsonpointer_error.hpp +119 -0
  118. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/format_validator.hpp +968 -0
  119. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/json_validator.hpp +120 -0
  120. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/jsonschema.hpp +13 -0
  121. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/jsonschema_error.hpp +105 -0
  122. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/jsonschema_version.hpp +18 -0
  123. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/keyword_validator.hpp +1745 -0
  124. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/keyword_validator_factory.hpp +556 -0
  125. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/schema_draft7.hpp +198 -0
  126. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/schema_location.hpp +200 -0
  127. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/schema_version.hpp +35 -0
  128. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/subschema.hpp +144 -0
  129. data/lib/jsoncons/jsoncons/include/jsoncons_ext/mergepatch/mergepatch.hpp +103 -0
  130. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/decode_msgpack.hpp +202 -0
  131. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/encode_msgpack.hpp +142 -0
  132. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack.hpp +24 -0
  133. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_cursor.hpp +343 -0
  134. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_cursor2.hpp +259 -0
  135. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_encoder.hpp +753 -0
  136. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_error.hpp +94 -0
  137. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_options.hpp +74 -0
  138. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_parser.hpp +748 -0
  139. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_reader.hpp +116 -0
  140. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_type.hpp +63 -0
  141. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/decode_ubjson.hpp +201 -0
  142. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/encode_ubjson.hpp +142 -0
  143. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson.hpp +23 -0
  144. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_cursor.hpp +307 -0
  145. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_encoder.hpp +502 -0
  146. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_error.hpp +100 -0
  147. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_options.hpp +87 -0
  148. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_parser.hpp +880 -0
  149. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_reader.hpp +92 -0
  150. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_type.hpp +43 -0
  151. data/lib/jsoncons/version.rb +5 -0
  152. data/lib/jsoncons.rb +33 -0
  153. data/test/jsoncons_test.rb +108 -0
  154. data/test/test_helper.rb +7 -0
  155. metadata +268 -0
@@ -0,0 +1,1330 @@
1
+ // Copyright 2016 Daniel Parker
2
+ // Distributed under the Boost license, Version 1.0.
3
+ // (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
4
+
5
+ // See https://github.com/danielaparker/unicode_traits for latest version
6
+
7
+ /*
8
+ * Includes code derived from Unicode, Inc decomposition code in ConvertUTF.h and ConvertUTF.c
9
+ * http://www.unicode.org/
10
+ *
11
+ * "Unicode, Inc. hereby grants the right to freely use the information
12
+ * supplied in this file in the creation of products supporting the
13
+ * Unicode Standard."
14
+ */
15
+
16
+ #ifndef JSONCONS_UNICODE_TRAITS_HPP
17
+ #define JSONCONS_UNICODE_TRAITS_HPP
18
+
19
+ #include <cstring>
20
+ #include <string>
21
+ #include <iterator>
22
+ #include <type_traits>
23
+ #include <system_error>
24
+ #include <limits>
25
+ #include <jsoncons/config/compiler_support.hpp>
26
+ #include <jsoncons/traits_extension.hpp>
27
+
28
+ namespace jsoncons { namespace unicode_traits {
29
+
30
/// Character encodings that the detection helpers in this header can report.
enum class encoding_kind
{
    undetected,
    utf8,
    utf16le,
    utf16be,
    utf32le,
    utf32be
};

/// Returns the textual name of `encoding`.
/// Any value without a dedicated name maps to "undetected".
inline
std::string to_string(encoding_kind encoding)
{
    const char* label = "undetected";

    if (encoding == encoding_kind::utf8)
    {
        label = "utf8";
    }
    else if (encoding == encoding_kind::utf16le)
    {
        label = "utf16le";
    }
    else if (encoding == encoding_kind::utf16be)
    {
        label = "utf16be";
    }
    else if (encoding == encoding_kind::utf32le)
    {
        label = "utf32le";
    }
    else if (encoding == encoding_kind::utf32be)
    {
        label = "utf32be";
    }

    return std::string(label);
}
51
+
52
+ template <class Byte>
53
+ struct detect_encoding_result
54
+ {
55
+ const Byte* ptr;
56
+ encoding_kind encoding;
57
+ };
58
+
59
+ template <class CharT>
60
+ typename std::enable_if<traits_extension::is_char8<CharT>::value,detect_encoding_result<CharT>>::type
61
+ detect_encoding_from_bom(const CharT* data, std::size_t length)
62
+ {
63
+ const uint8_t bom_utf8[] = {0xef,0xbb,0xbf};
64
+ const uint8_t bom_utf16le[] = {0xff,0xfe};
65
+ const uint8_t bom_utf16be[] = {0xfe,0xff};
66
+ const uint8_t bom_utf32le[] = {0xff,0xfe,0x00,0x00};
67
+ const uint8_t bom_utf32be[] = {0x00,0x00,0xfe,0xff};
68
+
69
+ if (length >= 4 && !memcmp(data,bom_utf32le,4))
70
+ {
71
+ return detect_encoding_result<CharT>{data+4,encoding_kind::utf32le};
72
+ }
73
+ else if (length >= 4 && !memcmp(data,bom_utf32be,4))
74
+ {
75
+ return detect_encoding_result<CharT>{data+4,encoding_kind::utf32be};
76
+ }
77
+ else if (length >= 2 && !memcmp(data,bom_utf16le,2))
78
+ {
79
+ return detect_encoding_result<CharT>{data+2,encoding_kind::utf16le};
80
+ }
81
+ else if (length >= 2 && !memcmp(data,bom_utf16be,2))
82
+ {
83
+ return detect_encoding_result<CharT>{data+2,encoding_kind::utf16be};
84
+ }
85
+ else if (length >= 3 && !memcmp(data,bom_utf8,3))
86
+ {
87
+ return detect_encoding_result<CharT>{data+3,encoding_kind::utf8};
88
+ }
89
+ else
90
+ {
91
+ return detect_encoding_result<CharT>{data,encoding_kind::undetected};
92
+ }
93
+ }
94
+
95
+ template <class CharT>
96
+ typename std::enable_if<traits_extension::is_char16<CharT>::value || traits_extension::is_char32<CharT>::value,detect_encoding_result<CharT>>::type
97
+ detect_encoding_from_bom(const CharT* data, std::size_t)
98
+ {
99
+ return detect_encoding_result<CharT>{data,encoding_kind::undetected};
100
+ }
101
+
102
+ template <class CharT>
103
+ typename std::enable_if<traits_extension::is_char8<CharT>::value,detect_encoding_result<CharT>>::type
104
+ detect_json_encoding(const CharT* data, std::size_t length)
105
+ {
106
+ detect_encoding_result<CharT> r = detect_encoding_from_bom(data,length);
107
+ if (r.encoding != encoding_kind::undetected)
108
+ {
109
+ return r;
110
+ }
111
+ else if (length < 4)
112
+ {
113
+ return detect_encoding_result<CharT>{data,encoding_kind::utf8};
114
+ }
115
+ else if (*data == 0 && *(data+1) == 0 && *(data+2) == 0)
116
+ {
117
+ return detect_encoding_result<CharT>{data,encoding_kind::utf32be};
118
+ }
119
+ else if (*data == 0 && *(data+2) == 0)
120
+ {
121
+ return detect_encoding_result<CharT>{data,encoding_kind::utf16be};
122
+ }
123
+ else if (*(data+1) == 0 && *(data+2) == 0 && *(data+3) == 0)
124
+ {
125
+ return detect_encoding_result<CharT>{data,encoding_kind::utf32le};
126
+ }
127
+ else if (*(data+1) == 0 && *(data+3) == 0)
128
+ {
129
+ return detect_encoding_result<CharT>{data,encoding_kind::utf16le};
130
+ }
131
+ else
132
+ {
133
+ return detect_encoding_result<CharT>{data,encoding_kind::utf8};
134
+ }
135
+ }
136
+
137
+ template <class CharT>
138
+ typename std::enable_if<traits_extension::is_char16<CharT>::value || traits_extension::is_char32<CharT>::value,detect_encoding_result<CharT>>::type
139
+ detect_json_encoding(const CharT* data, std::size_t)
140
+ {
141
+ return detect_encoding_result<CharT>{data,encoding_kind::undetected};
142
+ }
143
+
144
+ /*
145
+ * Magic values subtracted from a buffer value during UTF8 conversion.
146
+ * This table contains as many values as there might be trailing bytes
147
+ * in a UTF-8 sequence. Source: ConvertUTF.c
148
+ */
149
+ const uint32_t offsets_from_utf8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
150
+ 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
151
+
152
+ /*
153
+ * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
154
+ * into the first byte, depending on how many bytes follow. There are
155
+ * as many entries in this table as there are UTF-8 sequence types.
156
+ * (I.e., one byte sequence, two byte... etc.). Remember that sequences
157
+ * for *legal* UTF-8 will be 4 or fewer bytes total. Source: ConvertUTF.c
158
+ */
159
+ const uint8_t first_byte_mark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
160
+
161
+ /*
162
+ * Index into the table below with the first byte of a UTF-8 sequence to
163
+ * get the number of trailing bytes that are supposed to follow it.
164
+ * Note that *legal* UTF-8 sequences can't have 4 or 5 trailing bytes. The table is
165
+ * left as-is for anyone who may want to do such conversion, which was
166
+ * allowed in earlier algorithms. Source: ConvertUTF.c
167
+ */
168
+ const uint8_t trailing_bytes_for_utf8[256] = {
169
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
170
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
171
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
172
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
173
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
174
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
175
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
176
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
177
+ };
178
+
179
+ // Some fundamental constants. Source: ConvertUTF.h
180
+ const uint32_t replacement_char = 0x0000FFFD;
181
+ const uint32_t max_bmp = 0x0000FFFF;
182
+ const uint32_t max_utf16 = 0x0010FFFF;
183
+ const uint32_t max_utf32 = 0x7FFFFFFF;
184
+ const uint32_t max_legal_utf32 = 0x0010FFFF;
185
+
186
+ const int half_shift = 10; // used for shifting by 10 bits
187
+ const uint32_t half_base = 0x0010000UL;
188
+ const uint32_t half_mask = 0x3FFUL;
189
+
190
// Boundaries of the UTF-16 surrogate ranges: high (lead) surrogates occupy
// [sur_high_start, sur_high_end], low (trail) surrogates occupy
// [sur_low_start, sur_low_end]. The two ranges are adjacent.
const uint16_t sur_high_start = 0xD800;
const uint16_t sur_high_end = 0xDBFF;
const uint16_t sur_low_start = 0xDC00;
const uint16_t sur_low_end = 0xDFFF;

/// True when the top two bits of `ch` are `10`, i.e. the byte has the form of
/// a UTF-8 continuation byte (0x80..0xBF).
inline
bool is_continuation_byte(unsigned char ch) noexcept
{
    return (ch & 0xC0) == 0x80;
}

/// True when `ch` lies in the high-surrogate range 0xD800..0xDBFF.
inline
bool is_high_surrogate(uint32_t ch) noexcept
{
    return (ch >= sur_high_start && ch <= sur_high_end);
}

/// True when `ch` lies in the low-surrogate range 0xDC00..0xDFFF.
inline
bool is_low_surrogate(uint32_t ch) noexcept
{
    return (ch >= sur_low_start && ch <= sur_low_end);
}

/// True when `ch` is any surrogate, high or low (0xD800..0xDFFF).
inline
bool is_surrogate(uint32_t ch) noexcept
{
    return (ch >= sur_high_start && ch <= sur_low_end);
}
218
+
219
/// Selects how the conversion routines treat ill-formed input.
enum class conv_flags
{
    /// Default: surrogate code points found in the input are reported as errors.
    strict = 0,
    /// Relaxes the surrogate checks that `strict` enforces.
    lenient = 1
};
224
+
225
+ // conv_errc
226
+
227
/// Error conditions reported by the unicode_traits validation and conversion
/// routines. `success` is zero so a value-initialised conv_errc means "no error".
enum class conv_errc
{
    success = 0,
    over_long_utf8_sequence = 1, // over long utf8 sequence
    expected_continuation_byte,  // expected continuation byte
    unpaired_high_surrogate,     // unpaired high surrogate UTF-16
    illegal_surrogate_value,     // UTF-16 surrogate values are illegal in UTF-32
    source_exhausted,            // partial character in source, but hit end
    source_illegal               // source sequence is illegal/malformed
};

/// std::error_category that supplies the category name and the message text
/// for conv_errc values.
class Unicode_traits_error_category_impl_
   : public std::error_category
{
public:
    /// Name identifying this category.
    const char* name() const noexcept override
    {
        return "unicode_traits conversion error";
    }

    /// Message for the conv_errc value `ev`; empty for conv_errc::success and
    /// for values that are not conv_errc enumerators.
    std::string message(int ev) const override
    {
        switch (static_cast<conv_errc>(ev))
        {
            case conv_errc::over_long_utf8_sequence:
                return "Over long utf8 sequence";
            case conv_errc::expected_continuation_byte:
                return "Expected continuation byte";
            case conv_errc::unpaired_high_surrogate:
                return "Unpaired high surrogate UTF-16";
            case conv_errc::illegal_surrogate_value:
                return "UTF-16 surrogate values are illegal in UTF-32";
            case conv_errc::source_exhausted:
                return "Partial character in source, but hit end";
            case conv_errc::source_illegal:
                return "Source sequence is illegal/malformed";
            default:
                return "";
        }
    }
};

/// Returns the single shared instance of the unicode_traits error category.
inline
const std::error_category& unicode_traits_error_category()
{
    static Unicode_traits_error_category_impl_ instance;
    return instance;
}

/// Wraps `result` in a std::error_code belonging to the unicode_traits category.
inline
std::error_code make_error_code(conv_errc result)
{
    return std::error_code(static_cast<int>(result),unicode_traits_error_category());
}
281
+
282
+ } // unicode_traits
283
+ } // jsoncons
284
+
285
+ namespace std {
286
+ template<>
287
+ struct is_error_code_enum<jsoncons::unicode_traits::conv_errc> : public true_type
288
+ {
289
+ };
290
+ }
291
+
292
+ namespace jsoncons { namespace unicode_traits {
293
+
294
+ // utf8
295
+
296
+ template <class CharT>
297
+ typename std::enable_if<traits_extension::is_char8<CharT>::value, conv_errc>::type
298
+ is_legal_utf8(const CharT* first, std::size_t length)
299
+ {
300
+ uint8_t a;
301
+ const CharT* srcptr = first+length;
302
+ switch (length) {
303
+ default:
304
+ return conv_errc::over_long_utf8_sequence;
305
+ case 4:
306
+ if (((a = (*--srcptr))& 0xC0) != 0x80)
307
+ return conv_errc::expected_continuation_byte;
308
+ JSONCONS_FALLTHROUGH;
309
+ case 3:
310
+ if (((a = (*--srcptr))& 0xC0) != 0x80)
311
+ return conv_errc::expected_continuation_byte;
312
+ JSONCONS_FALLTHROUGH;
313
+ case 2:
314
+ if (((a = (*--srcptr))& 0xC0) != 0x80)
315
+ return conv_errc::expected_continuation_byte;
316
+
317
+ switch (static_cast<uint8_t>(*first))
318
+ {
319
+ // no fall-through in this inner switch
320
+ case 0xE0: if (a < 0xA0) return conv_errc::source_illegal; break;
321
+ case 0xED: if (a > 0x9F) return conv_errc::source_illegal; break;
322
+ case 0xF0: if (a < 0x90) return conv_errc::source_illegal; break;
323
+ case 0xF4: if (a > 0x8F) return conv_errc::source_illegal; break;
324
+ default: if (a < 0x80) return conv_errc::source_illegal;
325
+ }
326
+
327
+ JSONCONS_FALLTHROUGH;
328
+ case 1:
329
+ if (static_cast<uint8_t>(*first) >= 0x80 && static_cast<uint8_t>(*first) < 0xC2)
330
+ return conv_errc::source_illegal;
331
+ break;
332
+ }
333
+ if (static_cast<uint8_t>(*first) > 0xF4)
334
+ return conv_errc::source_illegal;
335
+
336
+ return conv_errc();
337
+ }
338
+
339
// Helper behind void_t. Routing the argument pack through a class template
// makes every argument take part in substitution, so an ill-formed argument
// is a SFINAE failure even on compilers that predate the resolution of
// CWG issue 1558 (which may otherwise ignore unused alias-template arguments).
template <class...>
struct void_t_impl
{
    using type = void;
};

/// Maps any list of well-formed types to `void`.
template <class... Ts> using void_t = typename void_t_impl<Ts...>::type;

/// Primary template: `I` is not treated as an output iterator for `E`.
template <class, class, class = void>
struct is_output_iterator : std::false_type {};

/// Selected when std::iterator_traits<I> names an iterator_category and a
/// value of type `E` can be assigned through a dereferenced `I`.
template <class I, class E>
struct is_output_iterator<I, E, void_t<
    typename std::iterator_traits<I>::iterator_category,
    decltype(*std::declval<I>() = std::declval<E>())>> : std::true_type {};
348
+
349
+ // is_same_size fixes issue with vs2013
350
+
351
+ // primary template
352
// Chosen whenever the specialization below is disabled, i.e. when at least
// one of the two types is void; sizeof is therefore never applied to void.
template<class T1, class T2, class Enable = void>
struct is_same_size : std::false_type
{
};

// Both types are non-void: `value` tells whether their sizes match.
template<class T1, class T2>
struct is_same_size<T1, T2, typename std::enable_if<!(std::is_void<T1>::value || std::is_void<T2>::value)>::type>
{
    static constexpr bool value = sizeof(T1) == sizeof(T2);
};
363
+
364
+ // convert
365
+
366
+ template <class CharT>
367
+ struct convert_result
368
+ {
369
+ const CharT* ptr;
370
+ conv_errc ec;
371
+ };
372
+
373
+ // to_codepoint
374
+
375
+ template <class CharT,class CodepointT>
376
+ typename std::enable_if<traits_extension::is_char8<CharT>::value && traits_extension::is_char32<CodepointT>::value,
377
+ convert_result<CharT>>::type
378
+ to_codepoint(const CharT* first, const CharT* last,
379
+ CodepointT& ch,
380
+ conv_flags flags = conv_flags::strict) noexcept
381
+ {
382
+ ch = 0;
383
+ if (first >= last)
384
+ {
385
+ return convert_result<CharT>{first, conv_errc::source_exhausted};
386
+ }
387
+ conv_errc result = conv_errc();
388
+
389
+ unsigned short extra_bytes_to_read = trailing_bytes_for_utf8[static_cast<uint8_t>(*first)];
390
+ if (extra_bytes_to_read >= last - first)
391
+ {
392
+ result = conv_errc::source_exhausted;
393
+ return convert_result<CharT>{first, result};
394
+ }
395
+ // Do this check whether lenient or strict
396
+ if ((result=is_legal_utf8(first, extra_bytes_to_read+1)) != conv_errc())
397
+ {
398
+ return convert_result<CharT>{first, result};
399
+ }
400
+ // The cases all fall through. See "Note A" below.
401
+ switch (extra_bytes_to_read)
402
+ {
403
+ case 5:
404
+ ch += static_cast<uint8_t>(*first++);
405
+ ch <<= 6;
406
+ JSONCONS_FALLTHROUGH;
407
+ case 4:
408
+ ch += static_cast<uint8_t>(*first++);
409
+ ch <<= 6;
410
+ JSONCONS_FALLTHROUGH;
411
+ case 3:
412
+ ch += static_cast<uint8_t>(*first++);
413
+ ch <<= 6;
414
+ JSONCONS_FALLTHROUGH;
415
+ case 2:
416
+ ch += static_cast<uint8_t>(*first++);
417
+ ch <<= 6;
418
+ JSONCONS_FALLTHROUGH;
419
+ case 1:
420
+ ch += static_cast<uint8_t>(*first++);
421
+ ch <<= 6;
422
+ JSONCONS_FALLTHROUGH;
423
+ case 0:
424
+ ch += static_cast<uint8_t>(*first++);
425
+ break;
426
+ }
427
+ ch -= offsets_from_utf8[extra_bytes_to_read];
428
+
429
+ if (ch <= max_legal_utf32) {
430
+ /*
431
+ * UTF-16 surrogate values are illegal in UTF-32, and anything
432
+ * over Plane 17 (> 0x10FFFF) is illegal.
433
+ */
434
+ if (is_surrogate(ch) )
435
+ {
436
+ if (flags == conv_flags::strict)
437
+ {
438
+ first -= (extra_bytes_to_read+1); // return to the illegal value itself
439
+ result = conv_errc::source_illegal;
440
+ return convert_result<CharT>{first, result};
441
+ }
442
+ else
443
+ {
444
+ ch = replacement_char;
445
+ }
446
+ }
447
+ }
448
+ else // i.e., ch > max_legal_utf32
449
+ {
450
+ result = conv_errc::source_illegal;
451
+ ch = replacement_char;
452
+ }
453
+
454
+ return convert_result<CharT>{first,result} ;
455
+ }
456
+
457
+ template <class CharT,class CodepointT>
458
+ typename std::enable_if<traits_extension::is_char16<CharT>::value && traits_extension::is_char32<CodepointT>::value,
459
+ convert_result<CharT>>::type
460
+ to_codepoint(const CharT* first, const CharT* last,
461
+ CodepointT& ch,
462
+ conv_flags flags = conv_flags::strict) noexcept
463
+ {
464
+ ch = 0;
465
+ if (first >= last)
466
+ {
467
+ return convert_result<CharT>{first, conv_errc::source_exhausted};
468
+ }
469
+ conv_errc result = conv_errc();
470
+
471
+ ch = *first++;
472
+ // If we have a surrogate pair, convert to UTF32 first.
473
+ if (is_high_surrogate(ch))
474
+ {
475
+ // If the 16 bits following the high surrogate are in the first buffer...
476
+ if (first < last)
477
+ {
478
+ uint32_t ch2 = *first;
479
+ // If ptr's a low surrogate, convert to UTF32.
480
+ if (ch2 >= sur_low_start && ch2 <= sur_low_end )
481
+ {
482
+ ch = ((ch - sur_high_start) << half_shift)
483
+ + (ch2 - sur_low_start) + half_base;
484
+ ++first;
485
+ }
486
+ else if (flags == conv_flags::strict) // ptr's an unpaired high surrogate
487
+ {
488
+ --first; /* return to the illegal value itself */
489
+ result = conv_errc::source_illegal;
490
+ return convert_result<CharT>{first, result};
491
+ }
492
+ }
493
+ else
494
+ { /* We don't have the 16 bits following the high surrogate. */
495
+ --first; /* return to the high surrogate */
496
+ result = conv_errc::source_exhausted;
497
+ return convert_result<CharT>{first, result};
498
+ }
499
+ } else if (flags == conv_flags::strict) {
500
+ /* UTF-16 surrogate values are illegal in UTF-32 */
501
+ if (is_low_surrogate(ch) )
502
+ {
503
+ --first; /* return to the illegal value itself */
504
+ result = conv_errc::source_illegal;
505
+ return convert_result<CharT>{first, result};
506
+ }
507
+ }
508
+
509
+ return convert_result<CharT>{first,result} ;
510
+ }
511
+
512
+ template <class CharT,class CodepointT>
513
+ typename std::enable_if<traits_extension::is_char32<CharT>::value && traits_extension::is_char32<CodepointT>::value,
514
+ convert_result<CharT>>::type
515
+ to_codepoint(const CharT* first, const CharT* last,
516
+ CodepointT& ch,
517
+ conv_flags flags = conv_flags::strict) noexcept
518
+ {
519
+ ch = 0;
520
+ if (first >= last)
521
+ {
522
+ return convert_result<CharT>{first, conv_errc::source_exhausted};
523
+ }
524
+ conv_errc result = conv_errc();
525
+
526
+ ch = *first++;
527
+ if (flags == conv_flags::strict )
528
+ {
529
+ /* UTF-16 surrogate values are illegal in UTF-32 */
530
+ if (is_surrogate(ch))
531
+ {
532
+ --first; /* return to the illegal value itself */
533
+ result = conv_errc::illegal_surrogate_value;
534
+ return convert_result<CharT>{first,result} ;
535
+ }
536
+ }
537
+ if (!(ch <= max_legal_utf32))
538
+ {
539
+ ch = replacement_char;
540
+ result = conv_errc::source_illegal;
541
+ }
542
+
543
+ return convert_result<CharT>{first,result} ;
544
+ }
545
+
546
+ // convert
547
+
548
+ template <class CharT,class Container>
549
+ typename std::enable_if<traits_extension::is_char8<CharT>::value
550
+ && traits_extension::is_back_insertable<Container>::value
551
+ && traits_extension::is_char8<typename Container::value_type>::value,
552
+ convert_result<CharT>>::type
553
+ convert(const CharT* data, std::size_t length, Container& target, conv_flags flags=conv_flags::strict)
554
+ {
555
+ (void)flags;
556
+
557
+ conv_errc result = conv_errc();
558
+ const CharT* last = data + length;
559
+ while (data != last)
560
+ {
561
+ std::size_t len = trailing_bytes_for_utf8[static_cast<uint8_t>(*data)] + 1;
562
+ if (len > (std::size_t)(last - data))
563
+ {
564
+ return convert_result<CharT>{data, conv_errc::source_exhausted};
565
+ }
566
+ if ((result=is_legal_utf8(data, len)) != conv_errc())
567
+ {
568
+ return convert_result<CharT>{data,result};
569
+ }
570
+
571
+ switch (len) {
572
+ case 4: target.push_back(static_cast<uint8_t>(*data++));
573
+ JSONCONS_FALLTHROUGH;
574
+ case 3: target.push_back(static_cast<uint8_t>(*data++));
575
+ JSONCONS_FALLTHROUGH;
576
+ case 2: target.push_back(static_cast<uint8_t>(*data++));
577
+ JSONCONS_FALLTHROUGH;
578
+ case 1: target.push_back(static_cast<uint8_t>(*data++));
579
+ }
580
+ }
581
+ return convert_result<CharT>{data,result} ;
582
+ }
583
+
584
+ template <class CharT,class Container>
585
+ typename std::enable_if<traits_extension::is_char8<CharT>::value
586
+ && traits_extension::is_back_insertable<Container>::value
587
+ && traits_extension::is_char16<typename Container::value_type>::value,
588
+ convert_result<CharT>>::type
589
+ convert(const CharT* data, std::size_t length,
590
+ Container& target,
591
+ conv_flags flags = conv_flags::strict)
592
+ {
593
+ conv_errc result = conv_errc();
594
+
595
+ const CharT* last = data + length;
596
+ while (data != last)
597
+ {
598
+ unsigned short extra_bytes_to_read = trailing_bytes_for_utf8[static_cast<uint8_t>(*data)];
599
+ if (extra_bytes_to_read >= last - data)
600
+ {
601
+ result = conv_errc::source_exhausted;
602
+ break;
603
+ }
604
+ /* Do this check whether lenient or strict */
605
+ if ((result=is_legal_utf8(data, extra_bytes_to_read+1)) != conv_errc())
606
+ {
607
+ break;
608
+ }
609
+ /*
610
+ * The cases all fall through. See "Note A" below.
611
+ */
612
+ uint32_t ch = 0;
613
+ switch (extra_bytes_to_read) {
614
+ case 5: ch += static_cast<uint8_t>(*data++); ch <<= 6; /* remember, illegal UTF-8 */
615
+ JSONCONS_FALLTHROUGH;
616
+ case 4: ch += static_cast<uint8_t>(*data++); ch <<= 6; /* remember, illegal UTF-8 */
617
+ JSONCONS_FALLTHROUGH;
618
+ case 3: ch += static_cast<uint8_t>(*data++); ch <<= 6;
619
+ JSONCONS_FALLTHROUGH;
620
+ case 2: ch += static_cast<uint8_t>(*data++); ch <<= 6;
621
+ JSONCONS_FALLTHROUGH;
622
+ case 1: ch += static_cast<uint8_t>(*data++); ch <<= 6;
623
+ JSONCONS_FALLTHROUGH;
624
+ case 0: ch += static_cast<uint8_t>(*data++);
625
+ break;
626
+ }
627
+ ch -= offsets_from_utf8[extra_bytes_to_read];
628
+
629
+ if (ch <= max_bmp) { /* Target is a character <= 0xFFFF */
630
+ /* UTF-16 surrogate values are illegal in UTF-32 */
631
+ if (is_surrogate(ch) )
632
+ {
633
+ if (flags == conv_flags::strict) {
634
+ data -= (extra_bytes_to_read+1); /* return to the illegal value itself */
635
+ result = conv_errc::source_illegal;
636
+ break;
637
+ } else {
638
+ target.push_back(replacement_char);
639
+ }
640
+ } else {
641
+ target.push_back((uint16_t)ch); /* normal case */
642
+ }
643
+ } else if (ch > max_utf16) {
644
+ if (flags == conv_flags::strict) {
645
+ result = conv_errc::source_illegal;
646
+ data -= (extra_bytes_to_read+1); /* return to the start */
647
+ break; /* Bail out; shouldn't continue */
648
+ } else {
649
+ target.push_back(replacement_char);
650
+ }
651
+ } else {
652
+ /* target is a character in range 0xFFFF - 0x10FFFF. */
653
+ ch -= half_base;
654
+ target.push_back((uint16_t)((ch >> half_shift) + sur_high_start));
655
+ target.push_back((uint16_t)((ch & half_mask) + sur_low_start));
656
+ }
657
+ }
658
+ return convert_result<CharT>{data,result} ;
659
+ }
660
+
661
+ template <class CharT,class Container>
662
+ typename std::enable_if<traits_extension::is_char8<CharT>::value
663
+ && traits_extension::is_back_insertable<Container>::value
664
+ && traits_extension::is_char32<typename Container::value_type>::value,
665
+ convert_result<CharT>>::type
666
+ convert(const CharT* data, std::size_t length,
667
+ Container& target,
668
+ conv_flags flags = conv_flags::strict)
669
+ {
670
+ conv_errc result = conv_errc();
671
+
672
+ const CharT* last = data + length;
673
+ while (data < last)
674
+ {
675
+ uint32_t ch = 0;
676
+ unsigned short extra_bytes_to_read = trailing_bytes_for_utf8[static_cast<uint8_t>(*data)];
677
+ if (extra_bytes_to_read >= last - data)
678
+ {
679
+ result = conv_errc::source_exhausted;
680
+ break;
681
+ }
682
+ /* Do this check whether lenient or strict */
683
+ if ((result=is_legal_utf8(data, extra_bytes_to_read+1)) != conv_errc())
684
+ {
685
+ break;
686
+ }
687
+ /*
688
+ * The cases all fall through. See "Note A" below.
689
+ */
690
+ switch (extra_bytes_to_read)
691
+ {
692
+ case 5:
693
+ ch += static_cast<uint8_t>(*data++);
694
+ ch <<= 6;
695
+ JSONCONS_FALLTHROUGH;
696
+ case 4:
697
+ ch += static_cast<uint8_t>(*data++);
698
+ ch <<= 6;
699
+ JSONCONS_FALLTHROUGH;
700
+ case 3:
701
+ ch += static_cast<uint8_t>(*data++);
702
+ ch <<= 6;
703
+ JSONCONS_FALLTHROUGH;
704
+ case 2:
705
+ ch += static_cast<uint8_t>(*data++);
706
+ ch <<= 6;
707
+ JSONCONS_FALLTHROUGH;
708
+ case 1:
709
+ ch += static_cast<uint8_t>(*data++);
710
+ ch <<= 6;
711
+ JSONCONS_FALLTHROUGH;
712
+ case 0:
713
+ ch += static_cast<uint8_t>(*data++);
714
+ break;
715
+ }
716
+ ch -= offsets_from_utf8[extra_bytes_to_read];
717
+
718
+ if (ch <= max_legal_utf32) {
719
+ /*
720
+ * UTF-16 surrogate values are illegal in UTF-32, and anything
721
+ * over Plane 17 (> 0x10FFFF) is illegal.
722
+ */
723
+ if (is_surrogate(ch) )
724
+ {
725
+ if (flags == conv_flags::strict) {
726
+ data -= (extra_bytes_to_read+1); /* return to the illegal value itself */
727
+ result = conv_errc::source_illegal;
728
+ break;
729
+ } else {
730
+ target.push_back(replacement_char);
731
+ }
732
+ } else {
733
+ target.push_back(ch);
734
+ }
735
+ } else { /* i.e., ch > max_legal_utf32 */
736
+ result = conv_errc::source_illegal;
737
+ target.push_back(replacement_char);
738
+ }
739
+ }
740
+ return convert_result<CharT>{data,result} ;
741
+ }
742
+
743
+ // utf16
744
+
745
+ template <class CharT,class Container>
746
+ typename std::enable_if<traits_extension::is_char16<CharT>::value
747
+ && traits_extension::is_back_insertable<Container>::value
748
+ && traits_extension::is_char8<typename Container::value_type>::value,
749
+ convert_result<CharT>>::type
750
+ convert(const CharT* data, std::size_t length,
751
+ Container& target,
752
+ conv_flags flags = conv_flags::strict) {
753
+ conv_errc result = conv_errc();
754
+
755
+ const CharT* last = data + length;
756
+ while (data < last) {
757
+ unsigned short bytes_to_write = 0;
758
+ const uint32_t byteMask = 0xBF;
759
+ const uint32_t byteMark = 0x80;
760
+ uint32_t ch = *data++;
761
+ /* If we have a surrogate pair, convert to uint32_t data. */
762
+ if (is_high_surrogate(ch))
763
+ {
764
+ /* If the 16 bits following the high surrogate are in the data buffer... */
765
+ if (data < last) {
766
+ uint32_t ch2 = *data;
767
+ /* If ptr's a low surrogate, convert to uint32_t. */
768
+ if (ch2 >= sur_low_start && ch2 <= sur_low_end) {
769
+ ch = ((ch - sur_high_start) << half_shift)
770
+ + (ch2 - sur_low_start) + half_base;
771
+ ++data;
772
+ } else if (flags == conv_flags::strict) { /* ptr's an unpaired high surrogate */
773
+ --data; /* return to the illegal value itself */
774
+ result = conv_errc::unpaired_high_surrogate;
775
+ break;
776
+ }
777
+ } else { /* We don't have the 16 bits following the high surrogate. */
778
+ --data; /* return to the high surrogate */
779
+ result = conv_errc::source_exhausted;
780
+ break;
781
+ }
782
+ } else if (flags == conv_flags::strict) {
783
+ /* UTF-16 surrogate values are illegal in UTF-32 */
784
+ if (is_low_surrogate(ch))
785
+ {
786
+ --data; /* return to the illegal value itself */
787
+ result = conv_errc::source_illegal;
788
+ break;
789
+ }
790
+ }
791
+ /* Figure out how many bytes the result will require */
792
+ if (ch < (uint32_t)0x80) {
793
+ bytes_to_write = 1;
794
+ } else if (ch < (uint32_t)0x800) {
795
+ bytes_to_write = 2;
796
+ } else if (ch < (uint32_t)0x10000) {
797
+ bytes_to_write = 3;
798
+ } else if (ch < (uint32_t)0x110000) {
799
+ bytes_to_write = 4;
800
+ } else {
801
+ bytes_to_write = 3;
802
+ ch = replacement_char;
803
+ }
804
+
805
+ uint8_t byte1 = 0;
806
+ uint8_t byte2 = 0;
807
+ uint8_t byte3 = 0;
808
+ uint8_t byte4 = 0;
809
+
810
+ switch (bytes_to_write) { // note: everything falls through
811
+ case 4: byte4 = (uint8_t)((ch | byteMark) & byteMask); ch >>= 6;
812
+ JSONCONS_FALLTHROUGH;
813
+ case 3: byte3 = (uint8_t)((ch | byteMark) & byteMask); ch >>= 6;
814
+ JSONCONS_FALLTHROUGH;
815
+ case 2: byte2 = (uint8_t)((ch | byteMark) & byteMask); ch >>= 6;
816
+ JSONCONS_FALLTHROUGH;
817
+ case 1: byte1 = (uint8_t)(ch | first_byte_mark[bytes_to_write]);
818
+ break;
819
+ }
820
+ switch (bytes_to_write)
821
+ {
822
+ case 4:
823
+ target.push_back(byte1);
824
+ target.push_back(byte2);
825
+ target.push_back(byte3);
826
+ target.push_back(byte4);
827
+ break;
828
+ case 3:
829
+ target.push_back(byte1);
830
+ target.push_back(byte2);
831
+ target.push_back(byte3);
832
+ break;
833
+ case 2:
834
+ target.push_back(byte1);
835
+ target.push_back(byte2);
836
+ break;
837
+ case 1:
838
+ target.push_back(byte1);
839
+ break;
840
+ }
841
+ }
842
+ return convert_result<CharT>{data,result} ;
843
+ }
844
+
845
+ template <class CharT,class Container>
846
+ typename std::enable_if<traits_extension::is_char16<CharT>::value
847
+ && traits_extension::is_back_insertable<Container>::value
848
+ && traits_extension::is_char16<typename Container::value_type>::value,
849
+ convert_result<CharT>>::type
850
+ convert(const CharT* data, std::size_t length,
851
+ Container& target,
852
+ conv_flags flags = conv_flags::strict)
853
+ {
854
+ conv_errc result = conv_errc();
855
+
856
+ const CharT* last = data + length;
857
+ while (data != last)
858
+ {
859
+ uint32_t ch = *data++;
860
+ /* If we have a surrogate pair, convert to uint32_t data. */
861
+ if (is_high_surrogate(ch))
862
+ {
863
+ /* If the 16 bits following the high surrogate are in the data buffer... */
864
+ if (data < last) {
865
+ uint32_t ch2 = *data;
866
+ /* If ptr's a low surrogate, */
867
+ if (ch2 >= sur_low_start && ch2 <= sur_low_end) {
868
+ target.push_back((uint16_t)ch);
869
+ target.push_back((uint16_t)ch2);
870
+ ++data;
871
+ } else if (flags == conv_flags::strict) { /* ptr's an unpaired high surrogate */
872
+ --data; /* return to the illegal value itself */
873
+ result = conv_errc::unpaired_high_surrogate;
874
+ break;
875
+ }
876
+ } else { /* We don't have the 16 bits following the high surrogate. */
877
+ --data; /* return to the high surrogate */
878
+ result = conv_errc::source_exhausted;
879
+ break;
880
+ }
881
+ } else if (is_low_surrogate(ch))
882
+ {
883
+ // illegal leading low surrogate
884
+ if (flags == conv_flags::strict) {
885
+ --data; /* return to the illegal value itself */
886
+ result = conv_errc::source_illegal;
887
+ break;
888
+ }
889
+ else
890
+ {
891
+ target.push_back((uint16_t)ch);
892
+ }
893
+ }
894
+ else
895
+ {
896
+ target.push_back((uint16_t)ch);
897
+ }
898
+ }
899
+ return convert_result<CharT>{data,result} ;
900
+ }
901
+
902
+ template <class CharT,class Container>
903
+ typename std::enable_if<traits_extension::is_char16<CharT>::value
904
+ && traits_extension::is_back_insertable<Container>::value
905
+ && traits_extension::is_char32<typename Container::value_type>::value,
906
+ convert_result<CharT>>::type
907
+ convert(const CharT* data, std::size_t length,
908
+ Container& target,
909
+ conv_flags flags = conv_flags::strict)
910
+ {
911
+ conv_errc result = conv_errc();
912
+
913
+ const CharT* last = data + length;
914
+ while (data != last)
915
+ {
916
+ uint32_t ch = *data++;
917
+ /* If we have a surrogate pair, convert to UTF32 data. */
918
+ if (is_high_surrogate(ch))
919
+ {
920
+ /* If the 16 bits following the high surrogate are in the data buffer... */
921
+ if (data < last) {
922
+ uint32_t ch2 = *data;
923
+ /* If ptr's a low surrogate, convert to UTF32. */
924
+ if (ch2 >= sur_low_start && ch2 <= sur_low_end )
925
+ {
926
+ ch = ((ch - sur_high_start) << half_shift)
927
+ + (ch2 - sur_low_start) + half_base;
928
+ ++data;
929
+ } else if (flags == conv_flags::strict) { /* ptr's an unpaired high surrogate */
930
+ --data; /* return to the illegal value itself */
931
+ result = conv_errc::source_illegal;
932
+ break;
933
+ }
934
+ } else { /* We don't have the 16 bits following the high surrogate. */
935
+ --data; /* return to the high surrogate */
936
+ result = conv_errc::source_exhausted;
937
+ break;
938
+ }
939
+ } else if (flags == conv_flags::strict) {
940
+ /* UTF-16 surrogate values are illegal in UTF-32 */
941
+ if (is_low_surrogate(ch) )
942
+ {
943
+ --data; /* return to the illegal value itself */
944
+ result = conv_errc::source_illegal;
945
+ break;
946
+ }
947
+ }
948
+ target.push_back(ch);
949
+ }
950
+ return convert_result<CharT>{data,result} ;
951
+ }
952
+
953
+ // utf32
954
+
955
+ template <class CharT,class Container>
956
+ typename std::enable_if<traits_extension::is_char32<CharT>::value
957
+ && traits_extension::is_back_insertable<Container>::value
958
+ && traits_extension::is_char8<typename Container::value_type>::value,
959
+ convert_result<CharT>>::type
960
+ convert(const CharT* data, std::size_t length,
961
+ Container& target,
962
+ conv_flags flags = conv_flags::strict)
963
+ {
964
+ conv_errc result = conv_errc();
965
+ const CharT* last = data + length;
966
+ while (data < last)
967
+ {
968
+ unsigned short bytes_to_write = 0;
969
+ const uint32_t byteMask = 0xBF;
970
+ const uint32_t byteMark = 0x80;
971
+ uint32_t ch = *data++;
972
+ if (flags == conv_flags::strict )
973
+ {
974
+ /* UTF-16 surrogate values are illegal in UTF-32 */
975
+ if (is_surrogate(ch))
976
+ {
977
+ --data; /* return to the illegal value itself */
978
+ result = conv_errc::illegal_surrogate_value;
979
+ break;
980
+ }
981
+ }
982
+ /*
983
+ * Figure out how many bytes the result will require. Turn any
984
+ * illegally large UTF32 things (> Plane 17) into replacement chars.
985
+ */
986
+ if (ch < (uint32_t)0x80) { bytes_to_write = 1;
987
+ } else if (ch < (uint32_t)0x800) { bytes_to_write = 2;
988
+ } else if (ch < (uint32_t)0x10000) { bytes_to_write = 3;
989
+ } else if (ch <= max_legal_utf32) { bytes_to_write = 4;
990
+ } else {
991
+ bytes_to_write = 3;
992
+ ch = replacement_char;
993
+ result = conv_errc::source_illegal;
994
+ }
995
+
996
+ uint8_t byte1 = 0;
997
+ uint8_t byte2 = 0;
998
+ uint8_t byte3 = 0;
999
+ uint8_t byte4 = 0;
1000
+
1001
+ switch (bytes_to_write) {
1002
+ case 4:
1003
+ byte4 = (uint8_t)((ch | byteMark) & byteMask); ch >>= 6;
1004
+ JSONCONS_FALLTHROUGH;
1005
+ case 3:
1006
+ byte3 = (uint8_t)((ch | byteMark) & byteMask); ch >>= 6;
1007
+ JSONCONS_FALLTHROUGH;
1008
+ case 2:
1009
+ byte2 = (uint8_t)((ch | byteMark) & byteMask); ch >>= 6;
1010
+ JSONCONS_FALLTHROUGH;
1011
+ case 1:
1012
+ byte1 = (uint8_t) (ch | first_byte_mark[bytes_to_write]);
1013
+ break;
1014
+ }
1015
+
1016
+ switch (bytes_to_write)
1017
+ {
1018
+ case 4:
1019
+ target.push_back(byte1);
1020
+ target.push_back(byte2);
1021
+ target.push_back(byte3);
1022
+ target.push_back(byte4);
1023
+ break;
1024
+ case 3:
1025
+ target.push_back(byte1);
1026
+ target.push_back(byte2);
1027
+ target.push_back(byte3);
1028
+ break;
1029
+ case 2:
1030
+ target.push_back(byte1);
1031
+ target.push_back(byte2);
1032
+ break;
1033
+ case 1:
1034
+ target.push_back(byte1);
1035
+ break;
1036
+ }
1037
+ }
1038
+ return convert_result<CharT>{data,result} ;
1039
+ }
1040
+
1041
+ template <class CharT,class Container>
1042
+ typename std::enable_if<traits_extension::is_char32<CharT>::value
1043
+ && traits_extension::is_back_insertable<Container>::value
1044
+ && traits_extension::is_char16<typename Container::value_type>::value,
1045
+ convert_result<CharT>>::type
1046
+ convert(const CharT* data, std::size_t length,
1047
+ Container& target,
1048
+ conv_flags flags = conv_flags::strict)
1049
+ {
1050
+ conv_errc result = conv_errc();
1051
+
1052
+ const CharT* last = data + length;
1053
+ while (data != last)
1054
+ {
1055
+ uint32_t ch = *data++;
1056
+ if (ch <= max_bmp) { /* Target is a character <= 0xFFFF */
1057
+ /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */
1058
+ if (is_surrogate(ch) )
1059
+ {
1060
+ if (flags == conv_flags::strict) {
1061
+ --data; /* return to the illegal value itself */
1062
+ result = conv_errc::source_illegal;
1063
+ break;
1064
+ } else {
1065
+ target.push_back(replacement_char);
1066
+ }
1067
+ } else {
1068
+ target.push_back((uint16_t)ch); /* normal case */
1069
+ }
1070
+ } else if (ch > max_legal_utf32) {
1071
+ if (flags == conv_flags::strict) {
1072
+ result = conv_errc::source_illegal;
1073
+ } else {
1074
+ target.push_back(replacement_char);
1075
+ }
1076
+ } else {
1077
+ /* target is a character in range 0xFFFF - 0x10FFFF. */
1078
+ ch -= half_base;
1079
+ target.push_back((uint16_t)((ch >> half_shift) + sur_high_start));
1080
+ target.push_back((uint16_t)((ch & half_mask) + sur_low_start));
1081
+ }
1082
+ }
1083
+ return convert_result<CharT>{data,result} ;
1084
+ }
1085
+
1086
+ template <class CharT,class Container>
1087
+ typename std::enable_if<traits_extension::is_char32<CharT>::value
1088
+ && traits_extension::is_back_insertable<Container>::value
1089
+ && traits_extension::is_char32<typename Container::value_type>::value,
1090
+ convert_result<CharT>>::type
1091
+ convert(const CharT* data, std::size_t length,
1092
+ Container& target,
1093
+ conv_flags flags = conv_flags::strict)
1094
+ {
1095
+ conv_errc result = conv_errc();
1096
+
1097
+ const CharT* last = data + length;
1098
+ while (data != last)
1099
+ {
1100
+ uint32_t ch = *data++;
1101
+ if (flags == conv_flags::strict )
1102
+ {
1103
+ /* UTF-16 surrogate values are illegal in UTF-32 */
1104
+ if (is_surrogate(ch))
1105
+ {
1106
+ --data; /* return to the illegal value itself */
1107
+ result = conv_errc::illegal_surrogate_value;
1108
+ break;
1109
+ }
1110
+ }
1111
+ if (ch <= max_legal_utf32)
1112
+ {
1113
+ target.push_back(ch);
1114
+ }
1115
+ else
1116
+ {
1117
+ target.push_back(replacement_char);
1118
+ result = conv_errc::source_illegal;
1119
+ }
1120
+ }
1121
+ return convert_result<CharT>{data,result} ;
1122
+ }
1123
+
1124
+ // validate
1125
+
1126
+ template <class CharT>
1127
+ typename std::enable_if<traits_extension::is_char8<CharT>::value,
1128
+ convert_result<CharT>>::type
1129
+ validate(const CharT* data, std::size_t length) noexcept
1130
+ {
1131
+ conv_errc result = conv_errc();
1132
+ const CharT* last = data + length;
1133
+ while (data != last)
1134
+ {
1135
+ std::size_t len = static_cast<std::size_t>(trailing_bytes_for_utf8[static_cast<uint8_t>(*data)]) + 1;
1136
+ if (len > (std::size_t)(last - data))
1137
+ {
1138
+ return convert_result<CharT>{data, conv_errc::source_exhausted};
1139
+ }
1140
+ if ((result=is_legal_utf8(data, len)) != conv_errc())
1141
+ {
1142
+ return convert_result<CharT>{data,result} ;
1143
+ }
1144
+ data += len;
1145
+ }
1146
+ return convert_result<CharT>{data,result} ;
1147
+ }
1148
+
1149
+ // utf16
1150
+
1151
+ template <class CharT>
1152
+ typename std::enable_if<traits_extension::is_char16<CharT>::value,
1153
+ convert_result<CharT>>::type
1154
+ validate(const CharT* data, std::size_t length) noexcept
1155
+ {
1156
+ conv_errc result = conv_errc();
1157
+
1158
+ const CharT* last = data + length;
1159
+ while (data != last)
1160
+ {
1161
+ uint32_t ch = *data++;
1162
+ /* If we have a surrogate pair, validate to uint32_t data. */
1163
+ if (is_high_surrogate(ch))
1164
+ {
1165
+ /* If the 16 bits following the high surrogate are in the data buffer... */
1166
+ if (data < last) {
1167
+ uint32_t ch2 = *data;
1168
+ /* If ptr's a low surrogate, */
1169
+ if (ch2 >= sur_low_start && ch2 <= sur_low_end) {
1170
+ ++data;
1171
+ } else {
1172
+ --data; /* return to the illegal value itself */
1173
+ result = conv_errc::unpaired_high_surrogate;
1174
+ break;
1175
+ }
1176
+ }
1177
+ else // We don't have the 16 bits following the high surrogate.
1178
+ {
1179
+ --data; /* return to the high surrogate */
1180
+ result = conv_errc::source_exhausted;
1181
+ break;
1182
+ }
1183
+ }
1184
+ else if (is_low_surrogate(ch))
1185
+ {
1186
+ /* UTF-16 surrogate values are illegal in UTF-32 */
1187
+ --data; /* return to the illegal value itself */
1188
+ result = conv_errc::source_illegal;
1189
+ break;
1190
+ }
1191
+ }
1192
+ return convert_result<CharT>{data,result} ;
1193
+ }
1194
+
1195
+ // utf32
1196
+
1197
+ template <class CharT>
1198
+ typename std::enable_if<traits_extension::is_char32<CharT>::value,
1199
+ convert_result<CharT>>::type
1200
+ validate(const CharT* data, std::size_t length) noexcept
1201
+ {
1202
+ conv_errc result = conv_errc();
1203
+
1204
+ const CharT* last = data + length;
1205
+ while (data != last)
1206
+ {
1207
+ uint32_t ch = *data++;
1208
+ /* UTF-16 surrogate values are illegal in UTF-32 */
1209
+ if (is_surrogate(ch))
1210
+ {
1211
+ --data; /* return to the illegal value itself */
1212
+ result = conv_errc::illegal_surrogate_value;
1213
+ break;
1214
+ }
1215
+ if (!(ch <= max_legal_utf32))
1216
+ {
1217
+ result = conv_errc::source_illegal;
1218
+ }
1219
+ }
1220
+ return convert_result<CharT>{data, result} ;
1221
+ }
1222
+
1223
// Encodings that detect_encoding can report; `undetected` means no decision.
enum class encoding {u8,u16le,u16be,u32le,u32be,undetected};

// Result of detect_encoding.
//   it - position where the caller should continue reading: one past the byte
//        order mark when a BOM was recognised, otherwise the start of the input.
//   ec - the detected encoding.
template <class Iterator>
struct determine_encoding_result
{
    Iterator it;
    encoding ec;
};

// Inspects up to the first four bytes of [first, last) to determine the encoding.
//
// Fewer than four bytes: a buffer of exactly three bytes equal to EF BB BF is
// reported as u8 with `it == last`; anything else is `undetected`.
//
// Four or more bytes: byte order marks are checked first (UTF-32 before UTF-16,
// because the UTF-32LE mark begins with the UTF-16LE mark), then the UTF-8 mark.
// If no mark is present, the encoding is guessed from which of the first four
// bytes are zero, and `it` is the start of the input.
//
// The iterator's value type must be a one-byte integral type.
template <class Iterator>
typename std::enable_if<std::is_integral<typename std::iterator_traits<Iterator>::value_type>::value && sizeof(typename std::iterator_traits<Iterator>::value_type) == sizeof(uint8_t),
                        determine_encoding_result<Iterator>>::type
detect_encoding(Iterator first, Iterator last) noexcept
{
    Iterator it1 = first;
    const auto available = std::distance(first,last);

    if (available < 4)
    {
        if (available == 3)
        {
            Iterator it2 = ++first;
            Iterator it3 = ++first;
            if (static_cast<uint8_t>(*it1) == 0xEF && static_cast<uint8_t>(*it2) == 0xBB && static_cast<uint8_t>(*it3) == 0xBF)
            {
                return determine_encoding_result<Iterator>{last,encoding::u8};
            }
        }
        return determine_encoding_result<Iterator>{it1,encoding::undetected};
    }

    Iterator it2 = ++first;
    Iterator it3 = ++first;
    Iterator it4 = ++first;
    // One past the fourth byte; valid because at least four bytes are available.
    Iterator after_it4 = ++first;

    // Widen before shifting so that a byte >= 0x80 is never shifted into the
    // sign bit of a promoted int.
    const uint32_t b1 = static_cast<uint8_t>(*it1);
    const uint32_t b2 = static_cast<uint8_t>(*it2);
    const uint32_t b3 = static_cast<uint8_t>(*it3);
    const uint32_t b4 = static_cast<uint8_t>(*it4);

    // First four bytes assembled with the first byte in the low-order position.
    const uint32_t bom = b1 | (b2 << 8) | (b3 << 16) | (b4 << 24);

    if (bom == 0xFFFE0000) // bytes 00 00 FE FF
    {
        // The mark is four bytes long, so the payload starts after the fourth byte.
        return determine_encoding_result<Iterator>{after_it4,encoding::u32be};
    }
    if (bom == 0x0000FEFF) // bytes FF FE 00 00
    {
        return determine_encoding_result<Iterator>{after_it4,encoding::u32le};
    }
    if ((bom & 0xFFFF) == 0xFFFE) // bytes FE FF
    {
        return determine_encoding_result<Iterator>{it3,encoding::u16be};
    }
    if ((bom & 0xFFFF) == 0xFEFF) // bytes FF FE
    {
        return determine_encoding_result<Iterator>{it3,encoding::u16le};
    }
    if ((bom & 0xFFFFFF) == 0xBFBBEF) // bytes EF BB BF
    {
        return determine_encoding_result<Iterator>{it4,encoding::u8};
    }

    // No byte order mark: bit i of `pattern` is set when byte i is non-zero.
    const uint32_t pattern = (b1 ? 1u : 0u) | (b2 ? 2u : 0u) | (b3 ? 4u : 0u) | (b4 ? 8u : 0u);
    switch (pattern)
    {
        case 0x08: // 00 00 00 xx
            return determine_encoding_result<Iterator>{it1,encoding::u32be};
        case 0x0A: // 00 xx 00 xx
            return determine_encoding_result<Iterator>{it1,encoding::u16be};
        case 0x01: // xx 00 00 00
            return determine_encoding_result<Iterator>{it1,encoding::u32le};
        case 0x05: // xx 00 xx 00
            return determine_encoding_result<Iterator>{it1,encoding::u16le};
        case 0x0F: // xx xx xx xx
            return determine_encoding_result<Iterator>{it1,encoding::u8};
        default:
            return determine_encoding_result<Iterator>{it1,encoding::undetected};
    }
}
1298
+
1299
+ // count_codepoints
1300
+
1301
+ template <class CharT>
1302
+ typename std::enable_if<traits_extension::is_char8<CharT>::value || traits_extension::is_char16<CharT>::value || traits_extension::is_char32<CharT>::value, std::size_t>::type
1303
+ count_codepoints(const CharT* data, std::size_t length,
1304
+ conv_flags flags = conv_flags::strict) noexcept
1305
+ {
1306
+ conv_errc ec = conv_errc();
1307
+
1308
+ std::size_t count = 0;
1309
+ const CharT* ptr = data;
1310
+ const CharT* last = data + length;
1311
+
1312
+ for (; ptr < last; ++count)
1313
+ {
1314
+ uint32_t cp = 0;
1315
+ auto r = to_codepoint(ptr, last, cp, flags);
1316
+ if (r.ec != conv_errc())
1317
+ {
1318
+ ec = r.ec;
1319
+ break;
1320
+ }
1321
+ ptr = r.ptr;
1322
+ }
1323
+ return ec == conv_errc() && ptr == last ? count : 0;
1324
+ }
1325
+
1326
+ } // unicode_traits
1327
+ } // jsoncons
1328
+
1329
+ #endif
1330
+