jsoncons 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (155) hide show
  1. checksums.yaml +7 -0
  2. data/ext/jsoncons/extconf.rb +43 -0
  3. data/ext/jsoncons/jsoncons.cpp +161 -0
  4. data/ext/jsoncons/jsoncons.h +10 -0
  5. data/jsoncons.gemspec +44 -0
  6. data/lib/jsoncons/jsoncons/examples/input/address-book.json +13 -0
  7. data/lib/jsoncons/jsoncons/examples/input/books.json +28 -0
  8. data/lib/jsoncons/jsoncons/examples/input/countries.json +7 -0
  9. data/lib/jsoncons/jsoncons/examples/input/employees.json +30 -0
  10. data/lib/jsoncons/jsoncons/examples/input/jsonschema/name.json +15 -0
  11. data/lib/jsoncons/jsoncons/examples/input/multiple-json-objects.json +3 -0
  12. data/lib/jsoncons/jsoncons/examples/input/sales.csv +6 -0
  13. data/lib/jsoncons/jsoncons/examples/input/store.json +28 -0
  14. data/lib/jsoncons/jsoncons/examples/input/tasks.csv +6 -0
  15. data/lib/jsoncons/jsoncons/include/jsoncons/allocator_holder.hpp +38 -0
  16. data/lib/jsoncons/jsoncons/include/jsoncons/basic_json.hpp +5905 -0
  17. data/lib/jsoncons/jsoncons/include/jsoncons/bigint.hpp +1611 -0
  18. data/lib/jsoncons/jsoncons/include/jsoncons/byte_string.hpp +820 -0
  19. data/lib/jsoncons/jsoncons/include/jsoncons/config/binary_config.hpp +226 -0
  20. data/lib/jsoncons/jsoncons/include/jsoncons/config/compiler_support.hpp +375 -0
  21. data/lib/jsoncons/jsoncons/include/jsoncons/config/jsoncons_config.hpp +309 -0
  22. data/lib/jsoncons/jsoncons/include/jsoncons/config/version.hpp +40 -0
  23. data/lib/jsoncons/jsoncons/include/jsoncons/conv_error.hpp +218 -0
  24. data/lib/jsoncons/jsoncons/include/jsoncons/decode_json.hpp +209 -0
  25. data/lib/jsoncons/jsoncons/include/jsoncons/decode_traits.hpp +651 -0
  26. data/lib/jsoncons/jsoncons/include/jsoncons/detail/endian.hpp +44 -0
  27. data/lib/jsoncons/jsoncons/include/jsoncons/detail/grisu3.hpp +312 -0
  28. data/lib/jsoncons/jsoncons/include/jsoncons/detail/optional.hpp +483 -0
  29. data/lib/jsoncons/jsoncons/include/jsoncons/detail/parse_number.hpp +1133 -0
  30. data/lib/jsoncons/jsoncons/include/jsoncons/detail/span.hpp +188 -0
  31. data/lib/jsoncons/jsoncons/include/jsoncons/detail/string_view.hpp +537 -0
  32. data/lib/jsoncons/jsoncons/include/jsoncons/detail/string_wrapper.hpp +370 -0
  33. data/lib/jsoncons/jsoncons/include/jsoncons/detail/write_number.hpp +567 -0
  34. data/lib/jsoncons/jsoncons/include/jsoncons/encode_json.hpp +315 -0
  35. data/lib/jsoncons/jsoncons/include/jsoncons/encode_traits.hpp +378 -0
  36. data/lib/jsoncons/jsoncons/include/jsoncons/json.hpp +18 -0
  37. data/lib/jsoncons/jsoncons/include/jsoncons/json_array.hpp +324 -0
  38. data/lib/jsoncons/jsoncons/include/jsoncons/json_content_handler.hpp +12 -0
  39. data/lib/jsoncons/jsoncons/include/jsoncons/json_cursor.hpp +448 -0
  40. data/lib/jsoncons/jsoncons/include/jsoncons/json_decoder.hpp +420 -0
  41. data/lib/jsoncons/jsoncons/include/jsoncons/json_encoder.hpp +1587 -0
  42. data/lib/jsoncons/jsoncons/include/jsoncons/json_error.hpp +156 -0
  43. data/lib/jsoncons/jsoncons/include/jsoncons/json_exception.hpp +241 -0
  44. data/lib/jsoncons/jsoncons/include/jsoncons/json_filter.hpp +653 -0
  45. data/lib/jsoncons/jsoncons/include/jsoncons/json_fwd.hpp +23 -0
  46. data/lib/jsoncons/jsoncons/include/jsoncons/json_object.hpp +1772 -0
  47. data/lib/jsoncons/jsoncons/include/jsoncons/json_options.hpp +862 -0
  48. data/lib/jsoncons/jsoncons/include/jsoncons/json_parser.hpp +2900 -0
  49. data/lib/jsoncons/jsoncons/include/jsoncons/json_reader.hpp +731 -0
  50. data/lib/jsoncons/jsoncons/include/jsoncons/json_traits_macros.hpp +1072 -0
  51. data/lib/jsoncons/jsoncons/include/jsoncons/json_traits_macros_deprecated.hpp +144 -0
  52. data/lib/jsoncons/jsoncons/include/jsoncons/json_type.hpp +206 -0
  53. data/lib/jsoncons/jsoncons/include/jsoncons/json_type_traits.hpp +1830 -0
  54. data/lib/jsoncons/jsoncons/include/jsoncons/json_visitor.hpp +1560 -0
  55. data/lib/jsoncons/jsoncons/include/jsoncons/json_visitor2.hpp +2079 -0
  56. data/lib/jsoncons/jsoncons/include/jsoncons/pretty_print.hpp +89 -0
  57. data/lib/jsoncons/jsoncons/include/jsoncons/ser_context.hpp +62 -0
  58. data/lib/jsoncons/jsoncons/include/jsoncons/sink.hpp +289 -0
  59. data/lib/jsoncons/jsoncons/include/jsoncons/source.hpp +777 -0
  60. data/lib/jsoncons/jsoncons/include/jsoncons/source_adaptor.hpp +148 -0
  61. data/lib/jsoncons/jsoncons/include/jsoncons/staj2_cursor.hpp +1189 -0
  62. data/lib/jsoncons/jsoncons/include/jsoncons/staj_cursor.hpp +1254 -0
  63. data/lib/jsoncons/jsoncons/include/jsoncons/staj_iterator.hpp +449 -0
  64. data/lib/jsoncons/jsoncons/include/jsoncons/tag_type.hpp +245 -0
  65. data/lib/jsoncons/jsoncons/include/jsoncons/text_source_adaptor.hpp +144 -0
  66. data/lib/jsoncons/jsoncons/include/jsoncons/traits_extension.hpp +884 -0
  67. data/lib/jsoncons/jsoncons/include/jsoncons/typed_array_view.hpp +250 -0
  68. data/lib/jsoncons/jsoncons/include/jsoncons/unicode_traits.hpp +1330 -0
  69. data/lib/jsoncons/jsoncons/include/jsoncons/uri.hpp +635 -0
  70. data/lib/jsoncons/jsoncons/include/jsoncons/value_converter.hpp +340 -0
  71. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson.hpp +23 -0
  72. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_cursor.hpp +320 -0
  73. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_decimal128.hpp +865 -0
  74. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_encoder.hpp +585 -0
  75. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_error.hpp +103 -0
  76. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_oid.hpp +245 -0
  77. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_options.hpp +75 -0
  78. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_parser.hpp +645 -0
  79. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_reader.hpp +92 -0
  80. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_type.hpp +44 -0
  81. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/decode_bson.hpp +201 -0
  82. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/encode_bson.hpp +144 -0
  83. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor.hpp +26 -0
  84. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_cursor.hpp +351 -0
  85. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_cursor2.hpp +265 -0
  86. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_detail.hpp +93 -0
  87. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_encoder.hpp +1766 -0
  88. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_error.hpp +105 -0
  89. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_options.hpp +113 -0
  90. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_parser.hpp +1942 -0
  91. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_reader.hpp +116 -0
  92. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/decode_cbor.hpp +203 -0
  93. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/encode_cbor.hpp +151 -0
  94. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv.hpp +17 -0
  95. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_cursor.hpp +358 -0
  96. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_encoder.hpp +954 -0
  97. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_error.hpp +85 -0
  98. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_options.hpp +973 -0
  99. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_parser.hpp +2099 -0
  100. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_reader.hpp +348 -0
  101. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_serializer.hpp +12 -0
  102. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/decode_csv.hpp +208 -0
  103. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/encode_csv.hpp +122 -0
  104. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jmespath/jmespath.hpp +5215 -0
  105. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jmespath/jmespath_error.hpp +215 -0
  106. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpatch/jsonpatch.hpp +579 -0
  107. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpatch/jsonpatch_error.hpp +121 -0
  108. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/expression.hpp +3329 -0
  109. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/flatten.hpp +432 -0
  110. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/json_location.hpp +445 -0
  111. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/json_query.hpp +115 -0
  112. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/jsonpath.hpp +13 -0
  113. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/jsonpath_error.hpp +240 -0
  114. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/jsonpath_expression.hpp +2612 -0
  115. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/jsonpath_selector.hpp +1322 -0
  116. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpointer/jsonpointer.hpp +1577 -0
  117. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpointer/jsonpointer_error.hpp +119 -0
  118. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/format_validator.hpp +968 -0
  119. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/json_validator.hpp +120 -0
  120. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/jsonschema.hpp +13 -0
  121. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/jsonschema_error.hpp +105 -0
  122. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/jsonschema_version.hpp +18 -0
  123. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/keyword_validator.hpp +1745 -0
  124. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/keyword_validator_factory.hpp +556 -0
  125. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/schema_draft7.hpp +198 -0
  126. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/schema_location.hpp +200 -0
  127. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/schema_version.hpp +35 -0
  128. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/subschema.hpp +144 -0
  129. data/lib/jsoncons/jsoncons/include/jsoncons_ext/mergepatch/mergepatch.hpp +103 -0
  130. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/decode_msgpack.hpp +202 -0
  131. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/encode_msgpack.hpp +142 -0
  132. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack.hpp +24 -0
  133. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_cursor.hpp +343 -0
  134. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_cursor2.hpp +259 -0
  135. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_encoder.hpp +753 -0
  136. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_error.hpp +94 -0
  137. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_options.hpp +74 -0
  138. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_parser.hpp +748 -0
  139. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_reader.hpp +116 -0
  140. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_type.hpp +63 -0
  141. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/decode_ubjson.hpp +201 -0
  142. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/encode_ubjson.hpp +142 -0
  143. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson.hpp +23 -0
  144. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_cursor.hpp +307 -0
  145. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_encoder.hpp +502 -0
  146. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_error.hpp +100 -0
  147. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_options.hpp +87 -0
  148. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_parser.hpp +880 -0
  149. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_reader.hpp +92 -0
  150. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_type.hpp +43 -0
  151. data/lib/jsoncons/version.rb +5 -0
  152. data/lib/jsoncons.rb +33 -0
  153. data/test/jsoncons_test.rb +108 -0
  154. data/test/test_helper.rb +7 -0
  155. metadata +268 -0
@@ -0,0 +1,1330 @@
1
+ // Copyright 2016 Daniel Parker
2
+ // Distributed under the Boost license, Version 1.0.
3
+ // (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
4
+
5
+ // See https://github.com/danielaparker/unicode_traits for latest version
6
+
7
+ /*
8
+ * Includes code derived from Unicode, Inc decomposition code in ConvertUTF.h and ConvertUTF.c
9
+ * http://www.unicode.org/
10
+ *
11
+ * "Unicode, Inc. hereby grants the right to freely use the information
12
+ * supplied in this file in the creation of products supporting the
13
+ * Unicode Standard."
14
+ */
15
+
16
+ #ifndef JSONCONS_UNICODE_TRAITS_HPP
17
+ #define JSONCONS_UNICODE_TRAITS_HPP
18
+
19
+ #include <cstring>
20
+ #include <string>
21
+ #include <iterator>
22
+ #include <type_traits>
23
+ #include <system_error>
24
+ #include <limits>
25
+ #include <jsoncons/config/compiler_support.hpp>
26
+ #include <jsoncons/traits_extension.hpp>
27
+
28
+ namespace jsoncons { namespace unicode_traits {
29
+
30
// Character encodings that the detection helpers in this header can report.
// `undetected` is the first enumerator and means no encoding was identified.
enum class encoding_kind
{
    undetected,
    utf8,
    utf16le,
    utf16be,
    utf32le,
    utf32be
};
31
+
32
+ inline
33
+ std::string to_string(encoding_kind encoding)
34
+ {
35
+ switch (encoding)
36
+ {
37
+ case encoding_kind::utf8:
38
+ return "utf8";
39
+ case encoding_kind::utf16le:
40
+ return "utf16le";
41
+ case encoding_kind::utf16be:
42
+ return "utf16be";
43
+ case encoding_kind::utf32le:
44
+ return "utf32le";
45
+ case encoding_kind::utf32be:
46
+ return "utf32be";
47
+ default:
48
+ return "undetected";
49
+ }
50
+ }
51
+
52
+ template <class Byte>
53
+ struct detect_encoding_result
54
+ {
55
+ const Byte* ptr;
56
+ encoding_kind encoding;
57
+ };
58
+
59
+ template <class CharT>
60
+ typename std::enable_if<traits_extension::is_char8<CharT>::value,detect_encoding_result<CharT>>::type
61
+ detect_encoding_from_bom(const CharT* data, std::size_t length)
62
+ {
63
+ const uint8_t bom_utf8[] = {0xef,0xbb,0xbf};
64
+ const uint8_t bom_utf16le[] = {0xff,0xfe};
65
+ const uint8_t bom_utf16be[] = {0xfe,0xff};
66
+ const uint8_t bom_utf32le[] = {0xff,0xfe,0x00,0x00};
67
+ const uint8_t bom_utf32be[] = {0x00,0x00,0xfe,0xff};
68
+
69
+ if (length >= 4 && !memcmp(data,bom_utf32le,4))
70
+ {
71
+ return detect_encoding_result<CharT>{data+4,encoding_kind::utf32le};
72
+ }
73
+ else if (length >= 4 && !memcmp(data,bom_utf32be,4))
74
+ {
75
+ return detect_encoding_result<CharT>{data+4,encoding_kind::utf32be};
76
+ }
77
+ else if (length >= 2 && !memcmp(data,bom_utf16le,2))
78
+ {
79
+ return detect_encoding_result<CharT>{data+2,encoding_kind::utf16le};
80
+ }
81
+ else if (length >= 2 && !memcmp(data,bom_utf16be,2))
82
+ {
83
+ return detect_encoding_result<CharT>{data+2,encoding_kind::utf16be};
84
+ }
85
+ else if (length >= 3 && !memcmp(data,bom_utf8,3))
86
+ {
87
+ return detect_encoding_result<CharT>{data+3,encoding_kind::utf8};
88
+ }
89
+ else
90
+ {
91
+ return detect_encoding_result<CharT>{data,encoding_kind::undetected};
92
+ }
93
+ }
94
+
95
+ template <class CharT>
96
+ typename std::enable_if<traits_extension::is_char16<CharT>::value || traits_extension::is_char32<CharT>::value,detect_encoding_result<CharT>>::type
97
+ detect_encoding_from_bom(const CharT* data, std::size_t)
98
+ {
99
+ return detect_encoding_result<CharT>{data,encoding_kind::undetected};
100
+ }
101
+
102
+ template <class CharT>
103
+ typename std::enable_if<traits_extension::is_char8<CharT>::value,detect_encoding_result<CharT>>::type
104
+ detect_json_encoding(const CharT* data, std::size_t length)
105
+ {
106
+ detect_encoding_result<CharT> r = detect_encoding_from_bom(data,length);
107
+ if (r.encoding != encoding_kind::undetected)
108
+ {
109
+ return r;
110
+ }
111
+ else if (length < 4)
112
+ {
113
+ return detect_encoding_result<CharT>{data,encoding_kind::utf8};
114
+ }
115
+ else if (*data == 0 && *(data+1) == 0 && *(data+2) == 0)
116
+ {
117
+ return detect_encoding_result<CharT>{data,encoding_kind::utf32be};
118
+ }
119
+ else if (*data == 0 && *(data+2) == 0)
120
+ {
121
+ return detect_encoding_result<CharT>{data,encoding_kind::utf16be};
122
+ }
123
+ else if (*(data+1) == 0 && *(data+2) == 0 && *(data+3) == 0)
124
+ {
125
+ return detect_encoding_result<CharT>{data,encoding_kind::utf32le};
126
+ }
127
+ else if (*(data+1) == 0 && *(data+3) == 0)
128
+ {
129
+ return detect_encoding_result<CharT>{data,encoding_kind::utf16le};
130
+ }
131
+ else
132
+ {
133
+ return detect_encoding_result<CharT>{data,encoding_kind::utf8};
134
+ }
135
+ }
136
+
137
+ template <class CharT>
138
+ typename std::enable_if<traits_extension::is_char16<CharT>::value || traits_extension::is_char32<CharT>::value,detect_encoding_result<CharT>>::type
139
+ detect_json_encoding(const CharT* data, std::size_t)
140
+ {
141
+ return detect_encoding_result<CharT>{data,encoding_kind::undetected};
142
+ }
143
+
144
+ /*
145
+ * Magic values subtracted from a buffer value during UTF8 conversion.
146
+ * This table contains as many values as there might be trailing bytes
147
+ * in a UTF-8 sequence. Source: ConvertUTF.c
148
+ */
149
// Indexed by the number of trailing bytes in the sequence.
const uint32_t offsets_from_utf8[6] = {
    0x00000000u,    // 0 trailing bytes
    0x00003080u,    // 1 trailing byte
    0x000E2080u,    // 2 trailing bytes
    0x03C82080u,    // 3 trailing bytes
    0xFA082080u,    // 4 trailing bytes
    0x82082080u     // 5 trailing bytes
};
151
+
152
+ /*
153
+ * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
154
+ * into the first byte, depending on how many bytes follow. There are
155
+ * as many entries in this table as there are UTF-8 sequence types.
156
+ * (I.e., one byte sequence, two byte... etc.). Remember that sequences
157
+ * for *legal* UTF-8 will be 4 or fewer bytes total. Source: ConvertUTF.c
158
+ */
159
const uint8_t first_byte_mark[7] = {
    0x00,
    0x00,
    0xC0,
    0xE0,
    0xF0,
    0xF8,
    0xFC
};
160
+
161
+ /*
162
+ * Index into the table below with the first byte of a UTF-8 sequence to
163
+ * get the number of trailing bytes that are supposed to follow it.
164
+ * Note that *legal* UTF-8 values can't have 4 or 5 trailing bytes. The table is
165
+ * left as-is for anyone who may want to do such conversion, which was
166
+ * allowed in earlier algorithms. Source: ConvertUTF.c
167
+ */
168
const uint8_t trailing_bytes_for_utf8[256] = {
    // 0x00 - 0xBF: no trailing bytes
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    // 0xC0 - 0xDF: one trailing byte
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    // 0xE0 - 0xEF: two trailing bytes
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    // 0xF0 - 0xF7: three, 0xF8 - 0xFB: four, 0xFC - 0xFF: five
    3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
178
+
179
+ // Some fundamental constants. Source: ConvertUTF.h
180
// Substitution character and code point upper bounds.
const uint32_t replacement_char = 0xFFFD;
const uint32_t max_bmp = 0xFFFF;
const uint32_t max_utf16 = 0x10FFFF;
const uint32_t max_utf32 = 0x7FFFFFFF;
const uint32_t max_legal_utf32 = 0x10FFFF;

// Values used when combining or splitting UTF-16 surrogate pairs.
const int half_shift = 10; // each surrogate half carries 10 bits
const uint32_t half_base = 0x10000;
const uint32_t half_mask = 0x3FF;

// Inclusive bounds of the high and low surrogate ranges.
const uint16_t sur_high_start = 0xD800;
const uint16_t sur_high_end = 0xDBFF;
const uint16_t sur_low_start = 0xDC00;
const uint16_t sur_low_end = 0xDFFF;
194
+
195
// Returns true when `ch` has the bit pattern 10xxxxxx, i.e. its top two bits
// mark it as a UTF-8 continuation byte.
//
// Declared plain `inline` (not `static`) so every translation unit shares one
// definition, and `noexcept` to match the surrogate predicates in this header.
inline
bool is_continuation_byte(unsigned char ch) noexcept
{
    return (ch & 0xC0) == 0x80;
}
200
+
201
+ inline
202
+ bool is_high_surrogate(uint32_t ch) noexcept
203
+ {
204
+ return (ch >= sur_high_start && ch <= sur_high_end);
205
+ }
206
+
207
+ inline
208
+ bool is_low_surrogate(uint32_t ch) noexcept
209
+ {
210
+ return (ch >= sur_low_start && ch <= sur_low_end);
211
+ }
212
+
213
+ inline
214
+ bool is_surrogate(uint32_t ch) noexcept
215
+ {
216
+ return (ch >= sur_high_start && ch <= sur_low_end);
217
+ }
218
+
219
// Controls how the conversion routines react to surrogate code points in the
// input: with `strict` they stop and report an error, with `lenient` they
// carry on.
enum class conv_flags
{
    strict = 0,
    lenient = 1
};
224
+
225
+ // conv_errc
226
+
227
// Error codes produced by the validation and conversion routines.
enum class conv_errc
{
    success = 0,                        // no error
    over_long_utf8_sequence = 1,        // UTF-8 sequence is longer than allowed
    expected_continuation_byte = 2,     // a trailing byte is not of the form 10xxxxxx
    unpaired_high_surrogate = 3,        // UTF-16 high surrogate without its partner
    illegal_surrogate_value = 4,        // surrogate value found in UTF-32 input
    source_exhausted = 5,               // input ended in the middle of a character
    source_illegal = 6                  // input sequence is illegal/malformed
};
237
+
238
+ class Unicode_traits_error_category_impl_
239
+ : public std::error_category
240
+ {
241
+ public:
242
+ virtual const char* name() const noexcept
243
+ {
244
+ return "unicode_traits conversion error";
245
+ }
246
+ virtual std::string message(int ev) const
247
+ {
248
+ switch (static_cast<conv_errc>(ev))
249
+ {
250
+ case conv_errc::over_long_utf8_sequence:
251
+ return "Over long utf8 sequence";
252
+ case conv_errc::expected_continuation_byte:
253
+ return "Expected continuation byte";
254
+ case conv_errc::unpaired_high_surrogate:
255
+ return "Unpaired high surrogate UTF-16";
256
+ case conv_errc::illegal_surrogate_value:
257
+ return "UTF-16 surrogate values are illegal in UTF-32";
258
+ case conv_errc::source_exhausted:
259
+ return "Partial character in source, but hit end";
260
+ case conv_errc::source_illegal:
261
+ return "Source sequence is illegal/malformed";
262
+ default:
263
+ return "";
264
+ break;
265
+ }
266
+ }
267
+ };
268
+
269
+ inline
270
+ const std::error_category& unicode_traits_error_category()
271
+ {
272
+ static Unicode_traits_error_category_impl_ instance;
273
+ return instance;
274
+ }
275
+
276
+ inline
277
+ std::error_code make_error_code(conv_errc result)
278
+ {
279
+ return std::error_code(static_cast<int>(result),unicode_traits_error_category());
280
+ }
281
+
282
+ } // unicode_traits
283
+ } // jsoncons
284
+
285
+ namespace std {
286
+ template<>
287
+ struct is_error_code_enum<jsoncons::unicode_traits::conv_errc> : public true_type
288
+ {
289
+ };
290
+ }
291
+
292
+ namespace jsoncons { namespace unicode_traits {
293
+
294
+ // utf8
295
+
296
+ template <class CharT>
297
+ typename std::enable_if<traits_extension::is_char8<CharT>::value, conv_errc>::type
298
+ is_legal_utf8(const CharT* first, std::size_t length)
299
+ {
300
+ uint8_t a;
301
+ const CharT* srcptr = first+length;
302
+ switch (length) {
303
+ default:
304
+ return conv_errc::over_long_utf8_sequence;
305
+ case 4:
306
+ if (((a = (*--srcptr))& 0xC0) != 0x80)
307
+ return conv_errc::expected_continuation_byte;
308
+ JSONCONS_FALLTHROUGH;
309
+ case 3:
310
+ if (((a = (*--srcptr))& 0xC0) != 0x80)
311
+ return conv_errc::expected_continuation_byte;
312
+ JSONCONS_FALLTHROUGH;
313
+ case 2:
314
+ if (((a = (*--srcptr))& 0xC0) != 0x80)
315
+ return conv_errc::expected_continuation_byte;
316
+
317
+ switch (static_cast<uint8_t>(*first))
318
+ {
319
+ // no fall-through in this inner switch
320
+ case 0xE0: if (a < 0xA0) return conv_errc::source_illegal; break;
321
+ case 0xED: if (a > 0x9F) return conv_errc::source_illegal; break;
322
+ case 0xF0: if (a < 0x90) return conv_errc::source_illegal; break;
323
+ case 0xF4: if (a > 0x8F) return conv_errc::source_illegal; break;
324
+ default: if (a < 0x80) return conv_errc::source_illegal;
325
+ }
326
+
327
+ JSONCONS_FALLTHROUGH;
328
+ case 1:
329
+ if (static_cast<uint8_t>(*first) >= 0x80 && static_cast<uint8_t>(*first) < 0xC2)
330
+ return conv_errc::source_illegal;
331
+ break;
332
+ }
333
+ if (static_cast<uint8_t>(*first) > 0xF4)
334
+ return conv_errc::source_illegal;
335
+
336
+ return conv_errc();
337
+ }
338
+
339
// Helper for void_t: routes the pack through a class template so that every
// argument is actually substituted.
template <class...>
struct void_t_helper_
{
    using type = void;
};

// Maps any list of well-formed types to void; used for SFINAE detection.
// Defined via void_t_helper_ rather than as a direct alias to void, because
// unused parameters of an alias template may be dropped without triggering
// substitution failure on compilers that predate the CWG 1558 resolution.
template <class... Ts>
using void_t = typename void_t_helper_<Ts...>::type;
340
+
341
+ template <class, class, class = void>
342
+ struct is_output_iterator : std::false_type {};
343
+
344
+ template <class I, class E>
345
+ struct is_output_iterator<I, E, void_t<
346
+ typename std::iterator_traits<I>::iterator_category,
347
+ decltype(*std::declval<I>() = std::declval<E>())>> : std::true_type {};
348
+
349
+ // is_same_size fixes issue with vs2013
350
+
351
+ // primary template
352
// Compile-time test for "T1 and T2 occupy the same number of bytes".
// Primary template: false. It is the one selected when either type is void,
// for which sizeof cannot be evaluated.
template<class T1, class T2, class Enable = void>
struct is_same_size : std::false_type
{
};

// Specialization for two non-void types. It derives from
// std::integral_constant so that, like the primary template, it provides
// `value`, `type`, `value_type` and the conversion to bool, and so that
// `value` has a definition when it is ODR-used.
template<class T1, class T2>
struct is_same_size<T1, T2, typename std::enable_if<!std::is_void<T1>::value && !std::is_void<T2>::value>::type>
    : std::integral_constant<bool, (sizeof(T1) == sizeof(T2))>
{
};
363
+
364
+ // convert
365
+
366
+ template <class CharT>
367
+ struct convert_result
368
+ {
369
+ const CharT* ptr;
370
+ conv_errc ec;
371
+ };
372
+
373
+ // to_codepoint
374
+
375
+ template <class CharT,class CodepointT>
376
+ typename std::enable_if<traits_extension::is_char8<CharT>::value && traits_extension::is_char32<CodepointT>::value,
377
+ convert_result<CharT>>::type
378
+ to_codepoint(const CharT* first, const CharT* last,
379
+ CodepointT& ch,
380
+ conv_flags flags = conv_flags::strict) noexcept
381
+ {
382
+ ch = 0;
383
+ if (first >= last)
384
+ {
385
+ return convert_result<CharT>{first, conv_errc::source_exhausted};
386
+ }
387
+ conv_errc result = conv_errc();
388
+
389
+ unsigned short extra_bytes_to_read = trailing_bytes_for_utf8[static_cast<uint8_t>(*first)];
390
+ if (extra_bytes_to_read >= last - first)
391
+ {
392
+ result = conv_errc::source_exhausted;
393
+ return convert_result<CharT>{first, result};
394
+ }
395
+ // Do this check whether lenient or strict
396
+ if ((result=is_legal_utf8(first, extra_bytes_to_read+1)) != conv_errc())
397
+ {
398
+ return convert_result<CharT>{first, result};
399
+ }
400
+ // The cases all fall through. See "Note A" below.
401
+ switch (extra_bytes_to_read)
402
+ {
403
+ case 5:
404
+ ch += static_cast<uint8_t>(*first++);
405
+ ch <<= 6;
406
+ JSONCONS_FALLTHROUGH;
407
+ case 4:
408
+ ch += static_cast<uint8_t>(*first++);
409
+ ch <<= 6;
410
+ JSONCONS_FALLTHROUGH;
411
+ case 3:
412
+ ch += static_cast<uint8_t>(*first++);
413
+ ch <<= 6;
414
+ JSONCONS_FALLTHROUGH;
415
+ case 2:
416
+ ch += static_cast<uint8_t>(*first++);
417
+ ch <<= 6;
418
+ JSONCONS_FALLTHROUGH;
419
+ case 1:
420
+ ch += static_cast<uint8_t>(*first++);
421
+ ch <<= 6;
422
+ JSONCONS_FALLTHROUGH;
423
+ case 0:
424
+ ch += static_cast<uint8_t>(*first++);
425
+ break;
426
+ }
427
+ ch -= offsets_from_utf8[extra_bytes_to_read];
428
+
429
+ if (ch <= max_legal_utf32) {
430
+ /*
431
+ * UTF-16 surrogate values are illegal in UTF-32, and anything
432
+ * over Plane 17 (> 0x10FFFF) is illegal.
433
+ */
434
+ if (is_surrogate(ch) )
435
+ {
436
+ if (flags == conv_flags::strict)
437
+ {
438
+ first -= (extra_bytes_to_read+1); // return to the illegal value itself
439
+ result = conv_errc::source_illegal;
440
+ return convert_result<CharT>{first, result};
441
+ }
442
+ else
443
+ {
444
+ ch = replacement_char;
445
+ }
446
+ }
447
+ }
448
+ else // i.e., ch > max_legal_utf32
449
+ {
450
+ result = conv_errc::source_illegal;
451
+ ch = replacement_char;
452
+ }
453
+
454
+ return convert_result<CharT>{first,result} ;
455
+ }
456
+
457
+ template <class CharT,class CodepointT>
458
+ typename std::enable_if<traits_extension::is_char16<CharT>::value && traits_extension::is_char32<CodepointT>::value,
459
+ convert_result<CharT>>::type
460
+ to_codepoint(const CharT* first, const CharT* last,
461
+ CodepointT& ch,
462
+ conv_flags flags = conv_flags::strict) noexcept
463
+ {
464
+ ch = 0;
465
+ if (first >= last)
466
+ {
467
+ return convert_result<CharT>{first, conv_errc::source_exhausted};
468
+ }
469
+ conv_errc result = conv_errc();
470
+
471
+ ch = *first++;
472
+ // If we have a surrogate pair, convert to UTF32 first.
473
+ if (is_high_surrogate(ch))
474
+ {
475
+ // If the 16 bits following the high surrogate are in the first buffer...
476
+ if (first < last)
477
+ {
478
+ uint32_t ch2 = *first;
479
+ // If ptr's a low surrogate, convert to UTF32.
480
+ if (ch2 >= sur_low_start && ch2 <= sur_low_end )
481
+ {
482
+ ch = ((ch - sur_high_start) << half_shift)
483
+ + (ch2 - sur_low_start) + half_base;
484
+ ++first;
485
+ }
486
+ else if (flags == conv_flags::strict) // ptr's an unpaired high surrogate
487
+ {
488
+ --first; /* return to the illegal value itself */
489
+ result = conv_errc::source_illegal;
490
+ return convert_result<CharT>{first, result};
491
+ }
492
+ }
493
+ else
494
+ { /* We don't have the 16 bits following the high surrogate. */
495
+ --first; /* return to the high surrogate */
496
+ result = conv_errc::source_exhausted;
497
+ return convert_result<CharT>{first, result};
498
+ }
499
+ } else if (flags == conv_flags::strict) {
500
+ /* UTF-16 surrogate values are illegal in UTF-32 */
501
+ if (is_low_surrogate(ch) )
502
+ {
503
+ --first; /* return to the illegal value itself */
504
+ result = conv_errc::source_illegal;
505
+ return convert_result<CharT>{first, result};
506
+ }
507
+ }
508
+
509
+ return convert_result<CharT>{first,result} ;
510
+ }
511
+
512
+ template <class CharT,class CodepointT>
513
+ typename std::enable_if<traits_extension::is_char32<CharT>::value && traits_extension::is_char32<CodepointT>::value,
514
+ convert_result<CharT>>::type
515
+ to_codepoint(const CharT* first, const CharT* last,
516
+ CodepointT& ch,
517
+ conv_flags flags = conv_flags::strict) noexcept
518
+ {
519
+ ch = 0;
520
+ if (first >= last)
521
+ {
522
+ return convert_result<CharT>{first, conv_errc::source_exhausted};
523
+ }
524
+ conv_errc result = conv_errc();
525
+
526
+ ch = *first++;
527
+ if (flags == conv_flags::strict )
528
+ {
529
+ /* UTF-16 surrogate values are illegal in UTF-32 */
530
+ if (is_surrogate(ch))
531
+ {
532
+ --first; /* return to the illegal value itself */
533
+ result = conv_errc::illegal_surrogate_value;
534
+ return convert_result<CharT>{first,result} ;
535
+ }
536
+ }
537
+ if (!(ch <= max_legal_utf32))
538
+ {
539
+ ch = replacement_char;
540
+ result = conv_errc::source_illegal;
541
+ }
542
+
543
+ return convert_result<CharT>{first,result} ;
544
+ }
545
+
546
+ // convert
547
+
548
+ template <class CharT,class Container>
549
+ typename std::enable_if<traits_extension::is_char8<CharT>::value
550
+ && traits_extension::is_back_insertable<Container>::value
551
+ && traits_extension::is_char8<typename Container::value_type>::value,
552
+ convert_result<CharT>>::type
553
+ convert(const CharT* data, std::size_t length, Container& target, conv_flags flags=conv_flags::strict)
554
+ {
555
+ (void)flags;
556
+
557
+ conv_errc result = conv_errc();
558
+ const CharT* last = data + length;
559
+ while (data != last)
560
+ {
561
+ std::size_t len = trailing_bytes_for_utf8[static_cast<uint8_t>(*data)] + 1;
562
+ if (len > (std::size_t)(last - data))
563
+ {
564
+ return convert_result<CharT>{data, conv_errc::source_exhausted};
565
+ }
566
+ if ((result=is_legal_utf8(data, len)) != conv_errc())
567
+ {
568
+ return convert_result<CharT>{data,result};
569
+ }
570
+
571
+ switch (len) {
572
+ case 4: target.push_back(static_cast<uint8_t>(*data++));
573
+ JSONCONS_FALLTHROUGH;
574
+ case 3: target.push_back(static_cast<uint8_t>(*data++));
575
+ JSONCONS_FALLTHROUGH;
576
+ case 2: target.push_back(static_cast<uint8_t>(*data++));
577
+ JSONCONS_FALLTHROUGH;
578
+ case 1: target.push_back(static_cast<uint8_t>(*data++));
579
+ }
580
+ }
581
+ return convert_result<CharT>{data,result} ;
582
+ }
583
+
584
+ template <class CharT,class Container>
585
+ typename std::enable_if<traits_extension::is_char8<CharT>::value
586
+ && traits_extension::is_back_insertable<Container>::value
587
+ && traits_extension::is_char16<typename Container::value_type>::value,
588
+ convert_result<CharT>>::type
589
+ convert(const CharT* data, std::size_t length,
590
+ Container& target,
591
+ conv_flags flags = conv_flags::strict)
592
+ {
593
+ conv_errc result = conv_errc();
594
+
595
+ const CharT* last = data + length;
596
+ while (data != last)
597
+ {
598
+ unsigned short extra_bytes_to_read = trailing_bytes_for_utf8[static_cast<uint8_t>(*data)];
599
+ if (extra_bytes_to_read >= last - data)
600
+ {
601
+ result = conv_errc::source_exhausted;
602
+ break;
603
+ }
604
+ /* Do this check whether lenient or strict */
605
+ if ((result=is_legal_utf8(data, extra_bytes_to_read+1)) != conv_errc())
606
+ {
607
+ break;
608
+ }
609
+ /*
610
+ * The cases all fall through. See "Note A" below.
611
+ */
612
+ uint32_t ch = 0;
613
+ switch (extra_bytes_to_read) {
614
+ case 5: ch += static_cast<uint8_t>(*data++); ch <<= 6; /* remember, illegal UTF-8 */
615
+ JSONCONS_FALLTHROUGH;
616
+ case 4: ch += static_cast<uint8_t>(*data++); ch <<= 6; /* remember, illegal UTF-8 */
617
+ JSONCONS_FALLTHROUGH;
618
+ case 3: ch += static_cast<uint8_t>(*data++); ch <<= 6;
619
+ JSONCONS_FALLTHROUGH;
620
+ case 2: ch += static_cast<uint8_t>(*data++); ch <<= 6;
621
+ JSONCONS_FALLTHROUGH;
622
+ case 1: ch += static_cast<uint8_t>(*data++); ch <<= 6;
623
+ JSONCONS_FALLTHROUGH;
624
+ case 0: ch += static_cast<uint8_t>(*data++);
625
+ break;
626
+ }
627
+ ch -= offsets_from_utf8[extra_bytes_to_read];
628
+
629
+ if (ch <= max_bmp) { /* Target is a character <= 0xFFFF */
630
+ /* UTF-16 surrogate values are illegal in UTF-32 */
631
+ if (is_surrogate(ch) )
632
+ {
633
+ if (flags == conv_flags::strict) {
634
+ data -= (extra_bytes_to_read+1); /* return to the illegal value itself */
635
+ result = conv_errc::source_illegal;
636
+ break;
637
+ } else {
638
+ target.push_back(replacement_char);
639
+ }
640
+ } else {
641
+ target.push_back((uint16_t)ch); /* normal case */
642
+ }
643
+ } else if (ch > max_utf16) {
644
+ if (flags == conv_flags::strict) {
645
+ result = conv_errc::source_illegal;
646
+ data -= (extra_bytes_to_read+1); /* return to the start */
647
+ break; /* Bail out; shouldn't continue */
648
+ } else {
649
+ target.push_back(replacement_char);
650
+ }
651
+ } else {
652
+ /* target is a character in range 0xFFFF - 0x10FFFF. */
653
+ ch -= half_base;
654
+ target.push_back((uint16_t)((ch >> half_shift) + sur_high_start));
655
+ target.push_back((uint16_t)((ch & half_mask) + sur_low_start));
656
+ }
657
+ }
658
+ return convert_result<CharT>{data,result} ;
659
+ }
660
+
661
+ template <class CharT,class Container>
662
+ typename std::enable_if<traits_extension::is_char8<CharT>::value
663
+ && traits_extension::is_back_insertable<Container>::value
664
+ && traits_extension::is_char32<typename Container::value_type>::value,
665
+ convert_result<CharT>>::type
666
+ convert(const CharT* data, std::size_t length,
667
+ Container& target,
668
+ conv_flags flags = conv_flags::strict)
669
+ {
670
+ conv_errc result = conv_errc();
671
+
672
+ const CharT* last = data + length;
673
+ while (data < last)
674
+ {
675
+ uint32_t ch = 0;
676
+ unsigned short extra_bytes_to_read = trailing_bytes_for_utf8[static_cast<uint8_t>(*data)];
677
+ if (extra_bytes_to_read >= last - data)
678
+ {
679
+ result = conv_errc::source_exhausted;
680
+ break;
681
+ }
682
+ /* Do this check whether lenient or strict */
683
+ if ((result=is_legal_utf8(data, extra_bytes_to_read+1)) != conv_errc())
684
+ {
685
+ break;
686
+ }
687
+ /*
688
+ * The cases all fall through. See "Note A" below.
689
+ */
690
+ switch (extra_bytes_to_read)
691
+ {
692
+ case 5:
693
+ ch += static_cast<uint8_t>(*data++);
694
+ ch <<= 6;
695
+ JSONCONS_FALLTHROUGH;
696
+ case 4:
697
+ ch += static_cast<uint8_t>(*data++);
698
+ ch <<= 6;
699
+ JSONCONS_FALLTHROUGH;
700
+ case 3:
701
+ ch += static_cast<uint8_t>(*data++);
702
+ ch <<= 6;
703
+ JSONCONS_FALLTHROUGH;
704
+ case 2:
705
+ ch += static_cast<uint8_t>(*data++);
706
+ ch <<= 6;
707
+ JSONCONS_FALLTHROUGH;
708
+ case 1:
709
+ ch += static_cast<uint8_t>(*data++);
710
+ ch <<= 6;
711
+ JSONCONS_FALLTHROUGH;
712
+ case 0:
713
+ ch += static_cast<uint8_t>(*data++);
714
+ break;
715
+ }
716
+ ch -= offsets_from_utf8[extra_bytes_to_read];
717
+
718
+ if (ch <= max_legal_utf32) {
719
+ /*
720
+ * UTF-16 surrogate values are illegal in UTF-32, and anything
721
+ * over Plane 17 (> 0x10FFFF) is illegal.
722
+ */
723
+ if (is_surrogate(ch) )
724
+ {
725
+ if (flags == conv_flags::strict) {
726
+ data -= (extra_bytes_to_read+1); /* return to the illegal value itself */
727
+ result = conv_errc::source_illegal;
728
+ break;
729
+ } else {
730
+ target.push_back(replacement_char);
731
+ }
732
+ } else {
733
+ target.push_back(ch);
734
+ }
735
+ } else { /* i.e., ch > max_legal_utf32 */
736
+ result = conv_errc::source_illegal;
737
+ target.push_back(replacement_char);
738
+ }
739
+ }
740
+ return convert_result<CharT>{data,result} ;
741
+ }
742
+
743
+ // utf16
744
+
745
+ template <class CharT,class Container>
746
+ typename std::enable_if<traits_extension::is_char16<CharT>::value
747
+ && traits_extension::is_back_insertable<Container>::value
748
+ && traits_extension::is_char8<typename Container::value_type>::value,
749
+ convert_result<CharT>>::type
750
+ convert(const CharT* data, std::size_t length,
751
+ Container& target,
752
+ conv_flags flags = conv_flags::strict) {
753
+ conv_errc result = conv_errc();
754
+
755
+ const CharT* last = data + length;
756
+ while (data < last) {
757
+ unsigned short bytes_to_write = 0;
758
+ const uint32_t byteMask = 0xBF;
759
+ const uint32_t byteMark = 0x80;
760
+ uint32_t ch = *data++;
761
+ /* If we have a surrogate pair, convert to uint32_t data. */
762
+ if (is_high_surrogate(ch))
763
+ {
764
+ /* If the 16 bits following the high surrogate are in the data buffer... */
765
+ if (data < last) {
766
+ uint32_t ch2 = *data;
767
+ /* If ptr's a low surrogate, convert to uint32_t. */
768
+ if (ch2 >= sur_low_start && ch2 <= sur_low_end) {
769
+ ch = ((ch - sur_high_start) << half_shift)
770
+ + (ch2 - sur_low_start) + half_base;
771
+ ++data;
772
+ } else if (flags == conv_flags::strict) { /* ptr's an unpaired high surrogate */
773
+ --data; /* return to the illegal value itself */
774
+ result = conv_errc::unpaired_high_surrogate;
775
+ break;
776
+ }
777
+ } else { /* We don't have the 16 bits following the high surrogate. */
778
+ --data; /* return to the high surrogate */
779
+ result = conv_errc::source_exhausted;
780
+ break;
781
+ }
782
+ } else if (flags == conv_flags::strict) {
783
+ /* UTF-16 surrogate values are illegal in UTF-32 */
784
+ if (is_low_surrogate(ch))
785
+ {
786
+ --data; /* return to the illegal value itself */
787
+ result = conv_errc::source_illegal;
788
+ break;
789
+ }
790
+ }
791
+ /* Figure out how many bytes the result will require */
792
+ if (ch < (uint32_t)0x80) {
793
+ bytes_to_write = 1;
794
+ } else if (ch < (uint32_t)0x800) {
795
+ bytes_to_write = 2;
796
+ } else if (ch < (uint32_t)0x10000) {
797
+ bytes_to_write = 3;
798
+ } else if (ch < (uint32_t)0x110000) {
799
+ bytes_to_write = 4;
800
+ } else {
801
+ bytes_to_write = 3;
802
+ ch = replacement_char;
803
+ }
804
+
805
+ uint8_t byte1 = 0;
806
+ uint8_t byte2 = 0;
807
+ uint8_t byte3 = 0;
808
+ uint8_t byte4 = 0;
809
+
810
+ switch (bytes_to_write) { // note: everything falls through
811
+ case 4: byte4 = (uint8_t)((ch | byteMark) & byteMask); ch >>= 6;
812
+ JSONCONS_FALLTHROUGH;
813
+ case 3: byte3 = (uint8_t)((ch | byteMark) & byteMask); ch >>= 6;
814
+ JSONCONS_FALLTHROUGH;
815
+ case 2: byte2 = (uint8_t)((ch | byteMark) & byteMask); ch >>= 6;
816
+ JSONCONS_FALLTHROUGH;
817
+ case 1: byte1 = (uint8_t)(ch | first_byte_mark[bytes_to_write]);
818
+ break;
819
+ }
820
+ switch (bytes_to_write)
821
+ {
822
+ case 4:
823
+ target.push_back(byte1);
824
+ target.push_back(byte2);
825
+ target.push_back(byte3);
826
+ target.push_back(byte4);
827
+ break;
828
+ case 3:
829
+ target.push_back(byte1);
830
+ target.push_back(byte2);
831
+ target.push_back(byte3);
832
+ break;
833
+ case 2:
834
+ target.push_back(byte1);
835
+ target.push_back(byte2);
836
+ break;
837
+ case 1:
838
+ target.push_back(byte1);
839
+ break;
840
+ }
841
+ }
842
+ return convert_result<CharT>{data,result} ;
843
+ }
844
+
845
+ template <class CharT,class Container>
846
+ typename std::enable_if<traits_extension::is_char16<CharT>::value
847
+ && traits_extension::is_back_insertable<Container>::value
848
+ && traits_extension::is_char16<typename Container::value_type>::value,
849
+ convert_result<CharT>>::type
850
+ convert(const CharT* data, std::size_t length,
851
+ Container& target,
852
+ conv_flags flags = conv_flags::strict)
853
+ {
854
+ conv_errc result = conv_errc();
855
+
856
+ const CharT* last = data + length;
857
+ while (data != last)
858
+ {
859
+ uint32_t ch = *data++;
860
+ /* If we have a surrogate pair, convert to uint32_t data. */
861
+ if (is_high_surrogate(ch))
862
+ {
863
+ /* If the 16 bits following the high surrogate are in the data buffer... */
864
+ if (data < last) {
865
+ uint32_t ch2 = *data;
866
+ /* If ptr's a low surrogate, */
867
+ if (ch2 >= sur_low_start && ch2 <= sur_low_end) {
868
+ target.push_back((uint16_t)ch);
869
+ target.push_back((uint16_t)ch2);
870
+ ++data;
871
+ } else if (flags == conv_flags::strict) { /* ptr's an unpaired high surrogate */
872
+ --data; /* return to the illegal value itself */
873
+ result = conv_errc::unpaired_high_surrogate;
874
+ break;
875
+ }
876
+ } else { /* We don't have the 16 bits following the high surrogate. */
877
+ --data; /* return to the high surrogate */
878
+ result = conv_errc::source_exhausted;
879
+ break;
880
+ }
881
+ } else if (is_low_surrogate(ch))
882
+ {
883
+ // illegal leading low surrogate
884
+ if (flags == conv_flags::strict) {
885
+ --data; /* return to the illegal value itself */
886
+ result = conv_errc::source_illegal;
887
+ break;
888
+ }
889
+ else
890
+ {
891
+ target.push_back((uint16_t)ch);
892
+ }
893
+ }
894
+ else
895
+ {
896
+ target.push_back((uint16_t)ch);
897
+ }
898
+ }
899
+ return convert_result<CharT>{data,result} ;
900
+ }
901
+
902
+ template <class CharT,class Container>
903
+ typename std::enable_if<traits_extension::is_char16<CharT>::value
904
+ && traits_extension::is_back_insertable<Container>::value
905
+ && traits_extension::is_char32<typename Container::value_type>::value,
906
+ convert_result<CharT>>::type
907
+ convert(const CharT* data, std::size_t length,
908
+ Container& target,
909
+ conv_flags flags = conv_flags::strict)
910
+ {
911
+ conv_errc result = conv_errc();
912
+
913
+ const CharT* last = data + length;
914
+ while (data != last)
915
+ {
916
+ uint32_t ch = *data++;
917
+ /* If we have a surrogate pair, convert to UTF32 data. */
918
+ if (is_high_surrogate(ch))
919
+ {
920
+ /* If the 16 bits following the high surrogate are in the data buffer... */
921
+ if (data < last) {
922
+ uint32_t ch2 = *data;
923
+ /* If ptr's a low surrogate, convert to UTF32. */
924
+ if (ch2 >= sur_low_start && ch2 <= sur_low_end )
925
+ {
926
+ ch = ((ch - sur_high_start) << half_shift)
927
+ + (ch2 - sur_low_start) + half_base;
928
+ ++data;
929
+ } else if (flags == conv_flags::strict) { /* ptr's an unpaired high surrogate */
930
+ --data; /* return to the illegal value itself */
931
+ result = conv_errc::source_illegal;
932
+ break;
933
+ }
934
+ } else { /* We don't have the 16 bits following the high surrogate. */
935
+ --data; /* return to the high surrogate */
936
+ result = conv_errc::source_exhausted;
937
+ break;
938
+ }
939
+ } else if (flags == conv_flags::strict) {
940
+ /* UTF-16 surrogate values are illegal in UTF-32 */
941
+ if (is_low_surrogate(ch) )
942
+ {
943
+ --data; /* return to the illegal value itself */
944
+ result = conv_errc::source_illegal;
945
+ break;
946
+ }
947
+ }
948
+ target.push_back(ch);
949
+ }
950
+ return convert_result<CharT>{data,result} ;
951
+ }
952
+
953
+ // utf32
954
+
955
+ template <class CharT,class Container>
956
+ typename std::enable_if<traits_extension::is_char32<CharT>::value
957
+ && traits_extension::is_back_insertable<Container>::value
958
+ && traits_extension::is_char8<typename Container::value_type>::value,
959
+ convert_result<CharT>>::type
960
+ convert(const CharT* data, std::size_t length,
961
+ Container& target,
962
+ conv_flags flags = conv_flags::strict)
963
+ {
964
+ conv_errc result = conv_errc();
965
+ const CharT* last = data + length;
966
+ while (data < last)
967
+ {
968
+ unsigned short bytes_to_write = 0;
969
+ const uint32_t byteMask = 0xBF;
970
+ const uint32_t byteMark = 0x80;
971
+ uint32_t ch = *data++;
972
+ if (flags == conv_flags::strict )
973
+ {
974
+ /* UTF-16 surrogate values are illegal in UTF-32 */
975
+ if (is_surrogate(ch))
976
+ {
977
+ --data; /* return to the illegal value itself */
978
+ result = conv_errc::illegal_surrogate_value;
979
+ break;
980
+ }
981
+ }
982
+ /*
983
+ * Figure out how many bytes the result will require. Turn any
984
+ * illegally large UTF32 things (> Plane 17) into replacement chars.
985
+ */
986
+ if (ch < (uint32_t)0x80) { bytes_to_write = 1;
987
+ } else if (ch < (uint32_t)0x800) { bytes_to_write = 2;
988
+ } else if (ch < (uint32_t)0x10000) { bytes_to_write = 3;
989
+ } else if (ch <= max_legal_utf32) { bytes_to_write = 4;
990
+ } else {
991
+ bytes_to_write = 3;
992
+ ch = replacement_char;
993
+ result = conv_errc::source_illegal;
994
+ }
995
+
996
+ uint8_t byte1 = 0;
997
+ uint8_t byte2 = 0;
998
+ uint8_t byte3 = 0;
999
+ uint8_t byte4 = 0;
1000
+
1001
+ switch (bytes_to_write) {
1002
+ case 4:
1003
+ byte4 = (uint8_t)((ch | byteMark) & byteMask); ch >>= 6;
1004
+ JSONCONS_FALLTHROUGH;
1005
+ case 3:
1006
+ byte3 = (uint8_t)((ch | byteMark) & byteMask); ch >>= 6;
1007
+ JSONCONS_FALLTHROUGH;
1008
+ case 2:
1009
+ byte2 = (uint8_t)((ch | byteMark) & byteMask); ch >>= 6;
1010
+ JSONCONS_FALLTHROUGH;
1011
+ case 1:
1012
+ byte1 = (uint8_t) (ch | first_byte_mark[bytes_to_write]);
1013
+ break;
1014
+ }
1015
+
1016
+ switch (bytes_to_write)
1017
+ {
1018
+ case 4:
1019
+ target.push_back(byte1);
1020
+ target.push_back(byte2);
1021
+ target.push_back(byte3);
1022
+ target.push_back(byte4);
1023
+ break;
1024
+ case 3:
1025
+ target.push_back(byte1);
1026
+ target.push_back(byte2);
1027
+ target.push_back(byte3);
1028
+ break;
1029
+ case 2:
1030
+ target.push_back(byte1);
1031
+ target.push_back(byte2);
1032
+ break;
1033
+ case 1:
1034
+ target.push_back(byte1);
1035
+ break;
1036
+ }
1037
+ }
1038
+ return convert_result<CharT>{data,result} ;
1039
+ }
1040
+
1041
+ template <class CharT,class Container>
1042
+ typename std::enable_if<traits_extension::is_char32<CharT>::value
1043
+ && traits_extension::is_back_insertable<Container>::value
1044
+ && traits_extension::is_char16<typename Container::value_type>::value,
1045
+ convert_result<CharT>>::type
1046
+ convert(const CharT* data, std::size_t length,
1047
+ Container& target,
1048
+ conv_flags flags = conv_flags::strict)
1049
+ {
1050
+ conv_errc result = conv_errc();
1051
+
1052
+ const CharT* last = data + length;
1053
+ while (data != last)
1054
+ {
1055
+ uint32_t ch = *data++;
1056
+ if (ch <= max_bmp) { /* Target is a character <= 0xFFFF */
1057
+ /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */
1058
+ if (is_surrogate(ch) )
1059
+ {
1060
+ if (flags == conv_flags::strict) {
1061
+ --data; /* return to the illegal value itself */
1062
+ result = conv_errc::source_illegal;
1063
+ break;
1064
+ } else {
1065
+ target.push_back(replacement_char);
1066
+ }
1067
+ } else {
1068
+ target.push_back((uint16_t)ch); /* normal case */
1069
+ }
1070
+ } else if (ch > max_legal_utf32) {
1071
+ if (flags == conv_flags::strict) {
1072
+ result = conv_errc::source_illegal;
1073
+ } else {
1074
+ target.push_back(replacement_char);
1075
+ }
1076
+ } else {
1077
+ /* target is a character in range 0xFFFF - 0x10FFFF. */
1078
+ ch -= half_base;
1079
+ target.push_back((uint16_t)((ch >> half_shift) + sur_high_start));
1080
+ target.push_back((uint16_t)((ch & half_mask) + sur_low_start));
1081
+ }
1082
+ }
1083
+ return convert_result<CharT>{data,result} ;
1084
+ }
1085
+
1086
+ template <class CharT,class Container>
1087
+ typename std::enable_if<traits_extension::is_char32<CharT>::value
1088
+ && traits_extension::is_back_insertable<Container>::value
1089
+ && traits_extension::is_char32<typename Container::value_type>::value,
1090
+ convert_result<CharT>>::type
1091
+ convert(const CharT* data, std::size_t length,
1092
+ Container& target,
1093
+ conv_flags flags = conv_flags::strict)
1094
+ {
1095
+ conv_errc result = conv_errc();
1096
+
1097
+ const CharT* last = data + length;
1098
+ while (data != last)
1099
+ {
1100
+ uint32_t ch = *data++;
1101
+ if (flags == conv_flags::strict )
1102
+ {
1103
+ /* UTF-16 surrogate values are illegal in UTF-32 */
1104
+ if (is_surrogate(ch))
1105
+ {
1106
+ --data; /* return to the illegal value itself */
1107
+ result = conv_errc::illegal_surrogate_value;
1108
+ break;
1109
+ }
1110
+ }
1111
+ if (ch <= max_legal_utf32)
1112
+ {
1113
+ target.push_back(ch);
1114
+ }
1115
+ else
1116
+ {
1117
+ target.push_back(replacement_char);
1118
+ result = conv_errc::source_illegal;
1119
+ }
1120
+ }
1121
+ return convert_result<CharT>{data,result} ;
1122
+ }
1123
+
1124
+ // validate
1125
+
1126
+ template <class CharT>
1127
+ typename std::enable_if<traits_extension::is_char8<CharT>::value,
1128
+ convert_result<CharT>>::type
1129
+ validate(const CharT* data, std::size_t length) noexcept
1130
+ {
1131
+ conv_errc result = conv_errc();
1132
+ const CharT* last = data + length;
1133
+ while (data != last)
1134
+ {
1135
+ std::size_t len = static_cast<std::size_t>(trailing_bytes_for_utf8[static_cast<uint8_t>(*data)]) + 1;
1136
+ if (len > (std::size_t)(last - data))
1137
+ {
1138
+ return convert_result<CharT>{data, conv_errc::source_exhausted};
1139
+ }
1140
+ if ((result=is_legal_utf8(data, len)) != conv_errc())
1141
+ {
1142
+ return convert_result<CharT>{data,result} ;
1143
+ }
1144
+ data += len;
1145
+ }
1146
+ return convert_result<CharT>{data,result} ;
1147
+ }
1148
+
1149
+ // utf16
1150
+
1151
+ template <class CharT>
1152
+ typename std::enable_if<traits_extension::is_char16<CharT>::value,
1153
+ convert_result<CharT>>::type
1154
+ validate(const CharT* data, std::size_t length) noexcept
1155
+ {
1156
+ conv_errc result = conv_errc();
1157
+
1158
+ const CharT* last = data + length;
1159
+ while (data != last)
1160
+ {
1161
+ uint32_t ch = *data++;
1162
+ /* If we have a surrogate pair, validate to uint32_t data. */
1163
+ if (is_high_surrogate(ch))
1164
+ {
1165
+ /* If the 16 bits following the high surrogate are in the data buffer... */
1166
+ if (data < last) {
1167
+ uint32_t ch2 = *data;
1168
+ /* If ptr's a low surrogate, */
1169
+ if (ch2 >= sur_low_start && ch2 <= sur_low_end) {
1170
+ ++data;
1171
+ } else {
1172
+ --data; /* return to the illegal value itself */
1173
+ result = conv_errc::unpaired_high_surrogate;
1174
+ break;
1175
+ }
1176
+ }
1177
+ else // We don't have the 16 bits following the high surrogate.
1178
+ {
1179
+ --data; /* return to the high surrogate */
1180
+ result = conv_errc::source_exhausted;
1181
+ break;
1182
+ }
1183
+ }
1184
+ else if (is_low_surrogate(ch))
1185
+ {
1186
+ /* UTF-16 surrogate values are illegal in UTF-32 */
1187
+ --data; /* return to the illegal value itself */
1188
+ result = conv_errc::source_illegal;
1189
+ break;
1190
+ }
1191
+ }
1192
+ return convert_result<CharT>{data,result} ;
1193
+ }
1194
+
1195
+ // utf32
1196
+
1197
+ template <class CharT>
1198
+ typename std::enable_if<traits_extension::is_char32<CharT>::value,
1199
+ convert_result<CharT>>::type
1200
+ validate(const CharT* data, std::size_t length) noexcept
1201
+ {
1202
+ conv_errc result = conv_errc();
1203
+
1204
+ const CharT* last = data + length;
1205
+ while (data != last)
1206
+ {
1207
+ uint32_t ch = *data++;
1208
+ /* UTF-16 surrogate values are illegal in UTF-32 */
1209
+ if (is_surrogate(ch))
1210
+ {
1211
+ --data; /* return to the illegal value itself */
1212
+ result = conv_errc::illegal_surrogate_value;
1213
+ break;
1214
+ }
1215
+ if (!(ch <= max_legal_utf32))
1216
+ {
1217
+ result = conv_errc::source_illegal;
1218
+ }
1219
+ }
1220
+ return convert_result<CharT>{data, result} ;
1221
+ }
1222
+
1223
// Encodings that detect_encoding can report. `undetected` is returned when
// neither a byte order mark nor a recognised zero-byte pattern is found.
enum class encoding {u8,u16le,u16be,u32le,u32be,undetected};

// Result of detect_encoding: `it` is where the content starts (just past a
// byte order mark when one was found), `ec` is the detected encoding.
template <class Iterator>
struct determine_encoding_result
{
    Iterator it;
    encoding ec;
};

// Guesses the Unicode encoding of a byte sequence.
//
// first, last : range of one-byte integral values to inspect.
//
// With four or more bytes available, a byte order mark is looked for first
// (UTF-32 before UTF-16, because the UTF-32LE mark begins with the UTF-16LE
// one); if one is present the returned iterator points just past it.
// Without a mark, the pattern of zero and non-zero bytes among the first
// four decides, and the returned iterator is `first`.
// With fewer than four bytes, only an input consisting of exactly the UTF-8
// mark is recognised (returned iterator is `last`); anything else is
// reported as undetected.
template <class Iterator>
typename std::enable_if<std::is_integral<typename std::iterator_traits<Iterator>::value_type>::value && sizeof(typename std::iterator_traits<Iterator>::value_type) == sizeof(uint8_t),
                              determine_encoding_result<Iterator>>::type
detect_encoding(Iterator first, Iterator last) noexcept
{
    using result_type = determine_encoding_result<Iterator>;

    const auto available = std::distance(first, last);
    if (available < 4)
    {
        if (available == 3)
        {
            Iterator it2 = std::next(first);
            Iterator it3 = std::next(it2);
            if (static_cast<uint8_t>(*first) == 0xEF && static_cast<uint8_t>(*it2) == 0xBB && static_cast<uint8_t>(*it3) == 0xBF)
            {
                return result_type{last,encoding::u8};
            }
        }
        return result_type{first,encoding::undetected};
    }

    Iterator it2 = std::next(first);
    Iterator it3 = std::next(it2);
    Iterator it4 = std::next(it3);
    Iterator it5 = std::next(it4); // one past a four-byte mark

    // Widen before shifting so that the top byte never passes through a
    // signed int on its way to bit 31.
    const uint32_t b1 = static_cast<uint8_t>(*first);
    const uint32_t b2 = static_cast<uint8_t>(*it2);
    const uint32_t b3 = static_cast<uint8_t>(*it3);
    const uint32_t b4 = static_cast<uint8_t>(*it4);

    // First four bytes, with the first byte in the low-order position.
    const uint32_t bom = b1 | (b2 << 8) | (b3 << 16) | (b4 << 24);

    if (bom == 0xFFFE0000)                      // 00 00 FE FF
    {
        // Four-byte mark: the content starts at it5 (previously the
        // iterator to the fourth mark byte was returned).
        return result_type{it5,encoding::u32be};
    }
    if (bom == 0x0000FEFF)                      // FF FE 00 00
    {
        return result_type{it5,encoding::u32le};
    }
    if ((bom & 0xFFFF) == 0xFFFE)               // FE FF
    {
        return result_type{it3,encoding::u16be};
    }
    if ((bom & 0xFFFF) == 0xFEFF)               // FF FE
    {
        return result_type{it3,encoding::u16le};
    }
    if ((bom & 0xFFFFFF) == 0xBFBBEF)           // EF BB BF
    {
        return result_type{it4,encoding::u8};
    }

    // No mark: one bit per leading byte, set when that byte is non-zero.
    const uint32_t pattern = (b1 ? 1 : 0) | (b2 ? 2 : 0) | (b3 ? 4 : 0) | (b4 ? 8 : 0);
    switch (pattern)
    {
        case 0x08:                              // 00 00 00 xx
            return result_type{first,encoding::u32be};
        case 0x0A:                              // 00 xx 00 xx
            return result_type{first,encoding::u16be};
        case 0x01:                              // xx 00 00 00
            return result_type{first,encoding::u32le};
        case 0x05:                              // xx 00 xx 00
            return result_type{first,encoding::u16le};
        case 0x0F:                              // xx xx xx xx
            return result_type{first,encoding::u8};
        default:
            return result_type{first,encoding::undetected};
    }
}
1298
+
1299
+ // count_codepoints
1300
+
1301
+ template <class CharT>
1302
+ typename std::enable_if<traits_extension::is_char8<CharT>::value || traits_extension::is_char16<CharT>::value || traits_extension::is_char32<CharT>::value, std::size_t>::type
1303
+ count_codepoints(const CharT* data, std::size_t length,
1304
+ conv_flags flags = conv_flags::strict) noexcept
1305
+ {
1306
+ conv_errc ec = conv_errc();
1307
+
1308
+ std::size_t count = 0;
1309
+ const CharT* ptr = data;
1310
+ const CharT* last = data + length;
1311
+
1312
+ for (; ptr < last; ++count)
1313
+ {
1314
+ uint32_t cp = 0;
1315
+ auto r = to_codepoint(ptr, last, cp, flags);
1316
+ if (r.ec != conv_errc())
1317
+ {
1318
+ ec = r.ec;
1319
+ break;
1320
+ }
1321
+ ptr = r.ptr;
1322
+ }
1323
+ return ec == conv_errc() && ptr == last ? count : 0;
1324
+ }
1325
+
1326
+ } // unicode_traits
1327
+ } // jsoncons
1328
+
1329
+ #endif
1330
+