jsoncons 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. checksums.yaml +7 -0
  2. data/ext/jsoncons/extconf.rb +43 -0
  3. data/ext/jsoncons/jsoncons.cpp +161 -0
  4. data/ext/jsoncons/jsoncons.h +10 -0
  5. data/jsoncons.gemspec +44 -0
  6. data/lib/jsoncons/jsoncons/examples/input/address-book.json +13 -0
  7. data/lib/jsoncons/jsoncons/examples/input/books.json +28 -0
  8. data/lib/jsoncons/jsoncons/examples/input/countries.json +7 -0
  9. data/lib/jsoncons/jsoncons/examples/input/employees.json +30 -0
  10. data/lib/jsoncons/jsoncons/examples/input/jsonschema/name.json +15 -0
  11. data/lib/jsoncons/jsoncons/examples/input/multiple-json-objects.json +3 -0
  12. data/lib/jsoncons/jsoncons/examples/input/sales.csv +6 -0
  13. data/lib/jsoncons/jsoncons/examples/input/store.json +28 -0
  14. data/lib/jsoncons/jsoncons/examples/input/tasks.csv +6 -0
  15. data/lib/jsoncons/jsoncons/include/jsoncons/allocator_holder.hpp +38 -0
  16. data/lib/jsoncons/jsoncons/include/jsoncons/basic_json.hpp +5905 -0
  17. data/lib/jsoncons/jsoncons/include/jsoncons/bigint.hpp +1611 -0
  18. data/lib/jsoncons/jsoncons/include/jsoncons/byte_string.hpp +820 -0
  19. data/lib/jsoncons/jsoncons/include/jsoncons/config/binary_config.hpp +226 -0
  20. data/lib/jsoncons/jsoncons/include/jsoncons/config/compiler_support.hpp +375 -0
  21. data/lib/jsoncons/jsoncons/include/jsoncons/config/jsoncons_config.hpp +309 -0
  22. data/lib/jsoncons/jsoncons/include/jsoncons/config/version.hpp +40 -0
  23. data/lib/jsoncons/jsoncons/include/jsoncons/conv_error.hpp +218 -0
  24. data/lib/jsoncons/jsoncons/include/jsoncons/decode_json.hpp +209 -0
  25. data/lib/jsoncons/jsoncons/include/jsoncons/decode_traits.hpp +651 -0
  26. data/lib/jsoncons/jsoncons/include/jsoncons/detail/endian.hpp +44 -0
  27. data/lib/jsoncons/jsoncons/include/jsoncons/detail/grisu3.hpp +312 -0
  28. data/lib/jsoncons/jsoncons/include/jsoncons/detail/optional.hpp +483 -0
  29. data/lib/jsoncons/jsoncons/include/jsoncons/detail/parse_number.hpp +1133 -0
  30. data/lib/jsoncons/jsoncons/include/jsoncons/detail/span.hpp +188 -0
  31. data/lib/jsoncons/jsoncons/include/jsoncons/detail/string_view.hpp +537 -0
  32. data/lib/jsoncons/jsoncons/include/jsoncons/detail/string_wrapper.hpp +370 -0
  33. data/lib/jsoncons/jsoncons/include/jsoncons/detail/write_number.hpp +567 -0
  34. data/lib/jsoncons/jsoncons/include/jsoncons/encode_json.hpp +315 -0
  35. data/lib/jsoncons/jsoncons/include/jsoncons/encode_traits.hpp +378 -0
  36. data/lib/jsoncons/jsoncons/include/jsoncons/json.hpp +18 -0
  37. data/lib/jsoncons/jsoncons/include/jsoncons/json_array.hpp +324 -0
  38. data/lib/jsoncons/jsoncons/include/jsoncons/json_content_handler.hpp +12 -0
  39. data/lib/jsoncons/jsoncons/include/jsoncons/json_cursor.hpp +448 -0
  40. data/lib/jsoncons/jsoncons/include/jsoncons/json_decoder.hpp +420 -0
  41. data/lib/jsoncons/jsoncons/include/jsoncons/json_encoder.hpp +1587 -0
  42. data/lib/jsoncons/jsoncons/include/jsoncons/json_error.hpp +156 -0
  43. data/lib/jsoncons/jsoncons/include/jsoncons/json_exception.hpp +241 -0
  44. data/lib/jsoncons/jsoncons/include/jsoncons/json_filter.hpp +653 -0
  45. data/lib/jsoncons/jsoncons/include/jsoncons/json_fwd.hpp +23 -0
  46. data/lib/jsoncons/jsoncons/include/jsoncons/json_object.hpp +1772 -0
  47. data/lib/jsoncons/jsoncons/include/jsoncons/json_options.hpp +862 -0
  48. data/lib/jsoncons/jsoncons/include/jsoncons/json_parser.hpp +2900 -0
  49. data/lib/jsoncons/jsoncons/include/jsoncons/json_reader.hpp +731 -0
  50. data/lib/jsoncons/jsoncons/include/jsoncons/json_traits_macros.hpp +1072 -0
  51. data/lib/jsoncons/jsoncons/include/jsoncons/json_traits_macros_deprecated.hpp +144 -0
  52. data/lib/jsoncons/jsoncons/include/jsoncons/json_type.hpp +206 -0
  53. data/lib/jsoncons/jsoncons/include/jsoncons/json_type_traits.hpp +1830 -0
  54. data/lib/jsoncons/jsoncons/include/jsoncons/json_visitor.hpp +1560 -0
  55. data/lib/jsoncons/jsoncons/include/jsoncons/json_visitor2.hpp +2079 -0
  56. data/lib/jsoncons/jsoncons/include/jsoncons/pretty_print.hpp +89 -0
  57. data/lib/jsoncons/jsoncons/include/jsoncons/ser_context.hpp +62 -0
  58. data/lib/jsoncons/jsoncons/include/jsoncons/sink.hpp +289 -0
  59. data/lib/jsoncons/jsoncons/include/jsoncons/source.hpp +777 -0
  60. data/lib/jsoncons/jsoncons/include/jsoncons/source_adaptor.hpp +148 -0
  61. data/lib/jsoncons/jsoncons/include/jsoncons/staj2_cursor.hpp +1189 -0
  62. data/lib/jsoncons/jsoncons/include/jsoncons/staj_cursor.hpp +1254 -0
  63. data/lib/jsoncons/jsoncons/include/jsoncons/staj_iterator.hpp +449 -0
  64. data/lib/jsoncons/jsoncons/include/jsoncons/tag_type.hpp +245 -0
  65. data/lib/jsoncons/jsoncons/include/jsoncons/text_source_adaptor.hpp +144 -0
  66. data/lib/jsoncons/jsoncons/include/jsoncons/traits_extension.hpp +884 -0
  67. data/lib/jsoncons/jsoncons/include/jsoncons/typed_array_view.hpp +250 -0
  68. data/lib/jsoncons/jsoncons/include/jsoncons/unicode_traits.hpp +1330 -0
  69. data/lib/jsoncons/jsoncons/include/jsoncons/uri.hpp +635 -0
  70. data/lib/jsoncons/jsoncons/include/jsoncons/value_converter.hpp +340 -0
  71. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson.hpp +23 -0
  72. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_cursor.hpp +320 -0
  73. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_decimal128.hpp +865 -0
  74. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_encoder.hpp +585 -0
  75. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_error.hpp +103 -0
  76. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_oid.hpp +245 -0
  77. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_options.hpp +75 -0
  78. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_parser.hpp +645 -0
  79. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_reader.hpp +92 -0
  80. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_type.hpp +44 -0
  81. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/decode_bson.hpp +201 -0
  82. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/encode_bson.hpp +144 -0
  83. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor.hpp +26 -0
  84. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_cursor.hpp +351 -0
  85. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_cursor2.hpp +265 -0
  86. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_detail.hpp +93 -0
  87. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_encoder.hpp +1766 -0
  88. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_error.hpp +105 -0
  89. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_options.hpp +113 -0
  90. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_parser.hpp +1942 -0
  91. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_reader.hpp +116 -0
  92. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/decode_cbor.hpp +203 -0
  93. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/encode_cbor.hpp +151 -0
  94. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv.hpp +17 -0
  95. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_cursor.hpp +358 -0
  96. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_encoder.hpp +954 -0
  97. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_error.hpp +85 -0
  98. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_options.hpp +973 -0
  99. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_parser.hpp +2099 -0
  100. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_reader.hpp +348 -0
  101. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_serializer.hpp +12 -0
  102. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/decode_csv.hpp +208 -0
  103. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/encode_csv.hpp +122 -0
  104. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jmespath/jmespath.hpp +5215 -0
  105. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jmespath/jmespath_error.hpp +215 -0
  106. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpatch/jsonpatch.hpp +579 -0
  107. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpatch/jsonpatch_error.hpp +121 -0
  108. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/expression.hpp +3329 -0
  109. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/flatten.hpp +432 -0
  110. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/json_location.hpp +445 -0
  111. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/json_query.hpp +115 -0
  112. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/jsonpath.hpp +13 -0
  113. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/jsonpath_error.hpp +240 -0
  114. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/jsonpath_expression.hpp +2612 -0
  115. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/jsonpath_selector.hpp +1322 -0
  116. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpointer/jsonpointer.hpp +1577 -0
  117. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpointer/jsonpointer_error.hpp +119 -0
  118. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/format_validator.hpp +968 -0
  119. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/json_validator.hpp +120 -0
  120. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/jsonschema.hpp +13 -0
  121. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/jsonschema_error.hpp +105 -0
  122. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/jsonschema_version.hpp +18 -0
  123. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/keyword_validator.hpp +1745 -0
  124. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/keyword_validator_factory.hpp +556 -0
  125. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/schema_draft7.hpp +198 -0
  126. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/schema_location.hpp +200 -0
  127. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/schema_version.hpp +35 -0
  128. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/subschema.hpp +144 -0
  129. data/lib/jsoncons/jsoncons/include/jsoncons_ext/mergepatch/mergepatch.hpp +103 -0
  130. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/decode_msgpack.hpp +202 -0
  131. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/encode_msgpack.hpp +142 -0
  132. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack.hpp +24 -0
  133. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_cursor.hpp +343 -0
  134. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_cursor2.hpp +259 -0
  135. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_encoder.hpp +753 -0
  136. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_error.hpp +94 -0
  137. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_options.hpp +74 -0
  138. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_parser.hpp +748 -0
  139. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_reader.hpp +116 -0
  140. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_type.hpp +63 -0
  141. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/decode_ubjson.hpp +201 -0
  142. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/encode_ubjson.hpp +142 -0
  143. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson.hpp +23 -0
  144. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_cursor.hpp +307 -0
  145. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_encoder.hpp +502 -0
  146. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_error.hpp +100 -0
  147. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_options.hpp +87 -0
  148. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_parser.hpp +880 -0
  149. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_reader.hpp +92 -0
  150. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_type.hpp +43 -0
  151. data/lib/jsoncons/version.rb +5 -0
  152. data/lib/jsoncons.rb +33 -0
  153. data/test/jsoncons_test.rb +108 -0
  154. data/test/test_helper.rb +7 -0
  155. metadata +268 -0
@@ -0,0 +1,2099 @@
1
+ // Copyright 2015 Daniel Parker
2
+ // Distributed under the Boost license, Version 1.0.
3
+ // (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
4
+
5
+ // See https://github.com/danielaparker/jsoncons for latest version
6
+
7
+ #ifndef JSONCONS_CSV_CSV_PARSER_HPP
8
+ #define JSONCONS_CSV_CSV_PARSER_HPP
9
+
10
+ #include <memory> // std::allocator
11
+ #include <string>
12
+ #include <sstream>
13
+ #include <vector>
14
+ #include <stdexcept>
15
+ #include <system_error>
16
+ #include <cctype>
17
+ #include <jsoncons/json_exception.hpp>
18
+ #include <jsoncons/json_visitor.hpp>
19
+ #include <jsoncons/json_reader.hpp>
20
+ #include <jsoncons/json_filter.hpp>
21
+ #include <jsoncons/json.hpp>
22
+ #include <jsoncons/detail/parse_number.hpp>
23
+ #include <jsoncons_ext/csv/csv_error.hpp>
24
+ #include <jsoncons_ext/csv/csv_options.hpp>
25
+
26
+ namespace jsoncons { namespace csv {
27
+
28
// Parsing contexts tracked by basic_csv_parser; the parser keeps a stack
// of these values (its stack_ member) and inspects the top entry.
enum class csv_mode
{
    initial,   // underlying value 0
    header,    // underlying value 1
    data,      // underlying value 2
    subfields  // underlying value 3; checked when closing a nested array of subfields
};
35
+
36
// States of the CSV parser's state machine (basic_csv_parser::state_).
// The enumerator order is significant only in that it fixes the
// underlying values; do not reorder.
enum class csv_parse_state
{
    // Record-level states
    start,
    cr,
    column_labels,
    expect_comment_or_record,
    expect_record,
    end_record,
    no_more_records,
    comment,

    // Value scanning states
    between_values,
    quoted_string,
    unquoted_string,
    before_unquoted_string,
    escaped_value,

    // Number scanning states
    minus,
    zero,
    integer,
    fraction,
    exp1,
    exp2,
    exp3,

    // Reported as accepted by basic_csv_parser::accept()
    accept,

    // Field / subfield completion states for unquoted values
    before_unquoted_field,
    before_unquoted_field_tail,
    before_unquoted_field_tail1,
    before_last_unquoted_field,
    before_last_unquoted_field_tail,
    before_unquoted_subfield,
    before_unquoted_subfield_tail,

    // Field / subfield completion states for quoted values
    before_quoted_subfield,
    before_quoted_subfield_tail,
    before_quoted_field,
    before_quoted_field_tail,
    before_last_quoted_field,
    before_last_quoted_field_tail,

    // Terminal state tested by basic_csv_parser::done()
    done
};
74
+
75
// Replay position used by detail::m_columns_filter when it emits its
// cached events: one object, with a key and an array of items per column.
enum class cached_state
{
    begin_object,  // emit the opening of the enclosing object
    end_object,    // emit the close of the enclosing object
    begin_array,   // emit the opening of the current column's array
    end_array,     // emit the close of the current column's array
    name,          // emit the next column name, or finish when none remain
    item,          // emit the next cached event of the current column
    done           // replay has finished
};
85
+
86
+ struct default_csv_parsing
87
+ {
88
+ bool operator()(csv_errc, const ser_context&) noexcept
89
+ {
90
+ return false;
91
+ }
92
+ };
93
+
94
+ namespace detail {
95
+
96
+ template <class CharT,class TempAllocator>
97
+ class parse_event
98
+ {
99
+ using temp_allocator_type = TempAllocator;
100
+ using string_view_type = typename basic_json_visitor<CharT>::string_view_type;
101
+ using char_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<CharT>;
102
+ using byte_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<uint8_t>;
103
+ using string_type = std::basic_string<CharT,std::char_traits<CharT>,char_allocator_type>;
104
+ using byte_string_type = basic_byte_string<byte_allocator_type>;
105
+
106
+ staj_event_type event_type;
107
+ string_type string_value;
108
+ byte_string_type byte_string_value;
109
+ union
110
+ {
111
+ bool bool_value;
112
+ int64_t int64_value;
113
+ uint64_t uint64_value;
114
+ double double_value;
115
+ };
116
+ semantic_tag tag;
117
+ public:
118
+ parse_event(staj_event_type event_type, semantic_tag tag, const TempAllocator& alloc)
119
+ : event_type(event_type),
120
+ string_value(alloc),
121
+ byte_string_value(alloc),
122
+ tag(tag)
123
+ {
124
+ }
125
+
126
+ parse_event(const string_view_type& value, semantic_tag tag, const TempAllocator& alloc)
127
+ : event_type(staj_event_type::string_value),
128
+ string_value(value.data(),value.length(),alloc),
129
+ byte_string_value(alloc),
130
+ tag(tag)
131
+ {
132
+ }
133
+
134
+ parse_event(const byte_string_view& value, semantic_tag tag, const TempAllocator& alloc)
135
+ : event_type(staj_event_type::byte_string_value),
136
+ string_value(alloc),
137
+ byte_string_value(value.data(),value.size(),alloc),
138
+ tag(tag)
139
+ {
140
+ }
141
+
142
+ parse_event(bool value, semantic_tag tag, const TempAllocator& alloc)
143
+ : event_type(staj_event_type::bool_value),
144
+ string_value(alloc),
145
+ byte_string_value(alloc),
146
+ bool_value(value),
147
+ tag(tag)
148
+ {
149
+ }
150
+
151
+ parse_event(int64_t value, semantic_tag tag, const TempAllocator& alloc)
152
+ : event_type(staj_event_type::int64_value),
153
+ string_value(alloc),
154
+ byte_string_value(alloc),
155
+ int64_value(value),
156
+ tag(tag)
157
+ {
158
+ }
159
+
160
+ parse_event(uint64_t value, semantic_tag tag, const TempAllocator& alloc)
161
+ : event_type(staj_event_type::uint64_value),
162
+ string_value(alloc),
163
+ byte_string_value(alloc),
164
+ uint64_value(value),
165
+ tag(tag)
166
+ {
167
+ }
168
+
169
+ parse_event(double value, semantic_tag tag, const TempAllocator& alloc)
170
+ : event_type(staj_event_type::double_value),
171
+ string_value(alloc),
172
+ byte_string_value(alloc),
173
+ double_value(value),
174
+ tag(tag)
175
+ {
176
+ }
177
+
178
+ parse_event(const parse_event&) = default;
179
+ parse_event(parse_event&&) = default;
180
+ parse_event& operator=(const parse_event&) = default;
181
+ parse_event& operator=(parse_event&&) = default;
182
+
183
+ bool replay(basic_json_visitor<CharT>& visitor) const
184
+ {
185
+ switch (event_type)
186
+ {
187
+ case staj_event_type::begin_array:
188
+ return visitor.begin_array(tag, ser_context());
189
+ case staj_event_type::end_array:
190
+ return visitor.end_array(ser_context());
191
+ case staj_event_type::string_value:
192
+ return visitor.string_value(string_value, tag, ser_context());
193
+ case staj_event_type::byte_string_value:
194
+ case staj_event_type::null_value:
195
+ return visitor.null_value(tag, ser_context());
196
+ case staj_event_type::bool_value:
197
+ return visitor.bool_value(bool_value, tag, ser_context());
198
+ case staj_event_type::int64_value:
199
+ return visitor.int64_value(int64_value, tag, ser_context());
200
+ case staj_event_type::uint64_value:
201
+ return visitor.uint64_value(uint64_value, tag, ser_context());
202
+ case staj_event_type::double_value:
203
+ return visitor.double_value(double_value, tag, ser_context());
204
+ default:
205
+ return false;
206
+ }
207
+ }
208
+ };
209
+
210
+ template <class CharT, class TempAllocator>
211
+ class m_columns_filter : public basic_json_visitor<CharT>
212
+ {
213
+ public:
214
+ using string_view_type = typename basic_json_visitor<CharT>::string_view_type;
215
+ using char_type = CharT;
216
+ using temp_allocator_type = TempAllocator;
217
+
218
+ using char_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<CharT>;
219
+ using string_type = std::basic_string<CharT,std::char_traits<CharT>,char_allocator_type>;
220
+
221
+ using string_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<string_type>;
222
+ using parse_event_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<parse_event<CharT,TempAllocator>>;
223
+ using parse_event_vector_type = std::vector<parse_event<CharT,TempAllocator>, parse_event_allocator_type>;
224
+ using parse_event_vector_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<parse_event_vector_type>;
225
+ private:
226
+ TempAllocator alloc_;
227
+ std::size_t name_index_;
228
+ int level_;
229
+ cached_state state_;
230
+ std::size_t column_index_;
231
+ std::size_t row_index_;
232
+
233
+ std::vector<string_type, string_allocator_type> column_names_;
234
+ std::vector<parse_event_vector_type,parse_event_vector_allocator_type> cached_events_;
235
+ public:
236
+
237
+ m_columns_filter(const TempAllocator& alloc)
238
+ : alloc_(alloc),
239
+ name_index_(0),
240
+ level_(0),
241
+ state_(cached_state::begin_object),
242
+ column_index_(0),
243
+ row_index_(0),
244
+ column_names_(alloc),
245
+ cached_events_(alloc)
246
+ {
247
+ }
248
+
249
+ void reset()
250
+ {
251
+ name_index_ = 0;
252
+ level_ = 0;
253
+ state_ = cached_state::begin_object;
254
+ column_index_ = 0;
255
+ row_index_ = 0;
256
+ column_names_.clear();
257
+ cached_events_.clear();
258
+ }
259
+
260
+ bool done() const
261
+ {
262
+ return state_ == cached_state::done;
263
+ }
264
+
265
+ void initialize(const std::vector<string_type, string_allocator_type>& column_names)
266
+ {
267
+ for (const auto& name : column_names)
268
+ {
269
+ column_names_.push_back(name);
270
+ cached_events_.emplace_back(alloc_);
271
+ }
272
+ name_index_ = 0;
273
+ level_ = 0;
274
+ column_index_ = 0;
275
+ row_index_ = 0;
276
+ state_ = cached_state::begin_object;
277
+ }
278
+
279
+ void skip_column()
280
+ {
281
+ ++name_index_;
282
+ }
283
+
284
+ bool replay_parse_events(basic_json_visitor<CharT>& visitor)
285
+ {
286
+ bool more = true;
287
+ while (more)
288
+ {
289
+ switch (state_)
290
+ {
291
+ case cached_state::begin_object:
292
+ more = visitor.begin_object(semantic_tag::none, ser_context());
293
+ column_index_ = 0;
294
+ state_ = cached_state::name;
295
+ break;
296
+ case cached_state::end_object:
297
+ more = visitor.end_object(ser_context());
298
+ state_ = cached_state::done;
299
+ break;
300
+ case cached_state::name:
301
+ if (column_index_ < column_names_.size())
302
+ {
303
+ more = visitor.key(column_names_[column_index_], ser_context());
304
+ state_ = cached_state::begin_array;
305
+ }
306
+ else
307
+ {
308
+ state_ = cached_state::end_object;
309
+ }
310
+ break;
311
+ case cached_state::begin_array:
312
+ more = visitor.begin_array(semantic_tag::none, ser_context());
313
+ row_index_ = 0;
314
+ state_ = cached_state::item;
315
+ break;
316
+ case cached_state::end_array:
317
+ more = visitor.end_array(ser_context());
318
+ ++column_index_;
319
+ state_ = cached_state::name;
320
+ break;
321
+ case cached_state::item:
322
+ if (row_index_ < cached_events_[column_index_].size())
323
+ {
324
+ more = cached_events_[column_index_][row_index_].replay(visitor);
325
+ ++row_index_;
326
+ }
327
+ else
328
+ {
329
+ state_ = cached_state::end_array;
330
+ }
331
+ break;
332
+ default:
333
+ more = false;
334
+ break;
335
+ }
336
+ }
337
+ return more;
338
+ }
339
+
340
+ void visit_flush() override
341
+ {
342
+ }
343
+
344
+ bool visit_begin_object(semantic_tag, const ser_context&, std::error_code& ec) override
345
+ {
346
+ ec = csv_errc::invalid_parse_state;
347
+ return false;
348
+ }
349
+
350
+ bool visit_end_object(const ser_context&, std::error_code& ec) override
351
+ {
352
+ ec = csv_errc::invalid_parse_state;
353
+ return false;
354
+ }
355
+
356
+ bool visit_begin_array(semantic_tag tag, const ser_context&, std::error_code&) override
357
+ {
358
+ if (name_index_ < column_names_.size())
359
+ {
360
+ cached_events_[name_index_].emplace_back(staj_event_type::begin_array, tag, alloc_);
361
+
362
+ ++level_;
363
+ }
364
+ return true;
365
+ }
366
+
367
+ bool visit_end_array(const ser_context&, std::error_code&) override
368
+ {
369
+ if (level_ > 0)
370
+ {
371
+ cached_events_[name_index_].emplace_back(staj_event_type::end_array, semantic_tag::none, alloc_);
372
+ ++name_index_;
373
+ --level_;
374
+ }
375
+ else
376
+ {
377
+ name_index_ = 0;
378
+ }
379
+ return true;
380
+ }
381
+
382
+ bool visit_key(const string_view_type&, const ser_context&, std::error_code& ec) override
383
+ {
384
+ ec = csv_errc::invalid_parse_state;
385
+ return false;
386
+ }
387
+
388
+ bool visit_null(semantic_tag tag, const ser_context&, std::error_code&) override
389
+ {
390
+ if (name_index_ < column_names_.size())
391
+ {
392
+ cached_events_[name_index_].emplace_back(staj_event_type::null_value, tag, alloc_);
393
+ if (level_ == 0)
394
+ {
395
+ ++name_index_;
396
+ }
397
+ }
398
+ return true;
399
+ }
400
+
401
+ bool visit_string(const string_view_type& value, semantic_tag tag, const ser_context&, std::error_code&) override
402
+ {
403
+ if (name_index_ < column_names_.size())
404
+ {
405
+ cached_events_[name_index_].emplace_back(value, tag, alloc_);
406
+
407
+ if (level_ == 0)
408
+ {
409
+ ++name_index_;
410
+ }
411
+ }
412
+ return true;
413
+ }
414
+
415
+ bool visit_byte_string(const byte_string_view& value,
416
+ semantic_tag tag,
417
+ const ser_context&,
418
+ std::error_code&) override
419
+ {
420
+ if (name_index_ < column_names_.size())
421
+ {
422
+ cached_events_[name_index_].emplace_back(value, tag, alloc_);
423
+ if (level_ == 0)
424
+ {
425
+ ++name_index_;
426
+ }
427
+ }
428
+ return true;
429
+ }
430
+
431
+ bool visit_double(double value,
432
+ semantic_tag tag,
433
+ const ser_context&,
434
+ std::error_code&) override
435
+ {
436
+ if (name_index_ < column_names_.size())
437
+ {
438
+ cached_events_[name_index_].emplace_back(value, tag, alloc_);
439
+ if (level_ == 0)
440
+ {
441
+ ++name_index_;
442
+ }
443
+ }
444
+ return true;
445
+ }
446
+
447
+ bool visit_int64(int64_t value,
448
+ semantic_tag tag,
449
+ const ser_context&,
450
+ std::error_code&) override
451
+ {
452
+ if (name_index_ < column_names_.size())
453
+ {
454
+ cached_events_[name_index_].emplace_back(value, tag, alloc_);
455
+ if (level_ == 0)
456
+ {
457
+ ++name_index_;
458
+ }
459
+ }
460
+ return true;
461
+ }
462
+
463
+ bool visit_uint64(uint64_t value,
464
+ semantic_tag tag,
465
+ const ser_context&,
466
+ std::error_code&) override
467
+ {
468
+ if (name_index_ < column_names_.size())
469
+ {
470
+ cached_events_[name_index_].emplace_back(value, tag, alloc_);
471
+ if (level_ == 0)
472
+ {
473
+ ++name_index_;
474
+ }
475
+ }
476
+ return true;
477
+ }
478
+
479
+ bool visit_bool(bool value, semantic_tag tag, const ser_context&, std::error_code&) override
480
+ {
481
+ if (name_index_ < column_names_.size())
482
+ {
483
+ cached_events_[name_index_].emplace_back(value, tag, alloc_);
484
+ if (level_ == 0)
485
+ {
486
+ ++name_index_;
487
+ }
488
+ }
489
+ return true;
490
+ }
491
+ };
492
+
493
+ } // namespace detail
494
+
495
+ template<class CharT,class TempAllocator=std::allocator<char>>
496
+ class basic_csv_parser : public ser_context
497
+ {
498
+ public:
499
+ using string_view_type = jsoncons::basic_string_view<CharT>;
500
+ using char_type = CharT;
501
+ private:
502
+ struct string_maps_to_double
503
+ {
504
+ string_view_type s;
505
+
506
+ bool operator()(const std::pair<string_view_type,double>& val) const
507
+ {
508
+ return val.first == s;
509
+ }
510
+ };
511
+
512
+ using temp_allocator_type = TempAllocator;
513
+ typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<CharT> char_allocator_type;
514
+ using string_type = std::basic_string<CharT,std::char_traits<CharT>,char_allocator_type>;
515
+ typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<string_type> string_allocator_type;
516
+ typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<csv_mode> csv_mode_allocator_type;
517
+ typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<csv_type_info> csv_type_info_allocator_type;
518
+ typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<std::vector<string_type,string_allocator_type>> string_vector_allocator_type;
519
+ typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<csv_parse_state> csv_parse_state_allocator_type;
520
+
521
+ static constexpr int default_depth = 3;
522
+
523
+ temp_allocator_type alloc_;
524
+ csv_parse_state state_;
525
+ basic_json_visitor<CharT>* visitor_;
526
+ std::function<bool(csv_errc,const ser_context&)> err_handler_;
527
+ std::size_t column_;
528
+ std::size_t line_;
529
+ int depth_;
530
+ const basic_csv_decode_options<CharT> options_;
531
+ std::size_t column_index_;
532
+ std::size_t level_;
533
+ std::size_t offset_;
534
+ jsoncons::detail::chars_to to_double_;
535
+ const CharT* begin_input_;
536
+ const CharT* input_end_;
537
+ const CharT* input_ptr_;
538
+ bool more_;
539
+ std::size_t header_line_;
540
+
541
+ detail::m_columns_filter<CharT,TempAllocator> m_columns_filter_;
542
+ std::vector<csv_mode,csv_mode_allocator_type> stack_;
543
+ std::vector<string_type,string_allocator_type> column_names_;
544
+ std::vector<csv_type_info,csv_type_info_allocator_type> column_types_;
545
+ std::vector<string_type,string_allocator_type> column_defaults_;
546
+ std::vector<csv_parse_state,csv_parse_state_allocator_type> state_stack_;
547
+ string_type buffer_;
548
+ std::vector<std::pair<string_view_type,double>> string_double_map_;
549
+
550
+ public:
551
+ basic_csv_parser(const TempAllocator& alloc = TempAllocator())
552
+ : basic_csv_parser(basic_csv_decode_options<CharT>(),
553
+ default_csv_parsing(),
554
+ alloc)
555
+ {
556
+ }
557
+
558
+ basic_csv_parser(const basic_csv_decode_options<CharT>& options,
559
+ const TempAllocator& alloc = TempAllocator())
560
+ : basic_csv_parser(options,
561
+ default_csv_parsing(),
562
+ alloc)
563
+ {
564
+ }
565
+
566
+ basic_csv_parser(std::function<bool(csv_errc,const ser_context&)> err_handler,
567
+ const TempAllocator& alloc = TempAllocator())
568
+ : basic_csv_parser(basic_csv_decode_options<CharT>(),
569
+ err_handler,
570
+ alloc)
571
+ {
572
+ }
573
+
574
+ basic_csv_parser(const basic_csv_decode_options<CharT>& options,
575
+ std::function<bool(csv_errc,const ser_context&)> err_handler,
576
+ const TempAllocator& alloc = TempAllocator())
577
+ : alloc_(alloc),
578
+ state_(csv_parse_state::start),
579
+ visitor_(nullptr),
580
+ err_handler_(err_handler),
581
+ column_(1),
582
+ line_(1),
583
+ depth_(default_depth),
584
+ options_(options),
585
+ column_index_(0),
586
+ level_(0),
587
+ offset_(0),
588
+ begin_input_(nullptr),
589
+ input_end_(nullptr),
590
+ input_ptr_(nullptr),
591
+ more_(true),
592
+ header_line_(1),
593
+ m_columns_filter_(alloc),
594
+ stack_(alloc),
595
+ column_names_(alloc),
596
+ column_types_(alloc),
597
+ column_defaults_(alloc),
598
+ state_stack_(alloc),
599
+ buffer_(alloc)
600
+ {
601
+ if (options_.enable_str_to_nan())
602
+ {
603
+ string_double_map_.emplace_back(options_.nan_to_str(),std::nan(""));
604
+ }
605
+ if (options_.enable_str_to_inf())
606
+ {
607
+ string_double_map_.emplace_back(options_.inf_to_str(),std::numeric_limits<double>::infinity());
608
+ }
609
+ if (options_.enable_str_to_neginf())
610
+ {
611
+ string_double_map_.emplace_back(options_.neginf_to_str(),-std::numeric_limits<double>::infinity());
612
+ }
613
+
614
+ initialize();
615
+ }
616
+
617
+ ~basic_csv_parser() noexcept
618
+ {
619
+ }
620
+
621
+ bool done() const
622
+ {
623
+ return state_ == csv_parse_state::done;
624
+ }
625
+
626
+ bool accept() const
627
+ {
628
+ return state_ == csv_parse_state::accept || state_ == csv_parse_state::done;
629
+ }
630
+
631
+ bool stopped() const
632
+ {
633
+ return !more_;
634
+ }
635
+
636
+ bool source_exhausted() const
637
+ {
638
+ return input_ptr_ == input_end_;
639
+ }
640
+
641
+ const std::vector<string_type,string_allocator_type>& column_labels() const
642
+ {
643
+ return column_names_;
644
+ }
645
+
646
+ void reinitialize()
647
+ {
648
+ state_ = csv_parse_state::start;
649
+ visitor_ = nullptr;
650
+ column_ = 1;
651
+ line_ = 1;
652
+ depth_ = default_depth;
653
+ column_index_ = 0;
654
+ level_ = 0;
655
+ offset_ = 0;
656
+ begin_input_ = nullptr;
657
+ input_end_ = nullptr;
658
+ input_ptr_ = nullptr;
659
+ more_ = true;
660
+ header_line_ = 1;
661
+ m_columns_filter_.reset();
662
+ stack_.clear();
663
+ column_names_.clear();
664
+ column_types_.clear();
665
+ column_defaults_.clear();
666
+ state_stack_.clear();
667
+ buffer_.clear();
668
+
669
+ initialize();
670
+ }
671
+
672
+ void restart()
673
+ {
674
+ more_ = true;
675
+ }
676
+
677
+ void parse_some(basic_json_visitor<CharT>& visitor)
678
+ {
679
+ std::error_code ec;
680
+ parse_some(visitor,ec);
681
+ if (ec)
682
+ {
683
+ JSONCONS_THROW(ser_error(ec,line_,column_));
684
+ }
685
+ }
686
+
687
+ void parse_some(basic_json_visitor<CharT>& visitor, std::error_code& ec) // Drives the CSV state machine over the buffered input, emitting events through visitor_; failures are reported through ec and clear more_.
688
+ {
689
+ switch (options_.mapping_kind()) // Choose the event sink: m_columns output goes through m_columns_filter_, everything else straight to the caller's visitor.
690
+ {
691
+ case csv_mapping_kind::m_columns:
692
+ visitor_ = &m_columns_filter_;
693
+ break;
694
+ default:
695
+ visitor_ = std::addressof(visitor);
696
+ break;
697
+ }
698
+
699
+ const CharT* local_input_end = input_end_;
700
+
701
+ if (input_ptr_ == local_input_end && more_) // No unread input: take one end-of-input step that depends on the current state.
702
+ {
703
+ switch (state_)
704
+ {
705
+ case csv_parse_state::start: // Still in the start state with nothing to read: reported as a source error.
706
+ ec = csv_errc::source_error;
707
+ more_ = false;
708
+ return;
709
+ case csv_parse_state::before_unquoted_field:
710
+ case csv_parse_state::before_last_unquoted_field:
711
+ end_unquoted_string_value(ec);
712
+ state_ = csv_parse_state::before_last_unquoted_field_tail;
713
+ break;
714
+ case csv_parse_state::before_last_unquoted_field_tail:
715
+ if (stack_.back() == csv_mode::subfields) // Close the subfield array that was opened for this field.
716
+ {
717
+ stack_.pop_back();
718
+ more_ = visitor_->end_array(*this, ec);
719
+ }
720
+ ++column_index_;
721
+ state_ = csv_parse_state::end_record;
722
+ break;
723
+ case csv_parse_state::before_unquoted_string:
724
+ buffer_.clear();
725
+ JSONCONS_FALLTHROUGH;
726
+ case csv_parse_state::unquoted_string:
727
+ if (options_.trim_leading() || options_.trim_trailing())
728
+ {
729
+ trim_string_buffer(options_.trim_leading(),options_.trim_trailing());
730
+ }
731
+ if (options_.ignore_empty_values() && buffer_.empty())
732
+ {
733
+ state_ = csv_parse_state::end_record;
734
+ }
735
+ else
736
+ {
737
+ before_value(ec);
738
+ state_ = csv_parse_state::before_unquoted_field;
739
+ }
740
+ break;
741
+ case csv_parse_state::before_last_quoted_field:
742
+ end_quoted_string_value(ec);
743
+ ++column_index_;
744
+ state_ = csv_parse_state::end_record;
745
+ break;
746
+ case csv_parse_state::escaped_value:
747
+ if (options_.quote_escape_char() == options_.quote_char()) // Escape char equals quote char: the pending character is treated as the closing quote of the last field.
748
+ {
749
+ if (!(options_.ignore_empty_values() && buffer_.empty()))
750
+ {
751
+ before_value(ec);
752
+ ++column_;
753
+ state_ = csv_parse_state::before_last_quoted_field;
754
+ }
755
+ else
756
+ {
757
+ state_ = csv_parse_state::end_record;
758
+ }
759
+ }
760
+ else
761
+ {
762
+ ec = csv_errc::invalid_escaped_char;
763
+ more_ = false;
764
+ return;
765
+ }
766
+ break;
767
+ case csv_parse_state::end_record:
768
+ if (column_index_ > 0) // Only close a record that actually received at least one column.
769
+ {
770
+ after_record(ec);
771
+ }
772
+ state_ = csv_parse_state::no_more_records;
773
+ break;
774
+ case csv_parse_state::no_more_records:
775
+ switch (stack_.back())
776
+ {
777
+ case csv_mode::header:
778
+ stack_.pop_back();
779
+ break;
780
+ case csv_mode::data:
781
+ stack_.pop_back();
782
+ break;
783
+ default:
784
+ break;
785
+ }
786
+ more_ = visitor_->end_array(*this, ec);
787
+ if (options_.mapping_kind() == csv_mapping_kind::m_columns)
788
+ {
789
+ if (!m_columns_filter_.done())
790
+ {
791
+ more_ = m_columns_filter_.replay_parse_events(visitor); // presumably forwards the filter's buffered events to the caller's visitor — confirm against m_columns_filter
792
+ }
793
+ else
794
+ {
795
+ state_ = csv_parse_state::accept;
796
+ }
797
+ }
798
+ else
799
+ {
800
+ state_ = csv_parse_state::accept;
801
+ }
802
+ break;
803
+ case csv_parse_state::accept:
804
+ if (!(stack_.size() == 1 && stack_.back() == csv_mode::initial)) // Anything other than the lone csv_mode::initial entry left on stack_ is reported as unexpected_eof.
805
+ {
806
+ err_handler_(csv_errc::unexpected_eof, *this);
807
+ ec = csv_errc::unexpected_eof;
808
+ more_ = false;
809
+ return;
810
+ }
811
+ stack_.pop_back();
812
+ visitor_->flush();
813
+ state_ = csv_parse_state::done;
814
+ more_ = false;
815
+ return;
816
+ default:
817
+ state_ = csv_parse_state::end_record;
818
+ break;
819
+ }
820
+ }
821
+
822
+ for (; (input_ptr_ < local_input_end) && more_;) // Main loop: runs while unread characters remain and more_ is still true; not every state consumes a character per iteration.
823
+ {
824
+ CharT curr_char = *input_ptr_;
825
+
826
+ switch (state_)
827
+ {
828
+ case csv_parse_state::cr: // Entered after a '\r' (callers push the state to resume first): count the line, skip an immediately following '\n', then restore the pushed state.
829
+ ++line_;
830
+ column_ = 1;
831
+ switch (*input_ptr_)
832
+ {
833
+ case '\n':
834
+ ++input_ptr_;
835
+ state_ = pop_state();
836
+ break;
837
+ default:
838
+ state_ = pop_state();
839
+ break;
840
+ }
841
+ break;
842
+ case csv_parse_state::start:
843
+ if (options_.mapping_kind() != csv_mapping_kind::m_columns)
844
+ {
845
+ more_ = visitor_->begin_array(semantic_tag::none, *this, ec);
846
+ }
847
+ if (options_.assume_header() && options_.mapping_kind() == csv_mapping_kind::n_rows && options_.column_names().size() > 0)
848
+ {
849
+ column_index_ = 0;
850
+ state_ = csv_parse_state::column_labels; // NOTE(review): this assignment is overwritten two statements below before it can be observed — confirm intent.
851
+ more_ = visitor_->begin_array(semantic_tag::none, *this, ec);
852
+ state_ = csv_parse_state::expect_comment_or_record;
853
+ }
854
+ else
855
+ {
856
+ state_ = csv_parse_state::expect_comment_or_record;
857
+ }
858
+ break;
859
+ case csv_parse_state::column_labels: // Emits one stored column name per iteration, then closes the array.
860
+ if (column_index_ < column_names_.size())
861
+ {
862
+ more_ = visitor_->string_value(column_names_[column_index_], semantic_tag::none, *this, ec);
863
+ ++column_index_;
864
+ }
865
+ else
866
+ {
867
+ more_ = visitor_->end_array(*this, ec);
868
+ state_ = csv_parse_state::expect_comment_or_record;
869
+ //stack_.back() = csv_mode::data;
870
+ column_index_ = 0;
871
+ }
872
+ break;
873
+ case csv_parse_state::comment: // Skips characters up to the end of the line; a comment line seen in header mode also advances header_line_.
874
+ switch (curr_char)
875
+ {
876
+ case '\n':
877
+ {
878
+ ++line_;
879
+ if (stack_.back() == csv_mode::header)
880
+ {
881
+ ++header_line_;
882
+ }
883
+ column_ = 1;
884
+ state_ = csv_parse_state::expect_comment_or_record;
885
+ break;
886
+ }
887
+ case '\r':
888
+ ++line_;
889
+ if (stack_.back() == csv_mode::header)
890
+ {
891
+ ++header_line_;
892
+ }
893
+ column_ = 1;
894
+ state_ = csv_parse_state::expect_comment_or_record;
895
+ push_state(state_);
896
+ state_ = csv_parse_state::cr;
897
+ break;
898
+ default:
899
+ ++column_;
900
+ break;
901
+ }
902
+ ++input_ptr_;
903
+ break;
904
+
905
+ case csv_parse_state::expect_comment_or_record: // Start of a line: either a comment introducer or the beginning of a record.
906
+ buffer_.clear();
907
+ if (curr_char == options_.comment_starter())
908
+ {
909
+ state_ = csv_parse_state::comment;
910
+ ++column_;
911
+ ++input_ptr_;
912
+ }
913
+ else
914
+ {
915
+ state_ = csv_parse_state::expect_record;
916
+ }
917
+ break;
918
+ case csv_parse_state::quoted_string: // Inside quotes: accumulate characters until an escape or quote character is seen.
919
+ {
920
+ if (curr_char == options_.quote_escape_char())
921
+ {
922
+ state_ = csv_parse_state::escaped_value;
923
+ }
924
+ else if (curr_char == options_.quote_char())
925
+ {
926
+ state_ = csv_parse_state::between_values;
927
+ }
928
+ else
929
+ {
930
+ buffer_.push_back(static_cast<CharT>(curr_char));
931
+ }
932
+ }
933
+ ++column_;
934
+ ++input_ptr_;
935
+ break;
936
+ case csv_parse_state::escaped_value: // Character following an escape: a quote char is stored literally; otherwise, if escape == quote, the previous char was the closing quote.
937
+ {
938
+ if (curr_char == options_.quote_char())
939
+ {
940
+ buffer_.push_back(static_cast<CharT>(curr_char));
941
+ state_ = csv_parse_state::quoted_string;
942
+ ++column_;
943
+ ++input_ptr_;
944
+ }
945
+ else if (options_.quote_escape_char() == options_.quote_char())
946
+ {
947
+ state_ = csv_parse_state::between_values;
948
+ }
949
+ else
950
+ {
951
+ ec = csv_errc::invalid_escaped_char;
952
+ more_ = false;
953
+ return;
954
+ }
955
+ }
956
+ break;
957
+ case csv_parse_state::between_values: // After a closing quote: only a delimiter, a line end, or blanks are accepted.
958
+ switch (curr_char)
959
+ {
960
+ case '\r':
961
+ case '\n':
962
+ {
963
+ if (options_.trim_leading() || options_.trim_trailing())
964
+ {
965
+ trim_string_buffer(options_.trim_leading(),options_.trim_trailing());
966
+ }
967
+ if (!(options_.ignore_empty_values() && buffer_.empty()))
968
+ {
969
+ before_value(ec);
970
+ state_ = csv_parse_state::before_last_quoted_field;
971
+ }
972
+ else
973
+ {
974
+ state_ = csv_parse_state::end_record;
975
+ }
976
+ break;
977
+ }
978
+ default:
979
+ if (curr_char == options_.field_delimiter())
980
+ {
981
+ if (options_.trim_leading() || options_.trim_trailing())
982
+ {
983
+ trim_string_buffer(options_.trim_leading(),options_.trim_trailing());
984
+ }
985
+ before_value(ec);
986
+ state_ = csv_parse_state::before_quoted_field;
987
+ }
988
+ else if (options_.subfield_delimiter() != char_type() && curr_char == options_.subfield_delimiter())
989
+ {
990
+ if (options_.trim_leading() || options_.trim_trailing())
991
+ {
992
+ trim_string_buffer(options_.trim_leading(),options_.trim_trailing());
993
+ }
994
+ before_value(ec);
995
+ state_ = csv_parse_state::before_quoted_subfield;
996
+ }
997
+ else if (curr_char == ' ' || curr_char == '\t')
998
+ {
999
+ ++column_;
1000
+ ++input_ptr_;
1001
+ }
1002
+ else
1003
+ {
1004
+ ec = csv_errc::unexpected_char_between_fields;
1005
+ more_ = false;
1006
+ return;
1007
+ }
1008
+ break;
1009
+ }
1010
+ break;
1011
+ case csv_parse_state::before_unquoted_string:
1012
+ {
1013
+ buffer_.clear();
1014
+ state_ = csv_parse_state::unquoted_string;
1015
+ break;
1016
+ }
1017
+ case csv_parse_state::before_unquoted_field:
1018
+ end_unquoted_string_value(ec);
1019
+ state_ = csv_parse_state::before_unquoted_field_tail;
1020
+ break;
1021
+ case csv_parse_state::before_unquoted_field_tail: // Consumes the pending character (the delimiter, for the transitions visible here) and moves on to the next column.
1022
+ {
1023
+ if (stack_.back() == csv_mode::subfields)
1024
+ {
1025
+ stack_.pop_back();
1026
+ more_ = visitor_->end_array(*this, ec);
1027
+ }
1028
+ ++column_index_;
1029
+ state_ = csv_parse_state::before_unquoted_string;
1030
+ ++column_;
1031
+ ++input_ptr_;
1032
+ break;
1033
+ }
1034
+ case csv_parse_state::before_unquoted_field_tail1: // NOTE(review): no transition into this state is visible in this function — confirm whether it is still reachable.
1035
+ {
1036
+ if (stack_.back() == csv_mode::subfields)
1037
+ {
1038
+ stack_.pop_back();
1039
+ more_ = visitor_->end_array(*this, ec);
1040
+ }
1041
+ state_ = csv_parse_state::end_record;
1042
+ ++column_;
1043
+ ++input_ptr_;
1044
+ break;
1045
+ }
1046
+
1047
+ case csv_parse_state::before_last_unquoted_field:
1048
+ end_unquoted_string_value(ec);
1049
+ state_ = csv_parse_state::before_last_unquoted_field_tail;
1050
+ break;
1051
+
1052
+ case csv_parse_state::before_last_unquoted_field_tail: // Does not consume the line terminator; end_record handles it.
1053
+ if (stack_.back() == csv_mode::subfields)
1054
+ {
1055
+ stack_.pop_back();
1056
+ more_ = visitor_->end_array(*this, ec);
1057
+ }
1058
+ ++column_index_;
1059
+ state_ = csv_parse_state::end_record;
1060
+ break;
1061
+
1062
+ case csv_parse_state::before_unquoted_subfield: // First subfield of a field opens a nested array and pushes csv_mode::subfields.
1063
+ if (stack_.back() == csv_mode::data)
1064
+ {
1065
+ stack_.push_back(csv_mode::subfields);
1066
+ more_ = visitor_->begin_array(semantic_tag::none, *this, ec);
1067
+ }
1068
+ state_ = csv_parse_state::before_unquoted_subfield_tail;
1069
+ break;
1070
+ case csv_parse_state::before_unquoted_subfield_tail:
1071
+ end_unquoted_string_value(ec);
1072
+ state_ = csv_parse_state::before_unquoted_string;
1073
+ ++column_;
1074
+ ++input_ptr_;
1075
+ break;
1076
+ case csv_parse_state::before_quoted_field:
1077
+ end_quoted_string_value(ec);
1078
+ state_ = csv_parse_state::before_unquoted_field_tail; // return to unquoted
1079
+ break;
1080
+ case csv_parse_state::before_quoted_subfield:
1081
+ if (stack_.back() == csv_mode::data)
1082
+ {
1083
+ stack_.push_back(csv_mode::subfields);
1084
+ more_ = visitor_->begin_array(semantic_tag::none, *this, ec);
1085
+ }
1086
+ state_ = csv_parse_state::before_quoted_subfield_tail;
1087
+ break;
1088
+ case csv_parse_state::before_quoted_subfield_tail:
1089
+ end_quoted_string_value(ec);
1090
+ state_ = csv_parse_state::before_unquoted_string;
1091
+ ++column_;
1092
+ ++input_ptr_;
1093
+ break;
1094
+ case csv_parse_state::before_last_quoted_field:
1095
+ end_quoted_string_value(ec);
1096
+ state_ = csv_parse_state::before_last_quoted_field_tail;
1097
+ break;
1098
+ case csv_parse_state::before_last_quoted_field_tail:
1099
+ if (stack_.back() == csv_mode::subfields)
1100
+ {
1101
+ stack_.pop_back();
1102
+ more_ = visitor_->end_array(*this, ec);
1103
+ }
1104
+ ++column_index_;
1105
+ state_ = csv_parse_state::end_record;
1106
+ break;
1107
+ case csv_parse_state::unquoted_string: // Accumulates an unquoted field until a line end, field delimiter, subfield delimiter or quote character.
1108
+ {
1109
+ switch (curr_char)
1110
+ {
1111
+ case '\n':
1112
+ case '\r':
1113
+ {
1114
+ if (options_.trim_leading() || options_.trim_trailing())
1115
+ {
1116
+ trim_string_buffer(options_.trim_leading(),options_.trim_trailing());
1117
+ }
1118
+ if (!(options_.ignore_empty_values() && buffer_.empty()))
1119
+ {
1120
+ before_value(ec);
1121
+ state_ = csv_parse_state::before_last_unquoted_field;
1122
+ }
1123
+ else
1124
+ {
1125
+ state_ = csv_parse_state::end_record;
1126
+ }
1127
+ break;
1128
+ }
1129
+ default:
1130
+ if (curr_char == options_.field_delimiter())
1131
+ {
1132
+ if (options_.trim_leading() || options_.trim_trailing())
1133
+ {
1134
+ trim_string_buffer(options_.trim_leading(),options_.trim_trailing());
1135
+ }
1136
+ before_value(ec);
1137
+ state_ = csv_parse_state::before_unquoted_field;
1138
+ }
1139
+ else if (options_.subfield_delimiter() != char_type() && curr_char == options_.subfield_delimiter())
1140
+ {
1141
+ if (options_.trim_leading() || options_.trim_trailing())
1142
+ {
1143
+ trim_string_buffer(options_.trim_leading(),options_.trim_trailing());
1144
+ }
1145
+ before_value(ec);
1146
+ state_ = csv_parse_state::before_unquoted_subfield;
1147
+ }
1148
+ else if (curr_char == options_.quote_char()) // A quote inside an unquoted field discards what was buffered so far and switches to quoted parsing.
1149
+ {
1150
+ buffer_.clear();
1151
+ state_ = csv_parse_state::quoted_string;
1152
+ ++column_;
1153
+ ++input_ptr_;
1154
+ }
1155
+ else
1156
+ {
1157
+ buffer_.push_back(static_cast<CharT>(curr_char));
1158
+ ++column_;
1159
+ ++input_ptr_;
1160
+ }
1161
+ break;
1162
+ }
1163
+ break;
1164
+ }
1165
+ case csv_parse_state::expect_record: // First character of a prospective record; empty lines are skipped when ignore_empty_lines() is set.
1166
+ {
1167
+ switch (curr_char)
1168
+ {
1169
+ case '\n':
1170
+ {
1171
+ if (!options_.ignore_empty_lines())
1172
+ {
1173
+ before_record(ec);
1174
+ state_ = csv_parse_state::end_record;
1175
+ }
1176
+ else
1177
+ {
1178
+ ++line_;
1179
+ column_ = 1;
1180
+ state_ = csv_parse_state::expect_comment_or_record;
1181
+ ++input_ptr_;
1182
+ }
1183
+ break;
1184
+ }
1185
+ case '\r':
1186
+ if (!options_.ignore_empty_lines())
1187
+ {
1188
+ before_record(ec);
1189
+ state_ = csv_parse_state::end_record;
1190
+ }
1191
+ else
1192
+ {
1193
+ ++line_;
1194
+ column_ = 1;
1195
+ state_ = csv_parse_state::expect_comment_or_record;
1196
+ ++input_ptr_;
1197
+ push_state(state_);
1198
+ state_ = csv_parse_state::cr;
1199
+ }
1200
+ break;
1201
+ case ' ':
1202
+ case '\t':
1203
+ if (!options_.trim_leading()) // Leading blanks start a field only when they are not being trimmed; otherwise they are just skipped.
1204
+ {
1205
+ buffer_.push_back(static_cast<CharT>(curr_char));
1206
+ before_record(ec);
1207
+ state_ = csv_parse_state::unquoted_string;
1208
+ }
1209
+ ++column_;
1210
+ ++input_ptr_;
1211
+ break;
1212
+ default:
1213
+ before_record(ec);
1214
+ if (curr_char == options_.quote_char())
1215
+ {
1216
+ buffer_.clear();
1217
+ state_ = csv_parse_state::quoted_string;
1218
+ ++column_;
1219
+ ++input_ptr_;
1220
+ }
1221
+ else
1222
+ {
1223
+ state_ = csv_parse_state::unquoted_string;
1224
+ }
1225
+ break;
1226
+ }
1227
+ break;
1228
+ }
1229
+ case csv_parse_state::end_record: // Expects the line terminator (blanks are skipped); any other character is a syntax error.
1230
+ {
1231
+ switch (curr_char)
1232
+ {
1233
+ case '\n':
1234
+ {
1235
+ ++line_;
1236
+ column_ = 1;
1237
+ state_ = csv_parse_state::expect_comment_or_record;
1238
+ after_record(ec);
1239
+ ++input_ptr_;
1240
+ break;
1241
+ }
1242
+ case '\r':
1243
+ ++line_;
1244
+ column_ = 1;
1245
+ state_ = csv_parse_state::expect_comment_or_record;
1246
+ after_record(ec);
1247
+ push_state(state_);
1248
+ state_ = csv_parse_state::cr;
1249
+ ++input_ptr_;
1250
+ break;
1251
+ case ' ':
1252
+ case '\t':
1253
+ ++column_;
1254
+ ++input_ptr_;
1255
+ break;
1256
+ default:
1257
+ err_handler_(csv_errc::syntax_error, *this);
1258
+ ec = csv_errc::syntax_error;
1259
+ more_ = false;
1260
+ return;
1261
+ }
1262
+ break;
1263
+ }
1264
+ default:
1265
+ err_handler_(csv_errc::invalid_parse_state, *this);
1266
+ ec = csv_errc::invalid_parse_state;
1267
+ more_ = false;
1268
+ return;
1269
+ }
1270
+ if (line_ > options_.max_lines()) // Stop parsing once the line counter passes the configured max_lines().
1271
+ {
1272
+ state_ = csv_parse_state::done;
1273
+ more_ = false;
1274
+ }
1275
+ }
1276
+ }
1277
+
1278
+ void finish_parse()
1279
+ {
1280
+ std::error_code ec;
1281
+ finish_parse(ec);
1282
+ if (ec)
1283
+ {
1284
+ JSONCONS_THROW(ser_error(ec,line_,column_));
1285
+ }
1286
+ }
1287
+
1288
+ void finish_parse(std::error_code& ec) // Keeps invoking parse_some until more_ becomes false; errors are reported through ec.
1289
+ {
1290
+ while (more_)
1291
+ {
1292
+ parse_some(ec); // NOTE(review): both parse_some overloads visible in this chunk require a visitor argument — confirm this call resolves, or whether a visitor-taking finish_parse is needed.
1293
+ }
1294
+ }
1295
+
1296
+ csv_parse_state state() const
1297
+ {
1298
+ return state_;
1299
+ }
1300
+
1301
+ void update(const string_view_type sv)
1302
+ {
1303
+ update(sv.data(),sv.length());
1304
+ }
1305
+
1306
+ void update(const CharT* data, std::size_t length)
1307
+ {
1308
+ begin_input_ = data;
1309
+ input_end_ = data + length;
1310
+ input_ptr_ = begin_input_;
1311
+ }
1312
+
1313
+ std::size_t line() const override
1314
+ {
1315
+ return line_;
1316
+ }
1317
+
1318
+ std::size_t column() const override
1319
+ {
1320
+ return column_;
1321
+ }
1322
+
1323
+ private:
1324
+ void initialize()
1325
+ {
1326
+ jsoncons::csv::detail::parse_column_names(options_.column_names(), column_names_);
1327
+ jsoncons::csv::detail::parse_column_types(options_.column_types(), column_types_);
1328
+ jsoncons::csv::detail::parse_column_names(options_.column_defaults(), column_defaults_);
1329
+
1330
+ stack_.reserve(default_depth);
1331
+ stack_.push_back(csv_mode::initial);
1332
+ stack_.push_back((options_.header_lines() > 0) ? csv_mode::header
1333
+ : csv_mode::data);
1334
+ }
1335
+
1336
+ // name
1337
+ void before_value(std::error_code& ec)
1338
+ {
1339
+ switch (stack_.back())
1340
+ {
1341
+ case csv_mode::header:
1342
+ if (options_.trim_leading_inside_quotes() || options_.trim_trailing_inside_quotes())
1343
+ {
1344
+ trim_string_buffer(options_.trim_leading_inside_quotes(),options_.trim_trailing_inside_quotes());
1345
+ }
1346
+ if (line_ == header_line_)
1347
+ {
1348
+ column_names_.push_back(buffer_);
1349
+ if (options_.assume_header() && options_.mapping_kind() == csv_mapping_kind::n_rows)
1350
+ {
1351
+ more_ = visitor_->string_value(buffer_, semantic_tag::none, *this, ec);
1352
+ }
1353
+ }
1354
+ break;
1355
+ case csv_mode::data:
1356
+ if (options_.mapping_kind() == csv_mapping_kind::n_objects)
1357
+ {
1358
+ if (!(options_.ignore_empty_values() && buffer_.empty()))
1359
+ {
1360
+ if (column_index_ < column_names_.size() + offset_)
1361
+ {
1362
+ more_ = visitor_->key(column_names_[column_index_ - offset_], *this, ec);
1363
+ }
1364
+ }
1365
+ }
1366
+ break;
1367
+ default:
1368
+ break;
1369
+ }
1370
+ }
1371
+
1372
+ // begin_array or begin_record
1373
+ void before_record(std::error_code& ec)
1374
+ {
1375
+ offset_ = 0;
1376
+
1377
+ switch (stack_.back())
1378
+ {
1379
+ case csv_mode::header:
1380
+ if (options_.assume_header() && line_ == header_line_)
1381
+ {
1382
+ if (options_.mapping_kind() == csv_mapping_kind::n_rows)
1383
+ {
1384
+ more_ = visitor_->begin_array(semantic_tag::none, *this, ec);
1385
+ }
1386
+ }
1387
+ break;
1388
+ case csv_mode::data:
1389
+ switch (options_.mapping_kind())
1390
+ {
1391
+ case csv_mapping_kind::n_rows:
1392
+ more_ = visitor_->begin_array(semantic_tag::none, *this, ec);
1393
+ break;
1394
+ case csv_mapping_kind::n_objects:
1395
+ more_ = visitor_->begin_object(semantic_tag::none, *this, ec);
1396
+ break;
1397
+ case csv_mapping_kind::m_columns:
1398
+ break;
1399
+ default:
1400
+ break;
1401
+ }
1402
+ break;
1403
+ default:
1404
+ break;
1405
+ }
1406
+ }
1407
+
1408
+ // end_array, begin_array, string_value (headers)
1409
+ void after_record(std::error_code& ec)
1410
+ {
1411
+ if (column_types_.size() > 0)
1412
+ {
1413
+ if (level_ > 0)
1414
+ {
1415
+ more_ = visitor_->end_array(*this, ec);
1416
+ level_ = 0;
1417
+ }
1418
+ }
1419
+ switch (stack_.back())
1420
+ {
1421
+ case csv_mode::header:
1422
+ if (line_ >= options_.header_lines())
1423
+ {
1424
+ stack_.back() = csv_mode::data;
1425
+ }
1426
+ switch (options_.mapping_kind())
1427
+ {
1428
+ case csv_mapping_kind::n_rows:
1429
+ if (options_.assume_header())
1430
+ {
1431
+ more_ = visitor_->end_array(*this, ec);
1432
+ }
1433
+ break;
1434
+ case csv_mapping_kind::m_columns:
1435
+ m_columns_filter_.initialize(column_names_);
1436
+ break;
1437
+ default:
1438
+ break;
1439
+ }
1440
+ break;
1441
+ case csv_mode::data:
1442
+ case csv_mode::subfields:
1443
+ {
1444
+ switch (options_.mapping_kind())
1445
+ {
1446
+ case csv_mapping_kind::n_rows:
1447
+ more_ = visitor_->end_array(*this, ec);
1448
+ break;
1449
+ case csv_mapping_kind::n_objects:
1450
+ more_ = visitor_->end_object(*this, ec);
1451
+ break;
1452
+ case csv_mapping_kind::m_columns:
1453
+ more_ = visitor_->end_array(*this, ec);
1454
+ break;
1455
+ }
1456
+ break;
1457
+ }
1458
+ default:
1459
+ break;
1460
+ }
1461
+ column_index_ = 0;
1462
+ }
1463
+
1464
+ void trim_string_buffer(bool trim_leading, bool trim_trailing)
1465
+ {
1466
+ std::size_t start = 0;
1467
+ std::size_t length = buffer_.length();
1468
+ if (trim_leading)
1469
+ {
1470
+ bool done = false;
1471
+ while (!done && start < buffer_.length())
1472
+ {
1473
+ if ((buffer_[start] < 256) && std::isspace(buffer_[start]))
1474
+ {
1475
+ ++start;
1476
+ }
1477
+ else
1478
+ {
1479
+ done = true;
1480
+ }
1481
+ }
1482
+ }
1483
+ if (trim_trailing)
1484
+ {
1485
+ bool done = false;
1486
+ while (!done && length > 0)
1487
+ {
1488
+ if ((buffer_[length-1] < 256) && std::isspace(buffer_[length-1]))
1489
+ {
1490
+ --length;
1491
+ }
1492
+ else
1493
+ {
1494
+ done = true;
1495
+ }
1496
+ }
1497
+ }
1498
+ if (start != 0 || length != buffer_.size())
1499
+ {
1500
+ // Do not use buffer_.substr(...), as this won't preserve the allocator state.
1501
+ buffer_.resize(length);
1502
+ buffer_.erase(0, start);
1503
+ }
1504
+ }
1505
+
1506
+ /*
1507
+ end_array, begin_array, xxx_value (end_value)
1508
+ */
1509
+ void end_unquoted_string_value(std::error_code& ec)
1510
+ {
1511
+ switch (stack_.back())
1512
+ {
1513
+ case csv_mode::data:
1514
+ case csv_mode::subfields:
1515
+ switch (options_.mapping_kind())
1516
+ {
1517
+ case csv_mapping_kind::n_rows:
1518
+ if (options_.unquoted_empty_value_is_null() && buffer_.length() == 0)
1519
+ {
1520
+ more_ = visitor_->null_value(semantic_tag::none, *this, ec);
1521
+ }
1522
+ else
1523
+ {
1524
+ end_value(options_.infer_types(), ec);
1525
+ }
1526
+ break;
1527
+ case csv_mapping_kind::n_objects:
1528
+ if (!(options_.ignore_empty_values() && buffer_.empty()))
1529
+ {
1530
+ if (column_index_ < column_names_.size() + offset_)
1531
+ {
1532
+ if (options_.unquoted_empty_value_is_null() && buffer_.length() == 0)
1533
+ {
1534
+ more_ = visitor_->null_value(semantic_tag::none, *this, ec);
1535
+ }
1536
+ else
1537
+ {
1538
+ end_value(options_.infer_types(), ec);
1539
+ }
1540
+ }
1541
+ else if (level_ > 0)
1542
+ {
1543
+ if (options_.unquoted_empty_value_is_null() && buffer_.length() == 0)
1544
+ {
1545
+ more_ = visitor_->null_value(semantic_tag::none, *this, ec);
1546
+ }
1547
+ else
1548
+ {
1549
+ end_value(options_.infer_types(), ec);
1550
+ }
1551
+ }
1552
+ }
1553
+ break;
1554
+ case csv_mapping_kind::m_columns:
1555
+ if (!(options_.ignore_empty_values() && buffer_.empty()))
1556
+ {
1557
+ end_value(options_.infer_types(), ec);
1558
+ }
1559
+ else
1560
+ {
1561
+ m_columns_filter_.skip_column();
1562
+ }
1563
+ break;
1564
+ }
1565
+ break;
1566
+ default:
1567
+ break;
1568
+ }
1569
+ }
1570
+
1571
+ void end_quoted_string_value(std::error_code& ec)
1572
+ {
1573
+ switch (stack_.back())
1574
+ {
1575
+ case csv_mode::data:
1576
+ case csv_mode::subfields:
1577
+ if (options_.trim_leading_inside_quotes() || options_.trim_trailing_inside_quotes())
1578
+ {
1579
+ trim_string_buffer(options_.trim_leading_inside_quotes(),options_.trim_trailing_inside_quotes());
1580
+ }
1581
+ switch (options_.mapping_kind())
1582
+ {
1583
+ case csv_mapping_kind::n_rows:
1584
+ end_value(false, ec);
1585
+ break;
1586
+ case csv_mapping_kind::n_objects:
1587
+ if (!(options_.ignore_empty_values() && buffer_.empty()))
1588
+ {
1589
+ if (column_index_ < column_names_.size() + offset_)
1590
+ {
1591
+ if (options_.unquoted_empty_value_is_null() && buffer_.length() == 0)
1592
+ {
1593
+ more_ = visitor_->null_value(semantic_tag::none, *this, ec);
1594
+ }
1595
+ else
1596
+ {
1597
+ end_value(false, ec);
1598
+ }
1599
+ }
1600
+ else if (level_ > 0)
1601
+ {
1602
+ if (options_.unquoted_empty_value_is_null() && buffer_.length() == 0)
1603
+ {
1604
+ more_ = visitor_->null_value(semantic_tag::none, *this, ec);
1605
+ }
1606
+ else
1607
+ {
1608
+ end_value(false, ec);
1609
+ }
1610
+ }
1611
+ }
1612
+ break;
1613
+ case csv_mapping_kind::m_columns:
1614
+ if (!(options_.ignore_empty_values() && buffer_.empty()))
1615
+ {
1616
+ end_value(false, ec);
1617
+ }
1618
+ else
1619
+ {
1620
+ m_columns_filter_.skip_column();
1621
+ }
1622
+ break;
1623
+ }
1624
+ break;
1625
+ default:
1626
+ break;
1627
+ }
1628
+ }
1629
+
1630
+ void end_value(bool infer_types, std::error_code& ec) // Emits the buffered field to visitor_ as a typed value; infer_types is only consulted when no declared column type applies.
1631
+ {
1632
+ auto it = std::find_if(string_double_map_.begin(), string_double_map_.end(), string_maps_to_double{ buffer_ }); // Look the text up in string_double_map_ (the NaN/Inf/-Inf spellings registered in the constructor).
1633
+ if (it != string_double_map_.end())
1634
+ {
1635
+ more_ = visitor_->double_value(it->second, semantic_tag::none, *this, ec);
1636
+ }
1637
+ else if (column_index_ < column_types_.size() + offset_) // A declared column type covers this column.
1638
+ {
1639
+ if (column_types_[column_index_ - offset_].col_type == csv_column_type::repeat_t) // Repeat marker: shift offset_ by rep_count so that earlier type entries are reused — presumably implements repeating column-type groups; confirm against parse_column_types.
1640
+ {
1641
+ offset_ = offset_ + column_types_[column_index_ - offset_].rep_count;
1642
+ if (column_index_ - offset_ + 1 < column_types_.size())
1643
+ {
1644
+ if (column_index_ == offset_ || level_ > column_types_[column_index_-offset_].level)
1645
+ {
1646
+ more_ = visitor_->end_array(*this, ec);
1647
+ }
1648
+ level_ = column_index_ == offset_ ? 0 : column_types_[column_index_ - offset_].level;
1649
+ }
1650
+ }
1651
+ if (level_ < column_types_[column_index_ - offset_].level) // Open or close one array so that level_ matches the level recorded for this column type.
1652
+ {
1653
+ more_ = visitor_->begin_array(semantic_tag::none, *this, ec);
1654
+ level_ = column_types_[column_index_ - offset_].level;
1655
+ }
1656
+ else if (level_ > column_types_[column_index_ - offset_].level)
1657
+ {
1658
+ more_ = visitor_->end_array(*this, ec);
1659
+ level_ = column_types_[column_index_ - offset_].level;
1660
+ }
1661
+ switch (column_types_[column_index_ - offset_].col_type)
1662
+ {
1663
+ case csv_column_type::integer_t: // Parse as int64; on failure use the column default (parsed as JSON) if one exists, otherwise null.
1664
+ {
1665
+ std::basic_istringstream<CharT,std::char_traits<CharT>,char_allocator_type> iss{buffer_};
1666
+ int64_t val;
1667
+ iss >> val;
1668
+ if (!iss.fail())
1669
+ {
1670
+ more_ = visitor_->int64_value(val, semantic_tag::none, *this, ec);
1671
+ }
1672
+ else
1673
+ {
1674
+ if (column_index_ - offset_ < column_defaults_.size() && column_defaults_[column_index_ - offset_].length() > 0)
1675
+ {
1676
+ basic_json_parser<CharT,temp_allocator_type> parser(alloc_);
1677
+ parser.update(column_defaults_[column_index_ - offset_].data(),column_defaults_[column_index_ - offset_].length());
1678
+ parser.parse_some(*visitor_); // NOTE(review): these overloads take no error_code and more_ is not updated from the default-value events — confirm that is intended.
1679
+ parser.finish_parse(*visitor_);
1680
+ }
1681
+ else
1682
+ {
1683
+ more_ = visitor_->null_value(semantic_tag::none, *this, ec);
1684
+ }
1685
+ }
1686
+ }
1687
+ break;
1688
+ case csv_column_type::float_t:
1689
+ {
1690
+ if (options_.lossless_number()) // Pass the text through unchanged, tagged bigdec, instead of converting to double.
1691
+ {
1692
+ more_ = visitor_->string_value(buffer_,semantic_tag::bigdec, *this, ec);
1693
+ }
1694
+ else
1695
+ {
1696
+ std::basic_istringstream<CharT, std::char_traits<CharT>, char_allocator_type> iss{ buffer_ };
1697
+ double val;
1698
+ iss >> val;
1699
+ if (!iss.fail())
1700
+ {
1701
+ more_ = visitor_->double_value(val, semantic_tag::none, *this, ec);
1702
+ }
1703
+ else
1704
+ {
1705
+ if (column_index_ - offset_ < column_defaults_.size() && column_defaults_[column_index_ - offset_].length() > 0)
1706
+ {
1707
+ basic_json_parser<CharT,temp_allocator_type> parser(alloc_);
1708
+ parser.update(column_defaults_[column_index_ - offset_].data(),column_defaults_[column_index_ - offset_].length());
1709
+ parser.parse_some(*visitor_);
1710
+ parser.finish_parse(*visitor_);
1711
+ }
1712
+ else
1713
+ {
1714
+ more_ = visitor_->null_value(semantic_tag::none, *this, ec);
1715
+ }
1716
+ }
1717
+ }
1718
+ }
1719
+ break;
1720
+ case csv_column_type::boolean_t: // Accepts "0"/"1" and case-insensitive "false"/"true"; anything else falls back to the column default or null.
1721
+ {
1722
+ if (buffer_.length() == 1 && buffer_[0] == '0')
1723
+ {
1724
+ more_ = visitor_->bool_value(false, semantic_tag::none, *this, ec);
1725
+ }
1726
+ else if (buffer_.length() == 1 && buffer_[0] == '1')
1727
+ {
1728
+ more_ = visitor_->bool_value(true, semantic_tag::none, *this, ec);
1729
+ }
1730
+ else if (buffer_.length() == 5 && ((buffer_[0] == 'f' || buffer_[0] == 'F') && (buffer_[1] == 'a' || buffer_[1] == 'A') && (buffer_[2] == 'l' || buffer_[2] == 'L') && (buffer_[3] == 's' || buffer_[3] == 'S') && (buffer_[4] == 'e' || buffer_[4] == 'E')))
1731
+ {
1732
+ more_ = visitor_->bool_value(false, semantic_tag::none, *this, ec);
1733
+ }
1734
+ else if (buffer_.length() == 4 && ((buffer_[0] == 't' || buffer_[0] == 'T') && (buffer_[1] == 'r' || buffer_[1] == 'R') && (buffer_[2] == 'u' || buffer_[2] == 'U') && (buffer_[3] == 'e' || buffer_[3] == 'E')))
1735
+ {
1736
+ more_ = visitor_->bool_value(true, semantic_tag::none, *this, ec);
1737
+ }
1738
+ else
1739
+ {
1740
+ if (column_index_ - offset_ < column_defaults_.size() && column_defaults_[column_index_ - offset_].length() > 0)
1741
+ {
1742
+ basic_json_parser<CharT,temp_allocator_type> parser(alloc_);
1743
+ parser.update(column_defaults_[column_index_ - offset_].data(),column_defaults_[column_index_ - offset_].length());
1744
+ parser.parse_some(*visitor_);
1745
+ parser.finish_parse(*visitor_);
1746
+ }
1747
+ else
1748
+ {
1749
+ more_ = visitor_->null_value(semantic_tag::none, *this, ec);
1750
+ }
1751
+ }
1752
+ }
1753
+ break;
1754
+ default: // Any other column type: emit the text as a string; an empty field uses the column default when one exists, else an empty string.
1755
+ if (buffer_.length() > 0)
1756
+ {
1757
+ more_ = visitor_->string_value(buffer_, semantic_tag::none, *this, ec);
1758
+ }
1759
+ else
1760
+ {
1761
+ if (column_index_ < column_defaults_.size() + offset_ && column_defaults_[column_index_ - offset_].length() > 0)
1762
+ {
1763
+ basic_json_parser<CharT,temp_allocator_type> parser(alloc_);
1764
+ parser.update(column_defaults_[column_index_ - offset_].data(),column_defaults_[column_index_ - offset_].length());
1765
+ parser.parse_some(*visitor_);
1766
+ parser.finish_parse(*visitor_);
1767
+ }
1768
+ else
1769
+ {
1770
+ more_ = visitor_->string_value(string_view_type(), semantic_tag::none, *this, ec);
1771
+ }
1772
+ }
1773
+ break;
1774
+ }
1775
+ }
1776
+ else // No declared type for this column: infer from the text when requested, otherwise emit a plain string.
1777
+ {
1778
+ if (infer_types)
1779
+ {
1780
+ end_value_with_numeric_check(ec);
1781
+ }
1782
+ else
1783
+ {
1784
+ more_ = visitor_->string_value(buffer_, semantic_tag::none, *this, ec);
1785
+ }
1786
+ }
1787
+ }
1788
+
1789
/// States of the scanner that classifies a field's text while inferring
/// its type. Enumerator order is significant and must not change.
enum class numeric_check_state
{
    initial,        // nothing examined yet
    null,           // text matched "null" (case-insensitive)
    boolean_true,   // text matched "true"
    boolean_false,  // text matched "false"
    minus,          // a leading '-' was read
    zero,           // a leading '0' was read
    integer,        // reading integer digits
    fraction1,      // a '.' was just read
    fraction,
    exp1,           // an 'e' / 'E' was just read
    exp,
    not_a_number    // text cannot be one of the above
};
1804
+
1805
+ /*
1806
+ xxx_value
1807
+ */
1808
+ void end_value_with_numeric_check(std::error_code& ec)
1809
+ {
1810
+ numeric_check_state state = numeric_check_state::initial;
1811
+ bool is_negative = false;
1812
+ int precision = 0;
1813
+ uint8_t decimal_places = 0;
1814
+
1815
+ auto last = buffer_.end();
1816
+
1817
+ std::string buffer;
1818
+ for (auto p = buffer_.begin(); state != numeric_check_state::not_a_number && p != last; ++p)
1819
+ {
1820
+ switch (state)
1821
+ {
1822
+ case numeric_check_state::initial:
1823
+ {
1824
+ switch (*p)
1825
+ {
1826
+ case 'n':case 'N':
1827
+ if ((last-p) == 4 && (p[1] == 'u' || p[1] == 'U') && (p[2] == 'l' || p[2] == 'L') && (p[3] == 'l' || p[3] == 'L'))
1828
+ {
1829
+ state = numeric_check_state::null;
1830
+ }
1831
+ else
1832
+ {
1833
+ state = numeric_check_state::not_a_number;
1834
+ }
1835
+ break;
1836
+ case 't':case 'T':
1837
+ if ((last-p) == 4 && (p[1] == 'r' || p[1] == 'R') && (p[2] == 'u' || p[2] == 'U') && (p[3] == 'e' || p[3] == 'U'))
1838
+ {
1839
+ state = numeric_check_state::boolean_true;
1840
+ }
1841
+ else
1842
+ {
1843
+ state = numeric_check_state::not_a_number;
1844
+ }
1845
+ break;
1846
+ case 'f':case 'F':
1847
+ if ((last-p) == 5 && (p[1] == 'a' || p[1] == 'A') && (p[2] == 'l' || p[2] == 'L') && (p[3] == 's' || p[3] == 'S') && (p[4] == 'e' || p[4] == 'E'))
1848
+ {
1849
+ state = numeric_check_state::boolean_false;
1850
+ }
1851
+ else
1852
+ {
1853
+ state = numeric_check_state::not_a_number;
1854
+ }
1855
+ break;
1856
+ case '-':
1857
+ is_negative = true;
1858
+ buffer.push_back(*p);
1859
+ state = numeric_check_state::minus;
1860
+ break;
1861
+ case '0':
1862
+ ++precision;
1863
+ buffer.push_back(*p);
1864
+ state = numeric_check_state::zero;
1865
+ break;
1866
+ case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
1867
+ ++precision;
1868
+ buffer.push_back(*p);
1869
+ state = numeric_check_state::integer;
1870
+ break;
1871
+ default:
1872
+ state = numeric_check_state::not_a_number;
1873
+ break;
1874
+ }
1875
+ break;
1876
+ }
1877
+ case numeric_check_state::zero:
1878
+ {
1879
+ switch (*p)
1880
+ {
1881
+ case '.':
1882
+ buffer.push_back(to_double_.get_decimal_point());
1883
+ state = numeric_check_state::fraction1;
1884
+ break;
1885
+ case 'e':case 'E':
1886
+ buffer.push_back(*p);
1887
+ state = numeric_check_state::exp1;
1888
+ break;
1889
+ default:
1890
+ state = numeric_check_state::not_a_number;
1891
+ break;
1892
+ }
1893
+ break;
1894
+ }
1895
+ case numeric_check_state::integer:
1896
+ {
1897
+ switch (*p)
1898
+ {
1899
+ case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
1900
+ ++precision;
1901
+ buffer.push_back(*p);
1902
+ break;
1903
+ case '.':
1904
+ buffer.push_back(to_double_.get_decimal_point());
1905
+ state = numeric_check_state::fraction1;
1906
+ break;
1907
+ case 'e':case 'E':
1908
+ buffer.push_back(*p);
1909
+ state = numeric_check_state::exp1;
1910
+ break;
1911
+ default:
1912
+ state = numeric_check_state::not_a_number;
1913
+ break;
1914
+ }
1915
+ break;
1916
+ }
1917
+ case numeric_check_state::minus:
1918
+ {
1919
+ switch (*p)
1920
+ {
1921
+ case '0':
1922
+ ++precision;
1923
+ buffer.push_back(*p);
1924
+ state = numeric_check_state::zero;
1925
+ break;
1926
+ case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
1927
+ ++precision;
1928
+ buffer.push_back(*p);
1929
+ state = numeric_check_state::integer;
1930
+ break;
1931
+ default:
1932
+ state = numeric_check_state::not_a_number;
1933
+ break;
1934
+ }
1935
+ break;
1936
+ }
1937
+ case numeric_check_state::fraction1:
1938
+ {
1939
+ switch (*p)
1940
+ {
1941
+ case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
1942
+ ++precision;
1943
+ ++decimal_places;
1944
+ buffer.push_back(*p);
1945
+ state = numeric_check_state::fraction;
1946
+ break;
1947
+ default:
1948
+ state = numeric_check_state::not_a_number;
1949
+ break;
1950
+ }
1951
+ break;
1952
+ }
1953
+ case numeric_check_state::fraction:
1954
+ {
1955
+ switch (*p)
1956
+ {
1957
+ case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
1958
+ ++precision;
1959
+ ++decimal_places;
1960
+ buffer.push_back(*p);
1961
+ break;
1962
+ case 'e':case 'E':
1963
+ buffer.push_back(*p);
1964
+ state = numeric_check_state::exp1;
1965
+ break;
1966
+ default:
1967
+ state = numeric_check_state::not_a_number;
1968
+ break;
1969
+ }
1970
+ break;
1971
+ }
1972
+ case numeric_check_state::exp1:
1973
+ {
1974
+ switch (*p)
1975
+ {
1976
+ case '-':
1977
+ buffer.push_back(*p);
1978
+ break;
1979
+ case '+':
1980
+ break;
1981
+ case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
1982
+ state = numeric_check_state::exp;
1983
+ buffer.push_back(*p);
1984
+ break;
1985
+ default:
1986
+ state = numeric_check_state::not_a_number;
1987
+ break;
1988
+ }
1989
+ break;
1990
+ }
1991
+ case numeric_check_state::exp:
1992
+ {
1993
+ switch (*p)
1994
+ {
1995
+ case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
1996
+ buffer.push_back(*p);
1997
+ break;
1998
+ default:
1999
+ state = numeric_check_state::not_a_number;
2000
+ break;
2001
+ }
2002
+ break;
2003
+ }
2004
+ default:
2005
+ break;
2006
+ }
2007
+ }
2008
+
2009
+ switch (state)
2010
+ {
2011
+ case numeric_check_state::null:
2012
+ more_ = visitor_->null_value(semantic_tag::none, *this, ec);
2013
+ break;
2014
+ case numeric_check_state::boolean_true:
2015
+ more_ = visitor_->bool_value(true, semantic_tag::none, *this, ec);
2016
+ break;
2017
+ case numeric_check_state::boolean_false:
2018
+ more_ = visitor_->bool_value(false, semantic_tag::none, *this, ec);
2019
+ break;
2020
+ case numeric_check_state::zero:
2021
+ case numeric_check_state::integer:
2022
+ {
2023
+ if (is_negative)
2024
+ {
2025
+ int64_t val{ 0 };
2026
+ auto result = jsoncons::detail::to_integer_decimal(buffer_.data(), buffer_.length(), val);
2027
+ if (result)
2028
+ {
2029
+ more_ = visitor_->int64_value(val, semantic_tag::none, *this, ec);
2030
+ }
2031
+ else // Must be overflow
2032
+ {
2033
+ more_ = visitor_->string_value(buffer_, semantic_tag::bigint, *this, ec);
2034
+ }
2035
+ }
2036
+ else
2037
+ {
2038
+ uint64_t val{ 0 };
2039
+ auto result = jsoncons::detail::to_integer_decimal(buffer_.data(), buffer_.length(), val);
2040
+ if (result)
2041
+ {
2042
+ more_ = visitor_->uint64_value(val, semantic_tag::none, *this, ec);
2043
+ }
2044
+ else if (result.ec == jsoncons::detail::to_integer_errc::overflow)
2045
+ {
2046
+ more_ = visitor_->string_value(buffer_, semantic_tag::bigint, *this, ec);
2047
+ }
2048
+ else
2049
+ {
2050
+ ec = result.ec;
2051
+ more_ = false;
2052
+ return;
2053
+ }
2054
+ }
2055
+ break;
2056
+ }
2057
+ case numeric_check_state::fraction:
2058
+ case numeric_check_state::exp:
2059
+ {
2060
+ if (options_.lossless_number())
2061
+ {
2062
+ more_ = visitor_->string_value(buffer_,semantic_tag::bigdec, *this, ec);
2063
+ }
2064
+ else
2065
+ {
2066
+ double d = to_double_(buffer.c_str(), buffer.length());
2067
+ more_ = visitor_->double_value(d, semantic_tag::none, *this, ec);
2068
+ }
2069
+ break;
2070
+ }
2071
+ default:
2072
+ {
2073
+ more_ = visitor_->string_value(buffer_, semantic_tag::none, *this, ec);
2074
+ break;
2075
+ }
2076
+ }
2077
+ }
2078
+
2079
+ void push_state(csv_parse_state state)
2080
+ {
2081
+ state_stack_.push_back(state);
2082
+ }
2083
+
2084
+ csv_parse_state pop_state()
2085
+ {
2086
+ JSONCONS_ASSERT(!state_stack_.empty())
2087
+ csv_parse_state state = state_stack_.back();
2088
+ state_stack_.pop_back();
2089
+ return state;
2090
+ }
2091
+ };
2092
+
2093
+ using csv_parser = basic_csv_parser<char>; // basic_csv_parser instantiated for char
2094
+ using wcsv_parser = basic_csv_parser<wchar_t>; // basic_csv_parser instantiated for wchar_t
2095
+
2096
+ }}
2097
+
2098
+ #endif
2099
+