jsoncons 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (155) hide show
  1. checksums.yaml +7 -0
  2. data/ext/jsoncons/extconf.rb +43 -0
  3. data/ext/jsoncons/jsoncons.cpp +161 -0
  4. data/ext/jsoncons/jsoncons.h +10 -0
  5. data/jsoncons.gemspec +44 -0
  6. data/lib/jsoncons/jsoncons/examples/input/address-book.json +13 -0
  7. data/lib/jsoncons/jsoncons/examples/input/books.json +28 -0
  8. data/lib/jsoncons/jsoncons/examples/input/countries.json +7 -0
  9. data/lib/jsoncons/jsoncons/examples/input/employees.json +30 -0
  10. data/lib/jsoncons/jsoncons/examples/input/jsonschema/name.json +15 -0
  11. data/lib/jsoncons/jsoncons/examples/input/multiple-json-objects.json +3 -0
  12. data/lib/jsoncons/jsoncons/examples/input/sales.csv +6 -0
  13. data/lib/jsoncons/jsoncons/examples/input/store.json +28 -0
  14. data/lib/jsoncons/jsoncons/examples/input/tasks.csv +6 -0
  15. data/lib/jsoncons/jsoncons/include/jsoncons/allocator_holder.hpp +38 -0
  16. data/lib/jsoncons/jsoncons/include/jsoncons/basic_json.hpp +5905 -0
  17. data/lib/jsoncons/jsoncons/include/jsoncons/bigint.hpp +1611 -0
  18. data/lib/jsoncons/jsoncons/include/jsoncons/byte_string.hpp +820 -0
  19. data/lib/jsoncons/jsoncons/include/jsoncons/config/binary_config.hpp +226 -0
  20. data/lib/jsoncons/jsoncons/include/jsoncons/config/compiler_support.hpp +375 -0
  21. data/lib/jsoncons/jsoncons/include/jsoncons/config/jsoncons_config.hpp +309 -0
  22. data/lib/jsoncons/jsoncons/include/jsoncons/config/version.hpp +40 -0
  23. data/lib/jsoncons/jsoncons/include/jsoncons/conv_error.hpp +218 -0
  24. data/lib/jsoncons/jsoncons/include/jsoncons/decode_json.hpp +209 -0
  25. data/lib/jsoncons/jsoncons/include/jsoncons/decode_traits.hpp +651 -0
  26. data/lib/jsoncons/jsoncons/include/jsoncons/detail/endian.hpp +44 -0
  27. data/lib/jsoncons/jsoncons/include/jsoncons/detail/grisu3.hpp +312 -0
  28. data/lib/jsoncons/jsoncons/include/jsoncons/detail/optional.hpp +483 -0
  29. data/lib/jsoncons/jsoncons/include/jsoncons/detail/parse_number.hpp +1133 -0
  30. data/lib/jsoncons/jsoncons/include/jsoncons/detail/span.hpp +188 -0
  31. data/lib/jsoncons/jsoncons/include/jsoncons/detail/string_view.hpp +537 -0
  32. data/lib/jsoncons/jsoncons/include/jsoncons/detail/string_wrapper.hpp +370 -0
  33. data/lib/jsoncons/jsoncons/include/jsoncons/detail/write_number.hpp +567 -0
  34. data/lib/jsoncons/jsoncons/include/jsoncons/encode_json.hpp +315 -0
  35. data/lib/jsoncons/jsoncons/include/jsoncons/encode_traits.hpp +378 -0
  36. data/lib/jsoncons/jsoncons/include/jsoncons/json.hpp +18 -0
  37. data/lib/jsoncons/jsoncons/include/jsoncons/json_array.hpp +324 -0
  38. data/lib/jsoncons/jsoncons/include/jsoncons/json_content_handler.hpp +12 -0
  39. data/lib/jsoncons/jsoncons/include/jsoncons/json_cursor.hpp +448 -0
  40. data/lib/jsoncons/jsoncons/include/jsoncons/json_decoder.hpp +420 -0
  41. data/lib/jsoncons/jsoncons/include/jsoncons/json_encoder.hpp +1587 -0
  42. data/lib/jsoncons/jsoncons/include/jsoncons/json_error.hpp +156 -0
  43. data/lib/jsoncons/jsoncons/include/jsoncons/json_exception.hpp +241 -0
  44. data/lib/jsoncons/jsoncons/include/jsoncons/json_filter.hpp +653 -0
  45. data/lib/jsoncons/jsoncons/include/jsoncons/json_fwd.hpp +23 -0
  46. data/lib/jsoncons/jsoncons/include/jsoncons/json_object.hpp +1772 -0
  47. data/lib/jsoncons/jsoncons/include/jsoncons/json_options.hpp +862 -0
  48. data/lib/jsoncons/jsoncons/include/jsoncons/json_parser.hpp +2900 -0
  49. data/lib/jsoncons/jsoncons/include/jsoncons/json_reader.hpp +731 -0
  50. data/lib/jsoncons/jsoncons/include/jsoncons/json_traits_macros.hpp +1072 -0
  51. data/lib/jsoncons/jsoncons/include/jsoncons/json_traits_macros_deprecated.hpp +144 -0
  52. data/lib/jsoncons/jsoncons/include/jsoncons/json_type.hpp +206 -0
  53. data/lib/jsoncons/jsoncons/include/jsoncons/json_type_traits.hpp +1830 -0
  54. data/lib/jsoncons/jsoncons/include/jsoncons/json_visitor.hpp +1560 -0
  55. data/lib/jsoncons/jsoncons/include/jsoncons/json_visitor2.hpp +2079 -0
  56. data/lib/jsoncons/jsoncons/include/jsoncons/pretty_print.hpp +89 -0
  57. data/lib/jsoncons/jsoncons/include/jsoncons/ser_context.hpp +62 -0
  58. data/lib/jsoncons/jsoncons/include/jsoncons/sink.hpp +289 -0
  59. data/lib/jsoncons/jsoncons/include/jsoncons/source.hpp +777 -0
  60. data/lib/jsoncons/jsoncons/include/jsoncons/source_adaptor.hpp +148 -0
  61. data/lib/jsoncons/jsoncons/include/jsoncons/staj2_cursor.hpp +1189 -0
  62. data/lib/jsoncons/jsoncons/include/jsoncons/staj_cursor.hpp +1254 -0
  63. data/lib/jsoncons/jsoncons/include/jsoncons/staj_iterator.hpp +449 -0
  64. data/lib/jsoncons/jsoncons/include/jsoncons/tag_type.hpp +245 -0
  65. data/lib/jsoncons/jsoncons/include/jsoncons/text_source_adaptor.hpp +144 -0
  66. data/lib/jsoncons/jsoncons/include/jsoncons/traits_extension.hpp +884 -0
  67. data/lib/jsoncons/jsoncons/include/jsoncons/typed_array_view.hpp +250 -0
  68. data/lib/jsoncons/jsoncons/include/jsoncons/unicode_traits.hpp +1330 -0
  69. data/lib/jsoncons/jsoncons/include/jsoncons/uri.hpp +635 -0
  70. data/lib/jsoncons/jsoncons/include/jsoncons/value_converter.hpp +340 -0
  71. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson.hpp +23 -0
  72. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_cursor.hpp +320 -0
  73. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_decimal128.hpp +865 -0
  74. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_encoder.hpp +585 -0
  75. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_error.hpp +103 -0
  76. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_oid.hpp +245 -0
  77. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_options.hpp +75 -0
  78. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_parser.hpp +645 -0
  79. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_reader.hpp +92 -0
  80. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/bson_type.hpp +44 -0
  81. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/decode_bson.hpp +201 -0
  82. data/lib/jsoncons/jsoncons/include/jsoncons_ext/bson/encode_bson.hpp +144 -0
  83. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor.hpp +26 -0
  84. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_cursor.hpp +351 -0
  85. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_cursor2.hpp +265 -0
  86. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_detail.hpp +93 -0
  87. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_encoder.hpp +1766 -0
  88. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_error.hpp +105 -0
  89. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_options.hpp +113 -0
  90. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_parser.hpp +1942 -0
  91. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/cbor_reader.hpp +116 -0
  92. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/decode_cbor.hpp +203 -0
  93. data/lib/jsoncons/jsoncons/include/jsoncons_ext/cbor/encode_cbor.hpp +151 -0
  94. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv.hpp +17 -0
  95. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_cursor.hpp +358 -0
  96. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_encoder.hpp +954 -0
  97. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_error.hpp +85 -0
  98. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_options.hpp +973 -0
  99. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_parser.hpp +2099 -0
  100. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_reader.hpp +348 -0
  101. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/csv_serializer.hpp +12 -0
  102. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/decode_csv.hpp +208 -0
  103. data/lib/jsoncons/jsoncons/include/jsoncons_ext/csv/encode_csv.hpp +122 -0
  104. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jmespath/jmespath.hpp +5215 -0
  105. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jmespath/jmespath_error.hpp +215 -0
  106. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpatch/jsonpatch.hpp +579 -0
  107. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpatch/jsonpatch_error.hpp +121 -0
  108. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/expression.hpp +3329 -0
  109. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/flatten.hpp +432 -0
  110. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/json_location.hpp +445 -0
  111. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/json_query.hpp +115 -0
  112. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/jsonpath.hpp +13 -0
  113. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/jsonpath_error.hpp +240 -0
  114. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/jsonpath_expression.hpp +2612 -0
  115. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpath/jsonpath_selector.hpp +1322 -0
  116. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpointer/jsonpointer.hpp +1577 -0
  117. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonpointer/jsonpointer_error.hpp +119 -0
  118. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/format_validator.hpp +968 -0
  119. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/json_validator.hpp +120 -0
  120. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/jsonschema.hpp +13 -0
  121. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/jsonschema_error.hpp +105 -0
  122. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/jsonschema_version.hpp +18 -0
  123. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/keyword_validator.hpp +1745 -0
  124. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/keyword_validator_factory.hpp +556 -0
  125. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/schema_draft7.hpp +198 -0
  126. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/schema_location.hpp +200 -0
  127. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/schema_version.hpp +35 -0
  128. data/lib/jsoncons/jsoncons/include/jsoncons_ext/jsonschema/subschema.hpp +144 -0
  129. data/lib/jsoncons/jsoncons/include/jsoncons_ext/mergepatch/mergepatch.hpp +103 -0
  130. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/decode_msgpack.hpp +202 -0
  131. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/encode_msgpack.hpp +142 -0
  132. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack.hpp +24 -0
  133. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_cursor.hpp +343 -0
  134. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_cursor2.hpp +259 -0
  135. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_encoder.hpp +753 -0
  136. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_error.hpp +94 -0
  137. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_options.hpp +74 -0
  138. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_parser.hpp +748 -0
  139. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_reader.hpp +116 -0
  140. data/lib/jsoncons/jsoncons/include/jsoncons_ext/msgpack/msgpack_type.hpp +63 -0
  141. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/decode_ubjson.hpp +201 -0
  142. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/encode_ubjson.hpp +142 -0
  143. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson.hpp +23 -0
  144. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_cursor.hpp +307 -0
  145. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_encoder.hpp +502 -0
  146. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_error.hpp +100 -0
  147. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_options.hpp +87 -0
  148. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_parser.hpp +880 -0
  149. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_reader.hpp +92 -0
  150. data/lib/jsoncons/jsoncons/include/jsoncons_ext/ubjson/ubjson_type.hpp +43 -0
  151. data/lib/jsoncons/version.rb +5 -0
  152. data/lib/jsoncons.rb +33 -0
  153. data/test/jsoncons_test.rb +108 -0
  154. data/test/test_helper.rb +7 -0
  155. metadata +268 -0
@@ -0,0 +1,2099 @@
1
+ // Copyright 2015 Daniel Parker
2
+ // Distributed under the Boost license, Version 1.0.
3
+ // (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
4
+
5
+ // See https://github.com/danielaparker/jsoncons for latest version
6
+
7
+ #ifndef JSONCONS_CSV_CSV_PARSER_HPP
8
+ #define JSONCONS_CSV_CSV_PARSER_HPP
9
+
10
+ #include <memory> // std::allocator
11
+ #include <string>
12
+ #include <sstream>
13
+ #include <vector>
14
+ #include <stdexcept>
15
+ #include <system_error>
16
+ #include <cctype>
17
+ #include <jsoncons/json_exception.hpp>
18
+ #include <jsoncons/json_visitor.hpp>
19
+ #include <jsoncons/json_reader.hpp>
20
+ #include <jsoncons/json_filter.hpp>
21
+ #include <jsoncons/json.hpp>
22
+ #include <jsoncons/detail/parse_number.hpp>
23
+ #include <jsoncons_ext/csv/csv_error.hpp>
24
+ #include <jsoncons_ext/csv/csv_options.hpp>
25
+
26
+ namespace jsoncons { namespace csv {
27
+
28
// Modes kept on the parser's mode stack (see basic_csv_parser::stack_).
// Enumerator order is significant only in that it fixes the underlying values.
enum class csv_mode
{
    initial, header, data,
    subfields   // when this is on top of the stack at end of input, the parser pops it and emits end_array
};
35
+
36
// States of the CSV parser's state machine (basic_csv_parser::state_ and
// basic_csv_parser::state_stack_). The enumerators are listed in their
// original order so the underlying values are unchanged.
enum class csv_parse_state
{
    // record-level states
    start, cr, column_labels, expect_comment_or_record, expect_record,
    end_record, no_more_records, comment, between_values,

    // string scanning
    quoted_string, unquoted_string, before_unquoted_string, escaped_value,

    // number scanning
    minus, zero, integer, fraction, exp1, exp2, exp3,

    accept,

    // field / subfield transitions
    before_unquoted_field, before_unquoted_field_tail, before_unquoted_field_tail1,
    before_last_unquoted_field, before_last_unquoted_field_tail,
    before_unquoted_subfield, before_unquoted_subfield_tail,
    before_quoted_subfield, before_quoted_subfield_tail,
    before_quoted_field, before_quoted_field_tail,
    before_last_quoted_field, before_last_quoted_field_tail,

    done
};
74
+
75
// Replay position used by detail::m_columns_filter::replay_parse_events when
// it emits the cached columns as a single object of arrays.
enum class cached_state
{
    begin_object,   // emit begin_object, then move on to the first column name
    end_object,     // emit end_object, then finish
    begin_array,    // open the array for the current column
    end_array,      // close the current column's array and advance to the next column
    name,           // emit the current column's key, or go to end_object when columns are exhausted
    item,           // replay the next cached event of the current column
    done            // nothing left to replay
};
85
+
86
+ struct default_csv_parsing
87
+ {
88
+ bool operator()(csv_errc, const ser_context&) noexcept
89
+ {
90
+ return false;
91
+ }
92
+ };
93
+
94
+ namespace detail {
95
+
96
+ template <class CharT,class TempAllocator>
97
+ class parse_event
98
+ {
99
+ using temp_allocator_type = TempAllocator;
100
+ using string_view_type = typename basic_json_visitor<CharT>::string_view_type;
101
+ using char_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<CharT>;
102
+ using byte_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<uint8_t>;
103
+ using string_type = std::basic_string<CharT,std::char_traits<CharT>,char_allocator_type>;
104
+ using byte_string_type = basic_byte_string<byte_allocator_type>;
105
+
106
+ staj_event_type event_type;
107
+ string_type string_value;
108
+ byte_string_type byte_string_value;
109
+ union
110
+ {
111
+ bool bool_value;
112
+ int64_t int64_value;
113
+ uint64_t uint64_value;
114
+ double double_value;
115
+ };
116
+ semantic_tag tag;
117
+ public:
118
+ parse_event(staj_event_type event_type, semantic_tag tag, const TempAllocator& alloc)
119
+ : event_type(event_type),
120
+ string_value(alloc),
121
+ byte_string_value(alloc),
122
+ tag(tag)
123
+ {
124
+ }
125
+
126
+ parse_event(const string_view_type& value, semantic_tag tag, const TempAllocator& alloc)
127
+ : event_type(staj_event_type::string_value),
128
+ string_value(value.data(),value.length(),alloc),
129
+ byte_string_value(alloc),
130
+ tag(tag)
131
+ {
132
+ }
133
+
134
+ parse_event(const byte_string_view& value, semantic_tag tag, const TempAllocator& alloc)
135
+ : event_type(staj_event_type::byte_string_value),
136
+ string_value(alloc),
137
+ byte_string_value(value.data(),value.size(),alloc),
138
+ tag(tag)
139
+ {
140
+ }
141
+
142
+ parse_event(bool value, semantic_tag tag, const TempAllocator& alloc)
143
+ : event_type(staj_event_type::bool_value),
144
+ string_value(alloc),
145
+ byte_string_value(alloc),
146
+ bool_value(value),
147
+ tag(tag)
148
+ {
149
+ }
150
+
151
+ parse_event(int64_t value, semantic_tag tag, const TempAllocator& alloc)
152
+ : event_type(staj_event_type::int64_value),
153
+ string_value(alloc),
154
+ byte_string_value(alloc),
155
+ int64_value(value),
156
+ tag(tag)
157
+ {
158
+ }
159
+
160
+ parse_event(uint64_t value, semantic_tag tag, const TempAllocator& alloc)
161
+ : event_type(staj_event_type::uint64_value),
162
+ string_value(alloc),
163
+ byte_string_value(alloc),
164
+ uint64_value(value),
165
+ tag(tag)
166
+ {
167
+ }
168
+
169
+ parse_event(double value, semantic_tag tag, const TempAllocator& alloc)
170
+ : event_type(staj_event_type::double_value),
171
+ string_value(alloc),
172
+ byte_string_value(alloc),
173
+ double_value(value),
174
+ tag(tag)
175
+ {
176
+ }
177
+
178
+ parse_event(const parse_event&) = default;
179
+ parse_event(parse_event&&) = default;
180
+ parse_event& operator=(const parse_event&) = default;
181
+ parse_event& operator=(parse_event&&) = default;
182
+
183
+ bool replay(basic_json_visitor<CharT>& visitor) const
184
+ {
185
+ switch (event_type)
186
+ {
187
+ case staj_event_type::begin_array:
188
+ return visitor.begin_array(tag, ser_context());
189
+ case staj_event_type::end_array:
190
+ return visitor.end_array(ser_context());
191
+ case staj_event_type::string_value:
192
+ return visitor.string_value(string_value, tag, ser_context());
193
+ case staj_event_type::byte_string_value:
194
+ case staj_event_type::null_value:
195
+ return visitor.null_value(tag, ser_context());
196
+ case staj_event_type::bool_value:
197
+ return visitor.bool_value(bool_value, tag, ser_context());
198
+ case staj_event_type::int64_value:
199
+ return visitor.int64_value(int64_value, tag, ser_context());
200
+ case staj_event_type::uint64_value:
201
+ return visitor.uint64_value(uint64_value, tag, ser_context());
202
+ case staj_event_type::double_value:
203
+ return visitor.double_value(double_value, tag, ser_context());
204
+ default:
205
+ return false;
206
+ }
207
+ }
208
+ };
209
+
210
+ template <class CharT, class TempAllocator>
211
+ class m_columns_filter : public basic_json_visitor<CharT>
212
+ {
213
+ public:
214
+ using string_view_type = typename basic_json_visitor<CharT>::string_view_type;
215
+ using char_type = CharT;
216
+ using temp_allocator_type = TempAllocator;
217
+
218
+ using char_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<CharT>;
219
+ using string_type = std::basic_string<CharT,std::char_traits<CharT>,char_allocator_type>;
220
+
221
+ using string_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<string_type>;
222
+ using parse_event_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<parse_event<CharT,TempAllocator>>;
223
+ using parse_event_vector_type = std::vector<parse_event<CharT,TempAllocator>, parse_event_allocator_type>;
224
+ using parse_event_vector_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<parse_event_vector_type>;
225
+ private:
226
+ TempAllocator alloc_;
227
+ std::size_t name_index_;
228
+ int level_;
229
+ cached_state state_;
230
+ std::size_t column_index_;
231
+ std::size_t row_index_;
232
+
233
+ std::vector<string_type, string_allocator_type> column_names_;
234
+ std::vector<parse_event_vector_type,parse_event_vector_allocator_type> cached_events_;
235
+ public:
236
+
237
+ m_columns_filter(const TempAllocator& alloc)
238
+ : alloc_(alloc),
239
+ name_index_(0),
240
+ level_(0),
241
+ state_(cached_state::begin_object),
242
+ column_index_(0),
243
+ row_index_(0),
244
+ column_names_(alloc),
245
+ cached_events_(alloc)
246
+ {
247
+ }
248
+
249
+ void reset()
250
+ {
251
+ name_index_ = 0;
252
+ level_ = 0;
253
+ state_ = cached_state::begin_object;
254
+ column_index_ = 0;
255
+ row_index_ = 0;
256
+ column_names_.clear();
257
+ cached_events_.clear();
258
+ }
259
+
260
+ bool done() const
261
+ {
262
+ return state_ == cached_state::done;
263
+ }
264
+
265
+ void initialize(const std::vector<string_type, string_allocator_type>& column_names)
266
+ {
267
+ for (const auto& name : column_names)
268
+ {
269
+ column_names_.push_back(name);
270
+ cached_events_.emplace_back(alloc_);
271
+ }
272
+ name_index_ = 0;
273
+ level_ = 0;
274
+ column_index_ = 0;
275
+ row_index_ = 0;
276
+ state_ = cached_state::begin_object;
277
+ }
278
+
279
+ void skip_column()
280
+ {
281
+ ++name_index_;
282
+ }
283
+
284
+ bool replay_parse_events(basic_json_visitor<CharT>& visitor)
285
+ {
286
+ bool more = true;
287
+ while (more)
288
+ {
289
+ switch (state_)
290
+ {
291
+ case cached_state::begin_object:
292
+ more = visitor.begin_object(semantic_tag::none, ser_context());
293
+ column_index_ = 0;
294
+ state_ = cached_state::name;
295
+ break;
296
+ case cached_state::end_object:
297
+ more = visitor.end_object(ser_context());
298
+ state_ = cached_state::done;
299
+ break;
300
+ case cached_state::name:
301
+ if (column_index_ < column_names_.size())
302
+ {
303
+ more = visitor.key(column_names_[column_index_], ser_context());
304
+ state_ = cached_state::begin_array;
305
+ }
306
+ else
307
+ {
308
+ state_ = cached_state::end_object;
309
+ }
310
+ break;
311
+ case cached_state::begin_array:
312
+ more = visitor.begin_array(semantic_tag::none, ser_context());
313
+ row_index_ = 0;
314
+ state_ = cached_state::item;
315
+ break;
316
+ case cached_state::end_array:
317
+ more = visitor.end_array(ser_context());
318
+ ++column_index_;
319
+ state_ = cached_state::name;
320
+ break;
321
+ case cached_state::item:
322
+ if (row_index_ < cached_events_[column_index_].size())
323
+ {
324
+ more = cached_events_[column_index_][row_index_].replay(visitor);
325
+ ++row_index_;
326
+ }
327
+ else
328
+ {
329
+ state_ = cached_state::end_array;
330
+ }
331
+ break;
332
+ default:
333
+ more = false;
334
+ break;
335
+ }
336
+ }
337
+ return more;
338
+ }
339
+
340
+ void visit_flush() override
341
+ {
342
+ }
343
+
344
+ bool visit_begin_object(semantic_tag, const ser_context&, std::error_code& ec) override
345
+ {
346
+ ec = csv_errc::invalid_parse_state;
347
+ return false;
348
+ }
349
+
350
+ bool visit_end_object(const ser_context&, std::error_code& ec) override
351
+ {
352
+ ec = csv_errc::invalid_parse_state;
353
+ return false;
354
+ }
355
+
356
+ bool visit_begin_array(semantic_tag tag, const ser_context&, std::error_code&) override
357
+ {
358
+ if (name_index_ < column_names_.size())
359
+ {
360
+ cached_events_[name_index_].emplace_back(staj_event_type::begin_array, tag, alloc_);
361
+
362
+ ++level_;
363
+ }
364
+ return true;
365
+ }
366
+
367
+ bool visit_end_array(const ser_context&, std::error_code&) override
368
+ {
369
+ if (level_ > 0)
370
+ {
371
+ cached_events_[name_index_].emplace_back(staj_event_type::end_array, semantic_tag::none, alloc_);
372
+ ++name_index_;
373
+ --level_;
374
+ }
375
+ else
376
+ {
377
+ name_index_ = 0;
378
+ }
379
+ return true;
380
+ }
381
+
382
+ bool visit_key(const string_view_type&, const ser_context&, std::error_code& ec) override
383
+ {
384
+ ec = csv_errc::invalid_parse_state;
385
+ return false;
386
+ }
387
+
388
+ bool visit_null(semantic_tag tag, const ser_context&, std::error_code&) override
389
+ {
390
+ if (name_index_ < column_names_.size())
391
+ {
392
+ cached_events_[name_index_].emplace_back(staj_event_type::null_value, tag, alloc_);
393
+ if (level_ == 0)
394
+ {
395
+ ++name_index_;
396
+ }
397
+ }
398
+ return true;
399
+ }
400
+
401
+ bool visit_string(const string_view_type& value, semantic_tag tag, const ser_context&, std::error_code&) override
402
+ {
403
+ if (name_index_ < column_names_.size())
404
+ {
405
+ cached_events_[name_index_].emplace_back(value, tag, alloc_);
406
+
407
+ if (level_ == 0)
408
+ {
409
+ ++name_index_;
410
+ }
411
+ }
412
+ return true;
413
+ }
414
+
415
+ bool visit_byte_string(const byte_string_view& value,
416
+ semantic_tag tag,
417
+ const ser_context&,
418
+ std::error_code&) override
419
+ {
420
+ if (name_index_ < column_names_.size())
421
+ {
422
+ cached_events_[name_index_].emplace_back(value, tag, alloc_);
423
+ if (level_ == 0)
424
+ {
425
+ ++name_index_;
426
+ }
427
+ }
428
+ return true;
429
+ }
430
+
431
+ bool visit_double(double value,
432
+ semantic_tag tag,
433
+ const ser_context&,
434
+ std::error_code&) override
435
+ {
436
+ if (name_index_ < column_names_.size())
437
+ {
438
+ cached_events_[name_index_].emplace_back(value, tag, alloc_);
439
+ if (level_ == 0)
440
+ {
441
+ ++name_index_;
442
+ }
443
+ }
444
+ return true;
445
+ }
446
+
447
+ bool visit_int64(int64_t value,
448
+ semantic_tag tag,
449
+ const ser_context&,
450
+ std::error_code&) override
451
+ {
452
+ if (name_index_ < column_names_.size())
453
+ {
454
+ cached_events_[name_index_].emplace_back(value, tag, alloc_);
455
+ if (level_ == 0)
456
+ {
457
+ ++name_index_;
458
+ }
459
+ }
460
+ return true;
461
+ }
462
+
463
+ bool visit_uint64(uint64_t value,
464
+ semantic_tag tag,
465
+ const ser_context&,
466
+ std::error_code&) override
467
+ {
468
+ if (name_index_ < column_names_.size())
469
+ {
470
+ cached_events_[name_index_].emplace_back(value, tag, alloc_);
471
+ if (level_ == 0)
472
+ {
473
+ ++name_index_;
474
+ }
475
+ }
476
+ return true;
477
+ }
478
+
479
+ bool visit_bool(bool value, semantic_tag tag, const ser_context&, std::error_code&) override
480
+ {
481
+ if (name_index_ < column_names_.size())
482
+ {
483
+ cached_events_[name_index_].emplace_back(value, tag, alloc_);
484
+ if (level_ == 0)
485
+ {
486
+ ++name_index_;
487
+ }
488
+ }
489
+ return true;
490
+ }
491
+ };
492
+
493
+ } // namespace detail
494
+
495
+ template<class CharT,class TempAllocator=std::allocator<char>>
496
+ class basic_csv_parser : public ser_context
497
+ {
498
+ public:
499
+ using string_view_type = jsoncons::basic_string_view<CharT>;
500
+ using char_type = CharT;
501
+ private:
502
+ struct string_maps_to_double
503
+ {
504
+ string_view_type s;
505
+
506
+ bool operator()(const std::pair<string_view_type,double>& val) const
507
+ {
508
+ return val.first == s;
509
+ }
510
+ };
511
+
512
+ using temp_allocator_type = TempAllocator;
513
+ typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<CharT> char_allocator_type;
514
+ using string_type = std::basic_string<CharT,std::char_traits<CharT>,char_allocator_type>;
515
+ typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<string_type> string_allocator_type;
516
+ typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<csv_mode> csv_mode_allocator_type;
517
+ typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<csv_type_info> csv_type_info_allocator_type;
518
+ typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<std::vector<string_type,string_allocator_type>> string_vector_allocator_type;
519
+ typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<csv_parse_state> csv_parse_state_allocator_type;
520
+
521
+ static constexpr int default_depth = 3;
522
+
523
+ temp_allocator_type alloc_;
524
+ csv_parse_state state_;
525
+ basic_json_visitor<CharT>* visitor_;
526
+ std::function<bool(csv_errc,const ser_context&)> err_handler_;
527
+ std::size_t column_;
528
+ std::size_t line_;
529
+ int depth_;
530
+ const basic_csv_decode_options<CharT> options_;
531
+ std::size_t column_index_;
532
+ std::size_t level_;
533
+ std::size_t offset_;
534
+ jsoncons::detail::chars_to to_double_;
535
+ const CharT* begin_input_;
536
+ const CharT* input_end_;
537
+ const CharT* input_ptr_;
538
+ bool more_;
539
+ std::size_t header_line_;
540
+
541
+ detail::m_columns_filter<CharT,TempAllocator> m_columns_filter_;
542
+ std::vector<csv_mode,csv_mode_allocator_type> stack_;
543
+ std::vector<string_type,string_allocator_type> column_names_;
544
+ std::vector<csv_type_info,csv_type_info_allocator_type> column_types_;
545
+ std::vector<string_type,string_allocator_type> column_defaults_;
546
+ std::vector<csv_parse_state,csv_parse_state_allocator_type> state_stack_;
547
+ string_type buffer_;
548
+ std::vector<std::pair<string_view_type,double>> string_double_map_;
549
+
550
+ public:
551
+ basic_csv_parser(const TempAllocator& alloc = TempAllocator())
552
+ : basic_csv_parser(basic_csv_decode_options<CharT>(),
553
+ default_csv_parsing(),
554
+ alloc)
555
+ {
556
+ }
557
+
558
+ basic_csv_parser(const basic_csv_decode_options<CharT>& options,
559
+ const TempAllocator& alloc = TempAllocator())
560
+ : basic_csv_parser(options,
561
+ default_csv_parsing(),
562
+ alloc)
563
+ {
564
+ }
565
+
566
+ basic_csv_parser(std::function<bool(csv_errc,const ser_context&)> err_handler,
567
+ const TempAllocator& alloc = TempAllocator())
568
+ : basic_csv_parser(basic_csv_decode_options<CharT>(),
569
+ err_handler,
570
+ alloc)
571
+ {
572
+ }
573
+
574
+ basic_csv_parser(const basic_csv_decode_options<CharT>& options,
575
+ std::function<bool(csv_errc,const ser_context&)> err_handler,
576
+ const TempAllocator& alloc = TempAllocator())
577
+ : alloc_(alloc),
578
+ state_(csv_parse_state::start),
579
+ visitor_(nullptr),
580
+ err_handler_(err_handler),
581
+ column_(1),
582
+ line_(1),
583
+ depth_(default_depth),
584
+ options_(options),
585
+ column_index_(0),
586
+ level_(0),
587
+ offset_(0),
588
+ begin_input_(nullptr),
589
+ input_end_(nullptr),
590
+ input_ptr_(nullptr),
591
+ more_(true),
592
+ header_line_(1),
593
+ m_columns_filter_(alloc),
594
+ stack_(alloc),
595
+ column_names_(alloc),
596
+ column_types_(alloc),
597
+ column_defaults_(alloc),
598
+ state_stack_(alloc),
599
+ buffer_(alloc)
600
+ {
601
+ if (options_.enable_str_to_nan())
602
+ {
603
+ string_double_map_.emplace_back(options_.nan_to_str(),std::nan(""));
604
+ }
605
+ if (options_.enable_str_to_inf())
606
+ {
607
+ string_double_map_.emplace_back(options_.inf_to_str(),std::numeric_limits<double>::infinity());
608
+ }
609
+ if (options_.enable_str_to_neginf())
610
+ {
611
+ string_double_map_.emplace_back(options_.neginf_to_str(),-std::numeric_limits<double>::infinity());
612
+ }
613
+
614
+ initialize();
615
+ }
616
+
617
+ ~basic_csv_parser() noexcept
618
+ {
619
+ }
620
+
621
+ bool done() const
622
+ {
623
+ return state_ == csv_parse_state::done;
624
+ }
625
+
626
+ bool accept() const
627
+ {
628
+ return state_ == csv_parse_state::accept || state_ == csv_parse_state::done;
629
+ }
630
+
631
+ bool stopped() const
632
+ {
633
+ return !more_;
634
+ }
635
+
636
+ bool source_exhausted() const
637
+ {
638
+ return input_ptr_ == input_end_;
639
+ }
640
+
641
+ const std::vector<string_type,string_allocator_type>& column_labels() const
642
+ {
643
+ return column_names_;
644
+ }
645
+
646
+ void reinitialize()
647
+ {
648
+ state_ = csv_parse_state::start;
649
+ visitor_ = nullptr;
650
+ column_ = 1;
651
+ line_ = 1;
652
+ depth_ = default_depth;
653
+ column_index_ = 0;
654
+ level_ = 0;
655
+ offset_ = 0;
656
+ begin_input_ = nullptr;
657
+ input_end_ = nullptr;
658
+ input_ptr_ = nullptr;
659
+ more_ = true;
660
+ header_line_ = 1;
661
+ m_columns_filter_.reset();
662
+ stack_.clear();
663
+ column_names_.clear();
664
+ column_types_.clear();
665
+ column_defaults_.clear();
666
+ state_stack_.clear();
667
+ buffer_.clear();
668
+
669
+ initialize();
670
+ }
671
+
672
+ void restart()
673
+ {
674
+ more_ = true;
675
+ }
676
+
677
+ void parse_some(basic_json_visitor<CharT>& visitor)
678
+ {
679
+ std::error_code ec;
680
+ parse_some(visitor,ec);
681
+ if (ec)
682
+ {
683
+ JSONCONS_THROW(ser_error(ec,line_,column_));
684
+ }
685
+ }
686
+
687
// Advances the CSV state machine over the input installed by update(),
// reporting parse events to a visitor.
//
// visitor: receiver of the events (directly, or via m_columns_filter_ in
//          m_columns mapping mode).
// ec:      set to a csv_errc value on failure; in that case more_ is cleared
//          and the function returns immediately.
//
// The function has two phases: (1) if there is no unread input, a single
// end-of-input step is taken based on state_; (2) a loop that runs while
// unread input remains and more_ is true. Several loop states only change
// state_ without advancing input_ptr_, so one character may be visited by
// more than one iteration.
+ void parse_some(basic_json_visitor<CharT>& visitor, std::error_code& ec)
687
+ {
688
// In m_columns mode events are sent to m_columns_filter_ and replayed into
// `visitor` later (see no_more_records below); otherwise they go straight
// to the caller's visitor.
+ switch (options_.mapping_kind())
689
+ {
690
+ case csv_mapping_kind::m_columns:
691
+ visitor_ = &m_columns_filter_;
692
+ break;
693
+ default:
694
+ visitor_ = std::addressof(visitor);
695
+ break;
696
+ }
697
+
698
+ const CharT* local_input_end = input_end_;
699
+
700
// Phase 1: no unread input. Perform exactly one end-of-input transition.
+ if (input_ptr_ == local_input_end && more_)
701
+ {
702
+ switch (state_)
703
+ {
704
// Still in the start state with nothing to read: reported as source_error.
+ case csv_parse_state::start:
705
+ ec = csv_errc::source_error;
706
+ more_ = false;
707
+ return;
708
+ case csv_parse_state::before_unquoted_field:
709
+ case csv_parse_state::before_last_unquoted_field:
710
+ end_unquoted_string_value(ec);
711
+ state_ = csv_parse_state::before_last_unquoted_field_tail;
712
+ break;
713
+ case csv_parse_state::before_last_unquoted_field_tail:
714
+ if (stack_.back() == csv_mode::subfields)
715
+ {
716
+ stack_.pop_back();
717
+ more_ = visitor_->end_array(*this, ec);
718
+ }
719
+ ++column_index_;
720
+ state_ = csv_parse_state::end_record;
721
+ break;
722
+ case csv_parse_state::before_unquoted_string:
723
+ buffer_.clear();
724
+ JSONCONS_FALLTHROUGH;
725
+ case csv_parse_state::unquoted_string:
726
+ if (options_.trim_leading() || options_.trim_trailing())
727
+ {
728
+ trim_string_buffer(options_.trim_leading(),options_.trim_trailing());
729
+ }
730
+ if (options_.ignore_empty_values() && buffer_.empty())
731
+ {
732
+ state_ = csv_parse_state::end_record;
733
+ }
734
+ else
735
+ {
736
+ before_value(ec);
737
+ state_ = csv_parse_state::before_unquoted_field;
738
+ }
739
+ break;
740
+ case csv_parse_state::before_last_quoted_field:
741
+ end_quoted_string_value(ec);
742
+ ++column_index_;
743
+ state_ = csv_parse_state::end_record;
744
+ break;
// Input ended right after a quote-escape character. When the escape
// character is the quote character itself this is treated as the closing
// quote of the last field; otherwise it is an invalid escape.
+ case csv_parse_state::escaped_value:
745
+ if (options_.quote_escape_char() == options_.quote_char())
746
+ {
747
+ if (!(options_.ignore_empty_values() && buffer_.empty()))
748
+ {
749
+ before_value(ec);
750
+ ++column_;
751
+ state_ = csv_parse_state::before_last_quoted_field;
752
+ }
753
+ else
754
+ {
755
+ state_ = csv_parse_state::end_record;
756
+ }
757
+ }
758
+ else
759
+ {
760
+ ec = csv_errc::invalid_escaped_char;
761
+ more_ = false;
762
+ return;
763
+ }
764
+ break;
// A record is closed at end of input only if at least one column was seen.
+ case csv_parse_state::end_record:
765
+ if (column_index_ > 0)
766
+ {
767
+ after_record(ec);
768
+ }
769
+ state_ = csv_parse_state::no_more_records;
770
+ break;
// Pops the header/data mode, emits end_array, and then either moves to
// accept or (m_columns mode) keeps replaying the filter's buffered events
// into the caller's visitor until the filter reports done().
+ case csv_parse_state::no_more_records:
771
+ switch (stack_.back())
772
+ {
773
+ case csv_mode::header:
774
+ stack_.pop_back();
775
+ break;
776
+ case csv_mode::data:
777
+ stack_.pop_back();
778
+ break;
779
+ default:
780
+ break;
781
+ }
782
+ more_ = visitor_->end_array(*this, ec);
783
+ if (options_.mapping_kind() == csv_mapping_kind::m_columns)
784
+ {
785
+ if (!m_columns_filter_.done())
786
+ {
787
+ more_ = m_columns_filter_.replay_parse_events(visitor);
788
+ }
789
+ else
790
+ {
791
+ state_ = csv_parse_state::accept;
792
+ }
793
+ }
794
+ else
795
+ {
796
+ state_ = csv_parse_state::accept;
797
+ }
798
+ break;
// Final step: the mode stack must hold only csv_mode::initial, otherwise
// unexpected_eof is reported through both err_handler_ and ec. On success
// the visitor is flushed and the parser moves to done.
+ case csv_parse_state::accept:
799
+ if (!(stack_.size() == 1 && stack_.back() == csv_mode::initial))
800
+ {
801
+ err_handler_(csv_errc::unexpected_eof, *this);
802
+ ec = csv_errc::unexpected_eof;
803
+ more_ = false;
804
+ return;
805
+ }
806
+ stack_.pop_back();
807
+ visitor_->flush();
808
+ state_ = csv_parse_state::done;
809
+ more_ = false;
810
+ return;
811
+ default:
812
+ state_ = csv_parse_state::end_record;
813
+ break;
814
+ }
815
+ }
816
+
817
// Phase 2: character loop. Skipped entirely when there is no unread input.
+ for (; (input_ptr_ < local_input_end) && more_;)
818
+ {
819
+ CharT curr_char = *input_ptr_;
820
+
821
+ switch (state_)
822
+ {
823
// Entered after a '\r' has been consumed: counts the line, swallows an
// immediately following '\n' if present, then resumes the pushed state.
+ case csv_parse_state::cr:
824
+ ++line_;
825
+ column_ = 1;
826
+ switch (*input_ptr_)
827
+ {
828
+ case '\n':
829
+ ++input_ptr_;
830
+ state_ = pop_state();
831
+ break;
832
+ default:
833
+ state_ = pop_state();
834
+ break;
835
+ }
836
+ break;
// Opens the outer array (except in m_columns mode) without consuming input.
// NOTE(review): in the first branch below state_ is set to column_labels and
// then overwritten with expect_comment_or_record two statements later, so
// the column_labels case does not appear to be entered from here - confirm
// whether that is intended.
+ case csv_parse_state::start:
837
+ if (options_.mapping_kind() != csv_mapping_kind::m_columns)
838
+ {
839
+ more_ = visitor_->begin_array(semantic_tag::none, *this, ec);
840
+ }
841
+ if (options_.assume_header() && options_.mapping_kind() == csv_mapping_kind::n_rows && options_.column_names().size() > 0)
842
+ {
843
+ column_index_ = 0;
844
+ state_ = csv_parse_state::column_labels;
845
+ more_ = visitor_->begin_array(semantic_tag::none, *this, ec);
846
+ state_ = csv_parse_state::expect_comment_or_record;
847
+ }
848
+ else
849
+ {
850
+ state_ = csv_parse_state::expect_comment_or_record;
851
+ }
852
+ break;
// Emits one configured column name per iteration, then closes the array.
+ case csv_parse_state::column_labels:
853
+ if (column_index_ < column_names_.size())
854
+ {
855
+ more_ = visitor_->string_value(column_names_[column_index_], semantic_tag::none, *this, ec);
856
+ ++column_index_;
857
+ }
858
+ else
859
+ {
860
+ more_ = visitor_->end_array(*this, ec);
861
+ state_ = csv_parse_state::expect_comment_or_record;
862
+ //stack_.back() = csv_mode::data;
863
+ column_index_ = 0;
864
+ }
865
+ break;
// Skips characters up to the end of the comment line. A comment line seen
// while in header mode also advances header_line_.
+ case csv_parse_state::comment:
866
+ switch (curr_char)
867
+ {
868
+ case '\n':
869
+ {
870
+ ++line_;
871
+ if (stack_.back() == csv_mode::header)
872
+ {
873
+ ++header_line_;
874
+ }
875
+ column_ = 1;
876
+ state_ = csv_parse_state::expect_comment_or_record;
877
+ break;
878
+ }
879
+ case '\r':
880
+ ++line_;
881
+ if (stack_.back() == csv_mode::header)
882
+ {
883
+ ++header_line_;
884
+ }
885
+ column_ = 1;
886
+ state_ = csv_parse_state::expect_comment_or_record;
887
+ push_state(state_);
888
+ state_ = csv_parse_state::cr;
889
+ break;
890
+ default:
891
+ ++column_;
892
+ break;
893
+ }
894
+ ++input_ptr_;
895
+ break;
896
+
897
// Start of a line: a leading comment_starter character begins a comment;
// anything else is re-examined as the start of a record.
+ case csv_parse_state::expect_comment_or_record:
898
+ buffer_.clear();
899
+ if (curr_char == options_.comment_starter())
900
+ {
901
+ state_ = csv_parse_state::comment;
902
+ ++column_;
903
+ ++input_ptr_;
904
+ }
905
+ else
906
+ {
907
+ state_ = csv_parse_state::expect_record;
908
+ }
909
+ break;
// Inside quotes. The escape character is tested before the quote character,
// so when both are the same character the escaped_value state decides
// whether it was an escaped quote or the closing quote.
+ case csv_parse_state::quoted_string:
910
+ {
911
+ if (curr_char == options_.quote_escape_char())
912
+ {
913
+ state_ = csv_parse_state::escaped_value;
914
+ }
915
+ else if (curr_char == options_.quote_char())
916
+ {
917
+ state_ = csv_parse_state::between_values;
918
+ }
919
+ else
920
+ {
921
+ buffer_.push_back(static_cast<CharT>(curr_char));
922
+ }
923
+ }
924
+ ++column_;
925
+ ++input_ptr_;
926
+ break;
+ case csv_parse_state::escaped_value:
927
+ {
928
+ if (curr_char == options_.quote_char())
929
+ {
930
+ buffer_.push_back(static_cast<CharT>(curr_char));
931
+ state_ = csv_parse_state::quoted_string;
932
+ ++column_;
933
+ ++input_ptr_;
934
+ }
935
+ else if (options_.quote_escape_char() == options_.quote_char())
936
+ {
937
+ state_ = csv_parse_state::between_values;
938
+ }
939
+ else
940
+ {
941
+ ec = csv_errc::invalid_escaped_char;
942
+ more_ = false;
943
+ return;
944
+ }
945
+ }
946
+ break;
// After a closing quote: only a line break, a field/subfield delimiter, or
// blanks (space/tab, which are skipped) are accepted.
+ case csv_parse_state::between_values:
947
+ switch (curr_char)
948
+ {
949
+ case '\r':
950
+ case '\n':
951
+ {
952
+ if (options_.trim_leading() || options_.trim_trailing())
953
+ {
954
+ trim_string_buffer(options_.trim_leading(),options_.trim_trailing());
955
+ }
956
+ if (!(options_.ignore_empty_values() && buffer_.empty()))
957
+ {
958
+ before_value(ec);
959
+ state_ = csv_parse_state::before_last_quoted_field;
960
+ }
961
+ else
962
+ {
963
+ state_ = csv_parse_state::end_record;
964
+ }
965
+ break;
966
+ }
967
+ default:
968
+ if (curr_char == options_.field_delimiter())
969
+ {
970
+ if (options_.trim_leading() || options_.trim_trailing())
971
+ {
972
+ trim_string_buffer(options_.trim_leading(),options_.trim_trailing());
973
+ }
974
+ before_value(ec);
975
+ state_ = csv_parse_state::before_quoted_field;
976
+ }
977
+ else if (options_.subfield_delimiter() != char_type() && curr_char == options_.subfield_delimiter())
978
+ {
979
+ if (options_.trim_leading() || options_.trim_trailing())
980
+ {
981
+ trim_string_buffer(options_.trim_leading(),options_.trim_trailing());
982
+ }
983
+ before_value(ec);
984
+ state_ = csv_parse_state::before_quoted_subfield;
985
+ }
986
+ else if (curr_char == ' ' || curr_char == '\t')
987
+ {
988
+ ++column_;
989
+ ++input_ptr_;
990
+ }
991
+ else
992
+ {
993
+ ec = csv_errc::unexpected_char_between_fields;
994
+ more_ = false;
995
+ return;
996
+ }
997
+ break;
998
+ }
999
+ break;
+ case csv_parse_state::before_unquoted_string:
1000
+ {
1001
+ buffer_.clear();
1002
+ state_ = csv_parse_state::unquoted_string;
1003
+ break;
1004
+ }
// The before_*_field / *_tail pairs below split "emit the value" and "close
// any open subfield array, then step past the delimiter" into separate loop
// iterations; only the *_tail states that increment input_ptr_ consume the
// delimiter character.
+ case csv_parse_state::before_unquoted_field:
1005
+ end_unquoted_string_value(ec);
1006
+ state_ = csv_parse_state::before_unquoted_field_tail;
1007
+ break;
+ case csv_parse_state::before_unquoted_field_tail:
1008
+ {
1009
+ if (stack_.back() == csv_mode::subfields)
1010
+ {
1011
+ stack_.pop_back();
1012
+ more_ = visitor_->end_array(*this, ec);
1013
+ }
1014
+ ++column_index_;
1015
+ state_ = csv_parse_state::before_unquoted_string;
1016
+ ++column_;
1017
+ ++input_ptr_;
1018
+ break;
1019
+ }
+ case csv_parse_state::before_unquoted_field_tail1:
1020
+ {
1021
+ if (stack_.back() == csv_mode::subfields)
1022
+ {
1023
+ stack_.pop_back();
1024
+ more_ = visitor_->end_array(*this, ec);
1025
+ }
1026
+ state_ = csv_parse_state::end_record;
1027
+ ++column_;
1028
+ ++input_ptr_;
1029
+ break;
1030
+ }
1031
+
1032
+ case csv_parse_state::before_last_unquoted_field:
1033
+ end_unquoted_string_value(ec);
1034
+ state_ = csv_parse_state::before_last_unquoted_field_tail;
1035
+ break;
1036
+
1037
+ case csv_parse_state::before_last_unquoted_field_tail:
1038
+ if (stack_.back() == csv_mode::subfields)
1039
+ {
1040
+ stack_.pop_back();
1041
+ more_ = visitor_->end_array(*this, ec);
1042
+ }
1043
+ ++column_index_;
1044
+ state_ = csv_parse_state::end_record;
1045
+ break;
1046
+
1047
// First subfield of a field: switch the mode stack from data to subfields
// and open the nested array before the value itself is emitted.
+ case csv_parse_state::before_unquoted_subfield:
1048
+ if (stack_.back() == csv_mode::data)
1049
+ {
1050
+ stack_.push_back(csv_mode::subfields);
1051
+ more_ = visitor_->begin_array(semantic_tag::none, *this, ec);
1052
+ }
1053
+ state_ = csv_parse_state::before_unquoted_subfield_tail;
1054
+ break;
+ case csv_parse_state::before_unquoted_subfield_tail:
1055
+ end_unquoted_string_value(ec);
1056
+ state_ = csv_parse_state::before_unquoted_string;
1057
+ ++column_;
1058
+ ++input_ptr_;
1059
+ break;
+ case csv_parse_state::before_quoted_field:
1060
+ end_quoted_string_value(ec);
1061
+ state_ = csv_parse_state::before_unquoted_field_tail; // return to unquoted
1062
+ break;
+ case csv_parse_state::before_quoted_subfield:
1063
+ if (stack_.back() == csv_mode::data)
1064
+ {
1065
+ stack_.push_back(csv_mode::subfields);
1066
+ more_ = visitor_->begin_array(semantic_tag::none, *this, ec);
1067
+ }
1068
+ state_ = csv_parse_state::before_quoted_subfield_tail;
1069
+ break;
+ case csv_parse_state::before_quoted_subfield_tail:
1070
+ end_quoted_string_value(ec);
1071
+ state_ = csv_parse_state::before_unquoted_string;
1072
+ ++column_;
1073
+ ++input_ptr_;
1074
+ break;
+ case csv_parse_state::before_last_quoted_field:
1075
+ end_quoted_string_value(ec);
1076
+ state_ = csv_parse_state::before_last_quoted_field_tail;
1077
+ break;
+ case csv_parse_state::before_last_quoted_field_tail:
1078
+ if (stack_.back() == csv_mode::subfields)
1079
+ {
1080
+ stack_.pop_back();
1081
+ more_ = visitor_->end_array(*this, ec);
1082
+ }
1083
+ ++column_index_;
1084
+ state_ = csv_parse_state::end_record;
1085
+ break;
// Accumulates an unquoted field into buffer_. Line breaks and delimiters
// are not consumed here; they are left for the follow-up state. A quote
// character discards what was accumulated and starts a quoted string.
+ case csv_parse_state::unquoted_string:
1086
+ {
1087
+ switch (curr_char)
1088
+ {
1089
+ case '\n':
1090
+ case '\r':
1091
+ {
1092
+ if (options_.trim_leading() || options_.trim_trailing())
1093
+ {
1094
+ trim_string_buffer(options_.trim_leading(),options_.trim_trailing());
1095
+ }
1096
+ if (!(options_.ignore_empty_values() && buffer_.empty()))
1097
+ {
1098
+ before_value(ec);
1099
+ state_ = csv_parse_state::before_last_unquoted_field;
1100
+ }
1101
+ else
1102
+ {
1103
+ state_ = csv_parse_state::end_record;
1104
+ }
1105
+ break;
1106
+ }
1107
+ default:
1108
+ if (curr_char == options_.field_delimiter())
1109
+ {
1110
+ if (options_.trim_leading() || options_.trim_trailing())
1111
+ {
1112
+ trim_string_buffer(options_.trim_leading(),options_.trim_trailing());
1113
+ }
1114
+ before_value(ec);
1115
+ state_ = csv_parse_state::before_unquoted_field;
1116
+ }
1117
+ else if (options_.subfield_delimiter() != char_type() && curr_char == options_.subfield_delimiter())
1118
+ {
1119
+ if (options_.trim_leading() || options_.trim_trailing())
1120
+ {
1121
+ trim_string_buffer(options_.trim_leading(),options_.trim_trailing());
1122
+ }
1123
+ before_value(ec);
1124
+ state_ = csv_parse_state::before_unquoted_subfield;
1125
+ }
1126
+ else if (curr_char == options_.quote_char())
1127
+ {
1128
+ buffer_.clear();
1129
+ state_ = csv_parse_state::quoted_string;
1130
+ ++column_;
1131
+ ++input_ptr_;
1132
+ }
1133
+ else
1134
+ {
1135
+ buffer_.push_back(static_cast<CharT>(curr_char));
1136
+ ++column_;
1137
+ ++input_ptr_;
1138
+ }
1139
+ break;
1140
+ }
1141
+ break;
1142
+ }
// First character of a potential record. Empty lines are either skipped
// (ignore_empty_lines) or opened as an empty record that end_record closes.
+ case csv_parse_state::expect_record:
1143
+ {
1144
+ switch (curr_char)
1145
+ {
1146
+ case '\n':
1147
+ {
1148
+ if (!options_.ignore_empty_lines())
1149
+ {
1150
+ before_record(ec);
1151
+ state_ = csv_parse_state::end_record;
1152
+ }
1153
+ else
1154
+ {
1155
+ ++line_;
1156
+ column_ = 1;
1157
+ state_ = csv_parse_state::expect_comment_or_record;
1158
+ ++input_ptr_;
1159
+ }
1160
+ break;
1161
+ }
1162
+ case '\r':
1163
+ if (!options_.ignore_empty_lines())
1164
+ {
1165
+ before_record(ec);
1166
+ state_ = csv_parse_state::end_record;
1167
+ }
1168
+ else
1169
+ {
1170
+ ++line_;
1171
+ column_ = 1;
1172
+ state_ = csv_parse_state::expect_comment_or_record;
1173
+ ++input_ptr_;
1174
+ push_state(state_);
1175
+ state_ = csv_parse_state::cr;
1176
+ }
1177
+ break;
// Leading blanks are dropped when trim_leading is set; otherwise the blank
// becomes the first character of an unquoted field.
+ case ' ':
1178
+ case '\t':
1179
+ if (!options_.trim_leading())
1180
+ {
1181
+ buffer_.push_back(static_cast<CharT>(curr_char));
1182
+ before_record(ec);
1183
+ state_ = csv_parse_state::unquoted_string;
1184
+ }
1185
+ ++column_;
1186
+ ++input_ptr_;
1187
+ break;
1188
+ default:
1189
+ before_record(ec);
1190
+ if (curr_char == options_.quote_char())
1191
+ {
1192
+ buffer_.clear();
1193
+ state_ = csv_parse_state::quoted_string;
1194
+ ++column_;
1195
+ ++input_ptr_;
1196
+ }
1197
+ else
1198
+ {
1199
+ state_ = csv_parse_state::unquoted_string;
1200
+ }
1201
+ break;
1202
+ }
1203
+ break;
1204
+ }
// Consumes the line terminator of a record and calls after_record(). Blanks
// are skipped; any other character here is a syntax error.
+ case csv_parse_state::end_record:
1205
+ {
1206
+ switch (curr_char)
1207
+ {
1208
+ case '\n':
1209
+ {
1210
+ ++line_;
1211
+ column_ = 1;
1212
+ state_ = csv_parse_state::expect_comment_or_record;
1213
+ after_record(ec);
1214
+ ++input_ptr_;
1215
+ break;
1216
+ }
1217
+ case '\r':
1218
+ ++line_;
1219
+ column_ = 1;
1220
+ state_ = csv_parse_state::expect_comment_or_record;
1221
+ after_record(ec);
1222
+ push_state(state_);
1223
+ state_ = csv_parse_state::cr;
1224
+ ++input_ptr_;
1225
+ break;
1226
+ case ' ':
1227
+ case '\t':
1228
+ ++column_;
1229
+ ++input_ptr_;
1230
+ break;
1231
+ default:
1232
+ err_handler_(csv_errc::syntax_error, *this);
1233
+ ec = csv_errc::syntax_error;
1234
+ more_ = false;
1235
+ return;
1236
+ }
1237
+ break;
1238
+ }
1239
+ default:
1240
+ err_handler_(csv_errc::invalid_parse_state, *this);
1241
+ ec = csv_errc::invalid_parse_state;
1242
+ more_ = false;
1243
+ return;
1244
+ }
// Line limit: once line_ exceeds options_.max_lines() the parser jumps
// straight to done and stops, without going through the end-of-input steps.
+ if (line_ > options_.max_lines())
1245
+ {
1246
+ state_ = csv_parse_state::done;
1247
+ more_ = false;
1248
+ }
1249
+ }
1250
+ }
1277
+
1278
+ void finish_parse()
1279
+ {
1280
+ std::error_code ec;
1281
+ finish_parse(ec);
1282
+ if (ec)
1283
+ {
1284
+ JSONCONS_THROW(ser_error(ec,line_,column_));
1285
+ }
1286
+ }
1287
+
1288
+ void finish_parse(std::error_code& ec)
1289
+ {
1290
+ while (more_)
1291
+ {
1292
+ parse_some(ec);
1293
+ }
1294
+ }
1295
+
1296
+ csv_parse_state state() const
1297
+ {
1298
+ return state_;
1299
+ }
1300
+
1301
+ void update(const string_view_type sv)
1302
+ {
1303
+ update(sv.data(),sv.length());
1304
+ }
1305
+
1306
+ void update(const CharT* data, std::size_t length)
1307
+ {
1308
+ begin_input_ = data;
1309
+ input_end_ = data + length;
1310
+ input_ptr_ = begin_input_;
1311
+ }
1312
+
1313
+ std::size_t line() const override
1314
+ {
1315
+ return line_;
1316
+ }
1317
+
1318
+ std::size_t column() const override
1319
+ {
1320
+ return column_;
1321
+ }
1322
+
1323
+ private:
1324
+ void initialize()
1325
+ {
1326
+ jsoncons::csv::detail::parse_column_names(options_.column_names(), column_names_);
1327
+ jsoncons::csv::detail::parse_column_types(options_.column_types(), column_types_);
1328
+ jsoncons::csv::detail::parse_column_names(options_.column_defaults(), column_defaults_);
1329
+
1330
+ stack_.reserve(default_depth);
1331
+ stack_.push_back(csv_mode::initial);
1332
+ stack_.push_back((options_.header_lines() > 0) ? csv_mode::header
1333
+ : csv_mode::data);
1334
+ }
1335
+
1336
+ // name
1337
+ void before_value(std::error_code& ec)
1338
+ {
1339
+ switch (stack_.back())
1340
+ {
1341
+ case csv_mode::header:
1342
+ if (options_.trim_leading_inside_quotes() || options_.trim_trailing_inside_quotes())
1343
+ {
1344
+ trim_string_buffer(options_.trim_leading_inside_quotes(),options_.trim_trailing_inside_quotes());
1345
+ }
1346
+ if (line_ == header_line_)
1347
+ {
1348
+ column_names_.push_back(buffer_);
1349
+ if (options_.assume_header() && options_.mapping_kind() == csv_mapping_kind::n_rows)
1350
+ {
1351
+ more_ = visitor_->string_value(buffer_, semantic_tag::none, *this, ec);
1352
+ }
1353
+ }
1354
+ break;
1355
+ case csv_mode::data:
1356
+ if (options_.mapping_kind() == csv_mapping_kind::n_objects)
1357
+ {
1358
+ if (!(options_.ignore_empty_values() && buffer_.empty()))
1359
+ {
1360
+ if (column_index_ < column_names_.size() + offset_)
1361
+ {
1362
+ more_ = visitor_->key(column_names_[column_index_ - offset_], *this, ec);
1363
+ }
1364
+ }
1365
+ }
1366
+ break;
1367
+ default:
1368
+ break;
1369
+ }
1370
+ }
1371
+
1372
+ // begin_array or begin_record
1373
+ void before_record(std::error_code& ec)
1374
+ {
1375
+ offset_ = 0;
1376
+
1377
+ switch (stack_.back())
1378
+ {
1379
+ case csv_mode::header:
1380
+ if (options_.assume_header() && line_ == header_line_)
1381
+ {
1382
+ if (options_.mapping_kind() == csv_mapping_kind::n_rows)
1383
+ {
1384
+ more_ = visitor_->begin_array(semantic_tag::none, *this, ec);
1385
+ }
1386
+ }
1387
+ break;
1388
+ case csv_mode::data:
1389
+ switch (options_.mapping_kind())
1390
+ {
1391
+ case csv_mapping_kind::n_rows:
1392
+ more_ = visitor_->begin_array(semantic_tag::none, *this, ec);
1393
+ break;
1394
+ case csv_mapping_kind::n_objects:
1395
+ more_ = visitor_->begin_object(semantic_tag::none, *this, ec);
1396
+ break;
1397
+ case csv_mapping_kind::m_columns:
1398
+ break;
1399
+ default:
1400
+ break;
1401
+ }
1402
+ break;
1403
+ default:
1404
+ break;
1405
+ }
1406
+ }
1407
+
1408
+ // end_array, begin_array, string_value (headers)
1409
+ void after_record(std::error_code& ec)
1410
+ {
1411
+ if (column_types_.size() > 0)
1412
+ {
1413
+ if (level_ > 0)
1414
+ {
1415
+ more_ = visitor_->end_array(*this, ec);
1416
+ level_ = 0;
1417
+ }
1418
+ }
1419
+ switch (stack_.back())
1420
+ {
1421
+ case csv_mode::header:
1422
+ if (line_ >= options_.header_lines())
1423
+ {
1424
+ stack_.back() = csv_mode::data;
1425
+ }
1426
+ switch (options_.mapping_kind())
1427
+ {
1428
+ case csv_mapping_kind::n_rows:
1429
+ if (options_.assume_header())
1430
+ {
1431
+ more_ = visitor_->end_array(*this, ec);
1432
+ }
1433
+ break;
1434
+ case csv_mapping_kind::m_columns:
1435
+ m_columns_filter_.initialize(column_names_);
1436
+ break;
1437
+ default:
1438
+ break;
1439
+ }
1440
+ break;
1441
+ case csv_mode::data:
1442
+ case csv_mode::subfields:
1443
+ {
1444
+ switch (options_.mapping_kind())
1445
+ {
1446
+ case csv_mapping_kind::n_rows:
1447
+ more_ = visitor_->end_array(*this, ec);
1448
+ break;
1449
+ case csv_mapping_kind::n_objects:
1450
+ more_ = visitor_->end_object(*this, ec);
1451
+ break;
1452
+ case csv_mapping_kind::m_columns:
1453
+ more_ = visitor_->end_array(*this, ec);
1454
+ break;
1455
+ }
1456
+ break;
1457
+ }
1458
+ default:
1459
+ break;
1460
+ }
1461
+ column_index_ = 0;
1462
+ }
1463
+
1464
+ void trim_string_buffer(bool trim_leading, bool trim_trailing)
1465
+ {
1466
+ std::size_t start = 0;
1467
+ std::size_t length = buffer_.length();
1468
+ if (trim_leading)
1469
+ {
1470
+ bool done = false;
1471
+ while (!done && start < buffer_.length())
1472
+ {
1473
+ if ((buffer_[start] < 256) && std::isspace(buffer_[start]))
1474
+ {
1475
+ ++start;
1476
+ }
1477
+ else
1478
+ {
1479
+ done = true;
1480
+ }
1481
+ }
1482
+ }
1483
+ if (trim_trailing)
1484
+ {
1485
+ bool done = false;
1486
+ while (!done && length > 0)
1487
+ {
1488
+ if ((buffer_[length-1] < 256) && std::isspace(buffer_[length-1]))
1489
+ {
1490
+ --length;
1491
+ }
1492
+ else
1493
+ {
1494
+ done = true;
1495
+ }
1496
+ }
1497
+ }
1498
+ if (start != 0 || length != buffer_.size())
1499
+ {
1500
+ // Do not use buffer_.substr(...), as this won't preserve the allocator state.
1501
+ buffer_.resize(length);
1502
+ buffer_.erase(0, start);
1503
+ }
1504
+ }
1505
+
1506
+ /*
1507
+ end_array, begin_array, xxx_value (end_value)
1508
+ */
1509
+ void end_unquoted_string_value(std::error_code& ec)
1510
+ {
1511
+ switch (stack_.back())
1512
+ {
1513
+ case csv_mode::data:
1514
+ case csv_mode::subfields:
1515
+ switch (options_.mapping_kind())
1516
+ {
1517
+ case csv_mapping_kind::n_rows:
1518
+ if (options_.unquoted_empty_value_is_null() && buffer_.length() == 0)
1519
+ {
1520
+ more_ = visitor_->null_value(semantic_tag::none, *this, ec);
1521
+ }
1522
+ else
1523
+ {
1524
+ end_value(options_.infer_types(), ec);
1525
+ }
1526
+ break;
1527
+ case csv_mapping_kind::n_objects:
1528
+ if (!(options_.ignore_empty_values() && buffer_.empty()))
1529
+ {
1530
+ if (column_index_ < column_names_.size() + offset_)
1531
+ {
1532
+ if (options_.unquoted_empty_value_is_null() && buffer_.length() == 0)
1533
+ {
1534
+ more_ = visitor_->null_value(semantic_tag::none, *this, ec);
1535
+ }
1536
+ else
1537
+ {
1538
+ end_value(options_.infer_types(), ec);
1539
+ }
1540
+ }
1541
+ else if (level_ > 0)
1542
+ {
1543
+ if (options_.unquoted_empty_value_is_null() && buffer_.length() == 0)
1544
+ {
1545
+ more_ = visitor_->null_value(semantic_tag::none, *this, ec);
1546
+ }
1547
+ else
1548
+ {
1549
+ end_value(options_.infer_types(), ec);
1550
+ }
1551
+ }
1552
+ }
1553
+ break;
1554
+ case csv_mapping_kind::m_columns:
1555
+ if (!(options_.ignore_empty_values() && buffer_.empty()))
1556
+ {
1557
+ end_value(options_.infer_types(), ec);
1558
+ }
1559
+ else
1560
+ {
1561
+ m_columns_filter_.skip_column();
1562
+ }
1563
+ break;
1564
+ }
1565
+ break;
1566
+ default:
1567
+ break;
1568
+ }
1569
+ }
1570
+
1571
+ void end_quoted_string_value(std::error_code& ec)
1572
+ {
1573
+ switch (stack_.back())
1574
+ {
1575
+ case csv_mode::data:
1576
+ case csv_mode::subfields:
1577
+ if (options_.trim_leading_inside_quotes() || options_.trim_trailing_inside_quotes())
1578
+ {
1579
+ trim_string_buffer(options_.trim_leading_inside_quotes(),options_.trim_trailing_inside_quotes());
1580
+ }
1581
+ switch (options_.mapping_kind())
1582
+ {
1583
+ case csv_mapping_kind::n_rows:
1584
+ end_value(false, ec);
1585
+ break;
1586
+ case csv_mapping_kind::n_objects:
1587
+ if (!(options_.ignore_empty_values() && buffer_.empty()))
1588
+ {
1589
+ if (column_index_ < column_names_.size() + offset_)
1590
+ {
1591
+ if (options_.unquoted_empty_value_is_null() && buffer_.length() == 0)
1592
+ {
1593
+ more_ = visitor_->null_value(semantic_tag::none, *this, ec);
1594
+ }
1595
+ else
1596
+ {
1597
+ end_value(false, ec);
1598
+ }
1599
+ }
1600
+ else if (level_ > 0)
1601
+ {
1602
+ if (options_.unquoted_empty_value_is_null() && buffer_.length() == 0)
1603
+ {
1604
+ more_ = visitor_->null_value(semantic_tag::none, *this, ec);
1605
+ }
1606
+ else
1607
+ {
1608
+ end_value(false, ec);
1609
+ }
1610
+ }
1611
+ }
1612
+ break;
1613
+ case csv_mapping_kind::m_columns:
1614
+ if (!(options_.ignore_empty_values() && buffer_.empty()))
1615
+ {
1616
+ end_value(false, ec);
1617
+ }
1618
+ else
1619
+ {
1620
+ m_columns_filter_.skip_column();
1621
+ }
1622
+ break;
1623
+ }
1624
+ break;
1625
+ default:
1626
+ break;
1627
+ }
1628
+ }
1629
+
1630
// Emits the contents of buffer_ to visitor_ as a single value.
//
// infer_types: consulted only when no configured column type applies to the
//              current column; true routes the text through
//              end_value_with_numeric_check(), false emits it as a string.
// ec:          passed through to the visitor calls.
//
// Order of checks: (1) the NaN/Inf strings registered in string_double_map_,
// (2) the configured column type for this column, (3) inference / plain
// string.
+ void end_value(bool infer_types, std::error_code& ec)
1630
+ {
1631
+ auto it = std::find_if(string_double_map_.begin(), string_double_map_.end(), string_maps_to_double{ buffer_ });
1632
+ if (it != string_double_map_.end())
1633
+ {
1634
+ more_ = visitor_->double_value(it->second, semantic_tag::none, *this, ec);
1635
+ }
// Typed column: column_types_ is indexed with column_index_ - offset_.
+ else if (column_index_ < column_types_.size() + offset_)
1636
+ {
1637
// A repeat_t entry advances offset_ by its rep_count, so every lookup after
// this point in the same call uses the shifted index.
// NOTE(review): column_index_ - offset_ is unsigned arithmetic; presumably
// the configured types guarantee offset_ never exceeds column_index_ here -
// confirm.
+ if (column_types_[column_index_ - offset_].col_type == csv_column_type::repeat_t)
1638
+ {
1639
+ offset_ = offset_ + column_types_[column_index_ - offset_].rep_count;
1640
+ if (column_index_ - offset_ + 1 < column_types_.size())
1641
+ {
1642
+ if (column_index_ == offset_ || level_ > column_types_[column_index_-offset_].level)
1643
+ {
1644
+ more_ = visitor_->end_array(*this, ec);
1645
+ }
1646
+ level_ = column_index_ == offset_ ? 0 : column_types_[column_index_ - offset_].level;
1647
+ }
1648
+ }
// Bring the open-array depth in line with the column's level: one
// begin_array or one end_array is emitted per call.
+ if (level_ < column_types_[column_index_ - offset_].level)
1649
+ {
1650
+ more_ = visitor_->begin_array(semantic_tag::none, *this, ec);
1651
+ level_ = column_types_[column_index_ - offset_].level;
1652
+ }
1653
+ else if (level_ > column_types_[column_index_ - offset_].level)
1654
+ {
1655
+ more_ = visitor_->end_array(*this, ec);
1656
+ level_ = column_types_[column_index_ - offset_].level;
1657
+ }
1658
+ switch (column_types_[column_index_ - offset_].col_type)
1659
+ {
1660
// Integer column: stream-extract an int64_t. On failure, replay the
// column's default (parsed as JSON) into the visitor if one is configured,
// otherwise emit null.
// NOTE(review): the default replay calls parser.parse_some / finish_parse
// without an error_code and does not assign more_; presumably failures
// surface as exceptions - confirm. The same pattern is used by the float,
// boolean and string cases below.
+ case csv_column_type::integer_t:
1661
+ {
1662
+ std::basic_istringstream<CharT,std::char_traits<CharT>,char_allocator_type> iss{buffer_};
1663
+ int64_t val;
1664
+ iss >> val;
1665
+ if (!iss.fail())
1666
+ {
1667
+ more_ = visitor_->int64_value(val, semantic_tag::none, *this, ec);
1668
+ }
1669
+ else
1670
+ {
1671
+ if (column_index_ - offset_ < column_defaults_.size() && column_defaults_[column_index_ - offset_].length() > 0)
1672
+ {
1673
+ basic_json_parser<CharT,temp_allocator_type> parser(alloc_);
1674
+ parser.update(column_defaults_[column_index_ - offset_].data(),column_defaults_[column_index_ - offset_].length());
1675
+ parser.parse_some(*visitor_);
1676
+ parser.finish_parse(*visitor_);
1677
+ }
1678
+ else
1679
+ {
1680
+ more_ = visitor_->null_value(semantic_tag::none, *this, ec);
1681
+ }
1682
+ }
1683
+ }
1684
+ break;
// Float column: with lossless_number the raw text is emitted as a string
// tagged bigdec; otherwise a double is stream-extracted, with the same
// default/null fallback as the integer case.
+ case csv_column_type::float_t:
1685
+ {
1686
+ if (options_.lossless_number())
1687
+ {
1688
+ more_ = visitor_->string_value(buffer_,semantic_tag::bigdec, *this, ec);
1689
+ }
1690
+ else
1691
+ {
1692
+ std::basic_istringstream<CharT, std::char_traits<CharT>, char_allocator_type> iss{ buffer_ };
1693
+ double val;
1694
+ iss >> val;
1695
+ if (!iss.fail())
1696
+ {
1697
+ more_ = visitor_->double_value(val, semantic_tag::none, *this, ec);
1698
+ }
1699
+ else
1700
+ {
1701
+ if (column_index_ - offset_ < column_defaults_.size() && column_defaults_[column_index_ - offset_].length() > 0)
1702
+ {
1703
+ basic_json_parser<CharT,temp_allocator_type> parser(alloc_);
1704
+ parser.update(column_defaults_[column_index_ - offset_].data(),column_defaults_[column_index_ - offset_].length());
1705
+ parser.parse_some(*visitor_);
1706
+ parser.finish_parse(*visitor_);
1707
+ }
1708
+ else
1709
+ {
1710
+ more_ = visitor_->null_value(semantic_tag::none, *this, ec);
1711
+ }
1712
+ }
1713
+ }
1714
+ }
1715
+ break;
// Boolean column: accepts exactly "0", "1", and "true"/"false" in any
// letter case; anything else falls back to the default or null.
+ case csv_column_type::boolean_t:
1716
+ {
1717
+ if (buffer_.length() == 1 && buffer_[0] == '0')
1718
+ {
1719
+ more_ = visitor_->bool_value(false, semantic_tag::none, *this, ec);
1720
+ }
1721
+ else if (buffer_.length() == 1 && buffer_[0] == '1')
1722
+ {
1723
+ more_ = visitor_->bool_value(true, semantic_tag::none, *this, ec);
1724
+ }
1725
+ else if (buffer_.length() == 5 && ((buffer_[0] == 'f' || buffer_[0] == 'F') && (buffer_[1] == 'a' || buffer_[1] == 'A') && (buffer_[2] == 'l' || buffer_[2] == 'L') && (buffer_[3] == 's' || buffer_[3] == 'S') && (buffer_[4] == 'e' || buffer_[4] == 'E')))
1726
+ {
1727
+ more_ = visitor_->bool_value(false, semantic_tag::none, *this, ec);
1728
+ }
1729
+ else if (buffer_.length() == 4 && ((buffer_[0] == 't' || buffer_[0] == 'T') && (buffer_[1] == 'r' || buffer_[1] == 'R') && (buffer_[2] == 'u' || buffer_[2] == 'U') && (buffer_[3] == 'e' || buffer_[3] == 'E')))
1730
+ {
1731
+ more_ = visitor_->bool_value(true, semantic_tag::none, *this, ec);
1732
+ }
1733
+ else
1734
+ {
1735
+ if (column_index_ - offset_ < column_defaults_.size() && column_defaults_[column_index_ - offset_].length() > 0)
1736
+ {
1737
+ basic_json_parser<CharT,temp_allocator_type> parser(alloc_);
1738
+ parser.update(column_defaults_[column_index_ - offset_].data(),column_defaults_[column_index_ - offset_].length());
1739
+ parser.parse_some(*visitor_);
1740
+ parser.finish_parse(*visitor_);
1741
+ }
1742
+ else
1743
+ {
1744
+ more_ = visitor_->null_value(semantic_tag::none, *this, ec);
1745
+ }
1746
+ }
1747
+ }
1748
+ break;
// Any other column type: non-empty text is emitted as a string; empty text
// uses the column default if configured, else an empty string (not null).
+ default:
1749
+ if (buffer_.length() > 0)
1750
+ {
1751
+ more_ = visitor_->string_value(buffer_, semantic_tag::none, *this, ec);
1752
+ }
1753
+ else
1754
+ {
1755
+ if (column_index_ < column_defaults_.size() + offset_ && column_defaults_[column_index_ - offset_].length() > 0)
1756
+ {
1757
+ basic_json_parser<CharT,temp_allocator_type> parser(alloc_);
1758
+ parser.update(column_defaults_[column_index_ - offset_].data(),column_defaults_[column_index_ - offset_].length());
1759
+ parser.parse_some(*visitor_);
1760
+ parser.finish_parse(*visitor_);
1761
+ }
1762
+ else
1763
+ {
1764
+ more_ = visitor_->string_value(string_view_type(), semantic_tag::none, *this, ec);
1765
+ }
1766
+ }
1767
+ break;
1768
+ }
1769
+ }
// No configured type for this column.
+ else
1770
+ {
1771
+ if (infer_types)
1772
+ {
1773
+ end_value_with_numeric_check(ec);
1774
+ }
1775
+ else
1776
+ {
1777
+ more_ = visitor_->string_value(buffer_, semantic_tag::none, *this, ec);
1778
+ }
1779
+ }
1780
+ }
1788
+
1789
// States of the character scanner in end_value_with_numeric_check().
// The numeric values are the same ones the compiler would assign
// implicitly; they are spelled out only to make the ordering explicit.
enum class numeric_check_state
{
    initial       = 0,  // nothing examined yet
    null          = 1,  // text matched "null" (any letter case)
    boolean_true  = 2,  // text matched "true"
    boolean_false = 3,  // text matched "false" (any letter case)
    minus         = 4,  // a leading '-' was seen
    zero          = 5,  // a leading '0' was seen
    integer       = 6,  // one or more digits, the first being 1-9
    fraction1     = 7,  // a '.' was just seen
    fraction      = 8,  // presumably digits after the '.' - confirm against the scanner
    exp1          = 9,  // an 'e' / 'E' was just seen
    exp           = 10, // presumably inside the exponent - confirm against the scanner
    not_a_number  = 11  // text is none of the above; scanning stops
};
1804
+
1805
+ /*
1806
+ xxx_value
1807
+ */
1808
+ void end_value_with_numeric_check(std::error_code& ec)
1809
+ {
1810
+ numeric_check_state state = numeric_check_state::initial;
1811
+ bool is_negative = false;
1812
+ int precision = 0;
1813
+ uint8_t decimal_places = 0;
1814
+
1815
+ auto last = buffer_.end();
1816
+
1817
+ std::string buffer;
1818
+ for (auto p = buffer_.begin(); state != numeric_check_state::not_a_number && p != last; ++p)
1819
+ {
1820
+ switch (state)
1821
+ {
1822
+ case numeric_check_state::initial:
1823
+ {
1824
+ switch (*p)
1825
+ {
1826
+ case 'n':case 'N':
1827
+ if ((last-p) == 4 && (p[1] == 'u' || p[1] == 'U') && (p[2] == 'l' || p[2] == 'L') && (p[3] == 'l' || p[3] == 'L'))
1828
+ {
1829
+ state = numeric_check_state::null;
1830
+ }
1831
+ else
1832
+ {
1833
+ state = numeric_check_state::not_a_number;
1834
+ }
1835
+ break;
1836
+ case 't':case 'T':
1837
+ if ((last-p) == 4 && (p[1] == 'r' || p[1] == 'R') && (p[2] == 'u' || p[2] == 'U') && (p[3] == 'e' || p[3] == 'U'))
1838
+ {
1839
+ state = numeric_check_state::boolean_true;
1840
+ }
1841
+ else
1842
+ {
1843
+ state = numeric_check_state::not_a_number;
1844
+ }
1845
+ break;
1846
+ case 'f':case 'F':
1847
+ if ((last-p) == 5 && (p[1] == 'a' || p[1] == 'A') && (p[2] == 'l' || p[2] == 'L') && (p[3] == 's' || p[3] == 'S') && (p[4] == 'e' || p[4] == 'E'))
1848
+ {
1849
+ state = numeric_check_state::boolean_false;
1850
+ }
1851
+ else
1852
+ {
1853
+ state = numeric_check_state::not_a_number;
1854
+ }
1855
+ break;
1856
+ case '-':
1857
+ is_negative = true;
1858
+ buffer.push_back(*p);
1859
+ state = numeric_check_state::minus;
1860
+ break;
1861
+ case '0':
1862
+ ++precision;
1863
+ buffer.push_back(*p);
1864
+ state = numeric_check_state::zero;
1865
+ break;
1866
+ case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
1867
+ ++precision;
1868
+ buffer.push_back(*p);
1869
+ state = numeric_check_state::integer;
1870
+ break;
1871
+ default:
1872
+ state = numeric_check_state::not_a_number;
1873
+ break;
1874
+ }
1875
+ break;
1876
+ }
1877
+ case numeric_check_state::zero:
1878
+ {
1879
+ switch (*p)
1880
+ {
1881
+ case '.':
1882
+ buffer.push_back(to_double_.get_decimal_point());
1883
+ state = numeric_check_state::fraction1;
1884
+ break;
1885
+ case 'e':case 'E':
1886
+ buffer.push_back(*p);
1887
+ state = numeric_check_state::exp1;
1888
+ break;
1889
+ default:
1890
+ state = numeric_check_state::not_a_number;
1891
+ break;
1892
+ }
1893
+ break;
1894
+ }
1895
+ case numeric_check_state::integer:
1896
+ {
1897
+ switch (*p)
1898
+ {
1899
+ case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
1900
+ ++precision;
1901
+ buffer.push_back(*p);
1902
+ break;
1903
+ case '.':
1904
+ buffer.push_back(to_double_.get_decimal_point());
1905
+ state = numeric_check_state::fraction1;
1906
+ break;
1907
+ case 'e':case 'E':
1908
+ buffer.push_back(*p);
1909
+ state = numeric_check_state::exp1;
1910
+ break;
1911
+ default:
1912
+ state = numeric_check_state::not_a_number;
1913
+ break;
1914
+ }
1915
+ break;
1916
+ }
1917
+ case numeric_check_state::minus:
1918
+ {
1919
+ switch (*p)
1920
+ {
1921
+ case '0':
1922
+ ++precision;
1923
+ buffer.push_back(*p);
1924
+ state = numeric_check_state::zero;
1925
+ break;
1926
+ case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
1927
+ ++precision;
1928
+ buffer.push_back(*p);
1929
+ state = numeric_check_state::integer;
1930
+ break;
1931
+ default:
1932
+ state = numeric_check_state::not_a_number;
1933
+ break;
1934
+ }
1935
+ break;
1936
+ }
1937
+ case numeric_check_state::fraction1:
1938
+ {
1939
+ switch (*p)
1940
+ {
1941
+ case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
1942
+ ++precision;
1943
+ ++decimal_places;
1944
+ buffer.push_back(*p);
1945
+ state = numeric_check_state::fraction;
1946
+ break;
1947
+ default:
1948
+ state = numeric_check_state::not_a_number;
1949
+ break;
1950
+ }
1951
+ break;
1952
+ }
1953
+ case numeric_check_state::fraction:
1954
+ {
1955
+ switch (*p)
1956
+ {
1957
+ case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
1958
+ ++precision;
1959
+ ++decimal_places;
1960
+ buffer.push_back(*p);
1961
+ break;
1962
+ case 'e':case 'E':
1963
+ buffer.push_back(*p);
1964
+ state = numeric_check_state::exp1;
1965
+ break;
1966
+ default:
1967
+ state = numeric_check_state::not_a_number;
1968
+ break;
1969
+ }
1970
+ break;
1971
+ }
1972
+ case numeric_check_state::exp1:
1973
+ {
1974
+ switch (*p)
1975
+ {
1976
+ case '-':
1977
+ buffer.push_back(*p);
1978
+ break;
1979
+ case '+':
1980
+ break;
1981
+ case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
1982
+ state = numeric_check_state::exp;
1983
+ buffer.push_back(*p);
1984
+ break;
1985
+ default:
1986
+ state = numeric_check_state::not_a_number;
1987
+ break;
1988
+ }
1989
+ break;
1990
+ }
1991
+ case numeric_check_state::exp:
1992
+ {
1993
+ switch (*p)
1994
+ {
1995
+ case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':
1996
+ buffer.push_back(*p);
1997
+ break;
1998
+ default:
1999
+ state = numeric_check_state::not_a_number;
2000
+ break;
2001
+ }
2002
+ break;
2003
+ }
2004
+ default:
2005
+ break;
2006
+ }
2007
+ }
2008
+
2009
+ switch (state)
2010
+ {
2011
+ case numeric_check_state::null:
2012
+ more_ = visitor_->null_value(semantic_tag::none, *this, ec);
2013
+ break;
2014
+ case numeric_check_state::boolean_true:
2015
+ more_ = visitor_->bool_value(true, semantic_tag::none, *this, ec);
2016
+ break;
2017
+ case numeric_check_state::boolean_false:
2018
+ more_ = visitor_->bool_value(false, semantic_tag::none, *this, ec);
2019
+ break;
2020
+ case numeric_check_state::zero:
2021
+ case numeric_check_state::integer:
2022
+ {
2023
+ if (is_negative)
2024
+ {
2025
+ int64_t val{ 0 };
2026
+ auto result = jsoncons::detail::to_integer_decimal(buffer_.data(), buffer_.length(), val);
2027
+ if (result)
2028
+ {
2029
+ more_ = visitor_->int64_value(val, semantic_tag::none, *this, ec);
2030
+ }
2031
+ else // Must be overflow
2032
+ {
2033
+ more_ = visitor_->string_value(buffer_, semantic_tag::bigint, *this, ec);
2034
+ }
2035
+ }
2036
+ else
2037
+ {
2038
+ uint64_t val{ 0 };
2039
+ auto result = jsoncons::detail::to_integer_decimal(buffer_.data(), buffer_.length(), val);
2040
+ if (result)
2041
+ {
2042
+ more_ = visitor_->uint64_value(val, semantic_tag::none, *this, ec);
2043
+ }
2044
+ else if (result.ec == jsoncons::detail::to_integer_errc::overflow)
2045
+ {
2046
+ more_ = visitor_->string_value(buffer_, semantic_tag::bigint, *this, ec);
2047
+ }
2048
+ else
2049
+ {
2050
+ ec = result.ec;
2051
+ more_ = false;
2052
+ return;
2053
+ }
2054
+ }
2055
+ break;
2056
+ }
2057
+ case numeric_check_state::fraction:
2058
+ case numeric_check_state::exp:
2059
+ {
2060
+ if (options_.lossless_number())
2061
+ {
2062
+ more_ = visitor_->string_value(buffer_,semantic_tag::bigdec, *this, ec);
2063
+ }
2064
+ else
2065
+ {
2066
+ double d = to_double_(buffer.c_str(), buffer.length());
2067
+ more_ = visitor_->double_value(d, semantic_tag::none, *this, ec);
2068
+ }
2069
+ break;
2070
+ }
2071
+ default:
2072
+ {
2073
+ more_ = visitor_->string_value(buffer_, semantic_tag::none, *this, ec);
2074
+ break;
2075
+ }
2076
+ }
2077
+ }
2078
+
2079
+ void push_state(csv_parse_state state)
2080
+ {
2081
+ state_stack_.push_back(state);
2082
+ }
2083
+
2084
+ csv_parse_state pop_state()
2085
+ {
2086
+ JSONCONS_ASSERT(!state_stack_.empty())
2087
+ csv_parse_state state = state_stack_.back();
2088
+ state_stack_.pop_back();
2089
+ return state;
2090
+ }
2091
+ };
2092
+
2093
+ using csv_parser = basic_csv_parser<char>;
2094
+ using wcsv_parser = basic_csv_parser<wchar_t>;
2095
+
2096
+ }}
2097
+
2098
+ #endif
2099
+