quasardb 3.13.6.post1__cp38-cp38-win_amd64.whl → 3.13.7__cp38-cp38-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of quasardb might be problematic. Click here for more details.
- quasardb/CMakeFiles/generate.stamp.depend +1 -1
- quasardb/CMakeLists.txt +43 -22
- quasardb/INSTALL.vcxproj +11 -11
- quasardb/INSTALL.vcxproj.filters +2 -2
- quasardb/__init__.py +1 -1
- quasardb/add_boost_test.cmake +43 -0
- quasardb/batch_column.hpp +1 -1
- quasardb/batch_inserter.hpp +1 -1
- quasardb/blob.hpp +1 -1
- quasardb/cluster.hpp +42 -17
- quasardb/cmake_install.cmake +4 -4
- quasardb/concepts.hpp +30 -16
- quasardb/continuous.hpp +1 -1
- quasardb/convert/array.hpp +61 -7
- quasardb/convert/point.hpp +16 -11
- quasardb/convert/range.hpp +19 -25
- quasardb/convert/unicode.hpp +457 -149
- quasardb/convert/value.hpp +87 -18
- quasardb/convert.hpp +1 -1
- quasardb/date/ALL_BUILD.vcxproj +21 -21
- quasardb/date/ALL_BUILD.vcxproj.filters +1 -1
- quasardb/date/CMakeFiles/generate.stamp.depend +1 -1
- quasardb/date/INSTALL.vcxproj +11 -11
- quasardb/date/INSTALL.vcxproj.filters +2 -2
- quasardb/date/cmake_install.cmake +6 -6
- quasardb/date/date.sln +31 -31
- quasardb/date/dateTargets.cmake +2 -2
- quasardb/detail/qdb_resource.hpp +6 -6
- quasardb/detail/ts_column.hpp +1 -1
- quasardb/direct_blob.hpp +1 -1
- quasardb/direct_handle.hpp +1 -1
- quasardb/direct_integer.hpp +1 -1
- quasardb/{version.cpp → double.hpp} +46 -28
- quasardb/entry.hpp +1 -1
- quasardb/error.hpp +14 -10
- quasardb/handle.hpp +1 -1
- quasardb/integer.hpp +1 -1
- quasardb/logger.hpp +1 -1
- quasardb/masked_array.hpp +1 -1
- quasardb/module.cpp +73 -0
- quasardb/module.hpp +24 -0
- quasardb/node.hpp +16 -11
- quasardb/numpy/__init__.py +76 -7
- quasardb/numpy.hpp +1 -1
- quasardb/options.hpp +31 -9
- quasardb/pandas/__init__.py +1 -1
- quasardb/perf.hpp +6 -6
- quasardb/pinned_writer.hpp +1 -1
- quasardb/pybind11/ALL_BUILD.vcxproj +25 -25
- quasardb/pybind11/ALL_BUILD.vcxproj.filters +1 -1
- quasardb/pybind11/CMakeFiles/generate.stamp.depend +4 -4
- quasardb/pybind11/INSTALL.vcxproj +11 -11
- quasardb/pybind11/INSTALL.vcxproj.filters +2 -2
- quasardb/pybind11/cmake_install.cmake +1 -1
- quasardb/pybind11/pybind11.sln +31 -31
- quasardb/pytypes.hpp +2 -2
- quasardb/qdb_api.dll +0 -0
- quasardb/quasardb.pyd +0 -0
- quasardb/query.cpp +3 -3
- quasardb/query.hpp +1 -1
- quasardb/range-v3/ALL_BUILD.vcxproj +25 -25
- quasardb/range-v3/ALL_BUILD.vcxproj.filters +1 -1
- quasardb/range-v3/CMakeFiles/generate.stamp.depend +6 -6
- quasardb/range-v3/INSTALL.vcxproj +11 -11
- quasardb/range-v3/INSTALL.vcxproj.filters +2 -2
- quasardb/range-v3/Range-v3.sln +39 -39
- quasardb/range-v3/cmake_install.cmake +6 -6
- quasardb/range-v3/range-v3-config.cmake +3 -3
- quasardb/range-v3/range.v3.headers.vcxproj +338 -338
- quasardb/range-v3/range.v3.headers.vcxproj.filters +315 -315
- quasardb/reader/ts_row.hpp +1 -1
- quasardb/reader/ts_value.hpp +1 -1
- quasardb/string.hpp +160 -0
- quasardb/table.hpp +1 -1
- quasardb/table_reader.hpp +1 -1
- quasardb/tag.hpp +1 -1
- quasardb/timestamp.hpp +97 -0
- quasardb/utils.hpp +1 -1
- {quasardb-3.13.6.post1.dist-info → quasardb-3.13.7.dist-info}/LICENSE.md +1 -1
- {quasardb-3.13.6.post1.dist-info → quasardb-3.13.7.dist-info}/METADATA +1 -1
- quasardb-3.13.7.dist-info/RECORD +114 -0
- {quasardb-3.13.6.post1.dist-info → quasardb-3.13.7.dist-info}/WHEEL +1 -1
- quasardb/qdb_client.cpp +0 -67
- quasardb/version.hpp +0 -43
- quasardb-3.13.6.post1.dist-info/RECORD +0 -111
- {quasardb-3.13.6.post1.dist-info → quasardb-3.13.7.dist-info}/top_level.txt +0 -0
quasardb/convert/unicode.hpp
CHANGED
|
@@ -1,25 +1,29 @@
|
|
|
1
1
|
#pragma once
|
|
2
2
|
|
|
3
|
+
#include "../concepts.hpp"
|
|
3
4
|
#include <range/v3/range_fwd.hpp>
|
|
4
5
|
#include <range/v3/view/adaptor.hpp>
|
|
5
6
|
#include <range/v3/view/cache1.hpp>
|
|
7
|
+
#include <range/v3/view/transform.hpp>
|
|
8
|
+
#include <iostream>
|
|
9
|
+
#include <optional>
|
|
6
10
|
|
|
7
11
|
namespace qdb::convert::unicode
|
|
8
12
|
{
|
|
9
13
|
|
|
10
|
-
typedef std::uint32_t
|
|
11
|
-
typedef std::uint32_t u32_type; // UTF32 == CodePoint
|
|
14
|
+
typedef std::uint32_t u32_type;
|
|
12
15
|
typedef std::uint8_t u8_type;
|
|
13
16
|
|
|
14
|
-
namespace
|
|
17
|
+
namespace detail
|
|
15
18
|
{
|
|
16
19
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
20
|
+
/**
|
|
21
|
+
* General purpose buffer which holds the "next" characters. This is useful for
|
|
22
|
+
* buffering purposes, e.g. when encoding codepoints to UTF-8, where advancing the
|
|
23
|
+
* iterator forward by 1 can actually emit 4 characters.
|
|
24
|
+
*/
|
|
25
|
+
template <typename CharT, std::size_t Width = (sizeof(u32_type) / sizeof(CharT))>
|
|
26
|
+
requires(1 <= Width && Width <= 4) struct next_chars
|
|
23
27
|
{
|
|
24
28
|
public:
|
|
25
29
|
constexpr next_chars() = default;
|
|
@@ -29,45 +33,91 @@ public:
|
|
|
29
33
|
, n_{1}
|
|
30
34
|
{}
|
|
31
35
|
|
|
32
|
-
constexpr next_chars(CharT _1, CharT _2)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
, n_{2}
|
|
36
|
+
constexpr next_chars(CharT _1, CharT _2) requires(Width >= 2)
|
|
37
|
+
: xs_{{_1, _2}}
|
|
38
|
+
, n_{2}
|
|
36
39
|
{}
|
|
37
40
|
|
|
38
|
-
constexpr next_chars(CharT _1, CharT _2, CharT _3)
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
, n_{3}
|
|
41
|
+
constexpr next_chars(CharT _1, CharT _2, CharT _3) requires(Width >= 3)
|
|
42
|
+
: xs_{{_1, _2, _3}}
|
|
43
|
+
, n_{3}
|
|
42
44
|
{}
|
|
43
45
|
|
|
44
|
-
constexpr next_chars(CharT _1, CharT _2, CharT _3, CharT _4)
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
, n_{4}
|
|
46
|
+
constexpr next_chars(CharT _1, CharT _2, CharT _3, CharT _4) requires(Width == 4)
|
|
47
|
+
: xs_{{_1, _2, _3, _4}}
|
|
48
|
+
, n_{4}
|
|
48
49
|
{}
|
|
50
|
+
|
|
49
51
|
constexpr inline CharT pop() noexcept
|
|
50
52
|
{
|
|
51
53
|
assert(p_ < n_);
|
|
52
54
|
return xs_[p_++];
|
|
53
55
|
};
|
|
54
56
|
|
|
57
|
+
constexpr inline CharT top() const noexcept
|
|
58
|
+
{
|
|
59
|
+
assert(p_ < n_);
|
|
60
|
+
return xs_[p_];
|
|
61
|
+
};
|
|
62
|
+
|
|
55
63
|
constexpr inline bool empty() const noexcept
|
|
56
64
|
{
|
|
57
65
|
return p_ == n_;
|
|
58
66
|
};
|
|
59
67
|
|
|
68
|
+
constexpr inline std::size_t size() const noexcept
|
|
69
|
+
{
|
|
70
|
+
return n_ - p_;
|
|
71
|
+
}
|
|
72
|
+
|
|
60
73
|
private:
|
|
61
74
|
std::array<CharT, Width> xs_{{}};
|
|
62
75
|
std::size_t n_ = 0;
|
|
63
76
|
std::size_t p_ = 0;
|
|
64
77
|
};
|
|
65
78
|
|
|
66
|
-
|
|
79
|
+
/**
 * Strongly typed wrapper around a single 32-bit code point value.
 *
 * Construction is explicit, so a plain integer cannot silently be used where a code point is
 * expected. The wrapper adds no storage on top of the wrapped integer.
 */
class code_point
{
private:
    std::uint32_t value_;

public:
    /**
     * Wraps `value`.
     *
     * A single by-value constructor accepts both lvalues and rvalues. Declaring an additional
     * `std::uint32_t &&` overload next to it makes every call with an rvalue argument (e.g.
     * `code_point{0x41u}`) ambiguous, and moving a built-in integer is just a copy anyway.
     */
    constexpr explicit code_point(std::uint32_t value) noexcept
        : value_{value}
    {}

    /**
     * Returns the wrapped integer value.
     */
    constexpr inline std::uint32_t get() const noexcept
    {
        return value_;
    }

    /**
     * Two code points are equal when their wrapped values are equal.
     */
    friend constexpr bool operator==(code_point const & lhs, code_point const & rhs) noexcept
    {
        return lhs.value_ == rhs.value_;
    }

    friend constexpr bool operator!=(code_point const & lhs, code_point const & rhs) noexcept
    {
        return !(lhs == rhs);
    }
};
|
|
105
|
+
|
|
106
|
+
static_assert(sizeof(code_point) == sizeof(std::uint32_t));
|
|
107
|
+
|
|
108
|
+
/**
|
|
109
|
+
* Functor that accepts codepoints and emits encoded UTF characters.
|
|
110
|
+
*/
|
|
111
|
+
template <typename CharT>
|
|
112
|
+
struct encode_fn_;
|
|
113
|
+
|
|
114
|
+
/**
|
|
115
|
+
* Functor for encoding codepoints to UTF-8.
|
|
116
|
+
*/
|
|
67
117
|
template <>
|
|
68
|
-
struct
|
|
118
|
+
struct encode_fn_<u8_type>
|
|
69
119
|
{
|
|
70
|
-
inline next_chars<u8_type> operator()(
|
|
120
|
+
inline next_chars<u8_type> operator()(std::uint32_t cp) const noexcept
|
|
71
121
|
{
|
|
72
122
|
if (cp >= (1L << 16)) [[unlikely]]
|
|
73
123
|
{
|
|
@@ -92,199 +142,457 @@ struct fn_<u8_type>
|
|
|
92
142
|
return {static_cast<u8_type>(cp)};
|
|
93
143
|
}
|
|
94
144
|
}
|
|
145
|
+
|
|
146
|
+
inline next_chars<u8_type> operator()(detail::code_point cp) const noexcept
|
|
147
|
+
{
|
|
148
|
+
return operator()(cp.get());
|
|
149
|
+
}
|
|
95
150
|
};
|
|
96
151
|
|
|
97
152
|
/**
|
|
98
|
-
*
|
|
99
|
-
*
|
|
100
|
-
* Reads codepoints (i.e. UTF32/UCS4) and emits UTF8. Effectively used for
|
|
101
|
-
* conversion from numpy to qdb.
|
|
153
|
+
* Functor that accepts UTF encoded characters and yields codepoints.
|
|
102
154
|
*/
|
|
103
|
-
template <typename
|
|
104
|
-
|
|
105
|
-
class view_ : public ranges::view_facade<view_<OutCharT, Rng>, ranges::finite>
|
|
106
|
-
{
|
|
107
|
-
friend ranges::range_access;
|
|
108
|
-
using iterator_t = ranges::iterator_t<Rng>;
|
|
109
|
-
using value_type = OutCharT;
|
|
110
|
-
|
|
111
|
-
public:
|
|
112
|
-
view_() = default;
|
|
113
|
-
explicit view_(Rng && rng) noexcept
|
|
114
|
-
: rng_{rng}
|
|
115
|
-
, iter_{ranges::begin(rng)}
|
|
116
|
-
{}
|
|
117
|
-
|
|
118
|
-
constexpr inline bool equal(ranges::default_sentinel_t) const noexcept
|
|
119
|
-
{
|
|
120
|
-
return iter_ == ranges::end(rng_) && next_.empty();
|
|
121
|
-
}
|
|
155
|
+
template <typename CharT>
|
|
156
|
+
struct decode_fn_;
|
|
122
157
|
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
158
|
+
/**
|
|
159
|
+
* UTF-8 -> CodePoint functor.
|
|
160
|
+
*/
|
|
161
|
+
template <>
|
|
162
|
+
struct decode_fn_<u8_type>
|
|
163
|
+
{
|
|
164
|
+
template <ranges::input_iterator I>
|
|
165
|
+
inline detail::code_point operator()(I it) const noexcept
|
|
126
166
|
{
|
|
127
|
-
|
|
128
|
-
// easily separate read() and next() without taking a performance hit.
|
|
129
|
-
//
|
|
130
|
-
// As such, we'll const_cast ourselves to make our internal 'next' chars
|
|
131
|
-
// writable.
|
|
132
|
-
view_ * this_ = const_cast<view_ *>(this);
|
|
133
|
-
return this_->read();
|
|
134
|
-
};
|
|
167
|
+
std::uint32_t cp;
|
|
135
168
|
|
|
136
|
-
|
|
137
|
-
{
|
|
138
|
-
if (next_.empty() == true) [[likely]]
|
|
169
|
+
if (*it < 0x80) [[likely]]
|
|
139
170
|
{
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
171
|
+
cp = *it++;
|
|
172
|
+
}
|
|
173
|
+
else if ((*it & 0xe0) == 0xc0)
|
|
174
|
+
{
|
|
175
|
+
cp = ((long)(*it++ & 0x1f) << 6) | ((long)(*it++ & 0x3f) << 0);
|
|
176
|
+
}
|
|
177
|
+
else if ((*it & 0xf0) == 0xe0)
|
|
178
|
+
{
|
|
179
|
+
cp = ((long)(*it++ & 0x0f) << 12) | ((long)(*it++ & 0x3f) << 6)
|
|
180
|
+
| ((long)(*it++ & 0x3f) << 0);
|
|
181
|
+
}
|
|
182
|
+
else if ((*it & 0xf8) == 0xf0 && (*it <= 0xf4))
|
|
183
|
+
{
|
|
184
|
+
cp = ((long)(*it++ & 0x07) << 18) | ((long)(*it++ & 0x3f) << 12)
|
|
185
|
+
| ((long)(*it++ & 0x3f) << 6) | ((long)(*it++ & 0x3f) << 0);
|
|
186
|
+
}
|
|
187
|
+
else [[unlikely]]
|
|
188
|
+
{
|
|
189
|
+
cp = -1;
|
|
190
|
+
++it;
|
|
191
|
+
}
|
|
149
192
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
iterator_t iter_;
|
|
153
|
-
next_chars<OutCharT> next_;
|
|
154
|
-
fn_<OutCharT> encode_;
|
|
193
|
+
return detail::code_point{cp};
|
|
194
|
+
}
|
|
155
195
|
};
|
|
156
196
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
template <class Rng>
|
|
164
|
-
view_<u8_type, Rng> utf8_view(Rng && rng)
|
|
165
|
-
{
|
|
166
|
-
return view<u8_type, Rng>(std::forward<Rng>(rng));
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
}; // namespace encode
|
|
197
|
+
/**
|
|
198
|
+
* Counts how many UTF characters are represented by a code point.
|
|
199
|
+
*/
|
|
200
|
+
template <typename CharT>
|
|
201
|
+
struct count_fn_;
|
|
170
202
|
|
|
171
|
-
|
|
203
|
+
/**
|
|
204
|
+
* Counts how many UTF-8 characters are represented by a code point.
|
|
205
|
+
*/
|
|
206
|
+
template <>
|
|
207
|
+
struct count_fn_<u8_type>
|
|
172
208
|
{
|
|
209
|
+
inline std::size_t operator()(detail::code_point cp) const noexcept
|
|
210
|
+
{
|
|
211
|
+
if (cp.get() >= (1L << 16)) [[unlikely]]
|
|
212
|
+
{
|
|
213
|
+
return 4;
|
|
214
|
+
}
|
|
215
|
+
else if (cp.get() >= (1L << 11))
|
|
216
|
+
{
|
|
217
|
+
return 3;
|
|
218
|
+
}
|
|
219
|
+
else if (cp.get() >= (1L << 7))
|
|
220
|
+
{
|
|
221
|
+
return 2;
|
|
222
|
+
}
|
|
223
|
+
else [[likely]]
|
|
224
|
+
{
|
|
225
|
+
return 1;
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
};
|
|
173
229
|
|
|
230
|
+
/**
|
|
231
|
+
* Skips as many UTF characters as necessary to read a single code point.
|
|
232
|
+
*/
|
|
174
233
|
template <typename CharT>
|
|
175
|
-
struct
|
|
234
|
+
struct skip_fn_;
|
|
176
235
|
|
|
236
|
+
/**
|
|
237
|
+
* Skips as many UTF-8 characters as necessary to read a single code point.
|
|
238
|
+
*/
|
|
177
239
|
template <>
|
|
178
|
-
struct
|
|
240
|
+
struct skip_fn_<u8_type>
|
|
179
241
|
{
|
|
180
242
|
template <ranges::input_iterator I>
|
|
181
|
-
constexpr inline
|
|
243
|
+
constexpr inline void operator()(I & it) const noexcept
|
|
182
244
|
{
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
if (it[0] < 0x80) [[likely]]
|
|
245
|
+
if (*it < 0x80) [[likely]]
|
|
186
246
|
{
|
|
187
|
-
|
|
247
|
+
ranges::advance(it, 1);
|
|
188
248
|
}
|
|
189
|
-
else if ((it
|
|
249
|
+
else if ((*it & 0xe0) == 0xc0)
|
|
190
250
|
{
|
|
191
|
-
|
|
192
|
-
it += 2;
|
|
251
|
+
ranges::advance(it, 2);
|
|
193
252
|
}
|
|
194
|
-
else if ((it
|
|
253
|
+
else if ((*it & 0xf0) == 0xe0)
|
|
195
254
|
{
|
|
196
|
-
|
|
197
|
-
| ((long)(it[2] & 0x3f) << 0);
|
|
198
|
-
it += 3;
|
|
255
|
+
ranges::advance(it, 3);
|
|
199
256
|
}
|
|
200
|
-
else if ((it
|
|
257
|
+
else if ((*it & 0xf8) == 0xf0 && (*it <= 0xf4))
|
|
201
258
|
{
|
|
202
|
-
|
|
203
|
-
| ((long)(it[2] & 0x3f) << 6) | ((long)(it[3] & 0x3f) << 0);
|
|
204
|
-
it += 4;
|
|
259
|
+
ranges::advance(it, 4);
|
|
205
260
|
}
|
|
206
261
|
else [[unlikely]]
|
|
207
262
|
{
|
|
208
|
-
|
|
209
|
-
++it;
|
|
263
|
+
ranges::advance(it, 1);
|
|
210
264
|
}
|
|
265
|
+
}
|
|
266
|
+
};
|
|
211
267
|
|
|
212
|
-
|
|
268
|
+
namespace utf8
|
|
269
|
+
{
|
|
270
|
+
|
|
271
|
+
/**
|
|
272
|
+
* CodePoint -> UTF8 encoding view
|
|
273
|
+
*
|
|
274
|
+
* Reads codepoints and emits UTF8. Effectively used for conversion from numpy to qdb.
|
|
275
|
+
*/
|
|
276
|
+
template <typename Rng>
|
|
277
|
+
requires(concepts::forward_range_t<Rng, detail::code_point>) class encode_view_
|
|
278
|
+
: public ranges::view_facade<encode_view_<Rng>, ranges::finite>
|
|
279
|
+
{
|
|
280
|
+
friend ranges::range_access;
|
|
281
|
+
using iterator_t = ranges::iterator_t<Rng>;
|
|
282
|
+
using value_type = u8_type;
|
|
283
|
+
|
|
284
|
+
/**
|
|
285
|
+
* Use a separate cursor struct, so we can implement the range as a forward range (i.e.
|
|
286
|
+
* make the iterators weakly comparable).
|
|
287
|
+
*/
|
|
288
|
+
template <typename IterT, typename SentT>
|
|
289
|
+
struct cursor
|
|
290
|
+
{
|
|
291
|
+
private:
|
|
292
|
+
IterT iter_;
|
|
293
|
+
SentT last_;
|
|
294
|
+
next_chars<value_type> next_;
|
|
295
|
+
encode_fn_<value_type> encode_;
|
|
296
|
+
count_fn_<value_type> count_;
|
|
297
|
+
|
|
298
|
+
public:
|
|
299
|
+
cursor() = default;
|
|
300
|
+
cursor(IterT iter, SentT last)
|
|
301
|
+
: iter_{iter}
|
|
302
|
+
, last_{last}
|
|
303
|
+
{
|
|
304
|
+
next();
|
|
305
|
+
};
|
|
306
|
+
|
|
307
|
+
inline value_type read() const noexcept
|
|
308
|
+
{
|
|
309
|
+
return next_.top();
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
constexpr inline void next() noexcept
|
|
313
|
+
{
|
|
314
|
+
if (next_.empty() == false) [[likely]]
|
|
315
|
+
{
|
|
316
|
+
next_.pop();
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
if (iter_ != last_ && next_.empty() == true) [[likely]]
|
|
320
|
+
{
|
|
321
|
+
next_ = encode_(*(iter_++));
|
|
322
|
+
}
|
|
323
|
+
};
|
|
324
|
+
|
|
325
|
+
constexpr inline bool equal(cursor<IterT, SentT> const & rhs) const noexcept
|
|
326
|
+
{
|
|
327
|
+
return iter_ == rhs.iter_ && next_.empty() == rhs.next_.empty();
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
constexpr inline bool equal(ranges::default_sentinel_t) const noexcept
|
|
331
|
+
{
|
|
332
|
+
return iter_ == last_ && next_.empty();
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
inline ranges::iter_difference_t<IterT> distance_to(ranges::default_sentinel_t) const
|
|
336
|
+
{
|
|
337
|
+
// This is a hack, but we initialize the return value to `1` if we already read
|
|
338
|
+
// the next token in our constructor.
|
|
339
|
+
ranges::iter_difference_t<IterT> ret(next_.size());
|
|
340
|
+
|
|
341
|
+
// And finally, let's parse all the remaining tokens
|
|
342
|
+
for (IterT cur = iter_; cur != last_; ++cur)
|
|
343
|
+
{
|
|
344
|
+
ret += count_(*cur);
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
return ret;
|
|
348
|
+
}
|
|
349
|
+
};
|
|
350
|
+
|
|
351
|
+
public:
|
|
352
|
+
encode_view_() = default;
|
|
353
|
+
explicit encode_view_(Rng && rng) noexcept
|
|
354
|
+
: rng_{rng}
|
|
355
|
+
{}
|
|
356
|
+
|
|
357
|
+
inline decltype(auto) begin_cursor() const
|
|
358
|
+
{
|
|
359
|
+
auto beg = ranges::begin(rng_);
|
|
360
|
+
auto end = ranges::end(rng_);
|
|
361
|
+
return cursor<decltype(beg), decltype(end)>{beg, end};
|
|
213
362
|
}
|
|
363
|
+
|
|
364
|
+
public:
|
|
365
|
+
private:
|
|
366
|
+
Rng rng_;
|
|
214
367
|
};
|
|
215
368
|
|
|
216
369
|
/**
|
|
217
|
-
* UTF8->
|
|
370
|
+
* UTF8->CodePoint decoding view
|
|
218
371
|
*
|
|
219
372
|
* Reads UTF8 and decodes it into codepoints (i.e. UTF32/UCS4). Effectively
|
|
220
373
|
* used for conversion from qdb strings to numpy.
|
|
221
374
|
*/
|
|
222
375
|
|
|
223
|
-
template <typename
|
|
224
|
-
|
|
225
|
-
|
|
376
|
+
template <typename Rng>
|
|
377
|
+
requires(ranges::forward_range<Rng>) class decode_view_
|
|
378
|
+
: public ranges::view_facade<decode_view_<Rng>, ranges::finite>
|
|
226
379
|
{
|
|
227
380
|
friend ranges::range_access;
|
|
228
381
|
using iterator_t = ranges::iterator_t<Rng>;
|
|
229
|
-
using value_type =
|
|
382
|
+
using value_type = detail::code_point;
|
|
383
|
+
|
|
384
|
+
/**
|
|
385
|
+
* Use a separate cursor struct, so we can implement the range as a forward range (i.e.
|
|
386
|
+
* make the iterators weakly comparable).
|
|
387
|
+
*/
|
|
388
|
+
template <typename IterT, typename SentT>
|
|
389
|
+
struct cursor
|
|
390
|
+
{
|
|
391
|
+
private:
|
|
392
|
+
IterT iter_;
|
|
393
|
+
SentT last_;
|
|
394
|
+
decode_fn_<u8_type> decode_{};
|
|
395
|
+
skip_fn_<u8_type> skip_{};
|
|
396
|
+
|
|
397
|
+
public:
|
|
398
|
+
cursor() = default;
|
|
399
|
+
cursor(IterT iter, SentT last)
|
|
400
|
+
: iter_{iter}
|
|
401
|
+
, last_{last} {};
|
|
402
|
+
|
|
403
|
+
inline value_type read() const noexcept
|
|
404
|
+
{
|
|
405
|
+
// XXX(leon)
|
|
406
|
+
// This is somewhat inefficient, as we're peeking ahead trying to find the "next"
|
|
407
|
+
// codepoint, which may actually iterate forward more than one step.
|
|
408
|
+
//
|
|
409
|
+
// Then, when next() is requested, we _actually_ move ahead, and have to do the same
|
|
410
|
+
// (similar) checks again.
|
|
411
|
+
//
|
|
412
|
+
// However, after too much edge cases and debugging, I conclude that in order to be
|
|
413
|
+
// "really" compatible with ranges, you *must* separate these two, there's no way around
|
|
414
|
+
// it.
|
|
415
|
+
return decode_(iter_);
|
|
416
|
+
};
|
|
417
|
+
|
|
418
|
+
constexpr inline void next() noexcept
|
|
419
|
+
{
|
|
420
|
+
skip_(iter_);
|
|
421
|
+
};
|
|
422
|
+
|
|
423
|
+
constexpr inline bool equal(cursor<IterT, SentT> const & rhs) const noexcept
|
|
424
|
+
{
|
|
425
|
+
return iter_ == rhs.iter_;
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
constexpr inline bool equal(ranges::default_sentinel_t) const noexcept
|
|
429
|
+
{
|
|
430
|
+
return iter_ == last_;
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
inline ranges::iter_difference_t<iterator_t> distance_to(ranges::default_sentinel_t) const
|
|
434
|
+
{
|
|
435
|
+
return _distance_to(last_);
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
private:
|
|
439
|
+
template <typename T>
|
|
440
|
+
inline ranges::iter_difference_t<iterator_t> _distance_to(T const & other) const
|
|
441
|
+
{
|
|
442
|
+
ranges::iter_difference_t<iterator_t> ret{0};
|
|
443
|
+
|
|
444
|
+
for (IterT cur = iter_; cur != last_; skip_(cur))
|
|
445
|
+
{
|
|
446
|
+
++ret;
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
return ret;
|
|
450
|
+
}
|
|
451
|
+
};
|
|
230
452
|
|
|
231
453
|
// Lifecycle
|
|
232
454
|
public:
|
|
233
|
-
|
|
234
|
-
explicit
|
|
235
|
-
: rng_{rng}
|
|
236
|
-
, iter_{ranges::begin(rng)} {};
|
|
455
|
+
decode_view_() = default;
|
|
456
|
+
explicit decode_view_(Rng && rng) noexcept
|
|
457
|
+
: rng_{rng} {};
|
|
237
458
|
|
|
238
|
-
|
|
459
|
+
inline decltype(auto) begin_cursor() const
|
|
239
460
|
{
|
|
240
|
-
|
|
461
|
+
auto beg = ranges::begin(rng_);
|
|
462
|
+
auto end = ranges::end(rng_);
|
|
463
|
+
|
|
464
|
+
return cursor<decltype(beg), decltype(end)>{beg, end};
|
|
241
465
|
}
|
|
242
466
|
|
|
243
|
-
|
|
467
|
+
private:
|
|
468
|
+
Rng rng_;
|
|
469
|
+
};
|
|
470
|
+
|
|
471
|
+
}; // namespace utf8
|
|
472
|
+
|
|
473
|
+
}; // namespace detail
|
|
474
|
+
|
|
475
|
+
namespace utf8
|
|
476
|
+
{
|
|
477
|
+
|
|
478
|
+
struct encode_view_base_fn
|
|
479
|
+
{
|
|
480
|
+
template <typename Rng>
|
|
481
|
+
constexpr detail::utf8::encode_view_<Rng> operator()(Rng && rng) const
|
|
244
482
|
{
|
|
245
|
-
return
|
|
483
|
+
return detail::utf8::encode_view_<Rng>{std::forward<Rng>(rng)};
|
|
246
484
|
}
|
|
485
|
+
};
|
|
247
486
|
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
constexpr
|
|
487
|
+
struct encode_view_bind_fn
|
|
488
|
+
{
|
|
489
|
+
constexpr auto operator()() const
|
|
251
490
|
{
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
491
|
+
return ranges::make_view_closure(encode_view_base_fn{});
|
|
492
|
+
}
|
|
493
|
+
};
|
|
494
|
+
|
|
495
|
+
struct encode_view_fn
|
|
496
|
+
: encode_view_base_fn
|
|
497
|
+
, encode_view_bind_fn
|
|
498
|
+
{
|
|
499
|
+
using encode_view_base_fn::operator();
|
|
500
|
+
using encode_view_bind_fn::operator();
|
|
501
|
+
};
|
|
260
502
|
|
|
261
|
-
|
|
503
|
+
RANGES_INLINE_VARIABLE(encode_view_fn, encode_view)
|
|
504
|
+
|
|
505
|
+
struct decode_view_base_fn
|
|
506
|
+
{
|
|
507
|
+
/**
|
|
508
|
+
* Default case: our input range already has a 'perfect' u8_type (unsigned char)
|
|
509
|
+
*/
|
|
510
|
+
template <typename Rng>
|
|
511
|
+
requires(concepts::range_t<Rng, u8_type>) constexpr detail::utf8::decode_view_<Rng> operator()(
|
|
512
|
+
Rng && rng) const
|
|
262
513
|
{
|
|
263
|
-
return
|
|
264
|
-
}
|
|
514
|
+
return detail::utf8::decode_view_<Rng>{std::forward<Rng>(rng)};
|
|
515
|
+
}
|
|
516
|
+
|
|
517
|
+
/**
|
|
518
|
+
* Non-default case, which happens when e.g. using a std::string as a range: the underlying type is
|
|
519
|
+
* not exactly u8_type, but we can easily convert it to the appropriate type.
|
|
520
|
+
*
|
|
521
|
+
* This avoids a case where signed chars were interpreted as unsigned chars and all kinds of
|
|
522
|
+
* shenanigans, which is fixed here.
|
|
523
|
+
*/
|
|
524
|
+
template <typename Rng>
|
|
525
|
+
requires(
|
|
526
|
+
!concepts::range_t<Rng,
|
|
527
|
+
u8_type> && std::is_nothrow_convertible_v<ranges::range_value_t<Rng>, u8_type>) constexpr decltype(auto)
|
|
528
|
+
operator()(Rng && rng) const
|
|
529
|
+
{
|
|
530
|
+
auto xform_fn = [](auto x) -> u8_type { return static_cast<u8_type>(x); };
|
|
265
531
|
|
|
266
|
-
|
|
532
|
+
// Transform and delegate to the 'regular' operator()
|
|
533
|
+
return operator()(ranges::views::transform(rng, xform_fn));
|
|
534
|
+
}
|
|
535
|
+
};
|
|
267
536
|
|
|
268
|
-
|
|
537
|
+
struct decode_view_bind_fn
|
|
538
|
+
{
|
|
539
|
+
constexpr auto operator()() const
|
|
540
|
+
{
|
|
541
|
+
return ranges::make_view_closure(decode_view_base_fn{});
|
|
542
|
+
}
|
|
543
|
+
};
|
|
269
544
|
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
545
|
+
struct decode_view_fn
|
|
546
|
+
: decode_view_base_fn
|
|
547
|
+
, decode_view_bind_fn
|
|
548
|
+
{
|
|
549
|
+
using decode_view_base_fn::operator();
|
|
550
|
+
using decode_view_bind_fn::operator();
|
|
274
551
|
};
|
|
275
552
|
|
|
276
|
-
|
|
277
|
-
|
|
553
|
+
RANGES_INLINE_VARIABLE(decode_view_fn, decode_view)
|
|
554
|
+
|
|
555
|
+
}; // namespace utf8
|
|
556
|
+
|
|
557
|
+
namespace utf32
|
|
558
|
+
{
|
|
559
|
+
|
|
560
|
+
/**
|
|
561
|
+
* Returns view that reads codepoints and emits UTF-32 characters.
|
|
562
|
+
*/
|
|
563
|
+
inline decltype(auto) encode_view()
|
|
278
564
|
{
|
|
279
|
-
|
|
565
|
+
auto xform_fn = [](detail::code_point x) -> u32_type { return static_cast<u32_type>(x.get()); };
|
|
566
|
+
return ranges::views::transform(xform_fn);
|
|
280
567
|
}
|
|
281
568
|
|
|
569
|
+
/**
|
|
570
|
+
* Returns view that reads codepoints and emits UTF-32 characters.
|
|
571
|
+
*/
|
|
282
572
|
template <class Rng>
|
|
283
|
-
inline decltype(auto)
|
|
573
|
+
requires(concepts::input_range_t<Rng, detail::code_point>) inline decltype(auto) encode_view(Rng && rng)
|
|
574
|
+
{
|
|
575
|
+
return rng | encode_view();
|
|
576
|
+
}
|
|
577
|
+
|
|
578
|
+
/**
|
|
579
|
+
* Returns view that reads UTF-32 characters and emits codepoints.
|
|
580
|
+
*/
|
|
581
|
+
inline decltype(auto) decode_view()
|
|
582
|
+
{
|
|
583
|
+
auto xform_fn = [](u32_type x) -> detail::code_point { return detail::code_point{x}; };
|
|
584
|
+
return ranges::views::transform(xform_fn);
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
/**
|
|
588
|
+
* Returns view that reads UTF-32 characters and emits codepoints.
|
|
589
|
+
*/
|
|
590
|
+
template <typename Rng>
|
|
591
|
+
inline decltype(auto) decode_view(Rng && rng)
|
|
284
592
|
{
|
|
285
|
-
return
|
|
593
|
+
return rng | decode_view();
|
|
286
594
|
}
|
|
287
595
|
|
|
288
|
-
}; // namespace
|
|
596
|
+
}; // namespace utf32
|
|
289
597
|
|
|
290
598
|
}; // namespace qdb::convert::unicode
|