oinky 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +22 -0
- data/README.md +141 -0
- data/ext/extconf.rb +79 -0
- data/ext/include/oinky.h +424 -0
- data/ext/include/oinky.hpp +63 -0
- data/ext/include/oinky/nky_base.hpp +1116 -0
- data/ext/include/oinky/nky_core.hpp +1603 -0
- data/ext/include/oinky/nky_cursor.hpp +665 -0
- data/ext/include/oinky/nky_dialect.hpp +107 -0
- data/ext/include/oinky/nky_error.hpp +164 -0
- data/ext/include/oinky/nky_fixed_table.hpp +710 -0
- data/ext/include/oinky/nky_handle.hpp +334 -0
- data/ext/include/oinky/nky_index.hpp +1038 -0
- data/ext/include/oinky/nky_log.hpp +15 -0
- data/ext/include/oinky/nky_merge_itr.hpp +403 -0
- data/ext/include/oinky/nky_model.hpp +110 -0
- data/ext/include/oinky/nky_pool.hpp +760 -0
- data/ext/include/oinky/nky_public.hpp +808 -0
- data/ext/include/oinky/nky_serializer.hpp +1625 -0
- data/ext/include/oinky/nky_strtable.hpp +504 -0
- data/ext/include/oinky/nky_table.hpp +1996 -0
- data/ext/nky_lib.cpp +390 -0
- data/ext/nky_lib_core.hpp +212 -0
- data/ext/nky_lib_index.cpp +158 -0
- data/ext/nky_lib_table.cpp +224 -0
- data/lib/oinky.rb +1284 -0
- data/lib/oinky/compiler.rb +106 -0
- data/lib/oinky/cpp_emitter.rb +311 -0
- data/lib/oinky/dsl.rb +167 -0
- data/lib/oinky/error.rb +19 -0
- data/lib/oinky/modelbase.rb +12 -0
- data/lib/oinky/nbuffer.rb +152 -0
- data/lib/oinky/normalize.rb +132 -0
- data/lib/oinky/oc_builder.rb +44 -0
- data/lib/oinky/query.rb +193 -0
- data/lib/oinky/rb_emitter.rb +147 -0
- data/lib/oinky/shard.rb +40 -0
- data/lib/oinky/testsup.rb +104 -0
- data/lib/oinky/version.rb +9 -0
- data/oinky.gemspec +36 -0
- metadata +120 -0
@@ -0,0 +1,63 @@
|
|
1
|
+
// This source is distributed under the terms of the MIT License. Refer
|
2
|
+
// to the 'LICENSE' file for details.
|
3
|
+
//
|
4
|
+
// Copyright (c) Jacob Lacouture, 2012
|
5
|
+
|
6
|
+
// This is the C++ header for Oinky. It provides a header-only implementation
|
7
|
+
// of Oinky, requiring no library.
|
8
|
+
|
9
|
+
#ifndef OINKY_HPP_INCLUDED
|
10
|
+
#define OINKY_HPP_INCLUDED
|
11
|
+
|
12
|
+
//stdc++
|
13
|
+
#include <cstdio>
|
14
|
+
#include <map>
|
15
|
+
#include <memory>
|
16
|
+
//TR1
|
17
|
+
#include <boost/tr1/tr1/unordered_map>
|
18
|
+
// This is for shared_ptr
|
19
|
+
#include <boost/tr1/tr1/memory>
|
20
|
+
//#include <unordered_map>
|
21
|
+
//BOOST
|
22
|
+
#include <boost/bind.hpp>
|
23
|
+
#include <boost/date_time.hpp>
|
24
|
+
#include <boost/detail/endian.hpp>
|
25
|
+
#include <boost/foreach.hpp>
|
26
|
+
#include <boost/function.hpp>
|
27
|
+
#include <boost/intrusive/list.hpp>
|
28
|
+
#include <boost/intrusive/avl_set.hpp>
|
29
|
+
#include <boost/intrusive/detail/rbtree_node.hpp>
|
30
|
+
#include <boost/intrusive/rbtree_algorithms.hpp>
|
31
|
+
#include <boost/intrusive/set.hpp>
|
32
|
+
#include <boost/iterator/indirect_iterator.hpp>
|
33
|
+
#include <boost/iterator/transform_iterator.hpp>
|
34
|
+
#include <boost/iterator/filter_iterator.hpp>
|
35
|
+
#include <boost/make_shared.hpp>
|
36
|
+
#include <boost/noncopyable.hpp>
|
37
|
+
#include <boost/static_assert.hpp>
|
38
|
+
#include <boost/strong_typedef.hpp>
|
39
|
+
#include <boost/system/error_code.hpp>
|
40
|
+
#include <boost/system/system_error.hpp>
|
41
|
+
#include <boost/throw_exception.hpp>
|
42
|
+
#include <boost/type_traits.hpp>
|
43
|
+
#include <boost/utility.hpp>
|
44
|
+
|
45
|
+
#include "oinky/nky_dialect.hpp"
|
46
|
+
#include "oinky/nky_error.hpp"
|
47
|
+
#include "oinky/nky_public.hpp"
|
48
|
+
#include "oinky/nky_pool.hpp"
|
49
|
+
#include "oinky/nky_merge_itr.hpp"
|
50
|
+
#include "oinky/nky_base.hpp"
|
51
|
+
#include "oinky/nky_strtable.hpp"
|
52
|
+
#include "oinky/nky_core.hpp"
|
53
|
+
#include "oinky/nky_fixed_table.hpp"
|
54
|
+
#include "oinky/nky_cursor.hpp"
|
55
|
+
#include "oinky/nky_index.hpp"
|
56
|
+
#include "oinky/nky_table.hpp"
|
57
|
+
#include "oinky/nky_handle.hpp"
|
58
|
+
#include "oinky/nky_serializer.hpp"
|
59
|
+
#include "oinky/nky_model.hpp"
|
60
|
+
|
61
|
+
//#ifndef OINKY_HPP_INCLUDED
|
62
|
+
#endif
|
63
|
+
|
@@ -0,0 +1,1116 @@
|
|
1
|
+
// This source is distributed under the terms of the MIT License. Refer
|
2
|
+
// to the 'LICENSE' file for details.
|
3
|
+
//
|
4
|
+
// Copyright (c) Jacob Lacouture, 2012
|
5
|
+
|
6
|
+
// This is a serialization engine for a SQL-like database.
|
7
|
+
//
|
8
|
+
// A SQL database implementation is often separated between storage
|
9
|
+
// engine and query engine. We further subdivide the storage engine into
|
10
|
+
// two layers: serialization and persistence.  This layering is not
|
11
|
+
// always a good one for what I would call "online databases," which are
|
12
|
+
// designed to support a large number of concurrent writes, while minimizing
|
13
|
+
// I/O. Such implementations can benefit from unifying the serialization/
|
14
|
+
// persistence processes, using techniques like logging, etc.
|
15
|
+
//
|
16
|
+
// The separation chosen here is designed for a more specific scenario:
|
17
|
+
// 1. Reads are optimized over writes.
|
18
|
+
// 2. Seek/addressing IO is optimized over raw byte IO throughput.
|
19
|
+
// 3. Extremely large dataset, with well understood partitioning
|
20
|
+
// (suitable for sharding).
|
21
|
+
//
|
22
|
+
|
23
|
+
|
24
|
+
namespace Oinky
|
25
|
+
{
|
26
|
+
using namespace Oinky::Errors;
|
27
|
+
using namespace Oinky::Utils;
|
28
|
+
|
29
|
+
namespace Internal
|
30
|
+
{
|
31
|
+
|
32
|
+
template<typename TABLE_CTX>
|
33
|
+
class column_selector_template;
|
34
|
+
|
35
|
+
template<typename DB>
|
36
|
+
class table_ctx_t;
|
37
|
+
|
38
|
+
template<typename TABLE_CTX>
|
39
|
+
class index_ctx_t;
|
40
|
+
|
41
|
+
template<typename TABLE_CTX>
|
42
|
+
class table_handle_t;
|
43
|
+
|
44
|
+
// No practical limit on the number of strings.
|
45
|
+
typedef uint32 base_strtable_ref;
|
46
|
+
|
47
|
+
// There were a few reasons for separating the metastrings table and the
|
48
|
+
// userstrings table. One was that the metastrings table would be modified
|
49
|
+
// less often (only on schema change) so could usually be serialized more
|
50
|
+
// efficiently, but it's so much smaller that I doubt it matters. Another
|
51
|
+
// was that meta-string lookup time would be more important to performance,
|
52
|
+
// but I doubt that too. At this point, I also don't see a strong argument
|
53
|
+
// for merging them into the same table, so separated they remain.
|
54
|
+
//
|
55
|
+
// The table reference types are identical. However, these typedefs keep
|
56
|
+
// us from accidentally assigning a reference from the wrong table.
|
57
|
+
BOOST_STRONG_TYPEDEF(base_strtable_ref, u_strtable_ref);
|
58
|
+
BOOST_STRONG_TYPEDEF(base_strtable_ref, m_strtable_ref);
|
59
|
+
|
60
|
+
// This limits us to 65535 columns.
|
61
|
+
BOOST_STRONG_TYPEDEF(uint16,column_idx_t);
|
62
|
+
// This limits us to 65535 indexes.
|
63
|
+
BOOST_STRONG_TYPEDEF(uint16,index_idx_t);
|
64
|
+
// Actual indices are stored with variable width row indexes.
|
65
|
+
BOOST_STRONG_TYPEDEF(uint32,row_idx_t);
|
66
|
+
|
67
|
+
|
68
|
+
} //namespace Internal
|
69
|
+
|
70
|
+
|
71
|
+
namespace Serialization
|
72
|
+
{
|
73
|
+
struct v1_sformat_tag {};
|
74
|
+
|
75
|
+
template<typename TARGET, typename SCHEME>
|
76
|
+
class Serializer
|
77
|
+
{
|
78
|
+
BOOST_STATIC_ASSERT(sizeof(TARGET) != sizeof(TARGET));
|
79
|
+
};
|
80
|
+
|
81
|
+
|
82
|
+
// Serializer for types whose in-memory representation is used directly as
// the serialized form: packing and unpacking are raw byte copies.
//
// "nbc" stands for non-bounds-checked.  The caller has already checked the
// buffer (once for a whole array, perhaps) and need not check it again for
// every element, so neither function validates the buffer size.
template<typename TARGET>
class NativeSerializer
{
public:
    // Copy the sizeof(TARGET) bytes of s to buffer_start.
    inline static void pack_nbc(const TARGET &s, char *buffer_start)
    {
        // memcpy instead of a cast-and-store: the buffer is a plain char
        // range, so nothing guarantees it is aligned for TARGET.
        memcpy(buffer_start, &s, sizeof(TARGET));
    }

    // Copy sizeof(TARGET) bytes from buffer into *t.
    inline static void unpack_nbc(TARGET *t, const char *buffer)
    {
        memcpy(t, buffer, sizeof(TARGET));
    }
};
|
98
|
+
|
99
|
+
// This is a dumb serializer concept. It stands for non-bounds-checked.
|
100
|
+
//
|
101
|
+
// It does the actual work of pack/unpack after the bounds check has been
|
102
|
+
// done, perhaps by a derived template, or by the owner of an array
|
103
|
+
// of such types.
|
104
|
+
template<typename TARGET>
|
105
|
+
class NBCSerializer
|
106
|
+
{
|
107
|
+
// This must be specialized for each type/platform. It can be either
|
108
|
+
// a NativeSerializer or a ByteSwapSerializer.
|
109
|
+
BOOST_STATIC_ASSERT(sizeof(TARGET) != sizeof(TARGET));
|
110
|
+
};
|
111
|
+
|
112
|
+
#define OINKY_USE_NATIVE_NBC_SERIALIZER(target) \
|
113
|
+
template<> class NBCSerializer<target> : \
|
114
|
+
public NativeSerializer<target> {};
|
115
|
+
|
116
|
+
template<typename TARGET>
|
117
|
+
class FixedSerializer : public NBCSerializer<TARGET>
|
118
|
+
{
|
119
|
+
public:
|
120
|
+
using NBCSerializer<TARGET>::pack_nbc;
|
121
|
+
using NBCSerializer<TARGET>::unpack_nbc;
|
122
|
+
|
123
|
+
// Native serialization is independent of value. Not all serializers are.
|
124
|
+
inline static uint32 virtual_pack() {
|
125
|
+
return sizeof(TARGET);
|
126
|
+
}
|
127
|
+
inline static uint32 virtual_pack(const TARGET &s) {
|
128
|
+
return sizeof(TARGET);
|
129
|
+
}
|
130
|
+
|
131
|
+
inline static void pack(const TARGET &s, char *&buffer_start, const char *buffer_end)
|
132
|
+
{
|
133
|
+
if (buffer_end - buffer_start < sizeof(TARGET)) {
|
134
|
+
throw_error(buffer_overflow());
|
135
|
+
}
|
136
|
+
pack_nbc(s, buffer_start);
|
137
|
+
buffer_start += sizeof(TARGET);
|
138
|
+
}
|
139
|
+
|
140
|
+
inline static void unpack(TARGET *t, const char *buffer, uint32 buflen, uint32 last_end, uint32 &this_end)
|
141
|
+
{
|
142
|
+
if (last_end + sizeof(TARGET) > buflen) {
|
143
|
+
throw_error(buffer_underflow());
|
144
|
+
}
|
145
|
+
unpack_nbc(t, buffer + last_end);
|
146
|
+
this_end = last_end + sizeof(TARGET);
|
147
|
+
}
|
148
|
+
};
|
149
|
+
|
150
|
+
// This only advances this_end if the string matches.
|
151
|
+
static bool check_bytes(const char *bytes, uint32 count, const char *buffer, uint32 buflen, uint32 last_end, uint32 &this_end)
|
152
|
+
{
|
153
|
+
if (count + last_end > buflen) {
|
154
|
+
throw_error(buffer_underflow());
|
155
|
+
}
|
156
|
+
if (0 == memcmp(bytes, buffer + last_end, count)) {
|
157
|
+
this_end = last_end + count;
|
158
|
+
return true;
|
159
|
+
}
|
160
|
+
return false;
|
161
|
+
}
|
162
|
+
|
163
|
+
#define OINKY_ENABLE_FIXED_SERIALIZATION(type,scheme) \
|
164
|
+
template<> class Serializer<type,scheme> : \
|
165
|
+
public FixedSerializer<type> {};
|
166
|
+
|
167
|
+
#define OINKY_INHERIT_NBC_SERIALIZER(derived,base) \
|
168
|
+
template<> class NBCSerializer<derived> { \
|
169
|
+
public: \
|
170
|
+
BOOST_STATIC_ASSERT(sizeof(base) == sizeof(derived)); \
|
171
|
+
inline static void pack_nbc(const derived &s, char *buffer) { \
|
172
|
+
NBCSerializer<base>::pack_nbc(*(const base *)&s,buffer); \
|
173
|
+
} \
|
174
|
+
inline static void unpack_nbc(derived *t, const char *buffer) { \
|
175
|
+
NBCSerializer<base>::unpack_nbc((base *)t,buffer); \
|
176
|
+
} \
|
177
|
+
};
|
178
|
+
|
179
|
+
// Expands to both the NBC serializer and the fixed serializer for `derived`,
// reusing the NBC serializer of `base`.  The last line deliberately carries
// no line continuation, so the macro cannot absorb whatever follows it.
#define OINKY_INHERIT_FIXED_SERIALIZER(derived,base,scheme) \
    OINKY_INHERIT_NBC_SERIALIZER(derived,base) \
    OINKY_ENABLE_FIXED_SERIALIZATION(derived,scheme)
|
182
|
+
|
183
|
+
// This is a simple helper which infers the target type.
|
184
|
+
template<typename SCHEME, typename T>
|
185
|
+
inline void unpack(T* target, const char *buffer, uint32 buflen, uint32 last_end, uint32 &this_end) {
|
186
|
+
Serializer<T,SCHEME>::unpack(target, buffer, buflen, last_end, this_end);
|
187
|
+
}
|
188
|
+
template<typename SCHEME, typename T>
|
189
|
+
inline void unpack_nbc(T* target, const char *buffer) {
|
190
|
+
Serializer<T,SCHEME>::unpack_nbc(target, buffer);
|
191
|
+
}
|
192
|
+
template<typename SCHEME, typename T>
|
193
|
+
inline void pack(const T&s, char *&buffer_start, const char *buffer_end) {
|
194
|
+
Serializer<T,SCHEME>::pack(s, buffer_start, buffer_end);
|
195
|
+
}
|
196
|
+
template<typename SCHEME, typename T>
|
197
|
+
inline void pack_nbc(const T&s, char *buffer_start) {
|
198
|
+
Serializer<T,SCHEME>::pack_nbc(s, buffer_start);
|
199
|
+
}
|
200
|
+
template<typename SCHEME, typename T>
|
201
|
+
inline uint32 virtual_pack(const T&s) {
|
202
|
+
return Serializer<T,SCHEME>::virtual_pack(s);
|
203
|
+
}
|
204
|
+
|
205
|
+
// Bool is more specific than uint8
|
206
|
+
template<>
|
207
|
+
class NBCSerializer<bool>
|
208
|
+
{
|
209
|
+
public:
|
210
|
+
static inline void pack_nbc(const bool &s, char *buffer_start)
|
211
|
+
{
|
212
|
+
*(uint8 *)buffer_start = s ? 1 : 0;
|
213
|
+
}
|
214
|
+
static inline void unpack_nbc(bool *t, const char *buffer) {
|
215
|
+
int8 value = *buffer;
|
216
|
+
if (value == 1) {
|
217
|
+
*t = true;
|
218
|
+
} else if (value == 0) {
|
219
|
+
*t = false;
|
220
|
+
} else {
|
221
|
+
throw_error(bad_encoding());
|
222
|
+
}
|
223
|
+
}
|
224
|
+
};
|
225
|
+
|
226
|
+
|
227
|
+
|
228
|
+
// ##################
|
229
|
+
// Enable for testing. This isn't a great test, but it will catch any
|
230
|
+
// cases where you forget an odd number of times to reverse bytes.
|
231
|
+
//#define OINKY_REVERSE_NATIVE_ENDIAN_TEST
|
232
|
+
// ##################
|
233
|
+
|
234
|
+
|
235
|
+
// we use little endian encoding unless we're testing
|
236
|
+
#if defined(BOOST_BIG_ENDIAN)
|
237
|
+
|
238
|
+
#ifndef OINKY_REVERSE_NATIVE_ENDIAN_TEST
|
239
|
+
#define OINKY_REVERSE_ENDIAN
|
240
|
+
#define OINKY_LITTLE_ENDIAN_ENCODING
|
241
|
+
#else
|
242
|
+
#define OINKY_BIG_ENDIAN_ENCODING
|
243
|
+
#endif
|
244
|
+
|
245
|
+
//#ifdef BOOST_BIG_ENDIAN
|
246
|
+
#elif defined(BOOST_LITTLE_ENDIAN)
|
247
|
+
|
248
|
+
#ifdef OINKY_REVERSE_NATIVE_ENDIAN_TEST
|
249
|
+
#define OINKY_REVERSE_ENDIAN
|
250
|
+
#define OINKY_BIG_ENDIAN_ENCODING
|
251
|
+
#else
|
252
|
+
#define OINKY_LITTLE_ENDIAN_ENCODING
|
253
|
+
#endif
|
254
|
+
|
255
|
+
//#ifdef BOOST_BIG_ENDIAN / #elif defined(BOOST_LITTLE_ENDIAN)
|
256
|
+
#else
|
257
|
+
#error "Unrecognized architecture. Cannot define serializer."
|
258
|
+
#endif
|
259
|
+
|
260
|
+
// universal
|
261
|
+
OINKY_USE_NATIVE_NBC_SERIALIZER(uint8)
|
262
|
+
|
263
|
+
#ifndef OINKY_REVERSE_ENDIAN
|
264
|
+
OINKY_USE_NATIVE_NBC_SERIALIZER(uint64)
|
265
|
+
OINKY_USE_NATIVE_NBC_SERIALIZER(uint32)
|
266
|
+
OINKY_USE_NATIVE_NBC_SERIALIZER(uint16)
|
267
|
+
#else
|
268
|
+
template<> class NBCSerializer<uint16> {
|
269
|
+
public:
|
270
|
+
static inline void pack_nbc(const uint16 &s, char *buffer_start) {
|
271
|
+
*(uint16 *)buffer_start = (s >> 8) | (s << 8);
|
272
|
+
}
|
273
|
+
static inline void unpack_nbc(uint16 *t, const char *buffer) {
|
274
|
+
uint16 x = *(const uint16 *)buffer;
|
275
|
+
*t = (x >> 8) | (x << 8);
|
276
|
+
}
|
277
|
+
};
|
278
|
+
template<> class NBCSerializer<uint32> {
|
279
|
+
public:
|
280
|
+
static inline void pack_nbc(const uint32 &s, char *buffer_start) {
|
281
|
+
*(int32 *)buffer_start = __builtin_bswap32((int32)s);
|
282
|
+
}
|
283
|
+
static inline void unpack_nbc(uint32 *t, const char *buffer) {
|
284
|
+
*t = (uint32) __builtin_bswap32(*(int32 *)buffer);
|
285
|
+
}
|
286
|
+
};
|
287
|
+
template<> class NBCSerializer<uint64> {
|
288
|
+
public:
|
289
|
+
static inline void pack_nbc(const uint64 &s, char *buffer_start) {
|
290
|
+
*(int64 *)buffer_start = __builtin_bswap64((int64)s);
|
291
|
+
}
|
292
|
+
static inline void unpack_nbc(uint64 *t, const char *buffer) {
|
293
|
+
*t = (uint64) __builtin_bswap64(*(int64 *)buffer);
|
294
|
+
}
|
295
|
+
};
|
296
|
+
#endif
|
297
|
+
|
298
|
+
// We use the fixed serializer for all of these.
|
299
|
+
OINKY_ENABLE_FIXED_SERIALIZATION(uint64,v1_sformat_tag)
|
300
|
+
OINKY_ENABLE_FIXED_SERIALIZATION(uint32,v1_sformat_tag)
|
301
|
+
OINKY_ENABLE_FIXED_SERIALIZATION(uint16,v1_sformat_tag)
|
302
|
+
OINKY_ENABLE_FIXED_SERIALIZATION(uint8,v1_sformat_tag)
|
303
|
+
|
304
|
+
// All of these types derive from the 4 basic NBC serializers.
|
305
|
+
OINKY_INHERIT_FIXED_SERIALIZER(int64,uint64,v1_sformat_tag)
|
306
|
+
OINKY_INHERIT_FIXED_SERIALIZER(int32,uint32,v1_sformat_tag)
|
307
|
+
OINKY_INHERIT_FIXED_SERIALIZER(int16,uint16,v1_sformat_tag)
|
308
|
+
OINKY_INHERIT_FIXED_SERIALIZER(int8,uint8,v1_sformat_tag)
|
309
|
+
|
310
|
+
// Bool has its own NBC serializer, defined above.
|
311
|
+
OINKY_ENABLE_FIXED_SERIALIZATION(bool,v1_sformat_tag)
|
312
|
+
|
313
|
+
// Now the fake types (identical to above)
|
314
|
+
using namespace Oinky::Internal;
|
315
|
+
OINKY_INHERIT_FIXED_SERIALIZER(datetime_t,int64,v1_sformat_tag)
|
316
|
+
OINKY_INHERIT_FIXED_SERIALIZER(u_strtable_ref,uint32,v1_sformat_tag)
|
317
|
+
OINKY_INHERIT_FIXED_SERIALIZER(m_strtable_ref,uint32,v1_sformat_tag)
|
318
|
+
OINKY_INHERIT_FIXED_SERIALIZER(column_idx_t,uint16,v1_sformat_tag)
|
319
|
+
OINKY_INHERIT_FIXED_SERIALIZER(index_idx_t,uint16,v1_sformat_tag)
|
320
|
+
OINKY_INHERIT_FIXED_SERIALIZER(row_idx_t,uint32,v1_sformat_tag)
|
321
|
+
|
322
|
+
OINKY_INHERIT_FIXED_SERIALIZER(float32,int32,v1_sformat_tag)
|
323
|
+
OINKY_INHERIT_FIXED_SERIALIZER(float64,int64,v1_sformat_tag)
|
324
|
+
|
325
|
+
} //namespace Serialization
|
326
|
+
|
327
|
+
|
328
|
+
namespace Internal
|
329
|
+
{
|
330
|
+
|
331
|
+
typedef boost::intrusive::avl_set_base_hook<> strmap_hook;
|
332
|
+
|
333
|
+
template<typename REF_T>
|
334
|
+
class indirect_string_header_t : public strmap_hook, boost::noncopyable
|
335
|
+
{
|
336
|
+
typedef indirect_string_header_t header_t;
|
337
|
+
friend class stringtable_t<REF_T>;
|
338
|
+
|
339
|
+
// This is meaningless at first, but the serializer later uses this
|
340
|
+
// to cache the reference value that will be used in serialization.
|
341
|
+
// Only during the serialization process is this field used.
|
342
|
+
REF_T _ref;
|
343
|
+
uint32 _length;
|
344
|
+
|
345
|
+
indirect_string_header_t(const char *str, uint32 length) : _ref(0), _length(length) {
|
346
|
+
memcpy(((char *)this) + sizeof(header_t), str, length);
|
347
|
+
}
|
348
|
+
|
349
|
+
struct compare_header {
|
350
|
+
bool operator()(const header_t &left, const header_t &right) const {
|
351
|
+
return left.as_string() < right.as_string();
|
352
|
+
}
|
353
|
+
bool operator()(const db_string &left, const header_t &right) const {
|
354
|
+
return left < right.as_string();
|
355
|
+
}
|
356
|
+
bool operator()(const header_t &left, const db_string &right) const {
|
357
|
+
return left.as_string() < right;
|
358
|
+
}
|
359
|
+
};
|
360
|
+
|
361
|
+
public:
|
362
|
+
db_string as_string() const { return db_string(begin(), _length); }
|
363
|
+
|
364
|
+
const char *begin() const { return ((const char *) this) + sizeof(header_t); }
|
365
|
+
const char *end() const { return begin() + _length; }
|
366
|
+
uint32 length() const { return _length; }
|
367
|
+
|
368
|
+
// The live contexts.
|
369
|
+
typedef boost::intrusive::avl_set<
|
370
|
+
header_t,
|
371
|
+
boost::intrusive::compare<compare_header> > strmap_t;
|
372
|
+
typedef typename strmap_t::iterator strmap_itr_t;
|
373
|
+
|
374
|
+
// Given a tree and a string, return a pre-existing entry from the tree
|
375
|
+
// or create/insert a new one. Always returns a valid entry.
|
376
|
+
template<typename ALLOC>
|
377
|
+
static header_t *prepare(ALLOC &alloc, strmap_t &treehead, const db_string &str) {
|
378
|
+
// Search/prepare insert in one op.
|
379
|
+
typename strmap_t::insert_commit_data commit;
|
380
|
+
std::pair<strmap_itr_t,bool> res = treehead.insert_check(
|
381
|
+
str,
|
382
|
+
compare_header(),
|
383
|
+
commit);
|
384
|
+
|
385
|
+
// res.second tells us if a value CAN be inserted. False indicates that
|
386
|
+
// the iterator is valid. It is synonymous with the PRE-condition of map.insert
|
387
|
+
if (!res.second) {
|
388
|
+
OINKY_ASSERT(res.first->as_string() == str);
|
389
|
+
return &(*res.first);
|
390
|
+
}
|
391
|
+
|
392
|
+
header_t *x = (header_t *) alloc->malloc(sizeof(header_t) + str.length());
|
393
|
+
// Init
|
394
|
+
::new(x) header_t(str.begin(), str.length());
|
395
|
+
// Insert
|
396
|
+
treehead.insert_commit(*x, commit);
|
397
|
+
return x;
|
398
|
+
}
|
399
|
+
|
400
|
+
static void clear_reference_counts(strmap_t &map)
|
401
|
+
{
|
402
|
+
strmap_itr_t i = map.begin();
|
403
|
+
strmap_itr_t end = map.end();
|
404
|
+
for (;i != end; ++i) {
|
405
|
+
i->_ref = 0;
|
406
|
+
}
|
407
|
+
}
|
408
|
+
};
|
409
|
+
|
410
|
+
// Once a string is inserted into the database, we allocate space for it in the
|
411
|
+
// private heap, and do not reference it thereafter. Thus, this has no reference.
|
412
|
+
// The string's lifetime is equivalent to that of the DB instance.
|
413
|
+
//
|
414
|
+
// This structure can point to strings in the serialized string table, or to
|
415
|
+
// newly inserted strings, which have not yet been given a position in the
|
416
|
+
// table.
|
417
|
+
//
|
418
|
+
// This is identical to the db_string, except a string of this type
|
419
|
+
// can be trusted to be stored in the DB heap before it is constructed.
|
420
|
+
// Therefore, any method which may save the string value (such as table-insert)
|
421
|
+
// must demand this parameter type, while methods such as find() may require
|
422
|
+
// only the unsafe, db_string type.
|
423
|
+
//
|
424
|
+
template<typename REF_T>
|
425
|
+
class u_string_value_safe
|
426
|
+
{
|
427
|
+
friend class stringtable_t<REF_T>;
|
428
|
+
friend class safe_cv_t;
|
429
|
+
typedef u_string_value_safe<REF_T> safestr_t;
|
430
|
+
typedef indirect_string_header_t<REF_T> header_t;
|
431
|
+
|
432
|
+
// If this is a new string, this will be non-null. If it's from the
|
433
|
+
// stringtable, this will be NULL, and the db_string's reference will be
|
434
|
+
// valid.
|
435
|
+
header_t *newstr;
|
436
|
+
db_string str;
|
437
|
+
|
438
|
+
public:
|
439
|
+
// Possibly it will be neither, if it is uninitialized.
|
440
|
+
bool is_new() const { return newstr; }
|
441
|
+
bool is_old() const { return str.xref != (REF_T) 0; }
|
442
|
+
|
443
|
+
REF_T sref() const {
|
444
|
+
OINKY_ASSERT(is_old());
|
445
|
+
return (REF_T) str.xref;
|
446
|
+
}
|
447
|
+
|
448
|
+
const db_string &as_string() const {
|
449
|
+
return str;
|
450
|
+
}
|
451
|
+
operator const db_string &() const {
|
452
|
+
return str;
|
453
|
+
}
|
454
|
+
const db_string *operator->() const {
|
455
|
+
return &str;
|
456
|
+
}
|
457
|
+
|
458
|
+
u_string_value_safe() : newstr(NULL) {}
|
459
|
+
|
460
|
+
template<typename T>
|
461
|
+
bool operator==(const T &other) const { return compare_to(other) == 0; }
|
462
|
+
template<typename T>
|
463
|
+
bool operator!=(const T &other) const { return compare_to(other) != 0; }
|
464
|
+
template<typename T>
|
465
|
+
bool operator<(const T &other) const { return compare_to(other) < 0; }
|
466
|
+
template<typename T>
|
467
|
+
bool operator<=(const T &other) const { return compare_to(other) <= 0; }
|
468
|
+
template<typename T>
|
469
|
+
bool operator>(const T &other) const { return compare_to(other) > 0; }
|
470
|
+
template<typename T>
|
471
|
+
bool operator>=(const T &other) const { return compare_to(other) >= 0; }
|
472
|
+
|
473
|
+
int compare_to(const db_string &other) const {
|
474
|
+
return str.compare_to(other);
|
475
|
+
}
|
476
|
+
|
477
|
+
int compare_to(const safestr_t &other) const {
|
478
|
+
if (is_old() && other.is_old()) {
|
479
|
+
if (sref() < other.sref()) return -1;
|
480
|
+
if (sref() > other.sref()) return 1;
|
481
|
+
return 0;
|
482
|
+
}
|
483
|
+
return str.compare_to(other.str);
|
484
|
+
}
|
485
|
+
};
|
486
|
+
|
487
|
+
typedef u_string_value_safe<u_strtable_ref> ustring_safe;
|
488
|
+
typedef u_string_value_safe<m_strtable_ref> mstring_safe;
|
489
|
+
|
490
|
+
// Safe column value just means localizing any string values. Everything
|
491
|
+
// else is a straightforward copy.
|
492
|
+
class safe_cv_t
|
493
|
+
{
|
494
|
+
friend class stringtable_t<u_strtable_ref>;
|
495
|
+
|
496
|
+
// Column values are always user strings.
|
497
|
+
typedef indirect_string_header_t<u_strtable_ref> header_t;
|
498
|
+
|
499
|
+
// This will only be valid (non-null) if the value is a string, and
|
500
|
+
// it is new. (Highly similar to u_string_value_safe;
|
501
|
+
header_t *newstr;
|
502
|
+
variant_cv_t _value;
|
503
|
+
public:
|
504
|
+
safe_cv_t() : newstr(NULL) {}
|
505
|
+
|
506
|
+
safe_cv_t(const ustring_safe &str) : newstr(str.newstr), _value(str) {}
|
507
|
+
|
508
|
+
safe_cv_t(bool v) : newstr(NULL), _value(v) {}
|
509
|
+
safe_cv_t(int8 v) : newstr(NULL), _value(v) {}
|
510
|
+
safe_cv_t(int16 v) : newstr(NULL), _value(v) {}
|
511
|
+
safe_cv_t(int32 v) : newstr(NULL), _value(v) {}
|
512
|
+
safe_cv_t(int64 v) : newstr(NULL), _value(v) {}
|
513
|
+
safe_cv_t(uint8 v) : newstr(NULL), _value(v) {}
|
514
|
+
safe_cv_t(uint16 v) : newstr(NULL), _value(v) {}
|
515
|
+
safe_cv_t(uint32 v) : newstr(NULL), _value(v) {}
|
516
|
+
safe_cv_t(uint64 v) : newstr(NULL), _value(v) {}
|
517
|
+
safe_cv_t(float32 v) : newstr(NULL), _value(v) {}
|
518
|
+
safe_cv_t(float64 v) : newstr(NULL), _value(v) {}
|
519
|
+
safe_cv_t(datetime_t v) : newstr(NULL), _value(v) {}
|
520
|
+
|
521
|
+
inline void check_valid() const {
|
522
|
+
OINKY_ASSERT(_value.type() && (_value.type() <= OINKY_VALUE_TYPE_MAX));
|
523
|
+
OINKY_ASSERT((_value.type() != value_types::String) ||
|
524
|
+
(memcmp(string_value().begin(), string_value().begin(), string_value().length()) == 0));
|
525
|
+
}
|
526
|
+
|
527
|
+
template<typename STRTABLE>
|
528
|
+
safe_cv_t(column_type_code_t ct, const variant_cv_t &__val, STRTABLE *strtable) :
|
529
|
+
newstr(NULL),
|
530
|
+
_value(__val)
|
531
|
+
{
|
532
|
+
// First coerce the value. This will raise an exception if the
|
533
|
+
// column and value types are incompatible.
|
534
|
+
_value.coerce(ct);
|
535
|
+
// Now if the value is a string, we need to internalize it.
|
536
|
+
if (_value.is_string()) {
|
537
|
+
ustring_safe us = strtable->make_safestring(__val.string_value());
|
538
|
+
|
539
|
+
_value.string_value() = us;
|
540
|
+
newstr = us.newstr;
|
541
|
+
}
|
542
|
+
}
|
543
|
+
|
544
|
+
// This is to be called when we are converting a serialized value to a
|
545
|
+
// safe value. Such transformations are "safe" because we know the
|
546
|
+
// indirect value is already stored, and our reference should be valid.
|
547
|
+
// Thus, we know that we need not store an indirect pointer. It should
|
548
|
+
// obviously not be called except when unpacking fixed values.
|
549
|
+
template<typename STRTABLE>
|
550
|
+
static safe_cv_t from_fixed(const variant_cv_t &val, const STRTABLE *strtable) {
|
551
|
+
safe_cv_t r;
|
552
|
+
r._value = val;
|
553
|
+
r.newstr = NULL;
|
554
|
+
// Can assert validity. But should have been checked by caller.
|
555
|
+
// We know the string isn't new.
|
556
|
+
if (r._value.type() == value_types::String) {
|
557
|
+
const db_string &str(r._value.string_value());
|
558
|
+
OINKY_ASSERT( str.xref && (str == strtable->from_untrusted_ref((u_strtable_ref) str.xref)) );
|
559
|
+
}
|
560
|
+
return r;
|
561
|
+
}
|
562
|
+
|
563
|
+
void coerce(column_type_code_t ct) {
|
564
|
+
// Only integers get coerced, so this is orthogonal to
|
565
|
+
// newstr, so we can just pass it through.
|
566
|
+
_value.coerce(ct);
|
567
|
+
}
|
568
|
+
|
569
|
+
// This could change it to something unsafe.
|
570
|
+
//db_string &string_value() { return _value.string_value(); }
|
571
|
+
const db_string &string_value() const { return _value.string_value(); }
|
572
|
+
|
573
|
+
datetime_t &dt_value() { return _value.dt_value(); }
|
574
|
+
const datetime_t &dt_value() const { return _value.dt_value(); }
|
575
|
+
|
576
|
+
int64 &int_value() { return _value.int_value(); }
|
577
|
+
const int64 &int_value() const { return _value.int_value(); }
|
578
|
+
|
579
|
+
uint64 &uint_value() { return _value.uint_value(); }
|
580
|
+
const uint64 &uint_value() const { return _value.uint_value(); }
|
581
|
+
|
582
|
+
float64 &f64_value() { return _value.f64_value(); }
|
583
|
+
const float64 &f64_value() const { return _value.f64_value(); }
|
584
|
+
|
585
|
+
float32 &f32_value() { return _value.f32_value(); }
|
586
|
+
const float32 &f32_value() const { return _value.f32_value(); }
|
587
|
+
|
588
|
+
bool &bit_value() { return _value.bit_value(); }
|
589
|
+
bool bit_value() const { return _value.bit_value(); }
|
590
|
+
|
591
|
+
bool is_string() const { return _value.is_string(); }
|
592
|
+
bool is_date() const { return _value.is_date(); }
|
593
|
+
bool is_int() const { return _value.is_int(); }
|
594
|
+
bool is_uint() const { return _value.is_uint(); }
|
595
|
+
|
596
|
+
value_type_code_t type() const { return _value.type(); }
|
597
|
+
|
598
|
+
const variant_cv_t &value() const { return _value; }
|
599
|
+
operator const variant_cv_t &() const { return _value; }
|
600
|
+
|
601
|
+
int compare_to(const safe_cv_t &other) const {
|
602
|
+
return _value.compare_to(other.value());
|
603
|
+
}
|
604
|
+
int compare_to(const variant_cv_t &other) const {
|
605
|
+
return _value.compare_to(other);
|
606
|
+
}
|
607
|
+
};
|
608
|
+
|
609
|
+
//
|
610
|
+
// A pair of savepoint markers defines the lifespan of the object. The
|
611
|
+
// creation stamp and deletion stamp.
|
612
|
+
//
|
613
|
+
struct ls_marker
|
614
|
+
{
|
615
|
+
ls_marker() : insert_sp(0), delete_sp(0) {}
|
616
|
+
|
617
|
+
// The top SP marker at the time the object was created. 0 if the
|
618
|
+
// object was deserialized (created by a previous instance).
|
619
|
+
//
|
620
|
+
// If/when this object is un-created by a savepoint rollback, we will
|
621
|
+
// destroy/unlink it, and it will no longer be discoverable.
|
622
|
+
//
|
623
|
+
// If the object gets deleted in the same SP in which it is inserted
|
624
|
+
// (a new SP isn't created in the interim) them we can delete it
|
625
|
+
// immediately. Thus, the primary savepoint overhead (zombie pending_row
|
626
|
+
// objects) is only paid in the case that savepoints are actually being
|
627
|
+
// used. This restricts the paranoid behavior scenario to the one
|
628
|
+
// where the same rows are being repeatedly updated (deleted/inserted)
|
629
|
+
// AND savepoints are being set between each update.
|
630
|
+
sp_marker_t insert_sp;
|
631
|
+
|
632
|
+
// Has this row been deleted? If so, this will be nonzero, equal to the
|
633
|
+
// value of the savepoint marker at the time of delete.
|
634
|
+
//
|
635
|
+
// If/when this object is un-deleted by a savepoint rollback, this will
|
636
|
+
// be reset to zero.
|
637
|
+
sp_marker_t delete_sp;
|
638
|
+
|
639
|
+
bool is_deleted() const { return delete_sp != 0; }
|
640
|
+
};
|
641
|
+
|
642
|
+
struct row_offset_accumulator_t {
|
643
|
+
// Non-bit columns will not update the byte_offset.
|
644
|
+
// Bit columns will increment the bit count.
|
645
|
+
uint32 byte_offset;
|
646
|
+
uint16 bit_count;
|
647
|
+
|
648
|
+
uint32 full_width() const { return ((bit_count + 7) >> 3) + byte_offset; }
|
649
|
+
|
650
|
+
row_offset_accumulator_t() : byte_offset(0), bit_count(0) {}
|
651
|
+
};
|
652
|
+
|
653
|
+
// This is the dynamic column context. In serialization, the column definition
|
654
|
+
// vector and the column ordering in the serialized rows are equivalent. However,
|
655
|
+
// if we alter a table, these will change.
|
656
|
+
//
|
657
|
+
// Explode is the first step of alter-table. So the serialized values are
|
658
|
+
// not necessarily interesting, except if we've added rows in the interim, we
|
659
|
+
// have pending rows which are stored in the same order. Rather than swap
|
660
|
+
// all column values each time we insert a column (which is O(N) expensive
|
661
|
+
// for each column insert) we just keep this mapping. The overhead of this
|
662
|
+
// amounts to a single small allocation per table on mount.
|
663
|
+
//
|
664
|
+
// The index definitions reference columns by their index. Thus, we maintain
|
665
|
+
// a mapping from index to column contexts.
|
666
|
+
class column_ctx
|
667
|
+
{
|
668
|
+
public:
|
669
|
+
mstring_safe colname;
|
670
|
+
safe_cv_t default_value;
|
671
|
+
column_type_code_t ctype;
|
672
|
+
|
673
|
+
// Column create/drop
|
674
|
+
ls_marker ls;
|
675
|
+
|
676
|
+
typedef column_idx_t position_idx_t;
|
677
|
+
|
678
|
+
// When we serialize the column set, we assign each column a new position,
|
679
|
+
// restoring the density and sorted-column order, relative to what we
|
680
|
+
// maintain in memory.
|
681
|
+
column_idx_t new_position;
|
682
|
+
|
683
|
+
// Bytes from the beginning of the row data that this value is
|
684
|
+
// stored. The fixed and pending storage ranges are different because
|
685
|
+
// certain types (string/variant) are larger in pending-rows than they
|
686
|
+
// are in fixed rows.
|
687
|
+
|
688
|
+
// --pending
|
689
|
+
uint32 prow_byte_offset;
|
690
|
+
uint8 prow_bit_offset;
|
691
|
+
// --fixed
|
692
|
+
uint32 frow_byte_offset;
|
693
|
+
uint8 frow_bit_offset;
|
694
|
+
|
695
|
+
// When we assign the new_position, we also recompute what bytes (bits)
|
696
|
+
// in the fixed row this column will consume.
|
697
|
+
uint32 new_row_offset_bytes;
|
698
|
+
// We compute this in two passes. The first pass counts the number of
|
699
|
+
// single-bit columns, and thus needs to have that full range. We
|
700
|
+
// will do a second-pass to shift all but the low 3 bits into
|
701
|
+
// new_row_offset_bytes, after we know the non-bit serialized row length.
|
702
|
+
uint32 new_bit_offset;
|
703
|
+
|
704
|
+
inline column_type_code_t type() const { return ctype; }
|
705
|
+
inline bool is_bit() const { return ctype == column_types::Bit; }
|
706
|
+
|
707
|
+
public:
|
708
|
+
// Take the user's value and make it internal. This involves two steps:
|
709
|
+
// 1) coercion to column's type (if possible...exception otherwise).
|
710
|
+
// 2) internalizing any indirect data and getting a reference to the header.
|
711
|
+
template<typename STRINGTABLE_T>
|
712
|
+
safe_cv_t internalize(const variant_cv_t &val, STRINGTABLE_T *strtable) const {
|
713
|
+
return safe_cv_t(type(), val, strtable);
|
714
|
+
}
|
715
|
+
|
716
|
+
// This is the public/user interface to the index_ctx.
|
717
|
+
class column_handle : public table_column_def
|
718
|
+
{
|
719
|
+
const column_ctx *col;
|
720
|
+
friend class column_ctx;
|
721
|
+
|
722
|
+
public:
|
723
|
+
column_handle() {}
|
724
|
+
|
725
|
+
column_handle(const column_ctx *_col) :
|
726
|
+
table_column_def(
|
727
|
+
_col->colname.as_string(),
|
728
|
+
_col->type(),
|
729
|
+
variant_cv_t(_col->default_value.value())),
|
730
|
+
col(_col)
|
731
|
+
{}
|
732
|
+
};
|
733
|
+
|
734
|
+
static const column_ctx *from_handle(const column_handle &h) {
|
735
|
+
return h.col;
|
736
|
+
}
|
737
|
+
};
|
738
|
+
|
739
|
+
// Column descriptor for index columns.
|
740
|
+
struct idx_column_ctx
|
741
|
+
{
|
742
|
+
const column_ctx *column;
|
743
|
+
bool ascending;
|
744
|
+
// This is the pos'th column in the index. This is equal to
|
745
|
+
// itr - index_def.begin(). Ranges from 0 to (index.width-1)
|
746
|
+
column_idx_t index_pos;
|
747
|
+
|
748
|
+
operator const column_ctx &() const {
|
749
|
+
return *column;
|
750
|
+
}
|
751
|
+
};
|
752
|
+
|
753
|
+
// This is a concept, not a class. It can be used to enumerate/extract/compare
|
754
|
+
// multicolumn values. It also permits examination of column definitions
|
755
|
+
// (types/names), column count, etc. It does this for both serialized and
|
756
|
+
// dynamic rows, as well as both tables and indexes.
|
757
|
+
/*
|
758
|
+
struct multicolumn_value_accessor_concept
|
759
|
+
{
|
760
|
+
uint32 column_count();
|
761
|
+
|
762
|
+
// Parameters are a column definition and a value
|
763
|
+
// bool (*fn)(COL_DEF coldef, const variant_cv_t &val);
|
764
|
+
//
|
765
|
+
// COL_DEF depends on whether an index or a table is being enumerated.
|
766
|
+
template<typename FN>
|
767
|
+
void each_column_value(FN fn) const;
|
768
|
+
};*/
|
769
|
+
|
770
|
+
// This adds functionality to the above methods.
|
771
|
+
template<typename BASE>
|
772
|
+
class multicolumn_value_accessor : public BASE
|
773
|
+
{
|
774
|
+
public:
|
775
|
+
multicolumn_value_accessor() : BASE() {}
|
776
|
+
template<typename T1>
|
777
|
+
multicolumn_value_accessor(T1 t1) : BASE(t1) {}
|
778
|
+
template<typename T1, typename T2>
|
779
|
+
multicolumn_value_accessor(T1 t1, T2 t2) : BASE(t1, t2) {}
|
780
|
+
template<typename T1, typename T2, typename T3>
|
781
|
+
multicolumn_value_accessor(T1 t1, T2 t2, T3 t3) : BASE(t1, t2, t3) {}
|
782
|
+
template<typename T1, typename T2, typename T3, typename T4>
|
783
|
+
multicolumn_value_accessor(T1 t1, T2 t2, T3 t3, T4 t4) : BASE(t1, t2, t3, t4) {}
|
784
|
+
|
785
|
+
private:
|
786
|
+
// This is an enumerator, which can be passed to a row_iterators column enumerator,
|
787
|
+
// which writes each element to a vector.
|
788
|
+
template<typename ITR>
|
789
|
+
static bool copy_row_cb_limit(ITR *i, uint32 *limit, const variant_cv_t &val)
|
790
|
+
{
|
791
|
+
**i = val;
|
792
|
+
++(*i);
|
793
|
+
--(*limit);
|
794
|
+
// We can only keep going if we have space remaining in the target.
|
795
|
+
return *limit > 0;
|
796
|
+
}
|
797
|
+
|
798
|
+
template<typename ITR>
|
799
|
+
static bool copy_row_cb(ITR *i, const ITR *end, const variant_cv_t &val)
|
800
|
+
{
|
801
|
+
**i = val;
|
802
|
+
++(*i);
|
803
|
+
// We can only keep going if we have space remaining in the target.
|
804
|
+
return *i != *end;
|
805
|
+
}
|
806
|
+
|
807
|
+
template<typename OSTREAM>
|
808
|
+
static bool format_cb(const variant_cv_t &value, OSTREAM *os, uint32 *count) {
|
809
|
+
if (*count) {
|
810
|
+
(*os) << ", ";
|
811
|
+
}
|
812
|
+
(*os) << value;
|
813
|
+
++(*count);
|
814
|
+
return true;
|
815
|
+
}
|
816
|
+
public:
|
817
|
+
template<typename OSTREAM>
|
818
|
+
void format(OSTREAM &os) const {
|
819
|
+
uint32 count = 0;
|
820
|
+
BASE::each_column_value(boost::bind(&format_cb<OSTREAM>, _2, &os, &count));
|
821
|
+
}
|
822
|
+
|
823
|
+
typedef multicolumn_value_accessor<BASE> this_t;
|
824
|
+
|
825
|
+
// Returns the end iterator of the new column sequence.
|
826
|
+
template<typename OUT_ITR>
|
827
|
+
OUT_ITR copy_to(OUT_ITR i, uint32 limit) const {
|
828
|
+
BASE::each_column_value(
|
829
|
+
boost::bind(©_row_cb_limit<OUT_ITR>, &i, &limit, _2)
|
830
|
+
);
|
831
|
+
return i;
|
832
|
+
}
|
833
|
+
|
834
|
+
// Returns the end iterator of the new column sequence.
|
835
|
+
template<typename OUT_ITR>
|
836
|
+
OUT_ITR copy_to(OUT_ITR i, OUT_ITR end) const {
|
837
|
+
BASE::each_column_value(
|
838
|
+
boost::bind(©_row_cb<OUT_ITR>, &i, &end, _2)
|
839
|
+
);
|
840
|
+
return i;
|
841
|
+
}
|
842
|
+
|
843
|
+
// Parameter is a variant_cv_t
|
844
|
+
template<typename FN>
|
845
|
+
void each_value(FN fn) const
|
846
|
+
{
|
847
|
+
//
|
848
|
+
check_function_concept<bool(const variant_cv_t &cv)>(fn);
|
849
|
+
|
850
|
+
struct v_only
|
851
|
+
{
|
852
|
+
FN *fn;
|
853
|
+
v_only(FN *_fn) : fn(_fn) {}
|
854
|
+
|
855
|
+
bool operator()(const table_column_def &col, const variant_cv_t &val) {
|
856
|
+
return (*fn)(val);
|
857
|
+
}
|
858
|
+
};
|
859
|
+
v_only cb(&fn);
|
860
|
+
BASE::each_column_value(fn);
|
861
|
+
}
|
862
|
+
|
863
|
+
private:
|
864
|
+
template<typename UCV_ITR>
|
865
|
+
struct _cmp
|
866
|
+
{
|
867
|
+
struct ctx_t {
|
868
|
+
UCV_ITR i;
|
869
|
+
const UCV_ITR &pos_end;
|
870
|
+
int result;
|
871
|
+
|
872
|
+
ctx_t(const UCV_ITR &_pos_begin, const UCV_ITR &_pos_end) :
|
873
|
+
i(_pos_begin), pos_end(_pos_end), result(0)
|
874
|
+
{}
|
875
|
+
|
876
|
+
bool fn(bool reverse, const variant_cv_t &val)
|
877
|
+
{
|
878
|
+
// Verify that enumeration is stopping when we ask it to.
|
879
|
+
OINKY_ASSERT(result == 0);
|
880
|
+
|
881
|
+
// The right position specifier is identical but shorter
|
882
|
+
// than the left. The left is therefore greater.
|
883
|
+
//
|
884
|
+
// NOTE: We define the ordering for partial matches regardless
|
885
|
+
// of the ascending/descending nature of unspecified columns.
|
886
|
+
if (i == pos_end) {
|
887
|
+
result = 1;
|
888
|
+
return false;
|
889
|
+
}
|
890
|
+
result = val.compare_to(*i);
|
891
|
+
// Handle the reverse case.
|
892
|
+
if (reverse) {
|
893
|
+
result = -result;
|
894
|
+
}
|
895
|
+
++i;
|
896
|
+
// If identical, then continue enumeration.
|
897
|
+
if (!result) return true;
|
898
|
+
// Otherwise, stop. We are done.
|
899
|
+
return false;
|
900
|
+
}
|
901
|
+
};
|
902
|
+
|
903
|
+
ctx_t &ctx;
|
904
|
+
|
905
|
+
_cmp(ctx_t &_ctx) : ctx(_ctx) {}
|
906
|
+
|
907
|
+
// Provide separate implementations, since this is used both for
|
908
|
+
// tables and indexes.
|
909
|
+
//
|
910
|
+
// NOTE: We provide a comparison function for tables even though
|
911
|
+
// tables aren't naturally sorted. Someone may still want to
|
912
|
+
// enumerate over a table and filter according to some comparison
|
913
|
+
// function.
|
914
|
+
bool operator()(const column_ctx &idef, const variant_cv_t &val) const
|
915
|
+
{
|
916
|
+
return ctx.fn(false, val);
|
917
|
+
}
|
918
|
+
};
|
919
|
+
|
920
|
+
public:
|
921
|
+
// Compares this row to an array of values.
|
922
|
+
template<typename UCV_ITR>
|
923
|
+
int compare_to(const UCV_ITR &pos_begin, const UCV_ITR &pos_end) const {
|
924
|
+
typename _cmp<UCV_ITR>::ctx_t c(pos_begin, pos_end);
|
925
|
+
_cmp<UCV_ITR> comparer(c);
|
926
|
+
BASE::each_column_value(comparer);
|
927
|
+
|
928
|
+
// If we didn't reach the end of the specifier, but matched the entire
|
929
|
+
// iterator, then the specifier (right) is beyond the iterator(left).
|
930
|
+
if ((c.result == 0) && (c.i != pos_end)) {
|
931
|
+
return -1;
|
932
|
+
}
|
933
|
+
return c.result;
|
934
|
+
}
|
935
|
+
|
936
|
+
// Compares this row to another row via its accessor.
|
937
|
+
template<typename OTHERBASE>
|
938
|
+
int compare_to(const multicolumn_value_accessor<OTHERBASE> &other) const {
|
939
|
+
// Just stack-alloc a vector to hold the other's values, then
|
940
|
+
// compare self to the vector.
|
941
|
+
uint32 othersize = other.column_count();
|
942
|
+
variant_cv_t *othera = (variant_cv_t *) alloca(sizeof(variant_cv_t) * othersize);
|
943
|
+
other.copy_to(othera, othera + othersize);
|
944
|
+
return compare_to(othera, othera + othersize);
|
945
|
+
}
|
946
|
+
};
|
947
|
+
|
948
|
+
template<typename SELECTOR_T>
|
949
|
+
struct column_selector_accessor
|
950
|
+
{
|
951
|
+
typedef SELECTOR_T selector_t;
|
952
|
+
typedef typename selector_t::table_ctx table_ctx;
|
953
|
+
|
954
|
+
const selector_t &cs;
|
955
|
+
|
956
|
+
column_selector_accessor(const selector_t &_cs) : cs(_cs) {}
|
957
|
+
|
958
|
+
const table_ctx *table() const { return cs.table; }
|
959
|
+
const column_ctx *const *colrefs() const { return cs.colrefs; }
|
960
|
+
uint32 colcount() const { return cs.colcount; }
|
961
|
+
template<typename TBL>
|
962
|
+
void check_valid(TBL *table) const { cs.check_valid(table); }
|
963
|
+
};
|
964
|
+
|
965
|
+
template<typename ITR_T>
|
966
|
+
class simple_sequence
|
967
|
+
{
|
968
|
+
ITR_T _begin, _end;
|
969
|
+
uint32 _count;
|
970
|
+
|
971
|
+
public:
|
972
|
+
typedef ITR_T itr_t;
|
973
|
+
|
974
|
+
simple_sequence(const ITR_T &__begin, const ITR_T &__end) :
|
975
|
+
_begin(__begin), _end(__end), _count(std::distance(_begin, _end))
|
976
|
+
{}
|
977
|
+
|
978
|
+
itr_t begin() const { return _begin; }
|
979
|
+
itr_t end() const { return _end; }
|
980
|
+
uint32 size() const { return _count; }
|
981
|
+
};
|
982
|
+
|
983
|
+
// Returns the number of bytes a value of column type `ct` occupies in a
// serialized (fixed) row.  Reports bad_encoding via throw_error when the
// type code exceeds OINKY_VALUE_TYPE_MAX.  Must not be called for Bit
// columns.
inline uint8 compute_serialized_width(column_type_code_t ct)
{
    // Width per type code.
    // User string refs are always less than 8 bytes, so the variant width
    // is max() + 1 = 9
    static const uint8 serialized_widths[] = {
        9, 0, 1, 2, 4, 8, 8, sizeof(u_strtable_ref), 1, 2, 4, 8, 4, 8
    };
    BOOST_STATIC_ASSERT(OINKY_VALUE_TYPE_MAX + 1== sizeof(serialized_widths));

    if (ct > OINKY_VALUE_TYPE_MAX) {
        throw_error(bad_encoding());
    }

    // This is less than a byte, which we have no way of returning.
    OINKY_ASSERT(ct != column_types::Bit);

    return serialized_widths[ct];
}
|
997
|
+
// Returns the number of bytes a value of column type `ct` occupies in a
// pending (in-memory) row.  The type code is only checked by assertion
// here.  Must not be called for Bit columns.
inline uint8 compute_pending_width(column_type_code_t ct)
{
    // Width per type code.
    // String and variant require safe_cv_t.  Everything else is identical
    // to serialized.
    static const uint8 pending_widths[] = {
        sizeof(safe_cv_t), 0, 1, 2, 4, 8, 8, sizeof(safe_cv_t), 1, 2, 4, 8, 4, 8
    };
    BOOST_STATIC_ASSERT(OINKY_VALUE_TYPE_MAX + 1== sizeof(pending_widths));

    // This is less than a byte, which we have no way of returning.
    OINKY_ASSERT(ct != column_types::Bit);
    OINKY_ASSERT(ct <= OINKY_VALUE_TYPE_MAX);

    return pending_widths[ct];
}
|
1008
|
+
|
1009
|
+
struct index_rowkey_xform
|
1010
|
+
{
|
1011
|
+
uint8 dr_width;
|
1012
|
+
|
1013
|
+
// The xform process depends on machine architecture, not on target layout.
|
1014
|
+
// That's why we key off BOOST_BIN_ENDIAN not OINKY_REVERSE_ENDIAN
|
1015
|
+
uint8 dr_shift;
|
1016
|
+
uint32 dr_mask;
|
1017
|
+
|
1018
|
+
inline static uint8 compute_row_index_line_width(row_idx_t row_count)
|
1019
|
+
{
|
1020
|
+
// This case doesn't matter. We won't serialize any.
|
1021
|
+
if (row_count == 0) return 1;
|
1022
|
+
// Otherwise, the max idx is row_count-1.
|
1023
|
+
uint32 rc = (uint32) row_count - 1;
|
1024
|
+
if (rc & 0xff<<24) {
|
1025
|
+
// 32 bit indices
|
1026
|
+
return 4;
|
1027
|
+
} else if (rc & 0xff<<16) {
|
1028
|
+
// 24 bit indices
|
1029
|
+
return 3;
|
1030
|
+
} else if (rc & 0xff<<8) {
|
1031
|
+
// 16 bit indices
|
1032
|
+
return 2;
|
1033
|
+
} else {
|
1034
|
+
// 8 bit indices
|
1035
|
+
return 1;
|
1036
|
+
}
|
1037
|
+
}
|
1038
|
+
|
1039
|
+
index_rowkey_xform() : dr_width(0) {}
|
1040
|
+
index_rowkey_xform(row_idx_t row_count)
|
1041
|
+
{
|
1042
|
+
dr_width = compute_row_index_line_width(row_count);
|
1043
|
+
|
1044
|
+
const uint8 shift[] = {24, 16, 8, 0};
|
1045
|
+
dr_shift = shift[dr_width-1];
|
1046
|
+
|
1047
|
+
const uint32 mask[] = {0xff, 0xffff, 0xffffff, 0xffffffff};
|
1048
|
+
dr_mask = mask[dr_width-1];
|
1049
|
+
}
|
1050
|
+
|
1051
|
+
template<typename scheme>
|
1052
|
+
inline row_idx_t unpack(row_idx_t position, const char *base) const {
|
1053
|
+
OINKY_ASSERT(dr_width);
|
1054
|
+
|
1055
|
+
BOOST_STATIC_ASSERT(sizeof(row_idx_t) == 4);
|
1056
|
+
uint32 value;
|
1057
|
+
uint32 s_value = * (uint32 *)(base + (position * dr_width));
|
1058
|
+
Serialization::unpack_nbc<scheme,uint32>(&value, (const char *)&s_value);
|
1059
|
+
|
1060
|
+
#if defined(OINKY_BIG_ENDIAN_ENCODING)
|
1061
|
+
return (row_idx_t)(value >> dr_shift);
|
1062
|
+
#elif defined(OINKY_LITTLE_ENDIAN_ENCODING)
|
1063
|
+
return (row_idx_t)(value & dr_mask);
|
1064
|
+
#else
|
1065
|
+
#error "OINKY ENDIAN ENCODING UNDEFINED"
|
1066
|
+
#endif
|
1067
|
+
}
|
1068
|
+
|
1069
|
+
template<typename scheme>
|
1070
|
+
inline uint32 make_svalue(row_idx_t value) const {
|
1071
|
+
uint32 s_value;
|
1072
|
+
#if defined(OINKY_BIG_ENDIAN_ENCODING)
|
1073
|
+
value <<= dr_shift;
|
1074
|
+
#elif defined(OINKY_LITTLE_ENDIAN_ENCODING)
|
1075
|
+
OINKY_ASSERT((value & ~dr_mask) == 0);
|
1076
|
+
#else
|
1077
|
+
#error "OINKY ENDIAN ENCODING UNDEFINED"
|
1078
|
+
#endif
|
1079
|
+
Serialization::pack_nbc<scheme,row_idx_t>(value, (char *)&s_value);
|
1080
|
+
return s_value;
|
1081
|
+
}
|
1082
|
+
|
1083
|
+
template<typename scheme>
|
1084
|
+
inline void pack_nbc_destructive(char *target, row_idx_t value) const {
|
1085
|
+
uint32 s_value = make_svalue<scheme>(value);
|
1086
|
+
*(uint32 *)target = (uint32) s_value;
|
1087
|
+
}
|
1088
|
+
|
1089
|
+
template<typename scheme>
|
1090
|
+
inline void pack_nbc_orbits(char *target, row_idx_t value) const {
|
1091
|
+
uint32 s_value = make_svalue<scheme>(value);
|
1092
|
+
*(uint32 *)target |= (uint32) s_value;
|
1093
|
+
}
|
1094
|
+
};
|
1095
|
+
|
1096
|
+
|
1097
|
+
} //namespace Internal
|
1098
|
+
} //namespace Oinky
|
1099
|
+
|
1100
|
+
|
1101
|
+
namespace std
|
1102
|
+
{
|
1103
|
+
|
1104
|
+
template<class _Traits, class BASE>
|
1105
|
+
inline std::basic_ostream<char, _Traits>&
|
1106
|
+
operator<<(
|
1107
|
+
std::basic_ostream<char, _Traits> &os,
|
1108
|
+
const Oinky::Internal::multicolumn_value_accessor<BASE> &accessor
|
1109
|
+
)
|
1110
|
+
{
|
1111
|
+
accessor.format(os);
|
1112
|
+
return os;
|
1113
|
+
}
|
1114
|
+
|
1115
|
+
}//namespace std
|
1116
|
+
|