chaine 3.13.1__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of chaine might be problematic. Click here for more details.
- chaine/__init__.py +2 -0
- chaine/_core/crf.cpp +19854 -0
- chaine/_core/crf.cpython-312-darwin.so +0 -0
- chaine/_core/crf.pyx +271 -0
- chaine/_core/crfsuite/COPYING +27 -0
- chaine/_core/crfsuite/README +183 -0
- chaine/_core/crfsuite/include/crfsuite.h +1077 -0
- chaine/_core/crfsuite/include/crfsuite.hpp +649 -0
- chaine/_core/crfsuite/include/crfsuite_api.hpp +406 -0
- chaine/_core/crfsuite/include/os.h +65 -0
- chaine/_core/crfsuite/lib/cqdb/COPYING +28 -0
- chaine/_core/crfsuite/lib/cqdb/include/cqdb.h +518 -0
- chaine/_core/crfsuite/lib/cqdb/src/cqdb.c +639 -0
- chaine/_core/crfsuite/lib/cqdb/src/lookup3.c +1271 -0
- chaine/_core/crfsuite/lib/cqdb/src/main.c +184 -0
- chaine/_core/crfsuite/lib/crf/src/crf1d.h +354 -0
- chaine/_core/crfsuite/lib/crf/src/crf1d_context.c +788 -0
- chaine/_core/crfsuite/lib/crf/src/crf1d_encode.c +1020 -0
- chaine/_core/crfsuite/lib/crf/src/crf1d_feature.c +382 -0
- chaine/_core/crfsuite/lib/crf/src/crf1d_model.c +1085 -0
- chaine/_core/crfsuite/lib/crf/src/crf1d_tag.c +582 -0
- chaine/_core/crfsuite/lib/crf/src/crfsuite.c +500 -0
- chaine/_core/crfsuite/lib/crf/src/crfsuite_internal.h +233 -0
- chaine/_core/crfsuite/lib/crf/src/crfsuite_train.c +302 -0
- chaine/_core/crfsuite/lib/crf/src/dataset.c +115 -0
- chaine/_core/crfsuite/lib/crf/src/dictionary.c +127 -0
- chaine/_core/crfsuite/lib/crf/src/holdout.c +83 -0
- chaine/_core/crfsuite/lib/crf/src/json.c +1497 -0
- chaine/_core/crfsuite/lib/crf/src/json.h +120 -0
- chaine/_core/crfsuite/lib/crf/src/logging.c +85 -0
- chaine/_core/crfsuite/lib/crf/src/logging.h +49 -0
- chaine/_core/crfsuite/lib/crf/src/params.c +370 -0
- chaine/_core/crfsuite/lib/crf/src/params.h +84 -0
- chaine/_core/crfsuite/lib/crf/src/quark.c +180 -0
- chaine/_core/crfsuite/lib/crf/src/quark.h +46 -0
- chaine/_core/crfsuite/lib/crf/src/rumavl.c +1178 -0
- chaine/_core/crfsuite/lib/crf/src/rumavl.h +144 -0
- chaine/_core/crfsuite/lib/crf/src/train_arow.c +409 -0
- chaine/_core/crfsuite/lib/crf/src/train_averaged_perceptron.c +237 -0
- chaine/_core/crfsuite/lib/crf/src/train_l2sgd.c +491 -0
- chaine/_core/crfsuite/lib/crf/src/train_lbfgs.c +323 -0
- chaine/_core/crfsuite/lib/crf/src/train_passive_aggressive.c +442 -0
- chaine/_core/crfsuite/lib/crf/src/vecmath.h +360 -0
- chaine/_core/crfsuite/swig/crfsuite.cpp +1 -0
- chaine/_core/crfsuite_api.pxd +67 -0
- chaine/_core/liblbfgs/COPYING +22 -0
- chaine/_core/liblbfgs/README +71 -0
- chaine/_core/liblbfgs/include/lbfgs.h +745 -0
- chaine/_core/liblbfgs/lib/arithmetic_ansi.h +142 -0
- chaine/_core/liblbfgs/lib/arithmetic_sse_double.h +303 -0
- chaine/_core/liblbfgs/lib/arithmetic_sse_float.h +312 -0
- chaine/_core/liblbfgs/lib/lbfgs.c +1531 -0
- chaine/_core/tagger_wrapper.hpp +58 -0
- chaine/_core/trainer_wrapper.cpp +32 -0
- chaine/_core/trainer_wrapper.hpp +26 -0
- chaine/crf.py +505 -0
- chaine/logging.py +214 -0
- chaine/optimization/__init__.py +10 -0
- chaine/optimization/metrics.py +129 -0
- chaine/optimization/spaces.py +394 -0
- chaine/optimization/trial.py +103 -0
- chaine/optimization/utils.py +119 -0
- chaine/training.py +184 -0
- chaine/typing.py +18 -0
- chaine/validation.py +43 -0
- chaine-3.13.1.dist-info/METADATA +348 -0
- chaine-3.13.1.dist-info/RECORD +68 -0
- chaine-3.13.1.dist-info/WHEEL +5 -0
|
@@ -0,0 +1,1178 @@
|
|
|
1
|
+
/*----------------------------------------------------------------------------
|
|
2
|
+
* RumAVL - Threaded AVL Tree Implementation
|
|
3
|
+
*
|
|
4
|
+
* Copyright (c) 2005-2007 Jesse Long <jpl@unknown.za.net>
|
|
5
|
+
* All rights reserved.
|
|
6
|
+
*
|
|
7
|
+
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
8
|
+
* copy of this software and associated documentation files (the "Software"),
|
|
9
|
+
* to deal in the Software without restriction, including without limitation
|
|
10
|
+
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
11
|
+
* and/or sell copies of the Software, and to permit persons to whom the
|
|
12
|
+
* Software is furnished to do so, subject to the following conditions:
|
|
13
|
+
*
|
|
14
|
+
* 1. The above copyright notice and this permission notice shall be
|
|
15
|
+
* included in all copies or substantial portions of the Software.
|
|
16
|
+
* 2. The origin of the Software must not be misrepresented; you must not
|
|
17
|
+
* claim that you wrote the original Software.
|
|
18
|
+
* 3. Altered source versions of the Software must be plainly marked as
|
|
19
|
+
* such, and must not be misrepresented as being the original Software.
|
|
20
|
+
*
|
|
21
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
22
|
+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
23
|
+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
24
|
+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
25
|
+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
26
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
27
|
+
* DEALINGS IN THE SOFTWARE.
|
|
28
|
+
*--------------------------------------------------------------------------*/
|
|
29
|
+
|
|
30
|
+
/*----------------------------------------------------------------------------
|
|
31
|
+
* Although not required by the license, I would appreciate it if you would
|
|
32
|
+
* send me a mail notifying me of bugfixes and enhancements you make to this
|
|
33
|
+
* code. My email address is <jpl@unknown.za.net>
|
|
34
|
+
*--------------------------------------------------------------------------*/
|
|
35
|
+
|
|
36
|
+
/*----------------------------------------------------------------------------
|
|
37
|
+
* DEVELOPMENT NOTES
|
|
38
|
+
*
|
|
39
|
+
* Links
|
|
40
|
+
* Each node has two links, link[0] is the left child, and link[1] is the
|
|
41
|
+
* right child. When a link points to a node that is actually below it in
|
|
42
|
+
* the BST, the respective thread flag is marked 0. When the link is a
|
|
43
|
+
* thread, the respective thread flag is marked 1, or 2 if the thread is
|
|
44
|
+
* to the opposite edge of the BST.
|
|
45
|
+
*
|
|
46
|
+
* Direction
|
|
47
|
+
* In RumAVL we use the numbers -1 (RUMAVL_DESC) and +1 (RUMAVL_ASC) to
|
|
48
|
+
* indicate direction, where -1 (RUMAVL_DESC) means left or descending in
|
|
49
|
+
* value, and +1 (RUMAVL_ASC) means right or ascending in value.
|
|
50
|
+
*
|
|
51
|
+
* Threads
|
|
52
|
+
* In RumAVL, the threads (non-bst links of leaves) are implemented in a
|
|
53
|
+
* sort of circular list. It is important to note that you cannot go
|
|
54
|
+
* through the entire list by following the same link, as you would when
|
|
55
|
+
* going through a linked list. Draw an example threaded AVL tree on paper
|
|
56
|
+
* and see why.
|
|
57
|
+
*
|
|
58
|
+
*--------------------------------------------------------------------------*/
|
|
59
|
+
|
|
60
|
+
#include <stdlib.h>
|
|
61
|
+
#include <string.h>
|
|
62
|
+
|
|
63
|
+
#include "rumavl.h"
|
|
64
|
+
|
|
65
|
+
/* For memory allocation debugging
|
|
66
|
+
#ifdef USE_MEMBUG
|
|
67
|
+
#define MEMBUG_DEFINES
|
|
68
|
+
#include <membug.h>
|
|
69
|
+
#endif */
|
|
70
|
+
|
|
71
|
+
/*****************************************************************************
|
|
72
|
+
*
|
|
73
|
+
* MACROS - to make readability better
|
|
74
|
+
*
|
|
75
|
+
****************************************************************************/
|
|
76
|
+
|
|
77
|
+
/* Indices into the link[] and thread[] arrays of a node */
#define LEFT (0)
#define RIGHT (1)

/* Direction => link index. Expects RUMAVL_DESC (-1) or RUMAVL_ASC (+1):
 * -1 maps to LEFT (0), +1 maps to RIGHT (1). */
#define LINK_NO(i) ((1 + (i)) / 2)
/* Flip a link index. Expects LEFT or RIGHT: 0 => 1, 1 => 0. */
#define OTHER_LINK(i) (1 ^ (i))

/* Link index => direction. Expects LEFT or RIGHT: 0 => -1, 1 => +1. */
#define DIR_NO(i) ((2 * (i)) - 1)
/* Flip a direction. Expects RUMAVL_DESC or RUMAVL_ASC: -1 => +1, +1 => -1. */
#define OTHER_DIR(i) (-(i))

/* Thin wrappers that route every allocation request through mem_mgr():
 * a NULL pointer requests a fresh allocation, a size of 0 releases one,
 * and anything else resizes. (The spelling of mem_relloc is kept as is.) */
#define mem_alloc(tree, bytes) mem_mgr((tree), NULL, (bytes))
#define mem_free(tree, ptr) mem_mgr((tree), (ptr), 0)
#define mem_relloc(tree, ptr, bytes) mem_mgr((tree), (ptr), (bytes))
|
|
95
|
+
|
|
96
|
+
/*****************************************************************************
|
|
97
|
+
*
|
|
98
|
+
* DATA TYPES
|
|
99
|
+
*
|
|
100
|
+
****************************************************************************/
|
|
101
|
+
|
|
102
|
+
/*
|
|
103
|
+
* RUMAVL - the handle on the tree
|
|
104
|
+
*
|
|
105
|
+
* All settings for a tree are in the RUMAVL object, including memory
|
|
106
|
+
* management, delete and overwrite callback functions, and the record
|
|
107
|
+
* comparison function pointer.
|
|
108
|
+
*/
|
|
109
|
+
struct rumavl
|
|
110
|
+
{
|
|
111
|
+
RUMAVL_NODE *root; /* root node in tree */
|
|
112
|
+
size_t reclen; /* length of records */
|
|
113
|
+
int (*cmp)(const void *, /* function to compare records */
|
|
114
|
+
const void *,
|
|
115
|
+
size_t,
|
|
116
|
+
void *);
|
|
117
|
+
int (*owcb)(RUMAVL *, RUMAVL_NODE *, void *, const void *, void *);
|
|
118
|
+
int (*delcb)(RUMAVL *, RUMAVL_NODE *, void *, void *);
|
|
119
|
+
void *(*alloc)(void *, size_t, void *);
|
|
120
|
+
void *udata; /* user data for callbacks */
|
|
121
|
+
};
|
|
122
|
+
|
|
123
|
+
/*
|
|
124
|
+
* RUMAVL_NODE - the node structure
|
|
125
|
+
*
|
|
126
|
+
* RUMAVL_NODE's contain all information about a specific node, including
|
|
127
|
+
* links to the right and left children of the node, and flags (thread)
|
|
128
|
+
* indicating whether or not the links are threads or not, and the balance
|
|
129
|
+
* factor of the node.
|
|
130
|
+
*
|
|
131
|
+
* The record associated with each node is allocated along with the node,
|
|
132
|
+
* and can be found directly after the node, by using the NODE_REC() macro.
|
|
133
|
+
*/
|
|
134
|
+
struct rumavl_node
|
|
135
|
+
{
|
|
136
|
+
RUMAVL_NODE *link[2]; /* links to child nodes */
|
|
137
|
+
char thread[2]; /* flags for links, normal link or thread? */
|
|
138
|
+
signed char balance; /* balance factor for node */
|
|
139
|
+
void *rec;
|
|
140
|
+
#define NODE_REC(node) ((node)->rec)
|
|
141
|
+
};
|
|
142
|
+
|
|
143
|
+
/*
|
|
144
|
+
* RUMAVL_STACK - a stack of nodes forming a path to a node
|
|
145
|
+
*
|
|
146
|
+
* RUMAVL_STACK's are used while deleting and inserting nodes, where effects
|
|
147
|
+
* could be felt by all parents of the node. RUMAVL_STACK's are implemented
|
|
148
|
+
* in a singly linked list. This is a change from the method used by most AVL
|
|
149
|
+
* trees, where a static array node pointers are allocated. Linked lists allow
|
|
150
|
+
* fo an unlimited height in the AVL tree.
|
|
151
|
+
*
|
|
152
|
+
* node is a pointer to the parent node's pointer to the node in question.
|
|
153
|
+
* dir is the direction of the descent from this node.
|
|
154
|
+
*/
|
|
155
|
+
typedef struct rumavl_stack RUMAVL_STACK;
|
|
156
|
+
struct rumavl_stack
|
|
157
|
+
{
|
|
158
|
+
RUMAVL_STACK *next;
|
|
159
|
+
RUMAVL_NODE **node;
|
|
160
|
+
int dir;
|
|
161
|
+
};
|
|
162
|
+
|
|
163
|
+
/* various other RumAVL specific structs defined in rumavl.h */
|
|
164
|
+
|
|
165
|
+
/*****************************************************************************
|
|
166
|
+
*
|
|
167
|
+
* FORWARD DECLARATIONS
|
|
168
|
+
*
|
|
169
|
+
****************************************************************************/
|
|
170
|
+
|
|
171
|
+
static RUMAVL_NODE *seq_next(RUMAVL_NODE *node, int dir);
|
|
172
|
+
static RUMAVL_NODE *node_new(RUMAVL *tree, const void *record);
|
|
173
|
+
static void node_destroy(RUMAVL *tree, RUMAVL_NODE *node);
|
|
174
|
+
static int stack_push(RUMAVL *tree, RUMAVL_STACK **stack, RUMAVL_NODE **node,
|
|
175
|
+
int dir);
|
|
176
|
+
static void stack_destroy(RUMAVL *tree, RUMAVL_STACK *stack);
|
|
177
|
+
static void stack_update(RUMAVL *tree, RUMAVL_STACK *stack, signed char diff);
|
|
178
|
+
|
|
179
|
+
static signed char balance(RUMAVL_NODE **node, int dir);
|
|
180
|
+
static signed char rotate(RUMAVL_NODE **node, int dir);
|
|
181
|
+
|
|
182
|
+
static void *mem_mgr(RUMAVL *tree, void *ptr, size_t size);
|
|
183
|
+
|
|
184
|
+
static int rec_cmp(RUMAVL *tree, const void *reca, const void *recb);
|
|
185
|
+
static int my_cmp(const void *a, const void *b, size_t n, void *udata);
|
|
186
|
+
|
|
187
|
+
static int insert_cb(RUMAVL *t, RUMAVL_NODE *n, void *r1, const void *r2,
|
|
188
|
+
void *udata);
|
|
189
|
+
|
|
190
|
+
/*****************************************************************************
|
|
191
|
+
*
|
|
192
|
+
* PUBLIC FUNCTIONS
|
|
193
|
+
*
|
|
194
|
+
****************************************************************************/
|
|
195
|
+
|
|
196
|
+
/*----------------------------------------------------------------------------
|
|
197
|
+
* rumavl_new - allocates a new RUMAVL object, and initialises it. This is the
|
|
198
|
+
* only time the user gets to set the record length and record comparison
|
|
199
|
+
* function, to avoid data loss.
|
|
200
|
+
*--------------------------------------------------------------------------*/
|
|
201
|
+
RUMAVL *rumavl_new(size_t reclen,
|
|
202
|
+
int (*cmp)(const void *, const void *, size_t, void *),
|
|
203
|
+
void *(*alloc)(void *, size_t, void *),
|
|
204
|
+
void *udata)
|
|
205
|
+
{
|
|
206
|
+
RUMAVL *tree;
|
|
207
|
+
|
|
208
|
+
if (reclen < 1)
|
|
209
|
+
return NULL;
|
|
210
|
+
|
|
211
|
+
if (alloc == NULL)
|
|
212
|
+
tree = malloc(sizeof(RUMAVL));
|
|
213
|
+
else
|
|
214
|
+
tree = alloc(NULL, sizeof(RUMAVL), udata);
|
|
215
|
+
|
|
216
|
+
if (tree == NULL)
|
|
217
|
+
return NULL;
|
|
218
|
+
|
|
219
|
+
tree->root = NULL;
|
|
220
|
+
|
|
221
|
+
tree->owcb = NULL;
|
|
222
|
+
tree->delcb = NULL;
|
|
223
|
+
|
|
224
|
+
tree->alloc = alloc;
|
|
225
|
+
|
|
226
|
+
tree->reclen = reclen;
|
|
227
|
+
tree->udata = udata;
|
|
228
|
+
|
|
229
|
+
if (cmp == NULL)
|
|
230
|
+
tree->cmp = my_cmp;
|
|
231
|
+
else
|
|
232
|
+
tree->cmp = cmp;
|
|
233
|
+
|
|
234
|
+
return tree;
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
/*----------------------------------------------------------------------------
|
|
238
|
+
* rumavl_destroy - cleanly frees all memory used by the RUMAVL, as well as
|
|
239
|
+
* all nodes. All nodes are passed to the delete callback function in case the
|
|
240
|
+
* user has a special way of destroying nodes. The return value of the delete
|
|
241
|
+
* callback function is ignored, because once we start destroying we cant
|
|
242
|
+
* simply undestroy half the nodes.
|
|
243
|
+
*--------------------------------------------------------------------------*/
|
|
244
|
+
void rumavl_destroy(RUMAVL *tree)
|
|
245
|
+
{
|
|
246
|
+
RUMAVL_NODE *node, *tmp;
|
|
247
|
+
|
|
248
|
+
if (tree->root != NULL)
|
|
249
|
+
{
|
|
250
|
+
/* walk through tree deleting all */
|
|
251
|
+
node = tree->root;
|
|
252
|
+
while (node->thread[LEFT] == 0) /* move to bottom left most node */
|
|
253
|
+
node = node->link[LEFT];
|
|
254
|
+
while (node != NULL)
|
|
255
|
+
{
|
|
256
|
+
tmp = seq_next(node, RUMAVL_ASC);
|
|
257
|
+
if (tree->delcb != NULL)
|
|
258
|
+
{
|
|
259
|
+
tree->delcb(tree, node, NODE_REC(node), tree->udata);
|
|
260
|
+
}
|
|
261
|
+
node_destroy(tree, node);
|
|
262
|
+
node = tmp;
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
if (tree->alloc == NULL)
|
|
267
|
+
free(tree);
|
|
268
|
+
else
|
|
269
|
+
tree->alloc(tree, 0, tree->udata);
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
/*---------------------------------------------------------------------------
|
|
273
|
+
* rumavl_udata - get a pointer to the tree's user pointer
|
|
274
|
+
*-------------------------------------------------------------------------*/
|
|
275
|
+
void **rumavl_udata(RUMAVL *tree)
|
|
276
|
+
{
|
|
277
|
+
return &tree->udata;
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
/* rumavl_owcb - return the address of the tree's overwrite callback slot,
 * so the caller can read or install the callback. */
int (**rumavl_owcb(RUMAVL *tree))(RUMAVL *, RUMAVL_NODE *, void *,
                                  const void *, void *)
{
    return &(tree->owcb);
}
|
|
285
|
+
|
|
286
|
+
/* rumavl_delcb - return the address of the tree's delete callback slot,
 * so the caller can read or install the callback. */
int (**rumavl_delcb(RUMAVL *tree))(RUMAVL *, RUMAVL_NODE *, void *, void *)
{
    return &(tree->delcb);
}
|
|
290
|
+
|
|
291
|
+
/*----------------------------------------------------------------------------
|
|
292
|
+
* rumavl_set - set a node, overwriting if necessary, or creating if the node
|
|
293
|
+
* does not exist
|
|
294
|
+
*--------------------------------------------------------------------------*/
|
|
295
|
+
int rumavl_set(RUMAVL *tree, const void *record)
|
|
296
|
+
{
|
|
297
|
+
RUMAVL_NODE **node, *tmp;
|
|
298
|
+
RUMAVL_STACK *stack;
|
|
299
|
+
int ln;
|
|
300
|
+
|
|
301
|
+
if (tree->root == NULL)
|
|
302
|
+
{
|
|
303
|
+
/* This is the first node in the tree */
|
|
304
|
+
if ((tree->root = node_new(tree, record)) == NULL)
|
|
305
|
+
return RUMAVL_ERR_NOMEM;
|
|
306
|
+
tree->root->link[LEFT] = tree->root;
|
|
307
|
+
tree->root->link[RIGHT] = tree->root;
|
|
308
|
+
tree->root->thread[LEFT] = 2;
|
|
309
|
+
tree->root->thread[RIGHT] = 2;
|
|
310
|
+
return 0;
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
/* Since the tree is not empty, we must descend towards the nodes ideal
|
|
314
|
+
* possition, and we may even find an existing node with the same record.
|
|
315
|
+
* We keep a list parents for the eventual node position, because these
|
|
316
|
+
* parents may become inbalanced by a new insertion. */
|
|
317
|
+
|
|
318
|
+
stack = NULL;
|
|
319
|
+
node = &tree->root;
|
|
320
|
+
for (;;)
|
|
321
|
+
{
|
|
322
|
+
if ((ln = rec_cmp(tree, record, NODE_REC(*node))) == 0)
|
|
323
|
+
{
|
|
324
|
+
/* OK, we found the exact node we wish to set, and we now
|
|
325
|
+
* overwrite it. No change happens to the tree structure */
|
|
326
|
+
stack_destroy(tree, stack);
|
|
327
|
+
|
|
328
|
+
if (tree->owcb != NULL &&
|
|
329
|
+
(ln = tree->owcb(tree, *node, NODE_REC(*node),
|
|
330
|
+
record, tree->udata)) != 0)
|
|
331
|
+
{
|
|
332
|
+
return ln;
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
memcpy(NODE_REC(*node), record, tree->reclen);
|
|
336
|
+
return 0;
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
/* *node is not the node we seek */
|
|
340
|
+
|
|
341
|
+
if (stack_push(tree, &stack, node, ln))
|
|
342
|
+
{
|
|
343
|
+
stack_destroy(tree, stack);
|
|
344
|
+
return RUMAVL_ERR_NOMEM;
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
ln = LINK_NO(ln);
|
|
348
|
+
if ((*node)->thread[ln] > 0)
|
|
349
|
+
{
|
|
350
|
+
/* This is as close to the correct node as we can get. We will
|
|
351
|
+
* now break and add the new node as a leaf */
|
|
352
|
+
break;
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
node = &(*node)->link[ln];
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
/* we have reached a leaf, add new node here */
|
|
359
|
+
if ((tmp = node_new(tree, record)) == NULL)
|
|
360
|
+
{
|
|
361
|
+
stack_destroy(tree, stack);
|
|
362
|
+
return RUMAVL_ERR_NOMEM;
|
|
363
|
+
}
|
|
364
|
+
/* new child inherits parent thread */
|
|
365
|
+
tmp->link[ln] = (*node)->link[ln];
|
|
366
|
+
tmp->thread[ln] = (*node)->thread[ln];
|
|
367
|
+
if (tmp->thread[ln] == 2)
|
|
368
|
+
tmp->link[ln]->link[OTHER_LINK(ln)] = tmp;
|
|
369
|
+
|
|
370
|
+
tmp->link[OTHER_LINK(ln)] = *node;
|
|
371
|
+
tmp->thread[OTHER_LINK(ln)] = 1;
|
|
372
|
+
(*node)->link[ln] = tmp;
|
|
373
|
+
(*node)->thread[ln] = 0;
|
|
374
|
+
|
|
375
|
+
/* all parentage is now one level heavier - balance where necessary */
|
|
376
|
+
stack_update(tree, stack, +1);
|
|
377
|
+
|
|
378
|
+
return 0;
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
/*----------------------------------------------------------------------------
|
|
382
|
+
* rumavl_insert - like rumavl_set, but only works if the node does not
|
|
383
|
+
* exist. Temporarily replaces overwrite callback with a function that
|
|
384
|
+
* always prevents overwrite, and calls rumavl_set()
|
|
385
|
+
*--------------------------------------------------------------------------*/
|
|
386
|
+
int rumavl_insert(RUMAVL *tree, const void *record)
|
|
387
|
+
{
|
|
388
|
+
int retv;
|
|
389
|
+
int (*tmp)(RUMAVL *, RUMAVL_NODE *, void *, const void *, void *);
|
|
390
|
+
|
|
391
|
+
tmp = tree->owcb;
|
|
392
|
+
tree->owcb = insert_cb;
|
|
393
|
+
retv = rumavl_set(tree, record);
|
|
394
|
+
tree->owcb = tmp;
|
|
395
|
+
return retv;
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
/*----------------------------------------------------------------------------
|
|
399
|
+
* rumavl_delete - deletes a node. Beware! this function is the worst part of
|
|
400
|
+
* the library. Think (and draw pictures) when you edit this function.
|
|
401
|
+
*--------------------------------------------------------------------------*/
|
|
402
|
+
int rumavl_delete(RUMAVL *tree, const void *record)
|
|
403
|
+
{
|
|
404
|
+
RUMAVL_NODE **node, *tmpnode;
|
|
405
|
+
RUMAVL_STACK *stack;
|
|
406
|
+
int dir, ln;
|
|
407
|
+
|
|
408
|
+
if (tree->root == NULL) /* tree is empty */
|
|
409
|
+
return RUMAVL_ERR_NOENT;
|
|
410
|
+
|
|
411
|
+
stack = NULL;
|
|
412
|
+
node = &tree->root;
|
|
413
|
+
|
|
414
|
+
/* Find desired node */
|
|
415
|
+
while ((dir = rec_cmp(tree, record, NODE_REC(*node))) != 0)
|
|
416
|
+
{
|
|
417
|
+
if (stack_push(tree, &stack, node, dir) != 0)
|
|
418
|
+
goto nomemout;
|
|
419
|
+
|
|
420
|
+
if ((*node)->thread[LINK_NO(dir)] > 0)
|
|
421
|
+
{
|
|
422
|
+
/* desired node does not exist */
|
|
423
|
+
stack_destroy(tree, stack);
|
|
424
|
+
return RUMAVL_ERR_NOENT;
|
|
425
|
+
}
|
|
426
|
+
node = &(*node)->link[LINK_NO(dir)];
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
/* OK, we got the node to be deleted, now get confirmation from user */
|
|
430
|
+
if (tree->delcb != NULL &&
|
|
431
|
+
(ln = tree->delcb(tree, *node, NODE_REC(*node), tree->udata)) != 0)
|
|
432
|
+
{
|
|
433
|
+
stack_destroy(tree, stack);
|
|
434
|
+
return ln;
|
|
435
|
+
}
|
|
436
|
+
|
|
437
|
+
if ((*node)->thread[LEFT] > 0)
|
|
438
|
+
{
|
|
439
|
+
if ((*node)->thread[RIGHT] > 0)
|
|
440
|
+
{
|
|
441
|
+
/* ooh look, we're a leaf */
|
|
442
|
+
tmpnode = *node;
|
|
443
|
+
if (stack != NULL)
|
|
444
|
+
{
|
|
445
|
+
/* This node has a parent, which will need to take over a
|
|
446
|
+
* thread from the node being deleted. First we work out
|
|
447
|
+
* which (left/right) child we are of parent, then give
|
|
448
|
+
* parent the respective thread. If the thread destination
|
|
449
|
+
* points back to us (edge of tree thread), update it to
|
|
450
|
+
* point to our parent. */
|
|
451
|
+
ln = LINK_NO(stack->dir);
|
|
452
|
+
(*stack->node)->link[ln] = tmpnode->link[ln];
|
|
453
|
+
(*stack->node)->thread[ln] = tmpnode->thread[ln];
|
|
454
|
+
if ((*stack->node)->thread[ln] == 2)
|
|
455
|
+
(*stack->node)->link[ln]->link[OTHER_LINK(ln)] =
|
|
456
|
+
*stack->node;
|
|
457
|
+
}
|
|
458
|
+
else
|
|
459
|
+
{
|
|
460
|
+
/*
|
|
461
|
+
* the only time stack will == NULL is when we are
|
|
462
|
+
* deleting the root of the tree. We already know that
|
|
463
|
+
* this is a leaf, so we will be leaving the tree empty.
|
|
464
|
+
*/
|
|
465
|
+
tree->root = NULL;
|
|
466
|
+
}
|
|
467
|
+
node_destroy(tree, tmpnode);
|
|
468
|
+
}
|
|
469
|
+
else
|
|
470
|
+
{
|
|
471
|
+
/* *node has only one child, and can be pruned by replacing
|
|
472
|
+
* *node with its only child. This block of code and the next
|
|
473
|
+
* should be identical, except that all directions and link
|
|
474
|
+
* numbers are opposite.
|
|
475
|
+
*
|
|
476
|
+
* Let node being deleted = DELNODE for this comment.
|
|
477
|
+
* DELNODE only has one child (the right child). The left
|
|
478
|
+
* most descendant of DELNODE will have a thread (left thread)
|
|
479
|
+
* pointing to DELNODE. This thread must be updated to point
|
|
480
|
+
* to the node currently pointed to by DELNODE's left thread.
|
|
481
|
+
*
|
|
482
|
+
* DELNODE's left thread may point to the opposite edge of the
|
|
483
|
+
* BST. In this case, the destination of the thread will have
|
|
484
|
+
* a thread back to DELNODE. This will need to be updated to
|
|
485
|
+
* point back to the leftmost descendant of DELNODE.
|
|
486
|
+
*/
|
|
487
|
+
tmpnode = *node; /* node being deleted */
|
|
488
|
+
*node = (*node)->link[RIGHT]; /* right child */
|
|
489
|
+
/* find left most descendant */
|
|
490
|
+
while ((*node)->thread[LEFT] == 0)
|
|
491
|
+
node = &(*node)->link[LEFT];
|
|
492
|
+
/* inherit thread from node being deleted */
|
|
493
|
+
(*node)->link[LEFT] = tmpnode->link[LEFT];
|
|
494
|
+
(*node)->thread[LEFT] = tmpnode->thread[LEFT];
|
|
495
|
+
/* update reverse thread if necessary */
|
|
496
|
+
if ((*node)->thread[LEFT] == 2)
|
|
497
|
+
(*node)->link[LEFT]->link[RIGHT] = *node;
|
|
498
|
+
node_destroy(tree, tmpnode);
|
|
499
|
+
}
|
|
500
|
+
}
|
|
501
|
+
else if ((*node)->thread[RIGHT] > 0)
|
|
502
|
+
{
|
|
503
|
+
/* see above */
|
|
504
|
+
tmpnode = *node;
|
|
505
|
+
*node = (*node)->link[LEFT];
|
|
506
|
+
while ((*node)->thread[RIGHT] == 0)
|
|
507
|
+
node = &(*node)->link[RIGHT];
|
|
508
|
+
(*node)->link[RIGHT] = tmpnode->link[RIGHT];
|
|
509
|
+
(*node)->thread[RIGHT] = tmpnode->thread[RIGHT];
|
|
510
|
+
if ((*node)->thread[RIGHT] == 2)
|
|
511
|
+
(*node)->link[RIGHT]->link[LEFT] = *node;
|
|
512
|
+
node_destroy(tree, tmpnode);
|
|
513
|
+
}
|
|
514
|
+
else
|
|
515
|
+
{
|
|
516
|
+
/* Delete a node with children on both sides. We do this by replacing
|
|
517
|
+
* the node to be deleted (delnode) with its inner most child
|
|
518
|
+
* on the heavier side (repnode). This in place replacement is quicker
|
|
519
|
+
* than the previously used method of rotating delnode until it is a
|
|
520
|
+
* (semi) leaf.
|
|
521
|
+
*
|
|
522
|
+
* At this point node points to delnode's parent's link to delnode. */
|
|
523
|
+
RUMAVL_NODE *repnode, *parent;
|
|
524
|
+
int outdir, outln;
|
|
525
|
+
|
|
526
|
+
/* find heaviest subtree */
|
|
527
|
+
if ((*node)->balance > 0)
|
|
528
|
+
{
|
|
529
|
+
outdir = +1; /* outter direction */
|
|
530
|
+
dir = -1; /* inner direction */
|
|
531
|
+
outln = 1; /* outer link number */
|
|
532
|
+
ln = 0; /* inner link number */
|
|
533
|
+
}
|
|
534
|
+
else
|
|
535
|
+
{
|
|
536
|
+
outdir = -1; /* same as above, but opposite subtree */
|
|
537
|
+
dir = +1;
|
|
538
|
+
outln = 0;
|
|
539
|
+
ln = 1;
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
/* Add node to be deleted to the list of nodes to be rebalanced.
|
|
543
|
+
* Rememer that the replacement node will actually be acted apon,
|
|
544
|
+
* and that the replacement node should feel the effect of its own
|
|
545
|
+
* move */
|
|
546
|
+
if (stack_push(tree, &stack, node, outdir) != 0)
|
|
547
|
+
goto nomemout;
|
|
548
|
+
|
|
549
|
+
parent = *node;
|
|
550
|
+
repnode = parent->link[outln];
|
|
551
|
+
|
|
552
|
+
if (repnode->thread[ln] != 0)
|
|
553
|
+
{
|
|
554
|
+
/* repnode inherits delnode's lighter tree, and balance, and gets
|
|
555
|
+
* balance readjusted below */
|
|
556
|
+
repnode->link[ln] = (*node)->link[ln];
|
|
557
|
+
repnode->thread[ln] = (*node)->thread[ln];
|
|
558
|
+
repnode->balance = (*node)->balance;
|
|
559
|
+
}
|
|
560
|
+
else
|
|
561
|
+
{
|
|
562
|
+
/* Now we add delnodes direct child to the list of "to update".
|
|
563
|
+
* We pass a pointer to delnode's link to its direct child to
|
|
564
|
+
* stack_push(), but that pointer is invalid, because when
|
|
565
|
+
* stack_update() tries to access the link, delnode would have
|
|
566
|
+
* been destroyed. So, we remember the stack position at which
|
|
567
|
+
* we passed the faulty pointer to stack_push, and update its
|
|
568
|
+
* node pointer when we find repnode to point to repnodes
|
|
569
|
+
* link on the same side */
|
|
570
|
+
RUMAVL_STACK *tmpstack;
|
|
571
|
+
|
|
572
|
+
if (stack_push(tree, &stack, &parent->link[outln], dir) != 0)
|
|
573
|
+
goto nomemout;
|
|
574
|
+
|
|
575
|
+
tmpstack = stack;
|
|
576
|
+
|
|
577
|
+
parent = repnode;
|
|
578
|
+
repnode = repnode->link[ln];
|
|
579
|
+
|
|
580
|
+
/* move towards the innermost child of delnode */
|
|
581
|
+
while (repnode->thread[ln] == 0)
|
|
582
|
+
{
|
|
583
|
+
if (stack_push(tree, &stack, &parent->link[ln], dir) != 0)
|
|
584
|
+
goto nomemout;
|
|
585
|
+
parent = repnode;
|
|
586
|
+
repnode = repnode->link[ln];
|
|
587
|
+
}
|
|
588
|
+
|
|
589
|
+
if (repnode->thread[outln] == 0)
|
|
590
|
+
{
|
|
591
|
+
/* repnode's parent inherits repnodes only child */
|
|
592
|
+
parent->link[ln] = repnode->link[outln];
|
|
593
|
+
}
|
|
594
|
+
else
|
|
595
|
+
{
|
|
596
|
+
/* parent already has a link to repnode, but it must now be
|
|
597
|
+
* marked as a thread */
|
|
598
|
+
parent->thread[ln] = 1;
|
|
599
|
+
}
|
|
600
|
+
|
|
601
|
+
repnode->link[0] = (*node)->link[0];
|
|
602
|
+
repnode->thread[0] = (*node)->thread[0];
|
|
603
|
+
repnode->link[1] = (*node)->link[1];
|
|
604
|
+
repnode->thread[1] = (*node)->thread[1];
|
|
605
|
+
repnode->balance = (*node)->balance;
|
|
606
|
+
|
|
607
|
+
/* see comment above */
|
|
608
|
+
tmpstack->node = &repnode->link[outln];
|
|
609
|
+
}
|
|
610
|
+
node_destroy(tree, *node);
|
|
611
|
+
*node = repnode;
|
|
612
|
+
|
|
613
|
+
/* innermost child in lighter tree has an invalid thread to delnode,
|
|
614
|
+
* update it to point to repnode */
|
|
615
|
+
repnode = seq_next(repnode, dir);
|
|
616
|
+
repnode->link[outln] = *node;
|
|
617
|
+
}
|
|
618
|
+
|
|
619
|
+
/* update parents' balances */
|
|
620
|
+
stack_update(tree, stack, -1);
|
|
621
|
+
return 0;
|
|
622
|
+
|
|
623
|
+
nomemout:
|
|
624
|
+
stack_destroy(tree, stack);
|
|
625
|
+
return RUMAVL_ERR_NOMEM;
|
|
626
|
+
}
|
|
627
|
+
|
|
628
|
+
/*----------------------------------------------------------------------------
|
|
629
|
+
* rumavl_find
|
|
630
|
+
*
|
|
631
|
+
* Returns a pointer to the record that matches "record".
|
|
632
|
+
*--------------------------------------------------------------------------*/
|
|
633
|
+
void *rumavl_find(RUMAVL *tree, const void *find)
|
|
634
|
+
{
|
|
635
|
+
void *record;
|
|
636
|
+
rumavl_node_find(tree, find, &record);
|
|
637
|
+
return record;
|
|
638
|
+
}
|
|
639
|
+
|
|
640
|
+
/* rumavl_alloc - return the address of the tree's allocator slot, so the
 * caller can read or replace the allocator function. */
void *(**rumavl_alloc(RUMAVL *tree))(void *ptr, size_t size, void *udata)
{
    return &(tree->alloc);
}
|
|
644
|
+
|
|
645
|
+
/*----------------------------------------------------------------------------
|
|
646
|
+
* rumavl_record_size - returns size of all records in a tree
|
|
647
|
+
*--------------------------------------------------------------------------*/
|
|
648
|
+
size_t rumavl_record_size(RUMAVL *tree)
|
|
649
|
+
{
|
|
650
|
+
return tree->reclen;
|
|
651
|
+
}
|
|
652
|
+
|
|
653
|
+
/*----------------------------------------------------------------------------
|
|
654
|
+
* rumavl_node_find
|
|
655
|
+
*
|
|
656
|
+
* Returns a pointer to the node that matches "record".
|
|
657
|
+
*--------------------------------------------------------------------------*/
|
|
658
|
+
RUMAVL_NODE *rumavl_node_find(RUMAVL *tree, const void *find, void **record)
{
    /*
     * Search the tree for a node whose record compares equal to `find'.
     *
     * Returns the matching node, or NULL when `find' is NULL, the tree is
     * empty, or no node matches.  When `record' is not NULL it receives the
     * matching node's record pointer, or NULL on failure.
     */
    RUMAVL_NODE *cur = NULL;
    int step;

    if (find != NULL && tree->root != NULL)
    {
        cur = tree->root;
        while ((step = rec_cmp(tree, find, NODE_REC(cur))) != 0)
        {
            /* turn the comparison result into a link index */
            step = LINK_NO(step);

            /* a threaded link is not a real child: nothing left to search */
            if (cur->thread[step] > 0)
            {
                cur = NULL;
                break;
            }
            cur = cur->link[step];
        }
    }

    if (record != NULL)
    {
        if (cur != NULL)
            *record = NODE_REC(cur);
        else
            *record = NULL;
    }
    return cur;
}
|
|
690
|
+
|
|
691
|
+
/*----------------------------------------------------------------------------
|
|
692
|
+
* rumavl_node_next - find next node
|
|
693
|
+
*--------------------------------------------------------------------------*/
|
|
694
|
+
RUMAVL_NODE *rumavl_node_next(RUMAVL *tree, RUMAVL_NODE *node, int dir,
                              void **record)
{
    /*
     * Step from `node' to its neighbour in direction `dir'.
     *
     * dir > 0 is treated as RUMAVL_ASC, dir < 0 as RUMAVL_DESC, and dir == 0
     * is rejected.  When `node' is NULL the walk starts at the extreme node
     * on the side opposite to `dir', i.e. the first node of an iteration in
     * that direction.
     *
     * Returns the node found, or NULL when dir is 0, the tree is empty, or
     * there is no further node.  When `record' is not NULL it receives the
     * found node's record pointer, or NULL on failure.
     */
    RUMAVL_NODE *next = NULL;

    if (dir != 0)
    {
        dir = (dir > 0) ? RUMAVL_ASC : RUMAVL_DESC;

        if (node != NULL)
        {
            next = seq_next(node, dir);
        }
        else if (tree->root != NULL)
        {
            /* follow real (non-thread) links away from `dir' until the
             * outermost node on that side is reached */
            int edge = OTHER_LINK(LINK_NO(dir));

            for (next = tree->root; next->thread[edge] == 0;
                 next = next->link[edge])
                ;
        }
    }

    if (record != NULL)
    {
        if (next != NULL)
            *record = NODE_REC(next);
        else
            *record = NULL;
    }
    return next;
}
|
|
737
|
+
|
|
738
|
+
/*----------------------------------------------------------------------------
|
|
739
|
+
* rumavl_node_record - returns a pointer to the record stored in a node
|
|
740
|
+
*--------------------------------------------------------------------------*/
|
|
741
|
+
void *rumavl_node_record(RUMAVL_NODE *node)
{
    /* No NULL check is made: `node' is handed straight to NODE_REC(). */
    void *rec = NODE_REC(node);

    return rec;
}
|
|
745
|
+
|
|
746
|
+
/*----------------------------------------------------------------------------
|
|
747
|
+
* rumavl_foreach - loop through entire tree, using temporary iterator
|
|
748
|
+
*--------------------------------------------------------------------------*/
|
|
749
|
+
extern int rumavl_foreach(RUMAVL *tree, int dir,
|
|
750
|
+
int (*cbfn)(RUMAVL *, void *, void *), void *udata)
|
|
751
|
+
{
|
|
752
|
+
RUMAVL_NODE *node;
|
|
753
|
+
int retv;
|
|
754
|
+
void *record;
|
|
755
|
+
|
|
756
|
+
if (cbfn == NULL)
|
|
757
|
+
return RUMAVL_ERR_INVAL;
|
|
758
|
+
|
|
759
|
+
retv = RUMAVL_ERR_NOENT;
|
|
760
|
+
node = NULL;
|
|
761
|
+
while ((node = rumavl_node_next(tree, node, dir, &record)) != NULL)
|
|
762
|
+
{
|
|
763
|
+
if ((retv = cbfn(tree, record, udata)) != 0)
|
|
764
|
+
break;
|
|
765
|
+
}
|
|
766
|
+
|
|
767
|
+
return retv;
|
|
768
|
+
}
|
|
769
|
+
|
|
770
|
+
/*----------------------------------------------------------------------------
|
|
771
|
+
* rumavl_strerror - return string description of RumAVL error code
|
|
772
|
+
*--------------------------------------------------------------------------*/
|
|
773
|
+
const char *rumavl_strerror(int errno)
|
|
774
|
+
{
|
|
775
|
+
switch (errno)
|
|
776
|
+
{
|
|
777
|
+
case 0:
|
|
778
|
+
return "Operation successful";
|
|
779
|
+
case RUMAVL_ERR_INVAL:
|
|
780
|
+
return "Invalid argument to function";
|
|
781
|
+
case RUMAVL_ERR_NOMEM:
|
|
782
|
+
return "Insufficient memory to complete operation";
|
|
783
|
+
case RUMAVL_ERR_NOENT:
|
|
784
|
+
return "Entry does not exist";
|
|
785
|
+
case RUMAVL_ERR_EORNG:
|
|
786
|
+
return "No more entries in range";
|
|
787
|
+
case RUMAVL_ERR_EXIST:
|
|
788
|
+
return "Entry already exists";
|
|
789
|
+
}
|
|
790
|
+
return "UNKNOWN ERROR";
|
|
791
|
+
}
|
|
792
|
+
|
|
793
|
+
/*****************************************************************************
|
|
794
|
+
*
|
|
795
|
+
* PRIVATE FUNCTIONS
|
|
796
|
+
*
|
|
797
|
+
****************************************************************************/
|
|
798
|
+
|
|
799
|
+
/*----------------------------------------------------------------------------
|
|
800
|
+
* insert_cb - used by rumavl_insert() to disallow any overwriting by
|
|
801
|
+
* rumavl_set()
|
|
802
|
+
*--------------------------------------------------------------------------*/
|
|
803
|
+
static int insert_cb(RUMAVL *t, RUMAVL_NODE *n, void *r1, const void *r2,
                     void *udata)
{
    /* Overwrite callback that refuses every overwrite: all arguments are
     * deliberately ignored and RUMAVL_ERR_EXIST is always reported. */
    (void)t;
    (void)n;
    (void)r1;
    (void)r2;
    (void)udata;

    return RUMAVL_ERR_EXIST;
}
|
|
813
|
+
|
|
814
|
+
/*----------------------------------------------------------------------------
|
|
815
|
+
* seq_next - return a pointer to the next node in sequence
|
|
816
|
+
*--------------------------------------------------------------------------*/
|
|
817
|
+
static RUMAVL_NODE *seq_next(RUMAVL_NODE *node, int dir)
{
    /*
     * Return the in-sequence neighbour of `node' in direction `dir'.
     *
     * thread[] value 2 on that side means there is no neighbour (NULL is
     * returned); value 1 means the link is a thread that already points at
     * the neighbour; value 0 means a real subtree, whose extreme node on
     * the opposite side is the neighbour.
     */
    int toward = LINK_NO(dir);
    int back;
    RUMAVL_NODE *walk;

    if (node->thread[toward] == 2)
        return NULL;

    if (node->thread[toward] == 1)
        return node->link[toward];

    /* descend into the subtree, then run back towards `node' as far as
     * real links go */
    back = OTHER_LINK(toward);
    for (walk = node->link[toward]; walk->thread[back] == 0;
         walk = walk->link[back])
        ;

    return walk;
}
|
|
838
|
+
|
|
839
|
+
/*----------------------------------------------------------------------------
|
|
840
|
+
* node_new - create a new node. MUST update link[] and thread[] after calling
|
|
841
|
+
* this function
|
|
842
|
+
*--------------------------------------------------------------------------*/
|
|
843
|
+
static RUMAVL_NODE *node_new(RUMAVL *tree, const void *record)
{
    /*
     * Allocate a node plus a private tree->reclen-byte copy of `record'.
     *
     * Returns the new node with balance 0, both links NULL and both thread
     * flags 0 (the caller must set link[] and thread[] afterwards), or NULL
     * when either allocation fails; nothing is leaked in that case.
     */
    RUMAVL_NODE *fresh = mem_alloc(tree, sizeof(RUMAVL_NODE));

    if (fresh == NULL)
        return NULL;

    fresh->rec = mem_alloc(tree, tree->reclen);
    if (fresh->rec == NULL)
    {
        mem_free(tree, fresh);
        return NULL;
    }
    memcpy(fresh->rec, record, tree->reclen);

    fresh->balance = 0;
    fresh->link[0] = NULL;
    fresh->thread[0] = 0;
    fresh->link[1] = NULL;
    fresh->thread[1] = 0;

    return fresh;
}
|
|
864
|
+
|
|
865
|
+
/*----------------------------------------------------------------------------
|
|
866
|
+
* node_destroy - cleanly destroy node
|
|
867
|
+
*--------------------------------------------------------------------------*/
|
|
868
|
+
static void node_destroy(RUMAVL *tree, RUMAVL_NODE *node)
{
    /* Release the record copy first, then the node that owns it. */
    void *payload = node->rec;

    mem_free(tree, payload);
    mem_free(tree, node);
}
|
|
873
|
+
|
|
874
|
+
/*----------------------------------------------------------------------------
|
|
875
|
+
* stack_push - push a node entry onto stack, for rumavl_set() and
|
|
876
|
+
* rumavl_delete(). If this is the first entry, *stack should == NULL
|
|
877
|
+
*--------------------------------------------------------------------------*/
|
|
878
|
+
static int stack_push(RUMAVL *tree, RUMAVL_STACK **stack, RUMAVL_NODE **node,
                      int dir)
{
    /*
     * Push a (node slot, direction) pair on top of *stack.
     *
     * *stack must be NULL for the first push.  Returns 0 on success, or -1
     * when the entry cannot be allocated (*stack is then left untouched).
     */
    RUMAVL_STACK *entry = mem_alloc(tree, sizeof(RUMAVL_STACK));

    if (entry == NULL)
        return -1;

    entry->node = node;
    entry->dir = dir;

    /* link the new entry in front of the current top */
    entry->next = *stack;
    *stack = entry;

    return 0;
}
|
|
893
|
+
|
|
894
|
+
/*----------------------------------------------------------------------------
|
|
895
|
+
* stack_destroy - free up a stack
|
|
896
|
+
*--------------------------------------------------------------------------*/
|
|
897
|
+
static void stack_destroy(RUMAVL *tree, RUMAVL_STACK *stack)
{
    /* Free every entry from the top down; the nodes they reference are
     * left alone. */
    while (stack != NULL)
    {
        RUMAVL_STACK *rest = stack->next;

        mem_free(tree, stack);
        stack = rest;
    }
}
|
|
907
|
+
|
|
908
|
+
/*----------------------------------------------------------------------------
|
|
909
|
+
* stack_update - goes up stack readjusting balance as needed. This function
|
|
910
|
+
* serves as a testament to the philosophy of commenting while you code, 'cos
|
|
911
|
+
* hell if I can remember how I got to this. I think it has something to do
|
|
912
|
+
* with the varying effects on tree height, depending on exactly which sub
|
|
913
|
+
* tree, or sub-sub tree was modified. TODO study and comment
|
|
914
|
+
*--------------------------------------------------------------------------*/
|
|
915
|
+
static void stack_update(RUMAVL *tree, RUMAVL_STACK *stack, signed char diff)
{
    /*
     * Pop every entry off `stack', adjusting the balance factor of the node
     * each entry refers to for as long as `diff' is non-zero, and freeing
     * each entry as it is popped.
     *
     * Callers pass -1 after a removal; a positive value is presumably used
     * after an insertion (that caller is not visible here).  Once diff drops
     * to 0 no further balance changes are made and the remaining entries
     * are only freed.
     */
    while (stack != NULL)
    {
        RUMAVL_STACK *done = stack;

        if (diff != 0)
        {
            RUMAVL_NODE **slot = stack->node;
            signed char before, after;

            /* apply the change on the side recorded in the stack entry */
            before = (*slot)->balance;
            (*slot)->balance += diff * (signed char)stack->dir;
            after = (*slot)->balance;

            /* work out what this change means for the entry above us */
            if (diff < 0)
            {
                if (stack->dir == -1 && before < 0)
                {
                    if (after > 0)
                        after = 0;
                    diff = (after - before) * -1;
                }
                else if (stack->dir == 1 && before > 0)
                {
                    if (after < 0)
                        after = 0;
                    diff = after - before;
                }
                else
                {
                    diff = 0;
                }
            }
            else
            {
                if (stack->dir == -1 && after < 0)
                {
                    if (before > 0)
                        before = 0;
                    diff = (after - before) * -1;
                }
                else if (stack->dir == 1 && after > 0)
                {
                    if (before < 0)
                        before = 0;
                    diff = after - before;
                }
                else
                {
                    diff = 0;
                }
            }

            /* rotate until the factor is back within [-1, 1]; balance()
             * may replace *slot, so it is re-read on every test */
            while ((*slot)->balance > 1)
            {
                diff += balance(slot, -1);
            }
            while ((*slot)->balance < -1)
            {
                diff += balance(slot, 1);
            }
        }

        stack = stack->next;
        mem_free(tree, done);
    }
}
|
|
987
|
+
|
|
988
|
+
/*----------------------------------------------------------------------------
|
|
989
|
+
* my_cmp - a wrapper around memcmp() for default record comparison function.
|
|
990
|
+
*--------------------------------------------------------------------------*/
|
|
991
|
+
static int my_cmp(const void *a, const void *b, size_t n, void *udata)
{
    /* Default record comparison: a bytewise memcmp() over the first n
     * bytes.  `udata' is accepted only to fit the comparator signature. */
    int order = memcmp(a, b, n);

    (void)udata;
    return order;
}
|
|
996
|
+
|
|
997
|
+
/*----------------------------------------------------------------------------
|
|
998
|
+
* rec_cmp - a wrapper around the record comparison function, that only
|
|
999
|
+
* returns 0, RUMAVL_ASC or RUMAVL_DESC.
|
|
1000
|
+
*--------------------------------------------------------------------------*/
|
|
1001
|
+
static int rec_cmp(RUMAVL *tree, const void *reca, const void *recb)
{
    /* Run the tree's comparison callback and collapse its result to one
     * of three values: 0 when equal, RUMAVL_DESC when negative, RUMAVL_ASC
     * when positive. */
    int raw = tree->cmp(reca, recb, tree->reclen, tree->udata);

    if (raw == 0)
        return 0;

    return (raw < 0) ? RUMAVL_DESC : RUMAVL_ASC;
}
|
|
1011
|
+
|
|
1012
|
+
/*----------------------------------------------------------------------------
|
|
1013
|
+
* Balance - rotate or double rotate as needed. Sometimes simply rotating a
|
|
1014
|
+
* tree is inefficient, as it leaves the tree as unbalanced as it was before
|
|
1015
|
+
* the rotate. To rectify this, we first rotate the heavier child so that the
|
|
1016
|
+
* heavier grandchild is on the outside, then rotate as per normal.
|
|
1017
|
+
*
|
|
1018
|
+
* TODO Check all callers, and make sure that they call this function sanely,
|
|
1019
|
+
* and then remove unnecessary checks.
|
|
1020
|
+
*--------------------------------------------------------------------------*/
|
|
1021
|
+
static signed char balance(RUMAVL_NODE **node, int dir)
{
    /*
     * Rebalance the subtree rooted at *node by rotating it in direction
     * `dir' (-1 or +1), first rotating the child that will become the new
     * root the other way when that child leans towards `dir' and has a real
     * (non-thread) inner link.
     *
     * Returns the sum of the values returned by the rotate() calls made, or
     * 0 when nothing was done (NULL argument, dir other than -1/+1, or the
     * would-be new root is only a thread).
     */
    int ln;
    signed char retv;

    if (node == NULL || *node == NULL || (dir * dir) != 1)
        return 0;

    ln = OTHER_LINK(LINK_NO(dir)); /* link number of new root */

    /* new root must exist */
    if ((*node)->thread[ln] > 0)
        return 0;

    retv = 0;

    /* Compare the balance factor with `dir' itself.  The previous
     * `(char)dir' cast yields 255 for dir == -1 wherever plain char is
     * unsigned, so the test could never succeed and the double rotation
     * was silently skipped.  dir is known to be -1 or +1 at this point. */
    if ((*node)->link[ln]->balance == dir &&
        (*node)->link[ln]->thread[OTHER_LINK(ln)] == 0)
    {
        /* double rotate if inner grandchild is heaviest */
        retv = rotate(&((*node)->link[ln]), OTHER_DIR(dir));
    }

    return retv + rotate(node, dir);
}
|
|
1045
|
+
|
|
1046
|
+
/*----------------------------------------------------------------------------
|
|
1047
|
+
* rotate
|
|
1048
|
+
*
|
|
1049
|
+
* rotates a tree rooted at *node. dir determines the direction of the rotate,
|
|
1050
|
+
* dir < 0 -> left rotate; dir >= 0 -> right rotate
|
|
1051
|
+
*
|
|
1052
|
+
* TODO How sure are we that all callers pass decent `dir' values?
|
|
1053
|
+
* TODO Restudy the tree height modification and balance factor algorithms,
|
|
1054
|
+
* and document them.
|
|
1055
|
+
*--------------------------------------------------------------------------*/
|
|
1056
|
+
static signed char rotate(RUMAVL_NODE **node, int dir)
{
    /*
     * Rotate the subtree rooted at *node in direction `dir'.  Only -1 and
     * +1 are accepted; any other value, a NULL argument, or a would-be new
     * root that is merely a thread makes the call a no-op returning 0.
     *
     * On success *node is replaced by the child that moved up, links and
     * thread flags are repaired, both balance factors are recomputed, and
     * the value computed in the "effect on tree height" step is returned
     * (-1, 0 or +1).
     */
    RUMAVL_NODE *old_root, *new_root;
    signed char a, b, ad, bd, height_delta;
    int up, down, away;

    if (node == NULL || *node == NULL || (dir * dir) != 1)
        return 0;

    up = OTHER_LINK(LINK_NO(dir)); /* link number of new root */
    down = OTHER_LINK(up);

    old_root = *node;

    /* new root must exist */
    if (old_root->thread[up] > 0)
        return 0;

    /* balance factors before anything is touched */
    a = old_root->balance;

    /* calculate effect on tree height */
    if ((dir == 1 && a < 0 && old_root->link[0]->balance >= 0) ||
        (dir == -1 && a > 0 && old_root->link[1]->balance <= 0))
    {
        height_delta = 0;
    }
    else if (dir == 1)
    {
        height_delta = (a < -1) ? -1 : ((a == -1) ? 0 : +1);
    }
    else
    {
        height_delta = (a > 1) ? -1 : ((a == 1) ? 0 : +1);
    }

    /* rotate tree */
    new_root = old_root->link[up];
    *node = new_root;

    if (new_root->thread[down] > 0)
    {
        /* the new root had no inner child: the old root's link towards it
         * becomes a thread (the pointer itself already targets new_root) */
        old_root->thread[up] = 1;
    }
    else
    {
        /* hand the new root's inner subtree over to the old root */
        old_root->link[up] = new_root->link[down];
        old_root->thread[up] = 0;
    }
    new_root->link[down] = old_root;
    new_root->thread[down] = 0;

    /* rebalance factors after rotate matrix */
    b = new_root->balance;
    ad = (a > 0) ? 1 : ((a < 0) ? -1 : 0);
    bd = (b > 0) ? 1 : ((b < 0) ? -1 : 0);
    away = OTHER_DIR(dir);

    /* the old root's adjustment depends only on which way b leaned */
    if (bd == away)
        old_root->balance += -b + dir;
    else
        old_root->balance += dir;

    /* the new root's adjustment also depends on which way a leaned */
    if (ad != away)
        new_root->balance += dir + old_root->balance;
    else if (bd == away && old_root->balance * dir > 0)
        new_root->balance = (old_root->balance + b) + dir;
    else
        new_root->balance += dir;

    return height_delta;
}
|
|
1166
|
+
|
|
1167
|
+
/*----------------------------------------------------------------------------
|
|
1168
|
+
* mem_mgr
|
|
1169
|
+
*
|
|
1170
|
+
* memory manager: uses the tree's alloc hook if one is set, else realloc()
|
|
1171
|
+
*--------------------------------------------------------------------------*/
|
|
1172
|
+
static void *mem_mgr(RUMAVL *tree, void *ptr, size_t size)
{
    /*
     * Single entry point for allocating, resizing and releasing memory.
     *
     * When the tree has an allocator hook, the request is forwarded to it
     * unchanged together with tree->udata.  Otherwise the C library is
     * used: realloc() for allocation and resizing, free() for release.
     *
     * Returns the (possibly moved) block, or NULL on failure or after a
     * release.
     */
    if (tree->alloc != NULL)
        return tree->alloc(ptr, size, tree->udata);

    /* realloc(ptr, 0) is implementation-defined (undefined in C23): some
     * C libraries free ptr and return a fresh minimum-size block, which
     * would leak on every release.  Release explicitly instead.  This
     * presumably serves the file's mem_free helper, whose definition is
     * not visible here. */
    if (ptr != NULL && size == 0)
    {
        free(ptr);
        return NULL;
    }

    return realloc(ptr, size);
}
|