rmultimarkdown 4.7.1.1 → 6.2.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +2 -2
- data/README.md +7 -9
- data/Rakefile +33 -18
- data/ext/Makefile +261 -0
- data/ext/extconf.rb +23 -3
- data/ext/mmd/aho-corasick.c +596 -0
- data/ext/mmd/aho-corasick.h +120 -0
- data/ext/mmd/beamer.c +344 -0
- data/ext/mmd/beamer.h +72 -0
- data/ext/mmd/char.c +156 -0
- data/ext/mmd/char.h +111 -0
- data/ext/mmd/char_lookup.c +212 -0
- data/ext/mmd/critic_markup.c +330 -0
- data/ext/mmd/critic_markup.h +94 -0
- data/ext/mmd/d_string.c +402 -0
- data/ext/mmd/epub.c +563 -0
- data/ext/mmd/epub.h +69 -0
- data/ext/mmd/fodt.c +2288 -0
- data/ext/mmd/fodt.h +81 -0
- data/ext/mmd/html.c +2460 -0
- data/ext/mmd/html.h +81 -0
- data/ext/mmd/i18n.h +170 -0
- data/ext/mmd/include/d_string.h +182 -0
- data/ext/mmd/include/libMultiMarkdown.h +548 -0
- data/ext/mmd/include/token.h +233 -0
- data/ext/mmd/latex.c +2435 -0
- data/ext/mmd/latex.h +83 -0
- data/ext/mmd/lexer.c +3001 -0
- data/ext/mmd/lexer.h +75 -0
- data/ext/mmd/memoir.c +138 -0
- data/ext/mmd/memoir.h +67 -0
- data/ext/mmd/miniz.c +7557 -0
- data/ext/mmd/miniz.h +1328 -0
- data/ext/mmd/mmd.c +2798 -0
- data/ext/mmd/mmd.h +120 -0
- data/ext/mmd/object_pool.c +141 -0
- data/ext/mmd/object_pool.h +101 -0
- data/ext/mmd/opendocument-content.c +2071 -0
- data/ext/mmd/opendocument-content.h +135 -0
- data/ext/mmd/opendocument.c +981 -0
- data/ext/mmd/opendocument.h +118 -0
- data/ext/mmd/parser.c +1760 -0
- data/ext/mmd/parser.h +39 -0
- data/{MultiMarkdown-4 → ext/mmd}/rng.c +90 -49
- data/ext/mmd/scanners.c +77512 -0
- data/ext/mmd/scanners.h +101 -0
- data/ext/mmd/stack.c +142 -0
- data/ext/mmd/stack.h +113 -0
- data/ext/mmd/textbundle.c +455 -0
- data/ext/mmd/textbundle.h +115 -0
- data/ext/mmd/token.c +773 -0
- data/ext/mmd/token_pairs.c +263 -0
- data/ext/mmd/token_pairs.h +123 -0
- data/ext/mmd/transclude.c +549 -0
- data/ext/mmd/transclude.h +87 -0
- data/ext/mmd/uthash.h +1074 -0
- data/ext/mmd/uuid.c +154 -0
- data/ext/mmd/uuid.h +77 -0
- data/ext/mmd/version.h +111 -0
- data/ext/mmd/writer.c +2652 -0
- data/ext/mmd/writer.h +260 -0
- data/ext/mmd/zip.c +210 -0
- data/ext/mmd/zip.h +120 -0
- data/ext/{multi_markdown.c → ruby_multi_markdown.c} +87 -18
- data/lib/multi_markdown.bundle +0 -0
- data/lib/multi_markdown.rb +5 -8
- data/lib/multi_markdown/version.rb +1 -1
- data/rmultimarkdown.gemspec +2 -2
- data/test/{extensions_test.rb.rb → extensions_test.rb} +10 -54
- data/test/multi_markdown_test.rb +13 -0
- metadata +67 -47
- data/MultiMarkdown-4/GLibFacade.c +0 -310
- data/MultiMarkdown-4/GLibFacade.h +0 -100
- data/MultiMarkdown-4/beamer.c +0 -182
- data/MultiMarkdown-4/beamer.h +0 -11
- data/MultiMarkdown-4/critic.c +0 -111
- data/MultiMarkdown-4/critic.h +0 -15
- data/MultiMarkdown-4/glib.h +0 -11
- data/MultiMarkdown-4/html.c +0 -1117
- data/MultiMarkdown-4/html.h +0 -14
- data/MultiMarkdown-4/latex.c +0 -1217
- data/MultiMarkdown-4/latex.h +0 -16
- data/MultiMarkdown-4/libMultiMarkdown.h +0 -177
- data/MultiMarkdown-4/lyx.c +0 -2265
- data/MultiMarkdown-4/lyx.h +0 -37
- data/MultiMarkdown-4/lyxbeamer.c +0 -265
- data/MultiMarkdown-4/lyxbeamer.h +0 -11
- data/MultiMarkdown-4/memoir.c +0 -80
- data/MultiMarkdown-4/memoir.h +0 -10
- data/MultiMarkdown-4/multimarkdown.c +0 -518
- data/MultiMarkdown-4/odf.c +0 -1222
- data/MultiMarkdown-4/odf.h +0 -18
- data/MultiMarkdown-4/opml.c +0 -189
- data/MultiMarkdown-4/opml.h +0 -15
- data/MultiMarkdown-4/parse_utilities.c +0 -884
- data/MultiMarkdown-4/parser.c +0 -16656
- data/MultiMarkdown-4/parser.h +0 -188
- data/MultiMarkdown-4/rtf.c +0 -665
- data/MultiMarkdown-4/rtf.h +0 -17
- data/MultiMarkdown-4/strtok.c +0 -56
- data/MultiMarkdown-4/strtok.h +0 -9
- data/MultiMarkdown-4/text.c +0 -53
- data/MultiMarkdown-4/text.h +0 -11
- data/MultiMarkdown-4/toc.c +0 -142
- data/MultiMarkdown-4/toc.h +0 -15
- data/MultiMarkdown-4/transclude.c +0 -307
- data/MultiMarkdown-4/transclude.h +0 -28
- data/MultiMarkdown-4/writer.c +0 -731
- data/MultiMarkdown-4/writer.h +0 -38
@@ -0,0 +1,596 @@
|
|
1
|
+
/**
|
2
|
+
|
3
|
+
C-Template -- Boilerplate c project with cmake support, CuTest unit testing, and more.
|
4
|
+
|
5
|
+
@file aho-corasick.c
|
6
|
+
|
7
|
+
@brief C implementation of the Aho-Corasick algorithm for searching text
|
8
|
+
for multiple strings simultaneously in a single pass without backtracking.
|
9
|
+
|
10
|
+
<https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm>
|
11
|
+
|
12
|
+
|
13
|
+
@author Fletcher T. Penney
|
14
|
+
@bug
|
15
|
+
|
16
|
+
**/
|
17
|
+
|
18
|
+
/*
|
19
|
+
|
20
|
+
Copyright © 2015-2017 Fletcher T. Penney.
|
21
|
+
|
22
|
+
|
23
|
+
The `c-template` project is released under the MIT License.
|
24
|
+
|
25
|
+
GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project:
|
26
|
+
|
27
|
+
https://github.com/fletcher/MultiMarkdown-4/
|
28
|
+
|
29
|
+
MMD 4 is released under both the MIT License and GPL.
|
30
|
+
|
31
|
+
|
32
|
+
CuTest is released under the zlib/libpng license. See CuTest.c for the text
|
33
|
+
of the license.
|
34
|
+
|
35
|
+
|
36
|
+
## The MIT License ##
|
37
|
+
|
38
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
39
|
+
of this software and associated documentation files (the "Software"), to deal
|
40
|
+
in the Software without restriction, including without limitation the rights
|
41
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
42
|
+
copies of the Software, and to permit persons to whom the Software is
|
43
|
+
furnished to do so, subject to the following conditions:
|
44
|
+
|
45
|
+
The above copyright notice and this permission notice shall be included in
|
46
|
+
all copies or substantial portions of the Software.
|
47
|
+
|
48
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
49
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
50
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
51
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
52
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
53
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
54
|
+
THE SOFTWARE.
|
55
|
+
|
56
|
+
*/
|
57
|
+
|
58
|
+
#include <stdio.h>
|
59
|
+
#include <stdlib.h>
|
60
|
+
#include <stdbool.h>
|
61
|
+
|
62
|
+
#include "aho-corasick.h"
|
63
|
+
|
64
|
+
#define kTrieStartingSize 256
|
65
|
+
|
66
|
+
void trie_to_graphviz(trie * a);
|
67
|
+
|
68
|
+
|
69
|
+
trie * trie_new(size_t startingSize) {
|
70
|
+
trie * a = malloc(sizeof(trie));
|
71
|
+
|
72
|
+
if (a) {
|
73
|
+
if (startingSize <= 1) {
|
74
|
+
startingSize = kTrieStartingSize;
|
75
|
+
}
|
76
|
+
|
77
|
+
a->node = malloc(sizeof(trie_node) * startingSize);
|
78
|
+
|
79
|
+
if (!a->node) {
|
80
|
+
free(a);
|
81
|
+
return NULL;
|
82
|
+
}
|
83
|
+
|
84
|
+
// Clear memory
|
85
|
+
memset(a->node, 0, sizeof(trie_node) * startingSize);
|
86
|
+
|
87
|
+
// All tries have a root node
|
88
|
+
a->size = 1;
|
89
|
+
a->capacity = startingSize;
|
90
|
+
}
|
91
|
+
|
92
|
+
return a;
|
93
|
+
}
|
94
|
+
|
95
|
+
|
96
|
+
void trie_free(trie * a) {
|
97
|
+
free(a->node);
|
98
|
+
free(a);
|
99
|
+
}
|
100
|
+
|
101
|
+
|
102
|
+
bool trie_node_insert(trie * a, size_t s, const unsigned char * key, unsigned short match_type, unsigned short depth) {
|
103
|
+
// Get node for state s
|
104
|
+
trie_node * n = &a->node[s];
|
105
|
+
|
106
|
+
size_t i;
|
107
|
+
|
108
|
+
if (key[0] == '\0') {
|
109
|
+
// We've hit end of key
|
110
|
+
n->match_type = match_type;
|
111
|
+
n->len = depth;
|
112
|
+
return true; // Success
|
113
|
+
}
|
114
|
+
|
115
|
+
if (n->child[key[0]] != 0) {
|
116
|
+
// First character already in trie, advance forward
|
117
|
+
return trie_node_insert(a, n->child[key[0]], key + 1, match_type, ++depth);
|
118
|
+
} else {
|
119
|
+
// Create new node
|
120
|
+
|
121
|
+
// Ensure capacity
|
122
|
+
if (a->size == a->capacity) {
|
123
|
+
a->capacity *= 2;
|
124
|
+
a->node = realloc(a->node, a->capacity * sizeof(trie_node));
|
125
|
+
|
126
|
+
// Set n to new location
|
127
|
+
n = &(a->node[s]);
|
128
|
+
}
|
129
|
+
|
130
|
+
// Current node points to next node
|
131
|
+
i = a->size;
|
132
|
+
n->child[key[0]] = i;
|
133
|
+
|
134
|
+
// Initialize new node to 0
|
135
|
+
n = &a->node[i];
|
136
|
+
memset(n, 0, sizeof(trie_node));
|
137
|
+
|
138
|
+
// Set char for new node
|
139
|
+
n->c = key[0];
|
140
|
+
|
141
|
+
// Incremement size
|
142
|
+
a->size++;
|
143
|
+
|
144
|
+
// Advance forward
|
145
|
+
return trie_node_insert(a, i, key + 1, match_type, ++depth);
|
146
|
+
}
|
147
|
+
}
|
148
|
+
|
149
|
+
|
150
|
+
bool trie_insert(trie * a, const char * key, unsigned short match_type) {
|
151
|
+
if (a && key && (key[0] != '\0')) {
|
152
|
+
return trie_node_insert(a, 0, (const unsigned char *)key, match_type, 0);
|
153
|
+
}
|
154
|
+
|
155
|
+
return false;
|
156
|
+
}
|
157
|
+
|
158
|
+
|
159
|
+
#ifdef TEST
|
160
|
+
void Test_trie_insert(CuTest* tc) {
|
161
|
+
trie * a = trie_new(0);
|
162
|
+
|
163
|
+
CuAssertIntEquals(tc, kTrieStartingSize, a->capacity);
|
164
|
+
CuAssertIntEquals(tc, 1, a->size);
|
165
|
+
|
166
|
+
trie_insert(a, "foo", 42);
|
167
|
+
|
168
|
+
trie_node * n = &a->node[0];
|
169
|
+
CuAssertIntEquals(tc, 0, n->match_type);
|
170
|
+
CuAssertIntEquals(tc, 1, n->child['f']);
|
171
|
+
CuAssertIntEquals(tc, '\0', n->c);
|
172
|
+
|
173
|
+
n = &a->node[1];
|
174
|
+
CuAssertIntEquals(tc, 0, n->match_type);
|
175
|
+
CuAssertIntEquals(tc, 2, n->child['o']);
|
176
|
+
CuAssertIntEquals(tc, 'f', n->c);
|
177
|
+
|
178
|
+
n = &a->node[2];
|
179
|
+
CuAssertIntEquals(tc, 0, n->match_type);
|
180
|
+
CuAssertIntEquals(tc, 3, n->child['o']);
|
181
|
+
CuAssertIntEquals(tc, 'o', n->c);
|
182
|
+
|
183
|
+
n = &a->node[3];
|
184
|
+
CuAssertIntEquals(tc, 42, n->match_type);
|
185
|
+
CuAssertIntEquals(tc, 3, n->len);
|
186
|
+
CuAssertIntEquals(tc, 'o', n->c);
|
187
|
+
|
188
|
+
trie_free(a);
|
189
|
+
}
|
190
|
+
#endif
|
191
|
+
|
192
|
+
|
193
|
+
size_t trie_node_search(trie * a, size_t s, const char * query) {
|
194
|
+
if (query[0] == '\0') {
|
195
|
+
// Found matching state
|
196
|
+
return s;
|
197
|
+
}
|
198
|
+
|
199
|
+
if (a->node[s].child[(unsigned char)query[0]] == 0) {
|
200
|
+
// Failed to match
|
201
|
+
return -1;
|
202
|
+
}
|
203
|
+
|
204
|
+
// Partial match, keep going
|
205
|
+
return trie_node_search(a, a->node[s].child[(unsigned char)query[0]], query + 1);
|
206
|
+
}
|
207
|
+
|
208
|
+
|
209
|
+
size_t trie_search(trie * a, const char * query) {
|
210
|
+
if (a && query) {
|
211
|
+
return trie_node_search(a, 0, query);
|
212
|
+
}
|
213
|
+
|
214
|
+
return 0;
|
215
|
+
}
|
216
|
+
|
217
|
+
|
218
|
+
unsigned short trie_search_match_type(trie * a, const char * query) {
|
219
|
+
size_t s = trie_search(a, query);
|
220
|
+
|
221
|
+
if (s == -1) {
|
222
|
+
return -1;
|
223
|
+
}
|
224
|
+
|
225
|
+
return a->node[s].match_type;
|
226
|
+
}
|
227
|
+
|
228
|
+
|
229
|
+
#ifdef TEST
|
230
|
+
void Test_trie_search(CuTest* tc) {
|
231
|
+
trie * a = trie_new(0);
|
232
|
+
|
233
|
+
trie_insert(a, "foo", 42);
|
234
|
+
trie_insert(a, "bar", 41);
|
235
|
+
trie_insert(a, "food", 40);
|
236
|
+
|
237
|
+
CuAssertIntEquals(tc, 3, trie_search(a, "foo"));
|
238
|
+
CuAssertIntEquals(tc, 42, trie_search_match_type(a, "foo"));
|
239
|
+
|
240
|
+
CuAssertIntEquals(tc, 6, trie_search(a, "bar"));
|
241
|
+
CuAssertIntEquals(tc, 41, trie_search_match_type(a, "bar"));
|
242
|
+
|
243
|
+
CuAssertIntEquals(tc, 7, trie_search(a, "food"));
|
244
|
+
CuAssertIntEquals(tc, 40, trie_search_match_type(a, "food"));
|
245
|
+
|
246
|
+
CuAssertIntEquals(tc, -1, trie_search(a, "foot"));
|
247
|
+
CuAssertIntEquals(tc, (unsigned short) - 1, trie_search_match_type(a, "foot"));
|
248
|
+
|
249
|
+
trie_free(a);
|
250
|
+
}
|
251
|
+
#endif
|
252
|
+
|
253
|
+
|
254
|
+
void ac_trie_node_prepare(trie * a, size_t s, char * buffer, unsigned short depth, size_t last_match_state) {
|
255
|
+
|
256
|
+
buffer[depth] = '\0';
|
257
|
+
buffer[depth + 1] = '\0';
|
258
|
+
|
259
|
+
// Current node
|
260
|
+
trie_node * n = &(a->node[s]);
|
261
|
+
|
262
|
+
char * suffix = buffer;
|
263
|
+
|
264
|
+
// Longest match seen so far??
|
265
|
+
suffix += 1;
|
266
|
+
|
267
|
+
// Find valid suffixes for failure path
|
268
|
+
while ((suffix[0] != '\0') && (n->ac_fail == 0)) {
|
269
|
+
n->ac_fail = trie_search(a, suffix);
|
270
|
+
|
271
|
+
if (n->ac_fail == -1) {
|
272
|
+
n->ac_fail = 0;
|
273
|
+
}
|
274
|
+
|
275
|
+
if (n->ac_fail == s) {
|
276
|
+
// Something went wrong
|
277
|
+
fprintf(stderr, "Recursive trie fallback detected at state %lu('%c') - suffix:'%s'!\n", s, n->c, suffix);
|
278
|
+
n->ac_fail = 0;
|
279
|
+
}
|
280
|
+
|
281
|
+
suffix++;
|
282
|
+
}
|
283
|
+
|
284
|
+
|
285
|
+
// Prepare children
|
286
|
+
for (int i = 0; i < 256; ++i) {
|
287
|
+
if ((n->child[i] != 0) &&
|
288
|
+
(n->child[i] != s)) {
|
289
|
+
buffer[depth] = i;
|
290
|
+
|
291
|
+
ac_trie_node_prepare(a, n->child[i], buffer, depth + 1, last_match_state);
|
292
|
+
}
|
293
|
+
}
|
294
|
+
}
|
295
|
+
|
296
|
+
/// Prepare trie for Aho-Corasick search algorithm by mapping failure connections
|
297
|
+
void ac_trie_prepare(trie * a) {
|
298
|
+
// Clear old pointers
|
299
|
+
for (size_t i = 0; i < a->size; ++i) {
|
300
|
+
a->node[i].ac_fail = 0;
|
301
|
+
}
|
302
|
+
|
303
|
+
// Create a buffer to use
|
304
|
+
char buffer[a->capacity];
|
305
|
+
|
306
|
+
ac_trie_node_prepare(a, 0, buffer, 0, 0);
|
307
|
+
}
|
308
|
+
|
309
|
+
|
310
|
+
|
311
|
+
#ifdef TEST
|
312
|
+
void Test_trie_prepare(CuTest* tc) {
|
313
|
+
trie * a = trie_new(0);
|
314
|
+
|
315
|
+
trie_insert(a, "a", 1);
|
316
|
+
trie_insert(a, "aa", 2);
|
317
|
+
trie_insert(a, "aaa", 3);
|
318
|
+
trie_insert(a, "aaaa", 4);
|
319
|
+
|
320
|
+
ac_trie_prepare(a);
|
321
|
+
|
322
|
+
trie_free(a);
|
323
|
+
}
|
324
|
+
#endif
|
325
|
+
|
326
|
+
|
327
|
+
match * match_new(size_t start, size_t len, unsigned short match_type) {
|
328
|
+
match * m = malloc(sizeof(match));
|
329
|
+
|
330
|
+
if (m) {
|
331
|
+
m->start = start;
|
332
|
+
m->len = len;
|
333
|
+
m->match_type = match_type;
|
334
|
+
m->next = NULL;
|
335
|
+
m->prev = NULL;
|
336
|
+
}
|
337
|
+
|
338
|
+
return m;
|
339
|
+
}
|
340
|
+
|
341
|
+
|
342
|
+
void match_free(match * m) {
|
343
|
+
if (m) {
|
344
|
+
if (m->next) {
|
345
|
+
match_free(m->next);
|
346
|
+
}
|
347
|
+
|
348
|
+
free(m);
|
349
|
+
}
|
350
|
+
}
|
351
|
+
|
352
|
+
|
353
|
+
match * match_add(match * last, size_t start, size_t len, unsigned short match_type) {
|
354
|
+
if (last) {
|
355
|
+
last->next = match_new(start, len, match_type);
|
356
|
+
last->next->prev = last;
|
357
|
+
return last->next;
|
358
|
+
} else {
|
359
|
+
return match_new(start, len, match_type);
|
360
|
+
}
|
361
|
+
|
362
|
+
return NULL;
|
363
|
+
}
|
364
|
+
|
365
|
+
|
366
|
+
match * ac_trie_search(trie * a, const char * source, size_t start, size_t len) {
|
367
|
+
|
368
|
+
// Store results in a linked list
|
369
|
+
// match * result = match_new(0, 0, 0);
|
370
|
+
match * result = NULL;
|
371
|
+
match * m = result;
|
372
|
+
|
373
|
+
// Keep track of our state
|
374
|
+
size_t state = 0;
|
375
|
+
size_t temp_state;
|
376
|
+
|
377
|
+
// Character being compared
|
378
|
+
unsigned char test_value;
|
379
|
+
size_t counter = start;
|
380
|
+
size_t stop = start + len;
|
381
|
+
|
382
|
+
while ((counter < stop) && (source[counter] != '\0')) {
|
383
|
+
// Read next character
|
384
|
+
test_value = (unsigned char)source[counter++];
|
385
|
+
|
386
|
+
// Check for path that allows us to match next character
|
387
|
+
while (state != 0 && a->node[state].child[test_value] == 0) {
|
388
|
+
state = a->node[state].ac_fail;
|
389
|
+
}
|
390
|
+
|
391
|
+
// Advance state for the next character
|
392
|
+
state = a->node[state].child[test_value];
|
393
|
+
|
394
|
+
// Check for partial matches
|
395
|
+
temp_state = state;
|
396
|
+
|
397
|
+
while (temp_state != 0) {
|
398
|
+
if (a->node[temp_state].match_type) {
|
399
|
+
// This is a match
|
400
|
+
if (!m) {
|
401
|
+
result = match_new(0, 0, 0);
|
402
|
+
m = result;
|
403
|
+
}
|
404
|
+
|
405
|
+
m = match_add(m, counter - a->node[temp_state].len,
|
406
|
+
a->node[temp_state].len, a->node[temp_state].match_type);
|
407
|
+
}
|
408
|
+
|
409
|
+
// Iterate to find shorter matches
|
410
|
+
temp_state = a->node[temp_state].ac_fail;
|
411
|
+
}
|
412
|
+
}
|
413
|
+
|
414
|
+
return result;
|
415
|
+
}
|
416
|
+
|
417
|
+
|
418
|
+
void match_excise(match * m) {
|
419
|
+
if (m) {
|
420
|
+
if (m->prev) {
|
421
|
+
m->prev->next = m->next;
|
422
|
+
}
|
423
|
+
|
424
|
+
if (m->next) {
|
425
|
+
m->next->prev = m->prev;
|
426
|
+
}
|
427
|
+
|
428
|
+
free(m);
|
429
|
+
}
|
430
|
+
}
|
431
|
+
|
432
|
+
|
433
|
+
int match_count(match * m) {
|
434
|
+
int result = 0;
|
435
|
+
m = m->next; // Skip header
|
436
|
+
|
437
|
+
while (m) {
|
438
|
+
result++;
|
439
|
+
m = m->next;
|
440
|
+
}
|
441
|
+
|
442
|
+
return result;
|
443
|
+
}
|
444
|
+
|
445
|
+
|
446
|
+
void match_describe(match * m, const char * source) {
|
447
|
+
fprintf(stderr, "'%.*s'(%d) at %lu:%lu\n", (int)m->len, &source[m->start],
|
448
|
+
m->match_type, m->start, m->start + m->len);
|
449
|
+
}
|
450
|
+
|
451
|
+
|
452
|
+
void match_set_describe(match * m, const char * source) {
|
453
|
+
m = m->next; // Skip header
|
454
|
+
|
455
|
+
while (m) {
|
456
|
+
match_describe(m, source);
|
457
|
+
m = m->next;
|
458
|
+
}
|
459
|
+
}
|
460
|
+
|
461
|
+
|
462
|
+
void match_set_filter_leftmost_longest(match * header) {
|
463
|
+
// Filter results to include only leftmost/longest results
|
464
|
+
match * m = header->next; // Skip header
|
465
|
+
match * n;
|
466
|
+
|
467
|
+
while (m) {
|
468
|
+
if (m->next) {
|
469
|
+
if (m->start == m->next->start) {
|
470
|
+
// The next match is longer than this one
|
471
|
+
n = m;
|
472
|
+
m = m->next;
|
473
|
+
match_excise(n);
|
474
|
+
continue;
|
475
|
+
}
|
476
|
+
|
477
|
+
while (m->next &&
|
478
|
+
m->next->start > m->start &&
|
479
|
+
m->next->start < m->start + m->len) {
|
480
|
+
// This match is "lefter" than next
|
481
|
+
match_excise(m->next);
|
482
|
+
}
|
483
|
+
|
484
|
+
while (m->next &&
|
485
|
+
m->next->start < m->start) {
|
486
|
+
// Next match is "lefter" than us
|
487
|
+
n = m;
|
488
|
+
m = m->prev;
|
489
|
+
match_excise(n);
|
490
|
+
}
|
491
|
+
}
|
492
|
+
|
493
|
+
while (m->prev &&
|
494
|
+
m->prev->len &&
|
495
|
+
m->prev->start >= m->start) {
|
496
|
+
// We are "lefter" than previous
|
497
|
+
n = m->prev;
|
498
|
+
match_excise(n);
|
499
|
+
}
|
500
|
+
|
501
|
+
m = m->next;
|
502
|
+
}
|
503
|
+
}
|
504
|
+
|
505
|
+
|
506
|
+
match * ac_trie_leftmost_longest_search(trie * a, const char * source, size_t start, size_t len) {
|
507
|
+
match * result = ac_trie_search(a, source, start, len);
|
508
|
+
|
509
|
+
if (result) {
|
510
|
+
match_set_filter_leftmost_longest(result);
|
511
|
+
}
|
512
|
+
|
513
|
+
return result;
|
514
|
+
}
|
515
|
+
|
516
|
+
|
517
|
+
#ifdef TEST
|
518
|
+
void Test_aho_trie_search(CuTest* tc) {
|
519
|
+
trie * a = trie_new(0);
|
520
|
+
|
521
|
+
trie_insert(a, "foo", 42);
|
522
|
+
trie_insert(a, "bar", 41);
|
523
|
+
trie_insert(a, "food", 40);
|
524
|
+
|
525
|
+
ac_trie_prepare(a);
|
526
|
+
|
527
|
+
match * m = ac_trie_search(a, "this is a bar that serves food.", 0, 31);
|
528
|
+
|
529
|
+
match_free(m);
|
530
|
+
trie_free(a);
|
531
|
+
|
532
|
+
|
533
|
+
a = trie_new(0);
|
534
|
+
|
535
|
+
trie_insert(a, "A", 1);
|
536
|
+
trie_insert(a, "AB", 2);
|
537
|
+
trie_insert(a, "ABC", 3);
|
538
|
+
trie_insert(a, "BC", 4);
|
539
|
+
trie_insert(a, "BCD", 5);
|
540
|
+
trie_insert(a, "E", 6);
|
541
|
+
trie_insert(a, "EFGHIJ", 7);
|
542
|
+
trie_insert(a, "F", 8);
|
543
|
+
trie_insert(a, "ZABCABCZ", 9);
|
544
|
+
trie_insert(a, "ZAB", 10);
|
545
|
+
|
546
|
+
ac_trie_prepare(a);
|
547
|
+
|
548
|
+
m = ac_trie_search(a, "ABCDEFGGGAZABCABCDZABCABCZ", 0, 26);
|
549
|
+
fprintf(stderr, "Finish with %d matches\n", match_count(m));
|
550
|
+
match_set_describe(m, "ABCDEFGGGAZABCABCDZABCABCZ");
|
551
|
+
match_free(m);
|
552
|
+
|
553
|
+
m = ac_trie_leftmost_longest_search(a, "ABCDEFGGGAZABCABCDZABCABCZ", 0, 26);
|
554
|
+
fprintf(stderr, "Finish with %d matches\n", match_count(m));
|
555
|
+
match_set_describe(m, "ABCDEFGGGAZABCABCDZABCABCZ");
|
556
|
+
match_free(m);
|
557
|
+
|
558
|
+
// trie_to_graphviz(a);
|
559
|
+
|
560
|
+
trie_free(a);
|
561
|
+
}
|
562
|
+
#endif
|
563
|
+
|
564
|
+
|
565
|
+
void trie_node_to_graphviz(trie * a, size_t s) {
|
566
|
+
trie_node * n = &a->node[s];
|
567
|
+
|
568
|
+
if (n->match_type) {
|
569
|
+
fprintf(stderr, "\"%lu\" [shape=doublecircle]\n", s);
|
570
|
+
}
|
571
|
+
|
572
|
+
for (int i = 0; i < 256; ++i) {
|
573
|
+
if (n->child[i]) {
|
574
|
+
switch (i) {
|
575
|
+
default:
|
576
|
+
fprintf(stderr, "\"%lu\" -> \"%lu\" [label=\"%c\"]\n", s, n->child[i], (char)i);
|
577
|
+
}
|
578
|
+
}
|
579
|
+
}
|
580
|
+
|
581
|
+
if (n->ac_fail) {
|
582
|
+
fprintf(stderr, "\"%lu\" -> \"%lu\" [label=\"fail\"]\n", s, n->ac_fail);
|
583
|
+
}
|
584
|
+
}
|
585
|
+
|
586
|
+
|
587
|
+
void trie_to_graphviz(trie * a) {
|
588
|
+
fprintf(stderr, "digraph dfa {\n");
|
589
|
+
|
590
|
+
for (int i = 0; i < a->size; ++i) {
|
591
|
+
trie_node_to_graphviz(a, i);
|
592
|
+
}
|
593
|
+
|
594
|
+
fprintf(stderr, "}\n");
|
595
|
+
}
|
596
|
+
|