ruby-sfst 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +1 -0
- data/Manifest +31 -0
- data/README.rdoc +25 -0
- data/Rakefile +22 -0
- data/ext/sfst_machine/alphabet.C +807 -0
- data/ext/sfst_machine/alphabet.h +281 -0
- data/ext/sfst_machine/basic.C +84 -0
- data/ext/sfst_machine/basic.h +24 -0
- data/ext/sfst_machine/compact.C +616 -0
- data/ext/sfst_machine/compact.h +98 -0
- data/ext/sfst_machine/determinise.C +304 -0
- data/ext/sfst_machine/extconf.rb +4 -0
- data/ext/sfst_machine/fst-compiler.C +2375 -0
- data/ext/sfst_machine/fst-compiler.h +113 -0
- data/ext/sfst_machine/fst-compiler.yy +213 -0
- data/ext/sfst_machine/fst.C +966 -0
- data/ext/sfst_machine/fst.h +365 -0
- data/ext/sfst_machine/interface.C +1838 -0
- data/ext/sfst_machine/interface.h +94 -0
- data/ext/sfst_machine/make-compact.C +328 -0
- data/ext/sfst_machine/make-compact.h +34 -0
- data/ext/sfst_machine/mem.h +74 -0
- data/ext/sfst_machine/operators.C +1131 -0
- data/ext/sfst_machine/sfst_machine.cc +411 -0
- data/ext/sfst_machine/utf8-scanner.C +2197 -0
- data/ext/sfst_machine/utf8-scanner.ll +179 -0
- data/ext/sfst_machine/utf8.C +146 -0
- data/ext/sfst_machine/utf8.h +19 -0
- data/lib/sfst.rb +99 -0
- data/ruby-sfst.gemspec +34 -0
- data/test/test_sfst.fst +3 -0
- data/test/test_sfst.rb +119 -0
- metadata +100 -0
@@ -0,0 +1,1131 @@
|
|
1
|
+
|
2
|
+
/*******************************************************************/
|
3
|
+
/* */
|
4
|
+
/* FILE operators.C */
|
5
|
+
/* MODULE operators */
|
6
|
+
/* PROGRAM SFST */
|
7
|
+
/* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
|
8
|
+
/* */
|
9
|
+
/*******************************************************************/
|
10
|
+
|
11
|
+
|
12
|
+
#include "fst.h"
|
13
|
+
|
14
|
+
using std::pair;
|
15
|
+
using std::cerr;
|
16
|
+
|
17
|
+
// Forward declaration: add_composed_node() (defined first) calls
// compose_nodes(), and compose_nodes() in turn calls add_composed_node().
static void compose_nodes( Node*, Node*, Node*, Transducer*, PairMapping& );
|
18
|
+
|
19
|
+
|
20
|
+
/*******************************************************************/
|
21
|
+
/* */
|
22
|
+
/* check_cyclicity */
|
23
|
+
/* */
|
24
|
+
/*******************************************************************/
|
25
|
+
|
26
|
+
// Depth-first search for a cycle that consists only of arcs whose label
// reports upper_is_epsilon().
//   node     - node to explore
//   visited  - set of nodes on the current search path; a node is removed
//              again once its subtree has been explored without a hit
//   alphabet - used to print the labels of the arcs on a detected cycle
// Returns true if such a cycle is reachable from node. While unwinding, the
// label of each arc on the offending path is written to cerr.
static bool check_cyclicity( Node *node, NodeHashSet &visited,
                             const Alphabet &alphabet)
{
  // A failed insertion means the node is already on the current path.
  const bool newly_inserted = visited.insert(node).second;
  if (!newly_inserted)
    return true;

  for( ArcsIter it(node->arcs()); it; it++ ) {
    Arc *edge = it;

    // Only arcs with an epsilon upper side are followed.
    if (!edge->label().upper_is_epsilon())
      continue;
    if (!check_cyclicity(edge->target_node(), visited, alphabet))
      continue;

    cerr << alphabet.write_label(edge->label()) << "\n";
    return true;
  }

  // No cycle through this node: take it off the path again.
  visited.erase(node);
  return false;
}
|
44
|
+
|
45
|
+
|
46
|
+
/*******************************************************************/
|
47
|
+
/* */
|
48
|
+
/* Transducer::infinitely_ambiguous_node */
|
49
|
+
/* */
|
50
|
+
/*******************************************************************/
|
51
|
+
|
52
|
+
// Recursive helper of is_infinitely_ambiguous().
// Runs check_cyclicity() from node and, if that finds nothing, recurses into
// the targets of all outgoing arcs. Nodes for which was_visited(vmark)
// reports true are skipped (presumably was_visited also sets the mark --
// confirm in fst.h).
// Returns true as soon as any reachable node starts an epsilon-upper cycle.
bool Transducer::infinitely_ambiguous_node( Node *node )
{
  if (node->was_visited( vmark ))
    return false;

  NodeHashSet on_path;
  if (check_cyclicity(node, on_path, alphabet))
    return true;

  // continue the search at every successor
  for( ArcsIter it(node->arcs()); it; it++ ) {
    Arc *edge = it;
    if (infinitely_ambiguous_node( edge->target_node() ))
      return true;
  }
  return false;
}
|
69
|
+
|
70
|
+
|
71
|
+
/*******************************************************************/
|
72
|
+
/* */
|
73
|
+
/* Transducer::is_infinitely_ambiguous */
|
74
|
+
/* */
|
75
|
+
/*******************************************************************/
|
76
|
+
|
77
|
+
bool Transducer::is_infinitely_ambiguous()
|
78
|
+
|
79
|
+
{
|
80
|
+
incr_vmark();
|
81
|
+
return infinitely_ambiguous_node(root_node());
|
82
|
+
}
|
83
|
+
|
84
|
+
|
85
|
+
/*******************************************************************/
|
86
|
+
/* */
|
87
|
+
/* Transducer::is_cyclic_node */
|
88
|
+
/* */
|
89
|
+
/*******************************************************************/
|
90
|
+
|
91
|
+
// Recursive helper of is_cyclic().
//   node     - node to explore
//   previous - set of nodes on the current path from the root to node
// Returns true if some arc reachable from node leads back to a node that is
// on the current path, i.e. the transducer contains a cycle.
// Nodes for which was_visited(vmark) reports true are not explored again.
bool Transducer::is_cyclic_node( Node *node, NodeHashSet &previous )
{
  if (node->was_visited( vmark ))
    return false;

  // put this node on the current path
  previous.insert(node);

  // iterate over all outgoing arcs
  for( ArcsIter p(node->arcs()); p; p++ ) {
    Arc *arc=p;
    Node *target = arc->target_node();
    if (previous.find(target) != previous.end() ||
        is_cyclic_node( target, previous ))
      return true;
  }

  // Remove the node by key rather than through an iterator obtained before
  // the recursive calls: those calls insert into the same set, and some hash
  // containers invalidate iterators when an insertion triggers a rehash.
  previous.erase(node);
  return false;
}
|
111
|
+
|
112
|
+
|
113
|
+
/*******************************************************************/
|
114
|
+
/* */
|
115
|
+
/* Transducer::is_cyclic */
|
116
|
+
/* */
|
117
|
+
/*******************************************************************/
|
118
|
+
|
119
|
+
bool Transducer::is_cyclic()
|
120
|
+
|
121
|
+
{
|
122
|
+
incr_vmark();
|
123
|
+
NodeHashSet previous;
|
124
|
+
return is_cyclic_node(root_node(), previous);
|
125
|
+
}
|
126
|
+
|
127
|
+
|
128
|
+
|
129
|
+
/*******************************************************************/
|
130
|
+
/* */
|
131
|
+
/* Transducer::is_automaton_node */
|
132
|
+
/* */
|
133
|
+
/*******************************************************************/
|
134
|
+
|
135
|
+
// Recursive helper of is_automaton().
// Returns false as soon as an arc reachable from node carries a label whose
// upper and lower character differ; returns true otherwise. Nodes for which
// was_visited(vmark) reports true are treated as already checked.
bool Transducer::is_automaton_node( Node *node )
{
  if (node->was_visited( vmark ))
    return true;

  for( ArcsIter it(node->arcs()); it; it++ ) {
    Arc *edge = it;
    Label lab = edge->label();

    // a non-identity label, or one further down, disqualifies the node
    if (lab.upper_char() != lab.lower_char() ||
        !is_automaton_node( edge->target_node() ))
      return false;
  }
  return true;
}
|
151
|
+
|
152
|
+
|
153
|
+
/*******************************************************************/
|
154
|
+
/* */
|
155
|
+
/* Transducer::is_automaton */
|
156
|
+
/* */
|
157
|
+
/*******************************************************************/
|
158
|
+
|
159
|
+
bool Transducer::is_automaton()
|
160
|
+
|
161
|
+
{
|
162
|
+
incr_vmark();
|
163
|
+
return is_automaton_node(root_node());
|
164
|
+
}
|
165
|
+
|
166
|
+
|
167
|
+
/*******************************************************************/
|
168
|
+
/* */
|
169
|
+
/* Transducer::is_empty */
|
170
|
+
/* */
|
171
|
+
/*******************************************************************/
|
172
|
+
|
173
|
+
bool Transducer::is_empty()
|
174
|
+
|
175
|
+
{
|
176
|
+
if (!minimised) {
|
177
|
+
Transducer *tmp=&minimise();
|
178
|
+
bool result=tmp->is_empty();
|
179
|
+
delete tmp;
|
180
|
+
return result;
|
181
|
+
}
|
182
|
+
if (root_node()->is_final())
|
183
|
+
return false;
|
184
|
+
return root_node()->arcs()->is_empty();
|
185
|
+
}
|
186
|
+
|
187
|
+
|
188
|
+
/*******************************************************************/
|
189
|
+
/* */
|
190
|
+
/* Transducer::generates_empty_string */
|
191
|
+
/* */
|
192
|
+
/*******************************************************************/
|
193
|
+
|
194
|
+
bool Transducer::generates_empty_string()
|
195
|
+
|
196
|
+
{
|
197
|
+
if (!minimised) {
|
198
|
+
Transducer *tmp=&minimise();
|
199
|
+
bool result=tmp->root_node()->is_final();
|
200
|
+
delete tmp;
|
201
|
+
return result;
|
202
|
+
}
|
203
|
+
return root_node()->is_final();
|
204
|
+
}
|
205
|
+
|
206
|
+
|
207
|
+
/*******************************************************************/
|
208
|
+
/* */
|
209
|
+
/* Transducer::reverse_node */
|
210
|
+
/* */
|
211
|
+
/*******************************************************************/
|
212
|
+
|
213
|
+
// Recursive helper of reverse().
//   node - node of this transducer to process
//   na   - transducer receiving the reversed graph
// Creates a counterpart of node in na (stored as node's forward pointer),
// links na's root to that counterpart with a default-constructed label when
// node is final, and adds for every arc node->t the arc t'->node' to na.
void Transducer::reverse_node( Node *node, Transducer *na )
{
  if (node->was_visited( vmark ))
    return;

  // counterpart of this node in the new transducer
  node->set_forward( na->new_node() );

  // final nodes become entry points: link them from the new root
  if (node->is_final())
    na->root_node()->add_arc( Label(), node->forward(), na );

  for( ArcsIter it(node->arcs()); it; it++ ) {
    Arc *edge = it;
    Node *succ = edge->target_node();

    // make sure the successor has a counterpart, then add the reverse arc
    reverse_node( succ, na );
    succ->forward()->add_arc( edge->label(), node->forward(), na );
  }
}
|
238
|
+
|
239
|
+
|
240
|
+
/*******************************************************************/
|
241
|
+
/* */
|
242
|
+
/* Transducer::reverse */
|
243
|
+
/* */
|
244
|
+
/*******************************************************************/
|
245
|
+
|
246
|
+
// Builds and returns a new, heap-allocated transducer in which every arc of
// this transducer is reversed (see reverse_node()). The counterpart of the
// old root node becomes final. The new transducer receives a copy of this
// transducer's alphabet.
Transducer &Transducer::reverse()
{
  Transducer *reversed = new Transducer();
  reversed->alphabet.copy(alphabet);

  incr_vmark();
  Node *start = root_node();
  reverse_node(start, reversed);

  // the old start node is where reversed paths end
  start->forward()->set_final(1);
  return *reversed;
}
|
257
|
+
|
258
|
+
|
259
|
+
/*******************************************************************/
|
260
|
+
/* */
|
261
|
+
/* Transducer::recode_label */
|
262
|
+
/* */
|
263
|
+
/*******************************************************************/
|
264
|
+
|
265
|
+
// Transforms a label for use in another transducer.
//   l       - label to transform
//   lswitch - if true, the two characters of the label are exchanged
//   recode  - if true, both characters are looked up as symbols in this
//             transducer's alphabet, added to al, and the label is rebuilt
//             from the codes al returns; the new label is inserted into al
//   al      - target alphabet (only touched when recode is true)
// Returns the transformed label.
Label Transducer::recode_label( Label l, bool lswitch, bool recode,
                                Alphabet &al )
{
  Label result = lswitch ? Label(l.upper_char(), l.lower_char()) : l;
  if (!recode)
    return result;

  // translate both sides via their symbols into the target alphabet
  Character lower = al.add_symbol(alphabet.code2symbol(result.lower_char()));
  Character upper = al.add_symbol(alphabet.code2symbol(result.upper_char()));
  Label recoded(lower, upper);
  al.insert(recoded);
  return recoded;
}
|
280
|
+
|
281
|
+
|
282
|
+
/*******************************************************************/
|
283
|
+
/* */
|
284
|
+
/* Transducer::copy_nodes */
|
285
|
+
/* */
|
286
|
+
/*******************************************************************/
|
287
|
+
|
288
|
+
// Recursively copies the subgraph rooted at node into transducer a.
//   node    - node of this transducer to copy
//   a       - transducer receiving the copy
//   lswitch - passed on to recode_label()
//   recode  - passed on to recode_label()
// The copy of each node is stored in its forward pointer; final status is
// carried over. Returns the copy of node.
Node *Transducer::copy_nodes( Node *node, Transducer *a,
                              bool lswitch, bool recode )
{
  // already handled in this traversal: its copy is in the forward pointer
  if (node->was_visited(vmark))
    return node->forward();

  node->set_forward(a->new_node());

  if (node->is_final())
    node->forward()->set_final(1);

  for( ArcsIter it(node->arcs()); it; it++ ) {
    Arc *edge = it;
    Node *copied_target = copy_nodes( edge->target_node(), a, lswitch, recode );

    // connect the copy of this node with the copy of the target
    Label new_label = recode_label(edge->label(), lswitch, recode, a->alphabet);
    node->forward()->add_arc( new_label, copied_target, a );
  }

  return node->forward();
}
|
312
|
+
|
313
|
+
|
314
|
+
/*******************************************************************/
|
315
|
+
/* */
|
316
|
+
/* Transducer::copy */
|
317
|
+
/* */
|
318
|
+
/*******************************************************************/
|
319
|
+
|
320
|
+
// Creates and returns a heap-allocated copy of this transducer.
//   lswitch - if true, the two characters of every label are exchanged
//   al      - optional alphabet for the copy; if NULL, this transducer's own
//             alphabet is used and labels are not recoded, otherwise labels
//             are recoded into the new alphabet (see recode_label())
// The deterministic/minimised flags and the final status of the root node
// are carried over.
Transducer &Transducer::copy( bool lswitch, const Alphabet *al )
{
  const bool recode = (al != NULL);
  Transducer *na = new Transducer();
  if (!recode)
    al = &alphabet;

  // set up the alphabet of the copy
  na->alphabet.utf8 = al->utf8;
  if (!lswitch)
    na->alphabet.copy(*al);
  else {
    na->alphabet.insert_symbols(*al);
    for( Alphabet::iterator it=al->begin(); it!=al->end(); it++ ) {
      Character lower = it->lower_char();
      Character upper = it->upper_char();
      na->alphabet.insert(Label(upper, lower));
    }
  }

  na->deterministic = deterministic;
  na->minimised = minimised;
  na->root_node()->set_final(root_node()->is_final());
  incr_vmark();

  // The root node maps onto the existing root of the copy. The result of
  // was_visited() is deliberately ignored here; presumably the call marks
  // the root as visited for this traversal -- confirm in fst.h.
  root_node()->set_forward(na->root_node());
  root_node()->was_visited(vmark);

  for( ArcsIter it(root_node()->arcs()); it; it++ ) {
    Arc *edge = it;
    Node *copied_target = copy_nodes(edge->target_node(), na, lswitch, recode);
    Label new_label = recode_label(edge->label(), lswitch, recode, na->alphabet);
    na->root_node()->add_arc( new_label, copied_target, na);
  }

  return *na;
}
|
359
|
+
|
360
|
+
|
361
|
+
/*******************************************************************/
|
362
|
+
/* */
|
363
|
+
/* Transducer::operator | */
|
364
|
+
/* */
|
365
|
+
/*******************************************************************/
|
366
|
+
|
367
|
+
// Builds and returns a new, heap-allocated transducer whose root node has
// one arc with a default-constructed label to a copy of this transducer and
// one to a copy of a. The alphabets of both operands are copied into the
// result.
Transducer &Transducer::operator|( Transducer &a )
{
  Transducer *na = new Transducer();
  na->alphabet.copy(alphabet);
  na->alphabet.copy(a.alphabet);

  // first operand
  incr_vmark();
  Node *left = copy_nodes(root_node(), na);
  na->root_node()->add_arc( Label(), left, na);

  // second operand
  a.incr_vmark();
  Node *right = a.copy_nodes(a.root_node(), na);
  na->root_node()->add_arc( Label(), right, na);

  return *na;
}
|
381
|
+
|
382
|
+
|
383
|
+
/*******************************************************************/
|
384
|
+
/* */
|
385
|
+
/* Transducer::rec_cat_nodes */
|
386
|
+
/* */
|
387
|
+
/*******************************************************************/
|
388
|
+
|
389
|
+
// Recursive helper used for concatenation and Kleene star.
// Every final node reachable from node loses its final status and gets an
// arc with a default-constructed label to node2. The successors are
// processed before the new arc is added to the current node.
void Transducer::rec_cat_nodes( Node *node, Node *node2 )
{
  if (node->was_visited( vmark ))
    return;

  // descend first, so the arc added below is not part of this iteration
  for( ArcsIter it(node->arcs()); it; it++ ) {
    Arc *edge = it;
    rec_cat_nodes( edge->target_node(), node2 );
  }

  if (!node->is_final())
    return;

  // link this node to node2
  node->set_final(0);
  node->add_arc( Label(), node2, this );
}
|
407
|
+
|
408
|
+
|
409
|
+
/*******************************************************************/
|
410
|
+
/* */
|
411
|
+
/* Transducer::operator+ */
|
412
|
+
/* */
|
413
|
+
/*******************************************************************/
|
414
|
+
|
415
|
+
// Builds and returns a new, heap-allocated transducer that contains a copy
// of this transducer followed by a copy of a: the final nodes of the first
// copy are linked to the root of the second copy by rec_cat_nodes().
// The alphabets of both operands are copied into the result.
Transducer &Transducer::operator+( Transducer &a )
{
  Transducer *na = new Transducer();
  na->alphabet.copy(alphabet);
  na->alphabet.copy(a.alphabet);

  // first operand: copied and hooked to the new root
  incr_vmark();
  Node *first_root = copy_nodes(root_node(), na);
  na->root_node()->add_arc( Label(), first_root, na);

  // second operand: copied, not yet connected
  a.incr_vmark();
  Node *second_root = a.copy_nodes(a.root_node(), na);

  // redirect the final nodes reachable from the root to the second copy
  na->incr_vmark();
  na->rec_cat_nodes(na->root_node(), second_root);

  return *na;
}
|
437
|
+
|
438
|
+
|
439
|
+
/*******************************************************************/
|
440
|
+
/* */
|
441
|
+
/* Transducer::kleene_star */
|
442
|
+
/* */
|
443
|
+
/*******************************************************************/
|
444
|
+
|
445
|
+
// Builds and returns a new, heap-allocated transducer for the Kleene star of
// this transducer: a copy in which every final node is linked back to the
// root node (see rec_cat_nodes()) and the root node is final.
// The result is flagged as neither deterministic nor minimised.
Transducer &Transducer::kleene_star()
{
  // take the address of the transducer returned by copy()
  Transducer *na = &copy();
  na->alphabet.copy(alphabet);

  // link back to the start node
  na->incr_vmark();
  na->rec_cat_nodes(na->root_node(), na->root_node());
  na->root_node()->set_final(1);

  na->deterministic = na->minimised = false;

  return *na;
}
|
460
|
+
|
461
|
+
|
462
|
+
/*******************************************************************/
|
463
|
+
/* */
|
464
|
+
/* Transducer::negate_nodes */
|
465
|
+
/* */
|
466
|
+
/*******************************************************************/
|
467
|
+
|
468
|
+
// Recursive helper of operator!.
// Flips the final status of every node reachable from node and, for each
// label of the alphabet that has no outgoing arc at a node, adds an arc with
// that label to the node accept.
void Transducer::negate_nodes( Node *node, Node *accept )
{
  if (node->was_visited(vmark))
    return;

  node->set_final( !node->is_final() );

  // handle the successors before new arcs are added to this node
  for( ArcsIter it(node->arcs()); it; it++ ) {
    Arc *edge = it;
    negate_nodes( edge->target_node(), accept );
  }

  // complete the node: every missing label leads to the accept node
  for( Alphabet::iterator it=alphabet.begin(); it!=alphabet.end(); it++) {
    if (node->target_node(*it))
      continue;
    node->add_arc( *it, accept, this );
  }
}
|
484
|
+
|
485
|
+
|
486
|
+
/*******************************************************************/
|
487
|
+
/* */
|
488
|
+
/* Transducer::productive_node */
|
489
|
+
/* */
|
490
|
+
/*******************************************************************/
|
491
|
+
|
492
|
+
// Recursive helper of prune().
// Determines whether a final node is reachable from node and records the
// answer in the node's forward pointer: the node itself if productive,
// NULL otherwise. Returns that answer.
// NOTE(review): a node that is reached again while it is still being
// explored (i.e. through a cycle) reports its provisional state, which is
// NULL for a non-final node. Nodes inside cycles may therefore be classified
// as unproductive depending on arc order -- verify before relying on prune().
bool Transducer::productive_node( Node *node )
{
  if (node->was_visited(vmark))
    return (node->forward() != NULL);

  // provisional answer: a final node is productive by itself
  bool productive = node->is_final();
  node->set_forward( productive ? node : NULL );

  // every successor is explored, even once the answer is known
  for( ArcsIter it(node->arcs()); it; it++ ) {
    Arc *edge = it;
    productive = productive_node( edge->target_node() ) || productive;
  }

  // the forward pointer doubles as the "is productive" flag
  if (productive)
    node->set_forward(node);
  return productive;
}
|
519
|
+
|
520
|
+
|
521
|
+
/*******************************************************************/
|
522
|
+
/* */
|
523
|
+
/* Transducer::prune_nodes */
|
524
|
+
/* */
|
525
|
+
/*******************************************************************/
|
526
|
+
|
527
|
+
// Recursive helper of prune().
// Removes every outgoing arc whose target reports that it has to be pruned.
// If the node keeps at least one arc, its forward pointer is set to the node
// itself. Returns true if the node's forward pointer is NULL afterwards,
// i.e. arcs leading to this node are to be removed by the caller.
bool Transducer::prune_nodes( Node *node )
{
  if (node->was_visited(vmark))
    return (node->forward() == NULL);

  for( ArcsIter it(node->arcs()); it; it++ ) {
    Arc *edge = it;
    const bool drop = prune_nodes( edge->target_node() );
    if (drop)
      node->arcs()->remove_arc(edge);
  }

  if (!node->arcs()->is_empty())
    node->set_forward(node);

  return (node->forward() == NULL);
}
|
541
|
+
|
542
|
+
|
543
|
+
/*******************************************************************/
|
544
|
+
/* */
|
545
|
+
/* Transducer::prune */
|
546
|
+
/* */
|
547
|
+
/*******************************************************************/
|
548
|
+
|
549
|
+
void Transducer::prune()
|
550
|
+
|
551
|
+
{
|
552
|
+
incr_vmark();
|
553
|
+
productive_node( root_node() );
|
554
|
+
incr_vmark();
|
555
|
+
prune_nodes( root_node() );
|
556
|
+
}
|
557
|
+
|
558
|
+
|
559
|
+
/*******************************************************************/
|
560
|
+
/* */
|
561
|
+
/* Transducer::operator! */
|
562
|
+
/* */
|
563
|
+
/*******************************************************************/
|
564
|
+
|
565
|
+
// Builds and returns a new, heap-allocated negation of this transducer.
// The negation is computed on a minimised transducer (a copy of this one if
// it is already minimised, otherwise the result of minimise()): the final
// status of every node is flipped and every label missing at a node leads to
// a new final node that loops on all labels of the alphabet.
// Throws a C string (const char*) if the alphabet is empty.
// The result is flagged as neither minimised nor deterministic.
Transducer &Transducer::operator!()
{
  Transducer *na;

  if (alphabet.size() == 0)
    throw "Negation of Transducer with undefined alphabet attempted!";

  if (minimised)
    na = &copy();   // take the address of the transducer returned by copy()
  else
    na = &minimise();
  na->alphabet.copy(alphabet);

  // sink node that accepts everything
  Node *accept_node=na->new_node();
  accept_node->set_final(1);
  for( Alphabet::iterator it=alphabet.begin(); it!=alphabet.end(); it++)
    accept_node->add_arc( *it, accept_node, na );

  na->incr_vmark();
  na->negate_nodes( na->root_node(), accept_node );
  //na->prune();
  na->minimised = na->deterministic = false;

  return *na;
}
|
591
|
+
|
592
|
+
|
593
|
+
/*******************************************************************/
|
594
|
+
/* */
|
595
|
+
/* conjoin_nodes */
|
596
|
+
/* */
|
597
|
+
/*******************************************************************/
|
598
|
+
|
599
|
+
// Recursive helper of Transducer::operator&.
//   n1, n2 - corresponding nodes of the two operand transducers
//   node   - node of the result transducer representing the pair (n1, n2)
//   a      - result transducer
//   map    - maps already encountered node pairs to their result node
// node becomes final if both n1 and n2 are final. For every arc of n1 whose
// label also has a target at n2, an arc with that label is added to node,
// leading to the result node for the pair of targets.
static void conjoin_nodes( Node *n1, Node *n2, Node *node,
                           Transducer *a, PairMapping &map )
{
  if (n1->is_final() && n2->is_final())
    node->set_final(1);

  for( ArcsIter it(n1->arcs()); it; it++ ) {
    Arc *edge = it;
    Label lab = edge->label();
    Node *succ1 = edge->target_node();
    Node *succ2 = n2->target_node(lab);

    // the second node has no arc with this label
    if (!succ2)
      continue;

    PairMapping::iterator known = map.find(succ1, succ2);
    if (known != map.end()) {
      // pair seen before: just link to its node
      node->add_arc( lab, known->second, a );
      continue;
    }

    // new pair: create its node, register it, link it, then descend
    Node *fresh = a->new_node();
    map[pair<Node*,Node*>(succ1,succ2)] = fresh;
    node->add_arc( lab, fresh, a );
    conjoin_nodes( succ1, succ2, fresh, a, map );
  }
}
|
637
|
+
|
638
|
+
|
639
|
+
/*******************************************************************/
|
640
|
+
/* */
|
641
|
+
/* Transducer::operator & */
|
642
|
+
/* */
|
643
|
+
/*******************************************************************/
|
644
|
+
|
645
|
+
// Builds and returns a new, heap-allocated conjunction of this transducer
// and a (see conjoin_nodes()). Operands that are not flagged deterministic
// are determinised into temporaries first; the temporaries are deleted
// before returning. The result receives copies of both alphabets and is
// flagged deterministic.
Transducer &Transducer::operator&( Transducer &a )
{
  // temporary determinised versions, only created when needed
  Transducer *det1 = NULL;
  if (!deterministic)
    det1 = &determinise();
  Node *r1 = (det1 != NULL) ? det1->root_node() : root_node();

  Transducer *det2 = NULL;
  if (!a.deterministic)
    det2 = &a.determinise();
  Node *r2 = (det2 != NULL) ? det2->root_node() : a.root_node();

  PairMapping map;

  Transducer *na = new Transducer();
  na->alphabet.copy(alphabet);
  na->alphabet.copy(a.alphabet);

  // the pair of root nodes corresponds to the new root node
  map[pair<Node*,Node*>(r1, r2)] = na->root_node();

  conjoin_nodes( r1, r2, na->root_node(), na, map);

  na->deterministic = 1;
  delete det1;
  delete det2;

  return *na;
}
|
684
|
+
|
685
|
+
|
686
|
+
/*******************************************************************/
|
687
|
+
/* */
|
688
|
+
/* add_composed_node */
|
689
|
+
/* */
|
690
|
+
/*******************************************************************/
|
691
|
+
|
692
|
+
// Helper of compose_nodes().
//   l      - label of the arc to add
//   n1, n2 - target node pair in the two operand transducers
//   node   - source node in the composed transducer
//   a      - composed transducer
//   map    - maps already encountered node pairs to their composed node
// Adds an arc labelled l from node to the composed node for (n1, n2). If the
// pair is new, its node is created and registered, and composition continues
// recursively from that pair.
static void add_composed_node( Label l, Node *n1, Node *n2, Node *node,
                               Transducer *a, PairMapping &map )
{
  PairMapping::iterator known = map.find(n1, n2);

  if (known == map.end()) {
    // first encounter of this pair
    Node *fresh = a->new_node();
    map[pair<Node*,Node*>(n1,n2)] = fresh;
    node->add_arc( l, fresh, a );
    compose_nodes( n1, n2, fresh, a, map );
  }
  else {
    // the pair already has a node: only the arc is needed
    node->add_arc( l, known->second, a );
  }
}
|
717
|
+
|
718
|
+
|
719
|
+
/*******************************************************************/
|
720
|
+
/* */
|
721
|
+
/* compose_nodes */
|
722
|
+
/* */
|
723
|
+
/*******************************************************************/
|
724
|
+
|
725
|
+
// Recursive helper of Transducer::operator||.
//   n1, n2 - current node pair in the first and second operand
//   node   - node of the composed transducer representing (n1, n2)
//   a      - composed transducer
//   map    - maps already encountered node pairs to their composed node
// node becomes final if both n1 and n2 are final. Arcs are combined when the
// upper character of an arc of n1 equals the lower character of an arc of
// n2; the resulting label pairs the lower character of the first with the
// upper character of the second. Arcs of n1 with an epsilon upper character
// and arcs of n2 with an epsilon lower character advance only their own side.
static void compose_nodes( Node *n1, Node *n2, Node *node,
                           Transducer *a, PairMapping &map )
{
  if (n1->is_final() && n2->is_final())
    node->set_final(1);

  // arcs of the first node
  for( ArcsIter first(n1->arcs()); first; first++ ) {
    Arc *edge1 = first;
    Node *succ1 = edge1->target_node();
    Label lab1 = edge1->label();
    Character upper1 = lab1.upper_char();
    Character lower1 = lab1.lower_char();

    if (upper1 == Label::epsilon) {
      // only the first transducer moves
      add_composed_node( lab1, succ1, n2, node, a, map );
      continue;
    }

    // look for matching arcs of the second node
    for( ArcsIter second(n2->arcs()); second; second++ ) {
      Arc *edge2 = second;
      Node *succ2 = edge2->target_node();
      Label lab2 = edge2->label();
      Character lower2 = lab2.lower_char();
      Character upper2 = lab2.upper_char();

      if (upper1 != lower2)
        continue;
      add_composed_node( Label(lower1, upper2), succ1, succ2, node, a, map );
    }
  }

  // arcs of the second node with an epsilon lower character:
  // only the second transducer moves
  for( ArcsIter second(n2->arcs()); second; second++ ) {
    Arc *edge2 = second;
    Node *succ2 = edge2->target_node();
    Label lab2 = edge2->label();

    if (lab2.lower_char() == Label::epsilon)
      add_composed_node( lab2, n1, succ2, node, a, map );
  }
}
|
768
|
+
|
769
|
+
|
770
|
+
/*******************************************************************/
|
771
|
+
/* */
|
772
|
+
/* Transducer::operator || */
|
773
|
+
/* */
|
774
|
+
/*******************************************************************/
|
775
|
+
|
776
|
+
// Builds and returns a new, heap-allocated composition of this transducer
// with a (see compose_nodes()). The alphabet of the result is produced by
// Alphabet::compose() from the alphabets of both operands.
Transducer &Transducer::operator||( Transducer &a )
{
  Transducer *na = new Transducer();
  na->alphabet.compose(alphabet, a.alphabet);

  // the pair of root nodes corresponds to the new root node
  PairMapping map;
  Node *left_root = root_node();
  Node *right_root = a.root_node();
  map[pair<Node*,Node*>(left_root, right_root)] = na->root_node();

  compose_nodes( left_root, right_root, na->root_node(), na, map );

  return *na;
}
|
792
|
+
|
793
|
+
|
794
|
+
|
795
|
+
/*******************************************************************/
|
796
|
+
/* */
|
797
|
+
/* Transducer::operator / */
|
798
|
+
/* */
|
799
|
+
/*******************************************************************/
|
800
|
+
|
801
|
+
// Builds and returns a new, heap-allocated transducer computed as the
// conjunction of this transducer with the negation of a.
// Side effects visible to the caller: complete_alphabet() is called on this
// transducer, and this transducer's alphabet is copied into a's alphabet.
Transducer &Transducer::operator/( Transducer &a )
{
  complete_alphabet();
  a.alphabet.copy(alphabet);

  // the negation is a temporary owned by this function
  Transducer &negated = !a;
  Transducer &difference = *this & negated;
  delete &negated;

  return difference;
}
|
811
|
+
|
812
|
+
|
813
|
+
/*******************************************************************/
|
814
|
+
/* */
|
815
|
+
/* Transducer::compare_nodes */
|
816
|
+
/* */
|
817
|
+
/*******************************************************************/
|
818
|
+
|
819
|
+
// Recursive helper of operator==.
//   node  - node of this transducer
//   node2 - node of a2 that is supposed to correspond to node
//   a2    - the other transducer (its vmark is used for node2)
// Returns true if the subgraphs at node and node2 match: same final status,
// the same set of arc labels, and matching targets for each label. The
// correspondence between nodes is recorded in both forward pointers, so a
// pair that is met again is accepted only if the two nodes point at each
// other.
bool Transducer::compare_nodes( Node *node, Node *node2, Transducer &a2 )
{
  // both nodes are always probed, node first
  const bool seen1 = node->was_visited( vmark );
  const bool seen2 = node2->was_visited( a2.vmark );

  if (seen1 && seen2)
    return (node->forward() == node2 && node2->forward() == node);
  if (seen1 || seen2)
    return false;   // only one of them was paired before

  // pair the two nodes
  node->set_forward( node2 );
  node2->set_forward( node );

  if (node->is_final() != node2->is_final())
    return false;

  // every arc of node needs a matching arc at node2 ...
  for( ArcsIter it(node->arcs()); it; it++ ) {
    Arc *edge = it;
    Node *other_target = node2->target_node(edge->label());

    if (other_target == NULL ||
        !compare_nodes(edge->target_node(), other_target, a2))
      return false;
  }

  // ... and node2 must not have labels that node lacks
  for( ArcsIter it(node2->arcs()); it; it++ ) {
    Arc *edge = it;
    if (node->target_node(edge->label()) == NULL)
      return false;
  }

  return true;
}
|
855
|
+
|
856
|
+
|
857
|
+
/*******************************************************************/
|
858
|
+
/* */
|
859
|
+
/* Transducer::operator == */
|
860
|
+
/* */
|
861
|
+
/*******************************************************************/
|
862
|
+
|
863
|
+
// Compares this transducer with a by comparing the node graphs of their
// minimised forms (see compare_nodes()). Operands not flagged as minimised
// are minimised into temporaries, which are deleted before returning.
// Returns true if the two graphs match.
bool Transducer::operator==( Transducer &a )
{
  // A transducer is equal to itself. Without this shortcut a minimised
  // transducer compared with itself would be walked as both operands under
  // one mark, so compare_nodes() would find the root already visited on its
  // second probe and report a mismatch.
  if (this == &a)
    return true;

  Transducer *p1 = (minimised)? this: &minimise();
  Transducer *p2 = (a.minimised)? &a: &a.minimise();

  p1->incr_vmark();
  p2->incr_vmark();
  bool result = p1->compare_nodes(p1->root_node(), p2->root_node(), *p2 );

  // release the temporaries created above
  if (p1 != this) delete p1;
  if (p2 != &a) delete p2;

  return result;
}
|
878
|
+
|
879
|
+
|
880
|
+
|
881
|
+
/*******************************************************************/
|
882
|
+
/* */
|
883
|
+
/* Transducer::map_nodes */
|
884
|
+
/* */
|
885
|
+
/*******************************************************************/
|
886
|
+
|
887
|
+
// Recursive helper of level().
//   node  - node of this transducer
//   node2 - its counterpart in a (recorded in node's forward pointer)
//   a     - transducer receiving the projected graph
//   level - selects which character of each label is kept (via get_char)
// Copies final status and, for every arc, adds an arc to node2 whose label
// is built from the selected character only.
void Transducer::map_nodes( Node *node, Node *node2, Transducer *a, Level level)
{
  if (node->was_visited(vmark))
    return;

  node->set_forward(node2);

  if (node->is_final())
    node2->set_final(1);

  for( ArcsIter it(node->arcs()); it; it++ ) {
    Arc *edge = it;
    Label projected(edge->label().get_char(level));
    Node *target = edge->target_node();

    // reuse the counterpart of an already visited target, else create one
    Node *target2 = target->check_visited(vmark) ? target->forward()
                                                 : a->new_node();

    node2->add_arc(projected, target2, a);

    map_nodes( target, target2, a, level );
  }
}
|
915
|
+
|
916
|
+
|
917
|
+
/*******************************************************************/
|
918
|
+
/* */
|
919
|
+
/* Transducer::level */
|
920
|
+
/* */
|
921
|
+
/*******************************************************************/
|
922
|
+
|
923
|
+
// Builds and returns a new, heap-allocated transducer in which every label
// is reduced to the character selected by level (see map_nodes()).
// The alphabet of the result contains, for every label of this alphabet, the
// selected character (with its symbol, if code2symbol() knows one) and the
// single-character label built from it.
Transducer &Transducer::level( Level level )
{
  Transducer *na = new Transducer();

  // project the alphabet
  for( Alphabet::iterator it=alphabet.begin(); it!=alphabet.end(); it++ ) {
    Character code = it->get_char(level);

    // carry the symbol over only if there is one for this code
    if (alphabet.code2symbol(code) != NULL)
      na->alphabet.add_symbol( alphabet.code2symbol(code), code );

    na->alphabet.insert(Label(code));
  }

  // project the graph
  incr_vmark();
  map_nodes(root_node(), na->root_node(), na, level );

  return *na;
}
|
940
|
+
|
941
|
+
|
942
|
+
/*******************************************************************/
|
943
|
+
/* */
|
944
|
+
/* Transducer::freely_insert_at_node */
|
945
|
+
/* */
|
946
|
+
/*******************************************************************/
|
947
|
+
|
948
|
+
// Recursive helper of freely_insert().
// Adds an arc labelled l from each node reachable from node back to itself.
// The self-loop is added before the node's arcs are iterated.
void Transducer::freely_insert_at_node( Node *node, Label l )
{
  if (node->was_visited(vmark))
    return;

  // self-loop labelled with l
  node->add_arc(l, node, this);

  for( ArcsIter it(node->arcs()); it; it++ ) {
    Arc *edge = it;
    freely_insert_at_node(edge->target_node(), l );
  }
}
|
961
|
+
|
962
|
+
|
963
|
+
/*******************************************************************/
|
964
|
+
/* */
|
965
|
+
/* Transducer::freely_insert */
|
966
|
+
/* */
|
967
|
+
/*******************************************************************/
|
968
|
+
|
969
|
+
// Returns a copy of this transducer in which a self-loop labelled l has been
// added to every node reached from the root by freely_insert_at_node().
//
// l : label of the inserted loop arcs
//
// The original transducer is not modified; all changes are applied to the
// object obtained from copy(), and a reference to that object is returned.
Transducer &Transducer::freely_insert( Label l )
{
  // Take the address of the transducer returned by copy().  (The text
  // "&copy" had been turned into the copyright sign by an HTML entity
  // decoder, which left this statement uncompilable.)
  Transducer *na = &copy();

  // request a new visit mark on the copy before walking its nodes
  na->incr_vmark();
  na->freely_insert_at_node(na->root_node(), l );

  return *na;
}
|
979
|
+
|
980
|
+
|
981
|
+
/*******************************************************************/
|
982
|
+
/* */
|
983
|
+
/* Transducer::splice_arc */
|
984
|
+
/* */
|
985
|
+
/*******************************************************************/
|
986
|
+
|
987
|
+
// Copies the paths that start at node into transducer a, attaching them
// below node2 and connecting their ends to next_node.
//
// node      : current node of the transducer being inserted
// node2     : node of a that corresponds to node
// next_node : node of a at which the inserted paths have to arrive
// a         : transducer that owns node2, next_node and all new nodes
//
// NOTE(review): when node is final the function returns immediately, so any
// outgoing arcs of a final node are not copied -- confirm this is intended.
// NOTE(review): no visited marks are consulted here; a cycle in the inserted
// transducer that does not pass through a final node looks like it would
// recurse without end -- verify with the callers.
void Transducer::splice_arc( Node *node, Node *node2, Node *next_node,
                             Transducer *a )
{
  if (node->is_final()) {
    // end of an inserted path: connect it to the continuation with an
    // arc carrying a default-constructed label
    node2->add_arc( Label(), next_node, a );
    return;
  }

  // every outgoing arc gets its own fresh target node in a
  ArcsIter cursor(node->arcs());
  while (cursor) {
    Arc *edge = cursor;
    Node *fresh = a->new_node();

    node2->add_arc( edge->label(), fresh, a );
    splice_arc( edge->target_node(), fresh, next_node, a );

    cursor++;
  }
}
|
1005
|
+
|
1006
|
+
|
1007
|
+
/*******************************************************************/
|
1008
|
+
/* */
|
1009
|
+
/* Transducer::splice_nodes */
|
1010
|
+
/* */
|
1011
|
+
/*******************************************************************/
|
1012
|
+
|
1013
|
+
// Copies the graph below node into transducer a (starting at node2) and
// replaces every arc whose label equals sl by the paths of transducer sa.
//
// node  : node of this transducer that is being copied
// node2 : the node of a that stands for node
// sl    : label of the arcs to be replaced
// sa    : transducer whose paths are inserted in place of such arcs
// a     : transducer under construction
//
// The forward pointer of each source node is set to its copy, so arcs that
// lead to a node already marked with vmark can reuse the existing copy.
void Transducer::splice_nodes(Node *node, Node *node2, Label sl,
                              Transducer *sa, Transducer *a)
{
  if (node->was_visited(vmark))
    return;

  node->set_forward(node2);

  // finality carries over to the copy
  if (node->is_final())
    node2->set_final(1);

  ArcsIter cursor(node->arcs());
  while (cursor) {
    Arc *edge = cursor;
    Node *src_target = edge->target_node();

    // reuse the copy of an already visited target, otherwise make a new node
    Node *dst_target = src_target->check_visited(vmark)
                         ? src_target->forward()
                         : a->new_node();

    if (edge->label() == sl) {
      // the arc is replaced by the inserted transducer
      splice_arc(sa->root_node(), node2, dst_target, a);
    }
    else {
      // the arc is copied unchanged
      node2->add_arc(edge->label(), dst_target, a);
    }

    splice_nodes( src_target, dst_target, sl, sa, a );
    cursor++;
  }
}
|
1045
|
+
|
1046
|
+
|
1047
|
+
/*******************************************************************/
|
1048
|
+
/* */
|
1049
|
+
/* Transducer::splice */
|
1050
|
+
/* */
|
1051
|
+
/*******************************************************************/
|
1052
|
+
|
1053
|
+
// Creates a new transducer in which each arc of this transducer labelled sl
// is replaced by the transducer sa (the work is done by splice_nodes()).
//
// sl : label of the arcs to replace
// sa : transducer inserted in place of those arcs
//
// Returns a reference to a Transducer allocated with new inside this
// function; presumably the caller is expected to release it (TODO confirm).
Transducer &Transducer::splice( Label sl, Transducer *sa )
{
  Transducer *result = new Transducer();

  // own alphabet, leaving out the label that is going to be replaced
  Alphabet::iterator own = alphabet.begin();
  while (own != alphabet.end()) {
    Label candidate = *own;
    if (candidate != sl) {
      result->alphabet.insert(candidate);
    }
    own++;
  }

  // every label of the inserted transducer is taken over as is
  Alphabet::iterator other = sa->alphabet.begin();
  while (other != sa->alphabet.end()) {
    result->alphabet.insert(*other);
    other++;
  }

  // request a new visit mark, then copy the node graph
  incr_vmark();
  splice_nodes(root_node(), result->root_node(), sl, sa, result );

  return *result;
}
|
1073
|
+
|
1074
|
+
|
1075
|
+
/*******************************************************************/
|
1076
|
+
/* */
|
1077
|
+
/* Transducer::replace_char */
|
1078
|
+
/* */
|
1079
|
+
/*******************************************************************/
|
1080
|
+
|
1081
|
+
// Creates a new transducer from this one with Label::replace_char(c, nc)
// applied to every alphabet label and (via replace_char2()) to every arc
// label.
//
// c  : character to be replaced
// nc : character that takes its place
//
// Returns a reference to a Transducer allocated with new inside this
// function; presumably the caller is expected to release it (TODO confirm).
Transducer &Transducer::replace_char( Character c, Character nc )
{
  Transducer *result = new Transducer();

  // rewrite the alphabet label by label
  Alphabet::iterator pos = alphabet.begin();
  while (pos != alphabet.end()) {
    Label current = *pos;
    result->alphabet.insert(current.replace_char(c,nc));
    pos++;
  }

  // request a new visit mark, then copy the node graph with rewritten labels
  incr_vmark();
  replace_char2(root_node(), result->root_node(), c, nc, result );

  return *result;
}
|
1098
|
+
|
1099
|
+
|
1100
|
+
/*******************************************************************/
|
1101
|
+
/* */
|
1102
|
+
/* Transducer::replace_char2 */
|
1103
|
+
/* */
|
1104
|
+
/*******************************************************************/
|
1105
|
+
|
1106
|
+
// Recursive helper of replace_char(): copies the graph below node into
// transducer a, starting at node2, and applies Label::replace_char(c, nc)
// to the label of every copied arc.
//
// node  : node of this transducer that is being copied
// node2 : the node of a that stands for node
// c, nc : passed on to Label::replace_char()
// a     : transducer under construction
//
// The forward pointer of each source node is set to its copy, so arcs that
// lead to a node already marked with vmark can reuse the existing copy.
void Transducer::replace_char2(Node *node, Node *node2, Character c,
                               Character nc, Transducer *a)
{
  if (node->was_visited(vmark))
    return;

  node->set_forward(node2);

  // finality carries over to the copy
  if (node->is_final())
    node2->set_final(1);

  ArcsIter cursor(node->arcs());
  while (cursor) {
    Arc *edge = cursor;
    Node *src_target = edge->target_node();

    // reuse the copy of an already visited target, otherwise make a new node
    Node *dst_target = src_target->check_visited(vmark)
                         ? src_target->forward()
                         : a->new_node();

    node2->add_arc(edge->label().replace_char(c, nc), dst_target, a);
    replace_char2( src_target, dst_target, c, nc, a );

    cursor++;
  }
}
|