ruby-sfst 0.4.3 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -0
  3. data/COPYING +280 -0
  4. data/Gemfile +3 -0
  5. data/Gemfile.lock +54 -0
  6. data/README.md +1 -1
  7. data/Rakefile +9 -18
  8. data/bin/console +7 -0
  9. data/bin/setup +6 -0
  10. data/ext/sfst/alphabet.cc +879 -0
  11. data/ext/sfst/alphabet.h +302 -0
  12. data/ext/sfst/basic.cc +85 -0
  13. data/ext/{sfst_machine → sfst}/basic.h +7 -4
  14. data/ext/sfst/compact.cc +629 -0
  15. data/ext/sfst/compact.h +100 -0
  16. data/ext/sfst/determinise.cc +279 -0
  17. data/ext/{sfst_machine → sfst}/extconf.rb +2 -1
  18. data/ext/sfst/fst.cc +1150 -0
  19. data/ext/sfst/fst.h +374 -0
  20. data/ext/sfst/hopcroft.cc +681 -0
  21. data/ext/sfst/interface.cc +1921 -0
  22. data/ext/sfst/interface.h +171 -0
  23. data/ext/sfst/make-compact.cc +323 -0
  24. data/ext/{sfst_machine → sfst}/make-compact.h +15 -13
  25. data/ext/sfst/mem.h +80 -0
  26. data/ext/sfst/operators.cc +1273 -0
  27. data/ext/{sfst_machine → sfst}/sfst_machine.cc +89 -78
  28. data/ext/sfst/sgi.h +72 -0
  29. data/ext/sfst/utf8.cc +149 -0
  30. data/ext/{sfst_machine → sfst}/utf8.h +7 -4
  31. data/lib/sfst.rb +2 -1
  32. data/lib/sfst/version.rb +1 -1
  33. data/ruby-sfst.gemspec +23 -23
  34. metadata +107 -35
  35. data/ext/sfst_machine/alphabet.cc +0 -812
  36. data/ext/sfst_machine/alphabet.h +0 -273
  37. data/ext/sfst_machine/basic.cc +0 -84
  38. data/ext/sfst_machine/compact.cc +0 -616
  39. data/ext/sfst_machine/compact.h +0 -98
  40. data/ext/sfst_machine/determinise.cc +0 -303
  41. data/ext/sfst_machine/fst.cc +0 -1000
  42. data/ext/sfst_machine/fst.h +0 -369
  43. data/ext/sfst_machine/interface.cc +0 -1842
  44. data/ext/sfst_machine/interface.h +0 -93
  45. data/ext/sfst_machine/make-compact.cc +0 -327
  46. data/ext/sfst_machine/mem.h +0 -74
  47. data/ext/sfst_machine/operators.cc +0 -1131
  48. data/ext/sfst_machine/sgi.h +0 -44
  49. data/ext/sfst_machine/utf8.cc +0 -146
  50. data/test/test_sfst.fst +0 -3
  51. data/test/test_sfst.rb +0 -114
@@ -13,22 +13,24 @@
13
13
  #include "fst.h"
14
14
  #include "compact.h"
15
15
 
16
+ namespace SFST {
16
17
 
17
- class MakeCompactTransducer : CompactTransducer {
18
+ class MakeCompactTransducer : CompactTransducer {
18
19
 
19
- private:
20
- void count_arcs(Node *node, NodeNumbering &index, long vmark);
21
- void store_arcs(Node *node, NodeNumbering &index, long vmark);
22
- void store_finalp( FILE *file );
23
- void store_first_arcs( FILE *file );
24
- void store_target_nodes( FILE *file );
25
- void store_labels( FILE *file );
20
+ private:
21
+ void count_arcs(Node *node, VType vmark);
22
+ void store_arcs(Node *node, VType vmark);
23
+ void store_finalp( FILE *file );
24
+ void store_first_arcs( FILE *file );
25
+ void store_target_nodes( FILE *file );
26
+ void store_labels( FILE *file );
26
27
 
27
- public:
28
- MakeCompactTransducer( Transducer &a, Level sort=upper );
28
+ public:
29
+ MakeCompactTransducer( Transducer &a, Level sort=upper );
29
30
 
30
- void sort( Level );
31
- void store( FILE *file );
32
- };
31
+ void sort( Level );
32
+ void store( FILE *file );
33
+ };
33
34
 
35
+ }
34
36
  #endif
@@ -0,0 +1,80 @@
1
+ /*******************************************************************/
2
+ /* */
3
+ /* FILE mem.h */
4
+ /* MODULE mem */
5
+ /* PROGRAM SFST */
6
+ /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
7
+ /* */
8
+ /* PURPOSE memory management functions */
9
+ /* */
10
+ /*******************************************************************/
11
+
12
+ #ifndef _MEM_H_
13
+ #define _MEM_H_
14
+
15
+ #include <stdlib.h>
16
+ #include <assert.h>
17
+
18
namespace SFST {

#define MEMBUFFER_SIZE 100000


  /*****************  class Mem  *************************************/

  // Block allocator: hands out pieces of malloc'ed buffers of
  // MEMBUFFER_SIZE bytes each. Individual allocations are never freed;
  // all buffers are released together by clear() or the destructor.
  //
  // NOTE: no copy constructor / assignment operator is defined. A copy
  // would share first_buffer with the original and both destructors
  // would free the same buffers, so instances must not be copied.

  class Mem {

  private:

    // The size of this union is the allocation granularity. Every block
    // returned by alloc() starts at a multiple of it, which keeps
    // pointers, longs and doubles stored in the block properly aligned
    // (the previous fixed granularity of 4 did not on 64-bit targets).
    union MemAlign {
      void *p;
      long l;
      double d;
    };

    struct MemBuffer {
      char buffer[MEMBUFFER_SIZE];
      struct MemBuffer *next;
    };

    MemBuffer *first_buffer;  // head of the buffer list = current buffer
    long pos;                 // offset of the first free byte in it

    // Prepends a fresh buffer to the list and makes it the current one.
    // Throws a string literal if malloc fails.
    void add_buffer() {
      MemBuffer *mb=(MemBuffer*)malloc(sizeof(MemBuffer));
      if (mb == NULL)
	throw "Allocation of memory failed in Mem::add_buffer!";
      mb->next = first_buffer;
      first_buffer = mb;
      pos = 0;
    }

  public:
    Mem() { first_buffer = NULL; add_buffer(); }
    ~Mem() { clear(); }

    // Frees every buffer. The object stays usable: the next alloc()
    // call creates a new buffer on demand.
    void clear() {
      while (first_buffer) {
	MemBuffer *next = first_buffer->next;
	free(first_buffer);
	first_buffer = next;
      }
      pos = 0;
    }

    // Returns a block of at least n bytes taken from the current buffer,
    // starting a new buffer when the current one is too full.
    // Throws a string literal (const char*) if the request cannot fit
    // into a single buffer or if a new buffer cannot be allocated.
    void *alloc( size_t n ) {
      const size_t unit = sizeof(MemAlign);

      // Reject oversized requests before any state is touched. Doing
      // this first also guarantees that the rounding below cannot wrap
      // around for huge values of n, and avoids adding a buffer that
      // would never be used.
      if (n > MEMBUFFER_SIZE)
	throw "Allocation of memory block larger than MEMBUFFER_SIZE attempted!";

      // round the request up to a multiple of the alignment unit
      if (n % unit)
	n += unit - (n % unit);

      // only reachable if MEMBUFFER_SIZE is not a multiple of the unit
      if (n > MEMBUFFER_SIZE)
	throw "Allocation of memory block larger than MEMBUFFER_SIZE attempted!";

      if (first_buffer == NULL || (size_t)pos + n > MEMBUFFER_SIZE)
	add_buffer();

      void *result = (void*)(first_buffer->buffer + pos);
      pos += (long)n;
      return result;
    }

    //class MemError {};

  };

}
80
+ #endif
@@ -0,0 +1,1273 @@
1
+
2
+ /*******************************************************************/
3
+ /* */
4
+ /* FILE operators.cc */
5
+ /* MODULE operators */
6
+ /* PROGRAM SFST */
7
+ /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
8
+ /* */
9
+ /*******************************************************************/
10
+
11
+
12
+ #include "fst.h"
13
+
14
+ using std::pair;
15
+ using std::cerr;
16
+
17
+ namespace SFST {
18
+
19
+ typedef map<Character, vector<Arc*> > Sym2Arcs;
20
+
21
+ // special data structures for the optimization of composition
22
+ // All transitions outgoing from the same node and having the same
23
+ // symbol on the upper (or lower) layer are stored in a hash table
24
+ // for quick retrieval
25
+
26
+ /***************** class FromTo *********************************/
27
+
28
+ class FromTo {
29
+ public:
30
+ Index from, to; // start and end of a range of transitions
31
+ Index size() { return to - from; }
32
+ };
33
+
34
+ /***************** class NodeSym ********************************/
35
+
36
+ class NodeSym {
37
+ // pair consisting of a node and a symbol
38
+ public:
39
+ Index nodeID;
40
+ Character symbol;
41
+ NodeSym( Index n, Character s ) { nodeID = n; symbol = s; }
42
+ };
43
+
44
+
45
+ /***************** class CharNode2Trans **************************/
46
+
47
+ class CharNode2Trans {
48
+
49
+ struct hashf {
50
+ size_t operator()(const NodeSym &ns) const {
51
+ return ns.nodeID ^ ns.symbol;
52
+ }
53
+ };
54
+
55
+ struct equalf {
56
+ int operator()(const NodeSym &ns1, const NodeSym &ns2) const {
57
+ return (ns1.nodeID == ns2.nodeID && ns1.symbol == ns2.symbol);
58
+ }
59
+ };
60
+
61
+ typedef hash_map<NodeSym, FromTo, hashf, equalf > NodeSym2Range;
62
+
63
+ // data structure for storing an index from node + symbol to a list
64
+ // of transitions with that symbol on the upper/lower layer
65
+ Transducer &transducer;
66
+ vector<Index> node_size;
67
+ vector<Arc*> cs_transitions; // transitions for a certain character + state
68
+ NodeSym2Range trange;
69
+
70
+ public:
71
+
72
+ CharNode2Trans(Transducer &t);
73
+ size_t hash_transitions( Node *node, bool upper );
74
+
75
+ class iterator {
76
+ CharNode2Trans &c2t;
77
+ Index current, end;
78
+ public:
79
+ iterator( CharNode2Trans &table, Index nodeID, Character symbol )
80
+ : c2t(table)
81
+ {
82
+ FromTo range=c2t.trange[NodeSym(nodeID, symbol)];
83
+ current = range.from;
84
+ end = range.to;
85
+ }
86
+ void operator++( int ) { current++; }
87
+ Arc *operator*() { return c2t.cs_transitions[current]; }
88
+ bool finished() { return current == end; }
89
+ Index size() { return end-current; };
90
+ };
91
+ };
92
+
93
+
94
+ static void compose_nodes( Node*, Node*, Node*, Transducer*, PairMapping&,
95
+ CharNode2Trans&, CharNode2Trans& );
96
+
97
+
98
+ /*******************************************************************/
99
+ /* */
100
+ /* CharNode2Trans::CharNode2Trans */
101
+ /* */
102
+ /*******************************************************************/
103
+
104
+ CharNode2Trans::CharNode2Trans(Transducer &t): transducer(t)
105
+
106
+ {
107
+ pair<Index,Index> p = transducer.nodeindexing();
108
+ Index node_count = p.first;
109
+ Index transition_count = p.second;
110
+ node_size.resize(node_count, undef);
111
+ cs_transitions.reserve(transition_count);
112
+ }
113
+
114
+
115
+ /*******************************************************************/
116
+ /* */
117
+ /* CharNode2Trans::hash_transitions */
118
+ /* */
119
+ /*******************************************************************/
120
+
121
+ size_t CharNode2Trans::hash_transitions( Node *node, bool upper )
122
+
123
+ {
124
+ size_t n = node_size[node->index];
125
+ if (n != undef)
126
+ return n;
127
+
128
+ Sym2Arcs sym2arcs;
129
+
130
+ for( ArcsIter p(node->arcs()); p; p++ ) {
131
+ Arc *arc=p;
132
+ if (upper)
133
+ sym2arcs[arc->label().upper_char()].push_back(arc);
134
+ else
135
+ sym2arcs[arc->label().lower_char()].push_back(arc);
136
+ }
137
+
138
+ for( Sym2Arcs::iterator it=sym2arcs.begin(); it!=sym2arcs.end(); it++ ) {
139
+ Character sym = it->first;
140
+ vector<Arc*> &arc = it->second;
141
+ FromTo range;
142
+ range.from = (Index)cs_transitions.size();
143
+ for( size_t i=0; i<arc.size(); i++ )
144
+ cs_transitions.push_back( arc[i] );
145
+ range.to = (Index)cs_transitions.size();
146
+ trange[NodeSym(node->index, sym)] = range;
147
+ }
148
+ n = sym2arcs.size();
149
+ node_size[node->index] = (Index)n;
150
+ return n;
151
+ }
152
+
153
+
154
+ /*******************************************************************/
155
+ /* */
156
+ /* check_cyclicity */
157
+ /* */
158
+ /*******************************************************************/
159
+
160
+ static bool check_cyclicity( Node *node, NodeHashSet &visited,
161
+ const Alphabet &alphabet)
162
+ {
163
+
164
+ if (!visited.insert(node).second)
165
+ return true; // node was visited before
166
+
167
+ for( ArcsIter p(node->arcs()); p; p++ ) {
168
+ Arc *arc=p;
169
+ if (arc->label().upper_is_epsilon())
170
+ if (check_cyclicity(arc->target_node(), visited, alphabet)) {
171
+ cerr << alphabet.write_label(arc->label()) << "\n";
172
+ return true;
173
+ }
174
+ }
175
+ visited.erase(node);
176
+ return false;
177
+ }
178
+
179
+
180
+ /*******************************************************************/
181
+ /* */
182
+ /* Transducer::infinitely_ambiguous_node */
183
+ /* */
184
+ /*******************************************************************/
185
+
186
+ bool Transducer::infinitely_ambiguous_node( Node *node )
187
+
188
+ {
189
+ if (!node->was_visited( vmark )) {
190
+ NodeHashSet visited;
191
+ if (check_cyclicity(node, visited, alphabet))
192
+ return true;
193
+
194
+ // iterate over all outgoing arcs
195
+ for( ArcsIter p(node->arcs()); p; p++ ) {
196
+ Arc *arc=p;
197
+ if (infinitely_ambiguous_node( arc->target_node() ))
198
+ return true;
199
+ }
200
+ }
201
+ return false;
202
+ }
203
+
204
+
205
+ /*******************************************************************/
206
+ /* */
207
+ /* Transducer::is_infinitely_ambiguous */
208
+ /* */
209
+ /*******************************************************************/
210
+
211
+ bool Transducer::is_infinitely_ambiguous()
212
+
213
+ {
214
+ incr_vmark();
215
+ return infinitely_ambiguous_node(root_node());
216
+ }
217
+
218
+
219
+ /*******************************************************************/
220
+ /* */
221
+ /* Transducer::is_cyclic_node */
222
+ /* */
223
+ /*******************************************************************/
224
+
225
+ bool Transducer::is_cyclic_node( Node *node, NodeHashSet &previous )
226
+
227
+ {
228
+ if (!node->was_visited( vmark )) {
229
+ NodeHashSet visited;
230
+
231
+ NodeHashSet::iterator it=previous.insert(node).first;
232
+
233
+ // iterate over all outgoing arcs
234
+ for( ArcsIter p(node->arcs()); p; p++ ) {
235
+ Arc *arc=p;
236
+ if (previous.find(arc->target_node()) != previous.end() ||
237
+ is_cyclic_node( arc->target_node(), previous ))
238
+ return true;
239
+ }
240
+
241
+ previous.erase(it);
242
+ }
243
+ return false;
244
+ }
245
+
246
+
247
+ /*******************************************************************/
248
+ /* */
249
+ /* Transducer::is_cyclic */
250
+ /* */
251
+ /*******************************************************************/
252
+
253
+ bool Transducer::is_cyclic()
254
+
255
+ {
256
+ incr_vmark();
257
+ NodeHashSet previous;
258
+ return is_cyclic_node(root_node(), previous);
259
+ }
260
+
261
+
262
+
263
+ /*******************************************************************/
264
+ /* */
265
+ /* Transducer::is_automaton_node */
266
+ /* */
267
+ /*******************************************************************/
268
+
269
+ bool Transducer::is_automaton_node( Node *node )
270
+
271
+ {
272
+ if (!node->was_visited( vmark )) {
273
+ // iterate over all outgoing arcs
274
+ for( ArcsIter p(node->arcs()); p; p++ ) {
275
+ Arc *arc=p;
276
+ Label l=arc->label();
277
+ if (l.upper_char() != l.lower_char())
278
+ return false;
279
+ if (!is_automaton_node( arc->target_node()))
280
+ return false;
281
+ }
282
+ }
283
+ return true;
284
+ }
285
+
286
+
287
+ /*******************************************************************/
288
+ /* */
289
+ /* Transducer::is_automaton */
290
+ /* */
291
+ /*******************************************************************/
292
+
293
+ bool Transducer::is_automaton()
294
+
295
+ {
296
+ incr_vmark();
297
+ return is_automaton_node(root_node());
298
+ }
299
+
300
+
301
+ /*******************************************************************/
302
+ /* */
303
+ /* Transducer::is_empty */
304
+ /* */
305
+ /*******************************************************************/
306
+
307
+ bool Transducer::is_empty()
308
+
309
+ {
310
+ if (!minimised) {
311
+ Transducer *tmp=&minimise();
312
+ bool result=tmp->is_empty();
313
+ delete tmp;
314
+ return result;
315
+ }
316
+ if (root_node()->is_final())
317
+ return false;
318
+ return root_node()->arcs()->is_empty();
319
+ }
320
+
321
+
322
+ /*******************************************************************/
323
+ /* */
324
+ /* Transducer::generates_empty_string */
325
+ /* */
326
+ /*******************************************************************/
327
+
328
+ bool Transducer::generates_empty_string()
329
+
330
+ {
331
+ if (!minimised) {
332
+ Transducer *tmp=&minimise();
333
+ bool result=tmp->root_node()->is_final();
334
+ delete tmp;
335
+ return result;
336
+ }
337
+ return root_node()->is_final();
338
+ }
339
+
340
+
341
+ /*******************************************************************/
342
+ /* */
343
+ /* Transducer::reverse_node */
344
+ /* */
345
+ /*******************************************************************/
346
+
347
+ void Transducer::reverse_node( Node *node, Transducer *na )
348
+
349
+ {
350
+ if (!node->was_visited( vmark )) {
351
+
352
+ // create a new node
353
+ node->set_forward( na->new_node() );
354
+
355
+ if (node->is_final())
356
+ // add epsilon transition from new root to this node
357
+ na->root_node()->add_arc( Label(), node->forward(), na );
358
+
359
+ // iterate over all outgoing arcs
360
+ for( ArcsIter p(node->arcs()); p; p++ ) {
361
+ Arc *arc=p;
362
+
363
+ // reverse the subgraph headed by the target node
364
+ reverse_node( arc->target_node(), na );
365
+ Node *n = arc->target_node()->forward();
366
+
367
+ // create the reverse arc
368
+ n->add_arc( arc->label(), node->forward(), na );
369
+ }
370
+ }
371
+ }
372
+
373
+
374
+ /*******************************************************************/
375
+ /* */
376
+ /* Transducer::reverse */
377
+ /* */
378
+ /*******************************************************************/
379
+
380
+ Transducer &Transducer::reverse( bool copy_alphabet )
381
+
382
+ {
383
+ Transducer *na = new Transducer();
384
+ if (copy_alphabet)
385
+ na->alphabet.copy(alphabet);
386
+
387
+ incr_vmark();
388
+ reverse_node(root_node(), na);
389
+ root_node()->forward()->set_final(1);
390
+ return *na;
391
+ }
392
+
393
+
394
+ /*******************************************************************/
395
+ /* */
396
+ /* Transducer::recode_label */
397
+ /* */
398
+ /*******************************************************************/
399
+
400
+ Label Transducer::recode_label( Label l, bool lswitch, bool recode,
401
+ Alphabet &al )
402
+ {
403
+ if (lswitch)
404
+ l = Label(l.upper_char(), l.lower_char());
405
+
406
+ if (recode) {
407
+ Character lc = al.add_symbol(alphabet.code2symbol(l.lower_char()));
408
+ Character uc = al.add_symbol(alphabet.code2symbol(l.upper_char()));
409
+ l = Label(lc, uc);
410
+ al.insert(l);
411
+ }
412
+
413
+ return l;
414
+ }
415
+
416
+
417
+ /*******************************************************************/
418
+ /* */
419
+ /* Transducer::copy_nodes */
420
+ /* */
421
+ /*******************************************************************/
422
+
423
+ Node *Transducer::copy_nodes( Node *node, Transducer *a,
424
+ bool lswitch, bool recode )
425
+ {
426
+ if (!node->was_visited(vmark)) {
427
+
428
+ node->set_forward(a->new_node());
429
+
430
+ // define final nodes
431
+ if (node->is_final())
432
+ node->forward()->set_final(1);
433
+
434
+ // iterate over all outgoing arcs of node
435
+ for( ArcsIter p(node->arcs()); p; p++ ) {
436
+ Arc *arc=p;
437
+ Node *tn = copy_nodes( arc->target_node(), a, lswitch, recode );
438
+
439
+ // Add a link to the new node
440
+ Label l=recode_label(arc->label(), lswitch, recode, a->alphabet);
441
+ node->forward()->add_arc( l, tn, a );
442
+ }
443
+ }
444
+
445
+ return node->forward();
446
+ }
447
+
448
+
449
+ /*******************************************************************/
450
+ /* */
451
+ /* Transducer::copy */
452
+ /* */
453
+ /*******************************************************************/
454
+
455
+ Transducer &Transducer::copy( bool lswitch, const Alphabet *al )
456
+
457
+ {
458
+ bool recode = false;
459
+ Transducer *na = new Transducer();
460
+ if (al == NULL)
461
+ al = &alphabet;
462
+ else
463
+ recode = true;
464
+
465
+ na->alphabet.utf8 = al->utf8;
466
+ if (lswitch) {
467
+ na->alphabet.insert_symbols(*al);
468
+ for( Alphabet::iterator it=al->begin(); it!=al->end(); it++ ) {
469
+ Character lc=it->lower_char();
470
+ Character uc=it->upper_char();
471
+ na->alphabet.insert(Label(uc,lc));
472
+ }
473
+ }
474
+ else
475
+ na->alphabet.copy(*al);
476
+
477
+ na->deterministic = deterministic;
478
+ na->minimised = minimised;
479
+ na->root_node()->set_final(root_node()->is_final());
480
+ incr_vmark();
481
+
482
+ root_node()->set_forward(na->root_node());
483
+ root_node()->was_visited(vmark);
484
+
485
+ for( ArcsIter p(root_node()->arcs()); p; p++ ) {
486
+ Arc *arc=p;
487
+ Node *target_node=copy_nodes(arc->target_node(), na, lswitch, recode);
488
+ Label l = recode_label(arc->label(), lswitch, recode, na->alphabet);
489
+ na->root_node()->add_arc( l, target_node, na);
490
+ }
491
+
492
+ return *na;
493
+ }
494
+
495
+
496
+ /*******************************************************************/
497
+ /* */
498
+ /* Transducer::operator | */
499
+ /* */
500
+ /*******************************************************************/
501
+
502
+ Transducer &Transducer::operator|( Transducer &a )
503
+
504
+ {
505
+ Transducer *na = new Transducer();
506
+ na->alphabet.copy(alphabet);
507
+ na->alphabet.copy(a.alphabet);
508
+
509
+ incr_vmark();
510
+ na->root_node()->add_arc( Label(), copy_nodes(root_node(), na), na);
511
+ a.incr_vmark();
512
+ na->root_node()->add_arc( Label(), a.copy_nodes(a.root_node(), na), na);
513
+
514
+ return *na;
515
+ }
516
+
517
+
518
+ /*******************************************************************/
519
+ /* */
520
+ /* Transducer::rec_cat_nodes */
521
+ /* */
522
+ /*******************************************************************/
523
+
524
+ void Transducer::rec_cat_nodes( Node *node, Node *node2 )
525
+
526
+ {
527
+ if (!node->was_visited( vmark )) {
528
+
529
+ // iterate over all outgoing arcs of node
530
+ for( ArcsIter p(node->arcs()); p; p++ ) {
531
+ Arc *arc=p;
532
+ rec_cat_nodes( arc->target_node(), node2 );
533
+ }
534
+
535
+ if (node->is_final()) {
536
+ // link this node to node2
537
+ node->set_final(0);
538
+ node->add_arc( Label(), node2, this );
539
+ }
540
+ }
541
+ }
542
+
543
+
544
+ /*******************************************************************/
545
+ /* */
546
+ /* Transducer::operator+ */
547
+ /* */
548
+ /*******************************************************************/
549
+
550
+ Transducer &Transducer::operator+( Transducer &a )
551
+
552
+ {
553
+ Transducer *na = new Transducer();
554
+ na->alphabet.copy(alphabet);
555
+ na->alphabet.copy(a.alphabet);
556
+
557
+ // copy Transducer1 to the new Transducer
558
+ incr_vmark();
559
+ Node *node=copy_nodes(root_node(), na);
560
+ na->root_node()->add_arc( Label(), node, na);
561
+
562
+ // copy Transducer2 to the new Transducer
563
+ a.incr_vmark();
564
+ node=a.copy_nodes(a.root_node(), na);
565
+
566
+ // catenate the two automata
567
+ na->incr_vmark();
568
+ na->rec_cat_nodes(na->root_node(), node);
569
+
570
+ return *na;
571
+ }
572
+
573
+
574
+ /*******************************************************************/
575
+ /* */
576
+ /* Transducer::kleene_star */
577
+ /* (HFST addition: now works for cyclic transducers as well) */
578
+ /* */
579
+ /*******************************************************************/
580
+
581
+ Transducer &Transducer::kleene_star()
582
+
583
+ {
584
+ Transducer *na = &copy();
585
+ na->alphabet.copy(alphabet);
586
+
587
+ // HFST addition
588
+ Transducer eps;
589
+ eps.root_node()->set_final(1);
590
+ Transducer *tmp = &(eps + *na);
591
+ delete na;
592
+ na = tmp;
593
+
594
+ // link back to the start node
595
+ na->incr_vmark();
596
+ na->rec_cat_nodes(na->root_node(), na->root_node());
597
+
598
+ na->root_node()->set_final(1); // root node is already final
599
+ na->deterministic = na->minimised = false;
600
+
601
+ return *na;
602
+ }
603
+
604
+
605
+ /*******************************************************************/
606
+ /* */
607
+ /* Transducer::negate_nodes */
608
+ /* */
609
+ /*******************************************************************/
610
+
611
+ void Transducer::negate_nodes( Node *node, Node *accept )
612
+
613
+ {
614
+ if (!node->was_visited(vmark)) {
615
+ node->set_final( !node->is_final() );
616
+
617
+ for( ArcsIter p(node->arcs()); p; p++ ) {
618
+ Arc *arc=p;
619
+ negate_nodes( arc->target_node(), accept );
620
+ }
621
+
622
+ for( Alphabet::iterator it=alphabet.begin(); it!=alphabet.end(); it++)
623
+ if (!node->target_node(*it))
624
+ node->add_arc( *it, accept, this );
625
+ }
626
+ }
627
+
628
+
629
+ /*******************************************************************/
630
+ /* */
631
+ /* Transducer::operator! */
632
+ /* */
633
+ /*******************************************************************/
634
+
635
+ Transducer &Transducer::operator!()
636
+
637
+ {
638
+ Transducer *na;
639
+
640
+ if (alphabet.size() == 0) {
641
+ // throw "Negation of Transducer with undefined alphabet attempted!";
642
+ fprintf(stderr, "Warning: undefined alphabet\n");
643
+ na = new Transducer();
644
+ return *na;
645
+ }
646
+
647
+ if (minimised)
648
+ na = &copy();
649
+ else
650
+ na = &minimise();
651
+ na->alphabet.copy(alphabet);
652
+
653
+ Node *accept_node=na->new_node();
654
+ accept_node->set_final(1);
655
+ for( Alphabet::iterator it=alphabet.begin(); it!=alphabet.end(); it++)
656
+ accept_node->add_arc( *it, accept_node, na );
657
+
658
+ na->incr_vmark();
659
+ na->negate_nodes( na->root_node(), accept_node );
660
+ na->minimised = na->deterministic = false;
661
+
662
+ return *na;
663
+ }
664
+
665
+
666
+ /*******************************************************************/
667
+ /* */
668
+ /* conjoin_nodes */
669
+ /* */
670
+ /*******************************************************************/
671
+
672
+ static void conjoin_nodes( Node *n1, Node *n2, Node *node,
673
+ Transducer *a, PairMapping &map )
674
+
675
+ {
676
+ // if both input nodes are final, so is the new one
677
+ if (n1->is_final() && n2->is_final())
678
+ node->set_final(1);
679
+
680
+ // iterate over all outgoing arcs of the first node
681
+ for( ArcsIter i(n1->arcs()); i; i++ ) {
682
+ Arc *arc=i;
683
+ Label l=arc->label();
684
+ Node *t1 = arc->target_node();
685
+ Node *t2 = n2->target_node(l);
686
+
687
+ // Does the second node have an outgoing arc with the same label?
688
+ if (t2) {
689
+ // Check whether this node pair has been encountered before
690
+ PairMapping::iterator it=map.find(t1, t2);
691
+
692
+ if (it == map.end()) {
693
+ // new node pair
694
+ // create a new node in the conjunction Transducer
695
+ Node *target_node = a->new_node();
696
+ // map the target node pair to the new node
697
+ map[pair<Node*,Node*>(t1,t2)] = target_node;
698
+ // add an arc to the new node
699
+ node->add_arc( l, target_node, a );
700
+ // recursion
701
+ conjoin_nodes( t1, t2, target_node, a, map );
702
+ }
703
+ else {
704
+ // add an arc to the already existing target node
705
+ node->add_arc( l, it->second, a );
706
+ }
707
+ }
708
+ }
709
+ }
710
+
711
+
712
+ /*******************************************************************/
713
+ /* */
714
+ /* Transducer::operator & */
715
+ /* */
716
+ /*******************************************************************/
717
+
718
+ Transducer &Transducer::operator&( Transducer &a )
719
+
720
+ {
721
+ Transducer *tmp1=NULL;
722
+ Transducer *tmp2=NULL;
723
+ Node *r1, *r2;
724
+
725
+ if (deterministic)
726
+ r1 = root_node();
727
+ else {
728
+ tmp1 = &determinise();
729
+ r1 = tmp1->root_node();
730
+ }
731
+
732
+ if (a.deterministic)
733
+ r2 = a.root_node();
734
+ else {
735
+ tmp2 = &a.determinise();
736
+ r2 = tmp2->root_node();
737
+ }
738
+
739
+ PairMapping map;
740
+
741
+ Transducer *na = new Transducer();
742
+ na->alphabet.copy(alphabet);
743
+ na->alphabet.copy(a.alphabet);
744
+
745
+ // map the two root nodes to the new root node
746
+ map[pair<Node*,Node*>(r1, r2)] = na->root_node();
747
+
748
+ // recursively conjoin the two automata
749
+ conjoin_nodes( r1, r2, na->root_node(), na, map);
750
+
751
+ na->deterministic = 1;
752
+ delete tmp1;
753
+ delete tmp2;
754
+
755
+ return *na;
756
+ }
757
+
758
+
759
+ /*******************************************************************/
760
+ /* */
761
+ /* add_transition */
762
+ /* */
763
+ /*******************************************************************/
764
+
765
+ static void add_transition( Label l, Node *n1, Node *n2, Node *node,
766
+ Transducer *a, PairMapping &map,
767
+ CharNode2Trans &cn2trans1,
768
+ CharNode2Trans &cn2trans2 )
769
+
770
+ {
771
+ // fprintf(stderr,"transition from %u to %u with label %s\n",
772
+ // n1->index, n2->index, a->alphabet.write_label(l));
773
+
774
+ // Check whether this node pair has been encountered before
775
+ PairMapping::iterator it=map.find(n1, n2);
776
+
777
+ if (it != map.end()) {
778
+ // add an arc to the already existing target node
779
+ node->add_arc( l, it->second, a );
780
+ return;
781
+ }
782
+
783
+ // create a new node in the composed Transducer
784
+ Node *target_node = a->new_node();
785
+
786
+ // map the target node pair to the new node
787
+ map[pair<Node*,Node*>(n1,n2)] = target_node;
788
+
789
+ // add an arc to the new node
790
+ node->add_arc( l, target_node, a );
791
+
792
+ // recursion
793
+ compose_nodes( n1, n2, target_node, a, map, cn2trans1, cn2trans2 );
794
+ }
795
+
796
+
797
+ /*******************************************************************/
798
+ /* */
799
+ /* compose_nodes */
800
+ /* */
801
+ /*******************************************************************/
802
+
803
+ static void compose_nodes( Node *n1, Node *n2, Node *node, Transducer *a,
804
+ PairMapping &map, CharNode2Trans &cn2trans1,
805
+ CharNode2Trans &cn2trans2 )
806
+ {
807
+ // fprintf(stderr,"A%u || B%u\n",n1->index,n2->index);
808
+
809
+ // index upper character of first transducer
810
+ size_t size1 = cn2trans1.hash_transitions( n1, true );
811
+ // index lower character of second transducer
812
+ size_t size2 = cn2trans2.hash_transitions( n2, false );
813
+
814
+ // use the hashing of the transducer whose node is larger
815
+ bool hash2 = (size1 <= size2);
816
+
817
+ // if both input nodes are final, so is the new one
818
+ if (n1->is_final() && n2->is_final())
819
+ node->set_final(1);
820
+
821
+ if (hash2) {
822
+ // iterate over all outgoing arcs of the first node
823
+ for( ArcsIter i(n1->arcs()); i; i++ ) {
824
+ Arc *arc1=i;
825
+ Node *t1 = arc1->target_node();
826
+ Label l1=arc1->label();
827
+ Character uc1=l1.upper_char();
828
+ Character lc1=l1.lower_char();
829
+
830
+ if (uc1 == Label::epsilon)
831
+ add_transition( l1, t1, n2, node, a, map, cn2trans1, cn2trans2 );
832
+
833
+ else {
834
+ // iterate over the matching outgoing arcs of the second node
835
+ for( CharNode2Trans::iterator it(cn2trans2, n2->index, uc1 );
836
+ !it.finished(); it++ )
837
+ {
838
+ Arc *arc2 = *it;
839
+ Node *t2 = arc2->target_node();
840
+ Label l2=arc2->label();
841
+ assert(uc1 == l2.lower_char());
842
+ Character uc2=l2.upper_char();
843
+
844
+ add_transition( Label(lc1,uc2), t1, t2, node, a, map,
845
+ cn2trans1, cn2trans2 );
846
+ }
847
+ }
848
+ }
849
+
850
+ // epsilon input characters of the second Transducer
851
+ for( CharNode2Trans::iterator it(cn2trans2, n2->index, Label::epsilon );
852
+ !it.finished(); it++ )
853
+ {
854
+ Arc *arc2 = *it;
855
+ Node *t2 = arc2->target_node();
856
+ Label l=arc2->label();
857
+ assert(l.lower_char() == Label::epsilon);
858
+ add_transition( l, n1, t2, node, a, map, cn2trans1, cn2trans2 );
859
+ }
860
+ }
861
+
862
+ else { /* !hash2 */
863
+ // iterate over all outgoing arcs of the second node
864
+ for( ArcsIter i(n2->arcs()); i; i++ ) {
865
+ Arc *arc2=i;
866
+ Node *t2 = arc2->target_node();
867
+ Label l2=arc2->label();
868
+ Character uc2=l2.upper_char();
869
+ Character lc2=l2.lower_char();
870
+
871
+ if (lc2 == Label::epsilon)
872
+ add_transition( l2, n1, t2, node, a, map, cn2trans1, cn2trans2 );
873
+
874
+ else {
875
+ // iterate over the matching outgoing arcs of the first node
876
+ for( CharNode2Trans::iterator it(cn2trans1, n1->index, lc2 );
877
+ !it.finished(); it++ )
878
+ {
879
+ Arc *arc1 = *it;
880
+ Node *t1 = arc1->target_node();
881
+ Label l1=arc1->label();
882
+ assert(l1.upper_char() == lc2);
883
+ Character lc1=l1.lower_char();
884
+
885
+ add_transition( Label(lc1,uc2), t1, t2, node, a, map,
886
+ cn2trans1, cn2trans2 );
887
+ }
888
+ }
889
+ }
890
+
891
+ // epsilon output characters of the first Transducer
892
+ for( CharNode2Trans::iterator it(cn2trans1, n1->index, Label::epsilon );
893
+ !it.finished(); it++ )
894
+ {
895
+ Arc *arc1 = *it;
896
+ Node *t1 = arc1->target_node();
897
+ Label l=arc1->label();
898
+ assert(l.upper_char() == Label::epsilon);
899
+ add_transition( l, t1, n2, node, a, map, cn2trans1, cn2trans2 );
900
+ }
901
+ }
902
+ }
903
+
904
+
905
+ /*******************************************************************/
906
+ /* */
907
+ /* Transducer::operator || */
908
+ /* */
909
+ /*******************************************************************/
910
+
911
+ Transducer &Transducer::operator||( Transducer &a )
912
+
913
+ {
914
+ PairMapping map;
915
+
916
+ Transducer *na = new Transducer();
917
+ na->alphabet.compose(alphabet, a.alphabet);
918
+
919
+ // map the two root nodes to the new root node
920
+ map[pair<Node*,Node*>(root_node(), a.root_node())] = na->root_node();
921
+
922
+ // recursively compose the two automata
923
+ CharNode2Trans cn2trans1(*this);
924
+ CharNode2Trans cn2trans2(a);
925
+ compose_nodes( root_node(), a.root_node(), na->root_node(),
926
+ na, map, cn2trans1, cn2trans2 );
927
+
928
+ return *na;
929
+ }
930
+
931
+
932
+
933
+ /*******************************************************************/
934
+ /* */
935
+ /* Transducer::operator/ */
936
+ /* */
937
+ /*******************************************************************/
938
+
939
+ Transducer &Transducer::operator/( Transducer &a )
940
+
941
+ {
942
+ complete_alphabet();
943
+ a.alphabet.copy(alphabet);
944
+ // a-b = a & !b = a & !(a & b)
945
+ Transducer *a1 = &(*this & a);
946
+ Transducer *a2 = &(!*a1);
947
+ delete a1;
948
+ a1 = &(*this & *a2);
949
+ delete a2;
950
+ return *a1;
951
+ }
952
+
953
+
954
+ /*******************************************************************/
955
+ /* */
956
+ /* Transducer::compare_nodes */
957
+ /* */
958
+ /*******************************************************************/
959
+
960
+ bool Transducer::compare_nodes( Node *node, Node *node2, Transducer &a2 )
961
+
962
+ {
963
+ if (node->was_visited( vmark )) {
964
+ if (node2->was_visited( a2.vmark ))
965
+ return (node->forward() == node2 && node2->forward() == node);
966
+ else
967
+ return false;
968
+ }
969
+ else if (node2->was_visited( a2.vmark ))
970
+ return false;
971
+
972
+ node->set_forward( node2 );
973
+ node2->set_forward( node );
974
+
975
+ if (node->is_final() != node2->is_final())
976
+ return false;
977
+
978
+ // iterate over all outgoing arcs
979
+ for( ArcsIter p(node->arcs()); p; p++ ) {
980
+ Arc *arc=p;
981
+ Node *t2=node2->target_node(arc->label());
982
+
983
+ if (t2 == NULL)
984
+ return false;
985
+ else if (!compare_nodes(arc->target_node(), t2, a2))
986
+ return false;
987
+ }
988
+ for( ArcsIter p(node2->arcs()); p; p++ ) {
989
+ Arc *arc=p;
990
+ if (node->target_node(arc->label()) == NULL)
991
+ return false;
992
+ }
993
+
994
+ return true;
995
+ }
996
+
997
+
998
+ /*******************************************************************/
999
+ /* */
1000
+ /* Transducer::operator == */
1001
+ /* */
1002
+ /*******************************************************************/
1003
+
1004
+ bool Transducer::operator==( Transducer &a )
1005
+
1006
+ {
1007
+ Transducer *p1 = (minimised)? this: &minimise();
1008
+ Transducer *p2 = (a.minimised)? &a: &a.minimise();
1009
+
1010
+ p1->incr_vmark();
1011
+ p2->incr_vmark();
1012
+ bool result = p1->compare_nodes(p1->root_node(), p2->root_node(), *p2 );
1013
+
1014
+ if (p1 != this) delete p1;
1015
+ if (p2 != &a) delete p2;
1016
+
1017
+ return result;
1018
+ }
1019
+
1020
+
1021
+
1022
+ /*******************************************************************/
1023
+ /* */
1024
+ /* Transducer::map_nodes */
1025
+ /* */
1026
+ /*******************************************************************/
1027
+
1028
+ void Transducer::map_nodes( Node *node, Node *node2, Transducer *a, Level level)
1029
+
1030
+ {
1031
+ if (!node->was_visited(vmark)) {
1032
+
1033
+ node->set_forward(node2);
1034
+
1035
+ // define final nodes
1036
+ if (node->is_final())
1037
+ node2->set_final(1);
1038
+
1039
+ // iterate over all outgoing arcs of node
1040
+ for( ArcsIter p(node->arcs()); p; p++ ) {
1041
+ Arc *arc=p;
1042
+ Label l(arc->label().get_char(level));
1043
+ Node *t2=NULL, *t=arc->target_node();
1044
+
1045
+ if (t->check_visited(vmark))
1046
+ t2 = t->forward();
1047
+ else
1048
+ t2 = a->new_node(); // create a new node
1049
+
1050
+ node2->add_arc(l, t2, a); // add a link to the node
1051
+
1052
+ map_nodes( t, t2, a, level );
1053
+ }
1054
+ }
1055
+ }
1056
+
1057
+
1058
+ /*******************************************************************/
1059
+ /* */
1060
+ /* Transducer::level */
1061
+ /* */
1062
+ /*******************************************************************/
1063
+
1064
+ Transducer &Transducer::level( Level level )
1065
+
1066
+ {
1067
+ Transducer *na = new Transducer();
1068
+
1069
+ for( Alphabet::iterator it=alphabet.begin(); it!=alphabet.end(); it++ ) {
1070
+ Character c = it->get_char(level);
1071
+ if (alphabet.code2symbol(c) != NULL)
1072
+ na->alphabet.add_symbol( alphabet.code2symbol(c), c );
1073
+ na->alphabet.insert(Label(c));
1074
+ }
1075
+
1076
+ incr_vmark();
1077
+ map_nodes(root_node(), na->root_node(), na, level );
1078
+
1079
+ return *na;
1080
+ }
1081
+
1082
+
1083
+ /*******************************************************************/
1084
+ /* */
1085
+ /* Transducer::freely_insert_at_node */
1086
+ /* */
1087
+ /*******************************************************************/
1088
+
1089
+ void Transducer::freely_insert_at_node( Node *node, Label l )
1090
+
1091
+ {
1092
+ if (!node->was_visited(vmark)) {
1093
+ node->add_arc(l, node, this); // add a recursive link labelled with l
1094
+
1095
+ // iterate over all outgoing arcs of node
1096
+ for( ArcsIter p(node->arcs()); p; p++ ) {
1097
+ Arc *arc=p;
1098
+ freely_insert_at_node(arc->target_node(), l );
1099
+ }
1100
+ }
1101
+ }
1102
+
1103
+
1104
+ /*******************************************************************/
1105
+ /* */
1106
+ /* Transducer::freely_insert */
1107
+ /* */
1108
+ /*******************************************************************/
1109
+
1110
+ Transducer &Transducer::freely_insert( Label l )
1111
+
1112
+ {
1113
+ Transducer *na = &copy();
1114
+
1115
+ na->incr_vmark();
1116
+ na->freely_insert_at_node(na->root_node(), l );
1117
+
1118
+ return *na;
1119
+ }
1120
+
1121
+
1122
+ /*******************************************************************/
1123
+ /* */
1124
+ /* Transducer::splice_arc */
1125
+ /* */
1126
+ /*******************************************************************/
1127
+
1128
+ void Transducer::splice_arc( Node *node, Node *node2, Node *next_node,
1129
+ Transducer *a )
1130
+ {
1131
+ if (node->is_final()) {
1132
+ // link final node to the next node
1133
+ node2->add_arc( Label(), next_node, a );
1134
+ return;
1135
+ }
1136
+
1137
+ // iterate over the outgoing arcs
1138
+ for( ArcsIter p(node->arcs()); p; p++ ) {
1139
+ Arc *arc=p;
1140
+ Node *tn=a->new_node();
1141
+
1142
+ node2->add_arc( arc->label(), tn, a );
1143
+ splice_arc( arc->target_node(), tn, next_node, a );
1144
+ }
1145
+ }
1146
+
1147
+
1148
+ /*******************************************************************/
1149
+ /* */
1150
+ /* Transducer::splice_nodes */
1151
+ /* */
1152
+ /*******************************************************************/
1153
+
1154
+ void Transducer::splice_nodes(Node *node, Node *node2, Label sl,
1155
+ Transducer *sa, Transducer *a)
1156
+ {
1157
+ if (!node->was_visited(vmark)) {
1158
+
1159
+ node->set_forward(node2);
1160
+
1161
+ // define final nodes
1162
+ if (node->is_final())
1163
+ node2->set_final(1);
1164
+
1165
+ // iterate over all outgoing arcs of node
1166
+ for( ArcsIter p(node->arcs()); p; p++ ) {
1167
+ Arc *arc=p;
1168
+ Node *t2=NULL, *t=arc->target_node();
1169
+
1170
+ if (t->check_visited(vmark))
1171
+ t2 = t->forward();
1172
+ else
1173
+ t2 = a->new_node(); // create a new node
1174
+
1175
+ if (arc->label() == sl)
1176
+ // insert the transducer
1177
+ splice_arc(sa->root_node(), node2, t2, a);
1178
+ else
1179
+ // add a link to the node
1180
+ node2->add_arc(arc->label(), t2, a);
1181
+
1182
+ splice_nodes( t, t2, sl, sa, a );
1183
+ }
1184
+ }
1185
+ }
1186
+
1187
+
1188
+ /*******************************************************************/
1189
+ /* */
1190
+ /* Transducer::splice */
1191
+ /* */
1192
+ /*******************************************************************/
1193
+
1194
+ Transducer &Transducer::splice( Label sl, Transducer *sa )
1195
+
1196
+ {
1197
+ Alphabet::iterator it;
1198
+
1199
+ Transducer *na = new Transducer();
1200
+
1201
+ for( it=alphabet.begin(); it!=alphabet.end(); it++ ) {
1202
+ Label l = *it;
1203
+ if (l != sl)
1204
+ na->alphabet.insert(l);
1205
+ }
1206
+ for( it=sa->alphabet.begin(); it!=sa->alphabet.end(); it++ )
1207
+ na->alphabet.insert(*it);
1208
+
1209
+ incr_vmark();
1210
+ splice_nodes(root_node(), na->root_node(), sl, sa, na );
1211
+
1212
+ return *na;
1213
+ }
1214
+
1215
+
1216
+ /*******************************************************************/
1217
+ /* */
1218
+ /* Transducer::replace_char */
1219
+ /* */
1220
+ /*******************************************************************/
1221
+
1222
+ Transducer &Transducer::replace_char( Character c, Character nc )
1223
+
1224
+ {
1225
+ Alphabet::iterator it;
1226
+
1227
+ Transducer *na = new Transducer();
1228
+
1229
+ for( it=alphabet.begin(); it!=alphabet.end(); it++ ) {
1230
+ Label l = *it;
1231
+ na->alphabet.insert(l.replace_char(c,nc));
1232
+ }
1233
+
1234
+ incr_vmark();
1235
+ replace_char2(root_node(), na->root_node(), c, nc, na );
1236
+
1237
+ return *na;
1238
+ }
1239
+
1240
+
1241
+ /*******************************************************************/
1242
+ /* */
1243
+ /* Transducer::replace_char2 */
1244
+ /* */
1245
+ /*******************************************************************/
1246
+
1247
+ void Transducer::replace_char2(Node *node, Node *node2, Character c,
1248
+ Character nc, Transducer *a)
1249
+ {
1250
+ if (!node->was_visited(vmark)) {
1251
+
1252
+ node->set_forward(node2);
1253
+
1254
+ // define final nodes
1255
+ if (node->is_final())
1256
+ node2->set_final(1);
1257
+
1258
+ // iterate over all outgoing arcs of node
1259
+ for( ArcsIter p(node->arcs()); p; p++ ) {
1260
+ Arc *arc=p;
1261
+ Node *t2=NULL, *t=arc->target_node();
1262
+
1263
+ if (t->check_visited(vmark))
1264
+ t2 = t->forward();
1265
+ else
1266
+ t2 = a->new_node(); // create a new node
1267
+
1268
+ node2->add_arc(arc->label().replace_char(c, nc), t2, a);
1269
+ replace_char2( t, t2, c, nc, a );
1270
+ }
1271
+ }
1272
+ }
1273
+ }