ruby-sfst 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,966 @@
1
+
2
+ /*******************************************************************/
3
+ /* */
4
+ /* FILE fst.C */
5
+ /* MODULE fst */
6
+ /* PROGRAM SFST */
7
+ /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
8
+ /* */
9
+ /* PURPOSE basic FST functions */
10
+ /* */
11
+ /*******************************************************************/
12
+
13
+ #include "fst.h"
14
+
15
+ using std::vector;
16
+ using std::istream;
17
+ using std::ostream;
18
+ using std::cerr;
19
+
20
// Size of the character buffer that print_strings() allocates and that
// print_strings_node() fills while walking the transducer.
const int BUFFER_SIZE = 100000;
21
+
22
+
23
+ /*******************************************************************/
24
+ /* */
25
+ /* Arcs::size */
26
+ /* */
27
+ /*******************************************************************/
28
+
29
+ int Arcs::size() const
30
+
31
+ {
32
+ int n=0;
33
+ for( Arc *p=first_arcp; p; p=p->next ) n++;
34
+ for( Arc *p=first_epsilon_arcp; p; p=p->next ) n++;
35
+ return n;
36
+ }
37
+
38
+
39
+ /*******************************************************************/
40
+ /* */
41
+ /* Arcs::target_node */
42
+ /* */
43
+ /*******************************************************************/
44
+
45
+ Node *Arcs::target_node( Label l )
46
+
47
+ {
48
+ Arc *arc;
49
+
50
+ for( arc=first_arcp; arc; arc=arc->next)
51
+ if (arc->label() == l)
52
+ return arc->target_node();
53
+
54
+ return NULL;
55
+ }
56
+
57
+ const Node *Arcs::target_node( Label l ) const
58
+
59
+ {
60
+ const Arc *arc;
61
+
62
+ for( arc=first_arcp; arc; arc=arc->next)
63
+ if (arc->label() == l)
64
+ return arc->target_node();
65
+
66
+ return NULL;
67
+ }
68
+
69
+
70
+ /*******************************************************************/
71
+ /* */
72
+ /* Arcs::add_arc */
73
+ /* */
74
+ /*******************************************************************/
75
+
76
+ void Arcs::add_arc( Label l, Node *node, Transducer *a )
77
+
78
+ {
79
+ Arc *arc=a->new_arc( l, node );
80
+
81
+ if (l.is_epsilon()) {
82
+ arc->next = first_epsilon_arcp;
83
+ first_epsilon_arcp = arc;
84
+ }
85
+ else {
86
+ arc->next = first_arcp;
87
+ first_arcp = arc;
88
+ }
89
+ }
90
+
91
+
92
+ /*******************************************************************/
93
+ /* */
94
+ /* Arcs::remove_arc */
95
+ /* */
96
+ /*******************************************************************/
97
+
98
+ int Arcs::remove_arc( Arc *arc )
99
+
100
+ {
101
+ Arc **p = (arc->label().is_epsilon()) ? &first_epsilon_arcp : &first_arcp;
102
+ for( ; *p; p=&(*p)->next )
103
+ if (*p == arc) {
104
+ *p = arc->next;
105
+ return 1;
106
+ }
107
+ return 0;
108
+ }
109
+
110
+
111
+ /*******************************************************************/
112
+ /* */
113
+ /* Node::init */
114
+ /* */
115
+ /*******************************************************************/
116
+
117
+ void Node::init()
118
+
119
+ {
120
+ final = false;
121
+ visited = 0;
122
+ arcsp.init();
123
+ forwardp = NULL;
124
+ }
125
+
126
+
127
+ /*******************************************************************/
128
+ /* */
129
+ /* NodeNumbering::number_node */
130
+ /* */
131
+ /*******************************************************************/
132
+
133
+ void NodeNumbering::number_node( Node *node, Transducer &a )
134
+
135
+ {
136
+ if (!node->was_visited( a.vmark )) {
137
+ nummap[node] = nodes.size();
138
+ nodes.push_back(node);
139
+ for( ArcsIter p(node->arcs()); p; p++ ) {
140
+ Arc *arc=p;
141
+ number_node( arc->target_node(), a );
142
+ }
143
+ }
144
+ }
145
+
146
+
147
+ /*******************************************************************/
148
+ /* */
149
+ /* NodeNumbering::NodeNumbering */
150
+ /* */
151
+ /*******************************************************************/
152
+
153
+ NodeNumbering::NodeNumbering( Transducer &a )
154
+
155
+ {
156
+ a.incr_vmark();
157
+ number_node( a.root_node(), a );
158
+ }
159
+
160
+
161
+ /*******************************************************************/
162
+ /* */
163
+ /* Transducer::new_node */
164
+ /* */
165
+ /*******************************************************************/
166
+
167
+ Node *Transducer::new_node()
168
+
169
+ {
170
+ Node *node=(Node*)mem.alloc( sizeof(Node) );
171
+
172
+ node->init();
173
+ return node;
174
+ }
175
+
176
+
177
+ /*******************************************************************/
178
+ /* */
179
+ /* Transducer::new_arc */
180
+ /* */
181
+ /*******************************************************************/
182
+
183
+ Arc *Transducer::new_arc( Label l, Node *target )
184
+
185
+ {
186
+ Arc *arc=(Arc*)mem.alloc( sizeof(Arc) );
187
+
188
+ arc->init( l, target);
189
+ return arc;
190
+ }
191
+
192
+
193
+ /*******************************************************************/
194
+ /* */
195
+ /* Transducer::add_string */
196
+ /* */
197
+ /*******************************************************************/
198
+
199
+ void Transducer::add_string( char *s, bool extended )
200
+
201
+ {
202
+ Node *node=root_node();
203
+
204
+ Label l;
205
+ while (!(l = alphabet.next_label(s, extended)).is_epsilon()) {
206
+ alphabet.insert(l);
207
+ Arcs *arcs=node->arcs();
208
+ node = arcs->target_node( l );
209
+ if (node == NULL) {
210
+ node = new_node();
211
+ arcs->add_arc( l, node, this );
212
+ }
213
+ }
214
+ node->set_final(1);
215
+ }
216
+
217
+
218
+ /*******************************************************************/
219
+ /* */
220
+ /* Transducer::Transducer */
221
+ /* */
222
+ /*******************************************************************/
223
+
224
+ Transducer::Transducer( vector<Label> &path )
225
+ : root(), mem()
226
+ {
227
+ Node *node=root_node();
228
+
229
+ vmark = 0;
230
+ deterministic = minimised = true;
231
+ for( size_t i=0; i<path.size(); i++ ) {
232
+ Arcs *arcs=node->arcs();
233
+ node = new_node();
234
+ arcs->add_arc( path[i], node, this );
235
+ }
236
+ node->set_final(1);
237
+ }
238
+
239
+
240
+ /*******************************************************************/
241
+ /* */
242
+ /* Transducer::Transducer */
243
+ /* */
244
+ /*******************************************************************/
245
+
246
+ Transducer::Transducer( istream &is, const Alphabet *a, bool verbose )
247
+ : root(), mem()
248
+ {
249
+ bool extended=false;
250
+ int n=0;
251
+ char buffer[10000];
252
+
253
+ vmark = 0;
254
+ deterministic = true;
255
+ minimised = false;
256
+ if (a) {
257
+ alphabet.copy(*a);
258
+ extended = true;
259
+ }
260
+ while (is.getline(buffer, 10000)) {
261
+ if (verbose && ++n % 10000 == 0) {
262
+ if (n == 10000)
263
+ cerr << "\n";
264
+ cerr << "\r" << n << " words";
265
+ }
266
+ // delete final whitespace characters
267
+ int l;
268
+ for( l=strlen(buffer)-1; l>=0; l-- )
269
+ if ((buffer[l] != ' ' && buffer[l] != '\t' && buffer[l] != '\r') ||
270
+ (l > 0 && buffer[l-1] == '\\'))
271
+ break;
272
+ buffer[l+1] = 0;
273
+
274
+ add_string(buffer,extended);
275
+ }
276
+ if (verbose && n >= 10000)
277
+ cerr << "\n";
278
+ }
279
+
280
+
281
+ /*******************************************************************/
282
+ /* */
283
+ /* Transducer::Transducer */
284
+ /* */
285
+ /*******************************************************************/
286
+
287
+ Transducer::Transducer( char *s, const Alphabet *a, bool extended )
288
+ : root(), mem()
289
+ {
290
+ vmark = 0;
291
+ deterministic = minimised = true;
292
+ if (a)
293
+ alphabet.copy(*a);
294
+ add_string(s, extended);
295
+ }
296
+
297
+
298
+ /*******************************************************************/
299
+ /* */
300
+ /* Transducer::clear */
301
+ /* */
302
+ /*******************************************************************/
303
+
304
+ void Transducer::clear()
305
+
306
+ {
307
+ vmark = 0;
308
+ deterministic = minimised = false;
309
+ root.init();
310
+ mem.clear();
311
+ alphabet.clear();
312
+ }
313
+
314
+
315
+ /*******************************************************************/
316
+ /* */
317
+ /* Transducer::store_symbols */
318
+ /* */
319
+ /*******************************************************************/
320
+
321
+ void Transducer::store_symbols(Node *node, SymbolMap &symbol, LabelSet &labels)
322
+
323
+ {
324
+ if (!node->was_visited( vmark )) {
325
+ Arcs *arcs=node->arcs();
326
+ for( ArcsIter p(arcs); p; p++ ) {
327
+ Arc *arc=p;
328
+ Label l=arc->label();
329
+
330
+ labels.insert(l);
331
+
332
+ Character c = l.upper_char();
333
+ if (symbol.find(c) == symbol.end()) {
334
+ const char *s = alphabet.code2symbol(c);
335
+ if (s)
336
+ symbol[c] = fst_strdup(s);
337
+ }
338
+
339
+ c = l.lower_char();
340
+ if (symbol.find(c) == symbol.end()) {
341
+ const char *s = alphabet.code2symbol(c);
342
+ if (s)
343
+ symbol[c] = fst_strdup(s);
344
+ }
345
+
346
+ store_symbols( arc->target_node(), symbol, labels );
347
+ }
348
+ }
349
+ }
350
+
351
+
352
+ /*******************************************************************/
353
+ /* */
354
+ /* Transducer::minimise_alphabet */
355
+ /* */
356
+ /*******************************************************************/
357
+
358
+ void Transducer::minimise_alphabet()
359
+
360
+ {
361
+ SymbolMap symbols;
362
+ LabelSet labels;
363
+ incr_vmark();
364
+ store_symbols(root_node(), symbols, labels);
365
+ alphabet.clear();
366
+ for( SymbolMap::iterator it=symbols.begin(); it!=symbols.end(); it++ ) {
367
+ alphabet.add_symbol( it->second, it->first );
368
+ free(it->second);
369
+ }
370
+ for( LabelSet::iterator it=labels.begin(); it!=labels.end(); it++ )
371
+ alphabet.insert(*it);
372
+ }
373
+
374
+
375
+ /*******************************************************************/
376
+ /* */
377
+ /* Transducer::minimise */
378
+ /* */
379
+ /*******************************************************************/
380
+
381
+ Transducer &Transducer::minimise( bool verbose )
382
+
383
+ {
384
+ if (minimised)
385
+ return copy();
386
+
387
+ Transducer *a1, *a2;
388
+
389
+ a1 = &reverse();
390
+ a2 = &a1->determinise();
391
+ delete a1;
392
+
393
+ a1 = &a2->reverse();
394
+ delete a2;
395
+
396
+ a2 = &a1->determinise();
397
+ delete a1;
398
+
399
+ a2->minimised = true;
400
+ a2->minimise_alphabet();
401
+
402
+ return *a2;
403
+ }
404
+
405
+
406
+ /*******************************************************************/
407
+ /* */
408
+ /* Transducer::enumerate_paths_node */
409
+ /* */
410
+ /*******************************************************************/
411
+
412
+ void Transducer::enumerate_paths_node( Node *node, vector<Label> &path,
413
+ NodeHashSet &previous,
414
+ vector<Transducer*> &result )
415
+ {
416
+ if (node->is_final())
417
+ result.push_back(new Transducer(path));
418
+
419
+ for( ArcsIter it(node->arcs()); it; it++ ) {
420
+ Arc *arc=it;
421
+
422
+ NodeHashSet::iterator it=previous.insert(node).first;
423
+ path.push_back(arc->label());
424
+ enumerate_paths_node( arc->target_node(), path, previous, result );
425
+ path.pop_back();
426
+ previous.erase(it);
427
+ }
428
+ }
429
+
430
+
431
+ /*******************************************************************/
432
+ /* */
433
+ /* Transducer::enumerate_paths */
434
+ /* */
435
+ /*******************************************************************/
436
+
437
+ bool Transducer::enumerate_paths( vector<Transducer*> &result )
438
+
439
+ {
440
+ if (is_infinitely_ambiguous())
441
+ return true;
442
+ for( size_t i=0; i<result.size(); i++ )
443
+ delete result[i];
444
+ result.clear();
445
+
446
+ vector<Label> path;
447
+ NodeHashSet previous;
448
+ enumerate_paths_node( root_node(), path, previous, result );
449
+ return false;
450
+ }
451
+
452
+
453
+
454
+
455
+ /*******************************************************************/
456
+ /* */
457
+ /* Transducer::print_strings_node */
458
+ /* */
459
+ /*******************************************************************/
460
+
461
+ int Transducer::print_strings_node(Node *node, char *buffer, int pos,
462
+ FILE *file, bool with_brackets )
463
+ {
464
+ int result = 0;
465
+
466
+ if (node->was_visited( vmark )) {
467
+ if (node->forward() != NULL) { // cycle detected
468
+ cerr << "Warning: cyclic analyses (cycle aborted)\n";
469
+ return 0;
470
+ }
471
+ node->set_forward(node); // used like a flag for loop detection
472
+ }
473
+ if (pos == BUFFER_SIZE)
474
+ throw "Output string in function print_strings_node is too long";
475
+ if (node->is_final()) {
476
+ buffer[pos] = '\0';
477
+ fprintf(file,"%s\n", buffer);
478
+ result = 1;
479
+ }
480
+ for( ArcsIter i(node->arcs()); i; i++ ) {
481
+ int p=pos;
482
+ Arc *arc=i;
483
+ Label l=arc->label();
484
+ alphabet.write_label(l, buffer, &p, with_brackets);
485
+ result |= print_strings_node(arc->target_node(), buffer, p,
486
+ file, with_brackets );
487
+ }
488
+ node->set_forward(NULL);
489
+
490
+ return result;
491
+ }
492
+
493
+
494
+ /*******************************************************************/
495
+ /* */
496
+ /* Transducer::print_strings */
497
+ /* */
498
+ /*******************************************************************/
499
+
500
+ int Transducer::print_strings( FILE *file, bool with_brackets )
501
+
502
+ {
503
+ char buffer[BUFFER_SIZE];
504
+ incr_vmark();
505
+ return print_strings_node( root_node(), buffer, 0, file, with_brackets );
506
+ }
507
+
508
+
509
+ /*******************************************************************/
510
+ /* */
511
+ /* Transducer::analyze_string */
512
+ /* */
513
+ /*******************************************************************/
514
+
515
+ bool Transducer::analyze_string( char *string, FILE *file, bool with_brackets )
516
+
517
+ {
518
+ Transducer a1(string, &alphabet, false);
519
+ Transducer *a2=&(*this || a1);
520
+ Transducer *a3=&(a2->lower_level());
521
+ delete a2;
522
+ a2 = &a3->minimise();
523
+ delete a3;
524
+
525
+ a2->alphabet.copy(alphabet);
526
+ bool result = a2->print_strings( file, with_brackets );
527
+ delete a2;
528
+ return result;
529
+ }
530
+
531
+
532
+ /*******************************************************************/
533
+ /* */
534
+ /* Transducer::generate_string */
535
+ /* */
536
+ /*******************************************************************/
537
+
538
+ bool Transducer::generate_string( char *string, FILE *file, bool with_brackets)
539
+
540
+ {
541
+ Transducer a1(string, &alphabet, false);
542
+ Transducer *a2=&(a1 || *this);
543
+ Transducer *a3=&(a2->upper_level());
544
+ delete a2;
545
+ a2 = &a3->minimise();
546
+ delete a3;
547
+
548
+ a2->alphabet.copy(alphabet);
549
+ bool result = a2->print_strings( file, with_brackets );
550
+ delete a2;
551
+ return result;
552
+ }
553
+
554
+
555
+ /*******************************************************************/
556
+ /* */
557
+ /* complete */
558
+ /* */
559
+ /*******************************************************************/
560
+
561
+ static void complete( Node *node, Alphabet &alphabet, int vmark)
562
+
563
+ {
564
+ if (node->was_visited( vmark ))
565
+ return;
566
+ for( ArcsIter p(node->arcs()); p; p++ ) {
567
+ Arc *arc=p;
568
+ if (!arc->label().is_epsilon())
569
+ alphabet.insert(arc->label());
570
+ complete(arc->target_node(), alphabet, vmark);
571
+ }
572
+ }
573
+
574
+
575
+ /*******************************************************************/
576
+ /* */
577
+ /* Transducer::complete_alphabet */
578
+ /* */
579
+ /*******************************************************************/
580
+
581
+ void Transducer::complete_alphabet()
582
+
583
+ {
584
+ incr_vmark();
585
+ complete(root_node(), alphabet, vmark);
586
+ }
587
+
588
+
589
+ /*******************************************************************/
590
+ /* */
591
+ /* print_node */
592
+ /* */
593
+ /*******************************************************************/
594
+
595
+ static void print_node( ostream &s, Node *node, NodeNumbering &index,
596
+ long vmark, Alphabet &abc )
597
+
598
+ {
599
+ if (!node->was_visited( vmark )) {
600
+ Arcs *arcs=node->arcs();
601
+ if (node->is_final())
602
+ s << "final\t" << index[node] << "\n";
603
+ for( ArcsIter p(arcs); p; p++ ) {
604
+ Arc *arc=p;
605
+ s << index[node] << "\t";
606
+ s << abc.write_label(arc->label()) << "\t";
607
+ s << index[arc->target_node()] << "\n";
608
+ }
609
+ for( ArcsIter p(arcs); p; p++ ) {
610
+ Arc *arc=p;
611
+ print_node( s, arc->target_node(), index, vmark, abc );
612
+ }
613
+ }
614
+ }
615
+
616
+
617
+ /*******************************************************************/
618
+ /* */
619
+ /* operator<< */
620
+ /* */
621
+ /*******************************************************************/
622
+
623
+ ostream &operator<<( ostream &s, Transducer &a )
624
+
625
+ {
626
+ NodeNumbering index(a);
627
+ a.incr_vmark();
628
+ print_node( s, a.root_node(), index, a.vmark, a.alphabet );
629
+ return s;
630
+ }
631
+
632
+
633
+ /*******************************************************************/
634
+ /* */
635
+ /* store_node_info */
636
+ /* */
637
+ /*******************************************************************/
638
+
639
+ static void store_node_info( FILE *file, Node *node )
640
+
641
+ {
642
+ // write final flag
643
+ char c=node->is_final();
644
+ fwrite(&c,sizeof(c),1,file);
645
+
646
+ // write the number of arcs
647
+ int nn = node->arcs()->size();
648
+ if (nn > 65535)
649
+ throw "Error: in function store_node\n";
650
+ unsigned short n=(unsigned short)nn;
651
+ fwrite(&n,sizeof(n),1,file);
652
+ }
653
+
654
+
655
+ /*******************************************************************/
656
+ /* */
657
+ /* store_arc_label */
658
+ /* */
659
+ /*******************************************************************/
660
+
661
+ static void store_arc_label( FILE *file, Arc *arc )
662
+
663
+ {
664
+ Label l=arc->label();
665
+ Character lc=l.lower_char();
666
+ Character uc=l.upper_char();
667
+ fwrite(&lc,sizeof(lc),1,file);
668
+ fwrite(&uc,sizeof(uc),1,file);
669
+ }
670
+
671
+
672
+ /*******************************************************************/
673
+ /* */
674
+ /* store_node */
675
+ /* */
676
+ /*******************************************************************/
677
+
678
+ static void store_node( FILE *file, Node *node, NodeNumbering &index,
679
+ long vmark )
680
+ {
681
+ if (!node->was_visited( vmark )) {
682
+
683
+ store_node_info( file, node );
684
+
685
+ // write the arcs
686
+ for( ArcsIter p(node->arcs()); p; p++ ) {
687
+ Arc *arc=p;
688
+ store_arc_label( file, arc );
689
+ unsigned int t=index[arc->target_node()];
690
+ fwrite(&t,sizeof(t),1,file);
691
+ store_node(file, arc->target_node(), index, vmark );
692
+ }
693
+ }
694
+ }
695
+
696
+
697
+ /*******************************************************************/
698
+ /* */
699
+ /* store_lowmem_node */
700
+ /* */
701
+ /*******************************************************************/
702
+
703
+ static void store_lowmem_node( FILE *file, Node *node, NodeNumbering &index,
704
+ vector<unsigned int> &startpos)
705
+ {
706
+ store_node_info( file, node );
707
+
708
+ // write the arcs
709
+ for( ArcsIter p(node->arcs()); p; p++ ) {
710
+ Arc *arc=p;
711
+ store_arc_label( file, arc );
712
+ unsigned int t=startpos[index[arc->target_node()]];
713
+ fwrite(&t,sizeof(t),1,file);
714
+ }
715
+ }
716
+
717
+
718
+ /*******************************************************************/
719
+ /* */
720
+ /* Transducer::store_lowmem */
721
+ /* */
722
+ /*******************************************************************/
723
+
724
+ void Transducer::store_lowmem( FILE *file )
725
+
726
+ {
727
+ fputc('l',file);
728
+ alphabet.store(file);
729
+
730
+ // storing size of index table
731
+ NodeNumbering index(*this);
732
+
733
+ // compute the start position of the first node
734
+ unsigned int pos=(unsigned int)ftell(file);
735
+ vector<unsigned int> startpos;
736
+ for( size_t i=0; i<index.number_of_nodes(); i++ ) {
737
+ startpos.push_back(pos);
738
+ Node *node=index.get_node(i);
739
+ Arcs *arcs=node->arcs();
740
+ pos += sizeof(char) // size of final flag
741
+ + sizeof(unsigned short) // size of number of arcs
742
+ + arcs->size() * (sizeof(Character) * 2 + sizeof(unsigned int)); // size of n arcs
743
+ }
744
+
745
+ // storing nodes
746
+ for( size_t i=0; i<index.number_of_nodes(); i++ )
747
+ store_lowmem_node( file, index.get_node(i), index, startpos );
748
+ }
749
+
750
+
751
+ /*******************************************************************/
752
+ /* */
753
+ /* Transducer::store */
754
+ /* */
755
+ /*******************************************************************/
756
+
757
+ void Transducer::store( FILE *file )
758
+
759
+ {
760
+ fputc('a',file);
761
+
762
+ NodeNumbering index(*this);
763
+ incr_vmark();
764
+ unsigned int n=index.number_of_nodes();
765
+ fwrite(&n,sizeof(n),1,file);
766
+ store_node( file, root_node(), index, vmark );
767
+
768
+ alphabet.store(file);
769
+ }
770
+
771
+
772
+ /*******************************************************************/
773
+ /* */
774
+ /* read_node */
775
+ /* */
776
+ /*******************************************************************/
777
+
778
+ static void read_node( FILE *file, Node *node, Node **p, Transducer *a )
779
+ {
780
+ char c;
781
+ fread(&c,sizeof(c),1,file);
782
+ node->set_final(c);
783
+
784
+ unsigned short n;
785
+ fread( &n, sizeof(n), 1, file);
786
+
787
+ for( int i=0; i<n; i++ ) {
788
+ Character lc,uc;
789
+ unsigned int t;
790
+ fread(&lc,sizeof(lc),1,file);
791
+ fread(&uc,sizeof(uc),1,file);
792
+ fread(&t,sizeof(t),1,file);
793
+ if (ferror(file))
794
+ throw "Error encountered while reading transducer from file";
795
+ if (p[t])
796
+ node->add_arc( Label(lc,uc), p[t], a );
797
+ else {
798
+ p[t] = a->new_node();
799
+ node->add_arc( Label(lc,uc), p[t], a );
800
+ read_node(file, p[t], p, a );
801
+ }
802
+ }
803
+ }
804
+
805
+
806
+ /*******************************************************************/
807
+ /* */
808
+ /* Transducer::read_transducer_binary */
809
+ /* */
810
+ /*******************************************************************/
811
+
812
+ void Transducer::read_transducer_binary( FILE *file )
813
+
814
+ {
815
+ if (fgetc(file) != 'a')
816
+ throw "Error: wrong file format (not a standard transducer)\n";
817
+
818
+ vmark = deterministic = 0;
819
+ unsigned int n;
820
+ fread(&n,sizeof(n),1,file); // number of nodes
821
+ if (ferror(file))
822
+ throw "Error encountered while reading transducer from file";
823
+
824
+ Node **p=new Node*[n]; // maps indices to nodes
825
+ p[0] = root_node();
826
+ for( unsigned int i=1; i<n; i++)
827
+ p[i] = NULL;
828
+ read_node( file, root_node(), p, this );
829
+ delete[] p;
830
+
831
+ alphabet.read(file);
832
+
833
+ vmark = 1;
834
+ deterministic = minimised = 1;
835
+ }
836
+
837
+
838
+ /*******************************************************************/
839
+ /* */
840
+ /* error_message */
841
+ /* */
842
+ /*******************************************************************/
843
+
844
// Reports a syntax error in a text transducer file: formats the message
// "Error: in line <line> of text transducer file" into a static buffer and
// throws a pointer to that buffer (type char*). Never returns normally.
static void error_message( size_t line )
{
  static char message[1000];

  const unsigned int shown_line = (unsigned int)line;
  sprintf(message, "Error: in line %u of text transducer file", shown_line);

  char *thrown = message;
  throw thrown;
}
852
+
853
+
854
+ /*******************************************************************/
855
+ /* */
856
+ /* Transducer::create_node */
857
+ /* */
858
+ /*******************************************************************/
859
+
860
+ Node *Transducer::create_node( vector<Node*> &node, char *s, size_t line )
861
+
862
+ {
863
+ char *p;
864
+ long n = strtol(s, &p, 10);
865
+
866
+ if (s == p || n < 0)
867
+ error_message( line );
868
+ if ((long)node.size() <= n)
869
+ node.resize(n+1, NULL);
870
+ if (node[n] == NULL)
871
+ node[n] = new Node;
872
+
873
+ return node[n];
874
+ }
875
+
876
+
877
+ /*******************************************************************/
878
+ /* */
879
+ /* next_string */
880
+ /* */
881
+ /*******************************************************************/
882
+
883
+ static char *next_string( char* &s, size_t line )
884
+
885
+ {
886
+ // scan the input up to the next tab or newline character
887
+ // and unquote symbols preceded by a backslash
888
+ char *p = s;
889
+ char *q = s;
890
+ while (*q!=0 && *q!='\t' && *q!='\n' && *q!='\r') {
891
+ if (*q == '\\')
892
+ q++;
893
+ *(p++) = *(q++);
894
+ }
895
+ if (p == s)
896
+ error_message(line); // no string found
897
+
898
+ char *result=s;
899
+ // skip over following whitespace
900
+ while (*q == ' ' || *q == '\t' || *q == '\n' || *q == '\r')
901
+ q++;
902
+
903
+ if (*q == 0)
904
+ s = NULL; // end of string was reached
905
+ else
906
+ s = q; // move the string pointer s
907
+
908
+ *p = 0; // mark the end of the result string
909
+
910
+ return result;
911
+ }
912
+
913
+
914
+ /*******************************************************************/
915
+ /* */
916
+ /* Transducer::read_transducer_text */
917
+ /* */
918
+ /*******************************************************************/
919
+
920
+ void Transducer::read_transducer_text( FILE *file )
921
+
922
+ {
923
+ vector<Node*> nodes;
924
+ nodes.push_back(root_node());
925
+
926
+ vmark = deterministic = 0;
927
+ char buffer[10000];
928
+ for( size_t line=0; fgets(buffer, 10000, file ); line++ ) {
929
+ char *p = buffer;
930
+ char *s = next_string(p, line);
931
+ if (strcmp(s, "final") == 0) {
932
+ s = next_string(p, line);
933
+ create_node( nodes, s, line )->set_final(true);
934
+ }
935
+ else {
936
+ Node *node = create_node( nodes, s, line );
937
+ s = next_string(p, line);
938
+ Label l = alphabet.next_label( s, 2 );
939
+ if (*s != 0 || l == Label::epsilon)
940
+ error_message( line );
941
+ s = next_string(p, line);
942
+ Node *target = create_node( nodes, s, line );
943
+ node->add_arc( l, target, this );
944
+ }
945
+ }
946
+
947
+ vmark = 1;
948
+ deterministic = minimised = 1;
949
+ }
950
+
951
+
952
+ /*******************************************************************/
953
+ /* */
954
+ /* Transducer::Transducer */
955
+ /* */
956
+ /*******************************************************************/
957
+
958
+ Transducer::Transducer( FILE *file, bool binary )
959
+
960
+ {
961
+ if (binary)
962
+ read_transducer_binary( file );
963
+ else
964
+ read_transducer_text( file );
965
+ }
966
+