ruby-sfst 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,966 @@
1
+
2
+ /*******************************************************************/
3
+ /* */
4
+ /* FILE fst.C */
5
+ /* MODULE fst */
6
+ /* PROGRAM SFST */
7
+ /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
8
+ /* */
9
+ /* PURPOSE basic FST functions */
10
+ /* */
11
+ /*******************************************************************/
12
+
13
+ #include "fst.h"
14
+
15
+ using std::vector;
16
+ using std::istream;
17
+ using std::ostream;
18
+ using std::cerr;
19
+
20
+ const int BUFFER_SIZE=100000;
21
+
22
+
23
+ /*******************************************************************/
24
+ /* */
25
+ /* Arcs::size */
26
+ /* */
27
+ /*******************************************************************/
28
+
29
+ int Arcs::size() const
30
+
31
+ {
32
+ int n=0;
33
+ for( Arc *p=first_arcp; p; p=p->next ) n++;
34
+ for( Arc *p=first_epsilon_arcp; p; p=p->next ) n++;
35
+ return n;
36
+ }
37
+
38
+
39
+ /*******************************************************************/
40
+ /* */
41
+ /* Arcs::target_node */
42
+ /* */
43
+ /*******************************************************************/
44
+
45
+ Node *Arcs::target_node( Label l )
46
+
47
+ {
48
+ Arc *arc;
49
+
50
+ for( arc=first_arcp; arc; arc=arc->next)
51
+ if (arc->label() == l)
52
+ return arc->target_node();
53
+
54
+ return NULL;
55
+ }
56
+
57
+ const Node *Arcs::target_node( Label l ) const
58
+
59
+ {
60
+ const Arc *arc;
61
+
62
+ for( arc=first_arcp; arc; arc=arc->next)
63
+ if (arc->label() == l)
64
+ return arc->target_node();
65
+
66
+ return NULL;
67
+ }
68
+
69
+
70
+ /*******************************************************************/
71
+ /* */
72
+ /* Arcs::add_arc */
73
+ /* */
74
+ /*******************************************************************/
75
+
76
+ void Arcs::add_arc( Label l, Node *node, Transducer *a )
77
+
78
+ {
79
+ Arc *arc=a->new_arc( l, node );
80
+
81
+ if (l.is_epsilon()) {
82
+ arc->next = first_epsilon_arcp;
83
+ first_epsilon_arcp = arc;
84
+ }
85
+ else {
86
+ arc->next = first_arcp;
87
+ first_arcp = arc;
88
+ }
89
+ }
90
+
91
+
92
+ /*******************************************************************/
93
+ /* */
94
+ /* Arcs::remove_arc */
95
+ /* */
96
+ /*******************************************************************/
97
+
98
+ int Arcs::remove_arc( Arc *arc )
99
+
100
+ {
101
+ Arc **p = (arc->label().is_epsilon()) ? &first_epsilon_arcp : &first_arcp;
102
+ for( ; *p; p=&(*p)->next )
103
+ if (*p == arc) {
104
+ *p = arc->next;
105
+ return 1;
106
+ }
107
+ return 0;
108
+ }
109
+
110
+
111
+ /*******************************************************************/
112
+ /* */
113
+ /* Node::init */
114
+ /* */
115
+ /*******************************************************************/
116
+
117
+ void Node::init()
118
+
119
+ {
120
+ final = false;
121
+ visited = 0;
122
+ arcsp.init();
123
+ forwardp = NULL;
124
+ }
125
+
126
+
127
+ /*******************************************************************/
128
+ /* */
129
+ /* NodeNumbering::number_node */
130
+ /* */
131
+ /*******************************************************************/
132
+
133
+ void NodeNumbering::number_node( Node *node, Transducer &a )
134
+
135
+ {
136
+ if (!node->was_visited( a.vmark )) {
137
+ nummap[node] = nodes.size();
138
+ nodes.push_back(node);
139
+ for( ArcsIter p(node->arcs()); p; p++ ) {
140
+ Arc *arc=p;
141
+ number_node( arc->target_node(), a );
142
+ }
143
+ }
144
+ }
145
+
146
+
147
+ /*******************************************************************/
148
+ /* */
149
+ /* NodeNumbering::NodeNumbering */
150
+ /* */
151
+ /*******************************************************************/
152
+
153
+ NodeNumbering::NodeNumbering( Transducer &a )
154
+
155
+ {
156
+ a.incr_vmark();
157
+ number_node( a.root_node(), a );
158
+ }
159
+
160
+
161
+ /*******************************************************************/
162
+ /* */
163
+ /* Transducer::new_node */
164
+ /* */
165
+ /*******************************************************************/
166
+
167
+ Node *Transducer::new_node()
168
+
169
+ {
170
+ Node *node=(Node*)mem.alloc( sizeof(Node) );
171
+
172
+ node->init();
173
+ return node;
174
+ }
175
+
176
+
177
+ /*******************************************************************/
178
+ /* */
179
+ /* Transducer::new_arc */
180
+ /* */
181
+ /*******************************************************************/
182
+
183
+ Arc *Transducer::new_arc( Label l, Node *target )
184
+
185
+ {
186
+ Arc *arc=(Arc*)mem.alloc( sizeof(Arc) );
187
+
188
+ arc->init( l, target);
189
+ return arc;
190
+ }
191
+
192
+
193
+ /*******************************************************************/
194
+ /* */
195
+ /* Transducer::add_string */
196
+ /* */
197
+ /*******************************************************************/
198
+
199
+ void Transducer::add_string( char *s, bool extended )
200
+
201
+ {
202
+ Node *node=root_node();
203
+
204
+ Label l;
205
+ while (!(l = alphabet.next_label(s, extended)).is_epsilon()) {
206
+ alphabet.insert(l);
207
+ Arcs *arcs=node->arcs();
208
+ node = arcs->target_node( l );
209
+ if (node == NULL) {
210
+ node = new_node();
211
+ arcs->add_arc( l, node, this );
212
+ }
213
+ }
214
+ node->set_final(1);
215
+ }
216
+
217
+
218
+ /*******************************************************************/
219
+ /* */
220
+ /* Transducer::Transducer */
221
+ /* */
222
+ /*******************************************************************/
223
+
224
+ Transducer::Transducer( vector<Label> &path )
225
+ : root(), mem()
226
+ {
227
+ Node *node=root_node();
228
+
229
+ vmark = 0;
230
+ deterministic = minimised = true;
231
+ for( size_t i=0; i<path.size(); i++ ) {
232
+ Arcs *arcs=node->arcs();
233
+ node = new_node();
234
+ arcs->add_arc( path[i], node, this );
235
+ }
236
+ node->set_final(1);
237
+ }
238
+
239
+
240
+ /*******************************************************************/
241
+ /* */
242
+ /* Transducer::Transducer */
243
+ /* */
244
+ /*******************************************************************/
245
+
246
+ Transducer::Transducer( istream &is, const Alphabet *a, bool verbose )
247
+ : root(), mem()
248
+ {
249
+ bool extended=false;
250
+ int n=0;
251
+ char buffer[10000];
252
+
253
+ vmark = 0;
254
+ deterministic = true;
255
+ minimised = false;
256
+ if (a) {
257
+ alphabet.copy(*a);
258
+ extended = true;
259
+ }
260
+ while (is.getline(buffer, 10000)) {
261
+ if (verbose && ++n % 10000 == 0) {
262
+ if (n == 10000)
263
+ cerr << "\n";
264
+ cerr << "\r" << n << " words";
265
+ }
266
+ // delete final whitespace characters
267
+ int l;
268
+ for( l=strlen(buffer)-1; l>=0; l-- )
269
+ if ((buffer[l] != ' ' && buffer[l] != '\t' && buffer[l] != '\r') ||
270
+ (l > 0 && buffer[l-1] == '\\'))
271
+ break;
272
+ buffer[l+1] = 0;
273
+
274
+ add_string(buffer,extended);
275
+ }
276
+ if (verbose && n >= 10000)
277
+ cerr << "\n";
278
+ }
279
+
280
+
281
+ /*******************************************************************/
282
+ /* */
283
+ /* Transducer::Transducer */
284
+ /* */
285
+ /*******************************************************************/
286
+
287
+ Transducer::Transducer( char *s, const Alphabet *a, bool extended )
288
+ : root(), mem()
289
+ {
290
+ vmark = 0;
291
+ deterministic = minimised = true;
292
+ if (a)
293
+ alphabet.copy(*a);
294
+ add_string(s, extended);
295
+ }
296
+
297
+
298
+ /*******************************************************************/
299
+ /* */
300
+ /* Transducer::clear */
301
+ /* */
302
+ /*******************************************************************/
303
+
304
+ void Transducer::clear()
305
+
306
+ {
307
+ vmark = 0;
308
+ deterministic = minimised = false;
309
+ root.init();
310
+ mem.clear();
311
+ alphabet.clear();
312
+ }
313
+
314
+
315
+ /*******************************************************************/
316
+ /* */
317
+ /* Transducer::store_symbols */
318
+ /* */
319
+ /*******************************************************************/
320
+
321
+ void Transducer::store_symbols(Node *node, SymbolMap &symbol, LabelSet &labels)
322
+
323
+ {
324
+ if (!node->was_visited( vmark )) {
325
+ Arcs *arcs=node->arcs();
326
+ for( ArcsIter p(arcs); p; p++ ) {
327
+ Arc *arc=p;
328
+ Label l=arc->label();
329
+
330
+ labels.insert(l);
331
+
332
+ Character c = l.upper_char();
333
+ if (symbol.find(c) == symbol.end()) {
334
+ const char *s = alphabet.code2symbol(c);
335
+ if (s)
336
+ symbol[c] = fst_strdup(s);
337
+ }
338
+
339
+ c = l.lower_char();
340
+ if (symbol.find(c) == symbol.end()) {
341
+ const char *s = alphabet.code2symbol(c);
342
+ if (s)
343
+ symbol[c] = fst_strdup(s);
344
+ }
345
+
346
+ store_symbols( arc->target_node(), symbol, labels );
347
+ }
348
+ }
349
+ }
350
+
351
+
352
+ /*******************************************************************/
353
+ /* */
354
+ /* Transducer::minimise_alphabet */
355
+ /* */
356
+ /*******************************************************************/
357
+
358
+ void Transducer::minimise_alphabet()
359
+
360
+ {
361
+ SymbolMap symbols;
362
+ LabelSet labels;
363
+ incr_vmark();
364
+ store_symbols(root_node(), symbols, labels);
365
+ alphabet.clear();
366
+ for( SymbolMap::iterator it=symbols.begin(); it!=symbols.end(); it++ ) {
367
+ alphabet.add_symbol( it->second, it->first );
368
+ free(it->second);
369
+ }
370
+ for( LabelSet::iterator it=labels.begin(); it!=labels.end(); it++ )
371
+ alphabet.insert(*it);
372
+ }
373
+
374
+
375
+ /*******************************************************************/
376
+ /* */
377
+ /* Transducer::minimise */
378
+ /* */
379
+ /*******************************************************************/
380
+
381
+ Transducer &Transducer::minimise( bool verbose )
382
+
383
+ {
384
+ if (minimised)
385
+ return copy();
386
+
387
+ Transducer *a1, *a2;
388
+
389
+ a1 = &reverse();
390
+ a2 = &a1->determinise();
391
+ delete a1;
392
+
393
+ a1 = &a2->reverse();
394
+ delete a2;
395
+
396
+ a2 = &a1->determinise();
397
+ delete a1;
398
+
399
+ a2->minimised = true;
400
+ a2->minimise_alphabet();
401
+
402
+ return *a2;
403
+ }
404
+
405
+
406
+ /*******************************************************************/
407
+ /* */
408
+ /* Transducer::enumerate_paths_node */
409
+ /* */
410
+ /*******************************************************************/
411
+
412
+ void Transducer::enumerate_paths_node( Node *node, vector<Label> &path,
413
+ NodeHashSet &previous,
414
+ vector<Transducer*> &result )
415
+ {
416
+ if (node->is_final())
417
+ result.push_back(new Transducer(path));
418
+
419
+ for( ArcsIter it(node->arcs()); it; it++ ) {
420
+ Arc *arc=it;
421
+
422
+ NodeHashSet::iterator it=previous.insert(node).first;
423
+ path.push_back(arc->label());
424
+ enumerate_paths_node( arc->target_node(), path, previous, result );
425
+ path.pop_back();
426
+ previous.erase(it);
427
+ }
428
+ }
429
+
430
+
431
+ /*******************************************************************/
432
+ /* */
433
+ /* Transducer::enumerate_paths */
434
+ /* */
435
+ /*******************************************************************/
436
+
437
+ bool Transducer::enumerate_paths( vector<Transducer*> &result )
438
+
439
+ {
440
+ if (is_infinitely_ambiguous())
441
+ return true;
442
+ for( size_t i=0; i<result.size(); i++ )
443
+ delete result[i];
444
+ result.clear();
445
+
446
+ vector<Label> path;
447
+ NodeHashSet previous;
448
+ enumerate_paths_node( root_node(), path, previous, result );
449
+ return false;
450
+ }
451
+
452
+
453
+
454
+
455
+ /*******************************************************************/
456
+ /* */
457
+ /* Transducer::print_strings_node */
458
+ /* */
459
+ /*******************************************************************/
460
+
461
+ int Transducer::print_strings_node(Node *node, char *buffer, int pos,
462
+ FILE *file, bool with_brackets )
463
+ {
464
+ int result = 0;
465
+
466
+ if (node->was_visited( vmark )) {
467
+ if (node->forward() != NULL) { // cycle detected
468
+ cerr << "Warning: cyclic analyses (cycle aborted)\n";
469
+ return 0;
470
+ }
471
+ node->set_forward(node); // used like a flag for loop detection
472
+ }
473
+ if (pos == BUFFER_SIZE)
474
+ throw "Output string in function print_strings_node is too long";
475
+ if (node->is_final()) {
476
+ buffer[pos] = '\0';
477
+ fprintf(file,"%s\n", buffer);
478
+ result = 1;
479
+ }
480
+ for( ArcsIter i(node->arcs()); i; i++ ) {
481
+ int p=pos;
482
+ Arc *arc=i;
483
+ Label l=arc->label();
484
+ alphabet.write_label(l, buffer, &p, with_brackets);
485
+ result |= print_strings_node(arc->target_node(), buffer, p,
486
+ file, with_brackets );
487
+ }
488
+ node->set_forward(NULL);
489
+
490
+ return result;
491
+ }
492
+
493
+
494
+ /*******************************************************************/
495
+ /* */
496
+ /* Transducer::print_strings */
497
+ /* */
498
+ /*******************************************************************/
499
+
500
+ int Transducer::print_strings( FILE *file, bool with_brackets )
501
+
502
+ {
503
+ char buffer[BUFFER_SIZE];
504
+ incr_vmark();
505
+ return print_strings_node( root_node(), buffer, 0, file, with_brackets );
506
+ }
507
+
508
+
509
+ /*******************************************************************/
510
+ /* */
511
+ /* Transducer::analyze_string */
512
+ /* */
513
+ /*******************************************************************/
514
+
515
+ bool Transducer::analyze_string( char *string, FILE *file, bool with_brackets )
516
+
517
+ {
518
+ Transducer a1(string, &alphabet, false);
519
+ Transducer *a2=&(*this || a1);
520
+ Transducer *a3=&(a2->lower_level());
521
+ delete a2;
522
+ a2 = &a3->minimise();
523
+ delete a3;
524
+
525
+ a2->alphabet.copy(alphabet);
526
+ bool result = a2->print_strings( file, with_brackets );
527
+ delete a2;
528
+ return result;
529
+ }
530
+
531
+
532
+ /*******************************************************************/
533
+ /* */
534
+ /* Transducer::generate_string */
535
+ /* */
536
+ /*******************************************************************/
537
+
538
+ bool Transducer::generate_string( char *string, FILE *file, bool with_brackets)
539
+
540
+ {
541
+ Transducer a1(string, &alphabet, false);
542
+ Transducer *a2=&(a1 || *this);
543
+ Transducer *a3=&(a2->upper_level());
544
+ delete a2;
545
+ a2 = &a3->minimise();
546
+ delete a3;
547
+
548
+ a2->alphabet.copy(alphabet);
549
+ bool result = a2->print_strings( file, with_brackets );
550
+ delete a2;
551
+ return result;
552
+ }
553
+
554
+
555
+ /*******************************************************************/
556
+ /* */
557
+ /* complete */
558
+ /* */
559
+ /*******************************************************************/
560
+
561
+ static void complete( Node *node, Alphabet &alphabet, int vmark)
562
+
563
+ {
564
+ if (node->was_visited( vmark ))
565
+ return;
566
+ for( ArcsIter p(node->arcs()); p; p++ ) {
567
+ Arc *arc=p;
568
+ if (!arc->label().is_epsilon())
569
+ alphabet.insert(arc->label());
570
+ complete(arc->target_node(), alphabet, vmark);
571
+ }
572
+ }
573
+
574
+
575
+ /*******************************************************************/
576
+ /* */
577
+ /* Transducer::complete_alphabet */
578
+ /* */
579
+ /*******************************************************************/
580
+
581
+ void Transducer::complete_alphabet()
582
+
583
+ {
584
+ incr_vmark();
585
+ complete(root_node(), alphabet, vmark);
586
+ }
587
+
588
+
589
+ /*******************************************************************/
590
+ /* */
591
+ /* print_node */
592
+ /* */
593
+ /*******************************************************************/
594
+
595
+ static void print_node( ostream &s, Node *node, NodeNumbering &index,
596
+ long vmark, Alphabet &abc )
597
+
598
+ {
599
+ if (!node->was_visited( vmark )) {
600
+ Arcs *arcs=node->arcs();
601
+ if (node->is_final())
602
+ s << "final\t" << index[node] << "\n";
603
+ for( ArcsIter p(arcs); p; p++ ) {
604
+ Arc *arc=p;
605
+ s << index[node] << "\t";
606
+ s << abc.write_label(arc->label()) << "\t";
607
+ s << index[arc->target_node()] << "\n";
608
+ }
609
+ for( ArcsIter p(arcs); p; p++ ) {
610
+ Arc *arc=p;
611
+ print_node( s, arc->target_node(), index, vmark, abc );
612
+ }
613
+ }
614
+ }
615
+
616
+
617
+ /*******************************************************************/
618
+ /* */
619
+ /* operator<< */
620
+ /* */
621
+ /*******************************************************************/
622
+
623
+ ostream &operator<<( ostream &s, Transducer &a )
624
+
625
+ {
626
+ NodeNumbering index(a);
627
+ a.incr_vmark();
628
+ print_node( s, a.root_node(), index, a.vmark, a.alphabet );
629
+ return s;
630
+ }
631
+
632
+
633
+ /*******************************************************************/
634
+ /* */
635
+ /* store_node_info */
636
+ /* */
637
+ /*******************************************************************/
638
+
639
+ static void store_node_info( FILE *file, Node *node )
640
+
641
+ {
642
+ // write final flag
643
+ char c=node->is_final();
644
+ fwrite(&c,sizeof(c),1,file);
645
+
646
+ // write the number of arcs
647
+ int nn = node->arcs()->size();
648
+ if (nn > 65535)
649
+ throw "Error: in function store_node\n";
650
+ unsigned short n=(unsigned short)nn;
651
+ fwrite(&n,sizeof(n),1,file);
652
+ }
653
+
654
+
655
+ /*******************************************************************/
656
+ /* */
657
+ /* store_arc_label */
658
+ /* */
659
+ /*******************************************************************/
660
+
661
+ static void store_arc_label( FILE *file, Arc *arc )
662
+
663
+ {
664
+ Label l=arc->label();
665
+ Character lc=l.lower_char();
666
+ Character uc=l.upper_char();
667
+ fwrite(&lc,sizeof(lc),1,file);
668
+ fwrite(&uc,sizeof(uc),1,file);
669
+ }
670
+
671
+
672
+ /*******************************************************************/
673
+ /* */
674
+ /* store_node */
675
+ /* */
676
+ /*******************************************************************/
677
+
678
+ static void store_node( FILE *file, Node *node, NodeNumbering &index,
679
+ long vmark )
680
+ {
681
+ if (!node->was_visited( vmark )) {
682
+
683
+ store_node_info( file, node );
684
+
685
+ // write the arcs
686
+ for( ArcsIter p(node->arcs()); p; p++ ) {
687
+ Arc *arc=p;
688
+ store_arc_label( file, arc );
689
+ unsigned int t=index[arc->target_node()];
690
+ fwrite(&t,sizeof(t),1,file);
691
+ store_node(file, arc->target_node(), index, vmark );
692
+ }
693
+ }
694
+ }
695
+
696
+
697
+ /*******************************************************************/
698
+ /* */
699
+ /* store_lowmem_node */
700
+ /* */
701
+ /*******************************************************************/
702
+
703
+ static void store_lowmem_node( FILE *file, Node *node, NodeNumbering &index,
704
+ vector<unsigned int> &startpos)
705
+ {
706
+ store_node_info( file, node );
707
+
708
+ // write the arcs
709
+ for( ArcsIter p(node->arcs()); p; p++ ) {
710
+ Arc *arc=p;
711
+ store_arc_label( file, arc );
712
+ unsigned int t=startpos[index[arc->target_node()]];
713
+ fwrite(&t,sizeof(t),1,file);
714
+ }
715
+ }
716
+
717
+
718
+ /*******************************************************************/
719
+ /* */
720
+ /* Transducer::store_lowmem */
721
+ /* */
722
+ /*******************************************************************/
723
+
724
+ void Transducer::store_lowmem( FILE *file )
725
+
726
+ {
727
+ fputc('l',file);
728
+ alphabet.store(file);
729
+
730
+ // storing size of index table
731
+ NodeNumbering index(*this);
732
+
733
+ // compute the start position of the first node
734
+ unsigned int pos=(unsigned int)ftell(file);
735
+ vector<unsigned int> startpos;
736
+ for( size_t i=0; i<index.number_of_nodes(); i++ ) {
737
+ startpos.push_back(pos);
738
+ Node *node=index.get_node(i);
739
+ Arcs *arcs=node->arcs();
740
+ pos += sizeof(char) // size of final flag
741
+ + sizeof(unsigned short) // size of number of arcs
742
+ + arcs->size() * (sizeof(Character) * 2 + sizeof(unsigned int)); // size of n arcs
743
+ }
744
+
745
+ // storing nodes
746
+ for( size_t i=0; i<index.number_of_nodes(); i++ )
747
+ store_lowmem_node( file, index.get_node(i), index, startpos );
748
+ }
749
+
750
+
751
+ /*******************************************************************/
752
+ /* */
753
+ /* Transducer::store */
754
+ /* */
755
+ /*******************************************************************/
756
+
757
+ void Transducer::store( FILE *file )
758
+
759
+ {
760
+ fputc('a',file);
761
+
762
+ NodeNumbering index(*this);
763
+ incr_vmark();
764
+ unsigned int n=index.number_of_nodes();
765
+ fwrite(&n,sizeof(n),1,file);
766
+ store_node( file, root_node(), index, vmark );
767
+
768
+ alphabet.store(file);
769
+ }
770
+
771
+
772
+ /*******************************************************************/
773
+ /* */
774
+ /* read_node */
775
+ /* */
776
+ /*******************************************************************/
777
+
778
+ static void read_node( FILE *file, Node *node, Node **p, Transducer *a )
779
+ {
780
+ char c;
781
+ fread(&c,sizeof(c),1,file);
782
+ node->set_final(c);
783
+
784
+ unsigned short n;
785
+ fread( &n, sizeof(n), 1, file);
786
+
787
+ for( int i=0; i<n; i++ ) {
788
+ Character lc,uc;
789
+ unsigned int t;
790
+ fread(&lc,sizeof(lc),1,file);
791
+ fread(&uc,sizeof(uc),1,file);
792
+ fread(&t,sizeof(t),1,file);
793
+ if (ferror(file))
794
+ throw "Error encountered while reading transducer from file";
795
+ if (p[t])
796
+ node->add_arc( Label(lc,uc), p[t], a );
797
+ else {
798
+ p[t] = a->new_node();
799
+ node->add_arc( Label(lc,uc), p[t], a );
800
+ read_node(file, p[t], p, a );
801
+ }
802
+ }
803
+ }
804
+
805
+
806
+ /*******************************************************************/
807
+ /* */
808
+ /* Transducer::read_transducer_binary */
809
+ /* */
810
+ /*******************************************************************/
811
+
812
+ void Transducer::read_transducer_binary( FILE *file )
813
+
814
+ {
815
+ if (fgetc(file) != 'a')
816
+ throw "Error: wrong file format (not a standard transducer)\n";
817
+
818
+ vmark = deterministic = 0;
819
+ unsigned int n;
820
+ fread(&n,sizeof(n),1,file); // number of nodes
821
+ if (ferror(file))
822
+ throw "Error encountered while reading transducer from file";
823
+
824
+ Node **p=new Node*[n]; // maps indices to nodes
825
+ p[0] = root_node();
826
+ for( unsigned int i=1; i<n; i++)
827
+ p[i] = NULL;
828
+ read_node( file, root_node(), p, this );
829
+ delete[] p;
830
+
831
+ alphabet.read(file);
832
+
833
+ vmark = 1;
834
+ deterministic = minimised = 1;
835
+ }
836
+
837
+
838
+ /*******************************************************************/
839
+ /* */
840
+ /* error_message */
841
+ /* */
842
+ /*******************************************************************/
843
+
844
// Formats a parse error for the given line number into a static buffer
// and throws a pointer to that buffer.  This function never returns
// normally.
static void error_message( size_t line )

{
  static char message[1000];

  const unsigned int shown_line = (unsigned int)line;
  sprintf(message, "Error: in line %u of text transducer file",
	  shown_line);

  throw message;
}
852
+
853
+
854
+ /*******************************************************************/
855
+ /* */
856
+ /* Transducer::create_node */
857
+ /* */
858
+ /*******************************************************************/
859
+
860
+ Node *Transducer::create_node( vector<Node*> &node, char *s, size_t line )
861
+
862
+ {
863
+ char *p;
864
+ long n = strtol(s, &p, 10);
865
+
866
+ if (s == p || n < 0)
867
+ error_message( line );
868
+ if ((long)node.size() <= n)
869
+ node.resize(n+1, NULL);
870
+ if (node[n] == NULL)
871
+ node[n] = new Node;
872
+
873
+ return node[n];
874
+ }
875
+
876
+
877
+ /*******************************************************************/
878
+ /* */
879
+ /* next_string */
880
+ /* */
881
+ /*******************************************************************/
882
+
883
+ static char *next_string( char* &s, size_t line )
884
+
885
+ {
886
+ // scan the input up to the next tab or newline character
887
+ // and unquote symbols preceded by a backslash
888
+ char *p = s;
889
+ char *q = s;
890
+ while (*q!=0 && *q!='\t' && *q!='\n' && *q!='\r') {
891
+ if (*q == '\\')
892
+ q++;
893
+ *(p++) = *(q++);
894
+ }
895
+ if (p == s)
896
+ error_message(line); // no string found
897
+
898
+ char *result=s;
899
+ // skip over following whitespace
900
+ while (*q == ' ' || *q == '\t' || *q == '\n' || *q == '\r')
901
+ q++;
902
+
903
+ if (*q == 0)
904
+ s = NULL; // end of string was reached
905
+ else
906
+ s = q; // move the string pointer s
907
+
908
+ *p = 0; // mark the end of the result string
909
+
910
+ return result;
911
+ }
912
+
913
+
914
+ /*******************************************************************/
915
+ /* */
916
+ /* Transducer::read_transducer_text */
917
+ /* */
918
+ /*******************************************************************/
919
+
920
+ void Transducer::read_transducer_text( FILE *file )
921
+
922
+ {
923
+ vector<Node*> nodes;
924
+ nodes.push_back(root_node());
925
+
926
+ vmark = deterministic = 0;
927
+ char buffer[10000];
928
+ for( size_t line=0; fgets(buffer, 10000, file ); line++ ) {
929
+ char *p = buffer;
930
+ char *s = next_string(p, line);
931
+ if (strcmp(s, "final") == 0) {
932
+ s = next_string(p, line);
933
+ create_node( nodes, s, line )->set_final(true);
934
+ }
935
+ else {
936
+ Node *node = create_node( nodes, s, line );
937
+ s = next_string(p, line);
938
+ Label l = alphabet.next_label( s, 2 );
939
+ if (*s != 0 || l == Label::epsilon)
940
+ error_message( line );
941
+ s = next_string(p, line);
942
+ Node *target = create_node( nodes, s, line );
943
+ node->add_arc( l, target, this );
944
+ }
945
+ }
946
+
947
+ vmark = 1;
948
+ deterministic = minimised = 1;
949
+ }
950
+
951
+
952
+ /*******************************************************************/
953
+ /* */
954
+ /* Transducer::Transducer */
955
+ /* */
956
+ /*******************************************************************/
957
+
958
+ Transducer::Transducer( FILE *file, bool binary )
959
+
960
+ {
961
+ if (binary)
962
+ read_transducer_binary( file );
963
+ else
964
+ read_transducer_text( file );
965
+ }
966
+