ruby-sfst 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,2 +1,3 @@
1
+ v0.2.0. update to SFST 1.3
1
2
  v0.1.1. overflow and g++ 4.3 fixes
2
3
  v0.1.0. initial release
data/Manifest CHANGED
@@ -11,6 +11,7 @@ ext/sfst_machine/utf8-scanner.ll
11
11
  ext/sfst_machine/determinise.C
12
12
  ext/sfst_machine/interface.C
13
13
  ext/sfst_machine/compact.h
14
+ ext/sfst_machine/sgi.h
14
15
  ext/sfst_machine/basic.h
15
16
  ext/sfst_machine/fst.h
16
17
  ext/sfst_machine/make-compact.h
data/README.rdoc CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  A wrapper for the Stuttgart Finite State Transducer Tools (SFST).
4
4
 
5
+ The wrapper is based on SFST 1.3.
6
+
5
7
  == Installation
6
8
 
7
9
  gem install ruby-sfst
data/Rakefile CHANGED
@@ -4,7 +4,7 @@ require 'rake'
4
4
  begin
5
5
  require 'echoe'
6
6
 
7
- Echoe.new('ruby-sfst', '0.1.1') do |p|
7
+ Echoe.new('ruby-sfst', '0.2.0') do |p|
8
8
  p.summary = "A wrapper for the Stuttgart Finite State Transducer Tools (SFST)."
9
9
  p.author = 'Marius L. Jøhndal'
10
10
  p.email = "mariuslj (at) ifi [dot] uio (dot) no"
@@ -10,14 +10,14 @@
10
10
  /* */
11
11
  /*******************************************************************/
12
12
 
13
- #include <limits.h>
13
+ #include <climits>
14
+ #include <cstring>
15
+
14
16
  #include "utf8.h"
15
17
  #include "alphabet.h"
16
18
 
17
19
  using std::vector;
18
20
  using std::ostream;
19
- using __gnu_cxx::hash_map;
20
- using __gnu_cxx::hash_set;
21
21
 
22
22
  const int BUFFER_SIZE=100000;
23
23
 
@@ -425,13 +425,13 @@ ostream &operator<<( ostream &s, const Alphabet &a )
425
425
  /* Alphabet::next_mcsym */
426
426
  /* */
427
427
  /* recognizes multi-character symbols which are enclosed with */
428
- /* angle brackets <...>. If the value of the argument flag is */
429
- /* smaller than 2, the multi-character symbol must be already in */
430
- /* the lexicon in order to be recognized. */
428
+ /* angle brackets <...>. If the argument flag insert is false, */
429
+ /* the multi-character symbol must be already in the lexicon in */
430
+ /* order to be recognized; otherwise add_symbol is called on it. */
431
431
  /* */
432
432
  /*******************************************************************/
433
433
 
434
- int Alphabet::next_mcsym( char* &string, int extended )
434
+ int Alphabet::next_mcsym( char* &string, bool insert )
435
435
 
436
436
  {
437
437
  char *start=string;
@@ -446,7 +446,7 @@ int Alphabet::next_mcsym( char* &string, int extended )
446
446
  *end = 0;
447
447
 
448
448
  int c;
449
- if (extended <= 2)
449
+ if (insert)
450
450
  c = add_symbol( start );
451
451
  else
452
452
  c = symbol2code(start);
@@ -473,13 +473,13 @@ int Alphabet::next_mcsym( char* &string, int extended )
473
473
  /* */
474
474
  /*******************************************************************/
475
475
 
476
- int Alphabet::next_code( char* &string, int extended )
476
+ int Alphabet::next_code( char* &string, bool extended, bool insert )
477
477
 
478
478
  {
479
479
  if (*string == 0)
480
480
  return EOF; // finished
481
481
 
482
- int c = next_mcsym(string, extended);
482
+ int c = next_mcsym(string, insert);
483
483
  if (c != EOF)
484
484
  return c;
485
485
 
@@ -506,7 +506,7 @@ int Alphabet::next_code( char* &string, int extended )
506
506
  /* */
507
507
  /*******************************************************************/
508
508
 
509
- Label Alphabet::next_label( char* &string, int extended )
509
+ Label Alphabet::next_label( char* &string, bool extended )
510
510
 
511
511
  {
512
512
  // read first character
@@ -517,7 +517,7 @@ Label Alphabet::next_label( char* &string, int extended )
517
517
  Character lc=(Character)c;
518
518
  if (!extended || *string != ':') { // single character?
519
519
  if (lc == Label::epsilon)
520
- return next_label(string); // ignore epsilon
520
+ return next_label(string, extended); // ignore epsilon
521
521
  return Label(lc);
522
522
  }
523
523
 
@@ -532,7 +532,7 @@ Label Alphabet::next_label( char* &string, int extended )
532
532
 
533
533
  Label l(lc, (Character)c);
534
534
  if (l.is_epsilon())
535
- return next_label(string); // ignore epsilon transitions
535
+ return next_label(string, extended); // ignore epsilon transitions
536
536
  return l;
537
537
  }
538
538
 
@@ -782,8 +782,12 @@ char *Alphabet::print_analysis( Analysis &ana, bool both_layers )
782
782
  const char *s;
783
783
 
784
784
  // either print the analysis symbol or the whole label
785
- if (both_layers)
785
+ if (both_layers) {
786
786
  s = write_label(l);
787
+ // quote colons
788
+ if (strcmp(s,":") == 0)
789
+ ch.push_back('\\');
790
+ }
787
791
  else if (l.lower_char() != Label::epsilon)
788
792
  s = write_char(l.lower_char());
789
793
  else
@@ -13,7 +13,6 @@
13
13
  #define _ALPHABET_H_
14
14
 
15
15
  #include <stdio.h>
16
- #include <string.h>
17
16
 
18
17
  #include "basic.h"
19
18
 
@@ -22,6 +21,10 @@
22
21
 
23
22
  #include <iostream>
24
23
 
24
+ #include <cstring>
25
+
26
+ #include "sgi.h"
27
+
25
28
  #ifndef CODE_DATA_TYPE
26
29
  typedef unsigned short Character; // data type of the symbol codes
27
30
  #else
@@ -32,18 +35,6 @@ typedef unsigned CODE_DATA_TYPE Character;
32
35
  // on the analysis level (lower) or the surface level (upper)
33
36
  typedef enum {upper, lower} Level;
34
37
 
35
- #ifdef SGIext
36
-
37
- #include <ext/hash_set>
38
- #include <ext/hash_map>
39
-
40
- #else
41
-
42
- #include <hash_set>
43
- #include <hash_map>
44
-
45
- #endif
46
-
47
38
  extern char EpsilonString[]; // holds the symbol representing the empty string
48
39
  // which is usually "<>"
49
40
 
@@ -157,10 +148,10 @@ class Alphabet {
157
148
  typedef std::set<Label, Label::label_cmp> LabelSet;
158
149
 
159
150
  // hash table used to map the symbols to their codes
160
- typedef __gnu_cxx::hash_map<const char*, Character, __gnu_cxx::hash<const char*>,eqstr> SymbolMap;
151
+ typedef hash_map<const char*, Character, hash<const char*>,eqstr> SymbolMap;
161
152
 
162
153
  // hash table used to map the codes back to the symbols
163
- typedef __gnu_cxx::hash_map<Character, char*> CharMap;
154
+ typedef hash_map<Character, char*> CharMap;
164
155
 
165
156
  private:
166
157
  SymbolMap sm; // maps symbols to codes
@@ -249,17 +240,17 @@ class Alphabet {
249
240
  const char *write_label( Label l, bool with_brackets=true ) const;
250
241
 
251
242
  // scan the next multi-character symbol in the argument string
252
- int next_mcsym( char*&, int extended=1 );
243
+ int next_mcsym( char*&, bool insert=true );
253
244
 
254
245
  // scan the next symbol in the argument string
255
- int next_code( char*&, int extended=1 );
246
+ int next_code( char*&, bool extended=true, bool insert=true );
256
247
 
257
248
  // convert a character string into a symbol or label sequence
258
249
  void string2symseq( char*, std::vector<Character>& );
259
250
  void string2labelseq( char*, std::vector<Label>& );
260
251
 
261
252
  // scan the next label in the argument string
262
- Label next_label( char*&, int extended=1 );
253
+ Label next_label( char*&, bool extended=true );
263
254
 
264
255
  // store the alphabet in the argument file (in binary form)
265
256
  void store( FILE* ) const;
@@ -276,7 +267,7 @@ class Alphabet {
276
267
  };
277
268
 
278
269
  // write the alphabet to the output stream (in readable form)
279
- std::ostream &operator<<(std::ostream&, Alphabet&);
270
+ std::ostream &operator<<(std::ostream&, const Alphabet&);
280
271
 
281
272
 
282
273
  #endif
@@ -383,7 +383,7 @@ void CompactTransducer::longest_match2(unsigned int n, char *string, int l,
383
383
 
384
384
  // follow the non-epsilon transitions
385
385
  char *end=string;
386
- int c=alphabet.next_code(end, false);
386
+ int c=alphabet.next_code(end, false, false);
387
387
  l += end-string;
388
388
  if (c != EOF) {
389
389
  // find the set of arcs with matching upper character in the sort list
@@ -430,7 +430,7 @@ const char *CompactTransducer::longest_match( char* &string )
430
430
 
431
431
  // no match? return the next character
432
432
  if (ba.size() == 0) {
433
- int c=alphabet.next_code(string, false);
433
+ int c=alphabet.next_code(string, false, false);
434
434
  return alphabet.code2symbol(c);
435
435
  }
436
436
 
@@ -14,7 +14,6 @@
14
14
  using std::vector;
15
15
  using std::pair;
16
16
  using std::set;
17
- using __gnu_cxx::hash_map;
18
17
 
19
18
  /***************** class NodeSet *********************************/
20
19
 
@@ -1,4 +1,4 @@
1
1
  require 'mkmf'
2
2
  CONFIG['CC'] = 'g++'
3
- $CFLAGS='-Wall -O3 -Wall -Wcast-qual -Wconversion -DSGIext -DREADLINE'
3
+ $CFLAGS='-Wall -O3 -Wall -Wcast-qual -Wconversion -DSGI__gnu_cxx -DREADLINE'
4
4
  create_makefile "sfst_machine"
@@ -72,7 +72,7 @@
72
72
  PRINT = 261,
73
73
  POS = 262,
74
74
  INSERT = 263,
75
- REV = 264,
75
+ SWITCH = 264,
76
76
  ARROW = 265,
77
77
  REPLACE = 266,
78
78
  SYMBOL = 267,
@@ -94,7 +94,7 @@
94
94
  #define PRINT 261
95
95
  #define POS 262
96
96
  #define INSERT 263
97
- #define REV 264
97
+ #define SWITCH 264
98
98
  #define ARROW 265
99
99
  #define REPLACE 266
100
100
  #define SYMBOL 267
@@ -532,7 +532,7 @@ static const yytype_uint8 yyrline[] =
532
532
  static const char *const yytname[] =
533
533
  {
534
534
  "$end", "error", "$undefined", "NEWLINE", "ALPHA", "COMPOSE", "PRINT",
535
- "POS", "INSERT", "REV", "ARROW", "REPLACE", "SYMBOL", "VAR", "SVAR",
535
+ "POS", "INSERT", "SWITCH", "ARROW", "REPLACE", "SYMBOL", "VAR", "SVAR",
536
536
  "RVAR", "RSVAR", "STRING", "STRING2", "UTF8CHAR", "CHARACTER", "'|'",
537
537
  "'-'", "'&'", "SEQ", "'!'", "'^'", "'_'", "'*'", "'+'", "'='", "'?'",
538
538
  "'('", "')'", "'{'", "'}'", "':'", "'['", "']'", "'.'", "','", "$accept",
@@ -2020,7 +2020,7 @@ yyreduce:
2020
2020
 
2021
2021
  case 71:
2022
2022
  #line 165 "fst-compiler.yy"
2023
- { (yyval.longchar)=utf8toint((yyvsp[(1) - (1)].value)); ;}
2023
+ { (yyval.longchar)=utf8toint((yyvsp[(1) - (1)].value)); free((yyvsp[(1) - (1)].value)); ;}
2024
2024
  break;
2025
2025
 
2026
2026
  case 72:
@@ -2358,6 +2358,8 @@ yyreturn:
2358
2358
 
2359
2359
 
2360
2360
  extern FILE *yyin;
2361
+ static int Compact=0;
2362
+ static int LowMem=0;
2361
2363
 
2362
2364
  /*******************************************************************/
2363
2365
  /* */
@@ -2373,3 +2375,124 @@ void yyerror(char *text)
2373
2375
  exit(1);
2374
2376
  }
2375
2377
 
2378
+
2379
+ /*******************************************************************/
2380
+ /* */
2381
+ /* warn */
2382
+ /* */
2383
+ /*******************************************************************/
2384
+
2385
+ void warn(char *text)
2386
+
2387
+ {
2388
+ cerr << "\n" << FileName << ":" << yylineno << ": warning: " << text << "!\n";
2389
+ }
2390
+
2391
+
2392
+ /*******************************************************************/
2393
+ /* */
2394
+ /* warn2 */
2395
+ /* */
2396
+ /*******************************************************************/
2397
+
2398
+ void warn2(char *text, char *text2)
2399
+
2400
+ {
2401
+ cerr << "\n" << FileName << ":" << yylineno << ": warning: " << text << ": ";
2402
+ cerr << text2 << "\n";
2403
+ }
2404
+
2405
+
2406
+ /*******************************************************************/
2407
+ /* */
2408
+ /* get_flags */
2409
+ /* */
2410
+ /*******************************************************************/
2411
+
2412
+ void get_flags( int *argc, char **argv )
2413
+
2414
+ {
2415
+ for( int i=1; i<*argc; i++ ) {
2416
+ if (strcmp(argv[i],"-c") == 0) {
2417
+ Compact = 1;
2418
+ argv[i] = NULL;
2419
+ }
2420
+ else if (strcmp(argv[i],"-l") == 0) {
2421
+ LowMem = 1;
2422
+ argv[i] = NULL;
2423
+ }
2424
+ else if (strcmp(argv[i],"-q") == 0) {
2425
+ Verbose = 0;
2426
+ argv[i] = NULL;
2427
+ }
2428
+ else if (strcmp(argv[i],"-s") == 0) {
2429
+ Switch = 1;
2430
+ argv[i] = NULL;
2431
+ }
2432
+ }
2433
+ // remove flags from the argument list
2434
+ int k;
2435
+ for( int i=k=1; i<*argc; i++)
2436
+ if (argv[i] != NULL)
2437
+ argv[k++] = argv[i];
2438
+ *argc = k;
2439
+ }
2440
+
2441
+
2442
+ /*******************************************************************/
2443
+ /* */
2444
+ /* main */
2445
+ /* */
2446
+ /*******************************************************************/
2447
+
2448
+ int main( int argc, char *argv[] )
2449
+
2450
+ {
2451
+ FILE *file;
2452
+
2453
+ get_flags(&argc, argv);
2454
+ if (argc < 3) {
2455
+ fprintf(stderr,"\nUsage: %s [options] infile outfile\n", argv[0]);
2456
+ fprintf(stderr,"\nOPTIONS:\n");
2457
+ fprintf(stderr,"-c\tStore the transducer in fst-infl2 format.\n");
2458
+ fprintf(stderr,"-l\tStore the transducer in fst-infl3 format.\n");
2459
+ fprintf(stderr,"-s\tSwitch the upper and lower levels producing a transducer for generation rather than recognition.\n");
2460
+ fprintf(stderr,"-q\tquiet mode\n\n");
2461
+ exit(1);
2462
+ }
2463
+ if ((file = fopen(argv[1],"rt")) == NULL) {
2464
+ fprintf(stderr,"\nError: Cannot open grammar file \"%s\"\n\n", argv[1]);
2465
+ exit(1);
2466
+ }
2467
+ FileName = argv[1];
2468
+ Result = NULL;
2469
+ TheAlphabet.utf8 = UTF8;
2470
+ yyin = file;
2471
+ try {
2472
+ yyparse();
2473
+ Result->alphabet.utf8 = UTF8;
2474
+ if (Verbose)
2475
+ cerr << "\n";
2476
+ if (Result->is_empty())
2477
+ warn("resulting transducer is empty");
2478
+ if ((file = fopen(argv[2],"wb")) == NULL) {
2479
+ fprintf(stderr,"\nError: Cannot open output file %s\n\n", argv[2]);
2480
+ exit(1);
2481
+ }
2482
+ if (Compact) {
2483
+ MakeCompactTransducer ca(*Result);
2484
+ delete Result;
2485
+ ca.store(file);
2486
+ }
2487
+ else if (LowMem)
2488
+ Result->store_lowmem(file);
2489
+ else
2490
+ Result->store(file);
2491
+ fclose(file);
2492
+ }
2493
+ catch(const char* p) {
2494
+ cerr << "\n" << p << "\n\n";
2495
+ exit(1);
2496
+ }
2497
+ }
2498
+
@@ -45,7 +45,7 @@
45
45
  PRINT = 261,
46
46
  POS = 262,
47
47
  INSERT = 263,
48
- REV = 264,
48
+ SWITCH = 264,
49
49
  ARROW = 265,
50
50
  REPLACE = 266,
51
51
  SYMBOL = 267,
@@ -67,7 +67,7 @@
67
67
  #define PRINT 261
68
68
  #define POS 262
69
69
  #define INSERT 263
70
- #define REV 264
70
+ #define SWITCH 264
71
71
  #define ARROW 265
72
72
  #define REPLACE 266
73
73
  #define SYMBOL 267
@@ -43,7 +43,7 @@ Transducer *Result;
43
43
  Contexts *contexts;
44
44
  }
45
45
 
46
- %token <number> NEWLINE ALPHA COMPOSE PRINT POS INSERT REV
46
+ %token <number> NEWLINE ALPHA COMPOSE PRINT POS INSERT SWITCH
47
47
  %token <type> ARROW
48
48
  %token <rtype> REPLACE
49
49
  %token <name> SYMBOL VAR SVAR RVAR RSVAR
@@ -111,7 +111,7 @@ RE: RE ARROW CONTEXTS2 { $$ = restriction($1,$2,$3,0); }
111
111
  | RE '?' { $$ = optional($1); }
112
112
  | RE RE %prec SEQ { $$ = catenate($1, $2); }
113
113
  | '!' RE { $$ = negation($2); }
114
- | REV RE { $$ = switch_levels($2); }
114
+ | SWITCH RE { $$ = switch_levels($2); }
115
115
  | '^' RE { $$ = upper_level($2); }
116
116
  | '_' RE { $$ = lower_level($2); }
117
117
  | RE '&' RE { $$ = conjunction($1, $3); }
@@ -162,7 +162,7 @@ VALUE: LCHAR '-' LCHAR { $$=add_values($1,$3,NULL); }
162
162
  ;
163
163
 
164
164
  LCHAR: CHARACTER { $$=$1; }
165
- | UTF8CHAR { $$=utf8toint($1); }
165
+ | UTF8CHAR { $$=utf8toint($1); free($1); }
166
166
  | SCHAR { $$=$1; }
167
167
  ;
168
168
 
@@ -124,6 +124,27 @@ void Node::init()
124
124
  }
125
125
 
126
126
 
127
+ /*******************************************************************/
128
+ /* */
129
+ /* Node::clear_visited */
130
+ /* */
131
+ /*******************************************************************/
132
+
133
+ void Node::clear_visited( NodeHashSet &nodeset )
134
+
135
+ {
136
+ if (nodeset.find( this ) == nodeset.end()) {
137
+ visited = 0;
138
+ nodeset.insert( this );
139
+ fprintf(stderr," %lu", nodeset.size());
140
+ for( ArcsIter p(arcs()); p; p++ ) {
141
+ Arc *arc=p;
142
+ arc->target_node()->clear_visited( nodeset );
143
+ }
144
+ }
145
+ }
146
+
147
+
127
148
  /*******************************************************************/
128
149
  /* */
129
150
  /* NodeNumbering::number_node */
@@ -196,14 +217,16 @@ Arc *Transducer::new_arc( Label l, Node *target )
196
217
  /* */
197
218
  /*******************************************************************/
198
219
 
199
- void Transducer::add_string( char *s, bool extended )
220
+ void Transducer::add_string( char *s, bool extended, Alphabet *a )
200
221
 
201
222
  {
223
+ if (a == NULL)
224
+ a = &alphabet;
225
+
202
226
  Node *node=root_node();
203
-
204
227
  Label l;
205
- while (!(l = alphabet.next_label(s, extended)).is_epsilon()) {
206
- alphabet.insert(l);
228
+ while (!(l = a->next_label(s, extended)).is_epsilon()) {
229
+ a->insert(l);
207
230
  Arcs *arcs=node->arcs();
208
231
  node = arcs->target_node( l );
209
232
  if (node == NULL) {
@@ -271,7 +294,7 @@ Transducer::Transducer( istream &is, const Alphabet *a, bool verbose )
271
294
  break;
272
295
  buffer[l+1] = 0;
273
296
 
274
- add_string(buffer,extended);
297
+ add_string(buffer, extended);
275
298
  }
276
299
  if (verbose && n >= 10000)
277
300
  cerr << "\n";
@@ -515,7 +538,13 @@ int Transducer::print_strings( FILE *file, bool with_brackets )
515
538
  bool Transducer::analyze_string( char *string, FILE *file, bool with_brackets )
516
539
 
517
540
  {
518
- Transducer a1(string, &alphabet, false);
541
+ vector<Character> input;
542
+ alphabet.string2symseq( string, input );
543
+ vector<Label> labels;
544
+ for( size_t i=0; i<input.size(); i++ )
545
+ labels.push_back(Label(input[i]));
546
+
547
+ Transducer a1(labels);
519
548
  Transducer *a2=&(*this || a1);
520
549
  Transducer *a3=&(a2->lower_level());
521
550
  delete a2;
@@ -598,14 +627,15 @@ static void print_node( ostream &s, Node *node, NodeNumbering &index,
598
627
  {
599
628
  if (!node->was_visited( vmark )) {
600
629
  Arcs *arcs=node->arcs();
601
- if (node->is_final())
602
- s << "final\t" << index[node] << "\n";
603
630
  for( ArcsIter p(arcs); p; p++ ) {
604
631
  Arc *arc=p;
605
- s << index[node] << "\t";
606
- s << abc.write_label(arc->label()) << "\t";
607
- s << index[arc->target_node()] << "\n";
632
+ s << index[node] << "\t" << index[arc->target_node()];
633
+ s << "\t" << abc.write_char(arc->label().lower_char());
634
+ s << "\t" << abc.write_char(arc->label().upper_char());
635
+ s << "\n";
608
636
  }
637
+ if (node->is_final())
638
+ s << index[node] << "\n";
609
639
  for( ArcsIter p(arcs); p; p++ ) {
610
640
  Arc *arc=p;
611
641
  print_node( s, arc->target_node(), index, vmark, abc );
@@ -928,18 +958,22 @@ void Transducer::read_transducer_text( FILE *file )
928
958
  for( size_t line=0; fgets(buffer, 10000, file ); line++ ) {
929
959
  char *p = buffer;
930
960
  char *s = next_string(p, line);
931
- if (strcmp(s, "final") == 0) {
932
- s = next_string(p, line);
933
- create_node( nodes, s, line )->set_final(true);
934
- }
961
+ Node *node = create_node( nodes, s, line );
962
+ if (p == NULL)
963
+ node->set_final(true);
935
964
  else {
936
- Node *node = create_node( nodes, s, line );
937
- s = next_string(p, line);
938
- Label l = alphabet.next_label( s, 2 );
939
- if (*s != 0 || l == Label::epsilon)
940
- error_message( line );
941
965
  s = next_string(p, line);
942
966
  Node *target = create_node( nodes, s, line );
967
+
968
+ s = next_string(p, line);
969
+ Character lc = alphabet.add_symbol(s);
970
+ s = next_string(p, line);
971
+ Character uc = alphabet.add_symbol(s);
972
+ Label l(lc,uc);
973
+ if (l == Label::epsilon)
974
+ error_message( line );
975
+
976
+ alphabet.insert(l);
943
977
  node->add_arc( l, target, this );
944
978
  }
945
979
  }
@@ -25,7 +25,7 @@
25
25
 
26
26
  #include "mem.h"
27
27
 
28
- typedef unsigned long VType;
28
+ typedef unsigned short VType;
29
29
 
30
30
  extern int Quiet;
31
31
 
@@ -41,8 +41,7 @@ struct hashf {
41
41
  struct equalf {
42
42
  int operator()(const Node *n1, const Node *n2) const { return n1==n2; }
43
43
  };
44
- typedef __gnu_cxx::hash_set<Node*, hashf, equalf> NodeHashSet;
45
-
44
+ typedef hash_set<Node*, hashf, equalf> NodeHashSet;
46
45
 
47
46
 
48
47
  /***************** class Arc *************************************/
@@ -154,6 +153,7 @@ class Node {
154
153
  Arcs *arcs( void ) { return &arcsp; };
155
154
  const Arcs *arcs( void ) const { return &arcsp; };
156
155
  Node *forward( void ) { return forwardp; };
156
+ void clear_visited( NodeHashSet &nodeset );
157
157
  bool was_visited( VType vmark ) {
158
158
  if (visited == vmark)
159
159
  return true;
@@ -179,7 +179,7 @@ class Node2Int {
179
179
  return (n1 == n2);
180
180
  }
181
181
  };
182
- typedef __gnu_cxx::hash_map<Node*, int, hashf, equalf> NL;
182
+ typedef hash_map<Node*, int, hashf, equalf> NL;
183
183
 
184
184
  private:
185
185
  int current_number;
@@ -231,7 +231,7 @@ class PairMapping {
231
231
  return (p1.first==p2.first && p1.second == p2.second);
232
232
  }
233
233
  };
234
- typedef __gnu_cxx::hash_map<NodePair, Node*, hashf, equalf> PairMap;
234
+ typedef hash_map<NodePair, Node*, hashf, equalf> PairMap;
235
235
  PairMap pm;
236
236
 
237
237
  public:
@@ -256,8 +256,16 @@ class Transducer {
256
256
  Mem mem;
257
257
 
258
258
  typedef std::set<Label, Label::label_cmp> LabelSet;
259
- typedef __gnu_cxx::hash_map<Character, char*> SymbolMap;
259
+ typedef hash_map<Character, char*> SymbolMap;
260
260
 
261
+ void incr_vmark( void ) {
262
+ if (++vmark == 0) {
263
+ NodeHashSet nodes;
264
+ root.clear_visited( nodes );
265
+ fprintf(stderr,"clearing flags\n");
266
+ vmark = 1;
267
+ }
268
+ };
261
269
  void reverse_node( Node *old_node, Transducer *new_node );
262
270
  Label recode_label( Label, bool lswitch, bool recode, Alphabet& );
263
271
  Node *copy_nodes( Node *n, Transducer *a,
@@ -287,10 +295,6 @@ class Transducer {
287
295
 
288
296
  public:
289
297
  VType vmark;
290
- void incr_vmark( void ) {
291
- if (++vmark == 0)
292
- throw "Overflow of generation counter!";
293
- };
294
298
  Alphabet alphabet; // The set of all labels, i.e. character pairs
295
299
 
296
300
  Transducer( void ) : root(), mem()
@@ -308,7 +312,7 @@ class Transducer {
308
312
  const Node *root_node( void ) const { return &root; }; // returns the root node
309
313
  Node *new_node( void ); // memory allocation for a new node
310
314
  Arc *new_arc( Label l, Node *target ); // memory allocation for a new arc
311
- void add_string( char *s, bool extended=false );
315
+ void add_string( char *s, bool extended=false, Alphabet *a=NULL );
312
316
  void complete_alphabet( void );
313
317
  void minimise_alphabet( void );
314
318
  void prune( void ); // remove unnecessary arcs