ruby-sfst 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,2 +1,3 @@
1
+ v0.2.0. update to SFST 1.3
1
2
  v0.1.1. overflow and g++ 4.3 fixes
2
3
  v0.1.0. initial release
data/Manifest CHANGED
@@ -11,6 +11,7 @@ ext/sfst_machine/utf8-scanner.ll
11
11
  ext/sfst_machine/determinise.C
12
12
  ext/sfst_machine/interface.C
13
13
  ext/sfst_machine/compact.h
14
+ ext/sfst_machine/sgi.h
14
15
  ext/sfst_machine/basic.h
15
16
  ext/sfst_machine/fst.h
16
17
  ext/sfst_machine/make-compact.h
data/README.rdoc CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  A wrapper for the Stuttgart Finite State Transducer Tools (SFST).
4
4
 
5
+ The wrapper is based on SFST 1.3.
6
+
5
7
  == Installation
6
8
 
7
9
  gem install ruby-sfst
data/Rakefile CHANGED
@@ -4,7 +4,7 @@ require 'rake'
4
4
  begin
5
5
  require 'echoe'
6
6
 
7
- Echoe.new('ruby-sfst', '0.1.1') do |p|
7
+ Echoe.new('ruby-sfst', '0.2.0') do |p|
8
8
  p.summary = "A wrapper for the Stuttgart Finite State Transducer Tools (SFST)."
9
9
  p.author = 'Marius L. Jøhndal'
10
10
  p.email = "mariuslj (at) ifi [dot] uio (dot) no"
@@ -10,14 +10,14 @@
10
10
  /* */
11
11
  /*******************************************************************/
12
12
 
13
- #include <limits.h>
13
+ #include <climits>
14
+ #include <cstring>
15
+
14
16
  #include "utf8.h"
15
17
  #include "alphabet.h"
16
18
 
17
19
  using std::vector;
18
20
  using std::ostream;
19
- using __gnu_cxx::hash_map;
20
- using __gnu_cxx::hash_set;
21
21
 
22
22
  const int BUFFER_SIZE=100000;
23
23
 
@@ -425,13 +425,13 @@ ostream &operator<<( ostream &s, const Alphabet &a )
425
425
  /* Alphabet::next_mcsym */
426
426
  /* */
427
427
  /* recognizes multi-character symbols which are enclosed with */
428
- /* angle brackets <...>. If the value of the argument flag is */
429
- /* smaller than 2, the multi-character symbol must be already in */
430
- /* the lexicon in order to be recognized. */
428
+ /* angle brackets <...>. If the argument flag insert is false, */
429
+ /* the multi-character symbol must be already in the lexicon in */
430
+ /* order to be recognized. */
431
431
  /* */
432
432
  /*******************************************************************/
433
433
 
434
- int Alphabet::next_mcsym( char* &string, int extended )
434
+ int Alphabet::next_mcsym( char* &string, bool insert )
435
435
 
436
436
  {
437
437
  char *start=string;
@@ -446,7 +446,7 @@ int Alphabet::next_mcsym( char* &string, int extended )
446
446
  *end = 0;
447
447
 
448
448
  int c;
449
- if (extended <= 2)
449
+ if (insert)
450
450
  c = add_symbol( start );
451
451
  else
452
452
  c = symbol2code(start);
@@ -473,13 +473,13 @@ int Alphabet::next_mcsym( char* &string, int extended )
473
473
  /* */
474
474
  /*******************************************************************/
475
475
 
476
- int Alphabet::next_code( char* &string, int extended )
476
+ int Alphabet::next_code( char* &string, bool extended, bool insert )
477
477
 
478
478
  {
479
479
  if (*string == 0)
480
480
  return EOF; // finished
481
481
 
482
- int c = next_mcsym(string, extended);
482
+ int c = next_mcsym(string, insert);
483
483
  if (c != EOF)
484
484
  return c;
485
485
 
@@ -506,7 +506,7 @@ int Alphabet::next_code( char* &string, int extended )
506
506
  /* */
507
507
  /*******************************************************************/
508
508
 
509
- Label Alphabet::next_label( char* &string, int extended )
509
+ Label Alphabet::next_label( char* &string, bool extended )
510
510
 
511
511
  {
512
512
  // read first character
@@ -517,7 +517,7 @@ Label Alphabet::next_label( char* &string, int extended )
517
517
  Character lc=(Character)c;
518
518
  if (!extended || *string != ':') { // single character?
519
519
  if (lc == Label::epsilon)
520
- return next_label(string); // ignore epsilon
520
+ return next_label(string, extended); // ignore epsilon
521
521
  return Label(lc);
522
522
  }
523
523
 
@@ -532,7 +532,7 @@ Label Alphabet::next_label( char* &string, int extended )
532
532
 
533
533
  Label l(lc, (Character)c);
534
534
  if (l.is_epsilon())
535
- return next_label(string); // ignore epsilon transitions
535
+ return next_label(string, extended); // ignore epsilon transitions
536
536
  return l;
537
537
  }
538
538
 
@@ -782,8 +782,12 @@ char *Alphabet::print_analysis( Analysis &ana, bool both_layers )
782
782
  const char *s;
783
783
 
784
784
  // either print the analysis symbol or the whole label
785
- if (both_layers)
785
+ if (both_layers) {
786
786
  s = write_label(l);
787
+ // quote colons
788
+ if (strcmp(s,":") == 0)
789
+ ch.push_back('\\');
790
+ }
787
791
  else if (l.lower_char() != Label::epsilon)
788
792
  s = write_char(l.lower_char());
789
793
  else
@@ -13,7 +13,6 @@
13
13
  #define _ALPHABET_H_
14
14
 
15
15
  #include <stdio.h>
16
- #include <string.h>
17
16
 
18
17
  #include "basic.h"
19
18
 
@@ -22,6 +21,10 @@
22
21
 
23
22
  #include <iostream>
24
23
 
24
+ #include <cstring>
25
+
26
+ #include "sgi.h"
27
+
25
28
  #ifndef CODE_DATA_TYPE
26
29
  typedef unsigned short Character; // data type of the symbol codes
27
30
  #else
@@ -32,18 +35,6 @@ typedef unsigned CODE_DATA_TYPE Character;
32
35
  // on the analysis level (lower) or the surface level (upper)
33
36
  typedef enum {upper, lower} Level;
34
37
 
35
- #ifdef SGIext
36
-
37
- #include <ext/hash_set>
38
- #include <ext/hash_map>
39
-
40
- #else
41
-
42
- #include <hash_set>
43
- #include <hash_map>
44
-
45
- #endif
46
-
47
38
  extern char EpsilonString[]; // holds the symbol representing the empty string
48
39
  // which is usually "<>"
49
40
 
@@ -157,10 +148,10 @@ class Alphabet {
157
148
  typedef std::set<Label, Label::label_cmp> LabelSet;
158
149
 
159
150
  // hash table used to map the symbols to their codes
160
- typedef __gnu_cxx::hash_map<const char*, Character, __gnu_cxx::hash<const char*>,eqstr> SymbolMap;
151
+ typedef hash_map<const char*, Character, hash<const char*>,eqstr> SymbolMap;
161
152
 
162
153
  // hash table used to map the codes back to the symbols
163
- typedef __gnu_cxx::hash_map<Character, char*> CharMap;
154
+ typedef hash_map<Character, char*> CharMap;
164
155
 
165
156
  private:
166
157
  SymbolMap sm; // maps symbols to codes
@@ -249,17 +240,17 @@ class Alphabet {
249
240
  const char *write_label( Label l, bool with_brackets=true ) const;
250
241
 
251
242
  // scan the next multi-character symbol in the argument string
252
- int next_mcsym( char*&, int extended=1 );
243
+ int next_mcsym( char*&, bool insert=true );
253
244
 
254
245
  // scan the next symbol in the argument string
255
- int next_code( char*&, int extended=1 );
246
+ int next_code( char*&, bool extended=true, bool insert=true );
256
247
 
257
248
  // convert a character string into a symbol or label sequence
258
249
  void string2symseq( char*, std::vector<Character>& );
259
250
  void string2labelseq( char*, std::vector<Label>& );
260
251
 
261
252
  // scan the next label in the argument string
262
- Label next_label( char*&, int extended=1 );
253
+ Label next_label( char*&, bool extended=true );
263
254
 
264
255
  // store the alphabet in the argument file (in binary form)
265
256
  void store( FILE* ) const;
@@ -276,7 +267,7 @@ class Alphabet {
276
267
  };
277
268
 
278
269
  // write the alphabet to the output stream (in readable form)
279
- std::ostream &operator<<(std::ostream&, Alphabet&);
270
+ std::ostream &operator<<(std::ostream&, const Alphabet&);
280
271
 
281
272
 
282
273
  #endif
@@ -383,7 +383,7 @@ void CompactTransducer::longest_match2(unsigned int n, char *string, int l,
383
383
 
384
384
  // follow the non-epsilon transitions
385
385
  char *end=string;
386
- int c=alphabet.next_code(end, false);
386
+ int c=alphabet.next_code(end, false, false);
387
387
  l += end-string;
388
388
  if (c != EOF) {
389
389
  // find the set of arcs with matching upper character in the sort list
@@ -430,7 +430,7 @@ const char *CompactTransducer::longest_match( char* &string )
430
430
 
431
431
  // no match? return the next character
432
432
  if (ba.size() == 0) {
433
- int c=alphabet.next_code(string, false);
433
+ int c=alphabet.next_code(string, false, false);
434
434
  return alphabet.code2symbol(c);
435
435
  }
436
436
 
@@ -14,7 +14,6 @@
14
14
  using std::vector;
15
15
  using std::pair;
16
16
  using std::set;
17
- using __gnu_cxx::hash_map;
18
17
 
19
18
  /***************** class NodeSet *********************************/
20
19
 
@@ -1,4 +1,4 @@
1
1
  require 'mkmf'
2
2
  CONFIG['CC'] = 'g++'
3
- $CFLAGS='-Wall -O3 -Wall -Wcast-qual -Wconversion -DSGIext -DREADLINE'
3
+ $CFLAGS='-Wall -O3 -Wall -Wcast-qual -Wconversion -DSGI__gnu_cxx -DREADLINE'
4
4
  create_makefile "sfst_machine"
@@ -72,7 +72,7 @@
72
72
  PRINT = 261,
73
73
  POS = 262,
74
74
  INSERT = 263,
75
- REV = 264,
75
+ SWITCH = 264,
76
76
  ARROW = 265,
77
77
  REPLACE = 266,
78
78
  SYMBOL = 267,
@@ -94,7 +94,7 @@
94
94
  #define PRINT 261
95
95
  #define POS 262
96
96
  #define INSERT 263
97
- #define REV 264
97
+ #define SWITCH 264
98
98
  #define ARROW 265
99
99
  #define REPLACE 266
100
100
  #define SYMBOL 267
@@ -532,7 +532,7 @@ static const yytype_uint8 yyrline[] =
532
532
  static const char *const yytname[] =
533
533
  {
534
534
  "$end", "error", "$undefined", "NEWLINE", "ALPHA", "COMPOSE", "PRINT",
535
- "POS", "INSERT", "REV", "ARROW", "REPLACE", "SYMBOL", "VAR", "SVAR",
535
+ "POS", "INSERT", "SWITCH", "ARROW", "REPLACE", "SYMBOL", "VAR", "SVAR",
536
536
  "RVAR", "RSVAR", "STRING", "STRING2", "UTF8CHAR", "CHARACTER", "'|'",
537
537
  "'-'", "'&'", "SEQ", "'!'", "'^'", "'_'", "'*'", "'+'", "'='", "'?'",
538
538
  "'('", "')'", "'{'", "'}'", "':'", "'['", "']'", "'.'", "','", "$accept",
@@ -2020,7 +2020,7 @@ yyreduce:
2020
2020
 
2021
2021
  case 71:
2022
2022
  #line 165 "fst-compiler.yy"
2023
- { (yyval.longchar)=utf8toint((yyvsp[(1) - (1)].value)); ;}
2023
+ { (yyval.longchar)=utf8toint((yyvsp[(1) - (1)].value)); free((yyvsp[(1) - (1)].value)); ;}
2024
2024
  break;
2025
2025
 
2026
2026
  case 72:
@@ -2358,6 +2358,8 @@ yyreturn:
2358
2358
 
2359
2359
 
2360
2360
  extern FILE *yyin;
2361
+ static int Compact=0;
2362
+ static int LowMem=0;
2361
2363
 
2362
2364
  /*******************************************************************/
2363
2365
  /* */
@@ -2373,3 +2375,124 @@ void yyerror(char *text)
2373
2375
  exit(1);
2374
2376
  }
2375
2377
 
2378
+
2379
+ /*******************************************************************/
2380
+ /* */
2381
+ /* warn */
2382
+ /* */
2383
+ /*******************************************************************/
2384
+
2385
+ void warn(char *text)
2386
+
2387
+ {
2388
+ cerr << "\n" << FileName << ":" << yylineno << ": warning: " << text << "!\n";
2389
+ }
2390
+
2391
+
2392
+ /*******************************************************************/
2393
+ /* */
2394
+ /* warn2 */
2395
+ /* */
2396
+ /*******************************************************************/
2397
+
2398
+ void warn2(char *text, char *text2)
2399
+
2400
+ {
2401
+ cerr << "\n" << FileName << ":" << yylineno << ": warning: " << text << ": ";
2402
+ cerr << text2 << "\n";
2403
+ }
2404
+
2405
+
2406
+ /*******************************************************************/
2407
+ /* */
2408
+ /* get_flags */
2409
+ /* */
2410
+ /*******************************************************************/
2411
+
2412
+ void get_flags( int *argc, char **argv )
2413
+
2414
+ {
2415
+ for( int i=1; i<*argc; i++ ) {
2416
+ if (strcmp(argv[i],"-c") == 0) {
2417
+ Compact = 1;
2418
+ argv[i] = NULL;
2419
+ }
2420
+ else if (strcmp(argv[i],"-l") == 0) {
2421
+ LowMem = 1;
2422
+ argv[i] = NULL;
2423
+ }
2424
+ else if (strcmp(argv[i],"-q") == 0) {
2425
+ Verbose = 0;
2426
+ argv[i] = NULL;
2427
+ }
2428
+ else if (strcmp(argv[i],"-s") == 0) {
2429
+ Switch = 1;
2430
+ argv[i] = NULL;
2431
+ }
2432
+ }
2433
+ // remove flags from the argument list
2434
+ int k;
2435
+ for( int i=k=1; i<*argc; i++)
2436
+ if (argv[i] != NULL)
2437
+ argv[k++] = argv[i];
2438
+ *argc = k;
2439
+ }
2440
+
2441
+
2442
+ /*******************************************************************/
2443
+ /* */
2444
+ /* main */
2445
+ /* */
2446
+ /*******************************************************************/
2447
+
2448
+ int main( int argc, char *argv[] )
2449
+
2450
+ {
2451
+ FILE *file;
2452
+
2453
+ get_flags(&argc, argv);
2454
+ if (argc < 3) {
2455
+ fprintf(stderr,"\nUsage: %s [options] infile outfile\n", argv[0]);
2456
+ fprintf(stderr,"\nOPTIONS:\n");
2457
+ fprintf(stderr,"-c\tStore the transducer in fst-infl2 format.\n");
2458
+ fprintf(stderr,"-l\tStore the transducer in fst-infl3 format.\n");
2459
+ fprintf(stderr,"-s\tSwitch the upper and lower levels producing a transducer for generation rather than recognition.\n");
2460
+ fprintf(stderr,"-q\tquiet mode\n\n");
2461
+ exit(1);
2462
+ }
2463
+ if ((file = fopen(argv[1],"rt")) == NULL) {
2464
+ fprintf(stderr,"\nError: Cannot open grammar file \"%s\"\n\n", argv[1]);
2465
+ exit(1);
2466
+ }
2467
+ FileName = argv[1];
2468
+ Result = NULL;
2469
+ TheAlphabet.utf8 = UTF8;
2470
+ yyin = file;
2471
+ try {
2472
+ yyparse();
2473
+ Result->alphabet.utf8 = UTF8;
2474
+ if (Verbose)
2475
+ cerr << "\n";
2476
+ if (Result->is_empty())
2477
+ warn("resulting transducer is empty");
2478
+ if ((file = fopen(argv[2],"wb")) == NULL) {
2479
+ fprintf(stderr,"\nError: Cannot open output file %s\n\n", argv[2]);
2480
+ exit(1);
2481
+ }
2482
+ if (Compact) {
2483
+ MakeCompactTransducer ca(*Result);
2484
+ delete Result;
2485
+ ca.store(file);
2486
+ }
2487
+ else if (LowMem)
2488
+ Result->store_lowmem(file);
2489
+ else
2490
+ Result->store(file);
2491
+ fclose(file);
2492
+ }
2493
+ catch(const char* p) {
2494
+ cerr << "\n" << p << "\n\n";
2495
+ exit(1);
2496
+ }
2497
+ }
2498
+
@@ -45,7 +45,7 @@
45
45
  PRINT = 261,
46
46
  POS = 262,
47
47
  INSERT = 263,
48
- REV = 264,
48
+ SWITCH = 264,
49
49
  ARROW = 265,
50
50
  REPLACE = 266,
51
51
  SYMBOL = 267,
@@ -67,7 +67,7 @@
67
67
  #define PRINT 261
68
68
  #define POS 262
69
69
  #define INSERT 263
70
- #define REV 264
70
+ #define SWITCH 264
71
71
  #define ARROW 265
72
72
  #define REPLACE 266
73
73
  #define SYMBOL 267
@@ -43,7 +43,7 @@ Transducer *Result;
43
43
  Contexts *contexts;
44
44
  }
45
45
 
46
- %token <number> NEWLINE ALPHA COMPOSE PRINT POS INSERT REV
46
+ %token <number> NEWLINE ALPHA COMPOSE PRINT POS INSERT SWITCH
47
47
  %token <type> ARROW
48
48
  %token <rtype> REPLACE
49
49
  %token <name> SYMBOL VAR SVAR RVAR RSVAR
@@ -111,7 +111,7 @@ RE: RE ARROW CONTEXTS2 { $$ = restriction($1,$2,$3,0); }
111
111
  | RE '?' { $$ = optional($1); }
112
112
  | RE RE %prec SEQ { $$ = catenate($1, $2); }
113
113
  | '!' RE { $$ = negation($2); }
114
- | REV RE { $$ = switch_levels($2); }
114
+ | SWITCH RE { $$ = switch_levels($2); }
115
115
  | '^' RE { $$ = upper_level($2); }
116
116
  | '_' RE { $$ = lower_level($2); }
117
117
  | RE '&' RE { $$ = conjunction($1, $3); }
@@ -162,7 +162,7 @@ VALUE: LCHAR '-' LCHAR { $$=add_values($1,$3,NULL); }
162
162
  ;
163
163
 
164
164
  LCHAR: CHARACTER { $$=$1; }
165
- | UTF8CHAR { $$=utf8toint($1); }
165
+ | UTF8CHAR { $$=utf8toint($1); free($1); }
166
166
  | SCHAR { $$=$1; }
167
167
  ;
168
168
 
@@ -124,6 +124,27 @@ void Node::init()
124
124
  }
125
125
 
126
126
 
127
+ /*******************************************************************/
128
+ /* */
129
+ /* Node::clear_visited */
130
+ /* */
131
+ /*******************************************************************/
132
+
133
+ void Node::clear_visited( NodeHashSet &nodeset )
134
+
135
+ {
136
+ if (nodeset.find( this ) == nodeset.end()) {
137
+ visited = 0;
138
+ nodeset.insert( this );
139
+ fprintf(stderr," %lu", nodeset.size());
140
+ for( ArcsIter p(arcs()); p; p++ ) {
141
+ Arc *arc=p;
142
+ arc->target_node()->clear_visited( nodeset );
143
+ }
144
+ }
145
+ }
146
+
147
+
127
148
  /*******************************************************************/
128
149
  /* */
129
150
  /* NodeNumbering::number_node */
@@ -196,14 +217,16 @@ Arc *Transducer::new_arc( Label l, Node *target )
196
217
  /* */
197
218
  /*******************************************************************/
198
219
 
199
- void Transducer::add_string( char *s, bool extended )
220
+ void Transducer::add_string( char *s, bool extended, Alphabet *a )
200
221
 
201
222
  {
223
+ if (a == NULL)
224
+ a = &alphabet;
225
+
202
226
  Node *node=root_node();
203
-
204
227
  Label l;
205
- while (!(l = alphabet.next_label(s, extended)).is_epsilon()) {
206
- alphabet.insert(l);
228
+ while (!(l = a->next_label(s, extended)).is_epsilon()) {
229
+ a->insert(l);
207
230
  Arcs *arcs=node->arcs();
208
231
  node = arcs->target_node( l );
209
232
  if (node == NULL) {
@@ -271,7 +294,7 @@ Transducer::Transducer( istream &is, const Alphabet *a, bool verbose )
271
294
  break;
272
295
  buffer[l+1] = 0;
273
296
 
274
- add_string(buffer,extended);
297
+ add_string(buffer, extended);
275
298
  }
276
299
  if (verbose && n >= 10000)
277
300
  cerr << "\n";
@@ -515,7 +538,13 @@ int Transducer::print_strings( FILE *file, bool with_brackets )
515
538
  bool Transducer::analyze_string( char *string, FILE *file, bool with_brackets )
516
539
 
517
540
  {
518
- Transducer a1(string, &alphabet, false);
541
+ vector<Character> input;
542
+ alphabet.string2symseq( string, input );
543
+ vector<Label> labels;
544
+ for( size_t i=0; i<input.size(); i++ )
545
+ labels.push_back(Label(input[i]));
546
+
547
+ Transducer a1(labels);
519
548
  Transducer *a2=&(*this || a1);
520
549
  Transducer *a3=&(a2->lower_level());
521
550
  delete a2;
@@ -598,14 +627,15 @@ static void print_node( ostream &s, Node *node, NodeNumbering &index,
598
627
  {
599
628
  if (!node->was_visited( vmark )) {
600
629
  Arcs *arcs=node->arcs();
601
- if (node->is_final())
602
- s << "final\t" << index[node] << "\n";
603
630
  for( ArcsIter p(arcs); p; p++ ) {
604
631
  Arc *arc=p;
605
- s << index[node] << "\t";
606
- s << abc.write_label(arc->label()) << "\t";
607
- s << index[arc->target_node()] << "\n";
632
+ s << index[node] << "\t" << index[arc->target_node()];
633
+ s << "\t" << abc.write_char(arc->label().lower_char());
634
+ s << "\t" << abc.write_char(arc->label().upper_char());
635
+ s << "\n";
608
636
  }
637
+ if (node->is_final())
638
+ s << index[node] << "\n";
609
639
  for( ArcsIter p(arcs); p; p++ ) {
610
640
  Arc *arc=p;
611
641
  print_node( s, arc->target_node(), index, vmark, abc );
@@ -928,18 +958,22 @@ void Transducer::read_transducer_text( FILE *file )
928
958
  for( size_t line=0; fgets(buffer, 10000, file ); line++ ) {
929
959
  char *p = buffer;
930
960
  char *s = next_string(p, line);
931
- if (strcmp(s, "final") == 0) {
932
- s = next_string(p, line);
933
- create_node( nodes, s, line )->set_final(true);
934
- }
961
+ Node *node = create_node( nodes, s, line );
962
+ if (p == NULL)
963
+ node->set_final(true);
935
964
  else {
936
- Node *node = create_node( nodes, s, line );
937
- s = next_string(p, line);
938
- Label l = alphabet.next_label( s, 2 );
939
- if (*s != 0 || l == Label::epsilon)
940
- error_message( line );
941
965
  s = next_string(p, line);
942
966
  Node *target = create_node( nodes, s, line );
967
+
968
+ s = next_string(p, line);
969
+ Character lc = alphabet.add_symbol(s);
970
+ s = next_string(p, line);
971
+ Character uc = alphabet.add_symbol(s);
972
+ Label l(lc,uc);
973
+ if (l == Label::epsilon)
974
+ error_message( line );
975
+
976
+ alphabet.insert(l);
943
977
  node->add_arc( l, target, this );
944
978
  }
945
979
  }
@@ -25,7 +25,7 @@
25
25
 
26
26
  #include "mem.h"
27
27
 
28
- typedef unsigned long VType;
28
+ typedef unsigned short VType;
29
29
 
30
30
  extern int Quiet;
31
31
 
@@ -41,8 +41,7 @@ struct hashf {
41
41
  struct equalf {
42
42
  int operator()(const Node *n1, const Node *n2) const { return n1==n2; }
43
43
  };
44
- typedef __gnu_cxx::hash_set<Node*, hashf, equalf> NodeHashSet;
45
-
44
+ typedef hash_set<Node*, hashf, equalf> NodeHashSet;
46
45
 
47
46
 
48
47
  /***************** class Arc *************************************/
@@ -154,6 +153,7 @@ class Node {
154
153
  Arcs *arcs( void ) { return &arcsp; };
155
154
  const Arcs *arcs( void ) const { return &arcsp; };
156
155
  Node *forward( void ) { return forwardp; };
156
+ void clear_visited( NodeHashSet &nodeset );
157
157
  bool was_visited( VType vmark ) {
158
158
  if (visited == vmark)
159
159
  return true;
@@ -179,7 +179,7 @@ class Node2Int {
179
179
  return (n1 == n2);
180
180
  }
181
181
  };
182
- typedef __gnu_cxx::hash_map<Node*, int, hashf, equalf> NL;
182
+ typedef hash_map<Node*, int, hashf, equalf> NL;
183
183
 
184
184
  private:
185
185
  int current_number;
@@ -231,7 +231,7 @@ class PairMapping {
231
231
  return (p1.first==p2.first && p1.second == p2.second);
232
232
  }
233
233
  };
234
- typedef __gnu_cxx::hash_map<NodePair, Node*, hashf, equalf> PairMap;
234
+ typedef hash_map<NodePair, Node*, hashf, equalf> PairMap;
235
235
  PairMap pm;
236
236
 
237
237
  public:
@@ -256,8 +256,16 @@ class Transducer {
256
256
  Mem mem;
257
257
 
258
258
  typedef std::set<Label, Label::label_cmp> LabelSet;
259
- typedef __gnu_cxx::hash_map<Character, char*> SymbolMap;
259
+ typedef hash_map<Character, char*> SymbolMap;
260
260
 
261
+ void incr_vmark( void ) {
262
+ if (++vmark == 0) {
263
+ NodeHashSet nodes;
264
+ root.clear_visited( nodes );
265
+ fprintf(stderr,"clearing flags\n");
266
+ vmark = 1;
267
+ }
268
+ };
261
269
  void reverse_node( Node *old_node, Transducer *new_node );
262
270
  Label recode_label( Label, bool lswitch, bool recode, Alphabet& );
263
271
  Node *copy_nodes( Node *n, Transducer *a,
@@ -287,10 +295,6 @@ class Transducer {
287
295
 
288
296
  public:
289
297
  VType vmark;
290
- void incr_vmark( void ) {
291
- if (++vmark == 0)
292
- throw "Overflow of generation counter!";
293
- };
294
298
  Alphabet alphabet; // The set of all labels, i.e. character pairs
295
299
 
296
300
  Transducer( void ) : root(), mem()
@@ -308,7 +312,7 @@ class Transducer {
308
312
  const Node *root_node( void ) const { return &root; }; // returns the root node
309
313
  Node *new_node( void ); // memory allocation for a new node
310
314
  Arc *new_arc( Label l, Node *target ); // memory allocation for a new arc
311
- void add_string( char *s, bool extended=false );
315
+ void add_string( char *s, bool extended=false, Alphabet *a=NULL );
312
316
  void complete_alphabet( void );
313
317
  void minimise_alphabet( void );
314
318
  void prune( void ); // remove unnecessary arcs