ruby-sfst 0.4.3 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -0
- data/COPYING +280 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +54 -0
- data/README.md +1 -1
- data/Rakefile +9 -18
- data/bin/console +7 -0
- data/bin/setup +6 -0
- data/ext/sfst/alphabet.cc +879 -0
- data/ext/sfst/alphabet.h +302 -0
- data/ext/sfst/basic.cc +85 -0
- data/ext/{sfst_machine → sfst}/basic.h +7 -4
- data/ext/sfst/compact.cc +629 -0
- data/ext/sfst/compact.h +100 -0
- data/ext/sfst/determinise.cc +279 -0
- data/ext/{sfst_machine → sfst}/extconf.rb +2 -1
- data/ext/sfst/fst.cc +1150 -0
- data/ext/sfst/fst.h +374 -0
- data/ext/sfst/hopcroft.cc +681 -0
- data/ext/sfst/interface.cc +1921 -0
- data/ext/sfst/interface.h +171 -0
- data/ext/sfst/make-compact.cc +323 -0
- data/ext/{sfst_machine → sfst}/make-compact.h +15 -13
- data/ext/sfst/mem.h +80 -0
- data/ext/sfst/operators.cc +1273 -0
- data/ext/{sfst_machine → sfst}/sfst_machine.cc +89 -78
- data/ext/sfst/sgi.h +72 -0
- data/ext/sfst/utf8.cc +149 -0
- data/ext/{sfst_machine → sfst}/utf8.h +7 -4
- data/lib/sfst.rb +2 -1
- data/lib/sfst/version.rb +1 -1
- data/ruby-sfst.gemspec +23 -23
- metadata +107 -35
- data/ext/sfst_machine/alphabet.cc +0 -812
- data/ext/sfst_machine/alphabet.h +0 -273
- data/ext/sfst_machine/basic.cc +0 -84
- data/ext/sfst_machine/compact.cc +0 -616
- data/ext/sfst_machine/compact.h +0 -98
- data/ext/sfst_machine/determinise.cc +0 -303
- data/ext/sfst_machine/fst.cc +0 -1000
- data/ext/sfst_machine/fst.h +0 -369
- data/ext/sfst_machine/interface.cc +0 -1842
- data/ext/sfst_machine/interface.h +0 -93
- data/ext/sfst_machine/make-compact.cc +0 -327
- data/ext/sfst_machine/mem.h +0 -74
- data/ext/sfst_machine/operators.cc +0 -1131
- data/ext/sfst_machine/sgi.h +0 -44
- data/ext/sfst_machine/utf8.cc +0 -146
- data/test/test_sfst.fst +0 -3
- data/test/test_sfst.rb +0 -114
@@ -13,22 +13,24 @@
|
|
13
13
|
#include "fst.h"
|
14
14
|
#include "compact.h"
|
15
15
|
|
16
|
+
namespace SFST {
|
16
17
|
|
17
|
-
class MakeCompactTransducer : CompactTransducer {
|
18
|
+
class MakeCompactTransducer : CompactTransducer {
|
18
19
|
|
19
|
-
private:
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
20
|
+
private:
|
21
|
+
void count_arcs(Node *node, VType vmark);
|
22
|
+
void store_arcs(Node *node, VType vmark);
|
23
|
+
void store_finalp( FILE *file );
|
24
|
+
void store_first_arcs( FILE *file );
|
25
|
+
void store_target_nodes( FILE *file );
|
26
|
+
void store_labels( FILE *file );
|
26
27
|
|
27
|
-
public:
|
28
|
-
|
28
|
+
public:
|
29
|
+
MakeCompactTransducer( Transducer &a, Level sort=upper );
|
29
30
|
|
30
|
-
|
31
|
-
|
32
|
-
};
|
31
|
+
void sort( Level );
|
32
|
+
void store( FILE *file );
|
33
|
+
};
|
33
34
|
|
35
|
+
}
|
34
36
|
#endif
|
data/ext/sfst/mem.h
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
/*******************************************************************/
|
2
|
+
/* */
|
3
|
+
/* FILE mem.h */
|
4
|
+
/* MODULE mem */
|
5
|
+
/* PROGRAM SFST */
|
6
|
+
/* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
|
7
|
+
/* */
|
8
|
+
/* PURPOSE memory management functions */
|
9
|
+
/* */
|
10
|
+
/*******************************************************************/
|
11
|
+
|
12
|
+
#ifndef _MEM_H_
|
13
|
+
#define _MEM_H_
|
14
|
+
|
15
|
+
#include <stdlib.h>
|
16
|
+
#include <assert.h>
|
17
|
+
|
18
|
+
namespace SFST {

#define MEMBUFFER_SIZE 100000


  /*****************  class Mem  *************************************/

  // Simple arena allocator: memory is handed out from a linked list of
  // fixed-size buffers and is only released all at once (clear() or the
  // destructor).
  //
  // NOTE(review): the class owns its buffers but defines no copy
  // constructor / assignment operator, so a copied Mem would free the same
  // buffers twice.  Do not copy Mem objects.

  class Mem {

  private:

    struct MemBuffer {
      char buffer[MEMBUFFER_SIZE];
      struct MemBuffer *next;
    };

    MemBuffer *first_buffer;  // most recently added buffer (head of the list)
    long pos;                 // offset of the first free byte in first_buffer

    // Prepends a fresh buffer to the list and makes it the current one.
    // Throws a C string if malloc fails.
    void add_buffer() {
      MemBuffer *mb=(MemBuffer*)malloc(sizeof(MemBuffer));
      if (mb == NULL)
	throw "Allocation of memory failed in Mem::add_buffer!";
      mb->next = first_buffer;
      first_buffer = mb;
      pos = 0;
    }

  public:
    Mem() { first_buffer = NULL; add_buffer(); }
    ~Mem() { clear(); }

    // Releases all buffers; every pointer returned by alloc() so far
    // becomes invalid.  alloc() may be called again afterwards.
    void clear() {
      while (first_buffer) {
	MemBuffer *next = first_buffer->next;
	free(first_buffer);
	first_buffer = next;
      }
      pos = 0;
    }

    // Returns a block of at least n bytes from the arena.
    // The block is aligned to sizeof(void*) so that objects containing
    // pointers can be placed into it on 32-bit and 64-bit platforms alike.
    // Throws a C string if n exceeds MEMBUFFER_SIZE or if no new buffer
    // could be allocated.
    void *alloc( size_t n ) {
      const size_t alignment = sizeof(void*);

      // reject oversize requests before any buffer is allocated for them
      // (this check also keeps the rounding below from overflowing)
      if (n > MEMBUFFER_SIZE)
	throw "Allocation of memory block larger than MEMBUFFER_SIZE attempted!";

      /* round the size up to a multiple of the alignment */
      const size_t rest = n % alignment;
      if (rest)
	n += alignment - rest;
      if (n > MEMBUFFER_SIZE)
	throw "Allocation of memory block larger than MEMBUFFER_SIZE attempted!";

      // start a new buffer if the current one cannot hold the block
      if (first_buffer == NULL || (size_t)pos + n > MEMBUFFER_SIZE)
	add_buffer();

      void *result = (void*)(first_buffer->buffer + pos);
      pos += (long)n;
      return result;
    }

    //class MemError {};

  };

}
|
80
|
+
#endif
|
@@ -0,0 +1,1273 @@
|
|
1
|
+
|
2
|
+
/*******************************************************************/
|
3
|
+
/* */
|
4
|
+
/* FILE operators.C */
|
5
|
+
/* MODULE operators */
|
6
|
+
/* PROGRAM SFST */
|
7
|
+
/* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
|
8
|
+
/* */
|
9
|
+
/*******************************************************************/
|
10
|
+
|
11
|
+
|
12
|
+
#include "fst.h"
|
13
|
+
|
14
|
+
using std::pair;
|
15
|
+
using std::cerr;
|
16
|
+
|
17
|
+
namespace SFST {
|
18
|
+
|
19
|
+
typedef map<Character, vector<Arc*> > Sym2Arcs;
|
20
|
+
|
21
|
+
// special data structures for the optimization of composition
|
22
|
+
// All transitions outgoing from the same node and having the same
|
23
|
+
// symbol on the upper (or lower) layer are stored in a hash table
|
24
|
+
// for quick retrieval
|
25
|
+
|
26
|
+
/***************** class FromTo *********************************/
|
27
|
+
|
28
|
+
class FromTo {
|
29
|
+
public:
|
30
|
+
Index from, to; // start and end of a range of transitions
|
31
|
+
Index size() { return to - from; }
|
32
|
+
};
|
33
|
+
|
34
|
+
/***************** class NodeSym ********************************/
|
35
|
+
|
36
|
+
class NodeSym {
|
37
|
+
// pair consisting of a node and a symbol
|
38
|
+
public:
|
39
|
+
Index nodeID;
|
40
|
+
Character symbol;
|
41
|
+
NodeSym( Index n, Character s ) { nodeID = n; symbol = s; }
|
42
|
+
};
|
43
|
+
|
44
|
+
|
45
|
+
/***************** class CharNode2Trans **************************/
|
46
|
+
|
47
|
+
class CharNode2Trans {
|
48
|
+
|
49
|
+
struct hashf {
|
50
|
+
size_t operator()(const NodeSym &ns) const {
|
51
|
+
return ns.nodeID ^ ns.symbol;
|
52
|
+
}
|
53
|
+
};
|
54
|
+
|
55
|
+
struct equalf {
|
56
|
+
int operator()(const NodeSym &ns1, const NodeSym &ns2) const {
|
57
|
+
return (ns1.nodeID == ns2.nodeID && ns1.symbol == ns2.symbol);
|
58
|
+
}
|
59
|
+
};
|
60
|
+
|
61
|
+
typedef hash_map<NodeSym, FromTo, hashf, equalf > NodeSym2Range;
|
62
|
+
|
63
|
+
// data structure for storing an index from node + symbol to a list
|
64
|
+
// of transitions with that symbol on the upper/lower layer
|
65
|
+
Transducer &transducer;
|
66
|
+
vector<Index> node_size;
|
67
|
+
vector<Arc*> cs_transitions; // transitions for a certain character + state
|
68
|
+
NodeSym2Range trange;
|
69
|
+
|
70
|
+
public:
|
71
|
+
|
72
|
+
CharNode2Trans(Transducer &t);
|
73
|
+
size_t hash_transitions( Node *node, bool upper );
|
74
|
+
|
75
|
+
class iterator {
|
76
|
+
CharNode2Trans &c2t;
|
77
|
+
Index current, end;
|
78
|
+
public:
|
79
|
+
iterator( CharNode2Trans &table, Index nodeID, Character symbol )
|
80
|
+
: c2t(table)
|
81
|
+
{
|
82
|
+
FromTo range=c2t.trange[NodeSym(nodeID, symbol)];
|
83
|
+
current = range.from;
|
84
|
+
end = range.to;
|
85
|
+
}
|
86
|
+
void operator++( int ) { current++; }
|
87
|
+
Arc *operator*() { return c2t.cs_transitions[current]; }
|
88
|
+
bool finished() { return current == end; }
|
89
|
+
Index size() { return end-current; };
|
90
|
+
};
|
91
|
+
};
|
92
|
+
|
93
|
+
|
94
|
+
static void compose_nodes( Node*, Node*, Node*, Transducer*, PairMapping&,
|
95
|
+
CharNode2Trans&, CharNode2Trans& );
|
96
|
+
|
97
|
+
|
98
|
+
/*******************************************************************/
|
99
|
+
/* */
|
100
|
+
/* CharNode2Trans::CharNode2Trans */
|
101
|
+
/* */
|
102
|
+
/*******************************************************************/
|
103
|
+
|
104
|
+
// Prepares the index for transducer t: nodeindexing() supplies the number
// of nodes and transitions, which are used to size the per-node table
// (all entries undef) and to reserve room for the transition array.
CharNode2Trans::CharNode2Trans(Transducer &t)
  : transducer(t)
{
  const pair<Index,Index> counts = transducer.nodeindexing();
  const Index number_of_nodes = counts.first;
  const Index number_of_transitions = counts.second;

  node_size.resize(number_of_nodes, undef);
  cs_transitions.reserve(number_of_transitions);
}
|
113
|
+
|
114
|
+
|
115
|
+
/*******************************************************************/
|
116
|
+
/* */
|
117
|
+
/* CharNode2Trans::hash_transitions */
|
118
|
+
/* */
|
119
|
+
/*******************************************************************/
|
120
|
+
|
121
|
+
// Indexes the outgoing transitions of node by their upper (upper == true)
// or lower (upper == false) symbol and returns the number of different
// symbols found.  A node is indexed only once; later calls return the
// stored count.
size_t CharNode2Trans::hash_transitions( Node *node, bool upper )
{
  const size_t stored = node_size[node->index];
  if (stored != undef)
    return stored;  // node has been indexed before

  // group the outgoing arcs by the symbol on the selected layer
  Sym2Arcs groups;
  for( ArcsIter p(node->arcs()); p; p++ ) {
    Arc *arc=p;
    const Character key =
      upper ? arc->label().upper_char() : arc->label().lower_char();
    groups[key].push_back(arc);
  }

  // append each group to the transition array and remember its range
  for( Sym2Arcs::iterator g=groups.begin(); g!=groups.end(); g++ ) {
    const vector<Arc*> &members = g->second;
    FromTo range;
    range.from = (Index)cs_transitions.size();
    cs_transitions.insert( cs_transitions.end(),
			   members.begin(), members.end() );
    range.to = (Index)cs_transitions.size();
    trange[NodeSym(node->index, g->first)] = range;
  }

  const size_t symbol_count = groups.size();
  node_size[node->index] = (Index)symbol_count;
  return symbol_count;
}
|
152
|
+
|
153
|
+
|
154
|
+
/*******************************************************************/
|
155
|
+
/* */
|
156
|
+
/* check_cyclicity */
|
157
|
+
/* */
|
158
|
+
/*******************************************************************/
|
159
|
+
|
160
|
+
// Follows the arcs whose upper symbol is epsilon, starting at node, and
// returns true if a node is reached which is already in "visited".
// While returning from a positive result, the labels of the arcs that
// were followed are printed to cerr.
// "visited" holds the nodes on the path currently being explored; the
// node is removed again when no cycle was found below it.
static bool check_cyclicity( Node *node, NodeHashSet &visited,
			     const Alphabet &alphabet)
{
  const bool newly_inserted = visited.insert(node).second;
  if (!newly_inserted)
    return true; // node was visited before

  for( ArcsIter p(node->arcs()); p; p++ ) {
    Arc *arc=p;
    if (!arc->label().upper_is_epsilon())
      continue;
    if (!check_cyclicity(arc->target_node(), visited, alphabet))
      continue;
    cerr << alphabet.write_label(arc->label()) << "\n";
    return true;
  }

  visited.erase(node);
  return false;
}
|
178
|
+
|
179
|
+
|
180
|
+
/*******************************************************************/
|
181
|
+
/* */
|
182
|
+
/* Transducer::infinitely_ambiguous_node */
|
183
|
+
/* */
|
184
|
+
/*******************************************************************/
|
185
|
+
|
186
|
+
// Returns true if check_cyclicity() succeeds for node or for any node
// reachable from it.  Nodes for which was_visited(vmark) reports true
// are not examined again.
bool Transducer::infinitely_ambiguous_node( Node *node )
{
  if (node->was_visited( vmark ))
    return false;

  // check_cyclicity gets a fresh set for every start node
  NodeHashSet visited;
  if (check_cyclicity(node, visited, alphabet))
    return true;

  // continue with the targets of all outgoing arcs
  for( ArcsIter p(node->arcs()); p; p++ ) {
    Arc *arc=p;
    Node *target = arc->target_node();
    if (infinitely_ambiguous_node( target ))
      return true;
  }
  return false;
}
|
203
|
+
|
204
|
+
|
205
|
+
/*******************************************************************/
|
206
|
+
/* */
|
207
|
+
/* Transducer::is_infinitely_ambiguous */
|
208
|
+
/* */
|
209
|
+
/*******************************************************************/
|
210
|
+
|
211
|
+
bool Transducer::is_infinitely_ambiguous()
|
212
|
+
|
213
|
+
{
|
214
|
+
incr_vmark();
|
215
|
+
return infinitely_ambiguous_node(root_node());
|
216
|
+
}
|
217
|
+
|
218
|
+
|
219
|
+
/*******************************************************************/
|
220
|
+
/* */
|
221
|
+
/* Transducer::is_cyclic_node */
|
222
|
+
/* */
|
223
|
+
/*******************************************************************/
|
224
|
+
|
225
|
+
// Depth-first search for a cycle below node.
// "previous" holds the nodes on the path from the start node to the
// current node; an arc leading to one of them closes a cycle.
// Returns true as soon as a cycle is found.  Nodes for which
// was_visited(vmark) reports true are not examined again.
bool Transducer::is_cyclic_node( Node *node, NodeHashSet &previous )
{
  if (!node->was_visited( vmark )) {
    previous.insert(node);

    // iterate over all outgoing arcs
    for( ArcsIter p(node->arcs()); p; p++ ) {
      Arc *arc=p;
      if (previous.find(arc->target_node()) != previous.end() ||
	  is_cyclic_node( arc->target_node(), previous ))
	return true;
    }

    // Remove the node by key rather than by an iterator obtained before
    // the recursion: the recursive calls insert into the set, and an
    // insertion may rehash an unordered container, which invalidates
    // iterators taken earlier.
    previous.erase(node);
  }
  return false;
}
|
245
|
+
|
246
|
+
|
247
|
+
/*******************************************************************/
|
248
|
+
/* */
|
249
|
+
/* Transducer::is_cyclic */
|
250
|
+
/* */
|
251
|
+
/*******************************************************************/
|
252
|
+
|
253
|
+
bool Transducer::is_cyclic()
|
254
|
+
|
255
|
+
{
|
256
|
+
incr_vmark();
|
257
|
+
NodeHashSet previous;
|
258
|
+
return is_cyclic_node(root_node(), previous);
|
259
|
+
}
|
260
|
+
|
261
|
+
|
262
|
+
|
263
|
+
/*******************************************************************/
|
264
|
+
/* */
|
265
|
+
/* Transducer::is_automaton_node */
|
266
|
+
/* */
|
267
|
+
/*******************************************************************/
|
268
|
+
|
269
|
+
// Returns false if an arc reachable from node has different upper and
// lower symbols, and true otherwise.  Nodes for which was_visited(vmark)
// reports true are not examined again.
bool Transducer::is_automaton_node( Node *node )
{
  if (node->was_visited( vmark ))
    return true;

  // iterate over all outgoing arcs
  for( ArcsIter p(node->arcs()); p; p++ ) {
    Arc *arc=p;
    Label l=arc->label();
    const bool same_symbols = (l.upper_char() == l.lower_char());
    // the subgraph is only examined if the arc itself passes the test
    if (!same_symbols || !is_automaton_node( arc->target_node() ))
      return false;
  }
  return true;
}
|
285
|
+
|
286
|
+
|
287
|
+
/*******************************************************************/
|
288
|
+
/* */
|
289
|
+
/* Transducer::is_automaton */
|
290
|
+
/* */
|
291
|
+
/*******************************************************************/
|
292
|
+
|
293
|
+
bool Transducer::is_automaton()
|
294
|
+
|
295
|
+
{
|
296
|
+
incr_vmark();
|
297
|
+
return is_automaton_node(root_node());
|
298
|
+
}
|
299
|
+
|
300
|
+
|
301
|
+
/*******************************************************************/
|
302
|
+
/* */
|
303
|
+
/* Transducer::is_empty */
|
304
|
+
/* */
|
305
|
+
/*******************************************************************/
|
306
|
+
|
307
|
+
bool Transducer::is_empty()
|
308
|
+
|
309
|
+
{
|
310
|
+
if (!minimised) {
|
311
|
+
Transducer *tmp=&minimise();
|
312
|
+
bool result=tmp->is_empty();
|
313
|
+
delete tmp;
|
314
|
+
return result;
|
315
|
+
}
|
316
|
+
if (root_node()->is_final())
|
317
|
+
return false;
|
318
|
+
return root_node()->arcs()->is_empty();
|
319
|
+
}
|
320
|
+
|
321
|
+
|
322
|
+
/*******************************************************************/
|
323
|
+
/* */
|
324
|
+
/* Transducer::generates_empty_string */
|
325
|
+
/* */
|
326
|
+
/*******************************************************************/
|
327
|
+
|
328
|
+
bool Transducer::generates_empty_string()
|
329
|
+
|
330
|
+
{
|
331
|
+
if (!minimised) {
|
332
|
+
Transducer *tmp=&minimise();
|
333
|
+
bool result=tmp->root_node()->is_final();
|
334
|
+
delete tmp;
|
335
|
+
return result;
|
336
|
+
}
|
337
|
+
return root_node()->is_final();
|
338
|
+
}
|
339
|
+
|
340
|
+
|
341
|
+
/*******************************************************************/
|
342
|
+
/* */
|
343
|
+
/* Transducer::reverse_node */
|
344
|
+
/* */
|
345
|
+
/*******************************************************************/
|
346
|
+
|
347
|
+
// Recursive part of reverse(): creates a counterpart in "na" for node
// (stored as the node's forward pointer) and adds, for every outgoing
// arc, an arc in the opposite direction between the counterparts.
// Final nodes are linked from the root of "na" by an arc with the
// default label.
void Transducer::reverse_node( Node *node, Transducer *na )
{
  if (node->was_visited( vmark ))
    return;

  // create a new node
  Node *counterpart = na->new_node();
  node->set_forward( counterpart );

  if (node->is_final()) {
    // add epsilon transition from new root to this node
    na->root_node()->add_arc( Label(), node->forward(), na );
  }

  // iterate over all outgoing arcs
  for( ArcsIter p(node->arcs()); p; p++ ) {
    Arc *arc=p;
    Node *target = arc->target_node();

    // reverse the subgraph headed by the target node
    reverse_node( target, na );

    // create the reverse arc
    Node *reversed_source = arc->target_node()->forward();
    reversed_source->add_arc( arc->label(), node->forward(), na );
  }
}
|
372
|
+
|
373
|
+
|
374
|
+
/*******************************************************************/
|
375
|
+
/* */
|
376
|
+
/* Transducer::reverse */
|
377
|
+
/* */
|
378
|
+
/*******************************************************************/
|
379
|
+
|
380
|
+
// Builds a new transducer with all arcs reversed.  The counterpart of
// the root node becomes final.  The alphabet is copied to the result if
// copy_alphabet is true.
// The result is allocated with new and returned by reference.
Transducer &Transducer::reverse( bool copy_alphabet )
{
  Transducer *reversed = new Transducer();
  if (copy_alphabet)
    reversed->alphabet.copy(alphabet);

  incr_vmark();
  reverse_node( root_node(), reversed );

  // the counterpart of the old root is the end point of the reversed paths
  Node *old_root_counterpart = root_node()->forward();
  old_root_counterpart->set_final(1);

  return *reversed;
}
|
392
|
+
|
393
|
+
|
394
|
+
/*******************************************************************/
|
395
|
+
/* */
|
396
|
+
/* Transducer::recode_label */
|
397
|
+
/* */
|
398
|
+
/*******************************************************************/
|
399
|
+
|
400
|
+
// Converts label l for use in another transducer.
//   lswitch: build a new label with the two characters passed in swapped
//            order
//   recode:  look up the symbols of both characters in this transducer's
//            alphabet, add them to "al", and build the label from the
//            codes returned by "al"; the new label is inserted into "al"
Label Transducer::recode_label( Label l, bool lswitch, bool recode,
				Alphabet &al )
{
  Label result = l;

  if (lswitch)
    result = Label(result.upper_char(), result.lower_char());

  if (!recode)
    return result;

  // the lower symbol is added first, then the upper symbol
  const Character new_lower =
    al.add_symbol(alphabet.code2symbol(result.lower_char()));
  const Character new_upper =
    al.add_symbol(alphabet.code2symbol(result.upper_char()));
  result = Label(new_lower, new_upper);
  al.insert(result);

  return result;
}
|
415
|
+
|
416
|
+
|
417
|
+
/*******************************************************************/
|
418
|
+
/* */
|
419
|
+
/* Transducer::copy_nodes */
|
420
|
+
/* */
|
421
|
+
/*******************************************************************/
|
422
|
+
|
423
|
+
// Copies the subgraph headed by node into transducer a and returns the
// copy of node (its forward pointer).  Labels are converted with
// recode_label() according to lswitch and recode.  A node for which
// was_visited(vmark) reports true is not copied again; its existing
// forward pointer is returned.
Node *Transducer::copy_nodes( Node *node, Transducer *a,
			      bool lswitch, bool recode )
{
  if (node->was_visited(vmark))
    return node->forward();

  node->set_forward(a->new_node());

  // define final nodes
  if (node->is_final())
    node->forward()->set_final(1);

  // iterate over all outgoing arcs of node
  for( ArcsIter p(node->arcs()); p; p++ ) {
    Arc *arc=p;

    // copy the target first, then link the copies
    Node *target_copy = copy_nodes( arc->target_node(), a, lswitch, recode );
    Label new_label=recode_label(arc->label(), lswitch, recode, a->alphabet);
    node->forward()->add_arc( new_label, target_copy, a );
  }

  return node->forward();
}
|
447
|
+
|
448
|
+
|
449
|
+
/*******************************************************************/
|
450
|
+
/* */
|
451
|
+
/* Transducer::copy */
|
452
|
+
/* */
|
453
|
+
/*******************************************************************/
|
454
|
+
|
455
|
+
// Returns a copy of the transducer (allocated with new).
//   lswitch: the characters of every label are passed to the Label
//            constructor in swapped order, in the alphabet and on the arcs
//   al:      if not NULL, the copy uses this alphabet and all labels are
//            recoded (see recode_label); if NULL, the transducer's own
//            alphabet is used without recoding
Transducer &Transducer::copy( bool lswitch, const Alphabet *al )
{
  const bool recode = (al != NULL);
  Transducer *na = new Transducer();
  if (!recode)
    al = &alphabet;

  // set up the alphabet of the copy
  na->alphabet.utf8 = al->utf8;
  if (!lswitch)
    na->alphabet.copy(*al);
  else {
    na->alphabet.insert_symbols(*al);
    for( Alphabet::iterator it=al->begin(); it!=al->end(); it++ ) {
      Character lower=it->lower_char();
      Character upper=it->upper_char();
      na->alphabet.insert(Label(upper,lower));
    }
  }

  na->deterministic = deterministic;
  na->minimised = minimised;
  na->root_node()->set_final(root_node()->is_final());
  incr_vmark();

  // The root node is mapped to the root of the copy by hand.
  // The result of was_visited is ignored here; presumably the call also
  // marks the root as visited for this traversal -- confirm in Node.
  root_node()->set_forward(na->root_node());
  root_node()->was_visited(vmark);

  for( ArcsIter p(root_node()->arcs()); p; p++ ) {
    Arc *arc=p;
    Node *target_copy=copy_nodes(arc->target_node(), na, lswitch, recode);
    Label new_label = recode_label(arc->label(), lswitch, recode, na->alphabet);
    na->root_node()->add_arc( new_label, target_copy, na);
  }

  return *na;
}
|
494
|
+
|
495
|
+
|
496
|
+
/*******************************************************************/
|
497
|
+
/* */
|
498
|
+
/* Transducer::operator | */
|
499
|
+
/* */
|
500
|
+
/*******************************************************************/
|
501
|
+
|
502
|
+
// Builds a new transducer (allocated with new) whose root has two arcs
// with the default label: one to a copy of this transducer and one to a
// copy of transducer a.  The alphabets of both operands are copied into
// the result.
Transducer &Transducer::operator|( Transducer &a )
{
  Transducer *result = new Transducer();
  result->alphabet.copy(alphabet);
  result->alphabet.copy(a.alphabet);

  // first operand
  incr_vmark();
  result->root_node()->add_arc( Label(), copy_nodes(root_node(), result),
				result );

  // second operand
  a.incr_vmark();
  result->root_node()->add_arc( Label(), a.copy_nodes(a.root_node(), result),
				result );

  return *result;
}
|
516
|
+
|
517
|
+
|
518
|
+
/*******************************************************************/
|
519
|
+
/* */
|
520
|
+
/* Transducer::rec_cat_nodes */
|
521
|
+
/* */
|
522
|
+
/*******************************************************************/
|
523
|
+
|
524
|
+
// Visits all nodes reachable from node.  Every final node found loses
// its final status and gets an arc with the default label to node2.
// The outgoing arcs of a node are followed before the node itself is
// changed.
void Transducer::rec_cat_nodes( Node *node, Node *node2 )
{
  if (node->was_visited( vmark ))
    return;

  // iterate over all outgoing arcs of node
  for( ArcsIter p(node->arcs()); p; p++ ) {
    Arc *arc=p;
    Node *target = arc->target_node();
    rec_cat_nodes( target, node2 );
  }

  if (!node->is_final())
    return;

  // link this node to node2
  node->set_final(0);
  node->add_arc( Label(), node2, this );
}
|
542
|
+
|
543
|
+
|
544
|
+
/*******************************************************************/
|
545
|
+
/* */
|
546
|
+
/* Transducer::operator+ */
|
547
|
+
/* */
|
548
|
+
/*******************************************************************/
|
549
|
+
|
550
|
+
// Concatenation: builds a new transducer (allocated with new) that
// contains a copy of this transducer followed by a copy of transducer a.
// The final nodes of the first copy are linked to the root of the second
// copy by rec_cat_nodes().
Transducer &Transducer::operator+( Transducer &a )
{
  Transducer *result = new Transducer();
  result->alphabet.copy(alphabet);
  result->alphabet.copy(a.alphabet);

  // copy Transducer1 to the new Transducer
  incr_vmark();
  Node *first_copy=copy_nodes(root_node(), result);
  result->root_node()->add_arc( Label(), first_copy, result);

  // copy Transducer2 to the new Transducer
  a.incr_vmark();
  Node *second_copy=a.copy_nodes(a.root_node(), result);

  // catenate the two automata
  result->incr_vmark();
  result->rec_cat_nodes(result->root_node(), second_copy);

  return *result;
}
|
572
|
+
|
573
|
+
|
574
|
+
/*******************************************************************/
|
575
|
+
/* */
|
576
|
+
/* Transducer::kleene_star */
|
577
|
+
/* (HFST addition: now works for cyclic transducers as well) */
|
578
|
+
/* */
|
579
|
+
/*******************************************************************/
|
580
|
+
|
581
|
+
// Builds the Kleene star of the transducer and returns it as a new
// transducer (allocated with new).  The result is flagged as neither
// deterministic nor minimised.
Transducer &Transducer::kleene_star()
{
  // work on a copy of this transducer (address of the object returned by copy())
  Transducer *na = &copy();
  na->alphabet.copy(alphabet);

  // HFST addition
  // prefix the copy with a transducer that consists of a final root node
  // only, so that the result gets a fresh root node
  Transducer eps;
  eps.root_node()->set_final(1);
  Transducer *tmp = &(eps + *na);
  delete na;
  na = tmp;

  // link back to the start node
  na->incr_vmark();
  na->rec_cat_nodes(na->root_node(), na->root_node());

  na->root_node()->set_final(1); // root node is already final
  na->deterministic = na->minimised = false;

  return *na;
}
|
603
|
+
|
604
|
+
|
605
|
+
/*******************************************************************/
|
606
|
+
/* */
|
607
|
+
/* Transducer::negate_nodes */
|
608
|
+
/* */
|
609
|
+
/*******************************************************************/
|
610
|
+
|
611
|
+
// Recursive part of operator!: inverts the final status of every node
// reachable from node and adds, for each alphabet label without a
// target at the node, an arc with that label to the node "accept".
void Transducer::negate_nodes( Node *node, Node *accept )
{
  if (node->was_visited(vmark))
    return;

  node->set_final( !node->is_final() );

  // process the subgraph first
  for( ArcsIter p(node->arcs()); p; p++ ) {
    Arc *arc=p;
    Node *target = arc->target_node();
    negate_nodes( target, accept );
  }

  // add the missing transitions
  for( Alphabet::iterator it=alphabet.begin(); it!=alphabet.end(); it++) {
    if (node->target_node(*it))
      continue;
    node->add_arc( *it, accept, this );
  }
}
|
627
|
+
|
628
|
+
|
629
|
+
/*******************************************************************/
|
630
|
+
/* */
|
631
|
+
/* Transducer::operator! */
|
632
|
+
/* */
|
633
|
+
/*******************************************************************/
|
634
|
+
|
635
|
+
// Negation: returns a new transducer (allocated with new) built from a
// minimised version of this transducer in which the final status of all
// nodes is inverted and missing transitions lead to a final node that
// loops on every alphabet label.
// If the alphabet is empty, a warning is printed to stderr and a newly
// constructed transducer is returned.
Transducer &Transducer::operator!()
{
  Transducer *na;

  if (alphabet.size() == 0) {
    // throw "Negation of Transducer with undefined alphabet attempted!";
    fprintf(stderr, "Warning: undefined alphabet\n");
    na = new Transducer();
    return *na;
  }

  // start from a minimised transducer: a copy if this one is already
  // minimised, the result of minimise() otherwise
  if (minimised)
    na = &copy();
  else
    na = &minimise();
  na->alphabet.copy(alphabet);

  // final node which accepts any continuation
  Node *accept_node=na->new_node();
  accept_node->set_final(1);
  for( Alphabet::iterator it=alphabet.begin(); it!=alphabet.end(); it++)
    accept_node->add_arc( *it, accept_node, na );

  na->incr_vmark();
  na->negate_nodes( na->root_node(), accept_node );
  na->minimised = na->deterministic = false;

  return *na;
}
|
664
|
+
|
665
|
+
|
666
|
+
/*******************************************************************/
|
667
|
+
/* */
|
668
|
+
/* conjoin_nodes */
|
669
|
+
/* */
|
670
|
+
/*******************************************************************/
|
671
|
+
|
672
|
+
// Recursive part of operator&.
// "node" is the node of the result transducer a which corresponds to
// the node pair (n1, n2).  For every arc of n1 whose label also has a
// target at n2, an arc with that label is added to "node".  "map"
// records the result node of each node pair that has been created.
static void conjoin_nodes( Node *n1, Node *n2, Node *node,
			   Transducer *a, PairMapping &map )
{
  // if both input nodes are final, so is the new one
  if (n1->is_final() && n2->is_final())
    node->set_final(1);

  // iterate over all outgoing arcs of the first node
  for( ArcsIter i(n1->arcs()); i; i++ ) {
    Arc *arc=i;
    Label l=arc->label();
    Node *t1 = arc->target_node();
    Node *t2 = n2->target_node(l);

    // Does the second node have an outgoing arc with the same label?
    if (!t2)
      continue;

    // Check whether this node pair has been encountered before
    PairMapping::iterator known=map.find(t1, t2);
    if (known != map.end()) {
      // add an arc to the already existing target node
      node->add_arc( l, known->second, a );
      continue;
    }

    // new node pair: create its node in the conjunction Transducer,
    // register it, link it and continue with the pair recursively
    Node *new_target = a->new_node();
    map[pair<Node*,Node*>(t1,t2)] = new_target;
    node->add_arc( l, new_target, a );
    conjoin_nodes( t1, t2, new_target, a, map );
  }
}
|
710
|
+
|
711
|
+
|
712
|
+
/*******************************************************************/
|
713
|
+
/* */
|
714
|
+
/* Transducer::operator & */
|
715
|
+
/* */
|
716
|
+
/*******************************************************************/
|
717
|
+
|
718
|
+
// Conjunction: returns a new transducer (allocated with new) built by
// conjoin_nodes() from deterministic versions of both operands.
// An operand which is not flagged as deterministic is determinised into
// a temporary that is deleted before returning.  The result is flagged
// as deterministic.
Transducer &Transducer::operator&( Transducer &a )
{
  // temporaries, only created for non-deterministic operands
  Transducer *det1=NULL;
  Transducer *det2=NULL;

  Node *root1;
  if (!deterministic) {
    det1 = &determinise();
    root1 = det1->root_node();
  }
  else
    root1 = root_node();

  Node *root2;
  if (!a.deterministic) {
    det2 = &a.determinise();
    root2 = det2->root_node();
  }
  else
    root2 = a.root_node();

  PairMapping map;

  Transducer *result = new Transducer();
  result->alphabet.copy(alphabet);
  result->alphabet.copy(a.alphabet);

  // map the two root nodes to the new root node
  map[pair<Node*,Node*>(root1, root2)] = result->root_node();

  // recursively conjoin the two automata
  conjoin_nodes( root1, root2, result->root_node(), result, map);

  result->deterministic = 1;
  delete det1;
  delete det2;

  return *result;
}
|
757
|
+
|
758
|
+
|
759
|
+
/*******************************************************************/
|
760
|
+
/* */
|
761
|
+
/* add_transition */
|
762
|
+
/* */
|
763
|
+
/*******************************************************************/
|
764
|
+
|
765
|
+
// Adds an arc with label l from "node" to the node of the composed
// transducer a which corresponds to the node pair (n1, n2).
// If the pair is not yet contained in "map", a new node is created and
// registered for it, and compose_nodes() is called for the pair.
static void add_transition( Label l, Node *n1, Node *n2, Node *node,
			    Transducer *a, PairMapping &map,
			    CharNode2Trans &cn2trans1,
			    CharNode2Trans &cn2trans2 )
{
  // fprintf(stderr,"transition from %u to %u with label %s\n",
  //         n1->index, n2->index, a->alphabet.write_label(l));

  // Check whether this node pair has been encountered before
  PairMapping::iterator known=map.find(n1, n2);

  if (known == map.end()) {
    // create a new node in the composed Transducer
    Node *new_target = a->new_node();

    // map the target node pair to the new node
    map[pair<Node*,Node*>(n1,n2)] = new_target;

    // add an arc to the new node
    node->add_arc( l, new_target, a );

    // recursion
    compose_nodes( n1, n2, new_target, a, map, cn2trans1, cn2trans2 );
  }
  else {
    // add an arc to the already existing target node
    node->add_arc( l, known->second, a );
  }
}
|
795
|
+
|
796
|
+
|
797
|
+
/*******************************************************************/
|
798
|
+
/* */
|
799
|
+
/* compose_nodes */
|
800
|
+
/* */
|
801
|
+
/*******************************************************************/
|
802
|
+
|
803
|
+
// Builds the outgoing arcs of `node` (a node of the composed transducer `a`)
// from the node pair (n1, n2) of the two input transducers.
//
// An arc of n1 and an arc of n2 are combined when the upper character of the
// first equals the lower character of the second; the resulting label pairs
// the lower character of the first with the upper character of the second.
// Arcs of n1 with an epsilon upper character advance only n1, arcs of n2
// with an epsilon lower character advance only n2.  Target nodes are created
// and expanded through add_transition().
static void compose_nodes( Node *n1, Node *n2, Node *node, Transducer *a,
                           PairMapping &map, CharNode2Trans &cn2trans1,
                           CharNode2Trans &cn2trans2 )
{
  // index the arcs of n1 by upper character and the arcs of n2 by lower
  // character
  size_t indexed1 = cn2trans1.hash_transitions( n1, true );
  size_t indexed2 = cn2trans2.hash_transitions( n2, false );

  // the composed node is final only if both input nodes are final
  if (n1->is_final() && n2->is_final())
    node->set_final(1);

  if (indexed1 <= indexed2) {
    // walk the arcs of n1 and look up the matching arcs of n2 in its index
    for( ArcsIter i(n1->arcs()); i; i++ ) {
      Arc *first_arc = i;
      Node *first_target = first_arc->target_node();
      Label first_label = first_arc->label();
      Character shared_char = first_label.upper_char();
      Character result_lower = first_label.lower_char();

      if (shared_char == Label::epsilon) {
        // nothing to match: only the first transducer moves
        add_transition( first_label, first_target, n2, node, a, map,
                        cn2trans1, cn2trans2 );
        continue;
      }

      for( CharNode2Trans::iterator it(cn2trans2, n2->index, shared_char );
           !it.finished(); it++ )
      {
        Arc *second_arc = *it;
        Node *second_target = second_arc->target_node();
        Label second_label = second_arc->label();
        assert(shared_char == second_label.lower_char());
        Character result_upper = second_label.upper_char();

        add_transition( Label(result_lower,result_upper), first_target,
                        second_target, node, a, map, cn2trans1, cn2trans2 );
      }
    }

    // arcs of n2 with an epsilon lower character: only the second
    // transducer moves
    for( CharNode2Trans::iterator it(cn2trans2, n2->index, Label::epsilon );
         !it.finished(); it++ )
    {
      Arc *second_arc = *it;
      Node *second_target = second_arc->target_node();
      Label second_label = second_arc->label();
      assert(second_label.lower_char() == Label::epsilon);
      add_transition( second_label, n1, second_target, node, a, map,
                      cn2trans1, cn2trans2 );
    }
  }

  else {
    // walk the arcs of n2 and look up the matching arcs of n1 in its index
    for( ArcsIter i(n2->arcs()); i; i++ ) {
      Arc *second_arc = i;
      Node *second_target = second_arc->target_node();
      Label second_label = second_arc->label();
      Character result_upper = second_label.upper_char();
      Character shared_char = second_label.lower_char();

      if (shared_char == Label::epsilon) {
        // nothing to match: only the second transducer moves
        add_transition( second_label, n1, second_target, node, a, map,
                        cn2trans1, cn2trans2 );
        continue;
      }

      for( CharNode2Trans::iterator it(cn2trans1, n1->index, shared_char );
           !it.finished(); it++ )
      {
        Arc *first_arc = *it;
        Node *first_target = first_arc->target_node();
        Label first_label = first_arc->label();
        assert(first_label.upper_char() == shared_char);
        Character result_lower = first_label.lower_char();

        add_transition( Label(result_lower,result_upper), first_target,
                        second_target, node, a, map, cn2trans1, cn2trans2 );
      }
    }

    // arcs of n1 with an epsilon upper character: only the first
    // transducer moves
    for( CharNode2Trans::iterator it(cn2trans1, n1->index, Label::epsilon );
         !it.finished(); it++ )
    {
      Arc *first_arc = *it;
      Node *first_target = first_arc->target_node();
      Label first_label = first_arc->label();
      assert(first_label.upper_char() == Label::epsilon);
      add_transition( first_label, first_target, n2, node, a, map,
                      cn2trans1, cn2trans2 );
    }
  }
}
|
903
|
+
|
904
|
+
|
905
|
+
/*******************************************************************/
|
906
|
+
/* */
|
907
|
+
/* Transducer::operator || */
|
908
|
+
/* */
|
909
|
+
/*******************************************************************/
|
910
|
+
|
911
|
+
// Composition of this transducer with `a`.
// The result is allocated with `new` and returned by reference; nothing in
// this function releases it, so the caller is responsible for that.
Transducer &Transducer::operator||( Transducer &a )

{
  PairMapping pair2node;

  Transducer *composed = new Transducer();
  composed->alphabet.compose(alphabet, a.alphabet);

  // the pair of input root nodes corresponds to the root of the result
  pair2node[pair<Node*,Node*>(root_node(), a.root_node())]
    = composed->root_node();

  // per-transducer arc indexes used while composing the node pairs
  CharNode2Trans cn2trans1(*this);
  CharNode2Trans cn2trans2(a);

  // build the rest of the result recursively, starting at the roots
  compose_nodes( root_node(), a.root_node(), composed->root_node(),
                 composed, pair2node, cn2trans1, cn2trans2 );

  return *composed;
}
|
930
|
+
|
931
|
+
|
932
|
+
|
933
|
+
/*******************************************************************/
|
934
|
+
/* */
|
935
|
+
/* Transducer::operator/ */
|
936
|
+
/* */
|
937
|
+
/*******************************************************************/
|
938
|
+
|
939
|
+
// Difference of this transducer and `a`, computed as
//   a-b = a & !b = a & !(a & b)
// Side effects visible here: complete_alphabet() is called on this
// transducer and a.alphabet is updated through copy(alphabet).
// The two intermediate transducers are deleted; the returned one is not.
Transducer &Transducer::operator/( Transducer &a )

{
  complete_alphabet();
  a.alphabet.copy(alphabet);

  // (this & a)
  Transducer *common = &(*this & a);

  // !(this & a)
  Transducer *not_common = &(!*common);
  delete common;

  // this & !(this & a)
  Transducer *difference = &(*this & *not_common);
  delete not_common;

  return *difference;
}
|
952
|
+
|
953
|
+
|
954
|
+
/*******************************************************************/
|
955
|
+
/* */
|
956
|
+
/* Transducer::compare_nodes */
|
957
|
+
/* */
|
958
|
+
/*******************************************************************/
|
959
|
+
|
960
|
+
// Recursively checks whether the part of this transducer reachable from
// `node` matches the part of `a2` reachable from `node2`: same finality,
// and for every arc label of one node an arc with that label at the other
// node whose targets match in turn.
// Nodes already paired are recognised through their forward pointers, which
// are set to point at each other when a pair is first compared.
// NOTE(review): node2->target_node(label) yields a single node, so this
// presumably expects deterministic input -- confirm against the caller.
bool Transducer::compare_nodes( Node *node, Node *node2, Transducer &a2 )

{
  // query both visit marks exactly once, first node, then node2
  const bool seen1 = node->was_visited( vmark );
  const bool seen2 = node2->was_visited( a2.vmark );

  // one node already processed, the other not: they cannot be paired
  if (seen1 != seen2)
    return false;

  // both processed: they match only if they were paired with each other
  if (seen1)
    return (node->forward() == node2 && node2->forward() == node);

  // remember the pairing before descending
  node->set_forward( node2 );
  node2->set_forward( node );

  if (node->is_final() != node2->is_final())
    return false;

  // every arc of node needs a matching arc at node2 with matching target
  for( ArcsIter p(node->arcs()); p; p++ ) {
    Arc *arc=p;
    Node *partner=node2->target_node(arc->label());

    if (partner == NULL || !compare_nodes(arc->target_node(), partner, a2))
      return false;
  }

  // node2 must not have arcs with labels which are missing at node
  for( ArcsIter p(node2->arcs()); p; p++ ) {
    Arc *arc=p;
    if (node->target_node(arc->label()) == NULL)
      return false;
  }

  return true;
}
|
996
|
+
|
997
|
+
|
998
|
+
/*******************************************************************/
|
999
|
+
/* */
|
1000
|
+
/* Transducer::operator == */
|
1001
|
+
/* */
|
1002
|
+
/*******************************************************************/
|
1003
|
+
|
1004
|
+
// Equality test for two transducers.
// Each operand that is not flagged as minimised is replaced by the result
// of minimise() for the comparison; such temporary results are deleted
// again before returning.  The comparison itself is done by compare_nodes()
// starting at the two root nodes.
bool Transducer::operator==( Transducer &a )

{
  Transducer *lhs = this;
  if (!minimised)
    lhs = &minimise();

  Transducer *rhs = &a;
  if (!a.minimised)
    rhs = &a.minimise();

  // fresh visit marks for the traversal in compare_nodes
  lhs->incr_vmark();
  rhs->incr_vmark();
  bool equal = lhs->compare_nodes(lhs->root_node(), rhs->root_node(), *rhs );

  // release only the transducers created here
  if (lhs != this)
    delete lhs;
  if (rhs != &a)
    delete rhs;

  return equal;
}
|
1019
|
+
|
1020
|
+
|
1021
|
+
|
1022
|
+
/*******************************************************************/
|
1023
|
+
/* */
|
1024
|
+
/* Transducer::map_nodes */
|
1025
|
+
/* */
|
1026
|
+
/*******************************************************************/
|
1027
|
+
|
1028
|
+
// Copies the part of this transducer reachable from `node` into transducer
// `a`, starting at `node2`.  Each copied arc is labelled with the single
// character which the original label yields for `level`.
// The forward pointer of a processed node refers to its copy, so that a
// node reached a second time is linked to the existing copy.
void Transducer::map_nodes( Node *node, Node *node2, Transducer *a, Level level)

{
  // nothing to do for nodes which were processed before
  if (node->was_visited(vmark))
    return;

  node->set_forward(node2);

  // the copy of a final node is final
  if (node->is_final())
    node2->set_final(1);

  for( ArcsIter p(node->arcs()); p; p++ ) {
    Arc *arc=p;
    Label projected(arc->label().get_char(level));
    Node *old_target=arc->target_node();

    // reuse the copy of an already processed target, otherwise create one
    Node *new_target =
      old_target->check_visited(vmark) ? old_target->forward() : a->new_node();

    node2->add_arc(projected, new_target, a);

    map_nodes( old_target, new_target, a, level );
  }
}
|
1056
|
+
|
1057
|
+
|
1058
|
+
/*******************************************************************/
|
1059
|
+
/* */
|
1060
|
+
/* Transducer::level */
|
1061
|
+
/* */
|
1062
|
+
/*******************************************************************/
|
1063
|
+
|
1064
|
+
// Creates a new transducer from the characters which the labels of this
// transducer yield for `level`.  The new alphabet receives, for every label
// of this alphabet, the character at `level` (with its symbol, if one is
// defined for that code) and a label built from that character alone.
// The result is allocated with `new` and not released here.
Transducer &Transducer::level( Level level )

{
  Transducer *projection = new Transducer();

  Alphabet::iterator it=alphabet.begin();
  while (it != alphabet.end()) {
    Character c = it->get_char(level);

    // carry over the symbol of the character where there is one
    if (alphabet.code2symbol(c) != NULL)
      projection->alphabet.add_symbol( alphabet.code2symbol(c), c );

    projection->alphabet.insert(Label(c));
    it++;
  }

  // fresh visit mark for the traversal in map_nodes
  incr_vmark();
  map_nodes(root_node(), projection->root_node(), projection, level );

  return *projection;
}
|
1081
|
+
|
1082
|
+
|
1083
|
+
/*******************************************************************/
|
1084
|
+
/* */
|
1085
|
+
/* Transducer::freely_insert_at_node */
|
1086
|
+
/* */
|
1087
|
+
/*******************************************************************/
|
1088
|
+
|
1089
|
+
// Adds an arc labelled `l` from `node` to itself and does the same for all
// nodes reachable from `node`.  Nodes for which was_visited(vmark) reports
// an earlier visit are skipped.
void Transducer::freely_insert_at_node( Node *node, Label l )

{
  if (node->was_visited(vmark))
    return;

  // self-loop which allows `l` to occur any number of times at this node
  node->add_arc(l, node, this);

  // continue with the targets of all outgoing arcs
  for( ArcsIter p(node->arcs()); p; p++ ) {
    Arc *arc=p;
    freely_insert_at_node(arc->target_node(), l );
  }
}
|
1102
|
+
|
1103
|
+
|
1104
|
+
/*******************************************************************/
|
1105
|
+
/* */
|
1106
|
+
/* Transducer::freely_insert */
|
1107
|
+
/* */
|
1108
|
+
/*******************************************************************/
|
1109
|
+
|
1110
|
+
// Returns a copy of this transducer in which a self-loop labelled `l` has
// been added by freely_insert_at_node(), starting at the root node of the
// copy.  This transducer itself is left unmodified by the insertion.
// The reference returned is the one obtained from copy(); it is not
// released here.
Transducer &Transducer::freely_insert( Label l )

{
  // work on a copy; take the address of the reference returned by copy()
  Transducer *na = &copy();

  // fresh visit mark for the traversal of the copy
  na->incr_vmark();
  na->freely_insert_at_node(na->root_node(), l );

  return *na;
}
|
1120
|
+
|
1121
|
+
|
1122
|
+
/*******************************************************************/
|
1123
|
+
/* */
|
1124
|
+
/* Transducer::splice_arc */
|
1125
|
+
/* */
|
1126
|
+
/*******************************************************************/
|
1127
|
+
|
1128
|
+
// Copies the paths starting at `node` into transducer `a`, attaching them
// to `node2`.  Where a final node is reached, an arc with a
// default-constructed label is added from its copy to `next_node`, and the
// outgoing arcs of that final node are not followed.
// NOTE(review): there is no visited check here, so this presumably expects
// the transducer containing `node` to be acyclic -- confirm.
void Transducer::splice_arc( Node *node, Node *node2, Node *next_node,
                             Transducer *a )
{
  if (!node->is_final()) {
    // copy every outgoing arc to a new node and continue from there
    for( ArcsIter p(node->arcs()); p; p++ ) {
      Arc *arc=p;
      Node *copied_target=a->new_node();

      node2->add_arc( arc->label(), copied_target, a );
      splice_arc( arc->target_node(), copied_target, next_node, a );
    }
  }
  else {
    // end of an inserted path: connect it to the continuation node
    node2->add_arc( Label(), next_node, a );
  }
}
|
1146
|
+
|
1147
|
+
|
1148
|
+
/*******************************************************************/
|
1149
|
+
/* */
|
1150
|
+
/* Transducer::splice_nodes */
|
1151
|
+
/* */
|
1152
|
+
/*******************************************************************/
|
1153
|
+
|
1154
|
+
// Copies the part of this transducer reachable from `node` into transducer
// `a`, starting at `node2`.  Arcs whose label equals `sl` are not copied;
// in their place the transducer `sa` is inserted between the copies of the
// source and target node via splice_arc().
// The forward pointer of a processed node refers to its copy.
void Transducer::splice_nodes(Node *node, Node *node2, Label sl,
                              Transducer *sa, Transducer *a)
{
  // nothing to do for nodes which were processed before
  if (node->was_visited(vmark))
    return;

  node->set_forward(node2);

  // the copy of a final node is final
  if (node->is_final())
    node2->set_final(1);

  for( ArcsIter p(node->arcs()); p; p++ ) {
    Arc *arc=p;
    Node *old_target=arc->target_node();

    // reuse the copy of an already processed target, otherwise create one
    Node *new_target =
      old_target->check_visited(vmark) ? old_target->forward() : a->new_node();

    if (arc->label() != sl)
      // ordinary arc: copy it unchanged
      node2->add_arc(arc->label(), new_target, a);
    else
      // arc to be replaced: insert the transducer sa here
      splice_arc(sa->root_node(), node2, new_target, a);

    splice_nodes( old_target, new_target, sl, sa, a );
  }
}
|
1186
|
+
|
1187
|
+
|
1188
|
+
/*******************************************************************/
|
1189
|
+
/* */
|
1190
|
+
/* Transducer::splice */
|
1191
|
+
/* */
|
1192
|
+
/*******************************************************************/
|
1193
|
+
|
1194
|
+
// Creates a new transducer in which every arc of this transducer labelled
// `sl` is replaced by the transducer `sa` (see splice_nodes).
// The new alphabet holds all labels of this alphabet other than `sl` plus
// all labels of the alphabet of `sa`.
// The result is allocated with `new` and not released here.
Transducer &Transducer::splice( Label sl, Transducer *sa )

{
  Transducer *spliced = new Transducer();

  // labels of this transducer, leaving out the one which gets replaced
  for( Alphabet::iterator it=alphabet.begin(); it!=alphabet.end(); it++ ) {
    Label l = *it;
    if (l == sl)
      continue;
    spliced->alphabet.insert(l);
  }

  // labels of the inserted transducer
  for( Alphabet::iterator it=sa->alphabet.begin();
       it!=sa->alphabet.end(); it++ )
    spliced->alphabet.insert(*it);

  // fresh visit mark for the traversal in splice_nodes
  incr_vmark();
  splice_nodes(root_node(), spliced->root_node(), sl, sa, spliced );

  return *spliced;
}
|
1214
|
+
|
1215
|
+
|
1216
|
+
/*******************************************************************/
|
1217
|
+
/* */
|
1218
|
+
/* Transducer::replace_char */
|
1219
|
+
/* */
|
1220
|
+
/*******************************************************************/
|
1221
|
+
|
1222
|
+
// Creates a new transducer from this one in which Label::replace_char(c, nc)
// has been applied to every alphabet label and (through replace_char2) to
// every arc label.
// The result is allocated with `new` and not released here.
Transducer &Transducer::replace_char( Character c, Character nc )

{
  Transducer *replaced = new Transducer();

  // build the alphabet of the result from the converted labels
  for( Alphabet::iterator it=alphabet.begin(); it!=alphabet.end(); it++ ) {
    Label l = *it;
    replaced->alphabet.insert(l.replace_char(c,nc));
  }

  // fresh visit mark for the traversal in replace_char2
  incr_vmark();
  replace_char2(root_node(), replaced->root_node(), c, nc, replaced );

  return *replaced;
}
|
1239
|
+
|
1240
|
+
|
1241
|
+
/*******************************************************************/
|
1242
|
+
/* */
|
1243
|
+
/* Transducer::replace_char2 */
|
1244
|
+
/* */
|
1245
|
+
/*******************************************************************/
|
1246
|
+
|
1247
|
+
// Copies the part of this transducer reachable from `node` into transducer
// `a`, starting at `node2`.  Every copied arc carries the original label
// after Label::replace_char(c, nc) has been applied to it.
// The forward pointer of a processed node refers to its copy.
void Transducer::replace_char2(Node *node, Node *node2, Character c,
                               Character nc, Transducer *a)
{
  // nothing to do for nodes which were processed before
  if (node->was_visited(vmark))
    return;

  node->set_forward(node2);

  // the copy of a final node is final
  if (node->is_final())
    node2->set_final(1);

  for( ArcsIter p(node->arcs()); p; p++ ) {
    Arc *arc=p;
    Node *old_target=arc->target_node();

    // reuse the copy of an already processed target, otherwise create one
    Node *new_target =
      old_target->check_visited(vmark) ? old_target->forward() : a->new_node();

    node2->add_arc(arc->label().replace_char(c, nc), new_target, a);
    replace_char2( old_target, new_target, c, nc, a );
  }
}
|
1273
|
+
}
|