ruby-sfst 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,304 @@
1
+
2
+ /*******************************************************************/
3
+ /* */
4
+ /* FILE determinise.C */
5
+ /* MODULE determinise */
6
+ /* PROGRAM SFST */
7
+ /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
8
+ /* */
9
+ /*******************************************************************/
10
+
11
+
12
+ #include "fst.h"
13
+
14
+ using std::vector;
15
+ using std::pair;
16
+ using std::set;
17
+ using __gnu_cxx::hash_map;
18
+
19
+ /***************** class NodeSet *********************************/
20
+
21
+ class NodeSet {
22
+ // This class is used to store a set of nodes.
23
+ // Whenever a new node is added, all nodes accessible
24
+ // through epsilon transitions are added as well.
25
+
26
+ private:
27
+ set<Node*> ht;
28
+
29
+ public:
30
+ typedef set<Node*>::iterator iterator;
31
+ NodeSet() {};
32
+ void add( Node* );
33
+ bool insert(Node *node) {
34
+ pair<iterator, bool> result = ht.insert(node);
35
+ return result.second;
36
+ };
37
+ iterator begin() const { return ht.begin(); }
38
+ iterator end() const { return ht.end(); }
39
+ size_t size() const { return ht.size(); }
40
+ void clear() { ht.clear(); }
41
+ };
42
+
43
+
44
+ /***************** class NodeArray *******************************/
45
+
46
+ class NodeArray {
47
+
48
+ private:
49
+ size_t sizev;
50
+ bool final;
51
+ Node **node;
52
+
53
+ public:
54
+ NodeArray( NodeSet& );
55
+ ~NodeArray() { delete[] node; };
56
+ size_t size() const { return sizev; }
57
+ bool is_final() const { return final; };
58
+ Node* &operator[]( int i ) const { return node[i]; }
59
+ };
60
+
61
+
62
+ /***************** class Transition ******************************/
63
+
64
+ class Transition {
65
+ public:
66
+ Label label;
67
+ NodeArray *nodes;
68
+ Transition(Label l, NodeArray *na) { label = l; nodes = na; };
69
+ };
70
+
71
+
72
+ /***************** class NodeMapping ****************************/
73
+
74
+ class NodeMapping {
75
+ // This class is used to map a node set from one transducer
76
+ // to a single node in another transducer
77
+
78
+ private:
79
+ struct hashf {
80
+ size_t operator()(const NodeArray *na) const {
81
+ size_t key=na->size() ^ na->is_final();
82
+ for( size_t i=0; i<na->size(); i++)
83
+ key = (key<<1) ^ (size_t)(*na)[i];
84
+ return key;
85
+ }
86
+ };
87
+ struct equalf {
88
+ int operator()(const NodeArray *na1, const NodeArray *na2) const {
89
+ if (na1->size() != na2->size() || na1->is_final() != na2->is_final())
90
+ return 0;
91
+ for( size_t i=0; i<na1->size(); i++)
92
+ if ((*na1)[i] != (*na2)[i])
93
+ return 0;
94
+ return 1;
95
+ }
96
+ };
97
+ typedef hash_map<NodeArray*, Node*, hashf, equalf> NodeMap;
98
+ NodeMap hm;
99
+
100
+ public:
101
+ typedef NodeMap::iterator iterator;
102
+ ~NodeMapping();
103
+ iterator begin() { return hm.begin(); };
104
+ iterator end() { return hm.end(); };
105
+ iterator find( NodeArray *na) { return hm.find( na ); };
106
+ Node* &operator[]( NodeArray *na ) { return hm.operator[](na); };
107
+
108
+ };
109
+
110
+
111
+ /***************** class LabelMapping ****************************/
112
+
113
+ class LabelMapping {
114
+ // This class is used to map a label to a node set
115
+
116
+ private:
117
+ struct hashf {
118
+ size_t operator()(const Label l) const {
119
+ return l.lower_char() | (l.upper_char() << 16);
120
+ }
121
+ };
122
+ struct equalf {
123
+ int operator()(const Label l1, const Label l2) const {
124
+ return l1==l2;
125
+ }
126
+ };
127
+ typedef hash_map<const Label, NodeSet, hashf, equalf> LabelMap;
128
+ LabelMap lm;
129
+
130
+ public:
131
+ LabelMapping(): lm(8) {};
132
+ typedef LabelMap::iterator iterator;
133
+ iterator begin() { return lm.begin(); };
134
+ iterator end() { return lm.end(); };
135
+ size_t size() { return lm.size(); };
136
+ iterator find( Label l) { return lm.find( l ); };
137
+ NodeSet &operator[]( const Label l ) { return lm.operator[]( l ); };
138
+
139
+ };
140
+
141
+ static void determinise_node( NodeArray&, Node*, Transducer*, NodeMapping&, long );
142
+
143
+
144
+
145
+ /*******************************************************************/
146
+ /* */
147
+ /* NodeSet::add */
148
+ /* */
149
+ /*******************************************************************/
150
+
151
+ void NodeSet::add( Node *node )
152
+
153
+ {
154
+ pair<iterator, bool> result = ht.insert(node);
155
+ if (result.second) {
156
+ // new node, add nodes reachable with epsilon transitions
157
+ for( ArcsIter p(node->arcs(),ArcsIter::eps); p; p++ ) {
158
+ Arc *arc=p;
159
+ if (!arc->label().is_epsilon())
160
+ break;
161
+ add(arc->target_node());
162
+ }
163
+ }
164
+ }
165
+
166
+
167
+ /*******************************************************************/
168
+ /* */
169
+ /* NodeArray::NodeArray */
170
+ /* */
171
+ /*******************************************************************/
172
+
173
+ NodeArray::NodeArray( NodeSet &ns )
174
+
175
+ {
176
+ sizev = 0;
177
+ NodeSet::iterator it;
178
+
179
+ final = false;
180
+ node = new Node*[ns.size()];
181
+ for( it=ns.begin(); it!=ns.end(); it++ ) {
182
+ Node *nn = *it;
183
+ if (nn->arcs()->non_epsilon_transition_exists())
184
+ node[sizev++] = nn;
185
+ final |= nn->is_final();
186
+ }
187
+ std::sort(node, node+sizev);
188
+ }
189
+
190
+
191
+ /*******************************************************************/
192
+ /* */
193
+ /* NodeMapping::~NodeMapping */
194
+ /* */
195
+ /*******************************************************************/
196
+
197
+ NodeMapping::~NodeMapping()
198
+
199
+ {
200
+ // if we delete NodeArrays without removing them from NodeMapping,
201
+ // the system will crash when NodeMapping is deleted.
202
+ for( iterator it=hm.begin(); it!=hm.end(); ) {
203
+ NodeArray *na=it->first;
204
+ iterator old = it++;
205
+ hm.erase(old);
206
+ delete na;
207
+ }
208
+ }
209
+
210
+
211
+ /*******************************************************************/
212
+ /* */
213
+ /* compute_transitions */
214
+ /* */
215
+ /*******************************************************************/
216
+
217
+ static void compute_transitions( NodeArray &na, vector<Transition> &t )
218
+
219
+ {
220
+ LabelMapping lmap;
221
+
222
+ // for all nodes in the current set
223
+ for( size_t i=0; i<na.size(); i++) {
224
+ Node *n = na[i]; // old node
225
+
226
+ // For each non-epsilon transition, add the target node
227
+ // to the respective node set.
228
+ for( ArcsIter p(n->arcs(),ArcsIter::non_eps); p; p++ ) {
229
+ Arc *arc=p;
230
+ lmap[arc->label()].add(arc->target_node());
231
+ }
232
+ }
233
+
234
+ t.reserve(lmap.size());
235
+ for( LabelMapping::iterator it=lmap.begin(); it!=lmap.end(); it++ )
236
+ t.push_back(Transition(it->first, new NodeArray( it->second )));
237
+ }
238
+
239
+
240
+ /*******************************************************************/
241
+ /* */
242
+ /* determinise_node */
243
+ /* */
244
+ /*******************************************************************/
245
+
246
+ static void determinise_node( NodeArray &na, Node *node, Transducer *a,
247
+ NodeMapping &map, long depth )
248
+ {
249
+ if (depth > 10000)
250
+ fprintf(stderr,"\r%ld",depth);
251
+ node->set_final(na.is_final());
252
+
253
+ vector<Transition> t;
254
+ compute_transitions( na, t );
255
+
256
+ for( size_t i=0; i<t.size(); i++ ) {
257
+ NodeMapping::iterator it=map.find(t[i].nodes);
258
+ if (it == map.end()) {
259
+ // new node set
260
+ Node *target_node = a->new_node();
261
+ map[t[i].nodes] = target_node;
262
+ node->add_arc( t[i].label, target_node, a );
263
+ determinise_node( *t[i].nodes, target_node, a, map, depth+1 );
264
+ }
265
+ else {
266
+ delete t[i].nodes;
267
+ node->add_arc( t[i].label, it->second, a );
268
+ }
269
+ }
270
+ }
271
+
272
+
273
+ /*******************************************************************/
274
+ /* */
275
+ /* Transducer::determinise */
276
+ /* */
277
+ /*******************************************************************/
278
+
279
+ Transducer &Transducer::determinise()
280
+
281
+ {
282
+ // initialisations
283
+ NodeMapping map;
284
+
285
+ Transducer *a = new Transducer();
286
+ a->alphabet.copy(alphabet);
287
+
288
+ // creation of the initial node set consisting of all nodes
289
+ // reachable from the start node via epsilon transitions.
290
+ NodeArray *na;
291
+ {
292
+ NodeSet ns;
293
+ ns.add(root_node());
294
+ na = new NodeArray(ns);
295
+ }
296
+
297
+ // map the node set to the new root node
298
+ map[na] = a->root_node();
299
+
300
+ // determinise the transducer recursively
301
+ determinise_node( *na, a->root_node(), a, map, 0);
302
+ a->deterministic = 1;
303
+ return *a;
304
+ }
@@ -0,0 +1,4 @@
1
# Generates the Makefile for the sfst_machine extension.
require 'mkmf'

# Use g++ as the compiler.
CONFIG['CC'] = 'g++'

# Compiler flags; this assignment replaces any previous $CFLAGS value.
$CFLAGS = '-Wall -O3 -Wall -Wcast-qual -Wconversion -DSGIext -DREADLINE'

create_makefile('sfst_machine')