ruby-sfst 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,304 @@
1
+
2
+ /*******************************************************************/
3
+ /* */
4
+ /* FILE determinise.C */
5
+ /* MODULE determinise */
6
+ /* PROGRAM SFST */
7
+ /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
8
+ /* */
9
+ /*******************************************************************/
10
+
11
+
12
+ #include "fst.h"
13
+
14
+ using std::vector;
15
+ using std::pair;
16
+ using std::set;
17
+ using __gnu_cxx::hash_map;
18
+
19
+ /***************** class NodeSet *********************************/
20
+
21
+ class NodeSet {
22
+ // This class is used to store a set of nodes.
23
+ // Whenever a new node is added, all nodes accessible
24
+ // through epsilon transitions are added as well.
25
+
26
+ private:
27
+ set<Node*> ht;
28
+
29
+ public:
30
+ typedef set<Node*>::iterator iterator;
31
+ NodeSet() {};
32
+ void add( Node* );
33
+ bool insert(Node *node) {
34
+ pair<iterator, bool> result = ht.insert(node);
35
+ return result.second;
36
+ };
37
+ iterator begin() const { return ht.begin(); }
38
+ iterator end() const { return ht.end(); }
39
+ size_t size() const { return ht.size(); }
40
+ void clear() { ht.clear(); }
41
+ };
42
+
43
+
44
+ /***************** class NodeArray *******************************/
45
+
46
+ class NodeArray {
47
+
48
+ private:
49
+ size_t sizev;
50
+ bool final;
51
+ Node **node;
52
+
53
+ public:
54
+ NodeArray( NodeSet& );
55
+ ~NodeArray() { delete[] node; };
56
+ size_t size() const { return sizev; }
57
+ bool is_final() const { return final; };
58
+ Node* &operator[]( int i ) const { return node[i]; }
59
+ };
60
+
61
+
62
+ /***************** class Transition ******************************/
63
+
64
+ class Transition {
65
+ public:
66
+ Label label;
67
+ NodeArray *nodes;
68
+ Transition(Label l, NodeArray *na) { label = l; nodes = na; };
69
+ };
70
+
71
+
72
+ /***************** class NodeMapping ****************************/
73
+
74
+ class NodeMapping {
75
+ // This class is used to map a node set from one transducer
76
+ // to a single node in another transducer
77
+
78
+ private:
79
+ struct hashf {
80
+ size_t operator()(const NodeArray *na) const {
81
+ size_t key=na->size() ^ na->is_final();
82
+ for( size_t i=0; i<na->size(); i++)
83
+ key = (key<<1) ^ (size_t)(*na)[i];
84
+ return key;
85
+ }
86
+ };
87
+ struct equalf {
88
+ int operator()(const NodeArray *na1, const NodeArray *na2) const {
89
+ if (na1->size() != na2->size() || na1->is_final() != na2->is_final())
90
+ return 0;
91
+ for( size_t i=0; i<na1->size(); i++)
92
+ if ((*na1)[i] != (*na2)[i])
93
+ return 0;
94
+ return 1;
95
+ }
96
+ };
97
+ typedef hash_map<NodeArray*, Node*, hashf, equalf> NodeMap;
98
+ NodeMap hm;
99
+
100
+ public:
101
+ typedef NodeMap::iterator iterator;
102
+ ~NodeMapping();
103
+ iterator begin() { return hm.begin(); };
104
+ iterator end() { return hm.end(); };
105
+ iterator find( NodeArray *na) { return hm.find( na ); };
106
+ Node* &operator[]( NodeArray *na ) { return hm.operator[](na); };
107
+
108
+ };
109
+
110
+
111
+ /***************** class LabelMapping ****************************/
112
+
113
+ class LabelMapping {
114
+ // This class is used to map a label to a node set
115
+
116
+ private:
117
+ struct hashf {
118
+ size_t operator()(const Label l) const {
119
+ return l.lower_char() | (l.upper_char() << 16);
120
+ }
121
+ };
122
+ struct equalf {
123
+ int operator()(const Label l1, const Label l2) const {
124
+ return l1==l2;
125
+ }
126
+ };
127
+ typedef hash_map<const Label, NodeSet, hashf, equalf> LabelMap;
128
+ LabelMap lm;
129
+
130
+ public:
131
+ LabelMapping(): lm(8) {};
132
+ typedef LabelMap::iterator iterator;
133
+ iterator begin() { return lm.begin(); };
134
+ iterator end() { return lm.end(); };
135
+ size_t size() { return lm.size(); };
136
+ iterator find( Label l) { return lm.find( l ); };
137
+ NodeSet &operator[]( const Label l ) { return lm.operator[]( l ); };
138
+
139
+ };
140
+
141
+ static void determinise_node( NodeArray&, Node*, Transducer*, NodeMapping&, long );
142
+
143
+
144
+
145
+ /*******************************************************************/
146
+ /* */
147
+ /* NodeSet::add */
148
+ /* */
149
+ /*******************************************************************/
150
+
151
+ void NodeSet::add( Node *node )
152
+
153
+ {
154
+ pair<iterator, bool> result = ht.insert(node);
155
+ if (result.second) {
156
+ // new node, add nodes reachable with epsilon transitions
157
+ for( ArcsIter p(node->arcs(),ArcsIter::eps); p; p++ ) {
158
+ Arc *arc=p;
159
+ if (!arc->label().is_epsilon())
160
+ break;
161
+ add(arc->target_node());
162
+ }
163
+ }
164
+ }
165
+
166
+
167
+ /*******************************************************************/
168
+ /* */
169
+ /* NodeArray::NodeArray */
170
+ /* */
171
+ /*******************************************************************/
172
+
173
+ NodeArray::NodeArray( NodeSet &ns )
174
+
175
+ {
176
+ sizev = 0;
177
+ NodeSet::iterator it;
178
+
179
+ final = false;
180
+ node = new Node*[ns.size()];
181
+ for( it=ns.begin(); it!=ns.end(); it++ ) {
182
+ Node *nn = *it;
183
+ if (nn->arcs()->non_epsilon_transition_exists())
184
+ node[sizev++] = nn;
185
+ final |= nn->is_final();
186
+ }
187
+ std::sort(node, node+sizev);
188
+ }
189
+
190
+
191
+ /*******************************************************************/
192
+ /* */
193
+ /* NodeMapping::~NodeMapping */
194
+ /* */
195
+ /*******************************************************************/
196
+
197
+ NodeMapping::~NodeMapping()
198
+
199
+ {
200
+ // if we delete NodeArrays without removing them from NodeMapping,
201
+ // the system will crash when NodeMapping is deleted.
202
+ for( iterator it=hm.begin(); it!=hm.end(); ) {
203
+ NodeArray *na=it->first;
204
+ iterator old = it++;
205
+ hm.erase(old);
206
+ delete na;
207
+ }
208
+ }
209
+
210
+
211
+ /*******************************************************************/
212
+ /* */
213
+ /* compute_transitions */
214
+ /* */
215
+ /*******************************************************************/
216
+
217
+ static void compute_transitions( NodeArray &na, vector<Transition> &t )
218
+
219
+ {
220
+ LabelMapping lmap;
221
+
222
+ // for all nodes in the current set
223
+ for( size_t i=0; i<na.size(); i++) {
224
+ Node *n = na[i]; // old node
225
+
226
+ // For each non-epsilon transition, add the target node
227
+ // to the respective node set.
228
+ for( ArcsIter p(n->arcs(),ArcsIter::non_eps); p; p++ ) {
229
+ Arc *arc=p;
230
+ lmap[arc->label()].add(arc->target_node());
231
+ }
232
+ }
233
+
234
+ t.reserve(lmap.size());
235
+ for( LabelMapping::iterator it=lmap.begin(); it!=lmap.end(); it++ )
236
+ t.push_back(Transition(it->first, new NodeArray( it->second )));
237
+ }
238
+
239
+
240
+ /*******************************************************************/
241
+ /* */
242
+ /* determinise_node */
243
+ /* */
244
+ /*******************************************************************/
245
+
246
+ static void determinise_node( NodeArray &na, Node *node, Transducer *a,
247
+ NodeMapping &map, long depth )
248
+ {
249
+ if (depth > 10000)
250
+ fprintf(stderr,"\r%ld",depth);
251
+ node->set_final(na.is_final());
252
+
253
+ vector<Transition> t;
254
+ compute_transitions( na, t );
255
+
256
+ for( size_t i=0; i<t.size(); i++ ) {
257
+ NodeMapping::iterator it=map.find(t[i].nodes);
258
+ if (it == map.end()) {
259
+ // new node set
260
+ Node *target_node = a->new_node();
261
+ map[t[i].nodes] = target_node;
262
+ node->add_arc( t[i].label, target_node, a );
263
+ determinise_node( *t[i].nodes, target_node, a, map, depth+1 );
264
+ }
265
+ else {
266
+ delete t[i].nodes;
267
+ node->add_arc( t[i].label, it->second, a );
268
+ }
269
+ }
270
+ }
271
+
272
+
273
+ /*******************************************************************/
274
+ /* */
275
+ /* Transducer::determinise */
276
+ /* */
277
+ /*******************************************************************/
278
+
279
+ Transducer &Transducer::determinise()
280
+
281
+ {
282
+ // initialisations
283
+ NodeMapping map;
284
+
285
+ Transducer *a = new Transducer();
286
+ a->alphabet.copy(alphabet);
287
+
288
+ // creation of the initial node set consisting of all nodes
289
+ // reachable from the start node via epsilon transitions.
290
+ NodeArray *na;
291
+ {
292
+ NodeSet ns;
293
+ ns.add(root_node());
294
+ na = new NodeArray(ns);
295
+ }
296
+
297
+ // map the node set to the new root node
298
+ map[na] = a->root_node();
299
+
300
+ // determinise the transducer recursively
301
+ determinise_node( *na, a->root_node(), a, map, 0);
302
+ a->deterministic = 1;
303
+ return *a;
304
+ }
@@ -0,0 +1,4 @@
1
require 'mkmf'

# The extension sources are C++, so the compiler entry is pointed at g++.
CONFIG['CC'] = 'g++'

# Compiler flags for the extension. -Wall was listed twice; once is enough.
$CFLAGS = '-Wall -O3 -Wcast-qual -Wconversion -DSGIext -DREADLINE'

create_makefile "sfst_machine"