ruby-sfst 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,113 @@
1
+ /* A Bison parser, made by GNU Bison 2.3. */
2
+
3
+ /* Skeleton interface for Bison's Yacc-like parsers in C
4
+
5
+ Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
6
+ Free Software Foundation, Inc.
7
+
8
+ This program is free software; you can redistribute it and/or modify
9
+ it under the terms of the GNU General Public License as published by
10
+ the Free Software Foundation; either version 2, or (at your option)
11
+ any later version.
12
+
13
+ This program is distributed in the hope that it will be useful,
14
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ GNU General Public License for more details.
17
+
18
+ You should have received a copy of the GNU General Public License
19
+ along with this program; if not, write to the Free Software
20
+ Foundation, Inc., 51 Franklin Street, Fifth Floor,
21
+ Boston, MA 02110-1301, USA. */
22
+
23
+ /* As a special exception, you may create a larger work that contains
24
+ part or all of the Bison parser skeleton and distribute that work
25
+ under terms of your choice, so long as that work isn't itself a
26
+ parser generator using the skeleton or a modified version thereof
27
+ as a parser skeleton. Alternatively, if you modify or redistribute
28
+ the parser skeleton itself, you may (at your option) remove this
29
+ special exception, which will cause the skeleton and the resulting
30
+ Bison output files to be licensed under the GNU General Public
31
+ License without this special exception.
32
+
33
+ This special exception was added by the Free Software Foundation in
34
+ version 2.2 of Bison. */
35
+
36
+ /* Tokens. The YYTOKENTYPE guard keeps the enum from being defined more than once. */
37
+ #ifndef YYTOKENTYPE
38
+ # define YYTOKENTYPE
39
+ /* Put the tokens into the symbol table, so that GDB and other debuggers
40
+ know about them. */
41
+ enum yytokentype {
42
+ NEWLINE = 258, /* NEWLINE through REV: %token <number> in fst-compiler.yy */
43
+ ALPHA = 259,
44
+ COMPOSE = 260,
45
+ PRINT = 261,
46
+ POS = 262,
47
+ INSERT = 263,
48
+ REV = 264,
49
+ ARROW = 265, /* %token <type> */
50
+ REPLACE = 266, /* %token <rtype> */
51
+ SYMBOL = 267, /* SYMBOL through RSVAR: %token <name> */
52
+ VAR = 268,
53
+ SVAR = 269,
54
+ RVAR = 270,
55
+ RSVAR = 271,
56
+ STRING = 272, /* STRING, STRING2, UTF8CHAR: %token <value> */
57
+ STRING2 = 273,
58
+ UTF8CHAR = 274,
59
+ CHARACTER = 275, /* %token <uchar> */
60
+ SEQ = 276 /* introduced by %left SEQ only; the grammar uses it as a %prec tag */
61
+ };
62
+ #endif
63
+ /* Tokens again, as preprocessor macros carrying the same values as the yytokentype enumerators. These defines are outside the YYTOKENTYPE guard. */
64
+ #define NEWLINE 258
65
+ #define ALPHA 259
66
+ #define COMPOSE 260
67
+ #define PRINT 261
68
+ #define POS 262
69
+ #define INSERT 263
70
+ #define REV 264
71
+ #define ARROW 265
72
+ #define REPLACE 266
73
+ #define SYMBOL 267
74
+ #define VAR 268
75
+ #define SVAR 269
76
+ #define RVAR 270
77
+ #define RSVAR 271
78
+ #define STRING 272
79
+ #define STRING2 273
80
+ #define UTF8CHAR 274
81
+ #define CHARACTER 275
82
+ #define SEQ 276
83
+
84
+
85
+
86
+
87
+ #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
88
+ typedef union YYSTYPE
89
+ #line 31 "fst-compiler.yy"
90
+ { /* Semantic value of a grammar symbol; copy of the %union in fst-compiler.yy. Twol_Type, Repl_Type, Character, Transducer, Range, Ranges and Contexts are not declared in this header and must be visible before it is included. */
91
+ int number; /* NEWLINE ALPHA COMPOSE PRINT POS INSERT REV */
92
+ Twol_Type type; /* ARROW */
93
+ Repl_Type rtype; /* REPLACE */
94
+ char *name; /* SYMBOL VAR SVAR RVAR RSVAR */
95
+ char *value; /* STRING STRING2 UTF8CHAR */
96
+ unsigned char uchar; /* CHARACTER token and SCHAR nonterminal */
97
+ unsigned int longchar; /* LCHAR nonterminal */
98
+ Character character; /* CODE nonterminal */
99
+ Transducer *expression; /* RE nonterminal */
100
+ Range *range; /* RANGE VALUE VALUES nonterminals */
101
+ Ranges *ranges; /* RANGES nonterminal */
102
+ Contexts *contexts; /* CONTEXT CONTEXT2 CONTEXTS CONTEXTS2 nonterminals */
103
+ }
104
+ /* Line 1489 of yacc.c. */
105
+ #line 106 "fst-compiler.H"
106
+ YYSTYPE;
107
+ # define yystype YYSTYPE /* obsolescent; will be withdrawn */
108
+ # define YYSTYPE_IS_DECLARED 1 /* tested by the #if above, so the union is not defined a second time */
109
+ # define YYSTYPE_IS_TRIVIAL 1
110
+ #endif
111
+
112
+ extern YYSTYPE yylval; /* global semantic-value variable; only declared here, defined outside this header */
113
+
@@ -0,0 +1,213 @@
1
+ %{
2
+ /*******************************************************************/
3
+ /* */
4
+ /* FILE fst-compiler.yy */
5
+ /* MODULE fst-compiler */
6
+ /* PROGRAM SFST */
7
+ /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
8
+ /* */
9
+ /*******************************************************************/
10
+
11
+ #include <stdio.h>
12
+
13
+ #include "make-compact.h"
14
+ #include "interface.h"
15
+
16
+ using std::cerr;
17
+
18
+ extern int yylineno; /* printed by yyerror as the line number of the error */
19
+ extern char *yytext; /* printed by yyerror after the message text */
20
+
21
+ void yyerror(char *text); /* defined after the grammar; prints the message and calls exit(1) */
22
+ void warn(char *text); /* defined elsewhere; not called anywhere in this file */
23
+ void warn2(char *text, char *text2); /* defined elsewhere; called by the ASSIGNMENT actions. NOTE(review): the first argument is always a string literal there, so a const char* parameter would avoid the literal-to-char* conversion -- confirm against the definition */
24
+ int yylex( void ); /* defined elsewhere */
25
+ int yyparse( void );
26
+
27
+ static int Switch=0; /* never modified in this file; passed as second argument to result() in rule ALL */
28
+ Transducer *Result; /* assigned by the action of rule ALL; holds the value returned by result() */
29
+ %}
30
+
31
+ %union { /* one member per semantic value type named in the %token and %type declarations below */
32
+ int number; /* NEWLINE ALPHA COMPOSE PRINT POS INSERT REV */
33
+ Twol_Type type; /* ARROW */
34
+ Repl_Type rtype; /* REPLACE */
35
+ char *name; /* SYMBOL VAR SVAR RVAR RSVAR */
36
+ char *value; /* STRING STRING2 UTF8CHAR */
37
+ unsigned char uchar; /* CHARACTER, SCHAR */
38
+ unsigned int longchar; /* LCHAR */
39
+ Character character; /* CODE */
40
+ Transducer *expression; /* RE */
41
+ Range *range; /* RANGE VALUE VALUES */
42
+ Ranges *ranges; /* RANGES */
43
+ Contexts *contexts; /* CONTEXT CONTEXT2 CONTEXTS CONTEXTS2 */
44
+ }
45
+ /* Terminal symbols; the name in angle brackets is the %union member that holds the token value. */
46
+ %token <number> NEWLINE ALPHA COMPOSE PRINT POS INSERT REV
47
+ %token <type> ARROW
48
+ %token <rtype> REPLACE
49
+ %token <name> SYMBOL VAR SVAR RVAR RSVAR
50
+ %token <value> STRING STRING2 UTF8CHAR
51
+ %token <uchar> CHARACTER
52
+ /* Semantic value types of the nonterminals. */
53
+ %type <uchar> SCHAR
54
+ %type <longchar> LCHAR
55
+ %type <character> CODE
56
+ %type <expression> RE
57
+ %type <range> RANGE VALUE VALUES
58
+ %type <ranges> RANGES
59
+ %type <contexts> CONTEXT CONTEXT2 CONTEXTS CONTEXTS2
60
+ /* Operator precedence, all left-associative. Bison gives later declarations higher precedence, so PRINT and INSERT bind loosest and the last line binds tightest. NOTE(review): the token '?' used by the RE rules has no precedence declaration here -- confirm that this is intended. */
61
+ %left PRINT INSERT
62
+ %left ARROW REPLACE
63
+ %left COMPOSE
64
+ %left '|'
65
+ %left '-'
66
+ %left '&'
67
+ %left SEQ /* never appears in a rule body; referenced only through %prec SEQ on the rule RE RE */
68
+ %left '!' '^' '_'
69
+ %left '*' '+'
70
+ %%
71
+ /* ALL (start symbol): assignments, then one final expression, then optional newlines. The final expression is passed to result() together with Switch and the return value is stored in the global Result. */
72
+ ALL: ASSIGNMENTS RE NEWLINES { Result=result($2, Switch); }
73
+ ;
74
+ /* ASSIGNMENTS: possibly empty, left-recursive sequence of ASSIGNMENT items and NEWLINE tokens; no semantic value is built. */
75
+ ASSIGNMENTS: ASSIGNMENTS ASSIGNMENT {}
76
+ | ASSIGNMENTS NEWLINE {}
77
+ | /* nothing */ {}
78
+ ;
79
+ /* ASSIGNMENT: a variable definition or a command. For the four definition forms a nonzero return from the def_ function triggers a warn2 call naming the variable. */
80
+ ASSIGNMENT: VAR '=' RE { if (def_var($1,$3)) warn2("assignment of empty transducer to",$1); }
81
+ | RVAR '=' RE { if (def_rvar($1,$3)) warn2("assignment of empty transducer to",$1); }
82
+ | SVAR '=' VALUES { if (def_svar($1,$3)) warn2("assignment of empty symbol range to",$1); }
83
+ | RSVAR '=' VALUES { if (def_svar($1,$3)) warn2("assignment of empty symbol range to",$1); } /* NOTE(review): calls def_svar, the same function as the SVAR form, whereas RVAR has its own def_rvar -- confirm this is intended */
84
+ | RE PRINT STRING { write_to_file($1, $3); } /* expression and STRING value are handed to write_to_file */
85
+ | ALPHA RE { def_alphabet($2); } /* expression is handed to def_alphabet */
86
+ ;
87
+ /* RE: a transducer expression. Each alternative computes its value by calling a function that is not defined in this file; ambiguities between the binary and unary operators are resolved by the %left declarations. */
88
+ RE: RE ARROW CONTEXTS2 { $$ = restriction($1,$2,$3,0); } /* last argument of restriction: 0 here, 1 in the '^' form, -1 in the '_' form */
89
+ | RE '^' ARROW CONTEXTS2 { $$ = restriction($1,$3,$4,1); }
90
+ | RE '_' ARROW CONTEXTS2 { $$ = restriction($1,$3,$4,-1); }
91
+ | RE REPLACE CONTEXT2 { $$ = replace_in_context(minimise(explode($1)),$2,$3,false); } /* in all four REPLACE forms the left expression goes through explode and minimise first; the boolean is true only in the '?' forms */
92
+ | RE REPLACE '?' CONTEXT2 { $$ = replace_in_context(minimise(explode($1)),$2,$4,true);}
93
+ | RE REPLACE '(' ')' { $$ = replace(minimise(explode($1)), $2, false); } /* empty parentheses: replace() without a context */
94
+ | RE REPLACE '?' '(' ')' { $$ = replace(minimise(explode($1)), $2, true); }
95
+ | RE RANGE ARROW RANGE RE { $$ = make_rule($1,$2,$3,$4,$5); } /* in the four make_rule forms an omitted leading or trailing RE is passed as NULL */
96
+ | RE RANGE ARROW RANGE { $$ = make_rule($1,$2,$3,$4,NULL); }
97
+ | RANGE ARROW RANGE RE { $$ = make_rule(NULL,$1,$2,$3,$4); }
98
+ | RANGE ARROW RANGE { $$ = make_rule(NULL,$1,$2,$3,NULL); }
99
+ | RE COMPOSE RE { $$ = composition($1, $3); }
100
+ | '{' RANGES '}' ':' '{' RANGES '}' { $$ = make_mapping($2,$6); }
101
+ | RANGE ':' '{' RANGES '}' { $$ = make_mapping(add_range($1,NULL),$4); } /* a single RANGE is wrapped into a one-element list with add_range */
102
+ | '{' RANGES '}' ':' RANGE { $$ = make_mapping($2,add_range($5,NULL)); }
103
+ | RE INSERT CODE ':' CODE { $$ = freely_insert($1, $3, $5); }
104
+ | RE INSERT CODE { $$ = freely_insert($1, $3, $3); } /* single CODE: the same code is passed for both arguments */
105
+ | RANGE ':' RANGE { $$ = new_transducer($1,$3); }
106
+ | RANGE { $$ = new_transducer($1,$1); } /* single RANGE: the same range is passed for both arguments */
107
+ | VAR { $$ = var_value($1); }
108
+ | RVAR { $$ = rvar_value($1); }
109
+ | RE '*' { $$ = repetition($1); }
110
+ | RE '+' { $$ = repetition2($1); }
111
+ | RE '?' { $$ = optional($1); }
112
+ | RE RE %prec SEQ { $$ = catenate($1, $2); } /* juxtaposition; %prec SEQ gives this rule the precedence declared by %left SEQ */
113
+ | '!' RE { $$ = negation($2); }
114
+ | REV RE { $$ = switch_levels($2); }
115
+ | '^' RE { $$ = upper_level($2); }
116
+ | '_' RE { $$ = lower_level($2); }
117
+ | RE '&' RE { $$ = conjunction($1, $3); }
118
+ | RE '-' RE { $$ = subtraction($1, $3); }
119
+ | RE '|' RE { $$ = disjunction($1, $3); }
120
+ | '(' RE ')' { $$ = $2; } /* parentheses only group; the inner value is passed through */
121
+ | STRING { $$ = read_words($1); }
122
+ | STRING2 { $$ = read_transducer($1); }
123
+ ;
124
+ /* RANGES: possibly empty, right-recursive sequence of RANGE items; each item is prepended to the rest with add_range and the empty sequence is NULL. */
125
+ RANGES: RANGE RANGES { $$ = add_range($1,$2); }
126
+ | { $$ = NULL; }
127
+ ;
128
+ /* RANGE: a set of symbols, written as a bracketed list, a bracketed list preceded by '^', a bracketed RSVAR variable, a dot, or a single CODE. */
129
+ RANGE: '[' VALUES ']' { $$=$2; }
130
+ | '[' '^' VALUES ']' { $$=complement_range($3); } /* the listed values are passed through complement_range */
131
+ | '[' RSVAR ']' { $$=rsvar_value($2); }
132
+ | '.' { $$=NULL; } /* the dot is represented by a NULL range */
133
+ | CODE { $$=add_value($1,NULL); } /* one-element range */
134
+ ;
135
+ /* CONTEXTS2: a CONTEXTS list, with or without enclosing parentheses; the list value is passed through unchanged. */
136
+ CONTEXTS2: CONTEXTS { $$ = $1; }
137
+ | '(' CONTEXTS ')' { $$ = $2; }
138
+ ;
139
+ /* CONTEXTS: one or more CONTEXT items separated by commas (right-recursive); each item is combined with the rest of the list by add_context. */
140
+ CONTEXTS: CONTEXT ',' CONTEXTS { $$ = add_context($1,$3); }
141
+ | CONTEXT { $$ = $1; }
142
+ ;
143
+ /* CONTEXT2: exactly one CONTEXT, with or without enclosing parentheses; used by the REPLACE forms of RE. */
144
+ CONTEXT2: CONTEXT { $$ = $1; }
145
+ | '(' CONTEXT ')' { $$ = $2; }
146
+ ;
147
+ /* CONTEXT: a left and a right expression separated by the POS token; either side may be omitted and is then passed to make_context as NULL. */
148
+ CONTEXT : RE POS RE { $$ = make_context($1, $3); }
149
+ | POS RE { $$ = make_context(NULL, $2); }
150
+ | RE POS { $$ = make_context($1, NULL); }
151
+ ;
152
+ /* VALUES: one or more VALUE items (right-recursive), joined with append_values. */
153
+ VALUES: VALUE VALUES { $$=append_values($1,$2); }
154
+ | VALUE { $$ = $1; }
155
+ ;
156
+ /* VALUE: one element of a VALUES list: a character interval, an SVAR variable, or a single character or symbol. NOTE(review): CHARACTER and UTF8CHAR can be reduced through either LCHAR or CODE, and SCHAR either through LCHAR or directly, so Bison is expected to report reduce/reduce conflicts for this rule -- confirm which reductions are intended. */
157
+ VALUE: LCHAR '-' LCHAR { $$=add_values($1,$3,NULL); } /* interval: both end points are handed to add_values */
158
+ | SVAR { $$=svar_value($1); }
159
+ | LCHAR { $$=add_value(character_code($1),NULL); } /* the LCHAR value is passed through character_code before it is stored */
160
+ | CODE { $$=add_value($1,NULL); }
161
+ | SCHAR { $$=add_value($1,NULL); }
162
+ ;
163
+ /* LCHAR: a character held in the unsigned int member longchar. */
164
+ LCHAR: CHARACTER { $$=$1; } /* token value copied as is */
165
+ | UTF8CHAR { $$=utf8toint($1); } /* string value converted with utf8toint */
166
+ | SCHAR { $$=$1; } /* NOTE(review): every SCHAR action already applies character_code, unlike the two alternatives above -- confirm that VALUE applying character_code to an LCHAR again is harmless */
167
+ ;
168
+ /* CODE: a Character value, obtained from character_code for a CHARACTER token and from symbol_code for a UTF8CHAR or SYMBOL token. */
169
+ CODE: CHARACTER { $$=character_code($1); }
170
+ | UTF8CHAR { $$=symbol_code($1); }
171
+ | SYMBOL { $$=symbol_code($1); }
172
+ ;
173
+ /* SCHAR: a punctuation character that is an operator token elsewhere in this grammar, accepted here as an ordinary character. It is referenced only by VALUE and LCHAR, i.e. inside a VALUES list. NOTE(review): SCHAR is typed uchar while CODE stores character_code results in the Character member -- confirm that no value is truncated. */
174
+ SCHAR: '.' { $$=character_code('.'); }
175
+ | '!' { $$=character_code('!'); }
176
+ | '?' { $$=character_code('?'); }
177
+ | '{' { $$=character_code('{'); }
178
+ | '}' { $$=character_code('}'); }
179
+ | ')' { $$=character_code(')'); }
180
+ | '(' { $$=character_code('('); }
181
+ | '&' { $$=character_code('&'); }
182
+ | '|' { $$=character_code('|'); }
183
+ | '*' { $$=character_code('*'); }
184
+ | '+' { $$=character_code('+'); }
185
+ | ':' { $$=character_code(':'); }
186
+ | ',' { $$=character_code(','); }
187
+ | '=' { $$=character_code('='); }
188
+ | '_' { $$=character_code('_'); }
189
+ | '^' { $$=character_code('^'); }
190
+ | '-' { $$=character_code('-'); }
191
+ ;
192
+ /* NEWLINES: zero or more NEWLINE tokens (right-recursive); no semantic value is built. */
193
+ NEWLINES: NEWLINE NEWLINES {}
194
+ | /* nothing */ {}
195
+ ;
196
+
197
+ %%
198
+
199
+ extern FILE *yyin;
200
+
201
+ /*******************************************************************/
202
+ /* */
203
+ /* yyerror: write a parse error message to cerr and terminate the program. text is the message to print; FileName, yylineno and yytext are read from outside this function. NOTE(review): text is a non-const char*, so callers passing a string literal rely on a literal-to-char* conversion -- confirm whether const char* can be used. */
204
+ /* */
205
+ /*******************************************************************/
206
+
207
+ void yyerror(char *text)
208
+
209
+ {
210
+ cerr << "\n" << FileName << ":" << yylineno << ": " << text << " at: "; /* location prefix built from FileName and yylineno, then the message */
211
+ cerr << yytext << "\naborted.\n"; /* followed by the current yytext */
212
+ exit(1); /* never returns, so parsing is not resumed after an error */
213
+ }