mittens 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +30 -0
- data/README.md +62 -0
- data/Rakefile +21 -0
- data/ext/mittens/ext.c +96 -0
- data/ext/mittens/extconf.rb +12 -0
- data/lib/mittens/version.rb +3 -0
- data/lib/mittens.rb +7 -0
- data/mittens.gemspec +22 -0
- data/vendor/snowball/.gitignore +26 -0
- data/vendor/snowball/.travis.yml +112 -0
- data/vendor/snowball/AUTHORS +27 -0
- data/vendor/snowball/CONTRIBUTING.rst +216 -0
- data/vendor/snowball/COPYING +29 -0
- data/vendor/snowball/GNUmakefile +742 -0
- data/vendor/snowball/NEWS +754 -0
- data/vendor/snowball/README.rst +37 -0
- data/vendor/snowball/ada/README.md +74 -0
- data/vendor/snowball/ada/generate/generate.adb +83 -0
- data/vendor/snowball/ada/generate.gpr +21 -0
- data/vendor/snowball/ada/src/stemmer.adb +620 -0
- data/vendor/snowball/ada/src/stemmer.ads +219 -0
- data/vendor/snowball/ada/src/stemwords.adb +70 -0
- data/vendor/snowball/ada/stemmer_config.gpr +83 -0
- data/vendor/snowball/ada/stemwords.gpr +21 -0
- data/vendor/snowball/algorithms/arabic.sbl +558 -0
- data/vendor/snowball/algorithms/armenian.sbl +301 -0
- data/vendor/snowball/algorithms/basque.sbl +149 -0
- data/vendor/snowball/algorithms/catalan.sbl +202 -0
- data/vendor/snowball/algorithms/danish.sbl +93 -0
- data/vendor/snowball/algorithms/dutch.sbl +164 -0
- data/vendor/snowball/algorithms/english.sbl +229 -0
- data/vendor/snowball/algorithms/finnish.sbl +197 -0
- data/vendor/snowball/algorithms/french.sbl +254 -0
- data/vendor/snowball/algorithms/german.sbl +139 -0
- data/vendor/snowball/algorithms/german2.sbl +145 -0
- data/vendor/snowball/algorithms/greek.sbl +701 -0
- data/vendor/snowball/algorithms/hindi.sbl +323 -0
- data/vendor/snowball/algorithms/hungarian.sbl +241 -0
- data/vendor/snowball/algorithms/indonesian.sbl +192 -0
- data/vendor/snowball/algorithms/irish.sbl +149 -0
- data/vendor/snowball/algorithms/italian.sbl +202 -0
- data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +240 -0
- data/vendor/snowball/algorithms/lithuanian.sbl +373 -0
- data/vendor/snowball/algorithms/lovins.sbl +208 -0
- data/vendor/snowball/algorithms/nepali.sbl +92 -0
- data/vendor/snowball/algorithms/norwegian.sbl +80 -0
- data/vendor/snowball/algorithms/porter.sbl +139 -0
- data/vendor/snowball/algorithms/portuguese.sbl +218 -0
- data/vendor/snowball/algorithms/romanian.sbl +236 -0
- data/vendor/snowball/algorithms/russian.sbl +221 -0
- data/vendor/snowball/algorithms/serbian.sbl +2379 -0
- data/vendor/snowball/algorithms/spanish.sbl +230 -0
- data/vendor/snowball/algorithms/swedish.sbl +72 -0
- data/vendor/snowball/algorithms/tamil.sbl +405 -0
- data/vendor/snowball/algorithms/turkish.sbl +470 -0
- data/vendor/snowball/algorithms/yiddish.sbl +460 -0
- data/vendor/snowball/charsets/ISO-8859-2.sbl +98 -0
- data/vendor/snowball/charsets/KOI8-R.sbl +74 -0
- data/vendor/snowball/charsets/cp850.sbl +130 -0
- data/vendor/snowball/compiler/analyser.c +1547 -0
- data/vendor/snowball/compiler/driver.c +615 -0
- data/vendor/snowball/compiler/generator.c +1748 -0
- data/vendor/snowball/compiler/generator_ada.c +1702 -0
- data/vendor/snowball/compiler/generator_csharp.c +1322 -0
- data/vendor/snowball/compiler/generator_go.c +1278 -0
- data/vendor/snowball/compiler/generator_java.c +1313 -0
- data/vendor/snowball/compiler/generator_js.c +1316 -0
- data/vendor/snowball/compiler/generator_pascal.c +1387 -0
- data/vendor/snowball/compiler/generator_python.c +1337 -0
- data/vendor/snowball/compiler/generator_rust.c +1295 -0
- data/vendor/snowball/compiler/header.h +418 -0
- data/vendor/snowball/compiler/space.c +286 -0
- data/vendor/snowball/compiler/syswords.h +86 -0
- data/vendor/snowball/compiler/syswords2.h +13 -0
- data/vendor/snowball/compiler/tokeniser.c +567 -0
- data/vendor/snowball/csharp/.gitignore +8 -0
- data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore +1 -0
- data/vendor/snowball/csharp/Snowball/Among.cs +108 -0
- data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +36 -0
- data/vendor/snowball/csharp/Snowball/Stemmer.cs +660 -0
- data/vendor/snowball/csharp/Stemwords/App.config +6 -0
- data/vendor/snowball/csharp/Stemwords/Program.cs +114 -0
- data/vendor/snowball/doc/TODO +12 -0
- data/vendor/snowball/doc/libstemmer_c_README +148 -0
- data/vendor/snowball/doc/libstemmer_csharp_README +53 -0
- data/vendor/snowball/doc/libstemmer_java_README +67 -0
- data/vendor/snowball/doc/libstemmer_js_README +48 -0
- data/vendor/snowball/doc/libstemmer_python_README +113 -0
- data/vendor/snowball/examples/stemwords.c +204 -0
- data/vendor/snowball/go/README.md +55 -0
- data/vendor/snowball/go/among.go +16 -0
- data/vendor/snowball/go/env.go +403 -0
- data/vendor/snowball/go/stemwords/generate.go +68 -0
- data/vendor/snowball/go/stemwords/main.go +68 -0
- data/vendor/snowball/go/util.go +34 -0
- data/vendor/snowball/iconv.py +50 -0
- data/vendor/snowball/include/libstemmer.h +78 -0
- data/vendor/snowball/java/org/tartarus/snowball/Among.java +29 -0
- data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +381 -0
- data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +8 -0
- data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +75 -0
- data/vendor/snowball/javascript/base-stemmer.js +294 -0
- data/vendor/snowball/javascript/stemwords.js +106 -0
- data/vendor/snowball/libstemmer/libstemmer_c.in +96 -0
- data/vendor/snowball/libstemmer/mkalgorithms.pl +90 -0
- data/vendor/snowball/libstemmer/mkmodules.pl +267 -0
- data/vendor/snowball/libstemmer/modules.txt +63 -0
- data/vendor/snowball/libstemmer/test.c +34 -0
- data/vendor/snowball/pascal/.gitignore +4 -0
- data/vendor/snowball/pascal/SnowballProgram.pas +430 -0
- data/vendor/snowball/pascal/generate.pl +23 -0
- data/vendor/snowball/pascal/stemwords-template.dpr +78 -0
- data/vendor/snowball/python/MANIFEST.in +7 -0
- data/vendor/snowball/python/create_init.py +54 -0
- data/vendor/snowball/python/setup.cfg +6 -0
- data/vendor/snowball/python/setup.py +81 -0
- data/vendor/snowball/python/snowballstemmer/among.py +13 -0
- data/vendor/snowball/python/snowballstemmer/basestemmer.py +323 -0
- data/vendor/snowball/python/stemwords.py +101 -0
- data/vendor/snowball/python/testapp.py +28 -0
- data/vendor/snowball/runtime/api.c +58 -0
- data/vendor/snowball/runtime/api.h +32 -0
- data/vendor/snowball/runtime/header.h +61 -0
- data/vendor/snowball/runtime/utilities.c +513 -0
- data/vendor/snowball/rust/Cargo.toml +7 -0
- data/vendor/snowball/rust/build.rs +55 -0
- data/vendor/snowball/rust/rust-pre-1.27-compat.patch +30 -0
- data/vendor/snowball/rust/src/main.rs +102 -0
- data/vendor/snowball/rust/src/snowball/algorithms/mod.rs +2 -0
- data/vendor/snowball/rust/src/snowball/among.rs +6 -0
- data/vendor/snowball/rust/src/snowball/mod.rs +6 -0
- data/vendor/snowball/rust/src/snowball/snowball_env.rs +421 -0
- data/vendor/snowball/tests/stemtest.c +95 -0
- metadata +178 -0
|
@@ -0,0 +1,418 @@
|
|
|
1
|
+
#include <stdio.h>
|
|
2
|
+
|
|
3
|
+
#define SNOWBALL_VERSION "2.2.0"
|
|
4
|
+
|
|
5
|
+
typedef unsigned char byte;
|
|
6
|
+
typedef unsigned short symbol;
|
|
7
|
+
|
|
8
|
+
#define true 1
|
|
9
|
+
#define false 0
|
|
10
|
+
|
|
11
|
+
#define MALLOC check_malloc
|
|
12
|
+
#define FREE check_free
|
|
13
|
+
|
|
14
|
+
#define NEW(type, p) struct type * p = (struct type *) MALLOC(sizeof(struct type))
|
|
15
|
+
#define NEWVEC(type, p, n) struct type * p = (struct type *) MALLOC(sizeof(struct type) * (n))
|
|
16
|
+
|
|
17
|
+
#define SIZE(p) ((int *)(p))[-1]
|
|
18
|
+
#define CAPACITY(p) ((int *)(p))[-2]
|
|
19
|
+
|
|
20
|
+
extern symbol * create_b(int n);
|
|
21
|
+
extern void report_b(FILE * out, const symbol * p);
|
|
22
|
+
extern void lose_b(symbol * p);
|
|
23
|
+
extern symbol * increase_capacity(symbol * p, int n);
|
|
24
|
+
extern symbol * move_to_b(symbol * p, int n, const symbol * q);
|
|
25
|
+
extern symbol * add_to_b(symbol * p, int n, const symbol * q);
|
|
26
|
+
extern symbol * copy_b(const symbol * p);
|
|
27
|
+
extern char * b_to_s(const symbol * p);
|
|
28
|
+
extern symbol * add_s_to_b(symbol * p, const char * s);
|
|
29
|
+
|
|
30
|
+
#define MOVE_TO_B(B, LIT) \
|
|
31
|
+
move_to_b(B, sizeof(LIT) / sizeof(LIT[0]), LIT)
|
|
32
|
+
|
|
33
|
+
struct str; /* defined in space.c */
|
|
34
|
+
|
|
35
|
+
extern struct str * str_new(void);
|
|
36
|
+
extern void str_delete(struct str * str);
|
|
37
|
+
extern void str_append(struct str * str, const struct str * add);
|
|
38
|
+
extern void str_append_ch(struct str * str, char add);
|
|
39
|
+
extern void str_append_symbol(struct str * str, symbol add);
|
|
40
|
+
extern void str_append_b(struct str * str, const symbol * q);
|
|
41
|
+
extern void str_append_b_tail(struct str * str, const symbol * q, int skip);
|
|
42
|
+
extern void str_append_string(struct str * str, const char * s);
|
|
43
|
+
extern void str_append_int(struct str * str, int i);
|
|
44
|
+
extern void str_clear(struct str * str);
|
|
45
|
+
extern void str_assign(struct str * str, const char * s);
|
|
46
|
+
extern struct str * str_copy(const struct str * old);
|
|
47
|
+
extern symbol * str_data(const struct str * str);
|
|
48
|
+
extern int str_len(const struct str * str);
|
|
49
|
+
extern int str_back(const struct str *str);
|
|
50
|
+
extern int get_utf8(const symbol * p, int * slot);
|
|
51
|
+
extern int put_utf8(int ch, symbol * p);
|
|
52
|
+
extern void output_str(FILE * outfile, struct str * str);
|
|
53
|
+
|
|
54
|
+
typedef enum { ENC_SINGLEBYTE, ENC_UTF8, ENC_WIDECHARS } enc;
|
|
55
|
+
|
|
56
|
+
struct m_pair {
|
|
57
|
+
|
|
58
|
+
struct m_pair * next;
|
|
59
|
+
symbol * name;
|
|
60
|
+
symbol * value;
|
|
61
|
+
|
|
62
|
+
};
|
|
63
|
+
|
|
64
|
+
/* struct input must be a prefix of struct tokeniser. */
|
|
65
|
+
struct input {
|
|
66
|
+
|
|
67
|
+
struct input * next;
|
|
68
|
+
symbol * p;
|
|
69
|
+
int c;
|
|
70
|
+
char * file;
|
|
71
|
+
int file_needs_freeing;
|
|
72
|
+
int line_number;
|
|
73
|
+
|
|
74
|
+
};
|
|
75
|
+
|
|
76
|
+
struct include {
|
|
77
|
+
|
|
78
|
+
struct include * next;
|
|
79
|
+
symbol * b;
|
|
80
|
+
|
|
81
|
+
};
|
|
82
|
+
|
|
83
|
+
/* Token codes.  The enumerators supplied by syswords2.h come first and are
 * followed by the extra codes listed here.  NUM_TOKEN_CODES must stay last:
 * it is used to size arrays indexed by token code. */
enum token_codes {
#include "syswords2.h"
    c_mathassign,
    c_name,
    c_number,
    c_literalstring,
    c_neg,
    c_call,
    c_grouping,
    c_booltest,

    NUM_TOKEN_CODES
};
|
|
98
|
+
|
|
99
|
+
/* Values for the uplusmode member of struct tokeniser. */
enum uplus_modes {
    UPLUS_NONE,
    UPLUS_DEFINED,
    UPLUS_UNICODE
};
|
|
104
|
+
|
|
105
|
+
/* struct input must be a prefix of struct tokeniser. */
|
|
106
|
+
struct tokeniser {
|
|
107
|
+
|
|
108
|
+
struct input * next;
|
|
109
|
+
symbol * p;
|
|
110
|
+
int c;
|
|
111
|
+
char * file;
|
|
112
|
+
int file_needs_freeing;
|
|
113
|
+
int line_number;
|
|
114
|
+
symbol * b;
|
|
115
|
+
symbol * b2;
|
|
116
|
+
int number;
|
|
117
|
+
int m_start;
|
|
118
|
+
int m_end;
|
|
119
|
+
struct m_pair * m_pairs;
|
|
120
|
+
int get_depth;
|
|
121
|
+
int error_count;
|
|
122
|
+
int token;
|
|
123
|
+
int previous_token;
|
|
124
|
+
byte token_held;
|
|
125
|
+
enc encoding;
|
|
126
|
+
|
|
127
|
+
int omission;
|
|
128
|
+
struct include * includes;
|
|
129
|
+
|
|
130
|
+
/* Mode in which U+ has been used:
|
|
131
|
+
* UPLUS_NONE - not used yet
|
|
132
|
+
* UPLUS_DEFINED - stringdef U+xxxx ....
|
|
133
|
+
* UPLUS_UNICODE - {U+xxxx} used with implicit meaning
|
|
134
|
+
*/
|
|
135
|
+
int uplusmode;
|
|
136
|
+
|
|
137
|
+
char token_disabled[NUM_TOKEN_CODES];
|
|
138
|
+
};
|
|
139
|
+
|
|
140
|
+
extern symbol * get_input(const char * filename);
|
|
141
|
+
extern struct tokeniser * create_tokeniser(symbol * b, char * file);
|
|
142
|
+
extern int read_token(struct tokeniser * t);
|
|
143
|
+
extern const char * name_of_token(int code);
|
|
144
|
+
extern void disable_token(struct tokeniser * t, int code);
|
|
145
|
+
extern void close_tokeniser(struct tokeniser * t);
|
|
146
|
+
|
|
147
|
+
extern int space_count;
|
|
148
|
+
extern void * check_malloc(int n);
|
|
149
|
+
extern void check_free(void * p);
|
|
150
|
+
|
|
151
|
+
struct node;
|
|
152
|
+
|
|
153
|
+
struct name {
|
|
154
|
+
|
|
155
|
+
struct name * next;
|
|
156
|
+
symbol * b;
|
|
157
|
+
int type; /* t_string etc */
|
|
158
|
+
int mode; /* )_ for routines, externals */
|
|
159
|
+
struct node * definition; /* ) */
|
|
160
|
+
int count; /* 0, 1, 2 for each type */
|
|
161
|
+
struct grouping * grouping; /* for grouping names */
|
|
162
|
+
byte referenced;
|
|
163
|
+
byte used_in_among; /* Function used in among? */
|
|
164
|
+
byte value_used; /* (For variables) is its value ever used? */
|
|
165
|
+
byte initialised; /* (For variables) is it ever initialised? */
|
|
166
|
+
byte used_in_definition; /* (grouping) used in grouping definition? */
|
|
167
|
+
struct node * used; /* First use, or NULL if not used */
|
|
168
|
+
struct name * local_to; /* Local to one routine/external */
|
|
169
|
+
int declaration_line_number;/* Line number of declaration */
|
|
170
|
+
|
|
171
|
+
};
|
|
172
|
+
|
|
173
|
+
struct literalstring {
|
|
174
|
+
|
|
175
|
+
struct literalstring * next;
|
|
176
|
+
symbol * b;
|
|
177
|
+
|
|
178
|
+
};
|
|
179
|
+
|
|
180
|
+
struct amongvec {
|
|
181
|
+
|
|
182
|
+
symbol * b; /* the string giving the case */
|
|
183
|
+
int size; /* - and its size */
|
|
184
|
+
struct node * action; /* the corresponding action */
|
|
185
|
+
int i; /* the amongvec index of the longest substring of b */
|
|
186
|
+
int result; /* the numeric result for the case */
|
|
187
|
+
int line_number; /* for diagnostics and stable sorting */
|
|
188
|
+
struct name * function;
|
|
189
|
+
|
|
190
|
+
};
|
|
191
|
+
|
|
192
|
+
/* An among construct, kept in a singly linked list. */
struct among {
    struct among * next;
    struct amongvec * b;      /* pointer to the amongvec */
    int number;               /* amongs are numbered 0, 1, 2 ... */
    int literalstring_count;  /* in this among */
    int command_count;        /* in this among (includes "no command" entries) */
    int nocommand_count;      /* number of "no command" entries in this among */
    int function_count;       /* in this among */
    int amongvar_needed;      /* do we need to set among_var? */
    struct node * starter;    /* i.e. among( (starter) 'string' ... ) */
    struct node * substring;  /* i.e. substring ... among ( ... ) */
    struct node ** commands;  /* array with command_count entries */
};
|
|
206
|
+
|
|
207
|
+
struct grouping {
|
|
208
|
+
|
|
209
|
+
struct grouping * next;
|
|
210
|
+
symbol * b; /* the characters of this group */
|
|
211
|
+
int largest_ch; /* character with max code */
|
|
212
|
+
int smallest_ch; /* character with min code */
|
|
213
|
+
struct name * name; /* so g->name->grouping == g */
|
|
214
|
+
int line_number;
|
|
215
|
+
};
|
|
216
|
+
|
|
217
|
+
struct node {
|
|
218
|
+
|
|
219
|
+
struct node * next;
|
|
220
|
+
struct node * left;
|
|
221
|
+
struct node * aux; /* used in setlimit */
|
|
222
|
+
struct among * among; /* used in among */
|
|
223
|
+
struct node * right;
|
|
224
|
+
int type;
|
|
225
|
+
int mode;
|
|
226
|
+
struct node * AE;
|
|
227
|
+
struct name * name;
|
|
228
|
+
symbol * literalstring;
|
|
229
|
+
int number;
|
|
230
|
+
int line_number;
|
|
231
|
+
int amongvar_needed; /* used in routine definitions */
|
|
232
|
+
};
|
|
233
|
+
|
|
234
|
+
/* Kinds of name.  t_size is the number of kinds and is used as an array
 * dimension (see name_count in struct analyser). */
enum name_types {
    t_string = 0,
    t_boolean = 1,
    t_integer = 2,
    t_routine = 3,
    t_external = 4,
    t_grouping = 5,

    /* If this list is extended, adjust wvn in generator.c */
    t_size = 6
};
|
|
243
|
+
|
|
244
|
+
/* In name_count[i] below, remember that
|
|
245
|
+
type is
|
|
246
|
+
----+----
|
|
247
|
+
0 | string
|
|
248
|
+
1 | boolean
|
|
249
|
+
2 | integer
|
|
250
|
+
3 | routine
|
|
251
|
+
4 | external
|
|
252
|
+
5 | grouping
|
|
253
|
+
*/
|
|
254
|
+
|
|
255
|
+
struct analyser {
|
|
256
|
+
|
|
257
|
+
struct tokeniser * tokeniser;
|
|
258
|
+
struct node * nodes;
|
|
259
|
+
struct name * names;
|
|
260
|
+
struct literalstring * literalstrings;
|
|
261
|
+
int mode;
|
|
262
|
+
byte modifyable; /* false inside reverse(...) */
|
|
263
|
+
struct node * program;
|
|
264
|
+
struct node * program_end;
|
|
265
|
+
int name_count[t_size]; /* name_count[i] counts the number of names of type i */
|
|
266
|
+
struct among * amongs;
|
|
267
|
+
struct among * amongs_end;
|
|
268
|
+
int among_count;
|
|
269
|
+
int amongvar_needed; /* used in reading routine definitions */
|
|
270
|
+
struct grouping * groupings;
|
|
271
|
+
struct grouping * groupings_end;
|
|
272
|
+
struct node * substring; /* pending 'substring' in current routine definition */
|
|
273
|
+
enc encoding;
|
|
274
|
+
byte int_limits_used; /* are maxint or minint used? */
|
|
275
|
+
};
|
|
276
|
+
|
|
277
|
+
/* Direction modes (an m_integer mode is not currently defined). */
enum analyser_modes {
    m_forward = 0,
    m_backward /*, m_integer */
};
|
|
282
|
+
|
|
283
|
+
extern void print_program(struct analyser * a);
|
|
284
|
+
extern struct analyser * create_analyser(struct tokeniser * t);
|
|
285
|
+
extern void close_analyser(struct analyser * a);
|
|
286
|
+
|
|
287
|
+
extern void read_program(struct analyser * a);
|
|
288
|
+
|
|
289
|
+
struct generator {
|
|
290
|
+
|
|
291
|
+
struct analyser * analyser;
|
|
292
|
+
struct options * options;
|
|
293
|
+
int unreachable; /* 0 if code can be reached, 1 if current code
|
|
294
|
+
* is unreachable. */
|
|
295
|
+
int var_number; /* Number of next variable to use. */
|
|
296
|
+
struct str * outbuf; /* temporary str to store output */
|
|
297
|
+
struct str * declarations; /* str storing variable declarations */
|
|
298
|
+
int next_label;
|
|
299
|
+
#ifndef DISABLE_PYTHON
|
|
300
|
+
int max_label;
|
|
301
|
+
#endif
|
|
302
|
+
int margin;
|
|
303
|
+
|
|
304
|
+
/* if > 0, keep_count to restore in case of a failure;
|
|
305
|
+
* if < 0, the negated keep_count for the limit to restore in case of
|
|
306
|
+
* failure. */
|
|
307
|
+
int failure_keep_count;
|
|
308
|
+
#if !defined(DISABLE_JAVA) && !defined(DISABLE_JS) && !defined(DISABLE_PYTHON) && !defined(DISABLE_CSHARP)
|
|
309
|
+
struct str * failure_str; /* This is used by some generators instead of failure_keep_count */
|
|
310
|
+
#endif
|
|
311
|
+
|
|
312
|
+
int label_used; /* Keep track of whether the failure label is used. */
|
|
313
|
+
int failure_label;
|
|
314
|
+
int debug_count;
|
|
315
|
+
int copy_from_count; /* count of calls to copy_from() */
|
|
316
|
+
|
|
317
|
+
const char * S[10]; /* strings */
|
|
318
|
+
symbol * B[10]; /* blocks */
|
|
319
|
+
int I[10]; /* integers */
|
|
320
|
+
struct name * V[5]; /* variables */
|
|
321
|
+
symbol * L[5]; /* literals, used in formatted write */
|
|
322
|
+
|
|
323
|
+
int line_count; /* counts number of lines output */
|
|
324
|
+
int line_labelled; /* in ISO C, will need extra ';' if it is a block end */
|
|
325
|
+
int literalstring_count;
|
|
326
|
+
int keep_count; /* used to number keep/restore pairs to avoid compiler warnings
|
|
327
|
+
about shadowed variables */
|
|
328
|
+
int temporary_used; /* track if temporary variable used (for Pascal) */
|
|
329
|
+
};
|
|
330
|
+
|
|
331
|
+
/* Reserved values that the failure_label member of struct generator may
 * take in place of an ordinary label number. */
enum special_labels {
    x_return = -1
};
|
|
335
|
+
|
|
336
|
+
struct options {
|
|
337
|
+
|
|
338
|
+
/* for the command line: */
|
|
339
|
+
|
|
340
|
+
const char * output_file;
|
|
341
|
+
char * name;
|
|
342
|
+
FILE * output_src;
|
|
343
|
+
FILE * output_h;
|
|
344
|
+
byte syntax_tree;
|
|
345
|
+
byte comments;
|
|
346
|
+
enc encoding;
|
|
347
|
+
enum { LANG_JAVA, LANG_C, LANG_CPLUSPLUS, LANG_CSHARP, LANG_PASCAL, LANG_PYTHON, LANG_JAVASCRIPT, LANG_RUST, LANG_GO, LANG_ADA } make_lang;
|
|
348
|
+
const char * externals_prefix;
|
|
349
|
+
const char * variables_prefix;
|
|
350
|
+
const char * runtime_path;
|
|
351
|
+
const char * parent_class_name;
|
|
352
|
+
const char * package;
|
|
353
|
+
const char * go_snowball_runtime;
|
|
354
|
+
const char * string_class;
|
|
355
|
+
const char * among_class;
|
|
356
|
+
struct include * includes;
|
|
357
|
+
struct include * includes_end;
|
|
358
|
+
};
|
|
359
|
+
|
|
360
|
+
/* Generator functions common to several backends. */
|
|
361
|
+
|
|
362
|
+
extern struct generator * create_generator(struct analyser * a, struct options * o);
|
|
363
|
+
extern void close_generator(struct generator * g);
|
|
364
|
+
|
|
365
|
+
extern void write_char(struct generator * g, int ch);
|
|
366
|
+
extern void write_newline(struct generator * g);
|
|
367
|
+
extern void write_string(struct generator * g, const char * s);
|
|
368
|
+
extern void write_int(struct generator * g, int i);
|
|
369
|
+
extern void write_symbol(struct generator * g, symbol s);
|
|
370
|
+
extern void write_b(struct generator * g, symbol * b);
|
|
371
|
+
extern void write_str(struct generator * g, struct str * str);
|
|
372
|
+
|
|
373
|
+
extern void write_comment_content(struct generator * g, struct node * p);
|
|
374
|
+
extern void write_generated_comment_content(struct generator * g);
|
|
375
|
+
extern void write_start_comment(struct generator * g,
|
|
376
|
+
const char * comment_start,
|
|
377
|
+
const char * comment_end);
|
|
378
|
+
|
|
379
|
+
extern int K_needed(struct generator * g, struct node * p);
|
|
380
|
+
extern int repeat_restore(struct generator * g, struct node * p);
|
|
381
|
+
|
|
382
|
+
/* Generator for C code. */
|
|
383
|
+
extern void generate_program_c(struct generator * g);
|
|
384
|
+
|
|
385
|
+
#ifndef DISABLE_JAVA
|
|
386
|
+
/* Generator for Java code. */
|
|
387
|
+
extern void generate_program_java(struct generator * g);
|
|
388
|
+
#endif
|
|
389
|
+
|
|
390
|
+
#ifndef DISABLE_CSHARP
|
|
391
|
+
/* Generator for C# code. */
|
|
392
|
+
extern void generate_program_csharp(struct generator * g);
|
|
393
|
+
#endif
|
|
394
|
+
|
|
395
|
+
#ifndef DISABLE_PASCAL
|
|
396
|
+
extern void generate_program_pascal(struct generator * g);
|
|
397
|
+
#endif
|
|
398
|
+
|
|
399
|
+
#ifndef DISABLE_PYTHON
|
|
400
|
+
/* Generator for Python code. */
|
|
401
|
+
extern void generate_program_python(struct generator * g);
|
|
402
|
+
#endif
|
|
403
|
+
|
|
404
|
+
#ifndef DISABLE_JS
|
|
405
|
+
extern void generate_program_js(struct generator * g);
|
|
406
|
+
#endif
|
|
407
|
+
|
|
408
|
+
#ifndef DISABLE_RUST
|
|
409
|
+
extern void generate_program_rust(struct generator * g);
|
|
410
|
+
#endif
|
|
411
|
+
|
|
412
|
+
#ifndef DISABLE_GO
|
|
413
|
+
extern void generate_program_go(struct generator * g);
|
|
414
|
+
#endif
|
|
415
|
+
|
|
416
|
+
#ifndef DISABLE_ADA
|
|
417
|
+
extern void generate_program_ada(struct generator * g);
|
|
418
|
+
#endif
|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
|
|
2
|
+
#include <stdio.h> /* for printf */
|
|
3
|
+
#include <stdlib.h> /* malloc, free */
|
|
4
|
+
#include <string.h> /* memmove */
|
|
5
|
+
|
|
6
|
+
#include "header.h"
|
|
7
|
+
|
|
8
|
+
/* Bytes reserved in front of a block's symbol data for its two int
 * counters (see SIZE and CAPACITY).  Parenthesized so the macro expands
 * safely inside any expression, e.g. x / HEAD. */
#define HEAD (2*sizeof(int))
|
|
9
|
+
#define EXTENDER 40
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
/* This module provides a simple mechanism for arbitrary length writable
|
|
13
|
+
strings, called 'blocks'. They are 'symbol *' items rather than 'char *'
|
|
14
|
+
items however.
|
|
15
|
+
|
|
16
|
+
The calls are:
|
|
17
|
+
|
|
18
|
+
symbol * b = create_b(n);
|
|
19
|
+
- create an empty block b with room for n symbols
|
|
20
|
+
b = increase_capacity(b, n);
|
|
21
|
+
- increase the capacity of block b by n symbols (b may change)
|
|
22
|
+
b2 = copy_b(b)
|
|
23
|
+
- copy block b into b2
|
|
24
|
+
lose_b(b);
|
|
25
|
+
- lose block b
|
|
26
|
+
b = move_to_b(b, n, p);
|
|
27
|
+
- set the data in b to be the n symbols at address p
|
|
28
|
+
b = add_to_b(b, n, p);
|
|
29
|
+
- add the n symbols at address p to the end of the data in b
|
|
30
|
+
SIZE(b)
|
|
31
|
+
- is the number of symbols in b
|
|
32
|
+
For example:
|
|
33
|
+
|
|
34
|
+
symbol * b = create_b(0);
|
|
35
|
+
{ int i;
|
|
36
|
+
char p[10];
|
|
37
|
+
for (i = 0; i < 100; i++) {
|
|
38
|
+
sprintf(p, " %d", i);
|
|
39
|
+
b = add_s_to_b(b, p);
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
and b contains " 0 1 2 ... 99" spaced out as symbols.
|
|
44
|
+
*/
|
|
45
|
+
|
|
46
|
+
/* For a block b, SIZE(b) is the number of symbols so far written into it,
|
|
47
|
+
CAPACITY(b) the total number it can contain, so SIZE(b) <= CAPACITY(b).
|
|
48
|
+
In fact blocks have 1 extra character over the promised capacity so
|
|
49
|
+
they can be zero terminated by 'b[SIZE(b)] = 0;' without fear of
|
|
50
|
+
overwriting.
|
|
51
|
+
*/
|
|
52
|
+
|
|
53
|
+
extern symbol * create_b(int n) {
|
|
54
|
+
symbol * p = (symbol *) (HEAD + (char *) MALLOC(HEAD + (n + 1) * sizeof(symbol)));
|
|
55
|
+
CAPACITY(p) = n;
|
|
56
|
+
SIZE(p) = 0;
|
|
57
|
+
return p;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
/* Write block p to out, one byte per symbol.  A symbol above 255 cannot be
 * written as a single char: a message is printed on stdout and the program
 * exits with status 1. */
extern void report_b(FILE * out, const symbol * p) {
    const int len = SIZE(p);
    int pos = 0;
    while (pos < len) {
        const symbol ch = p[pos];
        if (ch > 255) {
            printf("In report_b, can't convert p[%d] to char because it's 0x%02x\n", pos, (int)ch);
            exit(1);
        }
        putc(ch, out);
        pos++;
    }
}
|
|
70
|
+
|
|
71
|
+
extern void output_str(FILE * outfile, struct str * str) {
|
|
72
|
+
report_b(outfile, str_data(str));
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
/* Release a block obtained from create_b().  A null p is ignored. */
extern void lose_b(symbol * p) {
    if (p != 0) {
        /* The allocation starts HEAD bytes before the symbol data. */
        char * raw = (char *) p - HEAD;
        FREE(raw);
    }
}
|
|
79
|
+
|
|
80
|
+
/* Return a replacement for block p whose capacity is larger by n + EXTENDER
 * symbols.  The first CAPACITY(p) symbols and the size are carried over and
 * the old block is released, so callers must continue with the returned
 * pointer. */
extern symbol * increase_capacity(symbol * p, int n) {
    const int old_capacity = CAPACITY(p);
    symbol * bigger = create_b(old_capacity + n + EXTENDER);
    memmove(bigger, p, old_capacity * sizeof(symbol));
    SIZE(bigger) = SIZE(p);
    lose_b(p);
    return bigger;
}
|
|
86
|
+
|
|
87
|
+
/* Replace the contents of block p with the n symbols at q, growing the
 * block first when n exceeds its capacity.  Returns the block, which may
 * have moved. */
extern symbol * move_to_b(symbol * p, int n, const symbol * q) {
    const int shortfall = n - CAPACITY(p);
    if (shortfall > 0) {
        p = increase_capacity(p, shortfall);
    }
    memmove(p, q, n * sizeof(symbol));
    SIZE(p) = n;
    return p;
}
|
|
92
|
+
|
|
93
|
+
/* Append the n symbols at q to the end of block p, growing the block first
 * when they do not fit.  Returns the block, which may have moved.
 *
 * q must not point into p's own storage: when p has to grow, the old
 * storage is released before the copy is made. */
extern symbol * add_to_b(symbol * p, int n, const symbol * q) {
    const int shortfall = SIZE(p) + n - CAPACITY(p);
    if (shortfall > 0) {
        p = increase_capacity(p, shortfall);
    }
    memmove(p + SIZE(p), q, n * sizeof(symbol));
    SIZE(p) += n;
    return p;
}
|
|
98
|
+
|
|
99
|
+
/* Return a newly created block holding a copy of the symbols in p. */
extern symbol * copy_b(const symbol * p) {
    const int len = SIZE(p);
    /* The fresh block already has capacity len, so move_to_b() fills it in
     * place and hands the same pointer back. */
    return move_to_b(create_b(len), len, p);
}
|
|
105
|
+
|
|
106
|
+
int space_count = 0;
|
|
107
|
+
|
|
108
|
+
extern void * check_malloc(int n) {
|
|
109
|
+
space_count++;
|
|
110
|
+
return malloc(n);
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
extern void check_free(void * p) {
|
|
114
|
+
space_count--;
|
|
115
|
+
free(p);
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
/* To convert a block to a zero terminated string: */
|
|
119
|
+
|
|
120
|
+
extern char * b_to_s(const symbol * p) {
|
|
121
|
+
int n = SIZE(p);
|
|
122
|
+
char * s = (char *)malloc(n + 1);
|
|
123
|
+
{
|
|
124
|
+
int i;
|
|
125
|
+
for (i = 0; i < n; i++) {
|
|
126
|
+
if (p[i] > 255) {
|
|
127
|
+
printf("In b_to_s, can't convert p[%d] to char because it's 0x%02x\n", i, (int)p[i]);
|
|
128
|
+
exit(1);
|
|
129
|
+
}
|
|
130
|
+
s[i] = (char)p[i];
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
s[n] = 0;
|
|
134
|
+
return s;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
/* To add a zero terminated string to a block. If p = 0 the
|
|
138
|
+
block is created. */
|
|
139
|
+
|
|
140
|
+
extern symbol * add_s_to_b(symbol * p, const char * s) {
|
|
141
|
+
int n = strlen(s);
|
|
142
|
+
int k;
|
|
143
|
+
if (p == 0) p = create_b(n);
|
|
144
|
+
k = SIZE(p);
|
|
145
|
+
{
|
|
146
|
+
int x = k + n - CAPACITY(p);
|
|
147
|
+
if (x > 0) p = increase_capacity(p, x);
|
|
148
|
+
}
|
|
149
|
+
{
|
|
150
|
+
int i;
|
|
151
|
+
for (i = 0; i < n; i++) p[i + k] = s[i];
|
|
152
|
+
}
|
|
153
|
+
SIZE(p) += n;
|
|
154
|
+
return p;
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
/* The next section defines string handling capabilities in terms
|
|
158
|
+
of the lower level block handling capabilities of space.c */
|
|
159
|
+
/* -------------------------------------------------------------*/
|
|
160
|
+
|
|
161
|
+
struct str {
|
|
162
|
+
symbol * data;
|
|
163
|
+
};
|
|
164
|
+
|
|
165
|
+
/* Create a new string. */
|
|
166
|
+
extern struct str * str_new(void) {
|
|
167
|
+
|
|
168
|
+
struct str * output = (struct str *) malloc(sizeof(struct str));
|
|
169
|
+
output->data = create_b(0);
|
|
170
|
+
return output;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
/* Delete a string. */
|
|
174
|
+
extern void str_delete(struct str * str) {
|
|
175
|
+
|
|
176
|
+
lose_b(str->data);
|
|
177
|
+
free(str);
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
/* Append a str to this str. */
|
|
181
|
+
extern void str_append(struct str * str, const struct str * add) {
|
|
182
|
+
|
|
183
|
+
symbol * q = add->data;
|
|
184
|
+
str->data = add_to_b(str->data, SIZE(q), q);
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
/* Append a character to this str. */
|
|
188
|
+
extern void str_append_ch(struct str * str, char add) {
|
|
189
|
+
|
|
190
|
+
symbol sym = (unsigned char)add;
|
|
191
|
+
str->data = add_to_b(str->data, 1, &sym);
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
/* Append a low level block to a str. */
|
|
195
|
+
extern void str_append_b(struct str * str, const symbol * q) {
|
|
196
|
+
|
|
197
|
+
str->data = add_to_b(str->data, SIZE(q), q);
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
/* Append the tail of a low level block to a str. */
|
|
201
|
+
extern void str_append_b_tail(struct str * str, const symbol * q, int skip) {
|
|
202
|
+
if (skip < 0 || skip >= SIZE(q)) return;
|
|
203
|
+
|
|
204
|
+
str->data = add_to_b(str->data, SIZE(q) - skip, q + skip);
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
/* Append a (char *, null terminated) string to a str. */
|
|
208
|
+
extern void str_append_string(struct str * str, const char * s) {
|
|
209
|
+
|
|
210
|
+
str->data = add_s_to_b(str->data, s);
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
/* Append an integer to a str. */
|
|
214
|
+
extern void str_append_int(struct str * str, int i) {
|
|
215
|
+
|
|
216
|
+
char s[30];
|
|
217
|
+
sprintf(s, "%d", i);
|
|
218
|
+
str_append_string(str, s);
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
/* Clear a string */
|
|
222
|
+
extern void str_clear(struct str * str) {
|
|
223
|
+
|
|
224
|
+
SIZE(str->data) = 0;
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
/* Set a string */
|
|
228
|
+
extern void str_assign(struct str * str, const char * s) {
|
|
229
|
+
|
|
230
|
+
str_clear(str);
|
|
231
|
+
str_append_string(str, s);
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
/* Copy a string. */
|
|
235
|
+
extern struct str * str_copy(const struct str * old) {
|
|
236
|
+
|
|
237
|
+
struct str * newstr = str_new();
|
|
238
|
+
str_append(newstr, old);
|
|
239
|
+
return newstr;
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
/* Get the data stored in this str. */
|
|
243
|
+
extern symbol * str_data(const struct str * str) {
|
|
244
|
+
|
|
245
|
+
return str->data;
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
/* Get the length of the str. */
|
|
249
|
+
extern int str_len(const struct str * str) {
|
|
250
|
+
|
|
251
|
+
return SIZE(str->data);
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
/* Get the last character of the str.
|
|
255
|
+
*
|
|
256
|
+
* Or -1 if the string is empty.
|
|
257
|
+
*/
|
|
258
|
+
extern int str_back(const struct str *str) {
|
|
259
|
+
return SIZE(str->data) ? str->data[SIZE(str->data) - 1] : -1;
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
/* Decode one UTF-8 sequence whose bytes are held one per symbol at p.
 *
 * The code point is stored in *slot and the number of symbols consumed
 * (1 to 4) is returned.  Symbols below 0xC0 are returned as they are with
 * length 1.  Lead bytes of 0xF0 and above are decoded as 4-byte sequences;
 * they were previously treated as 3-byte sequences, which yielded a wrong
 * code point and left a stray continuation byte behind.
 *
 * No validation is done: the continuation symbols are assumed to be
 * present and well formed. */
extern int get_utf8(const symbol * p, int * slot) {
    int b0, b1, b2;
    b0 = *p++;
    if (b0 < 0xC0) {   /* 1100 0000 */
        * slot = b0; return 1;
    }
    b1 = *p++;
    if (b0 < 0xE0) {   /* 1110 0000 */
        * slot = (b0 & 0x1F) << 6 | (b1 & 0x3F); return 2;
    }
    b2 = *p++;
    if (b0 < 0xF0) {   /* 1111 0000 */
        * slot = (b0 & 0xF) << 12 | (b1 & 0x3F) << 6 | (b2 & 0x3F); return 3;
    }
    * slot = (b0 & 0x7) << 18 | (b1 & 0x3F) << 12 | (b2 & 0x3F) << 6 | (*p & 0x3F);
    return 4;
}
|
|
274
|
+
|
|
275
|
+
/* Encode ch as UTF-8, storing one byte per symbol starting at p[0].
 * Returns the number of symbols written: 1 for ch < 0x80, 2 for
 * ch < 0x800, otherwise 3. */
extern int put_utf8(int ch, symbol * p) {
    int len;
    if (ch < 0x80) {
        p[0] = ch;
        len = 1;
    } else if (ch < 0x800) {
        p[0] = (ch >> 6) | 0xC0;
        p[1] = (ch & 0x3F) | 0x80;
        len = 2;
    } else {
        p[0] = (ch >> 12) | 0xE0;
        p[1] = ((ch >> 6) & 0x3F) | 0x80;
        p[2] = (ch & 0x3F) | 0x80;
        len = 3;
    }
    return len;
}
|