mittens 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +30 -0
- data/README.md +62 -0
- data/Rakefile +21 -0
- data/ext/mittens/ext.c +96 -0
- data/ext/mittens/extconf.rb +12 -0
- data/lib/mittens/version.rb +3 -0
- data/lib/mittens.rb +7 -0
- data/mittens.gemspec +22 -0
- data/vendor/snowball/.gitignore +26 -0
- data/vendor/snowball/.travis.yml +112 -0
- data/vendor/snowball/AUTHORS +27 -0
- data/vendor/snowball/CONTRIBUTING.rst +216 -0
- data/vendor/snowball/COPYING +29 -0
- data/vendor/snowball/GNUmakefile +742 -0
- data/vendor/snowball/NEWS +754 -0
- data/vendor/snowball/README.rst +37 -0
- data/vendor/snowball/ada/README.md +74 -0
- data/vendor/snowball/ada/generate/generate.adb +83 -0
- data/vendor/snowball/ada/generate.gpr +21 -0
- data/vendor/snowball/ada/src/stemmer.adb +620 -0
- data/vendor/snowball/ada/src/stemmer.ads +219 -0
- data/vendor/snowball/ada/src/stemwords.adb +70 -0
- data/vendor/snowball/ada/stemmer_config.gpr +83 -0
- data/vendor/snowball/ada/stemwords.gpr +21 -0
- data/vendor/snowball/algorithms/arabic.sbl +558 -0
- data/vendor/snowball/algorithms/armenian.sbl +301 -0
- data/vendor/snowball/algorithms/basque.sbl +149 -0
- data/vendor/snowball/algorithms/catalan.sbl +202 -0
- data/vendor/snowball/algorithms/danish.sbl +93 -0
- data/vendor/snowball/algorithms/dutch.sbl +164 -0
- data/vendor/snowball/algorithms/english.sbl +229 -0
- data/vendor/snowball/algorithms/finnish.sbl +197 -0
- data/vendor/snowball/algorithms/french.sbl +254 -0
- data/vendor/snowball/algorithms/german.sbl +139 -0
- data/vendor/snowball/algorithms/german2.sbl +145 -0
- data/vendor/snowball/algorithms/greek.sbl +701 -0
- data/vendor/snowball/algorithms/hindi.sbl +323 -0
- data/vendor/snowball/algorithms/hungarian.sbl +241 -0
- data/vendor/snowball/algorithms/indonesian.sbl +192 -0
- data/vendor/snowball/algorithms/irish.sbl +149 -0
- data/vendor/snowball/algorithms/italian.sbl +202 -0
- data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +240 -0
- data/vendor/snowball/algorithms/lithuanian.sbl +373 -0
- data/vendor/snowball/algorithms/lovins.sbl +208 -0
- data/vendor/snowball/algorithms/nepali.sbl +92 -0
- data/vendor/snowball/algorithms/norwegian.sbl +80 -0
- data/vendor/snowball/algorithms/porter.sbl +139 -0
- data/vendor/snowball/algorithms/portuguese.sbl +218 -0
- data/vendor/snowball/algorithms/romanian.sbl +236 -0
- data/vendor/snowball/algorithms/russian.sbl +221 -0
- data/vendor/snowball/algorithms/serbian.sbl +2379 -0
- data/vendor/snowball/algorithms/spanish.sbl +230 -0
- data/vendor/snowball/algorithms/swedish.sbl +72 -0
- data/vendor/snowball/algorithms/tamil.sbl +405 -0
- data/vendor/snowball/algorithms/turkish.sbl +470 -0
- data/vendor/snowball/algorithms/yiddish.sbl +460 -0
- data/vendor/snowball/charsets/ISO-8859-2.sbl +98 -0
- data/vendor/snowball/charsets/KOI8-R.sbl +74 -0
- data/vendor/snowball/charsets/cp850.sbl +130 -0
- data/vendor/snowball/compiler/analyser.c +1547 -0
- data/vendor/snowball/compiler/driver.c +615 -0
- data/vendor/snowball/compiler/generator.c +1748 -0
- data/vendor/snowball/compiler/generator_ada.c +1702 -0
- data/vendor/snowball/compiler/generator_csharp.c +1322 -0
- data/vendor/snowball/compiler/generator_go.c +1278 -0
- data/vendor/snowball/compiler/generator_java.c +1313 -0
- data/vendor/snowball/compiler/generator_js.c +1316 -0
- data/vendor/snowball/compiler/generator_pascal.c +1387 -0
- data/vendor/snowball/compiler/generator_python.c +1337 -0
- data/vendor/snowball/compiler/generator_rust.c +1295 -0
- data/vendor/snowball/compiler/header.h +418 -0
- data/vendor/snowball/compiler/space.c +286 -0
- data/vendor/snowball/compiler/syswords.h +86 -0
- data/vendor/snowball/compiler/syswords2.h +13 -0
- data/vendor/snowball/compiler/tokeniser.c +567 -0
- data/vendor/snowball/csharp/.gitignore +8 -0
- data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore +1 -0
- data/vendor/snowball/csharp/Snowball/Among.cs +108 -0
- data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +36 -0
- data/vendor/snowball/csharp/Snowball/Stemmer.cs +660 -0
- data/vendor/snowball/csharp/Stemwords/App.config +6 -0
- data/vendor/snowball/csharp/Stemwords/Program.cs +114 -0
- data/vendor/snowball/doc/TODO +12 -0
- data/vendor/snowball/doc/libstemmer_c_README +148 -0
- data/vendor/snowball/doc/libstemmer_csharp_README +53 -0
- data/vendor/snowball/doc/libstemmer_java_README +67 -0
- data/vendor/snowball/doc/libstemmer_js_README +48 -0
- data/vendor/snowball/doc/libstemmer_python_README +113 -0
- data/vendor/snowball/examples/stemwords.c +204 -0
- data/vendor/snowball/go/README.md +55 -0
- data/vendor/snowball/go/among.go +16 -0
- data/vendor/snowball/go/env.go +403 -0
- data/vendor/snowball/go/stemwords/generate.go +68 -0
- data/vendor/snowball/go/stemwords/main.go +68 -0
- data/vendor/snowball/go/util.go +34 -0
- data/vendor/snowball/iconv.py +50 -0
- data/vendor/snowball/include/libstemmer.h +78 -0
- data/vendor/snowball/java/org/tartarus/snowball/Among.java +29 -0
- data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +381 -0
- data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +8 -0
- data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +75 -0
- data/vendor/snowball/javascript/base-stemmer.js +294 -0
- data/vendor/snowball/javascript/stemwords.js +106 -0
- data/vendor/snowball/libstemmer/libstemmer_c.in +96 -0
- data/vendor/snowball/libstemmer/mkalgorithms.pl +90 -0
- data/vendor/snowball/libstemmer/mkmodules.pl +267 -0
- data/vendor/snowball/libstemmer/modules.txt +63 -0
- data/vendor/snowball/libstemmer/test.c +34 -0
- data/vendor/snowball/pascal/.gitignore +4 -0
- data/vendor/snowball/pascal/SnowballProgram.pas +430 -0
- data/vendor/snowball/pascal/generate.pl +23 -0
- data/vendor/snowball/pascal/stemwords-template.dpr +78 -0
- data/vendor/snowball/python/MANIFEST.in +7 -0
- data/vendor/snowball/python/create_init.py +54 -0
- data/vendor/snowball/python/setup.cfg +6 -0
- data/vendor/snowball/python/setup.py +81 -0
- data/vendor/snowball/python/snowballstemmer/among.py +13 -0
- data/vendor/snowball/python/snowballstemmer/basestemmer.py +323 -0
- data/vendor/snowball/python/stemwords.py +101 -0
- data/vendor/snowball/python/testapp.py +28 -0
- data/vendor/snowball/runtime/api.c +58 -0
- data/vendor/snowball/runtime/api.h +32 -0
- data/vendor/snowball/runtime/header.h +61 -0
- data/vendor/snowball/runtime/utilities.c +513 -0
- data/vendor/snowball/rust/Cargo.toml +7 -0
- data/vendor/snowball/rust/build.rs +55 -0
- data/vendor/snowball/rust/rust-pre-1.27-compat.patch +30 -0
- data/vendor/snowball/rust/src/main.rs +102 -0
- data/vendor/snowball/rust/src/snowball/algorithms/mod.rs +2 -0
- data/vendor/snowball/rust/src/snowball/among.rs +6 -0
- data/vendor/snowball/rust/src/snowball/mod.rs +6 -0
- data/vendor/snowball/rust/src/snowball/snowball_env.rs +421 -0
- data/vendor/snowball/tests/stemtest.c +95 -0
- metadata +178 -0
@@ -0,0 +1,418 @@
|
|
1
|
+
#include <stdio.h>
|
2
|
+
|
3
|
+
#define SNOWBALL_VERSION "2.2.0"
|
4
|
+
|
5
|
+
typedef unsigned char byte;
|
6
|
+
typedef unsigned short symbol;
|
7
|
+
|
8
|
+
#define true 1
|
9
|
+
#define false 0
|
10
|
+
|
11
|
+
#define MALLOC check_malloc
|
12
|
+
#define FREE check_free
|
13
|
+
|
14
|
+
#define NEW(type, p) struct type * p = (struct type *) MALLOC(sizeof(struct type))
|
15
|
+
#define NEWVEC(type, p, n) struct type * p = (struct type *) MALLOC(sizeof(struct type) * (n))
|
16
|
+
|
17
|
+
#define SIZE(p) ((int *)(p))[-1]
|
18
|
+
#define CAPACITY(p) ((int *)(p))[-2]
|
19
|
+
|
20
|
+
extern symbol * create_b(int n);
|
21
|
+
extern void report_b(FILE * out, const symbol * p);
|
22
|
+
extern void lose_b(symbol * p);
|
23
|
+
extern symbol * increase_capacity(symbol * p, int n);
|
24
|
+
extern symbol * move_to_b(symbol * p, int n, const symbol * q);
|
25
|
+
extern symbol * add_to_b(symbol * p, int n, const symbol * q);
|
26
|
+
extern symbol * copy_b(const symbol * p);
|
27
|
+
extern char * b_to_s(const symbol * p);
|
28
|
+
extern symbol * add_s_to_b(symbol * p, const char * s);
|
29
|
+
|
30
|
+
#define MOVE_TO_B(B, LIT) \
|
31
|
+
move_to_b(B, sizeof(LIT) / sizeof(LIT[0]), LIT)
|
32
|
+
|
33
|
+
struct str; /* defined in space.c */
|
34
|
+
|
35
|
+
extern struct str * str_new(void);
|
36
|
+
extern void str_delete(struct str * str);
|
37
|
+
extern void str_append(struct str * str, const struct str * add);
|
38
|
+
extern void str_append_ch(struct str * str, char add);
|
39
|
+
extern void str_append_symbol(struct str * str, symbol add);
|
40
|
+
extern void str_append_b(struct str * str, const symbol * q);
|
41
|
+
extern void str_append_b_tail(struct str * str, const symbol * q, int skip);
|
42
|
+
extern void str_append_string(struct str * str, const char * s);
|
43
|
+
extern void str_append_int(struct str * str, int i);
|
44
|
+
extern void str_clear(struct str * str);
|
45
|
+
extern void str_assign(struct str * str, const char * s);
|
46
|
+
extern struct str * str_copy(const struct str * old);
|
47
|
+
extern symbol * str_data(const struct str * str);
|
48
|
+
extern int str_len(const struct str * str);
|
49
|
+
extern int str_back(const struct str *str);
|
50
|
+
extern int get_utf8(const symbol * p, int * slot);
|
51
|
+
extern int put_utf8(int ch, symbol * p);
|
52
|
+
extern void output_str(FILE * outfile, struct str * str);
|
53
|
+
|
54
|
+
typedef enum { ENC_SINGLEBYTE, ENC_UTF8, ENC_WIDECHARS } enc;
|
55
|
+
|
56
|
+
struct m_pair {
|
57
|
+
|
58
|
+
struct m_pair * next;
|
59
|
+
symbol * name;
|
60
|
+
symbol * value;
|
61
|
+
|
62
|
+
};
|
63
|
+
|
64
|
+
/* struct input must be a prefix of struct tokeniser. */
|
65
|
+
struct input {
|
66
|
+
|
67
|
+
struct input * next;
|
68
|
+
symbol * p;
|
69
|
+
int c;
|
70
|
+
char * file;
|
71
|
+
int file_needs_freeing;
|
72
|
+
int line_number;
|
73
|
+
|
74
|
+
};
|
75
|
+
|
76
|
+
struct include {
|
77
|
+
|
78
|
+
struct include * next;
|
79
|
+
symbol * b;
|
80
|
+
|
81
|
+
};
|
82
|
+
|
83
|
+
/* Token codes. The enumerators supplied by syswords2.h come first, followed
 * by the ones spelled out here. NUM_TOKEN_CODES is deliberately last; it is
 * used as an array dimension (see token_disabled in struct tokeniser). */
enum token_codes {

#include "syswords2.h"

    c_mathassign,
    c_name,
    c_number,
    c_literalstring,
    c_neg,
    c_call,
    c_grouping,
    c_booltest,

    NUM_TOKEN_CODES
};
|
98
|
+
|
99
|
+
/* Values for the uplusmode member of struct tokeniser. */
enum uplus_modes {
    UPLUS_NONE,     /* U+ not used yet */
    UPLUS_DEFINED,  /* stringdef U+xxxx .... */
    UPLUS_UNICODE   /* {U+xxxx} used with implicit meaning */
};
|
104
|
+
|
105
|
+
/* struct input must be a prefix of struct tokeniser. */
|
106
|
+
struct tokeniser {
|
107
|
+
|
108
|
+
struct input * next;
|
109
|
+
symbol * p;
|
110
|
+
int c;
|
111
|
+
char * file;
|
112
|
+
int file_needs_freeing;
|
113
|
+
int line_number;
|
114
|
+
symbol * b;
|
115
|
+
symbol * b2;
|
116
|
+
int number;
|
117
|
+
int m_start;
|
118
|
+
int m_end;
|
119
|
+
struct m_pair * m_pairs;
|
120
|
+
int get_depth;
|
121
|
+
int error_count;
|
122
|
+
int token;
|
123
|
+
int previous_token;
|
124
|
+
byte token_held;
|
125
|
+
enc encoding;
|
126
|
+
|
127
|
+
int omission;
|
128
|
+
struct include * includes;
|
129
|
+
|
130
|
+
/* Mode in which U+ has been used:
|
131
|
+
* UPLUS_NONE - not used yet
|
132
|
+
* UPLUS_DEFINED - stringdef U+xxxx ....
|
133
|
+
* UPLUS_UNICODE - {U+xxxx} used with implicit meaning
|
134
|
+
*/
|
135
|
+
int uplusmode;
|
136
|
+
|
137
|
+
char token_disabled[NUM_TOKEN_CODES];
|
138
|
+
};
|
139
|
+
|
140
|
+
extern symbol * get_input(const char * filename);
|
141
|
+
extern struct tokeniser * create_tokeniser(symbol * b, char * file);
|
142
|
+
extern int read_token(struct tokeniser * t);
|
143
|
+
extern const char * name_of_token(int code);
|
144
|
+
extern void disable_token(struct tokeniser * t, int code);
|
145
|
+
extern void close_tokeniser(struct tokeniser * t);
|
146
|
+
|
147
|
+
extern int space_count;
|
148
|
+
extern void * check_malloc(int n);
|
149
|
+
extern void check_free(void * p);
|
150
|
+
|
151
|
+
struct node;
|
152
|
+
|
153
|
+
struct name {
|
154
|
+
|
155
|
+
struct name * next;
|
156
|
+
symbol * b;
|
157
|
+
int type; /* t_string etc */
|
158
|
+
int mode; /* )_ for routines, externals */
|
159
|
+
struct node * definition; /* ) */
|
160
|
+
int count; /* 0, 1, 2 for each type */
|
161
|
+
struct grouping * grouping; /* for grouping names */
|
162
|
+
byte referenced;
|
163
|
+
byte used_in_among; /* Function used in among? */
|
164
|
+
byte value_used; /* (For variables) is its value ever used? */
|
165
|
+
byte initialised; /* (For variables) is it ever initialised? */
|
166
|
+
byte used_in_definition; /* (grouping) used in grouping definition? */
|
167
|
+
struct node * used; /* First use, or NULL if not used */
|
168
|
+
struct name * local_to; /* Local to one routine/external */
|
169
|
+
int declaration_line_number;/* Line number of declaration */
|
170
|
+
|
171
|
+
};
|
172
|
+
|
173
|
+
struct literalstring {
|
174
|
+
|
175
|
+
struct literalstring * next;
|
176
|
+
symbol * b;
|
177
|
+
|
178
|
+
};
|
179
|
+
|
180
|
+
struct amongvec {
|
181
|
+
|
182
|
+
symbol * b; /* the string giving the case */
|
183
|
+
int size; /* - and its size */
|
184
|
+
struct node * action; /* the corresponding action */
|
185
|
+
int i; /* the amongvec index of the longest substring of b */
|
186
|
+
int result; /* the numeric result for the case */
|
187
|
+
int line_number; /* for diagnostics and stable sorting */
|
188
|
+
struct name * function;
|
189
|
+
|
190
|
+
};
|
191
|
+
|
192
|
+
/* Description of one among; the amongs are chained through 'next'. */
struct among {

    struct among * next;
    struct amongvec * b;        /* pointer to the amongvec */
    int number;                 /* amongs are numbered 0, 1, 2 ... */
    int literalstring_count;    /* in this among */
    int command_count;          /* in this among (includes "no command" entries) */
    int nocommand_count;        /* number of "no command" entries in this among */
    int function_count;         /* in this among */
    int amongvar_needed;        /* do we need to set among_var? */
    struct node * starter;      /* i.e. among( (starter) 'string' ... ) */
    struct node * substring;    /* i.e. substring ... among ( ... ) */
    struct node ** commands;    /* array with command_count entries */
};
|
206
|
+
|
207
|
+
struct grouping {
|
208
|
+
|
209
|
+
struct grouping * next;
|
210
|
+
symbol * b; /* the characters of this group */
|
211
|
+
int largest_ch; /* character with max code */
|
212
|
+
int smallest_ch; /* character with min code */
|
213
|
+
struct name * name; /* so g->name->grouping == g */
|
214
|
+
int line_number;
|
215
|
+
};
|
216
|
+
|
217
|
+
struct node {
|
218
|
+
|
219
|
+
struct node * next;
|
220
|
+
struct node * left;
|
221
|
+
struct node * aux; /* used in setlimit */
|
222
|
+
struct among * among; /* used in among */
|
223
|
+
struct node * right;
|
224
|
+
int type;
|
225
|
+
int mode;
|
226
|
+
struct node * AE;
|
227
|
+
struct name * name;
|
228
|
+
symbol * literalstring;
|
229
|
+
int number;
|
230
|
+
int line_number;
|
231
|
+
int amongvar_needed; /* used in routine definitions */
|
232
|
+
};
|
233
|
+
|
234
|
+
/* Kinds of name. Every enumerator has an explicit value; t_size is the number
 * of kinds and is used as an array dimension (name_count in struct analyser).
 * If this list is extended, adjust wvn in generator.c */
enum name_types {

    t_string = 0,
    t_boolean = 1,
    t_integer = 2,
    t_routine = 3,
    t_external = 4,
    t_grouping = 5,

    t_size = 6
};
|
243
|
+
|
244
|
+
/* In name_count[i] below, remember that
|
245
|
+
type is
|
246
|
+
----+----
|
247
|
+
0 | string
|
248
|
+
1 | boolean
|
249
|
+
2 | integer
|
250
|
+
3 | routine
|
251
|
+
4 | external
|
252
|
+
5 | grouping
|
253
|
+
*/
|
254
|
+
|
255
|
+
struct analyser {
|
256
|
+
|
257
|
+
struct tokeniser * tokeniser;
|
258
|
+
struct node * nodes;
|
259
|
+
struct name * names;
|
260
|
+
struct literalstring * literalstrings;
|
261
|
+
int mode;
|
262
|
+
byte modifyable; /* false inside reverse(...) */
|
263
|
+
struct node * program;
|
264
|
+
struct node * program_end;
|
265
|
+
int name_count[t_size]; /* name_count[i] counts the number of names of type i */
|
266
|
+
struct among * amongs;
|
267
|
+
struct among * amongs_end;
|
268
|
+
int among_count;
|
269
|
+
int amongvar_needed; /* used in reading routine definitions */
|
270
|
+
struct grouping * groupings;
|
271
|
+
struct grouping * groupings_end;
|
272
|
+
struct node * substring; /* pending 'substring' in current routine definition */
|
273
|
+
enc encoding;
|
274
|
+
byte int_limits_used; /* are maxint or minint used? */
|
275
|
+
};
|
276
|
+
|
277
|
+
/* Values for the 'mode' members: forward is 0, backward follows it. */
enum analyser_modes {

    m_forward = 0,
    m_backward /*, m_integer */

};
|
282
|
+
|
283
|
+
extern void print_program(struct analyser * a);
|
284
|
+
extern struct analyser * create_analyser(struct tokeniser * t);
|
285
|
+
extern void close_analyser(struct analyser * a);
|
286
|
+
|
287
|
+
extern void read_program(struct analyser * a);
|
288
|
+
|
289
|
+
struct generator {
|
290
|
+
|
291
|
+
struct analyser * analyser;
|
292
|
+
struct options * options;
|
293
|
+
int unreachable; /* 0 if code can be reached, 1 if current code
|
294
|
+
* is unreachable. */
|
295
|
+
int var_number; /* Number of next variable to use. */
|
296
|
+
struct str * outbuf; /* temporary str to store output */
|
297
|
+
struct str * declarations; /* str storing variable declarations */
|
298
|
+
int next_label;
|
299
|
+
#ifndef DISABLE_PYTHON
|
300
|
+
int max_label;
|
301
|
+
#endif
|
302
|
+
int margin;
|
303
|
+
|
304
|
+
/* if > 0, keep_count to restore in case of a failure;
|
305
|
+
* if < 0, the negated keep_count for the limit to restore in case of
|
306
|
+
* failure. */
|
307
|
+
int failure_keep_count;
|
308
|
+
#if !defined(DISABLE_JAVA) && !defined(DISABLE_JS) && !defined(DISABLE_PYTHON) && !defined(DISABLE_CSHARP)
|
309
|
+
struct str * failure_str; /* This is used by some generators instead of failure_keep_count */
|
310
|
+
#endif
|
311
|
+
|
312
|
+
int label_used; /* Keep track of whether the failure label is used. */
|
313
|
+
int failure_label;
|
314
|
+
int debug_count;
|
315
|
+
int copy_from_count; /* count of calls to copy_from() */
|
316
|
+
|
317
|
+
const char * S[10]; /* strings */
|
318
|
+
symbol * B[10]; /* blocks */
|
319
|
+
int I[10]; /* integers */
|
320
|
+
struct name * V[5]; /* variables */
|
321
|
+
symbol * L[5]; /* literals, used in formatted write */
|
322
|
+
|
323
|
+
int line_count; /* counts number of lines output */
|
324
|
+
int line_labelled; /* in ISO C, will need extra ';' if it is a block end */
|
325
|
+
int literalstring_count;
|
326
|
+
int keep_count; /* used to number keep/restore pairs to avoid compiler warnings
|
327
|
+
about shadowed variables */
|
328
|
+
int temporary_used; /* track if temporary variable used (for Pascal) */
|
329
|
+
};
|
330
|
+
|
331
|
+
/* Reserved values that failure_label (struct generator) may take in place of
 * an ordinary label number. */
enum special_labels {
    x_return = -1
};
|
335
|
+
|
336
|
+
struct options {
|
337
|
+
|
338
|
+
/* for the command line: */
|
339
|
+
|
340
|
+
const char * output_file;
|
341
|
+
char * name;
|
342
|
+
FILE * output_src;
|
343
|
+
FILE * output_h;
|
344
|
+
byte syntax_tree;
|
345
|
+
byte comments;
|
346
|
+
enc encoding;
|
347
|
+
enum { LANG_JAVA, LANG_C, LANG_CPLUSPLUS, LANG_CSHARP, LANG_PASCAL, LANG_PYTHON, LANG_JAVASCRIPT, LANG_RUST, LANG_GO, LANG_ADA } make_lang;
|
348
|
+
const char * externals_prefix;
|
349
|
+
const char * variables_prefix;
|
350
|
+
const char * runtime_path;
|
351
|
+
const char * parent_class_name;
|
352
|
+
const char * package;
|
353
|
+
const char * go_snowball_runtime;
|
354
|
+
const char * string_class;
|
355
|
+
const char * among_class;
|
356
|
+
struct include * includes;
|
357
|
+
struct include * includes_end;
|
358
|
+
};
|
359
|
+
|
360
|
+
/* Generator functions common to several backends. */
|
361
|
+
|
362
|
+
extern struct generator * create_generator(struct analyser * a, struct options * o);
|
363
|
+
extern void close_generator(struct generator * g);
|
364
|
+
|
365
|
+
extern void write_char(struct generator * g, int ch);
|
366
|
+
extern void write_newline(struct generator * g);
|
367
|
+
extern void write_string(struct generator * g, const char * s);
|
368
|
+
extern void write_int(struct generator * g, int i);
|
369
|
+
extern void write_symbol(struct generator * g, symbol s);
|
370
|
+
extern void write_b(struct generator * g, symbol * b);
|
371
|
+
extern void write_str(struct generator * g, struct str * str);
|
372
|
+
|
373
|
+
extern void write_comment_content(struct generator * g, struct node * p);
|
374
|
+
extern void write_generated_comment_content(struct generator * g);
|
375
|
+
extern void write_start_comment(struct generator * g,
|
376
|
+
const char * comment_start,
|
377
|
+
const char * comment_end);
|
378
|
+
|
379
|
+
extern int K_needed(struct generator * g, struct node * p);
|
380
|
+
extern int repeat_restore(struct generator * g, struct node * p);
|
381
|
+
|
382
|
+
/* Generator for C code. */
|
383
|
+
extern void generate_program_c(struct generator * g);
|
384
|
+
|
385
|
+
#ifndef DISABLE_JAVA
|
386
|
+
/* Generator for Java code. */
|
387
|
+
extern void generate_program_java(struct generator * g);
|
388
|
+
#endif
|
389
|
+
|
390
|
+
#ifndef DISABLE_CSHARP
|
391
|
+
/* Generator for C# code. */
|
392
|
+
extern void generate_program_csharp(struct generator * g);
|
393
|
+
#endif
|
394
|
+
|
395
|
+
#ifndef DISABLE_PASCAL
|
396
|
+
extern void generate_program_pascal(struct generator * g);
|
397
|
+
#endif
|
398
|
+
|
399
|
+
#ifndef DISABLE_PYTHON
|
400
|
+
/* Generator for Python code. */
|
401
|
+
extern void generate_program_python(struct generator * g);
|
402
|
+
#endif
|
403
|
+
|
404
|
+
#ifndef DISABLE_JS
|
405
|
+
extern void generate_program_js(struct generator * g);
|
406
|
+
#endif
|
407
|
+
|
408
|
+
#ifndef DISABLE_RUST
|
409
|
+
extern void generate_program_rust(struct generator * g);
|
410
|
+
#endif
|
411
|
+
|
412
|
+
#ifndef DISABLE_GO
|
413
|
+
extern void generate_program_go(struct generator * g);
|
414
|
+
#endif
|
415
|
+
|
416
|
+
#ifndef DISABLE_ADA
|
417
|
+
extern void generate_program_ada(struct generator * g);
|
418
|
+
#endif
|
@@ -0,0 +1,286 @@
|
|
1
|
+
|
2
|
+
#include <stdio.h> /* for printf */
|
3
|
+
#include <stdlib.h> /* malloc, free */
|
4
|
+
#include <string.h> /* memmove */
|
5
|
+
|
6
|
+
#include "header.h"
|
7
|
+
|
8
|
+
/* Size in bytes of the bookkeeping header (two ints: capacity and size) that
 * precedes the symbol data of every block. Parenthesised so the macro is safe
 * in any expression context (e.g. as the right operand of '/', '%' or '-'). */
#define HEAD (2*sizeof(int))
|
9
|
+
#define EXTENDER 40
|
10
|
+
|
11
|
+
|
12
|
+
/* This module provides a simple mechanism for arbitrary length writable
|
13
|
+
strings, called 'blocks'. They are 'symbol *' items rather than 'char *'
|
14
|
+
items however.
|
15
|
+
|
16
|
+
The calls are:
|
17
|
+
|
18
|
+
symbol * b = create_b(n);
|
19
|
+
- create an empty block b with room for n symbols
|
20
|
+
b = increase_capacity(b, n);
|
21
|
+
- increase the capacity of block b by n symbols (b may change)
|
22
|
+
b2 = copy_b(b)
|
23
|
+
- copy block b into b2
|
24
|
+
lose_b(b);
|
25
|
+
- lose block b
|
26
|
+
b = move_to_b(b, n, p);
|
27
|
+
- set the data in b to be the n symbols at address p
|
28
|
+
b = add_to_b(b, n, p);
|
29
|
+
- add the n symbols at address p to the end of the data in b
|
30
|
+
SIZE(b)
|
31
|
+
- is the number of symbols in b
|
32
|
+
For example:
|
33
|
+
|
34
|
+
symbol * b = create_b(0);
|
35
|
+
{ int i;
|
36
|
+
char p[10];
|
37
|
+
for (i = 0; i < 100; i++) {
|
38
|
+
sprintf(p, " %d", i);
|
39
|
+
b = add_s_to_b(b, p);
|
40
|
+
}
|
41
|
+
}
|
42
|
+
|
43
|
+
and b contains " 0 1 2 ... 99" spaced out as symbols.
|
44
|
+
*/
|
45
|
+
|
46
|
+
/* For a block b, SIZE(b) is the number of symbols so far written into it,
|
47
|
+
CAPACITY(b) the total number it can contain, so SIZE(b) <= CAPACITY(b).
|
48
|
+
In fact blocks have 1 extra character over the promised capacity so
|
49
|
+
they can be zero terminated by 'b[SIZE(b)] = 0;' without fear of
|
50
|
+
overwriting.
|
51
|
+
*/
|
52
|
+
|
53
|
+
extern symbol * create_b(int n) {
|
54
|
+
symbol * p = (symbol *) (HEAD + (char *) MALLOC(HEAD + (n + 1) * sizeof(symbol)));
|
55
|
+
CAPACITY(p) = n;
|
56
|
+
SIZE(p) = 0;
|
57
|
+
return p;
|
58
|
+
}
|
59
|
+
|
60
|
+
/* Write the SIZE(p) symbols of block p to 'out', one byte per symbol.
 *
 * A symbol above 255 cannot be emitted as a single byte: a diagnostic is
 * printed and the program exits with status 1. The diagnostic is sent to
 * stderr rather than stdout so that it cannot end up interleaved with the
 * data being written when 'out' is stdout. */
extern void report_b(FILE * out, const symbol * p) {
    int count = SIZE(p);
    int i;
    for (i = 0; i < count; i++) {
        if (p[i] > 255) {
            fprintf(stderr, "In report_b, can't convert p[%d] to char because it's 0x%02x\n", i, (int)p[i]);
            exit(1);
        }
        putc(p[i], out);
    }
}
|
70
|
+
|
71
|
+
extern void output_str(FILE * outfile, struct str * str) {
|
72
|
+
report_b(outfile, str_data(str));
|
73
|
+
}
|
74
|
+
|
75
|
+
/* Release block p. A null p is silently ignored. What is passed to FREE is
 * the start of the allocation, which lies HEAD bytes before the symbols. */
extern void lose_b(symbol * p) {
    if (p != 0) {
        FREE((char *) p - HEAD);
    }
}
|
79
|
+
|
80
|
+
/* Move the contents of block p into a fresh block whose capacity is larger by
 * n + EXTENDER symbols, keeping SIZE unchanged. The old block is released, so
 * the caller must continue with the returned pointer. */
extern symbol * increase_capacity(symbol * p, int n) {
    int old_capacity = CAPACITY(p);
    symbol * bigger = create_b(old_capacity + n + EXTENDER);
    memmove(bigger, p, old_capacity * sizeof(symbol));
    SIZE(bigger) = SIZE(p);
    lose_b(p);
    return bigger;
}
|
86
|
+
|
87
|
+
/* Overwrite the contents of block p with the n symbols at q, enlarging the
 * block first when its capacity is too small. Returns the block, which may be
 * a different pointer from the p passed in. */
extern symbol * move_to_b(symbol * p, int n, const symbol * q) {
    int shortfall = n - CAPACITY(p);
    if (shortfall > 0) {
        p = increase_capacity(p, shortfall);
    }
    memmove(p, q, n * sizeof(symbol));
    SIZE(p) = n;
    return p;
}
|
92
|
+
|
93
|
+
/* Append the n symbols at q to the end of block p, enlarging the block when
 * needed. Returns the block, which may be a different pointer from the p
 * passed in.
 *
 * q is allowed to point into p itself (as happens when a str is appended to
 * itself). For that reason the growth is done here rather than through
 * increase_capacity(): the old block must stay alive until the symbols at q
 * have been copied, otherwise q would be read after being freed. The new
 * capacity is the same as increase_capacity() would have produced. */
extern symbol * add_to_b(symbol * p, int n, const symbol * q) {
    int size = SIZE(p);
    int x = size + n - CAPACITY(p);
    if (x > 0) {
        symbol * grown = create_b(CAPACITY(p) + x + EXTENDER);
        memmove(grown, p, size * sizeof(symbol));
        memmove(grown + size, q, n * sizeof(symbol));
        SIZE(grown) = size + n;
        lose_b(p);  /* only now is it safe to drop the old block */
        return grown;
    }
    memmove(p + size, q, n * sizeof(symbol));
    SIZE(p) = size + n;
    return p;
}
|
98
|
+
|
99
|
+
/* Return a new block holding a copy of the symbols in block p. The new block
 * is created with capacity SIZE(p), so filling it never needs to grow it. */
extern symbol * copy_b(const symbol * p) {
    int len = SIZE(p);
    symbol * duplicate = create_b(len);
    return move_to_b(duplicate, len, p);
}
|
105
|
+
|
106
|
+
/* Number of allocations made through check_malloc() that have not yet been
 * released through check_free(). */
int space_count = 0;

/* Allocate n bytes and count the allocation in space_count.
 *
 * Callers (via MALLOC) use the result without testing it, so running out of
 * memory is handled here: a message is written to stderr and the program
 * exits with status 1. A null result for a zero-byte request is not treated
 * as a failure, since malloc(0) is permitted to return NULL. */
extern void * check_malloc(int n) {
    void * block;
    space_count++;
    block = malloc(n);
    if (block == NULL && n != 0) {
        fprintf(stderr, "Failed to allocate %d bytes\n", n);
        exit(1);
    }
    return block;
}
|
112
|
+
|
113
|
+
extern void check_free(void * p) {
|
114
|
+
space_count--;
|
115
|
+
free(p);
|
116
|
+
}
|
117
|
+
|
118
|
+
/* To convert a block to a zero terminated string: */
|
119
|
+
|
120
|
+
extern char * b_to_s(const symbol * p) {
|
121
|
+
int n = SIZE(p);
|
122
|
+
char * s = (char *)malloc(n + 1);
|
123
|
+
{
|
124
|
+
int i;
|
125
|
+
for (i = 0; i < n; i++) {
|
126
|
+
if (p[i] > 255) {
|
127
|
+
printf("In b_to_s, can't convert p[%d] to char because it's 0x%02x\n", i, (int)p[i]);
|
128
|
+
exit(1);
|
129
|
+
}
|
130
|
+
s[i] = (char)p[i];
|
131
|
+
}
|
132
|
+
}
|
133
|
+
s[n] = 0;
|
134
|
+
return s;
|
135
|
+
}
|
136
|
+
|
137
|
+
/* To add a zero terminated string to a block. If p = 0 the
|
138
|
+
block is created. */
|
139
|
+
|
140
|
+
extern symbol * add_s_to_b(symbol * p, const char * s) {
|
141
|
+
int n = strlen(s);
|
142
|
+
int k;
|
143
|
+
if (p == 0) p = create_b(n);
|
144
|
+
k = SIZE(p);
|
145
|
+
{
|
146
|
+
int x = k + n - CAPACITY(p);
|
147
|
+
if (x > 0) p = increase_capacity(p, x);
|
148
|
+
}
|
149
|
+
{
|
150
|
+
int i;
|
151
|
+
for (i = 0; i < n; i++) p[i + k] = s[i];
|
152
|
+
}
|
153
|
+
SIZE(p) += n;
|
154
|
+
return p;
|
155
|
+
}
|
156
|
+
|
157
|
+
/* The next section defines string handling capabilities in terms
|
158
|
+
of the lower level block handling capabilities of space.c */
|
159
|
+
/* -------------------------------------------------------------*/
|
160
|
+
|
161
|
+
struct str {
|
162
|
+
symbol * data;
|
163
|
+
};
|
164
|
+
|
165
|
+
/* Create a new string. */
|
166
|
+
extern struct str * str_new(void) {
|
167
|
+
|
168
|
+
struct str * output = (struct str *) malloc(sizeof(struct str));
|
169
|
+
output->data = create_b(0);
|
170
|
+
return output;
|
171
|
+
}
|
172
|
+
|
173
|
+
/* Delete a string. */
|
174
|
+
extern void str_delete(struct str * str) {
|
175
|
+
|
176
|
+
lose_b(str->data);
|
177
|
+
free(str);
|
178
|
+
}
|
179
|
+
|
180
|
+
/* Append a str to this str. */
|
181
|
+
extern void str_append(struct str * str, const struct str * add) {
|
182
|
+
|
183
|
+
symbol * q = add->data;
|
184
|
+
str->data = add_to_b(str->data, SIZE(q), q);
|
185
|
+
}
|
186
|
+
|
187
|
+
/* Append a character to this str. */
|
188
|
+
extern void str_append_ch(struct str * str, char add) {
|
189
|
+
|
190
|
+
symbol sym = (unsigned char)add;
|
191
|
+
str->data = add_to_b(str->data, 1, &sym);
|
192
|
+
}
|
193
|
+
|
194
|
+
/* Append a low level block to a str. */
|
195
|
+
extern void str_append_b(struct str * str, const symbol * q) {
|
196
|
+
|
197
|
+
str->data = add_to_b(str->data, SIZE(q), q);
|
198
|
+
}
|
199
|
+
|
200
|
+
/* Append the tail of a low level block to a str. */
|
201
|
+
extern void str_append_b_tail(struct str * str, const symbol * q, int skip) {
|
202
|
+
if (skip < 0 || skip >= SIZE(q)) return;
|
203
|
+
|
204
|
+
str->data = add_to_b(str->data, SIZE(q) - skip, q + skip);
|
205
|
+
}
|
206
|
+
|
207
|
+
/* Append a (char *, null terminated) string to a str. */
|
208
|
+
extern void str_append_string(struct str * str, const char * s) {
|
209
|
+
|
210
|
+
str->data = add_s_to_b(str->data, s);
|
211
|
+
}
|
212
|
+
|
213
|
+
/* Append the decimal representation of i (as produced by "%d") to 'str'. */
extern void str_append_int(struct str * str, int i) {
    char digits[30];
    sprintf(digits, "%d", i);
    str_append_string(str, digits);
}
|
220
|
+
|
221
|
+
/* Clear a string */
|
222
|
+
extern void str_clear(struct str * str) {
|
223
|
+
|
224
|
+
SIZE(str->data) = 0;
|
225
|
+
}
|
226
|
+
|
227
|
+
/* Set a string */
|
228
|
+
extern void str_assign(struct str * str, const char * s) {
|
229
|
+
|
230
|
+
str_clear(str);
|
231
|
+
str_append_string(str, s);
|
232
|
+
}
|
233
|
+
|
234
|
+
/* Return a new string with the same contents as 'old'. */
extern struct str * str_copy(const struct str * old) {
    struct str * duplicate = str_new();
    str_append(duplicate, old);
    return duplicate;
}
|
241
|
+
|
242
|
+
/* Get the data stored in this str. */
|
243
|
+
extern symbol * str_data(const struct str * str) {
|
244
|
+
|
245
|
+
return str->data;
|
246
|
+
}
|
247
|
+
|
248
|
+
/* Get the length of the str. */
|
249
|
+
extern int str_len(const struct str * str) {
|
250
|
+
|
251
|
+
return SIZE(str->data);
|
252
|
+
}
|
253
|
+
|
254
|
+
/* Get the last character of the str.
|
255
|
+
*
|
256
|
+
* Or -1 if the string is empty.
|
257
|
+
*/
|
258
|
+
extern int str_back(const struct str *str) {
|
259
|
+
return SIZE(str->data) ? str->data[SIZE(str->data) - 1] : -1;
|
260
|
+
}
|
261
|
+
|
262
|
+
/* Decode one character from the sequence starting at p and store it in *slot.
 * Returns the number of symbols consumed: 1, 2 or 3.
 *
 *   first symbol <  0xC0 : taken as is, 1 symbol
 *   first symbol <  0xE0 : 2 symbol sequence
 *   otherwise            : treated as a 3 symbol sequence
 *
 * No validation is done on the continuation symbols; only their low six bits
 * are used. */
extern int get_utf8(const symbol * p, int * slot) {
    int lead = p[0];
    int second;
    if (lead < 0xC0) {   /* 1100 0000 */
        *slot = lead;
        return 1;
    }
    second = p[1];
    if (lead < 0xE0) {   /* 1110 0000 */
        *slot = (lead & 0x1F) << 6 | (second & 0x3F);
        return 2;
    }
    *slot = (lead & 0xF) << 12 | (second & 0x3F) << 6 | (p[2] & 0x3F);
    return 3;
}
|
274
|
+
|
275
|
+
/* Encode ch as UTF-8 into p, one byte value per symbol, and return the number
 * of symbols written: 1 for ch below 0x80, 2 for ch below 0x800, otherwise 3.
 * The caller must provide room for up to three symbols. */
extern int put_utf8(int ch, symbol * p) {
    if (ch >= 0x800) {
        p[0] = (ch >> 12) | 0xE0;
        p[1] = ((ch >> 6) & 0x3F) | 0x80;
        p[2] = (ch & 0x3F) | 0x80;
        return 3;
    }
    if (ch >= 0x80) {
        p[0] = (ch >> 6) | 0xC0;
        p[1] = (ch & 0x3F) | 0x80;
        return 2;
    }
    p[0] = ch;
    return 1;
}
|