mittens 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (137) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/Gemfile +7 -0
  4. data/LICENSE.txt +30 -0
  5. data/README.md +62 -0
  6. data/Rakefile +21 -0
  7. data/ext/mittens/ext.c +96 -0
  8. data/ext/mittens/extconf.rb +12 -0
  9. data/lib/mittens/version.rb +3 -0
  10. data/lib/mittens.rb +7 -0
  11. data/mittens.gemspec +22 -0
  12. data/vendor/snowball/.gitignore +26 -0
  13. data/vendor/snowball/.travis.yml +112 -0
  14. data/vendor/snowball/AUTHORS +27 -0
  15. data/vendor/snowball/CONTRIBUTING.rst +216 -0
  16. data/vendor/snowball/COPYING +29 -0
  17. data/vendor/snowball/GNUmakefile +742 -0
  18. data/vendor/snowball/NEWS +754 -0
  19. data/vendor/snowball/README.rst +37 -0
  20. data/vendor/snowball/ada/README.md +74 -0
  21. data/vendor/snowball/ada/generate/generate.adb +83 -0
  22. data/vendor/snowball/ada/generate.gpr +21 -0
  23. data/vendor/snowball/ada/src/stemmer.adb +620 -0
  24. data/vendor/snowball/ada/src/stemmer.ads +219 -0
  25. data/vendor/snowball/ada/src/stemwords.adb +70 -0
  26. data/vendor/snowball/ada/stemmer_config.gpr +83 -0
  27. data/vendor/snowball/ada/stemwords.gpr +21 -0
  28. data/vendor/snowball/algorithms/arabic.sbl +558 -0
  29. data/vendor/snowball/algorithms/armenian.sbl +301 -0
  30. data/vendor/snowball/algorithms/basque.sbl +149 -0
  31. data/vendor/snowball/algorithms/catalan.sbl +202 -0
  32. data/vendor/snowball/algorithms/danish.sbl +93 -0
  33. data/vendor/snowball/algorithms/dutch.sbl +164 -0
  34. data/vendor/snowball/algorithms/english.sbl +229 -0
  35. data/vendor/snowball/algorithms/finnish.sbl +197 -0
  36. data/vendor/snowball/algorithms/french.sbl +254 -0
  37. data/vendor/snowball/algorithms/german.sbl +139 -0
  38. data/vendor/snowball/algorithms/german2.sbl +145 -0
  39. data/vendor/snowball/algorithms/greek.sbl +701 -0
  40. data/vendor/snowball/algorithms/hindi.sbl +323 -0
  41. data/vendor/snowball/algorithms/hungarian.sbl +241 -0
  42. data/vendor/snowball/algorithms/indonesian.sbl +192 -0
  43. data/vendor/snowball/algorithms/irish.sbl +149 -0
  44. data/vendor/snowball/algorithms/italian.sbl +202 -0
  45. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +240 -0
  46. data/vendor/snowball/algorithms/lithuanian.sbl +373 -0
  47. data/vendor/snowball/algorithms/lovins.sbl +208 -0
  48. data/vendor/snowball/algorithms/nepali.sbl +92 -0
  49. data/vendor/snowball/algorithms/norwegian.sbl +80 -0
  50. data/vendor/snowball/algorithms/porter.sbl +139 -0
  51. data/vendor/snowball/algorithms/portuguese.sbl +218 -0
  52. data/vendor/snowball/algorithms/romanian.sbl +236 -0
  53. data/vendor/snowball/algorithms/russian.sbl +221 -0
  54. data/vendor/snowball/algorithms/serbian.sbl +2379 -0
  55. data/vendor/snowball/algorithms/spanish.sbl +230 -0
  56. data/vendor/snowball/algorithms/swedish.sbl +72 -0
  57. data/vendor/snowball/algorithms/tamil.sbl +405 -0
  58. data/vendor/snowball/algorithms/turkish.sbl +470 -0
  59. data/vendor/snowball/algorithms/yiddish.sbl +460 -0
  60. data/vendor/snowball/charsets/ISO-8859-2.sbl +98 -0
  61. data/vendor/snowball/charsets/KOI8-R.sbl +74 -0
  62. data/vendor/snowball/charsets/cp850.sbl +130 -0
  63. data/vendor/snowball/compiler/analyser.c +1547 -0
  64. data/vendor/snowball/compiler/driver.c +615 -0
  65. data/vendor/snowball/compiler/generator.c +1748 -0
  66. data/vendor/snowball/compiler/generator_ada.c +1702 -0
  67. data/vendor/snowball/compiler/generator_csharp.c +1322 -0
  68. data/vendor/snowball/compiler/generator_go.c +1278 -0
  69. data/vendor/snowball/compiler/generator_java.c +1313 -0
  70. data/vendor/snowball/compiler/generator_js.c +1316 -0
  71. data/vendor/snowball/compiler/generator_pascal.c +1387 -0
  72. data/vendor/snowball/compiler/generator_python.c +1337 -0
  73. data/vendor/snowball/compiler/generator_rust.c +1295 -0
  74. data/vendor/snowball/compiler/header.h +418 -0
  75. data/vendor/snowball/compiler/space.c +286 -0
  76. data/vendor/snowball/compiler/syswords.h +86 -0
  77. data/vendor/snowball/compiler/syswords2.h +13 -0
  78. data/vendor/snowball/compiler/tokeniser.c +567 -0
  79. data/vendor/snowball/csharp/.gitignore +8 -0
  80. data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore +1 -0
  81. data/vendor/snowball/csharp/Snowball/Among.cs +108 -0
  82. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +36 -0
  83. data/vendor/snowball/csharp/Snowball/Stemmer.cs +660 -0
  84. data/vendor/snowball/csharp/Stemwords/App.config +6 -0
  85. data/vendor/snowball/csharp/Stemwords/Program.cs +114 -0
  86. data/vendor/snowball/doc/TODO +12 -0
  87. data/vendor/snowball/doc/libstemmer_c_README +148 -0
  88. data/vendor/snowball/doc/libstemmer_csharp_README +53 -0
  89. data/vendor/snowball/doc/libstemmer_java_README +67 -0
  90. data/vendor/snowball/doc/libstemmer_js_README +48 -0
  91. data/vendor/snowball/doc/libstemmer_python_README +113 -0
  92. data/vendor/snowball/examples/stemwords.c +204 -0
  93. data/vendor/snowball/go/README.md +55 -0
  94. data/vendor/snowball/go/among.go +16 -0
  95. data/vendor/snowball/go/env.go +403 -0
  96. data/vendor/snowball/go/stemwords/generate.go +68 -0
  97. data/vendor/snowball/go/stemwords/main.go +68 -0
  98. data/vendor/snowball/go/util.go +34 -0
  99. data/vendor/snowball/iconv.py +50 -0
  100. data/vendor/snowball/include/libstemmer.h +78 -0
  101. data/vendor/snowball/java/org/tartarus/snowball/Among.java +29 -0
  102. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +381 -0
  103. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +8 -0
  104. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +75 -0
  105. data/vendor/snowball/javascript/base-stemmer.js +294 -0
  106. data/vendor/snowball/javascript/stemwords.js +106 -0
  107. data/vendor/snowball/libstemmer/libstemmer_c.in +96 -0
  108. data/vendor/snowball/libstemmer/mkalgorithms.pl +90 -0
  109. data/vendor/snowball/libstemmer/mkmodules.pl +267 -0
  110. data/vendor/snowball/libstemmer/modules.txt +63 -0
  111. data/vendor/snowball/libstemmer/test.c +34 -0
  112. data/vendor/snowball/pascal/.gitignore +4 -0
  113. data/vendor/snowball/pascal/SnowballProgram.pas +430 -0
  114. data/vendor/snowball/pascal/generate.pl +23 -0
  115. data/vendor/snowball/pascal/stemwords-template.dpr +78 -0
  116. data/vendor/snowball/python/MANIFEST.in +7 -0
  117. data/vendor/snowball/python/create_init.py +54 -0
  118. data/vendor/snowball/python/setup.cfg +6 -0
  119. data/vendor/snowball/python/setup.py +81 -0
  120. data/vendor/snowball/python/snowballstemmer/among.py +13 -0
  121. data/vendor/snowball/python/snowballstemmer/basestemmer.py +323 -0
  122. data/vendor/snowball/python/stemwords.py +101 -0
  123. data/vendor/snowball/python/testapp.py +28 -0
  124. data/vendor/snowball/runtime/api.c +58 -0
  125. data/vendor/snowball/runtime/api.h +32 -0
  126. data/vendor/snowball/runtime/header.h +61 -0
  127. data/vendor/snowball/runtime/utilities.c +513 -0
  128. data/vendor/snowball/rust/Cargo.toml +7 -0
  129. data/vendor/snowball/rust/build.rs +55 -0
  130. data/vendor/snowball/rust/rust-pre-1.27-compat.patch +30 -0
  131. data/vendor/snowball/rust/src/main.rs +102 -0
  132. data/vendor/snowball/rust/src/snowball/algorithms/mod.rs +2 -0
  133. data/vendor/snowball/rust/src/snowball/among.rs +6 -0
  134. data/vendor/snowball/rust/src/snowball/mod.rs +6 -0
  135. data/vendor/snowball/rust/src/snowball/snowball_env.rs +421 -0
  136. data/vendor/snowball/tests/stemtest.c +95 -0
  137. metadata +178 -0
@@ -0,0 +1,418 @@
1
+ #include <stdio.h>
2
+
3
+ #define SNOWBALL_VERSION "2.2.0"
4
+
5
+ typedef unsigned char byte;
6
+ typedef unsigned short symbol;
7
+
8
+ #define true 1
9
+ #define false 0
10
+
11
+ #define MALLOC check_malloc
12
+ #define FREE check_free
13
+
14
+ #define NEW(type, p) struct type * p = (struct type *) MALLOC(sizeof(struct type))
15
+ #define NEWVEC(type, p, n) struct type * p = (struct type *) MALLOC(sizeof(struct type) * (n))
16
+
17
+ #define SIZE(p) ((int *)(p))[-1]
18
+ #define CAPACITY(p) ((int *)(p))[-2]
19
+
20
+ extern symbol * create_b(int n);
21
+ extern void report_b(FILE * out, const symbol * p);
22
+ extern void lose_b(symbol * p);
23
+ extern symbol * increase_capacity(symbol * p, int n);
24
+ extern symbol * move_to_b(symbol * p, int n, const symbol * q);
25
+ extern symbol * add_to_b(symbol * p, int n, const symbol * q);
26
+ extern symbol * copy_b(const symbol * p);
27
+ extern char * b_to_s(const symbol * p);
28
+ extern symbol * add_s_to_b(symbol * p, const char * s);
29
+
30
+ #define MOVE_TO_B(B, LIT) \
31
+ move_to_b(B, sizeof(LIT) / sizeof(LIT[0]), LIT)
32
+
33
+ struct str; /* defined in space.c */
34
+
35
+ extern struct str * str_new(void);
36
+ extern void str_delete(struct str * str);
37
+ extern void str_append(struct str * str, const struct str * add);
38
+ extern void str_append_ch(struct str * str, char add);
39
+ extern void str_append_symbol(struct str * str, symbol add);
40
+ extern void str_append_b(struct str * str, const symbol * q);
41
+ extern void str_append_b_tail(struct str * str, const symbol * q, int skip);
42
+ extern void str_append_string(struct str * str, const char * s);
43
+ extern void str_append_int(struct str * str, int i);
44
+ extern void str_clear(struct str * str);
45
+ extern void str_assign(struct str * str, const char * s);
46
+ extern struct str * str_copy(const struct str * old);
47
+ extern symbol * str_data(const struct str * str);
48
+ extern int str_len(const struct str * str);
49
+ extern int str_back(const struct str *str);
50
+ extern int get_utf8(const symbol * p, int * slot);
51
+ extern int put_utf8(int ch, symbol * p);
52
+ extern void output_str(FILE * outfile, struct str * str);
53
+
54
+ typedef enum { ENC_SINGLEBYTE, ENC_UTF8, ENC_WIDECHARS } enc;
55
+
56
+ struct m_pair { /* One entry in a singly linked list of name/value pairs; struct tokeniser keeps the list head in m_pairs. */
57
+
58
+ struct m_pair * next; /* following entry in the list */
59
+ symbol * name; /* symbol block holding the entry's name */
60
+ symbol * value; /* symbol block holding the entry's value */
61
+
62
+ };
63
+
64
+ /* struct input must be a prefix of struct tokeniser. */
65
+ struct input { /* These six members are repeated, same order and types, at the start of struct tokeniser. */
66
+
67
+ struct input * next; /* link to another struct input */
68
+ symbol * p; /* NOTE(review): presumably the text being read - confirm in tokeniser.c */
69
+ int c; /* NOTE(review): presumably the current offset into p - confirm in tokeniser.c */
70
+ char * file; /* NOTE(review): presumably the source file name, used in diagnostics - confirm */
71
+ int file_needs_freeing; /* flag; by its name, records whether 'file' must be freed */
72
+ int line_number;
73
+
74
+ };
75
+
76
+ struct include { /* Node of a singly linked list; list heads live in struct tokeniser (includes) and struct options (includes, includes_end). */
77
+
78
+ struct include * next; /* following node in the list */
79
+ symbol * b; /* symbol block for this node; NOTE(review): presumably an include directory path - confirm in driver.c */
80
+
81
+ };
82
+
83
+ enum token_codes {
84
+
85
+ #include "syswords2.h"
86
+
87
+ c_mathassign,
88
+ c_name,
89
+ c_number,
90
+ c_literalstring,
91
+ c_neg,
92
+ c_call,
93
+ c_grouping,
94
+ c_booltest,
95
+
96
+ NUM_TOKEN_CODES
97
+ };
98
+
99
+ enum uplus_modes { /* Values for uplusmode in struct tokeniser: how U+ has been used so far. */
100
+ UPLUS_NONE, /* U+ not used yet */
101
+ UPLUS_DEFINED, /* used as: stringdef U+xxxx .... */
102
+ UPLUS_UNICODE /* {U+xxxx} used with implicit meaning */
103
+ };
104
+
105
+ /* struct input must be a prefix of struct tokeniser. */
106
+ struct tokeniser {
107
+
108
+ struct input * next;
109
+ symbol * p;
110
+ int c;
111
+ char * file;
112
+ int file_needs_freeing;
113
+ int line_number;
114
+ symbol * b;
115
+ symbol * b2;
116
+ int number;
117
+ int m_start;
118
+ int m_end;
119
+ struct m_pair * m_pairs;
120
+ int get_depth;
121
+ int error_count;
122
+ int token;
123
+ int previous_token;
124
+ byte token_held;
125
+ enc encoding;
126
+
127
+ int omission;
128
+ struct include * includes;
129
+
130
+ /* Mode in which U+ has been used:
131
+ * UPLUS_NONE - not used yet
132
+ * UPLUS_DEFINED - stringdef U+xxxx ....
133
+ * UPLUS_UNICODE - {U+xxxx} used with implicit meaning
134
+ */
135
+ int uplusmode;
136
+
137
+ char token_disabled[NUM_TOKEN_CODES];
138
+ };
139
+
140
+ extern symbol * get_input(const char * filename);
141
+ extern struct tokeniser * create_tokeniser(symbol * b, char * file);
142
+ extern int read_token(struct tokeniser * t);
143
+ extern const char * name_of_token(int code);
144
+ extern void disable_token(struct tokeniser * t, int code);
145
+ extern void close_tokeniser(struct tokeniser * t);
146
+
147
+ extern int space_count;
148
+ extern void * check_malloc(int n);
149
+ extern void check_free(void * p);
150
+
151
+ struct node;
152
+
153
+ struct name {
154
+
155
+ struct name * next;
156
+ symbol * b;
157
+ int type; /* t_string etc */
158
+ int mode; /* )_ for routines, externals */
159
+ struct node * definition; /* ) */
160
+ int count; /* 0, 1, 2 for each type */
161
+ struct grouping * grouping; /* for grouping names */
162
+ byte referenced;
163
+ byte used_in_among; /* Function used in among? */
164
+ byte value_used; /* (For variables) is its value ever used? */
165
+ byte initialised; /* (For variables) is it ever initialised? */
166
+ byte used_in_definition; /* (grouping) used in grouping definition? */
167
+ struct node * used; /* First use, or NULL if not used */
168
+ struct name * local_to; /* Local to one routine/external */
169
+ int declaration_line_number;/* Line number of declaration */
170
+
171
+ };
172
+
173
+ struct literalstring {
174
+
175
+ struct literalstring * next;
176
+ symbol * b;
177
+
178
+ };
179
+
180
+ struct amongvec {
181
+
182
+ symbol * b; /* the string giving the case */
183
+ int size; /* - and its size */
184
+ struct node * action; /* the corresponding action */
185
+ int i; /* the amongvec index of the longest substring of b */
186
+ int result; /* the numeric result for the case */
187
+ int line_number; /* for diagnostics and stable sorting */
188
+ struct name * function;
189
+
190
+ };
191
+
192
+ struct among {
193
+
194
+ struct among * next;
195
+ struct amongvec * b; /* pointer to the amongvec */
196
+ int number; /* amongs are numbered 0, 1, 2 ... */
197
+ int literalstring_count; /* in this among */
198
+ int command_count; /* in this among (includes "no command" entries) */
199
+ int nocommand_count; /* number of "no command" entries in this among */
200
+ int function_count; /* in this among */
201
+ int amongvar_needed; /* do we need to set among_var? */
202
+ struct node * starter; /* i.e. among( (starter) 'string' ... ) */
203
+ struct node * substring; /* i.e. substring ... among ( ... ) */
204
+ struct node ** commands; /* array with command_count entries */
205
+ };
206
+
207
+ struct grouping {
208
+
209
+ struct grouping * next;
210
+ symbol * b; /* the characters of this group */
211
+ int largest_ch; /* character with max code */
212
+ int smallest_ch; /* character with min code */
213
+ struct name * name; /* so g->name->grouping == g */
214
+ int line_number;
215
+ };
216
+
217
+ struct node {
218
+
219
+ struct node * next;
220
+ struct node * left;
221
+ struct node * aux; /* used in setlimit */
222
+ struct among * among; /* used in among */
223
+ struct node * right;
224
+ int type;
225
+ int mode;
226
+ struct node * AE;
227
+ struct name * name;
228
+ symbol * literalstring;
229
+ int number;
230
+ int line_number;
231
+ int amongvar_needed; /* used in routine definitions */
232
+ };
233
+
234
+ enum name_types { /* Kinds of declared name; the values index name_count[] in struct analyser. */
235
+
236
+ t_size = 6, /* number of kinds below; used as the length of name_count[] */
237
+
238
+ t_string = 0, t_boolean = 1, t_integer = 2, t_routine = 3, t_external = 4,
239
+ t_grouping = 5
240
+
241
+ /* If this list is extended, adjust wvn in generator.c */
242
+ };
243
+
244
+ /* In name_count[i] below, remember that
245
+ type is
246
+ ----+----
247
+ 0 | string
248
+ 1 | boolean
249
+ 2 | integer
250
+ 3 | routine
251
+ 4 | external
252
+ 5 | grouping
253
+ */
254
+
255
+ struct analyser {
256
+
257
+ struct tokeniser * tokeniser;
258
+ struct node * nodes;
259
+ struct name * names;
260
+ struct literalstring * literalstrings;
261
+ int mode;
262
+ byte modifyable; /* false inside reverse(...) */
263
+ struct node * program;
264
+ struct node * program_end;
265
+ int name_count[t_size]; /* name_count[i] counts the number of names of type i */
266
+ struct among * amongs;
267
+ struct among * amongs_end;
268
+ int among_count;
269
+ int amongvar_needed; /* used in reading routine definitions */
270
+ struct grouping * groupings;
271
+ struct grouping * groupings_end;
272
+ struct node * substring; /* pending 'substring' in current routine definition */
273
+ enc encoding;
274
+ byte int_limits_used; /* are maxint or minint used? */
275
+ };
276
+
277
+ enum analyser_modes {
278
+
279
+ m_forward = 0, m_backward /*, m_integer */
280
+
281
+ };
282
+
283
+ extern void print_program(struct analyser * a);
284
+ extern struct analyser * create_analyser(struct tokeniser * t);
285
+ extern void close_analyser(struct analyser * a);
286
+
287
+ extern void read_program(struct analyser * a);
288
+
289
+ struct generator {
290
+
291
+ struct analyser * analyser;
292
+ struct options * options;
293
+ int unreachable; /* 0 if code can be reached, 1 if current code
294
+ * is unreachable. */
295
+ int var_number; /* Number of next variable to use. */
296
+ struct str * outbuf; /* temporary str to store output */
297
+ struct str * declarations; /* str storing variable declarations */
298
+ int next_label;
299
+ #ifndef DISABLE_PYTHON
300
+ int max_label;
301
+ #endif
302
+ int margin;
303
+
304
+ /* if > 0, keep_count to restore in case of a failure;
305
+ * if < 0, the negated keep_count for the limit to restore in case of
306
+ * failure. */
307
+ int failure_keep_count;
308
+ #if !defined(DISABLE_JAVA) && !defined(DISABLE_JS) && !defined(DISABLE_PYTHON) && !defined(DISABLE_CSHARP)
309
+ struct str * failure_str; /* This is used by some generators instead of failure_keep_count */
310
+ #endif
311
+
312
+ int label_used; /* Keep track of whether the failure label is used. */
313
+ int failure_label;
314
+ int debug_count;
315
+ int copy_from_count; /* count of calls to copy_from() */
316
+
317
+ const char * S[10]; /* strings */
318
+ symbol * B[10]; /* blocks */
319
+ int I[10]; /* integers */
320
+ struct name * V[5]; /* variables */
321
+ symbol * L[5]; /* literals, used in formatted write */
322
+
323
+ int line_count; /* counts number of lines output */
324
+ int line_labelled; /* in ISO C, will need extra ';' if it is a block end */
325
+ int literalstring_count;
326
+ int keep_count; /* used to number keep/restore pairs to avoid compiler warnings
327
+ about shadowed variables */
328
+ int temporary_used; /* track if temporary variable used (for Pascal) */
329
+ };
330
+
331
+ /* Special values for failure_label in struct generator. */
332
+ enum special_labels {
333
+ x_return = -1
334
+ };
335
+
336
+ struct options {
337
+
338
+ /* for the command line: */
339
+
340
+ const char * output_file;
341
+ char * name;
342
+ FILE * output_src;
343
+ FILE * output_h;
344
+ byte syntax_tree;
345
+ byte comments;
346
+ enc encoding;
347
+ enum { LANG_JAVA, LANG_C, LANG_CPLUSPLUS, LANG_CSHARP, LANG_PASCAL, LANG_PYTHON, LANG_JAVASCRIPT, LANG_RUST, LANG_GO, LANG_ADA } make_lang;
348
+ const char * externals_prefix;
349
+ const char * variables_prefix;
350
+ const char * runtime_path;
351
+ const char * parent_class_name;
352
+ const char * package;
353
+ const char * go_snowball_runtime;
354
+ const char * string_class;
355
+ const char * among_class;
356
+ struct include * includes;
357
+ struct include * includes_end;
358
+ };
359
+
360
+ /* Generator functions common to several backends. */
361
+
362
+ extern struct generator * create_generator(struct analyser * a, struct options * o);
363
+ extern void close_generator(struct generator * g);
364
+
365
+ extern void write_char(struct generator * g, int ch);
366
+ extern void write_newline(struct generator * g);
367
+ extern void write_string(struct generator * g, const char * s);
368
+ extern void write_int(struct generator * g, int i);
369
+ extern void write_symbol(struct generator * g, symbol s);
370
+ extern void write_b(struct generator * g, symbol * b);
371
+ extern void write_str(struct generator * g, struct str * str);
372
+
373
+ extern void write_comment_content(struct generator * g, struct node * p);
374
+ extern void write_generated_comment_content(struct generator * g);
375
+ extern void write_start_comment(struct generator * g,
376
+ const char * comment_start,
377
+ const char * comment_end);
378
+
379
+ extern int K_needed(struct generator * g, struct node * p);
380
+ extern int repeat_restore(struct generator * g, struct node * p);
381
+
382
+ /* Generator for C code. */
383
+ extern void generate_program_c(struct generator * g);
384
+
385
+ #ifndef DISABLE_JAVA
386
+ /* Generator for Java code. */
387
+ extern void generate_program_java(struct generator * g);
388
+ #endif
389
+
390
+ #ifndef DISABLE_CSHARP
391
+ /* Generator for C# code. */
392
+ extern void generate_program_csharp(struct generator * g);
393
+ #endif
394
+
395
+ #ifndef DISABLE_PASCAL
396
+ extern void generate_program_pascal(struct generator * g);
397
+ #endif
398
+
399
+ #ifndef DISABLE_PYTHON
400
+ /* Generator for Python code. */
401
+ extern void generate_program_python(struct generator * g);
402
+ #endif
403
+
404
+ #ifndef DISABLE_JS
405
+ extern void generate_program_js(struct generator * g);
406
+ #endif
407
+
408
+ #ifndef DISABLE_RUST
409
+ extern void generate_program_rust(struct generator * g);
410
+ #endif
411
+
412
+ #ifndef DISABLE_GO
413
+ extern void generate_program_go(struct generator * g);
414
+ #endif
415
+
416
+ #ifndef DISABLE_ADA
417
+ extern void generate_program_ada(struct generator * g);
418
+ #endif
@@ -0,0 +1,286 @@
1
+
2
+ #include <stdio.h> /* for printf */
3
+ #include <stdlib.h> /* malloc, free */
4
+ #include <string.h> /* memmove */
5
+
6
+ #include "header.h"
7
+
8
/* Size in bytes of the hidden header stored immediately before a block's
   data: two ints, read back through the SIZE() and CAPACITY() macros.
   Parenthesised so that it expands safely inside any larger expression. */
#define HEAD (2 * sizeof(int))
/* Extra symbols of slack added each time a block has to grow. */
#define EXTENDER 40
10
+
11
+
12
+ /* This module provides a simple mechanism for arbitrary length writable
13
+ strings, called 'blocks'. They are 'symbol *' items rather than 'char *'
14
+ items however.
15
+
16
+ The calls are:
17
+
18
+ symbol * b = create_b(n);
19
+ - create an empty block b with room for n symbols
20
+ b = increase_capacity(b, n);
21
+ - increase the capacity of block b by n symbols (b may change)
22
+ b2 = copy_b(b)
23
+ - copy block b into b2
24
+ lose_b(b);
25
+ - lose block b
26
+ b = move_to_b(b, n, p);
27
+ - set the data in b to be the n symbols at address p
28
+ b = add_to_b(b, n, p);
29
+ - add the n symbols at address p to the end of the data in b
30
+ SIZE(b)
31
+ - is the number of symbols in b
32
+ For example:
33
+
34
+ symbol * b = create_b(0);
35
+ { int i;
36
+ char p[10];
37
+ for (i = 0; i < 100; i++) {
38
+ sprintf(p, " %d", i);
39
+ b = add_s_to_b(b, p);
40
+ }
41
+ }
42
+
43
+ and b contains " 0 1 2 ... 99" spaced out as symbols.
44
+ */
45
+
46
+ /* For a block b, SIZE(b) is the number of symbols so far written into it,
47
+ CAPACITY(b) the total number it can contain, so SIZE(b) <= CAPACITY(b).
48
+ In fact blocks have 1 extra character over the promised capacity so
49
+ they can be zero terminated by 'b[SIZE(b)] = 0;' without fear of
50
+ overwriting.
51
+ */
52
+
53
+ extern symbol * create_b(int n) {
54
+ symbol * p = (symbol *) (HEAD + (char *) MALLOC(HEAD + (n + 1) * sizeof(symbol)));
55
+ CAPACITY(p) = n;
56
+ SIZE(p) = 0;
57
+ return p;
58
+ }
59
+
60
+ extern void report_b(FILE * out, const symbol * p) {
61
+ int i;
62
+ for (i = 0; i < SIZE(p); i++) {
63
+ if (p[i] > 255) {
64
+ printf("In report_b, can't convert p[%d] to char because it's 0x%02x\n", i, (int)p[i]);
65
+ exit(1);
66
+ }
67
+ putc(p[i], out);
68
+ }
69
+ }
70
+
71
+ extern void output_str(FILE * outfile, struct str * str) {
72
+ report_b(outfile, str_data(str));
73
+ }
74
+
75
+ extern void lose_b(symbol * p) {
76
+ if (p == 0) return;
77
+ FREE((char *) p - HEAD);
78
+ }
79
+
80
+ extern symbol * increase_capacity(symbol * p, int n) {
81
+ symbol * q = create_b(CAPACITY(p) + n + EXTENDER);
82
+ memmove(q, p, CAPACITY(p) * sizeof(symbol));
83
+ SIZE(q) = SIZE(p);
84
+ lose_b(p); return q;
85
+ }
86
+
87
+ extern symbol * move_to_b(symbol * p, int n, const symbol * q) {
88
+ int x = n - CAPACITY(p);
89
+ if (x > 0) p = increase_capacity(p, x);
90
+ memmove(p, q, n * sizeof(symbol)); SIZE(p) = n; return p;
91
+ }
92
+
93
+ extern symbol * add_to_b(symbol * p, int n, const symbol * q) {
94
+ int x = SIZE(p) + n - CAPACITY(p);
95
+ if (x > 0) p = increase_capacity(p, x);
96
+ memmove(p + SIZE(p), q, n * sizeof(symbol)); SIZE(p) += n; return p;
97
+ }
98
+
99
+ extern symbol * copy_b(const symbol * p) {
100
+ int n = SIZE(p);
101
+ symbol * q = create_b(n);
102
+ move_to_b(q, n, p);
103
+ return q;
104
+ }
105
+
106
/* Number of check_malloc() allocations not yet matched by check_free(). */
int space_count = 0;

/* Allocate n bytes and record the allocation in space_count.
 *
 * Never returns a null pointer: a negative size or an allocation failure
 * prints a message on stderr and exits with status 1, because callers (the
 * NEW/NEWVEC macros, create_b) use the result without checking it.  A
 * zero-byte request is rounded up to one byte so that success always yields
 * a non-null pointer.  Only successful allocations are counted.
 */
extern void * check_malloc(int n) {
    void * p;
    if (n < 0) {
        fprintf(stderr, "check_malloc: invalid allocation size %d\n", n);
        exit(1);
    }
    p = malloc(n > 0 ? (size_t) n : 1);
    if (p == NULL) {
        fprintf(stderr, "check_malloc: out of memory allocating %d bytes\n", n);
        exit(1);
    }
    space_count++;
    return p;
}

/* Release memory obtained from check_malloc() and decrement space_count. */
extern void check_free(void * p) {
    space_count--;
    free(p);
}
117
+
118
+ /* To convert a block to a zero terminated string: */
119
+
120
+ extern char * b_to_s(const symbol * p) {
121
+ int n = SIZE(p);
122
+ char * s = (char *)malloc(n + 1);
123
+ {
124
+ int i;
125
+ for (i = 0; i < n; i++) {
126
+ if (p[i] > 255) {
127
+ printf("In b_to_s, can't convert p[%d] to char because it's 0x%02x\n", i, (int)p[i]);
128
+ exit(1);
129
+ }
130
+ s[i] = (char)p[i];
131
+ }
132
+ }
133
+ s[n] = 0;
134
+ return s;
135
+ }
136
+
137
+ /* To add a zero terminated string to a block. If p = 0 the
138
+ block is created. */
139
+
140
+ extern symbol * add_s_to_b(symbol * p, const char * s) {
141
+ int n = strlen(s);
142
+ int k;
143
+ if (p == 0) p = create_b(n);
144
+ k = SIZE(p);
145
+ {
146
+ int x = k + n - CAPACITY(p);
147
+ if (x > 0) p = increase_capacity(p, x);
148
+ }
149
+ {
150
+ int i;
151
+ for (i = 0; i < n; i++) p[i + k] = s[i];
152
+ }
153
+ SIZE(p) += n;
154
+ return p;
155
+ }
156
+
157
+ /* The next section defines string handling capabilities in terms
158
+ of the lower level block handling capabilities of space.c */
159
+ /* -------------------------------------------------------------*/
160
+
161
+ struct str { /* String type declared opaquely in header.h: wraps one symbol block. */
162
+ symbol * data; /* the block holding the contents; reassigned whenever an append returns a new block */
163
+ };
164
+
165
+ /* Create a new string. */
166
+ extern struct str * str_new(void) {
167
+
168
+ struct str * output = (struct str *) malloc(sizeof(struct str));
169
+ output->data = create_b(0);
170
+ return output;
171
+ }
172
+
173
+ /* Delete a string. */
174
+ extern void str_delete(struct str * str) {
175
+
176
+ lose_b(str->data);
177
+ free(str);
178
+ }
179
+
180
+ /* Append a str to this str. */
181
+ extern void str_append(struct str * str, const struct str * add) {
182
+
183
+ symbol * q = add->data;
184
+ str->data = add_to_b(str->data, SIZE(q), q);
185
+ }
186
+
187
+ /* Append a character to this str. */
188
+ extern void str_append_ch(struct str * str, char add) {
189
+
190
+ symbol sym = (unsigned char)add;
191
+ str->data = add_to_b(str->data, 1, &sym);
192
+ }
193
+
194
+ /* Append a low level block to a str. */
195
+ extern void str_append_b(struct str * str, const symbol * q) {
196
+
197
+ str->data = add_to_b(str->data, SIZE(q), q);
198
+ }
199
+
200
+ /* Append the tail of a low level block to a str. */
201
+ extern void str_append_b_tail(struct str * str, const symbol * q, int skip) {
202
+ if (skip < 0 || skip >= SIZE(q)) return;
203
+
204
+ str->data = add_to_b(str->data, SIZE(q) - skip, q + skip);
205
+ }
206
+
207
+ /* Append a (char *, null terminated) string to a str. */
208
+ extern void str_append_string(struct str * str, const char * s) {
209
+
210
+ str->data = add_s_to_b(str->data, s);
211
+ }
212
+
213
+ /* Append an integer to a str. */
214
+ extern void str_append_int(struct str * str, int i) {
215
+
216
+ char s[30];
217
+ sprintf(s, "%d", i);
218
+ str_append_string(str, s);
219
+ }
220
+
221
+ /* Clear a string */
222
+ extern void str_clear(struct str * str) {
223
+
224
+ SIZE(str->data) = 0;
225
+ }
226
+
227
+ /* Set a string */
228
+ extern void str_assign(struct str * str, const char * s) {
229
+
230
+ str_clear(str);
231
+ str_append_string(str, s);
232
+ }
233
+
234
+ /* Copy a string. */
235
+ extern struct str * str_copy(const struct str * old) {
236
+
237
+ struct str * newstr = str_new();
238
+ str_append(newstr, old);
239
+ return newstr;
240
+ }
241
+
242
+ /* Get the data stored in this str. */
243
+ extern symbol * str_data(const struct str * str) {
244
+
245
+ return str->data;
246
+ }
247
+
248
+ /* Get the length of the str. */
249
+ extern int str_len(const struct str * str) {
250
+
251
+ return SIZE(str->data);
252
+ }
253
+
254
+ /* Get the last character of the str.
255
+ *
256
+ * Or -1 if the string is empty.
257
+ */
258
+ extern int str_back(const struct str *str) {
259
+ return SIZE(str->data) ? str->data[SIZE(str->data) - 1] : -1;
260
+ }
261
+
262
+ extern int get_utf8(const symbol * p, int * slot) {
263
+ int b0, b1;
264
+ b0 = *p++;
265
+ if (b0 < 0xC0) { /* 1100 0000 */
266
+ * slot = b0; return 1;
267
+ }
268
+ b1 = *p++;
269
+ if (b0 < 0xE0) { /* 1110 0000 */
270
+ * slot = (b0 & 0x1F) << 6 | (b1 & 0x3F); return 2;
271
+ }
272
+ * slot = (b0 & 0xF) << 12 | (b1 & 0x3F) << 6 | (*p & 0x3F); return 3;
273
+ }
274
+
275
+ extern int put_utf8(int ch, symbol * p) {
276
+ if (ch < 0x80) {
277
+ p[0] = ch; return 1;
278
+ }
279
+ if (ch < 0x800) {
280
+ p[0] = (ch >> 6) | 0xC0;
281
+ p[1] = (ch & 0x3F) | 0x80; return 2;
282
+ }
283
+ p[0] = (ch >> 12) | 0xE0;
284
+ p[1] = ((ch >> 6) & 0x3F) | 0x80;
285
+ p[2] = (ch & 0x3F) | 0x80; return 3;
286
+ }