chinwag 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Rakefile +35 -0
- data/ext/chinwag/chinwag.c +246 -0
- data/ext/chinwag/chinwag.h +71 -0
- data/ext/chinwag/config.c +28 -0
- data/ext/chinwag/config.h +42 -0
- data/ext/chinwag/dict.c +670 -0
- data/ext/chinwag/dict.h +50 -0
- data/ext/chinwag/extconf.rb +11 -0
- data/ext/chinwag/generator.c +541 -0
- data/ext/chinwag/generator.h +25 -0
- data/ext/chinwag/ingredient.h +69 -0
- data/ext/chinwag/latin.c +4 -0
- data/ext/chinwag/latin.h +9 -0
- data/ext/chinwag/rb_chinwag_ext.c +792 -0
- data/ext/chinwag/rb_chinwag_ext.h +23 -0
- data/ext/chinwag/seuss.c +4 -0
- data/ext/chinwag/seuss.h +9 -0
- data/ext/chinwag/tokenize.c +53 -0
- data/ext/chinwag/tokenize.h +9 -0
- data/ext/chinwag/utility.c +143 -0
- data/ext/chinwag/utility.h +20 -0
- data/lib/chinwag.rb +5 -0
- metadata +70 -0
@@ -0,0 +1,792 @@
|
|
1
|
+
#include "rb_chinwag_ext.h"
|
2
|
+
|
3
|
+
// Handles for the Ruby module and the dictionary class defined under it;
// both are assigned in Init_chinwag.
VALUE m_chinwag;
VALUE c_cw_dict;

// Module-wide defaults read by m_cw_generate when the caller omits the
// corresponding argument; assigned in Init_chinwag and replaced through
// the set_default_* module functions.
VALUE default_dict;
VALUE default_output_type;
VALUE default_min_output;
VALUE default_max_output;
|
10
|
+
|
11
|
+
// Maps a Ruby Symbol onto the matching cw_t output type.
//
// symbol - must be a Ruby Symbol (enforced with Check_Type); its name has to
//          be one of "letters", "words", "sentences" or "paragraphs".
//
// Returns CW_LETTERS, CW_WORDS, CW_SENTENCES or CW_PARAGRAPHS.
// Raises TypeError for any other symbol name.
cw_t get_cw_t_for_symbol(VALUE symbol)
{
  Check_Type(symbol, T_SYMBOL);

  // rb_to_id yields an ID (not a VALUE); resolve its name only once
  ID id = rb_to_id(symbol);
  const char* name = rb_id2name(id);

  if(strcmp(name, "letters") == 0) return CW_LETTERS;
  if(strcmp(name, "words") == 0) return CW_WORDS;
  if(strcmp(name, "sentences") == 0) return CW_SENTENCES;
  if(strcmp(name, "paragraphs") == 0) return CW_PARAGRAPHS;

  rb_raise(rb_eTypeError, "invalid type (expected :letters, :words, "
  ":sentences, or :paragraphs)");

  // not reached (rb_raise does not return); keeps compilers quiet
  return CW_DEFAULT;
}
|
31
|
+
|
32
|
+
// Chinwag.generate([dict[, type[, min[, max]]]])
//
// argv[0] - dictionary object to draw from (defaults to default_dict)
// argv[1] - output type Symbol, see get_cw_t_for_symbol (defaults to
//           default_output_type)
// argv[2] - Fixnum used as both minimum and maximum amount
// argv[3] - Fixnum overriding the maximum amount
//
// Returns a new Ruby String built from the generator's output.
// Raises ArgumentError for more than four arguments, Exception when
// max < min or when the dictionary fails dict_valid, and RangeError when
// the amounts fall outside 1..10000.
VALUE m_cw_generate(int argc, VALUE* argv, VALUE obj)
{
  dict_t* d = NULL;
  char* output = NULL;
  char* e = NULL;
  VALUE result;
  cw_t cw_type = get_cw_t_for_symbol(default_output_type);
  long min = NUM2LONG(default_min_output);
  long max = NUM2LONG(default_max_output);

  // raise exception if passed wrong number of arguments
  if(argc > 4)
  {
    rb_raise(rb_eArgError, "wrong number of arguments (0..4)");
  }

  // pick the dictionary: explicit argument wins over the module default
  if(argc >= 1)
  {
    Data_Get_Struct(argv[0], dict_t, d);
  }
  else
  {
    Data_Get_Struct(default_dict, dict_t, d);
  }

  if(argc >= 2) cw_type = get_cw_t_for_symbol(argv[1]);

  if(argc >= 3)
  {
    // a single amount pins both ends of the range
    Check_Type(argv[2], T_FIXNUM);
    min = max = FIX2LONG(argv[2]);
  }

  if(argc == 4)
  {
    Check_Type(argv[3], T_FIXNUM);
    max = FIX2LONG(argv[3]);
  }

  if(max < min)
  {
    // min and max are signed longs, hence %ld
    rb_raise(rb_eException,"upper threshold must be more than lower "
    "threshold (min : %ld, max : %ld)", min, max);
  }

  if(min < 1 || max > 10000)
  {
    rb_raise(rb_eRangeError,"out of range (1..10000)");
  }

  if(!dict_valid(*d, &e))
  {
    // raising never returns, so copy the message into a Ruby String and
    // release the C buffer before raising
    VALUE message = rb_str_new2(e ? e : "");
    free(e);
    rb_exc_raise(rb_exc_new3(rb_eException, message));
  }

  switch(cw_type)
  {
    case CW_LETTERS:
    case CW_WORDS:
    case CW_SENTENCES:
    case CW_PARAGRAPHS:
      output = chinwag(cw_type, min, max, *d);
      break;
    default:
      rb_raise(rb_eTypeError, "invalid type (expected LETTERS, WORDS, "
      "SENTENCES, or PARAGRAPHS)");
      break;
  }

  result = rb_str_new2(output);
  // NOTE(review): output is not released here (the original had the free
  // call disabled); who owns chinwag()'s return value is not visible from
  // this file - confirm before freeing.

  return result;
}
|
109
|
+
|
110
|
+
VALUE c_cw_dict_clone(VALUE obj);
VALUE c_cw_dict_close(VALUE obj);

// Chinwag.set_default_dict(dict)
//
// Replaces the module's default dictionary with a clone of `new`.
//
// Returns a clone of the dictionary that was the default before the call.
VALUE m_set_d_dict(VALUE obj, VALUE new)
{
  // Clone the replacement before touching the current default: if cloning
  // raises, or `new` is the current default itself, the default must not
  // already have been closed.
  VALUE replacement = c_cw_dict_clone(new);
  VALUE original = c_cw_dict_clone(default_dict);

  c_cw_dict_close(default_dict);
  default_dict = replacement;

  return original;
}
|
121
|
+
|
122
|
+
// Chinwag.set_default_type(sym)
//
// sym - Symbol naming the new default output type; accepted values are the
//       ones get_cw_t_for_symbol recognises.
//
// Returns the previous default Symbol. Raises TypeError (from
// get_cw_t_for_symbol) when sym is not a Symbol or is not a recognised name,
// in which case the default is left untouched.
VALUE m_set_d_type(VALUE obj, VALUE sym)
{
  VALUE original = default_output_type;

  // called purely for its validation; the resulting cw_t is not needed here
  get_cw_t_for_symbol(sym);

  default_output_type = sym;

  return original;
}
|
142
|
+
|
143
|
+
// Chinwag.set_default_min_output(num)
//
// num - Fixnum to use as the default minimum amount.
//
// Returns the previous default minimum. Raises Exception when the current
// default maximum is below num, and RangeError when the resulting pair
// falls outside 1..10000. The default is only replaced once every check
// has passed, so a rejected value never lingers.
VALUE m_s_min(VALUE obj, VALUE num)
{
  VALUE original = default_min_output;

  Check_Type(num, T_FIXNUM);

  long min = FIX2LONG(num);
  long max = NUM2LONG(default_max_output);

  if(max < min)
  {
    // both values are signed longs, hence %ld
    rb_raise(rb_eException,"upper threshold must be more than lower "
    "threshold (min : %ld, max : %ld)", min, max);
  }

  if(min < 1 || max > 10000)
  {
    rb_raise(rb_eRangeError,"out of range (1..10000)");
  }

  default_min_output = num;

  return original;
}
|
167
|
+
|
168
|
+
// Chinwag.set_default_max_output(num)
//
// num - Fixnum to use as the default maximum amount.
//
// Returns the previous default maximum. Raises Exception when num is below
// the current default minimum, and RangeError when the resulting pair
// falls outside 1..10000. The default is only replaced once every check
// has passed, so a rejected value never lingers.
VALUE m_s_max(VALUE obj, VALUE num)
{
  VALUE original = default_max_output;

  Check_Type(num, T_FIXNUM);

  long min = NUM2LONG(default_min_output);
  long max = FIX2LONG(num);

  if(max < min)
  {
    // both values are signed longs, hence %ld
    rb_raise(rb_eException,"upper threshold must be more than lower "
    "threshold (min : %ld, max : %ld)", min, max);
  }

  if(min < 1 || max > 10000)
  {
    rb_raise(rb_eRangeError,"out of range (1..10000)");
  }

  default_max_output = num;

  return original;
}
|
192
|
+
|
193
|
+
static void c_cw_dict_free(void* dict)
|
194
|
+
{
|
195
|
+
dict_t* d = (dict_t*)dict;
|
196
|
+
if(d->drows && d->count > 0) close_dict(*d);
|
197
|
+
}
|
198
|
+
|
199
|
+
// Returns a malloc'd duplicate of str (caller frees); raises NoMemoryError
// through rb_memerror when the allocation fails.
static char* cw_copy_cstr(const char* str)
{
  size_t len = strlen(str);
  char* copy = (char*)malloc(len + 1);

  if(!copy) rb_memerror();
  memcpy(copy, str, len + 1);

  return copy;
}

// CWDict.open([source[, name]])
//
// With no arguments a blank dictionary is produced. With exactly one
// argument, a String equal to "seussian" or "latin" selects the matching
// embedded dictionary (any other String yields a blank dictionary), and a
// File is read and tokenized, the dictionary being named after the file's
// base name up to its first ".". With two arguments only argv[1] is
// consulted, as the dictionary name.
// NOTE(review): in the two-argument form argv[0] is never inspected -
// confirm whether that is intended.
//
// Returns a new CWDict wrapping a heap-allocated dict_t.
// Raises ArgumentError for more than two arguments, TypeError for an
// unsupported single argument (or a non-String name), and Exception when
// the passed file cannot be read.
VALUE c_cw_dict_open(int argc, VALUE* argv, VALUE obj)
{
  FILE* file_ptr = NULL;
  dict_t d = open_dict(), path_parts, file_parts;
  char* tkns_ptr = NULL; char* name_ptr = NULL;
  char* path_name = NULL; char* file_name = NULL;
  char* name = NULL; bool used_file = false; char* file_buffer = NULL;
  long last_drow = 0, last_word = 0;

  // raise exception if passed wrong number of arguments
  if(argc > 2)
    rb_raise(rb_eArgError, "wrong number of arguments (expected 0..2)");

  if(argc == 1)
  {
    switch(TYPE(argv[0]))
    {
      case T_FILE:
      {
        #ifdef HAVE_RUBY_IO_H
        VALUE file_pathname = RFILE(argv[0])->fptr->pathv;
        path_name = StringValueCStr(file_pathname);
        file_ptr = rb_io_stdio_file(RFILE(argv[0])->fptr);
        #else
        path_name = RFILE(argv[0])->fptr->path;
        file_ptr = RFILE(argv[0])->fptr->f;
        #endif

        // isolate the file name from any leading directories
        if(include(path_name, "/") || include(path_name, "\\"))
        {
          path_parts = split(path_name, "/\\");

          last_drow = path_parts.count - 1;
          last_word = path_parts.drows[last_drow].count - 1;

          file_name = cw_copy_cstr(
            path_parts.drows[last_drow].words[last_word]);

          close_dict(path_parts);
        }
        else
        {
          file_name = cw_copy_cstr(path_name);
        }

        // the dictionary name is the file name up to its first "."
        if(include(file_name, "."))
        {
          file_parts = split(file_name, ".");

          name = cw_copy_cstr(file_parts.drows[0].words[0]);

          close_dict(file_parts);
        }
        else
        {
          name = cw_copy_cstr(file_name);
        }

        used_file = true;

        break;
      }
      case T_STRING:
        tkns_ptr = StringValueCStr(argv[0]);
        break;
      default:
        rb_raise(rb_eTypeError, "invalid type (String or File)");
        break;
    }
  }

  if(argc == 2)
  {
    Check_Type(argv[1], T_STRING);
    name_ptr = StringValueCStr(argv[1]);
  }

  // nothing passed at all: d already holds the blank dictionary
  if(!tkns_ptr && !name_ptr && !used_file)
  {
    // intentionally empty
  }
  // check if tkns references existing, embedded dictionary...
  else if(tkns_ptr && !used_file)
  {
    if(strcmp(tkns_ptr, "seussian") == 0)
    {
      d = open_dict_with_name_and_tokens(name_ptr ? name_ptr : "seussian",
        dict_seuss, DELIMITERS);
    }
    else if(strcmp(tkns_ptr, "latin") == 0)
    {
      d = open_dict_with_name_and_tokens(name_ptr ? name_ptr : "latin",
        dict_latin, DELIMITERS);
    }
  }
  // ...else, if just a name was passed...
  else if(name_ptr && !used_file) d = open_dict_with_name(name_ptr);
  // ...else, see if file exists by passed name...
  else if(used_file && name && file_ptr)
  {
    U32 stringify_result = stringify_file(&file_buffer, file_ptr);
    if(stringify_result == 0)
    {
      // Raising never returns: build the message as a Ruby String, free
      // the temporary C buffers, and only then raise.
      // NOTE(review): whether file_buffer is allocated on this failure
      // path is not visible here, so it is left alone.
      VALUE message = rb_str_new2("unable to process passed file (");
      rb_str_cat2(message, file_name);
      rb_str_cat2(message, ")");

      free(name);
      free(file_name);

      rb_exc_raise(rb_exc_new3(rb_eException, message));
    }

    d = open_dict_with_name_and_tokens(name, file_buffer, DELIMITERS);

    free(file_buffer);
  }
  // ...else, return a blank dictionary

  // create a dictionary pointer
  dict_t* d_ptr = (dict_t*)malloc(sizeof(dict_t));
  if(!d_ptr)
  {
    free(name);
    free(file_name);
    rb_memerror();
  }
  *d_ptr = d;

  free(name);
  free(file_name);

  return Data_Wrap_Struct(c_cw_dict, 0, c_cw_dict_free, d_ptr);
}
|
330
|
+
|
331
|
+
// CWDict#close
//
// Closes the wrapped dictionary when it still has rows, storing whatever
// close_dict returns back into the wrapper so that later calls see the
// closed state.
//
// Returns the receiver.
VALUE c_cw_dict_close(VALUE obj)
{
  dict_t* d;

  // get original pointer from Ruby VM and close
  Data_Get_Struct(obj, dict_t, d);

  if(d->drows && d->count > 0) *d = close_dict(*d);

  return obj;
}
|
343
|
+
|
344
|
+
// CWDict#name
//
// Returns the dictionary's name as a new Ruby String, or an empty String
// when no name is set (NULL or zero-length).
VALUE c_cw_dict_name_g(VALUE obj)
{
  dict_t* dict;
  const char* label = "";

  // fetch the wrapped struct
  Data_Get_Struct(obj, dict_t, dict);

  if(dict->name && strlen(dict->name) > 0) label = dict->name;

  return rb_str_new2(label);
}
|
354
|
+
|
355
|
+
// CWDict#name=
//
// name - Ruby String (StringValueCStr rejects values that cannot be used
//        as a C string).
//
// Stores a heap-allocated copy of name in the wrapped dictionary.
// Returns the receiver.
VALUE c_cw_dict_name_s(VALUE obj, VALUE name)
{
  dict_t* d;

  // get original pointer from Ruby VM
  Data_Get_Struct(obj, dict_t, d);

  // validate/convert first; only then is it safe to measure the string
  const char* source = StringValueCStr(name);
  size_t len = strlen(source);

  char* copy = (char*)malloc(len + 1);
  if(!copy) rb_memerror();
  memcpy(copy, source, len + 1);

  // NOTE(review): the previous d->name is not released here; whether this
  // file owns that buffer is not visible from here - confirm before
  // adding a free.
  d->name = copy;

  return obj;
}
|
368
|
+
|
369
|
+
// CWDict#length
//
// Returns 0 when the dictionary has no rows, otherwise the value reported
// by total_dict as a Ruby Integer.
VALUE c_cw_dict_length(VALUE obj)
{
  dict_t* dict;

  // fetch the wrapped struct
  Data_Get_Struct(obj, dict_t, dict);

  return (dict->count == 0) ? INT2NUM(0) : LONG2NUM(total_dict(*dict));
}
|
379
|
+
|
380
|
+
// CWDict#join([separator])
//
// separator - optional String placed between entries; a single space is
//             used when omitted.
//
// Returns the result of join_dict as a new Ruby String.
// Raises ArgumentError for more than one argument and TypeError when the
// separator is not a String.
// NOTE(review): the buffer returned by join_dict is not released here -
// confirm who owns it.
VALUE c_cw_dict_join(int argc, VALUE* argv, VALUE obj)
{
  dict_t* dict;
  char* separator = " ";

  // argument validation comes first
  if(argc > 1) rb_raise(rb_eArgError, "wrong number of arguments");
  if(argc == 1) Check_Type(argv[0], T_STRING);

  // fetch the wrapped struct
  Data_Get_Struct(obj, dict_t, dict);

  if(argc != 0) separator = StringValueCStr(argv[0]);

  return rb_str_new2(join_dict(*dict, separator));
}
|
394
|
+
|
395
|
+
// CWDict#clone
//
// Returns a new CWDict whose wrapped struct is a deep_copy_dict of the
// receiver's.
VALUE c_cw_dict_clone(VALUE obj)
{
  dict_t* source;
  dict_t* copy;

  // get original pointer from Ruby VM (also rejects non-data receivers
  // before anything is allocated)
  Data_Get_Struct(obj, dict_t, source);

  // A blank wrapper is all that is needed: its contents are replaced
  // wholesale below. The class handle is passed as the (unused) receiver
  // instead of an uninitialised local.
  VALUE clone = c_cw_dict_open(0, NULL, c_cw_dict);
  Data_Get_Struct(clone, dict_t, copy);

  // get a copy of the original dictionary
  *copy = deep_copy_dict(*source);

  return clone;
}
|
413
|
+
|
414
|
+
// CWDict#sample
//
// Returns the word produced by sample_dict as a new Ruby String.
VALUE c_cw_dict_sample(VALUE obj)
{
  dict_t* dict;

  // fetch the wrapped struct
  Data_Get_Struct(obj, dict_t, dict);

  char* word = sample_dict(*dict);

  return rb_str_new2(word);
}
|
423
|
+
|
424
|
+
// CWDict#sort
//
// Non-destructive: clones the receiver, runs bubble_dict on the clone and
// returns the clone.
VALUE c_cw_dict_sort(VALUE obj)
{
  VALUE copy = c_cw_dict_clone(obj);
  dict_t* target;

  // operate on the clone's struct, never the receiver's
  Data_Get_Struct(copy, dict_t, target);
  *target = bubble_dict(*target);

  return copy;
}
|
439
|
+
|
440
|
+
// CWDict#prune
//
// Non-destructive: clones the receiver, runs prune_dict(..., false) on the
// clone and returns the clone.
VALUE c_cw_dict_prune(VALUE obj)
{
  VALUE copy = c_cw_dict_clone(obj);
  dict_t* target;

  // operate on the clone's struct, never the receiver's
  Data_Get_Struct(copy, dict_t, target);
  *target = prune_dict(*target, false);

  return copy;
}
|
455
|
+
|
456
|
+
// CWDict#clean
//
// Non-destructive: clones the receiver, runs prune_dict(..., true) on the
// clone and returns the clone.
VALUE c_cw_dict_clean(VALUE obj)
{
  VALUE copy = c_cw_dict_clone(obj);
  dict_t* target;

  // operate on the clone's struct, never the receiver's
  Data_Get_Struct(copy, dict_t, target);
  *target = prune_dict(*target, true);

  return copy;
}
|
471
|
+
|
472
|
+
// CWDict#sort!
//
// Destructive: replaces the receiver's struct with the result of
// bubble_dict and returns the receiver.
VALUE c_cw_dict_sort_s(VALUE obj)
{
  dict_t* target;

  // fetch the receiver's own struct
  Data_Get_Struct(obj, dict_t, target);
  *target = bubble_dict(*target);

  return obj;
}
|
483
|
+
|
484
|
+
// CWDict#prune!
//
// Destructive: replaces the receiver's struct with the result of
// prune_dict(..., false) and returns the receiver.
VALUE c_cw_dict_prune_s(VALUE obj)
{
  dict_t* target;

  // fetch the receiver's own struct
  Data_Get_Struct(obj, dict_t, target);
  *target = prune_dict(*target, false);

  return obj;
}
|
495
|
+
|
496
|
+
// CWDict#clean!
//
// Destructive: replaces the receiver's struct with the result of
// prune_dict(..., true) and returns the receiver.
VALUE c_cw_dict_clean_s(VALUE obj)
{
  dict_t* target;

  // fetch the receiver's own struct
  Data_Get_Struct(obj, dict_t, target);
  *target = prune_dict(*target, true);

  return obj;
}
|
507
|
+
|
508
|
+
// CWDict#validate!
//
// Returns the receiver when dict_valid accepts the dictionary.
// Raises Exception carrying dict_valid's message otherwise.
VALUE c_cw_dict_validate_s(VALUE obj)
{
  char* e = NULL;
  dict_t* d;

  // get original pointer from Ruby VM
  Data_Get_Struct(obj, dict_t, d);

  // handle invalid state first (for error handling's sake)
  if(!dict_valid(*d, &e))
  {
    // raising never returns, so move the message into a Ruby String and
    // release the C buffer before raising
    VALUE message = rb_str_new2(e ? e : "");
    free(e);
    rb_exc_raise(rb_exc_new3(rb_eException, message));
  }

  return obj;
}
|
525
|
+
|
526
|
+
// CWDict#named?
//
// Returns true when the dictionary has a non-NULL, non-empty name, false
// otherwise.
VALUE c_cw_dict_named_q(VALUE obj)
{
  dict_t* dict;

  // fetch the wrapped struct
  Data_Get_Struct(obj, dict_t, dict);

  if(!dict->name) return Qfalse;

  return (strlen(dict->name) > 0) ? Qtrue : Qfalse;
}
|
536
|
+
|
537
|
+
// CWDict#valid?
//
// Returns true when dict_valid accepts the dictionary and false otherwise.
// Unlike validate!, this predicate does not raise: previously the raise
// made the cleanup and the false return that followed it unreachable.
VALUE c_cw_dict_valid_q(VALUE obj)
{
  char* e = NULL;
  dict_t* d;

  // get original pointer from Ruby VM
  Data_Get_Struct(obj, dict_t, d);

  if(!dict_valid(*d, &e))
  {
    // the message is not surfaced by a predicate; just release it
    free(e);

    return Qfalse;
  }

  return Qtrue;
}
|
556
|
+
|
557
|
+
// CWDict#sorted?
//
// Returns true when the wrapped struct's sorted flag is set, false
// otherwise.
VALUE c_cw_dict_sorted_q(VALUE obj)
{
  dict_t* dict;

  // fetch the wrapped struct
  Data_Get_Struct(obj, dict_t, dict);

  return dict->sorted ? Qtrue : Qfalse;
}
|
567
|
+
|
568
|
+
// CWDict#include?(string)
//
// Returns true when dict_include reports a match for string, false
// otherwise.
VALUE c_cw_dict_include_q(VALUE obj, VALUE string)
{
  dict_t* dict;

  // fetch the wrapped struct
  Data_Get_Struct(obj, dict_t, dict);

  return dict_include(*dict, StringValueCStr(string)) ? Qtrue : Qfalse;
}
|
578
|
+
|
579
|
+
// CWDict#exclude?(string)
//
// Returns true when dict_exclude reports true for string, false otherwise.
VALUE c_cw_dict_exclude_q(VALUE obj, VALUE string)
{
  dict_t* dict;

  // fetch the wrapped struct
  Data_Get_Struct(obj, dict_t, dict);

  return dict_exclude(*dict, StringValueCStr(string)) ? Qtrue : Qfalse;
}
|
589
|
+
|
590
|
+
// CWDict#inspect
//
// Renders the dictionary as nested bracketed lists, one inner list per
// row, with entries separated by ", " - e.g. "[[a, b], [cc]]".
//
// The text is accumulated in a Ruby String, which grows on demand; this
// replaces a hand-managed C buffer whose resize check ran only after each
// word had already been appended.
//
// Returns the rendered String.
VALUE c_cw_dict_inspect(VALUE obj)
{
  dict_t* dict;

  // get original pointer from Ruby VM
  Data_Get_Struct(obj, dict_t, dict);

  // add opening delimiter
  VALUE str = rb_str_new2("[");

  // add internal dictionary row elements
  for(unsigned long i = 0; i != dict->count; ++i)
  {
    // rows after the first are preceded by a continuation delimiter
    if(i > 0) rb_str_cat2(str, ", ");

    rb_str_cat2(str, "[");

    for(unsigned long j = 0; j != dict->drows[i].count; ++j)
    {
      // words after the first are preceded by a continuation delimiter
      if(j > 0) rb_str_cat2(str, ", ");

      rb_str_cat2(str, dict->drows[i].words[j]);
    }

    // add closing row delimiter
    rb_str_cat2(str, "]");
  }

  // add closing delimiter
  rb_str_cat2(str, "]");

  return str;
}
|
659
|
+
|
660
|
+
// CWDict#to_s
//
// Returns the same String as c_cw_dict_inspect; that function performs the
// struct lookup itself, so nothing else is needed here.
VALUE c_cw_dict_to_s(VALUE obj)
{
  return c_cw_dict_inspect(obj);
}
|
671
|
+
|
672
|
+
// CWDict#<<(addend)
//
// addend - a String, or an Array containing only Strings.
//
// Places each word into the receiver via place_word_in_dict_strict. An
// Array is checked in full before any of its words is placed, so a bad
// element cannot leave the receiver half-updated. When the receiver is
// flagged as sorted it is re-sorted in place.
//
// Returns the receiver.
// Raises TypeError when addend is neither a String nor an Array of Strings.
VALUE c_cw_dict_append_op(VALUE obj, VALUE addend)
{
  dict_t* d;

  // get original pointer from Ruby VM
  Data_Get_Struct(obj, dict_t, d);

  switch(TYPE(addend))
  {
    case T_STRING:
      *d = place_word_in_dict_strict(*d, StringValueCStr(addend));
      break;
    case T_ARRAY:
      // first pass: validate every element before mutating anything
      for(long i = 0; i != RARRAY_LEN(addend); ++i)
      {
        VALUE entry = rb_ary_entry(addend, i);

        if(TYPE(entry) != T_STRING)
        {
          rb_raise(rb_eTypeError,"not a valid value (passed Array can only"
          " contain String elements)");
        }

        // also rejects Strings that cannot be used as C strings
        (void)StringValueCStr(entry);
      }

      // second pass: every element is known to be usable
      for(long i = 0; i != RARRAY_LEN(addend); ++i)
      {
        VALUE entry = rb_ary_entry(addend, i);
        char* entry_str = StringValueCStr(entry);

        *d = place_word_in_dict_strict(*d, entry_str);
      }

      break;
    default:
      rb_raise(rb_eTypeError,"not a valid value (expected String or Array)");
      break;
  }

  // in-place operator: keep the receiver itself sorted rather than handing
  // back a sorted clone
  if(d->sorted) return c_cw_dict_sort_s(obj);
  return obj;
}
|
709
|
+
|
710
|
+
// CWDict#+(addend)
//
// Non-destructive counterpart of the append operator: the receiver is
// cloned and the addend is appended to the clone.
//
// Returns whatever c_cw_dict_append_op returns for the clone.
VALUE c_cw_dict_add_op(VALUE obj, VALUE addend)
{
  VALUE copy = c_cw_dict_clone(obj);

  return c_cw_dict_append_op(copy, addend);
}
|
719
|
+
|
720
|
+
// Registered under the method name "+=".
//
// Straight delegation to c_cw_dict_append_op on the receiver; returns its
// result unchanged.
VALUE c_cw_dict_add_assign_op(VALUE obj, VALUE addend)
{
  VALUE outcome = c_cw_dict_append_op(obj, addend);

  return outcome;
}
|
724
|
+
|
725
|
+
void Init_chinwag()
|
726
|
+
{
|
727
|
+
// setup module extension and containing class(es)
|
728
|
+
m_chinwag = rb_define_module("Chinwag");
|
729
|
+
c_cw_dict = rb_define_class_under(m_chinwag, "CWDict", rb_cObject);
|
730
|
+
|
731
|
+
// sync up module generation functions
|
732
|
+
rb_define_module_function(m_chinwag, "generate", m_cw_generate, -1);
|
733
|
+
rb_define_module_function(m_chinwag, "set_default_dict", m_set_d_dict, 1);
|
734
|
+
rb_define_module_function(m_chinwag, "set_default_type", m_set_d_type, 1);
|
735
|
+
rb_define_module_function(m_chinwag, "set_default_min_output",m_s_min,1);
|
736
|
+
rb_define_module_function(m_chinwag, "set_default_max_output",m_s_max,1);
|
737
|
+
|
738
|
+
// sync up class methods
|
739
|
+
rb_define_singleton_method(c_cw_dict, "open", c_cw_dict_open, -1);
|
740
|
+
rb_define_method(c_cw_dict, "close", c_cw_dict_close, 0);
|
741
|
+
|
742
|
+
rb_define_method(c_cw_dict, "name", c_cw_dict_name_g, 0);
|
743
|
+
rb_define_method(c_cw_dict, "name=", c_cw_dict_name_s, 1);
|
744
|
+
rb_define_method(c_cw_dict, "length", c_cw_dict_length, 0);
|
745
|
+
|
746
|
+
rb_define_method(c_cw_dict, "join", c_cw_dict_join, -1);
|
747
|
+
rb_define_method(c_cw_dict, "clone", c_cw_dict_clone, 0);
|
748
|
+
rb_define_method(c_cw_dict, "sample", c_cw_dict_sample, 0);
|
749
|
+
|
750
|
+
rb_define_method(c_cw_dict, "sort", c_cw_dict_sort, 0);
|
751
|
+
rb_define_method(c_cw_dict, "prune", c_cw_dict_prune, 0);
|
752
|
+
rb_define_method(c_cw_dict, "clean", c_cw_dict_clean, 0);
|
753
|
+
|
754
|
+
rb_define_method(c_cw_dict, "sort!", c_cw_dict_sort_s, 0);
|
755
|
+
rb_define_method(c_cw_dict, "prune!", c_cw_dict_prune_s, 0);
|
756
|
+
rb_define_method(c_cw_dict, "clean!", c_cw_dict_clean_s, 0);
|
757
|
+
rb_define_method(c_cw_dict, "validate!", c_cw_dict_validate_s, 0);
|
758
|
+
|
759
|
+
rb_define_method(c_cw_dict, "named?", c_cw_dict_named_q, 0);
|
760
|
+
rb_define_method(c_cw_dict, "valid?", c_cw_dict_valid_q, 0);
|
761
|
+
rb_define_method(c_cw_dict, "sorted?", c_cw_dict_sorted_q, 0);
|
762
|
+
rb_define_method(c_cw_dict, "include?", c_cw_dict_include_q, 1);
|
763
|
+
rb_define_method(c_cw_dict, "exclude?", c_cw_dict_exclude_q, 1);
|
764
|
+
|
765
|
+
rb_define_method(c_cw_dict, "inspect", c_cw_dict_inspect, 0);
|
766
|
+
rb_define_method(c_cw_dict, "to_s", c_cw_dict_to_s, 0);
|
767
|
+
|
768
|
+
// operator methods
|
769
|
+
rb_define_method(c_cw_dict, "+", c_cw_dict_add_op, 1);
|
770
|
+
rb_define_method(c_cw_dict, "+=", c_cw_dict_add_assign_op, 1);
|
771
|
+
rb_define_method(c_cw_dict, "<<", c_cw_dict_append_op, 1);
|
772
|
+
// rb_define_method(c_cw_dict, "==", c_cw_dict_check_equality, 1);
|
773
|
+
// rb_define_method(c_cw_dict, "!=", c_cw_dict_check_inequality, 1);
|
774
|
+
|
775
|
+
// method aliases
|
776
|
+
rb_define_alias(c_cw_dict, "dup", "clone");
|
777
|
+
rb_define_alias(c_cw_dict, "size", "length");
|
778
|
+
rb_define_alias(c_cw_dict, "count", "length");
|
779
|
+
|
780
|
+
// open Seussian dict as default fall-back
|
781
|
+
VALUE args[] = { rb_str_new2("seussian") };
|
782
|
+
default_dict = c_cw_dict_open(1, args, default_dict);
|
783
|
+
|
784
|
+
// set default output type
|
785
|
+
default_output_type = ID2SYM(rb_intern("words"));
|
786
|
+
|
787
|
+
// set default minimum output amount
|
788
|
+
default_min_output = LONG2NUM(DEFAULT_MIN_OUTPUT_LENGTH);
|
789
|
+
|
790
|
+
// set default maximum output amount
|
791
|
+
default_max_output = LONG2NUM(DEFAULT_MAX_OUTPUT_LENGTH);
|
792
|
+
}
|