chinwag 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Rakefile +35 -0
- data/ext/chinwag/chinwag.c +246 -0
- data/ext/chinwag/chinwag.h +71 -0
- data/ext/chinwag/config.c +28 -0
- data/ext/chinwag/config.h +42 -0
- data/ext/chinwag/dict.c +670 -0
- data/ext/chinwag/dict.h +50 -0
- data/ext/chinwag/extconf.rb +11 -0
- data/ext/chinwag/generator.c +541 -0
- data/ext/chinwag/generator.h +25 -0
- data/ext/chinwag/ingredient.h +69 -0
- data/ext/chinwag/latin.c +4 -0
- data/ext/chinwag/latin.h +9 -0
- data/ext/chinwag/rb_chinwag_ext.c +792 -0
- data/ext/chinwag/rb_chinwag_ext.h +23 -0
- data/ext/chinwag/seuss.c +4 -0
- data/ext/chinwag/seuss.h +9 -0
- data/ext/chinwag/tokenize.c +53 -0
- data/ext/chinwag/tokenize.h +9 -0
- data/ext/chinwag/utility.c +143 -0
- data/ext/chinwag/utility.h +20 -0
- data/lib/chinwag.rb +5 -0
- metadata +70 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4e33fde93c7612aeb9941b6b5544a48f9acc2afb
|
4
|
+
data.tar.gz: cd2a75d19bce767042c8e0c6401f29dfd30e7227
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 73ff8f9e4db275f9ef69ea03ffe9cb35ef46db12621d1c7a8eca9a23a3a8cc7b63763fc3ebed916eb98c63220d366350d39c085d8ce8dff2e613cebd4a986da1
|
7
|
+
data.tar.gz: 2aa6284b8dcece586643bcd76922c72cbb0c7de76389444ec5198e25ad97a03622535ec3267d022d6aba9b82669abe445c0e03548ab756438de3d2cf214802ad
|
data/Rakefile
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'rake/extensiontask'
require 'rubygems/package_task'
require 'rspec/core/rake_task'

# Register the RSpec rake task with its default settings.
RSpec::Core::RakeTask.new

# Native extension task for "chinwag"; lib_dir is pointed at lib/chinwag.
Rake::ExtensionTask.new("chinwag") { |ext| ext.lib_dir = "lib/chinwag" }

# Gem metadata: identity, packaged files and the native extension entry point.
gemspec = Gem::Specification.new("chinwag", "0.1.0") do |spec|
  spec.authors = ["Chris Calo"]
  spec.email = ["ccalo@vulcanca.com"]
  spec.summary = "A text-synthesis library, for use in layout testing (and more)."
  spec.description = "#{spec.summary} It houses flexible methods for generation, and a dictionary class (CWDict) allowing adequate token files (or embedded defaults) to be used as the basis for output creation."
  spec.homepage = "https://github.com/vulcancreative/chinwag-ruby"
  spec.license = "MIT"

  # Packaged files: the Rakefile, Ruby sources, C headers/sources and extconf.
  spec.files =
    Dir["Rakefile"] +
    Dir["lib/**/*.rb"] +
    Dir["ext/**/*.h"] +
    Dir["ext/**/*.c"] +
    Dir["ext/chinwag/extconf.rb"]

  # Any packaged file living under test/, spec/ or features/.
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.extensions = Dir["ext/**/extconf.rb"]
  spec.require_paths = ["lib", "ext"]
end

# Packaging task for the specification above (no extra configuration).
Gem::PackageTask.new(gemspec) {}

# Specs always run against a freshly cleaned and recompiled extension.
task spec: %i[clean clobber compile]

task default: :spec
task test: :spec
|
@@ -0,0 +1,246 @@
|
|
1
|
+
#include "chinwag.h"
|
2
|
+
|
3
|
+
// Validates the request and dispatches to the generator matching `type`.
//
// type : CW_LETTERS, CW_WORDS, CW_SENTENCES or CW_PARAGRAPHS
// min  : lower bound handed to the generator; must be greater than 0
// max  : upper bound handed to the generator; must be >= min
// dict : dictionary handed to the generator
//
// Returns the string produced by the selected *_rng generator. When an
// argument is invalid, a message is written to stderr and the process
// terminates with EXIT_FAILURE.
char* chinwag(cw_t type, unsigned long min, unsigned long max, dict_t dict)
{
  if(min == 0 || max == 0)
  {
    // %lu matches the unsigned long arguments (the old %d did not)
    fprintf(stderr,
      "ERROR : chinwag requires max and min to be greater than 0; "
      "got min : %lu, max : %lu\n", min, max);
    exit(EXIT_FAILURE);
  }

  if(max < min)
  {
    fprintf(stderr,
      "ERROR : chinwag requires max greater than, or equal to, min; "
      "got min : %lu, max : %lu\n", min, max);
    exit(EXIT_FAILURE);
  }

  // CW_PARAGRAPHS is the highest output type this function dispatches on
  if(type > CW_PARAGRAPHS)
  {
    fputs("ERROR : chinwag requires a valid output type\n", stderr);
    exit(EXIT_FAILURE);
  }

  switch(type)
  {
    case CW_LETTERS:    return ltr_rng(min, max, dict);
    case CW_WORDS:      return wrd_rng(min, max, dict);
    case CW_SENTENCES:  return snt_rng(min, max, dict);
    case CW_PARAGRAPHS: return pgf_rng(min, max, dict);
    default:            break;
  }

  // not reachable after the type check above; kept as a defensive fallback
  return NULL;
}
|
37
|
+
|
38
|
+
// Generates output against a letter budget: a target amount is drawn with
// motherr(min, max), then words sampled from `dict` are collected in a
// temporary dictionary until that budget is spent. The collected words are
// pruned, capitalized and joined with single spaces.
//
// min, max : bounds forwarded to motherr() to pick the letter budget
// dict     : source dictionary for sampled words
//
// Returns the string produced by join_dict(). NOTE(review): pgf_rng frees
// the equivalent snt_rng result with free(), so the caller presumably owns
// this string as well - confirm against join_dict.
// If the working buffer cannot be allocated, an error is written to stderr
// and the process exits with EXIT_FAILURE (same convention as chinwag()).
char* ltr_rng(unsigned long min, unsigned long max, dict_t dict)
{
  dict_t temp = open_dict();
  I32 amount = motherr(min, max); U32 len = 0;
  char* s = (char*)malloc(SMALL_BUFFER); char* sample = NULL;
  char* word = NULL; char* result = NULL; char* vowels = "aeiou";

  if(s == NULL)
  {
    fputs("ERROR : ltr_rng could not allocate its working buffer\n", stderr);
    exit(EXIT_FAILURE);
  }

  while(amount > 0)
  {
    if(amount == 2)
    {
      // exactly two letters left : start from a single sampled vowel
      // SSWS : modifies destination, can't modify source, new string
      sample = sample_substring_with_size(vowels, 1);
      s = strcpy(s, sample); free(sample);
      len = 1;
      s[len] = '\0';
    }
    else
    {
      word = sample_dict(dict);

      // never copy a token that cannot fit the fixed-size working buffer
      if(strlen(word) >= SMALL_BUFFER) continue;

      s = strcpy(s, word);
      len = strlen(s);

      // resample when the word exceeds the budget or is a compound token
      if(len > (U32)amount || include(s, " ") || include(s, "-")) continue;
    }

    amount -= len;

    // postfixed alteration (append vowel chain/remove trailing character)
    if(amount + 1 == 0)
    {
      // NOTE(review): the guards above keep amount >= 0 at this point, so
      // this branch looks unreachable; it is preserved unchanged
      // SWS : modifies destination, can't modify source, new string
      sample = substring_with_size(s, 0, len - 2);
      s = strcpy(s, sample); free(sample);
      len -= 1; s[len] = '\0';

      amount += 1;
    }
    else if(amount - 1 == 0)
    {
      // one letter would be left over : absorb it with a single vowel
      sample = sample_substring_with_size(vowels, 1);
      s = add_suffix(s, sample); free(sample);
      len += 1; s[len] = '\0';

      amount -= 1;
    }
    else if(amount - 2 == 0)
    {
      // two letters would be left over : absorb them with two vowels
      sample = sample_substring_with_size(vowels, 2);
      s = add_suffix(s, sample); free(sample);
      len += 2; s[len] = '\0';

      amount -= 2;
    }

    temp = place_word_in_dict(temp, s);

    // presumably charges the budget for the joining space - TODO confirm
    if(amount > 0 && amount != 1) --amount;
  }

  // post-process dict (pass utility::capitalize function as parameter)
  temp = prune_dict(temp, false);
  temp = map_dict(temp, capitalize);
  result = join_dict(temp, " ");

  close_dict(temp);
  free(s); s = NULL;

  return result;
}
|
107
|
+
|
108
|
+
// Collects a number of words (drawn with motherr(min, max)) sampled from
// `dict`, capitalizes them via map_dict() and joins them with single spaces.
//
// Candidates containing a space or a hyphen are rejected. Duplicates are
// rejected too, unless more words were requested than total_dict(dict)
// reports, in which case repeats are allowed.
//
// Returns the string produced by join_dict().
char* wrd_rng(unsigned long min, unsigned long max, dict_t dict)
{
  dict_t picked = open_dict();
  U32 wanted = motherr(min, max), available = total_dict(dict);
  char* candidate = NULL;
  char* joined = NULL;

  for(U32 n = 0; n != wanted; ++n)
  {
    // keep sampling until a candidate passes every filter
    for(;;)
    {
      candidate = sample_dict(dict);

      // compound tokens (space or hyphen) are never accepted
      if(!exclude(candidate, " ") || !exclude(candidate, "-")) continue;

      // repeats are fine when the request outgrows the dictionary;
      // otherwise the word must not have been picked already
      if(wanted > available || dict_exclude(picked, candidate)) break;
    }

    picked = place_word_in_dict(picked, candidate);
  }

  // post-process dict (pass utility::capitalize function as parameter)
  picked = map_dict(picked, capitalize);
  joined = join_dict(picked, " ");

  close_dict(picked);

  return joined;
}
|
142
|
+
|
143
|
+
// Generates a number of sentences (drawn with motherr(min, max)) and joins
// them with single spaces.
//
// Each sentence gets a word count drawn with
// motherr(SENTENCE_MIN_WORD_LENGTH, SENTENCE_MAX_WORD_LENGTH), may carry one
// comma after one of its words, is capitalized, and ends in '.', '?' or '!'.
//
// min, max : bounds forwarded to motherr() to pick the sentence count
// dict     : source dictionary; dict.drows is indexed directly
//
// Returns the string produced by join_dict(); pgf_rng releases such a
// result with free(). If a working copy cannot be allocated, an error is
// written to stderr and the process exits with EXIT_FAILURE.
char* snt_rng(unsigned long min, unsigned long max, dict_t dict)
{
  dict_t master = open_dict(), temp; drow_t selected;
  U32 word_amount = 0, last = 0, amount = motherr(min, max), now = 0,
  len = 0, t_minus = 0; U8 comma = 0; I32 punct = 0;
  char* sample = NULL; char* result = NULL; char* s = NULL;

  for(U32 i = 0; i != amount; ++i)
  {
    temp = open_dict();
    word_amount = motherr(SENTENCE_MIN_WORD_LENGTH,
    SENTENCE_MAX_WORD_LENGTH);

    if(word_amount >= 2)
    {
      // coin flip : does this sentence get a comma at all?
      comma = (U8)motherr(0, 1);

      // if comma, determine comma position (after first word)
      if(comma == 1) comma = motherr(1, word_amount - 1);
    }

    // determine sentence rhythm
    for(U32 j = 0; j != word_amount; ++j)
    {
      if(j == 0) now = motherr(5, 10);
      else if(j == word_amount - 1) now = motherr(3, 8);
      else if(t_minus > 0) { now = motherr(1, 10); --t_minus; }
      else if(last <= 10) now = motherr(1, dict.count - 1);
      else { now = motherr(6, 10); t_minus = 3; } // only reached if last > 10

      // keep the row index inside dict.drows; the fixed ranges above can
      // exceed dict.count - 1 for dictionaries with few rows
      if(dict.count > 0 && now >= dict.count) now = (U32)(dict.count - 1);

      // NOTE(review): now is also compared with strlen(sample) below, so
      // rows appear to be grouped by word length - confirm in dict.c
      selected = dict.drows[now];
      sample = sample_drow(selected);

      while(dict_include(temp, sample) && strlen(sample) != now)
      { sample = sample_dict(dict); }

      // add comma (if applicable)
      if(comma && j == comma - 1)
      {
        // get local copy of sample for modification
        len = strlen(sample);
        s = (char*)malloc(len + 1);

        if(s == NULL)
        {
          fputs("ERROR : snt_rng could not allocate a word copy\n", stderr);
          exit(EXIT_FAILURE);
        }

        strcpy(s, sample);
        s[len] = '\0';

        s = add_suffix(s, ",");
        temp = place_word_in_dict(temp, s);

        free(s);
      }
      else temp = place_word_in_dict(temp, sample);

      last = now;
    }

    // join temporary dict into a sentence; capitalize first word
    s = join_dict(temp, " ");
    s = capitalize(s);

    // determine punctuation; 1 - period, 2 - question, 3 - exclamation
    // based on a ratio of 64-21-15, sampled from Shakespeare's Hamlet
    punct = (I32)motherr(0, 99);

    if(punct >= 0 && punct <= 63) s = add_suffix(s, ".");
    else if(punct >= 64 && punct <= 84) s = add_suffix(s, "?");
    else if(punct >= 85 && punct <= 99) s = add_suffix(s, "!");

    // add sentence to master dict and cleanup
    master = place_word_in_dict(master, s);

    close_dict(temp);
    free(s);
  }

  result = join_dict(master, " ");
  close_dict(master);

  return result;
}
|
224
|
+
|
225
|
+
// Generates a number of paragraphs (drawn with motherr(min, max)) and joins
// them with a blank line ("\n\n") between each.
//
// Every paragraph is the output of snt_rng() for a sentence count drawn with
// motherr(PARAGRAPH_MIN_SENTENCE_LENGTH, PARAGRAPH_MAX_SENTENCE_LENGTH).
//
// Returns the string produced by join_dict().
char* pgf_rng(unsigned long min, unsigned long max, dict_t dict)
{
  char* joined = NULL;
  char* paragraph = NULL;
  U32 wanted = motherr(min, max), per_paragraph = 0;
  dict_t collected = open_dict();
  U32 done = 0;

  while(done != wanted)
  {
    per_paragraph = motherr(PARAGRAPH_MIN_SENTENCE_LENGTH,
                            PARAGRAPH_MAX_SENTENCE_LENGTH);

    // fixed sentence count : the same value serves as both bounds
    paragraph = snt_rng(per_paragraph, per_paragraph, dict);
    collected = place_word_in_dict(collected, paragraph);

    // paragraph is released right after being placed in the dict
    free(paragraph);

    ++done;
  }

  joined = join_dict(collected, "\n\n");
  close_dict(collected);

  return joined;
}
|
@@ -0,0 +1,71 @@
|
|
1
|
+
//
|
2
|
+
// chinwag.h
|
3
|
+
// Chinwag
|
4
|
+
//
|
5
|
+
// Created by Chris Calo on 8/05/14.
|
6
|
+
// Copyright (c) 2014 Vulcan Creative, LLC. All rights reserved.
|
7
|
+
//
|
8
|
+
|
9
|
+
#ifndef __CHINWAG_AUGXMJP4_H
|
10
|
+
#define __CHINWAG_AUGXMJP4_H
|
11
|
+
|
12
|
+
#include <ctype.h>
|
13
|
+
#include <stdio.h>
|
14
|
+
#include <stdlib.h>
|
15
|
+
#include <string.h>
|
16
|
+
#include <assert.h>
|
17
|
+
#include <signal.h>
|
18
|
+
#include <unistd.h>
|
19
|
+
#include <stdint.h>
|
20
|
+
#include <stdbool.h>
|
21
|
+
#include <sys/time.h>
|
22
|
+
|
23
|
+
// Output types understood by chinwag(); each value selects one generator
// (ltr_rng, wrd_rng, snt_rng, pgf_rng respectively).
enum {
  CW_LETTERS    = 0,
  CW_WORDS      = 1,
  CW_SENTENCES  = 2,
  CW_PARAGRAPHS = 3
};

// Type used to carry one of the CW_* values above.
typedef unsigned long cw_t;
|
31
|
+
|
32
|
+
// Internal dictionary row: one array of words plus bookkeeping.
// NOTE(review): field meanings below are inferred from their names only -
// confirm against dict.c before relying on them.
typedef struct dictionary_type {
  bool sorted;               // presumably set once the row has been sorted
  unsigned long count;       // presumably the number of entries in words
  unsigned long largest;     // presumably the length of the longest word
  unsigned long largest_pos; // presumably the index of that longest word
  char** words;              // the row's word strings
} drow_t;
|
40
|
+
|
41
|
+
// dictionary (row container)
|
42
|
+
typedef struct dictionary_container_type {
|
43
|
+
bool sorted;
|
44
|
+
unsigned long count;
|
45
|
+
drow_t* drows;
|
46
|
+
char* name;
|
47
|
+
} dict_t;
|
48
|
+
|
49
|
+
#include "seuss.h"
|
50
|
+
#include "latin.h"
|
51
|
+
|
52
|
+
#include "ingredient.h"
|
53
|
+
#include "generator.h"
|
54
|
+
#include "tokenize.h"
|
55
|
+
#include "utility.h"
|
56
|
+
#include "config.h"
|
57
|
+
#include "dict.h"
|
58
|
+
|
59
|
+
// Validating entry point: checks its arguments, then dispatches on `type`
// to one of the *_rng generators declared below.
char* chinwag(cw_t type, unsigned long min, unsigned long max, dict_t dict);

// Ranged generators for letters, words, sentences and paragraphs; each
// draws its amount between min and max.
char* ltr_rng(unsigned long min, unsigned long max, dict_t dict);
char* wrd_rng(unsigned long min, unsigned long max, dict_t dict);
char* snt_rng(unsigned long min, unsigned long max, dict_t dict);
char* pgf_rng(unsigned long min, unsigned long max, dict_t dict);

// Fixed-amount shorthands: the same amount is passed as both min and max.
// `amt` is expanded twice, so avoid arguments with side effects.
#define ltr(amt, dict) ltr_rng((amt), (amt), (dict))
#define wrd(amt, dict) wrd_rng((amt), (amt), (dict))
#define snt(amt, dict) snt_rng((amt), (amt), (dict))
#define pgf(amt, dict) pgf_rng((amt), (amt), (dict))
|
70
|
+
|
71
|
+
#endif
|
@@ -0,0 +1,28 @@
|
|
1
|
+
#include "config.h"
|
2
|
+
|
3
|
+
// versioning information (major.minor.patch)
const unsigned int MAJOR_VERSION = 1;
const unsigned int MINOR_VERSION = 1;
const unsigned int PATCH_VERSION = 4;

// compilation information
const char* const DATE_YEAR  = "2014";
const char* const DATE_MONTH = "12";
const char* const DATE_DAY   = "02";
const char* const REVISION   = "743";

// fixed buffer sizes
const unsigned int SMALL_BUFFER = 1024;
const unsigned int LARGE_BUFFER = 5120;

// minimum size allowed for dictionary tokens
const unsigned int MIN_DICT_SIZE = 300;

// token delimiters: CR, LF, comma, semicolon, colon, and octal \034
// (ASCII 28, the file separator control character)
const char* const DELIMITERS = "\r\n,;:\034";

// default output type; 1 matches CW_WORDS in chinwag.h
const unsigned int CW_DEFAULT = 1;

// default output caps
const unsigned int DEFAULT_MIN_OUTPUT_LENGTH = 1;
const unsigned int DEFAULT_MAX_OUTPUT_LENGTH = 5;

// words per randomized sentence
const unsigned int SENTENCE_MIN_WORD_LENGTH = 2;
const unsigned int SENTENCE_MAX_WORD_LENGTH = 25;

// sentences per randomized paragraph
const unsigned int PARAGRAPH_MIN_SENTENCE_LENGTH = 4;
const unsigned int PARAGRAPH_MAX_SENTENCE_LENGTH = 6;
|
@@ -0,0 +1,42 @@
|
|
1
|
+
#ifndef __CONFIG_6TCIIEDT_H
|
2
|
+
#define __CONFIG_6TCIIEDT_H
|
3
|
+
|
4
|
+
#include "chinwag.h"
|
5
|
+
|
6
|
+
// versioning information
extern const unsigned int MAJOR_VERSION;
extern const unsigned int MINOR_VERSION;
extern const unsigned int PATCH_VERSION;

// compilation information
extern const char* const DATE_YEAR;
extern const char* const DATE_MONTH;
extern const char* const DATE_DAY;
extern const char* const REVISION;

// minimum and maximum fixed buffer sizes
extern const unsigned int SMALL_BUFFER;
extern const unsigned int LARGE_BUFFER;

// minimum size allowed for dictionary tokens (more == better randomization)
extern const unsigned int MIN_DICT_SIZE;

// individual characters used to delimit dictionary tokens
extern const char* const DELIMITERS;

// default output type
extern const unsigned int CW_DEFAULT;

// pre-defined default output caps (both must be unsigned integers)
extern const unsigned int DEFAULT_MIN_OUTPUT_LENGTH;
extern const unsigned int DEFAULT_MAX_OUTPUT_LENGTH;

// pre-defined caps for determining randomized sentence lengths
extern const unsigned int SENTENCE_MIN_WORD_LENGTH;
extern const unsigned int SENTENCE_MAX_WORD_LENGTH;

// pre-defined caps for determining randomized paragraph lengths
extern const unsigned int PARAGRAPH_MIN_SENTENCE_LENGTH;
extern const unsigned int PARAGRAPH_MAX_SENTENCE_LENGTH;
|
41
|
+
|
42
|
+
#endif
|