crawdad 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile ADDED
@@ -0,0 +1,56 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rake/testtask'
4
+ require 'rake/rdoctask'
5
+ require 'rake/gempackagetask'
6
+
7
# Version string stamped into the gem specification.
CRAWDAD_VERSION = '0.0.1'

# Running bare `rake` compiles the C extension.
task :default => [:build]

# Compile the shared library by running make inside ext/crawdad.
# `sh` (unlike Kernel#system) raises when make exits non-zero, so a failed
# compile aborts the task instead of being reported as a successful build.
task :build do
  sh "make -Cext/crawdad"
end
14
+
15
# Test task: puts spec/ on the load path and runs every spec/*_spec.rb file
# verbosely.
desc "Run all tests, test-spec required"
Rake::TestTask.new do |t|
  t.verbose    = true
  t.libs.push("spec")
  t.test_files = Dir.glob("spec/*_spec.rb")
end
21
+
22
# RDoc task: documents README and everything under lib/, writing the result
# to doc/html with README as the main page.
desc "Generate documentation"
Rake::RDocTask.new do |doc|
  doc.title    = "Crawdad Documentation"
  doc.main     = "README"
  doc.rdoc_dir = "doc/html"
  doc.rdoc_files.include("README")
  doc.rdoc_files.include("lib/")
end
29
+
30
# Gem metadata; the resulting specification is handed to the packaging task.
spec = Gem::Specification.new do |s|
  # Identity
  s.name     = 'crawdad'
  s.version  = CRAWDAD_VERSION
  s.platform = Gem::Platform::RUBY
  s.summary  = "Knuth-Plass linebreaking for Ruby"
  s.author   = 'Brad Ediger'
  s.email    = 'brad.ediger@madriska.com'
  s.homepage = 'http://github.com/madriska/crawdad'
  s.description = <<END_DESC
Crawdad is an implementation of Knuth-Plass linebreaking (justification)
for Ruby.
END_DESC

  # Contents: the Ruby library plus everything in the extension directory.
  library_files   = FileList["lib/**/**/*"]
  extension_files = FileList["ext/crawdad/*"]
  s.files = library_files + extension_files
  s.require_paths << 'ext'

  # The Rakefile itself is registered as the extension build script, and any
  # already-compiled libraries are added to the file list.
  compiled = FileList['ext/crawdad/*.bundle', 'ext/crawdad/*.so']
  s.extensions << 'Rakefile'
  s.files = s.files + compiled.to_a

  # Documentation
  s.has_rdoc = true
  s.rdoc_options.push('--title', 'Crawdad Documentation', '-q')
end
52
+
53
# Packaging task built from the specification above, with need_tar enabled.
Rake::GemPackageTask.new(spec) { |package| package.need_tar = true }
56
+
@@ -0,0 +1,25 @@
1
# Builds the crawdad shared library from the C sources in this directory.

# Operating system name from uname, with everything from the first '-' or '_'
# onwards removed.
OS:=$(shell uname | sed 's/[-_].*//')
CFLAGS=-Wall -O2 -fPIC
#CFLAGS=-Wall -fPIC -g
SHARED=-shared
SOEXT:=.so
# paragraph.c calls pow(), so the library is linked against libm.
LDLIBS=-lm

objects = tokens.o paragraph.o
headers = tokens.h paragraph.h breakpoint.h

# Darwin uses a different linker flag and library extension.
ifeq (${OS},Darwin)
SHARED = -dynamiclib
SOEXT:=.bundle
endif

# Neither target names a real file; declaring them phony keeps a stray file
# called "all" or "clean" from suppressing the rule.
.PHONY: all clean

all: crawdad$(SOEXT)

crawdad$(SOEXT): $(objects)
	$(CC) $(SHARED) -o crawdad$(SOEXT) $(objects) $(LDLIBS)

# Every object is rebuilt when any header changes.
%.o: %.c $(headers)
	$(CC) -c -o $@ $< $(CFLAGS)

# -f: succeed even when there is nothing to remove.
clean:
	rm -f *.o *$(SOEXT)
25
+
@@ -0,0 +1,53 @@
1
+ #ifndef _BREAKPOINT_H_
2
+ #define _BREAKPOINT_H_
3
+
4
/* One candidate line break; also a node in the linked list of breakpoints. */
struct breakpoint {
  int position;       /* index of the break in the token stream */
  int line;           /* number of the line that ends at this break */
  int fitness_class;  /* fitness class (0..3) of that line */

  /* Running totals recorded when the node was created. */
  float total_width;
  float total_stretch;
  float total_shrink;
  float total_demerits;

  float ratio;        /* adjustment ratio of the line ending here */

  struct breakpoint *previous;  /* break that starts the line ending here */
  struct breakpoint *link;      /* next node in the list */
};

typedef struct breakpoint breakpoint;
19
+
20
+ struct breakpoint *active_nodes;
21
+
22
+ breakpoint *make_starting_breakpoint() {
23
+ breakpoint *bp;
24
+
25
+ bp = malloc(sizeof(breakpoint));
26
+
27
+ bp->position = 0;
28
+ bp->line = 0;
29
+ bp->fitness_class = 1;
30
+
31
+ bp->total_width = 0.0;
32
+ bp->total_stretch = 0.0;
33
+ bp->total_shrink = 0.0;
34
+ bp->total_demerits = 0.0;
35
+
36
+ bp->ratio = 0.0;
37
+
38
+ bp->previous = NULL;
39
+ bp->link = NULL;
40
+
41
+ return bp;
42
+ }
43
+
44
+ /* Holds information about the best breakpoint found so far for a particular
45
+ * fitness class. */
46
+ typedef struct best_breakpoint {
47
+ breakpoint *bp;
48
+ float demerits;
49
+ float ratio;
50
+ } best_breakpoint;
51
+
52
+ #endif
53
+
@@ -0,0 +1,275 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <math.h>
4
+
5
+ #include "tokens.h"
6
+ #include "paragraph.h"
7
+ #include "breakpoint.h"
8
+
9
+ #define FLAGGED_PENALTY 3000
10
+ #define FITNESS_PENALTY 100
11
+
12
+ #define GAMMA INFINITY
13
+
14
+ void inspect_token(token *t) {
15
+ printf("(0x%02lX) ", (unsigned long)t);
16
+ switch(t->box.type){
17
+ case BOX:
18
+ printf("BOX %f \"%s\"\n", t->box.width, t->box.content);
19
+ break;
20
+ case GLUE:
21
+ printf("GLUE %f %f %f\n", t->glue.width, t->glue.stretch,
22
+ t->glue.shrink);
23
+ break;
24
+ case PENALTY:
25
+ printf("PENALTY %f %f %s\n", t->penalty.penalty, t->penalty.width,
26
+ (t->penalty.flagged ? "F" : "-"));
27
+ break;
28
+ default:
29
+ printf("UNKNOWN %d\n", t->box.type);
30
+ }
31
+ }
32
+
33
+ float calculate_demerits(token *stream[], int old_i, token *new_item,
34
+ float r) {
35
+ token *old_item = stream[old_i];
36
+ float d;
37
+
38
+ if((new_item->penalty.type == PENALTY) &&
39
+ (new_item->penalty.penalty >= 0)) {
40
+ d = pow(1 + 100*(pow(abs(r), 3) + new_item->penalty.penalty), 2);
41
+ } else if((new_item->penalty.type == PENALTY) &&
42
+ (new_item->penalty.penalty != -INFINITY)) {
43
+ d = pow((1 + 100*(pow(abs(r), 3))), 2) -
44
+ pow(new_item->penalty.penalty, 2);
45
+ } else {
46
+ d = pow(1 + 100*(pow(abs(r), 3)), 2);
47
+ }
48
+
49
+ if(old_item->penalty.type == PENALTY && old_item->penalty.flagged &&
50
+ new_item->penalty.type == PENALTY && new_item->penalty.flagged)
51
+ d += FLAGGED_PENALTY;
52
+
53
+ return d;
54
+ }
55
+
56
+ float adjustment_ratio(float tw, float ty, float tz,
57
+ float aw, float ay, float az,
58
+ float target_width, token *stream[], int b) {
59
+ float w, y, z; /* w=width y=stretch z=shrink */
60
+ token *item_b = stream[b];
61
+
62
+ w = tw - aw; /* Non-adjusted width of the line. */
63
+
64
+ /* Add the penalty width (hyphen) if we are breaking at a penalty. */
65
+ if(item_b->penalty.type == PENALTY)
66
+ w += item_b->penalty.width;
67
+
68
+ if(w < target_width) {
69
+ y = ty - ay;
70
+ return (y > 0) ? (target_width - w) / y : INFINITY;
71
+ } else if(w > target_width) {
72
+ z = tz - az;
73
+ return (z > 0) ? (target_width - w) / z : INFINITY;
74
+ } else {
75
+ return 0.0;
76
+ }
77
+ }
78
+
79
+ void calculate_widths(token *stream[], float *tw, float *ty, float *tz){
80
+ int i;
81
+ token *p;
82
+ for(i=0; (p = stream[i]); i++) {
83
+ switch(p->box.type) {
84
+ case BOX:
85
+ return;
86
+ case GLUE:
87
+ *tw += p->glue.width;
88
+ *ty += p->glue.stretch;
89
+ *tz += p->glue.shrink;
90
+ break;
91
+ case PENALTY:
92
+ if((p->penalty.penalty == -INFINITY) && (i > 0))
93
+ return;
94
+ }
95
+ }
96
+ }
97
+
98
+ void foreach_legal_breakpoint(token *stream[], float width, float threshold,
99
+ void (*fn)(token **, int, float, float, float, float, float)) {
100
+ float tw=0, ty=0, tz=0;
101
+ int i;
102
+ token *t;
103
+
104
+ for(i=0; (t = stream[i]); i++) {
105
+ switch(t->box.type) {
106
+ case BOX:
107
+ tw += t->box.width;
108
+ break;
109
+ case GLUE:
110
+ if(stream[i-1]->box.type == BOX)
111
+ fn(stream, i, tw, ty, tz, width, threshold);
112
+ tw += t->glue.width;
113
+ ty += t->glue.stretch;
114
+ tz += t->glue.shrink;
115
+ break;
116
+ case PENALTY:
117
+ if(t->penalty.penalty != INFINITY)
118
+ fn(stream, i, tw, ty, tz, width, threshold);
119
+ break;
120
+ }
121
+ }
122
+ }
123
+
124
/*
 * Maps an adjustment ratio to a fitness class:
 *   0 for ratio < -0.5, 1 for ratio < 0.5, 2 for ratio < 1, 3 otherwise.
 */
int fitness_class(float ratio) {
  static const double upper_bound[3] = { -0.5, 0.5, 1.0 };
  int cls;

  for(cls = 0; cls < 3; cls++) {
    if(ratio < upper_bound[cls])
      return cls;
  }

  return 3;
}
133
+
134
+ void concat_new_active_nodes(token *stream[], float total_width, float
135
+ total_stretch, float total_shrink, best_breakpoint best[4], int i,
136
+ breakpoint *active, breakpoint **p_previous_node) {
137
+ float lowest_demerits = INFINITY;
138
+ float tw = total_width, ty = total_stretch, tz = total_shrink;
139
+ int fclass;
140
+ breakpoint *bp;
141
+
142
+ for(fclass=0; fclass<4; fclass++)
143
+ if(best[fclass].demerits < lowest_demerits)
144
+ lowest_demerits = best[fclass].demerits;
145
+
146
+ calculate_widths(stream + i, &tw, &ty, &tz);
147
+
148
+ for(fclass=0; fclass<4; fclass++) {
149
+ if((best[fclass].demerits == INFINITY) ||
150
+ (best[fclass].demerits > lowest_demerits + GAMMA))
151
+ continue;
152
+
153
+ /* Create and activate node */
154
+ bp = malloc(sizeof(breakpoint));
155
+
156
+ bp->position = i;
157
+ bp->line = best[fclass].bp->line + 1;
158
+ bp->fitness_class = fclass;
159
+
160
+ bp->total_width = tw;
161
+ bp->total_stretch = ty;
162
+ bp->total_shrink = tz;
163
+
164
+ bp->total_demerits = best[fclass].demerits;
165
+ bp->ratio = best[fclass].ratio;
166
+
167
+ bp->previous = best[fclass].bp;
168
+ bp->link = active;
169
+
170
+ if(*p_previous_node)
171
+ (*p_previous_node)->link = bp;
172
+ else
173
+ active_nodes = bp;
174
+
175
+ *p_previous_node = bp;
176
+ }
177
+ }
178
+
179
+ void main_loop(token *stream[], int i, float tw, float ty, float tz,
180
+ float width, float threshold) {
181
+ breakpoint *active, *next_node, *previous_node;
182
+ best_breakpoint best[4];
183
+ int current_line;
184
+ float ratio;
185
+ float demerits;
186
+ int fclass;
187
+
188
+ if(active_nodes == NULL) {
189
+ /* TODO: be nicer */
190
+ printf("No feasible solution. Try relaxing threshold.");
191
+ exit(1);
192
+ }
193
+
194
+ active = active_nodes;
195
+ previous_node = NULL;
196
+ next_node = NULL;
197
+
198
+ while(active) {
199
+ best[0].demerits = INFINITY;
200
+ best[1].demerits = INFINITY;
201
+ best[2].demerits = INFINITY;
202
+ best[3].demerits = INFINITY;
203
+
204
+ while(active) {
205
+ current_line = active->line + 1;
206
+ next_node = active->link;
207
+
208
+ /* TODO: width can be replaced by a line-specific width for line j */
209
+ ratio = adjustment_ratio(tw, ty, tz, active->total_width,
210
+ active->total_stretch, active->total_shrink, width,
211
+ stream, i);
212
+
213
+ if((ratio < -1) || (is_penalty(stream[i]) &&
214
+ (stream[i]->penalty.penalty == -INFINITY))) {
215
+ /* Remove active node from the list */
216
+ if(previous_node)
217
+ previous_node->link = next_node;
218
+ else
219
+ active_nodes = next_node;
220
+ /* TODO: put active on the passive list or free? */
221
+ } else {
222
+ previous_node = active;
223
+ }
224
+
225
+ if((ratio >= -1) && (ratio <= threshold)) {
226
+ demerits = calculate_demerits(stream, active->position, stream[i],
227
+ ratio) + active->total_demerits;
228
+ fclass = fitness_class(ratio);
229
+
230
+ /* Penalize consecutive lines more than one fitness class away from
231
+ * each other. */
232
+ if(abs(fclass - active->fitness_class) > 1)
233
+ demerits += FITNESS_PENALTY;
234
+
235
+ /* Update high scores if this is a new best. */
236
+ if(demerits < best[fclass].demerits) {
237
+ best[fclass].bp = active;
238
+ best[fclass].demerits = demerits;
239
+ best[fclass].ratio = ratio;
240
+ }
241
+ }
242
+
243
+ /* Add nodes to the active list before moving to the next line. */
244
+ active = next_node;
245
+ if(!active)
246
+ break;
247
+ if(active->line >= current_line)
248
+ break;
249
+ }
250
+
251
+ /* If we found any best nodes, add them to the active list. */
252
+ concat_new_active_nodes(stream, tw, ty, tz, best, i, active,
253
+ &previous_node);
254
+
255
+ active = next_node;
256
+ }
257
+ }
258
+
259
+ breakpoint *populate_active_nodes(token *stream[], float width,
260
+ float threshold) {
261
+ breakpoint *bp, *min_node;
262
+
263
+ active_nodes = make_starting_breakpoint();
264
+ foreach_legal_breakpoint(stream, width, threshold, main_loop);
265
+
266
+ /* Find node with minimum demerits */
267
+ min_node = NULL;
268
+ for(bp = active_nodes; bp; bp = bp->link)
269
+ if(!min_node || (bp->total_demerits < min_node->total_demerits))
270
+ min_node = bp;
271
+
272
+ return min_node;
273
+ }
274
+
275
+
@@ -0,0 +1,29 @@
1
+ #ifndef _PARAGRAPH_H_
2
+ #define _PARAGRAPH_H_
3
+
4
+ #include "breakpoint.h"
5
+
6
+ void inspect_token(token *t);
7
+
8
+ float calculate_demerits(token *stream[], int old_i, token *new_item,
9
+ float r);
10
+
11
+ float adjustment_ratio(float tw, float ty, float tz,
12
+ float aw, float ay, float az,
13
+ float target_width, token *stream[], int b);
14
+
15
+ void calculate_widths(token *stream[], float *tw, float *ty, float *tz);
16
+
17
+ void foreach_legal_breakpoint(token *stream[], float width, float threshold,
18
+ void (*fn)(token **, int, float, float, float, float, float));
19
+
20
+ int fitness_class(float ratio);
21
+
22
+ void main_loop(token *stream[], int i, float tw, float ty, float tz,
23
+ float width, float threshold);
24
+
25
+ breakpoint *populate_active_nodes(token *stream[], float width,
26
+ float threshold);
27
+
28
+ #endif
29
+
@@ -0,0 +1,57 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <math.h>
4
+ #include <string.h>
5
+
6
+ #include "tokens.h"
7
+
8
+ struct box *make_box(float width, char *content) {
9
+ int len;
10
+ struct box *t;
11
+
12
+ t = malloc(sizeof(struct box));
13
+ t->type = BOX;
14
+ t->width = width;
15
+
16
+ len = strlen(content);
17
+ t->content = malloc(len+1);
18
+ strncpy(t->content, content, len);
19
+ t->content[len] = '\0';
20
+
21
+ return t;
22
+ }
23
+
24
+ struct glue *make_glue(float width, float stretch, float shrink) {
25
+ struct glue *t = malloc(sizeof(struct glue));
26
+ t->type = GLUE;
27
+ t->width = width;
28
+ t->stretch = stretch;
29
+ t->shrink = shrink;
30
+ return t;
31
+ }
32
+
33
+ struct penalty *make_penalty(float width, float penalty, int flagged) {
34
+ struct penalty *t = malloc(sizeof(struct penalty));
35
+ t->type = PENALTY;
36
+ t->width = width;
37
+ t->penalty = penalty;
38
+ t->flagged = flagged;
39
+ return t;
40
+ }
41
+
42
+ int token_type(token *t) {
43
+ return t->box.type;
44
+ }
45
+
46
+ int is_box(token *t) {
47
+ return (t->box.type == BOX);
48
+ }
49
+
50
+ int is_penalty(token *t) {
51
+ return (t->penalty.type == PENALTY);
52
+ }
53
+
54
+ int is_glue(token *t) {
55
+ return (t->glue.type == GLUE);
56
+ }
57
+