crawdad 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile ADDED
@@ -0,0 +1,56 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rake/testtask'
4
+ require 'rake/rdoctask'
5
+ require 'rake/gempackagetask'
6
+
7
# Version stamped into the gem specification below.
CRAWDAD_VERSION = '0.0.1'.freeze

# A bare `rake` compiles the C library.
task :default => [:build]

desc "Compile the C library in ext/crawdad"
task :build do
  # `sh` aborts the task when make exits non-zero. A plain `system` call
  # would discard the exit status and let a broken build look successful.
  sh "make", "-C", "ext/crawdad"
end
14
+
15
# Defines the `test` task: every spec/*_spec.rb file is run with spec/ on
# the load path.
desc "Run all tests, test-spec required"
Rake::TestTask.new do |t|
  t.libs.push("spec")
  t.test_files = Dir.glob("spec/*_spec.rb")
  t.verbose = true
end
21
+
22
# Defines the rdoc tasks: HTML documentation for README and lib/ is written
# to doc/html.
desc "Generate documentation"
Rake::RDocTask.new do |doc|
  doc.title    = "Crawdad Documentation"
  doc.main     = "README"
  doc.rdoc_dir = "doc/html"
  doc.rdoc_files.include("README", "lib/")
end
29
+
30
# Gem metadata. The resulting specification is handed to the packaging task
# that follows.
spec = Gem::Specification.new do |s|
  s.name     = 'crawdad'
  s.version  = CRAWDAD_VERSION
  s.platform = Gem::Platform::RUBY
  s.summary  = "Knuth-Plass linebreaking for Ruby"

  # Ruby sources plus everything under the C extension directory.
  s.files = FileList["lib/**/**/*"] + FileList["ext/crawdad/*"]
  s.require_paths << 'ext'

  # Listing the Rakefile as an extension asks RubyGems to run it at install
  # time; any library that is already compiled is shipped as well.
  compiled = FileList['ext/crawdad/*.bundle', 'ext/crawdad/*.so']
  s.extensions << 'Rakefile'
  s.files += compiled.to_a

  s.has_rdoc = true
  s.rdoc_options << '--title' << 'Crawdad Documentation' << '-q'

  s.author      = 'Brad Ediger'
  s.email       = 'brad.ediger@madriska.com'
  s.homepage    = 'http://github.com/madriska/crawdad'
  s.description = <<END_DESC
Crawdad is an implementation of Knuth-Plass linebreaking (justification)
for Ruby.
END_DESC
end
52
+
53
# Packaging tasks built from the specification above; a tarball is produced
# alongside the .gem file.
Rake::GemPackageTask.new(spec) { |package| package.need_tar = true }
56
+
@@ -0,0 +1,25 @@
1
# Builds the crawdad shared library (crawdad.so, or crawdad.bundle on Darwin).
OS:=$(shell uname | sed 's/[-_].*//')
CFLAGS=-Wall -O2 -fPIC
#CFLAGS=-Wall -fPIC -g
SHARED=-shared
SOEXT:=.so
# paragraph.c calls pow(), so the math library must be linked in.
LDLIBS=-lm

objects = tokens.o paragraph.o
headers = tokens.h paragraph.h breakpoint.h

ifeq (${OS},Darwin)
  SHARED = -dynamiclib
  SOEXT:=.bundle
endif

# Neither target names a real file; always run them when requested.
.PHONY: all clean

all: crawdad$(SOEXT)

crawdad$(SOEXT): $(objects)
	$(CC) $(SHARED) -o $@ $(objects) $(LDLIBS)

# Every object is rebuilt when any header changes.
%.o: %.c $(headers)
	$(CC) -c -o $@ $< $(CFLAGS)

# -f keeps `make clean` from failing when nothing has been built yet.
clean:
	rm -f *.o *$(SOEXT)
25
+
@@ -0,0 +1,53 @@
1
+ #ifndef _BREAKPOINT_H_
2
+ #define _BREAKPOINT_H_
3
+
4
/* One candidate line break considered by the optimizer. */
typedef struct breakpoint {
  int position;       /* index into the token stream where the break occurs */
  int line;           /* number of the line that ends at this break */
  int fitness_class;  /* value produced by fitness_class() for that line */

  /* Running totals recorded for this break. */
  float total_width, total_stretch, total_shrink, total_demerits;

  float ratio;        /* adjustment ratio of the line ending here */

  struct breakpoint *previous;  /* best break that precedes this one */
  struct breakpoint *link;      /* next node in the active list */
} breakpoint;

/* Head of the list of breakpoints that are still active. */
breakpoint *active_nodes;
21
+
22
+ breakpoint *make_starting_breakpoint() {
23
+ breakpoint *bp;
24
+
25
+ bp = malloc(sizeof(breakpoint));
26
+
27
+ bp->position = 0;
28
+ bp->line = 0;
29
+ bp->fitness_class = 1;
30
+
31
+ bp->total_width = 0.0;
32
+ bp->total_stretch = 0.0;
33
+ bp->total_shrink = 0.0;
34
+ bp->total_demerits = 0.0;
35
+
36
+ bp->ratio = 0.0;
37
+
38
+ bp->previous = NULL;
39
+ bp->link = NULL;
40
+
41
+ return bp;
42
+ }
43
+
44
+ /* Holds information about the best breakpoint found so far for a particular
45
+ * fitness class. */
46
+ typedef struct best_breakpoint {
47
+ breakpoint *bp;
48
+ float demerits;
49
+ float ratio;
50
+ } best_breakpoint;
51
+
52
+ #endif
53
+
@@ -0,0 +1,275 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <math.h>
4
+
5
+ #include "tokens.h"
6
+ #include "paragraph.h"
7
+ #include "breakpoint.h"
8
+
9
+ #define FLAGGED_PENALTY 3000
10
+ #define FITNESS_PENALTY 100
11
+
12
+ #define GAMMA INFINITY
13
+
14
+ void inspect_token(token *t) {
15
+ printf("(0x%02lX) ", (unsigned long)t);
16
+ switch(t->box.type){
17
+ case BOX:
18
+ printf("BOX %f \"%s\"\n", t->box.width, t->box.content);
19
+ break;
20
+ case GLUE:
21
+ printf("GLUE %f %f %f\n", t->glue.width, t->glue.stretch,
22
+ t->glue.shrink);
23
+ break;
24
+ case PENALTY:
25
+ printf("PENALTY %f %f %s\n", t->penalty.penalty, t->penalty.width,
26
+ (t->penalty.flagged ? "F" : "-"));
27
+ break;
28
+ default:
29
+ printf("UNKNOWN %d\n", t->box.type);
30
+ }
31
+ }
32
+
33
+ float calculate_demerits(token *stream[], int old_i, token *new_item,
34
+ float r) {
35
+ token *old_item = stream[old_i];
36
+ float d;
37
+
38
+ if((new_item->penalty.type == PENALTY) &&
39
+ (new_item->penalty.penalty >= 0)) {
40
+ d = pow(1 + 100*(pow(abs(r), 3) + new_item->penalty.penalty), 2);
41
+ } else if((new_item->penalty.type == PENALTY) &&
42
+ (new_item->penalty.penalty != -INFINITY)) {
43
+ d = pow((1 + 100*(pow(abs(r), 3))), 2) -
44
+ pow(new_item->penalty.penalty, 2);
45
+ } else {
46
+ d = pow(1 + 100*(pow(abs(r), 3)), 2);
47
+ }
48
+
49
+ if(old_item->penalty.type == PENALTY && old_item->penalty.flagged &&
50
+ new_item->penalty.type == PENALTY && new_item->penalty.flagged)
51
+ d += FLAGGED_PENALTY;
52
+
53
+ return d;
54
+ }
55
+
56
+ float adjustment_ratio(float tw, float ty, float tz,
57
+ float aw, float ay, float az,
58
+ float target_width, token *stream[], int b) {
59
+ float w, y, z; /* w=width y=stretch z=shrink */
60
+ token *item_b = stream[b];
61
+
62
+ w = tw - aw; /* Non-adjusted width of the line. */
63
+
64
+ /* Add the penalty width (hyphen) if we are breaking at a penalty. */
65
+ if(item_b->penalty.type == PENALTY)
66
+ w += item_b->penalty.width;
67
+
68
+ if(w < target_width) {
69
+ y = ty - ay;
70
+ return (y > 0) ? (target_width - w) / y : INFINITY;
71
+ } else if(w > target_width) {
72
+ z = tz - az;
73
+ return (z > 0) ? (target_width - w) / z : INFINITY;
74
+ } else {
75
+ return 0.0;
76
+ }
77
+ }
78
+
79
+ void calculate_widths(token *stream[], float *tw, float *ty, float *tz){
80
+ int i;
81
+ token *p;
82
+ for(i=0; (p = stream[i]); i++) {
83
+ switch(p->box.type) {
84
+ case BOX:
85
+ return;
86
+ case GLUE:
87
+ *tw += p->glue.width;
88
+ *ty += p->glue.stretch;
89
+ *tz += p->glue.shrink;
90
+ break;
91
+ case PENALTY:
92
+ if((p->penalty.penalty == -INFINITY) && (i > 0))
93
+ return;
94
+ }
95
+ }
96
+ }
97
+
98
+ void foreach_legal_breakpoint(token *stream[], float width, float threshold,
99
+ void (*fn)(token **, int, float, float, float, float, float)) {
100
+ float tw=0, ty=0, tz=0;
101
+ int i;
102
+ token *t;
103
+
104
+ for(i=0; (t = stream[i]); i++) {
105
+ switch(t->box.type) {
106
+ case BOX:
107
+ tw += t->box.width;
108
+ break;
109
+ case GLUE:
110
+ if(stream[i-1]->box.type == BOX)
111
+ fn(stream, i, tw, ty, tz, width, threshold);
112
+ tw += t->glue.width;
113
+ ty += t->glue.stretch;
114
+ tz += t->glue.shrink;
115
+ break;
116
+ case PENALTY:
117
+ if(t->penalty.penalty != INFINITY)
118
+ fn(stream, i, tw, ty, tz, width, threshold);
119
+ break;
120
+ }
121
+ }
122
+ }
123
+
124
/* Maps an adjustment ratio onto one of four fitness classes:
 *   0 when ratio < -0.5, 1 when ratio < 0.5, 2 when ratio < 1, 3 otherwise. */
int fitness_class(float ratio) {
  int cls = 3;

  /* Thresholds are applied from loosest to tightest, so the last one that
   * matches decides the class. */
  if(ratio < 1)
    cls = 2;
  if(ratio < 0.5)
    cls = 1;
  if(ratio < -0.5)
    cls = 0;

  return cls;
}
133
+
134
+ void concat_new_active_nodes(token *stream[], float total_width, float
135
+ total_stretch, float total_shrink, best_breakpoint best[4], int i,
136
+ breakpoint *active, breakpoint **p_previous_node) {
137
+ float lowest_demerits = INFINITY;
138
+ float tw = total_width, ty = total_stretch, tz = total_shrink;
139
+ int fclass;
140
+ breakpoint *bp;
141
+
142
+ for(fclass=0; fclass<4; fclass++)
143
+ if(best[fclass].demerits < lowest_demerits)
144
+ lowest_demerits = best[fclass].demerits;
145
+
146
+ calculate_widths(stream + i, &tw, &ty, &tz);
147
+
148
+ for(fclass=0; fclass<4; fclass++) {
149
+ if((best[fclass].demerits == INFINITY) ||
150
+ (best[fclass].demerits > lowest_demerits + GAMMA))
151
+ continue;
152
+
153
+ /* Create and activate node */
154
+ bp = malloc(sizeof(breakpoint));
155
+
156
+ bp->position = i;
157
+ bp->line = best[fclass].bp->line + 1;
158
+ bp->fitness_class = fclass;
159
+
160
+ bp->total_width = tw;
161
+ bp->total_stretch = ty;
162
+ bp->total_shrink = tz;
163
+
164
+ bp->total_demerits = best[fclass].demerits;
165
+ bp->ratio = best[fclass].ratio;
166
+
167
+ bp->previous = best[fclass].bp;
168
+ bp->link = active;
169
+
170
+ if(*p_previous_node)
171
+ (*p_previous_node)->link = bp;
172
+ else
173
+ active_nodes = bp;
174
+
175
+ *p_previous_node = bp;
176
+ }
177
+ }
178
+
179
+ void main_loop(token *stream[], int i, float tw, float ty, float tz,
180
+ float width, float threshold) {
181
+ breakpoint *active, *next_node, *previous_node;
182
+ best_breakpoint best[4];
183
+ int current_line;
184
+ float ratio;
185
+ float demerits;
186
+ int fclass;
187
+
188
+ if(active_nodes == NULL) {
189
+ /* TODO: be nicer */
190
+ printf("No feasible solution. Try relaxing threshold.");
191
+ exit(1);
192
+ }
193
+
194
+ active = active_nodes;
195
+ previous_node = NULL;
196
+ next_node = NULL;
197
+
198
+ while(active) {
199
+ best[0].demerits = INFINITY;
200
+ best[1].demerits = INFINITY;
201
+ best[2].demerits = INFINITY;
202
+ best[3].demerits = INFINITY;
203
+
204
+ while(active) {
205
+ current_line = active->line + 1;
206
+ next_node = active->link;
207
+
208
+ /* TODO: width can be replaced by a line-specific width for line j */
209
+ ratio = adjustment_ratio(tw, ty, tz, active->total_width,
210
+ active->total_stretch, active->total_shrink, width,
211
+ stream, i);
212
+
213
+ if((ratio < -1) || (is_penalty(stream[i]) &&
214
+ (stream[i]->penalty.penalty == -INFINITY))) {
215
+ /* Remove active node from the list */
216
+ if(previous_node)
217
+ previous_node->link = next_node;
218
+ else
219
+ active_nodes = next_node;
220
+ /* TODO: put active on the passive list or free? */
221
+ } else {
222
+ previous_node = active;
223
+ }
224
+
225
+ if((ratio >= -1) && (ratio <= threshold)) {
226
+ demerits = calculate_demerits(stream, active->position, stream[i],
227
+ ratio) + active->total_demerits;
228
+ fclass = fitness_class(ratio);
229
+
230
+ /* Penalize consecutive lines more than one fitness class away from
231
+ * each other. */
232
+ if(abs(fclass - active->fitness_class) > 1)
233
+ demerits += FITNESS_PENALTY;
234
+
235
+ /* Update high scores if this is a new best. */
236
+ if(demerits < best[fclass].demerits) {
237
+ best[fclass].bp = active;
238
+ best[fclass].demerits = demerits;
239
+ best[fclass].ratio = ratio;
240
+ }
241
+ }
242
+
243
+ /* Add nodes to the active list before moving to the next line. */
244
+ active = next_node;
245
+ if(!active)
246
+ break;
247
+ if(active->line >= current_line)
248
+ break;
249
+ }
250
+
251
+ /* If we found any best nodes, add them to the active list. */
252
+ concat_new_active_nodes(stream, tw, ty, tz, best, i, active,
253
+ &previous_node);
254
+
255
+ active = next_node;
256
+ }
257
+ }
258
+
259
+ breakpoint *populate_active_nodes(token *stream[], float width,
260
+ float threshold) {
261
+ breakpoint *bp, *min_node;
262
+
263
+ active_nodes = make_starting_breakpoint();
264
+ foreach_legal_breakpoint(stream, width, threshold, main_loop);
265
+
266
+ /* Find node with minimum demerits */
267
+ min_node = NULL;
268
+ for(bp = active_nodes; bp; bp = bp->link)
269
+ if(!min_node || (bp->total_demerits < min_node->total_demerits))
270
+ min_node = bp;
271
+
272
+ return min_node;
273
+ }
274
+
275
+
@@ -0,0 +1,29 @@
1
+ #ifndef _PARAGRAPH_H_
2
+ #define _PARAGRAPH_H_
3
+
4
+ #include "breakpoint.h"
5
+
6
+ void inspect_token(token *t);
7
+
8
+ float calculate_demerits(token *stream[], int old_i, token *new_item,
9
+ float r);
10
+
11
+ float adjustment_ratio(float tw, float ty, float tz,
12
+ float aw, float ay, float az,
13
+ float target_width, token *stream[], int b);
14
+
15
+ void calculate_widths(token *stream[], float *tw, float *ty, float *tz);
16
+
17
+ void foreach_legal_breakpoint(token *stream[], float width, float threshold,
18
+ void (*fn)(token **, int, float, float, float, float, float));
19
+
20
+ int fitness_class(float ratio);
21
+
22
+ void main_loop(token *stream[], int i, float tw, float ty, float tz,
23
+ float width, float threshold);
24
+
25
+ breakpoint *populate_active_nodes(token *stream[], float width,
26
+ float threshold);
27
+
28
+ #endif
29
+
@@ -0,0 +1,57 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <math.h>
4
+ #include <string.h>
5
+
6
+ #include "tokens.h"
7
+
8
+ struct box *make_box(float width, char *content) {
9
+ int len;
10
+ struct box *t;
11
+
12
+ t = malloc(sizeof(struct box));
13
+ t->type = BOX;
14
+ t->width = width;
15
+
16
+ len = strlen(content);
17
+ t->content = malloc(len+1);
18
+ strncpy(t->content, content, len);
19
+ t->content[len] = '\0';
20
+
21
+ return t;
22
+ }
23
+
24
+ struct glue *make_glue(float width, float stretch, float shrink) {
25
+ struct glue *t = malloc(sizeof(struct glue));
26
+ t->type = GLUE;
27
+ t->width = width;
28
+ t->stretch = stretch;
29
+ t->shrink = shrink;
30
+ return t;
31
+ }
32
+
33
+ struct penalty *make_penalty(float width, float penalty, int flagged) {
34
+ struct penalty *t = malloc(sizeof(struct penalty));
35
+ t->type = PENALTY;
36
+ t->width = width;
37
+ t->penalty = penalty;
38
+ t->flagged = flagged;
39
+ return t;
40
+ }
41
+
42
+ int token_type(token *t) {
43
+ return t->box.type;
44
+ }
45
+
46
+ int is_box(token *t) {
47
+ return (t->box.type == BOX);
48
+ }
49
+
50
+ int is_penalty(token *t) {
51
+ return (t->penalty.type == PENALTY);
52
+ }
53
+
54
+ int is_glue(token *t) {
55
+ return (t->glue.type == GLUE);
56
+ }
57
+