crawdad 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +56 -0
- data/ext/crawdad/Makefile +25 -0
- data/ext/crawdad/breakpoint.h +53 -0
- data/ext/crawdad/paragraph.c +275 -0
- data/ext/crawdad/paragraph.h +29 -0
- data/ext/crawdad/tokens.c +57 -0
- data/ext/crawdad/tokens.h +41 -0
- data/lib/crawdad.rb +18 -0
- data/lib/crawdad/breakpoint.rb +82 -0
- data/lib/crawdad/compatibility.rb +12 -0
- data/lib/crawdad/ffi.rb +7 -0
- data/lib/crawdad/ffi/breakpoint_node.rb +36 -0
- data/lib/crawdad/ffi/paragraph.rb +58 -0
- data/lib/crawdad/ffi/tokens.rb +71 -0
- data/lib/crawdad/native.rb +11 -0
- data/lib/crawdad/paragraph.rb +293 -0
- data/lib/crawdad/prawn_tokenizer.rb +139 -0
- data/lib/crawdad/tokens.rb +48 -0
- metadata +75 -0
data/Rakefile
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# Rakefile for Crawdad: builds the native library in ext/crawdad, runs the
# specs, generates RDoc, and packages the gem.
require 'rubygems'
require 'rake'
require 'rake/testtask'
require 'rake/rdoctask'
require 'rake/gempackagetask'

CRAWDAD_VERSION = '0.0.1'

task :default => [:build]

# Compile the C library. `sh` raises when make exits non-zero, whereas a bare
# `system` call only returns false and lets the task succeed. Because the gem
# lists this Rakefile as its extension builder, a broken compile must fail
# loudly rather than produce a gem without its shared library.
task :build do
  sh "make -Cext/crawdad"
end

desc "Run all tests, test-spec required"
Rake::TestTask.new do |test|
  test.libs << "spec"
  test.test_files = Dir[ "spec/*_spec.rb" ]
  test.verbose = true
end

desc "Generate documentation"
Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_files.include("README", "lib/")
  rdoc.main = "README"
  rdoc.rdoc_dir = "doc/html"
  rdoc.title = "Crawdad Documentation"
end

spec = Gem::Specification.new do |spec|
  spec.name = 'crawdad'
  spec.version = CRAWDAD_VERSION
  spec.platform = Gem::Platform::RUBY
  spec.summary = "Knuth-Plass linebreaking for Ruby"
  spec.files = FileList["lib/**/**/*"] + FileList["ext/crawdad/*"]
  spec.require_paths << 'ext'

  # Ship any already-built binaries, and register this Rakefile as the
  # extension builder so the default (:build) task runs at install time.
  binaries = FileList['ext/crawdad/*.bundle', 'ext/crawdad/*.so']
  spec.extensions << 'Rakefile'
  spec.files += binaries.to_a

  spec.has_rdoc = true
  spec.rdoc_options << '--title' << 'Crawdad Documentation' << '-q'
  spec.author = 'Brad Ediger'
  spec.email = 'brad.ediger@madriska.com'
  spec.homepage = 'http://github.com/madriska/crawdad'
  spec.description = <<END_DESC
Crawdad is an implementation of Knuth-Plass linebreaking (justification)
for Ruby.
END_DESC
end

Rake::GemPackageTask.new(spec) do |pkg|
  pkg.need_tar = true
end
|
56
|
+
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Builds the crawdad shared library (crawdad.so, or crawdad.bundle on Darwin)
# from tokens.c and paragraph.c.

OS:=$(shell uname | sed 's/[-_].*//')
CFLAGS=-Wall -O2 -fPIC
# Debug build alternative:
#CFLAGS=-Wall -fPIC -g
SHARED=-shared
SOEXT:=.so

objects = tokens.o paragraph.o
headers = tokens.h paragraph.h breakpoint.h

ifeq (${OS},Darwin)
  SHARED = -dynamiclib
  SOEXT:=.bundle
endif

# Neither target names a real file.
.PHONY: all clean

all: crawdad$(SOEXT)

# paragraph.c calls pow(), so link libm explicitly instead of relying on the
# host process having already loaded it.
crawdad$(SOEXT): $(objects)
	$(CC) $(SHARED) -o crawdad$(SOEXT) $(objects) $(LDFLAGS) -lm

# Every object depends on every header, so a header change rebuilds all.
%.o: %.c $(headers)
	$(CC) -c -o $@ $< $(CFLAGS)

# -f: do not fail when there is nothing to remove.
clean:
	rm -f *.o *$(SOEXT)
|
25
|
+
|
@@ -0,0 +1,53 @@
|
|
1
|
+
#ifndef CRAWDAD_BREAKPOINT_H
#define CRAWDAD_BREAKPOINT_H

#include <stdlib.h> /* malloc, NULL */

/* A feasible line break, doubling as a node in the list of active
 * breakpoints. */
typedef struct breakpoint {
  int position;       /* Index in the token stream at which the break occurs. */
  int line;           /* Number of the line ending at this break (start is 0). */
  int fitness_class;  /* Class (0..3) of the line ending at this break. */

  float total_width;   /* Running width total recorded for this break. */
  float total_stretch; /* Running stretch total recorded for this break. */
  float total_shrink;  /* Running shrink total recorded for this break. */
  float total_demerits; /* Accumulated demerits of the best path to here. */

  float ratio;        /* Adjustment ratio of the line ending at this break. */

  struct breakpoint *previous; /* Break that ends the preceding line. */
  struct breakpoint *link;     /* Next node in the active list. */
} breakpoint;

/* Head of the active-node list.
 * NOTE(review): this is a definition living in a header, and
 * make_starting_breakpoint below is a non-static function definition, so the
 * header should be included from only one translation unit. */
struct breakpoint *active_nodes;

/* Allocates the breakpoint that represents the start of the paragraph:
 * position 0, line 0, fitness class 1, all totals zero, no neighbours.
 *
 * Returns the new node, or NULL if the allocation fails. The caller owns the
 * returned memory. */
breakpoint *make_starting_breakpoint(void) {
  breakpoint *bp = malloc(sizeof *bp);

  if (bp == NULL)
    return NULL;

  bp->position      = 0;
  bp->line          = 0;
  bp->fitness_class = 1;

  bp->total_width    = 0.0;
  bp->total_stretch  = 0.0;
  bp->total_shrink   = 0.0;
  bp->total_demerits = 0.0;

  bp->ratio = 0.0;

  bp->previous = NULL;
  bp->link     = NULL;

  return bp;
}

/* Holds information about the best breakpoint found so far for a particular
 * fitness class. */
typedef struct best_breakpoint {
  breakpoint *bp;  /* Active node the best line would start from. */
  float demerits;  /* Total demerits of the path through bp. */
  float ratio;     /* Adjustment ratio of the candidate line. */
} best_breakpoint;

#endif
|
53
|
+
|
@@ -0,0 +1,275 @@
|
|
1
|
+
#include <stdio.h>
|
2
|
+
#include <stdlib.h>
|
3
|
+
#include <math.h>
|
4
|
+
|
5
|
+
#include "tokens.h"
|
6
|
+
#include "paragraph.h"
|
7
|
+
#include "breakpoint.h"
|
8
|
+
|
9
|
+
#define FLAGGED_PENALTY 3000
|
10
|
+
#define FITNESS_PENALTY 100
|
11
|
+
|
12
|
+
#define GAMMA INFINITY
|
13
|
+
|
14
|
+
/* Debugging aid: prints the token's address followed by a one-line
 * description of its fields (or "UNKNOWN <type>" for an unrecognised type)
 * to stdout. */
void inspect_token(token *t) {
  printf("(0x%02lX) ", (unsigned long)t);

  if(t->box.type == BOX) {
    printf("BOX %f \"%s\"\n", t->box.width, t->box.content);
    return;
  }

  if(t->box.type == GLUE) {
    printf("GLUE %f %f %f\n", t->glue.width, t->glue.stretch,
        t->glue.shrink);
    return;
  }

  if(t->box.type == PENALTY) {
    printf("PENALTY %f %f %s\n", t->penalty.penalty, t->penalty.width,
        (t->penalty.flagged ? "F" : "-"));
    return;
  }

  printf("UNKNOWN %d\n", t->box.type);
}
|
32
|
+
|
33
|
+
/* Computes the demerits of a line that runs from the break at stream[old_i]
 * to a break at new_item, given the line's adjustment ratio r.
 *
 * With badness = 100 * |r|^3 and p = new_item's penalty value:
 *   - penalty token with p >= 0:          (1 + badness + p)^2
 *   - penalty token with finite p < 0:    (1 + badness)^2 - p^2
 *   - anything else (incl. forced break): (1 + badness)^2
 * FLAGGED_PENALTY is added when both ends of the line are flagged penalties.
 *
 * Two defects are fixed here: abs() is the integer function, so it truncated
 * every |r| < 1 to zero; fabs() is used instead. The penalty was also
 * multiplied by 100 in the first case because of a misplaced parenthesis. */
float calculate_demerits(token *stream[], int old_i, token *new_item,
    float r) {
  token *old_item = stream[old_i];
  double badness = 100.0 * pow(fabs(r), 3);
  float d;

  if((new_item->penalty.type == PENALTY) &&
      (new_item->penalty.penalty >= 0)) {
    d = pow(1 + badness + new_item->penalty.penalty, 2);
  } else if((new_item->penalty.type == PENALTY) &&
      (new_item->penalty.penalty != -INFINITY)) {
    d = pow(1 + badness, 2) - pow(new_item->penalty.penalty, 2);
  } else {
    d = pow(1 + badness, 2);
  }

  /* Discourage two flagged breaks in a row. */
  if(old_item->penalty.type == PENALTY && old_item->penalty.flagged &&
      new_item->penalty.type == PENALTY && new_item->penalty.flagged)
    d += FLAGGED_PENALTY;

  return d;
}
|
55
|
+
|
56
|
+
/* Returns the adjustment ratio for a line that starts at an active node and
 * ends with a break at stream[b].
 *
 * tw/ty/tz are the running width/stretch/shrink totals at the break; aw/ay/az
 * are the totals stored on the active node. The natural line width is
 * tw - aw, plus the penalty's width when stream[b] is a penalty.
 *
 * A line shorter than target_width yields (target - width) / stretch; a
 * longer one yields (target - width) / shrink (a negative value); an exact
 * fit yields 0. When the needed stretch or shrink is not positive the result
 * is +INFINITY in both cases. */
float adjustment_ratio(float tw, float ty, float tz,
    float aw, float ay, float az,
    float target_width, token *stream[], int b) {
  token *break_item = stream[b];
  float natural = tw - aw; /* Non-adjusted width of the line. */
  float flex;              /* Stretch or shrink available on the line. */

  /* Add the penalty width (hyphen) if we are breaking at a penalty. */
  if(break_item->penalty.type == PENALTY)
    natural += break_item->penalty.width;

  if(natural < target_width) {
    flex = ty - ay;
    if(flex > 0)
      return (target_width - natural) / flex;
    return INFINITY;
  }

  if(natural > target_width) {
    flex = tz - az;
    if(flex > 0)
      return (target_width - natural) / flex;
    return INFINITY;
  }

  return 0.0;
}
|
78
|
+
|
79
|
+
/* Walks the NULL-terminated token array from its first element and adds the
 * width, stretch and shrink of each glue token to *tw, *ty and *tz.
 *
 * The walk stops at the first box, at a penalty of -INFINITY that is not the
 * first element, or at the terminating NULL. */
void calculate_widths(token *stream[], float *tw, float *ty, float *tz){
  token **cursor;
  token *item;

  for(cursor = stream; (item = *cursor); cursor++) {
    if(item->box.type == BOX)
      return;

    if(item->box.type == GLUE) {
      *tw += item->glue.width;
      *ty += item->glue.stretch;
      *tz += item->glue.shrink;
      continue;
    }

    /* A forced break ends the scan unless it is the very first token. */
    if((item->box.type == PENALTY) &&
        (item->penalty.penalty == -INFINITY) && (cursor != stream))
      return;
  }
}
|
97
|
+
|
98
|
+
/* Walks the NULL-terminated token stream and calls fn once for every legal
 * breakpoint: a glue token immediately preceded by a box, or a penalty whose
 * value is not +INFINITY.
 *
 * fn receives (stream, index, tw, ty, tz, width, threshold), where tw/ty/tz
 * are the width, stretch and shrink accumulated from tokens before the
 * index. Box widths and glue width/stretch/shrink are accumulated; penalties
 * add nothing.
 *
 * The glue case now checks i > 0 before reading stream[i-1]; previously a
 * stream that began with glue caused a read before the start of the array. */
void foreach_legal_breakpoint(token *stream[], float width, float threshold,
    void (*fn)(token **, int, float, float, float, float, float)) {
  float tw=0, ty=0, tz=0;
  int i;
  token *t;

  for(i=0; (t = stream[i]); i++) {
    switch(t->box.type) {
      case BOX:
        tw += t->box.width;
        break;
      case GLUE:
        /* Call fn before adding this glue, so the totals exclude it. */
        if((i > 0) && (stream[i-1]->box.type == BOX))
          fn(stream, i, tw, ty, tz, width, threshold);
        tw += t->glue.width;
        ty += t->glue.stretch;
        tz += t->glue.shrink;
        break;
      case PENALTY:
        if(t->penalty.penalty != INFINITY)
          fn(stream, i, tw, ty, tz, width, threshold);
        break;
    }
  }
}
|
123
|
+
|
124
|
+
/* Maps an adjustment ratio to a fitness class:
 *   0 for ratio < -0.5, 1 for ratio < 0.5, 2 for ratio < 1, otherwise 3. */
int fitness_class(float ratio) {
  int fclass = 3;

  if(ratio < -0.5) {
    fclass = 0;
  } else if(ratio < 0.5) {
    fclass = 1;
  } else if(ratio < 1) {
    fclass = 2;
  }

  return fclass;
}
|
133
|
+
|
134
|
+
void concat_new_active_nodes(token *stream[], float total_width, float
|
135
|
+
total_stretch, float total_shrink, best_breakpoint best[4], int i,
|
136
|
+
breakpoint *active, breakpoint **p_previous_node) {
|
137
|
+
float lowest_demerits = INFINITY;
|
138
|
+
float tw = total_width, ty = total_stretch, tz = total_shrink;
|
139
|
+
int fclass;
|
140
|
+
breakpoint *bp;
|
141
|
+
|
142
|
+
for(fclass=0; fclass<4; fclass++)
|
143
|
+
if(best[fclass].demerits < lowest_demerits)
|
144
|
+
lowest_demerits = best[fclass].demerits;
|
145
|
+
|
146
|
+
calculate_widths(stream + i, &tw, &ty, &tz);
|
147
|
+
|
148
|
+
for(fclass=0; fclass<4; fclass++) {
|
149
|
+
if((best[fclass].demerits == INFINITY) ||
|
150
|
+
(best[fclass].demerits > lowest_demerits + GAMMA))
|
151
|
+
continue;
|
152
|
+
|
153
|
+
/* Create and activate node */
|
154
|
+
bp = malloc(sizeof(breakpoint));
|
155
|
+
|
156
|
+
bp->position = i;
|
157
|
+
bp->line = best[fclass].bp->line + 1;
|
158
|
+
bp->fitness_class = fclass;
|
159
|
+
|
160
|
+
bp->total_width = tw;
|
161
|
+
bp->total_stretch = ty;
|
162
|
+
bp->total_shrink = tz;
|
163
|
+
|
164
|
+
bp->total_demerits = best[fclass].demerits;
|
165
|
+
bp->ratio = best[fclass].ratio;
|
166
|
+
|
167
|
+
bp->previous = best[fclass].bp;
|
168
|
+
bp->link = active;
|
169
|
+
|
170
|
+
if(*p_previous_node)
|
171
|
+
(*p_previous_node)->link = bp;
|
172
|
+
else
|
173
|
+
active_nodes = bp;
|
174
|
+
|
175
|
+
*p_previous_node = bp;
|
176
|
+
}
|
177
|
+
}
|
178
|
+
|
179
|
+
/* Processes one legal breakpoint at stream[i] against every node on the
 * active list. tw/ty/tz are the running width, stretch and shrink totals at
 * i; width is the target line width; threshold is the largest acceptable
 * adjustment ratio.
 *
 * Active nodes are handled in runs. A run ends when the list ends or when the
 * next node's line number reaches the line that would follow the node just
 * handled. For each node in a run:
 *   - it is unlinked from the active list when the ratio is below -1 or
 *     stream[i] is a penalty of -INFINITY;
 *   - when the ratio lies in [-1, threshold], its demerits are computed and
 *     the best candidate for its fitness class is updated.
 * After each run, concat_new_active_nodes() inserts the new nodes.
 *
 * Prints a message and exits the process when the active list is empty on
 * entry. Unlinked nodes are not freed. */
void main_loop(token *stream[], int i, float tw, float ty, float tz,
    float width, float threshold) {
  breakpoint *node, *following, *kept;
  best_breakpoint best[4];
  int line_after;
  float r;
  float d;
  int klass;
  int k;

  if(!active_nodes) {
    /* TODO: be nicer */
    printf("No feasible solution. Try relaxing threshold.");
    exit(1);
  }

  node = active_nodes;
  kept = NULL;       /* Last node that stayed on (or was added to) the list. */
  following = NULL;

  while(node) {
    for(k=0; k<4; k++)
      best[k].demerits = INFINITY;

    do {
      line_after = node->line + 1;
      following = node->link;

      /* TODO: width can be replaced by a line-specific width for line j */
      r = adjustment_ratio(tw, ty, tz, node->total_width,
          node->total_stretch, node->total_shrink, width,
          stream, i);

      if((r < -1) || (is_penalty(stream[i]) &&
            (stream[i]->penalty.penalty == -INFINITY))) {
        /* Remove active node from the list */
        if(kept)
          kept->link = following;
        else
          active_nodes = following;
        /* TODO: put active on the passive list or free? */
      } else {
        kept = node;
      }

      if((r >= -1) && (r <= threshold)) {
        d = calculate_demerits(stream, node->position, stream[i], r) +
          node->total_demerits;
        klass = fitness_class(r);

        /* Penalize consecutive lines more than one fitness class away from
         * each other. */
        if(abs(klass - node->fitness_class) > 1)
          d += FITNESS_PENALTY;

        /* Update high scores if this is a new best. */
        if(d < best[klass].demerits) {
          best[klass].bp = node;
          best[klass].demerits = d;
          best[klass].ratio = r;
        }
      }

      /* Add nodes to the active list before moving to the next line. */
      node = following;
    } while(node && (node->line < line_after));

    /* If we found any best nodes, add them to the active list. */
    concat_new_active_nodes(stream, tw, ty, tz, best, i, node, &kept);

    node = following;
  }
}
|
258
|
+
|
259
|
+
/* Runs the line-breaking pass over the NULL-terminated token stream for the
 * given line width and ratio threshold.
 *
 * Resets the global active list to a single starting breakpoint, feeds every
 * legal breakpoint through main_loop, then returns the remaining active node
 * with the lowest total demerits (the first such node on ties), or NULL when
 * the list is empty. The previous contents of active_nodes are overwritten,
 * not freed. */
breakpoint *populate_active_nodes(token *stream[], float width,
    float threshold) {
  breakpoint *cursor;
  breakpoint *winner = NULL;

  active_nodes = make_starting_breakpoint();
  foreach_legal_breakpoint(stream, width, threshold, main_loop);

  /* Find node with minimum demerits */
  cursor = active_nodes;
  while(cursor) {
    if(!winner || (cursor->total_demerits < winner->total_demerits))
      winner = cursor;
    cursor = cursor->link;
  }

  return winner;
}
|
274
|
+
|
275
|
+
|
@@ -0,0 +1,29 @@
|
|
1
|
+
/* Declarations for the paragraph line-breaking routines in paragraph.c.
 *
 * The `token` type is not declared here; include tokens.h before this
 * header. The include guard avoids the reserved underscore-plus-capital
 * identifier form. */
#ifndef CRAWDAD_PARAGRAPH_H
#define CRAWDAD_PARAGRAPH_H

#include "breakpoint.h"

/* Prints a one-line description of token t to stdout. */
void inspect_token(token *t);

/* Demerits of a line from the break at stream[old_i] to a break at new_item,
 * given adjustment ratio r. */
float calculate_demerits(token *stream[], int old_i, token *new_item,
    float r);

/* Adjustment ratio of a line ending at stream[b]. tw/ty/tz are the running
 * width/stretch/shrink totals at the break, aw/ay/az the totals stored on the
 * active node the line starts from. */
float adjustment_ratio(float tw, float ty, float tz,
    float aw, float ay, float az,
    float target_width, token *stream[], int b);

/* Adds the width/stretch/shrink of the glue at the front of stream to *tw,
 * *ty and *tz, stopping at the first box. */
void calculate_widths(token *stream[], float *tw, float *ty, float *tz);

/* Calls fn(stream, index, tw, ty, tz, width, threshold) for every legal
 * breakpoint in the NULL-terminated stream. */
void foreach_legal_breakpoint(token *stream[], float width, float threshold,
    void (*fn)(token **, int, float, float, float, float, float));

/* Maps an adjustment ratio to a fitness class in 0..3. */
int fitness_class(float ratio);

/* Handles one legal breakpoint at stream[i]; has the callback signature
 * foreach_legal_breakpoint expects. */
void main_loop(token *stream[], int i, float tw, float ty, float tz,
    float width, float threshold);

/* Runs the full pass and returns the active node with the lowest total
 * demerits, or NULL when none remains. */
breakpoint *populate_active_nodes(token *stream[], float width,
    float threshold);

#endif
|
29
|
+
|
@@ -0,0 +1,57 @@
|
|
1
|
+
#include <stdio.h>
|
2
|
+
#include <stdlib.h>
|
3
|
+
#include <math.h>
|
4
|
+
#include <string.h>
|
5
|
+
|
6
|
+
#include "tokens.h"
|
7
|
+
|
8
|
+
struct box *make_box(float width, char *content) {
|
9
|
+
int len;
|
10
|
+
struct box *t;
|
11
|
+
|
12
|
+
t = malloc(sizeof(struct box));
|
13
|
+
t->type = BOX;
|
14
|
+
t->width = width;
|
15
|
+
|
16
|
+
len = strlen(content);
|
17
|
+
t->content = malloc(len+1);
|
18
|
+
strncpy(t->content, content, len);
|
19
|
+
t->content[len] = '\0';
|
20
|
+
|
21
|
+
return t;
|
22
|
+
}
|
23
|
+
|
24
|
+
struct glue *make_glue(float width, float stretch, float shrink) {
|
25
|
+
struct glue *t = malloc(sizeof(struct glue));
|
26
|
+
t->type = GLUE;
|
27
|
+
t->width = width;
|
28
|
+
t->stretch = stretch;
|
29
|
+
t->shrink = shrink;
|
30
|
+
return t;
|
31
|
+
}
|
32
|
+
|
33
|
+
struct penalty *make_penalty(float width, float penalty, int flagged) {
|
34
|
+
struct penalty *t = malloc(sizeof(struct penalty));
|
35
|
+
t->type = PENALTY;
|
36
|
+
t->width = width;
|
37
|
+
t->penalty = penalty;
|
38
|
+
t->flagged = flagged;
|
39
|
+
return t;
|
40
|
+
}
|
41
|
+
|
42
|
+
/* Returns the type tag of token t, read through its box member. */
int token_type(token *t) {
  int tag = t->box.type;

  return tag;
}
|
45
|
+
|
46
|
+
/* Returns 1 when t is a box token, 0 otherwise. */
int is_box(token *t) {
  if(t->box.type == BOX)
    return 1;
  return 0;
}
|
49
|
+
|
50
|
+
/* Returns 1 when t is a penalty token, 0 otherwise. */
int is_penalty(token *t) {
  if(t->penalty.type == PENALTY)
    return 1;
  return 0;
}
|
53
|
+
|
54
|
+
/* Returns 1 when t is a glue token, 0 otherwise. */
int is_glue(token *t) {
  if(t->glue.type == GLUE)
    return 1;
  return 0;
}
|
57
|
+
|