crawdad 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +13 -0
- data/README.markdown +61 -0
- data/Rakefile +4 -26
- data/crawdad.gemspec +24 -0
- data/ext/crawdad/Makefile +1 -1
- data/ext/crawdad/crawdad.bundle +0 -0
- data/ext/crawdad/paragraph.c +1 -1
- data/ext/crawdad/paragraph.o +0 -0
- data/ext/crawdad/tokens.o +0 -0
- data/lib/crawdad/ffi/tokens.rb +13 -0
- data/lib/crawdad/prawn_tokenizer.rb +100 -45
- metadata +31 -18
data/CHANGELOG
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
= Version 0.1.0 (April 22, 2010)
|
2
|
+
|
3
|
+
* Add support for centered, ragged-right, and ragged-left text.
|
4
|
+
|
5
|
+
* Work around an issue with hyphenating some words like "to" using
|
6
|
+
text-hyphen.
|
7
|
+
|
8
|
+
* Fix segfault when a token stream starts with glue.
|
9
|
+
|
10
|
+
|
11
|
+
= Version 0.0.1 (March 19, 2010)
|
12
|
+
|
13
|
+
* Initial release.
|
data/README.markdown
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
# Crawdad: Knuth-Plass linebreaking for Ruby
|
2
|
+
|
3
|
+
Crawdad is a basic implementation of the Knuth-Plass linebreaking
|
4
|
+
algorithm for breaking paragraphs of text into lines. It uses a total-fit
|
5
|
+
method to ensure consistency across the entire paragraph at once. The
|
6
|
+
algorithm is a simplified version of the one used in the TeX typesetting
|
7
|
+
system.
|
8
|
+
|
9
|
+
## References
|
10
|
+
|
11
|
+
The canonical reference for this algorithm is "Breaking Paragraphs Into
|
12
|
+
Lines", by Donald E. Knuth and Michael F. Plass, originally published in
|
13
|
+
Software: Practice and Experience 11 (1981), pp. 1119-1184. It is reprinted
|
14
|
+
in the excellent monograph Digital Typography (Knuth 1999).
|
15
|
+
|
16
|
+
This implementation was inspired by Bram Stein's TypeSet project, an
|
17
|
+
implementation of Knuth-Plass in JavaScript that uses HTML5 Canvas.
|
18
|
+
|
19
|
+
http://www.bramstein.com/projects/typeset/
|
20
|
+
|
21
|
+
## To Do
|
22
|
+
|
23
|
+
### Short-Term
|
24
|
+
|
25
|
+
* Collect all of our constants and magic numbers and expose them somewhat to
|
26
|
+
the user. Survey other software (TeX?) to harvest good starting values for
|
27
|
+
these parameters.
|
28
|
+
|
29
|
+
* Fix the demerits calculation to the TeX "improved" formula (Digital
|
30
|
+
Typography p. 154). Thanks to Bram Stein for pointing this out.
|
31
|
+
|
32
|
+
* Implement the looseness parameter q (algorithm "Choose the appropriate
|
33
|
+
active node", Digital Typography p. 120).
|
34
|
+
|
35
|
+
### Long-Term
|
36
|
+
|
37
|
+
* Bring this into Prawn, and integrate (if possible) with the Text::Box API.
|
38
|
+
|
39
|
+
* The tokenizer could be smarter; it should recognize more than just
|
40
|
+
low-ASCII hyphens as hyphens / dashes, and it can get confused when
|
41
|
+
whitespace and hyphens interact.
|
42
|
+
|
43
|
+
* Automatically relax the thresholds when the constraints cannot be
|
44
|
+
satisfied? Or we could look into TeX's two-pass method (pp. 121-122).
|
45
|
+
|
46
|
+
## Acknowledgements
|
47
|
+
|
48
|
+
Thanks are due to the following individuals:
|
49
|
+
|
50
|
+
* Donald Knuth and Michael Plass created the original algorithm and data
|
51
|
+
structures.
|
52
|
+
|
53
|
+
* Bram Stein wrote the aforementioned JavaScript implementation of the
|
54
|
+
Knuth-Plass algorithm. His code was helpful in exposing some of the darker
|
55
|
+
corners of the original authors' description of the method.
|
56
|
+
|
57
|
+
## License
|
58
|
+
|
59
|
+
Crawdad is copyrighted free software, written by Brad Ediger. See the
|
60
|
+
LICENSE file for details.
|
61
|
+
|
data/Rakefile
CHANGED
@@ -4,10 +4,10 @@ require 'rake/testtask'
|
|
4
4
|
require 'rake/rdoctask'
|
5
5
|
require 'rake/gempackagetask'
|
6
6
|
|
7
|
-
|
8
|
-
|
7
|
+
# Build must be the default task, to fake out using a Makefile to build a
|
8
|
+
# non-Ruby extension with Rubygems. There's probably an easier way, but I can't
|
9
|
+
# find it.
|
9
10
|
task :default => [:build]
|
10
|
-
|
11
11
|
task :build do
|
12
12
|
system "make -Cext/crawdad"
|
13
13
|
end
|
@@ -27,29 +27,7 @@ Rake::RDocTask.new do |rdoc|
|
|
27
27
|
rdoc.title = "Crawdad Documentation"
|
28
28
|
end
|
29
29
|
|
30
|
-
spec = Gem::Specification.
|
31
|
-
spec.name = 'crawdad'
|
32
|
-
spec.version = CRAWDAD_VERSION
|
33
|
-
spec.platform = Gem::Platform::RUBY
|
34
|
-
spec.summary = "Knuth-Plass linebreaking for Ruby"
|
35
|
-
spec.files = FileList["lib/**/**/*"] + FileList["ext/crawdad/*"]
|
36
|
-
spec.require_paths << 'ext'
|
37
|
-
|
38
|
-
binaries = FileList['ext/crawdad/*.bundle', 'ext/crawdad/*.so']
|
39
|
-
spec.extensions << 'Rakefile'
|
40
|
-
spec.files += binaries.to_a
|
41
|
-
|
42
|
-
spec.has_rdoc = true
|
43
|
-
spec.rdoc_options << '--title' << 'Crawdad Documentation' << '-q'
|
44
|
-
spec.author = 'Brad Ediger'
|
45
|
-
spec.email = 'brad.ediger@madriska.com'
|
46
|
-
spec.homepage = 'http://github.com/madriska/crawdad'
|
47
|
-
spec.description = <<END_DESC
|
48
|
-
Crawdad is an implementation of Knuth-Plass linebreaking (justification)
|
49
|
-
for Ruby.
|
50
|
-
END_DESC
|
51
|
-
end
|
52
|
-
|
30
|
+
spec = Gem::Specification.load("crawdad.gemspec")
|
53
31
|
Rake::GemPackageTask.new(spec) do |pkg|
|
54
32
|
pkg.need_tar = true
|
55
33
|
end
|
data/crawdad.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
Gem::Specification.new do |spec|
|
2
|
+
spec.name = 'crawdad'
|
3
|
+
spec.version = '0.1.0'
|
4
|
+
spec.platform = Gem::Platform::RUBY
|
5
|
+
spec.summary = "Knuth-Plass linebreaking for Ruby"
|
6
|
+
spec.files = FileList["lib/**/**/*", "ext/crawdad/*", "README.markdown",
|
7
|
+
"crawdad.gemspec", "CHANGELOG"]
|
8
|
+
spec.require_paths << 'ext'
|
9
|
+
|
10
|
+
binaries = FileList['ext/crawdad/*.bundle', 'ext/crawdad/*.so']
|
11
|
+
spec.extensions << 'Rakefile'
|
12
|
+
spec.files += binaries.to_a
|
13
|
+
|
14
|
+
spec.has_rdoc = true
|
15
|
+
spec.rdoc_options << '--title' << 'Crawdad Documentation' << '-q'
|
16
|
+
spec.author = 'Brad Ediger'
|
17
|
+
spec.email = 'brad.ediger@madriska.com'
|
18
|
+
spec.homepage = 'http://github.com/madriska/crawdad'
|
19
|
+
spec.description = <<END_DESC
|
20
|
+
Crawdad is an implementation of Knuth-Plass linebreaking (justification)
|
21
|
+
for Ruby.
|
22
|
+
END_DESC
|
23
|
+
end
|
24
|
+
|
data/ext/crawdad/Makefile
CHANGED
Binary file
|
data/ext/crawdad/paragraph.c
CHANGED
@@ -107,7 +107,7 @@ void foreach_legal_breakpoint(token *stream[], float width, float threshold,
|
|
107
107
|
tw += t->box.width;
|
108
108
|
break;
|
109
109
|
case GLUE:
|
110
|
-
if(stream[i-1]->box.type == BOX)
|
110
|
+
if((i > 0) && (stream[i-1]->box.type == BOX))
|
111
111
|
fn(stream, i, tw, ty, tz, width, threshold);
|
112
112
|
tw += t->glue.width;
|
113
113
|
ty += t->glue.stretch;
|
Binary file
|
Binary file
|
data/lib/crawdad/ffi/tokens.rb
CHANGED
@@ -14,6 +14,10 @@ module Crawdad
|
|
14
14
|
layout :type, Type,
|
15
15
|
:width, :float,
|
16
16
|
:content, :string
|
17
|
+
|
18
|
+
def inspect
|
19
|
+
"(box %.2f %s)" % [self[:width], self[:content].inspect]
|
20
|
+
end
|
17
21
|
end
|
18
22
|
|
19
23
|
def box(width, content)
|
@@ -29,6 +33,10 @@ module Crawdad
|
|
29
33
|
:width, :float,
|
30
34
|
:stretch, :float,
|
31
35
|
:shrink, :float
|
36
|
+
|
37
|
+
def inspect
|
38
|
+
"(glue %.2f %.2f %.2f)" % [self[:width], self[:stretch], self[:shrink]]
|
39
|
+
end
|
32
40
|
end
|
33
41
|
|
34
42
|
def glue(width, stretch, shrink)
|
@@ -48,6 +56,11 @@ module Crawdad
|
|
48
56
|
:width, :float,
|
49
57
|
:penalty, :float,
|
50
58
|
:flagged, :int
|
59
|
+
|
60
|
+
def inspect
|
61
|
+
"(penalty %.2f %.2f#{" F" if self[:flagged] == 1})" %
|
62
|
+
[self[:penalty], self[:width]]
|
63
|
+
end
|
51
64
|
end
|
52
65
|
|
53
66
|
def penalty(penalty, width=0.0, flagged=false)
|
@@ -36,7 +36,15 @@ module Crawdad
|
|
36
36
|
# number of PDF points.
|
37
37
|
#
|
38
38
|
def paragraph(text, options={})
|
39
|
+
@align = options[:align] || :justify
|
40
|
+
|
39
41
|
hyphenator = if options[:hyphenation]
|
42
|
+
# Box-glue-penalty model does not easily permit optional hyphenation
|
43
|
+
# with the construction we use for centered text.
|
44
|
+
if @align == :center
|
45
|
+
raise ArgumentError, "Hyphenation is not supported with centered text"
|
46
|
+
end
|
47
|
+
|
40
48
|
begin
|
41
49
|
gem 'text-hyphen'
|
42
50
|
require 'text/hyphen'
|
@@ -49,78 +57,125 @@ module Crawdad
|
|
49
57
|
@hyphenators[language] ||= Text::Hyphen.new(:language => language)
|
50
58
|
end
|
51
59
|
|
52
|
-
stream = []
|
53
|
-
|
54
|
-
if w = options[:indent]
|
55
|
-
stream << box(w, "")
|
56
|
-
end
|
57
|
-
|
58
|
-
# Interword glue can stretch by half and shrink by a third.
|
59
|
-
# TODO: optimal stretch/shrink ratios
|
60
|
-
space_width = @pdf.width_of(" ")
|
61
|
-
interword_glue = glue(space_width,
|
62
|
-
space_width / 2.0,
|
63
|
-
space_width / 3.0)
|
64
|
-
|
65
|
-
# TODO: optimal values for sentence space w/y/z
|
66
|
-
sentence_space_width = space_width * 1.5
|
67
|
-
sentence_glue = glue(sentence_space_width,
|
68
|
-
sentence_space_width / 2.0,
|
69
|
-
sentence_space_width / 3.0)
|
60
|
+
stream = starting_tokens(options[:indent])
|
70
61
|
|
71
62
|
# Break paragraph on whitespace.
|
72
63
|
# TODO: how should "battle-\nfield" be tokenized?
|
73
|
-
text.strip.split(/\s+/)
|
64
|
+
words = text.strip.split(/\s+/)
|
65
|
+
|
66
|
+
words.each_with_index do |word, i|
|
74
67
|
w = StringScanner.new(word)
|
75
68
|
|
76
69
|
# For hyphenated words, follow each hyphen by a zero-width flagged
|
77
70
|
# penalty.
|
78
|
-
# TODO: recognize dashes in all their variants
|
79
71
|
while seg = w.scan(/[^-]+-/) # "night-time" --> "<<night->>time"
|
80
|
-
stream
|
72
|
+
stream += word_segment(seg, hyphenator)
|
81
73
|
end
|
82
74
|
|
83
|
-
stream
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
if w.rest =~ /\.$/
|
88
|
-
stream << sentence_glue
|
89
|
-
else
|
90
|
-
stream << interword_glue
|
75
|
+
stream += word_segment(w.rest, hyphenator)
|
76
|
+
|
77
|
+
unless i == words.length - 1
|
78
|
+
stream += interword_tokens
|
91
79
|
end
|
92
80
|
end
|
93
81
|
|
94
|
-
#
|
95
|
-
stream
|
96
|
-
|
97
|
-
# Finish paragraph with a penalty inhibiting a break, finishing glue (to
|
98
|
-
# pad out the last line), and a forced break to finish the paragraph.
|
99
|
-
stream << penalty(Infinity)
|
100
|
-
stream << glue(0, Infinity, 0)
|
101
|
-
stream << penalty(-Infinity)
|
82
|
+
# Add needed tokens to finish off the paragraph.
|
83
|
+
stream += finishing_tokens
|
102
84
|
|
103
85
|
stream
|
104
86
|
end
|
105
87
|
|
106
88
|
protected
|
107
89
|
|
108
|
-
#
|
109
|
-
#
|
110
|
-
|
90
|
+
# Width of one space.
|
91
|
+
#
|
92
|
+
def space
|
93
|
+
@space ||= @pdf.width_of(" ")
|
94
|
+
end
|
95
|
+
|
96
|
+
# Tokens used to start a paragraph. Accepts one argument, +indent_width+,
|
97
|
+
# the amount by which to indent the first line, which only really makes
|
98
|
+
# sense for justified or ragged-left text.
|
99
|
+
#
|
100
|
+
def starting_tokens(indent_width)
|
101
|
+
if @align == :center
|
102
|
+
[box(0, ""), glue(0, 3*space, 0)]
|
103
|
+
elsif indent_width
|
104
|
+
[box(w, "")]
|
105
|
+
else
|
106
|
+
[]
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
# Tokens used between words in a sentence. This depends on @align; the
|
111
|
+
# box-glue-penalty model is flexible enough to accommodate ragged (right or
|
112
|
+
# left), centered, or justified text.
|
111
113
|
#
|
112
|
-
|
114
|
+
# See Digital Typography pp. 93-95 for details.
|
115
|
+
#
|
116
|
+
def interword_tokens
|
117
|
+
case @align
|
118
|
+
when :justify
|
119
|
+
[glue(space, space / 2.0, space / 3.0)]
|
120
|
+
when :center
|
121
|
+
[glue(0, 3*space, 0), penalty(0), glue(space, -6*space, 0), box(0, ""),
|
122
|
+
penalty(Infinity), glue(0, 3*space, 0)]
|
123
|
+
else # :right, :left
|
124
|
+
[glue(0, 3*space, 0), penalty(0), glue(space, -3*space, 0)]
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
# Tokens representing a possible hyphenation point.
|
129
|
+
#
|
130
|
+
def optional_hyphen
|
131
|
+
hyphen = @pdf.width_of('-')
|
132
|
+
|
133
|
+
if @align == :justify
|
134
|
+
[penalty(50, hyphen, true)]
|
135
|
+
else # :left or :right (:center is incompatible with hyphenation)
|
136
|
+
# Hyphens cost 10 times more in unjustified text because we can usually
|
137
|
+
# do better to avoid them.
|
138
|
+
[penalty(Infinity), glue(0, 3*space, 0), penalty(500, hyphen, true),
|
139
|
+
glue(0, -3*space, 0)]
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
# Tokens to finish out a paragraph -- pad out the last line if needed, and
|
144
|
+
# force a break.
|
145
|
+
#
|
146
|
+
def finishing_tokens
|
147
|
+
if @align == :center
|
148
|
+
[glue(0, 3*space, 0), penalty(-Infinity)]
|
149
|
+
else
|
150
|
+
[penalty(Infinity), glue(0, Infinity, 0), penalty(-Infinity)]
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
# Returns tokens representing the given word. Hyphenates using the given
|
155
|
+
# +hyphenator+, if provided. Appends a zero-width flagged penalty if the
|
156
|
+
# given word ends in a hyphen.
|
157
|
+
#
|
158
|
+
def word_segment(word, hyphenator)
|
113
159
|
tokens = []
|
114
160
|
|
115
161
|
if hyphenator
|
116
|
-
|
162
|
+
begin
|
163
|
+
splits = hyphenator.hyphenate(word)
|
164
|
+
rescue NoMethodError => e
|
165
|
+
if e.message =~ /each_with_index/
|
166
|
+
# known issue with text-hyphen 1.0.0:
|
167
|
+
# http://rubyforge.org/tracker/index.php?func=detail&aid=28128&group_id=294&atid=1195
|
168
|
+
splits = []
|
169
|
+
else
|
170
|
+
raise
|
171
|
+
end
|
172
|
+
end
|
117
173
|
|
118
|
-
|
119
|
-
# For each hyphenated segment, add the box with an optional penalty.
|
174
|
+
# For each hyphenated segment, add the box with an optional hyphen.
|
120
175
|
[0, *splits].each_cons(2) do |a, b|
|
121
176
|
seg = word[a...b]
|
122
177
|
tokens << box(@pdf.width_of(seg), seg)
|
123
|
-
tokens
|
178
|
+
tokens += optional_hyphen
|
124
179
|
end
|
125
180
|
|
126
181
|
last = word[(splits.last || 0)..-1]
|
metadata
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crawdad
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 1
|
8
|
+
- 0
|
9
|
+
version: 0.1.0
|
5
10
|
platform: ruby
|
6
11
|
authors:
|
7
12
|
- Brad Ediger
|
@@ -9,11 +14,11 @@ autorequire:
|
|
9
14
|
bindir: bin
|
10
15
|
cert_chain: []
|
11
16
|
|
12
|
-
date: 2010-
|
17
|
+
date: 2010-04-22 00:00:00 -05:00
|
13
18
|
default_executable:
|
14
19
|
dependencies: []
|
15
20
|
|
16
|
-
description: Crawdad is an implementation of Knuth-Plass linebreaking (justification)
|
21
|
+
description: " Crawdad is an implementation of Knuth-Plass linebreaking (justification)\n for Ruby.\n"
|
17
22
|
email: brad.ediger@madriska.com
|
18
23
|
executables: []
|
19
24
|
|
@@ -22,28 +27,34 @@ extensions:
|
|
22
27
|
extra_rdoc_files: []
|
23
28
|
|
24
29
|
files:
|
25
|
-
- lib/crawdad
|
26
|
-
- lib/crawdad/prawn_tokenizer.rb
|
27
30
|
- lib/crawdad/breakpoint.rb
|
28
|
-
- lib/crawdad/native.rb
|
29
|
-
- lib/crawdad/ffi.rb
|
30
|
-
- lib/crawdad/paragraph.rb
|
31
31
|
- lib/crawdad/compatibility.rb
|
32
|
-
- lib/crawdad/
|
33
|
-
- lib/crawdad/ffi
|
32
|
+
- lib/crawdad/ffi/breakpoint_node.rb
|
34
33
|
- lib/crawdad/ffi/paragraph.rb
|
35
34
|
- lib/crawdad/ffi/tokens.rb
|
36
|
-
- lib/crawdad/ffi
|
35
|
+
- lib/crawdad/ffi.rb
|
36
|
+
- lib/crawdad/native.rb
|
37
|
+
- lib/crawdad/paragraph.rb
|
38
|
+
- lib/crawdad/prawn_tokenizer.rb
|
39
|
+
- lib/crawdad/tokens.rb
|
37
40
|
- lib/crawdad.rb
|
38
41
|
- ext/crawdad/breakpoint.h
|
39
|
-
- ext/crawdad/
|
40
|
-
- ext/crawdad/paragraph.h
|
42
|
+
- ext/crawdad/crawdad.bundle
|
41
43
|
- ext/crawdad/Makefile
|
42
|
-
- ext/crawdad/tokens.h
|
43
44
|
- ext/crawdad/paragraph.c
|
45
|
+
- ext/crawdad/paragraph.h
|
46
|
+
- ext/crawdad/paragraph.o
|
47
|
+
- ext/crawdad/tokens.c
|
48
|
+
- ext/crawdad/tokens.h
|
49
|
+
- ext/crawdad/tokens.o
|
50
|
+
- README.markdown
|
51
|
+
- crawdad.gemspec
|
52
|
+
- CHANGELOG
|
44
53
|
- Rakefile
|
45
54
|
has_rdoc: true
|
46
55
|
homepage: http://github.com/madriska/crawdad
|
56
|
+
licenses: []
|
57
|
+
|
47
58
|
post_install_message:
|
48
59
|
rdoc_options:
|
49
60
|
- --title
|
@@ -56,20 +67,22 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
56
67
|
requirements:
|
57
68
|
- - ">="
|
58
69
|
- !ruby/object:Gem::Version
|
70
|
+
segments:
|
71
|
+
- 0
|
59
72
|
version: "0"
|
60
|
-
version:
|
61
73
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
74
|
requirements:
|
63
75
|
- - ">="
|
64
76
|
- !ruby/object:Gem::Version
|
77
|
+
segments:
|
78
|
+
- 0
|
65
79
|
version: "0"
|
66
|
-
version:
|
67
80
|
requirements: []
|
68
81
|
|
69
82
|
rubyforge_project:
|
70
|
-
rubygems_version: 1.3.
|
83
|
+
rubygems_version: 1.3.6
|
71
84
|
signing_key:
|
72
|
-
specification_version:
|
85
|
+
specification_version: 3
|
73
86
|
summary: Knuth-Plass linebreaking for Ruby
|
74
87
|
test_files: []
|
75
88
|
|