crawdad 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,41 @@
1
/*
 * Token definitions for the Crawdad line breaker.
 *
 * A paragraph is described as a sequence of tokens, each of which is a box,
 * a glue, or a penalty. All three structs begin with the same two members
 * (`type`, then `width`), so the discriminator and the width can be read
 * through any member of the `token` union.
 *
 * The include guard deliberately avoids a leading underscore followed by an
 * uppercase letter; such identifiers are reserved for the implementation.
 */
#ifndef CRAWDAD_TOKENS_H
#define CRAWDAD_TOKENS_H

/* Discriminator stored in the `type` member of every token struct. */
enum token_type { BOX, GLUE, PENALTY };

/* A box: an item with a width and an associated C string. */
struct box {
  enum token_type type;
  float width;
  char * content;
};

/* A glue: a width plus the amounts by which it may stretch and shrink. */
struct glue {
  enum token_type type;
  float width;
  float stretch;
  float shrink;
};

/*
 * A penalty: a width, a penalty value, and a `flagged` marker (an int used
 * as a boolean).
 */
struct penalty {
  enum token_type type;
  float width;
  float penalty;
  int flagged;
};

/* Any one of the three token kinds; inspect `type` to find out which. */
typedef union {
  struct box box;
  struct glue glue;
  struct penalty penalty;
} token;

/*
 * Accessors implemented elsewhere. NOTE(review): presumably token_type()
 * returns the token's `type` member and the is_*() predicates return
 * nonzero on a match -- confirm against the implementation file.
 */
int token_type(token *);

int is_box(token *);

int is_penalty(token *);

int is_glue(token *);

#endif
41
+
data/lib/crawdad.rb ADDED
@@ -0,0 +1,18 @@
1
+ # encoding: utf-8
2
+ # Crawdad: Knuth-Plass linebreaking in Ruby.
3
+ #
4
+ # Copyright February 2010, Brad Ediger. All Rights Reserved.
5
+ #
6
+ # This is free software. Please see the LICENSE and COPYING files for details.
7
+
8
+ require 'crawdad/native'
9
+
10
+ begin
11
+ require 'crawdad/ffi'
12
+ rescue LoadError
13
+ end
14
+
15
+ if defined?(Prawn)
16
+ require 'crawdad/prawn_tokenizer'
17
+ end
18
+
@@ -0,0 +1,82 @@
1
+ # encoding: utf-8
2
+ # Crawdad: Knuth-Plass linebreaking in Ruby.
3
+ #
4
+ # Copyright February 2010, Brad Ediger. All Rights Reserved.
5
+ #
6
+ # This is free software. Please see the LICENSE and COPYING files for details.
7
+
8
module Crawdad

  # A node in the breakpoint list. Each node records one candidate line break
  # together with the running totals needed to evaluate the line that follows
  # it, and a link back to the break that precedes it.
  #
  class Breakpoint

    # Returns the node used for the head of the active list -- represents the
    # starting point.
    #
    def self.starting_node
      new(0,    # position
          0,    # line
          1,    # fitness_class
          0,    # total_width
          0,    # total_stretch
          0,    # total_shrink
          0,    # total_demerits
          0.0,  # ratio
          nil)  # previous
    end

    # Creates a breakpoint node with the given parameters. Each argument is
    # stored as-is and exposed through the reader of the same name.
    #
    def initialize(position, line, fitness_class, total_width, total_stretch,
                   total_shrink, total_demerits, ratio, previous)
      @position       = position
      @line           = line
      @fitness_class  = fitness_class
      @total_width    = total_width
      @total_stretch  = total_stretch
      @total_shrink   = total_shrink
      @total_demerits = total_demerits
      @ratio          = ratio
      @previous       = previous
    end

    # Index of this breakpoint within the sequence of items. Writable, unlike
    # the other attributes.
    #
    attr_accessor :position

    # Number of the line ending at this breakpoint.
    #
    attr_reader :line

    # Fitness class (0=tight, 1=normal, 2=loose, 3=very loose) of the line
    # ending at this breakpoint.
    #
    attr_reader :fitness_class

    # Total width up to after(self). Used to calculate adjustment ratios.
    #
    attr_reader :total_width

    # Total stretch up to after(self). Used to calculate adjustment ratios.
    #
    attr_reader :total_stretch

    # Total shrink up to after(self). Used to calculate adjustment ratios.
    #
    attr_reader :total_shrink

    # Minimum total demerits up to this breakpoint.
    #
    attr_reader :total_demerits

    # The ratio of stretch or shrink used for the line ending at this
    # breakpoint. 0 is a perfect fit; +1 means 100% of the stretch has been
    # used; -1 means all of the shrink has been used.
    #
    attr_reader :ratio

    # Link to the best preceding breakpoint (nil for the starting node).
    #
    attr_reader :previous

    # Short debugging representation showing only position and ratio.
    #
    def inspect
      "#<Breakpoint position=#{@position} ratio=#{@ratio}>"
    end

  end

end
@@ -0,0 +1,12 @@
1
# Compatibility shim: defines Enumerable#find_index only when Array does not
# already respond to it, so Rubies that ship the method are left untouched.
unless [].respond_to?(:find_index)
  module Enumerable
    # Returns the index of the first element for which the block is truthy,
    # or -- when no block is given -- the first element equal to +needle+.
    # Returns nil when nothing matches.
    def find_index(needle=nil, &b)
      each_with_index do |hay, i|
        matched = b ? b[hay] : needle == hay
        return i if matched
      end
      nil
    end
  end
end
@@ -0,0 +1,7 @@
1
+ require 'ffi'
2
+ require 'ffi-inliner'
3
+
4
+ require 'crawdad/ffi/tokens'
5
+ require 'crawdad/ffi/breakpoint_node'
6
+ require 'crawdad/ffi/paragraph'
7
+
@@ -0,0 +1,36 @@
1
+ require 'ffi'
2
+
3
module Crawdad
  extend FFI::Library

  # FFI view of a breakpoint node produced by the native extension.
  #
  # NOTE(review): the field order and types below must mirror the C struct
  # they map onto; that struct is not visible from this file -- confirm
  # against the extension source before changing the layout.
  #
  class BreakpointNode < FFI::Struct
    layout :position,       :int,
           :line,           :int,
           :fitness_class,  :int,
           :total_width,    :float,
           :total_stretch,  :float,
           :total_shrink,   :float,
           :total_demerits, :float,
           :ratio,          :float,
           :previous,       :pointer,
           :link,           :pointer

    # Reads the :position field.
    #
    def position
      self[:position]
    end

    # Writes +p+ into the :position field.
    #
    def position=(p)
      self[:position] = p
    end

    # Reads the :ratio field.
    #
    def ratio
      self[:ratio]
    end

    # Short debugging representation showing only position and ratio.
    #
    def inspect
      "#<BreakpointNode position=#{position} ratio=#{ratio}>"
    end
  end

end
36
+
@@ -0,0 +1,58 @@
1
+ require 'fileutils'
2
+ require 'ffi'
3
+
4
module Crawdad

  # Paragraph implementation that delegates the breakpoint search to the
  # native library through FFI.
  #
  class Paragraph

    # Bindings to the native library. Functions are attached with the
    # signatures listed here; their implementations live in the extension.
    #
    module C
      extend FFI::Library

      # Three directories above this file.
      Base = File.expand_path(File.dirname(__FILE__) + "/../../..")

      # Two candidate library file names are offered (.bundle and .so).
      ffi_lib ["#{Base}/ext/crawdad/crawdad.bundle",
               "#{Base}/ext/crawdad/crawdad.so"]

      # Token constructors.
      attach_function :make_box, [:float, :string], :pointer
      attach_function :make_glue, [:float, :float, :float], :pointer
      attach_function :make_penalty, [:float, :float, :bool], :pointer

      # Token inspection.
      attach_function :token_type, [:pointer], :int
      attach_function :is_box, [:pointer], :bool
      attach_function :is_glue, [:pointer], :bool
      attach_function :is_penalty, [:pointer], :bool

      # Breakpoint search; called below with (token array, width, threshold).
      attach_function :populate_active_nodes, [:pointer, :float, :float],
                      BreakpointNode

      attach_function :inspect_token, [:pointer], :void
    end

    # Stores +stream+ and copies its elements into a native pointer array.
    #
    # Options read here: +:line_widths+, +:width+, +:flagged_penalty+
    # (default 3000) and +:fitness_penalty+ (default 100). Of these, only
    # +:width+ is used by the methods of this class.
    #
    def initialize(stream, options={})
      @stream = stream

      # Set up C-accessible array of "token *"s. One slot more than the
      # stream length is allocated -- presumably so the C side sees a NULL
      # terminator; confirm against the extension source.
      @stream_ptr = FFI::MemoryPointer.new(:pointer, stream.length + 1)
      @stream_ptr.write_array_of_pointer(stream)

      @line_widths     = options[:line_widths]
      @width           = options[:width]
      @flagged_penalty = options[:flagged_penalty] || 3000
      @fitness_penalty = options[:fitness_penalty] || 100
    end

    # Runs the native breakpoint search and returns the resulting chain of
    # BreakpointNode objects as an array. The chain is followed through each
    # node's :previous pointer until a null pointer is reached, and nodes are
    # prepended as they are visited, so the array is in reverse order of the
    # walk.
    #
    def optimum_breakpoints(threshold=5)
      node = BreakpointNode.new(
        C.populate_active_nodes(@stream_ptr, @width, threshold))

      nodes = []

      while node && !node.pointer.null?
        nodes.unshift(node)
        node = BreakpointNode.new(node[:previous])
      end

      nodes
    end

  end

end
@@ -0,0 +1,71 @@
1
+ require 'ffi'
2
+
3
module Crawdad

  # Token constructors and accessors backed by FFI structs. The methods are
  # plain instance methods, so the module is meant to be mixed in.
  #
  module Tokens
    extend FFI::Library

    # Enum used for the :type field of every token struct.
    Type = enum(:box, :glue, :penalty)

    # Returns the :type field of +token+.
    #
    def token_type(token)
      token[:type]
    end

    # Struct view of a box token.
    #
    class Box < FFI::Struct
      layout :type,    Type,
             :width,   :float,
             :content, :string
    end

    # Builds a box through the native make_box and wraps the result.
    #
    def box(width, content)
      Box.new(Crawdad::Paragraph::C.make_box(width, content))
    end

    # Returns the :content field of box +b+.
    #
    def box_content(b)
      b[:content]
    end

    # Struct view of a glue token.
    #
    class Glue < FFI::Struct
      layout :type,    Type,
             :width,   :float,
             :stretch, :float,
             :shrink,  :float
    end

    # Builds a glue through the native make_glue and wraps the result.
    #
    def glue(width, stretch, shrink)
      Glue.new(Crawdad::Paragraph::C.make_glue(width, stretch, shrink))
    end

    # Returns the :stretch field of +glue+.
    #
    def glue_stretch(glue)
      glue[:stretch]
    end

    # Returns the :shrink field of +glue+.
    #
    def glue_shrink(glue)
      glue[:shrink]
    end

    # Struct view of a penalty token.
    #
    class Penalty < FFI::Struct
      layout :type,    Type,
             :width,   :float,
             :penalty, :float,
             :flagged, :int
    end

    # Builds a penalty through the native make_penalty and wraps the result.
    # Note the argument order: this method takes the penalty value first,
    # while make_penalty is called with the width first.
    #
    def penalty(penalty, width=0.0, flagged=false)
      Penalty.new(Crawdad::Paragraph::C.make_penalty(width, penalty, flagged))
    end

    # Returns the :penalty field of +p+.
    #
    def penalty_penalty(p)
      p[:penalty]
    end

    # True when the :flagged field of +p+ is nonzero.
    #
    # TODO: this might return true/false. problem?
    def penalty_flagged?(p)
      p[:flagged] != 0
    end

    # Returns the :width field of +token+ (present in all three layouts).
    #
    def token_width(token)
      token[:width]
    end

  end
end
@@ -0,0 +1,11 @@
1
+
2
module Crawdad
  # Positive floating-point infinity, obtained by dividing by zero so that it
  # also works on Rubies that do not define Float::INFINITY.
  Infinity = 1.0/0
end
5
+
6
+ require 'crawdad/compatibility'
7
+
8
+ require 'crawdad/tokens'
9
+ require 'crawdad/breakpoint'
10
+ require 'crawdad/paragraph'
11
+
@@ -0,0 +1,293 @@
1
+ # encoding: utf-8
2
+ # Crawdad: Knuth-Plass linebreaking in Ruby.
3
+ #
4
+ # Copyright February 2010, Brad Ediger. All Rights Reserved.
5
+ #
6
+ # This is free software. Please see the LICENSE and COPYING files for details.
7
+
8
module Crawdad

  # Pure-Ruby paragraph breaker: finds line breaks in a stream of box, glue
  # and penalty tokens by minimizing total demerits (the algorithm the
  # comments below reference from Digital Typography, pp. 118-119).
  #
  class Paragraph

    include Tokens

    # Creates a paragraph over +stream+ (an indexable sequence of tokens).
    #
    # Options:
    # +:width+::
    #   Target line width.
    # +:flagged_penalty+::
    #   Demerits added when two consecutive breaks both fall on flagged
    #   penalties. Defaults to 3000.
    # +:fitness_penalty+::
    #   Demerits added when consecutive lines are more than one fitness class
    #   apart. Defaults to 100.
    #
    def initialize(stream, options={})
      @stream = stream
      @width = options[:width]
      @flagged_penalty = options[:flagged_penalty] || 3000
      @fitness_penalty = options[:fitness_penalty] || 100
    end

    # Width of the paragraph of text.
    #
    attr_accessor :width

    # Returns an array of optimally sized lines. Each line in the array consists
    # of two elements [tokens, breakpoint]. +tokens+ is an array of tokens taken
    # sequentially from the input stream. +breakpoint+ is a Crawdad::Breakpoint
    # object representing data about the line (primarily the adjustment ratio).
    #
    def lines(threshold=5)
      ls = []
      breakpoints = optimum_breakpoints(threshold)

      # When we break on penalties, we want them to show up at the *end* of the
      # line so that we can put hyphens there if needed. So adjust the
      # breakpoint positions to make that the case.
      breakpoints.each do |b|
        b.position += 1 if token_type(@stream[b.position]) == :penalty
      end

      breakpoints.each_cons(2) do |a, b|
        # The final line keeps the token at its breakpoint; every other line
        # stops one token short of it.
        last = (b == breakpoints[-1]) ? b.position : b.position - 1
        ls << [@stream[a.position..last], b]
      end
      ls
    end

    # Returns the chain of Breakpoint nodes (starting node first) with the
    # fewest total demerits. Lines whose adjustment ratio exceeds +threshold+
    # are not considered.
    #
    # Raises RuntimeError when no feasible set of breaks exists.
    #
    def optimum_breakpoints(threshold=5)
      active_nodes = [Breakpoint.starting_node]
      each_legal_breakpoint do |item, bi|
        # "Main Loop" (Digital Typography p. 118)

        if active_nodes.empty?
          raise "No feasible solution. Try relaxing threshold."
        end

        forced_break = token_type(item) == :penalty &&
                       penalty_penalty(item) == -Infinity

        ai = 0

        while active_nodes[ai]
          # For each fitness class, keep track of the nodes with the fewest
          # demerits so far.
          best = [nil] * 4

          while a = active_nodes[ai]
            j = a.line + 1 # current line
            r = adjustment_ratio(a, bi)

            # Deactivate nodes that can no longer start a line ending here or
            # later: the line is overfull, or this is a forced break. Nodes
            # created *at* this breakpoint are revisited by this loop, so
            # only nodes that precede it may be dropped on a forced break;
            # otherwise a forced break in mid-stream would discard the very
            # nodes it just produced.
            if r < -1 || (forced_break && a.position < bi)
              active_nodes.delete_at(ai)
            else
              ai += 1
            end

            if r >= -1 && r <= threshold
              d = calculate_demerits(r, item, a) + a.total_demerits
              c = self.class.fitness_class(r)

              # Penalize consecutive lines more than one fitness class away from
              # each other.
              if (c - a.fitness_class).abs > 1
                d += @fitness_penalty
              end

              # Update high scores if this is a new best.
              if best[c].nil? || d < best[c][:demerits]
                best[c] = {:node => a, :demerits => d, :ratio => r}
              end
            end

            # Add nodes to the active list before moving to the next line.
            if (next_node = active_nodes[ai]) && next_node.line >= j
              break
            end
          end

          # If we found any best nodes, add them to the active list.
          if ai < active_nodes.length - 1
            active_nodes[ai, 0] = new_active_nodes(best, bi)
          else
            active_nodes.concat new_active_nodes(best, bi)
          end
        end

      end

      # At this point, everything in active_nodes should point to the final
      # element of our stream (the forced break). Now we pick the one with the
      # fewest total demerits.

      if active_nodes.empty?
        raise "No feasible solution. Try relaxing threshold."
      end

      node = active_nodes.sort_by { |n| n.total_demerits }.first

      # Walk the chain backwards, prepending, so the result starts at the
      # starting node.
      nodes = []
      while node
        nodes.unshift(node)
        node = node.previous
      end

      nodes
    end

    # For each item before which we could break, yields two values:
    #
    # +item+::
    #   The item we can break before (glue or penalty).
    # +i+::
    #   The index of +item+ in the stream.
    #
    # Updates the @total_width, @total_stretch, and @total_shrink variables as
    # it moves over the stream, to allow quick calculation of the
    # width/stretch/shrink from the last breakpoint node.
    #
    # Legal breakpoints are either:
    #
    # * glue immediately following a box, or
    # * a penalty less than positive infinity.
    #
    # Raises ArgumentError for an item that is none of box, glue or penalty.
    #
    def each_legal_breakpoint
      @total_width = 0
      @total_stretch = 0
      @total_shrink = 0

      @stream.each_with_index do |item, i|
        case token_type(item)
        when :box
          @total_width += token_width(item)
        when :glue
          # We can break here if we immediately follow a box. A glue at index
          # 0 has no predecessor (index -1 would wrap to the last token).
          yield(item, i) if i > 0 && token_type(@stream[i-1]) == :box
          @total_width += token_width(item)
          @total_stretch += glue_stretch(item)
          @total_shrink += glue_shrink(item)
        when :penalty
          # We can break here unless inhibited by an infinite penalty.
          yield(item, i) unless penalty_penalty(item) == Infinity
        else
          raise ArgumentError, "Unknown item: #{item.inspect}"
        end
      end
    end

    # Calculates the adjustment ratio r by which a line from a to b would have
    # to be adjusted to fit in the given length. r==0 means the natural widths
    # are perfect. r==-1 means all of the shrinkability has been used; r==1
    # means all of the stretchability has been used.
    #
    # A line that is too short with nothing to stretch yields +Infinity; a
    # line that is too long with nothing to shrink yields -Infinity, so that
    # the caller treats it as overfull and deactivates the starting node.
    #
    # Arguments:
    # +node_a+::
    #   Breakpoint node of our starting point (on the active list).
    # +b+::
    #   Index (into +stream+) of the breakpoint under consideration.
    #
    def adjustment_ratio(node_a, b)
      item_b = @stream[b]
      # Find the width from a to b.
      w = @total_width - node_a.total_width
      # Add penalty width (hyphen) if we are breaking at a penalty
      w += token_width(item_b) if token_type(item_b) == :penalty
      target_width = @width

      case
      when w < target_width
        stretch = @total_stretch - node_a.total_stretch
        (stretch > 0) ? (target_width - w) / stretch.to_f : Infinity
      when w > target_width
        shrink = @total_shrink - node_a.total_shrink
        (shrink > 0) ? (target_width - w) / shrink.to_f : -Infinity
      else 0
      end
    end

    protected

    # Returns the demerits assessed to a break before +new_item+ with adjustment
    # ratio +r+, given the provided active breakpoint.
    #
    def calculate_demerits(r, new_item, active_breakpoint)
      badness = 1 + 100*(r.abs ** 3)

      d = case
          when token_type(new_item) == :penalty &&
               penalty_penalty(new_item) >= 0
            (badness + penalty_penalty(new_item)) ** 2
          when token_type(new_item) == :penalty &&
               penalty_penalty(new_item) != -Infinity
            (badness ** 2) - (penalty_penalty(new_item) ** 2)
          else
            badness ** 2
          end

      # Two flagged penalties in a row cost extra.
      old_item = @stream[active_breakpoint.position]
      if token_type(old_item) == :penalty && penalty_flagged?(old_item) &&
         token_type(new_item) == :penalty && penalty_flagged?(new_item)
        d += @flagged_penalty
      end

      d
    end

    # Returns a fitness class number (0=tight, 1=normal, 2=loose, 3=very loose),
    # given the adjustment ratio +r+.
    #
    def self.fitness_class(r)
      case
      when r < -0.5 then 0
      when r < 0.5  then 1
      when r < 1    then 2
      else 3
      end
    end

    # Returns new active nodes for breaks from all "best" breakpoints +best+
    # (lowest demerits within each fitness class) to +b+ (index of the current
    # item in the stream).
    #
    # The +gamma+ value is used in an optional dominance test; candidate breaks
    # must do better than the optimum fitness class by +gamma+ demerits to be
    # considered.
    #
    # TODO: find optimal value for gamma
    #
    # This is the middle algorithm ("Insert new active nodes for breaks from Ac
    # to b") on p. 119 of Digital Typography.
    #
    def new_active_nodes(best, b, gamma=Infinity)
      lowest_demerits = best.compact.map { |n| n[:demerits] }.min
      new_width, new_stretch, new_shrink = calculate_widths(b)

      new_nodes = []

      # If we found any best nodes, add them to the active list.
      best.each_with_index do |n, fitness_class|
        next if n.nil?
        node, demerits, ratio = n[:node], n[:demerits], n[:ratio]
        next if demerits == Infinity || demerits > lowest_demerits + gamma

        new_nodes << Breakpoint.new(b, node.line + 1, fitness_class, new_width,
                                    new_stretch, new_shrink, demerits, ratio,
                                    node)
      end

      new_nodes
    end

    # Compute (\sum w)_{after(b)}, et al. -- total width, stretch, shrink from
    # the active breakpoint to the next box or forced break.
    #
    # Last algorithm on p. 119 of Digital Typography.
    #
    def calculate_widths(b)
      total_width, total_stretch, total_shrink =
        @total_width, @total_stretch, @total_shrink

      @stream[b..-1].each_with_index do |item, i|
        case token_type(item)
        when :box
          break
        when :glue
          total_width += token_width(item)
          total_stretch += glue_stretch(item)
          total_shrink += glue_shrink(item)
        when :penalty
          # i > 0: the item at b itself does not end the scan.
          break if penalty_penalty(item) == -Infinity && i > 0
        else
          raise ArgumentError, "Unknown item: #{item.inspect}"
        end
      end

      [total_width, total_stretch, total_shrink]
    end


  end

end