crawdad 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,41 @@
1
#ifndef CRAWDAD_TOKENS_H
#define CRAWDAD_TOKENS_H

/*
 * Token definitions: every item is a box, a glue or a penalty.
 *
 * The include guard deliberately avoids a leading underscore followed by an
 * uppercase letter; such identifiers are reserved for the implementation.
 */

/* Discriminator stored as the first member of every token struct. */
enum token_type { BOX, GLUE, PENALTY };

/* A box: a width plus a pointer to its character content. */
struct box {
  enum token_type type;
  float width;
  char * content;
};

/* Glue: a width together with its stretch and shrink amounts. */
struct glue {
  enum token_type type;
  float width;
  float stretch;
  float shrink;
};

/* A penalty: a width, a penalty value and an integer flag. */
struct penalty {
  enum token_type type;
  float width;
  float penalty;
  int flagged;
};

/*
 * Any one token. All three members start with the same `type` and `width`
 * members, so those two may be read through whichever member is convenient.
 */
typedef union {
  struct box box;
  struct glue glue;
  struct penalty penalty;
} token;

/*
 * Declarations only; the definitions live outside this header.
 * NOTE(review): presumably token_type() returns the token's `type` value and
 * the is_*() functions return non-zero for a matching token -- confirm
 * against the implementation.
 */
int token_type(token *);

int is_box(token *);

int is_penalty(token *);

int is_glue(token *);

#endif /* CRAWDAD_TOKENS_H */
41
+
data/lib/crawdad.rb ADDED
@@ -0,0 +1,18 @@
1
+ # encoding: utf-8
2
+ # Crawdad: Knuth-Plass linebreaking in Ruby.
3
+ #
4
+ # Copyright February 2010, Brad Ediger. All Rights Reserved.
5
+ #
6
+ # This is free software. Please see the LICENSE and COPYING files for details.
7
+
8
+ require 'crawdad/native'
9
+
10
+ begin
11
+ require 'crawdad/ffi'
12
+ rescue LoadError
13
+ end
14
+
15
+ if defined?(Prawn)
16
+ require 'crawdad/prawn_tokenizer'
17
+ end
18
+
@@ -0,0 +1,82 @@
1
+ # encoding: utf-8
2
+ # Crawdad: Knuth-Plass linebreaking in Ruby.
3
+ #
4
+ # Copyright February 2010, Brad Ediger. All Rights Reserved.
5
+ #
6
+ # This is free software. Please see the LICENSE and COPYING files for details.
7
+
8
module Crawdad

  # A node in the breakpoint list. Each node records where a line ends, the
  # running totals and demerits captured when it was created, and a link to
  # the node it was reached from.
  #
  class Breakpoint

    # Index of this breakpoint within the sequence of items. This is the only
    # attribute that can be reassigned after construction.
    #
    attr_accessor :position

    # Number of the line ending at this breakpoint.
    #
    attr_reader :line

    # Fitness class (0=tight, 1=normal, 2=loose, 3=very loose) of the line
    # ending at this breakpoint.
    #
    attr_reader :fitness_class

    # Total width up to after(self). Used to calculate adjustment ratios.
    #
    attr_reader :total_width

    # Total stretch up to after(self). Used to calculate adjustment ratios.
    #
    attr_reader :total_stretch

    # Total shrink up to after(self). Used to calculate adjustment ratios.
    #
    attr_reader :total_shrink

    # Minimum total demerits up to this breakpoint.
    #
    attr_reader :total_demerits

    # The ratio of stretch or shrink used for the line ending at this
    # breakpoint. 0 is a perfect fit; +1 means 100% of the stretch has been
    # used; -1 means all of the shrink has been used.
    #
    attr_reader :ratio

    # Link to the best preceding breakpoint (nil for the starting node).
    #
    attr_reader :previous

    # Returns the node used for the head of the active list -- represents the
    # starting point: position 0, line 0, fitness class 1, all totals zero and
    # no predecessor.
    #
    # The values are passed positionally. Writing them as `position=0` would
    # not be a keyword argument; it would assign a throwaway local variable.
    #
    def self.starting_node
      new(0,    # position
          0,    # line
          1,    # fitness_class
          0,    # total_width
          0,    # total_stretch
          0,    # total_shrink
          0,    # total_demerits
          0.0,  # ratio
          nil)  # previous
    end

    # Creates a breakpoint node with the given parameters. Each argument is
    # stored unchanged in the attribute of the same name.
    #
    def initialize(position, line, fitness_class, total_width, total_stretch,
                   total_shrink, total_demerits, ratio, previous)
      @position       = position
      @line           = line
      @fitness_class  = fitness_class
      @total_width    = total_width
      @total_stretch  = total_stretch
      @total_shrink   = total_shrink
      @total_demerits = total_demerits
      @ratio          = ratio
      @previous       = previous
    end

    # Short debugging representation showing only position and ratio.
    #
    def inspect
      "#<Breakpoint position=#{@position} ratio=#{@ratio}>"
    end

  end

end
@@ -0,0 +1,12 @@
1
# Compatibility shim: defines Enumerable#find_index only when Array does not
# already respond to it.
unless [].respond_to?(:find_index)
  module Enumerable
    # Returns the index of the first element that matches, or nil when none
    # does. With a block, an element matches when the block returns a truthy
    # value for it; without a block, it matches when +needle+ == element.
    def find_index(needle=nil, &b)
      each_with_index do |element, index|
        matched = b ? b.call(element) : (needle == element)
        return index if matched
      end
      nil
    end
  end
end
@@ -0,0 +1,7 @@
1
+ require 'ffi'
2
+ require 'ffi-inliner'
3
+
4
+ require 'crawdad/ffi/tokens'
5
+ require 'crawdad/ffi/breakpoint_node'
6
+ require 'crawdad/ffi/paragraph'
7
+
@@ -0,0 +1,36 @@
1
+ require 'ffi'
2
+
3
module Crawdad
  extend FFI::Library

  # FFI struct describing one breakpoint node.
  # NOTE(review): the field order and types presumably mirror a C struct in
  # the native extension -- confirm against the C source before changing.
  class BreakpointNode < FFI::Struct
    layout :position,       :int,
           :line,           :int,
           :fitness_class,  :int,
           :total_width,    :float,
           :total_stretch,  :float,
           :total_shrink,   :float,
           :total_demerits, :float,
           :ratio,          :float,
           :previous,       :pointer,
           :link,           :pointer

    # Reads the +position+ field of the struct.
    def position
      self[:position]
    end

    # Writes +index+ into the +position+ field of the struct.
    def position=(index)
      self[:position] = index
    end

    # Reads the +ratio+ field of the struct.
    def ratio
      self[:ratio]
    end

    # Short debugging representation showing only position and ratio.
    def inspect
      "#<BreakpointNode position=#{self[:position]} ratio=#{self[:ratio]}>"
    end
  end

end
36
+
@@ -0,0 +1,58 @@
1
+ require 'fileutils'
2
+ require 'ffi'
3
+
4
module Crawdad

  # Paragraph variant that hands the breakpoint search to native code bound
  # through FFI.
  class Paragraph

    # Bindings to the compiled crawdad library.
    module C
      extend FFI::Library

      # Three directories above this file.
      Base = File.expand_path(File.dirname(__FILE__) + "/../../..")

      # Candidate library files.
      # NOTE(review): presumably one per platform (.bundle / .so) -- confirm.
      ffi_lib ["#{Base}/ext/crawdad/crawdad.bundle",
               "#{Base}/ext/crawdad/crawdad.so"]

      # Token constructors; each returns a pointer to the new token.
      attach_function :make_box, [:float, :string], :pointer
      attach_function :make_glue, [:float, :float, :float], :pointer
      attach_function :make_penalty, [:float, :float, :bool], :pointer

      # Token inspection helpers.
      attach_function :token_type, [:pointer], :int
      attach_function :is_box, [:pointer], :bool
      attach_function :is_glue, [:pointer], :bool
      attach_function :is_penalty, [:pointer], :bool

      # Takes the token array, a width and a threshold (see
      # Paragraph#optimum_breakpoints for the call).
      attach_function :populate_active_nodes, [:pointer, :float, :float],
        BreakpointNode

      attach_function :inspect_token, [:pointer], :void
    end

    # Stores +stream+ and the options, and copies the stream into a native
    # array of pointers for the C side.
    #
    # Options read: :line_widths, :width, :flagged_penalty (default 3000) and
    # :fitness_penalty (default 100).
    #
    def initialize(stream, options={})
      @stream = stream

      # Set up C-accessible array of "token *"s: one slot per token plus one
      # spare slot at the end.
      # NOTE(review): presumably the spare slot is left NULL as a terminator
      # for the C code -- confirm.
      slot_count  = stream.length + 1
      @stream_ptr = FFI::MemoryPointer.new(:pointer, slot_count)
      @stream_ptr.write_array_of_pointer(stream)

      @line_widths     = options[:line_widths]
      @width           = options[:width]
      @flagged_penalty = options[:flagged_penalty] || 3000
      @fitness_penalty = options[:fitness_penalty] || 100
    end

    # Calls the native populate_active_nodes with the stream, width and
    # +threshold+, then follows the :previous pointers from the returned node
    # until a null pointer is reached. Returns the visited nodes as an array,
    # with the last node visited first.
    #
    def optimum_breakpoints(threshold=5)
      current = BreakpointNode.new(
        C.populate_active_nodes(@stream_ptr, @width, threshold))

      chain = []

      until current.nil? || current.pointer.null?
        chain.unshift(current)
        current = BreakpointNode.new(current[:previous])
      end

      chain
    end

  end

end
@@ -0,0 +1,71 @@
1
+ require 'ffi'
2
+
3
module Crawdad

  # Token constructors and accessors backed by FFI structs. Tokens are
  # allocated through the functions in Crawdad::Paragraph::C and wrapped in
  # the struct classes below.
  module Tokens
    extend FFI::Library

    # Token kinds, in declaration order.
    Type = enum(:box, :glue, :penalty)

    # --- Struct layouts ---------------------------------------------------

    class Box < FFI::Struct
      layout :type,    Type,
             :width,   :float,
             :content, :string
    end

    class Glue < FFI::Struct
      layout :type,    Type,
             :width,   :float,
             :stretch, :float,
             :shrink,  :float
    end

    class Penalty < FFI::Struct
      layout :type,    Type,
             :width,   :float,
             :penalty, :float,
             :flagged, :int
    end

    # --- Constructors -----------------------------------------------------

    # Wraps the token returned by the native make_box in a Box struct.
    def box(width, content)
      Box.new(Crawdad::Paragraph::C.make_box(width, content))
    end

    # Wraps the token returned by the native make_glue in a Glue struct.
    def glue(width, stretch, shrink)
      Glue.new(Crawdad::Paragraph::C.make_glue(width, stretch, shrink))
    end

    # Wraps the token returned by the native make_penalty in a Penalty struct.
    # Note the native call takes the width first, then the penalty value.
    def penalty(penalty, width=0.0, flagged=false)
      Penalty.new(Crawdad::Paragraph::C.make_penalty(width, penalty, flagged))
    end

    # --- Accessors --------------------------------------------------------

    # The :type field of any token struct.
    def token_type(token)
      token[:type]
    end

    # The :width field of any token struct.
    def token_width(token)
      token[:width]
    end

    # The :content field of a box.
    def box_content(b)
      b[:content]
    end

    # The :stretch field of a glue.
    def glue_stretch(glue)
      glue[:stretch]
    end

    # The :shrink field of a glue.
    def glue_shrink(glue)
      glue[:shrink]
    end

    # The :penalty field of a penalty.
    def penalty_penalty(p)
      p[:penalty]
    end

    # True when the integer :flagged field is non-zero.
    # TODO: this might return true/false. problem?
    def penalty_flagged?(p)
      p[:flagged] != 0
    end

  end
end
@@ -0,0 +1,11 @@
1
+
2
module Crawdad
  # Positive floating-point infinity, produced by float division by zero.
  Infinity = 1.0 / 0
end
5
+
6
+ require 'crawdad/compatibility'
7
+
8
+ require 'crawdad/tokens'
9
+ require 'crawdad/breakpoint'
10
+ require 'crawdad/paragraph'
11
+
@@ -0,0 +1,293 @@
1
+ # encoding: utf-8
2
+ # Crawdad: Knuth-Plass linebreaking in Ruby.
3
+ #
4
+ # Copyright February 2010, Brad Ediger. All Rights Reserved.
5
+ #
6
+ # This is free software. Please see the LICENSE and COPYING files for details.
7
+
8
+ module Crawdad
9
+
10
+ class Paragraph
11
+
12
+ include Tokens
13
+
14
# Stores the token +stream+ and the layout options.
#
# Options read: :width, :flagged_penalty (default 3000) and
# :fitness_penalty (default 100).
#
def initialize(stream, options={})
  width, flagged, fitness =
    options.values_at(:width, :flagged_penalty, :fitness_penalty)

  @stream          = stream
  @width           = width
  @flagged_penalty = flagged || 3000
  @fitness_penalty = fitness || 100
end
20
+
21
+ # Width of the paragraph of text.
22
+ #
23
+ attr_accessor :width
24
+
25
# Returns an array of optimally sized lines. Each entry is a two-element
# array [tokens, breakpoint]: +tokens+ is the run of tokens taken
# sequentially from the input stream, and +breakpoint+ is the breakpoint
# object that ends the line (it carries the adjustment ratio).
#
# +threshold+ is passed straight to optimum_breakpoints.
#
def lines(threshold=5)
  breaks = optimum_breakpoints(threshold)

  # When we break on penalties, we want them to show up at the *end* of the
  # line so that we can put hyphens there if needed. Moving the position one
  # item forward makes that the case.
  breaks.each do |bp|
    bp.position += 1 if token_type(@stream[bp.position]) == :penalty
  end

  final = breaks[-1]
  result = []

  breaks.each_cons(2) do |from, to|
    # Every line except the last stops just before the next breakpoint.
    stop = (to == final) ? to.position : to.position - 1
    result << [@stream[from.position..stop], to]
  end

  result
end
47
+
48
# Finds the sequence of breakpoints with the fewest total demerits.
#
# +threshold+ is the largest adjustment ratio a line may have and still be
# considered feasible.
#
# Returns an array of breakpoint nodes, starting node first and final
# breakpoint last.
#
# Raises RuntimeError ("No feasible solution...") when the active list runs
# empty, whether that is noticed at a later breakpoint or after the last one.
#
def optimum_breakpoints(threshold=5)
  active_nodes = [Breakpoint.starting_node]

  each_legal_breakpoint do |item, bi|
    # "Main Loop" (Digital Typography p. 118)

    if active_nodes.empty?
      raise "No feasible solution. Try relaxing threshold."
    end

    ai = 0

    while active_nodes[ai]
      # For each fitness class, keep track of the nodes with the fewest
      # demerits so far.
      best = [nil] * 4

      while a = active_nodes[ai]
        j = a.line + 1 # current line
        r = adjustment_ratio(a, bi)

        # Drop the active node when the line from it would need more than
        # all of its shrink, or when +item+ is a forced break and the node
        # lies before the final item. Otherwise step past it.
        if r < -1 || (token_type(item) == :penalty &&
                      penalty_penalty(item) == -Infinity &&
                      a.position < @stream.length - 1)
          active_nodes.delete_at(ai)
        else
          ai += 1
        end

        if r >= -1 && r <= threshold
          d = calculate_demerits(r, item, a) + a.total_demerits
          c = self.class.fitness_class(r)

          # Penalize consecutive lines more than one fitness class away from
          # each other.
          if (c - a.fitness_class).abs > 1
            d += @fitness_penalty
          end

          # Update high scores if this is a new best.
          if best[c].nil? || d < best[c][:demerits]
            best[c] = {:node => a, :demerits => d, :ratio => r}
          end
        end

        # Add nodes to the active list before moving to the next line.
        if (next_node = active_nodes[ai]) && next_node.line >= j
          break
        end
      end

      # If we found any best nodes, add them to the active list.
      if ai && ai < active_nodes.length - 1
        active_nodes[ai, 0] = new_active_nodes(best, bi)
      else
        active_nodes.concat new_active_nodes(best, bi)
      end
    end

  end

  # The in-loop check only fires when another legal breakpoint follows. If
  # the list emptied at the very last breakpoint, fail here with the same
  # error instead of calling a method on nil below.
  if active_nodes.empty?
    raise "No feasible solution. Try relaxing threshold."
  end

  # At this point, everything in active_nodes should point to the final
  # element of our stream (the forced break). Now we pick the one with the
  # fewest total demerits.

  node = active_nodes.sort_by { |n| n.total_demerits }.first

  # Walk the +previous+ links back to the starting node, building the result
  # in forward order.
  nodes = []
  while node
    nodes.unshift(node)
    node = node.previous
  end

  nodes
end
121
+
122
# For each item before which we could break, yields two values:
#
# +item+::
#   The item we can break before (glue or penalty).
# +i+::
#   The index of +item+ in the stream.
#
# Updates the @total_width, @total_stretch, and @total_shrink variables as
# it moves over the stream, to allow quick calculation of the
# width/stretch/shrink from the last breakpoint node. At the moment of a
# yield the totals cover the items *before* +item+.
#
# Legal breakpoints are either:
#
# * glue immediately following a box, or
# * a penalty less than positive infinity.
#
# Raises ArgumentError for an item whose type is not box, glue or penalty.
#
def each_legal_breakpoint
  @total_width = 0
  @total_stretch = 0
  @total_shrink = 0

  @stream.each_with_index do |item, i|
    case token_type(item)
    when :box
      @total_width += token_width(item)
    when :glue
      # We can break here if we immediately follow a box. Glue at index 0
      # follows nothing; without the i > 0 check, @stream[i-1] would wrap
      # around to the *last* item of the stream.
      yield(item, i) if i > 0 && token_type(@stream[i-1]) == :box
      @total_width += token_width(item)
      @total_stretch += glue_stretch(item)
      @total_shrink += glue_shrink(item)
    when :penalty
      # We can break here unless inhibited by an infinite penalty.
      yield(item, i) unless penalty_penalty(item) == Infinity
    else
      raise ArgumentError, "Unknown item: #{item.inspect}"
    end
  end
end
161
+
162
# Calculates the adjustment ratio r by which a line from a to b would have
# to be adjusted to fit in the paragraph width. r==0 means the natural
# widths are perfect. r==-1 means all of the shrinkability has been used;
# r==1 means all of the stretchability has been used. Infinity is returned
# when the line needs stretch (or shrink) and has none available.
#
# Arguments:
# +node_a+::
#   Breakpoint node of our starting point (on the active list).
# +b+::
#   Index (into +stream+) of the breakpoint under consideration.
#
def adjustment_ratio(node_a, b)
  candidate = @stream[b]

  # Natural width of the line from a to b ...
  natural = @total_width - node_a.total_width
  # ... plus the penalty width (hyphen) if we are breaking at a penalty.
  natural += token_width(candidate) if token_type(candidate) == :penalty

  goal = @width

  if natural < goal
    available = @total_stretch - node_a.total_stretch
    return Infinity unless available > 0
    (goal - natural) / available.to_f
  elsif natural > goal
    available = @total_shrink - node_a.total_shrink
    return Infinity unless available > 0
    (goal - natural) / available.to_f
  else
    0
  end
end
191
+
192
+ protected
193
+
194
# Returns the demerits assessed to a break before +new_item+ with adjustment
# ratio +r+, given the provided active breakpoint.
#
# The base term is 1 + 100*|r|^3. A non-negative penalty is added to it
# before squaring; a negative penalty other than -Infinity has its square
# subtracted after squaring; in every other case the squared base term is
# used on its own. @flagged_penalty is added when both the item at the
# active breakpoint and +new_item+ are flagged penalties.
#
def calculate_demerits(r, new_item, active_breakpoint)
  at_penalty = token_type(new_item) == :penalty
  base = 1 + 100*(r.abs ** 3)

  demerits =
    if at_penalty && penalty_penalty(new_item) >= 0
      (base + penalty_penalty(new_item)) ** 2
    elsif at_penalty && penalty_penalty(new_item) != -Infinity
      (base ** 2) - (penalty_penalty(new_item) ** 2)
    else
      base ** 2
    end

  # Two flagged penalties in a row cost extra.
  previous_item = @stream[active_breakpoint.position]
  both_flagged = token_type(previous_item) == :penalty &&
                 penalty_flagged?(previous_item) &&
                 at_penalty && penalty_flagged?(new_item)
  demerits += @flagged_penalty if both_flagged

  demerits
end
217
+
218
# Returns a fitness class number (0=tight, 1=normal, 2=loose, 3=very loose),
# given the adjustment ratio +r+. The class boundaries are -0.5, 0.5 and 1;
# a ratio exactly on a boundary falls into the higher class.
#
def self.fitness_class(r)
  return 0 if r < -0.5
  return 1 if r < 0.5
  return 2 if r < 1
  3
end
229
+
230
# Returns new active nodes for breaks from all "best" breakpoints +best+
# (lowest demerits within each fitness class) to +b+ (index of the current
# item in the stream). +best+ is indexed by fitness class; each entry is
# either nil or a hash with :node, :demerits and :ratio.
#
# The +gamma+ value is used in an optional dominance test; a candidate is
# dropped when its demerits exceed the lowest demerits by more than
# +gamma+. With the default of Infinity the test never drops anything.
#
# TODO: find optimal value for gamma
#
# This is the middle algorithm ("Insert new active nodes for breaks from Ac
# to b") on p. 119 of Digital Typography.
#
def new_active_nodes(best, b, gamma=Infinity)
  floor = best.compact.map { |entry| entry[:demerits] }.min
  width_after, stretch_after, shrink_after = calculate_widths(b)

  created = []

  best.each_with_index do |entry, klass|
    unless entry.nil?
      origin   = entry[:node]
      demerits = entry[:demerits]
      ratio    = entry[:ratio]

      # Keep only finite candidates that pass the dominance test.
      unless demerits == Infinity || demerits > floor + gamma
        created << Breakpoint.new(b, origin.line + 1, klass, width_after,
                                  stretch_after, shrink_after, demerits,
                                  ratio, origin)
      end
    end
  end

  created
end
262
+
263
# Compute (\sum w)_{after(b)}, et al. -- the running totals of width,
# stretch and shrink, extended with every glue found from index +b+ up to
# the next box or forced break.
#
# Returns [total_width, total_stretch, total_shrink]. A forced break
# sitting at +b+ itself does not stop the scan. Raises ArgumentError for an
# item whose type is not box, glue or penalty.
#
# Last algorithm on p. 119 of Digital Typography.
#
def calculate_widths(b)
  width   = @total_width
  stretch = @total_stretch
  shrink  = @total_shrink

  @stream[b..-1].each_with_index do |token, offset|
    kind = token_type(token)

    if kind == :box
      break
    elsif kind == :glue
      width   += token_width(token)
      stretch += glue_stretch(token)
      shrink  += glue_shrink(token)
    elsif kind == :penalty
      # Only a forced break *after* the starting item ends the scan.
      break if penalty_penalty(token) == -Infinity && offset > 0
    else
      raise ArgumentError, "Unknown item: #{token.inspect}"
    end
  end

  [width, stretch, shrink]
end
289
+
290
+
291
+ end
292
+
293
+ end