crawdad 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,139 @@
1
+ # encoding: utf-8
2
+ # Crawdad: Knuth-Plass linebreaking in Ruby.
3
+ #
4
+ # Copyright February 2010, Brad Ediger. All Rights Reserved.
5
+ #
6
+ # This is free software. Please see the LICENSE and COPYING files for details.
7
+
8
+ require 'strscan'
9
+ require 'enumerator'
10
+
11
module Crawdad

  # Ambassador to Prawn. Turns a paragraph of text into the stream of
  # wrappable items (boxes, glue, and penalties) consumed by the linebreaker.
  #
  # The document passed to the constructor is only asked for string widths
  # (via +width_of+); the token constructors come from the Tokens mixin.
  #
  class PrawnTokenizer

    include Tokens

    # Sets up a tokenizer for the given document (+pdf+). The document must
    # respond to +width_of(string)+.
    #
    def initialize(pdf)
      @pdf = pdf
    end

    # Tokenize the given paragraph of text into a stream of items (boxes, glue,
    # and penalties). Returns the stream as an Array of tokens.
    #
    # +options+:
    #
    # +hyphenation+::
    #   If provided, allow the given text to be hyphenated as needed to best
    #   fit the available space. Requires the text-hyphen gem. Allowable values:
    #   an ISO 639 language code (like 'pt'), or +true+ (synonym for 'en_us').
    # +indent+::
    #   If specified, indent the first line of the paragraph by the given
    #   number of PDF points.
    #
    # Raises LoadError if +hyphenation+ is requested but the text-hyphen gem
    # cannot be loaded.
    #
    # A blank +text+ yields only the optional indent box followed by the three
    # paragraph-finishing tokens.
    #
    def paragraph(text, options={})
      hyphenator = hyphenator_for(options[:hyphenation])

      stream = []

      # The indent is modeled as an empty box of the requested width.
      indent = options[:indent]
      stream << box(indent, "") if indent

      # Interword glue can stretch by half and shrink by a third.
      # TODO: optimal stretch/shrink ratios
      space_width = @pdf.width_of(" ")
      interword_glue = glue(space_width,
                            space_width / 2.0,
                            space_width / 3.0)

      # TODO: optimal values for sentence space w/y/z
      sentence_space_width = space_width * 1.5
      sentence_glue = glue(sentence_space_width,
                           sentence_space_width / 2.0,
                           sentence_space_width / 3.0)

      # Break paragraph on whitespace.
      # TODO: how should "battle-\nfield" be tokenized?
      text.strip.split(/\s+/).each do |word|
        scanner = StringScanner.new(word)

        # For hyphenated words, follow each hyphen by a zero-width flagged
        # penalty.
        # TODO: recognize dashes in all their variants
        while seg = scanner.scan(/[^-]+-/) # "night-time" --> "<<night->>time"
          stream.concat add_word_segment(seg, hyphenator)
        end

        # Whatever follows the last explicit hyphen (or the whole word, if it
        # has none).
        rest = scanner.rest
        stream.concat(add_word_segment(rest, hyphenator))

        # TODO: add ties (~) or some other way to denote a period that
        # doesn't end a sentence.
        if rest =~ /\.$/
          stream << sentence_glue
        else
          stream << interword_glue
        end
      end

      # Remove extra glue at the end. The stream is empty when the text is
      # blank and no indent was requested, so guard against a nil last token.
      last = stream.last
      stream.pop if last && token_type(last) == :glue

      # Finish paragraph with a penalty inhibiting a break, finishing glue (to
      # pad out the last line), and a forced break to finish the paragraph.
      # NOTE(review): Infinity is not defined in this file; presumably it is a
      # Crawdad-level constant -- confirm.
      stream << penalty(Infinity)
      stream << glue(0, Infinity, 0)
      stream << penalty(-Infinity)

      stream
    end

    protected

    # Returns the hyphenator for the given +hyphenation+ option value, or nil
    # if hyphenation was not requested. Hyphenators are cached per language on
    # this tokenizer. Raises LoadError if the text-hyphen gem is unavailable.
    #
    def hyphenator_for(setting)
      return nil unless setting

      begin
        gem 'text-hyphen'
        require 'text/hyphen'
      rescue LoadError
        raise LoadError, ":hyphenation option requires the text-hyphen gem"
      end

      language = (setting == true) ? 'en_us' : setting
      @hyphenators ||= {}
      @hyphenators[language] ||= Text::Hyphen.new(:language => language)
    end

    # Returns a series of tokens representing the given word. Hyphenates using
    # the given +hyphenator+, if provided. Appends a zero-width flagged penalty
    # if the given word ends in a hyphen.
    #
    def add_word_segment(word, hyphenator)
      tokens = []

      if hyphenator
        # Measure the hyphen once; every discretionary break reuses it.
        hyphen_width = @pdf.width_of('-')

        # The split points are used below as character offsets into +word+.
        splits = hyphenator.hyphenate(word)

        # For each hyphenated segment, add the box followed by a flagged
        # penalty as wide as the hyphen that a break there would insert.
        [0, *splits].each_cons(2) do |a, b|
          seg = word[a...b]
          tokens << box(@pdf.width_of(seg), seg)
          tokens << penalty(50, hyphen_width, true)
        end

        # The remainder after the final split point (the whole word when no
        # split points were returned).
        last = word[(splits.last || 0)..-1]
        tokens << box(@pdf.width_of(last), last)
      else
        tokens << box(@pdf.width_of(word), word)
      end

      tokens << penalty(50, 0, true) if word =~ /-$/
      tokens
    end

  end

end
139
+
@@ -0,0 +1,48 @@
1
+
2
module Crawdad

  # Constructors and accessors for the items that make up a paragraph
  # stream. Every token is a plain Array laid out as follows:
  #
  #   [:box,     width, content]
  #   [:glue,    width, stretch, shrink]
  #   [:penalty, width, penalty, flagged]
  #
  # The first element is always the type and the second is always the width,
  # so +token_type+ and +token_width+ work on any kind of token.
  #
  module Tokens

    # --- Accessors shared by every token kind ---

    # Returns the type symbol stored in the first slot of +token+.
    def token_type(token)
      token[0]
    end

    # Returns the width stored in the second slot of +token+.
    def token_width(token)
      token[1]
    end

    # --- Boxes ---

    # Builds a box token of the given +width+ carrying +content+.
    def box(width, content)
      [:box, width, content]
    end

    # Returns the content of a box token.
    def box_content(token)
      token[2]
    end

    # --- Glue ---

    # Builds a glue token with the given natural +width+, +stretch+, and
    # +shrink+.
    def glue(width, stretch, shrink)
      [:glue, width, stretch, shrink]
    end

    # Returns the stretch of a glue token.
    def glue_stretch(token)
      token[2]
    end

    # Returns the shrink of a glue token.
    def glue_shrink(token)
      token[3]
    end

    # --- Penalties ---

    # Builds a penalty token. +width+ defaults to 0 and +flagged+ to false.
    # Note the argument order differs from the stored order: the width is
    # stored before the penalty value.
    def penalty(penalty, width=0, flagged=false)
      [:penalty, width, penalty, flagged]
    end

    # Returns the penalty value of a penalty token.
    def penalty_penalty(token)
      token[2]
    end

    # Returns the flagged marker of a penalty token.
    def penalty_flagged?(token)
      token[3]
    end

  end

end
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: crawdad
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Brad Ediger
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-03-19 00:00:00 -05:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Crawdad is an implementation of Knuth-Plass linebreaking (justification) for Ruby.
17
+ email: brad.ediger@madriska.com
18
+ executables: []
19
+
20
+ extensions:
21
+ - Rakefile
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - lib/crawdad
26
+ - lib/crawdad/prawn_tokenizer.rb
27
+ - lib/crawdad/breakpoint.rb
28
+ - lib/crawdad/native.rb
29
+ - lib/crawdad/ffi.rb
30
+ - lib/crawdad/paragraph.rb
31
+ - lib/crawdad/compatibility.rb
32
+ - lib/crawdad/tokens.rb
33
+ - lib/crawdad/ffi
34
+ - lib/crawdad/ffi/paragraph.rb
35
+ - lib/crawdad/ffi/tokens.rb
36
+ - lib/crawdad/ffi/breakpoint_node.rb
37
+ - lib/crawdad.rb
38
+ - ext/crawdad/breakpoint.h
39
+ - ext/crawdad/tokens.c
40
+ - ext/crawdad/paragraph.h
41
+ - ext/crawdad/Makefile
42
+ - ext/crawdad/tokens.h
43
+ - ext/crawdad/paragraph.c
44
+ - Rakefile
45
+ has_rdoc: true
46
+ homepage: http://github.com/madriska/crawdad
47
+ post_install_message:
48
+ rdoc_options:
49
+ - --title
50
+ - Crawdad Documentation
51
+ - -q
52
+ require_paths:
53
+ - lib
54
+ - ext
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: "0"
60
+ version:
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: "0"
66
+ version:
67
+ requirements: []
68
+
69
+ rubyforge_project:
70
+ rubygems_version: 1.3.1
71
+ signing_key:
72
+ specification_version: 2
73
+ summary: Knuth-Plass linebreaking for Ruby
74
+ test_files: []
75
+