crawdad 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,139 @@
1
+ # encoding: utf-8
2
+ # Crawdad: Knuth-Plass linebreaking in Ruby.
3
+ #
4
+ # Copyright February 2010, Brad Ediger. All Rights Reserved.
5
+ #
6
+ # This is free software. Please see the LICENSE and COPYING files for details.
7
+
8
+ require 'strscan'
9
+ require 'enumerator'
10
+
11
module Crawdad

  # Ambassador to Prawn. Turns a paragraph into wrappable items.
  #
  # Items are the box, glue and penalty tokens built by the Tokens mixin.
  # Every width is measured by calling +width_of+ on the document handed to
  # the constructor.
  #
  class PrawnTokenizer

    include Tokens

    # Sets up a tokenizer for the given document (+pdf+). The document is only
    # ever asked for +width_of(string)+.
    #
    def initialize(pdf)
      @pdf = pdf
    end

    # Tokenize the given paragraph of text into a stream of items (boxes, glue,
    # and penalties).
    #
    # +options+:
    #
    # +hyphenation+::
    #   If provided, allow the given text to be hyphenated as needed to best
    #   fit the available space. Requires the text-hyphen gem. Allowable values:
    #   an ISO 639 language code (like 'pt'), or +true+ (synonym for 'en_us').
    # +indent+::
    #   If specified, indent the first line of the paragraph by the given
    #   number of PDF points.
    #
    # Returns an Array of tokens. The stream always ends with the three
    # finishing tokens (see below), so blank or whitespace-only +text+ yields
    # just those tokens (preceded by the indent box, if one was requested).
    #
    # Raises LoadError if +hyphenation+ is requested but the text-hyphen gem
    # cannot be loaded.
    #
    def paragraph(text, options={})
      hyphenator = hyphenator_for(options[:hyphenation])

      stream = []

      # The indent is modelled as an empty box of the requested width.
      if indent = options[:indent]
        stream << box(indent, "")
      end

      # Interword glue can stretch by half and shrink by a third.
      # TODO: optimal stretch/shrink ratios
      space_width = @pdf.width_of(" ")
      interword_glue = glue(space_width,
                            space_width / 2.0,
                            space_width / 3.0)

      # TODO: optimal values for sentence space w/y/z
      sentence_space_width = space_width * 1.5
      sentence_glue = glue(sentence_space_width,
                           sentence_space_width / 2.0,
                           sentence_space_width / 3.0)

      # Break paragraph on whitespace.
      # TODO: how should "battle-\nfield" be tokenized?
      text.strip.split(/\s+/).each do |word|
        scanner = StringScanner.new(word)

        # For hyphenated words, follow each hyphen by a zero-width flagged
        # penalty.
        # TODO: recognize dashes in all their variants
        while seg = scanner.scan(/[^-]+-/) # "night-time" --> "<<night->>time"
          stream.concat(add_word_segment(seg, hyphenator))
        end

        # Whatever follows the last hyphen (the whole word if it has none).
        # This may be "" for a word ending in a hyphen; the resulting empty box
        # is kept on purpose so that the glue after it is still preceded by a
        # box.
        remainder = scanner.rest
        stream.concat(add_word_segment(remainder, hyphenator))

        # TODO: add ties (~) or some other way to denote a period that
        # doesn't end a sentence.
        stream << (remainder =~ /\.$/ ? sentence_glue : interword_glue)
      end

      # Remove extra glue at the end. The stream is empty here when the text
      # was blank and no indent was requested, so guard against a nil token.
      last_token = stream.last
      stream.pop if last_token && token_type(last_token) == :glue

      # Finish paragraph with a penalty inhibiting a break, finishing glue (to
      # pad out the last line), and a forced break to finish the paragraph.
      stream << penalty(Infinity)
      stream << glue(0, Infinity, 0)
      stream << penalty(-Infinity)

      stream
    end

    protected

    # Returns the hyphenator for the given +hyphenation+ option value, or nil
    # when hyphenation was not requested. The text-hyphen gem is loaded lazily
    # so that it is only required when the option is actually used, and one
    # hyphenator per language is cached on this tokenizer.
    #
    # Raises LoadError if the text-hyphen gem cannot be loaded.
    #
    def hyphenator_for(setting)
      return nil unless setting

      begin
        gem 'text-hyphen'
        require 'text/hyphen'
      rescue LoadError
        raise LoadError, ":hyphenation option requires the text-hyphen gem"
      end

      # The literal +true+ (as opposed to a language code) selects 'en_us'.
      language = (setting == true) ? 'en_us' : setting
      @hyphenators ||= {}
      @hyphenators[language] ||= Text::Hyphen.new(:language => language)
    end

    # Returns a series of tokens representing the given word. Hyphenates using
    # the given +hyphenator+, if provided. Appends a zero-width flagged penalty
    # if the given word ends in a hyphen.
    #
    # +hyphenator+ must respond to +hyphenate(word)+; the result is used as a
    # list of ascending offsets into +word+ at which a hyphen may be inserted.
    #
    def add_word_segment(word, hyphenator)
      tokens = []

      if hyphenator
        # Measure the hyphen once; every optional break has the same width.
        hyphen_width = @pdf.width_of('-')

        splits = hyphenator.hyphenate(word)
        # For each hyphenated segment, add the box followed by a flagged
        # penalty as wide as the hyphen that a break there would insert.
        [0, *splits].each_cons(2) do |a, b|
          seg = word[a...b]
          tokens << box(@pdf.width_of(seg), seg)
          tokens << penalty(50, hyphen_width, true)
        end

        # Text after the final split point (the whole word if there are none).
        last = word[(splits.last || 0)..-1]
        tokens << box(@pdf.width_of(last), last)
      else
        tokens << box(@pdf.width_of(word), word)
      end

      # An explicit hyphen is already part of the box, so a break after it
      # adds no width.
      tokens << penalty(50, 0, true) if word =~ /-$/
      tokens
    end

  end

end
139
+
@@ -0,0 +1,48 @@
1
+
2
module Crawdad

  # Constructors and readers for the items that make up a paragraph stream.
  #
  # Every token is a plain Array whose first slot is a type tag and whose
  # second slot is a width:
  #
  #   [:box,     width, content]
  #   [:glue,    width, stretch, shrink]
  #   [:penalty, width, penalty, flagged]
  #
  module Tokens

    # Builds a box token of the given +width+ carrying +content+.
    def box(width, content)
      [:box, width, content]
    end

    # Builds a glue token with natural +width+ and the given +stretch+ and
    # +shrink+ amounts.
    def glue(width, stretch, shrink)
      [:glue, width, stretch, shrink]
    end

    # Builds a penalty token. Note that the penalty value is the first
    # argument but is stored after the width. +width+ defaults to 0 and
    # +flagged+ to false.
    def penalty(penalty, width=0, flagged=false)
      [:penalty, width, penalty, flagged]
    end

    # Readers: each takes a token and returns the slot at the listed index.
    # The readers do not check the token's type tag.
    [
      [:token_type,       0],  # :box, :glue or :penalty
      [:token_width,      1],  # common to all token types
      [:box_content,      2],
      [:glue_stretch,     2],
      [:glue_shrink,      3],
      [:penalty_penalty,  2],
      [:penalty_flagged?, 3]
    ].each do |reader, slot|
      define_method(reader) { |token| token[slot] }
    end

  end

end
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: crawdad
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Brad Ediger
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-03-19 00:00:00 -05:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Crawdad is an implementation of Knuth-Plass linebreaking (justification) for Ruby.
17
+ email: brad.ediger@madriska.com
18
+ executables: []
19
+
20
+ extensions:
21
+ - Rakefile
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - lib/crawdad
26
+ - lib/crawdad/prawn_tokenizer.rb
27
+ - lib/crawdad/breakpoint.rb
28
+ - lib/crawdad/native.rb
29
+ - lib/crawdad/ffi.rb
30
+ - lib/crawdad/paragraph.rb
31
+ - lib/crawdad/compatibility.rb
32
+ - lib/crawdad/tokens.rb
33
+ - lib/crawdad/ffi
34
+ - lib/crawdad/ffi/paragraph.rb
35
+ - lib/crawdad/ffi/tokens.rb
36
+ - lib/crawdad/ffi/breakpoint_node.rb
37
+ - lib/crawdad.rb
38
+ - ext/crawdad/breakpoint.h
39
+ - ext/crawdad/tokens.c
40
+ - ext/crawdad/paragraph.h
41
+ - ext/crawdad/Makefile
42
+ - ext/crawdad/tokens.h
43
+ - ext/crawdad/paragraph.c
44
+ - Rakefile
45
+ has_rdoc: true
46
+ homepage: http://github.com/madriska/crawdad
47
+ post_install_message:
48
+ rdoc_options:
49
+ - --title
50
+ - Crawdad Documentation
51
+ - -q
52
+ require_paths:
53
+ - lib
54
+ - ext
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: "0"
60
+ version:
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: "0"
66
+ version:
67
+ requirements: []
68
+
69
+ rubyforge_project:
70
+ rubygems_version: 1.3.1
71
+ signing_key:
72
+ specification_version: 2
73
+ summary: Knuth-Plass linebreaking for Ruby
74
+ test_files: []
75
+