crawdad 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +56 -0
- data/ext/crawdad/Makefile +25 -0
- data/ext/crawdad/breakpoint.h +53 -0
- data/ext/crawdad/paragraph.c +275 -0
- data/ext/crawdad/paragraph.h +29 -0
- data/ext/crawdad/tokens.c +57 -0
- data/ext/crawdad/tokens.h +41 -0
- data/lib/crawdad.rb +18 -0
- data/lib/crawdad/breakpoint.rb +82 -0
- data/lib/crawdad/compatibility.rb +12 -0
- data/lib/crawdad/ffi.rb +7 -0
- data/lib/crawdad/ffi/breakpoint_node.rb +36 -0
- data/lib/crawdad/ffi/paragraph.rb +58 -0
- data/lib/crawdad/ffi/tokens.rb +71 -0
- data/lib/crawdad/native.rb +11 -0
- data/lib/crawdad/paragraph.rb +293 -0
- data/lib/crawdad/prawn_tokenizer.rb +139 -0
- data/lib/crawdad/tokens.rb +48 -0
- metadata +75 -0
@@ -0,0 +1,139 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
# Crawdad: Knuth-Plass linebreaking in Ruby.
|
3
|
+
#
|
4
|
+
# Copyright February 2010, Brad Ediger. All Rights Reserved.
|
5
|
+
#
|
6
|
+
# This is free software. Please see the LICENSE and COPYING files for details.
|
7
|
+
|
8
|
+
require 'strscan'
|
9
|
+
require 'enumerator'
|
10
|
+
|
11
|
+
module Crawdad
|
12
|
+
|
13
|
+
# Ambassador to Prawn. Turns a paragraph into wrappable items.
|
14
|
+
#
|
15
|
+
class PrawnTokenizer
|
16
|
+
|
17
|
+
include Tokens
|
18
|
+
|
19
|
+
# Builds a tokenizer bound to one document.
#
# +pdf+ is stored as-is in <tt>@pdf</tt>; the other methods of this class
# only ever call +width_of+ on it to measure strings.
#
def initialize(pdf)
  @pdf = pdf
end
|
24
|
+
|
25
|
+
# Tokenize the given paragraph of text into a stream of items (boxes, glue,
# and penalties).
#
# Returns an Array of tokens. The stream always ends with the three
# finishing items (inhibiting penalty, infinitely stretchable glue, forced
# break), even when +text+ is empty or contains only whitespace.
#
# Raises LoadError if +:hyphenation+ is requested but the text-hyphen gem
# cannot be loaded.
#
# +options+:
#
# +hyphenation+::
#   If provided, allow the given text to be hyphenated as needed to best
#   fit the available space. Requires the text-hyphen gem. Allowable values:
#   an ISO 639 language code (like 'pt'), or +true+ (synonym for 'en_us').
# +indent+::
#   If specified, indent the first line of the paragraph by the given
#   number of PDF points.
#
def paragraph(text, options={})
  hyphenator = if options[:hyphenation]
    begin
      gem 'text-hyphen'
      require 'text/hyphen'
    rescue LoadError
      raise LoadError, ":hyphenation option requires the text-hyphen gem"
    end

    language = ((lang = options[:hyphenation]) == true) ? 'en_us' : lang
    # Hyphenators are cached per language on this tokenizer.
    @hyphenators ||= {}
    @hyphenators[language] ||= Text::Hyphen.new(:language => language)
  end

  stream = []

  # The indent is represented as an empty box of the requested width.
  if indent = options[:indent]
    stream << box(indent, "")
  end

  # Interword glue can stretch by half and shrink by a third.
  # TODO: optimal stretch/shrink ratios
  space_width = @pdf.width_of(" ")
  interword_glue = glue(space_width,
                        space_width / 2.0,
                        space_width / 3.0)

  # TODO: optimal values for sentence space w/y/z
  sentence_space_width = space_width * 1.5
  sentence_glue = glue(sentence_space_width,
                       sentence_space_width / 2.0,
                       sentence_space_width / 3.0)

  # Break paragraph on whitespace.
  # TODO: how should "battle-\nfield" be tokenized?
  text.strip.split(/\s+/).each do |word|
    scanner = StringScanner.new(word)

    # For hyphenated words, follow each hyphen by a zero-width flagged
    # penalty.
    # TODO: recognize dashes in all their variants
    while seg = scanner.scan(/[^-]+-/) # "night-time" --> "<<night->>time"
      stream.concat add_word_segment(seg, hyphenator)
    end

    # Whatever follows the last hyphen (or the whole word if it has none).
    stream.concat(add_word_segment(scanner.rest, hyphenator))

    # TODO: add ties (~) or some other way to denote a period that
    # doesn't end a sentence.
    if scanner.rest =~ /\.$/
      stream << sentence_glue
    else
      stream << interword_glue
    end
  end

  # Remove extra glue at the end. The stream is empty when +text+ is blank
  # and no indent was requested, so there may be no last token to inspect.
  last_token = stream.last
  stream.pop if last_token && token_type(last_token) == :glue

  # Finish paragraph with a penalty inhibiting a break, finishing glue (to
  # pad out the last line), and a forced break to finish the paragraph.
  stream << penalty(Infinity)
  stream << glue(0, Infinity, 0)
  stream << penalty(-Infinity)

  stream
end
|
105
|
+
|
106
|
+
protected
|
107
|
+
|
108
|
+
# Returns a series of tokens representing the given word. Hyphenates using
# the given +hyphenator+, if provided. Appends a zero-width flagged penalty
# if the given word ends in a hyphen.
#
# +word+::
#   The string to convert; measured with <tt>@pdf.width_of</tt>.
# +hyphenator+::
#   +nil+/+false+ for no hyphenation, or an object whose +hyphenate+ method
#   returns the indices in +word+ at which it may be split.
#
# Without a hyphenator the result is a single box. With one, each segment
# before a split point becomes a box followed by a flagged penalty as wide
# as a hyphen, and the remainder of the word becomes a final box.
#
def add_word_segment(word, hyphenator)
  tokens = []

  if hyphenator
    # Measure the hyphen once; every discretionary break uses this width.
    hyphen_width = @pdf.width_of('-')

    splits = hyphenator.hyphenate(word)
    # Pair up consecutive split points (starting from 0) so that each pair
    # delimits one segment that may be followed by a hyphen.
    [0, *splits].each_cons(2) do |from, to|
      piece = word[from...to]
      tokens << box(@pdf.width_of(piece), piece)
      tokens << penalty(50, hyphen_width, true)
    end

    # Everything after the last split point (the whole word if none).
    remainder = word[(splits.last || 0)..-1]
    tokens << box(@pdf.width_of(remainder), remainder)
  else
    tokens << box(@pdf.width_of(word), word)
  end

  # An explicit trailing hyphen is already part of the box, so the break
  # after it adds no width.
  tokens << penalty(50, 0, true) if word =~ /-$/
  tokens
end
|
135
|
+
|
136
|
+
end
|
137
|
+
|
138
|
+
end
|
139
|
+
|
@@ -0,0 +1,48 @@
|
|
1
|
+
|
2
|
+
module Crawdad

  # Constructors and accessors for the items that make up a paragraph
  # stream. Every token is a plain Array: element 0 is the type tag
  # (:box, :glue or :penalty) and element 1 is the width. The remaining
  # elements depend on the type:
  #
  #   [:box,     width, content]
  #   [:glue,    width, stretch, shrink]
  #   [:penalty, width, penalty, flagged]
  #
  module Tokens

    # ---- Constructors ----------------------------------------------------

    # Builds a box token of the given +width+ carrying +content+.
    def box(width, content)
      [:box, width, content]
    end

    # Builds a glue token with its natural +width+ and the amounts by which
    # it may +stretch+ and +shrink+.
    def glue(width, stretch, shrink)
      [:glue, width, stretch, shrink]
    end

    # Builds a penalty token. Note the argument order: the penalty value
    # comes first, but the token stores +width+ (default 0) before it.
    # +flagged+ defaults to false.
    def penalty(penalty, width=0, flagged=false)
      [:penalty, width, penalty, flagged]
    end

    # ---- Accessors common to all tokens ----------------------------------

    # The type tag stored in the first slot of +token+.
    def token_type(token)
      token[0]
    end

    # The width stored in the second slot of +token+.
    def token_width(token)
      token[1]
    end

    # ---- Type-specific accessors -----------------------------------------

    # The content of a box token.
    def box_content(token)
      token[2]
    end

    # The stretch of a glue token.
    def glue_stretch(token)
      token[2]
    end

    # The shrink of a glue token.
    def glue_shrink(token)
      token[3]
    end

    # The penalty value of a penalty token.
    def penalty_penalty(token)
      token[2]
    end

    # The flagged slot of a penalty token, returned as stored.
    def penalty_flagged?(token)
      token[3]
    end

  end

end
|
metadata
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: crawdad
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Brad Ediger
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2010-03-19 00:00:00 -05:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: Crawdad is an implementation of Knuth-Plass linebreaking (justification) for Ruby.
|
17
|
+
email: brad.ediger@madriska.com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions:
|
21
|
+
- Rakefile
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- lib/crawdad
|
26
|
+
- lib/crawdad/prawn_tokenizer.rb
|
27
|
+
- lib/crawdad/breakpoint.rb
|
28
|
+
- lib/crawdad/native.rb
|
29
|
+
- lib/crawdad/ffi.rb
|
30
|
+
- lib/crawdad/paragraph.rb
|
31
|
+
- lib/crawdad/compatibility.rb
|
32
|
+
- lib/crawdad/tokens.rb
|
33
|
+
- lib/crawdad/ffi
|
34
|
+
- lib/crawdad/ffi/paragraph.rb
|
35
|
+
- lib/crawdad/ffi/tokens.rb
|
36
|
+
- lib/crawdad/ffi/breakpoint_node.rb
|
37
|
+
- lib/crawdad.rb
|
38
|
+
- ext/crawdad/breakpoint.h
|
39
|
+
- ext/crawdad/tokens.c
|
40
|
+
- ext/crawdad/paragraph.h
|
41
|
+
- ext/crawdad/Makefile
|
42
|
+
- ext/crawdad/tokens.h
|
43
|
+
- ext/crawdad/paragraph.c
|
44
|
+
- Rakefile
|
45
|
+
has_rdoc: true
|
46
|
+
homepage: http://github.com/madriska/crawdad
|
47
|
+
post_install_message:
|
48
|
+
rdoc_options:
|
49
|
+
- --title
|
50
|
+
- Crawdad Documentation
|
51
|
+
- -q
|
52
|
+
require_paths:
|
53
|
+
- lib
|
54
|
+
- ext
|
55
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: "0"
|
60
|
+
version:
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: "0"
|
66
|
+
version:
|
67
|
+
requirements: []
|
68
|
+
|
69
|
+
rubyforge_project:
|
70
|
+
rubygems_version: 1.3.1
|
71
|
+
signing_key:
|
72
|
+
specification_version: 2
|
73
|
+
summary: Knuth-Plass linebreaking for Ruby
|
74
|
+
test_files: []
|
75
|
+
|