srdperu-prawn-format 0.1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/prawn/format/effects/link.rb +34 -0
- data/lib/prawn/format/effects/underline.rb +32 -0
- data/lib/prawn/format/instructions/base.rb +62 -0
- data/lib/prawn/format/instructions/tag_close.rb +52 -0
- data/lib/prawn/format/instructions/tag_open.rb +95 -0
- data/lib/prawn/format/instructions/text.rb +89 -0
- data/lib/prawn/format/layout_builder.rb +113 -0
- data/lib/prawn/format/lexer.rb +240 -0
- data/lib/prawn/format/line.rb +99 -0
- data/lib/prawn/format/parser.rb +181 -0
- data/lib/prawn/format/state.rb +189 -0
- data/lib/prawn/format/text_object.rb +107 -0
- data/lib/prawn/format/version.rb +5 -0
- data/lib/prawn/format.rb +229 -0
- metadata +90 -0
@@ -0,0 +1,240 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'strscan'
|
4
|
+
|
5
|
+
module Prawn
  module Format

    # The Lexer class is used by the formatting subsystem to scan a string
    # and extract tokens from it. The tokens it looks for are either text,
    # XML entities, or XML tags.
    #
    # Note that the lexer only scans for a subset of XML--it is not a true
    # XML scanner, and understands just enough to provide a basic markup
    # language for use in formatting documents.
    #
    # The subset includes only XML entities and tags--instructions, comments,
    # and the like are not supported.
    class Lexer
      # When the scanner encounters a state or entity it is not able to
      # handle, this exception will be raised.
      class InvalidFormat < RuntimeError; end

      # Maps the supported named entities (without the leading "&" and the
      # trailing ";") to their UTF-8 replacement text. Frozen so the shared
      # table cannot be modified by accident; #scan_entity hands out copies
      # of the values.
      ENTITY_MAP = {
        "lt"     => "<",
        "gt"     => ">",
        "amp"    => "&",
        "mdash"  => "\xE2\x80\x94",
        "ndash"  => "\xE2\x80\x93",
        "nbsp"   => "\xC2\xA0",
        "bull"   => "\342\200\242",
        "quot"   => '"',
        "trade"  => '™',
        "iexcl"  => '¡',
        "cent"   => '¢',
        "pound"  => '£',
        "curren" => '¤',
        "copy"   => '©',
        "aacute" => 'á',
        "eacute" => 'é',
        "iacute" => 'í',
        "oacute" => 'ó',
        "uacute" => 'ú',
        "Aacute" => 'Á',
        "Eacute" => 'É',
        "Iacute" => 'Í',
        "Oacute" => 'Ó',
        "Uacute" => 'Ú',
        "ntilde" => 'ñ',
        "Ntilde" => 'Ñ',
      }.freeze

      # Controls whether whitespace is lexed verbatim or not. If not,
      # adjacent whitespace is compressed into a single space character
      # (this includes newlines).
      attr_accessor :verbatim

      # Create a new lexer that will scan the given text. The text must be
      # UTF-8 encoded, and must consist of well-formed XML in the subset
      # understood by the lexer.
      def initialize(text)
        @scanner = StringScanner.new(text)
        @state = :start
        @verbatim = false
      end

      # Returns the next token from the scanner. If the end of the string
      # has been reached, this will return nil. Otherwise, the token itself
      # is returned as a hash. The hash will always include a :type key,
      # identifying the type of the token. It will be one of :text, :open,
      # or :close.
      #
      # For :text tokens, the hash will also contain a :text key, which will
      # point to an array of strings. Each element of the array contains
      # either word, whitespace, or some other character at which the line
      # may be broken.
      #
      # For :open tokens, the hash will contain a :tag key which identifies
      # the name of the tag (as a lower-cased symbol), and an :options key,
      # which is another hash that contains the options that were given with
      # the tag.
      #
      # For :close tokens, the hash will contain only a :tag key (also a
      # lower-cased symbol).
      #
      # Raises InvalidFormat when the input cannot be tokenized.
      def next
        # A pending self-closing tag still owes a :close token even when the
        # scanner itself has reached the end of the string.
        if @state == :start && @scanner.eos?
          return nil
        else
          scan_next_token
        end
      end

      # Iterates over each token in the string, until the end of the string
      # is reached. Each token is yielded. See #next for a discussion of the
      # available token types.
      def each
        # +next+ is a Ruby keyword, so the method must be called with an
        # explicit receiver.
        while (token = self.next)
          yield token
        end
      end

      private

        # Dispatches to the scanning routine for the current lexer state.
        def scan_next_token
          case @state
          when :start then scan_start_state
          when :self_close then scan_self_close_state
          end
        end

        if RUBY_VERSION >= "1.9.0"
          # Scans a run of characters that contains no hyphen, mdash,
          # whitespace, "<" or "&". Returns nil if no such run starts at the
          # current position.
          def scan_other_text
            @scanner.scan(/[^-\xE2\x80\x94\s<&]+/)
          end
        else
          # Ruby 1.8 variant of the method above: searches forward for the
          # next delimiter, rewinds the scanner to just before it, and
          # returns the text that preceded it.
          def scan_other_text
            return nil if @scanner.eos?

            result = @scanner.scan_until(/[-\s<&]|\xE2\x80\x94/)
            if result
              @scanner.pos -= @scanner.matched.length
              return nil if result == "<" || result == "&"
              return result[0,result.length - @scanner.matched.length]
            else
              result = @scanner.rest
              @scanner.terminate
              return result
            end
          end
        end

        # Scans a single hyphen, a single mdash, or a run of other text.
        def scan_text_chunk
          @scanner.scan(/-/) ||            # hyphen
          @scanner.scan(/\xe2\x80\x94/) || # mdash
          scan_other_text
        end

        # Scans the next chunk while preserving whitespace: a single newline
        # sequence, a single tab, and a run of spaces are each returned
        # unchanged as separate chunks.
        def scan_verbatim_text_chunk
          @scanner.scan(/\r\n|\r|\n/) || # newline
          @scanner.scan(/\t/) ||         # tab
          @scanner.scan(/ +/) ||         # spaces
          scan_text_chunk
        end

        # Scans the next chunk, collapsing any run of whitespace into a
        # single space character.
        def scan_nonverbatim_text_chunk
          (@scanner.scan(/\s+/) && " ") || # whitespace
          scan_text_chunk
        end

        # Scans the next text chunk according to the current #verbatim mode.
        def scan_next_text_chunk
          if @verbatim
            scan_verbatim_text_chunk
          else
            scan_nonverbatim_text_chunk
          end
        end

        # Scans one token starting at the current position: a tag if the
        # input starts with "<", an entity if it starts with "&", otherwise
        # every text chunk up to the next tag or entity.
        def scan_start_state
          if @scanner.scan(/</)
            if @scanner.scan(%r(/))
              scan_end_tag
            else
              scan_open_tag
            end
          elsif @scanner.scan(/&/)
            scan_entity
          else
            pieces = []
            loop do
              chunk = scan_next_text_chunk or break
              pieces << chunk
            end
            { :type => :text, :text => pieces }
          end
        end

        # Scans an entity body (the leading "&" has already been consumed)
        # and returns it as a :text token holding the replacement text.
        #
        # Supports decimal (&#65;) and hexadecimal (&#x4A;) character
        # references as well as the names listed in ENTITY_MAP. Raises
        # InvalidFormat for malformed, unterminated or unknown entities.
        def scan_entity
          entity = @scanner.scan(/(?:#x?)?\w+/) or error("bad format for entity")
          @scanner.scan(/;/) or error("missing semicolon to terminate entity")

          # The patterns are anchored so that something like "#12ab" is
          # rejected instead of being silently read as "#12", and the hex
          # form is case-insensitive so that "#x4A" is not cut off at "#x4".
          text = case entity
            when /\A#(\d+)\z/ then [$1.to_i].pack("U*")
            when /\A#x([0-9a-f]+)\z/i then [$1.to_i(16)].pack("U*")
            else
              result = ENTITY_MAP[entity] or error("unrecognized entity #{entity.inspect}")
              result.dup
            end

          { :type => :text, :text => [text] }
        end

        # Scans an opening tag (the leading "<" has already been consumed),
        # including its name="value" options, and returns an :open token.
        # Tag and option names are lower-cased. If the tag is self-closing
        # ("<br/>"), the lexer switches to the :self_close state so that the
        # following call to #next emits the matching :close token.
        def scan_open_tag
          tag = @scanner.scan(/\w+/) or error("'<' without valid tag")
          tag = tag.downcase.to_sym

          options = {}
          @scanner.skip(/\s*/)
          while !@scanner.eos? && @scanner.peek(1) =~ /\w/
            name = @scanner.scan(/\w+/)
            @scanner.scan(/\s*=\s*/) or error("expected assigment after option #{name}")
            if (delim = @scanner.scan(/['"]/))
              value = @scanner.scan(/[^#{delim}]*/)
              @scanner.scan(/#{delim}/) or error("expected option value to end with #{delim}")
            else
              value = @scanner.scan(/[^\s>]*/)
            end
            options[name.downcase.to_sym] = value
            @scanner.skip(/\s*/)
          end

          if @scanner.scan(%r(/))
            @tag = tag
            @state = :self_close
          else
            @state = :start
          end

          @scanner.scan(/>/) or error("unclosed tag #{tag.inspect}")

          { :type => :open, :tag => tag, :options => options }
        end

        # Scans a closing tag (the leading "</" has already been consumed)
        # and returns a :close token. The tag name is lower-cased, just as
        # for opening tags, so that "<B>...</B>" yields matching tokens.
        # Raises InvalidFormat if the tag name or the ">" is missing.
        def scan_end_tag
          tag = @scanner.scan(/\w+/) or error("'</' without valid tag")
          tag = tag.downcase.to_sym
          @scanner.skip(/\s*/)
          @scanner.scan(/>/) or error("unclosed ending tag #{tag.inspect}")
          { :type => :close, :tag => tag }
        end

        # Emits the :close token owed by a self-closing tag and returns the
        # lexer to its normal state.
        def scan_self_close_state
          @state = :start
          { :type => :close, :tag => @tag }
        end

        # Raises InvalidFormat with +message+, the current scanner position
        # and the beginning of the unscanned input.
        def error(message)
          raise InvalidFormat, "#{message} at #{@scanner.pos} -> #{@scanner.rest.inspect[0,50]}..."
        end
    end
  end
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Prawn
  module Format

    # A Line wraps the instructions that make up one laid-out line of text
    # and knows how to measure itself and how to draw itself on a document.
    class Line
      # The instructions exactly as they were given to the constructor.
      attr_reader :source

      # Creates a line from the given list of +instructions+. +hard_break+
      # is returned by #hard_break? and disables justification padding in
      # #draw_on.
      def initialize(instructions, hard_break)
        # need to remember the "source" instructions, because lines can be
        # pushed back onto the stack en masse when flowing into boxes,
        # if a line is discovered to not fit. Thus, a line must preserve
        # all instructions it was originally given.

        @source = instructions
        @hard_break = hard_break
      end

      # Returns the instructions used for measuring and drawing: a copy of
      # #source with trailing discardable instructions removed, folded
      # together through each instruction's #accumulate. The result is
      # computed once and cached.
      def instructions
        @instructions ||= begin
          list = source.dup

          # ignore discardable items at the end of lines
          list.pop while list.any? && list.last.discardable?

          consolidate(list)
        end
      end

      # Returns the sum of the +spaces+ reported by each instruction, but
      # never less than 1, so that it is always safe to divide by. The
      # result is cached.
      def spaces
        @spaces ||= begin
          total = instructions.inject(0) { |sum, instruction| sum + instruction.spaces }
          [1, total].max
        end
      end

      # Returns the +hard_break+ value given to the constructor.
      def hard_break?
        @hard_break
      end

      # Returns the sum of the widths of all instructions on the line.
      def width
        instructions.inject(0) { |sum, instruction| sum + instruction.width }
      end

      # distance from top of line to baseline
      # (the largest ascent of any instruction, or 0 for an empty line)
      def ascent
        instructions.map { |instruction| instruction.ascent }.max || 0
      end

      # distance from bottom of line to baseline
      # (the smallest descent of any instruction, or 0 for an empty line)
      def descent
        instructions.map { |instruction| instruction.descent }.min || 0
      end

      # Returns the largest height reported by any instruction on the line;
      # +include_blank+ is passed through to each instruction. Returns 0 for
      # a line without instructions, consistent with #ascent and #descent.
      def height(include_blank=false)
        instructions.map { |instruction| instruction.height(include_blank) }.max || 0
      end

      # Draws the line on +document+. Does nothing if the line has no
      # instructions.
      #
      # +state+ is a hash that is read and updated while drawing:
      #
      # * :width is read to compute alignment.
      # * :dx is set according to options[:align] (:left, :center, :right or
      #   :justify); it is left untouched for any other value.
      # * :padding is set, and passed to state[:text].word_space, when
      #   justifying. It is 0 for lines ending in a hard break.
      # * :dy is first decreased by the ascent (to reach the baseline) and,
      #   after drawing, by options[:spacing] plus the rest of the height.
      # * :line is set to this line.
      # * :pending_effects are each asked to #wrap once all instructions
      #   have been drawn.
      def draw_on(document, state, options={})
        return if instructions.empty?

        case options[:align]
        when :left
          state[:dx] = 0
        when :center
          state[:dx] = (state[:width] - width) / 2.0
        when :right
          state[:dx] = state[:width] - width
        when :justify
          state[:dx] = 0
          # Divide as a float so the leftover width is not truncated when
          # the widths happen to be integers.
          state[:padding] = hard_break? ? 0 : (state[:width] - width) / spaces.to_f
          state[:text].word_space(state[:padding])
        end

        state[:dy] -= ascent

        state[:text].move_to(state[:dx], state[:dy])
        state[:line] = self

        document.save_font do
          instructions.each { |instruction| instruction.draw(document, state, options) }
          state[:pending_effects].each { |effect| effect.wrap(document, state) }
        end

        state[:dy] -= (options[:spacing] || 0) + (height - ascent)
      end

      private

        # Folds +list+ into a new array by letting each instruction add
        # itself to the array built so far via #accumulate.
        def consolidate(list)
          list.inject([]) { |l,i| i.accumulate(l) }
        end
    end

  end
end
|
@@ -0,0 +1,181 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'prawn/format/instructions/text'
|
4
|
+
require 'prawn/format/instructions/tag_open'
|
5
|
+
require 'prawn/format/instructions/tag_close'
|
6
|
+
require 'prawn/format/lexer'
|
7
|
+
require 'prawn/format/line'
|
8
|
+
require 'prawn/format/state'
|
9
|
+
|
10
|
+
module Prawn
  module Format

    # The Parser class is used by the formatting subsystem to take
    # the raw tokens from the Lexer class and wrap them in
    # "instructions", which are then used by the LayoutBuilder to
    # determine how each token should be rendered.
    #
    # The parser also checks that tags are opened and closed
    # consistently. It is not forgiving at all--a closing tag that does
    # not match the most recently opened tag, or that appears while no
    # tag is open, raises an exception (TagError).
    #
    # NOTE(review): tags that are still open when the input ends are not
    # reported by this class; #next simply returns +nil+.
    #
    # It will also raise an exception if a tag is encountered with
    # no style definition for it.
    class Parser
      # This is the exception that gets raised when the parser cannot
      # process a particular tag.
      class TagError < RuntimeError; end

      attr_reader :document
      attr_reader :tags
      attr_reader :state

      # Creates a new parser associated with the given +document+, and which
      # will parse the given +text+. The +options+ may include any of three
      # optional keys:
      #
      # * :tags is used to specify the hash of tags and their associated
      #   styles. Any tag not specified here will not be recognized by the
      #   parser, and will cause an error if it is encountered in +text+.
      # * :styles is used to specify the mapping of style classes to their
      #   definitions. The keys should be symbols, and the values should be
      #   hashes. The values have the same format as for the :tags map.
      # * :style is the default style for any text not otherwise wrapped by
      #   tags.
      #
      # Example:
      #
      #   parser = Parser.new(@pdf, "<b class='ruby'>hello</b>",
      #     :tags => { :b => { :font_weight => :bold } },
      #     :styles => { :ruby => { :color => "red" } },
      #     :style => { :font_family => "Times-Roman" })
      #
      # See Format::State for a description of the supported style options.
      def initialize(document, text, options={})
        @document = document
        @lexer = Lexer.new(text)
        @tags = options[:tags] || {}
        @styles = options[:styles] || {}

        @state = State.new(document, :style => options[:style])
        @lexer.verbatim = (@state.white_space == :pre)

        @action = :start

        @saved = []
        @tag_stack = []
      end

      # Returns the lexer's current verbatim setting, i.e. whether
      # whitespace is currently being preserved.
      def verbatim?
        @lexer.verbatim
      end

      # Returns the next instruction from the stream. If there are no more
      # instructions in the stream (e.g., the end has been encountered), this
      # returns +nil+.
      #
      # Raises TagError for undefined or mismatched tags.
      def next
        # Instructions handed back via #push are returned first, most
        # recently pushed first.
        return @saved.pop if @saved.any?

        case @action
        when :start then start_parse
        when :text  then text_parse
        else raise "BUG: unknown parser action: #{@action.inspect}"
        end
      end

      # "Ungets" the given +instruction+. This makes it so the next call to
      # +next+ will return +instruction+. This is useful for backtracking.
      def push(instruction)
        @saved.push(instruction)
      end

      # This is identical to +next+, except it does not consume the
      # instruction. This means that +peek+ returns the instruction that will
      # be returned by the next call to +next+. It is useful for testing
      # the next instruction in the stream without advancing the stream.
      def peek
        save = self.next
        push(save) if save
        return save
      end

      # Returns +true+ if the end of the stream has been reached. Subsequent
      # calls to +peek+ or +next+ will return +nil+.
      def eos?
        peek.nil?
      end

      private

        # Pulls tokens from the lexer until one of them produces an
        # instruction, and returns that instruction (or +nil+ once the lexer
        # is exhausted). Also switches the lexer's verbatim mode when the
        # instruction reports the start or end of a verbatim section.
        def start_parse
          instruction = nil
          while (@token = @lexer.next)
            case @token[:type]
            when :text
              @position = 0
              instruction = text_parse
            when :open
              instruction = process_open_tag
            when :close
              raise TagError, "closing #{@token[:tag]}, but no tags are open" if @tag_stack.empty?
              raise TagError, "closing #{@tag_stack.last[:tag]} with #{@token[:tag]}" if @tag_stack.last[:tag] != @token[:tag]

              instruction = Instructions::TagClose.new(@state, @tag_stack.pop)
              @state = @state.previous
            else
              raise ArgumentError, "[BUG] unknown token type #{@token[:type].inspect} (#{@token.inspect})"
            end

            if instruction
              if instruction.start_verbatim?
                @lexer.verbatim = true
              elsif instruction.end_verbatim?
                @lexer.verbatim = false
              end

              return instruction
            end
          end

          return nil
        end

        # Returns an instruction for the next piece of the current :text
        # token. Once the token has no pieces left, goes back to pulling
        # tokens from the lexer via #start_parse.
        def text_parse
          if @token[:text][@position]
            @action = :text
            @position += 1

            text = @token[:text][@position - 1]
            if @state.white_space == :pre && text =~ /(?:\r\n|\r|\n)/
              # In preformatted text a newline becomes a line break, which is
              # expressed as a closing tag whose style has :display => :break.
              Instructions::TagClose.new(@state, { :style => { :display => :break }, :options => {} })
            else
              Instructions::Text.new(@state, text)
            end
          else
            @action = :start
            start_parse
          end
        end

        # Handles an :open token: computes the tag's style, pushes the token
        # onto the stack of open tags, derives a new state from the style,
        # and returns the corresponding TagOpen instruction.
        #
        # The style is a copy of the tag's entry in the :tags map, updated
        # with the style of every class named in the tag's "class" option.
        # If the style has a :meta hash, each of its key/value pairs copies
        # the tag option named by the key into the style under the value.
        #
        # Raises TagError if the tag is not present in the :tags map.
        def process_open_tag
          tag_style = @tags[@token[:tag]]

          # Validate before touching the stack, so that a rescued TagError
          # does not leave a phantom open tag behind.
          raise TagError, "undefined tag #{@token[:tag]}" unless tag_style

          @tag_stack << @token
          @token[:style] = tag_style.dup

          # A bare #split separates on runs of whitespace and never yields
          # empty class names.
          (@token[:options][:class] || "").split.each do |name|
            @token[:style].update(@styles[name.to_sym] || {})
          end

          if @token[:style][:meta]
            @token[:style][:meta].each do |key, value|
              @token[:style][value] = @token[:options][key]
            end
          end

          @state = @state.with_style(@token[:style])
          Instructions::TagOpen.new(@state, @token)
        end
    end

  end
end
|