srdperu-prawn-format 0.1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,240 @@
1
+ # encoding: utf-8
2
+
3
+ require 'strscan'
4
+
5
+ module Prawn
6
+ module Format
7
+
8
# The Lexer class is used by the formatting subsystem to scan a string
# and extract tokens from it. The tokens it looks for are either text,
# XML entities, or XML tags.
#
# Note that the lexer only scans for a subset of XML--it is not a true
# XML scanner, and understands just enough to provide a basic markup
# language for use in formatting documents.
#
# The subset includes only XML entities and tags--instructions, comments,
# and the like are not supported.
class Lexer
  # When the scanner encounters a state or entity it is not able to
  # handle, this exception will be raised.
  class InvalidFormat < RuntimeError; end

  # Controls whether whitespace is lexed verbatim or not. If not,
  # adjacent whitespace is compressed into a single space character
  # (this includes newlines).
  attr_accessor :verbatim

  # Create a new lexer that will scan the given text. The text must be
  # UTF-8 encoded, and must consist of well-formed XML in the subset
  # understood by the lexer.
  def initialize(text)
    @scanner = StringScanner.new(text)
    @state = :start
    @verbatim = false
  end

  # Returns the next token from the scanner. If the end of the string
  # has been reached, this will return nil. Otherwise, the token itself
  # is returned as a hash. The hash will always include a :type key,
  # identifying the type of the token. It will be one of :text, :open,
  # or :close.
  #
  # For :text tokens, the hash will also contain a :text key, which will
  # point to an array of strings. Each element of the array contains
  # either word, whitespace, or some other character at which the line
  # may be broken.
  #
  # For :open tokens, the hash will contain a :tag key which identifies
  # the name of the tag (as a symbol), and an :options key, which
  # is another hash that contains the options that were given with the
  # tag.
  #
  # For :close tokens, the hash will contain only a :tag key.
  #
  # Raises InvalidFormat when the input cannot be tokenized.
  def next
    # A pending self-close (<tag/>) must still emit its :close token even
    # when the scanner is already at the end of the string, so the
    # end-of-input check only applies in the :start state.
    return nil if @state == :start && @scanner.eos?

    scan_next_token
  end

  # Iterates over each token in the string, until the end of the string
  # is reached. Each token is yielded. See #next for a discussion of the
  # available token types. When called without a block, an enumerator
  # over the tokens is returned instead.
  def each
    return enum_for(:each) unless block_given?

    # +next+ is also a Ruby keyword, so the method has to be invoked with
    # an explicit receiver.
    while (token = self.next)
      yield token
    end
  end

  private

  # Dispatches to the scanning routine for the current lexer state.
  def scan_next_token
    case @state
    when :start then scan_start_state
    when :self_close then scan_self_close_state
    end
  end

  # scan_other_text consumes a run of characters that are not a hyphen,
  # an em dash, whitespace, "<" or "&". Two implementations are kept,
  # selected once at load time by interpreter version.
  if RUBY_VERSION >= "1.9.0"
    def scan_other_text
      @scanner.scan(/[^-\xE2\x80\x94\s<&]+/)
    end
  else
    def scan_other_text
      return nil if @scanner.eos?

      result = @scanner.scan_until(/[-\s<&]|\xE2\x80\x94/)
      if result
        # Un-consume the delimiter so that the next scan sees it.
        @scanner.pos -= @scanner.matched.length
        return nil if result == "<" || result == "&"
        return result[0,result.length - @scanner.matched.length]
      else
        result = @scanner.rest
        @scanner.terminate
        return result
      end
    end
  end

  # Scans one chunk of text: a single hyphen, a single em dash, or a run
  # of other text (see scan_other_text). Returns nil if none match.
  def scan_text_chunk
    @scanner.scan(/-/) ||             # hyphen
      @scanner.scan(/\xe2\x80\x94/) || # mdash
      scan_other_text
  end

  # Scans one chunk in verbatim mode, where a newline, a tab, and a run
  # of spaces are each returned as-is.
  def scan_verbatim_text_chunk
    @scanner.scan(/\r\n|\r|\n/) || # newline
      @scanner.scan(/\t/) ||       # tab
      @scanner.scan(/ +/) ||       # spaces
      scan_text_chunk
  end

  # Scans one chunk in non-verbatim mode, where any run of whitespace is
  # collapsed into a single space.
  def scan_nonverbatim_text_chunk
    (@scanner.scan(/\s+/) && " ") || # whitespace
      scan_text_chunk
  end

  # Scans one text chunk according to the current +verbatim+ setting.
  def scan_next_text_chunk
    if @verbatim
      scan_verbatim_text_chunk
    else
      scan_nonverbatim_text_chunk
    end
  end

  # Scans a single token from the :start state: a tag if the input
  # begins with "<", an entity if it begins with "&", and otherwise a
  # :text token holding every consecutive text chunk.
  def scan_start_state
    if @scanner.scan(/</)
      if @scanner.scan(%r(/))
        scan_end_tag
      else
        scan_open_tag
      end
    elsif @scanner.scan(/&/)
      scan_entity
    else
      pieces = []
      while (chunk = scan_next_text_chunk)
        pieces << chunk
      end
      { :type => :text, :text => pieces }
    end
  end

  # Maps the supported named entities to their replacement text.
  ENTITY_MAP = {
    "lt"    => "<",
    "gt"    => ">",
    "amp"   => "&",
    "mdash" => "\xE2\x80\x94",
    "ndash" => "\xE2\x80\x93",
    "nbsp"  => "\xC2\xA0",
    "bull"  => "\342\200\242",
    "quot"  => '"',
    "trade" => '™',
    "iexcl" => '¡',
    "cent" => '¢',
    "pound" => '£',
    "curren" => '¤',
    "copy" => '©',
    "aacute" => 'á',
    "eacute" => 'é',
    "iacute" => 'í',
    "oacute" => 'ó',
    "uacute" => 'ú',
    "Aacute" => 'Á',
    "Eacute" => 'É',
    "Iacute" => 'Í',
    "Oacute" => 'Ó',
    "Uacute" => 'Ú',
    "ntilde" => 'ñ',
    "Ntilde" => 'Ñ',
  }

  # Scans the remainder of an entity (the leading "&" has already been
  # consumed) and returns it as a :text token holding the decoded text.
  # Decimal (&#65;) and hexadecimal (&#x41;, either letter case)
  # character references are supported, as are the names in ENTITY_MAP.
  # Raises InvalidFormat for anything else.
  def scan_entity
    entity = @scanner.scan(/(?:#x?)?\w+/) || error("bad format for entity")
    @scanner.scan(/;/) || error("missing semicolon to terminate entity")

    # The patterns are anchored so that the whole reference must be
    # numeric; a partial match would silently decode the wrong character.
    text = case entity
           when /\A#(\d+)\z/ then codepoint_to_text($1.to_i)
           when /\A#x([0-9a-f]+)\z/i then codepoint_to_text($1.to_i(16))
           else
             result = ENTITY_MAP[entity] || error("unrecognized entity #{entity.inspect}")
             result.dup
           end

    { :type => :text, :text => [text] }
  end

  # Encodes a numeric character reference as UTF-8 text, reporting a
  # value that cannot be packed as an InvalidFormat error.
  def codepoint_to_text(codepoint)
    [codepoint].pack("U*")
  rescue RangeError
    error("character reference out of range")
  end

  # Scans an opening tag (the leading "<" has already been consumed),
  # including any name=value options. Tag and option names are
  # downcased. If the tag is self-closing ("<tag/>"), the lexer switches
  # to the :self_close state so that the following call to #next emits
  # the matching :close token.
  def scan_open_tag
    tag = @scanner.scan(/\w+/) || error("'<' without valid tag")
    tag = tag.downcase.to_sym

    options = {}
    @scanner.skip(/\s*/)
    while !@scanner.eos? && @scanner.peek(1) =~ /\w/
      name = @scanner.scan(/\w+/)
      @scanner.scan(/\s*=\s*/) || error("expected assigment after option #{name}")
      if (delim = @scanner.scan(/['"]/))
        value = @scanner.scan(/[^#{delim}]*/)
        @scanner.scan(/#{delim}/) || error("expected option value to end with #{delim}")
      else
        # Unquoted values run up to the next whitespace or ">".
        value = @scanner.scan(/[^\s>]*/)
      end
      options[name.downcase.to_sym] = value
      @scanner.skip(/\s*/)
    end

    if @scanner.scan(%r(/))
      @self_close = true
      @tag = tag
      @state = :self_close
    else
      @self_close = false
      @state = :start
    end

    @scanner.scan(/>/) || error("unclosed tag #{tag.inspect}")

    { :type => :open, :tag => tag, :options => options }
  end

  # Scans a closing tag (the leading "</" has already been consumed).
  # The tag name is downcased, as it is for opening tags, so that the
  # two compare equal regardless of the case used in the markup.
  def scan_end_tag
    tag = @scanner.scan(/\w+/) || error("'</' without valid tag")
    tag = tag.downcase.to_sym
    @scanner.skip(/\s*/)
    @scanner.scan(/>/) || error("unclosed ending tag #{tag.inspect}")
    { :type => :close, :tag => tag }
  end

  # Emits the :close token for a self-closing tag and returns the lexer
  # to the :start state.
  def scan_self_close_state
    @state = :start
    { :type => :close, :tag => @tag }
  end

  # Raises InvalidFormat with the given message, the scanner position,
  # and an excerpt of the remaining input.
  def error(message)
    raise InvalidFormat, "#{message} at #{@scanner.pos} -> #{@scanner.rest.inspect[0,50]}..."
  end
end
239
+ end
240
+ end
@@ -0,0 +1,99 @@
1
+ # encoding: utf-8
2
+
3
+ module Prawn
4
+ module Format
5
+
6
# A single line of formatted output, built from a list of instruction
# objects. The line reports its metrics by aggregating the metrics of
# its instructions, and can draw itself onto a document.
class Line
  # The instructions exactly as they were given to the constructor.
  attr_reader :source

  # Creates a line from the given +instructions+. +hard_break+ is the
  # value later reported by #hard_break?.
  def initialize(instructions, hard_break)
    # need to remember the "source" instructions, because lines can be
    # pushed back onto the stack en masse when flowing into boxes,
    # if a line is discovered to not fit. Thus, a line must preserve
    # all instructions it was originally given.

    @source = instructions
    @hard_break = hard_break
  end

  # Returns the instructions that make up the line: a copy of #source
  # with trailing discardable instructions removed, passed through each
  # instruction's +accumulate+ method. The result is memoized.
  def instructions
    @instructions ||= begin
      list = source.dup

      # ignore discardable items at the end of lines
      list.pop while list.any? && list.last.discardable?

      consolidate(list)
    end
  end

  # Returns the total number of spaces reported by the instructions,
  # but never less than 1 (the value is used as a divisor when
  # justifying). The result is memoized.
  def spaces
    @spaces ||= begin
      total = instructions.inject(0) { |sum, instruction| sum + instruction.spaces }
      [1, total].max
    end
  end

  # Returns the +hard_break+ value given to the constructor.
  def hard_break?
    @hard_break
  end

  # Returns the sum of the widths of all instructions.
  def width
    instructions.inject(0) { |sum, instruction| sum + instruction.width }
  end

  # distance from top of line to baseline
  def ascent
    instructions.map { |instruction| instruction.ascent }.max || 0
  end

  # distance from bottom of line to baseline
  def descent
    instructions.map { |instruction| instruction.descent }.min || 0
  end

  # Returns the largest height reported by any instruction, or 0 when
  # the line has no instructions (consistent with #ascent and #descent).
  # +include_blank+ is passed through to each instruction's +height+.
  def height(include_blank=false)
    instructions.map { |instruction| instruction.height(include_blank) }.max || 0
  end

  # Draws the line on +document+. Does nothing if the line has no
  # instructions.
  #
  # +state+ is a hash that is read and updated while drawing: :width,
  # :dy, :text and :pending_effects are read; :dx, :padding (for
  # justified text), :dy and :line are written.
  #
  # +options+ may include :align (:left, :center, :right or :justify)
  # and :spacing (extra distance added below the line; defaults to 0).
  # For any other :align value, state[:dx] is left as the caller set it.
  def draw_on(document, state, options={})
    return if instructions.empty?

    case options[:align]
    when :left
      state[:dx] = 0
    when :center
      state[:dx] = (state[:width] - width) / 2.0
    when :right
      state[:dx] = state[:width] - width
    when :justify
      state[:dx] = 0
      # Spread the leftover width across the spaces; a hard-broken line
      # is not stretched. Float division keeps fractional padding even
      # when both widths are integers.
      state[:padding] = hard_break? ? 0 : (state[:width] - width).to_f / spaces
      state[:text].word_space(state[:padding])
    end

    # Move down to the baseline of this line.
    state[:dy] -= ascent

    state[:text].move_to(state[:dx], state[:dy])
    state[:line] = self

    document.save_font do
      instructions.each { |instruction| instruction.draw(document, state, options) }
      state[:pending_effects].each { |effect| effect.wrap(document, state) }
    end

    # Move from the baseline to the bottom of the line, plus spacing.
    state[:dy] -= (options[:spacing] || 0) + (height - ascent)
  end

  private

  # Folds the list by handing each instruction the list accumulated so
  # far; whatever +accumulate+ returns becomes the new accumulated list.
  def consolidate(list)
    list.inject([]) { |accumulated, instruction| instruction.accumulate(accumulated) }
  end
end
97
+
98
+ end
99
+ end
@@ -0,0 +1,181 @@
1
+ # encoding: utf-8
2
+
3
+ require 'prawn/format/instructions/text'
4
+ require 'prawn/format/instructions/tag_open'
5
+ require 'prawn/format/instructions/tag_close'
6
+ require 'prawn/format/lexer'
7
+ require 'prawn/format/line'
8
+ require 'prawn/format/state'
9
+
10
+ module Prawn
11
+ module Format
12
+
13
# The Parser class is used by the formatting subsystem to take
# the raw tokens from the Lexer class and wrap them in
# "instructions", which are then used by the LayoutBuilder to
# determine how each token should be rendered.
#
# The parser also ensures that tags are opened and closed
# consistently: closing a tag that is not the most recently opened
# one, or closing a tag when none are open, raises an exception
# (TagError).
#
# It will also raise an exception if a tag is encountered with
# no style definition for it.
class Parser
  # This is the exception that gets raised when the parser cannot
  # process a particular tag.
  class TagError < RuntimeError; end

  attr_reader :document
  attr_reader :tags
  attr_reader :state

  # Creates a new parser associated with the given +document+, and which
  # will parse the given +text+. The +options+ may include any of three
  # optional keys:
  #
  # * :tags is used to specify the hash of tags and their associated
  #   styles. Any tag not specified here will not be recognized by the
  #   parser, and will cause an error if it is encountered in +text+.
  # * :styles is used to specify the mapping of style classes to their
  #   definitions. The keys should be symbols, and the values should be
  #   hashes. The values have the same format as for the :tags map.
  # * :style is the default style for any text not otherwise wrapped by
  #   tags.
  #
  # Example:
  #
  #   parser = Parser.new(@pdf, "<b class='ruby'>hello</b>",
  #     :tags => { :b => { :font_weight => :bold } },
  #     :styles => { :ruby => { :color => "red" } },
  #     :style => { :font_family => "Times-Roman" })
  #
  # See Format::State for a description of the supported style options.
  def initialize(document, text, options={})
    @document = document
    @lexer = Lexer.new(text)
    @tags = options[:tags] || {}
    @styles = options[:styles] || {}

    @state = State.new(document, :style => options[:style])
    @lexer.verbatim = (@state.white_space == :pre)

    @action = :start

    @saved = []
    @tag_stack = []
  end

  # Returns the lexer's current +verbatim+ setting.
  def verbatim?
    @lexer.verbatim
  end

  # Returns the next instruction from the stream. If there are no more
  # instructions in the stream (e.g., the end has been encountered), this
  # returns +nil+.
  def next
    # Instructions that were pushed back take priority over the stream.
    return @saved.pop if @saved.any?

    case @action
    when :start then start_parse
    when :text then text_parse
    else raise "BUG: unknown parser action: #{@action.inspect}"
    end
  end

  # "Ungets" the given +instruction+. This makes it so the next call to
  # +next+ will return +instruction+. This is useful for backtracking.
  def push(instruction)
    @saved.push(instruction)
  end

  # This is identical to +next+, except it does not consume the
  # instruction. This means that +peek+ returns the instruction that will
  # be returned by the next call to +next+. It is useful for testing
  # the next instruction in the stream without advancing the stream.
  def peek
    upcoming = self.next
    push(upcoming) if upcoming
    upcoming
  end

  # Returns +true+ if the end of the stream has been reached. Subsequent
  # calls to +peek+ or +next+ will return +nil+.
  def eos?
    peek.nil?
  end

  private

  # Pulls tokens from the lexer until one of them produces an
  # instruction, and returns that instruction. Returns +nil+ once the
  # lexer has no more tokens. The lexer's verbatim mode is switched
  # according to the instruction's start_verbatim?/end_verbatim? flags.
  def start_parse
    while (@token = @lexer.next)
      instruction =
        case @token[:type]
        when :text
          @position = 0
          text_parse
        when :open
          process_open_tag
        when :close
          process_close_tag
        else
          raise ArgumentError, "[BUG] unknown token type #{@token[:type].inspect} (#{@token.inspect})"
        end

      if instruction
        if instruction.start_verbatim?
          @lexer.verbatim = true
        elsif instruction.end_verbatim?
          @lexer.verbatim = false
        end

        return instruction
      end
    end

    nil
  end

  # Returns an instruction for the next piece of the current :text
  # token. When the token has no pieces left, parsing resumes with the
  # next token from the lexer.
  def text_parse
    text = @token[:text][@position]

    if text
      @action = :text
      @position += 1

      # With preformatted white space, a newline becomes a break
      # instruction rather than a text instruction.
      if @state.white_space == :pre && text =~ /(?:\r\n|\r|\n)/
        Instructions::TagClose.new(@state, { :style => { :display => :break }, :options => {} })
      else
        Instructions::Text.new(@state, text)
      end
    else
      @action = :start
      start_parse
    end
  end

  # Handles an :open token: computes the tag's style, records the tag
  # as open, makes the styled state current, and returns a TagOpen
  # instruction. Raises TagError if the tag has no definition in +tags+.
  def process_open_tag
    definition = @tags[@token[:tag]]

    # Validate before touching the tag stack, so that a rejected tag
    # never ends up recorded as open.
    raise TagError, "undefined tag #{@token[:tag]}" unless definition

    @tag_stack << @token

    # Work on a copy so the shared tag definition is never modified.
    @token[:style] = definition.dup

    # Merge in each named style class; unknown class names are ignored.
    (@token[:options][:class] || "").split.each do |name|
      @token[:style].update(@styles[name.to_sym] || {})
    end

    # A :meta entry maps tag option names to style keys: the value given
    # for each listed option is copied into the style under that key.
    if @token[:style][:meta]
      @token[:style][:meta].each do |key, value|
        @token[:style][value] = @token[:options][key]
      end
    end

    @state = @state.with_style(@token[:style])
    Instructions::TagOpen.new(@state, @token)
  end

  # Handles a :close token: verifies that it matches the most recently
  # opened tag, pops that tag, restores the previous state, and returns
  # a TagClose instruction. Raises TagError on a mismatch.
  def process_close_tag
    raise TagError, "closing #{@token[:tag]}, but no tags are open" if @tag_stack.empty?
    raise TagError, "closing #{@tag_stack.last[:tag]} with #{@token[:tag]}" if @tag_stack.last[:tag] != @token[:tag]

    # The instruction is built with the state that was in effect inside
    # the tag; only afterwards is the enclosing state restored.
    instruction = Instructions::TagClose.new(@state, @tag_stack.pop)
    @state = @state.previous
    instruction
  end
end
179
+
180
+ end
181
+ end