asciidoctor-pdf 1.5.0.alpha.16 → 1.5.0.alpha.17
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.yardopts +12 -0
- data/CHANGELOG.adoc +66 -0
- data/LICENSE.adoc +1 -1
- data/README.adoc +221 -68
- data/asciidoctor-pdf.gemspec +41 -42
- data/bin/asciidoctor-pdf +3 -3
- data/data/fonts/mplus1p-regular-fallback.ttf +0 -0
- data/data/fonts/notoserif-bold-subset.ttf +0 -0
- data/data/fonts/notoserif-bold_italic-subset.ttf +0 -0
- data/data/fonts/notoserif-italic-subset.ttf +0 -0
- data/data/fonts/notoserif-regular-subset.ttf +0 -0
- data/data/themes/default-theme.yml +6 -3
- data/docs/theming-guide.adoc +162 -23
- data/lib/asciidoctor-pdf.rb +2 -1
- data/lib/asciidoctor-pdf/asciidoctor_ext.rb +1 -0
- data/lib/asciidoctor-pdf/asciidoctor_ext/logging_shim.rb +19 -0
- data/lib/asciidoctor-pdf/converter.rb +408 -186
- data/lib/asciidoctor-pdf/core_ext/array.rb +0 -6
- data/lib/asciidoctor-pdf/core_ext/numeric.rb +21 -12
- data/lib/asciidoctor-pdf/core_ext/ostruct.rb +3 -12
- data/lib/asciidoctor-pdf/core_ext/string.rb +1 -1
- data/lib/asciidoctor-pdf/formatted_text.rb +1 -0
- data/lib/asciidoctor-pdf/formatted_text/formatter.rb +8 -2
- data/lib/asciidoctor-pdf/formatted_text/inline_destination_marker.rb +1 -1
- data/lib/asciidoctor-pdf/formatted_text/inline_image_arranger.rb +18 -32
- data/lib/asciidoctor-pdf/formatted_text/inline_image_renderer.rb +3 -3
- data/lib/asciidoctor-pdf/formatted_text/inline_text_aligner.rb +20 -0
- data/lib/asciidoctor-pdf/formatted_text/parser.rb +124 -38
- data/lib/asciidoctor-pdf/formatted_text/parser.treetop +17 -10
- data/lib/asciidoctor-pdf/formatted_text/transform.rb +30 -20
- data/lib/asciidoctor-pdf/implicit_header_processor.rb +2 -2
- data/lib/asciidoctor-pdf/index_catalog.rb +25 -23
- data/lib/asciidoctor-pdf/measurements.rb +1 -1
- data/lib/asciidoctor-pdf/pdf-core_ext/pdf_object.rb +1 -1
- data/lib/asciidoctor-pdf/pdfmark.rb +13 -13
- data/lib/asciidoctor-pdf/prawn-svg_ext.rb +2 -2
- data/lib/asciidoctor-pdf/prawn-svg_ext/interface.rb +2 -2
- data/lib/asciidoctor-pdf/prawn-table_ext.rb +1 -0
- data/lib/asciidoctor-pdf/prawn-table_ext/cell.rb +60 -0
- data/lib/asciidoctor-pdf/prawn-table_ext/cell/text.rb +3 -3
- data/lib/asciidoctor-pdf/prawn_ext/coderay_encoder.rb +3 -3
- data/lib/asciidoctor-pdf/prawn_ext/extensions.rb +39 -14
- data/lib/asciidoctor-pdf/prawn_ext/formatted_text/fragment.rb +9 -10
- data/lib/asciidoctor-pdf/prawn_ext/images.rb +2 -2
- data/lib/asciidoctor-pdf/roman_numeral.rb +7 -7
- data/lib/asciidoctor-pdf/rouge_ext.rb +2 -2
- data/lib/asciidoctor-pdf/rouge_ext/formatters/prawn.rb +20 -9
- data/lib/asciidoctor-pdf/rouge_ext/themes/{pastie.rb → asciidoctor_pdf_default.rb} +5 -5
- data/lib/asciidoctor-pdf/rouge_ext/themes/bw.rb +38 -0
- data/lib/asciidoctor-pdf/sanitizer.rb +36 -23
- data/lib/asciidoctor-pdf/temporary_path.rb +1 -1
- data/lib/asciidoctor-pdf/theme_loader.rb +17 -14
- data/lib/asciidoctor-pdf/version.rb +3 -2
- data/lib/asciidoctor/pdf.rb +1 -0
- data/lib/asciidoctor/pdf/version.rb +1 -0
- metadata +113 -84
- data/Gemfile +0 -22
- data/Rakefile +0 -81
- data/lib/asciidoctor-pdf/rouge_ext/css_theme.rb +0 -15
@@ -2,17 +2,17 @@
|
|
2
2
|
#
|
3
3
|
# This file was copied from Prawn (manual/syntax_highlight.rb) and
|
4
4
|
# modified for use with Asciidoctor PDF.
|
5
|
-
#
|
5
|
+
#
|
6
6
|
# Prawn is free software: you can redistribute it and/or modify
|
7
7
|
# it under the terms of the GNU General Public License as published by
|
8
8
|
# the Free Software Foundation, either version 3 of the License, or
|
9
9
|
# (at your option) any later version.
|
10
|
-
#
|
10
|
+
#
|
11
11
|
# Prawn is distributed in the hope that it will be useful,
|
12
12
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
13
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
14
|
# GNU General Public License for more details.
|
15
|
-
#
|
15
|
+
#
|
16
16
|
# You should have received a copy of the GNU General Public License
|
17
17
|
# along with Prawn. If not, see <http://www.gnu.org/licenses/>.
|
18
18
|
#
|
@@ -2,13 +2,16 @@ Prawn::Font::AFM.instance_variable_set :@hide_m17n_warning, true
|
|
2
2
|
|
3
3
|
require 'prawn/icon'
|
4
4
|
|
5
|
+
Prawn::Icon::Compatibility.send :prepend, (::Module.new { def warning *args; end })
|
6
|
+
|
5
7
|
module Asciidoctor
|
6
8
|
module Prawn
|
7
9
|
module Extensions
|
8
|
-
include ::Asciidoctor::
|
9
|
-
include ::Asciidoctor::
|
10
|
+
include ::Asciidoctor::PDF::Measurements
|
11
|
+
include ::Asciidoctor::PDF::Sanitizer
|
10
12
|
|
11
|
-
|
13
|
+
FontAwesomeIconSets = %w(fab far fas)
|
14
|
+
IconSets = %w(fab far fas fi pf).to_set
|
12
15
|
InitialPageContent = %(q\n)
|
13
16
|
|
14
17
|
# - :height is the height of a line
|
@@ -76,7 +79,7 @@ module Extensions
|
|
76
79
|
page.margins[:left]
|
77
80
|
end
|
78
81
|
# deprecated
|
79
|
-
alias
|
82
|
+
alias left_margin page_margin_left
|
80
83
|
|
81
84
|
# Returns the width of the right margin for the current page
|
82
85
|
#
|
@@ -84,7 +87,7 @@ module Extensions
|
|
84
87
|
page.margins[:right]
|
85
88
|
end
|
86
89
|
# deprecated
|
87
|
-
alias
|
90
|
+
alias right_margin page_margin_right
|
88
91
|
|
89
92
|
# Returns the width of the top margin for the current page
|
90
93
|
#
|
@@ -147,7 +150,7 @@ module Extensions
|
|
147
150
|
# ...or use more robust, low-level check (initial value of content is "q\n")
|
148
151
|
page_number > 0 && page.content.stream.filtered_stream == InitialPageContent
|
149
152
|
end
|
150
|
-
alias
|
153
|
+
alias page_is_empty? empty_page?
|
151
154
|
|
152
155
|
# Returns whether the current page is the last page in the document.
|
153
156
|
#
|
@@ -179,7 +182,7 @@ module Extensions
|
|
179
182
|
# bold: 'fonts/roboto-bold.ttf',
|
180
183
|
# bold_italic: 'fonts/roboto-bold_italic.ttf'
|
181
184
|
# }
|
182
|
-
#
|
185
|
+
#
|
183
186
|
def register_font data
|
184
187
|
font_families.update data.inject({}) {|accum, (key, val)| accum[key.to_s] = val; accum }
|
185
188
|
end
|
@@ -190,8 +193,11 @@ module Extensions
|
|
190
193
|
#
|
191
194
|
def font name = nil, options = {}
|
192
195
|
if name
|
193
|
-
::Prawn::Icon::FontData.load self, name if IconSets.include? name
|
194
196
|
options = { size: options } if ::Numeric === options
|
197
|
+
if IconSets.include? name
|
198
|
+
::Prawn::Icon::FontData.load self, name
|
199
|
+
options = options.reject {|k| k == :style } if options.key? :style
|
200
|
+
end
|
195
201
|
end
|
196
202
|
super name, options
|
197
203
|
end
|
@@ -202,7 +208,7 @@ module Extensions
|
|
202
208
|
font.options[:family]
|
203
209
|
end
|
204
210
|
|
205
|
-
alias
|
211
|
+
alias font_name font_family
|
206
212
|
|
207
213
|
# Retrieves the current font info (family, style, size) as a Hash
|
208
214
|
#
|
@@ -211,7 +217,7 @@ module Extensions
|
|
211
217
|
end
|
212
218
|
|
213
219
|
# Sets the font style for the scope of the block to which this method
|
214
|
-
# yields. If the style is nil and no block is given, return the current
|
220
|
+
# yields. If the style is nil and no block is given, return the current
|
215
221
|
# font style.
|
216
222
|
#
|
217
223
|
def font_style style = nil
|
@@ -303,6 +309,10 @@ module Extensions
|
|
303
309
|
end
|
304
310
|
end
|
305
311
|
|
312
|
+
def icon_font_data family
|
313
|
+
::Prawn::Icon::FontData.load self, family
|
314
|
+
end
|
315
|
+
|
306
316
|
def calc_line_metrics line_height = 1, font = self.font, font_size = self.font_size
|
307
317
|
line_height_length = line_height * font_size
|
308
318
|
leading = line_height_length - font_size
|
@@ -333,7 +343,7 @@ module Extensions
|
|
333
343
|
options = options.dup
|
334
344
|
if (format_option = options.delete :inline_format)
|
335
345
|
format_option = [] unless ::Array === format_option
|
336
|
-
fragments = self.text_formatter.format string, *format_option
|
346
|
+
fragments = self.text_formatter.format string, *format_option
|
337
347
|
else
|
338
348
|
fragments = [{text: string}]
|
339
349
|
end
|
@@ -494,6 +504,21 @@ module Extensions
|
|
494
504
|
#end
|
495
505
|
end
|
496
506
|
|
507
|
+
# TODO memoize the result
|
508
|
+
def inflate_padding padding
|
509
|
+
padding = [*(padding || 0)].slice 0, 4
|
510
|
+
case padding.size
|
511
|
+
when 1
|
512
|
+
[padding[0], padding[0], padding[0], padding[0]]
|
513
|
+
when 2
|
514
|
+
[padding[0], padding[1], padding[0], padding[1]]
|
515
|
+
when 3
|
516
|
+
[padding[0], padding[1], padding[2], padding[1]]
|
517
|
+
else
|
518
|
+
padding
|
519
|
+
end
|
520
|
+
end
|
521
|
+
|
497
522
|
# Stretch the current bounds to the left and right edges of the current page
|
498
523
|
# while yielding the specified block if the verdict argument is true.
|
499
524
|
# Otherwise, simply yield the specified block.
|
@@ -776,7 +801,7 @@ module Extensions
|
|
776
801
|
# TODO set scratch number on scratch document
|
777
802
|
scratch
|
778
803
|
else
|
779
|
-
warn '
|
804
|
+
logger.warn 'no scratch prototype available; instantiating fresh scratch document'
|
780
805
|
::Prawn::Document.new
|
781
806
|
end
|
782
807
|
end
|
@@ -786,7 +811,7 @@ module Extensions
|
|
786
811
|
rescue
|
787
812
|
false # NOTE this method may get called before the state is initialized
|
788
813
|
end
|
789
|
-
alias
|
814
|
+
alias is_scratch? scratch?
|
790
815
|
|
791
816
|
# TODO document me
|
792
817
|
def dry_run &block
|
@@ -827,7 +852,7 @@ module Extensions
|
|
827
852
|
else
|
828
853
|
started_new_page = false
|
829
854
|
end
|
830
|
-
|
855
|
+
|
831
856
|
# HACK yield doesn't work here on JRuby (at least not when called from AsciidoctorJ)
|
832
857
|
#yield remainder, started_new_page
|
833
858
|
instance_exec(total_height, started_new_page, &block)
|
@@ -3,7 +3,7 @@ module Prawn
|
|
3
3
|
module FormattedText
|
4
4
|
module Fragment
|
5
5
|
attr_reader :document
|
6
|
-
|
6
|
+
|
7
7
|
# Prevent fragment from being written by discarding the text.
|
8
8
|
def conceal
|
9
9
|
@text = ''
|
@@ -20,18 +20,17 @@ module Fragment
|
|
20
20
|
def descender= val
|
21
21
|
@descender = (format_state.key? :descender) ? format_state[:descender] : val
|
22
22
|
end
|
23
|
-
end
|
24
23
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
remove_method :descender=
|
32
|
-
include Fragment
|
24
|
+
def width
|
25
|
+
if (val = format_state[:width])
|
26
|
+
(val.end_with? 'em') ? val.to_f * @document.font_size : val
|
27
|
+
else
|
28
|
+
super
|
29
|
+
end
|
33
30
|
end
|
34
31
|
end
|
32
|
+
|
33
|
+
::Prawn::Text::Formatted::Fragment.prepend Fragment
|
35
34
|
end
|
36
35
|
end
|
37
36
|
end
|
@@ -12,7 +12,7 @@ module Images
|
|
12
12
|
# FIXME handle case when SVG is a File or IO object
|
13
13
|
if ::String === file && (file.downcase.end_with? '.svg')
|
14
14
|
opts[:fallback_font_name] ||= default_svg_font if respond_to? :default_svg_font
|
15
|
-
svg((::
|
15
|
+
svg((::File.read file), opts)
|
16
16
|
else
|
17
17
|
_initial_image file, opts
|
18
18
|
end
|
@@ -24,7 +24,7 @@ module Images
|
|
24
24
|
# intrinsic width and height values (in pixels)
|
25
25
|
def intrinsic_image_dimensions path
|
26
26
|
if path.end_with? '.svg'
|
27
|
-
img_obj = ::Prawn::
|
27
|
+
img_obj = ::Prawn::SVG::Interface.new ::File.read(path), self, {}
|
28
28
|
img_size = img_obj.document.sizing
|
29
29
|
{ width: img_size.output_width, height: img_size.output_height }
|
30
30
|
else
|
@@ -10,10 +10,10 @@
|
|
10
10
|
# distribute, sublicense, and/or sell copies of the Software, and to
|
11
11
|
# permit persons to whom the Software is furnished to do so, subject to
|
12
12
|
# the following conditions:
|
13
|
-
#
|
13
|
+
#
|
14
14
|
# The above copyright notice and this permission notice shall be
|
15
15
|
# included in all copies or substantial portions of the Software.
|
16
|
-
#
|
16
|
+
#
|
17
17
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
18
|
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
19
19
|
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
@@ -23,12 +23,12 @@
|
|
23
23
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
24
24
|
#
|
25
25
|
# Copyright (c) 2011 Andrew Vos
|
26
|
-
# Copyright (c) 2014
|
26
|
+
# Copyright (c) 2014 OpenDevise, Inc.
|
27
27
|
#
|
28
28
|
########################################################################
|
29
29
|
|
30
30
|
module Asciidoctor
|
31
|
-
module
|
31
|
+
module PDF
|
32
32
|
class RomanNumeral
|
33
33
|
BaseDigits = {
|
34
34
|
1 => 'I',
|
@@ -88,9 +88,9 @@ class RomanNumeral
|
|
88
88
|
|
89
89
|
def self.int_to_roman value
|
90
90
|
result = []
|
91
|
-
BaseDigits.keys.
|
91
|
+
BaseDigits.keys.reverse_each do |ival|
|
92
92
|
while value >= ival
|
93
|
-
value -= ival
|
93
|
+
value -= ival
|
94
94
|
result << BaseDigits[ival]
|
95
95
|
end
|
96
96
|
end
|
@@ -100,7 +100,7 @@ class RomanNumeral
|
|
100
100
|
def self.roman_to_int value
|
101
101
|
value = value.upcase
|
102
102
|
result = 0
|
103
|
-
BaseDigits.values.
|
103
|
+
BaseDigits.values.reverse_each do |rval|
|
104
104
|
while value.start_with? rval
|
105
105
|
offset = rval.length
|
106
106
|
value = value[offset..offset]
|
@@ -1,4 +1,4 @@
|
|
1
1
|
require 'rouge'
|
2
2
|
require_relative 'rouge_ext/formatters/prawn'
|
3
|
-
require_relative 'rouge_ext/
|
4
|
-
require_relative 'rouge_ext/themes/
|
3
|
+
require_relative 'rouge_ext/themes/asciidoctor_pdf_default'
|
4
|
+
require_relative 'rouge_ext/themes/bw' unless Rouge::Theme.find 'bw'
|
@@ -1,17 +1,23 @@
|
|
1
1
|
module Rouge
|
2
2
|
module Formatters
|
3
|
-
# Transforms a token stream into an array of
|
3
|
+
# Transforms a token stream into an array of
|
4
4
|
# formatted text fragments for use with Prawn.
|
5
5
|
class Prawn < Formatter
|
6
6
|
tag 'prawn'
|
7
7
|
|
8
8
|
Tokens = ::Rouge::Token::Tokens
|
9
|
+
LineOrientedTokens = [
|
10
|
+
::Rouge::Token::Tokens::Generic::Inserted,
|
11
|
+
::Rouge::Token::Tokens::Generic::Deleted,
|
12
|
+
::Rouge::Token::Tokens::Generic::Heading,
|
13
|
+
::Rouge::Token::Tokens::Generic::Subheading
|
14
|
+
]
|
9
15
|
|
10
|
-
LF =
|
11
|
-
NoBreakSpace =
|
12
|
-
InnerIndent = %(
|
13
|
-
GuardedIndent =
|
14
|
-
GuardedInnerIndent = %(
|
16
|
+
LF = ?\n
|
17
|
+
NoBreakSpace = ?\u00a0
|
18
|
+
InnerIndent = %(#{LF} )
|
19
|
+
GuardedIndent = NoBreakSpace
|
20
|
+
GuardedInnerIndent = %(#{LF}#{NoBreakSpace})
|
15
21
|
BoldStyle = [:bold].to_set
|
16
22
|
ItalicStyle = [:italic].to_set
|
17
23
|
BoldItalicStyle = [:bold, :italic].to_set
|
@@ -20,7 +26,7 @@ class Prawn < Formatter
|
|
20
26
|
def initialize opts = {}
|
21
27
|
unless ::Rouge::Theme === (theme = opts[:theme])
|
22
28
|
unless theme && (theme = ::Rouge::Theme.find theme)
|
23
|
-
theme = ::Rouge::Themes::
|
29
|
+
theme = ::Rouge::Themes::AsciidoctorPDFDefault
|
24
30
|
end
|
25
31
|
theme = theme.new
|
26
32
|
end
|
@@ -106,8 +112,13 @@ class Prawn < Formatter
|
|
106
112
|
if (bg = normalize_color style_rules.bg) && bg != @background_color
|
107
113
|
fragment[:background_color] = bg
|
108
114
|
fragment[:callback] = @background_colorizer
|
109
|
-
|
110
|
-
|
115
|
+
if LineOrientedTokens.include? tok
|
116
|
+
fragment[:inline_block] = true unless style_rules[:inline_block] == false
|
117
|
+
fragment[:extend] = true unless style_rules[:extend] == false
|
118
|
+
else
|
119
|
+
fragment[:inline_block] = true if style_rules[:inline_block]
|
120
|
+
fragment[:extend] = true if style_rules[:extend]
|
121
|
+
end
|
111
122
|
end
|
112
123
|
if (fg = normalize_color style_rules.fg)
|
113
124
|
fragment[:color] = fg
|
@@ -1,9 +1,9 @@
|
|
1
1
|
module Rouge
|
2
2
|
module Themes
|
3
|
-
# A
|
3
|
+
# A variation on the pastie style from Pygments, customized for Asciidoctor PDF
|
4
4
|
# See https://bitbucket.org/birkenfeld/pygments-main/src/default/pygments/styles/pastie.py
|
5
|
-
class
|
6
|
-
name '
|
5
|
+
class AsciidoctorPDFDefault < CSSTheme
|
6
|
+
name 'asciidoctor_pdf_default'
|
7
7
|
|
8
8
|
# Deviate from pastie here since our italic is actually a thinner font
|
9
9
|
style Comment, fg: '#888888' #, italic: true
|
@@ -17,8 +17,8 @@ module Rouge
|
|
17
17
|
style Generic::Heading, fg: '#333333'
|
18
18
|
style Generic::Subheading, fg: '#666666'
|
19
19
|
|
20
|
-
style Generic::Deleted, fg: '#000000', bg: '#ffdddd'
|
21
|
-
style Generic::Inserted, fg: '#000000', bg: '#ddffdd'
|
20
|
+
style Generic::Deleted, fg: '#000000', bg: '#ffdddd'
|
21
|
+
style Generic::Inserted, fg: '#000000', bg: '#ddffdd'
|
22
22
|
|
23
23
|
style Generic::Emph, italic: true
|
24
24
|
style Generic::Strong, bold: true
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Rouge
|
2
|
+
module Themes
|
3
|
+
# A port of the bw style from Pygments.
|
4
|
+
# See https://bitbucket.org/birkenfeld/pygments-main/src/default/pygments/styles/bw.py
|
5
|
+
class BlackWhiteTheme < CSSTheme
|
6
|
+
name 'bw'
|
7
|
+
|
8
|
+
style Text, fg: '#000000', bg: '#ffffff'
|
9
|
+
|
10
|
+
style Comment, italic: true
|
11
|
+
style Comment::Preproc, italic: false
|
12
|
+
|
13
|
+
style Keyword, bold: true
|
14
|
+
style Keyword::Pseudo, bold: false
|
15
|
+
style Keyword::Type, bold: false
|
16
|
+
|
17
|
+
style Operator, bold: true
|
18
|
+
|
19
|
+
style Name::Class, bold: true
|
20
|
+
style Name::Namespace, bold: true
|
21
|
+
style Name::Exception, bold: true
|
22
|
+
style Name::Entity, bold: true
|
23
|
+
style Name::Tag, bold: true
|
24
|
+
|
25
|
+
style Literal::String, italic: true
|
26
|
+
style Literal::String::Interpol, bold: true
|
27
|
+
style Literal::String::Escape, bold: true
|
28
|
+
|
29
|
+
style Generic::Heading, bold: true
|
30
|
+
style Generic::Subheading, bold: true
|
31
|
+
style Generic::Emph, italic: true
|
32
|
+
style Generic::Strong, bold: true
|
33
|
+
style Generic::Prompt, bold: true
|
34
|
+
|
35
|
+
style Error, fg: '#FF0000'
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -1,26 +1,36 @@
|
|
1
|
-
|
2
|
-
require 'unicode' unless defined? Unicode::VERSION
|
3
|
-
rescue LoadError
|
1
|
+
unless RUBY_VERSION >= '2.4'
|
4
2
|
begin
|
5
|
-
require '
|
6
|
-
rescue LoadError
|
3
|
+
require 'unicode' unless defined? Unicode::VERSION
|
4
|
+
rescue LoadError
|
5
|
+
begin
|
6
|
+
require 'active_support/multibyte' unless defined? ActiveSupport::Multibyte
|
7
|
+
rescue LoadError; end
|
8
|
+
end
|
7
9
|
end
|
8
10
|
|
9
11
|
module Asciidoctor
|
10
|
-
module
|
12
|
+
module PDF
|
11
13
|
module Sanitizer
|
12
|
-
|
13
|
-
'<' =>
|
14
|
-
'>' =>
|
15
|
-
'&' =>
|
14
|
+
XMLSpecialChars = {
|
15
|
+
'<' => ?<,
|
16
|
+
'>' => ?>,
|
17
|
+
'&' => ?&,
|
16
18
|
}
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
19
|
+
XMLSpecialCharsRx = /(?:#{XMLSpecialChars.keys * ?|})/
|
20
|
+
InverseXMLSpecialChars = XMLSpecialChars.invert
|
21
|
+
InverseXMLSpecialCharsRx = /[#{InverseXMLSpecialChars.keys.join}]/
|
22
|
+
(BuiltInNamedEntities = {
|
23
|
+
'amp' => ?&,
|
24
|
+
'apos' => ?',
|
25
|
+
'gt' => ?>,
|
26
|
+
'lt' => ?<,
|
27
|
+
'nbsp' => ' ',
|
28
|
+
'quot' => ?",
|
29
|
+
}).default = ??
|
30
|
+
SanitizeXMLRx = /<[^>]+>/
|
31
|
+
XMLMarkupRx = /&#?[a-z\d]+;|</
|
32
|
+
CharRefRx = /&(?:([a-z][a-z]+\d{0,2})|#(?:(\d\d\d{0,4})|x([a-f\d][a-f\d][a-f\d]{0,3})));/
|
33
|
+
SiftPCDATARx = /(&#?[a-z\d]+;|<[^>]+>)|([^&<]+)/
|
24
34
|
|
25
35
|
# Strip leading, trailing and repeating whitespace, remove XML tags and
|
26
36
|
# resolve all entities in the specified string.
|
@@ -29,19 +39,22 @@ module Sanitizer
|
|
29
39
|
# FIXME add option to control escaping entities, or a filter mechanism in general
|
30
40
|
def sanitize string
|
31
41
|
string.strip
|
32
|
-
.gsub(
|
42
|
+
.gsub(SanitizeXMLRx, '')
|
33
43
|
.tr_s(' ', ' ')
|
34
|
-
.gsub(
|
35
|
-
.gsub(BuiltInEntityCharRx, BuiltInEntityChars)
|
44
|
+
.gsub(CharRefRx) { $1 ? BuiltInNamedEntities[$1] : [$2 ? $2.to_i : ($3.to_i 16)].pack('U1') }
|
36
45
|
end
|
37
46
|
|
38
47
|
def escape_xml string
|
39
|
-
string.gsub
|
48
|
+
string.gsub InverseXMLSpecialCharsRx, InverseXMLSpecialChars
|
49
|
+
end
|
50
|
+
|
51
|
+
def encode_quotes string
|
52
|
+
(string.include? ?") ? (string.gsub ?", '"') : string
|
40
53
|
end
|
41
54
|
|
42
55
|
def uppercase_pcdata string
|
43
|
-
if
|
44
|
-
string.gsub(
|
56
|
+
if XMLMarkupRx.match? string
|
57
|
+
string.gsub(SiftPCDATARx) { $2 ? (uppercase_mb $2) : $1 }
|
45
58
|
else
|
46
59
|
uppercase_mb string
|
47
60
|
end
|