kramdown 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of kramdown might be problematic. Click here for more details.

Files changed (44) hide show
  1. data/ChangeLog +276 -0
  2. data/Rakefile +2 -2
  3. data/VERSION +1 -1
  4. data/benchmark/benchmark.rb +1 -1
  5. data/benchmark/historic-jruby-1.4.0.dat +7 -7
  6. data/benchmark/historic-ruby-1.8.6.dat +7 -7
  7. data/benchmark/historic-ruby-1.8.7.dat +7 -7
  8. data/benchmark/historic-ruby-1.9.1p243.dat +7 -7
  9. data/benchmark/historic-ruby-1.9.2dev.dat +7 -7
  10. data/bin/kramdown +46 -1
  11. data/doc/index.page +2 -2
  12. data/doc/syntax.page +7 -3
  13. data/lib/kramdown/converter.rb +6 -285
  14. data/lib/kramdown/converter/base.rb +75 -0
  15. data/lib/kramdown/converter/html.rb +325 -0
  16. data/lib/kramdown/converter/latex.rb +516 -0
  17. data/lib/kramdown/document.rb +36 -66
  18. data/lib/kramdown/options.rb +262 -0
  19. data/lib/kramdown/parser/kramdown.rb +36 -17
  20. data/lib/kramdown/parser/kramdown/attribute_list.rb +1 -1
  21. data/lib/kramdown/parser/kramdown/autolink.rb +1 -1
  22. data/lib/kramdown/parser/kramdown/codespan.rb +1 -1
  23. data/lib/kramdown/parser/kramdown/emphasis.rb +2 -2
  24. data/lib/kramdown/parser/kramdown/escaped_chars.rb +2 -2
  25. data/lib/kramdown/parser/kramdown/extension.rb +46 -2
  26. data/lib/kramdown/parser/kramdown/footnote.rb +1 -1
  27. data/lib/kramdown/parser/kramdown/html.rb +2 -2
  28. data/lib/kramdown/parser/kramdown/html_entity.rb +4 -5
  29. data/lib/kramdown/parser/kramdown/line_break.rb +1 -1
  30. data/lib/kramdown/parser/kramdown/link.rb +2 -2
  31. data/lib/kramdown/parser/kramdown/smart_quotes.rb +213 -0
  32. data/lib/kramdown/parser/kramdown/typographic_symbol.rb +1 -1
  33. data/lib/kramdown/version.rb +1 -1
  34. data/test/testcases/encoding.html +46 -0
  35. data/test/testcases/encoding.text +28 -0
  36. data/test/testcases/span/01_link/inline.html +1 -1
  37. data/test/testcases/span/01_link/reference.html +2 -2
  38. data/test/testcases/span/escaped_chars/normal.html +4 -0
  39. data/test/testcases/span/escaped_chars/normal.text +4 -0
  40. data/test/testcases/span/text_substitutions/entities.html +1 -1
  41. data/test/testcases/span/text_substitutions/typography.html +12 -0
  42. data/test/testcases/span/text_substitutions/typography.text +12 -0
  43. metadata +9 -3
  44. data/lib/kramdown/extension.rb +0 -98
@@ -26,10 +26,22 @@ require 'kramdown/version'
26
26
  require 'kramdown/error'
27
27
  require 'kramdown/parser'
28
28
  require 'kramdown/converter'
29
- require 'kramdown/extension'
29
+ require 'kramdown/options'
30
30
 
31
31
  module Kramdown
32
32
 
33
+ # Return the data directory for kramdown.
34
+ def self.data_dir
35
+ unless defined?(@@data_dir)
36
+ require 'rbconfig'
37
+ @@data_dir = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'data', 'kramdown'))
38
+ @@data_dir = File.expand_path(File.join(Config::CONFIG["datadir"], "kramdown")) if !File.exists?(@@data_dir)
39
+ raise "kramdown data directory not found! This is a bug, please report it!" unless File.directory?(@@data_dir)
40
+ end
41
+ @@data_dir
42
+ end
43
+
44
+
33
45
  # The main interface to kramdown.
34
46
  #
35
47
  # This class provides a one-stop-shop for using kramdown to convert text into various output
@@ -39,59 +51,12 @@ module Kramdown
39
51
  # doc = Kramdown::Document.new('This *is* some kramdown text')
40
52
  # puts doc.to_html
41
53
  #
42
- # The #to_html method is a shortcut for using the Converter::ToHtml class. If other converters are
43
- # added later, there may be additional shortcut methods.
54
+ # The #to_html method is a shortcut for using the Converter::Html class.
44
55
  #
45
- # The second argument to the #new method is an options hash for customizing the behaviour of
46
- # kramdown.
56
+ # The second argument to the #new method is an options hash for customizing the behaviour of the
57
+ # kramdown parser and the converters.
47
58
  class Document
48
59
 
49
- # Currently available options are:
50
- #
51
- # [:auto_ids (used by the parser)]
52
- # A boolean value deciding whether automatic header ID generation is used. Default: +false+.
53
- #
54
- # [:coderay (used by the HTML converter)]
55
- # A hash containing options for the CodeRay syntax highlighter. If this is set to +nil+,
56
- # syntax highlighting is disabled. When using the +options+ extension, any CodeRay option can
57
- # be set by prefixing it with +coderay_+.
58
- #
59
- # Default:
60
- # {:wrap => :div, :line_numbers => :inline, :line_number_start => 1,
61
- # :tab_width => 8, :bold_every => 10, :css => :style}
62
- #
63
- # [:filter_html (used by the HTML converter)]
64
- # An array of HTML tag names that defines which tags should be filtered from the output. For
65
- # example, if the value contains +iframe+, then all HTML +iframe+ tags are filtered out and
66
- # only the body is displayed. Default: empty array. When using the +options+ extension, the
67
- # string value needs to hold the HTML tag names separated by one or more spaces.
68
- #
69
- # [:footnote_nr (used by the HTML converter)]
70
- # The initial number used for creating the link to the first footnote. Default: +1+. When
71
- # using the +options+ extension, the string value needs to be a valid number.
72
- #
73
- # [:parse_block_html (used by the parser)]
74
- # A boolean value deciding whether kramdown syntax is processed in block HTML tags. Default:
75
- # +false+.
76
- #
77
- # [:parse_span_html (used by the parser)]
78
- # A boolean value deciding whether kramdown syntax is processed in span HTML tags. Default:
79
- # +true+.
80
- #
81
- # When using the +options+ extension, all boolean values can be set to false by using the
82
- # string 'false' or an empty string, any other non-empty string will be converted to the value
83
- # +true+.
84
- DEFAULT_OPTIONS={
85
- :footnote_nr => 1,
86
- :filter_html => [],
87
- :auto_ids => true,
88
- :parse_block_html => false,
89
- :parse_span_html => true,
90
- :coderay => {:wrap => :div, :line_numbers => :inline,
91
- :line_number_start => 1, :tab_width => 8, :bold_every => 10, :css => :style}
92
- }
93
-
94
-
95
60
  # The element tree of the document. It is immediately available after the #new method has been
96
61
  # called.
97
62
  attr_accessor :tree
@@ -101,33 +66,38 @@ module Kramdown
101
66
  attr_accessor :options
102
67
 
103
68
  # An array of warning messages. It is filled with warnings during the parsing phase (i.e. in
104
- # #new) and the converting phase.
69
+ # #new) and the conversion phase.
105
70
  attr_reader :warnings
106
71
 
107
72
  # Holds needed parse information like ALDs, link definitions and so on.
108
73
  attr_reader :parse_infos
109
74
 
110
- # Holds an instance of the extension class.
111
- attr_reader :extension
75
+ # Holds conversion information which is dependent on the used converter. A converter clears this
76
+ # variable before doing the conversion.
77
+ attr_reader :conversion_infos
112
78
 
113
79
 
114
- # Create a new Kramdown document from the string +source+ and use the provided +options+ (see
115
- # DEFAULT_OPTIONS for a list of available options). The +source+ is immediately parsed by the
116
- # kramdown parser sothat after this call the output can be generated.
117
- #
118
- # The parameter +ext+ can be used to set a custom extension class. Note that the default
119
- # kramdown extensions should be available in the custom extension class.
120
- def initialize(source, options = {}, ext = nil)
121
- @options = DEFAULT_OPTIONS.merge(options)
80
+ # Create a new Kramdown document from the string +source+ and use the provided +options+. The
81
+ # +source+ is immediately parsed by the kramdown parser so that after this call the output can be
82
+ # generated.
83
+ def initialize(source, options = {})
84
+ @options = Options.merge(options)
122
85
  @warnings = []
123
86
  @parse_infos = {}
124
- @extension = extension || Kramdown::Extension.new
87
+ @conversion_infos = {}
125
88
  @tree = Parser::Kramdown.parse(source, self)
126
89
  end
127
90
 
128
- # Convert the document to HTML. Uses the Converter::ToHtml class for doing the conversion.
129
- def to_html
130
- Converter::Html.convert(self)
91
+ # Check if a method is invoked that begins with +to_+ and if so, try to instantiate a converter
92
+ # class and use it for converting the document.
93
+ #
94
+ # For example, +to_html+ would instantiate the Converter::Html class.
95
+ def method_missing(id, *attr, &block)
96
+ if id.to_s =~ /^to_(\w+)$/
97
+ Converter.const_get($1.capitalize).convert(self)
98
+ else
99
+ super
100
+ end
131
101
  end
132
102
 
133
103
  def inspect #:nodoc:
@@ -0,0 +1,262 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ #--
4
+ # Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
5
+ #
6
+ # This file is part of kramdown.
7
+ #
8
+ # kramdown is free software: you can redistribute it and/or modify
9
+ # it under the terms of the GNU General Public License as published by
10
+ # the Free Software Foundation, either version 3 of the License, or
11
+ # (at your option) any later version.
12
+ #
13
+ # This program is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ # GNU General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU General Public License
19
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
20
+ #++
21
+ #
22
+
23
+ module Kramdown
24
+
25
+ # This module defines all options that are used by parsers and/or converters.
26
+ module Options
27
+
28
+ # Helper class introducing a boolean type for specifying boolean values (+true+ and +false+) as
29
+ # option types.
30
+ class Boolean
31
+
32
+ # Return +true+ if +other+ is either +true+ or +false+
33
+ def self.===(other)
34
+ FalseClass === other || TrueClass === other
35
+ end
36
+
37
+ end
38
+
39
+ # ----------------------------
40
+ # :section: Option definitions
41
+ #
42
+ # This section describes the methods that can be used on the Options module.
43
+ # ----------------------------
44
+
45
+ # Contains the definition of an option.
46
+ Definition = Struct.new(:name, :type, :default, :desc)
47
+
48
+ # Allowed option types
49
+ ALLOWED_TYPES = [String, Integer, Float, Symbol, Boolean, Array, Object]
50
+
51
+ @options = {}
52
+
53
+ # Define a new option called +name+ (a Symbol) with the given +type+ (String, Integer, Float,
54
+ # Symbol, Boolean, Array, Object), default value +default+ and the description +desc+.
55
+ #
56
+ # The type 'Object' should only be used if none of the other types suffices because such an
57
+ # option will be opaque!
58
+ def self.define(name, type, default, desc)
59
+ raise ArgumentError, "Option name #{name} is already used" if @options.has_key?(name)
60
+ raise ArgumentError, "Invalid option type #{type} specified" if !ALLOWED_TYPES.include?(type)
61
+ raise ArgumentError, "Invalid type for default value" if !(type === default) && !default.nil?
62
+ @options[name] = Definition.new(name, type, default, desc)
63
+ end
64
+
65
+ # Return all option definitions.
66
+ def self.definitions
67
+ @options
68
+ end
69
+
70
+ # Return +true+ if an option +name+ is defined.
71
+ def self.defined?(name)
72
+ @options.has_key?(name)
73
+ end
74
+
75
+ # Return a Hash with the default values for all options.
76
+ def self.defaults
77
+ temp = {}
78
+ @options.each {|n, o| temp[o.name] = o.default}
79
+ temp
80
+ end
81
+
82
+ # Merge the #defaults Hash with the parsed options from the given Hash.
83
+ def self.merge(hash)
84
+ temp = defaults
85
+ hash.each do |k,v|
86
+ next unless @options.has_key?(k)
87
+ temp[k] = parse(k, v)
88
+ end
89
+ temp
90
+ end
91
+
92
+ # Parse the given value +data+ as if it was a value for the option +name+ and return the parsed
93
+ # value with the correct type.
94
+ #
95
+ # If +data+ already has the correct type, it is just returned. Otherwise it is converted to a
96
+ # String and then to the correct type.
97
+ def self.parse(name, data)
98
+ raise ArgumentError, "No option named #{name} defined" if !@options.has_key?(name)
99
+ return data if @options[name].type === data
100
+ data = data.to_s
101
+ if @options[name].type == String
102
+ data
103
+ elsif @options[name].type == Integer
104
+ Integer(data)
105
+ elsif @options[name].type == Float
106
+ Float(data)
107
+ elsif @options[name].type == Symbol
108
+ (data.strip.empty? ? nil : data.to_sym)
109
+ elsif @options[name].type == Boolean
110
+ data.downcase.strip != 'false' && !data.empty?
111
+ elsif @options[name].type == Array
112
+ data.split(/\s+/)
113
+ end
114
+ end
115
+
116
+ # ----------------------------
117
+ # :section: Option Definitions
118
+ #
119
+ # This section contains all option definitions that are used by the included
120
+ # parsers/converters.
121
+ # ----------------------------
122
+
123
+ define(:template, String, '', <<EOF)
124
+ The name of an ERB template file that should be used to wrap the output
125
+
126
+ This is used to wrap the output in an environment so that the output can
127
+ be used as a stand-alone document. For example, an HTML template would
128
+ provide the needed header and body tags so that the whole output is a
129
+ valid HTML file. If no template is specified, the output will be just
130
+ the converted text.
131
+
132
+ When resolving the template file, the given template name is used first.
133
+ If such a file is not found, the converter extension is appended. If the
134
+ file still cannot be found, the templates name is interpreted as a
135
+ template name that is provided by kramdown (without the converter
136
+ extension).
137
+
138
+ kramdown provides a default template named 'default' for each converter.
139
+
140
+ Default: ''
141
+ Used by: all converters
142
+ EOF
143
+
144
+ define(:auto_ids, Boolean, true, <<EOF)
145
+ Use automatic header ID generation
146
+
147
+ If this option is `true`, ID values for all headers are automatically
148
+ generated if no ID is explicitly specified.
149
+
150
+ Default: true
151
+ Used by: kramdown parser
152
+ EOF
153
+
154
+ define(:parse_block_html, Boolean, false, <<EOF)
155
+ Process kramdown syntax in block HTML tags
156
+
157
+ If this option is `true`, the kramdown parser processes the content of
158
+ block HTML tags as text containing block level elements. Since this is
159
+ not wanted normally, the default is `false`. It is normally better to
160
+ selectively enable kramdown processing via the markdown attribute.
161
+
162
+ Default: false
163
+ Used by: kramdown parser
164
+ EOF
165
+
166
+ define(:parse_span_html, Boolean, true, <<EOF)
167
+ Process kramdown syntax in span HTML tags
168
+
169
+ If this option is `true`, the kramdown parser processes the content of
170
+ span HTML tags as text containing span level elements.
171
+
172
+ Default: true
173
+ Used by: kramdown parser
174
+ EOF
175
+
176
+ define(:extension, Object, nil, <<EOF)
177
+ An object for handling the extensions
178
+
179
+ The value for this option needs to be an object that can handle the
180
+ extensions found in a kramdown document. If this option is `nil`, the
181
+ default extension object is used.
182
+
183
+ Default: nil
184
+ Used by: kramdown parser
185
+ EOF
186
+
187
+ define(:footnote_nr, Integer, 1, <<EOF)
188
+ The number of the first footnote
189
+
190
+ This option can be used to specify the number that is used for the first
191
+ footnote.
192
+
193
+ Default: 1
194
+ Used by: HTML converter
195
+ EOF
196
+
197
+ define(:filter_html, Array, [], <<EOF)
198
+ An array of HTML tags that should be filtered from the output
199
+
200
+ The value can either be specified as array or as a space separated
201
+ string (which will be converted to an array). All HTML tags that are
202
+ listed in the array will be filtered from the output, i.e. only their
203
+ contents is used. This applies only to HTML tags found in the initial
204
+ document.
205
+
206
+ Default: []
207
+ Used by: HTML converter
208
+ EOF
209
+
210
+ define(:coderay_wrap, Symbol, :div, <<EOF)
211
+ Defines how the highlighted code should be wrapped
212
+
213
+ The possible values are :span, :div or nil.
214
+
215
+ Default: :div
216
+ Used by: HTML converter
217
+ EOF
218
+
219
+ define(:coderay_line_numbers, Symbol, :inline, <<EOF)
220
+ Defines how and if line numbers should be shown
221
+
222
+ The possible values are :table, :inline, :list or nil. If this option is
223
+ nil, no line numbers are shown.
224
+
225
+ Default: :inline
226
+ Used by: HTML converter
227
+ EOF
228
+
229
+ define(:coderay_line_number_start, Integer, 1, <<EOF)
230
+ The start value for the line numbers
231
+
232
+ Default: 1
233
+ Used by: HTML converter
234
+ EOF
235
+
236
+ define(:coderay_tab_width, Integer, 8, <<EOF)
237
+ The tab width used in highlighted code
238
+
239
+ Used by: HTML converter
240
+ EOF
241
+
242
+ define(:coderay_bold_every, Integer, 10, <<EOF)
243
+ Defines how often a line number should be made bold
244
+
245
+ Default: 10
246
+ Used by: HTML converter
247
+ EOF
248
+
249
+ define(:coderay_css, Symbol, :style, <<EOF)
250
+ Defines how the highlighted code gets styled
251
+
252
+ Possible values are :class (CSS classes are applied to the code
253
+ elements, one must supply the needed CSS file) or :style (default CSS
254
+ styles are directly applied to the code elements).
255
+
256
+ Default: style
257
+ Used by: HTML converter
258
+ EOF
259
+
260
+ end
261
+
262
+ end
@@ -37,10 +37,12 @@ module Kramdown
37
37
 
38
38
  attr_reader :tree
39
39
  attr_reader :doc
40
+ attr_reader :options
40
41
 
41
42
  # Create a new Kramdown parser object for the Kramdown::Document +doc+.
42
43
  def initialize(doc)
43
44
  @doc = doc
45
+ @extension = @doc.options[:extension] || Kramdown::Parser::Kramdown::Extension.new
44
46
 
45
47
  @src = nil
46
48
  @tree = nil
@@ -84,7 +86,7 @@ module Kramdown
84
86
  BLOCK_PARSERS = [:blank_line, :codeblock, :codeblock_fenced, :blockquote, :table, :atx_header,
85
87
  :setext_header, :horizontal_rule, :list, :definition_list, :link_definition, :block_html,
86
88
  :footnote_definition, :ald, :block_ial, :extension_block, :eob_marker, :paragraph]
87
- SPAN_PARSERS = [:emphasis, :codespan, :autolink, :span_html, :footnote_marker, :link,
89
+ SPAN_PARSERS = [:emphasis, :codespan, :autolink, :span_html, :footnote_marker, :link, :smart_quotes,
88
90
  :span_ial, :html_entity, :typographic_syms, :line_break, :escaped_chars]
89
91
 
90
92
  # Adapt the object to allow parsing like specified in the options.
@@ -97,8 +99,13 @@ module Kramdown
97
99
  raise Kramdown::Error, "Unknown parser: #{name}"
98
100
  end
99
101
  end
100
- @span_start = Regexp.union(*SPAN_PARSERS.map {|name| @parsers[name].start_re})
101
- @span_start_re = /(?=#{@span_start})/
102
+ @span_start, @span_start_re = span_parser_regexps
103
+ end
104
+
105
+ # Create the needed span parser regexps.
106
+ def span_parser_regexps(parsers = SPAN_PARSERS)
107
+ span_start = /#{parsers.map {|name| @parsers[name].span_start}.join('|')}/
108
+ [span_start, /(?=#{span_start})/]
102
109
  end
103
110
 
104
111
  # Parse all block level elements in +text+ into the element +el+.
@@ -149,10 +156,7 @@ module Kramdown
149
156
 
150
157
  span_start = @span_start
151
158
  span_start_re = @span_start_re
152
- if parsers
153
- span_start = Regexp.union(*parsers.map {|name| @parsers[name].start_re})
154
- span_start_re = /(?=#{span_start})/
155
- end
159
+ span_start, span_start_re = span_parser_regexps(parsers) if parsers
156
160
  parsers = parsers || SPAN_PARSERS
157
161
 
158
162
  used_re = (stop_re.nil? ? span_start_re : /(?=#{Regexp.union(stop_re, span_start)})/)
@@ -171,13 +175,7 @@ module Kramdown
171
175
  false
172
176
  end
173
177
  end unless stop_re_found
174
- if !processed && !stop_re_found
175
- if stop_re_matched
176
- add_text(@src.scan(/./))
177
- else
178
- raise Kramdown::Error, 'Bug: please report!'
179
- end
180
- end
178
+ add_text(@src.scan(/./)) if !processed && !stop_re_found
181
179
  else
182
180
  add_text(@src.scan(/.*/m)) unless stop_re
183
181
  break
@@ -213,22 +211,42 @@ module Kramdown
213
211
  ial.each {|k,v| attr[k] = v if k.kind_of?(String) && k != 'class' }
214
212
  end
215
213
 
214
+ # Extract the part of the StringScanner backed string specified by the +range+. This method
215
+ # also works correctly under Ruby 1.9.
216
+ def extract_string(range)
217
+ result = nil
218
+ if RUBY_VERSION >= '1.9'
219
+ begin
220
+ enc = @src.string.encoding
221
+ @src.string.force_encoding('ASCII-8BIT')
222
+ result = @src.string[range].force_encoding(enc)
223
+ ensure
224
+ @src.string.force_encoding(enc)
225
+ end
226
+ else
227
+ result = @src.string[range]
228
+ end
229
+ result
230
+ end
231
+
216
232
 
217
233
  @@parsers = {}
218
234
 
219
235
  # Holds all the needed data for one block/span level parser.
220
- Data = Struct.new(:name, :start_re, :method)
236
+ Data = Struct.new(:name, :start_re, :span_start, :method)
221
237
 
222
238
  # Add a parser method
223
239
  #
224
240
  # * with the given +name+,
225
241
  # * using +start_re+ as start regexp
242
+ # * and, for span parsers, +span_start+ as a String that can be used in a regexp and
243
+ # which identifies the starting character(s)
226
244
  #
227
245
  # to the registry. The method name is automatically derived from the +name+ or can explicitly
228
246
  # be set by using the +meth_name+ parameter.
229
- def self.define_parser(name, start_re, meth_name = "parse_#{name}")
247
+ def self.define_parser(name, start_re, span_start = nil, meth_name = "parse_#{name}")
230
248
  raise "A parser with the name #{name} already exists!" if @@parsers.has_key?(name)
231
- @@parsers[name] = Data.new(name, start_re, meth_name)
249
+ @@parsers[name] = Data.new(name, start_re, span_start, meth_name)
232
250
  end
233
251
 
234
252
  # Return the Data structure for the parser +name+.
@@ -265,6 +283,7 @@ module Kramdown
265
283
  require 'kramdown/parser/kramdown/autolink'
266
284
  require 'kramdown/parser/kramdown/codespan'
267
285
  require 'kramdown/parser/kramdown/emphasis'
286
+ require 'kramdown/parser/kramdown/smart_quotes'
268
287
 
269
288
  end
270
289