asciidoctor 0.1.4 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of asciidoctor might be problematic. Click here for more details.

Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.adoc +209 -25
  3. data/{LICENSE → LICENSE.adoc} +4 -3
  4. data/README.adoc +392 -395
  5. data/Rakefile +94 -137
  6. data/benchmark/benchmark.rb +127 -0
  7. data/benchmark/sample-data/mdbasics.adoc +334 -0
  8. data/bin/asciidoctor +5 -8
  9. data/bin/asciidoctor-safe +4 -8
  10. data/compat/asciidoc.conf +78 -11
  11. data/compat/font-awesome-3-compat.css +397 -0
  12. data/data/stylesheets/asciidoctor-default.css +399 -0
  13. data/data/stylesheets/coderay-asciidoctor.css +89 -0
  14. data/features/open_block.feature +92 -0
  15. data/features/pass_block.feature +66 -0
  16. data/features/step_definitions.rb +42 -0
  17. data/features/text_formatting.feature +55 -0
  18. data/features/xref.feature +116 -0
  19. data/lib/asciidoctor.rb +1155 -605
  20. data/lib/asciidoctor/abstract_block.rb +157 -71
  21. data/lib/asciidoctor/abstract_node.rb +150 -93
  22. data/lib/asciidoctor/attribute_list.rb +85 -90
  23. data/lib/asciidoctor/block.rb +51 -24
  24. data/lib/asciidoctor/callouts.rb +4 -7
  25. data/lib/asciidoctor/cli.rb +3 -0
  26. data/lib/asciidoctor/cli/invoker.rb +86 -76
  27. data/lib/asciidoctor/cli/options.rb +111 -61
  28. data/lib/asciidoctor/converter.rb +232 -0
  29. data/lib/asciidoctor/converter/base.rb +58 -0
  30. data/lib/asciidoctor/converter/composite.rb +66 -0
  31. data/lib/asciidoctor/converter/docbook45.rb +94 -0
  32. data/lib/asciidoctor/converter/docbook5.rb +684 -0
  33. data/lib/asciidoctor/converter/factory.rb +225 -0
  34. data/lib/asciidoctor/converter/html5.rb +1081 -0
  35. data/lib/asciidoctor/converter/template.rb +296 -0
  36. data/lib/asciidoctor/core_ext.rb +7 -0
  37. data/lib/asciidoctor/core_ext/object/nil_or_empty.rb +23 -0
  38. data/lib/asciidoctor/core_ext/string/chr.rb +6 -0
  39. data/lib/asciidoctor/core_ext/symbol/length.rb +6 -0
  40. data/lib/asciidoctor/document.rb +590 -304
  41. data/lib/asciidoctor/extensions.rb +1100 -308
  42. data/lib/asciidoctor/helpers.rb +109 -46
  43. data/lib/asciidoctor/inline.rb +16 -9
  44. data/lib/asciidoctor/list.rb +23 -15
  45. data/lib/asciidoctor/opal_ext.rb +4 -0
  46. data/lib/asciidoctor/opal_ext/comparable.rb +38 -0
  47. data/lib/asciidoctor/opal_ext/dir.rb +13 -0
  48. data/lib/asciidoctor/opal_ext/error.rb +2 -0
  49. data/lib/asciidoctor/opal_ext/file.rb +125 -0
  50. data/lib/asciidoctor/{lexer.rb → parser.rb} +646 -455
  51. data/lib/asciidoctor/path_resolver.rb +141 -77
  52. data/lib/asciidoctor/reader.rb +257 -187
  53. data/lib/asciidoctor/section.rb +12 -16
  54. data/lib/asciidoctor/stylesheets.rb +91 -0
  55. data/lib/asciidoctor/substitutors.rb +1548 -0
  56. data/lib/asciidoctor/table.rb +73 -57
  57. data/lib/asciidoctor/timings.rb +39 -0
  58. data/lib/asciidoctor/version.rb +1 -1
  59. data/man/asciidoctor.1 +22 -14
  60. data/man/asciidoctor.adoc +18 -10
  61. data/test/attributes_test.rb +314 -14
  62. data/test/blocks_test.rb +763 -118
  63. data/test/converter_test.rb +352 -0
  64. data/test/document_test.rb +518 -199
  65. data/test/extensions_test.rb +273 -103
  66. data/test/fixtures/asciidoc_index.txt +27 -13
  67. data/test/fixtures/basic-docinfo.xml +1 -1
  68. data/test/fixtures/chapter-a.adoc +3 -0
  69. data/test/fixtures/custom-backends/erb/html5/block_paragraph.html.erb +6 -0
  70. data/test/fixtures/docinfo.xml +1 -1
  71. data/test/fixtures/include-file.asciidoc +2 -0
  72. data/test/fixtures/master.adoc +5 -0
  73. data/test/invoker_test.rb +173 -61
  74. data/test/links_test.rb +97 -21
  75. data/test/lists_test.rb +181 -22
  76. data/test/options_test.rb +86 -2
  77. data/test/paragraphs_test.rb +47 -5
  78. data/test/{lexer_test.rb → parser_test.rb} +128 -57
  79. data/test/paths_test.rb +36 -1
  80. data/test/preamble_test.rb +25 -17
  81. data/test/reader_test.rb +404 -249
  82. data/test/sections_test.rb +623 -58
  83. data/test/substitutions_test.rb +609 -132
  84. data/test/tables_test.rb +198 -24
  85. data/test/test_helper.rb +101 -31
  86. data/test/text_test.rb +88 -31
  87. metadata +160 -64
  88. data/Gemfile +0 -12
  89. data/Guardfile +0 -18
  90. data/asciidoctor.gemspec +0 -143
  91. data/lib/asciidoctor/backends/_stylesheets.rb +0 -466
  92. data/lib/asciidoctor/backends/base_template.rb +0 -114
  93. data/lib/asciidoctor/backends/docbook45.rb +0 -774
  94. data/lib/asciidoctor/backends/docbook5.rb +0 -103
  95. data/lib/asciidoctor/backends/html5.rb +0 -1214
  96. data/lib/asciidoctor/renderer.rb +0 -259
  97. data/lib/asciidoctor/substituters.rb +0 -1083
  98. data/test/fixtures/asciidoc.txt +0 -105
  99. data/test/fixtures/ascshort.txt +0 -32
  100. data/test/fixtures/list_elements.asciidoc +0 -10
  101. data/test/renderer_test.rb +0 -162
@@ -0,0 +1,92 @@
1
+ # language: en
2
+ Feature: Open Blocks
3
+ In order to group content in a generic container
4
+ As a writer
5
+ I want to be able to wrap content in an open block
6
+
7
+
8
+ Scenario: Render an open block that contains a paragraph to HTML
9
+ Given the AsciiDoc source
10
+ """
11
+ --
12
+ A paragraph in an open block.
13
+ --
14
+ """
15
+ When it is converted to html
16
+ Then the result should match the HTML source
17
+ """
18
+ <div class="openblock">
19
+ <div class="content">
20
+ <div class="paragraph">
21
+ <p>A paragraph in an open block.</p>
22
+ </div>
23
+ </div>
24
+ </div>
25
+ """
26
+
27
+
28
+ Scenario: Render an open block that contains a paragraph to DocBook
29
+ Given the AsciiDoc source
30
+ """
31
+ --
32
+ A paragraph in an open block.
33
+ --
34
+ """
35
+ When it is converted to docbook
36
+ Then the result should match the XML source
37
+ """
38
+ <simpara>A paragraph in an open block.</simpara>
39
+ """
40
+
41
+
42
+ Scenario: Render an open block that contains a paragraph to HTML (alt)
43
+ Given the AsciiDoc source
44
+ """
45
+ --
46
+ A paragraph in an open block.
47
+ --
48
+ """
49
+ When it is converted to html
50
+ Then the result should match the HTML structure
51
+ """
52
+ .openblock
53
+ .content
54
+ .paragraph
55
+ p A paragraph in an open block.
56
+ """
57
+
58
+
59
+ Scenario: Render an open block that contains a paragraph to DocBook (alt)
60
+ Given the AsciiDoc source
61
+ """
62
+ --
63
+ A paragraph in an open block.
64
+ --
65
+ """
66
+ When it is converted to docbook
67
+ Then the result should match the XML structure
68
+ """
69
+ simpara A paragraph in an open block.
70
+ """
71
+
72
+
73
+ Scenario: Render an open block that contains a list to HTML
74
+ Given the AsciiDoc source
75
+ """
76
+ --
77
+ * one
78
+ * two
79
+ * three
80
+ --
81
+ """
82
+ When it is converted to html
83
+ Then the result should match the HTML structure
84
+ """
85
+ .openblock
86
+ .content
87
+ .ulist
88
+ ul
89
+ li: p one
90
+ li: p two
91
+ li: p three
92
+ """
@@ -0,0 +1,66 @@
1
+ # language: en
2
+ Feature: Pass Blocks
3
+ In order to pass content through unprocessed
4
+ As a writer
5
+ I want to be able to mark passthrough content using a pass block
6
+
7
+
8
+ Scenario: Render a pass block without performing substitutions by default to HTML
9
+ Given the AsciiDoc source
10
+ """
11
+ :name: value
12
+
13
+ ++++
14
+ <p>{name}</p>
15
+
16
+ image:tiger.png[]
17
+ ++++
18
+ """
19
+ When it is converted to html
20
+ Then the result should match the HTML source
21
+ """
22
+ <p>{name}</p>
23
+
24
+ image:tiger.png[]
25
+ """
26
+
27
+
28
+ Scenario: Render a pass block without performing substitutions by default to DocBook
29
+ Given the AsciiDoc source
30
+ """
31
+ :name: value
32
+
33
+ ++++
34
+ <simpara>{name}</simpara>
35
+
36
+ image:tiger.png[]
37
+ ++++
38
+ """
39
+ When it is converted to docbook
40
+ Then the result should match the XML source
41
+ """
42
+ <simpara>{name}</simpara>
43
+
44
+ image:tiger.png[]
45
+ """
46
+
47
+
48
+ Scenario: Render a pass block performing explicit substitutions to HTML
49
+ Given the AsciiDoc source
50
+ """
51
+ :name: value
52
+
53
+ [subs="attributes,macros"]
54
+ ++++
55
+ <p>{name}</p>
56
+
57
+ image:tiger.png[]
58
+ ++++
59
+ """
60
+ When it is converted to html
61
+ Then the result should match the HTML source
62
+ """
63
+ <p>value</p>
64
+
65
+ <span class="image"><img src="tiger.png" alt="tiger"></span>
66
+ """
@@ -0,0 +1,42 @@
1
+ require "#{File.dirname __FILE__}/../lib/asciidoctor"
2
+ require 'rspec/expectations'
3
+ require 'tilt'
4
+ require 'slim'
5
+
6
+ Given /the AsciiDoc source/ do |source|
7
+ @source = source
8
+ end
9
+
10
+ When /it is converted to html/ do
11
+ @output = Asciidoctor.convert @source
12
+ #File.open('/tmp/test.adoc', 'w') {|f| f.write @source }
13
+ #@output = %x{asciidoc -f compat/asciidoc.conf -o - -s /tmp/test.adoc | XMLLINT_INDENT='' xmllint --format - | tail -n +2}.rstrip
14
+ ##@output = %x{asciidoc -f compat/asciidoc.conf -o - -s /tmp/test.adoc}
15
+ end
16
+
17
+ When /it is converted to docbook/ do
18
+ @output = Asciidoctor.convert @source, :backend => :docbook
19
+ end
20
+
21
+ Then /the result should match the (HTML|XML) source/ do |format, expect|
22
+ @output.should == expect
23
+ end
24
+
25
+ Then /the result should match the (HTML|XML) structure/ do |format, expect|
26
+ case format
27
+ when 'HTML'
28
+ options = {:format => :html5}
29
+ when 'XML'
30
+ options = {:format => :xhtml}
31
+ else
32
+ options = {}
33
+ end
34
+ slim_friendly_output = @output.lines.entries.map {|line|
35
+ if line.start_with? '<'
36
+ line
37
+ else
38
+ %(|#{line})
39
+ end
40
+ }.join
41
+ Slim::Template.new(options) { slim_friendly_output }.render.should == Slim::Template.new(options) { expect }.render
42
+ end
@@ -0,0 +1,55 @@
1
+ # language: en
2
+ Feature: Text Formatting
3
+ In order to apply formatting to the text
4
+ As a writer
5
+ I want to be able to markup inline text with formatting characters
6
+
7
+
8
+ Scenario: Convert text that contains superscript and subscript characters
9
+ Given the AsciiDoc source
10
+ """
11
+ _v_~rocket~ is the value
12
+ ^3^He is the isotope
13
+ log~4~x^n^ is the expression
14
+ M^me^ White is the address
15
+ the 10^th^ point has coordinate (x~10~, y~10~)
16
+ """
17
+ When it is converted to html
18
+ Then the result should match the HTML source
19
+ """
20
+ <div class="paragraph">
21
+ <p><em>v</em><sub>rocket</sub> is the value
22
+ <sup>3</sup>He is the isotope
23
+ log<sub>4</sub>x<sup>n</sup> is the expression
24
+ M<sup>me</sup> White is the address
25
+ the 10<sup>th</sup> point has coordinate (x<sub>10</sub>, y<sub>10</sub>)</p>
26
+ </div>
27
+ """
28
+
29
+
30
+ Scenario: Convert text that has ex-inline literal formatting
31
+ Given the AsciiDoc source
32
+ """
33
+ Use [x-]`{asciidoctor-version}` to print the version of Asciidoctor.
34
+ """
35
+ When it is converted to html
36
+ Then the result should match the HTML source
37
+ """
38
+ <div class="paragraph">
39
+ <p>Use <code>{asciidoctor-version}</code> to print the version of Asciidoctor.</p>
40
+ </div>
41
+ """
42
+
43
+
44
+ Scenario: Convert text that has ex-inline monospaced formatting
45
+ Given the AsciiDoc source
46
+ """
47
+ The document is assumed to be encoded as [x-]+{encoding}+.
48
+ """
49
+ When it is converted to html
50
+ Then the result should match the HTML source
51
+ """
52
+ <div class="paragraph">
53
+ <p>The document is assumed to be encoded as <code>UTF-8</code>.</p>
54
+ </div>
55
+ """
@@ -0,0 +1,116 @@
1
+ # language: en
2
+ Feature: Cross References
3
+ In order to create links to other sections
4
+ As a writer
5
+ I want to be able to use a cross reference macro
6
+
7
+
8
+ Scenario: Create a cross reference from an AsciiDoc cell to a section
9
+ Given the AsciiDoc source
10
+ """
11
+ |===
12
+ a|See <<_install>>
13
+ |===
14
+
15
+ == Install
16
+
17
+ Instructions go here.
18
+ """
19
+ When it is converted to html
20
+ Then the result should match the HTML structure
21
+ """
22
+ table.tableblock.frame-all.grid-all.spread
23
+ colgroup
24
+ col style='width: 100%;'
25
+ tbody
26
+ tr
27
+ td.tableblock.halign-left.valign-top
28
+ div
29
+ .paragraph: p
30
+ 'See
31
+ a href='#_install' Install
32
+ .sect1
33
+ h2#_install Install
34
+ .sectionbody
35
+ .paragraph: p Instructions go here.
36
+ """
37
+
38
+
39
+ Scenario: Create a cross reference using the target section title
40
+ Given the AsciiDoc source
41
+ """
42
+ == Section One
43
+
44
+ content
45
+
46
+ == Section Two
47
+
48
+ refer to <<Section One>>
49
+ """
50
+ When it is converted to html
51
+ Then the result should match the HTML structure
52
+ """
53
+ .sect1
54
+ h2#_section_one Section One
55
+ .sectionbody: .paragraph: p content
56
+ .sect1
57
+ h2#_section_two Section Two
58
+ .sectionbody: .paragraph: p
59
+ 'refer to
60
+ a href='#_section_one' Section One
61
+ """
62
+
63
+
64
+ Scenario: Create a cross reference using the target reftext
65
+ Given the AsciiDoc source
66
+ """
67
+ [reftext="the first section"]
68
+ == Section One
69
+
70
+ content
71
+
72
+ == Section Two
73
+
74
+ refer to <<the first section>>
75
+ """
76
+ When it is converted to html
77
+ Then the result should match the HTML structure
78
+ """
79
+ .sect1
80
+ h2#_section_one Section One
81
+ .sectionbody: .paragraph: p content
82
+ .sect1
83
+ h2#_section_two Section Two
84
+ .sectionbody: .paragraph: p
85
+ 'refer to
86
+ a href='#_section_one' the first section
87
+ """
88
+
89
+
90
+ Scenario: Create a cross reference using the formatted target title
91
+ Given the AsciiDoc source
92
+ """
93
+ == Section *One*
94
+
95
+ content
96
+
97
+ == Section Two
98
+
99
+ refer to <<Section *One*>>
100
+ """
101
+ When it is converted to html
102
+ Then the result should match the HTML structure
103
+ """
104
+ .sect1
105
+ h2#_section_strong_one_strong
106
+ 'Section
107
+ strong One
108
+ .sectionbody: .paragraph: p content
109
+ .sect1
110
+ h2#_section_two Section Two
111
+ .sectionbody: .paragraph: p
112
+ 'refer to
113
+ a href='#_section_strong_one_strong'
114
+ 'Section
115
+ strong One
116
+ """
@@ -1,13 +1,30 @@
1
1
  RUBY_ENGINE = 'unknown' unless defined? RUBY_ENGINE
2
- require 'strscan'
2
+ RUBY_ENGINE_OPAL = (RUBY_ENGINE == 'opal')
3
+ RUBY_ENGINE_JRUBY = (RUBY_ENGINE == 'jruby')
4
+ RUBY_MIN_VERSION_1_9 = (RUBY_VERSION >= '1.9')
5
+ RUBY_MIN_VERSION_2 = (RUBY_VERSION >= '2')
6
+
3
7
  require 'set'
4
8
 
5
- $:.unshift(File.dirname(__FILE__))
9
+ # NOTE RUBY_ENGINE == 'opal' conditional blocks are filtered by the Opal preprocessor
10
+ if RUBY_ENGINE == 'opal'
11
+ require 'encoding' # needed for String.bytes method
12
+ require 'strscan'
13
+ require 'asciidoctor/opal_ext'
14
+ else
15
+ autoload :Base64, 'base64'
16
+ autoload :FileUtils, 'fileutils'
17
+ autoload :OpenURI, 'open-uri'
18
+ autoload :StringScanner, 'strscan'
19
+ end
20
+
21
+ # ideally we should use require_relative instead of modifying the LOAD_PATH
22
+ $:.unshift File.dirname __FILE__
6
23
 
7
- # Public: Methods for parsing Asciidoc input files and rendering documents
24
+ # Public: Methods for parsing AsciiDoc input files and converting documents
8
25
  # using eRuby templates.
9
26
  #
10
- # Asciidoc documents comprise a header followed by zero or more sections.
27
+ # AsciiDoc documents comprise a header followed by zero or more sections.
11
28
  # Sections are composed of blocks of content. For example:
12
29
  #
13
30
  # = Doc Title
@@ -25,25 +42,19 @@ $:.unshift(File.dirname(__FILE__))
25
42
  #
26
43
  # Examples:
27
44
  #
28
- # Use built-in templates:
45
+ # Use built-in converter:
29
46
  #
30
- # lines = File.readlines("your_file.asc")
31
- # doc = Asciidoctor::Document.new(lines)
32
- # html = doc.render
33
- # File.open("your_file.html", "w+") do |file|
34
- # file.puts html
35
- # end
47
+ # Asciidoctor.convert_file 'sample.adoc'
36
48
  #
37
49
  # Use custom (Tilt-supported) templates:
38
50
  #
39
- # lines = File.readlines("your_file.asc")
40
- # doc = Asciidoctor::Document.new(lines, :template_dir => 'templates')
41
- # html = doc.render
42
- # File.open("your_file.html", "w+") do |file|
43
- # file.puts html
44
- # end
51
+ # Asciidoctor.convert_file 'sample.adoc', :template_dir => 'path/to/templates'
52
+ #
45
53
  module Asciidoctor
46
54
 
55
+ # alias the RUBY_ENGINE constant inside the Asciidoctor namespace
56
+ RUBY_ENGINE = ::RUBY_ENGINE
57
+
47
58
  module SafeMode
48
59
 
49
60
  # A safe mode level that disables any of the security features enforced
@@ -56,7 +67,7 @@ module Asciidoctor
56
67
  SAFE = 1;
57
68
 
58
69
  # A safe mode level that disallows the document from setting attributes
59
- # that would affect the rendering of the document, in addition to all the
70
+ # that would affect the conversion of the document, in addition to all the
60
71
  # security features of SafeMode::SAFE. For instance, this level disallows
61
72
  # changing the backend or the source-highlighter using an attribute defined
62
73
  # in the source document. This is the most fundamental level of security
@@ -87,45 +98,128 @@ module Asciidoctor
87
98
 
88
99
  # Flags to control compliance with the behavior of AsciiDoc
89
100
  module Compliance
101
+ @keys = [].to_set
102
+ class << self
103
+ attr :keys
104
+ end
105
+
106
+ # Defines a new compliance key and assigns an initial value.
107
+ def self.define key, value
108
+ if key == :keys || (self.respond_to? key)
109
+ raise ::ArgumentError, %(Illegal key name: #{key})
110
+ end
111
+ instance_variable_set %(@#{key}), value
112
+ class << self; self; end.send :attr_accessor, key
113
+ @keys << key
114
+ end
115
+
116
+ # AsciiDoc terminates paragraphs adjacent to
117
+ # block content (delimiter or block attribute list)
118
+ # This option allows this behavior to be modified
119
+ # TODO what about literal paragraph?
120
+ # Compliance value: true
121
+ define :block_terminates_paragraph, true
122
+
123
+ # AsciiDoc does not treat paragraphs labeled with a verbatim style
124
+ # (literal, listing, source, verse) as verbatim
125
+ # This option allows this behavior to be modified
126
+ # Compliance value: false
127
+ define :strict_verbatim_paragraphs, true
128
+
129
+ # NOT CURRENTLY USED
130
+ # AsciiDoc allows start and end delimiters around
131
+ # a block to be different lengths
132
+ # Enabling this option requires matching lengths
133
+ # Compliance value: false
134
+ #define :congruent_block_delimiters, true
135
+
90
136
  # AsciiDoc supports both single-line and underlined
91
137
  # section titles.
92
138
  # This option disables the underlined variant.
93
139
  # Compliance value: true
94
- @underline_style_section_titles = true
95
- class << self
96
- attr_accessor :underline_style_section_titles
97
- end
140
+ define :underline_style_section_titles, true
141
+
142
+ # Asciidoctor will unwrap the content in a preamble
143
+ # if the document has a title and no sections.
144
+ # Compliance value: false
145
+ define :unwrap_standalone_preamble, true
146
+
147
+ # AsciiDoc drops lines that contain references to missing attributes.
148
+ # This behavior is not intuitive to most writers
149
+ # Compliance value: 'drop-line'
150
+ define :attribute_missing, 'skip'
151
+
152
+ # AsciiDoc drops lines that contain an attribute unassignment.
153
+ # This behavior may need to be tuned depending on the circumstances.
154
+ # Compliance value: 'drop-line'
155
+ define :attribute_undefined, 'drop-line'
156
+
157
+ # Asciidoctor will allow the id, role and options to be set
158
+ # on blocks using a shorthand syntax (e.g., #idname.rolename%optionname)
159
+ define :shorthand_property_syntax, true
98
160
 
99
161
  # Asciidoctor will recognize commonly-used Markdown syntax
100
162
  # to the degree it does not interfere with existing
101
163
  # AsciiDoc syntax and behavior.
102
164
  # Compliance value: false
103
- @markdown_syntax = true
104
- class << self
105
- attr_accessor :markdown_syntax
106
- end
165
+ define :markdown_syntax, true
107
166
  end
108
167
 
109
- # The root path of the Asciidoctor gem
110
- ROOT_PATH = File.expand_path(File.join(File.dirname(__FILE__), '..'))
168
+ # The absolute root path of the Asciidoctor RubyGem
169
+ ROOT_PATH = ::File.dirname ::File.dirname ::File.expand_path __FILE__
111
170
 
112
- # Flag to indicate whether encoding of external strings needs to be forced to UTF-8
171
+ # The absolute lib path of the Asciidoctor RubyGem
172
+ LIB_PATH = ::File.join ROOT_PATH, 'lib'
173
+
174
+ # The absolute data path of the Asciidoctor RubyGem
175
+ DATA_PATH = ::File.join ROOT_PATH, 'data'
176
+
177
+ # The user's home directory, as best we can determine it
178
+ # NOTE not using infix rescue for performance reasons, see: https://github.com/jruby/jruby/issues/1816
179
+ begin
180
+ USER_HOME = ::Dir.home
181
+ rescue
182
+ USER_HOME = ::ENV['HOME'] || ::Dir.pwd
183
+ end
184
+
185
+ # Flag to indicate whether encoding can be coerced to UTF-8
113
186
  # _All_ input data must be force encoded to UTF-8 if Encoding.default_external is *not* UTF-8
114
- # Address failures performing string operations that are reported as "invalid byte sequence in US-ASCII"
187
+ # Addresses failures performing string operations that are reported as "invalid byte sequence in US-ASCII"
115
188
  # Ruby 1.8 doesn't seem to experience this problem (perhaps because it isn't validating the encodings)
116
- FORCE_ENCODING = RUBY_VERSION > '1.9' && Encoding.default_external != Encoding::UTF_8
189
+ COERCE_ENCODING = !::RUBY_ENGINE_OPAL && ::RUBY_MIN_VERSION_1_9
190
+
191
+ # Flag to indicate whether encoding of external strings needs to be forced to UTF-8
192
+ FORCE_ENCODING = COERCE_ENCODING && ::Encoding.default_external != ::Encoding::UTF_8
193
+
194
+ # Byte arrays for UTF-* Byte Order Marks
195
+ # hex escape sequence used for Ruby 1.8 compatibility
196
+ BOM_BYTES_UTF_8 = "\xef\xbb\xbf".bytes.to_a
197
+ BOM_BYTES_UTF_16LE = "\xff\xfe".bytes.to_a
198
+ BOM_BYTES_UTF_16BE = "\xfe\xff".bytes.to_a
117
199
 
118
200
  # Flag to indicate that line length should be calculated using a unicode mode hint
119
- FORCE_UNICODE_LINE_LENGTH = RUBY_VERSION < '1.9'
201
+ FORCE_UNICODE_LINE_LENGTH = !::RUBY_MIN_VERSION_1_9
120
202
 
121
- # The endline character to use when rendering output
203
+ # Flag to indicate whether gsub can use a Hash to map matches to replacements
204
+ SUPPORTS_GSUB_RESULT_HASH = ::RUBY_MIN_VERSION_1_9 && !::RUBY_ENGINE_OPAL
205
+
206
+ # The endline character used for output; stored in constant table as an optimization
122
207
  EOL = "\n"
123
208
 
209
+ # The null character to use for splitting attribute values
210
+ NULL = "\0"
211
+
212
+ # String for matching tab character
213
+ TAB = "\t"
214
+
215
+ # Regexp for replacing tab character
216
+ TAB_PATTERN = /\t/
217
+
124
218
  # The default document type
125
- # Can influence markup generated by render templates
219
+ # Can influence markup generated by the converters
126
220
  DEFAULT_DOCTYPE = 'article'
127
221
 
128
- # The backend determines the format of the rendered output, default to html5
222
+ # The backend determines the format of the converted output, defaults to html5
129
223
  DEFAULT_BACKEND = 'html5'
130
224
 
131
225
  DEFAULT_STYLESHEET_KEYS = ['', 'DEFAULT'].to_set
@@ -134,8 +228,8 @@ module Asciidoctor
134
228
 
135
229
  # Pointers to the preferred version for a given backend.
136
230
  BACKEND_ALIASES = {
137
- 'html' => 'html5',
138
- 'docbook' => 'docbook45'
231
+ 'html' => 'html5',
232
+ 'docbook' => 'docbook5'
139
233
  }
140
234
 
141
235
  # Default page widths for calculating absolute widths
@@ -147,8 +241,9 @@ module Asciidoctor
147
241
  DEFAULT_EXTENSIONS = {
148
242
  'html' => '.html',
149
243
  'docbook' => '.xml',
150
- 'asciidoc' => '.ad',
151
- 'markdown' => '.md'
244
+ 'pdf' => '.pdf',
245
+ 'epub' => '.epub',
246
+ 'asciidoc' => '.adoc'
152
247
  }
153
248
 
154
249
  # Set of file extensions recognized as AsciiDoc documents (stored as a truth hash)
@@ -157,6 +252,7 @@ module Asciidoctor
157
252
  '.adoc' => true,
158
253
  '.ad' => true,
159
254
  '.asc' => true,
255
+ # TODO .txt should be deprecated
160
256
  '.txt' => true
161
257
  }
162
258
 
@@ -179,26 +275,25 @@ module Asciidoctor
179
275
  '----' => [:listing, ['literal', 'source'].to_set],
180
276
  '....' => [:literal, ['listing', 'source'].to_set],
181
277
  '====' => [:example, ['admonition'].to_set],
182
- '****' => [:sidebar, Set.new],
278
+ '****' => [:sidebar, ::Set.new],
183
279
  '____' => [:quote, ['verse'].to_set],
184
280
  '""' => [:quote, ['verse'].to_set],
185
- '++++' => [:pass, Set.new],
186
- '|===' => [:table, Set.new],
187
- ',===' => [:table, Set.new],
188
- ':===' => [:table, Set.new],
189
- '!===' => [:table, Set.new],
190
- '////' => [:comment, Set.new],
191
- '```' => [:fenced_code, Set.new],
192
- '~~~' => [:fenced_code, Set.new]
281
+ '++++' => [:pass, ['stem', 'latexmath', 'asciimath'].to_set],
282
+ '|===' => [:table, ::Set.new],
283
+ ',===' => [:table, ::Set.new],
284
+ ':===' => [:table, ::Set.new],
285
+ '!===' => [:table, ::Set.new],
286
+ '////' => [:comment, ::Set.new],
287
+ '```' => [:fenced_code, ::Set.new]
193
288
  }
194
289
 
195
290
  DELIMITED_BLOCK_LEADERS = DELIMITED_BLOCKS.keys.map {|key| key[0..1] }.to_set
196
291
 
197
- BREAK_LINES = {
198
- '\'' => :ruler,
199
- '-' => :ruler,
200
- '*' => :ruler,
201
- '_' => :ruler,
292
+ LAYOUT_BREAK_LINES = {
293
+ '\'' => :thematic_break,
294
+ '-' => :thematic_break,
295
+ '*' => :thematic_break,
296
+ '_' => :thematic_break,
202
297
  '<' => :page_break
203
298
  }
204
299
 
@@ -209,15 +304,6 @@ module Asciidoctor
209
304
  # TODO validate use of explicit style name above ordered list (this list is for selecting an implicit style)
210
305
  ORDERED_LIST_STYLES = [:arabic, :loweralpha, :lowerroman, :upperalpha, :upperroman] #, :lowergreek]
211
306
 
212
- ORDERED_LIST_MARKER_PATTERNS = {
213
- :arabic => /\d+[.>]/,
214
- :loweralpha => /[a-z]\./,
215
- :lowerroman => /[ivx]+\)/,
216
- :upperalpha => /[A-Z]\./,
217
- :upperroman => /[IVX]+\)/
218
- #:lowergreek => /[a-z]\]/
219
- }
220
-
221
307
  ORDERED_LIST_KEYWORDS = {
222
308
  'loweralpha' => 'a',
223
309
  'lowerroman' => 'i',
@@ -230,390 +316,811 @@ module Asciidoctor
230
316
 
231
317
  LIST_CONTINUATION = '+'
232
318
 
319
+ # NOTE AsciiDoc Python recognizes both a preceding TAB and a space
233
320
  LINE_BREAK = ' +'
234
321
 
322
+ LINE_CONTINUATION = ' \\'
323
+
324
+ LINE_CONTINUATION_LEGACY = ' +'
325
+
326
+ BLOCK_MATH_DELIMITERS = {
327
+ :asciimath => ['\\$', '\\$'],
328
+ :latexmath => ['\\[', '\\]'],
329
+ }
330
+
331
+ INLINE_MATH_DELIMITERS = {
332
+ :asciimath => ['\\$', '\\$'],
333
+ :latexmath => ['\\(', '\\)'],
334
+ }
335
+
235
336
  # attributes which can be changed within the content of the document (but not
236
- # header) because it has semantic meaning; ex. numbered
237
- FLEXIBLE_ATTRIBUTES = %w(numbered)
337
+ # header) because it has semantic meaning; ex. sectnums
338
+ FLEXIBLE_ATTRIBUTES = %w(sectnums)
238
339
 
239
- # NOTE allows for empty space in line as it could be left by the template engine
240
- BLANK_LINE_PATTERN = /^[[:blank:]]*\n/
340
+ # A collection of regular expressions used by the parser.
341
+ #
342
+ # NOTE: The following pattern, which appears frequently, captures the
343
+ # contents between square brackets, ignoring escaped closing brackets
344
+ # (closing brackets prefixed with a backslash '\' character)
345
+ #
346
+ # Pattern: (?:\[((?:\\\]|[^\]])*?)\])
347
+ # Matches: [enclosed text here] or [enclosed [text\] here]
348
+ #
349
+ #(pseudo)module Rx
350
+
351
+ ## Regular expression character classes (to ensure regexp compatibility between Ruby and JavaScript)
352
+ ## CC stands for "character class", CG stands for "character class group"
353
+
354
+ # NOTE \w matches only the ASCII word characters, whereas [[:word:]] or \p{Word} matches any character in the Unicode word category.
355
+
356
+ # character classes for the Regexp engine(s) in JavaScript
357
+ if RUBY_ENGINE == 'opal'
358
+ CC_ALPHA = 'a-zA-Z'
359
+ CG_ALPHA = '[a-zA-Z]'
360
+ CC_ALNUM = 'a-zA-Z0-9'
361
+ CG_ALNUM = '[a-zA-Z0-9]'
362
+ CG_BLANK = '[ \\t]'
363
+ CC_EOL = '(?=\\n|$)'
364
+ CG_GRAPH = '[\\x21-\\x7E]' # non-blank character
365
+ CC_ALL = '[\s\S]' # any character, including newlines (alternatively, [^])
366
+ CC_WORD = 'a-zA-Z0-9_'
367
+ CG_WORD = '[a-zA-Z0-9_]'
368
+ # character classes for the Regexp engine in Ruby >= 2 (Ruby 1.9 supports \p{} but has problems w/ encoding)
369
+ elsif ::RUBY_MIN_VERSION_2
370
+ CC_ALPHA = CG_ALPHA = '\p{Alpha}'
371
+ CC_ALNUM = CG_ALNUM = '\p{Alnum}'
372
+ CC_ALL = '.'
373
+ CG_BLANK = '\p{Blank}'
374
+ CC_EOL = '$'
375
+ CG_GRAPH = '\p{Graph}'
376
+ CC_WORD = CG_WORD = '\p{Word}'
377
+ # character classes for the Regexp engine in Ruby < 2
378
+ else
379
+ CC_ALPHA = '[:alpha:]'
380
+ CG_ALPHA = '[[:alpha:]]'
381
+ CC_ALL = '.'
382
+ CC_ALNUM = '[:alnum:]'
383
+ CG_ALNUM = '[[:alnum:]]'
384
+ CG_BLANK = '[[:blank:]]'
385
+ CC_EOL = '$'
386
+ CG_GRAPH = '[[:graph:]]' # non-blank character
387
+ if ::RUBY_MIN_VERSION_1_9
388
+ CC_WORD = '[:word:]'
389
+ CG_WORD = '[[:word:]]'
390
+ else
391
+ # NOTE Ruby 1.8 cannot match word characters beyond the ASCII range; if you need this feature, upgrade!
392
+ CC_WORD = '[:alnum:]_'
393
+ CG_WORD = '[[:alnum:]_]'
394
+ end
395
+ end
241
396
 
242
- LINE_FEED_ENTITY = '&#10;' # or &#x0A;
397
+ ## Document header
243
398
 
244
- # Flags to control compliance with the behavior of AsciiDoc
245
- COMPLIANCE = {
246
- # AsciiDoc terminates paragraphs adjacent to
247
- # block content (delimiter or block attribute list)
248
- # Compliance value: true
249
- # TODO what about literal paragraph?
250
- :block_terminates_paragraph => true,
399
+ # Matches the author info line immediately following the document title.
400
+ #
401
+ # Examples
402
+ #
403
+ # Doc Writer <doc@example.com>
404
+ # Mary_Sue Brontë
405
+ #
406
+ AuthorInfoLineRx = /^(#{CG_WORD}[#{CC_WORD}\-'.]*)(?: +(#{CG_WORD}[#{CC_WORD}\-'.]*))?(?: +(#{CG_WORD}[#{CC_WORD}\-'.]*))?(?: +<([^>]+)>)?$/
251
407
 
252
- # AsciiDoc does not treat paragraphs labeled with a
253
- # verbatim style (literal, listing, source, verse)
254
- # as verbatim; override this behavior
255
- # Compliance value: false
256
- :strict_verbatim_paragraphs => true,
408
+ # Matches the revision info line, which appears immediately following
409
+ # the author info line beneath the document title.
410
+ #
411
+ # Examples
412
+ #
413
+ # v1.0, 2013-01-01: Ring in the new year release
414
+ #
415
+ RevisionInfoLineRx = /^(?:\D*(.*?),)?(?:\s*(?!:)(.*?))(?:\s*(?!^):\s*(.*))?$/
257
416
 
258
- # AsciiDoc allows start and end delimiters around
259
- # a block to be different lengths
260
- # this option requires that they be the same
261
- # Compliance value: false
262
- :congruent_block_delimiters => true,
417
+ # Matches the title and volnum in the manpage doctype.
418
+ #
419
+ # Examples
420
+ #
421
+ # = asciidoctor ( 1 )
422
+ #
423
+ ManpageTitleVolnumRx = /^(.*)\((.*)\)$/
263
424
 
264
- # AsciiDoc drops lines that contain references to missing attributes.
265
- # This behavior is not intuitive to most writers
266
- # Compliance value: 'drop-line'
267
- :attribute_missing => 'skip',
425
+ # Matches the name and purpose in the manpage doctype.
426
+ #
427
+ # Examples
428
+ #
429
+ # asciidoctor - converts AsciiDoc source files to HTML, DocBook and other formats
430
+ #
431
+ ManpageNamePurposeRx = /^(.*?)#{CG_BLANK}+-#{CG_BLANK}+(.*)$/
268
432
 
269
- # AsciiDoc drops lines that contain an attribute unassignemnt.
270
- # This behavior may need to be tuned depending on the circumstances.
271
- # Compliance value: 'drop-line'
272
- :attribute_undefined => 'drop-line',
273
- }
433
+ ## Preprocessor directives
274
434
 
275
- # The following pattern, which appears frequently, captures the contents between square brackets,
276
- # ignoring escaped closing brackets (closing brackets prefixed with a backslash '\' character)
277
- #
278
- # Pattern:
279
- # (?:\[((?:\\\]|[^\]])*?)\])
280
- # Matches:
281
- # [enclosed text here] or [enclosed [text\] here]
282
- REGEXP = {
283
- # NOTE: this is a inline admonition note
284
- :admonition_inline => /^(#{ADMONITION_STYLES.to_a * '|'}):\s/,
285
-
286
- # [[Foo]]
287
- :anchor => /^\[\[([^\s\[\]]+)\]\]$/,
288
-
289
- # Foowhatevs [[Bar]]
290
- :anchor_embedded => /^(.*?)\s*\[\[([^\[\]]+)\]\]$/,
291
-
292
- # [[ref]] (anywhere inline)
293
- :anchor_macro => /\\?\[\[([\w":].*?)\]\]/,
294
-
295
- # matches any unbounded block delimiter:
296
- # listing, literal, example, sidebar, quote, passthrough, table, fenced code
297
- # does not include open block or air quotes
298
- # TIP position the most common blocks towards the front of the pattern
299
- :any_blk => %r{^(?:(?:-|\.|=|\*|_|\+|/){4,}|[\|,;!]={3,}|(?:`|~){3,}.*)$},
300
-
301
- # detect a list item of any sort
302
- # [[:graph:]] is a non-blank character
303
- :any_list => /^(?:
304
- <?\d+>[[:blank:]]+[[:graph:]]|
305
- [[:blank:]]*(?:-|(?:\*|\.){1,5}|\d+\.|[A-Za-z]\.|[IVXivx]+\))[[:blank:]]+[[:graph:]]|
306
- [[:blank:]]*.*?(?::{2,4}|;;)(?:[[:blank:]]+[[:graph:]]|$)
307
- )/x,
308
-
309
- # :foo: bar
310
- # :Author: Dan
311
- # :numbered!:
312
- # :long-entry: Attribute value lines ending in ' +'
313
- # are joined together as a single value,
314
- # collapsing the line breaks and indentation to
315
- # a single space.
316
- :attr_entry => /^:(!?\w.*?):(?:[[:blank:]]+(.*))?$/,
317
-
318
- # An attribute list above a block element
319
- #
320
- # Can be strictly positional:
321
- # [quote, Adam Smith, Wealth of Nations]
322
- # Or can have name/value pairs
323
- # [NOTE, caption="Good to know"]
324
- # Can be defined by an attribute
325
- # [{lead}]
326
- :blk_attr_list => /^\[(|[[:blank:]]*[\w\{,.#"'%].*)\]$/,
327
-
328
- # block attribute list or block id (bulk query)
329
- :attr_line => /^\[(|[[:blank:]]*[\w\{,.#"'%].*|\[[^\[\]]*\])\]$/,
330
-
331
- # attribute reference
332
- # {foo}
333
- # {counter:pcount:1}
334
- # {set:foo:bar}
335
- # {set:name!}
336
- :attr_ref => /(\\)?\{((set|counter2?):.+?|\w+(?:[\-]\w+)*)(\\)?\}/,
337
-
338
- # The author info line the appears immediately following the document title
339
- # John Doe <john@anonymous.com>
340
- :author_info => /^(\w[\w\-'.]*)(?: +(\w[\w\-'.]*))?(?: +(\w[\w\-'.]*))?(?: +<([^>]+)>)?$/,
341
-
342
- # [[[Foo]]] (anywhere inline)
343
- :biblio_macro => /\\?\[\[\[([\w:][\w:.-]*?)\]\]\]/,
344
-
345
- # callout reference inside literal text
346
- # <1> (optionally prefixed by //, # or ;; line comment chars)
347
- # <1> <2> (multiple callouts on one line)
348
- # <!--1--> (for XML-based languages)
349
- # special characters are already be replaced at this point during render
350
- :callout_render => /(?:(?:\/\/|#|;;) ?)?(\\)?&lt;!?(--|)(\d+)\2&gt;(?=(?: ?\\?&lt;!?\2\d+\2&gt;)*$)/,
351
- # ...but not while scanning
352
- :callout_quick_scan => /\\?<!?(--|)(\d+)\1>(?=(?: ?\\?<!?\1\d+\1>)*$)/,
353
- :callout_scan => /(?:(?:\/\/|#|;;) ?)?(\\)?<!?(--|)(\d+)\2>(?=(?: ?\\?<!?\2\d+\2>)*$)/,
354
-
355
- # <1> Foo
356
- :colist => /^<?(\d+)>[[:blank:]]+(.*)/,
357
-
358
- # ////
359
- # comment block
360
- # ////
361
- :comment_blk => %r{^/{4,}$},
362
-
363
- # // (and then whatever)
364
- :comment => %r{^//(?:[^/]|$)},
365
-
366
- # one,two;three;four
367
- :ssv_or_csv_delim => /,|;/,
368
-
369
- # one two three
370
- :space_delim => /([^\\])[[:blank:]]+/,
371
-
372
- # Ctrl + Alt+T
373
- # Ctrl,T
374
- :kbd_delim => /(?:\+|,)(?=[[:blank:]]*[^\1])/,
375
-
376
- # one\ two\ three
377
- :escaped_space => /\\([[:blank:]])/,
378
-
379
- # 29
380
- :digits => /^\d+$/,
381
-
382
- # foo:: || foo::: || foo:::: || foo;;
383
- # Should be followed by a definition, on the same line...
384
- # foo:: That which precedes 'bar' (see also, <<bar>>)
385
- # ...or on a separate line
386
- # foo::
387
- # That which precedes 'bar' (see also, <<bar>>)
388
- # The term may be an attribute reference
389
- # {term_foo}:: {def_foo}
390
- # NOTE negative match for comment line is intentional since that isn't handled when looking for next list item
391
- # QUESTION should we check for line comment in regex or when scanning the lines?
392
- :dlist => /^(?!\/\/)[[:blank:]]*(.*?)(:{2,4}|;;)(?:[[:blank:]]+(.*))?$/,
393
- :dlist_siblings => {
394
- # (?:.*?[^:])? - a non-capturing group which grabs longest sequence of characters that doesn't end w/ colon
395
- '::' => /^(?!\/\/)[[:blank:]]*((?:.*[^:])?)(::)(?:[[:blank:]]+(.*))?$/,
396
- ':::' => /^(?!\/\/)[[:blank:]]*((?:.*[^:])?)(:::)(?:[[:blank:]]+(.*))?$/,
397
- '::::' => /^(?!\/\/)[[:blank:]]*((?:.*[^:])?)(::::)(?:[[:blank:]]+(.*))?$/,
398
- ';;' => /^(?!\/\/)[[:blank:]]*(.*)(;;)(?:[[:blank:]]+(.*))?$/
399
- },
400
-
401
- :illegal_sectid_chars => /&(?:[[:alpha:]]+|#[[:digit:]]+|#x[[:alnum:]]+);|\W+?/,
402
-
403
- # footnote:[text]
404
- # footnoteref:[id,text]
405
- # footnoteref:[id]
406
- :footnote_macro => /\\?(footnote|footnoteref):\[((?:\\\]|[^\]])*?)\]/,
407
-
408
- # gist::123456[]
409
- :generic_blk_macro => /^(\w[\w\-]*)::(\S+?)\[((?:\\\]|[^\]])*?)\]$/,
410
-
411
- # kbd:[F3]
412
- # kbd:[Ctrl+Shift+T]
413
- # kbd:[Ctrl+\]]
414
- # kbd:[Ctrl,T]
415
- # btn:[Save]
416
- :kbd_btn_macro => /\\?(?:kbd|btn):\[((?:\\\]|[^\]])+?)\]/,
417
-
418
- # menu:File[New...]
419
- # menu:View[Page Style > No Style]
420
- # menu:View[Page Style, No Style]
421
- :menu_macro => /\\?menu:(\w|\w.*?\S)\[[[:blank:]]*(.+?)?\]/,
422
-
423
- # "File > New..."
424
- :menu_inline_macro => /\\?"(\w[^"]*?[[:blank:]]*&gt;[[:blank:]]*[^"[:blank:]][^"]*)"/,
425
-
426
- # image::filename.png[Caption]
427
- # video::http://youtube.com/12345[Cats vs Dogs]
428
- :media_blk_macro => /^(image|video|audio)::(\S+?)\[((?:\\\]|[^\]])*?)\]$/,
429
-
430
- # image:filename.png[Alt Text]
431
- # image:http://example.com/images/filename.png[Alt Text]
432
- # image:filename.png[More [Alt\] Text] (alt text becomes "More [Alt] Text")
433
- # icon:github[large]
434
- :image_macro => /\\?(?:image|icon):([^:\[][^\[]*)\[((?:\\\]|[^\]])*?)\]/,
435
-
436
- # indexterm:[Tigers,Big cats]
437
- # (((Tigers,Big cats)))
438
- :indexterm_macro => /\\?(?:indexterm:(?:\[((?:\\\]|[^\]])*?)\])|\(\(\((.*?)\)\)\)(?!\)))/m,
439
-
440
- # indexterm2:[Tigers]
441
- # ((Tigers))
442
- :indexterm2_macro => /\\?(?:indexterm2:(?:\[((?:\\\]|[^\]])*?)\])|\(\((.*?)\)\)(?!\)))/m,
443
-
444
- # whitespace at the beginning of the line
445
- :leading_blanks => /^([[:blank:]]*)/,
446
-
447
- # leading parent directory references in path
448
- :leading_parent_dirs => /^(?:\.\.\/)*/,
449
-
450
- # + From the Asciidoc User Guide: "A plus character preceded by at
451
- # least one space character at the end of a non-blank line forces
452
- # a line break. It generates a line break (br) tag for HTML outputs.
453
- #
454
- # + (would not match because there's no space before +)
455
- # + (would match and capture '')
456
- # Foo + (would and capture 'Foo')
457
- :line_break => /^(.*)[[:blank:]]\+$/,
458
-
459
- # inline link and some inline link macro
460
- # FIXME revisit!
461
- :link_inline => %r{(^|link:|\s|>|&lt;|[\(\)\[\]])(\\?(?:https?|ftp|irc)://[^\s\[\]<]*[^\s.,\[\]<])(?:\[((?:\\\]|[^\]])*?)\])?},
462
-
463
- # inline link macro
464
- # link:path[label]
465
- :link_macro => /\\?(?:link|mailto):([^\s\[]+)(?:\[((?:\\\]|[^\]])*?)\])/,
466
-
467
- # inline email address
468
- # doc.writer@asciidoc.org
469
- :email_inline => /[\\>:]?\w[\w.%+-]*@[[:alnum:]][[:alnum:].-]*\.[[:alpha:]]{2,4}\b/,
470
-
471
- # <TAB>Foo or one-or-more-spaces-or-tabs then whatever
472
- :lit_par => /^([[:blank:]]+.*)$/,
473
-
474
- # . Foo (up to 5 consecutive dots)
475
- # 1. Foo (arabic, default)
476
- # a. Foo (loweralpha)
477
- # A. Foo (upperalpha)
478
- # i. Foo (lowerroman)
479
- # I. Foo (upperroman)
480
- # REVIEW leading space has already been stripped, so may not need in regex
481
- :olist => /^[[:blank:]]*(\.{1,5}|\d+\.|[A-Za-z]\.|[IVXivx]+\))[[:blank:]]+(.*)$/,
482
-
483
- # ''' (ruler)
484
- # <<< (pagebreak)
485
- :break_line => /^('|<){3,}$/,
486
-
487
- # ''' or ' ' ' (ruler)
488
- # --- or - - - (ruler)
489
- # *** or * * * (ruler)
490
- # <<< (pagebreak)
491
- :break_line_plus => /^(?:'|<){3,}$|^ {0,3}([-\*_])( *)\1\2\1$/,
492
-
493
- # inline passthrough macros
494
- # +++text+++
495
- # $$text$$
496
- # pass:quotes[text]
497
- :pass_macro => /\\?(?:(\+{3}|\${2})(.*?)\1|pass:([a-z,]*)\[((?:\\\]|[^\]])*?)\])/m,
498
-
499
- # passthrough macro allowed in value of attribute assignment
500
- # pass:[text]
501
- :pass_macro_basic => /^pass:([a-z,]*)\[(.*)\]$/,
502
-
503
- # inline literal passthrough macro
504
- # `text`
505
- :pass_lit => /(^|[^`\w])(?:\[([^\]]+?)\])?(\\?`([^`\s]|[^`\s].*?\S)`)(?![`\w])/m,
506
-
507
- # placeholder for extracted passthrough text
508
- :pass_placeholder => /\e(\d+)\e/,
509
-
510
- # The document revision info line the appears immediately following the
511
- # document title author info line, if present
512
- # v1.0, 2013-01-01: Ring in the new year release
513
- :revision_info => /^(?:\D*(.*?),)?(?:\s*(?!:)(.*?))(?:\s*(?!^):\s*(.*))?$/,
514
-
515
- # \' within a word
516
- :single_quote_esc => /(\w)\\'(\w)/,
517
- # an alternative if our backend generated single-quoted html/xml attributes
518
- #:single_quote_esc => /(\w|=)\\'(\w)/,
519
-
520
- # used for sanitizing attribute names
521
- :illegal_attr_name_chars => /[^\w\-]/,
435
+ # Matches a conditional preprocessor directive (e.g., ifdef, ifndef, ifeval and endif).
436
+ #
437
+ # Examples
438
+ #
439
+ # ifdef::basebackend-html[]
440
+ # ifndef::theme[]
441
+ # ifeval::["{asciidoctor-version}" >= "0.1.0"]
442
+ # ifdef::asciidoctor[Asciidoctor!]
443
+ # endif::theme[]
444
+ # endif::basebackend-html[]
445
+ # endif::[]
446
+ #
447
+ ConditionalDirectiveRx = /^\\?(ifdef|ifndef|ifeval|endif)::(\S*?(?:([,\+])\S+?)?)\[(.+)?\]$/
448
+
449
+ # Matches a restricted (read as safe) eval expression.
450
+ #
451
+ # Examples
452
+ #
453
+ # "{asciidoctor-version}" >= "0.1.0"
454
+ #
455
+ EvalExpressionRx = /^(\S.*?)#{CG_BLANK}*(==|!=|<=|>=|<|>)#{CG_BLANK}*(\S.*)$/
456
+
457
+ # Matches an include preprocessor directive.
458
+ #
459
+ # Examples
460
+ #
461
+ # include::chapter1.ad[]
462
+ # include::example.txt[lines=1;2;5..10]
463
+ #
464
+ IncludeDirectiveRx = /^\\?include::([^\[]+)\[(.*?)\]$/
465
+
466
+ # Matches a trailing tag directive in an include file.
467
+ #
468
+ # Examples
469
+ #
470
+ # // tag::try-catch[]
471
+ # try {
472
+ # someMethod();
473
+ # catch (Exception e) {
474
+ # log(e);
475
+ # }
476
+ # // end::try-catch[]
477
+ TagDirectiveRx = /\b(?:tag|end)::\S+\[\]$/
478
+
479
+ ## Attribute entries and references
480
+
481
+ # Matches a document attribute entry.
482
+ #
483
+ # Examples
484
+ #
485
+ # :foo: bar
486
+ # :First Name: Dan
487
+ # :sectnums!:
488
+ # :!toc:
489
+ # :long-entry: Attribute value lines ending in ' +'
490
+ # are joined together as a single value,
491
+ # collapsing the line breaks and indentation to
492
+ # a single space.
493
+ #
494
+ AttributeEntryRx = /^:(!?\w.*?):(?:#{CG_BLANK}+(.*))?$/
495
+
496
+ # Matches invalid characters in an attribute name.
497
+ InvalidAttributeNameCharsRx = /[^\w\-]/
498
+
499
+ # Matches the pass inline macro allowed in value of attribute assignment.
500
+ #
501
+ # Examples
502
+ #
503
+ # pass:[text]
504
+ #
505
+ AttributeEntryPassMacroRx = /^pass:([a-z,]*)\[(.*)\]$/
506
+
507
+ # Matches an inline attribute reference.
508
+ #
509
+ # Examples
510
+ #
511
+ # {foo}
512
+ # {counter:pcount:1}
513
+ # {set:foo:bar}
514
+ # {set:name!}
515
+ #
516
+ AttributeReferenceRx = /(\\)?\{((set|counter2?):.+?|\w+(?:[\-]\w+)*)(\\)?\}/
517
+
518
+ ## Paragraphs and delimited blocks
519
+
520
+ # Matches an anchor (i.e., id + optional reference text) on a line above a block.
521
+ #
522
+ # Examples
523
+ #
524
+ # [[idname]]
525
+ # [[idname,Reference Text]]
526
+ #
527
+ BlockAnchorRx = /^\[\[(?:|([#{CC_ALPHA}:_][#{CC_WORD}:.-]*)(?:,#{CG_BLANK}*(\S.*))?)\]\]$/
528
+
529
+ # Matches an attribute list above a block element.
530
+ #
531
+ # Examples
532
+ #
533
+ # # strictly positional
534
+ # [quote, Adam Smith, Wealth of Nations]
535
+ #
536
+ # # name/value pairs
537
+ # [NOTE, caption="Good to know"]
538
+ #
539
+ # # as attribute reference
540
+ # [{lead}]
541
+ #
542
+ BlockAttributeListRx = /^\[(|#{CG_BLANK}*[#{CC_WORD}\{,.#"'%].*)\]$/
543
+
544
+ # A combined pattern that matches either a block anchor or a block attribute list.
545
+ #
546
+ # TODO this one gets hit a lot, should be optimized as much as possible
547
+ BlockAttributeLineRx = /^\[(|#{CG_BLANK}*[#{CC_WORD}\{,.#"'%].*|\[(?:|[#{CC_ALPHA}:_][#{CC_WORD}:.-]*(?:,#{CG_BLANK}*\S.*)?)\])\]$/
548
+
549
+ # Matches a title above a block.
550
+ #
551
+ # Examples
552
+ #
553
+ # .Title goes here
554
+ #
555
+ BlockTitleRx = /^\.([^\s.].*)$/
522
556
 
523
- # 1*h,2*,^3e
524
- :table_colspec => /^(?:(\d+)\*)?([<^>](?:\.[<^>]?)?|(?:[<^>]?\.)?[<^>])?(\d+%?)?([a-z])?$/,
557
+ # Matches an admonition label at the start of a paragraph.
558
+ #
559
+ # Examples
560
+ #
561
+ # NOTE: Just a little note.
562
+ # TIP: Don't forget!
563
+ #
564
+ AdmonitionParagraphRx = /^(#{ADMONITION_STYLES.to_a * '|'}):#{CG_BLANK}/
565
+
566
+ # Matches a literal paragraph, which is a line of text preceded by at least one space.
567
+ #
568
+ # Examples
569
+ #
570
+ # <SPACE>Foo
571
+ # <TAB>Foo
572
+ LiteralParagraphRx = /^(#{CG_BLANK}+.*)$/
525
573
 
526
- # 2.3+<.>m
527
- # TODO might want to use step-wise scan rather than this mega-regexp
528
- :table_cellspec => {
529
- :start => /^[[:blank:]]*(?:(\d+(?:\.\d*)?|(?:\d*\.)?\d+)([*+]))?([<^>](?:\.[<^>]?)?|(?:[<^>]?\.)?[<^>])?([a-z])?\|/,
530
- :end => /[[:blank:]]+(?:(\d+(?:\.\d*)?|(?:\d*\.)?\d+)([*+]))?([<^>](?:\.[<^>]?)?|(?:[<^>]?\.)?[<^>])?([a-z])?$/
531
- },
532
-
533
- # docbook45
534
- # html5
535
- :trailing_digit => /[[:digit:]]+$/,
536
-
537
- # .Foo but not . Foo or ..Foo
538
- :blk_title => /^\.([^\s.].*)$/,
574
+ # Matches a comment block.
575
+ #
576
+ # Examples
577
+ #
578
+ # ////
579
+ # This is a block comment.
580
+ # It can span one or more lines.
581
+ # ////
582
+ CommentBlockRx = %r{^/{4,}$}
539
583
 
540
- # matches double quoted text, capturing quote char and text (single-line)
541
- :dbl_quoted => /^("|)(.*)\1$/,
584
+ # Matches a comment line.
585
+ #
586
+ # Examples
587
+ #
588
+ # // and then whatever
589
+ #
590
+ CommentLineRx = %r{^//(?:[^/]|$)}
542
591
 
543
- # matches double quoted text, capturing quote char and text (multi-line)
544
- :m_dbl_quoted => /^("|)(.*)\1$/m,
592
+ ## Section titles
545
593
 
546
- # == Foo
547
- # ^ yields a level 2 title
548
- #
549
- # == Foo ==
550
- # ^ also yields a level 2 title
551
- #
552
- # both equivalent to this two-line version:
553
- # Foo
554
- # ~~~
594
+ # Matches a single-line (Atx-style) section title.
595
+ #
596
+ # Examples
597
+ #
598
+ # == Foo
599
+ # # ^ a level 1 (h2) section title
600
+ #
601
+ # == Foo ==
602
+ # # ^ also a level 1 (h2) section title
555
603
  #
556
604
  # match[1] is the delimiter, whose length determines the level
557
605
  # match[2] is the title itself
558
606
  # match[3] is an inline anchor, which becomes the section id
559
- :section_title => /^((?:=|#){1,6})\s+(\S.*?)(?:\s*\[\[([^\[]+)\]\])?(?:\s+\1)?$/,
607
+ AtxSectionRx = /^((?:=|#){1,6})#{CG_BLANK}+(\S.*?)(?:#{CG_BLANK}+\1)?$/
608
+
609
+ # Matches the restricted section name for a two-line (Setext-style) section title.
610
+ # The name cannot begin with a dot and has at least one alphanumeric character.
611
+ SetextSectionTitleRx = /^((?=.*#{CG_WORD}+.*)[^.].*?)$/
612
+
613
+ # Matches the underline in a two-line (Setext-style) section title.
614
+ #
615
+ # Examples
616
+ #
617
+ # ====== || ------ || ~~~~~~ || ^^^^^^ || ++++++
618
+ #
619
+ SetextSectionLineRx = /^(?:=|-|~|\^|\+)+$/
620
+
621
+ # Matches an anchor (i.e., id + optional reference text) inside a section title.
622
+ #
623
+ # Examples
624
+ #
625
+ # Section Title [[idname]]
626
+ # Section Title [[idname,Reference Text]]
627
+ #
628
+ InlineSectionAnchorRx = /^(.*?)#{CG_BLANK}+(\\)?\[\[([#{CC_ALPHA}:_][#{CC_WORD}:.-]*)(?:,#{CG_BLANK}*(\S.*?))?\]\]$/
629
+
630
+ # Matches invalid characters in a section id.
631
+ InvalidSectionIdCharsRx = /&(?:[a-zA-Z]{2,}|#\d{2,5}|#x[a-fA-F0-9]{2,4});|[^#{CC_WORD}]+?/
632
+
633
+ # Matches the block style used to designate a section title as a floating title.
634
+ #
635
+ # Examples
636
+ #
637
+ # [float]
638
+ # = Floating Title
639
+ #
640
+ FloatingTitleStyleRx = /^(?:float|discrete)\b/
641
+
642
+ ## Lists
643
+
644
+ # Detects the start of any list item.
645
+ AnyListRx = /^(?:<?\d+>#{CG_BLANK}+#{CG_GRAPH}|#{CG_BLANK}*(?:-|(?:\*|\.){1,5}|\d+\.|[a-zA-Z]\.|[IVXivx]+\))#{CG_BLANK}+#{CG_GRAPH}|#{CG_BLANK}*.*?(?::{2,4}|;;)(?:#{CG_BLANK}+#{CG_GRAPH}|$))/
646
+
647
+ # Matches an unordered list item (one level for hyphens, up to 5 levels for asterisks).
648
+ #
649
+ # Examples
650
+ #
651
+ # * Foo
652
+ # - Foo
653
+ #
654
+ UnorderedListRx = /^#{CG_BLANK}*(-|\*{1,5})#{CG_BLANK}+(.*)$/
655
+
656
+ # Matches an ordered list item (explicit numbering or up to 5 consecutive dots).
657
+ #
658
+ # Examples
659
+ #
660
+ # . Foo
661
+ # .. Foo
662
+ # 1. Foo (arabic, default)
663
+ # a. Foo (loweralpha)
664
+ # A. Foo (upperalpha)
665
+ # i. Foo (lowerroman)
666
+ # I. Foo (upperroman)
667
+ #
668
+ # NOTE leading space match is not always necessary, but is used for list reader
669
+ OrderedListRx = /^#{CG_BLANK}*(\.{1,5}|\d+\.|[a-zA-Z]\.|[IVXivx]+\))#{CG_BLANK}+(.*)$/
670
+
671
+ # Matches the ordinals for each type of ordered list.
672
+ OrderedListMarkerRxMap = {
673
+ :arabic => /\d+[.>]/,
674
+ :loweralpha => /[a-z]\./,
675
+ :lowerroman => /[ivx]+\)/,
676
+ :upperalpha => /[A-Z]\./,
677
+ :upperroman => /[IVX]+\)/
678
+ #:lowergreek => /[a-z]\]/
679
+ }
680
+
681
+ # Matches a definition list item.
682
+ #
683
+ # Examples
684
+ #
685
+ # foo::
686
+ # foo:::
687
+ # foo::::
688
+ # foo;;
689
+ #
690
+ # # should be followed by a definition, on the same line...
691
+ #
692
+ # foo:: That which precedes 'bar' (see also, <<bar>>)
693
+ #
694
+ # # ...or on a separate line
695
+ #
696
+ # foo::
697
+ # That which precedes 'bar' (see also, <<bar>>)
698
+ #
699
+ # # the term may be an attribute reference
700
+ #
701
+ # {foo_term}:: {foo_def}
702
+ #
703
+ # NOTE negative match for comment line is intentional since that isn't handled when looking for next list item
704
+ # QUESTION should we check for line comment in regex or when scanning the lines?
705
+ #
706
+ DefinitionListRx = /^(?!\/\/)#{CG_BLANK}*(.*?)(:{2,4}|;;)(?:#{CG_BLANK}+(.*))?$/
707
+
708
+ # Matches a sibling definition list item (which does not include the keyed type).
709
+ DefinitionListSiblingRx = {
710
+ # (?:.*[^:])? - a non-capturing group which grabs the longest sequence of characters that doesn't end w/ colon
711
+ '::' => /^(?!\/\/)#{CG_BLANK}*((?:.*[^:])?)(::)(?:#{CG_BLANK}+(.*))?$/,
712
+ ':::' => /^(?!\/\/)#{CG_BLANK}*((?:.*[^:])?)(:::)(?:#{CG_BLANK}+(.*))?$/,
713
+ '::::' => /^(?!\/\/)#{CG_BLANK}*((?:.*[^:])?)(::::)(?:#{CG_BLANK}+(.*))?$/,
714
+ ';;' => /^(?!\/\/)#{CG_BLANK}*(.*)(;;)(?:#{CG_BLANK}+(.*))?$/
715
+ }
560
716
 
561
- # does not begin with a dot and has at least one alphanumeric character
562
- :section_name => /^((?=.*\w+.*)[^.].*?)$/,
717
+ # Matches a callout list item.
718
+ #
719
+ # Examples
720
+ #
721
+ # <1> Foo
722
+ #
723
+ CalloutListRx = /^<?(\d+)>#{CG_BLANK}+(.*)/
724
+
725
+ # Matches a callout reference inside literal text.
726
+ #
727
+ # Examples
728
+ # <1> (optionally prefixed by //, # or ;; line comment chars)
729
+ # <1> <2> (multiple callouts on one line)
730
+ # <!--1--> (for XML-based languages)
731
+ #
732
+ # NOTE special characters have already been replaced at this point during conversion to an SGML format
733
+ CalloutConvertRx = /(?:(?:\/\/|#|;;) ?)?(\\)?&lt;!?(--|)(\d+)\2&gt;(?=(?: ?\\?&lt;!?\2\d+\2&gt;)*#{CC_EOL})/
734
+ # NOTE (cont'd) ...but not while scanning
735
+ CalloutQuickScanRx = /\\?<!?(--|)(\d+)\1>(?=(?: ?\\?<!?\1\d+\1>)*#{CC_EOL})/
736
+ CalloutScanRx = /(?:(?:\/\/|#|;;) ?)?(\\)?<!?(--|)(\d+)\2>(?=(?: ?\\?<!?\2\d+\2>)*#{CC_EOL})/
737
+
738
+ # A Hash of regexps for lists used for dynamic access.
739
+ ListRxMap = {
740
+ :ulist => UnorderedListRx,
741
+ :olist => OrderedListRx,
742
+ :dlist => DefinitionListRx,
743
+ :colist => CalloutListRx
744
+ }
563
745
 
564
- # ====== || ------ || ~~~~~~ || ^^^^^^ || ++++++
565
- # TODO build from SECTION_LEVELS keys
566
- :section_underline => /^(?:=|-|~|\^|\+)+$/,
746
+ ## Tables
567
747
 
568
- # toc::[]
569
- # toc::[levels=2]
570
- :toc => /^toc::\[(.*?)\]$/,
748
+ # Parses the column spec (i.e., colspec) for a table.
749
+ #
750
+ # Examples
751
+ #
752
+ # 1*h,2*,^3e
753
+ #
754
+ ColumnSpecRx = /^(?:(\d+)\*)?([<^>](?:\.[<^>]?)?|(?:[<^>]?\.)?[<^>])?(\d+%?)?([a-z])?$/
571
755
 
572
- # * Foo (up to 5 consecutive asterisks)
573
- # - Foo
574
- # REVIEW leading space has already been stripped, so may not need in regex
575
- :ulist => /^[[:blank:]]*(-|\*{1,5})[[:blank:]]+(.*)$/,
756
+ # Parses the start and end of a cell spec (i.e., cellspec) for a table.
757
+ #
758
+ # Examples
759
+ #
760
+ # 2.3+<.>m
761
+ #
762
+ # FIXME use step-wise scan (or treetop) rather than this mega-regexp
763
+ CellSpecStartRx = /^#{CG_BLANK}*(?:(\d+(?:\.\d*)?|(?:\d*\.)?\d+)([*+]))?([<^>](?:\.[<^>]?)?|(?:[<^>]?\.)?[<^>])?([a-z])?$/
764
+ CellSpecEndRx = /#{CG_BLANK}+(?:(\d+(?:\.\d*)?|(?:\d*\.)?\d+)([*+]))?([<^>](?:\.[<^>]?)?|(?:[<^>]?\.)?[<^>])?([a-z])?$/
576
765
 
577
- # inline xref macro
578
- # <<id,reftext>> (special characters have already been escaped, hence the entity references)
579
- # xref:id[reftext]
580
- :xref_macro => /\\?(?:&lt;&lt;([\w":].*?)&gt;&gt;|xref:([\w":].*?)\[(.*?)\])/m,
766
+ ## Block macros
581
767
 
582
- # ifdef::basebackend-html[]
583
- # ifndef::theme[]
584
- # ifeval::["{asciidoctor-version}" >= "0.1.0"]
585
- # ifdef::asciidoctor[Asciidoctor!]
586
- # endif::theme[]
587
- # endif::basebackend-html[]
588
- # endif::[]
589
- :ifdef_macro => /^[\\]?(ifdef|ifndef|ifeval|endif)::(\S*?(?:([,\+])\S+?)?)\[(.+)?\]$/,
768
+ # Matches the general block macro pattern.
769
+ #
770
+ # Examples
771
+ #
772
+ # gist::123456[]
773
+ #
774
+ #--
775
+ # NOTE we've relaxed the match for target to accommodate the short format (e.g., name::[attrlist])
776
+ GenericBlockMacroRx = /^(#{CG_WORD}+)::(\S*?)\[((?:\\\]|[^\]])*?)\]$/
590
777
 
591
- # "{asciidoctor-version}" >= "0.1.0"
592
- :eval_expr => /^(\S.*?)[[:blank:]]*(==|!=|<=|>=|<|>)[[:blank:]]*(\S.*)$/,
593
- # ...or if we want to be more strict up front about what's on each side
594
- #:eval_expr => /^(true|false|("|'|)\{\w+(?:\-\w+)*\}\2|("|')[^\3]*\3|\-?\d+(?:\.\d+)*)[[:blank:]]*(==|!=|<=|>=|<|>)[[:blank:]]*(true|false|("|'|)\{\w+(?:\-\w+)*\}\6|("|')[^\7]*\7|\-?\d+(?:\.\d+)*)$/,
778
+ # Matches an image, video or audio block macro.
779
+ #
780
+ # Examples
781
+ #
782
+ # image::filename.png[Caption]
783
+ # video::http://youtube.com/12345[Cats vs Dogs]
784
+ #
785
+ MediaBlockMacroRx = /^(image|video|audio)::(\S+?)\[((?:\\\]|[^\]])*?)\]$/
595
786
 
596
- # include::chapter1.ad[]
597
- # include::example.txt[lines=1;2;5..10]
598
- :include_macro => /^\\?include::([^\[]+)\[(.*?)\]$/,
787
+ # Matches the TOC block macro.
788
+ #
789
+ # Examples
790
+ #
791
+ # toc::[]
792
+ # toc::[levels=2]
793
+ #
794
+ TocBlockMacroRx = /^toc::\[(.*?)\]$/
599
795
 
600
- # http://domain
601
- # https://domain
602
- # data:info
603
- :uri_sniff => %r{\A[[:alpha:]][[:alnum:].+-]*:/*},
796
+ ## Inline macros
604
797
 
605
- :uri_encode_chars => /[^\w\-.!~*';:@=+$,()\[\]]/,
798
+ # Matches an anchor (i.e., id + optional reference text) in the flow of text.
799
+ #
800
+ # Examples
801
+ #
802
+ # [[idname]]
803
+ # [[idname,Reference Text]]
804
+ # anchor:idname[]
805
+ # anchor:idname[Reference Text]
806
+ #
807
+ InlineAnchorRx = /\\?(?:\[\[([#{CC_ALPHA}:_][#{CC_WORD}:.-]*)(?:,#{CG_BLANK}*(\S.*?))?\]\]|anchor:(\S+)\[(.*?[^\\])?\])/
606
808
 
607
- :mantitle_manvolnum => /^(.*)\((.*)\)$/,
809
+ # Matches a bibliography anchor anywhere inline.
810
+ #
811
+ # Examples
812
+ #
813
+ # [[[Foo]]]
814
+ #
815
+ InlineBiblioAnchorRx = /\\?\[\[\[([#{CC_WORD}:][#{CC_WORD}:.-]*?)\]\]\]/
608
816
 
609
- :manname_manpurpose => /^(.*?)[[:blank:]]+-[[:blank:]]+(.*)$/
610
- }
817
+ # Matches an inline e-mail address.
818
+ #
819
+ # doc.writer@example.com
820
+ #
821
+ EmailInlineMacroRx = /([\\>:\/])?#{CG_WORD}[#{CC_WORD}.%+-]*@#{CG_ALNUM}[#{CC_ALNUM}.-]*\.#{CG_ALPHA}{2,4}\b/
611
822
 
612
- INTRINSICS = Hash.new{|h,k| STDERR.puts "Missing intrinsic: #{k.inspect}"; "{#{k}}"}.merge(
613
- {
823
+ # Matches an inline footnote macro, which is allowed to span multiple lines.
824
+ #
825
+ # Examples
826
+ # footnote:[text]
827
+ # footnoteref:[id,text]
828
+ # footnoteref:[id]
829
+ #
830
+ FootnoteInlineMacroRx = /\\?(footnote(?:ref)?):\[(#{CC_ALL}*?[^\\])\]/m
831
+
832
+ # Matches an image or icon inline macro.
833
+ #
834
+ # Examples
835
+ #
836
+ # image:filename.png[Alt Text]
837
+ # image:http://example.com/images/filename.png[Alt Text]
838
+ # image:filename.png[More [Alt\] Text] (alt text becomes "More [Alt] Text")
839
+ # icon:github[large]
840
+ #
841
+ ImageInlineMacroRx = /\\?(?:image|icon):([^:\[][^\[]*)\[((?:\\\]|[^\]])*?)\]/
842
+
843
+ # Matches an indexterm inline macro, which may span multiple lines.
844
+ #
845
+ # Examples
846
+ #
847
+ # indexterm:[Tigers,Big cats]
848
+ # (((Tigers,Big cats)))
849
+ # indexterm2:[Tigers]
850
+ # ((Tigers))
851
+ #
852
+ IndextermInlineMacroRx = /\\?(?:(indexterm2?):\[(#{CC_ALL}*?[^\\])\]|\(\((#{CC_ALL}+?)\)\)(?!\)))/m
853
+
854
+ # Matches either the kbd or btn inline macro.
855
+ #
856
+ # Examples
857
+ #
858
+ # kbd:[F3]
859
+ # kbd:[Ctrl+Shift+T]
860
+ # kbd:[Ctrl+\]]
861
+ # kbd:[Ctrl,T]
862
+ # btn:[Save]
863
+ #
864
+ KbdBtnInlineMacroRx = /\\?(?:kbd|btn):\[((?:\\\]|[^\]])+?)\]/
865
+
866
+ # Matches the delimiter used for kbd value.
867
+ #
868
+ # Examples
869
+ #
870
+ # Ctrl + Alt+T
871
+ # Ctrl,T
872
+ #
873
+ KbdDelimiterRx = /(?:\+|,)(?=#{CG_BLANK}*[^\1])/
874
+
875
+ # Matches an implicit link and some of the link inline macro.
876
+ #
877
+ # Examples
878
+ #
879
+ # http://github.com
880
+ # http://github.com[GitHub]
881
+ #
882
+ # FIXME revisit! the main issue is we need different rules for implicit vs explicit
883
+ LinkInlineRx = %r{(^|link:|&lt;|[\s>\(\)\[\];])(\\?(?:https?|file|ftp|irc)://[^\s\[\]<]*[^\s.,\[\]<])(?:\[((?:\\\]|[^\]])*?)\])?}
884
+
885
+ # Match a link or e-mail inline macro.
886
+ #
887
+ # Examples
888
+ #
889
+ # link:path[label]
890
+ # mailto:doc.writer@example.com[]
891
+ #
892
+ LinkInlineMacroRx = /\\?(?:link|mailto):([^\s\[]+)(?:\[((?:\\\]|[^\]])*?)\])/
893
+
894
+ # Matches a stem (and alternatives, asciimath and latexmath) inline macro, which may span multiple lines.
895
+ #
896
+ # Examples
897
+ #
898
+ # stem:[x != 0]
899
+ # asciimath:[x != 0]
900
+ # latexmath:[\sqrt{4} = 2]
901
+ #
902
+ StemInlineMacroRx = /\\?(stem|(?:latex|ascii)math):([a-z,]*)\[(#{CC_ALL}*?[^\\])\]/m
903
+
904
+ # Matches a menu inline macro.
905
+ #
906
+ # Examples
907
+ #
908
+ # menu:File[New...]
909
+ # menu:View[Page Style > No Style]
910
+ # menu:View[Page Style, No Style]
911
+ #
912
+ MenuInlineMacroRx = /\\?menu:(#{CG_WORD}|#{CG_WORD}.*?\S)\[#{CG_BLANK}*(.+?)?\]/
913
+
914
+ # Matches an implicit menu inline macro.
915
+ #
916
+ # Examples
917
+ #
918
+ # "File > New..."
919
+ #
920
+ MenuInlineRx = /\\?"(#{CG_WORD}[^"]*?#{CG_BLANK}*&gt;#{CG_BLANK}*[^" \t][^"]*)"/
921
+
922
+ # Matches an inline passthrough value, which may span multiple lines.
923
+ #
924
+ # Examples
925
+ #
926
+ # +text+
927
+ # `text` (compat)
928
+ #
929
+ # NOTE we always capture the attributes so we know when to use compatible (i.e., legacy) behavior
930
+ PassInlineRx = {
931
+ false => ['+', '`', /(^|[^#{CC_WORD};:])(?:\[([^\]]+?)\])?(\\?(\+|`)(\S|\S#{CC_ALL}*?\S)\4)(?!#{CG_WORD})/m],
932
+ true => ['`', nil, /(^|[^`#{CC_WORD}])(?:\[([^\]]+?)\])?(\\?(`)([^`\s]|[^`\s]#{CC_ALL}*?\S)\4)(?![`#{CC_WORD}])/m]
933
+ }
934
+
935
+ # Matches several variants of the passthrough inline macro, which may span multiple lines.
936
+ #
937
+ # Examples
938
+ #
939
+ # +++text+++
940
+ # $$text$$
941
+ # pass:quotes[text]
942
+ #
943
+ PassInlineMacroRx = /(?:(?:(\\?)\[([^\]]+?)\])?(\\{0,2})(\+{2,3}|\${2})(#{CC_ALL}*?)\4|(\\?)pass:([a-z,]*)\[(#{CC_ALL}*?[^\\])\])/m
944
+
945
+ # Matches an xref (i.e., cross-reference) inline macro, which may span multiple lines.
946
+ #
947
+ # Examples
948
+ #
949
+ # <<id,reftext>>
950
+ # xref:id[reftext]
951
+ #
952
+ # NOTE special characters have already been escaped, hence the entity references
953
+ XrefInlineMacroRx = /\\?(?:&lt;&lt;([#{CC_WORD}":]#{CC_ALL}*?)&gt;&gt;|xref:([#{CC_WORD}":]#{CC_ALL}*?)\[(#{CC_ALL}*?)\])/m
954
+
955
+ ## Layout
956
+
957
+ # Matches a trailing + preceded by at least one space character,
958
+ # which forces a hard line break (<br> tag in HTML outputs).
959
+ #
960
+ # Examples
961
+ #
962
+ # +
963
+ # Foo +
964
+ #
965
+ if RUBY_ENGINE == 'opal'
966
+ # NOTE JavaScript only treats ^ and $ as line boundaries in multiline regexp; . won't match newlines
967
+ LineBreakRx = /^(.*)[ \t]\+$/m
968
+ else
969
+ LineBreakRx = /^(.*)[[:blank:]]\+$/
970
+ end
971
+
972
+ # Matches an AsciiDoc horizontal rule or AsciiDoc page break.
973
+ #
974
+ # Examples
975
+ #
976
+ # ''' (horizontal rule)
977
+ # <<< (page break)
978
+ #
979
+ LayoutBreakLineRx = /^('|<){3,}$/
980
+
981
+ # Matches an AsciiDoc or Markdown horizontal rule or AsciiDoc page break.
982
+ #
983
+ # Examples
984
+ #
985
+ # ''' or ' ' ' (horizontal rule)
986
+ # --- or - - - (horizontal rule)
987
+ # *** or * * * (horizontal rule)
988
+ # <<< (page break)
989
+ #
990
+ LayoutBreakLinePlusRx = /^(?:'|<){3,}$|^ {0,3}([-\*_])( *)\1\2\1$/
991
+
992
+ ## General
993
+
994
+ # Matches a blank line.
995
+ #
996
+ # NOTE allows for empty space in line as it could be left by the template engine
997
+ BlankLineRx = /^#{CG_BLANK}*\n/
998
+
999
+ # Matches a comma or semi-colon delimiter.
1000
+ #
1001
+ # Examples
1002
+ #
1003
+ # one,two
1004
+ # three;four
1005
+ #
1006
+ DataDelimiterRx = /,|;/
1007
+
1008
+ # Matches one or more consecutive digits on a single line.
1009
+ #
1010
+ # Examples
1011
+ #
1012
+ # 29
1013
+ #
1014
+ DigitsRx = /^\d+$/
1015
+
1016
+ # Matches a single-line of text enclosed in double quotes, capturing the quote char and text.
1017
+ #
1018
+ # Examples
1019
+ #
1020
+ # "Who goes there?"
1021
+ #
1022
+ DoubleQuotedRx = /^("|)(.*)\1$/
1023
+
1024
+ # Matches multiple lines of text enclosed in double quotes, capturing the quote char and text.
1025
+ #
1026
+ # Examples
1027
+ #
1028
+ # "I am a run-on sentence and I like
1029
+ # to take up multiple lines and I
1030
+ # still want to be matched."
1031
+ #
1032
+ DoubleQuotedMultiRx = /^("|)(#{CC_ALL}*)\1$/m
1033
+
1034
+ # Matches one or more consecutive digits at the end of a line.
1035
+ #
1036
+ # Examples
1037
+ #
1038
+ # docbook45
1039
+ # html5
1040
+ #
1041
+ TrailingDigitsRx = /\d+$/
1042
+
1043
+ # Matches a space escaped by a backslash.
1044
+ #
1045
+ # Examples
1046
+ #
1047
+ # one\ two\ three
1048
+ #
1049
+ EscapedSpaceRx = /\\(#{CG_BLANK})/
1050
+
1051
+ # Matches a space delimiter that's not escaped.
1052
+ #
1053
+ # Examples
1054
+ #
1055
+ # one two three four
1056
+ #
1057
+ SpaceDelimiterRx = /([^\\])#{CG_BLANK}+/
1058
+
1059
+ # Matches a + or - modifier in a subs list
1060
+ #
1061
+ SubModifierSniffRx = /[+-]/
1062
+
1063
+ # Matches any character with multibyte support explicitly enabled (length of multibyte char = 1)
1064
+ #
1065
+ # NOTE If necessary to hide use of the language modifier (u) from JavaScript, use (Regexp.new '.', false, 'u')
1066
+ #
1067
+ UnicodeCharScanRx = unless RUBY_ENGINE == 'opal'
1068
+ FORCE_UNICODE_LINE_LENGTH ? /./u : nil
1069
+ end
1070
+
1071
+ # Detects strings that resemble URIs.
1072
+ #
1073
+ # Examples
1074
+ # http://domain
1075
+ # https://domain
1076
+ # data:info
1077
+ #
1078
+ UriSniffRx = %r{^#{CG_ALPHA}[#{CC_ALNUM}.+-]*:/{0,2}}
1079
+
1080
+ # Detects the end of an implicit URI in the text
1081
+ #
1082
+ # Examples
1083
+ #
1084
+ # (http://google.com)
1085
+ # &gt;http://google.com&lt;
1086
+ # (See http://google.com):
1087
+ #
1088
+ UriTerminator = /[);:]$/
1089
+
1090
+ # Detects XML tags
1091
+ XmlSanitizeRx = /<[^>]+>/
1092
+
1093
+ # Unused
1094
+
1095
+ # Detects any fenced block delimiter, including:
1096
+ # listing, literal, example, sidebar, quote, passthrough, table and fenced code
1097
+ # Does not match open blocks or air quotes
1098
+ # TIP position the most common blocks towards the front of the pattern
1099
+ #BlockDelimiterRx = %r{^(?:(?:-|\.|=|\*|_|\+|/){4,}|[\|,;!]={3,}|(?:`|~){3,}.*)$}
1100
+
1101
+ # Matches an escaped single quote within a word
1102
+ #
1103
+ # Examples
1104
+ #
1105
+ # Here\'s Johnny!
1106
+ #
1107
+ #EscapedSingleQuoteRx = /(#{CG_WORD})\\'(#{CG_WORD})/
1108
+ # an alternative if our backend generates single-quoted html/xml attributes
1109
+ #EscapedSingleQuoteRx = /(#{CG_WORD}|=)\\'(#{CG_WORD})/
1110
+
1111
+ # Matches whitespace at the beginning of the line
1112
+ #LeadingSpacesRx = /^(#{CG_BLANK}*)/
1113
+
1114
+ # Matches parent directory references at the beginning of a path
1115
+ #LeadingParentDirsRx = /^(?:\.\.\/)*/
1116
+
1117
+ #StripLineWise = /\A(?:\s*\n)?(#{CC_ALL}*?)\s*\z/m
1118
+ #end
1119
+
1120
+ INTRINSIC_ATTRIBUTES = {
614
1121
  'startsb' => '[',
615
1122
  'endsb' => ']',
616
- 'brvbar' => '|',
1123
+ 'vbar' => '|',
617
1124
  'caret' => '^',
618
1125
  'asterisk' => '*',
619
1126
  'tilde' => '~',
@@ -636,67 +1143,77 @@ module Asciidoctor
636
1143
  'ldquo' => '&#8220;',
637
1144
  'rdquo' => '&#8221;',
638
1145
  'wj' => '&#8288;',
1146
+ 'brvbar' => '&#166;',
639
1147
  'amp' => '&',
640
1148
  'lt' => '<',
641
1149
  'gt' => '>'
642
- }
643
- )
644
-
645
- SPECIAL_CHARS = {
646
- '<' => '&lt;',
647
- '>' => '&gt;',
648
- '&' => '&amp;'
649
1150
  }
650
1151
 
651
- SPECIAL_CHARS_PATTERN = /[#{SPECIAL_CHARS.keys.join}]/
652
- #SPECIAL_CHARS_PATTERN = /(?:<|>|&(?![[:alpha:]]{2,};|#[[:digit:]]{2,}+;|#x[[:alnum:]]{2,}+;))/
653
-
654
1152
  # unconstrained quotes:: can appear anywhere
655
1153
  # constrained quotes:: must be bordered by non-word characters
656
- # NOTE these substituions are processed in the order they appear here and
1154
+ # NOTE these substitutions are processed in the order they appear here and
657
1155
  # the order in which they are replaced is important
658
- QUOTE_SUBS = [
659
-
1156
+ quote_subs = [
660
1157
  # **strong**
661
- [:strong, :unconstrained, /\\?(?:\[([^\]]+?)\])?\*\*(.+?)\*\*/m],
1158
+ [:strong, :unconstrained, /\\?(?:\[([^\]]+?)\])?\*\*(#{CC_ALL}+?)\*\*/m],
662
1159
 
663
1160
  # *strong*
664
- [:strong, :constrained, /(^|[^\w;:}])(?:\[([^\]]+?)\])?\*(\S|\S.*?\S)\*(?=\W|$)/m],
665
-
666
- # ``double-quoted''
667
- [:double, :constrained, /(^|[^\w;:}])(?:\[([^\]]+?)\])?``(\S|\S.*?\S)''(?=\W|$)/m],
1161
+ [:strong, :constrained, /(^|[^#{CC_WORD};:}])(?:\[([^\]]+?)\])?\*(\S|\S#{CC_ALL}*?\S)\*(?!#{CG_WORD})/m],
668
1162
 
669
- # 'emphasis'
670
- [:emphasis, :constrained, /(^|[^\w;:}])(?:\[([^\]]+?)\])?'(\S|\S.*?\S)'(?=\W|$)/m],
1163
+ # "`double-quoted`"
1164
+ [:double, :constrained, /(^|[^#{CC_WORD};:}])(?:\[([^\]]+?)\])?"`(\S|\S#{CC_ALL}*?\S)`"(?!#{CG_WORD})/m],
671
1165
 
672
- # `single-quoted'
673
- [:single, :constrained, /(^|[^\w;:}])(?:\[([^\]]+?)\])?`(\S|\S.*?\S)'(?=\W|$)/m],
1166
+ # '`single-quoted`'
1167
+ [:single, :constrained, /(^|[^#{CC_WORD};:`}])(?:\[([^\]]+?)\])?'`(\S|\S#{CC_ALL}*?\S)`'(?!#{CG_WORD})/m],
674
1168
 
675
- # ++monospaced++
676
- [:monospaced, :unconstrained, /\\?(?:\[([^\]]+?)\])?\+\+(.+?)\+\+/m],
1169
+ # ``monospaced``
1170
+ [:monospaced, :unconstrained, /\\?(?:\[([^\]]+?)\])?``(#{CC_ALL}+?)``/m],
677
1171
 
678
- # +monospaced+
679
- [:monospaced, :constrained, /(^|[^\w;:}])(?:\[([^\]]+?)\])?\+(\S|\S.*?\S)\+(?=\W|$)/m],
1172
+ # `monospaced`
1173
+ [:monospaced, :constrained, /(^|[^#{CC_WORD};:"'`}])(?:\[([^\]]+?)\])?`(\S|\S#{CC_ALL}*?\S)`(?![#{CC_WORD}"'`])/m],
680
1174
 
681
1175
  # __emphasis__
682
- [:emphasis, :unconstrained, /\\?(?:\[([^\]]+?)\])?\_\_(.+?)\_\_/m],
1176
+ [:emphasis, :unconstrained, /\\?(?:\[([^\]]+?)\])?__(#{CC_ALL}+?)__/m],
683
1177
 
684
1178
  # _emphasis_
685
- [:emphasis, :constrained, /(^|[^\w;:}])(?:\[([^\]]+?)\])?_(\S|\S.*?\S)_(?=\W|$)/m],
1179
+ [:emphasis, :constrained, /(^|[^#{CC_WORD};:}])(?:\[([^\]]+?)\])?_(\S|\S#{CC_ALL}*?\S)_(?!#{CG_WORD})/m],
686
1180
 
687
- # ##unquoted##
688
- [:none, :unconstrained, /\\?(?:\[([^\]]+?)\])?##(.+?)##/m],
1181
+ # ##mark## (referred to in AsciiDoc Python as unquoted)
1182
+ [:mark, :unconstrained, /\\?(?:\[([^\]]+?)\])?##(#{CC_ALL}+?)##/m],
689
1183
 
690
- # #unquoted#
691
- [:none, :constrained, /(^|[^\w;:}])(?:\[([^\]]+?)\])?#(\S|\S.*?\S)#(?=\W|$)/m],
1184
+ # #mark# (referred to in AsciiDoc Python as unquoted)
1185
+ [:mark, :constrained, /(^|[^#{CC_WORD};:}])(?:\[([^\]]+?)\])?#(\S|\S#{CC_ALL}*?\S)#(?!#{CG_WORD})/m],
692
1186
 
693
1187
  # ^superscript^
694
- [:superscript, :unconstrained, /\\?(?:\[([^\]]+?)\])?\^(.+?)\^/m],
1188
+ [:superscript, :unconstrained, /\\?(?:\[([^\]]+?)\])?\^(\S+?)\^/],
695
1189
 
696
1190
  # ~subscript~
697
- [:subscript, :unconstrained, /\\?(?:\[([^\]]+?)\])?\~(.+?)\~/m]
1191
+ [:subscript, :unconstrained, /\\?(?:\[([^\]]+?)\])?~(\S+?)~/]
698
1192
  ]
699
1193
 
1194
+ compat_quote_subs = quote_subs.dup
1195
+ # ``quoted''
1196
+ compat_quote_subs[2] = [:double, :constrained, /(^|[^#{CC_WORD};:}])(?:\[([^\]]+?)\])?``(\S|\S#{CC_ALL}*?\S)''(?!#{CG_WORD})/m]
1197
+ # `quoted'
1198
+ compat_quote_subs[3] = [:single, :constrained, /(^|[^#{CC_WORD};:}])(?:\[([^\]]+?)\])?`(\S|\S#{CC_ALL}*?\S)'(?!#{CG_WORD})/m]
1199
+ # ++monospaced++
1200
+ compat_quote_subs[4] = [:monospaced, :unconstrained, /\\?(?:\[([^\]]+?)\])?\+\+(#{CC_ALL}+?)\+\+/m]
1201
+ # +monospaced+
1202
+ compat_quote_subs[5] = [:monospaced, :constrained, /(^|[^#{CC_WORD};:}])(?:\[([^\]]+?)\])?\+(\S|\S#{CC_ALL}*?\S)\+(?!#{CG_WORD})/m]
1203
+ # #unquoted#
1204
+ #compat_quote_subs[8] = [:unquoted, *compat_quote_subs[8][1..-1]]
1205
+ # ##unquoted##
1206
+ #compat_quote_subs[9] = [:unquoted, *compat_quote_subs[9][1..-1]]
1207
+ # 'emphasis'
1208
+ compat_quote_subs.insert 3, [:emphasis, :constrained, /(^|[^#{CC_WORD};:}])(?:\[([^\]]+?)\])?'(\S|\S#{CC_ALL}*?\S)'(?!#{CG_WORD})/m]
1209
+
1210
+ QUOTE_SUBS = {
1211
+ false => quote_subs,
1212
+ true => compat_quote_subs
1213
+ }
1214
+ quote_subs = nil
1215
+ compat_quote_subs = nil
1216
+
700
1217
  # NOTE in Ruby 1.8.7, [^\\] does not match start of line,
701
1218
  # so we need to match it explicitly
702
1219
  # order is significant
@@ -708,26 +1225,31 @@ module Asciidoctor
708
1225
  # (TM)
709
1226
  [/\\?\(TM\)/, '&#8482;', :none],
710
1227
  # foo -- bar
1228
+ # FIXME this drops the endline if it appears at end of line
711
1229
  [/(^|\n| |\\)--( |\n|$)/, '&#8201;&#8212;&#8201;', :none],
712
1230
  # foo--bar
713
- [/(\w)\\?--(?=\w)/, '&#8212;', :leading],
1231
+ [/(#{CG_WORD})\\?--(?=#{CG_WORD})/, '&#8212;&#8203;', :leading],
714
1232
  # ellipsis
715
- [/\\?\.\.\./, '&#8230;', :leading],
716
- # single quotes
717
- [/(\w)\\?'(\w)/, '&#8217;', :bounding],
1233
+ [/\\?\.\.\./, '&#8230;&#8203;', :leading],
1234
+ # right single quote
1235
+ [/\\?`'/, '&#8217;', :none],
1236
+ # apostrophe (inside a word)
1237
+ [/(#{CG_ALNUM})\\?'(?=#{CG_ALPHA})/, '&#8217;', :leading],
718
1238
  # right arrow ->
719
1239
  [/\\?-&gt;/, '&#8594;', :none],
720
1240
  # right double arrow =>
721
1241
  [/\\?=&gt;/, '&#8658;', :none],
722
1242
  # left arrow <-
723
1243
  [/\\?&lt;-/, '&#8592;', :none],
724
- # right left arrow <=
1244
+ # left double arrow <=
725
1245
  [/\\?&lt;=/, '&#8656;', :none],
726
1246
  # restore entities
727
- [/\\?(&)amp;((?:[[:alpha:]]+|#[[:digit:]]+|#x[[:alnum:]]+);)/, '', :bounding]
1247
+ [/\\?(&)amp;((?:[a-zA-Z]+|#\d{2,5}|#x[a-fA-F0-9]{2,4});)/, '', :bounding]
728
1248
  ]
729
1249
 
730
- # Public: Parse the AsciiDoc source input into an Asciidoctor::Document
1250
+ class << self
1251
+
1252
+ # Public: Parse the AsciiDoc source input into a {Document}
731
1253
  #
732
1254
  # Accepts input as an IO (or StringIO), String or String Array object. If the
733
1255
  # input is a File, information about the file is stored in attributes on the
@@ -736,79 +1258,84 @@ module Asciidoctor
736
1258
  # input - the AsciiDoc source as a IO, String or Array.
737
1259
  # options - a String, Array or Hash of options to control processing (default: {})
738
1260
  # String and Array values are converted into a Hash.
739
- # See Asciidoctor::Document#initialize for details about options.
1261
+ # See {Document#initialize} for details about these options.
740
1262
  #
741
- # returns the Asciidoctor::Document
742
- def self.load(input, options = {})
743
- if (monitor = options.fetch(:monitor, false))
744
- start = Time.now
1263
+ # Returns the Document
1264
+ def load input, options = {}
1265
+ options = options.dup
1266
+ if (timings = options[:timings])
1267
+ timings.start :read
745
1268
  end
746
1269
 
747
- attrs = (options[:attributes] ||= {})
748
- if attrs.is_a?(Hash) || (RUBY_ENGINE == 'jruby' && attrs.is_a?(Java::JavaUtil::Map))
749
- # all good; placed here as optimization
750
- elsif attrs.is_a? Array
751
- attrs = options[:attributes] = attrs.inject({}) do |accum, entry|
1270
+ attributes = options[:attributes] = if !(attrs = options[:attributes])
1271
+ {}
1272
+ elsif (attrs.is_a? ::Hash) || (::RUBY_ENGINE_JRUBY && (attrs.is_a? ::Java::JavaUtil::Map))
1273
+ attrs.dup
1274
+ elsif attrs.is_a? ::Array
1275
+ attrs.inject({}) do |accum, entry|
752
1276
  k, v = entry.split '=', 2
753
1277
  accum[k] = v || ''
754
1278
  accum
755
1279
  end
756
- elsif attrs.is_a? String
1280
+ elsif attrs.is_a? ::String
757
1281
  # convert non-escaped spaces into null character, so we split on the
758
1282
  # correct spaces chars, and restore escaped spaces
759
- attrs = attrs.gsub(REGEXP[:space_delim], "\\1\0").gsub(REGEXP[:escaped_space], '\1')
1283
+ capture_1 = ::RUBY_ENGINE_OPAL ? '$1' : '\1'
1284
+ attrs = attrs.gsub(SpaceDelimiterRx, %(#{capture_1}#{NULL})).gsub(EscapedSpaceRx, capture_1)
760
1285
 
761
- attrs = options[:attributes] = attrs.split("\0").inject({}) do |accum, entry|
1286
+ attrs.split(NULL).inject({}) do |accum, entry|
762
1287
  k, v = entry.split '=', 2
763
1288
  accum[k] = v || ''
764
1289
  accum
765
1290
  end
766
- elsif attrs.respond_to?('keys') && attrs.respond_to?('[]')
1291
+ elsif (attrs.respond_to? :keys) && (attrs.respond_to? :[])
767
1292
  # convert it to a Hash as we know it
768
1293
  original_attrs = attrs
769
- attrs = options[:attributes] = {}
1294
+ attrs = {}
770
1295
  original_attrs.keys.each do |key|
771
1296
  attrs[key] = original_attrs[key]
772
1297
  end
1298
+ attrs
773
1299
  else
774
- raise ArgumentError, "illegal type for attributes option: #{attrs.class.ancestors}"
1300
+ raise ::ArgumentError, %(illegal type for attributes option: #{attrs.class.ancestors})
775
1301
  end
776
1302
 
777
1303
  lines = nil
778
- if input.is_a? File
1304
+ if input.is_a? ::File
779
1305
  lines = input.readlines
780
1306
  input_mtime = input.mtime
781
- input_path = File.expand_path(input.path)
1307
+ input = ::File.new ::File.expand_path input.path
1308
+ input_path = input.path
782
1309
  # hold off on setting infile and indir until we get a better sense of their purpose
783
- attrs['docfile'] = input_path
784
- attrs['docdir'] = File.dirname(input_path)
785
- attrs['docname'] = File.basename(input_path, File.extname(input_path))
786
- attrs['docdate'] = input_mtime.strftime('%Y-%m-%d')
787
- attrs['doctime'] = input_mtime.strftime('%H:%M:%S %Z')
788
- attrs['docdatetime'] = [attrs['docdate'], attrs['doctime']] * ' '
789
- elsif input.respond_to?(:readlines)
790
- input.rewind rescue nil
1310
+ attributes['docfile'] = input_path
1311
+ attributes['docdir'] = ::File.dirname input_path
1312
+ attributes['docname'] = ::File.basename input_path, (::File.extname input_path)
1313
+ attributes['docdate'] = docdate = input_mtime.strftime('%Y-%m-%d')
1314
+ attributes['doctime'] = doctime = input_mtime.strftime('%H:%M:%S %Z')
1315
+ attributes['docdatetime'] = %(#{docdate} #{doctime})
1316
+ elsif input.respond_to? :readlines
1317
+ # NOTE tty, pipes & sockets can't be rewound, but can't be sniffed easily either
1318
+ # just fail the rewind operation silently to handle all cases
1319
+ begin
1320
+ input.rewind
1321
+ rescue
1322
+ end
791
1323
  lines = input.readlines
792
- elsif input.is_a?(String)
1324
+ elsif input.is_a? ::String
793
1325
  lines = input.lines.entries
794
- elsif input.is_a?(Array)
1326
+ elsif input.is_a? ::Array
795
1327
  lines = input.dup
796
1328
  else
797
- raise "Unsupported input type: #{input.class}"
1329
+ raise ::ArgumentError, %(Unsupported input type: #{input.class})
798
1330
  end
799
1331
 
800
- if monitor
801
- read_time = Time.now - start
802
- start = Time.now
1332
+ if timings
1333
+ timings.record :read
1334
+ timings.start :parse
803
1335
  end
804
1336
 
805
- doc = Document.new(lines, options)
806
- if monitor
807
- parse_time = Time.now - start
808
- monitor[:read] = read_time
809
- monitor[:parse] = parse_time
810
- monitor[:load] = read_time + parse_time
811
- end
1337
+ doc = (options[:parse] == false ? (Document.new lines, options) : (Document.new lines,options).parse)
1338
+ timings.record :parse if timings
812
1339
  doc
813
1340
  end
814
1341
 
@@ -823,13 +1350,13 @@ module Asciidoctor
823
1350
  # String and Array values are converted into a Hash.
824
1351
  # See Asciidoctor::Document#initialize for details about options.
825
1352
  #
826
- # returns the Asciidoctor::Document
827
- def self.load_file(filename, options = {})
828
- Asciidoctor.load(File.new(filename), options)
1353
+ # Returns the Asciidoctor::Document
1354
+ def load_file filename, options = {}
1355
+ self.load ::File.new(filename || ''), options
829
1356
  end
830
1357
 
831
- # Public: Parse the AsciiDoc source input into an Asciidoctor::Document and render it
832
- # to the specified backend format
1358
+ # Public: Parse the AsciiDoc source input into an Asciidoctor::Document and
1359
+ # convert it to the specified backend format.
833
1360
  #
834
1361
  # Accepts input as an IO, String or String Array object. If the
835
1362
  # input is a File, information about the file is stored in
@@ -846,130 +1373,135 @@ module Asciidoctor
846
1373
  # outside of the Document#base_dir in safe mode, an IOError is raised.
847
1374
  #
848
1375
  # If the output is going to be written to a file, the header and footer are
849
- # rendered unless specified otherwise (writing to a file implies creating a
850
- # standalone document). Otherwise, the header and footer are not rendered by
851
- # default and the rendered output is returned.
1376
+ # included unless specified otherwise (writing to a file implies creating a
1377
+ # standalone document). Otherwise, the header and footer are not included by
1378
+ # default and the converted result is returned.
852
1379
  #
853
1380
  # input - the String AsciiDoc source filename
854
1381
  # options - a String, Array or Hash of options to control processing (default: {})
855
1382
  # String and Array values are converted into a Hash.
856
1383
  # See Asciidoctor::Document#initialize for details about options.
857
1384
  #
858
- # returns the Document object if the rendered result String is written to a
859
- # file, otherwise the rendered result String
860
- def self.render(input, options = {})
861
- in_place = options.delete(:in_place) || false
1385
+ # Returns the Document object if the converted String is written to a
1386
+ # file, otherwise the converted String
1387
+ def convert input, options = {}
1388
+ options = options.dup
862
1389
  to_file = options.delete(:to_file)
863
1390
  to_dir = options.delete(:to_dir)
864
1391
  mkdirs = options.delete(:mkdirs) || false
865
- monitor = options.fetch(:monitor, false)
866
-
867
- write_in_place = in_place && input.is_a?(File)
868
- write_to_target = to_file || to_dir
869
- stream_output = !to_file.nil? && to_file.respond_to?(:write)
870
-
871
- if write_in_place && write_to_target
872
- raise ArgumentError, 'the option :in_place cannot be used with either the :to_dir or :to_file option'
1392
+ timings = options[:timings]
1393
+
1394
+ case to_file
1395
+ when true, nil
1396
+ write_to_same_dir = !to_dir && (input.is_a? ::File)
1397
+ stream_output = false
1398
+ write_to_target = to_dir
1399
+ to_file = nil
1400
+ when false
1401
+ write_to_same_dir = false
1402
+ stream_output = false
1403
+ write_to_target = false
1404
+ to_file = nil
1405
+ else
1406
+ write_to_same_dir = false
1407
+ stream_output = to_file.respond_to? :write
1408
+ write_to_target = stream_output ? false : to_file
873
1409
  end
874
1410
 
875
- if !options.has_key?(:header_footer) && (write_in_place || write_to_target)
1411
+ if !options.key?(:header_footer) && (write_to_same_dir || write_to_target)
876
1412
  options[:header_footer] = true
877
1413
  end
878
1414
 
879
- doc = Asciidoctor.load(input, options)
1415
+ doc = self.load input, options
880
1416
 
881
1417
  if to_file == '/dev/null'
882
1418
  return doc
883
- elsif write_in_place
884
- to_file = File.join(File.dirname(input.path), "#{doc.attributes['docname']}#{doc.attributes['outfilesuffix']}")
885
- elsif !stream_output && write_to_target
886
- working_dir = options.has_key?(:base_dir) ? File.expand_path(options[:base_dir]) : File.expand_path(Dir.pwd)
1419
+ elsif write_to_same_dir
1420
+ infile = ::File.expand_path input.path
1421
+ outfile = ::File.join ::File.dirname(infile), %(#{doc.attributes['docname']}#{doc.attributes['outfilesuffix']})
1422
+ if outfile == infile
1423
+ raise ::IOError, 'Input file and output file are the same!'
1424
+ end
1425
+ outdir = ::File.dirname outfile
1426
+ elsif write_to_target
1427
+ working_dir = options.has_key?(:base_dir) ? ::File.expand_path(options[:base_dir]) : ::File.expand_path(::Dir.pwd)
887
1428
  # QUESTION should the jail be the working_dir or doc.base_dir???
888
1429
  jail = doc.safe >= SafeMode::SAFE ? working_dir : nil
889
1430
  if to_dir
890
- to_dir = doc.normalize_system_path(to_dir, working_dir, jail, :target_name => 'to_dir', :recover => false)
1431
+ outdir = doc.normalize_system_path(to_dir, working_dir, jail, :target_name => 'to_dir', :recover => false)
891
1432
  if to_file
892
- to_file = doc.normalize_system_path(to_file, to_dir, nil, :target_name => 'to_dir', :recover => false)
893
- # reestablish to_dir as the final target directory (in the case to_file had directory segments)
894
- to_dir = File.dirname(to_file)
1433
+ outfile = doc.normalize_system_path(to_file, outdir, nil, :target_name => 'to_dir', :recover => false)
1434
+ # reestablish outdir as the final target directory (in the case to_file had directory segments)
1435
+ outdir = ::File.dirname outfile
895
1436
  else
896
- to_file = File.join(to_dir, "#{doc.attributes['docname']}#{doc.attributes['outfilesuffix']}")
1437
+ outfile = ::File.join outdir, %(#{doc.attributes['docname']}#{doc.attributes['outfilesuffix']})
897
1438
  end
898
1439
  elsif to_file
899
- to_file = doc.normalize_system_path(to_file, working_dir, jail, :target_name => 'to_dir', :recover => false)
900
- # establish to_dir as the final target directory (in the case to_file had directory segments)
901
- to_dir = File.dirname(to_file)
1440
+ outfile = doc.normalize_system_path(to_file, working_dir, jail, :target_name => 'to_dir', :recover => false)
1441
+ # establish outdir as the final target directory (in the case to_file had directory segments)
1442
+ outdir = ::File.dirname outfile
902
1443
  end
903
1444
 
904
- if !File.directory? to_dir
1445
+ unless ::File.directory? outdir
905
1446
  if mkdirs
906
- Helpers.require_library 'fileutils'
907
- FileUtils.mkdir_p to_dir
1447
+ ::FileUtils.mkdir_p outdir
908
1448
  else
909
- raise IOError, "target directory does not exist: #{to_dir}"
1449
+ # NOTE we intentionally refer to the directory as it was passed to the API
1450
+ raise ::IOError, %(target directory does not exist: #{to_dir})
910
1451
  end
911
1452
  end
1453
+ else
1454
+ outfile = to_file
1455
+ outdir = nil
912
1456
  end
913
1457
 
914
- start = Time.now if monitor
915
- output = doc.render
916
-
917
- if monitor
918
- render_time = Time.now - start
919
- monitor[:render] = render_time
920
- monitor[:load_render] = monitor[:load] + render_time
921
- end
1458
+ timings.start :convert if timings
1459
+ output = doc.convert
1460
+ timings.record :convert if timings
922
1461
 
923
- if to_file
924
- start = Time.now if monitor
925
- if stream_output
926
- to_file.write output.rstrip
927
- # ensure there's a trailing endline
928
- to_file.write EOL
929
- else
930
- File.open(to_file, 'w') {|file| file.write output }
931
- # these assignments primarily for testing, diagnostics or reporting
932
- doc.attributes['outfile'] = outfile = File.expand_path(to_file)
933
- doc.attributes['outdir'] = File.dirname(outfile)
934
- end
935
- if monitor
936
- write_time = Time.now - start
937
- monitor[:write] = write_time
938
- monitor[:total] = monitor[:load_render] + write_time
1462
+ if outfile
1463
+ timings.start :write if timings
1464
+ unless stream_output
1465
+ doc.attributes['outfile'] = outfile
1466
+ doc.attributes['outdir'] = outdir
939
1467
  end
1468
+ doc.write output, outfile
1469
+ timings.record :write if timings
940
1470
 
941
1471
  # NOTE document cannot control this behavior if safe >= SafeMode::SERVER
942
1472
  if !stream_output && doc.safe < SafeMode::SECURE && (doc.attr? 'basebackend-html') &&
943
1473
  (doc.attr? 'linkcss') && (doc.attr? 'copycss')
944
1474
  copy_asciidoctor_stylesheet = DEFAULT_STYLESHEET_KEYS.include?(stylesheet = (doc.attr 'stylesheet'))
945
- #copy_user_stylesheet = !copy_asciidoctor_stylesheet && (doc.attr? 'copycss')
1475
+ copy_user_stylesheet = !copy_asciidoctor_stylesheet && !stylesheet.nil_or_empty?
946
1476
  copy_coderay_stylesheet = (doc.attr? 'source-highlighter', 'coderay') && (doc.attr 'coderay-css', 'class') == 'class'
947
1477
  copy_pygments_stylesheet = (doc.attr? 'source-highlighter', 'pygments') && (doc.attr 'pygments-css', 'class') == 'class'
948
- if copy_asciidoctor_stylesheet || copy_coderay_stylesheet || copy_pygments_stylesheet
949
- Helpers.require_library 'fileutils'
1478
+ if copy_asciidoctor_stylesheet || copy_user_stylesheet || copy_coderay_stylesheet || copy_pygments_stylesheet
950
1479
  outdir = doc.attr('outdir')
951
- stylesdir = doc.normalize_system_path(doc.attr('stylesdir'), outdir,
1480
+ stylesoutdir = doc.normalize_system_path(doc.attr('stylesdir'), outdir,
952
1481
  doc.safe >= SafeMode::SAFE ? outdir : nil)
953
- Helpers.mkdir_p stylesdir if mkdirs
1482
+ Helpers.mkdir_p stylesoutdir if mkdirs
1483
+
954
1484
  if copy_asciidoctor_stylesheet
955
- File.open(File.join(stylesdir, DEFAULT_STYLESHEET_NAME), 'w') {|f|
956
- f.write Asciidoctor::HTML5.default_asciidoctor_stylesheet
957
- }
1485
+ Stylesheets.instance.write_primary_stylesheet stylesoutdir
1486
+ # FIXME should Stylesheets also handle the user stylesheet?
1487
+ elsif copy_user_stylesheet
1488
+ if (stylesheet_src = (doc.attr 'copycss')).empty?
1489
+ stylesheet_src = doc.normalize_system_path stylesheet
1490
+ else
1491
+ stylesheet_src = doc.normalize_system_path stylesheet_src
1492
+ end
1493
+ stylesheet_dst = doc.normalize_system_path stylesheet, stylesoutdir, (doc.safe >= SafeMode::SAFE ? outdir : nil)
1494
+ unless stylesheet_src == stylesheet_dst || (stylesheet_content = doc.read_asset stylesheet_src).nil?
1495
+ ::File.open(stylesheet_dst, 'w') {|f|
1496
+ f.write stylesheet_content
1497
+ }
1498
+ end
958
1499
  end
959
1500
 
960
- #if copy_user_stylesheet
961
- #end
962
-
963
1501
  if copy_coderay_stylesheet
964
- File.open(File.join(stylesdir, 'asciidoctor-coderay.css'), 'w') {|f|
965
- f.write Asciidoctor::HTML5.default_coderay_stylesheet
966
- }
967
- end
968
-
969
- if copy_pygments_stylesheet
970
- File.open(File.join(stylesdir, 'asciidoctor-pygments.css'), 'w') {|f|
971
- f.write Asciidoctor::HTML5.pygments_stylesheet(doc.attr 'pygments-style')
972
- }
1502
+ Stylesheets.instance.write_coderay_stylesheet stylesoutdir
1503
+ elsif copy_pygments_stylesheet
1504
+ Stylesheets.instance.write_pygments_stylesheet stylesoutdir, (doc.attr 'pygments-style')
973
1505
  end
974
1506
  end
975
1507
  end
@@ -979,44 +1511,62 @@ module Asciidoctor
979
1511
  end
980
1512
  end
981
1513
 
982
- # Public: Parse the contents of the AsciiDoc source file into an Asciidoctor::Document
983
- # and render it to the specified backend format
1514
+ # Alias render to convert to maintain backwards compatibility
1515
+ alias :render :convert
1516
+
1517
+ # Public: Parse the contents of the AsciiDoc source file into an
1518
+ # Asciidoctor::Document and convert it to the specified backend format.
984
1519
  #
985
1520
  # input - the String AsciiDoc source filename
986
1521
  # options - a String, Array or Hash of options to control processing (default: {})
987
1522
  # String and Array values are converted into a Hash.
988
1523
  # See Asciidoctor::Document#initialize for details about options.
989
1524
  #
990
- # returns the Document object if the rendered result String is written to a
991
- # file, otherwise the rendered result String
992
- def self.render_file(filename, options = {})
993
- Asciidoctor.render(File.new(filename), options)
1525
+ # Returns the Document object if the converted String is written to a
1526
+ # file, otherwise the converted String
1527
+ def convert_file filename, options = {}
1528
+ self.convert ::File.new(filename || ''), options
994
1529
  end
995
1530
 
996
- # modules
997
- require 'asciidoctor/debug'
998
- require 'asciidoctor/substituters'
999
- require 'asciidoctor/helpers'
1000
-
1001
- # abstract classes
1002
- require 'asciidoctor/abstract_node'
1003
- require 'asciidoctor/abstract_block'
1004
-
1005
- # concrete classes
1006
- require 'asciidoctor/attribute_list'
1007
- require 'asciidoctor/backends/base_template'
1008
- require 'asciidoctor/block'
1009
- require 'asciidoctor/callouts'
1010
- require 'asciidoctor/document'
1011
- require 'asciidoctor/inline'
1012
- require 'asciidoctor/lexer'
1013
- require 'asciidoctor/list'
1014
- require 'asciidoctor/path_resolver'
1015
- require 'asciidoctor/reader'
1016
- require 'asciidoctor/renderer'
1017
- require 'asciidoctor/section'
1018
- require 'asciidoctor/table'
1019
-
1020
- # info
1021
- require 'asciidoctor/version'
1531
+ # Alias render_file to convert_file to maintain backwards compatibility
1532
+ alias :render_file :convert_file
1533
+
1534
+ end
1535
+
1536
+ if RUBY_ENGINE == 'opal'
1537
+ require 'asciidoctor/debug'
1538
+ require 'asciidoctor/version'
1539
+ require 'asciidoctor/timings'
1540
+ else
1541
+ autoload :Debug, 'asciidoctor/debug'
1542
+ autoload :VERSION, 'asciidoctor/version'
1543
+ autoload :Timings, 'asciidoctor/timings'
1544
+ end
1022
1545
  end
1546
+
1547
+ # core extensions
1548
+ require 'asciidoctor/core_ext'
1549
+
1550
+ # modules
1551
+ require 'asciidoctor/helpers'
1552
+ require 'asciidoctor/substitutors'
1553
+
1554
+ # abstract classes
1555
+ require 'asciidoctor/abstract_node'
1556
+ require 'asciidoctor/abstract_block'
1557
+
1558
+ # concrete classes
1559
+ require 'asciidoctor/attribute_list'
1560
+ require 'asciidoctor/block'
1561
+ require 'asciidoctor/callouts'
1562
+ require 'asciidoctor/converter'
1563
+ require 'asciidoctor/converter/html5' if RUBY_ENGINE_OPAL
1564
+ require 'asciidoctor/document'
1565
+ require 'asciidoctor/inline'
1566
+ require 'asciidoctor/list'
1567
+ require 'asciidoctor/parser'
1568
+ require 'asciidoctor/path_resolver'
1569
+ require 'asciidoctor/reader'
1570
+ require 'asciidoctor/section'
1571
+ require 'asciidoctor/stylesheets'
1572
+ require 'asciidoctor/table'