asciidoctor 0.1.4 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of asciidoctor might be problematic. Click here for more details.

Files changed (101):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.adoc +209 -25
  3. data/{LICENSE → LICENSE.adoc} +4 -3
  4. data/README.adoc +392 -395
  5. data/Rakefile +94 -137
  6. data/benchmark/benchmark.rb +127 -0
  7. data/benchmark/sample-data/mdbasics.adoc +334 -0
  8. data/bin/asciidoctor +5 -8
  9. data/bin/asciidoctor-safe +4 -8
  10. data/compat/asciidoc.conf +78 -11
  11. data/compat/font-awesome-3-compat.css +397 -0
  12. data/data/stylesheets/asciidoctor-default.css +399 -0
  13. data/data/stylesheets/coderay-asciidoctor.css +89 -0
  14. data/features/open_block.feature +92 -0
  15. data/features/pass_block.feature +66 -0
  16. data/features/step_definitions.rb +42 -0
  17. data/features/text_formatting.feature +55 -0
  18. data/features/xref.feature +116 -0
  19. data/lib/asciidoctor.rb +1155 -605
  20. data/lib/asciidoctor/abstract_block.rb +157 -71
  21. data/lib/asciidoctor/abstract_node.rb +150 -93
  22. data/lib/asciidoctor/attribute_list.rb +85 -90
  23. data/lib/asciidoctor/block.rb +51 -24
  24. data/lib/asciidoctor/callouts.rb +4 -7
  25. data/lib/asciidoctor/cli.rb +3 -0
  26. data/lib/asciidoctor/cli/invoker.rb +86 -76
  27. data/lib/asciidoctor/cli/options.rb +111 -61
  28. data/lib/asciidoctor/converter.rb +232 -0
  29. data/lib/asciidoctor/converter/base.rb +58 -0
  30. data/lib/asciidoctor/converter/composite.rb +66 -0
  31. data/lib/asciidoctor/converter/docbook45.rb +94 -0
  32. data/lib/asciidoctor/converter/docbook5.rb +684 -0
  33. data/lib/asciidoctor/converter/factory.rb +225 -0
  34. data/lib/asciidoctor/converter/html5.rb +1081 -0
  35. data/lib/asciidoctor/converter/template.rb +296 -0
  36. data/lib/asciidoctor/core_ext.rb +7 -0
  37. data/lib/asciidoctor/core_ext/object/nil_or_empty.rb +23 -0
  38. data/lib/asciidoctor/core_ext/string/chr.rb +6 -0
  39. data/lib/asciidoctor/core_ext/symbol/length.rb +6 -0
  40. data/lib/asciidoctor/document.rb +590 -304
  41. data/lib/asciidoctor/extensions.rb +1100 -308
  42. data/lib/asciidoctor/helpers.rb +109 -46
  43. data/lib/asciidoctor/inline.rb +16 -9
  44. data/lib/asciidoctor/list.rb +23 -15
  45. data/lib/asciidoctor/opal_ext.rb +4 -0
  46. data/lib/asciidoctor/opal_ext/comparable.rb +38 -0
  47. data/lib/asciidoctor/opal_ext/dir.rb +13 -0
  48. data/lib/asciidoctor/opal_ext/error.rb +2 -0
  49. data/lib/asciidoctor/opal_ext/file.rb +125 -0
  50. data/lib/asciidoctor/{lexer.rb → parser.rb} +646 -455
  51. data/lib/asciidoctor/path_resolver.rb +141 -77
  52. data/lib/asciidoctor/reader.rb +257 -187
  53. data/lib/asciidoctor/section.rb +12 -16
  54. data/lib/asciidoctor/stylesheets.rb +91 -0
  55. data/lib/asciidoctor/substitutors.rb +1548 -0
  56. data/lib/asciidoctor/table.rb +73 -57
  57. data/lib/asciidoctor/timings.rb +39 -0
  58. data/lib/asciidoctor/version.rb +1 -1
  59. data/man/asciidoctor.1 +22 -14
  60. data/man/asciidoctor.adoc +18 -10
  61. data/test/attributes_test.rb +314 -14
  62. data/test/blocks_test.rb +763 -118
  63. data/test/converter_test.rb +352 -0
  64. data/test/document_test.rb +518 -199
  65. data/test/extensions_test.rb +273 -103
  66. data/test/fixtures/asciidoc_index.txt +27 -13
  67. data/test/fixtures/basic-docinfo.xml +1 -1
  68. data/test/fixtures/chapter-a.adoc +3 -0
  69. data/test/fixtures/custom-backends/erb/html5/block_paragraph.html.erb +6 -0
  70. data/test/fixtures/docinfo.xml +1 -1
  71. data/test/fixtures/include-file.asciidoc +2 -0
  72. data/test/fixtures/master.adoc +5 -0
  73. data/test/invoker_test.rb +173 -61
  74. data/test/links_test.rb +97 -21
  75. data/test/lists_test.rb +181 -22
  76. data/test/options_test.rb +86 -2
  77. data/test/paragraphs_test.rb +47 -5
  78. data/test/{lexer_test.rb → parser_test.rb} +128 -57
  79. data/test/paths_test.rb +36 -1
  80. data/test/preamble_test.rb +25 -17
  81. data/test/reader_test.rb +404 -249
  82. data/test/sections_test.rb +623 -58
  83. data/test/substitutions_test.rb +609 -132
  84. data/test/tables_test.rb +198 -24
  85. data/test/test_helper.rb +101 -31
  86. data/test/text_test.rb +88 -31
  87. metadata +160 -64
  88. data/Gemfile +0 -12
  89. data/Guardfile +0 -18
  90. data/asciidoctor.gemspec +0 -143
  91. data/lib/asciidoctor/backends/_stylesheets.rb +0 -466
  92. data/lib/asciidoctor/backends/base_template.rb +0 -114
  93. data/lib/asciidoctor/backends/docbook45.rb +0 -774
  94. data/lib/asciidoctor/backends/docbook5.rb +0 -103
  95. data/lib/asciidoctor/backends/html5.rb +0 -1214
  96. data/lib/asciidoctor/renderer.rb +0 -259
  97. data/lib/asciidoctor/substituters.rb +0 -1083
  98. data/test/fixtures/asciidoc.txt +0 -105
  99. data/test/fixtures/ascshort.txt +0 -32
  100. data/test/fixtures/list_elements.asciidoc +0 -10
  101. data/test/renderer_test.rb +0 -162
@@ -1,11 +1,11 @@
1
1
  module Asciidoctor
2
2
  # Public: Methods to parse lines of AsciiDoc into an object hierarchy
3
3
  # representing the structure of the document. All methods are class methods and
4
- # should be invoked from the Lexer class. The main entry point is ::next_block.
5
- # No Lexer instances shall be discovered running around. (Any attempt to
6
- # instantiate a Lexer will be futile).
4
+ # should be invoked from the Parser class. The main entry point is ::next_block.
5
+ # No Parser instances shall be discovered running around. (Any attempt to
6
+ # instantiate a Parser will be futile).
7
7
  #
8
- # The object hierarchy created by the Lexer consists of zero or more Section
8
+ # The object hierarchy created by the Parser consists of zero or more Section
9
9
  # and Block objects. Section objects may be nested and a Section object
10
10
  # contains zero or more Block objects. Block objects may be nested, but may
11
11
  # only contain other Block objects. Block objects which represent lists may
@@ -14,18 +14,18 @@ module Asciidoctor
14
14
  # Examples
15
15
  #
16
16
  # # Create a Reader for the AsciiDoc lines and retrieve the next block from it.
17
- # # Lexer::next_block requires a parent, so we begin by instantiating an empty Document.
17
+ # # Parser.next_block requires a parent, so we begin by instantiating an empty Document.
18
18
  #
19
19
  # doc = Document.new
20
20
  # reader = Reader.new lines
21
- # block = Lexer.next_block(reader, doc)
21
+ # block = Parser.next_block(reader, doc)
22
22
  # block.class
23
23
  # # => Asciidoctor::Block
24
- class Lexer
24
+ class Parser
25
25
 
26
- BlockMatchData = Struct.new(:context, :masq, :tip, :terminator)
26
+ BlockMatchData = Struct.new :context, :masq, :tip, :terminator
27
27
 
28
- # Public: Make sure the Lexer object doesn't get initialized.
28
+ # Public: Make sure the Parser object doesn't get initialized.
29
29
  #
30
30
  # Raises RuntimeError if this constructor is invoked.
31
31
  def initialize
@@ -34,7 +34,7 @@ class Lexer
34
34
 
35
35
  # Public: Parses AsciiDoc source read from the Reader into the Document
36
36
  #
37
- # This method is the main entry-point into the Lexer when parsing a full document.
37
+ # This method is the main entry-point into the Parser when parsing a full document.
38
38
  # It first looks for and, if found, processes the document title. It then
39
39
  # proceeds to iterate through the lines in the Reader, parsing the document
40
40
  # into nested Sections and Blocks.
@@ -50,8 +50,21 @@ class Lexer
50
50
  unless options[:header_only]
51
51
  while reader.has_more_lines?
52
52
  new_section, block_attributes = next_section(reader, document, block_attributes)
53
- document << new_section unless new_section.nil?
53
+ document << new_section if new_section
54
54
  end
55
+ # NOTE we could try to avoid creating a preamble in the first place, though
56
+ # that would require reworking assumptions in next_section since the preamble
57
+ # is treated like an untitled section
58
+ # NOTE logic relocated to end of next_section
59
+ #if Compliance.unwrap_standalone_preamble &&
60
+ # document.blocks.size == 1 && (first_block = document.blocks[0]).context == :preamble &&
61
+ # first_block.blocks? && (document.doctype != 'book' || first_block.blocks[0].style != 'abstract')
62
+ # preamble = document.blocks.shift
63
+ # while (child_block = preamble.blocks.shift)
64
+ # child_block.parent = document
65
+ # document << child_block
66
+ # end
67
+ #end
55
68
  end
56
69
 
57
70
  document
@@ -83,7 +96,7 @@ class Lexer
83
96
  # yep, document title logic in AsciiDoc is just insanity
84
97
  # definitely an area for spec refinement
85
98
  assigned_doctitle = nil
86
- unless (val = document.attributes.fetch('doctitle', '')).empty?
99
+ unless (val = document.attributes['doctitle']).nil_or_empty?
87
100
  document.title = val
88
101
  assigned_doctitle = val
89
102
  end
@@ -92,20 +105,24 @@ class Lexer
92
105
  # check if the first line is the document title
93
106
  # if so, add a header to the document and parse the header metadata
94
107
  if is_next_line_document_title?(reader, block_attributes)
95
- document.id, doctitle, _, _ = parse_section_title(reader, document)
108
+ source_location = reader.cursor if document.sourcemap
109
+ document.id, _, doctitle, _, single_line = parse_section_title(reader, document)
96
110
  unless assigned_doctitle
97
111
  document.title = doctitle
98
112
  assigned_doctitle = doctitle
99
113
  end
114
+ # default to compat-mode if document uses atx-style doctitle
115
+ document.set_attribute 'compat-mode', '' unless single_line
116
+ document.header.source_location = source_location if source_location
100
117
  document.attributes['doctitle'] = section_title = doctitle
101
118
  # QUESTION: should the id assignment on Document be encapsulated in the Document class?
102
- if document.id.nil? && block_attributes.has_key?('id')
119
+ unless document.id
103
120
  document.id = block_attributes.delete('id')
104
121
  end
105
122
  parse_header_metadata(reader, document)
106
123
  end
107
124
 
108
- if !(val = document.attributes.fetch('doctitle', '')).empty? &&
125
+ if !(val = document.attributes['doctitle']).nil_or_empty? &&
109
126
  val != section_title
110
127
  document.title = val
111
128
  assigned_doctitle = val
@@ -128,11 +145,11 @@ class Lexer
128
145
  #
129
146
  # returns Nothing
130
147
  def self.parse_manpage_header(reader, document)
131
- if (m = document.attributes['doctitle'].match(REGEXP[:mantitle_manvolnum]))
148
+ if (m = ManpageTitleVolnumRx.match(document.attributes['doctitle']))
132
149
  document.attributes['mantitle'] = document.sub_attributes(m[1].rstrip.downcase)
133
150
  document.attributes['manvolnum'] = m[2].strip
134
151
  else
135
- warn "asciidoctor: ERROR: #{reader.prev_line_info}: malformed manpage title"
152
+ warn %(asciidoctor: ERROR: #{reader.prev_line_info}: malformed manpage title)
136
153
  end
137
154
 
138
155
  reader.skip_blank_lines
@@ -140,24 +157,24 @@ class Lexer
140
157
  if is_next_line_section?(reader, {})
141
158
  name_section = initialize_section(reader, document, {})
142
159
  if name_section.level == 1
143
- name_section_buffer = reader.read_lines_until(:break_on_blank_lines => true).join.tr_s("\n ", ' ')
144
- if (m = name_section_buffer.match(REGEXP[:manname_manpurpose]))
145
- document.attributes['manname'] = m[1]
160
+ name_section_buffer = reader.read_lines_until(:break_on_blank_lines => true).join(' ').tr_s(' ', ' ')
161
+ if (m = ManpageNamePurposeRx.match(name_section_buffer))
162
+ document.attributes['manname'] = document.sub_attributes m[1]
146
163
  document.attributes['manpurpose'] = m[2]
147
164
  # TODO parse multiple man names
148
165
 
149
166
  if document.backend == 'manpage'
150
167
  document.attributes['docname'] = document.attributes['manname']
151
- document.attributes['outfilesuffix'] = ".#{document.attributes['manvolnum']}"
168
+ document.attributes['outfilesuffix'] = %(.#{document.attributes['manvolnum']})
152
169
  end
153
170
  else
154
- warn "asciidoctor: ERROR: #{reader.prev_line_info}: malformed name section body"
171
+ warn %(asciidoctor: ERROR: #{reader.prev_line_info}: malformed name section body)
155
172
  end
156
173
  else
157
- warn "asciidoctor: ERROR: #{reader.prev_line_info}: name section title must be at level 1"
174
+ warn %(asciidoctor: ERROR: #{reader.prev_line_info}: name section title must be at level 1)
158
175
  end
159
176
  else
160
- warn "asciidoctor: ERROR: #{reader.prev_line_info}: name section expected"
177
+ warn %(asciidoctor: ERROR: #{reader.prev_line_info}: name section expected)
161
178
  end
162
179
  end
163
180
 
@@ -184,33 +201,35 @@ class Lexer
184
201
  # Examples
185
202
  #
186
203
  # source
187
- # # => "Greetings\n---------\nThis is my doc.\n\nSalutations\n-----------\nIt is awesome."
204
+ # # => "= Greetings\n\nThis is my doc.\n\n== Salutations\n\nIt is awesome."
188
205
  #
189
- # reader = Reader.new source.lines.entries
206
+ # reader = Reader.new source, nil, :normalize => true
190
207
  # # create empty document to parent the section
191
208
  # # and hold attributes extracted from header
192
209
  # doc = Document.new
193
210
  #
194
- # Lexer.next_section(reader, doc).first.title
211
+ # Parser.next_section(reader, doc).first.title
195
212
  # # => "Greetings"
196
213
  #
197
- # Lexer.next_section(reader, doc).first.title
214
+ # Parser.next_section(reader, doc).first.title
198
215
  # # => "Salutations"
199
216
  #
200
217
  # returns a two-element Array containing the Section and Hash of orphaned attributes
201
218
  def self.next_section(reader, parent, attributes = {})
202
219
  preamble = false
220
+ part = false
221
+ intro = false
203
222
 
204
223
  # FIXME if attributes[1] is a verbatim style, then don't check for section
205
224
 
206
225
  # check if we are at the start of processing the document
207
226
  # NOTE we could drop a hint in the attributes to indicate
208
227
  # that we are at a section title (so we don't have to check)
209
- if parent.is_a?(Document) && parent.blocks.empty? &&
210
- (parent.has_header? || attributes.delete('invalid-header') || !is_next_line_section?(reader, attributes))
211
-
212
- if parent.has_header?
213
- preamble = Block.new(parent, :preamble, :content_model => :compound)
228
+ if parent.context == :document && parent.blocks.empty? &&
229
+ ((has_header = parent.has_header?) || attributes.delete('invalid-header') || !is_next_line_section?(reader, attributes))
230
+ doctype = parent.doctype
231
+ if has_header || (doctype == 'book' && attributes[1] != 'abstract')
232
+ preamble = intro = Block.new(parent, :preamble, :content_model => :compound)
214
233
  parent << preamble
215
234
  end
216
235
  section = parent
@@ -219,21 +238,26 @@ class Lexer
219
238
  if parent.attributes.has_key? 'fragment'
220
239
  expected_next_levels = nil
221
240
  # small tweak to allow subsequent level-0 sections for book doctype
222
- elsif parent.doctype == 'book'
241
+ elsif doctype == 'book'
223
242
  expected_next_levels = [0, 1]
224
243
  else
225
244
  expected_next_levels = [1]
226
245
  end
227
246
  else
247
+ doctype = parent.document.doctype
228
248
  section = initialize_section(reader, parent, attributes)
229
249
  # clear attributes, except for title which carries over
230
250
  # section title to next block of content
231
- attributes = attributes.delete_if {|k, v| k != 'title'}
251
+ attributes = (title = attributes['title']) ? { 'title' => title } : {}
232
252
  current_level = section.level
233
- # subsections in preface & appendix in multipart books start at level 2
234
- if current_level == 0 && section.special &&
235
- section.document.doctype == 'book' && ['preface', 'appendix'].include?(section.sectname)
236
- expected_next_levels = [current_level + 2]
253
+ if current_level == 0 && doctype == 'book'
254
+ part = !section.special
255
+ # subsections in preface & appendix in multipart books start at level 2
256
+ if section.special && (['preface', 'appendix'].include? section.sectname)
257
+ expected_next_levels = [current_level + 2]
258
+ else
259
+ expected_next_levels = [current_level + 1]
260
+ end
237
261
  else
238
262
  expected_next_levels = [current_level + 1]
239
263
  end
@@ -253,46 +277,103 @@ class Lexer
253
277
  while reader.has_more_lines?
254
278
  parse_block_metadata_lines(reader, section, attributes)
255
279
 
256
- next_level = is_next_line_section? reader, attributes
257
- if next_level
280
+ if (next_level = is_next_line_section? reader, attributes)
258
281
  next_level += section.document.attr('leveloffset', 0).to_i
259
- doctype = parent.document.doctype
260
- if next_level > current_level || (section.is_a?(Document) && next_level == 0)
282
+ if next_level > current_level || (section.context == :document && next_level == 0)
261
283
  if next_level == 0 && doctype != 'book'
262
- warn "asciidoctor: ERROR: #{reader.line_info}: only book doctypes can contain level 0 sections"
263
- elsif !expected_next_levels.nil? && !expected_next_levels.include?(next_level)
264
- warn "asciidoctor: WARNING: #{reader.line_info}: section title out of sequence: " +
265
- "expected #{expected_next_levels.size > 1 ? 'levels' : 'level'} #{expected_next_levels * ' or '}, " +
266
- "got level #{next_level}"
284
+ warn %(asciidoctor: ERROR: #{reader.line_info}: only book doctypes can contain level 0 sections)
285
+ elsif expected_next_levels && !expected_next_levels.include?(next_level)
286
+ warn %(asciidoctor: WARNING: #{reader.line_info}: section title out of sequence: ) +
287
+ %(expected #{expected_next_levels.size > 1 ? 'levels' : 'level'} #{expected_next_levels * ' or '}, ) +
288
+ %(got level #{next_level})
267
289
  end
268
290
  # the attributes returned are those that are orphaned
269
291
  new_section, attributes = next_section(reader, section, attributes)
270
292
  section << new_section
271
293
  else
272
294
  if next_level == 0 && doctype != 'book'
273
- warn "asciidoctor: ERROR: #{reader.line_info}: only book doctypes can contain level 0 sections"
295
+ warn %(asciidoctor: ERROR: #{reader.line_info}: only book doctypes can contain level 0 sections)
274
296
  end
275
297
  # close this section (and break out of the nesting) to begin a new one
276
298
  break
277
299
  end
278
300
  else
279
301
  # just take one block or else we run the risk of overrunning section boundaries
280
- new_block = next_block(reader, (preamble || section), attributes, :parse_metadata => false)
281
- if !new_block.nil?
282
- (preamble || section) << new_block
302
+ block_line_info = reader.line_info
303
+ if (new_block = next_block reader, (intro || section), attributes, :parse_metadata => false)
304
+ # REVIEW this may be doing too much
305
+ if part
306
+ if !section.blocks?
307
+ # if this block wasn't marked as [partintro], emulate behavior as if it had
308
+ if new_block.style != 'partintro'
309
+ # emulate [partintro] paragraph
310
+ if new_block.context == :paragraph
311
+ new_block.context = :open
312
+ new_block.style = 'partintro'
313
+ # emulate [partintro] open block
314
+ else
315
+ intro = Block.new section, :open, :content_model => :compound
316
+ intro.style = 'partintro'
317
+ new_block.parent = intro
318
+ section << intro
319
+ end
320
+ end
321
+ elsif section.blocks.size == 1
322
+ first_block = section.blocks[0]
323
+ # open the [partintro] open block for appending
324
+ if !intro && first_block.content_model == :compound
325
+ #new_block.parent = (intro = first_block)
326
+ warn %(asciidoctor: ERROR: #{block_line_info}: illegal block content outside of partintro block)
327
+ # rebuild [partintro] paragraph as an open block
328
+ elsif first_block.content_model != :compound
329
+ intro = Block.new section, :open, :content_model => :compound
330
+ intro.style = 'partintro'
331
+ section.blocks.shift
332
+ if first_block.style == 'partintro'
333
+ first_block.context = :paragraph
334
+ first_block.style = nil
335
+ end
336
+ first_block.parent = intro
337
+ intro << first_block
338
+ new_block.parent = intro
339
+ section << intro
340
+ end
341
+ end
342
+ end
343
+
344
+ (intro || section) << new_block
283
345
  attributes = {}
284
- else
285
- # don't clear attributes if we don't find a block because they may
286
- # be trailing attributes that didn't get associated with a block
346
+ #else
347
+ # # don't clear attributes if we don't find a block because they may
348
+ # # be trailing attributes that didn't get associated with a block
287
349
  end
288
350
  end
289
351
 
290
352
  reader.skip_blank_lines
291
353
  end
292
354
 
293
- if preamble && !preamble.blocks?
355
+ if part
356
+ unless section.blocks? && section.blocks[-1].context == :section
357
+ warn %(asciidoctor: ERROR: #{reader.line_info}: invalid part, must have at least one section (e.g., chapter, appendix, etc.))
358
+ end
359
+ # NOTE we could try to avoid creating a preamble in the first place, though
360
+ # that would require reworking assumptions in next_section since the preamble
361
+ # is treated like an untitled section
362
+ elsif preamble # implies parent == document
363
+ document = parent
364
+ if preamble.blocks?
365
+ # unwrap standalone preamble (i.e., no sections), if permissible
366
+ if Compliance.unwrap_standalone_preamble && document.blocks.size == 1 && doctype != 'book'
367
+ document.blocks.shift
368
+ while (child_block = preamble.blocks.shift)
369
+ child_block.parent = document
370
+ document << child_block
371
+ end
372
+ end
294
373
  # drop the preamble if it has no content
295
- section.blocks.delete_at(0)
374
+ else
375
+ document.blocks.shift
376
+ end
296
377
  end
297
378
 
298
379
  # The attributes returned here are orphaned attributes that fall at the end
@@ -324,13 +405,12 @@ class Lexer
324
405
  skipped = reader.skip_blank_lines
325
406
 
326
407
  # bail if we've reached the end of the parent block or document
327
- return nil unless reader.has_more_lines?
408
+ return unless reader.has_more_lines?
328
409
 
329
- text_only = options[:text]
330
410
  # check for option to find list item text only
331
411
  # if skipped a line, assume a list continuation was
332
412
  # used and block content is acceptable
333
- if text_only && skipped > 0
413
+ if (text_only = options[:text]) && skipped > 0
334
414
  options.delete(:text)
335
415
  text_only = false
336
416
  end
@@ -341,27 +421,30 @@ class Lexer
341
421
  document = parent.document
342
422
  if (extensions = document.extensions)
343
423
  block_extensions = extensions.blocks?
344
- macro_extensions = extensions.block_macros?
424
+ block_macro_extensions = extensions.block_macros?
345
425
  else
346
- block_extensions = macro_extensions = false
426
+ block_extensions = block_macro_extensions = false
347
427
  end
348
428
  #parent_context = parent.is_a?(Block) ? parent.context : nil
349
- in_list = parent.is_a?(List)
429
+ in_list = (parent.is_a? List)
350
430
  block = nil
351
431
  style = nil
352
432
  explicit_style = nil
433
+ sourcemap = document.sourcemap
434
+ source_location = nil
353
435
 
354
- while reader.has_more_lines? && block.nil?
436
+ while !block && reader.has_more_lines?
355
437
  # if parsing metadata, read until there is no more to read
356
438
  if parse_metadata && parse_block_metadata_line(reader, document, attributes, options)
357
439
  reader.advance
358
440
  next
359
- #elsif parse_sections && parent_context.nil? && is_next_line_section?(reader, attributes)
441
+ #elsif parse_sections && !parent_context && is_next_line_section?(reader, attributes)
360
442
  # block, attributes = next_section(reader, parent, attributes)
361
443
  # break
362
444
  end
363
445
 
364
446
  # QUESTION should we introduce a parsing context object?
447
+ source_location = reader.cursor if sourcemap
365
448
  this_line = reader.read_line
366
449
  delimited_block = false
367
450
  block_context = nil
@@ -372,7 +455,7 @@ class Lexer
372
455
  style, explicit_style = parse_style_attribute(attributes, reader)
373
456
  end
374
457
 
375
- if delimited_blk_match = is_delimited_block?(this_line, true)
458
+ if (delimited_blk_match = is_delimited_block? this_line, true)
376
459
  delimited_block = true
377
460
  block_context = cloaked_context = delimited_blk_match.context
378
461
  terminator = delimited_blk_match.terminator
@@ -383,16 +466,16 @@ class Lexer
383
466
  block_context = style.to_sym
384
467
  elsif delimited_blk_match.masq.include?('admonition') && ADMONITION_STYLES.include?(style)
385
468
  block_context = :admonition
386
- elsif block_extensions && extensions.processor_registered_for_block?(style, block_context)
469
+ elsif block_extensions && extensions.registered_for_block?(style, block_context)
387
470
  block_context = style.to_sym
388
471
  else
389
- warn "asciidoctor: WARNING: #{reader.prev_line_info}: invalid style for #{block_context} block: #{style}"
472
+ warn %(asciidoctor: WARNING: #{reader.prev_line_info}: invalid style for #{block_context} block: #{style})
390
473
  style = block_context.to_s
391
474
  end
392
475
  end
393
476
  end
394
477
 
395
- if !delimited_block
478
+ unless delimited_block
396
479
 
397
480
  # this loop only executes once; used for flow control
398
481
  # break once a block is found or at end of loop
@@ -401,7 +484,7 @@ class Lexer
401
484
  while true
402
485
 
403
486
  # process lines verbatim
404
- if !style.nil? && COMPLIANCE[:strict_verbatim_paragraphs] && VERBATIM_STYLES.include?(style)
487
+ if style && Compliance.strict_verbatim_paragraphs && VERBATIM_STYLES.include?(style)
405
488
  block_context = style.to_sym
406
489
  reader.unshift_line this_line
407
490
  # advance to block parsing =>
@@ -410,14 +493,14 @@ class Lexer
410
493
 
411
494
  # process lines normally
412
495
  unless text_only
413
- first_char = Compliance.markdown_syntax ? this_line.lstrip[0..0] : this_line[0..0]
414
- # NOTE we're letting break lines (ruler, page_break, etc) have attributes
415
- if BREAK_LINES.has_key?(first_char) && this_line.length > 3 &&
416
- (match = this_line.match(Compliance.markdown_syntax ? REGEXP[:break_line_plus] : REGEXP[:break_line]))
417
- block = Block.new(parent, BREAK_LINES[first_char], :content_model => :empty)
496
+ first_char = Compliance.markdown_syntax ? this_line.lstrip.chr : this_line.chr
497
+ # NOTE we're letting break lines (horizontal rule, page_break, etc) have attributes
498
+ if (LAYOUT_BREAK_LINES.has_key? first_char) && this_line.length >= 3 &&
499
+ (Compliance.markdown_syntax ? LayoutBreakLinePlusRx : LayoutBreakLineRx) =~ this_line
500
+ block = Block.new(parent, LAYOUT_BREAK_LINES[first_char], :content_model => :empty)
418
501
  break
419
502
 
420
- elsif (match = this_line.match(REGEXP[:media_blk_macro]))
503
+ elsif this_line.end_with?(']') && (match = MediaBlockMacroRx.match(this_line))
421
504
  blk_ctx = match[1].to_sym
422
505
  block = Block.new(parent, blk_ctx, :content_model => :empty)
423
506
  if blk_ctx == :image
@@ -428,7 +511,7 @@ class Lexer
428
511
  posattrs = []
429
512
  end
430
513
 
431
- unless style.nil? || explicit_style
514
+ unless !style || explicit_style
432
515
  attributes['alt'] = style if blk_ctx == :image
433
516
  attributes.delete('style')
434
517
  style = nil
@@ -441,52 +524,66 @@ class Lexer
441
524
  :into => attributes)
442
525
  target = block.sub_attributes(match[2], :attribute_missing => 'drop-line')
443
526
  if target.empty?
444
- if document.attributes.fetch('attribute-missing', COMPLIANCE[:attribute_missing]) == 'skip'
445
- # retain as unparsed
446
- return Block.new(parent, :paragraph, :source => [this_line.chomp])
527
+ # retain as unparsed if attribute-missing is skip
528
+ if document.attributes.fetch('attribute-missing', Compliance.attribute_missing) == 'skip'
529
+ return Block.new(parent, :paragraph, :content_model => :simple, :source => [this_line])
530
+ # otherwise, drop the line
447
531
  else
448
- # drop the line if target resolves to nothing
449
- return nil
532
+ attributes.clear
533
+ return
450
534
  end
451
535
  end
452
536
 
453
537
  attributes['target'] = target
454
- block.title = attributes.delete('title') if attributes.has_key?('title')
455
- if blk_ctx == :image
456
- document.register(:images, target)
457
- attributes['alt'] ||= File.basename(target, File.extname(target)).tr('_-', ' ')
458
- # QUESTION should video or audio have an auto-numbered caption?
459
- block.assign_caption attributes.delete('caption'), 'figure'
460
- end
538
+ # now done down below
539
+ #block.title = attributes.delete('title') if attributes.has_key?('title')
540
+ #if blk_ctx == :image
541
+ # if attributes.has_key? 'scaledwidth'
542
+ # # append % to scaledwidth if ends in number (no units present)
543
+ # if (48..57).include?((attributes['scaledwidth'][-1] || 0).ord)
544
+ # attributes['scaledwidth'] = %(#{attributes['scaledwidth']}%)
545
+ # end
546
+ # end
547
+ # document.register(:images, target)
548
+ # attributes['alt'] ||= ::File.basename(target, ::File.extname(target)).tr('_-', ' ')
549
+ # # QUESTION should video or audio have an auto-numbered caption?
550
+ # block.assign_caption attributes.delete('caption'), 'figure'
551
+ #end
461
552
  break
462
553
 
463
554
  # NOTE we're letting the toc macro have attributes
464
- elsif first_char == 't' && (match = this_line.match(REGEXP[:toc]))
555
+ elsif first_char == 't' && (match = TocBlockMacroRx.match(this_line))
465
556
  block = Block.new(parent, :toc, :content_model => :empty)
466
557
  block.parse_attributes(match[1], [], :sub_result => false, :into => attributes)
467
558
  break
468
559
 
469
- elsif macro_extensions && (match = this_line.match(REGEXP[:generic_blk_macro])) &&
470
- extensions.processor_registered_for_block_macro?(match[1])
471
- name = match[1]
560
+ elsif block_macro_extensions && (match = GenericBlockMacroRx.match(this_line)) &&
561
+ (extension = extensions.registered_for_block_macro?(match[1]))
472
562
  target = match[2]
473
563
  raw_attributes = match[3]
474
- processor = extensions.load_block_macro_processor name, document
475
- unless raw_attributes.empty?
476
- document.parse_attributes(raw_attributes, processor.options.fetch(:pos_attrs, []),
477
- :sub_input => true, :sub_result => false, :into => attributes)
564
+ if extension.config[:content_model] == :attributes
565
+ unless raw_attributes.empty?
566
+ document.parse_attributes(raw_attributes, (extension.config[:pos_attrs] || []),
567
+ :sub_input => true, :sub_result => false, :into => attributes)
568
+ end
569
+ else
570
+ attributes['text'] = raw_attributes
478
571
  end
479
- if !(default_attrs = processor.options.fetch(:default_attrs, {})).empty?
572
+ if (default_attrs = extension.config[:default_attrs])
480
573
  default_attrs.each {|k, v| attributes[k] ||= v }
481
574
  end
482
- block = processor.process parent, target, attributes
483
- return nil if block.nil?
575
+ if (block = extension.process_method[parent, target, attributes.dup])
576
+ attributes.replace block.attributes
577
+ else
578
+ attributes.clear
579
+ return
580
+ end
484
581
  break
485
582
  end
486
583
  end
487
584
 
488
585
  # haven't found anything yet, continue
489
- if (match = this_line.match(REGEXP[:colist]))
586
+ if (match = CalloutListRx.match(this_line))
490
587
  block = List.new(parent, :colist)
491
588
  attributes['style'] = 'arabic'
492
589
  reader.unshift_line this_line
@@ -495,48 +592,48 @@ class Lexer
495
592
  # might want to move this check to a validate method
496
593
  if match[1].to_i != expected_index
497
594
  # FIXME this lineno - 2 hack means we need a proper look-behind cursor
498
- warn "asciidoctor: WARNING: #{reader.path}: line #{reader.lineno - 2}: callout list item index: expected #{expected_index} got #{match[1]}"
595
+ warn %(asciidoctor: WARNING: #{reader.path}: line #{reader.lineno - 2}: callout list item index: expected #{expected_index} got #{match[1]})
499
596
  end
500
597
  list_item = next_list_item(reader, block, match)
501
598
  expected_index += 1
502
- if !list_item.nil?
599
+ if list_item
503
600
  block << list_item
504
601
  coids = document.callouts.callout_ids(block.items.size)
505
602
  if !coids.empty?
506
603
  list_item.attributes['coids'] = coids
507
604
  else
508
605
  # FIXME this lineno - 2 hack means we need a proper look-behind cursor
509
- warn "asciidoctor: WARNING: #{reader.path}: line #{reader.lineno - 2}: no callouts refer to list item #{block.items.size}"
606
+ warn %(asciidoctor: WARNING: #{reader.path}: line #{reader.lineno - 2}: no callouts refer to list item #{block.items.size})
510
607
  end
511
608
  end
512
- end while reader.has_more_lines? && match = reader.peek_line.match(REGEXP[:colist])
609
+ end while reader.has_more_lines? && (match = CalloutListRx.match(reader.peek_line))
513
610
 
514
611
  document.callouts.next_list
515
612
  break
516
613
 
517
- elsif (match = this_line.match(REGEXP[:ulist]))
614
+ elsif UnorderedListRx =~ this_line
518
615
  reader.unshift_line this_line
519
616
  block = next_outline_list(reader, :ulist, parent)
520
617
  break
521
618
 
522
- elsif (match = this_line.match(REGEXP[:olist]))
619
+ elsif (match = OrderedListRx.match(this_line))
523
620
  reader.unshift_line this_line
524
621
  block = next_outline_list(reader, :olist, parent)
525
622
  # QUESTION move this logic to next_outline_list?
526
623
  if !attributes['style'] && !block.attributes['style']
527
- marker = block.items.first.marker
624
+ marker = block.items[0].marker
528
625
  if marker.start_with? '.'
529
626
  # first one makes more sense, but second one is AsciiDoc-compliant
530
- #attributes['style'] = (ORDERED_LIST_STYLES[block.level - 1] || ORDERED_LIST_STYLES.first).to_s
531
- attributes['style'] = (ORDERED_LIST_STYLES[marker.length - 1] || ORDERED_LIST_STYLES.first).to_s
627
+ #attributes['style'] = (ORDERED_LIST_STYLES[block.level - 1] || ORDERED_LIST_STYLES[0]).to_s
628
+ attributes['style'] = (ORDERED_LIST_STYLES[marker.length - 1] || ORDERED_LIST_STYLES[0]).to_s
532
629
  else
533
- style = ORDERED_LIST_STYLES.detect{|s| marker.match(ORDERED_LIST_MARKER_PATTERNS[s]) }
534
- attributes['style'] = (style || ORDERED_LIST_STYLES.first).to_s
630
+ style = ORDERED_LIST_STYLES.detect{|s| OrderedListMarkerRxMap[s] =~ marker }
631
+ attributes['style'] = (style || ORDERED_LIST_STYLES[0]).to_s
535
632
  end
536
633
  end
537
634
  break
538
635
 
539
- elsif (match = this_line.match(REGEXP[:dlist]))
636
+ elsif (match = DefinitionListRx.match(this_line))
540
637
  reader.unshift_line this_line
541
638
  block = next_labeled_list(reader, match, parent)
542
639
  break
@@ -544,10 +641,11 @@ class Lexer
544
641
  elsif (style == 'float' || style == 'discrete') &&
545
642
  is_section_title?(this_line, (Compliance.underline_style_section_titles ? reader.peek_line(true) : nil))
546
643
  reader.unshift_line this_line
547
- float_id, float_title, float_level, _ = parse_section_title(reader, document)
644
+ float_id, float_reftext, float_title, float_level, _ = parse_section_title(reader, document)
645
+ attributes['reftext'] = float_reftext if float_reftext
548
646
  float_id ||= attributes['id'] if attributes.has_key?('id')
549
647
  block = Block.new(parent, :floating_title, :content_model => :empty)
550
- if float_id.nil? || float_id.empty?
648
+ if float_id.nil_or_empty?
551
649
  # FIXME remove hack of creating throwaway Section to get at the generate_id method
552
650
  tmp_sect = Section.new(parent)
553
651
  tmp_sect.title = float_title
@@ -555,14 +653,13 @@ class Lexer
555
653
  else
556
654
  block.id = float_id
557
655
  end
558
- document.register(:ids, [block.id, float_title]) if block.id
559
656
  block.level = float_level
560
657
  block.title = float_title
561
658
  break
562
659
 
563
660
  # FIXME create another set for "passthrough" styles
564
661
  # FIXME make this more DRY!
565
- elsif !style.nil? && style != 'normal'
662
+ elsif style && style != 'normal'
566
663
  if PARAGRAPH_STYLES.include?(style)
567
664
  block_context = style.to_sym
568
665
  cloaked_context = :paragraph
@@ -575,14 +672,14 @@ class Lexer
575
672
  reader.unshift_line this_line
576
673
  # advance to block parsing =>
577
674
  break
578
- elsif block_extensions && extensions.processor_registered_for_block?(style, :paragraph)
675
+ elsif block_extensions && extensions.registered_for_block?(style, :paragraph)
579
676
  block_context = style.to_sym
580
677
  cloaked_context = :paragraph
581
678
  reader.unshift_line this_line
582
679
  # advance to block parsing =>
583
680
  break
584
681
  else
585
- warn "asciidoctor: WARNING: #{reader.prev_line_info}: invalid style for paragraph: #{style}"
682
+ warn %(asciidoctor: WARNING: #{reader.prev_line_info}: invalid style for paragraph: #{style})
586
683
  style = nil
587
684
  # continue to process paragraph
588
685
  end
@@ -591,7 +688,7 @@ class Lexer
591
688
  break_at_list = (skipped == 0 && in_list)
592
689
 
593
690
  # a literal paragraph is contiguous lines starting at least one space
594
- if style != 'normal' && this_line.match(REGEXP[:lit_par])
691
+ if style != 'normal' && LiteralParagraphRx =~ this_line
595
692
  # So we need to actually include this one in the read_lines group
596
693
  reader.unshift_line this_line
597
694
  lines = reader.read_lines_until(
@@ -602,8 +699,8 @@ class Lexer
602
699
  # and therefore we should not break at a list item
603
700
  # (this won't stop breaking on item of same level since we've already parsed them out)
604
701
  # QUESTION can we turn this block into a lambda or function call?
605
- (break_at_list && line.match(REGEXP[:any_list])) ||
606
- (COMPLIANCE[:block_terminates_paragraph] && (is_delimited_block?(line) || line.match(REGEXP[:attr_line])))
702
+ (break_at_list && AnyListRx =~ line) ||
703
+ (Compliance.block_terminates_paragraph && (is_delimited_block?(line) || BlockAttributeLineRx =~ line))
607
704
  }
608
705
 
609
706
  reset_block_indent! lines
@@ -625,8 +722,8 @@ class Lexer
625
722
  # and therefore we should not break at a list item
626
723
  # (this won't stop breaking on item of same level since we've already parsed them out)
627
724
  # QUESTION can we turn this block into a lambda or function call?
628
- (break_at_list && line.match(REGEXP[:any_list])) ||
629
- (COMPLIANCE[:block_terminates_paragraph] && (is_delimited_block?(line) || line.match(REGEXP[:attr_line])))
725
+ (break_at_list && AnyListRx =~ line) ||
726
+ (Compliance.block_terminates_paragraph && (is_delimited_block?(line) || BlockAttributeLineRx =~ line))
630
727
  }
631
728
 
632
729
  # NOTE we need this logic because we've asked the reader to skip
@@ -635,59 +732,57 @@ class Lexer
635
732
  if lines.empty?
636
733
  # call advance since the reader preserved the last line
637
734
  reader.advance
638
- return nil
735
+ return
639
736
  end
640
737
 
641
- catalog_inline_anchors(lines.join, document)
738
+ catalog_inline_anchors(lines.join(EOL), document)
642
739
 
643
- first_line = lines.first
644
- if !text_only && (admonition_match = first_line.match(REGEXP[:admonition_inline]))
740
+ first_line = lines[0]
741
+ if !text_only && (admonition_match = AdmonitionParagraphRx.match(first_line))
645
742
  lines[0] = admonition_match.post_match.lstrip
646
743
  attributes['style'] = admonition_match[1]
647
744
  attributes['name'] = admonition_name = admonition_match[1].downcase
648
- attributes['caption'] ||= document.attributes["#{admonition_name}-caption"]
649
- block = Block.new(parent, :admonition, :source => lines, :attributes => attributes)
745
+ attributes['caption'] ||= document.attributes[%(#{admonition_name}-caption)]
746
+ block = Block.new(parent, :admonition, :content_model => :simple, :source => lines, :attributes => attributes)
650
747
  elsif !text_only && Compliance.markdown_syntax && first_line.start_with?('> ')
651
748
  lines.map! {|line|
652
- if line.start_with?('> ')
653
- line[2..-1]
654
- elsif line.chomp == '>'
749
+ if line == '>'
655
750
  line[1..-1]
751
+ elsif line.start_with? '> '
752
+ line[2..-1]
656
753
  else
657
754
  line
658
755
  end
659
756
  }
660
757
 
661
- if lines.last.start_with?('-- ')
758
+ if lines[-1].start_with? '-- '
662
759
  attribution, citetitle = lines.pop[3..-1].split(', ', 2)
663
- lines.pop while lines.last.chomp.empty?
664
- lines[-1] = lines.last.chomp
760
+ lines.pop while lines[-1].empty?
665
761
  else
666
762
  attribution, citetitle = nil
667
763
  end
668
764
  attributes['style'] = 'quote'
669
- attributes['attribution'] = attribution unless attribution.nil?
670
- attributes['citetitle'] = citetitle unless citetitle.nil?
765
+ attributes['attribution'] = attribution if attribution
766
+ attributes['citetitle'] = citetitle if citetitle
671
767
  # NOTE will only detect headings that are floating titles (not section titles)
672
768
  # TODO could assume a floating title when inside a block context
673
769
  # FIXME Reader needs to be created w/ line info
674
770
  block = build_block(:quote, :compound, false, parent, Reader.new(lines), attributes)
675
771
  elsif !text_only && lines.size > 1 && first_line.start_with?('"') &&
676
- lines.last.start_with?('-- ') && lines[-2].chomp.end_with?('"')
772
+ lines[-1].start_with?('-- ') && lines[-2].end_with?('"')
677
773
  lines[0] = first_line[1..-1]
678
774
  attribution, citetitle = lines.pop[3..-1].split(', ', 2)
679
- lines.pop while lines.last.chomp.empty?
680
- lines[-1] = lines.last.chomp.chop
775
+ lines.pop while lines[-1].empty?
776
+ # strip trailing quote
777
+ lines[-1] = lines[-1].chop
681
778
  attributes['style'] = 'quote'
682
- attributes['attribution'] = attribution unless attribution.nil?
683
- attributes['citetitle'] = citetitle unless citetitle.nil?
684
- block = Block.new(parent, :quote, :source => lines, :attributes => attributes)
685
- #block = Block.new(parent, :quote, :content_model => :compound, :attributes => attributes)
686
- #block << Block.new(block, :paragraph, :source => lines)
779
+ attributes['attribution'] = attribution if attribution
780
+ attributes['citetitle'] = citetitle if citetitle
781
+ block = Block.new(parent, :quote, :content_model => :simple, :source => lines, :attributes => attributes)
687
782
  else
688
783
  # if [normal] is used over an indented paragraph, unindent it
689
- if style == 'normal' && ((first_char = lines.first[0..0]) == ' ' || first_char == "\t")
690
- first_line = lines.first
784
+ if style == 'normal' && ((first_char = lines[0].chr) == ' ' || first_char == TAB)
785
+ first_line = lines[0]
691
786
  first_line_shifted = first_line.lstrip
692
787
  indent = line_length(first_line) - line_length(first_line_shifted)
693
788
  lines[0] = first_line_shifted
@@ -697,7 +792,7 @@ class Lexer
697
792
  end
698
793
  end
699
794
 
700
- block = Block.new(parent, :paragraph, :source => lines, :attributes => attributes)
795
+ block = Block.new(parent, :paragraph, :content_model => :simple, :source => lines, :attributes => attributes)
701
796
  end
702
797
  end
703
798
 
@@ -707,7 +802,7 @@ class Lexer
707
802
  end
708
803
 
709
804
  # either delimited block or styled paragraph
710
- if block.nil? && !block_context.nil?
805
+ if !block && block_context
711
806
  # abstract and partintro should be handled by open block
712
807
  # FIXME kind of hackish...need to sort out how to generalize this
713
808
  block_context = :open if block_context == :abstract || block_context == :partintro
@@ -715,29 +810,36 @@ class Lexer
715
810
  case block_context
716
811
  when :admonition
717
812
  attributes['name'] = admonition_name = style.downcase
718
- attributes['caption'] ||= document.attributes["#{admonition_name}-caption"]
813
+ attributes['caption'] ||= document.attributes[%(#{admonition_name}-caption)]
719
814
  block = build_block(block_context, :compound, terminator, parent, reader, attributes)
720
815
 
721
816
  when :comment
722
817
  build_block(block_context, :skip, terminator, parent, reader, attributes)
723
- return nil
818
+ return
724
819
 
725
820
  when :example
726
- block = build_block(block_context, :compound, terminator, parent, reader, attributes, {:supports_caption => true})
821
+ block = build_block(block_context, :compound, terminator, parent, reader, attributes)
727
822
 
728
823
  when :listing, :fenced_code, :source
729
824
  if block_context == :fenced_code
730
825
  style = attributes['style'] = 'source'
731
- language, linenums = this_line[3...-1].split(',', 2)
826
+ language, linenums = this_line[3..-1].split(',', 2)
732
827
  if language && !(language = language.strip).empty?
733
828
  attributes['language'] = language
734
829
  attributes['linenums'] = '' if linenums && !linenums.strip.empty?
830
+ elsif (default_language = document.attributes['source-language'])
831
+ attributes['language'] = default_language
735
832
  end
736
833
  terminator = terminator[0..2]
737
834
  elsif block_context == :source
738
835
  AttributeList.rekey(attributes, [nil, 'language', 'linenums'])
836
+ unless attributes.has_key? 'language'
837
+ if (default_language = document.attributes['source-language'])
838
+ attributes['language'] = default_language
839
+ end
840
+ end
739
841
  end
740
- block = build_block(:listing, :verbatim, terminator, parent, reader, attributes, {:supports_caption => true})
842
+ block = build_block(:listing, :verbatim, terminator, parent, reader, attributes)
741
843
 
742
844
  when :literal
743
845
  block = build_block(block_context, :verbatim, terminator, parent, reader, attributes)
@@ -745,13 +847,25 @@ class Lexer
745
847
  when :pass
746
848
  block = build_block(block_context, :raw, terminator, parent, reader, attributes)
747
849
 
850
+ when :stem, :latexmath, :asciimath
851
+ if block_context == :stem
852
+ attributes['style'] = if (explicit_stem_syntax = attributes[2])
853
+ explicit_stem_syntax.include?('tex') ? 'latexmath' : 'asciimath'
854
+ elsif (default_stem_syntax = document.attributes['stem']).nil_or_empty?
855
+ 'asciimath'
856
+ else
857
+ default_stem_syntax
858
+ end
859
+ end
860
+ block = build_block(:stem, :raw, terminator, parent, reader, attributes)
861
+
748
862
  when :open, :sidebar
749
863
  block = build_block(block_context, :compound, terminator, parent, reader, attributes)
750
864
 
751
865
  when :table
752
866
  cursor = reader.cursor
753
867
  block_reader = Reader.new reader.read_lines_until(:terminator => terminator, :skip_line_comments => true), cursor
754
- case terminator[0..0]
868
+ case terminator.chr
755
869
  when ','
756
870
  attributes['format'] = 'csv'
757
871
  when ':'
@@ -764,22 +878,24 @@ class Lexer
764
878
  block = build_block(block_context, (block_context == :verse ? :verbatim : :compound), terminator, parent, reader, attributes)
765
879
 
766
880
  else
767
- if block_extensions && extensions.processor_registered_for_block?(block_context, cloaked_context)
768
- processor = extensions.load_block_processor block_context, document
769
-
770
- if (content_model = processor.options[:content_model]) != :skip
771
- if !(pos_attrs = processor.options.fetch(:pos_attrs, [])).empty?
881
+ if block_extensions && (extension = extensions.registered_for_block?(block_context, cloaked_context))
882
+ # TODO pass cloaked_context to extension somehow (perhaps a new instance for each cloaked_context?)
883
+ if (content_model = extension.config[:content_model]) != :skip
884
+ if !(pos_attrs = extension.config[:pos_attrs] || []).empty?
772
885
  AttributeList.rekey(attributes, [nil].concat(pos_attrs))
773
886
  end
774
- if !(default_attrs = processor.options.fetch(:default_attrs, {})).empty?
887
+ if (default_attrs = extension.config[:default_attrs])
775
888
  default_attrs.each {|k, v| attributes[k] ||= v }
776
889
  end
777
890
  end
778
- block = build_block(block_context, content_model, terminator, parent, reader, attributes, :processor => processor)
779
- return nil if block.nil?
891
+ block = build_block block_context, content_model, terminator, parent, reader, attributes, :extension => extension
892
+ unless block && content_model != :skip
893
+ attributes.clear
894
+ return
895
+ end
780
896
  else
781
897
  # this should only happen if there's a misconfiguration
782
- raise "Unsupported block type #{block_context} at #{reader.line_info}"
898
+ raise %(Unsupported block type #{block_context} at #{reader.line_info})
783
899
  end
784
900
  end
785
901
  end
@@ -789,20 +905,38 @@ class Lexer
789
905
  # blocks or trailing attribute lists could leave us without a block,
790
906
  # so handle accordingly
791
907
  # REVIEW we may no longer need this nil check
792
- if !block.nil?
908
+ # FIXME we've got to clean this up, it's horrible!
909
+ if block
910
+ block.source_location = source_location if source_location
793
911
  # REVIEW seems like there is a better way to organize this wrap-up
794
- block.id ||= attributes['id'] if attributes.has_key?('id')
795
912
  block.title = attributes['title'] unless block.title?
796
- block.caption ||= attributes.delete('caption')
913
+ # FIXME HACK don't hardcode logic for alt, caption and scaledwidth on images down here
914
+ if block.context == :image
915
+ resolved_target = attributes['target']
916
+ block.document.register(:images, resolved_target)
917
+ attributes['alt'] ||= ::File.basename(resolved_target, ::File.extname(resolved_target)).tr('_-', ' ')
918
+ attributes['alt'] = block.sub_specialcharacters attributes['alt']
919
+ block.assign_caption attributes.delete('caption'), 'figure'
920
+ if (scaledwidth = attributes['scaledwidth'])
921
+ # append % to scaledwidth if ends in number (no units present)
922
+ if (48..57).include?((scaledwidth[-1] || 0).ord)
923
+ attributes['scaledwidth'] = %(#{scaledwidth}%)
924
+ end
925
+ end
926
+ else
927
+ block.caption ||= attributes.delete('caption')
928
+ end
797
929
  # TODO eventualy remove the style attribute from the attributes hash
798
930
  #block.style = attributes.delete('style')
799
931
  block.style = attributes['style']
800
932
  # AsciiDoc always use [id] as the reftext in HTML output,
801
933
  # but I'd like to do better in Asciidoctor
802
- if block.id && block.title? && !attributes.has_key?('reftext')
803
- document.register(:ids, [block.id, block.title])
934
+ if (block_id = (block.id ||= attributes['id']))
935
+ # TODO sub reftext
936
+ document.register(:ids, [block_id, (attributes['reftext'] || (block.title? ? block.title : nil))])
804
937
  end
805
- block.update_attributes(attributes)
938
+ # FIXME remove the need for this update!
939
+ block.attributes.update(attributes) unless attributes.empty?
806
940
  block.lock_in_subs
807
941
 
808
942
  #if document.attributes.has_key? :pending_attribute_entries
@@ -812,7 +946,7 @@ class Lexer
812
946
  #end
813
947
 
814
948
  if block.sub? :callouts
815
- if !(catalog_callouts block.source, document)
949
+ unless (catalog_callouts block.source, document)
816
950
  # No need to look for callouts if they aren't there
817
951
  block.remove_sub :callouts
818
952
  end
@@ -827,17 +961,14 @@ class Lexer
827
961
  # returns the match data if this line is the first line of a delimited block or nil if not
828
962
  def self.is_delimited_block? line, return_match_data = false
829
963
  # highly optimized for best performance
830
- line_len = line.length - 1
831
- return nil unless line_len > 1 && DELIMITED_BLOCK_LEADERS.include?(line[0..1])
832
- line = line.chomp
833
- # counts endline character in line length
964
+ return unless (line_len = line.length) > 1 && (DELIMITED_BLOCK_LEADERS.include? line[0..1])
965
+ # catches open block
834
966
  if line_len == 2
835
967
  tip = line
836
968
  tl = 2
837
- elsif line_len < 3
838
- return nil
839
969
  else
840
- if line_len < 5
970
+ # catches all other delimited blocks, including fenced code
971
+ if line_len <= 4
841
972
  tip = line
842
973
  tl = line_len
843
974
  else
@@ -846,27 +977,27 @@ class Lexer
846
977
  end
847
978
 
848
979
  # special case for fenced code blocks
980
+ # REVIEW review this logic
981
+ fenced_code = false
849
982
  if Compliance.markdown_syntax
850
- tip_alt = tip.chop if tl == 4
851
- if tip_alt == '```'
852
- if tip.end_with? '`'
853
- return nil
854
- end
855
- tip = tip_alt
856
- tl = 3
857
- elsif tip_alt == '~~~'
858
- if tip.end_with? '~'
859
- return nil
983
+ tip_3 = (tl == 4 ? tip.chop : tip)
984
+ if tip_3 == '```'
985
+ if tl == 4 && tip.end_with?('`')
986
+ return
860
987
  end
861
- tip = tip_alt
988
+ tip = tip_3
862
989
  tl = 3
990
+ fenced_code = true
863
991
  end
864
992
  end
993
+
994
+ # short circuit if not a fenced code block
995
+ return if tl == 3 && !fenced_code
865
996
  end
866
997
 
867
998
  if DELIMITED_BLOCKS.has_key? tip
868
999
  # tip is the full line when delimiter is minimum length
869
- if tl == 3 || tl == line_len
1000
+ if tl < 4 || tl == line_len
870
1001
  if return_match_data
871
1002
  context, masq = *DELIMITED_BLOCKS[tip]
872
1003
  BlockMatchData.new(context, masq, tip, tip)
@@ -880,7 +1011,8 @@ class Lexer
880
1011
  else
881
1012
  true
882
1013
  end
883
- #elsif match = line.match(REGEXP[:any_blk])
1014
+ # only enable if/when we decide to support non-congruent block delimiters
1015
+ #elsif (match = BlockDelimiterRx.match(line))
884
1016
  # if return_match_data
885
1017
  # context, masq = *DELIMITED_BLOCKS[tip]
886
1018
  # BlockMatchData.new(context, masq, tip, match[0])
@@ -918,13 +1050,13 @@ class Lexer
918
1050
  :preserve_last_line => true,
919
1051
  :skip_line_comments => true,
920
1052
  :skip_processing => skip_processing) {|line|
921
- COMPLIANCE[:block_terminates_paragraph] && (is_delimited_block?(line) || line.match(REGEXP[:attr_line]))
1053
+ Compliance.block_terminates_paragraph && (is_delimited_block?(line) || BlockAttributeLineRx =~ line)
922
1054
  }
923
1055
  # QUESTION check for empty lines after grabbing lines for simple content model?
924
1056
  end
925
1057
  block_reader = nil
926
1058
  elsif parse_as_content_model != :compound
927
- lines = reader.read_lines_until(:terminator => terminator, :chomp_last_line => true, :skip_processing => skip_processing)
1059
+ lines = reader.read_lines_until(:terminator => terminator, :skip_processing => skip_processing)
928
1060
  block_reader = nil
929
1061
  # terminator is false when reader has already been prepared
930
1062
  elsif terminator == false
@@ -938,24 +1070,37 @@ class Lexer
938
1070
 
939
1071
  if content_model == :skip
940
1072
  attributes.clear
1073
+ # FIXME we shouldn't be mixing return types
941
1074
  return lines
942
1075
  end
943
1076
 
944
- if content_model == :verbatim && attributes.has_key?('indent')
945
- reset_block_indent! lines, attributes['indent'].to_i
1077
+ if content_model == :verbatim && (indent = attributes['indent'])
1078
+ reset_block_indent! lines, indent.to_i
946
1079
  end
947
1080
 
948
- if (processor = options[:processor])
1081
+ if (extension = options[:extension])
1082
+ # QUESTION do we want to delete the style?
949
1083
  attributes.delete('style')
950
- processor.options[:content_model] = content_model
951
- block = processor.process(parent, block_reader || Reader.new(lines), attributes)
1084
+ if (block = extension.process_method[parent, block_reader || (Reader.new lines), attributes.dup])
1085
+ attributes.replace block.attributes
1086
+ # FIXME if the content model is set to compound, but we only have simple in this context, then
1087
+ # forcefully set the content_model to simple to prevent parsing blocks from children
1088
+ # TODO document this behavior!!
1089
+ if block.content_model == :compound && !(lines = block.lines).nil_or_empty?
1090
+ content_model = :compound
1091
+ block_reader = Reader.new lines
1092
+ end
1093
+ else
1094
+ # FIXME need a test to verify this returns nil at the right time
1095
+ return
1096
+ end
952
1097
  else
953
- block = Block.new(parent, block_context, :content_model => content_model, :attributes => attributes, :source => lines)
1098
+ block = Block.new(parent, block_context, :content_model => content_model, :source => lines, :attributes => attributes)
954
1099
  end
955
1100
 
956
- # should supports_caption be necessary?
957
- if options.fetch(:supports_caption, false)
958
- block.title = attributes.delete('title') if attributes.has_key?('title')
1101
+ # QUESTION should we have an explicit map or can we rely on check for *-caption attribute?
1102
+ if (attributes.has_key? 'title') && (block.document.attr? %(#{block.context}-caption))
1103
+ block.title = attributes.delete 'title'
959
1104
  block.assign_caption attributes.delete('caption')
960
1105
  end
961
1106
 
@@ -970,7 +1115,7 @@ class Lexer
970
1115
 
971
1116
  # Public: Parse blocks from this reader until there are no more lines.
972
1117
  #
973
- # This method calls Lexer#next_block until there are no more lines in the
1118
+ # This method calls Parser#next_block until there are no more lines in the
974
1119
  # Reader. It does not consider sections because it's assumed the Reader only
975
1120
  # has lines which are within a delimited block region.
976
1121
  #
@@ -980,8 +1125,8 @@ class Lexer
980
1125
  # Returns nothing.
981
1126
  def self.parse_blocks(reader, parent)
982
1127
  while reader.has_more_lines?
983
- block = Lexer.next_block(reader, parent)
984
- parent << block unless block.nil?
1128
+ block = Parser.next_block(reader, parent)
1129
+ parent << block if block
985
1130
  end
986
1131
  end
987
1132
 
@@ -1001,18 +1146,18 @@ class Lexer
1001
1146
  end
1002
1147
  #Debug.debug { "Created #{list_type} block: #{list_block}" }
1003
1148
 
1004
- while reader.has_more_lines? && (match = reader.peek_line.match(REGEXP[list_type]))
1149
+ while reader.has_more_lines? && (match = ListRxMap[list_type].match(reader.peek_line))
1005
1150
  marker = resolve_list_marker(list_type, match[1])
1006
1151
 
1007
1152
  # if we are moving to the next item, and the marker is different
1008
1153
  # determine if we are moving up or down in nesting
1009
- if list_block.items? && marker != list_block.items.first.marker
1154
+ if list_block.items? && marker != list_block.items[0].marker
1010
1155
  # assume list is nested by default, but then check to see if we are
1011
1156
  # popping out of a nested list by matching an ancestor's list marker
1012
1157
  this_item_level = list_block.level + 1
1013
1158
  ancestor = parent
1014
1159
  while ancestor.context == list_type
1015
- if marker == ancestor.items.first.marker
1160
+ if marker == ancestor.items[0].marker
1016
1161
  this_item_level = ancestor.level
1017
1162
  break
1018
1163
  end
@@ -1030,10 +1175,10 @@ class Lexer
1030
1175
  elsif this_item_level > list_block.level
1031
1176
  # If this next list level is down one from the
1032
1177
  # current Block's, append it to content of the current list item
1033
- list_block.items.last << next_block(reader, list_block)
1178
+ list_block.items[-1] << next_block(reader, list_block)
1034
1179
  end
1035
1180
 
1036
- list_block << list_item unless list_item.nil?
1181
+ list_block << list_item if list_item
1037
1182
  list_item = nil
1038
1183
 
1039
1184
  reader.skip_blank_lines
@@ -1051,10 +1196,10 @@ class Lexer
1051
1196
  def self.catalog_callouts(text, document)
1052
1197
  found = false
1053
1198
  if text.include? '<'
1054
- text.scan(REGEXP[:callout_quick_scan]) {
1199
+ text.scan(CalloutQuickScanRx) {
1055
1200
  # alias match for Ruby 1.8.7 compat
1056
1201
  m = $~
1057
- if m[0][0..0] != '\\'
1202
+ if m[0].chr != '\\'
1058
1203
  document.callouts.register(m[2])
1059
1204
  end
1060
1205
  # we have to mark as found even if it's escaped so it can be unescaped
@@ -1071,17 +1216,21 @@ class Lexer
1071
1216
  #
1072
1217
  # Returns nothing
1073
1218
  def self.catalog_inline_anchors(text, document)
1074
- text.scan(REGEXP[:anchor_macro]) {
1075
- # alias match for Ruby 1.8.7 compat
1076
- m = $~
1077
- next if m[0].start_with? '\\'
1078
- id, reftext = m[1].split(',')
1079
- id.sub!(REGEXP[:dbl_quoted], '\2')
1080
- if !reftext.nil?
1081
- reftext.sub!(REGEXP[:m_dbl_quoted], '\2')
1082
- end
1083
- document.register(:ids, [id, reftext])
1084
- }
1219
+ if text.include? '['
1220
+ text.scan(InlineAnchorRx) {
1221
+ # alias match for Ruby 1.8.7 compat
1222
+ m = $~
1223
+ next if m[0].start_with? '\\'
1224
+ id = m[1] || m[3]
1225
+ reftext = m[2] || m[4]
1226
+ # enable if we want to allow double quoted values
1227
+ #id = id.sub(DoubleQuotedRx, '\2')
1228
+ #if reftext
1229
+ # reftext = reftext.sub(DoubleQuotedMultiRx, '\2')
1230
+ #end
1231
+ document.register(:ids, [id, reftext])
1232
+ }
1233
+ end
1085
1234
  nil
1086
1235
  end
1087
1236
 
@@ -1097,11 +1246,11 @@ class Lexer
1097
1246
  previous_pair = nil
1098
1247
  # allows us to capture until we find a labeled item
1099
1248
  # that uses the same delimiter (::, :::, :::: or ;;)
1100
- sibling_pattern = REGEXP[:dlist_siblings][match[2]]
1249
+ sibling_pattern = DefinitionListSiblingRx[match[2]]
1101
1250
 
1102
1251
  begin
1103
1252
  term, item = next_list_item(reader, list_block, match, sibling_pattern)
1104
- if !previous_pair.nil? && previous_pair.last.nil?
1253
+ if previous_pair && !previous_pair[-1]
1105
1254
  previous_pair.pop
1106
1255
  previous_pair[0] << term
1107
1256
  previous_pair << item
@@ -1109,7 +1258,7 @@ class Lexer
1109
1258
  # FIXME this misses the automatic parent assignment
1110
1259
  list_block.items << (previous_pair = [[term], item])
1111
1260
  end
1112
- end while reader.has_more_lines? && match = reader.peek_line.match(sibling_pattern)
1261
+ end while reader.has_more_lines? && (match = sibling_pattern.match(reader.peek_line))
1113
1262
 
1114
1263
  list_block
1115
1264
  end
@@ -1132,22 +1281,20 @@ class Lexer
1132
1281
  # Returns the next ListItem or ListItem pair (depending on the list type)
1133
1282
  # for the parent list Block.
1134
1283
  def self.next_list_item(reader, list_block, match, sibling_trait = nil)
1135
- list_type = list_block.context
1136
-
1137
- if list_type == :dlist
1284
+ if (list_type = list_block.context) == :dlist
1138
1285
  list_term = ListItem.new(list_block, match[1])
1139
1286
  list_item = ListItem.new(list_block, match[3])
1140
- has_text = !match[3].to_s.empty?
1287
+ has_text = !match[3].nil_or_empty?
1141
1288
  else
1142
1289
  # Create list item using first line as the text of the list item
1143
1290
  text = match[2]
1144
1291
  checkbox = false
1145
1292
  if list_type == :ulist && text.start_with?('[')
1146
- if text.start_with? '[ ] '
1293
+ if text.start_with?('[ ] ')
1147
1294
  checkbox = true
1148
1295
  checked = false
1149
1296
  text = text[3..-1].lstrip
1150
- elsif text.start_with?('[*] ') || text.start_with?('[x] ')
1297
+ elsif text.start_with?('[x] ') || text.start_with?('[*] ')
1151
1298
  checkbox = true
1152
1299
  checked = true
1153
1300
  text = text[3..-1].lstrip
@@ -1162,9 +1309,7 @@ class Lexer
1162
1309
  list_item.attributes['checked'] = '' if checked
1163
1310
  end
1164
1311
 
1165
- if !sibling_trait
1166
- sibling_trait = resolve_list_marker(list_type, match[1], list_block.items.size, true, reader)
1167
- end
1312
+ sibling_trait ||= resolve_list_marker(list_type, match[1], list_block.items.size, true, reader)
1168
1313
  list_item.marker = sibling_trait
1169
1314
  has_text = true
1170
1315
  end
@@ -1179,13 +1324,13 @@ class Lexer
1179
1324
  list_item_reader.unshift_lines comment_lines unless comment_lines.empty?
1180
1325
 
1181
1326
  if !subsequent_line.nil?
1182
- continuation_connects_first_block = (subsequent_line == ::Asciidoctor::EOL)
1327
+ continuation_connects_first_block = subsequent_line.empty?
1183
1328
  # if there's no continuation connecting the first block, then
1184
1329
  # treat the lines as paragraph text (activated when has_text = false)
1185
1330
  if !continuation_connects_first_block && list_type != :dlist
1186
1331
  has_text = false
1187
1332
  end
1188
- content_adjacent = !subsequent_line.chomp.empty?
1333
+ content_adjacent = !continuation_connects_first_block && !subsequent_line.empty?
1189
1334
  else
1190
1335
  continuation_connects_first_block = false
1191
1336
  content_adjacent = false
@@ -1199,7 +1344,7 @@ class Lexer
1199
1344
  # list
1200
1345
  while list_item_reader.has_more_lines?
1201
1346
  new_block = next_block(list_item_reader, list_block, {}, options)
1202
- list_item << new_block unless new_block.nil?
1347
+ list_item << new_block if new_block
1203
1348
  end
1204
1349
 
1205
1350
  list_item.fold_first(continuation_connects_first_block, content_adjacent)
@@ -1255,17 +1400,17 @@ class Lexer
1255
1400
  # the termination of the list
1256
1401
  break if is_sibling_list_item?(this_line, list_type, sibling_trait)
1257
1402
 
1258
- prev_line = buffer.empty? ? nil : buffer.last.chomp
1403
+ prev_line = buffer.empty? ? nil : buffer[-1]
1259
1404
 
1260
1405
  if prev_line == LIST_CONTINUATION
1261
1406
  if continuation == :inactive
1262
1407
  continuation = :active
1263
1408
  has_text = true
1264
- buffer[-1] = ::Asciidoctor::EOL unless within_nested_list
1409
+ buffer[-1] = '' unless within_nested_list
1265
1410
  end
1266
1411
 
1267
1412
  # dealing with adjacent list continuations (which is really a syntax error)
1268
- if this_line.chomp == LIST_CONTINUATION
1413
+ if this_line == LIST_CONTINUATION
1269
1414
  if continuation != :frozen
1270
1415
  continuation = :frozen
1271
1416
  buffer << this_line
@@ -1277,7 +1422,7 @@ class Lexer
1277
1422
 
1278
1423
  # a delimited block immediately breaks the list unless preceded
1279
1424
  # by a list continuation (they are harsh like that ;0)
1280
- if match = is_delimited_block?(this_line, true)
1425
+ if (match = is_delimited_block?(this_line, true))
1281
1426
  if continuation == :active
1282
1427
  buffer << this_line
1283
1428
  # grab all the lines in the block, leaving the delimiters in place
@@ -1287,17 +1432,18 @@ class Lexer
1287
1432
  else
1288
1433
  break
1289
1434
  end
1290
- # technically attr_line only breaks if ensuing line is not a list item
1291
- # which really means attr_line only breaks if it's acting as a block delimiter
1292
- elsif list_type == :dlist && continuation != :active && this_line.match(REGEXP[:attr_line])
1435
+ # technically BlockAttributeLineRx only breaks if ensuing line is not a list item
1436
+ # which really means BlockAttributeLineRx only breaks if it's acting as a block delimiter
1437
+ # FIXME to be AsciiDoc compliant, we shouldn't break if style in attribute line is "literal" (i.e., [literal])
1438
+ elsif list_type == :dlist && continuation != :active && BlockAttributeLineRx =~ this_line
1293
1439
  break
1294
1440
  else
1295
- if continuation == :active && !this_line.chomp.empty?
1441
+ if continuation == :active && !this_line.empty?
1296
1442
  # literal paragraphs have special considerations (and this is one of
1297
1443
  # two entry points into one)
1298
1444
  # if we don't process it as a whole, then a line in it that looks like a
1299
1445
  # list item will throw off the exit from it
1300
- if this_line.match(REGEXP[:lit_par])
1446
+ if LiteralParagraphRx =~ this_line
1301
1447
  reader.unshift_line this_line
1302
1448
  buffer.concat reader.read_lines_until(
1303
1449
  :preserve_last_line => true,
@@ -1309,12 +1455,12 @@ class Lexer
1309
1455
  }
1310
1456
  continuation = :inactive
1311
1457
  # let block metadata play out until we find the block
1312
- elsif this_line.match(REGEXP[:blk_title]) || this_line.match(REGEXP[:attr_line]) || this_line.match(REGEXP[:attr_entry])
1458
+ elsif BlockTitleRx =~ this_line || BlockAttributeLineRx =~ this_line || AttributeEntryRx =~ this_line
1313
1459
  buffer << this_line
1314
1460
  else
1315
- if nested_list_type = (within_nested_list ? [:dlist] : NESTABLE_LIST_CONTEXTS).detect {|ctx| this_line.match(REGEXP[ctx]) }
1461
+ if nested_list_type = (within_nested_list ? [:dlist] : NESTABLE_LIST_CONTEXTS).detect {|ctx| ListRxMap[ctx] =~ this_line }
1316
1462
  within_nested_list = true
1317
- if nested_list_type == :dlist && $~[3].to_s.empty?
1463
+ if nested_list_type == :dlist && $~[3].nil_or_empty?
1318
1464
  # get greedy again
1319
1465
  has_text = false
1320
1466
  end
@@ -1322,16 +1468,16 @@ class Lexer
1322
1468
  buffer << this_line
1323
1469
  continuation = :inactive
1324
1470
  end
1325
- elsif !prev_line.nil? && prev_line.chomp.empty?
1471
+ elsif !prev_line.nil? && prev_line.empty?
1326
1472
  # advance to the next line of content
1327
- if this_line.chomp.empty?
1473
+ if this_line.empty?
1328
1474
  reader.skip_blank_lines
1329
1475
  this_line = reader.read_line
1330
1476
  # if we hit eof or a sibling, stop reading
1331
1477
  break if this_line.nil? || is_sibling_list_item?(this_line, list_type, sibling_trait)
1332
1478
  end
1333
1479
 
1334
- if this_line.chomp == LIST_CONTINUATION
1480
+ if this_line == LIST_CONTINUATION
1335
1481
  detached_continuation = buffer.size
1336
1482
  buffer << this_line
1337
1483
  else
@@ -1339,8 +1485,19 @@ class Lexer
1339
1485
  # for all other lists, has_text is always true
1340
1486
  # in this block, we have to see whether we stay in the list
1341
1487
  if has_text
1488
+ # TODO any way to combine this with the check after skipping blank lines?
1489
+ if is_sibling_list_item?(this_line, list_type, sibling_trait)
1490
+ break
1491
+ elsif nested_list_type = NESTABLE_LIST_CONTEXTS.detect {|ctx| ListRxMap[ctx] =~ this_line }
1492
+ buffer << this_line
1493
+ within_nested_list = true
1494
+ if nested_list_type == :dlist && $~[3].nil_or_empty?
1495
+ # get greedy again
1496
+ has_text = false
1497
+ end
1342
1498
  # slurp up any literal paragraph offset by blank lines
1343
- if this_line.match(REGEXP[:lit_par])
1499
+ # NOTE we have to check for indented list items first
1500
+ elsif LiteralParagraphRx =~ this_line
1344
1501
  reader.unshift_line this_line
1345
1502
  buffer.concat reader.read_lines_until(
1346
1503
  :preserve_last_line => true,
@@ -1350,16 +1507,6 @@ class Lexer
1350
1507
  # so we need to make sure we don't slurp up a legitimate sibling
1351
1508
  list_type == :dlist && is_sibling_list_item?(line, list_type, sibling_trait)
1352
1509
  }
1353
- # TODO any way to combine this with the check after skipping blank lines?
1354
- elsif is_sibling_list_item?(this_line, list_type, sibling_trait)
1355
- break
1356
- elsif nested_list_type = NESTABLE_LIST_CONTEXTS.detect {|ctx| this_line.match(REGEXP[ctx]) }
1357
- buffer << this_line
1358
- within_nested_list = true
1359
- if nested_list_type == :dlist && $~[3].to_s.empty?
1360
- # get greedy again
1361
- has_text = false
1362
- end
1363
1510
  else
1364
1511
  break
1365
1512
  end
@@ -1371,10 +1518,10 @@ class Lexer
1371
1518
  end
1372
1519
  end
1373
1520
  else
1374
- has_text = true if !this_line.chomp.empty?
1375
- if nested_list_type = (within_nested_list ? [:dlist] : NESTABLE_LIST_CONTEXTS).detect {|ctx| this_line.match(REGEXP[ctx]) }
1521
+ has_text = true if !this_line.empty?
1522
+ if nested_list_type = (within_nested_list ? [:dlist] : NESTABLE_LIST_CONTEXTS).detect {|ctx| ListRxMap[ctx] =~ this_line }
1376
1523
  within_nested_list = true
1377
- if nested_list_type == :dlist && $~[3].to_s.empty?
1524
+ if nested_list_type == :dlist && $~[3].nil_or_empty?
1378
1525
  # get greedy again
1379
1526
  has_text = false
1380
1527
  end
@@ -1385,23 +1532,21 @@ class Lexer
1385
1532
  this_line = nil
1386
1533
  end
1387
1534
 
1388
- reader.unshift_line this_line if !this_line.nil?
1535
+ reader.unshift_line this_line if this_line
1389
1536
 
1390
1537
  if detached_continuation
1391
1538
  buffer.delete_at detached_continuation
1392
1539
  end
1393
1540
 
1394
1541
  # strip trailing blank lines to prevent empty blocks
1395
- buffer.pop while !buffer.empty? && buffer.last.chomp.empty?
1542
+ buffer.pop while !buffer.empty? && buffer[-1].empty?
1396
1543
 
1397
1544
  # We do need to replace the optional trailing continuation
1398
1545
  # a blank line would have served the same purpose in the document
1399
- if !buffer.empty? && buffer.last.chomp == LIST_CONTINUATION
1400
- buffer.pop
1401
- end
1546
+ buffer.pop if !buffer.empty? && buffer[-1] == LIST_CONTINUATION
1402
1547
 
1403
- #puts "BUFFER[#{list_type},#{sibling_trait}]>#{buffer.join}<BUFFER"
1404
- #puts "BUFFER[#{list_type},#{sibling_trait}]>#{buffer.inspect}<BUFFER"
1548
+ #warn "BUFFER[#{list_type},#{sibling_trait}]>#{buffer * EOL}<BUFFER"
1549
+ #warn "BUFFER[#{list_type},#{sibling_trait}]>#{buffer.inspect}<BUFFER"
1405
1550
 
1406
1551
  buffer
1407
1552
  end
@@ -1416,35 +1561,37 @@ class Lexer
1416
1561
  # attributes - a Hash of attributes to assign to this section (default: {})
1417
1562
  def self.initialize_section(reader, parent, attributes = {})
1418
1563
  document = parent.document
1419
- sect_id, sect_title, sect_level, _ = parse_section_title(reader, document)
1420
- section = Section.new parent, sect_level, document.attributes.has_key?('numbered')
1564
+ source_location = reader.cursor if document.sourcemap
1565
+ sect_id, sect_reftext, sect_title, sect_level, _ = parse_section_title(reader, document)
1566
+ attributes['reftext'] = sect_reftext if sect_reftext
1567
+ section = Section.new parent, sect_level, document.attributes.has_key?('sectnums')
1568
+ section.source_location = source_location if source_location
1421
1569
  section.id = sect_id
1422
1570
  section.title = sect_title
1423
1571
  # parse style, id and role from first positional attribute
1424
1572
  if attributes[1]
1425
- section.sectname, _ = parse_style_attribute(attributes, reader)
1426
- section.special = true
1427
- # HACK needs to be refactored so it's driven by config
1428
- if section.sectname == 'abstract' && document.doctype == 'book'
1429
- section.sectname = "sect1"
1430
- section.special = false
1431
- section.level = 1
1432
- # FIXME refactor to use assign_caption (also check requirements)
1433
- elsif section.sectname == 'appendix' &&
1434
- !attributes.has_key?('caption') &&
1435
- !document.attributes.has_key?('caption')
1436
- number = document.counter('appendix-number', 'A')
1437
- section.caption = "#{document.attributes['appendix-caption']} #{number}: "
1438
- Document::AttributeEntry.new('appendix-number', number).save_to(attributes)
1573
+ style, _ = parse_style_attribute attributes, reader
1574
+ # handle case where only id and/or role are given (e.g., #idname.rolename)
1575
+ if style
1576
+ section.sectname = style
1577
+ section.special = true
1578
+ # HACK needs to be refactored so it's driven by config
1579
+ if section.sectname == 'abstract' && document.doctype == 'book'
1580
+ section.sectname = 'sect1'
1581
+ section.special = false
1582
+ section.level = 1
1583
+ end
1584
+ else
1585
+ section.sectname = %(sect#{section.level})
1439
1586
  end
1440
1587
  elsif sect_title.downcase == 'synopsis' && document.doctype == 'manpage'
1441
1588
  section.special = true
1442
1589
  section.sectname = 'synopsis'
1443
1590
  else
1444
- section.sectname = "sect#{section.level}"
1591
+ section.sectname = %(sect#{section.level})
1445
1592
  end
1446
1593
 
1447
- if section.id.nil? && (id = attributes['id'])
1594
+ if !section.id && (id = attributes['id'])
1448
1595
  section.id = id
1449
1596
  else
1450
1597
  # generate an id if one was not *embedded* in the heading line
@@ -1453,7 +1600,8 @@ class Lexer
1453
1600
  end
1454
1601
 
1455
1602
  if section.id
1456
- section.document.register(:ids, [section.id, section.title])
1603
+ # TODO sub reftext
1604
+ section.document.register(:ids, [section.id, (attributes['reftext'] || section.title)])
1457
1605
  end
1458
1606
  section.update_attributes(attributes)
1459
1607
  reader.skip_blank_lines
@@ -1466,7 +1614,7 @@ class Lexer
1466
1614
  #
1467
1615
  # line - the String line from under the section title.
1468
1616
  def self.section_level(line)
1469
- SECTION_LEVELS[line[0..0]]
1617
+ SECTION_LEVELS[line.chr]
1470
1618
  end
1471
1619
 
1472
1620
  #--
@@ -1483,8 +1631,10 @@ class Lexer
1483
1631
  # returns the section level if the Reader is positioned at a section title,
1484
1632
  # false otherwise
1485
1633
  def self.is_next_line_section?(reader, attributes)
1486
- return false if !(val = attributes[1]).nil? && ['float', 'discrete'].include?(val)
1487
- return false if !reader.has_more_lines?
1634
+ if !(val = attributes[1]).nil? && ((ord_0 = val[0].ord) == 100 || ord_0 == 102) && val =~ FloatingTitleStyleRx
1635
+ return false
1636
+ end
1637
+ return false unless reader.has_more_lines?
1488
1638
  Compliance.underline_style_section_titles ? is_section_title?(*reader.peek_lines(2)) : is_section_title?(reader.peek_line)
1489
1639
  end
1490
1640
 
@@ -1516,9 +1666,9 @@ class Lexer
1516
1666
  end
1517
1667
 
1518
1668
  def self.is_single_line_section_title?(line1)
1519
- first_char = line1.nil? ? nil : line1[0..0]
1669
+ first_char = line1 ? line1.chr : nil
1520
1670
  if (first_char == '=' || (Compliance.markdown_syntax && first_char == '#')) &&
1521
- (match = line1.match(REGEXP[:section_title]))
1671
+ (match = AtxSectionRx.match(line1))
1522
1672
  single_line_section_level match[1]
1523
1673
  else
1524
1674
  false
@@ -1526,8 +1676,8 @@ class Lexer
1526
1676
  end
1527
1677
 
1528
1678
  def self.is_two_line_section_title?(line1, line2)
1529
- if !line1.nil? && !line2.nil? && SECTION_LEVELS.has_key?(line2[0..0]) &&
1530
- line2.match(REGEXP[:section_underline]) && line1.match(REGEXP[:section_name]) &&
1679
+ if line1 && line2 && SECTION_LEVELS.has_key?(line2.chr) &&
1680
+ line2 =~ SetextSectionLineRx && line1 =~ SetextSectionTitleRx &&
1531
1681
  # chomp so that a (non-visible) endline does not impact calculation
1532
1682
  (line_length(line1) - line_length(line2)).abs <= 1
1533
1683
  section_level line2
@@ -1547,9 +1697,9 @@ class Lexer
1547
1697
  # Examples
1548
1698
  #
1549
1699
  # reader.lines
1550
- # # => ["Foo\n", "~~~\n"]
1700
+ # # => ["Foo", "~~~"]
1551
1701
  #
1552
- # title, level, id, single = parse_section_title(reader, document)
1702
+ # id, reftext, title, level, single = parse_section_title(reader, document)
1553
1703
  #
1554
1704
  # title
1555
1705
  # # => "Foo"
@@ -1561,9 +1711,9 @@ class Lexer
1561
1711
  # # => false
1562
1712
  #
1563
1713
  # line1
1564
- # # => "==== Foo\n"
1714
+ # # => "==== Foo"
1565
1715
  #
1566
- # title, level, id, single = parse_section_title(reader, document)
1716
+ # id, reftext, title, level, single = parse_section_title(reader, document)
1567
1717
  #
1568
1718
  # title
1569
1719
  # # => "Foo"
@@ -1574,8 +1724,8 @@ class Lexer
1574
1724
  # single
1575
1725
  # # => true
1576
1726
  #
1577
- # returns an Array of [String, Integer, String, Boolean], representing the
1578
- # id, title, level and line count of the Section, or nil.
1727
+ # returns an Array of [String, String, Integer, String, Boolean], representing the
1728
+ # id, reftext, title, level and line count of the Section, or nil.
1579
1729
  #
1580
1730
  #--
1581
1731
  # NOTE for efficiency, we don't reuse methods that check for a section title
@@ -1584,25 +1734,33 @@ class Lexer
1584
1734
  sect_id = nil
1585
1735
  sect_title = nil
1586
1736
  sect_level = -1
1737
+ sect_reftext = nil
1587
1738
  single_line = true
1588
1739
 
1589
- first_char = line1[0..0]
1740
+ first_char = line1.chr
1590
1741
  if (first_char == '=' || (Compliance.markdown_syntax && first_char == '#')) &&
1591
- (match = line1.match(REGEXP[:section_title]))
1592
- sect_id = match[3]
1593
- sect_title = match[2]
1742
+ (match = AtxSectionRx.match(line1))
1594
1743
  sect_level = single_line_section_level match[1]
1744
+ sect_title = match[2]
1745
+ if sect_title.end_with?(']]') && (anchor_match = InlineSectionAnchorRx.match(sect_title))
1746
+ if anchor_match[2].nil?
1747
+ sect_title = anchor_match[1]
1748
+ sect_id = anchor_match[3]
1749
+ sect_reftext = anchor_match[4]
1750
+ end
1751
+ end
1595
1752
  elsif Compliance.underline_style_section_titles
1596
- line2 = reader.peek_line true
1597
- if !line2.nil? && SECTION_LEVELS.has_key?(line2[0..0]) && line2.match(REGEXP[:section_underline]) &&
1598
- (name_match = line1.match(REGEXP[:section_name])) &&
1753
+ if (line2 = reader.peek_line(true)) && SECTION_LEVELS.has_key?(line2.chr) && line2 =~ SetextSectionLineRx &&
1754
+ (name_match = SetextSectionTitleRx.match(line1)) &&
1599
1755
  # chomp so that a (non-visible) endline does not impact calculation
1600
1756
  (line_length(line1) - line_length(line2)).abs <= 1
1601
- if anchor_match = name_match[1].match(REGEXP[:anchor_embedded])
1602
- sect_id = anchor_match[2]
1603
- sect_title = anchor_match[1]
1604
- else
1605
- sect_title = name_match[1]
1757
+ sect_title = name_match[1]
1758
+ if sect_title.end_with?(']]') && (anchor_match = InlineSectionAnchorRx.match(sect_title))
1759
+ if anchor_match[2].nil?
1760
+ sect_title = anchor_match[1]
1761
+ sect_id = anchor_match[3]
1762
+ sect_reftext = anchor_match[4]
1763
+ end
1606
1764
  end
1607
1765
  sect_level = section_level line2
1608
1766
  single_line = false
@@ -1612,7 +1770,7 @@ class Lexer
1612
1770
  if sect_level >= 0
1613
1771
  sect_level += document.attr('leveloffset', 0).to_i
1614
1772
  end
1615
- [sect_id, sect_title, sect_level, single_line]
1773
+ [sect_id, sect_reftext, sect_title, sect_level, single_line]
1616
1774
  end
1617
1775
 
1618
1776
  # Public: Calculate the number of unicode characters in the line, excluding the endline
@@ -1621,7 +1779,7 @@ class Lexer
1621
1779
  #
1622
1780
  # returns the number of unicode characters in the line
1623
1781
  def self.line_length(line)
1624
- FORCE_UNICODE_LINE_LENGTH ? line.chomp.scan(/./u).length : line.chomp.length
1782
+ FORCE_UNICODE_LINE_LENGTH ? line.scan(UnicodeCharScanRx).length : line.length
1625
1783
  end
1626
1784
 
1627
1785
  # Public: Consume and parse the two header lines (line 1 = author info, line 2 = revision info).
@@ -1634,7 +1792,8 @@ class Lexer
1634
1792
  #
1635
1793
  # Examples
1636
1794
  #
1637
- # parse_header_metadata(Reader.new ["Author Name <author@example.org>\n", "v1.0, 2012-12-21: Coincide w/ end of world.\n"])
1795
+ # data = ["Author Name <author@example.org>\n", "v1.0, 2012-12-21: Coincide w/ end of world.\n"]
1796
+ # parse_header_metadata(Reader.new data, nil, :normalize => true)
1638
1797
  # # => {'author' => 'Author Name', 'firstname' => 'Author', 'lastname' => 'Name', 'email' => 'author@example.org',
1639
1798
  # # 'revnumber' => '1.0', 'revdate' => '2012-12-21', 'revremark' => 'Coincide w/ end of world.'}
1640
1799
  def self.parse_header_metadata(reader, document = nil)
@@ -1649,12 +1808,12 @@ class Lexer
1649
1808
  author_metadata = process_authors reader.read_line
1650
1809
 
1651
1810
  unless author_metadata.empty?
1652
- # apply header subs and assign to document
1653
- if !document.nil?
1654
- author_metadata.map do |key, val|
1655
- val = val.is_a?(String) ? document.apply_header_subs(val) : val
1656
- document.attributes[key] = val if !document.attributes.has_key?(key)
1657
- val
1811
+ if document
1812
+ # apply header subs and assign to document
1813
+ author_metadata.each do |key, val|
1814
+ unless document.attributes.has_key? key
1815
+ document.attributes[key] = ((val.is_a? ::String) ? document.apply_header_subs(val) : val)
1816
+ end
1658
1817
  end
1659
1818
 
1660
1819
  implicit_author = document.attributes['author']
@@ -1671,7 +1830,7 @@ class Lexer
1671
1830
 
1672
1831
  if reader.has_more_lines? && !reader.next_line_empty?
1673
1832
  rev_line = reader.read_line
1674
- if match = rev_line.match(REGEXP[:revision_info])
1833
+ if (match = RevisionInfoLineRx.match(rev_line))
1675
1834
  rev_metadata['revdate'] = match[2].strip
1676
1835
  rev_metadata['revnumber'] = match[1].rstrip unless match[1].nil?
1677
1836
  rev_metadata['revremark'] = match[3].rstrip unless match[3].nil?
@@ -1682,12 +1841,12 @@ class Lexer
1682
1841
  end
1683
1842
 
1684
1843
  unless rev_metadata.empty?
1685
- # apply header subs and assign to document
1686
- if !document.nil?
1687
- rev_metadata.map do |key, val|
1688
- val = document.apply_header_subs(val)
1689
- document.attributes[key] = val if !document.attributes.has_key?(key)
1690
- val
1844
+ if document
1845
+ # apply header subs and assign to document
1846
+ rev_metadata.each do |key, val|
1847
+ unless document.attributes.has_key? key
1848
+ document.attributes[key] = document.apply_header_subs(val)
1849
+ end
1691
1850
  end
1692
1851
  end
1693
1852
 
@@ -1700,7 +1859,7 @@ class Lexer
1700
1859
  reader.skip_blank_lines
1701
1860
  end
1702
1861
 
1703
- if !document.nil?
1862
+ if document
1704
1863
  # process author attribute entries that override (or stand in for) the implicit author line
1705
1864
  author_metadata = nil
1706
1865
  if document.attributes.has_key?('author') &&
@@ -1713,21 +1872,21 @@ class Lexer
1713
1872
  author_metadata = process_authors author_line, true
1714
1873
  else
1715
1874
  authors = []
1716
- author_key = "author_#{authors.size + 1}"
1875
+ author_key = %(author_#{authors.size + 1})
1717
1876
  while document.attributes.has_key? author_key
1718
1877
  authors << document.attributes[author_key]
1719
- author_key = "author_#{authors.size + 1}"
1878
+ author_key = %(author_#{authors.size + 1})
1720
1879
  end
1721
1880
  if authors.size == 1
1722
1881
  # do not allow multiple, process as names only
1723
- author_metadata = process_authors authors.first, true, false
1882
+ author_metadata = process_authors authors[0], true, false
1724
1883
  elsif authors.size > 1
1725
1884
  # allow multiple, process as names only
1726
1885
  author_metadata = process_authors authors.join('; '), true
1727
1886
  end
1728
1887
  end
1729
1888
 
1730
- unless author_metadata.nil?
1889
+ if author_metadata
1731
1890
  document.attributes.update author_metadata
1732
1891
 
1733
1892
  # special case
@@ -1752,9 +1911,8 @@ class Lexer
1752
1911
  def self.process_authors(author_line, names_only = false, multiple = true)
1753
1912
  author_metadata = {}
1754
1913
  keys = ['author', 'authorinitials', 'firstname', 'middlename', 'lastname', 'email']
1755
- author_entries = multiple ? author_line.split(';').map(&:strip) : [author_line]
1914
+ author_entries = multiple ? (author_line.split ';').map {|line| line.strip } : [author_line]
1756
1915
  author_entries.each_with_index do |author_entry, idx|
1757
- author_entry.strip!
1758
1916
  next if author_entry.empty?
1759
1917
  key_map = {}
1760
1918
  if idx.zero?
@@ -1763,7 +1921,7 @@ class Lexer
1763
1921
  end
1764
1922
  else
1765
1923
  keys.each do |key|
1766
- key_map[key.to_sym] = "#{key}_#{idx + 1}"
1924
+ key_map[key.to_sym] = %(#{key}_#{idx + 1})
1767
1925
  end
1768
1926
  end
1769
1927
 
@@ -1771,7 +1929,7 @@ class Lexer
1771
1929
  if names_only
1772
1930
  # splitting on ' ' will collapse repeating spaces
1773
1931
  segments = author_entry.split(' ', 3)
1774
- elsif (match = author_entry.match(REGEXP[:author_info]))
1932
+ elsif (match = AuthorInfoLineRx.match(author_entry))
1775
1933
  segments = match.to_a
1776
1934
  segments.shift
1777
1935
  end
@@ -1792,7 +1950,7 @@ class Lexer
1792
1950
  end
1793
1951
  author_metadata[key_map[:email]] = segments[3] unless names_only || segments[3].nil?
1794
1952
  else
1795
- author_metadata[key_map[:author]] = author_metadata[key_map[:firstname]] = fname = author_entry.strip.squeeze(' ')
1953
+ author_metadata[key_map[:author]] = author_metadata[key_map[:firstname]] = fname = author_entry.strip.tr_s(' ', ' ')
1796
1954
  author_metadata[key_map[:authorinitials]] = fname[0, 1]
1797
1955
  end
1798
1956
 
@@ -1800,13 +1958,13 @@ class Lexer
1800
1958
  # only assign the _1 attributes if there are multiple authors
1801
1959
  if idx == 1
1802
1960
  keys.each do |key|
1803
- author_metadata["#{key}_1"] = author_metadata[key] if author_metadata.has_key? key
1961
+ author_metadata[%(#{key}_1)] = author_metadata[key] if author_metadata.has_key? key
1804
1962
  end
1805
1963
  end
1806
1964
  if idx.zero?
1807
1965
  author_metadata['authors'] = author_metadata[key_map[:author]]
1808
1966
  else
1809
- author_metadata['authors'] = "#{author_metadata['authors']}, #{author_metadata[key_map[:author]]}"
1967
+ author_metadata['authors'] = %(#{author_metadata['authors']}, #{author_metadata[key_map[:author]]})
1810
1968
  end
1811
1969
  end
1812
1970
 
@@ -1856,30 +2014,28 @@ class Lexer
1856
2014
  #
1857
2015
  # returns true if the line contains metadata, otherwise false
1858
2016
  def self.parse_block_metadata_line(reader, parent, attributes, options = {})
1859
- return false if !reader.has_more_lines?
2017
+ return false unless reader.has_more_lines?
1860
2018
  next_line = reader.peek_line
1861
- if (commentish = next_line.start_with?('//')) && (match = next_line.match(REGEXP[:comment_blk]))
2019
+ if (commentish = next_line.start_with?('//')) && (match = CommentBlockRx.match(next_line))
1862
2020
  terminator = match[0]
1863
2021
  reader.read_lines_until(:skip_first_line => true, :preserve_last_line => true, :terminator => terminator, :skip_processing => true)
1864
- elsif commentish && next_line.match(REGEXP[:comment])
2022
+ elsif commentish && CommentLineRx =~ next_line
1865
2023
  # do nothing, we'll skip it
1866
- elsif !options[:text] && (match = next_line.match(REGEXP[:attr_entry]))
2024
+ elsif !options[:text] && next_line.start_with?(':') && (match = AttributeEntryRx.match(next_line))
1867
2025
  process_attribute_entry(reader, parent, attributes, match)
1868
- elsif match = next_line.match(REGEXP[:anchor])
1869
- id, reftext = match[1].split(',')
1870
- attributes['id'] = id
1871
- # AsciiDoc always uses [id] as the reftext in HTML output,
1872
- # but I'd like to do better in Asciidoctor
1873
- #parent.document.register(:ids, id)
1874
- if reftext
1875
- attributes['reftext'] = reftext
1876
- parent.document.register(:ids, [id, reftext])
2026
+ elsif (in_square_brackets = next_line.start_with?('[') && next_line.end_with?(']')) && (match = BlockAnchorRx.match(next_line))
2027
+ unless match[1].nil_or_empty?
2028
+ attributes['id'] = match[1]
2029
+ # AsciiDoc always uses [id] as the reftext in HTML output,
2030
+ # but I'd like to do better in Asciidoctor
2031
+ # registration is deferred until the block or section is processed
2032
+ attributes['reftext'] = match[2] unless match[2].nil?
1877
2033
  end
1878
- elsif match = next_line.match(REGEXP[:blk_attr_list])
2034
+ elsif in_square_brackets && (match = BlockAttributeListRx.match(next_line))
1879
2035
  parent.document.parse_attributes(match[1], [], :sub_input => true, :into => attributes)
1880
2036
  # NOTE title doesn't apply to section, but we need to stash it for the first block
1881
2037
  # TODO should issue an error if this is found above the document title
1882
- elsif !options[:text] && (match = next_line.match(REGEXP[:blk_title]))
2038
+ elsif !options[:text] && (match = BlockTitleRx.match(next_line))
1883
2039
  attributes['title'] = match[1]
1884
2040
  else
1885
2041
  return false
@@ -1898,25 +2054,26 @@ class Lexer
1898
2054
  end
1899
2055
 
1900
2056
  def self.process_attribute_entry(reader, parent, attributes = nil, match = nil)
1901
- match ||= reader.has_more_lines? ? reader.peek_line.match(REGEXP[:attr_entry]) : nil
2057
+ match ||= (reader.has_more_lines? ? AttributeEntryRx.match(reader.peek_line) : nil)
1902
2058
  if match
1903
2059
  name = match[1]
1904
- value = match[2].nil? ? '' : match[2]
1905
- if value.end_with? LINE_BREAK
1906
- value.chop!.rstrip!
1907
- while reader.advance
1908
- next_line = reader.peek_line.strip
1909
- break if next_line.empty?
1910
- if next_line.end_with? LINE_BREAK
1911
- value = "#{value} #{next_line.chop.rstrip}"
1912
- else
1913
- value = "#{value} #{next_line}"
1914
- break
2060
+ unless (value = match[2] || '').empty?
2061
+ if value.end_with?(line_continuation = LINE_CONTINUATION) ||
2062
+ value.end_with?(line_continuation = LINE_CONTINUATION_LEGACY)
2063
+ value = value.chop.rstrip
2064
+ while reader.advance
2065
+ break if (next_line = reader.peek_line.strip).empty?
2066
+ if (keep_open = next_line.end_with? line_continuation)
2067
+ next_line = next_line.chop.rstrip
2068
+ end
2069
+ separator = (value.end_with? LINE_BREAK) ? EOL : ' '
2070
+ value = %(#{value}#{separator}#{next_line})
2071
+ break unless keep_open
1915
2072
  end
1916
2073
  end
1917
2074
  end
1918
2075
 
1919
- store_attribute(name, value, parent.nil? ? nil : parent.document, attributes)
2076
+ store_attribute(name, value, (parent ? parent.document : nil), attributes)
1920
2077
  true
1921
2078
  else
1922
2079
  false
@@ -1932,6 +2089,7 @@ class Lexer
1932
2089
  #
1933
2090
  # returns a 2-element array containing the attribute name and value
1934
2091
  def self.store_attribute(name, value, doc = nil, attrs = nil)
2092
+ # TODO move processing of attribute value to utility method
1935
2093
  if name.end_with?('!')
1936
2094
  # a nil value signals the attribute should be deleted (undefined)
1937
2095
  value = nil
@@ -1944,11 +2102,25 @@ class Lexer
1944
2102
 
1945
2103
  name = sanitize_attribute_name(name)
1946
2104
  accessible = true
1947
- unless doc.nil?
1948
- accessible = value.nil? ? doc.delete_attribute(name) : doc.set_attribute(name, value)
2105
+ if doc
2106
+ # alias numbered attribute to sectnums
2107
+ if name == 'numbered'
2108
+ name = 'sectnums'
2109
+ # support relative leveloffset values
2110
+ elsif name == 'leveloffset'
2111
+ if value
2112
+ case value.chr
2113
+ when '+'
2114
+ value = ((doc.attr 'leveloffset', 0).to_i + (value[1..-1] || 0).to_i).to_s
2115
+ when '-'
2116
+ value = ((doc.attr 'leveloffset', 0).to_i - (value[1..-1] || 0).to_i).to_s
2117
+ end
2118
+ end
2119
+ end
2120
+ accessible = value ? doc.set_attribute(name, value) : doc.delete_attribute(name)
1949
2121
  end
1950
2122
 
1951
- unless !accessible || attrs.nil?
2123
+ if accessible && attrs
1952
2124
  Document::AttributeEntry.new(name, value).save_to(attrs)
1953
2125
  end
1954
2126
 
@@ -1998,12 +2170,12 @@ class Lexer
1998
2170
  # Examples
1999
2171
  #
2000
2172
  # marker = 'B.'
2001
- # Lexer::resolve_ordered_list_marker(marker, 1, true)
2173
+ # Parser.resolve_ordered_list_marker(marker, 1, true)
2002
2174
  # # => 'A.'
2003
2175
  #
2004
2176
  # Returns the String of the first marker in this number series
2005
2177
  def self.resolve_ordered_list_marker(marker, ordinal = 0, validate = false, reader = nil)
2006
- number_style = ORDERED_LIST_STYLES.detect {|s| marker.match(ORDERED_LIST_MARKER_PATTERNS[s]) }
2178
+ number_style = ORDERED_LIST_STYLES.detect {|s| OrderedListMarkerRxMap[s] =~ marker }
2007
2179
  expected = actual = nil
2008
2180
  case number_style
2009
2181
  when :arabic
@@ -2041,7 +2213,7 @@ class Lexer
2041
2213
  end
2042
2214
 
2043
2215
  if validate && expected != actual
2044
- warn "asciidoctor: WARNING: #{reader.line_info}: list item index: expected #{expected}, got #{actual}"
2216
+ warn %(asciidoctor: WARNING: #{reader.line_info}: list item index: expected #{expected}, got #{actual})
2045
2217
  end
2046
2218
 
2047
2219
  marker
@@ -2057,15 +2229,15 @@ class Lexer
2057
2229
  # Returns a Boolean indicating whether this line is a sibling list item given
2058
2230
  # the criteria provided
2059
2231
  def self.is_sibling_list_item?(line, list_type, sibling_trait)
2060
- if sibling_trait.is_a?(Regexp)
2232
+ if sibling_trait.is_a? ::Regexp
2061
2233
  matcher = sibling_trait
2062
2234
  expected_marker = false
2063
2235
  else
2064
- matcher = REGEXP[list_type]
2236
+ matcher = ListRxMap[list_type]
2065
2237
  expected_marker = sibling_trait
2066
2238
  end
2067
2239
 
2068
- if m = line.match(matcher)
2240
+ if (m = matcher.match(line))
2069
2241
  if expected_marker
2070
2242
  expected_marker == resolve_list_marker(list_type, m[1])
2071
2243
  else
@@ -2085,8 +2257,10 @@ class Lexer
2085
2257
  # returns an instance of Asciidoctor::Table parsed from the provided reader
2086
2258
  def self.next_table(table_reader, parent, attributes)
2087
2259
  table = Table.new(parent, attributes)
2088
- table.title = attributes.delete('title') if attributes.has_key?('title')
2089
- table.assign_caption attributes.delete('caption')
2260
+ if (attributes.has_key? 'title')
2261
+ table.title = attributes.delete 'title'
2262
+ table.assign_caption attributes.delete('caption')
2263
+ end
2090
2264
 
2091
2265
  if attributes.has_key? 'cols'
2092
2266
  table.create_columns(parse_col_specs(attributes['cols']))
@@ -2104,7 +2278,7 @@ class Lexer
2104
2278
  line = table_reader.read_line
2105
2279
 
2106
2280
  if skipped == 0 && loop_idx.zero? && !attributes.has_key?('options') &&
2107
- !(next_line = table_reader.peek_line).nil? && next_line == ::Asciidoctor::EOL
2281
+ !(next_line = table_reader.peek_line).nil? && next_line.empty?
2108
2282
  table.has_header_option = true
2109
2283
  table.set_option 'header'
2110
2284
  end
@@ -2115,7 +2289,7 @@ class Lexer
2115
2289
  # push an empty cell spec if boundary at start of line
2116
2290
  parser_ctx.close_open_cell
2117
2291
  else
2118
- next_cell_spec, line = parse_cell_spec(line, :start)
2292
+ next_cell_spec, line = parse_cell_spec(line, :start, parser_ctx.delimiter)
2119
2293
  # if the cell spec is not null, then we're at a cell boundary
2120
2294
  if !next_cell_spec.nil?
2121
2295
  parser_ctx.close_open_cell next_cell_spec
@@ -2125,8 +2299,10 @@ class Lexer
2125
2299
  end
2126
2300
  end
2127
2301
 
2128
- while !line.empty?
2129
- if m = parser_ctx.match_delimiter(line)
2302
+ seen = false
2303
+ while !seen || !line.empty?
2304
+ seen = true
2305
+ if (m = parser_ctx.match_delimiter(line))
2130
2306
  if parser_ctx.format == 'csv'
2131
2307
  if parser_ctx.buffer_has_unclosed_quotes?(m.pre_match)
2132
2308
  # throw it back, it's too small
@@ -2153,7 +2329,7 @@ class Lexer
2153
2329
  else
2154
2330
  # no other delimiters to see here
2155
2331
  # suck up this line into the buffer and move on
2156
- parser_ctx.buffer = %(#{parser_ctx.buffer}#{line})
2332
+ parser_ctx.buffer = %(#{parser_ctx.buffer}#{line}#{EOL})
2157
2333
  # QUESTION make stripping endlines in csv data an option? (unwrap-option?)
2158
2334
  if parser_ctx.format == 'csv'
2159
2335
  parser_ctx.buffer = %(#{parser_ctx.buffer.rstrip} )
@@ -2200,27 +2376,24 @@ class Lexer
2200
2376
  # returns a Hash of attributes that specify how to format
2201
2377
  # and layout the cells in the table.
2202
2378
  def self.parse_col_specs(records)
2203
- specs = []
2204
-
2205
- # check for deprecated syntax
2206
- if m = records.match(REGEXP[:digits])
2207
- 1.upto(m[0].to_i) {
2208
- specs << {'width' => 1}
2209
- }
2210
- return specs
2379
+ # check for deprecated syntax: single number, equal column spread
2380
+ # REVIEW could use records == records.to_i.to_s instead of regexp
2381
+ if DigitsRx =~ records
2382
+ return ::Array.new(records.to_i) { { 'width' => 1 } }
2211
2383
  end
2212
2384
 
2385
+ specs = []
2213
2386
  records.split(',').each {|record|
2214
2387
  # TODO might want to use scan rather than this mega-regexp
2215
- if m = record.match(REGEXP[:table_colspec])
2388
+ if (m = ColumnSpecRx.match(record))
2216
2389
  spec = {}
2217
2390
  if m[2]
2218
2391
  # make this an operation
2219
2392
  colspec, rowspec = m[2].split '.'
2220
- if !colspec.to_s.empty? && Table::ALIGNMENTS[:h].has_key?(colspec)
2393
+ if !colspec.nil_or_empty? && Table::ALIGNMENTS[:h].has_key?(colspec)
2221
2394
  spec['halign'] = Table::ALIGNMENTS[:h][colspec]
2222
2395
  end
2223
- if !rowspec.to_s.empty? && Table::ALIGNMENTS[:v].has_key?(rowspec)
2396
+ if !rowspec.nil_or_empty? && Table::ALIGNMENTS[:v].has_key?(rowspec)
2224
2397
  spec['valign'] = Table::ALIGNMENTS[:v][rowspec]
2225
2398
  end
2226
2399
  end
@@ -2248,47 +2421,65 @@ class Lexer
2248
2421
  #
2249
2422
  # The cell specs dictate the cell's alignments, styles or filters,
2250
2423
  # colspan, rowspan and/or repeating content.
2424
+ #
2425
+ # The default spec when pos == :end is {} since we already know we're at a
2426
+ # delimiter. When pos == :start, we *may* be at a delimiter, nil indicates
2427
+ # we're not.
2251
2428
  #
2252
2429
  # returns the Hash of attributes that indicate how to layout
2253
2430
  # and style this cell in the table.
2254
- def self.parse_cell_spec(line, pos = :start)
2255
- # the default for the end pos it {} since we
2256
- # know we're at a delimiter; when the pos
2257
- # is start, we *may* be at a delimiter and
2258
- # nil indicates we're not
2259
- spec = (pos == :end ? {} : nil)
2260
- rest = line
2261
-
2262
- if m = line.match(REGEXP[:table_cellspec][pos])
2263
- spec = {}
2264
- return [spec, line] if m[0].chomp.empty?
2265
- rest = (pos == :start ? m.post_match : m.pre_match)
2266
- if m[1]
2267
- colspec, rowspec = m[1].split '.'
2268
- colspec = colspec.to_s.empty? ? 1 : colspec.to_i
2269
- rowspec = rowspec.to_s.empty? ? 1 : rowspec.to_i
2270
- if m[2] == '+'
2271
- spec['colspan'] = colspec unless colspec == 1
2272
- spec['rowspan'] = rowspec unless rowspec == 1
2273
- elsif m[2] == '*'
2274
- spec['repeatcol'] = colspec unless colspec == 1
2431
+ def self.parse_cell_spec(line, pos = :start, delimiter = nil)
2432
+ m = nil
2433
+ rest = ''
2434
+
2435
+ case pos
2436
+ when :start
2437
+ if line.include? delimiter
2438
+ spec_part, rest = line.split delimiter, 2
2439
+ if (m = CellSpecStartRx.match spec_part)
2440
+ return [{}, rest] if m[0].empty?
2441
+ else
2442
+ return [nil, line]
2275
2443
  end
2444
+ else
2445
+ return [nil, line]
2276
2446
  end
2277
-
2278
- if m[3]
2279
- colspec, rowspec = m[3].split '.'
2280
- if !colspec.to_s.empty? && Table::ALIGNMENTS[:h].has_key?(colspec)
2281
- spec['halign'] = Table::ALIGNMENTS[:h][colspec]
2282
- end
2283
- if !rowspec.to_s.empty? && Table::ALIGNMENTS[:v].has_key?(rowspec)
2284
- spec['valign'] = Table::ALIGNMENTS[:v][rowspec]
2285
- end
2447
+ when :end
2448
+ if (m = CellSpecEndRx.match line)
2449
+ # NOTE return the line stripped of trailing whitespace if no cellspec is found in this case
2450
+ return [{}, line.rstrip] if m[0].lstrip.empty?
2451
+ rest = m.pre_match
2452
+ else
2453
+ return [{}, line]
2286
2454
  end
2455
+ end
2287
2456
 
2288
- if m[4] && Table::TEXT_STYLES.has_key?(m[4])
2289
- spec['style'] = Table::TEXT_STYLES[m[4]]
2457
+ spec = {}
2458
+ if m[1]
2459
+ colspec, rowspec = m[1].split '.'
2460
+ colspec = colspec.nil_or_empty? ? 1 : colspec.to_i
2461
+ rowspec = rowspec.nil_or_empty? ? 1 : rowspec.to_i
2462
+ if m[2] == '+'
2463
+ spec['colspan'] = colspec unless colspec == 1
2464
+ spec['rowspan'] = rowspec unless rowspec == 1
2465
+ elsif m[2] == '*'
2466
+ spec['repeatcol'] = colspec unless colspec == 1
2467
+ end
2468
+ end
2469
+
2470
+ if m[3]
2471
+ colspec, rowspec = m[3].split '.'
2472
+ if !colspec.nil_or_empty? && Table::ALIGNMENTS[:h].has_key?(colspec)
2473
+ spec['halign'] = Table::ALIGNMENTS[:h][colspec]
2290
2474
  end
2291
- end
2475
+ if !rowspec.nil_or_empty? && Table::ALIGNMENTS[:v].has_key?(rowspec)
2476
+ spec['valign'] = Table::ALIGNMENTS[:v][rowspec]
2477
+ end
2478
+ end
2479
+
2480
+ if m[4] && Table::TEXT_STYLES.has_key?(m[4])
2481
+ spec['style'] = Table::TEXT_STYLES[m[4]]
2482
+ end
2292
2483
 
2293
2484
  [spec, rest]
2294
2485
  end
@@ -2321,10 +2512,7 @@ class Lexer
2321
2512
  original_style = attributes['style']
2322
2513
  raw_style = attributes[1]
2323
2514
  # NOTE spaces are not allowed in shorthand, so if we find one, this ain't shorthand
2324
- if !raw_style || raw_style.include?(' ')
2325
- attributes['style'] = raw_style
2326
- [raw_style, original_style]
2327
- else
2515
+ if raw_style && !raw_style.include?(' ') && Compliance.shorthand_property_syntax
2328
2516
  type = :style
2329
2517
  collector = []
2330
2518
  parsed = {}
@@ -2332,7 +2520,7 @@ class Lexer
2332
2520
  save_current = lambda {
2333
2521
  if collector.empty?
2334
2522
  if type != :style
2335
- warn "asciidoctor: WARNING:#{reader.nil? ? nil : " #{reader.prev_line_info}:"} invalid empty #{type} detected in style attribute"
2523
+ warn %(asciidoctor: WARNING:#{reader.nil? ? nil : " #{reader.prev_line_info}:"} invalid empty #{type} detected in style attribute)
2336
2524
  end
2337
2525
  else
2338
2526
  case type
@@ -2341,7 +2529,7 @@ class Lexer
2341
2529
  parsed[type].push collector.join
2342
2530
  when :id
2343
2531
  if parsed.has_key? :id
2344
- warn "asciidoctor: WARNING:#{reader.nil? ? nil : " #{reader.prev_line_info}:"} multiple ids detected in style attribute"
2532
+ warn %(asciidoctor: WARNING:#{reader.nil? ? nil : " #{reader.prev_line_info}:"} multiple ids detected in style attribute)
2345
2533
  end
2346
2534
  parsed[type] = collector.join
2347
2535
  else
@@ -2351,7 +2539,7 @@ class Lexer
2351
2539
  end
2352
2540
  }
2353
2541
 
2354
- raw_style.split('').each do |c|
2542
+ raw_style.each_char do |c|
2355
2543
  if c == '.' || c == '#' || c == '%'
2356
2544
  save_current.call
2357
2545
  case c
@@ -2389,7 +2577,7 @@ class Lexer
2389
2577
 
2390
2578
  if parsed.has_key? :option
2391
2579
  (options = parsed[:option]).each do |option|
2392
- attributes["#{option}-option"] = ''
2580
+ attributes[%(#{option}-option)] = ''
2393
2581
  end
2394
2582
  if (existing_opts = attributes['options'])
2395
2583
  attributes['options'] = (options + existing_opts.split(',')) * ','
@@ -2400,6 +2588,9 @@ class Lexer
2400
2588
  end
2401
2589
 
2402
2590
  [parsed_style, original_style]
2591
+ else
2592
+ attributes['style'] = raw_style
2593
+ [raw_style, original_style]
2403
2594
  end
2404
2595
  end
2405
2596
 
@@ -2427,13 +2618,13 @@ class Lexer
2427
2618
  # end
2428
2619
  # EOS
2429
2620
  #
2430
- # source.lines.entries
2431
- # # => [" def names\n", " @names.split ' '\n", " end\n"]
2621
+ # source.split("\n")
2622
+ # # => [" def names", " @names.split ' '", " end"]
2432
2623
  #
2433
- # Lexer.reset_block_indent(source.lines.entries)
2434
- # # => ["def names\n", " @names.split ' '\n", "end\n"]
2624
+ # Parser.reset_block_indent(source.split "\n")
2625
+ # # => ["def names", " @names.split ' '", "end"]
2435
2626
  #
2436
- # puts Lexer.reset_block_indent(source.lines.entries).join
2627
+ # puts Parser.reset_block_indent(source.split "\n") * "\n"
2437
2628
  # # => def names
2438
2629
  # # => @names.split ' '
2439
2630
  # # => end
@@ -2442,7 +2633,7 @@ class Lexer
2442
2633
  #--
2443
2634
  # FIXME refactor gsub matchers into compiled regex
2444
2635
  def self.reset_block_indent!(lines, indent = 0)
2445
- return if indent.nil? || lines.empty?
2636
+ return if !indent || lines.empty?
2446
2637
 
2447
2638
  tab_detected = false
2448
2639
  # TODO make tab size configurable
@@ -2450,10 +2641,10 @@ class Lexer
2450
2641
  # strip leading block indent
2451
2642
  offsets = lines.map do |line|
2452
2643
  # break if the first char is non-whitespace
2453
- break [] unless line.chomp[0..0].lstrip.empty?
2454
- if line.include? "\t"
2644
+ break [] unless line.chr.lstrip.empty?
2645
+ if line.include? TAB
2455
2646
  tab_detected = true
2456
- line = line.gsub("\t", tab_expansion)
2647
+ line = line.gsub(TAB_PATTERN, tab_expansion)
2457
2648
  end
2458
2649
  if (flush_line = line.lstrip).empty?
2459
2650
  nil
@@ -2467,8 +2658,8 @@ class Lexer
2467
2658
  unless offsets.empty? || (offsets = offsets.compact).empty?
2468
2659
  if (offset = offsets.min) > 0
2469
2660
  lines.map! {|line|
2470
- line = line.gsub("\t", tab_expansion) if tab_detected
2471
- line[offset..-1] || "\n"
2661
+ line = line.gsub(TAB_PATTERN, tab_expansion) if tab_detected
2662
+ line[offset..-1].to_s
2472
2663
  }
2473
2664
  end
2474
2665
  end
@@ -2498,7 +2689,7 @@ class Lexer
2498
2689
  # sanitize_attribute_name('Foo 3 #-Billy')
2499
2690
  # => 'foo3-billy'
2500
2691
  def self.sanitize_attribute_name(name)
2501
- name.gsub(REGEXP[:illegal_attr_name_chars], '').downcase
2692
+ name.gsub(InvalidAttributeNameCharsRx, '').downcase
2502
2693
  end
2503
2694
 
2504
2695
  # Internal: Converts a Roman numeral to an integer value.