asciidoctor 1.5.7.1 → 1.5.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.adoc +95 -5
  3. data/Gemfile +23 -13
  4. data/README-de.adoc +482 -0
  5. data/README-fr.adoc +128 -119
  6. data/README-jp.adoc +2 -3
  7. data/README-zh_CN.adoc +2 -3
  8. data/README.adoc +131 -106
  9. data/asciidoctor.gemspec +9 -7
  10. data/data/locale/attributes-ar.adoc +1 -1
  11. data/data/locale/attributes-bg.adoc +1 -1
  12. data/data/locale/attributes-ca.adoc +1 -1
  13. data/data/locale/attributes-cs.adoc +1 -1
  14. data/data/locale/attributes-da.adoc +1 -1
  15. data/data/locale/attributes-de.adoc +1 -1
  16. data/data/locale/attributes-en.adoc +1 -1
  17. data/data/locale/attributes-es.adoc +1 -1
  18. data/data/locale/attributes-fa.adoc +1 -1
  19. data/data/locale/attributes-fi.adoc +1 -1
  20. data/data/locale/attributes-fr.adoc +1 -1
  21. data/data/locale/attributes-hu.adoc +1 -1
  22. data/data/locale/attributes-id.adoc +1 -1
  23. data/data/locale/attributes-it.adoc +1 -1
  24. data/data/locale/attributes-ja.adoc +1 -1
  25. data/data/locale/attributes-kr.adoc +1 -1
  26. data/data/locale/attributes-nb.adoc +1 -1
  27. data/data/locale/attributes-nl.adoc +1 -1
  28. data/data/locale/attributes-nn.adoc +1 -1
  29. data/data/locale/attributes-pl.adoc +1 -1
  30. data/data/locale/attributes-pt.adoc +1 -1
  31. data/data/locale/attributes-pt_BR.adoc +1 -1
  32. data/data/locale/attributes-ro.adoc +1 -1
  33. data/data/locale/attributes-ru.adoc +1 -1
  34. data/data/locale/attributes-sr.adoc +5 -4
  35. data/data/locale/attributes-sr_Latn.adoc +5 -4
  36. data/data/locale/attributes-sv.adoc +23 -0
  37. data/data/locale/attributes-tr.adoc +1 -1
  38. data/data/locale/attributes-uk.adoc +1 -1
  39. data/data/locale/attributes-zh_CN.adoc +1 -1
  40. data/data/locale/attributes-zh_TW.adoc +1 -1
  41. data/data/stylesheets/asciidoctor-default.css +23 -23
  42. data/lib/asciidoctor.rb +110 -104
  43. data/lib/asciidoctor/abstract_block.rb +55 -32
  44. data/lib/asciidoctor/abstract_node.rb +32 -17
  45. data/lib/asciidoctor/attribute_list.rb +8 -7
  46. data/lib/asciidoctor/block.rb +5 -7
  47. data/lib/asciidoctor/cli/options.rb +5 -9
  48. data/lib/asciidoctor/converter.rb +2 -2
  49. data/lib/asciidoctor/converter/docbook45.rb +7 -20
  50. data/lib/asciidoctor/converter/docbook5.rb +36 -37
  51. data/lib/asciidoctor/converter/factory.rb +10 -8
  52. data/lib/asciidoctor/converter/html5.rb +90 -65
  53. data/lib/asciidoctor/converter/manpage.rb +72 -62
  54. data/lib/asciidoctor/converter/template.rb +8 -6
  55. data/lib/asciidoctor/core_ext/1.8.7/concurrent/hash.rb +5 -0
  56. data/lib/asciidoctor/document.rb +62 -10
  57. data/lib/asciidoctor/extensions.rb +74 -16
  58. data/lib/asciidoctor/helpers.rb +11 -14
  59. data/lib/asciidoctor/list.rb +2 -2
  60. data/lib/asciidoctor/parser.rb +223 -195
  61. data/lib/asciidoctor/path_resolver.rb +15 -7
  62. data/lib/asciidoctor/reader.rb +65 -36
  63. data/lib/asciidoctor/section.rb +6 -4
  64. data/lib/asciidoctor/substitutors.rb +170 -149
  65. data/lib/asciidoctor/table.rb +16 -8
  66. data/lib/asciidoctor/version.rb +1 -1
  67. data/man/asciidoctor.1 +6 -5
  68. data/man/asciidoctor.adoc +3 -2
  69. data/test/api_test.rb +236 -0
  70. data/test/attribute_list_test.rb +242 -0
  71. data/test/attributes_test.rb +65 -52
  72. data/test/blocks_test.rb +408 -260
  73. data/test/converter_test.rb +7 -7
  74. data/test/document_test.rb +60 -54
  75. data/test/extensions_test.rb +218 -32
  76. data/test/fixtures/doctime-localtime.adoc +2 -0
  77. data/test/fixtures/section-a.adoc +4 -0
  78. data/test/fixtures/subs.adoc +0 -1
  79. data/test/invoker_test.rb +56 -18
  80. data/test/links_test.rb +105 -81
  81. data/test/lists_test.rb +636 -265
  82. data/test/logger_test.rb +1 -1
  83. data/test/manpage_test.rb +140 -3
  84. data/test/paragraphs_test.rb +42 -42
  85. data/test/parser_test.rb +63 -183
  86. data/test/paths_test.rb +21 -4
  87. data/test/preamble_test.rb +9 -9
  88. data/test/reader_test.rb +78 -28
  89. data/test/sections_test.rb +273 -151
  90. data/test/substitutions_test.rb +53 -19
  91. data/test/tables_test.rb +286 -163
  92. data/test/test_helper.rb +4 -3
  93. data/test/text_test.rb +65 -65
  94. metadata +16 -21
@@ -15,6 +15,7 @@ else
15
15
  autoload :Base64, 'base64'
16
16
  autoload :URI, 'uri'
17
17
  autoload :OpenURI, 'open-uri'
18
+ autoload :Pathname, 'pathname'
18
19
  autoload :StringScanner, 'strscan'
19
20
  end
20
21
 
@@ -141,13 +142,6 @@ module Asciidoctor
141
142
  # Compliance value: false
142
143
  define :strict_verbatim_paragraphs, true
143
144
 
144
- # NOT CURRENTLY USED
145
- # AsciiDoc allows start and end delimiters around
146
- # a block to be different lengths
147
- # Enabling this option requires matching lengths
148
- # Compliance value: false
149
- #define :congruent_block_delimiters, true
150
-
151
145
  # AsciiDoc supports both atx (single-line) and setext (underlined) section titles.
152
146
  # This option can be used to disable the setext variant.
153
147
  # Compliance value: true
@@ -290,8 +284,6 @@ module Asciidoctor
290
284
 
291
285
  ADMONITION_STYLE_HEADS = ['N', 'T', 'I', 'W', 'C'].to_set
292
286
 
293
- CALLOUT_LIST_HEADS = ['<', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0'].to_set
294
-
295
287
  PARAGRAPH_STYLES = ['comment', 'example', 'literal', 'listing', 'normal', 'open', 'pass', 'quote', 'sidebar', 'source', 'verse', 'abstract', 'partintro'].to_set
296
288
 
297
289
  VERBATIM_STYLES = ['literal', 'listing', 'source', 'verse'].to_set
@@ -382,47 +374,51 @@ module Asciidoctor
382
374
 
383
375
  # A collection of regular expressions used by the parser.
384
376
  #
385
- # NOTE: The following pattern, which appears frequently, captures the
377
+ # NOTE The following pattern, which appears frequently, captures the
386
378
  # contents between square brackets, ignoring escaped closing brackets
387
379
  # (closing brackets prefixed with a backslash '\' character)
388
380
  #
389
- # Pattern: \[(|.*?[^\\])\]
381
+ # Pattern: \[(|#{CC_ALL}*?[^\\])\]
390
382
  # Matches: [enclosed text] and [enclosed [text\]], not [enclosed text \\] or [\\] (as these require a trailing space)
391
383
  #
384
+ # NOTE \w only matches ASCII word characters, whereas [[:word:]] or \p{Word} matches any character in the Unicode word category.
392
385
  #(pseudo)module Rx
393
386
 
394
387
  ## Regular expression character classes (to ensure regexp compatibility between Ruby and JavaScript)
395
388
  ## CC stands for "character class", CG stands for "character class group"
396
389
 
397
- # NOTE \w matches only the ASCII word characters, whereas [[:word:]] or \p{Word} matches any character in the Unicode word category.
398
-
399
- # character classes for the Regexp engine in Ruby >= 2 (Ruby 1.9 supports \p{} but has problems w/ encoding)
400
- if ::RUBY_MIN_VERSION_2
401
- CC_ALPHA = CG_ALPHA = '\p{Alpha}'
402
- CC_ALNUM = CG_ALNUM = '\p{Alnum}'
403
- CC_ALL = '.'
404
- CG_BLANK = '\p{Blank}'
405
- CC_EOL = '$'
406
- CC_WORD = CG_WORD = '\p{Word}'
407
- # character classes for the Regexp engine in Ruby < 2
390
+ if RUBY_ENGINE == 'opal'
391
+ CC_ANY = '[^\n]' unless defined? CC_ANY
408
392
  else
409
- CC_ALPHA = '[:alpha:]'
410
- CG_ALPHA = '[[:alpha:]]'
411
- CC_ALL = '.'
412
- CC_ALNUM = '[:alnum:]'
413
- CG_ALNUM = '[[:alnum:]]'
414
- CC_EOL = '$'
415
- if ::RUBY_MIN_VERSION_1_9
416
- CG_BLANK = '[[:blank:]]'
417
- CC_WORD = '[:word:]'
418
- CG_WORD = '[[:word:]]'
393
+ # CC_ALL is any character, including newlines (must be accompanied by multiline regexp flag)
394
+ CC_ALL = '.'
395
+ # CC_ANY is any character except newlines
396
+ CC_ANY = '.'
397
+ CC_EOL = '$'
398
+ # character classes for the Regexp engine in Ruby >= 2 (Ruby 1.9 supports \p{} but has problems w/ encoding)
399
+ if ::RUBY_MIN_VERSION_2
400
+ CC_ALPHA = CG_ALPHA = '\p{Alpha}'
401
+ CC_ALNUM = CG_ALNUM = '\p{Alnum}'
402
+ CG_BLANK = '\p{Blank}'
403
+ CC_WORD = CG_WORD = '\p{Word}'
404
+ # character classes for the Regexp engine in Ruby < 2
419
405
  else
420
- # NOTE Ruby 1.8 cannot match word characters beyond the ASCII range; if you need this feature, upgrade!
421
- CG_BLANK = '[ \t]'
422
- CC_WORD = '[:alnum:]_'
423
- CG_WORD = '[[:alnum:]_]'
406
+ CC_ALPHA = '[:alpha:]'
407
+ CG_ALPHA = '[[:alpha:]]'
408
+ CC_ALNUM = '[:alnum:]'
409
+ CG_ALNUM = '[[:alnum:]]'
410
+ if ::RUBY_MIN_VERSION_1_9
411
+ CG_BLANK = '[[:blank:]]'
412
+ CC_WORD = '[:word:]'
413
+ CG_WORD = '[[:word:]]'
414
+ else
415
+ # NOTE Ruby 1.8 cannot match word characters beyond the ASCII range; if you need this feature, upgrade!
416
+ CG_BLANK = '[ \t]'
417
+ CC_WORD = '[:alnum:]_'
418
+ CG_WORD = '[[:alnum:]_]'
419
+ end
424
420
  end
425
- end unless RUBY_ENGINE == 'opal'
421
+ end
426
422
 
427
423
  ## Document header
428
424
 
@@ -445,7 +441,7 @@ module Asciidoctor
445
441
  # v1.0, 2013-01-01: Ring in the new year release
446
442
  # 1.0, Jan 01, 2013
447
443
  #
448
- RevisionInfoLineRx = /^(?:\D*(.*?),)? *(?!:)(.*?)(?: *(?!^),?: *(.*))?$/
444
+ RevisionInfoLineRx = /^(?:[^\d{]*(#{CC_ANY}*?),)? *(?!:)(#{CC_ANY}*?)(?: *(?!^),?: *(#{CC_ANY}*))?$/
449
445
 
450
446
  # Matches the title and volnum in the manpage doctype.
451
447
  #
@@ -454,7 +450,7 @@ module Asciidoctor
454
450
  # = asciidoctor(1)
455
451
  # = asciidoctor ( 1 )
456
452
  #
457
- ManpageTitleVolnumRx = /^(.+?) *\( *(.+?) *\)$/
453
+ ManpageTitleVolnumRx = /^(#{CC_ANY}+?) *\( *(#{CC_ANY}+?) *\)$/
458
454
 
459
455
  # Matches the name and purpose in the manpage doctype.
460
456
  #
@@ -462,7 +458,7 @@ module Asciidoctor
462
458
  #
463
459
  # asciidoctor - converts AsciiDoc source files to HTML, DocBook and other formats
464
460
  #
465
- ManpageNamePurposeRx = /^(.+?) +- +(.+)$/
461
+ ManpageNamePurposeRx = /^(#{CC_ANY}+?) +- +(#{CC_ANY}+)$/
466
462
 
467
463
  ## Preprocessor directives
468
464
 
@@ -478,7 +474,7 @@ module Asciidoctor
478
474
  # endif::basebackend-html[]
479
475
  # endif::[]
480
476
  #
481
- ConditionalDirectiveRx = /^(\\)?(ifdef|ifndef|ifeval|endif)::(\S*?(?:([,+])\S*?)?)\[(.+)?\]$/
477
+ ConditionalDirectiveRx = /^(\\)?(ifdef|ifndef|ifeval|endif)::(\S*?(?:([,+])\S*?)?)\[(#{CC_ANY}+)?\]$/
482
478
 
483
479
  # Matches a restricted (read as safe) eval expression.
484
480
  #
@@ -486,7 +482,7 @@ module Asciidoctor
486
482
  #
487
483
  # "{asciidoctor-version}" >= "0.1.0"
488
484
  #
489
- EvalExpressionRx = /^(.+?) *([=!><]=|[><]) *(.+)$/
485
+ EvalExpressionRx = /^(#{CC_ANY}+?) *([=!><]=|[><]) *(#{CC_ANY}+)$/
490
486
 
491
487
  # Matches an include preprocessor directive.
492
488
  #
@@ -495,7 +491,7 @@ module Asciidoctor
495
491
  # include::chapter1.ad[]
496
492
  # include::example.txt[lines=1;2;5..10]
497
493
  #
498
- IncludeDirectiveRx = /^(\\)?include::([^\[][^\[]*)\[(.+)?\]$/
494
+ IncludeDirectiveRx = /^(\\)?include::([^\[][^\[]*)\[(#{CC_ANY}+)?\]$/
499
495
 
500
496
  # Matches a trailing tag directive in an include file.
501
497
  #
@@ -508,7 +504,8 @@ module Asciidoctor
508
504
  # log(e);
509
505
  # }
510
506
  # // end::try-catch[]
511
- TagDirectiveRx = /\b(?:tag|(e)nd)::(\S+?)\[\][\n \r]/
507
+ # NOTE m flag is required for Asciidoctor.js
508
+ TagDirectiveRx = /\b(?:tag|(e)nd)::(\S+?)\[\](?=$|[ \r])/m
512
509
 
513
510
  ## Attribute entries and references
514
511
 
@@ -525,10 +522,10 @@ module Asciidoctor
525
522
  # collapsing the line breaks and indentation to \
526
523
  # a single space.
527
524
  #
528
- AttributeEntryRx = /^:(!?#{CG_WORD}[^:]*):(?:[ \t]+(.*))?$/
525
+ AttributeEntryRx = /^:(!?#{CG_WORD}[^:]*):(?:[ \t]+(#{CC_ANY}*))?$/
529
526
 
530
527
  # Matches invalid characters in an attribute name.
531
- InvalidAttributeNameCharsRx = /[^#{CC_WORD}-]/
528
+ InvalidAttributeNameCharsRx = /[^-#{CC_WORD}]/
532
529
 
533
530
  # Matches a pass inline macro that surrounds the value of an attribute
534
531
  # entry once it has been parsed.
@@ -539,7 +536,7 @@ module Asciidoctor
539
536
  # pass:a[{a} {b} {c}]
540
537
  #
541
538
  if RUBY_ENGINE == 'opal'
542
- # In JavaScript, ^ and $ match the boundaries of the string when the m flag is not set
539
+ # NOTE In JavaScript, ^ and $ match the boundaries of the string when the m flag is not set
543
540
  AttributeEntryPassMacroRx = /^pass:([a-z]+(?:,[a-z]+)*)?\[([\S\s]*)\]$/
544
541
  else
545
542
  AttributeEntryPassMacroRx = /\Apass:([a-z]+(?:,[a-z]+)*)?\[(.*)\]\Z/m
@@ -554,7 +551,7 @@ module Asciidoctor
554
551
  # {set:foo:bar}
555
552
  # {set:name!}
556
553
  #
557
- AttributeReferenceRx = /(\\)?\{(#{CG_WORD}+[-#{CC_WORD}]*|(set|counter2?):.+?)(\\)?\}/
554
+ AttributeReferenceRx = /(\\)?\{(#{CG_WORD}[-#{CC_WORD}]*|(set|counter2?):#{CC_ANY}+?)(\\)?\}/
558
555
 
559
556
  ## Paragraphs and delimited blocks
560
557
 
@@ -565,7 +562,7 @@ module Asciidoctor
565
562
  # [[idname]]
566
563
  # [[idname,Reference Text]]
567
564
  #
568
- BlockAnchorRx = /^\[\[(?:|([#{CC_ALPHA}_:][#{CC_WORD}:.-]*)(?:, *(.+))?)\]\]$/
565
+ BlockAnchorRx = /^\[\[(?:|([#{CC_ALPHA}_:][#{CC_WORD}:.-]*)(?:, *(#{CC_ANY}+))?)\]\]$/
569
566
 
570
567
  # Matches an attribute list above a block element.
571
568
  #
@@ -580,12 +577,12 @@ module Asciidoctor
580
577
  # # as attribute reference
581
578
  # [{lead}]
582
579
  #
583
- BlockAttributeListRx = /^\[(|[#{CC_WORD}.#%{,"'].*)\]$/
580
+ BlockAttributeListRx = /^\[(|[#{CC_WORD}.#%{,"']#{CC_ANY}*)\]$/
584
581
 
585
582
  # A combined pattern that matches either a block anchor or a block attribute list.
586
583
  #
587
584
  # TODO this one gets hit a lot, should be optimized as much as possible
588
- BlockAttributeLineRx = /^\[(?:|[#{CC_WORD}.#%{,"'].*|\[(?:|[#{CC_ALPHA}_:][#{CC_WORD}:.-]*(?:, *.+)?)\])\]$/
585
+ BlockAttributeLineRx = /^\[(?:|[#{CC_WORD}.#%{,"']#{CC_ANY}*|\[(?:|[#{CC_ALPHA}_:][#{CC_WORD}:.-]*(?:, *#{CC_ANY}+)?)\])\]$/
589
586
 
590
587
  # Matches a title above a block.
591
588
  #
@@ -593,7 +590,7 @@ module Asciidoctor
593
590
  #
594
591
  # .Title goes here
595
592
  #
596
- BlockTitleRx = /^\.(\.?[^ \t.].*)$/
593
+ BlockTitleRx = /^\.(\.?[^ \t.]#{CC_ANY}*)$/
597
594
 
598
595
  # Matches an admonition label at the start of a paragraph.
599
596
  #
@@ -602,7 +599,7 @@ module Asciidoctor
602
599
  # NOTE: Just a little note.
603
600
  # TIP: Don't forget!
604
601
  #
605
- AdmonitionParagraphRx = /^(#{ADMONITION_STYLES.to_a * '|'}):[ \t]+/
602
+ AdmonitionParagraphRx = /^(#{ADMONITION_STYLES.to_a.join '|'}):[ \t]+/
606
603
 
607
604
  # Matches a literal paragraph, which is a line of text preceded by at least one space.
608
605
  #
@@ -610,7 +607,7 @@ module Asciidoctor
610
607
  #
611
608
  # <SPACE>Foo
612
609
  # <TAB>Foo
613
- LiteralParagraphRx = /^([ \t]+.*)$/
610
+ LiteralParagraphRx = /^([ \t]+#{CC_ANY}*)$/
614
611
 
615
612
  # Matches a comment block.
616
613
  #
@@ -642,14 +639,14 @@ module Asciidoctor
642
639
  # == Foo ==
643
640
  # // ^ also a level 1 (h2) section title
644
641
  #
645
- AtxSectionTitleRx = /^(=={0,5})[ \t]+(.+?)(?:[ \t]+\1)?$/
642
+ AtxSectionTitleRx = /^(=={0,5})[ \t]+(#{CC_ANY}+?)(?:[ \t]+\1)?$/
646
643
 
647
644
  # Matches an extended Atx section title that includes support for the Markdown variant.
648
- ExtAtxSectionTitleRx = /^(=={0,5}|#\#{0,5})[ \t]+(.+?)(?:[ \t]+\1)?$/
645
+ ExtAtxSectionTitleRx = /^(=={0,5}|#\#{0,5})[ \t]+(#{CC_ANY}+?)(?:[ \t]+\1)?$/
649
646
 
650
647
  # Matches the title only (first line) of a Setext (two-line) section title.
651
648
  # The title cannot begin with a dot and must have at least one alphanumeric character.
652
- SetextSectionTitleRx = /^((?=.*#{CG_WORD}+.*)[^.].*?)$/
649
+ SetextSectionTitleRx = /^((?!\.)#{CC_ANY}*?#{CG_WORD}#{CC_ANY}*)$/
653
650
 
654
651
  # Matches an anchor (i.e., id + optional reference text) inside a section title.
655
652
  #
@@ -658,7 +655,7 @@ module Asciidoctor
658
655
  # Section Title [[idname]]
659
656
  # Section Title [[idname,Reference Text]]
660
657
  #
661
- InlineSectionAnchorRx = / (\\)?\[\[([#{CC_ALPHA}_:][#{CC_WORD}:.-]*)(?:, *(.+))?\]\]$/
658
+ InlineSectionAnchorRx = / (\\)?\[\[([#{CC_ALPHA}_:][#{CC_WORD}:.-]*)(?:, *(#{CC_ANY}+))?\]\]$/
662
659
 
663
660
  # Matches invalid ID characters in a section title.
664
661
  #
@@ -670,7 +667,8 @@ module Asciidoctor
670
667
  # Detects the start of any list item.
671
668
  #
672
669
  # NOTE we only have to check as far as the blank character because we know it means non-whitespace follows.
673
- AnyListRx = /^(?:[ \t]*(?:-|\*\*{0,4}|\.\.{0,4}|\u2022\u2022{0,4}|\d+\.|[a-zA-Z]\.|[IVXivx]+\))[ \t]|[ \t]*.*?(?::::{0,2}|;;)(?:$|[ \t])|<?\d+>[ \t])/
670
+ # IMPORTANT if this regexp does not agree with the regexp for each list type, the parser will hang.
671
+ AnyListRx = %r(^(?:[ \t]*(?:-|\*\**|\.\.*|\u2022|\d+\.|[a-zA-Z]\.|[IVXivx]+\))[ \t]|(?!//[^/])#{CC_ANY}*?(?::::{0,2}|;;)(?:$|[ \t])|<?\d+>[ \t]))
674
672
 
675
673
  # Matches an unordered list item (one level for hyphens, up to 5 levels for asterisks).
676
674
  #
@@ -680,7 +678,7 @@ module Asciidoctor
680
678
  # - Foo
681
679
  #
682
680
  # NOTE we know trailing (.*) will match at least one character because we strip trailing spaces
683
- UnorderedListRx = /^[ \t]*(-|\*\*{0,4}|\u2022\u2022{0,4})[ \t]+(.*)$/
681
+ UnorderedListRx = /^[ \t]*(-|\*\**|\u2022)[ \t]+(#{CC_ANY}*)$/
684
682
 
685
683
  # Matches an ordered list item (explicit numbering or up to 5 consecutive dots).
686
684
  #
@@ -696,7 +694,7 @@ module Asciidoctor
696
694
  #
697
695
  # NOTE leading space match is not always necessary, but is used for list reader
698
696
  # NOTE we know trailing (.*) will match at least one character because we strip trailing spaces
699
- OrderedListRx = /^[ \t]*(\.\.{0,4}|\d+\.|[a-zA-Z]\.|[IVXivx]+\))[ \t]+(.*)$/
697
+ OrderedListRx = /^[ \t]*(\.\.*|\d+\.|[a-zA-Z]\.|[IVXivx]+\))[ \t]+(#{CC_ANY}*)$/
700
698
 
701
699
  # Matches the ordinals for each type of ordered list.
702
700
  OrderedListMarkerRxMap = {
@@ -731,29 +729,30 @@ module Asciidoctor
731
729
  # {foo-term}:: {foo-desc}
732
730
  #
733
731
  # NOTE we know trailing (.*) will match at least one character because we strip trailing spaces
734
- # NOTE negative match for comment line is intentional since that isn't handled when looking for next list item
735
- # TODO check for line comment when scanning lines instead of in regex
736
- DescriptionListRx = %r(^(?!//)[ \t]*(.*?)(:::{0,2}|;;)(?:$|[ \t]+(.*)$))
732
+ # NOTE must skip line comment when looking for next list item inside list
733
+ DescriptionListRx = %r(^(?!//[^/])[ \t]*(#{CC_ANY}*?)(:::{0,2}|;;)(?:$|[ \t]+(#{CC_ANY}*)$))
737
734
 
738
735
  # Matches a sibling description list item (which does not include the type in the key).
736
+ # NOTE must skip line comment when looking for sibling list item
739
737
  DescriptionListSiblingRx = {
740
- '::' => %r(^(?!//)[ \t]*(.*[^:]|)(::)(?:$|[ \t]+(.*)$)),
741
- ':::' => %r(^(?!//)[ \t]*(.*[^:]|)(:::)(?:$|[ \t]+(.*)$)),
742
- '::::' => %r(^(?!//)[ \t]*(.*[^:]|)(::::)(?:$|[ \t]+(.*)$)),
743
- ';;' => %r(^(?!//)[ \t]*(.*?)(;;)(?:$|[ \t]+(.*)$))
738
+ '::' => %r(^(?!//[^/])[ \t]*(#{CC_ANY}*[^:]|)(::)(?:$|[ \t]+(#{CC_ANY}*)$)),
739
+ ':::' => %r(^(?!//[^/])[ \t]*(#{CC_ANY}*[^:]|)(:::)(?:$|[ \t]+(#{CC_ANY}*)$)),
740
+ '::::' => %r(^(?!//[^/])[ \t]*(#{CC_ANY}*[^:]|)(::::)(?:$|[ \t]+(#{CC_ANY}*)$)),
741
+ ';;' => %r(^(?!//[^/])[ \t]*(#{CC_ANY}*?)(;;)(?:$|[ \t]+(#{CC_ANY}*)$))
744
742
  }
745
743
 
746
744
  # Matches a callout list item.
747
745
  #
748
746
  # Examples
749
747
  #
750
- # <1> Foo
748
+ # <1> Explanation
749
+ #
750
+ # or
751
+ #
752
+ # <.> Explanation with automatic number
751
753
  #
752
754
  # NOTE we know trailing (.*) will match at least one character because we strip trailing spaces
753
- CalloutListRx = /^<?(\d+)>[ \t]+(.*)$/
754
-
755
- # Detects a potential callout list item.
756
- CalloutListSniffRx = /^<?\d+>/
755
+ CalloutListRx = /^<(\d+|\.)>[ \t]+(#{CC_ANY}*)$/
757
756
 
758
757
  # Matches a callout reference inside literal text.
759
758
  #
@@ -761,15 +760,18 @@ module Asciidoctor
761
760
  # <1> (optionally prefixed by //, #, -- or ;; line comment chars)
762
761
  # <1> <2> (multiple callouts on one line)
763
762
  # <!--1--> (for XML-based languages)
763
+ # <.> (auto-numbered)
764
764
  #
765
765
  # NOTE extract regexps are applied line-by-line, so we can use $ as end-of-line char
766
- CalloutExtractRx = %r((?:(?://|#|--|;;) ?)?(\\)?<!?(|--)(\d+)\2>(?=(?: ?\\?<!?\2\d+\2>)*$))
767
- CalloutExtractRxt = '(\\\\)?<()(\\d+)>(?=(?: ?\\\\?<\\d+>)*$)'
766
+ CalloutExtractRx = %r(((?://|#|--|;;) ?)?(\\)?<!?(|--)(\d+|\.)\3>(?=(?: ?\\?<!?\3(?:\d+|\.)\3>)*$))
767
+ CalloutExtractRxt = '(\\\\)?<()(\\d+|\\.)>(?=(?: ?\\\\?<(?:\\d+|\\.)>)*$)'
768
+ CalloutExtractRxMap = ::Hash.new {|h, k| h[k] = /(#{::Regexp.escape k} ?)?#{CalloutExtractRxt}/ }
768
769
  # NOTE special characters have not been replaced when scanning
769
- CalloutScanRx = /\\?<!?(|--)(\d+)\1>(?=(?: ?\\?<!?\1\d+\1>)*#{CC_EOL})/
770
+ CalloutScanRx = /\\?<!?(|--)(\d+|\.)\1>(?=(?: ?\\?<!?\1(?:\d+|\.)\1>)*#{CC_EOL})/
770
771
  # NOTE special characters have already been replaced when converting to an SGML format
771
- CalloutSourceRx = %r((?:(?://|#|--|;;) ?)?(\\)?&lt;!?(|--)(\d+)\2&gt;(?=(?: ?\\?&lt;!?\2\d+\2&gt;)*#{CC_EOL}))
772
- CalloutSourceRxt = "(\\\\)?&lt;()(\\d+)&gt;(?=(?: ?\\\\?&lt;\\d+&gt;)*#{CC_EOL})"
772
+ CalloutSourceRx = %r(((?://|#|--|;;) ?)?(\\)?&lt;!?(|--)(\d+|\.)\3&gt;(?=(?: ?\\?&lt;!?\3(?:\d+|\.)\3&gt;)*#{CC_EOL}))
773
+ CalloutSourceRxt = "(\\\\)?&lt;()(\\d+|\\.)&gt;(?=(?: ?\\\\?&lt;(?:\\d+|\\.)&gt;)*#{CC_EOL})"
774
+ CalloutSourceRxMap = ::Hash.new {|h, k| h[k] = /(#{::Regexp.escape k} ?)?#{CalloutSourceRxt}/ }
773
775
 
774
776
  # A Hash of regexps for lists used for dynamic access.
775
777
  ListRxMap = {
@@ -809,7 +811,7 @@ module Asciidoctor
809
811
  #
810
812
  #--
811
813
  # NOTE we've relaxed the match for target to accommodate the short format (e.g., name::[attrlist])
812
- CustomBlockMacroRx = /^(#{CG_WORD}+)::(|\S|\S.*?\S)\[(.+)?\]$/
814
+ CustomBlockMacroRx = /^(#{CG_WORD}[-#{CC_WORD}]*)::(|\S|\S#{CC_ANY}*?\S)\[(#{CC_ANY}+)?\]$/
813
815
 
814
816
  # Matches an image, video or audio block macro.
815
817
  #
@@ -818,7 +820,7 @@ module Asciidoctor
818
820
  # image::filename.png[Caption]
819
821
  # video::http://youtube.com/12345[Cats vs Dogs]
820
822
  #
821
- BlockMediaMacroRx = /^(image|video|audio)::(\S|\S.*?\S)\[(.+)?\]$/
823
+ BlockMediaMacroRx = /^(image|video|audio)::(\S|\S#{CC_ANY}*?\S)\[(#{CC_ANY}+)?\]$/
822
824
 
823
825
  # Matches the TOC block macro.
824
826
  #
@@ -827,7 +829,7 @@ module Asciidoctor
827
829
  # toc::[]
828
830
  # toc::[levels=2]
829
831
  #
830
- BlockTocMacroRx = /^toc::\[(.+)?\]$/
832
+ BlockTocMacroRx = /^toc::\[(#{CC_ANY}+)?\]$/
831
833
 
832
834
  ## Inline macros
833
835
 
@@ -840,13 +842,13 @@ module Asciidoctor
840
842
  # anchor:idname[]
841
843
  # anchor:idname[Reference Text]
842
844
  #
843
- InlineAnchorRx = /(\\)?(?:\[\[([#{CC_ALPHA}_:][#{CC_WORD}:.-]*)(?:, *(.+?))?\]\]|anchor:([#{CC_ALPHA}_:][#{CC_WORD}:.-]*)\[(?:\]|(.*?[^\\])\]))/
845
+ InlineAnchorRx = /(\\)?(?:\[\[([#{CC_ALPHA}_:][#{CC_WORD}:.-]*)(?:, *(#{CC_ANY}+?))?\]\]|anchor:([#{CC_ALPHA}_:][#{CC_WORD}:.-]*)\[(?:\]|(#{CC_ANY}*?[^\\])\]))/
844
846
 
845
847
  # Scans for a non-escaped anchor (i.e., id + optional reference text) in the flow of text.
846
- InlineAnchorScanRx = /(?:^|[^\\\[])\[\[([#{CC_ALPHA}_:][#{CC_WORD}:.-]*)(?:, *(.+?))?\]\]|(?:^|[^\\])anchor:([#{CC_ALPHA}_:][#{CC_WORD}:.-]*)\[(?:\]|(.*?[^\\])\])/
848
+ InlineAnchorScanRx = /(?:^|[^\\\[])\[\[([#{CC_ALPHA}_:][#{CC_WORD}:.-]*)(?:, *(#{CC_ANY}+?))?\]\]|(?:^|[^\\])anchor:([#{CC_ALPHA}_:][#{CC_WORD}:.-]*)\[(?:\]|(#{CC_ANY}*?[^\\])\])/
847
849
 
848
850
  # Scans for a leading, non-escaped anchor (i.e., id + optional reference text).
849
- LeadingInlineAnchorRx = /^\[\[([#{CC_ALPHA}_:][#{CC_WORD}:.-]*)(?:, *(.+?))?\]\]/
851
+ LeadingInlineAnchorRx = /^\[\[([#{CC_ALPHA}_:][#{CC_WORD}:.-]*)(?:, *(#{CC_ANY}+?))?\]\]/
850
852
 
851
853
  # Matches a bibliography anchor at the start of the list item text (in a bibliography list).
852
854
  #
@@ -854,7 +856,7 @@ module Asciidoctor
854
856
  #
855
857
  # [[[Fowler_1997]]] Fowler M. ...
856
858
  #
857
- InlineBiblioAnchorRx = /^\[\[\[([#{CC_ALPHA}_:][#{CC_WORD}:.-]*)(?:, *(.+?))?\]\]\]/
859
+ InlineBiblioAnchorRx = /^\[\[\[([#{CC_ALPHA}_:][#{CC_WORD}:.-]*)(?:, *(#{CC_ANY}+?))?\]\]\]/
858
860
 
859
861
  # Matches an inline e-mail address.
860
862
  #
@@ -932,7 +934,7 @@ module Asciidoctor
932
934
 
933
935
  # Matches the name of a macro.
934
936
  #
935
- MacroNameRx = /^#{CG_WORD}+$/
937
+ MacroNameRx = /^#{CG_WORD}[-#{CC_WORD}]*$/
936
938
 
937
939
  # Matches a stem (and alternatives, asciimath and latexmath) inline macro, which may span multiple lines.
938
940
  #
@@ -1019,9 +1021,10 @@ module Asciidoctor
1019
1021
  # Humpty Dumpty had a great fall.
1020
1022
  #
1021
1023
  if RUBY_ENGINE == 'opal'
1022
- # NOTE In Ruby, ^ and $ always match start and end of line, respectively; JavaScript only does so in multiline mode
1023
- HardLineBreakRx = /^(.*) \+$/m
1024
+ # NOTE In JavaScript, ^ and $ only match the start and end of line if the multiline flag is present
1025
+ HardLineBreakRx = /^(#{CC_ANY}*) \+$/m
1024
1026
  else
1027
+ # NOTE In Ruby, ^ and $ always match start and end of line
1025
1028
  HardLineBreakRx = /^(.*) \+$/
1026
1029
  end
1027
1030
 
@@ -1066,16 +1069,7 @@ module Asciidoctor
1066
1069
  # one,two
1067
1070
  # three;four
1068
1071
  #
1069
- DataDelimiterRx = /[,;]/
1070
-
1071
- # Matches one or more consecutive digits at the end of a line.
1072
- #
1073
- # Examples
1074
- #
1075
- # docbook45
1076
- # html5
1077
- #
1078
- TrailingDigitsRx = /\d+$/
1072
+ #DataDelimiterRx = /[,;]/
1079
1073
 
1080
1074
  # Matches whitespace (space, tab, newline) escaped by a backslash.
1081
1075
  #
@@ -1104,6 +1098,15 @@ module Asciidoctor
1104
1098
  #
1105
1099
  SubModifierSniffRx = /[+-]/
1106
1100
 
1101
+ # Matches one or more consecutive digits at the end of a line.
1102
+ #
1103
+ # Examples
1104
+ #
1105
+ # docbook45
1106
+ # html5
1107
+ #
1108
+ TrailingDigitsRx = /\d+$/
1109
+
1107
1110
  # Matches any character with multibyte support explicitly enabled (length of multibyte char = 1)
1108
1111
  #
1109
1112
  unless RUBY_ENGINE == 'opal'
@@ -1162,7 +1165,8 @@ module Asciidoctor
1162
1165
  'rdquo' => '&#8221;',
1163
1166
  'wj' => '&#8288;',
1164
1167
  'brvbar' => '&#166;',
1165
- 'cpp' => 'C++',
1168
+ 'pp' => '&#43;&#43;',
1169
+ 'cpp' => 'C&#43;&#43;',
1166
1170
  'amp' => '&',
1167
1171
  'lt' => '<',
1168
1172
  'gt' => '>'
@@ -1210,7 +1214,7 @@ module Asciidoctor
1210
1214
  [:subscript, :unconstrained, /\\?(?:\[([^\]]+)\])?~(\S+?)~/]
1211
1215
  ]
1212
1216
 
1213
- compat_quote_subs = quote_subs.dup
1217
+ compat_quote_subs = quote_subs.drop 0
1214
1218
  # ``quoted''
1215
1219
  compat_quote_subs[2] = [:double, :constrained, /(^|[^#{CC_WORD};:}])(?:\[([^\]]+)\])?``(\S|\S#{CC_ALL}*?\S)''(?!#{CG_WORD})/m]
1216
1220
  # `quoted'
@@ -1313,7 +1317,7 @@ module Asciidoctor
1313
1317
  # convert it to a Hash as we know it
1314
1318
  attrs = ::Hash[attrs.keys.map {|k| [k, attrs[k]] }]
1315
1319
  else
1316
- raise ::ArgumentError, %(illegal type for attributes option: #{attrs.class.ancestors * ' < '})
1320
+ raise ::ArgumentError, %(illegal type for attributes option: #{attrs.class.ancestors.join ' < '})
1317
1321
  end
1318
1322
 
1319
1323
  lines = nil
@@ -1331,10 +1335,12 @@ module Asciidoctor
1331
1335
  if (docdate = attrs['docdate'])
1332
1336
  attrs['docyear'] ||= ((docdate.index '-') == 4 ? (docdate.slice 0, 4) : nil)
1333
1337
  else
1334
- docdate = attrs['docdate'] = (input_mtime.strftime '%Y-%m-%d')
1338
+ docdate = attrs['docdate'] = (input_mtime.strftime '%F')
1335
1339
  attrs['docyear'] ||= input_mtime.year.to_s
1336
1340
  end
1337
- doctime = (attrs['doctime'] ||= input_mtime.strftime('%H:%M:%S %Z'))
1341
+ # %Z is OS dependent and may contain characters that aren't UTF-8 encoded (see asciidoctor#2770 and asciidoctor.js#23)
1342
+ # Ruby 1.8 doesn't support %:z
1343
+ doctime = (attrs['doctime'] ||= input_mtime.strftime %(%T #{input_mtime.utc_offset == 0 ? 'UTC' : '%z'}))
1338
1344
  attrs['docdatetime'] = %(#{docdate} #{doctime})
1339
1345
  elsif input.respond_to? :readlines
1340
1346
  # NOTE tty, pipes & sockets can't be rewound, but can't be sniffed easily either
@@ -1347,7 +1353,7 @@ module Asciidoctor
1347
1353
  elsif ::String === input
1348
1354
  lines = ::RUBY_MIN_VERSION_2 ? input.lines : input.each_line.to_a
1349
1355
  elsif ::Array === input
1350
- lines = input.dup
1356
+ lines = input.drop 0
1351
1357
  else
1352
1358
  raise ::ArgumentError, %(unsupported input type: #{input.class})
1353
1359
  end