asciidoctor 0.0.7 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of asciidoctor might be problematic. Click here for more details.

Files changed (47) hide show
  1. data/Gemfile +2 -0
  2. data/README.asciidoc +35 -26
  3. data/Rakefile +9 -6
  4. data/asciidoctor.gemspec +27 -8
  5. data/bin/asciidoctor +1 -1
  6. data/lib/asciidoctor.rb +351 -63
  7. data/lib/asciidoctor/abstract_block.rb +218 -0
  8. data/lib/asciidoctor/abstract_node.rb +249 -0
  9. data/lib/asciidoctor/attribute_list.rb +211 -0
  10. data/lib/asciidoctor/backends/base_template.rb +99 -0
  11. data/lib/asciidoctor/backends/docbook45.rb +510 -0
  12. data/lib/asciidoctor/backends/html5.rb +585 -0
  13. data/lib/asciidoctor/block.rb +27 -254
  14. data/lib/asciidoctor/callouts.rb +117 -0
  15. data/lib/asciidoctor/debug.rb +7 -4
  16. data/lib/asciidoctor/document.rb +229 -77
  17. data/lib/asciidoctor/inline.rb +29 -0
  18. data/lib/asciidoctor/lexer.rb +1330 -502
  19. data/lib/asciidoctor/list_item.rb +33 -34
  20. data/lib/asciidoctor/reader.rb +305 -142
  21. data/lib/asciidoctor/renderer.rb +115 -19
  22. data/lib/asciidoctor/section.rb +100 -189
  23. data/lib/asciidoctor/substituters.rb +468 -0
  24. data/lib/asciidoctor/table.rb +499 -0
  25. data/lib/asciidoctor/version.rb +1 -1
  26. data/test/attributes_test.rb +301 -87
  27. data/test/blocks_test.rb +568 -0
  28. data/test/document_test.rb +221 -24
  29. data/test/fixtures/dot.gif +0 -0
  30. data/test/fixtures/encoding.asciidoc +1 -0
  31. data/test/fixtures/include-file.asciidoc +1 -0
  32. data/test/fixtures/tip.gif +0 -0
  33. data/test/headers_test.rb +411 -43
  34. data/test/lexer_test.rb +265 -45
  35. data/test/links_test.rb +144 -3
  36. data/test/lists_test.rb +2252 -74
  37. data/test/paragraphs_test.rb +21 -30
  38. data/test/preamble_test.rb +24 -0
  39. data/test/reader_test.rb +248 -12
  40. data/test/renderer_test.rb +22 -0
  41. data/test/substitutions_test.rb +414 -0
  42. data/test/tables_test.rb +484 -0
  43. data/test/test_helper.rb +70 -6
  44. data/test/text_test.rb +30 -6
  45. metadata +64 -10
  46. data/lib/asciidoctor/render_templates.rb +0 -317
  47. data/lib/asciidoctor/string.rb +0 -12
@@ -1,6 +1,8 @@
1
1
  module Asciidoctor
2
- def self.debug(*args)
3
- puts *args if self.show_debug_output?
2
+ @show_debug = nil
3
+
4
+ def self.debug
5
+ puts yield if self.show_debug_output?
4
6
  end
5
7
 
6
8
  def self.set_debug(value)
@@ -12,9 +14,10 @@ module Asciidoctor
12
14
  end
13
15
 
14
16
  def self.puts_indented(level, *args)
15
- thing = " "*level*2
17
+ indentation = " " * level * 2
18
+
16
19
  args.each do |arg|
17
- self.debug "#{thing}#{arg}"
20
+ self.debug { "#{indentation}#{arg}" }
18
21
  end
19
22
  end
20
23
  end
@@ -1,73 +1,194 @@
1
1
  # Public: Methods for parsing Asciidoc documents and rendering them
2
2
  # using erb templates.
3
- class Asciidoctor::Document
3
+ #
4
+ # There are several strategies for getting the title of the document:
5
+ #
6
+ # doctitle - value of title attribute, if assigned and non-empty,
7
+ # otherwise title of first section in document, if present
8
+ # otherwise nil
9
+ # name - an alias of doctitle
10
+ # title - value of the title attribute, or nil if not present
11
+ # first_section.title - title of first section in document, if present
12
+ # header.title - title of section level 0
13
+ #
14
+ # Keep in mind that you'll want to honor these document settings:
15
+ #
16
+ # notitle - The h1 heading should not be shown
17
+ # noheader - The header block (h1 heading, author, revision info) should not be shown
18
+ class Asciidoctor::Document < Asciidoctor::AbstractBlock
4
19
 
5
20
  include Asciidoctor
6
21
 
7
- # Public: Get the Hash of attributes
8
- attr_reader :attributes
22
+ # Public: A read-only integer value indicating the level of security that
23
+ # should be enforced while processing this document. The value must be
24
+ # set in the Document constructor using the :safe option.
25
+ #
26
+ # A value of 0 (UNSAFE) disables any of the security features enforced
27
+ # by Asciidoctor (Ruby is still subject to its own restrictions).
28
+ #
29
+ # A value of 1 (SAFE) closely parallels safe mode in AsciiDoc. In particular,
30
+ # it prevents access to files which reside outside of the parent directory
31
+ # of the source file and disables any macro other than the include macro.
32
+ #
33
+ # A value of 10 (SECURE) disallows the document from attempting to read
34
+ # files from the file system and including the contents of them into the
35
+ # document. In particular, it disallows use of the include::[] macro and the
36
+ # embedding of binary content (data uri), stylesheets and JavaScripts
37
+ # referenced by the document. (Asciidoctor and trusted extensions may still
38
+ # be allowed to embed trusted content into the document). Since Asciidoctor
39
+ # is aiming for wide adoption, this value is the default and is recommended
40
+ # for server-side deployments.
41
+ #
42
+ # A value of 100 (PARANOID) is planned to disallow the use of passthrough
43
+ # macros and prevents the document from setting any known attributes in
44
+ # addition to all the security features of SafeMode::SECURE. Please note that
45
+ # this level is not currently implemented (and therefore not enforced)!
46
+ attr_reader :safe
9
47
 
10
48
  # Public: Get the Hash of document references
11
49
  attr_reader :references
12
50
 
13
- # The section level 0 element
51
+ # Public: Get the Hash of callouts
52
+ attr_reader :callouts
53
+
54
+ # Public: The section level 0 block
14
55
  attr_reader :header
15
56
 
16
- # Public: Get the Array of elements (really Blocks or Sections) for the document
17
- attr_reader :elements
57
+ # Public: Base directory for rendering this document
58
+ attr_reader :base_dir
59
+
60
+ # Public: A reference to the parent document of this nested document.
61
+ attr_reader :parent_document
18
62
 
19
63
  # Public: Initialize an Asciidoc object.
20
64
  #
21
- # data - The Array of Strings holding the Asciidoc source document.
22
- # block - A block that can be used to retrieve external Asciidoc
23
- # data to include in this document.
65
+ # data - The Array of Strings holding the Asciidoc source document. (default: [])
66
+ # options - A Hash of options to control processing, such as setting the safe mode (:safe),
67
+ # suppressing the header/footer (:header_footer) and attribute overrides (:attributes)
68
+ # (default: {})
69
+ # block - A block that can be used to retrieve external Asciidoc
70
+ # data to include in this document.
24
71
  #
25
72
  # Examples
26
73
  #
27
74
  # data = File.readlines(filename)
28
75
  # doc = Asciidoctor::Document.new(data)
29
- def initialize(data, options = {}, &block)
30
- @elements = []
76
+ # puts doc.render
77
+ def initialize(data = [], options = {}, &block)
78
+ super(self, :document)
79
+ @renderer = nil
80
+
81
+ if options[:parent]
82
+ @parent_document = options.delete(:parent)
83
+ # should we dup here?
84
+ options[:attributes] = @parent_document.attributes
85
+ @renderer = @parent_document.renderer
86
+ else
87
+ @parent_document = nil
88
+ end
89
+
90
+ @header = nil
91
+ @references = {
92
+ :ids => {},
93
+ :links => [],
94
+ :images => []
95
+ }
96
+ @callouts = Callouts.new
31
97
  @options = options
98
+ @safe = @options.fetch(:safe, SafeMode::SECURE).to_i
32
99
  @options[:header_footer] = @options.fetch(:header_footer, true)
33
100
 
34
- @attributes = {}
35
- @attributes['sectids'] = nil
101
+ @attributes['asciidoctor'] = true
102
+ @attributes['asciidoctor-version'] = VERSION
103
+ @attributes['sectids'] = true
104
+ @attributes['encoding'] = 'UTF-8'
36
105
 
37
- @reader = Reader.new(data, @attributes, &block)
106
+ attribute_overrides = options[:attributes] || {}
38
107
 
39
- # pseudo-delegation :)
40
- #@attributes = @reader.attributes
41
- @references = @reader.references
108
+ # the only way to set the include-depth attribute is via the document options
109
+ # 10 is the AsciiDoc default, though currently Asciidoctor only supports 1 level
110
+ attribute_overrides['include-depth'] ||= 10
111
+
112
+ # TODO we should go with one or the other, this is confusing
113
+ # for now, base_dir takes precedence if set
114
+ if options.has_key? :base_dir
115
+ @base_dir = attribute_overrides['docdir'] = options[:base_dir]
116
+ else
117
+ attribute_overrides['docdir'] ||= Dir.pwd
118
+ @base_dir = attribute_overrides['docdir']
119
+ end
120
+
121
+ # restrict document from setting source-highlighter in SECURE safe mode
122
+ # it can only be set via the constructor
123
+ if @safe >= SafeMode::SECURE
124
+ attribute_overrides['source-highlighter'] ||= nil
125
+ end
126
+
127
+ attribute_overrides.each {|key, val|
128
+ # a nil or negative key undefines the attribute
129
+ if (val.nil? || key[-1..-1] == '!')
130
+ @attributes.delete(key.chomp '!')
131
+ # otherwise it's an attribute assignment
132
+ else
133
+ @attributes[key] = val
134
+ end
135
+ }
136
+
137
+ @attributes['backend'] ||= DEFAULT_BACKEND
138
+ update_backend_attributes
139
+
140
+ if nested?
141
+ # don't need to do the extra processing within our own document
142
+ @reader = Reader.new(data)
143
+ else
144
+ @reader = Reader.new(data, self, attribute_overrides, &block)
145
+ end
42
146
 
43
147
  # dynamic intrinsic attribute values
44
148
  @attributes['doctype'] ||= DEFAULT_DOCTYPE
149
+
45
150
  now = Time.new
46
151
  @attributes['localdate'] ||= now.strftime('%Y-%m-%d')
47
152
  @attributes['localtime'] ||= now.strftime('%H:%m:%S %Z')
48
153
  @attributes['localdatetime'] ||= [@attributes['localdate'], @attributes['localtime']].join(' ')
49
- @attributes['asciidoctor-version'] = VERSION
154
+
155
+ # docdate and doctime should default to localdate and localtime if not otherwise set
156
+ @attributes['docdate'] ||= @attributes['localdate']
157
+ @attributes['doctime'] ||= @attributes['localtime']
158
+
159
+ @attributes['iconsdir'] ||= File.join(@attributes.fetch('imagesdir', 'images'), 'icons')
50
160
 
51
- # Now parse @lines into elements
52
- while @reader.has_lines?
53
- @reader.skip_blank
161
+ # Now parse the lines in the reader into blocks
162
+ Lexer.parse(@reader, self)
163
+ # or we could make it...
164
+ #self << *Lexer.parse(@reader, self)
54
165
 
55
- @elements << Lexer.next_block(@reader, self) if @reader.has_lines?
56
- end
166
+ @callouts.rewind
57
167
 
58
- Asciidoctor.debug "Found #{@elements.size} elements in this document:"
59
- @elements.each do |el|
60
- Asciidoctor.debug el
61
- end
168
+ Asciidoctor.debug {
169
+ msg = []
170
+ msg << "Found #{@blocks.size} blocks in this document:"
171
+ @blocks.each {|b|
172
+ msg << b
173
+ }
174
+ msg * "\n"
175
+ }
176
+ end
62
177
 
63
- # split off the level 0 section, if present
64
- root = @elements.first
65
- if root.is_a?(Section) && root.level == 0
66
- @header = @elements.shift
67
- @elements = @header.blocks
68
- @header.clear_blocks
178
+ def register(type, value)
179
+ if type == :ids
180
+ if value.is_a?(Array)
181
+ @references[:ids][value[0]] = (value[1] || '[' + value[0] + ']')
182
+ else
183
+ @references[:ids][value] = '[' + value + ']'
184
+ end
185
+ elsif @options[:catalog_assets]
186
+ @references[type] << value
69
187
  end
188
+ end
70
189
 
190
+ def nested?
191
+ !@parent_document.nil?
71
192
  end
72
193
 
73
194
  # Make the raw source for the Document available.
@@ -75,18 +196,8 @@ class Asciidoctor::Document
75
196
  @reader.source if @reader
76
197
  end
77
198
 
78
- def attr(name, default = nil)
79
- default.nil? ? @attributes[name.to_s] : @attributes.fetch(name.to_s, default)
80
- #default.nil? ? @attributes[name.to_s.tr('_', '-')] : @attributes.fetch(name.to_s.tr('_', '-'), default)
81
- end
82
-
83
- def attr?(name)
84
- @attributes.has_key? name.to_s
85
- #@attributes.has_key? name.to_s.tr('_', '-')
86
- end
87
-
88
- def level
89
- 0
199
+ def doctype
200
+ @attributes['doctype']
90
201
  end
91
202
 
92
203
  # The title explicitly defined in the document attributes
@@ -94,18 +205,20 @@ class Asciidoctor::Document
94
205
  @attributes['title']
95
206
  end
96
207
 
208
+ def title=(title)
209
+ @header = Section.new self
210
+ @header.title = title
211
+ end
212
+
97
213
  # We need to be able to return some semblance of a title
98
214
  def doctitle
99
- # cached value
100
- return @doctitle if @doctitle
101
-
102
- if @header
103
- @doctitle = @header.title
104
- elsif @elements.first
105
- @doctitle = @elements.first.title
215
+ if !(title = @attributes.fetch('title', '')).empty?
216
+ title
217
+ elsif !(sect = first_section).nil? && sect.title?
218
+ sect.title
219
+ else
220
+ nil
106
221
  end
107
-
108
- @doctitle
109
222
  end
110
223
  alias :name :doctitle
111
224
 
@@ -113,33 +226,71 @@ class Asciidoctor::Document
113
226
  @attributes.has_key? 'notitle'
114
227
  end
115
228
 
116
- def splain
117
- if @header
118
- Asciidoctor.debug "Header is #{@header}"
229
+ def noheader
230
+ @attributes.has_key? 'noheader'
231
+ end
232
+
233
+ # QUESTION move to AbstractBlock?
234
+ def first_section
235
+ has_header? ? @header : (@blocks || []).detect{|e| e.is_a? Section}
236
+ end
237
+
238
+ def has_header?
239
+ !@header.nil?
240
+ end
241
+
242
+ # Public: Update the backend attributes to reflect a change in the selected backend
243
+ def update_backend_attributes()
244
+ backend = @attributes['backend']
245
+ basebackend = backend.sub(/[[:digit:]]+$/, '')
246
+ page_width = DEFAULT_PAGE_WIDTHS[basebackend]
247
+ if page_width
248
+ @attributes['pagewidth'] = page_width
119
249
  else
120
- Asciidoctor.debug "No header"
250
+ @attributes.delete('pagewidth')
121
251
  end
252
+ @attributes['backend-' + backend] = 1
253
+ @attributes['basebackend'] = basebackend
254
+ @attributes['basebackend-' + basebackend] = 1
255
+ end
122
256
 
123
- Asciidoctor.debug "I have #{@elements.count} elements"
124
- @elements.each_with_index do |block, i|
125
- Asciidoctor.debug "v" * 60
126
- Asciidoctor.debug "Block ##{i} is a #{block.class}"
127
- Asciidoctor.debug "Name is #{block.name rescue 'n/a'}"
128
- block.splain(0) if block.respond_to? :splain
129
- Asciidoctor.debug "^" * 60
130
- end
257
+ def splain
258
+ Asciidoctor.debug {
259
+ msg = ''
260
+ if @header
261
+ msg = "Header is #{@header}"
262
+ else
263
+ msg = "No header"
264
+ end
265
+
266
+ msg += "I have #{@blocks.count} blocks"
267
+ @blocks.each_with_index do |block, i|
268
+ msg += "v" * 60
269
+ msg += "Block ##{i} is a #{block.class}"
270
+ msg += "Name is #{block.title rescue 'n/a'}"
271
+ block.splain(0) if block.respond_to? :splain
272
+ msg += "^" * 60
273
+ end
274
+ }
131
275
  nil
132
276
  end
133
277
 
134
- def renderer(options = {})
278
+ def renderer(opts = {})
135
279
  return @renderer if @renderer
280
+
136
281
  render_options = {}
282
+
137
283
  # Load up relevant Document @options
138
284
  if @options[:template_dir]
139
285
  render_options[:template_dir] = @options[:template_dir]
140
286
  end
287
+
288
+ render_options[:backend] = @attributes.fetch('backend', 'html5')
289
+ render_options[:eruby] = @options.fetch(:eruby, 'erb')
290
+ render_options[:compact] = @options.fetch(:compact, false)
291
+
141
292
  # Override Document @option settings with options passed in
142
- render_options.merge! options
293
+ render_options.merge! opts
143
294
 
144
295
  @renderer = Renderer.new(render_options)
145
296
  end
@@ -148,18 +299,19 @@ class Asciidoctor::Document
148
299
  # loaded by Renderer. If a :template_dir is not specified,
149
300
  # or a template is missing, the renderer will fall back to
150
301
  # using the appropriate built-in template.
151
- def render(options = {})
152
- r = renderer(options)
153
- @options.merge(options)[:header_footer] ? r.render('document', self) : content
302
+ def render(opts = {})
303
+ r = renderer(opts)
304
+ @options.merge(opts)[:header_footer] ? r.render('document', self) : r.render('embedded', self)
154
305
  end
155
306
 
156
307
  def content
157
- html_pieces = []
158
- @elements.each do |element|
159
- Asciidoctor::debug "Rendering element: #{element}"
160
- html_pieces << element.render
161
- end
162
- html_pieces.join
308
+ # per AsciiDoc-spec, remove the title after rendering the header
309
+ @attributes.delete('title')
310
+ @blocks.map {|b| b.render }.join
311
+ end
312
+
313
+ def to_s
314
+ %[#{super.to_s} - #{doctitle}]
163
315
  end
164
316
 
165
317
  end
@@ -0,0 +1,29 @@
1
+ # Public: Methods for managing inline elements in AsciiDoc block
2
+ class Asciidoctor::Inline < Asciidoctor::AbstractNode
3
+ # Public: Get the text of this inline element
4
+ attr_reader :text
5
+
6
+ # Public: Get the type (qualifier) of this inline element
7
+ attr_reader :type
8
+
9
+ # Public: Get/Set the target (e.g., uri) of this inline element
10
+ attr_accessor :target
11
+
12
+ def initialize(parent, context, text = nil, opts = {})
13
+ super(parent, context)
14
+
15
+ @text = text
16
+ @id = opts[:id] if opts.has_key?(:id)
17
+ @type = opts[:type] if opts.has_key?(:type)
18
+ @target = opts[:target] if opts.has_key?(:target)
19
+
20
+ if opts.has_key?(:attributes) && (attributes = opts[:attributes]).is_a?(Hash)
21
+ update_attributes(opts[:attributes]) unless attributes.empty?
22
+ end
23
+ end
24
+
25
+ def render
26
+ renderer.render("inline_#{@context}", self).chomp
27
+ end
28
+
29
+ end
@@ -1,199 +1,380 @@
1
- # Public: Methods to parse and build objects from Asciidoc lines
1
+ # Public: Methods to parse lines of AsciiDoc into an object hierarchy
2
+ # representing the structure of the document. All methods are class methods and
3
+ # should be invoked from the Lexer class. The main entry point is ::next_block.
4
+ # No Lexer instances shall be discovered running around. (Any attempt to
5
+ # instantiate a Lexer will be futile).
6
+ #
7
+ # The object hierarchy created by the Lexer consists of zero or more Section
8
+ # and Block objects. Section objects may be nested and a Section object
9
+ # contains zero or more Block objects. Block objects may be nested, but may
10
+ # only contain other Block objects. Block objects which represent lists may
11
+ # contain zero or more ListItem objects.
12
+ #
13
+ # Examples
14
+ #
15
+ # # Create a Reader for the AsciiDoc lines and retrieve the next block from it.
16
+ # # Lexer::next_block requires a parent, so we begin by instantiating an empty Document.
17
+ #
18
+ # doc = Document.new
19
+ # reader = Reader.new lines
20
+ # block = Lexer.next_block(reader, doc)
21
+ # block.class
22
+ # # => Asciidoctor::Block
2
23
  class Asciidoctor::Lexer
3
24
 
4
25
  include Asciidoctor
5
26
 
6
27
  # Public: Make sure the Lexer object doesn't get initialized.
28
+ #
29
+ # Raises RuntimeError if this constructor is invoked.
7
30
  def initialize
8
31
  raise 'Au contraire, mon frere. No lexer instances will be running around.'
9
32
  end
10
33
 
11
- def self.document_from_parent(parent)
12
- if parent.is_a? Document
13
- parent
34
+ # Public: Parses AsciiDoc source read from the Reader into the Document
35
+ #
36
+ # This method is the main entry-point into the Lexer when parsing a full document.
37
+ # It first looks for and, if found, processes the document title. It then
38
+ # proceeds to iterate through the lines in the Reader, parsing the document
39
+ # into nested Sections and Blocks.
40
+ #
41
+ # reader - the Reader holding the source lines of the document
42
+ # document - the empty Document into which the lines will be parsed
43
+ #
44
+ # returns the Document object
45
+ def self.parse(reader, document)
46
+ # process and plow away any attribute lines that precede the first block so
47
+ # we can get at the document title, if present, then begin parsing blocks
48
+ attributes = parse_block_metadata_lines(reader, document)
49
+
50
+ # by processing the header here, we enforce its position at head of the document
51
+ next_level = is_next_line_section? reader
52
+ if next_level == 0
53
+ title_info = parse_section_title(reader)
54
+ document.title = title_info[1]
55
+ parse_header_metadata(reader, document)
56
+ end
57
+
58
+ while reader.has_lines?
59
+ new_section, attributes = next_section(reader, document, attributes)
60
+ document << new_section unless new_section.nil?
61
+ end
62
+
63
+ document
64
+ end
65
+
66
+ # Public: Return the next section from the Reader.
67
+ #
68
+ # This method processes block metadata, content and subsections for this
69
+ # section and returns the Section object and any orphaned attributes.
70
+ #
71
+ # If the parent is a Document and has a header (document title), then
72
+ # this method will put any non-section blocks at the start of document
73
+ # into a preamble Block. If there are no such blocks, the preamble is
74
+ # dropped.
75
+ #
76
+ # Since we are reading line-by-line, there's a chance that metadata
77
+ # that should be associated with the following block gets consumed.
78
+ # To deal with this case, the method returns a running Hash of
79
+ # "orphaned" attributes that get passed to the next Section or Block.
80
+ #
81
+ # reader - the source Reader
82
+ # parent - the parent Section or Document of this new section
83
+ # attributes - a Hash of metadata that was left orphaned from the
84
+ # previous Section.
85
+ #
86
+ # Examples
87
+ #
88
+ # source
89
+ # # => "Greetings\n---------\nThis is my doc.\n\nSalutations\n-----------\nIt is awesome."
90
+ #
91
+ # reader = Reader.new source.lines.entries
92
+ # # create empty document to parent the section
93
+ # # and hold attributes extracted from header
94
+ # doc = Document.new
95
+ #
96
+ # Lexer.next_section(reader, doc).first.title
97
+ # # => "Greetings"
98
+ #
99
+ # Lexer.next_section(reader, doc).first.title
100
+ # # => "Salutations"
101
+ #
102
+ # returns a two-element Array containing the Section and Hash of orphaned attributes
103
+ def self.next_section(reader, parent, attributes = {})
104
+ preamble = false
105
+
106
+ # check if we are at the start of processing the document
107
+ # NOTE we could drop a hint in the attributes to indicate
108
+ # that we are at a section title (so we don't have to check)
109
+ if parent.is_a?(Document) && parent.blocks.empty? &&
110
+ (parent.has_header? || !is_next_line_section?(reader))
111
+
112
+ if parent.has_header?
113
+ preamble = Block.new(parent, :preamble)
114
+ parent << preamble
115
+ end
116
+ section = parent
117
+
118
+ current_level = 0
119
+ if parent.attributes.has_key? 'fragment'
120
+ expected_next_levels = nil
121
+ # small tweak to allow subsequent level-0 sections for book doctype
122
+ elsif parent.doctype == 'book'
123
+ expected_next_levels = [0, 1]
124
+ else
125
+ expected_next_levels = [1]
126
+ end
14
127
  else
15
- parent.document
128
+ section = initialize_section(reader, parent, attributes)
129
+ # clear attributes, except for title which carries over
130
+ # section title to next block of content
131
+ attributes = attributes.delete_if {|k, v| k != 'title'}
132
+ current_level = section.level
133
+ expected_next_levels = [current_level + 1]
16
134
  end
135
+
136
+ reader.skip_blank_lines
137
+
138
+ # Parse lines belonging to this section and its subsections until we
139
+ # reach the end of this section level
140
+ #
141
+ # 1. first look for metadata thingies (anchor, attribute list, block title line, etc)
142
+ # 2. then look for a section, recurse if found
143
+ # 3. then process blocks
144
+ #
145
+ # We have to parse all the metadata lines before continuing with the loop,
146
+ # otherwise subsequent metadata lines get interpreted as block content
147
+ while reader.has_lines?
148
+ parse_block_metadata_lines(reader, section, attributes)
149
+
150
+ next_level = is_next_line_section?(reader)
151
+ if next_level
152
+ doctype = parent.document.doctype
153
+ if next_level == 0 && doctype != 'book'
154
+ puts "asciidoctor: ERROR: only book doctypes can contain level 0 sections"
155
+ end
156
+ if next_level > current_level || (section.is_a?(Document) && next_level == 0)
157
+ unless expected_next_levels.nil? || expected_next_levels.include?(next_level)
158
+ puts "asciidoctor: WARNING: section title out of sequence: " +
159
+ "expected #{expected_next_levels.size > 1 ? 'levels' : 'level'} #{expected_next_levels * ' or '}, " +
160
+ "got level #{next_level}"
161
+ end
162
+ # the attributes returned are those that are orphaned
163
+ new_section, attributes = next_section(reader, section, attributes)
164
+ section << new_section
165
+ else
166
+ # close this section (and break out of the nesting) to begin a new one
167
+ break
168
+ end
169
+ else
170
+ # just take one block or else we run the risk of overrunning section boundaries
171
+ new_block = next_block(reader, section, attributes, :parse_metadata => false)
172
+ if !new_block.nil?
173
+ (preamble || section) << new_block
174
+ attributes = {}
175
+ else
176
+ # don't clear attributes if we don't find a block because they may
177
+ # be trailing attributes that didn't get associated with a block
178
+ end
179
+ end
180
+
181
+ reader.skip_blank_lines
182
+ end
183
+
184
+ # prune the preamble if it has no content
185
+ if preamble && preamble.blocks.empty?
186
+ section.delete_at(0)
187
+ end
188
+
189
+ # The attributes returned here are orphaned attributes that fall at the end
190
+ # of a section that need to get transferred to the next section
191
+ # see "trailing block attributes transfer to the following section" in
192
+ # test/attributes_test.rb for an example
193
+ [section != parent ? section : nil, attributes.dup]
17
194
  end
18
195
 
19
- # Return the next block from the Reader.
196
+ # Public: Return the next Section or Block object from the Reader.
197
+ #
198
+ # Begins by skipping over blank lines to find the start of the next Section
199
+ # or Block. Processes each line of the reader in sequence until a Section or
200
+ # Block is found or the reader has no more lines.
20
201
  #
21
- # * Skip over blank lines to find the start of the next content block.
22
- # * Use defined regular expressions to determine the type of content block.
23
- # * Based on the type of content block, grab lines to the end of the block.
24
- # * Return a new Asciidoctor::Block or Asciidoctor::Section instance with the
25
- # content set to the grabbed lines.
26
- def self.next_block(reader, parent = self)
202
+ # Uses regular expressions from the Asciidoctor module to match Section
203
+ # and Block delimiters. The ensuing lines are then processed according
204
+ # to the type of content.
205
+ #
206
+ # reader - The Reader from which to retrieve the next block
207
+ # parent - The Document, Section or Block to which the next block belongs
208
+ #
209
+ # Returns a Section or Block object holding the parsed content of the processed lines
210
+ def self.next_block(reader, parent, attributes = {}, options = {})
27
211
  # Skip ahead to the block content
28
- reader.skip_blank
212
+ skipped = reader.skip_blank
29
213
 
214
+ # bail if we've reached the end of the section content
30
215
  return nil unless reader.has_lines?
31
- context = parent.is_a?(Block) ? parent.context : nil
32
-
33
- # NOTE: An anchor looks like this:
34
- # [[foo]]
35
- # with the inside [foo] (including brackets) as match[1]
36
- if match = reader.peek_line.match(REGEXP[:anchor])
37
- Asciidoctor.debug "Found an anchor in line:\n\t#{reader.peek_line}"
38
- # NOTE: This expression conditionally strips off the brackets from
39
- # [foo], though REGEXP[:anchor] won't actually match without
40
- # match[1] being bracketed, so the condition isn't necessary.
41
- anchor = match[1].match(/^\[(.*)\]/) ? $1 : match[1]
42
- # NOTE: Set @references['foo'] = '[foo]'
43
- document_from_parent(parent).references[anchor] = match[1]
44
- reader.get_line
45
- else
46
- anchor = nil
47
- end
48
216
 
49
- # skip a list continuation character if we're processing a list
50
- if LIST_CONTEXTS.include?(context)
51
- reader.skip_list_continuation
217
+ if options[:text] && skipped > 0
218
+ options.delete(:text)
52
219
  end
53
220
 
54
- Asciidoctor.debug "/"*64
55
- Asciidoctor.debug "#{File.basename(__FILE__)}:#{__LINE__} -> #{__method__} - First two lines are:"
56
- Asciidoctor.debug reader.peek_line
57
- tmp_line = reader.get_line
58
- Asciidoctor.debug reader.peek_line
59
- reader.unshift tmp_line
60
- Asciidoctor.debug "/"*64
221
+ Asciidoctor.debug {
222
+ msg = []
223
+ msg << '/' * 64
224
+ msg << 'next_block() - First two lines are:'
225
+ msg << reader.peek_line
226
+ tmp_line = reader.get_line
227
+ msg << reader.peek_line
228
+ reader.unshift tmp_line
229
+ msg << '/' * 64
230
+ msg * "\n"
231
+ }
232
+
233
+ parse_metadata = options[:parse_metadata] || true
234
+ parse_sections = options[:parse_sections] || false
61
235
 
236
+ document = parent.document
237
+ context = parent.is_a?(Block) ? parent.context : nil
62
238
  block = nil
63
239
  title = nil
64
240
  caption = nil
65
- buffer = []
66
- attributes = {}
67
- context = parent.is_a?(Block) ? parent.context : nil
68
- while reader.has_lines? && block.nil?
69
- buffer.clear
70
- this_line = reader.get_line
71
- next_line = reader.peek_line || ''
72
-
73
- if this_line.match(REGEXP[:comment_blk])
74
- Reader.new(reader.grab_lines_until {|line| line.match( REGEXP[:comment_blk] ) })
75
241
 
76
- elsif this_line.match(REGEXP[:comment])
77
- reader.skip_blank
78
-
79
- elsif match = this_line.match(REGEXP[:attr_list_blk])
80
- collect_attributes(match[1], attributes)
81
- reader.skip_blank
242
+ while reader.has_lines? && block.nil?
243
+ if parse_metadata && parse_block_metadata_line(reader, document, attributes, options)
244
+ reader.next_line
245
+ next
246
+ elsif parse_sections && context.nil? && is_next_line_section?(reader)
247
+ block, attributes = next_section(reader, parent, attributes)
248
+ break
249
+ end
82
250
 
83
- elsif is_section_heading?(this_line, next_line)
84
- # If we've come to a new section, then we've found the end of this
85
- # current block. Likewise if we'd found an unassigned anchor, push
86
- #
87
- # FIXME when slurping up next section, give back trailing anchor to following section
88
- reader.unshift(this_line)
89
- Asciidoctor.debug "#{__method__}: SENDING to next_section with lines[0] = #{reader.peek_line}"
90
- block = next_section(reader, parent)
251
+ this_line = reader.get_line
91
252
 
92
- elsif match = this_line.match(REGEXP[:title])
93
- title = match[1]
253
+ delimited_blk = delimited_block? this_line
254
+
255
+ # NOTE I haven't decided whether I want this check here or in
256
+ # parse_block_metadata (where it is currently)
257
+ #if this_line.match(REGEXP[:comment_blk])
258
+ # reader.grab_lines_until {|line| line.match( REGEXP[:comment_blk] ) }
259
+ # reader.skip_blank
260
+ # # NOTE we should break here because we have found a block, it
261
+ # # just happens to be nil...if we keep going we potentially overrun
262
+ # # a section heading which is not processed in this anymore
263
+ # break
264
+
265
+ # NOTE we're letting ruler have attributes
266
+ if !options[:text] && this_line.match(REGEXP[:ruler])
267
+ block = Block.new(parent, :ruler)
94
268
  reader.skip_blank
95
269
 
96
- elsif match = this_line.match(REGEXP[:image_blk])
97
- collect_attributes(match[2], attributes, ['alt', 'width', 'height'])
270
+ elsif !options[:text] && (match = this_line.match(REGEXP[:image_blk]))
98
271
  block = Block.new(parent, :image)
99
- # FIXME this seems kind of one-off here
272
+ AttributeList.new(document.sub_attributes(match[2])).parse_into(attributes, ['alt', 'width', 'height'])
100
273
  target = block.sub_attributes(match[1])
101
- attributes['target'] = target
102
- attributes['alt'] ||= File.basename(target, File.extname(target))
274
+ if !target.to_s.empty?
275
+ attributes['target'] = target
276
+ document.register(:images, target)
277
+ attributes['alt'] ||= File.basename(target, File.extname(target))
278
+ else
279
+ # drop the line if target resolves to nothing
280
+ block = nil
281
+ end
103
282
  reader.skip_blank
104
283
 
105
- elsif this_line.match(REGEXP[:oblock])
106
- # oblock is surrounded by '--' lines and has zero or more blocks inside
107
- buffer = Reader.new(reader.grab_lines_until { |line| line.match(REGEXP[:oblock]) })
284
+ elsif delimited_blk && (match = this_line.match(REGEXP[:open_blk]))
285
+ # an open block is surrounded by '--' lines and has zero or more blocks inside
286
+ terminator = match[0]
287
+ buffer = Reader.new reader.grab_lines_until(:terminator => terminator)
108
288
 
109
289
  # Strip lines off end of block - not implemented yet
110
290
  # while buffer.has_lines? && buffer.last.strip.empty?
111
291
  # buffer.pop
112
292
  # end
113
293
 
114
- block = Block.new(parent, :oblock, [])
294
+ block = Block.new(parent, :open)
115
295
  while buffer.has_lines?
116
296
  new_block = next_block(buffer, block)
117
297
  block.blocks << new_block unless new_block.nil?
118
298
  end
119
299
 
120
300
  # needs to come before list detection
121
- elsif this_line.match(REGEXP[:sidebar_blk])
301
+ elsif delimited_blk && (match = this_line.match(REGEXP[:sidebar_blk]))
122
302
  # sidebar is surrounded by '****' (4 or more '*' chars) lines
303
+ terminator = match[0]
123
304
  # FIXME violates DRY because it's a duplication of quote parsing
124
305
  block = Block.new(parent, :sidebar)
125
- buffer = Reader.new(reader.grab_lines_until {|line| line.match( REGEXP[:sidebar_blk] ) })
306
+ buffer = Reader.new reader.grab_lines_until(:terminator => terminator)
126
307
 
127
308
  while buffer.has_lines?
128
309
  new_block = next_block(buffer, block)
129
310
  block.blocks << new_block unless new_block.nil?
130
311
  end
131
312
 
132
- elsif list_type = [:olist, :colist].detect{|l| this_line.match( REGEXP[l] )}
313
+ elsif match = this_line.match(REGEXP[:colist])
314
+ block = Block.new(parent, :colist)
315
+ attributes['style'] = 'arabic'
133
316
  items = []
134
- Asciidoctor.debug "Creating block of type: #{list_type}"
135
- block = Block.new(parent, list_type)
136
- attributes['style'] ||= 'arabic'
137
- while !this_line.nil? && match = this_line.match(REGEXP[list_type])
138
- item = ListItem.new(block)
139
-
140
- reader.unshift match[2].lstrip.sub(/^\./, '\.')
141
- item_segment = Reader.new(list_item_segment(reader, :alt_ending => REGEXP[list_type]))
142
- while item_segment.has_lines?
143
- new_block = next_block(item_segment, block)
144
- item.blocks << new_block unless new_block.nil?
317
+ block.buffer = items
318
+ reader.unshift this_line
319
+ expected_index = 1
320
+ begin
321
+ # might want to move this check to a validate method
322
+ if match[1].to_i != expected_index
323
+ puts "asciidoctor: WARNING: callout list item index: expected #{expected_index} got #{match[1]}"
145
324
  end
325
+ list_item = next_list_item(reader, block, match)
326
+ expected_index += 1
327
+ if !list_item.nil?
328
+ items << list_item
329
+ coids = document.callouts.callout_ids(items.size)
330
+ if !coids.empty?
331
+ list_item.attributes['coids'] = coids
332
+ else
333
+ puts 'asciidoctor: WARNING: no callouts refer to list item ' + items.size.to_s
334
+ end
335
+ end
336
+ end while reader.has_lines? && match = reader.peek_line.match(REGEXP[:colist])
146
337
 
147
- item.fold_first
148
-
149
- items << item
150
-
151
- reader.skip_blank
152
-
153
- this_line = reader.get_line
154
- end
155
- reader.unshift(this_line) unless this_line.nil?
156
-
157
- block.buffer = items
338
+ document.callouts.next_list
158
339
 
159
340
  elsif match = this_line.match(REGEXP[:ulist])
341
+ AttributeList.rekey(attributes, ['style'])
160
342
  reader.unshift(this_line)
161
- block = build_ulist(reader, parent)
162
-
163
- elsif match = this_line.match(REGEXP[:dlist])
164
- # TODO build_dlist method?
165
- pairs = []
166
- block = Block.new(parent, :dlist)
167
- # allows us to capture until we find a labeled item using the same delimiter (::, :::, :::: or ;;)
168
- sibling_matcher = REGEXP[:dlist_siblings][match[3]]
343
+ block = next_outline_list(reader, :ulist, parent)
169
344
 
170
- begin
171
- dt = ListItem.new(block, match[2])
172
- dt.anchor = match[1] unless match[1].nil?
173
- dd = ListItem.new(block, match[5])
174
-
175
- dd_segment = Reader.new(list_item_segment(reader, :alt_ending => sibling_matcher))
176
- while dd_segment.has_lines?
177
- new_block = next_block(dd_segment, block)
178
- dd.blocks << new_block unless new_block.nil?
345
+ elsif match = this_line.match(REGEXP[:olist])
346
+ AttributeList.rekey(attributes, ['style'])
347
+ reader.unshift(this_line)
348
+ block = next_outline_list(reader, :olist, parent)
349
+ # QUESTION move this logic to next_outline_list?
350
+ if !(attributes.has_key? 'style') && !(block.attributes.has_key? 'style')
351
+ marker = block.buffer.first.marker
352
+ if marker.start_with? '.'
353
+ # first one makes more sense, but second one is AsciiDoc-compliant
354
+ #attributes['style'] = (ORDERED_LIST_STYLES[block.level - 1] || ORDERED_LIST_STYLES.first).to_s
355
+ attributes['style'] = (ORDERED_LIST_STYLES[marker.length - 1] || ORDERED_LIST_STYLES.first).to_s
356
+ else
357
+ style = ORDERED_LIST_STYLES.detect{|s| marker.match(ORDERED_LIST_MARKER_PATTERNS[s]) }
358
+ attributes['style'] = (style || ORDERED_LIST_STYLES.first).to_s
179
359
  end
360
+ end
180
361
 
181
- dd.fold_first
182
-
183
- pairs << [dt, dd]
184
-
185
- # this skip_blank might be redundant
186
- reader.skip_blank
187
- this_line = reader.get_line
188
- end while !this_line.nil? && match = this_line.match(sibling_matcher)
189
-
190
- reader.unshift(this_line) unless this_line.nil?
191
- block.buffer = pairs
362
+ elsif match = this_line.match(REGEXP[:dlist])
363
+ reader.unshift this_line
364
+ block = next_labeled_list(reader, match, parent)
365
+
366
+ elsif delimited_blk && (match = this_line.match(document.nested? ? REGEXP[:table_nested] : REGEXP[:table]))
367
+ # table is surrounded by lines starting with a | followed by 3 or more '=' chars
368
+ terminator = match[0]
369
+ AttributeList.rekey(attributes, ['style'])
370
+ table_reader = Reader.new reader.grab_lines_until(:terminator => terminator, :skip_line_comments => true)
371
+ block = next_table(table_reader, parent, attributes)
192
372
 
193
373
  # FIXME violates DRY because it's a duplication of other block parsing
194
- elsif this_line.match(REGEXP[:example])
374
+ elsif delimited_blk && (match = this_line.match(REGEXP[:example]))
195
375
  # example is surrounded by lines with 4 or more '=' chars
196
- rekey_positional_attributes(attributes, ['style'])
376
+ terminator = match[0]
377
+ AttributeList.rekey(attributes, ['style'])
197
378
  if admonition_style = ADMONITION_STYLES.detect {|s| attributes['style'] == s}
198
379
  block = Block.new(parent, :admonition)
199
380
  attributes['name'] = admonition_style.downcase
@@ -201,7 +382,7 @@ class Asciidoctor::Lexer
201
382
  else
202
383
  block = Block.new(parent, :example)
203
384
  end
204
- buffer = Reader.new(reader.grab_lines_until {|line| line.match( REGEXP[:example] ) })
385
+ buffer = Reader.new reader.grab_lines_until(:terminator => terminator)
205
386
 
206
387
  while buffer.has_lines?
207
388
  new_block = next_block(buffer, block)
@@ -209,36 +390,45 @@ class Asciidoctor::Lexer
209
390
  end
210
391
 
211
392
  # FIXME violates DRY w/ non-delimited block listing
212
- elsif this_line.match(REGEXP[:listing])
213
- rekey_positional_attributes(attributes, ['style', 'language', 'linenums'])
214
- buffer = reader.grab_lines_until {|line| line.match( REGEXP[:listing] )}
393
+ elsif delimited_blk && (match = this_line.match(REGEXP[:listing]))
394
+ terminator = match[0]
395
+ AttributeList.rekey(attributes, ['style', 'language', 'linenums'])
396
+ buffer = reader.grab_lines_until(:terminator => terminator)
215
397
  buffer.last.chomp! unless buffer.empty?
216
398
  block = Block.new(parent, :listing, buffer)
217
399
 
218
- elsif this_line.match(REGEXP[:quote])
400
+ elsif delimited_blk && (match = this_line.match(REGEXP[:quote]))
219
401
  # multi-line verse or quote is surrounded by a block delimiter
220
- rekey_positional_attributes(attributes, ['style', 'attribution', 'citetitle'])
402
+ terminator = match[0]
403
+ AttributeList.rekey(attributes, ['style', 'attribution', 'citetitle'])
221
404
  quote_context = (attributes['style'] == 'verse' ? :verse : :quote)
222
- buffer = Reader.new(reader.grab_lines_until {|line| line.match( REGEXP[:quote] ) })
405
+ block_reader = Reader.new reader.grab_lines_until(:terminator => terminator)
223
406
 
224
407
  # only quote can have other section elements (as as section block)
225
408
  section_body = (quote_context == :quote)
226
409
 
227
410
  if section_body
228
411
  block = Block.new(parent, quote_context)
229
- while buffer.has_lines?
230
- new_block = next_block(buffer, block)
412
+ while block_reader.has_lines?
413
+ new_block = next_block(block_reader, block)
231
414
  block.blocks << new_block unless new_block.nil?
232
415
  end
233
416
  else
234
- block = Block.new(parent, quote_context, buffer.lines)
417
+ block_reader.chomp_last!
418
+ block = Block.new(parent, quote_context, block_reader.lines)
235
419
  end
236
420
 
237
- elsif this_line.match(REGEXP[:lit_blk])
238
- # example is surrounded by '....' (4 or more '.' chars) lines
239
- buffer = reader.grab_lines_until {|line| line.match( REGEXP[:lit_blk] ) }
421
+ elsif delimited_blk && (blk_ctx = [:literal, :pass].detect{|t| this_line.match(REGEXP[t])})
422
+ # literal is surrounded by '....' (4 or more '.' chars) lines
423
+ # pass is surrounded by '++++' (4 or more '+' chars) lines
424
+ terminator = $~[0]
425
+ buffer = reader.grab_lines_until(:terminator => terminator)
240
426
  buffer.last.chomp! unless buffer.empty?
241
- block = Block.new(parent, :literal, buffer)
427
+ # a literal can masquerade as a listing
428
+ if attributes[1] == 'listing'
429
+ blk_ctx = :listing
430
+ end
431
+ block = Block.new(parent, blk_ctx, buffer)
242
432
 
243
433
  elsif this_line.match(REGEXP[:lit_par])
244
434
  # literal paragraph is contiguous lines starting with
@@ -246,343 +436,529 @@ class Asciidoctor::Lexer
246
436
 
247
437
  # So we need to actually include this one in the grab_lines group
248
438
  reader.unshift this_line
249
- buffer = reader.grab_lines_until(:preserve_last_line => true) {|line|
250
- (context == :dlist && line.match(REGEXP[:dlist])) || !line.match(REGEXP[:lit_par])
439
+ buffer = reader.grab_lines_until(:preserve_last_line => true, :break_on_blank_lines => true) {|line|
440
+ # labeled list terms can be indented, but a preceding blank indicates
441
+ # we are in a list continuation and therefore literals should be strictly literal
442
+ (context == :dlist && skipped == 0 && line.match(REGEXP[:dlist])) ||
443
+ delimited_block?(line)
251
444
  }
252
445
 
253
- # trim off the indentation that put us in this literal paragraph
254
- if !buffer.empty? && match = buffer.first.match(/^([[:blank:]]+)/)
255
- offset = match[1].length
256
- buffer = buffer.map {|l| l.slice(offset..-1)}
446
+ # trim off the indentation equivalent to the size of the least indented line
447
+ if !buffer.empty?
448
+ offset = buffer.map {|line| line.match(REGEXP[:leading_blanks])[1].length }.min
449
+ if offset > 0
450
+ buffer = buffer.map {|l| l.sub(/^\s{1,#{offset}}/, '') }
451
+ end
257
452
  buffer.last.chomp!
258
453
  end
259
454
 
260
455
  block = Block.new(parent, :literal, buffer)
456
+ # a literal gets special meaning inside of a definition list
457
+ if LIST_CONTEXTS.include?(context)
458
+ attributes['options'] ||= []
459
+ # TODO this feels hacky, better way to distinguish from explicit literal block?
460
+ attributes['options'] << 'listparagraph'
461
+ end
261
462
 
262
463
  ## these switches based on style need to come immediately before the else ##
263
464
 
264
- elsif attributes[0] == 'source'
265
- rekey_positional_attributes(attributes, ['style', 'language', 'linenums'])
465
+ elsif attributes[1] == 'source'
466
+ AttributeList.rekey(attributes, ['style', 'language', 'linenums'])
266
467
  reader.unshift(this_line)
267
468
  buffer = reader.grab_lines_until(:break_on_blank_lines => true)
268
469
  buffer.last.chomp! unless buffer.empty?
269
470
  block = Block.new(parent, :listing, buffer)
270
471
 
271
- elsif admonition_style = ADMONITION_STYLES.detect{|s| attributes[0] == s}
272
- # an admonition preceded by [*TYPE*] and lasts until a blank line
472
+ elsif admonition_style = ADMONITION_STYLES.detect{|s| attributes[1] == s}
473
+ # an admonition preceded by [<TYPE>] and lasts until a blank line
273
474
  reader.unshift(this_line)
274
475
  buffer = reader.grab_lines_until(:break_on_blank_lines => true)
476
+ buffer.last.chomp! unless buffer.empty?
275
477
  block = Block.new(parent, :admonition, buffer)
276
478
  attributes['style'] = admonition_style
277
479
  attributes['name'] = admonition_style.downcase
278
480
  attributes['caption'] ||= admonition_style.capitalize
279
481
 
280
- elsif quote_context = [:quote, :verse].detect{|s| attributes[0] == s.to_s}
482
+ elsif quote_context = [:quote, :verse].detect{|s| attributes[1] == s.to_s}
281
483
  # single-paragraph verse or quote is preceded by [verse] or [quote], respectively, and lasts until a blank line
282
- rekey_positional_attributes(attributes, ['style', 'attribution', 'citetitle'])
484
+ AttributeList.rekey(attributes, ['style', 'attribution', 'citetitle'])
283
485
  reader.unshift(this_line)
284
486
  buffer = reader.grab_lines_until(:break_on_blank_lines => true)
487
+ buffer.last.chomp! unless buffer.empty?
285
488
  block = Block.new(parent, quote_context, buffer)
286
489
 
287
- else
288
- # paragraph is contiguous nonblank/noncontinuation lines
490
+ else # paragraph, contiguous nonblank/noncontinuation lines
289
491
  reader.unshift this_line
290
- buffer = reader.grab_lines_until(:break_on_blank_lines => true, :preserve_last_line => true) {|line|
291
- (context == :dlist && line.match(REGEXP[:dlist])) ||
292
- ([:ulist, :olist, :dlist].include?(context) && line.chomp == LIST_CONTINUATION) ||
293
- line.match(REGEXP[:oblock])
492
+ buffer = reader.grab_lines_until(:break_on_blank_lines => true, :preserve_last_line => true, :skip_line_comments => true) {|line|
493
+ delimited_block?(line) || line.match(REGEXP[:attr_line]) ||
494
+ # next list item can be directly adjacent to paragraph of previous list item
495
+ context == :dlist && line.match(REGEXP[:dlist])
496
+ # not sure if there are any cases when we need this check for other list types
497
+ #LIST_CONTEXTS.include?(context) && line.match(REGEXP[context])
294
498
  }
295
499
 
296
- if LIST_CONTEXTS.include?(context)
297
- reader.skip_list_continuation
500
+ # NOTE we need this logic because the reader is processing line
501
+ # comments and that might leave us w/ an empty buffer
502
+ if buffer.empty?
503
+ reader.get_line
504
+ break
298
505
  end
299
506
 
300
- if !buffer.empty? && admonition = buffer.first.match(Regexp.new('^(' + ADMONITION_STYLES.join('|') + '):\s+'))
507
+ catalog_inline_anchors(buffer.join, document)
508
+
509
+ if !options[:text] && (admonition = buffer.first.match(Regexp.new('^(' + ADMONITION_STYLES.join('|') + '):\s+')))
301
510
  buffer[0] = admonition.post_match
302
511
  block = Block.new(parent, :admonition, buffer)
303
512
  attributes['style'] = admonition[1]
304
513
  attributes['name'] = admonition[1].downcase
305
514
  attributes['caption'] ||= admonition[1].capitalize
306
515
  else
307
- buffer.last.chomp! unless buffer.empty?
308
- Asciidoctor.debug "Proud parent #{parent} getting a new paragraph with buffer: #{buffer}"
516
+ buffer.last.chomp!
309
517
  block = Block.new(parent, :paragraph, buffer)
310
518
  end
311
519
  end
312
520
  end
313
521
 
314
- # when looking for nested content, a series of
315
- # line comments or a comment block could leave us
316
- # without a block
522
+ # when looking for nested content, one or more line comments, comment
523
+ # blocks or trailing attribute lists could leave us without a block,
524
+ # so handle accordingly
317
525
  if !block.nil?
318
- block.anchor ||= (anchor || attributes['id'])
319
- block.title ||= title
320
- block.caption ||= caption
526
+ block.id = attributes['id'] if attributes.has_key?('id')
527
+ block.title ||= (attributes['title'] || title)
528
+ block.caption ||= caption unless block.is_a?(Section)
529
+ # AsciiDoc always uses [id] as the reftext in HTML output,
530
+ # but I'd like to do better in Asciidoctor
531
+ if block.id && block.title? && !attributes.has_key?('reftext')
532
+ document.register(:ids, [block.id, block.title])
533
+ end
321
534
  block.update_attributes(attributes)
535
+
536
+ if block.context == :listing || block.context == :literal
537
+ catalog_callouts(block.buffer.join, document)
538
+ end
322
539
  end
323
540
 
324
541
  block
325
542
  end
326
543
 
327
- # Private: Return the Array of lines constituting the next list item
328
- # segment, removing them from the 'lines' Array passed in.
329
- #
330
- # reader - the Reader instance from which to get input.
331
- # options - an optional Hash of processing options:
332
- # * :alt_ending may be used to specify a regular expression match
333
- # other than a blank line to signify the end of the segment.
334
- # * :list_types may be used to specify list item patterns to
335
- # include. May be either a single Symbol or an Array of Symbols.
336
- # * :list_level may be used to specify a mimimum list item level
337
- # to include. If this is specified, then break if we find a list
338
- # item of a lower level.
544
+ # Public: Determines whether this line is the start of any of the delimited blocks
339
545
  #
340
- # Returns the Array of lines forming the next segment.
341
- #
342
- # Examples
343
- #
344
- # reader = Asciidoctor::Reader.new(
345
- # ["First paragraph\n", "+\n", "Second paragraph\n", "--\n",
346
- # "Open block\n", "\n", "Can have blank lines\n", "--\n", "\n",
347
- # "In a different segment\n"])
546
+ # returns the match data if this line is the first line of a delimited block or nil if not
547
+ #--
548
+ # TODO could use the match value as a lookup for the block type so we don't have
549
+ # to do any subsequent regexp
550
+ def self.delimited_block?(line)
551
+ # naive match
552
+ #line.match(REGEXP[:any_blk])
553
+
554
+ # attempt at better performance
555
+ if line.length > 0
556
+ # NOTE accessing the first element before calling ord is first Ruby 1.8.7 compat
557
+ REGEXP[:any_blk_ord].include?(line[0..0][0].ord) ? line.match(REGEXP[:any_blk]) : nil
558
+ else
559
+ nil
560
+ end
561
+ end
562
+
563
+ # Internal: Parse and construct an outline list Block from the current position of the Reader
348
564
  #
349
- # list_item_segment(reader)
350
- # => ["First paragraph\n", "+\n", "Second paragraph\n", "--\n",
351
- # "Open block\n", "\n", "Can have blank lines\n", "--\n"]
565
+ # reader - The Reader from which to retrieve the outline list
566
+ # list_type - A Symbol representing the list type (:olist for ordered, :ulist for unordered)
567
+ # parent - The parent Block to which this outline list belongs
352
568
  #
353
- # reader.peek_line
354
- # => "In a different segment\n"
355
- def self.list_item_segment(reader, options={})
356
- alternate_ending = options[:alt_ending]
357
- list_types = Array(options[:list_types]) || [:ulist, :olist, :colist, :dlist]
358
- list_level = options[:list_level].to_i
359
-
360
- # We know we want to include :lit_par types, even if we have specified,
361
- # say, only :ulist type list entries.
362
- list_types << :lit_par unless list_types.include? :lit_par
363
- segment = []
364
-
365
- reader.skip_blank
366
-
367
- # Grab lines until the first blank line not inside an open block
368
- # or listing
369
- in_oblock = false
370
- in_listing = false
371
- while reader.has_lines?
372
- this_line = reader.get_line
373
- Asciidoctor.debug "-----> Processing: #{this_line}"
374
- in_oblock = !in_oblock if this_line.match(REGEXP[:oblock])
375
- in_listing = !in_listing if this_line.match(REGEXP[:listing])
376
- if !in_oblock && !in_listing
377
- if this_line.strip.empty?
378
- # TODO - FIX THIS BEFORE ANY MORE KITTENS DIE AUGGGHHH!!!
379
- next_nonblank = reader.instance_variable_get(:@lines).detect{|l| !l.strip.empty?}
380
-
381
- # If there are blank lines ahead, but there's at least one
382
- # more non-blank line that doesn't trigger an alternate_ending
383
- # for the block of lines, then vacuum up all the blank lines
384
- # into this segment and continue with the next non-blank line.
385
- if next_nonblank &&
386
- ( alternate_ending.nil? ||
387
- !next_nonblank.match(alternate_ending)
388
- ) && list_types.find { |list_type| next_nonblank.match(REGEXP[list_type]) }
389
-
390
- while reader.has_lines? and reader.peek_line.strip.empty?
391
- segment << this_line
392
- this_line = reader.get_line
393
- end
394
- else
569
+ # Returns the Block encapsulating the parsed outline (unordered or ordered) list
570
+ def self.next_outline_list(reader, list_type, parent)
571
+ list_block = Block.new(parent, list_type)
572
+ items = []
573
+ list_block.buffer = items
574
+ if parent.context == list_type
575
+ list_block.level = parent.level + 1
576
+ else
577
+ list_block.level = 1
578
+ end
579
+ Asciidoctor.debug { "Created #{list_type} block: #{list_block}" }
580
+
581
+ while reader.has_lines? && (match = reader.peek_line.match(REGEXP[list_type]))
582
+
583
+ marker = resolve_list_marker(list_type, match[1])
584
+
585
+ # if we are moving to the next item, and the marker is different
586
+ # determine if we are moving up or down in nesting
587
+ if items.size > 0 && marker != items.first.marker
588
+ # assume list is nested by default, but then check to see if we are
589
+ # popping out of a nested list by matching an ancestor's list marker
590
+ this_item_level = list_block.level + 1
591
+ p = parent
592
+ while p.context == list_type
593
+ if marker == p.buffer.first.marker
594
+ this_item_level = p.level
395
595
  break
396
596
  end
397
-
398
- # Have we come to a line matching an alternate_ending regexp?
399
- elsif alternate_ending && this_line.match(alternate_ending)
400
- reader.unshift this_line
401
- break
402
-
403
- # Do we have a minimum list_level, and have come to a list item
404
- # line with a lower level?
405
- elsif list_level &&
406
- list_types.find { |list_type| this_line.match(REGEXP[list_type]) } &&
407
- ($1.length < list_level)
408
- reader.unshift this_line
409
- break
597
+ p = p.parent
410
598
  end
599
+ else
600
+ this_item_level = list_block.level
601
+ end
411
602
 
412
- # From the Asciidoc user's guide:
413
- # Another list or a literal paragraph immediately following
414
- # a list item will be implicitly included in the list item
415
-
416
- # Thus, the list_level stuff may be wrong here.
603
+ if items.size == 0 || this_item_level == list_block.level
604
+ list_item = next_list_item(reader, list_block, match)
605
+ elsif this_item_level < list_block.level
606
+ # leave this block
607
+ break
608
+ elsif this_item_level > list_block.level
609
+ # If this next list level is down one from the
610
+ # current Block's, append it to content of the current list item
611
+ items.last.blocks << next_block(reader, list_block)
417
612
  end
418
613
 
419
- segment << this_line
614
+ items << list_item unless list_item.nil?
615
+ list_item = nil
616
+
617
+ reader.skip_blank
420
618
  end
421
619
 
422
- Asciidoctor.debug "*"*40
423
- Asciidoctor.debug "#{File.basename(__FILE__)}:#{__LINE__} -> #{__method__}: Returning this:"
424
- #Asciidoctor.debug segment.inspect
425
- Asciidoctor.debug "*"*10
426
- Asciidoctor.debug "Leaving #{__method__}: Top of reader queue is:"
427
- Asciidoctor.debug reader.peek_line
428
- Asciidoctor.debug "*"*40
429
- segment
620
+ list_block
430
621
  end
431
622
 
432
- # Private: Get the Integer ulist level based on the characters
433
- # in front of the list item text.
623
+ # Internal: Catalog any callouts found in the text, but don't process them
434
624
  #
435
- # line - the String line containing the list item
436
- def self.ulist_level(line)
437
- if m = line.strip.match(/^(- | \*{1,5})\s+/x)
438
- return m[1].length
439
- end
625
+ # text - The String of text in which to look for callouts
626
+ # document - The current document on which the callouts are stored
627
+ #
628
+ # Returns nothing
629
+ def self.catalog_callouts(text, document)
630
+ text.scan(REGEXP[:callout_scan]) {
631
+ # alias match for Ruby 1.8.7 compat
632
+ m = $~
633
+ next if m[0].start_with? '\\'
634
+ document.callouts.register(m[1])
635
+ }
440
636
  end
441
637
 
442
- def self.build_ulist_item(reader, block, match = nil)
443
- list_type = :ulist
444
- this_line = reader.get_line
445
- return nil unless this_line
446
-
447
- match ||= this_line.match(REGEXP[list_type])
448
- if match.nil?
449
- reader.unshift(this_line)
450
- return nil
451
- end
638
+ # Internal: Catalog any inline anchors found in the text, but don't process them
639
+ #
640
+ # text - The String text in which to look for inline anchors
641
+ # document - The current document on which the references are stored
642
+ #
643
+ # Returns nothing
644
+ def self.catalog_inline_anchors(text, document)
645
+ text.scan(REGEXP[:anchor_macro]) {
646
+ # alias match for Ruby 1.8.7 compat
647
+ m = $~
648
+ next if m[0].start_with? '\\'
649
+ id, reftext = m[1].split(',')
650
+ id.sub!(/^("|)(.*)\1$/, '\2')
651
+ if !reftext.nil?
652
+ reftext.sub!(/^("|)(.*)\1$/m, '\2')
653
+ end
654
+ document.register(:ids, [id, reftext])
655
+ }
656
+ nil
657
+ end
452
658
 
453
- level = match[1].length
659
+ # Internal: Parse and construct a labeled (e.g., definition) list Block from the current position of the Reader
660
+ #
661
+ # reader - The Reader from which to retrieve the labeled list
662
+ # match - The Regexp match for the head of the list
663
+ # parent - The parent Block to which this labeled list belongs
664
+ #
665
+ # Returns the Block encapsulating the parsed labeled list
666
+ def self.next_labeled_list(reader, match, parent)
667
+ pairs = []
668
+ block = Block.new(parent, :dlist)
669
+ block.buffer = pairs
670
+ # allows us to capture until we find a labeled item
671
+ # that uses the same delimiter (::, :::, :::: or ;;)
672
+ sibling_pattern = REGEXP[:dlist_siblings][match[2]]
673
+
674
+ begin
675
+ pairs << next_list_item(reader, block, match, sibling_pattern)
676
+ end while reader.has_lines? && match = reader.peek_line.match(sibling_pattern)
454
677
 
455
- list_item = ListItem.new(block)
456
- list_item.level = level
457
- Asciidoctor.debug "#{__FILE__}:#{__LINE__}: Created ListItem #{list_item} with match[2]: #{match[2]} and level: #{list_item.level}"
678
+ block
679
+ end
458
680
 
459
- # Restore first line of list item
460
- # Also prevent bullet list text starting with . from being treated as a paragraph
461
- # title or some other unseemly thing in list_item_segment. I think. (NOTE)
462
- reader.unshift match[2].lstrip.sub(/^\./, '\.')
681
+ # Internal: Parse and construct the next ListItem for the current bulleted
682
+ # (unordered or ordered) list Block, callout lists included, or the next
683
+ # term ListItem and definition ListItem pair for the labeled list Block.
684
+ #
685
+ # First collect and process all the lines that constitute the next list
686
+ # item for the parent list (according to its type). Next, parse those lines
687
+ # into blocks and associate them with the ListItem (in the case of a
688
+ # labeled list, the definition ListItem). Finally, fold the first block
689
+ # into the item's text attribute according to rules described in ListItem.
690
+ #
691
+ # reader - The Reader from which to retrieve the next list item
692
+ # list_block - The parent list Block of this ListItem. Also provides access to the list type.
693
+ # match - The match Array which contains the marker and text (first-line) of the ListItem
694
+ # sibling_trait - The list marker or the Regexp to match a sibling item
695
+ #
696
+ # Returns the next ListItem or ListItem pair (depending on the list type)
697
+ # for the parent list Block.
698
+ def self.next_list_item(reader, list_block, match, sibling_trait = nil)
699
+ list_type = list_block.context
700
+
701
+ if list_type == :dlist
702
+ list_term = ListItem.new(list_block, match[1])
703
+ list_item = ListItem.new(list_block, match[3])
704
+ has_text = !match[3].to_s.empty?
705
+ else
706
+ # Create list item using first line as the text of the list item
707
+ list_item = ListItem.new(list_block, match[2])
463
708
 
464
- item_segment = Reader.new(list_item_segment(reader, :alt_ending => REGEXP[list_type]))
465
- # item_segment = list_item_segment(reader)
466
- while item_segment.has_lines?
467
- new_block = next_block(item_segment, block)
468
- list_item.blocks << new_block unless new_block.nil?
709
+ if !sibling_trait
710
+ sibling_trait = resolve_list_marker(list_type, match[1], list_block.buffer.size, true)
711
+ end
712
+ list_item.marker = sibling_trait
713
+ has_text = true
469
714
  end
470
715
 
471
- Asciidoctor.debug "\n\nlist_item has #{list_item.blocks.count} blocks, and first is a #{list_item.blocks.first.class} with context #{list_item.blocks.first.context rescue 'n/a'}\n\n"
716
+ # first skip the line with the marker / term
717
+ reader.get_line
718
+ list_item_reader = Reader.new grab_lines_for_list_item(reader, list_type, sibling_trait, has_text)
719
+ if list_item_reader.has_lines?
720
+ comment_lines = list_item_reader.consume_line_comments
721
+ subsequent_line = list_item_reader.peek_line
722
+ list_item_reader.unshift(*comment_lines) unless comment_lines.empty?
723
+
724
+ if !subsequent_line.nil?
725
+ continuation_connects_first_block = (subsequent_line == "\n")
726
+ content_adjacent = !subsequent_line.strip.empty?
727
+ else
728
+ continuation_connects_first_block = false
729
+ content_adjacent = false
730
+ end
731
+
732
+ # only relevant for :dlist
733
+ options = {:text => !has_text}
472
734
 
473
- list_item.fold_first
735
+ while list_item_reader.has_lines?
736
+ new_block = next_block(list_item_reader, list_block, {}, options)
737
+ list_item.blocks << new_block unless new_block.nil?
738
+ end
474
739
 
475
- list_item
740
+ list_item.fold_first(continuation_connects_first_block, content_adjacent)
741
+ end
742
+
743
+ if list_type == :dlist
744
+ unless list_item.text? || list_item.blocks?
745
+ list_item = nil
746
+ end
747
+ [list_term, list_item]
748
+ else
749
+ list_item
750
+ end
476
751
  end
477
752
 
478
- def self.build_ulist(reader, parent = nil)
479
- items = []
480
- list_type = :ulist
481
- block = Block.new(parent, list_type)
482
- Asciidoctor.debug "Created :ulist block: #{block}"
483
- first_item_level = nil
753
+ # Internal: Collect the lines belonging to the current list item, navigating
754
+ # through all the rules that determine what comprises a list item.
755
+ #
756
+ # Grab lines until a sibling list item is found, or the block is broken by a
757
+ # terminator (such as a line comment). Definition lists are more greedy if
758
+ # they don't have optional inline item text...they want that text
759
+ #
760
+ # reader - The Reader from which to retrieve the lines.
761
+ # list_type - The Symbol context of the list (:ulist, :olist, :colist or :dlist)
762
+ # sibling_trait - A Regexp that matches a sibling of this list item or String list marker
763
+ # of the items in this list (default: nil)
764
+ # has_text - Whether the list item has text defined inline (always true except for labeled lists)
765
+ #
766
+ # Returns an Array of lines belonging to the current list item.
767
+ def self.grab_lines_for_list_item(reader, list_type, sibling_trait = nil, has_text = true)
768
+ buffer = []
484
769
 
485
- while reader.has_lines? && match = reader.peek_line.match(REGEXP[list_type])
770
+ # three states for continuation: :inactive, :active & :frozen
771
+ # :frozen signifies we've detected sequential continuation lines &
772
+ # continuation is not permitted until reset
773
+ continuation = :inactive
486
774
 
487
- this_item_level = match[1].length
775
+ # if we are within a nested list, we don't throw away the list
776
+ # continuation marks because they will be processed when grabbing
777
+ # the lines for those nested lists
778
+ within_nested_list = false
488
779
 
489
- if first_item_level && first_item_level < this_item_level
490
- # If this next :uline level is down one from the
491
- # current Block's, append it to content of the current list item
492
- items.last.blocks << next_block(reader, block)
493
- elsif first_item_level && first_item_level > this_item_level
494
- break
495
- else
496
- list_item = build_ulist_item(reader, block, match)
497
- # Set the base item level for this Block
498
- first_item_level ||= list_item.level
499
- end
780
+ # a detached continuation is a list continuation that follows a blank line
781
+ # it gets associated with the outermost block
782
+ detached_continuation = nil
500
783
 
501
- items << list_item unless list_item.nil?
502
- list_item = nil
784
+ while reader.has_lines?
785
+ this_line = reader.get_line
503
786
 
504
- reader.skip_blank
505
- end
787
+ # if we've arrived at a sibling item in this list, we've captured
788
+ # the complete list item and can begin processing it
789
+ # the remainder of the method determines whether we've reached
790
+ # the termination of the list
791
+ break if is_sibling_list_item?(this_line, list_type, sibling_trait)
506
792
 
507
- block.buffer = items
508
- block
509
- end
793
+ prev_line = buffer.empty? ? nil : buffer.last.chomp
510
794
 
511
- def self.build_ulist_ref(lines, parent = nil)
512
- items = []
513
- list_type = :ulist
514
- block = Block.new(parent, list_type)
515
- Asciidoctor.debug "Created :ulist block: #{block}"
516
- last_item_level = nil
517
- this_line = lines.shift
518
-
519
- while this_line && match = this_line.match(REGEXP[list_type])
520
- level = match[1].length
521
-
522
- list_item = ListItem.new(block)
523
- list_item.level = level
524
- Asciidoctor.debug "Created ListItem #{list_item} with match[2]: #{match[2]} and level: #{list_item.level}"
525
-
526
- lines.unshift match[2].lstrip.sub(/^\./, '\.')
527
- item_segment = list_item_segment(lines, :alt_ending => REGEXP[list_type], :list_level => level)
528
- while item_segment.any?
529
- new_block = next_block(item_segment, block)
530
- list_item.blocks << new_block unless new_block.nil?
531
- end
795
+ if prev_line == LIST_CONTINUATION
796
+ if continuation == :inactive
797
+ continuation = :active
798
+ has_text = true
799
+ buffer[buffer.size - 1] = "\n" unless within_nested_list
800
+ end
532
801
 
533
- list_item.fold_first
802
+ # dealing with adjacent list continuations (which is really a syntax error)
803
+ if this_line.chomp == LIST_CONTINUATION
804
+ if continuation != :frozen
805
+ continuation = :frozen
806
+ buffer << this_line
807
+ end
808
+ this_line = nil
809
+ next
810
+ end
811
+ end
534
812
 
535
- if items.any? && (level > items.last.level)
536
- Asciidoctor.debug "--> Putting this new level #{level} ListItem under my pops, #{items.last} (level: #{items.last.level})"
537
- items.last.blocks << list_item
813
+ # a delimited block immediately breaks the list unless preceded
814
+ # by a list continuation (they are harsh like that ;0)
815
+ if (match = delimited_block?(this_line)) ||
816
+ # technically attr_line only breaks if ensuing line is not a list item
817
+ # which really means attr_line only breaks if it's acting as a block delimiter
818
+ (list_type == :dlist && match = this_line.match(REGEXP[:attr_line]))
819
+ terminator = match[0]
820
+ if continuation == :active
821
+ buffer << this_line
822
+ # grab all the lines in the block, leaving the delimiters in place
823
+ # we're being more strict here about the terminator, but I think that's a good thing
824
+ buffer.concat reader.grab_lines_until(:terminator => terminator, :grab_last_line => true)
825
+ continuation = :inactive
826
+ else
827
+ break
828
+ end
538
829
  else
539
- Asciidoctor.debug "Stacking new list item in parent block's blocks"
540
- items << list_item
541
- end
830
+ if continuation == :active && !this_line.strip.empty?
831
+ # literal paragraphs have special considerations (and this is one of
832
+ # two entry points into one)
833
+ # if we don't process it as a whole, then a line in it that looks like a
834
+ # list item will throw off the exit from it
835
+ if this_line.match(REGEXP[:lit_par])
836
+ reader.unshift this_line
837
+ buffer.concat reader.grab_lines_until(
838
+ :preserve_last_line => true,
839
+ :break_on_blank_lines => true,
840
+ :break_on_list_continuation => true)
841
+ else
842
+ if nested_list_type = (within_nested_list ? [:dlist] : NESTABLE_LIST_CONTEXTS).detect {|ctx| this_line.match(REGEXP[ctx]) }
843
+ within_nested_list = true
844
+ if nested_list_type == :dlist && $~[3].to_s.empty?
845
+ # get greedy again
846
+ has_text = false
847
+ end
848
+ end
849
+ buffer << this_line
850
+ end
851
+ continuation = :inactive
852
+ elsif !prev_line.nil? && prev_line.strip.empty?
853
+ # advance to the next line of content
854
+ if this_line.strip.empty?
855
+ reader.skip_blank
856
+ this_line = reader.get_line
857
+ # if we hit eof or a sibling, stop reading
858
+ break if this_line.nil? || is_sibling_list_item?(this_line, list_type, sibling_trait)
859
+ end
542
860
 
543
- last_item_level = list_item.level
861
+ if this_line.chomp == LIST_CONTINUATION
862
+ detached_continuation = buffer.size
863
+ buffer << this_line
864
+ else
865
+ # has_text is only relevant for dlist, which is more greedy until it has text for an item
866
+ # for all other lists, has_text is always true
867
+ # in this block, we have to see whether we stay in the list
868
+ if has_text
869
+ # slurp up any literal paragraph offset by blank lines
870
+ if this_line.match(REGEXP[:lit_par])
871
+ reader.unshift this_line
872
+ buffer.concat reader.grab_lines_until(
873
+ :preserve_last_line => true,
874
+ :break_on_blank_lines => true,
875
+ :break_on_list_continuation => true)
876
+ # TODO any way to combine this with the check after skipping blank lines?
877
+ elsif is_sibling_list_item?(this_line, list_type, sibling_trait)
878
+ #buffer.pop unless within_nested_list
879
+ break
880
+ elsif nested_list_type = NESTABLE_LIST_CONTEXTS.detect {|ctx| this_line.match(REGEXP[ctx]) }
881
+ #buffer.pop unless within_nested_list
882
+ buffer << this_line
883
+ within_nested_list = true
884
+ if nested_list_type == :dlist && $~[3].to_s.empty?
885
+ # get greedy again
886
+ has_text = false
887
+ end
888
+ else
889
+ break
890
+ end
891
+ else # only dlist in need of item text, so slurp it up!
892
+ # pop the blank line so it's not interpreted as a list continuation
893
+ buffer.pop unless within_nested_list
894
+ buffer << this_line
895
+ has_text = true
896
+ end
897
+ end
898
+ else
899
+ has_text = true if !this_line.strip.empty?
900
+ if nested_list_type = (within_nested_list ? [:dlist] : NESTABLE_LIST_CONTEXTS).detect {|ctx| this_line.match(REGEXP[ctx]) }
901
+ within_nested_list = true
902
+ if nested_list_type == :dlist && $~[3].to_s.empty?
903
+ # get greedy again
904
+ has_text = false
905
+ end
906
+ end
907
+ buffer << this_line
908
+ end
909
+ end
910
+ this_line = nil
911
+ end
544
912
 
545
- # TODO: This has to come from a Reader object
546
- skip_blank(lines)
913
+ reader.unshift this_line if !this_line.nil?
547
914
 
548
- this_line = lines.shift
915
+ if detached_continuation
916
+ buffer.delete_at detached_continuation
549
917
  end
550
- lines.unshift(this_line) unless this_line.nil?
551
918
 
552
- block.buffer = items
553
- block
554
- end
919
+ # QUESTION should we strip these trailing endlines?
920
+ #buffer.pop while buffer.last == "\n"
555
921
 
556
- def self.collect_attributes(attrs, attributes, posattrs = [])
557
- # TODO walk be properly rather than using split
558
- attrs.split(/\s*,\s*/).each_with_index do |entry, i|
559
- key, val = entry.split(/\s*=\s*/)
560
- if !val.nil?
561
- val.gsub!(/^(['"])(.*)\1$/, '\2') unless val.nil?
562
- attributes[key] = val
563
- else
564
- attributes[i] = key
565
- # positional attribute has a known key
566
- if posattrs.size >= (i + 1)
567
- attributes[posattrs[i]] = key
568
- end
569
- end
922
+ # We do need to replace the optional trailing continuation
923
+ # a blank line would have served the same purpose in the document
924
+ if !buffer.empty? && buffer.last.chomp == LIST_CONTINUATION
925
+ buffer.pop
570
926
  end
927
+ #puts "BUFFER>#{buffer.join}<BUFFER"
928
+ #puts "BUFFER>#{buffer}<BUFFER"
929
+
930
+ buffer
571
931
  end
572
932
 
573
- def self.rekey_positional_attributes(attributes, posattrs)
574
- posattrs.each_with_index do |key, i|
575
- val = attributes[i]
576
- if !val.nil?
577
- attributes[key] = val
578
- end
933
+ # Internal: Initialize a new Section object and assign any attributes provided
934
+ #
935
+ # The information for this section is retrieved by parsing the lines at the
936
+ # current position of the reader.
937
+ #
938
+ # reader - the source reader
939
+ # parent - the parent Section or Document of this Section
940
+ # attributes - a Hash of attributes to assign to this section (default: {})
941
+ def self.initialize_section(reader, parent, attributes = {})
942
+ section = Section.new parent
943
+ section.id, section.title, section.level, _ = parse_section_title(reader)
944
+ if section.id.nil? && attributes.has_key?('id')
945
+ section.id = attributes['id']
946
+ else
947
+ # generate an id if one was not *embedded* in the heading line
948
+ # or as an anchor above the section
949
+ section.id ||= section.generate_id
579
950
  end
951
+
952
+ section.update_attributes(attributes)
953
+ reader.skip_blank
954
+
955
+ section
580
956
  end
581
957
 
582
958
  # Private: Get the Integer section level based on the characters
583
- # used in the ASCII line under the section name.
959
+ # used in the ASCII line under the section title.
584
960
  #
585
- # line - the String line from under the section name.
961
+ # line - the String line from under the section title.
586
962
  def self.section_level(line)
587
963
  char = line.strip.chars.to_a.uniq
588
964
  case char
@@ -594,179 +970,631 @@ class Asciidoctor::Lexer
594
970
  end
595
971
  end
596
972
 
597
- # == is level 0, === is level 1, etc.
973
+ #--
974
+ # = is level 0, == is level 1, etc.
598
975
  def self.single_line_section_level(line)
599
976
  [line.length - 1, 0].max
600
977
  end
601
978
 
602
- def self.is_single_line_section_heading?(line)
603
- !line.nil? && line.match(REGEXP[:level_title])
979
+ # Internal: Checks if the next line on the Reader is a section title
980
+ #
981
+ # This is a more efficient version of #is_section_title? and should
982
+ # eventually replace its usage.
983
+ #
984
+ # reader - the source Reader
985
+ #
986
+ # returns the section level if the Reader is positioned at a section title,
987
+ # false otherwise
988
+ def self.is_next_line_section?(reader)
989
+ if reader.has_lines?
990
+ line1 = reader.get_line
991
+ line2 = reader.peek_line
992
+ reader.unshift line1
993
+ else
994
+ return false
995
+ end
996
+
997
+ is_section_title?(line1, line2)
604
998
  end
605
999
 
606
- def self.is_two_line_section_heading?(line1, line2)
607
- !line1.nil? && !line2.nil? &&
608
- line1.match(REGEXP[:name]) && line2.match(REGEXP[:line]) &&
609
- # chomp so that a (non-visible) endline does not impact calculation
610
- (line1.chomp.size - line2.chomp.size).abs <= 1
1000
+ # Public: Checks if these lines are a section title
1001
+ #
1002
+ # line1 - the first line as a String
1003
+ # line2 - the second line as a String (default: nil)
1004
+ #
1005
+ # returns the section level if these lines are a section title,
1006
+ # false otherwise
1007
+ def self.is_section_title?(line1, line2 = nil)
1008
+ if (level = is_single_line_section_title?(line1))
1009
+ level
1010
+ elsif (level = is_two_line_section_title?(line1, line2))
1011
+ level
1012
+ else
1013
+ false
1014
+ end
611
1015
  end
612
1016
 
613
- def self.is_section_heading?(line1, line2 = nil)
614
- is_single_line_section_heading?(line1) ||
615
- is_two_line_section_heading?(line1, line2)
1017
+ def self.is_single_line_section_title?(line1)
1018
+ if !line1.nil? && (match = line1.match(REGEXP[:section_title]))
1019
+ single_line_section_level match[1]
1020
+ else
1021
+ false
1022
+ end
616
1023
  end
617
1024
 
618
- # Private: Extracts the name, level and (optional) embedded anchor from a
619
- # 1- or 2-line section heading.
1025
+ def self.is_two_line_section_title?(line1, line2)
1026
+ if !line1.nil? && !line2.nil? && line1.match(REGEXP[:section_name]) &&
1027
+ line2.match(REGEXP[:section_underline]) &&
1028
+ # chomp so that a (non-visible) endline does not impact calculation
1029
+ (line1.chomp.size - line2.chomp.size).abs <= 1
1030
+ section_level line2
1031
+ else
1032
+ false
1033
+ end
1034
+ end
1035
+
1036
+ # Internal: Parse the section title from the current position of the reader
1037
+ #
1038
+ # Parse a single or double-line section title. After this method is called,
1039
+ # the Reader will be positioned at the line after the section title.
620
1040
  #
621
- # Returns an array of a String, Integer, and String or nil.
1041
+ # reader - the source reader, positioned at a section title
622
1042
  #
623
1043
  # Examples
624
1044
  #
625
- # line1
626
- # => "Foo\n"
627
- # line2
628
- # => "~~~\n"
1045
+ # reader.lines
1046
+ # # => ["Foo\n", "~~~\n"]
629
1047
  #
630
- # name, level, anchor = extract_section_heading(line1, line2)
1048
+ # id, title, level, single = parse_section_title(reader)
631
1049
  #
632
- # name
633
- # => "Foo"
1050
+ # title
1051
+ # # => "Foo"
634
1052
  # level
635
- # => 2
636
- # anchor
637
- # => nil
1053
+ # # => 2
1054
+ # id
1055
+ # # => nil
1056
+ # single
1057
+ # # => false
638
1058
  #
639
1059
  # line1
640
- # => "==== Foo\n"
1060
+ # # => "==== Foo\n"
641
1061
  #
642
- # name, level, anchor = extract_section_heading(line1)
1062
+ # id, title, level, single = parse_section_title(reader)
643
1063
  #
644
- # name
645
- # => "Foo"
1064
+ # title
1065
+ # # => "Foo"
646
1066
  # level
647
- # => 3
648
- # anchor
649
- # => nil
1067
+ # # => 3
1068
+ # id
1069
+ # # => nil
1070
+ # single
1071
+ # # => true
1072
+ #
1073
+ # returns an Array of [String, String, Integer, Boolean], representing the
1074
+ # id, title, level and single-line flag (true for a one-line title) of the Section, or nil.
650
1075
  #
651
- def self.extract_section_heading(line1, line2 = nil)
652
- Asciidoctor.debug "#{__method__} -> line1: #{line1.chomp rescue 'nil'}, line2: #{line2.chomp rescue 'nil'}"
653
- sect_name = sect_anchor = nil
1076
+ #--
1077
+ # NOTE for efficiency, we don't reuse methods that check for a section title
1078
+ def self.parse_section_title(reader)
1079
+ line1 = reader.get_line
1080
+ sect_id = nil
1081
+ sect_title = nil
654
1082
  sect_level = 0
1083
+ single_line = true
655
1084
 
656
- if is_single_line_section_heading?(line1)
657
- header_match = line1.match(REGEXP[:level_title])
658
- sect_name = header_match[2]
659
- sect_level = single_line_section_level(header_match[1])
660
- elsif is_two_line_section_heading?(line1, line2)
661
- header_match = line1.match(REGEXP[:name])
662
- if anchor_match = header_match[1].match(REGEXP[:anchor_embedded])
663
- sect_name = anchor_match[1]
664
- sect_anchor = anchor_match[2]
665
- else
666
- sect_name = header_match[1]
1085
+ if match = line1.match(REGEXP[:section_title])
1086
+ sect_id = match[3]
1087
+ sect_title = match[2]
1088
+ sect_level = single_line_section_level match[1]
1089
+ else
1090
+ line2 = reader.peek_line
1091
+ if !line2.nil? && (name_match = line1.match(REGEXP[:section_name])) &&
1092
+ line2.match(REGEXP[:section_underline]) &&
1093
+ # chomp so that a (non-visible) endline does not impact calculation
1094
+ (line1.chomp.size - line2.chomp.size).abs <= 1
1095
+ if anchor_match = name_match[1].match(REGEXP[:anchor_embedded])
1096
+ sect_id = anchor_match[2]
1097
+ sect_title = anchor_match[1]
1098
+ else
1099
+ sect_title = name_match[1]
1100
+ end
1101
+ sect_level = section_level line2
1102
+ single_line = false
1103
+ reader.get_line
667
1104
  end
668
- sect_level = section_level(line2)
669
1105
  end
670
- Asciidoctor.debug "#{__method__} -> Returning #{sect_name}, #{sect_level} (anchor: '#{sect_anchor || '<none>'}')"
671
- return [sect_name, sect_level, sect_anchor]
1106
+ return [sect_id, sect_title, sect_level, single_line]
672
1107
  end
673
1108
 
674
- # Private: Return the next section from the Reader.
1109
+ # Public: Consume and parse the two header lines (line 1 = author info, line 2 = revision info).
1110
+ #
1111
+ # Returns the Hash of header metadata. If a Document object is supplied, the metadata
1112
+ # is applied directly to the attributes of the Document.
1113
+ #
1114
+ # reader - the Reader holding the source lines of the document
1115
+ # document - the Document we are building (default: nil)
675
1116
  #
676
1117
  # Examples
677
1118
  #
678
- # source
679
- # => "GREETINGS\n---------\nThis is my doc.\n\nSALUTATIONS\n-----------\nIt is awesome."
1119
+ # parse_header_metadata(Reader.new ["Author Name <author@example.org>\n", "v1.0, 2012-12-21: Coincide w/ end of world.\n"])
1120
+ # # => {'author' => 'Author Name', 'firstname' => 'Author', 'lastname' => 'Name', 'email' => 'author@example.org',
1121
+ # # 'revnumber' => '1.0', 'revdate' => '2012-12-21', 'revremark' => 'Coincide w/ end of world.'}
1122
+ def self.parse_header_metadata(reader, document = nil)
1123
+ # capture consecutive comment lines so we can reinsert them after the header
1124
+ comment_lines = reader.consume_comments
1125
+
1126
+ metadata = !document.nil? ? document.attributes : {}
1127
+ author_initials = metadata['authorinitials']
1128
+ if reader.has_lines? && !reader.peek_line.strip.empty?
1129
+ author_line = reader.get_line
1130
+ match = author_line.match(REGEXP[:author_info])
1131
+ if match
1132
+ metadata['firstname'] = fname = match[1].tr('_', ' ')
1133
+ metadata['author'] = fname
1134
+ metadata['authorinitials'] = fname[0, 1]
1135
+ if !match[2].nil? && !match[3].nil?
1136
+ metadata['middlename'] = mname = match[2].tr('_', ' ')
1137
+ metadata['lastname'] = lname = match[3].tr('_', ' ')
1138
+ metadata['author'] = [fname, mname, lname].join ' '
1139
+ metadata['authorinitials'] = [fname[0, 1], mname[0, 1], lname[0, 1]].join
1140
+ elsif !match[2].nil?
1141
+ metadata['lastname'] = lname = match[2].tr('_', ' ')
1142
+ metadata['author'] = [fname, lname].join ' '
1143
+ metadata['authorinitials'] = [fname[0, 1], lname[0, 1]].join
1144
+ end
1145
+ metadata['email'] = match[4] unless match[4].nil?
1146
+ else
1147
+ metadata['author'] = metadata['firstname'] = author_line.strip.squeeze(' ')
1148
+ metadata['authorinitials'] = metadata['firstname'][0, 1]
1149
+ end
1150
+
1151
+ # hack because of incorrect order of attribute processing
1152
+ metadata['authorinitials'] = author_initials unless author_initials.nil?
1153
+
1154
+ # capture consecutive comment lines so we can reinsert them after the header
1155
+ comment_lines += reader.consume_comments
1156
+
1157
+ if reader.has_lines? && !reader.peek_line.strip.empty?
1158
+ rev_line = reader.get_line
1159
+ match = rev_line.match(REGEXP[:revision_info])
1160
+ if match
1161
+ metadata['revdate'] = match[2]
1162
+ metadata['revnumber'] = match[1] unless match[1].nil?
1163
+ metadata['revremark'] = match[3] unless match[3].nil?
1164
+ else
1165
+ metadata['revdate'] = rev_line.strip
1166
+ end
1167
+ end
1168
+
1169
+ reader.skip_blank
1170
+ end
1171
+
1172
+ reader.unshift(*comment_lines)
1173
+ metadata
1174
+ end
1175
+
1176
+ # Internal: Parse lines of metadata until a line of metadata is not found.
1177
+ #
1178
+ # This method processes sequential lines containing block metadata, ignoring
1179
+ # blank lines and comments.
1180
+ #
1181
+ # reader - the source reader
1182
+ # parent - the parent to which the lines belong
1183
+ # attributes - a Hash of attributes in which any metadata found will be stored (default: {})
1184
+ # options - a Hash of options to control processing: (default: {})
1185
+ # * :text indicates that lexer is only looking for text content
1186
+ # and thus the block title should not be captured
680
1187
  #
681
- # TODO: doc = Asciidoctor::Document.new(source)
1188
+ # returns the Hash of attributes including any metadata found
1189
+ def self.parse_block_metadata_lines(reader, parent, attributes = {}, options = {})
1190
+ while parse_block_metadata_line(reader, parent, attributes, options)
1191
+ reader.next_line
1192
+ reader.skip_blank_lines
1193
+ end
1194
+ attributes
1195
+ end
1196
+
1197
+ # Internal: Parse the next line if it contains metadata for the following block
682
1198
  #
683
- # doc.next_section
684
- # ["GREETINGS", [:paragraph, "This is my doc."]]
1199
+ # This method handles lines with the following content:
685
1200
  #
686
- # doc.next_section
687
- # ["SALUTATIONS", [:paragraph, "It is awesome."]]
688
- def self.next_section(reader, parent = self)
689
- section = Section.new(parent)
1201
+ # * line or block comment
1202
+ # * anchor
1203
+ # * attribute list
1204
+ # * block title
1205
+ #
1206
+ # Any attributes found will be inserted into the attributes argument.
1207
+ # If the line contains block metadata, the method returns true, otherwise false.
1208
+ #
1209
+ # reader - the source reader
1210
+ # parent - the parent of the current line
1211
+ # attributes - a Hash of attributes in which any metadata found will be stored
1212
+ # options - a Hash of options to control processing: (default: {})
1213
+ # * :text indicates that lexer is only looking for text content
1214
+ # and thus the block title should not be captured
1215
+ #
1216
+ # returns true if the line contains metadata, otherwise false
1217
+ def self.parse_block_metadata_line(reader, parent, attributes, options = {})
1218
+ return false if !reader.has_lines?
1219
+ next_line = reader.peek_line
1220
+ if next_line.match(REGEXP[:comment])
1221
+ # do nothing, we'll skip it
1222
+ # QUESTION should we parse block comments here instead of next_block?
1223
+ # disable until we can agree what the current line is coming in
1224
+ elsif match = next_line.match(REGEXP[:comment_blk])
1225
+ terminator = match[0]
1226
+ reader.grab_lines_until(:skip_first_line => true, :preserve_last_line => true, :terminator => terminator)
1227
+ elsif match = next_line.match(REGEXP[:anchor])
1228
+ id, reftext = match[1].split(',')
1229
+ attributes['id'] = id
1230
+ # AsciiDoc always uses [id] as the reftext in HTML output,
1231
+ # but I'd like to do better in Asciidoctor
1232
+ #parent.document.register(:ids, id)
1233
+ if reftext
1234
+ attributes['reftext'] = reftext
1235
+ parent.document.register(:ids, [id, reftext])
1236
+ end
1237
+ elsif match = next_line.match(REGEXP[:blk_attr_list])
1238
+ AttributeList.new(parent.document.sub_attributes(match[1]), parent.document).parse_into(attributes)
1239
+ # NOTE title doesn't apply to section, but we need to stash it for the first block
1240
+ # TODO need test for this getting passed on to first block after section if found above section
1241
+ # TODO should issue an error if this is found above the document title
1242
+ elsif !options[:text] && (match = next_line.match(REGEXP[:blk_title]))
1243
+ attributes['title'] = match[1]
1244
+ else
1245
+ return false
1246
+ end
690
1247
 
691
- Asciidoctor.debug "%"*64
692
- Asciidoctor.debug "#{File.basename(__FILE__)}:#{__LINE__} -> #{__method__} - First two lines are:"
693
- Asciidoctor.debug reader.peek_line
694
- tmp_line = reader.get_line
695
- Asciidoctor.debug reader.peek_line
696
- reader.unshift tmp_line
697
- Asciidoctor.debug "%"*64
1248
+ true
1249
+ end
698
1250
 
699
- # Skip ahead to the next section definition
700
- while reader.has_lines? && section.name.nil?
701
- this_line = reader.get_line
702
- next_line = reader.peek_line || ''
703
- if match = this_line.match(REGEXP[:anchor])
704
- section.anchor = match[1]
705
- elsif is_section_heading?(this_line, next_line)
706
- section.name, section.level, section.anchor = extract_section_heading(this_line, next_line)
707
- reader.get_line unless is_single_line_section_heading?(this_line)
1251
+ # Internal: Resolve the 0-index marker for this list item
1252
+ #
1253
+ # For ordered lists, match the marker used for this list item against the
1254
+ # known list markers and determine which marker is the first (0-index) marker
1255
+ # in its number series.
1256
+ #
1257
+ # For callout lists, return <1>.
1258
+ #
1259
+ # For bulleted lists, return the marker as passed to this method.
1260
+ #
1261
+ # list_type - The Symbol context of the list
1262
+ # marker - The String marker for this list item
1263
+ # ordinal - The position of this list item in the list
1264
+ # validate - Whether to validate the value of the marker
1265
+ #
1266
+ # Returns the String 0-index marker for this list item
1267
+ def self.resolve_list_marker(list_type, marker, ordinal = 0, validate = false)
1268
+ if list_type == :olist && !marker.start_with?('.')
1269
+ resolve_ordered_list_marker(marker, ordinal, validate)
1270
+ elsif list_type == :colist
1271
+ '<1>'
1272
+ else
1273
+ marker
1274
+ end
1275
+ end
1276
+
1277
+ # Internal: Resolve the 0-index marker for this ordered list item
1278
+ #
1279
+ # Match the marker used for this ordered list item against the
1280
+ # known ordered list markers and determine which marker is
1281
+ # the first (0-index) marker in its number series.
1282
+ #
1283
+ # The purpose of this method is to normalize the implicit numbered markers
1284
+ # so that they can be compared against other list items.
1285
+ #
1286
+ # marker - The marker used for this list item
1287
+ # ordinal - The 0-based index of the list item (default: 0)
1288
+ # validate - Perform validation that the marker provided is the proper
1289
+ # marker in the sequence (default: false)
1290
+ #
1291
+ # Examples
1292
+ #
1293
+ # marker = 'B.'
1294
+ # Lexer::resolve_ordered_list_marker(marker, 1, true)
1295
+ # # => 'A.'
1296
+ #
1297
+ # Returns the String of the first marker in this number series
1298
+ def self.resolve_ordered_list_marker(marker, ordinal = 0, validate = false)
1299
+ number_style = ORDERED_LIST_STYLES.detect {|s| marker.match(ORDERED_LIST_MARKER_PATTERNS[s]) }
1300
+ expected = actual = nil
1301
+ case number_style
1302
+ when :arabic
1303
+ if validate
1304
+ expected = ordinal + 1
1305
+ actual = marker.to_i
1306
+ end
1307
+ marker = '1.'
1308
+ when :loweralpha
1309
+ if validate
1310
+ expected = ('a'[0].ord + ordinal).chr
1311
+ actual = marker.chomp('.')
1312
+ end
1313
+ marker = 'a.'
1314
+ when :upperalpha
1315
+ if validate
1316
+ expected = ('A'[0].ord + ordinal).chr
1317
+ actual = marker.chomp('.')
1318
+ end
1319
+ marker = 'A.'
1320
+ when :lowerroman
1321
+ if validate
1322
+ # TODO report this in roman numerals; see https://github.com/jamesshipton/roman-numeral/blob/master/lib/roman_numeral.rb
1323
+ expected = ordinal + 1
1324
+ actual = roman_numeral_to_int(marker.chomp(')'))
1325
+ end
1326
+ marker = 'i)'
1327
+ when :upperroman
1328
+ if validate
1329
+ # TODO report this in roman numerals; see https://github.com/jamesshipton/roman-numeral/blob/master/lib/roman_numeral.rb
1330
+ expected = ordinal + 1
1331
+ actual = roman_numeral_to_int(marker.chomp(')'))
1332
+ end
1333
+ marker = 'I)'
1334
+ end
1335
+
1336
+ if validate && expected != actual
1337
+ puts "asciidoctor: WARNING: list item index: expected #{expected}, got #{actual}"
1338
+ end
1339
+
1340
+ marker
1341
+ end
1342
+
1343
+ # Internal: Determine whether this line is a sibling list item
1344
+ # according to the list type and trait (marker) provided.
1345
+ #
1346
+ # line - The String line to check
1347
+ # list_type - The context of the list (:olist, :ulist, :colist, :dlist)
1348
+ # sibling_trait - The String marker for the list or the Regexp to match a sibling
1349
+ #
1350
+ # Returns a Boolean indicating whether this line is a sibling list item given
1351
+ # the criteria provided
1352
+ def self.is_sibling_list_item?(line, list_type, sibling_trait)
1353
+ if sibling_trait.is_a?(Regexp)
1354
+ matcher = sibling_trait
1355
+ expected_marker = false
1356
+ else
1357
+ matcher = REGEXP[list_type]
1358
+ expected_marker = sibling_trait
1359
+ end
1360
+
1361
+ if m = line.match(matcher)
1362
+ if expected_marker
1363
+ expected_marker == resolve_list_marker(list_type, m[1])
1364
+ else
1365
+ true
708
1366
  end
1367
+ else
1368
+ false
709
1369
  end
1370
+ end
1371
+
1372
+ # Internal: Parse the table contained in the provided Reader
1373
+ #
1374
+ # table_reader - a Reader containing the source lines of an AsciiDoc table
1375
+ # parent - the parent Block of this Asciidoctor::Table
1376
+ # attributes - attributes captured from above this Block
1377
+ #
1378
+ # returns an instance of Asciidoctor::Table parsed from the provided reader
1379
+ def self.next_table(table_reader, parent, attributes)
1380
+ table = Table.new(parent, attributes)
710
1381
 
711
- if !section.anchor.nil?
712
- anchor_id = section.anchor.match(/^\[(.*)\]/) ? $1 : section.anchor
713
- document_from_parent(parent).references[anchor_id] = section.anchor
714
- section.anchor = anchor_id
1382
+ if attributes.has_key? 'cols'
1383
+ table.create_columns(parse_col_specs(attributes['cols']))
1384
+ explicit_col_specs = true
1385
+ else
1386
+ explicit_col_specs = false
715
1387
  end
716
1388
 
717
- # Grab all the lines that belong to this section
718
- section_lines = []
719
- while reader.has_lines?
720
- this_line = reader.get_line
721
- next_line = reader.peek_line
722
-
723
- if is_section_heading?(this_line, next_line)
724
- _, this_level, _ = extract_section_heading(this_line, next_line)
725
-
726
- if this_level <= section.level
727
- # A section can't contain a section level lower than itself,
728
- # so this signifies the end of the section.
729
- reader.unshift this_line
730
- if section_lines.any? && section_lines.last.match(REGEXP[:anchor])
731
- # Put back the anchor that came before this new-section line
732
- # on which we're bailing.
733
- reader.unshift section_lines.pop
1389
+ table_reader.skip_blank_lines
1390
+
1391
+ parser_ctx = Asciidoctor::Table::ParserContext.new(table, attributes)
1392
+ while table_reader.has_lines?
1393
+ line = table_reader.get_line
1394
+
1395
+ if parser_ctx.format == 'psv'
1396
+ if parser_ctx.starts_with_delimiter? line
1397
+ line = line[1..-1]
1398
+ # push an empty cell spec if boundary at start of line
1399
+ parser_ctx.close_open_cell
1400
+ else
1401
+ next_cell_spec, line = parse_cell_spec(line, :start)
1402
+ # if the cell spec is not null, then we're at a cell boundary
1403
+ if !next_cell_spec.nil?
1404
+ parser_ctx.close_open_cell next_cell_spec
1405
+ else
1406
+ # QUESTION do we not advance to next line? if so, when
1407
+ # will we if we came into this block?
734
1408
  end
735
- break
1409
+ end
1410
+ end
1411
+
1412
+ while !line.empty?
1413
+ if m = parser_ctx.match_delimiter(line)
1414
+ if parser_ctx.format == 'csv'
1415
+ if parser_ctx.buffer_has_unclosed_quotes?(m.pre_match)
1416
+ # throw it back, it's too small
1417
+ line = parser_ctx.skip_matched_delimiter(m)
1418
+ next
1419
+ end
1420
+ else
1421
+ if m.pre_match.end_with? '\\'
1422
+ line = parser_ctx.skip_matched_delimiter(m, true)
1423
+ next
1424
+ end
1425
+ end
1426
+
1427
+ if parser_ctx.format == 'psv'
1428
+ next_cell_spec, cell_text = parse_cell_spec(m.pre_match, :end)
1429
+ parser_ctx.push_cell_spec next_cell_spec
1430
+ parser_ctx.buffer << cell_text
1431
+ else
1432
+ parser_ctx.buffer << m.pre_match
1433
+ end
1434
+
1435
+ line = m.post_match
1436
+ parser_ctx.close_cell
736
1437
  else
737
- section_lines << this_line
738
- section_lines << reader.get_line unless is_single_line_section_heading?(this_line)
1438
+ # no other delimiters to see here
1439
+ # suck up this line into the buffer and move on
1440
+ parser_ctx.buffer << line
1441
+ # QUESTION make this an option? (unwrap-option?)
1442
+ if parser_ctx.format == 'csv'
1443
+ parser_ctx.buffer.rstrip!.concat(' ')
1444
+ end
1445
+ line = ''
1446
+ if parser_ctx.format == 'psv' || (parser_ctx.format == 'csv' &&
1447
+ parser_ctx.buffer_has_unclosed_quotes?)
1448
+ parser_ctx.keep_cell_open
1449
+ else
1450
+ parser_ctx.close_cell true
1451
+ end
739
1452
  end
740
- else
741
- section_lines << this_line
742
1453
  end
743
- end
744
1454
 
745
- section_reader = Reader.new(section_lines)
746
- # Now parse section_lines into Blocks belonging to the current Section
747
- while section_reader.has_lines?
748
- section_reader.skip_blank
1455
+ table_reader.skip_blank_lines unless parser_ctx.cell_open?
749
1456
 
750
- if section_reader.has_lines?
751
- new_block = next_block(section_reader, section)
752
- section << new_block unless new_block.nil?
1457
+ if !table_reader.has_lines?
1458
+ parser_ctx.close_cell true
753
1459
  end
754
1460
  end
755
1461
 
756
- # detect preamble and push it into a block
757
- # QUESTION make this an operation on Section?
758
- if section.level == 0
759
- blocks = section.blocks.take_while {|b| !b.is_a? Section}
760
- if !blocks.empty?
761
- # QUESTION Should we propagate the buffer?
762
- #preamble = Block.new(section, :preamble, blocks.reduce {|a, b| a.buffer + b.buffer})
763
- preamble = Block.new(section, :preamble)
764
- blocks.each { preamble << section.delete_at(0) }
765
- section.insert(0, preamble)
766
- end
1462
+ table.attributes['colcount'] ||= parser_ctx.col_count
1463
+
1464
+ if !explicit_col_specs
1465
+ # TODO further encapsulate this logic (into table perhaps?)
1466
+ even_width = (100.0 / parser_ctx.col_count).floor
1467
+ table.columns.each {|c| c.assign_width(0, even_width) }
767
1468
  end
768
1469
 
769
- section
1470
+ table.partition_header_footer attributes
1471
+
1472
+ table
770
1473
  end
771
1474
 
1475
# Internal: Parse the column specs for this table.
#
# The column specs dictate the number of columns, the relative width of
# each column, the default alignments for cells in each column, and/or the
# default style applied to the cells in the column.
#
# Every column spec produced here is guaranteed to have a 'width' entry.
#
# records - The String of column specs, separated by commas. When it
#           matches REGEXP[:digits] (the deprecated syntax) it is read as
#           a column count instead.
#
# Returns an Array of Hashes, one per column, whose entries ('halign',
# 'valign', 'width', 'style') specify how to format and lay out the cells
# in that column.
def self.parse_col_specs(records)
  col_specs = []

  # deprecated syntax: the value is just the number of columns
  legacy = records.match(REGEXP[:digits])
  unless legacy.nil?
    legacy[0].to_i.times { col_specs << {'width' => 1} }
    return col_specs
  end

  h_aligns = Table::ALIGNMENTS[:h]
  v_aligns = Table::ALIGNMENTS[:v]

  records.split(',').each do |record|
    # TODO might want to use scan rather than this mega-regexp
    parts = record.match(REGEXP[:table_colspec])
    # records that do not look like a column spec are ignored
    next if parts.nil?

    col_spec = {}

    # group 2 holds the alignments as "<horizontal>.<vertical>"
    unless parts[2].nil?
      h_key, v_key = parts[2].split('.')
      if h_aligns.has_key?(h_key) && !h_key.to_s.empty?
        col_spec['halign'] = h_aligns[h_key]
      end
      if v_aligns.has_key?(v_key) && !v_key.to_s.empty?
        col_spec['valign'] = v_aligns[v_key]
      end
    end

    # TODO support percentage width
    # group 3 holds the width; a column without one gets a width of 1
    col_spec['width'] = parts[3].nil? ? 1 : parts[3].to_i

    # group 4 holds the style; unknown styles are ignored
    style_key = parts[4]
    if style_key && Table::TEXT_STYLES.has_key?(style_key)
      col_spec['style'] = Table::TEXT_STYLES[style_key]
    end

    # group 1 holds the repeat count; each repeat gets its own copy of the spec
    copies = parts[1].nil? ? 1 : parts[1].to_i
    copies.times { col_specs << col_spec.dup }
  end

  col_specs
end
1529
+
1530
# Internal: Parse the cell spec for the current cell.
#
# The cell spec dictates the cell's alignments, style, colspan, rowspan
# and/or repeating content.
#
# line - The String to inspect for a cell spec
# pos  - The Symbol position at which to look for the spec; used to select
#        the pattern from REGEXP[:table_cellspec] (default: :start)
#
# Returns a two-element Array containing the spec and the remaining text.
# The spec is a Hash of attributes that indicate how to lay out and style
# this cell, or nil when pos is not :end and no spec was matched. The
# remaining text is the line with the matched spec removed, or the
# original line when nothing (or only whitespace) was matched.
def self.parse_cell_spec(line, pos = :start)
  m = line.match(REGEXP[:table_cellspec][pos])

  if m.nil?
    # the default for the end pos is {} since we know we're at a
    # delimiter; for any other pos we *may* be at a delimiter and
    # nil indicates we're not
    return [(pos == :end ? {} : nil), line]
  end

  # a match that is blank carries no spec information
  return [{}, line] if m[0].strip.empty?

  cell_spec = {}
  remainder = (pos == :start) ? m.post_match : m.pre_match

  # group 1 holds the span/repeat factor as "<col>.<row>", group 2 the operator
  unless m[1].nil?
    col_factor, row_factor = m[1].split('.')
    col_factor = col_factor.to_s.empty? ? 1 : col_factor.to_i
    row_factor = row_factor.to_s.empty? ? 1 : row_factor.to_i
    case m[2]
    when '+'
      cell_spec['colspan'] = col_factor if col_factor != 1
      cell_spec['rowspan'] = row_factor if row_factor != 1
    when '*'
      cell_spec['repeatcol'] = col_factor if col_factor != 1
    end
  end

  # group 3 holds the alignments as "<horizontal>.<vertical>"
  unless m[3].nil?
    h_aligns = Table::ALIGNMENTS[:h]
    v_aligns = Table::ALIGNMENTS[:v]
    h_key, v_key = m[3].split('.')
    if h_aligns.has_key?(h_key) && !h_key.to_s.empty?
      cell_spec['halign'] = h_aligns[h_key]
    end
    if v_aligns.has_key?(v_key) && !v_key.to_s.empty?
      cell_spec['valign'] = v_aligns[v_key]
    end
  end

  # group 4 holds the style; unknown styles are ignored
  style_key = m[4]
  if style_key && Table::TEXT_STYLES.has_key?(style_key)
    cell_spec['style'] = Table::TEXT_STYLES[style_key]
  end

  [cell_spec, remainder]
end
1578
+
1579
# Internal: Converts a Roman numeral to an integer value.
#
# Recognizes the symbols I, V, X, L, C, D and M in either case. A symbol
# that is immediately followed by a larger symbol is subtracted from the
# total (e.g. "iv" is 4); every other symbol is added.
#
# value - The String Roman numeral to convert
#
# Returns the Integer for this Roman numeral (0 for an empty String)
# Raises ArgumentError if value contains a character that is not one of
# the recognized Roman numeral symbols
def self.roman_numeral_to_int(value)
  digits = {
    'i' => 1, 'v' => 5, 'x' => 10, 'l' => 50,
    'c' => 100, 'd' => 500, 'm' => 1000
  }

  # translate every symbol up front so that an invalid character is
  # reported clearly instead of surfacing as an error on nil
  weights = value.downcase.split('').map {|symbol|
    unless digits.key?(symbol)
      raise ArgumentError, "invalid Roman numeral: #{value.inspect}"
    end
    digits[symbol]
  }

  result = 0
  weights.each_with_index {|weight, i|
    following = weights[i + 1]
    if following && following > weight
      # subtractive notation: a smaller symbol before a larger one
      result -= weight
    else
      result += weight
    end
  }

  result
end
772
1600
  end