yard 0.9.28 → 0.9.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +139 -1
  3. data/LEGAL +29 -1
  4. data/README.md +29 -25
  5. data/docs/GettingStarted.md +41 -15
  6. data/docs/Parser.md +17 -42
  7. data/docs/Tags.md +6 -6
  8. data/docs/Templates.md +5 -4
  9. data/docs/WhatsNew.md +61 -9
  10. data/docs/templates/default/yard_tags/html/setup.rb +1 -1
  11. data/lib/yard/autoload.rb +20 -1
  12. data/lib/yard/cli/command.rb +1 -1
  13. data/lib/yard/cli/diff.rb +7 -2
  14. data/lib/yard/cli/yardoc.rb +1 -1
  15. data/lib/yard/code_objects/base.rb +6 -2
  16. data/lib/yard/code_objects/extra_file_object.rb +1 -0
  17. data/lib/yard/code_objects/macro_object.rb +0 -1
  18. data/lib/yard/code_objects/proxy.rb +1 -1
  19. data/lib/yard/docstring_parser.rb +1 -2
  20. data/lib/yard/handlers/base.rb +23 -1
  21. data/lib/yard/handlers/processor.rb +1 -1
  22. data/lib/yard/handlers/rbs/attribute_handler.rb +79 -0
  23. data/lib/yard/handlers/rbs/base.rb +38 -0
  24. data/lib/yard/handlers/rbs/constant_handler.rb +18 -0
  25. data/lib/yard/handlers/rbs/method_handler.rb +327 -0
  26. data/lib/yard/handlers/rbs/mixin_handler.rb +20 -0
  27. data/lib/yard/handlers/rbs/namespace_handler.rb +26 -0
  28. data/lib/yard/handlers/ruby/attribute_handler.rb +7 -4
  29. data/lib/yard/handlers/ruby/constant_handler.rb +24 -6
  30. data/lib/yard/handlers/ruby/legacy/attribute_handler.rb +1 -1
  31. data/lib/yard/handlers/ruby/legacy/visibility_handler.rb +2 -1
  32. data/lib/yard/handlers/ruby/mixin_handler.rb +13 -6
  33. data/lib/yard/handlers/ruby/visibility_handler.rb +14 -1
  34. data/lib/yard/i18n/locale.rb +2 -2
  35. data/lib/yard/i18n/message.rb +2 -2
  36. data/lib/yard/i18n/messages.rb +1 -1
  37. data/lib/yard/i18n/pot_generator.rb +2 -2
  38. data/lib/yard/logging.rb +116 -61
  39. data/lib/yard/open_struct.rb +67 -0
  40. data/lib/yard/options.rb +1 -1
  41. data/lib/yard/parser/rbs/rbs_parser.rb +325 -0
  42. data/lib/yard/parser/rbs/statement.rb +75 -0
  43. data/lib/yard/parser/ruby/ast_node.rb +5 -4
  44. data/lib/yard/parser/ruby/legacy/irb/slex.rb +19 -1
  45. data/lib/yard/parser/ruby/legacy/ruby_lex.rb +20 -5
  46. data/lib/yard/parser/ruby/ruby_parser.rb +117 -26
  47. data/lib/yard/parser/source_parser.rb +7 -7
  48. data/lib/yard/registry_resolver.rb +9 -1
  49. data/lib/yard/rubygems/specification.rb +1 -1
  50. data/lib/yard/server/commands/base.rb +2 -2
  51. data/lib/yard/server/commands/library_command.rb +8 -8
  52. data/lib/yard/server/commands/static_file_helpers.rb +1 -2
  53. data/lib/yard/server/http_utils.rb +512 -0
  54. data/lib/yard/server/library_version.rb +1 -1
  55. data/lib/yard/server/rack_adapter.rb +13 -5
  56. data/lib/yard/server/templates/default/fulldoc/html/css/custom.css +168 -88
  57. data/lib/yard/server/templates/default/fulldoc/html/js/autocomplete.js +203 -12
  58. data/lib/yard/server/templates/default/layout/html/breadcrumb.erb +1 -17
  59. data/lib/yard/server/templates/default/method_details/html/permalink.erb +4 -2
  60. data/lib/yard/server/templates/doc_server/library_list/html/headers.erb +3 -3
  61. data/lib/yard/server/templates/doc_server/library_list/html/library_list.erb +2 -3
  62. data/lib/yard/server/templates/doc_server/processing/html/processing.erb +22 -16
  63. data/lib/yard/tags/default_factory.rb +1 -0
  64. data/lib/yard/tags/directives.rb +7 -1
  65. data/lib/yard/tags/library.rb +3 -3
  66. data/lib/yard/tags/overload_tag.rb +2 -1
  67. data/lib/yard/tags/tag.rb +4 -3
  68. data/lib/yard/tags/types_explainer.rb +6 -5
  69. data/lib/yard/templates/engine.rb +0 -1
  70. data/lib/yard/templates/helpers/base_helper.rb +1 -1
  71. data/lib/yard/templates/helpers/html_helper.rb +21 -6
  72. data/lib/yard/templates/helpers/html_syntax_highlight_helper.rb +6 -1
  73. data/lib/yard/templates/helpers/markup/hybrid_markdown.rb +2147 -0
  74. data/lib/yard/templates/helpers/markup/rdoc_markup.rb +2 -0
  75. data/lib/yard/templates/helpers/markup_helper.rb +4 -2
  76. data/lib/yard/templates/template_options.rb +0 -1
  77. data/lib/yard/version.rb +1 -1
  78. data/po/ja.po +101 -101
  79. data/templates/default/fulldoc/html/css/common.css +1 -1
  80. data/templates/default/fulldoc/html/css/full_list.css +201 -53
  81. data/templates/default/fulldoc/html/css/style.css +991 -399
  82. data/templates/default/fulldoc/html/frames.erb +9 -4
  83. data/templates/default/fulldoc/html/full_list.erb +8 -5
  84. data/templates/default/fulldoc/html/js/app.js +799 -312
  85. data/templates/default/fulldoc/html/js/full_list.js +332 -214
  86. data/templates/default/fulldoc/html/setup.rb +10 -2
  87. data/templates/default/layout/html/headers.erb +1 -1
  88. data/templates/default/layout/html/layout.erb +3 -1
  89. data/templates/default/method/html/header.erb +3 -3
  90. data/templates/default/module/html/defines.erb +3 -3
  91. data/templates/default/module/html/inherited_methods.erb +1 -0
  92. data/templates/default/module/html/method_summary.erb +8 -0
  93. data/templates/default/module/setup.rb +20 -0
  94. data/templates/default/onefile/html/headers.erb +2 -0
  95. data/templates/default/onefile/html/layout.erb +3 -4
  96. data/templates/default/tags/html/example.erb +2 -2
  97. data/templates/default/tags/html/option.erb +1 -1
  98. data/templates/guide/fulldoc/html/css/style.css +347 -97
  99. data/templates/guide/fulldoc/html/js/app.js +61 -33
  100. data/templates/guide/layout/html/layout.erb +69 -72
  101. metadata +21 -60
  102. data/.dockerignore +0 -2
  103. data/.gitattributes +0 -4
  104. data/.github/FUNDING.yml +0 -3
  105. data/.github/ISSUE_TEMPLATE.md +0 -33
  106. data/.github/PULL_REQUEST_TEMPLATE.md +0 -12
  107. data/.github/workflows/ci.yml +0 -30
  108. data/.github/workflows/gem.yml +0 -19
  109. data/.gitignore +0 -14
  110. data/.rspec +0 -2
  111. data/.rubocop.yml +0 -112
  112. data/CODE_OF_CONDUCT.md +0 -15
  113. data/CONTRIBUTING.md +0 -140
  114. data/Dockerfile.samus +0 -28
  115. data/Gemfile +0 -34
  116. data/Rakefile +0 -36
  117. data/SECURITY.md +0 -26
  118. data/benchmarks/builtins_vs_eval.rb +0 -24
  119. data/benchmarks/concat_vs_join.rb +0 -13
  120. data/benchmarks/erb_vs_erubis.rb +0 -54
  121. data/benchmarks/format_args.rb +0 -47
  122. data/benchmarks/generation.rb +0 -38
  123. data/benchmarks/marshal_vs_dbm.rb +0 -64
  124. data/benchmarks/parsing.rb +0 -46
  125. data/benchmarks/pathname_vs_string.rb +0 -51
  126. data/benchmarks/rdoc_vs_yardoc.rb +0 -11
  127. data/benchmarks/registry_store_types.rb +0 -49
  128. data/benchmarks/ri_vs_yri.rb +0 -19
  129. data/benchmarks/ripper_parser.rb +0 -13
  130. data/benchmarks/splat_vs_flatten.rb +0 -13
  131. data/benchmarks/template_erb.rb +0 -23
  132. data/benchmarks/template_format.rb +0 -7
  133. data/benchmarks/template_profile.rb +0 -18
  134. data/benchmarks/yri_cache.rb +0 -20
  135. data/samus.json +0 -49
  136. data/tasks/prepare_tag.rake +0 -45
  137. data/tasks/update_error_map.rake +0 -53
  138. data/yard.gemspec +0 -25
@@ -0,0 +1,2147 @@
1
+ # frozen_string_literal: true
2
+ if RUBY_VERSION < '3.5'
3
+ require 'cgi/util'
4
+ else
5
+ require 'cgi/escape'
6
+ end
7
+
8
+ module YARD
9
+ module Templates
10
+ module Helpers
11
+ module Markup
12
+ # A built-in formatter that implements a practical subset of GitHub
13
+ # flavored Markdown plus common RDoc markup forms.
14
+ class HybridMarkdown
15
+ attr_accessor :from_path
16
+
17
+ NAMED_ENTITIES = {
18
+ 'nbsp' => [0x00A0].pack('U'),
19
+ 'copy' => [0x00A9].pack('U'),
20
+ 'AElig' => [0x00C6].pack('U'),
21
+ 'Dcaron' => [0x010E].pack('U'),
22
+ 'frac34' => [0x00BE].pack('U'),
23
+ 'HilbertSpace' => [0x210B].pack('U'),
24
+ 'DifferentialD' => [0x2146].pack('U'),
25
+ 'ClockwiseContourIntegral' => [0x2232].pack('U'),
26
+ 'ngE' => [0x2267, 0x0338].pack('U*'),
27
+ 'ouml' => [0x00F6].pack('U'),
28
+ 'quot' => '"',
29
+ 'amp' => '&'
30
+ }.freeze
31
+
32
+ ATX_HEADING_RE = /^\s{0,3}#{Regexp.escape('#')}{1,6}(?=[ \t]|$)/.freeze
33
+ RDOC_HEADING_RE = /^\s*(=+)[ \t]+(.+?)\s*$/.freeze
34
+ SETEXT_HEADING_RE = /^\s{0,3}(=+|-+)\s*$/.freeze
35
+ FENCE_RE = /^(\s{0,3})(`{3,}|~{3,})([^\n]*)$/.freeze
36
+ THEMATIC_BREAK_RE = /^\s{0,3}(?:(?:-\s*){3,}|(?:\*\s*){3,}|(?:_\s*){3,})\s*$/.freeze
37
+ TABLE_SEPARATOR_RE = /^\s*\|?(?:\s*:?-+:?\s*\|)+(?:\s*:?-+:?\s*)\|?\s*$/.freeze
38
+ UNORDERED_LIST_RE = /^\s{0,3}([*+-])[ \t]+(.+?)\s*$/.freeze
39
+ ORDERED_LIST_RE = /^\s{0,3}(\d+)([.)])[ \t]+(.+?)\s*$/.freeze
40
+ RDOC_ORDERED_LIST_RE = /^\s{0,3}([A-Za-z])\.[ \t]+(.+?)\s*$/.freeze
41
+ LABEL_LIST_BRACKET_RE = /^\s*\[([^\]]+)\](?:[ \t]+(.+))?\s*$/.freeze
42
+ LABEL_LIST_COLON_RE = /^\s*([^\s:][^:]*)::(?:[ \t]+(.*))?\s*$/.freeze
43
+ BLOCKQUOTE_RE = /^\s{0,3}>\s?(.*)$/.freeze
44
+ HTML_BLOCK_RE = %r{
45
+ ^\s*(?:
46
+ <!--|
47
+ <\?|
48
+ <![A-Z]|
49
+ <!\[CDATA\[|
50
+ </?(?:address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|search|section|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)\b|
51
+ <(?:script|pre|style|textarea)\b|
52
+ </(?:script|pre|style|textarea)\b|
53
+ </?[A-Za-z][A-Za-z0-9-]*(?:\s+[A-Za-z_:][\w:.-]*(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'=<>`]+))?)*\s*/?>\s*$
54
+ )
55
+ }mx.freeze
56
+ HTML_BLOCK_TAGS = %w[
57
+ address article aside base basefont blockquote body caption center col
58
+ colgroup dd details dialog dir div dl dt fieldset figcaption figure
59
+ footer form frame frameset h1 h2 h3 h4 h5 h6 head header hr html iframe
60
+ legend li link main menu menuitem nav noframes ol optgroup option p param
61
+ search section summary table tbody td tfoot th thead title tr track ul
62
+ ].freeze
63
+ HTML_TAG_RE = %r{
64
+ <!--(?:>|->)|
65
+ <!--(?:.*?)-->|
66
+ <\?.*?\?>|
67
+ <![A-Z][^>]*>|
68
+ <!\[CDATA\[.*?\]\]>|
69
+ </[A-Za-z][A-Za-z0-9-]*\s*>|
70
+ <[A-Za-z][A-Za-z0-9-]*
71
+ (?:\s+[A-Za-z_:][\w:.-]*
72
+ (?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'=<>`]+))?
73
+ )*
74
+ \s*/?>
75
+ }mx.freeze
76
+ ENTITY_RE = /&(?:[A-Za-z][A-Za-z0-9]+|#\d+|#[xX][0-9A-Fa-f]+);/.freeze
77
+ YARD_LINK_RE = /(?<!\\)\{(?!\})(\S+?)(?:\s([^\}]*?\S))?\}(?=\W|.+<\/|$)/m.freeze
78
+ CODE_LANG_RE = /\A(?:[ \t]*\n)?[ \t]*!!!([\w.+-]+)[ \t]*\n/.freeze
79
+ REFERENCE_DEF_START_RE = /^\s{0,3}\[([^\]]+)\]:\s*(.*)$/.freeze
80
+ PLACEHOLDER_RE = /\0(\d+)\0/.freeze
81
+ ESCAPABLE_CHARS_RE = /\\([!"#$%&'()*+,\-.\/:;<=>?@\[\\\]^_`{|}~])/.freeze
82
+ RDOC_ESCAPED_CAPITALIZED_CROSSREF_RE = /\\((?:::)?(?:[A-Z]\w+|[A-Z]\w*::\w+)(?:::\w+)*)/.freeze
83
+ AUTOLINK_RE = /<([A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\s]*|[A-Za-z0-9.!#$%&'*+\/=?^_`{|}~-]+@[A-Za-z0-9](?:[A-Za-z0-9-]*[A-Za-z0-9])?(?:\.[A-Za-z0-9](?:[A-Za-z0-9-]*[A-Za-z0-9])?)+)>/.freeze
84
+ TAB_WIDTH = 4
85
+
86
# Builds a formatter for +text+.
#
# Line endings ("\r\n" and bare "\r") are rewritten to "\n" before the text
# is handed to extract_reference_definitions; whatever that helper returns
# is the text later rendered by {#to_html}.
#
# @param text [String] the Markdown text to format.
# @param options [Hash] options for the formatter.
# @option options [Boolean] :heading_ids whether to generate id attributes
#   for headings (defaults to true when the key is absent).
def initialize(text, options = {})
  @heading_ids = options.fetch(:heading_ids, true)
  # Must exist before extract_reference_definitions runs.
  @references = {}
  normalized = text.to_s.gsub(/\r\n?/, "\n")
  @text = extract_reference_definitions(normalized)
end
94
+
95
# Renders the prepared text: splits it into lines, parses those lines into
# block-level HTML fragments and joins the fragments with newlines.
#
# @return [String] the formatted HTML.
def to_html
  source_lines = split_lines(@text)
  html_blocks = parse_blocks(source_lines, 0)
  html_blocks.join("\n")
end
99
+
100
+ private
101
+
102
# Walks +lines+ from +index+ to the end and dispatches each position to the
# first block parser whose start predicate accepts it. The predicates are
# tried in a fixed order, so earlier constructs win over later ones.
#
# @param lines [Array<String>] the lines to parse.
# @param index [Integer] position of the first line to consider.
# @return [Array<String>] one HTML string per recognised block.
def parse_blocks(lines, index)
  blocks = []
  # Kind of the most recently emitted block; only consulted by
  # indented_code_block_start?.
  last_kind = nil

  while index < lines.length
    line = lines[index]

    # Blank lines separate blocks but never produce output.
    if blank_line?(line)
      index += 1
      next
    end

    if yard_indented_code_start?(lines, index)
      block, index = parse_yard_indented_code(lines, index)
      kind = :code
    elsif indented_code_block_start?(lines, index, last_kind)
      block, index = parse_indented_code(lines, index)
      kind = :code
    elsif thematic_break?(line)
      block = '<hr />'
      index += 1
      kind = :hr
    elsif (setext = parse_setext_heading(lines, index))
      # parse_setext_heading returns [html, next_index].
      block, index = setext
      kind = :heading
    elsif (heading = parse_heading(line))
      block = heading
      index += 1
      kind = :heading
    elsif fenced_code_start?(line)
      block, index = parse_fenced_code(lines, index)
      kind = :code
    elsif table_start?(lines, index)
      block, index = parse_table(lines, index)
      kind = :table
    elsif labeled_list_start?(lines, index)
      block, index = parse_labeled_list(lines, index)
      kind = :list
    elsif blockquote_start?(line)
      block, index = parse_blockquote(lines, index)
      kind = :blockquote
    elsif list_start?(line)
      block, index = parse_list(lines, index)
      kind = :list
    elsif html_block_start?(line)
      block, index = parse_html_block(lines, index)
      kind = :html
    else
      block, index = parse_paragraph(lines, index)
      # An empty paragraph is dropped and does not change last_kind.
      next if block.empty?
      kind = :paragraph
    end

    blocks << block
    last_kind = kind
  end

  blocks
end
164
+
165
# Turns a single line into heading HTML when it is an ATX heading (delegated
# to parse_atx_heading) or an RDoc-style "= Title" heading.
#
# @param line [String] the candidate line.
# @return [String, nil] the heading HTML, or nil when +line+ is not a heading.
def parse_heading(line)
  atx = parse_atx_heading(line)
  return atx if atx

  rdoc = RDOC_HEADING_RE.match(line)
  return unless rdoc

  title = rdoc[2].strip
  # A "title" made only of '=' / '-' characters is not treated as a heading.
  return nil if title =~ /\A[=\-]+\z/

  # The number of '=' marks selects the level, capped at h6.
  level = [rdoc[1].length, 6].min
  "<h#{level}#{heading_id(title)}>#{format_inline(title)}</h#{level}>"
end
180
+
181
# Tries to read a setext heading (text lines followed by an underline of
# '=' or '-') starting at +index+.
#
# @param lines [Array<String>] the lines being parsed.
# @param index [Integer] position of the first candidate content line.
# @return [Array(String, Integer), nil] the heading HTML and the index of
#   the line after the underline, or nil when no setext heading starts here.
def parse_setext_heading(lines, index)
  return nil if index + 1 >= lines.length

  first = lines[index]
  # The first line must be plain paragraph text: not blank, not a quote,
  # list item, indented code, other heading or fence opener.
  return nil if first.strip.empty?
  return nil if first =~ /^\s{0,3}>/
  return nil if parse_list_marker(first)
  return nil if first =~ /^(?: {4,}|\t)/
  return nil if parse_heading(first)
  return nil if fenced_code_start?(first)

  collected = []
  cursor = index

  while cursor < lines.length
    line = lines[cursor]
    return nil if blank_line?(line)

    underline = SETEXT_HEADING_RE.match(line)
    if underline
      return nil if collected.empty?

      # '=' underlines give h1, '-' underlines give h2.
      level = underline[1].start_with?('=') ? 1 : 2
      text = collected.join("\n")
      return ["<h#{level}#{heading_id(text)}>#{format_inline(text)}</h#{level}>", cursor + 1]
    end

    # Any later line that starts another block ends the attempt.
    return nil if cursor > index && block_boundary?(line)

    collected << normalize_heading_line(line)
    cursor += 1
  end

  nil
end
215
+
216
# Reads a fenced code block whose opening fence is at +index+.
#
# Lines are collected until fence_closer? accepts one or the input ends; an
# unterminated fence therefore runs to the end of +lines+.
#
# @param lines [Array<String>] the lines being parsed.
# @param index [Integer] position of the opening fence line.
# @return [Array(String, Integer)] the code block HTML and the index of the
#   first line after the block.
def parse_fenced_code(lines, index)
  opener = parse_fence_opener(lines[index])
  marker_char = opener[:char]
  marker_length = opener[:length]
  fence_indent = opener[:indent]
  info = opener[:lang]
  body = []
  index += 1

  until index >= lines.length || fence_closer?(lines[index], marker_char, marker_length)
    body << strip_fenced_indent(lines[index], fence_indent)
    index += 1
  end

  # Step over the closing fence when one was found.
  index += 1 if index < lines.length
  [code_block(body.join, info), index]
end
235
+
236
# Collects an indented code block starting at +index+. Blank lines are kept
# inside the block, but trailing blank lines are dropped before rendering.
#
# @param lines [Array<String>] the lines being parsed.
# @param index [Integer] position of the first code line.
# @return [Array(String, Integer)] the code block HTML and the index of the
#   first line after the block.
def parse_indented_code(lines, index)
  body = []
  after_blank = false

  while index < lines.length
    line = lines[index]
    # An HTML block start directly after a blank line ends the code block.
    break if after_blank && html_block_start?(line)

    blank = blank_line?(line)
    break unless blank || indented_code_start?(line)

    body << line
    after_blank = blank
    index += 1
  end

  body.pop until body.empty? || !blank_line?(body.last)
  [code_block(unindent_indented_code(body)), index]
end
252
+
253
# Collects the code block variant detected by yard_indented_code_start?.
# Takes every blank or indented line from +index+ on, drops trailing blank
# lines and renders the rest through unindent and code_block.
#
# @param lines [Array<String>] the lines being parsed.
# @param index [Integer] position of the first code line.
# @return [Array(String, Integer)] the code block HTML and the index of the
#   first line after the block.
def parse_yard_indented_code(lines, index)
  body = []

  until index >= lines.length
    candidate = lines[index]
    break unless blank_line?(candidate) || indented_code_start?(candidate)

    body << candidate
    index += 1
  end

  body.pop until body.empty? || !blank_line?(body.last)
  [code_block(unindent(body)), index]
end
266
+
267
# Renders a pipe table whose header row is at +index+ and whose separator
# row is at +index + 1+.
#
# Body rows are normalised to the header's column count, as the GFM tables
# extension requires: rows with fewer cells are padded with empty cells and
# cells beyond the header width are dropped. Without this, ragged rows
# produced tables whose rows had differing numbers of columns.
#
# @param lines [Array<String>] the lines being parsed.
# @param index [Integer] position of the header row.
# @return [Array(String, Integer)] the table HTML and the index of the first
#   line after the table.
def parse_table(lines, index)
  header = split_table_row(lines[index])
  alignments = split_table_row(lines[index + 1]).map { |cell| table_alignment(cell) }
  width = header.length
  index += 2

  rows = []
  while index < lines.length && table_row?(lines[index])
    # Array#first returns a fresh array, so padding it is safe.
    cells = split_table_row(lines[index]).first(width)
    cells << '' while cells.length < width
    rows << cells
    index += 1
  end

  # Shared renderer for header and body cells; adds align="..." when the
  # separator row specified an alignment for that column.
  render_cell = lambda do |tag, cell, column|
    attrs = alignments[column] ? %( align="#{alignments[column]}") : ''
    "<#{tag}#{attrs}>#{format_inline(cell)}</#{tag}>\n"
  end

  html = "<table>\n<thead>\n<tr>\n".dup
  header.each_with_index { |cell, column| html << render_cell.call('th', cell, column) }
  html << "</tr>\n</thead>\n<tbody>\n"
  rows.each do |row|
    html << "<tr>\n"
    row.each_with_index { |cell, column| html << render_cell.call('td', cell, column) }
    html << "</tr>\n"
  end
  html << "</tbody>\n</table>"
  [html, index]
end
295
+
296
# Parses an ordered or unordered list whose first item is at +index+.
#
# Each item's lines are gathered (continuations are de-indented), rendered
# recursively through parse_blocks, and then wrapped in <li>. Whether item
# paragraphs keep their <p> wrappers depends on whether the list as a whole
# ended up "tight" or "loose".
#
# @param lines [Array<String>] the lines being parsed.
# @param index [Integer] position of the first list item.
# @return [Array(String, Integer)] the list HTML and the index of the first
#   line after the list.
def parse_list(lines, index)
  marker = parse_list_marker(lines[index])
  ordered = marker[:ordered]
  tag = ordered ? 'ol' : 'ul'
  start_attr = ordered && marker[:start] != 1 ? %( start="#{marker[:start]}") : ''
  items = []
  tight = true
  list_indent = marker[:indent]

  while index < lines.length
    # A thematic break at list level ends the list once it has items.
    break if items.any? && thematic_break?(lines[index]) && leading_columns(lines[index]) <= list_indent + 3

    item_marker = parse_list_marker(lines[index])
    break unless item_marker && same_list_type?(marker, item_marker)

    effective_padding = list_item_padding(item_marker)
    content_indent = item_marker[:indent] + item_marker[:marker_length] + effective_padding
    lazy_indent = item_marker[:indent] + effective_padding
    item_lines = []
    first_line = item_marker[:content]
    unless first_line.empty?
      # Padding beyond the effective amount is kept as leading spaces of the
      # item's first content line.
      leading = [item_marker[:padding] - effective_padding, 0].max
      item_lines << "#{' ' * leading}#{first_line}\n"
    end
    index += 1
    blank_seen = false
    item_loose = false

    # Gather the lines that belong to this item.
    while index < lines.length
      line = lines[index]
      break if thematic_break?(line) && !indented_to?(line, content_indent)
      break if setext_underline_line?(line) && !indented_to?(line, content_indent)

      next_marker = parse_list_marker(line)
      if next_marker && same_list_type?(marker, next_marker) &&
         (next_marker[:indent] == item_marker[:indent] || (blank_seen && next_marker[:indent] <= list_indent + 3))
        # A sibling item reached across a blank line makes the list loose.
        tight = false if blank_seen
        break
      end
      break if next_marker && next_marker[:indent] < content_indent
      break if !blank_seen && !indented_to?(line, content_indent) && block_boundary?(line)

      if blank_line?(line)
        item_lines << "\n"
        blank_seen = true
      elsif indented_to?(line, content_indent)
        if blank_seen
          # An item with no content so far does not absorb a line that sits
          # exactly at the content column after a blank line.
          break if first_line.empty? && item_lines.all? { |item_line| item_line == "\n" } &&
                   leading_columns(line) == content_indent
          item_loose = true if loose_list_item_continuation?(item_lines)
        end
        item_lines << strip_list_item_indent(line, content_indent)
        blank_seen = false
      elsif blank_seen
        # Under-indented text after a blank line is not part of the item.
        break
      else
        # Lazy continuation: text that looks like a list marker is escaped
        # so the recursive parse does not start a nested list from it.
        lazy = strip_list_item_indent(line, lazy_indent)
        lazy = escape_list_marker_text(lazy) if parse_list_marker(lazy)
        item_lines << lazy
      end

      index += 1
    end

    item_blocks = parse_blocks(item_lines, 0)
    item_html = item_blocks.join("\n")
    item_html = format_inline(first_line) if item_html.empty? && !first_line.empty?

    # "Simple" items consist of exactly one paragraph and nothing else.
    simple_item = !item_loose &&
                  item_blocks.length == 1 &&
                  item_html =~ /\A<p>(.*?)<\/p>\z/m &&
                  item_html !~ /<(?:pre|blockquote|ul|ol|dl|table|h\d|hr)/m

    # Several paragraphs inside one item also make the list loose.
    item_loose ||= item_blocks.count { |block| block.start_with?('<p>') } > 1 unless item_html.empty?

    tight &&= !item_loose
    items << {:html => item_html, :simple => simple_item}
  end

  # The final value of +tight+ applies to every item, so wrapping happens
  # only after all items have been collected.
  rendered = items.map do |item|
    body = item[:html]

    if tight
      body = body.sub(/\A<p>(.*?)<\/p>(?=\n<(?:ul|ol|blockquote|pre|h\d|table|hr))/m, '\1')
      body = body.sub(/\n<p>(.*?)<\/p>\z/m, "\n\\1")
      body = body.sub(/\A<p>(.*?)<\/p>\z/m, '\1') if item[:simple]
    elsif !body.empty? && body !~ /\A</m
      body = "<p>#{body}</p>"
    end

    if body.empty?
      '<li></li>'
    elsif item[:simple] && tight
      "<li>#{body}</li>"
    elsif body !~ /\A</m
      suffix = body.include?("\n") ? "\n" : ''
      "<li>#{body}#{suffix}</li>"
    else
      suffix = body =~ /(?:<\/(?:p|pre|blockquote|ul|ol|dl|table|h\d)>|<hr \/>|<[A-Za-z][A-Za-z0-9-]*>)\z/m ? "\n" : ''
      "<li>\n#{body}#{suffix}</li>"
    end
  end

  ["<#{tag}#{start_attr}>\n#{rendered.join("\n")}\n</#{tag}>", index]
end
407
+
408
# Parses a run of labeled list entries starting at +index+ into a <dl>.
#
# Each entry contributes a <dt> with the inline-formatted label and a <dd>
# whose content is the entry body (text on the label line plus following
# lines indented by two or more spaces or a tab), parsed as blocks.
#
# @param lines [Array<String>] the lines being parsed.
# @param index [Integer] position of the first labeled line.
# @return [Array(String, Integer)] the definition list HTML and the index of
#   the first line after the list.
def parse_labeled_list(lines, index)
  entries = []
  continuation_indent = /^(?: {2,}|\t)/

  while index < lines.length
    label, body = parse_labeled_list_line(lines[index])
    break unless label

    index += 1
    body_lines = []
    body_lines << body if body && !body.empty?

    while index < lines.length
      line = lines[index]
      break if blank_line?(line)
      break if parse_labeled_list_line(line)
      break if !line.strip.empty? && line !~ continuation_indent

      body_lines << line.sub(continuation_indent, '').chomp
      index += 1
    end

    body_html = ''
    unless body_lines.empty?
      # parse_blocks is given newline-terminated lines.
      body_html = parse_blocks(body_lines.map { |l| "#{l}\n" }, 0).join("\n")
    end

    entries << "<dt>#{format_inline(label)}</dt>\n<dd>#{body_html}</dd>"
    # Blank lines between entries do not end the list.
    index += 1 while index < lines.length && blank_line?(lines[index])
  end

  ["<dl>\n#{entries.join("\n")}\n</dl>", index]
end
442
+
443
# Parses a blockquote starting at +index+. The quote markers are stripped,
# the remaining lines are parsed recursively as blocks, and the result is
# wrapped in <blockquote>.
#
# @param lines [Array<String>] the lines being parsed.
# @param index [Integer] position of the first quoted line.
# @return [Array(String, Integer)] the blockquote HTML and the index of the
#   first line after the quote.
def parse_blockquote(lines, index)
  quoted = []
  saw_quote = false
  previous_blank = false

  while index < lines.length
    line = lines[index]

    if saw_quote
      # Once inside the quote, these conditions end it.
      break if quoted.last == "\n" && !blockquote_start?(line)
      break if blank_line?(line) && blockquote_open_fence?(quoted)
      break if previous_blank
      break if !blank_line?(line) && !blockquote_start?(line) &&
               !lazy_blockquote_continuation?(quoted, line)
    else
      # Before the first marker only blank or quoted lines are accepted.
      break unless blank_line?(line) || blockquote_start?(line)
    end

    if blank_line?(line)
      quoted << "\n"
      previous_blank = true
    elsif (stripped = strip_blockquote_marker(line))
      quoted << stripped
      saw_quote = true
      previous_blank = false
    else
      # Lazy continuation line (no marker); setext-underline-looking lines
      # are re-added with a leading space after stripping their own indent.
      quoted << (setext_underline_line?(line) ? " #{line.lstrip}" : line)
      previous_blank = false
    end
    index += 1
  end

  inner_html = parse_blocks(quoted, 0).join("\n")
  html = inner_html.empty? ? "<blockquote>\n</blockquote>" : "<blockquote>\n#{inner_html}\n</blockquote>"
  [html, index]
end
478
+
479
# Collects a raw HTML block starting at +index+ and returns it verbatim
# (lines chomped and re-joined with "\n").
#
# For block types 6 and 7 the line that satisfies html_block_end? is not
# part of the block; for every other type the terminating line is included.
#
# When html_block_type does not recognise the first line, that single line
# is passed through and consumed. The previous behaviour returned the
# unchanged index, and the visible caller (parse_blocks) would then retry
# the same line forever.
#
# @param lines [Array<String>] the lines being parsed.
# @param index [Integer] position of the first HTML line.
# @return [Array(String, Integer)] the raw HTML and the index of the first
#   line after the block.
def parse_html_block(lines, index)
  first = lines[index]
  return ['', index] if first.nil?

  type = html_block_type(first)
  # Always make forward progress, even for an unclassifiable line.
  return [first.chomp, index + 1] unless type

  html = []
  while index < lines.length
    line = lines[index]
    break if html.any? && [6, 7].include?(type) && html_block_end?(type, line)
    break unless html.any? || html_block_type(line)

    html << line.chomp
    index += 1
    break if html_block_end?(type, line)
  end

  [html.join("\n"), index]
end
499
+
500
# Gathers consecutive lines into one paragraph, stopping at a blank line or
# at the first line that starts another kind of block.
#
# @param lines [Array<String>] the lines being parsed.
# @param index [Integer] position of the first paragraph line.
# @return [Array(String, Integer)] the <p> HTML (or '' when no text was
#   collected) and the index of the first line after the paragraph.
def parse_paragraph(lines, index)
  collected = []

  until index >= lines.length
    line = lines[index]
    break if blank_line?(line)
    # Only a paragraph that already has text can be interrupted this way.
    break if !collected.empty? && colon_indented_code_block_start?(lines, index)
    break if thematic_break?(line) ||
             parse_setext_heading(lines, index) ||
             parse_heading(line) ||
             fenced_code_start?(line) ||
             table_start?(lines, index) ||
             labeled_list_start?(lines, index) ||
             blockquote_start?(line) ||
             list_start?(line, true) ||
             html_block_start?(line, true)

    collected << line.chomp
    index += 1
  end

  text = collected.map { |raw| normalize_paragraph_line(raw) }.join("\n").strip
  return ['', index] if text.empty?

  ["<p>#{format_inline(text)}</p>", index]
end
524
+
525
# Converts inline markup in +text+ to HTML.
#
# The protect_* stages run first, in the order listed, each receiving the
# text and the shared +placeholders+ array. The remaining text is then
# stripped of trailing spaces/tabs before newlines, HTML-escaped, given
# emphasis and strikethrough markup, passed through autolink_urls, and
# finally the placeholders are restored.
#
# @param text [String] the inline source text.
# @return [String] the formatted HTML.
def format_inline(text)
  placeholders = []
  stages = [
    :protect_yard_links,
    :protect_raw_html,
    :protect_code_spans,
    :protect_autolinks,
    :protect_hard_breaks,
    :protect_rdoc_images,
    :protect_inline_images,
    :protect_inline_links,
    :protect_braced_text_links,
    :protect_single_word_text_links,
    :protect_reference_images,
    :protect_reference_links,
    :protect_escaped_characters,
    :protect_entities
  ]

  protected_text = stages.inject(text) do |current, stage|
    __send__(stage, current, placeholders)
  end

  escaped = h(protected_text.gsub(/[ \t]+\n/, "\n"))
  marked_up = format_strikethrough(format_emphasis(escaped))
  restore_placeholders(autolink_urls(marked_up), placeholders)
end
547
+
548
# Replaces code spans with placeholders holding their <code> HTML.
#
# Backtick spans are handled first: a run of backticks opens a span when it
# is not preceded by a backslash and not inside an autolink candidate, and
# the span ends at the run found by find_matching_backtick_run. Afterwards
# RDoc-style +code+ spans are replaced the same way.
#
# @param text [String] the inline text.
# @param placeholders [Array] the shared placeholder store.
# @return [String] the text with code spans replaced by placeholders.
def protect_code_spans(text, placeholders)
  output = String.new
  pos = 0

  while pos < text.length
    escaped = pos > 0 && text[pos - 1, 1] == '\\'
    unless text[pos, 1] == '`' && !escaped && !inside_angle_autolink_candidate?(text, pos)
      output << text[pos, 1]
      pos += 1
      next
    end

    # Measure the opening run of backticks.
    run = 1
    run += 1 while pos + run < text.length && text[pos + run, 1] == '`'

    closer = find_matching_backtick_run(text, pos + run, run)
    if closer
      raw = restore_placeholders(text[(pos + run)...closer], placeholders)
      output << store_placeholder(placeholders, "<code>#{h(normalize_code_span(raw))}</code>")
      pos = closer + run
    else
      # No closing run: the backticks are literal text.
      output << ('`' * run)
      pos += run
    end
  end

  output.gsub(/(^|[\s>])\+([^\s+\n](?:[^+\n]*?[^\s+\n])?)\+(?=$|[\s<.,;:!?)]|\z)/) do
    lead = Regexp.last_match(1)
    inner = Regexp.last_match(2)
    lead + store_placeholder(placeholders, "<code>#{h(restore_placeholders(inner, placeholders))}</code>")
  end
end
578
+
579
# Tells whether position +index+ lies inside what looks like the start of
# an angle-bracket autolink: an unclosed '<' before +index+, no whitespace
# in between, and text after the '<' that begins like a URI scheme
# ("scheme:") or an e-mail local part ("name@").
#
# @param text [String] the inline text.
# @param index [Integer] the position being examined.
# @return [Integer, nil, false] a truthy match offset when inside such a
#   candidate, otherwise nil or false.
def inside_angle_autolink_candidate?(text, index)
  opening = text.rindex('<', index)
  return false if opening.nil?

  # A '>' after the last '<' means that bracket is already closed.
  closing = text.rindex('>', index)
  return false unless closing.nil? || closing <= opening

  candidate = text[opening...index]
  return false if candidate =~ /\s/

  candidate =~ /\A<(?:[A-Za-z][A-Za-z0-9.+-]{1,31}:|[A-Za-z0-9.!#$%&'*+\/=?^_`{|}~-]+@)/
end
591
+
592
# Shields YARD "{Object text}" links from later inline processing by
# swapping them for placeholders. A match immediately followed by '[' is
# left in the text untouched.
#
# @param text [String] the inline text.
# @param placeholders [Array] the shared placeholder store.
# @return [String] the text with YARD links replaced by placeholders.
def protect_yard_links(text, placeholders)
  text.gsub(YARD_LINK_RE) do |whole|
    follower = text[Regexp.last_match.end(0), 1]
    follower == '[' ? whole : store_placeholder(placeholders, whole)
  end
end
602
+
603
# Replaces <scheme:...> and <user@host> autolinks with placeholders holding
# the corresponding anchor HTML. Targets that contain '@' and do not start
# with a URI scheme get a "mailto:" href; all others go through
# escape_autolink_url.
#
# @param text [String] the inline text.
# @param placeholders [Array] the shared placeholder store.
# @return [String] the text with autolinks replaced by placeholders.
def protect_autolinks(text, placeholders)
  text.gsub(AUTOLINK_RE) do
    target = Regexp.last_match(1)
    bare_email = target.include?('@') && target !~ /\A[A-Za-z][A-Za-z0-9.+-]{1,31}:/
    href = bare_email ? "mailto:#{target}" : escape_autolink_url(target)
    store_placeholder(placeholders, %(<a href="#{h(href)}">#{h(target)}</a>))
  end
end
610
+
611
# Replaces raw inline HTML (anything matching HTML_TAG_RE that is not
# preceded by a backslash) with placeholders so it survives escaping.
# A tag directly preceded by a backtick is left in the text unchanged.
#
# The pattern interpolates the constant HTML_TAG_RE, so the /o flag is used
# to build it once instead of recompiling it on every call.
#
# @param text [String] the inline text.
# @param placeholders [Array] the shared placeholder store.
# @return [String] the text with raw HTML replaced by placeholders.
def protect_raw_html(text, placeholders)
  text.gsub(/(?<!\\)#{HTML_TAG_RE}/mo) do |tag|
    tag_start = Regexp.last_match.begin(0)
    if tag_start > 0 && text[tag_start - 1, 1] == '`'
      tag
    else
      store_placeholder(placeholders, tag)
    end
  end
end
622
+
623
# Replaces backslash escapes with placeholders holding the escaped text
# (HTML-escaped, without the backslash). RDoc-style escaped capitalized
# cross references are handled before single escapable characters.
#
# @param text [String] the inline text.
# @param placeholders [Array] the shared placeholder store.
# @return [String] the text with escapes replaced by placeholders.
def protect_escaped_characters(text, placeholders)
  without_crossrefs = text.gsub(RDOC_ESCAPED_CAPITALIZED_CROSSREF_RE) do
    store_placeholder(placeholders, h(Regexp.last_match(1)))
  end

  without_crossrefs.gsub(ESCAPABLE_CHARS_RE) do
    store_placeholder(placeholders, h(Regexp.last_match(1)))
  end
end
630
+
631
# Replaces character references matching ENTITY_RE with placeholders that
# hold the decoded, HTML-escaped text.
#
# @param text [String] the inline text.
# @param placeholders [Array] the shared placeholder store.
# @return [String] the text with entities replaced by placeholders.
def protect_entities(text, placeholders)
  text.gsub(ENTITY_RE) do |entity|
    decoded = decode_entity(entity)
    store_placeholder(placeholders, h(decoded))
  end
end
634
+
635
# Replaces hard line breaks (a backslash, or two or more whitespace
# characters, directly before a newline) with placeholders holding
# "<br />" followed by a newline.
#
# @param text [String] the inline text.
# @param placeholders [Array] the shared placeholder store.
# @return [String] the text with hard breaks replaced by placeholders.
def protect_hard_breaks(text, placeholders)
  hard_break = /(?:\\|\s{2,})\n/
  text.gsub(hard_break) do
    store_placeholder(placeholders, "<br />\n")
  end
end
638
+
639
# Replaces "rdoc-image:<url>" tokens (delimited by whitespace or line
# boundaries) with placeholders holding image HTML with an empty label.
# The whitespace that preceded the token is preserved.
#
# @param text [String] the inline text.
# @param placeholders [Array] the shared placeholder store.
# @return [String] the text with rdoc-image tokens replaced by placeholders.
def protect_rdoc_images(text, placeholders)
  text.gsub(/(^|[ \t\n])rdoc-image:([A-Za-z][A-Za-z0-9+.-]*:\/\/\S+)(?=$|[ \t\n])/) do
    found = Regexp.last_match
    lead = found[1]
    url = found[2]
    lead + store_placeholder(placeholders, image_html('', url))
  end
end
646
+
647
# Replaces inline image constructs (found by replace_inline_constructs with
# the '!' prefix) with placeholders holding image HTML. Placeholders nested
# in the label, destination and title are restored before rendering.
#
# @param text [String] the inline text.
# @param placeholders [Array] the shared placeholder store.
# @return [String] the text with inline images replaced by placeholders.
def protect_inline_images(text, placeholders)
  replace_inline_constructs(text, placeholders, '!') do |label, dest, title|
    alt = restore_placeholders(label, placeholders)
    source = restore_placeholders(dest, placeholders)
    caption = title && restore_placeholders(title, placeholders)
    store_placeholder(placeholders, image_html(alt, source, caption))
  end
end
656
+
657
# Replaces inline link constructs (found by replace_inline_constructs with
# no prefix) with placeholders holding link HTML. Placeholders nested in
# the label, destination and title are restored before rendering.
#
# @param text [String] the inline text.
# @param placeholders [Array] the shared placeholder store.
# @return [String] the text with inline links replaced by placeholders.
def protect_inline_links(text, placeholders)
  replace_inline_constructs(text, placeholders, nil) do |label, dest, title|
    link_text = restore_placeholders(label, placeholders)
    target = restore_placeholders(dest, placeholders)
    tooltip = title && restore_placeholders(title, placeholders)
    store_placeholder(placeholders, link_html(link_text, target, tooltip))
  end
end
666
+
667
# Runs the reference-construct scanner in image mode.
#
# @param text [String] the inline text.
# @param placeholders [Array] the shared placeholder store.
# @return [Object] whatever scan_reference_constructs returns for :image.
def protect_reference_images(text, placeholders)
  mode = :image
  scan_reference_constructs(text, placeholders, mode)
end
670
+
671
# Runs the reference-construct scanner in link mode.
#
# @param text [String] the inline text.
# @param placeholders [Array] the shared placeholder store.
# @return [Object] whatever scan_reference_constructs returns for :link.
def protect_reference_links(text, placeholders)
  mode = :link
  scan_reference_constructs(text, placeholders, mode)
end
674
+
675
# Replaces RDoc-style single-word text links ("word[destination]") with
# placeholders holding link HTML.
#
# A label is a run of ASCII letters/digits immediately followed by '['. It
# is only considered outside of square brackets, and only becomes a link
# when parse_text_link_destination accepts what follows it. Backslash
# escapes are copied through untouched.
#
# The label is matched in place with a \G-anchored pattern and
# Regexp#match(text, index), rather than slicing text[index..-1] at every
# position, which copied the remaining text once per character.
#
# @param text [String] the inline text.
# @param placeholders [Array] the shared placeholder store.
# @return [String] the text with single-word links replaced by placeholders.
def protect_single_word_text_links(text, placeholders)
  output = String.new
  index = 0
  bracket_depth = 0
  # \G anchors the match at the start offset passed to Regexp#match.
  label_at_cursor = /\G([A-Za-z0-9]+)(?=\[)/

  while index < text.length
    char = text[index, 1]

    if char == '\\' && index + 1 < text.length
      # Keep the escape and the escaped character together.
      output << text[index, 2]
      index += 2
      next
    elsif char == '['
      bracket_depth += 1
    elsif char == ']' && bracket_depth > 0
      bracket_depth -= 1
    end

    if bracket_depth.zero? && (match = label_at_cursor.match(text, index))
      label = match[1]
      dest, consumed = parse_text_link_destination(text, index + label.length)

      if dest
        output << store_placeholder(placeholders, link_html(label, dest))
        index += label.length + consumed
        next
      end
    end

    output << char
    index += 1
  end

  output
end
710
+
711
# Replaces RDoc-style braced text links ("{label}[destination]") with
# placeholders holding link HTML. Backslash escapes are copied through
# untouched, and a '{' that does not lead to a valid label and destination
# is kept as literal text.
#
# @param text [String] the inline text.
# @param placeholders [Array] the shared placeholder store.
# @return [String] the text with braced links replaced by placeholders.
def protect_braced_text_links(text, placeholders)
  output = String.new
  pos = 0

  while pos < text.length
    char = text[pos, 1]

    if char == '\\' && pos + 1 < text.length
      # Keep the escape and the escaped character together.
      output << text[pos, 2]
      pos += 2
      next
    end

    if char == '{' && (label_end = find_braced_text_link_label_end(text, pos))
      label = text[(pos + 1)...label_end]
      dest, consumed = parse_text_link_destination(text, label_end + 1)

      if dest
        output << store_placeholder(placeholders, link_html(label, dest))
        pos = label_end + 1 + consumed
        next
      end
    end

    output << char
    pos += 1
  end

  output
end
742
+
743
+ def format_emphasis(text)
744
+ delimiters = []
745
+ output = []
746
+ index = 0
747
+
748
+ while index < text.length
749
+ char = text[index, 1]
750
+ if char == '*' || char == '_'
751
+ run_end = index
752
+ run_end += 1 while run_end < text.length && text[run_end, 1] == char
753
+ run_length = run_end - index
754
+ can_open, can_close = delimiter_flags(text, index, run_end, char)
755
+ token = {
756
+ :char => char,
757
+ :length => run_length,
758
+ :position => output.length,
759
+ :left_consumed => 0,
760
+ :right_consumed => 0,
761
+ :opening_html => String.new,
762
+ :closing_html => String.new,
763
+ :can_open => can_open,
764
+ :can_close => can_close
765
+ }
766
+ output << token
767
+
768
+ if can_close
769
+ delimiter_index = delimiters.length - 1
770
+ while delimiter_index >= 0 && available_delimiter_length(token) > 0
771
+ opener = delimiters[delimiter_index]
772
+ if opener[:char] == char && available_delimiter_length(opener) > 0 &&
773
+ !odd_match_disallowed?(opener, token)
774
+ use = available_delimiter_length(opener) >= 2 &&
775
+ available_delimiter_length(token) >= 2 ? 2 : 1
776
+ opener[:right_consumed] += use
777
+ opener[:opening_html] = (use == 2 ? '<strong>' : '<em>') + opener[:opening_html]
778
+ token[:left_consumed] += use
779
+ token[:closing_html] << (use == 2 ? '</strong>' : '</em>')
780
+ delimiters.reject! do |candidate|
781
+ candidate[:position] > opener[:position] &&
782
+ candidate[:position] < token[:position] &&
783
+ available_delimiter_length(candidate) > 0
784
+ end
785
+ delimiters.delete_at(delimiter_index) if available_delimiter_length(opener).zero?
786
+ delimiter_index = delimiters.length - 1
787
+ else
788
+ delimiter_index -= 1
789
+ end
790
+ end
791
+ end
792
+
793
+ delimiters << token if can_open && available_delimiter_length(token) > 0
794
+ index = run_end
795
+ else
796
+ output << char
797
+ index += 1
798
+ end
799
+ end
800
+
801
+ output.map do |piece|
802
+ next piece if piece.is_a?(String)
803
+
804
+ piece[:closing_html] +
805
+ (piece[:char] * available_delimiter_length(piece)) +
806
+ piece[:opening_html]
807
+ end.join
808
+ end
809
+
810
+ def format_strikethrough(text)
811
+ text.gsub(/~~([^\n~](?:.*?[^\n~])?)~~/, '<del>\1</del>')
812
+ end
813
+
814
+ def autolink_urls(text)
815
+ text.gsub(/(^|[^\w\/{"'=])((?:https?:\/\/|mailto:)[^\s<]+)/) do
816
+ match = Regexp.last_match
817
+ prefix = $1
818
+ before_url = text[0...match.begin(2)]
819
+ if before_url.end_with?('&lt;') || before_url.end_with?('&lt; ')
820
+ match[0]
821
+ else
822
+ url, trailer = strip_trailing_punctuation($2)
823
+ %(#{prefix}<a href="#{h(url)}">#{h(url)}</a>#{h(trailer)})
824
+ end
825
+ end
826
+ end
827
+
828
+ def restore_placeholders(text, placeholders)
829
+ text.gsub(PLACEHOLDER_RE) { placeholders[$1.to_i] }
830
+ end
831
+
832
+ def store_placeholder(placeholders, html)
833
+ placeholders << html
834
+ "\0#{placeholders.length - 1}\0"
835
+ end
836
+
837
+ def parse_labeled_list_line(line)
838
+ return [$1, $2] if line =~ LABEL_LIST_COLON_RE
839
+
840
+ nil
841
+ end
842
+
843
+ def extract_reference_definitions(text)
844
+ lines = split_lines(text)
845
+ kept_lines = []
846
+ index = 0
847
+ in_fenced_code = false
848
+ previous_line = nil
849
+
850
+ while index < lines.length
851
+ line = lines[index]
852
+ if fenced_code_start?(line)
853
+ in_fenced_code = !in_fenced_code
854
+ kept_lines << line
855
+ index += 1
856
+ previous_line = line
857
+ next
858
+ end
859
+
860
+ if in_fenced_code
861
+ kept_lines << line
862
+ index += 1
863
+ previous_line = line
864
+ next
865
+ end
866
+
867
+ parsed = parse_reference_definition_block(lines, index, previous_line)
868
+ if parsed
869
+ normalized = normalize_reference_label(parsed[:label])
870
+ @references[normalized] ||= parsed[:reference] unless normalized.empty?
871
+ kept_lines.concat(parsed[:replacement_lines])
872
+ index = parsed[:next_index]
873
+ previous_line = kept_lines.last
874
+ next
875
+ end
876
+
877
+ kept_lines << line
878
+ index += 1
879
+ previous_line = line
880
+ end
881
+
882
+ kept_lines.join
883
+ end
884
+
885
+ def normalize_reference_label(label)
886
+ normalized = label.to_s.gsub(/\\([\[\]])/, '\1').gsub(/\s+/, ' ').strip
887
+ unicode_casefold_compat(normalized)
888
+ end
889
+
890
+ def reference_link_html(label, ref)
891
+ reference = @references[normalize_reference_label(ref)]
892
+ return nil unless reference
893
+
894
+ attrs = %( href="#{h(reference[:url])}")
895
+ attrs += %( title="#{h(reference[:title])}") if reference[:title]
896
+ %(<a#{attrs}>#{format_inline(unescape_markdown_punctuation(label))}</a>)
897
+ end
898
+
899
+ def reference_image_html(alt, ref)
900
+ reference = @references[normalize_reference_label(ref)]
901
+ return nil unless reference
902
+
903
+ attrs = %( src="#{h(reference[:url])}" alt="#{h(plain_text(alt))}")
904
+ attrs += %( title="#{h(reference[:title])}") if reference[:title]
905
+ "<img#{attrs} />"
906
+ end
907
+
908
+ def blank_line?(line)
909
+ line.strip.empty?
910
+ end
911
+
912
+ def thematic_break?(line)
913
+ line =~ THEMATIC_BREAK_RE
914
+ end
915
+
916
+ def setext_underline_line?(line)
917
+ line =~ SETEXT_HEADING_RE
918
+ end
919
+
920
+ def fenced_code_start?(line)
921
+ !!parse_fence_opener(line)
922
+ end
923
+
924
+ def indented_code_start?(line)
925
+ leading_columns(line) >= 2
926
+ end
927
+
928
+ def indented_code_block_start?(lines, index, previous_block_type = nil)
929
+ return false unless indented_code_start?(lines[index])
930
+ return true if leading_columns(lines[index]) >= 4
931
+ return true if colon_indented_code_block_start?(lines, index)
932
+ return false if previous_block_type == :list
933
+ return false if html_block_start?(lines[index])
934
+ return false if parse_setext_heading(lines, index)
935
+
936
+ !index.zero? && blank_line?(lines[index - 1])
937
+ end
938
+
939
+ def colon_indented_code_block_start?(lines, index)
940
+ return false if index.zero?
941
+ return false unless leading_columns(lines[index]) >= 2
942
+ return false if leading_columns(lines[index]) >= 4
943
+
944
+ previous_line = lines[index - 1]
945
+ return false if blank_line?(previous_line)
946
+
947
+ previous_line.rstrip.end_with?(':')
948
+ end
949
+
950
+ def yard_indented_code_start?(lines, index)
951
+ return false unless leading_columns(lines[index]) >= 2
952
+ return false unless consume_columns(lines[index], 2) =~ /^!!!([\w.+-]+)[ \t]*$/
953
+ return false if index + 1 >= lines.length
954
+
955
+ indented_code_block_start?(lines, index) && indented_code_start?(lines[index + 1])
956
+ end
957
+
958
+ def list_start?(line, interrupt_paragraph = false)
959
+ return false unless (marker = parse_list_marker(line))
960
+ return true unless interrupt_paragraph
961
+
962
+ return false if marker[:content].empty?
963
+
964
+ !marker[:ordered] || marker[:start] == 1
965
+ end
966
+
967
+ def labeled_list_start?(lines, index)
968
+ line = lines[index]
969
+ return true if line =~ LABEL_LIST_COLON_RE
970
+ false
971
+ end
972
+
973
+ def blockquote_start?(line)
974
+ !strip_blockquote_marker(line).nil?
975
+ end
976
+
977
+ def html_block_start?(line, interrupt_paragraph = false)
978
+ !html_block_type(line, interrupt_paragraph).nil?
979
+ end
980
+
981
+ def table_start?(lines, index)
982
+ return false if index + 1 >= lines.length
983
+ table_row?(lines[index]) && lines[index + 1] =~ TABLE_SEPARATOR_RE
984
+ end
985
+
986
+ def table_row?(line)
987
+ stripped = line.strip
988
+ stripped.include?('|') && stripped !~ /\A[|:\-\s]+\z/
989
+ end
990
+
991
+ def split_table_row(line)
992
+ line.strip.sub(/\A\|/, '').sub(/\|\z/, '').split('|').map(&:strip)
993
+ end
994
+
995
+ def table_alignment(cell)
996
+ stripped = cell.strip
997
+ return 'center' if stripped.start_with?(':') && stripped.end_with?(':')
998
+ return 'left' if stripped.start_with?(':')
999
+ return 'right' if stripped.end_with?(':')
1000
+
1001
+ nil
1002
+ end
1003
+
1004
+ def unindent(lines)
1005
+ indent = lines.reject { |line| blank_line?(line) }.map do |line|
1006
+ leading_columns(line)
1007
+ end.min || 4
1008
+
1009
+ lines.map { |line| consume_columns(line, indent) }.join
1010
+ end
1011
+
1012
+ def unindent_indented_code(lines)
1013
+ lines.map { |line| consume_columns(line, 4) }.join
1014
+ end
1015
+
1016
+ def code_block(text, lang = nil)
1017
+ lang, text = extract_codeblock_language(text, lang)
1018
+ attrs = lang ? %( class="#{h(lang)}") : ''
1019
+ "<pre><code#{attrs}>#{h(text)}</code></pre>"
1020
+ end
1021
+
1022
+ def extract_codeblock_language(text, lang = nil)
1023
+ return [lang, text] unless text =~ CODE_LANG_RE
1024
+
1025
+ lang ||= unescape_markdown_punctuation(decode_entities($1))
1026
+ [lang, $']
1027
+ end
1028
+
1029
+ def strip_trailing_punctuation(url)
1030
+ trailer = ''
1031
+ while url =~ /[),.;:!?]\z/
1032
+ trailer = url[-1, 1] + trailer
1033
+ url = url[0...-1]
1034
+ end
1035
+ [url, trailer]
1036
+ end
1037
+
1038
+ def heading_id(text)
1039
+ return '' unless @heading_ids
1040
+
1041
+ " id=\"#{text.gsub(/\W/, '_')}\""
1042
+ end
1043
+
1044
+ def parse_atx_heading(line)
1045
+ stripped = line.chomp.sub(/^\s{0,3}/, '')
1046
+ match = stripped.match(/\A(#{'#' * 6}|#{'#' * 5}|#{'#' * 4}|#{'#' * 3}|#{'#' * 2}|#)(?=[ \t]|$)(.*)\z/)
1047
+ return nil unless match
1048
+
1049
+ level = match[1].length
1050
+ content = match[2]
1051
+ content = content.sub(/\A[ \t]+/, '')
1052
+ content = content.sub(/[ \t]+#+[ \t]*\z/, '')
1053
+ content = '' if content =~ /\A#+\z/
1054
+ content = content.rstrip
1055
+ "<h#{level}#{heading_id(content)}>#{format_inline(content)}</h#{level}>"
1056
+ end
1057
+
1058
+ def parse_fence_opener(line)
1059
+ match = line.match(FENCE_RE)
1060
+ return nil unless match
1061
+
1062
+ indent = match[1].length
1063
+ fence = match[2]
1064
+ info = match[3].to_s.strip
1065
+ return nil if fence.start_with?('`') && info.include?('`')
1066
+
1067
+ lang = info.empty? ? nil : unescape_markdown_punctuation(decode_entities(info.split(/[ \t]/, 2).first))
1068
+ {:char => fence[0, 1], :length => fence.length, :indent => indent, :lang => lang}
1069
+ end
1070
+
1071
+ def fence_closer?(line, char, min_length)
1072
+ stripped = line.sub(/^\s{0,3}/, '')
1073
+ return false unless stripped.start_with?(char)
1074
+
1075
+ run = stripped[/\A#{Regexp.escape(char)}+/]
1076
+ run && run.length >= min_length && stripped.sub(/\A#{Regexp.escape(run)}/, '').strip.empty?
1077
+ end
1078
+
1079
+ def strip_fenced_indent(line, indent)
1080
+ return line.sub(/^\t/, '') if line.start_with?("\t")
1081
+
1082
+ line.sub(/\A {0,#{indent}}/, '')
1083
+ end
1084
+
1085
+ def parse_list_marker(line)
1086
+ source = line.to_s.sub(/\n\z/, '')
1087
+ indent, index = scan_leading_columns(source)
1088
+ return nil if indent > 3
1089
+ return nil if index >= source.length
1090
+
1091
+ char = source[index, 1]
1092
+ current_column = indent
1093
+
1094
+ if '*+-'.include?(char)
1095
+ marker_length = 1
1096
+ marker_end = index + 1
1097
+ current_column += 1
1098
+ padding, marker_end = scan_padding_columns(source, marker_end, current_column)
1099
+ content = source[marker_end..-1].to_s
1100
+ return nil if padding.zero? && !content.empty?
1101
+
1102
+ return {:ordered => false, :bullet => char, :indent => indent,
1103
+ :marker_length => marker_length, :padding => padding, :content => content}
1104
+ end
1105
+
1106
+ number = source[index..-1][/^\d{1,9}/]
1107
+ if number
1108
+ marker_end = index + number.length
1109
+ delimiter = source[marker_end, 1]
1110
+ if delimiter == '.' || delimiter == ')'
1111
+ marker_length = number.length + 1
1112
+ current_column += marker_length
1113
+ marker_end += 1
1114
+ padding, marker_end = scan_padding_columns(source, marker_end, current_column)
1115
+ content = source[marker_end..-1].to_s
1116
+ return nil if padding.zero? && !content.empty?
1117
+
1118
+ return {:ordered => true, :delimiter => delimiter, :start => number.to_i,
1119
+ :indent => indent, :marker_length => marker_length,
1120
+ :padding => padding, :content => content}
1121
+ end
1122
+ end
1123
+
1124
+ if source[index, 2] =~ /\A[A-Za-z]\.\z/
1125
+ marker_length = 2
1126
+ marker_end = index + marker_length
1127
+ current_column += marker_length
1128
+ padding, marker_end = scan_padding_columns(source, marker_end, current_column)
1129
+ content = source[marker_end..-1].to_s
1130
+ return nil if padding.zero? && !content.empty?
1131
+
1132
+ return {:ordered => true, :delimiter => '.', :start => 1,
1133
+ :indent => indent, :marker_length => marker_length,
1134
+ :padding => padding, :content => content}
1135
+ end
1136
+
1137
+ nil
1138
+ end
1139
+
1140
+ def list_item_padding(marker)
1141
+ (1..4).include?(marker[:padding]) ? marker[:padding] : 1
1142
+ end
1143
+
1144
+ def same_list_type?(base, other)
1145
+ return false unless other
1146
+ return base[:bullet] == other[:bullet] if !base[:ordered] && !other[:ordered]
1147
+
1148
+ base[:ordered] && other[:ordered] && base[:delimiter] == other[:delimiter]
1149
+ end
1150
+
1151
+ def block_boundary?(line)
1152
+ thematic_break?(line) || parse_heading(line) || fenced_code_start?(line) ||
1153
+ table_row?(line) || labeled_list_start?([line, ''], 0) || blockquote_start?(line) ||
1154
+ html_block_start?(line) || parse_list_marker(line)
1155
+ end
1156
+
1157
+ def parse_reference_definition(label, definition)
1158
+ definition = definition.to_s
1159
+ return nil if normalize_reference_label(label).empty?
1160
+
1161
+ index = 0
1162
+ index += 1 while index < definition.length && definition[index, 1] =~ /[ \t\n]/
1163
+ return nil if index >= definition.length
1164
+
1165
+ if definition[index, 1] == '<'
1166
+ close = definition.index('>', index + 1)
1167
+ return nil unless close
1168
+ url = definition[(index + 1)...close]
1169
+ return nil if url.include?("\n")
1170
+ index = close + 1
1171
+ return nil if index < definition.length && definition[index, 1] !~ /[ \t\n]/
1172
+ else
1173
+ start = index
1174
+ while index < definition.length && definition[index, 1] !~ /[ \t\n]/
1175
+ index += 1
1176
+ end
1177
+ url = definition[start...index]
1178
+ end
1179
+
1180
+ return nil if url.nil? || url.include?('<') || url.include?('>')
1181
+
1182
+ index += 1 while index < definition.length && definition[index, 1] =~ /[ \t\n]/
1183
+ title = nil
1184
+
1185
+ if index < definition.length
1186
+ delimiter = definition[index, 1]
1187
+ close_delimiter = delimiter == '(' ? ')' : delimiter
1188
+ if delimiter == '"' || delimiter == "'" || delimiter == '('
1189
+ index += 1
1190
+ start = index
1191
+ buffer = String.new
1192
+ while index < definition.length
1193
+ char = definition[index, 1]
1194
+ if char == '\\' && index + 1 < definition.length
1195
+ buffer << definition[index, 2]
1196
+ index += 2
1197
+ next
1198
+ end
1199
+ break if char == close_delimiter
1200
+ buffer << char
1201
+ index += 1
1202
+ end
1203
+ return nil if index >= definition.length || definition[index, 1] != close_delimiter
1204
+ title = buffer
1205
+ index += 1
1206
+ index += 1 while index < definition.length && definition[index, 1] =~ /[ \t\n]/
1207
+ return nil unless index == definition.length
1208
+ else
1209
+ return nil
1210
+ end
1211
+ end
1212
+
1213
+ {
1214
+ :url => escape_url(unescape_markdown_punctuation(decode_entities(url))),
1215
+ :title => title && unescape_markdown_punctuation(decode_entities(title))
1216
+ }
1217
+ end
1218
+
1219
+ def replace_inline_constructs(text, placeholders, prefix)
1220
+ output = String.new
1221
+ index = 0
1222
+
1223
+ while index < text.length
1224
+ if prefix
1225
+ if text[index, 2] != '![' || (index > 0 && text[index - 1, 1] == '\\')
1226
+ output << text[index, 1]
1227
+ index += 1
1228
+ next
1229
+ end
1230
+ label_start = index + 2
1231
+ else
1232
+ if text[index, 1] != '[' || (index > 0 && text[index - 1, 1] == '\\')
1233
+ output << text[index, 1]
1234
+ index += 1
1235
+ next
1236
+ end
1237
+ label_start = index + 1
1238
+ end
1239
+
1240
+ label_end = find_closing_bracket(text, label_start - 1)
1241
+ unless label_end && text[label_end + 1, 1] == '('
1242
+ output << text[index, 1]
1243
+ index += 1
1244
+ next
1245
+ end
1246
+
1247
+ dest, title, consumed = parse_inline_destination(text, label_end + 2, placeholders)
1248
+ unless consumed
1249
+ output << text[index, 1]
1250
+ index += 1
1251
+ next
1252
+ end
1253
+
1254
+ label = text[label_start...label_end]
1255
+ if !prefix && contains_nested_link?(label, placeholders)
1256
+ output << text[index, 1]
1257
+ index += 1
1258
+ next
1259
+ end
1260
+ output << yield(label, dest, title)
1261
+ index = consumed
1262
+ end
1263
+
1264
+ output
1265
+ end
1266
+
1267
+ def scan_reference_constructs(text, placeholders, kind)
1268
+ output = String.new
1269
+ index = 0
1270
+
1271
+ while index < text.length
1272
+ image = kind == :image
1273
+ if image
1274
+ if text[index, 2] != '![' || (index > 0 && text[index - 1, 1] == '\\')
1275
+ output << text[index, 1]
1276
+ index += 1
1277
+ next
1278
+ end
1279
+ label_open = index + 1
1280
+ else
1281
+ if text[index, 1] != '[' || (index > 0 && text[index - 1, 1] == '\\')
1282
+ output << text[index, 1]
1283
+ index += 1
1284
+ next
1285
+ end
1286
+ label_open = index
1287
+ end
1288
+
1289
+ label_close = find_closing_bracket(text, label_open)
1290
+ unless label_close
1291
+ output << text[index, 1]
1292
+ index += 1
1293
+ next
1294
+ end
1295
+
1296
+ next_char = text[label_close + 1, 1]
1297
+ label = restore_placeholders(text[(label_open + 1)...label_close], placeholders)
1298
+ html = nil
1299
+ consumed = nil
1300
+
1301
+ if next_char == '['
1302
+ ref_close = find_closing_bracket(text, label_close + 1)
1303
+ if ref_close
1304
+ ref = restore_placeholders(text[(label_close + 2)...ref_close], placeholders)
1305
+ ref = label if ref.empty?
1306
+ if kind == :link && contains_nested_link?(label, placeholders)
1307
+ output << text[index]
1308
+ index += 1
1309
+ next
1310
+ end
1311
+ html = kind == :image ? reference_image_html(label, ref) : reference_link_html(label, ref)
1312
+ consumed = ref_close + 1 if html
1313
+ end
1314
+ else
1315
+ if kind == :link && contains_nested_link?(label, placeholders)
1316
+ output << text[index, 1]
1317
+ index += 1
1318
+ next
1319
+ end
1320
+ html = kind == :image ? reference_image_html(label, label) : reference_link_html(label, label)
1321
+ consumed = label_close + 1 if html
1322
+ end
1323
+
1324
+ if html
1325
+ output << store_placeholder(placeholders, html)
1326
+ index = consumed
1327
+ else
1328
+ output << text[index, 1]
1329
+ index += 1
1330
+ end
1331
+ end
1332
+
1333
+ output
1334
+ end
1335
+
1336
+ def find_closing_bracket(text, open_index)
1337
+ depth = 0
1338
+ index = open_index
1339
+ while index < text.length
1340
+ char = text[index, 1]
1341
+ if char == '['
1342
+ depth += 1
1343
+ elsif char == ']'
1344
+ depth -= 1
1345
+ return index if depth.zero?
1346
+ elsif char == '\\'
1347
+ index += 1
1348
+ end
1349
+ index += 1
1350
+ end
1351
+ nil
1352
+ end
1353
+
1354
+ def find_matching_backtick_run(text, index, length)
1355
+ while index < text.length
1356
+ if text[index, 1] == '`'
1357
+ run_length = 1
1358
+ run_length += 1 while index + run_length < text.length && text[index + run_length, 1] == '`'
1359
+ return index if run_length == length
1360
+
1361
+ index += run_length
1362
+ next
1363
+ end
1364
+ index += 1
1365
+ end
1366
+
1367
+ nil
1368
+ end
1369
+
1370
+ def parse_inline_destination(text, index, placeholders = nil)
1371
+ while index < text.length && text[index, 1] =~ /[ \t\n]/
1372
+ index += 1
1373
+ end
1374
+
1375
+ if text[index, 1] == '<'
1376
+ close = text.index('>', index + 1)
1377
+ return [nil, nil, nil] unless close
1378
+ dest = text[(index + 1)...close]
1379
+ return [nil, nil, nil] if dest.include?("\n") || dest.include?('\\')
1380
+ dest = dest.gsub(' ', '%20')
1381
+ index = close + 1
1382
+ else
1383
+ close = index
1384
+ parens = 0
1385
+ while close < text.length
1386
+ char = text[close, 1]
1387
+ if char == '\\' && close + 1 < text.length
1388
+ close += 2
1389
+ next
1390
+ end
1391
+ break if parens.zero? && (char == ')' || char =~ /\s/)
1392
+ parens += 1 if char == '('
1393
+ parens -= 1 if char == ')'
1394
+ close += 1
1395
+ end
1396
+ dest = text[index...close]
1397
+ index = close
1398
+ end
1399
+
1400
+ if placeholders
1401
+ restored_dest = restore_placeholders(dest.to_s, placeholders)
1402
+ if restored_dest.start_with?('<')
1403
+ return [nil, nil, nil] if restored_dest.include?("\n") || restored_dest.include?('\\')
1404
+ return [nil, nil, nil] unless restored_dest.end_with?('>') && restored_dest.index('>') == restored_dest.length - 1
1405
+
1406
+ dest = restored_dest[1...-1]
1407
+ end
1408
+ end
1409
+
1410
+ while index < text.length && text[index, 1] =~ /[ \t\n]/
1411
+ index += 1
1412
+ end
1413
+
1414
+ title = nil
1415
+ if text[index, 1] == '"' || text[index, 1] == "'"
1416
+ delimiter = text[index, 1]
1417
+ index += 1
1418
+ buffer = String.new
1419
+ while index < text.length
1420
+ char = text[index, 1]
1421
+ if char == '\\' && index + 1 < text.length
1422
+ buffer << text[index, 2]
1423
+ index += 2
1424
+ next
1425
+ end
1426
+ break if char == delimiter
1427
+ buffer << char
1428
+ index += 1
1429
+ end
1430
+ return [nil, nil, nil] unless index < text.length && text[index, 1] == delimiter
1431
+ title = buffer
1432
+ index += 1
1433
+ elsif text[index, 1] == '('
1434
+ index += 1
1435
+ buffer = String.new
1436
+ depth = 1
1437
+ while index < text.length
1438
+ char = text[index, 1]
1439
+ if char == '\\' && index + 1 < text.length
1440
+ buffer << text[index, 2]
1441
+ index += 2
1442
+ next
1443
+ end
1444
+ if char == '('
1445
+ depth += 1
1446
+ elsif char == ')'
1447
+ depth -= 1
1448
+ break if depth.zero?
1449
+ end
1450
+ buffer << char
1451
+ index += 1
1452
+ end
1453
+ return [nil, nil, nil] unless index < text.length && text[index, 1] == ')'
1454
+ title = buffer
1455
+ index += 1
1456
+ end
1457
+
1458
+ while index < text.length && text[index, 1] =~ /[ \t\n]/
1459
+ index += 1
1460
+ end
1461
+ return [nil, nil, nil] unless text[index, 1] == ')'
1462
+
1463
+ [dest.to_s, title, index + 1]
1464
+ end
1465
+
1466
+ def plain_text(text)
1467
+ text = text.to_s.gsub(/!\[([^\]]*)\]\([^)]+\)/, '\1')
1468
+ text = text.gsub(/\[([^\]]+)\]\([^)]+\)/, '\1')
1469
+ text = text.gsub(/[*_~`]/, '')
1470
+ decode_entities(unescape_markdown_punctuation(text))
1471
+ end
1472
+
1473
+ def parse_text_link_destination(text, index)
1474
+ return [nil, 0] unless text[index, 1] == '['
1475
+
1476
+ dest = String.new
1477
+ cursor = index + 1
1478
+
1479
+ while cursor < text.length
1480
+ char = text[cursor, 1]
1481
+
1482
+ if char == '\\'
1483
+ escaped = text[cursor + 1, 1]
1484
+ return [nil, 0] unless escaped && "[]\\*+<_".include?(escaped)
1485
+
1486
+ dest << escaped
1487
+ cursor += 2
1488
+ next
1489
+ end
1490
+
1491
+ return [nil, 0] if char =~ /\s/
1492
+ return [dest, cursor - index + 1] if char == ']'
1493
+ return [nil, 0] if char == '['
1494
+
1495
+ dest << char
1496
+ cursor += 1
1497
+ end
1498
+
1499
+ [nil, 0]
1500
+ end
1501
+
1502
+ def find_braced_text_link_label_end(text, index)
1503
+ cursor = index + 1
1504
+
1505
+ while cursor < text.length
1506
+ char = text[cursor, 1]
1507
+
1508
+ if char == '\\'
1509
+ cursor += 2
1510
+ next
1511
+ end
1512
+
1513
+ return cursor if char == '}'
1514
+ cursor += 1
1515
+ end
1516
+
1517
+ nil
1518
+ end
1519
+
1520
+ def link_html(label, dest, title = nil)
1521
+ href = escape_url(unescape_markdown_punctuation(decode_entities(dest.to_s)))
1522
+ normalized_title = title && unescape_markdown_punctuation(decode_entities(title))
1523
+ attrs = %( href="#{h(href)}")
1524
+ attrs += %( title="#{h(normalized_title)}") if normalized_title
1525
+ %(<a#{attrs}>#{format_inline(label)}</a>)
1526
+ end
1527
+
1528
+ def image_html(label, dest, title = nil)
1529
+ src = escape_url(unescape_markdown_punctuation(decode_entities(dest.to_s)))
1530
+ normalized_title = title && unescape_markdown_punctuation(decode_entities(title))
1531
+ attrs = %( src="#{h(src)}" alt="#{h(plain_text(label))}")
1532
+ attrs += %( title="#{h(normalized_title)}") if normalized_title
1533
+ "<img#{attrs} />"
1534
+ end
1535
+
1536
+ def decode_entities(text)
1537
+ text.gsub(ENTITY_RE) do |entity|
1538
+ decode_entity(entity)
1539
+ end
1540
+ end
1541
+
1542
+ def reference_definition_continuation?(line)
1543
+ return true if line =~ /^(?: {1,3}|\t)(.*)$/
1544
+ return true if line =~ /\A<(?:[^>\n]*)>\s*\z/
1545
+ return true if line =~ /\A(?:"[^"]*"|'[^']*'|\([^)]*\))\s*\z/
1546
+
1547
+ false
1548
+ end
1549
+
1550
+ def normalize_code_span(code)
1551
+ code = code.gsub(/\n/, ' ')
1552
+ if code.length > 1 && code.start_with?(' ') && code.end_with?(' ') && code.strip != ''
1553
+ code[1...-1]
1554
+ else
1555
+ code
1556
+ end
1557
+ end
1558
+
1559
+ def available_delimiter_length(token)
1560
+ token[:length] - token[:left_consumed] - token[:right_consumed]
1561
+ end
1562
+
1563
+ def odd_match_disallowed?(opener, closer)
1564
+ return false unless opener[:can_close] || closer[:can_open]
1565
+
1566
+ opener_len = available_delimiter_length(opener)
1567
+ closer_len = available_delimiter_length(closer)
1568
+ ((opener_len + closer_len) % 3).zero? &&
1569
+ (opener_len % 3 != 0 || closer_len % 3 != 0)
1570
+ end
1571
+
1572
+ def delimiter_flags(text, run_start, run_end, char)
1573
+ before = run_start.zero? ? nil : text[run_start - 1, 1]
1574
+ after = run_end >= text.length ? nil : text[run_end, 1]
1575
+ before_whitespace = whitespace_char?(before)
1576
+ after_whitespace = whitespace_char?(after)
1577
+ before_punctuation = punctuation_char?(before)
1578
+ after_punctuation = punctuation_char?(after)
1579
+
1580
+ left_flanking = !after_whitespace && (!after_punctuation || before_whitespace || before_punctuation)
1581
+ right_flanking = !before_whitespace && (!before_punctuation || after_whitespace || after_punctuation)
1582
+
1583
+ if char == '_'
1584
+ [
1585
+ left_flanking && (!right_flanking || before_punctuation),
1586
+ right_flanking && (!left_flanking || after_punctuation)
1587
+ ]
1588
+ else
1589
+ [left_flanking, right_flanking]
1590
+ end
1591
+ end
1592
+
1593
+ def whitespace_char?(char)
1594
+ char.nil? || char =~ /\s/ || char == NAMED_ENTITIES['nbsp']
1595
+ end
1596
+
1597
+ def punctuation_char?(char)
1598
+ return false if char.nil?
1599
+
1600
+ ascii_punctuation_char?(char) || unicode_symbol_char?(char)
1601
+ end
1602
+
1603
+ def unicode_symbol_char?(char)
1604
+ codepoint = char.to_s.unpack('U*').first
1605
+ return false unless codepoint
1606
+
1607
+ (0x00A2..0x00A9).include?(codepoint) ||
1608
+ (0x00AC..0x00AE).include?(codepoint) ||
1609
+ (0x00B0..0x00B4).include?(codepoint) ||
1610
+ codepoint == 0x00B6 ||
1611
+ codepoint == 0x00B7 ||
1612
+ codepoint == 0x00D7 ||
1613
+ codepoint == 0x00F7 ||
1614
+ (0x20A0..0x20CF).include?(codepoint)
1615
+ end
1616
+
1617
+ def ascii_punctuation_char?(char)
1618
+ return false unless ascii_only_compat?(char)
1619
+
1620
+ byte = char.to_s.unpack('C').first
1621
+ return false unless byte
1622
+
1623
+ (0x21..0x2F).include?(byte) ||
1624
+ (0x3A..0x40).include?(byte) ||
1625
+ (0x5B..0x60).include?(byte) ||
1626
+ (0x7B..0x7E).include?(byte)
1627
+ end
1628
+
1629
+ def leading_columns(line)
1630
+ scan_leading_columns(line.to_s).first
1631
+ end
1632
+
1633
+ def indented_to?(line, indent)
1634
+ leading_columns(line) >= indent
1635
+ end
1636
+
1637
+ def strip_list_item_indent(line, content_indent)
1638
+ consume_columns(line, content_indent, 0, true)
1639
+ end
1640
+
1641
+ def escape_list_marker_text(line)
1642
+ source = line.to_s.sub(/\n\z/, '')
1643
+ newline = source.length == line.to_s.length ? '' : "\n"
1644
+
1645
+ if source =~ /\A([*+-])([ \t].*)\z/
1646
+ "\\#{$1}#{$2}#{newline}"
1647
+ elsif source =~ /\A(\d{1,9}[.)])([ \t].*)\z/
1648
+ "\\#{$1}#{$2}#{newline}"
1649
+ elsif source =~ /\A([A-Za-z]\.)([ \t].*)\z/
1650
+ "\\#{$1}#{$2}#{newline}"
1651
+ else
1652
+ source + newline
1653
+ end
1654
+ end
1655
+
1656
+ def escape_url(url)
1657
+ percent_encode_url(url.to_s, /[A-Za-z0-9\-._~:\/?#\[\]@!$&'()*+,;=%]/)
1658
+ end
1659
+
1660
+ def escape_autolink_url(url)
1661
+ percent_encode_url(url.to_s, /[A-Za-z0-9\-._~:\/?#@!$&'()*+,;=%]/)
1662
+ end
1663
+
1664
+ def parse_reference_definition_block(lines, index, previous_line)
1665
+ line = lines[index]
1666
+ return nil unless reference_definition_context?(previous_line)
1667
+
1668
+ prefix, content = split_reference_container_prefix(line)
1669
+ return nil unless content =~ /^\s{0,3}\[/
1670
+
1671
+ label_buffer = content.sub(/^\s{0,3}/, '')
1672
+ consumed_lines = [line]
1673
+ label_end = find_reference_label_end(label_buffer)
1674
+ current_index = index
1675
+
1676
+ while label_end.nil?
1677
+ current_index += 1
1678
+ return nil if current_index >= lines.length
1679
+
1680
+ next_prefix, next_content = split_reference_container_prefix(lines[current_index])
1681
+ return nil unless next_prefix == prefix
1682
+
1683
+ label_buffer << next_content
1684
+ consumed_lines << lines[current_index]
1685
+ label_end = find_reference_label_end(label_buffer)
1686
+ end
1687
+
1688
+ label = label_buffer[1...label_end]
1689
+ remainder = label_buffer[(label_end + 2)..-1].to_s
1690
+ current_index += 1
1691
+
1692
+ while remainder.strip.empty? && current_index < lines.length
1693
+ next_prefix, next_content = split_reference_container_prefix(lines[current_index])
1694
+ break unless next_prefix == prefix
1695
+ break if blank_line?(next_content)
1696
+
1697
+ remainder << (remainder.empty? ? next_content : "\n#{next_content}")
1698
+ consumed_lines << lines[current_index]
1699
+ current_index += 1
1700
+ end
1701
+
1702
+ while unclosed_reference_title?(remainder) && current_index < lines.length
1703
+ next_prefix, next_content = split_reference_container_prefix(lines[current_index])
1704
+ break unless next_prefix == prefix
1705
+ break if blank_line?(next_content)
1706
+
1707
+ remainder << "\n#{next_content}"
1708
+ consumed_lines << lines[current_index]
1709
+ current_index += 1
1710
+ end
1711
+
1712
+ while current_index < lines.length
1713
+ next_prefix, next_content = split_reference_container_prefix(lines[current_index])
1714
+ break unless next_prefix == prefix
1715
+ break unless reference_definition_continuation?(next_content)
1716
+
1717
+ remainder << "\n#{next_content.strip}"
1718
+ consumed_lines << lines[current_index]
1719
+ current_index += 1
1720
+ end
1721
+
1722
+ reference = parse_reference_definition(label, remainder)
1723
+ return nil unless reference
1724
+
1725
+ {
1726
+ :label => label,
1727
+ :reference => reference,
1728
+ :replacement_lines => consumed_lines.map { |consumed| reference_definition_replacement_line(consumed, prefix) },
1729
+ :next_index => current_index
1730
+ }
1731
+ end
1732
+
1733
+ def reference_definition_context?(previous_line)
1734
+ return true if previous_line.nil?
1735
+ return true if blank_line?(previous_line)
1736
+
1737
+ stripped = split_reference_container_prefix(previous_line).last
1738
+ block_boundary?(stripped)
1739
+ end
1740
+
1741
# Peels every leading blockquote marker off +line+.
#
# @param line [String] a source line (a trailing newline is chomped)
# @return [Array(String, String)] the concatenated marker prefixes and the
#   remaining content without its trailing newline
def split_reference_container_prefix(line)
  prefix = String.new
  content = line.chomp

  # Keep splitting until split_blockquote_prefix reports no more markers.
  loop do
    parts = split_blockquote_prefix(content)
    break unless parts

    prefix << parts[0]
    content = parts[1].chomp
  end

  [prefix, content]
end
1752
+
1753
# Builds the line that stands in for a consumed reference-definition line.
#
# @param line [String] the consumed source line (not inspected here)
# @param prefix [String] the container prefix of the definition
# @return [String] an empty string when +prefix+ is empty, otherwise the
#   right-stripped prefix followed by a newline
def reference_definition_replacement_line(line, prefix)
  prefix.empty? ? '' : "#{prefix.rstrip}\n"
end
1758
+
1759
# Locates the closing bracket of a link reference definition label.
#
# The label must start at the first character of +text+ with "[" and end
# at the first unescaped "]", which has to be followed directly by ":".
# A backslash escapes the character after it. An unescaped "[" inside the
# label, or an unescaped "]" that is not followed by ":", means +text+
# does not start a reference definition (link labels may not contain
# unescaped square brackets).
#
# @param text [String] candidate definition text
# @return [Integer, nil] index of the closing "]" or nil when +text+ does
#   not begin with a valid "[label]:" sequence
def find_reference_label_end(text)
  return nil unless text.start_with?('[')

  index = 1
  while index < text.length
    case text[index, 1]
    when '\\'
      # Skip the backslash and the character it escapes.
      index += 2
    when '['
      return nil
    when ']'
      # The first unescaped "]" terminates the label either way.
      return text[index + 1, 1] == ':' ? index : nil
    else
      index += 1
    end
  end

  nil
end
1777
+
1778
# Reports whether +label+ (after placeholder restoration) contains
# something that looks like a link or image.
#
# @param label [#to_s] link label text
# @param placeholders [Object] passed through to +restore_placeholders+
# @return [Boolean] true when the restored text includes "<a " or holds an
#   unescaped "[" / "![" whose closing bracket (per +find_closing_bracket+)
#   is immediately followed by "(" or "["
def contains_nested_link?(label, placeholders)
  text = restore_placeholders(label.to_s, placeholders)
  return true if text.include?('<a ')

  0.upto(text.length - 1) do |index|
    # A preceding backslash escapes the bracket.
    next if index > 0 && text[index - 1, 1] == '\\'

    label_open =
      if text[index, 2] == '!['
        index + 1
      elsif text[index, 1] == '['
        index
      end
    next unless label_open

    label_close = find_closing_bracket(text, label_open)
    next unless label_close

    follower = text[label_close + 1, 1]
    return true if follower == '(' || follower == '['
  end

  false
end
1805
+
1806
# Heuristic check for a reference title that is still open.
#
# @param text [#to_s] the definition remainder collected so far
# @return [Boolean] false for blank input; otherwise true when the
#   right-stripped text has an odd number of single quotes, an odd number
#   of double quotes, or more "(" than ")"
def unclosed_reference_title?(text)
  candidate = text.to_s.rstrip
  return false if candidate.empty?
  return true if candidate.count("'").odd?
  return true if candidate.count('"').odd?

  candidate.count('(') > candidate.count(')')
end
1817
+
1818
# Percent-encodes every character of +text+ that is not allowed.
#
# A character is kept verbatim only when it is ASCII and matches
# +allowed_re+ in its entirety; every other character is replaced by the
# "%XX" (uppercase hex) encoding of each of its UTF-8 bytes.
#
# @param text [#to_s] the text to encode
# @param allowed_re [Regexp] pattern describing one allowed character; only
#   its +source+ is used, so regexp options are not carried over
# @return [String] the encoded text
def percent_encode_url(text, allowed_re)
  encoded = String.new
  # Built lazily once per call instead of once per character.
  allowed_char_re = nil

  each_char_compat(text.to_s) do |char|
    if ascii_only_compat?(char)
      allowed_char_re ||= /\A#{allowed_re.source}\z/
      if char =~ allowed_char_re
        encoded << char
        next
      end
    end

    utf8_bytes(char).each do |byte|
      encoded << sprintf('%%%02X', byte)
    end
  end

  encoded
end
1833
+
1834
# Classifies the kind of HTML block that +line+ opens, if any.
#
# The returned numbers are matched by +html_block_end?+; they appear to
# mirror the seven HTML block start conditions of the CommonMark spec.
#
#   1 - <script>, <pre>, <style> or <textarea> opening tag
#   2 - HTML comment ("<!--")
#   3 - processing instruction ("<?")
#   4 - declaration ("<!" followed by an uppercase letter)
#   5 - CDATA section ("<![CDATA[")
#   6 - opening/closing tag whose name is listed in HTML_BLOCK_TAGS
#   7 - any other complete opening or closing tag alone on the line
#
# @param line [String] the source line
# @param interrupt_paragraph [Boolean] when true, type 7 is never reported
# @return [Integer, nil] the block type, or nil when the line does not
#   start with "<" after at most three whitespace characters or matches no
#   type
def html_block_type(line, interrupt_paragraph = false)
  stripped = line.chomp
  # Every type below needs "<" after at most three leading whitespace chars.
  return nil unless stripped =~ /^\s{0,3}</

  return 1 if stripped =~ /^\s{0,3}<(?:script|pre|style|textarea)(?:\s|>|$)/i
  return 2 if stripped =~ /^\s{0,3}<!--/
  return 3 if stripped =~ /^\s{0,3}<\?/
  return 4 if stripped =~ /^\s{0,3}<![A-Z]/
  return 5 if stripped =~ /^\s{0,3}<!\[CDATA\[/
  return 6 if stripped =~ /^\s{0,3}<\/?(?:#{HTML_BLOCK_TAGS.join('|')})(?:\s|\/?>|$)/i
  return nil if interrupt_paragraph

  return 7 if stripped =~ /^\s{0,3}(?:<[A-Za-z][A-Za-z0-9-]*(?:\s+[A-Za-z_:][\w:.-]*(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'=<>`]+))?)*\s*\/?>|<\/[A-Za-z][A-Za-z0-9-]*\s*>)\s*$/

  nil
end
1850
+
1851
# Tests whether +line+ terminates an HTML block of the given +type+.
#
# @param type [Integer] block type as returned by +html_block_type+
# @param line [String] the line to inspect
# @return [Object] for type 1 the match index (or nil) of a closing
#   script/pre/style/textarea tag; for types 2-5 whether the line contains
#   "-->", "?>", ">" or "]]>" respectively; for types 6 and 7 the result
#   of +blank_line?+; false for any other type
def html_block_end?(type, line)
  return line =~ %r{</(?:script|pre|style|textarea)\s*>}i if type == 1
  return line.include?('-->') if type == 2
  return line.include?('?>') if type == 3
  return line.include?('>') if type == 4
  return line.include?(']]>') if type == 5
  return blank_line?(line) if type == 6 || type == 7

  false
end
1869
+
1870
# Decodes a single HTML entity or numeric character reference.
#
# Decimal ("&#65;") and hexadecimal ("&#x41;") references are converted to
# the UTF-8 encoding of their code point. Code point 0 and the UTF-16
# surrogate range U+D800..U+DFFF cannot be represented as valid UTF-8 text
# and are replaced by U+FFFD. References above U+10FFFF are returned
# unchanged. Anything else is treated as a named entity and looked up in
# NAMED_ENTITIES, falling back to CGI.unescapeHTML.
#
# @param entity [String] the entity text including "&" and ";"
# @return [String] the decoded character(s), or +entity+ itself when it
#   cannot be decoded
def decode_entity(entity)
  case entity
  when /\A&#(\d+);\z/
    codepoint = $1.to_i
  when /\A&#[xX]([0-9A-Fa-f]+);\z/
    codepoint = $1.to_i(16)
  else
    name = entity[1..-2]
    return [0x00E4].pack('U') if name == 'auml'
    return NAMED_ENTITIES[name] || CGI.unescapeHTML(entity)
  end

  return [0xFFFD].pack('U') if codepoint.zero?
  # pack('U') would happily emit surrogates, yielding an invalid string.
  return [0xFFFD].pack('U') if codepoint >= 0xD800 && codepoint <= 0xDFFF
  return entity if codepoint > 0x10FFFF
  [codepoint].pack('U')
rescue RangeError
  entity
end
1888
+
1889
# Escapes the HTML-significant characters &, <, > and " in +text+.
#
# @param text [#to_s] raw text
# @return [String] the text with those four characters replaced by
#   &amp;, &lt;, &gt; and &quot; (single quotes are left untouched)
def h(text)
  # "&" must be handled first so the entities added later stay intact.
  [['&', '&amp;'], ['<', '&lt;'], ['>', '&gt;'], ['"', '&quot;']].inject(text.to_s) do |escaped, (raw, entity)|
    escaped.gsub(raw, entity)
  end
end
1892
+
1893
# Replaces every match of ESCAPABLE_CHARS_RE in +text+ with the match's
# first capture group.
#
# @param text [#to_s] text that may contain backslash escapes
# @return [String] the text with the escapes resolved
def unescape_markdown_punctuation(text)
  source = text.to_s
  source.gsub(ESCAPABLE_CHARS_RE, '\1')
end
1896
+
1897
# Splits +text+ at line starts, keeping each line's terminator.
#
# @param text [#to_s] the text to split
# @return [Array<String>] the lines, newline characters included
def split_lines(text)
  source = text.to_s
  source.split(/^/, -1)
end
1900
+
1901
# Measures the leading run of spaces and tabs in +text+.
#
# A space is one column wide; a tab advances to the next multiple of
# TAB_WIDTH.
#
# @param text [#to_s] the text to scan
# @return [Array(Integer, Integer)] the column width of the leading
#   whitespace and the index of the first character after it
def scan_leading_columns(text)
  source = text.to_s
  width = 0
  position = 0

  until position >= source.length
    case source[position, 1]
    when ' '
      width += 1
    when "\t"
      width += TAB_WIDTH - (width % TAB_WIDTH)
    else
      break
    end
    position += 1
  end

  [width, position]
end
1920
+
1921
# Measures the run of spaces and tabs in +text+ that starts at +index+.
#
# Tab stops are multiples of TAB_WIDTH, computed from +start_column+.
#
# @param text [#to_s] the text to scan
# @param index [Integer] position at which scanning starts
# @param start_column [Integer] column that +index+ corresponds to
# @return [Array(Integer, Integer)] the number of columns the whitespace
#   run covers and the index of the first character after it
def scan_padding_columns(text, index, start_column)
  source = text.to_s
  position = index
  column = start_column
  padding = 0

  until position >= source.length
    case source[position, 1]
    when ' '
      advance = 1
    when "\t"
      advance = TAB_WIDTH - (column % TAB_WIDTH)
    else
      break
    end

    column += advance
    padding += advance
    position += 1
  end

  [padding, position]
end
1943
+
1944
# Removes up to +columns+ columns of leading spaces/tabs from +text+.
#
# Tab stops are multiples of TAB_WIDTH, counted from +start_column+. A tab
# that reaches past the requested width is consumed as a whole. With
# +normalize_remaining+ the overshoot of such a tab, plus all further
# leading whitespace, is re-emitted as plain spaces in front of the rest.
#
# @param text [#to_s] the text to trim
# @param columns [Integer] number of columns to consume
# @param start_column [Integer] column at which +text+ begins
# @param normalize_remaining [Boolean] convert the leftover leading
#   whitespace to spaces
# @return [String] the remainder of the text
def consume_columns(text, columns, start_column = 0, normalize_remaining = false)
  source = text.to_s
  index = 0
  column = start_column
  remaining = columns
  prefix_width = 0

  # Phase 1: eat whitespace until the requested width is used up.
  while index < source.length && remaining > 0
    case source[index, 1]
    when ' '
      advance = 1
    when "\t"
      advance = TAB_WIDTH - (column % TAB_WIDTH)
    else
      break
    end

    if advance > remaining
      # Partial tab: keep the columns beyond the requested width.
      prefix_width += advance - remaining if normalize_remaining
      remaining = 0
    else
      remaining -= advance
    end
    column += advance
    index += 1
  end

  return source[index..-1].to_s unless normalize_remaining

  # Phase 2: turn the rest of the leading whitespace into spaces.
  while index < source.length
    case source[index, 1]
    when ' '
      advance = 1
    when "\t"
      advance = TAB_WIDTH - (column % TAB_WIDTH)
    else
      break
    end

    prefix_width += advance
    column += advance
    index += 1
  end

  (' ' * prefix_width) + source[index..-1].to_s
end
1996
+
1997
# Decides whether +line+, which carries no blockquote marker, may lazily
# continue the blockquote collected in +quoted_lines+.
#
# @param quoted_lines [Array<String>] blockquote content gathered so far
# @param line [String] the unmarked candidate line
# @return [Boolean] false when +line+ is a block boundary, when it starts
#   indented code outside a paragraph context, or when the last non-blank
#   quoted line starts fenced or indented code; true otherwise
def lazy_blockquote_continuation?(quoted_lines, line)
  return false if block_boundary?(line)
  return false if indented_code_start?(line) && !blockquote_paragraph_context?(quoted_lines)

  previous = quoted_lines.reverse.find { |quoted| !blank_line?(quoted) }
  return true unless previous

  !(fenced_code_start?(previous) || indented_code_start?(previous))
end
2007
+
2008
# Reports whether the blockquote content in +quoted_lines+ ends inside a
# fenced code block that has not been closed yet.
#
# The scan is exactly the one performed by +open_fence_in_lines?+, so this
# method delegates to it instead of duplicating the loop.
#
# @param quoted_lines [Array<String>] blockquote content lines
# @return [Boolean] true when a fence opener has no matching closer
def blockquote_open_fence?(quoted_lines)
  open_fence_in_lines?(quoted_lines)
end
2023
+
2024
# Checks whether the last non-blank line of +quoted_lines+ can be part of
# a paragraph.
#
# @param quoted_lines [Array<String>] blockquote content gathered so far
# @return [Boolean] false when there is no non-blank line or when that
#   line starts a fenced code block, parses as a heading or is a thematic
#   break; true otherwise
def blockquote_paragraph_context?(quoted_lines)
  previous = quoted_lines.reverse.find { |quoted| !blank_line?(quoted) }
  return false unless previous

  disqualified = fenced_code_start?(previous) ||
                 parse_heading(previous) ||
                 thematic_break?(previous)
  !disqualified
end
2033
+
2034
# Prepares a paragraph line: drops the trailing line terminator and the
# first run of whitespace found at a line start.
#
# @param line [#to_s] the raw line
# @return [String] the normalized line
def normalize_paragraph_line(line)
  content = line.to_s.chomp
  content.sub(/^\s+/, '')
end
2037
+
2038
# Prepares a heading line: applies +normalize_paragraph_line+ and then
# removes trailing whitespace as well.
#
# @param line [#to_s] the raw line
# @return [String] the normalized line
def normalize_heading_line(line)
  content = normalize_paragraph_line(line)
  content.rstrip
end
2041
+
2042
# Splits one blockquote marker off the front of +line+.
#
# The marker is a ">" preceded by at most three columns of indentation.
# A single space or tab directly after it belongs to the marker; the
# padding behind the marker is normalized through +consume_columns+.
#
# @param line [#to_s] the source line
# @return [Array(String, String), nil] the marker prefix and the rest of
#   the line (always newline-terminated), or nil when the line does not
#   start with a blockquote marker
def split_blockquote_prefix(line)
  source = line.to_s
  indent, marker_at = scan_leading_columns(source)
  return nil if indent > 3
  return nil unless source[marker_at, 1] == '>'

  prefix = source[0..marker_at]
  rest = source[(marker_at + 1)..-1].to_s

  if [' ', "\t"].include?(rest[0, 1])
    prefix << rest[0, 1]
    # The content starts one column after the ">" marker.
    rest = consume_columns(rest, 1, indent + 1, true)
  end

  rest = "#{rest}\n" unless rest.end_with?("\n")
  [prefix, rest]
end
2057
+
2058
# Returns +line+ without its leading blockquote marker.
#
# @param line [#to_s] the source line
# @return [String, nil] the content after the marker, or the falsy result
#   of +split_blockquote_prefix+ when the line carries no marker
def strip_blockquote_marker(line)
  parts = split_blockquote_prefix(line)
  return parts unless parts

  parts[1]
end
2062
+
2063
# Decides whether a list item's collected lines allow a loose
# continuation.
#
# @param item_lines [Array<String>] lines gathered for the list item
# @return [Boolean] false while a fenced code block is still open in
#   +item_lines+; true when every line is a bare newline; otherwise true
#   only if the last line that is not a bare newline does not parse as a
#   list marker
def loose_list_item_continuation?(item_lines)
  return false if open_fence_in_lines?(item_lines)

  previous = item_lines.reverse.find { |item_line| item_line != "\n" }
  previous ? !parse_list_marker(previous.chomp) : true
end
2071
+
2072
# Reports whether +lines+ end inside a fenced code block that has not been
# closed yet.
#
# Blank lines are ignored. Outside a fence each line is offered to
# +parse_fence_opener+; inside a fence the opener is dropped once
# +fence_closer?+ accepts a line for the opener's :char and :length.
#
# @param lines [Array<String>] the lines to scan
# @return [Boolean] true when an opener is still pending after the scan
def open_fence_in_lines?(lines)
  pending = lines.inject(nil) do |fence, line|
    if blank_line?(line)
      fence
    elsif fence
      fence_closer?(line, fence[:char], fence[:length]) ? nil : fence
    else
      parse_fence_opener(line)
    end
  end

  !pending.nil?
end
2087
+
2088
# Yields every character of +text+ to the given block.
#
# Uses String#each_char when the receiver offers it and falls back to
# scanning with /./m otherwise.
#
# @param text [String] the text to iterate
# @yieldparam char [String] one character
def each_char_compat(text)
  return text.each_char { |char| yield char } if text.respond_to?(:each_char)

  text.scan(/./m) { |char| yield char }
end
2095
+
2096
# Tells whether +text+ consists solely of ASCII characters.
#
# Uses String#ascii_only? when available and otherwise checks that every
# byte is below 128.
#
# @param text [String] the text to test
# @return [Boolean]
def ascii_only_compat?(text)
  return text.ascii_only? if text.respond_to?(:ascii_only?)

  text.to_s.unpack('C*').all? { |byte| byte < 128 }
end
2103
+
2104
# Returns the UTF-8 byte values of +char+.
#
# When the Encoding constant exists the string is transcoded to UTF-8;
# otherwise the value of char[0] is packed as a UTF-8 code point.
#
# @param char [String] a single character
# @return [Array<Integer>] the byte values
def utf8_bytes(char)
  utf8 = defined?(Encoding) ? char.encode(Encoding::UTF_8) : [char[0]].pack('U')
  utf8.unpack('C*')
end
2111
+
2112
# Case-folds +text+ one code point at a time.
#
# The text is unpacked as UTF-8 code points and each one is appended to
# the result through +append_folded_codepoint+.
#
# @param text [#to_s] the text to fold
# @return [String] the folded text
def unicode_casefold_compat(text)
  text.to_s.unpack('U*').inject(String.new) do |folded, codepoint|
    append_folded_codepoint(folded, codepoint)
    folded
  end
end
2122
+
2123
# Appends the case-folded form of +codepoint+ to +buffer+.
#
# Handled explicitly: ASCII A-Z and the code point ranges 0x0391..0x03A1
# and 0x03A3..0x03AB are shifted by 32; 0x03C2 becomes 0x03C3; 0x00DF and
# 0x1E9E become "ss". Every other code point is appended after
# String#downcase, or unchanged if that raises a StandardError.
#
# @param buffer [String] the string being built (mutated)
# @param codepoint [Integer] the code point to fold
# @return [String] +buffer+
def append_folded_codepoint(buffer, codepoint)
  folded =
    case codepoint
    when 0x41..0x5A, 0x0391..0x03A1, 0x03A3..0x03AB
      [codepoint + 32].pack('U')
    when 0x03C2
      [0x03C3].pack('U')
    when 0x00DF, 0x1E9E
      'ss'
    else
      begin
        [codepoint].pack('U').downcase
      rescue StandardError
        [codepoint].pack('U')
      end
    end

  buffer << folded
end
2143
+ end
2144
+ end
2145
+ end
2146
+ end
2147
+ end