commonmarker 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of commonmarker might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/ext/commonmarker/cmark/CMakeLists.txt +10 -4
- data/ext/commonmarker/cmark/Makefile +5 -5
- data/ext/commonmarker/cmark/api_test/CMakeLists.txt +1 -1
- data/ext/commonmarker/cmark/api_test/main.c +16 -0
- data/ext/commonmarker/cmark/build/CMakeCache.txt +3 -4
- data/ext/commonmarker/cmark/build/CMakeFiles/2.8.10.1/CMakeSystem.cmake +4 -4
- data/ext/commonmarker/cmark/build/CMakeFiles/CMakeError.log +12 -12
- data/ext/commonmarker/cmark/build/CMakeFiles/CMakeOutput.log +97 -142
- data/ext/commonmarker/cmark/build/CMakeFiles/Makefile.cmake +0 -1
- data/ext/commonmarker/cmark/build/api_test/CMakeFiles/api_test.dir/build.make +1 -1
- data/ext/commonmarker/cmark/build/api_test/CMakeFiles/api_test.dir/link.txt +1 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/DependInfo.cmake +1 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/build.make +23 -23
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/cmake_clean.cmake +2 -2
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/link.txt +1 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/blocks.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/buffer.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/cmark.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/commonmark.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/houdini_html_u.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/html.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/inlines.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/node.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/references.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/render.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/scanners.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/utf8.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/xml.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/cmake_install.cmake +3 -3
- data/ext/commonmarker/cmark/build/src/cmark_version.h +2 -2
- data/ext/commonmarker/cmark/build/src/config.h +6 -6
- data/ext/commonmarker/cmark/build/src/libcmark.a +0 -0
- data/ext/commonmarker/cmark/build/src/libcmark.pc +1 -1
- data/ext/commonmarker/cmark/build/testdir/CTestTestfile.cmake +4 -4
- data/ext/commonmarker/cmark/changelog.txt +46 -0
- data/ext/commonmarker/cmark/man/man3/cmark.3 +21 -20
- data/ext/commonmarker/cmark/src/CMakeLists.txt +4 -6
- data/ext/commonmarker/cmark/src/bench.h +8 -8
- data/ext/commonmarker/cmark/src/blocks.c +917 -947
- data/ext/commonmarker/cmark/src/buffer.c +213 -288
- data/ext/commonmarker/cmark/src/buffer.h +19 -21
- data/ext/commonmarker/cmark/src/chunk.h +78 -82
- data/ext/commonmarker/cmark/src/cmark.c +9 -17
- data/ext/commonmarker/cmark/src/cmark.h +113 -157
- data/ext/commonmarker/cmark/src/cmark_ctype.c +24 -35
- data/ext/commonmarker/cmark/src/commonmark.c +390 -425
- data/ext/commonmarker/cmark/src/config.h.in +6 -6
- data/ext/commonmarker/cmark/src/houdini.h +21 -15
- data/ext/commonmarker/cmark/src/houdini_href_e.c +50 -57
- data/ext/commonmarker/cmark/src/houdini_html_e.c +36 -51
- data/ext/commonmarker/cmark/src/houdini_html_u.c +119 -124
- data/ext/commonmarker/cmark/src/html.c +289 -307
- data/ext/commonmarker/cmark/src/inlines.c +976 -1030
- data/ext/commonmarker/cmark/src/inlines.h +4 -2
- data/ext/commonmarker/cmark/src/iterator.c +96 -126
- data/ext/commonmarker/cmark/src/iterator.h +5 -5
- data/ext/commonmarker/cmark/src/latex.c +379 -401
- data/ext/commonmarker/cmark/src/main.c +168 -175
- data/ext/commonmarker/cmark/src/man.c +212 -226
- data/ext/commonmarker/cmark/src/node.c +746 -839
- data/ext/commonmarker/cmark/src/node.h +47 -48
- data/ext/commonmarker/cmark/src/parser.h +14 -14
- data/ext/commonmarker/cmark/src/references.c +101 -111
- data/ext/commonmarker/cmark/src/references.h +10 -8
- data/ext/commonmarker/cmark/src/render.c +144 -167
- data/ext/commonmarker/cmark/src/render.h +22 -41
- data/ext/commonmarker/cmark/src/scanners.c +27695 -20903
- data/ext/commonmarker/cmark/src/scanners.h +2 -1
- data/ext/commonmarker/cmark/src/scanners.re +1 -1
- data/ext/commonmarker/cmark/src/utf8.c +276 -419
- data/ext/commonmarker/cmark/src/utf8.h +6 -6
- data/ext/commonmarker/cmark/src/xml.c +129 -144
- data/ext/commonmarker/cmark/test/CMakeLists.txt +4 -4
- data/ext/commonmarker/cmark/test/smart_punct.txt +8 -0
- data/ext/commonmarker/cmark/test/spec.txt +109 -47
- data/lib/commonmarker/version.rb +1 -1
- metadata +2 -2
@@ -1,8 +1,8 @@
|
|
1
1
|
---
|
2
2
|
title: CommonMark Spec
|
3
3
|
author: John MacFarlane
|
4
|
-
version: 0.
|
5
|
-
date:
|
4
|
+
version: 0.22
|
5
|
+
date: 2015-08-23
|
6
6
|
license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)'
|
7
7
|
...
|
8
8
|
|
@@ -204,16 +204,22 @@ In the examples, the `→` character is used to represent tabs.
|
|
204
204
|
Any sequence of [character]s is a valid CommonMark
|
205
205
|
document.
|
206
206
|
|
207
|
-
A [character](@character) is a
|
207
|
+
A [character](@character) is a Unicode code point. Although some
|
208
|
+
code points (for example, combining accents) do not correspond to
|
209
|
+
characters in an intuitive sense, all code points count as characters
|
210
|
+
for purposes of this spec.
|
211
|
+
|
208
212
|
This spec does not specify an encoding; it thinks of lines as composed
|
209
|
-
of
|
213
|
+
of [character]s rather than bytes. A conforming parser may be limited
|
210
214
|
to a certain encoding.
|
211
215
|
|
212
216
|
A [line](@line) is a sequence of zero or more [character]s
|
217
|
+
other than newline (`U+000A`) or carriage return (`U+000D`),
|
213
218
|
followed by a [line ending] or by the end of file.
|
214
219
|
|
215
|
-
A [line ending](@line-ending) is a newline (`U+000A`), carriage return
|
216
|
-
(`U+000D`), or carriage return
|
220
|
+
A [line ending](@line-ending) is a newline (`U+000A`), a carriage return
|
221
|
+
(`U+000D`) not followed by a newline, or a carriage return and a
|
222
|
+
following newline.
|
217
223
|
|
218
224
|
A line containing no characters, or a line containing only spaces
|
219
225
|
(`U+0020`) or tabs (`U+0009`), is called a [blank line](@blank-line).
|
@@ -227,17 +233,17 @@ form feed (`U+000C`), or carriage return (`U+000D`).
|
|
227
233
|
[Whitespace](@whitespace) is a sequence of one or more [whitespace
|
228
234
|
character]s.
|
229
235
|
|
230
|
-
A [
|
231
|
-
any code point in the
|
236
|
+
A [Unicode whitespace character](@unicode-whitespace-character) is
|
237
|
+
any code point in the Unicode `Zs` class, or a tab (`U+0009`),
|
232
238
|
carriage return (`U+000D`), newline (`U+000A`), or form feed
|
233
239
|
(`U+000C`).
|
234
240
|
|
235
241
|
[Unicode whitespace](@unicode-whitespace) is a sequence of one
|
236
|
-
or more [
|
242
|
+
or more [Unicode whitespace character]s.
|
237
243
|
|
238
244
|
A [space](@space) is `U+0020`.
|
239
245
|
|
240
|
-
A [non-whitespace character](@non-
|
246
|
+
A [non-whitespace character](@non-whitespace-character) is any character
|
241
247
|
that is not a [whitespace character].
|
242
248
|
|
243
249
|
An [ASCII punctuation character](@ascii-punctuation-character)
|
@@ -247,7 +253,7 @@ is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`,
|
|
247
253
|
|
248
254
|
A [punctuation character](@punctuation-character) is an [ASCII
|
249
255
|
punctuation character] or anything in
|
250
|
-
the
|
256
|
+
the Unicode classes `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`.
|
251
257
|
|
252
258
|
## Tabs
|
253
259
|
|
@@ -300,6 +306,15 @@ by spaces with a tab stop of 4 characters.
|
|
300
306
|
</blockquote>
|
301
307
|
.
|
302
308
|
|
309
|
+
.
|
310
|
+
foo
|
311
|
+
→bar
|
312
|
+
.
|
313
|
+
<pre><code>foo
|
314
|
+
bar
|
315
|
+
</code></pre>
|
316
|
+
.
|
317
|
+
|
303
318
|
|
304
319
|
## Insecure characters
|
305
320
|
|
@@ -562,8 +577,8 @@ If you want a horizontal rule in a list item, use a different bullet:
|
|
562
577
|
An [ATX header](@atx-header)
|
563
578
|
consists of a string of characters, parsed as inline content, between an
|
564
579
|
opening sequence of 1--6 unescaped `#` characters and an optional
|
565
|
-
closing sequence of any number of `#` characters. The opening sequence
|
566
|
-
of `#` characters cannot be followed directly by a
|
580
|
+
closing sequence of any number of unescaped `#` characters.
|
581
|
+
The opening sequence of `#` characters cannot be followed directly by a
|
567
582
|
[non-whitespace character]. The optional closing sequence of `#`s must be
|
568
583
|
preceded by a [space] and may be followed by spaces only. The opening
|
569
584
|
`#` character may be indented 0-3 spaces. The raw contents of the
|
@@ -695,8 +710,7 @@ Spaces are allowed after the closing sequence:
|
|
695
710
|
<h3>foo</h3>
|
696
711
|
.
|
697
712
|
|
698
|
-
A sequence of `#` characters with
|
699
|
-
[non-whitespace character] following it
|
713
|
+
A sequence of `#` characters with anything but [space]s following it
|
700
714
|
is not a closing sequence, but counts as part of the contents of the
|
701
715
|
header:
|
702
716
|
|
@@ -1646,22 +1660,23 @@ followed by one of the strings (case-insensitive) `address`,
|
|
1646
1660
|
`caption`, `center`, `col`, `colgroup`, `dd`, `details`, `dialog`,
|
1647
1661
|
`dir`, `div`, `dl`, `dt`, `fieldset`, `figcaption`, `figure`,
|
1648
1662
|
`footer`, `form`, `frame`, `frameset`, `h1`, `head`, `header`, `hr`,
|
1649
|
-
`html`, `legend`, `li`, `link`, `main`, `menu`, `menuitem`,
|
1650
|
-
`nav`, `noframes`, `ol`, `optgroup`, `option`, `p`, `param`,
|
1651
|
-
`section`, `source`, `
|
1663
|
+
`html`, `iframe`, `legend`, `li`, `link`, `main`, `menu`, `menuitem`,
|
1664
|
+
`meta`, `nav`, `noframes`, `ol`, `optgroup`, `option`, `p`, `param`,
|
1665
|
+
`section`, `source`, `summary`, `table`, `tbody`, `td`,
|
1652
1666
|
`tfoot`, `th`, `thead`, `title`, `tr`, `track`, `ul`, followed
|
1653
1667
|
by [whitespace], the end of the line, the string `>`, or
|
1654
1668
|
the string `/>`.\
|
1655
1669
|
**End condition:** line is followed by a [blank line].
|
1656
1670
|
|
1657
|
-
7. **Start condition:** line begins with
|
1658
|
-
(with any [tag name]
|
1659
|
-
|
1671
|
+
7. **Start condition:** line begins with a complete [open tag]
|
1672
|
+
or [closing tag] (with any [tag name] other than `script`,
|
1673
|
+
`style`, or `pre`) followed only by [whitespace]
|
1674
|
+
or the end of the line.\
|
1660
1675
|
**End condition:** line is followed by a [blank line].
|
1661
1676
|
|
1662
1677
|
All types of [HTML blocks] except type 7 may interrupt
|
1663
1678
|
a paragraph. Blocks of type 7 may not interrupt a paragraph.
|
1664
|
-
(This
|
1679
|
+
(This restriction is intended to prevent unwanted interpretation
|
1665
1680
|
of long tags inside a wrapped paragraph as starting HTML blocks.)
|
1666
1681
|
|
1667
1682
|
Some simple examples follow. Here are some basic HTML blocks
|
@@ -1861,6 +1876,14 @@ In type 7 blocks, the [tag name] can be anything:
|
|
1861
1876
|
</i>
|
1862
1877
|
.
|
1863
1878
|
|
1879
|
+
.
|
1880
|
+
</ins>
|
1881
|
+
*bar*
|
1882
|
+
.
|
1883
|
+
</ins>
|
1884
|
+
*bar*
|
1885
|
+
.
|
1886
|
+
|
1864
1887
|
These rules are designed to allow us to work with tags that
|
1865
1888
|
can function as either block-level or inline-level tags.
|
1866
1889
|
The `<del>` tag is a nice example. We can surround content with
|
@@ -2831,8 +2854,8 @@ foo</p>
|
|
2831
2854
|
.
|
2832
2855
|
|
2833
2856
|
Laziness only applies to lines that would have been continuations of
|
2834
|
-
paragraphs had they been prepended with
|
2835
|
-
|
2857
|
+
paragraphs had they been prepended with [block quote marker]s.
|
2858
|
+
For example, the `> ` cannot be omitted in the second line of
|
2836
2859
|
|
2837
2860
|
``` markdown
|
2838
2861
|
> foo
|
@@ -2851,7 +2874,7 @@ without changing the meaning:
|
|
2851
2874
|
<hr />
|
2852
2875
|
.
|
2853
2876
|
|
2854
|
-
Similarly, if we omit the
|
2877
|
+
Similarly, if we omit the `> ` in the second line of
|
2855
2878
|
|
2856
2879
|
``` markdown
|
2857
2880
|
> - foo
|
@@ -2874,7 +2897,7 @@ then the block quote ends after the first line:
|
|
2874
2897
|
</ul>
|
2875
2898
|
.
|
2876
2899
|
|
2877
|
-
For the same reason, we can't omit the
|
2900
|
+
For the same reason, we can't omit the `> ` in front of
|
2878
2901
|
subsequent lines of an indented or fenced code block:
|
2879
2902
|
|
2880
2903
|
.
|
@@ -2901,6 +2924,30 @@ foo
|
|
2901
2924
|
<pre><code></code></pre>
|
2902
2925
|
.
|
2903
2926
|
|
2927
|
+
Note that in the following case, we have a paragraph
|
2928
|
+
continuation line:
|
2929
|
+
|
2930
|
+
.
|
2931
|
+
> foo
|
2932
|
+
- bar
|
2933
|
+
.
|
2934
|
+
<blockquote>
|
2935
|
+
<p>foo
|
2936
|
+
- bar</p>
|
2937
|
+
</blockquote>
|
2938
|
+
.
|
2939
|
+
|
2940
|
+
To see why, note that in
|
2941
|
+
|
2942
|
+
```markdown
|
2943
|
+
> foo
|
2944
|
+
> - bar
|
2945
|
+
```
|
2946
|
+
|
2947
|
+
the `- bar` is indented too far to start a list, and can't
|
2948
|
+
be an indented code block because indented code blocks cannot
|
2949
|
+
interrupt paragraphs, so it is a [paragraph continuation line].
|
2950
|
+
|
2904
2951
|
A block quote can be empty:
|
2905
2952
|
|
2906
2953
|
.
|
@@ -3605,6 +3652,21 @@ Here are some list items that start with a blank line but are not empty:
|
|
3605
3652
|
</ul>
|
3606
3653
|
.
|
3607
3654
|
|
3655
|
+
A list item can begin with at most one blank line.
|
3656
|
+
In the following example, `foo` is not part of the list
|
3657
|
+
item:
|
3658
|
+
|
3659
|
+
.
|
3660
|
+
-
|
3661
|
+
|
3662
|
+
foo
|
3663
|
+
.
|
3664
|
+
<ul>
|
3665
|
+
<li></li>
|
3666
|
+
</ul>
|
3667
|
+
<p>foo</p>
|
3668
|
+
.
|
3669
|
+
|
3608
3670
|
Here is an empty bullet list item:
|
3609
3671
|
|
3610
3672
|
.
|
@@ -4849,17 +4911,17 @@ foo
|
|
4849
4911
|
|
4850
4912
|
With the goal of making this standard as HTML-agnostic as possible, all
|
4851
4913
|
valid HTML entities (except in code blocks and code spans)
|
4852
|
-
are recognized as such and converted into
|
4914
|
+
are recognized as such and converted into Unicode characters before
|
4853
4915
|
they are stored in the AST. This means that renderers to formats other
|
4854
4916
|
than HTML need not be HTML-entity aware. HTML renderers may either escape
|
4855
|
-
|
4917
|
+
Unicode characters as entities or leave them as they are. (However,
|
4856
4918
|
`"`, `&`, `<`, and `>` must always be rendered as entities.)
|
4857
4919
|
|
4858
|
-
[Named entities](@name-entities) consist of `&`
|
4859
|
-
|
4920
|
+
[Named entities](@name-entities) consist of `&` + any of the valid
|
4921
|
+
HTML5 entity names + `;`. The
|
4860
4922
|
[following document](https://html.spec.whatwg.org/multipage/entities.json)
|
4861
4923
|
is used as an authoritative source of the valid entity names and their
|
4862
|
-
corresponding codepoints.
|
4924
|
+
corresponding code points.
|
4863
4925
|
|
4864
4926
|
.
|
4865
4927
|
& © Æ Ď
|
@@ -4874,9 +4936,9 @@ corresponding codepoints.
|
|
4874
4936
|
[Decimal entities](@decimal-entities)
|
4875
4937
|
consist of `&#` + a string of 1--8 arabic digits + `;`. Again, these
|
4876
4938
|
entities need to be recognised and transformed into their corresponding
|
4877
|
-
|
4878
|
-
the "unknown codepoint" character (`U+FFFD`). For security reasons,
|
4879
|
-
the codepoint `U+0000` will also be replaced by `U+FFFD`.
|
4939
|
+
Unicode code points. Invalid Unicode code points will be replaced by
|
4940
|
+
the "unknown code point" character (`U+FFFD`). For security reasons,
|
4941
|
+
the code point `U+0000` will also be replaced by `U+FFFD`.
|
4880
4942
|
|
4881
4943
|
.
|
4882
4944
|
# Ӓ Ϡ � �
|
@@ -4884,10 +4946,10 @@ the codepoint `U+0000` will also be replaced by `U+FFFD`.
|
|
4884
4946
|
<p># Ӓ Ϡ � �</p>
|
4885
4947
|
.
|
4886
4948
|
|
4887
|
-
[Hexadecimal entities](@hexadecimal-entities)
|
4888
|
-
|
4889
|
-
|
4890
|
-
|
4949
|
+
[Hexadecimal entities](@hexadecimal-entities) consist of `&#` + either
|
4950
|
+
`X` or `x` + a string of 1-8 hexadecimal digits + `;`. They will also
|
4951
|
+
be parsed and turned into the corresponding Unicode code points in the
|
4952
|
+
AST.
|
4891
4953
|
|
4892
4954
|
.
|
4893
4955
|
" ആ ಫ
|
@@ -5179,18 +5241,18 @@ followed by a `*` character, or a sequence of one or more `_`
|
|
5179
5241
|
characters that is not preceded or followed by a `_` character.
|
5180
5242
|
|
5181
5243
|
A [left-flanking delimiter run](@left-flanking-delimiter-run) is
|
5182
|
-
a [delimiter run] that is (a) not followed by [
|
5244
|
+
a [delimiter run] that is (a) not followed by [Unicode whitespace],
|
5183
5245
|
and (b) either not followed by a [punctuation character], or
|
5184
|
-
preceded by [
|
5246
|
+
preceded by [Unicode whitespace] or a [punctuation character].
|
5185
5247
|
For purposes of this definition, the beginning and the end of
|
5186
|
-
the line count as
|
5248
|
+
the line count as Unicode whitespace.
|
5187
5249
|
|
5188
5250
|
A [right-flanking delimiter run](@right-flanking-delimiter-run) is
|
5189
|
-
a [delimiter run] that is (a) not preceded by [
|
5251
|
+
a [delimiter run] that is (a) not preceded by [Unicode whitespace],
|
5190
5252
|
and (b) either not preceded by a [punctuation character], or
|
5191
|
-
followed by [
|
5253
|
+
followed by [Unicode whitespace] or a [punctuation character].
|
5192
5254
|
For purposes of this definition, the beginning and the end of
|
5193
|
-
the line count as
|
5255
|
+
the line count as Unicode whitespace.
|
5194
5256
|
|
5195
5257
|
Here are some examples of delimiter runs.
|
5196
5258
|
|
@@ -6511,8 +6573,8 @@ just a backslash:
|
|
6511
6573
|
|
6512
6574
|
URL-escaping should be left alone inside the destination, as all
|
6513
6575
|
URL-escaped characters are also valid URL characters. HTML entities in
|
6514
|
-
the destination will be parsed into the corresponding
|
6515
|
-
|
6576
|
+
the destination will be parsed into the corresponding Unicode
|
6577
|
+
code points, as usual, and optionally URL-escaped when written as HTML.
|
6516
6578
|
|
6517
6579
|
.
|
6518
6580
|
[link](foo%20bä)
|
@@ -6721,7 +6783,7 @@ characters inside the square brackets.
|
|
6721
6783
|
|
6722
6784
|
One label [matches](@matches)
|
6723
6785
|
another just in case their normalized forms are equal. To normalize a
|
6724
|
-
label, perform the *
|
6786
|
+
label, perform the *Unicode case fold* and collapse consecutive internal
|
6725
6787
|
[whitespace] to a single space. If there are multiple
|
6726
6788
|
matching reference link definitions, the one that comes first in the
|
6727
6789
|
document is used. (It is desirable in such cases to emit a warning.)
|
data/lib/commonmarker/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: commonmarker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-08-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-enum
|