wikitext 1.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/parser.c +149 -45
- data/ext/token.c +1 -0
- data/ext/token.h +1 -0
- data/ext/wikitext_ragel.c +1718 -1947
- data/lib/wikitext/version.rb +17 -0
- data/spec/autolinking_spec.rb +462 -0
- data/spec/integration_spec.rb +1 -1
- data/spec/pre_spec.rb +77 -18
- data/spec/regressions_spec.rb +703 -1
- metadata +3 -2
data/ext/parser.c
CHANGED
@@ -23,6 +23,7 @@
|
|
23
23
|
// poor man's object orientation in C:
|
24
24
|
// instead of passing around multiple parameters between functions in the parser
|
25
25
|
// we pack everything into a struct and pass around only a pointer to that
|
26
|
+
// TODO: consider changing some of the VALUE members (eg link_target) to the more efficient str_t type
|
26
27
|
typedef struct
|
27
28
|
{
|
28
29
|
VALUE output; // for accumulating output to be returned
|
@@ -420,6 +421,15 @@ void _Wikitext_start_para_if_necessary(parser_t *parser)
|
|
420
421
|
parser->pending_crlf = Qfalse;
|
421
422
|
}
|
422
423
|
|
424
|
+
void _Wikitext_emit_pending_crlf_if_necessary(parser_t *parser)
|
425
|
+
{
|
426
|
+
if (parser->pending_crlf == Qtrue)
|
427
|
+
{
|
428
|
+
rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
|
429
|
+
parser->pending_crlf = Qfalse;
|
430
|
+
}
|
431
|
+
}
|
432
|
+
|
423
433
|
// Helper function that pops any excess elements off scope (pushing is already handled in the respective rules).
|
424
434
|
// For example, given input like:
|
425
435
|
//
|
@@ -1035,7 +1045,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1035
1045
|
|
1036
1046
|
case PRE_START:
|
1037
1047
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1048
|
+
{
|
1049
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1038
1050
|
rb_str_cat(parser->output, escaped_pre_start, sizeof(escaped_pre_start) - 1);
|
1051
|
+
}
|
1039
1052
|
else if (IN(BLOCKQUOTE_START))
|
1040
1053
|
{
|
1041
1054
|
_Wikitext_rollback_failed_link(parser); // if any
|
@@ -1046,9 +1059,16 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1046
1059
|
ary_push(parser->scope, PRE_START);
|
1047
1060
|
ary_push(parser->line, PRE_START);
|
1048
1061
|
}
|
1049
|
-
else if (
|
1062
|
+
else if (IN(BLOCKQUOTE))
|
1063
|
+
{
|
1064
|
+
// PRE_START is illegal
|
1065
|
+
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
1066
|
+
_Wikitext_pop_excess_elements(parser);
|
1067
|
+
_Wikitext_start_para_if_necessary(parser);
|
1068
|
+
rb_str_cat(i, escaped_pre_start, sizeof(escaped_pre_start) - 1);
|
1069
|
+
}
|
1070
|
+
else
|
1050
1071
|
{
|
1051
|
-
// would be nice to eliminate the repetition here but it's probably the clearest way
|
1052
1072
|
_Wikitext_rollback_failed_link(parser); // if any
|
1053
1073
|
_Wikitext_rollback_failed_external_link(parser); // if any
|
1054
1074
|
_Wikitext_pop_from_stack_up_to(parser, Qnil, P, Qtrue);
|
@@ -1057,19 +1077,14 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1057
1077
|
ary_push(parser->scope, PRE_START);
|
1058
1078
|
ary_push(parser->line, PRE_START);
|
1059
1079
|
}
|
1060
|
-
else
|
1061
|
-
{
|
1062
|
-
// everywhere else, PRE_START is illegal (in LI, BLOCKQUOTE, H1_START etc)
|
1063
|
-
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
1064
|
-
_Wikitext_pop_excess_elements(parser);
|
1065
|
-
_Wikitext_start_para_if_necessary(parser);
|
1066
|
-
rb_str_cat(i, escaped_pre_start, sizeof(escaped_pre_start) - 1);
|
1067
|
-
}
|
1068
1080
|
break;
|
1069
1081
|
|
1070
1082
|
case PRE_END:
|
1071
1083
|
if (IN(NO_WIKI_START) || IN(PRE))
|
1084
|
+
{
|
1085
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1072
1086
|
rb_str_cat(parser->output, escaped_pre_end, sizeof(escaped_pre_end) - 1);
|
1087
|
+
}
|
1073
1088
|
else
|
1074
1089
|
{
|
1075
1090
|
if (IN(PRE_START))
|
@@ -1139,7 +1154,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1139
1154
|
|
1140
1155
|
case BLOCKQUOTE_START:
|
1141
1156
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1157
|
+
{
|
1158
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1142
1159
|
rb_str_cat(parser->output, escaped_blockquote_start, sizeof(escaped_blockquote_start) - 1);
|
1160
|
+
}
|
1143
1161
|
else if (IN(BLOCKQUOTE_START))
|
1144
1162
|
{
|
1145
1163
|
// nesting is fine here
|
@@ -1176,7 +1194,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1176
1194
|
|
1177
1195
|
case BLOCKQUOTE_END:
|
1178
1196
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1197
|
+
{
|
1198
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1179
1199
|
rb_str_cat(parser->output, escaped_blockquote_end, sizeof(escaped_blockquote_end) - 1);
|
1200
|
+
}
|
1180
1201
|
else
|
1181
1202
|
{
|
1182
1203
|
if (IN(BLOCKQUOTE_START))
|
@@ -1193,7 +1214,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1193
1214
|
|
1194
1215
|
case NO_WIKI_START:
|
1195
1216
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1217
|
+
{
|
1218
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1196
1219
|
rb_str_cat(parser->output, escaped_no_wiki_start, sizeof(escaped_no_wiki_start) - 1);
|
1220
|
+
}
|
1197
1221
|
else
|
1198
1222
|
{
|
1199
1223
|
_Wikitext_pop_excess_elements(parser);
|
@@ -1218,6 +1242,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1218
1242
|
case STRONG_EM:
|
1219
1243
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1220
1244
|
{
|
1245
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1221
1246
|
rb_str_cat(parser->output, literal_strong_em, sizeof(literal_strong_em) - 1);
|
1222
1247
|
break;
|
1223
1248
|
}
|
@@ -1282,7 +1307,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1282
1307
|
|
1283
1308
|
case STRONG:
|
1284
1309
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1310
|
+
{
|
1311
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1285
1312
|
rb_str_cat(parser->output, literal_strong, sizeof(literal_strong) - 1);
|
1313
|
+
}
|
1286
1314
|
else
|
1287
1315
|
{
|
1288
1316
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
@@ -1306,7 +1334,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1306
1334
|
|
1307
1335
|
case STRONG_START:
|
1308
1336
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1337
|
+
{
|
1338
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1309
1339
|
rb_str_cat(parser->output, escaped_strong_start, sizeof(escaped_strong_start) - 1);
|
1340
|
+
}
|
1310
1341
|
else
|
1311
1342
|
{
|
1312
1343
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
@@ -1325,7 +1356,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1325
1356
|
|
1326
1357
|
case STRONG_END:
|
1327
1358
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1359
|
+
{
|
1360
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1328
1361
|
rb_str_cat(parser->output, escaped_strong_end, sizeof(escaped_strong_end) - 1);
|
1362
|
+
}
|
1329
1363
|
else
|
1330
1364
|
{
|
1331
1365
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
@@ -1343,7 +1377,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1343
1377
|
|
1344
1378
|
case EM:
|
1345
1379
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1380
|
+
{
|
1381
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1346
1382
|
rb_str_cat(parser->output, literal_em, sizeof(literal_em) - 1);
|
1383
|
+
}
|
1347
1384
|
else
|
1348
1385
|
{
|
1349
1386
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
@@ -1367,7 +1404,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1367
1404
|
|
1368
1405
|
case EM_START:
|
1369
1406
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1407
|
+
{
|
1408
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1370
1409
|
rb_str_cat(parser->output, escaped_em_start, sizeof(escaped_em_start) - 1);
|
1410
|
+
}
|
1371
1411
|
else
|
1372
1412
|
{
|
1373
1413
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
@@ -1386,7 +1426,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1386
1426
|
|
1387
1427
|
case EM_END:
|
1388
1428
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1429
|
+
{
|
1430
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1389
1431
|
rb_str_cat(parser->output, escaped_em_end, sizeof(escaped_em_end) - 1);
|
1432
|
+
}
|
1390
1433
|
else
|
1391
1434
|
{
|
1392
1435
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
@@ -1404,7 +1447,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1404
1447
|
|
1405
1448
|
case TT:
|
1406
1449
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1450
|
+
{
|
1451
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1407
1452
|
rb_str_cat(parser->output, backtick, sizeof(backtick) - 1);
|
1453
|
+
}
|
1408
1454
|
else
|
1409
1455
|
{
|
1410
1456
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
@@ -1428,7 +1474,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1428
1474
|
|
1429
1475
|
case TT_START:
|
1430
1476
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1477
|
+
{
|
1478
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1431
1479
|
rb_str_cat(parser->output, escaped_tt_start, sizeof(escaped_tt_start) - 1);
|
1480
|
+
}
|
1432
1481
|
else
|
1433
1482
|
{
|
1434
1483
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
@@ -1447,7 +1496,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1447
1496
|
|
1448
1497
|
case TT_END:
|
1449
1498
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1499
|
+
{
|
1500
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1450
1501
|
rb_str_cat(parser->output, escaped_tt_end, sizeof(escaped_tt_end) - 1);
|
1502
|
+
}
|
1451
1503
|
else
|
1452
1504
|
{
|
1453
1505
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
@@ -1650,7 +1702,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1650
1702
|
|
1651
1703
|
case H6_END:
|
1652
1704
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1705
|
+
{
|
1706
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1653
1707
|
rb_str_cat(parser->output, literal_h6, sizeof(literal_h6) - 1);
|
1708
|
+
}
|
1654
1709
|
else
|
1655
1710
|
{
|
1656
1711
|
_Wikitext_rollback_failed_external_link(parser); // if any
|
@@ -1665,7 +1720,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1665
1720
|
|
1666
1721
|
case H5_END:
|
1667
1722
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1723
|
+
{
|
1724
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1668
1725
|
rb_str_cat(parser->output, literal_h5, sizeof(literal_h5) - 1);
|
1726
|
+
}
|
1669
1727
|
else
|
1670
1728
|
{
|
1671
1729
|
_Wikitext_rollback_failed_external_link(parser); // if any
|
@@ -1680,7 +1738,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1680
1738
|
|
1681
1739
|
case H4_END:
|
1682
1740
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1741
|
+
{
|
1742
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1683
1743
|
rb_str_cat(parser->output, literal_h4, sizeof(literal_h4) - 1);
|
1744
|
+
}
|
1684
1745
|
else
|
1685
1746
|
{
|
1686
1747
|
_Wikitext_rollback_failed_external_link(parser); // if any
|
@@ -1695,7 +1756,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1695
1756
|
|
1696
1757
|
case H3_END:
|
1697
1758
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1759
|
+
{
|
1760
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1698
1761
|
rb_str_cat(parser->output, literal_h3, sizeof(literal_h3) - 1);
|
1762
|
+
}
|
1699
1763
|
else
|
1700
1764
|
{
|
1701
1765
|
_Wikitext_rollback_failed_external_link(parser); // if any
|
@@ -1710,7 +1774,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1710
1774
|
|
1711
1775
|
case H2_END:
|
1712
1776
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1777
|
+
{
|
1778
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1713
1779
|
rb_str_cat(parser->output, literal_h2, sizeof(literal_h2) - 1);
|
1780
|
+
}
|
1714
1781
|
else
|
1715
1782
|
{
|
1716
1783
|
_Wikitext_rollback_failed_external_link(parser); // if any
|
@@ -1725,7 +1792,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1725
1792
|
|
1726
1793
|
case H1_END:
|
1727
1794
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1795
|
+
{
|
1796
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1728
1797
|
rb_str_cat(parser->output, literal_h1, sizeof(literal_h1) - 1);
|
1798
|
+
}
|
1729
1799
|
else
|
1730
1800
|
{
|
1731
1801
|
_Wikitext_rollback_failed_external_link(parser); // if any
|
@@ -1740,7 +1810,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1740
1810
|
|
1741
1811
|
case MAIL:
|
1742
1812
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1813
|
+
{
|
1814
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1743
1815
|
rb_str_cat(parser->output, token->start, TOKEN_LEN(token));
|
1816
|
+
}
|
1744
1817
|
else
|
1745
1818
|
{
|
1746
1819
|
// in plain scope, will turn into autolink (with appropriate, user-configurable CSS)
|
@@ -1839,12 +1912,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1839
1912
|
// example [[foo €]]
|
1840
1913
|
// renders <a href="/wiki/Foo_%E2%82%AC">foo €</a>
|
1841
1914
|
// we'll impose similar restrictions here for the link target; allowed tokens will be:
|
1842
|
-
// SPACE, PRINTABLE, DEFAULT, QUOT and AMP
|
1915
|
+
// SPACE, SPECIAL_URI_CHARS, PRINTABLE, DEFAULT, QUOT and AMP
|
1843
1916
|
// everything else will be rejected
|
1844
1917
|
case LINK_START:
|
1845
1918
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
1846
1919
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1920
|
+
{
|
1921
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1847
1922
|
rb_str_cat(i, link_start, sizeof(link_start) - 1);
|
1923
|
+
}
|
1848
1924
|
else if (IN(EXT_LINK_START))
|
1849
1925
|
// already in external link scope! (and in fact, must be capturing link_text right now)
|
1850
1926
|
rb_str_cat(i, link_start, sizeof(link_start) - 1);
|
@@ -1868,16 +1944,17 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1868
1944
|
// look ahead and try to gobble up link target
|
1869
1945
|
while (NEXT_TOKEN(), (type = token->type))
|
1870
1946
|
{
|
1871
|
-
if (type == SPACE
|
1872
|
-
type ==
|
1873
|
-
type ==
|
1874
|
-
type ==
|
1875
|
-
type ==
|
1876
|
-
type ==
|
1877
|
-
type ==
|
1878
|
-
type ==
|
1879
|
-
type ==
|
1880
|
-
type ==
|
1947
|
+
if (type == SPACE ||
|
1948
|
+
type == SPECIAL_URI_CHARS ||
|
1949
|
+
type == PRINTABLE ||
|
1950
|
+
type == DEFAULT ||
|
1951
|
+
type == QUOT ||
|
1952
|
+
type == QUOT_ENTITY ||
|
1953
|
+
type == AMP ||
|
1954
|
+
type == AMP_ENTITY ||
|
1955
|
+
type == IMG_START ||
|
1956
|
+
type == IMG_END ||
|
1957
|
+
type == LEFT_CURLY ||
|
1881
1958
|
type == RIGHT_CURLY)
|
1882
1959
|
{
|
1883
1960
|
// accumulate these tokens into link_target
|
@@ -1920,7 +1997,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1920
1997
|
case LINK_END:
|
1921
1998
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
1922
1999
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2000
|
+
{
|
2001
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1923
2002
|
rb_str_cat(i, link_end, sizeof(link_end) - 1);
|
2003
|
+
}
|
1924
2004
|
else if (IN(EXT_LINK_START))
|
1925
2005
|
// already in external link scope! (and in fact, must be capturing link_text right now)
|
1926
2006
|
rb_str_cat(i, link_end, sizeof(link_end) - 1);
|
@@ -1958,7 +2038,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1958
2038
|
case EXT_LINK_START:
|
1959
2039
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
1960
2040
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2041
|
+
{
|
2042
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1961
2043
|
rb_str_cat(i, ext_link_start, sizeof(ext_link_start) - 1);
|
2044
|
+
}
|
1962
2045
|
else if (IN(EXT_LINK_START))
|
1963
2046
|
// already in external link scope! (and in fact, must be capturing link_text right now)
|
1964
2047
|
rb_str_cat(i, ext_link_start, sizeof(ext_link_start) - 1);
|
@@ -2003,7 +2086,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2003
2086
|
case EXT_LINK_END:
|
2004
2087
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
2005
2088
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2089
|
+
{
|
2090
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
2006
2091
|
rb_str_cat(i, ext_link_end, sizeof(ext_link_end) - 1);
|
2092
|
+
}
|
2007
2093
|
else if (IN(EXT_LINK_START))
|
2008
2094
|
{
|
2009
2095
|
if (NIL_P(parser->link_text))
|
@@ -2038,7 +2124,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2038
2124
|
case SPACE:
|
2039
2125
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
2040
2126
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2127
|
+
{
|
2128
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
2041
2129
|
rb_str_cat(i, token->start, TOKEN_LEN(token));
|
2130
|
+
}
|
2042
2131
|
else
|
2043
2132
|
{
|
2044
2133
|
// peek ahead to see next token
|
@@ -2117,7 +2206,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2117
2206
|
|
2118
2207
|
case IMG_START:
|
2119
2208
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2209
|
+
{
|
2210
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
2120
2211
|
rb_str_cat(parser->output, token->start, TOKEN_LEN(token));
|
2212
|
+
}
|
2121
2213
|
else if (!NIL_P(parser->capture))
|
2122
2214
|
rb_str_cat(parser->capture, token->start, TOKEN_LEN(token));
|
2123
2215
|
else
|
@@ -2126,39 +2218,38 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2126
2218
|
_Wikitext_pop_excess_elements(parser);
|
2127
2219
|
_Wikitext_start_para_if_necessary(parser);
|
2128
2220
|
|
2129
|
-
//
|
2130
|
-
|
2131
|
-
if (
|
2132
|
-
|
2133
|
-
|
2134
|
-
else
|
2221
|
+
// scan ahead consuming PRINTABLE and SPECIAL_URI_CHARS tokens
|
2222
|
+
// will cheat here and abuse the link_target capture buffer to accumulate text
|
2223
|
+
if (NIL_P(parser->link_target))
|
2224
|
+
parser->link_target = rb_str_new2("");
|
2225
|
+
while (NEXT_TOKEN(), (type = token->type))
|
2135
2226
|
{
|
2136
|
-
|
2137
|
-
|
2138
|
-
|
2139
|
-
|
2140
|
-
// peek ahead once more
|
2141
|
-
NEXT_TOKEN();
|
2142
|
-
if (token->type == IMG_END)
|
2227
|
+
if (type == PRINTABLE || type == SPECIAL_URI_CHARS)
|
2228
|
+
rb_str_cat(parser->link_target, token->start, TOKEN_LEN(token));
|
2229
|
+
else if (type == IMG_END)
|
2143
2230
|
{
|
2144
2231
|
// success
|
2145
|
-
_Wikitext_append_img(parser,
|
2232
|
+
_Wikitext_append_img(parser, RSTRING_PTR(parser->link_target), RSTRING_LEN(parser->link_target));
|
2146
2233
|
token = NULL;
|
2234
|
+
break;
|
2147
2235
|
}
|
2148
|
-
else
|
2236
|
+
else // unexpected token (syntax error)
|
2149
2237
|
{
|
2150
|
-
//
|
2238
|
+
// rollback
|
2151
2239
|
rb_str_cat(parser->output, literal_img_start, sizeof(literal_img_start) - 1);
|
2152
|
-
rb_str_cat(parser->output,
|
2240
|
+
rb_str_cat(parser->output, RSTRING_PTR(parser->link_target), RSTRING_LEN(parser->link_target));
|
2241
|
+
break;
|
2153
2242
|
}
|
2154
2243
|
}
|
2155
2244
|
|
2156
2245
|
// jump to top of the loop to process token we scanned during lookahead
|
2246
|
+
parser->link_target = Qnil;
|
2157
2247
|
continue;
|
2158
2248
|
}
|
2159
2249
|
break;
|
2160
2250
|
|
2161
2251
|
case CRLF:
|
2252
|
+
i = parser->pending_crlf;
|
2162
2253
|
parser->pending_crlf = Qfalse;
|
2163
2254
|
_Wikitext_rollback_failed_link(parser); // if any
|
2164
2255
|
_Wikitext_rollback_failed_external_link(parser); // if any
|
@@ -2170,27 +2261,33 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2170
2261
|
}
|
2171
2262
|
else if (IN(PRE))
|
2172
2263
|
{
|
2173
|
-
// beware when
|
2174
|
-
if (
|
2264
|
+
// beware: when a BLOCKQUOTE is on the line buffer (not the line stack!) immediately prior to the CRLF, that must be the end of the PRE block
|
2265
|
+
if (ary_entry(parser->line_buffer, -2) == BLOCKQUOTE)
|
2175
2266
|
// don't emit in this case
|
2176
2267
|
_Wikitext_pop_from_stack_up_to(parser, parser->output, PRE, Qtrue);
|
2177
2268
|
else
|
2178
2269
|
{
|
2270
|
+
if (ary_entry(parser->line_buffer, -2) == PRE)
|
2271
|
+
{
|
2272
|
+
// only thing on line is the PRE: emit pending line ending (if we had one)
|
2273
|
+
if (i == Qtrue)
|
2274
|
+
rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
|
2275
|
+
}
|
2276
|
+
|
2277
|
+
// clear these _before_ calling NEXT_TOKEN (NEXT_TOKEN adds to the line_buffer)
|
2278
|
+
ary_clear(parser->line);
|
2279
|
+
ary_clear(parser->line_buffer);
|
2280
|
+
|
2179
2281
|
// peek ahead to see if this is definitely the end of the PRE block
|
2180
2282
|
NEXT_TOKEN();
|
2181
2283
|
type = token->type;
|
2182
2284
|
if (type != BLOCKQUOTE && type != PRE)
|
2183
|
-
{
|
2184
2285
|
// this is definitely the end of the block, so don't emit
|
2185
2286
|
_Wikitext_pop_from_stack_up_to(parser, parser->output, PRE, Qtrue);
|
2186
|
-
}
|
2187
2287
|
else
|
2188
2288
|
// potentially will emit
|
2189
2289
|
parser->pending_crlf = Qtrue;
|
2190
2290
|
|
2191
|
-
// delete the entire contents of the line scope stack and buffer
|
2192
|
-
ary_clear(parser->line);
|
2193
|
-
ary_clear(parser->line_buffer);
|
2194
2291
|
continue; // jump back to top of loop to handle token grabbed via lookahead
|
2195
2292
|
}
|
2196
2293
|
}
|
@@ -2233,6 +2330,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2233
2330
|
ary_clear(parser->line_buffer);
|
2234
2331
|
break;
|
2235
2332
|
|
2333
|
+
case SPECIAL_URI_CHARS:
|
2236
2334
|
case PRINTABLE:
|
2237
2335
|
case IMG_END:
|
2238
2336
|
case LEFT_CURLY:
|
@@ -2251,6 +2349,12 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2251
2349
|
break;
|
2252
2350
|
|
2253
2351
|
case END_OF_FILE:
|
2352
|
+
// special case for input like " foo\n " (see pre_spec.rb)
|
2353
|
+
if (IN(PRE) &&
|
2354
|
+
ary_entry(parser->line_buffer, -2) == PRE &&
|
2355
|
+
parser->pending_crlf == Qtrue)
|
2356
|
+
rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
|
2357
|
+
|
2254
2358
|
// close any open scopes on hitting EOF
|
2255
2359
|
_Wikitext_rollback_failed_external_link(parser); // if any
|
2256
2360
|
_Wikitext_rollback_failed_link(parser); // if any
|