wikitext 1.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/parser.c +149 -45
- data/ext/token.c +1 -0
- data/ext/token.h +1 -0
- data/ext/wikitext_ragel.c +1718 -1947
- data/lib/wikitext/version.rb +17 -0
- data/spec/autolinking_spec.rb +462 -0
- data/spec/integration_spec.rb +1 -1
- data/spec/pre_spec.rb +77 -18
- data/spec/regressions_spec.rb +703 -1
- metadata +3 -2
data/ext/parser.c
CHANGED
@@ -23,6 +23,7 @@
|
|
23
23
|
// poor man's object orientation in C:
|
24
24
|
// instead of passing around multiple parameters between functions in the parser
|
25
25
|
// we pack everything into a struct and pass around only a pointer to that
|
26
|
+
// TODO: consider changing some of the VALUE members (eg link_target) to the more efficient str_t type
|
26
27
|
typedef struct
|
27
28
|
{
|
28
29
|
VALUE output; // for accumulating output to be returned
|
@@ -420,6 +421,15 @@ void _Wikitext_start_para_if_necessary(parser_t *parser)
|
|
420
421
|
parser->pending_crlf = Qfalse;
|
421
422
|
}
|
422
423
|
|
424
|
+
void _Wikitext_emit_pending_crlf_if_necessary(parser_t *parser)
|
425
|
+
{
|
426
|
+
if (parser->pending_crlf == Qtrue)
|
427
|
+
{
|
428
|
+
rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
|
429
|
+
parser->pending_crlf = Qfalse;
|
430
|
+
}
|
431
|
+
}
|
432
|
+
|
423
433
|
// Helper function that pops any excess elements off scope (pushing is already handled in the respective rules).
|
424
434
|
// For example, given input like:
|
425
435
|
//
|
@@ -1035,7 +1045,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1035
1045
|
|
1036
1046
|
case PRE_START:
|
1037
1047
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1048
|
+
{
|
1049
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1038
1050
|
rb_str_cat(parser->output, escaped_pre_start, sizeof(escaped_pre_start) - 1);
|
1051
|
+
}
|
1039
1052
|
else if (IN(BLOCKQUOTE_START))
|
1040
1053
|
{
|
1041
1054
|
_Wikitext_rollback_failed_link(parser); // if any
|
@@ -1046,9 +1059,16 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1046
1059
|
ary_push(parser->scope, PRE_START);
|
1047
1060
|
ary_push(parser->line, PRE_START);
|
1048
1061
|
}
|
1049
|
-
else if (
|
1062
|
+
else if (IN(BLOCKQUOTE))
|
1063
|
+
{
|
1064
|
+
// PRE_START is illegal
|
1065
|
+
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
1066
|
+
_Wikitext_pop_excess_elements(parser);
|
1067
|
+
_Wikitext_start_para_if_necessary(parser);
|
1068
|
+
rb_str_cat(i, escaped_pre_start, sizeof(escaped_pre_start) - 1);
|
1069
|
+
}
|
1070
|
+
else
|
1050
1071
|
{
|
1051
|
-
// would be nice to eliminate the repetition here but it's probably the clearest way
|
1052
1072
|
_Wikitext_rollback_failed_link(parser); // if any
|
1053
1073
|
_Wikitext_rollback_failed_external_link(parser); // if any
|
1054
1074
|
_Wikitext_pop_from_stack_up_to(parser, Qnil, P, Qtrue);
|
@@ -1057,19 +1077,14 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1057
1077
|
ary_push(parser->scope, PRE_START);
|
1058
1078
|
ary_push(parser->line, PRE_START);
|
1059
1079
|
}
|
1060
|
-
else
|
1061
|
-
{
|
1062
|
-
// everywhere else, PRE_START is illegal (in LI, BLOCKQUOTE, H1_START etc)
|
1063
|
-
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
1064
|
-
_Wikitext_pop_excess_elements(parser);
|
1065
|
-
_Wikitext_start_para_if_necessary(parser);
|
1066
|
-
rb_str_cat(i, escaped_pre_start, sizeof(escaped_pre_start) - 1);
|
1067
|
-
}
|
1068
1080
|
break;
|
1069
1081
|
|
1070
1082
|
case PRE_END:
|
1071
1083
|
if (IN(NO_WIKI_START) || IN(PRE))
|
1084
|
+
{
|
1085
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1072
1086
|
rb_str_cat(parser->output, escaped_pre_end, sizeof(escaped_pre_end) - 1);
|
1087
|
+
}
|
1073
1088
|
else
|
1074
1089
|
{
|
1075
1090
|
if (IN(PRE_START))
|
@@ -1139,7 +1154,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1139
1154
|
|
1140
1155
|
case BLOCKQUOTE_START:
|
1141
1156
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1157
|
+
{
|
1158
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1142
1159
|
rb_str_cat(parser->output, escaped_blockquote_start, sizeof(escaped_blockquote_start) - 1);
|
1160
|
+
}
|
1143
1161
|
else if (IN(BLOCKQUOTE_START))
|
1144
1162
|
{
|
1145
1163
|
// nesting is fine here
|
@@ -1176,7 +1194,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1176
1194
|
|
1177
1195
|
case BLOCKQUOTE_END:
|
1178
1196
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1197
|
+
{
|
1198
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1179
1199
|
rb_str_cat(parser->output, escaped_blockquote_end, sizeof(escaped_blockquote_end) - 1);
|
1200
|
+
}
|
1180
1201
|
else
|
1181
1202
|
{
|
1182
1203
|
if (IN(BLOCKQUOTE_START))
|
@@ -1193,7 +1214,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1193
1214
|
|
1194
1215
|
case NO_WIKI_START:
|
1195
1216
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1217
|
+
{
|
1218
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1196
1219
|
rb_str_cat(parser->output, escaped_no_wiki_start, sizeof(escaped_no_wiki_start) - 1);
|
1220
|
+
}
|
1197
1221
|
else
|
1198
1222
|
{
|
1199
1223
|
_Wikitext_pop_excess_elements(parser);
|
@@ -1218,6 +1242,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1218
1242
|
case STRONG_EM:
|
1219
1243
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1220
1244
|
{
|
1245
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1221
1246
|
rb_str_cat(parser->output, literal_strong_em, sizeof(literal_strong_em) - 1);
|
1222
1247
|
break;
|
1223
1248
|
}
|
@@ -1282,7 +1307,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1282
1307
|
|
1283
1308
|
case STRONG:
|
1284
1309
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1310
|
+
{
|
1311
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1285
1312
|
rb_str_cat(parser->output, literal_strong, sizeof(literal_strong) - 1);
|
1313
|
+
}
|
1286
1314
|
else
|
1287
1315
|
{
|
1288
1316
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
@@ -1306,7 +1334,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1306
1334
|
|
1307
1335
|
case STRONG_START:
|
1308
1336
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1337
|
+
{
|
1338
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1309
1339
|
rb_str_cat(parser->output, escaped_strong_start, sizeof(escaped_strong_start) - 1);
|
1340
|
+
}
|
1310
1341
|
else
|
1311
1342
|
{
|
1312
1343
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
@@ -1325,7 +1356,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1325
1356
|
|
1326
1357
|
case STRONG_END:
|
1327
1358
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1359
|
+
{
|
1360
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1328
1361
|
rb_str_cat(parser->output, escaped_strong_end, sizeof(escaped_strong_end) - 1);
|
1362
|
+
}
|
1329
1363
|
else
|
1330
1364
|
{
|
1331
1365
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
@@ -1343,7 +1377,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1343
1377
|
|
1344
1378
|
case EM:
|
1345
1379
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1380
|
+
{
|
1381
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1346
1382
|
rb_str_cat(parser->output, literal_em, sizeof(literal_em) - 1);
|
1383
|
+
}
|
1347
1384
|
else
|
1348
1385
|
{
|
1349
1386
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
@@ -1367,7 +1404,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1367
1404
|
|
1368
1405
|
case EM_START:
|
1369
1406
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1407
|
+
{
|
1408
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1370
1409
|
rb_str_cat(parser->output, escaped_em_start, sizeof(escaped_em_start) - 1);
|
1410
|
+
}
|
1371
1411
|
else
|
1372
1412
|
{
|
1373
1413
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
@@ -1386,7 +1426,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1386
1426
|
|
1387
1427
|
case EM_END:
|
1388
1428
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1429
|
+
{
|
1430
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1389
1431
|
rb_str_cat(parser->output, escaped_em_end, sizeof(escaped_em_end) - 1);
|
1432
|
+
}
|
1390
1433
|
else
|
1391
1434
|
{
|
1392
1435
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
@@ -1404,7 +1447,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1404
1447
|
|
1405
1448
|
case TT:
|
1406
1449
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1450
|
+
{
|
1451
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1407
1452
|
rb_str_cat(parser->output, backtick, sizeof(backtick) - 1);
|
1453
|
+
}
|
1408
1454
|
else
|
1409
1455
|
{
|
1410
1456
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
@@ -1428,7 +1474,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1428
1474
|
|
1429
1475
|
case TT_START:
|
1430
1476
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1477
|
+
{
|
1478
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1431
1479
|
rb_str_cat(parser->output, escaped_tt_start, sizeof(escaped_tt_start) - 1);
|
1480
|
+
}
|
1432
1481
|
else
|
1433
1482
|
{
|
1434
1483
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
@@ -1447,7 +1496,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1447
1496
|
|
1448
1497
|
case TT_END:
|
1449
1498
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1499
|
+
{
|
1500
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1450
1501
|
rb_str_cat(parser->output, escaped_tt_end, sizeof(escaped_tt_end) - 1);
|
1502
|
+
}
|
1451
1503
|
else
|
1452
1504
|
{
|
1453
1505
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
@@ -1650,7 +1702,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1650
1702
|
|
1651
1703
|
case H6_END:
|
1652
1704
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1705
|
+
{
|
1706
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1653
1707
|
rb_str_cat(parser->output, literal_h6, sizeof(literal_h6) - 1);
|
1708
|
+
}
|
1654
1709
|
else
|
1655
1710
|
{
|
1656
1711
|
_Wikitext_rollback_failed_external_link(parser); // if any
|
@@ -1665,7 +1720,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1665
1720
|
|
1666
1721
|
case H5_END:
|
1667
1722
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1723
|
+
{
|
1724
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1668
1725
|
rb_str_cat(parser->output, literal_h5, sizeof(literal_h5) - 1);
|
1726
|
+
}
|
1669
1727
|
else
|
1670
1728
|
{
|
1671
1729
|
_Wikitext_rollback_failed_external_link(parser); // if any
|
@@ -1680,7 +1738,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1680
1738
|
|
1681
1739
|
case H4_END:
|
1682
1740
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1741
|
+
{
|
1742
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1683
1743
|
rb_str_cat(parser->output, literal_h4, sizeof(literal_h4) - 1);
|
1744
|
+
}
|
1684
1745
|
else
|
1685
1746
|
{
|
1686
1747
|
_Wikitext_rollback_failed_external_link(parser); // if any
|
@@ -1695,7 +1756,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1695
1756
|
|
1696
1757
|
case H3_END:
|
1697
1758
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1759
|
+
{
|
1760
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1698
1761
|
rb_str_cat(parser->output, literal_h3, sizeof(literal_h3) - 1);
|
1762
|
+
}
|
1699
1763
|
else
|
1700
1764
|
{
|
1701
1765
|
_Wikitext_rollback_failed_external_link(parser); // if any
|
@@ -1710,7 +1774,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1710
1774
|
|
1711
1775
|
case H2_END:
|
1712
1776
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1777
|
+
{
|
1778
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1713
1779
|
rb_str_cat(parser->output, literal_h2, sizeof(literal_h2) - 1);
|
1780
|
+
}
|
1714
1781
|
else
|
1715
1782
|
{
|
1716
1783
|
_Wikitext_rollback_failed_external_link(parser); // if any
|
@@ -1725,7 +1792,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1725
1792
|
|
1726
1793
|
case H1_END:
|
1727
1794
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1795
|
+
{
|
1796
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1728
1797
|
rb_str_cat(parser->output, literal_h1, sizeof(literal_h1) - 1);
|
1798
|
+
}
|
1729
1799
|
else
|
1730
1800
|
{
|
1731
1801
|
_Wikitext_rollback_failed_external_link(parser); // if any
|
@@ -1740,7 +1810,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1740
1810
|
|
1741
1811
|
case MAIL:
|
1742
1812
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1813
|
+
{
|
1814
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1743
1815
|
rb_str_cat(parser->output, token->start, TOKEN_LEN(token));
|
1816
|
+
}
|
1744
1817
|
else
|
1745
1818
|
{
|
1746
1819
|
// in plain scope, will turn into autolink (with appropriate, user-configurable CSS)
|
@@ -1839,12 +1912,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1839
1912
|
// example [[foo €]]
|
1840
1913
|
// renders <a href="/wiki/Foo_%E2%82%AC">foo €</a>
|
1841
1914
|
// we'll impose similar restrictions here for the link target; allowed tokens will be:
|
1842
|
-
// SPACE, PRINTABLE, DEFAULT, QUOT and AMP
|
1915
|
+
// SPACE, SPECIAL_URI_CHARS, PRINTABLE, DEFAULT, QUOT and AMP
|
1843
1916
|
// everything else will be rejected
|
1844
1917
|
case LINK_START:
|
1845
1918
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
1846
1919
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1920
|
+
{
|
1921
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1847
1922
|
rb_str_cat(i, link_start, sizeof(link_start) - 1);
|
1923
|
+
}
|
1848
1924
|
else if (IN(EXT_LINK_START))
|
1849
1925
|
// already in external link scope! (and in fact, must be capturing link_text right now)
|
1850
1926
|
rb_str_cat(i, link_start, sizeof(link_start) - 1);
|
@@ -1868,16 +1944,17 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1868
1944
|
// look ahead and try to gobble up link target
|
1869
1945
|
while (NEXT_TOKEN(), (type = token->type))
|
1870
1946
|
{
|
1871
|
-
if (type == SPACE
|
1872
|
-
type ==
|
1873
|
-
type ==
|
1874
|
-
type ==
|
1875
|
-
type ==
|
1876
|
-
type ==
|
1877
|
-
type ==
|
1878
|
-
type ==
|
1879
|
-
type ==
|
1880
|
-
type ==
|
1947
|
+
if (type == SPACE ||
|
1948
|
+
type == SPECIAL_URI_CHARS ||
|
1949
|
+
type == PRINTABLE ||
|
1950
|
+
type == DEFAULT ||
|
1951
|
+
type == QUOT ||
|
1952
|
+
type == QUOT_ENTITY ||
|
1953
|
+
type == AMP ||
|
1954
|
+
type == AMP_ENTITY ||
|
1955
|
+
type == IMG_START ||
|
1956
|
+
type == IMG_END ||
|
1957
|
+
type == LEFT_CURLY ||
|
1881
1958
|
type == RIGHT_CURLY)
|
1882
1959
|
{
|
1883
1960
|
// accumulate these tokens into link_target
|
@@ -1920,7 +1997,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1920
1997
|
case LINK_END:
|
1921
1998
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
1922
1999
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2000
|
+
{
|
2001
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1923
2002
|
rb_str_cat(i, link_end, sizeof(link_end) - 1);
|
2003
|
+
}
|
1924
2004
|
else if (IN(EXT_LINK_START))
|
1925
2005
|
// already in external link scope! (and in fact, must be capturing link_text right now)
|
1926
2006
|
rb_str_cat(i, link_end, sizeof(link_end) - 1);
|
@@ -1958,7 +2038,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1958
2038
|
case EXT_LINK_START:
|
1959
2039
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
1960
2040
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2041
|
+
{
|
2042
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1961
2043
|
rb_str_cat(i, ext_link_start, sizeof(ext_link_start) - 1);
|
2044
|
+
}
|
1962
2045
|
else if (IN(EXT_LINK_START))
|
1963
2046
|
// already in external link scope! (and in fact, must be capturing link_text right now)
|
1964
2047
|
rb_str_cat(i, ext_link_start, sizeof(ext_link_start) - 1);
|
@@ -2003,7 +2086,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2003
2086
|
case EXT_LINK_END:
|
2004
2087
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
2005
2088
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2089
|
+
{
|
2090
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
2006
2091
|
rb_str_cat(i, ext_link_end, sizeof(ext_link_end) - 1);
|
2092
|
+
}
|
2007
2093
|
else if (IN(EXT_LINK_START))
|
2008
2094
|
{
|
2009
2095
|
if (NIL_P(parser->link_text))
|
@@ -2038,7 +2124,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2038
2124
|
case SPACE:
|
2039
2125
|
i = NIL_P(parser->capture) ? parser->output : parser->capture;
|
2040
2126
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2127
|
+
{
|
2128
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
2041
2129
|
rb_str_cat(i, token->start, TOKEN_LEN(token));
|
2130
|
+
}
|
2042
2131
|
else
|
2043
2132
|
{
|
2044
2133
|
// peek ahead to see next token
|
@@ -2117,7 +2206,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2117
2206
|
|
2118
2207
|
case IMG_START:
|
2119
2208
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2209
|
+
{
|
2210
|
+
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
2120
2211
|
rb_str_cat(parser->output, token->start, TOKEN_LEN(token));
|
2212
|
+
}
|
2121
2213
|
else if (!NIL_P(parser->capture))
|
2122
2214
|
rb_str_cat(parser->capture, token->start, TOKEN_LEN(token));
|
2123
2215
|
else
|
@@ -2126,39 +2218,38 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2126
2218
|
_Wikitext_pop_excess_elements(parser);
|
2127
2219
|
_Wikitext_start_para_if_necessary(parser);
|
2128
2220
|
|
2129
|
-
//
|
2130
|
-
|
2131
|
-
if (
|
2132
|
-
|
2133
|
-
|
2134
|
-
else
|
2221
|
+
// scan ahead consuming PRINTABLE and SPECIAL_URI_CHARS tokens
|
2222
|
+
// will cheat here and abuse the link_target capture buffer to accumulate text
|
2223
|
+
if (NIL_P(parser->link_target))
|
2224
|
+
parser->link_target = rb_str_new2("");
|
2225
|
+
while (NEXT_TOKEN(), (type = token->type))
|
2135
2226
|
{
|
2136
|
-
|
2137
|
-
|
2138
|
-
|
2139
|
-
|
2140
|
-
// peek ahead once more
|
2141
|
-
NEXT_TOKEN();
|
2142
|
-
if (token->type == IMG_END)
|
2227
|
+
if (type == PRINTABLE || type == SPECIAL_URI_CHARS)
|
2228
|
+
rb_str_cat(parser->link_target, token->start, TOKEN_LEN(token));
|
2229
|
+
else if (type == IMG_END)
|
2143
2230
|
{
|
2144
2231
|
// success
|
2145
|
-
_Wikitext_append_img(parser,
|
2232
|
+
_Wikitext_append_img(parser, RSTRING_PTR(parser->link_target), RSTRING_LEN(parser->link_target));
|
2146
2233
|
token = NULL;
|
2234
|
+
break;
|
2147
2235
|
}
|
2148
|
-
else
|
2236
|
+
else // unexpected token (syntax error)
|
2149
2237
|
{
|
2150
|
-
//
|
2238
|
+
// rollback
|
2151
2239
|
rb_str_cat(parser->output, literal_img_start, sizeof(literal_img_start) - 1);
|
2152
|
-
rb_str_cat(parser->output,
|
2240
|
+
rb_str_cat(parser->output, RSTRING_PTR(parser->link_target), RSTRING_LEN(parser->link_target));
|
2241
|
+
break;
|
2153
2242
|
}
|
2154
2243
|
}
|
2155
2244
|
|
2156
2245
|
// jump to top of the loop to process token we scanned during lookahead
|
2246
|
+
parser->link_target = Qnil;
|
2157
2247
|
continue;
|
2158
2248
|
}
|
2159
2249
|
break;
|
2160
2250
|
|
2161
2251
|
case CRLF:
|
2252
|
+
i = parser->pending_crlf;
|
2162
2253
|
parser->pending_crlf = Qfalse;
|
2163
2254
|
_Wikitext_rollback_failed_link(parser); // if any
|
2164
2255
|
_Wikitext_rollback_failed_external_link(parser); // if any
|
@@ -2170,27 +2261,33 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2170
2261
|
}
|
2171
2262
|
else if (IN(PRE))
|
2172
2263
|
{
|
2173
|
-
// beware when
|
2174
|
-
if (
|
2264
|
+
// beware when BLOCKQUOTE on line buffer (not line stack!) prior to CRLF, that must be end of PRE block
|
2265
|
+
if (ary_entry(parser->line_buffer, -2) == BLOCKQUOTE)
|
2175
2266
|
// don't emit in this case
|
2176
2267
|
_Wikitext_pop_from_stack_up_to(parser, parser->output, PRE, Qtrue);
|
2177
2268
|
else
|
2178
2269
|
{
|
2270
|
+
if (ary_entry(parser->line_buffer, -2) == PRE)
|
2271
|
+
{
|
2272
|
+
// only thing on line is the PRE: emit pending line ending (if we had one)
|
2273
|
+
if (i == Qtrue)
|
2274
|
+
rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
|
2275
|
+
}
|
2276
|
+
|
2277
|
+
// clear these _before_ calling NEXT_TOKEN (NEXT_TOKEN adds to the line_buffer)
|
2278
|
+
ary_clear(parser->line);
|
2279
|
+
ary_clear(parser->line_buffer);
|
2280
|
+
|
2179
2281
|
// peek ahead to see if this is definitely the end of the PRE block
|
2180
2282
|
NEXT_TOKEN();
|
2181
2283
|
type = token->type;
|
2182
2284
|
if (type != BLOCKQUOTE && type != PRE)
|
2183
|
-
{
|
2184
2285
|
// this is definitely the end of the block, so don't emit
|
2185
2286
|
_Wikitext_pop_from_stack_up_to(parser, parser->output, PRE, Qtrue);
|
2186
|
-
}
|
2187
2287
|
else
|
2188
2288
|
// potentially will emit
|
2189
2289
|
parser->pending_crlf = Qtrue;
|
2190
2290
|
|
2191
|
-
// delete the entire contents of the line scope stack and buffer
|
2192
|
-
ary_clear(parser->line);
|
2193
|
-
ary_clear(parser->line_buffer);
|
2194
2291
|
continue; // jump back to top of loop to handle token grabbed via lookahead
|
2195
2292
|
}
|
2196
2293
|
}
|
@@ -2233,6 +2330,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2233
2330
|
ary_clear(parser->line_buffer);
|
2234
2331
|
break;
|
2235
2332
|
|
2333
|
+
case SPECIAL_URI_CHARS:
|
2236
2334
|
case PRINTABLE:
|
2237
2335
|
case IMG_END:
|
2238
2336
|
case LEFT_CURLY:
|
@@ -2251,6 +2349,12 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2251
2349
|
break;
|
2252
2350
|
|
2253
2351
|
case END_OF_FILE:
|
2352
|
+
// special case for input like " foo\n " (see pre_spec.rb)
|
2353
|
+
if (IN(PRE) &&
|
2354
|
+
ary_entry(parser->line_buffer, -2) == PRE &&
|
2355
|
+
parser->pending_crlf == Qtrue)
|
2356
|
+
rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
|
2357
|
+
|
2254
2358
|
// close any open scopes on hitting EOF
|
2255
2359
|
_Wikitext_rollback_failed_external_link(parser); // if any
|
2256
2360
|
_Wikitext_rollback_failed_link(parser); // if any
|