wikitext 1.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/parser.c CHANGED
@@ -23,6 +23,7 @@
23
23
  // poor man's object orientation in C:
24
24
  // instead of passing around multiple parameters between functions in the parser
25
25
  // we pack everything into a struct and pass around only a pointer to that
26
+ // TODO: consider changing some of the VALUE members (eg link_target) to the more efficient str_t type
26
27
  typedef struct
27
28
  {
28
29
  VALUE output; // for accumulating output to be returned
@@ -420,6 +421,15 @@ void _Wikitext_start_para_if_necessary(parser_t *parser)
420
421
  parser->pending_crlf = Qfalse;
421
422
  }
422
423
 
424
+ void _Wikitext_emit_pending_crlf_if_necessary(parser_t *parser)
425
+ {
426
+ if (parser->pending_crlf == Qtrue)
427
+ {
428
+ rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
429
+ parser->pending_crlf = Qfalse;
430
+ }
431
+ }
432
+
423
433
  // Helper function that pops any excess elements off scope (pushing is already handled in the respective rules).
424
434
  // For example, given input like:
425
435
  //
@@ -1035,7 +1045,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1035
1045
 
1036
1046
  case PRE_START:
1037
1047
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1048
+ {
1049
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1038
1050
  rb_str_cat(parser->output, escaped_pre_start, sizeof(escaped_pre_start) - 1);
1051
+ }
1039
1052
  else if (IN(BLOCKQUOTE_START))
1040
1053
  {
1041
1054
  _Wikitext_rollback_failed_link(parser); // if any
@@ -1046,9 +1059,16 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1046
1059
  ary_push(parser->scope, PRE_START);
1047
1060
  ary_push(parser->line, PRE_START);
1048
1061
  }
1049
- else if (parser->scope->count == 0 || (IN(P) && !IN(BLOCKQUOTE)))
1062
+ else if (IN(BLOCKQUOTE))
1063
+ {
1064
+ // PRE_START is illegal
1065
+ i = NIL_P(parser->capture) ? parser->output : parser->capture;
1066
+ _Wikitext_pop_excess_elements(parser);
1067
+ _Wikitext_start_para_if_necessary(parser);
1068
+ rb_str_cat(i, escaped_pre_start, sizeof(escaped_pre_start) - 1);
1069
+ }
1070
+ else
1050
1071
  {
1051
- // would be nice to eliminate the repetition here but it's probably the clearest way
1052
1072
  _Wikitext_rollback_failed_link(parser); // if any
1053
1073
  _Wikitext_rollback_failed_external_link(parser); // if any
1054
1074
  _Wikitext_pop_from_stack_up_to(parser, Qnil, P, Qtrue);
@@ -1057,19 +1077,14 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1057
1077
  ary_push(parser->scope, PRE_START);
1058
1078
  ary_push(parser->line, PRE_START);
1059
1079
  }
1060
- else
1061
- {
1062
- // everywhere else, PRE_START is illegal (in LI, BLOCKQUOTE, H1_START etc)
1063
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1064
- _Wikitext_pop_excess_elements(parser);
1065
- _Wikitext_start_para_if_necessary(parser);
1066
- rb_str_cat(i, escaped_pre_start, sizeof(escaped_pre_start) - 1);
1067
- }
1068
1080
  break;
1069
1081
 
1070
1082
  case PRE_END:
1071
1083
  if (IN(NO_WIKI_START) || IN(PRE))
1084
+ {
1085
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1072
1086
  rb_str_cat(parser->output, escaped_pre_end, sizeof(escaped_pre_end) - 1);
1087
+ }
1073
1088
  else
1074
1089
  {
1075
1090
  if (IN(PRE_START))
@@ -1139,7 +1154,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1139
1154
 
1140
1155
  case BLOCKQUOTE_START:
1141
1156
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1157
+ {
1158
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1142
1159
  rb_str_cat(parser->output, escaped_blockquote_start, sizeof(escaped_blockquote_start) - 1);
1160
+ }
1143
1161
  else if (IN(BLOCKQUOTE_START))
1144
1162
  {
1145
1163
  // nesting is fine here
@@ -1176,7 +1194,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1176
1194
 
1177
1195
  case BLOCKQUOTE_END:
1178
1196
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1197
+ {
1198
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1179
1199
  rb_str_cat(parser->output, escaped_blockquote_end, sizeof(escaped_blockquote_end) - 1);
1200
+ }
1180
1201
  else
1181
1202
  {
1182
1203
  if (IN(BLOCKQUOTE_START))
@@ -1193,7 +1214,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1193
1214
 
1194
1215
  case NO_WIKI_START:
1195
1216
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1217
+ {
1218
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1196
1219
  rb_str_cat(parser->output, escaped_no_wiki_start, sizeof(escaped_no_wiki_start) - 1);
1220
+ }
1197
1221
  else
1198
1222
  {
1199
1223
  _Wikitext_pop_excess_elements(parser);
@@ -1218,6 +1242,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1218
1242
  case STRONG_EM:
1219
1243
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1220
1244
  {
1245
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1221
1246
  rb_str_cat(parser->output, literal_strong_em, sizeof(literal_strong_em) - 1);
1222
1247
  break;
1223
1248
  }
@@ -1282,7 +1307,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1282
1307
 
1283
1308
  case STRONG:
1284
1309
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1310
+ {
1311
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1285
1312
  rb_str_cat(parser->output, literal_strong, sizeof(literal_strong) - 1);
1313
+ }
1286
1314
  else
1287
1315
  {
1288
1316
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
@@ -1306,7 +1334,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1306
1334
 
1307
1335
  case STRONG_START:
1308
1336
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1337
+ {
1338
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1309
1339
  rb_str_cat(parser->output, escaped_strong_start, sizeof(escaped_strong_start) - 1);
1340
+ }
1310
1341
  else
1311
1342
  {
1312
1343
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
@@ -1325,7 +1356,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1325
1356
 
1326
1357
  case STRONG_END:
1327
1358
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1359
+ {
1360
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1328
1361
  rb_str_cat(parser->output, escaped_strong_end, sizeof(escaped_strong_end) - 1);
1362
+ }
1329
1363
  else
1330
1364
  {
1331
1365
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
@@ -1343,7 +1377,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1343
1377
 
1344
1378
  case EM:
1345
1379
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1380
+ {
1381
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1346
1382
  rb_str_cat(parser->output, literal_em, sizeof(literal_em) - 1);
1383
+ }
1347
1384
  else
1348
1385
  {
1349
1386
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
@@ -1367,7 +1404,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1367
1404
 
1368
1405
  case EM_START:
1369
1406
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1407
+ {
1408
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1370
1409
  rb_str_cat(parser->output, escaped_em_start, sizeof(escaped_em_start) - 1);
1410
+ }
1371
1411
  else
1372
1412
  {
1373
1413
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
@@ -1386,7 +1426,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1386
1426
 
1387
1427
  case EM_END:
1388
1428
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1429
+ {
1430
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1389
1431
  rb_str_cat(parser->output, escaped_em_end, sizeof(escaped_em_end) - 1);
1432
+ }
1390
1433
  else
1391
1434
  {
1392
1435
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
@@ -1404,7 +1447,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1404
1447
 
1405
1448
  case TT:
1406
1449
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1450
+ {
1451
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1407
1452
  rb_str_cat(parser->output, backtick, sizeof(backtick) - 1);
1453
+ }
1408
1454
  else
1409
1455
  {
1410
1456
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
@@ -1428,7 +1474,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1428
1474
 
1429
1475
  case TT_START:
1430
1476
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1477
+ {
1478
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1431
1479
  rb_str_cat(parser->output, escaped_tt_start, sizeof(escaped_tt_start) - 1);
1480
+ }
1432
1481
  else
1433
1482
  {
1434
1483
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
@@ -1447,7 +1496,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1447
1496
 
1448
1497
  case TT_END:
1449
1498
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1499
+ {
1500
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1450
1501
  rb_str_cat(parser->output, escaped_tt_end, sizeof(escaped_tt_end) - 1);
1502
+ }
1451
1503
  else
1452
1504
  {
1453
1505
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
@@ -1650,7 +1702,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1650
1702
 
1651
1703
  case H6_END:
1652
1704
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1705
+ {
1706
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1653
1707
  rb_str_cat(parser->output, literal_h6, sizeof(literal_h6) - 1);
1708
+ }
1654
1709
  else
1655
1710
  {
1656
1711
  _Wikitext_rollback_failed_external_link(parser); // if any
@@ -1665,7 +1720,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1665
1720
 
1666
1721
  case H5_END:
1667
1722
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1723
+ {
1724
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1668
1725
  rb_str_cat(parser->output, literal_h5, sizeof(literal_h5) - 1);
1726
+ }
1669
1727
  else
1670
1728
  {
1671
1729
  _Wikitext_rollback_failed_external_link(parser); // if any
@@ -1680,7 +1738,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1680
1738
 
1681
1739
  case H4_END:
1682
1740
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1741
+ {
1742
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1683
1743
  rb_str_cat(parser->output, literal_h4, sizeof(literal_h4) - 1);
1744
+ }
1684
1745
  else
1685
1746
  {
1686
1747
  _Wikitext_rollback_failed_external_link(parser); // if any
@@ -1695,7 +1756,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1695
1756
 
1696
1757
  case H3_END:
1697
1758
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1759
+ {
1760
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1698
1761
  rb_str_cat(parser->output, literal_h3, sizeof(literal_h3) - 1);
1762
+ }
1699
1763
  else
1700
1764
  {
1701
1765
  _Wikitext_rollback_failed_external_link(parser); // if any
@@ -1710,7 +1774,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1710
1774
 
1711
1775
  case H2_END:
1712
1776
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1777
+ {
1778
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1713
1779
  rb_str_cat(parser->output, literal_h2, sizeof(literal_h2) - 1);
1780
+ }
1714
1781
  else
1715
1782
  {
1716
1783
  _Wikitext_rollback_failed_external_link(parser); // if any
@@ -1725,7 +1792,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1725
1792
 
1726
1793
  case H1_END:
1727
1794
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1795
+ {
1796
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1728
1797
  rb_str_cat(parser->output, literal_h1, sizeof(literal_h1) - 1);
1798
+ }
1729
1799
  else
1730
1800
  {
1731
1801
  _Wikitext_rollback_failed_external_link(parser); // if any
@@ -1740,7 +1810,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1740
1810
 
1741
1811
  case MAIL:
1742
1812
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1813
+ {
1814
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1743
1815
  rb_str_cat(parser->output, token->start, TOKEN_LEN(token));
1816
+ }
1744
1817
  else
1745
1818
  {
1746
1819
  // in plain scope, will turn into autolink (with appropriate, user-configurable CSS)
@@ -1839,12 +1912,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1839
1912
  // example [[foo €]]
1840
1913
  // renders <a href="/wiki/Foo_%E2%82%AC">foo €</a>
1841
1914
  // we'll impose similar restrictions here for the link target; allowed tokens will be:
1842
- // SPACE, PRINTABLE, DEFAULT, QUOT and AMP
1915
+ // SPACE, SPECIAL_URI_CHARS, PRINTABLE, DEFAULT, QUOT and AMP
1843
1916
  // everything else will be rejected
1844
1917
  case LINK_START:
1845
1918
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
1846
1919
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1920
+ {
1921
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1847
1922
  rb_str_cat(i, link_start, sizeof(link_start) - 1);
1923
+ }
1848
1924
  else if (IN(EXT_LINK_START))
1849
1925
  // already in external link scope! (and in fact, must be capturing link_text right now)
1850
1926
  rb_str_cat(i, link_start, sizeof(link_start) - 1);
@@ -1868,16 +1944,17 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1868
1944
  // look ahead and try to gobble up link target
1869
1945
  while (NEXT_TOKEN(), (type = token->type))
1870
1946
  {
1871
- if (type == SPACE ||
1872
- type == PRINTABLE ||
1873
- type == DEFAULT ||
1874
- type == QUOT ||
1875
- type == QUOT_ENTITY ||
1876
- type == AMP ||
1877
- type == AMP_ENTITY ||
1878
- type == IMG_START ||
1879
- type == IMG_END ||
1880
- type == LEFT_CURLY ||
1947
+ if (type == SPACE ||
1948
+ type == SPECIAL_URI_CHARS ||
1949
+ type == PRINTABLE ||
1950
+ type == DEFAULT ||
1951
+ type == QUOT ||
1952
+ type == QUOT_ENTITY ||
1953
+ type == AMP ||
1954
+ type == AMP_ENTITY ||
1955
+ type == IMG_START ||
1956
+ type == IMG_END ||
1957
+ type == LEFT_CURLY ||
1881
1958
  type == RIGHT_CURLY)
1882
1959
  {
1883
1960
  // accumulate these tokens into link_target
@@ -1920,7 +1997,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1920
1997
  case LINK_END:
1921
1998
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
1922
1999
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2000
+ {
2001
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1923
2002
  rb_str_cat(i, link_end, sizeof(link_end) - 1);
2003
+ }
1924
2004
  else if (IN(EXT_LINK_START))
1925
2005
  // already in external link scope! (and in fact, must be capturing link_text right now)
1926
2006
  rb_str_cat(i, link_end, sizeof(link_end) - 1);
@@ -1958,7 +2038,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1958
2038
  case EXT_LINK_START:
1959
2039
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
1960
2040
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2041
+ {
2042
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1961
2043
  rb_str_cat(i, ext_link_start, sizeof(ext_link_start) - 1);
2044
+ }
1962
2045
  else if (IN(EXT_LINK_START))
1963
2046
  // already in external link scope! (and in fact, must be capturing link_text right now)
1964
2047
  rb_str_cat(i, ext_link_start, sizeof(ext_link_start) - 1);
@@ -2003,7 +2086,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2003
2086
  case EXT_LINK_END:
2004
2087
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
2005
2088
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2089
+ {
2090
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
2006
2091
  rb_str_cat(i, ext_link_end, sizeof(ext_link_end) - 1);
2092
+ }
2007
2093
  else if (IN(EXT_LINK_START))
2008
2094
  {
2009
2095
  if (NIL_P(parser->link_text))
@@ -2038,7 +2124,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2038
2124
  case SPACE:
2039
2125
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
2040
2126
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2127
+ {
2128
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
2041
2129
  rb_str_cat(i, token->start, TOKEN_LEN(token));
2130
+ }
2042
2131
  else
2043
2132
  {
2044
2133
  // peek ahead to see next token
@@ -2117,7 +2206,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2117
2206
 
2118
2207
  case IMG_START:
2119
2208
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2209
+ {
2210
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
2120
2211
  rb_str_cat(parser->output, token->start, TOKEN_LEN(token));
2212
+ }
2121
2213
  else if (!NIL_P(parser->capture))
2122
2214
  rb_str_cat(parser->capture, token->start, TOKEN_LEN(token));
2123
2215
  else
@@ -2126,39 +2218,38 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2126
2218
  _Wikitext_pop_excess_elements(parser);
2127
2219
  _Wikitext_start_para_if_necessary(parser);
2128
2220
 
2129
- // peek ahead to see next token
2130
- NEXT_TOKEN();
2131
- if (token->type != PRINTABLE)
2132
- // failure
2133
- rb_str_cat(parser->output, literal_img_start, sizeof(literal_img_start) - 1);
2134
- else
2221
+ // scan ahead consuming PRINTABLE and SPECIAL_URI_CHARS tokens
2222
+ // will cheat here and abuse the link_target capture buffer to accumulate text
2223
+ if (NIL_P(parser->link_target))
2224
+ parser->link_target = rb_str_new2("");
2225
+ while (NEXT_TOKEN(), (type = token->type))
2135
2226
  {
2136
- // remember the PRINTABLE
2137
- char *token_ptr = token->start;
2138
- int token_len = TOKEN_LEN(token);
2139
-
2140
- // peek ahead once more
2141
- NEXT_TOKEN();
2142
- if (token->type == IMG_END)
2227
+ if (type == PRINTABLE || type == SPECIAL_URI_CHARS)
2228
+ rb_str_cat(parser->link_target, token->start, TOKEN_LEN(token));
2229
+ else if (type == IMG_END)
2143
2230
  {
2144
2231
  // success
2145
- _Wikitext_append_img(parser, token_ptr, token_len);
2232
+ _Wikitext_append_img(parser, RSTRING_PTR(parser->link_target), RSTRING_LEN(parser->link_target));
2146
2233
  token = NULL;
2234
+ break;
2147
2235
  }
2148
- else
2236
+ else // unexpected token (syntax error)
2149
2237
  {
2150
- // failure
2238
+ // rollback
2151
2239
  rb_str_cat(parser->output, literal_img_start, sizeof(literal_img_start) - 1);
2152
- rb_str_cat(parser->output, token_ptr, token_len);
2240
+ rb_str_cat(parser->output, RSTRING_PTR(parser->link_target), RSTRING_LEN(parser->link_target));
2241
+ break;
2153
2242
  }
2154
2243
  }
2155
2244
 
2156
2245
  // jump to top of the loop to process token we scanned during lookahead
2246
+ parser->link_target = Qnil;
2157
2247
  continue;
2158
2248
  }
2159
2249
  break;
2160
2250
 
2161
2251
  case CRLF:
2252
+ i = parser->pending_crlf;
2162
2253
  parser->pending_crlf = Qfalse;
2163
2254
  _Wikitext_rollback_failed_link(parser); // if any
2164
2255
  _Wikitext_rollback_failed_external_link(parser); // if any
@@ -2170,27 +2261,33 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2170
2261
  }
2171
2262
  else if (IN(PRE))
2172
2263
  {
2173
- // beware when nothing or BLOCKQUOTE on line buffer (not line stack!) prior to CRLF, that must be end of PRE block
2174
- if (NO_ITEM(ary_entry(parser->line_buffer, -2)) || ary_entry(parser->line_buffer, -2) == BLOCKQUOTE)
2264
+ // beware when BLOCKQUOTE on line buffer (not line stack!) prior to CRLF, that must be end of PRE block
2265
+ if (ary_entry(parser->line_buffer, -2) == BLOCKQUOTE)
2175
2266
  // don't emit in this case
2176
2267
  _Wikitext_pop_from_stack_up_to(parser, parser->output, PRE, Qtrue);
2177
2268
  else
2178
2269
  {
2270
+ if (ary_entry(parser->line_buffer, -2) == PRE)
2271
+ {
2272
+ // only thing on line is the PRE: emit pending line ending (if we had one)
2273
+ if (i == Qtrue)
2274
+ rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
2275
+ }
2276
+
2277
+ // clear these _before_ calling NEXT_TOKEN (NEXT_TOKEN adds to the line_buffer)
2278
+ ary_clear(parser->line);
2279
+ ary_clear(parser->line_buffer);
2280
+
2179
2281
  // peek ahead to see if this is definitely the end of the PRE block
2180
2282
  NEXT_TOKEN();
2181
2283
  type = token->type;
2182
2284
  if (type != BLOCKQUOTE && type != PRE)
2183
- {
2184
2285
  // this is definitely the end of the block, so don't emit
2185
2286
  _Wikitext_pop_from_stack_up_to(parser, parser->output, PRE, Qtrue);
2186
- }
2187
2287
  else
2188
2288
  // potentially will emit
2189
2289
  parser->pending_crlf = Qtrue;
2190
2290
 
2191
- // delete the entire contents of the line scope stack and buffer
2192
- ary_clear(parser->line);
2193
- ary_clear(parser->line_buffer);
2194
2291
  continue; // jump back to top of loop to handle token grabbed via lookahead
2195
2292
  }
2196
2293
  }
@@ -2233,6 +2330,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2233
2330
  ary_clear(parser->line_buffer);
2234
2331
  break;
2235
2332
 
2333
+ case SPECIAL_URI_CHARS:
2236
2334
  case PRINTABLE:
2237
2335
  case IMG_END:
2238
2336
  case LEFT_CURLY:
@@ -2251,6 +2349,12 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2251
2349
  break;
2252
2350
 
2253
2351
  case END_OF_FILE:
2352
+ // special case for input like " foo\n " (see pre_spec.rb)
2353
+ if (IN(PRE) &&
2354
+ ary_entry(parser->line_buffer, -2) == PRE &&
2355
+ parser->pending_crlf == Qtrue)
2356
+ rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
2357
+
2254
2358
  // close any open scopes on hitting EOF
2255
2359
  _Wikitext_rollback_failed_external_link(parser); // if any
2256
2360
  _Wikitext_rollback_failed_link(parser); // if any