wikitext 1.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/ext/parser.c CHANGED
@@ -23,6 +23,7 @@
23
23
  // poor man's object orientation in C:
24
24
  // instead of passing around multiple parameters between functions in the parser
25
25
  // we pack everything into a struct and pass around only a pointer to that
26
+ // TODO: consider changing some of the VALUE members (eg link_target) to the more efficient str_t type
26
27
  typedef struct
27
28
  {
28
29
  VALUE output; // for accumulating output to be returned
@@ -420,6 +421,15 @@ void _Wikitext_start_para_if_necessary(parser_t *parser)
420
421
  parser->pending_crlf = Qfalse;
421
422
  }
422
423
 
424
+ void _Wikitext_emit_pending_crlf_if_necessary(parser_t *parser) // appends the parser's line ending to the output if one is pending; otherwise does nothing
425
+ {
426
+ if (parser->pending_crlf == Qtrue) // only act when a line ending has been flagged as pending
427
+ {
428
+ rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len); // append parser->line_ending to parser->output
429
+ parser->pending_crlf = Qfalse; // clear the flag so the same line ending is not emitted twice
430
+ }
431
+ }
432
+
423
433
  // Helper function that pops any excess elements off scope (pushing is already handled in the respective rules).
424
434
  // For example, given input like:
425
435
  //
@@ -1035,7 +1045,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1035
1045
 
1036
1046
  case PRE_START:
1037
1047
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1048
+ {
1049
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1038
1050
  rb_str_cat(parser->output, escaped_pre_start, sizeof(escaped_pre_start) - 1);
1051
+ }
1039
1052
  else if (IN(BLOCKQUOTE_START))
1040
1053
  {
1041
1054
  _Wikitext_rollback_failed_link(parser); // if any
@@ -1046,9 +1059,16 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1046
1059
  ary_push(parser->scope, PRE_START);
1047
1060
  ary_push(parser->line, PRE_START);
1048
1061
  }
1049
- else if (parser->scope->count == 0 || (IN(P) && !IN(BLOCKQUOTE)))
1062
+ else if (IN(BLOCKQUOTE))
1063
+ {
1064
+ // PRE_START is illegal
1065
+ i = NIL_P(parser->capture) ? parser->output : parser->capture;
1066
+ _Wikitext_pop_excess_elements(parser);
1067
+ _Wikitext_start_para_if_necessary(parser);
1068
+ rb_str_cat(i, escaped_pre_start, sizeof(escaped_pre_start) - 1);
1069
+ }
1070
+ else
1050
1071
  {
1051
- // would be nice to eliminate the repetition here but it's probably the clearest way
1052
1072
  _Wikitext_rollback_failed_link(parser); // if any
1053
1073
  _Wikitext_rollback_failed_external_link(parser); // if any
1054
1074
  _Wikitext_pop_from_stack_up_to(parser, Qnil, P, Qtrue);
@@ -1057,19 +1077,14 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1057
1077
  ary_push(parser->scope, PRE_START);
1058
1078
  ary_push(parser->line, PRE_START);
1059
1079
  }
1060
- else
1061
- {
1062
- // everywhere else, PRE_START is illegal (in LI, BLOCKQUOTE, H1_START etc)
1063
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1064
- _Wikitext_pop_excess_elements(parser);
1065
- _Wikitext_start_para_if_necessary(parser);
1066
- rb_str_cat(i, escaped_pre_start, sizeof(escaped_pre_start) - 1);
1067
- }
1068
1080
  break;
1069
1081
 
1070
1082
  case PRE_END:
1071
1083
  if (IN(NO_WIKI_START) || IN(PRE))
1084
+ {
1085
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1072
1086
  rb_str_cat(parser->output, escaped_pre_end, sizeof(escaped_pre_end) - 1);
1087
+ }
1073
1088
  else
1074
1089
  {
1075
1090
  if (IN(PRE_START))
@@ -1139,7 +1154,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1139
1154
 
1140
1155
  case BLOCKQUOTE_START:
1141
1156
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1157
+ {
1158
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1142
1159
  rb_str_cat(parser->output, escaped_blockquote_start, sizeof(escaped_blockquote_start) - 1);
1160
+ }
1143
1161
  else if (IN(BLOCKQUOTE_START))
1144
1162
  {
1145
1163
  // nesting is fine here
@@ -1176,7 +1194,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1176
1194
 
1177
1195
  case BLOCKQUOTE_END:
1178
1196
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1197
+ {
1198
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1179
1199
  rb_str_cat(parser->output, escaped_blockquote_end, sizeof(escaped_blockquote_end) - 1);
1200
+ }
1180
1201
  else
1181
1202
  {
1182
1203
  if (IN(BLOCKQUOTE_START))
@@ -1193,7 +1214,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1193
1214
 
1194
1215
  case NO_WIKI_START:
1195
1216
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1217
+ {
1218
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1196
1219
  rb_str_cat(parser->output, escaped_no_wiki_start, sizeof(escaped_no_wiki_start) - 1);
1220
+ }
1197
1221
  else
1198
1222
  {
1199
1223
  _Wikitext_pop_excess_elements(parser);
@@ -1218,6 +1242,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1218
1242
  case STRONG_EM:
1219
1243
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1220
1244
  {
1245
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1221
1246
  rb_str_cat(parser->output, literal_strong_em, sizeof(literal_strong_em) - 1);
1222
1247
  break;
1223
1248
  }
@@ -1282,7 +1307,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1282
1307
 
1283
1308
  case STRONG:
1284
1309
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1310
+ {
1311
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1285
1312
  rb_str_cat(parser->output, literal_strong, sizeof(literal_strong) - 1);
1313
+ }
1286
1314
  else
1287
1315
  {
1288
1316
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
@@ -1306,7 +1334,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1306
1334
 
1307
1335
  case STRONG_START:
1308
1336
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1337
+ {
1338
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1309
1339
  rb_str_cat(parser->output, escaped_strong_start, sizeof(escaped_strong_start) - 1);
1340
+ }
1310
1341
  else
1311
1342
  {
1312
1343
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
@@ -1325,7 +1356,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1325
1356
 
1326
1357
  case STRONG_END:
1327
1358
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1359
+ {
1360
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1328
1361
  rb_str_cat(parser->output, escaped_strong_end, sizeof(escaped_strong_end) - 1);
1362
+ }
1329
1363
  else
1330
1364
  {
1331
1365
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
@@ -1343,7 +1377,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1343
1377
 
1344
1378
  case EM:
1345
1379
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1380
+ {
1381
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1346
1382
  rb_str_cat(parser->output, literal_em, sizeof(literal_em) - 1);
1383
+ }
1347
1384
  else
1348
1385
  {
1349
1386
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
@@ -1367,7 +1404,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1367
1404
 
1368
1405
  case EM_START:
1369
1406
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1407
+ {
1408
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1370
1409
  rb_str_cat(parser->output, escaped_em_start, sizeof(escaped_em_start) - 1);
1410
+ }
1371
1411
  else
1372
1412
  {
1373
1413
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
@@ -1386,7 +1426,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1386
1426
 
1387
1427
  case EM_END:
1388
1428
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1429
+ {
1430
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1389
1431
  rb_str_cat(parser->output, escaped_em_end, sizeof(escaped_em_end) - 1);
1432
+ }
1390
1433
  else
1391
1434
  {
1392
1435
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
@@ -1404,7 +1447,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1404
1447
 
1405
1448
  case TT:
1406
1449
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1450
+ {
1451
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1407
1452
  rb_str_cat(parser->output, backtick, sizeof(backtick) - 1);
1453
+ }
1408
1454
  else
1409
1455
  {
1410
1456
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
@@ -1428,7 +1474,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1428
1474
 
1429
1475
  case TT_START:
1430
1476
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1477
+ {
1478
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1431
1479
  rb_str_cat(parser->output, escaped_tt_start, sizeof(escaped_tt_start) - 1);
1480
+ }
1432
1481
  else
1433
1482
  {
1434
1483
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
@@ -1447,7 +1496,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1447
1496
 
1448
1497
  case TT_END:
1449
1498
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1499
+ {
1500
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1450
1501
  rb_str_cat(parser->output, escaped_tt_end, sizeof(escaped_tt_end) - 1);
1502
+ }
1451
1503
  else
1452
1504
  {
1453
1505
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
@@ -1650,7 +1702,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1650
1702
 
1651
1703
  case H6_END:
1652
1704
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1705
+ {
1706
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1653
1707
  rb_str_cat(parser->output, literal_h6, sizeof(literal_h6) - 1);
1708
+ }
1654
1709
  else
1655
1710
  {
1656
1711
  _Wikitext_rollback_failed_external_link(parser); // if any
@@ -1665,7 +1720,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1665
1720
 
1666
1721
  case H5_END:
1667
1722
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1723
+ {
1724
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1668
1725
  rb_str_cat(parser->output, literal_h5, sizeof(literal_h5) - 1);
1726
+ }
1669
1727
  else
1670
1728
  {
1671
1729
  _Wikitext_rollback_failed_external_link(parser); // if any
@@ -1680,7 +1738,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1680
1738
 
1681
1739
  case H4_END:
1682
1740
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1741
+ {
1742
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1683
1743
  rb_str_cat(parser->output, literal_h4, sizeof(literal_h4) - 1);
1744
+ }
1684
1745
  else
1685
1746
  {
1686
1747
  _Wikitext_rollback_failed_external_link(parser); // if any
@@ -1695,7 +1756,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1695
1756
 
1696
1757
  case H3_END:
1697
1758
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1759
+ {
1760
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1698
1761
  rb_str_cat(parser->output, literal_h3, sizeof(literal_h3) - 1);
1762
+ }
1699
1763
  else
1700
1764
  {
1701
1765
  _Wikitext_rollback_failed_external_link(parser); // if any
@@ -1710,7 +1774,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1710
1774
 
1711
1775
  case H2_END:
1712
1776
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1777
+ {
1778
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1713
1779
  rb_str_cat(parser->output, literal_h2, sizeof(literal_h2) - 1);
1780
+ }
1714
1781
  else
1715
1782
  {
1716
1783
  _Wikitext_rollback_failed_external_link(parser); // if any
@@ -1725,7 +1792,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1725
1792
 
1726
1793
  case H1_END:
1727
1794
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1795
+ {
1796
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1728
1797
  rb_str_cat(parser->output, literal_h1, sizeof(literal_h1) - 1);
1798
+ }
1729
1799
  else
1730
1800
  {
1731
1801
  _Wikitext_rollback_failed_external_link(parser); // if any
@@ -1740,7 +1810,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1740
1810
 
1741
1811
  case MAIL:
1742
1812
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1813
+ {
1814
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1743
1815
  rb_str_cat(parser->output, token->start, TOKEN_LEN(token));
1816
+ }
1744
1817
  else
1745
1818
  {
1746
1819
  // in plain scope, will turn into autolink (with appropriate, user-configurable CSS)
@@ -1839,12 +1912,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1839
1912
  // example [[foo €]]
1840
1913
  // renders <a href="/wiki/Foo_%E2%82%AC">foo €</a>
1841
1914
  // we'll impose similar restrictions here for the link target; allowed tokens will be:
1842
- // SPACE, PRINTABLE, DEFAULT, QUOT and AMP
1915
+ // SPACE, SPECIAL_URI_CHARS, PRINTABLE, DEFAULT, QUOT, QUOT_ENTITY, AMP, AMP_ENTITY, IMG_START, IMG_END, LEFT_CURLY and RIGHT_CURLY
1843
1916
  // everything else will be rejected
1844
1917
  case LINK_START:
1845
1918
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
1846
1919
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1920
+ {
1921
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1847
1922
  rb_str_cat(i, link_start, sizeof(link_start) - 1);
1923
+ }
1848
1924
  else if (IN(EXT_LINK_START))
1849
1925
  // already in external link scope! (and in fact, must be capturing link_text right now)
1850
1926
  rb_str_cat(i, link_start, sizeof(link_start) - 1);
@@ -1868,16 +1944,17 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1868
1944
  // look ahead and try to gobble up link target
1869
1945
  while (NEXT_TOKEN(), (type = token->type))
1870
1946
  {
1871
- if (type == SPACE ||
1872
- type == PRINTABLE ||
1873
- type == DEFAULT ||
1874
- type == QUOT ||
1875
- type == QUOT_ENTITY ||
1876
- type == AMP ||
1877
- type == AMP_ENTITY ||
1878
- type == IMG_START ||
1879
- type == IMG_END ||
1880
- type == LEFT_CURLY ||
1947
+ if (type == SPACE ||
1948
+ type == SPECIAL_URI_CHARS ||
1949
+ type == PRINTABLE ||
1950
+ type == DEFAULT ||
1951
+ type == QUOT ||
1952
+ type == QUOT_ENTITY ||
1953
+ type == AMP ||
1954
+ type == AMP_ENTITY ||
1955
+ type == IMG_START ||
1956
+ type == IMG_END ||
1957
+ type == LEFT_CURLY ||
1881
1958
  type == RIGHT_CURLY)
1882
1959
  {
1883
1960
  // accumulate these tokens into link_target
@@ -1920,7 +1997,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1920
1997
  case LINK_END:
1921
1998
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
1922
1999
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2000
+ {
2001
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1923
2002
  rb_str_cat(i, link_end, sizeof(link_end) - 1);
2003
+ }
1924
2004
  else if (IN(EXT_LINK_START))
1925
2005
  // already in external link scope! (and in fact, must be capturing link_text right now)
1926
2006
  rb_str_cat(i, link_end, sizeof(link_end) - 1);
@@ -1958,7 +2038,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1958
2038
  case EXT_LINK_START:
1959
2039
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
1960
2040
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2041
+ {
2042
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
1961
2043
  rb_str_cat(i, ext_link_start, sizeof(ext_link_start) - 1);
2044
+ }
1962
2045
  else if (IN(EXT_LINK_START))
1963
2046
  // already in external link scope! (and in fact, must be capturing link_text right now)
1964
2047
  rb_str_cat(i, ext_link_start, sizeof(ext_link_start) - 1);
@@ -2003,7 +2086,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2003
2086
  case EXT_LINK_END:
2004
2087
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
2005
2088
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2089
+ {
2090
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
2006
2091
  rb_str_cat(i, ext_link_end, sizeof(ext_link_end) - 1);
2092
+ }
2007
2093
  else if (IN(EXT_LINK_START))
2008
2094
  {
2009
2095
  if (NIL_P(parser->link_text))
@@ -2038,7 +2124,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2038
2124
  case SPACE:
2039
2125
  i = NIL_P(parser->capture) ? parser->output : parser->capture;
2040
2126
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2127
+ {
2128
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
2041
2129
  rb_str_cat(i, token->start, TOKEN_LEN(token));
2130
+ }
2042
2131
  else
2043
2132
  {
2044
2133
  // peek ahead to see next token
@@ -2117,7 +2206,10 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2117
2206
 
2118
2207
  case IMG_START:
2119
2208
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2209
+ {
2210
+ _Wikitext_emit_pending_crlf_if_necessary(parser);
2120
2211
  rb_str_cat(parser->output, token->start, TOKEN_LEN(token));
2212
+ }
2121
2213
  else if (!NIL_P(parser->capture))
2122
2214
  rb_str_cat(parser->capture, token->start, TOKEN_LEN(token));
2123
2215
  else
@@ -2126,39 +2218,38 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2126
2218
  _Wikitext_pop_excess_elements(parser);
2127
2219
  _Wikitext_start_para_if_necessary(parser);
2128
2220
 
2129
- // peek ahead to see next token
2130
- NEXT_TOKEN();
2131
- if (token->type != PRINTABLE)
2132
- // failure
2133
- rb_str_cat(parser->output, literal_img_start, sizeof(literal_img_start) - 1);
2134
- else
2221
+ // scan ahead consuming PRINTABLE and SPECIAL_URI_CHARS tokens
2222
+ // will cheat here and abuse the link_target capture buffer to accumulate text
2223
+ if (NIL_P(parser->link_target))
2224
+ parser->link_target = rb_str_new2("");
2225
+ while (NEXT_TOKEN(), (type = token->type))
2135
2226
  {
2136
- // remember the PRINTABLE
2137
- char *token_ptr = token->start;
2138
- int token_len = TOKEN_LEN(token);
2139
-
2140
- // peek ahead once more
2141
- NEXT_TOKEN();
2142
- if (token->type == IMG_END)
2227
+ if (type == PRINTABLE || type == SPECIAL_URI_CHARS)
2228
+ rb_str_cat(parser->link_target, token->start, TOKEN_LEN(token));
2229
+ else if (type == IMG_END)
2143
2230
  {
2144
2231
  // success
2145
- _Wikitext_append_img(parser, token_ptr, token_len);
2232
+ _Wikitext_append_img(parser, RSTRING_PTR(parser->link_target), RSTRING_LEN(parser->link_target));
2146
2233
  token = NULL;
2234
+ break;
2147
2235
  }
2148
- else
2236
+ else // unexpected token (syntax error)
2149
2237
  {
2150
- // failure
2238
+ // rollback
2151
2239
  rb_str_cat(parser->output, literal_img_start, sizeof(literal_img_start) - 1);
2152
- rb_str_cat(parser->output, token_ptr, token_len);
2240
+ rb_str_cat(parser->output, RSTRING_PTR(parser->link_target), RSTRING_LEN(parser->link_target));
2241
+ break;
2153
2242
  }
2154
2243
  }
2155
2244
 
2156
2245
  // jump to top of the loop to process token we scanned during lookahead
2246
+ parser->link_target = Qnil;
2157
2247
  continue;
2158
2248
  }
2159
2249
  break;
2160
2250
 
2161
2251
  case CRLF:
2252
+ i = parser->pending_crlf;
2162
2253
  parser->pending_crlf = Qfalse;
2163
2254
  _Wikitext_rollback_failed_link(parser); // if any
2164
2255
  _Wikitext_rollback_failed_external_link(parser); // if any
@@ -2170,27 +2261,33 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2170
2261
  }
2171
2262
  else if (IN(PRE))
2172
2263
  {
2173
- // beware when nothing or BLOCKQUOTE on line buffer (not line stack!) prior to CRLF, that must be end of PRE block
2174
- if (NO_ITEM(ary_entry(parser->line_buffer, -2)) || ary_entry(parser->line_buffer, -2) == BLOCKQUOTE)
2264
+ // beware when BLOCKQUOTE on line buffer (not line stack!) prior to CRLF, that must be end of PRE block
2265
+ if (ary_entry(parser->line_buffer, -2) == BLOCKQUOTE)
2175
2266
  // don't emit in this case
2176
2267
  _Wikitext_pop_from_stack_up_to(parser, parser->output, PRE, Qtrue);
2177
2268
  else
2178
2269
  {
2270
+ if (ary_entry(parser->line_buffer, -2) == PRE)
2271
+ {
2272
+ // only thing on line is the PRE: emit pending line ending (if we had one)
2273
+ if (i == Qtrue)
2274
+ rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
2275
+ }
2276
+
2277
+ // clear these _before_ calling NEXT_TOKEN (NEXT_TOKEN adds to the line_buffer)
2278
+ ary_clear(parser->line);
2279
+ ary_clear(parser->line_buffer);
2280
+
2179
2281
  // peek ahead to see if this is definitely the end of the PRE block
2180
2282
  NEXT_TOKEN();
2181
2283
  type = token->type;
2182
2284
  if (type != BLOCKQUOTE && type != PRE)
2183
- {
2184
2285
  // this is definitely the end of the block, so don't emit
2185
2286
  _Wikitext_pop_from_stack_up_to(parser, parser->output, PRE, Qtrue);
2186
- }
2187
2287
  else
2188
2288
  // potentially will emit
2189
2289
  parser->pending_crlf = Qtrue;
2190
2290
 
2191
- // delete the entire contents of the line scope stack and buffer
2192
- ary_clear(parser->line);
2193
- ary_clear(parser->line_buffer);
2194
2291
  continue; // jump back to top of loop to handle token grabbed via lookahead
2195
2292
  }
2196
2293
  }
@@ -2233,6 +2330,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2233
2330
  ary_clear(parser->line_buffer);
2234
2331
  break;
2235
2332
 
2333
+ case SPECIAL_URI_CHARS:
2236
2334
  case PRINTABLE:
2237
2335
  case IMG_END:
2238
2336
  case LEFT_CURLY:
@@ -2251,6 +2349,12 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2251
2349
  break;
2252
2350
 
2253
2351
  case END_OF_FILE:
2352
+ // special case for input like " foo\n " (see pre_spec.rb)
2353
+ if (IN(PRE) &&
2354
+ ary_entry(parser->line_buffer, -2) == PRE &&
2355
+ parser->pending_crlf == Qtrue)
2356
+ rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
2357
+
2254
2358
  // close any open scopes on hitting EOF
2255
2359
  _Wikitext_rollback_failed_external_link(parser); // if any
2256
2360
  _Wikitext_rollback_failed_link(parser); // if any