wikitext 1.6 → 1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/ary.h CHANGED
@@ -35,10 +35,6 @@ typedef struct
35
35
 
36
36
  #define NO_ITEM(item) (item == INT_MAX)
37
37
 
38
- // Mark the ary struct designated by ptr as a participant in Ruby's mark-and-sweep garbage collection scheme.
39
- // A variable named name is placed on the C stack to prevent the structure from being prematurely collected.
40
- #define GC_WRAP_ARY(ptr, name) volatile VALUE name __attribute__((unused)) = Data_Wrap_Struct(rb_cObject, 0, ary_free, ptr)
41
-
42
38
  ary_t *ary_new(void);
43
39
  int ary_entry(ary_t *ary, int idx);
44
40
  void ary_clear(ary_t *ary);
@@ -47,8 +43,6 @@ void ary_push(ary_t *ary, int val);
47
43
  int ary_includes(ary_t *ary, int val);
48
44
 
49
45
  // returns a count indicating the number of times the value appears in the collection
50
- // refactored from _Wikitext_count()
51
46
  int ary_count(ary_t *ary, int item);
52
47
 
53
- // this method not inlined so its address can be passed to the Data_Wrap_Struct function.
54
48
  void ary_free(ary_t *ary);
data/ext/extconf.rb CHANGED
@@ -28,5 +28,14 @@ def missing item
28
28
  exit 1
29
29
  end
30
30
 
31
+ case RUBY_VERSION
32
+ when /\A1\.8/
33
+ $CFLAGS += ' -DRUBY_1_8_x'
34
+ when /\A1\.9/
35
+ $CFLAGS += ' -DRUBY_1_9_x'
36
+ else
37
+ raise "unsupported Ruby version: #{RUBY_VERSION}"
38
+ end
39
+
31
40
  have_header('ruby.h') or missing 'ruby.h'
32
41
  create_makefile('wikitext')
data/ext/parser.c CHANGED
@@ -21,6 +21,8 @@
21
21
  // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
22
22
  // POSSIBILITY OF SUCH DAMAGE.
23
23
 
24
+ #include <stdbool.h>
25
+
24
26
  #include "parser.h"
25
27
  #include "ary.h"
26
28
  #include "str.h"
@@ -32,29 +34,29 @@
32
34
  // poor man's object orientation in C:
33
35
  // instead of passing around multiple parameters between functions in the parser
34
36
  // we pack everything into a struct and pass around only a pointer to that
35
- // TODO: consider changing some of the VALUE members (eg link_target) to the more efficient str_t type
36
37
  typedef struct
37
38
  {
38
- VALUE output; // for accumulating output to be returned
39
- VALUE capture; // for capturing substrings
40
- VALUE link_target; // short term "memory" for parsing links
41
- VALUE link_text; // short term "memory" for parsing links
42
- VALUE external_link_class; // CSS class applied to external links
43
- VALUE mailto_class; // CSS class applied to email (mailto) links
44
- VALUE img_prefix; // path prepended when emitting img tags
39
+ str_t *capture; // capturing to link_target, link_text, or NULL (direct to output, not capturing)
40
+ str_t *output; // for accumulating output to be returned
41
+ str_t *link_target; // short term "memory" for parsing links
42
+ str_t *link_text; // short term "memory" for parsing links
43
+ str_t *line_ending;
44
+ str_t *tabulation; // caching buffer for emitting indentation
45
45
  ary_t *scope; // stack for tracking scope
46
46
  ary_t *line; // stack for tracking scope as implied by current line
47
47
  ary_t *line_buffer; // stack for tracking raw tokens (not scope) on current line
48
- VALUE pending_crlf; // boolean (Qtrue or Qfalse)
49
- VALUE autolink; // boolean (Qtrue or Qfalse)
50
- VALUE space_to_underscore; // boolean (Qtrue or Qfalse)
51
- str_t *line_ending;
48
+ VALUE external_link_class; // CSS class applied to external links
49
+ VALUE mailto_class; // CSS class applied to email (mailto) links
50
+ VALUE img_prefix; // path prepended when emitting img tags
52
51
  int base_indent; // controlled by the :indent option to Wikitext::Parser#parse
53
52
  int current_indent; // fluctuates according to currently nested structures
54
- str_t *tabulation; // caching buffer for emitting indentation
55
53
  int base_heading_level;
54
+ bool pending_crlf;
55
+ bool autolink;
56
+ bool space_to_underscore;
56
57
  } parser_t;
57
58
 
59
+ const char null_str[] = { 0 };
58
60
  const char escaped_no_wiki_start[] = "&lt;nowiki&gt;";
59
61
  const char escaped_no_wiki_end[] = "&lt;/nowiki&gt;";
60
62
  const char literal_strong_em[] = "'''''";
@@ -66,12 +68,6 @@ const char escaped_strong_start[] = "&lt;strong&gt;";
66
68
  const char escaped_strong_end[] = "&lt;/strong&gt;";
67
69
  const char escaped_tt_start[] = "&lt;tt&gt;";
68
70
  const char escaped_tt_end[] = "&lt;/tt&gt;";
69
- const char literal_h6[] = "======";
70
- const char literal_h5[] = "=====";
71
- const char literal_h4[] = "====";
72
- const char literal_h3[] = "===";
73
- const char literal_h2[] = "==";
74
- const char literal_h1[] = "=";
75
71
  const char pre_start[] = "<pre>";
76
72
  const char pre_end[] = "</pre>";
77
73
  const char escaped_pre_start[] = "&lt;pre&gt;";
@@ -130,6 +126,49 @@ const char img_start[] = "<img src=\"";
130
126
  const char img_end[] = "\" />";
131
127
  const char img_alt[] = "\" alt=\"";
132
128
 
129
+ // Mark the parser struct designated by ptr as a participant in Ruby's
130
+ // mark-and-sweep garbage collection scheme. A variable named name is placed on
131
+ // the C stack to prevent the structure from being prematurely collected.
132
+ #define GC_WRAP_PARSER(ptr, name) volatile VALUE name __attribute__((unused)) = Data_Wrap_Struct(rb_cObject, 0, parser_free, ptr)
133
+
134
+ parser_t *parser_new(void)
135
+ {
136
+ parser_t *parser = ALLOC_N(parser_t, 1);
137
+ parser->capture = NULL; // not a real instance, pointer to other member's instance
138
+ parser->output = str_new();
139
+ parser->link_target = str_new();
140
+ parser->link_text = str_new();
141
+ parser->line_ending = NULL; // caller should set up
142
+ parser->tabulation = str_new();
143
+ parser->scope = ary_new();
144
+ parser->line = ary_new();
145
+ parser->line_buffer = ary_new();
146
+ parser->external_link_class = Qnil; // caller should set up
147
+ parser->mailto_class = Qnil; // caller should set up
148
+ parser->img_prefix = Qnil; // caller should set up
149
+ parser->base_indent = 0;
150
+ parser->current_indent = 0;
151
+ parser->base_heading_level = 0;
152
+ parser->pending_crlf = false;
153
+ parser->autolink = true;
154
+ parser->space_to_underscore = true;
155
+ return parser;
156
+ }
157
+
158
+ void parser_free(parser_t *parser)
159
+ {
160
+ // we don't free parser->capture; it's just a redundant pointer
161
+ if (parser->output) str_free(parser->output);
162
+ if (parser->link_target) str_free(parser->link_target);
163
+ if (parser->link_text) str_free(parser->link_text);
164
+ if (parser->line_ending) str_free(parser->line_ending);
165
+ if (parser->tabulation) str_free(parser->tabulation);
166
+ if (parser->scope) ary_free(parser->scope);
167
+ if (parser->line) ary_free(parser->line);
168
+ if (parser->line_buffer) ary_free(parser->line_buffer);
169
+ free(parser);
170
+ }
171
+
133
172
  // for testing and debugging only
134
173
  VALUE Wikitext_parser_tokenize(VALUE self, VALUE string)
135
174
  {
@@ -142,11 +181,11 @@ VALUE Wikitext_parser_tokenize(VALUE self, VALUE string)
142
181
  char *pe = p + len;
143
182
  token_t token;
144
183
  next_token(&token, NULL, p, pe);
145
- rb_ary_push(tokens, _Wikitext_token(&token));
184
+ rb_ary_push(tokens, wiki_token(&token));
146
185
  while (token.type != END_OF_FILE)
147
186
  {
148
187
  next_token(&token, &token, NULL, pe);
149
- rb_ary_push(tokens, _Wikitext_token(&token));
188
+ rb_ary_push(tokens, wiki_token(&token));
150
189
  }
151
190
  return tokens;
152
191
  }
@@ -217,59 +256,66 @@ VALUE Wikitext_parser_fulltext_tokenize(int argc, VALUE *argv, VALUE self)
217
256
  return tokens;
218
257
  }
219
258
 
220
- // we downcase "in place", overwriting the original contents of the buffer and returning the same string
221
- VALUE _Wikitext_downcase(VALUE string)
259
+ // we downcase "in place", overwriting the original contents of the buffer
260
+ void wiki_downcase_bang(char *ptr, long len)
222
261
  {
223
- char *ptr = RSTRING_PTR(string);
224
- long len = RSTRING_LEN(string);
225
262
  for (long i = 0; i < len; i++)
226
263
  {
227
264
  if (ptr[i] >= 'A' && ptr[i] <= 'Z')
228
265
  ptr[i] += 32;
229
266
  }
230
- return string;
231
267
  }
232
268
 
233
- VALUE _Wikitext_hyperlink(parser_t *parser, VALUE link_prefix, VALUE link_target, VALUE link_text, VALUE link_class)
269
+ // prepare hyperlink and append it to parser->output
270
+ // if check_autolink is true, checks parser->autolink to decide whether to emit a real hyperlink
271
+ // or merely the literal link target
272
+ // if link_text is NULL or empty, the link_target is re-used for the link text
273
+ void wiki_append_hyperlink(parser_t *parser, VALUE link_prefix, str_t *link_target, str_t *link_text, VALUE link_class, bool check_autolink)
234
274
  {
235
- VALUE string = rb_str_new(a_start, sizeof(a_start) - 1); // <a href="
236
- if (!NIL_P(link_prefix))
237
- rb_str_append(string, link_prefix);
238
- rb_str_append(string, link_target);
239
-
240
- // special handling for mailto URIs
241
- const char *mailto = "mailto:";
242
- if (NIL_P(link_prefix) &&
243
- RSTRING_LEN(link_target) >= (long)sizeof(mailto) &&
244
- strncmp(mailto, RSTRING_PTR(link_target), sizeof(mailto)) == 0)
245
- link_class = parser->mailto_class; // use mailto_class from parser
246
-
247
- if (link_class != Qnil)
275
+ if (check_autolink && !parser->autolink)
276
+ str_append_str(parser->output, link_target);
277
+ else
248
278
  {
249
- rb_str_cat(string, a_class, sizeof(a_class) - 1); // " class="
250
- rb_str_append(string, link_class);
279
+ str_append(parser->output, a_start, sizeof(a_start) - 1); // <a href="
280
+ if (!NIL_P(link_prefix))
281
+ str_append_string(parser->output, link_prefix);
282
+ str_append_str(parser->output, link_target);
283
+
284
+ // special handling for mailto URIs
285
+ const char *mailto = "mailto:";
286
+ if (NIL_P(link_prefix) &&
287
+ link_target->len >= (long)sizeof(mailto) &&
288
+ strncmp(mailto, link_target->ptr, sizeof(mailto)) == 0)
289
+ link_class = parser->mailto_class; // use mailto_class from parser
290
+ if (link_class != Qnil)
291
+ {
292
+ str_append(parser->output, a_class, sizeof(a_class) - 1); // " class="
293
+ str_append_string(parser->output, link_class);
294
+ }
295
+ str_append(parser->output, a_start_close, sizeof(a_start_close) - 1); // ">
296
+ if (!link_text || link_text->len == 0) // re-use link_target
297
+ str_append_str(parser->output, link_target);
298
+ else
299
+ str_append_str(parser->output, link_text);
300
+ str_append(parser->output, a_end, sizeof(a_end) - 1); // </a>
251
301
  }
252
- rb_str_cat(string, a_start_close, sizeof(a_start_close) - 1); // ">
253
- rb_str_append(string, link_text);
254
- rb_str_cat(string, a_end, sizeof(a_end) - 1);
255
- return string;
256
302
  }
257
303
 
258
- void _Wikitext_append_img(parser_t *parser, char *token_ptr, int token_len)
304
+ void wiki_append_img(parser_t *parser, char *token_ptr, int token_len)
259
305
  {
260
- rb_str_cat(parser->output, img_start, sizeof(img_start) - 1); // <img src="
306
+ str_append(parser->output, img_start, sizeof(img_start) - 1); // <img src="
261
307
  if (!NIL_P(parser->img_prefix) && *token_ptr != '/') // len always > 0
262
- rb_str_append(parser->output, parser->img_prefix);
263
- rb_str_cat(parser->output, token_ptr, token_len);
264
- rb_str_cat(parser->output, img_alt, sizeof(img_alt) - 1); // " alt="
265
- rb_str_cat(parser->output, token_ptr, token_len);
266
- rb_str_cat(parser->output, img_end, sizeof(img_end) - 1); // " />
308
+ str_append_string(parser->output, parser->img_prefix);
309
+ str_append(parser->output, token_ptr, token_len);
310
+ str_append(parser->output, img_alt, sizeof(img_alt) - 1); // " alt="
311
+ str_append(parser->output, token_ptr, token_len);
312
+ str_append(parser->output, img_end, sizeof(img_end) - 1); // " />
267
313
  }
268
314
 
269
315
  // will emit indentation only if we are about to emit any of:
270
316
  // <blockquote>, <p>, <ul>, <ol>, <li>, <h1> etc, <pre>
271
317
  // each time we enter one of those spans we must ++ the indentation level
272
- void _Wikitext_indent(parser_t *parser)
318
+ void wiki_indent(parser_t *parser)
273
319
  {
274
320
  if (parser->base_indent == -1) // indentation disabled
275
321
  return;
@@ -285,32 +331,32 @@ void _Wikitext_indent(parser_t *parser)
285
331
  *old_end++ = ' ';
286
332
  if (space_count > parser->tabulation->len)
287
333
  parser->tabulation->len = space_count;
288
- rb_str_cat(parser->output, parser->tabulation->ptr, space_count);
334
+ str_append(parser->output, parser->tabulation->ptr, space_count);
289
335
  }
290
336
  parser->current_indent += 2;
291
337
  }
292
338
 
293
- void _Wikitext_dedent(parser_t *parser, VALUE emit)
339
+ void wiki_dedent(parser_t *parser, bool emit)
294
340
  {
295
341
  if (parser->base_indent == -1) // indentation disabled
296
342
  return;
297
343
  parser->current_indent -= 2;
298
- if (emit != Qtrue)
344
+ if (!emit)
299
345
  return;
300
346
  int space_count = parser->current_indent + parser->base_indent;
301
347
  if (space_count > 0)
302
- rb_str_cat(parser->output, parser->tabulation->ptr, space_count);
348
+ str_append(parser->output, parser->tabulation->ptr, space_count);
303
349
  }
304
350
 
305
351
  // Pops a single item off the parser's scope stack.
306
352
  // A corresponding closing tag is written to the target string.
307
353
  // The target string may be the main output buffer, or a substring capturing buffer if a link is being scanned.
308
- void _Wikitext_pop_from_stack(parser_t *parser, VALUE target)
354
+ void wiki_pop_from_stack(parser_t *parser, str_t *target)
309
355
  {
310
356
  int top = ary_entry(parser->scope, -1);
311
357
  if (NO_ITEM(top))
312
358
  return;
313
- if (NIL_P(target))
359
+ if (!target)
314
360
  target = parser->output;
315
361
 
316
362
  // for headings, take base_heading_level into account
@@ -326,16 +372,16 @@ void _Wikitext_pop_from_stack(parser_t *parser, VALUE target)
326
372
  {
327
373
  case PRE:
328
374
  case PRE_START:
329
- rb_str_cat(target, pre_end, sizeof(pre_end) - 1);
330
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
331
- _Wikitext_dedent(parser, Qfalse);
375
+ str_append(target, pre_end, sizeof(pre_end) - 1);
376
+ str_append_str(target, parser->line_ending);
377
+ wiki_dedent(parser, false);
332
378
  break;
333
379
 
334
380
  case BLOCKQUOTE:
335
381
  case BLOCKQUOTE_START:
336
- _Wikitext_dedent(parser, Qtrue);
337
- rb_str_cat(target, blockquote_end, sizeof(blockquote_end) - 1);
338
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
382
+ wiki_dedent(parser, true);
383
+ str_append(target, blockquote_end, sizeof(blockquote_end) - 1);
384
+ str_append_str(target, parser->line_ending);
339
385
  break;
340
386
 
341
387
  case NO_WIKI_START:
@@ -344,29 +390,29 @@ void _Wikitext_pop_from_stack(parser_t *parser, VALUE target)
344
390
 
345
391
  case STRONG:
346
392
  case STRONG_START:
347
- rb_str_cat(target, strong_end, sizeof(strong_end) - 1);
393
+ str_append(target, strong_end, sizeof(strong_end) - 1);
348
394
  break;
349
395
 
350
396
  case EM:
351
397
  case EM_START:
352
- rb_str_cat(target, em_end, sizeof(em_end) - 1);
398
+ str_append(target, em_end, sizeof(em_end) - 1);
353
399
  break;
354
400
 
355
401
  case TT:
356
402
  case TT_START:
357
- rb_str_cat(target, tt_end, sizeof(tt_end) - 1);
403
+ str_append(target, tt_end, sizeof(tt_end) - 1);
358
404
  break;
359
405
 
360
406
  case OL:
361
- _Wikitext_dedent(parser, Qtrue);
362
- rb_str_cat(target, ol_end, sizeof(ol_end) - 1);
363
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
407
+ wiki_dedent(parser, true);
408
+ str_append(target, ol_end, sizeof(ol_end) - 1);
409
+ str_append_str(target, parser->line_ending);
364
410
  break;
365
411
 
366
412
  case UL:
367
- _Wikitext_dedent(parser, Qtrue);
368
- rb_str_cat(target, ul_end, sizeof(ul_end) - 1);
369
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
413
+ wiki_dedent(parser, true);
414
+ str_append(target, ul_end, sizeof(ul_end) - 1);
415
+ str_append_str(target, parser->line_ending);
370
416
  break;
371
417
 
372
418
  case NESTED_LIST:
@@ -375,50 +421,50 @@ void _Wikitext_pop_from_stack(parser_t *parser, VALUE target)
375
421
  // and other times we want it to behave like BLOCKQUOTE (ie. when it has a nested list inside)
376
422
  // hence this hack: we do an emitting dedent on behalf of the LI that we know must be coming
377
423
  // and then when we pop the actual LI itself (below) we do the standard non-emitting dedent
378
- _Wikitext_dedent(parser, Qtrue); // we really only want to emit the spaces
379
- parser->current_indent += 2; // we don't want to decrement the actual indent level, so put it back
424
+ wiki_dedent(parser, true); // we really only want to emit the spaces
425
+ parser->current_indent += 2; // we don't want to decrement the actual indent level, so put it back
380
426
  break;
381
427
 
382
428
  case LI:
383
- rb_str_cat(target, li_end, sizeof(li_end) - 1);
384
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
385
- _Wikitext_dedent(parser, Qfalse);
429
+ str_append(target, li_end, sizeof(li_end) - 1);
430
+ str_append_str(target, parser->line_ending);
431
+ wiki_dedent(parser, false);
386
432
  break;
387
433
 
388
434
  case H6_START:
389
- rb_str_cat(target, h6_end, sizeof(h6_end) - 1);
390
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
391
- _Wikitext_dedent(parser, Qfalse);
435
+ str_append(target, h6_end, sizeof(h6_end) - 1);
436
+ str_append_str(target, parser->line_ending);
437
+ wiki_dedent(parser, false);
392
438
  break;
393
439
 
394
440
  case H5_START:
395
- rb_str_cat(target, h5_end, sizeof(h5_end) - 1);
396
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
397
- _Wikitext_dedent(parser, Qfalse);
441
+ str_append(target, h5_end, sizeof(h5_end) - 1);
442
+ str_append_str(target, parser->line_ending);
443
+ wiki_dedent(parser, false);
398
444
  break;
399
445
 
400
446
  case H4_START:
401
- rb_str_cat(target, h4_end, sizeof(h4_end) - 1);
402
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
403
- _Wikitext_dedent(parser, Qfalse);
447
+ str_append(target, h4_end, sizeof(h4_end) - 1);
448
+ str_append_str(target, parser->line_ending);
449
+ wiki_dedent(parser, false);
404
450
  break;
405
451
 
406
452
  case H3_START:
407
- rb_str_cat(target, h3_end, sizeof(h3_end) - 1);
408
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
409
- _Wikitext_dedent(parser, Qfalse);
453
+ str_append(target, h3_end, sizeof(h3_end) - 1);
454
+ str_append_str(target, parser->line_ending);
455
+ wiki_dedent(parser, false);
410
456
  break;
411
457
 
412
458
  case H2_START:
413
- rb_str_cat(target, h2_end, sizeof(h2_end) - 1);
414
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
415
- _Wikitext_dedent(parser, Qfalse);
459
+ str_append(target, h2_end, sizeof(h2_end) - 1);
460
+ str_append_str(target, parser->line_ending);
461
+ wiki_dedent(parser, false);
416
462
  break;
417
463
 
418
464
  case H1_START:
419
- rb_str_cat(target, h1_end, sizeof(h1_end) - 1);
420
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
421
- _Wikitext_dedent(parser, Qfalse);
465
+ str_append(target, h1_end, sizeof(h1_end) - 1);
466
+ str_append_str(target, parser->line_ending);
467
+ wiki_dedent(parser, false);
422
468
  break;
423
469
 
424
470
  case LINK_START:
@@ -442,9 +488,9 @@ void _Wikitext_pop_from_stack(parser_t *parser, VALUE target)
442
488
  break;
443
489
 
444
490
  case P:
445
- rb_str_cat(target, p_end, sizeof(p_end) - 1);
446
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
447
- _Wikitext_dedent(parser, Qfalse);
491
+ str_append(target, p_end, sizeof(p_end) - 1);
492
+ str_append_str(target, parser->line_ending);
493
+ wiki_dedent(parser, false);
448
494
  break;
449
495
 
450
496
  case END_OF_FILE:
@@ -459,9 +505,9 @@ void _Wikitext_pop_from_stack(parser_t *parser, VALUE target)
459
505
  }
460
506
 
461
507
  // Pops items off the top of parser's scope stack, accumulating closing tags for them into the target string, until item is reached.
462
- // If including is Qtrue then the item itself is also popped.
508
+ // If including is true then the item itself is also popped.
463
509
  // The target string may be the main output buffer, or a substring capturing buffer when scanning links.
464
- void _Wikitext_pop_from_stack_up_to(parser_t *parser, VALUE target, int item, VALUE including)
510
+ void wiki_pop_from_stack_up_to(parser_t *parser, str_t *target, int item, bool including)
465
511
  {
466
512
  int continue_looping = 1;
467
513
  do
@@ -471,23 +517,23 @@ void _Wikitext_pop_from_stack_up_to(parser_t *parser, VALUE target, int item, VA
471
517
  return;
472
518
  if (top == item)
473
519
  {
474
- if (including != Qtrue)
520
+ if (!including)
475
521
  return;
476
522
  continue_looping = 0;
477
523
  }
478
- _Wikitext_pop_from_stack(parser, target);
524
+ wiki_pop_from_stack(parser, target);
479
525
  } while (continue_looping);
480
526
  }
481
527
 
482
- void _Wikitext_pop_all_from_stack(parser_t *parser, VALUE target)
528
+ void wiki_pop_all_from_stack(parser_t *parser)
483
529
  {
484
- while (!NO_ITEM(ary_entry(parser->scope, -1)))
485
- _Wikitext_pop_from_stack(parser, target);
530
+ for (int i = 0, max = parser->scope->count; i < max; i++)
531
+ wiki_pop_from_stack(parser, NULL);
486
532
  }
487
533
 
488
- void _Wikitext_start_para_if_necessary(parser_t *parser)
534
+ void wiki_start_para_if_necessary(parser_t *parser)
489
535
  {
490
- if (!NIL_P(parser->capture)) // we don't do anything if in capturing mode
536
+ if (parser->capture)
491
537
  return;
492
538
 
493
539
  // if no block open yet, or top of stack is BLOCKQUOTE/BLOCKQUOTE_START (with nothing in it yet)
@@ -495,29 +541,29 @@ void _Wikitext_start_para_if_necessary(parser_t *parser)
495
541
  ary_entry(parser->scope, -1) == BLOCKQUOTE ||
496
542
  ary_entry(parser->scope, -1) == BLOCKQUOTE_START)
497
543
  {
498
- _Wikitext_indent(parser);
499
- rb_str_cat(parser->output, p_start, sizeof(p_start) - 1);
544
+ wiki_indent(parser);
545
+ str_append(parser->output, p_start, sizeof(p_start) - 1);
500
546
  ary_push(parser->scope, P);
501
547
  ary_push(parser->line, P);
502
548
  }
503
- else if (parser->pending_crlf == Qtrue)
549
+ else if (parser->pending_crlf)
504
550
  {
505
551
  if (IN(P))
506
552
  // already in a paragraph block; convert pending CRLF into a space
507
- rb_str_cat(parser->output, space, sizeof(space) - 1);
553
+ str_append(parser->output, space, sizeof(space) - 1);
508
554
  else if (IN(PRE))
509
555
  // PRE blocks can have pending CRLF too (helps us avoid emitting the trailing newline)
510
- rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
556
+ str_append_str(parser->output, parser->line_ending);
511
557
  }
512
- parser->pending_crlf = Qfalse;
558
+ parser->pending_crlf = false;
513
559
  }
514
560
 
515
- void _Wikitext_emit_pending_crlf_if_necessary(parser_t *parser)
561
+ void wiki_emit_pending_crlf_if_necessary(parser_t *parser)
516
562
  {
517
- if (parser->pending_crlf == Qtrue)
563
+ if (parser->pending_crlf)
518
564
  {
519
- rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
520
- parser->pending_crlf = Qfalse;
565
+ str_append_str(parser->output, parser->line_ending);
566
+ parser->pending_crlf = false;
521
567
  }
522
568
  }
523
569
 
@@ -543,9 +589,9 @@ void _Wikitext_emit_pending_crlf_if_necessary(parser_t *parser)
543
589
  // on the line scope.
544
590
  // Luckily, BLOCKQUOTE_START tokens can only appear at the start of the scope array, so we can check for them first before
545
591
  // entering the for loop.
546
- void _Wikitext_pop_excess_elements(parser_t *parser)
592
+ void wiki_pop_excess_elements(parser_t *parser)
547
593
  {
548
- if (!NIL_P(parser->capture)) // we don't pop anything if in capturing mode
594
+ if (parser->capture)
549
595
  return;
550
596
  for (int i = parser->scope->count - ary_count(parser->scope, BLOCKQUOTE_START), j = parser->line->count; i > j; i--)
551
597
  {
@@ -560,65 +606,94 @@ void _Wikitext_pop_excess_elements(parser_t *parser)
560
606
  continue;
561
607
  }
562
608
  }
563
- _Wikitext_pop_from_stack(parser, parser->output);
609
+ wiki_pop_from_stack(parser, NULL);
564
610
  }
565
611
  }
566
612
 
567
- #define INVALID_ENCODING(msg) do { if (dest_ptr) free(dest_ptr); rb_raise(eWikitextParserError, "invalid encoding: " msg); } while(0)
568
-
569
- // convert a single UTF-8 codepoint to UTF-32
570
- // expects an input buffer, src, containing a UTF-8 encoded character (which may be multi-byte)
571
- // the end of the input buffer, end, is also passed in to allow the detection of invalidly truncated codepoints
572
- // the number of bytes in the UTF-8 character (between 1 and 4) is returned by reference in width_out
573
- // raises a RangeError if the supplied character is invalid UTF-8
574
- // (in which case it also frees the block of memory indicated by dest_ptr if it is non-NULL)
575
- uint32_t _Wikitext_utf8_to_utf32(char *src, char *end, long *width_out, void *dest_ptr)
613
+ // Convert a single UTF-8 codepoint to UTF-32
614
+ //
615
+ // Expects an input buffer, src, containing a UTF-8 encoded character (which
616
+ // may be multi-byte). The end of the input buffer, end, is also passed in to
617
+ // allow the detection of invalidly truncated codepoints. The number of bytes
618
+ // in the UTF-8 character (between 1 and 4) is returned by reference in
619
+ // width_out.
620
+ //
621
+ // Raises eWikitextParserError if the supplied character is invalid UTF-8.
622
+ uint32_t wiki_utf8_to_utf32(char *src, char *end, long *width_out)
576
623
  {
577
624
  uint32_t dest;
578
- if ((unsigned char)src[0] <= 0x7f) // ASCII
625
+ if ((unsigned char)src[0] <= 0x7f)
579
626
  {
627
+ // ASCII
580
628
  dest = src[0];
581
629
  *width_out = 1;
582
630
  }
583
- else if ((src[0] & 0xe0) == 0xc0) // byte starts with 110..... : this should be a two-byte sequence
631
+ else if ((src[0] & 0xe0) == 0xc0)
584
632
  {
633
+ // byte starts with 110..... : this should be a two-byte sequence
585
634
  if (src + 1 >= end)
586
- INVALID_ENCODING("truncated byte sequence"); // no second byte
587
- else if (((unsigned char)src[0] == 0xc0) || ((unsigned char)src[0] == 0xc1))
588
- INVALID_ENCODING("overlong encoding"); // overlong encoding: lead byte of 110..... but code point <= 127
635
+ // no second byte
636
+ rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
637
+ else if (((unsigned char)src[0] == 0xc0) ||
638
+ ((unsigned char)src[0] == 0xc1))
639
+ // overlong encoding: lead byte of 110..... but code point <= 127
640
+ rb_raise(eWikitextParserError, "invalid encoding: overlong encoding");
589
641
  else if ((src[1] & 0xc0) != 0x80 )
590
- INVALID_ENCODING("malformed byte sequence"); // should have second byte starting with 10......
591
- dest = ((uint32_t)(src[0] & 0x1f)) << 6 | (src[1] & 0x3f);
642
+ // should have second byte starting with 10......
643
+ rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
644
+
645
+ dest =
646
+ ((uint32_t)(src[0] & 0x1f)) << 6 |
647
+ (src[1] & 0x3f);
592
648
  *width_out = 2;
593
649
  }
594
- else if ((src[0] & 0xf0) == 0xe0) // byte starts with 1110.... : this should be a three-byte sequence
650
+ else if ((src[0] & 0xf0) == 0xe0)
595
651
  {
652
+ // byte starts with 1110.... : this should be a three-byte sequence
596
653
  if (src + 2 >= end)
597
- INVALID_ENCODING("truncated byte sequence"); // missing second or third byte
598
- else if (((src[1] & 0xc0) != 0x80 ) || ((src[2] & 0xc0) != 0x80 ))
599
- INVALID_ENCODING("malformed byte sequence"); // should have second and third bytes starting with 10......
600
- dest = ((uint32_t)(src[0] & 0x0f)) << 12 | ((uint32_t)(src[1] & 0x3f)) << 6 | (src[2] & 0x3f);
654
+ // missing second or third byte
655
+ rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
656
+ else if (((src[1] & 0xc0) != 0x80 ) ||
657
+ ((src[2] & 0xc0) != 0x80 ))
658
+ // should have second and third bytes starting with 10......
659
+ rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
660
+
661
+ dest =
662
+ ((uint32_t)(src[0] & 0x0f)) << 12 |
663
+ ((uint32_t)(src[1] & 0x3f)) << 6 |
664
+ (src[2] & 0x3f);
601
665
  *width_out = 3;
602
666
  }
603
- else if ((src[0] & 0xf8) == 0xf0) // bytes starts with 11110... : this should be a four-byte sequence
667
+ else if ((src[0] & 0xf8) == 0xf0)
604
668
  {
669
+ // byte starts with 11110... : this should be a four-byte sequence
605
670
  if (src + 3 >= end)
606
- INVALID_ENCODING("truncated byte sequence"); // missing second, third, or fourth byte
607
- else if ((unsigned char)src[0] >= 0xf5 && (unsigned char)src[0] <= 0xf7)
608
- INVALID_ENCODING("overlong encoding"); // disallowed by RFC 3629 (codepoints above 0x10ffff)
609
- else if (((src[1] & 0xc0) != 0x80 ) || ((src[2] & 0xc0) != 0x80 ) || ((src[3] & 0xc0) != 0x80 ))
610
- INVALID_ENCODING("malformed byte sequence"); // should have second and third bytes starting with 10......
611
- dest = ((uint32_t)(src[0] & 0x07)) << 18 | ((uint32_t)(src[1] & 0x3f)) << 12 | ((uint32_t)(src[1] & 0x3f)) << 6 | (src[2] & 0x3f);
671
+ // missing second, third, or fourth byte
672
+ rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
673
+ else if ((unsigned char)src[0] >= 0xf5 &&
674
+ (unsigned char)src[0] <= 0xf7)
675
+ // disallowed by RFC 3629 (codepoints above 0x10ffff)
676
+ rb_raise(eWikitextParserError, "invalid encoding: overlong encoding");
677
+ else if (((src[1] & 0xc0) != 0x80 ) ||
678
+ ((src[2] & 0xc0) != 0x80 ) ||
679
+ ((src[3] & 0xc0) != 0x80 ))
680
+ // should have second and third bytes starting with 10......
681
+ rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
682
+
683
+ dest =
684
+ ((uint32_t)(src[0] & 0x07)) << 18 |
685
+ ((uint32_t)(src[1] & 0x3f)) << 12 |
686
+ ((uint32_t)(src[1] & 0x3f)) << 6 |
687
+ (src[2] & 0x3f);
612
688
  *width_out = 4;
613
689
  }
614
- else // invalid input
615
- INVALID_ENCODING("unexpected byte");
690
+ else
691
+ rb_raise(eWikitextParserError, "invalid encoding: unexpected byte");
616
692
  return dest;
617
693
  }
618
694
 
619
- VALUE _Wikitext_utf32_char_to_entity(uint32_t character)
695
+ void wiki_append_entity_from_utf32_char(str_t *output, uint32_t character)
620
696
  {
621
- // TODO: consider special casing some entities (ie. quot, amp, lt, gt etc)?
622
697
  char hex_string[8] = { '&', '#', 'x', 0, 0, 0, 0, ';' };
623
698
  char scratch = (character & 0xf000) >> 12;
624
699
  hex_string[3] = (scratch <= 9 ? scratch + 48 : scratch + 87);
@@ -628,18 +703,17 @@ VALUE _Wikitext_utf32_char_to_entity(uint32_t character)
628
703
  hex_string[5] = (scratch <= 9 ? scratch + 48 : scratch + 87);
629
704
  scratch = character & 0x000f;
630
705
  hex_string[6] = (scratch <= 9 ? scratch + 48 : scratch + 87);
631
- return rb_str_new((const char *)hex_string, sizeof(hex_string));
706
+ str_append(output, hex_string, sizeof(hex_string));
632
707
  }
633
708
 
634
- VALUE _Wikitext_parser_trim_link_target(VALUE string)
709
+ // trim parser->link_text in place
710
+ void wiki_trim_link_text(parser_t *parser)
635
711
  {
636
- string = StringValue(string);
637
- char *src = RSTRING_PTR(string);
712
+ char *src = parser->link_text->ptr;
638
713
  char *start = src; // remember this so we can check if we're at the start
639
714
  char *left = src;
640
715
  char *non_space = src; // remember last non-space character output
641
- long len = RSTRING_LEN(string);
642
- char *end = src + len;
716
+ char *end = src + parser->link_text->len;
643
717
  while (src < end)
644
718
  {
645
719
  if (*src == ' ')
@@ -651,143 +725,104 @@ VALUE _Wikitext_parser_trim_link_target(VALUE string)
651
725
  non_space = src;
652
726
  src++;
653
727
  }
654
- if (left == start && non_space + 1 == end)
655
- return string;
656
- else
657
- return rb_str_new(left, (non_space + 1) - left);
728
+ if (left != start || non_space + 1 != end)
729
+ {
730
+ // TODO: could potentially avoid this memmove by extending the str_t struct with an "offset" or "free" member
731
+ parser->link_text->len = (non_space + 1) - left;
732
+ memmove(parser->link_text->ptr, left, parser->link_text->len);
733
+ }
658
734
  }
659
735
 
660
736
  // - non-printable (non-ASCII) characters converted to numeric entities
661
737
  // - QUOT and AMP characters converted to named entities
662
- // - if rollback is Qtrue, there is no special treatment of spaces
663
- // - if rollback is Qfalse, leading and trailing whitespace trimmed
664
- VALUE _Wikitext_parser_sanitize_link_target(parser_t *parser, VALUE rollback)
738
+ // - if trim is true, leading and trailing whitespace trimmed
739
+ // - if trim is false, there is no special treatment of spaces
740
+ void wiki_append_sanitized_link_target(parser_t *parser, str_t *output, bool trim)
665
741
  {
666
- VALUE string = StringValue(parser->link_target); // raises if string is nil or doesn't quack like a string
667
- char *src = RSTRING_PTR(string);
668
- char *start = src; // remember this so we can check if we're at the start
669
- long len = RSTRING_LEN(string);
670
- char *end = src + len;
671
-
672
- // start with a destination buffer twice the size of the source, will realloc if necessary
673
- // slop = (len / 8) * 8 (ie. one in every 8 characters can be converted into an entity, each entity requires 8 bytes)
674
- // this efficiently handles the most common case (where the size of the buffer doesn't change much)
675
- char *dest = ALLOC_N(char, len * 2);
676
- char *dest_ptr = dest; // hang on to this so we can pass it to free() later
677
- char *non_space = dest; // remember last non-space character output
742
+ char *src = parser->link_target->ptr;
743
+ char *start = src; // remember this so we can check if we're at the start
744
+ char *non_space = output->ptr + output->len; // remember last non-space character output
745
+ char *end = src + parser->link_target->len;
678
746
  while (src < end)
679
747
  {
680
- // need at most 8 characters (8 bytes) to display each character
681
- if (dest + 8 > dest_ptr + len) // outgrowing buffer, must reallocate
748
+ // need at most 8 bytes to display each input character (&#x0000;)
749
+ if (output->ptr + output->len + 8 > output->ptr + output->capacity) // outgrowing buffer, must grow
682
750
  {
683
- char *old_dest = dest;
684
- char *old_dest_ptr = dest_ptr;
685
- len = len + (end - src) * 8; // allocate enough for worst case
686
- dest = realloc(dest_ptr, len); // will never have to realloc more than once
687
- if (dest == NULL)
688
- {
689
- // would have used reallocf, but this has to run on Linux too, not just Darwin
690
- free(dest_ptr);
691
- rb_raise(rb_eNoMemError, "failed to re-allocate temporary storage (memory allocation error)");
692
- }
693
- dest_ptr = dest;
694
- dest = dest_ptr + (old_dest - old_dest_ptr);
695
- non_space = dest_ptr + (non_space - old_dest_ptr);
751
+ char *old_ptr = output->ptr;
752
+ str_grow(output, output->len + (end - src) * 8); // allocate enough for worst case
753
+ if (old_ptr != output->ptr) // may have moved
754
+ non_space += output->ptr - old_ptr;
696
755
  }
697
756
 
698
- if (*src == '"') // QUOT
757
+ if (*src == '"')
699
758
  {
700
759
  char quot_entity_literal[] = { '&', 'q', 'u', 'o', 't', ';' }; // no trailing NUL
701
- memcpy(dest, quot_entity_literal, sizeof(quot_entity_literal));
702
- dest += sizeof(quot_entity_literal);
760
+ str_append(output, quot_entity_literal, sizeof(quot_entity_literal));
703
761
  }
704
- else if (*src == '&') // AMP
762
+ else if (*src == '&')
705
763
  {
706
764
  char amp_entity_literal[] = { '&', 'a', 'm', 'p', ';' }; // no trailing NUL
707
- memcpy(dest, amp_entity_literal, sizeof(amp_entity_literal));
708
- dest += sizeof(amp_entity_literal);
709
- }
710
- else if (*src == '<') // LESS_THAN
711
- {
712
- free(dest_ptr);
713
- rb_raise(rb_eRangeError, "invalid link text (\"<\" may not appear in link text)");
714
- }
715
- else if (*src == '>') // GREATER_THAN
716
- {
717
- free(dest_ptr);
718
- rb_raise(rb_eRangeError, "invalid link text (\">\" may not appear in link text)");
765
+ str_append(output, amp_entity_literal, sizeof(amp_entity_literal));
719
766
  }
720
- else if (*src == ' ' && src == start && rollback == Qfalse)
721
- start++; // we eat leading space
722
- else if (*src >= 0x20 && *src <= 0x7e) // printable ASCII
767
+ else if (*src == '<' || *src == '>')
768
+ rb_raise(rb_eRangeError, "invalid link text (\"%c\" may not appear in link text)", *src);
769
+ else if (*src == ' ' && src == start && trim)
770
+ start++; // we eat leading space
771
+ else if (*src >= 0x20 && *src <= 0x7e) // printable ASCII
723
772
  {
724
- *dest = *src;
725
- dest++;
773
+ *(output->ptr + output->len) = *src;
774
+ output->len++;
726
775
  }
727
776
  else // all others: must convert to entities
728
777
  {
729
778
  long width;
730
- VALUE entity = _Wikitext_utf32_char_to_entity(_Wikitext_utf8_to_utf32(src, end, &width, dest_ptr));
731
- char *entity_src = RSTRING_PTR(entity);
732
- long entity_len = RSTRING_LEN(entity); // should always be 8 characters (8 bytes)
733
- memcpy(dest, entity_src, entity_len);
734
- dest += entity_len;
779
+ wiki_append_entity_from_utf32_char(output, wiki_utf8_to_utf32(src, end, &width));
735
780
  src += width;
736
- non_space = dest;
781
+ non_space = output->ptr + output->len;
737
782
  continue;
738
783
  }
739
784
  if (*src != ' ')
740
- non_space = dest;
785
+ non_space = output->ptr + output->len;
741
786
  src++;
742
787
  }
743
788
 
744
789
  // trim trailing space if necessary
745
- if (rollback == Qfalse && non_space > dest_ptr && dest != non_space)
746
- len = non_space - dest_ptr;
747
- else
748
- len = dest - dest_ptr;
749
- VALUE out = rb_str_new(dest_ptr, len);
750
- free(dest_ptr);
751
- return out;
790
+ if (trim && output->ptr + output->len != non_space)
791
+ output->len -= (output->ptr + output->len) - non_space;
752
792
  }
753
793
 
754
794
  VALUE Wikitext_parser_sanitize_link_target(VALUE self, VALUE string)
755
795
  {
756
796
  parser_t parser;
757
- parser.link_target = string;
758
- return _Wikitext_parser_sanitize_link_target(&parser, Qfalse);
797
+ parser.link_target = str_new_from_string(string);
798
+ GC_WRAP_STR(parser.link_target, link_target_gc);
799
+ str_t *output = str_new();
800
+ GC_WRAP_STR(output, output_gc);
801
+ wiki_append_sanitized_link_target(&parser, output, true);
802
+ return string_from_str(output);
759
803
  }
760
804
 
761
- // encodes the input string according to RFCs 2396 and 2718
762
- // leading and trailing whitespace trimmed
763
- // note that the first character of the target link is not case-sensitive
764
- // (this is a recommended application-level constraint; it is not imposed at this level)
765
- // this is to allow links like:
766
- // ...the [[foo]] is...
767
- // to be equivalent to:
768
- // thing. [[Foo]] was...
769
- static void _Wikitext_parser_encode_link_target(parser_t *parser)
805
+ // Encodes the parser link_target member (in-place) according to RFCs 2396 and 2718
806
+ //
807
+ // Leading and trailing whitespace trimmed. Spaces are converted to
808
+ // underscores if the parser space_to_underscore member is true.
809
+ static void wiki_encode_link_target(parser_t *parser)
770
810
  {
771
- VALUE in = StringValue(parser->link_target);
772
- char *input = RSTRING_PTR(in);
773
- char *start = input; // remember this so we can check if we're at the start
774
- long len = RSTRING_LEN(in);
811
+ char *src = parser->link_target->ptr;
812
+ char *start = src; // remember this so we can check if we're at the start
813
+ long len = parser->link_target->len;
775
814
  if (!(len > 0))
776
815
  return;
777
- char *end = input + len;
778
- static char hex[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
779
-
780
- // to avoid most reallocations start with a destination buffer twice the size of the source
781
- // this handles the most common case (where most chars are in the ASCII range and don't require more storage, but there are
782
- // often quite a few spaces, which are encoded as "%20" and occupy 3 bytes)
783
- // the worst case is where _every_ byte must be written out using 3 bytes
816
+ char *end = src + len;
784
817
  long dest_len = len * 2;
785
818
  char *dest = ALLOC_N(char, dest_len);
786
819
  char *dest_ptr = dest; // hang on to this so we can pass it to free() later
787
820
  char *non_space = dest; // remember last non-space character output
788
- for (; input < end; input++)
821
+ static char hex[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
822
+ for (; src < end; src++)
789
823
  {
790
- if ((dest + 3) > (dest_ptr + dest_len)) // worst case: a single character may grow to 3 characters once encoded
824
+ // worst case: a single character may grow to 3 characters once encoded
825
+ if ((dest + 3) > (dest_ptr + dest_len))
791
826
  {
792
827
  // outgrowing buffer, must reallocate
793
828
  char *old_dest = dest;
@@ -806,27 +841,27 @@ static void _Wikitext_parser_encode_link_target(parser_t *parser)
806
841
  }
807
842
 
808
843
  // pass through unreserved characters
809
- if (((*input >= 'a') && (*input <= 'z')) ||
810
- ((*input >= 'A') && (*input <= 'Z')) ||
811
- ((*input >= '0') && (*input <= '9')) ||
812
- (*input == '-') ||
813
- (*input == '_') ||
814
- (*input == '.') ||
815
- (*input == '~'))
844
+ if ((*src >= 'a' && *src <= 'z') ||
845
+ (*src >= 'A' && *src <= 'Z') ||
846
+ (*src >= '0' && *src <= '9') ||
847
+ *src == '-' ||
848
+ *src == '_' ||
849
+ *src == '.' ||
850
+ *src == '~')
816
851
  {
817
- *dest++ = *input;
852
+ *dest++ = *src;
818
853
  non_space = dest;
819
854
  }
820
- else if (*input == ' ' && input == start)
855
+ else if (*src == ' ' && src == start)
821
856
  start++; // we eat leading space
822
- else if (*input == ' ' && parser->space_to_underscore == Qtrue)
857
+ else if (*src == ' ' && parser->space_to_underscore)
823
858
  *dest++ = '_';
824
859
  else // everything else gets URL-encoded
825
860
  {
826
861
  *dest++ = '%';
827
- *dest++ = hex[(unsigned char)(*input) / 16]; // left
828
- *dest++ = hex[(unsigned char)(*input) % 16]; // right
829
- if (*input != ' ')
862
+ *dest++ = hex[(unsigned char)(*src) / 16]; // left
863
+ *dest++ = hex[(unsigned char)(*src) % 16]; // right
864
+ if (*src != ' ')
830
865
  non_space = dest;
831
866
  }
832
867
  }
@@ -836,90 +871,89 @@ static void _Wikitext_parser_encode_link_target(parser_t *parser)
836
871
  dest_len = non_space - dest_ptr;
837
872
  else
838
873
  dest_len = dest - dest_ptr;
839
- parser->link_target = rb_str_new(dest_ptr, dest_len);
874
+ str_clear(parser->link_target);
875
+ str_append(parser->link_target, dest_ptr, dest_len);
840
876
  free(dest_ptr);
841
877
  }
842
878
 
843
879
  VALUE Wikitext_parser_encode_link_target(VALUE self, VALUE in)
844
880
  {
845
881
  parser_t parser;
846
- parser.link_target = in;
847
- parser.space_to_underscore = Qfalse;
848
- _Wikitext_parser_encode_link_target(&parser);
849
- return parser.link_target;
850
- }
851
-
852
- // this method exposed for testing only
853
- VALUE Wikitext_parser_encode_special_link_target(VALUE self, VALUE in)
854
- {
855
- parser_t parser;
856
- parser.link_target = in;
857
- parser.space_to_underscore = Qfalse;
858
- _Wikitext_parser_encode_link_target(&parser);
859
- return parser.link_target;
882
+ parser.space_to_underscore = false;
883
+ parser.link_target = str_new_from_string(in);
884
+ GC_WRAP_STR(parser.link_target, link_target_gc);
885
+ wiki_encode_link_target(&parser);
886
+ return string_from_str(parser.link_target);
860
887
  }
861
888
 
862
889
  // returns 1 (true) if supplied string is blank (nil, empty, or all whitespace)
863
890
  // returns 0 (false) otherwise
864
- int _Wikitext_blank(VALUE str)
891
+ bool wiki_blank(str_t *str)
865
892
  {
866
- if (NIL_P(str) || RSTRING_LEN(str) == 0)
867
- return 1;
868
- for (char *ptr = RSTRING_PTR(str),
869
- *end = RSTRING_PTR(str) + RSTRING_LEN(str);
893
+ if (str->len == 0)
894
+ return true;
895
+ for (char *ptr = str->ptr,
896
+ *end = str->ptr + str->len;
870
897
  ptr < end; ptr++)
871
898
  {
872
899
  if (*ptr != ' ')
873
- return 0;
900
+ return false;
874
901
  }
875
- return 1;
902
+ return true;
876
903
  }
877
904
 
878
- void _Wikitext_rollback_failed_link(parser_t *parser)
905
+ void wiki_rollback_failed_internal_link(parser_t *parser)
879
906
  {
880
907
  if (!IN(LINK_START))
881
908
  return; // nothing to do!
882
909
  int scope_includes_separator = IN(SEPARATOR);
883
- _Wikitext_pop_from_stack_up_to(parser, Qnil, LINK_START, Qtrue);
884
- rb_str_cat(parser->output, link_start, sizeof(link_start) - 1);
885
- if (!NIL_P(parser->link_target))
910
+ wiki_pop_from_stack_up_to(parser, NULL, LINK_START, true);
911
+ str_append(parser->output, link_start, sizeof(link_start) - 1);
912
+ if (parser->link_target->len > 0)
886
913
  {
887
- VALUE sanitized = _Wikitext_parser_sanitize_link_target(parser, Qtrue);
888
- rb_str_append(parser->output, sanitized);
914
+ wiki_append_sanitized_link_target(parser, parser->output, false);
889
915
  if (scope_includes_separator)
890
916
  {
891
- rb_str_cat(parser->output, separator, sizeof(separator) - 1);
892
- if (!NIL_P(parser->link_text))
893
- rb_str_append(parser->output, parser->link_text);
917
+ str_append(parser->output, separator, sizeof(separator) - 1);
918
+ if (parser->link_text->len > 0)
919
+ str_append_str(parser->output, parser->link_text);
894
920
  }
895
921
  }
896
- parser->capture = Qnil;
897
- parser->link_target = Qnil;
898
- parser->link_text = Qnil;
922
+ parser->capture = NULL;
923
+ str_clear(parser->link_target);
924
+ str_clear(parser->link_text);
899
925
  }
900
926
 
901
- void _Wikitext_rollback_failed_external_link(parser_t *parser)
927
+ void wiki_rollback_failed_external_link(parser_t *parser)
902
928
  {
903
929
  if (!IN(EXT_LINK_START))
904
930
  return; // nothing to do!
931
+
932
+ // store a couple of values before popping
905
933
  int scope_includes_space = IN(SPACE);
906
- _Wikitext_pop_from_stack_up_to(parser, Qnil, EXT_LINK_START, Qtrue);
907
- rb_str_cat(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
908
- if (!NIL_P(parser->link_target))
934
+ VALUE link_class = IN(PATH) ? Qnil : parser->external_link_class;
935
+ wiki_pop_from_stack_up_to(parser, NULL, EXT_LINK_START, true);
936
+
937
+ str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
938
+ if (parser->link_target->len > 0)
909
939
  {
910
- if (parser->autolink == Qtrue)
911
- parser->link_target = _Wikitext_hyperlink(parser, Qnil, parser->link_target, parser->link_target, parser->external_link_class);
912
- rb_str_append(parser->output, parser->link_target);
940
+ wiki_append_hyperlink(parser, Qnil, parser->link_target, NULL, link_class, true);
913
941
  if (scope_includes_space)
914
942
  {
915
- rb_str_cat(parser->output, space, sizeof(space) - 1);
916
- if (!NIL_P(parser->link_text))
917
- rb_str_append(parser->output, parser->link_text);
943
+ str_append(parser->output, space, sizeof(space) - 1);
944
+ if (parser->link_text->len > 0)
945
+ str_append_str(parser->output, parser->link_text);
918
946
  }
919
947
  }
920
- parser->capture = Qnil;
921
- parser->link_target = Qnil;
922
- parser->link_text = Qnil;
948
+ parser->capture = NULL;
949
+ str_clear(parser->link_target);
950
+ str_clear(parser->link_text);
951
+ }
952
+
953
+ void wiki_rollback_failed_link(parser_t *parser)
954
+ {
955
+ wiki_rollback_failed_internal_link(parser);
956
+ wiki_rollback_failed_external_link(parser);
923
957
  }
924
958
 
925
959
  VALUE Wikitext_parser_initialize(int argc, VALUE *argv, VALUE self)
@@ -1031,31 +1065,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1031
1065
  VALUE prefix = rb_iv_get(self, "@internal_link_prefix");
1032
1066
 
1033
1067
  // set up parser struct to make passing parameters a little easier
1034
- // eventually this will encapsulate most or all of the variables above
1035
- parser_t _parser;
1036
- parser_t *parser = &_parser;
1037
- parser->output = rb_str_new2("");
1038
- parser->capture = Qnil;
1039
- parser->link_target = Qnil;
1040
- parser->link_text = Qnil;
1068
+ parser_t *parser = parser_new();
1069
+ GC_WRAP_PARSER(parser, parser_gc);
1041
1070
  parser->external_link_class = link_class;
1042
1071
  parser->mailto_class = mailto_class;
1043
1072
  parser->img_prefix = rb_iv_get(self, "@img_prefix");
1044
- parser->scope = ary_new();
1045
- GC_WRAP_ARY(parser->scope, scope_gc);
1046
- parser->line = ary_new();
1047
- GC_WRAP_ARY(parser->line, line_gc);
1048
- parser->line_buffer = ary_new();
1049
- GC_WRAP_ARY(parser->line_buffer, line_buffer_gc);
1050
- parser->pending_crlf = Qfalse;
1051
- parser->autolink = rb_iv_get(self, "@autolink");
1052
- parser->space_to_underscore = rb_iv_get(self, "@space_to_underscore");
1073
+ parser->autolink = rb_iv_get(self, "@autolink") == Qtrue ? true : false;
1074
+ parser->space_to_underscore = rb_iv_get(self, "@space_to_underscore") == Qtrue ? true : false;
1053
1075
  parser->line_ending = str_new_from_string(line_ending);
1054
- GC_WRAP_STR(parser->line_ending, line_ending_gc);
1055
1076
  parser->base_indent = base_indent;
1056
- parser->current_indent = 0;
1057
- parser->tabulation = str_new();
1058
- GC_WRAP_STR(parser->tabulation, tabulation_gc);
1059
1077
  parser->base_heading_level = base_heading_level;
1060
1078
 
1061
1079
  // this simple looping design leads to a single enormous function,
@@ -1093,10 +1111,13 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1093
1111
  long remove_strong = -1;
1094
1112
  long remove_em = -1;
1095
1113
 
1096
- // general purpose counters and flags
1114
+ // general purpose counters, flags and pointers
1097
1115
  long i = 0;
1098
1116
  long j = 0;
1099
1117
  long k = 0;
1118
+ str_t *output = NULL;
1119
+ str_t _token_str;
1120
+ str_t *token_str = &_token_str;
1100
1121
 
1101
1122
  // The following giant switch statement contains cases for all the possible token types.
1102
1123
  // In the most basic sense we are emitting the HTML that corresponds to each token,
@@ -1118,16 +1139,16 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1118
1139
  case PRE:
1119
1140
  if (IN(NO_WIKI_START) || IN(PRE_START))
1120
1141
  {
1121
- rb_str_cat(parser->output, space, sizeof(space) - 1);
1142
+ str_append(parser->output, space, sizeof(space) - 1);
1122
1143
  break;
1123
1144
  }
1124
1145
  else if (IN(BLOCKQUOTE_START))
1125
1146
  {
1126
1147
  // this kind of nesting not allowed (to avoid user confusion)
1127
- _Wikitext_pop_excess_elements(parser);
1128
- _Wikitext_start_para_if_necessary(parser);
1129
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1130
- rb_str_cat(i, space, sizeof(space) - 1);
1148
+ wiki_pop_excess_elements(parser);
1149
+ wiki_start_para_if_necessary(parser);
1150
+ output = parser->capture ? parser->capture : parser->output;
1151
+ str_append(output, space, sizeof(space) - 1);
1131
1152
  break;
1132
1153
  }
1133
1154
 
@@ -1139,15 +1160,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1139
1160
  {
1140
1161
  // must pop (reduce nesting level)
1141
1162
  for (i = j - i; i > 0; i--)
1142
- _Wikitext_pop_from_stack_up_to(parser, Qnil, BLOCKQUOTE, Qtrue);
1163
+ wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, true);
1143
1164
  }
1144
1165
 
1145
1166
  if (!IN(PRE))
1146
1167
  {
1147
- parser->pending_crlf = Qfalse;
1148
- _Wikitext_pop_from_stack_up_to(parser, Qnil, BLOCKQUOTE, Qfalse);
1149
- _Wikitext_indent(parser);
1150
- rb_str_cat(parser->output, pre_start, sizeof(pre_start) - 1);
1168
+ parser->pending_crlf = false;
1169
+ wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
1170
+ wiki_indent(parser);
1171
+ str_append(parser->output, pre_start, sizeof(pre_start) - 1);
1151
1172
  ary_push(parser->scope, PRE);
1152
1173
  }
1153
1174
  break;
@@ -1155,16 +1176,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1155
1176
  case PRE_START:
1156
1177
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1157
1178
  {
1158
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1159
- rb_str_cat(parser->output, escaped_pre_start, sizeof(escaped_pre_start) - 1);
1179
+ wiki_emit_pending_crlf_if_necessary(parser);
1180
+ str_append(parser->output, escaped_pre_start, sizeof(escaped_pre_start) - 1);
1160
1181
  }
1161
1182
  else if (IN(BLOCKQUOTE_START))
1162
1183
  {
1163
- _Wikitext_rollback_failed_link(parser); // if any
1164
- _Wikitext_rollback_failed_external_link(parser); // if any
1165
- _Wikitext_pop_from_stack_up_to(parser, Qnil, BLOCKQUOTE_START, Qfalse);
1166
- _Wikitext_indent(parser);
1167
- rb_str_cat(parser->output, pre_start, sizeof(pre_start) - 1);
1184
+ wiki_rollback_failed_link(parser); // if any
1185
+ wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
1186
+ wiki_indent(parser);
1187
+ str_append(parser->output, pre_start, sizeof(pre_start) - 1);
1168
1188
  ary_push(parser->scope, PRE_START);
1169
1189
  ary_push(parser->line, PRE_START);
1170
1190
  }
@@ -1172,29 +1192,27 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1172
1192
  {
1173
1193
  if (token->column_start == 1) // only allowed in first column
1174
1194
  {
1175
- _Wikitext_rollback_failed_link(parser); // if any
1176
- _Wikitext_rollback_failed_external_link(parser); // if any
1177
- _Wikitext_pop_all_from_stack(parser, Qnil);
1178
- _Wikitext_indent(parser);
1179
- rb_str_cat(parser->output, pre_start, sizeof(pre_start) - 1);
1195
+ wiki_rollback_failed_link(parser); // if any
1196
+ wiki_pop_all_from_stack(parser);
1197
+ wiki_indent(parser);
1198
+ str_append(parser->output, pre_start, sizeof(pre_start) - 1);
1180
1199
  ary_push(parser->scope, PRE_START);
1181
1200
  ary_push(parser->line, PRE_START);
1182
1201
  }
1183
1202
  else // PRE_START illegal here
1184
1203
  {
1185
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1186
- _Wikitext_pop_excess_elements(parser);
1187
- _Wikitext_start_para_if_necessary(parser);
1188
- rb_str_cat(i, escaped_pre_start, sizeof(escaped_pre_start) - 1);
1204
+ output = parser->capture ? parser->capture : parser->output;
1205
+ wiki_pop_excess_elements(parser);
1206
+ wiki_start_para_if_necessary(parser);
1207
+ str_append(output, escaped_pre_start, sizeof(escaped_pre_start) - 1);
1189
1208
  }
1190
1209
  }
1191
1210
  else
1192
1211
  {
1193
- _Wikitext_rollback_failed_link(parser); // if any
1194
- _Wikitext_rollback_failed_external_link(parser); // if any
1195
- _Wikitext_pop_from_stack_up_to(parser, Qnil, P, Qtrue);
1196
- _Wikitext_indent(parser);
1197
- rb_str_cat(parser->output, pre_start, sizeof(pre_start) - 1);
1212
+ wiki_rollback_failed_link(parser); // if any
1213
+ wiki_pop_from_stack_up_to(parser, NULL, P, true);
1214
+ wiki_indent(parser);
1215
+ str_append(parser->output, pre_start, sizeof(pre_start) - 1);
1198
1216
  ary_push(parser->scope, PRE_START);
1199
1217
  ary_push(parser->line, PRE_START);
1200
1218
  }
@@ -1203,19 +1221,19 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1203
1221
  case PRE_END:
1204
1222
  if (IN(NO_WIKI_START) || IN(PRE))
1205
1223
  {
1206
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1207
- rb_str_cat(parser->output, escaped_pre_end, sizeof(escaped_pre_end) - 1);
1224
+ wiki_emit_pending_crlf_if_necessary(parser);
1225
+ str_append(parser->output, escaped_pre_end, sizeof(escaped_pre_end) - 1);
1208
1226
  }
1209
1227
  else
1210
1228
  {
1211
1229
  if (IN(PRE_START))
1212
- _Wikitext_pop_from_stack_up_to(parser, parser->output, PRE_START, Qtrue);
1230
+ wiki_pop_from_stack_up_to(parser, parser->output, PRE_START, true);
1213
1231
  else
1214
1232
  {
1215
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1216
- _Wikitext_pop_excess_elements(parser);
1217
- _Wikitext_start_para_if_necessary(parser);
1218
- rb_str_cat(i, escaped_pre_end, sizeof(escaped_pre_end) - 1);
1233
+ output = parser->capture ? parser->capture : parser->output;
1234
+ wiki_pop_excess_elements(parser);
1235
+ wiki_start_para_if_necessary(parser);
1236
+ str_append(output, escaped_pre_end, sizeof(escaped_pre_end) - 1);
1219
1237
  }
1220
1238
  }
1221
1239
  break;
@@ -1223,14 +1241,14 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1223
1241
  case BLOCKQUOTE:
1224
1242
  if (IN(NO_WIKI_START) || IN(PRE_START))
1225
1243
  // no need to check for <pre>; can never appear inside it
1226
- rb_str_cat(parser->output, escaped_blockquote, TOKEN_LEN(token) + 3); // will either emit "&gt;" or "&gt; "
1244
+ str_append(parser->output, escaped_blockquote, TOKEN_LEN(token) + 3); // will either emit "&gt;" or "&gt; "
1227
1245
  else if (IN(BLOCKQUOTE_START))
1228
1246
  {
1229
1247
  // this kind of nesting not allowed (to avoid user confusion)
1230
- _Wikitext_pop_excess_elements(parser);
1231
- _Wikitext_start_para_if_necessary(parser);
1232
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1233
- rb_str_cat(i, escaped_blockquote, TOKEN_LEN(token) + 3); // will either emit "&gt;" or "&gt; "
1248
+ wiki_pop_excess_elements(parser);
1249
+ wiki_start_para_if_necessary(parser);
1250
+ output = parser->capture ? parser->capture : parser->output;
1251
+ str_append(output, escaped_blockquote, TOKEN_LEN(token) + 3); // will either emit "&gt;" or "&gt; "
1234
1252
  break;
1235
1253
  }
1236
1254
  else
@@ -1252,12 +1270,12 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1252
1270
  if (i > j)
1253
1271
  {
1254
1272
  // must push (increase nesting level)
1255
- _Wikitext_pop_from_stack_up_to(parser, Qnil, BLOCKQUOTE, Qfalse);
1273
+ wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
1256
1274
  for (i = i - j; i > 0; i--)
1257
1275
  {
1258
- _Wikitext_indent(parser);
1259
- rb_str_cat(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
1260
- rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
1276
+ wiki_indent(parser);
1277
+ str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
1278
+ str_append_str(parser->output, parser->line_ending);
1261
1279
  ary_push(parser->scope, BLOCKQUOTE);
1262
1280
  }
1263
1281
  }
@@ -1265,7 +1283,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1265
1283
  {
1266
1284
  // must pop (reduce nesting level)
1267
1285
  for (i = j - i; i > 0; i--)
1268
- _Wikitext_pop_from_stack_up_to(parser, Qnil, BLOCKQUOTE, Qtrue);
1286
+ wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, true);
1269
1287
  }
1270
1288
 
1271
1289
  // jump to top of the loop to process token we scanned during lookahead
@@ -1276,18 +1294,17 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1276
1294
  case BLOCKQUOTE_START:
1277
1295
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1278
1296
  {
1279
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1280
- rb_str_cat(parser->output, escaped_blockquote_start, sizeof(escaped_blockquote_start) - 1);
1297
+ wiki_emit_pending_crlf_if_necessary(parser);
1298
+ str_append(parser->output, escaped_blockquote_start, sizeof(escaped_blockquote_start) - 1);
1281
1299
  }
1282
1300
  else if (IN(BLOCKQUOTE_START))
1283
1301
  {
1284
1302
  // nesting is fine here
1285
- _Wikitext_rollback_failed_link(parser); // if any
1286
- _Wikitext_rollback_failed_external_link(parser); // if any
1287
- _Wikitext_pop_from_stack_up_to(parser, Qnil, BLOCKQUOTE_START, Qfalse);
1288
- _Wikitext_indent(parser);
1289
- rb_str_cat(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
1290
- rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
1303
+ wiki_rollback_failed_link(parser); // if any
1304
+ wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
1305
+ wiki_indent(parser);
1306
+ str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
1307
+ str_append_str(parser->output, parser->line_ending);
1291
1308
  ary_push(parser->scope, BLOCKQUOTE_START);
1292
1309
  ary_push(parser->line, BLOCKQUOTE_START);
1293
1310
  }
@@ -1295,32 +1312,30 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1295
1312
  {
1296
1313
  if (token->column_start == 1) // only allowed in first column
1297
1314
  {
1298
- _Wikitext_rollback_failed_link(parser); // if any
1299
- _Wikitext_rollback_failed_external_link(parser); // if any
1300
- _Wikitext_pop_all_from_stack(parser, Qnil);
1301
- _Wikitext_indent(parser);
1302
- rb_str_cat(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
1303
- rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
1315
+ wiki_rollback_failed_link(parser); // if any
1316
+ wiki_pop_all_from_stack(parser);
1317
+ wiki_indent(parser);
1318
+ str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
1319
+ str_append_str(parser->output, parser->line_ending);
1304
1320
  ary_push(parser->scope, BLOCKQUOTE_START);
1305
1321
  ary_push(parser->line, BLOCKQUOTE_START);
1306
1322
  }
1307
1323
  else // BLOCKQUOTE_START illegal here
1308
1324
  {
1309
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1310
- _Wikitext_pop_excess_elements(parser);
1311
- _Wikitext_start_para_if_necessary(parser);
1312
- rb_str_cat(i, escaped_blockquote_start, sizeof(escaped_blockquote_start) - 1);
1325
+ output = parser->capture ? parser->capture : parser->output;
1326
+ wiki_pop_excess_elements(parser);
1327
+ wiki_start_para_if_necessary(parser);
1328
+ str_append(output, escaped_blockquote_start, sizeof(escaped_blockquote_start) - 1);
1313
1329
  }
1314
1330
  }
1315
1331
  else
1316
1332
  {
1317
1333
  // would be nice to eliminate the repetition here but it's probably the clearest way
1318
- _Wikitext_rollback_failed_link(parser); // if any
1319
- _Wikitext_rollback_failed_external_link(parser); // if any
1320
- _Wikitext_pop_from_stack_up_to(parser, Qnil, P, Qtrue);
1321
- _Wikitext_indent(parser);
1322
- rb_str_cat(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
1323
- rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
1334
+ wiki_rollback_failed_link(parser); // if any
1335
+ wiki_pop_from_stack_up_to(parser, NULL, P, true);
1336
+ wiki_indent(parser);
1337
+ str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
1338
+ str_append_str(parser->output, parser->line_ending);
1324
1339
  ary_push(parser->scope, BLOCKQUOTE_START);
1325
1340
  ary_push(parser->line, BLOCKQUOTE_START);
1326
1341
  }
@@ -1329,19 +1344,19 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1329
1344
  case BLOCKQUOTE_END:
1330
1345
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1331
1346
  {
1332
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1333
- rb_str_cat(parser->output, escaped_blockquote_end, sizeof(escaped_blockquote_end) - 1);
1347
+ wiki_emit_pending_crlf_if_necessary(parser);
1348
+ str_append(parser->output, escaped_blockquote_end, sizeof(escaped_blockquote_end) - 1);
1334
1349
  }
1335
1350
  else
1336
1351
  {
1337
1352
  if (IN(BLOCKQUOTE_START))
1338
- _Wikitext_pop_from_stack_up_to(parser, parser->output, BLOCKQUOTE_START, Qtrue);
1353
+ wiki_pop_from_stack_up_to(parser, parser->output, BLOCKQUOTE_START, true);
1339
1354
  else
1340
1355
  {
1341
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1342
- _Wikitext_pop_excess_elements(parser);
1343
- _Wikitext_start_para_if_necessary(parser);
1344
- rb_str_cat(i, escaped_blockquote_end, sizeof(escaped_blockquote_end) - 1);
1356
+ output = parser->capture ? parser->capture : parser->output;
1357
+ wiki_pop_excess_elements(parser);
1358
+ wiki_start_para_if_necessary(parser);
1359
+ str_append(output, escaped_blockquote_end, sizeof(escaped_blockquote_end) - 1);
1345
1360
  }
1346
1361
  }
1347
1362
  break;
@@ -1349,13 +1364,13 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1349
1364
  case NO_WIKI_START:
1350
1365
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1351
1366
  {
1352
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1353
- rb_str_cat(parser->output, escaped_no_wiki_start, sizeof(escaped_no_wiki_start) - 1);
1367
+ wiki_emit_pending_crlf_if_necessary(parser);
1368
+ str_append(parser->output, escaped_no_wiki_start, sizeof(escaped_no_wiki_start) - 1);
1354
1369
  }
1355
1370
  else
1356
1371
  {
1357
- _Wikitext_pop_excess_elements(parser);
1358
- _Wikitext_start_para_if_necessary(parser);
1372
+ wiki_pop_excess_elements(parser);
1373
+ wiki_start_para_if_necessary(parser);
1359
1374
  ary_push(parser->scope, NO_WIKI_START);
1360
1375
  ary_push(parser->line, NO_WIKI_START);
1361
1376
  }
@@ -1364,25 +1379,25 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1364
1379
  case NO_WIKI_END:
1365
1380
  if (IN(NO_WIKI_START))
1366
1381
  // <nowiki> should always only ever be the last item in the stack, but use the helper routine just in case
1367
- _Wikitext_pop_from_stack_up_to(parser, Qnil, NO_WIKI_START, Qtrue);
1382
+ wiki_pop_from_stack_up_to(parser, NULL, NO_WIKI_START, true);
1368
1383
  else
1369
1384
  {
1370
- _Wikitext_pop_excess_elements(parser);
1371
- _Wikitext_start_para_if_necessary(parser);
1372
- rb_str_cat(parser->output, escaped_no_wiki_end, sizeof(escaped_no_wiki_end) - 1);
1385
+ wiki_pop_excess_elements(parser);
1386
+ wiki_start_para_if_necessary(parser);
1387
+ str_append(parser->output, escaped_no_wiki_end, sizeof(escaped_no_wiki_end) - 1);
1373
1388
  }
1374
1389
  break;
1375
1390
 
1376
1391
  case STRONG_EM:
1377
1392
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1378
1393
  {
1379
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1380
- rb_str_cat(parser->output, literal_strong_em, sizeof(literal_strong_em) - 1);
1394
+ wiki_emit_pending_crlf_if_necessary(parser);
1395
+ str_append(parser->output, literal_strong_em, sizeof(literal_strong_em) - 1);
1381
1396
  break;
1382
1397
  }
1383
1398
 
1384
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1385
- _Wikitext_pop_excess_elements(parser);
1399
+ output = parser->capture ? parser->capture : parser->output;
1400
+ wiki_pop_excess_elements(parser);
1386
1401
 
1387
1402
  // if you've seen STRONG/STRONG_START or EM/EM_START, must close them in the reverse order that you saw them!
1388
1403
  // otherwise, must open them
@@ -1394,12 +1409,12 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1394
1409
  int val = ary_entry(parser->scope, j);
1395
1410
  if (val == STRONG || val == STRONG_START)
1396
1411
  {
1397
- rb_str_cat(i, strong_end, sizeof(strong_end) - 1);
1412
+ str_append(output, strong_end, sizeof(strong_end) - 1);
1398
1413
  remove_strong = j;
1399
1414
  }
1400
1415
  else if (val == EM || val == EM_START)
1401
1416
  {
1402
- rb_str_cat(i, em_end, sizeof(em_end) - 1);
1417
+ str_append(output, em_end, sizeof(em_end) - 1);
1403
1418
  remove_em = j;
1404
1419
  }
1405
1420
  }
@@ -1411,7 +1426,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1411
1426
  ary_pop(parser->scope);
1412
1427
  else // there was no em to remove!, so consider this an opening em tag
1413
1428
  {
1414
- rb_str_cat(i, em_start, sizeof(em_start) - 1);
1429
+ str_append(output, em_start, sizeof(em_start) - 1);
1415
1430
  ary_push(parser->scope, EM);
1416
1431
  ary_push(parser->line, EM);
1417
1432
  }
@@ -1423,15 +1438,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1423
1438
  ary_pop(parser->scope);
1424
1439
  else // there was no strong to remove!, so consider this an opening strong tag
1425
1440
  {
1426
- rb_str_cat(i, strong_start, sizeof(strong_start) - 1);
1441
+ str_append(output, strong_start, sizeof(strong_start) - 1);
1427
1442
  ary_push(parser->scope, STRONG);
1428
1443
  ary_push(parser->line, STRONG);
1429
1444
  }
1430
1445
  }
1431
1446
  else // no strong or em to remove, so this must be a new opening of both
1432
1447
  {
1433
- _Wikitext_start_para_if_necessary(parser);
1434
- rb_str_cat(i, strong_em_start, sizeof(strong_em_start) - 1);
1448
+ wiki_start_para_if_necessary(parser);
1449
+ str_append(output, strong_em_start, sizeof(strong_em_start) - 1);
1435
1450
  ary_push(parser->scope, STRONG);
1436
1451
  ary_push(parser->line, STRONG);
1437
1452
  ary_push(parser->scope, EM);
@@ -1442,24 +1457,24 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1442
1457
  case STRONG:
1443
1458
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1444
1459
  {
1445
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1446
- rb_str_cat(parser->output, literal_strong, sizeof(literal_strong) - 1);
1460
+ wiki_emit_pending_crlf_if_necessary(parser);
1461
+ str_append(parser->output, literal_strong, sizeof(literal_strong) - 1);
1447
1462
  }
1448
1463
  else
1449
1464
  {
1450
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1465
+ output = parser->capture ? parser->capture : parser->output;
1451
1466
  if (IN(STRONG_START))
1452
1467
  // already in span started with <strong>, no choice but to emit this literally
1453
- rb_str_cat(parser->output, literal_strong, sizeof(literal_strong) - 1);
1468
+ str_append(output, literal_strong, sizeof(literal_strong) - 1);
1454
1469
  else if (IN(STRONG))
1455
1470
  // STRONG already seen, this is a closing tag
1456
- _Wikitext_pop_from_stack_up_to(parser, i, STRONG, Qtrue);
1471
+ wiki_pop_from_stack_up_to(parser, output, STRONG, true);
1457
1472
  else
1458
1473
  {
1459
1474
  // this is a new opening
1460
- _Wikitext_pop_excess_elements(parser);
1461
- _Wikitext_start_para_if_necessary(parser);
1462
- rb_str_cat(i, strong_start, sizeof(strong_start) - 1);
1475
+ wiki_pop_excess_elements(parser);
1476
+ wiki_start_para_if_necessary(parser);
1477
+ str_append(output, strong_start, sizeof(strong_start) - 1);
1463
1478
  ary_push(parser->scope, STRONG);
1464
1479
  ary_push(parser->line, STRONG);
1465
1480
  }
@@ -1469,19 +1484,19 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1469
1484
  case STRONG_START:
1470
1485
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1471
1486
  {
1472
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1473
- rb_str_cat(parser->output, escaped_strong_start, sizeof(escaped_strong_start) - 1);
1487
+ wiki_emit_pending_crlf_if_necessary(parser);
1488
+ str_append(parser->output, escaped_strong_start, sizeof(escaped_strong_start) - 1);
1474
1489
  }
1475
1490
  else
1476
1491
  {
1477
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1492
+ output = parser->capture ? parser->capture : parser->output;
1478
1493
  if (IN(STRONG_START) || IN(STRONG))
1479
- rb_str_cat(parser->output, escaped_strong_start, sizeof(escaped_strong_start) - 1);
1494
+ str_append(output, escaped_strong_start, sizeof(escaped_strong_start) - 1);
1480
1495
  else
1481
1496
  {
1482
- _Wikitext_pop_excess_elements(parser);
1483
- _Wikitext_start_para_if_necessary(parser);
1484
- rb_str_cat(i, strong_start, sizeof(strong_start) - 1);
1497
+ wiki_pop_excess_elements(parser);
1498
+ wiki_start_para_if_necessary(parser);
1499
+ str_append(output, strong_start, sizeof(strong_start) - 1);
1485
1500
  ary_push(parser->scope, STRONG_START);
1486
1501
  ary_push(parser->line, STRONG_START);
1487
1502
  }
@@ -1491,20 +1506,20 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1491
1506
  case STRONG_END:
1492
1507
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1493
1508
  {
1494
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1495
- rb_str_cat(parser->output, escaped_strong_end, sizeof(escaped_strong_end) - 1);
1509
+ wiki_emit_pending_crlf_if_necessary(parser);
1510
+ str_append(parser->output, escaped_strong_end, sizeof(escaped_strong_end) - 1);
1496
1511
  }
1497
1512
  else
1498
1513
  {
1499
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1514
+ output = parser->capture ? parser->capture : parser->output;
1500
1515
  if (IN(STRONG_START))
1501
- _Wikitext_pop_from_stack_up_to(parser, i, STRONG_START, Qtrue);
1516
+ wiki_pop_from_stack_up_to(parser, output, STRONG_START, true);
1502
1517
  else
1503
1518
  {
1504
1519
  // no STRONG_START in scope, so must interpret the STRONG_END without any special meaning
1505
- _Wikitext_pop_excess_elements(parser);
1506
- _Wikitext_start_para_if_necessary(parser);
1507
- rb_str_cat(i, escaped_strong_end, sizeof(escaped_strong_end) - 1);
1520
+ wiki_pop_excess_elements(parser);
1521
+ wiki_start_para_if_necessary(parser);
1522
+ str_append(output, escaped_strong_end, sizeof(escaped_strong_end) - 1);
1508
1523
  }
1509
1524
  }
1510
1525
  break;
@@ -1512,24 +1527,24 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1512
1527
  case EM:
1513
1528
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1514
1529
  {
1515
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1516
- rb_str_cat(parser->output, literal_em, sizeof(literal_em) - 1);
1530
+ wiki_emit_pending_crlf_if_necessary(parser);
1531
+ str_append(parser->output, literal_em, sizeof(literal_em) - 1);
1517
1532
  }
1518
1533
  else
1519
1534
  {
1520
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1535
+ output = parser->capture ? parser->capture : parser->output;
1521
1536
  if (IN(EM_START))
1522
1537
  // already in span started with <em>, no choice but to emit this literally
1523
- rb_str_cat(parser->output, literal_em, sizeof(literal_em) - 1);
1538
+ str_append(output, literal_em, sizeof(literal_em) - 1);
1524
1539
  else if (IN(EM))
1525
1540
  // EM already seen, this is a closing tag
1526
- _Wikitext_pop_from_stack_up_to(parser, i, EM, Qtrue);
1541
+ wiki_pop_from_stack_up_to(parser, output, EM, true);
1527
1542
  else
1528
1543
  {
1529
1544
  // this is a new opening
1530
- _Wikitext_pop_excess_elements(parser);
1531
- _Wikitext_start_para_if_necessary(parser);
1532
- rb_str_cat(i, em_start, sizeof(em_start) - 1);
1545
+ wiki_pop_excess_elements(parser);
1546
+ wiki_start_para_if_necessary(parser);
1547
+ str_append(output, em_start, sizeof(em_start) - 1);
1533
1548
  ary_push(parser->scope, EM);
1534
1549
  ary_push(parser->line, EM);
1535
1550
  }
@@ -1539,19 +1554,19 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1539
1554
  case EM_START:
1540
1555
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1541
1556
  {
1542
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1543
- rb_str_cat(parser->output, escaped_em_start, sizeof(escaped_em_start) - 1);
1557
+ wiki_emit_pending_crlf_if_necessary(parser);
1558
+ str_append(parser->output, escaped_em_start, sizeof(escaped_em_start) - 1);
1544
1559
  }
1545
1560
  else
1546
1561
  {
1547
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1562
+ output = parser->capture ? parser->capture : parser->output;
1548
1563
  if (IN(EM_START) || IN(EM))
1549
- rb_str_cat(i, escaped_em_start, sizeof(escaped_em_start) - 1);
1564
+ str_append(output, escaped_em_start, sizeof(escaped_em_start) - 1);
1550
1565
  else
1551
1566
  {
1552
- _Wikitext_pop_excess_elements(parser);
1553
- _Wikitext_start_para_if_necessary(parser);
1554
- rb_str_cat(i, em_start, sizeof(em_start) - 1);
1567
+ wiki_pop_excess_elements(parser);
1568
+ wiki_start_para_if_necessary(parser);
1569
+ str_append(output, em_start, sizeof(em_start) - 1);
1555
1570
  ary_push(parser->scope, EM_START);
1556
1571
  ary_push(parser->line, EM_START);
1557
1572
  }
@@ -1561,20 +1576,20 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1561
1576
  case EM_END:
1562
1577
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1563
1578
  {
1564
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1565
- rb_str_cat(parser->output, escaped_em_end, sizeof(escaped_em_end) - 1);
1579
+ wiki_emit_pending_crlf_if_necessary(parser);
1580
+ str_append(parser->output, escaped_em_end, sizeof(escaped_em_end) - 1);
1566
1581
  }
1567
1582
  else
1568
1583
  {
1569
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1584
+ output = parser->capture ? parser->capture : parser->output;
1570
1585
  if (IN(EM_START))
1571
- _Wikitext_pop_from_stack_up_to(parser, i, EM_START, Qtrue);
1586
+ wiki_pop_from_stack_up_to(parser, output, EM_START, true);
1572
1587
  else
1573
1588
  {
1574
1589
  // no EM_START in scope, so must interpret the EM_END without any special meaning
1575
- _Wikitext_pop_excess_elements(parser);
1576
- _Wikitext_start_para_if_necessary(parser);
1577
- rb_str_cat(i, escaped_em_end, sizeof(escaped_em_end) - 1);
1590
+ wiki_pop_excess_elements(parser);
1591
+ wiki_start_para_if_necessary(parser);
1592
+ str_append(output, escaped_em_end, sizeof(escaped_em_end) - 1);
1578
1593
  }
1579
1594
  }
1580
1595
  break;
@@ -1582,24 +1597,24 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1582
1597
  case TT:
1583
1598
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1584
1599
  {
1585
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1586
- rb_str_cat(parser->output, backtick, sizeof(backtick) - 1);
1600
+ wiki_emit_pending_crlf_if_necessary(parser);
1601
+ str_append(parser->output, backtick, sizeof(backtick) - 1);
1587
1602
  }
1588
1603
  else
1589
1604
  {
1590
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1605
+ output = parser->capture ? parser->capture : parser->output;
1591
1606
  if (IN(TT_START))
1592
1607
  // already in span started with <tt>, no choice but to emit this literally
1593
- rb_str_cat(parser->output, backtick, sizeof(backtick) - 1);
1608
+ str_append(output, backtick, sizeof(backtick) - 1);
1594
1609
  else if (IN(TT))
1595
1610
  // TT (`) already seen, this is a closing tag
1596
- _Wikitext_pop_from_stack_up_to(parser, i, TT, Qtrue);
1611
+ wiki_pop_from_stack_up_to(parser, output, TT, true);
1597
1612
  else
1598
1613
  {
1599
1614
  // this is a new opening
1600
- _Wikitext_pop_excess_elements(parser);
1601
- _Wikitext_start_para_if_necessary(parser);
1602
- rb_str_cat(i, tt_start, sizeof(tt_start) - 1);
1615
+ wiki_pop_excess_elements(parser);
1616
+ wiki_start_para_if_necessary(parser);
1617
+ str_append(output, tt_start, sizeof(tt_start) - 1);
1603
1618
  ary_push(parser->scope, TT);
1604
1619
  ary_push(parser->line, TT);
1605
1620
  }
@@ -1609,19 +1624,19 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1609
1624
  case TT_START:
1610
1625
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1611
1626
  {
1612
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1613
- rb_str_cat(parser->output, escaped_tt_start, sizeof(escaped_tt_start) - 1);
1627
+ wiki_emit_pending_crlf_if_necessary(parser);
1628
+ str_append(parser->output, escaped_tt_start, sizeof(escaped_tt_start) - 1);
1614
1629
  }
1615
1630
  else
1616
1631
  {
1617
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1632
+ output = parser->capture ? parser->capture : parser->output;
1618
1633
  if (IN(TT_START) || IN(TT))
1619
- rb_str_cat(i, escaped_tt_start, sizeof(escaped_tt_start) - 1);
1634
+ str_append(output, escaped_tt_start, sizeof(escaped_tt_start) - 1);
1620
1635
  else
1621
1636
  {
1622
- _Wikitext_pop_excess_elements(parser);
1623
- _Wikitext_start_para_if_necessary(parser);
1624
- rb_str_cat(i, tt_start, sizeof(tt_start) - 1);
1637
+ wiki_pop_excess_elements(parser);
1638
+ wiki_start_para_if_necessary(parser);
1639
+ str_append(output, tt_start, sizeof(tt_start) - 1);
1625
1640
  ary_push(parser->scope, TT_START);
1626
1641
  ary_push(parser->line, TT_START);
1627
1642
  }
@@ -1631,20 +1646,20 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1631
1646
  case TT_END:
1632
1647
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1633
1648
  {
1634
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1635
- rb_str_cat(parser->output, escaped_tt_end, sizeof(escaped_tt_end) - 1);
1649
+ wiki_emit_pending_crlf_if_necessary(parser);
1650
+ str_append(parser->output, escaped_tt_end, sizeof(escaped_tt_end) - 1);
1636
1651
  }
1637
1652
  else
1638
1653
  {
1639
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1654
+ output = parser->capture ? parser->capture : parser->output;
1640
1655
  if (IN(TT_START))
1641
- _Wikitext_pop_from_stack_up_to(parser, i, TT_START, Qtrue);
1656
+ wiki_pop_from_stack_up_to(parser, output, TT_START, true);
1642
1657
  else
1643
1658
  {
1644
1659
  // no TT_START in scope, so must interpret the TT_END without any special meaning
1645
- _Wikitext_pop_excess_elements(parser);
1646
- _Wikitext_start_para_if_necessary(parser);
1647
- rb_str_cat(i, escaped_tt_end, sizeof(escaped_tt_end) - 1);
1660
+ wiki_pop_excess_elements(parser);
1661
+ wiki_start_para_if_necessary(parser);
1662
+ str_append(output, escaped_tt_end, sizeof(escaped_tt_end) - 1);
1648
1663
  }
1649
1664
  }
1650
1665
  break;
@@ -1654,7 +1669,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1654
1669
  if (IN(NO_WIKI_START) || IN(PRE_START))
1655
1670
  {
1656
1671
  // no need to check for PRE; can never appear inside it
1657
- rb_str_cat(parser->output, token->start, TOKEN_LEN(token));
1672
+ str_append(parser->output, token->start, TOKEN_LEN(token));
1658
1673
  break;
1659
1674
  }
1660
1675
 
@@ -1684,7 +1699,8 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1684
1699
  // want to compare line with scope but can only do so if scope has enough items on it
1685
1700
  if (j >= i)
1686
1701
  {
1687
- if (ary_entry(parser->scope, i + bq_count - 2) == type && ary_entry(parser->scope, i + bq_count - 1) == LI)
1702
+ if (ary_entry(parser->scope, i + bq_count - 2) == type &&
1703
+ ary_entry(parser->scope, i + bq_count - 1) == LI)
1688
1704
  {
1689
1705
  // line and scope match at this point: do nothing yet
1690
1706
  }
@@ -1693,7 +1709,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1693
1709
  // item just pushed onto line does not match corresponding slot of scope!
1694
1710
  for (; j >= i - 2; j--)
1695
1711
  // must pop back before emitting
1696
- _Wikitext_pop_from_stack(parser, Qnil);
1712
+ wiki_pop_from_stack(parser, NULL);
1697
1713
 
1698
1714
  // will emit UL or OL, then LI
1699
1715
  break;
@@ -1707,13 +1723,13 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1707
1723
  // not a OL or UL token!
1708
1724
  if (j == i)
1709
1725
  // must close existing LI and re-open new one
1710
- _Wikitext_pop_from_stack(parser, Qnil);
1726
+ wiki_pop_from_stack(parser, NULL);
1711
1727
  else if (j > i)
1712
1728
  {
1713
1729
  // item just pushed onto line does not match corresponding slot of scope!
1714
1730
  for (; j >= i; j--)
1715
1731
  // must pop back before emitting
1716
- _Wikitext_pop_from_stack(parser, Qnil);
1732
+ wiki_pop_from_stack(parser, NULL);
1717
1733
  }
1718
1734
  break;
1719
1735
  }
@@ -1727,33 +1743,33 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1727
1743
  if (j > 0 && ary_entry(parser->scope, -1) == LI)
1728
1744
  {
1729
1745
  // so we should precede it with a CRLF, and indicate that it's a nested list
1730
- rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
1746
+ str_append(parser->output, parser->line_ending->ptr, parser->line_ending->len);
1731
1747
  ary_push(parser->scope, NESTED_LIST);
1732
1748
  }
1733
1749
  else
1734
1750
  {
1735
1751
  // this is a new list
1736
1752
  if (IN(BLOCKQUOTE_START))
1737
- _Wikitext_pop_from_stack_up_to(parser, Qnil, BLOCKQUOTE_START, Qfalse);
1753
+ wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
1738
1754
  else
1739
- _Wikitext_pop_from_stack_up_to(parser, Qnil, BLOCKQUOTE, Qfalse);
1755
+ wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
1740
1756
  }
1741
1757
 
1742
1758
  // emit
1743
- _Wikitext_indent(parser);
1759
+ wiki_indent(parser);
1744
1760
  if (type == OL)
1745
- rb_str_cat(parser->output, ol_start, sizeof(ol_start) - 1);
1761
+ str_append(parser->output, ol_start, sizeof(ol_start) - 1);
1746
1762
  else if (type == UL)
1747
- rb_str_cat(parser->output, ul_start, sizeof(ul_start) - 1);
1763
+ str_append(parser->output, ul_start, sizeof(ul_start) - 1);
1748
1764
  ary_push(parser->scope, type);
1749
- rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
1765
+ str_append(parser->output, parser->line_ending->ptr, parser->line_ending->len);
1750
1766
  }
1751
1767
  else if (type == SPACE)
1752
1768
  // silently throw away the optional SPACE token after final list marker
1753
1769
  token = NULL;
1754
1770
 
1755
- _Wikitext_indent(parser);
1756
- rb_str_cat(parser->output, li_start, sizeof(li_start) - 1);
1771
+ wiki_indent(parser);
1772
+ str_append(parser->output, li_start, sizeof(li_start) - 1);
1757
1773
  ary_push(parser->scope, LI);
1758
1774
 
1759
1775
  // any subsequent UL or OL tokens on this line are syntax errors and must be emitted literally
@@ -1763,7 +1779,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1763
1779
  while (k++, NEXT_TOKEN(), (type = token->type))
1764
1780
  {
1765
1781
  if (type == OL || type == UL)
1766
- rb_str_cat(parser->output, token->start, TOKEN_LEN(token));
1782
+ str_append(parser->output, token->start, TOKEN_LEN(token));
1767
1783
  else if (type == SPACE && k == 1)
1768
1784
  {
1769
1785
  // silently throw away the optional SPACE token after final list marker
@@ -1787,15 +1803,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1787
1803
  if (IN(NO_WIKI_START) || IN(PRE_START))
1788
1804
  {
1789
1805
  // no need to check for PRE; can never appear inside it
1790
- rb_str_cat(parser->output, token->start, TOKEN_LEN(token));
1806
+ str_append(parser->output, token->start, TOKEN_LEN(token));
1791
1807
  break;
1792
1808
  }
1793
1809
 
1794
1810
  // pop up to but not including the last BLOCKQUOTE on the scope stack
1795
1811
  if (IN(BLOCKQUOTE_START))
1796
- _Wikitext_pop_from_stack_up_to(parser, Qnil, BLOCKQUOTE_START, Qfalse);
1812
+ wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
1797
1813
  else
1798
- _Wikitext_pop_from_stack_up_to(parser, Qnil, BLOCKQUOTE, Qfalse);
1814
+ wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
1799
1815
 
1800
1816
  // count number of BLOCKQUOTE tokens in line buffer and in scope stack
1801
1817
  ary_push(parser->line, type);
@@ -1807,7 +1823,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1807
1823
  {
1808
1824
  // must pop (reduce nesting level)
1809
1825
  for (i = j - i; i > 0; i--)
1810
- _Wikitext_pop_from_stack_up_to(parser, Qnil, BLOCKQUOTE, Qtrue);
1826
+ wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, true);
1811
1827
  }
1812
1828
 
1813
1829
  // discard any whitespace here (so that "== foo ==" will be translated to "<h2>foo</h2>" rather than "<h2> foo </h2>")
@@ -1815,7 +1831,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1815
1831
  ; // discard
1816
1832
 
1817
1833
  ary_push(parser->scope, type);
1818
- _Wikitext_indent(parser);
1834
+ wiki_indent(parser);
1819
1835
 
1820
1836
  // take base_heading_level into account
1821
1837
  type += base_heading_level;
@@ -1824,125 +1840,45 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1824
1840
 
1825
1841
  // rather than repeat all that code for each kind of heading, share it and use a conditional here
1826
1842
  if (type == H6_START)
1827
- rb_str_cat(parser->output, h6_start, sizeof(h6_start) - 1);
1843
+ str_append(parser->output, h6_start, sizeof(h6_start) - 1);
1828
1844
  else if (type == H5_START)
1829
- rb_str_cat(parser->output, h5_start, sizeof(h5_start) - 1);
1845
+ str_append(parser->output, h5_start, sizeof(h5_start) - 1);
1830
1846
  else if (type == H4_START)
1831
- rb_str_cat(parser->output, h4_start, sizeof(h4_start) - 1);
1847
+ str_append(parser->output, h4_start, sizeof(h4_start) - 1);
1832
1848
  else if (type == H3_START)
1833
- rb_str_cat(parser->output, h3_start, sizeof(h3_start) - 1);
1849
+ str_append(parser->output, h3_start, sizeof(h3_start) - 1);
1834
1850
  else if (type == H2_START)
1835
- rb_str_cat(parser->output, h2_start, sizeof(h2_start) - 1);
1851
+ str_append(parser->output, h2_start, sizeof(h2_start) - 1);
1836
1852
  else if (type == H1_START)
1837
- rb_str_cat(parser->output, h1_start, sizeof(h1_start) - 1);
1853
+ str_append(parser->output, h1_start, sizeof(h1_start) - 1);
1838
1854
 
1839
1855
  // jump to top of the loop to process token we scanned during lookahead
1840
1856
  continue;
1841
1857
 
1842
1858
  case H6_END:
1843
- if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1844
- {
1845
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1846
- rb_str_cat(parser->output, literal_h6, sizeof(literal_h6) - 1);
1847
- }
1848
- else
1849
- {
1850
- _Wikitext_rollback_failed_external_link(parser); // if any
1851
- if (!IN(H6_START))
1852
- {
1853
- // literal output only if not in h6 scope (we stay silent in that case)
1854
- _Wikitext_start_para_if_necessary(parser);
1855
- rb_str_cat(parser->output, literal_h6, sizeof(literal_h6) - 1);
1856
- }
1857
- }
1858
- break;
1859
-
1860
1859
  case H5_END:
1861
- if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1862
- {
1863
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1864
- rb_str_cat(parser->output, literal_h5, sizeof(literal_h5) - 1);
1865
- }
1866
- else
1867
- {
1868
- _Wikitext_rollback_failed_external_link(parser); // if any
1869
- if (!IN(H5_START))
1870
- {
1871
- // literal output only if not in h5 scope (we stay silent in that case)
1872
- _Wikitext_start_para_if_necessary(parser);
1873
- rb_str_cat(parser->output, literal_h5, sizeof(literal_h5) - 1);
1874
- }
1875
- }
1876
- break;
1877
-
1878
1860
  case H4_END:
1879
- if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1880
- {
1881
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1882
- rb_str_cat(parser->output, literal_h4, sizeof(literal_h4) - 1);
1883
- }
1884
- else
1885
- {
1886
- _Wikitext_rollback_failed_external_link(parser); // if any
1887
- if (!IN(H4_START))
1888
- {
1889
- // literal output only if not in h4 scope (we stay silent in that case)
1890
- _Wikitext_start_para_if_necessary(parser);
1891
- rb_str_cat(parser->output, literal_h4, sizeof(literal_h4) - 1);
1892
- }
1893
- }
1894
- break;
1895
-
1896
1861
  case H3_END:
1897
- if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1898
- {
1899
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1900
- rb_str_cat(parser->output, literal_h3, sizeof(literal_h3) - 1);
1901
- }
1902
- else
1903
- {
1904
- _Wikitext_rollback_failed_external_link(parser); // if any
1905
- if (!IN(H3_START))
1906
- {
1907
- // literal output only if not in h3 scope (we stay silent in that case)
1908
- _Wikitext_start_para_if_necessary(parser);
1909
- rb_str_cat(parser->output, literal_h3, sizeof(literal_h3) - 1);
1910
- }
1911
- }
1912
- break;
1913
-
1914
1862
  case H2_END:
1915
- if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1916
- {
1917
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1918
- rb_str_cat(parser->output, literal_h2, sizeof(literal_h2) - 1);
1919
- }
1920
- else
1921
- {
1922
- _Wikitext_rollback_failed_external_link(parser); // if any
1923
- if (!IN(H2_START))
1924
- {
1925
- // literal output only if not in h2 scope (we stay silent in that case)
1926
- _Wikitext_start_para_if_necessary(parser);
1927
- rb_str_cat(parser->output, literal_h2, sizeof(literal_h2) - 1);
1928
- }
1929
- }
1930
- break;
1931
-
1932
1863
  case H1_END:
1933
1864
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1934
1865
  {
1935
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1936
- rb_str_cat(parser->output, literal_h1, sizeof(literal_h1) - 1);
1866
+ wiki_emit_pending_crlf_if_necessary(parser);
1867
+ str_append(parser->output, token->start, TOKEN_LEN(token));
1937
1868
  }
1938
1869
  else
1939
1870
  {
1940
- _Wikitext_rollback_failed_external_link(parser); // if any
1941
- if (!IN(H1_START))
1871
+ wiki_rollback_failed_external_link(parser); // if any
1872
+ if ((type == H6_END && !IN(H6_START)) ||
1873
+ (type == H5_END && !IN(H5_START)) ||
1874
+ (type == H4_END && !IN(H4_START)) ||
1875
+ (type == H3_END && !IN(H3_START)) ||
1876
+ (type == H2_END && !IN(H2_START)) ||
1877
+ (type == H1_END && !IN(H1_START)))
1942
1878
  {
1943
- // literal output only if not in h1 scope (we stay silent in that case)
1944
- _Wikitext_start_para_if_necessary(parser);
1945
- rb_str_cat(parser->output, literal_h1, sizeof(literal_h1) - 1);
1879
+ // literal output only if not in appropriate scope (we stay silent in that case)
1880
+ wiki_start_para_if_necessary(parser);
1881
+ str_append(parser->output, token->start, TOKEN_LEN(token));
1946
1882
  }
1947
1883
  }
1948
1884
  break;
@@ -1950,18 +1886,16 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1950
1886
  case MAIL:
1951
1887
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1952
1888
  {
1953
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1954
- rb_str_cat(parser->output, token->start, TOKEN_LEN(token));
1889
+ wiki_emit_pending_crlf_if_necessary(parser);
1890
+ str_append(parser->output, token->start, TOKEN_LEN(token));
1955
1891
  }
1956
1892
  else
1957
1893
  {
1958
- // in plain scope, will turn into autolink (with appropriate, user-configurable CSS)
1959
- _Wikitext_pop_excess_elements(parser);
1960
- _Wikitext_start_para_if_necessary(parser);
1961
- i = TOKEN_TEXT(token);
1962
- if (parser->autolink == Qtrue)
1963
- i = _Wikitext_hyperlink(parser, rb_str_new2("mailto:"), i, i, mailto_class);
1964
- rb_str_append(parser->output, i);
1894
+ wiki_pop_excess_elements(parser);
1895
+ wiki_start_para_if_necessary(parser);
1896
+ token_str->ptr = token->start;
1897
+ token_str->len = TOKEN_LEN(token);
1898
+ wiki_append_hyperlink(parser, rb_str_new2("mailto:"), token_str, NULL, mailto_class, true);
1965
1899
  }
1966
1900
  break;
1967
1901
 
@@ -1969,110 +1903,93 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1969
1903
  if (IN(NO_WIKI_START))
1970
1904
  // user can temporarily suppress autolinking by using <nowiki></nowiki>
1971
1905
  // note that unlike MediaWiki, we do allow autolinking inside PRE blocks
1972
- rb_str_cat(parser->output, token->start, TOKEN_LEN(token));
1906
+ str_append(parser->output, token->start, TOKEN_LEN(token));
1973
1907
  else if (IN(LINK_START))
1974
1908
  {
1975
1909
  // if the URI were allowed it would have been handled already in LINK_START
1976
- _Wikitext_rollback_failed_link(parser);
1977
- i = TOKEN_TEXT(token);
1978
- if (parser->autolink == Qtrue)
1979
- i = _Wikitext_hyperlink(parser, Qnil, i, i, parser->external_link_class); // link target, link text
1980
- rb_str_append(parser->output, i);
1910
+ wiki_rollback_failed_internal_link(parser);
1911
+ token_str->ptr = token->start;
1912
+ token_str->len = TOKEN_LEN(token);
1913
+ wiki_append_hyperlink(parser, Qnil, token_str, NULL, parser->external_link_class, true);
1981
1914
  }
1982
1915
  else if (IN(EXT_LINK_START))
1983
1916
  {
1984
- if (NIL_P(parser->link_target))
1917
+ if (parser->link_target->len == 0)
1985
1918
  {
1986
1919
  // this must be our link target: look ahead to make sure we see the space we're expecting to see
1987
- i = TOKEN_TEXT(token);
1920
+ token_str->ptr = token->start;
1921
+ token_str->len = TOKEN_LEN(token);
1988
1922
  NEXT_TOKEN();
1989
1923
  if (token->type == SPACE)
1990
1924
  {
1991
1925
  ary_push(parser->scope, SPACE);
1992
- parser->link_target = i;
1993
- parser->link_text = rb_str_new2("");
1926
+ str_append_str(parser->link_target, token_str);
1927
+ str_clear(parser->link_text);
1994
1928
  parser->capture = parser->link_text;
1995
1929
  token = NULL; // silently consume space
1996
1930
  }
1997
1931
  else
1998
1932
  {
1999
1933
  // didn't see the space! this must be an error
2000
- _Wikitext_pop_from_stack(parser, Qnil);
2001
- _Wikitext_pop_excess_elements(parser);
2002
- _Wikitext_start_para_if_necessary(parser);
2003
- rb_str_cat(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
2004
- if (parser->autolink == Qtrue)
2005
- i = _Wikitext_hyperlink(parser, Qnil, i, i, parser->external_link_class); // link target, link text
2006
- rb_str_append(parser->output, i);
1934
+ wiki_pop_from_stack(parser, NULL);
1935
+ wiki_pop_excess_elements(parser);
1936
+ wiki_start_para_if_necessary(parser);
1937
+ str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
1938
+ wiki_append_hyperlink(parser, Qnil, token_str, NULL, parser->external_link_class, true);
2007
1939
  }
2008
1940
  }
2009
1941
  else
2010
- {
2011
- if (NIL_P(parser->link_text))
2012
- // this must be the first part of our link text
2013
- parser->link_text = TOKEN_TEXT(token);
2014
- else
2015
- // add to existing link text
2016
- rb_str_cat(parser->link_text, token->start, TOKEN_LEN(token));
2017
- }
1942
+ str_append(parser->link_text, token->start, TOKEN_LEN(token));
2018
1943
  }
2019
1944
  else
2020
1945
  {
2021
- // in plain scope, will turn into autolink (with appropriate, user-configurable CSS)
2022
- _Wikitext_pop_excess_elements(parser);
2023
- _Wikitext_start_para_if_necessary(parser);
2024
- i = TOKEN_TEXT(token);
2025
- if (parser->autolink == Qtrue)
2026
- i = _Wikitext_hyperlink(parser, Qnil, i, i, parser->external_link_class); // link target, link text
2027
- rb_str_append(parser->output, i);
1946
+ wiki_pop_excess_elements(parser);
1947
+ wiki_start_para_if_necessary(parser);
1948
+ token_str->ptr = token->start;
1949
+ token_str->len = TOKEN_LEN(token);
1950
+ wiki_append_hyperlink(parser, Qnil, token_str, NULL, parser->external_link_class, true);
2028
1951
  }
2029
1952
  break;
2030
1953
 
2031
1954
  case PATH:
2032
1955
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2033
- rb_str_cat(parser->output, token->start, TOKEN_LEN(token));
1956
+ str_append(parser->output, token->start, TOKEN_LEN(token));
2034
1957
  else if (IN(EXT_LINK_START))
2035
1958
  {
2036
- if (NIL_P(parser->link_target))
1959
+ if (parser->link_target->len == 0)
2037
1960
  {
2038
1961
  // this must be our link target: look ahead to make sure we see the space we're expecting to see
2039
- i = TOKEN_TEXT(token);
1962
+ token_str->ptr = token->start;
1963
+ token_str->len = TOKEN_LEN(token);
2040
1964
  NEXT_TOKEN();
2041
1965
  if (token->type == SPACE)
2042
1966
  {
2043
1967
  ary_push(parser->scope, PATH);
2044
1968
  ary_push(parser->scope, SPACE);
2045
- parser->link_target = i;
2046
- parser->link_text = rb_str_new2("");
1969
+ str_append_str(parser->link_target, token_str);
1970
+ str_clear(parser->link_text);
2047
1971
  parser->capture = parser->link_text;
2048
1972
  token = NULL; // silently consume space
2049
1973
  }
2050
1974
  else
2051
1975
  {
2052
1976
  // didn't see the space! this must be an error
2053
- _Wikitext_pop_from_stack(parser, Qnil);
2054
- _Wikitext_pop_excess_elements(parser);
2055
- _Wikitext_start_para_if_necessary(parser);
2056
- rb_str_cat(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
2057
- rb_str_append(parser->output, i);
1977
+ wiki_pop_from_stack(parser, NULL);
1978
+ wiki_pop_excess_elements(parser);
1979
+ wiki_start_para_if_necessary(parser);
1980
+ str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
1981
+ str_append_str(parser->output, token_str);
2058
1982
  }
2059
1983
  }
2060
1984
  else
2061
- {
2062
- if (NIL_P(parser->link_text))
2063
- // this must be the first part of our link text
2064
- parser->link_text = TOKEN_TEXT(token);
2065
- else
2066
- // add to existing link text
2067
- rb_str_cat(parser->link_text, token->start, TOKEN_LEN(token));
2068
- }
1985
+ str_append(parser->link_text, token->start, TOKEN_LEN(token));
2069
1986
  }
2070
1987
  else
2071
1988
  {
2072
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2073
- _Wikitext_pop_excess_elements(parser);
2074
- _Wikitext_start_para_if_necessary(parser);
2075
- rb_str_cat(i, token->start, TOKEN_LEN(token));
1989
+ output = parser->capture ? parser->capture : parser->output;
1990
+ wiki_pop_excess_elements(parser);
1991
+ wiki_start_para_if_necessary(parser);
1992
+ str_append(output, token->start, TOKEN_LEN(token));
2076
1993
  }
2077
1994
  break;
2078
1995
 
@@ -2099,20 +2016,20 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2099
2016
  // SPACE, SPECIAL_URI_CHARS, PRINTABLE, PATH, ALNUM, DEFAULT, QUOT and AMP
2100
2017
  // everything else will be rejected
2101
2018
  case LINK_START:
2102
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2019
+ output = parser->capture ? parser->capture : parser->output;
2103
2020
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2104
2021
  {
2105
- _Wikitext_emit_pending_crlf_if_necessary(parser);
2106
- rb_str_cat(i, link_start, sizeof(link_start) - 1);
2022
+ wiki_emit_pending_crlf_if_necessary(parser);
2023
+ str_append(output, link_start, sizeof(link_start) - 1);
2107
2024
  }
2108
2025
  else if (IN(EXT_LINK_START))
2109
2026
  // already in external link scope! (and in fact, must be capturing link_text right now)
2110
- rb_str_cat(i, link_start, sizeof(link_start) - 1);
2027
+ str_append(output, link_start, sizeof(link_start) - 1);
2111
2028
  else if (IN(LINK_START))
2112
2029
  {
2113
2030
  // already in internal link scope! this is a syntax error
2114
- _Wikitext_rollback_failed_link(parser);
2115
- rb_str_cat(parser->output, link_start, sizeof(link_start) - 1);
2031
+ wiki_rollback_failed_internal_link(parser);
2032
+ str_append(parser->output, link_start, sizeof(link_start) - 1);
2116
2033
  }
2117
2034
  else if (IN(SEPARATOR))
2118
2035
  {
@@ -2121,8 +2038,8 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2121
2038
  else // not in internal link scope yet
2122
2039
  {
2123
2040
  // will either emit a link, or the rollback of a failed link, so start the para now
2124
- _Wikitext_pop_excess_elements(parser);
2125
- _Wikitext_start_para_if_necessary(parser);
2041
+ wiki_pop_excess_elements(parser);
2042
+ wiki_start_para_if_necessary(parser);
2126
2043
  ary_push(parser->scope, LINK_START);
2127
2044
 
2128
2045
  // look ahead and try to gobble up link target
@@ -2144,34 +2061,34 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2144
2061
  type == RIGHT_CURLY)
2145
2062
  {
2146
2063
  // accumulate these tokens into link_target
2147
- if (NIL_P(parser->link_target))
2064
+ if (parser->link_target->len == 0)
2148
2065
  {
2149
- parser->link_target = rb_str_new2("");
2150
- parser->capture = parser->link_target;
2066
+ str_clear(parser->link_target);
2067
+ parser->capture = parser->link_target;
2151
2068
  }
2152
2069
  if (type == QUOT_ENTITY)
2153
2070
  // don't insert the entity, insert the literal quote
2154
- rb_str_cat(parser->link_target, quote, sizeof(quote) - 1);
2071
+ str_append(parser->link_target, quote, sizeof(quote) - 1);
2155
2072
  else if (type == AMP_ENTITY)
2156
2073
  // don't insert the entity, insert the literal ampersand
2157
- rb_str_cat(parser->link_target, ampersand, sizeof(ampersand) - 1);
2074
+ str_append(parser->link_target, ampersand, sizeof(ampersand) - 1);
2158
2075
  else
2159
- rb_str_cat(parser->link_target, token->start, TOKEN_LEN(token));
2076
+ str_append(parser->link_target, token->start, TOKEN_LEN(token));
2160
2077
  }
2161
2078
  else if (type == LINK_END)
2162
2079
  {
2163
- if (NIL_P(parser->link_target)) // bail for inputs like "[[]]"
2164
- _Wikitext_rollback_failed_link(parser);
2080
+ if (parser->link_target->len == 0) // bail for inputs like "[[]]"
2081
+ wiki_rollback_failed_internal_link(parser);
2165
2082
  break; // jump back to top of loop (will handle this in LINK_END case below)
2166
2083
  }
2167
2084
  else if (type == SEPARATOR)
2168
2085
  {
2169
- if (NIL_P(parser->link_target)) // bail for inputs like "[[|"
2170
- _Wikitext_rollback_failed_link(parser);
2086
+ if (parser->link_target->len == 0) // bail for inputs like "[[|"
2087
+ wiki_rollback_failed_internal_link(parser);
2171
2088
  else
2172
2089
  {
2173
2090
  ary_push(parser->scope, SEPARATOR);
2174
- parser->link_text = rb_str_new2("");
2091
+ str_clear(parser->link_text);
2175
2092
  parser->capture = parser->link_text;
2176
2093
  token = NULL;
2177
2094
  }
@@ -2179,7 +2096,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2179
2096
  }
2180
2097
  else // unexpected token (syntax error)
2181
2098
  {
2182
- _Wikitext_rollback_failed_link(parser);
2099
+ wiki_rollback_failed_internal_link(parser);
2183
2100
  break; // jump back to top of loop to handle unexpected token
2184
2101
  }
2185
2102
  }
@@ -2190,42 +2107,45 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2190
2107
  break;
2191
2108
 
2192
2109
  case LINK_END:
2193
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2110
+ output = parser->capture ? parser->capture : parser->output;
2194
2111
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2195
2112
  {
2196
- _Wikitext_emit_pending_crlf_if_necessary(parser);
2197
- rb_str_cat(i, link_end, sizeof(link_end) - 1);
2113
+ wiki_emit_pending_crlf_if_necessary(parser);
2114
+ str_append(output, link_end, sizeof(link_end) - 1);
2198
2115
  }
2199
2116
  else if (IN(EXT_LINK_START))
2200
2117
  // already in external link scope! (and in fact, must be capturing link_text right now)
2201
- rb_str_cat(i, link_end, sizeof(link_end) - 1);
2118
+ str_append(output, link_end, sizeof(link_end) - 1);
2202
2119
  else if (IN(LINK_START)) // in internal link scope!
2203
2120
  {
2204
- if (_Wikitext_blank(parser->link_target))
2121
+ if (wiki_blank(parser->link_target))
2205
2122
  {
2206
2123
  // special case for inputs like "[[ ]]"
2207
- _Wikitext_rollback_failed_link(parser);
2208
- rb_str_cat(parser->output, link_end, sizeof(link_end) - 1);
2124
+ wiki_rollback_failed_internal_link(parser);
2125
+ str_append(parser->output, link_end, sizeof(link_end) - 1);
2209
2126
  break;
2210
2127
  }
2211
- if (NIL_P(parser->link_text) || RSTRING_LEN(parser->link_text) == 0)
2128
+ if (parser->link_text->len == 0 ||
2129
+ wiki_blank(parser->link_text))
2130
+ {
2212
2131
  // use link target as link text
2213
- parser->link_text = _Wikitext_parser_sanitize_link_target(parser, Qfalse);
2132
+ str_clear(parser->link_text);
2133
+ wiki_append_sanitized_link_target(parser, parser->link_text, true);
2134
+ }
2214
2135
  else
2215
- parser->link_text = _Wikitext_parser_trim_link_target(parser->link_text);
2216
- _Wikitext_parser_encode_link_target(parser);
2217
- _Wikitext_pop_from_stack_up_to(parser, i, LINK_START, Qtrue);
2218
- parser->capture = Qnil;
2219
- i = _Wikitext_hyperlink(parser, prefix, parser->link_target, parser->link_text, Qnil);
2220
- rb_str_append(parser->output, i);
2221
- parser->link_target = Qnil;
2222
- parser->link_text = Qnil;
2136
+ wiki_trim_link_text(parser);
2137
+ wiki_encode_link_target(parser);
2138
+ wiki_pop_from_stack_up_to(parser, output, LINK_START, true);
2139
+ parser->capture = NULL;
2140
+ wiki_append_hyperlink(parser, prefix, parser->link_target, parser->link_text, Qnil, false);
2141
+ str_clear(parser->link_target);
2142
+ str_clear(parser->link_text);
2223
2143
  }
2224
2144
  else // wasn't in internal link scope
2225
2145
  {
2226
- _Wikitext_pop_excess_elements(parser);
2227
- _Wikitext_start_para_if_necessary(parser);
2228
- rb_str_cat(i, link_end, sizeof(link_end) - 1);
2146
+ wiki_pop_excess_elements(parser);
2147
+ wiki_start_para_if_necessary(parser);
2148
+ str_append(output, link_end, sizeof(link_end) - 1);
2229
2149
  }
2230
2150
  break;
2231
2151
 
@@ -2235,41 +2155,28 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2235
2155
  // strings in square brackets which don't match this syntax get passed through literally; eg:
2236
2156
  // he was very angery [sic] about the turn of events
2237
2157
  case EXT_LINK_START:
2238
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2158
+ output = parser->capture ? parser->capture : parser->output;
2239
2159
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2240
2160
  {
2241
- _Wikitext_emit_pending_crlf_if_necessary(parser);
2242
- rb_str_cat(i, ext_link_start, sizeof(ext_link_start) - 1);
2161
+ wiki_emit_pending_crlf_if_necessary(parser);
2162
+ str_append(output, ext_link_start, sizeof(ext_link_start) - 1);
2243
2163
  }
2244
2164
  else if (IN(EXT_LINK_START))
2245
2165
  // already in external link scope! (and in fact, must be capturing link_text right now)
2246
- rb_str_cat(i, ext_link_start, sizeof(ext_link_start) - 1);
2166
+ str_append(output, ext_link_start, sizeof(ext_link_start) - 1);
2247
2167
  else if (IN(LINK_START))
2248
2168
  {
2249
2169
  // already in internal link scope!
2250
- i = rb_str_new(ext_link_start, sizeof(ext_link_start) - 1);
2251
- if (NIL_P(parser->link_target))
2252
- // this must be the first character of our link target
2253
- parser->link_target = i;
2254
- else if (IN(SPACE))
2255
- {
2256
- // link target has already been scanned
2257
- if (NIL_P(parser->link_text))
2258
- // this must be the first character of our link text
2259
- parser->link_text = i;
2260
- else
2261
- // add to existing link text
2262
- rb_str_append(parser->link_text, i);
2263
- }
2264
- else
2265
- // add to existing link target
2266
- rb_str_append(parser->link_target, i);
2170
+ if (parser->link_target->len == 0 || !IN(SPACE))
2171
+ str_append(parser->link_target, ext_link_start, sizeof(ext_link_start) - 1);
2172
+ else // link target has already been scanned
2173
+ str_append(parser->link_text, ext_link_start, sizeof(ext_link_start) - 1);
2267
2174
  }
2268
2175
  else // not in external link scope yet
2269
2176
  {
2270
2177
  // will either emit a link, or the rollback of a failed link, so start the para now
2271
- _Wikitext_pop_excess_elements(parser);
2272
- _Wikitext_start_para_if_necessary(parser);
2178
+ wiki_pop_excess_elements(parser);
2179
+ wiki_start_para_if_necessary(parser);
2273
2180
 
2274
2181
  // look ahead: expect an absolute URI (with protocol) or "relative" (path) URI
2275
2182
  NEXT_TOKEN();
@@ -2277,56 +2184,55 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2277
2184
  ary_push(parser->scope, EXT_LINK_START); // so far so good, jump back to the top of the loop
2278
2185
  else
2279
2186
  // only get here if there was a syntax error (missing URI)
2280
- rb_str_cat(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
2187
+ str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
2281
2188
  continue; // jump back to top of loop to handle token (either URI or whatever it is)
2282
2189
  }
2283
2190
  break;
2284
2191
 
2285
2192
  case EXT_LINK_END:
2286
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2193
+ output = parser->capture ? parser->capture : parser->output;
2287
2194
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2288
2195
  {
2289
- _Wikitext_emit_pending_crlf_if_necessary(parser);
2290
- rb_str_cat(i, ext_link_end, sizeof(ext_link_end) - 1);
2196
+ wiki_emit_pending_crlf_if_necessary(parser);
2197
+ str_append(output, ext_link_end, sizeof(ext_link_end) - 1);
2291
2198
  }
2292
2199
  else if (IN(EXT_LINK_START))
2293
2200
  {
2294
- if (NIL_P(parser->link_text))
2201
+ if (parser->link_text->len == 0)
2295
2202
  // syntax error: external link with no link text
2296
- _Wikitext_rollback_failed_external_link(parser);
2203
+ wiki_rollback_failed_external_link(parser);
2297
2204
  else
2298
2205
  {
2299
2206
  // success!
2300
2207
  j = IN(PATH) ? Qnil : parser->external_link_class;
2301
- _Wikitext_pop_from_stack_up_to(parser, i, EXT_LINK_START, Qtrue);
2302
- parser->capture = Qnil;
2303
- i = _Wikitext_hyperlink(parser, Qnil, parser->link_target, parser->link_text, j);
2304
- rb_str_append(parser->output, i);
2208
+ wiki_pop_from_stack_up_to(parser, output, EXT_LINK_START, true);
2209
+ parser->capture = NULL;
2210
+ wiki_append_hyperlink(parser, Qnil, parser->link_target, parser->link_text, j, false);
2305
2211
  }
2306
- parser->link_target = Qnil;
2307
- parser->link_text = Qnil;
2212
+ str_clear(parser->link_target);
2213
+ str_clear(parser->link_text);
2308
2214
  }
2309
2215
  else
2310
2216
  {
2311
- _Wikitext_pop_excess_elements(parser);
2312
- _Wikitext_start_para_if_necessary(parser);
2313
- rb_str_cat(parser->output, ext_link_end, sizeof(ext_link_end) - 1);
2217
+ wiki_pop_excess_elements(parser);
2218
+ wiki_start_para_if_necessary(parser);
2219
+ str_append(parser->output, ext_link_end, sizeof(ext_link_end) - 1);
2314
2220
  }
2315
2221
  break;
2316
2222
 
2317
2223
  case SEPARATOR:
2318
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2319
- _Wikitext_pop_excess_elements(parser);
2320
- _Wikitext_start_para_if_necessary(parser);
2321
- rb_str_cat(i, separator, sizeof(separator) - 1);
2224
+ output = parser->capture ? parser->capture : parser->output;
2225
+ wiki_pop_excess_elements(parser);
2226
+ wiki_start_para_if_necessary(parser);
2227
+ str_append(output, separator, sizeof(separator) - 1);
2322
2228
  break;
2323
2229
 
2324
2230
  case SPACE:
2325
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2231
+ output = parser->capture ? parser->capture : parser->output;
2326
2232
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2327
2233
  {
2328
- _Wikitext_emit_pending_crlf_if_necessary(parser);
2329
- rb_str_cat(i, token->start, TOKEN_LEN(token));
2234
+ wiki_emit_pending_crlf_if_necessary(parser);
2235
+ str_append(output, token->start, TOKEN_LEN(token));
2330
2236
  }
2331
2237
  else
2332
2238
  {
@@ -2335,21 +2241,21 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2335
2241
  int token_len = TOKEN_LEN(token);
2336
2242
  NEXT_TOKEN();
2337
2243
  type = token->type;
2338
- if (((type == H6_END) && IN(H6_START)) ||
2339
- ((type == H5_END) && IN(H5_START)) ||
2340
- ((type == H4_END) && IN(H4_START)) ||
2341
- ((type == H3_END) && IN(H3_START)) ||
2342
- ((type == H2_END) && IN(H2_START)) ||
2343
- ((type == H1_END) && IN(H1_START)))
2244
+ if ((type == H6_END && IN(H6_START)) ||
2245
+ (type == H5_END && IN(H5_START)) ||
2246
+ (type == H4_END && IN(H4_START)) ||
2247
+ (type == H3_END && IN(H3_START)) ||
2248
+ (type == H2_END && IN(H2_START)) ||
2249
+ (type == H1_END && IN(H1_START)))
2344
2250
  {
2345
2251
  // will suppress emission of space (discard) if next token is a H6_END, H5_END etc and we are in the corresponding scope
2346
2252
  }
2347
2253
  else
2348
2254
  {
2349
2255
  // emit the space
2350
- _Wikitext_pop_excess_elements(parser);
2351
- _Wikitext_start_para_if_necessary(parser);
2352
- rb_str_cat(i, token_ptr, token_len);
2256
+ wiki_pop_excess_elements(parser);
2257
+ wiki_start_para_if_necessary(parser);
2258
+ str_append(output, token_ptr, token_len);
2353
2259
  }
2354
2260
 
2355
2261
  // jump to top of the loop to process token we scanned during lookahead
@@ -2362,101 +2268,100 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2362
2268
  case NAMED_ENTITY:
2363
2269
  case DECIMAL_ENTITY:
2364
2270
  // pass these through unaltered as they are case sensitive
2365
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2366
- _Wikitext_pop_excess_elements(parser);
2367
- _Wikitext_start_para_if_necessary(parser);
2368
- rb_str_cat(i, token->start, TOKEN_LEN(token));
2271
+ output = parser->capture ? parser->capture : parser->output;
2272
+ wiki_pop_excess_elements(parser);
2273
+ wiki_start_para_if_necessary(parser);
2274
+ str_append(output, token->start, TOKEN_LEN(token));
2369
2275
  break;
2370
2276
 
2371
2277
  case HEX_ENTITY:
2372
2278
  // normalize hex entities (downcase them)
2373
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2374
- _Wikitext_pop_excess_elements(parser);
2375
- _Wikitext_start_para_if_necessary(parser);
2376
- rb_str_append(i, _Wikitext_downcase(TOKEN_TEXT(token)));
2279
+ output = parser->capture ? parser->capture : parser->output;
2280
+ wiki_pop_excess_elements(parser);
2281
+ wiki_start_para_if_necessary(parser);
2282
+ str_append(output, token->start, TOKEN_LEN(token));
2283
+ wiki_downcase_bang(output->ptr + output->len - TOKEN_LEN(token), TOKEN_LEN(token));
2377
2284
  break;
2378
2285
 
2379
2286
  case QUOT:
2380
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2381
- _Wikitext_pop_excess_elements(parser);
2382
- _Wikitext_start_para_if_necessary(parser);
2383
- rb_str_cat(i, quot_entity, sizeof(quot_entity) - 1);
2287
+ output = parser->capture ? parser->capture : parser->output;
2288
+ wiki_pop_excess_elements(parser);
2289
+ wiki_start_para_if_necessary(parser);
2290
+ str_append(output, quot_entity, sizeof(quot_entity) - 1);
2384
2291
  break;
2385
2292
 
2386
2293
  case AMP:
2387
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2388
- _Wikitext_pop_excess_elements(parser);
2389
- _Wikitext_start_para_if_necessary(parser);
2390
- rb_str_cat(i, amp_entity, sizeof(amp_entity) - 1);
2294
+ output = parser->capture ? parser->capture : parser->output;
2295
+ wiki_pop_excess_elements(parser);
2296
+ wiki_start_para_if_necessary(parser);
2297
+ str_append(output, amp_entity, sizeof(amp_entity) - 1);
2391
2298
  break;
2392
2299
 
2393
2300
  case LESS:
2394
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2395
- _Wikitext_pop_excess_elements(parser);
2396
- _Wikitext_start_para_if_necessary(parser);
2397
- rb_str_cat(i, lt_entity, sizeof(lt_entity) - 1);
2301
+ output = parser->capture ? parser->capture : parser->output;
2302
+ wiki_pop_excess_elements(parser);
2303
+ wiki_start_para_if_necessary(parser);
2304
+ str_append(output, lt_entity, sizeof(lt_entity) - 1);
2398
2305
  break;
2399
2306
 
2400
2307
  case GREATER:
2401
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2402
- _Wikitext_pop_excess_elements(parser);
2403
- _Wikitext_start_para_if_necessary(parser);
2404
- rb_str_cat(i, gt_entity, sizeof(gt_entity) - 1);
2308
+ output = parser->capture ? parser->capture : parser->output;
2309
+ wiki_pop_excess_elements(parser);
2310
+ wiki_start_para_if_necessary(parser);
2311
+ str_append(output, gt_entity, sizeof(gt_entity) - 1);
2405
2312
  break;
2406
2313
 
2407
2314
  case IMG_START:
2408
2315
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2409
2316
  {
2410
- _Wikitext_emit_pending_crlf_if_necessary(parser);
2411
- rb_str_cat(parser->output, token->start, TOKEN_LEN(token));
2317
+ wiki_emit_pending_crlf_if_necessary(parser);
2318
+ str_append(parser->output, token->start, TOKEN_LEN(token));
2412
2319
  }
2413
- else if (!NIL_P(parser->capture))
2414
- rb_str_cat(parser->capture, token->start, TOKEN_LEN(token));
2320
+ else if (parser->capture)
2321
+ str_append(parser->capture, token->start, TOKEN_LEN(token));
2415
2322
  else
2416
2323
  {
2417
2324
  // not currently capturing: will be emitting something on success or failure, so get ready
2418
- _Wikitext_pop_excess_elements(parser);
2419
- _Wikitext_start_para_if_necessary(parser);
2325
+ wiki_pop_excess_elements(parser);
2326
+ wiki_start_para_if_necessary(parser);
2420
2327
 
2421
2328
  // scan ahead consuming PATH, PRINTABLE, ALNUM and SPECIAL_URI_CHARS tokens
2422
2329
  // will cheat here and abuse the link_target capture buffer to accumulate text
2423
- if (NIL_P(parser->link_target))
2424
- parser->link_target = rb_str_new2("");
2425
2330
  while (NEXT_TOKEN(), (type = token->type))
2426
2331
  {
2427
2332
  if (type == PATH || type == PRINTABLE || type == ALNUM || type == SPECIAL_URI_CHARS)
2428
- rb_str_cat(parser->link_target, token->start, TOKEN_LEN(token));
2429
- else if (type == IMG_END && RSTRING_LEN(parser->link_target) > 0)
2333
+ str_append(parser->link_target, token->start, TOKEN_LEN(token));
2334
+ else if (type == IMG_END && parser->link_target->len > 0)
2430
2335
  {
2431
2336
  // success
2432
- _Wikitext_append_img(parser, RSTRING_PTR(parser->link_target), RSTRING_LEN(parser->link_target));
2337
+ wiki_append_img(parser, parser->link_target->ptr, parser->link_target->len);
2433
2338
  token = NULL;
2434
2339
  break;
2435
2340
  }
2436
2341
  else // unexpected token or zero-length target (syntax error)
2437
2342
  {
2438
2343
  // rollback
2439
- rb_str_cat(parser->output, literal_img_start, sizeof(literal_img_start) - 1);
2440
- rb_str_cat(parser->output, RSTRING_PTR(parser->link_target), RSTRING_LEN(parser->link_target));
2344
+ str_append(parser->output, literal_img_start, sizeof(literal_img_start) - 1);
2345
+ if (parser->link_target->len > 0)
2346
+ str_append(parser->output, parser->link_target->ptr, parser->link_target->len);
2441
2347
  break;
2442
2348
  }
2443
2349
  }
2444
2350
 
2445
2351
  // jump to top of the loop to process token we scanned during lookahead
2446
- parser->link_target = Qnil;
2352
+ str_clear(parser->link_target);
2447
2353
  continue;
2448
2354
  }
2449
2355
  break;
2450
2356
 
2451
2357
  case CRLF:
2452
2358
  i = parser->pending_crlf;
2453
- parser->pending_crlf = Qfalse;
2454
- _Wikitext_rollback_failed_link(parser); // if any
2455
- _Wikitext_rollback_failed_external_link(parser); // if any
2359
+ parser->pending_crlf = false;
2360
+ wiki_rollback_failed_link(parser); // if any
2456
2361
  if (IN(NO_WIKI_START) || IN(PRE_START))
2457
2362
  {
2458
2363
  ary_clear(parser->line_buffer);
2459
- rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
2364
+ str_append_str(parser->output, parser->line_ending);
2460
2365
  break;
2461
2366
  }
2462
2367
  else if (IN(PRE))
@@ -2464,14 +2369,14 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2464
2369
  // beware when BLOCKQUOTE on line buffer (not line stack!) prior to CRLF, that must be end of PRE block
2465
2370
  if (ary_entry(parser->line_buffer, -2) == BLOCKQUOTE)
2466
2371
  // don't emit in this case
2467
- _Wikitext_pop_from_stack_up_to(parser, parser->output, PRE, Qtrue);
2372
+ wiki_pop_from_stack_up_to(parser, parser->output, PRE, true);
2468
2373
  else
2469
2374
  {
2470
2375
  if (ary_entry(parser->line_buffer, -2) == PRE)
2471
2376
  {
2472
2377
  // only thing on line is the PRE: emit pending line ending (if we had one)
2473
- if (i == Qtrue)
2474
- rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
2378
+ if (i)
2379
+ str_append_str(parser->output, parser->line_ending);
2475
2380
  }
2476
2381
 
2477
2382
  // clear these _before_ calling NEXT_TOKEN (NEXT_TOKEN adds to the line_buffer)
@@ -2483,17 +2388,17 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2483
2388
  type = token->type;
2484
2389
  if (type != BLOCKQUOTE && type != PRE)
2485
2390
  // this is definitely the end of the block, so don't emit
2486
- _Wikitext_pop_from_stack_up_to(parser, parser->output, PRE, Qtrue);
2391
+ wiki_pop_from_stack_up_to(parser, parser->output, PRE, true);
2487
2392
  else
2488
2393
  // potentially will emit
2489
- parser->pending_crlf = Qtrue;
2394
+ parser->pending_crlf = true;
2490
2395
 
2491
2396
  continue; // jump back to top of loop to handle token grabbed via lookahead
2492
2397
  }
2493
2398
  }
2494
2399
  else
2495
2400
  {
2496
- parser->pending_crlf = Qtrue;
2401
+ parser->pending_crlf = true;
2497
2402
 
2498
2403
  // count number of BLOCKQUOTE tokens in line buffer (can be zero) and pop back to that level
2499
2404
  // as a side effect, this handles any open span-level elements and unclosed blocks
@@ -2503,7 +2408,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2503
2408
  {
2504
2409
  if (parser->scope->count > 0 && ary_entry(parser->scope, -1) == LI)
2505
2410
  {
2506
- parser->pending_crlf = Qfalse;
2411
+ parser->pending_crlf = false;
2507
2412
  break;
2508
2413
  }
2509
2414
 
@@ -2516,12 +2421,12 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2516
2421
  if (NO_ITEM(ary_entry(parser->line_buffer, -2)) ||
2517
2422
  (ary_entry(parser->line_buffer, -2) == BLOCKQUOTE && !IN(BLOCKQUOTE_START)))
2518
2423
  // paragraph break
2519
- parser->pending_crlf = Qfalse;
2424
+ parser->pending_crlf = false;
2520
2425
  else
2521
2426
  // not a paragraph break!
2522
2427
  continue;
2523
2428
  }
2524
- _Wikitext_pop_from_stack(parser, Qnil);
2429
+ wiki_pop_from_stack(parser, NULL);
2525
2430
  }
2526
2431
  }
2527
2432
 
@@ -2536,31 +2441,29 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2536
2441
  case IMG_END:
2537
2442
  case LEFT_CURLY:
2538
2443
  case RIGHT_CURLY:
2539
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2540
- _Wikitext_pop_excess_elements(parser);
2541
- _Wikitext_start_para_if_necessary(parser);
2542
- rb_str_cat(i, token->start, TOKEN_LEN(token));
2444
+ output = parser->capture ? parser->capture : parser->output;
2445
+ wiki_pop_excess_elements(parser);
2446
+ wiki_start_para_if_necessary(parser);
2447
+ str_append(output, token->start, TOKEN_LEN(token));
2543
2448
  break;
2544
2449
 
2545
2450
  case DEFAULT:
2546
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2547
- _Wikitext_pop_excess_elements(parser);
2548
- _Wikitext_start_para_if_necessary(parser);
2549
- rb_str_append(i, _Wikitext_utf32_char_to_entity(token->code_point)); // convert to entity
2451
+ output = parser->capture ? parser->capture : parser->output;
2452
+ wiki_pop_excess_elements(parser);
2453
+ wiki_start_para_if_necessary(parser);
2454
+ wiki_append_entity_from_utf32_char(output, token->code_point);
2550
2455
  break;
2551
2456
 
2552
2457
  case END_OF_FILE:
2553
2458
  // special case for input like " foo\n " (see pre_spec.rb)
2554
2459
  if (IN(PRE) &&
2555
2460
  ary_entry(parser->line_buffer, -2) == PRE &&
2556
- parser->pending_crlf == Qtrue)
2557
- rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
2461
+ parser->pending_crlf)
2462
+ str_append(parser->output, parser->line_ending->ptr, parser->line_ending->len);
2558
2463
 
2559
2464
  // close any open scopes on hitting EOF
2560
- _Wikitext_rollback_failed_external_link(parser); // if any
2561
- _Wikitext_rollback_failed_link(parser); // if any
2562
- for (i = 0, j = parser->scope->count; i < j; i++)
2563
- _Wikitext_pop_from_stack(parser, Qnil);
2465
+ wiki_rollback_failed_link(parser); // if any
2466
+ wiki_pop_all_from_stack(parser);
2564
2467
  goto return_output; // break not enough here (want to break out of outer while loop, not inner switch statement)
2565
2468
 
2566
2469
  default:
@@ -2571,5 +2474,25 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2571
2474
  token = NULL;
2572
2475
  } while (1);
2573
2476
  return_output:
2574
- return parser->output;
2477
+ // nasty hack to avoid re-allocating our return value
2478
+ str_append(parser->output, null_str, 1); // null-terminate
2479
+ len = parser->output->len - 1; // don't count null termination
2480
+
2481
+ #if defined(RUBY_1_9_x)
2482
+ VALUE out = rb_str_buf_new(RSTRING_EMBED_LEN_MAX + 1);
2483
+ free(RSTRING_PTR(out));
2484
+ RSTRING(out)->as.heap.aux.capa = len;
2485
+ RSTRING(out)->as.heap.ptr = parser->output->ptr;
2486
+ RSTRING(out)->as.heap.len = len;
2487
+ #elif defined(RUBY_1_8_x)
2488
+ VALUE out = rb_str_new2("");
2489
+ free(RSTRING_PTR(out));
2490
+ RSTRING(out)->len = len;
2491
+ RSTRING(out)->aux.capa = len;
2492
+ RSTRING(out)->ptr = parser->output->ptr;
2493
+ #else
2494
+ #error unsupported RUBY_VERSION
2495
+ #endif
2496
+ parser->output->ptr = NULL; // don't double-free
2497
+ return out;
2575
2498
  }