wikitext 1.6 → 1.7

Sign up to get free protection for your applications and to get access to all the features.
data/ext/ary.h CHANGED
@@ -35,10 +35,6 @@ typedef struct
35
35
 
36
36
  #define NO_ITEM(item) (item == INT_MAX)
37
37
 
38
- // Mark the ary struct designated by ptr as a participant in Ruby's mark-and-sweep garbage collection scheme.
39
- // A variable named name is placed on the C stack to prevent the structure from being prematurely collected.
40
- #define GC_WRAP_ARY(ptr, name) volatile VALUE name __attribute__((unused)) = Data_Wrap_Struct(rb_cObject, 0, ary_free, ptr)
41
-
42
38
  ary_t *ary_new(void);
43
39
  int ary_entry(ary_t *ary, int idx);
44
40
  void ary_clear(ary_t *ary);
@@ -47,8 +43,6 @@ void ary_push(ary_t *ary, int val);
47
43
  int ary_includes(ary_t *ary, int val);
48
44
 
49
45
  // returns a count indicating the number of times the value appears in the collection
50
- // refactored from _Wikitext_count()
51
46
  int ary_count(ary_t *ary, int item);
52
47
 
53
- // this method not inlined so its address can be passed to the Data_Wrap_Struct function.
54
48
  void ary_free(ary_t *ary);
data/ext/extconf.rb CHANGED
@@ -28,5 +28,14 @@ def missing item
28
28
  exit 1
29
29
  end
30
30
 
31
+ case RUBY_VERSION
32
+ when /\A1\.8/
33
+ $CFLAGS += ' -DRUBY_1_8_x'
34
+ when /\A1\.9/
35
+ $CFLAGS += ' -DRUBY_1_9_x'
36
+ else
37
+ raise "unsupported Ruby version: #{RUBY_VERSION}"
38
+ end
39
+
31
40
  have_header('ruby.h') or missing 'ruby.h'
32
41
  create_makefile('wikitext')
data/ext/parser.c CHANGED
@@ -21,6 +21,8 @@
21
21
  // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
22
22
  // POSSIBILITY OF SUCH DAMAGE.
23
23
 
24
+ #include <stdbool.h>
25
+
24
26
  #include "parser.h"
25
27
  #include "ary.h"
26
28
  #include "str.h"
@@ -32,29 +34,29 @@
32
34
  // poor man's object orientation in C:
33
35
  // instead of passing around multiple parameters between functions in the parser
34
36
  // we pack everything into a struct and pass around only a pointer to that
35
- // TODO: consider changing some of the VALUE members (eg link_target) to the more efficient str_t type
36
37
  typedef struct
37
38
  {
38
- VALUE output; // for accumulating output to be returned
39
- VALUE capture; // for capturing substrings
40
- VALUE link_target; // short term "memory" for parsing links
41
- VALUE link_text; // short term "memory" for parsing links
42
- VALUE external_link_class; // CSS class applied to external links
43
- VALUE mailto_class; // CSS class applied to email (mailto) links
44
- VALUE img_prefix; // path prepended when emitting img tags
39
+ str_t *capture; // capturing to link_target, link_text, or NULL (direct to output, not capturing)
40
+ str_t *output; // for accumulating output to be returned
41
+ str_t *link_target; // short term "memory" for parsing links
42
+ str_t *link_text; // short term "memory" for parsing links
43
+ str_t *line_ending;
44
+ str_t *tabulation; // caching buffer for emitting indentation
45
45
  ary_t *scope; // stack for tracking scope
46
46
  ary_t *line; // stack for tracking scope as implied by current line
47
47
  ary_t *line_buffer; // stack for tracking raw tokens (not scope) on current line
48
- VALUE pending_crlf; // boolean (Qtrue or Qfalse)
49
- VALUE autolink; // boolean (Qtrue or Qfalse)
50
- VALUE space_to_underscore; // boolean (Qtrue or Qfalse)
51
- str_t *line_ending;
48
+ VALUE external_link_class; // CSS class applied to external links
49
+ VALUE mailto_class; // CSS class applied to email (mailto) links
50
+ VALUE img_prefix; // path prepended when emitting img tags
52
51
  int base_indent; // controlled by the :indent option to Wikitext::Parser#parse
53
52
  int current_indent; // fluctuates according to currently nested structures
54
- str_t *tabulation; // caching buffer for emitting indentation
55
53
  int base_heading_level;
54
+ bool pending_crlf;
55
+ bool autolink;
56
+ bool space_to_underscore;
56
57
  } parser_t;
57
58
 
59
+ const char null_str[] = { 0 };
58
60
  const char escaped_no_wiki_start[] = "&lt;nowiki&gt;";
59
61
  const char escaped_no_wiki_end[] = "&lt;/nowiki&gt;";
60
62
  const char literal_strong_em[] = "'''''";
@@ -66,12 +68,6 @@ const char escaped_strong_start[] = "&lt;strong&gt;";
66
68
  const char escaped_strong_end[] = "&lt;/strong&gt;";
67
69
  const char escaped_tt_start[] = "&lt;tt&gt;";
68
70
  const char escaped_tt_end[] = "&lt;/tt&gt;";
69
- const char literal_h6[] = "======";
70
- const char literal_h5[] = "=====";
71
- const char literal_h4[] = "====";
72
- const char literal_h3[] = "===";
73
- const char literal_h2[] = "==";
74
- const char literal_h1[] = "=";
75
71
  const char pre_start[] = "<pre>";
76
72
  const char pre_end[] = "</pre>";
77
73
  const char escaped_pre_start[] = "&lt;pre&gt;";
@@ -130,6 +126,49 @@ const char img_start[] = "<img src=\"";
130
126
  const char img_end[] = "\" />";
131
127
  const char img_alt[] = "\" alt=\"";
132
128
 
129
+ // Mark the parser struct designated by ptr as a participant in Ruby's
130
+ // mark-and-sweep garbage collection scheme. A variable named name is placed on
131
+ // the C stack to prevent the structure from being prematurely collected.
132
+ #define GC_WRAP_PARSER(ptr, name) volatile VALUE name __attribute__((unused)) = Data_Wrap_Struct(rb_cObject, 0, parser_free, ptr)
133
+
134
+ parser_t *parser_new(void)
135
+ {
136
+ parser_t *parser = ALLOC_N(parser_t, 1);
137
+ parser->capture = NULL; // not a real instance, pointer to other member's instance
138
+ parser->output = str_new();
139
+ parser->link_target = str_new();
140
+ parser->link_text = str_new();
141
+ parser->line_ending = NULL; // caller should set up
142
+ parser->tabulation = str_new();
143
+ parser->scope = ary_new();
144
+ parser->line = ary_new();
145
+ parser->line_buffer = ary_new();
146
+ parser->external_link_class = Qnil; // caller should set up
147
+ parser->mailto_class = Qnil; // caller should set up
148
+ parser->img_prefix = Qnil; // caller should set up
149
+ parser->base_indent = 0;
150
+ parser->current_indent = 0;
151
+ parser->base_heading_level = 0;
152
+ parser->pending_crlf = false;
153
+ parser->autolink = true;
154
+ parser->space_to_underscore = true;
155
+ return parser;
156
+ }
157
+
158
+ void parser_free(parser_t *parser)
159
+ {
160
+ // we don't free parser->capture; it's just a redundant pointer
161
+ if (parser->output) str_free(parser->output);
162
+ if (parser->link_target) str_free(parser->link_target);
163
+ if (parser->link_text) str_free(parser->link_text);
164
+ if (parser->line_ending) str_free(parser->line_ending);
165
+ if (parser->tabulation) str_free(parser->tabulation);
166
+ if (parser->scope) ary_free(parser->scope);
167
+ if (parser->line) ary_free(parser->line);
168
+ if (parser->line_buffer) ary_free(parser->line_buffer);
169
+ free(parser);
170
+ }
171
+
133
172
  // for testing and debugging only
134
173
  VALUE Wikitext_parser_tokenize(VALUE self, VALUE string)
135
174
  {
@@ -142,11 +181,11 @@ VALUE Wikitext_parser_tokenize(VALUE self, VALUE string)
142
181
  char *pe = p + len;
143
182
  token_t token;
144
183
  next_token(&token, NULL, p, pe);
145
- rb_ary_push(tokens, _Wikitext_token(&token));
184
+ rb_ary_push(tokens, wiki_token(&token));
146
185
  while (token.type != END_OF_FILE)
147
186
  {
148
187
  next_token(&token, &token, NULL, pe);
149
- rb_ary_push(tokens, _Wikitext_token(&token));
188
+ rb_ary_push(tokens, wiki_token(&token));
150
189
  }
151
190
  return tokens;
152
191
  }
@@ -217,59 +256,66 @@ VALUE Wikitext_parser_fulltext_tokenize(int argc, VALUE *argv, VALUE self)
217
256
  return tokens;
218
257
  }
219
258
 
220
- // we downcase "in place", overwriting the original contents of the buffer and returning the same string
221
- VALUE _Wikitext_downcase(VALUE string)
259
+ // we downcase "in place", overwriting the original contents of the buffer
260
+ void wiki_downcase_bang(char *ptr, long len)
222
261
  {
223
- char *ptr = RSTRING_PTR(string);
224
- long len = RSTRING_LEN(string);
225
262
  for (long i = 0; i < len; i++)
226
263
  {
227
264
  if (ptr[i] >= 'A' && ptr[i] <= 'Z')
228
265
  ptr[i] += 32;
229
266
  }
230
- return string;
231
267
  }
232
268
 
233
- VALUE _Wikitext_hyperlink(parser_t *parser, VALUE link_prefix, VALUE link_target, VALUE link_text, VALUE link_class)
269
+ // prepare hyperlink and append it to parser->output
270
+ // if check_autolink is true, checks parser->autolink to decide whether to emit a real hyperlink
271
+ // or merely the literal link target
272
+ // if link_text is NULL or empty, the link_target is re-used for the link text
273
+ void wiki_append_hyperlink(parser_t *parser, VALUE link_prefix, str_t *link_target, str_t *link_text, VALUE link_class, bool check_autolink)
234
274
  {
235
- VALUE string = rb_str_new(a_start, sizeof(a_start) - 1); // <a href="
236
- if (!NIL_P(link_prefix))
237
- rb_str_append(string, link_prefix);
238
- rb_str_append(string, link_target);
239
-
240
- // special handling for mailto URIs
241
- const char *mailto = "mailto:";
242
- if (NIL_P(link_prefix) &&
243
- RSTRING_LEN(link_target) >= (long)sizeof(mailto) &&
244
- strncmp(mailto, RSTRING_PTR(link_target), sizeof(mailto)) == 0)
245
- link_class = parser->mailto_class; // use mailto_class from parser
246
-
247
- if (link_class != Qnil)
275
+ if (check_autolink && !parser->autolink)
276
+ str_append_str(parser->output, link_target);
277
+ else
248
278
  {
249
- rb_str_cat(string, a_class, sizeof(a_class) - 1); // " class="
250
- rb_str_append(string, link_class);
279
+ str_append(parser->output, a_start, sizeof(a_start) - 1); // <a href="
280
+ if (!NIL_P(link_prefix))
281
+ str_append_string(parser->output, link_prefix);
282
+ str_append_str(parser->output, link_target);
283
+
284
+ // special handling for mailto URIs
285
+ const char *mailto = "mailto:";
286
+ if (NIL_P(link_prefix) &&
287
+ link_target->len >= (long)sizeof(mailto) &&
288
+ strncmp(mailto, link_target->ptr, sizeof(mailto)) == 0)
289
+ link_class = parser->mailto_class; // use mailto_class from parser
290
+ if (link_class != Qnil)
291
+ {
292
+ str_append(parser->output, a_class, sizeof(a_class) - 1); // " class="
293
+ str_append_string(parser->output, link_class);
294
+ }
295
+ str_append(parser->output, a_start_close, sizeof(a_start_close) - 1); // ">
296
+ if (!link_text || link_text->len == 0) // re-use link_target
297
+ str_append_str(parser->output, link_target);
298
+ else
299
+ str_append_str(parser->output, link_text);
300
+ str_append(parser->output, a_end, sizeof(a_end) - 1); // </a>
251
301
  }
252
- rb_str_cat(string, a_start_close, sizeof(a_start_close) - 1); // ">
253
- rb_str_append(string, link_text);
254
- rb_str_cat(string, a_end, sizeof(a_end) - 1);
255
- return string;
256
302
  }
257
303
 
258
- void _Wikitext_append_img(parser_t *parser, char *token_ptr, int token_len)
304
+ void wiki_append_img(parser_t *parser, char *token_ptr, int token_len)
259
305
  {
260
- rb_str_cat(parser->output, img_start, sizeof(img_start) - 1); // <img src="
306
+ str_append(parser->output, img_start, sizeof(img_start) - 1); // <img src="
261
307
  if (!NIL_P(parser->img_prefix) && *token_ptr != '/') // len always > 0
262
- rb_str_append(parser->output, parser->img_prefix);
263
- rb_str_cat(parser->output, token_ptr, token_len);
264
- rb_str_cat(parser->output, img_alt, sizeof(img_alt) - 1); // " alt="
265
- rb_str_cat(parser->output, token_ptr, token_len);
266
- rb_str_cat(parser->output, img_end, sizeof(img_end) - 1); // " />
308
+ str_append_string(parser->output, parser->img_prefix);
309
+ str_append(parser->output, token_ptr, token_len);
310
+ str_append(parser->output, img_alt, sizeof(img_alt) - 1); // " alt="
311
+ str_append(parser->output, token_ptr, token_len);
312
+ str_append(parser->output, img_end, sizeof(img_end) - 1); // " />
267
313
  }
268
314
 
269
315
  // will emit indentation only if we are about to emit any of:
270
316
  // <blockquote>, <p>, <ul>, <ol>, <li>, <h1> etc, <pre>
271
317
  // each time we enter one of those spans must ++ the indentation level
272
- void _Wikitext_indent(parser_t *parser)
318
+ void wiki_indent(parser_t *parser)
273
319
  {
274
320
  if (parser->base_indent == -1) // indentation disabled
275
321
  return;
@@ -285,32 +331,32 @@ void _Wikitext_indent(parser_t *parser)
285
331
  *old_end++ = ' ';
286
332
  if (space_count > parser->tabulation->len)
287
333
  parser->tabulation->len = space_count;
288
- rb_str_cat(parser->output, parser->tabulation->ptr, space_count);
334
+ str_append(parser->output, parser->tabulation->ptr, space_count);
289
335
  }
290
336
  parser->current_indent += 2;
291
337
  }
292
338
 
293
- void _Wikitext_dedent(parser_t *parser, VALUE emit)
339
+ void wiki_dedent(parser_t *parser, bool emit)
294
340
  {
295
341
  if (parser->base_indent == -1) // indentation disabled
296
342
  return;
297
343
  parser->current_indent -= 2;
298
- if (emit != Qtrue)
344
+ if (!emit)
299
345
  return;
300
346
  int space_count = parser->current_indent + parser->base_indent;
301
347
  if (space_count > 0)
302
- rb_str_cat(parser->output, parser->tabulation->ptr, space_count);
348
+ str_append(parser->output, parser->tabulation->ptr, space_count);
303
349
  }
304
350
 
305
351
  // Pops a single item off the parser's scope stack.
306
352
  // A corresponding closing tag is written to the target string.
307
353
  // The target string may be the main output buffer, or a substring capturing buffer if a link is being scanned.
308
- void _Wikitext_pop_from_stack(parser_t *parser, VALUE target)
354
+ void wiki_pop_from_stack(parser_t *parser, str_t *target)
309
355
  {
310
356
  int top = ary_entry(parser->scope, -1);
311
357
  if (NO_ITEM(top))
312
358
  return;
313
- if (NIL_P(target))
359
+ if (!target)
314
360
  target = parser->output;
315
361
 
316
362
  // for headings, take base_heading_level into account
@@ -326,16 +372,16 @@ void _Wikitext_pop_from_stack(parser_t *parser, VALUE target)
326
372
  {
327
373
  case PRE:
328
374
  case PRE_START:
329
- rb_str_cat(target, pre_end, sizeof(pre_end) - 1);
330
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
331
- _Wikitext_dedent(parser, Qfalse);
375
+ str_append(target, pre_end, sizeof(pre_end) - 1);
376
+ str_append_str(target, parser->line_ending);
377
+ wiki_dedent(parser, false);
332
378
  break;
333
379
 
334
380
  case BLOCKQUOTE:
335
381
  case BLOCKQUOTE_START:
336
- _Wikitext_dedent(parser, Qtrue);
337
- rb_str_cat(target, blockquote_end, sizeof(blockquote_end) - 1);
338
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
382
+ wiki_dedent(parser, true);
383
+ str_append(target, blockquote_end, sizeof(blockquote_end) - 1);
384
+ str_append_str(target, parser->line_ending);
339
385
  break;
340
386
 
341
387
  case NO_WIKI_START:
@@ -344,29 +390,29 @@ void _Wikitext_pop_from_stack(parser_t *parser, VALUE target)
344
390
 
345
391
  case STRONG:
346
392
  case STRONG_START:
347
- rb_str_cat(target, strong_end, sizeof(strong_end) - 1);
393
+ str_append(target, strong_end, sizeof(strong_end) - 1);
348
394
  break;
349
395
 
350
396
  case EM:
351
397
  case EM_START:
352
- rb_str_cat(target, em_end, sizeof(em_end) - 1);
398
+ str_append(target, em_end, sizeof(em_end) - 1);
353
399
  break;
354
400
 
355
401
  case TT:
356
402
  case TT_START:
357
- rb_str_cat(target, tt_end, sizeof(tt_end) - 1);
403
+ str_append(target, tt_end, sizeof(tt_end) - 1);
358
404
  break;
359
405
 
360
406
  case OL:
361
- _Wikitext_dedent(parser, Qtrue);
362
- rb_str_cat(target, ol_end, sizeof(ol_end) - 1);
363
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
407
+ wiki_dedent(parser, true);
408
+ str_append(target, ol_end, sizeof(ol_end) - 1);
409
+ str_append_str(target, parser->line_ending);
364
410
  break;
365
411
 
366
412
  case UL:
367
- _Wikitext_dedent(parser, Qtrue);
368
- rb_str_cat(target, ul_end, sizeof(ul_end) - 1);
369
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
413
+ wiki_dedent(parser, true);
414
+ str_append(target, ul_end, sizeof(ul_end) - 1);
415
+ str_append_str(target, parser->line_ending);
370
416
  break;
371
417
 
372
418
  case NESTED_LIST:
@@ -375,50 +421,50 @@ void _Wikitext_pop_from_stack(parser_t *parser, VALUE target)
375
421
  // and other times we want it to behave like BLOCKQUOTE (ie. when it has a nested list inside)
376
422
  // hence this hack: we do an emitting dedent on behalf of the LI that we know must be coming
377
423
  // and then when we pop the actual LI itself (below) we do the standard non-emitting dedent
378
- _Wikitext_dedent(parser, Qtrue); // we really only want to emit the spaces
379
- parser->current_indent += 2; // we don't want to decrement the actual indent level, so put it back
424
+ wiki_dedent(parser, true); // we really only want to emit the spaces
425
+ parser->current_indent += 2; // we don't want to decrement the actual indent level, so put it back
380
426
  break;
381
427
 
382
428
  case LI:
383
- rb_str_cat(target, li_end, sizeof(li_end) - 1);
384
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
385
- _Wikitext_dedent(parser, Qfalse);
429
+ str_append(target, li_end, sizeof(li_end) - 1);
430
+ str_append_str(target, parser->line_ending);
431
+ wiki_dedent(parser, false);
386
432
  break;
387
433
 
388
434
  case H6_START:
389
- rb_str_cat(target, h6_end, sizeof(h6_end) - 1);
390
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
391
- _Wikitext_dedent(parser, Qfalse);
435
+ str_append(target, h6_end, sizeof(h6_end) - 1);
436
+ str_append_str(target, parser->line_ending);
437
+ wiki_dedent(parser, false);
392
438
  break;
393
439
 
394
440
  case H5_START:
395
- rb_str_cat(target, h5_end, sizeof(h5_end) - 1);
396
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
397
- _Wikitext_dedent(parser, Qfalse);
441
+ str_append(target, h5_end, sizeof(h5_end) - 1);
442
+ str_append_str(target, parser->line_ending);
443
+ wiki_dedent(parser, false);
398
444
  break;
399
445
 
400
446
  case H4_START:
401
- rb_str_cat(target, h4_end, sizeof(h4_end) - 1);
402
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
403
- _Wikitext_dedent(parser, Qfalse);
447
+ str_append(target, h4_end, sizeof(h4_end) - 1);
448
+ str_append_str(target, parser->line_ending);
449
+ wiki_dedent(parser, false);
404
450
  break;
405
451
 
406
452
  case H3_START:
407
- rb_str_cat(target, h3_end, sizeof(h3_end) - 1);
408
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
409
- _Wikitext_dedent(parser, Qfalse);
453
+ str_append(target, h3_end, sizeof(h3_end) - 1);
454
+ str_append_str(target, parser->line_ending);
455
+ wiki_dedent(parser, false);
410
456
  break;
411
457
 
412
458
  case H2_START:
413
- rb_str_cat(target, h2_end, sizeof(h2_end) - 1);
414
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
415
- _Wikitext_dedent(parser, Qfalse);
459
+ str_append(target, h2_end, sizeof(h2_end) - 1);
460
+ str_append_str(target, parser->line_ending);
461
+ wiki_dedent(parser, false);
416
462
  break;
417
463
 
418
464
  case H1_START:
419
- rb_str_cat(target, h1_end, sizeof(h1_end) - 1);
420
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
421
- _Wikitext_dedent(parser, Qfalse);
465
+ str_append(target, h1_end, sizeof(h1_end) - 1);
466
+ str_append_str(target, parser->line_ending);
467
+ wiki_dedent(parser, false);
422
468
  break;
423
469
 
424
470
  case LINK_START:
@@ -442,9 +488,9 @@ void _Wikitext_pop_from_stack(parser_t *parser, VALUE target)
442
488
  break;
443
489
 
444
490
  case P:
445
- rb_str_cat(target, p_end, sizeof(p_end) - 1);
446
- rb_str_cat(target, parser->line_ending->ptr, parser->line_ending->len);
447
- _Wikitext_dedent(parser, Qfalse);
491
+ str_append(target, p_end, sizeof(p_end) - 1);
492
+ str_append_str(target, parser->line_ending);
493
+ wiki_dedent(parser, false);
448
494
  break;
449
495
 
450
496
  case END_OF_FILE:
@@ -459,9 +505,9 @@ void _Wikitext_pop_from_stack(parser_t *parser, VALUE target)
459
505
  }
460
506
 
461
507
  // Pops items off the top of parser's scope stack, accumulating closing tags for them into the target string, until item is reached.
462
- // If including is Qtrue then the item itself is also popped.
508
+ // If including is true then the item itself is also popped.
463
509
  // The target string may be the main output buffer, or a substring capturing buffer when scanning links.
464
- void _Wikitext_pop_from_stack_up_to(parser_t *parser, VALUE target, int item, VALUE including)
510
+ void wiki_pop_from_stack_up_to(parser_t *parser, str_t *target, int item, bool including)
465
511
  {
466
512
  int continue_looping = 1;
467
513
  do
@@ -471,23 +517,23 @@ void _Wikitext_pop_from_stack_up_to(parser_t *parser, VALUE target, int item, VA
471
517
  return;
472
518
  if (top == item)
473
519
  {
474
- if (including != Qtrue)
520
+ if (!including)
475
521
  return;
476
522
  continue_looping = 0;
477
523
  }
478
- _Wikitext_pop_from_stack(parser, target);
524
+ wiki_pop_from_stack(parser, target);
479
525
  } while (continue_looping);
480
526
  }
481
527
 
482
- void _Wikitext_pop_all_from_stack(parser_t *parser, VALUE target)
528
+ void wiki_pop_all_from_stack(parser_t *parser)
483
529
  {
484
- while (!NO_ITEM(ary_entry(parser->scope, -1)))
485
- _Wikitext_pop_from_stack(parser, target);
530
+ for (int i = 0, max = parser->scope->count; i < max; i++)
531
+ wiki_pop_from_stack(parser, NULL);
486
532
  }
487
533
 
488
- void _Wikitext_start_para_if_necessary(parser_t *parser)
534
+ void wiki_start_para_if_necessary(parser_t *parser)
489
535
  {
490
- if (!NIL_P(parser->capture)) // we don't do anything if in capturing mode
536
+ if (parser->capture)
491
537
  return;
492
538
 
493
539
  // if no block open yet, or top of stack is BLOCKQUOTE/BLOCKQUOTE_START (with nothing in it yet)
@@ -495,29 +541,29 @@ void _Wikitext_start_para_if_necessary(parser_t *parser)
495
541
  ary_entry(parser->scope, -1) == BLOCKQUOTE ||
496
542
  ary_entry(parser->scope, -1) == BLOCKQUOTE_START)
497
543
  {
498
- _Wikitext_indent(parser);
499
- rb_str_cat(parser->output, p_start, sizeof(p_start) - 1);
544
+ wiki_indent(parser);
545
+ str_append(parser->output, p_start, sizeof(p_start) - 1);
500
546
  ary_push(parser->scope, P);
501
547
  ary_push(parser->line, P);
502
548
  }
503
- else if (parser->pending_crlf == Qtrue)
549
+ else if (parser->pending_crlf)
504
550
  {
505
551
  if (IN(P))
506
552
  // already in a paragraph block; convert pending CRLF into a space
507
- rb_str_cat(parser->output, space, sizeof(space) - 1);
553
+ str_append(parser->output, space, sizeof(space) - 1);
508
554
  else if (IN(PRE))
509
555
  // PRE blocks can have pending CRLF too (helps us avoid emitting the trailing newline)
510
- rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
556
+ str_append_str(parser->output, parser->line_ending);
511
557
  }
512
- parser->pending_crlf = Qfalse;
558
+ parser->pending_crlf = false;
513
559
  }
514
560
 
515
- void _Wikitext_emit_pending_crlf_if_necessary(parser_t *parser)
561
+ void wiki_emit_pending_crlf_if_necessary(parser_t *parser)
516
562
  {
517
- if (parser->pending_crlf == Qtrue)
563
+ if (parser->pending_crlf)
518
564
  {
519
- rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
520
- parser->pending_crlf = Qfalse;
565
+ str_append_str(parser->output, parser->line_ending);
566
+ parser->pending_crlf = false;
521
567
  }
522
568
  }
523
569
 
@@ -543,9 +589,9 @@ void _Wikitext_emit_pending_crlf_if_necessary(parser_t *parser)
543
589
  // on the line scope.
544
590
  // Luckily, BLOCKQUOTE_START tokens can only appear at the start of the scope array, so we can check for them first before
545
591
  // entering the for loop.
546
- void _Wikitext_pop_excess_elements(parser_t *parser)
592
+ void wiki_pop_excess_elements(parser_t *parser)
547
593
  {
548
- if (!NIL_P(parser->capture)) // we don't pop anything if in capturing mode
594
+ if (parser->capture)
549
595
  return;
550
596
  for (int i = parser->scope->count - ary_count(parser->scope, BLOCKQUOTE_START), j = parser->line->count; i > j; i--)
551
597
  {
@@ -560,65 +606,94 @@ void _Wikitext_pop_excess_elements(parser_t *parser)
560
606
  continue;
561
607
  }
562
608
  }
563
- _Wikitext_pop_from_stack(parser, parser->output);
609
+ wiki_pop_from_stack(parser, NULL);
564
610
  }
565
611
  }
566
612
 
567
- #define INVALID_ENCODING(msg) do { if (dest_ptr) free(dest_ptr); rb_raise(eWikitextParserError, "invalid encoding: " msg); } while(0)
568
-
569
- // convert a single UTF-8 codepoint to UTF-32
570
- // expects an input buffer, src, containing a UTF-8 encoded character (which may be multi-byte)
571
- // the end of the input buffer, end, is also passed in to allow the detection of invalidly truncated codepoints
572
- // the number of bytes in the UTF-8 character (between 1 and 4) is returned by reference in width_out
573
- // raises a RangeError if the supplied character is invalid UTF-8
574
- // (in which case it also frees the block of memory indicated by dest_ptr if it is non-NULL)
575
- uint32_t _Wikitext_utf8_to_utf32(char *src, char *end, long *width_out, void *dest_ptr)
613
+ // Convert a single UTF-8 codepoint to UTF-32
614
+ //
615
+ // Expects an input buffer, src, containing a UTF-8 encoded character (which
616
+ // may be multi-byte). The end of the input buffer, end, is also passed in to
617
+ // allow the detection of invalidly truncated codepoints. The number of bytes
618
+ // in the UTF-8 character (between 1 and 4) is returned by reference in
619
+ // width_out.
620
+ //
621
+ // Raises eWikitextParserError if the supplied character is invalid UTF-8.
622
+ uint32_t wiki_utf8_to_utf32(char *src, char *end, long *width_out)
576
623
  {
577
624
  uint32_t dest;
578
- if ((unsigned char)src[0] <= 0x7f) // ASCII
625
+ if ((unsigned char)src[0] <= 0x7f)
579
626
  {
627
+ // ASCII
580
628
  dest = src[0];
581
629
  *width_out = 1;
582
630
  }
583
- else if ((src[0] & 0xe0) == 0xc0) // byte starts with 110..... : this should be a two-byte sequence
631
+ else if ((src[0] & 0xe0) == 0xc0)
584
632
  {
633
+ // byte starts with 110..... : this should be a two-byte sequence
585
634
  if (src + 1 >= end)
586
- INVALID_ENCODING("truncated byte sequence"); // no second byte
587
- else if (((unsigned char)src[0] == 0xc0) || ((unsigned char)src[0] == 0xc1))
588
- INVALID_ENCODING("overlong encoding"); // overlong encoding: lead byte of 110..... but code point <= 127
635
+ // no second byte
636
+ rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
637
+ else if (((unsigned char)src[0] == 0xc0) ||
638
+ ((unsigned char)src[0] == 0xc1))
639
+ // overlong encoding: lead byte of 110..... but code point <= 127
640
+ rb_raise(eWikitextParserError, "invalid encoding: overlong encoding");
589
641
  else if ((src[1] & 0xc0) != 0x80 )
590
- INVALID_ENCODING("malformed byte sequence"); // should have second byte starting with 10......
591
- dest = ((uint32_t)(src[0] & 0x1f)) << 6 | (src[1] & 0x3f);
642
+ // should have second byte starting with 10......
643
+ rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
644
+
645
+ dest =
646
+ ((uint32_t)(src[0] & 0x1f)) << 6 |
647
+ (src[1] & 0x3f);
592
648
  *width_out = 2;
593
649
  }
594
- else if ((src[0] & 0xf0) == 0xe0) // byte starts with 1110.... : this should be a three-byte sequence
650
+ else if ((src[0] & 0xf0) == 0xe0)
595
651
  {
652
+ // byte starts with 1110.... : this should be a three-byte sequence
596
653
  if (src + 2 >= end)
597
- INVALID_ENCODING("truncated byte sequence"); // missing second or third byte
598
- else if (((src[1] & 0xc0) != 0x80 ) || ((src[2] & 0xc0) != 0x80 ))
599
- INVALID_ENCODING("malformed byte sequence"); // should have second and third bytes starting with 10......
600
- dest = ((uint32_t)(src[0] & 0x0f)) << 12 | ((uint32_t)(src[1] & 0x3f)) << 6 | (src[2] & 0x3f);
654
+ // missing second or third byte
655
+ rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
656
+ else if (((src[1] & 0xc0) != 0x80 ) ||
657
+ ((src[2] & 0xc0) != 0x80 ))
658
+ // should have second and third bytes starting with 10......
659
+ rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
660
+
661
+ dest =
662
+ ((uint32_t)(src[0] & 0x0f)) << 12 |
663
+ ((uint32_t)(src[1] & 0x3f)) << 6 |
664
+ (src[2] & 0x3f);
601
665
  *width_out = 3;
602
666
  }
603
- else if ((src[0] & 0xf8) == 0xf0) // bytes starts with 11110... : this should be a four-byte sequence
667
+ else if ((src[0] & 0xf8) == 0xf0)
604
668
  {
669
+ // byte starts with 11110... : this should be a four-byte sequence
605
670
  if (src + 3 >= end)
606
- INVALID_ENCODING("truncated byte sequence"); // missing second, third, or fourth byte
607
- else if ((unsigned char)src[0] >= 0xf5 && (unsigned char)src[0] <= 0xf7)
608
- INVALID_ENCODING("overlong encoding"); // disallowed by RFC 3629 (codepoints above 0x10ffff)
609
- else if (((src[1] & 0xc0) != 0x80 ) || ((src[2] & 0xc0) != 0x80 ) || ((src[3] & 0xc0) != 0x80 ))
610
- INVALID_ENCODING("malformed byte sequence"); // should have second and third bytes starting with 10......
611
- dest = ((uint32_t)(src[0] & 0x07)) << 18 | ((uint32_t)(src[1] & 0x3f)) << 12 | ((uint32_t)(src[1] & 0x3f)) << 6 | (src[2] & 0x3f);
671
+ // missing second, third, or fourth byte
672
+ rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
673
+ else if ((unsigned char)src[0] >= 0xf5 &&
674
+ (unsigned char)src[0] <= 0xf7)
675
+ // disallowed by RFC 3629 (codepoints above 0x10ffff)
676
+ rb_raise(eWikitextParserError, "invalid encoding: overlong encoding");
677
+ else if (((src[1] & 0xc0) != 0x80 ) ||
678
+ ((src[2] & 0xc0) != 0x80 ) ||
679
+ ((src[3] & 0xc0) != 0x80 ))
680
+ // should have second, third and fourth bytes starting with 10......
681
+ rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
682
+
683
+ dest =
684
+ ((uint32_t)(src[0] & 0x07)) << 18 |
685
+ ((uint32_t)(src[1] & 0x3f)) << 12 |
686
+ ((uint32_t)(src[1] & 0x3f)) << 6 |
687
+ (src[2] & 0x3f);
612
688
  *width_out = 4;
613
689
  }
614
- else // invalid input
615
- INVALID_ENCODING("unexpected byte");
690
+ else
691
+ rb_raise(eWikitextParserError, "invalid encoding: unexpected byte");
616
692
  return dest;
617
693
  }
618
694
 
619
- VALUE _Wikitext_utf32_char_to_entity(uint32_t character)
695
+ void wiki_append_entity_from_utf32_char(str_t *output, uint32_t character)
620
696
  {
621
- // TODO: consider special casing some entities (ie. quot, amp, lt, gt etc)?
622
697
  char hex_string[8] = { '&', '#', 'x', 0, 0, 0, 0, ';' };
623
698
  char scratch = (character & 0xf000) >> 12;
624
699
  hex_string[3] = (scratch <= 9 ? scratch + 48 : scratch + 87);
@@ -628,18 +703,17 @@ VALUE _Wikitext_utf32_char_to_entity(uint32_t character)
628
703
  hex_string[5] = (scratch <= 9 ? scratch + 48 : scratch + 87);
629
704
  scratch = character & 0x000f;
630
705
  hex_string[6] = (scratch <= 9 ? scratch + 48 : scratch + 87);
631
- return rb_str_new((const char *)hex_string, sizeof(hex_string));
706
+ str_append(output, hex_string, sizeof(hex_string));
632
707
  }
633
708
 
634
- VALUE _Wikitext_parser_trim_link_target(VALUE string)
709
+ // trim parser->link_text in place
710
+ void wiki_trim_link_text(parser_t *parser)
635
711
  {
636
- string = StringValue(string);
637
- char *src = RSTRING_PTR(string);
712
+ char *src = parser->link_text->ptr;
638
713
  char *start = src; // remember this so we can check if we're at the start
639
714
  char *left = src;
640
715
  char *non_space = src; // remember last non-space character output
641
- long len = RSTRING_LEN(string);
642
- char *end = src + len;
716
+ char *end = src + parser->link_text->len;
643
717
  while (src < end)
644
718
  {
645
719
  if (*src == ' ')
@@ -651,143 +725,104 @@ VALUE _Wikitext_parser_trim_link_target(VALUE string)
651
725
  non_space = src;
652
726
  src++;
653
727
  }
654
- if (left == start && non_space + 1 == end)
655
- return string;
656
- else
657
- return rb_str_new(left, (non_space + 1) - left);
728
+ if (left != start || non_space + 1 != end)
729
+ {
730
+ // TODO: could potentially avoid this memmove by extending the str_t struct with an "offset" or "free" member
731
+ parser->link_text->len = (non_space + 1) - left;
732
+ memmove(parser->link_text->ptr, left, parser->link_text->len);
733
+ }
658
734
  }
659
735
 
660
736
  // - non-printable (non-ASCII) characters converted to numeric entities
661
737
  // - QUOT and AMP characters converted to named entities
662
- // - if rollback is Qtrue, there is no special treatment of spaces
663
- // - if rollback is Qfalse, leading and trailing whitespace trimmed
664
- VALUE _Wikitext_parser_sanitize_link_target(parser_t *parser, VALUE rollback)
738
+ // - if trim is true, leading and trailing whitespace trimmed
739
+ // - if trim is false, there is no special treatment of spaces
740
+ void wiki_append_sanitized_link_target(parser_t *parser, str_t *output, bool trim)
665
741
  {
666
- VALUE string = StringValue(parser->link_target); // raises if string is nil or doesn't quack like a string
667
- char *src = RSTRING_PTR(string);
668
- char *start = src; // remember this so we can check if we're at the start
669
- long len = RSTRING_LEN(string);
670
- char *end = src + len;
671
-
672
- // start with a destination buffer twice the size of the source, will realloc if necessary
673
- // slop = (len / 8) * 8 (ie. one in every 8 characters can be converted into an entity, each entity requires 8 bytes)
674
- // this efficiently handles the most common case (where the size of the buffer doesn't change much)
675
- char *dest = ALLOC_N(char, len * 2);
676
- char *dest_ptr = dest; // hang on to this so we can pass it to free() later
677
- char *non_space = dest; // remember last non-space character output
742
+ char *src = parser->link_target->ptr;
743
+ char *start = src; // remember this so we can check if we're at the start
744
+ char *non_space = output->ptr + output->len; // remember last non-space character output
745
+ char *end = src + parser->link_target->len;
678
746
  while (src < end)
679
747
  {
680
- // need at most 8 characters (8 bytes) to display each character
681
- if (dest + 8 > dest_ptr + len) // outgrowing buffer, must reallocate
748
+ // need at most 8 bytes to display each input character (&#x0000;)
749
+ if (output->ptr + output->len + 8 > output->ptr + output->capacity) // outgrowing buffer, must grow
682
750
  {
683
- char *old_dest = dest;
684
- char *old_dest_ptr = dest_ptr;
685
- len = len + (end - src) * 8; // allocate enough for worst case
686
- dest = realloc(dest_ptr, len); // will never have to realloc more than once
687
- if (dest == NULL)
688
- {
689
- // would have used reallocf, but this has to run on Linux too, not just Darwin
690
- free(dest_ptr);
691
- rb_raise(rb_eNoMemError, "failed to re-allocate temporary storage (memory allocation error)");
692
- }
693
- dest_ptr = dest;
694
- dest = dest_ptr + (old_dest - old_dest_ptr);
695
- non_space = dest_ptr + (non_space - old_dest_ptr);
751
+ char *old_ptr = output->ptr;
752
+ str_grow(output, output->len + (end - src) * 8); // allocate enough for worst case
753
+ if (old_ptr != output->ptr) // may have moved
754
+ non_space += output->ptr - old_ptr;
696
755
  }
697
756
 
698
- if (*src == '"') // QUOT
757
+ if (*src == '"')
699
758
  {
700
759
  char quot_entity_literal[] = { '&', 'q', 'u', 'o', 't', ';' }; // no trailing NUL
701
- memcpy(dest, quot_entity_literal, sizeof(quot_entity_literal));
702
- dest += sizeof(quot_entity_literal);
760
+ str_append(output, quot_entity_literal, sizeof(quot_entity_literal));
703
761
  }
704
- else if (*src == '&') // AMP
762
+ else if (*src == '&')
705
763
  {
706
764
  char amp_entity_literal[] = { '&', 'a', 'm', 'p', ';' }; // no trailing NUL
707
- memcpy(dest, amp_entity_literal, sizeof(amp_entity_literal));
708
- dest += sizeof(amp_entity_literal);
709
- }
710
- else if (*src == '<') // LESS_THAN
711
- {
712
- free(dest_ptr);
713
- rb_raise(rb_eRangeError, "invalid link text (\"<\" may not appear in link text)");
714
- }
715
- else if (*src == '>') // GREATER_THAN
716
- {
717
- free(dest_ptr);
718
- rb_raise(rb_eRangeError, "invalid link text (\">\" may not appear in link text)");
765
+ str_append(output, amp_entity_literal, sizeof(amp_entity_literal));
719
766
  }
720
- else if (*src == ' ' && src == start && rollback == Qfalse)
721
- start++; // we eat leading space
722
- else if (*src >= 0x20 && *src <= 0x7e) // printable ASCII
767
+ else if (*src == '<' || *src == '>')
768
+ rb_raise(rb_eRangeError, "invalid link text (\"%c\" may not appear in link text)", *src);
769
+ else if (*src == ' ' && src == start && trim)
770
+ start++; // we eat leading space
771
+ else if (*src >= 0x20 && *src <= 0x7e) // printable ASCII
723
772
  {
724
- *dest = *src;
725
- dest++;
773
+ *(output->ptr + output->len) = *src;
774
+ output->len++;
726
775
  }
727
776
  else // all others: must convert to entities
728
777
  {
729
778
  long width;
730
- VALUE entity = _Wikitext_utf32_char_to_entity(_Wikitext_utf8_to_utf32(src, end, &width, dest_ptr));
731
- char *entity_src = RSTRING_PTR(entity);
732
- long entity_len = RSTRING_LEN(entity); // should always be 8 characters (8 bytes)
733
- memcpy(dest, entity_src, entity_len);
734
- dest += entity_len;
779
+ wiki_append_entity_from_utf32_char(output, wiki_utf8_to_utf32(src, end, &width));
735
780
  src += width;
736
- non_space = dest;
781
+ non_space = output->ptr + output->len;
737
782
  continue;
738
783
  }
739
784
  if (*src != ' ')
740
- non_space = dest;
785
+ non_space = output->ptr + output->len;
741
786
  src++;
742
787
  }
743
788
 
744
789
  // trim trailing space if necessary
745
- if (rollback == Qfalse && non_space > dest_ptr && dest != non_space)
746
- len = non_space - dest_ptr;
747
- else
748
- len = dest - dest_ptr;
749
- VALUE out = rb_str_new(dest_ptr, len);
750
- free(dest_ptr);
751
- return out;
790
+ if (trim && output->ptr + output->len != non_space)
791
+ output->len -= (output->ptr + output->len) - non_space;
752
792
  }
753
793
 
754
794
  VALUE Wikitext_parser_sanitize_link_target(VALUE self, VALUE string)
755
795
  {
756
796
  parser_t parser;
757
- parser.link_target = string;
758
- return _Wikitext_parser_sanitize_link_target(&parser, Qfalse);
797
+ parser.link_target = str_new_from_string(string);
798
+ GC_WRAP_STR(parser.link_target, link_target_gc);
799
+ str_t *output = str_new();
800
+ GC_WRAP_STR(output, output_gc);
801
+ wiki_append_sanitized_link_target(&parser, output, true);
802
+ return string_from_str(output);
759
803
  }
760
804
 
761
- // encodes the input string according to RFCs 2396 and 2718
762
- // leading and trailing whitespace trimmed
763
- // note that the first character of the target link is not case-sensitive
764
- // (this is a recommended application-level constraint; it is not imposed at this level)
765
- // this is to allow links like:
766
- // ...the [[foo]] is...
767
- // to be equivalent to:
768
- // thing. [[Foo]] was...
769
- static void _Wikitext_parser_encode_link_target(parser_t *parser)
805
+ // Encodes the parser link_target member (in-place) according to RFCs 2396 and 2718
806
+ //
807
+ // Leading and trailing whitespace trimmed. Spaces are converted to
808
+ // underscores if the parser space_to_underscore member is true.
809
+ static void wiki_encode_link_target(parser_t *parser)
770
810
  {
771
- VALUE in = StringValue(parser->link_target);
772
- char *input = RSTRING_PTR(in);
773
- char *start = input; // remember this so we can check if we're at the start
774
- long len = RSTRING_LEN(in);
811
+ char *src = parser->link_target->ptr;
812
+ char *start = src; // remember this so we can check if we're at the start
813
+ long len = parser->link_target->len;
775
814
  if (!(len > 0))
776
815
  return;
777
- char *end = input + len;
778
- static char hex[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
779
-
780
- // to avoid most reallocations start with a destination buffer twice the size of the source
781
- // this handles the most common case (where most chars are in the ASCII range and don't require more storage, but there are
782
- // often quite a few spaces, which are encoded as "%20" and occupy 3 bytes)
783
- // the worst case is where _every_ byte must be written out using 3 bytes
816
+ char *end = src + len;
784
817
  long dest_len = len * 2;
785
818
  char *dest = ALLOC_N(char, dest_len);
786
819
  char *dest_ptr = dest; // hang on to this so we can pass it to free() later
787
820
  char *non_space = dest; // remember last non-space character output
788
- for (; input < end; input++)
821
+ static char hex[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
822
+ for (; src < end; src++)
789
823
  {
790
- if ((dest + 3) > (dest_ptr + dest_len)) // worst case: a single character may grow to 3 characters once encoded
824
+ // worst case: a single character may grow to 3 characters once encoded
825
+ if ((dest + 3) > (dest_ptr + dest_len))
791
826
  {
792
827
  // outgrowing buffer, must reallocate
793
828
  char *old_dest = dest;
@@ -806,27 +841,27 @@ static void _Wikitext_parser_encode_link_target(parser_t *parser)
806
841
  }
807
842
 
808
843
  // pass through unreserved characters
809
- if (((*input >= 'a') && (*input <= 'z')) ||
810
- ((*input >= 'A') && (*input <= 'Z')) ||
811
- ((*input >= '0') && (*input <= '9')) ||
812
- (*input == '-') ||
813
- (*input == '_') ||
814
- (*input == '.') ||
815
- (*input == '~'))
844
+ if ((*src >= 'a' && *src <= 'z') ||
845
+ (*src >= 'A' && *src <= 'Z') ||
846
+ (*src >= '0' && *src <= '9') ||
847
+ *src == '-' ||
848
+ *src == '_' ||
849
+ *src == '.' ||
850
+ *src == '~')
816
851
  {
817
- *dest++ = *input;
852
+ *dest++ = *src;
818
853
  non_space = dest;
819
854
  }
820
- else if (*input == ' ' && input == start)
855
+ else if (*src == ' ' && src == start)
821
856
  start++; // we eat leading space
822
- else if (*input == ' ' && parser->space_to_underscore == Qtrue)
857
+ else if (*src == ' ' && parser->space_to_underscore)
823
858
  *dest++ = '_';
824
859
  else // everything else gets URL-encoded
825
860
  {
826
861
  *dest++ = '%';
827
- *dest++ = hex[(unsigned char)(*input) / 16]; // left
828
- *dest++ = hex[(unsigned char)(*input) % 16]; // right
829
- if (*input != ' ')
862
+ *dest++ = hex[(unsigned char)(*src) / 16]; // left
863
+ *dest++ = hex[(unsigned char)(*src) % 16]; // right
864
+ if (*src != ' ')
830
865
  non_space = dest;
831
866
  }
832
867
  }
@@ -836,90 +871,89 @@ static void _Wikitext_parser_encode_link_target(parser_t *parser)
836
871
  dest_len = non_space - dest_ptr;
837
872
  else
838
873
  dest_len = dest - dest_ptr;
839
- parser->link_target = rb_str_new(dest_ptr, dest_len);
874
+ str_clear(parser->link_target);
875
+ str_append(parser->link_target, dest_ptr, dest_len);
840
876
  free(dest_ptr);
841
877
  }
842
878
 
843
879
  VALUE Wikitext_parser_encode_link_target(VALUE self, VALUE in)
844
880
  {
845
881
  parser_t parser;
846
- parser.link_target = in;
847
- parser.space_to_underscore = Qfalse;
848
- _Wikitext_parser_encode_link_target(&parser);
849
- return parser.link_target;
850
- }
851
-
852
- // this method exposed for testing only
853
- VALUE Wikitext_parser_encode_special_link_target(VALUE self, VALUE in)
854
- {
855
- parser_t parser;
856
- parser.link_target = in;
857
- parser.space_to_underscore = Qfalse;
858
- _Wikitext_parser_encode_link_target(&parser);
859
- return parser.link_target;
882
+ parser.space_to_underscore = false;
883
+ parser.link_target = str_new_from_string(in);
884
+ GC_WRAP_STR(parser.link_target, link_target_gc);
885
+ wiki_encode_link_target(&parser);
886
+ return string_from_str(parser.link_target);
860
887
  }
861
888
 
862
889
  // returns 1 (true) if supplied string is blank (nil, empty, or all whitespace)
863
890
  // returns 0 (false) otherwise
864
- int _Wikitext_blank(VALUE str)
891
+ bool wiki_blank(str_t *str)
865
892
  {
866
- if (NIL_P(str) || RSTRING_LEN(str) == 0)
867
- return 1;
868
- for (char *ptr = RSTRING_PTR(str),
869
- *end = RSTRING_PTR(str) + RSTRING_LEN(str);
893
+ if (str->len == 0)
894
+ return true;
895
+ for (char *ptr = str->ptr,
896
+ *end = str->ptr + str->len;
870
897
  ptr < end; ptr++)
871
898
  {
872
899
  if (*ptr != ' ')
873
- return 0;
900
+ return false;
874
901
  }
875
- return 1;
902
+ return true;
876
903
  }
877
904
 
878
- void _Wikitext_rollback_failed_link(parser_t *parser)
905
+ void wiki_rollback_failed_internal_link(parser_t *parser)
879
906
  {
880
907
  if (!IN(LINK_START))
881
908
  return; // nothing to do!
882
909
  int scope_includes_separator = IN(SEPARATOR);
883
- _Wikitext_pop_from_stack_up_to(parser, Qnil, LINK_START, Qtrue);
884
- rb_str_cat(parser->output, link_start, sizeof(link_start) - 1);
885
- if (!NIL_P(parser->link_target))
910
+ wiki_pop_from_stack_up_to(parser, NULL, LINK_START, true);
911
+ str_append(parser->output, link_start, sizeof(link_start) - 1);
912
+ if (parser->link_target->len > 0)
886
913
  {
887
- VALUE sanitized = _Wikitext_parser_sanitize_link_target(parser, Qtrue);
888
- rb_str_append(parser->output, sanitized);
914
+ wiki_append_sanitized_link_target(parser, parser->output, false);
889
915
  if (scope_includes_separator)
890
916
  {
891
- rb_str_cat(parser->output, separator, sizeof(separator) - 1);
892
- if (!NIL_P(parser->link_text))
893
- rb_str_append(parser->output, parser->link_text);
917
+ str_append(parser->output, separator, sizeof(separator) - 1);
918
+ if (parser->link_text->len > 0)
919
+ str_append_str(parser->output, parser->link_text);
894
920
  }
895
921
  }
896
- parser->capture = Qnil;
897
- parser->link_target = Qnil;
898
- parser->link_text = Qnil;
922
+ parser->capture = NULL;
923
+ str_clear(parser->link_target);
924
+ str_clear(parser->link_text);
899
925
  }
900
926
 
901
- void _Wikitext_rollback_failed_external_link(parser_t *parser)
927
+ void wiki_rollback_failed_external_link(parser_t *parser)
902
928
  {
903
929
  if (!IN(EXT_LINK_START))
904
930
  return; // nothing to do!
931
+
932
+ // store a couple of values before popping
905
933
  int scope_includes_space = IN(SPACE);
906
- _Wikitext_pop_from_stack_up_to(parser, Qnil, EXT_LINK_START, Qtrue);
907
- rb_str_cat(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
908
- if (!NIL_P(parser->link_target))
934
+ VALUE link_class = IN(PATH) ? Qnil : parser->external_link_class;
935
+ wiki_pop_from_stack_up_to(parser, NULL, EXT_LINK_START, true);
936
+
937
+ str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
938
+ if (parser->link_target->len > 0)
909
939
  {
910
- if (parser->autolink == Qtrue)
911
- parser->link_target = _Wikitext_hyperlink(parser, Qnil, parser->link_target, parser->link_target, parser->external_link_class);
912
- rb_str_append(parser->output, parser->link_target);
940
+ wiki_append_hyperlink(parser, Qnil, parser->link_target, NULL, link_class, true);
913
941
  if (scope_includes_space)
914
942
  {
915
- rb_str_cat(parser->output, space, sizeof(space) - 1);
916
- if (!NIL_P(parser->link_text))
917
- rb_str_append(parser->output, parser->link_text);
943
+ str_append(parser->output, space, sizeof(space) - 1);
944
+ if (parser->link_text->len > 0)
945
+ str_append_str(parser->output, parser->link_text);
918
946
  }
919
947
  }
920
- parser->capture = Qnil;
921
- parser->link_target = Qnil;
922
- parser->link_text = Qnil;
948
+ parser->capture = NULL;
949
+ str_clear(parser->link_target);
950
+ str_clear(parser->link_text);
951
+ }
952
+
953
+ void wiki_rollback_failed_link(parser_t *parser)
954
+ {
955
+ wiki_rollback_failed_internal_link(parser);
956
+ wiki_rollback_failed_external_link(parser);
923
957
  }
924
958
 
925
959
  VALUE Wikitext_parser_initialize(int argc, VALUE *argv, VALUE self)
@@ -1031,31 +1065,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1031
1065
  VALUE prefix = rb_iv_get(self, "@internal_link_prefix");
1032
1066
 
1033
1067
  // set up parser struct to make passing parameters a little easier
1034
- // eventually this will encapsulate most or all of the variables above
1035
- parser_t _parser;
1036
- parser_t *parser = &_parser;
1037
- parser->output = rb_str_new2("");
1038
- parser->capture = Qnil;
1039
- parser->link_target = Qnil;
1040
- parser->link_text = Qnil;
1068
+ parser_t *parser = parser_new();
1069
+ GC_WRAP_PARSER(parser, parser_gc);
1041
1070
  parser->external_link_class = link_class;
1042
1071
  parser->mailto_class = mailto_class;
1043
1072
  parser->img_prefix = rb_iv_get(self, "@img_prefix");
1044
- parser->scope = ary_new();
1045
- GC_WRAP_ARY(parser->scope, scope_gc);
1046
- parser->line = ary_new();
1047
- GC_WRAP_ARY(parser->line, line_gc);
1048
- parser->line_buffer = ary_new();
1049
- GC_WRAP_ARY(parser->line_buffer, line_buffer_gc);
1050
- parser->pending_crlf = Qfalse;
1051
- parser->autolink = rb_iv_get(self, "@autolink");
1052
- parser->space_to_underscore = rb_iv_get(self, "@space_to_underscore");
1073
+ parser->autolink = rb_iv_get(self, "@autolink") == Qtrue ? true : false;
1074
+ parser->space_to_underscore = rb_iv_get(self, "@space_to_underscore") == Qtrue ? true : false;
1053
1075
  parser->line_ending = str_new_from_string(line_ending);
1054
- GC_WRAP_STR(parser->line_ending, line_ending_gc);
1055
1076
  parser->base_indent = base_indent;
1056
- parser->current_indent = 0;
1057
- parser->tabulation = str_new();
1058
- GC_WRAP_STR(parser->tabulation, tabulation_gc);
1059
1077
  parser->base_heading_level = base_heading_level;
1060
1078
 
1061
1079
  // this simple looping design leads to a single enormous function,
@@ -1093,10 +1111,13 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1093
1111
  long remove_strong = -1;
1094
1112
  long remove_em = -1;
1095
1113
 
1096
- // general purpose counters and flags
1114
+ // general purpose counters, flags and pointers
1097
1115
  long i = 0;
1098
1116
  long j = 0;
1099
1117
  long k = 0;
1118
+ str_t *output = NULL;
1119
+ str_t _token_str;
1120
+ str_t *token_str = &_token_str;
1100
1121
 
1101
1122
  // The following giant switch statement contains cases for all the possible token types.
1102
1123
  // In the most basic sense we are emitting the HTML that corresponds to each token,
@@ -1118,16 +1139,16 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1118
1139
  case PRE:
1119
1140
  if (IN(NO_WIKI_START) || IN(PRE_START))
1120
1141
  {
1121
- rb_str_cat(parser->output, space, sizeof(space) - 1);
1142
+ str_append(parser->output, space, sizeof(space) - 1);
1122
1143
  break;
1123
1144
  }
1124
1145
  else if (IN(BLOCKQUOTE_START))
1125
1146
  {
1126
1147
  // this kind of nesting not allowed (to avoid user confusion)
1127
- _Wikitext_pop_excess_elements(parser);
1128
- _Wikitext_start_para_if_necessary(parser);
1129
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1130
- rb_str_cat(i, space, sizeof(space) - 1);
1148
+ wiki_pop_excess_elements(parser);
1149
+ wiki_start_para_if_necessary(parser);
1150
+ output = parser->capture ? parser->capture : parser->output;
1151
+ str_append(output, space, sizeof(space) - 1);
1131
1152
  break;
1132
1153
  }
1133
1154
 
@@ -1139,15 +1160,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1139
1160
  {
1140
1161
  // must pop (reduce nesting level)
1141
1162
  for (i = j - i; i > 0; i--)
1142
- _Wikitext_pop_from_stack_up_to(parser, Qnil, BLOCKQUOTE, Qtrue);
1163
+ wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, true);
1143
1164
  }
1144
1165
 
1145
1166
  if (!IN(PRE))
1146
1167
  {
1147
- parser->pending_crlf = Qfalse;
1148
- _Wikitext_pop_from_stack_up_to(parser, Qnil, BLOCKQUOTE, Qfalse);
1149
- _Wikitext_indent(parser);
1150
- rb_str_cat(parser->output, pre_start, sizeof(pre_start) - 1);
1168
+ parser->pending_crlf = false;
1169
+ wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
1170
+ wiki_indent(parser);
1171
+ str_append(parser->output, pre_start, sizeof(pre_start) - 1);
1151
1172
  ary_push(parser->scope, PRE);
1152
1173
  }
1153
1174
  break;
@@ -1155,16 +1176,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1155
1176
  case PRE_START:
1156
1177
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1157
1178
  {
1158
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1159
- rb_str_cat(parser->output, escaped_pre_start, sizeof(escaped_pre_start) - 1);
1179
+ wiki_emit_pending_crlf_if_necessary(parser);
1180
+ str_append(parser->output, escaped_pre_start, sizeof(escaped_pre_start) - 1);
1160
1181
  }
1161
1182
  else if (IN(BLOCKQUOTE_START))
1162
1183
  {
1163
- _Wikitext_rollback_failed_link(parser); // if any
1164
- _Wikitext_rollback_failed_external_link(parser); // if any
1165
- _Wikitext_pop_from_stack_up_to(parser, Qnil, BLOCKQUOTE_START, Qfalse);
1166
- _Wikitext_indent(parser);
1167
- rb_str_cat(parser->output, pre_start, sizeof(pre_start) - 1);
1184
+ wiki_rollback_failed_link(parser); // if any
1185
+ wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
1186
+ wiki_indent(parser);
1187
+ str_append(parser->output, pre_start, sizeof(pre_start) - 1);
1168
1188
  ary_push(parser->scope, PRE_START);
1169
1189
  ary_push(parser->line, PRE_START);
1170
1190
  }
@@ -1172,29 +1192,27 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1172
1192
  {
1173
1193
  if (token->column_start == 1) // only allowed in first column
1174
1194
  {
1175
- _Wikitext_rollback_failed_link(parser); // if any
1176
- _Wikitext_rollback_failed_external_link(parser); // if any
1177
- _Wikitext_pop_all_from_stack(parser, Qnil);
1178
- _Wikitext_indent(parser);
1179
- rb_str_cat(parser->output, pre_start, sizeof(pre_start) - 1);
1195
+ wiki_rollback_failed_link(parser); // if any
1196
+ wiki_pop_all_from_stack(parser);
1197
+ wiki_indent(parser);
1198
+ str_append(parser->output, pre_start, sizeof(pre_start) - 1);
1180
1199
  ary_push(parser->scope, PRE_START);
1181
1200
  ary_push(parser->line, PRE_START);
1182
1201
  }
1183
1202
  else // PRE_START illegal here
1184
1203
  {
1185
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1186
- _Wikitext_pop_excess_elements(parser);
1187
- _Wikitext_start_para_if_necessary(parser);
1188
- rb_str_cat(i, escaped_pre_start, sizeof(escaped_pre_start) - 1);
1204
+ output = parser->capture ? parser->capture : parser->output;
1205
+ wiki_pop_excess_elements(parser);
1206
+ wiki_start_para_if_necessary(parser);
1207
+ str_append(output, escaped_pre_start, sizeof(escaped_pre_start) - 1);
1189
1208
  }
1190
1209
  }
1191
1210
  else
1192
1211
  {
1193
- _Wikitext_rollback_failed_link(parser); // if any
1194
- _Wikitext_rollback_failed_external_link(parser); // if any
1195
- _Wikitext_pop_from_stack_up_to(parser, Qnil, P, Qtrue);
1196
- _Wikitext_indent(parser);
1197
- rb_str_cat(parser->output, pre_start, sizeof(pre_start) - 1);
1212
+ wiki_rollback_failed_link(parser); // if any
1213
+ wiki_pop_from_stack_up_to(parser, NULL, P, true);
1214
+ wiki_indent(parser);
1215
+ str_append(parser->output, pre_start, sizeof(pre_start) - 1);
1198
1216
  ary_push(parser->scope, PRE_START);
1199
1217
  ary_push(parser->line, PRE_START);
1200
1218
  }
@@ -1203,19 +1221,19 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1203
1221
  case PRE_END:
1204
1222
  if (IN(NO_WIKI_START) || IN(PRE))
1205
1223
  {
1206
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1207
- rb_str_cat(parser->output, escaped_pre_end, sizeof(escaped_pre_end) - 1);
1224
+ wiki_emit_pending_crlf_if_necessary(parser);
1225
+ str_append(parser->output, escaped_pre_end, sizeof(escaped_pre_end) - 1);
1208
1226
  }
1209
1227
  else
1210
1228
  {
1211
1229
  if (IN(PRE_START))
1212
- _Wikitext_pop_from_stack_up_to(parser, parser->output, PRE_START, Qtrue);
1230
+ wiki_pop_from_stack_up_to(parser, parser->output, PRE_START, true);
1213
1231
  else
1214
1232
  {
1215
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1216
- _Wikitext_pop_excess_elements(parser);
1217
- _Wikitext_start_para_if_necessary(parser);
1218
- rb_str_cat(i, escaped_pre_end, sizeof(escaped_pre_end) - 1);
1233
+ output = parser->capture ? parser->capture : parser->output;
1234
+ wiki_pop_excess_elements(parser);
1235
+ wiki_start_para_if_necessary(parser);
1236
+ str_append(output, escaped_pre_end, sizeof(escaped_pre_end) - 1);
1219
1237
  }
1220
1238
  }
1221
1239
  break;
@@ -1223,14 +1241,14 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1223
1241
  case BLOCKQUOTE:
1224
1242
  if (IN(NO_WIKI_START) || IN(PRE_START))
1225
1243
  // no need to check for <pre>; can never appear inside it
1226
- rb_str_cat(parser->output, escaped_blockquote, TOKEN_LEN(token) + 3); // will either emit "&gt;" or "&gt; "
1244
+ str_append(parser->output, escaped_blockquote, TOKEN_LEN(token) + 3); // will either emit "&gt;" or "&gt; "
1227
1245
  else if (IN(BLOCKQUOTE_START))
1228
1246
  {
1229
1247
  // this kind of nesting not allowed (to avoid user confusion)
1230
- _Wikitext_pop_excess_elements(parser);
1231
- _Wikitext_start_para_if_necessary(parser);
1232
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1233
- rb_str_cat(i, escaped_blockquote, TOKEN_LEN(token) + 3); // will either emit "&gt;" or "&gt; "
1248
+ wiki_pop_excess_elements(parser);
1249
+ wiki_start_para_if_necessary(parser);
1250
+ output = parser->capture ? parser->capture : parser->output;
1251
+ str_append(output, escaped_blockquote, TOKEN_LEN(token) + 3); // will either emit "&gt;" or "&gt; "
1234
1252
  break;
1235
1253
  }
1236
1254
  else
@@ -1252,12 +1270,12 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1252
1270
  if (i > j)
1253
1271
  {
1254
1272
  // must push (increase nesting level)
1255
- _Wikitext_pop_from_stack_up_to(parser, Qnil, BLOCKQUOTE, Qfalse);
1273
+ wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
1256
1274
  for (i = i - j; i > 0; i--)
1257
1275
  {
1258
- _Wikitext_indent(parser);
1259
- rb_str_cat(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
1260
- rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
1276
+ wiki_indent(parser);
1277
+ str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
1278
+ str_append_str(parser->output, parser->line_ending);
1261
1279
  ary_push(parser->scope, BLOCKQUOTE);
1262
1280
  }
1263
1281
  }
@@ -1265,7 +1283,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1265
1283
  {
1266
1284
  // must pop (reduce nesting level)
1267
1285
  for (i = j - i; i > 0; i--)
1268
- _Wikitext_pop_from_stack_up_to(parser, Qnil, BLOCKQUOTE, Qtrue);
1286
+ wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, true);
1269
1287
  }
1270
1288
 
1271
1289
  // jump to top of the loop to process token we scanned during lookahead
@@ -1276,18 +1294,17 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1276
1294
  case BLOCKQUOTE_START:
1277
1295
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1278
1296
  {
1279
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1280
- rb_str_cat(parser->output, escaped_blockquote_start, sizeof(escaped_blockquote_start) - 1);
1297
+ wiki_emit_pending_crlf_if_necessary(parser);
1298
+ str_append(parser->output, escaped_blockquote_start, sizeof(escaped_blockquote_start) - 1);
1281
1299
  }
1282
1300
  else if (IN(BLOCKQUOTE_START))
1283
1301
  {
1284
1302
  // nesting is fine here
1285
- _Wikitext_rollback_failed_link(parser); // if any
1286
- _Wikitext_rollback_failed_external_link(parser); // if any
1287
- _Wikitext_pop_from_stack_up_to(parser, Qnil, BLOCKQUOTE_START, Qfalse);
1288
- _Wikitext_indent(parser);
1289
- rb_str_cat(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
1290
- rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
1303
+ wiki_rollback_failed_link(parser); // if any
1304
+ wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
1305
+ wiki_indent(parser);
1306
+ str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
1307
+ str_append_str(parser->output, parser->line_ending);
1291
1308
  ary_push(parser->scope, BLOCKQUOTE_START);
1292
1309
  ary_push(parser->line, BLOCKQUOTE_START);
1293
1310
  }
@@ -1295,32 +1312,30 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1295
1312
  {
1296
1313
  if (token->column_start == 1) // only allowed in first column
1297
1314
  {
1298
- _Wikitext_rollback_failed_link(parser); // if any
1299
- _Wikitext_rollback_failed_external_link(parser); // if any
1300
- _Wikitext_pop_all_from_stack(parser, Qnil);
1301
- _Wikitext_indent(parser);
1302
- rb_str_cat(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
1303
- rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
1315
+ wiki_rollback_failed_link(parser); // if any
1316
+ wiki_pop_all_from_stack(parser);
1317
+ wiki_indent(parser);
1318
+ str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
1319
+ str_append_str(parser->output, parser->line_ending);
1304
1320
  ary_push(parser->scope, BLOCKQUOTE_START);
1305
1321
  ary_push(parser->line, BLOCKQUOTE_START);
1306
1322
  }
1307
1323
  else // BLOCKQUOTE_START illegal here
1308
1324
  {
1309
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1310
- _Wikitext_pop_excess_elements(parser);
1311
- _Wikitext_start_para_if_necessary(parser);
1312
- rb_str_cat(i, escaped_blockquote_start, sizeof(escaped_blockquote_start) - 1);
1325
+ output = parser->capture ? parser->capture : parser->output;
1326
+ wiki_pop_excess_elements(parser);
1327
+ wiki_start_para_if_necessary(parser);
1328
+ str_append(output, escaped_blockquote_start, sizeof(escaped_blockquote_start) - 1);
1313
1329
  }
1314
1330
  }
1315
1331
  else
1316
1332
  {
1317
1333
  // would be nice to eliminate the repetition here but it's probably the clearest way
1318
- _Wikitext_rollback_failed_link(parser); // if any
1319
- _Wikitext_rollback_failed_external_link(parser); // if any
1320
- _Wikitext_pop_from_stack_up_to(parser, Qnil, P, Qtrue);
1321
- _Wikitext_indent(parser);
1322
- rb_str_cat(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
1323
- rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
1334
+ wiki_rollback_failed_link(parser); // if any
1335
+ wiki_pop_from_stack_up_to(parser, NULL, P, true);
1336
+ wiki_indent(parser);
1337
+ str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
1338
+ str_append_str(parser->output, parser->line_ending);
1324
1339
  ary_push(parser->scope, BLOCKQUOTE_START);
1325
1340
  ary_push(parser->line, BLOCKQUOTE_START);
1326
1341
  }
@@ -1329,19 +1344,19 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1329
1344
  case BLOCKQUOTE_END:
1330
1345
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1331
1346
  {
1332
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1333
- rb_str_cat(parser->output, escaped_blockquote_end, sizeof(escaped_blockquote_end) - 1);
1347
+ wiki_emit_pending_crlf_if_necessary(parser);
1348
+ str_append(parser->output, escaped_blockquote_end, sizeof(escaped_blockquote_end) - 1);
1334
1349
  }
1335
1350
  else
1336
1351
  {
1337
1352
  if (IN(BLOCKQUOTE_START))
1338
- _Wikitext_pop_from_stack_up_to(parser, parser->output, BLOCKQUOTE_START, Qtrue);
1353
+ wiki_pop_from_stack_up_to(parser, parser->output, BLOCKQUOTE_START, true);
1339
1354
  else
1340
1355
  {
1341
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1342
- _Wikitext_pop_excess_elements(parser);
1343
- _Wikitext_start_para_if_necessary(parser);
1344
- rb_str_cat(i, escaped_blockquote_end, sizeof(escaped_blockquote_end) - 1);
1356
+ output = parser->capture ? parser->capture : parser->output;
1357
+ wiki_pop_excess_elements(parser);
1358
+ wiki_start_para_if_necessary(parser);
1359
+ str_append(output, escaped_blockquote_end, sizeof(escaped_blockquote_end) - 1);
1345
1360
  }
1346
1361
  }
1347
1362
  break;
@@ -1349,13 +1364,13 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1349
1364
  case NO_WIKI_START:
1350
1365
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1351
1366
  {
1352
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1353
- rb_str_cat(parser->output, escaped_no_wiki_start, sizeof(escaped_no_wiki_start) - 1);
1367
+ wiki_emit_pending_crlf_if_necessary(parser);
1368
+ str_append(parser->output, escaped_no_wiki_start, sizeof(escaped_no_wiki_start) - 1);
1354
1369
  }
1355
1370
  else
1356
1371
  {
1357
- _Wikitext_pop_excess_elements(parser);
1358
- _Wikitext_start_para_if_necessary(parser);
1372
+ wiki_pop_excess_elements(parser);
1373
+ wiki_start_para_if_necessary(parser);
1359
1374
  ary_push(parser->scope, NO_WIKI_START);
1360
1375
  ary_push(parser->line, NO_WIKI_START);
1361
1376
  }
@@ -1364,25 +1379,25 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1364
1379
  case NO_WIKI_END:
1365
1380
  if (IN(NO_WIKI_START))
1366
1381
  // <nowiki> should always only ever be the last item in the stack, but use the helper routine just in case
1367
- _Wikitext_pop_from_stack_up_to(parser, Qnil, NO_WIKI_START, Qtrue);
1382
+ wiki_pop_from_stack_up_to(parser, NULL, NO_WIKI_START, true);
1368
1383
  else
1369
1384
  {
1370
- _Wikitext_pop_excess_elements(parser);
1371
- _Wikitext_start_para_if_necessary(parser);
1372
- rb_str_cat(parser->output, escaped_no_wiki_end, sizeof(escaped_no_wiki_end) - 1);
1385
+ wiki_pop_excess_elements(parser);
1386
+ wiki_start_para_if_necessary(parser);
1387
+ str_append(parser->output, escaped_no_wiki_end, sizeof(escaped_no_wiki_end) - 1);
1373
1388
  }
1374
1389
  break;
1375
1390
 
1376
1391
  case STRONG_EM:
1377
1392
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1378
1393
  {
1379
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1380
- rb_str_cat(parser->output, literal_strong_em, sizeof(literal_strong_em) - 1);
1394
+ wiki_emit_pending_crlf_if_necessary(parser);
1395
+ str_append(parser->output, literal_strong_em, sizeof(literal_strong_em) - 1);
1381
1396
  break;
1382
1397
  }
1383
1398
 
1384
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1385
- _Wikitext_pop_excess_elements(parser);
1399
+ output = parser->capture ? parser->capture : parser->output;
1400
+ wiki_pop_excess_elements(parser);
1386
1401
 
1387
1402
  // if you've seen STRONG/STRONG_START or EM/EM_START, must close them in the reverse order that you saw them!
1388
1403
  // otherwise, must open them
@@ -1394,12 +1409,12 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1394
1409
  int val = ary_entry(parser->scope, j);
1395
1410
  if (val == STRONG || val == STRONG_START)
1396
1411
  {
1397
- rb_str_cat(i, strong_end, sizeof(strong_end) - 1);
1412
+ str_append(output, strong_end, sizeof(strong_end) - 1);
1398
1413
  remove_strong = j;
1399
1414
  }
1400
1415
  else if (val == EM || val == EM_START)
1401
1416
  {
1402
- rb_str_cat(i, em_end, sizeof(em_end) - 1);
1417
+ str_append(output, em_end, sizeof(em_end) - 1);
1403
1418
  remove_em = j;
1404
1419
  }
1405
1420
  }
@@ -1411,7 +1426,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1411
1426
  ary_pop(parser->scope);
1412
1427
  else // there was no em to remove!, so consider this an opening em tag
1413
1428
  {
1414
- rb_str_cat(i, em_start, sizeof(em_start) - 1);
1429
+ str_append(output, em_start, sizeof(em_start) - 1);
1415
1430
  ary_push(parser->scope, EM);
1416
1431
  ary_push(parser->line, EM);
1417
1432
  }
@@ -1423,15 +1438,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1423
1438
  ary_pop(parser->scope);
1424
1439
  else // there was no strong to remove!, so consider this an opening strong tag
1425
1440
  {
1426
- rb_str_cat(i, strong_start, sizeof(strong_start) - 1);
1441
+ str_append(output, strong_start, sizeof(strong_start) - 1);
1427
1442
  ary_push(parser->scope, STRONG);
1428
1443
  ary_push(parser->line, STRONG);
1429
1444
  }
1430
1445
  }
1431
1446
  else // no strong or em to remove, so this must be a new opening of both
1432
1447
  {
1433
- _Wikitext_start_para_if_necessary(parser);
1434
- rb_str_cat(i, strong_em_start, sizeof(strong_em_start) - 1);
1448
+ wiki_start_para_if_necessary(parser);
1449
+ str_append(output, strong_em_start, sizeof(strong_em_start) - 1);
1435
1450
  ary_push(parser->scope, STRONG);
1436
1451
  ary_push(parser->line, STRONG);
1437
1452
  ary_push(parser->scope, EM);
@@ -1442,24 +1457,24 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1442
1457
  case STRONG:
1443
1458
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1444
1459
  {
1445
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1446
- rb_str_cat(parser->output, literal_strong, sizeof(literal_strong) - 1);
1460
+ wiki_emit_pending_crlf_if_necessary(parser);
1461
+ str_append(parser->output, literal_strong, sizeof(literal_strong) - 1);
1447
1462
  }
1448
1463
  else
1449
1464
  {
1450
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1465
+ output = parser->capture ? parser->capture : parser->output;
1451
1466
  if (IN(STRONG_START))
1452
1467
  // already in span started with <strong>, no choice but to emit this literally
1453
- rb_str_cat(parser->output, literal_strong, sizeof(literal_strong) - 1);
1468
+ str_append(output, literal_strong, sizeof(literal_strong) - 1);
1454
1469
  else if (IN(STRONG))
1455
1470
  // STRONG already seen, this is a closing tag
1456
- _Wikitext_pop_from_stack_up_to(parser, i, STRONG, Qtrue);
1471
+ wiki_pop_from_stack_up_to(parser, output, STRONG, true);
1457
1472
  else
1458
1473
  {
1459
1474
  // this is a new opening
1460
- _Wikitext_pop_excess_elements(parser);
1461
- _Wikitext_start_para_if_necessary(parser);
1462
- rb_str_cat(i, strong_start, sizeof(strong_start) - 1);
1475
+ wiki_pop_excess_elements(parser);
1476
+ wiki_start_para_if_necessary(parser);
1477
+ str_append(output, strong_start, sizeof(strong_start) - 1);
1463
1478
  ary_push(parser->scope, STRONG);
1464
1479
  ary_push(parser->line, STRONG);
1465
1480
  }
@@ -1469,19 +1484,19 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1469
1484
  case STRONG_START:
1470
1485
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1471
1486
  {
1472
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1473
- rb_str_cat(parser->output, escaped_strong_start, sizeof(escaped_strong_start) - 1);
1487
+ wiki_emit_pending_crlf_if_necessary(parser);
1488
+ str_append(parser->output, escaped_strong_start, sizeof(escaped_strong_start) - 1);
1474
1489
  }
1475
1490
  else
1476
1491
  {
1477
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1492
+ output = parser->capture ? parser->capture : parser->output;
1478
1493
  if (IN(STRONG_START) || IN(STRONG))
1479
- rb_str_cat(parser->output, escaped_strong_start, sizeof(escaped_strong_start) - 1);
1494
+ str_append(output, escaped_strong_start, sizeof(escaped_strong_start) - 1);
1480
1495
  else
1481
1496
  {
1482
- _Wikitext_pop_excess_elements(parser);
1483
- _Wikitext_start_para_if_necessary(parser);
1484
- rb_str_cat(i, strong_start, sizeof(strong_start) - 1);
1497
+ wiki_pop_excess_elements(parser);
1498
+ wiki_start_para_if_necessary(parser);
1499
+ str_append(output, strong_start, sizeof(strong_start) - 1);
1485
1500
  ary_push(parser->scope, STRONG_START);
1486
1501
  ary_push(parser->line, STRONG_START);
1487
1502
  }
@@ -1491,20 +1506,20 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1491
1506
  case STRONG_END:
1492
1507
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1493
1508
  {
1494
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1495
- rb_str_cat(parser->output, escaped_strong_end, sizeof(escaped_strong_end) - 1);
1509
+ wiki_emit_pending_crlf_if_necessary(parser);
1510
+ str_append(parser->output, escaped_strong_end, sizeof(escaped_strong_end) - 1);
1496
1511
  }
1497
1512
  else
1498
1513
  {
1499
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1514
+ output = parser->capture ? parser->capture : parser->output;
1500
1515
  if (IN(STRONG_START))
1501
- _Wikitext_pop_from_stack_up_to(parser, i, STRONG_START, Qtrue);
1516
+ wiki_pop_from_stack_up_to(parser, output, STRONG_START, true);
1502
1517
  else
1503
1518
  {
1504
1519
  // no STRONG_START in scope, so must interpret the STRONG_END without any special meaning
1505
- _Wikitext_pop_excess_elements(parser);
1506
- _Wikitext_start_para_if_necessary(parser);
1507
- rb_str_cat(i, escaped_strong_end, sizeof(escaped_strong_end) - 1);
1520
+ wiki_pop_excess_elements(parser);
1521
+ wiki_start_para_if_necessary(parser);
1522
+ str_append(output, escaped_strong_end, sizeof(escaped_strong_end) - 1);
1508
1523
  }
1509
1524
  }
1510
1525
  break;
@@ -1512,24 +1527,24 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1512
1527
  case EM:
1513
1528
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1514
1529
  {
1515
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1516
- rb_str_cat(parser->output, literal_em, sizeof(literal_em) - 1);
1530
+ wiki_emit_pending_crlf_if_necessary(parser);
1531
+ str_append(parser->output, literal_em, sizeof(literal_em) - 1);
1517
1532
  }
1518
1533
  else
1519
1534
  {
1520
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1535
+ output = parser->capture ? parser->capture : parser->output;
1521
1536
  if (IN(EM_START))
1522
1537
  // already in span started with <em>, no choice but to emit this literally
1523
- rb_str_cat(parser->output, literal_em, sizeof(literal_em) - 1);
1538
+ str_append(output, literal_em, sizeof(literal_em) - 1);
1524
1539
  else if (IN(EM))
1525
1540
  // EM already seen, this is a closing tag
1526
- _Wikitext_pop_from_stack_up_to(parser, i, EM, Qtrue);
1541
+ wiki_pop_from_stack_up_to(parser, output, EM, true);
1527
1542
  else
1528
1543
  {
1529
1544
  // this is a new opening
1530
- _Wikitext_pop_excess_elements(parser);
1531
- _Wikitext_start_para_if_necessary(parser);
1532
- rb_str_cat(i, em_start, sizeof(em_start) - 1);
1545
+ wiki_pop_excess_elements(parser);
1546
+ wiki_start_para_if_necessary(parser);
1547
+ str_append(output, em_start, sizeof(em_start) - 1);
1533
1548
  ary_push(parser->scope, EM);
1534
1549
  ary_push(parser->line, EM);
1535
1550
  }
@@ -1539,19 +1554,19 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1539
1554
  case EM_START:
1540
1555
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1541
1556
  {
1542
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1543
- rb_str_cat(parser->output, escaped_em_start, sizeof(escaped_em_start) - 1);
1557
+ wiki_emit_pending_crlf_if_necessary(parser);
1558
+ str_append(parser->output, escaped_em_start, sizeof(escaped_em_start) - 1);
1544
1559
  }
1545
1560
  else
1546
1561
  {
1547
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1562
+ output = parser->capture ? parser->capture : parser->output;
1548
1563
  if (IN(EM_START) || IN(EM))
1549
- rb_str_cat(i, escaped_em_start, sizeof(escaped_em_start) - 1);
1564
+ str_append(output, escaped_em_start, sizeof(escaped_em_start) - 1);
1550
1565
  else
1551
1566
  {
1552
- _Wikitext_pop_excess_elements(parser);
1553
- _Wikitext_start_para_if_necessary(parser);
1554
- rb_str_cat(i, em_start, sizeof(em_start) - 1);
1567
+ wiki_pop_excess_elements(parser);
1568
+ wiki_start_para_if_necessary(parser);
1569
+ str_append(output, em_start, sizeof(em_start) - 1);
1555
1570
  ary_push(parser->scope, EM_START);
1556
1571
  ary_push(parser->line, EM_START);
1557
1572
  }
@@ -1561,20 +1576,20 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1561
1576
  case EM_END:
1562
1577
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1563
1578
  {
1564
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1565
- rb_str_cat(parser->output, escaped_em_end, sizeof(escaped_em_end) - 1);
1579
+ wiki_emit_pending_crlf_if_necessary(parser);
1580
+ str_append(parser->output, escaped_em_end, sizeof(escaped_em_end) - 1);
1566
1581
  }
1567
1582
  else
1568
1583
  {
1569
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1584
+ output = parser->capture ? parser->capture : parser->output;
1570
1585
  if (IN(EM_START))
1571
- _Wikitext_pop_from_stack_up_to(parser, i, EM_START, Qtrue);
1586
+ wiki_pop_from_stack_up_to(parser, output, EM_START, true);
1572
1587
  else
1573
1588
  {
1574
1589
  // no EM_START in scope, so must interpret the EM_END without any special meaning
1575
- _Wikitext_pop_excess_elements(parser);
1576
- _Wikitext_start_para_if_necessary(parser);
1577
- rb_str_cat(i, escaped_em_end, sizeof(escaped_em_end) - 1);
1590
+ wiki_pop_excess_elements(parser);
1591
+ wiki_start_para_if_necessary(parser);
1592
+ str_append(output, escaped_em_end, sizeof(escaped_em_end) - 1);
1578
1593
  }
1579
1594
  }
1580
1595
  break;
@@ -1582,24 +1597,24 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1582
1597
  case TT:
1583
1598
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1584
1599
  {
1585
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1586
- rb_str_cat(parser->output, backtick, sizeof(backtick) - 1);
1600
+ wiki_emit_pending_crlf_if_necessary(parser);
1601
+ str_append(parser->output, backtick, sizeof(backtick) - 1);
1587
1602
  }
1588
1603
  else
1589
1604
  {
1590
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1605
+ output = parser->capture ? parser->capture : parser->output;
1591
1606
  if (IN(TT_START))
1592
1607
  // already in span started with <tt>, no choice but to emit this literally
1593
- rb_str_cat(parser->output, backtick, sizeof(backtick) - 1);
1608
+ str_append(output, backtick, sizeof(backtick) - 1);
1594
1609
  else if (IN(TT))
1595
1610
  // TT (`) already seen, this is a closing tag
1596
- _Wikitext_pop_from_stack_up_to(parser, i, TT, Qtrue);
1611
+ wiki_pop_from_stack_up_to(parser, output, TT, true);
1597
1612
  else
1598
1613
  {
1599
1614
  // this is a new opening
1600
- _Wikitext_pop_excess_elements(parser);
1601
- _Wikitext_start_para_if_necessary(parser);
1602
- rb_str_cat(i, tt_start, sizeof(tt_start) - 1);
1615
+ wiki_pop_excess_elements(parser);
1616
+ wiki_start_para_if_necessary(parser);
1617
+ str_append(output, tt_start, sizeof(tt_start) - 1);
1603
1618
  ary_push(parser->scope, TT);
1604
1619
  ary_push(parser->line, TT);
1605
1620
  }
@@ -1609,19 +1624,19 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1609
1624
  case TT_START:
1610
1625
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1611
1626
  {
1612
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1613
- rb_str_cat(parser->output, escaped_tt_start, sizeof(escaped_tt_start) - 1);
1627
+ wiki_emit_pending_crlf_if_necessary(parser);
1628
+ str_append(parser->output, escaped_tt_start, sizeof(escaped_tt_start) - 1);
1614
1629
  }
1615
1630
  else
1616
1631
  {
1617
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1632
+ output = parser->capture ? parser->capture : parser->output;
1618
1633
  if (IN(TT_START) || IN(TT))
1619
- rb_str_cat(i, escaped_tt_start, sizeof(escaped_tt_start) - 1);
1634
+ str_append(output, escaped_tt_start, sizeof(escaped_tt_start) - 1);
1620
1635
  else
1621
1636
  {
1622
- _Wikitext_pop_excess_elements(parser);
1623
- _Wikitext_start_para_if_necessary(parser);
1624
- rb_str_cat(i, tt_start, sizeof(tt_start) - 1);
1637
+ wiki_pop_excess_elements(parser);
1638
+ wiki_start_para_if_necessary(parser);
1639
+ str_append(output, tt_start, sizeof(tt_start) - 1);
1625
1640
  ary_push(parser->scope, TT_START);
1626
1641
  ary_push(parser->line, TT_START);
1627
1642
  }
@@ -1631,20 +1646,20 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1631
1646
  case TT_END:
1632
1647
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1633
1648
  {
1634
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1635
- rb_str_cat(parser->output, escaped_tt_end, sizeof(escaped_tt_end) - 1);
1649
+ wiki_emit_pending_crlf_if_necessary(parser);
1650
+ str_append(parser->output, escaped_tt_end, sizeof(escaped_tt_end) - 1);
1636
1651
  }
1637
1652
  else
1638
1653
  {
1639
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
1654
+ output = parser->capture ? parser->capture : parser->output;
1640
1655
  if (IN(TT_START))
1641
- _Wikitext_pop_from_stack_up_to(parser, i, TT_START, Qtrue);
1656
+ wiki_pop_from_stack_up_to(parser, output, TT_START, true);
1642
1657
  else
1643
1658
  {
1644
1659
  // no TT_START in scope, so must interpret the TT_END without any special meaning
1645
- _Wikitext_pop_excess_elements(parser);
1646
- _Wikitext_start_para_if_necessary(parser);
1647
- rb_str_cat(i, escaped_tt_end, sizeof(escaped_tt_end) - 1);
1660
+ wiki_pop_excess_elements(parser);
1661
+ wiki_start_para_if_necessary(parser);
1662
+ str_append(output, escaped_tt_end, sizeof(escaped_tt_end) - 1);
1648
1663
  }
1649
1664
  }
1650
1665
  break;
@@ -1654,7 +1669,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1654
1669
  if (IN(NO_WIKI_START) || IN(PRE_START))
1655
1670
  {
1656
1671
  // no need to check for PRE; can never appear inside it
1657
- rb_str_cat(parser->output, token->start, TOKEN_LEN(token));
1672
+ str_append(parser->output, token->start, TOKEN_LEN(token));
1658
1673
  break;
1659
1674
  }
1660
1675
 
@@ -1684,7 +1699,8 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1684
1699
  // want to compare line with scope but can only do so if scope has enough items on it
1685
1700
  if (j >= i)
1686
1701
  {
1687
- if (ary_entry(parser->scope, i + bq_count - 2) == type && ary_entry(parser->scope, i + bq_count - 1) == LI)
1702
+ if (ary_entry(parser->scope, i + bq_count - 2) == type &&
1703
+ ary_entry(parser->scope, i + bq_count - 1) == LI)
1688
1704
  {
1689
1705
  // line and scope match at this point: do nothing yet
1690
1706
  }
@@ -1693,7 +1709,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1693
1709
  // item just pushed onto line does not match corresponding slot of scope!
1694
1710
  for (; j >= i - 2; j--)
1695
1711
  // must pop back before emitting
1696
- _Wikitext_pop_from_stack(parser, Qnil);
1712
+ wiki_pop_from_stack(parser, NULL);
1697
1713
 
1698
1714
  // will emit UL or OL, then LI
1699
1715
  break;
@@ -1707,13 +1723,13 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1707
1723
  // not a OL or UL token!
1708
1724
  if (j == i)
1709
1725
  // must close existing LI and re-open new one
1710
- _Wikitext_pop_from_stack(parser, Qnil);
1726
+ wiki_pop_from_stack(parser, NULL);
1711
1727
  else if (j > i)
1712
1728
  {
1713
1729
  // item just pushed onto line does not match corresponding slot of scope!
1714
1730
  for (; j >= i; j--)
1715
1731
  // must pop back before emitting
1716
- _Wikitext_pop_from_stack(parser, Qnil);
1732
+ wiki_pop_from_stack(parser, NULL);
1717
1733
  }
1718
1734
  break;
1719
1735
  }
@@ -1727,33 +1743,33 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1727
1743
  if (j > 0 && ary_entry(parser->scope, -1) == LI)
1728
1744
  {
1729
1745
  // so we should precede it with a CRLF, and indicate that it's a nested list
1730
- rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
1746
+ str_append(parser->output, parser->line_ending->ptr, parser->line_ending->len);
1731
1747
  ary_push(parser->scope, NESTED_LIST);
1732
1748
  }
1733
1749
  else
1734
1750
  {
1735
1751
  // this is a new list
1736
1752
  if (IN(BLOCKQUOTE_START))
1737
- _Wikitext_pop_from_stack_up_to(parser, Qnil, BLOCKQUOTE_START, Qfalse);
1753
+ wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
1738
1754
  else
1739
- _Wikitext_pop_from_stack_up_to(parser, Qnil, BLOCKQUOTE, Qfalse);
1755
+ wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
1740
1756
  }
1741
1757
 
1742
1758
  // emit
1743
- _Wikitext_indent(parser);
1759
+ wiki_indent(parser);
1744
1760
  if (type == OL)
1745
- rb_str_cat(parser->output, ol_start, sizeof(ol_start) - 1);
1761
+ str_append(parser->output, ol_start, sizeof(ol_start) - 1);
1746
1762
  else if (type == UL)
1747
- rb_str_cat(parser->output, ul_start, sizeof(ul_start) - 1);
1763
+ str_append(parser->output, ul_start, sizeof(ul_start) - 1);
1748
1764
  ary_push(parser->scope, type);
1749
- rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
1765
+ str_append(parser->output, parser->line_ending->ptr, parser->line_ending->len);
1750
1766
  }
1751
1767
  else if (type == SPACE)
1752
1768
  // silently throw away the optional SPACE token after final list marker
1753
1769
  token = NULL;
1754
1770
 
1755
- _Wikitext_indent(parser);
1756
- rb_str_cat(parser->output, li_start, sizeof(li_start) - 1);
1771
+ wiki_indent(parser);
1772
+ str_append(parser->output, li_start, sizeof(li_start) - 1);
1757
1773
  ary_push(parser->scope, LI);
1758
1774
 
1759
1775
  // any subsequent UL or OL tokens on this line are syntax errors and must be emitted literally
@@ -1763,7 +1779,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1763
1779
  while (k++, NEXT_TOKEN(), (type = token->type))
1764
1780
  {
1765
1781
  if (type == OL || type == UL)
1766
- rb_str_cat(parser->output, token->start, TOKEN_LEN(token));
1782
+ str_append(parser->output, token->start, TOKEN_LEN(token));
1767
1783
  else if (type == SPACE && k == 1)
1768
1784
  {
1769
1785
  // silently throw away the optional SPACE token after final list marker
@@ -1787,15 +1803,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1787
1803
  if (IN(NO_WIKI_START) || IN(PRE_START))
1788
1804
  {
1789
1805
  // no need to check for PRE; can never appear inside it
1790
- rb_str_cat(parser->output, token->start, TOKEN_LEN(token));
1806
+ str_append(parser->output, token->start, TOKEN_LEN(token));
1791
1807
  break;
1792
1808
  }
1793
1809
 
1794
1810
  // pop up to but not including the last BLOCKQUOTE on the scope stack
1795
1811
  if (IN(BLOCKQUOTE_START))
1796
- _Wikitext_pop_from_stack_up_to(parser, Qnil, BLOCKQUOTE_START, Qfalse);
1812
+ wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
1797
1813
  else
1798
- _Wikitext_pop_from_stack_up_to(parser, Qnil, BLOCKQUOTE, Qfalse);
1814
+ wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
1799
1815
 
1800
1816
  // count number of BLOCKQUOTE tokens in line buffer and in scope stack
1801
1817
  ary_push(parser->line, type);
@@ -1807,7 +1823,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1807
1823
  {
1808
1824
  // must pop (reduce nesting level)
1809
1825
  for (i = j - i; i > 0; i--)
1810
- _Wikitext_pop_from_stack_up_to(parser, Qnil, BLOCKQUOTE, Qtrue);
1826
+ wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, true);
1811
1827
  }
1812
1828
 
1813
1829
  // discard any whitespace here (so that "== foo ==" will be translated to "<h2>foo</h2>" rather than "<h2> foo </h2>")
@@ -1815,7 +1831,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1815
1831
  ; // discard
1816
1832
 
1817
1833
  ary_push(parser->scope, type);
1818
- _Wikitext_indent(parser);
1834
+ wiki_indent(parser);
1819
1835
 
1820
1836
  // take base_heading_level into account
1821
1837
  type += base_heading_level;
@@ -1824,125 +1840,45 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1824
1840
 
1825
1841
  // rather than repeat all that code for each kind of heading, share it and use a conditional here
1826
1842
  if (type == H6_START)
1827
- rb_str_cat(parser->output, h6_start, sizeof(h6_start) - 1);
1843
+ str_append(parser->output, h6_start, sizeof(h6_start) - 1);
1828
1844
  else if (type == H5_START)
1829
- rb_str_cat(parser->output, h5_start, sizeof(h5_start) - 1);
1845
+ str_append(parser->output, h5_start, sizeof(h5_start) - 1);
1830
1846
  else if (type == H4_START)
1831
- rb_str_cat(parser->output, h4_start, sizeof(h4_start) - 1);
1847
+ str_append(parser->output, h4_start, sizeof(h4_start) - 1);
1832
1848
  else if (type == H3_START)
1833
- rb_str_cat(parser->output, h3_start, sizeof(h3_start) - 1);
1849
+ str_append(parser->output, h3_start, sizeof(h3_start) - 1);
1834
1850
  else if (type == H2_START)
1835
- rb_str_cat(parser->output, h2_start, sizeof(h2_start) - 1);
1851
+ str_append(parser->output, h2_start, sizeof(h2_start) - 1);
1836
1852
  else if (type == H1_START)
1837
- rb_str_cat(parser->output, h1_start, sizeof(h1_start) - 1);
1853
+ str_append(parser->output, h1_start, sizeof(h1_start) - 1);
1838
1854
 
1839
1855
  // jump to top of the loop to process token we scanned during lookahead
1840
1856
  continue;
1841
1857
 
1842
1858
  case H6_END:
1843
- if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1844
- {
1845
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1846
- rb_str_cat(parser->output, literal_h6, sizeof(literal_h6) - 1);
1847
- }
1848
- else
1849
- {
1850
- _Wikitext_rollback_failed_external_link(parser); // if any
1851
- if (!IN(H6_START))
1852
- {
1853
- // literal output only if not in h6 scope (we stay silent in that case)
1854
- _Wikitext_start_para_if_necessary(parser);
1855
- rb_str_cat(parser->output, literal_h6, sizeof(literal_h6) - 1);
1856
- }
1857
- }
1858
- break;
1859
-
1860
1859
  case H5_END:
1861
- if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1862
- {
1863
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1864
- rb_str_cat(parser->output, literal_h5, sizeof(literal_h5) - 1);
1865
- }
1866
- else
1867
- {
1868
- _Wikitext_rollback_failed_external_link(parser); // if any
1869
- if (!IN(H5_START))
1870
- {
1871
- // literal output only if not in h5 scope (we stay silent in that case)
1872
- _Wikitext_start_para_if_necessary(parser);
1873
- rb_str_cat(parser->output, literal_h5, sizeof(literal_h5) - 1);
1874
- }
1875
- }
1876
- break;
1877
-
1878
1860
  case H4_END:
1879
- if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1880
- {
1881
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1882
- rb_str_cat(parser->output, literal_h4, sizeof(literal_h4) - 1);
1883
- }
1884
- else
1885
- {
1886
- _Wikitext_rollback_failed_external_link(parser); // if any
1887
- if (!IN(H4_START))
1888
- {
1889
- // literal output only if not in h4 scope (we stay silent in that case)
1890
- _Wikitext_start_para_if_necessary(parser);
1891
- rb_str_cat(parser->output, literal_h4, sizeof(literal_h4) - 1);
1892
- }
1893
- }
1894
- break;
1895
-
1896
1861
  case H3_END:
1897
- if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1898
- {
1899
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1900
- rb_str_cat(parser->output, literal_h3, sizeof(literal_h3) - 1);
1901
- }
1902
- else
1903
- {
1904
- _Wikitext_rollback_failed_external_link(parser); // if any
1905
- if (!IN(H3_START))
1906
- {
1907
- // literal output only if not in h3 scope (we stay silent in that case)
1908
- _Wikitext_start_para_if_necessary(parser);
1909
- rb_str_cat(parser->output, literal_h3, sizeof(literal_h3) - 1);
1910
- }
1911
- }
1912
- break;
1913
-
1914
1862
  case H2_END:
1915
- if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1916
- {
1917
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1918
- rb_str_cat(parser->output, literal_h2, sizeof(literal_h2) - 1);
1919
- }
1920
- else
1921
- {
1922
- _Wikitext_rollback_failed_external_link(parser); // if any
1923
- if (!IN(H2_START))
1924
- {
1925
- // literal output only if not in h2 scope (we stay silent in that case)
1926
- _Wikitext_start_para_if_necessary(parser);
1927
- rb_str_cat(parser->output, literal_h2, sizeof(literal_h2) - 1);
1928
- }
1929
- }
1930
- break;
1931
-
1932
1863
  case H1_END:
1933
1864
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1934
1865
  {
1935
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1936
- rb_str_cat(parser->output, literal_h1, sizeof(literal_h1) - 1);
1866
+ wiki_emit_pending_crlf_if_necessary(parser);
1867
+ str_append(parser->output, token->start, TOKEN_LEN(token));
1937
1868
  }
1938
1869
  else
1939
1870
  {
1940
- _Wikitext_rollback_failed_external_link(parser); // if any
1941
- if (!IN(H1_START))
1871
+ wiki_rollback_failed_external_link(parser); // if any
1872
+ if ((type == H6_END && !IN(H6_START)) ||
1873
+ (type == H5_END && !IN(H5_START)) ||
1874
+ (type == H4_END && !IN(H4_START)) ||
1875
+ (type == H3_END && !IN(H3_START)) ||
1876
+ (type == H2_END && !IN(H2_START)) ||
1877
+ (type == H1_END && !IN(H1_START)))
1942
1878
  {
1943
- // literal output only if not in h1 scope (we stay silent in that case)
1944
- _Wikitext_start_para_if_necessary(parser);
1945
- rb_str_cat(parser->output, literal_h1, sizeof(literal_h1) - 1);
1879
+ // literal output only if not in appropriate scope (we stay silent in that case)
1880
+ wiki_start_para_if_necessary(parser);
1881
+ str_append(parser->output, token->start, TOKEN_LEN(token));
1946
1882
  }
1947
1883
  }
1948
1884
  break;
@@ -1950,18 +1886,16 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1950
1886
  case MAIL:
1951
1887
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
1952
1888
  {
1953
- _Wikitext_emit_pending_crlf_if_necessary(parser);
1954
- rb_str_cat(parser->output, token->start, TOKEN_LEN(token));
1889
+ wiki_emit_pending_crlf_if_necessary(parser);
1890
+ str_append(parser->output, token->start, TOKEN_LEN(token));
1955
1891
  }
1956
1892
  else
1957
1893
  {
1958
- // in plain scope, will turn into autolink (with appropriate, user-configurable CSS)
1959
- _Wikitext_pop_excess_elements(parser);
1960
- _Wikitext_start_para_if_necessary(parser);
1961
- i = TOKEN_TEXT(token);
1962
- if (parser->autolink == Qtrue)
1963
- i = _Wikitext_hyperlink(parser, rb_str_new2("mailto:"), i, i, mailto_class);
1964
- rb_str_append(parser->output, i);
1894
+ wiki_pop_excess_elements(parser);
1895
+ wiki_start_para_if_necessary(parser);
1896
+ token_str->ptr = token->start;
1897
+ token_str->len = TOKEN_LEN(token);
1898
+ wiki_append_hyperlink(parser, rb_str_new2("mailto:"), token_str, NULL, mailto_class, true);
1965
1899
  }
1966
1900
  break;
1967
1901
 
@@ -1969,110 +1903,93 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1969
1903
  if (IN(NO_WIKI_START))
1970
1904
  // user can temporarily suppress autolinking by using <nowiki></nowiki>
1971
1905
  // note that unlike MediaWiki, we do allow autolinking inside PRE blocks
1972
- rb_str_cat(parser->output, token->start, TOKEN_LEN(token));
1906
+ str_append(parser->output, token->start, TOKEN_LEN(token));
1973
1907
  else if (IN(LINK_START))
1974
1908
  {
1975
1909
  // if the URI were allowed it would have been handled already in LINK_START
1976
- _Wikitext_rollback_failed_link(parser);
1977
- i = TOKEN_TEXT(token);
1978
- if (parser->autolink == Qtrue)
1979
- i = _Wikitext_hyperlink(parser, Qnil, i, i, parser->external_link_class); // link target, link text
1980
- rb_str_append(parser->output, i);
1910
+ wiki_rollback_failed_internal_link(parser);
1911
+ token_str->ptr = token->start;
1912
+ token_str->len = TOKEN_LEN(token);
1913
+ wiki_append_hyperlink(parser, Qnil, token_str, NULL, parser->external_link_class, true);
1981
1914
  }
1982
1915
  else if (IN(EXT_LINK_START))
1983
1916
  {
1984
- if (NIL_P(parser->link_target))
1917
+ if (parser->link_target->len == 0)
1985
1918
  {
1986
1919
  // this must be our link target: look ahead to make sure we see the space we're expecting to see
1987
- i = TOKEN_TEXT(token);
1920
+ token_str->ptr = token->start;
1921
+ token_str->len = TOKEN_LEN(token);
1988
1922
  NEXT_TOKEN();
1989
1923
  if (token->type == SPACE)
1990
1924
  {
1991
1925
  ary_push(parser->scope, SPACE);
1992
- parser->link_target = i;
1993
- parser->link_text = rb_str_new2("");
1926
+ str_append_str(parser->link_target, token_str);
1927
+ str_clear(parser->link_text);
1994
1928
  parser->capture = parser->link_text;
1995
1929
  token = NULL; // silently consume space
1996
1930
  }
1997
1931
  else
1998
1932
  {
1999
1933
  // didn't see the space! this must be an error
2000
- _Wikitext_pop_from_stack(parser, Qnil);
2001
- _Wikitext_pop_excess_elements(parser);
2002
- _Wikitext_start_para_if_necessary(parser);
2003
- rb_str_cat(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
2004
- if (parser->autolink == Qtrue)
2005
- i = _Wikitext_hyperlink(parser, Qnil, i, i, parser->external_link_class); // link target, link text
2006
- rb_str_append(parser->output, i);
1934
+ wiki_pop_from_stack(parser, NULL);
1935
+ wiki_pop_excess_elements(parser);
1936
+ wiki_start_para_if_necessary(parser);
1937
+ str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
1938
+ wiki_append_hyperlink(parser, Qnil, token_str, NULL, parser->external_link_class, true);
2007
1939
  }
2008
1940
  }
2009
1941
  else
2010
- {
2011
- if (NIL_P(parser->link_text))
2012
- // this must be the first part of our link text
2013
- parser->link_text = TOKEN_TEXT(token);
2014
- else
2015
- // add to existing link text
2016
- rb_str_cat(parser->link_text, token->start, TOKEN_LEN(token));
2017
- }
1942
+ str_append(parser->link_text, token->start, TOKEN_LEN(token));
2018
1943
  }
2019
1944
  else
2020
1945
  {
2021
- // in plain scope, will turn into autolink (with appropriate, user-configurable CSS)
2022
- _Wikitext_pop_excess_elements(parser);
2023
- _Wikitext_start_para_if_necessary(parser);
2024
- i = TOKEN_TEXT(token);
2025
- if (parser->autolink == Qtrue)
2026
- i = _Wikitext_hyperlink(parser, Qnil, i, i, parser->external_link_class); // link target, link text
2027
- rb_str_append(parser->output, i);
1946
+ wiki_pop_excess_elements(parser);
1947
+ wiki_start_para_if_necessary(parser);
1948
+ token_str->ptr = token->start;
1949
+ token_str->len = TOKEN_LEN(token);
1950
+ wiki_append_hyperlink(parser, Qnil, token_str, NULL, parser->external_link_class, true);
2028
1951
  }
2029
1952
  break;
2030
1953
 
2031
1954
  case PATH:
2032
1955
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2033
- rb_str_cat(parser->output, token->start, TOKEN_LEN(token));
1956
+ str_append(parser->output, token->start, TOKEN_LEN(token));
2034
1957
  else if (IN(EXT_LINK_START))
2035
1958
  {
2036
- if (NIL_P(parser->link_target))
1959
+ if (parser->link_target->len == 0)
2037
1960
  {
2038
1961
  // this must be our link target: look ahead to make sure we see the space we're expecting to see
2039
- i = TOKEN_TEXT(token);
1962
+ token_str->ptr = token->start;
1963
+ token_str->len = TOKEN_LEN(token);
2040
1964
  NEXT_TOKEN();
2041
1965
  if (token->type == SPACE)
2042
1966
  {
2043
1967
  ary_push(parser->scope, PATH);
2044
1968
  ary_push(parser->scope, SPACE);
2045
- parser->link_target = i;
2046
- parser->link_text = rb_str_new2("");
1969
+ str_append_str(parser->link_target, token_str);
1970
+ str_clear(parser->link_text);
2047
1971
  parser->capture = parser->link_text;
2048
1972
  token = NULL; // silently consume space
2049
1973
  }
2050
1974
  else
2051
1975
  {
2052
1976
  // didn't see the space! this must be an error
2053
- _Wikitext_pop_from_stack(parser, Qnil);
2054
- _Wikitext_pop_excess_elements(parser);
2055
- _Wikitext_start_para_if_necessary(parser);
2056
- rb_str_cat(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
2057
- rb_str_append(parser->output, i);
1977
+ wiki_pop_from_stack(parser, NULL);
1978
+ wiki_pop_excess_elements(parser);
1979
+ wiki_start_para_if_necessary(parser);
1980
+ str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
1981
+ str_append_str(parser->output, token_str);
2058
1982
  }
2059
1983
  }
2060
1984
  else
2061
- {
2062
- if (NIL_P(parser->link_text))
2063
- // this must be the first part of our link text
2064
- parser->link_text = TOKEN_TEXT(token);
2065
- else
2066
- // add to existing link text
2067
- rb_str_cat(parser->link_text, token->start, TOKEN_LEN(token));
2068
- }
1985
+ str_append(parser->link_text, token->start, TOKEN_LEN(token));
2069
1986
  }
2070
1987
  else
2071
1988
  {
2072
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2073
- _Wikitext_pop_excess_elements(parser);
2074
- _Wikitext_start_para_if_necessary(parser);
2075
- rb_str_cat(i, token->start, TOKEN_LEN(token));
1989
+ output = parser->capture ? parser->capture : parser->output;
1990
+ wiki_pop_excess_elements(parser);
1991
+ wiki_start_para_if_necessary(parser);
1992
+ str_append(output, token->start, TOKEN_LEN(token));
2076
1993
  }
2077
1994
  break;
2078
1995
 
@@ -2099,20 +2016,20 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2099
2016
  // SPACE, SPECIAL_URI_CHARS, PRINTABLE, PATH, ALNUM, DEFAULT, QUOT and AMP
2100
2017
  // everything else will be rejected
2101
2018
  case LINK_START:
2102
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2019
+ output = parser->capture ? parser->capture : parser->output;
2103
2020
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2104
2021
  {
2105
- _Wikitext_emit_pending_crlf_if_necessary(parser);
2106
- rb_str_cat(i, link_start, sizeof(link_start) - 1);
2022
+ wiki_emit_pending_crlf_if_necessary(parser);
2023
+ str_append(output, link_start, sizeof(link_start) - 1);
2107
2024
  }
2108
2025
  else if (IN(EXT_LINK_START))
2109
2026
  // already in external link scope! (and in fact, must be capturing link_text right now)
2110
- rb_str_cat(i, link_start, sizeof(link_start) - 1);
2027
+ str_append(output, link_start, sizeof(link_start) - 1);
2111
2028
  else if (IN(LINK_START))
2112
2029
  {
2113
2030
  // already in internal link scope! this is a syntax error
2114
- _Wikitext_rollback_failed_link(parser);
2115
- rb_str_cat(parser->output, link_start, sizeof(link_start) - 1);
2031
+ wiki_rollback_failed_internal_link(parser);
2032
+ str_append(parser->output, link_start, sizeof(link_start) - 1);
2116
2033
  }
2117
2034
  else if (IN(SEPARATOR))
2118
2035
  {
@@ -2121,8 +2038,8 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2121
2038
  else // not in internal link scope yet
2122
2039
  {
2123
2040
  // will either emit a link, or the rollback of a failed link, so start the para now
2124
- _Wikitext_pop_excess_elements(parser);
2125
- _Wikitext_start_para_if_necessary(parser);
2041
+ wiki_pop_excess_elements(parser);
2042
+ wiki_start_para_if_necessary(parser);
2126
2043
  ary_push(parser->scope, LINK_START);
2127
2044
 
2128
2045
  // look ahead and try to gobble up link target
@@ -2144,34 +2061,34 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2144
2061
  type == RIGHT_CURLY)
2145
2062
  {
2146
2063
  // accumulate these tokens into link_target
2147
- if (NIL_P(parser->link_target))
2064
+ if (parser->link_target->len == 0)
2148
2065
  {
2149
- parser->link_target = rb_str_new2("");
2150
- parser->capture = parser->link_target;
2066
+ str_clear(parser->link_target);
2067
+ parser->capture = parser->link_target;
2151
2068
  }
2152
2069
  if (type == QUOT_ENTITY)
2153
2070
  // don't insert the entity, insert the literal quote
2154
- rb_str_cat(parser->link_target, quote, sizeof(quote) - 1);
2071
+ str_append(parser->link_target, quote, sizeof(quote) - 1);
2155
2072
  else if (type == AMP_ENTITY)
2156
2073
  // don't insert the entity, insert the literal ampersand
2157
- rb_str_cat(parser->link_target, ampersand, sizeof(ampersand) - 1);
2074
+ str_append(parser->link_target, ampersand, sizeof(ampersand) - 1);
2158
2075
  else
2159
- rb_str_cat(parser->link_target, token->start, TOKEN_LEN(token));
2076
+ str_append(parser->link_target, token->start, TOKEN_LEN(token));
2160
2077
  }
2161
2078
  else if (type == LINK_END)
2162
2079
  {
2163
- if (NIL_P(parser->link_target)) // bail for inputs like "[[]]"
2164
- _Wikitext_rollback_failed_link(parser);
2080
+ if (parser->link_target->len == 0) // bail for inputs like "[[]]"
2081
+ wiki_rollback_failed_internal_link(parser);
2165
2082
  break; // jump back to top of loop (will handle this in LINK_END case below)
2166
2083
  }
2167
2084
  else if (type == SEPARATOR)
2168
2085
  {
2169
- if (NIL_P(parser->link_target)) // bail for inputs like "[[|"
2170
- _Wikitext_rollback_failed_link(parser);
2086
+ if (parser->link_target->len == 0) // bail for inputs like "[[|"
2087
+ wiki_rollback_failed_internal_link(parser);
2171
2088
  else
2172
2089
  {
2173
2090
  ary_push(parser->scope, SEPARATOR);
2174
- parser->link_text = rb_str_new2("");
2091
+ str_clear(parser->link_text);
2175
2092
  parser->capture = parser->link_text;
2176
2093
  token = NULL;
2177
2094
  }
@@ -2179,7 +2096,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2179
2096
  }
2180
2097
  else // unexpected token (syntax error)
2181
2098
  {
2182
- _Wikitext_rollback_failed_link(parser);
2099
+ wiki_rollback_failed_internal_link(parser);
2183
2100
  break; // jump back to top of loop to handle unexpected token
2184
2101
  }
2185
2102
  }
@@ -2190,42 +2107,45 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2190
2107
  break;
2191
2108
 
2192
2109
  case LINK_END:
2193
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2110
+ output = parser->capture ? parser->capture : parser->output;
2194
2111
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2195
2112
  {
2196
- _Wikitext_emit_pending_crlf_if_necessary(parser);
2197
- rb_str_cat(i, link_end, sizeof(link_end) - 1);
2113
+ wiki_emit_pending_crlf_if_necessary(parser);
2114
+ str_append(output, link_end, sizeof(link_end) - 1);
2198
2115
  }
2199
2116
  else if (IN(EXT_LINK_START))
2200
2117
  // already in external link scope! (and in fact, must be capturing link_text right now)
2201
- rb_str_cat(i, link_end, sizeof(link_end) - 1);
2118
+ str_append(output, link_end, sizeof(link_end) - 1);
2202
2119
  else if (IN(LINK_START)) // in internal link scope!
2203
2120
  {
2204
- if (_Wikitext_blank(parser->link_target))
2121
+ if (wiki_blank(parser->link_target))
2205
2122
  {
2206
2123
  // special case for inputs like "[[ ]]"
2207
- _Wikitext_rollback_failed_link(parser);
2208
- rb_str_cat(parser->output, link_end, sizeof(link_end) - 1);
2124
+ wiki_rollback_failed_internal_link(parser);
2125
+ str_append(parser->output, link_end, sizeof(link_end) - 1);
2209
2126
  break;
2210
2127
  }
2211
- if (NIL_P(parser->link_text) || RSTRING_LEN(parser->link_text) == 0)
2128
+ if (parser->link_text->len == 0 ||
2129
+ wiki_blank(parser->link_text))
2130
+ {
2212
2131
  // use link target as link text
2213
- parser->link_text = _Wikitext_parser_sanitize_link_target(parser, Qfalse);
2132
+ str_clear(parser->link_text);
2133
+ wiki_append_sanitized_link_target(parser, parser->link_text, true);
2134
+ }
2214
2135
  else
2215
- parser->link_text = _Wikitext_parser_trim_link_target(parser->link_text);
2216
- _Wikitext_parser_encode_link_target(parser);
2217
- _Wikitext_pop_from_stack_up_to(parser, i, LINK_START, Qtrue);
2218
- parser->capture = Qnil;
2219
- i = _Wikitext_hyperlink(parser, prefix, parser->link_target, parser->link_text, Qnil);
2220
- rb_str_append(parser->output, i);
2221
- parser->link_target = Qnil;
2222
- parser->link_text = Qnil;
2136
+ wiki_trim_link_text(parser);
2137
+ wiki_encode_link_target(parser);
2138
+ wiki_pop_from_stack_up_to(parser, output, LINK_START, true);
2139
+ parser->capture = NULL;
2140
+ wiki_append_hyperlink(parser, prefix, parser->link_target, parser->link_text, Qnil, false);
2141
+ str_clear(parser->link_target);
2142
+ str_clear(parser->link_text);
2223
2143
  }
2224
2144
  else // wasn't in internal link scope
2225
2145
  {
2226
- _Wikitext_pop_excess_elements(parser);
2227
- _Wikitext_start_para_if_necessary(parser);
2228
- rb_str_cat(i, link_end, sizeof(link_end) - 1);
2146
+ wiki_pop_excess_elements(parser);
2147
+ wiki_start_para_if_necessary(parser);
2148
+ str_append(output, link_end, sizeof(link_end) - 1);
2229
2149
  }
2230
2150
  break;
2231
2151
 
@@ -2235,41 +2155,28 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2235
2155
  // strings in square brackets which don't match this syntax get passed through literally; eg:
2236
2156
  // he was very angery [sic] about the turn of events
2237
2157
  case EXT_LINK_START:
2238
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2158
+ output = parser->capture ? parser->capture : parser->output;
2239
2159
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2240
2160
  {
2241
- _Wikitext_emit_pending_crlf_if_necessary(parser);
2242
- rb_str_cat(i, ext_link_start, sizeof(ext_link_start) - 1);
2161
+ wiki_emit_pending_crlf_if_necessary(parser);
2162
+ str_append(output, ext_link_start, sizeof(ext_link_start) - 1);
2243
2163
  }
2244
2164
  else if (IN(EXT_LINK_START))
2245
2165
  // already in external link scope! (and in fact, must be capturing link_text right now)
2246
- rb_str_cat(i, ext_link_start, sizeof(ext_link_start) - 1);
2166
+ str_append(output, ext_link_start, sizeof(ext_link_start) - 1);
2247
2167
  else if (IN(LINK_START))
2248
2168
  {
2249
2169
  // already in internal link scope!
2250
- i = rb_str_new(ext_link_start, sizeof(ext_link_start) - 1);
2251
- if (NIL_P(parser->link_target))
2252
- // this must be the first character of our link target
2253
- parser->link_target = i;
2254
- else if (IN(SPACE))
2255
- {
2256
- // link target has already been scanned
2257
- if (NIL_P(parser->link_text))
2258
- // this must be the first character of our link text
2259
- parser->link_text = i;
2260
- else
2261
- // add to existing link text
2262
- rb_str_append(parser->link_text, i);
2263
- }
2264
- else
2265
- // add to existing link target
2266
- rb_str_append(parser->link_target, i);
2170
+ if (parser->link_target->len == 0 || !IN(SPACE))
2171
+ str_append(parser->link_target, ext_link_start, sizeof(ext_link_start) - 1);
2172
+ else // link target has already been scanned
2173
+ str_append(parser->link_text, ext_link_start, sizeof(ext_link_start) - 1);
2267
2174
  }
2268
2175
  else // not in external link scope yet
2269
2176
  {
2270
2177
  // will either emit a link, or the rollback of a failed link, so start the para now
2271
- _Wikitext_pop_excess_elements(parser);
2272
- _Wikitext_start_para_if_necessary(parser);
2178
+ wiki_pop_excess_elements(parser);
2179
+ wiki_start_para_if_necessary(parser);
2273
2180
 
2274
2181
  // look ahead: expect an absolute URI (with protocol) or "relative" (path) URI
2275
2182
  NEXT_TOKEN();
@@ -2277,56 +2184,55 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2277
2184
  ary_push(parser->scope, EXT_LINK_START); // so far so good, jump back to the top of the loop
2278
2185
  else
2279
2186
  // only get here if there was a syntax error (missing URI)
2280
- rb_str_cat(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
2187
+ str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
2281
2188
  continue; // jump back to top of loop to handle token (either URI or whatever it is)
2282
2189
  }
2283
2190
  break;
2284
2191
 
2285
2192
  case EXT_LINK_END:
2286
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2193
+ output = parser->capture ? parser->capture : parser->output;
2287
2194
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2288
2195
  {
2289
- _Wikitext_emit_pending_crlf_if_necessary(parser);
2290
- rb_str_cat(i, ext_link_end, sizeof(ext_link_end) - 1);
2196
+ wiki_emit_pending_crlf_if_necessary(parser);
2197
+ str_append(output, ext_link_end, sizeof(ext_link_end) - 1);
2291
2198
  }
2292
2199
  else if (IN(EXT_LINK_START))
2293
2200
  {
2294
- if (NIL_P(parser->link_text))
2201
+ if (parser->link_text->len == 0)
2295
2202
  // syntax error: external link with no link text
2296
- _Wikitext_rollback_failed_external_link(parser);
2203
+ wiki_rollback_failed_external_link(parser);
2297
2204
  else
2298
2205
  {
2299
2206
  // success!
2300
2207
  j = IN(PATH) ? Qnil : parser->external_link_class;
2301
- _Wikitext_pop_from_stack_up_to(parser, i, EXT_LINK_START, Qtrue);
2302
- parser->capture = Qnil;
2303
- i = _Wikitext_hyperlink(parser, Qnil, parser->link_target, parser->link_text, j);
2304
- rb_str_append(parser->output, i);
2208
+ wiki_pop_from_stack_up_to(parser, output, EXT_LINK_START, true);
2209
+ parser->capture = NULL;
2210
+ wiki_append_hyperlink(parser, Qnil, parser->link_target, parser->link_text, j, false);
2305
2211
  }
2306
- parser->link_target = Qnil;
2307
- parser->link_text = Qnil;
2212
+ str_clear(parser->link_target);
2213
+ str_clear(parser->link_text);
2308
2214
  }
2309
2215
  else
2310
2216
  {
2311
- _Wikitext_pop_excess_elements(parser);
2312
- _Wikitext_start_para_if_necessary(parser);
2313
- rb_str_cat(parser->output, ext_link_end, sizeof(ext_link_end) - 1);
2217
+ wiki_pop_excess_elements(parser);
2218
+ wiki_start_para_if_necessary(parser);
2219
+ str_append(parser->output, ext_link_end, sizeof(ext_link_end) - 1);
2314
2220
  }
2315
2221
  break;
2316
2222
 
2317
2223
  case SEPARATOR:
2318
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2319
- _Wikitext_pop_excess_elements(parser);
2320
- _Wikitext_start_para_if_necessary(parser);
2321
- rb_str_cat(i, separator, sizeof(separator) - 1);
2224
+ output = parser->capture ? parser->capture : parser->output;
2225
+ wiki_pop_excess_elements(parser);
2226
+ wiki_start_para_if_necessary(parser);
2227
+ str_append(output, separator, sizeof(separator) - 1);
2322
2228
  break;
2323
2229
 
2324
2230
  case SPACE:
2325
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2231
+ output = parser->capture ? parser->capture : parser->output;
2326
2232
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2327
2233
  {
2328
- _Wikitext_emit_pending_crlf_if_necessary(parser);
2329
- rb_str_cat(i, token->start, TOKEN_LEN(token));
2234
+ wiki_emit_pending_crlf_if_necessary(parser);
2235
+ str_append(output, token->start, TOKEN_LEN(token));
2330
2236
  }
2331
2237
  else
2332
2238
  {
@@ -2335,21 +2241,21 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2335
2241
  int token_len = TOKEN_LEN(token);
2336
2242
  NEXT_TOKEN();
2337
2243
  type = token->type;
2338
- if (((type == H6_END) && IN(H6_START)) ||
2339
- ((type == H5_END) && IN(H5_START)) ||
2340
- ((type == H4_END) && IN(H4_START)) ||
2341
- ((type == H3_END) && IN(H3_START)) ||
2342
- ((type == H2_END) && IN(H2_START)) ||
2343
- ((type == H1_END) && IN(H1_START)))
2244
+ if ((type == H6_END && IN(H6_START)) ||
2245
+ (type == H5_END && IN(H5_START)) ||
2246
+ (type == H4_END && IN(H4_START)) ||
2247
+ (type == H3_END && IN(H3_START)) ||
2248
+ (type == H2_END && IN(H2_START)) ||
2249
+ (type == H1_END && IN(H1_START)))
2344
2250
  {
2345
2251
  // will suppress emission of space (discard) if next token is a H6_END, H5_END etc and we are in the corresponding scope
2346
2252
  }
2347
2253
  else
2348
2254
  {
2349
2255
  // emit the space
2350
- _Wikitext_pop_excess_elements(parser);
2351
- _Wikitext_start_para_if_necessary(parser);
2352
- rb_str_cat(i, token_ptr, token_len);
2256
+ wiki_pop_excess_elements(parser);
2257
+ wiki_start_para_if_necessary(parser);
2258
+ str_append(output, token_ptr, token_len);
2353
2259
  }
2354
2260
 
2355
2261
  // jump to top of the loop to process token we scanned during lookahead
@@ -2362,101 +2268,100 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2362
2268
  case NAMED_ENTITY:
2363
2269
  case DECIMAL_ENTITY:
2364
2270
  // pass these through unaltered as they are case sensitive
2365
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2366
- _Wikitext_pop_excess_elements(parser);
2367
- _Wikitext_start_para_if_necessary(parser);
2368
- rb_str_cat(i, token->start, TOKEN_LEN(token));
2271
+ output = parser->capture ? parser->capture : parser->output;
2272
+ wiki_pop_excess_elements(parser);
2273
+ wiki_start_para_if_necessary(parser);
2274
+ str_append(output, token->start, TOKEN_LEN(token));
2369
2275
  break;
2370
2276
 
2371
2277
  case HEX_ENTITY:
2372
2278
  // normalize hex entities (downcase them)
2373
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2374
- _Wikitext_pop_excess_elements(parser);
2375
- _Wikitext_start_para_if_necessary(parser);
2376
- rb_str_append(i, _Wikitext_downcase(TOKEN_TEXT(token)));
2279
+ output = parser->capture ? parser->capture : parser->output;
2280
+ wiki_pop_excess_elements(parser);
2281
+ wiki_start_para_if_necessary(parser);
2282
+ str_append(output, token->start, TOKEN_LEN(token));
2283
+ wiki_downcase_bang(output->ptr + output->len - TOKEN_LEN(token), TOKEN_LEN(token));
2377
2284
  break;
2378
2285
 
2379
2286
  case QUOT:
2380
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2381
- _Wikitext_pop_excess_elements(parser);
2382
- _Wikitext_start_para_if_necessary(parser);
2383
- rb_str_cat(i, quot_entity, sizeof(quot_entity) - 1);
2287
+ output = parser->capture ? parser->capture : parser->output;
2288
+ wiki_pop_excess_elements(parser);
2289
+ wiki_start_para_if_necessary(parser);
2290
+ str_append(output, quot_entity, sizeof(quot_entity) - 1);
2384
2291
  break;
2385
2292
 
2386
2293
  case AMP:
2387
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2388
- _Wikitext_pop_excess_elements(parser);
2389
- _Wikitext_start_para_if_necessary(parser);
2390
- rb_str_cat(i, amp_entity, sizeof(amp_entity) - 1);
2294
+ output = parser->capture ? parser->capture : parser->output;
2295
+ wiki_pop_excess_elements(parser);
2296
+ wiki_start_para_if_necessary(parser);
2297
+ str_append(output, amp_entity, sizeof(amp_entity) - 1);
2391
2298
  break;
2392
2299
 
2393
2300
  case LESS:
2394
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2395
- _Wikitext_pop_excess_elements(parser);
2396
- _Wikitext_start_para_if_necessary(parser);
2397
- rb_str_cat(i, lt_entity, sizeof(lt_entity) - 1);
2301
+ output = parser->capture ? parser->capture : parser->output;
2302
+ wiki_pop_excess_elements(parser);
2303
+ wiki_start_para_if_necessary(parser);
2304
+ str_append(output, lt_entity, sizeof(lt_entity) - 1);
2398
2305
  break;
2399
2306
 
2400
2307
  case GREATER:
2401
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2402
- _Wikitext_pop_excess_elements(parser);
2403
- _Wikitext_start_para_if_necessary(parser);
2404
- rb_str_cat(i, gt_entity, sizeof(gt_entity) - 1);
2308
+ output = parser->capture ? parser->capture : parser->output;
2309
+ wiki_pop_excess_elements(parser);
2310
+ wiki_start_para_if_necessary(parser);
2311
+ str_append(output, gt_entity, sizeof(gt_entity) - 1);
2405
2312
  break;
2406
2313
 
2407
2314
  case IMG_START:
2408
2315
  if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
2409
2316
  {
2410
- _Wikitext_emit_pending_crlf_if_necessary(parser);
2411
- rb_str_cat(parser->output, token->start, TOKEN_LEN(token));
2317
+ wiki_emit_pending_crlf_if_necessary(parser);
2318
+ str_append(parser->output, token->start, TOKEN_LEN(token));
2412
2319
  }
2413
- else if (!NIL_P(parser->capture))
2414
- rb_str_cat(parser->capture, token->start, TOKEN_LEN(token));
2320
+ else if (parser->capture)
2321
+ str_append(parser->capture, token->start, TOKEN_LEN(token));
2415
2322
  else
2416
2323
  {
2417
2324
  // not currently capturing: will be emitting something on success or failure, so get ready
2418
- _Wikitext_pop_excess_elements(parser);
2419
- _Wikitext_start_para_if_necessary(parser);
2325
+ wiki_pop_excess_elements(parser);
2326
+ wiki_start_para_if_necessary(parser);
2420
2327
 
2421
2328
  // scan ahead consuming PATH, PRINTABLE, ALNUM and SPECIAL_URI_CHARS tokens
2422
2329
  // will cheat here and abuse the link_target capture buffer to accumulate text
2423
- if (NIL_P(parser->link_target))
2424
- parser->link_target = rb_str_new2("");
2425
2330
  while (NEXT_TOKEN(), (type = token->type))
2426
2331
  {
2427
2332
  if (type == PATH || type == PRINTABLE || type == ALNUM || type == SPECIAL_URI_CHARS)
2428
- rb_str_cat(parser->link_target, token->start, TOKEN_LEN(token));
2429
- else if (type == IMG_END && RSTRING_LEN(parser->link_target) > 0)
2333
+ str_append(parser->link_target, token->start, TOKEN_LEN(token));
2334
+ else if (type == IMG_END && parser->link_target->len > 0)
2430
2335
  {
2431
2336
  // success
2432
- _Wikitext_append_img(parser, RSTRING_PTR(parser->link_target), RSTRING_LEN(parser->link_target));
2337
+ wiki_append_img(parser, parser->link_target->ptr, parser->link_target->len);
2433
2338
  token = NULL;
2434
2339
  break;
2435
2340
  }
2436
2341
  else // unexpected token or zero-length target (syntax error)
2437
2342
  {
2438
2343
  // rollback
2439
- rb_str_cat(parser->output, literal_img_start, sizeof(literal_img_start) - 1);
2440
- rb_str_cat(parser->output, RSTRING_PTR(parser->link_target), RSTRING_LEN(parser->link_target));
2344
+ str_append(parser->output, literal_img_start, sizeof(literal_img_start) - 1);
2345
+ if (parser->link_target->len > 0)
2346
+ str_append(parser->output, parser->link_target->ptr, parser->link_target->len);
2441
2347
  break;
2442
2348
  }
2443
2349
  }
2444
2350
 
2445
2351
  // jump to top of the loop to process token we scanned during lookahead
2446
- parser->link_target = Qnil;
2352
+ str_clear(parser->link_target);
2447
2353
  continue;
2448
2354
  }
2449
2355
  break;
2450
2356
 
2451
2357
  case CRLF:
2452
2358
  i = parser->pending_crlf;
2453
- parser->pending_crlf = Qfalse;
2454
- _Wikitext_rollback_failed_link(parser); // if any
2455
- _Wikitext_rollback_failed_external_link(parser); // if any
2359
+ parser->pending_crlf = false;
2360
+ wiki_rollback_failed_link(parser); // if any
2456
2361
  if (IN(NO_WIKI_START) || IN(PRE_START))
2457
2362
  {
2458
2363
  ary_clear(parser->line_buffer);
2459
- rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
2364
+ str_append_str(parser->output, parser->line_ending);
2460
2365
  break;
2461
2366
  }
2462
2367
  else if (IN(PRE))
@@ -2464,14 +2369,14 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2464
2369
  // beware when BLOCKQUOTE on line buffer (not line stack!) prior to CRLF, that must be end of PRE block
2465
2370
  if (ary_entry(parser->line_buffer, -2) == BLOCKQUOTE)
2466
2371
  // don't emit in this case
2467
- _Wikitext_pop_from_stack_up_to(parser, parser->output, PRE, Qtrue);
2372
+ wiki_pop_from_stack_up_to(parser, parser->output, PRE, true);
2468
2373
  else
2469
2374
  {
2470
2375
  if (ary_entry(parser->line_buffer, -2) == PRE)
2471
2376
  {
2472
2377
  // only thing on line is the PRE: emit pending line ending (if we had one)
2473
- if (i == Qtrue)
2474
- rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
2378
+ if (i)
2379
+ str_append_str(parser->output, parser->line_ending);
2475
2380
  }
2476
2381
 
2477
2382
  // clear these _before_ calling NEXT_TOKEN (NEXT_TOKEN adds to the line_buffer)
@@ -2483,17 +2388,17 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2483
2388
  type = token->type;
2484
2389
  if (type != BLOCKQUOTE && type != PRE)
2485
2390
  // this is definitely the end of the block, so don't emit
2486
- _Wikitext_pop_from_stack_up_to(parser, parser->output, PRE, Qtrue);
2391
+ wiki_pop_from_stack_up_to(parser, parser->output, PRE, true);
2487
2392
  else
2488
2393
  // potentially will emit
2489
- parser->pending_crlf = Qtrue;
2394
+ parser->pending_crlf = true;
2490
2395
 
2491
2396
  continue; // jump back to top of loop to handle token grabbed via lookahead
2492
2397
  }
2493
2398
  }
2494
2399
  else
2495
2400
  {
2496
- parser->pending_crlf = Qtrue;
2401
+ parser->pending_crlf = true;
2497
2402
 
2498
2403
  // count number of BLOCKQUOTE tokens in line buffer (can be zero) and pop back to that level
2499
2404
  // as a side effect, this handles any open span-level elements and unclosed blocks
@@ -2503,7 +2408,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2503
2408
  {
2504
2409
  if (parser->scope->count > 0 && ary_entry(parser->scope, -1) == LI)
2505
2410
  {
2506
- parser->pending_crlf = Qfalse;
2411
+ parser->pending_crlf = false;
2507
2412
  break;
2508
2413
  }
2509
2414
 
@@ -2516,12 +2421,12 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2516
2421
  if (NO_ITEM(ary_entry(parser->line_buffer, -2)) ||
2517
2422
  (ary_entry(parser->line_buffer, -2) == BLOCKQUOTE && !IN(BLOCKQUOTE_START)))
2518
2423
  // paragraph break
2519
- parser->pending_crlf = Qfalse;
2424
+ parser->pending_crlf = false;
2520
2425
  else
2521
2426
  // not a paragraph break!
2522
2427
  continue;
2523
2428
  }
2524
- _Wikitext_pop_from_stack(parser, Qnil);
2429
+ wiki_pop_from_stack(parser, NULL);
2525
2430
  }
2526
2431
  }
2527
2432
 
@@ -2536,31 +2441,29 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2536
2441
  case IMG_END:
2537
2442
  case LEFT_CURLY:
2538
2443
  case RIGHT_CURLY:
2539
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2540
- _Wikitext_pop_excess_elements(parser);
2541
- _Wikitext_start_para_if_necessary(parser);
2542
- rb_str_cat(i, token->start, TOKEN_LEN(token));
2444
+ output = parser->capture ? parser->capture : parser->output;
2445
+ wiki_pop_excess_elements(parser);
2446
+ wiki_start_para_if_necessary(parser);
2447
+ str_append(output, token->start, TOKEN_LEN(token));
2543
2448
  break;
2544
2449
 
2545
2450
  case DEFAULT:
2546
- i = NIL_P(parser->capture) ? parser->output : parser->capture;
2547
- _Wikitext_pop_excess_elements(parser);
2548
- _Wikitext_start_para_if_necessary(parser);
2549
- rb_str_append(i, _Wikitext_utf32_char_to_entity(token->code_point)); // convert to entity
2451
+ output = parser->capture ? parser->capture : parser->output;
2452
+ wiki_pop_excess_elements(parser);
2453
+ wiki_start_para_if_necessary(parser);
2454
+ wiki_append_entity_from_utf32_char(output, token->code_point);
2550
2455
  break;
2551
2456
 
2552
2457
  case END_OF_FILE:
2553
2458
  // special case for input like " foo\n " (see pre_spec.rb)
2554
2459
  if (IN(PRE) &&
2555
2460
  ary_entry(parser->line_buffer, -2) == PRE &&
2556
- parser->pending_crlf == Qtrue)
2557
- rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
2461
+ parser->pending_crlf)
2462
+ str_append(parser->output, parser->line_ending->ptr, parser->line_ending->len);
2558
2463
 
2559
2464
  // close any open scopes on hitting EOF
2560
- _Wikitext_rollback_failed_external_link(parser); // if any
2561
- _Wikitext_rollback_failed_link(parser); // if any
2562
- for (i = 0, j = parser->scope->count; i < j; i++)
2563
- _Wikitext_pop_from_stack(parser, Qnil);
2465
+ wiki_rollback_failed_link(parser); // if any
2466
+ wiki_pop_all_from_stack(parser);
2564
2467
  goto return_output; // break not enough here (want to break out of outer while loop, not inner switch statement)
2565
2468
 
2566
2469
  default:
@@ -2571,5 +2474,25 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
2571
2474
  token = NULL;
2572
2475
  } while (1);
2573
2476
  return_output:
2574
- return parser->output;
2477
+ // nasty hack to avoid re-allocating our return value
2478
+ str_append(parser->output, null_str, 1); // null-terminate
2479
+ len = parser->output->len - 1; // don't count null termination
2480
+
2481
+ #if defined(RUBY_1_9_x)
2482
+ VALUE out = rb_str_buf_new(RSTRING_EMBED_LEN_MAX + 1);
2483
+ free(RSTRING_PTR(out));
2484
+ RSTRING(out)->as.heap.aux.capa = len;
2485
+ RSTRING(out)->as.heap.ptr = parser->output->ptr;
2486
+ RSTRING(out)->as.heap.len = len;
2487
+ #elif defined(RUBY_1_8_x)
2488
+ VALUE out = rb_str_new2("");
2489
+ free(RSTRING_PTR(out));
2490
+ RSTRING(out)->len = len;
2491
+ RSTRING(out)->aux.capa = len;
2492
+ RSTRING(out)->ptr = parser->output->ptr;
2493
+ #else
2494
+ #error unsupported RUBY_VERSION
2495
+ #endif
2496
+ parser->output->ptr = NULL; // don't double-free
2497
+ return out;
2575
2498
  }