wikitext 1.6 → 1.7
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/ary.h +0 -6
- data/ext/extconf.rb +9 -0
- data/ext/parser.c +762 -839
- data/ext/parser.h +0 -2
- data/ext/str.c +16 -33
- data/ext/str.h +4 -12
- data/ext/token.c +1 -1
- data/ext/token.h +2 -2
- data/ext/wikitext.c +0 -1
- data/lib/wikitext/version.rb +1 -1
- data/spec/external_link_spec.rb +17 -0
- data/spec/internal_link_spec.rb +7 -1
- data/spec/link_encoding_spec.rb +0 -47
- data/spec/link_sanitizing_spec.rb +1 -1
- data/spec/regressions_spec.rb +2 -2
- metadata +2 -2
data/ext/ary.h
CHANGED
@@ -35,10 +35,6 @@ typedef struct
|
|
35
35
|
|
36
36
|
#define NO_ITEM(item) (item == INT_MAX)
|
37
37
|
|
38
|
-
// Mark the ary struct designated by ptr as a participant in Ruby's mark-and-sweep garbage collection scheme.
|
39
|
-
// A variable named name is placed on the C stack to prevent the structure from being prematurely collected.
|
40
|
-
#define GC_WRAP_ARY(ptr, name) volatile VALUE name __attribute__((unused)) = Data_Wrap_Struct(rb_cObject, 0, ary_free, ptr)
|
41
|
-
|
42
38
|
ary_t *ary_new(void);
|
43
39
|
int ary_entry(ary_t *ary, int idx);
|
44
40
|
void ary_clear(ary_t *ary);
|
@@ -47,8 +43,6 @@ void ary_push(ary_t *ary, int val);
|
|
47
43
|
int ary_includes(ary_t *ary, int val);
|
48
44
|
|
49
45
|
// returns a count indicating the number of times the value appears in the collection
|
50
|
-
// refactored from _Wikitext_count()
|
51
46
|
int ary_count(ary_t *ary, int item);
|
52
47
|
|
53
|
-
// this method not inlined so its address can be passed to the Data_Wrap_Struct function.
|
54
48
|
void ary_free(ary_t *ary);
|
data/ext/extconf.rb
CHANGED
@@ -28,5 +28,14 @@ def missing item
|
|
28
28
|
exit 1
|
29
29
|
end
|
30
30
|
|
31
|
+
case RUBY_VERSION
|
32
|
+
when /\A1\.8/
|
33
|
+
$CFLAGS += ' -DRUBY_1_8_x'
|
34
|
+
when /\A1\.9/
|
35
|
+
$CFLAGS += ' -DRUBY_1_9_x'
|
36
|
+
else
|
37
|
+
raise "unsupported Ruby version: #{RUBY_VERSION}"
|
38
|
+
end
|
39
|
+
|
31
40
|
have_header('ruby.h') or missing 'ruby.h'
|
32
41
|
create_makefile('wikitext')
|
data/ext/parser.c
CHANGED
@@ -21,6 +21,8 @@
|
|
21
21
|
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
22
22
|
// POSSIBILITY OF SUCH DAMAGE.
|
23
23
|
|
24
|
+
#include <stdbool.h>
|
25
|
+
|
24
26
|
#include "parser.h"
|
25
27
|
#include "ary.h"
|
26
28
|
#include "str.h"
|
@@ -32,29 +34,29 @@
|
|
32
34
|
// poor man's object orientation in C:
|
33
35
|
// instead of passing around multiple parameters between functions in the parser
|
34
36
|
// we pack everything into a struct and pass around only a pointer to that
|
35
|
-
// TODO: consider changing some of the VALUE members (eg link_target) to the more efficient str_t type
|
36
37
|
typedef struct
|
37
38
|
{
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
VALUE img_prefix; // path prepended when emitting img tags
|
39
|
+
str_t *capture; // capturing to link_target, link_text, or NULL (direct to output, not capturing)
|
40
|
+
str_t *output; // for accumulating output to be returned
|
41
|
+
str_t *link_target; // short term "memory" for parsing links
|
42
|
+
str_t *link_text; // short term "memory" for parsing links
|
43
|
+
str_t *line_ending;
|
44
|
+
str_t *tabulation; // caching buffer for emitting indentation
|
45
45
|
ary_t *scope; // stack for tracking scope
|
46
46
|
ary_t *line; // stack for tracking scope as implied by current line
|
47
47
|
ary_t *line_buffer; // stack for tracking raw tokens (not scope) on current line
|
48
|
-
VALUE
|
49
|
-
VALUE
|
50
|
-
VALUE
|
51
|
-
str_t *line_ending;
|
48
|
+
VALUE external_link_class; // CSS class applied to external links
|
49
|
+
VALUE mailto_class; // CSS class applied to email (mailto) links
|
50
|
+
VALUE img_prefix; // path prepended when emitting img tags
|
52
51
|
int base_indent; // controlled by the :indent option to Wikitext::Parser#parse
|
53
52
|
int current_indent; // fluctuates according to currently nested structures
|
54
|
-
str_t *tabulation; // caching buffer for emitting indentation
|
55
53
|
int base_heading_level;
|
54
|
+
bool pending_crlf;
|
55
|
+
bool autolink;
|
56
|
+
bool space_to_underscore;
|
56
57
|
} parser_t;
|
57
58
|
|
59
|
+
const char null_str[] = { 0 };
|
58
60
|
const char escaped_no_wiki_start[] = "&lt;nowiki&gt;";
|
59
61
|
const char escaped_no_wiki_end[] = "&lt;/nowiki&gt;";
|
60
62
|
const char literal_strong_em[] = "'''''";
|
@@ -66,12 +68,6 @@ const char escaped_strong_start[] = "<strong>";
|
|
66
68
|
const char escaped_strong_end[] = "&lt;/strong&gt;";
|
67
69
|
const char escaped_tt_start[] = "&lt;tt&gt;";
|
68
70
|
const char escaped_tt_end[] = "&lt;/tt&gt;";
|
69
|
-
const char literal_h6[] = "======";
|
70
|
-
const char literal_h5[] = "=====";
|
71
|
-
const char literal_h4[] = "====";
|
72
|
-
const char literal_h3[] = "===";
|
73
|
-
const char literal_h2[] = "==";
|
74
|
-
const char literal_h1[] = "=";
|
75
71
|
const char pre_start[] = "<pre>";
|
76
72
|
const char pre_end[] = "</pre>";
|
77
73
|
const char escaped_pre_start[] = "&lt;pre&gt;";
|
@@ -130,6 +126,49 @@ const char img_start[] = "<img src=\"";
|
|
130
126
|
const char img_end[] = "\" />";
|
131
127
|
const char img_alt[] = "\" alt=\"";
|
132
128
|
|
129
|
+
// Mark the parser struct designated by ptr as a participant in Ruby's
|
130
|
+
// mark-and-sweep garbage collection scheme. A variable named name is placed on
|
131
|
+
// the C stack to prevent the structure from being prematurely collected.
|
132
|
+
#define GC_WRAP_PARSER(ptr, name) volatile VALUE name __attribute__((unused)) = Data_Wrap_Struct(rb_cObject, 0, parser_free, ptr)
|
133
|
+
|
134
|
+
parser_t *parser_new(void)
|
135
|
+
{
|
136
|
+
parser_t *parser = ALLOC_N(parser_t, 1);
|
137
|
+
parser->capture = NULL; // not a real instance, pointer to other member's instance
|
138
|
+
parser->output = str_new();
|
139
|
+
parser->link_target = str_new();
|
140
|
+
parser->link_text = str_new();
|
141
|
+
parser->line_ending = NULL; // caller should set up
|
142
|
+
parser->tabulation = str_new();
|
143
|
+
parser->scope = ary_new();
|
144
|
+
parser->line = ary_new();
|
145
|
+
parser->line_buffer = ary_new();
|
146
|
+
parser->external_link_class = Qnil; // caller should set up
|
147
|
+
parser->mailto_class = Qnil; // caller should set up
|
148
|
+
parser->img_prefix = Qnil; // caller should set up
|
149
|
+
parser->base_indent = 0;
|
150
|
+
parser->current_indent = 0;
|
151
|
+
parser->base_heading_level = 0;
|
152
|
+
parser->pending_crlf = false;
|
153
|
+
parser->autolink = true;
|
154
|
+
parser->space_to_underscore = true;
|
155
|
+
return parser;
|
156
|
+
}
|
157
|
+
|
158
|
+
void parser_free(parser_t *parser)
|
159
|
+
{
|
160
|
+
// we don't free parser->capture; it's just a redundant pointer
|
161
|
+
if (parser->output) str_free(parser->output);
|
162
|
+
if (parser->link_target) str_free(parser->link_target);
|
163
|
+
if (parser->link_text) str_free(parser->link_text);
|
164
|
+
if (parser->line_ending) str_free(parser->line_ending);
|
165
|
+
if (parser->tabulation) str_free(parser->tabulation);
|
166
|
+
if (parser->scope) ary_free(parser->scope);
|
167
|
+
if (parser->line) ary_free(parser->line);
|
168
|
+
if (parser->line_buffer) ary_free(parser->line_buffer);
|
169
|
+
free(parser);
|
170
|
+
}
|
171
|
+
|
133
172
|
// for testing and debugging only
|
134
173
|
VALUE Wikitext_parser_tokenize(VALUE self, VALUE string)
|
135
174
|
{
|
@@ -142,11 +181,11 @@ VALUE Wikitext_parser_tokenize(VALUE self, VALUE string)
|
|
142
181
|
char *pe = p + len;
|
143
182
|
token_t token;
|
144
183
|
next_token(&token, NULL, p, pe);
|
145
|
-
rb_ary_push(tokens,
|
184
|
+
rb_ary_push(tokens, wiki_token(&token));
|
146
185
|
while (token.type != END_OF_FILE)
|
147
186
|
{
|
148
187
|
next_token(&token, &token, NULL, pe);
|
149
|
-
rb_ary_push(tokens,
|
188
|
+
rb_ary_push(tokens, wiki_token(&token));
|
150
189
|
}
|
151
190
|
return tokens;
|
152
191
|
}
|
@@ -217,59 +256,66 @@ VALUE Wikitext_parser_fulltext_tokenize(int argc, VALUE *argv, VALUE self)
|
|
217
256
|
return tokens;
|
218
257
|
}
|
219
258
|
|
220
|
-
// we downcase "in place", overwriting the original contents of the buffer
|
221
|
-
|
259
|
+
// we downcase "in place", overwriting the original contents of the buffer
|
260
|
+
void wiki_downcase_bang(char *ptr, long len)
|
222
261
|
{
|
223
|
-
char *ptr = RSTRING_PTR(string);
|
224
|
-
long len = RSTRING_LEN(string);
|
225
262
|
for (long i = 0; i < len; i++)
|
226
263
|
{
|
227
264
|
if (ptr[i] >= 'A' && ptr[i] <= 'Z')
|
228
265
|
ptr[i] += 32;
|
229
266
|
}
|
230
|
-
return string;
|
231
267
|
}
|
232
268
|
|
233
|
-
|
269
|
+
// prepare hyperlink and append it to parser->output
|
270
|
+
// if check_autolink is true, checks parser->autolink to decide whether to emit a real hyperlink
|
271
|
+
// or merely the literal link target
|
272
|
+
// if link_text is NULL or empty, the link_target is re-used for the link text
|
273
|
+
void wiki_append_hyperlink(parser_t *parser, VALUE link_prefix, str_t *link_target, str_t *link_text, VALUE link_class, bool check_autolink)
|
234
274
|
{
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
rb_str_append(string, link_target);
|
239
|
-
|
240
|
-
// special handling for mailto URIs
|
241
|
-
const char *mailto = "mailto:";
|
242
|
-
if (NIL_P(link_prefix) &&
|
243
|
-
RSTRING_LEN(link_target) >= (long)sizeof(mailto) &&
|
244
|
-
strncmp(mailto, RSTRING_PTR(link_target), sizeof(mailto)) == 0)
|
245
|
-
link_class = parser->mailto_class; // use mailto_class from parser
|
246
|
-
|
247
|
-
if (link_class != Qnil)
|
275
|
+
if (check_autolink && !parser->autolink)
|
276
|
+
str_append_str(parser->output, link_target);
|
277
|
+
else
|
248
278
|
{
|
249
|
-
|
250
|
-
|
279
|
+
str_append(parser->output, a_start, sizeof(a_start) - 1); // <a href="
|
280
|
+
if (!NIL_P(link_prefix))
|
281
|
+
str_append_string(parser->output, link_prefix);
|
282
|
+
str_append_str(parser->output, link_target);
|
283
|
+
|
284
|
+
// special handling for mailto URIs
|
285
|
+
const char *mailto = "mailto:";
|
286
|
+
if (NIL_P(link_prefix) &&
|
287
|
+
link_target->len >= (long)sizeof(mailto) &&
|
288
|
+
strncmp(mailto, link_target->ptr, sizeof(mailto)) == 0)
|
289
|
+
link_class = parser->mailto_class; // use mailto_class from parser
|
290
|
+
if (link_class != Qnil)
|
291
|
+
{
|
292
|
+
str_append(parser->output, a_class, sizeof(a_class) - 1); // " class="
|
293
|
+
str_append_string(parser->output, link_class);
|
294
|
+
}
|
295
|
+
str_append(parser->output, a_start_close, sizeof(a_start_close) - 1); // ">
|
296
|
+
if (!link_text || link_text->len == 0) // re-use link_target
|
297
|
+
str_append_str(parser->output, link_target);
|
298
|
+
else
|
299
|
+
str_append_str(parser->output, link_text);
|
300
|
+
str_append(parser->output, a_end, sizeof(a_end) - 1); // </a>
|
251
301
|
}
|
252
|
-
rb_str_cat(string, a_start_close, sizeof(a_start_close) - 1); // ">
|
253
|
-
rb_str_append(string, link_text);
|
254
|
-
rb_str_cat(string, a_end, sizeof(a_end) - 1);
|
255
|
-
return string;
|
256
302
|
}
|
257
303
|
|
258
|
-
void
|
304
|
+
void wiki_append_img(parser_t *parser, char *token_ptr, int token_len)
|
259
305
|
{
|
260
|
-
|
306
|
+
str_append(parser->output, img_start, sizeof(img_start) - 1); // <img src="
|
261
307
|
if (!NIL_P(parser->img_prefix) && *token_ptr != '/') // len always > 0
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
308
|
+
str_append_string(parser->output, parser->img_prefix);
|
309
|
+
str_append(parser->output, token_ptr, token_len);
|
310
|
+
str_append(parser->output, img_alt, sizeof(img_alt) - 1); // " alt="
|
311
|
+
str_append(parser->output, token_ptr, token_len);
|
312
|
+
str_append(parser->output, img_end, sizeof(img_end) - 1); // " />
|
267
313
|
}
|
268
314
|
|
269
315
|
// will emit indentation only if we are about to emit any of:
|
270
316
|
// <blockquote>, <p>, <ul>, <ol>, <li>, <h1> etc, <pre>
|
271
317
|
// each time we enter one of those spans must ++ the indentation level
|
272
|
-
void
|
318
|
+
void wiki_indent(parser_t *parser)
|
273
319
|
{
|
274
320
|
if (parser->base_indent == -1) // indentation disabled
|
275
321
|
return;
|
@@ -285,32 +331,32 @@ void _Wikitext_indent(parser_t *parser)
|
|
285
331
|
*old_end++ = ' ';
|
286
332
|
if (space_count > parser->tabulation->len)
|
287
333
|
parser->tabulation->len = space_count;
|
288
|
-
|
334
|
+
str_append(parser->output, parser->tabulation->ptr, space_count);
|
289
335
|
}
|
290
336
|
parser->current_indent += 2;
|
291
337
|
}
|
292
338
|
|
293
|
-
void
|
339
|
+
void wiki_dedent(parser_t *parser, bool emit)
|
294
340
|
{
|
295
341
|
if (parser->base_indent == -1) // indentation disabled
|
296
342
|
return;
|
297
343
|
parser->current_indent -= 2;
|
298
|
-
if (emit
|
344
|
+
if (!emit)
|
299
345
|
return;
|
300
346
|
int space_count = parser->current_indent + parser->base_indent;
|
301
347
|
if (space_count > 0)
|
302
|
-
|
348
|
+
str_append(parser->output, parser->tabulation->ptr, space_count);
|
303
349
|
}
|
304
350
|
|
305
351
|
// Pops a single item off the parser's scope stack.
|
306
352
|
// A corresponding closing tag is written to the target string.
|
307
353
|
// The target string may be the main output buffer, or a substring capturing buffer if a link is being scanned.
|
308
|
-
void
|
354
|
+
void wiki_pop_from_stack(parser_t *parser, str_t *target)
|
309
355
|
{
|
310
356
|
int top = ary_entry(parser->scope, -1);
|
311
357
|
if (NO_ITEM(top))
|
312
358
|
return;
|
313
|
-
if (
|
359
|
+
if (!target)
|
314
360
|
target = parser->output;
|
315
361
|
|
316
362
|
// for headings, take base_heading_level into account
|
@@ -326,16 +372,16 @@ void _Wikitext_pop_from_stack(parser_t *parser, VALUE target)
|
|
326
372
|
{
|
327
373
|
case PRE:
|
328
374
|
case PRE_START:
|
329
|
-
|
330
|
-
|
331
|
-
|
375
|
+
str_append(target, pre_end, sizeof(pre_end) - 1);
|
376
|
+
str_append_str(target, parser->line_ending);
|
377
|
+
wiki_dedent(parser, false);
|
332
378
|
break;
|
333
379
|
|
334
380
|
case BLOCKQUOTE:
|
335
381
|
case BLOCKQUOTE_START:
|
336
|
-
|
337
|
-
|
338
|
-
|
382
|
+
wiki_dedent(parser, true);
|
383
|
+
str_append(target, blockquote_end, sizeof(blockquote_end) - 1);
|
384
|
+
str_append_str(target, parser->line_ending);
|
339
385
|
break;
|
340
386
|
|
341
387
|
case NO_WIKI_START:
|
@@ -344,29 +390,29 @@ void _Wikitext_pop_from_stack(parser_t *parser, VALUE target)
|
|
344
390
|
|
345
391
|
case STRONG:
|
346
392
|
case STRONG_START:
|
347
|
-
|
393
|
+
str_append(target, strong_end, sizeof(strong_end) - 1);
|
348
394
|
break;
|
349
395
|
|
350
396
|
case EM:
|
351
397
|
case EM_START:
|
352
|
-
|
398
|
+
str_append(target, em_end, sizeof(em_end) - 1);
|
353
399
|
break;
|
354
400
|
|
355
401
|
case TT:
|
356
402
|
case TT_START:
|
357
|
-
|
403
|
+
str_append(target, tt_end, sizeof(tt_end) - 1);
|
358
404
|
break;
|
359
405
|
|
360
406
|
case OL:
|
361
|
-
|
362
|
-
|
363
|
-
|
407
|
+
wiki_dedent(parser, true);
|
408
|
+
str_append(target, ol_end, sizeof(ol_end) - 1);
|
409
|
+
str_append_str(target, parser->line_ending);
|
364
410
|
break;
|
365
411
|
|
366
412
|
case UL:
|
367
|
-
|
368
|
-
|
369
|
-
|
413
|
+
wiki_dedent(parser, true);
|
414
|
+
str_append(target, ul_end, sizeof(ul_end) - 1);
|
415
|
+
str_append_str(target, parser->line_ending);
|
370
416
|
break;
|
371
417
|
|
372
418
|
case NESTED_LIST:
|
@@ -375,50 +421,50 @@ void _Wikitext_pop_from_stack(parser_t *parser, VALUE target)
|
|
375
421
|
// and other times we want it to behave like BLOCKQUOTE (ie. when it has a nested list inside)
|
376
422
|
// hence this hack: we do an emitting dedent on behalf of the LI that we know must be coming
|
377
423
|
// and then when we pop the actual LI itself (below) we do the standard non-emitting dedent
|
378
|
-
|
379
|
-
parser->current_indent += 2;
|
424
|
+
wiki_dedent(parser, true); // we really only want to emit the spaces
|
425
|
+
parser->current_indent += 2; // we don't want to decrement the actual indent level, so put it back
|
380
426
|
break;
|
381
427
|
|
382
428
|
case LI:
|
383
|
-
|
384
|
-
|
385
|
-
|
429
|
+
str_append(target, li_end, sizeof(li_end) - 1);
|
430
|
+
str_append_str(target, parser->line_ending);
|
431
|
+
wiki_dedent(parser, false);
|
386
432
|
break;
|
387
433
|
|
388
434
|
case H6_START:
|
389
|
-
|
390
|
-
|
391
|
-
|
435
|
+
str_append(target, h6_end, sizeof(h6_end) - 1);
|
436
|
+
str_append_str(target, parser->line_ending);
|
437
|
+
wiki_dedent(parser, false);
|
392
438
|
break;
|
393
439
|
|
394
440
|
case H5_START:
|
395
|
-
|
396
|
-
|
397
|
-
|
441
|
+
str_append(target, h5_end, sizeof(h5_end) - 1);
|
442
|
+
str_append_str(target, parser->line_ending);
|
443
|
+
wiki_dedent(parser, false);
|
398
444
|
break;
|
399
445
|
|
400
446
|
case H4_START:
|
401
|
-
|
402
|
-
|
403
|
-
|
447
|
+
str_append(target, h4_end, sizeof(h4_end) - 1);
|
448
|
+
str_append_str(target, parser->line_ending);
|
449
|
+
wiki_dedent(parser, false);
|
404
450
|
break;
|
405
451
|
|
406
452
|
case H3_START:
|
407
|
-
|
408
|
-
|
409
|
-
|
453
|
+
str_append(target, h3_end, sizeof(h3_end) - 1);
|
454
|
+
str_append_str(target, parser->line_ending);
|
455
|
+
wiki_dedent(parser, false);
|
410
456
|
break;
|
411
457
|
|
412
458
|
case H2_START:
|
413
|
-
|
414
|
-
|
415
|
-
|
459
|
+
str_append(target, h2_end, sizeof(h2_end) - 1);
|
460
|
+
str_append_str(target, parser->line_ending);
|
461
|
+
wiki_dedent(parser, false);
|
416
462
|
break;
|
417
463
|
|
418
464
|
case H1_START:
|
419
|
-
|
420
|
-
|
421
|
-
|
465
|
+
str_append(target, h1_end, sizeof(h1_end) - 1);
|
466
|
+
str_append_str(target, parser->line_ending);
|
467
|
+
wiki_dedent(parser, false);
|
422
468
|
break;
|
423
469
|
|
424
470
|
case LINK_START:
|
@@ -442,9 +488,9 @@ void _Wikitext_pop_from_stack(parser_t *parser, VALUE target)
|
|
442
488
|
break;
|
443
489
|
|
444
490
|
case P:
|
445
|
-
|
446
|
-
|
447
|
-
|
491
|
+
str_append(target, p_end, sizeof(p_end) - 1);
|
492
|
+
str_append_str(target, parser->line_ending);
|
493
|
+
wiki_dedent(parser, false);
|
448
494
|
break;
|
449
495
|
|
450
496
|
case END_OF_FILE:
|
@@ -459,9 +505,9 @@ void _Wikitext_pop_from_stack(parser_t *parser, VALUE target)
|
|
459
505
|
}
|
460
506
|
|
461
507
|
// Pops items off the top of parser's scope stack, accumulating closing tags for them into the target string, until item is reached.
|
462
|
-
// If including is
|
508
|
+
// If including is true then the item itself is also popped.
|
463
509
|
// The target string may be the main output buffer, or a substring capturing buffer when scanning links.
|
464
|
-
void
|
510
|
+
void wiki_pop_from_stack_up_to(parser_t *parser, str_t *target, int item, bool including)
|
465
511
|
{
|
466
512
|
int continue_looping = 1;
|
467
513
|
do
|
@@ -471,23 +517,23 @@ void _Wikitext_pop_from_stack_up_to(parser_t *parser, VALUE target, int item, VA
|
|
471
517
|
return;
|
472
518
|
if (top == item)
|
473
519
|
{
|
474
|
-
if (including
|
520
|
+
if (!including)
|
475
521
|
return;
|
476
522
|
continue_looping = 0;
|
477
523
|
}
|
478
|
-
|
524
|
+
wiki_pop_from_stack(parser, target);
|
479
525
|
} while (continue_looping);
|
480
526
|
}
|
481
527
|
|
482
|
-
void
|
528
|
+
void wiki_pop_all_from_stack(parser_t *parser)
|
483
529
|
{
|
484
|
-
|
485
|
-
|
530
|
+
for (int i = 0, max = parser->scope->count; i < max; i++)
|
531
|
+
wiki_pop_from_stack(parser, NULL);
|
486
532
|
}
|
487
533
|
|
488
|
-
void
|
534
|
+
void wiki_start_para_if_necessary(parser_t *parser)
|
489
535
|
{
|
490
|
-
if (
|
536
|
+
if (parser->capture)
|
491
537
|
return;
|
492
538
|
|
493
539
|
// if no block open yet, or top of stack is BLOCKQUOTE/BLOCKQUOTE_START (with nothing in it yet)
|
@@ -495,29 +541,29 @@ void _Wikitext_start_para_if_necessary(parser_t *parser)
|
|
495
541
|
ary_entry(parser->scope, -1) == BLOCKQUOTE ||
|
496
542
|
ary_entry(parser->scope, -1) == BLOCKQUOTE_START)
|
497
543
|
{
|
498
|
-
|
499
|
-
|
544
|
+
wiki_indent(parser);
|
545
|
+
str_append(parser->output, p_start, sizeof(p_start) - 1);
|
500
546
|
ary_push(parser->scope, P);
|
501
547
|
ary_push(parser->line, P);
|
502
548
|
}
|
503
|
-
else if (parser->pending_crlf
|
549
|
+
else if (parser->pending_crlf)
|
504
550
|
{
|
505
551
|
if (IN(P))
|
506
552
|
// already in a paragraph block; convert pending CRLF into a space
|
507
|
-
|
553
|
+
str_append(parser->output, space, sizeof(space) - 1);
|
508
554
|
else if (IN(PRE))
|
509
555
|
// PRE blocks can have pending CRLF too (helps us avoid emitting the trailing newline)
|
510
|
-
|
556
|
+
str_append_str(parser->output, parser->line_ending);
|
511
557
|
}
|
512
|
-
parser->pending_crlf =
|
558
|
+
parser->pending_crlf = false;
|
513
559
|
}
|
514
560
|
|
515
|
-
void
|
561
|
+
void wiki_emit_pending_crlf_if_necessary(parser_t *parser)
|
516
562
|
{
|
517
|
-
if (parser->pending_crlf
|
563
|
+
if (parser->pending_crlf)
|
518
564
|
{
|
519
|
-
|
520
|
-
parser->pending_crlf =
|
565
|
+
str_append_str(parser->output, parser->line_ending);
|
566
|
+
parser->pending_crlf = false;
|
521
567
|
}
|
522
568
|
}
|
523
569
|
|
@@ -543,9 +589,9 @@ void _Wikitext_emit_pending_crlf_if_necessary(parser_t *parser)
|
|
543
589
|
// on the line scope.
|
544
590
|
// Luckily, BLOCKQUOTE_START tokens can only appear at the start of the scope array, so we can check for them first before
|
545
591
|
// entering the for loop.
|
546
|
-
void
|
592
|
+
void wiki_pop_excess_elements(parser_t *parser)
|
547
593
|
{
|
548
|
-
if (
|
594
|
+
if (parser->capture)
|
549
595
|
return;
|
550
596
|
for (int i = parser->scope->count - ary_count(parser->scope, BLOCKQUOTE_START), j = parser->line->count; i > j; i--)
|
551
597
|
{
|
@@ -560,65 +606,94 @@ void _Wikitext_pop_excess_elements(parser_t *parser)
|
|
560
606
|
continue;
|
561
607
|
}
|
562
608
|
}
|
563
|
-
|
609
|
+
wiki_pop_from_stack(parser, NULL);
|
564
610
|
}
|
565
611
|
}
|
566
612
|
|
567
|
-
|
568
|
-
|
569
|
-
//
|
570
|
-
//
|
571
|
-
//
|
572
|
-
//
|
573
|
-
//
|
574
|
-
//
|
575
|
-
|
613
|
+
// Convert a single UTF-8 codepoint to UTF-32
|
614
|
+
//
|
615
|
+
// Expects an input buffer, src, containing a UTF-8 encoded character (which
|
616
|
+
// may be multi-byte). The end of the input buffer, end, is also passed in to
|
617
|
+
// allow the detection of invalidly truncated codepoints. The number of bytes
|
618
|
+
// in the UTF-8 character (between 1 and 4) is returned by reference in
|
619
|
+
// width_out.
|
620
|
+
//
|
621
|
+
// Raises a RangeError if the supplied character is invalid UTF-8.
|
622
|
+
uint32_t wiki_utf8_to_utf32(char *src, char *end, long *width_out)
|
576
623
|
{
|
577
624
|
uint32_t dest;
|
578
|
-
if ((unsigned char)src[0] <= 0x7f)
|
625
|
+
if ((unsigned char)src[0] <= 0x7f)
|
579
626
|
{
|
627
|
+
// ASCII
|
580
628
|
dest = src[0];
|
581
629
|
*width_out = 1;
|
582
630
|
}
|
583
|
-
else if ((src[0] & 0xe0) == 0xc0)
|
631
|
+
else if ((src[0] & 0xe0) == 0xc0)
|
584
632
|
{
|
633
|
+
// byte starts with 110..... : this should be a two-byte sequence
|
585
634
|
if (src + 1 >= end)
|
586
|
-
|
587
|
-
|
588
|
-
|
635
|
+
// no second byte
|
636
|
+
rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
|
637
|
+
else if (((unsigned char)src[0] == 0xc0) ||
|
638
|
+
((unsigned char)src[0] == 0xc1))
|
639
|
+
// overlong encoding: lead byte of 110..... but code point <= 127
|
640
|
+
rb_raise(eWikitextParserError, "invalid encoding: overlong encoding");
|
589
641
|
else if ((src[1] & 0xc0) != 0x80 )
|
590
|
-
|
591
|
-
|
642
|
+
// should have second byte starting with 10......
|
643
|
+
rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
|
644
|
+
|
645
|
+
dest =
|
646
|
+
((uint32_t)(src[0] & 0x1f)) << 6 |
|
647
|
+
(src[1] & 0x3f);
|
592
648
|
*width_out = 2;
|
593
649
|
}
|
594
|
-
else if ((src[0] & 0xf0) == 0xe0)
|
650
|
+
else if ((src[0] & 0xf0) == 0xe0)
|
595
651
|
{
|
652
|
+
// byte starts with 1110.... : this should be a three-byte sequence
|
596
653
|
if (src + 2 >= end)
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
654
|
+
// missing second or third byte
|
655
|
+
rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
|
656
|
+
else if (((src[1] & 0xc0) != 0x80 ) ||
|
657
|
+
((src[2] & 0xc0) != 0x80 ))
|
658
|
+
// should have second and third bytes starting with 10......
|
659
|
+
rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
|
660
|
+
|
661
|
+
dest =
|
662
|
+
((uint32_t)(src[0] & 0x0f)) << 12 |
|
663
|
+
((uint32_t)(src[1] & 0x3f)) << 6 |
|
664
|
+
(src[2] & 0x3f);
|
601
665
|
*width_out = 3;
|
602
666
|
}
|
603
|
-
else if ((src[0] & 0xf8) == 0xf0)
|
667
|
+
else if ((src[0] & 0xf8) == 0xf0)
|
604
668
|
{
|
669
|
+
// byte starts with 11110... : this should be a four-byte sequence
|
605
670
|
if (src + 3 >= end)
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
671
|
+
// missing second, third, or fourth byte
|
672
|
+
rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
|
673
|
+
else if ((unsigned char)src[0] >= 0xf5 &&
|
674
|
+
(unsigned char)src[0] <= 0xf7)
|
675
|
+
// disallowed by RFC 3629 (codepoints above 0x10ffff)
|
676
|
+
rb_raise(eWikitextParserError, "invalid encoding: overlong encoding");
|
677
|
+
else if (((src[1] & 0xc0) != 0x80 ) ||
|
678
|
+
((src[2] & 0xc0) != 0x80 ) ||
|
679
|
+
((src[3] & 0xc0) != 0x80 ))
|
680
|
+
// should have second, third and fourth bytes starting with 10......
|
681
|
+
rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
|
682
|
+
|
683
|
+
dest =
|
684
|
+
((uint32_t)(src[0] & 0x07)) << 18 |
|
685
|
+
((uint32_t)(src[1] & 0x3f)) << 12 |
|
686
|
+
((uint32_t)(src[1] & 0x3f)) << 6 |
|
687
|
+
(src[2] & 0x3f);
|
612
688
|
*width_out = 4;
|
613
689
|
}
|
614
|
-
else
|
615
|
-
|
690
|
+
else
|
691
|
+
rb_raise(eWikitextParserError, "invalid encoding: unexpected byte");
|
616
692
|
return dest;
|
617
693
|
}
|
618
694
|
|
619
|
-
|
695
|
+
void wiki_append_entity_from_utf32_char(str_t *output, uint32_t character)
|
620
696
|
{
|
621
|
-
// TODO: consider special casing some entities (ie. quot, amp, lt, gt etc)?
|
622
697
|
char hex_string[8] = { '&', '#', 'x', 0, 0, 0, 0, ';' };
|
623
698
|
char scratch = (character & 0xf000) >> 12;
|
624
699
|
hex_string[3] = (scratch <= 9 ? scratch + 48 : scratch + 87);
|
@@ -628,18 +703,17 @@ VALUE _Wikitext_utf32_char_to_entity(uint32_t character)
|
|
628
703
|
hex_string[5] = (scratch <= 9 ? scratch + 48 : scratch + 87);
|
629
704
|
scratch = character & 0x000f;
|
630
705
|
hex_string[6] = (scratch <= 9 ? scratch + 48 : scratch + 87);
|
631
|
-
|
706
|
+
str_append(output, hex_string, sizeof(hex_string));
|
632
707
|
}
|
633
708
|
|
634
|
-
|
709
|
+
// trim parser->link_text in place
|
710
|
+
void wiki_trim_link_text(parser_t *parser)
|
635
711
|
{
|
636
|
-
|
637
|
-
char *src = RSTRING_PTR(string);
|
712
|
+
char *src = parser->link_text->ptr;
|
638
713
|
char *start = src; // remember this so we can check if we're at the start
|
639
714
|
char *left = src;
|
640
715
|
char *non_space = src; // remember last non-space character output
|
641
|
-
|
642
|
-
char *end = src + len;
|
716
|
+
char *end = src + parser->link_text->len;
|
643
717
|
while (src < end)
|
644
718
|
{
|
645
719
|
if (*src == ' ')
|
@@ -651,143 +725,104 @@ VALUE _Wikitext_parser_trim_link_target(VALUE string)
|
|
651
725
|
non_space = src;
|
652
726
|
src++;
|
653
727
|
}
|
654
|
-
if (left
|
655
|
-
|
656
|
-
|
657
|
-
|
728
|
+
if (left != start || non_space + 1 != end)
|
729
|
+
{
|
730
|
+
// TODO: could potentially avoid this memmove by extending the str_t struct with an "offset" or "free" member
|
731
|
+
parser->link_text->len = (non_space + 1) - left;
|
732
|
+
memmove(parser->link_text->ptr, left, parser->link_text->len);
|
733
|
+
}
|
658
734
|
}
|
659
735
|
|
660
736
|
// - non-printable (non-ASCII) characters converted to numeric entities
|
661
737
|
// - QUOT and AMP characters converted to named entities
|
662
|
-
// - if
|
663
|
-
// - if
|
664
|
-
|
738
|
+
// - if trim is true, leading and trailing whitespace trimmed
|
739
|
+
// - if trim is false, there is no special treatment of spaces
|
740
|
+
void wiki_append_sanitized_link_target(parser_t *parser, str_t *output, bool trim)
|
665
741
|
{
|
666
|
-
|
667
|
-
char *
|
668
|
-
char *
|
669
|
-
|
670
|
-
char *end = src + len;
|
671
|
-
|
672
|
-
// start with a destination buffer twice the size of the source, will realloc if necessary
|
673
|
-
// slop = (len / 8) * 8 (ie. one in every 8 characters can be converted into an entity, each entity requires 8 bytes)
|
674
|
-
// this efficiently handles the most common case (where the size of the buffer doesn't change much)
|
675
|
-
char *dest = ALLOC_N(char, len * 2);
|
676
|
-
char *dest_ptr = dest; // hang on to this so we can pass it to free() later
|
677
|
-
char *non_space = dest; // remember last non-space character output
|
742
|
+
char *src = parser->link_target->ptr;
|
743
|
+
char *start = src; // remember this so we can check if we're at the start
|
744
|
+
char *non_space = output->ptr + output->len; // remember last non-space character output
|
745
|
+
char *end = src + parser->link_target->len;
|
678
746
|
while (src < end)
|
679
747
|
{
|
680
|
-
// need at most 8
|
681
|
-
if (
|
748
|
+
// need at most 8 bytes to display each input character (&#x0000;)
|
749
|
+
if (output->ptr + output->len + 8 > output->ptr + output->capacity) // outgrowing buffer, must grow
|
682
750
|
{
|
683
|
-
char *
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
if (dest == NULL)
|
688
|
-
{
|
689
|
-
// would have used reallocf, but this has to run on Linux too, not just Darwin
|
690
|
-
free(dest_ptr);
|
691
|
-
rb_raise(rb_eNoMemError, "failed to re-allocate temporary storage (memory allocation error)");
|
692
|
-
}
|
693
|
-
dest_ptr = dest;
|
694
|
-
dest = dest_ptr + (old_dest - old_dest_ptr);
|
695
|
-
non_space = dest_ptr + (non_space - old_dest_ptr);
|
751
|
+
char *old_ptr = output->ptr;
|
752
|
+
str_grow(output, output->len + (end - src) * 8); // allocate enough for worst case
|
753
|
+
if (old_ptr != output->ptr) // may have moved
|
754
|
+
non_space += output->ptr - old_ptr;
|
696
755
|
}
|
697
756
|
|
698
|
-
if (*src == '"')
|
757
|
+
if (*src == '"')
|
699
758
|
{
|
700
759
|
char quot_entity_literal[] = { '&', 'q', 'u', 'o', 't', ';' }; // no trailing NUL
|
701
|
-
|
702
|
-
dest += sizeof(quot_entity_literal);
|
760
|
+
str_append(output, quot_entity_literal, sizeof(quot_entity_literal));
|
703
761
|
}
|
704
|
-
else if (*src == '&')
|
762
|
+
else if (*src == '&')
|
705
763
|
{
|
706
764
|
char amp_entity_literal[] = { '&', 'a', 'm', 'p', ';' }; // no trailing NUL
|
707
|
-
|
708
|
-
dest += sizeof(amp_entity_literal);
|
709
|
-
}
|
710
|
-
else if (*src == '<') // LESS_THAN
|
711
|
-
{
|
712
|
-
free(dest_ptr);
|
713
|
-
rb_raise(rb_eRangeError, "invalid link text (\"<\" may not appear in link text)");
|
714
|
-
}
|
715
|
-
else if (*src == '>') // GREATER_THAN
|
716
|
-
{
|
717
|
-
free(dest_ptr);
|
718
|
-
rb_raise(rb_eRangeError, "invalid link text (\">\" may not appear in link text)");
|
765
|
+
str_append(output, amp_entity_literal, sizeof(amp_entity_literal));
|
719
766
|
}
|
720
|
-
else if (*src == '
|
721
|
-
|
722
|
-
else if (*src
|
767
|
+
else if (*src == '<' || *src == '>')
|
768
|
+
rb_raise(rb_eRangeError, "invalid link text (\"%c\" may not appear in link text)", *src);
|
769
|
+
else if (*src == ' ' && src == start && trim)
|
770
|
+
start++; // we eat leading space
|
771
|
+
else if (*src >= 0x20 && *src <= 0x7e) // printable ASCII
|
723
772
|
{
|
724
|
-
*
|
725
|
-
|
773
|
+
*(output->ptr + output->len) = *src;
|
774
|
+
output->len++;
|
726
775
|
}
|
727
776
|
else // all others: must convert to entities
|
728
777
|
{
|
729
778
|
long width;
|
730
|
-
|
731
|
-
char *entity_src = RSTRING_PTR(entity);
|
732
|
-
long entity_len = RSTRING_LEN(entity); // should always be 8 characters (8 bytes)
|
733
|
-
memcpy(dest, entity_src, entity_len);
|
734
|
-
dest += entity_len;
|
779
|
+
wiki_append_entity_from_utf32_char(output, wiki_utf8_to_utf32(src, end, &width));
|
735
780
|
src += width;
|
736
|
-
non_space =
|
781
|
+
non_space = output->ptr + output->len;
|
737
782
|
continue;
|
738
783
|
}
|
739
784
|
if (*src != ' ')
|
740
|
-
non_space =
|
785
|
+
non_space = output->ptr + output->len;
|
741
786
|
src++;
|
742
787
|
}
|
743
788
|
|
744
789
|
// trim trailing space if necessary
|
745
|
-
if (
|
746
|
-
len
|
747
|
-
else
|
748
|
-
len = dest - dest_ptr;
|
749
|
-
VALUE out = rb_str_new(dest_ptr, len);
|
750
|
-
free(dest_ptr);
|
751
|
-
return out;
|
790
|
+
if (trim && output->ptr + output->len != non_space)
|
791
|
+
output->len -= (output->ptr + output->len) - non_space;
|
752
792
|
}
|
753
793
|
|
754
794
|
VALUE Wikitext_parser_sanitize_link_target(VALUE self, VALUE string)
|
755
795
|
{
|
756
796
|
parser_t parser;
|
757
|
-
parser.link_target
|
758
|
-
|
797
|
+
parser.link_target = str_new_from_string(string);
|
798
|
+
GC_WRAP_STR(parser.link_target, link_target_gc);
|
799
|
+
str_t *output = str_new();
|
800
|
+
GC_WRAP_STR(output, output_gc);
|
801
|
+
wiki_append_sanitized_link_target(&parser, output, true);
|
802
|
+
return string_from_str(output);
|
759
803
|
}
|
760
804
|
|
761
|
-
//
|
762
|
-
//
|
763
|
-
//
|
764
|
-
//
|
765
|
-
|
766
|
-
// ...the [[foo]] is...
|
767
|
-
// to be equivalent to:
|
768
|
-
// thing. [[Foo]] was...
|
769
|
-
static void _Wikitext_parser_encode_link_target(parser_t *parser)
|
805
|
+
// Encodes the parser link_target member (in-place) according to RFCs 2396 and 2718
|
806
|
+
//
|
807
|
+
// Leading and trailing whitespace trimmed. Spaces are converted to
|
808
|
+
// underscores if the parser space_to_underscore member is true.
|
809
|
+
static void wiki_encode_link_target(parser_t *parser)
|
770
810
|
{
|
771
|
-
|
772
|
-
char *
|
773
|
-
|
774
|
-
long len = RSTRING_LEN(in);
|
811
|
+
char *src = parser->link_target->ptr;
|
812
|
+
char *start = src; // remember this so we can check if we're at the start
|
813
|
+
long len = parser->link_target->len;
|
775
814
|
if (!(len > 0))
|
776
815
|
return;
|
777
|
-
char *end =
|
778
|
-
static char hex[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
|
779
|
-
|
780
|
-
// to avoid most reallocations start with a destination buffer twice the size of the source
|
781
|
-
// this handles the most common case (where most chars are in the ASCII range and don't require more storage, but there are
|
782
|
-
// often quite a few spaces, which are encoded as "%20" and occupy 3 bytes)
|
783
|
-
// the worst case is where _every_ byte must be written out using 3 bytes
|
816
|
+
char *end = src + len;
|
784
817
|
long dest_len = len * 2;
|
785
818
|
char *dest = ALLOC_N(char, dest_len);
|
786
819
|
char *dest_ptr = dest; // hang on to this so we can pass it to free() later
|
787
820
|
char *non_space = dest; // remember last non-space character output
|
788
|
-
|
821
|
+
static char hex[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
|
822
|
+
for (; src < end; src++)
|
789
823
|
{
|
790
|
-
|
824
|
+
// worst case: a single character may grow to 3 characters once encoded
|
825
|
+
if ((dest + 3) > (dest_ptr + dest_len))
|
791
826
|
{
|
792
827
|
// outgrowing buffer, must reallocate
|
793
828
|
char *old_dest = dest;
|
@@ -806,27 +841,27 @@ static void _Wikitext_parser_encode_link_target(parser_t *parser)
|
|
806
841
|
}
|
807
842
|
|
808
843
|
// pass through unreserved characters
|
809
|
-
if ((
|
810
|
-
(
|
811
|
-
(
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
844
|
+
if ((*src >= 'a' && *src <= 'z') ||
|
845
|
+
(*src >= 'A' && *src <= 'Z') ||
|
846
|
+
(*src >= '0' && *src <= '9') ||
|
847
|
+
*src == '-' ||
|
848
|
+
*src == '_' ||
|
849
|
+
*src == '.' ||
|
850
|
+
*src == '~')
|
816
851
|
{
|
817
|
-
*dest++ = *
|
852
|
+
*dest++ = *src;
|
818
853
|
non_space = dest;
|
819
854
|
}
|
820
|
-
else if (*
|
855
|
+
else if (*src == ' ' && src == start)
|
821
856
|
start++; // we eat leading space
|
822
|
-
else if (*
|
857
|
+
else if (*src == ' ' && parser->space_to_underscore)
|
823
858
|
*dest++ = '_';
|
824
859
|
else // everything else gets URL-encoded
|
825
860
|
{
|
826
861
|
*dest++ = '%';
|
827
|
-
*dest++ = hex[(unsigned char)(*
|
828
|
-
*dest++ = hex[(unsigned char)(*
|
829
|
-
if (*
|
862
|
+
*dest++ = hex[(unsigned char)(*src) / 16]; // left
|
863
|
+
*dest++ = hex[(unsigned char)(*src) % 16]; // right
|
864
|
+
if (*src != ' ')
|
830
865
|
non_space = dest;
|
831
866
|
}
|
832
867
|
}
|
@@ -836,90 +871,89 @@ static void _Wikitext_parser_encode_link_target(parser_t *parser)
|
|
836
871
|
dest_len = non_space - dest_ptr;
|
837
872
|
else
|
838
873
|
dest_len = dest - dest_ptr;
|
839
|
-
parser->link_target
|
874
|
+
str_clear(parser->link_target);
|
875
|
+
str_append(parser->link_target, dest_ptr, dest_len);
|
840
876
|
free(dest_ptr);
|
841
877
|
}
|
842
878
|
|
843
879
|
VALUE Wikitext_parser_encode_link_target(VALUE self, VALUE in)
|
844
880
|
{
|
845
881
|
parser_t parser;
|
846
|
-
parser.
|
847
|
-
parser.
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
|
852
|
-
// this method exposed for testing only
|
853
|
-
VALUE Wikitext_parser_encode_special_link_target(VALUE self, VALUE in)
|
854
|
-
{
|
855
|
-
parser_t parser;
|
856
|
-
parser.link_target = in;
|
857
|
-
parser.space_to_underscore = Qfalse;
|
858
|
-
_Wikitext_parser_encode_link_target(&parser);
|
859
|
-
return parser.link_target;
|
882
|
+
parser.space_to_underscore = false;
|
883
|
+
parser.link_target = str_new_from_string(in);
|
884
|
+
GC_WRAP_STR(parser.link_target, link_target_gc);
|
885
|
+
wiki_encode_link_target(&parser);
|
886
|
+
return string_from_str(parser.link_target);
|
860
887
|
}
|
861
888
|
|
862
889
|
// returns 1 (true) if supplied string is blank (empty, or consisting entirely of spaces)
|
863
890
|
// returns 0 (false) otherwise
|
864
|
-
|
891
|
+
bool wiki_blank(str_t *str)
|
865
892
|
{
|
866
|
-
if (
|
867
|
-
return
|
868
|
-
for (char *ptr =
|
869
|
-
*end =
|
893
|
+
if (str->len == 0)
|
894
|
+
return true;
|
895
|
+
for (char *ptr = str->ptr,
|
896
|
+
*end = str->ptr + str->len;
|
870
897
|
ptr < end; ptr++)
|
871
898
|
{
|
872
899
|
if (*ptr != ' ')
|
873
|
-
return
|
900
|
+
return false;
|
874
901
|
}
|
875
|
-
return
|
902
|
+
return true;
|
876
903
|
}
|
877
904
|
|
878
|
-
void
|
905
|
+
void wiki_rollback_failed_internal_link(parser_t *parser)
|
879
906
|
{
|
880
907
|
if (!IN(LINK_START))
|
881
908
|
return; // nothing to do!
|
882
909
|
int scope_includes_separator = IN(SEPARATOR);
|
883
|
-
|
884
|
-
|
885
|
-
if (
|
910
|
+
wiki_pop_from_stack_up_to(parser, NULL, LINK_START, true);
|
911
|
+
str_append(parser->output, link_start, sizeof(link_start) - 1);
|
912
|
+
if (parser->link_target->len > 0)
|
886
913
|
{
|
887
|
-
|
888
|
-
rb_str_append(parser->output, sanitized);
|
914
|
+
wiki_append_sanitized_link_target(parser, parser->output, false);
|
889
915
|
if (scope_includes_separator)
|
890
916
|
{
|
891
|
-
|
892
|
-
if (
|
893
|
-
|
917
|
+
str_append(parser->output, separator, sizeof(separator) - 1);
|
918
|
+
if (parser->link_text->len > 0)
|
919
|
+
str_append_str(parser->output, parser->link_text);
|
894
920
|
}
|
895
921
|
}
|
896
|
-
parser->capture
|
897
|
-
parser->link_target
|
898
|
-
parser->link_text
|
922
|
+
parser->capture = NULL;
|
923
|
+
str_clear(parser->link_target);
|
924
|
+
str_clear(parser->link_text);
|
899
925
|
}
|
900
926
|
|
901
|
-
void
|
927
|
+
void wiki_rollback_failed_external_link(parser_t *parser)
|
902
928
|
{
|
903
929
|
if (!IN(EXT_LINK_START))
|
904
930
|
return; // nothing to do!
|
931
|
+
|
932
|
+
// store a couple of values before popping
|
905
933
|
int scope_includes_space = IN(SPACE);
|
906
|
-
|
907
|
-
|
908
|
-
|
934
|
+
VALUE link_class = IN(PATH) ? Qnil : parser->external_link_class;
|
935
|
+
wiki_pop_from_stack_up_to(parser, NULL, EXT_LINK_START, true);
|
936
|
+
|
937
|
+
str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
|
938
|
+
if (parser->link_target->len > 0)
|
909
939
|
{
|
910
|
-
|
911
|
-
parser->link_target = _Wikitext_hyperlink(parser, Qnil, parser->link_target, parser->link_target, parser->external_link_class);
|
912
|
-
rb_str_append(parser->output, parser->link_target);
|
940
|
+
wiki_append_hyperlink(parser, Qnil, parser->link_target, NULL, link_class, true);
|
913
941
|
if (scope_includes_space)
|
914
942
|
{
|
915
|
-
|
916
|
-
if (
|
917
|
-
|
943
|
+
str_append(parser->output, space, sizeof(space) - 1);
|
944
|
+
if (parser->link_text->len > 0)
|
945
|
+
str_append_str(parser->output, parser->link_text);
|
918
946
|
}
|
919
947
|
}
|
920
|
-
parser->capture
|
921
|
-
parser->link_target
|
922
|
-
parser->link_text
|
948
|
+
parser->capture = NULL;
|
949
|
+
str_clear(parser->link_target);
|
950
|
+
str_clear(parser->link_text);
|
951
|
+
}
|
952
|
+
|
953
|
+
void wiki_rollback_failed_link(parser_t *parser)
|
954
|
+
{
|
955
|
+
wiki_rollback_failed_internal_link(parser);
|
956
|
+
wiki_rollback_failed_external_link(parser);
|
923
957
|
}
|
924
958
|
|
925
959
|
VALUE Wikitext_parser_initialize(int argc, VALUE *argv, VALUE self)
|
@@ -1031,31 +1065,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1031
1065
|
VALUE prefix = rb_iv_get(self, "@internal_link_prefix");
|
1032
1066
|
|
1033
1067
|
// set up parser struct to make passing parameters a little easier
|
1034
|
-
|
1035
|
-
|
1036
|
-
parser_t *parser = &_parser;
|
1037
|
-
parser->output = rb_str_new2("");
|
1038
|
-
parser->capture = Qnil;
|
1039
|
-
parser->link_target = Qnil;
|
1040
|
-
parser->link_text = Qnil;
|
1068
|
+
parser_t *parser = parser_new();
|
1069
|
+
GC_WRAP_PARSER(parser, parser_gc);
|
1041
1070
|
parser->external_link_class = link_class;
|
1042
1071
|
parser->mailto_class = mailto_class;
|
1043
1072
|
parser->img_prefix = rb_iv_get(self, "@img_prefix");
|
1044
|
-
parser->
|
1045
|
-
|
1046
|
-
parser->line = ary_new();
|
1047
|
-
GC_WRAP_ARY(parser->line, line_gc);
|
1048
|
-
parser->line_buffer = ary_new();
|
1049
|
-
GC_WRAP_ARY(parser->line_buffer, line_buffer_gc);
|
1050
|
-
parser->pending_crlf = Qfalse;
|
1051
|
-
parser->autolink = rb_iv_get(self, "@autolink");
|
1052
|
-
parser->space_to_underscore = rb_iv_get(self, "@space_to_underscore");
|
1073
|
+
parser->autolink = rb_iv_get(self, "@autolink") == Qtrue ? true : false;
|
1074
|
+
parser->space_to_underscore = rb_iv_get(self, "@space_to_underscore") == Qtrue ? true : false;
|
1053
1075
|
parser->line_ending = str_new_from_string(line_ending);
|
1054
|
-
GC_WRAP_STR(parser->line_ending, line_ending_gc);
|
1055
1076
|
parser->base_indent = base_indent;
|
1056
|
-
parser->current_indent = 0;
|
1057
|
-
parser->tabulation = str_new();
|
1058
|
-
GC_WRAP_STR(parser->tabulation, tabulation_gc);
|
1059
1077
|
parser->base_heading_level = base_heading_level;
|
1060
1078
|
|
1061
1079
|
// this simple looping design leads to a single enormous function,
|
@@ -1093,10 +1111,13 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1093
1111
|
long remove_strong = -1;
|
1094
1112
|
long remove_em = -1;
|
1095
1113
|
|
1096
|
-
// general purpose counters and
|
1114
|
+
// general purpose counters, flags and pointers
|
1097
1115
|
long i = 0;
|
1098
1116
|
long j = 0;
|
1099
1117
|
long k = 0;
|
1118
|
+
str_t *output = NULL;
|
1119
|
+
str_t _token_str;
|
1120
|
+
str_t *token_str = &_token_str;
|
1100
1121
|
|
1101
1122
|
// The following giant switch statement contains cases for all the possible token types.
|
1102
1123
|
// In the most basic sense we are emitting the HTML that corresponds to each token,
|
@@ -1118,16 +1139,16 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1118
1139
|
case PRE:
|
1119
1140
|
if (IN(NO_WIKI_START) || IN(PRE_START))
|
1120
1141
|
{
|
1121
|
-
|
1142
|
+
str_append(parser->output, space, sizeof(space) - 1);
|
1122
1143
|
break;
|
1123
1144
|
}
|
1124
1145
|
else if (IN(BLOCKQUOTE_START))
|
1125
1146
|
{
|
1126
1147
|
// this kind of nesting not allowed (to avoid user confusion)
|
1127
|
-
|
1128
|
-
|
1129
|
-
|
1130
|
-
|
1148
|
+
wiki_pop_excess_elements(parser);
|
1149
|
+
wiki_start_para_if_necessary(parser);
|
1150
|
+
output = parser->capture ? parser->capture : parser->output;
|
1151
|
+
str_append(output, space, sizeof(space) - 1);
|
1131
1152
|
break;
|
1132
1153
|
}
|
1133
1154
|
|
@@ -1139,15 +1160,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1139
1160
|
{
|
1140
1161
|
// must pop (reduce nesting level)
|
1141
1162
|
for (i = j - i; i > 0; i--)
|
1142
|
-
|
1163
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, true);
|
1143
1164
|
}
|
1144
1165
|
|
1145
1166
|
if (!IN(PRE))
|
1146
1167
|
{
|
1147
|
-
parser->pending_crlf =
|
1148
|
-
|
1149
|
-
|
1150
|
-
|
1168
|
+
parser->pending_crlf = false;
|
1169
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
|
1170
|
+
wiki_indent(parser);
|
1171
|
+
str_append(parser->output, pre_start, sizeof(pre_start) - 1);
|
1151
1172
|
ary_push(parser->scope, PRE);
|
1152
1173
|
}
|
1153
1174
|
break;
|
@@ -1155,16 +1176,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1155
1176
|
case PRE_START:
|
1156
1177
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1157
1178
|
{
|
1158
|
-
|
1159
|
-
|
1179
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1180
|
+
str_append(parser->output, escaped_pre_start, sizeof(escaped_pre_start) - 1);
|
1160
1181
|
}
|
1161
1182
|
else if (IN(BLOCKQUOTE_START))
|
1162
1183
|
{
|
1163
|
-
|
1164
|
-
|
1165
|
-
|
1166
|
-
|
1167
|
-
rb_str_cat(parser->output, pre_start, sizeof(pre_start) - 1);
|
1184
|
+
wiki_rollback_failed_link(parser); // if any
|
1185
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
|
1186
|
+
wiki_indent(parser);
|
1187
|
+
str_append(parser->output, pre_start, sizeof(pre_start) - 1);
|
1168
1188
|
ary_push(parser->scope, PRE_START);
|
1169
1189
|
ary_push(parser->line, PRE_START);
|
1170
1190
|
}
|
@@ -1172,29 +1192,27 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1172
1192
|
{
|
1173
1193
|
if (token->column_start == 1) // only allowed in first column
|
1174
1194
|
{
|
1175
|
-
|
1176
|
-
|
1177
|
-
|
1178
|
-
|
1179
|
-
rb_str_cat(parser->output, pre_start, sizeof(pre_start) - 1);
|
1195
|
+
wiki_rollback_failed_link(parser); // if any
|
1196
|
+
wiki_pop_all_from_stack(parser);
|
1197
|
+
wiki_indent(parser);
|
1198
|
+
str_append(parser->output, pre_start, sizeof(pre_start) - 1);
|
1180
1199
|
ary_push(parser->scope, PRE_START);
|
1181
1200
|
ary_push(parser->line, PRE_START);
|
1182
1201
|
}
|
1183
1202
|
else // PRE_START illegal here
|
1184
1203
|
{
|
1185
|
-
|
1186
|
-
|
1187
|
-
|
1188
|
-
|
1204
|
+
output = parser->capture ? parser->capture : parser->output;
|
1205
|
+
wiki_pop_excess_elements(parser);
|
1206
|
+
wiki_start_para_if_necessary(parser);
|
1207
|
+
str_append(output, escaped_pre_start, sizeof(escaped_pre_start) - 1);
|
1189
1208
|
}
|
1190
1209
|
}
|
1191
1210
|
else
|
1192
1211
|
{
|
1193
|
-
|
1194
|
-
|
1195
|
-
|
1196
|
-
|
1197
|
-
rb_str_cat(parser->output, pre_start, sizeof(pre_start) - 1);
|
1212
|
+
wiki_rollback_failed_link(parser); // if any
|
1213
|
+
wiki_pop_from_stack_up_to(parser, NULL, P, true);
|
1214
|
+
wiki_indent(parser);
|
1215
|
+
str_append(parser->output, pre_start, sizeof(pre_start) - 1);
|
1198
1216
|
ary_push(parser->scope, PRE_START);
|
1199
1217
|
ary_push(parser->line, PRE_START);
|
1200
1218
|
}
|
@@ -1203,19 +1221,19 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1203
1221
|
case PRE_END:
|
1204
1222
|
if (IN(NO_WIKI_START) || IN(PRE))
|
1205
1223
|
{
|
1206
|
-
|
1207
|
-
|
1224
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1225
|
+
str_append(parser->output, escaped_pre_end, sizeof(escaped_pre_end) - 1);
|
1208
1226
|
}
|
1209
1227
|
else
|
1210
1228
|
{
|
1211
1229
|
if (IN(PRE_START))
|
1212
|
-
|
1230
|
+
wiki_pop_from_stack_up_to(parser, parser->output, PRE_START, true);
|
1213
1231
|
else
|
1214
1232
|
{
|
1215
|
-
|
1216
|
-
|
1217
|
-
|
1218
|
-
|
1233
|
+
output = parser->capture ? parser->capture : parser->output;
|
1234
|
+
wiki_pop_excess_elements(parser);
|
1235
|
+
wiki_start_para_if_necessary(parser);
|
1236
|
+
str_append(output, escaped_pre_end, sizeof(escaped_pre_end) - 1);
|
1219
1237
|
}
|
1220
1238
|
}
|
1221
1239
|
break;
|
@@ -1223,14 +1241,14 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1223
1241
|
case BLOCKQUOTE:
|
1224
1242
|
if (IN(NO_WIKI_START) || IN(PRE_START))
|
1225
1243
|
// no need to check for <pre>; can never appear inside it
|
1226
|
-
|
1244
|
+
str_append(parser->output, escaped_blockquote, TOKEN_LEN(token) + 3); // will either emit ">" or "> "
|
1227
1245
|
else if (IN(BLOCKQUOTE_START))
|
1228
1246
|
{
|
1229
1247
|
// this kind of nesting not allowed (to avoid user confusion)
|
1230
|
-
|
1231
|
-
|
1232
|
-
|
1233
|
-
|
1248
|
+
wiki_pop_excess_elements(parser);
|
1249
|
+
wiki_start_para_if_necessary(parser);
|
1250
|
+
output = parser->capture ? parser->capture : parser->output;
|
1251
|
+
str_append(output, escaped_blockquote, TOKEN_LEN(token) + 3); // will either emit ">" or "> "
|
1234
1252
|
break;
|
1235
1253
|
}
|
1236
1254
|
else
|
@@ -1252,12 +1270,12 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1252
1270
|
if (i > j)
|
1253
1271
|
{
|
1254
1272
|
// must push (increase nesting level)
|
1255
|
-
|
1273
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
|
1256
1274
|
for (i = i - j; i > 0; i--)
|
1257
1275
|
{
|
1258
|
-
|
1259
|
-
|
1260
|
-
|
1276
|
+
wiki_indent(parser);
|
1277
|
+
str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
|
1278
|
+
str_append_str(parser->output, parser->line_ending);
|
1261
1279
|
ary_push(parser->scope, BLOCKQUOTE);
|
1262
1280
|
}
|
1263
1281
|
}
|
@@ -1265,7 +1283,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1265
1283
|
{
|
1266
1284
|
// must pop (reduce nesting level)
|
1267
1285
|
for (i = j - i; i > 0; i--)
|
1268
|
-
|
1286
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, true);
|
1269
1287
|
}
|
1270
1288
|
|
1271
1289
|
// jump to top of the loop to process token we scanned during lookahead
|
@@ -1276,18 +1294,17 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1276
1294
|
case BLOCKQUOTE_START:
|
1277
1295
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1278
1296
|
{
|
1279
|
-
|
1280
|
-
|
1297
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1298
|
+
str_append(parser->output, escaped_blockquote_start, sizeof(escaped_blockquote_start) - 1);
|
1281
1299
|
}
|
1282
1300
|
else if (IN(BLOCKQUOTE_START))
|
1283
1301
|
{
|
1284
1302
|
// nesting is fine here
|
1285
|
-
|
1286
|
-
|
1287
|
-
|
1288
|
-
|
1289
|
-
|
1290
|
-
rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
|
1303
|
+
wiki_rollback_failed_link(parser); // if any
|
1304
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
|
1305
|
+
wiki_indent(parser);
|
1306
|
+
str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
|
1307
|
+
str_append_str(parser->output, parser->line_ending);
|
1291
1308
|
ary_push(parser->scope, BLOCKQUOTE_START);
|
1292
1309
|
ary_push(parser->line, BLOCKQUOTE_START);
|
1293
1310
|
}
|
@@ -1295,32 +1312,30 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1295
1312
|
{
|
1296
1313
|
if (token->column_start == 1) // only allowed in first column
|
1297
1314
|
{
|
1298
|
-
|
1299
|
-
|
1300
|
-
|
1301
|
-
|
1302
|
-
|
1303
|
-
rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
|
1315
|
+
wiki_rollback_failed_link(parser); // if any
|
1316
|
+
wiki_pop_all_from_stack(parser);
|
1317
|
+
wiki_indent(parser);
|
1318
|
+
str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
|
1319
|
+
str_append_str(parser->output, parser->line_ending);
|
1304
1320
|
ary_push(parser->scope, BLOCKQUOTE_START);
|
1305
1321
|
ary_push(parser->line, BLOCKQUOTE_START);
|
1306
1322
|
}
|
1307
1323
|
else // BLOCKQUOTE_START illegal here
|
1308
1324
|
{
|
1309
|
-
|
1310
|
-
|
1311
|
-
|
1312
|
-
|
1325
|
+
output = parser->capture ? parser->capture : parser->output;
|
1326
|
+
wiki_pop_excess_elements(parser);
|
1327
|
+
wiki_start_para_if_necessary(parser);
|
1328
|
+
str_append(output, escaped_blockquote_start, sizeof(escaped_blockquote_start) - 1);
|
1313
1329
|
}
|
1314
1330
|
}
|
1315
1331
|
else
|
1316
1332
|
{
|
1317
1333
|
// would be nice to eliminate the repetition here but it's probably the clearest way
|
1318
|
-
|
1319
|
-
|
1320
|
-
|
1321
|
-
|
1322
|
-
|
1323
|
-
rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
|
1334
|
+
wiki_rollback_failed_link(parser); // if any
|
1335
|
+
wiki_pop_from_stack_up_to(parser, NULL, P, true);
|
1336
|
+
wiki_indent(parser);
|
1337
|
+
str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
|
1338
|
+
str_append_str(parser->output, parser->line_ending);
|
1324
1339
|
ary_push(parser->scope, BLOCKQUOTE_START);
|
1325
1340
|
ary_push(parser->line, BLOCKQUOTE_START);
|
1326
1341
|
}
|
@@ -1329,19 +1344,19 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1329
1344
|
case BLOCKQUOTE_END:
|
1330
1345
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1331
1346
|
{
|
1332
|
-
|
1333
|
-
|
1347
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1348
|
+
str_append(parser->output, escaped_blockquote_end, sizeof(escaped_blockquote_end) - 1);
|
1334
1349
|
}
|
1335
1350
|
else
|
1336
1351
|
{
|
1337
1352
|
if (IN(BLOCKQUOTE_START))
|
1338
|
-
|
1353
|
+
wiki_pop_from_stack_up_to(parser, parser->output, BLOCKQUOTE_START, true);
|
1339
1354
|
else
|
1340
1355
|
{
|
1341
|
-
|
1342
|
-
|
1343
|
-
|
1344
|
-
|
1356
|
+
output = parser->capture ? parser->capture : parser->output;
|
1357
|
+
wiki_pop_excess_elements(parser);
|
1358
|
+
wiki_start_para_if_necessary(parser);
|
1359
|
+
str_append(output, escaped_blockquote_end, sizeof(escaped_blockquote_end) - 1);
|
1345
1360
|
}
|
1346
1361
|
}
|
1347
1362
|
break;
|
@@ -1349,13 +1364,13 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1349
1364
|
case NO_WIKI_START:
|
1350
1365
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1351
1366
|
{
|
1352
|
-
|
1353
|
-
|
1367
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1368
|
+
str_append(parser->output, escaped_no_wiki_start, sizeof(escaped_no_wiki_start) - 1);
|
1354
1369
|
}
|
1355
1370
|
else
|
1356
1371
|
{
|
1357
|
-
|
1358
|
-
|
1372
|
+
wiki_pop_excess_elements(parser);
|
1373
|
+
wiki_start_para_if_necessary(parser);
|
1359
1374
|
ary_push(parser->scope, NO_WIKI_START);
|
1360
1375
|
ary_push(parser->line, NO_WIKI_START);
|
1361
1376
|
}
|
@@ -1364,25 +1379,25 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1364
1379
|
case NO_WIKI_END:
|
1365
1380
|
if (IN(NO_WIKI_START))
|
1366
1381
|
// <nowiki> should always only ever be the last item in the stack, but use the helper routine just in case
|
1367
|
-
|
1382
|
+
wiki_pop_from_stack_up_to(parser, NULL, NO_WIKI_START, true);
|
1368
1383
|
else
|
1369
1384
|
{
|
1370
|
-
|
1371
|
-
|
1372
|
-
|
1385
|
+
wiki_pop_excess_elements(parser);
|
1386
|
+
wiki_start_para_if_necessary(parser);
|
1387
|
+
str_append(parser->output, escaped_no_wiki_end, sizeof(escaped_no_wiki_end) - 1);
|
1373
1388
|
}
|
1374
1389
|
break;
|
1375
1390
|
|
1376
1391
|
case STRONG_EM:
|
1377
1392
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1378
1393
|
{
|
1379
|
-
|
1380
|
-
|
1394
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1395
|
+
str_append(parser->output, literal_strong_em, sizeof(literal_strong_em) - 1);
|
1381
1396
|
break;
|
1382
1397
|
}
|
1383
1398
|
|
1384
|
-
|
1385
|
-
|
1399
|
+
output = parser->capture ? parser->capture : parser->output;
|
1400
|
+
wiki_pop_excess_elements(parser);
|
1386
1401
|
|
1387
1402
|
// if you've seen STRONG/STRONG_START or EM/EM_START, must close them in the reverse order that you saw them!
|
1388
1403
|
// otherwise, must open them
|
@@ -1394,12 +1409,12 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1394
1409
|
int val = ary_entry(parser->scope, j);
|
1395
1410
|
if (val == STRONG || val == STRONG_START)
|
1396
1411
|
{
|
1397
|
-
|
1412
|
+
str_append(output, strong_end, sizeof(strong_end) - 1);
|
1398
1413
|
remove_strong = j;
|
1399
1414
|
}
|
1400
1415
|
else if (val == EM || val == EM_START)
|
1401
1416
|
{
|
1402
|
-
|
1417
|
+
str_append(output, em_end, sizeof(em_end) - 1);
|
1403
1418
|
remove_em = j;
|
1404
1419
|
}
|
1405
1420
|
}
|
@@ -1411,7 +1426,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1411
1426
|
ary_pop(parser->scope);
|
1412
1427
|
else // there was no em to remove!, so consider this an opening em tag
|
1413
1428
|
{
|
1414
|
-
|
1429
|
+
str_append(output, em_start, sizeof(em_start) - 1);
|
1415
1430
|
ary_push(parser->scope, EM);
|
1416
1431
|
ary_push(parser->line, EM);
|
1417
1432
|
}
|
@@ -1423,15 +1438,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1423
1438
|
ary_pop(parser->scope);
|
1424
1439
|
else // there was no strong to remove!, so consider this an opening strong tag
|
1425
1440
|
{
|
1426
|
-
|
1441
|
+
str_append(output, strong_start, sizeof(strong_start) - 1);
|
1427
1442
|
ary_push(parser->scope, STRONG);
|
1428
1443
|
ary_push(parser->line, STRONG);
|
1429
1444
|
}
|
1430
1445
|
}
|
1431
1446
|
else // no strong or em to remove, so this must be a new opening of both
|
1432
1447
|
{
|
1433
|
-
|
1434
|
-
|
1448
|
+
wiki_start_para_if_necessary(parser);
|
1449
|
+
str_append(output, strong_em_start, sizeof(strong_em_start) - 1);
|
1435
1450
|
ary_push(parser->scope, STRONG);
|
1436
1451
|
ary_push(parser->line, STRONG);
|
1437
1452
|
ary_push(parser->scope, EM);
|
@@ -1442,24 +1457,24 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1442
1457
|
case STRONG:
|
1443
1458
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1444
1459
|
{
|
1445
|
-
|
1446
|
-
|
1460
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1461
|
+
str_append(parser->output, literal_strong, sizeof(literal_strong) - 1);
|
1447
1462
|
}
|
1448
1463
|
else
|
1449
1464
|
{
|
1450
|
-
|
1465
|
+
output = parser->capture ? parser->capture : parser->output;
|
1451
1466
|
if (IN(STRONG_START))
|
1452
1467
|
// already in span started with <strong>, no choice but to emit this literally
|
1453
|
-
|
1468
|
+
str_append(output, literal_strong, sizeof(literal_strong) - 1);
|
1454
1469
|
else if (IN(STRONG))
|
1455
1470
|
// STRONG already seen, this is a closing tag
|
1456
|
-
|
1471
|
+
wiki_pop_from_stack_up_to(parser, output, STRONG, true);
|
1457
1472
|
else
|
1458
1473
|
{
|
1459
1474
|
// this is a new opening
|
1460
|
-
|
1461
|
-
|
1462
|
-
|
1475
|
+
wiki_pop_excess_elements(parser);
|
1476
|
+
wiki_start_para_if_necessary(parser);
|
1477
|
+
str_append(output, strong_start, sizeof(strong_start) - 1);
|
1463
1478
|
ary_push(parser->scope, STRONG);
|
1464
1479
|
ary_push(parser->line, STRONG);
|
1465
1480
|
}
|
@@ -1469,19 +1484,19 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1469
1484
|
case STRONG_START:
|
1470
1485
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1471
1486
|
{
|
1472
|
-
|
1473
|
-
|
1487
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1488
|
+
str_append(parser->output, escaped_strong_start, sizeof(escaped_strong_start) - 1);
|
1474
1489
|
}
|
1475
1490
|
else
|
1476
1491
|
{
|
1477
|
-
|
1492
|
+
output = parser->capture ? parser->capture : parser->output;
|
1478
1493
|
if (IN(STRONG_START) || IN(STRONG))
|
1479
|
-
|
1494
|
+
str_append(output, escaped_strong_start, sizeof(escaped_strong_start) - 1);
|
1480
1495
|
else
|
1481
1496
|
{
|
1482
|
-
|
1483
|
-
|
1484
|
-
|
1497
|
+
wiki_pop_excess_elements(parser);
|
1498
|
+
wiki_start_para_if_necessary(parser);
|
1499
|
+
str_append(output, strong_start, sizeof(strong_start) - 1);
|
1485
1500
|
ary_push(parser->scope, STRONG_START);
|
1486
1501
|
ary_push(parser->line, STRONG_START);
|
1487
1502
|
}
|
@@ -1491,20 +1506,20 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1491
1506
|
case STRONG_END:
|
1492
1507
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1493
1508
|
{
|
1494
|
-
|
1495
|
-
|
1509
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1510
|
+
str_append(parser->output, escaped_strong_end, sizeof(escaped_strong_end) - 1);
|
1496
1511
|
}
|
1497
1512
|
else
|
1498
1513
|
{
|
1499
|
-
|
1514
|
+
output = parser->capture ? parser->capture : parser->output;
|
1500
1515
|
if (IN(STRONG_START))
|
1501
|
-
|
1516
|
+
wiki_pop_from_stack_up_to(parser, output, STRONG_START, true);
|
1502
1517
|
else
|
1503
1518
|
{
|
1504
1519
|
// no STRONG_START in scope, so must interpret the STRONG_END without any special meaning
|
1505
|
-
|
1506
|
-
|
1507
|
-
|
1520
|
+
wiki_pop_excess_elements(parser);
|
1521
|
+
wiki_start_para_if_necessary(parser);
|
1522
|
+
str_append(output, escaped_strong_end, sizeof(escaped_strong_end) - 1);
|
1508
1523
|
}
|
1509
1524
|
}
|
1510
1525
|
break;
|
@@ -1512,24 +1527,24 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1512
1527
|
case EM:
|
1513
1528
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1514
1529
|
{
|
1515
|
-
|
1516
|
-
|
1530
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1531
|
+
str_append(parser->output, literal_em, sizeof(literal_em) - 1);
|
1517
1532
|
}
|
1518
1533
|
else
|
1519
1534
|
{
|
1520
|
-
|
1535
|
+
output = parser->capture ? parser->capture : parser->output;
|
1521
1536
|
if (IN(EM_START))
|
1522
1537
|
// already in span started with <em>, no choice but to emit this literally
|
1523
|
-
|
1538
|
+
str_append(output, literal_em, sizeof(literal_em) - 1);
|
1524
1539
|
else if (IN(EM))
|
1525
1540
|
// EM already seen, this is a closing tag
|
1526
|
-
|
1541
|
+
wiki_pop_from_stack_up_to(parser, output, EM, true);
|
1527
1542
|
else
|
1528
1543
|
{
|
1529
1544
|
// this is a new opening
|
1530
|
-
|
1531
|
-
|
1532
|
-
|
1545
|
+
wiki_pop_excess_elements(parser);
|
1546
|
+
wiki_start_para_if_necessary(parser);
|
1547
|
+
str_append(output, em_start, sizeof(em_start) - 1);
|
1533
1548
|
ary_push(parser->scope, EM);
|
1534
1549
|
ary_push(parser->line, EM);
|
1535
1550
|
}
|
@@ -1539,19 +1554,19 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1539
1554
|
case EM_START:
|
1540
1555
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1541
1556
|
{
|
1542
|
-
|
1543
|
-
|
1557
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1558
|
+
str_append(parser->output, escaped_em_start, sizeof(escaped_em_start) - 1);
|
1544
1559
|
}
|
1545
1560
|
else
|
1546
1561
|
{
|
1547
|
-
|
1562
|
+
output = parser->capture ? parser->capture : parser->output;
|
1548
1563
|
if (IN(EM_START) || IN(EM))
|
1549
|
-
|
1564
|
+
str_append(output, escaped_em_start, sizeof(escaped_em_start) - 1);
|
1550
1565
|
else
|
1551
1566
|
{
|
1552
|
-
|
1553
|
-
|
1554
|
-
|
1567
|
+
wiki_pop_excess_elements(parser);
|
1568
|
+
wiki_start_para_if_necessary(parser);
|
1569
|
+
str_append(output, em_start, sizeof(em_start) - 1);
|
1555
1570
|
ary_push(parser->scope, EM_START);
|
1556
1571
|
ary_push(parser->line, EM_START);
|
1557
1572
|
}
|
@@ -1561,20 +1576,20 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1561
1576
|
case EM_END:
|
1562
1577
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1563
1578
|
{
|
1564
|
-
|
1565
|
-
|
1579
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1580
|
+
str_append(parser->output, escaped_em_end, sizeof(escaped_em_end) - 1);
|
1566
1581
|
}
|
1567
1582
|
else
|
1568
1583
|
{
|
1569
|
-
|
1584
|
+
output = parser->capture ? parser->capture : parser->output;
|
1570
1585
|
if (IN(EM_START))
|
1571
|
-
|
1586
|
+
wiki_pop_from_stack_up_to(parser, output, EM_START, true);
|
1572
1587
|
else
|
1573
1588
|
{
|
1574
1589
|
// no EM_START in scope, so must interpret the EM_END without any special meaning
|
1575
|
-
|
1576
|
-
|
1577
|
-
|
1590
|
+
wiki_pop_excess_elements(parser);
|
1591
|
+
wiki_start_para_if_necessary(parser);
|
1592
|
+
str_append(output, escaped_em_end, sizeof(escaped_em_end) - 1);
|
1578
1593
|
}
|
1579
1594
|
}
|
1580
1595
|
break;
|
@@ -1582,24 +1597,24 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1582
1597
|
case TT:
|
1583
1598
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1584
1599
|
{
|
1585
|
-
|
1586
|
-
|
1600
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1601
|
+
str_append(parser->output, backtick, sizeof(backtick) - 1);
|
1587
1602
|
}
|
1588
1603
|
else
|
1589
1604
|
{
|
1590
|
-
|
1605
|
+
output = parser->capture ? parser->capture : parser->output;
|
1591
1606
|
if (IN(TT_START))
|
1592
1607
|
// already in span started with <tt>, no choice but to emit this literally
|
1593
|
-
|
1608
|
+
str_append(output, backtick, sizeof(backtick) - 1);
|
1594
1609
|
else if (IN(TT))
|
1595
1610
|
// TT (`) already seen, this is a closing tag
|
1596
|
-
|
1611
|
+
wiki_pop_from_stack_up_to(parser, output, TT, true);
|
1597
1612
|
else
|
1598
1613
|
{
|
1599
1614
|
// this is a new opening
|
1600
|
-
|
1601
|
-
|
1602
|
-
|
1615
|
+
wiki_pop_excess_elements(parser);
|
1616
|
+
wiki_start_para_if_necessary(parser);
|
1617
|
+
str_append(output, tt_start, sizeof(tt_start) - 1);
|
1603
1618
|
ary_push(parser->scope, TT);
|
1604
1619
|
ary_push(parser->line, TT);
|
1605
1620
|
}
|
@@ -1609,19 +1624,19 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1609
1624
|
case TT_START:
|
1610
1625
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1611
1626
|
{
|
1612
|
-
|
1613
|
-
|
1627
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1628
|
+
str_append(parser->output, escaped_tt_start, sizeof(escaped_tt_start) - 1);
|
1614
1629
|
}
|
1615
1630
|
else
|
1616
1631
|
{
|
1617
|
-
|
1632
|
+
output = parser->capture ? parser->capture : parser->output;
|
1618
1633
|
if (IN(TT_START) || IN(TT))
|
1619
|
-
|
1634
|
+
str_append(output, escaped_tt_start, sizeof(escaped_tt_start) - 1);
|
1620
1635
|
else
|
1621
1636
|
{
|
1622
|
-
|
1623
|
-
|
1624
|
-
|
1637
|
+
wiki_pop_excess_elements(parser);
|
1638
|
+
wiki_start_para_if_necessary(parser);
|
1639
|
+
str_append(output, tt_start, sizeof(tt_start) - 1);
|
1625
1640
|
ary_push(parser->scope, TT_START);
|
1626
1641
|
ary_push(parser->line, TT_START);
|
1627
1642
|
}
|
@@ -1631,20 +1646,20 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1631
1646
|
case TT_END:
|
1632
1647
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1633
1648
|
{
|
1634
|
-
|
1635
|
-
|
1649
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1650
|
+
str_append(parser->output, escaped_tt_end, sizeof(escaped_tt_end) - 1);
|
1636
1651
|
}
|
1637
1652
|
else
|
1638
1653
|
{
|
1639
|
-
|
1654
|
+
output = parser->capture ? parser->capture : parser->output;
|
1640
1655
|
if (IN(TT_START))
|
1641
|
-
|
1656
|
+
wiki_pop_from_stack_up_to(parser, output, TT_START, true);
|
1642
1657
|
else
|
1643
1658
|
{
|
1644
1659
|
// no TT_START in scope, so must interpret the TT_END without any special meaning
|
1645
|
-
|
1646
|
-
|
1647
|
-
|
1660
|
+
wiki_pop_excess_elements(parser);
|
1661
|
+
wiki_start_para_if_necessary(parser);
|
1662
|
+
str_append(output, escaped_tt_end, sizeof(escaped_tt_end) - 1);
|
1648
1663
|
}
|
1649
1664
|
}
|
1650
1665
|
break;
|
@@ -1654,7 +1669,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1654
1669
|
if (IN(NO_WIKI_START) || IN(PRE_START))
|
1655
1670
|
{
|
1656
1671
|
// no need to check for PRE; can never appear inside it
|
1657
|
-
|
1672
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
1658
1673
|
break;
|
1659
1674
|
}
|
1660
1675
|
|
@@ -1684,7 +1699,8 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1684
1699
|
// want to compare line with scope but can only do so if scope has enough items on it
|
1685
1700
|
if (j >= i)
|
1686
1701
|
{
|
1687
|
-
if (ary_entry(parser->scope, i + bq_count - 2) == type &&
|
1702
|
+
if (ary_entry(parser->scope, i + bq_count - 2) == type &&
|
1703
|
+
ary_entry(parser->scope, i + bq_count - 1) == LI)
|
1688
1704
|
{
|
1689
1705
|
// line and scope match at this point: do nothing yet
|
1690
1706
|
}
|
@@ -1693,7 +1709,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1693
1709
|
// item just pushed onto line does not match corresponding slot of scope!
|
1694
1710
|
for (; j >= i - 2; j--)
|
1695
1711
|
// must pop back before emitting
|
1696
|
-
|
1712
|
+
wiki_pop_from_stack(parser, NULL);
|
1697
1713
|
|
1698
1714
|
// will emit UL or OL, then LI
|
1699
1715
|
break;
|
@@ -1707,13 +1723,13 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1707
1723
|
// not an OL or UL token!
|
1708
1724
|
if (j == i)
|
1709
1725
|
// must close existing LI and re-open new one
|
1710
|
-
|
1726
|
+
wiki_pop_from_stack(parser, NULL);
|
1711
1727
|
else if (j > i)
|
1712
1728
|
{
|
1713
1729
|
// item just pushed onto line does not match corresponding slot of scope!
|
1714
1730
|
for (; j >= i; j--)
|
1715
1731
|
// must pop back before emitting
|
1716
|
-
|
1732
|
+
wiki_pop_from_stack(parser, NULL);
|
1717
1733
|
}
|
1718
1734
|
break;
|
1719
1735
|
}
|
@@ -1727,33 +1743,33 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1727
1743
|
if (j > 0 && ary_entry(parser->scope, -1) == LI)
|
1728
1744
|
{
|
1729
1745
|
// so we should precede it with a CRLF, and indicate that it's a nested list
|
1730
|
-
|
1746
|
+
str_append(parser->output, parser->line_ending->ptr, parser->line_ending->len);
|
1731
1747
|
ary_push(parser->scope, NESTED_LIST);
|
1732
1748
|
}
|
1733
1749
|
else
|
1734
1750
|
{
|
1735
1751
|
// this is a new list
|
1736
1752
|
if (IN(BLOCKQUOTE_START))
|
1737
|
-
|
1753
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
|
1738
1754
|
else
|
1739
|
-
|
1755
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
|
1740
1756
|
}
|
1741
1757
|
|
1742
1758
|
// emit
|
1743
|
-
|
1759
|
+
wiki_indent(parser);
|
1744
1760
|
if (type == OL)
|
1745
|
-
|
1761
|
+
str_append(parser->output, ol_start, sizeof(ol_start) - 1);
|
1746
1762
|
else if (type == UL)
|
1747
|
-
|
1763
|
+
str_append(parser->output, ul_start, sizeof(ul_start) - 1);
|
1748
1764
|
ary_push(parser->scope, type);
|
1749
|
-
|
1765
|
+
str_append(parser->output, parser->line_ending->ptr, parser->line_ending->len);
|
1750
1766
|
}
|
1751
1767
|
else if (type == SPACE)
|
1752
1768
|
// silently throw away the optional SPACE token after final list marker
|
1753
1769
|
token = NULL;
|
1754
1770
|
|
1755
|
-
|
1756
|
-
|
1771
|
+
wiki_indent(parser);
|
1772
|
+
str_append(parser->output, li_start, sizeof(li_start) - 1);
|
1757
1773
|
ary_push(parser->scope, LI);
|
1758
1774
|
|
1759
1775
|
// any subsequent UL or OL tokens on this line are syntax errors and must be emitted literally
|
@@ -1763,7 +1779,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1763
1779
|
while (k++, NEXT_TOKEN(), (type = token->type))
|
1764
1780
|
{
|
1765
1781
|
if (type == OL || type == UL)
|
1766
|
-
|
1782
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
1767
1783
|
else if (type == SPACE && k == 1)
|
1768
1784
|
{
|
1769
1785
|
// silently throw away the optional SPACE token after final list marker
|
@@ -1787,15 +1803,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1787
1803
|
if (IN(NO_WIKI_START) || IN(PRE_START))
|
1788
1804
|
{
|
1789
1805
|
// no need to check for PRE; can never appear inside it
|
1790
|
-
|
1806
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
1791
1807
|
break;
|
1792
1808
|
}
|
1793
1809
|
|
1794
1810
|
// pop up to but not including the last BLOCKQUOTE on the scope stack
|
1795
1811
|
if (IN(BLOCKQUOTE_START))
|
1796
|
-
|
1812
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
|
1797
1813
|
else
|
1798
|
-
|
1814
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
|
1799
1815
|
|
1800
1816
|
// count number of BLOCKQUOTE tokens in line buffer and in scope stack
|
1801
1817
|
ary_push(parser->line, type);
|
@@ -1807,7 +1823,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1807
1823
|
{
|
1808
1824
|
// must pop (reduce nesting level)
|
1809
1825
|
for (i = j - i; i > 0; i--)
|
1810
|
-
|
1826
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, true);
|
1811
1827
|
}
|
1812
1828
|
|
1813
1829
|
// discard any whitespace here (so that "== foo ==" will be translated to "<h2>foo</h2>" rather than "<h2> foo </h2>")
|
@@ -1815,7 +1831,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1815
1831
|
; // discard
|
1816
1832
|
|
1817
1833
|
ary_push(parser->scope, type);
|
1818
|
-
|
1834
|
+
wiki_indent(parser);
|
1819
1835
|
|
1820
1836
|
// take base_heading_level into account
|
1821
1837
|
type += base_heading_level;
|
@@ -1824,125 +1840,45 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1824
1840
|
|
1825
1841
|
// rather than repeat all that code for each kind of heading, share it and use a conditional here
|
1826
1842
|
if (type == H6_START)
|
1827
|
-
|
1843
|
+
str_append(parser->output, h6_start, sizeof(h6_start) - 1);
|
1828
1844
|
else if (type == H5_START)
|
1829
|
-
|
1845
|
+
str_append(parser->output, h5_start, sizeof(h5_start) - 1);
|
1830
1846
|
else if (type == H4_START)
|
1831
|
-
|
1847
|
+
str_append(parser->output, h4_start, sizeof(h4_start) - 1);
|
1832
1848
|
else if (type == H3_START)
|
1833
|
-
|
1849
|
+
str_append(parser->output, h3_start, sizeof(h3_start) - 1);
|
1834
1850
|
else if (type == H2_START)
|
1835
|
-
|
1851
|
+
str_append(parser->output, h2_start, sizeof(h2_start) - 1);
|
1836
1852
|
else if (type == H1_START)
|
1837
|
-
|
1853
|
+
str_append(parser->output, h1_start, sizeof(h1_start) - 1);
|
1838
1854
|
|
1839
1855
|
// jump to top of the loop to process token we scanned during lookahead
|
1840
1856
|
continue;
|
1841
1857
|
|
1842
1858
|
case H6_END:
|
1843
|
-
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1844
|
-
{
|
1845
|
-
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1846
|
-
rb_str_cat(parser->output, literal_h6, sizeof(literal_h6) - 1);
|
1847
|
-
}
|
1848
|
-
else
|
1849
|
-
{
|
1850
|
-
_Wikitext_rollback_failed_external_link(parser); // if any
|
1851
|
-
if (!IN(H6_START))
|
1852
|
-
{
|
1853
|
-
// literal output only if not in h6 scope (we stay silent in that case)
|
1854
|
-
_Wikitext_start_para_if_necessary(parser);
|
1855
|
-
rb_str_cat(parser->output, literal_h6, sizeof(literal_h6) - 1);
|
1856
|
-
}
|
1857
|
-
}
|
1858
|
-
break;
|
1859
|
-
|
1860
1859
|
case H5_END:
|
1861
|
-
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1862
|
-
{
|
1863
|
-
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1864
|
-
rb_str_cat(parser->output, literal_h5, sizeof(literal_h5) - 1);
|
1865
|
-
}
|
1866
|
-
else
|
1867
|
-
{
|
1868
|
-
_Wikitext_rollback_failed_external_link(parser); // if any
|
1869
|
-
if (!IN(H5_START))
|
1870
|
-
{
|
1871
|
-
// literal output only if not in h5 scope (we stay silent in that case)
|
1872
|
-
_Wikitext_start_para_if_necessary(parser);
|
1873
|
-
rb_str_cat(parser->output, literal_h5, sizeof(literal_h5) - 1);
|
1874
|
-
}
|
1875
|
-
}
|
1876
|
-
break;
|
1877
|
-
|
1878
1860
|
case H4_END:
|
1879
|
-
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1880
|
-
{
|
1881
|
-
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1882
|
-
rb_str_cat(parser->output, literal_h4, sizeof(literal_h4) - 1);
|
1883
|
-
}
|
1884
|
-
else
|
1885
|
-
{
|
1886
|
-
_Wikitext_rollback_failed_external_link(parser); // if any
|
1887
|
-
if (!IN(H4_START))
|
1888
|
-
{
|
1889
|
-
// literal output only if not in h4 scope (we stay silent in that case)
|
1890
|
-
_Wikitext_start_para_if_necessary(parser);
|
1891
|
-
rb_str_cat(parser->output, literal_h4, sizeof(literal_h4) - 1);
|
1892
|
-
}
|
1893
|
-
}
|
1894
|
-
break;
|
1895
|
-
|
1896
1861
|
case H3_END:
|
1897
|
-
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1898
|
-
{
|
1899
|
-
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1900
|
-
rb_str_cat(parser->output, literal_h3, sizeof(literal_h3) - 1);
|
1901
|
-
}
|
1902
|
-
else
|
1903
|
-
{
|
1904
|
-
_Wikitext_rollback_failed_external_link(parser); // if any
|
1905
|
-
if (!IN(H3_START))
|
1906
|
-
{
|
1907
|
-
// literal output only if not in h3 scope (we stay silent in that case)
|
1908
|
-
_Wikitext_start_para_if_necessary(parser);
|
1909
|
-
rb_str_cat(parser->output, literal_h3, sizeof(literal_h3) - 1);
|
1910
|
-
}
|
1911
|
-
}
|
1912
|
-
break;
|
1913
|
-
|
1914
1862
|
case H2_END:
|
1915
|
-
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1916
|
-
{
|
1917
|
-
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1918
|
-
rb_str_cat(parser->output, literal_h2, sizeof(literal_h2) - 1);
|
1919
|
-
}
|
1920
|
-
else
|
1921
|
-
{
|
1922
|
-
_Wikitext_rollback_failed_external_link(parser); // if any
|
1923
|
-
if (!IN(H2_START))
|
1924
|
-
{
|
1925
|
-
// literal output only if not in h2 scope (we stay silent in that case)
|
1926
|
-
_Wikitext_start_para_if_necessary(parser);
|
1927
|
-
rb_str_cat(parser->output, literal_h2, sizeof(literal_h2) - 1);
|
1928
|
-
}
|
1929
|
-
}
|
1930
|
-
break;
|
1931
|
-
|
1932
1863
|
case H1_END:
|
1933
1864
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1934
1865
|
{
|
1935
|
-
|
1936
|
-
|
1866
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1867
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
1937
1868
|
}
|
1938
1869
|
else
|
1939
1870
|
{
|
1940
|
-
|
1941
|
-
if (!IN(
|
1871
|
+
wiki_rollback_failed_external_link(parser); // if any
|
1872
|
+
if ((type == H6_END && !IN(H6_START)) ||
|
1873
|
+
(type == H5_END && !IN(H5_START)) ||
|
1874
|
+
(type == H4_END && !IN(H4_START)) ||
|
1875
|
+
(type == H3_END && !IN(H3_START)) ||
|
1876
|
+
(type == H2_END && !IN(H2_START)) ||
|
1877
|
+
(type == H1_END && !IN(H1_START)))
|
1942
1878
|
{
|
1943
|
-
// literal output only if not in
|
1944
|
-
|
1945
|
-
|
1879
|
+
// literal output only if not in appropriate scope (we stay silent in that case)
|
1880
|
+
wiki_start_para_if_necessary(parser);
|
1881
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
1946
1882
|
}
|
1947
1883
|
}
|
1948
1884
|
break;
|
@@ -1950,18 +1886,16 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1950
1886
|
case MAIL:
|
1951
1887
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1952
1888
|
{
|
1953
|
-
|
1954
|
-
|
1889
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1890
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
1955
1891
|
}
|
1956
1892
|
else
|
1957
1893
|
{
|
1958
|
-
|
1959
|
-
|
1960
|
-
|
1961
|
-
|
1962
|
-
|
1963
|
-
i = _Wikitext_hyperlink(parser, rb_str_new2("mailto:"), i, i, mailto_class);
|
1964
|
-
rb_str_append(parser->output, i);
|
1894
|
+
wiki_pop_excess_elements(parser);
|
1895
|
+
wiki_start_para_if_necessary(parser);
|
1896
|
+
token_str->ptr = token->start;
|
1897
|
+
token_str->len = TOKEN_LEN(token);
|
1898
|
+
wiki_append_hyperlink(parser, rb_str_new2("mailto:"), token_str, NULL, mailto_class, true);
|
1965
1899
|
}
|
1966
1900
|
break;
|
1967
1901
|
|
@@ -1969,110 +1903,93 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1969
1903
|
if (IN(NO_WIKI_START))
|
1970
1904
|
// user can temporarily suppress autolinking by using <nowiki></nowiki>
|
1971
1905
|
// note that unlike MediaWiki, we do allow autolinking inside PRE blocks
|
1972
|
-
|
1906
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
1973
1907
|
else if (IN(LINK_START))
|
1974
1908
|
{
|
1975
1909
|
// if the URI were allowed it would have been handled already in LINK_START
|
1976
|
-
|
1977
|
-
|
1978
|
-
|
1979
|
-
|
1980
|
-
rb_str_append(parser->output, i);
|
1910
|
+
wiki_rollback_failed_internal_link(parser);
|
1911
|
+
token_str->ptr = token->start;
|
1912
|
+
token_str->len = TOKEN_LEN(token);
|
1913
|
+
wiki_append_hyperlink(parser, Qnil, token_str, NULL, parser->external_link_class, true);
|
1981
1914
|
}
|
1982
1915
|
else if (IN(EXT_LINK_START))
|
1983
1916
|
{
|
1984
|
-
if (
|
1917
|
+
if (parser->link_target->len == 0)
|
1985
1918
|
{
|
1986
1919
|
// this must be our link target: look ahead to make sure we see the space we're expecting to see
|
1987
|
-
|
1920
|
+
token_str->ptr = token->start;
|
1921
|
+
token_str->len = TOKEN_LEN(token);
|
1988
1922
|
NEXT_TOKEN();
|
1989
1923
|
if (token->type == SPACE)
|
1990
1924
|
{
|
1991
1925
|
ary_push(parser->scope, SPACE);
|
1992
|
-
parser->link_target
|
1993
|
-
parser->link_text
|
1926
|
+
str_append_str(parser->link_target, token_str);
|
1927
|
+
str_clear(parser->link_text);
|
1994
1928
|
parser->capture = parser->link_text;
|
1995
1929
|
token = NULL; // silently consume space
|
1996
1930
|
}
|
1997
1931
|
else
|
1998
1932
|
{
|
1999
1933
|
// didn't see the space! this must be an error
|
2000
|
-
|
2001
|
-
|
2002
|
-
|
2003
|
-
|
2004
|
-
|
2005
|
-
i = _Wikitext_hyperlink(parser, Qnil, i, i, parser->external_link_class); // link target, link text
|
2006
|
-
rb_str_append(parser->output, i);
|
1934
|
+
wiki_pop_from_stack(parser, NULL);
|
1935
|
+
wiki_pop_excess_elements(parser);
|
1936
|
+
wiki_start_para_if_necessary(parser);
|
1937
|
+
str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
|
1938
|
+
wiki_append_hyperlink(parser, Qnil, token_str, NULL, parser->external_link_class, true);
|
2007
1939
|
}
|
2008
1940
|
}
|
2009
1941
|
else
|
2010
|
-
|
2011
|
-
if (NIL_P(parser->link_text))
|
2012
|
-
// this must be the first part of our link text
|
2013
|
-
parser->link_text = TOKEN_TEXT(token);
|
2014
|
-
else
|
2015
|
-
// add to existing link text
|
2016
|
-
rb_str_cat(parser->link_text, token->start, TOKEN_LEN(token));
|
2017
|
-
}
|
1942
|
+
str_append(parser->link_text, token->start, TOKEN_LEN(token));
|
2018
1943
|
}
|
2019
1944
|
else
|
2020
1945
|
{
|
2021
|
-
|
2022
|
-
|
2023
|
-
|
2024
|
-
|
2025
|
-
|
2026
|
-
i = _Wikitext_hyperlink(parser, Qnil, i, i, parser->external_link_class); // link target, link text
|
2027
|
-
rb_str_append(parser->output, i);
|
1946
|
+
wiki_pop_excess_elements(parser);
|
1947
|
+
wiki_start_para_if_necessary(parser);
|
1948
|
+
token_str->ptr = token->start;
|
1949
|
+
token_str->len = TOKEN_LEN(token);
|
1950
|
+
wiki_append_hyperlink(parser, Qnil, token_str, NULL, parser->external_link_class, true);
|
2028
1951
|
}
|
2029
1952
|
break;
|
2030
1953
|
|
2031
1954
|
case PATH:
|
2032
1955
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2033
|
-
|
1956
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
2034
1957
|
else if (IN(EXT_LINK_START))
|
2035
1958
|
{
|
2036
|
-
if (
|
1959
|
+
if (parser->link_target->len == 0)
|
2037
1960
|
{
|
2038
1961
|
// this must be our link target: look ahead to make sure we see the space we're expecting to see
|
2039
|
-
|
1962
|
+
token_str->ptr = token->start;
|
1963
|
+
token_str->len = TOKEN_LEN(token);
|
2040
1964
|
NEXT_TOKEN();
|
2041
1965
|
if (token->type == SPACE)
|
2042
1966
|
{
|
2043
1967
|
ary_push(parser->scope, PATH);
|
2044
1968
|
ary_push(parser->scope, SPACE);
|
2045
|
-
parser->link_target
|
2046
|
-
parser->link_text
|
1969
|
+
str_append_str(parser->link_target, token_str);
|
1970
|
+
str_clear(parser->link_text);
|
2047
1971
|
parser->capture = parser->link_text;
|
2048
1972
|
token = NULL; // silently consume space
|
2049
1973
|
}
|
2050
1974
|
else
|
2051
1975
|
{
|
2052
1976
|
// didn't see the space! this must be an error
|
2053
|
-
|
2054
|
-
|
2055
|
-
|
2056
|
-
|
2057
|
-
|
1977
|
+
wiki_pop_from_stack(parser, NULL);
|
1978
|
+
wiki_pop_excess_elements(parser);
|
1979
|
+
wiki_start_para_if_necessary(parser);
|
1980
|
+
str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
|
1981
|
+
str_append_str(parser->output, token_str);
|
2058
1982
|
}
|
2059
1983
|
}
|
2060
1984
|
else
|
2061
|
-
|
2062
|
-
if (NIL_P(parser->link_text))
|
2063
|
-
// this must be the first part of our link text
|
2064
|
-
parser->link_text = TOKEN_TEXT(token);
|
2065
|
-
else
|
2066
|
-
// add to existing link text
|
2067
|
-
rb_str_cat(parser->link_text, token->start, TOKEN_LEN(token));
|
2068
|
-
}
|
1985
|
+
str_append(parser->link_text, token->start, TOKEN_LEN(token));
|
2069
1986
|
}
|
2070
1987
|
else
|
2071
1988
|
{
|
2072
|
-
|
2073
|
-
|
2074
|
-
|
2075
|
-
|
1989
|
+
output = parser->capture ? parser->capture : parser->output;
|
1990
|
+
wiki_pop_excess_elements(parser);
|
1991
|
+
wiki_start_para_if_necessary(parser);
|
1992
|
+
str_append(output, token->start, TOKEN_LEN(token));
|
2076
1993
|
}
|
2077
1994
|
break;
|
2078
1995
|
|
@@ -2099,20 +2016,20 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2099
2016
|
// SPACE, SPECIAL_URI_CHARS, PRINTABLE, PATH, ALNUM, DEFAULT, QUOT and AMP
|
2100
2017
|
// everything else will be rejected
|
2101
2018
|
case LINK_START:
|
2102
|
-
|
2019
|
+
output = parser->capture ? parser->capture : parser->output;
|
2103
2020
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2104
2021
|
{
|
2105
|
-
|
2106
|
-
|
2022
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
2023
|
+
str_append(output, link_start, sizeof(link_start) - 1);
|
2107
2024
|
}
|
2108
2025
|
else if (IN(EXT_LINK_START))
|
2109
2026
|
// already in external link scope! (and in fact, must be capturing link_text right now)
|
2110
|
-
|
2027
|
+
str_append(output, link_start, sizeof(link_start) - 1);
|
2111
2028
|
else if (IN(LINK_START))
|
2112
2029
|
{
|
2113
2030
|
// already in internal link scope! this is a syntax error
|
2114
|
-
|
2115
|
-
|
2031
|
+
wiki_rollback_failed_internal_link(parser);
|
2032
|
+
str_append(parser->output, link_start, sizeof(link_start) - 1);
|
2116
2033
|
}
|
2117
2034
|
else if (IN(SEPARATOR))
|
2118
2035
|
{
|
@@ -2121,8 +2038,8 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2121
2038
|
else // not in internal link scope yet
|
2122
2039
|
{
|
2123
2040
|
// will either emit a link, or the rollback of a failed link, so start the para now
|
2124
|
-
|
2125
|
-
|
2041
|
+
wiki_pop_excess_elements(parser);
|
2042
|
+
wiki_start_para_if_necessary(parser);
|
2126
2043
|
ary_push(parser->scope, LINK_START);
|
2127
2044
|
|
2128
2045
|
// look ahead and try to gobble up link target
|
@@ -2144,34 +2061,34 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2144
2061
|
type == RIGHT_CURLY)
|
2145
2062
|
{
|
2146
2063
|
// accumulate these tokens into link_target
|
2147
|
-
if (
|
2064
|
+
if (parser->link_target->len == 0)
|
2148
2065
|
{
|
2149
|
-
parser->link_target
|
2150
|
-
parser->capture
|
2066
|
+
str_clear(parser->link_target);
|
2067
|
+
parser->capture = parser->link_target;
|
2151
2068
|
}
|
2152
2069
|
if (type == QUOT_ENTITY)
|
2153
2070
|
// don't insert the entity, insert the literal quote
|
2154
|
-
|
2071
|
+
str_append(parser->link_target, quote, sizeof(quote) - 1);
|
2155
2072
|
else if (type == AMP_ENTITY)
|
2156
2073
|
// don't insert the entity, insert the literal ampersand
|
2157
|
-
|
2074
|
+
str_append(parser->link_target, ampersand, sizeof(ampersand) - 1);
|
2158
2075
|
else
|
2159
|
-
|
2076
|
+
str_append(parser->link_target, token->start, TOKEN_LEN(token));
|
2160
2077
|
}
|
2161
2078
|
else if (type == LINK_END)
|
2162
2079
|
{
|
2163
|
-
if (
|
2164
|
-
|
2080
|
+
if (parser->link_target->len == 0) // bail for inputs like "[[]]"
|
2081
|
+
wiki_rollback_failed_internal_link(parser);
|
2165
2082
|
break; // jump back to top of loop (will handle this in LINK_END case below)
|
2166
2083
|
}
|
2167
2084
|
else if (type == SEPARATOR)
|
2168
2085
|
{
|
2169
|
-
if (
|
2170
|
-
|
2086
|
+
if (parser->link_target->len == 0) // bail for inputs like "[[|"
|
2087
|
+
wiki_rollback_failed_internal_link(parser);
|
2171
2088
|
else
|
2172
2089
|
{
|
2173
2090
|
ary_push(parser->scope, SEPARATOR);
|
2174
|
-
parser->link_text
|
2091
|
+
str_clear(parser->link_text);
|
2175
2092
|
parser->capture = parser->link_text;
|
2176
2093
|
token = NULL;
|
2177
2094
|
}
|
@@ -2179,7 +2096,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2179
2096
|
}
|
2180
2097
|
else // unexpected token (syntax error)
|
2181
2098
|
{
|
2182
|
-
|
2099
|
+
wiki_rollback_failed_internal_link(parser);
|
2183
2100
|
break; // jump back to top of loop to handle unexpected token
|
2184
2101
|
}
|
2185
2102
|
}
|
@@ -2190,42 +2107,45 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2190
2107
|
break;
|
2191
2108
|
|
2192
2109
|
case LINK_END:
|
2193
|
-
|
2110
|
+
output = parser->capture ? parser->capture : parser->output;
|
2194
2111
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2195
2112
|
{
|
2196
|
-
|
2197
|
-
|
2113
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
2114
|
+
str_append(output, link_end, sizeof(link_end) - 1);
|
2198
2115
|
}
|
2199
2116
|
else if (IN(EXT_LINK_START))
|
2200
2117
|
// already in external link scope! (and in fact, must be capturing link_text right now)
|
2201
|
-
|
2118
|
+
str_append(output, link_end, sizeof(link_end) - 1);
|
2202
2119
|
else if (IN(LINK_START)) // in internal link scope!
|
2203
2120
|
{
|
2204
|
-
if (
|
2121
|
+
if (wiki_blank(parser->link_target))
|
2205
2122
|
{
|
2206
2123
|
// special case for inputs like "[[ ]]"
|
2207
|
-
|
2208
|
-
|
2124
|
+
wiki_rollback_failed_internal_link(parser);
|
2125
|
+
str_append(parser->output, link_end, sizeof(link_end) - 1);
|
2209
2126
|
break;
|
2210
2127
|
}
|
2211
|
-
if (
|
2128
|
+
if (parser->link_text->len == 0 ||
|
2129
|
+
wiki_blank(parser->link_text))
|
2130
|
+
{
|
2212
2131
|
// use link target as link text
|
2213
|
-
parser->link_text
|
2132
|
+
str_clear(parser->link_text);
|
2133
|
+
wiki_append_sanitized_link_target(parser, parser->link_text, true);
|
2134
|
+
}
|
2214
2135
|
else
|
2215
|
-
|
2216
|
-
|
2217
|
-
|
2218
|
-
parser->capture
|
2219
|
-
|
2220
|
-
|
2221
|
-
parser->
|
2222
|
-
parser->link_text = Qnil;
|
2136
|
+
wiki_trim_link_text(parser);
|
2137
|
+
wiki_encode_link_target(parser);
|
2138
|
+
wiki_pop_from_stack_up_to(parser, output, LINK_START, true);
|
2139
|
+
parser->capture = NULL;
|
2140
|
+
wiki_append_hyperlink(parser, prefix, parser->link_target, parser->link_text, Qnil, false);
|
2141
|
+
str_clear(parser->link_target);
|
2142
|
+
str_clear(parser->link_text);
|
2223
2143
|
}
|
2224
2144
|
else // wasn't in internal link scope
|
2225
2145
|
{
|
2226
|
-
|
2227
|
-
|
2228
|
-
|
2146
|
+
wiki_pop_excess_elements(parser);
|
2147
|
+
wiki_start_para_if_necessary(parser);
|
2148
|
+
str_append(output, link_end, sizeof(link_end) - 1);
|
2229
2149
|
}
|
2230
2150
|
break;
|
2231
2151
|
|
@@ -2235,41 +2155,28 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2235
2155
|
// strings in square brackets which don't match this syntax get passed through literally; eg:
|
2236
2156
|
// he was very angery [sic] about the turn of events
|
2237
2157
|
case EXT_LINK_START:
|
2238
|
-
|
2158
|
+
output = parser->capture ? parser->capture : parser->output;
|
2239
2159
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2240
2160
|
{
|
2241
|
-
|
2242
|
-
|
2161
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
2162
|
+
str_append(output, ext_link_start, sizeof(ext_link_start) - 1);
|
2243
2163
|
}
|
2244
2164
|
else if (IN(EXT_LINK_START))
|
2245
2165
|
// already in external link scope! (and in fact, must be capturing link_text right now)
|
2246
|
-
|
2166
|
+
str_append(output, ext_link_start, sizeof(ext_link_start) - 1);
|
2247
2167
|
else if (IN(LINK_START))
|
2248
2168
|
{
|
2249
2169
|
// already in internal link scope!
|
2250
|
-
|
2251
|
-
|
2252
|
-
|
2253
|
-
parser->
|
2254
|
-
else if (IN(SPACE))
|
2255
|
-
{
|
2256
|
-
// link target has already been scanned
|
2257
|
-
if (NIL_P(parser->link_text))
|
2258
|
-
// this must be the first character of our link text
|
2259
|
-
parser->link_text = i;
|
2260
|
-
else
|
2261
|
-
// add to existing link text
|
2262
|
-
rb_str_append(parser->link_text, i);
|
2263
|
-
}
|
2264
|
-
else
|
2265
|
-
// add to existing link target
|
2266
|
-
rb_str_append(parser->link_target, i);
|
2170
|
+
if (parser->link_target->len == 0 || !IN(SPACE))
|
2171
|
+
str_append(parser->link_target, ext_link_start, sizeof(ext_link_start) - 1);
|
2172
|
+
else // link target has already been scanned
|
2173
|
+
str_append(parser->link_text, ext_link_start, sizeof(ext_link_start) - 1);
|
2267
2174
|
}
|
2268
2175
|
else // not in external link scope yet
|
2269
2176
|
{
|
2270
2177
|
// will either emit a link, or the rollback of a failed link, so start the para now
|
2271
|
-
|
2272
|
-
|
2178
|
+
wiki_pop_excess_elements(parser);
|
2179
|
+
wiki_start_para_if_necessary(parser);
|
2273
2180
|
|
2274
2181
|
// look ahead: expect an absolute URI (with protocol) or "relative" (path) URI
|
2275
2182
|
NEXT_TOKEN();
|
@@ -2277,56 +2184,55 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2277
2184
|
ary_push(parser->scope, EXT_LINK_START); // so far so good, jump back to the top of the loop
|
2278
2185
|
else
|
2279
2186
|
// only get here if there was a syntax error (missing URI)
|
2280
|
-
|
2187
|
+
str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
|
2281
2188
|
continue; // jump back to top of loop to handle token (either URI or whatever it is)
|
2282
2189
|
}
|
2283
2190
|
break;
|
2284
2191
|
|
2285
2192
|
case EXT_LINK_END:
|
2286
|
-
|
2193
|
+
output = parser->capture ? parser->capture : parser->output;
|
2287
2194
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2288
2195
|
{
|
2289
|
-
|
2290
|
-
|
2196
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
2197
|
+
str_append(output, ext_link_end, sizeof(ext_link_end) - 1);
|
2291
2198
|
}
|
2292
2199
|
else if (IN(EXT_LINK_START))
|
2293
2200
|
{
|
2294
|
-
if (
|
2201
|
+
if (parser->link_text->len == 0)
|
2295
2202
|
// syntax error: external link with no link text
|
2296
|
-
|
2203
|
+
wiki_rollback_failed_external_link(parser);
|
2297
2204
|
else
|
2298
2205
|
{
|
2299
2206
|
// success!
|
2300
2207
|
j = IN(PATH) ? Qnil : parser->external_link_class;
|
2301
|
-
|
2302
|
-
parser->capture =
|
2303
|
-
|
2304
|
-
rb_str_append(parser->output, i);
|
2208
|
+
wiki_pop_from_stack_up_to(parser, output, EXT_LINK_START, true);
|
2209
|
+
parser->capture = NULL;
|
2210
|
+
wiki_append_hyperlink(parser, Qnil, parser->link_target, parser->link_text, j, false);
|
2305
2211
|
}
|
2306
|
-
parser->link_target
|
2307
|
-
parser->link_text
|
2212
|
+
str_clear(parser->link_target);
|
2213
|
+
str_clear(parser->link_text);
|
2308
2214
|
}
|
2309
2215
|
else
|
2310
2216
|
{
|
2311
|
-
|
2312
|
-
|
2313
|
-
|
2217
|
+
wiki_pop_excess_elements(parser);
|
2218
|
+
wiki_start_para_if_necessary(parser);
|
2219
|
+
str_append(parser->output, ext_link_end, sizeof(ext_link_end) - 1);
|
2314
2220
|
}
|
2315
2221
|
break;
|
2316
2222
|
|
2317
2223
|
case SEPARATOR:
|
2318
|
-
|
2319
|
-
|
2320
|
-
|
2321
|
-
|
2224
|
+
output = parser->capture ? parser->capture : parser->output;
|
2225
|
+
wiki_pop_excess_elements(parser);
|
2226
|
+
wiki_start_para_if_necessary(parser);
|
2227
|
+
str_append(output, separator, sizeof(separator) - 1);
|
2322
2228
|
break;
|
2323
2229
|
|
2324
2230
|
case SPACE:
|
2325
|
-
|
2231
|
+
output = parser->capture ? parser->capture : parser->output;
|
2326
2232
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2327
2233
|
{
|
2328
|
-
|
2329
|
-
|
2234
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
2235
|
+
str_append(output, token->start, TOKEN_LEN(token));
|
2330
2236
|
}
|
2331
2237
|
else
|
2332
2238
|
{
|
@@ -2335,21 +2241,21 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2335
2241
|
int token_len = TOKEN_LEN(token);
|
2336
2242
|
NEXT_TOKEN();
|
2337
2243
|
type = token->type;
|
2338
|
-
if ((
|
2339
|
-
(
|
2340
|
-
(
|
2341
|
-
(
|
2342
|
-
(
|
2343
|
-
(
|
2244
|
+
if ((type == H6_END && IN(H6_START)) ||
|
2245
|
+
(type == H5_END && IN(H5_START)) ||
|
2246
|
+
(type == H4_END && IN(H4_START)) ||
|
2247
|
+
(type == H3_END && IN(H3_START)) ||
|
2248
|
+
(type == H2_END && IN(H2_START)) ||
|
2249
|
+
(type == H1_END && IN(H1_START)))
|
2344
2250
|
{
|
2345
2251
|
// will suppress emission of space (discard) if next token is a H6_END, H5_END etc and we are in the corresponding scope
|
2346
2252
|
}
|
2347
2253
|
else
|
2348
2254
|
{
|
2349
2255
|
// emit the space
|
2350
|
-
|
2351
|
-
|
2352
|
-
|
2256
|
+
wiki_pop_excess_elements(parser);
|
2257
|
+
wiki_start_para_if_necessary(parser);
|
2258
|
+
str_append(output, token_ptr, token_len);
|
2353
2259
|
}
|
2354
2260
|
|
2355
2261
|
// jump to top of the loop to process token we scanned during lookahead
|
@@ -2362,101 +2268,100 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2362
2268
|
case NAMED_ENTITY:
|
2363
2269
|
case DECIMAL_ENTITY:
|
2364
2270
|
// pass these through unaltered as they are case sensitive
|
2365
|
-
|
2366
|
-
|
2367
|
-
|
2368
|
-
|
2271
|
+
output = parser->capture ? parser->capture : parser->output;
|
2272
|
+
wiki_pop_excess_elements(parser);
|
2273
|
+
wiki_start_para_if_necessary(parser);
|
2274
|
+
str_append(output, token->start, TOKEN_LEN(token));
|
2369
2275
|
break;
|
2370
2276
|
|
2371
2277
|
case HEX_ENTITY:
|
2372
2278
|
// normalize hex entities (downcase them)
|
2373
|
-
|
2374
|
-
|
2375
|
-
|
2376
|
-
|
2279
|
+
output = parser->capture ? parser->capture : parser->output;
|
2280
|
+
wiki_pop_excess_elements(parser);
|
2281
|
+
wiki_start_para_if_necessary(parser);
|
2282
|
+
str_append(output, token->start, TOKEN_LEN(token));
|
2283
|
+
wiki_downcase_bang(output->ptr + output->len - TOKEN_LEN(token), TOKEN_LEN(token));
|
2377
2284
|
break;
|
2378
2285
|
|
2379
2286
|
case QUOT:
|
2380
|
-
|
2381
|
-
|
2382
|
-
|
2383
|
-
|
2287
|
+
output = parser->capture ? parser->capture : parser->output;
|
2288
|
+
wiki_pop_excess_elements(parser);
|
2289
|
+
wiki_start_para_if_necessary(parser);
|
2290
|
+
str_append(output, quot_entity, sizeof(quot_entity) - 1);
|
2384
2291
|
break;
|
2385
2292
|
|
2386
2293
|
case AMP:
|
2387
|
-
|
2388
|
-
|
2389
|
-
|
2390
|
-
|
2294
|
+
output = parser->capture ? parser->capture : parser->output;
|
2295
|
+
wiki_pop_excess_elements(parser);
|
2296
|
+
wiki_start_para_if_necessary(parser);
|
2297
|
+
str_append(output, amp_entity, sizeof(amp_entity) - 1);
|
2391
2298
|
break;
|
2392
2299
|
|
2393
2300
|
case LESS:
|
2394
|
-
|
2395
|
-
|
2396
|
-
|
2397
|
-
|
2301
|
+
output = parser->capture ? parser->capture : parser->output;
|
2302
|
+
wiki_pop_excess_elements(parser);
|
2303
|
+
wiki_start_para_if_necessary(parser);
|
2304
|
+
str_append(output, lt_entity, sizeof(lt_entity) - 1);
|
2398
2305
|
break;
|
2399
2306
|
|
2400
2307
|
case GREATER:
|
2401
|
-
|
2402
|
-
|
2403
|
-
|
2404
|
-
|
2308
|
+
output = parser->capture ? parser->capture : parser->output;
|
2309
|
+
wiki_pop_excess_elements(parser);
|
2310
|
+
wiki_start_para_if_necessary(parser);
|
2311
|
+
str_append(output, gt_entity, sizeof(gt_entity) - 1);
|
2405
2312
|
break;
|
2406
2313
|
|
2407
2314
|
case IMG_START:
|
2408
2315
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2409
2316
|
{
|
2410
|
-
|
2411
|
-
|
2317
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
2318
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
2412
2319
|
}
|
2413
|
-
else if (
|
2414
|
-
|
2320
|
+
else if (parser->capture)
|
2321
|
+
str_append(parser->capture, token->start, TOKEN_LEN(token));
|
2415
2322
|
else
|
2416
2323
|
{
|
2417
2324
|
// not currently capturing: will be emitting something on success or failure, so get ready
|
2418
|
-
|
2419
|
-
|
2325
|
+
wiki_pop_excess_elements(parser);
|
2326
|
+
wiki_start_para_if_necessary(parser);
|
2420
2327
|
|
2421
2328
|
// scan ahead consuming PATH, PRINTABLE, ALNUM and SPECIAL_URI_CHARS tokens
|
2422
2329
|
// will cheat here and abuse the link_target capture buffer to accumulate text
|
2423
|
-
if (NIL_P(parser->link_target))
|
2424
|
-
parser->link_target = rb_str_new2("");
|
2425
2330
|
while (NEXT_TOKEN(), (type = token->type))
|
2426
2331
|
{
|
2427
2332
|
if (type == PATH || type == PRINTABLE || type == ALNUM || type == SPECIAL_URI_CHARS)
|
2428
|
-
|
2429
|
-
else if (type == IMG_END &&
|
2333
|
+
str_append(parser->link_target, token->start, TOKEN_LEN(token));
|
2334
|
+
else if (type == IMG_END && parser->link_target->len > 0)
|
2430
2335
|
{
|
2431
2336
|
// success
|
2432
|
-
|
2337
|
+
wiki_append_img(parser, parser->link_target->ptr, parser->link_target->len);
|
2433
2338
|
token = NULL;
|
2434
2339
|
break;
|
2435
2340
|
}
|
2436
2341
|
else // unexpected token or zero-length target (syntax error)
|
2437
2342
|
{
|
2438
2343
|
// rollback
|
2439
|
-
|
2440
|
-
|
2344
|
+
str_append(parser->output, literal_img_start, sizeof(literal_img_start) - 1);
|
2345
|
+
if (parser->link_target->len > 0)
|
2346
|
+
str_append(parser->output, parser->link_target->ptr, parser->link_target->len);
|
2441
2347
|
break;
|
2442
2348
|
}
|
2443
2349
|
}
|
2444
2350
|
|
2445
2351
|
// jump to top of the loop to process token we scanned during lookahead
|
2446
|
-
parser->link_target
|
2352
|
+
str_clear(parser->link_target);
|
2447
2353
|
continue;
|
2448
2354
|
}
|
2449
2355
|
break;
|
2450
2356
|
|
2451
2357
|
case CRLF:
|
2452
2358
|
i = parser->pending_crlf;
|
2453
|
-
parser->pending_crlf =
|
2454
|
-
|
2455
|
-
_Wikitext_rollback_failed_external_link(parser); // if any
|
2359
|
+
parser->pending_crlf = false;
|
2360
|
+
wiki_rollback_failed_link(parser); // if any
|
2456
2361
|
if (IN(NO_WIKI_START) || IN(PRE_START))
|
2457
2362
|
{
|
2458
2363
|
ary_clear(parser->line_buffer);
|
2459
|
-
|
2364
|
+
str_append_str(parser->output, parser->line_ending);
|
2460
2365
|
break;
|
2461
2366
|
}
|
2462
2367
|
else if (IN(PRE))
|
@@ -2464,14 +2369,14 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2464
2369
|
// beware when BLOCKQUOTE on line buffer (not line stack!) prior to CRLF, that must be end of PRE block
|
2465
2370
|
if (ary_entry(parser->line_buffer, -2) == BLOCKQUOTE)
|
2466
2371
|
// don't emit in this case
|
2467
|
-
|
2372
|
+
wiki_pop_from_stack_up_to(parser, parser->output, PRE, true);
|
2468
2373
|
else
|
2469
2374
|
{
|
2470
2375
|
if (ary_entry(parser->line_buffer, -2) == PRE)
|
2471
2376
|
{
|
2472
2377
|
// only thing on line is the PRE: emit pending line ending (if we had one)
|
2473
|
-
if (i
|
2474
|
-
|
2378
|
+
if (i)
|
2379
|
+
str_append_str(parser->output, parser->line_ending);
|
2475
2380
|
}
|
2476
2381
|
|
2477
2382
|
// clear these _before_ calling NEXT_TOKEN (NEXT_TOKEN adds to the line_buffer)
|
@@ -2483,17 +2388,17 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2483
2388
|
type = token->type;
|
2484
2389
|
if (type != BLOCKQUOTE && type != PRE)
|
2485
2390
|
// this is definitely the end of the block, so don't emit
|
2486
|
-
|
2391
|
+
wiki_pop_from_stack_up_to(parser, parser->output, PRE, true);
|
2487
2392
|
else
|
2488
2393
|
// potentially will emit
|
2489
|
-
parser->pending_crlf =
|
2394
|
+
parser->pending_crlf = true;
|
2490
2395
|
|
2491
2396
|
continue; // jump back to top of loop to handle token grabbed via lookahead
|
2492
2397
|
}
|
2493
2398
|
}
|
2494
2399
|
else
|
2495
2400
|
{
|
2496
|
-
parser->pending_crlf =
|
2401
|
+
parser->pending_crlf = true;
|
2497
2402
|
|
2498
2403
|
// count number of BLOCKQUOTE tokens in line buffer (can be zero) and pop back to that level
|
2499
2404
|
// as a side effect, this handles any open span-level elements and unclosed blocks
|
@@ -2503,7 +2408,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2503
2408
|
{
|
2504
2409
|
if (parser->scope->count > 0 && ary_entry(parser->scope, -1) == LI)
|
2505
2410
|
{
|
2506
|
-
parser->pending_crlf =
|
2411
|
+
parser->pending_crlf = false;
|
2507
2412
|
break;
|
2508
2413
|
}
|
2509
2414
|
|
@@ -2516,12 +2421,12 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2516
2421
|
if (NO_ITEM(ary_entry(parser->line_buffer, -2)) ||
|
2517
2422
|
(ary_entry(parser->line_buffer, -2) == BLOCKQUOTE && !IN(BLOCKQUOTE_START)))
|
2518
2423
|
// paragraph break
|
2519
|
-
parser->pending_crlf =
|
2424
|
+
parser->pending_crlf = false;
|
2520
2425
|
else
|
2521
2426
|
// not a paragraph break!
|
2522
2427
|
continue;
|
2523
2428
|
}
|
2524
|
-
|
2429
|
+
wiki_pop_from_stack(parser, NULL);
|
2525
2430
|
}
|
2526
2431
|
}
|
2527
2432
|
|
@@ -2536,31 +2441,29 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2536
2441
|
case IMG_END:
|
2537
2442
|
case LEFT_CURLY:
|
2538
2443
|
case RIGHT_CURLY:
|
2539
|
-
|
2540
|
-
|
2541
|
-
|
2542
|
-
|
2444
|
+
output = parser->capture ? parser->capture : parser->output;
|
2445
|
+
wiki_pop_excess_elements(parser);
|
2446
|
+
wiki_start_para_if_necessary(parser);
|
2447
|
+
str_append(output, token->start, TOKEN_LEN(token));
|
2543
2448
|
break;
|
2544
2449
|
|
2545
2450
|
case DEFAULT:
|
2546
|
-
|
2547
|
-
|
2548
|
-
|
2549
|
-
|
2451
|
+
output = parser->capture ? parser->capture : parser->output;
|
2452
|
+
wiki_pop_excess_elements(parser);
|
2453
|
+
wiki_start_para_if_necessary(parser);
|
2454
|
+
wiki_append_entity_from_utf32_char(output, token->code_point);
|
2550
2455
|
break;
|
2551
2456
|
|
2552
2457
|
case END_OF_FILE:
|
2553
2458
|
// special case for input like " foo\n " (see pre_spec.rb)
|
2554
2459
|
if (IN(PRE) &&
|
2555
2460
|
ary_entry(parser->line_buffer, -2) == PRE &&
|
2556
|
-
parser->pending_crlf
|
2557
|
-
|
2461
|
+
parser->pending_crlf)
|
2462
|
+
str_append(parser->output, parser->line_ending->ptr, parser->line_ending->len);
|
2558
2463
|
|
2559
2464
|
// close any open scopes on hitting EOF
|
2560
|
-
|
2561
|
-
|
2562
|
-
for (i = 0, j = parser->scope->count; i < j; i++)
|
2563
|
-
_Wikitext_pop_from_stack(parser, Qnil);
|
2465
|
+
wiki_rollback_failed_link(parser); // if any
|
2466
|
+
wiki_pop_all_from_stack(parser);
|
2564
2467
|
goto return_output; // break not enough here (want to break out of outer while loop, not inner switch statement)
|
2565
2468
|
|
2566
2469
|
default:
|
@@ -2571,5 +2474,25 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2571
2474
|
token = NULL;
|
2572
2475
|
} while (1);
|
2573
2476
|
return_output:
|
2574
|
-
return
|
2477
|
+
// nasty hack to avoid re-allocating our return value
|
2478
|
+
str_append(parser->output, null_str, 1); // null-terminate
|
2479
|
+
len = parser->output->len - 1; // don't count null termination
|
2480
|
+
|
2481
|
+
#if defined(RUBY_1_9_x)
|
2482
|
+
VALUE out = rb_str_buf_new(RSTRING_EMBED_LEN_MAX + 1);
|
2483
|
+
free(RSTRING_PTR(out));
|
2484
|
+
RSTRING(out)->as.heap.aux.capa = len;
|
2485
|
+
RSTRING(out)->as.heap.ptr = parser->output->ptr;
|
2486
|
+
RSTRING(out)->as.heap.len = len;
|
2487
|
+
#elif defined(RUBY_1_8_x)
|
2488
|
+
VALUE out = rb_str_new2("");
|
2489
|
+
free(RSTRING_PTR(out));
|
2490
|
+
RSTRING(out)->len = len;
|
2491
|
+
RSTRING(out)->aux.capa = len;
|
2492
|
+
RSTRING(out)->ptr = parser->output->ptr;
|
2493
|
+
#else
|
2494
|
+
#error unsupported RUBY_VERSION
|
2495
|
+
#endif
|
2496
|
+
parser->output->ptr = NULL; // don't double-free
|
2497
|
+
return out;
|
2575
2498
|
}
|