wikitext 1.6 → 1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/ary.h +0 -6
- data/ext/extconf.rb +9 -0
- data/ext/parser.c +762 -839
- data/ext/parser.h +0 -2
- data/ext/str.c +16 -33
- data/ext/str.h +4 -12
- data/ext/token.c +1 -1
- data/ext/token.h +2 -2
- data/ext/wikitext.c +0 -1
- data/lib/wikitext/version.rb +1 -1
- data/spec/external_link_spec.rb +17 -0
- data/spec/internal_link_spec.rb +7 -1
- data/spec/link_encoding_spec.rb +0 -47
- data/spec/link_sanitizing_spec.rb +1 -1
- data/spec/regressions_spec.rb +2 -2
- metadata +2 -2
data/ext/ary.h
CHANGED
@@ -35,10 +35,6 @@ typedef struct
|
|
35
35
|
|
36
36
|
#define NO_ITEM(item) (item == INT_MAX)
|
37
37
|
|
38
|
-
// Mark the ary struct designated by ptr as a participant in Ruby's mark-and-sweep garbage collection scheme.
|
39
|
-
// A variable named name is placed on the C stack to prevent the structure from being prematurely collected.
|
40
|
-
#define GC_WRAP_ARY(ptr, name) volatile VALUE name __attribute__((unused)) = Data_Wrap_Struct(rb_cObject, 0, ary_free, ptr)
|
41
|
-
|
42
38
|
ary_t *ary_new(void);
|
43
39
|
int ary_entry(ary_t *ary, int idx);
|
44
40
|
void ary_clear(ary_t *ary);
|
@@ -47,8 +43,6 @@ void ary_push(ary_t *ary, int val);
|
|
47
43
|
int ary_includes(ary_t *ary, int val);
|
48
44
|
|
49
45
|
// returns a count indicating the number of times the value appears in the collection
|
50
|
-
// refactored from _Wikitext_count()
|
51
46
|
int ary_count(ary_t *ary, int item);
|
52
47
|
|
53
|
-
// this method not inlined so its address can be passed to the Data_Wrap_Struct function.
|
54
48
|
void ary_free(ary_t *ary);
|
data/ext/extconf.rb
CHANGED
@@ -28,5 +28,14 @@ def missing item
|
|
28
28
|
exit 1
|
29
29
|
end
|
30
30
|
|
31
|
+
case RUBY_VERSION
|
32
|
+
when /\A1\.8/
|
33
|
+
$CFLAGS += ' -DRUBY_1_8_x'
|
34
|
+
when /\A1\.9/
|
35
|
+
$CFLAGS += ' -DRUBY_1_9_x'
|
36
|
+
else
|
37
|
+
raise "unsupported Ruby version: #{RUBY_VERSION}"
|
38
|
+
end
|
39
|
+
|
31
40
|
have_header('ruby.h') or missing 'ruby.h'
|
32
41
|
create_makefile('wikitext')
|
data/ext/parser.c
CHANGED
@@ -21,6 +21,8 @@
|
|
21
21
|
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
22
22
|
// POSSIBILITY OF SUCH DAMAGE.
|
23
23
|
|
24
|
+
#include <stdbool.h>
|
25
|
+
|
24
26
|
#include "parser.h"
|
25
27
|
#include "ary.h"
|
26
28
|
#include "str.h"
|
@@ -32,29 +34,29 @@
|
|
32
34
|
// poor man's object orientation in C:
|
33
35
|
// instead of passing around multiple parameters between functions in the parser
|
34
36
|
// we pack everything into a struct and pass around only a pointer to that
|
35
|
-
// TODO: consider changing some of the VALUE members (eg link_target) to the more efficient str_t type
|
36
37
|
typedef struct
|
37
38
|
{
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
VALUE img_prefix; // path prepended when emitting img tags
|
39
|
+
str_t *capture; // capturing to link_target, link_text, or NULL (direct to output, not capturing)
|
40
|
+
str_t *output; // for accumulating output to be returned
|
41
|
+
str_t *link_target; // short term "memory" for parsing links
|
42
|
+
str_t *link_text; // short term "memory" for parsing links
|
43
|
+
str_t *line_ending;
|
44
|
+
str_t *tabulation; // caching buffer for emitting indentation
|
45
45
|
ary_t *scope; // stack for tracking scope
|
46
46
|
ary_t *line; // stack for tracking scope as implied by current line
|
47
47
|
ary_t *line_buffer; // stack for tracking raw tokens (not scope) on current line
|
48
|
-
VALUE
|
49
|
-
VALUE
|
50
|
-
VALUE
|
51
|
-
str_t *line_ending;
|
48
|
+
VALUE external_link_class; // CSS class applied to external links
|
49
|
+
VALUE mailto_class; // CSS class applied to email (mailto) links
|
50
|
+
VALUE img_prefix; // path prepended when emitting img tags
|
52
51
|
int base_indent; // controlled by the :indent option to Wikitext::Parser#parse
|
53
52
|
int current_indent; // fluctuates according to currently nested structures
|
54
|
-
str_t *tabulation; // caching buffer for emitting indentation
|
55
53
|
int base_heading_level;
|
54
|
+
bool pending_crlf;
|
55
|
+
bool autolink;
|
56
|
+
bool space_to_underscore;
|
56
57
|
} parser_t;
|
57
58
|
|
59
|
+
const char null_str[] = { 0 };
|
58
60
|
const char escaped_no_wiki_start[] = "<nowiki>";
|
59
61
|
const char escaped_no_wiki_end[] = "</nowiki>";
|
60
62
|
const char literal_strong_em[] = "'''''";
|
@@ -66,12 +68,6 @@ const char escaped_strong_start[] = "<strong>";
|
|
66
68
|
const char escaped_strong_end[] = "</strong>";
|
67
69
|
const char escaped_tt_start[] = "<tt>";
|
68
70
|
const char escaped_tt_end[] = "</tt>";
|
69
|
-
const char literal_h6[] = "======";
|
70
|
-
const char literal_h5[] = "=====";
|
71
|
-
const char literal_h4[] = "====";
|
72
|
-
const char literal_h3[] = "===";
|
73
|
-
const char literal_h2[] = "==";
|
74
|
-
const char literal_h1[] = "=";
|
75
71
|
const char pre_start[] = "<pre>";
|
76
72
|
const char pre_end[] = "</pre>";
|
77
73
|
const char escaped_pre_start[] = "<pre>";
|
@@ -130,6 +126,49 @@ const char img_start[] = "<img src=\"";
|
|
130
126
|
const char img_end[] = "\" />";
|
131
127
|
const char img_alt[] = "\" alt=\"";
|
132
128
|
|
129
|
+
// Mark the parser struct designated by ptr as a participant in Ruby's
|
130
|
+
// mark-and-sweep garbage collection scheme. A variable named name is placed on
|
131
|
+
// the C stack to prevent the structure from being prematurely collected.
|
132
|
+
#define GC_WRAP_PARSER(ptr, name) volatile VALUE name __attribute__((unused)) = Data_Wrap_Struct(rb_cObject, 0, parser_free, ptr)
|
133
|
+
|
134
|
+
parser_t *parser_new(void)
|
135
|
+
{
|
136
|
+
parser_t *parser = ALLOC_N(parser_t, 1);
|
137
|
+
parser->capture = NULL; // not a real instance, pointer to other member's instance
|
138
|
+
parser->output = str_new();
|
139
|
+
parser->link_target = str_new();
|
140
|
+
parser->link_text = str_new();
|
141
|
+
parser->line_ending = NULL; // caller should set up
|
142
|
+
parser->tabulation = str_new();
|
143
|
+
parser->scope = ary_new();
|
144
|
+
parser->line = ary_new();
|
145
|
+
parser->line_buffer = ary_new();
|
146
|
+
parser->external_link_class = Qnil; // caller should set up
|
147
|
+
parser->mailto_class = Qnil; // caller should set up
|
148
|
+
parser->img_prefix = Qnil; // caller should set up
|
149
|
+
parser->base_indent = 0;
|
150
|
+
parser->current_indent = 0;
|
151
|
+
parser->base_heading_level = 0;
|
152
|
+
parser->pending_crlf = false;
|
153
|
+
parser->autolink = true;
|
154
|
+
parser->space_to_underscore = true;
|
155
|
+
return parser;
|
156
|
+
}
|
157
|
+
|
158
|
+
void parser_free(parser_t *parser)
|
159
|
+
{
|
160
|
+
// we don't free parser->capture; it's just a redundant pointer
|
161
|
+
if (parser->output) str_free(parser->output);
|
162
|
+
if (parser->link_target) str_free(parser->link_target);
|
163
|
+
if (parser->link_text) str_free(parser->link_text);
|
164
|
+
if (parser->line_ending) str_free(parser->line_ending);
|
165
|
+
if (parser->tabulation) str_free(parser->tabulation);
|
166
|
+
if (parser->scope) ary_free(parser->scope);
|
167
|
+
if (parser->line) ary_free(parser->line);
|
168
|
+
if (parser->line_buffer) ary_free(parser->line_buffer);
|
169
|
+
free(parser);
|
170
|
+
}
|
171
|
+
|
133
172
|
// for testing and debugging only
|
134
173
|
VALUE Wikitext_parser_tokenize(VALUE self, VALUE string)
|
135
174
|
{
|
@@ -142,11 +181,11 @@ VALUE Wikitext_parser_tokenize(VALUE self, VALUE string)
|
|
142
181
|
char *pe = p + len;
|
143
182
|
token_t token;
|
144
183
|
next_token(&token, NULL, p, pe);
|
145
|
-
rb_ary_push(tokens,
|
184
|
+
rb_ary_push(tokens, wiki_token(&token));
|
146
185
|
while (token.type != END_OF_FILE)
|
147
186
|
{
|
148
187
|
next_token(&token, &token, NULL, pe);
|
149
|
-
rb_ary_push(tokens,
|
188
|
+
rb_ary_push(tokens, wiki_token(&token));
|
150
189
|
}
|
151
190
|
return tokens;
|
152
191
|
}
|
@@ -217,59 +256,66 @@ VALUE Wikitext_parser_fulltext_tokenize(int argc, VALUE *argv, VALUE self)
|
|
217
256
|
return tokens;
|
218
257
|
}
|
219
258
|
|
220
|
-
// we downcase "in place", overwriting the original contents of the buffer
|
221
|
-
|
259
|
+
// we downcase "in place", overwriting the original contents of the buffer
|
260
|
+
void wiki_downcase_bang(char *ptr, long len)
|
222
261
|
{
|
223
|
-
char *ptr = RSTRING_PTR(string);
|
224
|
-
long len = RSTRING_LEN(string);
|
225
262
|
for (long i = 0; i < len; i++)
|
226
263
|
{
|
227
264
|
if (ptr[i] >= 'A' && ptr[i] <= 'Z')
|
228
265
|
ptr[i] += 32;
|
229
266
|
}
|
230
|
-
return string;
|
231
267
|
}
|
232
268
|
|
233
|
-
|
269
|
+
// prepare hyperlink and append it to parser->output
|
270
|
+
// if check_autolink is true, checks parser->autolink to decide whether to emit a real hyperlink
|
271
|
+
// or merely the literal link target
|
272
|
+
// if link_text is NULL or empty, the link_target is re-used for the link text
|
273
|
+
void wiki_append_hyperlink(parser_t *parser, VALUE link_prefix, str_t *link_target, str_t *link_text, VALUE link_class, bool check_autolink)
|
234
274
|
{
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
rb_str_append(string, link_target);
|
239
|
-
|
240
|
-
// special handling for mailto URIs
|
241
|
-
const char *mailto = "mailto:";
|
242
|
-
if (NIL_P(link_prefix) &&
|
243
|
-
RSTRING_LEN(link_target) >= (long)sizeof(mailto) &&
|
244
|
-
strncmp(mailto, RSTRING_PTR(link_target), sizeof(mailto)) == 0)
|
245
|
-
link_class = parser->mailto_class; // use mailto_class from parser
|
246
|
-
|
247
|
-
if (link_class != Qnil)
|
275
|
+
if (check_autolink && !parser->autolink)
|
276
|
+
str_append_str(parser->output, link_target);
|
277
|
+
else
|
248
278
|
{
|
249
|
-
|
250
|
-
|
279
|
+
str_append(parser->output, a_start, sizeof(a_start) - 1); // <a href="
|
280
|
+
if (!NIL_P(link_prefix))
|
281
|
+
str_append_string(parser->output, link_prefix);
|
282
|
+
str_append_str(parser->output, link_target);
|
283
|
+
|
284
|
+
// special handling for mailto URIs
|
285
|
+
const char *mailto = "mailto:";
|
286
|
+
if (NIL_P(link_prefix) &&
|
287
|
+
link_target->len >= (long)sizeof(mailto) &&
|
288
|
+
strncmp(mailto, link_target->ptr, sizeof(mailto)) == 0)
|
289
|
+
link_class = parser->mailto_class; // use mailto_class from parser
|
290
|
+
if (link_class != Qnil)
|
291
|
+
{
|
292
|
+
str_append(parser->output, a_class, sizeof(a_class) - 1); // " class="
|
293
|
+
str_append_string(parser->output, link_class);
|
294
|
+
}
|
295
|
+
str_append(parser->output, a_start_close, sizeof(a_start_close) - 1); // ">
|
296
|
+
if (!link_text || link_text->len == 0) // re-use link_target
|
297
|
+
str_append_str(parser->output, link_target);
|
298
|
+
else
|
299
|
+
str_append_str(parser->output, link_text);
|
300
|
+
str_append(parser->output, a_end, sizeof(a_end) - 1); // </a>
|
251
301
|
}
|
252
|
-
rb_str_cat(string, a_start_close, sizeof(a_start_close) - 1); // ">
|
253
|
-
rb_str_append(string, link_text);
|
254
|
-
rb_str_cat(string, a_end, sizeof(a_end) - 1);
|
255
|
-
return string;
|
256
302
|
}
|
257
303
|
|
258
|
-
void
|
304
|
+
void wiki_append_img(parser_t *parser, char *token_ptr, int token_len)
|
259
305
|
{
|
260
|
-
|
306
|
+
str_append(parser->output, img_start, sizeof(img_start) - 1); // <img src="
|
261
307
|
if (!NIL_P(parser->img_prefix) && *token_ptr != '/') // len always > 0
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
308
|
+
str_append_string(parser->output, parser->img_prefix);
|
309
|
+
str_append(parser->output, token_ptr, token_len);
|
310
|
+
str_append(parser->output, img_alt, sizeof(img_alt) - 1); // " alt="
|
311
|
+
str_append(parser->output, token_ptr, token_len);
|
312
|
+
str_append(parser->output, img_end, sizeof(img_end) - 1); // " />
|
267
313
|
}
|
268
314
|
|
269
315
|
// will emit indentation only if we are about to emit any of:
|
270
316
|
// <blockquote>, <p>, <ul>, <ol>, <li>, <h1> etc, <pre>
|
271
317
|
// each time we enter one of those spans we must increment the indentation level
|
272
|
-
void
|
318
|
+
void wiki_indent(parser_t *parser)
|
273
319
|
{
|
274
320
|
if (parser->base_indent == -1) // indentation disabled
|
275
321
|
return;
|
@@ -285,32 +331,32 @@ void _Wikitext_indent(parser_t *parser)
|
|
285
331
|
*old_end++ = ' ';
|
286
332
|
if (space_count > parser->tabulation->len)
|
287
333
|
parser->tabulation->len = space_count;
|
288
|
-
|
334
|
+
str_append(parser->output, parser->tabulation->ptr, space_count);
|
289
335
|
}
|
290
336
|
parser->current_indent += 2;
|
291
337
|
}
|
292
338
|
|
293
|
-
void
|
339
|
+
void wiki_dedent(parser_t *parser, bool emit)
|
294
340
|
{
|
295
341
|
if (parser->base_indent == -1) // indentation disabled
|
296
342
|
return;
|
297
343
|
parser->current_indent -= 2;
|
298
|
-
if (emit
|
344
|
+
if (!emit)
|
299
345
|
return;
|
300
346
|
int space_count = parser->current_indent + parser->base_indent;
|
301
347
|
if (space_count > 0)
|
302
|
-
|
348
|
+
str_append(parser->output, parser->tabulation->ptr, space_count);
|
303
349
|
}
|
304
350
|
|
305
351
|
// Pops a single item off the parser's scope stack.
|
306
352
|
// A corresponding closing tag is written to the target string.
|
307
353
|
// The target string may be the main output buffer, or a substring capturing buffer if a link is being scanned.
|
308
|
-
void
|
354
|
+
void wiki_pop_from_stack(parser_t *parser, str_t *target)
|
309
355
|
{
|
310
356
|
int top = ary_entry(parser->scope, -1);
|
311
357
|
if (NO_ITEM(top))
|
312
358
|
return;
|
313
|
-
if (
|
359
|
+
if (!target)
|
314
360
|
target = parser->output;
|
315
361
|
|
316
362
|
// for headings, take base_heading_level into account
|
@@ -326,16 +372,16 @@ void _Wikitext_pop_from_stack(parser_t *parser, VALUE target)
|
|
326
372
|
{
|
327
373
|
case PRE:
|
328
374
|
case PRE_START:
|
329
|
-
|
330
|
-
|
331
|
-
|
375
|
+
str_append(target, pre_end, sizeof(pre_end) - 1);
|
376
|
+
str_append_str(target, parser->line_ending);
|
377
|
+
wiki_dedent(parser, false);
|
332
378
|
break;
|
333
379
|
|
334
380
|
case BLOCKQUOTE:
|
335
381
|
case BLOCKQUOTE_START:
|
336
|
-
|
337
|
-
|
338
|
-
|
382
|
+
wiki_dedent(parser, true);
|
383
|
+
str_append(target, blockquote_end, sizeof(blockquote_end) - 1);
|
384
|
+
str_append_str(target, parser->line_ending);
|
339
385
|
break;
|
340
386
|
|
341
387
|
case NO_WIKI_START:
|
@@ -344,29 +390,29 @@ void _Wikitext_pop_from_stack(parser_t *parser, VALUE target)
|
|
344
390
|
|
345
391
|
case STRONG:
|
346
392
|
case STRONG_START:
|
347
|
-
|
393
|
+
str_append(target, strong_end, sizeof(strong_end) - 1);
|
348
394
|
break;
|
349
395
|
|
350
396
|
case EM:
|
351
397
|
case EM_START:
|
352
|
-
|
398
|
+
str_append(target, em_end, sizeof(em_end) - 1);
|
353
399
|
break;
|
354
400
|
|
355
401
|
case TT:
|
356
402
|
case TT_START:
|
357
|
-
|
403
|
+
str_append(target, tt_end, sizeof(tt_end) - 1);
|
358
404
|
break;
|
359
405
|
|
360
406
|
case OL:
|
361
|
-
|
362
|
-
|
363
|
-
|
407
|
+
wiki_dedent(parser, true);
|
408
|
+
str_append(target, ol_end, sizeof(ol_end) - 1);
|
409
|
+
str_append_str(target, parser->line_ending);
|
364
410
|
break;
|
365
411
|
|
366
412
|
case UL:
|
367
|
-
|
368
|
-
|
369
|
-
|
413
|
+
wiki_dedent(parser, true);
|
414
|
+
str_append(target, ul_end, sizeof(ul_end) - 1);
|
415
|
+
str_append_str(target, parser->line_ending);
|
370
416
|
break;
|
371
417
|
|
372
418
|
case NESTED_LIST:
|
@@ -375,50 +421,50 @@ void _Wikitext_pop_from_stack(parser_t *parser, VALUE target)
|
|
375
421
|
// and other times we want it to behave like BLOCKQUOTE (ie. when it has a nested list inside)
|
376
422
|
// hence this hack: we do an emitting dedent on behalf of the LI that we know must be coming
|
377
423
|
// and then when we pop the actual LI itself (below) we do the standard non-emitting dedent
|
378
|
-
|
379
|
-
parser->current_indent += 2;
|
424
|
+
wiki_dedent(parser, true); // we really only want to emit the spaces
|
425
|
+
parser->current_indent += 2; // we don't want to decrement the actual indent level, so put it back
|
380
426
|
break;
|
381
427
|
|
382
428
|
case LI:
|
383
|
-
|
384
|
-
|
385
|
-
|
429
|
+
str_append(target, li_end, sizeof(li_end) - 1);
|
430
|
+
str_append_str(target, parser->line_ending);
|
431
|
+
wiki_dedent(parser, false);
|
386
432
|
break;
|
387
433
|
|
388
434
|
case H6_START:
|
389
|
-
|
390
|
-
|
391
|
-
|
435
|
+
str_append(target, h6_end, sizeof(h6_end) - 1);
|
436
|
+
str_append_str(target, parser->line_ending);
|
437
|
+
wiki_dedent(parser, false);
|
392
438
|
break;
|
393
439
|
|
394
440
|
case H5_START:
|
395
|
-
|
396
|
-
|
397
|
-
|
441
|
+
str_append(target, h5_end, sizeof(h5_end) - 1);
|
442
|
+
str_append_str(target, parser->line_ending);
|
443
|
+
wiki_dedent(parser, false);
|
398
444
|
break;
|
399
445
|
|
400
446
|
case H4_START:
|
401
|
-
|
402
|
-
|
403
|
-
|
447
|
+
str_append(target, h4_end, sizeof(h4_end) - 1);
|
448
|
+
str_append_str(target, parser->line_ending);
|
449
|
+
wiki_dedent(parser, false);
|
404
450
|
break;
|
405
451
|
|
406
452
|
case H3_START:
|
407
|
-
|
408
|
-
|
409
|
-
|
453
|
+
str_append(target, h3_end, sizeof(h3_end) - 1);
|
454
|
+
str_append_str(target, parser->line_ending);
|
455
|
+
wiki_dedent(parser, false);
|
410
456
|
break;
|
411
457
|
|
412
458
|
case H2_START:
|
413
|
-
|
414
|
-
|
415
|
-
|
459
|
+
str_append(target, h2_end, sizeof(h2_end) - 1);
|
460
|
+
str_append_str(target, parser->line_ending);
|
461
|
+
wiki_dedent(parser, false);
|
416
462
|
break;
|
417
463
|
|
418
464
|
case H1_START:
|
419
|
-
|
420
|
-
|
421
|
-
|
465
|
+
str_append(target, h1_end, sizeof(h1_end) - 1);
|
466
|
+
str_append_str(target, parser->line_ending);
|
467
|
+
wiki_dedent(parser, false);
|
422
468
|
break;
|
423
469
|
|
424
470
|
case LINK_START:
|
@@ -442,9 +488,9 @@ void _Wikitext_pop_from_stack(parser_t *parser, VALUE target)
|
|
442
488
|
break;
|
443
489
|
|
444
490
|
case P:
|
445
|
-
|
446
|
-
|
447
|
-
|
491
|
+
str_append(target, p_end, sizeof(p_end) - 1);
|
492
|
+
str_append_str(target, parser->line_ending);
|
493
|
+
wiki_dedent(parser, false);
|
448
494
|
break;
|
449
495
|
|
450
496
|
case END_OF_FILE:
|
@@ -459,9 +505,9 @@ void _Wikitext_pop_from_stack(parser_t *parser, VALUE target)
|
|
459
505
|
}
|
460
506
|
|
461
507
|
// Pops items off the top of parser's scope stack, accumulating closing tags for them into the target string, until item is reached.
|
462
|
-
// If including is
|
508
|
+
// If including is true then the item itself is also popped.
|
463
509
|
// The target string may be the main output buffer, or a substring capturing buffer when scanning links.
|
464
|
-
void
|
510
|
+
void wiki_pop_from_stack_up_to(parser_t *parser, str_t *target, int item, bool including)
|
465
511
|
{
|
466
512
|
int continue_looping = 1;
|
467
513
|
do
|
@@ -471,23 +517,23 @@ void _Wikitext_pop_from_stack_up_to(parser_t *parser, VALUE target, int item, VA
|
|
471
517
|
return;
|
472
518
|
if (top == item)
|
473
519
|
{
|
474
|
-
if (including
|
520
|
+
if (!including)
|
475
521
|
return;
|
476
522
|
continue_looping = 0;
|
477
523
|
}
|
478
|
-
|
524
|
+
wiki_pop_from_stack(parser, target);
|
479
525
|
} while (continue_looping);
|
480
526
|
}
|
481
527
|
|
482
|
-
void
|
528
|
+
void wiki_pop_all_from_stack(parser_t *parser)
|
483
529
|
{
|
484
|
-
|
485
|
-
|
530
|
+
for (int i = 0, max = parser->scope->count; i < max; i++)
|
531
|
+
wiki_pop_from_stack(parser, NULL);
|
486
532
|
}
|
487
533
|
|
488
|
-
void
|
534
|
+
void wiki_start_para_if_necessary(parser_t *parser)
|
489
535
|
{
|
490
|
-
if (
|
536
|
+
if (parser->capture)
|
491
537
|
return;
|
492
538
|
|
493
539
|
// if no block open yet, or top of stack is BLOCKQUOTE/BLOCKQUOTE_START (with nothing in it yet)
|
@@ -495,29 +541,29 @@ void _Wikitext_start_para_if_necessary(parser_t *parser)
|
|
495
541
|
ary_entry(parser->scope, -1) == BLOCKQUOTE ||
|
496
542
|
ary_entry(parser->scope, -1) == BLOCKQUOTE_START)
|
497
543
|
{
|
498
|
-
|
499
|
-
|
544
|
+
wiki_indent(parser);
|
545
|
+
str_append(parser->output, p_start, sizeof(p_start) - 1);
|
500
546
|
ary_push(parser->scope, P);
|
501
547
|
ary_push(parser->line, P);
|
502
548
|
}
|
503
|
-
else if (parser->pending_crlf
|
549
|
+
else if (parser->pending_crlf)
|
504
550
|
{
|
505
551
|
if (IN(P))
|
506
552
|
// already in a paragraph block; convert pending CRLF into a space
|
507
|
-
|
553
|
+
str_append(parser->output, space, sizeof(space) - 1);
|
508
554
|
else if (IN(PRE))
|
509
555
|
// PRE blocks can have pending CRLF too (helps us avoid emitting the trailing newline)
|
510
|
-
|
556
|
+
str_append_str(parser->output, parser->line_ending);
|
511
557
|
}
|
512
|
-
parser->pending_crlf =
|
558
|
+
parser->pending_crlf = false;
|
513
559
|
}
|
514
560
|
|
515
|
-
void
|
561
|
+
void wiki_emit_pending_crlf_if_necessary(parser_t *parser)
|
516
562
|
{
|
517
|
-
if (parser->pending_crlf
|
563
|
+
if (parser->pending_crlf)
|
518
564
|
{
|
519
|
-
|
520
|
-
parser->pending_crlf =
|
565
|
+
str_append_str(parser->output, parser->line_ending);
|
566
|
+
parser->pending_crlf = false;
|
521
567
|
}
|
522
568
|
}
|
523
569
|
|
@@ -543,9 +589,9 @@ void _Wikitext_emit_pending_crlf_if_necessary(parser_t *parser)
|
|
543
589
|
// on the line scope.
|
544
590
|
// Luckily, BLOCKQUOTE_START tokens can only appear at the start of the scope array, so we can check for them first before
|
545
591
|
// entering the for loop.
|
546
|
-
void
|
592
|
+
void wiki_pop_excess_elements(parser_t *parser)
|
547
593
|
{
|
548
|
-
if (
|
594
|
+
if (parser->capture)
|
549
595
|
return;
|
550
596
|
for (int i = parser->scope->count - ary_count(parser->scope, BLOCKQUOTE_START), j = parser->line->count; i > j; i--)
|
551
597
|
{
|
@@ -560,65 +606,94 @@ void _Wikitext_pop_excess_elements(parser_t *parser)
|
|
560
606
|
continue;
|
561
607
|
}
|
562
608
|
}
|
563
|
-
|
609
|
+
wiki_pop_from_stack(parser, NULL);
|
564
610
|
}
|
565
611
|
}
|
566
612
|
|
567
|
-
|
568
|
-
|
569
|
-
//
|
570
|
-
//
|
571
|
-
//
|
572
|
-
//
|
573
|
-
//
|
574
|
-
//
|
575
|
-
|
613
|
+
// Convert a single UTF-8 codepoint to UTF-32
|
614
|
+
//
|
615
|
+
// Expects an input buffer, src, containing a UTF-8 encoded character (which
|
616
|
+
// may be multi-byte). The end of the input buffer, end, is also passed in to
|
617
|
+
// allow the detection of invalidly truncated codepoints. The number of bytes
|
618
|
+
// in the UTF-8 character (between 1 and 4) is returned by reference in
|
619
|
+
// width_out.
|
620
|
+
//
|
621
|
+
// Raises a RangeError if the supplied character is invalid UTF-8.
|
622
|
+
uint32_t wiki_utf8_to_utf32(char *src, char *end, long *width_out)
|
576
623
|
{
|
577
624
|
uint32_t dest;
|
578
|
-
if ((unsigned char)src[0] <= 0x7f)
|
625
|
+
if ((unsigned char)src[0] <= 0x7f)
|
579
626
|
{
|
627
|
+
// ASCII
|
580
628
|
dest = src[0];
|
581
629
|
*width_out = 1;
|
582
630
|
}
|
583
|
-
else if ((src[0] & 0xe0) == 0xc0)
|
631
|
+
else if ((src[0] & 0xe0) == 0xc0)
|
584
632
|
{
|
633
|
+
// byte starts with 110..... : this should be a two-byte sequence
|
585
634
|
if (src + 1 >= end)
|
586
|
-
|
587
|
-
|
588
|
-
|
635
|
+
// no second byte
|
636
|
+
rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
|
637
|
+
else if (((unsigned char)src[0] == 0xc0) ||
|
638
|
+
((unsigned char)src[0] == 0xc1))
|
639
|
+
// overlong encoding: lead byte of 110..... but code point <= 127
|
640
|
+
rb_raise(eWikitextParserError, "invalid encoding: overlong encoding");
|
589
641
|
else if ((src[1] & 0xc0) != 0x80 )
|
590
|
-
|
591
|
-
|
642
|
+
// should have second byte starting with 10......
|
643
|
+
rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
|
644
|
+
|
645
|
+
dest =
|
646
|
+
((uint32_t)(src[0] & 0x1f)) << 6 |
|
647
|
+
(src[1] & 0x3f);
|
592
648
|
*width_out = 2;
|
593
649
|
}
|
594
|
-
else if ((src[0] & 0xf0) == 0xe0)
|
650
|
+
else if ((src[0] & 0xf0) == 0xe0)
|
595
651
|
{
|
652
|
+
// byte starts with 1110.... : this should be a three-byte sequence
|
596
653
|
if (src + 2 >= end)
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
654
|
+
// missing second or third byte
|
655
|
+
rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
|
656
|
+
else if (((src[1] & 0xc0) != 0x80 ) ||
|
657
|
+
((src[2] & 0xc0) != 0x80 ))
|
658
|
+
// should have second and third bytes starting with 10......
|
659
|
+
rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
|
660
|
+
|
661
|
+
dest =
|
662
|
+
((uint32_t)(src[0] & 0x0f)) << 12 |
|
663
|
+
((uint32_t)(src[1] & 0x3f)) << 6 |
|
664
|
+
(src[2] & 0x3f);
|
601
665
|
*width_out = 3;
|
602
666
|
}
|
603
|
-
else if ((src[0] & 0xf8) == 0xf0)
|
667
|
+
else if ((src[0] & 0xf8) == 0xf0)
|
604
668
|
{
|
669
|
+
// byte starts with 11110... : this should be a four-byte sequence
|
605
670
|
if (src + 3 >= end)
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
671
|
+
// missing second, third, or fourth byte
|
672
|
+
rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
|
673
|
+
else if ((unsigned char)src[0] >= 0xf5 &&
|
674
|
+
(unsigned char)src[0] <= 0xf7)
|
675
|
+
// disallowed by RFC 3629 (codepoints above 0x10ffff)
|
676
|
+
rb_raise(eWikitextParserError, "invalid encoding: overlong encoding");
|
677
|
+
else if (((src[1] & 0xc0) != 0x80 ) ||
|
678
|
+
((src[2] & 0xc0) != 0x80 ) ||
|
679
|
+
((src[3] & 0xc0) != 0x80 ))
|
680
|
+
// should have second, third, and fourth bytes starting with 10......
|
681
|
+
rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
|
682
|
+
|
683
|
+
dest =
|
684
|
+
((uint32_t)(src[0] & 0x07)) << 18 |
|
685
|
+
((uint32_t)(src[1] & 0x3f)) << 12 |
|
686
|
+
((uint32_t)(src[1] & 0x3f)) << 6 |
|
687
|
+
(src[2] & 0x3f);
|
612
688
|
*width_out = 4;
|
613
689
|
}
|
614
|
-
else
|
615
|
-
|
690
|
+
else
|
691
|
+
rb_raise(eWikitextParserError, "invalid encoding: unexpected byte");
|
616
692
|
return dest;
|
617
693
|
}
|
618
694
|
|
619
|
-
|
695
|
+
void wiki_append_entity_from_utf32_char(str_t *output, uint32_t character)
|
620
696
|
{
|
621
|
-
// TODO: consider special casing some entities (ie. quot, amp, lt, gt etc)?
|
622
697
|
char hex_string[8] = { '&', '#', 'x', 0, 0, 0, 0, ';' };
|
623
698
|
char scratch = (character & 0xf000) >> 12;
|
624
699
|
hex_string[3] = (scratch <= 9 ? scratch + 48 : scratch + 87);
|
@@ -628,18 +703,17 @@ VALUE _Wikitext_utf32_char_to_entity(uint32_t character)
|
|
628
703
|
hex_string[5] = (scratch <= 9 ? scratch + 48 : scratch + 87);
|
629
704
|
scratch = character & 0x000f;
|
630
705
|
hex_string[6] = (scratch <= 9 ? scratch + 48 : scratch + 87);
|
631
|
-
|
706
|
+
str_append(output, hex_string, sizeof(hex_string));
|
632
707
|
}
|
633
708
|
|
634
|
-
|
709
|
+
// trim parser->link_text in place
|
710
|
+
void wiki_trim_link_text(parser_t *parser)
|
635
711
|
{
|
636
|
-
|
637
|
-
char *src = RSTRING_PTR(string);
|
712
|
+
char *src = parser->link_text->ptr;
|
638
713
|
char *start = src; // remember this so we can check if we're at the start
|
639
714
|
char *left = src;
|
640
715
|
char *non_space = src; // remember last non-space character output
|
641
|
-
|
642
|
-
char *end = src + len;
|
716
|
+
char *end = src + parser->link_text->len;
|
643
717
|
while (src < end)
|
644
718
|
{
|
645
719
|
if (*src == ' ')
|
@@ -651,143 +725,104 @@ VALUE _Wikitext_parser_trim_link_target(VALUE string)
|
|
651
725
|
non_space = src;
|
652
726
|
src++;
|
653
727
|
}
|
654
|
-
if (left
|
655
|
-
|
656
|
-
|
657
|
-
|
728
|
+
if (left != start || non_space + 1 != end)
|
729
|
+
{
|
730
|
+
// TODO: could potentially avoid this memmove by extending the str_t struct with an "offset" or "free" member
|
731
|
+
parser->link_text->len = (non_space + 1) - left;
|
732
|
+
memmove(parser->link_text->ptr, left, parser->link_text->len);
|
733
|
+
}
|
658
734
|
}
|
659
735
|
|
660
736
|
// - non-printable (non-ASCII) characters converted to numeric entities
|
661
737
|
// - QUOT and AMP characters converted to named entities
|
662
|
-
// - if
|
663
|
-
// - if
|
664
|
-
|
738
|
+
// - if trim is true, leading and trailing whitespace trimmed
|
739
|
+
// - if trim is false, there is no special treatment of spaces
|
740
|
+
void wiki_append_sanitized_link_target(parser_t *parser, str_t *output, bool trim)
|
665
741
|
{
|
666
|
-
|
667
|
-
char *
|
668
|
-
char *
|
669
|
-
|
670
|
-
char *end = src + len;
|
671
|
-
|
672
|
-
// start with a destination buffer twice the size of the source, will realloc if necessary
|
673
|
-
// slop = (len / 8) * 8 (ie. one in every 8 characters can be converted into an entity, each entity requires 8 bytes)
|
674
|
-
// this efficiently handles the most common case (where the size of the buffer doesn't change much)
|
675
|
-
char *dest = ALLOC_N(char, len * 2);
|
676
|
-
char *dest_ptr = dest; // hang on to this so we can pass it to free() later
|
677
|
-
char *non_space = dest; // remember last non-space character output
|
742
|
+
char *src = parser->link_target->ptr;
|
743
|
+
char *start = src; // remember this so we can check if we're at the start
|
744
|
+
char *non_space = output->ptr + output->len; // remember last non-space character output
|
745
|
+
char *end = src + parser->link_target->len;
|
678
746
|
while (src < end)
|
679
747
|
{
|
680
|
-
// need at most 8
|
681
|
-
if (
|
748
|
+
// need at most 8 bytes to display each input character (&#x0000;)
|
749
|
+
if (output->ptr + output->len + 8 > output->ptr + output->capacity) // outgrowing buffer, must grow
|
682
750
|
{
|
683
|
-
char *
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
if (dest == NULL)
|
688
|
-
{
|
689
|
-
// would have used reallocf, but this has to run on Linux too, not just Darwin
|
690
|
-
free(dest_ptr);
|
691
|
-
rb_raise(rb_eNoMemError, "failed to re-allocate temporary storage (memory allocation error)");
|
692
|
-
}
|
693
|
-
dest_ptr = dest;
|
694
|
-
dest = dest_ptr + (old_dest - old_dest_ptr);
|
695
|
-
non_space = dest_ptr + (non_space - old_dest_ptr);
|
751
|
+
char *old_ptr = output->ptr;
|
752
|
+
str_grow(output, output->len + (end - src) * 8); // allocate enough for worst case
|
753
|
+
if (old_ptr != output->ptr) // may have moved
|
754
|
+
non_space += output->ptr - old_ptr;
|
696
755
|
}
|
697
756
|
|
698
|
-
if (*src == '"')
|
757
|
+
if (*src == '"')
|
699
758
|
{
|
700
759
|
char quot_entity_literal[] = { '&', 'q', 'u', 'o', 't', ';' }; // no trailing NUL
|
701
|
-
|
702
|
-
dest += sizeof(quot_entity_literal);
|
760
|
+
str_append(output, quot_entity_literal, sizeof(quot_entity_literal));
|
703
761
|
}
|
704
|
-
else if (*src == '&')
|
762
|
+
else if (*src == '&')
|
705
763
|
{
|
706
764
|
char amp_entity_literal[] = { '&', 'a', 'm', 'p', ';' }; // no trailing NUL
|
707
|
-
|
708
|
-
dest += sizeof(amp_entity_literal);
|
709
|
-
}
|
710
|
-
else if (*src == '<') // LESS_THAN
|
711
|
-
{
|
712
|
-
free(dest_ptr);
|
713
|
-
rb_raise(rb_eRangeError, "invalid link text (\"<\" may not appear in link text)");
|
714
|
-
}
|
715
|
-
else if (*src == '>') // GREATER_THAN
|
716
|
-
{
|
717
|
-
free(dest_ptr);
|
718
|
-
rb_raise(rb_eRangeError, "invalid link text (\">\" may not appear in link text)");
|
765
|
+
str_append(output, amp_entity_literal, sizeof(amp_entity_literal));
|
719
766
|
}
|
720
|
-
else if (*src == '
|
721
|
-
|
722
|
-
else if (*src
|
767
|
+
else if (*src == '<' || *src == '>')
|
768
|
+
rb_raise(rb_eRangeError, "invalid link text (\"%c\" may not appear in link text)", *src);
|
769
|
+
else if (*src == ' ' && src == start && trim)
|
770
|
+
start++; // we eat leading space
|
771
|
+
else if (*src >= 0x20 && *src <= 0x7e) // printable ASCII
|
723
772
|
{
|
724
|
-
*
|
725
|
-
|
773
|
+
*(output->ptr + output->len) = *src;
|
774
|
+
output->len++;
|
726
775
|
}
|
727
776
|
else // all others: must convert to entities
|
728
777
|
{
|
729
778
|
long width;
|
730
|
-
|
731
|
-
char *entity_src = RSTRING_PTR(entity);
|
732
|
-
long entity_len = RSTRING_LEN(entity); // should always be 8 characters (8 bytes)
|
733
|
-
memcpy(dest, entity_src, entity_len);
|
734
|
-
dest += entity_len;
|
779
|
+
wiki_append_entity_from_utf32_char(output, wiki_utf8_to_utf32(src, end, &width));
|
735
780
|
src += width;
|
736
|
-
non_space =
|
781
|
+
non_space = output->ptr + output->len;
|
737
782
|
continue;
|
738
783
|
}
|
739
784
|
if (*src != ' ')
|
740
|
-
non_space =
|
785
|
+
non_space = output->ptr + output->len;
|
741
786
|
src++;
|
742
787
|
}
|
743
788
|
|
744
789
|
// trim trailing space if necessary
|
745
|
-
if (
|
746
|
-
len
|
747
|
-
else
|
748
|
-
len = dest - dest_ptr;
|
749
|
-
VALUE out = rb_str_new(dest_ptr, len);
|
750
|
-
free(dest_ptr);
|
751
|
-
return out;
|
790
|
+
if (trim && output->ptr + output->len != non_space)
|
791
|
+
output->len -= (output->ptr + output->len) - non_space;
|
752
792
|
}
|
753
793
|
|
754
794
|
VALUE Wikitext_parser_sanitize_link_target(VALUE self, VALUE string)
|
755
795
|
{
|
756
796
|
parser_t parser;
|
757
|
-
parser.link_target
|
758
|
-
|
797
|
+
parser.link_target = str_new_from_string(string);
|
798
|
+
GC_WRAP_STR(parser.link_target, link_target_gc);
|
799
|
+
str_t *output = str_new();
|
800
|
+
GC_WRAP_STR(output, output_gc);
|
801
|
+
wiki_append_sanitized_link_target(&parser, output, true);
|
802
|
+
return string_from_str(output);
|
759
803
|
}
|
760
804
|
|
761
|
-
//
|
762
|
-
//
|
763
|
-
//
|
764
|
-
//
|
765
|
-
|
766
|
-
// ...the [[foo]] is...
|
767
|
-
// to be equivalent to:
|
768
|
-
// thing. [[Foo]] was...
|
769
|
-
static void _Wikitext_parser_encode_link_target(parser_t *parser)
|
805
|
+
// Encodes the parser link_target member (in-place) according to RFCs 2396 and 2718
|
806
|
+
//
|
807
|
+
// Leading and trailing whitespace trimmed. Spaces are converted to
|
808
|
+
// underscores if the parser space_to_underscore member is true.
|
809
|
+
static void wiki_encode_link_target(parser_t *parser)
|
770
810
|
{
|
771
|
-
|
772
|
-
char *
|
773
|
-
|
774
|
-
long len = RSTRING_LEN(in);
|
811
|
+
char *src = parser->link_target->ptr;
|
812
|
+
char *start = src; // remember this so we can check if we're at the start
|
813
|
+
long len = parser->link_target->len;
|
775
814
|
if (!(len > 0))
|
776
815
|
return;
|
777
|
-
char *end =
|
778
|
-
static char hex[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
|
779
|
-
|
780
|
-
// to avoid most reallocations start with a destination buffer twice the size of the source
|
781
|
-
// this handles the most common case (where most chars are in the ASCII range and don't require more storage, but there are
|
782
|
-
// often quite a few spaces, which are encoded as "%20" and occupy 3 bytes)
|
783
|
-
// the worst case is where _every_ byte must be written out using 3 bytes
|
816
|
+
char *end = src + len;
|
784
817
|
long dest_len = len * 2;
|
785
818
|
char *dest = ALLOC_N(char, dest_len);
|
786
819
|
char *dest_ptr = dest; // hang on to this so we can pass it to free() later
|
787
820
|
char *non_space = dest; // remember last non-space character output
|
788
|
-
|
821
|
+
static char hex[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
|
822
|
+
for (; src < end; src++)
|
789
823
|
{
|
790
|
-
|
824
|
+
// worst case: a single character may grow to 3 characters once encoded
|
825
|
+
if ((dest + 3) > (dest_ptr + dest_len))
|
791
826
|
{
|
792
827
|
// outgrowing buffer, must reallocate
|
793
828
|
char *old_dest = dest;
|
@@ -806,27 +841,27 @@ static void _Wikitext_parser_encode_link_target(parser_t *parser)
|
|
806
841
|
}
|
807
842
|
|
808
843
|
// pass through unreserved characters
|
809
|
-
if ((
|
810
|
-
(
|
811
|
-
(
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
844
|
+
if ((*src >= 'a' && *src <= 'z') ||
|
845
|
+
(*src >= 'A' && *src <= 'Z') ||
|
846
|
+
(*src >= '0' && *src <= '9') ||
|
847
|
+
*src == '-' ||
|
848
|
+
*src == '_' ||
|
849
|
+
*src == '.' ||
|
850
|
+
*src == '~')
|
816
851
|
{
|
817
|
-
*dest++ = *
|
852
|
+
*dest++ = *src;
|
818
853
|
non_space = dest;
|
819
854
|
}
|
820
|
-
else if (*
|
855
|
+
else if (*src == ' ' && src == start)
|
821
856
|
start++; // we eat leading space
|
822
|
-
else if (*
|
857
|
+
else if (*src == ' ' && parser->space_to_underscore)
|
823
858
|
*dest++ = '_';
|
824
859
|
else // everything else gets URL-encoded
|
825
860
|
{
|
826
861
|
*dest++ = '%';
|
827
|
-
*dest++ = hex[(unsigned char)(*
|
828
|
-
*dest++ = hex[(unsigned char)(*
|
829
|
-
if (*
|
862
|
+
*dest++ = hex[(unsigned char)(*src) / 16]; // left
|
863
|
+
*dest++ = hex[(unsigned char)(*src) % 16]; // right
|
864
|
+
if (*src != ' ')
|
830
865
|
non_space = dest;
|
831
866
|
}
|
832
867
|
}
|
@@ -836,90 +871,89 @@ static void _Wikitext_parser_encode_link_target(parser_t *parser)
|
|
836
871
|
dest_len = non_space - dest_ptr;
|
837
872
|
else
|
838
873
|
dest_len = dest - dest_ptr;
|
839
|
-
parser->link_target
|
874
|
+
str_clear(parser->link_target);
|
875
|
+
str_append(parser->link_target, dest_ptr, dest_len);
|
840
876
|
free(dest_ptr);
|
841
877
|
}
|
842
878
|
|
843
879
|
VALUE Wikitext_parser_encode_link_target(VALUE self, VALUE in)
|
844
880
|
{
|
845
881
|
parser_t parser;
|
846
|
-
parser.
|
847
|
-
parser.
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
|
852
|
-
// this method exposed for testing only
|
853
|
-
VALUE Wikitext_parser_encode_special_link_target(VALUE self, VALUE in)
|
854
|
-
{
|
855
|
-
parser_t parser;
|
856
|
-
parser.link_target = in;
|
857
|
-
parser.space_to_underscore = Qfalse;
|
858
|
-
_Wikitext_parser_encode_link_target(&parser);
|
859
|
-
return parser.link_target;
|
882
|
+
parser.space_to_underscore = false;
|
883
|
+
parser.link_target = str_new_from_string(in);
|
884
|
+
GC_WRAP_STR(parser.link_target, link_target_gc);
|
885
|
+
wiki_encode_link_target(&parser);
|
886
|
+
return string_from_str(parser.link_target);
|
860
887
|
}
|
861
888
|
|
862
889
|
// returns 1 (true) if supplied string is blank (nil, empty, or all whitespace)
|
863
890
|
// returns 0 (false) otherwise
|
864
|
-
|
891
|
+
bool wiki_blank(str_t *str)
|
865
892
|
{
|
866
|
-
if (
|
867
|
-
return
|
868
|
-
for (char *ptr =
|
869
|
-
*end =
|
893
|
+
if (str->len == 0)
|
894
|
+
return true;
|
895
|
+
for (char *ptr = str->ptr,
|
896
|
+
*end = str->ptr + str->len;
|
870
897
|
ptr < end; ptr++)
|
871
898
|
{
|
872
899
|
if (*ptr != ' ')
|
873
|
-
return
|
900
|
+
return false;
|
874
901
|
}
|
875
|
-
return
|
902
|
+
return true;
|
876
903
|
}
|
877
904
|
|
878
|
-
void
|
905
|
+
void wiki_rollback_failed_internal_link(parser_t *parser)
|
879
906
|
{
|
880
907
|
if (!IN(LINK_START))
|
881
908
|
return; // nothing to do!
|
882
909
|
int scope_includes_separator = IN(SEPARATOR);
|
883
|
-
|
884
|
-
|
885
|
-
if (
|
910
|
+
wiki_pop_from_stack_up_to(parser, NULL, LINK_START, true);
|
911
|
+
str_append(parser->output, link_start, sizeof(link_start) - 1);
|
912
|
+
if (parser->link_target->len > 0)
|
886
913
|
{
|
887
|
-
|
888
|
-
rb_str_append(parser->output, sanitized);
|
914
|
+
wiki_append_sanitized_link_target(parser, parser->output, false);
|
889
915
|
if (scope_includes_separator)
|
890
916
|
{
|
891
|
-
|
892
|
-
if (
|
893
|
-
|
917
|
+
str_append(parser->output, separator, sizeof(separator) - 1);
|
918
|
+
if (parser->link_text->len > 0)
|
919
|
+
str_append_str(parser->output, parser->link_text);
|
894
920
|
}
|
895
921
|
}
|
896
|
-
parser->capture
|
897
|
-
parser->link_target
|
898
|
-
parser->link_text
|
922
|
+
parser->capture = NULL;
|
923
|
+
str_clear(parser->link_target);
|
924
|
+
str_clear(parser->link_text);
|
899
925
|
}
|
900
926
|
|
901
|
-
void
|
927
|
+
void wiki_rollback_failed_external_link(parser_t *parser)
|
902
928
|
{
|
903
929
|
if (!IN(EXT_LINK_START))
|
904
930
|
return; // nothing to do!
|
931
|
+
|
932
|
+
// store a couple of values before popping
|
905
933
|
int scope_includes_space = IN(SPACE);
|
906
|
-
|
907
|
-
|
908
|
-
|
934
|
+
VALUE link_class = IN(PATH) ? Qnil : parser->external_link_class;
|
935
|
+
wiki_pop_from_stack_up_to(parser, NULL, EXT_LINK_START, true);
|
936
|
+
|
937
|
+
str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
|
938
|
+
if (parser->link_target->len > 0)
|
909
939
|
{
|
910
|
-
|
911
|
-
parser->link_target = _Wikitext_hyperlink(parser, Qnil, parser->link_target, parser->link_target, parser->external_link_class);
|
912
|
-
rb_str_append(parser->output, parser->link_target);
|
940
|
+
wiki_append_hyperlink(parser, Qnil, parser->link_target, NULL, link_class, true);
|
913
941
|
if (scope_includes_space)
|
914
942
|
{
|
915
|
-
|
916
|
-
if (
|
917
|
-
|
943
|
+
str_append(parser->output, space, sizeof(space) - 1);
|
944
|
+
if (parser->link_text->len > 0)
|
945
|
+
str_append_str(parser->output, parser->link_text);
|
918
946
|
}
|
919
947
|
}
|
920
|
-
parser->capture
|
921
|
-
parser->link_target
|
922
|
-
parser->link_text
|
948
|
+
parser->capture = NULL;
|
949
|
+
str_clear(parser->link_target);
|
950
|
+
str_clear(parser->link_text);
|
951
|
+
}
|
952
|
+
|
953
|
+
void wiki_rollback_failed_link(parser_t *parser)
|
954
|
+
{
|
955
|
+
wiki_rollback_failed_internal_link(parser);
|
956
|
+
wiki_rollback_failed_external_link(parser);
|
923
957
|
}
|
924
958
|
|
925
959
|
VALUE Wikitext_parser_initialize(int argc, VALUE *argv, VALUE self)
|
@@ -1031,31 +1065,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1031
1065
|
VALUE prefix = rb_iv_get(self, "@internal_link_prefix");
|
1032
1066
|
|
1033
1067
|
// set up parser struct to make passing parameters a little easier
|
1034
|
-
|
1035
|
-
|
1036
|
-
parser_t *parser = &_parser;
|
1037
|
-
parser->output = rb_str_new2("");
|
1038
|
-
parser->capture = Qnil;
|
1039
|
-
parser->link_target = Qnil;
|
1040
|
-
parser->link_text = Qnil;
|
1068
|
+
parser_t *parser = parser_new();
|
1069
|
+
GC_WRAP_PARSER(parser, parser_gc);
|
1041
1070
|
parser->external_link_class = link_class;
|
1042
1071
|
parser->mailto_class = mailto_class;
|
1043
1072
|
parser->img_prefix = rb_iv_get(self, "@img_prefix");
|
1044
|
-
parser->
|
1045
|
-
|
1046
|
-
parser->line = ary_new();
|
1047
|
-
GC_WRAP_ARY(parser->line, line_gc);
|
1048
|
-
parser->line_buffer = ary_new();
|
1049
|
-
GC_WRAP_ARY(parser->line_buffer, line_buffer_gc);
|
1050
|
-
parser->pending_crlf = Qfalse;
|
1051
|
-
parser->autolink = rb_iv_get(self, "@autolink");
|
1052
|
-
parser->space_to_underscore = rb_iv_get(self, "@space_to_underscore");
|
1073
|
+
parser->autolink = rb_iv_get(self, "@autolink") == Qtrue ? true : false;
|
1074
|
+
parser->space_to_underscore = rb_iv_get(self, "@space_to_underscore") == Qtrue ? true : false;
|
1053
1075
|
parser->line_ending = str_new_from_string(line_ending);
|
1054
|
-
GC_WRAP_STR(parser->line_ending, line_ending_gc);
|
1055
1076
|
parser->base_indent = base_indent;
|
1056
|
-
parser->current_indent = 0;
|
1057
|
-
parser->tabulation = str_new();
|
1058
|
-
GC_WRAP_STR(parser->tabulation, tabulation_gc);
|
1059
1077
|
parser->base_heading_level = base_heading_level;
|
1060
1078
|
|
1061
1079
|
// this simple looping design leads to a single enormous function,
|
@@ -1093,10 +1111,13 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1093
1111
|
long remove_strong = -1;
|
1094
1112
|
long remove_em = -1;
|
1095
1113
|
|
1096
|
-
// general purpose counters and
|
1114
|
+
// general purpose counters, flags and pointers
|
1097
1115
|
long i = 0;
|
1098
1116
|
long j = 0;
|
1099
1117
|
long k = 0;
|
1118
|
+
str_t *output = NULL;
|
1119
|
+
str_t _token_str;
|
1120
|
+
str_t *token_str = &_token_str;
|
1100
1121
|
|
1101
1122
|
// The following giant switch statement contains cases for all the possible token types.
|
1102
1123
|
// In the most basic sense we are emitting the HTML that corresponds to each token,
|
@@ -1118,16 +1139,16 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1118
1139
|
case PRE:
|
1119
1140
|
if (IN(NO_WIKI_START) || IN(PRE_START))
|
1120
1141
|
{
|
1121
|
-
|
1142
|
+
str_append(parser->output, space, sizeof(space) - 1);
|
1122
1143
|
break;
|
1123
1144
|
}
|
1124
1145
|
else if (IN(BLOCKQUOTE_START))
|
1125
1146
|
{
|
1126
1147
|
// this kind of nesting not allowed (to avoid user confusion)
|
1127
|
-
|
1128
|
-
|
1129
|
-
|
1130
|
-
|
1148
|
+
wiki_pop_excess_elements(parser);
|
1149
|
+
wiki_start_para_if_necessary(parser);
|
1150
|
+
output = parser->capture ? parser->capture : parser->output;
|
1151
|
+
str_append(output, space, sizeof(space) - 1);
|
1131
1152
|
break;
|
1132
1153
|
}
|
1133
1154
|
|
@@ -1139,15 +1160,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1139
1160
|
{
|
1140
1161
|
// must pop (reduce nesting level)
|
1141
1162
|
for (i = j - i; i > 0; i--)
|
1142
|
-
|
1163
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, true);
|
1143
1164
|
}
|
1144
1165
|
|
1145
1166
|
if (!IN(PRE))
|
1146
1167
|
{
|
1147
|
-
parser->pending_crlf =
|
1148
|
-
|
1149
|
-
|
1150
|
-
|
1168
|
+
parser->pending_crlf = false;
|
1169
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
|
1170
|
+
wiki_indent(parser);
|
1171
|
+
str_append(parser->output, pre_start, sizeof(pre_start) - 1);
|
1151
1172
|
ary_push(parser->scope, PRE);
|
1152
1173
|
}
|
1153
1174
|
break;
|
@@ -1155,16 +1176,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1155
1176
|
case PRE_START:
|
1156
1177
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1157
1178
|
{
|
1158
|
-
|
1159
|
-
|
1179
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1180
|
+
str_append(parser->output, escaped_pre_start, sizeof(escaped_pre_start) - 1);
|
1160
1181
|
}
|
1161
1182
|
else if (IN(BLOCKQUOTE_START))
|
1162
1183
|
{
|
1163
|
-
|
1164
|
-
|
1165
|
-
|
1166
|
-
|
1167
|
-
rb_str_cat(parser->output, pre_start, sizeof(pre_start) - 1);
|
1184
|
+
wiki_rollback_failed_link(parser); // if any
|
1185
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
|
1186
|
+
wiki_indent(parser);
|
1187
|
+
str_append(parser->output, pre_start, sizeof(pre_start) - 1);
|
1168
1188
|
ary_push(parser->scope, PRE_START);
|
1169
1189
|
ary_push(parser->line, PRE_START);
|
1170
1190
|
}
|
@@ -1172,29 +1192,27 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1172
1192
|
{
|
1173
1193
|
if (token->column_start == 1) // only allowed in first column
|
1174
1194
|
{
|
1175
|
-
|
1176
|
-
|
1177
|
-
|
1178
|
-
|
1179
|
-
rb_str_cat(parser->output, pre_start, sizeof(pre_start) - 1);
|
1195
|
+
wiki_rollback_failed_link(parser); // if any
|
1196
|
+
wiki_pop_all_from_stack(parser);
|
1197
|
+
wiki_indent(parser);
|
1198
|
+
str_append(parser->output, pre_start, sizeof(pre_start) - 1);
|
1180
1199
|
ary_push(parser->scope, PRE_START);
|
1181
1200
|
ary_push(parser->line, PRE_START);
|
1182
1201
|
}
|
1183
1202
|
else // PRE_START illegal here
|
1184
1203
|
{
|
1185
|
-
|
1186
|
-
|
1187
|
-
|
1188
|
-
|
1204
|
+
output = parser->capture ? parser->capture : parser->output;
|
1205
|
+
wiki_pop_excess_elements(parser);
|
1206
|
+
wiki_start_para_if_necessary(parser);
|
1207
|
+
str_append(output, escaped_pre_start, sizeof(escaped_pre_start) - 1);
|
1189
1208
|
}
|
1190
1209
|
}
|
1191
1210
|
else
|
1192
1211
|
{
|
1193
|
-
|
1194
|
-
|
1195
|
-
|
1196
|
-
|
1197
|
-
rb_str_cat(parser->output, pre_start, sizeof(pre_start) - 1);
|
1212
|
+
wiki_rollback_failed_link(parser); // if any
|
1213
|
+
wiki_pop_from_stack_up_to(parser, NULL, P, true);
|
1214
|
+
wiki_indent(parser);
|
1215
|
+
str_append(parser->output, pre_start, sizeof(pre_start) - 1);
|
1198
1216
|
ary_push(parser->scope, PRE_START);
|
1199
1217
|
ary_push(parser->line, PRE_START);
|
1200
1218
|
}
|
@@ -1203,19 +1221,19 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1203
1221
|
case PRE_END:
|
1204
1222
|
if (IN(NO_WIKI_START) || IN(PRE))
|
1205
1223
|
{
|
1206
|
-
|
1207
|
-
|
1224
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1225
|
+
str_append(parser->output, escaped_pre_end, sizeof(escaped_pre_end) - 1);
|
1208
1226
|
}
|
1209
1227
|
else
|
1210
1228
|
{
|
1211
1229
|
if (IN(PRE_START))
|
1212
|
-
|
1230
|
+
wiki_pop_from_stack_up_to(parser, parser->output, PRE_START, true);
|
1213
1231
|
else
|
1214
1232
|
{
|
1215
|
-
|
1216
|
-
|
1217
|
-
|
1218
|
-
|
1233
|
+
output = parser->capture ? parser->capture : parser->output;
|
1234
|
+
wiki_pop_excess_elements(parser);
|
1235
|
+
wiki_start_para_if_necessary(parser);
|
1236
|
+
str_append(output, escaped_pre_end, sizeof(escaped_pre_end) - 1);
|
1219
1237
|
}
|
1220
1238
|
}
|
1221
1239
|
break;
|
@@ -1223,14 +1241,14 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1223
1241
|
case BLOCKQUOTE:
|
1224
1242
|
if (IN(NO_WIKI_START) || IN(PRE_START))
|
1225
1243
|
// no need to check for <pre>; can never appear inside it
|
1226
|
-
|
1244
|
+
str_append(parser->output, escaped_blockquote, TOKEN_LEN(token) + 3); // will either emit ">" or "> "
|
1227
1245
|
else if (IN(BLOCKQUOTE_START))
|
1228
1246
|
{
|
1229
1247
|
// this kind of nesting not allowed (to avoid user confusion)
|
1230
|
-
|
1231
|
-
|
1232
|
-
|
1233
|
-
|
1248
|
+
wiki_pop_excess_elements(parser);
|
1249
|
+
wiki_start_para_if_necessary(parser);
|
1250
|
+
output = parser->capture ? parser->capture : parser->output;
|
1251
|
+
str_append(output, escaped_blockquote, TOKEN_LEN(token) + 3); // will either emit ">" or "> "
|
1234
1252
|
break;
|
1235
1253
|
}
|
1236
1254
|
else
|
@@ -1252,12 +1270,12 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1252
1270
|
if (i > j)
|
1253
1271
|
{
|
1254
1272
|
// must push (increase nesting level)
|
1255
|
-
|
1273
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
|
1256
1274
|
for (i = i - j; i > 0; i--)
|
1257
1275
|
{
|
1258
|
-
|
1259
|
-
|
1260
|
-
|
1276
|
+
wiki_indent(parser);
|
1277
|
+
str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
|
1278
|
+
str_append_str(parser->output, parser->line_ending);
|
1261
1279
|
ary_push(parser->scope, BLOCKQUOTE);
|
1262
1280
|
}
|
1263
1281
|
}
|
@@ -1265,7 +1283,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1265
1283
|
{
|
1266
1284
|
// must pop (reduce nesting level)
|
1267
1285
|
for (i = j - i; i > 0; i--)
|
1268
|
-
|
1286
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, true);
|
1269
1287
|
}
|
1270
1288
|
|
1271
1289
|
// jump to top of the loop to process token we scanned during lookahead
|
@@ -1276,18 +1294,17 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1276
1294
|
case BLOCKQUOTE_START:
|
1277
1295
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1278
1296
|
{
|
1279
|
-
|
1280
|
-
|
1297
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1298
|
+
str_append(parser->output, escaped_blockquote_start, sizeof(escaped_blockquote_start) - 1);
|
1281
1299
|
}
|
1282
1300
|
else if (IN(BLOCKQUOTE_START))
|
1283
1301
|
{
|
1284
1302
|
// nesting is fine here
|
1285
|
-
|
1286
|
-
|
1287
|
-
|
1288
|
-
|
1289
|
-
|
1290
|
-
rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
|
1303
|
+
wiki_rollback_failed_link(parser); // if any
|
1304
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
|
1305
|
+
wiki_indent(parser);
|
1306
|
+
str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
|
1307
|
+
str_append_str(parser->output, parser->line_ending);
|
1291
1308
|
ary_push(parser->scope, BLOCKQUOTE_START);
|
1292
1309
|
ary_push(parser->line, BLOCKQUOTE_START);
|
1293
1310
|
}
|
@@ -1295,32 +1312,30 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1295
1312
|
{
|
1296
1313
|
if (token->column_start == 1) // only allowed in first column
|
1297
1314
|
{
|
1298
|
-
|
1299
|
-
|
1300
|
-
|
1301
|
-
|
1302
|
-
|
1303
|
-
rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
|
1315
|
+
wiki_rollback_failed_link(parser); // if any
|
1316
|
+
wiki_pop_all_from_stack(parser);
|
1317
|
+
wiki_indent(parser);
|
1318
|
+
str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
|
1319
|
+
str_append_str(parser->output, parser->line_ending);
|
1304
1320
|
ary_push(parser->scope, BLOCKQUOTE_START);
|
1305
1321
|
ary_push(parser->line, BLOCKQUOTE_START);
|
1306
1322
|
}
|
1307
1323
|
else // BLOCKQUOTE_START illegal here
|
1308
1324
|
{
|
1309
|
-
|
1310
|
-
|
1311
|
-
|
1312
|
-
|
1325
|
+
output = parser->capture ? parser->capture : parser->output;
|
1326
|
+
wiki_pop_excess_elements(parser);
|
1327
|
+
wiki_start_para_if_necessary(parser);
|
1328
|
+
str_append(output, escaped_blockquote_start, sizeof(escaped_blockquote_start) - 1);
|
1313
1329
|
}
|
1314
1330
|
}
|
1315
1331
|
else
|
1316
1332
|
{
|
1317
1333
|
// would be nice to eliminate the repetition here but it's probably the clearest way
|
1318
|
-
|
1319
|
-
|
1320
|
-
|
1321
|
-
|
1322
|
-
|
1323
|
-
rb_str_cat(parser->output, parser->line_ending->ptr, parser->line_ending->len);
|
1334
|
+
wiki_rollback_failed_link(parser); // if any
|
1335
|
+
wiki_pop_from_stack_up_to(parser, NULL, P, true);
|
1336
|
+
wiki_indent(parser);
|
1337
|
+
str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
|
1338
|
+
str_append_str(parser->output, parser->line_ending);
|
1324
1339
|
ary_push(parser->scope, BLOCKQUOTE_START);
|
1325
1340
|
ary_push(parser->line, BLOCKQUOTE_START);
|
1326
1341
|
}
|
@@ -1329,19 +1344,19 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1329
1344
|
case BLOCKQUOTE_END:
|
1330
1345
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1331
1346
|
{
|
1332
|
-
|
1333
|
-
|
1347
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1348
|
+
str_append(parser->output, escaped_blockquote_end, sizeof(escaped_blockquote_end) - 1);
|
1334
1349
|
}
|
1335
1350
|
else
|
1336
1351
|
{
|
1337
1352
|
if (IN(BLOCKQUOTE_START))
|
1338
|
-
|
1353
|
+
wiki_pop_from_stack_up_to(parser, parser->output, BLOCKQUOTE_START, true);
|
1339
1354
|
else
|
1340
1355
|
{
|
1341
|
-
|
1342
|
-
|
1343
|
-
|
1344
|
-
|
1356
|
+
output = parser->capture ? parser->capture : parser->output;
|
1357
|
+
wiki_pop_excess_elements(parser);
|
1358
|
+
wiki_start_para_if_necessary(parser);
|
1359
|
+
str_append(output, escaped_blockquote_end, sizeof(escaped_blockquote_end) - 1);
|
1345
1360
|
}
|
1346
1361
|
}
|
1347
1362
|
break;
|
@@ -1349,13 +1364,13 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1349
1364
|
case NO_WIKI_START:
|
1350
1365
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1351
1366
|
{
|
1352
|
-
|
1353
|
-
|
1367
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1368
|
+
str_append(parser->output, escaped_no_wiki_start, sizeof(escaped_no_wiki_start) - 1);
|
1354
1369
|
}
|
1355
1370
|
else
|
1356
1371
|
{
|
1357
|
-
|
1358
|
-
|
1372
|
+
wiki_pop_excess_elements(parser);
|
1373
|
+
wiki_start_para_if_necessary(parser);
|
1359
1374
|
ary_push(parser->scope, NO_WIKI_START);
|
1360
1375
|
ary_push(parser->line, NO_WIKI_START);
|
1361
1376
|
}
|
@@ -1364,25 +1379,25 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1364
1379
|
case NO_WIKI_END:
|
1365
1380
|
if (IN(NO_WIKI_START))
|
1366
1381
|
// <nowiki> should always only ever be the last item in the stack, but use the helper routine just in case
|
1367
|
-
|
1382
|
+
wiki_pop_from_stack_up_to(parser, NULL, NO_WIKI_START, true);
|
1368
1383
|
else
|
1369
1384
|
{
|
1370
|
-
|
1371
|
-
|
1372
|
-
|
1385
|
+
wiki_pop_excess_elements(parser);
|
1386
|
+
wiki_start_para_if_necessary(parser);
|
1387
|
+
str_append(parser->output, escaped_no_wiki_end, sizeof(escaped_no_wiki_end) - 1);
|
1373
1388
|
}
|
1374
1389
|
break;
|
1375
1390
|
|
1376
1391
|
case STRONG_EM:
|
1377
1392
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1378
1393
|
{
|
1379
|
-
|
1380
|
-
|
1394
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1395
|
+
str_append(parser->output, literal_strong_em, sizeof(literal_strong_em) - 1);
|
1381
1396
|
break;
|
1382
1397
|
}
|
1383
1398
|
|
1384
|
-
|
1385
|
-
|
1399
|
+
output = parser->capture ? parser->capture : parser->output;
|
1400
|
+
wiki_pop_excess_elements(parser);
|
1386
1401
|
|
1387
1402
|
// if you've seen STRONG/STRONG_START or EM/EM_START, must close them in the reverse order that you saw them!
|
1388
1403
|
// otherwise, must open them
|
@@ -1394,12 +1409,12 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1394
1409
|
int val = ary_entry(parser->scope, j);
|
1395
1410
|
if (val == STRONG || val == STRONG_START)
|
1396
1411
|
{
|
1397
|
-
|
1412
|
+
str_append(output, strong_end, sizeof(strong_end) - 1);
|
1398
1413
|
remove_strong = j;
|
1399
1414
|
}
|
1400
1415
|
else if (val == EM || val == EM_START)
|
1401
1416
|
{
|
1402
|
-
|
1417
|
+
str_append(output, em_end, sizeof(em_end) - 1);
|
1403
1418
|
remove_em = j;
|
1404
1419
|
}
|
1405
1420
|
}
|
@@ -1411,7 +1426,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1411
1426
|
ary_pop(parser->scope);
|
1412
1427
|
else // there was no em to remove!, so consider this an opening em tag
|
1413
1428
|
{
|
1414
|
-
|
1429
|
+
str_append(output, em_start, sizeof(em_start) - 1);
|
1415
1430
|
ary_push(parser->scope, EM);
|
1416
1431
|
ary_push(parser->line, EM);
|
1417
1432
|
}
|
@@ -1423,15 +1438,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1423
1438
|
ary_pop(parser->scope);
|
1424
1439
|
else // there was no strong to remove!, so consider this an opening strong tag
|
1425
1440
|
{
|
1426
|
-
|
1441
|
+
str_append(output, strong_start, sizeof(strong_start) - 1);
|
1427
1442
|
ary_push(parser->scope, STRONG);
|
1428
1443
|
ary_push(parser->line, STRONG);
|
1429
1444
|
}
|
1430
1445
|
}
|
1431
1446
|
else // no strong or em to remove, so this must be a new opening of both
|
1432
1447
|
{
|
1433
|
-
|
1434
|
-
|
1448
|
+
wiki_start_para_if_necessary(parser);
|
1449
|
+
str_append(output, strong_em_start, sizeof(strong_em_start) - 1);
|
1435
1450
|
ary_push(parser->scope, STRONG);
|
1436
1451
|
ary_push(parser->line, STRONG);
|
1437
1452
|
ary_push(parser->scope, EM);
|
@@ -1442,24 +1457,24 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1442
1457
|
case STRONG:
|
1443
1458
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1444
1459
|
{
|
1445
|
-
|
1446
|
-
|
1460
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1461
|
+
str_append(parser->output, literal_strong, sizeof(literal_strong) - 1);
|
1447
1462
|
}
|
1448
1463
|
else
|
1449
1464
|
{
|
1450
|
-
|
1465
|
+
output = parser->capture ? parser->capture : parser->output;
|
1451
1466
|
if (IN(STRONG_START))
|
1452
1467
|
// already in span started with <strong>, no choice but to emit this literally
|
1453
|
-
|
1468
|
+
str_append(output, literal_strong, sizeof(literal_strong) - 1);
|
1454
1469
|
else if (IN(STRONG))
|
1455
1470
|
// STRONG already seen, this is a closing tag
|
1456
|
-
|
1471
|
+
wiki_pop_from_stack_up_to(parser, output, STRONG, true);
|
1457
1472
|
else
|
1458
1473
|
{
|
1459
1474
|
// this is a new opening
|
1460
|
-
|
1461
|
-
|
1462
|
-
|
1475
|
+
wiki_pop_excess_elements(parser);
|
1476
|
+
wiki_start_para_if_necessary(parser);
|
1477
|
+
str_append(output, strong_start, sizeof(strong_start) - 1);
|
1463
1478
|
ary_push(parser->scope, STRONG);
|
1464
1479
|
ary_push(parser->line, STRONG);
|
1465
1480
|
}
|
@@ -1469,19 +1484,19 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1469
1484
|
case STRONG_START:
|
1470
1485
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1471
1486
|
{
|
1472
|
-
|
1473
|
-
|
1487
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1488
|
+
str_append(parser->output, escaped_strong_start, sizeof(escaped_strong_start) - 1);
|
1474
1489
|
}
|
1475
1490
|
else
|
1476
1491
|
{
|
1477
|
-
|
1492
|
+
output = parser->capture ? parser->capture : parser->output;
|
1478
1493
|
if (IN(STRONG_START) || IN(STRONG))
|
1479
|
-
|
1494
|
+
str_append(output, escaped_strong_start, sizeof(escaped_strong_start) - 1);
|
1480
1495
|
else
|
1481
1496
|
{
|
1482
|
-
|
1483
|
-
|
1484
|
-
|
1497
|
+
wiki_pop_excess_elements(parser);
|
1498
|
+
wiki_start_para_if_necessary(parser);
|
1499
|
+
str_append(output, strong_start, sizeof(strong_start) - 1);
|
1485
1500
|
ary_push(parser->scope, STRONG_START);
|
1486
1501
|
ary_push(parser->line, STRONG_START);
|
1487
1502
|
}
|
@@ -1491,20 +1506,20 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1491
1506
|
case STRONG_END:
|
1492
1507
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1493
1508
|
{
|
1494
|
-
|
1495
|
-
|
1509
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1510
|
+
str_append(parser->output, escaped_strong_end, sizeof(escaped_strong_end) - 1);
|
1496
1511
|
}
|
1497
1512
|
else
|
1498
1513
|
{
|
1499
|
-
|
1514
|
+
output = parser->capture ? parser->capture : parser->output;
|
1500
1515
|
if (IN(STRONG_START))
|
1501
|
-
|
1516
|
+
wiki_pop_from_stack_up_to(parser, output, STRONG_START, true);
|
1502
1517
|
else
|
1503
1518
|
{
|
1504
1519
|
// no STRONG_START in scope, so must interpret the STRONG_END without any special meaning
|
1505
|
-
|
1506
|
-
|
1507
|
-
|
1520
|
+
wiki_pop_excess_elements(parser);
|
1521
|
+
wiki_start_para_if_necessary(parser);
|
1522
|
+
str_append(output, escaped_strong_end, sizeof(escaped_strong_end) - 1);
|
1508
1523
|
}
|
1509
1524
|
}
|
1510
1525
|
break;
|
@@ -1512,24 +1527,24 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1512
1527
|
case EM:
|
1513
1528
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1514
1529
|
{
|
1515
|
-
|
1516
|
-
|
1530
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1531
|
+
str_append(parser->output, literal_em, sizeof(literal_em) - 1);
|
1517
1532
|
}
|
1518
1533
|
else
|
1519
1534
|
{
|
1520
|
-
|
1535
|
+
output = parser->capture ? parser->capture : parser->output;
|
1521
1536
|
if (IN(EM_START))
|
1522
1537
|
// already in span started with <em>, no choice but to emit this literally
|
1523
|
-
|
1538
|
+
str_append(output, literal_em, sizeof(literal_em) - 1);
|
1524
1539
|
else if (IN(EM))
|
1525
1540
|
// EM already seen, this is a closing tag
|
1526
|
-
|
1541
|
+
wiki_pop_from_stack_up_to(parser, output, EM, true);
|
1527
1542
|
else
|
1528
1543
|
{
|
1529
1544
|
// this is a new opening
|
1530
|
-
|
1531
|
-
|
1532
|
-
|
1545
|
+
wiki_pop_excess_elements(parser);
|
1546
|
+
wiki_start_para_if_necessary(parser);
|
1547
|
+
str_append(output, em_start, sizeof(em_start) - 1);
|
1533
1548
|
ary_push(parser->scope, EM);
|
1534
1549
|
ary_push(parser->line, EM);
|
1535
1550
|
}
|
@@ -1539,19 +1554,19 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1539
1554
|
case EM_START:
|
1540
1555
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1541
1556
|
{
|
1542
|
-
|
1543
|
-
|
1557
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1558
|
+
str_append(parser->output, escaped_em_start, sizeof(escaped_em_start) - 1);
|
1544
1559
|
}
|
1545
1560
|
else
|
1546
1561
|
{
|
1547
|
-
|
1562
|
+
output = parser->capture ? parser->capture : parser->output;
|
1548
1563
|
if (IN(EM_START) || IN(EM))
|
1549
|
-
|
1564
|
+
str_append(output, escaped_em_start, sizeof(escaped_em_start) - 1);
|
1550
1565
|
else
|
1551
1566
|
{
|
1552
|
-
|
1553
|
-
|
1554
|
-
|
1567
|
+
wiki_pop_excess_elements(parser);
|
1568
|
+
wiki_start_para_if_necessary(parser);
|
1569
|
+
str_append(output, em_start, sizeof(em_start) - 1);
|
1555
1570
|
ary_push(parser->scope, EM_START);
|
1556
1571
|
ary_push(parser->line, EM_START);
|
1557
1572
|
}
|
@@ -1561,20 +1576,20 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1561
1576
|
case EM_END:
|
1562
1577
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1563
1578
|
{
|
1564
|
-
|
1565
|
-
|
1579
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1580
|
+
str_append(parser->output, escaped_em_end, sizeof(escaped_em_end) - 1);
|
1566
1581
|
}
|
1567
1582
|
else
|
1568
1583
|
{
|
1569
|
-
|
1584
|
+
output = parser->capture ? parser->capture : parser->output;
|
1570
1585
|
if (IN(EM_START))
|
1571
|
-
|
1586
|
+
wiki_pop_from_stack_up_to(parser, output, EM_START, true);
|
1572
1587
|
else
|
1573
1588
|
{
|
1574
1589
|
// no EM_START in scope, so must interpret the EM_END without any special meaning
|
1575
|
-
|
1576
|
-
|
1577
|
-
|
1590
|
+
wiki_pop_excess_elements(parser);
|
1591
|
+
wiki_start_para_if_necessary(parser);
|
1592
|
+
str_append(output, escaped_em_end, sizeof(escaped_em_end) - 1);
|
1578
1593
|
}
|
1579
1594
|
}
|
1580
1595
|
break;
|
@@ -1582,24 +1597,24 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1582
1597
|
case TT:
|
1583
1598
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1584
1599
|
{
|
1585
|
-
|
1586
|
-
|
1600
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1601
|
+
str_append(parser->output, backtick, sizeof(backtick) - 1);
|
1587
1602
|
}
|
1588
1603
|
else
|
1589
1604
|
{
|
1590
|
-
|
1605
|
+
output = parser->capture ? parser->capture : parser->output;
|
1591
1606
|
if (IN(TT_START))
|
1592
1607
|
// already in span started with <tt>, no choice but to emit this literally
|
1593
|
-
|
1608
|
+
str_append(output, backtick, sizeof(backtick) - 1);
|
1594
1609
|
else if (IN(TT))
|
1595
1610
|
// TT (`) already seen, this is a closing tag
|
1596
|
-
|
1611
|
+
wiki_pop_from_stack_up_to(parser, output, TT, true);
|
1597
1612
|
else
|
1598
1613
|
{
|
1599
1614
|
// this is a new opening
|
1600
|
-
|
1601
|
-
|
1602
|
-
|
1615
|
+
wiki_pop_excess_elements(parser);
|
1616
|
+
wiki_start_para_if_necessary(parser);
|
1617
|
+
str_append(output, tt_start, sizeof(tt_start) - 1);
|
1603
1618
|
ary_push(parser->scope, TT);
|
1604
1619
|
ary_push(parser->line, TT);
|
1605
1620
|
}
|
@@ -1609,19 +1624,19 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1609
1624
|
case TT_START:
|
1610
1625
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1611
1626
|
{
|
1612
|
-
|
1613
|
-
|
1627
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1628
|
+
str_append(parser->output, escaped_tt_start, sizeof(escaped_tt_start) - 1);
|
1614
1629
|
}
|
1615
1630
|
else
|
1616
1631
|
{
|
1617
|
-
|
1632
|
+
output = parser->capture ? parser->capture : parser->output;
|
1618
1633
|
if (IN(TT_START) || IN(TT))
|
1619
|
-
|
1634
|
+
str_append(output, escaped_tt_start, sizeof(escaped_tt_start) - 1);
|
1620
1635
|
else
|
1621
1636
|
{
|
1622
|
-
|
1623
|
-
|
1624
|
-
|
1637
|
+
wiki_pop_excess_elements(parser);
|
1638
|
+
wiki_start_para_if_necessary(parser);
|
1639
|
+
str_append(output, tt_start, sizeof(tt_start) - 1);
|
1625
1640
|
ary_push(parser->scope, TT_START);
|
1626
1641
|
ary_push(parser->line, TT_START);
|
1627
1642
|
}
|
@@ -1631,20 +1646,20 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1631
1646
|
case TT_END:
|
1632
1647
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1633
1648
|
{
|
1634
|
-
|
1635
|
-
|
1649
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1650
|
+
str_append(parser->output, escaped_tt_end, sizeof(escaped_tt_end) - 1);
|
1636
1651
|
}
|
1637
1652
|
else
|
1638
1653
|
{
|
1639
|
-
|
1654
|
+
output = parser->capture ? parser->capture : parser->output;
|
1640
1655
|
if (IN(TT_START))
|
1641
|
-
|
1656
|
+
wiki_pop_from_stack_up_to(parser, output, TT_START, true);
|
1642
1657
|
else
|
1643
1658
|
{
|
1644
1659
|
// no TT_START in scope, so must interpret the TT_END without any special meaning
|
1645
|
-
|
1646
|
-
|
1647
|
-
|
1660
|
+
wiki_pop_excess_elements(parser);
|
1661
|
+
wiki_start_para_if_necessary(parser);
|
1662
|
+
str_append(output, escaped_tt_end, sizeof(escaped_tt_end) - 1);
|
1648
1663
|
}
|
1649
1664
|
}
|
1650
1665
|
break;
|
@@ -1654,7 +1669,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1654
1669
|
if (IN(NO_WIKI_START) || IN(PRE_START))
|
1655
1670
|
{
|
1656
1671
|
// no need to check for PRE; can never appear inside it
|
1657
|
-
|
1672
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
1658
1673
|
break;
|
1659
1674
|
}
|
1660
1675
|
|
@@ -1684,7 +1699,8 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1684
1699
|
// want to compare line with scope but can only do so if scope has enough items on it
|
1685
1700
|
if (j >= i)
|
1686
1701
|
{
|
1687
|
-
if (ary_entry(parser->scope, i + bq_count - 2) == type &&
|
1702
|
+
if (ary_entry(parser->scope, i + bq_count - 2) == type &&
|
1703
|
+
ary_entry(parser->scope, i + bq_count - 1) == LI)
|
1688
1704
|
{
|
1689
1705
|
// line and scope match at this point: do nothing yet
|
1690
1706
|
}
|
@@ -1693,7 +1709,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1693
1709
|
// item just pushed onto line does not match corresponding slot of scope!
|
1694
1710
|
for (; j >= i - 2; j--)
|
1695
1711
|
// must pop back before emitting
|
1696
|
-
|
1712
|
+
wiki_pop_from_stack(parser, NULL);
|
1697
1713
|
|
1698
1714
|
// will emit UL or OL, then LI
|
1699
1715
|
break;
|
@@ -1707,13 +1723,13 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1707
1723
|
// not an OL or UL token!
|
1708
1724
|
if (j == i)
|
1709
1725
|
// must close existing LI and re-open new one
|
1710
|
-
|
1726
|
+
wiki_pop_from_stack(parser, NULL);
|
1711
1727
|
else if (j > i)
|
1712
1728
|
{
|
1713
1729
|
// item just pushed onto line does not match corresponding slot of scope!
|
1714
1730
|
for (; j >= i; j--)
|
1715
1731
|
// must pop back before emitting
|
1716
|
-
|
1732
|
+
wiki_pop_from_stack(parser, NULL);
|
1717
1733
|
}
|
1718
1734
|
break;
|
1719
1735
|
}
|
@@ -1727,33 +1743,33 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1727
1743
|
if (j > 0 && ary_entry(parser->scope, -1) == LI)
|
1728
1744
|
{
|
1729
1745
|
// so we should precede it with a CRLF, and indicate that it's a nested list
|
1730
|
-
|
1746
|
+
str_append(parser->output, parser->line_ending->ptr, parser->line_ending->len);
|
1731
1747
|
ary_push(parser->scope, NESTED_LIST);
|
1732
1748
|
}
|
1733
1749
|
else
|
1734
1750
|
{
|
1735
1751
|
// this is a new list
|
1736
1752
|
if (IN(BLOCKQUOTE_START))
|
1737
|
-
|
1753
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
|
1738
1754
|
else
|
1739
|
-
|
1755
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
|
1740
1756
|
}
|
1741
1757
|
|
1742
1758
|
// emit
|
1743
|
-
|
1759
|
+
wiki_indent(parser);
|
1744
1760
|
if (type == OL)
|
1745
|
-
|
1761
|
+
str_append(parser->output, ol_start, sizeof(ol_start) - 1);
|
1746
1762
|
else if (type == UL)
|
1747
|
-
|
1763
|
+
str_append(parser->output, ul_start, sizeof(ul_start) - 1);
|
1748
1764
|
ary_push(parser->scope, type);
|
1749
|
-
|
1765
|
+
str_append(parser->output, parser->line_ending->ptr, parser->line_ending->len);
|
1750
1766
|
}
|
1751
1767
|
else if (type == SPACE)
|
1752
1768
|
// silently throw away the optional SPACE token after final list marker
|
1753
1769
|
token = NULL;
|
1754
1770
|
|
1755
|
-
|
1756
|
-
|
1771
|
+
wiki_indent(parser);
|
1772
|
+
str_append(parser->output, li_start, sizeof(li_start) - 1);
|
1757
1773
|
ary_push(parser->scope, LI);
|
1758
1774
|
|
1759
1775
|
// any subsequent UL or OL tokens on this line are syntax errors and must be emitted literally
|
@@ -1763,7 +1779,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1763
1779
|
while (k++, NEXT_TOKEN(), (type = token->type))
|
1764
1780
|
{
|
1765
1781
|
if (type == OL || type == UL)
|
1766
|
-
|
1782
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
1767
1783
|
else if (type == SPACE && k == 1)
|
1768
1784
|
{
|
1769
1785
|
// silently throw away the optional SPACE token after final list marker
|
@@ -1787,15 +1803,15 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1787
1803
|
if (IN(NO_WIKI_START) || IN(PRE_START))
|
1788
1804
|
{
|
1789
1805
|
// no need to check for PRE; can never appear inside it
|
1790
|
-
|
1806
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
1791
1807
|
break;
|
1792
1808
|
}
|
1793
1809
|
|
1794
1810
|
// pop up to but not including the last BLOCKQUOTE on the scope stack
|
1795
1811
|
if (IN(BLOCKQUOTE_START))
|
1796
|
-
|
1812
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
|
1797
1813
|
else
|
1798
|
-
|
1814
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
|
1799
1815
|
|
1800
1816
|
// count number of BLOCKQUOTE tokens in line buffer and in scope stack
|
1801
1817
|
ary_push(parser->line, type);
|
@@ -1807,7 +1823,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1807
1823
|
{
|
1808
1824
|
// must pop (reduce nesting level)
|
1809
1825
|
for (i = j - i; i > 0; i--)
|
1810
|
-
|
1826
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, true);
|
1811
1827
|
}
|
1812
1828
|
|
1813
1829
|
// discard any whitespace here (so that "== foo ==" will be translated to "<h2>foo</h2>" rather than "<h2> foo </h2>")
|
@@ -1815,7 +1831,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1815
1831
|
; // discard
|
1816
1832
|
|
1817
1833
|
ary_push(parser->scope, type);
|
1818
|
-
|
1834
|
+
wiki_indent(parser);
|
1819
1835
|
|
1820
1836
|
// take base_heading_level into account
|
1821
1837
|
type += base_heading_level;
|
@@ -1824,125 +1840,45 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1824
1840
|
|
1825
1841
|
// rather than repeat all that code for each kind of heading, share it and use a conditional here
|
1826
1842
|
if (type == H6_START)
|
1827
|
-
|
1843
|
+
str_append(parser->output, h6_start, sizeof(h6_start) - 1);
|
1828
1844
|
else if (type == H5_START)
|
1829
|
-
|
1845
|
+
str_append(parser->output, h5_start, sizeof(h5_start) - 1);
|
1830
1846
|
else if (type == H4_START)
|
1831
|
-
|
1847
|
+
str_append(parser->output, h4_start, sizeof(h4_start) - 1);
|
1832
1848
|
else if (type == H3_START)
|
1833
|
-
|
1849
|
+
str_append(parser->output, h3_start, sizeof(h3_start) - 1);
|
1834
1850
|
else if (type == H2_START)
|
1835
|
-
|
1851
|
+
str_append(parser->output, h2_start, sizeof(h2_start) - 1);
|
1836
1852
|
else if (type == H1_START)
|
1837
|
-
|
1853
|
+
str_append(parser->output, h1_start, sizeof(h1_start) - 1);
|
1838
1854
|
|
1839
1855
|
// jump to top of the loop to process token we scanned during lookahead
|
1840
1856
|
continue;
|
1841
1857
|
|
1842
1858
|
case H6_END:
|
1843
|
-
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1844
|
-
{
|
1845
|
-
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1846
|
-
rb_str_cat(parser->output, literal_h6, sizeof(literal_h6) - 1);
|
1847
|
-
}
|
1848
|
-
else
|
1849
|
-
{
|
1850
|
-
_Wikitext_rollback_failed_external_link(parser); // if any
|
1851
|
-
if (!IN(H6_START))
|
1852
|
-
{
|
1853
|
-
// literal output only if not in h6 scope (we stay silent in that case)
|
1854
|
-
_Wikitext_start_para_if_necessary(parser);
|
1855
|
-
rb_str_cat(parser->output, literal_h6, sizeof(literal_h6) - 1);
|
1856
|
-
}
|
1857
|
-
}
|
1858
|
-
break;
|
1859
|
-
|
1860
1859
|
case H5_END:
|
1861
|
-
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1862
|
-
{
|
1863
|
-
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1864
|
-
rb_str_cat(parser->output, literal_h5, sizeof(literal_h5) - 1);
|
1865
|
-
}
|
1866
|
-
else
|
1867
|
-
{
|
1868
|
-
_Wikitext_rollback_failed_external_link(parser); // if any
|
1869
|
-
if (!IN(H5_START))
|
1870
|
-
{
|
1871
|
-
// literal output only if not in h5 scope (we stay silent in that case)
|
1872
|
-
_Wikitext_start_para_if_necessary(parser);
|
1873
|
-
rb_str_cat(parser->output, literal_h5, sizeof(literal_h5) - 1);
|
1874
|
-
}
|
1875
|
-
}
|
1876
|
-
break;
|
1877
|
-
|
1878
1860
|
case H4_END:
|
1879
|
-
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1880
|
-
{
|
1881
|
-
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1882
|
-
rb_str_cat(parser->output, literal_h4, sizeof(literal_h4) - 1);
|
1883
|
-
}
|
1884
|
-
else
|
1885
|
-
{
|
1886
|
-
_Wikitext_rollback_failed_external_link(parser); // if any
|
1887
|
-
if (!IN(H4_START))
|
1888
|
-
{
|
1889
|
-
// literal output only if not in h4 scope (we stay silent in that case)
|
1890
|
-
_Wikitext_start_para_if_necessary(parser);
|
1891
|
-
rb_str_cat(parser->output, literal_h4, sizeof(literal_h4) - 1);
|
1892
|
-
}
|
1893
|
-
}
|
1894
|
-
break;
|
1895
|
-
|
1896
1861
|
case H3_END:
|
1897
|
-
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1898
|
-
{
|
1899
|
-
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1900
|
-
rb_str_cat(parser->output, literal_h3, sizeof(literal_h3) - 1);
|
1901
|
-
}
|
1902
|
-
else
|
1903
|
-
{
|
1904
|
-
_Wikitext_rollback_failed_external_link(parser); // if any
|
1905
|
-
if (!IN(H3_START))
|
1906
|
-
{
|
1907
|
-
// literal output only if not in h3 scope (we stay silent in that case)
|
1908
|
-
_Wikitext_start_para_if_necessary(parser);
|
1909
|
-
rb_str_cat(parser->output, literal_h3, sizeof(literal_h3) - 1);
|
1910
|
-
}
|
1911
|
-
}
|
1912
|
-
break;
|
1913
|
-
|
1914
1862
|
case H2_END:
|
1915
|
-
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1916
|
-
{
|
1917
|
-
_Wikitext_emit_pending_crlf_if_necessary(parser);
|
1918
|
-
rb_str_cat(parser->output, literal_h2, sizeof(literal_h2) - 1);
|
1919
|
-
}
|
1920
|
-
else
|
1921
|
-
{
|
1922
|
-
_Wikitext_rollback_failed_external_link(parser); // if any
|
1923
|
-
if (!IN(H2_START))
|
1924
|
-
{
|
1925
|
-
// literal output only if not in h2 scope (we stay silent in that case)
|
1926
|
-
_Wikitext_start_para_if_necessary(parser);
|
1927
|
-
rb_str_cat(parser->output, literal_h2, sizeof(literal_h2) - 1);
|
1928
|
-
}
|
1929
|
-
}
|
1930
|
-
break;
|
1931
|
-
|
1932
1863
|
case H1_END:
|
1933
1864
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1934
1865
|
{
|
1935
|
-
|
1936
|
-
|
1866
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1867
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
1937
1868
|
}
|
1938
1869
|
else
|
1939
1870
|
{
|
1940
|
-
|
1941
|
-
if (!IN(
|
1871
|
+
wiki_rollback_failed_external_link(parser); // if any
|
1872
|
+
if ((type == H6_END && !IN(H6_START)) ||
|
1873
|
+
(type == H5_END && !IN(H5_START)) ||
|
1874
|
+
(type == H4_END && !IN(H4_START)) ||
|
1875
|
+
(type == H3_END && !IN(H3_START)) ||
|
1876
|
+
(type == H2_END && !IN(H2_START)) ||
|
1877
|
+
(type == H1_END && !IN(H1_START)))
|
1942
1878
|
{
|
1943
|
-
// literal output only if not in
|
1944
|
-
|
1945
|
-
|
1879
|
+
// literal output only if not in appropriate scope (we stay silent in that case)
|
1880
|
+
wiki_start_para_if_necessary(parser);
|
1881
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
1946
1882
|
}
|
1947
1883
|
}
|
1948
1884
|
break;
|
@@ -1950,18 +1886,16 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1950
1886
|
case MAIL:
|
1951
1887
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
1952
1888
|
{
|
1953
|
-
|
1954
|
-
|
1889
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1890
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
1955
1891
|
}
|
1956
1892
|
else
|
1957
1893
|
{
|
1958
|
-
|
1959
|
-
|
1960
|
-
|
1961
|
-
|
1962
|
-
|
1963
|
-
i = _Wikitext_hyperlink(parser, rb_str_new2("mailto:"), i, i, mailto_class);
|
1964
|
-
rb_str_append(parser->output, i);
|
1894
|
+
wiki_pop_excess_elements(parser);
|
1895
|
+
wiki_start_para_if_necessary(parser);
|
1896
|
+
token_str->ptr = token->start;
|
1897
|
+
token_str->len = TOKEN_LEN(token);
|
1898
|
+
wiki_append_hyperlink(parser, rb_str_new2("mailto:"), token_str, NULL, mailto_class, true);
|
1965
1899
|
}
|
1966
1900
|
break;
|
1967
1901
|
|
@@ -1969,110 +1903,93 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
1969
1903
|
if (IN(NO_WIKI_START))
|
1970
1904
|
// user can temporarily suppress autolinking by using <nowiki></nowiki>
|
1971
1905
|
// note that unlike MediaWiki, we do allow autolinking inside PRE blocks
|
1972
|
-
|
1906
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
1973
1907
|
else if (IN(LINK_START))
|
1974
1908
|
{
|
1975
1909
|
// if the URI were allowed it would have been handled already in LINK_START
|
1976
|
-
|
1977
|
-
|
1978
|
-
|
1979
|
-
|
1980
|
-
rb_str_append(parser->output, i);
|
1910
|
+
wiki_rollback_failed_internal_link(parser);
|
1911
|
+
token_str->ptr = token->start;
|
1912
|
+
token_str->len = TOKEN_LEN(token);
|
1913
|
+
wiki_append_hyperlink(parser, Qnil, token_str, NULL, parser->external_link_class, true);
|
1981
1914
|
}
|
1982
1915
|
else if (IN(EXT_LINK_START))
|
1983
1916
|
{
|
1984
|
-
if (
|
1917
|
+
if (parser->link_target->len == 0)
|
1985
1918
|
{
|
1986
1919
|
// this must be our link target: look ahead to make sure we see the space we're expecting to see
|
1987
|
-
|
1920
|
+
token_str->ptr = token->start;
|
1921
|
+
token_str->len = TOKEN_LEN(token);
|
1988
1922
|
NEXT_TOKEN();
|
1989
1923
|
if (token->type == SPACE)
|
1990
1924
|
{
|
1991
1925
|
ary_push(parser->scope, SPACE);
|
1992
|
-
parser->link_target
|
1993
|
-
parser->link_text
|
1926
|
+
str_append_str(parser->link_target, token_str);
|
1927
|
+
str_clear(parser->link_text);
|
1994
1928
|
parser->capture = parser->link_text;
|
1995
1929
|
token = NULL; // silently consume space
|
1996
1930
|
}
|
1997
1931
|
else
|
1998
1932
|
{
|
1999
1933
|
// didn't see the space! this must be an error
|
2000
|
-
|
2001
|
-
|
2002
|
-
|
2003
|
-
|
2004
|
-
|
2005
|
-
i = _Wikitext_hyperlink(parser, Qnil, i, i, parser->external_link_class); // link target, link text
|
2006
|
-
rb_str_append(parser->output, i);
|
1934
|
+
wiki_pop_from_stack(parser, NULL);
|
1935
|
+
wiki_pop_excess_elements(parser);
|
1936
|
+
wiki_start_para_if_necessary(parser);
|
1937
|
+
str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
|
1938
|
+
wiki_append_hyperlink(parser, Qnil, token_str, NULL, parser->external_link_class, true);
|
2007
1939
|
}
|
2008
1940
|
}
|
2009
1941
|
else
|
2010
|
-
|
2011
|
-
if (NIL_P(parser->link_text))
|
2012
|
-
// this must be the first part of our link text
|
2013
|
-
parser->link_text = TOKEN_TEXT(token);
|
2014
|
-
else
|
2015
|
-
// add to existing link text
|
2016
|
-
rb_str_cat(parser->link_text, token->start, TOKEN_LEN(token));
|
2017
|
-
}
|
1942
|
+
str_append(parser->link_text, token->start, TOKEN_LEN(token));
|
2018
1943
|
}
|
2019
1944
|
else
|
2020
1945
|
{
|
2021
|
-
|
2022
|
-
|
2023
|
-
|
2024
|
-
|
2025
|
-
|
2026
|
-
i = _Wikitext_hyperlink(parser, Qnil, i, i, parser->external_link_class); // link target, link text
|
2027
|
-
rb_str_append(parser->output, i);
|
1946
|
+
wiki_pop_excess_elements(parser);
|
1947
|
+
wiki_start_para_if_necessary(parser);
|
1948
|
+
token_str->ptr = token->start;
|
1949
|
+
token_str->len = TOKEN_LEN(token);
|
1950
|
+
wiki_append_hyperlink(parser, Qnil, token_str, NULL, parser->external_link_class, true);
|
2028
1951
|
}
|
2029
1952
|
break;
|
2030
1953
|
|
2031
1954
|
case PATH:
|
2032
1955
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2033
|
-
|
1956
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
2034
1957
|
else if (IN(EXT_LINK_START))
|
2035
1958
|
{
|
2036
|
-
if (
|
1959
|
+
if (parser->link_target->len == 0)
|
2037
1960
|
{
|
2038
1961
|
// this must be our link target: look ahead to make sure we see the space we're expecting to see
|
2039
|
-
|
1962
|
+
token_str->ptr = token->start;
|
1963
|
+
token_str->len = TOKEN_LEN(token);
|
2040
1964
|
NEXT_TOKEN();
|
2041
1965
|
if (token->type == SPACE)
|
2042
1966
|
{
|
2043
1967
|
ary_push(parser->scope, PATH);
|
2044
1968
|
ary_push(parser->scope, SPACE);
|
2045
|
-
parser->link_target
|
2046
|
-
parser->link_text
|
1969
|
+
str_append_str(parser->link_target, token_str);
|
1970
|
+
str_clear(parser->link_text);
|
2047
1971
|
parser->capture = parser->link_text;
|
2048
1972
|
token = NULL; // silently consume space
|
2049
1973
|
}
|
2050
1974
|
else
|
2051
1975
|
{
|
2052
1976
|
// didn't see the space! this must be an error
|
2053
|
-
|
2054
|
-
|
2055
|
-
|
2056
|
-
|
2057
|
-
|
1977
|
+
wiki_pop_from_stack(parser, NULL);
|
1978
|
+
wiki_pop_excess_elements(parser);
|
1979
|
+
wiki_start_para_if_necessary(parser);
|
1980
|
+
str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
|
1981
|
+
str_append_str(parser->output, token_str);
|
2058
1982
|
}
|
2059
1983
|
}
|
2060
1984
|
else
|
2061
|
-
|
2062
|
-
if (NIL_P(parser->link_text))
|
2063
|
-
// this must be the first part of our link text
|
2064
|
-
parser->link_text = TOKEN_TEXT(token);
|
2065
|
-
else
|
2066
|
-
// add to existing link text
|
2067
|
-
rb_str_cat(parser->link_text, token->start, TOKEN_LEN(token));
|
2068
|
-
}
|
1985
|
+
str_append(parser->link_text, token->start, TOKEN_LEN(token));
|
2069
1986
|
}
|
2070
1987
|
else
|
2071
1988
|
{
|
2072
|
-
|
2073
|
-
|
2074
|
-
|
2075
|
-
|
1989
|
+
output = parser->capture ? parser->capture : parser->output;
|
1990
|
+
wiki_pop_excess_elements(parser);
|
1991
|
+
wiki_start_para_if_necessary(parser);
|
1992
|
+
str_append(output, token->start, TOKEN_LEN(token));
|
2076
1993
|
}
|
2077
1994
|
break;
|
2078
1995
|
|
@@ -2099,20 +2016,20 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2099
2016
|
// SPACE, SPECIAL_URI_CHARS, PRINTABLE, PATH, ALNUM, DEFAULT, QUOT and AMP
|
2100
2017
|
// everything else will be rejected
|
2101
2018
|
case LINK_START:
|
2102
|
-
|
2019
|
+
output = parser->capture ? parser->capture : parser->output;
|
2103
2020
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2104
2021
|
{
|
2105
|
-
|
2106
|
-
|
2022
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
2023
|
+
str_append(output, link_start, sizeof(link_start) - 1);
|
2107
2024
|
}
|
2108
2025
|
else if (IN(EXT_LINK_START))
|
2109
2026
|
// already in external link scope! (and in fact, must be capturing link_text right now)
|
2110
|
-
|
2027
|
+
str_append(output, link_start, sizeof(link_start) - 1);
|
2111
2028
|
else if (IN(LINK_START))
|
2112
2029
|
{
|
2113
2030
|
// already in internal link scope! this is a syntax error
|
2114
|
-
|
2115
|
-
|
2031
|
+
wiki_rollback_failed_internal_link(parser);
|
2032
|
+
str_append(parser->output, link_start, sizeof(link_start) - 1);
|
2116
2033
|
}
|
2117
2034
|
else if (IN(SEPARATOR))
|
2118
2035
|
{
|
@@ -2121,8 +2038,8 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2121
2038
|
else // not in internal link scope yet
|
2122
2039
|
{
|
2123
2040
|
// will either emit a link, or the rollback of a failed link, so start the para now
|
2124
|
-
|
2125
|
-
|
2041
|
+
wiki_pop_excess_elements(parser);
|
2042
|
+
wiki_start_para_if_necessary(parser);
|
2126
2043
|
ary_push(parser->scope, LINK_START);
|
2127
2044
|
|
2128
2045
|
// look ahead and try to gobble up link target
|
@@ -2144,34 +2061,34 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2144
2061
|
type == RIGHT_CURLY)
|
2145
2062
|
{
|
2146
2063
|
// accumulate these tokens into link_target
|
2147
|
-
if (
|
2064
|
+
if (parser->link_target->len == 0)
|
2148
2065
|
{
|
2149
|
-
parser->link_target
|
2150
|
-
parser->capture
|
2066
|
+
str_clear(parser->link_target);
|
2067
|
+
parser->capture = parser->link_target;
|
2151
2068
|
}
|
2152
2069
|
if (type == QUOT_ENTITY)
|
2153
2070
|
// don't insert the entity, insert the literal quote
|
2154
|
-
|
2071
|
+
str_append(parser->link_target, quote, sizeof(quote) - 1);
|
2155
2072
|
else if (type == AMP_ENTITY)
|
2156
2073
|
// don't insert the entity, insert the literal ampersand
|
2157
|
-
|
2074
|
+
str_append(parser->link_target, ampersand, sizeof(ampersand) - 1);
|
2158
2075
|
else
|
2159
|
-
|
2076
|
+
str_append(parser->link_target, token->start, TOKEN_LEN(token));
|
2160
2077
|
}
|
2161
2078
|
else if (type == LINK_END)
|
2162
2079
|
{
|
2163
|
-
if (
|
2164
|
-
|
2080
|
+
if (parser->link_target->len == 0) // bail for inputs like "[[]]"
|
2081
|
+
wiki_rollback_failed_internal_link(parser);
|
2165
2082
|
break; // jump back to top of loop (will handle this in LINK_END case below)
|
2166
2083
|
}
|
2167
2084
|
else if (type == SEPARATOR)
|
2168
2085
|
{
|
2169
|
-
if (
|
2170
|
-
|
2086
|
+
if (parser->link_target->len == 0) // bail for inputs like "[[|"
|
2087
|
+
wiki_rollback_failed_internal_link(parser);
|
2171
2088
|
else
|
2172
2089
|
{
|
2173
2090
|
ary_push(parser->scope, SEPARATOR);
|
2174
|
-
parser->link_text
|
2091
|
+
str_clear(parser->link_text);
|
2175
2092
|
parser->capture = parser->link_text;
|
2176
2093
|
token = NULL;
|
2177
2094
|
}
|
@@ -2179,7 +2096,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2179
2096
|
}
|
2180
2097
|
else // unexpected token (syntax error)
|
2181
2098
|
{
|
2182
|
-
|
2099
|
+
wiki_rollback_failed_internal_link(parser);
|
2183
2100
|
break; // jump back to top of loop to handle unexpected token
|
2184
2101
|
}
|
2185
2102
|
}
|
@@ -2190,42 +2107,45 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2190
2107
|
break;
|
2191
2108
|
|
2192
2109
|
case LINK_END:
|
2193
|
-
|
2110
|
+
output = parser->capture ? parser->capture : parser->output;
|
2194
2111
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2195
2112
|
{
|
2196
|
-
|
2197
|
-
|
2113
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
2114
|
+
str_append(output, link_end, sizeof(link_end) - 1);
|
2198
2115
|
}
|
2199
2116
|
else if (IN(EXT_LINK_START))
|
2200
2117
|
// already in external link scope! (and in fact, must be capturing link_text right now)
|
2201
|
-
|
2118
|
+
str_append(output, link_end, sizeof(link_end) - 1);
|
2202
2119
|
else if (IN(LINK_START)) // in internal link scope!
|
2203
2120
|
{
|
2204
|
-
if (
|
2121
|
+
if (wiki_blank(parser->link_target))
|
2205
2122
|
{
|
2206
2123
|
// special case for inputs like "[[ ]]"
|
2207
|
-
|
2208
|
-
|
2124
|
+
wiki_rollback_failed_internal_link(parser);
|
2125
|
+
str_append(parser->output, link_end, sizeof(link_end) - 1);
|
2209
2126
|
break;
|
2210
2127
|
}
|
2211
|
-
if (
|
2128
|
+
if (parser->link_text->len == 0 ||
|
2129
|
+
wiki_blank(parser->link_text))
|
2130
|
+
{
|
2212
2131
|
// use link target as link text
|
2213
|
-
parser->link_text
|
2132
|
+
str_clear(parser->link_text);
|
2133
|
+
wiki_append_sanitized_link_target(parser, parser->link_text, true);
|
2134
|
+
}
|
2214
2135
|
else
|
2215
|
-
|
2216
|
-
|
2217
|
-
|
2218
|
-
parser->capture
|
2219
|
-
|
2220
|
-
|
2221
|
-
parser->
|
2222
|
-
parser->link_text = Qnil;
|
2136
|
+
wiki_trim_link_text(parser);
|
2137
|
+
wiki_encode_link_target(parser);
|
2138
|
+
wiki_pop_from_stack_up_to(parser, output, LINK_START, true);
|
2139
|
+
parser->capture = NULL;
|
2140
|
+
wiki_append_hyperlink(parser, prefix, parser->link_target, parser->link_text, Qnil, false);
|
2141
|
+
str_clear(parser->link_target);
|
2142
|
+
str_clear(parser->link_text);
|
2223
2143
|
}
|
2224
2144
|
else // wasn't in internal link scope
|
2225
2145
|
{
|
2226
|
-
|
2227
|
-
|
2228
|
-
|
2146
|
+
wiki_pop_excess_elements(parser);
|
2147
|
+
wiki_start_para_if_necessary(parser);
|
2148
|
+
str_append(output, link_end, sizeof(link_end) - 1);
|
2229
2149
|
}
|
2230
2150
|
break;
|
2231
2151
|
|
@@ -2235,41 +2155,28 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2235
2155
|
// strings in square brackets which don't match this syntax get passed through literally; eg:
|
2236
2156
|
// he was very angery [sic] about the turn of events
|
2237
2157
|
case EXT_LINK_START:
|
2238
|
-
|
2158
|
+
output = parser->capture ? parser->capture : parser->output;
|
2239
2159
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2240
2160
|
{
|
2241
|
-
|
2242
|
-
|
2161
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
2162
|
+
str_append(output, ext_link_start, sizeof(ext_link_start) - 1);
|
2243
2163
|
}
|
2244
2164
|
else if (IN(EXT_LINK_START))
|
2245
2165
|
// already in external link scope! (and in fact, must be capturing link_text right now)
|
2246
|
-
|
2166
|
+
str_append(output, ext_link_start, sizeof(ext_link_start) - 1);
|
2247
2167
|
else if (IN(LINK_START))
|
2248
2168
|
{
|
2249
2169
|
// already in internal link scope!
|
2250
|
-
|
2251
|
-
|
2252
|
-
|
2253
|
-
parser->
|
2254
|
-
else if (IN(SPACE))
|
2255
|
-
{
|
2256
|
-
// link target has already been scanned
|
2257
|
-
if (NIL_P(parser->link_text))
|
2258
|
-
// this must be the first character of our link text
|
2259
|
-
parser->link_text = i;
|
2260
|
-
else
|
2261
|
-
// add to existing link text
|
2262
|
-
rb_str_append(parser->link_text, i);
|
2263
|
-
}
|
2264
|
-
else
|
2265
|
-
// add to existing link target
|
2266
|
-
rb_str_append(parser->link_target, i);
|
2170
|
+
if (parser->link_target->len == 0 || !IN(SPACE))
|
2171
|
+
str_append(parser->link_target, ext_link_start, sizeof(ext_link_start) - 1);
|
2172
|
+
else // link target has already been scanned
|
2173
|
+
str_append(parser->link_text, ext_link_start, sizeof(ext_link_start) - 1);
|
2267
2174
|
}
|
2268
2175
|
else // not in external link scope yet
|
2269
2176
|
{
|
2270
2177
|
// will either emit a link, or the rollback of a failed link, so start the para now
|
2271
|
-
|
2272
|
-
|
2178
|
+
wiki_pop_excess_elements(parser);
|
2179
|
+
wiki_start_para_if_necessary(parser);
|
2273
2180
|
|
2274
2181
|
// look ahead: expect an absolute URI (with protocol) or "relative" (path) URI
|
2275
2182
|
NEXT_TOKEN();
|
@@ -2277,56 +2184,55 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2277
2184
|
ary_push(parser->scope, EXT_LINK_START); // so far so good, jump back to the top of the loop
|
2278
2185
|
else
|
2279
2186
|
// only get here if there was a syntax error (missing URI)
|
2280
|
-
|
2187
|
+
str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
|
2281
2188
|
continue; // jump back to top of loop to handle token (either URI or whatever it is)
|
2282
2189
|
}
|
2283
2190
|
break;
|
2284
2191
|
|
2285
2192
|
case EXT_LINK_END:
|
2286
|
-
|
2193
|
+
output = parser->capture ? parser->capture : parser->output;
|
2287
2194
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2288
2195
|
{
|
2289
|
-
|
2290
|
-
|
2196
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
2197
|
+
str_append(output, ext_link_end, sizeof(ext_link_end) - 1);
|
2291
2198
|
}
|
2292
2199
|
else if (IN(EXT_LINK_START))
|
2293
2200
|
{
|
2294
|
-
if (
|
2201
|
+
if (parser->link_text->len == 0)
|
2295
2202
|
// syntax error: external link with no link text
|
2296
|
-
|
2203
|
+
wiki_rollback_failed_external_link(parser);
|
2297
2204
|
else
|
2298
2205
|
{
|
2299
2206
|
// success!
|
2300
2207
|
j = IN(PATH) ? Qnil : parser->external_link_class;
|
2301
|
-
|
2302
|
-
parser->capture =
|
2303
|
-
|
2304
|
-
rb_str_append(parser->output, i);
|
2208
|
+
wiki_pop_from_stack_up_to(parser, output, EXT_LINK_START, true);
|
2209
|
+
parser->capture = NULL;
|
2210
|
+
wiki_append_hyperlink(parser, Qnil, parser->link_target, parser->link_text, j, false);
|
2305
2211
|
}
|
2306
|
-
parser->link_target
|
2307
|
-
parser->link_text
|
2212
|
+
str_clear(parser->link_target);
|
2213
|
+
str_clear(parser->link_text);
|
2308
2214
|
}
|
2309
2215
|
else
|
2310
2216
|
{
|
2311
|
-
|
2312
|
-
|
2313
|
-
|
2217
|
+
wiki_pop_excess_elements(parser);
|
2218
|
+
wiki_start_para_if_necessary(parser);
|
2219
|
+
str_append(parser->output, ext_link_end, sizeof(ext_link_end) - 1);
|
2314
2220
|
}
|
2315
2221
|
break;
|
2316
2222
|
|
2317
2223
|
case SEPARATOR:
|
2318
|
-
|
2319
|
-
|
2320
|
-
|
2321
|
-
|
2224
|
+
output = parser->capture ? parser->capture : parser->output;
|
2225
|
+
wiki_pop_excess_elements(parser);
|
2226
|
+
wiki_start_para_if_necessary(parser);
|
2227
|
+
str_append(output, separator, sizeof(separator) - 1);
|
2322
2228
|
break;
|
2323
2229
|
|
2324
2230
|
case SPACE:
|
2325
|
-
|
2231
|
+
output = parser->capture ? parser->capture : parser->output;
|
2326
2232
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2327
2233
|
{
|
2328
|
-
|
2329
|
-
|
2234
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
2235
|
+
str_append(output, token->start, TOKEN_LEN(token));
|
2330
2236
|
}
|
2331
2237
|
else
|
2332
2238
|
{
|
@@ -2335,21 +2241,21 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2335
2241
|
int token_len = TOKEN_LEN(token);
|
2336
2242
|
NEXT_TOKEN();
|
2337
2243
|
type = token->type;
|
2338
|
-
if ((
|
2339
|
-
(
|
2340
|
-
(
|
2341
|
-
(
|
2342
|
-
(
|
2343
|
-
(
|
2244
|
+
if ((type == H6_END && IN(H6_START)) ||
|
2245
|
+
(type == H5_END && IN(H5_START)) ||
|
2246
|
+
(type == H4_END && IN(H4_START)) ||
|
2247
|
+
(type == H3_END && IN(H3_START)) ||
|
2248
|
+
(type == H2_END && IN(H2_START)) ||
|
2249
|
+
(type == H1_END && IN(H1_START)))
|
2344
2250
|
{
|
2345
2251
|
// will suppress emission of space (discard) if next token is a H6_END, H5_END etc and we are in the corresponding scope
|
2346
2252
|
}
|
2347
2253
|
else
|
2348
2254
|
{
|
2349
2255
|
// emit the space
|
2350
|
-
|
2351
|
-
|
2352
|
-
|
2256
|
+
wiki_pop_excess_elements(parser);
|
2257
|
+
wiki_start_para_if_necessary(parser);
|
2258
|
+
str_append(output, token_ptr, token_len);
|
2353
2259
|
}
|
2354
2260
|
|
2355
2261
|
// jump to top of the loop to process token we scanned during lookahead
|
@@ -2362,101 +2268,100 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2362
2268
|
case NAMED_ENTITY:
|
2363
2269
|
case DECIMAL_ENTITY:
|
2364
2270
|
// pass these through unaltered as they are case sensitive
|
2365
|
-
|
2366
|
-
|
2367
|
-
|
2368
|
-
|
2271
|
+
output = parser->capture ? parser->capture : parser->output;
|
2272
|
+
wiki_pop_excess_elements(parser);
|
2273
|
+
wiki_start_para_if_necessary(parser);
|
2274
|
+
str_append(output, token->start, TOKEN_LEN(token));
|
2369
2275
|
break;
|
2370
2276
|
|
2371
2277
|
case HEX_ENTITY:
|
2372
2278
|
// normalize hex entities (downcase them)
|
2373
|
-
|
2374
|
-
|
2375
|
-
|
2376
|
-
|
2279
|
+
output = parser->capture ? parser->capture : parser->output;
|
2280
|
+
wiki_pop_excess_elements(parser);
|
2281
|
+
wiki_start_para_if_necessary(parser);
|
2282
|
+
str_append(output, token->start, TOKEN_LEN(token));
|
2283
|
+
wiki_downcase_bang(output->ptr + output->len - TOKEN_LEN(token), TOKEN_LEN(token));
|
2377
2284
|
break;
|
2378
2285
|
|
2379
2286
|
case QUOT:
|
2380
|
-
|
2381
|
-
|
2382
|
-
|
2383
|
-
|
2287
|
+
output = parser->capture ? parser->capture : parser->output;
|
2288
|
+
wiki_pop_excess_elements(parser);
|
2289
|
+
wiki_start_para_if_necessary(parser);
|
2290
|
+
str_append(output, quot_entity, sizeof(quot_entity) - 1);
|
2384
2291
|
break;
|
2385
2292
|
|
2386
2293
|
case AMP:
|
2387
|
-
|
2388
|
-
|
2389
|
-
|
2390
|
-
|
2294
|
+
output = parser->capture ? parser->capture : parser->output;
|
2295
|
+
wiki_pop_excess_elements(parser);
|
2296
|
+
wiki_start_para_if_necessary(parser);
|
2297
|
+
str_append(output, amp_entity, sizeof(amp_entity) - 1);
|
2391
2298
|
break;
|
2392
2299
|
|
2393
2300
|
case LESS:
|
2394
|
-
|
2395
|
-
|
2396
|
-
|
2397
|
-
|
2301
|
+
output = parser->capture ? parser->capture : parser->output;
|
2302
|
+
wiki_pop_excess_elements(parser);
|
2303
|
+
wiki_start_para_if_necessary(parser);
|
2304
|
+
str_append(output, lt_entity, sizeof(lt_entity) - 1);
|
2398
2305
|
break;
|
2399
2306
|
|
2400
2307
|
case GREATER:
|
2401
|
-
|
2402
|
-
|
2403
|
-
|
2404
|
-
|
2308
|
+
output = parser->capture ? parser->capture : parser->output;
|
2309
|
+
wiki_pop_excess_elements(parser);
|
2310
|
+
wiki_start_para_if_necessary(parser);
|
2311
|
+
str_append(output, gt_entity, sizeof(gt_entity) - 1);
|
2405
2312
|
break;
|
2406
2313
|
|
2407
2314
|
case IMG_START:
|
2408
2315
|
if (IN(NO_WIKI_START) || IN(PRE) || IN(PRE_START))
|
2409
2316
|
{
|
2410
|
-
|
2411
|
-
|
2317
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
2318
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
2412
2319
|
}
|
2413
|
-
else if (
|
2414
|
-
|
2320
|
+
else if (parser->capture)
|
2321
|
+
str_append(parser->capture, token->start, TOKEN_LEN(token));
|
2415
2322
|
else
|
2416
2323
|
{
|
2417
2324
|
// not currently capturing: will be emitting something on success or failure, so get ready
|
2418
|
-
|
2419
|
-
|
2325
|
+
wiki_pop_excess_elements(parser);
|
2326
|
+
wiki_start_para_if_necessary(parser);
|
2420
2327
|
|
2421
2328
|
// scan ahead consuming PATH, PRINTABLE, ALNUM and SPECIAL_URI_CHARS tokens
|
2422
2329
|
// will cheat here and abuse the link_target capture buffer to accumulate text
|
2423
|
-
if (NIL_P(parser->link_target))
|
2424
|
-
parser->link_target = rb_str_new2("");
|
2425
2330
|
while (NEXT_TOKEN(), (type = token->type))
|
2426
2331
|
{
|
2427
2332
|
if (type == PATH || type == PRINTABLE || type == ALNUM || type == SPECIAL_URI_CHARS)
|
2428
|
-
|
2429
|
-
else if (type == IMG_END &&
|
2333
|
+
str_append(parser->link_target, token->start, TOKEN_LEN(token));
|
2334
|
+
else if (type == IMG_END && parser->link_target->len > 0)
|
2430
2335
|
{
|
2431
2336
|
// success
|
2432
|
-
|
2337
|
+
wiki_append_img(parser, parser->link_target->ptr, parser->link_target->len);
|
2433
2338
|
token = NULL;
|
2434
2339
|
break;
|
2435
2340
|
}
|
2436
2341
|
else // unexpected token or zero-length target (syntax error)
|
2437
2342
|
{
|
2438
2343
|
// rollback
|
2439
|
-
|
2440
|
-
|
2344
|
+
str_append(parser->output, literal_img_start, sizeof(literal_img_start) - 1);
|
2345
|
+
if (parser->link_target->len > 0)
|
2346
|
+
str_append(parser->output, parser->link_target->ptr, parser->link_target->len);
|
2441
2347
|
break;
|
2442
2348
|
}
|
2443
2349
|
}
|
2444
2350
|
|
2445
2351
|
// jump to top of the loop to process token we scanned during lookahead
|
2446
|
-
parser->link_target
|
2352
|
+
str_clear(parser->link_target);
|
2447
2353
|
continue;
|
2448
2354
|
}
|
2449
2355
|
break;
|
2450
2356
|
|
2451
2357
|
case CRLF:
|
2452
2358
|
i = parser->pending_crlf;
|
2453
|
-
parser->pending_crlf =
|
2454
|
-
|
2455
|
-
_Wikitext_rollback_failed_external_link(parser); // if any
|
2359
|
+
parser->pending_crlf = false;
|
2360
|
+
wiki_rollback_failed_link(parser); // if any
|
2456
2361
|
if (IN(NO_WIKI_START) || IN(PRE_START))
|
2457
2362
|
{
|
2458
2363
|
ary_clear(parser->line_buffer);
|
2459
|
-
|
2364
|
+
str_append_str(parser->output, parser->line_ending);
|
2460
2365
|
break;
|
2461
2366
|
}
|
2462
2367
|
else if (IN(PRE))
|
@@ -2464,14 +2369,14 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2464
2369
|
// beware when BLOCKQUOTE on line buffer (not line stack!) prior to CRLF, that must be end of PRE block
|
2465
2370
|
if (ary_entry(parser->line_buffer, -2) == BLOCKQUOTE)
|
2466
2371
|
// don't emit in this case
|
2467
|
-
|
2372
|
+
wiki_pop_from_stack_up_to(parser, parser->output, PRE, true);
|
2468
2373
|
else
|
2469
2374
|
{
|
2470
2375
|
if (ary_entry(parser->line_buffer, -2) == PRE)
|
2471
2376
|
{
|
2472
2377
|
// only thing on line is the PRE: emit pending line ending (if we had one)
|
2473
|
-
if (i
|
2474
|
-
|
2378
|
+
if (i)
|
2379
|
+
str_append_str(parser->output, parser->line_ending);
|
2475
2380
|
}
|
2476
2381
|
|
2477
2382
|
// clear these _before_ calling NEXT_TOKEN (NEXT_TOKEN adds to the line_buffer)
|
@@ -2483,17 +2388,17 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2483
2388
|
type = token->type;
|
2484
2389
|
if (type != BLOCKQUOTE && type != PRE)
|
2485
2390
|
// this is definitely the end of the block, so don't emit
|
2486
|
-
|
2391
|
+
wiki_pop_from_stack_up_to(parser, parser->output, PRE, true);
|
2487
2392
|
else
|
2488
2393
|
// potentially will emit
|
2489
|
-
parser->pending_crlf =
|
2394
|
+
parser->pending_crlf = true;
|
2490
2395
|
|
2491
2396
|
continue; // jump back to top of loop to handle token grabbed via lookahead
|
2492
2397
|
}
|
2493
2398
|
}
|
2494
2399
|
else
|
2495
2400
|
{
|
2496
|
-
parser->pending_crlf =
|
2401
|
+
parser->pending_crlf = true;
|
2497
2402
|
|
2498
2403
|
// count number of BLOCKQUOTE tokens in line buffer (can be zero) and pop back to that level
|
2499
2404
|
// as a side effect, this handles any open span-level elements and unclosed blocks
|
@@ -2503,7 +2408,7 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2503
2408
|
{
|
2504
2409
|
if (parser->scope->count > 0 && ary_entry(parser->scope, -1) == LI)
|
2505
2410
|
{
|
2506
|
-
parser->pending_crlf =
|
2411
|
+
parser->pending_crlf = false;
|
2507
2412
|
break;
|
2508
2413
|
}
|
2509
2414
|
|
@@ -2516,12 +2421,12 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2516
2421
|
if (NO_ITEM(ary_entry(parser->line_buffer, -2)) ||
|
2517
2422
|
(ary_entry(parser->line_buffer, -2) == BLOCKQUOTE && !IN(BLOCKQUOTE_START)))
|
2518
2423
|
// paragraph break
|
2519
|
-
parser->pending_crlf =
|
2424
|
+
parser->pending_crlf = false;
|
2520
2425
|
else
|
2521
2426
|
// not a paragraph break!
|
2522
2427
|
continue;
|
2523
2428
|
}
|
2524
|
-
|
2429
|
+
wiki_pop_from_stack(parser, NULL);
|
2525
2430
|
}
|
2526
2431
|
}
|
2527
2432
|
|
@@ -2536,31 +2441,29 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2536
2441
|
case IMG_END:
|
2537
2442
|
case LEFT_CURLY:
|
2538
2443
|
case RIGHT_CURLY:
|
2539
|
-
|
2540
|
-
|
2541
|
-
|
2542
|
-
|
2444
|
+
output = parser->capture ? parser->capture : parser->output;
|
2445
|
+
wiki_pop_excess_elements(parser);
|
2446
|
+
wiki_start_para_if_necessary(parser);
|
2447
|
+
str_append(output, token->start, TOKEN_LEN(token));
|
2543
2448
|
break;
|
2544
2449
|
|
2545
2450
|
case DEFAULT:
|
2546
|
-
|
2547
|
-
|
2548
|
-
|
2549
|
-
|
2451
|
+
output = parser->capture ? parser->capture : parser->output;
|
2452
|
+
wiki_pop_excess_elements(parser);
|
2453
|
+
wiki_start_para_if_necessary(parser);
|
2454
|
+
wiki_append_entity_from_utf32_char(output, token->code_point);
|
2550
2455
|
break;
|
2551
2456
|
|
2552
2457
|
case END_OF_FILE:
|
2553
2458
|
// special case for input like " foo\n " (see pre_spec.rb)
|
2554
2459
|
if (IN(PRE) &&
|
2555
2460
|
ary_entry(parser->line_buffer, -2) == PRE &&
|
2556
|
-
parser->pending_crlf
|
2557
|
-
|
2461
|
+
parser->pending_crlf)
|
2462
|
+
str_append(parser->output, parser->line_ending->ptr, parser->line_ending->len);
|
2558
2463
|
|
2559
2464
|
// close any open scopes on hitting EOF
|
2560
|
-
|
2561
|
-
|
2562
|
-
for (i = 0, j = parser->scope->count; i < j; i++)
|
2563
|
-
_Wikitext_pop_from_stack(parser, Qnil);
|
2465
|
+
wiki_rollback_failed_link(parser); // if any
|
2466
|
+
wiki_pop_all_from_stack(parser);
|
2564
2467
|
goto return_output; // break not enough here (want to break out of outer while loop, not inner switch statement)
|
2565
2468
|
|
2566
2469
|
default:
|
@@ -2571,5 +2474,25 @@ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
|
2571
2474
|
token = NULL;
|
2572
2475
|
} while (1);
|
2573
2476
|
return_output:
|
2574
|
-
return
|
2477
|
+
// nasty hack to avoid re-allocating our return value
|
2478
|
+
str_append(parser->output, null_str, 1); // null-terminate
|
2479
|
+
len = parser->output->len - 1; // don't count null termination
|
2480
|
+
|
2481
|
+
#if defined(RUBY_1_9_x)
|
2482
|
+
VALUE out = rb_str_buf_new(RSTRING_EMBED_LEN_MAX + 1);
|
2483
|
+
free(RSTRING_PTR(out));
|
2484
|
+
RSTRING(out)->as.heap.aux.capa = len;
|
2485
|
+
RSTRING(out)->as.heap.ptr = parser->output->ptr;
|
2486
|
+
RSTRING(out)->as.heap.len = len;
|
2487
|
+
#elif defined(RUBY_1_8_x)
|
2488
|
+
VALUE out = rb_str_new2("");
|
2489
|
+
free(RSTRING_PTR(out));
|
2490
|
+
RSTRING(out)->len = len;
|
2491
|
+
RSTRING(out)->aux.capa = len;
|
2492
|
+
RSTRING(out)->ptr = parser->output->ptr;
|
2493
|
+
#else
|
2494
|
+
#error unsupported RUBY_VERSION
|
2495
|
+
#endif
|
2496
|
+
parser->output->ptr = NULL; // don't double-free
|
2497
|
+
return out;
|
2575
2498
|
}
|