wikitext 4.0.1 → 4.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/wikitext +10 -110
- data/ext/wikitext/ary.c +116 -0
- data/ext/wikitext/ary.h +50 -0
- data/ext/wikitext/depend +32 -0
- data/ext/wikitext/parser.c +2595 -0
- data/ext/wikitext/parser.h +40 -0
- data/ext/wikitext/ruby_compat.h +34 -0
- data/ext/wikitext/str.c +109 -0
- data/ext/wikitext/str.h +64 -0
- data/ext/wikitext/token.c +125 -0
- data/ext/wikitext/token.h +117 -0
- data/ext/wikitext/wikitext.c +125 -0
- data/ext/wikitext/wikitext.h +39 -0
- data/ext/wikitext/wikitext_ragel.c +3211 -0
- data/ext/wikitext/wikitext_ragel.h +26 -0
- data/lib/wikitext/version.rb +1 -1
- metadata +17 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c61ab6467120d8def1be560fcb4604ee7c90454b
+  data.tar.gz: 1c7be94a73a1d038a18744775054910772a964f0
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: df484b7d09e76c9b01a53cf4c8cb7b7bcea8fba56d6e7d1443e01259b3b41934e4f517a87cf0a4593ac33646c47a9b9b4c96accf4f810db83013d32a248dd3f7
+  data.tar.gz: 66d3a468c2d787a2ec572b24ff53e8210cc520f37fd4caab68fc3536c0640f01deaddbe59e22f892eea8f2872788ef2f50bc66bba363298c1e5cee7c655cd444
data/bin/wikitext
CHANGED
@@ -1,116 +1,16 @@
 #!/usr/bin/env ruby
-# Copyright 2008-2013 Wincent Colaiuta. All rights reserved.
 #
-#
-#
+# This file was generated by Bundler.
+#
+# The application 'wikitext' is installed as part of a gem, and
+# this file is here to facilitate running it.
 #
-# 1. Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-
-require 'wikitext'
-require 'wikitext/version'
-
-module Wikitext
-module Tool
-# Simple substitute for the HighLine library if it is not available.
-class FakeHighLine
-def color(str, _)
-str
-end
-
-def output_cols
-80
-end
-end
-
-INPUT_FILES = []
-
-def self.interactive?
-STDOUT.tty? && STDIN.tty? && INPUT_FILES.empty?
-end
-
-def self.pretty_print tokens
-tokens.each do |token|
-puts <<-END
-Token: type: #{token.token_type}
-line: #{token.line_start}..#{token.line_stop} column: #{token.column_start}..#{token.column_stop}
-pointer: #{token.start}..#{token.stop}
-code_point: #{token.code_point}
-string_value: #{token.string_value.inspect}
-
-END
-end
-end
 
-
-
-
-method = :tokenize
-else
-INPUT_FILES << arg
-end
-end
+require 'pathname'
+ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
+Pathname.new(__FILE__).realpath)
 
-
-
-require 'highline'
-rescue LoadError
-begin
-require 'rubygems'
-require 'highline'
-rescue LoadError
-end
-end
-puts "wikitext #{Wikitext::VERSION}"
-highline = (defined?(HighLine) ? HighLine : FakeHighLine).new
-end
+require 'rubygems'
+require 'bundler/setup'
 
-
-if INPUT_FILES.empty?
-begin
-while true
-puts highline.color('(Ctrl+D to process, Ctrl+C to exit)>>', :bold) if interactive?
-input = STDIN.read
-puts '-' * highline.output_cols if interactive?
-if method == :tokenize
-pretty_print parser.tokenize(input)
-else
-puts parser.parse(input)
-end
-puts '-' * highline.output_cols if interactive?
-exit unless interactive?
-end
-rescue Interrupt
-end
-else # we have INPUT_FILES
-exit_status = 0
-INPUT_FILES.each do |file|
-begin
-puts parser.parse(File.new(file).read)
-rescue Errno::ENOENT
-STDERR.puts "error: no such file or directory: #{file}"
-exit_status |= 1
-rescue Errno::EACCES
-STDERR.puts "error: permission denied: #{file}"
-exit_status |= 2
-end
-end
-exit exit_status
-end
-end # module Tool
-end # module Wikitext
+load Gem.bin_path('wikitext', 'wikitext')
data/ext/wikitext/ary.c
ADDED
@@ -0,0 +1,116 @@
+// Copyright 2008-2009 Wincent Colaiuta. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+#include "ary.h"
+
+ary_t *ary_new(void)
+{
+    ary_t *ary = ALLOC_N(ary_t, 1);
+    ary->count = 0;
+    ary->max = DEFAULT_ENTRY_COUNT;
+    ary->entries = ALLOC_N(int, DEFAULT_ENTRY_COUNT);
+    return ary;
+}
+
+int ary_entry(ary_t *ary, int idx)
+{
+    if (idx < 0)
+        idx = ary->count + idx;
+    return (idx >= 0 && ary->count > idx) ? ary->entries[idx] : INT_MAX;
+}
+
+void ary_clear(ary_t *ary)
+{
+    ary->count = 0;
+}
+
+int ary_pop(ary_t *ary)
+{
+    if (ary->count > 0)
+    {
+        ary->count--;
+        return 1;
+    }
+    return 0;
+}
+
+void ary_push(ary_t *ary, int val)
+{
+    if (ary->count == ary->max)
+    {
+        ary->max += DEFAULT_ENTRY_COUNT;
+        REALLOC_N(ary->entries, int, ary->max);
+    }
+    ary->entries[ary->count] = val;
+    ary->count++;
+}
+
+int ary_includes(ary_t *ary, int val)
+{
+    for (int i = 0, max = ary->count; i < max; i++)
+    {
+        if (ary->entries[i] == val)
+            return 1;
+    }
+    return 0;
+}
+
+int ary_includes2(ary_t *ary, int val1, int val2)
+{
+    for (int i = 0, max = ary->count; i < max; i++)
+    {
+        if (ary->entries[i] == val1 ||
+            ary->entries[i] == val2)
+            return 1;
+    }
+    return 0;
+}
+
+int ary_includes3(ary_t *ary, int val1, int val2, int val3)
+{
+    for (int i = 0, max = ary->count; i < max; i++)
+    {
+        if (ary->entries[i] == val1 ||
+            ary->entries[i] == val2 ||
+            ary->entries[i] == val3)
+            return 1;
+    }
+    return 0;
+}
+
+int ary_count(ary_t *ary, int item)
+{
+    int count = 0;
+    for (int i = 0, max = ary->count; i < max; i++)
+    {
+        if (ary->entries[i] == item)
+            count++;
+    }
+    return count;
+}
+
+void ary_free(ary_t *ary)
+{
+    free(ary->entries);
+    free(ary);
+}
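Note (editor): ary.c above gives the parser a tiny growable int stack used for scope tracking. Purely as an illustration, and not part of the package, here is a minimal usage sketch; it assumes it is compiled inside the extension (where ruby_compat.h supplies the ALLOC_N/REALLOC_N macros), and the DEMO_* scope markers are hypothetical placeholders rather than the gem's real token constants.

// Illustrative sketch only -- not shipped with the gem.
// Assumes the Ruby headers (via ruby_compat.h) are available, as in the extension build.
#include "ary.h"

enum { DEMO_P = 1, DEMO_BLOCKQUOTE = 2, DEMO_LI = 3 };   // hypothetical scope markers

static void scope_demo(void)
{
    ary_t *scope = ary_new();                  // room for DEFAULT_ENTRY_COUNT ints up front
    ary_push(scope, DEMO_BLOCKQUOTE);          // grows via REALLOC_N when full
    ary_push(scope, DEMO_P);
    int top = ary_entry(scope, -1);            // negative index counts from the end => DEMO_P
    if (!NO_ITEM(top) && ary_includes2(scope, DEMO_P, DEMO_LI))
        ary_pop(scope);                        // drops the topmost entry
    ary_free(scope);                           // frees the entries buffer and the struct
}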
data/ext/wikitext/ary.h
ADDED
@@ -0,0 +1,50 @@
+// Copyright 2008-2009 Wincent Colaiuta. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+#include "ruby_compat.h"
+
+typedef struct
+{
+    int count;
+    int max;
+    int *entries;
+} ary_t;
+
+// in the test suite array count goes no higher than 25 or 26
+#define DEFAULT_ENTRY_COUNT 64
+
+#define NO_ITEM(item) (item == INT_MAX)
+
+ary_t *ary_new(void);
+int ary_entry(ary_t *ary, int idx);
+void ary_clear(ary_t *ary);
+int ary_pop(ary_t *ary);
+void ary_push(ary_t *ary, int val);
+int ary_includes(ary_t *ary, int val);
+int ary_includes2(ary_t *ary, int val1, int val2);
+int ary_includes3(ary_t *ary, int val1, int val2, int val3);
+
+// returns a count indicating the number of times the value appears in the collection
+int ary_count(ary_t *ary, int item);
+
+void ary_free(ary_t *ary);
data/ext/wikitext/depend
ADDED
@@ -0,0 +1,32 @@
+# Copyright 2008-2010 Wincent Colaiuta. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+# don't warn about unused params because many Ruby methods accept "self" but don't use it
+CFLAGS += -std=gnu99 -Wall -Wextra -Wno-unused-parameter
+
+ary.o : ary.c ary.h ruby_compat.h
+parser.o : ary.c ary.h parser.c parser.h ruby_compat.h str.c str.h token.h wikitext.h wikitext_ragel.h
+str.o : ruby_compat.h str.c str.h
+token.o : ruby_compat.h token.c token.h wikitext.h
+wikitext.o : parser.h ruby_compat.h token.h wikitext.c wikitext.h wikitext_ragel.h
+wikitext_ragel.o : ruby_compat.h token.h wikitext.h wikitext_ragel.h wikitext_ragel.c
data/ext/wikitext/parser.c
ADDED
@@ -0,0 +1,2595 @@
|
|
1
|
+
// Copyright 2007-2013 Wincent Colaiuta. All rights reserved.
|
2
|
+
//
|
3
|
+
// Redistribution and use in source and binary forms, with or without
|
4
|
+
// modification, are permitted provided that the following conditions are met:
|
5
|
+
//
|
6
|
+
// 1. Redistributions of source code must retain the above copyright notice,
|
7
|
+
// this list of conditions and the following disclaimer.
|
8
|
+
// 2. Redistributions in binary form must reproduce the above copyright notice,
|
9
|
+
// this list of conditions and the following disclaimer in the documentation
|
10
|
+
// and/or other materials provided with the distribution.
|
11
|
+
//
|
12
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
13
|
+
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
14
|
+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
15
|
+
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
16
|
+
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
17
|
+
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
18
|
+
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
19
|
+
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
20
|
+
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
21
|
+
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
22
|
+
// POSSIBILITY OF SUCH DAMAGE.
|
23
|
+
|
24
|
+
#include <stdbool.h>
|
25
|
+
|
26
|
+
#include "parser.h"
|
27
|
+
#include "ary.h"
|
28
|
+
#include "str.h"
|
29
|
+
#include "wikitext.h"
|
30
|
+
#include "wikitext_ragel.h"
|
31
|
+
|
32
|
+
#define IN(type) ary_includes(parser->scope, type)
|
33
|
+
#define IN_EITHER_OF(type1, type2) ary_includes2(parser->scope, type1, type2)
|
34
|
+
#define IN_ANY_OF(type1, type2, type3) ary_includes3(parser->scope, type1, type2, type3)
|
35
|
+
|
36
|
+
// output styles
|
37
|
+
enum { HTML_OUTPUT, XML_OUTPUT };
|
38
|
+
|
39
|
+
// poor man's object orientation in C:
|
40
|
+
// instead of passing around multiple parameters between functions in the parser
|
41
|
+
// we pack everything into a struct and pass around only a pointer to that
|
42
|
+
typedef struct
|
43
|
+
{
|
44
|
+
str_t *capture; // capturing to link_target, link_text, or NULL (direct to output, not capturing)
|
45
|
+
str_t *output; // for accumulating output to be returned
|
46
|
+
str_t *link_target; // short term "memory" for parsing links
|
47
|
+
str_t *link_text; // short term "memory" for parsing links
|
48
|
+
str_t *line_ending;
|
49
|
+
str_t *tabulation; // caching buffer for emitting indentation
|
50
|
+
ary_t *scope; // stack for tracking scope
|
51
|
+
ary_t *line; // stack for tracking scope as implied by current line
|
52
|
+
ary_t *line_buffer; // stack for tracking raw tokens (not scope) on current line
|
53
|
+
VALUE external_link_class; // CSS class applied to external links
|
54
|
+
VALUE external_link_rel; // rel attribute applied to external links
|
55
|
+
VALUE mailto_class; // CSS class applied to email (mailto) links
|
56
|
+
VALUE img_prefix; // path prepended when emitting img tags
|
57
|
+
int output_style; // HTML_OUTPUT (default) or XML_OUTPUT
|
58
|
+
int base_indent; // controlled by the :indent option to Wikitext::Parser#parse
|
59
|
+
int current_indent; // fluctuates according to currently nested structures
|
60
|
+
int base_heading_level;
|
61
|
+
bool pending_crlf;
|
62
|
+
bool autolink;
|
63
|
+
bool space_to_underscore;
|
64
|
+
} parser_t;
|
65
|
+
|
66
|
+
const char null_str[] = { 0 };
|
67
|
+
const char escaped_no_wiki_start[] = "<nowiki>";
|
68
|
+
const char escaped_no_wiki_end[] = "</nowiki>";
|
69
|
+
const char literal_strong_em[] = "'''''";
|
70
|
+
const char literal_strong[] = "'''";
|
71
|
+
const char literal_em[] = "''";
|
72
|
+
const char escaped_em_start[] = "<em>";
|
73
|
+
const char escaped_em_end[] = "</em>";
|
74
|
+
const char escaped_strong_start[] = "<strong>";
|
75
|
+
const char escaped_strong_end[] = "</strong>";
|
76
|
+
const char escaped_tt_start[] = "<tt>";
|
77
|
+
const char escaped_tt_end[] = "</tt>";
|
78
|
+
const char pre_start[] = "<pre>";
|
79
|
+
const char pre_end[] = "</pre>";
|
80
|
+
const char escaped_pre_start[] = "<pre>";
|
81
|
+
const char escaped_pre_end[] = "</pre>";
|
82
|
+
const char blockquote_start[] = "<blockquote>";
|
83
|
+
const char blockquote_end[] = "</blockquote>";
|
84
|
+
const char escaped_blockquote_start[] = "<blockquote>";
|
85
|
+
const char escaped_blockquote_end[] = "</blockquote>";
|
86
|
+
const char strong_em_start[] = "<strong><em>";
|
87
|
+
const char strong_start[] = "<strong>";
|
88
|
+
const char strong_end[] = "</strong>";
|
89
|
+
const char em_start[] = "<em>";
|
90
|
+
const char em_end[] = "</em>";
|
91
|
+
const char code_start[] = "<code>";
|
92
|
+
const char code_end[] = "</code>";
|
93
|
+
const char ol_start[] = "<ol>";
|
94
|
+
const char ol_end[] = "</ol>";
|
95
|
+
const char ul_start[] = "<ul>";
|
96
|
+
const char ul_end[] = "</ul>";
|
97
|
+
const char li_start[] = "<li>";
|
98
|
+
const char li_end[] = "</li>";
|
99
|
+
const char h6_start[] = "<h6>";
|
100
|
+
const char h6_end[] = "</h6>";
|
101
|
+
const char h5_start[] = "<h5>";
|
102
|
+
const char h5_end[] = "</h5>";
|
103
|
+
const char h4_start[] = "<h4>";
|
104
|
+
const char h4_end[] = "</h4>";
|
105
|
+
const char h3_start[] = "<h3>";
|
106
|
+
const char h3_end[] = "</h3>";
|
107
|
+
const char h2_start[] = "<h2>";
|
108
|
+
const char h2_end[] = "</h2>";
|
109
|
+
const char h1_start[] = "<h1>";
|
110
|
+
const char h1_end[] = "</h1>";
|
111
|
+
const char p_start[] = "<p>";
|
112
|
+
const char p_end[] = "</p>";
|
113
|
+
const char space[] = " ";
|
114
|
+
const char a_start[] = "<a href=\"";
|
115
|
+
const char a_class[] = "\" class=\"";
|
116
|
+
const char a_rel[] = "\" rel=\"";
|
117
|
+
const char a_start_close[] = "\">";
|
118
|
+
const char a_end[] = "</a>";
|
119
|
+
const char link_start[] = "[[";
|
120
|
+
const char link_end[] = "]]";
|
121
|
+
const char separator[] = "|";
|
122
|
+
const char ext_link_start[] = "[";
|
123
|
+
const char backtick[] = "`";
|
124
|
+
const char quote[] = "\"";
|
125
|
+
const char ampersand[] = "&";
|
126
|
+
const char quot_entity[] = """;
|
127
|
+
const char amp_entity[] = "&";
|
128
|
+
const char lt_entity[] = "<";
|
129
|
+
const char gt_entity[] = ">";
|
130
|
+
const char escaped_blockquote[] = "> ";
|
131
|
+
const char ext_link_end[] = "]";
|
132
|
+
const char literal_img_start[] = "{{";
|
133
|
+
const char img_start[] = "<img src=\"";
|
134
|
+
const char img_end_xml[] = "\" />";
|
135
|
+
const char img_end_html[] = "\">";
|
136
|
+
const char img_alt[] = "\" alt=\"";
|
137
|
+
const char pre_class_start[] = "<pre class=\"";
|
138
|
+
const char pre_class_end[] = "-syntax\">";
|
139
|
+
|
140
|
+
// Mark the parser struct designated by ptr as a participant in Ruby's
|
141
|
+
// mark-and-sweep garbage collection scheme. A variable named name is placed on
|
142
|
+
// the C stack to prevent the structure from being prematurely collected.
|
143
|
+
#define GC_WRAP_PARSER(ptr, name) volatile VALUE name __attribute__((unused)) = Data_Wrap_Struct(rb_cObject, 0, parser_free, ptr)
|
144
|
+
|
145
|
+
parser_t *parser_new(void)
|
146
|
+
{
|
147
|
+
parser_t *parser = ALLOC_N(parser_t, 1);
|
148
|
+
parser->capture = NULL; // not a real instance, pointer to other member's instance
|
149
|
+
parser->output = str_new();
|
150
|
+
parser->link_target = str_new();
|
151
|
+
parser->link_text = str_new();
|
152
|
+
parser->line_ending = NULL; // caller should set up
|
153
|
+
parser->tabulation = str_new();
|
154
|
+
parser->scope = ary_new();
|
155
|
+
parser->line = ary_new();
|
156
|
+
parser->line_buffer = ary_new();
|
157
|
+
parser->external_link_class = Qnil; // caller should set up
|
158
|
+
parser->external_link_rel = Qnil; // caller should set up
|
159
|
+
parser->mailto_class = Qnil; // caller should set up
|
160
|
+
parser->img_prefix = Qnil; // caller should set up
|
161
|
+
parser->output_style = HTML_OUTPUT;
|
162
|
+
parser->base_indent = 0;
|
163
|
+
parser->current_indent = 0;
|
164
|
+
parser->base_heading_level = 0;
|
165
|
+
parser->pending_crlf = false;
|
166
|
+
parser->autolink = true;
|
167
|
+
parser->space_to_underscore = true;
|
168
|
+
return parser;
|
169
|
+
}
|
170
|
+
|
171
|
+
void parser_free(parser_t *parser)
|
172
|
+
{
|
173
|
+
// we don't free parser->capture; it's just a redundant pointer
|
174
|
+
if (parser->output) str_free(parser->output);
|
175
|
+
if (parser->link_target) str_free(parser->link_target);
|
176
|
+
if (parser->link_text) str_free(parser->link_text);
|
177
|
+
if (parser->line_ending) str_free(parser->line_ending);
|
178
|
+
if (parser->tabulation) str_free(parser->tabulation);
|
179
|
+
if (parser->scope) ary_free(parser->scope);
|
180
|
+
if (parser->line) ary_free(parser->line);
|
181
|
+
if (parser->line_buffer) ary_free(parser->line_buffer);
|
182
|
+
free(parser);
|
183
|
+
}
|
184
|
+
|
185
|
+
// for testing and debugging only
|
186
|
+
VALUE Wikitext_parser_tokenize(VALUE self, VALUE string)
|
187
|
+
{
|
188
|
+
if (NIL_P(string))
|
189
|
+
return Qnil;
|
190
|
+
string = StringValue(string);
|
191
|
+
VALUE tokens = rb_ary_new();
|
192
|
+
char *p = RSTRING_PTR(string);
|
193
|
+
long len = RSTRING_LEN(string);
|
194
|
+
char *pe = p + len;
|
195
|
+
token_t token;
|
196
|
+
next_token(&token, NULL, p, pe);
|
197
|
+
rb_ary_push(tokens, wiki_token(&token));
|
198
|
+
while (token.type != END_OF_FILE)
|
199
|
+
{
|
200
|
+
next_token(&token, &token, NULL, pe);
|
201
|
+
rb_ary_push(tokens, wiki_token(&token));
|
202
|
+
}
|
203
|
+
return tokens;
|
204
|
+
}
|
205
|
+
|
206
|
+
// for benchmarking raw tokenization speed only
|
207
|
+
VALUE Wikitext_parser_benchmarking_tokenize(VALUE self, VALUE string)
|
208
|
+
{
|
209
|
+
if (NIL_P(string))
|
210
|
+
return Qnil;
|
211
|
+
string = StringValue(string);
|
212
|
+
char *p = RSTRING_PTR(string);
|
213
|
+
long len = RSTRING_LEN(string);
|
214
|
+
char *pe = p + len;
|
215
|
+
token_t token;
|
216
|
+
next_token(&token, NULL, p, pe);
|
217
|
+
while (token.type != END_OF_FILE)
|
218
|
+
next_token(&token, &token, NULL, pe);
|
219
|
+
return Qnil;
|
220
|
+
}
|
221
|
+
|
222
|
+
VALUE Wikitext_parser_fulltext_tokenize(int argc, VALUE *argv, VALUE self)
|
223
|
+
{
|
224
|
+
// process arguments
|
225
|
+
VALUE string, options;
|
226
|
+
if (rb_scan_args(argc, argv, "11", &string, &options) == 1) // 1 mandatory argument, 1 optional argument
|
227
|
+
options = Qnil;
|
228
|
+
if (NIL_P(string))
|
229
|
+
return Qnil;
|
230
|
+
string = StringValue(string);
|
231
|
+
VALUE tokens = rb_ary_new();
|
232
|
+
|
233
|
+
// check instance variables
|
234
|
+
VALUE min = rb_iv_get(self, "@minimum_fulltext_token_length");
|
235
|
+
|
236
|
+
// process options hash (can override instance variables)
|
237
|
+
if (!NIL_P(options) && TYPE(options) == T_HASH)
|
238
|
+
{
|
239
|
+
if (rb_funcall(options, rb_intern("has_key?"), 1, ID2SYM(rb_intern("minimum"))) == Qtrue)
|
240
|
+
min = rb_hash_aref(options, ID2SYM(rb_intern("minimum")));
|
241
|
+
}
|
242
|
+
int min_len = NIL_P(min) ? 3 : NUM2INT(min);
|
243
|
+
if (min_len < 0)
|
244
|
+
min_len = 0;
|
245
|
+
|
246
|
+
// set up scanner
|
247
|
+
char *p = RSTRING_PTR(string);
|
248
|
+
long len = RSTRING_LEN(string);
|
249
|
+
char *pe = p + len;
|
250
|
+
token_t token;
|
251
|
+
token_t *_token = &token;
|
252
|
+
next_token(&token, NULL, p, pe);
|
253
|
+
while (token.type != END_OF_FILE)
|
254
|
+
{
|
255
|
+
switch (token.type)
|
256
|
+
{
|
257
|
+
case URI:
|
258
|
+
case MAIL:
|
259
|
+
case ALNUM:
|
260
|
+
if (TOKEN_LEN(_token) >= min_len)
|
261
|
+
rb_ary_push(tokens, TOKEN_TEXT(_token));
|
262
|
+
break;
|
263
|
+
default:
|
264
|
+
// ignore everything else
|
265
|
+
break;
|
266
|
+
}
|
267
|
+
next_token(&token, &token, NULL, pe);
|
268
|
+
}
|
269
|
+
return tokens;
|
270
|
+
}
|
271
|
+
|
272
|
+
// we downcase "in place", overwriting the original contents of the buffer
|
273
|
+
void wiki_downcase_bang(char *ptr, long len)
|
274
|
+
{
|
275
|
+
for (long i = 0; i < len; i++)
|
276
|
+
{
|
277
|
+
if (ptr[i] >= 'A' && ptr[i] <= 'Z')
|
278
|
+
ptr[i] += 32;
|
279
|
+
}
|
280
|
+
}
|
281
|
+
|
282
|
+
void wiki_append_entity_from_utf32_char(str_t *output, uint32_t character)
|
283
|
+
{
|
284
|
+
char hex_string[8] = { '&', '#', 'x', 0, 0, 0, 0, ';' };
|
285
|
+
char scratch = (character & 0xf000) >> 12;
|
286
|
+
hex_string[3] = (scratch <= 9 ? scratch + 48 : scratch + 87);
|
287
|
+
scratch = (character & 0x0f00) >> 8;
|
288
|
+
hex_string[4] = (scratch <= 9 ? scratch + 48 : scratch + 87);
|
289
|
+
scratch = (character & 0x00f0) >> 4;
|
290
|
+
hex_string[5] = (scratch <= 9 ? scratch + 48 : scratch + 87);
|
291
|
+
scratch = character & 0x000f;
|
292
|
+
hex_string[6] = (scratch <= 9 ? scratch + 48 : scratch + 87);
|
293
|
+
str_append(output, hex_string, sizeof(hex_string));
|
294
|
+
}
|
295
|
+
|
296
|
+
// Convert a single UTF-8 codepoint to UTF-32
|
297
|
+
//
|
298
|
+
// Expects an input buffer, src, containing a UTF-8 encoded character (which
|
299
|
+
// may be multi-byte). The end of the input buffer, end, is also passed in to
|
300
|
+
// allow the detection of invalidly truncated codepoints. The number of bytes
|
301
|
+
// in the UTF-8 character (between 1 and 4) is returned by reference in
|
302
|
+
// width_out.
|
303
|
+
//
|
304
|
+
// Raises a RangeError if the supplied character is invalid UTF-8.
|
305
|
+
uint32_t wiki_utf8_to_utf32(char *src, char *end, long *width_out)
|
306
|
+
{
|
307
|
+
uint32_t dest = 0;
|
308
|
+
if ((unsigned char)src[0] <= 0x7f)
|
309
|
+
{
|
310
|
+
// ASCII
|
311
|
+
dest = src[0];
|
312
|
+
*width_out = 1;
|
313
|
+
}
|
314
|
+
else if ((src[0] & 0xe0) == 0xc0)
|
315
|
+
{
|
316
|
+
// byte starts with 110..... : this should be a two-byte sequence
|
317
|
+
if (src + 1 >= end)
|
318
|
+
// no second byte
|
319
|
+
rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
|
320
|
+
else if (((unsigned char)src[0] == 0xc0) ||
|
321
|
+
((unsigned char)src[0] == 0xc1))
|
322
|
+
// overlong encoding: lead byte of 110..... but code point <= 127
|
323
|
+
rb_raise(eWikitextParserError, "invalid encoding: overlong encoding");
|
324
|
+
else if ((src[1] & 0xc0) != 0x80 )
|
325
|
+
// should have second byte starting with 10......
|
326
|
+
rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
|
327
|
+
|
328
|
+
dest =
|
329
|
+
((uint32_t)(src[0] & 0x1f)) << 6 |
|
330
|
+
(src[1] & 0x3f);
|
331
|
+
*width_out = 2;
|
332
|
+
}
|
333
|
+
else if ((src[0] & 0xf0) == 0xe0)
|
334
|
+
{
|
335
|
+
// byte starts with 1110.... : this should be a three-byte sequence
|
336
|
+
if (src + 2 >= end)
|
337
|
+
// missing second or third byte
|
338
|
+
rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
|
339
|
+
else if (((src[1] & 0xc0) != 0x80 ) ||
|
340
|
+
((src[2] & 0xc0) != 0x80 ))
|
341
|
+
// should have second and third bytes starting with 10......
|
342
|
+
rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
|
343
|
+
|
344
|
+
dest =
|
345
|
+
((uint32_t)(src[0] & 0x0f)) << 12 |
|
346
|
+
((uint32_t)(src[1] & 0x3f)) << 6 |
|
347
|
+
(src[2] & 0x3f);
|
348
|
+
*width_out = 3;
|
349
|
+
}
|
350
|
+
else if ((src[0] & 0xf8) == 0xf0)
|
351
|
+
{
|
352
|
+
// bytes starts with 11110... : this should be a four-byte sequence
|
353
|
+
if (src + 3 >= end)
|
354
|
+
// missing second, third, or fourth byte
|
355
|
+
rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
|
356
|
+
else if ((unsigned char)src[0] >= 0xf5 &&
|
357
|
+
(unsigned char)src[0] <= 0xf7)
|
358
|
+
// disallowed by RFC 3629 (codepoints above 0x10ffff)
|
359
|
+
rb_raise(eWikitextParserError, "invalid encoding: overlong encoding");
|
360
|
+
else if (((src[1] & 0xc0) != 0x80 ) ||
|
361
|
+
((src[2] & 0xc0) != 0x80 ) ||
|
362
|
+
((src[3] & 0xc0) != 0x80 ))
|
363
|
+
// should have second and third bytes starting with 10......
|
364
|
+
rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
|
365
|
+
|
366
|
+
dest =
|
367
|
+
((uint32_t)(src[0] & 0x07)) << 18 |
|
368
|
+
((uint32_t)(src[1] & 0x3f)) << 12 |
|
369
|
+
((uint32_t)(src[1] & 0x3f)) << 6 |
|
370
|
+
(src[2] & 0x3f);
|
371
|
+
*width_out = 4;
|
372
|
+
}
|
373
|
+
else
|
374
|
+
rb_raise(eWikitextParserError, "invalid encoding: unexpected byte");
|
375
|
+
return dest;
|
376
|
+
}
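Note (editor): the decoder above builds a UTF-32 code point by masking the marker bits off the lead byte and shifting in six bits per continuation byte. The following standalone sketch of just the two-byte case is not part of the gem; it only mirrors that arithmetic for illustration.

// Standalone sketch (not part of the gem): the two-byte branch of UTF-8 -> UTF-32,
// mirroring the bit arithmetic used in wiki_utf8_to_utf32 above.
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const unsigned char src[] = { 0xc3, 0xa9 };          // "é" encoded as two UTF-8 bytes
    uint32_t dest = ((uint32_t)(src[0] & 0x1f) << 6) |   // low five bits of the lead byte
                    (uint32_t)(src[1] & 0x3f);           // low six bits of the continuation byte
    printf("U+%04X\n", (unsigned)dest);                  // prints U+00E9
    return 0;
}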
|
377
|
+
|
378
|
+
// - non-printable (non-ASCII) characters converted to numeric entities
|
379
|
+
// - QUOT and AMP characters converted to named entities
|
380
|
+
// - if trim is true, leading and trailing whitespace trimmed
|
381
|
+
// - if trim is false, there is no special treatment of spaces
|
382
|
+
void wiki_append_sanitized_link_target(str_t *link_target, str_t *output, bool trim)
|
383
|
+
{
|
384
|
+
char *src = link_target->ptr;
|
385
|
+
char *start = src; // remember this so we can check if we're at the start
|
386
|
+
char *non_space = output->ptr + output->len; // remember last non-space character output
|
387
|
+
char *end = src + link_target->len;
|
388
|
+
while (src < end)
|
389
|
+
{
|
390
|
+
// need at most 8 bytes to display each input character (�)
|
391
|
+
if (output->ptr + output->len + 8 > output->ptr + output->capacity) // outgrowing buffer, must grow
|
392
|
+
{
|
393
|
+
char *old_ptr = output->ptr;
|
394
|
+
str_grow(output, output->len + (end - src) * 8); // allocate enough for worst case
|
395
|
+
if (old_ptr != output->ptr) // may have moved
|
396
|
+
non_space += output->ptr - old_ptr;
|
397
|
+
}
|
398
|
+
|
399
|
+
if (*src == '"')
|
400
|
+
{
|
401
|
+
char quot_entity_literal[] = { '&', 'q', 'u', 'o', 't', ';' }; // no trailing NUL
|
402
|
+
str_append(output, quot_entity_literal, sizeof(quot_entity_literal));
|
403
|
+
}
|
404
|
+
else if (*src == '&')
|
405
|
+
{
|
406
|
+
char amp_entity_literal[] = { '&', 'a', 'm', 'p', ';' }; // no trailing NUL
|
407
|
+
str_append(output, amp_entity_literal, sizeof(amp_entity_literal));
|
408
|
+
}
|
409
|
+
else if (*src == '<' || *src == '>')
|
410
|
+
rb_raise(rb_eRangeError, "invalid link text (\"%c\" may not appear in link text)", *src);
|
411
|
+
else if (*src == ' ' && src == start && trim)
|
412
|
+
start++; // we eat leading space
|
413
|
+
else if (*src >= 0x20 && *src <= 0x7e) // printable ASCII
|
414
|
+
{
|
415
|
+
*(output->ptr + output->len) = *src;
|
416
|
+
output->len++;
|
417
|
+
}
|
418
|
+
else // all others: must convert to entities
|
419
|
+
{
|
420
|
+
long width;
|
421
|
+
wiki_append_entity_from_utf32_char(output, wiki_utf8_to_utf32(src, end, &width));
|
422
|
+
src += width;
|
423
|
+
non_space = output->ptr + output->len;
|
424
|
+
continue;
|
425
|
+
}
|
426
|
+
if (*src != ' ')
|
427
|
+
non_space = output->ptr + output->len;
|
428
|
+
src++;
|
429
|
+
}
|
430
|
+
|
431
|
+
// trim trailing space if necessary
|
432
|
+
if (trim && output->ptr + output->len != non_space)
|
433
|
+
output->len -= (output->ptr + output->len) - non_space;
|
434
|
+
}
|
435
|
+
|
436
|
+
// prepare hyperlink and append it to parser->output
|
437
|
+
// if check_autolink is true, checks parser->autolink to decide whether to emit a real hyperlink
|
438
|
+
// or merely the literal link target
|
439
|
+
// if link_text is Qnil, the link_target is re-used for the link text
|
440
|
+
void wiki_append_hyperlink(parser_t *parser, VALUE link_prefix, str_t *link_target, str_t *link_text, VALUE link_class, VALUE link_rel, bool check_autolink)
|
441
|
+
{
|
442
|
+
if (check_autolink && !parser->autolink)
|
443
|
+
wiki_append_sanitized_link_target(link_target, parser->output, true);
|
444
|
+
else
|
445
|
+
{
|
446
|
+
str_append(parser->output, a_start, sizeof(a_start) - 1); // <a href="
|
447
|
+
if (!NIL_P(link_prefix))
|
448
|
+
str_append_string(parser->output, link_prefix);
|
449
|
+
wiki_append_sanitized_link_target(link_target, parser->output, true);
|
450
|
+
|
451
|
+
// special handling for mailto URIs
|
452
|
+
const char *mailto = "mailto:";
|
453
|
+
long mailto_len = (long)sizeof(mailto) - 1; // don't count NUL byte
|
454
|
+
if ((link_target->len >= mailto_len &&
|
455
|
+
strncmp(mailto, link_target->ptr, mailto_len) == 0) ||
|
456
|
+
(!NIL_P(link_prefix) &&
|
457
|
+
RSTRING_LEN(link_prefix) >= mailto_len &&
|
458
|
+
strncmp(mailto, RSTRING_PTR(link_prefix), mailto_len) == 0))
|
459
|
+
link_class = parser->mailto_class; // use mailto_class from parser
|
460
|
+
if (link_class != Qnil)
|
461
|
+
{
|
462
|
+
str_append(parser->output, a_class, sizeof(a_class) - 1); // " class="
|
463
|
+
str_append_string(parser->output, link_class);
|
464
|
+
}
|
465
|
+
if (link_rel != Qnil)
|
466
|
+
{
|
467
|
+
str_append(parser->output, a_rel, sizeof(a_rel) - 1); // " rel="
|
468
|
+
str_append_string(parser->output, link_rel);
|
469
|
+
}
|
470
|
+
str_append(parser->output, a_start_close, sizeof(a_start_close) - 1); // ">
|
471
|
+
if (!link_text || link_text->len == 0) // re-use link_target
|
472
|
+
wiki_append_sanitized_link_target(link_target, parser->output, true);
|
473
|
+
else
|
474
|
+
str_append_str(parser->output, link_text);
|
475
|
+
str_append(parser->output, a_end, sizeof(a_end) - 1); // </a>
|
476
|
+
}
|
477
|
+
}
|
478
|
+
|
479
|
+
void wiki_append_img(parser_t *parser, char *token_ptr, long token_len)
|
480
|
+
{
|
481
|
+
str_append(parser->output, img_start, sizeof(img_start) - 1); // <img src="
|
482
|
+
if (!NIL_P(parser->img_prefix) && *token_ptr != '/') // len always > 0
|
483
|
+
str_append_string(parser->output, parser->img_prefix);
|
484
|
+
str_append(parser->output, token_ptr, token_len);
|
485
|
+
str_append(parser->output, img_alt, sizeof(img_alt) - 1); // " alt="
|
486
|
+
str_append(parser->output, token_ptr, token_len);
|
487
|
+
if (parser->output_style == XML_OUTPUT)
|
488
|
+
str_append(parser->output, img_end_xml, sizeof(img_end_xml) - 1); // " />
|
489
|
+
else
|
490
|
+
str_append(parser->output, img_end_html, sizeof(img_end_html) - 1); // ">
|
491
|
+
}
|
492
|
+
|
493
|
+
// will emit indentation only if we are about to emit any of:
|
494
|
+
// <blockquote>, <p>, <ul>, <ol>, <li>, <h1> etc, <pre>
|
495
|
+
// each time we enter one of those spans must ++ the indentation level
|
496
|
+
void wiki_indent(parser_t *parser)
|
497
|
+
{
|
498
|
+
if (parser->base_indent == -1) // indentation disabled
|
499
|
+
return;
|
500
|
+
int space_count = parser->current_indent + parser->base_indent;
|
501
|
+
if (space_count > 0)
|
502
|
+
{
|
503
|
+
char *old_end, *new_end;
|
504
|
+
if (parser->tabulation->len < space_count)
|
505
|
+
str_grow(parser->tabulation, space_count); // reallocates if necessary
|
506
|
+
old_end = parser->tabulation->ptr + parser->tabulation->len;
|
507
|
+
new_end = parser->tabulation->ptr + space_count;
|
508
|
+
while (old_end < new_end)
|
509
|
+
*old_end++ = ' ';
|
510
|
+
if (space_count > parser->tabulation->len)
|
511
|
+
parser->tabulation->len = space_count;
|
512
|
+
str_append(parser->output, parser->tabulation->ptr, space_count);
|
513
|
+
}
|
514
|
+
parser->current_indent += 2;
|
515
|
+
}
|
516
|
+
|
517
|
+
void wiki_append_pre_start(parser_t *parser, token_t *token)
|
518
|
+
{
|
519
|
+
wiki_indent(parser);
|
520
|
+
if ((size_t)TOKEN_LEN(token) > sizeof(pre_start) - 1)
|
521
|
+
{
|
522
|
+
str_append(parser->output, pre_class_start, sizeof(pre_class_start) - 1); // <pre class="
|
523
|
+
str_append(parser->output, token->start + 11, TOKEN_LEN(token) - 13); // (the "lang" substring)
|
524
|
+
str_append(parser->output, pre_class_end, sizeof(pre_class_end) - 1); // -syntax">
|
525
|
+
}
|
526
|
+
else
|
527
|
+
str_append(parser->output, pre_start, sizeof(pre_start) - 1);
|
528
|
+
ary_push(parser->scope, PRE_START);
|
529
|
+
ary_push(parser->line, PRE_START);
|
530
|
+
}
|
531
|
+
|
532
|
+
void wiki_dedent(parser_t *parser, bool emit)
|
533
|
+
{
|
534
|
+
if (parser->base_indent == -1) // indentation disabled
|
535
|
+
return;
|
536
|
+
parser->current_indent -= 2;
|
537
|
+
if (!emit)
|
538
|
+
return;
|
539
|
+
int space_count = parser->current_indent + parser->base_indent;
|
540
|
+
if (space_count > 0)
|
541
|
+
str_append(parser->output, parser->tabulation->ptr, space_count);
|
542
|
+
}
|
543
|
+
|
544
|
+
// Pops a single item off the parser's scope stack.
|
545
|
+
// A corresponding closing tag is written to the target string.
|
546
|
+
// The target string may be the main output buffer, or a substring capturing buffer if a link is being scanned.
|
547
|
+
void wiki_pop_from_stack(parser_t *parser, str_t *target)
|
548
|
+
{
|
549
|
+
int top = ary_entry(parser->scope, -1);
|
550
|
+
if (NO_ITEM(top))
|
551
|
+
return;
|
552
|
+
if (!target)
|
553
|
+
target = parser->output;
|
554
|
+
|
555
|
+
// for headings, take base_heading_level into account
|
556
|
+
if (top >= H1_START && top <= H6_START)
|
557
|
+
{
|
558
|
+
top += parser->base_heading_level;
|
559
|
+
// no need to check for underflow (base_heading_level is never negative)
|
560
|
+
if (top > H6_START)
|
561
|
+
top = H6_START;
|
562
|
+
}
|
563
|
+
|
564
|
+
switch (top)
|
565
|
+
{
|
566
|
+
case PRE:
|
567
|
+
case PRE_START:
|
568
|
+
str_append(target, pre_end, sizeof(pre_end) - 1);
|
569
|
+
str_append_str(target, parser->line_ending);
|
570
|
+
wiki_dedent(parser, false);
|
571
|
+
break;
|
572
|
+
|
573
|
+
case BLOCKQUOTE:
|
574
|
+
case BLOCKQUOTE_START:
|
575
|
+
wiki_dedent(parser, true);
|
576
|
+
str_append(target, blockquote_end, sizeof(blockquote_end) - 1);
|
577
|
+
str_append_str(target, parser->line_ending);
|
578
|
+
break;
|
579
|
+
|
580
|
+
case NO_WIKI_START:
|
581
|
+
// not a real HTML tag; so nothing to pop
|
582
|
+
break;
|
583
|
+
|
584
|
+
case STRONG:
|
585
|
+
case STRONG_START:
|
586
|
+
str_append(target, strong_end, sizeof(strong_end) - 1);
|
587
|
+
break;
|
588
|
+
|
589
|
+
case EM:
|
590
|
+
case EM_START:
|
591
|
+
str_append(target, em_end, sizeof(em_end) - 1);
|
592
|
+
break;
|
593
|
+
|
594
|
+
case TT:
|
595
|
+
case TT_START:
|
596
|
+
str_append(target, code_end, sizeof(code_end) - 1);
|
597
|
+
break;
|
598
|
+
|
599
|
+
case OL:
|
600
|
+
wiki_dedent(parser, true);
|
601
|
+
str_append(target, ol_end, sizeof(ol_end) - 1);
|
602
|
+
str_append_str(target, parser->line_ending);
|
603
|
+
break;
|
604
|
+
|
605
|
+
case UL:
|
606
|
+
wiki_dedent(parser, true);
|
607
|
+
str_append(target, ul_end, sizeof(ul_end) - 1);
|
608
|
+
str_append_str(target, parser->line_ending);
|
609
|
+
break;
|
610
|
+
|
611
|
+
case NESTED_LIST:
|
612
|
+
// next token to pop will be a LI
|
613
|
+
// LI is an interesting token because sometimes we want it to behave like P (ie. do a non-emitting indent)
|
614
|
+
// and other times we want it to behave like BLOCKQUOTE (ie. when it has a nested list inside)
|
615
|
+
// hence this hack: we do an emitting dedent on behalf of the LI that we know must be coming
|
616
|
+
// and then when we pop the actual LI itself (below) we do the standard non-emitting indent
|
617
|
+
wiki_dedent(parser, true); // we really only want to emit the spaces
|
618
|
+
parser->current_indent += 2; // we don't want to decrement the actual indent level, so put it back
|
619
|
+
break;
|
620
|
+
|
621
|
+
case LI:
|
622
|
+
str_append(target, li_end, sizeof(li_end) - 1);
|
623
|
+
str_append_str(target, parser->line_ending);
|
624
|
+
wiki_dedent(parser, false);
|
625
|
+
break;
|
626
|
+
|
627
|
+
case H6_START:
|
628
|
+
str_append(target, h6_end, sizeof(h6_end) - 1);
|
629
|
+
str_append_str(target, parser->line_ending);
|
630
|
+
wiki_dedent(parser, false);
|
631
|
+
break;
|
632
|
+
|
633
|
+
case H5_START:
|
634
|
+
str_append(target, h5_end, sizeof(h5_end) - 1);
|
635
|
+
str_append_str(target, parser->line_ending);
|
636
|
+
wiki_dedent(parser, false);
|
637
|
+
break;
|
638
|
+
|
639
|
+
case H4_START:
|
640
|
+
str_append(target, h4_end, sizeof(h4_end) - 1);
|
641
|
+
str_append_str(target, parser->line_ending);
|
642
|
+
wiki_dedent(parser, false);
|
643
|
+
break;
|
644
|
+
|
645
|
+
case H3_START:
|
646
|
+
str_append(target, h3_end, sizeof(h3_end) - 1);
|
647
|
+
str_append_str(target, parser->line_ending);
|
648
|
+
wiki_dedent(parser, false);
|
649
|
+
break;
|
650
|
+
|
651
|
+
case H2_START:
|
652
|
+
str_append(target, h2_end, sizeof(h2_end) - 1);
|
653
|
+
str_append_str(target, parser->line_ending);
|
654
|
+
wiki_dedent(parser, false);
|
655
|
+
break;
|
656
|
+
|
657
|
+
case H1_START:
|
658
|
+
str_append(target, h1_end, sizeof(h1_end) - 1);
|
659
|
+
str_append_str(target, parser->line_ending);
|
660
|
+
wiki_dedent(parser, false);
|
661
|
+
break;
|
662
|
+
|
663
|
+
case LINK_START:
|
664
|
+
// not an HTML tag; so nothing to emit
|
665
|
+
break;
|
666
|
+
|
667
|
+
case EXT_LINK_START:
|
668
|
+
// not an HTML tag; so nothing to emit
|
669
|
+
break;
|
670
|
+
|
671
|
+
case PATH:
|
672
|
+
// not an HTML tag; so nothing to emit
|
673
|
+
break;
|
674
|
+
|
675
|
+
case SPACE:
|
676
|
+
// not an HTML tag (only used to separate an external link target from the link text); so nothing to emit
|
677
|
+
break;
|
678
|
+
|
679
|
+
case SEPARATOR:
|
680
|
+
// not an HTML tag (only used to separate an external link target from the link text); so nothing to emit
|
681
|
+
break;
|
682
|
+
|
683
|
+
case P:
|
684
|
+
str_append(target, p_end, sizeof(p_end) - 1);
|
685
|
+
str_append_str(target, parser->line_ending);
|
686
|
+
wiki_dedent(parser, false);
|
687
|
+
break;
|
688
|
+
|
689
|
+
case END_OF_FILE:
|
690
|
+
// nothing to do
|
691
|
+
break;
|
692
|
+
|
693
|
+
default:
|
694
|
+
// should probably raise an exception here
|
695
|
+
break;
|
696
|
+
}
|
697
|
+
ary_pop(parser->scope);
|
698
|
+
}
|
699
|
+
|
700
|
+
// Pops items off the top of parser's scope stack, accumulating closing tags for them into the target string, until item is reached.
|
701
|
+
// If including is true then the item itself is also popped.
|
702
|
+
// The target string may be the main output buffer, or a substring capturing buffer when scanning links.
|
703
|
+
void wiki_pop_from_stack_up_to(parser_t *parser, str_t *target, int item, bool including)
|
704
|
+
{
|
705
|
+
int continue_looping = 1;
|
706
|
+
do
|
707
|
+
{
|
708
|
+
int top = ary_entry(parser->scope, -1);
|
709
|
+
if (NO_ITEM(top))
|
710
|
+
return;
|
711
|
+
if (top == item)
|
712
|
+
{
|
713
|
+
if (!including)
|
714
|
+
return;
|
715
|
+
continue_looping = 0;
|
716
|
+
}
|
717
|
+
wiki_pop_from_stack(parser, target);
|
718
|
+
} while (continue_looping);
|
719
|
+
}
|
720
|
+
|
721
|
+
void wiki_pop_all_from_stack(parser_t *parser)
|
722
|
+
{
|
723
|
+
for (int i = 0, max = parser->scope->count; i < max; i++)
|
724
|
+
wiki_pop_from_stack(parser, NULL);
|
725
|
+
}
|
726
|
+
|
727
|
+
void wiki_start_para_if_necessary(parser_t *parser)
|
728
|
+
{
|
729
|
+
if (parser->capture)
|
730
|
+
return;
|
731
|
+
|
732
|
+
// if no block open yet, or top of stack is BLOCKQUOTE/BLOCKQUOTE_START (with nothing in it yet)
|
733
|
+
if (parser->scope->count == 0 ||
|
734
|
+
ary_entry(parser->scope, -1) == BLOCKQUOTE ||
|
735
|
+
ary_entry(parser->scope, -1) == BLOCKQUOTE_START)
|
736
|
+
{
|
737
|
+
wiki_indent(parser);
|
738
|
+
str_append(parser->output, p_start, sizeof(p_start) - 1);
|
739
|
+
ary_push(parser->scope, P);
|
740
|
+
ary_push(parser->line, P);
|
741
|
+
}
|
742
|
+
else if (parser->pending_crlf)
|
743
|
+
{
|
744
|
+
if (IN(P))
|
745
|
+
// already in a paragraph block; convert pending CRLF into a space
|
746
|
+
str_append(parser->output, space, sizeof(space) - 1);
|
747
|
+
else if (IN(PRE))
|
748
|
+
// PRE blocks can have pending CRLF too (helps us avoid emitting the trailing newline)
|
749
|
+
str_append_str(parser->output, parser->line_ending);
|
750
|
+
}
|
751
|
+
parser->pending_crlf = false;
|
752
|
+
}
|
753
|
+
|
754
|
+
void wiki_emit_pending_crlf_if_necessary(parser_t *parser)
|
755
|
+
{
|
756
|
+
if (parser->pending_crlf)
|
757
|
+
{
|
758
|
+
str_append_str(parser->output, parser->line_ending);
|
759
|
+
parser->pending_crlf = false;
|
760
|
+
}
|
761
|
+
}
|
762
|
+
|
763
|
+
// Helper function that pops any excess elements off scope (pushing is already handled in the respective rules).
|
764
|
+
// For example, given input like:
|
765
|
+
//
|
766
|
+
// > > foo
|
767
|
+
// bar
|
768
|
+
//
|
769
|
+
// Upon seeing "bar", we want to pop two BLOCKQUOTE elements from the scope.
|
770
|
+
// The reverse case (shown below) is handled from inside the BLOCKQUOTE rule itself:
|
771
|
+
//
|
772
|
+
// foo
|
773
|
+
// > > bar
|
774
|
+
//
|
775
|
+
// Things are made slightly more complicated by the fact that there is one block-level tag that can be on the scope
|
776
|
+
// but not on the line scope:
|
777
|
+
//
|
778
|
+
// <blockquote>foo
|
779
|
+
// bar</blockquote>
|
780
|
+
//
|
781
|
+
// Here on seeing "bar" we have one item on the scope (BLOCKQUOTE_START) which we don't want to pop, but we have nothing
|
782
|
+
// on the line scope.
|
783
|
+
// Luckily, BLOCKQUOTE_START tokens can only appear at the start of the scope array, so we can check for them first before
|
784
|
+
// entering the for loop.
|
785
|
+
void wiki_pop_excess_elements(parser_t *parser)
|
786
|
+
{
|
787
|
+
if (parser->capture)
|
788
|
+
return;
|
789
|
+
for (int i = parser->scope->count - ary_count(parser->scope, BLOCKQUOTE_START), j = parser->line->count; i > j; i--)
|
790
|
+
{
|
791
|
+
// special case for last item on scope
|
792
|
+
if (i - j == 1)
|
793
|
+
{
|
794
|
+
// don't auto-pop P if it is only item on scope
|
795
|
+
if (ary_entry(parser->scope, -1) == P)
|
796
|
+
{
|
797
|
+
// add P to the line scope to prevent us entering the loop at all next time around
|
798
|
+
ary_push(parser->line, P);
|
799
|
+
continue;
|
800
|
+
}
|
801
|
+
}
|
802
|
+
wiki_pop_from_stack(parser, NULL);
|
803
|
+
}
|
804
|
+
}
|
805
|
+
|
806
|
+
// trim parser->link_text in place
|
807
|
+
void wiki_trim_link_text(parser_t *parser)
|
808
|
+
{
|
809
|
+
char *src = parser->link_text->ptr;
|
810
|
+
char *start = src; // remember this so we can check if we're at the start
|
811
|
+
char *left = src;
|
812
|
+
char *non_space = src; // remember last non-space character output
|
813
|
+
char *end = src + parser->link_text->len;
|
814
|
+
while (src < end)
|
815
|
+
{
|
816
|
+
if (*src == ' ')
|
817
|
+
{
|
818
|
+
if (src == left)
|
819
|
+
left++;
|
820
|
+
}
|
821
|
+
else
|
822
|
+
non_space = src;
|
823
|
+
src++;
|
824
|
+
}
|
825
|
+
if (left != start || non_space + 1 != end)
|
826
|
+
{
|
827
|
+
// TODO: could potentially avoid this memmove by extending the str_t struct with an "offset" or "free" member
|
828
|
+
parser->link_text->len = (non_space + 1) - left;
|
829
|
+
memmove(parser->link_text->ptr, left, parser->link_text->len);
|
830
|
+
}
|
831
|
+
}
|
832
|
+
|
833
|
+
VALUE Wikitext_parser_sanitize_link_target(VALUE self, VALUE string)
|
834
|
+
{
|
835
|
+
str_t *link_target = str_new_from_string(string);
|
836
|
+
GC_WRAP_STR(link_target, link_target_gc);
|
837
|
+
str_t *output = str_new();
|
838
|
+
GC_WRAP_STR(output, output_gc);
|
839
|
+
wiki_append_sanitized_link_target(link_target, output, true);
|
840
|
+
return string_from_str(output);
|
841
|
+
}
|
842
|
+
|
843
|
+
// Encodes the parser link_target member (in-place) according to RFCs 2396 and 2718
|
844
|
+
//
|
845
|
+
// Leading and trailing whitespace trimmed. Spaces are converted to
|
846
|
+
// underscores if the parser space_to_underscore member is true.
|
847
|
+
static void wiki_encode_link_target(parser_t *parser)
|
848
|
+
{
|
849
|
+
char *src = parser->link_target->ptr;
|
850
|
+
char *start = src; // remember this so we can check if we're at the start
|
851
|
+
long len = parser->link_target->len;
|
852
|
+
if (!(len > 0))
|
853
|
+
return;
|
854
|
+
char *end = src + len;
|
855
|
+
long dest_len = len * 2;
|
856
|
+
char *dest = ALLOC_N(char, dest_len);
|
857
|
+
char *dest_ptr = dest; // hang on to this so we can pass it to free() later
|
858
|
+
char *non_space = dest; // remember last non-space character output
|
859
|
+
static char hex[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
|
860
|
+
for (; src < end; src++)
|
861
|
+
{
|
862
|
+
// worst case: a single character may grow to 3 characters once encoded
|
863
|
+
if ((dest + 3) > (dest_ptr + dest_len))
|
864
|
+
{
|
865
|
+
// outgrowing buffer, must reallocate
|
866
|
+
char *old_dest = dest;
|
867
|
+
char *old_dest_ptr = dest_ptr;
|
868
|
+
dest_len += len;
|
869
|
+
dest = realloc(dest_ptr, dest_len);
|
870
|
+
if (dest == NULL)
|
871
|
+
{
|
872
|
+
// would have used reallocf, but this has to run on Linux too, not just Darwin
|
873
|
+
free(dest_ptr);
|
874
|
+
rb_raise(rb_eNoMemError, "failed to re-allocate temporary storage (memory allocation error)");
|
875
|
+
}
|
876
|
+
dest_ptr = dest;
|
877
|
+
dest = dest_ptr + (old_dest - old_dest_ptr);
|
878
|
+
non_space = dest_ptr + (non_space - old_dest_ptr);
|
879
|
+
}
|
880
|
+
|
881
|
+
// pass through unreserved characters
|
882
|
+
if ((*src >= 'a' && *src <= 'z') ||
|
883
|
+
(*src >= 'A' && *src <= 'Z') ||
|
884
|
+
(*src >= '0' && *src <= '9') ||
|
885
|
+
*src == '-' ||
|
886
|
+
*src == '_' ||
|
887
|
+
*src == '.' ||
|
888
|
+
*src == '~')
|
889
|
+
{
|
890
|
+
*dest++ = *src;
|
891
|
+
non_space = dest;
|
892
|
+
}
|
893
|
+
else if (*src == ' ' && src == start)
|
894
|
+
start++; // we eat leading space
|
895
|
+
else if (*src == ' ' && parser->space_to_underscore)
|
896
|
+
*dest++ = '_';
|
897
|
+
else // everything else gets URL-encoded
|
898
|
+
{
|
899
|
+
*dest++ = '%';
|
900
|
+
*dest++ = hex[(unsigned char)(*src) / 16]; // left
|
901
|
+
*dest++ = hex[(unsigned char)(*src) % 16]; // right
|
902
|
+
if (*src != ' ')
|
903
|
+
non_space = dest;
|
904
|
+
}
|
905
|
+
}
|
906
|
+
|
907
|
+
// trim trailing space if necessary
|
908
|
+
if (non_space > dest_ptr && dest != non_space)
|
909
|
+
dest_len = non_space - dest_ptr;
|
910
|
+
else
|
911
|
+
dest_len = dest - dest_ptr;
|
912
|
+
str_clear(parser->link_target);
|
913
|
+
str_append(parser->link_target, dest_ptr, dest_len);
|
914
|
+
free(dest_ptr);
|
915
|
+
}
|
916
|
+
|
917
|
+
VALUE Wikitext_parser_encode_link_target(VALUE self, VALUE in)
|
918
|
+
{
|
919
|
+
parser_t parser;
|
920
|
+
parser.space_to_underscore = false;
|
921
|
+
parser.link_target = str_new_from_string(in);
|
922
|
+
GC_WRAP_STR(parser.link_target, link_target_gc);
|
923
|
+
wiki_encode_link_target(&parser);
|
924
|
+
return string_from_str(parser.link_target);
|
925
|
+
}
|
926
|
+
|
927
|
+
// returns 1 (true) if supplied string is blank (nil, empty, or all whitespace)
|
928
|
+
// returns 0 (false) otherwise
|
929
|
+
bool wiki_blank(str_t *str)
|
930
|
+
{
|
931
|
+
if (str->len == 0)
|
932
|
+
return true;
|
933
|
+
for (char *ptr = str->ptr,
|
934
|
+
*end = str->ptr + str->len;
|
935
|
+
ptr < end; ptr++)
|
936
|
+
{
|
937
|
+
if (*ptr != ' ')
|
938
|
+
return false;
|
939
|
+
}
|
940
|
+
return true;
|
941
|
+
}
|
942
|
+
|
943
|
+
void wiki_rollback_failed_internal_link(parser_t *parser)
|
944
|
+
{
|
945
|
+
if (!IN(LINK_START))
|
946
|
+
return; // nothing to do!
|
947
|
+
int scope_includes_separator = IN(SEPARATOR);
|
948
|
+
wiki_pop_from_stack_up_to(parser, NULL, LINK_START, true);
|
949
|
+
str_append(parser->output, link_start, sizeof(link_start) - 1);
|
950
|
+
if (parser->link_target->len > 0)
|
951
|
+
{
|
952
|
+
wiki_append_sanitized_link_target(parser->link_target, parser->output, false);
|
953
|
+
if (scope_includes_separator)
|
954
|
+
{
|
955
|
+
str_append(parser->output, separator, sizeof(separator) - 1);
|
956
|
+
if (parser->link_text->len > 0)
|
957
|
+
str_append_str(parser->output, parser->link_text);
|
958
|
+
}
|
959
|
+
}
|
960
|
+
parser->capture = NULL;
|
961
|
+
str_clear(parser->link_target);
|
962
|
+
str_clear(parser->link_text);
|
963
|
+
}
|
964
|
+
|
965
|
+
void wiki_rollback_failed_external_link(parser_t *parser)
|
966
|
+
{
|
967
|
+
if (!IN(EXT_LINK_START))
|
968
|
+
return; // nothing to do!
|
969
|
+
|
970
|
+
// store a couple of values before popping
|
971
|
+
int scope_includes_space = IN(SPACE);
|
972
|
+
VALUE link_class = IN(PATH) ? Qnil : parser->external_link_class;
|
973
|
+
VALUE link_rel = IN(PATH) ? Qnil : parser->external_link_rel;
|
974
|
+
wiki_pop_from_stack_up_to(parser, NULL, EXT_LINK_START, true);
|
975
|
+
|
976
|
+
str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
|
977
|
+
if (parser->link_target->len > 0)
|
978
|
+
{
|
979
|
+
wiki_append_hyperlink(parser, Qnil, parser->link_target, NULL, link_class, link_rel, true);
|
980
|
+
if (scope_includes_space)
|
981
|
+
{
|
982
|
+
str_append(parser->output, space, sizeof(space) - 1);
|
983
|
+
if (parser->link_text->len > 0)
|
984
|
+
str_append_str(parser->output, parser->link_text);
|
985
|
+
}
|
986
|
+
}
|
987
|
+
parser->capture = NULL;
|
988
|
+
str_clear(parser->link_target);
|
989
|
+
str_clear(parser->link_text);
|
990
|
+
}
|
991
|
+
|
992
|
+
void wiki_rollback_failed_link(parser_t *parser)
|
993
|
+
{
|
994
|
+
wiki_rollback_failed_internal_link(parser);
|
995
|
+
wiki_rollback_failed_external_link(parser);
|
996
|
+
}
|
997
|
+
|
998
|
+
VALUE Wikitext_parser_initialize(int argc, VALUE *argv, VALUE self)
|
999
|
+
{
|
1000
|
+
// process arguments
|
1001
|
+
VALUE options;
|
1002
|
+
if (rb_scan_args(argc, argv, "01", &options) == 0) // 0 mandatory arguments, 1 optional argument
|
1003
|
+
options = Qnil;
|
1004
|
+
|
1005
|
+
// defaults
|
1006
|
+
VALUE autolink = Qtrue;
|
1007
|
+
VALUE line_ending = rb_str_new2("\n");
|
1008
|
+
VALUE external_link_class = rb_str_new2("external");
|
1009
|
+
VALUE external_link_rel = Qnil;
|
1010
|
+
VALUE mailto_class = rb_str_new2("mailto");
|
1011
|
+
VALUE link_proc = Qnil;
|
1012
|
+
VALUE internal_link_prefix = rb_str_new2("/wiki/");
|
1013
|
+
VALUE img_prefix = rb_str_new2("/images/");
|
1014
|
+
VALUE output_style = ID2SYM(rb_intern("html"));
|
1015
|
+
VALUE space_to_underscore = Qtrue;
|
1016
|
+
VALUE minimum_fulltext_token_length = INT2NUM(3);
|
1017
|
+
VALUE base_heading_level = INT2NUM(0);
|
1018
|
+
|
1019
|
+
// process options hash (override defaults)
|
1020
|
+
if (!NIL_P(options) && TYPE(options) == T_HASH)
|
1021
|
+
{
|
1022
|
+
#define OVERRIDE_IF_SET(name) rb_funcall(options, rb_intern("has_key?"), 1, ID2SYM(rb_intern(#name))) == Qtrue ? \
|
1023
|
+
rb_hash_aref(options, ID2SYM(rb_intern(#name))) : name
|
1024
|
+
autolink = OVERRIDE_IF_SET(autolink);
|
1025
|
+
line_ending = OVERRIDE_IF_SET(line_ending);
|
1026
|
+
external_link_class = OVERRIDE_IF_SET(external_link_class);
|
1027
|
+
external_link_rel = OVERRIDE_IF_SET(external_link_rel);
|
1028
|
+
mailto_class = OVERRIDE_IF_SET(mailto_class);
|
1029
|
+
link_proc = OVERRIDE_IF_SET(link_proc);
|
1030
|
+
internal_link_prefix = OVERRIDE_IF_SET(internal_link_prefix);
|
1031
|
+
img_prefix = OVERRIDE_IF_SET(img_prefix);
|
1032
|
+
output_style = OVERRIDE_IF_SET(output_style);
|
1033
|
+
space_to_underscore = OVERRIDE_IF_SET(space_to_underscore);
|
1034
|
+
minimum_fulltext_token_length = OVERRIDE_IF_SET(minimum_fulltext_token_length);
|
1035
|
+
base_heading_level = OVERRIDE_IF_SET(base_heading_level);
|
1036
|
+
}
|
1037
|
+
|
1038
|
+
// no need to call super here; rb_call_super()
|
1039
|
+
rb_iv_set(self, "@autolink", autolink);
|
1040
|
+
rb_iv_set(self, "@line_ending", line_ending);
|
1041
|
+
rb_iv_set(self, "@external_link_class", external_link_class);
|
1042
|
+
rb_iv_set(self, "@external_link_rel", external_link_rel);
|
1043
|
+
rb_iv_set(self, "@mailto_class", mailto_class);
|
1044
|
+
rb_iv_set(self, "@link_proc", link_proc);
|
1045
|
+
rb_iv_set(self, "@internal_link_prefix", internal_link_prefix);
|
1046
|
+
rb_iv_set(self, "@img_prefix", img_prefix);
|
1047
|
+
rb_iv_set(self, "@output_style", output_style);
|
1048
|
+
rb_iv_set(self, "@space_to_underscore", space_to_underscore);
|
1049
|
+
rb_iv_set(self, "@minimum_fulltext_token_length", minimum_fulltext_token_length);
|
1050
|
+
rb_iv_set(self, "@base_heading_level", base_heading_level);
|
1051
|
+
return self;
|
1052
|
+
}
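
Wikitext_parser_initialize accepts an optional options hash and otherwise keeps the defaults declared above (autolink on, external_link_class "external", internal_link_prefix "/wiki/", img_prefix "/images/", :html output, space_to_underscore on, and so on). A short usage sketch, assuming the gem is installed; keys omitted from the hash keep their defaults because OVERRIDE_IF_SET only fires when has_key? is true.

    require 'wikitext'

    parser = Wikitext::Parser.new          # all defaults
    custom = Wikitext::Parser.new(
      autolink:             false,
      external_link_class:  'ext',
      external_link_rel:    'nofollow',
      internal_link_prefix: '/articles/',
      output_style:         :xml,
      base_heading_level:   1
    )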
|
1053
|
+
|
1054
|
+
VALUE Wikitext_parser_profiling_parse(VALUE self, VALUE string)
|
1055
|
+
{
|
1056
|
+
for (int i = 0; i < 100000; i++)
|
1057
|
+
Wikitext_parser_parse(1, &string, self);
|
1058
|
+
return Qnil;
|
1059
|
+
}
|
1060
|
+
|
1061
|
+
// convert a Ruby object (:xml, :html etc) into an int output style
|
1062
|
+
int Wikitext_output_style(VALUE output)
|
1063
|
+
{
|
1064
|
+
if (TYPE(output) == T_SYMBOL)
|
1065
|
+
{
|
1066
|
+
if (SYM2ID(output) == rb_intern("xml"))
|
1067
|
+
return XML_OUTPUT;
|
1068
|
+
}
|
1069
|
+
return HTML_OUTPUT; // fall back to default
|
1070
|
+
}
|
1071
|
+
|
1072
|
+
VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
|
1073
|
+
{
|
1074
|
+
// process arguments
|
1075
|
+
VALUE string, options;
|
1076
|
+
if (rb_scan_args(argc, argv, "11", &string, &options) == 1) // 1 mandatory argument, 1 optional argument
|
1077
|
+
options = Qnil;
|
1078
|
+
if (NIL_P(string))
|
1079
|
+
return Qnil;
|
1080
|
+
string = StringValue(string);
|
1081
|
+
|
1082
|
+
// access these once per parse
|
1083
|
+
VALUE line_ending = rb_iv_get(self, "@line_ending");
|
1084
|
+
line_ending = StringValue(line_ending);
|
1085
|
+
VALUE link_class = rb_iv_get(self, "@external_link_class");
|
1086
|
+
link_class = NIL_P(link_class) ? Qnil : StringValue(link_class);
|
1087
|
+
VALUE link_rel = rb_iv_get(self, "@external_link_rel");
|
1088
|
+
link_rel = NIL_P(link_rel) ? Qnil : StringValue(link_rel);
|
1089
|
+
VALUE link_proc = rb_iv_get(self, "@link_proc");
|
1090
|
+
VALUE mailto_class = rb_iv_get(self, "@mailto_class");
|
1091
|
+
mailto_class = NIL_P(mailto_class) ? Qnil : StringValue(mailto_class);
|
1092
|
+
VALUE prefix = rb_iv_get(self, "@internal_link_prefix");
|
1093
|
+
int output_style = Wikitext_output_style(rb_iv_get(self, "@output_style"));
|
1094
|
+
|
1095
|
+
// process options hash
|
1096
|
+
int base_indent = 0;
|
1097
|
+
int base_heading_level = NUM2INT(rb_iv_get(self, "@base_heading_level"));
|
1098
|
+
if (!NIL_P(options) && TYPE(options) == T_HASH)
|
1099
|
+
{
|
1100
|
+
// :indent => 0 (or more)
|
1101
|
+
ID has_key = rb_intern("has_key?");
|
1102
|
+
ID id = ID2SYM(rb_intern("indent"));
|
1103
|
+
if (rb_funcall(options, has_key, 1, id) == Qtrue)
|
1104
|
+
{
|
1105
|
+
VALUE indent = rb_hash_aref(options, id);
|
1106
|
+
if (indent == Qfalse)
|
1107
|
+
base_indent = -1; // indentation disabled
|
1108
|
+
else
|
1109
|
+
{
|
1110
|
+
base_indent = NUM2INT(indent);
|
1111
|
+
if (base_indent < 0)
|
1112
|
+
base_indent = 0;
|
1113
|
+
}
|
1114
|
+
}
|
1115
|
+
|
1116
|
+
// :base_heading_level => 0/1/2/3/4/5/6
|
1117
|
+
id = ID2SYM(rb_intern("base_heading_level"));
|
1118
|
+
if (rb_funcall(options, has_key, 1, id) == Qtrue)
|
1119
|
+
base_heading_level = NUM2INT(rb_hash_aref(options, id));
|
1120
|
+
|
1121
|
+
// :external_link_rel => 'nofollow'
|
1122
|
+
id = ID2SYM(rb_intern("external_link_rel"));
|
1123
|
+
if (rb_funcall(options, has_key, 1, id) == Qtrue)
|
1124
|
+
{
|
1125
|
+
link_rel = rb_hash_aref(options, id);
|
1126
|
+
link_rel = NIL_P(link_rel) ? Qnil : StringValue(link_rel);
|
1127
|
+
}
|
1128
|
+
|
1129
|
+
// :output_style => :html/:xml
|
1130
|
+
id = ID2SYM(rb_intern("output_style"));
|
1131
|
+
if (rb_funcall(options, has_key, 1, id) == Qtrue)
|
1132
|
+
output_style = Wikitext_output_style(rb_hash_aref(options, id));
|
1133
|
+
|
1134
|
+
// :link_proc => lambda { |link_target| ... }
|
1135
|
+
id = ID2SYM(rb_intern("link_proc"));
|
1136
|
+
if (rb_funcall(options, has_key, 1, id) == Qtrue)
|
1137
|
+
link_proc = rb_hash_aref(options, id);
|
1138
|
+
}
|
1139
|
+
|
1140
|
+
// normalize, regardless of whether this came from instance variable or override
|
1141
|
+
if (base_heading_level < 0)
|
1142
|
+
base_heading_level = 0;
|
1143
|
+
if (base_heading_level > 6)
|
1144
|
+
base_heading_level = 6;
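
The per-call options handled above (:indent, :base_heading_level, :external_link_rel, :output_style, :link_proc) override the instance settings for a single parse, and base_heading_level is then clamped to the 0..6 range. A hedged sketch; the 'redlink' class name is an arbitrary illustration, not something defined by the gem.

    require 'wikitext'

    parser = Wikitext::Parser.new
    html = parser.parse('== Title ==',
      indent:             false,       # disables indentation (base_indent = -1)
      base_heading_level: 2,           # "==" now renders as <h4>; levels are clamped at <h6>
      external_link_rel:  'nofollow',
      output_style:       :xml,
      link_proc:          lambda { |target| 'redlink' unless target == 'known page' }
    )                                  # a non-nil link_proc result becomes the internal link's CSS class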
|
1145
|
+
|
1146
|
+
// set up scanner
|
1147
|
+
char *p = RSTRING_PTR(string);
|
1148
|
+
long len = RSTRING_LEN(string);
|
1149
|
+
char *pe = p + len;
|
1150
|
+
|
1151
|
+
// set up parser struct to make passing parameters a little easier
|
1152
|
+
parser_t *parser = parser_new();
|
1153
|
+
GC_WRAP_PARSER(parser, parser_gc);
|
1154
|
+
parser->external_link_class = link_class;
|
1155
|
+
parser->external_link_rel = link_rel;
|
1156
|
+
parser->mailto_class = mailto_class;
|
1157
|
+
parser->img_prefix = rb_iv_get(self, "@img_prefix");
|
1158
|
+
parser->autolink = rb_iv_get(self, "@autolink") == Qtrue ? true : false;
|
1159
|
+
parser->space_to_underscore = rb_iv_get(self, "@space_to_underscore") == Qtrue ? true : false;
|
1160
|
+
parser->line_ending = str_new_from_string(line_ending);
|
1161
|
+
parser->base_indent = base_indent;
|
1162
|
+
parser->base_heading_level = base_heading_level;
|
1163
|
+
parser->output_style = output_style;
|
1164
|
+
|
1165
|
+
// this simple looping design leads to a single enormous function,
|
1166
|
+
// but it's faster than doing actual recursive descent and also secure in the face of
|
1167
|
+
// malicious input that seeks to overflow the stack
|
1168
|
+
// (with "<blockquote><blockquote><blockquote>..." repeated 10,000 times, for example)
|
1169
|
+
// given that we expect to deal with a lot of malformed input, a recursive descent design is less appropriate
|
1170
|
+
// than a straightforward looping translator like this one anyway
|
1171
|
+
token_t _token;
|
1172
|
+
_token.type = NO_TOKEN;
|
1173
|
+
token_t *token = NULL;
|
1174
|
+
do
|
1175
|
+
{
|
1176
|
+
// note that whenever we grab a token we push it into the line buffer
|
1177
|
+
// this provides us with context-sensitive "memory" of what's been seen so far on this line
|
1178
|
+
#define NEXT_TOKEN() token = &_token, next_token(token, token, NULL, pe), ary_push(parser->line_buffer, token->type)
|
1179
|
+
|
1180
|
+
// check to see if we have a token hanging around from a previous iteration of this loop
|
1181
|
+
if (token == NULL)
|
1182
|
+
{
|
1183
|
+
if (_token.type == NO_TOKEN)
|
1184
|
+
{
|
1185
|
+
// first time here (haven't started scanning yet)
|
1186
|
+
token = &_token;
|
1187
|
+
next_token(token, NULL, p, pe);
|
1188
|
+
ary_push(parser->line_buffer, token->type);
|
1189
|
+
}
|
1190
|
+
else
|
1191
|
+
// already scanning
|
1192
|
+
NEXT_TOKEN();
|
1193
|
+
}
|
1194
|
+
int type = token->type;
|
1195
|
+
|
1196
|
+
// can't declare new variables inside a switch statement, so predeclare them here
|
1197
|
+
long remove_strong = -1;
|
1198
|
+
long remove_em = -1;
|
1199
|
+
|
1200
|
+
// general purpose counters, flags and pointers
|
1201
|
+
long i = 0;
|
1202
|
+
long j = 0;
|
1203
|
+
long k = 0;
|
1204
|
+
str_t *output = NULL;
|
1205
|
+
str_t _token_str;
|
1206
|
+
str_t *token_str = &_token_str;
|
1207
|
+
|
1208
|
+
// The following giant switch statement contains cases for all the possible token types.
|
1209
|
+
// In the most basic sense we are emitting the HTML that corresponds to each token,
|
1210
|
+
// but some tokens require context information in order to decide what to output.
|
1211
|
+
// For example, does the STRONG token (''') translate to <strong> or </strong>?
|
1212
|
+
// So when looking at any given token we have three state-maintaining variables which give us a notion of "where we are":
|
1213
|
+
//
|
1214
|
+
// - the "scope" stack (indicates what HTML DOM structures we are currently nested inside, similar to a CSS selector)
|
1215
|
+
// - the line buffer (records tokens seen so far on the current line)
|
1216
|
+
// - the line "scope" stack (indicates what the scope should be based only on what is visible on the line so far)
|
1217
|
+
//
|
1218
|
+
// Although this is fairly complicated, there is one key simplifying factor:
|
1219
|
+
// The translator continuously performs auto-correction, and this means that we always have a guarantee that the
|
1220
|
+
// scope stack (up to the current token) is valid; our translator can take this as a given.
|
1221
|
+
// Auto-correction basically consists of inserting missing tokens (preventing subsequent HTML from being messed up),
|
1222
|
+
// or converting illegal (unexpected) tokens to their plain text equivalents (providing visual feedback to the wikitext author).
|
1223
|
+
switch (type)
|
1224
|
+
{
|
1225
|
+
case PRE:
|
1226
|
+
if (IN_EITHER_OF(NO_WIKI_START, PRE_START))
|
1227
|
+
{
|
1228
|
+
str_append(parser->output, space, sizeof(space) - 1);
|
1229
|
+
break;
|
1230
|
+
}
|
1231
|
+
else if (IN(BLOCKQUOTE_START))
|
1232
|
+
{
|
1233
|
+
// this kind of nesting not allowed (to avoid user confusion)
|
1234
|
+
wiki_pop_excess_elements(parser);
|
1235
|
+
wiki_start_para_if_necessary(parser);
|
1236
|
+
output = parser->capture ? parser->capture : parser->output;
|
1237
|
+
str_append(output, space, sizeof(space) - 1);
|
1238
|
+
break;
|
1239
|
+
}
|
1240
|
+
|
1241
|
+
// count number of BLOCKQUOTE tokens in line buffer and in scope stack
|
1242
|
+
ary_push(parser->line, PRE);
|
1243
|
+
i = ary_count(parser->line, BLOCKQUOTE);
|
1244
|
+
j = ary_count(parser->scope, BLOCKQUOTE);
|
1245
|
+
if (i < j)
|
1246
|
+
{
|
1247
|
+
// must pop (reduce nesting level)
|
1248
|
+
for (i = j - i; i > 0; i--)
|
1249
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, true);
|
1250
|
+
}
|
1251
|
+
|
1252
|
+
if (!IN(PRE))
|
1253
|
+
{
|
1254
|
+
parser->pending_crlf = false;
|
1255
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
|
1256
|
+
wiki_indent(parser);
|
1257
|
+
str_append(parser->output, pre_start, sizeof(pre_start) - 1);
|
1258
|
+
ary_push(parser->scope, PRE);
|
1259
|
+
}
|
1260
|
+
break;
|
1261
|
+
|
1262
|
+
case PRE_START:
|
1263
|
+
if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
|
1264
|
+
{
|
1265
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1266
|
+
str_append(parser->output, escaped_pre_start, sizeof(escaped_pre_start) - 1);
|
1267
|
+
}
|
1268
|
+
else if (IN(BLOCKQUOTE_START))
|
1269
|
+
{
|
1270
|
+
wiki_rollback_failed_link(parser); // if any
|
1271
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
|
1272
|
+
wiki_append_pre_start(parser, token);
|
1273
|
+
}
|
1274
|
+
else if (IN(BLOCKQUOTE))
|
1275
|
+
{
|
1276
|
+
if (token->column_start == 1) // only allowed in first column
|
1277
|
+
{
|
1278
|
+
wiki_rollback_failed_link(parser); // if any
|
1279
|
+
wiki_pop_all_from_stack(parser);
|
1280
|
+
wiki_append_pre_start(parser, token);
|
1281
|
+
}
|
1282
|
+
else // PRE_START illegal here
|
1283
|
+
{
|
1284
|
+
output = parser->capture ? parser->capture : parser->output;
|
1285
|
+
wiki_pop_excess_elements(parser);
|
1286
|
+
wiki_start_para_if_necessary(parser);
|
1287
|
+
str_append(output, escaped_pre_start, sizeof(escaped_pre_start) - 1);
|
1288
|
+
}
|
1289
|
+
}
|
1290
|
+
else
|
1291
|
+
{
|
1292
|
+
wiki_rollback_failed_link(parser); // if any
|
1293
|
+
wiki_pop_from_stack_up_to(parser, NULL, P, true);
|
1294
|
+
wiki_append_pre_start(parser, token);
|
1295
|
+
}
|
1296
|
+
break;
|
1297
|
+
|
1298
|
+
case PRE_END:
|
1299
|
+
if (IN_EITHER_OF(NO_WIKI_START, PRE))
|
1300
|
+
{
|
1301
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1302
|
+
str_append(parser->output, escaped_pre_end, sizeof(escaped_pre_end) - 1);
|
1303
|
+
}
|
1304
|
+
else
|
1305
|
+
{
|
1306
|
+
if (IN(PRE_START))
|
1307
|
+
wiki_pop_from_stack_up_to(parser, parser->output, PRE_START, true);
|
1308
|
+
else
|
1309
|
+
{
|
1310
|
+
output = parser->capture ? parser->capture : parser->output;
|
1311
|
+
wiki_pop_excess_elements(parser);
|
1312
|
+
wiki_start_para_if_necessary(parser);
|
1313
|
+
str_append(output, escaped_pre_end, sizeof(escaped_pre_end) - 1);
|
1314
|
+
}
|
1315
|
+
}
|
1316
|
+
break;
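
Two routes into <pre> are handled above: the PRE token (conventionally a line starting with a space; the actual scanner pattern lives in the Ragel-generated code, not this hunk) and literal <pre>/</pre> tags via PRE_START/PRE_END, which inside a blockquote are only honoured in the first column. Wiki markup inside either form is emitted literally. A hedged sketch:

    require 'wikitext'

    parser = Wikitext::Parser.new
    parser.parse(" preformatted line\n")          # leading space expected to yield a <pre> block
    parser.parse("<pre>'''not bold'''</pre>\n")   # explicit tags; the quotes stay literal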
|
1317
|
+
|
1318
|
+
case BLOCKQUOTE:
|
1319
|
+
if (IN_EITHER_OF(NO_WIKI_START, PRE_START))
|
1320
|
+
// no need to check for <pre>; can never appear inside it
|
1321
|
+
str_append(parser->output, escaped_blockquote, TOKEN_LEN(token) + 3); // will either emit ">" or "> "
|
1322
|
+
else if (IN(BLOCKQUOTE_START))
|
1323
|
+
{
|
1324
|
+
// this kind of nesting not allowed (to avoid user confusion)
|
1325
|
+
wiki_pop_excess_elements(parser);
|
1326
|
+
wiki_start_para_if_necessary(parser);
|
1327
|
+
output = parser->capture ? parser->capture : parser->output;
|
1328
|
+
str_append(output, escaped_blockquote, TOKEN_LEN(token) + 3); // will either emit ">" or "> "
|
1329
|
+
break;
|
1330
|
+
}
|
1331
|
+
else
|
1332
|
+
{
|
1333
|
+
ary_push(parser->line, BLOCKQUOTE);
|
1334
|
+
|
1335
|
+
// count number of BLOCKQUOTE tokens in line buffer and in scope stack
|
1336
|
+
i = ary_count(parser->line, BLOCKQUOTE);
|
1337
|
+
j = ary_count(parser->scope, BLOCKQUOTE);
|
1338
|
+
|
1339
|
+
// given that BLOCKQUOTE tokens can be nested, peek ahead and see if there are any more which might affect the decision to push or pop
|
1340
|
+
while (NEXT_TOKEN(), (token->type == BLOCKQUOTE))
|
1341
|
+
{
|
1342
|
+
ary_push(parser->line, BLOCKQUOTE);
|
1343
|
+
i++;
|
1344
|
+
}
|
1345
|
+
|
1346
|
+
// now decide whether to push, pop or do nothing
|
1347
|
+
if (i > j)
|
1348
|
+
{
|
1349
|
+
// must push (increase nesting level)
|
1350
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
|
1351
|
+
for (i = i - j; i > 0; i--)
|
1352
|
+
{
|
1353
|
+
wiki_indent(parser);
|
1354
|
+
str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
|
1355
|
+
str_append_str(parser->output, parser->line_ending);
|
1356
|
+
ary_push(parser->scope, BLOCKQUOTE);
|
1357
|
+
}
|
1358
|
+
}
|
1359
|
+
else if (i < j)
|
1360
|
+
{
|
1361
|
+
// must pop (reduce nesting level)
|
1362
|
+
for (i = j - i; i > 0; i--)
|
1363
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, true);
|
1364
|
+
}
|
1365
|
+
|
1366
|
+
// jump to top of the loop to process token we scanned during lookahead
|
1367
|
+
continue;
|
1368
|
+
}
|
1369
|
+
break;
|
1370
|
+
|
1371
|
+
case BLOCKQUOTE_START:
|
1372
|
+
if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
|
1373
|
+
{
|
1374
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1375
|
+
str_append(parser->output, escaped_blockquote_start, sizeof(escaped_blockquote_start) - 1);
|
1376
|
+
}
|
1377
|
+
else if (IN(BLOCKQUOTE_START))
|
1378
|
+
{
|
1379
|
+
// nesting is fine here
|
1380
|
+
wiki_rollback_failed_link(parser); // if any
|
1381
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
|
1382
|
+
wiki_indent(parser);
|
1383
|
+
str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
|
1384
|
+
str_append_str(parser->output, parser->line_ending);
|
1385
|
+
ary_push(parser->scope, BLOCKQUOTE_START);
|
1386
|
+
ary_push(parser->line, BLOCKQUOTE_START);
|
1387
|
+
}
|
1388
|
+
else if (IN(BLOCKQUOTE))
|
1389
|
+
{
|
1390
|
+
if (token->column_start == 1) // only allowed in first column
|
1391
|
+
{
|
1392
|
+
wiki_rollback_failed_link(parser); // if any
|
1393
|
+
wiki_pop_all_from_stack(parser);
|
1394
|
+
wiki_indent(parser);
|
1395
|
+
str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
|
1396
|
+
str_append_str(parser->output, parser->line_ending);
|
1397
|
+
ary_push(parser->scope, BLOCKQUOTE_START);
|
1398
|
+
ary_push(parser->line, BLOCKQUOTE_START);
|
1399
|
+
}
|
1400
|
+
else // BLOCKQUOTE_START illegal here
|
1401
|
+
{
|
1402
|
+
output = parser->capture ? parser->capture : parser->output;
|
1403
|
+
wiki_pop_excess_elements(parser);
|
1404
|
+
wiki_start_para_if_necessary(parser);
|
1405
|
+
str_append(output, escaped_blockquote_start, sizeof(escaped_blockquote_start) - 1);
|
1406
|
+
}
|
1407
|
+
}
|
1408
|
+
else
|
1409
|
+
{
|
1410
|
+
// would be nice to eliminate the repetition here but it's probably the clearest way
|
1411
|
+
wiki_rollback_failed_link(parser); // if any
|
1412
|
+
wiki_pop_from_stack_up_to(parser, NULL, P, true);
|
1413
|
+
wiki_indent(parser);
|
1414
|
+
str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
|
1415
|
+
str_append_str(parser->output, parser->line_ending);
|
1416
|
+
ary_push(parser->scope, BLOCKQUOTE_START);
|
1417
|
+
ary_push(parser->line, BLOCKQUOTE_START);
|
1418
|
+
}
|
1419
|
+
break;
|
1420
|
+
|
1421
|
+
case BLOCKQUOTE_END:
|
1422
|
+
if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
|
1423
|
+
{
|
1424
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1425
|
+
str_append(parser->output, escaped_blockquote_end, sizeof(escaped_blockquote_end) - 1);
|
1426
|
+
}
|
1427
|
+
else
|
1428
|
+
{
|
1429
|
+
if (IN(BLOCKQUOTE_START))
|
1430
|
+
wiki_pop_from_stack_up_to(parser, parser->output, BLOCKQUOTE_START, true);
|
1431
|
+
else
|
1432
|
+
{
|
1433
|
+
output = parser->capture ? parser->capture : parser->output;
|
1434
|
+
wiki_pop_excess_elements(parser);
|
1435
|
+
wiki_start_para_if_necessary(parser);
|
1436
|
+
str_append(output, escaped_blockquote_end, sizeof(escaped_blockquote_end) - 1);
|
1437
|
+
}
|
1438
|
+
}
|
1439
|
+
break;
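
The BLOCKQUOTE handling above counts ">" markers on the current line against the scope stack and pushes or pops <blockquote> elements so the output nesting follows the input; literal <blockquote>...</blockquote> tags (BLOCKQUOTE_START/END) are also accepted, in the first column when already inside a blockquote. Sketch, with indicative output only:

    require 'wikitext'

    parser = Wikitext::Parser.new
    parser.parse("> level one\n>> level two\n")       # expected: a <blockquote> nested in another
    parser.parse("<blockquote>quoted</blockquote>\n") # explicit tags work as well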
|
1440
|
+
|
1441
|
+
case NO_WIKI_START:
|
1442
|
+
if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
|
1443
|
+
{
|
1444
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1445
|
+
str_append(parser->output, escaped_no_wiki_start, sizeof(escaped_no_wiki_start) - 1);
|
1446
|
+
}
|
1447
|
+
else
|
1448
|
+
{
|
1449
|
+
wiki_pop_excess_elements(parser);
|
1450
|
+
wiki_start_para_if_necessary(parser);
|
1451
|
+
ary_push(parser->scope, NO_WIKI_START);
|
1452
|
+
ary_push(parser->line, NO_WIKI_START);
|
1453
|
+
}
|
1454
|
+
break;
|
1455
|
+
|
1456
|
+
case NO_WIKI_END:
|
1457
|
+
if (IN(NO_WIKI_START))
|
1458
|
+
// <nowiki> should always only ever be the last item in the stack, but use the helper routine just in case
|
1459
|
+
wiki_pop_from_stack_up_to(parser, NULL, NO_WIKI_START, true);
|
1460
|
+
else
|
1461
|
+
{
|
1462
|
+
wiki_pop_excess_elements(parser);
|
1463
|
+
wiki_start_para_if_necessary(parser);
|
1464
|
+
str_append(parser->output, escaped_no_wiki_end, sizeof(escaped_no_wiki_end) - 1);
|
1465
|
+
}
|
1466
|
+
break;
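
NO_WIKI_START/NO_WIKI_END just push and pop a scope in which every other token is emitted in its literal or escaped form, so <nowiki> spans suppress both markup interpretation and autolinking. Sketch:

    require 'wikitext'

    parser = Wikitext::Parser.new
    parser.parse("<nowiki>'''plain''' http://example.com/</nowiki>")
    # expected: the quotes and the URL come out as plain text, with no <strong> or <a> tags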
|
1467
|
+
|
1468
|
+
case STRONG_EM:
|
1469
|
+
if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
|
1470
|
+
{
|
1471
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1472
|
+
str_append(parser->output, literal_strong_em, sizeof(literal_strong_em) - 1);
|
1473
|
+
break;
|
1474
|
+
}
|
1475
|
+
|
1476
|
+
output = parser->capture ? parser->capture : parser->output;
|
1477
|
+
wiki_pop_excess_elements(parser);
|
1478
|
+
|
1479
|
+
// if you've seen STRONG/STRONG_START or EM/EM_START, must close them in the reverse order that you saw them!
|
1480
|
+
// otherwise, must open them
|
1481
|
+
remove_strong = -1;
|
1482
|
+
remove_em = -1;
|
1483
|
+
j = parser->scope->count;
|
1484
|
+
for (j = j - 1; j >= 0; j--)
|
1485
|
+
{
|
1486
|
+
int val = ary_entry(parser->scope, (int)j);
|
1487
|
+
if (val == STRONG || val == STRONG_START)
|
1488
|
+
{
|
1489
|
+
str_append(output, strong_end, sizeof(strong_end) - 1);
|
1490
|
+
remove_strong = j;
|
1491
|
+
}
|
1492
|
+
else if (val == EM || val == EM_START)
|
1493
|
+
{
|
1494
|
+
str_append(output, em_end, sizeof(em_end) - 1);
|
1495
|
+
remove_em = j;
|
1496
|
+
}
|
1497
|
+
}
|
1498
|
+
|
1499
|
+
if (remove_strong > remove_em) // must remove strong first
|
1500
|
+
{
|
1501
|
+
ary_pop(parser->scope);
|
1502
|
+
if (remove_em > -1)
|
1503
|
+
ary_pop(parser->scope);
|
1504
|
+
else // there was no em to remove, so consider this an opening em tag
|
1505
|
+
{
|
1506
|
+
str_append(output, em_start, sizeof(em_start) - 1);
|
1507
|
+
ary_push(parser->scope, EM);
|
1508
|
+
ary_push(parser->line, EM);
|
1509
|
+
}
|
1510
|
+
}
|
1511
|
+
else if (remove_em > remove_strong) // must remove em first
|
1512
|
+
{
|
1513
|
+
ary_pop(parser->scope);
|
1514
|
+
if (remove_strong > -1)
|
1515
|
+
ary_pop(parser->scope);
|
1516
|
+
else // there was no strong to remove, so consider this an opening strong tag
|
1517
|
+
{
|
1518
|
+
str_append(output, strong_start, sizeof(strong_start) - 1);
|
1519
|
+
ary_push(parser->scope, STRONG);
|
1520
|
+
ary_push(parser->line, STRONG);
|
1521
|
+
}
|
1522
|
+
}
|
1523
|
+
else // no strong or em to remove, so this must be a new opening of both
|
1524
|
+
{
|
1525
|
+
wiki_start_para_if_necessary(parser);
|
1526
|
+
str_append(output, strong_em_start, sizeof(strong_em_start) - 1);
|
1527
|
+
ary_push(parser->scope, STRONG);
|
1528
|
+
ary_push(parser->line, STRONG);
|
1529
|
+
ary_push(parser->scope, EM);
|
1530
|
+
ary_push(parser->line, EM);
|
1531
|
+
}
|
1532
|
+
break;
|
1533
|
+
|
1534
|
+
case STRONG:
|
1535
|
+
if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
|
1536
|
+
{
|
1537
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1538
|
+
str_append(parser->output, literal_strong, sizeof(literal_strong) - 1);
|
1539
|
+
}
|
1540
|
+
else
|
1541
|
+
{
|
1542
|
+
output = parser->capture ? parser->capture : parser->output;
|
1543
|
+
if (IN(STRONG_START))
|
1544
|
+
// already in span started with <strong>, no choice but to emit this literally
|
1545
|
+
str_append(output, literal_strong, sizeof(literal_strong) - 1);
|
1546
|
+
else if (IN(STRONG))
|
1547
|
+
// STRONG already seen, this is a closing tag
|
1548
|
+
wiki_pop_from_stack_up_to(parser, output, STRONG, true);
|
1549
|
+
else
|
1550
|
+
{
|
1551
|
+
// this is a new opening
|
1552
|
+
wiki_pop_excess_elements(parser);
|
1553
|
+
wiki_start_para_if_necessary(parser);
|
1554
|
+
str_append(output, strong_start, sizeof(strong_start) - 1);
|
1555
|
+
ary_push(parser->scope, STRONG);
|
1556
|
+
ary_push(parser->line, STRONG);
|
1557
|
+
}
|
1558
|
+
}
|
1559
|
+
break;
|
1560
|
+
|
1561
|
+
case STRONG_START:
|
1562
|
+
if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
|
1563
|
+
{
|
1564
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1565
|
+
str_append(parser->output, escaped_strong_start, sizeof(escaped_strong_start) - 1);
|
1566
|
+
}
|
1567
|
+
else
|
1568
|
+
{
|
1569
|
+
output = parser->capture ? parser->capture : parser->output;
|
1570
|
+
if (IN_EITHER_OF(STRONG_START, STRONG))
|
1571
|
+
str_append(output, escaped_strong_start, sizeof(escaped_strong_start) - 1);
|
1572
|
+
else
|
1573
|
+
{
|
1574
|
+
wiki_pop_excess_elements(parser);
|
1575
|
+
wiki_start_para_if_necessary(parser);
|
1576
|
+
str_append(output, strong_start, sizeof(strong_start) - 1);
|
1577
|
+
ary_push(parser->scope, STRONG_START);
|
1578
|
+
ary_push(parser->line, STRONG_START);
|
1579
|
+
}
|
1580
|
+
}
|
1581
|
+
break;
|
1582
|
+
|
1583
|
+
case STRONG_END:
|
1584
|
+
if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
|
1585
|
+
{
|
1586
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1587
|
+
str_append(parser->output, escaped_strong_end, sizeof(escaped_strong_end) - 1);
|
1588
|
+
}
|
1589
|
+
else
|
1590
|
+
{
|
1591
|
+
output = parser->capture ? parser->capture : parser->output;
|
1592
|
+
if (IN(STRONG_START))
|
1593
|
+
wiki_pop_from_stack_up_to(parser, output, STRONG_START, true);
|
1594
|
+
else
|
1595
|
+
{
|
1596
|
+
// no STRONG_START in scope, so must interpret the STRONG_END without any special meaning
|
1597
|
+
wiki_pop_excess_elements(parser);
|
1598
|
+
wiki_start_para_if_necessary(parser);
|
1599
|
+
str_append(output, escaped_strong_end, sizeof(escaped_strong_end) - 1);
|
1600
|
+
}
|
1601
|
+
}
|
1602
|
+
break;
|
1603
|
+
|
1604
|
+
case EM:
|
1605
|
+
if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
|
1606
|
+
{
|
1607
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1608
|
+
str_append(parser->output, literal_em, sizeof(literal_em) - 1);
|
1609
|
+
}
|
1610
|
+
else
|
1611
|
+
{
|
1612
|
+
output = parser->capture ? parser->capture : parser->output;
|
1613
|
+
if (IN(EM_START))
|
1614
|
+
// already in span started with <em>, no choice but to emit this literally
|
1615
|
+
str_append(output, literal_em, sizeof(literal_em) - 1);
|
1616
|
+
else if (IN(EM))
|
1617
|
+
// EM already seen, this is a closing tag
|
1618
|
+
wiki_pop_from_stack_up_to(parser, output, EM, true);
|
1619
|
+
else
|
1620
|
+
{
|
1621
|
+
// this is a new opening
|
1622
|
+
wiki_pop_excess_elements(parser);
|
1623
|
+
wiki_start_para_if_necessary(parser);
|
1624
|
+
str_append(output, em_start, sizeof(em_start) - 1);
|
1625
|
+
ary_push(parser->scope, EM);
|
1626
|
+
ary_push(parser->line, EM);
|
1627
|
+
}
|
1628
|
+
}
|
1629
|
+
break;
|
1630
|
+
|
1631
|
+
case EM_START:
|
1632
|
+
if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
|
1633
|
+
{
|
1634
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1635
|
+
str_append(parser->output, escaped_em_start, sizeof(escaped_em_start) - 1);
|
1636
|
+
}
|
1637
|
+
else
|
1638
|
+
{
|
1639
|
+
output = parser->capture ? parser->capture : parser->output;
|
1640
|
+
if (IN_EITHER_OF(EM_START, EM))
|
1641
|
+
str_append(output, escaped_em_start, sizeof(escaped_em_start) - 1);
|
1642
|
+
else
|
1643
|
+
{
|
1644
|
+
wiki_pop_excess_elements(parser);
|
1645
|
+
wiki_start_para_if_necessary(parser);
|
1646
|
+
str_append(output, em_start, sizeof(em_start) - 1);
|
1647
|
+
ary_push(parser->scope, EM_START);
|
1648
|
+
ary_push(parser->line, EM_START);
|
1649
|
+
}
|
1650
|
+
}
|
1651
|
+
break;
|
1652
|
+
|
1653
|
+
case EM_END:
|
1654
|
+
if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
|
1655
|
+
{
|
1656
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1657
|
+
str_append(parser->output, escaped_em_end, sizeof(escaped_em_end) - 1);
|
1658
|
+
}
|
1659
|
+
else
|
1660
|
+
{
|
1661
|
+
output = parser->capture ? parser->capture : parser->output;
|
1662
|
+
if (IN(EM_START))
|
1663
|
+
wiki_pop_from_stack_up_to(parser, output, EM_START, true);
|
1664
|
+
else
|
1665
|
+
{
|
1666
|
+
// no EM_START in scope, so must interpret the EM_END without any special meaning
|
1667
|
+
wiki_pop_excess_elements(parser);
|
1668
|
+
wiki_start_para_if_necessary(parser);
|
1669
|
+
str_append(output, escaped_em_end, sizeof(escaped_em_end) - 1);
|
1670
|
+
}
|
1671
|
+
}
|
1672
|
+
break;
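
The STRONG/EM cases treat ''' and '' as toggles, and STRONG_EM (''''') closes whichever of the two is currently open in the correct order before opening the other; literal <strong>/<em> tags have their own *_START/*_END tokens. A hedged sketch:

    require 'wikitext'

    parser = Wikitext::Parser.new
    parser.parse("'''bold''' and ''italic''")
    # expected to contain <strong>bold</strong> and <em>italic</em>
    parser.parse('<em>explicit tags</em> are honoured too')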
|
1673
|
+
|
1674
|
+
case TT:
|
1675
|
+
if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
|
1676
|
+
{
|
1677
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1678
|
+
str_append(parser->output, backtick, sizeof(backtick) - 1);
|
1679
|
+
}
|
1680
|
+
else
|
1681
|
+
{
|
1682
|
+
output = parser->capture ? parser->capture : parser->output;
|
1683
|
+
if (IN(TT_START))
|
1684
|
+
// already in span started with <tt>, no choice but to emit this literally
|
1685
|
+
str_append(output, backtick, sizeof(backtick) - 1);
|
1686
|
+
else if (IN(TT))
|
1687
|
+
// TT (`) already seen, this is a closing tag
|
1688
|
+
wiki_pop_from_stack_up_to(parser, output, TT, true);
|
1689
|
+
else
|
1690
|
+
{
|
1691
|
+
// this is a new opening
|
1692
|
+
wiki_pop_excess_elements(parser);
|
1693
|
+
wiki_start_para_if_necessary(parser);
|
1694
|
+
str_append(output, code_start, sizeof(code_start) - 1);
|
1695
|
+
ary_push(parser->scope, TT);
|
1696
|
+
ary_push(parser->line, TT);
|
1697
|
+
}
|
1698
|
+
}
|
1699
|
+
break;
|
1700
|
+
|
1701
|
+
case TT_START:
|
1702
|
+
if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
|
1703
|
+
{
|
1704
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1705
|
+
str_append(parser->output, escaped_tt_start, sizeof(escaped_tt_start) - 1);
|
1706
|
+
}
|
1707
|
+
else
|
1708
|
+
{
|
1709
|
+
output = parser->capture ? parser->capture : parser->output;
|
1710
|
+
if (IN_EITHER_OF(TT_START, TT))
|
1711
|
+
str_append(output, escaped_tt_start, sizeof(escaped_tt_start) - 1);
|
1712
|
+
else
|
1713
|
+
{
|
1714
|
+
wiki_pop_excess_elements(parser);
|
1715
|
+
wiki_start_para_if_necessary(parser);
|
1716
|
+
str_append(output, code_start, sizeof(code_start) - 1);
|
1717
|
+
ary_push(parser->scope, TT_START);
|
1718
|
+
ary_push(parser->line, TT_START);
|
1719
|
+
}
|
1720
|
+
}
|
1721
|
+
break;
|
1722
|
+
|
1723
|
+
case TT_END:
|
1724
|
+
if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
|
1725
|
+
{
|
1726
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1727
|
+
str_append(parser->output, escaped_tt_end, sizeof(escaped_tt_end) - 1);
|
1728
|
+
}
|
1729
|
+
else
|
1730
|
+
{
|
1731
|
+
output = parser->capture ? parser->capture : parser->output;
|
1732
|
+
if (IN(TT_START))
|
1733
|
+
wiki_pop_from_stack_up_to(parser, output, TT_START, true);
|
1734
|
+
else
|
1735
|
+
{
|
1736
|
+
// no TT_START in scope, so must interpret the TT_END without any special meaning
|
1737
|
+
wiki_pop_excess_elements(parser);
|
1738
|
+
wiki_start_para_if_necessary(parser);
|
1739
|
+
str_append(output, escaped_tt_end, sizeof(escaped_tt_end) - 1);
|
1740
|
+
}
|
1741
|
+
}
|
1742
|
+
break;
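
TT spans are delimited by backticks (or literal <tt> tags via TT_START/TT_END) but are emitted using code_start/code_end, i.e. as <code> elements. Sketch:

    require 'wikitext'

    parser = Wikitext::Parser.new
    parser.parse('`monospaced`')        # expected: <code>monospaced</code>
    parser.parse('<tt>also code</tt>')  # <tt> markup is normalized to <code> on output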
|
1743
|
+
|
1744
|
+
case OL:
|
1745
|
+
case UL:
|
1746
|
+
if (IN_EITHER_OF(NO_WIKI_START, PRE_START))
|
1747
|
+
{
|
1748
|
+
// no need to check for PRE; can never appear inside it
|
1749
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
1750
|
+
break;
|
1751
|
+
}
|
1752
|
+
|
1753
|
+
// count number of tokens in line and scope stacks
|
1754
|
+
int bq_count = ary_count(parser->scope, BLOCKQUOTE_START);
|
1755
|
+
i = parser->line->count - ary_count(parser->line, BLOCKQUOTE_START);
|
1756
|
+
j = parser->scope->count - bq_count;
|
1757
|
+
k = i;
|
1758
|
+
|
1759
|
+
// list tokens can be nested so look ahead for any more which might affect the decision to push or pop
|
1760
|
+
for (;;)
|
1761
|
+
{
|
1762
|
+
type = token->type;
|
1763
|
+
if (type == OL || type == UL)
|
1764
|
+
{
|
1765
|
+
token = NULL;
|
1766
|
+
if (i - k >= 2) // already seen at least one OL or UL
|
1767
|
+
{
|
1768
|
+
ary_push(parser->line, NESTED_LIST); // which means this is a nested list
|
1769
|
+
i += 3;
|
1770
|
+
}
|
1771
|
+
else
|
1772
|
+
i += 2;
|
1773
|
+
ary_push(parser->line, type);
|
1774
|
+
ary_push(parser->line, LI);
|
1775
|
+
|
1776
|
+
// want to compare line with scope but can only do so if scope has enough items on it
|
1777
|
+
if (j >= i)
|
1778
|
+
{
|
1779
|
+
if (ary_entry(parser->scope, (int)(i + bq_count - 2)) == type &&
|
1780
|
+
ary_entry(parser->scope, (int)(i + bq_count - 1)) == LI)
|
1781
|
+
{
|
1782
|
+
// line and scope match at this point: do nothing yet
|
1783
|
+
}
|
1784
|
+
else
|
1785
|
+
{
|
1786
|
+
// item just pushed onto line does not match corresponding slot of scope!
|
1787
|
+
for (; j >= i - 2; j--)
|
1788
|
+
// must pop back before emitting
|
1789
|
+
wiki_pop_from_stack(parser, NULL);
|
1790
|
+
|
1791
|
+
// will emit UL or OL, then LI
|
1792
|
+
break;
|
1793
|
+
}
|
1794
|
+
}
|
1795
|
+
else // line stack size now exceeds scope stack size: must increase nesting level
|
1796
|
+
break; // will emit UL or OL, then LI
|
1797
|
+
}
|
1798
|
+
else
|
1799
|
+
{
|
1800
|
+
// not an OL or UL token!
|
1801
|
+
if (j == i)
|
1802
|
+
// must close existing LI and re-open new one
|
1803
|
+
wiki_pop_from_stack(parser, NULL);
|
1804
|
+
else if (j > i)
|
1805
|
+
{
|
1806
|
+
// item just pushed onto line does not match corresponding slot of scope!
|
1807
|
+
for (; j >= i; j--)
|
1808
|
+
// must pop back before emitting
|
1809
|
+
wiki_pop_from_stack(parser, NULL);
|
1810
|
+
}
|
1811
|
+
break;
|
1812
|
+
}
|
1813
|
+
NEXT_TOKEN();
|
1814
|
+
}
|
1815
|
+
|
1816
|
+
// will emit
|
1817
|
+
if (type == OL || type == UL)
|
1818
|
+
{
|
1819
|
+
// if LI is at the top of a stack this is the start of a nested list
|
1820
|
+
if (j > 0 && ary_entry(parser->scope, -1) == LI)
|
1821
|
+
{
|
1822
|
+
// so we should precede it with a CRLF, and indicate that it's a nested list
|
1823
|
+
str_append(parser->output, parser->line_ending->ptr, parser->line_ending->len);
|
1824
|
+
ary_push(parser->scope, NESTED_LIST);
|
1825
|
+
}
|
1826
|
+
else
|
1827
|
+
{
|
1828
|
+
// this is a new list
|
1829
|
+
if (IN(BLOCKQUOTE_START))
|
1830
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
|
1831
|
+
else
|
1832
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
|
1833
|
+
}
|
1834
|
+
|
1835
|
+
// emit
|
1836
|
+
wiki_indent(parser);
|
1837
|
+
if (type == OL)
|
1838
|
+
str_append(parser->output, ol_start, sizeof(ol_start) - 1);
|
1839
|
+
else if (type == UL)
|
1840
|
+
str_append(parser->output, ul_start, sizeof(ul_start) - 1);
|
1841
|
+
ary_push(parser->scope, type);
|
1842
|
+
str_append(parser->output, parser->line_ending->ptr, parser->line_ending->len);
|
1843
|
+
}
|
1844
|
+
else if (type == SPACE)
|
1845
|
+
// silently throw away the optional SPACE token after final list marker
|
1846
|
+
token = NULL;
|
1847
|
+
|
1848
|
+
wiki_indent(parser);
|
1849
|
+
str_append(parser->output, li_start, sizeof(li_start) - 1);
|
1850
|
+
ary_push(parser->scope, LI);
|
1851
|
+
|
1852
|
+
// any subsequent UL or OL tokens on this line are syntax errors and must be emitted literally
|
1853
|
+
if (type == OL || type == UL)
|
1854
|
+
{
|
1855
|
+
k = 0;
|
1856
|
+
while (k++, NEXT_TOKEN(), (type = token->type))
|
1857
|
+
{
|
1858
|
+
if (type == OL || type == UL)
|
1859
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
1860
|
+
else if (type == SPACE && k == 1)
|
1861
|
+
{
|
1862
|
+
// silently throw away the optional SPACE token after final list marker
|
1863
|
+
token = NULL;
|
1864
|
+
break;
|
1865
|
+
}
|
1866
|
+
else
|
1867
|
+
break;
|
1868
|
+
}
|
1869
|
+
}
|
1870
|
+
|
1871
|
+
// jump to top of the loop to process token we scanned during lookahead
|
1872
|
+
continue;
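
The OL/UL case compares the run of list markers on the current line with the scope stack to open or close <ol>/<ul>/<li> elements, so longer runs of markers produce nested lists. The marker characters themselves come from the Ragel scanner rather than this hunk; the sketch below assumes the usual wikitext conventions of "#" for ordered and "*" for unordered items.

    require 'wikitext'

    parser = Wikitext::Parser.new
    parser.parse("* one\n* two\n** nested\n")   # expected: a <ul> containing a nested <ul>
    parser.parse("# first\n# second\n")         # expected: an <ol> with two <li> items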
|
1873
|
+
|
1874
|
+
case H6_START:
|
1875
|
+
case H5_START:
|
1876
|
+
case H4_START:
|
1877
|
+
case H3_START:
|
1878
|
+
case H2_START:
|
1879
|
+
case H1_START:
|
1880
|
+
if (IN_EITHER_OF(NO_WIKI_START, PRE_START))
|
1881
|
+
{
|
1882
|
+
// no need to check for PRE; can never appear inside it
|
1883
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
1884
|
+
break;
|
1885
|
+
}
|
1886
|
+
|
1887
|
+
// pop up to but not including the last BLOCKQUOTE on the scope stack
|
1888
|
+
if (IN(BLOCKQUOTE_START))
|
1889
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
|
1890
|
+
else
|
1891
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
|
1892
|
+
|
1893
|
+
// count number of BLOCKQUOTE tokens in line buffer and in scope stack
|
1894
|
+
ary_push(parser->line, type);
|
1895
|
+
i = ary_count(parser->line, BLOCKQUOTE);
|
1896
|
+
j = ary_count(parser->scope, BLOCKQUOTE);
|
1897
|
+
|
1898
|
+
// decide whether we need to pop off excess BLOCKQUOTE tokens (will never need to push; that is handled above in the BLOCKQUOTE case itself)
|
1899
|
+
if (i < j)
|
1900
|
+
{
|
1901
|
+
// must pop (reduce nesting level)
|
1902
|
+
for (i = j - i; i > 0; i--)
|
1903
|
+
wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, true);
|
1904
|
+
}
|
1905
|
+
|
1906
|
+
// discard any whitespace here (so that "== foo ==" will be translated to "<h2>foo</h2>" rather than "<h2> foo </h2>")
|
1907
|
+
while (NEXT_TOKEN(), (token->type == SPACE))
|
1908
|
+
; // discard
|
1909
|
+
|
1910
|
+
ary_push(parser->scope, type);
|
1911
|
+
wiki_indent(parser);
|
1912
|
+
|
1913
|
+
// take base_heading_level into account
|
1914
|
+
type += base_heading_level;
|
1915
|
+
if (type > H6_START) // no need to check for underflow (base_heading_level never negative)
|
1916
|
+
type = H6_START;
|
1917
|
+
|
1918
|
+
// rather than repeat all that code for each kind of heading, share it and use a conditional here
|
1919
|
+
if (type == H6_START)
|
1920
|
+
str_append(parser->output, h6_start, sizeof(h6_start) - 1);
|
1921
|
+
else if (type == H5_START)
|
1922
|
+
str_append(parser->output, h5_start, sizeof(h5_start) - 1);
|
1923
|
+
else if (type == H4_START)
|
1924
|
+
str_append(parser->output, h4_start, sizeof(h4_start) - 1);
|
1925
|
+
else if (type == H3_START)
|
1926
|
+
str_append(parser->output, h3_start, sizeof(h3_start) - 1);
|
1927
|
+
else if (type == H2_START)
|
1928
|
+
str_append(parser->output, h2_start, sizeof(h2_start) - 1);
|
1929
|
+
else if (type == H1_START)
|
1930
|
+
str_append(parser->output, h1_start, sizeof(h1_start) - 1);
|
1931
|
+
|
1932
|
+
// jump to top of the loop to process token we scanned during lookahead
|
1933
|
+
continue;
|
1934
|
+
|
1935
|
+
case H6_END:
|
1936
|
+
case H5_END:
|
1937
|
+
case H4_END:
|
1938
|
+
case H3_END:
|
1939
|
+
case H2_END:
|
1940
|
+
case H1_END:
|
1941
|
+
if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
|
1942
|
+
{
|
1943
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1944
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
1945
|
+
}
|
1946
|
+
else
|
1947
|
+
{
|
1948
|
+
wiki_rollback_failed_external_link(parser); // if any
|
1949
|
+
if ((type == H6_END && !IN(H6_START)) ||
|
1950
|
+
(type == H5_END && !IN(H5_START)) ||
|
1951
|
+
(type == H4_END && !IN(H4_START)) ||
|
1952
|
+
(type == H3_END && !IN(H3_START)) ||
|
1953
|
+
(type == H2_END && !IN(H2_START)) ||
|
1954
|
+
(type == H1_END && !IN(H1_START)))
|
1955
|
+
{
|
1956
|
+
// literal output only if not in appropriate scope (we stay silent in that case)
|
1957
|
+
wiki_start_para_if_necessary(parser);
|
1958
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
1959
|
+
}
|
1960
|
+
}
|
1961
|
+
break;
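
Heading tokens are shifted by base_heading_level (clamped at <h6>), surrounding whitespace is discarded, and an H*_END with no matching H*_START in scope is emitted literally. Sketch:

    require 'wikitext'

    Wikitext::Parser.new.parse("== foo ==\n")
    # expected: <h2>foo</h2> rather than "<h2> foo </h2>"
    Wikitext::Parser.new(base_heading_level: 3).parse("== foo ==\n")
    # expected: the same input now renders as an <h5> heading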
|
1962
|
+
|
1963
|
+
case MAIL:
|
1964
|
+
if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
|
1965
|
+
{
|
1966
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
1967
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
1968
|
+
}
|
1969
|
+
else if (IN(EXT_LINK_START))
|
1970
|
+
// must be capturing and this must be part of the link text
|
1971
|
+
str_append(parser->capture, token->start, TOKEN_LEN(token));
|
1972
|
+
else
|
1973
|
+
{
|
1974
|
+
wiki_pop_excess_elements(parser);
|
1975
|
+
wiki_start_para_if_necessary(parser);
|
1976
|
+
token_str->ptr = token->start;
|
1977
|
+
token_str->len = TOKEN_LEN(token);
|
1978
|
+
wiki_append_hyperlink(parser, rb_str_new2("mailto:"), token_str, NULL, mailto_class, Qnil, true);
|
1979
|
+
}
|
1980
|
+
break;
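
Bare email addresses (MAIL tokens) become mailto: hyperlinks using the configured mailto_class, unless they appear inside nowiki/pre scope or while external link text is being captured. Sketch; the exact attribute order and escaping are handled by wiki_append_hyperlink and are not shown here.

    require 'wikitext'

    Wikitext::Parser.new.parse('write to user@example.com')
    # expected: a mailto: link with class "mailto" wrapping the address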
|
1981
|
+
|
1982
|
+
case URI:
|
1983
|
+
if (IN(NO_WIKI_START))
|
1984
|
+
{
|
1985
|
+
// user can temporarily suppress autolinking by using <nowiki></nowiki>
|
1986
|
+
// note that unlike MediaWiki, we do allow autolinking inside PRE blocks
|
1987
|
+
token_str->ptr = token->start;
|
1988
|
+
token_str->len = TOKEN_LEN(token);
|
1989
|
+
wiki_append_sanitized_link_target(token_str, parser->output, false);
|
1990
|
+
}
|
1991
|
+
else if (IN(LINK_START))
|
1992
|
+
{
|
1993
|
+
// if the URI were allowed it would have been handled already in LINK_START
|
1994
|
+
wiki_rollback_failed_internal_link(parser);
|
1995
|
+
token_str->ptr = token->start;
|
1996
|
+
token_str->len = TOKEN_LEN(token);
|
1997
|
+
wiki_append_hyperlink(parser, Qnil, token_str, NULL, parser->external_link_class, parser->external_link_rel, true);
|
1998
|
+
}
|
1999
|
+
else if (IN(EXT_LINK_START))
|
2000
|
+
{
|
2001
|
+
if (parser->link_target->len == 0)
|
2002
|
+
{
|
2003
|
+
// this must be our link target: look ahead to make sure we see the space we're expecting to see
|
2004
|
+
token_str->ptr = token->start;
|
2005
|
+
token_str->len = TOKEN_LEN(token);
|
2006
|
+
NEXT_TOKEN();
|
2007
|
+
if (token->type == SPACE)
|
2008
|
+
{
|
2009
|
+
ary_push(parser->scope, SPACE);
|
2010
|
+
str_append_str(parser->link_target, token_str);
|
2011
|
+
str_clear(parser->link_text);
|
2012
|
+
parser->capture = parser->link_text;
|
2013
|
+
token = NULL; // silently consume space
|
2014
|
+
}
|
2015
|
+
else
|
2016
|
+
{
|
2017
|
+
// didn't see the space! this must be an error
|
2018
|
+
wiki_pop_from_stack(parser, NULL);
|
2019
|
+
wiki_pop_excess_elements(parser);
|
2020
|
+
wiki_start_para_if_necessary(parser);
|
2021
|
+
str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
|
2022
|
+
wiki_append_hyperlink(parser, Qnil, token_str, NULL, parser->external_link_class, parser->external_link_rel, true);
|
2023
|
+
continue;
|
2024
|
+
}
|
2025
|
+
}
|
2026
|
+
else
|
2027
|
+
{
|
2028
|
+
token_str->ptr = token->start;
|
2029
|
+
token_str->len = TOKEN_LEN(token);
|
2030
|
+
wiki_append_sanitized_link_target(token_str, parser->link_text, false);
|
2031
|
+
}
|
2032
|
+
}
|
2033
|
+
else
|
2034
|
+
{
|
2035
|
+
wiki_pop_excess_elements(parser);
|
2036
|
+
wiki_start_para_if_necessary(parser);
|
2037
|
+
token_str->ptr = token->start;
|
2038
|
+
token_str->len = TOKEN_LEN(token);
|
2039
|
+
wiki_append_hyperlink(parser, Qnil, token_str, NULL, parser->external_link_class, parser->external_link_rel, true);
|
2040
|
+
}
|
2041
|
+
break;
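
The URI case autolinks bare URLs with the external link class and rel, treats a URI seen right after "[" as the external link target, and leaves URIs as plain (sanitized) text inside <nowiki>. Sketch:

    require 'wikitext'

    parser = Wikitext::Parser.new                      # autolink defaults to true
    parser.parse('see http://example.com/ for details')
    # expected: the URL becomes a hyperlink with class "external"
    parser.parse('<nowiki>http://example.com/</nowiki>')
    # expected: the URL stays plain text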
|
2042
|
+
|
2043
|
+
case PATH:
|
2044
|
+
if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
|
2045
|
+
{
|
2046
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
2047
|
+
str_append(parser->output, token->start, TOKEN_LEN(token));
|
2048
|
+
}
|
2049
|
+
else if (IN(EXT_LINK_START))
|
2050
|
+
{
|
2051
|
+
if (parser->link_target->len == 0)
|
2052
|
+
{
|
2053
|
+
// this must be our link target: look ahead to make sure we see the space we're expecting to see
|
2054
|
+
token_str->ptr = token->start;
|
2055
|
+
token_str->len = TOKEN_LEN(token);
|
2056
|
+
NEXT_TOKEN();
|
2057
|
+
if (token->type == SPACE)
|
2058
|
+
{
|
2059
|
+
ary_push(parser->scope, PATH);
|
2060
|
+
ary_push(parser->scope, SPACE);
|
2061
|
+
str_append_str(parser->link_target, token_str);
|
2062
|
+
str_clear(parser->link_text);
|
2063
|
+
parser->capture = parser->link_text;
|
2064
|
+
token = NULL; // silently consume space
|
2065
|
+
}
|
2066
|
+
else
|
2067
|
+
{
|
2068
|
+
// didn't see the space! this must be an error
|
2069
|
+
wiki_pop_from_stack(parser, NULL);
|
2070
|
+
wiki_pop_excess_elements(parser);
|
2071
|
+
wiki_start_para_if_necessary(parser);
|
2072
|
+
str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
|
2073
|
+
str_append_str(parser->output, token_str);
|
2074
|
+
continue;
|
2075
|
+
}
|
2076
|
+
}
|
2077
|
+
else
|
2078
|
+
str_append(parser->link_text, token->start, TOKEN_LEN(token));
|
2079
|
+
}
|
2080
|
+
else
|
2081
|
+
{
|
2082
|
+
output = parser->capture ? parser->capture : parser->output;
|
2083
|
+
wiki_pop_excess_elements(parser);
|
2084
|
+
wiki_start_para_if_necessary(parser);
|
2085
|
+
str_append(output, token->start, TOKEN_LEN(token));
|
2086
|
+
}
|
2087
|
+
break;
|
2088
|
+
|
2089
|
+
// internal links (links to other wiki articles) look like this:
|
2090
|
+
// [[another article]] (would point at, for example, "/wiki/another_article")
|
2091
|
+
// [[the other article|the link text we'll use for it]]
|
2092
|
+
// [[the other article | the link text we'll use for it]]
|
2093
|
+
// MediaWiki has strict requirements about what it will accept as a link target:
|
2094
|
+
// all wikitext markup is disallowed:
|
2095
|
+
// example [[foo ''bar'' baz]]
|
2096
|
+
// renders [[foo <em>bar</em> baz]] (ie. not a link)
|
2097
|
+
// example [[foo <em>bar</em> baz]]
|
2098
|
+
// renders [[foo <em>bar</em> baz]] (ie. not a link)
|
2099
|
+
// example [[foo <nowiki>''</nowiki> baz]]
|
2100
|
+
// renders [[foo '' baz]] (ie. not a link)
|
2101
|
+
// example [[foo <bar> baz]]
|
2102
|
+
// renders [[foo <bar> baz]] (ie. not a link)
|
2103
|
+
// HTML entities and non-ASCII, however, make it through:
|
2104
|
+
// example [[foo €]]
|
2105
|
+
// renders <a href="/wiki/Foo_%E2%82%AC">foo €</a>
|
2106
|
+
// example [[foo €]]
|
2107
|
+
// renders <a href="/wiki/Foo_%E2%82%AC">foo €</a>
|
2108
|
+
// we'll impose similar restrictions here for the link target; allowed tokens will be:
|
2109
|
+
// SPACE, SPECIAL_URI_CHARS, PRINTABLE, PATH, ALNUM, DEFAULT, QUOT and AMP
|
2110
|
+
// everything else will be rejected
|
2111
|
+
case LINK_START:
|
2112
|
+
output = parser->capture ? parser->capture : parser->output;
|
2113
|
+
if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
|
2114
|
+
{
|
2115
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
2116
|
+
str_append(output, link_start, sizeof(link_start) - 1);
|
2117
|
+
}
|
2118
|
+
else if (IN(EXT_LINK_START))
|
2119
|
+
// already in external link scope! (and in fact, must be capturing link_text right now)
|
2120
|
+
str_append(output, link_start, sizeof(link_start) - 1);
|
2121
|
+
else if (IN(LINK_START))
|
2122
|
+
{
|
2123
|
+
// already in internal link scope! this is a syntax error
|
2124
|
+
wiki_rollback_failed_internal_link(parser);
|
2125
|
+
str_append(parser->output, link_start, sizeof(link_start) - 1);
|
2126
|
+
}
|
2127
|
+
else if (IN(SEPARATOR))
|
2128
|
+
{
|
2129
|
+
// scanning internal link text
|
2130
|
+
}
|
2131
|
+
else // not in internal link scope yet
|
2132
|
+
{
|
2133
|
+
// will either emit a link, or the rollback of a failed link, so start the para now
|
2134
|
+
wiki_pop_excess_elements(parser);
|
2135
|
+
wiki_start_para_if_necessary(parser);
|
2136
|
+
ary_push(parser->scope, LINK_START);
|
2137
|
+
|
2138
|
+
// look ahead and try to gobble up link target
|
2139
|
+
while (NEXT_TOKEN(), (type = token->type))
|
2140
|
+
{
|
2141
|
+
if (type == SPACE ||
|
2142
|
+
type == SPECIAL_URI_CHARS ||
|
2143
|
+
type == PATH ||
|
2144
|
+
type == PRINTABLE ||
|
2145
|
+
type == ALNUM ||
|
2146
|
+
type == DEFAULT ||
|
2147
|
+
type == QUOT ||
|
2148
|
+
type == QUOT_ENTITY ||
|
2149
|
+
type == AMP ||
|
2150
|
+
type == AMP_ENTITY ||
|
2151
|
+
type == IMG_START ||
|
2152
|
+
type == IMG_END ||
|
2153
|
+
type == LEFT_CURLY ||
|
2154
|
+
type == RIGHT_CURLY)
|
2155
|
+
{
|
2156
|
+
// accumulate these tokens into link_target
|
2157
|
+
if (parser->link_target->len == 0)
|
2158
|
+
{
|
2159
|
+
str_clear(parser->link_target);
|
2160
|
+
parser->capture = parser->link_target;
|
2161
|
+
}
|
2162
|
+
if (type == QUOT_ENTITY)
|
2163
|
+
// don't insert the entity, insert the literal quote
|
2164
|
+
str_append(parser->link_target, quote, sizeof(quote) - 1);
|
2165
|
+
else if (type == AMP_ENTITY)
|
2166
|
+
// don't insert the entity, insert the literal ampersand
|
2167
|
+
str_append(parser->link_target, ampersand, sizeof(ampersand) - 1);
|
2168
|
+
else
|
2169
|
+
str_append(parser->link_target, token->start, TOKEN_LEN(token));
|
2170
|
+
}
|
2171
|
+
else if (type == LINK_END)
|
2172
|
+
{
|
2173
|
+
if (parser->link_target->len == 0) // bail for inputs like "[[]]"
|
2174
|
+
wiki_rollback_failed_internal_link(parser);
|
2175
|
+
break; // jump back to top of loop (will handle this in LINK_END case below)
|
2176
|
+
}
|
2177
|
+
else if (type == SEPARATOR)
|
2178
|
+
{
|
2179
|
+
if (parser->link_target->len == 0) // bail for inputs like "[[|"
|
2180
|
+
wiki_rollback_failed_internal_link(parser);
|
2181
|
+
else
|
2182
|
+
{
|
2183
|
+
ary_push(parser->scope, SEPARATOR);
|
2184
|
+
str_clear(parser->link_text);
|
2185
|
+
parser->capture = parser->link_text;
|
2186
|
+
token = NULL;
|
2187
|
+
}
|
2188
|
+
break;
|
2189
|
+
}
|
2190
|
+
else // unexpected token (syntax error)
|
2191
|
+
{
|
2192
|
+
wiki_rollback_failed_internal_link(parser);
|
2193
|
+
break; // jump back to top of loop to handle unexpected token
|
2194
|
+
}
|
2195
|
+
}
|
2196
|
+
|
2197
|
+
// jump to top of the loop to process token we scanned during lookahead (if any)
|
2198
|
+
continue;
|
2199
|
+
}
|
2200
|
+
break;
|
2201
|
+
|
2202
|
+
case LINK_END:
|
2203
|
+
output = parser->capture ? parser->capture : parser->output;
|
2204
|
+
if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
|
2205
|
+
{
|
2206
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
2207
|
+
str_append(output, link_end, sizeof(link_end) - 1);
|
2208
|
+
}
|
2209
|
+
else if (IN(EXT_LINK_START))
|
2210
|
+
// already in external link scope! (and in fact, must be capturing link_text right now)
|
2211
|
+
str_append(output, link_end, sizeof(link_end) - 1);
|
2212
|
+
else if (IN(LINK_START)) // in internal link scope!
|
2213
|
+
{
|
2214
|
+
if (wiki_blank(parser->link_target))
|
2215
|
+
{
|
2216
|
+
// special case for inputs like "[[ ]]"
|
2217
|
+
wiki_rollback_failed_internal_link(parser);
|
2218
|
+
str_append(parser->output, link_end, sizeof(link_end) - 1);
|
2219
|
+
break;
|
2220
|
+
}
|
2221
|
+
if (parser->link_text->len == 0 ||
|
2222
|
+
wiki_blank(parser->link_text))
|
2223
|
+
{
|
2224
|
+
// use link target as link text
|
2225
|
+
str_clear(parser->link_text);
|
2226
|
+
wiki_append_sanitized_link_target(parser->link_target, parser->link_text, true);
|
2227
|
+
}
|
2228
|
+
else
|
2229
|
+
wiki_trim_link_text(parser);
|
2230
|
+
|
2231
|
+
// perform "redlink" check before manipulating link_target
|
2232
|
+
if (NIL_P(link_proc))
|
2233
|
+
j = Qnil;
|
2234
|
+
else
|
2235
|
+
{
|
2236
|
+
j = rb_funcall(link_proc, rb_intern("call"), 1, string_from_str(parser->link_target));
|
2237
|
+
if (!NIL_P(j))
|
2238
|
+
{
|
2239
|
+
VALUE l = j; // can't cast inside StringValue macro
|
2240
|
+
j = StringValue(l);
|
2241
|
+
}
|
2242
|
+
}
|
2243
|
+
wiki_encode_link_target(parser);
|
2244
|
+
wiki_pop_from_stack_up_to(parser, output, LINK_START, true);
|
2245
|
+
parser->capture = NULL;
|
2246
|
+
wiki_append_hyperlink(parser, prefix, parser->link_target, parser->link_text, j, Qnil, false);
|
2247
|
+
str_clear(parser->link_target);
|
2248
|
+
str_clear(parser->link_text);
|
2249
|
+
}
|
2250
|
+
else // wasn't in internal link scope
|
2251
|
+
{
|
2252
|
+
wiki_pop_excess_elements(parser);
|
2253
|
+
wiki_start_para_if_necessary(parser);
|
2254
|
+
str_append(output, link_end, sizeof(link_end) - 1);
|
2255
|
+
}
|
2256
|
+
break;
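
As the long comment before LINK_START spells out, [[...]] targets may only contain a restricted set of tokens; anything else rolls the link back to literal text. On success the target is encoded with internal_link_prefix (spaces become underscores by default) and a non-nil link_proc result is used as the anchor's CSS class. Sketch with indicative output:

    require 'wikitext'

    parser = Wikitext::Parser.new            # internal_link_prefix defaults to "/wiki/"
    parser.parse('[[another article]]')
    # expected: a link pointing at "/wiki/another_article" with text "another article"
    parser.parse('[[another article|click here]]')
    # expected: same href, link text "click here"
    parser.parse("[[foo ''bar'' baz]]")
    # expected: not a link; emitted literally, as the comment above describes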
|
2257
|
+
|
2258
|
+
// external links look like this:
|
2259
|
+
// [http://google.com/ the link text]
|
2260
|
+
// [/other/page/on/site see this page]
|
2261
|
+
// strings in square brackets which don't match this syntax get passed through literally; eg:
|
2262
|
+
// he was very angery [sic] about the turn of events
|
2263
|
+
case EXT_LINK_START:
|
2264
|
+
output = parser->capture ? parser->capture : parser->output;
|
2265
|
+
if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
|
2266
|
+
{
|
2267
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
2268
|
+
str_append(output, ext_link_start, sizeof(ext_link_start) - 1);
|
2269
|
+
}
|
2270
|
+
else if (IN(EXT_LINK_START))
|
2271
|
+
// already in external link scope! (and in fact, must be capturing link_text right now)
|
2272
|
+
str_append(output, ext_link_start, sizeof(ext_link_start) - 1);
|
2273
|
+
else if (IN(LINK_START))
|
2274
|
+
{
|
2275
|
+
// already in internal link scope!
|
2276
|
+
if (parser->link_target->len == 0 || !IN(SPACE))
|
2277
|
+
str_append(parser->link_target, ext_link_start, sizeof(ext_link_start) - 1);
|
2278
|
+
else // link target has already been scanned
|
2279
|
+
str_append(parser->link_text, ext_link_start, sizeof(ext_link_start) - 1);
|
2280
|
+
}
|
2281
|
+
else // not in external link scope yet
|
2282
|
+
{
|
2283
|
+
// will either emit a link, or the rollback of a failed link, so start the para now
|
2284
|
+
wiki_pop_excess_elements(parser);
|
2285
|
+
wiki_start_para_if_necessary(parser);
|
2286
|
+
|
2287
|
+
// look ahead: expect an absolute URI (with protocol) or "relative" (path) URI
|
2288
|
+
NEXT_TOKEN();
|
2289
|
+
if (token->type == URI || token->type == PATH)
|
2290
|
+
ary_push(parser->scope, EXT_LINK_START); // so far so good, jump back to the top of the loop
|
2291
|
+
else
|
2292
|
+
// only get here if there was a syntax error (missing URI)
|
2293
|
+
str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
|
2294
|
+
continue; // jump back to top of loop to handle token (either URI or whatever it is)
|
2295
|
+
}
|
2296
|
+
break;
|
2297
|
+
|
2298
|
+
case EXT_LINK_END:
|
2299
|
+
output = parser->capture ? parser->capture : parser->output;
|
2300
|
+
if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
|
2301
|
+
{
|
2302
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
2303
|
+
str_append(output, ext_link_end, sizeof(ext_link_end) - 1);
|
2304
|
+
}
|
2305
|
+
else if (IN(EXT_LINK_START))
|
2306
|
+
{
|
2307
|
+
if (parser->link_text->len == 0)
|
2308
|
+
// syntax error: external link with no link text
|
2309
|
+
wiki_rollback_failed_external_link(parser);
|
2310
|
+
else
|
2311
|
+
{
|
2312
|
+
// success!
|
2313
|
+
j = IN(PATH) ? Qnil : parser->external_link_class;
|
2314
|
+
k = IN(PATH) ? Qnil : parser->external_link_rel;
|
2315
|
+
wiki_pop_from_stack_up_to(parser, output, EXT_LINK_START, true);
|
2316
|
+
parser->capture = NULL;
|
2317
|
+
wiki_append_hyperlink(parser, Qnil, parser->link_target, parser->link_text, j, k, false);
|
2318
|
+
}
|
2319
|
+
str_clear(parser->link_target);
|
2320
|
+
str_clear(parser->link_text);
|
2321
|
+
}
|
2322
|
+
else
|
2323
|
+
{
|
2324
|
+
wiki_pop_excess_elements(parser);
|
2325
|
+
wiki_start_para_if_necessary(parser);
|
2326
|
+
str_append(parser->output, ext_link_end, sizeof(ext_link_end) - 1);
|
2327
|
+
}
|
2328
|
+
break;
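
External links follow the [URI text] and [/path text] forms from the comment above: an absolute URI gets external_link_class and external_link_rel, a PATH target gets neither, and a missing link text triggers a rollback. Sketch with indicative output:

    require 'wikitext'

    parser = Wikitext::Parser.new
    parser.parse('[http://google.com/ the link text]')
    # expected: an <a class="external" href="http://google.com/"> around "the link text"
    parser.parse('[/other/page/on/site see this page]')
    # expected: a link without the "external" class, since PATH targets skip it
    parser.parse('he was very angery [sic] about the turn of events')
    # expected: passed through literally (no URI or PATH follows the "[")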
|
2329
|
+
|
2330
|
+
case SEPARATOR:
|
2331
|
+
output = parser->capture ? parser->capture : parser->output;
|
2332
|
+
wiki_pop_excess_elements(parser);
|
2333
|
+
wiki_start_para_if_necessary(parser);
|
2334
|
+
str_append(output, separator, sizeof(separator) - 1);
|
2335
|
+
break;
|
2336
|
+
|
2337
|
+
case SPACE:
|
2338
|
+
output = parser->capture ? parser->capture : parser->output;
|
2339
|
+
if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
|
2340
|
+
{
|
2341
|
+
wiki_emit_pending_crlf_if_necessary(parser);
|
2342
|
+
str_append(output, token->start, TOKEN_LEN(token));
|
2343
|
+
}
|
2344
|
+
else
|
2345
|
+
{
|
2346
|
+
// peek ahead to see next token
|
2347
|
+
char *token_ptr = token->start;
|
2348
|
+
long token_len = TOKEN_LEN(token);
|
2349
|
+
NEXT_TOKEN();
|
2350
|
+
type = token->type;
|
2351
|
+
if ((type == H6_END && IN(H6_START)) ||
|
2352
|
+
(type == H5_END && IN(H5_START)) ||
|
2353
|
+
(type == H4_END && IN(H4_START)) ||
|
2354
|
+
(type == H3_END && IN(H3_START)) ||
|
2355
|
+
(type == H2_END && IN(H2_START)) ||
|
2356
|
+
(type == H1_END && IN(H1_START)))
|
2357
|
+
{
|
2358
|
+
// will suppress emission of space (discard) if next token is a H6_END, H5_END etc and we are in the corresponding scope
|
2359
|
+
}
|
2360
|
+
else
|
2361
|
+
{
|
2362
|
+
// emit the space
|
2363
|
+
wiki_pop_excess_elements(parser);
|
2364
|
+
wiki_start_para_if_necessary(parser);
|
2365
|
+
str_append(output, token_ptr, token_len);
|
2366
|
+
}
|
2367
|
+
|
2368
|
+
// jump to top of the loop to process token we scanned during lookahead
|
2369
|
+
continue;
|
2370
|
+
}
|
2371
|
+
break;
|
2372
|
+
+            case QUOT_ENTITY:
+            case AMP_ENTITY:
+            case NAMED_ENTITY:
+            case DECIMAL_ENTITY:
+                // pass these through unaltered as they are case sensitive
+                output = parser->capture ? parser->capture : parser->output;
+                wiki_pop_excess_elements(parser);
+                wiki_start_para_if_necessary(parser);
+                str_append(output, token->start, TOKEN_LEN(token));
+                break;
+
+            case HEX_ENTITY:
+                // normalize hex entities (downcase them)
+                output = parser->capture ? parser->capture : parser->output;
+                wiki_pop_excess_elements(parser);
+                wiki_start_para_if_necessary(parser);
+                str_append(output, token->start, TOKEN_LEN(token));
+                wiki_downcase_bang(output->ptr + output->len - TOKEN_LEN(token), TOKEN_LEN(token));
+                break;
+
+            case QUOT:
+                output = parser->capture ? parser->capture : parser->output;
+                wiki_pop_excess_elements(parser);
+                wiki_start_para_if_necessary(parser);
+                str_append(output, quot_entity, sizeof(quot_entity) - 1);
+                break;
+
+            case AMP:
+                output = parser->capture ? parser->capture : parser->output;
+                wiki_pop_excess_elements(parser);
+                wiki_start_para_if_necessary(parser);
+                str_append(output, amp_entity, sizeof(amp_entity) - 1);
+                break;
+
+            case LESS:
+                output = parser->capture ? parser->capture : parser->output;
+                wiki_pop_excess_elements(parser);
+                wiki_start_para_if_necessary(parser);
+                str_append(output, lt_entity, sizeof(lt_entity) - 1);
+                break;
+
+            case GREATER:
+                output = parser->capture ? parser->capture : parser->output;
+                wiki_pop_excess_elements(parser);
+                wiki_start_para_if_necessary(parser);
+                str_append(output, gt_entity, sizeof(gt_entity) - 1);
+                break;
+
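The HEX_ENTITY branch appends the token verbatim and then lowercases the just-appended bytes in place via wiki_downcase_bang (defined outside this excerpt). A rough stand-alone approximation of that in-place ASCII downcasing, not the gem's actual implementation:

    #include <ctype.h>
    #include <stdio.h>

    /* Lowercase len bytes of ptr in place (ASCII only). */
    static void downcase_bang(char *ptr, long len)
    {
        for (long i = 0; i < len; i++)
            ptr[i] = (char)tolower((unsigned char)ptr[i]);
    }

    int main(void)
    {
        char entity[] = "&#x1E9E;";
        downcase_bang(entity, sizeof(entity) - 1);
        printf("%s\n", entity);   /* prints "&#x1e9e;" */
        return 0;
    }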
+            case IMG_START:
+                if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
+                {
+                    wiki_emit_pending_crlf_if_necessary(parser);
+                    str_append(parser->output, token->start, TOKEN_LEN(token));
+                }
+                else if (parser->capture)
+                    str_append(parser->capture, token->start, TOKEN_LEN(token));
+                else
+                {
+                    // not currently capturing: will be emitting something on success or failure, so get ready
+                    wiki_pop_excess_elements(parser);
+                    wiki_start_para_if_necessary(parser);
+
+                    // scan ahead consuming PATH, PRINTABLE, ALNUM and SPECIAL_URI_CHARS tokens
+                    // will cheat here and abuse the link_target capture buffer to accumulate text
+                    while (NEXT_TOKEN(), (type = token->type))
+                    {
+                        if (type == PATH || type == PRINTABLE || type == ALNUM || type == SPECIAL_URI_CHARS)
+                            str_append(parser->link_target, token->start, TOKEN_LEN(token));
+                        else if (type == IMG_END && parser->link_target->len > 0)
+                        {
+                            // success
+                            wiki_append_img(parser, parser->link_target->ptr, parser->link_target->len);
+                            token = NULL;
+                            break;
+                        }
+                        else // unexpected token or zero-length target (syntax error)
+                        {
+                            // rollback
+                            str_append(parser->output, literal_img_start, sizeof(literal_img_start) - 1);
+                            if (parser->link_target->len > 0)
+                                str_append(parser->output, parser->link_target->ptr, parser->link_target->len);
+                            break;
+                        }
+                    }
+
+                    // jump to top of the loop to process token we scanned during lookahead
+                    str_clear(parser->link_target);
+                    continue;
+                }
+                break;
+
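The IMG_START branch scans ahead, accumulating candidate target text into the link_target scratch buffer, and either emits an image tag on IMG_END or rolls the accumulated text back into the output as literal text on a syntax error. A simplified stand-alone sketch of that accumulate-or-rollback strategy, using plain C strings and a single-character terminator instead of the gem's tokenizer and str_t buffers:

    #include <stdio.h>

    /* Accumulate characters into target until a '}' terminator; return 1 on
     * success, or 0 on an unexpected character (here: a space) so the caller
     * can fall back to emitting the literal text instead. */
    static int scan_img_target(const char *input, char *target, size_t cap)
    {
        size_t n = 0;
        for (const char *p = input; *p != '\0'; p++)
        {
            if (*p == '}' && n > 0)
            {
                target[n] = '\0';
                return 1;           /* success: emit an <img> tag */
            }
            if (*p == ' ' || n + 1 >= cap)
                break;              /* syntax error: roll back */
            target[n++] = *p;
        }
        target[n] = '\0';
        return 0;
    }

    int main(void)
    {
        char target[64];
        if (scan_img_target("foo.png}", target, sizeof target))
            printf("<img src=\"%s\" />\n", target);
        else
            printf("literal fallback: %s\n", target);
        return 0;
    }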
+            case CRLF:
+                i = parser->pending_crlf;
+                parser->pending_crlf = false;
+                wiki_rollback_failed_link(parser); // if any
+                if (IN_EITHER_OF(NO_WIKI_START, PRE_START))
+                {
+                    ary_clear(parser->line_buffer);
+                    str_append_str(parser->output, parser->line_ending);
+                    break;
+                }
+                else if (IN(PRE))
+                {
+                    // beware when BLOCKQUOTE on line buffer (not line stack!) prior to CRLF, that must be end of PRE block
+                    if (ary_entry(parser->line_buffer, -2) == BLOCKQUOTE)
+                        // don't emit in this case
+                        wiki_pop_from_stack_up_to(parser, parser->output, PRE, true);
+                    else
+                    {
+                        if (ary_entry(parser->line_buffer, -2) == PRE)
+                        {
+                            // only thing on line is the PRE: emit pending line ending (if we had one)
+                            if (i)
+                                str_append_str(parser->output, parser->line_ending);
+                        }
+
+                        // clear these _before_ calling NEXT_TOKEN (NEXT_TOKEN adds to the line_buffer)
+                        ary_clear(parser->line);
+                        ary_clear(parser->line_buffer);
+
+                        // peek ahead to see if this is definitely the end of the PRE block
+                        NEXT_TOKEN();
+                        type = token->type;
+                        if (type != BLOCKQUOTE && type != PRE)
+                            // this is definitely the end of the block, so don't emit
+                            wiki_pop_from_stack_up_to(parser, parser->output, PRE, true);
+                        else
+                            // potentially will emit
+                            parser->pending_crlf = true;
+
+                        continue; // jump back to top of loop to handle token grabbed via lookahead
+                    }
+                }
+                else
+                {
+                    parser->pending_crlf = true;
+
+                    // count number of BLOCKQUOTE tokens in line buffer (can be zero) and pop back to that level
+                    // as a side effect, this handles any open span-level elements and unclosed blocks
+                    // (with special handling for P blocks and LI elements)
+                    i = ary_count(parser->line, BLOCKQUOTE) + ary_count(parser->scope, BLOCKQUOTE_START);
+                    for (j = parser->scope->count; j > i; j--)
+                    {
+                        if (parser->scope->count > 0 && ary_entry(parser->scope, -1) == LI)
+                        {
+                            parser->pending_crlf = false;
+                            break;
+                        }
+
+                        // special handling on last iteration through the loop if the top item on the scope is a P block
+                        if ((j - i == 1) && ary_entry(parser->scope, -1) == P)
+                        {
+                            // if nothing or BLOCKQUOTE on line buffer (not line stack!) prior to CRLF, this must be a paragraph break
+                            // (note that we have to make sure we're not inside a BLOCKQUOTE_START block
+                            // because in those blocks BLOCKQUOTE tokens have no special meaning)
+                            if (NO_ITEM(ary_entry(parser->line_buffer, -2)) ||
+                                (ary_entry(parser->line_buffer, -2) == BLOCKQUOTE && !IN(BLOCKQUOTE_START)))
+                                // paragraph break
+                                parser->pending_crlf = false;
+                            else
+                                // not a paragraph break!
+                                continue;
+                        }
+                        wiki_pop_from_stack(parser, NULL);
+                    }
+                }
+
+                // delete the entire contents of the line scope stack and buffer
+                ary_clear(parser->line);
+                ary_clear(parser->line_buffer);
+                break;
+
+            case SPECIAL_URI_CHARS:
+            case PRINTABLE:
+            case ALNUM:
+            case IMG_END:
+            case LEFT_CURLY:
+            case RIGHT_CURLY:
+                output = parser->capture ? parser->capture : parser->output;
+                wiki_pop_excess_elements(parser);
+                wiki_start_para_if_necessary(parser);
+                str_append(output, token->start, TOKEN_LEN(token));
+                break;
+
+            case DEFAULT:
+                output = parser->capture ? parser->capture : parser->output;
+                wiki_pop_excess_elements(parser);
+                wiki_start_para_if_necessary(parser);
+                wiki_append_entity_from_utf32_char(output, token->code_point);
+                break;
+
+            case END_OF_FILE:
+                // special case for input like " foo\n " (see pre_spec.rb)
+                if (IN(PRE) &&
+                    ary_entry(parser->line_buffer, -2) == PRE &&
+                    parser->pending_crlf)
+                    str_append(parser->output, parser->line_ending->ptr, parser->line_ending->len);
+
+                // close any open scopes on hitting EOF
+                wiki_rollback_failed_link(parser); // if any
+                wiki_pop_all_from_stack(parser);
+                goto return_output; // break not enough here (want to break out of outer while loop, not inner switch statement)
+
+            default:
+                break;
+        }
+
+        // reset current token; forcing lexer to return another token at the top of the loop
+        token = NULL;
+    } while (1);
+return_output:
+    // nasty hack to avoid re-allocating our return value
+    str_append(parser->output, null_str, 1); // null-terminate
+    len = parser->output->len - 1; // don't count null termination
+
+    VALUE out = rb_str_buf_new(RSTRING_EMBED_LEN_MAX + 1);
+    free(RSTRING_PTR(out));
+    RSTRING(out)->as.heap.aux.capa = len;
+    RSTRING(out)->as.heap.ptr = parser->output->ptr;
+    RSTRING(out)->as.heap.len = len;
+    parser->output->ptr = NULL; // don't double-free
+    return out;
+}
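The return_output block above avoids copying the result: it allocates a throwaway Ruby String, frees its buffer, and points the RSTRING heap fields at the parser's own output buffer. That saves one memcpy but ties the extension to MRI's internal String layout. A more conservative, copying alternative using only the public C API would look roughly like this sketch (hypothetical helper name; assumes the same pointer/length pair):

    #include <ruby.h>

    /* Copying alternative to the zero-copy RSTRING field surgery above:
     * rb_str_new() duplicates the buffer, so the caller keeps ownership of
     * ptr and can free or reuse it normally, at the cost of one memcpy. */
    static VALUE buffer_to_ruby_string(const char *ptr, long len)
    {
        return rb_str_new(ptr, len);
    }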