wikitext 4.0 → 4.0.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 2fd1ae53b44a2354ca48f9d35a39469391285573
- data.tar.gz: 46c2fbe593fce24f6d60c9911a47687b9c0475b1
+ metadata.gz: 1e73f2d1a7097dd2a9a1e0264f395ad0e331e291
+ data.tar.gz: 2b4a73fe78d5ac9906ec32cece324f62a6142ed4
  SHA512:
- metadata.gz: 11d2fc71a0c9a92ec23612c6edc2187ec38f9886c1fc0bf1a1e6ef8a8df7019fee9825ab06866cb140a3f66233e2d58f2288cdba5a61618cd0bae2d54de5d7a0
- data.tar.gz: ae8c79c38d2bc463e1c1e42114e139816295a371896f7e5e3c7df7299541c83c87910390acb1fec52225ac6c19a729cd0348d6754022ee1d05946e5e6a0ed431
+ metadata.gz: cc07aaadf9d10dcdf5c0f5cc1abf0e1336717881cd7385b9c2394ac1e4bf7c6bd6e0f1837b4a821d0c9ec13f5a2f17ff764c8416d34f5806ade940452ef3dca9
+ data.tar.gz: 216d912ae01cfefc7abb1f8d700003d58d61c2757e2044ffc9d28f65b584f59d26ae21e0afffc20891d64733b19613fa1491a621b73db6f252c585ac90f9fd24
data/bin/wikitext CHANGED
@@ -1,5 +1,5 @@
  #!/usr/bin/env ruby
- # Copyright 2008-2010 Wincent Colaiuta. All rights reserved.
+ # Copyright 2008-2013 Wincent Colaiuta. All rights reserved.
  #
  # Redistribution and use in source and binary forms, with or without
  # modification, are permitted provided that the following conditions are met:
@@ -27,6 +27,17 @@ require 'wikitext/version'

  module Wikitext
  module Tool
+ # Simple substitute for the HighLine library if it is not available.
+ class FakeHighLine
+ def color(str, _)
+ str
+ end
+
+ def output_cols
+ 80
+ end
+ end
+
  INPUT_FILES = []

  def self.interactive?
@@ -59,11 +70,14 @@ module Wikitext
  begin
  require 'highline'
  rescue LoadError
- require 'rubygems'
- require 'highline'
+ begin
+ require 'rubygems'
+ require 'highline'
+ rescue LoadError
+ end
  end
  puts "wikitext #{Wikitext::VERSION}"
- highline = HighLine.new
+ highline = (defined?(HighLine) ? HighLine : FakeHighLine).new
  end

  parser = Parser.new
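The change above makes HighLine a soft dependency: if neither the plain require nor the rubygems-assisted require succeeds, the new FakeHighLine stands in. A minimal standalone sketch of that fallback pattern follows; it assumes only that HighLine, when installed, responds to #color and #output_cols, and the sample strings and :green argument are illustrative rather than taken from the gem.

    # Sketch of the optional-dependency pattern introduced in bin/wikitext above.
    begin
      require 'highline'
    rescue LoadError
      begin
        require 'rubygems'
        require 'highline'
      rescue LoadError
        # HighLine stays undefined; the fake below is used instead
      end
    end

    class FakeHighLine
      def color(str, _)
        str
      end

      def output_cols
        80
      end
    end

    highline = (defined?(HighLine) ? HighLine : FakeHighLine).new
    puts highline.color('wikitext', :green) # plain, uncolored text when HighLine is missing
    puts highline.output_cols               # assumed terminal width of 80 when HighLine is missing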
data/lib/wikitext/version.rb CHANGED
@@ -22,5 +22,5 @@
  # POSSIBILITY OF SUCH DAMAGE.

  module Wikitext
- VERSION = '4.0'
+ VERSION = '4.0.1'
  end # module Wikitext
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: wikitext
  version: !ruby/object:Gem::Version
- version: '4.0'
+ version: 4.0.1
  platform: ruby
  authors:
  - Wincent Colaiuta
@@ -100,25 +100,10 @@ email: win@wincent.com
  executables:
  - wikitext
  extensions:
- - ext/extconf.rb
+ - ext/wikitext/extconf.rb
  extra_rdoc_files: []
  files:
  - bin/wikitext
- - ext/extconf.rb
- - ext/ary.c
- - ext/parser.c
- - ext/str.c
- - ext/token.c
- - ext/wikitext.c
- - ext/wikitext_ragel.c
- - ext/ary.h
- - ext/parser.h
- - ext/ruby_compat.h
- - ext/str.h
- - ext/token.h
- - ext/wikitext.h
- - ext/wikitext_ragel.h
- - ext/depend
  - lib/wikitext/nil_class.rb
  - lib/wikitext/parser.rb
  - lib/wikitext/preprocess.rb
@@ -163,8 +148,10 @@ files:
  - spec/version_spec.rb
  - spec/vim_formatter.rb
  - spec/wikitext_spec.rb
+ - ext/wikitext/extconf.rb
  homepage: https://wincent.com/products/wikitext
- licenses: []
+ licenses:
+ - BSD
  metadata: {}
  post_install_message:
  rdoc_options: []
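The gemspec above ships a wikitext executable plus a native extension built from ext/wikitext/extconf.rb. For orientation, here is a hedged sketch of typical library use; the HTML shown is the output shape I would expect from the parser, not something copied from this release's test suite.

    require 'wikitext'

    parser = Wikitext::Parser.new
    puts parser.parse("'''bold''' and ''italic''")
    # expected shape: <p><strong>bold</strong> and <em>italic</em></p>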
data/ext/ary.c DELETED
@@ -1,116 +0,0 @@
1
- // Copyright 2008-2009 Wincent Colaiuta. All rights reserved.
2
- //
3
- // Redistribution and use in source and binary forms, with or without
4
- // modification, are permitted provided that the following conditions are met:
5
- //
6
- // 1. Redistributions of source code must retain the above copyright notice,
7
- // this list of conditions and the following disclaimer.
8
- // 2. Redistributions in binary form must reproduce the above copyright notice,
9
- // this list of conditions and the following disclaimer in the documentation
10
- // and/or other materials provided with the distribution.
11
- //
12
- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
13
- // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
14
- // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
15
- // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
16
- // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
17
- // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
18
- // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
19
- // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
20
- // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
21
- // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
22
- // POSSIBILITY OF SUCH DAMAGE.
23
-
24
- #include "ary.h"
25
-
26
- ary_t *ary_new(void)
27
- {
28
- ary_t *ary = ALLOC_N(ary_t, 1);
29
- ary->count = 0;
30
- ary->max = DEFAULT_ENTRY_COUNT;
31
- ary->entries = ALLOC_N(int, DEFAULT_ENTRY_COUNT);
32
- return ary;
33
- }
34
-
35
- int ary_entry(ary_t *ary, int idx)
36
- {
37
- if (idx < 0)
38
- idx = ary->count + idx;
39
- return (idx >= 0 && ary->count > idx) ? ary->entries[idx] : INT_MAX;
40
- }
41
-
42
- void ary_clear(ary_t *ary)
43
- {
44
- ary->count = 0;
45
- }
46
-
47
- int ary_pop(ary_t *ary)
48
- {
49
- if (ary->count > 0)
50
- {
51
- ary->count--;
52
- return 1;
53
- }
54
- return 0;
55
- }
56
-
57
- void ary_push(ary_t *ary, int val)
58
- {
59
- if (ary->count == ary->max)
60
- {
61
- ary->max += DEFAULT_ENTRY_COUNT;
62
- REALLOC_N(ary->entries, int, ary->max);
63
- }
64
- ary->entries[ary->count] = val;
65
- ary->count++;
66
- }
67
-
68
- int ary_includes(ary_t *ary, int val)
69
- {
70
- for (int i = 0, max = ary->count; i < max; i++)
71
- {
72
- if (ary->entries[i] == val)
73
- return 1;
74
- }
75
- return 0;
76
- }
77
-
78
- int ary_includes2(ary_t *ary, int val1, int val2)
79
- {
80
- for (int i = 0, max = ary->count; i < max; i++)
81
- {
82
- if (ary->entries[i] == val1 ||
83
- ary->entries[i] == val2)
84
- return 1;
85
- }
86
- return 0;
87
- }
88
-
89
- int ary_includes3(ary_t *ary, int val1, int val2, int val3)
90
- {
91
- for (int i = 0, max = ary->count; i < max; i++)
92
- {
93
- if (ary->entries[i] == val1 ||
94
- ary->entries[i] == val2 ||
95
- ary->entries[i] == val3)
96
- return 1;
97
- }
98
- return 0;
99
- }
100
-
101
- int ary_count(ary_t *ary, int item)
102
- {
103
- int count = 0;
104
- for (int i = 0, max = ary->count; i < max; i++)
105
- {
106
- if (ary->entries[i] == item)
107
- count++;
108
- }
109
- return count;
110
- }
111
-
112
- void ary_free(ary_t *ary)
113
- {
114
- free(ary->entries);
115
- free(ary);
116
- }
data/ext/ary.h DELETED
@@ -1,50 +0,0 @@
1
- // Copyright 2008-2009 Wincent Colaiuta. All rights reserved.
2
- //
3
- // Redistribution and use in source and binary forms, with or without
4
- // modification, are permitted provided that the following conditions are met:
5
- //
6
- // 1. Redistributions of source code must retain the above copyright notice,
7
- // this list of conditions and the following disclaimer.
8
- // 2. Redistributions in binary form must reproduce the above copyright notice,
9
- // this list of conditions and the following disclaimer in the documentation
10
- // and/or other materials provided with the distribution.
11
- //
12
- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
13
- // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
14
- // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
15
- // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
16
- // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
17
- // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
18
- // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
19
- // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
20
- // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
21
- // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
22
- // POSSIBILITY OF SUCH DAMAGE.
23
-
24
- #include "ruby_compat.h"
25
-
26
- typedef struct
27
- {
28
- int count;
29
- int max;
30
- int *entries;
31
- } ary_t;
32
-
33
- // in the test suite array count goes no higher than 25 or 26
34
- #define DEFAULT_ENTRY_COUNT 64
35
-
36
- #define NO_ITEM(item) (item == INT_MAX)
37
-
38
- ary_t *ary_new(void);
39
- int ary_entry(ary_t *ary, int idx);
40
- void ary_clear(ary_t *ary);
41
- int ary_pop(ary_t *ary);
42
- void ary_push(ary_t *ary, int val);
43
- int ary_includes(ary_t *ary, int val);
44
- int ary_includes2(ary_t *ary, int val1, int val2);
45
- int ary_includes3(ary_t *ary, int val1, int val2, int val3);
46
-
47
- // returns a count indicating the number of times the value appears in the collection
48
- int ary_count(ary_t *ary, int item);
49
-
50
- void ary_free(ary_t *ary);
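ary.c and ary.h above implement a tiny growable stack of ints that the parser uses for scope tracking. Functionally it is close to what a plain Ruby Array already provides; the mapping below is purely illustrative (the C version stores int token types and uses INT_MAX where Ruby would return nil).

    scope = []            # ary_new
    scope.push(42)        # ary_push(ary, 42)
    scope.last            # ary_entry(ary, -1)
    scope.include?(42)    # ary_includes(ary, 42)
    scope.count(42)       # ary_count(ary, 42)
    scope.pop             # ary_pop(ary)
    scope.clear           # ary_clear(ary)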
data/ext/depend DELETED
@@ -1,32 +0,0 @@
1
- # Copyright 2008-2010 Wincent Colaiuta. All rights reserved.
2
- #
3
- # Redistribution and use in source and binary forms, with or without
4
- # modification, are permitted provided that the following conditions are met:
5
- #
6
- # 1. Redistributions of source code must retain the above copyright notice,
7
- # this list of conditions and the following disclaimer.
8
- # 2. Redistributions in binary form must reproduce the above copyright notice,
9
- # this list of conditions and the following disclaimer in the documentation
10
- # and/or other materials provided with the distribution.
11
- #
12
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
13
- # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
14
- # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
15
- # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
16
- # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
17
- # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
18
- # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
19
- # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
20
- # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
21
- # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
22
- # POSSIBILITY OF SUCH DAMAGE.
23
-
24
- # don't warn about unused params because many Ruby methods accept "self" but don't use it
25
- CFLAGS += -std=gnu99 -Wall -Wextra -Wno-unused-parameter
26
-
27
- ary.o : ary.c ary.h ruby_compat.h
28
- parser.o : ary.c ary.h parser.c parser.h ruby_compat.h str.c str.h token.h wikitext.h wikitext_ragel.h
29
- str.o : ruby_compat.h str.c str.h
30
- token.o : ruby_compat.h token.c token.h wikitext.h
31
- wikitext.o : parser.h ruby_compat.h token.h wikitext.c wikitext.h wikitext_ragel.h
32
- wikitext_ragel.o : ruby_compat.h token.h wikitext.h wikitext_ragel.h wikitext_ragel.c
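The deleted depend file carried the extension's compiler flags and object-file dependencies. After the move to ext/wikitext/, an extconf.rb would normally supply equivalent flags; the following is a hypothetical sketch, not the gem's actual ext/wikitext/extconf.rb, and the 'wikitext' target name is an assumption.

    require 'mkmf'

    # same flags the old depend file added; -Wno-unused-parameter because many
    # Ruby method implementations accept self without using it
    $CFLAGS += ' -std=gnu99 -Wall -Wextra -Wno-unused-parameter'
    create_makefile('wikitext')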
data/ext/parser.c DELETED
@@ -1,2595 +0,0 @@
1
- // Copyright 2007-2013 Wincent Colaiuta. All rights reserved.
2
- //
3
- // Redistribution and use in source and binary forms, with or without
4
- // modification, are permitted provided that the following conditions are met:
5
- //
6
- // 1. Redistributions of source code must retain the above copyright notice,
7
- // this list of conditions and the following disclaimer.
8
- // 2. Redistributions in binary form must reproduce the above copyright notice,
9
- // this list of conditions and the following disclaimer in the documentation
10
- // and/or other materials provided with the distribution.
11
- //
12
- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
13
- // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
14
- // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
15
- // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
16
- // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
17
- // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
18
- // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
19
- // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
20
- // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
21
- // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
22
- // POSSIBILITY OF SUCH DAMAGE.
23
-
24
- #include <stdbool.h>
25
-
26
- #include "parser.h"
27
- #include "ary.h"
28
- #include "str.h"
29
- #include "wikitext.h"
30
- #include "wikitext_ragel.h"
31
-
32
- #define IN(type) ary_includes(parser->scope, type)
33
- #define IN_EITHER_OF(type1, type2) ary_includes2(parser->scope, type1, type2)
34
- #define IN_ANY_OF(type1, type2, type3) ary_includes3(parser->scope, type1, type2, type3)
35
-
36
- // output styles
37
- enum { HTML_OUTPUT, XML_OUTPUT };
38
-
39
- // poor man's object orientation in C:
40
- // instead of passing around multiple parameters between functions in the parser
41
- // we pack everything into a struct and pass around only a pointer to that
42
- typedef struct
43
- {
44
- str_t *capture; // capturing to link_target, link_text, or NULL (direct to output, not capturing)
45
- str_t *output; // for accumulating output to be returned
46
- str_t *link_target; // short term "memory" for parsing links
47
- str_t *link_text; // short term "memory" for parsing links
48
- str_t *line_ending;
49
- str_t *tabulation; // caching buffer for emitting indentation
50
- ary_t *scope; // stack for tracking scope
51
- ary_t *line; // stack for tracking scope as implied by current line
52
- ary_t *line_buffer; // stack for tracking raw tokens (not scope) on current line
53
- VALUE external_link_class; // CSS class applied to external links
54
- VALUE external_link_rel; // rel attribute applied to external links
55
- VALUE mailto_class; // CSS class applied to email (mailto) links
56
- VALUE img_prefix; // path prepended when emitting img tags
57
- int output_style; // HTML_OUTPUT (default) or XML_OUTPUT
58
- int base_indent; // controlled by the :indent option to Wikitext::Parser#parse
59
- int current_indent; // fluctuates according to currently nested structures
60
- int base_heading_level;
61
- bool pending_crlf;
62
- bool autolink;
63
- bool space_to_underscore;
64
- } parser_t;
65
-
66
- const char null_str[] = { 0 };
67
- const char escaped_no_wiki_start[] = "&lt;nowiki&gt;";
68
- const char escaped_no_wiki_end[] = "&lt;/nowiki&gt;";
69
- const char literal_strong_em[] = "'''''";
70
- const char literal_strong[] = "'''";
71
- const char literal_em[] = "''";
72
- const char escaped_em_start[] = "&lt;em&gt;";
73
- const char escaped_em_end[] = "&lt;/em&gt;";
74
- const char escaped_strong_start[] = "&lt;strong&gt;";
75
- const char escaped_strong_end[] = "&lt;/strong&gt;";
76
- const char escaped_tt_start[] = "&lt;tt&gt;";
77
- const char escaped_tt_end[] = "&lt;/tt&gt;";
78
- const char pre_start[] = "<pre>";
79
- const char pre_end[] = "</pre>";
80
- const char escaped_pre_start[] = "&lt;pre&gt;";
81
- const char escaped_pre_end[] = "&lt;/pre&gt;";
82
- const char blockquote_start[] = "<blockquote>";
83
- const char blockquote_end[] = "</blockquote>";
84
- const char escaped_blockquote_start[] = "&lt;blockquote&gt;";
85
- const char escaped_blockquote_end[] = "&lt;/blockquote&gt;";
86
- const char strong_em_start[] = "<strong><em>";
87
- const char strong_start[] = "<strong>";
88
- const char strong_end[] = "</strong>";
89
- const char em_start[] = "<em>";
90
- const char em_end[] = "</em>";
91
- const char code_start[] = "<code>";
92
- const char code_end[] = "</code>";
93
- const char ol_start[] = "<ol>";
94
- const char ol_end[] = "</ol>";
95
- const char ul_start[] = "<ul>";
96
- const char ul_end[] = "</ul>";
97
- const char li_start[] = "<li>";
98
- const char li_end[] = "</li>";
99
- const char h6_start[] = "<h6>";
100
- const char h6_end[] = "</h6>";
101
- const char h5_start[] = "<h5>";
102
- const char h5_end[] = "</h5>";
103
- const char h4_start[] = "<h4>";
104
- const char h4_end[] = "</h4>";
105
- const char h3_start[] = "<h3>";
106
- const char h3_end[] = "</h3>";
107
- const char h2_start[] = "<h2>";
108
- const char h2_end[] = "</h2>";
109
- const char h1_start[] = "<h1>";
110
- const char h1_end[] = "</h1>";
111
- const char p_start[] = "<p>";
112
- const char p_end[] = "</p>";
113
- const char space[] = " ";
114
- const char a_start[] = "<a href=\"";
115
- const char a_class[] = "\" class=\"";
116
- const char a_rel[] = "\" rel=\"";
117
- const char a_start_close[] = "\">";
118
- const char a_end[] = "</a>";
119
- const char link_start[] = "[[";
120
- const char link_end[] = "]]";
121
- const char separator[] = "|";
122
- const char ext_link_start[] = "[";
123
- const char backtick[] = "`";
124
- const char quote[] = "\"";
125
- const char ampersand[] = "&";
126
- const char quot_entity[] = "&quot;";
127
- const char amp_entity[] = "&amp;";
128
- const char lt_entity[] = "&lt;";
129
- const char gt_entity[] = "&gt;";
130
- const char escaped_blockquote[] = "&gt; ";
131
- const char ext_link_end[] = "]";
132
- const char literal_img_start[] = "{{";
133
- const char img_start[] = "<img src=\"";
134
- const char img_end_xml[] = "\" />";
135
- const char img_end_html[] = "\">";
136
- const char img_alt[] = "\" alt=\"";
137
- const char pre_class_start[] = "<pre class=\"";
138
- const char pre_class_end[] = "-syntax\">";
139
-
140
- // Mark the parser struct designated by ptr as a participant in Ruby's
141
- // mark-and-sweep garbage collection scheme. A variable named name is placed on
142
- // the C stack to prevent the structure from being prematurely collected.
143
- #define GC_WRAP_PARSER(ptr, name) volatile VALUE name __attribute__((unused)) = Data_Wrap_Struct(rb_cObject, 0, parser_free, ptr)
144
-
145
- parser_t *parser_new(void)
146
- {
147
- parser_t *parser = ALLOC_N(parser_t, 1);
148
- parser->capture = NULL; // not a real instance, pointer to other member's instance
149
- parser->output = str_new();
150
- parser->link_target = str_new();
151
- parser->link_text = str_new();
152
- parser->line_ending = NULL; // caller should set up
153
- parser->tabulation = str_new();
154
- parser->scope = ary_new();
155
- parser->line = ary_new();
156
- parser->line_buffer = ary_new();
157
- parser->external_link_class = Qnil; // caller should set up
158
- parser->external_link_rel = Qnil; // caller should set up
159
- parser->mailto_class = Qnil; // caller should set up
160
- parser->img_prefix = Qnil; // caller should set up
161
- parser->output_style = HTML_OUTPUT;
162
- parser->base_indent = 0;
163
- parser->current_indent = 0;
164
- parser->base_heading_level = 0;
165
- parser->pending_crlf = false;
166
- parser->autolink = true;
167
- parser->space_to_underscore = true;
168
- return parser;
169
- }
170
-
171
- void parser_free(parser_t *parser)
172
- {
173
- // we don't free parser->capture; it's just a redundant pointer
174
- if (parser->output) str_free(parser->output);
175
- if (parser->link_target) str_free(parser->link_target);
176
- if (parser->link_text) str_free(parser->link_text);
177
- if (parser->line_ending) str_free(parser->line_ending);
178
- if (parser->tabulation) str_free(parser->tabulation);
179
- if (parser->scope) ary_free(parser->scope);
180
- if (parser->line) ary_free(parser->line);
181
- if (parser->line_buffer) ary_free(parser->line_buffer);
182
- free(parser);
183
- }
184
-
185
- // for testing and debugging only
186
- VALUE Wikitext_parser_tokenize(VALUE self, VALUE string)
187
- {
188
- if (NIL_P(string))
189
- return Qnil;
190
- string = StringValue(string);
191
- VALUE tokens = rb_ary_new();
192
- char *p = RSTRING_PTR(string);
193
- long len = RSTRING_LEN(string);
194
- char *pe = p + len;
195
- token_t token;
196
- next_token(&token, NULL, p, pe);
197
- rb_ary_push(tokens, wiki_token(&token));
198
- while (token.type != END_OF_FILE)
199
- {
200
- next_token(&token, &token, NULL, pe);
201
- rb_ary_push(tokens, wiki_token(&token));
202
- }
203
- return tokens;
204
- }
205
-
206
- // for benchmarking raw tokenization speed only
207
- VALUE Wikitext_parser_benchmarking_tokenize(VALUE self, VALUE string)
208
- {
209
- if (NIL_P(string))
210
- return Qnil;
211
- string = StringValue(string);
212
- char *p = RSTRING_PTR(string);
213
- long len = RSTRING_LEN(string);
214
- char *pe = p + len;
215
- token_t token;
216
- next_token(&token, NULL, p, pe);
217
- while (token.type != END_OF_FILE)
218
- next_token(&token, &token, NULL, pe);
219
- return Qnil;
220
- }
221
-
222
- VALUE Wikitext_parser_fulltext_tokenize(int argc, VALUE *argv, VALUE self)
223
- {
224
- // process arguments
225
- VALUE string, options;
226
- if (rb_scan_args(argc, argv, "11", &string, &options) == 1) // 1 mandatory argument, 1 optional argument
227
- options = Qnil;
228
- if (NIL_P(string))
229
- return Qnil;
230
- string = StringValue(string);
231
- VALUE tokens = rb_ary_new();
232
-
233
- // check instance variables
234
- VALUE min = rb_iv_get(self, "@minimum_fulltext_token_length");
235
-
236
- // process options hash (can override instance variables)
237
- if (!NIL_P(options) && TYPE(options) == T_HASH)
238
- {
239
- if (rb_funcall(options, rb_intern("has_key?"), 1, ID2SYM(rb_intern("minimum"))) == Qtrue)
240
- min = rb_hash_aref(options, ID2SYM(rb_intern("minimum")));
241
- }
242
- int min_len = NIL_P(min) ? 3 : NUM2INT(min);
243
- if (min_len < 0)
244
- min_len = 0;
245
-
246
- // set up scanner
247
- char *p = RSTRING_PTR(string);
248
- long len = RSTRING_LEN(string);
249
- char *pe = p + len;
250
- token_t token;
251
- token_t *_token = &token;
252
- next_token(&token, NULL, p, pe);
253
- while (token.type != END_OF_FILE)
254
- {
255
- switch (token.type)
256
- {
257
- case URI:
258
- case MAIL:
259
- case ALNUM:
260
- if (TOKEN_LEN(_token) >= min_len)
261
- rb_ary_push(tokens, TOKEN_TEXT(_token));
262
- break;
263
- default:
264
- // ignore everything else
265
- break;
266
- }
267
- next_token(&token, &token, NULL, pe);
268
- }
269
- return tokens;
270
- }
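Wikitext_parser_fulltext_tokenize above keeps only URI, MAIL and ALNUM tokens, and drops any shorter than the minimum length (default 3, overridable via a :minimum option or @minimum_fulltext_token_length). Assuming it is bound as Parser#fulltext_tokenize, which this diff does not show, a hedged sketch:

    parser = Wikitext::Parser.new
    parser.fulltext_tokenize('see http://example.com/ or ask me')
    # expected: "see", the URI and "ask"; tokens under 3 chars ("or", "me") are dropped
    parser.fulltext_tokenize('see http://example.com/ or ask me', minimum: 2)
    # expected: the two-character tokens are now included as well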
271
-
272
- // we downcase "in place", overwriting the original contents of the buffer
273
- void wiki_downcase_bang(char *ptr, long len)
274
- {
275
- for (long i = 0; i < len; i++)
276
- {
277
- if (ptr[i] >= 'A' && ptr[i] <= 'Z')
278
- ptr[i] += 32;
279
- }
280
- }
281
-
282
- void wiki_append_entity_from_utf32_char(str_t *output, uint32_t character)
283
- {
284
- char hex_string[8] = { '&', '#', 'x', 0, 0, 0, 0, ';' };
285
- char scratch = (character & 0xf000) >> 12;
286
- hex_string[3] = (scratch <= 9 ? scratch + 48 : scratch + 87);
287
- scratch = (character & 0x0f00) >> 8;
288
- hex_string[4] = (scratch <= 9 ? scratch + 48 : scratch + 87);
289
- scratch = (character & 0x00f0) >> 4;
290
- hex_string[5] = (scratch <= 9 ? scratch + 48 : scratch + 87);
291
- scratch = character & 0x000f;
292
- hex_string[6] = (scratch <= 9 ? scratch + 48 : scratch + 87);
293
- str_append(output, hex_string, sizeof(hex_string));
294
- }
295
-
296
- // Convert a single UTF-8 codepoint to UTF-32
297
- //
298
- // Expects an input buffer, src, containing a UTF-8 encoded character (which
299
- // may be multi-byte). The end of the input buffer, end, is also passed in to
300
- // allow the detection of invalidly truncated codepoints. The number of bytes
301
- // in the UTF-8 character (between 1 and 4) is returned by reference in
302
- // width_out.
303
- //
304
- // Raises a RangeError if the supplied character is invalid UTF-8.
305
- uint32_t wiki_utf8_to_utf32(char *src, char *end, long *width_out)
306
- {
307
- uint32_t dest = 0;
308
- if ((unsigned char)src[0] <= 0x7f)
309
- {
310
- // ASCII
311
- dest = src[0];
312
- *width_out = 1;
313
- }
314
- else if ((src[0] & 0xe0) == 0xc0)
315
- {
316
- // byte starts with 110..... : this should be a two-byte sequence
317
- if (src + 1 >= end)
318
- // no second byte
319
- rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
320
- else if (((unsigned char)src[0] == 0xc0) ||
321
- ((unsigned char)src[0] == 0xc1))
322
- // overlong encoding: lead byte of 110..... but code point <= 127
323
- rb_raise(eWikitextParserError, "invalid encoding: overlong encoding");
324
- else if ((src[1] & 0xc0) != 0x80 )
325
- // should have second byte starting with 10......
326
- rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
327
-
328
- dest =
329
- ((uint32_t)(src[0] & 0x1f)) << 6 |
330
- (src[1] & 0x3f);
331
- *width_out = 2;
332
- }
333
- else if ((src[0] & 0xf0) == 0xe0)
334
- {
335
- // byte starts with 1110.... : this should be a three-byte sequence
336
- if (src + 2 >= end)
337
- // missing second or third byte
338
- rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
339
- else if (((src[1] & 0xc0) != 0x80 ) ||
340
- ((src[2] & 0xc0) != 0x80 ))
341
- // should have second and third bytes starting with 10......
342
- rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
343
-
344
- dest =
345
- ((uint32_t)(src[0] & 0x0f)) << 12 |
346
- ((uint32_t)(src[1] & 0x3f)) << 6 |
347
- (src[2] & 0x3f);
348
- *width_out = 3;
349
- }
350
- else if ((src[0] & 0xf8) == 0xf0)
351
- {
352
- // bytes starts with 11110... : this should be a four-byte sequence
353
- if (src + 3 >= end)
354
- // missing second, third, or fourth byte
355
- rb_raise(eWikitextParserError, "invalid encoding: truncated byte sequence");
356
- else if ((unsigned char)src[0] >= 0xf5 &&
357
- (unsigned char)src[0] <= 0xf7)
358
- // disallowed by RFC 3629 (codepoints above 0x10ffff)
359
- rb_raise(eWikitextParserError, "invalid encoding: overlong encoding");
360
- else if (((src[1] & 0xc0) != 0x80 ) ||
361
- ((src[2] & 0xc0) != 0x80 ) ||
362
- ((src[3] & 0xc0) != 0x80 ))
363
- // should have second and third bytes starting with 10......
364
- rb_raise(eWikitextParserError, "invalid encoding: malformed byte sequence");
365
-
366
- dest =
367
- ((uint32_t)(src[0] & 0x07)) << 18 |
368
- ((uint32_t)(src[1] & 0x3f)) << 12 |
369
- ((uint32_t)(src[1] & 0x3f)) << 6 |
370
- (src[2] & 0x3f);
371
- *width_out = 4;
372
- }
373
- else
374
- rb_raise(eWikitextParserError, "invalid encoding: unexpected byte");
375
- return dest;
376
- }
377
-
378
- // - non-printable (non-ASCII) characters converted to numeric entities
379
- // - QUOT and AMP characters converted to named entities
380
- // - if trim is true, leading and trailing whitespace trimmed
381
- // - if trim is false, there is no special treatment of spaces
382
- void wiki_append_sanitized_link_target(str_t *link_target, str_t *output, bool trim)
383
- {
384
- char *src = link_target->ptr;
385
- char *start = src; // remember this so we can check if we're at the start
386
- char *non_space = output->ptr + output->len; // remember last non-space character output
387
- char *end = src + link_target->len;
388
- while (src < end)
389
- {
390
- // need at most 8 bytes to display each input character (&#x0000;)
391
- if (output->ptr + output->len + 8 > output->ptr + output->capacity) // outgrowing buffer, must grow
392
- {
393
- char *old_ptr = output->ptr;
394
- str_grow(output, output->len + (end - src) * 8); // allocate enough for worst case
395
- if (old_ptr != output->ptr) // may have moved
396
- non_space += output->ptr - old_ptr;
397
- }
398
-
399
- if (*src == '"')
400
- {
401
- char quot_entity_literal[] = { '&', 'q', 'u', 'o', 't', ';' }; // no trailing NUL
402
- str_append(output, quot_entity_literal, sizeof(quot_entity_literal));
403
- }
404
- else if (*src == '&')
405
- {
406
- char amp_entity_literal[] = { '&', 'a', 'm', 'p', ';' }; // no trailing NUL
407
- str_append(output, amp_entity_literal, sizeof(amp_entity_literal));
408
- }
409
- else if (*src == '<' || *src == '>')
410
- rb_raise(rb_eRangeError, "invalid link text (\"%c\" may not appear in link text)", *src);
411
- else if (*src == ' ' && src == start && trim)
412
- start++; // we eat leading space
413
- else if (*src >= 0x20 && *src <= 0x7e) // printable ASCII
414
- {
415
- *(output->ptr + output->len) = *src;
416
- output->len++;
417
- }
418
- else // all others: must convert to entities
419
- {
420
- long width;
421
- wiki_append_entity_from_utf32_char(output, wiki_utf8_to_utf32(src, end, &width));
422
- src += width;
423
- non_space = output->ptr + output->len;
424
- continue;
425
- }
426
- if (*src != ' ')
427
- non_space = output->ptr + output->len;
428
- src++;
429
- }
430
-
431
- // trim trailing space if necessary
432
- if (trim && output->ptr + output->len != non_space)
433
- output->len -= (output->ptr + output->len) - non_space;
434
- }
435
-
436
- // prepare hyperlink and append it to parser->output
437
- // if check_autolink is true, checks parser->autolink to decide whether to emit a real hyperlink
438
- // or merely the literal link target
439
- // if link_text is Qnil, the link_target is re-used for the link text
440
- void wiki_append_hyperlink(parser_t *parser, VALUE link_prefix, str_t *link_target, str_t *link_text, VALUE link_class, VALUE link_rel, bool check_autolink)
441
- {
442
- if (check_autolink && !parser->autolink)
443
- wiki_append_sanitized_link_target(link_target, parser->output, true);
444
- else
445
- {
446
- str_append(parser->output, a_start, sizeof(a_start) - 1); // <a href="
447
- if (!NIL_P(link_prefix))
448
- str_append_string(parser->output, link_prefix);
449
- wiki_append_sanitized_link_target(link_target, parser->output, true);
450
-
451
- // special handling for mailto URIs
452
- const char *mailto = "mailto:";
453
- long mailto_len = (long)sizeof(mailto) - 1; // don't count NUL byte
454
- if ((link_target->len >= mailto_len &&
455
- strncmp(mailto, link_target->ptr, mailto_len) == 0) ||
456
- (!NIL_P(link_prefix) &&
457
- RSTRING_LEN(link_prefix) >= mailto_len &&
458
- strncmp(mailto, RSTRING_PTR(link_prefix), mailto_len) == 0))
459
- link_class = parser->mailto_class; // use mailto_class from parser
460
- if (link_class != Qnil)
461
- {
462
- str_append(parser->output, a_class, sizeof(a_class) - 1); // " class="
463
- str_append_string(parser->output, link_class);
464
- }
465
- if (link_rel != Qnil)
466
- {
467
- str_append(parser->output, a_rel, sizeof(a_rel) - 1); // " rel="
468
- str_append_string(parser->output, link_rel);
469
- }
470
- str_append(parser->output, a_start_close, sizeof(a_start_close) - 1); // ">
471
- if (!link_text || link_text->len == 0) // re-use link_target
472
- wiki_append_sanitized_link_target(link_target, parser->output, true);
473
- else
474
- str_append_str(parser->output, link_text);
475
- str_append(parser->output, a_end, sizeof(a_end) - 1); // </a>
476
- }
477
- }
478
-
479
- void wiki_append_img(parser_t *parser, char *token_ptr, long token_len)
480
- {
481
- str_append(parser->output, img_start, sizeof(img_start) - 1); // <img src="
482
- if (!NIL_P(parser->img_prefix) && *token_ptr != '/') // len always > 0
483
- str_append_string(parser->output, parser->img_prefix);
484
- str_append(parser->output, token_ptr, token_len);
485
- str_append(parser->output, img_alt, sizeof(img_alt) - 1); // " alt="
486
- str_append(parser->output, token_ptr, token_len);
487
- if (parser->output_style == XML_OUTPUT)
488
- str_append(parser->output, img_end_xml, sizeof(img_end_xml) - 1); // " />
489
- else
490
- str_append(parser->output, img_end_html, sizeof(img_end_html) - 1); // ">
491
- }
492
-
493
- // will emit indentation only if we are about to emit any of:
494
- // <blockquote>, <p>, <ul>, <ol>, <li>, <h1> etc, <pre>
495
- // each time we enter one of those spans must ++ the indentation level
496
- void wiki_indent(parser_t *parser)
497
- {
498
- if (parser->base_indent == -1) // indentation disabled
499
- return;
500
- int space_count = parser->current_indent + parser->base_indent;
501
- if (space_count > 0)
502
- {
503
- char *old_end, *new_end;
504
- if (parser->tabulation->len < space_count)
505
- str_grow(parser->tabulation, space_count); // reallocates if necessary
506
- old_end = parser->tabulation->ptr + parser->tabulation->len;
507
- new_end = parser->tabulation->ptr + space_count;
508
- while (old_end < new_end)
509
- *old_end++ = ' ';
510
- if (space_count > parser->tabulation->len)
511
- parser->tabulation->len = space_count;
512
- str_append(parser->output, parser->tabulation->ptr, space_count);
513
- }
514
- parser->current_indent += 2;
515
- }
516
-
517
- void wiki_append_pre_start(parser_t *parser, token_t *token)
518
- {
519
- wiki_indent(parser);
520
- if ((size_t)TOKEN_LEN(token) > sizeof(pre_start) - 1)
521
- {
522
- str_append(parser->output, pre_class_start, sizeof(pre_class_start) - 1); // <pre class="
523
- str_append(parser->output, token->start + 11, TOKEN_LEN(token) - 13); // (the "lang" substring)
524
- str_append(parser->output, pre_class_end, sizeof(pre_class_end) - 1); // -syntax">
525
- }
526
- else
527
- str_append(parser->output, pre_start, sizeof(pre_start) - 1);
528
- ary_push(parser->scope, PRE_START);
529
- ary_push(parser->line, PRE_START);
530
- }
531
-
532
- void wiki_dedent(parser_t *parser, bool emit)
533
- {
534
- if (parser->base_indent == -1) // indentation disabled
535
- return;
536
- parser->current_indent -= 2;
537
- if (!emit)
538
- return;
539
- int space_count = parser->current_indent + parser->base_indent;
540
- if (space_count > 0)
541
- str_append(parser->output, parser->tabulation->ptr, space_count);
542
- }
543
-
544
- // Pops a single item off the parser's scope stack.
545
- // A corresponding closing tag is written to the target string.
546
- // The target string may be the main output buffer, or a substring capturing buffer if a link is being scanned.
547
- void wiki_pop_from_stack(parser_t *parser, str_t *target)
548
- {
549
- int top = ary_entry(parser->scope, -1);
550
- if (NO_ITEM(top))
551
- return;
552
- if (!target)
553
- target = parser->output;
554
-
555
- // for headings, take base_heading_level into account
556
- if (top >= H1_START && top <= H6_START)
557
- {
558
- top += parser->base_heading_level;
559
- // no need to check for underflow (base_heading_level is never negative)
560
- if (top > H6_START)
561
- top = H6_START;
562
- }
563
-
564
- switch (top)
565
- {
566
- case PRE:
567
- case PRE_START:
568
- str_append(target, pre_end, sizeof(pre_end) - 1);
569
- str_append_str(target, parser->line_ending);
570
- wiki_dedent(parser, false);
571
- break;
572
-
573
- case BLOCKQUOTE:
574
- case BLOCKQUOTE_START:
575
- wiki_dedent(parser, true);
576
- str_append(target, blockquote_end, sizeof(blockquote_end) - 1);
577
- str_append_str(target, parser->line_ending);
578
- break;
579
-
580
- case NO_WIKI_START:
581
- // not a real HTML tag; so nothing to pop
582
- break;
583
-
584
- case STRONG:
585
- case STRONG_START:
586
- str_append(target, strong_end, sizeof(strong_end) - 1);
587
- break;
588
-
589
- case EM:
590
- case EM_START:
591
- str_append(target, em_end, sizeof(em_end) - 1);
592
- break;
593
-
594
- case TT:
595
- case TT_START:
596
- str_append(target, code_end, sizeof(code_end) - 1);
597
- break;
598
-
599
- case OL:
600
- wiki_dedent(parser, true);
601
- str_append(target, ol_end, sizeof(ol_end) - 1);
602
- str_append_str(target, parser->line_ending);
603
- break;
604
-
605
- case UL:
606
- wiki_dedent(parser, true);
607
- str_append(target, ul_end, sizeof(ul_end) - 1);
608
- str_append_str(target, parser->line_ending);
609
- break;
610
-
611
- case NESTED_LIST:
612
- // next token to pop will be a LI
613
- // LI is an interesting token because sometimes we want it to behave like P (ie. do a non-emitting indent)
614
- // and other times we want it to behave like BLOCKQUOTE (ie. when it has a nested list inside)
615
- // hence this hack: we do an emitting dedent on behalf of the LI that we know must be coming
616
- // and then when we pop the actual LI itself (below) we do the standard non-emitting indent
617
- wiki_dedent(parser, true); // we really only want to emit the spaces
618
- parser->current_indent += 2; // we don't want to decrement the actual indent level, so put it back
619
- break;
620
-
621
- case LI:
622
- str_append(target, li_end, sizeof(li_end) - 1);
623
- str_append_str(target, parser->line_ending);
624
- wiki_dedent(parser, false);
625
- break;
626
-
627
- case H6_START:
628
- str_append(target, h6_end, sizeof(h6_end) - 1);
629
- str_append_str(target, parser->line_ending);
630
- wiki_dedent(parser, false);
631
- break;
632
-
633
- case H5_START:
634
- str_append(target, h5_end, sizeof(h5_end) - 1);
635
- str_append_str(target, parser->line_ending);
636
- wiki_dedent(parser, false);
637
- break;
638
-
639
- case H4_START:
640
- str_append(target, h4_end, sizeof(h4_end) - 1);
641
- str_append_str(target, parser->line_ending);
642
- wiki_dedent(parser, false);
643
- break;
644
-
645
- case H3_START:
646
- str_append(target, h3_end, sizeof(h3_end) - 1);
647
- str_append_str(target, parser->line_ending);
648
- wiki_dedent(parser, false);
649
- break;
650
-
651
- case H2_START:
652
- str_append(target, h2_end, sizeof(h2_end) - 1);
653
- str_append_str(target, parser->line_ending);
654
- wiki_dedent(parser, false);
655
- break;
656
-
657
- case H1_START:
658
- str_append(target, h1_end, sizeof(h1_end) - 1);
659
- str_append_str(target, parser->line_ending);
660
- wiki_dedent(parser, false);
661
- break;
662
-
663
- case LINK_START:
664
- // not an HTML tag; so nothing to emit
665
- break;
666
-
667
- case EXT_LINK_START:
668
- // not an HTML tag; so nothing to emit
669
- break;
670
-
671
- case PATH:
672
- // not an HTML tag; so nothing to emit
673
- break;
674
-
675
- case SPACE:
676
- // not an HTML tag (only used to separate an external link target from the link text); so nothing to emit
677
- break;
678
-
679
- case SEPARATOR:
680
- // not an HTML tag (only used to separate an external link target from the link text); so nothing to emit
681
- break;
682
-
683
- case P:
684
- str_append(target, p_end, sizeof(p_end) - 1);
685
- str_append_str(target, parser->line_ending);
686
- wiki_dedent(parser, false);
687
- break;
688
-
689
- case END_OF_FILE:
690
- // nothing to do
691
- break;
692
-
693
- default:
694
- // should probably raise an exception here
695
- break;
696
- }
697
- ary_pop(parser->scope);
698
- }
699
-
700
- // Pops items off the top of parser's scope stack, accumulating closing tags for them into the target string, until item is reached.
701
- // If including is true then the item itself is also popped.
702
- // The target string may be the main output buffer, or a substring capturing buffer when scanning links.
703
- void wiki_pop_from_stack_up_to(parser_t *parser, str_t *target, int item, bool including)
704
- {
705
- int continue_looping = 1;
706
- do
707
- {
708
- int top = ary_entry(parser->scope, -1);
709
- if (NO_ITEM(top))
710
- return;
711
- if (top == item)
712
- {
713
- if (!including)
714
- return;
715
- continue_looping = 0;
716
- }
717
- wiki_pop_from_stack(parser, target);
718
- } while (continue_looping);
719
- }
720
-
721
- void wiki_pop_all_from_stack(parser_t *parser)
722
- {
723
- for (int i = 0, max = parser->scope->count; i < max; i++)
724
- wiki_pop_from_stack(parser, NULL);
725
- }
726
-
727
- void wiki_start_para_if_necessary(parser_t *parser)
728
- {
729
- if (parser->capture)
730
- return;
731
-
732
- // if no block open yet, or top of stack is BLOCKQUOTE/BLOCKQUOTE_START (with nothing in it yet)
733
- if (parser->scope->count == 0 ||
734
- ary_entry(parser->scope, -1) == BLOCKQUOTE ||
735
- ary_entry(parser->scope, -1) == BLOCKQUOTE_START)
736
- {
737
- wiki_indent(parser);
738
- str_append(parser->output, p_start, sizeof(p_start) - 1);
739
- ary_push(parser->scope, P);
740
- ary_push(parser->line, P);
741
- }
742
- else if (parser->pending_crlf)
743
- {
744
- if (IN(P))
745
- // already in a paragraph block; convert pending CRLF into a space
746
- str_append(parser->output, space, sizeof(space) - 1);
747
- else if (IN(PRE))
748
- // PRE blocks can have pending CRLF too (helps us avoid emitting the trailing newline)
749
- str_append_str(parser->output, parser->line_ending);
750
- }
751
- parser->pending_crlf = false;
752
- }
753
-
754
- void wiki_emit_pending_crlf_if_necessary(parser_t *parser)
755
- {
756
- if (parser->pending_crlf)
757
- {
758
- str_append_str(parser->output, parser->line_ending);
759
- parser->pending_crlf = false;
760
- }
761
- }
762
-
763
- // Helper function that pops any excess elements off scope (pushing is already handled in the respective rules).
764
- // For example, given input like:
765
- //
766
- // > > foo
767
- // bar
768
- //
769
- // Upon seeing "bar", we want to pop two BLOCKQUOTE elements from the scope.
770
- // The reverse case (shown below) is handled from inside the BLOCKQUOTE rule itself:
771
- //
772
- // foo
773
- // > > bar
774
- //
775
- // Things are made slightly more complicated by the fact that there is one block-level tag that can be on the scope
776
- // but not on the line scope:
777
- //
778
- // <blockquote>foo
779
- // bar</blockquote>
780
- //
781
- // Here on seeing "bar" we have one item on the scope (BLOCKQUOTE_START) which we don't want to pop, but we have nothing
782
- // on the line scope.
783
- // Luckily, BLOCKQUOTE_START tokens can only appear at the start of the scope array, so we can check for them first before
784
- // entering the for loop.
785
- void wiki_pop_excess_elements(parser_t *parser)
786
- {
787
- if (parser->capture)
788
- return;
789
- for (int i = parser->scope->count - ary_count(parser->scope, BLOCKQUOTE_START), j = parser->line->count; i > j; i--)
790
- {
791
- // special case for last item on scope
792
- if (i - j == 1)
793
- {
794
- // don't auto-pop P if it is only item on scope
795
- if (ary_entry(parser->scope, -1) == P)
796
- {
797
- // add P to the line scope to prevent us entering the loop at all next time around
798
- ary_push(parser->line, P);
799
- continue;
800
- }
801
- }
802
- wiki_pop_from_stack(parser, NULL);
803
- }
804
- }
805
-
806
- // trim parser->link_text in place
807
- void wiki_trim_link_text(parser_t *parser)
808
- {
809
- char *src = parser->link_text->ptr;
810
- char *start = src; // remember this so we can check if we're at the start
811
- char *left = src;
812
- char *non_space = src; // remember last non-space character output
813
- char *end = src + parser->link_text->len;
814
- while (src < end)
815
- {
816
- if (*src == ' ')
817
- {
818
- if (src == left)
819
- left++;
820
- }
821
- else
822
- non_space = src;
823
- src++;
824
- }
825
- if (left != start || non_space + 1 != end)
826
- {
827
- // TODO: could potentially avoid this memmove by extending the str_t struct with an "offset" or "free" member
828
- parser->link_text->len = (non_space + 1) - left;
829
- memmove(parser->link_text->ptr, left, parser->link_text->len);
830
- }
831
- }
832
-
833
- VALUE Wikitext_parser_sanitize_link_target(VALUE self, VALUE string)
834
- {
835
- str_t *link_target = str_new_from_string(string);
836
- GC_WRAP_STR(link_target, link_target_gc);
837
- str_t *output = str_new();
838
- GC_WRAP_STR(output, output_gc);
839
- wiki_append_sanitized_link_target(link_target, output, true);
840
- return string_from_str(output);
841
- }
842
-
843
- // Encodes the parser link_target member (in-place) according to RFCs 2396 and 2718
844
- //
845
- // Leading and trailing whitespace trimmed. Spaces are converted to
846
- // underscores if the parser space_to_underscore member is true.
847
- static void wiki_encode_link_target(parser_t *parser)
848
- {
849
- char *src = parser->link_target->ptr;
850
- char *start = src; // remember this so we can check if we're at the start
851
- long len = parser->link_target->len;
852
- if (!(len > 0))
853
- return;
854
- char *end = src + len;
855
- long dest_len = len * 2;
856
- char *dest = ALLOC_N(char, dest_len);
857
- char *dest_ptr = dest; // hang on to this so we can pass it to free() later
858
- char *non_space = dest; // remember last non-space character output
859
- static char hex[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
860
- for (; src < end; src++)
861
- {
862
- // worst case: a single character may grow to 3 characters once encoded
863
- if ((dest + 3) > (dest_ptr + dest_len))
864
- {
865
- // outgrowing buffer, must reallocate
866
- char *old_dest = dest;
867
- char *old_dest_ptr = dest_ptr;
868
- dest_len += len;
869
- dest = realloc(dest_ptr, dest_len);
870
- if (dest == NULL)
871
- {
872
- // would have used reallocf, but this has to run on Linux too, not just Darwin
873
- free(dest_ptr);
874
- rb_raise(rb_eNoMemError, "failed to re-allocate temporary storage (memory allocation error)");
875
- }
876
- dest_ptr = dest;
877
- dest = dest_ptr + (old_dest - old_dest_ptr);
878
- non_space = dest_ptr + (non_space - old_dest_ptr);
879
- }
880
-
881
- // pass through unreserved characters
882
- if ((*src >= 'a' && *src <= 'z') ||
883
- (*src >= 'A' && *src <= 'Z') ||
884
- (*src >= '0' && *src <= '9') ||
885
- *src == '-' ||
886
- *src == '_' ||
887
- *src == '.' ||
888
- *src == '~')
889
- {
890
- *dest++ = *src;
891
- non_space = dest;
892
- }
893
- else if (*src == ' ' && src == start)
894
- start++; // we eat leading space
895
- else if (*src == ' ' && parser->space_to_underscore)
896
- *dest++ = '_';
897
- else // everything else gets URL-encoded
898
- {
899
- *dest++ = '%';
900
- *dest++ = hex[(unsigned char)(*src) / 16]; // left
901
- *dest++ = hex[(unsigned char)(*src) % 16]; // right
902
- if (*src != ' ')
903
- non_space = dest;
904
- }
905
- }
906
-
907
- // trim trailing space if necessary
908
- if (non_space > dest_ptr && dest != non_space)
909
- dest_len = non_space - dest_ptr;
910
- else
911
- dest_len = dest - dest_ptr;
912
- str_clear(parser->link_target);
913
- str_append(parser->link_target, dest_ptr, dest_len);
914
- free(dest_ptr);
915
- }
916
-
917
- VALUE Wikitext_parser_encode_link_target(VALUE self, VALUE in)
918
- {
919
- parser_t parser;
920
- parser.space_to_underscore = false;
921
- parser.link_target = str_new_from_string(in);
922
- GC_WRAP_STR(parser.link_target, link_target_gc);
923
- wiki_encode_link_target(&parser);
924
- return string_from_str(parser.link_target);
925
- }
926
-
927
- // returns 1 (true) if supplied string is blank (nil, empty, or all whitespace)
928
- // returns 0 (false) otherwise
929
- bool wiki_blank(str_t *str)
930
- {
931
- if (str->len == 0)
932
- return true;
933
- for (char *ptr = str->ptr,
934
- *end = str->ptr + str->len;
935
- ptr < end; ptr++)
936
- {
937
- if (*ptr != ' ')
938
- return false;
939
- }
940
- return true;
941
- }
942
-
943
- void wiki_rollback_failed_internal_link(parser_t *parser)
944
- {
945
- if (!IN(LINK_START))
946
- return; // nothing to do!
947
- int scope_includes_separator = IN(SEPARATOR);
948
- wiki_pop_from_stack_up_to(parser, NULL, LINK_START, true);
949
- str_append(parser->output, link_start, sizeof(link_start) - 1);
950
- if (parser->link_target->len > 0)
951
- {
952
- wiki_append_sanitized_link_target(parser->link_target, parser->output, false);
953
- if (scope_includes_separator)
954
- {
955
- str_append(parser->output, separator, sizeof(separator) - 1);
956
- if (parser->link_text->len > 0)
957
- str_append_str(parser->output, parser->link_text);
958
- }
959
- }
960
- parser->capture = NULL;
961
- str_clear(parser->link_target);
962
- str_clear(parser->link_text);
963
- }
964
-
965
- void wiki_rollback_failed_external_link(parser_t *parser)
966
- {
967
- if (!IN(EXT_LINK_START))
968
- return; // nothing to do!
969
-
970
- // store a couple of values before popping
971
- int scope_includes_space = IN(SPACE);
972
- VALUE link_class = IN(PATH) ? Qnil : parser->external_link_class;
973
- VALUE link_rel = IN(PATH) ? Qnil : parser->external_link_rel;
974
- wiki_pop_from_stack_up_to(parser, NULL, EXT_LINK_START, true);
975
-
976
- str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
977
- if (parser->link_target->len > 0)
978
- {
979
- wiki_append_hyperlink(parser, Qnil, parser->link_target, NULL, link_class, link_rel, true);
980
- if (scope_includes_space)
981
- {
982
- str_append(parser->output, space, sizeof(space) - 1);
983
- if (parser->link_text->len > 0)
984
- str_append_str(parser->output, parser->link_text);
985
- }
986
- }
987
- parser->capture = NULL;
988
- str_clear(parser->link_target);
989
- str_clear(parser->link_text);
990
- }
991
-
992
- void wiki_rollback_failed_link(parser_t *parser)
993
- {
994
- wiki_rollback_failed_internal_link(parser);
995
- wiki_rollback_failed_external_link(parser);
996
- }
997
-
998
- VALUE Wikitext_parser_initialize(int argc, VALUE *argv, VALUE self)
999
- {
1000
- // process arguments
1001
- VALUE options;
1002
- if (rb_scan_args(argc, argv, "01", &options) == 0) // 0 mandatory arguments, 1 optional argument
1003
- options = Qnil;
1004
-
1005
- // defaults
1006
- VALUE autolink = Qtrue;
1007
- VALUE line_ending = rb_str_new2("\n");
1008
- VALUE external_link_class = rb_str_new2("external");
1009
- VALUE external_link_rel = Qnil;
1010
- VALUE mailto_class = rb_str_new2("mailto");
1011
- VALUE link_proc = Qnil;
1012
- VALUE internal_link_prefix = rb_str_new2("/wiki/");
1013
- VALUE img_prefix = rb_str_new2("/images/");
1014
- VALUE output_style = ID2SYM(rb_intern("html"));
1015
- VALUE space_to_underscore = Qtrue;
1016
- VALUE minimum_fulltext_token_length = INT2NUM(3);
1017
- VALUE base_heading_level = INT2NUM(0);
1018
-
1019
- // process options hash (override defaults)
1020
- if (!NIL_P(options) && TYPE(options) == T_HASH)
1021
- {
1022
- #define OVERRIDE_IF_SET(name) rb_funcall(options, rb_intern("has_key?"), 1, ID2SYM(rb_intern(#name))) == Qtrue ? \
1023
- rb_hash_aref(options, ID2SYM(rb_intern(#name))) : name
1024
- autolink = OVERRIDE_IF_SET(autolink);
1025
- line_ending = OVERRIDE_IF_SET(line_ending);
1026
- external_link_class = OVERRIDE_IF_SET(external_link_class);
1027
- external_link_rel = OVERRIDE_IF_SET(external_link_rel);
1028
- mailto_class = OVERRIDE_IF_SET(mailto_class);
1029
- link_proc = OVERRIDE_IF_SET(link_proc);
1030
- internal_link_prefix = OVERRIDE_IF_SET(internal_link_prefix);
1031
- img_prefix = OVERRIDE_IF_SET(img_prefix);
1032
- output_style = OVERRIDE_IF_SET(output_style);
1033
- space_to_underscore = OVERRIDE_IF_SET(space_to_underscore);
1034
- minimum_fulltext_token_length = OVERRIDE_IF_SET(minimum_fulltext_token_length);
1035
- base_heading_level = OVERRIDE_IF_SET(base_heading_level);
1036
- }
1037
-
1038
- // no need to call super here; rb_call_super()
1039
- rb_iv_set(self, "@autolink", autolink);
1040
- rb_iv_set(self, "@line_ending", line_ending);
1041
- rb_iv_set(self, "@external_link_class", external_link_class);
1042
- rb_iv_set(self, "@external_link_rel", external_link_rel);
1043
- rb_iv_set(self, "@mailto_class", mailto_class);
1044
- rb_iv_set(self, "@link_proc", link_proc);
1045
- rb_iv_set(self, "@internal_link_prefix", internal_link_prefix);
1046
- rb_iv_set(self, "@img_prefix", img_prefix);
1047
- rb_iv_set(self, "@output_style", output_style);
1048
- rb_iv_set(self, "@space_to_underscore", space_to_underscore);
1049
- rb_iv_set(self, "@minimum_fulltext_token_length", minimum_fulltext_token_length);
1050
- rb_iv_set(self, "@base_heading_level", base_heading_level);
1051
- return self;
1052
- }
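Wikitext_parser_initialize above wires up the parser defaults (autolink on, internal_link_prefix "/wiki/", external_link_class "external", mailto_class "mailto", space_to_underscore on, and so on), each overridable via the options hash. A hedged sketch of how those defaults surface; the HTML shown is my expected output shape, not copied from this release's specs.

    parser = Wikitext::Parser.new
    parser.parse('[[foo bar]]')
    # expected shape: <p><a href="/wiki/foo_bar">foo bar</a></p>
    # (internal_link_prefix plus space_to_underscore)

    parser.parse('http://example.com/')
    # expected shape: <p><a href="http://example.com/" class="external">http://example.com/</a></p>
    # (autolink plus external_link_class)

    Wikitext::Parser.new(autolink: false, external_link_class: nil)
    # the same option keys seen above can disable either behaviour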
1053
-
1054
- VALUE Wikitext_parser_profiling_parse(VALUE self, VALUE string)
1055
- {
1056
- for (int i = 0; i < 100000; i++)
1057
- Wikitext_parser_parse(1, &string, self);
1058
- return Qnil;
1059
- }
1060
-
1061
- // convert a Ruby object (:xml, :html etc) into an int output style
1062
- int Wikitext_output_style(VALUE output)
1063
- {
1064
- if (TYPE(output) == T_SYMBOL)
1065
- {
1066
- if (SYM2ID(output) == rb_intern("xml"))
1067
- return XML_OUTPUT;
1068
- }
1069
- return HTML_OUTPUT; // fall back to default
1070
- }
1071
-
1072
- VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self)
1073
- {
1074
- // process arguments
1075
- VALUE string, options;
1076
- if (rb_scan_args(argc, argv, "11", &string, &options) == 1) // 1 mandatory argument, 1 optional argument
1077
- options = Qnil;
1078
- if (NIL_P(string))
1079
- return Qnil;
1080
- string = StringValue(string);
1081
-
1082
- // access these once per parse
1083
- VALUE line_ending = rb_iv_get(self, "@line_ending");
1084
- line_ending = StringValue(line_ending);
1085
- VALUE link_class = rb_iv_get(self, "@external_link_class");
1086
- link_class = NIL_P(link_class) ? Qnil : StringValue(link_class);
1087
- VALUE link_rel = rb_iv_get(self, "@external_link_rel");
1088
- link_rel = NIL_P(link_rel) ? Qnil : StringValue(link_rel);
1089
- VALUE link_proc = rb_iv_get(self, "@link_proc");
1090
- VALUE mailto_class = rb_iv_get(self, "@mailto_class");
1091
- mailto_class = NIL_P(mailto_class) ? Qnil : StringValue(mailto_class);
1092
- VALUE prefix = rb_iv_get(self, "@internal_link_prefix");
1093
- int output_style = Wikitext_output_style(rb_iv_get(self, "@output_style"));
1094
-
1095
- // process options hash
1096
- int base_indent = 0;
1097
- int base_heading_level = NUM2INT(rb_iv_get(self, "@base_heading_level"));
1098
- if (!NIL_P(options) && TYPE(options) == T_HASH)
1099
- {
1100
- // :indent => 0 (or more)
1101
- ID has_key = rb_intern("has_key?");
1102
- ID id = ID2SYM(rb_intern("indent"));
1103
- if (rb_funcall(options, has_key, 1, id) == Qtrue)
1104
- {
1105
- VALUE indent = rb_hash_aref(options, id);
1106
- if (indent == Qfalse)
1107
- base_indent = -1; // indentation disabled
1108
- else
1109
- {
1110
- base_indent = NUM2INT(indent);
1111
- if (base_indent < 0)
1112
- base_indent = 0;
1113
- }
1114
- }
1115
-
1116
- // :base_heading_level => 0/1/2/3/4/5/6
1117
- id = ID2SYM(rb_intern("base_heading_level"));
1118
- if (rb_funcall(options, has_key, 1, id) == Qtrue)
1119
- base_heading_level = NUM2INT(rb_hash_aref(options, id));
1120
-
1121
- // :external_link_rel => 'nofollow'
1122
- id = ID2SYM(rb_intern("external_link_rel"));
1123
- if (rb_funcall(options, has_key, 1, id) == Qtrue)
1124
- {
1125
- link_rel = rb_hash_aref(options, id);
1126
- link_rel = NIL_P(link_rel) ? Qnil : StringValue(link_rel);
1127
- }
1128
-
1129
- // :output_style => :html/:xml
1130
- id = ID2SYM(rb_intern("output_style"));
1131
- if (rb_funcall(options, has_key, 1, id) == Qtrue)
1132
- output_style = Wikitext_output_style(rb_hash_aref(options, id));
1133
-
1134
- // :link_proc => lambda { |link_target| ... }
1135
- id = ID2SYM(rb_intern("link_proc"));
1136
- if (rb_funcall(options, has_key, 1, id) == Qtrue)
1137
- link_proc = rb_hash_aref(options, id);
1138
- }
1139
-
1140
- // normalize, regardless of whether this came from instance variable or override
1141
- if (base_heading_level < 0)
1142
- base_heading_level = 0;
1143
- if (base_heading_level > 6)
1144
- base_heading_level = 6;
1145
-
1146
- // set up scanner
1147
- char *p = RSTRING_PTR(string);
1148
- long len = RSTRING_LEN(string);
1149
- char *pe = p + len;
1150
-
1151
- // set up parser struct to make passing parameters a little easier
1152
- parser_t *parser = parser_new();
1153
- GC_WRAP_PARSER(parser, parser_gc);
1154
- parser->external_link_class = link_class;
1155
- parser->external_link_rel = link_rel;
1156
- parser->mailto_class = mailto_class;
1157
- parser->img_prefix = rb_iv_get(self, "@img_prefix");
1158
- parser->autolink = rb_iv_get(self, "@autolink") == Qtrue ? true : false;
1159
- parser->space_to_underscore = rb_iv_get(self, "@space_to_underscore") == Qtrue ? true : false;
1160
- parser->line_ending = str_new_from_string(line_ending);
1161
- parser->base_indent = base_indent;
1162
- parser->base_heading_level = base_heading_level;
1163
- parser->output_style = output_style;
1164
-
1165
- // this simple looping design leads to a single enormous function,
1166
- // but it's faster than doing actual recursive descent and also secure in the face of
1167
- // malicious input that seeks to overflow the stack
1168
- // (with "<blockquote><blockquote><blockquote>..." times by 10,000, for example)
1169
- // given that we expect to deal with a lot of malformed input, a recursive descent design is less appropriate
1170
- // than a straightforward looping translator like this one anyway
1171
- token_t _token;
1172
- _token.type = NO_TOKEN;
1173
- token_t *token = NULL;
1174
- do
1175
- {
1176
- // note that whenever we grab a token we push it into the line buffer
1177
- // this provides us with context-sensitive "memory" of what's been seen so far on this line
1178
- #define NEXT_TOKEN() token = &_token, next_token(token, token, NULL, pe), ary_push(parser->line_buffer, token->type)
1179
-
1180
- // check to see if we have a token hanging around from a previous iteration of this loop
1181
- if (token == NULL)
1182
- {
1183
- if (_token.type == NO_TOKEN)
1184
- {
1185
- // first time here (haven't started scanning yet)
1186
- token = &_token;
1187
- next_token(token, NULL, p, pe);
1188
- ary_push(parser->line_buffer, token->type);
1189
- }
1190
- else
1191
- // already scanning
1192
- NEXT_TOKEN();
1193
- }
1194
- int type = token->type;
1195
-
1196
- // can't declare new variables inside a switch statement, so predeclare them here
1197
- long remove_strong = -1;
1198
- long remove_em = -1;
1199
-
1200
- // general purpose counters, flags and pointers
1201
- long i = 0;
1202
- long j = 0;
1203
- long k = 0;
1204
- str_t *output = NULL;
1205
- str_t _token_str;
1206
- str_t *token_str = &_token_str;
1207
-
1208
- // The following giant switch statement contains cases for all the possible token types.
1209
- // In the most basic sense we are emitting the HTML that corresponds to each token,
1210
- // but some tokens require context information in order to decide what to output.
1211
- // For example, does the STRONG token (''') translate to <strong> or </strong>?
1212
- // So when looking at any given token we have three state-maintaining variables which give us a notion of "where we are":
1213
- //
1214
- // - the "scope" stack (indicates what HTML DOM structures we are currently nested inside, similar to a CSS selector)
1215
- // - the line buffer (records tokens seen so far on the current line)
1216
- // - the line "scope" stack (indicates what the scope should be based only on what is visible on the line so far)
1217
- //
1218
- // Although this is fairly complicated, there is one key simplifying factor:
1219
- // The translator continuously performs auto-correction, and this means that we always have a guarantee that the
1220
- // scope stack (up to the current token) is valid; our translator can take this as a given.
1221
- // Auto-correction basically consists of inserting missing tokens (preventing subsequent HTML from being messed up),
1222
- // or converting illegal (unexpected) tokens to their plain text equivalents (providing visual feedback to Wikitext author).
1223
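To make the auto-correction described above concrete: an unbalanced inline span is closed automatically rather than leaking broken HTML. A rough illustration with a default parser (exact whitespace and line endings may differ from this sketch):

  require 'wikitext'

  parser = Wikitext::Parser.new
  parser.parse("'''bold text with no closing mark")
  # => "<p><strong>bold text with no closing mark</strong></p>\n"   (approximate)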
- switch (type)
1224
- {
1225
- case PRE:
1226
- if (IN_EITHER_OF(NO_WIKI_START, PRE_START))
1227
- {
1228
- str_append(parser->output, space, sizeof(space) - 1);
1229
- break;
1230
- }
1231
- else if (IN(BLOCKQUOTE_START))
1232
- {
1233
- // this kind of nesting not allowed (to avoid user confusion)
1234
- wiki_pop_excess_elements(parser);
1235
- wiki_start_para_if_necessary(parser);
1236
- output = parser->capture ? parser->capture : parser->output;
1237
- str_append(output, space, sizeof(space) - 1);
1238
- break;
1239
- }
1240
-
1241
- // count number of BLOCKQUOTE tokens in line buffer and in scope stack
1242
- ary_push(parser->line, PRE);
1243
- i = ary_count(parser->line, BLOCKQUOTE);
1244
- j = ary_count(parser->scope, BLOCKQUOTE);
1245
- if (i < j)
1246
- {
1247
- // must pop (reduce nesting level)
1248
- for (i = j - i; i > 0; i--)
1249
- wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, true);
1250
- }
1251
-
1252
- if (!IN(PRE))
1253
- {
1254
- parser->pending_crlf = false;
1255
- wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
1256
- wiki_indent(parser);
1257
- str_append(parser->output, pre_start, sizeof(pre_start) - 1);
1258
- ary_push(parser->scope, PRE);
1259
- }
1260
- break;
1261
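The PRE token comes from a single leading space at the start of a line; consecutive space-indented lines are folded into one <pre> block. Roughly, with a default parser (output shape approximate):

  require 'wikitext'

  parser = Wikitext::Parser.new
  parser.parse(" line one\n line two\n")
  # => "<pre>line one\nline two</pre>\n"   (approximate)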
-
1262
- case PRE_START:
1263
- if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
1264
- {
1265
- wiki_emit_pending_crlf_if_necessary(parser);
1266
- str_append(parser->output, escaped_pre_start, sizeof(escaped_pre_start) - 1);
1267
- }
1268
- else if (IN(BLOCKQUOTE_START))
1269
- {
1270
- wiki_rollback_failed_link(parser); // if any
1271
- wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
1272
- wiki_append_pre_start(parser, token);
1273
- }
1274
- else if (IN(BLOCKQUOTE))
1275
- {
1276
- if (token->column_start == 1) // only allowed in first column
1277
- {
1278
- wiki_rollback_failed_link(parser); // if any
1279
- wiki_pop_all_from_stack(parser);
1280
- wiki_append_pre_start(parser, token);
1281
- }
1282
- else // PRE_START illegal here
1283
- {
1284
- output = parser->capture ? parser->capture : parser->output;
1285
- wiki_pop_excess_elements(parser);
1286
- wiki_start_para_if_necessary(parser);
1287
- str_append(output, escaped_pre_start, sizeof(escaped_pre_start) - 1);
1288
- }
1289
- }
1290
- else
1291
- {
1292
- wiki_rollback_failed_link(parser); // if any
1293
- wiki_pop_from_stack_up_to(parser, NULL, P, true);
1294
- wiki_append_pre_start(parser, token);
1295
- }
1296
- break;
1297
-
1298
- case PRE_END:
1299
- if (IN_EITHER_OF(NO_WIKI_START, PRE))
1300
- {
1301
- wiki_emit_pending_crlf_if_necessary(parser);
1302
- str_append(parser->output, escaped_pre_end, sizeof(escaped_pre_end) - 1);
1303
- }
1304
- else
1305
- {
1306
- if (IN(PRE_START))
1307
- wiki_pop_from_stack_up_to(parser, parser->output, PRE_START, true);
1308
- else
1309
- {
1310
- output = parser->capture ? parser->capture : parser->output;
1311
- wiki_pop_excess_elements(parser);
1312
- wiki_start_para_if_necessary(parser);
1313
- str_append(output, escaped_pre_end, sizeof(escaped_pre_end) - 1);
1314
- }
1315
- }
1316
- break;
1317
-
1318
- case BLOCKQUOTE:
1319
- if (IN_EITHER_OF(NO_WIKI_START, PRE_START))
1320
- // no need to check for <pre>; can never appear inside it
1321
- str_append(parser->output, escaped_blockquote, TOKEN_LEN(token) + 3); // will either emit "&gt;" or "&gt; "
1322
- else if (IN(BLOCKQUOTE_START))
1323
- {
1324
- // this kind of nesting not allowed (to avoid user confusion)
1325
- wiki_pop_excess_elements(parser);
1326
- wiki_start_para_if_necessary(parser);
1327
- output = parser->capture ? parser->capture : parser->output;
1328
- str_append(output, escaped_blockquote, TOKEN_LEN(token) + 3); // will either emit "&gt;" or "&gt; "
1329
- break;
1330
- }
1331
- else
1332
- {
1333
- ary_push(parser->line, BLOCKQUOTE);
1334
-
1335
- // count number of BLOCKQUOTE tokens in line buffer and in scope stack
1336
- i = ary_count(parser->line, BLOCKQUOTE);
1337
- j = ary_count(parser->scope, BLOCKQUOTE);
1338
-
1339
- // given that BLOCKQUOTE tokens can be nested, peek ahead and see if there are any more which might affect the decision to push or pop
1340
- while (NEXT_TOKEN(), (token->type == BLOCKQUOTE))
1341
- {
1342
- ary_push(parser->line, BLOCKQUOTE);
1343
- i++;
1344
- }
1345
-
1346
- // now decide whether to push, pop or do nothing
1347
- if (i > j)
1348
- {
1349
- // must push (increase nesting level)
1350
- wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
1351
- for (i = i - j; i > 0; i--)
1352
- {
1353
- wiki_indent(parser);
1354
- str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
1355
- str_append_str(parser->output, parser->line_ending);
1356
- ary_push(parser->scope, BLOCKQUOTE);
1357
- }
1358
- }
1359
- else if (i < j)
1360
- {
1361
- // must pop (reduce nesting level)
1362
- for (i = j - i; i > 0; i--)
1363
- wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, true);
1364
- }
1365
-
1366
- // jump to top of the loop to process token we scanned during lookahead
1367
- continue;
1368
- }
1369
- break;
1370
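The BLOCKQUOTE push/pop logic above maps runs of ">" markers onto nested <blockquote> elements. An approximate sketch with a default parser (indentation and line endings illustrative):

  require 'wikitext'

  parser = Wikitext::Parser.new
  parser.parse("> outer\n>> inner\n")
  # expected shape, approximately:
  #   <blockquote>
  #     <p>outer</p>
  #     <blockquote>
  #       <p>inner</p>
  #     </blockquote>
  #   </blockquote>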
-
1371
- case BLOCKQUOTE_START:
1372
- if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
1373
- {
1374
- wiki_emit_pending_crlf_if_necessary(parser);
1375
- str_append(parser->output, escaped_blockquote_start, sizeof(escaped_blockquote_start) - 1);
1376
- }
1377
- else if (IN(BLOCKQUOTE_START))
1378
- {
1379
- // nesting is fine here
1380
- wiki_rollback_failed_link(parser); // if any
1381
- wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
1382
- wiki_indent(parser);
1383
- str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
1384
- str_append_str(parser->output, parser->line_ending);
1385
- ary_push(parser->scope, BLOCKQUOTE_START);
1386
- ary_push(parser->line, BLOCKQUOTE_START);
1387
- }
1388
- else if (IN(BLOCKQUOTE))
1389
- {
1390
- if (token->column_start == 1) // only allowed in first column
1391
- {
1392
- wiki_rollback_failed_link(parser); // if any
1393
- wiki_pop_all_from_stack(parser);
1394
- wiki_indent(parser);
1395
- str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
1396
- str_append_str(parser->output, parser->line_ending);
1397
- ary_push(parser->scope, BLOCKQUOTE_START);
1398
- ary_push(parser->line, BLOCKQUOTE_START);
1399
- }
1400
- else // BLOCKQUOTE_START illegal here
1401
- {
1402
- output = parser->capture ? parser->capture : parser->output;
1403
- wiki_pop_excess_elements(parser);
1404
- wiki_start_para_if_necessary(parser);
1405
- str_append(output, escaped_blockquote_start, sizeof(escaped_blockquote_start) - 1);
1406
- }
1407
- }
1408
- else
1409
- {
1410
- // would be nice to eliminate the repetition here but it's probably the clearest way
1411
- wiki_rollback_failed_link(parser); // if any
1412
- wiki_pop_from_stack_up_to(parser, NULL, P, true);
1413
- wiki_indent(parser);
1414
- str_append(parser->output, blockquote_start, sizeof(blockquote_start) - 1);
1415
- str_append_str(parser->output, parser->line_ending);
1416
- ary_push(parser->scope, BLOCKQUOTE_START);
1417
- ary_push(parser->line, BLOCKQUOTE_START);
1418
- }
1419
- break;
1420
-
1421
- case BLOCKQUOTE_END:
1422
- if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
1423
- {
1424
- wiki_emit_pending_crlf_if_necessary(parser);
1425
- str_append(parser->output, escaped_blockquote_end, sizeof(escaped_blockquote_end) - 1);
1426
- }
1427
- else
1428
- {
1429
- if (IN(BLOCKQUOTE_START))
1430
- wiki_pop_from_stack_up_to(parser, parser->output, BLOCKQUOTE_START, true);
1431
- else
1432
- {
1433
- output = parser->capture ? parser->capture : parser->output;
1434
- wiki_pop_excess_elements(parser);
1435
- wiki_start_para_if_necessary(parser);
1436
- str_append(output, escaped_blockquote_end, sizeof(escaped_blockquote_end) - 1);
1437
- }
1438
- }
1439
- break;
1440
-
1441
- case NO_WIKI_START:
1442
- if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
1443
- {
1444
- wiki_emit_pending_crlf_if_necessary(parser);
1445
- str_append(parser->output, escaped_no_wiki_start, sizeof(escaped_no_wiki_start) - 1);
1446
- }
1447
- else
1448
- {
1449
- wiki_pop_excess_elements(parser);
1450
- wiki_start_para_if_necessary(parser);
1451
- ary_push(parser->scope, NO_WIKI_START);
1452
- ary_push(parser->line, NO_WIKI_START);
1453
- }
1454
- break;
1455
-
1456
- case NO_WIKI_END:
1457
- if (IN(NO_WIKI_START))
1458
- // <nowiki> should only ever be the last item on the stack, but use the helper routine just in case
1459
- wiki_pop_from_stack_up_to(parser, NULL, NO_WIKI_START, true);
1460
- else
1461
- {
1462
- wiki_pop_excess_elements(parser);
1463
- wiki_start_para_if_necessary(parser);
1464
- str_append(parser->output, escaped_no_wiki_end, sizeof(escaped_no_wiki_end) - 1);
1465
- }
1466
- break;
1467
-
1468
- case STRONG_EM:
1469
- if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
1470
- {
1471
- wiki_emit_pending_crlf_if_necessary(parser);
1472
- str_append(parser->output, literal_strong_em, sizeof(literal_strong_em) - 1);
1473
- break;
1474
- }
1475
-
1476
- output = parser->capture ? parser->capture : parser->output;
1477
- wiki_pop_excess_elements(parser);
1478
-
1479
- // if you've seen STRONG/STRONG_START or EM/EM_START, must close them in the reverse order that you saw them!
1480
- // otherwise, must open them
1481
- remove_strong = -1;
1482
- remove_em = -1;
1483
- j = parser->scope->count;
1484
- for (j = j - 1; j >= 0; j--)
1485
- {
1486
- int val = ary_entry(parser->scope, (int)j);
1487
- if (val == STRONG || val == STRONG_START)
1488
- {
1489
- str_append(output, strong_end, sizeof(strong_end) - 1);
1490
- remove_strong = j;
1491
- }
1492
- else if (val == EM || val == EM_START)
1493
- {
1494
- str_append(output, em_end, sizeof(em_end) - 1);
1495
- remove_em = j;
1496
- }
1497
- }
1498
-
1499
- if (remove_strong > remove_em) // must remove strong first
1500
- {
1501
- ary_pop(parser->scope);
1502
- if (remove_em > -1)
1503
- ary_pop(parser->scope);
1504
- else // there was no em to remove, so consider this an opening em tag
1505
- {
1506
- str_append(output, em_start, sizeof(em_start) - 1);
1507
- ary_push(parser->scope, EM);
1508
- ary_push(parser->line, EM);
1509
- }
1510
- }
1511
- else if (remove_em > remove_strong) // must remove em first
1512
- {
1513
- ary_pop(parser->scope);
1514
- if (remove_strong > -1)
1515
- ary_pop(parser->scope);
1516
- else // there was no strong to remove, so consider this an opening strong tag
1517
- {
1518
- str_append(output, strong_start, sizeof(strong_start) - 1);
1519
- ary_push(parser->scope, STRONG);
1520
- ary_push(parser->line, STRONG);
1521
- }
1522
- }
1523
- else // no strong or em to remove, so this must be a new opening of both
1524
- {
1525
- wiki_start_para_if_necessary(parser);
1526
- str_append(output, strong_em_start, sizeof(strong_em_start) - 1);
1527
- ary_push(parser->scope, STRONG);
1528
- ary_push(parser->line, STRONG);
1529
- ary_push(parser->scope, EM);
1530
- ary_push(parser->line, EM);
1531
- }
1532
- break;
1533
-
1534
- case STRONG:
1535
- if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
1536
- {
1537
- wiki_emit_pending_crlf_if_necessary(parser);
1538
- str_append(parser->output, literal_strong, sizeof(literal_strong) - 1);
1539
- }
1540
- else
1541
- {
1542
- output = parser->capture ? parser->capture : parser->output;
1543
- if (IN(STRONG_START))
1544
- // already in span started with <strong>, no choice but to emit this literally
1545
- str_append(output, literal_strong, sizeof(literal_strong) - 1);
1546
- else if (IN(STRONG))
1547
- // STRONG already seen, this is a closing tag
1548
- wiki_pop_from_stack_up_to(parser, output, STRONG, true);
1549
- else
1550
- {
1551
- // this is a new opening
1552
- wiki_pop_excess_elements(parser);
1553
- wiki_start_para_if_necessary(parser);
1554
- str_append(output, strong_start, sizeof(strong_start) - 1);
1555
- ary_push(parser->scope, STRONG);
1556
- ary_push(parser->line, STRONG);
1557
- }
1558
- }
1559
- break;
1560
-
1561
- case STRONG_START:
1562
- if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
1563
- {
1564
- wiki_emit_pending_crlf_if_necessary(parser);
1565
- str_append(parser->output, escaped_strong_start, sizeof(escaped_strong_start) - 1);
1566
- }
1567
- else
1568
- {
1569
- output = parser->capture ? parser->capture : parser->output;
1570
- if (IN_EITHER_OF(STRONG_START, STRONG))
1571
- str_append(output, escaped_strong_start, sizeof(escaped_strong_start) - 1);
1572
- else
1573
- {
1574
- wiki_pop_excess_elements(parser);
1575
- wiki_start_para_if_necessary(parser);
1576
- str_append(output, strong_start, sizeof(strong_start) - 1);
1577
- ary_push(parser->scope, STRONG_START);
1578
- ary_push(parser->line, STRONG_START);
1579
- }
1580
- }
1581
- break;
1582
-
1583
- case STRONG_END:
1584
- if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
1585
- {
1586
- wiki_emit_pending_crlf_if_necessary(parser);
1587
- str_append(parser->output, escaped_strong_end, sizeof(escaped_strong_end) - 1);
1588
- }
1589
- else
1590
- {
1591
- output = parser->capture ? parser->capture : parser->output;
1592
- if (IN(STRONG_START))
1593
- wiki_pop_from_stack_up_to(parser, output, STRONG_START, true);
1594
- else
1595
- {
1596
- // no STRONG_START in scope, so must interpret the STRONG_END without any special meaning
1597
- wiki_pop_excess_elements(parser);
1598
- wiki_start_para_if_necessary(parser);
1599
- str_append(output, escaped_strong_end, sizeof(escaped_strong_end) - 1);
1600
- }
1601
- }
1602
- break;
1603
-
1604
- case EM:
1605
- if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
1606
- {
1607
- wiki_emit_pending_crlf_if_necessary(parser);
1608
- str_append(parser->output, literal_em, sizeof(literal_em) - 1);
1609
- }
1610
- else
1611
- {
1612
- output = parser->capture ? parser->capture : parser->output;
1613
- if (IN(EM_START))
1614
- // already in span started with <em>, no choice but to emit this literally
1615
- str_append(output, literal_em, sizeof(literal_em) - 1);
1616
- else if (IN(EM))
1617
- // EM already seen, this is a closing tag
1618
- wiki_pop_from_stack_up_to(parser, output, EM, true);
1619
- else
1620
- {
1621
- // this is a new opening
1622
- wiki_pop_excess_elements(parser);
1623
- wiki_start_para_if_necessary(parser);
1624
- str_append(output, em_start, sizeof(em_start) - 1);
1625
- ary_push(parser->scope, EM);
1626
- ary_push(parser->line, EM);
1627
- }
1628
- }
1629
- break;
1630
-
1631
- case EM_START:
1632
- if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
1633
- {
1634
- wiki_emit_pending_crlf_if_necessary(parser);
1635
- str_append(parser->output, escaped_em_start, sizeof(escaped_em_start) - 1);
1636
- }
1637
- else
1638
- {
1639
- output = parser->capture ? parser->capture : parser->output;
1640
- if (IN_EITHER_OF(EM_START, EM))
1641
- str_append(output, escaped_em_start, sizeof(escaped_em_start) - 1);
1642
- else
1643
- {
1644
- wiki_pop_excess_elements(parser);
1645
- wiki_start_para_if_necessary(parser);
1646
- str_append(output, em_start, sizeof(em_start) - 1);
1647
- ary_push(parser->scope, EM_START);
1648
- ary_push(parser->line, EM_START);
1649
- }
1650
- }
1651
- break;
1652
-
1653
- case EM_END:
1654
- if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
1655
- {
1656
- wiki_emit_pending_crlf_if_necessary(parser);
1657
- str_append(parser->output, escaped_em_end, sizeof(escaped_em_end) - 1);
1658
- }
1659
- else
1660
- {
1661
- output = parser->capture ? parser->capture : parser->output;
1662
- if (IN(EM_START))
1663
- wiki_pop_from_stack_up_to(parser, output, EM_START, true);
1664
- else
1665
- {
1666
- // no EM_START in scope, so must interpret the EM_END without any special meaning
1667
- wiki_pop_excess_elements(parser);
1668
- wiki_start_para_if_necessary(parser);
1669
- str_append(output, escaped_em_end, sizeof(escaped_em_end) - 1);
1670
- }
1671
- }
1672
- break;
1673
-
1674
- case TT:
1675
- if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
1676
- {
1677
- wiki_emit_pending_crlf_if_necessary(parser);
1678
- str_append(parser->output, backtick, sizeof(backtick) - 1);
1679
- }
1680
- else
1681
- {
1682
- output = parser->capture ? parser->capture : parser->output;
1683
- if (IN(TT_START))
1684
- // already in span started with <tt>, no choice but to emit this literally
1685
- str_append(output, backtick, sizeof(backtick) - 1);
1686
- else if (IN(TT))
1687
- // TT (`) already seen, this is a closing tag
1688
- wiki_pop_from_stack_up_to(parser, output, TT, true);
1689
- else
1690
- {
1691
- // this is a new opening
1692
- wiki_pop_excess_elements(parser);
1693
- wiki_start_para_if_necessary(parser);
1694
- str_append(output, code_start, sizeof(code_start) - 1);
1695
- ary_push(parser->scope, TT);
1696
- ary_push(parser->line, TT);
1697
- }
1698
- }
1699
- break;
1700
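Although the token is still called TT for historical reasons, the markup emitted above is code_start, so backtick spans render as <code> elements. A quick sketch (output approximate):

  require 'wikitext'

  parser = Wikitext::Parser.new
  parser.parse("a `literal` span\n")
  # => "<p>a <code>literal</code> span</p>\n"   (approximate)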
-
1701
- case TT_START:
1702
- if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
1703
- {
1704
- wiki_emit_pending_crlf_if_necessary(parser);
1705
- str_append(parser->output, escaped_tt_start, sizeof(escaped_tt_start) - 1);
1706
- }
1707
- else
1708
- {
1709
- output = parser->capture ? parser->capture : parser->output;
1710
- if (IN_EITHER_OF(TT_START, TT))
1711
- str_append(output, escaped_tt_start, sizeof(escaped_tt_start) - 1);
1712
- else
1713
- {
1714
- wiki_pop_excess_elements(parser);
1715
- wiki_start_para_if_necessary(parser);
1716
- str_append(output, code_start, sizeof(code_start) - 1);
1717
- ary_push(parser->scope, TT_START);
1718
- ary_push(parser->line, TT_START);
1719
- }
1720
- }
1721
- break;
1722
-
1723
- case TT_END:
1724
- if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
1725
- {
1726
- wiki_emit_pending_crlf_if_necessary(parser);
1727
- str_append(parser->output, escaped_tt_end, sizeof(escaped_tt_end) - 1);
1728
- }
1729
- else
1730
- {
1731
- output = parser->capture ? parser->capture : parser->output;
1732
- if (IN(TT_START))
1733
- wiki_pop_from_stack_up_to(parser, output, TT_START, true);
1734
- else
1735
- {
1736
- // no TT_START in scope, so must interpret the TT_END without any special meaning
1737
- wiki_pop_excess_elements(parser);
1738
- wiki_start_para_if_necessary(parser);
1739
- str_append(output, escaped_tt_end, sizeof(escaped_tt_end) - 1);
1740
- }
1741
- }
1742
- break;
1743
-
1744
- case OL:
1745
- case UL:
1746
- if (IN_EITHER_OF(NO_WIKI_START, PRE_START))
1747
- {
1748
- // no need to check for PRE; can never appear inside it
1749
- str_append(parser->output, token->start, TOKEN_LEN(token));
1750
- break;
1751
- }
1752
-
1753
- // count number of tokens in line and scope stacks
1754
- int bq_count = ary_count(parser->scope, BLOCKQUOTE_START);
1755
- i = parser->line->count - ary_count(parser->line, BLOCKQUOTE_START);
1756
- j = parser->scope->count - bq_count;
1757
- k = i;
1758
-
1759
- // list tokens can be nested so look ahead for any more which might affect the decision to push or pop
1760
- for (;;)
1761
- {
1762
- type = token->type;
1763
- if (type == OL || type == UL)
1764
- {
1765
- token = NULL;
1766
- if (i - k >= 2) // already seen at least one OL or UL
1767
- {
1768
- ary_push(parser->line, NESTED_LIST); // which means this is a nested list
1769
- i += 3;
1770
- }
1771
- else
1772
- i += 2;
1773
- ary_push(parser->line, type);
1774
- ary_push(parser->line, LI);
1775
-
1776
- // want to compare line with scope but can only do so if scope has enough items on it
1777
- if (j >= i)
1778
- {
1779
- if (ary_entry(parser->scope, (int)(i + bq_count - 2)) == type &&
1780
- ary_entry(parser->scope, (int)(i + bq_count - 1)) == LI)
1781
- {
1782
- // line and scope match at this point: do nothing yet
1783
- }
1784
- else
1785
- {
1786
- // item just pushed onto line does not match corresponding slot of scope!
1787
- for (; j >= i - 2; j--)
1788
- // must pop back before emitting
1789
- wiki_pop_from_stack(parser, NULL);
1790
-
1791
- // will emit UL or OL, then LI
1792
- break;
1793
- }
1794
- }
1795
- else // line stack size now exceeds scope stack size: must increase nesting level
1796
- break; // will emit UL or OL, then LI
1797
- }
1798
- else
1799
- {
1800
- // not an OL or UL token!
1801
- if (j == i)
1802
- // must close existing LI and re-open new one
1803
- wiki_pop_from_stack(parser, NULL);
1804
- else if (j > i)
1805
- {
1806
- // item just pushed onto line does not match corresponding slot of scope!
1807
- for (; j >= i; j--)
1808
- // must pop back before emitting
1809
- wiki_pop_from_stack(parser, NULL);
1810
- }
1811
- break;
1812
- }
1813
- NEXT_TOKEN();
1814
- }
1815
-
1816
- // will emit
1817
- if (type == OL || type == UL)
1818
- {
1819
- // if LI is at the top of the stack, this is the start of a nested list
1820
- if (j > 0 && ary_entry(parser->scope, -1) == LI)
1821
- {
1822
- // so we should precede it with a CRLF, and indicate that it's a nested list
1823
- str_append(parser->output, parser->line_ending->ptr, parser->line_ending->len);
1824
- ary_push(parser->scope, NESTED_LIST);
1825
- }
1826
- else
1827
- {
1828
- // this is a new list
1829
- if (IN(BLOCKQUOTE_START))
1830
- wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
1831
- else
1832
- wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
1833
- }
1834
-
1835
- // emit
1836
- wiki_indent(parser);
1837
- if (type == OL)
1838
- str_append(parser->output, ol_start, sizeof(ol_start) - 1);
1839
- else if (type == UL)
1840
- str_append(parser->output, ul_start, sizeof(ul_start) - 1);
1841
- ary_push(parser->scope, type);
1842
- str_append(parser->output, parser->line_ending->ptr, parser->line_ending->len);
1843
- }
1844
- else if (type == SPACE)
1845
- // silently throw away the optional SPACE token after the final list marker
1846
- token = NULL;
1847
-
1848
- wiki_indent(parser);
1849
- str_append(parser->output, li_start, sizeof(li_start) - 1);
1850
- ary_push(parser->scope, LI);
1851
-
1852
- // any subsequent UL or OL tokens on this line are syntax errors and must be emitted literally
1853
- if (type == OL || type == UL)
1854
- {
1855
- k = 0;
1856
- while (k++, NEXT_TOKEN(), (type = token->type))
1857
- {
1858
- if (type == OL || type == UL)
1859
- str_append(parser->output, token->start, TOKEN_LEN(token));
1860
- else if (type == SPACE && k == 1)
1861
- {
1862
- // silently throw away the optional SPACE token after the final list marker
1863
- token = NULL;
1864
- break;
1865
- }
1866
- else
1867
- break;
1868
- }
1869
- }
1870
-
1871
- // jump to top of the loop to process token we scanned during lookahead
1872
- continue;
1873
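The lookahead above is what lets consecutive list markers express nesting. A rough sketch of the intended shape with a default parser (whitespace approximate):

  require 'wikitext'

  parser = Wikitext::Parser.new
  parser.parse("* one\n* two\n** two point one\n")
  # expected shape, approximately:
  #   <ul>
  #     <li>one</li>
  #     <li>two
  #       <ul>
  #         <li>two point one</li>
  #       </ul>
  #     </li>
  #   </ul>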
-
1874
- case H6_START:
1875
- case H5_START:
1876
- case H4_START:
1877
- case H3_START:
1878
- case H2_START:
1879
- case H1_START:
1880
- if (IN_EITHER_OF(NO_WIKI_START, PRE_START))
1881
- {
1882
- // no need to check for PRE; can never appear inside it
1883
- str_append(parser->output, token->start, TOKEN_LEN(token));
1884
- break;
1885
- }
1886
-
1887
- // pop up to but not including the last BLOCKQUOTE on the scope stack
1888
- if (IN(BLOCKQUOTE_START))
1889
- wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE_START, false);
1890
- else
1891
- wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, false);
1892
-
1893
- // count number of BLOCKQUOTE tokens in line buffer and in scope stack
1894
- ary_push(parser->line, type);
1895
- i = ary_count(parser->line, BLOCKQUOTE);
1896
- j = ary_count(parser->scope, BLOCKQUOTE);
1897
-
1898
- // decide whether we need to pop off excess BLOCKQUOTE tokens (will never need to push; that is handled above in the BLOCKQUOTE case itself)
1899
- if (i < j)
1900
- {
1901
- // must pop (reduce nesting level)
1902
- for (i = j - i; i > 0; i--)
1903
- wiki_pop_from_stack_up_to(parser, NULL, BLOCKQUOTE, true);
1904
- }
1905
-
1906
- // discard any whitespace here (so that "== foo ==" will be translated to "<h2>foo</h2>" rather than "<h2> foo </h2>")
1907
- while (NEXT_TOKEN(), (token->type == SPACE))
1908
- ; // discard
1909
-
1910
- ary_push(parser->scope, type);
1911
- wiki_indent(parser);
1912
-
1913
- // take base_heading_level into account
1914
- type += base_heading_level;
1915
- if (type > H6_START) // no need to check for underflow (base_heading_level never negative)
1916
- type = H6_START;
1917
-
1918
- // rather than repeat all that code for each kind of heading, share it and use a conditional here
1919
- if (type == H6_START)
1920
- str_append(parser->output, h6_start, sizeof(h6_start) - 1);
1921
- else if (type == H5_START)
1922
- str_append(parser->output, h5_start, sizeof(h5_start) - 1);
1923
- else if (type == H4_START)
1924
- str_append(parser->output, h4_start, sizeof(h4_start) - 1);
1925
- else if (type == H3_START)
1926
- str_append(parser->output, h3_start, sizeof(h3_start) - 1);
1927
- else if (type == H2_START)
1928
- str_append(parser->output, h2_start, sizeof(h2_start) - 1);
1929
- else if (type == H1_START)
1930
- str_append(parser->output, h1_start, sizeof(h1_start) - 1);
1931
-
1932
- // jump to top of the loop to process token we scanned during lookahead
1933
- continue;
1934
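The base_heading_level adjustment above shifts every heading down by the configured amount, clamping at <h6>. For example (outputs approximate; exact line endings may differ):

  require 'wikitext'

  parser = Wikitext::Parser.new
  parser.parse("== Section ==\n")                             # => "<h2>Section</h2>\n"   (approximate)
  parser.parse("== Section ==\n", :base_heading_level => 2)   # => "<h4>Section</h4>\n"   (approximate)
  parser.parse("====== Deep ======\n", :base_heading_level => 3)
  # clamps at <h6>: => "<h6>Deep</h6>\n"   (approximate)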
-
1935
- case H6_END:
1936
- case H5_END:
1937
- case H4_END:
1938
- case H3_END:
1939
- case H2_END:
1940
- case H1_END:
1941
- if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
1942
- {
1943
- wiki_emit_pending_crlf_if_necessary(parser);
1944
- str_append(parser->output, token->start, TOKEN_LEN(token));
1945
- }
1946
- else
1947
- {
1948
- wiki_rollback_failed_external_link(parser); // if any
1949
- if ((type == H6_END && !IN(H6_START)) ||
1950
- (type == H5_END && !IN(H5_START)) ||
1951
- (type == H4_END && !IN(H4_START)) ||
1952
- (type == H3_END && !IN(H3_START)) ||
1953
- (type == H2_END && !IN(H2_START)) ||
1954
- (type == H1_END && !IN(H1_START)))
1955
- {
1956
- // literal output only if not in appropriate scope (we stay silent in that case)
1957
- wiki_start_para_if_necessary(parser);
1958
- str_append(parser->output, token->start, TOKEN_LEN(token));
1959
- }
1960
- }
1961
- break;
1962
-
1963
- case MAIL:
1964
- if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
1965
- {
1966
- wiki_emit_pending_crlf_if_necessary(parser);
1967
- str_append(parser->output, token->start, TOKEN_LEN(token));
1968
- }
1969
- else if (IN(EXT_LINK_START))
1970
- // must be capturing and this must be part of the link text
1971
- str_append(parser->capture, token->start, TOKEN_LEN(token));
1972
- else
1973
- {
1974
- wiki_pop_excess_elements(parser);
1975
- wiki_start_para_if_necessary(parser);
1976
- token_str->ptr = token->start;
1977
- token_str->len = TOKEN_LEN(token);
1978
- wiki_append_hyperlink(parser, rb_str_new2("mailto:"), token_str, NULL, mailto_class, Qnil, true);
1979
- }
1980
- break;
1981
-
1982
- case URI:
1983
- if (IN(NO_WIKI_START))
1984
- {
1985
- // user can temporarily suppress autolinking by using <nowiki></nowiki>
1986
- // note that unlike MediaWiki, we do allow autolinking inside PRE blocks
1987
- token_str->ptr = token->start;
1988
- token_str->len = TOKEN_LEN(token);
1989
- wiki_append_sanitized_link_target(token_str, parser->output, false);
1990
- }
1991
- else if (IN(LINK_START))
1992
- {
1993
- // if the URI were allowed it would have been handled already in LINK_START
1994
- wiki_rollback_failed_internal_link(parser);
1995
- token_str->ptr = token->start;
1996
- token_str->len = TOKEN_LEN(token);
1997
- wiki_append_hyperlink(parser, Qnil, token_str, NULL, parser->external_link_class, parser->external_link_rel, true);
1998
- }
1999
- else if (IN(EXT_LINK_START))
2000
- {
2001
- if (parser->link_target->len == 0)
2002
- {
2003
- // this must be our link target: look ahead to make sure we see the space we're expecting to see
2004
- token_str->ptr = token->start;
2005
- token_str->len = TOKEN_LEN(token);
2006
- NEXT_TOKEN();
2007
- if (token->type == SPACE)
2008
- {
2009
- ary_push(parser->scope, SPACE);
2010
- str_append_str(parser->link_target, token_str);
2011
- str_clear(parser->link_text);
2012
- parser->capture = parser->link_text;
2013
- token = NULL; // silently consume space
2014
- }
2015
- else
2016
- {
2017
- // didn't see the space! this must be an error
2018
- wiki_pop_from_stack(parser, NULL);
2019
- wiki_pop_excess_elements(parser);
2020
- wiki_start_para_if_necessary(parser);
2021
- str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
2022
- wiki_append_hyperlink(parser, Qnil, token_str, NULL, parser->external_link_class, parser->external_link_rel, true);
2023
- continue;
2024
- }
2025
- }
2026
- else
2027
- {
2028
- token_str->ptr = token->start;
2029
- token_str->len = TOKEN_LEN(token);
2030
- wiki_append_sanitized_link_target(token_str, parser->link_text, false);
2031
- }
2032
- }
2033
- else
2034
- {
2035
- wiki_pop_excess_elements(parser);
2036
- wiki_start_para_if_necessary(parser);
2037
- token_str->ptr = token->start;
2038
- token_str->len = TOKEN_LEN(token);
2039
- wiki_append_hyperlink(parser, Qnil, token_str, NULL, parser->external_link_class, parser->external_link_rel, true);
2040
- }
2041
- break;
2042
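Bare URIs are autolinked as shown in the URI case, and wrapping them in <nowiki> suppresses that. The class attribute below assumes the parser's default external_link_class; the URL is just an example and the output is approximate:

  require 'wikitext'

  parser = Wikitext::Parser.new
  parser.parse("see http://example.com/ for details\n")
  # => <p>see <a href="http://example.com/" class="external">http://example.com/</a> for details</p>   (approximate)
  parser.parse("<nowiki>http://example.com/</nowiki>\n")
  # => the URI is emitted as sanitized text, with no <a> element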
-
2043
- case PATH:
2044
- if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
2045
- {
2046
- wiki_emit_pending_crlf_if_necessary(parser);
2047
- str_append(parser->output, token->start, TOKEN_LEN(token));
2048
- }
2049
- else if (IN(EXT_LINK_START))
2050
- {
2051
- if (parser->link_target->len == 0)
2052
- {
2053
- // this must be our link target: look ahead to make sure we see the space we're expecting to see
2054
- token_str->ptr = token->start;
2055
- token_str->len = TOKEN_LEN(token);
2056
- NEXT_TOKEN();
2057
- if (token->type == SPACE)
2058
- {
2059
- ary_push(parser->scope, PATH);
2060
- ary_push(parser->scope, SPACE);
2061
- str_append_str(parser->link_target, token_str);
2062
- str_clear(parser->link_text);
2063
- parser->capture = parser->link_text;
2064
- token = NULL; // silently consume space
2065
- }
2066
- else
2067
- {
2068
- // didn't see the space! this must be an error
2069
- wiki_pop_from_stack(parser, NULL);
2070
- wiki_pop_excess_elements(parser);
2071
- wiki_start_para_if_necessary(parser);
2072
- str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
2073
- str_append_str(parser->output, token_str);
2074
- continue;
2075
- }
2076
- }
2077
- else
2078
- str_append(parser->link_text, token->start, TOKEN_LEN(token));
2079
- }
2080
- else
2081
- {
2082
- output = parser->capture ? parser->capture : parser->output;
2083
- wiki_pop_excess_elements(parser);
2084
- wiki_start_para_if_necessary(parser);
2085
- str_append(output, token->start, TOKEN_LEN(token));
2086
- }
2087
- break;
2088
-
2089
- // internal links (links to other wiki articles) look like this:
2090
- // [[another article]] (would point at, for example, "/wiki/another_article")
2091
- // [[the other article|the link text we'll use for it]]
2092
- // [[the other article | the link text we'll use for it]]
2093
- // MediaWiki has strict requirements about what it will accept as a link target:
2094
- // all wikitext markup is disallowed:
2095
- // example [[foo ''bar'' baz]]
2096
- // renders [[foo <em>bar</em> baz]] (ie. not a link)
2097
- // example [[foo <em>bar</em> baz]]
2098
- // renders [[foo <em>bar</em> baz]] (ie. not a link)
2099
- // example [[foo <nowiki>''</nowiki> baz]]
2100
- // renders [[foo '' baz]] (ie. not a link)
2101
- // example [[foo <bar> baz]]
2102
- // renders [[foo &lt;bar&gt; baz]] (ie. not a link)
2103
- // HTML entities and non-ASCII, however, make it through:
2104
- // example [[foo &euro;]]
2105
- // renders <a href="/wiki/Foo_%E2%82%AC">foo &euro;</a>
2106
- // example [[foo €]]
2107
- // renders <a href="/wiki/Foo_%E2%82%AC">foo €</a>
2108
- // we'll impose similar restrictions here for the link target; allowed tokens will be:
2109
- // SPACE, SPECIAL_URI_CHARS, PRINTABLE, PATH, ALNUM, DEFAULT, QUOT, QUOT_ENTITY, AMP, AMP_ENTITY, IMG_START, IMG_END, LEFT_CURLY and RIGHT_CURLY
2110
- // everything else will be rejected
2111
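In Ruby terms, the internal-link rules described above play out roughly as follows (assuming the default "/wiki/" internal link prefix; outputs approximate):

  require 'wikitext'

  parser = Wikitext::Parser.new
  parser.parse("[[Wikitext]]\n")
  # => <p><a href="/wiki/Wikitext">Wikitext</a></p>            (approximate)
  parser.parse("[[Wikitext|the gem]]\n")
  # => <p><a href="/wiki/Wikitext">the gem</a></p>             (approximate)
  parser.parse("[[foo ''bar'' baz]]\n")
  # markup inside the target is rejected, so this does not become a link; the text rolls back to literal output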
- case LINK_START:
2112
- output = parser->capture ? parser->capture : parser->output;
2113
- if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
2114
- {
2115
- wiki_emit_pending_crlf_if_necessary(parser);
2116
- str_append(output, link_start, sizeof(link_start) - 1);
2117
- }
2118
- else if (IN(EXT_LINK_START))
2119
- // already in external link scope! (and in fact, must be capturing link_text right now)
2120
- str_append(output, link_start, sizeof(link_start) - 1);
2121
- else if (IN(LINK_START))
2122
- {
2123
- // already in internal link scope! this is a syntax error
2124
- wiki_rollback_failed_internal_link(parser);
2125
- str_append(parser->output, link_start, sizeof(link_start) - 1);
2126
- }
2127
- else if (IN(SEPARATOR))
2128
- {
2129
- // scanning internal link text
2130
- }
2131
- else // not in internal link scope yet
2132
- {
2133
- // will either emit a link, or the rollback of a failed link, so start the para now
2134
- wiki_pop_excess_elements(parser);
2135
- wiki_start_para_if_necessary(parser);
2136
- ary_push(parser->scope, LINK_START);
2137
-
2138
- // look ahead and try to gobble up link target
2139
- while (NEXT_TOKEN(), (type = token->type))
2140
- {
2141
- if (type == SPACE ||
2142
- type == SPECIAL_URI_CHARS ||
2143
- type == PATH ||
2144
- type == PRINTABLE ||
2145
- type == ALNUM ||
2146
- type == DEFAULT ||
2147
- type == QUOT ||
2148
- type == QUOT_ENTITY ||
2149
- type == AMP ||
2150
- type == AMP_ENTITY ||
2151
- type == IMG_START ||
2152
- type == IMG_END ||
2153
- type == LEFT_CURLY ||
2154
- type == RIGHT_CURLY)
2155
- {
2156
- // accumulate these tokens into link_target
2157
- if (parser->link_target->len == 0)
2158
- {
2159
- str_clear(parser->link_target);
2160
- parser->capture = parser->link_target;
2161
- }
2162
- if (type == QUOT_ENTITY)
2163
- // don't insert the entity, insert the literal quote
2164
- str_append(parser->link_target, quote, sizeof(quote) - 1);
2165
- else if (type == AMP_ENTITY)
2166
- // don't insert the entity, insert the literal ampersand
2167
- str_append(parser->link_target, ampersand, sizeof(ampersand) - 1);
2168
- else
2169
- str_append(parser->link_target, token->start, TOKEN_LEN(token));
2170
- }
2171
- else if (type == LINK_END)
2172
- {
2173
- if (parser->link_target->len == 0) // bail for inputs like "[[]]"
2174
- wiki_rollback_failed_internal_link(parser);
2175
- break; // jump back to top of loop (will handle this in LINK_END case below)
2176
- }
2177
- else if (type == SEPARATOR)
2178
- {
2179
- if (parser->link_target->len == 0) // bail for inputs like "[[|"
2180
- wiki_rollback_failed_internal_link(parser);
2181
- else
2182
- {
2183
- ary_push(parser->scope, SEPARATOR);
2184
- str_clear(parser->link_text);
2185
- parser->capture = parser->link_text;
2186
- token = NULL;
2187
- }
2188
- break;
2189
- }
2190
- else // unexpected token (syntax error)
2191
- {
2192
- wiki_rollback_failed_internal_link(parser);
2193
- break; // jump back to top of loop to handle unexpected token
2194
- }
2195
- }
2196
-
2197
- // jump to top of the loop to process token we scanned during lookahead (if any)
2198
- continue;
2199
- }
2200
- break;
2201
-
2202
- case LINK_END:
2203
- output = parser->capture ? parser->capture : parser->output;
2204
- if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
2205
- {
2206
- wiki_emit_pending_crlf_if_necessary(parser);
2207
- str_append(output, link_end, sizeof(link_end) - 1);
2208
- }
2209
- else if (IN(EXT_LINK_START))
2210
- // already in external link scope! (and in fact, must be capturing link_text right now)
2211
- str_append(output, link_end, sizeof(link_end) - 1);
2212
- else if (IN(LINK_START)) // in internal link scope!
2213
- {
2214
- if (wiki_blank(parser->link_target))
2215
- {
2216
- // special case for inputs like "[[ ]]"
2217
- wiki_rollback_failed_internal_link(parser);
2218
- str_append(parser->output, link_end, sizeof(link_end) - 1);
2219
- break;
2220
- }
2221
- if (parser->link_text->len == 0 ||
2222
- wiki_blank(parser->link_text))
2223
- {
2224
- // use link target as link text
2225
- str_clear(parser->link_text);
2226
- wiki_append_sanitized_link_target(parser->link_target, parser->link_text, true);
2227
- }
2228
- else
2229
- wiki_trim_link_text(parser);
2230
-
2231
- // perform "redlink" check before manipulating link_target
2232
- if (NIL_P(link_proc))
2233
- j = Qnil;
2234
- else
2235
- {
2236
- j = rb_funcall(link_proc, rb_intern("call"), 1, string_from_str(parser->link_target));
2237
- if (!NIL_P(j))
2238
- {
2239
- VALUE l = j; // can't cast inside StringValue macro
2240
- j = StringValue(l);
2241
- }
2242
- }
2243
- wiki_encode_link_target(parser);
2244
- wiki_pop_from_stack_up_to(parser, output, LINK_START, true);
2245
- parser->capture = NULL;
2246
- wiki_append_hyperlink(parser, prefix, parser->link_target, parser->link_text, j, Qnil, false);
2247
- str_clear(parser->link_target);
2248
- str_clear(parser->link_text);
2249
- }
2250
- else // wasn't in internal link scope
2251
- {
2252
- wiki_pop_excess_elements(parser);
2253
- wiki_start_para_if_necessary(parser);
2254
- str_append(output, link_end, sizeof(link_end) - 1);
2255
- }
2256
- break;
2257
-
2258
- // external links look like this:
2259
- // [http://google.com/ the link text]
2260
- // [/other/page/on/site see this page]
2261
- // strings in square brackets which don't match this syntax get passed through literally; eg:
2262
- // he was very angery [sic] about the turn of events
2263
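And the external-link syntax described above, again in Ruby terms (the class attribute assumes the default external_link_class, and a rel attribute only appears when external_link_rel is set; outputs approximate):

  require 'wikitext'

  parser = Wikitext::Parser.new
  parser.parse("[http://google.com/ the link text]\n")
  # => <p><a href="http://google.com/" class="external">the link text</a></p>   (approximate)
  parser.parse("he was very angery [sic] about the turn of events\n")
  # bracketed text that does not match the link syntax passes through literally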
- case EXT_LINK_START:
2264
- output = parser->capture ? parser->capture : parser->output;
2265
- if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
2266
- {
2267
- wiki_emit_pending_crlf_if_necessary(parser);
2268
- str_append(output, ext_link_start, sizeof(ext_link_start) - 1);
2269
- }
2270
- else if (IN(EXT_LINK_START))
2271
- // already in external link scope! (and in fact, must be capturing link_text right now)
2272
- str_append(output, ext_link_start, sizeof(ext_link_start) - 1);
2273
- else if (IN(LINK_START))
2274
- {
2275
- // already in internal link scope!
2276
- if (parser->link_target->len == 0 || !IN(SPACE))
2277
- str_append(parser->link_target, ext_link_start, sizeof(ext_link_start) - 1);
2278
- else // link target has already been scanned
2279
- str_append(parser->link_text, ext_link_start, sizeof(ext_link_start) - 1);
2280
- }
2281
- else // not in external link scope yet
2282
- {
2283
- // will either emit a link, or the rollback of a failed link, so start the para now
2284
- wiki_pop_excess_elements(parser);
2285
- wiki_start_para_if_necessary(parser);
2286
-
2287
- // look ahead: expect an absolute URI (with protocol) or "relative" (path) URI
2288
- NEXT_TOKEN();
2289
- if (token->type == URI || token->type == PATH)
2290
- ary_push(parser->scope, EXT_LINK_START); // so far so good, jump back to the top of the loop
2291
- else
2292
- // only get here if there was a syntax error (missing URI)
2293
- str_append(parser->output, ext_link_start, sizeof(ext_link_start) - 1);
2294
- continue; // jump back to top of loop to handle token (either URI or whatever it is)
2295
- }
2296
- break;
2297
-
2298
- case EXT_LINK_END:
2299
- output = parser->capture ? parser->capture : parser->output;
2300
- if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
2301
- {
2302
- wiki_emit_pending_crlf_if_necessary(parser);
2303
- str_append(output, ext_link_end, sizeof(ext_link_end) - 1);
2304
- }
2305
- else if (IN(EXT_LINK_START))
2306
- {
2307
- if (parser->link_text->len == 0)
2308
- // syntax error: external link with no link text
2309
- wiki_rollback_failed_external_link(parser);
2310
- else
2311
- {
2312
- // success!
2313
- j = IN(PATH) ? Qnil : parser->external_link_class;
2314
- k = IN(PATH) ? Qnil : parser->external_link_rel;
2315
- wiki_pop_from_stack_up_to(parser, output, EXT_LINK_START, true);
2316
- parser->capture = NULL;
2317
- wiki_append_hyperlink(parser, Qnil, parser->link_target, parser->link_text, j, k, false);
2318
- }
2319
- str_clear(parser->link_target);
2320
- str_clear(parser->link_text);
2321
- }
2322
- else
2323
- {
2324
- wiki_pop_excess_elements(parser);
2325
- wiki_start_para_if_necessary(parser);
2326
- str_append(parser->output, ext_link_end, sizeof(ext_link_end) - 1);
2327
- }
2328
- break;
2329
-
2330
- case SEPARATOR:
2331
- output = parser->capture ? parser->capture : parser->output;
2332
- wiki_pop_excess_elements(parser);
2333
- wiki_start_para_if_necessary(parser);
2334
- str_append(output, separator, sizeof(separator) - 1);
2335
- break;
2336
-
2337
- case SPACE:
2338
- output = parser->capture ? parser->capture : parser->output;
2339
- if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
2340
- {
2341
- wiki_emit_pending_crlf_if_necessary(parser);
2342
- str_append(output, token->start, TOKEN_LEN(token));
2343
- }
2344
- else
2345
- {
2346
- // peek ahead to see next token
2347
- char *token_ptr = token->start;
2348
- long token_len = TOKEN_LEN(token);
2349
- NEXT_TOKEN();
2350
- type = token->type;
2351
- if ((type == H6_END && IN(H6_START)) ||
2352
- (type == H5_END && IN(H5_START)) ||
2353
- (type == H4_END && IN(H4_START)) ||
2354
- (type == H3_END && IN(H3_START)) ||
2355
- (type == H2_END && IN(H2_START)) ||
2356
- (type == H1_END && IN(H1_START)))
2357
- {
2358
- // will suppress emission of space (discard) if next token is a H6_END, H5_END etc and we are in the corresponding scope
2359
- }
2360
- else
2361
- {
2362
- // emit the space
2363
- wiki_pop_excess_elements(parser);
2364
- wiki_start_para_if_necessary(parser);
2365
- str_append(output, token_ptr, token_len);
2366
- }
2367
-
2368
- // jump to top of the loop to process token we scanned during lookahead
2369
- continue;
2370
- }
2371
- break;
2372
-
2373
- case QUOT_ENTITY:
2374
- case AMP_ENTITY:
2375
- case NAMED_ENTITY:
2376
- case DECIMAL_ENTITY:
2377
- // pass these through unaltered as they are case-sensitive
2378
- output = parser->capture ? parser->capture : parser->output;
2379
- wiki_pop_excess_elements(parser);
2380
- wiki_start_para_if_necessary(parser);
2381
- str_append(output, token->start, TOKEN_LEN(token));
2382
- break;
2383
-
2384
- case HEX_ENTITY:
2385
- // normalize hex entities (downcase them)
2386
- output = parser->capture ? parser->capture : parser->output;
2387
- wiki_pop_excess_elements(parser);
2388
- wiki_start_para_if_necessary(parser);
2389
- str_append(output, token->start, TOKEN_LEN(token));
2390
- wiki_downcase_bang(output->ptr + output->len - TOKEN_LEN(token), TOKEN_LEN(token));
2391
- break;
2392
-
2393
- case QUOT:
2394
- output = parser->capture ? parser->capture : parser->output;
2395
- wiki_pop_excess_elements(parser);
2396
- wiki_start_para_if_necessary(parser);
2397
- str_append(output, quot_entity, sizeof(quot_entity) - 1);
2398
- break;
2399
-
2400
- case AMP:
2401
- output = parser->capture ? parser->capture : parser->output;
2402
- wiki_pop_excess_elements(parser);
2403
- wiki_start_para_if_necessary(parser);
2404
- str_append(output, amp_entity, sizeof(amp_entity) - 1);
2405
- break;
2406
-
2407
- case LESS:
2408
- output = parser->capture ? parser->capture : parser->output;
2409
- wiki_pop_excess_elements(parser);
2410
- wiki_start_para_if_necessary(parser);
2411
- str_append(output, lt_entity, sizeof(lt_entity) - 1);
2412
- break;
2413
-
2414
- case GREATER:
2415
- output = parser->capture ? parser->capture : parser->output;
2416
- wiki_pop_excess_elements(parser);
2417
- wiki_start_para_if_necessary(parser);
2418
- str_append(output, gt_entity, sizeof(gt_entity) - 1);
2419
- break;
2420
-
2421
- case IMG_START:
2422
- if (IN_ANY_OF(NO_WIKI_START, PRE, PRE_START))
2423
- {
2424
- wiki_emit_pending_crlf_if_necessary(parser);
2425
- str_append(parser->output, token->start, TOKEN_LEN(token));
2426
- }
2427
- else if (parser->capture)
2428
- str_append(parser->capture, token->start, TOKEN_LEN(token));
2429
- else
2430
- {
2431
- // not currently capturing: will be emitting something on success or failure, so get ready
2432
- wiki_pop_excess_elements(parser);
2433
- wiki_start_para_if_necessary(parser);
2434
-
2435
- // scan ahead consuming PATH, PRINTABLE, ALNUM and SPECIAL_URI_CHARS tokens
2436
- // will cheat here and abuse the link_target capture buffer to accumulate text
2437
- while (NEXT_TOKEN(), (type = token->type))
2438
- {
2439
- if (type == PATH || type == PRINTABLE || type == ALNUM || type == SPECIAL_URI_CHARS)
2440
- str_append(parser->link_target, token->start, TOKEN_LEN(token));
2441
- else if (type == IMG_END && parser->link_target->len > 0)
2442
- {
2443
- // success
2444
- wiki_append_img(parser, parser->link_target->ptr, parser->link_target->len);
2445
- token = NULL;
2446
- break;
2447
- }
2448
- else // unexpected token or zero-length target (syntax error)
2449
- {
2450
- // rollback
2451
- str_append(parser->output, literal_img_start, sizeof(literal_img_start) - 1);
2452
- if (parser->link_target->len > 0)
2453
- str_append(parser->output, parser->link_target->ptr, parser->link_target->len);
2454
- break;
2455
- }
2456
- }
2457
-
2458
- // jump to top of the loop to process token we scanned during lookahead
2459
- str_clear(parser->link_target);
2460
- continue;
2461
- }
2462
- break;
2463
-
2464
- case CRLF:
2465
- i = parser->pending_crlf;
2466
- parser->pending_crlf = false;
2467
- wiki_rollback_failed_link(parser); // if any
2468
- if (IN_EITHER_OF(NO_WIKI_START, PRE_START))
2469
- {
2470
- ary_clear(parser->line_buffer);
2471
- str_append_str(parser->output, parser->line_ending);
2472
- break;
2473
- }
2474
- else if (IN(PRE))
2475
- {
2476
- // beware: when BLOCKQUOTE is on the line buffer (not the line stack!) prior to the CRLF, that must be the end of the PRE block
2477
- if (ary_entry(parser->line_buffer, -2) == BLOCKQUOTE)
2478
- // don't emit in this case
2479
- wiki_pop_from_stack_up_to(parser, parser->output, PRE, true);
2480
- else
2481
- {
2482
- if (ary_entry(parser->line_buffer, -2) == PRE)
2483
- {
2484
- // only thing on line is the PRE: emit pending line ending (if we had one)
2485
- if (i)
2486
- str_append_str(parser->output, parser->line_ending);
2487
- }
2488
-
2489
- // clear these _before_ calling NEXT_TOKEN (NEXT_TOKEN adds to the line_buffer)
2490
- ary_clear(parser->line);
2491
- ary_clear(parser->line_buffer);
2492
-
2493
- // peek ahead to see if this is definitely the end of the PRE block
2494
- NEXT_TOKEN();
2495
- type = token->type;
2496
- if (type != BLOCKQUOTE && type != PRE)
2497
- // this is definitely the end of the block, so don't emit
2498
- wiki_pop_from_stack_up_to(parser, parser->output, PRE, true);
2499
- else
2500
- // potentially will emit
2501
- parser->pending_crlf = true;
2502
-
2503
- continue; // jump back to top of loop to handle token grabbed via lookahead
2504
- }
2505
- }
2506
- else
2507
- {
2508
- parser->pending_crlf = true;
2509
-
2510
- // count number of BLOCKQUOTE tokens in line buffer (can be zero) and pop back to that level
2511
- // as a side effect, this handles any open span-level elements and unclosed blocks
2512
- // (with special handling for P blocks and LI elements)
2513
- i = ary_count(parser->line, BLOCKQUOTE) + ary_count(parser->scope, BLOCKQUOTE_START);
2514
- for (j = parser->scope->count; j > i; j--)
2515
- {
2516
- if (parser->scope->count > 0 && ary_entry(parser->scope, -1) == LI)
2517
- {
2518
- parser->pending_crlf = false;
2519
- break;
2520
- }
2521
-
2522
- // special handling on last iteration through the loop if the top item on the scope is a P block
2523
- if ((j - i == 1) && ary_entry(parser->scope, -1) == P)
2524
- {
2525
- // if there is nothing (or only BLOCKQUOTE) on the line buffer (not the line stack!) prior to the CRLF, this must be a paragraph break
2526
- // (note that we have to make sure we're not inside a BLOCKQUOTE_START block
2527
- // because in those blocks BLOCKQUOTE tokens have no special meaning)
2528
- if (NO_ITEM(ary_entry(parser->line_buffer, -2)) ||
2529
- (ary_entry(parser->line_buffer, -2) == BLOCKQUOTE && !IN(BLOCKQUOTE_START)))
2530
- // paragraph break
2531
- parser->pending_crlf = false;
2532
- else
2533
- // not a paragraph break!
2534
- continue;
2535
- }
2536
- wiki_pop_from_stack(parser, NULL);
2537
- }
2538
- }
2539
-
2540
- // delete the entire contents of the line scope stack and buffer
2541
- ary_clear(parser->line);
2542
- ary_clear(parser->line_buffer);
2543
- break;
2544
-
2545
- case SPECIAL_URI_CHARS:
2546
- case PRINTABLE:
2547
- case ALNUM:
2548
- case IMG_END:
2549
- case LEFT_CURLY:
2550
- case RIGHT_CURLY:
2551
- output = parser->capture ? parser->capture : parser->output;
2552
- wiki_pop_excess_elements(parser);
2553
- wiki_start_para_if_necessary(parser);
2554
- str_append(output, token->start, TOKEN_LEN(token));
2555
- break;
2556
-
2557
- case DEFAULT:
2558
- output = parser->capture ? parser->capture : parser->output;
2559
- wiki_pop_excess_elements(parser);
2560
- wiki_start_para_if_necessary(parser);
2561
- wiki_append_entity_from_utf32_char(output, token->code_point);
2562
- break;
2563
-
2564
- case END_OF_FILE:
2565
- // special case for input like " foo\n " (see pre_spec.rb)
2566
- if (IN(PRE) &&
2567
- ary_entry(parser->line_buffer, -2) == PRE &&
2568
- parser->pending_crlf)
2569
- str_append(parser->output, parser->line_ending->ptr, parser->line_ending->len);
2570
-
2571
- // close any open scopes on hitting EOF
2572
- wiki_rollback_failed_link(parser); // if any
2573
- wiki_pop_all_from_stack(parser);
2574
- goto return_output; // break not enough here (want to break out of outer while loop, not inner switch statement)
2575
-
2576
- default:
2577
- break;
2578
- }
2579
-
2580
- // reset current token; forcing lexer to return another token at the top of the loop
2581
- token = NULL;
2582
- } while (1);
2583
- return_output:
2584
- // nasty hack to avoid re-allocating our return value
2585
- str_append(parser->output, null_str, 1); // null-terminate
2586
- len = parser->output->len - 1; // don't count null termination
2587
-
2588
- VALUE out = rb_str_buf_new(RSTRING_EMBED_LEN_MAX + 1);
2589
- free(RSTRING_PTR(out));
2590
- RSTRING(out)->as.heap.aux.capa = len;
2591
- RSTRING(out)->as.heap.ptr = parser->output->ptr;
2592
- RSTRING(out)->as.heap.len = len;
2593
- parser->output->ptr = NULL; // don't double-free
2594
- return out;
2595
- }