wikitext 4.0.1 → 4.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/wikitext +10 -110
- data/ext/wikitext/ary.c +116 -0
- data/ext/wikitext/ary.h +50 -0
- data/ext/wikitext/depend +32 -0
- data/ext/wikitext/parser.c +2595 -0
- data/ext/wikitext/parser.h +40 -0
- data/ext/wikitext/ruby_compat.h +34 -0
- data/ext/wikitext/str.c +109 -0
- data/ext/wikitext/str.h +64 -0
- data/ext/wikitext/token.c +125 -0
- data/ext/wikitext/token.h +117 -0
- data/ext/wikitext/wikitext.c +125 -0
- data/ext/wikitext/wikitext.h +39 -0
- data/ext/wikitext/wikitext_ragel.c +3211 -0
- data/ext/wikitext/wikitext_ragel.h +26 -0
- data/lib/wikitext/version.rb +1 -1
- metadata +17 -3
@@ -0,0 +1,40 @@
|
|
1
|
+
// Copyright 2008-2009 Wincent Colaiuta. All rights reserved.
|
2
|
+
//
|
3
|
+
// Redistribution and use in source and binary forms, with or without
|
4
|
+
// modification, are permitted provided that the following conditions are met:
|
5
|
+
//
|
6
|
+
// 1. Redistributions of source code must retain the above copyright notice,
|
7
|
+
// this list of conditions and the following disclaimer.
|
8
|
+
// 2. Redistributions in binary form must reproduce the above copyright notice,
|
9
|
+
// this list of conditions and the following disclaimer in the documentation
|
10
|
+
// and/or other materials provided with the distribution.
|
11
|
+
//
|
12
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
13
|
+
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
14
|
+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
15
|
+
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
16
|
+
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
17
|
+
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
18
|
+
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
19
|
+
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
20
|
+
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
21
|
+
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
22
|
+
// POSSIBILITY OF SUCH DAMAGE.
|
23
|
+
|
24
|
+
#ifndef WIKITEXT_PARSER_H
#define WIKITEXT_PARSER_H

#include "ruby_compat.h"

// Declarations of the Wikitext parser functions.
//
// Two signature shapes appear below: (int argc, VALUE *argv, VALUE self),
// which is the shape the Ruby C API uses for methods accepting a variable
// number of arguments, and (VALUE self, ...) with a fixed argument list.
// The include guard makes it safe to include this header more than once.

VALUE Wikitext_parser_initialize(int argc, VALUE *argv, VALUE self);

VALUE Wikitext_parser_tokenize(VALUE self, VALUE string);

VALUE Wikitext_parser_benchmarking_tokenize(VALUE self, VALUE string);

VALUE Wikitext_parser_fulltext_tokenize(int argc, VALUE *argv, VALUE self);

VALUE Wikitext_parser_sanitize_link_target(VALUE self, VALUE string);

VALUE Wikitext_parser_encode_link_target(VALUE self, VALUE in);

VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self);

VALUE Wikitext_parser_profiling_parse(VALUE self, VALUE string);

#endif // WIKITEXT_PARSER_H
|
@@ -0,0 +1,34 @@
|
|
1
|
+
// Copyright 2008-2009 Wincent Colaiuta. All rights reserved.
|
2
|
+
//
|
3
|
+
// Redistribution and use in source and binary forms, with or without
|
4
|
+
// modification, are permitted provided that the following conditions are met:
|
5
|
+
//
|
6
|
+
// 1. Redistributions of source code must retain the above copyright notice,
|
7
|
+
// this list of conditions and the following disclaimer.
|
8
|
+
// 2. Redistributions in binary form must reproduce the above copyright notice,
|
9
|
+
// this list of conditions and the following disclaimer in the documentation
|
10
|
+
// and/or other materials provided with the distribution.
|
11
|
+
//
|
12
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
13
|
+
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
14
|
+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
15
|
+
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
16
|
+
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
17
|
+
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
18
|
+
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
19
|
+
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
20
|
+
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
21
|
+
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
22
|
+
// POSSIBILITY OF SUCH DAMAGE.
|
23
|
+
|
24
|
+
#include <ruby.h>

// Ruby 1.8.5 does not declare RSTRING_PTR; fall back to reading the field
// directly from the RString struct
#if !defined(RSTRING_PTR)
#  define RSTRING_PTR(s) (RSTRING(s)->ptr)
#endif

// Ruby 1.8.5 does not declare RSTRING_LEN either; same fallback
#if !defined(RSTRING_LEN)
#  define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
|
data/ext/wikitext/str.c
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
// Copyright 2008-2009 Wincent Colaiuta. All rights reserved.
|
2
|
+
//
|
3
|
+
// Redistribution and use in source and binary forms, with or without
|
4
|
+
// modification, are permitted provided that the following conditions are met:
|
5
|
+
//
|
6
|
+
// 1. Redistributions of source code must retain the above copyright notice,
|
7
|
+
// this list of conditions and the following disclaimer.
|
8
|
+
// 2. Redistributions in binary form must reproduce the above copyright notice,
|
9
|
+
// this list of conditions and the following disclaimer in the documentation
|
10
|
+
// and/or other materials provided with the distribution.
|
11
|
+
//
|
12
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
13
|
+
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
14
|
+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
15
|
+
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
16
|
+
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
17
|
+
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
18
|
+
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
19
|
+
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
20
|
+
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
21
|
+
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
22
|
+
// POSSIBILITY OF SUCH DAMAGE.
|
23
|
+
|
24
|
+
#include "str.h"
|
25
|
+
|
26
|
+
// number of spare bytes requested on top of the size actually needed whenever
// a buffer is allocated or reallocated; the slack can save later allocations
#define STR_OVERALLOC 256
|
29
|
+
|
30
|
+
str_t *str_new(void)
|
31
|
+
{
|
32
|
+
str_t *str = ALLOC_N(str_t, 1);
|
33
|
+
str->ptr = NULL;
|
34
|
+
str->len = 0;
|
35
|
+
str->capacity = 0;
|
36
|
+
return str;
|
37
|
+
}
|
38
|
+
|
39
|
+
str_t *str_new_copy(const char *src, long len)
|
40
|
+
{
|
41
|
+
str_t *str = ALLOC_N(str_t, 1);
|
42
|
+
str->ptr = ALLOC_N(char, len + STR_OVERALLOC);
|
43
|
+
memcpy(str->ptr, src, len);
|
44
|
+
str->len = len;
|
45
|
+
str->capacity = len + STR_OVERALLOC;
|
46
|
+
return str;
|
47
|
+
}
|
48
|
+
|
49
|
+
// Build a str struct holding a copy of the bytes of a Ruby String.
// StringValue() converts the argument to a String (or raises) before its
// pointer and length are read.
str_t *str_new_from_string(VALUE string)
{
    string = StringValue(string);

    const char *bytes = RSTRING_PTR(string);
    long byte_count   = RSTRING_LEN(string);
    return str_new_copy(bytes, byte_count);
}
|
54
|
+
|
55
|
+
// Create a Ruby String from the bytes held in str and tag it as UTF-8 by
// calling String#force_encoding on it.
VALUE string_from_str(str_t *str)
{
    VALUE result = rb_str_new(str->ptr, str->len);
    ID force_encoding = rb_intern("force_encoding");
    VALUE encoding_name = rb_str_new2("UTF-8");

    rb_funcall(result, force_encoding, 1, encoding_name);
    return result;
}
|
61
|
+
|
62
|
+
// Make sure str can hold at least len bytes. When the current capacity is
// too small the buffer is (re)allocated to len + STR_OVERALLOC bytes; the
// stored length is left untouched.
void str_grow(str_t *str, long len)
{
    if (str->capacity >= len)
        return; // already large enough

    long reserved = len + STR_OVERALLOC;
    if (str->ptr == NULL)
        str->ptr = ALLOC_N(char, reserved);
    else
        REALLOC_N(str->ptr, char, reserved);
    str->capacity = reserved;
}
|
73
|
+
|
74
|
+
// Append the len bytes at src to the end of str, growing the buffer if needed.
//
// A len of zero or less is a no-op. This keeps memcpy from being handed the
// NULL buffer of a str that has never been allocated (undefined behaviour even
// for a zero-byte copy) and from treating a negative count as a huge size_t.
void str_append(str_t *str, const char *src, long len)
{
    if (len <= 0)
        return;

    long new_len = str->len + len;

    // str_grow() reallocates only when the capacity is below new_len, and then
    // reserves new_len + STR_OVERALLOC bytes
    str_grow(str, new_len);
    memcpy(str->ptr + str->len, src, len);
    str->len = new_len;
}
|
88
|
+
|
89
|
+
// Append the contents of the "other" str struct onto str.
//
// Appending a str to itself is supported: the required space is reserved
// up front, so that str_append() does not reallocate the buffer that its
// source pointer refers to while the copy is still pending.
void str_append_str(str_t *str, str_t *other)
{
    if (str == other)
        str_grow(str, str->len + other->len);
    str_append(str, other->ptr, other->len);
}
|
93
|
+
|
94
|
+
// Append the bytes of a Ruby String onto str.
//
// As in str_new_from_string(), the argument is passed through StringValue()
// first, so a non-String is converted (or a TypeError is raised) instead of
// its memory being read as if it were a String.
void str_append_string(str_t *str, VALUE other)
{
    other = StringValue(other);
    str_append(str, RSTRING_PTR(other), RSTRING_LEN(other));
}
|
98
|
+
|
99
|
+
// Reset str to zero length. The buffer and its capacity are kept, so the
// struct can be refilled without a new allocation.
void str_clear(str_t *str)
{
    str->len = 0;
}
|
103
|
+
|
104
|
+
// Release the buffer owned by str (if any) and then the struct itself.
//
// Both were obtained through ALLOC_N/REALLOC_N, i.e. Ruby's xmalloc family,
// so they are returned with xfree() rather than the C library's free().
// A NULL str is ignored.
void str_free(str_t *str)
{
    if (str == NULL)
        return;
    if (str->ptr)
        xfree(str->ptr);
    xfree(str);
}
|
data/ext/wikitext/str.h
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
// Copyright 2008-2009 Wincent Colaiuta. All rights reserved.
|
2
|
+
//
|
3
|
+
// Redistribution and use in source and binary forms, with or without
|
4
|
+
// modification, are permitted provided that the following conditions are met:
|
5
|
+
//
|
6
|
+
// 1. Redistributions of source code must retain the above copyright notice,
|
7
|
+
// this list of conditions and the following disclaimer.
|
8
|
+
// 2. Redistributions in binary form must reproduce the above copyright notice,
|
9
|
+
// this list of conditions and the following disclaimer in the documentation
|
10
|
+
// and/or other materials provided with the distribution.
|
11
|
+
//
|
12
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
13
|
+
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
14
|
+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
15
|
+
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
16
|
+
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
17
|
+
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
18
|
+
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
19
|
+
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
20
|
+
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
21
|
+
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
22
|
+
// POSSIBILITY OF SUCH DAMAGE.
|
23
|
+
|
24
|
+
#ifndef WIKITEXT_STR_H
#define WIKITEXT_STR_H

#include "ruby_compat.h"

// A growable byte buffer with an explicit length and capacity.
typedef struct
{
    char *ptr;      // start of the buffer; NULL while nothing has been allocated
    long len;       // number of bytes currently in use
    long capacity;  // number of bytes allocated at ptr
} str_t;

// Mark the str struct designated by ptr as a participant in Ruby's mark-and-sweep garbage collection scheme.
// A variable named name is placed on the C stack to prevent the structure from being prematurely collected.
#define GC_WRAP_STR(ptr, name) volatile VALUE name __attribute__((unused)) = Data_Wrap_Struct(rb_cObject, 0, str_free, (ptr))

// create a new, empty string struct
str_t *str_new(void);

// create a new string struct and initialize it with a copy of the buffer of length len pointed to by src
str_t *str_new_copy(const char *src, long len);

// convenience method for testing: create a new string struct from a copy of the contents of a Ruby String
str_t *str_new_from_string(VALUE string);

// convenience method for testing: create a Ruby String from the contents of a string struct
VALUE string_from_str(str_t *str);

// grows a string's capacity to the specified length
void str_grow(str_t *str, long len);

// appends the buffer of length len pointed to by src onto str
void str_append(str_t *str, const char *src, long len);

// appends the "other" string struct onto str
void str_append_str(str_t *str, str_t *other);

// appends the "other" string (a Ruby String) onto str
void str_append_string(str_t *str, VALUE other);

// don't actually free the memory yet
// this makes str structs very useful when reusing buffers because it avoids reallocation
void str_clear(str_t *str);

// releases the buffer (if any) and the string struct itself
void str_free(str_t *str);

#endif // WIKITEXT_STR_H
|
@@ -0,0 +1,125 @@
|
|
1
|
+
// Copyright 2008-2009 Wincent Colaiuta. All rights reserved.
|
2
|
+
//
|
3
|
+
// Redistribution and use in source and binary forms, with or without
|
4
|
+
// modification, are permitted provided that the following conditions are met:
|
5
|
+
//
|
6
|
+
// 1. Redistributions of source code must retain the above copyright notice,
|
7
|
+
// this list of conditions and the following disclaimer.
|
8
|
+
// 2. Redistributions in binary form must reproduce the above copyright notice,
|
9
|
+
// this list of conditions and the following disclaimer in the documentation
|
10
|
+
// and/or other materials provided with the distribution.
|
11
|
+
//
|
12
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
13
|
+
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
14
|
+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
15
|
+
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
16
|
+
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
17
|
+
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
18
|
+
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
19
|
+
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
20
|
+
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
21
|
+
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
22
|
+
// POSSIBILITY OF SUCH DAMAGE.
|
23
|
+
|
24
|
+
#include "token.h"
|
25
|
+
#include "wikitext.h"
|
26
|
+
|
27
|
+
// return a hash of token types
|
28
|
+
// we make this available for unit testing purposes
|
29
|
+
|
30
|
+
VALUE Wikitext_parser_token_types(VALUE self)
|
31
|
+
{
|
32
|
+
VALUE hash = rb_hash_new();
|
33
|
+
|
34
|
+
#define SET_TOKEN_TYPE(identifier) (void)rb_hash_aset(hash, INT2FIX(identifier), \
|
35
|
+
rb_funcall(rb_funcall(rb_str_new2(#identifier), rb_intern("downcase"), 0), rb_intern("to_sym"), 0))
|
36
|
+
|
37
|
+
SET_TOKEN_TYPE(NO_TOKEN);
|
38
|
+
SET_TOKEN_TYPE(P);
|
39
|
+
SET_TOKEN_TYPE(LI);
|
40
|
+
SET_TOKEN_TYPE(NESTED_LIST);
|
41
|
+
SET_TOKEN_TYPE(PRE);
|
42
|
+
SET_TOKEN_TYPE(PRE_START);
|
43
|
+
SET_TOKEN_TYPE(PRE_END);
|
44
|
+
SET_TOKEN_TYPE(NO_WIKI_START);
|
45
|
+
SET_TOKEN_TYPE(NO_WIKI_END);
|
46
|
+
SET_TOKEN_TYPE(BLOCKQUOTE);
|
47
|
+
SET_TOKEN_TYPE(BLOCKQUOTE_START);
|
48
|
+
SET_TOKEN_TYPE(BLOCKQUOTE_END);
|
49
|
+
SET_TOKEN_TYPE(STRONG_EM);
|
50
|
+
SET_TOKEN_TYPE(STRONG_START);
|
51
|
+
SET_TOKEN_TYPE(STRONG_END);
|
52
|
+
SET_TOKEN_TYPE(STRONG);
|
53
|
+
SET_TOKEN_TYPE(EM_START);
|
54
|
+
SET_TOKEN_TYPE(EM_END);
|
55
|
+
SET_TOKEN_TYPE(EM);
|
56
|
+
SET_TOKEN_TYPE(TT_START);
|
57
|
+
SET_TOKEN_TYPE(TT_END);
|
58
|
+
SET_TOKEN_TYPE(TT);
|
59
|
+
SET_TOKEN_TYPE(OL);
|
60
|
+
SET_TOKEN_TYPE(UL);
|
61
|
+
SET_TOKEN_TYPE(H1_START);
|
62
|
+
SET_TOKEN_TYPE(H2_START);
|
63
|
+
SET_TOKEN_TYPE(H3_START);
|
64
|
+
SET_TOKEN_TYPE(H4_START);
|
65
|
+
SET_TOKEN_TYPE(H5_START);
|
66
|
+
SET_TOKEN_TYPE(H6_START);
|
67
|
+
SET_TOKEN_TYPE(H1_END);
|
68
|
+
SET_TOKEN_TYPE(H2_END);
|
69
|
+
SET_TOKEN_TYPE(H3_END);
|
70
|
+
SET_TOKEN_TYPE(H4_END);
|
71
|
+
SET_TOKEN_TYPE(H5_END);
|
72
|
+
SET_TOKEN_TYPE(H6_END);
|
73
|
+
SET_TOKEN_TYPE(URI);
|
74
|
+
SET_TOKEN_TYPE(MAIL);
|
75
|
+
SET_TOKEN_TYPE(PATH);
|
76
|
+
SET_TOKEN_TYPE(LINK_START);
|
77
|
+
SET_TOKEN_TYPE(LINK_END);
|
78
|
+
SET_TOKEN_TYPE(EXT_LINK_START);
|
79
|
+
SET_TOKEN_TYPE(EXT_LINK_END);
|
80
|
+
SET_TOKEN_TYPE(SEPARATOR);
|
81
|
+
SET_TOKEN_TYPE(SPACE);
|
82
|
+
SET_TOKEN_TYPE(QUOT_ENTITY);
|
83
|
+
SET_TOKEN_TYPE(AMP_ENTITY);
|
84
|
+
SET_TOKEN_TYPE(NAMED_ENTITY);
|
85
|
+
SET_TOKEN_TYPE(HEX_ENTITY);
|
86
|
+
SET_TOKEN_TYPE(DECIMAL_ENTITY);
|
87
|
+
SET_TOKEN_TYPE(QUOT);
|
88
|
+
SET_TOKEN_TYPE(AMP);
|
89
|
+
SET_TOKEN_TYPE(LESS);
|
90
|
+
SET_TOKEN_TYPE(GREATER);
|
91
|
+
SET_TOKEN_TYPE(IMG_START);
|
92
|
+
SET_TOKEN_TYPE(IMG_END);
|
93
|
+
SET_TOKEN_TYPE(LEFT_CURLY);
|
94
|
+
SET_TOKEN_TYPE(RIGHT_CURLY);
|
95
|
+
SET_TOKEN_TYPE(CRLF);
|
96
|
+
SET_TOKEN_TYPE(SPECIAL_URI_CHARS);
|
97
|
+
SET_TOKEN_TYPE(PRINTABLE);
|
98
|
+
SET_TOKEN_TYPE(ALNUM);
|
99
|
+
SET_TOKEN_TYPE(DEFAULT);
|
100
|
+
SET_TOKEN_TYPE(END_OF_FILE);
|
101
|
+
|
102
|
+
#undef SET_TOKEN_TYPE
|
103
|
+
|
104
|
+
return hash;
|
105
|
+
}
|
106
|
+
|
107
|
+
// for testing and debugging only
|
108
|
+
VALUE wiki_token(token_t *token)
|
109
|
+
{
|
110
|
+
VALUE object = rb_class_new_instance(0, NULL, cWikitextParserToken);
|
111
|
+
(void)rb_iv_set(object, "@start", LONG2NUM((long)token->start));
|
112
|
+
(void)rb_iv_set(object, "@stop", LONG2NUM((long)token->stop));
|
113
|
+
(void)rb_iv_set(object, "@line_start", LONG2NUM(token->line_start));
|
114
|
+
(void)rb_iv_set(object, "@line_stop", LONG2NUM(token->line_stop));
|
115
|
+
(void)rb_iv_set(object, "@column_start", LONG2NUM(token->column_start));
|
116
|
+
(void)rb_iv_set(object, "@column_stop", LONG2NUM(token->column_stop));
|
117
|
+
(void)rb_iv_set(object, "@code_point", INT2NUM(token->code_point));
|
118
|
+
|
119
|
+
// look-up the token type
|
120
|
+
VALUE types = Wikitext_parser_token_types(Qnil);
|
121
|
+
VALUE type = rb_hash_aref(types, INT2FIX(token->type));
|
122
|
+
(void)rb_iv_set(object, "@token_type", type);
|
123
|
+
(void)rb_iv_set(object, "@string_value", rb_str_new(token->start, token->stop - token->start));
|
124
|
+
return object;
|
125
|
+
}
|
@@ -0,0 +1,117 @@
|
|
1
|
+
// Copyright 2008-2009 Wincent Colaiuta. All rights reserved.
|
2
|
+
//
|
3
|
+
// Redistribution and use in source and binary forms, with or without
|
4
|
+
// modification, are permitted provided that the following conditions are met:
|
5
|
+
//
|
6
|
+
// 1. Redistributions of source code must retain the above copyright notice,
|
7
|
+
// this list of conditions and the following disclaimer.
|
8
|
+
// 2. Redistributions in binary form must reproduce the above copyright notice,
|
9
|
+
// this list of conditions and the following disclaimer in the documentation
|
10
|
+
// and/or other materials provided with the distribution.
|
11
|
+
//
|
12
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
13
|
+
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
14
|
+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
15
|
+
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
16
|
+
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
17
|
+
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
18
|
+
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
19
|
+
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
20
|
+
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
21
|
+
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
22
|
+
// POSSIBILITY OF SUCH DAMAGE.
|
23
|
+
|
24
|
+
#ifndef WIKITEXT_TOKEN_H
#define WIKITEXT_TOKEN_H

#include "ruby_compat.h"
#include <stdint.h>     /* uint32_t */

// number of bytes between a token's start and stop pointers
#define TOKEN_LEN(token)    ((token)->stop - (token)->start)

// a new Ruby String holding a copy of the bytes between start and stop
#define TOKEN_TEXT(token)   rb_str_new((const char *)(token)->start, TOKEN_LEN(token))

// A single token. start and stop delimit its bytes (stop - start is the
// length, see TOKEN_LEN); type holds one of the token_types values below.
typedef struct
{
    char        *start;
    char        *stop;
    size_t      line_start;
    size_t      line_stop;
    size_t      column_start;
    size_t      column_stop;
    uint32_t    code_point;
    int         type;
} token_t;

enum token_types {
    NO_TOKEN,
    P,              // imaginary token (never explicitly marked up)
    LI,             // imaginary token (never explicitly marked up)
    NESTED_LIST,    // imaginary token (never explicitly marked up)
    PRE,
    PRE_START,
    PRE_END,
    NO_WIKI_START,
    NO_WIKI_END,
    BLOCKQUOTE,
    BLOCKQUOTE_START,
    BLOCKQUOTE_END,
    STRONG_EM,
    STRONG_START,
    STRONG_END,
    STRONG,
    EM_START,
    EM_END,
    EM,
    TT_START,
    TT_END,
    TT,
    OL,
    UL,

    // keep these consecutive, and in ascending order
    // (the arithmetic for the base_heading_level feature assumes this)
    H1_START,
    H2_START,
    H3_START,
    H4_START,
    H5_START,
    H6_START,

    // likewise for the H*_END tokens
    H1_END,
    H2_END,
    H3_END,
    H4_END,
    H5_END,
    H6_END,

    URI,
    MAIL,
    PATH,
    LINK_START,
    LINK_END,
    EXT_LINK_START,
    EXT_LINK_END,
    SEPARATOR,
    SPACE,
    QUOT_ENTITY,
    AMP_ENTITY,
    NAMED_ENTITY,
    HEX_ENTITY,
    DECIMAL_ENTITY,
    QUOT,
    AMP,
    LESS,
    GREATER,
    IMG_START,
    IMG_END,
    LEFT_CURLY,
    RIGHT_CURLY,
    CRLF,
    SPECIAL_URI_CHARS,
    PRINTABLE,
    ALNUM,
    DEFAULT,
    END_OF_FILE
};

// returns a Hash mapping each token type's integer value to a Symbol
VALUE Wikitext_parser_token_types(VALUE self);

// for testing and debugging only: wraps a token struct in a Ruby object
VALUE wiki_token(token_t *token);

#endif // WIKITEXT_TOKEN_H
|