wikitext 4.0 → 4.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/wikitext +18 -4
- data/ext/{extconf.rb → wikitext/extconf.rb} +0 -0
- data/lib/wikitext/version.rb +1 -1
- metadata +5 -18
- data/ext/ary.c +0 -116
- data/ext/ary.h +0 -50
- data/ext/depend +0 -32
- data/ext/parser.c +0 -2595
- data/ext/parser.h +0 -40
- data/ext/ruby_compat.h +0 -34
- data/ext/str.c +0 -109
- data/ext/str.h +0 -64
- data/ext/token.c +0 -125
- data/ext/token.h +0 -117
- data/ext/wikitext.c +0 -125
- data/ext/wikitext.h +0 -39
- data/ext/wikitext_ragel.c +0 -3211
- data/ext/wikitext_ragel.h +0 -26
data/ext/parser.h
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
// Copyright 2008-2009 Wincent Colaiuta. All rights reserved.
|
2
|
-
//
|
3
|
-
// Redistribution and use in source and binary forms, with or without
|
4
|
-
// modification, are permitted provided that the following conditions are met:
|
5
|
-
//
|
6
|
-
// 1. Redistributions of source code must retain the above copyright notice,
|
7
|
-
// this list of conditions and the following disclaimer.
|
8
|
-
// 2. Redistributions in binary form must reproduce the above copyright notice,
|
9
|
-
// this list of conditions and the following disclaimer in the documentation
|
10
|
-
// and/or other materials provided with the distribution.
|
11
|
-
//
|
12
|
-
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
13
|
-
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
14
|
-
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
15
|
-
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
16
|
-
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
17
|
-
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
18
|
-
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
19
|
-
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
20
|
-
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
21
|
-
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
22
|
-
// POSSIBILITY OF SUCH DAMAGE.
|
23
|
-
|
24
|
-
#include "ruby_compat.h"
|
25
|
-
|
26
|
-
VALUE Wikitext_parser_initialize(int argc, VALUE *argv, VALUE self);
|
27
|
-
|
28
|
-
VALUE Wikitext_parser_tokenize(VALUE self, VALUE string);
|
29
|
-
|
30
|
-
VALUE Wikitext_parser_benchmarking_tokenize(VALUE self, VALUE string);
|
31
|
-
|
32
|
-
VALUE Wikitext_parser_fulltext_tokenize(int argc, VALUE *argv, VALUE self);
|
33
|
-
|
34
|
-
VALUE Wikitext_parser_sanitize_link_target(VALUE self, VALUE string);
|
35
|
-
|
36
|
-
VALUE Wikitext_parser_encode_link_target(VALUE self, VALUE in);
|
37
|
-
|
38
|
-
VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self);
|
39
|
-
|
40
|
-
VALUE Wikitext_parser_profiling_parse(VALUE self, VALUE string);
|
data/ext/ruby_compat.h
DELETED
@@ -1,34 +0,0 @@
|
|
1
|
-
// Copyright 2008-2009 Wincent Colaiuta. All rights reserved.
|
2
|
-
//
|
3
|
-
// Redistribution and use in source and binary forms, with or without
|
4
|
-
// modification, are permitted provided that the following conditions are met:
|
5
|
-
//
|
6
|
-
// 1. Redistributions of source code must retain the above copyright notice,
|
7
|
-
// this list of conditions and the following disclaimer.
|
8
|
-
// 2. Redistributions in binary form must reproduce the above copyright notice,
|
9
|
-
// this list of conditions and the following disclaimer in the documentation
|
10
|
-
// and/or other materials provided with the distribution.
|
11
|
-
//
|
12
|
-
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
13
|
-
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
14
|
-
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
15
|
-
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
16
|
-
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
17
|
-
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
18
|
-
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
19
|
-
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
20
|
-
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
21
|
-
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
22
|
-
// POSSIBILITY OF SUCH DAMAGE.
|
23
|
-
|
24
|
-
#include <ruby.h>
|
25
|
-
|
26
|
-
// for compatibility with Ruby 1.8.5, which doesn't declare RSTRING_PTR
|
27
|
-
#if !defined(RSTRING_PTR)
/* fallback: read the character pointer directly from the RString struct */
#  define RSTRING_PTR(s) (RSTRING(s)->ptr)
#endif
|
30
|
-
|
31
|
-
// for compatibility with Ruby 1.8.5, which doesn't declare RSTRING_LEN
|
32
|
-
#if !defined(RSTRING_LEN)
/* fallback: read the length directly from the RString struct */
#  define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
|
data/ext/str.c
DELETED
@@ -1,109 +0,0 @@
|
|
1
|
-
// Copyright 2008-2009 Wincent Colaiuta. All rights reserved.
|
2
|
-
//
|
3
|
-
// Redistribution and use in source and binary forms, with or without
|
4
|
-
// modification, are permitted provided that the following conditions are met:
|
5
|
-
//
|
6
|
-
// 1. Redistributions of source code must retain the above copyright notice,
|
7
|
-
// this list of conditions and the following disclaimer.
|
8
|
-
// 2. Redistributions in binary form must reproduce the above copyright notice,
|
9
|
-
// this list of conditions and the following disclaimer in the documentation
|
10
|
-
// and/or other materials provided with the distribution.
|
11
|
-
//
|
12
|
-
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
13
|
-
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
14
|
-
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
15
|
-
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
16
|
-
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
17
|
-
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
18
|
-
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
19
|
-
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
20
|
-
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
21
|
-
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
22
|
-
// POSSIBILITY OF SUCH DAMAGE.
|
23
|
-
|
24
|
-
#include "str.h"
|
25
|
-
|
26
|
-
// when allocating memory, reserve a little more than was asked for,
|
27
|
-
// which can help to avoid subsequent allocations
|
28
|
-
// number of extra chars added on top of the requested size whenever a buffer is allocated or reallocated in this file
#define STR_OVERALLOC 256
|
29
|
-
|
30
|
-
str_t *str_new(void)
|
31
|
-
{
|
32
|
-
str_t *str = ALLOC_N(str_t, 1);
|
33
|
-
str->ptr = NULL;
|
34
|
-
str->len = 0;
|
35
|
-
str->capacity = 0;
|
36
|
-
return str;
|
37
|
-
}
|
38
|
-
|
39
|
-
str_t *str_new_copy(const char *src, long len)
|
40
|
-
{
|
41
|
-
str_t *str = ALLOC_N(str_t, 1);
|
42
|
-
str->ptr = ALLOC_N(char, len + STR_OVERALLOC);
|
43
|
-
memcpy(str->ptr, src, len);
|
44
|
-
str->len = len;
|
45
|
-
str->capacity = len + STR_OVERALLOC;
|
46
|
-
return str;
|
47
|
-
}
|
48
|
-
|
49
|
-
// Builds a string struct from a Ruby value: the value is first passed through
// StringValue(), then its bytes are copied with str_new_copy().
str_t *str_new_from_string(VALUE string)
{
    string = StringValue(string);
    const char *bytes = RSTRING_PTR(string);
    long length = RSTRING_LEN(string);
    return str_new_copy(bytes, length);
}
|
54
|
-
|
55
|
-
// Creates a Ruby String from the first str->len bytes of str->ptr and calls
// force_encoding("UTF-8") on it before returning it.
VALUE string_from_str(str_t *str)
{
    VALUE result = rb_str_new(str->ptr, str->len);
    ID force_encoding = rb_intern("force_encoding");
    VALUE encoding_name = rb_str_new2("UTF-8");
    rb_funcall(result, force_encoding, 1, encoding_name);
    return result;
}
|
61
|
-
|
62
|
-
// Ensures that str can hold at least len chars. Does nothing when the current
// capacity already suffices; otherwise the buffer is (re)allocated with
// len + STR_OVERALLOC chars and capacity is updated to match.
void str_grow(str_t *str, long len)
{
    if (str->capacity >= len)
        return;

    long wanted = len + STR_OVERALLOC;
    if (str->ptr == NULL)
        str->ptr = ALLOC_N(char, wanted);   // first allocation for this string
    else
        REALLOC_N(str->ptr, char, wanted);
    str->capacity = wanted;
}
|
73
|
-
|
74
|
-
// Appends len bytes from src onto the end of str, enlarging the buffer first
// (via str_grow) when the combined length exceeds the current capacity.
//
// A len of zero or less is a no-op: there is nothing to copy, and returning
// early avoids handing memcpy() a NULL destination when str has no buffer yet
// (a NULL pointer argument is undefined behaviour even for a zero-byte copy).
void str_append(str_t *str, const char *src, long len)
{
    if (len <= 0)
        return;

    long new_len = str->len + len;
    str_grow(str, new_len);     // same growth policy: new_len + STR_OVERALLOC
    memcpy(str->ptr + str->len, src, len);
    str->len = new_len;
}
|
88
|
-
|
89
|
-
// Appends the contents of another string struct (other->len bytes starting at
// other->ptr) onto str.
void str_append_str(str_t *str, str_t *other)
{
    const char *bytes = other->ptr;
    long count = other->len;
    str_append(str, bytes, count);
}
|
93
|
-
|
94
|
-
// Appends the bytes of a Ruby String onto str. No type conversion or check is
// performed here, so other is read directly via RSTRING_PTR/RSTRING_LEN.
void str_append_string(str_t *str, VALUE other)
{
    const char *bytes = RSTRING_PTR(other);
    long count = RSTRING_LEN(other);
    str_append(str, bytes, count);
}
|
98
|
-
|
99
|
-
// Empties the string by resetting its length only; ptr and capacity are left
// untouched, so the existing buffer can be reused by later appends.
void str_clear(str_t *str)
{
    str->len = 0;
}
|
103
|
-
|
104
|
-
// Releases a string struct: first its character buffer (if any), then the
// struct itself. A NULL str is ignored.
//
// Every allocation visible in this file goes through Ruby's ALLOC_N/REALLOC_N,
// so the memory is handed back with xfree() rather than plain free(); mixing
// the two allocators bypasses Ruby's allocation bookkeeping.
void str_free(str_t *str)
{
    if (!str)
        return;
    if (str->ptr)
        xfree(str->ptr);
    xfree(str);
}
|
data/ext/str.h
DELETED
@@ -1,64 +0,0 @@
|
|
1
|
-
// Copyright 2008-2009 Wincent Colaiuta. All rights reserved.
|
2
|
-
//
|
3
|
-
// Redistribution and use in source and binary forms, with or without
|
4
|
-
// modification, are permitted provided that the following conditions are met:
|
5
|
-
//
|
6
|
-
// 1. Redistributions of source code must retain the above copyright notice,
|
7
|
-
// this list of conditions and the following disclaimer.
|
8
|
-
// 2. Redistributions in binary form must reproduce the above copyright notice,
|
9
|
-
// this list of conditions and the following disclaimer in the documentation
|
10
|
-
// and/or other materials provided with the distribution.
|
11
|
-
//
|
12
|
-
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
13
|
-
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
14
|
-
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
15
|
-
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
16
|
-
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
17
|
-
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
18
|
-
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
19
|
-
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
20
|
-
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
21
|
-
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
22
|
-
// POSSIBILITY OF SUCH DAMAGE.
|
23
|
-
|
24
|
-
#include "ruby_compat.h"
|
25
|
-
|
26
|
-
// Growable byte buffer used in place of Ruby Strings inside the extension.
typedef struct
{
    char *ptr;      // start of the buffer; NULL until something is allocated
    long len;       // number of bytes currently in use
    long capacity;  // number of bytes allocated at ptr
} str_t;
|
32
|
-
|
33
|
-
// Mark the str struct designated by ptr as a participant in Ruby's mark-and-sweep garbage collection scheme.
|
34
|
-
// A variable named name is placed on the C stack to prevent the structure from being prematurely collected.
|
35
|
-
// the wrapper object is created with no mark function (0) and with str_free as its free function
#define GC_WRAP_STR(ptr, name) volatile VALUE name __attribute__((unused)) = Data_Wrap_Struct(rb_cObject, 0, str_free, ptr)
|
36
|
-
|
37
|
-
// create a new, empty string struct
|
38
|
-
str_t *str_new(void);
|
39
|
-
|
40
|
-
// create a new string struct and initialize it with a copy of the buffer of length len pointed to by src
|
41
|
-
str_t *str_new_copy(const char *src, long len);
|
42
|
-
|
43
|
-
// convenience method for testing
|
44
|
-
str_t *str_new_from_string(VALUE string);
|
45
|
-
|
46
|
-
// convenience method for testing
|
47
|
-
VALUE string_from_str(str_t *str);
|
48
|
-
|
49
|
-
// grows a string's capacity to the specified length
|
50
|
-
void str_grow(str_t *str, long len);
|
51
|
-
|
52
|
-
// appends len bytes from src onto str, enlarging the buffer first if it is too small
void str_append(str_t *str, const char *src, long len);
|
53
|
-
|
54
|
-
// appends the "other" string struct onto str
|
55
|
-
void str_append_str(str_t *str, str_t *other);
|
56
|
-
|
57
|
-
// appends the "other" string (a Ruby String) onto str
|
58
|
-
void str_append_string(str_t *str, VALUE other);
|
59
|
-
|
60
|
-
// don't actually free the memory yet
|
61
|
-
// this makes str structs very useful when reusing buffers because it avoids reallocation
|
62
|
-
void str_clear(str_t *str);
|
63
|
-
|
64
|
-
// releases the string's buffer (if any) and then the struct itself
void str_free(str_t *str);
|
data/ext/token.c
DELETED
@@ -1,125 +0,0 @@
|
|
1
|
-
// Copyright 2008-2009 Wincent Colaiuta. All rights reserved.
|
2
|
-
//
|
3
|
-
// Redistribution and use in source and binary forms, with or without
|
4
|
-
// modification, are permitted provided that the following conditions are met:
|
5
|
-
//
|
6
|
-
// 1. Redistributions of source code must retain the above copyright notice,
|
7
|
-
// this list of conditions and the following disclaimer.
|
8
|
-
// 2. Redistributions in binary form must reproduce the above copyright notice,
|
9
|
-
// this list of conditions and the following disclaimer in the documentation
|
10
|
-
// and/or other materials provided with the distribution.
|
11
|
-
//
|
12
|
-
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
13
|
-
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
14
|
-
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
15
|
-
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
16
|
-
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
17
|
-
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
18
|
-
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
19
|
-
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
20
|
-
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
21
|
-
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
22
|
-
// POSSIBILITY OF SUCH DAMAGE.
|
23
|
-
|
24
|
-
#include "token.h"
|
25
|
-
#include "wikitext.h"
|
26
|
-
|
27
|
-
// return a hash of token types
|
28
|
-
// we make this available for unit testing purposes
|
29
|
-
|
30
|
-
// Builds and returns a new Hash that maps every token type (as an Integer
// key) to a Symbol derived from the downcased spelling of its identifier,
// e.g. NO_TOKEN => :no_token. The self argument is not used.
VALUE Wikitext_parser_token_types(VALUE self)
{
    // pairs a token type constant with the literal spelling of its identifier
#define TOKEN_TYPE_ENTRY(identifier) { identifier, #identifier }
    static const struct
    {
        int         type;
        const char  *name;
    } entries[] = {
        TOKEN_TYPE_ENTRY(NO_TOKEN),
        TOKEN_TYPE_ENTRY(P),
        TOKEN_TYPE_ENTRY(LI),
        TOKEN_TYPE_ENTRY(NESTED_LIST),
        TOKEN_TYPE_ENTRY(PRE),
        TOKEN_TYPE_ENTRY(PRE_START),
        TOKEN_TYPE_ENTRY(PRE_END),
        TOKEN_TYPE_ENTRY(NO_WIKI_START),
        TOKEN_TYPE_ENTRY(NO_WIKI_END),
        TOKEN_TYPE_ENTRY(BLOCKQUOTE),
        TOKEN_TYPE_ENTRY(BLOCKQUOTE_START),
        TOKEN_TYPE_ENTRY(BLOCKQUOTE_END),
        TOKEN_TYPE_ENTRY(STRONG_EM),
        TOKEN_TYPE_ENTRY(STRONG_START),
        TOKEN_TYPE_ENTRY(STRONG_END),
        TOKEN_TYPE_ENTRY(STRONG),
        TOKEN_TYPE_ENTRY(EM_START),
        TOKEN_TYPE_ENTRY(EM_END),
        TOKEN_TYPE_ENTRY(EM),
        TOKEN_TYPE_ENTRY(TT_START),
        TOKEN_TYPE_ENTRY(TT_END),
        TOKEN_TYPE_ENTRY(TT),
        TOKEN_TYPE_ENTRY(OL),
        TOKEN_TYPE_ENTRY(UL),
        TOKEN_TYPE_ENTRY(H1_START),
        TOKEN_TYPE_ENTRY(H2_START),
        TOKEN_TYPE_ENTRY(H3_START),
        TOKEN_TYPE_ENTRY(H4_START),
        TOKEN_TYPE_ENTRY(H5_START),
        TOKEN_TYPE_ENTRY(H6_START),
        TOKEN_TYPE_ENTRY(H1_END),
        TOKEN_TYPE_ENTRY(H2_END),
        TOKEN_TYPE_ENTRY(H3_END),
        TOKEN_TYPE_ENTRY(H4_END),
        TOKEN_TYPE_ENTRY(H5_END),
        TOKEN_TYPE_ENTRY(H6_END),
        TOKEN_TYPE_ENTRY(URI),
        TOKEN_TYPE_ENTRY(MAIL),
        TOKEN_TYPE_ENTRY(PATH),
        TOKEN_TYPE_ENTRY(LINK_START),
        TOKEN_TYPE_ENTRY(LINK_END),
        TOKEN_TYPE_ENTRY(EXT_LINK_START),
        TOKEN_TYPE_ENTRY(EXT_LINK_END),
        TOKEN_TYPE_ENTRY(SEPARATOR),
        TOKEN_TYPE_ENTRY(SPACE),
        TOKEN_TYPE_ENTRY(QUOT_ENTITY),
        TOKEN_TYPE_ENTRY(AMP_ENTITY),
        TOKEN_TYPE_ENTRY(NAMED_ENTITY),
        TOKEN_TYPE_ENTRY(HEX_ENTITY),
        TOKEN_TYPE_ENTRY(DECIMAL_ENTITY),
        TOKEN_TYPE_ENTRY(QUOT),
        TOKEN_TYPE_ENTRY(AMP),
        TOKEN_TYPE_ENTRY(LESS),
        TOKEN_TYPE_ENTRY(GREATER),
        TOKEN_TYPE_ENTRY(IMG_START),
        TOKEN_TYPE_ENTRY(IMG_END),
        TOKEN_TYPE_ENTRY(LEFT_CURLY),
        TOKEN_TYPE_ENTRY(RIGHT_CURLY),
        TOKEN_TYPE_ENTRY(CRLF),
        TOKEN_TYPE_ENTRY(SPECIAL_URI_CHARS),
        TOKEN_TYPE_ENTRY(PRINTABLE),
        TOKEN_TYPE_ENTRY(ALNUM),
        TOKEN_TYPE_ENTRY(DEFAULT),
        TOKEN_TYPE_ENTRY(END_OF_FILE)
    };
#undef TOKEN_TYPE_ENTRY

    VALUE hash = rb_hash_new();
    for (size_t i = 0; i < sizeof(entries) / sizeof(entries[0]); i++)
    {
        // "NO_TOKEN" -> "no_token" -> :no_token
        VALUE name = rb_str_new2(entries[i].name);
        VALUE lowered = rb_funcall(name, rb_intern("downcase"), 0);
        VALUE symbol = rb_funcall(lowered, rb_intern("to_sym"), 0);
        rb_hash_aset(hash, INT2FIX(entries[i].type), symbol);
    }
    return hash;
}
|
106
|
-
|
107
|
-
// for testing and debugging only
|
108
|
-
// Wraps a C token in a new cWikitextParserToken instance by copying each field
// of the struct into an instance variable of the same name. The token type is
// stored as the Symbol looked up in the Wikitext_parser_token_types() hash,
// and @string_value holds a copy of the bytes between start and stop.
VALUE wiki_token(token_t *token)
{
    const char *first = token->start;
    const char *last = token->stop;
    VALUE object = rb_class_new_instance(0, NULL, cWikitextParserToken);

    // the start/stop pointers are exposed as plain integers
    rb_iv_set(object, "@start", LONG2NUM((long)first));
    rb_iv_set(object, "@stop", LONG2NUM((long)last));
    rb_iv_set(object, "@line_start", LONG2NUM(token->line_start));
    rb_iv_set(object, "@line_stop", LONG2NUM(token->line_stop));
    rb_iv_set(object, "@column_start", LONG2NUM(token->column_start));
    rb_iv_set(object, "@column_stop", LONG2NUM(token->column_stop));
    rb_iv_set(object, "@code_point", INT2NUM(token->code_point));

    // translate the numeric type into its Symbol via the type table
    VALUE type_table = Wikitext_parser_token_types(Qnil);
    VALUE type_symbol = rb_hash_aref(type_table, INT2FIX(token->type));
    rb_iv_set(object, "@token_type", type_symbol);

    VALUE text = rb_str_new(first, last - first);
    rb_iv_set(object, "@string_value", text);
    return object;
}
|
data/ext/token.h
DELETED
@@ -1,117 +0,0 @@
|
|
1
|
-
// Copyright 2008-2009 Wincent Colaiuta. All rights reserved.
|
2
|
-
//
|
3
|
-
// Redistribution and use in source and binary forms, with or without
|
4
|
-
// modification, are permitted provided that the following conditions are met:
|
5
|
-
//
|
6
|
-
// 1. Redistributions of source code must retain the above copyright notice,
|
7
|
-
// this list of conditions and the following disclaimer.
|
8
|
-
// 2. Redistributions in binary form must reproduce the above copyright notice,
|
9
|
-
// this list of conditions and the following disclaimer in the documentation
|
10
|
-
// and/or other materials provided with the distribution.
|
11
|
-
//
|
12
|
-
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
13
|
-
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
14
|
-
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
15
|
-
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
16
|
-
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
17
|
-
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
18
|
-
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
19
|
-
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
20
|
-
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
21
|
-
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
22
|
-
// POSSIBILITY OF SUCH DAMAGE.
|
23
|
-
|
24
|
-
#include "ruby_compat.h"
|
25
|
-
#include <stdint.h> /* uint32_t */
|
26
|
-
|
27
|
-
// Number of bytes spanned by the token (stop - start). The argument is
// parenthesised so that expressions such as `tokens + 1` expand correctly.
#define TOKEN_LEN(token)    ((token)->stop - (token)->start)

// New Ruby String holding a copy of the token's bytes.
#define TOKEN_TEXT(token)   rb_str_new((const char *)(token)->start, TOKEN_LEN(token))
|
29
|
-
|
30
|
-
// A single scanned token.
typedef struct
{
    char        *start;         // first byte of the token's text
    char        *stop;          // end of the token's text (stop - start is its length)
    size_t      line_start;     // presumably the line on which the token begins; confirm against the scanner
    size_t      line_stop;      // presumably the line on which the token ends; confirm against the scanner
    size_t      column_start;   // presumably the column at which the token begins; confirm against the scanner
    size_t      column_stop;    // presumably the column at which the token ends; confirm against the scanner
    uint32_t    code_point;
    int         type;           // looked up as a key of the token types hash
} token_t;
|
41
|
-
|
42
|
-
enum token_types {
    NO_TOKEN = 0,

    /* imaginary tokens (never explicitly marked up) */
    P,
    LI,
    NESTED_LIST,

    PRE,
    PRE_START,
    PRE_END,
    NO_WIKI_START,
    NO_WIKI_END,
    BLOCKQUOTE,
    BLOCKQUOTE_START,
    BLOCKQUOTE_END,
    STRONG_EM,
    STRONG_START,
    STRONG_END,
    STRONG,
    EM_START,
    EM_END,
    EM,
    TT_START,
    TT_END,
    TT,
    OL,
    UL,

    /* keep these consecutive, and in ascending order
     * (the arithmetic for the base_heading_level feature assumes this) */
    H1_START,
    H2_START,
    H3_START,
    H4_START,
    H5_START,
    H6_START,

    /* likewise for the H*_END tokens */
    H1_END,
    H2_END,
    H3_END,
    H4_END,
    H5_END,
    H6_END,

    URI,
    MAIL,
    PATH,
    LINK_START,
    LINK_END,
    EXT_LINK_START,
    EXT_LINK_END,
    SEPARATOR,
    SPACE,
    QUOT_ENTITY,
    AMP_ENTITY,
    NAMED_ENTITY,
    HEX_ENTITY,
    DECIMAL_ENTITY,
    QUOT,
    AMP,
    LESS,
    GREATER,
    IMG_START,
    IMG_END,
    LEFT_CURLY,
    RIGHT_CURLY,
    CRLF,
    SPECIAL_URI_CHARS,
    PRINTABLE,
    ALNUM,
    DEFAULT,
    END_OF_FILE
};
|
114
|
-
|
115
|
-
// returns a new Hash mapping each token type (Integer key) to a Symbol made from its downcased identifier
VALUE Wikitext_parser_token_types(VALUE self);
|
116
|
-
|
117
|
-
// for testing and debugging only: returns a new cWikitextParserToken instance whose instance variables mirror the fields of token
VALUE wiki_token(token_t *token);
|