wikitext 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/parser.h ADDED
@@ -0,0 +1,31 @@
1
+ // Copyright 2008 Wincent Colaiuta
2
+ // This program is free software: you can redistribute it and/or modify
3
+ // it under the terms of the GNU General Public License as published by
4
+ // the Free Software Foundation, either version 3 of the License, or
5
+ // (at your option) any later version.
6
+ //
7
+ // This program is distributed in the hope that it will be useful,
8
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
9
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
+ // GNU General Public License for more details.
11
+ //
12
+ // You should have received a copy of the GNU General Public License
13
+ // along with this program. If not, see <http://www.gnu.org/licenses/>.
14
+
15
+ #include <ruby/ruby.h>
16
+
17
+ VALUE Wikitext_parser_initialize(VALUE self);
18
+
19
+ VALUE Wikitext_parser_tokenize(VALUE self, VALUE string);
20
+
21
+ VALUE Wikitext_parser_benchmarking_tokenize(VALUE self, VALUE string);
22
+
23
+ VALUE Wikitext_parser_sanitize_link_target(VALUE self, VALUE string);
24
+
25
+ VALUE Wikitext_parser_encode_link_target(VALUE self, VALUE in);
26
+
27
+ VALUE Wikitext_parser_encode_special_link_target(VALUE self, VALUE in);
28
+
29
+ VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self);
30
+
31
+ VALUE Wikitext_parser_profiling_parse(VALUE self, VALUE string);
data/ext/str.h ADDED
@@ -0,0 +1,135 @@
1
+ // Copyright 2008 Wincent Colaiuta
2
+ // This program is free software: you can redistribute it and/or modify
3
+ // it under the terms of the GNU General Public License as published by
4
+ // the Free Software Foundation, either version 3 of the License, or
5
+ // (at your option) any later version.
6
+ //
7
+ // This program is distributed in the hope that it will be useful,
8
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
9
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
+ // GNU General Public License for more details.
11
+ //
12
+ // You should have received a copy of the GNU General Public License
13
+ // along with this program. If not, see <http://www.gnu.org/licenses/>.
14
+
15
+ #include <ruby/ruby.h>
16
+
17
+ typedef struct
18
+ {
19
+ char *ptr;
20
+ long len;
21
+ long capacity;
22
+ } str_t;
23
+
24
+ // create a new, empty string struct
25
+ inline str_t *str_new(void)
26
+ {
27
+ str_t *str = ALLOC_N(str_t, 1);
28
+ str->ptr = NULL;
29
+ str->len = 0;
30
+ str->capacity = 0;
31
+ return str;
32
+ }
33
+
34
+ // create a new, empty string struct with capacity len
35
+ inline str_t *str_new_size(long len)
36
+ {
37
+ str_t *str = ALLOC_N(str_t, 1);
38
+ str->ptr = ALLOC_N(char, len);
39
+ str->len = 0;
40
+ str->capacity = len;
41
+ return str;
42
+ }
43
+
44
+ // create a new string struct and initialize it with a copy of the buffer of length len pointed to by src
45
+ inline str_t *str_new_copy(char *src, long len)
46
+ {
47
+ str_t *str = ALLOC_N(str_t, 1);
48
+ str->ptr = ALLOC_N(char, len);
49
+ memcpy(str->ptr, src, len);
50
+ str->len = len;
51
+ str->capacity = len;
52
+ return str;
53
+ }
54
+
55
+ // create a new string struct and initialize it with the buffer of length len pointed to by src
56
+ // no copy is made; the struct takes ownership of the buffer and will free it when the struct is disposed of
57
+ inline str_t *str_new_no_copy(char *src, long len)
58
+ {
59
+ str_t *str = ALLOC_N(str_t, 1);
60
+ str->ptr = src;
61
+ str->len = len;
62
+ str->capacity = len;
63
+ return str;
64
+ }
65
+
66
+ // convenience method for testing
67
+ inline str_t *str_new_from_string(VALUE string)
68
+ {
69
+ string = StringValue(string);
70
+ return str_new_copy(RSTRING_PTR(string), RSTRING_LEN(string));
71
+ }
72
+
73
+ // convenience method for testing
74
+ inline VALUE string_from_str(str_t *str)
75
+ {
76
+ return rb_str_new(str->ptr, str->len);
77
+ }
78
+
79
+ // grows a string's capacity to the specified length
80
+ inline void str_grow(str_t *str, long len)
81
+ {
82
+ if (str->capacity < len)
83
+ {
84
+ if (str->ptr)
85
+ REALLOC_N(str->ptr, char, len);
86
+ else
87
+ str->ptr = ALLOC_N(char, len);
88
+ str->capacity = len;
89
+ }
90
+ }
91
+
92
+ inline void str_append(str_t *str, char *src, long len)
93
+ {
94
+ long new_len = str->len + len;
95
+ if (str->capacity < new_len)
96
+ {
97
+ if (str->ptr)
98
+ REALLOC_N(str->ptr, char, new_len);
99
+ else
100
+ str->ptr = ALLOC_N(char, new_len);
101
+ str->capacity = new_len;
102
+ }
103
+ memcpy(str->ptr + str->len, src, len);
104
+ str->len = new_len;
105
+ }
106
+
107
+ // appends the "other" string struct onto str
108
+ inline void str_append_str(str_t *str, str_t *other)
109
+ {
110
+ str_append(str, other->ptr, other->len);
111
+ }
112
+
113
+ // this is a temporary convenience measure
114
+ // later on if I develop in-place variants of some functions this won't be needed
115
+ inline void str_swap(str_t **a, str_t **b)
116
+ {
117
+ str_t *c;
118
+ c = *a;
119
+ *a = *b;
120
+ *b = c;
121
+ }
122
+
123
+ // don't actually free the memory yet
124
+ // this makes str structs very useful when reusing buffers because it avoids reallocation
125
+ inline void str_clear(str_t *str)
126
+ {
127
+ str->len = 0;
128
+ }
129
+
130
+ inline void str_free(str_t *str)
131
+ {
132
+ if (str->ptr)
133
+ free(str->ptr);
134
+ free(str);
135
+ }
data/ext/token.c ADDED
@@ -0,0 +1,109 @@
1
+ // Copyright 2008 Wincent Colaiuta
2
+ // This program is free software: you can redistribute it and/or modify
3
+ // it under the terms of the GNU General Public License as published by
4
+ // the Free Software Foundation, either version 3 of the License, or
5
+ // (at your option) any later version.
6
+ //
7
+ // This program is distributed in the hope that it will be useful,
8
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
9
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
+ // GNU General Public License for more details.
11
+ //
12
+ // You should have received a copy of the GNU General Public License
13
+ // along with this program. If not, see <http://www.gnu.org/licenses/>.
14
+
15
+ #include "token.h"
16
+ #include "wikitext.h"
17
+
18
+ // return a hash of token types
19
+ // we make this available for unit testing purposes
20
+
21
+ VALUE Wikitext_parser_token_types(VALUE self)
22
+ {
23
+ VALUE hash = rb_hash_new();
24
+
25
+ #define SET_TOKEN_TYPE(identifier) (void)rb_hash_aset(hash, INT2FIX(identifier), \
26
+ rb_funcall(rb_funcall(rb_str_new2(#identifier), rb_intern("downcase"), 0), rb_intern("to_sym"), 0))
27
+
28
+ SET_TOKEN_TYPE(NO_TOKEN);
29
+ SET_TOKEN_TYPE(P);
30
+ SET_TOKEN_TYPE(LI);
31
+ SET_TOKEN_TYPE(NESTED_LIST);
32
+ SET_TOKEN_TYPE(PRE);
33
+ SET_TOKEN_TYPE(PRE_START);
34
+ SET_TOKEN_TYPE(PRE_END);
35
+ SET_TOKEN_TYPE(NO_WIKI_START);
36
+ SET_TOKEN_TYPE(NO_WIKI_END);
37
+ SET_TOKEN_TYPE(BLOCKQUOTE);
38
+ SET_TOKEN_TYPE(BLOCKQUOTE_START);
39
+ SET_TOKEN_TYPE(BLOCKQUOTE_END);
40
+ SET_TOKEN_TYPE(STRONG_EM);
41
+ SET_TOKEN_TYPE(STRONG_START);
42
+ SET_TOKEN_TYPE(STRONG_END);
43
+ SET_TOKEN_TYPE(STRONG);
44
+ SET_TOKEN_TYPE(EM_START);
45
+ SET_TOKEN_TYPE(EM_END);
46
+ SET_TOKEN_TYPE(EM);
47
+ SET_TOKEN_TYPE(TT_START);
48
+ SET_TOKEN_TYPE(TT_END);
49
+ SET_TOKEN_TYPE(TT);
50
+ SET_TOKEN_TYPE(OL);
51
+ SET_TOKEN_TYPE(UL);
52
+ SET_TOKEN_TYPE(H6_START);
53
+ SET_TOKEN_TYPE(H5_START);
54
+ SET_TOKEN_TYPE(H4_START);
55
+ SET_TOKEN_TYPE(H3_START);
56
+ SET_TOKEN_TYPE(H2_START);
57
+ SET_TOKEN_TYPE(H1_START);
58
+ SET_TOKEN_TYPE(H6_END);
59
+ SET_TOKEN_TYPE(H5_END);
60
+ SET_TOKEN_TYPE(H4_END);
61
+ SET_TOKEN_TYPE(H3_END);
62
+ SET_TOKEN_TYPE(H2_END);
63
+ SET_TOKEN_TYPE(H1_END);
64
+ SET_TOKEN_TYPE(URI);
65
+ SET_TOKEN_TYPE(MAIL);
66
+ SET_TOKEN_TYPE(LINK_START);
67
+ SET_TOKEN_TYPE(LINK_END);
68
+ SET_TOKEN_TYPE(EXT_LINK_START);
69
+ SET_TOKEN_TYPE(EXT_LINK_END);
70
+ SET_TOKEN_TYPE(SEPARATOR);
71
+ SET_TOKEN_TYPE(SPACE);
72
+ SET_TOKEN_TYPE(QUOT_ENTITY);
73
+ SET_TOKEN_TYPE(AMP_ENTITY);
74
+ SET_TOKEN_TYPE(NAMED_ENTITY);
75
+ SET_TOKEN_TYPE(HEX_ENTITY);
76
+ SET_TOKEN_TYPE(DECIMAL_ENTITY);
77
+ SET_TOKEN_TYPE(QUOT);
78
+ SET_TOKEN_TYPE(AMP);
79
+ SET_TOKEN_TYPE(LESS);
80
+ SET_TOKEN_TYPE(GREATER);
81
+ SET_TOKEN_TYPE(CRLF);
82
+ SET_TOKEN_TYPE(PRINTABLE);
83
+ SET_TOKEN_TYPE(DEFAULT);
84
+ SET_TOKEN_TYPE(END_OF_FILE);
85
+
86
+ #undef SET_TOKEN_TYPE
87
+
88
+ return hash;
89
+ }
90
+
91
+ // for testing and debugging only
92
+ VALUE _Wikitext_token(token_t *token)
93
+ {
94
+ VALUE object = rb_class_new_instance(0, NULL, cWikitextParserToken);
95
+ (void)rb_iv_set(object, "@start", LONG2NUM((long)token->start));
96
+ (void)rb_iv_set(object, "@stop", LONG2NUM((long)token->stop));
97
+ (void)rb_iv_set(object, "@line_start", LONG2NUM(token->line_start));
98
+ (void)rb_iv_set(object, "@line_stop", LONG2NUM(token->line_stop));
99
+ (void)rb_iv_set(object, "@column_start", LONG2NUM(token->column_start));
100
+ (void)rb_iv_set(object, "@column_stop", LONG2NUM(token->column_stop));
101
+ (void)rb_iv_set(object, "@code_point", INT2NUM(token->code_point));
102
+
103
+ // look-up the token type
104
+ VALUE types = Wikitext_parser_token_types(Qnil);
105
+ VALUE type = rb_hash_aref(types, INT2FIX(token->type));
106
+ (void)rb_iv_set(object, "@token_type", type);
107
+ (void)rb_iv_set(object, "@string_value", rb_str_new(token->start, token->stop - token->start));
108
+ return object;
109
+ }
data/ext/token.h ADDED
@@ -0,0 +1,95 @@
1
+ // Copyright 2008 Wincent Colaiuta
2
+ // This program is free software: you can redistribute it and/or modify
3
+ // it under the terms of the GNU General Public License as published by
4
+ // the Free Software Foundation, either version 3 of the License, or
5
+ // (at your option) any later version.
6
+ //
7
+ // This program is distributed in the hope that it will be useful,
8
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
9
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
+ // GNU General Public License for more details.
11
+ //
12
+ // You should have received a copy of the GNU General Public License
13
+ // along with this program. If not, see <http://www.gnu.org/licenses/>.
14
+
15
+ #include <ruby/ruby.h>
16
+ #include <stdint.h> /* uint32_t */
17
+
18
// Both macros take a pointer to a token (anything with char *start and char *stop members).
// The argument is parenthesized so that expressions such as &tok or tokens + i work; note
// that it is evaluated more than once, so it must not have side effects.

// number of bytes between a token's start and stop pointers
#define TOKEN_LEN(token)    ((token)->stop - (token)->start)

// new Ruby String holding a copy of the bytes between a token's start and stop pointers
#define TOKEN_TEXT(token)   rb_str_new((const char *)(token)->start, TOKEN_LEN(token))
20
+
21
// One scanned token. The text it covers is the byte range from start up to (not including)
// stop, which is how TOKEN_LEN and TOKEN_TEXT read it.
typedef struct
{
    char        *start;         // first byte of the token's text
    char        *stop;          // one past the last byte of the token's text
    size_t      line_start;     // line on which the token starts
    size_t      line_stop;      // line on which the token stops
    size_t      column_start;   // column at which the token starts
    size_t      column_stop;    // column at which the token stops
    uint32_t    code_point;     // NOTE(review): presumably a decoded character value; confirm against the scanner
    int         type;           // looked up against the token types hash, i.e. a value of enum token_types
} token_t;
32
+
33
// Token type identifiers. The values are implicit and sequential starting from zero, so the
// order of the enumerators is significant. Wikitext_parser_token_types() (token.c) lists the
// same identifiers and must be kept in step with this enum.
enum token_types
{
    NO_TOKEN = 0,
    P,                  // imaginary token (never explicitly marked up)
    LI,                 // imaginary token (never explicitly marked up)
    NESTED_LIST,        // imaginary token (never explicitly marked up)
    PRE,
    PRE_START,
    PRE_END,
    NO_WIKI_START,
    NO_WIKI_END,
    BLOCKQUOTE,
    BLOCKQUOTE_START,
    BLOCKQUOTE_END,
    STRONG_EM,
    STRONG_START,
    STRONG_END,
    STRONG,
    EM_START,
    EM_END,
    EM,
    TT_START,
    TT_END,
    TT,
    OL,
    UL,
    H6_START,
    H5_START,
    H4_START,
    H3_START,
    H2_START,
    H1_START,
    H6_END,
    H5_END,
    H4_END,
    H3_END,
    H2_END,
    H1_END,
    URI,
    MAIL,
    LINK_START,
    LINK_END,
    EXT_LINK_START,
    EXT_LINK_END,
    SEPARATOR,
    SPACE,
    QUOT_ENTITY,
    AMP_ENTITY,
    NAMED_ENTITY,
    HEX_ENTITY,
    DECIMAL_ENTITY,
    QUOT,
    AMP,
    LESS,
    GREATER,
    CRLF,
    PRINTABLE,
    DEFAULT,
    END_OF_FILE
};
92
+
93
+ VALUE Wikitext_parser_token_types(VALUE self);
94
+
95
+ VALUE _Wikitext_token(token_t *token);
data/ext/wikitext.c ADDED
@@ -0,0 +1,60 @@
1
+ // Copyright 2008 Wincent Colaiuta
2
+ // This program is free software: you can redistribute it and/or modify
3
+ // it under the terms of the GNU General Public License as published by
4
+ // the Free Software Foundation, either version 3 of the License, or
5
+ // (at your option) any later version.
6
+ //
7
+ // This program is distributed in the hope that it will be useful,
8
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
9
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
+ // GNU General Public License for more details.
11
+ //
12
+ // You should have received a copy of the GNU General Public License
13
+ // along with this program. If not, see <http://www.gnu.org/licenses/>.
14
+
15
+ #include "wikitext_ragel.h"
16
+ #include "parser.h"
17
+
18
+ VALUE mWikitext = 0; // module Wikitext
19
+ VALUE cWikitextParser = 0; // class Wikitext::Parser
20
+ VALUE eWikitextParserError = 0; // class Wikitext::Parser::Error
21
+ VALUE cWikitextParserToken = 0; // class Wikitext::Parser::Token
22
+
23
+ void Init_wikitext()
24
+ {
25
+ // Wikitext
26
+ mWikitext = rb_define_module("Wikitext");
27
+
28
+ // Wikitext::Parser
29
+ cWikitextParser = rb_define_class_under(mWikitext, "Parser", rb_cObject);
30
+ rb_define_method(cWikitextParser, "initialize", Wikitext_parser_initialize, 0);
31
+ rb_define_method(cWikitextParser, "parse", Wikitext_parser_parse, -1);
32
+ rb_define_method(cWikitextParser, "profiling_parse", Wikitext_parser_profiling_parse, 1);
33
+ rb_define_method(cWikitextParser, "tokenize", Wikitext_parser_tokenize, 1);
34
+ rb_define_method(cWikitextParser, "benchmarking_tokenize", Wikitext_parser_benchmarking_tokenize, 1);
35
+ rb_define_singleton_method(cWikitextParser, "sanitize_link_target", Wikitext_parser_sanitize_link_target, 1);
36
+ rb_define_singleton_method(cWikitextParser, "encode_link_target", Wikitext_parser_encode_link_target, 1);
37
+ rb_define_singleton_method(cWikitextParser, "encode_special_link_target", Wikitext_parser_encode_special_link_target, 1);
38
+ rb_define_attr(cWikitextParser, "line_ending", Qtrue, Qtrue);
39
+ rb_define_attr(cWikitextParser, "internal_link_prefix", Qtrue, Qtrue);
40
+ rb_define_attr(cWikitextParser, "external_link_class", Qtrue, Qtrue);
41
+ rb_define_attr(cWikitextParser, "mailto_class", Qtrue, Qtrue);
42
+ rb_define_attr(cWikitextParser, "autolink", Qtrue, Qtrue);
43
+ rb_define_attr(cWikitextParser, "treat_slash_as_special", Qtrue, Qtrue);
44
+
45
+ // Wikitext::Parser::Error
46
+ eWikitextParserError = rb_define_class_under(cWikitextParser, "Error", rb_eException);
47
+
48
+ // Wikitext::Parser::Token
49
+ cWikitextParserToken = rb_define_class_under(cWikitextParser, "Token", rb_cObject);
50
+ rb_define_singleton_method(cWikitextParserToken, "types", Wikitext_parser_token_types, 0);
51
+ rb_define_attr(cWikitextParserToken, "start", Qtrue, Qfalse);
52
+ rb_define_attr(cWikitextParserToken, "stop", Qtrue, Qfalse);
53
+ rb_define_attr(cWikitextParserToken, "line_start", Qtrue, Qfalse);
54
+ rb_define_attr(cWikitextParserToken, "line_stop", Qtrue, Qfalse);
55
+ rb_define_attr(cWikitextParserToken, "column_start", Qtrue, Qfalse);
56
+ rb_define_attr(cWikitextParserToken, "column_stop", Qtrue, Qfalse);
57
+ rb_define_attr(cWikitextParserToken, "code_point", Qtrue, Qfalse);
58
+ rb_define_attr(cWikitextParserToken, "token_type", Qtrue, Qfalse);
59
+ rb_define_attr(cWikitextParserToken, "string_value", Qtrue, Qfalse);
60
+ }
data/ext/wikitext.h ADDED
@@ -0,0 +1,30 @@
1
+ // Copyright 2008 Wincent Colaiuta
2
+ // This program is free software: you can redistribute it and/or modify
3
+ // it under the terms of the GNU General Public License as published by
4
+ // the Free Software Foundation, either version 3 of the License, or
5
+ // (at your option) any later version.
6
+ //
7
+ // This program is distributed in the hope that it will be useful,
8
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
9
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
+ // GNU General Public License for more details.
11
+ //
12
+ // You should have received a copy of the GNU General Public License
13
+ // along with this program. If not, see <http://www.gnu.org/licenses/>.
14
+
15
+ #include <ruby/ruby.h>
16
+ #include <stdint.h>
17
+
18
// debugging aid: invokes the "p" method on Kernel with obj as its only argument and
// evaluates to whatever that call returns
#define ruby_inspect(obj)   rb_funcall(rb_mKernel, rb_intern("p"), 1, (obj))
19
+
20
+ // module Wikitext
21
+ extern VALUE mWikitext;
22
+
23
+ // class Wikitext::Parser
24
+ extern VALUE cWikitextParser;
25
+
26
+ // class Wikitext::Parser::Error
27
+ extern VALUE eWikitextParserError;
28
+
29
+ // class Wikitext::Parser::Token
30
+ extern VALUE cWikitextParserToken;