wikitext 1.6 → 1.7
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/ary.h +0 -6
- data/ext/extconf.rb +9 -0
- data/ext/parser.c +762 -839
- data/ext/parser.h +0 -2
- data/ext/str.c +16 -33
- data/ext/str.h +4 -12
- data/ext/token.c +1 -1
- data/ext/token.h +2 -2
- data/ext/wikitext.c +0 -1
- data/lib/wikitext/version.rb +1 -1
- data/spec/external_link_spec.rb +17 -0
- data/spec/internal_link_spec.rb +7 -1
- data/spec/link_encoding_spec.rb +0 -47
- data/spec/link_sanitizing_spec.rb +1 -1
- data/spec/regressions_spec.rb +2 -2
- metadata +2 -2
data/ext/parser.h
CHANGED
@@ -35,8 +35,6 @@ VALUE Wikitext_parser_sanitize_link_target(VALUE self, VALUE string);
|
|
35
35
|
|
36
36
|
VALUE Wikitext_parser_encode_link_target(VALUE self, VALUE in);
|
37
37
|
|
38
|
-
VALUE Wikitext_parser_encode_special_link_target(VALUE self, VALUE in);
|
39
|
-
|
40
38
|
VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self);
|
41
39
|
|
42
40
|
VALUE Wikitext_parser_profiling_parse(VALUE self, VALUE string);
|
data/ext/str.c
CHANGED
@@ -23,6 +23,10 @@
|
|
23
23
|
|
24
24
|
#include "str.h"
|
25
25
|
|
26
|
+
// when allocating memory, reserve a little more than was asked for,
|
27
|
+
// which can help to avoid subsequent allocations
|
28
|
+
#define STR_OVERALLOC 256
|
29
|
+
|
26
30
|
str_t *str_new(void)
|
27
31
|
{
|
28
32
|
str_t *str = ALLOC_N(str_t, 1);
|
@@ -32,31 +36,13 @@ str_t *str_new(void)
|
|
32
36
|
return str;
|
33
37
|
}
|
34
38
|
|
35
|
-
str_t *str_new_size(long len)
|
36
|
-
{
|
37
|
-
str_t *str = ALLOC_N(str_t, 1);
|
38
|
-
str->ptr = ALLOC_N(char, len);
|
39
|
-
str->len = 0;
|
40
|
-
str->capacity = len;
|
41
|
-
return str;
|
42
|
-
}
|
43
|
-
|
44
|
-
str_t *str_new_copy(char *src, long len)
|
39
|
+
str_t *str_new_copy(const char *src, long len)
|
45
40
|
{
|
46
41
|
str_t *str = ALLOC_N(str_t, 1);
|
47
|
-
str->ptr = ALLOC_N(char, len);
|
42
|
+
str->ptr = ALLOC_N(char, len + STR_OVERALLOC);
|
48
43
|
memcpy(str->ptr, src, len);
|
49
44
|
str->len = len;
|
50
|
-
str->capacity = len;
|
51
|
-
return str;
|
52
|
-
}
|
53
|
-
|
54
|
-
str_t *str_new_no_copy(char *src, long len)
|
55
|
-
{
|
56
|
-
str_t *str = ALLOC_N(str_t, 1);
|
57
|
-
str->ptr = src;
|
58
|
-
str->len = len;
|
59
|
-
str->capacity = len;
|
45
|
+
str->capacity = len + STR_OVERALLOC;
|
60
46
|
return str;
|
61
47
|
}
|
62
48
|
|
@@ -76,23 +62,23 @@ void str_grow(str_t *str, long len)
|
|
76
62
|
if (str->capacity < len)
|
77
63
|
{
|
78
64
|
if (str->ptr)
|
79
|
-
REALLOC_N(str->ptr, char, len);
|
65
|
+
REALLOC_N(str->ptr, char, len + STR_OVERALLOC);
|
80
66
|
else
|
81
|
-
str->ptr = ALLOC_N(char, len);
|
82
|
-
str->capacity = len;
|
67
|
+
str->ptr = ALLOC_N(char, len + STR_OVERALLOC);
|
68
|
+
str->capacity = len + STR_OVERALLOC;
|
83
69
|
}
|
84
70
|
}
|
85
71
|
|
86
|
-
void str_append(str_t *str, char *src, long len)
|
72
|
+
void str_append(str_t *str, const char *src, long len)
|
87
73
|
{
|
88
74
|
long new_len = str->len + len;
|
89
75
|
if (str->capacity < new_len)
|
90
76
|
{
|
91
77
|
if (str->ptr)
|
92
|
-
REALLOC_N(str->ptr, char, new_len);
|
78
|
+
REALLOC_N(str->ptr, char, new_len + STR_OVERALLOC);
|
93
79
|
else
|
94
|
-
str->ptr = ALLOC_N(char, new_len);
|
95
|
-
str->capacity = new_len;
|
80
|
+
str->ptr = ALLOC_N(char, new_len + STR_OVERALLOC);
|
81
|
+
str->capacity = new_len + STR_OVERALLOC;
|
96
82
|
}
|
97
83
|
memcpy(str->ptr + str->len, src, len);
|
98
84
|
str->len = new_len;
|
@@ -103,12 +89,9 @@ void str_append_str(str_t *str, str_t *other)
|
|
103
89
|
str_append(str, other->ptr, other->len);
|
104
90
|
}
|
105
91
|
|
106
|
-
void str_swap(str_t **a, str_t **b)
|
92
|
+
void str_append_string(str_t *str, VALUE other)
|
107
93
|
{
|
108
|
-
|
109
|
-
c = *a;
|
110
|
-
*a = *b;
|
111
|
-
*b = c;
|
94
|
+
str_append(str, RSTRING_PTR(other), RSTRING_LEN(other));
|
112
95
|
}
|
113
96
|
|
114
97
|
void str_clear(str_t *str)
|
data/ext/str.h
CHANGED
@@ -37,15 +37,8 @@ typedef struct
|
|
37
37
|
// create a new, empty string struct
|
38
38
|
str_t *str_new(void);
|
39
39
|
|
40
|
-
// create a new, empty string struct with capacity len
|
41
|
-
str_t *str_new_size(long len);
|
42
|
-
|
43
40
|
// create a new string struct and initialize it with a copy of the buffer of length len pointed to by src
|
44
|
-
str_t *str_new_copy(char *src, long len);
|
45
|
-
|
46
|
-
// create a new string struct and initialize it with the buffer of length len pointed to by src
|
47
|
-
// no copy is made; the struct takes ownership of the buffer and will free it when the struct is disposed of
|
48
|
-
str_t *str_new_no_copy(char *src, long len);
|
41
|
+
str_t *str_new_copy(const char *src, long len);
|
49
42
|
|
50
43
|
// convenience method for testing
|
51
44
|
str_t *str_new_from_string(VALUE string);
|
@@ -56,14 +49,13 @@ VALUE string_from_str(str_t *str);
|
|
56
49
|
// grows a string's capacity to the specified length
|
57
50
|
void str_grow(str_t *str, long len);
|
58
51
|
|
59
|
-
void str_append(str_t *str, char *src, long len);
|
52
|
+
void str_append(str_t *str, const char *src, long len);
|
60
53
|
|
61
54
|
// appends the "other" string struct onto str
|
62
55
|
void str_append_str(str_t *str, str_t *other);
|
63
56
|
|
64
|
-
//
|
65
|
-
|
66
|
-
void str_swap(str_t **a, str_t **b);
|
57
|
+
// appends the "other" string (a Ruby String) onto str
|
58
|
+
void str_append_string(str_t *str, VALUE other);
|
67
59
|
|
68
60
|
// don't actually free the memory yet
|
69
61
|
// this makes str structs very useful when reusing buffers because it avoids reallocation
|
data/ext/token.c
CHANGED
@@ -105,7 +105,7 @@ VALUE Wikitext_parser_token_types(VALUE self)
|
|
105
105
|
}
|
106
106
|
|
107
107
|
// for testing and debugging only
|
108
|
-
VALUE
|
108
|
+
VALUE wiki_token(token_t *token)
|
109
109
|
{
|
110
110
|
VALUE object = rb_class_new_instance(0, NULL, cWikitextParserToken);
|
111
111
|
(void)rb_iv_set(object, "@start", LONG2NUM((long)token->start));
|
data/ext/token.h
CHANGED
@@ -24,8 +24,8 @@
|
|
24
24
|
#include "ruby_compat.h"
|
25
25
|
#include <stdint.h> /* uint32_t */
|
26
26
|
|
27
|
-
#define TOKEN_TEXT(token) rb_str_new((const char *)token->start, (token->stop - token->start))
|
28
27
|
#define TOKEN_LEN(token) (token->stop - token->start)
|
28
|
+
#define TOKEN_TEXT(token) rb_str_new((const char *)token->start, TOKEN_LEN(token))
|
29
29
|
|
30
30
|
typedef struct
|
31
31
|
{
|
@@ -114,4 +114,4 @@ enum token_types {
|
|
114
114
|
|
115
115
|
VALUE Wikitext_parser_token_types(VALUE self);
|
116
116
|
|
117
|
-
VALUE
|
117
|
+
VALUE wiki_token(token_t *token);
|
data/ext/wikitext.c
CHANGED
@@ -44,7 +44,6 @@ void Init_wikitext()
|
|
44
44
|
rb_define_method(cWikitextParser, "fulltext_tokenize", Wikitext_parser_fulltext_tokenize, -1);
|
45
45
|
rb_define_singleton_method(cWikitextParser, "sanitize_link_target", Wikitext_parser_sanitize_link_target, 1);
|
46
46
|
rb_define_singleton_method(cWikitextParser, "encode_link_target", Wikitext_parser_encode_link_target, 1);
|
47
|
-
rb_define_singleton_method(cWikitextParser, "encode_special_link_target", Wikitext_parser_encode_special_link_target, 1);
|
48
47
|
rb_define_attr(cWikitextParser, "line_ending", Qtrue, Qtrue);
|
49
48
|
rb_define_attr(cWikitextParser, "internal_link_prefix", Qtrue, Qtrue);
|
50
49
|
rb_define_attr(cWikitextParser, "img_prefix", Qtrue, Qtrue);
|
data/lib/wikitext/version.rb
CHANGED
data/spec/external_link_spec.rb
CHANGED
@@ -326,4 +326,21 @@ describe Wikitext::Parser, 'external links' do
|
|
326
326
|
@parser.parse("> [http://google.com/\n").should == expected # was a bug
|
327
327
|
end
|
328
328
|
end
|
329
|
+
|
330
|
+
describe 'regressions' do
|
331
|
+
# assorted examples
|
332
|
+
it 'should not turn failed absolute links into external hyperlinks' do
|
333
|
+
# was emitting: <p>[<a href="/hello" class="external">/hello</a> this</p>\n
|
334
|
+
expected = %Q{<p>[<a href="/hello">/hello</a> this</p>\n}
|
335
|
+
@parser.parse('[/hello this').should == expected
|
336
|
+
|
337
|
+
# was emitting: <p>[<a href="/hello" class="external">/hello</a> </p>\n
|
338
|
+
expected = %Q{<p>[<a href="/hello">/hello</a> </p>\n}
|
339
|
+
@parser.parse('[/hello ').should == expected
|
340
|
+
|
341
|
+
# was emitting: <h1>hello [<a href="/hello" class="external">/hello</a> </h1>\n
|
342
|
+
expected = %Q{<h1>hello [<a href="/hello">/hello</a> </h1>\n}
|
343
|
+
@parser.parse('= hello [/hello =').should == expected
|
344
|
+
end
|
345
|
+
end
|
329
346
|
end
|
data/spec/internal_link_spec.rb
CHANGED
@@ -823,9 +823,15 @@ describe Wikitext::Parser, 'internal links (space to underscore on)' do
|
|
823
823
|
end
|
824
824
|
|
825
825
|
describe 'missing link text' do
|
826
|
-
it 'should use link target' do
|
826
|
+
it 'should use link target (zero-width link text)' do
|
827
827
|
@parser.parse('[[foo|]]').should == %Q{<p><a href="/wiki/foo">foo</a></p>\n}
|
828
828
|
end
|
829
|
+
|
830
|
+
# was a bug in version <= 1.6
|
831
|
+
# emitted: <p><a href="/wiki/foo"></a></p>\n
|
832
|
+
it 'should use link target (blank link text)' do
|
833
|
+
@parser.parse('[[foo| ]]').should == %Q{<p><a href="/wiki/foo">foo</a></p>\n}
|
834
|
+
end
|
829
835
|
end
|
830
836
|
|
831
837
|
describe 'link cut off at separator (end-of-file)' do
|
data/spec/link_encoding_spec.rb
CHANGED
@@ -94,51 +94,4 @@ describe Wikitext, 'encoding a link target' do
|
|
94
94
|
Wikitext::Parser.encode_link_target(string).should == URI.escape(string, reserved).downcase
|
95
95
|
end
|
96
96
|
end
|
97
|
-
|
98
|
-
# "special" links don't get transformed in any way
|
99
|
-
describe 'special links' do
|
100
|
-
|
101
|
-
# as of version 1.4.0 the encode_link_target function no longer handles special links
|
102
|
-
it 'should (no longer) recognize links which match /\A[a-z]+\/\d+\z/ as being special' do
|
103
|
-
string = 'foo/10'
|
104
|
-
Wikitext::Parser.encode_special_link_target(string).should == 'foo%2f10'
|
105
|
-
Wikitext::Parser.encode_link_target(string).should == 'foo%2f10'
|
106
|
-
end
|
107
|
-
|
108
|
-
it "should not recognize links which don't match at /\A/ as being special" do
|
109
|
-
string = ' foo/10'
|
110
|
-
Wikitext::Parser.encode_special_link_target(string).should_not == string
|
111
|
-
string = '..foo/10'
|
112
|
-
Wikitext::Parser.encode_special_link_target(string).should_not == string
|
113
|
-
string = '12foo/10'
|
114
|
-
Wikitext::Parser.encode_special_link_target(string).should_not == string
|
115
|
-
end
|
116
|
-
|
117
|
-
it "should not recognize links which don't match at /\z/ as being special" do
|
118
|
-
string = 'foo/10 '
|
119
|
-
Wikitext::Parser.encode_special_link_target(string).should_not == string
|
120
|
-
string = 'foo/10__'
|
121
|
-
Wikitext::Parser.encode_special_link_target(string).should_not == string
|
122
|
-
string = 'foo/10##'
|
123
|
-
Wikitext::Parser.encode_special_link_target(string).should_not == string
|
124
|
-
string = 'foo/10ab'
|
125
|
-
Wikitext::Parser.encode_special_link_target(string).should_not == string
|
126
|
-
end
|
127
|
-
|
128
|
-
it "should not recognize links which don't match at /[a-z]/ (case differences) as being special" do
|
129
|
-
string = 'FOO/10'
|
130
|
-
Wikitext::Parser.encode_special_link_target(string).should_not == string
|
131
|
-
end
|
132
|
-
|
133
|
-
it "should not recognize links which don't match at /[0-9]/ (case differences) as being special" do
|
134
|
-
string = 'foo/xx'
|
135
|
-
Wikitext::Parser.encode_special_link_target(string).should_not == string
|
136
|
-
end
|
137
|
-
|
138
|
-
it "should not recognize links which don't match at /\// as being special" do
|
139
|
-
string = 'foo 10'
|
140
|
-
Wikitext::Parser.encode_special_link_target(string).should_not == string
|
141
|
-
end
|
142
|
-
end
|
143
97
|
end
|
144
|
-
|
@@ -103,7 +103,7 @@ describe Wikitext, 'sanitizing a link target' do
|
|
103
103
|
Wikitext::Parser.sanitize_link_target('foo, "bar" & baz €').should == 'foo, "bar" & baz €'
|
104
104
|
end
|
105
105
|
|
106
|
-
# here we're exercising the
|
106
|
+
# here we're exercising the wiki_utf8_to_utf32 function
|
107
107
|
describe 'with invalidly encoded input' do
|
108
108
|
it 'should raise an exception for missing second byte' do
|
109
109
|
lambda {
|
data/spec/regressions_spec.rb
CHANGED
@@ -130,7 +130,7 @@ describe Wikitext::Parser, 'regressions' do
|
|
130
130
|
|
131
131
|
# this is the general case of the bug covered in the previous spec
|
132
132
|
# any token that appears as the first token after a PRE token can manifest this bug
|
133
|
-
# PRINTABLE didn't only because it called
|
133
|
+
# PRINTABLE didn't only because it called wiki_start_para_if_necessary(), which handled the pending CRLF
|
134
134
|
it 'should emit pending newlines for all token types found inside PRE and PRE_START blocks' do
|
135
135
|
# PRE_START
|
136
136
|
input = dedent <<-END
|
@@ -441,7 +441,7 @@ describe Wikitext::Parser, 'regressions' do
|
|
441
441
|
END
|
442
442
|
@parser.parse(input).should == expected
|
443
443
|
|
444
|
-
# these tokens weren't affected by the bug, seeing as they either call
|
444
|
+
# these tokens weren't affected by the bug, seeing as they either call wiki_start_para_if_necessary()
|
445
445
|
# or they can only appear in PRE_START (not PRE) thanks to the tokenizer
|
446
446
|
# but we add specs for them to make sure that the issue never crops up for them in the future
|
447
447
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wikitext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: "1.6"
|
4
|
+
version: "1.7"
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Wincent Colaiuta
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-05-
|
12
|
+
date: 2009-05-13 00:00:00 +02:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|