wikitext 1.6 → 1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/ary.h +0 -6
- data/ext/extconf.rb +9 -0
- data/ext/parser.c +762 -839
- data/ext/parser.h +0 -2
- data/ext/str.c +16 -33
- data/ext/str.h +4 -12
- data/ext/token.c +1 -1
- data/ext/token.h +2 -2
- data/ext/wikitext.c +0 -1
- data/lib/wikitext/version.rb +1 -1
- data/spec/external_link_spec.rb +17 -0
- data/spec/internal_link_spec.rb +7 -1
- data/spec/link_encoding_spec.rb +0 -47
- data/spec/link_sanitizing_spec.rb +1 -1
- data/spec/regressions_spec.rb +2 -2
- metadata +2 -2
data/ext/parser.h
CHANGED
@@ -35,8 +35,6 @@ VALUE Wikitext_parser_sanitize_link_target(VALUE self, VALUE string);
|
|
35
35
|
|
36
36
|
VALUE Wikitext_parser_encode_link_target(VALUE self, VALUE in);
|
37
37
|
|
38
|
-
VALUE Wikitext_parser_encode_special_link_target(VALUE self, VALUE in);
|
39
|
-
|
40
38
|
VALUE Wikitext_parser_parse(int argc, VALUE *argv, VALUE self);
|
41
39
|
|
42
40
|
VALUE Wikitext_parser_profiling_parse(VALUE self, VALUE string);
|
data/ext/str.c
CHANGED
@@ -23,6 +23,10 @@
|
|
23
23
|
|
24
24
|
#include "str.h"
|
25
25
|
|
26
|
+
// when allocating memory, reserve a little more than was asked for,
|
27
|
+
// which can help to avoid subsequent allocations
|
28
|
+
#define STR_OVERALLOC 256
|
29
|
+
|
26
30
|
str_t *str_new(void)
|
27
31
|
{
|
28
32
|
str_t *str = ALLOC_N(str_t, 1);
|
@@ -32,31 +36,13 @@ str_t *str_new(void)
|
|
32
36
|
return str;
|
33
37
|
}
|
34
38
|
|
35
|
-
str_t *str_new_size(long len)
|
36
|
-
{
|
37
|
-
str_t *str = ALLOC_N(str_t, 1);
|
38
|
-
str->ptr = ALLOC_N(char, len);
|
39
|
-
str->len = 0;
|
40
|
-
str->capacity = len;
|
41
|
-
return str;
|
42
|
-
}
|
43
|
-
|
44
|
-
str_t *str_new_copy(char *src, long len)
|
39
|
+
str_t *str_new_copy(const char *src, long len)
|
45
40
|
{
|
46
41
|
str_t *str = ALLOC_N(str_t, 1);
|
47
|
-
str->ptr = ALLOC_N(char, len);
|
42
|
+
str->ptr = ALLOC_N(char, len + STR_OVERALLOC);
|
48
43
|
memcpy(str->ptr, src, len);
|
49
44
|
str->len = len;
|
50
|
-
str->capacity = len;
|
51
|
-
return str;
|
52
|
-
}
|
53
|
-
|
54
|
-
str_t *str_new_no_copy(char *src, long len)
|
55
|
-
{
|
56
|
-
str_t *str = ALLOC_N(str_t, 1);
|
57
|
-
str->ptr = src;
|
58
|
-
str->len = len;
|
59
|
-
str->capacity = len;
|
45
|
+
str->capacity = len + STR_OVERALLOC;
|
60
46
|
return str;
|
61
47
|
}
|
62
48
|
|
@@ -76,23 +62,23 @@ void str_grow(str_t *str, long len)
|
|
76
62
|
if (str->capacity < len)
|
77
63
|
{
|
78
64
|
if (str->ptr)
|
79
|
-
REALLOC_N(str->ptr, char, len);
|
65
|
+
REALLOC_N(str->ptr, char, len + STR_OVERALLOC);
|
80
66
|
else
|
81
|
-
str->ptr = ALLOC_N(char, len);
|
82
|
-
str->capacity = len;
|
67
|
+
str->ptr = ALLOC_N(char, len + STR_OVERALLOC);
|
68
|
+
str->capacity = len + STR_OVERALLOC;
|
83
69
|
}
|
84
70
|
}
|
85
71
|
|
86
|
-
void str_append(str_t *str, char *src, long len)
|
72
|
+
void str_append(str_t *str, const char *src, long len)
|
87
73
|
{
|
88
74
|
long new_len = str->len + len;
|
89
75
|
if (str->capacity < new_len)
|
90
76
|
{
|
91
77
|
if (str->ptr)
|
92
|
-
REALLOC_N(str->ptr, char, new_len);
|
78
|
+
REALLOC_N(str->ptr, char, new_len + STR_OVERALLOC);
|
93
79
|
else
|
94
|
-
str->ptr = ALLOC_N(char, new_len);
|
95
|
-
str->capacity = new_len;
|
80
|
+
str->ptr = ALLOC_N(char, new_len + STR_OVERALLOC);
|
81
|
+
str->capacity = new_len + STR_OVERALLOC;
|
96
82
|
}
|
97
83
|
memcpy(str->ptr + str->len, src, len);
|
98
84
|
str->len = new_len;
|
@@ -103,12 +89,9 @@ void str_append_str(str_t *str, str_t *other)
|
|
103
89
|
str_append(str, other->ptr, other->len);
|
104
90
|
}
|
105
91
|
|
106
|
-
void str_swap(str_t **a, str_t **b)
|
92
|
+
void str_append_string(str_t *str, VALUE other)
|
107
93
|
{
|
108
|
-
|
109
|
-
c = *a;
|
110
|
-
*a = *b;
|
111
|
-
*b = c;
|
94
|
+
str_append(str, RSTRING_PTR(other), RSTRING_LEN(other));
|
112
95
|
}
|
113
96
|
|
114
97
|
void str_clear(str_t *str)
|
data/ext/str.h
CHANGED
@@ -37,15 +37,8 @@ typedef struct
|
|
37
37
|
// create a new, empty string struct
|
38
38
|
str_t *str_new(void);
|
39
39
|
|
40
|
-
// create a new, empty string struct with capacity len
|
41
|
-
str_t *str_new_size(long len);
|
42
|
-
|
43
40
|
// create a new string struct and initialize it with a copy of the buffer of length len pointed to by src
|
44
|
-
str_t *str_new_copy(char *src, long len);
|
45
|
-
|
46
|
-
// create a new string struct and initialize it with the buffer of length len pointed to by src
|
47
|
-
// no copy is made; the struct takes ownership of the buffer and will free it when the struct is disposed of
|
48
|
-
str_t *str_new_no_copy(char *src, long len);
|
41
|
+
str_t *str_new_copy(const char *src, long len);
|
49
42
|
|
50
43
|
// convenience method for testing
|
51
44
|
str_t *str_new_from_string(VALUE string);
|
@@ -56,14 +49,13 @@ VALUE string_from_str(str_t *str);
|
|
56
49
|
// grows a string's capacity to the specified length
|
57
50
|
void str_grow(str_t *str, long len);
|
58
51
|
|
59
|
-
void str_append(str_t *str, char *src, long len);
|
52
|
+
void str_append(str_t *str, const char *src, long len);
|
60
53
|
|
61
54
|
// appends the "other" string struct onto str
|
62
55
|
void str_append_str(str_t *str, str_t *other);
|
63
56
|
|
64
|
-
//
|
65
|
-
|
66
|
-
void str_swap(str_t **a, str_t **b);
|
57
|
+
// appends the "other" string (a Ruby String) onto str
|
58
|
+
void str_append_string(str_t *str, VALUE other);
|
67
59
|
|
68
60
|
// don't actually free the memory yet
|
69
61
|
// this makes str structs very useful when reusing buffers because it avoids reallocation
|
data/ext/token.c
CHANGED
@@ -105,7 +105,7 @@ VALUE Wikitext_parser_token_types(VALUE self)
|
|
105
105
|
}
|
106
106
|
|
107
107
|
// for testing and debugging only
|
108
|
-
VALUE _Wikitext_token(token_t *token)
|
108
|
+
VALUE wiki_token(token_t *token)
|
109
109
|
{
|
110
110
|
VALUE object = rb_class_new_instance(0, NULL, cWikitextParserToken);
|
111
111
|
(void)rb_iv_set(object, "@start", LONG2NUM((long)token->start));
|
data/ext/token.h
CHANGED
@@ -24,8 +24,8 @@
|
|
24
24
|
#include "ruby_compat.h"
|
25
25
|
#include <stdint.h> /* uint32_t */
|
26
26
|
|
27
|
-
#define TOKEN_TEXT(token) rb_str_new((const char *)token->start, (token->stop - token->start))
|
28
27
|
#define TOKEN_LEN(token) (token->stop - token->start)
|
28
|
+
#define TOKEN_TEXT(token) rb_str_new((const char *)token->start, TOKEN_LEN(token))
|
29
29
|
|
30
30
|
typedef struct
|
31
31
|
{
|
@@ -114,4 +114,4 @@ enum token_types {
|
|
114
114
|
|
115
115
|
VALUE Wikitext_parser_token_types(VALUE self);
|
116
116
|
|
117
|
-
VALUE _Wikitext_token(token_t *token);
|
117
|
+
VALUE wiki_token(token_t *token);
|
data/ext/wikitext.c
CHANGED
@@ -44,7 +44,6 @@ void Init_wikitext()
|
|
44
44
|
rb_define_method(cWikitextParser, "fulltext_tokenize", Wikitext_parser_fulltext_tokenize, -1);
|
45
45
|
rb_define_singleton_method(cWikitextParser, "sanitize_link_target", Wikitext_parser_sanitize_link_target, 1);
|
46
46
|
rb_define_singleton_method(cWikitextParser, "encode_link_target", Wikitext_parser_encode_link_target, 1);
|
47
|
-
rb_define_singleton_method(cWikitextParser, "encode_special_link_target", Wikitext_parser_encode_special_link_target, 1);
|
48
47
|
rb_define_attr(cWikitextParser, "line_ending", Qtrue, Qtrue);
|
49
48
|
rb_define_attr(cWikitextParser, "internal_link_prefix", Qtrue, Qtrue);
|
50
49
|
rb_define_attr(cWikitextParser, "img_prefix", Qtrue, Qtrue);
|
data/lib/wikitext/version.rb
CHANGED
data/spec/external_link_spec.rb
CHANGED
@@ -326,4 +326,21 @@ describe Wikitext::Parser, 'external links' do
|
|
326
326
|
@parser.parse("> [http://google.com/\n").should == expected # was a bug
|
327
327
|
end
|
328
328
|
end
|
329
|
+
|
330
|
+
describe 'regressions' do
|
331
|
+
# assorted examples
|
332
|
+
it 'should not turn failed absolute links into external hyperlinks' do
|
333
|
+
# was emitting: <p>[<a href="/hello" class="external">/hello</a> this</p>\n
|
334
|
+
expected = %Q{<p>[<a href="/hello">/hello</a> this</p>\n}
|
335
|
+
@parser.parse('[/hello this').should == expected
|
336
|
+
|
337
|
+
# was emitting: <p>[<a href="/hello" class="external">/hello</a> </p>\n
|
338
|
+
expected = %Q{<p>[<a href="/hello">/hello</a> </p>\n}
|
339
|
+
@parser.parse('[/hello ').should == expected
|
340
|
+
|
341
|
+
# was emitting: <h1>hello [<a href="/hello" class="external">/hello</a> </h1>\n
|
342
|
+
expected = %Q{<h1>hello [<a href="/hello">/hello</a> </h1>\n}
|
343
|
+
@parser.parse('= hello [/hello =').should == expected
|
344
|
+
end
|
345
|
+
end
|
329
346
|
end
|
data/spec/internal_link_spec.rb
CHANGED
@@ -823,9 +823,15 @@ describe Wikitext::Parser, 'internal links (space to underscore on)' do
|
|
823
823
|
end
|
824
824
|
|
825
825
|
describe 'missing link text' do
|
826
|
-
it 'should use link target' do
|
826
|
+
it 'should use link target (zero-width link text)' do
|
827
827
|
@parser.parse('[[foo|]]').should == %Q{<p><a href="/wiki/foo">foo</a></p>\n}
|
828
828
|
end
|
829
|
+
|
830
|
+
# was a bug in version <= 1.6
|
831
|
+
# emitted: <p><a href="/wiki/foo"></a></p>\n
|
832
|
+
it 'should use link target (blank link text)' do
|
833
|
+
@parser.parse('[[foo| ]]').should == %Q{<p><a href="/wiki/foo">foo</a></p>\n}
|
834
|
+
end
|
829
835
|
end
|
830
836
|
|
831
837
|
describe 'link cut off at separator (end-of-file)' do
|
data/spec/link_encoding_spec.rb
CHANGED
@@ -94,51 +94,4 @@ describe Wikitext, 'encoding a link target' do
|
|
94
94
|
Wikitext::Parser.encode_link_target(string).should == URI.escape(string, reserved).downcase
|
95
95
|
end
|
96
96
|
end
|
97
|
-
|
98
|
-
# "special" links don't get transformed in any way
|
99
|
-
describe 'special links' do
|
100
|
-
|
101
|
-
# as of version 1.4.0 the encode_link_target function no longer handles special links
|
102
|
-
it 'should (no longer) recognize links which match /\A[a-z]+\/\d+\z/ as being special' do
|
103
|
-
string = 'foo/10'
|
104
|
-
Wikitext::Parser.encode_special_link_target(string).should == 'foo%2f10'
|
105
|
-
Wikitext::Parser.encode_link_target(string).should == 'foo%2f10'
|
106
|
-
end
|
107
|
-
|
108
|
-
it "should not recognize links which don't match at /\A/ as being special" do
|
109
|
-
string = ' foo/10'
|
110
|
-
Wikitext::Parser.encode_special_link_target(string).should_not == string
|
111
|
-
string = '..foo/10'
|
112
|
-
Wikitext::Parser.encode_special_link_target(string).should_not == string
|
113
|
-
string = '12foo/10'
|
114
|
-
Wikitext::Parser.encode_special_link_target(string).should_not == string
|
115
|
-
end
|
116
|
-
|
117
|
-
it "should not recognize links which don't match at /\z/ as being special" do
|
118
|
-
string = 'foo/10 '
|
119
|
-
Wikitext::Parser.encode_special_link_target(string).should_not == string
|
120
|
-
string = 'foo/10__'
|
121
|
-
Wikitext::Parser.encode_special_link_target(string).should_not == string
|
122
|
-
string = 'foo/10##'
|
123
|
-
Wikitext::Parser.encode_special_link_target(string).should_not == string
|
124
|
-
string = 'foo/10ab'
|
125
|
-
Wikitext::Parser.encode_special_link_target(string).should_not == string
|
126
|
-
end
|
127
|
-
|
128
|
-
it "should not recognize links which don't match at /[a-z]/ (case differences) as being special" do
|
129
|
-
string = 'FOO/10'
|
130
|
-
Wikitext::Parser.encode_special_link_target(string).should_not == string
|
131
|
-
end
|
132
|
-
|
133
|
-
it "should not recognize links which don't match at /[0-9]/ (case differences) as being special" do
|
134
|
-
string = 'foo/xx'
|
135
|
-
Wikitext::Parser.encode_special_link_target(string).should_not == string
|
136
|
-
end
|
137
|
-
|
138
|
-
it "should not recognize links which don't match at /\// as being special" do
|
139
|
-
string = 'foo 10'
|
140
|
-
Wikitext::Parser.encode_special_link_target(string).should_not == string
|
141
|
-
end
|
142
|
-
end
|
143
97
|
end
|
144
|
-
|
@@ -103,7 +103,7 @@ describe Wikitext, 'sanitizing a link target' do
|
|
103
103
|
Wikitext::Parser.sanitize_link_target('foo, "bar" & baz €').should == 'foo, "bar" & baz €'
|
104
104
|
end
|
105
105
|
|
106
|
-
# here we're exercising the _Wikitext_utf8_to_utf32 function
|
106
|
+
# here we're exercising the wiki_utf8_to_utf32 function
|
107
107
|
describe 'with invalidly encoded input' do
|
108
108
|
it 'should raise an exception for missing second byte' do
|
109
109
|
lambda {
|
data/spec/regressions_spec.rb
CHANGED
@@ -130,7 +130,7 @@ describe Wikitext::Parser, 'regressions' do
|
|
130
130
|
|
131
131
|
# this is the general case of the bug covered in the previous spec
|
132
132
|
# any token that appears as the first token after a PRE token can manifest this bug
|
133
|
-
# PRINTABLE didn't only because it called
|
133
|
+
# PRINTABLE didn't only because it called wiki_start_para_if_necessary(), which handled the pending CRLF
|
134
134
|
it 'should emit pending newlines for all token types found inside PRE and PRE_START blocks' do
|
135
135
|
# PRE_START
|
136
136
|
input = dedent <<-END
|
@@ -441,7 +441,7 @@ describe Wikitext::Parser, 'regressions' do
|
|
441
441
|
END
|
442
442
|
@parser.parse(input).should == expected
|
443
443
|
|
444
|
-
# these tokens weren't affected by the bug, seeing as they either call
|
444
|
+
# these tokens weren't affected by the bug, seeing as they either call wiki_start_para_if_necessary()
|
445
445
|
# or they can only appear in PRE_START (not PRE) thanks to the tokenizer
|
446
446
|
# but we add specs for them to make sure that the issue never crops up for them in the future
|
447
447
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wikitext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: "1.6"
|
4
|
+
version: "1.7"
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Wincent Colaiuta
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-05-
|
12
|
+
date: 2009-05-13 00:00:00 +02:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|