rinku 1.7.3 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Rakefile +0 -19
- data/ext/rinku/autolink.c +114 -127
- data/ext/rinku/autolink.h +22 -16
- data/ext/rinku/rinku.c +34 -252
- data/ext/rinku/rinku.h +23 -5
- data/ext/rinku/rinku_rb.c +239 -0
- data/ext/rinku/utf8.c +187 -0
- data/ext/rinku/utf8.h +34 -0
- data/lib/rinku.rb +6 -2
- data/rinku.gemspec +12 -5
- data/test/autolink_test.rb +96 -18
- metadata +60 -16
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 07c65aa1e4fe77d917eeeb0d4ab03889f5b72443
|
4
|
+
data.tar.gz: 2e77157e5b959fbda3660e0676d20686e59bd469
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: e4c692320131620c6b1398a0d8784091bd1952660bfb1ab14ec7ee32fd115f7c0a39a6faddd924045f4085ee91a1062b67cd6a0600df768a7ccb092a1083e772
|
7
|
+
data.tar.gz: bc6774b49bb020deaa479ce8145c881e49e037c1539924e3f4cd2a0e8e0e1fb9871356b848e8090d3f3d79239131b0c4d7d6c8afdad117e0dd35edcbe1b5fa14
|
data/Rakefile
CHANGED
@@ -48,22 +48,3 @@ file package('.gem') => %w[pkg/ rinku.gemspec] + $spec.files do |f|
|
|
48
48
|
sh "gem build rinku.gemspec"
|
49
49
|
mv File.basename(f.name), f.name
|
50
50
|
end
|
51
|
-
|
52
|
-
# GEMSPEC HELPERS ==========================================================
|
53
|
-
task :gather => 'sundown:checkout' do |t|
|
54
|
-
files =
|
55
|
-
FileList[
|
56
|
-
'sundown/src/{buffer,autolink}.h',
|
57
|
-
'sundown/src/{buffer,autolink}.c',
|
58
|
-
]
|
59
|
-
cp files, 'ext/rinku/',
|
60
|
-
:preserve => true,
|
61
|
-
:verbose => true
|
62
|
-
end
|
63
|
-
|
64
|
-
task 'sundown:checkout' do |t|
|
65
|
-
unless File.exists?('sundown/src/markdown.h')
|
66
|
-
sh 'git submodule init'
|
67
|
-
sh 'git submodule update'
|
68
|
-
end
|
69
|
-
end
|
data/ext/rinku/autolink.c
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) 2016, GitHub, Inc
|
3
3
|
*
|
4
4
|
* Permission to use, copy, modify, and distribute this software for any
|
5
5
|
* purpose with or without fee is hereby granted, provided that the above
|
@@ -13,21 +13,22 @@
|
|
13
13
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
14
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
15
|
*/
|
16
|
-
|
17
|
-
#include "buffer.h"
|
18
|
-
#include "autolink.h"
|
19
|
-
|
20
16
|
#include <string.h>
|
17
|
+
#include <assert.h>
|
21
18
|
#include <stdlib.h>
|
22
19
|
#include <stdio.h>
|
23
|
-
#include <
|
20
|
+
#include <stdbool.h>
|
21
|
+
|
22
|
+
#include "buffer.h"
|
23
|
+
#include "autolink.h"
|
24
|
+
#include "utf8.h"
|
24
25
|
|
25
26
|
#if defined(_WIN32)
|
26
27
|
#define strncasecmp _strnicmp
|
27
28
|
#endif
|
28
29
|
|
29
|
-
|
30
|
-
|
30
|
+
bool
|
31
|
+
autolink_issafe(const uint8_t *link, size_t link_len)
|
31
32
|
{
|
32
33
|
static const size_t valid_uris_count = 5;
|
33
34
|
static const char *valid_uris[] = {
|
@@ -41,47 +42,53 @@ sd_autolink_issafe(const uint8_t *link, size_t link_len)
|
|
41
42
|
|
42
43
|
if (link_len > len &&
|
43
44
|
strncasecmp((char *)link, valid_uris[i], len) == 0 &&
|
44
|
-
|
45
|
-
return
|
45
|
+
rinku_isalnum(link[len]))
|
46
|
+
return true;
|
46
47
|
}
|
47
48
|
|
48
|
-
return
|
49
|
+
return false;
|
49
50
|
}
|
50
51
|
|
51
|
-
static
|
52
|
-
autolink_delim(uint8_t *data,
|
52
|
+
static bool
|
53
|
+
autolink_delim(const uint8_t *data, struct autolink_pos *link)
|
53
54
|
{
|
54
55
|
uint8_t cclose, copen = 0;
|
55
56
|
size_t i;
|
56
57
|
|
57
|
-
for (i =
|
58
|
+
for (i = link->start; i < link->end; ++i)
|
58
59
|
if (data[i] == '<') {
|
59
|
-
|
60
|
+
link->end = i;
|
60
61
|
break;
|
61
62
|
}
|
62
63
|
|
63
|
-
while (
|
64
|
-
if (strchr("?!.,:", data[
|
65
|
-
|
64
|
+
while (link->end > link->start) {
|
65
|
+
if (strchr("?!.,:", data[link->end - 1]) != NULL)
|
66
|
+
link->end--;
|
66
67
|
|
67
|
-
else if (data[
|
68
|
-
size_t new_end =
|
68
|
+
else if (data[link->end - 1] == ';') {
|
69
|
+
size_t new_end = link->end - 2;
|
69
70
|
|
70
|
-
while (new_end > 0 &&
|
71
|
+
while (new_end > 0 && rinku_isalnum(data[new_end]))
|
71
72
|
new_end--;
|
72
73
|
|
73
|
-
if (new_end <
|
74
|
-
|
75
|
-
|
76
|
-
|
74
|
+
if (new_end < link->end - 2) {
|
75
|
+
if (new_end > 0 && data[new_end] == '#')
|
76
|
+
new_end--;
|
77
|
+
|
78
|
+
if (data[new_end] == '&') {
|
79
|
+
link->end = new_end;
|
80
|
+
continue;
|
81
|
+
}
|
82
|
+
}
|
83
|
+
link->end--;
|
77
84
|
}
|
78
85
|
else break;
|
79
86
|
}
|
80
87
|
|
81
|
-
if (
|
82
|
-
return
|
88
|
+
if (link->end == link->start)
|
89
|
+
return false;
|
83
90
|
|
84
|
-
cclose = data[
|
91
|
+
cclose = data[link->end - 1];
|
85
92
|
|
86
93
|
switch (cclose) {
|
87
94
|
case '"': copen = '"'; break;
|
@@ -94,7 +101,7 @@ autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
|
|
94
101
|
if (copen != 0) {
|
95
102
|
size_t closing = 0;
|
96
103
|
size_t opening = 0;
|
97
|
-
size_t i =
|
104
|
+
size_t i = link->start;
|
98
105
|
|
99
106
|
/* Try to close the final punctuation sign in this same line;
|
100
107
|
* if we managed to close it outside of the URL, that means that it's
|
@@ -116,7 +123,7 @@ autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
|
|
116
123
|
* => foo http://www.pokemon.com/Pikachu_(Electric)
|
117
124
|
*/
|
118
125
|
|
119
|
-
while (i <
|
126
|
+
while (i < link->end) {
|
120
127
|
if (data[i] == copen)
|
121
128
|
opening++;
|
122
129
|
else if (data[i] == cclose)
|
@@ -126,170 +133,150 @@ autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
|
|
126
133
|
}
|
127
134
|
|
128
135
|
if (closing != opening)
|
129
|
-
|
136
|
+
link->end--;
|
130
137
|
}
|
131
138
|
|
132
|
-
return
|
139
|
+
return true;
|
133
140
|
}
|
134
141
|
|
135
|
-
static
|
136
|
-
check_domain(uint8_t *data, size_t size,
|
142
|
+
static bool
|
143
|
+
check_domain(const uint8_t *data, size_t size,
|
144
|
+
struct autolink_pos *link, bool allow_short)
|
137
145
|
{
|
138
146
|
size_t i, np = 0;
|
139
147
|
|
140
|
-
if (!
|
141
|
-
return
|
148
|
+
if (!rinku_isalnum(data[link->start]))
|
149
|
+
return false;
|
142
150
|
|
143
|
-
for (i = 1; i < size - 1; ++i) {
|
151
|
+
for (i = link->start + 1; i < size - 1; ++i) {
|
144
152
|
if (data[i] == '.') np++;
|
145
|
-
else if (!
|
153
|
+
else if (!rinku_isalnum(data[i]) && data[i] != '-') break;
|
146
154
|
}
|
147
155
|
|
156
|
+
link->end = i;
|
157
|
+
|
148
158
|
if (allow_short) {
|
149
159
|
/* We don't need a valid domain in the strict sense (with
|
150
160
|
* at least one dot); so just make sure it's composed of valid
|
151
161
|
* domain characters and return the length of the valid
|
152
162
|
* sequence. */
|
153
|
-
return
|
163
|
+
return true;
|
154
164
|
} else {
|
155
165
|
/* a valid domain needs to have at least a dot.
|
156
166
|
* that's as far as we get */
|
157
|
-
return np
|
167
|
+
return (np > 0);
|
158
168
|
}
|
159
169
|
}
|
160
170
|
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
size_t max_rewind,
|
171
|
+
bool
|
172
|
+
autolink__www(
|
173
|
+
struct autolink_pos *link,
|
174
|
+
const uint8_t *data,
|
175
|
+
size_t pos,
|
167
176
|
size_t size,
|
168
177
|
unsigned int flags)
|
169
178
|
{
|
170
|
-
|
171
|
-
|
172
|
-
if (max_rewind > 0 && !ispunct(data[-1]) && !isspace(data[-1]))
|
173
|
-
return 0;
|
174
|
-
|
175
|
-
if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0)
|
176
|
-
return 0;
|
179
|
+
int32_t boundary;
|
180
|
+
assert(data[pos] == 'w' || data[pos] == 'W');
|
177
181
|
|
178
|
-
|
182
|
+
if ((size - pos) < 4 ||
|
183
|
+
(data[pos + 1] != 'w' && data[pos + 1] != 'W') ||
|
184
|
+
(data[pos + 2] != 'w' && data[pos + 2] != 'W') ||
|
185
|
+
data[pos + 3] != '.')
|
186
|
+
return false;
|
179
187
|
|
180
|
-
|
181
|
-
|
188
|
+
boundary = utf8proc_rewind(data, pos);
|
189
|
+
if (boundary &&
|
190
|
+
!utf8proc_is_space(boundary) &&
|
191
|
+
!utf8proc_is_punctuation(boundary))
|
192
|
+
return false;
|
182
193
|
|
183
|
-
|
184
|
-
|
194
|
+
link->start = pos;
|
195
|
+
link->end = 0;
|
185
196
|
|
186
|
-
|
197
|
+
if (!check_domain(data, size, link, false))
|
198
|
+
return false;
|
187
199
|
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
bufput(link, data, link_end);
|
192
|
-
*rewind_p = 0;
|
193
|
-
|
194
|
-
return (int)link_end;
|
200
|
+
link->end = utf8proc_find_space(data, link->end, size);
|
201
|
+
return autolink_delim(data, link);
|
195
202
|
}
|
196
203
|
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
size_t max_rewind,
|
204
|
+
bool
|
205
|
+
autolink__email(
|
206
|
+
struct autolink_pos *link,
|
207
|
+
const uint8_t *data,
|
208
|
+
size_t pos,
|
203
209
|
size_t size,
|
204
210
|
unsigned int flags)
|
205
211
|
{
|
206
|
-
size_t link_end, rewind;
|
207
212
|
int nb = 0, np = 0;
|
213
|
+
assert(data[pos] == '@');
|
214
|
+
|
215
|
+
link->start = pos;
|
216
|
+
link->end = pos;
|
208
217
|
|
209
|
-
for (
|
210
|
-
uint8_t c = data[
|
218
|
+
for (; link->start > 0; link->start--) {
|
219
|
+
uint8_t c = data[link->start - 1];
|
211
220
|
|
212
|
-
if (
|
221
|
+
if (rinku_isalnum(c))
|
213
222
|
continue;
|
214
223
|
|
215
|
-
if (strchr(".+-_", c) != NULL)
|
224
|
+
if (strchr(".+-_%", c) != NULL)
|
216
225
|
continue;
|
217
226
|
|
218
227
|
break;
|
219
228
|
}
|
220
229
|
|
221
|
-
if (
|
222
|
-
return
|
230
|
+
if (link->start == pos)
|
231
|
+
return false;
|
223
232
|
|
224
|
-
for (
|
225
|
-
uint8_t c = data[
|
233
|
+
for (; link->end < size; link->end++) {
|
234
|
+
uint8_t c = data[link->end];
|
226
235
|
|
227
|
-
if (
|
236
|
+
if (rinku_isalnum(c))
|
228
237
|
continue;
|
229
238
|
|
230
239
|
if (c == '@')
|
231
240
|
nb++;
|
232
|
-
else if (c == '.' &&
|
241
|
+
else if (c == '.' && link->end < size - 1)
|
233
242
|
np++;
|
234
243
|
else if (c != '-' && c != '_')
|
235
244
|
break;
|
236
245
|
}
|
237
246
|
|
238
|
-
if (
|
239
|
-
return
|
240
|
-
|
241
|
-
link_end = autolink_delim(data, link_end, max_rewind, size);
|
242
|
-
|
243
|
-
if (link_end == 0)
|
244
|
-
return 0;
|
247
|
+
if ((link->end - pos) < 2 || nb != 1 || np == 0)
|
248
|
+
return false;
|
245
249
|
|
246
|
-
|
247
|
-
*rewind_p = rewind;
|
248
|
-
|
249
|
-
return link_end;
|
250
|
+
return autolink_delim(data, link);
|
250
251
|
}
|
251
252
|
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
size_t max_rewind,
|
253
|
+
bool
|
254
|
+
autolink__url(
|
255
|
+
struct autolink_pos *link,
|
256
|
+
const uint8_t *data,
|
257
|
+
size_t pos,
|
258
258
|
size_t size,
|
259
259
|
unsigned int flags)
|
260
260
|
{
|
261
|
-
|
262
|
-
|
263
|
-
if (size < 4 || data[1] != '/' || data[2] != '/')
|
264
|
-
return 0;
|
265
|
-
|
266
|
-
while (rewind < max_rewind && isalpha(data[-rewind - 1]))
|
267
|
-
rewind++;
|
268
|
-
|
269
|
-
if (!sd_autolink_issafe(data - rewind, size + rewind))
|
270
|
-
return 0;
|
271
|
-
|
272
|
-
link_end = strlen("://");
|
261
|
+
assert(data[pos] == ':');
|
273
262
|
|
274
|
-
|
275
|
-
|
276
|
-
size - link_end,
|
277
|
-
flags & SD_AUTOLINK_SHORT_DOMAINS);
|
263
|
+
if ((size - pos) < 4 || data[pos + 1] != '/' || data[pos + 2] != '/')
|
264
|
+
return false;
|
278
265
|
|
279
|
-
|
280
|
-
|
266
|
+
link->start = pos + 3;
|
267
|
+
link->end = 0;
|
281
268
|
|
282
|
-
|
283
|
-
|
284
|
-
link_end++;
|
269
|
+
if (!check_domain(data, size, link, flags & AUTOLINK_SHORT_DOMAINS))
|
270
|
+
return false;
|
285
271
|
|
286
|
-
|
272
|
+
link->start = pos;
|
273
|
+
link->end = utf8proc_find_space(data, link->end, size);
|
287
274
|
|
288
|
-
|
289
|
-
|
275
|
+
while (link->start && rinku_isalpha(data[link->start - 1]))
|
276
|
+
link->start--;
|
290
277
|
|
291
|
-
|
292
|
-
|
278
|
+
if (!autolink_issafe(data + link->start, size - link->start))
|
279
|
+
return false;
|
293
280
|
|
294
|
-
return
|
281
|
+
return autolink_delim(data, link);
|
295
282
|
}
|
data/ext/rinku/autolink.h
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) 2016, GitHub, Inc
|
3
3
|
*
|
4
4
|
* Permission to use, copy, modify, and distribute this software for any
|
5
5
|
* purpose with or without fee is hereby granted, provided that the above
|
@@ -13,10 +13,11 @@
|
|
13
13
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
14
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
15
|
*/
|
16
|
+
#ifndef RINKU_AUTOLINK_H
|
17
|
+
#define RINKU_AUTOLINK_H
|
16
18
|
|
17
|
-
#
|
18
|
-
#
|
19
|
-
|
19
|
+
#include <stdbool.h>
|
20
|
+
#include <stdint.h>
|
20
21
|
#include "buffer.h"
|
21
22
|
|
22
23
|
#ifdef __cplusplus
|
@@ -24,23 +25,28 @@ extern "C" {
|
|
24
25
|
#endif
|
25
26
|
|
26
27
|
enum {
|
27
|
-
|
28
|
+
AUTOLINK_SHORT_DOMAINS = (1 << 0),
|
29
|
+
};
|
30
|
+
|
31
|
+
struct autolink_pos {
|
32
|
+
size_t start;
|
33
|
+
size_t end;
|
28
34
|
};
|
29
35
|
|
30
|
-
|
31
|
-
|
36
|
+
bool
|
37
|
+
autolink_issafe(const uint8_t *link, size_t link_len);
|
32
38
|
|
33
|
-
|
34
|
-
|
35
|
-
uint8_t *data, size_t
|
39
|
+
bool
|
40
|
+
autolink__www(struct autolink_pos *res,
|
41
|
+
const uint8_t *data, size_t pos, size_t size, unsigned int flags);
|
36
42
|
|
37
|
-
|
38
|
-
|
39
|
-
uint8_t *data, size_t
|
43
|
+
bool
|
44
|
+
autolink__email(struct autolink_pos *res,
|
45
|
+
const uint8_t *data, size_t pos, size_t size, unsigned int flags);
|
40
46
|
|
41
|
-
|
42
|
-
|
43
|
-
uint8_t *data, size_t
|
47
|
+
bool
|
48
|
+
autolink__url(struct autolink_pos *res,
|
49
|
+
const uint8_t *data, size_t pos, size_t size, unsigned int flags);
|
44
50
|
|
45
51
|
#ifdef __cplusplus
|
46
52
|
}
|