rinku 1.7.3 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Rakefile +0 -19
- data/ext/rinku/autolink.c +114 -127
- data/ext/rinku/autolink.h +22 -16
- data/ext/rinku/rinku.c +34 -252
- data/ext/rinku/rinku.h +23 -5
- data/ext/rinku/rinku_rb.c +239 -0
- data/ext/rinku/utf8.c +187 -0
- data/ext/rinku/utf8.h +34 -0
- data/lib/rinku.rb +6 -2
- data/rinku.gemspec +12 -5
- data/test/autolink_test.rb +96 -18
- metadata +60 -16
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 07c65aa1e4fe77d917eeeb0d4ab03889f5b72443
|
4
|
+
data.tar.gz: 2e77157e5b959fbda3660e0676d20686e59bd469
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: e4c692320131620c6b1398a0d8784091bd1952660bfb1ab14ec7ee32fd115f7c0a39a6faddd924045f4085ee91a1062b67cd6a0600df768a7ccb092a1083e772
|
7
|
+
data.tar.gz: bc6774b49bb020deaa479ce8145c881e49e037c1539924e3f4cd2a0e8e0e1fb9871356b848e8090d3f3d79239131b0c4d7d6c8afdad117e0dd35edcbe1b5fa14
|
data/Rakefile
CHANGED
@@ -48,22 +48,3 @@ file package('.gem') => %w[pkg/ rinku.gemspec] + $spec.files do |f|
|
|
48
48
|
sh "gem build rinku.gemspec"
|
49
49
|
mv File.basename(f.name), f.name
|
50
50
|
end
|
51
|
-
|
52
|
-
# GEMSPEC HELPERS ==========================================================
|
53
|
-
task :gather => 'sundown:checkout' do |t|
|
54
|
-
files =
|
55
|
-
FileList[
|
56
|
-
'sundown/src/{buffer,autolink}.h',
|
57
|
-
'sundown/src/{buffer,autolink}.c',
|
58
|
-
]
|
59
|
-
cp files, 'ext/rinku/',
|
60
|
-
:preserve => true,
|
61
|
-
:verbose => true
|
62
|
-
end
|
63
|
-
|
64
|
-
task 'sundown:checkout' do |t|
|
65
|
-
unless File.exists?('sundown/src/markdown.h')
|
66
|
-
sh 'git submodule init'
|
67
|
-
sh 'git submodule update'
|
68
|
-
end
|
69
|
-
end
|
data/ext/rinku/autolink.c
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) 2016, GitHub, Inc
|
3
3
|
*
|
4
4
|
* Permission to use, copy, modify, and distribute this software for any
|
5
5
|
* purpose with or without fee is hereby granted, provided that the above
|
@@ -13,21 +13,22 @@
|
|
13
13
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
14
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
15
|
*/
|
16
|
-
|
17
|
-
#include "buffer.h"
|
18
|
-
#include "autolink.h"
|
19
|
-
|
20
16
|
#include <string.h>
|
17
|
+
#include <assert.h>
|
21
18
|
#include <stdlib.h>
|
22
19
|
#include <stdio.h>
|
23
|
-
#include <
|
20
|
+
#include <stdbool.h>
|
21
|
+
|
22
|
+
#include "buffer.h"
|
23
|
+
#include "autolink.h"
|
24
|
+
#include "utf8.h"
|
24
25
|
|
25
26
|
#if defined(_WIN32)
|
26
27
|
#define strncasecmp _strnicmp
|
27
28
|
#endif
|
28
29
|
|
29
|
-
|
30
|
-
|
30
|
+
bool
|
31
|
+
autolink_issafe(const uint8_t *link, size_t link_len)
|
31
32
|
{
|
32
33
|
static const size_t valid_uris_count = 5;
|
33
34
|
static const char *valid_uris[] = {
|
@@ -41,47 +42,53 @@ sd_autolink_issafe(const uint8_t *link, size_t link_len)
|
|
41
42
|
|
42
43
|
if (link_len > len &&
|
43
44
|
strncasecmp((char *)link, valid_uris[i], len) == 0 &&
|
44
|
-
|
45
|
-
return
|
45
|
+
rinku_isalnum(link[len]))
|
46
|
+
return true;
|
46
47
|
}
|
47
48
|
|
48
|
-
return
|
49
|
+
return false;
|
49
50
|
}
|
50
51
|
|
51
|
-
static
|
52
|
-
autolink_delim(uint8_t *data,
|
52
|
+
static bool
|
53
|
+
autolink_delim(const uint8_t *data, struct autolink_pos *link)
|
53
54
|
{
|
54
55
|
uint8_t cclose, copen = 0;
|
55
56
|
size_t i;
|
56
57
|
|
57
|
-
for (i =
|
58
|
+
for (i = link->start; i < link->end; ++i)
|
58
59
|
if (data[i] == '<') {
|
59
|
-
|
60
|
+
link->end = i;
|
60
61
|
break;
|
61
62
|
}
|
62
63
|
|
63
|
-
while (
|
64
|
-
if (strchr("?!.,:", data[
|
65
|
-
|
64
|
+
while (link->end > link->start) {
|
65
|
+
if (strchr("?!.,:", data[link->end - 1]) != NULL)
|
66
|
+
link->end--;
|
66
67
|
|
67
|
-
else if (data[
|
68
|
-
size_t new_end =
|
68
|
+
else if (data[link->end - 1] == ';') {
|
69
|
+
size_t new_end = link->end - 2;
|
69
70
|
|
70
|
-
while (new_end > 0 &&
|
71
|
+
while (new_end > 0 && rinku_isalnum(data[new_end]))
|
71
72
|
new_end--;
|
72
73
|
|
73
|
-
if (new_end <
|
74
|
-
|
75
|
-
|
76
|
-
|
74
|
+
if (new_end < link->end - 2) {
|
75
|
+
if (new_end > 0 && data[new_end] == '#')
|
76
|
+
new_end--;
|
77
|
+
|
78
|
+
if (data[new_end] == '&') {
|
79
|
+
link->end = new_end;
|
80
|
+
continue;
|
81
|
+
}
|
82
|
+
}
|
83
|
+
link->end--;
|
77
84
|
}
|
78
85
|
else break;
|
79
86
|
}
|
80
87
|
|
81
|
-
if (
|
82
|
-
return
|
88
|
+
if (link->end == link->start)
|
89
|
+
return false;
|
83
90
|
|
84
|
-
cclose = data[
|
91
|
+
cclose = data[link->end - 1];
|
85
92
|
|
86
93
|
switch (cclose) {
|
87
94
|
case '"': copen = '"'; break;
|
@@ -94,7 +101,7 @@ autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
|
|
94
101
|
if (copen != 0) {
|
95
102
|
size_t closing = 0;
|
96
103
|
size_t opening = 0;
|
97
|
-
size_t i =
|
104
|
+
size_t i = link->start;
|
98
105
|
|
99
106
|
/* Try to close the final punctuation sign in this same line;
|
100
107
|
* if we managed to close it outside of the URL, that means that it's
|
@@ -116,7 +123,7 @@ autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
|
|
116
123
|
* => foo http://www.pokemon.com/Pikachu_(Electric)
|
117
124
|
*/
|
118
125
|
|
119
|
-
while (i <
|
126
|
+
while (i < link->end) {
|
120
127
|
if (data[i] == copen)
|
121
128
|
opening++;
|
122
129
|
else if (data[i] == cclose)
|
@@ -126,170 +133,150 @@ autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
|
|
126
133
|
}
|
127
134
|
|
128
135
|
if (closing != opening)
|
129
|
-
|
136
|
+
link->end--;
|
130
137
|
}
|
131
138
|
|
132
|
-
return
|
139
|
+
return true;
|
133
140
|
}
|
134
141
|
|
135
|
-
static
|
136
|
-
check_domain(uint8_t *data, size_t size,
|
142
|
+
static bool
|
143
|
+
check_domain(const uint8_t *data, size_t size,
|
144
|
+
struct autolink_pos *link, bool allow_short)
|
137
145
|
{
|
138
146
|
size_t i, np = 0;
|
139
147
|
|
140
|
-
if (!
|
141
|
-
return
|
148
|
+
if (!rinku_isalnum(data[link->start]))
|
149
|
+
return false;
|
142
150
|
|
143
|
-
for (i = 1; i < size - 1; ++i) {
|
151
|
+
for (i = link->start + 1; i < size - 1; ++i) {
|
144
152
|
if (data[i] == '.') np++;
|
145
|
-
else if (!
|
153
|
+
else if (!rinku_isalnum(data[i]) && data[i] != '-') break;
|
146
154
|
}
|
147
155
|
|
156
|
+
link->end = i;
|
157
|
+
|
148
158
|
if (allow_short) {
|
149
159
|
/* We don't need a valid domain in the strict sense (with
|
150
160
|
* at least one dot); so just make sure it's composed of valid
|
151
161
|
* domain characters and return the length of the valid
|
152
162
|
* sequence. */
|
153
|
-
return
|
163
|
+
return true;
|
154
164
|
} else {
|
155
165
|
/* a valid domain needs to have at least a dot.
|
156
166
|
* that's as far as we get */
|
157
|
-
return np
|
167
|
+
return (np > 0);
|
158
168
|
}
|
159
169
|
}
|
160
170
|
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
size_t max_rewind,
|
171
|
+
bool
|
172
|
+
autolink__www(
|
173
|
+
struct autolink_pos *link,
|
174
|
+
const uint8_t *data,
|
175
|
+
size_t pos,
|
167
176
|
size_t size,
|
168
177
|
unsigned int flags)
|
169
178
|
{
|
170
|
-
|
171
|
-
|
172
|
-
if (max_rewind > 0 && !ispunct(data[-1]) && !isspace(data[-1]))
|
173
|
-
return 0;
|
174
|
-
|
175
|
-
if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0)
|
176
|
-
return 0;
|
179
|
+
int32_t boundary;
|
180
|
+
assert(data[pos] == 'w' || data[pos] == 'W');
|
177
181
|
|
178
|
-
|
182
|
+
if ((size - pos) < 4 ||
|
183
|
+
(data[pos + 1] != 'w' && data[pos + 1] != 'W') ||
|
184
|
+
(data[pos + 2] != 'w' && data[pos + 2] != 'W') ||
|
185
|
+
data[pos + 3] != '.')
|
186
|
+
return false;
|
179
187
|
|
180
|
-
|
181
|
-
|
188
|
+
boundary = utf8proc_rewind(data, pos);
|
189
|
+
if (boundary &&
|
190
|
+
!utf8proc_is_space(boundary) &&
|
191
|
+
!utf8proc_is_punctuation(boundary))
|
192
|
+
return false;
|
182
193
|
|
183
|
-
|
184
|
-
|
194
|
+
link->start = pos;
|
195
|
+
link->end = 0;
|
185
196
|
|
186
|
-
|
197
|
+
if (!check_domain(data, size, link, false))
|
198
|
+
return false;
|
187
199
|
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
bufput(link, data, link_end);
|
192
|
-
*rewind_p = 0;
|
193
|
-
|
194
|
-
return (int)link_end;
|
200
|
+
link->end = utf8proc_find_space(data, link->end, size);
|
201
|
+
return autolink_delim(data, link);
|
195
202
|
}
|
196
203
|
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
size_t max_rewind,
|
204
|
+
bool
|
205
|
+
autolink__email(
|
206
|
+
struct autolink_pos *link,
|
207
|
+
const uint8_t *data,
|
208
|
+
size_t pos,
|
203
209
|
size_t size,
|
204
210
|
unsigned int flags)
|
205
211
|
{
|
206
|
-
size_t link_end, rewind;
|
207
212
|
int nb = 0, np = 0;
|
213
|
+
assert(data[pos] == '@');
|
214
|
+
|
215
|
+
link->start = pos;
|
216
|
+
link->end = pos;
|
208
217
|
|
209
|
-
for (
|
210
|
-
uint8_t c = data[
|
218
|
+
for (; link->start > 0; link->start--) {
|
219
|
+
uint8_t c = data[link->start - 1];
|
211
220
|
|
212
|
-
if (
|
221
|
+
if (rinku_isalnum(c))
|
213
222
|
continue;
|
214
223
|
|
215
|
-
if (strchr(".+-_", c) != NULL)
|
224
|
+
if (strchr(".+-_%", c) != NULL)
|
216
225
|
continue;
|
217
226
|
|
218
227
|
break;
|
219
228
|
}
|
220
229
|
|
221
|
-
if (
|
222
|
-
return
|
230
|
+
if (link->start == pos)
|
231
|
+
return false;
|
223
232
|
|
224
|
-
for (
|
225
|
-
uint8_t c = data[
|
233
|
+
for (; link->end < size; link->end++) {
|
234
|
+
uint8_t c = data[link->end];
|
226
235
|
|
227
|
-
if (
|
236
|
+
if (rinku_isalnum(c))
|
228
237
|
continue;
|
229
238
|
|
230
239
|
if (c == '@')
|
231
240
|
nb++;
|
232
|
-
else if (c == '.' &&
|
241
|
+
else if (c == '.' && link->end < size - 1)
|
233
242
|
np++;
|
234
243
|
else if (c != '-' && c != '_')
|
235
244
|
break;
|
236
245
|
}
|
237
246
|
|
238
|
-
if (
|
239
|
-
return
|
240
|
-
|
241
|
-
link_end = autolink_delim(data, link_end, max_rewind, size);
|
242
|
-
|
243
|
-
if (link_end == 0)
|
244
|
-
return 0;
|
247
|
+
if ((link->end - pos) < 2 || nb != 1 || np == 0)
|
248
|
+
return false;
|
245
249
|
|
246
|
-
|
247
|
-
*rewind_p = rewind;
|
248
|
-
|
249
|
-
return link_end;
|
250
|
+
return autolink_delim(data, link);
|
250
251
|
}
|
251
252
|
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
size_t max_rewind,
|
253
|
+
bool
|
254
|
+
autolink__url(
|
255
|
+
struct autolink_pos *link,
|
256
|
+
const uint8_t *data,
|
257
|
+
size_t pos,
|
258
258
|
size_t size,
|
259
259
|
unsigned int flags)
|
260
260
|
{
|
261
|
-
|
262
|
-
|
263
|
-
if (size < 4 || data[1] != '/' || data[2] != '/')
|
264
|
-
return 0;
|
265
|
-
|
266
|
-
while (rewind < max_rewind && isalpha(data[-rewind - 1]))
|
267
|
-
rewind++;
|
268
|
-
|
269
|
-
if (!sd_autolink_issafe(data - rewind, size + rewind))
|
270
|
-
return 0;
|
271
|
-
|
272
|
-
link_end = strlen("://");
|
261
|
+
assert(data[pos] == ':');
|
273
262
|
|
274
|
-
|
275
|
-
|
276
|
-
size - link_end,
|
277
|
-
flags & SD_AUTOLINK_SHORT_DOMAINS);
|
263
|
+
if ((size - pos) < 4 || data[pos + 1] != '/' || data[pos + 2] != '/')
|
264
|
+
return false;
|
278
265
|
|
279
|
-
|
280
|
-
|
266
|
+
link->start = pos + 3;
|
267
|
+
link->end = 0;
|
281
268
|
|
282
|
-
|
283
|
-
|
284
|
-
link_end++;
|
269
|
+
if (!check_domain(data, size, link, flags & AUTOLINK_SHORT_DOMAINS))
|
270
|
+
return false;
|
285
271
|
|
286
|
-
|
272
|
+
link->start = pos;
|
273
|
+
link->end = utf8proc_find_space(data, link->end, size);
|
287
274
|
|
288
|
-
|
289
|
-
|
275
|
+
while (link->start && rinku_isalpha(data[link->start - 1]))
|
276
|
+
link->start--;
|
290
277
|
|
291
|
-
|
292
|
-
|
278
|
+
if (!autolink_issafe(data + link->start, size - link->start))
|
279
|
+
return false;
|
293
280
|
|
294
|
-
return
|
281
|
+
return autolink_delim(data, link);
|
295
282
|
}
|
data/ext/rinku/autolink.h
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) 2016, GitHub, Inc
|
3
3
|
*
|
4
4
|
* Permission to use, copy, modify, and distribute this software for any
|
5
5
|
* purpose with or without fee is hereby granted, provided that the above
|
@@ -13,10 +13,11 @@
|
|
13
13
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
14
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
15
|
*/
|
16
|
+
#ifndef RINKU_AUTOLINK_H
|
17
|
+
#define RINKU_AUTOLINK_H
|
16
18
|
|
17
|
-
#
|
18
|
-
#
|
19
|
-
|
19
|
+
#include <stdbool.h>
|
20
|
+
#include <stdint.h>
|
20
21
|
#include "buffer.h"
|
21
22
|
|
22
23
|
#ifdef __cplusplus
|
@@ -24,23 +25,28 @@ extern "C" {
|
|
24
25
|
#endif
|
25
26
|
|
26
27
|
enum {
|
27
|
-
|
28
|
+
AUTOLINK_SHORT_DOMAINS = (1 << 0),
|
29
|
+
};
|
30
|
+
|
31
|
+
struct autolink_pos {
|
32
|
+
size_t start;
|
33
|
+
size_t end;
|
28
34
|
};
|
29
35
|
|
30
|
-
|
31
|
-
|
36
|
+
bool
|
37
|
+
autolink_issafe(const uint8_t *link, size_t link_len);
|
32
38
|
|
33
|
-
|
34
|
-
|
35
|
-
uint8_t *data, size_t
|
39
|
+
bool
|
40
|
+
autolink__www(struct autolink_pos *res,
|
41
|
+
const uint8_t *data, size_t pos, size_t size, unsigned int flags);
|
36
42
|
|
37
|
-
|
38
|
-
|
39
|
-
uint8_t *data, size_t
|
43
|
+
bool
|
44
|
+
autolink__email(struct autolink_pos *res,
|
45
|
+
const uint8_t *data, size_t pos, size_t size, unsigned int flags);
|
40
46
|
|
41
|
-
|
42
|
-
|
43
|
-
uint8_t *data, size_t
|
47
|
+
bool
|
48
|
+
autolink__url(struct autolink_pos *res,
|
49
|
+
const uint8_t *data, size_t pos, size_t size, unsigned int flags);
|
44
50
|
|
45
51
|
#ifdef __cplusplus
|
46
52
|
}
|