rinku 1.7.3 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 07c65aa1e4fe77d917eeeb0d4ab03889f5b72443
4
+ data.tar.gz: 2e77157e5b959fbda3660e0676d20686e59bd469
5
+ SHA512:
6
+ metadata.gz: e4c692320131620c6b1398a0d8784091bd1952660bfb1ab14ec7ee32fd115f7c0a39a6faddd924045f4085ee91a1062b67cd6a0600df768a7ccb092a1083e772
7
+ data.tar.gz: bc6774b49bb020deaa479ce8145c881e49e037c1539924e3f4cd2a0e8e0e1fb9871356b848e8090d3f3d79239131b0c4d7d6c8afdad117e0dd35edcbe1b5fa14
data/Rakefile CHANGED
@@ -48,22 +48,3 @@ file package('.gem') => %w[pkg/ rinku.gemspec] + $spec.files do |f|
48
48
  sh "gem build rinku.gemspec"
49
49
  mv File.basename(f.name), f.name
50
50
  end
51
-
52
- # GEMSPEC HELPERS ==========================================================
53
- task :gather => 'sundown:checkout' do |t|
54
- files =
55
- FileList[
56
- 'sundown/src/{buffer,autolink}.h',
57
- 'sundown/src/{buffer,autolink}.c',
58
- ]
59
- cp files, 'ext/rinku/',
60
- :preserve => true,
61
- :verbose => true
62
- end
63
-
64
- task 'sundown:checkout' do |t|
65
- unless File.exists?('sundown/src/markdown.h')
66
- sh 'git submodule init'
67
- sh 'git submodule update'
68
- end
69
- end
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2011, Vicent Marti
2
+ * Copyright (c) 2016, GitHub, Inc
3
3
  *
4
4
  * Permission to use, copy, modify, and distribute this software for any
5
5
  * purpose with or without fee is hereby granted, provided that the above
@@ -13,21 +13,22 @@
13
13
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14
14
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
15
  */
16
-
17
- #include "buffer.h"
18
- #include "autolink.h"
19
-
20
16
  #include <string.h>
17
+ #include <assert.h>
21
18
  #include <stdlib.h>
22
19
  #include <stdio.h>
23
- #include <ctype.h>
20
+ #include <stdbool.h>
21
+
22
+ #include "buffer.h"
23
+ #include "autolink.h"
24
+ #include "utf8.h"
24
25
 
25
26
  #if defined(_WIN32)
26
27
  #define strncasecmp _strnicmp
27
28
  #endif
28
29
 
29
- int
30
- sd_autolink_issafe(const uint8_t *link, size_t link_len)
30
+ bool
31
+ autolink_issafe(const uint8_t *link, size_t link_len)
31
32
  {
32
33
  static const size_t valid_uris_count = 5;
33
34
  static const char *valid_uris[] = {
@@ -41,47 +42,53 @@ sd_autolink_issafe(const uint8_t *link, size_t link_len)
41
42
 
42
43
  if (link_len > len &&
43
44
  strncasecmp((char *)link, valid_uris[i], len) == 0 &&
44
- isalnum(link[len]))
45
- return 1;
45
+ rinku_isalnum(link[len]))
46
+ return true;
46
47
  }
47
48
 
48
- return 0;
49
+ return false;
49
50
  }
50
51
 
51
- static size_t
52
- autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
52
+ static bool
53
+ autolink_delim(const uint8_t *data, struct autolink_pos *link)
53
54
  {
54
55
  uint8_t cclose, copen = 0;
55
56
  size_t i;
56
57
 
57
- for (i = 0; i < link_end; ++i)
58
+ for (i = link->start; i < link->end; ++i)
58
59
  if (data[i] == '<') {
59
- link_end = i;
60
+ link->end = i;
60
61
  break;
61
62
  }
62
63
 
63
- while (link_end > 0) {
64
- if (strchr("?!.,:", data[link_end - 1]) != NULL)
65
- link_end--;
64
+ while (link->end > link->start) {
65
+ if (strchr("?!.,:", data[link->end - 1]) != NULL)
66
+ link->end--;
66
67
 
67
- else if (data[link_end - 1] == ';') {
68
- size_t new_end = link_end - 2;
68
+ else if (data[link->end - 1] == ';') {
69
+ size_t new_end = link->end - 2;
69
70
 
70
- while (new_end > 0 && isalpha(data[new_end]))
71
+ while (new_end > 0 && rinku_isalnum(data[new_end]))
71
72
  new_end--;
72
73
 
73
- if (new_end < link_end - 2 && data[new_end] == '&')
74
- link_end = new_end;
75
- else
76
- link_end--;
74
+ if (new_end < link->end - 2) {
75
+ if (new_end > 0 && data[new_end] == '#')
76
+ new_end--;
77
+
78
+ if (data[new_end] == '&') {
79
+ link->end = new_end;
80
+ continue;
81
+ }
82
+ }
83
+ link->end--;
77
84
  }
78
85
  else break;
79
86
  }
80
87
 
81
- if (link_end == 0)
82
- return 0;
88
+ if (link->end == link->start)
89
+ return false;
83
90
 
84
- cclose = data[link_end - 1];
91
+ cclose = data[link->end - 1];
85
92
 
86
93
  switch (cclose) {
87
94
  case '"': copen = '"'; break;
@@ -94,7 +101,7 @@ autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
94
101
  if (copen != 0) {
95
102
  size_t closing = 0;
96
103
  size_t opening = 0;
97
- size_t i = 0;
104
+ size_t i = link->start;
98
105
 
99
106
  /* Try to close the final punctuation sign in this same line;
100
107
  * if we managed to close it outside of the URL, that means that it's
@@ -116,7 +123,7 @@ autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
116
123
  * => foo http://www.pokemon.com/Pikachu_(Electric)
117
124
  */
118
125
 
119
- while (i < link_end) {
126
+ while (i < link->end) {
120
127
  if (data[i] == copen)
121
128
  opening++;
122
129
  else if (data[i] == cclose)
@@ -126,170 +133,150 @@ autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
126
133
  }
127
134
 
128
135
  if (closing != opening)
129
- link_end--;
136
+ link->end--;
130
137
  }
131
138
 
132
- return link_end;
139
+ return true;
133
140
  }
134
141
 
135
- static size_t
136
- check_domain(uint8_t *data, size_t size, int allow_short)
142
+ static bool
143
+ check_domain(const uint8_t *data, size_t size,
144
+ struct autolink_pos *link, bool allow_short)
137
145
  {
138
146
  size_t i, np = 0;
139
147
 
140
- if (!isalnum(data[0]))
141
- return 0;
148
+ if (!rinku_isalnum(data[link->start]))
149
+ return false;
142
150
 
143
- for (i = 1; i < size - 1; ++i) {
151
+ for (i = link->start + 1; i < size - 1; ++i) {
144
152
  if (data[i] == '.') np++;
145
- else if (!isalnum(data[i]) && data[i] != '-') break;
153
+ else if (!rinku_isalnum(data[i]) && data[i] != '-') break;
146
154
  }
147
155
 
156
+ link->end = i;
157
+
148
158
  if (allow_short) {
149
159
  /* We don't need a valid domain in the strict sense (with
150
160
  * at least one dot); so just make sure it's composed of valid
151
161
  * domain characters and return the length of the valid
152
162
  * sequence. */
153
- return i;
163
+ return true;
154
164
  } else {
155
165
  /* a valid domain needs to have at least a dot.
156
166
  * that's as far as we get */
157
- return np ? i : 0;
167
+ return (np > 0);
158
168
  }
159
169
  }
160
170
 
161
- size_t
162
- sd_autolink__www(
163
- size_t *rewind_p,
164
- struct buf *link,
165
- uint8_t *data,
166
- size_t max_rewind,
171
+ bool
172
+ autolink__www(
173
+ struct autolink_pos *link,
174
+ const uint8_t *data,
175
+ size_t pos,
167
176
  size_t size,
168
177
  unsigned int flags)
169
178
  {
170
- size_t link_end;
171
-
172
- if (max_rewind > 0 && !ispunct(data[-1]) && !isspace(data[-1]))
173
- return 0;
174
-
175
- if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0)
176
- return 0;
179
+ int32_t boundary;
180
+ assert(data[pos] == 'w' || data[pos] == 'W');
177
181
 
178
- link_end = check_domain(data, size, 0);
182
+ if ((size - pos) < 4 ||
183
+ (data[pos + 1] != 'w' && data[pos + 1] != 'W') ||
184
+ (data[pos + 2] != 'w' && data[pos + 2] != 'W') ||
185
+ data[pos + 3] != '.')
186
+ return false;
179
187
 
180
- if (link_end == 0)
181
- return 0;
188
+ boundary = utf8proc_rewind(data, pos);
189
+ if (boundary &&
190
+ !utf8proc_is_space(boundary) &&
191
+ !utf8proc_is_punctuation(boundary))
192
+ return false;
182
193
 
183
- while (link_end < size && !isspace(data[link_end]))
184
- link_end++;
194
+ link->start = pos;
195
+ link->end = 0;
185
196
 
186
- link_end = autolink_delim(data, link_end, max_rewind, size);
197
+ if (!check_domain(data, size, link, false))
198
+ return false;
187
199
 
188
- if (link_end == 0)
189
- return 0;
190
-
191
- bufput(link, data, link_end);
192
- *rewind_p = 0;
193
-
194
- return (int)link_end;
200
+ link->end = utf8proc_find_space(data, link->end, size);
201
+ return autolink_delim(data, link);
195
202
  }
196
203
 
197
- size_t
198
- sd_autolink__email(
199
- size_t *rewind_p,
200
- struct buf *link,
201
- uint8_t *data,
202
- size_t max_rewind,
204
+ bool
205
+ autolink__email(
206
+ struct autolink_pos *link,
207
+ const uint8_t *data,
208
+ size_t pos,
203
209
  size_t size,
204
210
  unsigned int flags)
205
211
  {
206
- size_t link_end, rewind;
207
212
  int nb = 0, np = 0;
213
+ assert(data[pos] == '@');
214
+
215
+ link->start = pos;
216
+ link->end = pos;
208
217
 
209
- for (rewind = 0; rewind < max_rewind; ++rewind) {
210
- uint8_t c = data[-rewind - 1];
218
+ for (; link->start > 0; link->start--) {
219
+ uint8_t c = data[link->start - 1];
211
220
 
212
- if (isalnum(c))
221
+ if (rinku_isalnum(c))
213
222
  continue;
214
223
 
215
- if (strchr(".+-_", c) != NULL)
224
+ if (strchr(".+-_%", c) != NULL)
216
225
  continue;
217
226
 
218
227
  break;
219
228
  }
220
229
 
221
- if (rewind == 0)
222
- return 0;
230
+ if (link->start == pos)
231
+ return false;
223
232
 
224
- for (link_end = 0; link_end < size; ++link_end) {
225
- uint8_t c = data[link_end];
233
+ for (; link->end < size; link->end++) {
234
+ uint8_t c = data[link->end];
226
235
 
227
- if (isalnum(c))
236
+ if (rinku_isalnum(c))
228
237
  continue;
229
238
 
230
239
  if (c == '@')
231
240
  nb++;
232
- else if (c == '.' && link_end < size - 1)
241
+ else if (c == '.' && link->end < size - 1)
233
242
  np++;
234
243
  else if (c != '-' && c != '_')
235
244
  break;
236
245
  }
237
246
 
238
- if (link_end < 2 || nb != 1 || np == 0)
239
- return 0;
240
-
241
- link_end = autolink_delim(data, link_end, max_rewind, size);
242
-
243
- if (link_end == 0)
244
- return 0;
247
+ if ((link->end - pos) < 2 || nb != 1 || np == 0)
248
+ return false;
245
249
 
246
- bufput(link, data - rewind, link_end + rewind);
247
- *rewind_p = rewind;
248
-
249
- return link_end;
250
+ return autolink_delim(data, link);
250
251
  }
251
252
 
252
- size_t
253
- sd_autolink__url(
254
- size_t *rewind_p,
255
- struct buf *link,
256
- uint8_t *data,
257
- size_t max_rewind,
253
+ bool
254
+ autolink__url(
255
+ struct autolink_pos *link,
256
+ const uint8_t *data,
257
+ size_t pos,
258
258
  size_t size,
259
259
  unsigned int flags)
260
260
  {
261
- size_t link_end, rewind = 0, domain_len;
262
-
263
- if (size < 4 || data[1] != '/' || data[2] != '/')
264
- return 0;
265
-
266
- while (rewind < max_rewind && isalpha(data[-rewind - 1]))
267
- rewind++;
268
-
269
- if (!sd_autolink_issafe(data - rewind, size + rewind))
270
- return 0;
271
-
272
- link_end = strlen("://");
261
+ assert(data[pos] == ':');
273
262
 
274
- domain_len = check_domain(
275
- data + link_end,
276
- size - link_end,
277
- flags & SD_AUTOLINK_SHORT_DOMAINS);
263
+ if ((size - pos) < 4 || data[pos + 1] != '/' || data[pos + 2] != '/')
264
+ return false;
278
265
 
279
- if (domain_len == 0)
280
- return 0;
266
+ link->start = pos + 3;
267
+ link->end = 0;
281
268
 
282
- link_end += domain_len;
283
- while (link_end < size && !isspace(data[link_end]))
284
- link_end++;
269
+ if (!check_domain(data, size, link, flags & AUTOLINK_SHORT_DOMAINS))
270
+ return false;
285
271
 
286
- link_end = autolink_delim(data, link_end, max_rewind, size);
272
+ link->start = pos;
273
+ link->end = utf8proc_find_space(data, link->end, size);
287
274
 
288
- if (link_end == 0)
289
- return 0;
275
+ while (link->start && rinku_isalpha(data[link->start - 1]))
276
+ link->start--;
290
277
 
291
- bufput(link, data - rewind, link_end + rewind);
292
- *rewind_p = rewind;
278
+ if (!autolink_issafe(data + link->start, size - link->start))
279
+ return false;
293
280
 
294
- return link_end;
281
+ return autolink_delim(data, link);
295
282
  }
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2011, Vicent Marti
2
+ * Copyright (c) 2016, GitHub, Inc
3
3
  *
4
4
  * Permission to use, copy, modify, and distribute this software for any
5
5
  * purpose with or without fee is hereby granted, provided that the above
@@ -13,10 +13,11 @@
13
13
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14
14
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
15
  */
16
+ #ifndef RINKU_AUTOLINK_H
17
+ #define RINKU_AUTOLINK_H
16
18
 
17
- #ifndef UPSKIRT_AUTOLINK_H
18
- #define UPSKIRT_AUTOLINK_H
19
-
19
+ #include <stdbool.h>
20
+ #include <stdint.h>
20
21
  #include "buffer.h"
21
22
 
22
23
  #ifdef __cplusplus
@@ -24,23 +25,28 @@ extern "C" {
24
25
  #endif
25
26
 
26
27
  enum {
27
- SD_AUTOLINK_SHORT_DOMAINS = (1 << 0),
28
+ AUTOLINK_SHORT_DOMAINS = (1 << 0),
29
+ };
30
+
31
+ struct autolink_pos {
32
+ size_t start;
33
+ size_t end;
28
34
  };
29
35
 
30
- int
31
- sd_autolink_issafe(const uint8_t *link, size_t link_len);
36
+ bool
37
+ autolink_issafe(const uint8_t *link, size_t link_len);
32
38
 
33
- size_t
34
- sd_autolink__www(size_t *rewind_p, struct buf *link,
35
- uint8_t *data, size_t offset, size_t size, unsigned int flags);
39
+ bool
40
+ autolink__www(struct autolink_pos *res,
41
+ const uint8_t *data, size_t pos, size_t size, unsigned int flags);
36
42
 
37
- size_t
38
- sd_autolink__email(size_t *rewind_p, struct buf *link,
39
- uint8_t *data, size_t offset, size_t size, unsigned int flags);
43
+ bool
44
+ autolink__email(struct autolink_pos *res,
45
+ const uint8_t *data, size_t pos, size_t size, unsigned int flags);
40
46
 
41
- size_t
42
- sd_autolink__url(size_t *rewind_p, struct buf *link,
43
- uint8_t *data, size_t offset, size_t size, unsigned int flags);
47
+ bool
48
+ autolink__url(struct autolink_pos *res,
49
+ const uint8_t *data, size_t pos, size_t size, unsigned int flags);
44
50
 
45
51
  #ifdef __cplusplus
46
52
  }