rinku 1.7.3 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 07c65aa1e4fe77d917eeeb0d4ab03889f5b72443
4
+ data.tar.gz: 2e77157e5b959fbda3660e0676d20686e59bd469
5
+ SHA512:
6
+ metadata.gz: e4c692320131620c6b1398a0d8784091bd1952660bfb1ab14ec7ee32fd115f7c0a39a6faddd924045f4085ee91a1062b67cd6a0600df768a7ccb092a1083e772
7
+ data.tar.gz: bc6774b49bb020deaa479ce8145c881e49e037c1539924e3f4cd2a0e8e0e1fb9871356b848e8090d3f3d79239131b0c4d7d6c8afdad117e0dd35edcbe1b5fa14
data/Rakefile CHANGED
@@ -48,22 +48,3 @@ file package('.gem') => %w[pkg/ rinku.gemspec] + $spec.files do |f|
48
48
  sh "gem build rinku.gemspec"
49
49
  mv File.basename(f.name), f.name
50
50
  end
51
-
52
- # GEMSPEC HELPERS ==========================================================
53
- task :gather => 'sundown:checkout' do |t|
54
- files =
55
- FileList[
56
- 'sundown/src/{buffer,autolink}.h',
57
- 'sundown/src/{buffer,autolink}.c',
58
- ]
59
- cp files, 'ext/rinku/',
60
- :preserve => true,
61
- :verbose => true
62
- end
63
-
64
- task 'sundown:checkout' do |t|
65
- unless File.exists?('sundown/src/markdown.h')
66
- sh 'git submodule init'
67
- sh 'git submodule update'
68
- end
69
- end
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2011, Vicent Marti
2
+ * Copyright (c) 2016, GitHub, Inc
3
3
  *
4
4
  * Permission to use, copy, modify, and distribute this software for any
5
5
  * purpose with or without fee is hereby granted, provided that the above
@@ -13,21 +13,22 @@
13
13
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14
14
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
15
  */
16
-
17
- #include "buffer.h"
18
- #include "autolink.h"
19
-
20
16
  #include <string.h>
17
+ #include <assert.h>
21
18
  #include <stdlib.h>
22
19
  #include <stdio.h>
23
- #include <ctype.h>
20
+ #include <stdbool.h>
21
+
22
+ #include "buffer.h"
23
+ #include "autolink.h"
24
+ #include "utf8.h"
24
25
 
25
26
  #if defined(_WIN32)
26
27
  #define strncasecmp _strnicmp
27
28
  #endif
28
29
 
29
- int
30
- sd_autolink_issafe(const uint8_t *link, size_t link_len)
30
+ bool
31
+ autolink_issafe(const uint8_t *link, size_t link_len)
31
32
  {
32
33
  static const size_t valid_uris_count = 5;
33
34
  static const char *valid_uris[] = {
@@ -41,47 +42,53 @@ sd_autolink_issafe(const uint8_t *link, size_t link_len)
41
42
 
42
43
  if (link_len > len &&
43
44
  strncasecmp((char *)link, valid_uris[i], len) == 0 &&
44
- isalnum(link[len]))
45
- return 1;
45
+ rinku_isalnum(link[len]))
46
+ return true;
46
47
  }
47
48
 
48
- return 0;
49
+ return false;
49
50
  }
50
51
 
51
- static size_t
52
- autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
52
+ static bool
53
+ autolink_delim(const uint8_t *data, struct autolink_pos *link)
53
54
  {
54
55
  uint8_t cclose, copen = 0;
55
56
  size_t i;
56
57
 
57
- for (i = 0; i < link_end; ++i)
58
+ for (i = link->start; i < link->end; ++i)
58
59
  if (data[i] == '<') {
59
- link_end = i;
60
+ link->end = i;
60
61
  break;
61
62
  }
62
63
 
63
- while (link_end > 0) {
64
- if (strchr("?!.,:", data[link_end - 1]) != NULL)
65
- link_end--;
64
+ while (link->end > link->start) {
65
+ if (strchr("?!.,:", data[link->end - 1]) != NULL)
66
+ link->end--;
66
67
 
67
- else if (data[link_end - 1] == ';') {
68
- size_t new_end = link_end - 2;
68
+ else if (data[link->end - 1] == ';') {
69
+ size_t new_end = link->end - 2;
69
70
 
70
- while (new_end > 0 && isalpha(data[new_end]))
71
+ while (new_end > 0 && rinku_isalnum(data[new_end]))
71
72
  new_end--;
72
73
 
73
- if (new_end < link_end - 2 && data[new_end] == '&')
74
- link_end = new_end;
75
- else
76
- link_end--;
74
+ if (new_end < link->end - 2) {
75
+ if (new_end > 0 && data[new_end] == '#')
76
+ new_end--;
77
+
78
+ if (data[new_end] == '&') {
79
+ link->end = new_end;
80
+ continue;
81
+ }
82
+ }
83
+ link->end--;
77
84
  }
78
85
  else break;
79
86
  }
80
87
 
81
- if (link_end == 0)
82
- return 0;
88
+ if (link->end == link->start)
89
+ return false;
83
90
 
84
- cclose = data[link_end - 1];
91
+ cclose = data[link->end - 1];
85
92
 
86
93
  switch (cclose) {
87
94
  case '"': copen = '"'; break;
@@ -94,7 +101,7 @@ autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
94
101
  if (copen != 0) {
95
102
  size_t closing = 0;
96
103
  size_t opening = 0;
97
- size_t i = 0;
104
+ size_t i = link->start;
98
105
 
99
106
  /* Try to close the final punctuation sign in this same line;
100
107
  * if we managed to close it outside of the URL, that means that it's
@@ -116,7 +123,7 @@ autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
116
123
  * => foo http://www.pokemon.com/Pikachu_(Electric)
117
124
  */
118
125
 
119
- while (i < link_end) {
126
+ while (i < link->end) {
120
127
  if (data[i] == copen)
121
128
  opening++;
122
129
  else if (data[i] == cclose)
@@ -126,170 +133,150 @@ autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
126
133
  }
127
134
 
128
135
  if (closing != opening)
129
- link_end--;
136
+ link->end--;
130
137
  }
131
138
 
132
- return link_end;
139
+ return true;
133
140
  }
134
141
 
135
- static size_t
136
- check_domain(uint8_t *data, size_t size, int allow_short)
142
+ static bool
143
+ check_domain(const uint8_t *data, size_t size,
144
+ struct autolink_pos *link, bool allow_short)
137
145
  {
138
146
  size_t i, np = 0;
139
147
 
140
- if (!isalnum(data[0]))
141
- return 0;
148
+ if (!rinku_isalnum(data[link->start]))
149
+ return false;
142
150
 
143
- for (i = 1; i < size - 1; ++i) {
151
+ for (i = link->start + 1; i < size - 1; ++i) {
144
152
  if (data[i] == '.') np++;
145
- else if (!isalnum(data[i]) && data[i] != '-') break;
153
+ else if (!rinku_isalnum(data[i]) && data[i] != '-') break;
146
154
  }
147
155
 
156
+ link->end = i;
157
+
148
158
  if (allow_short) {
149
159
  /* We don't need a valid domain in the strict sense (with
150
160
  * least one dot; so just make sure it's composed of valid
151
161
  * domain characters and return the length of the the valid
152
162
  * sequence. */
153
- return i;
163
+ return true;
154
164
  } else {
155
165
  /* a valid domain needs to have at least a dot.
156
166
  * that's as far as we get */
157
- return np ? i : 0;
167
+ return (np > 0);
158
168
  }
159
169
  }
160
170
 
161
- size_t
162
- sd_autolink__www(
163
- size_t *rewind_p,
164
- struct buf *link,
165
- uint8_t *data,
166
- size_t max_rewind,
171
+ bool
172
+ autolink__www(
173
+ struct autolink_pos *link,
174
+ const uint8_t *data,
175
+ size_t pos,
167
176
  size_t size,
168
177
  unsigned int flags)
169
178
  {
170
- size_t link_end;
171
-
172
- if (max_rewind > 0 && !ispunct(data[-1]) && !isspace(data[-1]))
173
- return 0;
174
-
175
- if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0)
176
- return 0;
179
+ int32_t boundary;
180
+ assert(data[pos] == 'w' || data[pos] == 'W');
177
181
 
178
- link_end = check_domain(data, size, 0);
182
+ if ((size - pos) < 4 ||
183
+ (data[pos + 1] != 'w' && data[pos + 1] != 'W') ||
184
+ (data[pos + 2] != 'w' && data[pos + 2] != 'W') ||
185
+ data[pos + 3] != '.')
186
+ return false;
179
187
 
180
- if (link_end == 0)
181
- return 0;
188
+ boundary = utf8proc_rewind(data, pos);
189
+ if (boundary &&
190
+ !utf8proc_is_space(boundary) &&
191
+ !utf8proc_is_punctuation(boundary))
192
+ return false;
182
193
 
183
- while (link_end < size && !isspace(data[link_end]))
184
- link_end++;
194
+ link->start = pos;
195
+ link->end = 0;
185
196
 
186
- link_end = autolink_delim(data, link_end, max_rewind, size);
197
+ if (!check_domain(data, size, link, false))
198
+ return false;
187
199
 
188
- if (link_end == 0)
189
- return 0;
190
-
191
- bufput(link, data, link_end);
192
- *rewind_p = 0;
193
-
194
- return (int)link_end;
200
+ link->end = utf8proc_find_space(data, link->end, size);
201
+ return autolink_delim(data, link);
195
202
  }
196
203
 
197
- size_t
198
- sd_autolink__email(
199
- size_t *rewind_p,
200
- struct buf *link,
201
- uint8_t *data,
202
- size_t max_rewind,
204
+ bool
205
+ autolink__email(
206
+ struct autolink_pos *link,
207
+ const uint8_t *data,
208
+ size_t pos,
203
209
  size_t size,
204
210
  unsigned int flags)
205
211
  {
206
- size_t link_end, rewind;
207
212
  int nb = 0, np = 0;
213
+ assert(data[pos] == '@');
214
+
215
+ link->start = pos;
216
+ link->end = pos;
208
217
 
209
- for (rewind = 0; rewind < max_rewind; ++rewind) {
210
- uint8_t c = data[-rewind - 1];
218
+ for (; link->start > 0; link->start--) {
219
+ uint8_t c = data[link->start - 1];
211
220
 
212
- if (isalnum(c))
221
+ if (rinku_isalnum(c))
213
222
  continue;
214
223
 
215
- if (strchr(".+-_", c) != NULL)
224
+ if (strchr(".+-_%", c) != NULL)
216
225
  continue;
217
226
 
218
227
  break;
219
228
  }
220
229
 
221
- if (rewind == 0)
222
- return 0;
230
+ if (link->start == pos)
231
+ return false;
223
232
 
224
- for (link_end = 0; link_end < size; ++link_end) {
225
- uint8_t c = data[link_end];
233
+ for (; link->end < size; link->end++) {
234
+ uint8_t c = data[link->end];
226
235
 
227
- if (isalnum(c))
236
+ if (rinku_isalnum(c))
228
237
  continue;
229
238
 
230
239
  if (c == '@')
231
240
  nb++;
232
- else if (c == '.' && link_end < size - 1)
241
+ else if (c == '.' && link->end < size - 1)
233
242
  np++;
234
243
  else if (c != '-' && c != '_')
235
244
  break;
236
245
  }
237
246
 
238
- if (link_end < 2 || nb != 1 || np == 0)
239
- return 0;
240
-
241
- link_end = autolink_delim(data, link_end, max_rewind, size);
242
-
243
- if (link_end == 0)
244
- return 0;
247
+ if ((link->end - pos) < 2 || nb != 1 || np == 0)
248
+ return false;
245
249
 
246
- bufput(link, data - rewind, link_end + rewind);
247
- *rewind_p = rewind;
248
-
249
- return link_end;
250
+ return autolink_delim(data, link);
250
251
  }
251
252
 
252
- size_t
253
- sd_autolink__url(
254
- size_t *rewind_p,
255
- struct buf *link,
256
- uint8_t *data,
257
- size_t max_rewind,
253
+ bool
254
+ autolink__url(
255
+ struct autolink_pos *link,
256
+ const uint8_t *data,
257
+ size_t pos,
258
258
  size_t size,
259
259
  unsigned int flags)
260
260
  {
261
- size_t link_end, rewind = 0, domain_len;
262
-
263
- if (size < 4 || data[1] != '/' || data[2] != '/')
264
- return 0;
265
-
266
- while (rewind < max_rewind && isalpha(data[-rewind - 1]))
267
- rewind++;
268
-
269
- if (!sd_autolink_issafe(data - rewind, size + rewind))
270
- return 0;
271
-
272
- link_end = strlen("://");
261
+ assert(data[pos] == ':');
273
262
 
274
- domain_len = check_domain(
275
- data + link_end,
276
- size - link_end,
277
- flags & SD_AUTOLINK_SHORT_DOMAINS);
263
+ if ((size - pos) < 4 || data[pos + 1] != '/' || data[pos + 2] != '/')
264
+ return false;
278
265
 
279
- if (domain_len == 0)
280
- return 0;
266
+ link->start = pos + 3;
267
+ link->end = 0;
281
268
 
282
- link_end += domain_len;
283
- while (link_end < size && !isspace(data[link_end]))
284
- link_end++;
269
+ if (!check_domain(data, size, link, flags & AUTOLINK_SHORT_DOMAINS))
270
+ return false;
285
271
 
286
- link_end = autolink_delim(data, link_end, max_rewind, size);
272
+ link->start = pos;
273
+ link->end = utf8proc_find_space(data, link->end, size);
287
274
 
288
- if (link_end == 0)
289
- return 0;
275
+ while (link->start && rinku_isalpha(data[link->start - 1]))
276
+ link->start--;
290
277
 
291
- bufput(link, data - rewind, link_end + rewind);
292
- *rewind_p = rewind;
278
+ if (!autolink_issafe(data + link->start, size - link->start))
279
+ return false;
293
280
 
294
- return link_end;
281
+ return autolink_delim(data, link);
295
282
  }
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2011, Vicent Marti
2
+ * Copyright (c) 2016, GitHub, Inc
3
3
  *
4
4
  * Permission to use, copy, modify, and distribute this software for any
5
5
  * purpose with or without fee is hereby granted, provided that the above
@@ -13,10 +13,11 @@
13
13
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14
14
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
15
  */
16
+ #ifndef RINKU_AUTOLINK_H
17
+ #define RINKU_AUTOLINK_H
16
18
 
17
- #ifndef UPSKIRT_AUTOLINK_H
18
- #define UPSKIRT_AUTOLINK_H
19
-
19
+ #include <stdbool.h>
20
+ #include <stdint.h>
20
21
  #include "buffer.h"
21
22
 
22
23
  #ifdef __cplusplus
@@ -24,23 +25,28 @@ extern "C" {
24
25
  #endif
25
26
 
26
27
  enum {
27
- SD_AUTOLINK_SHORT_DOMAINS = (1 << 0),
28
+ AUTOLINK_SHORT_DOMAINS = (1 << 0),
29
+ };
30
+
31
+ struct autolink_pos {
32
+ size_t start;
33
+ size_t end;
28
34
  };
29
35
 
30
- int
31
- sd_autolink_issafe(const uint8_t *link, size_t link_len);
36
+ bool
37
+ autolink_issafe(const uint8_t *link, size_t link_len);
32
38
 
33
- size_t
34
- sd_autolink__www(size_t *rewind_p, struct buf *link,
35
- uint8_t *data, size_t offset, size_t size, unsigned int flags);
39
+ bool
40
+ autolink__www(struct autolink_pos *res,
41
+ const uint8_t *data, size_t pos, size_t size, unsigned int flags);
36
42
 
37
- size_t
38
- sd_autolink__email(size_t *rewind_p, struct buf *link,
39
- uint8_t *data, size_t offset, size_t size, unsigned int flags);
43
+ bool
44
+ autolink__email(struct autolink_pos *res,
45
+ const uint8_t *data, size_t pos, size_t size, unsigned int flags);
40
46
 
41
- size_t
42
- sd_autolink__url(size_t *rewind_p, struct buf *link,
43
- uint8_t *data, size_t offset, size_t size, unsigned int flags);
47
+ bool
48
+ autolink__url(struct autolink_pos *res,
49
+ const uint8_t *data, size_t pos, size_t size, unsigned int flags);
44
50
 
45
51
  #ifdef __cplusplus
46
52
  }