rinku 1.7.3 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Rakefile +0 -19
- data/ext/rinku/autolink.c +114 -127
- data/ext/rinku/autolink.h +22 -16
- data/ext/rinku/rinku.c +34 -252
- data/ext/rinku/rinku.h +23 -5
- data/ext/rinku/rinku_rb.c +239 -0
- data/ext/rinku/utf8.c +187 -0
- data/ext/rinku/utf8.h +34 -0
- data/lib/rinku.rb +6 -2
- data/rinku.gemspec +12 -5
- data/test/autolink_test.rb +96 -18
- metadata +60 -16
data/ext/rinku/rinku.c
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) 2016, GitHub, Inc
|
3
3
|
*
|
4
4
|
* Permission to use, copy, modify, and distribute this software for any
|
5
5
|
* purpose with or without fee is hereby granted, provided that the above
|
@@ -13,28 +13,15 @@
|
|
13
13
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
14
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
15
|
*/
|
16
|
-
#define RSTRING_NOT_MODIFIED
|
17
|
-
|
18
|
-
#include <stdio.h>
|
19
|
-
#include "ruby.h"
|
20
|
-
|
21
|
-
#define RUBY_EXPORT __attribute__ ((visibility ("default")))
|
22
|
-
|
23
|
-
#ifdef HAVE_RUBY_ENCODING_H
|
24
|
-
#include <ruby/encoding.h>
|
25
|
-
#else
|
26
|
-
#define rb_enc_copy(dst, src)
|
27
|
-
#endif
|
28
|
-
|
29
|
-
#include "autolink.h"
|
30
|
-
#include "buffer.h"
|
31
|
-
|
32
16
|
#include <string.h>
|
33
17
|
#include <stdlib.h>
|
34
18
|
#include <stdio.h>
|
35
|
-
#include <
|
19
|
+
#include <assert.h>
|
36
20
|
|
37
|
-
|
21
|
+
#include "rinku.h"
|
22
|
+
#include "autolink.h"
|
23
|
+
#include "buffer.h"
|
24
|
+
#include "utf8.h"
|
38
25
|
|
39
26
|
typedef enum {
|
40
27
|
HTML_TAG_NONE = 0,
|
@@ -42,15 +29,6 @@ typedef enum {
|
|
42
29
|
HTML_TAG_CLOSE,
|
43
30
|
} html_tag;
|
44
31
|
|
45
|
-
typedef enum {
|
46
|
-
AUTOLINK_URLS = (1 << 0),
|
47
|
-
AUTOLINK_EMAILS = (1 << 1),
|
48
|
-
AUTOLINK_ALL = AUTOLINK_URLS|AUTOLINK_EMAILS
|
49
|
-
} autolink_mode;
|
50
|
-
|
51
|
-
typedef size_t (*autolink_parse_cb)(
|
52
|
-
size_t *rewind, struct buf *, uint8_t *, size_t, size_t, unsigned int);
|
53
|
-
|
54
32
|
typedef enum {
|
55
33
|
AUTOLINK_ACTION_NONE = 0,
|
56
34
|
AUTOLINK_ACTION_WWW,
|
@@ -59,11 +37,14 @@ typedef enum {
|
|
59
37
|
AUTOLINK_ACTION_SKIP_TAG
|
60
38
|
} autolink_action;
|
61
39
|
|
40
|
+
typedef bool (*autolink_parse_cb)(
|
41
|
+
struct autolink_pos *, const uint8_t *, size_t, size_t, unsigned int);
|
42
|
+
|
62
43
|
static autolink_parse_cb g_callbacks[] = {
|
63
44
|
NULL,
|
64
|
-
|
65
|
-
|
66
|
-
|
45
|
+
autolink__www, /* 1 */
|
46
|
+
autolink__email,/* 2 */
|
47
|
+
autolink__url, /* 3 */
|
67
48
|
};
|
68
49
|
|
69
50
|
static const char *g_hrefs[] = {
|
@@ -73,12 +54,6 @@ static const char *g_hrefs[] = {
|
|
73
54
|
"<a href=\"",
|
74
55
|
};
|
75
56
|
|
76
|
-
static void
|
77
|
-
autolink__print(struct buf *ob, const struct buf *link, void *payload)
|
78
|
-
{
|
79
|
-
bufput(ob, link->data, link->size);
|
80
|
-
}
|
81
|
-
|
82
57
|
/*
|
83
58
|
* Rinku assumes valid HTML encoding for all input, but there's still
|
84
59
|
* the case where a link can contain a double quote `"` that allows XSS.
|
@@ -86,7 +61,7 @@ autolink__print(struct buf *ob, const struct buf *link, void *payload)
|
|
86
61
|
* We need to properly escape the character we use for the `href` attribute
|
87
62
|
* declaration
|
88
63
|
*/
|
89
|
-
static void print_link(struct buf *ob, const
|
64
|
+
static void print_link(struct buf *ob, const uint8_t *link, size_t size)
|
90
65
|
{
|
91
66
|
size_t i = 0, org;
|
92
67
|
|
@@ -135,7 +110,7 @@ html_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname)
|
|
135
110
|
if (i == tag_size)
|
136
111
|
return HTML_TAG_NONE;
|
137
112
|
|
138
|
-
if (
|
113
|
+
if (rinku_isspace(tag_data[i]) || tag_data[i] == '>')
|
139
114
|
return closed ? HTML_TAG_CLOSE : HTML_TAG_OPEN;
|
140
115
|
|
141
116
|
return HTML_TAG_NONE;
|
@@ -178,7 +153,6 @@ autolink__skip_tag(
|
|
178
153
|
i++;
|
179
154
|
}
|
180
155
|
|
181
|
-
// bufput(ob, text, i + 1);
|
182
156
|
return i;
|
183
157
|
}
|
184
158
|
|
@@ -191,20 +165,16 @@ rinku_autolink(
|
|
191
165
|
unsigned int flags,
|
192
166
|
const char *link_attr,
|
193
167
|
const char **skip_tags,
|
194
|
-
void (*link_text_cb)(struct buf
|
168
|
+
void (*link_text_cb)(struct buf *, const uint8_t *, size_t, void *),
|
195
169
|
void *payload)
|
196
170
|
{
|
197
|
-
size_t i, end
|
198
|
-
|
199
|
-
char active_chars[256];
|
200
|
-
void (*link_url_cb)(struct buf *, const struct buf *, void *);
|
171
|
+
size_t i, end;
|
172
|
+
char active_chars[256] = {0};
|
201
173
|
int link_count = 0;
|
202
174
|
|
203
175
|
if (!text || size == 0)
|
204
176
|
return 0;
|
205
177
|
|
206
|
-
memset(active_chars, 0x0, sizeof(active_chars));
|
207
|
-
|
208
178
|
active_chars['<'] = AUTOLINK_ACTION_SKIP_TAG;
|
209
179
|
|
210
180
|
if (mode & AUTOLINK_EMAILS)
|
@@ -216,11 +186,8 @@ rinku_autolink(
|
|
216
186
|
active_chars[':'] = AUTOLINK_ACTION_URL;
|
217
187
|
}
|
218
188
|
|
219
|
-
if (link_text_cb == NULL)
|
220
|
-
link_text_cb = &autolink__print;
|
221
|
-
|
222
189
|
if (link_attr != NULL) {
|
223
|
-
while (
|
190
|
+
while (rinku_isspace(*link_attr))
|
224
191
|
link_attr++;
|
225
192
|
}
|
226
193
|
|
@@ -229,7 +196,8 @@ rinku_autolink(
|
|
229
196
|
i = end = 0;
|
230
197
|
|
231
198
|
while (i < size) {
|
232
|
-
|
199
|
+
struct autolink_pos link;
|
200
|
+
bool link_found;
|
233
201
|
char action = 0;
|
234
202
|
|
235
203
|
while (end < size && (action = active_chars[text[end]]) == 0)
|
@@ -244,23 +212,19 @@ rinku_autolink(
|
|
244
212
|
if (action == AUTOLINK_ACTION_SKIP_TAG) {
|
245
213
|
end += autolink__skip_tag(ob,
|
246
214
|
text + end, size - end, skip_tags);
|
247
|
-
|
248
215
|
continue;
|
249
216
|
}
|
250
217
|
|
251
|
-
|
252
|
-
|
253
|
-
link_end = g_callbacks[(int)action](
|
254
|
-
&rewind, link, (uint8_t *)text + end,
|
255
|
-
end - last_link_found,
|
256
|
-
size - end, flags);
|
218
|
+
link_found = g_callbacks[(int)action](
|
219
|
+
&link, text, end, size, flags);
|
257
220
|
|
258
|
-
|
259
|
-
|
260
|
-
|
221
|
+
if (link_found && link.start >= i) {
|
222
|
+
const uint8_t *link_str = text + link.start;
|
223
|
+
const size_t link_len = link.end - link.start;
|
261
224
|
|
225
|
+
bufput(ob, text + i, link.start - i);
|
262
226
|
bufputs(ob, g_hrefs[(int)action]);
|
263
|
-
print_link(ob,
|
227
|
+
print_link(ob, link_str, link_len);
|
264
228
|
|
265
229
|
if (link_attr) {
|
266
230
|
BUFPUTSL(ob, "\" ");
|
@@ -270,202 +234,20 @@ rinku_autolink(
|
|
270
234
|
BUFPUTSL(ob, "\">");
|
271
235
|
}
|
272
236
|
|
273
|
-
link_text_cb
|
237
|
+
if (link_text_cb) {
|
238
|
+
link_text_cb(ob, link_str, link_len, payload);
|
239
|
+
} else {
|
240
|
+
bufput(ob, link_str, link_len);
|
241
|
+
}
|
242
|
+
|
274
243
|
BUFPUTSL(ob, "</a>");
|
275
244
|
|
276
245
|
link_count++;
|
277
|
-
i = end
|
278
|
-
last_link_found = end = i;
|
246
|
+
end = i = link.end;
|
279
247
|
} else {
|
280
248
|
end = end + 1;
|
281
249
|
}
|
282
250
|
}
|
283
251
|
|
284
|
-
bufrelease(link);
|
285
252
|
return link_count;
|
286
253
|
}
|
287
|
-
|
288
|
-
|
289
|
-
/**
|
290
|
-
* Ruby code
|
291
|
-
*/
|
292
|
-
static void
|
293
|
-
autolink_callback(struct buf *link_text, const struct buf *link, void *block)
|
294
|
-
{
|
295
|
-
VALUE rb_link, rb_link_text;
|
296
|
-
rb_link = rb_str_new(link->data, link->size);
|
297
|
-
rb_link_text = rb_funcall((VALUE)block, rb_intern("call"), 1, rb_link);
|
298
|
-
Check_Type(rb_link_text, T_STRING);
|
299
|
-
bufput(link_text, RSTRING_PTR(rb_link_text), RSTRING_LEN(rb_link_text));
|
300
|
-
}
|
301
|
-
|
302
|
-
const char **rinku_load_tags(VALUE rb_skip)
|
303
|
-
{
|
304
|
-
const char **skip_tags;
|
305
|
-
size_t i, count;
|
306
|
-
|
307
|
-
Check_Type(rb_skip, T_ARRAY);
|
308
|
-
|
309
|
-
count = RARRAY_LEN(rb_skip);
|
310
|
-
skip_tags = xmalloc(sizeof(void *) * (count + 1));
|
311
|
-
|
312
|
-
for (i = 0; i < count; ++i) {
|
313
|
-
VALUE tag = rb_ary_entry(rb_skip, i);
|
314
|
-
Check_Type(tag, T_STRING);
|
315
|
-
skip_tags[i] = StringValueCStr(tag);
|
316
|
-
}
|
317
|
-
|
318
|
-
skip_tags[count] = NULL;
|
319
|
-
return skip_tags;
|
320
|
-
}
|
321
|
-
|
322
|
-
/*
|
323
|
-
* Document-method: auto_link
|
324
|
-
*
|
325
|
-
* call-seq:
|
326
|
-
* auto_link(text, mode=:all, link_attr=nil, skip_tags=nil, flags=0)
|
327
|
-
* auto_link(text, mode=:all, link_attr=nil, skip_tags=nil, flags=0) { |link_text| ... }
|
328
|
-
*
|
329
|
-
* Parses a block of text looking for "safe" urls or email addresses,
|
330
|
-
* and turns them into HTML links with the given attributes.
|
331
|
-
*
|
332
|
-
* NOTE: The block of text may or may not be HTML; if the text is HTML,
|
333
|
-
* Rinku will skip the relevant tags to prevent double-linking and linking
|
334
|
-
* inside `pre` blocks by default.
|
335
|
-
*
|
336
|
-
* NOTE: If the input text is HTML, it's expected to be already escaped.
|
337
|
-
* Rinku will perform no escaping.
|
338
|
-
*
|
339
|
-
* NOTE: Currently the follow protocols are considered safe and are the
|
340
|
-
* only ones that will be autolinked.
|
341
|
-
*
|
342
|
-
* http:// https:// ftp:// mailto://
|
343
|
-
*
|
344
|
-
* Email addresses are also autolinked by default. URLs without a protocol
|
345
|
-
* specifier but starting with 'www.' will also be autolinked, defaulting to
|
346
|
-
* the 'http://' protocol.
|
347
|
-
*
|
348
|
-
* - `text` is a string in plain text or HTML markup. If the string is formatted in
|
349
|
-
* HTML, Rinku is smart enough to skip the links that are already enclosed in `<a>`
|
350
|
-
* tags.`
|
351
|
-
*
|
352
|
-
* - `mode` is a symbol, either `:all`, `:urls` or `:email_addresses`,
|
353
|
-
* which specifies which kind of links will be auto-linked.
|
354
|
-
*
|
355
|
-
* - `link_attr` is a string containing the link attributes for each link that
|
356
|
-
* will be generated. These attributes are not sanitized and will be include as-is
|
357
|
-
* in each generated link, e.g.
|
358
|
-
*
|
359
|
-
* ~~~~~ruby
|
360
|
-
* auto_link('http://www.pokemon.com', :all, 'target="_blank"')
|
361
|
-
* # => '<a href="http://www.pokemon.com" target="_blank">http://www.pokemon.com</a>'
|
362
|
-
* ~~~~~
|
363
|
-
*
|
364
|
-
* This string can be autogenerated from a hash using the Rails `tag_options` helper.
|
365
|
-
*
|
366
|
-
* - `skip_tags` is a list of strings with the names of HTML tags that will be skipped
|
367
|
-
* when autolinking. If `nil`, this defaults to the value of the global `Rinku.skip_tags`,
|
368
|
-
* which is initially `["a", "pre", "code", "kbd", "script"]`.
|
369
|
-
*
|
370
|
-
* - `flag` is an optional boolean value specifying whether to recognize
|
371
|
-
* 'http://foo' as a valid domain, or require at least one '.'. It defaults to false.
|
372
|
-
*
|
373
|
-
* - `&block` is an optional block argument. If a block is passed, it will
|
374
|
-
* be yielded for each found link in the text, and its return value will be used instead
|
375
|
-
* of the name of the link. E.g.
|
376
|
-
*
|
377
|
-
* ~~~~~ruby
|
378
|
-
* auto_link('Check it out at http://www.pokemon.com') do |url|
|
379
|
-
* "THE POKEMAN WEBSITEZ"
|
380
|
-
* end
|
381
|
-
* # => 'Check it out at <a href="http://www.pokemon.com">THE POKEMAN WEBSITEZ</a>'
|
382
|
-
* ~~~~~~
|
383
|
-
*/
|
384
|
-
static VALUE
|
385
|
-
rb_rinku_autolink(int argc, VALUE *argv, VALUE self)
|
386
|
-
{
|
387
|
-
static const char *SKIP_TAGS[] = {"a", "pre", "code", "kbd", "script", NULL};
|
388
|
-
|
389
|
-
VALUE result, rb_text, rb_mode, rb_html, rb_skip, rb_flags, rb_block;
|
390
|
-
struct buf *output_buf;
|
391
|
-
int link_mode, count;
|
392
|
-
unsigned int link_flags = 0;
|
393
|
-
const char *link_attr = NULL;
|
394
|
-
const char **skip_tags = NULL;
|
395
|
-
ID mode_sym;
|
396
|
-
|
397
|
-
rb_scan_args(argc, argv, "14&", &rb_text, &rb_mode,
|
398
|
-
&rb_html, &rb_skip, &rb_flags, &rb_block);
|
399
|
-
|
400
|
-
Check_Type(rb_text, T_STRING);
|
401
|
-
|
402
|
-
if (!NIL_P(rb_mode)) {
|
403
|
-
Check_Type(rb_mode, T_SYMBOL);
|
404
|
-
mode_sym = SYM2ID(rb_mode);
|
405
|
-
} else {
|
406
|
-
mode_sym = rb_intern("all");
|
407
|
-
}
|
408
|
-
|
409
|
-
if (!NIL_P(rb_html)) {
|
410
|
-
Check_Type(rb_html, T_STRING);
|
411
|
-
link_attr = RSTRING_PTR(rb_html);
|
412
|
-
}
|
413
|
-
|
414
|
-
if (NIL_P(rb_skip))
|
415
|
-
rb_skip = rb_iv_get(self, "@skip_tags");
|
416
|
-
|
417
|
-
if (NIL_P(rb_skip)) {
|
418
|
-
skip_tags = SKIP_TAGS;
|
419
|
-
} else {
|
420
|
-
skip_tags = rinku_load_tags(rb_skip);
|
421
|
-
}
|
422
|
-
|
423
|
-
if (!NIL_P(rb_flags)) {
|
424
|
-
Check_Type(rb_flags, T_FIXNUM);
|
425
|
-
link_flags = FIX2INT(rb_flags);
|
426
|
-
}
|
427
|
-
|
428
|
-
output_buf = bufnew(32);
|
429
|
-
|
430
|
-
if (mode_sym == rb_intern("all"))
|
431
|
-
link_mode = AUTOLINK_ALL;
|
432
|
-
else if (mode_sym == rb_intern("email_addresses"))
|
433
|
-
link_mode = AUTOLINK_EMAILS;
|
434
|
-
else if (mode_sym == rb_intern("urls"))
|
435
|
-
link_mode = AUTOLINK_URLS;
|
436
|
-
else
|
437
|
-
rb_raise(rb_eTypeError,
|
438
|
-
"Invalid linking mode (possible values are :all, :urls, :email_addresses)");
|
439
|
-
|
440
|
-
count = rinku_autolink(
|
441
|
-
output_buf,
|
442
|
-
RSTRING_PTR(rb_text),
|
443
|
-
RSTRING_LEN(rb_text),
|
444
|
-
link_mode,
|
445
|
-
link_flags,
|
446
|
-
link_attr,
|
447
|
-
skip_tags,
|
448
|
-
RTEST(rb_block) ? &autolink_callback : NULL,
|
449
|
-
(void*)rb_block);
|
450
|
-
|
451
|
-
if (count == 0)
|
452
|
-
result = rb_text;
|
453
|
-
else {
|
454
|
-
result = rb_str_new(output_buf->data, output_buf->size);
|
455
|
-
rb_enc_copy(result, rb_text);
|
456
|
-
}
|
457
|
-
|
458
|
-
if (skip_tags != SKIP_TAGS)
|
459
|
-
xfree(skip_tags);
|
460
|
-
|
461
|
-
bufrelease(output_buf);
|
462
|
-
return result;
|
463
|
-
}
|
464
|
-
|
465
|
-
void RUBY_EXPORT Init_rinku()
|
466
|
-
{
|
467
|
-
rb_mRinku = rb_define_module("Rinku");
|
468
|
-
rb_define_method(rb_mRinku, "auto_link", rb_rinku_autolink, -1);
|
469
|
-
rb_define_const(rb_mRinku, "AUTOLINK_SHORT_DOMAINS", INT2FIX(SD_AUTOLINK_SHORT_DOMAINS));
|
470
|
-
}
|
471
|
-
|
data/ext/rinku/rinku.h
CHANGED
@@ -1,7 +1,25 @@
|
|
1
|
-
#ifndef
|
2
|
-
#define
|
1
|
+
#ifndef _RINKU_H
|
2
|
+
#define _RINKU_H
|
3
3
|
|
4
|
-
|
5
|
-
|
4
|
+
#include <stdint.h>
|
5
|
+
#include "buffer.h"
|
6
|
+
|
7
|
+
typedef enum {
|
8
|
+
AUTOLINK_URLS = (1 << 0),
|
9
|
+
AUTOLINK_EMAILS = (1 << 1),
|
10
|
+
AUTOLINK_ALL = AUTOLINK_URLS|AUTOLINK_EMAILS
|
11
|
+
} autolink_mode;
|
12
|
+
|
13
|
+
int
|
14
|
+
rinku_autolink(
|
15
|
+
struct buf *ob,
|
16
|
+
const uint8_t *text,
|
17
|
+
size_t size,
|
18
|
+
autolink_mode mode,
|
19
|
+
unsigned int flags,
|
20
|
+
const char *link_attr,
|
21
|
+
const char **skip_tags,
|
22
|
+
void (*link_text_cb)(struct buf *, const uint8_t *, size_t, void *),
|
23
|
+
void *payload);
|
6
24
|
|
7
|
-
#endif
|
25
|
+
#endif
|
@@ -0,0 +1,239 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2016, GitHub, Inc
|
3
|
+
*
|
4
|
+
* Permission to use, copy, modify, and distribute this software for any
|
5
|
+
* purpose with or without fee is hereby granted, provided that the above
|
6
|
+
* copyright notice and this permission notice appear in all copies.
|
7
|
+
*
|
8
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
9
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
10
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
11
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
12
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
13
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
|
+
*/
|
16
|
+
#include <stdio.h>
|
17
|
+
|
18
|
+
#define RUBY_EXPORT __attribute__ ((visibility ("default")))
|
19
|
+
|
20
|
+
#include <ruby.h>
|
21
|
+
#include <ruby/encoding.h>
|
22
|
+
|
23
|
+
#include "rinku.h"
|
24
|
+
#include "autolink.h"
|
25
|
+
|
26
|
+
static VALUE rb_mRinku;
|
27
|
+
|
28
|
+
struct callback_data {
|
29
|
+
VALUE rb_block;
|
30
|
+
rb_encoding *encoding;
|
31
|
+
};
|
32
|
+
|
33
|
+
static rb_encoding *
|
34
|
+
validate_encoding(VALUE rb_str)
|
35
|
+
{
|
36
|
+
rb_encoding *encoding;
|
37
|
+
|
38
|
+
Check_Type(rb_str, T_STRING);
|
39
|
+
encoding = rb_enc_get(rb_str);
|
40
|
+
|
41
|
+
if (!rb_enc_asciicompat(encoding))
|
42
|
+
rb_raise(rb_eArgError, "Invalid encoding");
|
43
|
+
|
44
|
+
if (rb_enc_str_coderange(rb_str) == ENC_CODERANGE_BROKEN)
|
45
|
+
rb_raise(rb_eArgError, "invalid byte sequence in %s",
|
46
|
+
rb_enc_name(encoding));
|
47
|
+
|
48
|
+
return encoding;
|
49
|
+
}
|
50
|
+
|
51
|
+
static void
|
52
|
+
autolink_callback(struct buf *link_text,
|
53
|
+
const uint8_t *url, size_t url_len, void *block)
|
54
|
+
{
|
55
|
+
struct callback_data *data = block;
|
56
|
+
VALUE rb_link, rb_link_text;
|
57
|
+
|
58
|
+
rb_link = rb_enc_str_new((const char *)url, url_len, data->encoding);
|
59
|
+
rb_link_text = rb_funcall(data->rb_block,
|
60
|
+
rb_intern("call"), 1, rb_link);
|
61
|
+
|
62
|
+
if (validate_encoding(rb_link_text) != data->encoding)
|
63
|
+
rb_raise(rb_eArgError, "encoding mismatch");
|
64
|
+
|
65
|
+
bufput(link_text, RSTRING_PTR(rb_link_text), RSTRING_LEN(rb_link_text));
|
66
|
+
}
|
67
|
+
|
68
|
+
const char **rinku_load_tags(VALUE rb_skip)
|
69
|
+
{
|
70
|
+
const char **skip_tags;
|
71
|
+
size_t i, count;
|
72
|
+
|
73
|
+
Check_Type(rb_skip, T_ARRAY);
|
74
|
+
|
75
|
+
count = RARRAY_LEN(rb_skip);
|
76
|
+
skip_tags = xmalloc(sizeof(void *) * (count + 1));
|
77
|
+
|
78
|
+
for (i = 0; i < count; ++i) {
|
79
|
+
VALUE tag = rb_ary_entry(rb_skip, i);
|
80
|
+
Check_Type(tag, T_STRING);
|
81
|
+
skip_tags[i] = StringValueCStr(tag);
|
82
|
+
}
|
83
|
+
|
84
|
+
skip_tags[count] = NULL;
|
85
|
+
return skip_tags;
|
86
|
+
}
|
87
|
+
|
88
|
+
/*
|
89
|
+
* Document-method: auto_link
|
90
|
+
*
|
91
|
+
* call-seq:
|
92
|
+
* auto_link(text, mode=:all, link_attr=nil, skip_tags=nil, flags=0)
|
93
|
+
* auto_link(text, mode=:all, link_attr=nil, skip_tags=nil, flags=0) { |link_text| ... }
|
94
|
+
*
|
95
|
+
* Parses a block of text looking for "safe" urls or email addresses,
|
96
|
+
* and turns them into HTML links with the given attributes.
|
97
|
+
*
|
98
|
+
* NOTE: The block of text may or may not be HTML; if the text is HTML,
|
99
|
+
* Rinku will skip the relevant tags to prevent double-linking and linking
|
100
|
+
* inside `pre` blocks by default.
|
101
|
+
*
|
102
|
+
* NOTE: If the input text is HTML, it's expected to be already escaped.
|
103
|
+
* Rinku will perform no escaping.
|
104
|
+
*
|
105
|
+
* NOTE: Currently the follow protocols are considered safe and are the
|
106
|
+
* only ones that will be autolinked.
|
107
|
+
*
|
108
|
+
* http:// https:// ftp:// mailto://
|
109
|
+
*
|
110
|
+
* Email addresses are also autolinked by default. URLs without a protocol
|
111
|
+
* specifier but starting with 'www.' will also be autolinked, defaulting to
|
112
|
+
* the 'http://' protocol.
|
113
|
+
*
|
114
|
+
* - `text` is a string in plain text or HTML markup. If the string is formatted in
|
115
|
+
* HTML, Rinku is smart enough to skip the links that are already enclosed in `<a>`
|
116
|
+
* tags.`
|
117
|
+
*
|
118
|
+
* - `mode` is a symbol, either `:all`, `:urls` or `:email_addresses`,
|
119
|
+
* which specifies which kind of links will be auto-linked.
|
120
|
+
*
|
121
|
+
* - `link_attr` is a string containing the link attributes for each link that
|
122
|
+
* will be generated. These attributes are not sanitized and will be include as-is
|
123
|
+
* in each generated link, e.g.
|
124
|
+
*
|
125
|
+
* ~~~~~ruby
|
126
|
+
* auto_link('http://www.pokemon.com', :all, 'target="_blank"')
|
127
|
+
* # => '<a href="http://www.pokemon.com" target="_blank">http://www.pokemon.com</a>'
|
128
|
+
* ~~~~~
|
129
|
+
*
|
130
|
+
* This string can be autogenerated from a hash using the Rails `tag_options` helper.
|
131
|
+
*
|
132
|
+
* - `skip_tags` is a list of strings with the names of HTML tags that will be skipped
|
133
|
+
* when autolinking. If `nil`, this defaults to the value of the global `Rinku.skip_tags`,
|
134
|
+
* which is initially `["a", "pre", "code", "kbd", "script"]`.
|
135
|
+
*
|
136
|
+
* - `flag` is an optional boolean value specifying whether to recognize
|
137
|
+
* 'http://foo' as a valid domain, or require at least one '.'. It defaults to false.
|
138
|
+
*
|
139
|
+
* - `&block` is an optional block argument. If a block is passed, it will
|
140
|
+
* be yielded for each found link in the text, and its return value will be used instead
|
141
|
+
* of the name of the link. E.g.
|
142
|
+
*
|
143
|
+
* ~~~~~ruby
|
144
|
+
* auto_link('Check it out at http://www.pokemon.com') do |url|
|
145
|
+
* "THE POKEMAN WEBSITEZ"
|
146
|
+
* end
|
147
|
+
* # => 'Check it out at <a href="http://www.pokemon.com">THE POKEMAN WEBSITEZ</a>'
|
148
|
+
* ~~~~~~
|
149
|
+
*/
|
150
|
+
static VALUE
|
151
|
+
rb_rinku_autolink(int argc, VALUE *argv, VALUE self)
|
152
|
+
{
|
153
|
+
static const char *SKIP_TAGS[] = {"a", "pre", "code", "kbd", "script", NULL};
|
154
|
+
|
155
|
+
VALUE result, rb_text, rb_mode, rb_html, rb_skip, rb_flags, rb_block;
|
156
|
+
rb_encoding *text_encoding;
|
157
|
+
struct buf *output_buf;
|
158
|
+
int link_mode = AUTOLINK_ALL, count;
|
159
|
+
unsigned int link_flags = 0;
|
160
|
+
const char *link_attr = NULL;
|
161
|
+
const char **skip_tags = NULL;
|
162
|
+
struct callback_data cbdata;
|
163
|
+
|
164
|
+
rb_scan_args(argc, argv, "14&", &rb_text, &rb_mode,
|
165
|
+
&rb_html, &rb_skip, &rb_flags, &rb_block);
|
166
|
+
|
167
|
+
text_encoding = validate_encoding(rb_text);
|
168
|
+
|
169
|
+
if (!NIL_P(rb_mode)) {
|
170
|
+
ID mode_sym;
|
171
|
+
Check_Type(rb_mode, T_SYMBOL);
|
172
|
+
|
173
|
+
mode_sym = SYM2ID(rb_mode);
|
174
|
+
if (mode_sym == rb_intern("all"))
|
175
|
+
link_mode = AUTOLINK_ALL;
|
176
|
+
else if (mode_sym == rb_intern("email_addresses"))
|
177
|
+
link_mode = AUTOLINK_EMAILS;
|
178
|
+
else if (mode_sym == rb_intern("urls"))
|
179
|
+
link_mode = AUTOLINK_URLS;
|
180
|
+
else
|
181
|
+
rb_raise(rb_eTypeError,
|
182
|
+
"Invalid linking mode "
|
183
|
+
"(possible values are :all, :urls, :email_addresses)");
|
184
|
+
}
|
185
|
+
|
186
|
+
if (!NIL_P(rb_html)) {
|
187
|
+
Check_Type(rb_html, T_STRING);
|
188
|
+
link_attr = RSTRING_PTR(rb_html);
|
189
|
+
}
|
190
|
+
|
191
|
+
if (!NIL_P(rb_flags)) {
|
192
|
+
Check_Type(rb_flags, T_FIXNUM);
|
193
|
+
link_flags = FIX2INT(rb_flags);
|
194
|
+
}
|
195
|
+
|
196
|
+
if (NIL_P(rb_skip))
|
197
|
+
rb_skip = rb_iv_get(self, "@skip_tags");
|
198
|
+
|
199
|
+
if (NIL_P(rb_skip)) {
|
200
|
+
skip_tags = SKIP_TAGS;
|
201
|
+
} else {
|
202
|
+
skip_tags = rinku_load_tags(rb_skip);
|
203
|
+
}
|
204
|
+
|
205
|
+
output_buf = bufnew(32);
|
206
|
+
cbdata.rb_block = rb_block;
|
207
|
+
cbdata.encoding = text_encoding;
|
208
|
+
count = rinku_autolink(
|
209
|
+
output_buf,
|
210
|
+
(const uint8_t *)RSTRING_PTR(rb_text),
|
211
|
+
(size_t)RSTRING_LEN(rb_text),
|
212
|
+
link_mode,
|
213
|
+
link_flags,
|
214
|
+
link_attr,
|
215
|
+
skip_tags,
|
216
|
+
RTEST(rb_block) ? &autolink_callback : NULL,
|
217
|
+
(void*)&cbdata);
|
218
|
+
|
219
|
+
if (count == 0)
|
220
|
+
result = rb_text;
|
221
|
+
else {
|
222
|
+
result = rb_enc_str_new((char *)output_buf->data, output_buf->size,
|
223
|
+
text_encoding);
|
224
|
+
}
|
225
|
+
|
226
|
+
if (skip_tags != SKIP_TAGS)
|
227
|
+
xfree(skip_tags);
|
228
|
+
|
229
|
+
bufrelease(output_buf);
|
230
|
+
return result;
|
231
|
+
}
|
232
|
+
|
233
|
+
void RUBY_EXPORT Init_rinku()
|
234
|
+
{
|
235
|
+
rb_mRinku = rb_define_module("Rinku");
|
236
|
+
rb_define_module_function(rb_mRinku, "auto_link", rb_rinku_autolink, -1);
|
237
|
+
rb_define_const(rb_mRinku, "AUTOLINK_SHORT_DOMAINS", INT2FIX(AUTOLINK_SHORT_DOMAINS));
|
238
|
+
}
|
239
|
+
|