rinku 1.7.3 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Rakefile +0 -19
- data/ext/rinku/autolink.c +114 -127
- data/ext/rinku/autolink.h +22 -16
- data/ext/rinku/rinku.c +34 -252
- data/ext/rinku/rinku.h +23 -5
- data/ext/rinku/rinku_rb.c +239 -0
- data/ext/rinku/utf8.c +187 -0
- data/ext/rinku/utf8.h +34 -0
- data/lib/rinku.rb +6 -2
- data/rinku.gemspec +12 -5
- data/test/autolink_test.rb +96 -18
- metadata +60 -16
data/ext/rinku/rinku.c
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) 2016, GitHub, Inc
|
3
3
|
*
|
4
4
|
* Permission to use, copy, modify, and distribute this software for any
|
5
5
|
* purpose with or without fee is hereby granted, provided that the above
|
@@ -13,28 +13,15 @@
|
|
13
13
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
14
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
15
|
*/
|
16
|
-
#define RSTRING_NOT_MODIFIED
|
17
|
-
|
18
|
-
#include <stdio.h>
|
19
|
-
#include "ruby.h"
|
20
|
-
|
21
|
-
#define RUBY_EXPORT __attribute__ ((visibility ("default")))
|
22
|
-
|
23
|
-
#ifdef HAVE_RUBY_ENCODING_H
|
24
|
-
#include <ruby/encoding.h>
|
25
|
-
#else
|
26
|
-
#define rb_enc_copy(dst, src)
|
27
|
-
#endif
|
28
|
-
|
29
|
-
#include "autolink.h"
|
30
|
-
#include "buffer.h"
|
31
|
-
|
32
16
|
#include <string.h>
|
33
17
|
#include <stdlib.h>
|
34
18
|
#include <stdio.h>
|
35
|
-
#include <
|
19
|
+
#include <assert.h>
|
36
20
|
|
37
|
-
|
21
|
+
#include "rinku.h"
|
22
|
+
#include "autolink.h"
|
23
|
+
#include "buffer.h"
|
24
|
+
#include "utf8.h"
|
38
25
|
|
39
26
|
typedef enum {
|
40
27
|
HTML_TAG_NONE = 0,
|
@@ -42,15 +29,6 @@ typedef enum {
|
|
42
29
|
HTML_TAG_CLOSE,
|
43
30
|
} html_tag;
|
44
31
|
|
45
|
-
typedef enum {
|
46
|
-
AUTOLINK_URLS = (1 << 0),
|
47
|
-
AUTOLINK_EMAILS = (1 << 1),
|
48
|
-
AUTOLINK_ALL = AUTOLINK_URLS|AUTOLINK_EMAILS
|
49
|
-
} autolink_mode;
|
50
|
-
|
51
|
-
typedef size_t (*autolink_parse_cb)(
|
52
|
-
size_t *rewind, struct buf *, uint8_t *, size_t, size_t, unsigned int);
|
53
|
-
|
54
32
|
typedef enum {
|
55
33
|
AUTOLINK_ACTION_NONE = 0,
|
56
34
|
AUTOLINK_ACTION_WWW,
|
@@ -59,11 +37,14 @@ typedef enum {
|
|
59
37
|
AUTOLINK_ACTION_SKIP_TAG
|
60
38
|
} autolink_action;
|
61
39
|
|
40
|
+
typedef bool (*autolink_parse_cb)(
|
41
|
+
struct autolink_pos *, const uint8_t *, size_t, size_t, unsigned int);
|
42
|
+
|
62
43
|
static autolink_parse_cb g_callbacks[] = {
|
63
44
|
NULL,
|
64
|
-
|
65
|
-
|
66
|
-
|
45
|
+
autolink__www, /* 1 */
|
46
|
+
autolink__email,/* 2 */
|
47
|
+
autolink__url, /* 3 */
|
67
48
|
};
|
68
49
|
|
69
50
|
static const char *g_hrefs[] = {
|
@@ -73,12 +54,6 @@ static const char *g_hrefs[] = {
|
|
73
54
|
"<a href=\"",
|
74
55
|
};
|
75
56
|
|
76
|
-
static void
|
77
|
-
autolink__print(struct buf *ob, const struct buf *link, void *payload)
|
78
|
-
{
|
79
|
-
bufput(ob, link->data, link->size);
|
80
|
-
}
|
81
|
-
|
82
57
|
/*
|
83
58
|
* Rinku assumes valid HTML encoding for all input, but there's still
|
84
59
|
* the case where a link can contain a double quote `"` that allows XSS.
|
@@ -86,7 +61,7 @@ autolink__print(struct buf *ob, const struct buf *link, void *payload)
|
|
86
61
|
* We need to properly escape the character we use for the `href` attribute
|
87
62
|
* declaration
|
88
63
|
*/
|
89
|
-
static void print_link(struct buf *ob, const
|
64
|
+
static void print_link(struct buf *ob, const uint8_t *link, size_t size)
|
90
65
|
{
|
91
66
|
size_t i = 0, org;
|
92
67
|
|
@@ -135,7 +110,7 @@ html_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname)
|
|
135
110
|
if (i == tag_size)
|
136
111
|
return HTML_TAG_NONE;
|
137
112
|
|
138
|
-
if (
|
113
|
+
if (rinku_isspace(tag_data[i]) || tag_data[i] == '>')
|
139
114
|
return closed ? HTML_TAG_CLOSE : HTML_TAG_OPEN;
|
140
115
|
|
141
116
|
return HTML_TAG_NONE;
|
@@ -178,7 +153,6 @@ autolink__skip_tag(
|
|
178
153
|
i++;
|
179
154
|
}
|
180
155
|
|
181
|
-
// bufput(ob, text, i + 1);
|
182
156
|
return i;
|
183
157
|
}
|
184
158
|
|
@@ -191,20 +165,16 @@ rinku_autolink(
|
|
191
165
|
unsigned int flags,
|
192
166
|
const char *link_attr,
|
193
167
|
const char **skip_tags,
|
194
|
-
void (*link_text_cb)(struct buf
|
168
|
+
void (*link_text_cb)(struct buf *, const uint8_t *, size_t, void *),
|
195
169
|
void *payload)
|
196
170
|
{
|
197
|
-
size_t i, end
|
198
|
-
|
199
|
-
char active_chars[256];
|
200
|
-
void (*link_url_cb)(struct buf *, const struct buf *, void *);
|
171
|
+
size_t i, end;
|
172
|
+
char active_chars[256] = {0};
|
201
173
|
int link_count = 0;
|
202
174
|
|
203
175
|
if (!text || size == 0)
|
204
176
|
return 0;
|
205
177
|
|
206
|
-
memset(active_chars, 0x0, sizeof(active_chars));
|
207
|
-
|
208
178
|
active_chars['<'] = AUTOLINK_ACTION_SKIP_TAG;
|
209
179
|
|
210
180
|
if (mode & AUTOLINK_EMAILS)
|
@@ -216,11 +186,8 @@ rinku_autolink(
|
|
216
186
|
active_chars[':'] = AUTOLINK_ACTION_URL;
|
217
187
|
}
|
218
188
|
|
219
|
-
if (link_text_cb == NULL)
|
220
|
-
link_text_cb = &autolink__print;
|
221
|
-
|
222
189
|
if (link_attr != NULL) {
|
223
|
-
while (
|
190
|
+
while (rinku_isspace(*link_attr))
|
224
191
|
link_attr++;
|
225
192
|
}
|
226
193
|
|
@@ -229,7 +196,8 @@ rinku_autolink(
|
|
229
196
|
i = end = 0;
|
230
197
|
|
231
198
|
while (i < size) {
|
232
|
-
|
199
|
+
struct autolink_pos link;
|
200
|
+
bool link_found;
|
233
201
|
char action = 0;
|
234
202
|
|
235
203
|
while (end < size && (action = active_chars[text[end]]) == 0)
|
@@ -244,23 +212,19 @@ rinku_autolink(
|
|
244
212
|
if (action == AUTOLINK_ACTION_SKIP_TAG) {
|
245
213
|
end += autolink__skip_tag(ob,
|
246
214
|
text + end, size - end, skip_tags);
|
247
|
-
|
248
215
|
continue;
|
249
216
|
}
|
250
217
|
|
251
|
-
|
252
|
-
|
253
|
-
link_end = g_callbacks[(int)action](
|
254
|
-
&rewind, link, (uint8_t *)text + end,
|
255
|
-
end - last_link_found,
|
256
|
-
size - end, flags);
|
218
|
+
link_found = g_callbacks[(int)action](
|
219
|
+
&link, text, end, size, flags);
|
257
220
|
|
258
|
-
|
259
|
-
|
260
|
-
|
221
|
+
if (link_found && link.start >= i) {
|
222
|
+
const uint8_t *link_str = text + link.start;
|
223
|
+
const size_t link_len = link.end - link.start;
|
261
224
|
|
225
|
+
bufput(ob, text + i, link.start - i);
|
262
226
|
bufputs(ob, g_hrefs[(int)action]);
|
263
|
-
print_link(ob,
|
227
|
+
print_link(ob, link_str, link_len);
|
264
228
|
|
265
229
|
if (link_attr) {
|
266
230
|
BUFPUTSL(ob, "\" ");
|
@@ -270,202 +234,20 @@ rinku_autolink(
|
|
270
234
|
BUFPUTSL(ob, "\">");
|
271
235
|
}
|
272
236
|
|
273
|
-
link_text_cb
|
237
|
+
if (link_text_cb) {
|
238
|
+
link_text_cb(ob, link_str, link_len, payload);
|
239
|
+
} else {
|
240
|
+
bufput(ob, link_str, link_len);
|
241
|
+
}
|
242
|
+
|
274
243
|
BUFPUTSL(ob, "</a>");
|
275
244
|
|
276
245
|
link_count++;
|
277
|
-
i = end
|
278
|
-
last_link_found = end = i;
|
246
|
+
end = i = link.end;
|
279
247
|
} else {
|
280
248
|
end = end + 1;
|
281
249
|
}
|
282
250
|
}
|
283
251
|
|
284
|
-
bufrelease(link);
|
285
252
|
return link_count;
|
286
253
|
}
|
287
|
-
|
288
|
-
|
289
|
-
/**
|
290
|
-
* Ruby code
|
291
|
-
*/
|
292
|
-
static void
|
293
|
-
autolink_callback(struct buf *link_text, const struct buf *link, void *block)
|
294
|
-
{
|
295
|
-
VALUE rb_link, rb_link_text;
|
296
|
-
rb_link = rb_str_new(link->data, link->size);
|
297
|
-
rb_link_text = rb_funcall((VALUE)block, rb_intern("call"), 1, rb_link);
|
298
|
-
Check_Type(rb_link_text, T_STRING);
|
299
|
-
bufput(link_text, RSTRING_PTR(rb_link_text), RSTRING_LEN(rb_link_text));
|
300
|
-
}
|
301
|
-
|
302
|
-
const char **rinku_load_tags(VALUE rb_skip)
|
303
|
-
{
|
304
|
-
const char **skip_tags;
|
305
|
-
size_t i, count;
|
306
|
-
|
307
|
-
Check_Type(rb_skip, T_ARRAY);
|
308
|
-
|
309
|
-
count = RARRAY_LEN(rb_skip);
|
310
|
-
skip_tags = xmalloc(sizeof(void *) * (count + 1));
|
311
|
-
|
312
|
-
for (i = 0; i < count; ++i) {
|
313
|
-
VALUE tag = rb_ary_entry(rb_skip, i);
|
314
|
-
Check_Type(tag, T_STRING);
|
315
|
-
skip_tags[i] = StringValueCStr(tag);
|
316
|
-
}
|
317
|
-
|
318
|
-
skip_tags[count] = NULL;
|
319
|
-
return skip_tags;
|
320
|
-
}
|
321
|
-
|
322
|
-
/*
|
323
|
-
* Document-method: auto_link
|
324
|
-
*
|
325
|
-
* call-seq:
|
326
|
-
* auto_link(text, mode=:all, link_attr=nil, skip_tags=nil, flags=0)
|
327
|
-
* auto_link(text, mode=:all, link_attr=nil, skip_tags=nil, flags=0) { |link_text| ... }
|
328
|
-
*
|
329
|
-
* Parses a block of text looking for "safe" urls or email addresses,
|
330
|
-
* and turns them into HTML links with the given attributes.
|
331
|
-
*
|
332
|
-
* NOTE: The block of text may or may not be HTML; if the text is HTML,
|
333
|
-
* Rinku will skip the relevant tags to prevent double-linking and linking
|
334
|
-
* inside `pre` blocks by default.
|
335
|
-
*
|
336
|
-
* NOTE: If the input text is HTML, it's expected to be already escaped.
|
337
|
-
* Rinku will perform no escaping.
|
338
|
-
*
|
339
|
-
* NOTE: Currently the follow protocols are considered safe and are the
|
340
|
-
* only ones that will be autolinked.
|
341
|
-
*
|
342
|
-
* http:// https:// ftp:// mailto://
|
343
|
-
*
|
344
|
-
* Email addresses are also autolinked by default. URLs without a protocol
|
345
|
-
* specifier but starting with 'www.' will also be autolinked, defaulting to
|
346
|
-
* the 'http://' protocol.
|
347
|
-
*
|
348
|
-
* - `text` is a string in plain text or HTML markup. If the string is formatted in
|
349
|
-
* HTML, Rinku is smart enough to skip the links that are already enclosed in `<a>`
|
350
|
-
* tags.`
|
351
|
-
*
|
352
|
-
* - `mode` is a symbol, either `:all`, `:urls` or `:email_addresses`,
|
353
|
-
* which specifies which kind of links will be auto-linked.
|
354
|
-
*
|
355
|
-
* - `link_attr` is a string containing the link attributes for each link that
|
356
|
-
* will be generated. These attributes are not sanitized and will be include as-is
|
357
|
-
* in each generated link, e.g.
|
358
|
-
*
|
359
|
-
* ~~~~~ruby
|
360
|
-
* auto_link('http://www.pokemon.com', :all, 'target="_blank"')
|
361
|
-
* # => '<a href="http://www.pokemon.com" target="_blank">http://www.pokemon.com</a>'
|
362
|
-
* ~~~~~
|
363
|
-
*
|
364
|
-
* This string can be autogenerated from a hash using the Rails `tag_options` helper.
|
365
|
-
*
|
366
|
-
* - `skip_tags` is a list of strings with the names of HTML tags that will be skipped
|
367
|
-
* when autolinking. If `nil`, this defaults to the value of the global `Rinku.skip_tags`,
|
368
|
-
* which is initially `["a", "pre", "code", "kbd", "script"]`.
|
369
|
-
*
|
370
|
-
* - `flag` is an optional boolean value specifying whether to recognize
|
371
|
-
* 'http://foo' as a valid domain, or require at least one '.'. It defaults to false.
|
372
|
-
*
|
373
|
-
* - `&block` is an optional block argument. If a block is passed, it will
|
374
|
-
* be yielded for each found link in the text, and its return value will be used instead
|
375
|
-
* of the name of the link. E.g.
|
376
|
-
*
|
377
|
-
* ~~~~~ruby
|
378
|
-
* auto_link('Check it out at http://www.pokemon.com') do |url|
|
379
|
-
* "THE POKEMAN WEBSITEZ"
|
380
|
-
* end
|
381
|
-
* # => 'Check it out at <a href="http://www.pokemon.com">THE POKEMAN WEBSITEZ</a>'
|
382
|
-
* ~~~~~~
|
383
|
-
*/
|
384
|
-
static VALUE
|
385
|
-
rb_rinku_autolink(int argc, VALUE *argv, VALUE self)
|
386
|
-
{
|
387
|
-
static const char *SKIP_TAGS[] = {"a", "pre", "code", "kbd", "script", NULL};
|
388
|
-
|
389
|
-
VALUE result, rb_text, rb_mode, rb_html, rb_skip, rb_flags, rb_block;
|
390
|
-
struct buf *output_buf;
|
391
|
-
int link_mode, count;
|
392
|
-
unsigned int link_flags = 0;
|
393
|
-
const char *link_attr = NULL;
|
394
|
-
const char **skip_tags = NULL;
|
395
|
-
ID mode_sym;
|
396
|
-
|
397
|
-
rb_scan_args(argc, argv, "14&", &rb_text, &rb_mode,
|
398
|
-
&rb_html, &rb_skip, &rb_flags, &rb_block);
|
399
|
-
|
400
|
-
Check_Type(rb_text, T_STRING);
|
401
|
-
|
402
|
-
if (!NIL_P(rb_mode)) {
|
403
|
-
Check_Type(rb_mode, T_SYMBOL);
|
404
|
-
mode_sym = SYM2ID(rb_mode);
|
405
|
-
} else {
|
406
|
-
mode_sym = rb_intern("all");
|
407
|
-
}
|
408
|
-
|
409
|
-
if (!NIL_P(rb_html)) {
|
410
|
-
Check_Type(rb_html, T_STRING);
|
411
|
-
link_attr = RSTRING_PTR(rb_html);
|
412
|
-
}
|
413
|
-
|
414
|
-
if (NIL_P(rb_skip))
|
415
|
-
rb_skip = rb_iv_get(self, "@skip_tags");
|
416
|
-
|
417
|
-
if (NIL_P(rb_skip)) {
|
418
|
-
skip_tags = SKIP_TAGS;
|
419
|
-
} else {
|
420
|
-
skip_tags = rinku_load_tags(rb_skip);
|
421
|
-
}
|
422
|
-
|
423
|
-
if (!NIL_P(rb_flags)) {
|
424
|
-
Check_Type(rb_flags, T_FIXNUM);
|
425
|
-
link_flags = FIX2INT(rb_flags);
|
426
|
-
}
|
427
|
-
|
428
|
-
output_buf = bufnew(32);
|
429
|
-
|
430
|
-
if (mode_sym == rb_intern("all"))
|
431
|
-
link_mode = AUTOLINK_ALL;
|
432
|
-
else if (mode_sym == rb_intern("email_addresses"))
|
433
|
-
link_mode = AUTOLINK_EMAILS;
|
434
|
-
else if (mode_sym == rb_intern("urls"))
|
435
|
-
link_mode = AUTOLINK_URLS;
|
436
|
-
else
|
437
|
-
rb_raise(rb_eTypeError,
|
438
|
-
"Invalid linking mode (possible values are :all, :urls, :email_addresses)");
|
439
|
-
|
440
|
-
count = rinku_autolink(
|
441
|
-
output_buf,
|
442
|
-
RSTRING_PTR(rb_text),
|
443
|
-
RSTRING_LEN(rb_text),
|
444
|
-
link_mode,
|
445
|
-
link_flags,
|
446
|
-
link_attr,
|
447
|
-
skip_tags,
|
448
|
-
RTEST(rb_block) ? &autolink_callback : NULL,
|
449
|
-
(void*)rb_block);
|
450
|
-
|
451
|
-
if (count == 0)
|
452
|
-
result = rb_text;
|
453
|
-
else {
|
454
|
-
result = rb_str_new(output_buf->data, output_buf->size);
|
455
|
-
rb_enc_copy(result, rb_text);
|
456
|
-
}
|
457
|
-
|
458
|
-
if (skip_tags != SKIP_TAGS)
|
459
|
-
xfree(skip_tags);
|
460
|
-
|
461
|
-
bufrelease(output_buf);
|
462
|
-
return result;
|
463
|
-
}
|
464
|
-
|
465
|
-
void RUBY_EXPORT Init_rinku()
|
466
|
-
{
|
467
|
-
rb_mRinku = rb_define_module("Rinku");
|
468
|
-
rb_define_method(rb_mRinku, "auto_link", rb_rinku_autolink, -1);
|
469
|
-
rb_define_const(rb_mRinku, "AUTOLINK_SHORT_DOMAINS", INT2FIX(SD_AUTOLINK_SHORT_DOMAINS));
|
470
|
-
}
|
471
|
-
|
data/ext/rinku/rinku.h
CHANGED
@@ -1,7 +1,25 @@
|
|
1
|
-
#ifndef
|
2
|
-
#define
|
1
|
+
#ifndef _RINKU_H
|
2
|
+
#define _RINKU_H
|
3
3
|
|
4
|
-
|
5
|
-
|
4
|
+
#include <stdint.h>
|
5
|
+
#include "buffer.h"
|
6
|
+
|
7
|
+
typedef enum {
|
8
|
+
AUTOLINK_URLS = (1 << 0),
|
9
|
+
AUTOLINK_EMAILS = (1 << 1),
|
10
|
+
AUTOLINK_ALL = AUTOLINK_URLS|AUTOLINK_EMAILS
|
11
|
+
} autolink_mode;
|
12
|
+
|
13
|
+
int
|
14
|
+
rinku_autolink(
|
15
|
+
struct buf *ob,
|
16
|
+
const uint8_t *text,
|
17
|
+
size_t size,
|
18
|
+
autolink_mode mode,
|
19
|
+
unsigned int flags,
|
20
|
+
const char *link_attr,
|
21
|
+
const char **skip_tags,
|
22
|
+
void (*link_text_cb)(struct buf *, const uint8_t *, size_t, void *),
|
23
|
+
void *payload);
|
6
24
|
|
7
|
-
#endif
|
25
|
+
#endif
|
@@ -0,0 +1,239 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2016, GitHub, Inc
|
3
|
+
*
|
4
|
+
* Permission to use, copy, modify, and distribute this software for any
|
5
|
+
* purpose with or without fee is hereby granted, provided that the above
|
6
|
+
* copyright notice and this permission notice appear in all copies.
|
7
|
+
*
|
8
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
9
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
10
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
11
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
12
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
13
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
|
+
*/
|
16
|
+
#include <stdio.h>
|
17
|
+
|
18
|
+
#define RUBY_EXPORT __attribute__ ((visibility ("default")))
|
19
|
+
|
20
|
+
#include <ruby.h>
|
21
|
+
#include <ruby/encoding.h>
|
22
|
+
|
23
|
+
#include "rinku.h"
|
24
|
+
#include "autolink.h"
|
25
|
+
|
26
|
+
static VALUE rb_mRinku;
|
27
|
+
|
28
|
+
struct callback_data {
|
29
|
+
VALUE rb_block;
|
30
|
+
rb_encoding *encoding;
|
31
|
+
};
|
32
|
+
|
33
|
+
static rb_encoding *
|
34
|
+
validate_encoding(VALUE rb_str)
|
35
|
+
{
|
36
|
+
rb_encoding *encoding;
|
37
|
+
|
38
|
+
Check_Type(rb_str, T_STRING);
|
39
|
+
encoding = rb_enc_get(rb_str);
|
40
|
+
|
41
|
+
if (!rb_enc_asciicompat(encoding))
|
42
|
+
rb_raise(rb_eArgError, "Invalid encoding");
|
43
|
+
|
44
|
+
if (rb_enc_str_coderange(rb_str) == ENC_CODERANGE_BROKEN)
|
45
|
+
rb_raise(rb_eArgError, "invalid byte sequence in %s",
|
46
|
+
rb_enc_name(encoding));
|
47
|
+
|
48
|
+
return encoding;
|
49
|
+
}
|
50
|
+
|
51
|
+
static void
|
52
|
+
autolink_callback(struct buf *link_text,
|
53
|
+
const uint8_t *url, size_t url_len, void *block)
|
54
|
+
{
|
55
|
+
struct callback_data *data = block;
|
56
|
+
VALUE rb_link, rb_link_text;
|
57
|
+
|
58
|
+
rb_link = rb_enc_str_new((const char *)url, url_len, data->encoding);
|
59
|
+
rb_link_text = rb_funcall(data->rb_block,
|
60
|
+
rb_intern("call"), 1, rb_link);
|
61
|
+
|
62
|
+
if (validate_encoding(rb_link_text) != data->encoding)
|
63
|
+
rb_raise(rb_eArgError, "encoding mismatch");
|
64
|
+
|
65
|
+
bufput(link_text, RSTRING_PTR(rb_link_text), RSTRING_LEN(rb_link_text));
|
66
|
+
}
|
67
|
+
|
68
|
+
const char **rinku_load_tags(VALUE rb_skip)
|
69
|
+
{
|
70
|
+
const char **skip_tags;
|
71
|
+
size_t i, count;
|
72
|
+
|
73
|
+
Check_Type(rb_skip, T_ARRAY);
|
74
|
+
|
75
|
+
count = RARRAY_LEN(rb_skip);
|
76
|
+
skip_tags = xmalloc(sizeof(void *) * (count + 1));
|
77
|
+
|
78
|
+
for (i = 0; i < count; ++i) {
|
79
|
+
VALUE tag = rb_ary_entry(rb_skip, i);
|
80
|
+
Check_Type(tag, T_STRING);
|
81
|
+
skip_tags[i] = StringValueCStr(tag);
|
82
|
+
}
|
83
|
+
|
84
|
+
skip_tags[count] = NULL;
|
85
|
+
return skip_tags;
|
86
|
+
}
|
87
|
+
|
88
|
+
/*
|
89
|
+
* Document-method: auto_link
|
90
|
+
*
|
91
|
+
* call-seq:
|
92
|
+
* auto_link(text, mode=:all, link_attr=nil, skip_tags=nil, flags=0)
|
93
|
+
* auto_link(text, mode=:all, link_attr=nil, skip_tags=nil, flags=0) { |link_text| ... }
|
94
|
+
*
|
95
|
+
* Parses a block of text looking for "safe" urls or email addresses,
|
96
|
+
* and turns them into HTML links with the given attributes.
|
97
|
+
*
|
98
|
+
* NOTE: The block of text may or may not be HTML; if the text is HTML,
|
99
|
+
* Rinku will skip the relevant tags to prevent double-linking and linking
|
100
|
+
* inside `pre` blocks by default.
|
101
|
+
*
|
102
|
+
* NOTE: If the input text is HTML, it's expected to be already escaped.
|
103
|
+
* Rinku will perform no escaping.
|
104
|
+
*
|
105
|
+
* NOTE: Currently the follow protocols are considered safe and are the
|
106
|
+
* only ones that will be autolinked.
|
107
|
+
*
|
108
|
+
* http:// https:// ftp:// mailto://
|
109
|
+
*
|
110
|
+
* Email addresses are also autolinked by default. URLs without a protocol
|
111
|
+
* specifier but starting with 'www.' will also be autolinked, defaulting to
|
112
|
+
* the 'http://' protocol.
|
113
|
+
*
|
114
|
+
* - `text` is a string in plain text or HTML markup. If the string is formatted in
|
115
|
+
* HTML, Rinku is smart enough to skip the links that are already enclosed in `<a>`
|
116
|
+
* tags.`
|
117
|
+
*
|
118
|
+
* - `mode` is a symbol, either `:all`, `:urls` or `:email_addresses`,
|
119
|
+
* which specifies which kind of links will be auto-linked.
|
120
|
+
*
|
121
|
+
* - `link_attr` is a string containing the link attributes for each link that
|
122
|
+
* will be generated. These attributes are not sanitized and will be include as-is
|
123
|
+
* in each generated link, e.g.
|
124
|
+
*
|
125
|
+
* ~~~~~ruby
|
126
|
+
* auto_link('http://www.pokemon.com', :all, 'target="_blank"')
|
127
|
+
* # => '<a href="http://www.pokemon.com" target="_blank">http://www.pokemon.com</a>'
|
128
|
+
* ~~~~~
|
129
|
+
*
|
130
|
+
* This string can be autogenerated from a hash using the Rails `tag_options` helper.
|
131
|
+
*
|
132
|
+
* - `skip_tags` is a list of strings with the names of HTML tags that will be skipped
|
133
|
+
* when autolinking. If `nil`, this defaults to the value of the global `Rinku.skip_tags`,
|
134
|
+
* which is initially `["a", "pre", "code", "kbd", "script"]`.
|
135
|
+
*
|
136
|
+
* - `flag` is an optional boolean value specifying whether to recognize
|
137
|
+
* 'http://foo' as a valid domain, or require at least one '.'. It defaults to false.
|
138
|
+
*
|
139
|
+
* - `&block` is an optional block argument. If a block is passed, it will
|
140
|
+
* be yielded for each found link in the text, and its return value will be used instead
|
141
|
+
* of the name of the link. E.g.
|
142
|
+
*
|
143
|
+
* ~~~~~ruby
|
144
|
+
* auto_link('Check it out at http://www.pokemon.com') do |url|
|
145
|
+
* "THE POKEMAN WEBSITEZ"
|
146
|
+
* end
|
147
|
+
* # => 'Check it out at <a href="http://www.pokemon.com">THE POKEMAN WEBSITEZ</a>'
|
148
|
+
* ~~~~~~
|
149
|
+
*/
|
150
|
+
static VALUE
|
151
|
+
rb_rinku_autolink(int argc, VALUE *argv, VALUE self)
|
152
|
+
{
|
153
|
+
static const char *SKIP_TAGS[] = {"a", "pre", "code", "kbd", "script", NULL};
|
154
|
+
|
155
|
+
VALUE result, rb_text, rb_mode, rb_html, rb_skip, rb_flags, rb_block;
|
156
|
+
rb_encoding *text_encoding;
|
157
|
+
struct buf *output_buf;
|
158
|
+
int link_mode = AUTOLINK_ALL, count;
|
159
|
+
unsigned int link_flags = 0;
|
160
|
+
const char *link_attr = NULL;
|
161
|
+
const char **skip_tags = NULL;
|
162
|
+
struct callback_data cbdata;
|
163
|
+
|
164
|
+
rb_scan_args(argc, argv, "14&", &rb_text, &rb_mode,
|
165
|
+
&rb_html, &rb_skip, &rb_flags, &rb_block);
|
166
|
+
|
167
|
+
text_encoding = validate_encoding(rb_text);
|
168
|
+
|
169
|
+
if (!NIL_P(rb_mode)) {
|
170
|
+
ID mode_sym;
|
171
|
+
Check_Type(rb_mode, T_SYMBOL);
|
172
|
+
|
173
|
+
mode_sym = SYM2ID(rb_mode);
|
174
|
+
if (mode_sym == rb_intern("all"))
|
175
|
+
link_mode = AUTOLINK_ALL;
|
176
|
+
else if (mode_sym == rb_intern("email_addresses"))
|
177
|
+
link_mode = AUTOLINK_EMAILS;
|
178
|
+
else if (mode_sym == rb_intern("urls"))
|
179
|
+
link_mode = AUTOLINK_URLS;
|
180
|
+
else
|
181
|
+
rb_raise(rb_eTypeError,
|
182
|
+
"Invalid linking mode "
|
183
|
+
"(possible values are :all, :urls, :email_addresses)");
|
184
|
+
}
|
185
|
+
|
186
|
+
if (!NIL_P(rb_html)) {
|
187
|
+
Check_Type(rb_html, T_STRING);
|
188
|
+
link_attr = RSTRING_PTR(rb_html);
|
189
|
+
}
|
190
|
+
|
191
|
+
if (!NIL_P(rb_flags)) {
|
192
|
+
Check_Type(rb_flags, T_FIXNUM);
|
193
|
+
link_flags = FIX2INT(rb_flags);
|
194
|
+
}
|
195
|
+
|
196
|
+
if (NIL_P(rb_skip))
|
197
|
+
rb_skip = rb_iv_get(self, "@skip_tags");
|
198
|
+
|
199
|
+
if (NIL_P(rb_skip)) {
|
200
|
+
skip_tags = SKIP_TAGS;
|
201
|
+
} else {
|
202
|
+
skip_tags = rinku_load_tags(rb_skip);
|
203
|
+
}
|
204
|
+
|
205
|
+
output_buf = bufnew(32);
|
206
|
+
cbdata.rb_block = rb_block;
|
207
|
+
cbdata.encoding = text_encoding;
|
208
|
+
count = rinku_autolink(
|
209
|
+
output_buf,
|
210
|
+
(const uint8_t *)RSTRING_PTR(rb_text),
|
211
|
+
(size_t)RSTRING_LEN(rb_text),
|
212
|
+
link_mode,
|
213
|
+
link_flags,
|
214
|
+
link_attr,
|
215
|
+
skip_tags,
|
216
|
+
RTEST(rb_block) ? &autolink_callback : NULL,
|
217
|
+
(void*)&cbdata);
|
218
|
+
|
219
|
+
if (count == 0)
|
220
|
+
result = rb_text;
|
221
|
+
else {
|
222
|
+
result = rb_enc_str_new((char *)output_buf->data, output_buf->size,
|
223
|
+
text_encoding);
|
224
|
+
}
|
225
|
+
|
226
|
+
if (skip_tags != SKIP_TAGS)
|
227
|
+
xfree(skip_tags);
|
228
|
+
|
229
|
+
bufrelease(output_buf);
|
230
|
+
return result;
|
231
|
+
}
|
232
|
+
|
233
|
+
void RUBY_EXPORT Init_rinku()
|
234
|
+
{
|
235
|
+
rb_mRinku = rb_define_module("Rinku");
|
236
|
+
rb_define_module_function(rb_mRinku, "auto_link", rb_rinku_autolink, -1);
|
237
|
+
rb_define_const(rb_mRinku, "AUTOLINK_SHORT_DOMAINS", INT2FIX(AUTOLINK_SHORT_DOMAINS));
|
238
|
+
}
|
239
|
+
|