zendesk-rinku 1.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/COPYING +13 -0
- data/README.markdown +120 -0
- data/Rakefile +67 -0
- data/ext/rinku/autolink.c +296 -0
- data/ext/rinku/autolink.h +51 -0
- data/ext/rinku/buffer.c +225 -0
- data/ext/rinku/buffer.h +96 -0
- data/ext/rinku/extconf.rb +6 -0
- data/ext/rinku/rinku.c +468 -0
- data/ext/rinku/rinku.h +7 -0
- data/lib/rails_rinku.rb +29 -0
- data/lib/rinku.rb +7 -0
- data/rinku.gemspec +38 -0
- data/test/autolink_test.rb +295 -0
- metadata +62 -0
@@ -0,0 +1,51 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2011, Vicent Marti
|
3
|
+
*
|
4
|
+
* Permission to use, copy, modify, and distribute this software for any
|
5
|
+
* purpose with or without fee is hereby granted, provided that the above
|
6
|
+
* copyright notice and this permission notice appear in all copies.
|
7
|
+
*
|
8
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
9
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
10
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
11
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
12
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
13
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
|
+
*/
|
16
|
+
|
17
|
+
#ifndef UPSKIRT_AUTOLINK_H
|
18
|
+
#define UPSKIRT_AUTOLINK_H
|
19
|
+
|
20
|
+
#include "buffer.h"
|
21
|
+
|
22
|
+
#ifdef __cplusplus
|
23
|
+
extern "C" {
|
24
|
+
#endif
|
25
|
+
|
26
|
+
enum {
|
27
|
+
SD_AUTOLINK_SHORT_DOMAINS = (1 << 0),
|
28
|
+
};
|
29
|
+
|
30
|
+
int
|
31
|
+
sd_autolink_issafe(const uint8_t *link, size_t link_len);
|
32
|
+
|
33
|
+
size_t
|
34
|
+
sd_autolink__www(size_t *rewind_p, struct buf *link,
|
35
|
+
uint8_t *data, size_t offset, size_t size, unsigned int flags);
|
36
|
+
|
37
|
+
size_t
|
38
|
+
sd_autolink__email(size_t *rewind_p, struct buf *link,
|
39
|
+
uint8_t *data, size_t offset, size_t size, unsigned int flags);
|
40
|
+
|
41
|
+
size_t
|
42
|
+
sd_autolink__url(size_t *rewind_p, struct buf *link,
|
43
|
+
uint8_t *data, size_t offset, size_t size, unsigned int flags);
|
44
|
+
|
45
|
+
#ifdef __cplusplus
|
46
|
+
}
|
47
|
+
#endif
|
48
|
+
|
49
|
+
#endif
|
50
|
+
|
51
|
+
/* vim: set filetype=c: */
|
data/ext/rinku/buffer.c
ADDED
@@ -0,0 +1,225 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2008, Natacha Porté
|
3
|
+
* Copyright (c) 2011, Vicent Martí
|
4
|
+
*
|
5
|
+
* Permission to use, copy, modify, and distribute this software for any
|
6
|
+
* purpose with or without fee is hereby granted, provided that the above
|
7
|
+
* copyright notice and this permission notice appear in all copies.
|
8
|
+
*
|
9
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
10
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
11
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
12
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
13
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
14
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
15
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
16
|
+
*/
|
17
|
+
|
18
|
+
#define BUFFER_MAX_ALLOC_SIZE (1024 * 1024 * 16) //16mb
|
19
|
+
|
20
|
+
#include "buffer.h"
|
21
|
+
|
22
|
+
#include <stdio.h>
|
23
|
+
#include <stdlib.h>
|
24
|
+
#include <string.h>
|
25
|
+
#include <assert.h>
|
26
|
+
|
27
|
+
/* MSVC compat */
|
28
|
+
#if defined(_MSC_VER)
|
29
|
+
# define _buf_vsnprintf _vsnprintf
|
30
|
+
#else
|
31
|
+
# define _buf_vsnprintf vsnprintf
|
32
|
+
#endif
|
33
|
+
|
34
|
+
int
|
35
|
+
bufprefix(const struct buf *buf, const char *prefix)
|
36
|
+
{
|
37
|
+
size_t i;
|
38
|
+
assert(buf && buf->unit);
|
39
|
+
|
40
|
+
for (i = 0; i < buf->size; ++i) {
|
41
|
+
if (prefix[i] == 0)
|
42
|
+
return 0;
|
43
|
+
|
44
|
+
if (buf->data[i] != prefix[i])
|
45
|
+
return buf->data[i] - prefix[i];
|
46
|
+
}
|
47
|
+
|
48
|
+
return 0;
|
49
|
+
}
|
50
|
+
|
51
|
+
/* bufgrow: increasing the allocated size to the given value */
|
52
|
+
int
|
53
|
+
bufgrow(struct buf *buf, size_t neosz)
|
54
|
+
{
|
55
|
+
size_t neoasz;
|
56
|
+
void *neodata;
|
57
|
+
|
58
|
+
assert(buf && buf->unit);
|
59
|
+
|
60
|
+
if (neosz > BUFFER_MAX_ALLOC_SIZE)
|
61
|
+
return BUF_ENOMEM;
|
62
|
+
|
63
|
+
if (buf->asize >= neosz)
|
64
|
+
return BUF_OK;
|
65
|
+
|
66
|
+
neoasz = buf->asize + buf->unit;
|
67
|
+
while (neoasz < neosz)
|
68
|
+
neoasz += buf->unit;
|
69
|
+
|
70
|
+
neodata = realloc(buf->data, neoasz);
|
71
|
+
if (!neodata)
|
72
|
+
return BUF_ENOMEM;
|
73
|
+
|
74
|
+
buf->data = neodata;
|
75
|
+
buf->asize = neoasz;
|
76
|
+
return BUF_OK;
|
77
|
+
}
|
78
|
+
|
79
|
+
|
80
|
+
/* bufnew: allocation of a new buffer */
|
81
|
+
struct buf *
|
82
|
+
bufnew(size_t unit)
|
83
|
+
{
|
84
|
+
struct buf *ret;
|
85
|
+
ret = malloc(sizeof (struct buf));
|
86
|
+
|
87
|
+
if (ret) {
|
88
|
+
ret->data = 0;
|
89
|
+
ret->size = ret->asize = 0;
|
90
|
+
ret->unit = unit;
|
91
|
+
}
|
92
|
+
return ret;
|
93
|
+
}
|
94
|
+
|
95
|
+
/* bufnullterm: NULL-termination of the string array */
|
96
|
+
const char *
|
97
|
+
bufcstr(struct buf *buf)
|
98
|
+
{
|
99
|
+
assert(buf && buf->unit);
|
100
|
+
|
101
|
+
if (buf->size < buf->asize && buf->data[buf->size] == 0)
|
102
|
+
return (char *)buf->data;
|
103
|
+
|
104
|
+
if (buf->size + 1 <= buf->asize || bufgrow(buf, buf->size + 1) == 0) {
|
105
|
+
buf->data[buf->size] = 0;
|
106
|
+
return (char *)buf->data;
|
107
|
+
}
|
108
|
+
|
109
|
+
return NULL;
|
110
|
+
}
|
111
|
+
|
112
|
+
/* bufprintf: formatted printing to a buffer */
|
113
|
+
void
|
114
|
+
bufprintf(struct buf *buf, const char *fmt, ...)
|
115
|
+
{
|
116
|
+
va_list ap;
|
117
|
+
int n;
|
118
|
+
|
119
|
+
assert(buf && buf->unit);
|
120
|
+
|
121
|
+
if (buf->size >= buf->asize && bufgrow(buf, buf->size + 1) < 0)
|
122
|
+
return;
|
123
|
+
|
124
|
+
va_start(ap, fmt);
|
125
|
+
n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
|
126
|
+
va_end(ap);
|
127
|
+
|
128
|
+
if (n < 0) {
|
129
|
+
#ifdef _MSC_VER
|
130
|
+
va_start(ap, fmt);
|
131
|
+
n = _vscprintf(fmt, ap);
|
132
|
+
va_end(ap);
|
133
|
+
#else
|
134
|
+
return;
|
135
|
+
#endif
|
136
|
+
}
|
137
|
+
|
138
|
+
if ((size_t)n >= buf->asize - buf->size) {
|
139
|
+
if (bufgrow(buf, buf->size + n + 1) < 0)
|
140
|
+
return;
|
141
|
+
|
142
|
+
va_start(ap, fmt);
|
143
|
+
n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
|
144
|
+
va_end(ap);
|
145
|
+
}
|
146
|
+
|
147
|
+
if (n < 0)
|
148
|
+
return;
|
149
|
+
|
150
|
+
buf->size += n;
|
151
|
+
}
|
152
|
+
|
153
|
+
/* bufput: appends raw data to a buffer */
|
154
|
+
void
|
155
|
+
bufput(struct buf *buf, const void *data, size_t len)
|
156
|
+
{
|
157
|
+
assert(buf && buf->unit);
|
158
|
+
|
159
|
+
if (buf->size + len > buf->asize && bufgrow(buf, buf->size + len) < 0)
|
160
|
+
return;
|
161
|
+
|
162
|
+
memcpy(buf->data + buf->size, data, len);
|
163
|
+
buf->size += len;
|
164
|
+
}
|
165
|
+
|
166
|
+
/* bufputs: append a NUL-terminated string (without its terminator) to a buffer */
void
bufputs(struct buf *buf, const char *str)
{
	size_t length = strlen(str);

	bufput(buf, str, length);
}
|
172
|
+
|
173
|
+
|
174
|
+
/* bufputc: appends a single uint8_t to a buffer */
|
175
|
+
void
|
176
|
+
bufputc(struct buf *buf, int c)
|
177
|
+
{
|
178
|
+
assert(buf && buf->unit);
|
179
|
+
|
180
|
+
if (buf->size + 1 > buf->asize && bufgrow(buf, buf->size + 1) < 0)
|
181
|
+
return;
|
182
|
+
|
183
|
+
buf->data[buf->size] = c;
|
184
|
+
buf->size += 1;
|
185
|
+
}
|
186
|
+
|
187
|
+
/* bufrelease: decrease the reference count and free the buffer if needed */
|
188
|
+
void
|
189
|
+
bufrelease(struct buf *buf)
|
190
|
+
{
|
191
|
+
if (!buf)
|
192
|
+
return;
|
193
|
+
|
194
|
+
free(buf->data);
|
195
|
+
free(buf);
|
196
|
+
}
|
197
|
+
|
198
|
+
|
199
|
+
/* bufreset: frees internal data of the buffer */
|
200
|
+
void
|
201
|
+
bufreset(struct buf *buf)
|
202
|
+
{
|
203
|
+
if (!buf)
|
204
|
+
return;
|
205
|
+
|
206
|
+
free(buf->data);
|
207
|
+
buf->data = NULL;
|
208
|
+
buf->size = buf->asize = 0;
|
209
|
+
}
|
210
|
+
|
211
|
+
/* bufslurp: removes a given number of bytes from the head of the array */
|
212
|
+
void
|
213
|
+
bufslurp(struct buf *buf, size_t len)
|
214
|
+
{
|
215
|
+
assert(buf && buf->unit);
|
216
|
+
|
217
|
+
if (len >= buf->size) {
|
218
|
+
buf->size = 0;
|
219
|
+
return;
|
220
|
+
}
|
221
|
+
|
222
|
+
buf->size -= len;
|
223
|
+
memmove(buf->data, buf->data + len, buf->size);
|
224
|
+
}
|
225
|
+
|
data/ext/rinku/buffer.h
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2008, Natacha Porté
|
3
|
+
* Copyright (c) 2011, Vicent Martí
|
4
|
+
*
|
5
|
+
* Permission to use, copy, modify, and distribute this software for any
|
6
|
+
* purpose with or without fee is hereby granted, provided that the above
|
7
|
+
* copyright notice and this permission notice appear in all copies.
|
8
|
+
*
|
9
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
10
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
11
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
12
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
13
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
14
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
15
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
16
|
+
*/
|
17
|
+
|
18
|
+
#ifndef BUFFER_H__
|
19
|
+
#define BUFFER_H__
|
20
|
+
|
21
|
+
#include <stddef.h>
|
22
|
+
#include <stdarg.h>
|
23
|
+
#include <stdint.h>
|
24
|
+
|
25
|
+
#ifdef __cplusplus
|
26
|
+
extern "C" {
|
27
|
+
#endif
|
28
|
+
|
29
|
+
#if defined(_MSC_VER)
|
30
|
+
#define __attribute__(x)
|
31
|
+
#define inline
|
32
|
+
#endif
|
33
|
+
|
34
|
+
/* buferror_t: status codes returned by the buffer functions */
typedef enum {
	BUF_ENOMEM = -1,	/* the allocation could not be made */
	BUF_OK = 0		/* success */
} buferror_t;
|
38
|
+
|
39
|
+
/* struct buf: character array buffer */
struct buf {
	uint8_t *data;	/* actual character data */
	size_t size;	/* size of the string */
	size_t asize;	/* allocated size (0 = volatile buffer) */
	size_t unit;	/* reallocation unit size (0 = read-only buffer) */
};

/*
 * BUF_STATIC: initializer for a read-only buffer wrapping a string literal.
 * One value per member of struct buf: data, size, asize, unit.
 */
#define BUF_STATIC(string) \
	{ (uint8_t *)(string), sizeof(string) - 1, sizeof(string), 0 }

/*
 * BUF_VOLATILE: initializer for a volatile buffer wrapping an existing
 * NUL-terminated string. Expands to a call to strlen(), so the translation
 * unit using it must include <string.h>.
 */
#define BUF_VOLATILE(strname) \
	{ (uint8_t *)(strname), strlen(strname), 0, 0 }

/* BUFPUTSL: optimized bufputs of a string literal (length known at compile time) */
#define BUFPUTSL(output, literal) \
	bufput((output), (literal), sizeof(literal) - 1)
|
58
|
+
|
59
|
+
/* bufgrow: increasing the allocated size to the given value */
|
60
|
+
int bufgrow(struct buf *, size_t);
|
61
|
+
|
62
|
+
/* bufnew: allocation of a new buffer */
|
63
|
+
struct buf *bufnew(size_t) __attribute__ ((malloc));
|
64
|
+
|
65
|
+
/* bufcstr: NUL-termination of the string array (making a C-string) */
|
66
|
+
const char *bufcstr(struct buf *);
|
67
|
+
|
68
|
+
/* bufprefix: compare the beginning of a buffer with a string */
|
69
|
+
int bufprefix(const struct buf *buf, const char *prefix);
|
70
|
+
|
71
|
+
/* bufput: appends raw data to a buffer */
|
72
|
+
void bufput(struct buf *, const void *, size_t);
|
73
|
+
|
74
|
+
/* bufputs: appends a NUL-terminated string to a buffer */
|
75
|
+
void bufputs(struct buf *, const char *);
|
76
|
+
|
77
|
+
/* bufputc: appends a single char to a buffer */
|
78
|
+
void bufputc(struct buf *, int);
|
79
|
+
|
80
|
+
/* bufrelease: frees the buffer and the data it owns */
|
81
|
+
void bufrelease(struct buf *);
|
82
|
+
|
83
|
+
/* bufreset: frees internal data of the buffer */
|
84
|
+
void bufreset(struct buf *);
|
85
|
+
|
86
|
+
/* bufslurp: removes a given number of bytes from the head of the array */
|
87
|
+
void bufslurp(struct buf *, size_t);
|
88
|
+
|
89
|
+
/* bufprintf: formatted printing to a buffer */
|
90
|
+
void bufprintf(struct buf *, const char *, ...) __attribute__ ((format (printf, 2, 3)));
|
91
|
+
|
92
|
+
#ifdef __cplusplus
|
93
|
+
}
|
94
|
+
#endif
|
95
|
+
|
96
|
+
#endif
|
data/ext/rinku/rinku.c
ADDED
@@ -0,0 +1,468 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2011, Vicent Marti
|
3
|
+
*
|
4
|
+
* Permission to use, copy, modify, and distribute this software for any
|
5
|
+
* purpose with or without fee is hereby granted, provided that the above
|
6
|
+
* copyright notice and this permission notice appear in all copies.
|
7
|
+
*
|
8
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
9
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
10
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
11
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
12
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
13
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
|
+
*/
|
16
|
+
#define RSTRING_NOT_MODIFIED
|
17
|
+
|
18
|
+
#include <stdio.h>
|
19
|
+
#include "ruby.h"
|
20
|
+
|
21
|
+
#define RUBY_EXPORT __attribute__ ((visibility ("default")))
|
22
|
+
|
23
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
24
|
+
#include <ruby/encoding.h>
|
25
|
+
#else
|
26
|
+
#define rb_enc_copy(dst, src)
|
27
|
+
#endif
|
28
|
+
|
29
|
+
#include "autolink.h"
|
30
|
+
#include "buffer.h"
|
31
|
+
|
32
|
+
#include <string.h>
|
33
|
+
#include <stdlib.h>
|
34
|
+
#include <stdio.h>
|
35
|
+
#include <ctype.h>
|
36
|
+
|
37
|
+
static VALUE rb_mRinku;
|
38
|
+
|
39
|
+
typedef enum {
|
40
|
+
HTML_TAG_NONE = 0,
|
41
|
+
HTML_TAG_OPEN,
|
42
|
+
HTML_TAG_CLOSE,
|
43
|
+
} html_tag;
|
44
|
+
|
45
|
+
typedef enum {
|
46
|
+
AUTOLINK_URLS = (1 << 0),
|
47
|
+
AUTOLINK_EMAILS = (1 << 1),
|
48
|
+
AUTOLINK_ALL = AUTOLINK_URLS|AUTOLINK_EMAILS
|
49
|
+
} autolink_mode;
|
50
|
+
|
51
|
+
typedef size_t (*autolink_parse_cb)(
|
52
|
+
size_t *rewind, struct buf *, uint8_t *, size_t, size_t, unsigned int);
|
53
|
+
|
54
|
+
typedef enum {
|
55
|
+
AUTOLINK_ACTION_NONE = 0,
|
56
|
+
AUTOLINK_ACTION_WWW,
|
57
|
+
AUTOLINK_ACTION_EMAIL,
|
58
|
+
AUTOLINK_ACTION_URL,
|
59
|
+
AUTOLINK_ACTION_SKIP_TAG
|
60
|
+
} autolink_action;
|
61
|
+
|
62
|
+
static autolink_parse_cb g_callbacks[] = {
|
63
|
+
NULL,
|
64
|
+
sd_autolink__www, /* 1 */
|
65
|
+
sd_autolink__email,/* 2 */
|
66
|
+
sd_autolink__url, /* 3 */
|
67
|
+
};
|
68
|
+
|
69
|
+
static const char *g_hrefs[] = {
|
70
|
+
NULL,
|
71
|
+
"<a href=\"http://",
|
72
|
+
"<a href=\"mailto:",
|
73
|
+
"<a href=\"",
|
74
|
+
};
|
75
|
+
|
76
|
+
static void
|
77
|
+
autolink__print(struct buf *ob, const struct buf *link, void *payload)
|
78
|
+
{
|
79
|
+
bufput(ob, link->data, link->size);
|
80
|
+
}
|
81
|
+
|
82
|
+
/*
 * Rinku assumes valid HTML encoding for all input, but there's still
 * the case where a link can contain a double quote `"` that allows XSS.
 *
 * We need to properly escape the character we use for the `href` attribute
 * declaration: every `"` in the link is written as the `&quot;` entity and
 * all other bytes are copied through unchanged.
 *
 * - `ob` is the output buffer the escaped link is appended to
 * - `link` / `size` are the link bytes and their count
 */
static void print_link(struct buf *ob, const char *link, size_t size)
{
	size_t i = 0, org;

	while (i < size) {
		org = i;

		/* find the end of the run that needs no escaping */
		while (i < size && link[i] != '"')
			i++;

		if (i > org)
			bufput(ob, link + org, i - org);

		if (i >= size)
			break;

		/* link[i] is a double quote: emit the entity in its place */
		BUFPUTSL(ob, "&quot;");
		i++;
	}
}
|
109
|
+
|
110
|
+
/* From sundown/html/html.c */
|
111
|
+
static int
|
112
|
+
html_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname)
|
113
|
+
{
|
114
|
+
size_t i;
|
115
|
+
int closed = 0;
|
116
|
+
|
117
|
+
if (tag_size < 3 || tag_data[0] != '<')
|
118
|
+
return HTML_TAG_NONE;
|
119
|
+
|
120
|
+
i = 1;
|
121
|
+
|
122
|
+
if (tag_data[i] == '/') {
|
123
|
+
closed = 1;
|
124
|
+
i++;
|
125
|
+
}
|
126
|
+
|
127
|
+
for (; i < tag_size; ++i, ++tagname) {
|
128
|
+
if (*tagname == 0)
|
129
|
+
break;
|
130
|
+
|
131
|
+
if (tag_data[i] != *tagname)
|
132
|
+
return HTML_TAG_NONE;
|
133
|
+
}
|
134
|
+
|
135
|
+
if (i == tag_size)
|
136
|
+
return HTML_TAG_NONE;
|
137
|
+
|
138
|
+
if (isspace(tag_data[i]) || tag_data[i] == '>')
|
139
|
+
return closed ? HTML_TAG_CLOSE : HTML_TAG_OPEN;
|
140
|
+
|
141
|
+
return HTML_TAG_NONE;
|
142
|
+
}
|
143
|
+
|
144
|
+
static size_t
|
145
|
+
autolink__skip_tag(
|
146
|
+
struct buf *ob,
|
147
|
+
const uint8_t *text,
|
148
|
+
size_t size,
|
149
|
+
const char **skip_tags)
|
150
|
+
{
|
151
|
+
size_t i = 0;
|
152
|
+
|
153
|
+
while (i < size && text[i] != '>')
|
154
|
+
i++;
|
155
|
+
|
156
|
+
while (*skip_tags != NULL) {
|
157
|
+
if (html_is_tag(text, size, *skip_tags) == HTML_TAG_OPEN)
|
158
|
+
break;
|
159
|
+
|
160
|
+
skip_tags++;
|
161
|
+
}
|
162
|
+
|
163
|
+
if (*skip_tags != NULL) {
|
164
|
+
for (;;) {
|
165
|
+
while (i < size && text[i] != '<')
|
166
|
+
i++;
|
167
|
+
|
168
|
+
if (i == size)
|
169
|
+
break;
|
170
|
+
|
171
|
+
if (html_is_tag(text + i, size - i, *skip_tags) == HTML_TAG_CLOSE)
|
172
|
+
break;
|
173
|
+
|
174
|
+
i++;
|
175
|
+
}
|
176
|
+
|
177
|
+
while (i < size && text[i] != '>')
|
178
|
+
i++;
|
179
|
+
}
|
180
|
+
|
181
|
+
// bufput(ob, text, i + 1);
|
182
|
+
return i;
|
183
|
+
}
|
184
|
+
|
185
|
+
int
|
186
|
+
rinku_autolink(
|
187
|
+
struct buf *ob,
|
188
|
+
const uint8_t *text,
|
189
|
+
size_t size,
|
190
|
+
autolink_mode mode,
|
191
|
+
unsigned int flags,
|
192
|
+
const char *link_attr,
|
193
|
+
const char **skip_tags,
|
194
|
+
void (*link_text_cb)(struct buf *ob, const struct buf *link, void *payload),
|
195
|
+
void *payload)
|
196
|
+
{
|
197
|
+
size_t i, end;
|
198
|
+
struct buf *link = bufnew(16);
|
199
|
+
char active_chars[256];
|
200
|
+
void (*link_url_cb)(struct buf *, const struct buf *, void *);
|
201
|
+
int link_count = 0;
|
202
|
+
|
203
|
+
if (!text || size == 0)
|
204
|
+
return 0;
|
205
|
+
|
206
|
+
memset(active_chars, 0x0, sizeof(active_chars));
|
207
|
+
|
208
|
+
active_chars['<'] = AUTOLINK_ACTION_SKIP_TAG;
|
209
|
+
|
210
|
+
if (mode & AUTOLINK_EMAILS)
|
211
|
+
active_chars['@'] = AUTOLINK_ACTION_EMAIL;
|
212
|
+
|
213
|
+
if (mode & AUTOLINK_URLS) {
|
214
|
+
active_chars['w'] = AUTOLINK_ACTION_WWW;
|
215
|
+
active_chars['W'] = AUTOLINK_ACTION_WWW;
|
216
|
+
active_chars[':'] = AUTOLINK_ACTION_URL;
|
217
|
+
}
|
218
|
+
|
219
|
+
if (link_text_cb == NULL)
|
220
|
+
link_text_cb = &autolink__print;
|
221
|
+
|
222
|
+
if (link_attr != NULL) {
|
223
|
+
while (isspace(*link_attr))
|
224
|
+
link_attr++;
|
225
|
+
}
|
226
|
+
|
227
|
+
bufgrow(ob, size);
|
228
|
+
|
229
|
+
i = end = 0;
|
230
|
+
|
231
|
+
while (i < size) {
|
232
|
+
size_t rewind, link_end;
|
233
|
+
char action = 0;
|
234
|
+
|
235
|
+
while (end < size && (action = active_chars[text[end]]) == 0)
|
236
|
+
end++;
|
237
|
+
|
238
|
+
if (end == size) {
|
239
|
+
if (link_count > 0)
|
240
|
+
bufput(ob, text + i, end - i);
|
241
|
+
break;
|
242
|
+
}
|
243
|
+
|
244
|
+
if (action == AUTOLINK_ACTION_SKIP_TAG) {
|
245
|
+
end += autolink__skip_tag(ob,
|
246
|
+
text + end, size - end, skip_tags);
|
247
|
+
|
248
|
+
continue;
|
249
|
+
}
|
250
|
+
|
251
|
+
link->size = 0;
|
252
|
+
link_end = g_callbacks[(int)action](
|
253
|
+
&rewind, link, (uint8_t *)text + end, end, size - end, flags);
|
254
|
+
|
255
|
+
/* print the link */
|
256
|
+
if (link_end > 0) {
|
257
|
+
bufput(ob, text + i, end - i - rewind);
|
258
|
+
|
259
|
+
bufputs(ob, g_hrefs[(int)action]);
|
260
|
+
print_link(ob, link->data, link->size);
|
261
|
+
|
262
|
+
if (link_attr) {
|
263
|
+
BUFPUTSL(ob, "\" ");
|
264
|
+
bufputs(ob, link_attr);
|
265
|
+
bufputc(ob, '>');
|
266
|
+
} else {
|
267
|
+
BUFPUTSL(ob, "\">");
|
268
|
+
}
|
269
|
+
|
270
|
+
link_text_cb(ob, link, payload);
|
271
|
+
BUFPUTSL(ob, "</a>");
|
272
|
+
|
273
|
+
link_count++;
|
274
|
+
i = end + link_end;
|
275
|
+
end = i;
|
276
|
+
} else {
|
277
|
+
end = end + 1;
|
278
|
+
}
|
279
|
+
}
|
280
|
+
|
281
|
+
bufrelease(link);
|
282
|
+
return link_count;
|
283
|
+
}
|
284
|
+
|
285
|
+
|
286
|
+
/**
|
287
|
+
* Ruby code
|
288
|
+
*/
|
289
|
+
static void
|
290
|
+
autolink_callback(struct buf *link_text, const struct buf *link, void *block)
|
291
|
+
{
|
292
|
+
VALUE rb_link, rb_link_text;
|
293
|
+
rb_link = rb_str_new(link->data, link->size);
|
294
|
+
rb_link_text = rb_funcall((VALUE)block, rb_intern("call"), 1, rb_link);
|
295
|
+
Check_Type(rb_link_text, T_STRING);
|
296
|
+
bufput(link_text, RSTRING_PTR(rb_link_text), RSTRING_LEN(rb_link_text));
|
297
|
+
}
|
298
|
+
|
299
|
+
/*
 * Converts a Ruby array of tag-name strings into a NULL-terminated C array.
 *
 * The returned array is allocated with xmalloc and must be released by the
 * caller with xfree. Its entries point into the Ruby strings, so `rb_skip`
 * has to stay alive for as long as the array is used.
 *
 * Raises (via Check_Type / StringValueCStr) when `rb_skip` is not an Array
 * or an element is not a String usable as a C string.
 */
const char **rinku_load_tags(VALUE rb_skip)
{
	const char **skip_tags;
	size_t i, count;

	Check_Type(rb_skip, T_ARRAY);

	count = RARRAY_LEN(rb_skip);

	/*
	 * Validate every element before allocating: the checks raise, and
	 * raising after xmalloc would leak the array.
	 */
	for (i = 0; i < count; ++i) {
		VALUE tag = rb_ary_entry(rb_skip, i);
		Check_Type(tag, T_STRING);
		(void)StringValueCStr(tag);
	}

	skip_tags = xmalloc(sizeof(void *) * (count + 1));

	for (i = 0; i < count; ++i) {
		VALUE tag = rb_ary_entry(rb_skip, i);
		skip_tags[i] = StringValueCStr(tag);
	}

	skip_tags[count] = NULL;
	return skip_tags;
}
|
318
|
+
|
319
|
+
/*
|
320
|
+
* Document-method: auto_link
|
321
|
+
*
|
322
|
+
* call-seq:
|
323
|
+
* auto_link(text, mode=:all, link_attr=nil, skip_tags=nil, flags=0)
|
324
|
+
* auto_link(text, mode=:all, link_attr=nil, skip_tags=nil, flags=0) { |link_text| ... }
|
325
|
+
*
|
326
|
+
* Parses a block of text looking for "safe" urls or email addresses,
|
327
|
+
* and turns them into HTML links with the given attributes.
|
328
|
+
*
|
329
|
+
* NOTE: The block of text may or may not be HTML; if the text is HTML,
|
330
|
+
* Rinku will skip the relevant tags to prevent double-linking and linking
|
331
|
+
* inside `pre` blocks by default.
|
332
|
+
*
|
333
|
+
* NOTE: If the input text is HTML, it's expected to be already escaped.
|
334
|
+
* Rinku will perform no escaping.
|
335
|
+
*
|
336
|
+
* NOTE: Currently the follow protocols are considered safe and are the
|
337
|
+
* only ones that will be autolinked.
|
338
|
+
*
|
339
|
+
* http:// https:// ftp:// mailto://
|
340
|
+
*
|
341
|
+
* Email addresses are also autolinked by default. URLs without a protocol
|
342
|
+
* specifier but starting with 'www.' will also be autolinked, defaulting to
|
343
|
+
* the 'http://' protocol.
|
344
|
+
*
|
345
|
+
* - `text` is a string in plain text or HTML markup. If the string is formatted in
|
346
|
+
* HTML, Rinku is smart enough to skip the links that are already enclosed in `<a>`
|
347
|
+
* tags.`
|
348
|
+
*
|
349
|
+
* - `mode` is a symbol, either `:all`, `:urls` or `:email_addresses`,
|
350
|
+
* which specifies which kind of links will be auto-linked.
|
351
|
+
*
|
352
|
+
* - `link_attr` is a string containing the link attributes for each link that
|
353
|
+
* will be generated. These attributes are not sanitized and will be include as-is
|
354
|
+
* in each generated link, e.g.
|
355
|
+
*
|
356
|
+
* ~~~~~ruby
|
357
|
+
* auto_link('http://www.pokemon.com', :all, 'target="_blank"')
|
358
|
+
* # => '<a href="http://www.pokemon.com" target="_blank">http://www.pokemon.com</a>'
|
359
|
+
* ~~~~~
|
360
|
+
*
|
361
|
+
* This string can be autogenerated from a hash using the Rails `tag_options` helper.
|
362
|
+
*
|
363
|
+
* - `skip_tags` is a list of strings with the names of HTML tags that will be skipped
|
364
|
+
* when autolinking. If `nil`, this defaults to the value of the global `Rinku.skip_tags`,
|
365
|
+
* which is initially `["a", "pre", "code", "kbd", "script"]`.
|
366
|
+
*
|
367
|
+
* - `flag` is an optional boolean value specifying whether to recognize
|
368
|
+
* 'http://foo' as a valid domain, or require at least one '.'. It defaults to false.
|
369
|
+
*
|
370
|
+
* - `&block` is an optional block argument. If a block is passed, it will
|
371
|
+
* be yielded for each found link in the text, and its return value will be used instead
|
372
|
+
* of the name of the link. E.g.
|
373
|
+
*
|
374
|
+
* ~~~~~ruby
|
375
|
+
* auto_link('Check it out at http://www.pokemon.com') do |url|
|
376
|
+
* "THE POKEMAN WEBSITEZ"
|
377
|
+
* end
|
378
|
+
* # => 'Check it out at <a href="http://www.pokemon.com">THE POKEMAN WEBSITEZ</a>'
|
379
|
+
* ~~~~~~
|
380
|
+
*/
|
381
|
+
static VALUE
|
382
|
+
rb_rinku_autolink(int argc, VALUE *argv, VALUE self)
|
383
|
+
{
|
384
|
+
static const char *SKIP_TAGS[] = {"a", "pre", "code", "kbd", "script", NULL};
|
385
|
+
|
386
|
+
VALUE result, rb_text, rb_mode, rb_html, rb_skip, rb_flags, rb_block;
|
387
|
+
struct buf *output_buf;
|
388
|
+
int link_mode, count;
|
389
|
+
unsigned int link_flags = 0;
|
390
|
+
const char *link_attr = NULL;
|
391
|
+
const char **skip_tags = NULL;
|
392
|
+
ID mode_sym;
|
393
|
+
|
394
|
+
rb_scan_args(argc, argv, "14&", &rb_text, &rb_mode,
|
395
|
+
&rb_html, &rb_skip, &rb_flags, &rb_block);
|
396
|
+
|
397
|
+
Check_Type(rb_text, T_STRING);
|
398
|
+
|
399
|
+
if (!NIL_P(rb_mode)) {
|
400
|
+
Check_Type(rb_mode, T_SYMBOL);
|
401
|
+
mode_sym = SYM2ID(rb_mode);
|
402
|
+
} else {
|
403
|
+
mode_sym = rb_intern("all");
|
404
|
+
}
|
405
|
+
|
406
|
+
if (!NIL_P(rb_html)) {
|
407
|
+
Check_Type(rb_html, T_STRING);
|
408
|
+
link_attr = RSTRING_PTR(rb_html);
|
409
|
+
}
|
410
|
+
|
411
|
+
if (NIL_P(rb_skip))
|
412
|
+
rb_skip = rb_iv_get(self, "@skip_tags");
|
413
|
+
|
414
|
+
if (NIL_P(rb_skip)) {
|
415
|
+
skip_tags = SKIP_TAGS;
|
416
|
+
} else {
|
417
|
+
skip_tags = rinku_load_tags(rb_skip);
|
418
|
+
}
|
419
|
+
|
420
|
+
if (!NIL_P(rb_flags)) {
|
421
|
+
Check_Type(rb_flags, T_FIXNUM);
|
422
|
+
link_flags = FIX2INT(rb_flags);
|
423
|
+
}
|
424
|
+
|
425
|
+
output_buf = bufnew(32);
|
426
|
+
|
427
|
+
if (mode_sym == rb_intern("all"))
|
428
|
+
link_mode = AUTOLINK_ALL;
|
429
|
+
else if (mode_sym == rb_intern("email_addresses"))
|
430
|
+
link_mode = AUTOLINK_EMAILS;
|
431
|
+
else if (mode_sym == rb_intern("urls"))
|
432
|
+
link_mode = AUTOLINK_URLS;
|
433
|
+
else
|
434
|
+
rb_raise(rb_eTypeError,
|
435
|
+
"Invalid linking mode (possible values are :all, :urls, :email_addresses)");
|
436
|
+
|
437
|
+
count = rinku_autolink(
|
438
|
+
output_buf,
|
439
|
+
RSTRING_PTR(rb_text),
|
440
|
+
RSTRING_LEN(rb_text),
|
441
|
+
link_mode,
|
442
|
+
link_flags,
|
443
|
+
link_attr,
|
444
|
+
skip_tags,
|
445
|
+
RTEST(rb_block) ? &autolink_callback : NULL,
|
446
|
+
(void*)rb_block);
|
447
|
+
|
448
|
+
if (count == 0)
|
449
|
+
result = rb_text;
|
450
|
+
else {
|
451
|
+
result = rb_str_new(output_buf->data, output_buf->size);
|
452
|
+
rb_enc_copy(result, rb_text);
|
453
|
+
}
|
454
|
+
|
455
|
+
if (skip_tags != SKIP_TAGS)
|
456
|
+
xfree(skip_tags);
|
457
|
+
|
458
|
+
bufrelease(output_buf);
|
459
|
+
return result;
|
460
|
+
}
|
461
|
+
|
462
|
+
/*
 * Extension entry point: defines the Rinku module, its `auto_link` method
 * and the AUTOLINK_SHORT_DOMAINS constant.
 */
void RUBY_EXPORT Init_rinku()
{
	VALUE short_domains = INT2FIX(SD_AUTOLINK_SHORT_DOMAINS);

	rb_mRinku = rb_define_module("Rinku");
	rb_define_method(rb_mRinku, "auto_link", rb_rinku_autolink, -1);
	rb_define_const(rb_mRinku, "AUTOLINK_SHORT_DOMAINS", short_domains);
}
|
468
|
+
|