zendesk-rinku 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/COPYING +13 -0
- data/README.markdown +120 -0
- data/Rakefile +67 -0
- data/ext/rinku/autolink.c +296 -0
- data/ext/rinku/autolink.h +51 -0
- data/ext/rinku/buffer.c +225 -0
- data/ext/rinku/buffer.h +96 -0
- data/ext/rinku/extconf.rb +6 -0
- data/ext/rinku/rinku.c +468 -0
- data/ext/rinku/rinku.h +7 -0
- data/lib/rails_rinku.rb +29 -0
- data/lib/rinku.rb +7 -0
- data/rinku.gemspec +38 -0
- data/test/autolink_test.rb +295 -0
- metadata +62 -0
@@ -0,0 +1,51 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2011, Vicent Marti
|
3
|
+
*
|
4
|
+
* Permission to use, copy, modify, and distribute this software for any
|
5
|
+
* purpose with or without fee is hereby granted, provided that the above
|
6
|
+
* copyright notice and this permission notice appear in all copies.
|
7
|
+
*
|
8
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
9
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
10
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
11
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
12
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
13
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
|
+
*/
|
16
|
+
|
17
|
+
#ifndef UPSKIRT_AUTOLINK_H
|
18
|
+
#define UPSKIRT_AUTOLINK_H
|
19
|
+
|
20
|
+
#include "buffer.h"
|
21
|
+
|
22
|
+
#ifdef __cplusplus
|
23
|
+
extern "C" {
|
24
|
+
#endif
|
25
|
+
|
26
|
+
/* Option bits for the `flags` argument of the sd_autolink__* scanners. */
enum {
	/* NOTE(review): presumably accepts dot-less hosts such as
	 * "http://foo" -- confirm against autolink.c */
	SD_AUTOLINK_SHORT_DOMAINS = (1 << 0),
};

/* NOTE(review): looks like a predicate telling whether `link` starts with
 * a protocol that is safe to autolink -- confirm against autolink.c */
int sd_autolink_issafe(const uint8_t *link, size_t link_len);

/*
 * Link scanners. As used by the caller in rinku.c: `data` points at the
 * trigger character, `offset` is how many bytes precede it, `size` is how
 * many bytes remain from `data` on. A return value of 0 means "no link
 * here"; otherwise it is the number of bytes of `data` consumed by the
 * link, `*rewind_p` is the number of bytes before `data` that also belong
 * to it, and `link` holds the link text.
 */
size_t sd_autolink__www(size_t *rewind_p, struct buf *link,
	uint8_t *data, size_t offset, size_t size, unsigned int flags);

size_t sd_autolink__email(size_t *rewind_p, struct buf *link,
	uint8_t *data, size_t offset, size_t size, unsigned int flags);

size_t sd_autolink__url(size_t *rewind_p, struct buf *link,
	uint8_t *data, size_t offset, size_t size, unsigned int flags);
|
44
|
+
|
45
|
+
#ifdef __cplusplus
|
46
|
+
}
|
47
|
+
#endif
|
48
|
+
|
49
|
+
#endif
|
50
|
+
|
51
|
+
/* vim: set filetype=c: */
|
data/ext/rinku/buffer.c
ADDED
@@ -0,0 +1,225 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2008, Natacha Porté
|
3
|
+
* Copyright (c) 2011, Vicent Martí
|
4
|
+
*
|
5
|
+
* Permission to use, copy, modify, and distribute this software for any
|
6
|
+
* purpose with or without fee is hereby granted, provided that the above
|
7
|
+
* copyright notice and this permission notice appear in all copies.
|
8
|
+
*
|
9
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
10
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
11
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
12
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
13
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
14
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
15
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
16
|
+
*/
|
17
|
+
|
18
|
+
#define BUFFER_MAX_ALLOC_SIZE (1024 * 1024 * 16) //16mb
|
19
|
+
|
20
|
+
#include "buffer.h"
|
21
|
+
|
22
|
+
#include <stdio.h>
|
23
|
+
#include <stdlib.h>
|
24
|
+
#include <string.h>
|
25
|
+
#include <assert.h>
|
26
|
+
|
27
|
+
/* MSVC compat */
|
28
|
+
#if defined(_MSC_VER)
|
29
|
+
# define _buf_vsnprintf _vsnprintf
|
30
|
+
#else
|
31
|
+
# define _buf_vsnprintf vsnprintf
|
32
|
+
#endif
|
33
|
+
|
34
|
+
int
|
35
|
+
bufprefix(const struct buf *buf, const char *prefix)
|
36
|
+
{
|
37
|
+
size_t i;
|
38
|
+
assert(buf && buf->unit);
|
39
|
+
|
40
|
+
for (i = 0; i < buf->size; ++i) {
|
41
|
+
if (prefix[i] == 0)
|
42
|
+
return 0;
|
43
|
+
|
44
|
+
if (buf->data[i] != prefix[i])
|
45
|
+
return buf->data[i] - prefix[i];
|
46
|
+
}
|
47
|
+
|
48
|
+
return 0;
|
49
|
+
}
|
50
|
+
|
51
|
+
/* bufgrow: increasing the allocated size to the given value */
|
52
|
+
int
|
53
|
+
bufgrow(struct buf *buf, size_t neosz)
|
54
|
+
{
|
55
|
+
size_t neoasz;
|
56
|
+
void *neodata;
|
57
|
+
|
58
|
+
assert(buf && buf->unit);
|
59
|
+
|
60
|
+
if (neosz > BUFFER_MAX_ALLOC_SIZE)
|
61
|
+
return BUF_ENOMEM;
|
62
|
+
|
63
|
+
if (buf->asize >= neosz)
|
64
|
+
return BUF_OK;
|
65
|
+
|
66
|
+
neoasz = buf->asize + buf->unit;
|
67
|
+
while (neoasz < neosz)
|
68
|
+
neoasz += buf->unit;
|
69
|
+
|
70
|
+
neodata = realloc(buf->data, neoasz);
|
71
|
+
if (!neodata)
|
72
|
+
return BUF_ENOMEM;
|
73
|
+
|
74
|
+
buf->data = neodata;
|
75
|
+
buf->asize = neoasz;
|
76
|
+
return BUF_OK;
|
77
|
+
}
|
78
|
+
|
79
|
+
|
80
|
+
/* bufnew: allocation of a new buffer */
|
81
|
+
struct buf *
|
82
|
+
bufnew(size_t unit)
|
83
|
+
{
|
84
|
+
struct buf *ret;
|
85
|
+
ret = malloc(sizeof (struct buf));
|
86
|
+
|
87
|
+
if (ret) {
|
88
|
+
ret->data = 0;
|
89
|
+
ret->size = ret->asize = 0;
|
90
|
+
ret->unit = unit;
|
91
|
+
}
|
92
|
+
return ret;
|
93
|
+
}
|
94
|
+
|
95
|
+
/* bufnullterm: NULL-termination of the string array */
|
96
|
+
const char *
|
97
|
+
bufcstr(struct buf *buf)
|
98
|
+
{
|
99
|
+
assert(buf && buf->unit);
|
100
|
+
|
101
|
+
if (buf->size < buf->asize && buf->data[buf->size] == 0)
|
102
|
+
return (char *)buf->data;
|
103
|
+
|
104
|
+
if (buf->size + 1 <= buf->asize || bufgrow(buf, buf->size + 1) == 0) {
|
105
|
+
buf->data[buf->size] = 0;
|
106
|
+
return (char *)buf->data;
|
107
|
+
}
|
108
|
+
|
109
|
+
return NULL;
|
110
|
+
}
|
111
|
+
|
112
|
+
/* bufprintf: formatted printing to a buffer */
|
113
|
+
void
|
114
|
+
bufprintf(struct buf *buf, const char *fmt, ...)
|
115
|
+
{
|
116
|
+
va_list ap;
|
117
|
+
int n;
|
118
|
+
|
119
|
+
assert(buf && buf->unit);
|
120
|
+
|
121
|
+
if (buf->size >= buf->asize && bufgrow(buf, buf->size + 1) < 0)
|
122
|
+
return;
|
123
|
+
|
124
|
+
va_start(ap, fmt);
|
125
|
+
n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
|
126
|
+
va_end(ap);
|
127
|
+
|
128
|
+
if (n < 0) {
|
129
|
+
#ifdef _MSC_VER
|
130
|
+
va_start(ap, fmt);
|
131
|
+
n = _vscprintf(fmt, ap);
|
132
|
+
va_end(ap);
|
133
|
+
#else
|
134
|
+
return;
|
135
|
+
#endif
|
136
|
+
}
|
137
|
+
|
138
|
+
if ((size_t)n >= buf->asize - buf->size) {
|
139
|
+
if (bufgrow(buf, buf->size + n + 1) < 0)
|
140
|
+
return;
|
141
|
+
|
142
|
+
va_start(ap, fmt);
|
143
|
+
n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
|
144
|
+
va_end(ap);
|
145
|
+
}
|
146
|
+
|
147
|
+
if (n < 0)
|
148
|
+
return;
|
149
|
+
|
150
|
+
buf->size += n;
|
151
|
+
}
|
152
|
+
|
153
|
+
/* bufput: appends raw data to a buffer */
|
154
|
+
void
|
155
|
+
bufput(struct buf *buf, const void *data, size_t len)
|
156
|
+
{
|
157
|
+
assert(buf && buf->unit);
|
158
|
+
|
159
|
+
if (buf->size + len > buf->asize && bufgrow(buf, buf->size + len) < 0)
|
160
|
+
return;
|
161
|
+
|
162
|
+
memcpy(buf->data + buf->size, data, len);
|
163
|
+
buf->size += len;
|
164
|
+
}
|
165
|
+
|
166
|
+
/*
 * bufputs: appends a NUL-terminated string to a buffer.
 * The terminator itself is not copied.
 */
void
bufputs(struct buf *buf, const char *str)
{
	const size_t length = strlen(str);

	bufput(buf, str, length);
}
|
172
|
+
|
173
|
+
|
174
|
+
/* bufputc: appends a single uint8_t to a buffer */
|
175
|
+
void
|
176
|
+
bufputc(struct buf *buf, int c)
|
177
|
+
{
|
178
|
+
assert(buf && buf->unit);
|
179
|
+
|
180
|
+
if (buf->size + 1 > buf->asize && bufgrow(buf, buf->size + 1) < 0)
|
181
|
+
return;
|
182
|
+
|
183
|
+
buf->data[buf->size] = c;
|
184
|
+
buf->size += 1;
|
185
|
+
}
|
186
|
+
|
187
|
+
/* bufrelease: decrease the reference count and free the buffer if needed */
|
188
|
+
void
|
189
|
+
bufrelease(struct buf *buf)
|
190
|
+
{
|
191
|
+
if (!buf)
|
192
|
+
return;
|
193
|
+
|
194
|
+
free(buf->data);
|
195
|
+
free(buf);
|
196
|
+
}
|
197
|
+
|
198
|
+
|
199
|
+
/* bufreset: frees internal data of the buffer */
|
200
|
+
void
|
201
|
+
bufreset(struct buf *buf)
|
202
|
+
{
|
203
|
+
if (!buf)
|
204
|
+
return;
|
205
|
+
|
206
|
+
free(buf->data);
|
207
|
+
buf->data = NULL;
|
208
|
+
buf->size = buf->asize = 0;
|
209
|
+
}
|
210
|
+
|
211
|
+
/* bufslurp: removes a given number of bytes from the head of the array */
|
212
|
+
void
|
213
|
+
bufslurp(struct buf *buf, size_t len)
|
214
|
+
{
|
215
|
+
assert(buf && buf->unit);
|
216
|
+
|
217
|
+
if (len >= buf->size) {
|
218
|
+
buf->size = 0;
|
219
|
+
return;
|
220
|
+
}
|
221
|
+
|
222
|
+
buf->size -= len;
|
223
|
+
memmove(buf->data, buf->data + len, buf->size);
|
224
|
+
}
|
225
|
+
|
data/ext/rinku/buffer.h
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2008, Natacha Porté
|
3
|
+
* Copyright (c) 2011, Vicent Martí
|
4
|
+
*
|
5
|
+
* Permission to use, copy, modify, and distribute this software for any
|
6
|
+
* purpose with or without fee is hereby granted, provided that the above
|
7
|
+
* copyright notice and this permission notice appear in all copies.
|
8
|
+
*
|
9
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
10
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
11
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
12
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
13
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
14
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
15
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
16
|
+
*/
|
17
|
+
|
18
|
+
#ifndef BUFFER_H__
|
19
|
+
#define BUFFER_H__
|
20
|
+
|
21
|
+
#include <stddef.h>
|
22
|
+
#include <stdarg.h>
|
23
|
+
#include <stdint.h>
|
24
|
+
|
25
|
+
#ifdef __cplusplus
|
26
|
+
extern "C" {
|
27
|
+
#endif
|
28
|
+
|
29
|
+
#if defined(_MSC_VER)
|
30
|
+
#define __attribute__(x)
|
31
|
+
#define inline
|
32
|
+
#endif
|
33
|
+
|
34
|
+
/* buferror_t: status codes returned by bufgrow */
typedef enum {
	BUF_OK = 0,      /* the buffer can hold the requested size */
	BUF_ENOMEM = -1, /* request too large, or the reallocation failed */
} buferror_t;
|
38
|
+
|
39
|
+
/* struct buf: character array buffer */
struct buf {
	/* actual character data */
	uint8_t *data;
	/* size of the string (bytes in use) */
	size_t size;
	/* allocated size (0 = volatile buffer) */
	size_t asize;
	/* reallocation unit size (0 = read-only buffer) */
	size_t unit;
};
|
46
|
+
|
47
|
+
/*
 * BUF_STATIC: initializer for a global buffer wrapping a string literal.
 * Fields, in struct buf order: data, size (without the terminator),
 * asize (with the terminator), unit (0 = read-only buffer).
 */
#define BUF_STATIC(string) \
	{ (uint8_t *)string, sizeof string - 1, sizeof string, 0 }

/*
 * BUF_VOLATILE: initializer for a volatile buffer on the stack wrapping an
 * existing C string (asize 0 = volatile buffer, unit 0 = read-only buffer).
 */
#define BUF_VOLATILE(strname) \
	{ (uint8_t *)strname, strlen(strname), 0, 0 }

/* BUFPUTSL: optimized bufputs of a string literal (length known at compile time) */
#define BUFPUTSL(output, literal) \
	bufput(output, literal, sizeof literal - 1)
|
58
|
+
|
59
|
+
/* bufgrow: grows the allocation so it can hold at least `neosz` bytes;
 * returns BUF_OK or BUF_ENOMEM */
int bufgrow(struct buf *buf, size_t neosz);

/* bufnew: allocates a new, empty buffer growing in steps of `unit` bytes;
 * returns NULL on allocation failure */
struct buf *bufnew(size_t unit) __attribute__ ((malloc));

/* bufcstr: NUL-terminates the string array and returns it as a C-string;
 * returns NULL if the terminator could not be stored */
const char *bufcstr(struct buf *buf);

/* bufprefix: compares the beginning of a buffer with a string;
 * returns 0 when no differing byte is found */
int bufprefix(const struct buf *buf, const char *prefix);

/* bufput: appends `len` bytes of raw data to a buffer */
void bufput(struct buf *buf, const void *data, size_t len);

/* bufputs: appends a NUL-terminated string to a buffer */
void bufputs(struct buf *buf, const char *str);

/* bufputc: appends a single char to a buffer */
void bufputc(struct buf *buf, int c);

/* bufrelease: frees the buffer and its data (NULL is accepted) */
void bufrelease(struct buf *buf);

/* bufreset: frees internal data of the buffer, leaving it empty */
void bufreset(struct buf *buf);

/* bufslurp: removes `len` bytes from the head of the array */
void bufslurp(struct buf *buf, size_t len);

/* bufprintf: formatted printing to a buffer */
void bufprintf(struct buf *buf, const char *fmt, ...) __attribute__ ((format (printf, 2, 3)));
|
91
|
+
|
92
|
+
#ifdef __cplusplus
|
93
|
+
}
|
94
|
+
#endif
|
95
|
+
|
96
|
+
#endif
|
data/ext/rinku/rinku.c
ADDED
@@ -0,0 +1,468 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2011, Vicent Marti
|
3
|
+
*
|
4
|
+
* Permission to use, copy, modify, and distribute this software for any
|
5
|
+
* purpose with or without fee is hereby granted, provided that the above
|
6
|
+
* copyright notice and this permission notice appear in all copies.
|
7
|
+
*
|
8
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
9
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
10
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
11
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
12
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
13
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
|
+
*/
|
16
|
+
#define RSTRING_NOT_MODIFIED
|
17
|
+
|
18
|
+
#include <stdio.h>
|
19
|
+
#include "ruby.h"
|
20
|
+
|
21
|
+
#define RUBY_EXPORT __attribute__ ((visibility ("default")))
|
22
|
+
|
23
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
24
|
+
#include <ruby/encoding.h>
|
25
|
+
#else
|
26
|
+
#define rb_enc_copy(dst, src)
|
27
|
+
#endif
|
28
|
+
|
29
|
+
#include "autolink.h"
|
30
|
+
#include "buffer.h"
|
31
|
+
|
32
|
+
#include <string.h>
|
33
|
+
#include <stdlib.h>
|
34
|
+
#include <stdio.h>
|
35
|
+
#include <ctype.h>
|
36
|
+
|
37
|
+
static VALUE rb_mRinku;
|
38
|
+
|
39
|
+
/* html_tag: outcome of matching markup against a tag name (see html_is_tag) */
typedef enum {
	HTML_TAG_NONE = 0, /* not the tag that was asked for */
	HTML_TAG_OPEN,     /* opening form, `<name ...>` */
	HTML_TAG_CLOSE,    /* closing form, `</name>` */
} html_tag;

/* autolink_mode: bitmask selecting which kinds of links are generated */
typedef enum {
	AUTOLINK_URLS = (1 << 0),
	AUTOLINK_EMAILS = (1 << 1),
	AUTOLINK_ALL = AUTOLINK_URLS|AUTOLINK_EMAILS
} autolink_mode;

/* autolink_parse_cb: signature shared by the sd_autolink__* scanners */
typedef size_t (*autolink_parse_cb)(
	size_t *rewind, struct buf *, uint8_t *, size_t, size_t, unsigned int);

/*
 * autolink_action: what to do when a trigger character is met.
 * The WWW/EMAIL/URL values double as indices into g_callbacks and g_hrefs.
 */
typedef enum {
	AUTOLINK_ACTION_NONE = 0,
	AUTOLINK_ACTION_WWW,
	AUTOLINK_ACTION_EMAIL,
	AUTOLINK_ACTION_URL,
	AUTOLINK_ACTION_SKIP_TAG
} autolink_action;
|
61
|
+
|
62
|
+
static autolink_parse_cb g_callbacks[] = {
|
63
|
+
NULL,
|
64
|
+
sd_autolink__www, /* 1 */
|
65
|
+
sd_autolink__email,/* 2 */
|
66
|
+
sd_autolink__url, /* 3 */
|
67
|
+
};
|
68
|
+
|
69
|
+
static const char *g_hrefs[] = {
|
70
|
+
NULL,
|
71
|
+
"<a href=\"http://",
|
72
|
+
"<a href=\"mailto:",
|
73
|
+
"<a href=\"",
|
74
|
+
};
|
75
|
+
|
76
|
+
static void
|
77
|
+
autolink__print(struct buf *ob, const struct buf *link, void *payload)
|
78
|
+
{
|
79
|
+
bufput(ob, link->data, link->size);
|
80
|
+
}
|
81
|
+
|
82
|
+
/*
 * Rinku assumes valid HTML encoding for all input, but there's still
 * the case where a link can contain a double quote `"` that allows XSS.
 *
 * We need to properly escape the character we use for the `href` attribute
 * declaration.
 *
 * print_link: writes the `size` bytes of `link` to `ob`, replacing every
 * double quote with the `&quot;` entity; all other bytes are copied as-is.
 */
static void print_link(struct buf *ob, const char *link, size_t size)
{
	size_t i = 0;

	while (i < size) {
		size_t start = i;

		/* copy the run of bytes up to the next double quote in one go */
		while (i < size && link[i] != '"')
			i++;

		if (i > start)
			bufput(ob, link + start, i - start);

		if (i >= size)
			break;

		/* link[i] is a double quote: emit the entity instead */
		BUFPUTSL(ob, "&quot;");
		i++;
	}
}
|
109
|
+
|
110
|
+
/* From sundown/html/html.c */
|
111
|
+
static int
|
112
|
+
html_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname)
|
113
|
+
{
|
114
|
+
size_t i;
|
115
|
+
int closed = 0;
|
116
|
+
|
117
|
+
if (tag_size < 3 || tag_data[0] != '<')
|
118
|
+
return HTML_TAG_NONE;
|
119
|
+
|
120
|
+
i = 1;
|
121
|
+
|
122
|
+
if (tag_data[i] == '/') {
|
123
|
+
closed = 1;
|
124
|
+
i++;
|
125
|
+
}
|
126
|
+
|
127
|
+
for (; i < tag_size; ++i, ++tagname) {
|
128
|
+
if (*tagname == 0)
|
129
|
+
break;
|
130
|
+
|
131
|
+
if (tag_data[i] != *tagname)
|
132
|
+
return HTML_TAG_NONE;
|
133
|
+
}
|
134
|
+
|
135
|
+
if (i == tag_size)
|
136
|
+
return HTML_TAG_NONE;
|
137
|
+
|
138
|
+
if (isspace(tag_data[i]) || tag_data[i] == '>')
|
139
|
+
return closed ? HTML_TAG_CLOSE : HTML_TAG_OPEN;
|
140
|
+
|
141
|
+
return HTML_TAG_NONE;
|
142
|
+
}
|
143
|
+
|
144
|
+
static size_t
|
145
|
+
autolink__skip_tag(
|
146
|
+
struct buf *ob,
|
147
|
+
const uint8_t *text,
|
148
|
+
size_t size,
|
149
|
+
const char **skip_tags)
|
150
|
+
{
|
151
|
+
size_t i = 0;
|
152
|
+
|
153
|
+
while (i < size && text[i] != '>')
|
154
|
+
i++;
|
155
|
+
|
156
|
+
while (*skip_tags != NULL) {
|
157
|
+
if (html_is_tag(text, size, *skip_tags) == HTML_TAG_OPEN)
|
158
|
+
break;
|
159
|
+
|
160
|
+
skip_tags++;
|
161
|
+
}
|
162
|
+
|
163
|
+
if (*skip_tags != NULL) {
|
164
|
+
for (;;) {
|
165
|
+
while (i < size && text[i] != '<')
|
166
|
+
i++;
|
167
|
+
|
168
|
+
if (i == size)
|
169
|
+
break;
|
170
|
+
|
171
|
+
if (html_is_tag(text + i, size - i, *skip_tags) == HTML_TAG_CLOSE)
|
172
|
+
break;
|
173
|
+
|
174
|
+
i++;
|
175
|
+
}
|
176
|
+
|
177
|
+
while (i < size && text[i] != '>')
|
178
|
+
i++;
|
179
|
+
}
|
180
|
+
|
181
|
+
// bufput(ob, text, i + 1);
|
182
|
+
return i;
|
183
|
+
}
|
184
|
+
|
185
|
+
int
|
186
|
+
rinku_autolink(
|
187
|
+
struct buf *ob,
|
188
|
+
const uint8_t *text,
|
189
|
+
size_t size,
|
190
|
+
autolink_mode mode,
|
191
|
+
unsigned int flags,
|
192
|
+
const char *link_attr,
|
193
|
+
const char **skip_tags,
|
194
|
+
void (*link_text_cb)(struct buf *ob, const struct buf *link, void *payload),
|
195
|
+
void *payload)
|
196
|
+
{
|
197
|
+
size_t i, end;
|
198
|
+
struct buf *link = bufnew(16);
|
199
|
+
char active_chars[256];
|
200
|
+
void (*link_url_cb)(struct buf *, const struct buf *, void *);
|
201
|
+
int link_count = 0;
|
202
|
+
|
203
|
+
if (!text || size == 0)
|
204
|
+
return 0;
|
205
|
+
|
206
|
+
memset(active_chars, 0x0, sizeof(active_chars));
|
207
|
+
|
208
|
+
active_chars['<'] = AUTOLINK_ACTION_SKIP_TAG;
|
209
|
+
|
210
|
+
if (mode & AUTOLINK_EMAILS)
|
211
|
+
active_chars['@'] = AUTOLINK_ACTION_EMAIL;
|
212
|
+
|
213
|
+
if (mode & AUTOLINK_URLS) {
|
214
|
+
active_chars['w'] = AUTOLINK_ACTION_WWW;
|
215
|
+
active_chars['W'] = AUTOLINK_ACTION_WWW;
|
216
|
+
active_chars[':'] = AUTOLINK_ACTION_URL;
|
217
|
+
}
|
218
|
+
|
219
|
+
if (link_text_cb == NULL)
|
220
|
+
link_text_cb = &autolink__print;
|
221
|
+
|
222
|
+
if (link_attr != NULL) {
|
223
|
+
while (isspace(*link_attr))
|
224
|
+
link_attr++;
|
225
|
+
}
|
226
|
+
|
227
|
+
bufgrow(ob, size);
|
228
|
+
|
229
|
+
i = end = 0;
|
230
|
+
|
231
|
+
while (i < size) {
|
232
|
+
size_t rewind, link_end;
|
233
|
+
char action = 0;
|
234
|
+
|
235
|
+
while (end < size && (action = active_chars[text[end]]) == 0)
|
236
|
+
end++;
|
237
|
+
|
238
|
+
if (end == size) {
|
239
|
+
if (link_count > 0)
|
240
|
+
bufput(ob, text + i, end - i);
|
241
|
+
break;
|
242
|
+
}
|
243
|
+
|
244
|
+
if (action == AUTOLINK_ACTION_SKIP_TAG) {
|
245
|
+
end += autolink__skip_tag(ob,
|
246
|
+
text + end, size - end, skip_tags);
|
247
|
+
|
248
|
+
continue;
|
249
|
+
}
|
250
|
+
|
251
|
+
link->size = 0;
|
252
|
+
link_end = g_callbacks[(int)action](
|
253
|
+
&rewind, link, (uint8_t *)text + end, end, size - end, flags);
|
254
|
+
|
255
|
+
/* print the link */
|
256
|
+
if (link_end > 0) {
|
257
|
+
bufput(ob, text + i, end - i - rewind);
|
258
|
+
|
259
|
+
bufputs(ob, g_hrefs[(int)action]);
|
260
|
+
print_link(ob, link->data, link->size);
|
261
|
+
|
262
|
+
if (link_attr) {
|
263
|
+
BUFPUTSL(ob, "\" ");
|
264
|
+
bufputs(ob, link_attr);
|
265
|
+
bufputc(ob, '>');
|
266
|
+
} else {
|
267
|
+
BUFPUTSL(ob, "\">");
|
268
|
+
}
|
269
|
+
|
270
|
+
link_text_cb(ob, link, payload);
|
271
|
+
BUFPUTSL(ob, "</a>");
|
272
|
+
|
273
|
+
link_count++;
|
274
|
+
i = end + link_end;
|
275
|
+
end = i;
|
276
|
+
} else {
|
277
|
+
end = end + 1;
|
278
|
+
}
|
279
|
+
}
|
280
|
+
|
281
|
+
bufrelease(link);
|
282
|
+
return link_count;
|
283
|
+
}
|
284
|
+
|
285
|
+
|
286
|
+
/**
|
287
|
+
* Ruby code
|
288
|
+
*/
|
289
|
+
static void
|
290
|
+
autolink_callback(struct buf *link_text, const struct buf *link, void *block)
|
291
|
+
{
|
292
|
+
VALUE rb_link, rb_link_text;
|
293
|
+
rb_link = rb_str_new(link->data, link->size);
|
294
|
+
rb_link_text = rb_funcall((VALUE)block, rb_intern("call"), 1, rb_link);
|
295
|
+
Check_Type(rb_link_text, T_STRING);
|
296
|
+
bufput(link_text, RSTRING_PTR(rb_link_text), RSTRING_LEN(rb_link_text));
|
297
|
+
}
|
298
|
+
|
299
|
+
/*
 * rinku_load_tags: converts a Ruby array of strings into a NULL-terminated
 * array of C strings.
 *
 * The returned array is allocated with xmalloc and must be released by the
 * caller with xfree. The strings themselves still belong to the Ruby
 * objects in `rb_skip`. Raises a Ruby exception if `rb_skip` is not an
 * Array or if an element is not a String usable as a C string.
 */
const char **rinku_load_tags(VALUE rb_skip)
{
	const char **skip_tags;
	long i, count;

	Check_Type(rb_skip, T_ARRAY);

	count = RARRAY_LEN(rb_skip);

	/*
	 * Check every element before allocating anything: Check_Type and
	 * StringValueCStr report errors by raising, which jumps out of this
	 * function and would leak an array that was already allocated.
	 */
	for (i = 0; i < count; ++i) {
		VALUE tag = rb_ary_entry(rb_skip, i);
		Check_Type(tag, T_STRING);
		(void)StringValueCStr(tag);
	}

	skip_tags = xmalloc(sizeof(*skip_tags) * ((size_t)count + 1));

	for (i = 0; i < count; ++i) {
		VALUE tag = rb_ary_entry(rb_skip, i);
		skip_tags[i] = StringValueCStr(tag);
	}

	skip_tags[count] = NULL;
	return skip_tags;
}
|
318
|
+
|
319
|
+
/*
|
320
|
+
* Document-method: auto_link
|
321
|
+
*
|
322
|
+
* call-seq:
|
323
|
+
* auto_link(text, mode=:all, link_attr=nil, skip_tags=nil, flags=0)
|
324
|
+
* auto_link(text, mode=:all, link_attr=nil, skip_tags=nil, flags=0) { |link_text| ... }
|
325
|
+
*
|
326
|
+
* Parses a block of text looking for "safe" urls or email addresses,
|
327
|
+
* and turns them into HTML links with the given attributes.
|
328
|
+
*
|
329
|
+
* NOTE: The block of text may or may not be HTML; if the text is HTML,
|
330
|
+
* Rinku will skip the relevant tags to prevent double-linking and linking
|
331
|
+
* inside `pre` blocks by default.
|
332
|
+
*
|
333
|
+
* NOTE: If the input text is HTML, it's expected to be already escaped.
|
334
|
+
* Rinku will perform no escaping.
|
335
|
+
*
|
336
|
+
* NOTE: Currently the following protocols are considered safe and are the
|
337
|
+
* only ones that will be autolinked.
|
338
|
+
*
|
339
|
+
* http:// https:// ftp:// mailto://
|
340
|
+
*
|
341
|
+
* Email addresses are also autolinked by default. URLs without a protocol
|
342
|
+
* specifier but starting with 'www.' will also be autolinked, defaulting to
|
343
|
+
* the 'http://' protocol.
|
344
|
+
*
|
345
|
+
* - `text` is a string in plain text or HTML markup. If the string is formatted in
|
346
|
+
* HTML, Rinku is smart enough to skip the links that are already enclosed in `<a>`
|
347
|
+
* tags.
|
348
|
+
*
|
349
|
+
* - `mode` is a symbol, either `:all`, `:urls` or `:email_addresses`,
|
350
|
+
* which specifies which kind of links will be auto-linked.
|
351
|
+
*
|
352
|
+
* - `link_attr` is a string containing the link attributes for each link that
|
353
|
+
* will be generated. These attributes are not sanitized and will be included as-is
|
354
|
+
* in each generated link, e.g.
|
355
|
+
*
|
356
|
+
* ~~~~~ruby
|
357
|
+
* auto_link('http://www.pokemon.com', :all, 'target="_blank"')
|
358
|
+
* # => '<a href="http://www.pokemon.com" target="_blank">http://www.pokemon.com</a>'
|
359
|
+
* ~~~~~
|
360
|
+
*
|
361
|
+
* This string can be autogenerated from a hash using the Rails `tag_options` helper.
|
362
|
+
*
|
363
|
+
* - `skip_tags` is a list of strings with the names of HTML tags that will be skipped
|
364
|
+
* when autolinking. If `nil`, this defaults to the value of the global `Rinku.skip_tags`,
|
365
|
+
* which is initially `["a", "pre", "code", "kbd", "script"]`.
|
366
|
+
*
|
367
|
+
* - `flags` is an optional integer bitmask (e.g. `Rinku::AUTOLINK_SHORT_DOMAINS`) selecting whether to recognize
|
368
|
+
* 'http://foo' as a valid domain, or require at least one '.'. It defaults to 0.
|
369
|
+
*
|
370
|
+
* - `&block` is an optional block argument. If a block is passed, it will
|
371
|
+
* be yielded for each found link in the text, and its return value will be used instead
|
372
|
+
* of the name of the link. E.g.
|
373
|
+
*
|
374
|
+
* ~~~~~ruby
|
375
|
+
* auto_link('Check it out at http://www.pokemon.com') do |url|
|
376
|
+
* "THE POKEMAN WEBSITEZ"
|
377
|
+
* end
|
378
|
+
* # => 'Check it out at <a href="http://www.pokemon.com">THE POKEMAN WEBSITEZ</a>'
|
379
|
+
* ~~~~~~
|
380
|
+
*/
|
381
|
+
static VALUE
|
382
|
+
rb_rinku_autolink(int argc, VALUE *argv, VALUE self)
|
383
|
+
{
|
384
|
+
static const char *SKIP_TAGS[] = {"a", "pre", "code", "kbd", "script", NULL};
|
385
|
+
|
386
|
+
VALUE result, rb_text, rb_mode, rb_html, rb_skip, rb_flags, rb_block;
|
387
|
+
struct buf *output_buf;
|
388
|
+
int link_mode, count;
|
389
|
+
unsigned int link_flags = 0;
|
390
|
+
const char *link_attr = NULL;
|
391
|
+
const char **skip_tags = NULL;
|
392
|
+
ID mode_sym;
|
393
|
+
|
394
|
+
rb_scan_args(argc, argv, "14&", &rb_text, &rb_mode,
|
395
|
+
&rb_html, &rb_skip, &rb_flags, &rb_block);
|
396
|
+
|
397
|
+
Check_Type(rb_text, T_STRING);
|
398
|
+
|
399
|
+
if (!NIL_P(rb_mode)) {
|
400
|
+
Check_Type(rb_mode, T_SYMBOL);
|
401
|
+
mode_sym = SYM2ID(rb_mode);
|
402
|
+
} else {
|
403
|
+
mode_sym = rb_intern("all");
|
404
|
+
}
|
405
|
+
|
406
|
+
if (!NIL_P(rb_html)) {
|
407
|
+
Check_Type(rb_html, T_STRING);
|
408
|
+
link_attr = RSTRING_PTR(rb_html);
|
409
|
+
}
|
410
|
+
|
411
|
+
if (NIL_P(rb_skip))
|
412
|
+
rb_skip = rb_iv_get(self, "@skip_tags");
|
413
|
+
|
414
|
+
if (NIL_P(rb_skip)) {
|
415
|
+
skip_tags = SKIP_TAGS;
|
416
|
+
} else {
|
417
|
+
skip_tags = rinku_load_tags(rb_skip);
|
418
|
+
}
|
419
|
+
|
420
|
+
if (!NIL_P(rb_flags)) {
|
421
|
+
Check_Type(rb_flags, T_FIXNUM);
|
422
|
+
link_flags = FIX2INT(rb_flags);
|
423
|
+
}
|
424
|
+
|
425
|
+
output_buf = bufnew(32);
|
426
|
+
|
427
|
+
if (mode_sym == rb_intern("all"))
|
428
|
+
link_mode = AUTOLINK_ALL;
|
429
|
+
else if (mode_sym == rb_intern("email_addresses"))
|
430
|
+
link_mode = AUTOLINK_EMAILS;
|
431
|
+
else if (mode_sym == rb_intern("urls"))
|
432
|
+
link_mode = AUTOLINK_URLS;
|
433
|
+
else
|
434
|
+
rb_raise(rb_eTypeError,
|
435
|
+
"Invalid linking mode (possible values are :all, :urls, :email_addresses)");
|
436
|
+
|
437
|
+
count = rinku_autolink(
|
438
|
+
output_buf,
|
439
|
+
RSTRING_PTR(rb_text),
|
440
|
+
RSTRING_LEN(rb_text),
|
441
|
+
link_mode,
|
442
|
+
link_flags,
|
443
|
+
link_attr,
|
444
|
+
skip_tags,
|
445
|
+
RTEST(rb_block) ? &autolink_callback : NULL,
|
446
|
+
(void*)rb_block);
|
447
|
+
|
448
|
+
if (count == 0)
|
449
|
+
result = rb_text;
|
450
|
+
else {
|
451
|
+
result = rb_str_new(output_buf->data, output_buf->size);
|
452
|
+
rb_enc_copy(result, rb_text);
|
453
|
+
}
|
454
|
+
|
455
|
+
if (skip_tags != SKIP_TAGS)
|
456
|
+
xfree(skip_tags);
|
457
|
+
|
458
|
+
bufrelease(output_buf);
|
459
|
+
return result;
|
460
|
+
}
|
461
|
+
|
462
|
+
/*
 * Init_rinku: extension entry point. Defines the `Rinku` module, its
 * `auto_link` method (variable number of arguments) and the
 * `AUTOLINK_SHORT_DOMAINS` flag constant.
 */
void RUBY_EXPORT Init_rinku(void)
{
	VALUE short_domains = INT2FIX(SD_AUTOLINK_SHORT_DOMAINS);

	rb_mRinku = rb_define_module("Rinku");

	rb_define_method(rb_mRinku, "auto_link", rb_rinku_autolink, -1);
	rb_define_const(rb_mRinku, "AUTOLINK_SHORT_DOMAINS", short_domains);
}
|
468
|
+
|