rinku 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/COPYING +13 -0
- data/README.markdown +87 -0
- data/Rakefile +71 -0
- data/VERSION +1 -0
- data/ext/rinku/autolink.c +239 -0
- data/ext/rinku/autolink.h +39 -0
- data/ext/rinku/buffer.c +323 -0
- data/ext/rinku/buffer.h +154 -0
- data/ext/rinku/extconf.rb +4 -0
- data/ext/rinku/html_autolink.c +221 -0
- data/ext/rinku/rinku.c +86 -0
- data/lib/rinku.rb +41 -0
- data/rinku.gemspec +34 -0
- data/test/autolink_test.rb +135 -0
- metadata +80 -0
data/COPYING
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright (c) 2011, Vicent Marti
|
2
|
+
|
3
|
+
Permission to use, copy, modify, and distribute this software for any
|
4
|
+
purpose with or without fee is hereby granted, provided that the above
|
5
|
+
copyright notice and this permission notice appear in all copies.
|
6
|
+
|
7
|
+
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
8
|
+
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
9
|
+
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
10
|
+
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
11
|
+
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
12
|
+
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
13
|
+
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
data/README.markdown
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
Rinku does linking
|
2
|
+
==================
|
3
|
+
|
4
|
+
Rinku is a Ruby library that does autolinking.
|
5
|
+
It parses text and turns anything that remotely resembles a link into an HTML link,
|
6
|
+
just like the Ruby on Rails `auto_link` method -- but it's about 20 times faster,
|
7
|
+
because it's written in C, and it's about 20 times smarter when linking,
|
8
|
+
because it does actual parsing instead of RegEx replacements.
|
9
|
+
|
10
|
+
Rinku is a Ruby Gem
|
11
|
+
-------------------
|
12
|
+
|
13
|
+
Rinku is available as a Ruby gem:
|
14
|
+
|
15
|
+
$ [sudo] gem install rinku
|
16
|
+
|
17
|
+
The Rinku source is available at GitHub:
|
18
|
+
|
19
|
+
$ git clone git://github.com/tanoku/rinku.git
|
20
|
+
|
21
|
+
Rinku is a drop-in replacement for Rails `auto_link`
|
22
|
+
----------------------------------------------------
|
23
|
+
|
24
|
+
And I'm a lazy bum, so I've copied and pasted the Rails API docs here.
|
25
|
+
Yes, the Rinku API is 100% compatible.
|
26
|
+
|
27
|
+
~~~~~~ruby
|
28
|
+
# Turns all URLs and e-mail addresses into clickable links. The <tt>:link</tt> option
|
29
|
+
# will limit what should be linked. You can add HTML attributes to the links using
|
30
|
+
# <tt>:html</tt>. Possible values for <tt>:link</tt> are <tt>:all</tt> (default),
|
31
|
+
# <tt>:email_addresses</tt>, and <tt>:urls</tt>. If a block is given, each URL and
|
32
|
+
# e-mail address is yielded and the result is used as the link text.
|
33
|
+
#
|
34
|
+
# ==== Examples
|
35
|
+
# auto_link("Go to http://www.rubyonrails.org and say hello to david@loudthinking.com")
|
36
|
+
# # => "Go to <a href=\"http://www.rubyonrails.org\">http://www.rubyonrails.org</a> and
|
37
|
+
# # say hello to <a href=\"mailto:david@loudthinking.com\">david@loudthinking.com</a>"
|
38
|
+
#
|
39
|
+
# auto_link("Visit http://www.loudthinking.com/ or e-mail david@loudthinking.com", :link => :urls)
|
40
|
+
# # => "Visit <a href=\"http://www.loudthinking.com/\">http://www.loudthinking.com/</a>
|
41
|
+
# # or e-mail david@loudthinking.com"
|
42
|
+
#
|
43
|
+
# auto_link("Visit http://www.loudthinking.com/ or e-mail david@loudthinking.com", :link => :email_addresses)
|
44
|
+
# # => "Visit http://www.loudthinking.com/ or e-mail <a href=\"mailto:david@loudthinking.com\">david@loudthinking.com</a>"
|
45
|
+
#
|
46
|
+
# post_body = "Welcome to my new blog at http://www.myblog.com/. Please e-mail me at me@email.com."
|
47
|
+
# auto_link(post_body, :html => { :target => '_blank' }) do |text|
|
48
|
+
# truncate(text, :length => 15)
|
49
|
+
# end
|
50
|
+
# # => "Welcome to my new blog at <a href=\"http://www.myblog.com/\" target=\"_blank\">http://www.m...</a>.
|
51
|
+
# Please e-mail me at <a href=\"mailto:me@email.com\">me@email.com</a>."
|
52
|
+
#
|
53
|
+
#
|
54
|
+
# You can still use <tt>auto_link</tt> with the old API that accepts the
|
55
|
+
# +link+ as its optional second parameter and the +html_options+ hash
|
56
|
+
# as its optional third parameter:
|
57
|
+
# post_body = "Welcome to my new blog at http://www.myblog.com/. Please e-mail me at me@email.com."
|
58
|
+
# auto_link(post_body, :urls)
|
59
|
+
# # => "Welcome to my new blog at <a href=\"http://www.myblog.com/\">http://www.myblog.com/</a>.
|
60
|
+
# Please e-mail me at me@email.com."
|
61
|
+
#
|
62
|
+
# auto_link(post_body, :all, :target => "_blank")
|
63
|
+
# # => "Welcome to my new blog at <a href=\"http://www.myblog.com/\" target=\"_blank\">http://www.myblog.com/</a>.
|
64
|
+
# Please e-mail me at <a href=\"mailto:me@email.com\">me@email.com</a>."
|
65
|
+
~~~~~~~~~
|
66
|
+
|
67
|
+
Rinku is written by me
|
68
|
+
----------------------
|
69
|
+
|
70
|
+
I am Vicent Marti, and I wrote Rinku.
|
71
|
+
While Rinku is busy doing autolinks, you should be busy following me on twitter. `@tanoku`. Do it.
|
72
|
+
|
73
|
+
Rinku has an awesome license
|
74
|
+
----------------------------
|
75
|
+
|
76
|
+
Permission to use, copy, modify, and/or distribute this software for any
|
77
|
+
purpose with or without fee is hereby granted, provided that the above
|
78
|
+
copyright notice and this permission notice appear in all copies.
|
79
|
+
|
80
|
+
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
81
|
+
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
82
|
+
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
83
|
+
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
84
|
+
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
85
|
+
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
86
|
+
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
87
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'date'
|
2
|
+
require 'rake/clean'
|
3
|
+
require 'rake/extensiontask'
|
4
|
+
require 'digest/md5'
|
5
|
+
|
6
|
+
task :default => :test
|
7
|
+
|
8
|
+
# ==========================================================
|
9
|
+
# Ruby Extension
|
10
|
+
# ==========================================================
|
11
|
+
|
12
|
+
Rake::ExtensionTask.new('rinku')
|
13
|
+
|
14
|
+
# ==========================================================
|
15
|
+
# Testing
|
16
|
+
# ==========================================================
|
17
|
+
|
18
|
+
require 'rake/testtask'
|
19
|
+
# Runs every test/*_test.rb file.
Rake::TestTask.new('test') do |t|
  t.test_files = FileList['test/*_test.rb']
  # `-rubygems` only worked through the `ubygems.rb` shim, which newer Ruby
  # releases no longer ship. `-rrubygems` requires RubyGems directly and is
  # accepted by old and new interpreters alike.
  t.ruby_opts += ['-rrubygems'] if defined? Gem
end
|
23
|
+
task 'test' => [:compile]
|
24
|
+
|
25
|
+
# PACKAGING =================================================================
|
26
|
+
|
27
|
+
require 'rubygems'
|
28
|
+
$spec = eval(File.read('rinku.gemspec'))
|
29
|
+
|
30
|
+
# Path of a build artifact under pkg/ for the version declared in the
# gemspec; `ext` is appended verbatim (for example '.gem').
def package(ext='')
  base_name = "pkg/rinku-#{$spec.version}"
  base_name + ext
end
|
33
|
+
|
34
|
+
desc 'Build packages'
|
35
|
+
task :package => package('.gem')
|
36
|
+
|
37
|
+
desc 'Build and install as local gem'
|
38
|
+
# Installs the packaged gem with the `gem` command line tool.
task :install => package('.gem') do
  gem_file = package('.gem')
  sh "gem install #{gem_file}"
end
|
41
|
+
|
42
|
+
desc 'Update the gemspec'
|
43
|
+
task :update_gem => file('rinku.gemspec')
|
44
|
+
|
45
|
+
directory 'pkg/'
|
46
|
+
|
47
|
+
# Builds the gem from the gemspec, then moves the resulting file from the
# working directory to its final location (the name of this file task).
file package('.gem') => %w[pkg/ rinku.gemspec] + $spec.files do |gem_task|
  sh "gem build rinku.gemspec"
  built_gem = File.basename(gem_task.name)
  mv built_gem, gem_task.name
end
|
51
|
+
|
52
|
+
# GEMSPEC HELPERS ==========================================================
|
53
|
+
|
54
|
+
desc 'Gather required Upskirt sources into extension directory'
|
55
|
+
# Copies the buffer/autolink headers and sources plus the HTML autolink
# renderer from the Upskirt checkout into ext/rinku/.
task :gather => 'upskirt/src/markdown.h' do |t|
  upskirt_sources = FileList[
    'upskirt/src/{buffer,autolink}.h',
    'upskirt/src/{buffer,autolink}.c',
    'upskirt/html/html_autolink.c'
  ]

  cp upskirt_sources, 'ext/rinku/', :preserve => true, :verbose => true
end
|
66
|
+
|
67
|
+
# Reached only when upskirt/src/markdown.h does not exist: stop with a hint.
file 'upskirt/src/markdown.h' do
  abort "The Upskirt submodule is required."
end
|
70
|
+
|
71
|
+
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.0.0
|
@@ -0,0 +1,239 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2011, Vicent Marti
|
3
|
+
*
|
4
|
+
* Permission to use, copy, modify, and distribute this software for any
|
5
|
+
* purpose with or without fee is hereby granted, provided that the above
|
6
|
+
* copyright notice and this permission notice appear in all copies.
|
7
|
+
*
|
8
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
9
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
10
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
11
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
12
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
13
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
|
+
*/
|
16
|
+
|
17
|
+
#include "buffer.h"
|
18
|
+
|
19
|
+
#include <string.h>
|
20
|
+
#include <stdlib.h>
|
21
|
+
#include <stdio.h>
|
22
|
+
#include <ctype.h>
|
23
|
+
|
24
|
+
/*
 * Check whether a link starts with one of the whitelisted URI schemes.
 *
 * link      - start of the candidate link (does not need a NUL terminator)
 * link_len  - number of readable bytes at `link`
 *
 * Returns 1 when `link` begins (ignoring case) with a whitelisted prefix
 * that is immediately followed by an alphanumeric character, 0 otherwise.
 */
int
is_safe_link(const char *link, size_t link_len)
{
	/* "mailto:" is the actual scheme form; "mailto://" stays in the list so
	 * that every link accepted before is still accepted. */
	static const char *valid_uris[] = {
		"http://", "https://", "ftp://", "mailto://", "mailto:"
	};
	static const size_t valid_uris_count =
		sizeof(valid_uris) / sizeof(valid_uris[0]);

	size_t i;

	for (i = 0; i < valid_uris_count; ++i) {
		const char *prefix = valid_uris[i];
		size_t len = strlen(prefix);
		size_t j;

		/* the prefix plus at least one more character must fit */
		if (link_len <= len)
			continue;

		/* Case-insensitive prefix match written with ISO C only;
		 * strncasecmp() is not declared by <string.h>. */
		for (j = 0; j < len; ++j) {
			if (tolower((unsigned char)link[j]) !=
			    tolower((unsigned char)prefix[j]))
				break;
		}

		/* <ctype.h> functions need a value in unsigned char range */
		if (j == len && isalnum((unsigned char)link[len]))
			return 1;
	}

	return 0;
}
|
45
|
+
|
46
|
+
/*
 * Trim trailing characters that should not be part of an autolinked URL.
 *
 * data      - start of the candidate link
 * link_end  - candidate length; the link is data[0 .. link_end)
 * offset    - not used by this function (kept for the existing signature)
 * size      - not used by this function (kept for the existing signature)
 *
 * Trailing '?', '!', '.', ',' are dropped, a trailing "&name;" entity is
 * dropped as a whole, and a trailing '>' cuts the link back to the last
 * preceding '<'. Finally an unbalanced closing quote/bracket is dropped.
 *
 * Returns the trimmed length, or 0 when nothing of the link is left.
 */
static size_t
autolink_delim(char *data, size_t link_end, size_t offset, size_t size)
{
	char cclose, copen = 0;

	(void)offset;
	(void)size;

	while (link_end > 0) {
		char last = data[link_end - 1];

		if (strchr("?!.,", last) != NULL)
			link_end--;

		else if (last == ';') {
			size_t new_end;

			/* A ';' in the very first position cannot end an entity,
			 * and `link_end - 2` would wrap around to SIZE_MAX. */
			if (link_end < 2) {
				link_end--;
				continue;
			}

			new_end = link_end - 2;

			/* skip back over the entity name */
			while (new_end > 0 && isalpha((unsigned char)data[new_end]))
				new_end--;

			/* only a non-empty name introduced by '&' is an entity */
			if (new_end < link_end - 2 && data[new_end] == '&')
				link_end = new_end;
			else
				link_end--;
		}

		else if (last == '>') {
			/* Begin the backwards search at the '>' itself: the byte
			 * at data[link_end] lies outside the candidate link and
			 * may be outside the buffer as well. */
			link_end--;
			while (link_end > 0 && data[link_end] != '<')
				link_end--;
		}
		else break;
	}

	if (link_end == 0)
		return 0;

	cclose = data[link_end - 1];

	switch (cclose) {
	case '"':	copen = '"'; break;
	case '\'':	copen = '\''; break;
	case ')':	copen = '('; break;
	case ']':	copen = '['; break;
	case '}':	copen = '{'; break;
	default:	break;
	}

	if (copen != 0) {
		size_t closing = 0;
		size_t opening = 0;
		size_t i;

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 *	foo http://www.pokemon.com/Pikachu_(Electric) bar
		 *		=> http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *	foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 *		=> http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *	foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *		=> http://www.pokemon.com/Pikachu_(Electric))
		 *
		 *	(foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *		=> foo http://www.pokemon.com/Pikachu_(Electric)
		 */
		for (i = 0; i < link_end; ++i) {
			if (data[i] == copen)
				opening++;
			else if (data[i] == cclose)
				closing++;
		}

		/* unbalanced inside the link: the last sign is not part of it */
		if (closing != opening)
			link_end--;
	}

	return link_end;
}
|
127
|
+
|
128
|
+
/*
 * Try to recognize a link that starts with "www." at `data`.
 *
 * rewind_p  - set to 0 on success (this kind of link never extends backwards)
 * link      - buffer the recognized link text is appended to
 * data      - position in the text where "www." is expected
 * offset    - data[-1] is only inspected when this is greater than 0
 * size      - number of readable bytes at `data`
 *
 * Returns the length of the link counted from `data`, or 0 when no link was
 * recognized (in which case `link` and `*rewind_p` are left untouched).
 */
size_t
ups_autolink__www(size_t *rewind_p, struct buf *link, char *data, size_t offset, size_t size)
{
	size_t link_end;
	int np = 0;

	/* the character in front of the link must be punctuation or whitespace;
	 * <ctype.h> functions need a value in unsigned char range */
	if (offset > 0 &&
	    !ispunct((unsigned char)data[-1]) &&
	    !isspace((unsigned char)data[-1]))
		return 0;

	if (size < 4 || memcmp(data, "www.", STRLEN("www.")) != 0)
		return 0;

	/* the candidate runs up to the next whitespace; count its dots */
	link_end = 0;
	while (link_end < size && !isspace((unsigned char)data[link_end])) {
		if (data[link_end] == '.')
			np++;

		link_end++;
	}

	/* fewer than two dots: not accepted as a link */
	if (np < 2)
		return 0;

	link_end = autolink_delim(data, link_end, offset, size);

	if (link_end == 0)
		return 0;

	bufput(link, data, link_end);
	*rewind_p = 0;

	/* returned as size_t; narrowing to int first could truncate the length */
	return link_end;
}
|
161
|
+
|
162
|
+
/*
 * Try to recognize an e-mail address around `data`.
 *
 * rewind_p  - on success, receives how many bytes in front of `data`
 *             belong to the address
 * link      - buffer the recognized address is appended to
 * data      - position the scan is anchored at (presumably the '@' --
 *             the forward scan below is what counts it; confirm with caller)
 * offset    - upper bound on how far the scan may walk back from `data`
 * size      - number of readable bytes at `data`
 *
 * Returns the length of the address counted from `data` forwards, or 0 when
 * no address was recognized (`link` and `*rewind_p` are then left untouched).
 */
size_t
ups_autolink__email(size_t *rewind_p, struct buf *link, char *data, size_t offset, size_t size)
{
	size_t link_end, rewind;
	int nb = 0, np = 0;

	/* walk backwards over the part in front of `data` */
	for (rewind = 0; rewind < offset; ++rewind) {
		/* pointer arithmetic instead of a wrapped unsigned index */
		char c = *(data - rewind - 1);

		/* <ctype.h> functions need a value in unsigned char range */
		if (isalnum((unsigned char)c))
			continue;

		if (strchr(".+-_", c) != NULL)
			continue;

		break;
	}

	/* nothing usable in front of `data` */
	if (rewind == 0)
		return 0;

	/* walk forwards, counting '@' signs and dots */
	for (link_end = 0; link_end < size; ++link_end) {
		char c = data[link_end];

		if (isalnum((unsigned char)c))
			continue;

		if (c == '@')
			nb++;
		else if (c == '.' && link_end < size - 1)
			np++;
		else if (c != '-' && c != '_')
			break;
	}

	/* exactly one '@' and at least one counted dot are required */
	if (link_end < 2 || nb != 1 || np == 0)
		return 0;

	link_end = autolink_delim(data, link_end, offset, size);

	if (link_end == 0)
		return 0;

	bufput(link, data - rewind, link_end + rewind);
	*rewind_p = rewind;

	return link_end;
}
|
210
|
+
|
211
|
+
/*
 * Try to recognize a "scheme://..." URL around `data`.
 *
 * rewind_p  - on success, receives how many bytes in front of `data`
 *             (the scheme name) belong to the URL
 * link      - buffer the recognized URL is appended to
 * data      - position the scan is anchored at; data[1] and data[2] must
 *             both be '/' (presumably data[0] is the ':' -- this function
 *             does not check it; confirm with caller)
 * offset    - upper bound on how far the scan may walk back from `data`
 * size      - number of readable bytes at `data`
 *
 * Returns the length of the URL counted from `data` forwards, or 0 when no
 * URL was recognized (`link` and `*rewind_p` are then left untouched).
 */
size_t
ups_autolink__url(size_t *rewind_p, struct buf *link, char *data, size_t offset, size_t size)
{
	size_t link_end, rewind = 0;

	if (size < 4 || data[1] != '/' || data[2] != '/')
		return 0;

	/* Collect the alphabetic scheme name in front of `data`. Pointer
	 * arithmetic replaces a wrapped unsigned index, and <ctype.h>
	 * functions need a value in unsigned char range. */
	while (rewind < offset && isalpha((unsigned char)*(data - rewind - 1)))
		rewind++;

	/* only whitelisted schemes are linked */
	if (!is_safe_link(data - rewind, size + rewind))
		return 0;

	/* the candidate runs up to the next whitespace */
	link_end = 0;
	while (link_end < size && !isspace((unsigned char)data[link_end]))
		link_end++;

	link_end = autolink_delim(data, link_end, offset, size);

	if (link_end == 0)
		return 0;

	bufput(link, data - rewind, link_end + rewind);
	*rewind_p = rewind;

	return link_end;
}
|
239
|
+
|
@@ -0,0 +1,39 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2011, Vicent Marti
|
3
|
+
*
|
4
|
+
* Permission to use, copy, modify, and distribute this software for any
|
5
|
+
* purpose with or without fee is hereby granted, provided that the above
|
6
|
+
* copyright notice and this permission notice appear in all copies.
|
7
|
+
*
|
8
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
9
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
10
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
11
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
12
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
13
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
|
+
*/
|
16
|
+
|
17
|
+
/* The guard must define the same macro it tests, otherwise a second
 * inclusion re-declares everything below. */
#ifndef UPSKIRT_AUTOLINK_H
#define UPSKIRT_AUTOLINK_H

#include "buffer.h"

/* Bit flags naming the kinds of links; AUTOLINK_ALL combines both. */
typedef enum {
	AUTOLINK_URLS = (1 << 0),
	AUTOLINK_EMAILS = (1 << 1),
	AUTOLINK_ALL = AUTOLINK_URLS|AUTOLINK_EMAILS
} autolink_mode;

/*
 * Each scanner below inspects the text around `data` (`size` readable bytes
 * from `data` on; `offset` bounds how far it may look in front of `data`).
 * On success it appends the recognized link to `link`, stores in `*rewind_p`
 * how many bytes in front of `data` belong to the link, and returns the
 * length of the link counted from `data` forwards. It returns 0 when no
 * link was recognized.
 */

/* Links starting with "www." at `data`; `*rewind_p` is always set to 0. */
extern size_t
ups_autolink__www(size_t *rewind_p, struct buf *link, char *data, size_t offset, size_t size);

/* E-mail addresses; the local part is collected from in front of `data`. */
extern size_t
ups_autolink__email(size_t *rewind_p, struct buf *link, char *data, size_t offset, size_t size);

/* "scheme://" URLs; the scheme name is collected from in front of `data`. */
extern size_t
ups_autolink__url(size_t *rewind_p, struct buf *link, char *data, size_t offset, size_t size);

#endif
|
38
|
+
|
39
|
+
/* vim: set filetype=c: */
|