rinku 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/COPYING +13 -0
- data/README.markdown +87 -0
- data/Rakefile +71 -0
- data/VERSION +1 -0
- data/ext/rinku/autolink.c +239 -0
- data/ext/rinku/autolink.h +39 -0
- data/ext/rinku/buffer.c +323 -0
- data/ext/rinku/buffer.h +154 -0
- data/ext/rinku/extconf.rb +4 -0
- data/ext/rinku/html_autolink.c +221 -0
- data/ext/rinku/rinku.c +86 -0
- data/lib/rinku.rb +41 -0
- data/rinku.gemspec +34 -0
- data/test/autolink_test.rb +135 -0
- metadata +80 -0
data/COPYING
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright (c) 2011, Vicent Marti
|
2
|
+
|
3
|
+
Permission to use, copy, modify, and distribute this software for any
|
4
|
+
purpose with or without fee is hereby granted, provided that the above
|
5
|
+
copyright notice and this permission notice appear in all copies.
|
6
|
+
|
7
|
+
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
8
|
+
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
9
|
+
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
10
|
+
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
11
|
+
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
12
|
+
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
13
|
+
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
data/README.markdown
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
Rinku does linking
|
2
|
+
==================
|
3
|
+
|
4
|
+
Rinku is a Ruby library that does autolinking.
|
5
|
+
It parses text and turns anything that remotely resembles a link into an HTML link,
|
6
|
+
just like the Ruby on Rails `auto_link` method -- but it's about 20 times faster,
|
7
|
+
because it's written in C, and it's about 20 times smarter when linking,
|
8
|
+
because it does actual parsing instead of RegEx replacements.
|
9
|
+
|
10
|
+
Rinku is a Ruby Gem
|
11
|
+
-------------------
|
12
|
+
|
13
|
+
Rinku is available as a Ruby gem:
|
14
|
+
|
15
|
+
$ [sudo] gem install rinku
|
16
|
+
|
17
|
+
The Rinku source is available at GitHub:
|
18
|
+
|
19
|
+
$ git clone git://github.com/tanoku/rinku.git
|
20
|
+
|
21
|
+
Rinku is a drop-in replacement for Rails `auto_link`
|
22
|
+
----------------------------------------------------
|
23
|
+
|
24
|
+
And I'm a lazy bum, so I've copied and pasted the Rails API docs here.
|
25
|
+
Yes, the Rinku API is 100% compatible.
|
26
|
+
|
27
|
+
~~~~~~ruby
|
28
|
+
# Turns all URLs and e-mail addresses into clickable links. The <tt>:link</tt> option
|
29
|
+
# will limit what should be linked. You can add HTML attributes to the links using
|
30
|
+
# <tt>:html</tt>. Possible values for <tt>:link</tt> are <tt>:all</tt> (default),
|
31
|
+
# <tt>:email_addresses</tt>, and <tt>:urls</tt>. If a block is given, each URL and
|
32
|
+
# e-mail address is yielded and the result is used as the link text.
|
33
|
+
#
|
34
|
+
# ==== Examples
|
35
|
+
# auto_link("Go to http://www.rubyonrails.org and say hello to david@loudthinking.com")
|
36
|
+
# # => "Go to <a href=\"http://www.rubyonrails.org\">http://www.rubyonrails.org</a> and
|
37
|
+
# # say hello to <a href=\"mailto:david@loudthinking.com\">david@loudthinking.com</a>"
|
38
|
+
#
|
39
|
+
# auto_link("Visit http://www.loudthinking.com/ or e-mail david@loudthinking.com", :link => :urls)
|
40
|
+
# # => "Visit <a href=\"http://www.loudthinking.com/\">http://www.loudthinking.com/</a>
|
41
|
+
# # or e-mail david@loudthinking.com"
|
42
|
+
#
|
43
|
+
# auto_link("Visit http://www.loudthinking.com/ or e-mail david@loudthinking.com", :link => :email_addresses)
|
44
|
+
# # => "Visit http://www.loudthinking.com/ or e-mail <a href=\"mailto:david@loudthinking.com\">david@loudthinking.com</a>"
|
45
|
+
#
|
46
|
+
# post_body = "Welcome to my new blog at http://www.myblog.com/. Please e-mail me at me@email.com."
|
47
|
+
# auto_link(post_body, :html => { :target => '_blank' }) do |text|
|
48
|
+
# truncate(text, :length => 15)
|
49
|
+
# end
|
50
|
+
# # => "Welcome to my new blog at <a href=\"http://www.myblog.com/\" target=\"_blank\">http://www.m...</a>.
|
51
|
+
# Please e-mail me at <a href=\"mailto:me@email.com\">me@email.com</a>."
|
52
|
+
#
|
53
|
+
#
|
54
|
+
# You can still use <tt>auto_link</tt> with the old API that accepts the
|
55
|
+
# +link+ as its optional second parameter and the +html_options+ hash
|
56
|
+
# as its optional third parameter:
|
57
|
+
# post_body = "Welcome to my new blog at http://www.myblog.com/. Please e-mail me at me@email.com."
|
58
|
+
# auto_link(post_body, :urls) # => Once upon\na time
|
59
|
+
# # => "Welcome to my new blog at <a href=\"http://www.myblog.com/\">http://www.myblog.com</a>.
|
60
|
+
# Please e-mail me at me@email.com."
|
61
|
+
#
|
62
|
+
# auto_link(post_body, :all, :target => "_blank") # => Once upon\na time
|
63
|
+
# # => "Welcome to my new blog at <a href=\"http://www.myblog.com/\" target=\"_blank\">http://www.myblog.com</a>.
|
64
|
+
# Please e-mail me at <a href=\"mailto:me@email.com\">me@email.com</a>."
|
65
|
+
~~~~~~~~~
|
66
|
+
|
67
|
+
Rinku is written by me
|
68
|
+
----------------------
|
69
|
+
|
70
|
+
I am Vicent Marti, and I wrote Rinku.
|
71
|
+
While Rinku is busy doing autolinks, you should be busy following me on twitter. `@tanoku`. Do it.
|
72
|
+
|
73
|
+
Rinku has an awesome license
|
74
|
+
----------------------------
|
75
|
+
|
76
|
+
Permission to use, copy, modify, and/or distribute this software for any
|
77
|
+
purpose with or without fee is hereby granted, provided that the above
|
78
|
+
copyright notice and this permission notice appear in all copies.
|
79
|
+
|
80
|
+
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
81
|
+
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
82
|
+
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
83
|
+
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
84
|
+
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
85
|
+
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
86
|
+
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
87
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'date'
|
2
|
+
require 'rake/clean'
|
3
|
+
require 'rake/extensiontask'
|
4
|
+
require 'digest/md5'
|
5
|
+
|
6
|
+
task :default => :test
|
7
|
+
|
8
|
+
# ==========================================================
|
9
|
+
# Ruby Extension
|
10
|
+
# ==========================================================
|
11
|
+
|
12
|
+
Rake::ExtensionTask.new('rinku')
|
13
|
+
|
14
|
+
# ==========================================================
|
15
|
+
# Testing
|
16
|
+
# ==========================================================
|
17
|
+
|
18
|
+
require 'rake/testtask'
|
19
|
+
Rake::TestTask.new('test') do |t|
|
20
|
+
t.test_files = FileList['test/*_test.rb']
|
21
|
+
t.ruby_opts += ['-rubygems'] if defined? Gem
|
22
|
+
end
|
23
|
+
task 'test' => [:compile]
|
24
|
+
|
25
|
+
# PACKAGING =================================================================
|
26
|
+
|
27
|
+
require 'rubygems'
|
28
|
+
$spec = eval(File.read('rinku.gemspec'))
|
29
|
+
|
30
|
+
def package(ext='')
|
31
|
+
"pkg/rinku-#{$spec.version}" + ext
|
32
|
+
end
|
33
|
+
|
34
|
+
desc 'Build packages'
|
35
|
+
task :package => package('.gem')
|
36
|
+
|
37
|
+
desc 'Build and install as local gem'
|
38
|
+
task :install => package('.gem') do
|
39
|
+
sh "gem install #{package('.gem')}"
|
40
|
+
end
|
41
|
+
|
42
|
+
desc 'Update the gemspec'
|
43
|
+
task :update_gem => file('rinku.gemspec')
|
44
|
+
|
45
|
+
directory 'pkg/'
|
46
|
+
|
47
|
+
file package('.gem') => %w[pkg/ rinku.gemspec] + $spec.files do |f|
|
48
|
+
sh "gem build rinku.gemspec"
|
49
|
+
mv File.basename(f.name), f.name
|
50
|
+
end
|
51
|
+
|
52
|
+
# GEMSPEC HELPERS ==========================================================
|
53
|
+
|
54
|
+
desc 'Gather required Upskirt sources into extension directory'
|
55
|
+
task :gather => 'upskirt/src/markdown.h' do |t|
|
56
|
+
files =
|
57
|
+
FileList[
|
58
|
+
'upskirt/src/{buffer,autolink}.h',
|
59
|
+
'upskirt/src/{buffer,autolink}.c',
|
60
|
+
'upskirt/html/html_autolink.c'
|
61
|
+
]
|
62
|
+
cp files, 'ext/rinku/',
|
63
|
+
:preserve => true,
|
64
|
+
:verbose => true
|
65
|
+
end
|
66
|
+
|
67
|
+
file 'upskirt/src/markdown.h' do |t|
|
68
|
+
abort "The Upskirt submodule is required."
|
69
|
+
end
|
70
|
+
|
71
|
+
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.0.0
|
@@ -0,0 +1,239 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2011, Vicent Marti
|
3
|
+
*
|
4
|
+
* Permission to use, copy, modify, and distribute this software for any
|
5
|
+
* purpose with or without fee is hereby granted, provided that the above
|
6
|
+
* copyright notice and this permission notice appear in all copies.
|
7
|
+
*
|
8
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
9
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
10
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
11
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
12
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
13
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
|
+
*/
|
16
|
+
|
17
|
+
#include "buffer.h"
|
18
|
+
|
19
|
+
#include <string.h>
|
20
|
+
#include <stdlib.h>
|
21
|
+
#include <stdio.h>
|
22
|
+
#include <ctype.h>
|
23
|
+
|
24
|
+
/*
 * Check whether `link` starts with one of the whitelisted URI schemes.
 *
 * link     - candidate text; it does not need to be NUL-terminated
 * link_len - number of readable bytes at `link`
 *
 * Returns 1 when the text begins with a whitelisted scheme prefix
 * (compared case-insensitively) that is immediately followed by an
 * alphanumeric character, and 0 otherwise.
 */
int
is_safe_link(const char *link, size_t link_len)
{
	static const char *valid_uris[] = {
		"http://", "https://", "ftp://", "mailto://"
	};
	/* Derived from the table so the count cannot drift from its contents. */
	static const size_t valid_uris_count =
		sizeof(valid_uris) / sizeof(valid_uris[0]);

	size_t i;

	for (i = 0; i < valid_uris_count; ++i) {
		size_t len = strlen(valid_uris[i]);

		/*
		 * `link_len > len` guarantees that link[len] is readable.
		 * <ctype.h> classifiers need an unsigned char value; a plain
		 * (possibly negative) char is undefined behaviour.
		 */
		if (link_len > len &&
			strncasecmp(link, valid_uris[i], len) == 0 &&
			isalnum((unsigned char)link[len]))
			return 1;
	}

	return 0;
}
|
45
|
+
|
46
|
+
/*
 * Trim trailing characters that most likely do not belong to a link.
 *
 * data     - start of the candidate link text
 * link_end - tentative length of the link, in bytes from `data`
 * offset   - not used by this function
 * size     - not used by this function
 *
 * Trailing '?', '!', '.' and ',' are dropped, a trailing "&name;" entity is
 * dropped as a whole, and a trailing '>' cuts the link back to the last '<'
 * before it. Finally, a single closing quote or bracket is dropped when it
 * is unbalanced within the remaining text.
 *
 * Returns the adjusted link length, or 0 when nothing is left.
 */
static size_t
autolink_delim(char *data, size_t link_end, size_t offset, size_t size)
{
	char cclose, copen = 0;

	(void)offset;
	(void)size;

	while (link_end > 0) {
		if (strchr("?!.,", data[link_end - 1]) != NULL)
			link_end--;

		else if (data[link_end - 1] == ';') {
			size_t new_end;

			/*
			 * A ';' at index 0 cannot end an entity, and
			 * computing link_end - 2 would wrap around.
			 */
			if (link_end < 2) {
				link_end--;
				continue;
			}

			new_end = link_end - 2;

			while (new_end > 0 && isalpha((unsigned char)data[new_end]))
				new_end--;

			/* Only strip when at least one letter sits between '&' and ';'. */
			if (new_end < link_end - 2 && data[new_end] == '&')
				link_end = new_end;
			else
				link_end--;
		}

		else if (data[link_end - 1] == '>') {
			/*
			 * Start the backwards scan on the '>' itself:
			 * data[link_end] may lie past the end of the buffer.
			 */
			link_end--;

			while (link_end > 0 && data[link_end] != '<')
				link_end--;
		}
		else break;
	}

	if (link_end == 0)
		return 0;

	cclose = data[link_end - 1];

	switch (cclose) {
	case '"':	copen = '"'; break;
	case '\'':	copen = '\''; break;
	case ')':	copen = '('; break;
	case ']':	copen = '['; break;
	case '}':	copen = '{'; break;
	default:	break;
	}

	if (copen != 0) {
		size_t closing = 0;
		size_t opening = 0;
		size_t i = 0;

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 *	foo http://www.pokemon.com/Pikachu_(Electric) bar
		 *		=> http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *	foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 *		=> http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *	foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *		=> http://www.pokemon.com/Pikachu_(Electric))
		 *
		 *	(foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *		=> foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		while (i < link_end) {
			if (data[i] == copen)
				opening++;
			else if (data[i] == cclose)
				closing++;

			i++;
		}

		if (closing != opening)
			link_end--;
	}

	return link_end;
}
|
127
|
+
|
128
|
+
/*
 * Try to recognise a "www." link starting at `data`.
 *
 * rewind_p - set to 0 on success (the link never extends before `data`)
 * link     - buffer that receives the link text via bufput()
 * data     - current position in the text; data[-1] is read when offset > 0
 * offset   - number of bytes available before `data`
 * size     - number of bytes available from `data` onwards
 *
 * The match is rejected when the preceding character is neither punctuation
 * nor whitespace, when the text does not start with "www.", or when fewer
 * than two '.' occur before the next whitespace.
 *
 * Returns the number of bytes of `data` consumed by the link, or 0 when
 * there is no match.
 *
 * NOTE(review): STRLEN is not defined in this block; presumably it comes
 * from buffer.h -- confirm.
 */
size_t
ups_autolink__www(size_t *rewind_p, struct buf *link, char *data, size_t offset, size_t size)
{
	size_t link_end;
	int np = 0;

	/* <ctype.h> classifiers require an unsigned char value. */
	if (offset > 0 &&
		!ispunct((unsigned char)data[-1]) &&
		!isspace((unsigned char)data[-1]))
		return 0;

	if (size < STRLEN("www.") || memcmp(data, "www.", STRLEN("www.")) != 0)
		return 0;

	/* Scan to the next whitespace, counting dots along the way. */
	link_end = 0;
	while (link_end < size && !isspace((unsigned char)data[link_end])) {
		if (data[link_end] == '.')
			np++;

		link_end++;
	}

	if (np < 2)
		return 0;

	link_end = autolink_delim(data, link_end, offset, size);

	if (link_end == 0)
		return 0;

	bufput(link, data, link_end);
	*rewind_p = 0;

	/* Return the size_t as-is; narrowing through int could truncate it. */
	return link_end;
}
|
161
|
+
|
162
|
+
/*
 * Try to recognise an e-mail address around `data`.
 *
 * rewind_p - on success, receives how many bytes before `data` belong to
 *            the address
 * link     - buffer that receives the address text via bufput()
 * data     - current position in the text (presumably the '@' -- confirm
 *            against the caller); bytes before it are read, up to `offset`
 * offset   - number of bytes available before `data`
 * size     - number of bytes available from `data` onwards
 *
 * The part before `data` may contain alphanumerics and ".+-_". The part
 * from `data` onwards may contain alphanumerics, '@', '.', '-' and '_', and
 * must hold exactly one '@' and at least one '.' that is not the last
 * available byte.
 *
 * Returns the number of bytes at and after `data` that belong to the
 * address, or 0 when there is no match.
 */
size_t
ups_autolink__email(size_t *rewind_p, struct buf *link, char *data, size_t offset, size_t size)
{
	size_t link_end, rewind;
	int nb = 0, np = 0;

	for (rewind = 0; rewind < offset; ++rewind) {
		/*
		 * Step backwards with pointer arithmetic; negating the
		 * unsigned `rewind` inside an index relies on wraparound.
		 */
		char c = *(data - rewind - 1);

		if (isalnum((unsigned char)c))
			continue;

		if (strchr(".+-_", c) != NULL)
			continue;

		break;
	}

	/* No acceptable character before `data`: not an address. */
	if (rewind == 0)
		return 0;

	for (link_end = 0; link_end < size; ++link_end) {
		char c = data[link_end];

		if (isalnum((unsigned char)c))
			continue;

		if (c == '@')
			nb++;
		else if (c == '.' && link_end < size - 1)
			np++;
		else if (c != '-' && c != '_')
			break;
	}

	if (link_end < 2 || nb != 1 || np == 0)
		return 0;

	link_end = autolink_delim(data, link_end, offset, size);

	if (link_end == 0)
		return 0;

	bufput(link, data - rewind, link_end + rewind);
	*rewind_p = rewind;

	return link_end;
}
|
210
|
+
|
211
|
+
/*
 * Try to recognise a "scheme://" URL around `data`.
 *
 * rewind_p - on success, receives how many bytes before `data` (the scheme
 *            name) belong to the URL
 * link     - buffer that receives the URL text via bufput()
 * data     - current position in the text (presumably the ':' -- confirm
 *            against the caller); data[1] and data[2] must both be '/'
 * offset   - number of bytes available before `data`
 * size     - number of bytes available from `data` onwards
 *
 * The alphabetic run directly before `data` is taken as the scheme, and the
 * match is rejected unless is_safe_link() accepts the text starting there.
 * The URL extends to the next whitespace and is then trimmed by
 * autolink_delim().
 *
 * Returns the number of bytes at and after `data` that belong to the URL,
 * or 0 when there is no match.
 */
size_t
ups_autolink__url(size_t *rewind_p, struct buf *link, char *data, size_t offset, size_t size)
{
	size_t link_end, rewind = 0;

	if (size < 4 || data[1] != '/' || data[2] != '/')
		return 0;

	/*
	 * Walk back over the scheme name with pointer arithmetic; negating
	 * the unsigned `rewind` inside an index relies on wraparound.
	 */
	while (rewind < offset && isalpha((unsigned char)*(data - rewind - 1)))
		rewind++;

	if (!is_safe_link(data - rewind, size + rewind))
		return 0;

	link_end = 0;
	while (link_end < size && !isspace((unsigned char)data[link_end]))
		link_end++;

	link_end = autolink_delim(data, link_end, offset, size);

	if (link_end == 0)
		return 0;

	bufput(link, data - rewind, link_end + rewind);
	*rewind_p = rewind;

	return link_end;
}
|
239
|
+
|
@@ -0,0 +1,39 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2011, Vicent Marti
|
3
|
+
*
|
4
|
+
* Permission to use, copy, modify, and distribute this software for any
|
5
|
+
* purpose with or without fee is hereby granted, provided that the above
|
6
|
+
* copyright notice and this permission notice appear in all copies.
|
7
|
+
*
|
8
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
9
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
10
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
11
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
12
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
13
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
|
+
*/
|
16
|
+
|
17
|
+
#ifndef UPSKIRT_AUTOLINK_H
|
18
|
+
#define UPSKIRT_AUTOLINK_H /* must match the #ifndef test or the guard never engages */
|
19
|
+
|
20
|
+
#include "buffer.h"
|
21
|
+
|
22
|
+
/* Bit flags naming the kinds of link; the values can be OR-ed together. */
typedef enum {
|
23
|
+
AUTOLINK_URLS = (1 << 0), /* bit 0 */
|
24
|
+
AUTOLINK_EMAILS = (1 << 1), /* bit 1 */
|
25
|
+
AUTOLINK_ALL = AUTOLINK_URLS|AUTOLINK_EMAILS /* both bits set */
|
26
|
+
} autolink_mode;
|
27
|
+
|
28
|
+
extern size_t
|
29
|
+
ups_autolink__www(size_t *rewind_p, struct buf *link, char *data, size_t offset, size_t size);
|
30
|
+
|
31
|
+
extern size_t
|
32
|
+
ups_autolink__email(size_t *rewind_p, struct buf *link, char *data, size_t offset, size_t size);
|
33
|
+
|
34
|
+
extern size_t
|
35
|
+
ups_autolink__url(size_t *rewind_p, struct buf *link, char *data, size_t offset, size_t size);
|
36
|
+
|
37
|
+
#endif
|
38
|
+
|
39
|
+
/* vim: set filetype=c: */
|