rack-linkify 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +2 -0
- data/LICENSE +21 -0
- data/README.rdoc +20 -0
- data/Rakefile +32 -0
- data/lib/rack-linkify.rb +74 -0
- data/test/rackapp/app.rb +69 -0
- data/test/rackapp/config.ru +6 -0
- metadata +71 -0
data/CHANGELOG
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2009 Wyatt M. Greene
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
= Rack::Linkify
|
2
|
+
|
3
|
+
== Description
|
4
|
+
|
5
|
+
Dead links (like this one http://you.cant.click.this.com/) are magically
|
6
|
+
transformed into live links! Rack::Linkify is very generous in what it
|
7
|
+
considers a link; it will turn http://www.google.com, www.google.com, and
|
8
|
+
just plain google.com into a link.
|
9
|
+
|
10
|
+
Rack::Linkify can also turn text that looks like @this into a twitter link.
|
11
|
+
|
12
|
+
== Usage
|
13
|
+
|
14
|
+
URLs only:
|
15
|
+
|
16
|
+
use Rack::Linkify
|
17
|
+
|
18
|
+
Enable Twitter links, too:
|
19
|
+
|
20
|
+
use Rack::Linkify, :twitter => true
|
data/Rakefile
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rubygems'
|
3
|
+
|
4
|
+
# Gem packaging. Jeweler is optional: when it cannot be loaded the LoadError
# is rescued, a hint is printed, and no Jeweler tasks are defined.
begin
  require 'jeweler'

  Jeweler::Tasks.new do |gem|
    # Identity and authorship.
    gem.name    = "rack-linkify"
    gem.version = "0.0.0"
    gem.author  = "Wyatt Greene"
    gem.email   = "techiferous@gmail.com"
    gem.homepage = "http://github.com/techiferous/rack-linkify"

    # Human-readable descriptions.
    gem.summary = "Rack middleware that adds anchor tags to URLs in text."
    gem.description = %Q{
      Any URLs that occur in the text of the web page are automatically surrounded
      by an anchor tag.
    }

    # Runtime requirements.
    gem.add_dependency 'rack-plastic', '>= 0.0.3'
    gem.requirements << "none"
    gem.require_path = "lib"
    gem.has_rdoc = false

    # Packaged files: the top-level documents plus everything under lib/ and test/.
    gem.files = %w[README.rdoc LICENSE CHANGELOG Rakefile] +
                Dir.glob("lib/**/*") +
                Dir.glob("test/**/*")
  end

  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end
|
data/lib/rack-linkify.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'rack-plastic'
|
2
|
+
|
3
|
+
module Rack
  # Rack middleware that wraps URL-looking text (and, optionally, @names) in
  # anchor tags.
  #
  # The work happens in two passes, one per hook:
  #
  # 1. change_nokogiri_doc walks the text nodes under <body> and surrounds
  #    every candidate link with plain-text marker tokens.
  # 2. change_html_string replaces those markers in the serialized HTML with
  #    real <a> tags.
  #
  # Markers are used because replacing strings later is much easier than
  # splitting a Nokogiri text node to insert an element in the middle of it.
  #
  # NOTE(review): update_text and options are not defined in this class;
  # presumably they (and the calling of the two hooks) are provided by
  # Rack::Plastic -- confirm against the rack-plastic documentation.
  class Linkify < Plastic

    # Marker tokens written into text nodes and later turned into markup:
    #   HREF_MARKER <url> TEXT_MARKER <link text> END_MARKER
    HREF_MARKER = 'beginninganchor1'.freeze
    TEXT_MARKER = 'beginninganchor2'.freeze
    END_MARKER  = 'endinganchor'.freeze

    COMMON_GTLDS = "com|net|org|edu|gov|info|mil|name|mobi|biz".freeze

    # A pattern-matching algorithm that would correctly detect URLs 100% of
    # the time would be prohibitively complex.  For example, if a URL in a
    # sentence is followed by a comma, like http://www.google.com, we would
    # want to match the URL but skip the comma.  However, commas are allowed in
    # URLs.  So there are a lot of edge cases that make a complete solution
    # very complex.
    #
    # The following strategy has the benefits of being relatively
    # straightforward to implement as well as having high accuracy.  Text is
    # scanned for top-level domains, and if one is found it is assumed to be a
    # URL.
    #
    # The closing \b forces the match to end on a word character, so trailing
    # punctuation such as . ? ! : , ) is assumed to be grammatical and is left
    # out of the link.  (The original pattern also carried an explicit
    # punctuation group, but it could never capture anything because the
    # greedy \S* before it already covers those characters.)
    URL_PATTERN = /\b
      \S+\.(?:#{COMMON_GTLDS}|[a-z]{2})\S*  # a word containing a common top-level
                                            # domain or a two-letter country code
      \b/x

    # @name at the start of a line or after whitespace.  Punctuation that
    # follows the name is not part of the match, so it stays in the text.
    TWITTER_PATTERN = /(^|\s)@(\w+)/

    # Matches a URL that already carries a scheme, e.g. http:, https: or ftp:
    # followed by two slashes.
    SCHEME_PATTERN = /\A[a-z][a-z0-9+.\-]*:\/\//i

    # Text inside these elements is never linkified: an anchor inside an
    # anchor is invalid, and the other elements hold raw text, not markup.
    SKIPPED_ELEMENTS = %w[a script style textarea].freeze

    # Marks every candidate link in the document's text nodes.
    #
    # doc - the parsed document (the code calls at_css and traverse on it, so
    #       a Nokogiri document is expected).
    #
    # Returns the same doc object.
    def change_nokogiri_doc(doc)
      find_candidate_links(doc)
      doc
    end

    # Turns the markers left by change_nokogiri_doc into anchor tags.
    #
    # html - the HTML String; it is modified in place.
    #
    # Returns the html String.
    def change_html_string(html)
      linkify(html)
    end

    private

    # Visits every text node below <body> and replaces its content with the
    # marked-up version.  Does nothing when the document has no <body>.
    def find_candidate_links(doc)
      body = doc.at_css("body")
      return if body.nil?

      body.traverse do |node|
        next unless node.text?
        next if inside_skipped_element?(node)
        update_text(node, mark_links(node.content))
      end
    end

    # True when the node has an ancestor element listed in SKIPPED_ELEMENTS.
    def inside_skipped_element?(node)
      node.ancestors.any? { |ancestor| SKIPPED_ELEMENTS.include?(ancestor.name) }
    end

    # Replaces marker tokens with real anchor markup.  Only hrefs generated
    # from markers are touched; every href that was already in the page is
    # left exactly as it was.
    def linkify(html)
      html.gsub!(/#{HREF_MARKER}(.*?)#{TEXT_MARKER}/) do
        %Q{<a href="#{href_for(Regexp.last_match(1))}">}
      end
      html.gsub!(END_MARKER, '</a>')
      html
    end

    # Builds a safe href value for a matched URL: adds http:// when the URL
    # has no scheme of its own, and escapes double quotes so the matched text
    # cannot terminate the attribute and inject further attributes.
    def href_for(url)
      url = "http://#{url}" unless url =~ SCHEME_PATTERN
      url.gsub('"', '&quot;')
    end

    # Returns a copy of text in which every candidate link is wrapped in
    # marker tokens.  The argument itself is not modified.
    def mark_links(text)
      marked = text.gsub(URL_PATTERN,
                         "#{HREF_MARKER}\\0#{TEXT_MARKER}\\0#{END_MARKER}")

      # text that looks like @foo can become a twitter link
      if options[:twitter]
        marked = marked.gsub(TWITTER_PATTERN,
                             "\\1#{HREF_MARKER}http://twitter.com/\\2#{TEXT_MARKER}@\\2#{END_MARKER}")
      end

      marked
    end

  end
end
|
data/test/rackapp/app.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
# Minimal Rack application used to exercise Rack::Linkify by hand.  Every
# request receives the same HTML page full of sample URLs and @names.
class App

  # Rack entry point.
  #
  # env - the Rack environment; it is not consulted.
  #
  # Returns the value of Rack::Response#finish for a text/html response whose
  # body is front_page.
  def call(env)
    response = Rack::Response.new
    response['Content-Type'] = 'text/html'
    response.write front_page
    response.finish
  end

  # Returns the sample page as a String.  Each paragraph is closed before the
  # <pre> block starts, since the declared XHTML 1.0 Strict doctype does not
  # allow <pre> inside <p>.
  def front_page
    %Q{
    <!DOCTYPE html
    PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
    <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
      <head>
        <title>Testing Rack::Linkify</title>
      </head>
      <body>
        <div id="container">
          <h1>Testing Rack::Linkify</h1>
          <p>
            This is a test of links in free-flowing text. <br />
            Test a typical URL http://www.google.com <br />
            Test a URL followed by a period http://www.google.com. <br />
            Test a URL without the http www.google.com <br />
            Test a URL without http and ending in a period www.google.com. <br />
            Test a URL without www and followed by a comma google.com, <br />
            Test a URL followed by a bang google.com! <br />
            Test a URL followed by a hook google.com? <br />
            Test a URL followed by an interrobang google.com?! <br />
            Test another URL coderack.org <br />
            Test a URL with a path google.com/foobar <br />
            Test a longer URL google.com/foobar/index.html. <br />
            Test a URL followed by a parenthesis google.com) <br />
            Test a URL followed by a parenthesis and comma google.com), <br />
            Test atypical gTLDs http://www.something.info, and http://del.icio.us <br />
            Test more atypical gTLDs http://bit.ly/n0og http://www.wikio.co.uk <br />
          </p>
          <p>
            Here are some more links:
          </p>
          <pre>
            http://www.google.com
            http://www.google.co.uk
            www.google.com
            google.com
            http://localhost:3000/houses
            http://oreilly.com/ruby/excerpts/ruby-learning-rails/ruby-guide-regular-expressions.html
            http://www.regular-expressions.info/freespacing.html
            http://www.perlmonks.org/?node_id=518444
            localhost:3000/houses
            oreilly.com/ruby/excerpts/ruby-learning-rails/ruby-guide-regular-expressions.html
            www.regular-expressions.info/freespacing.html
            www.perlmonks.org/?node_id=518444
            http://maps.google.com/maps?f=q&source=s_q&hl=en&geocode=&q=375+Harvard+St,+Cambridge,+MA+02138&sll=42.398774,-71.117184&sspn=0.008097,0.019205&ie=UTF8&ll=42.372781,-71.112099&spn=0.008101,0.019205&z=16&iwloc=A
          </pre>
          <p>
            here is a potential twitter address @techiferous <br />
            here it is with punctuation @techiferous. <br />
            will this work? @coderack?!?! <br />
            and how about this? @techiferous) <br />
          </p>
        </div>
      </body>
    </html>
    }
  end

end
|
metadata
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rack-linkify
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Wyatt Greene
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-12-03 00:00:00 -05:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: rack-plastic
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.3
|
24
|
+
version:
|
25
|
+
description: "\n Any URLs that occur in the text of the web page are automatically surrounded\n by an anchor tag.\n "
|
26
|
+
email: techiferous@gmail.com
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files:
|
32
|
+
- LICENSE
|
33
|
+
- README.rdoc
|
34
|
+
files:
|
35
|
+
- CHANGELOG
|
36
|
+
- LICENSE
|
37
|
+
- README.rdoc
|
38
|
+
- Rakefile
|
39
|
+
- lib/rack-linkify.rb
|
40
|
+
- test/rackapp/app.rb
|
41
|
+
- test/rackapp/config.ru
|
42
|
+
has_rdoc: true
|
43
|
+
homepage: http://github.com/techiferous/rack-linkify
|
44
|
+
licenses: []
|
45
|
+
|
46
|
+
post_install_message:
|
47
|
+
rdoc_options:
|
48
|
+
- --charset=UTF-8
|
49
|
+
require_paths:
|
50
|
+
- lib
|
51
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: "0"
|
56
|
+
version:
|
57
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: "0"
|
62
|
+
version:
|
63
|
+
requirements:
|
64
|
+
- none
|
65
|
+
rubyforge_project:
|
66
|
+
rubygems_version: 1.3.5
|
67
|
+
signing_key:
|
68
|
+
specification_version: 3
|
69
|
+
summary: Rack middleware that adds anchor tags to URLs in text.
|
70
|
+
test_files: []
|
71
|
+
|