rack-linkify 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG ADDED
@@ -0,0 +1,2 @@
1
+ 0.0.0 (December 3, 2009)
2
+ * Initial release.
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2009 Wyatt M. Greene
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,20 @@
1
+ = Rack::Linkify
2
+
3
+ == Description
4
+
5
+ Dead links (like this one http://you.cant.click.this.com/) are magically
6
+ transformed into live links! Rack::Linkify is very generous in what it
7
+ considers a link; it will turn http://www.google.com, www.google.com, and
8
+ just plain google.com into a link.
9
+
10
+ Rack::Linkify can also turn text that looks like @this into a Twitter link.
11
+
12
+ == Usage
13
+
14
+ URLs only:
15
+
16
+ use Rack::Linkify
17
+
18
+ Enable Twitter links, too:
19
+
20
+ use Rack::Linkify, :twitter => true
data/Rakefile ADDED
@@ -0,0 +1,32 @@
1
+ require 'rake'
2
+ require 'rubygems'
3
+
4
# Defines the gem packaging and Gemcutter release tasks via Jeweler.
# If Jeweler cannot be loaded, the tasks are skipped and an installation
# hint is printed instead of aborting the whole Rakefile.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |s|
    s.name = "rack-linkify"
    s.version = "0.0.0"
    s.add_dependency 'rack-plastic', '>= 0.0.3'
    s.author = "Wyatt Greene"
    s.email = "techiferous@gmail.com"
    s.summary = "Rack middleware that adds anchor tags to URLs in text."
    # Kept as a plain single-line string: a multi-line %Q{} literal leaks its
    # leading/trailing newlines and source indentation into the gem metadata.
    s.description = "Any URLs that occur in the text of the web page are " \
                    "automatically surrounded by an anchor tag."
    s.require_path = "lib"
    # Top-level project files first, then everything under lib/ and test/.
    s.files = %w[README.rdoc LICENSE CHANGELOG Rakefile] +
              Dir.glob("lib/**/*") +
              Dir.glob("test/**/*")
    s.homepage = "http://github.com/techiferous/rack-linkify"
    s.requirements << "none"
    s.has_rdoc = false
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end
@@ -0,0 +1,74 @@
1
+ require 'rack-plastic'
2
+
3
module Rack
  # Rack middleware that turns URL-looking text (and, optionally, text that
  # looks like a Twitter handle such as @name) into anchor tags.
  #
  # The work happens in two passes:
  #
  # 1. change_nokogiri_doc walks the text nodes of the parsed document and
  #    wraps every candidate link in plain-text marker tokens.
  # 2. change_html_string replaces those markers with real <a> markup in the
  #    serialized HTML.
  #
  # Markers are used because it is much easier to replace strings in the
  # final HTML than to split a text node and insert a new element in the
  # middle of it inside Nokogiri's document structure.
  #
  # NOTE(review): +options+ and +update_text+ are not defined in this file;
  # they are presumably inherited from Rack::Plastic -- confirm against the
  # rack-plastic version in use.
  class Linkify < Plastic

    # Marker placed immediately before the href value of a generated link.
    HREF_START_MARK = 'beginninganchor1'.freeze
    # Marker placed between the href value and the visible link text.
    HREF_END_MARK = 'beginninganchor2'.freeze
    # Marker placed after the visible link text.
    ANCHOR_END_MARK = 'endinganchor'.freeze

    # Matches the marked-up href of a link generated by mark_links.
    MARKED_HREF = /beginninganchor1(\S*?)beginninganchor2/

    # Matches an href that already carries a scheme (http://, https://, ftp://, ...).
    SCHEME_PREFIX = %r{\A[a-z][a-z0-9+.\-]*://}i

    COMMON_GTLDS = 'com|net|org|edu|gov|info|mil|name|mobi|biz'.freeze

    # A pattern-matching algorithm that would correctly detect URLs 100% of
    # the time would be prohibitively complex. For example, if a URL in a
    # sentence is followed by a comma, like http://www.google.com, we would
    # want to match the URL but skip the comma. However, commas are allowed
    # in URLs. So there are a lot of edge cases that make a complete solution
    # very complex.
    #
    # The following strategy has the benefits of being relatively
    # straightforward to implement as well as having high accuracy. Text is
    # scanned for words containing a common top-level domain or a two-letter
    # country code, and if one is found it is assumed to be a URL.
    #
    # The trailing \b forces the match to end on a word character, so
    # punctuation that follows a URL (".", "?", "!", ")", ",") is treated as
    # grammatical and is left outside the link.
    URL_PATTERN = /\b\S+\.(?:#{COMMON_GTLDS}|[a-z]{2})\S*\b/

    # Text that looks like @foo at the start of a line or after whitespace.
    # Group 1 is the leading whitespace (or empty), group 2 is the handle
    # without the @ sign. Nothing after the handle is consumed, so trailing
    # punctuation stays in the text untouched.
    TWITTER_PATTERN = /(^|\s)@(\w+)/

    # Elements whose text must not be linkified: wrapping it in an anchor
    # would create nested links or corrupt non-prose content.
    UNLINKABLE_ELEMENTS = %w[a script style textarea].freeze

    # First pass: marks candidate links in the document's text nodes.
    # Returns the same document object.
    def change_nokogiri_doc(doc)
      find_candidate_links(doc)
      doc
    end

    # Second pass: converts the markers left by the first pass into anchor
    # tags. Returns the resulting HTML string.
    def change_html_string(html)
      linkify(html)
    end

    private

    # Rewrites every text node under <body> with link markers added.
    # Documents without a <body> are left untouched.
    def find_candidate_links(doc)
      body = doc.at_css("body")
      return unless body

      body.traverse do |node|
        next unless node.text?
        next if inside_unlinkable_element?(node)
        update_text(node, mark_links(node.content))
      end
    end

    # True when the node sits inside an element listed in UNLINKABLE_ELEMENTS.
    def inside_unlinkable_element?(node)
      node.ancestors.any? { |ancestor| UNLINKABLE_ELEMENTS.include?(ancestor.name) }
    end

    # Replaces the markers in +html+ with anchor markup. The string is
    # modified in place and also returned.
    #
    # Only hrefs generated by this middleware are touched; links that were
    # already present in the page (relative paths, mailto:, #fragments, ...)
    # are left exactly as they were.
    def linkify(html)
      html.gsub!(MARKED_HREF) do
        href = Regexp.last_match(1)
        # if the URL doesn't start with a scheme such as http://, let's add it:
        href = "http://#{href}" unless href =~ SCHEME_PREFIX
        # A double quote inside the matched text would otherwise terminate
        # the attribute early and let page text inject markup.
        %(<a href="#{href.gsub('"', '&quot;')}">)
      end
      html.gsub!(ANCHOR_END_MARK, '</a>')
      html
    end

    # Returns a copy of +text+ in which every candidate URL (and, when the
    # :twitter option is set, every @handle) is wrapped in link markers.
    # The argument itself is not modified.
    def mark_links(text)
      marked = text.gsub(URL_PATTERN) do |url|
        "#{HREF_START_MARK}#{url}#{HREF_END_MARK}#{url}#{ANCHOR_END_MARK}"
      end

      # text that looks like @foo can become a twitter link
      if options[:twitter]
        marked = marked.gsub(TWITTER_PATTERN) do
          leading = Regexp.last_match(1)
          handle = Regexp.last_match(2)
          "#{leading}#{HREF_START_MARK}http://twitter.com/#{handle}" \
            "#{HREF_END_MARK}@#{handle}#{ANCHOR_END_MARK}"
        end
      end

      marked
    end

  end
end
@@ -0,0 +1,69 @@
1
# Minimal Rack application that serves a single static HTML page full of
# sample URLs and Twitter handles, used to try out Rack::Linkify by hand.
class App

  # Rack entry point: responds to every request with the sample page.
  def call(env)
    response = Rack::Response.new
    response['Content-Type'] = 'text/html'
    response.write front_page
    response.finish
  end

  # Returns the sample page markup as a String. Every <p> is closed before
  # the next block-level element starts so the fixture itself is well formed.
  def front_page
    %Q{
      <!DOCTYPE html
      PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
      "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
      <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
        <head>
          <title>Testing Rack::Linkify</title>
        </head>
        <body>
          <div id="container">
            <h1>Testing Rack::Linkify</h1>
            <p>
              This is a test of links in free-flowing text. <br />
              Test a typical URL http://www.google.com <br />
              Test a URL followed by a period http://www.google.com. <br />
              Test a URL without the http www.google.com <br />
              Test a URL without http and ending in a period www.google.com. <br />
              Test a URL without www and followed by a comma google.com, <br />
              Test a URL followed by a bang google.com! <br />
              Test a URL followed by a hook google.com? <br />
              Test a URL followed by an interrobang google.com?! <br />
              Test another URL coderack.org <br />
              Test a URL with a path google.com/foobar <br />
              Test a longer URL google.com/foobar/index.html. <br />
              Test a URL followed by a parenthesis google.com) <br />
              Test a URL followed by a parenthesis and comma google.com), <br />
              Test atypical gTLDs http://www.something.info, and http://del.icio.us <br />
              Test more atypical gTLDs http://bit.ly/n0og http://www.wikio.co.uk <br />
            </p>
            <p>
              Here are some more links:
            </p>
            <pre>
              http://www.google.com
              http://www.google.co.uk
              www.google.com
              google.com
              http://localhost:3000/houses
              http://oreilly.com/ruby/excerpts/ruby-learning-rails/ruby-guide-regular-expressions.html
              http://www.regular-expressions.info/freespacing.html
              http://www.perlmonks.org/?node_id=518444
              localhost:3000/houses
              oreilly.com/ruby/excerpts/ruby-learning-rails/ruby-guide-regular-expressions.html
              www.regular-expressions.info/freespacing.html
              www.perlmonks.org/?node_id=518444
              http://maps.google.com/maps?f=q&source=s_q&hl=en&geocode=&q=375+Harvard+St,+Cambridge,+MA+02138&sll=42.398774,-71.117184&sspn=0.008097,0.019205&ie=UTF8&ll=42.372781,-71.112099&spn=0.008101,0.019205&z=16&iwloc=A
            </pre>
            <p>
              here is a potential twitter address @techiferous <br />
              here it is with punctuation @techiferous. <br />
              will this work? @coderack?!?! <br />
              and how about this? @techiferous) <br />
            </p>
          </div>
        </body>
      </html>
    }
  end

end
@@ -0,0 +1,6 @@
1
# Rackup file for the manual test application: serves App through
# Rack::Linkify with Twitter linking enabled.
#
# Both files are loaded relative to this file rather than through
# $LOAD_PATH, because the current directory is not on the load path in
# Ruby 1.9.2 and later.
require File.join(File.dirname(__FILE__), 'app')
require File.join(File.dirname(__FILE__), '..', '..', 'lib', 'rack-linkify')

use Rack::Linkify, :twitter => true

run App.new
metadata ADDED
@@ -0,0 +1,71 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rack-linkify
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Wyatt Greene
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-12-03 00:00:00 -05:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rack-plastic
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.3
24
+ version:
25
+ description: "\n Any URLs that occur in the text of the web page are automatically surrounded\n by an anchor tag.\n "
26
+ email: techiferous@gmail.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - LICENSE
33
+ - README.rdoc
34
+ files:
35
+ - CHANGELOG
36
+ - LICENSE
37
+ - README.rdoc
38
+ - Rakefile
39
+ - lib/rack-linkify.rb
40
+ - test/rackapp/app.rb
41
+ - test/rackapp/config.ru
42
+ has_rdoc: true
43
+ homepage: http://github.com/techiferous/rack-linkify
44
+ licenses: []
45
+
46
+ post_install_message:
47
+ rdoc_options:
48
+ - --charset=UTF-8
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: "0"
56
+ version:
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: "0"
62
+ version:
63
+ requirements:
64
+ - none
65
+ rubyforge_project:
66
+ rubygems_version: 1.3.5
67
+ signing_key:
68
+ specification_version: 3
69
+ summary: Rack middleware that adds anchor tags to URLs in text.
70
+ test_files: []
71
+