twitter-text 1.1.8 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/lib/autolink.rb +54 -18
- data/lib/extractor.rb +12 -10
- data/lib/hithighlighter.rb +2 -4
- data/lib/regex.rb +11 -4
- data/lib/validation.rb +1 -1
- data/spec/autolinking_spec.rb +42 -3
- data/spec/hithighlighter_spec.rb +5 -1
- metadata +12 -5
data/Rakefile
CHANGED
@@ -11,7 +11,7 @@ require 'digest'
|
|
11
11
|
|
12
12
|
spec = Gem::Specification.new do |s|
|
13
13
|
s.name = "twitter-text"
|
14
|
-
s.version = "1.1.8"
|
14
|
+
s.version = "1.2.0"
|
15
15
|
s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle", "Raffi Krikorian"]
|
16
16
|
s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com", "raffi@twitter.com"]
|
17
17
|
s.homepage = "http://twitter.com"
|
data/lib/autolink.rb
CHANGED
@@ -2,11 +2,9 @@
|
|
2
2
|
module Twitter
|
3
3
|
# A module for including Tweet auto-linking in a class. The primary use of this is for helpers/views so they can auto-link
|
4
4
|
# usernames, lists, hashtags and URLs.
|
5
|
-
module Autolink
|
5
|
+
module Autolink extend self
|
6
6
|
include ActionView::Helpers::TagHelper #tag_options needed by auto_link
|
7
7
|
|
8
|
-
WWW_REGEX = /www\./i #:nodoc:
|
9
|
-
|
10
8
|
# Default CSS class for auto-linked URLs
|
11
9
|
DEFAULT_URL_CLASS = "tweet-url"
|
12
10
|
# Default CSS class for auto-linked lists (along with the url class)
|
@@ -18,6 +16,20 @@ module Twitter
|
|
18
16
|
# HTML attribute for robot nofollow behavior (default)
|
19
17
|
HTML_ATTR_NO_FOLLOW = " rel=\"nofollow\""
|
20
18
|
|
19
|
+
HTML_ENTITIES = {
|
20
|
+
'&' => '&amp;',
|
21
|
+
'>' => '&gt;',
|
22
|
+
'<' => '&lt;',
|
23
|
+
'"' => '&quot;',
|
24
|
+
"'" => '&#39;'
|
25
|
+
}
|
26
|
+
|
27
|
+
def encode(text)
|
28
|
+
text && text.gsub(/[&"'><]/) do |character|
|
29
|
+
HTML_ENTITIES[character]
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
21
33
|
# Add <tt><a></a></tt> tags around the usernames, lists, hashtags and URLs in the provided <tt>text</tt>. The
|
22
34
|
# <tt><a></tt> tags can be controlled with the following entries in the <tt>options</tt>
|
23
35
|
# hash:
|
@@ -59,19 +71,39 @@ module Twitter
|
|
59
71
|
options[:list_url_base] ||= "http://twitter.com/"
|
60
72
|
extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow]
|
61
73
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
74
|
+
new_text = ""
|
75
|
+
|
76
|
+
# this -1 flag allows strings ending in ">" to work
|
77
|
+
text.split(/[<>]/, -1).each_with_index do |chunk, index|
|
78
|
+
if index != 0
|
79
|
+
new_text << ((index % 2 == 0) ? ">" : "<")
|
80
|
+
end
|
81
|
+
|
82
|
+
if index % 4 != 0
|
83
|
+
new_text << chunk
|
68
84
|
else
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
85
|
+
new_text << chunk.gsub(Twitter::Regex[:auto_link_usernames_or_lists]) do
|
86
|
+
before, at, user, slash_listname, after = $1, $2, $3, $4, $5
|
87
|
+
if slash_listname && !options[:suppress_lists]
|
88
|
+
# the link is a list
|
89
|
+
chunk = list = "#{user}#{slash_listname}"
|
90
|
+
chunk = yield(list) if block_given?
|
91
|
+
"#{before}#{at}<a class=\"#{options[:url_class]} #{options[:list_class]}\" href=\"#{encode(options[:list_url_base])}#{encode(list.downcase)}\"#{extra_html}>#{encode(chunk)}</a>#{after}"
|
92
|
+
else
|
93
|
+
if after =~ Twitter::Regex[:end_screen_name_match]
|
94
|
+
# Followed by something that means we don't autolink
|
95
|
+
"#{before}#{at}#{user}#{slash_listname}#{after}"
|
96
|
+
else
|
97
|
+
# this is a screen name
|
98
|
+
chunk = user
|
99
|
+
chunk = yield(chunk) if block_given?
|
100
|
+
"#{before}#{at}<a class=\"#{options[:url_class]} #{options[:username_class]}\" href=\"#{encode(options[:username_url_base])}#{encode(chunk)}\"#{extra_html}>#{encode(chunk)}</a>#{after}"
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
73
104
|
end
|
74
105
|
end
|
106
|
+
new_text
|
75
107
|
end
|
76
108
|
|
77
109
|
# Add <tt><a></a></tt> tags around the hashtags in the provided <tt>text</tt>. The
|
@@ -94,7 +126,7 @@ module Twitter
|
|
94
126
|
hash = $2
|
95
127
|
text = $3
|
96
128
|
text = yield(text) if block_given?
|
97
|
-
"#{before}<a href=\"#{options[:hashtag_url_base]}#{text}\" title=\"##{text}\" class=\"#{options[:url_class]} #{options[:hashtag_class]}\"#{extra_html}>#{hash}#{text}</a>"
|
129
|
+
"#{before}<a href=\"#{options[:hashtag_url_base]}#{encode(text)}\" title=\"##{encode(text)}\" class=\"#{options[:url_class]} #{options[:hashtag_class]}\"#{extra_html}>#{encode(hash)}#{encode(text)}</a>"
|
98
130
|
end
|
99
131
|
end
|
100
132
|
|
@@ -107,10 +139,14 @@ module Twitter
|
|
107
139
|
options[:rel] = "nofollow" unless options.delete(:suppress_no_follow)
|
108
140
|
|
109
141
|
text.gsub(Twitter::Regex[:valid_url]) do
|
110
|
-
all, before, url, protocol = $1, $2, $3, $4
|
111
|
-
|
112
|
-
|
113
|
-
|
142
|
+
all, before, url, protocol, domain, path, query_string = $1, $2, $3, $4, $5, $6, $7
|
143
|
+
if !protocol.blank? || domain =~ Twitter::Regex[:probable_tld]
|
144
|
+
html_attrs = tag_options(options.stringify_keys) || ""
|
145
|
+
full_url = ((protocol =~ Twitter::Regex[:www] || protocol.blank?) ? "http://#{url}" : url)
|
146
|
+
"#{before}<a href=\"#{encode(full_url)}\"#{html_attrs}>#{encode(url)}</a>"
|
147
|
+
else
|
148
|
+
all
|
149
|
+
end
|
114
150
|
end
|
115
151
|
end
|
116
152
|
|
data/lib/extractor.rb
CHANGED
@@ -39,7 +39,7 @@ end
|
|
39
39
|
module Twitter
|
40
40
|
# A module for including Tweet parsing in a class. This module provides function for the extraction and processing
|
41
41
|
# of usernames, lists, URLs and hashtags.
|
42
|
-
module Extractor
|
42
|
+
module Extractor extend self
|
43
43
|
|
44
44
|
# Extracts a list of all usernames mentioned in the Tweet <tt>text</tt>. If the
|
45
45
|
# <tt>text</tt> is <tt>nil</tt> or contains no username mentions an empty array
|
@@ -65,7 +65,7 @@ module Twitter
|
|
65
65
|
possible_screen_names = []
|
66
66
|
position = 0
|
67
67
|
text.to_s.scan(Twitter::Regex[:extract_mentions]) do |before, sn, after|
|
68
|
-
unless after =~ Twitter::Regex[:
|
68
|
+
unless after =~ Twitter::Regex[:end_screen_name_match]
|
69
69
|
start_position = text.to_s.sub_string_search(sn, position) - 1
|
70
70
|
position = start_position + sn.char_length + 1
|
71
71
|
possible_screen_names << {
|
@@ -117,13 +117,15 @@ module Twitter
|
|
117
117
|
urls = []
|
118
118
|
position = 0
|
119
119
|
text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, path, query|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
120
|
+
if !protocol.blank? || domain =~ Twitter::Regex[:probable_tld]
|
121
|
+
start_position = text.to_s.sub_string_search(url, position)
|
122
|
+
end_position = start_position + url.char_length
|
123
|
+
position = end_position
|
124
|
+
urls << {
|
125
|
+
:url => ((protocol =~ Twitter::Regex[:www] || protocol.blank?) ? "http://#{url}" : url),
|
126
|
+
:indices => [start_position, end_position]
|
127
|
+
}
|
128
|
+
end
|
127
129
|
end
|
128
130
|
urls.each{|url| yield url[:url], url[:indices].first, url[:indices].last } if block_given?
|
129
131
|
urls
|
@@ -153,7 +155,7 @@ module Twitter
|
|
153
155
|
tags = []
|
154
156
|
position = 0
|
155
157
|
text.scan(Twitter::Regex[:auto_link_hashtags]) do |before, hash, hash_text|
|
156
|
-
start_position = text.to_s.sub_string_search(hash, position)
|
158
|
+
start_position = text.to_s.sub_string_search(hash + hash_text, position)
|
157
159
|
position = start_position + hash_text.char_length + 1
|
158
160
|
tags << {
|
159
161
|
:hashtag => hash_text,
|
data/lib/hithighlighter.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
module Twitter
|
3
3
|
# Module for doing "hit highlighting" on tweets that have been auto-linked already.
|
4
4
|
# Useful with the results returned from the Search API.
|
5
|
-
module HitHighlighter
|
5
|
+
module HitHighlighter extend self
|
6
6
|
# Default Tag used for hit highlighting
|
7
7
|
DEFAULT_HIGHLIGHT_TAG = "em"
|
8
8
|
|
@@ -22,9 +22,7 @@ module Twitter
|
|
22
22
|
tag_name = options[:tag] || DEFAULT_HIGHLIGHT_TAG
|
23
23
|
tags = ["<" + tag_name + ">", "</" + tag_name + ">"]
|
24
24
|
|
25
|
-
chunks = text.split(
|
26
|
-
item.blank? ? item : item.split(">")
|
27
|
-
end.flatten
|
25
|
+
chunks = text.split(/[<>]/)
|
28
26
|
|
29
27
|
result = ""
|
30
28
|
chunk_index, chunk = 0, chunks[0]
|
data/lib/regex.rb
CHANGED
@@ -43,16 +43,23 @@ module Twitter
|
|
43
43
|
LATIN_ACCENTS = [(0xc0..0xd6).to_a, (0xd8..0xf6).to_a, (0xf8..0xff).to_a].flatten.pack('U*').freeze
|
44
44
|
REGEXEN[:latin_accents] = /[#{LATIN_ACCENTS}]+/o
|
45
45
|
|
46
|
+
REGEXEN[:end_screen_name_match] = /#{REGEXEN[:at_signs]}|#{REGEXEN[:latin_accents]}/o
|
47
|
+
|
46
48
|
# Characters considered valid in a hashtag but not at the beginning, where only a-z and 0-9 are valid.
|
47
49
|
HASHTAG_CHARACTERS = /[a-z0-9_#{LATIN_ACCENTS}]/io
|
48
|
-
REGEXEN[:auto_link_hashtags] = /(^|[^0-9A-Z
|
49
|
-
REGEXEN[:auto_link_usernames_or_lists] = /([^a-zA-Z0-9_]
|
50
|
+
REGEXEN[:auto_link_hashtags] = /(^|[^0-9A-Z&\/\?]+)(#|＃)([0-9A-Z_]*[A-Z_]+#{HASHTAG_CHARACTERS}*)/io
|
51
|
+
REGEXEN[:auto_link_usernames_or_lists] = /([^a-zA-Z0-9_]|^|RT:?)([@＠]+)([a-zA-Z0-9_]{1,20})(\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})?($|.)/o
|
50
52
|
REGEXEN[:auto_link_emoticon] = /(8\-\#|8\-E|\+\-\(|\`\@|\`O|\<\|:~\(|\}:o\{|:\-\[|\>o\<|X\-\/|\[:-\]\-I\-|\/\/\/\/Ö\\\\\\\\|\(\|:\|\/\)|∑:\*\)|\( \| \))/
|
51
53
|
|
52
54
|
# URL related hash regex collection
|
53
|
-
REGEXEN[:valid_preceding_chars] = /(?:[
|
55
|
+
REGEXEN[:valid_preceding_chars] = /(?:[^-\/"':!=A-Z0-9_]|^|\:)/i
|
54
56
|
REGEXEN[:valid_domain] = /(?:[^[:punct:]\s][\.-](?=[^[:punct:]\s])|[^[:punct:]\s]){1,}\.[a-z]{2,}(?::[0-9]+)?/i
|
55
57
|
|
58
|
+
# For protocol-less URLs, we'll accept them if they end in one of a handful of likely TLDs
|
59
|
+
REGEXEN[:probable_tld] = /\.(?:com|net|org|gov|edu)$/i
|
60
|
+
|
61
|
+
REGEXEN[:www] = /www\./i
|
62
|
+
|
56
63
|
REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\$\/%#\[\]\-_,~]/i
|
57
64
|
# Allow URL paths to contain balanced parens
|
58
65
|
# 1. Used in Wikipedia URLs like /Primer_(film)
|
@@ -73,7 +80,7 @@ module Twitter
|
|
73
80
|
( # $1 total match
|
74
81
|
(#{REGEXEN[:valid_preceding_chars]}) # $2 Preceding character
|
75
82
|
( # $3 URL
|
76
|
-
(https?:\/\/|www\.)
|
83
|
+
((?:https?:\/\/|www\.)?) # $4 Protocol or beginning
|
77
84
|
(#{REGEXEN[:valid_domain]}) # $5 Domain(s) and optional post number
|
78
85
|
(/#{REGEXEN[:valid_url_path_chars]}*
|
79
86
|
#{REGEXEN[:valid_url_path_ending_chars]}?
|
data/lib/validation.rb
CHANGED
data/spec/autolinking_spec.rb
CHANGED
@@ -475,10 +475,30 @@ describe Twitter::Autolink do
|
|
475
475
|
end
|
476
476
|
|
477
477
|
context "with a @ in a URL" do
|
478
|
-
|
478
|
+
context "with XSS attack" do
|
479
|
+
def original_text; 'http://x.xx/@"style="color:pink"onmouseover=alert(1)//'; end
|
479
480
|
|
480
|
-
|
481
|
-
|
481
|
+
it "should not allow XSS follwing @" do
|
482
|
+
@autolinked_text.should have_autolinked_url('http://x.xx/')
|
483
|
+
end
|
484
|
+
end
|
485
|
+
|
486
|
+
context "with a username not followed by a /" do
|
487
|
+
def original_text; 'http://example.com/@foobar'; end
|
488
|
+
|
489
|
+
it "should link small url and username" do
|
490
|
+
@autolinked_text.should have_autolinked_url('http://example.com/')
|
491
|
+
@autolinked_text.should link_to_screen_name('foobar')
|
492
|
+
end
|
493
|
+
end
|
494
|
+
|
495
|
+
context "with a username followed by a /" do
|
496
|
+
def original_text; 'http://example.com/@foobar/'; end
|
497
|
+
|
498
|
+
it "should not link the username but link full url" do
|
499
|
+
@autolinked_text.should have_autolinked_url('http://example.com/@foobar/')
|
500
|
+
@autolinked_text.should_not link_to_screen_name('foobar')
|
501
|
+
end
|
482
502
|
end
|
483
503
|
end
|
484
504
|
|
@@ -498,4 +518,23 @@ describe Twitter::Autolink do
|
|
498
518
|
|
499
519
|
end
|
500
520
|
|
521
|
+
describe "encode" do
|
522
|
+
before do
|
523
|
+
@linker = TestAutolink.new
|
524
|
+
end
|
525
|
+
it "should escape html entities properly" do
|
526
|
+
@linker.encode("&").should == "&amp;"
|
527
|
+
@linker.encode(">").should == "&gt;"
|
528
|
+
@linker.encode("<").should == "&lt;"
|
529
|
+
@linker.encode("\"").should == "&quot;"
|
530
|
+
@linker.encode("'").should == "&#39;"
|
531
|
+
@linker.encode("&<>\"").should == "&amp;&lt;&gt;&quot;"
|
532
|
+
@linker.encode("<div>").should == "&lt;div&gt;"
|
533
|
+
@linker.encode("a&b").should == "a&amp;b"
|
534
|
+
@linker.encode("<a href=\"http://twitter.com\" target=\"_blank\">twitter & friends</a>").should == "&lt;a href=&quot;http://twitter.com&quot; target=&quot;_blank&quot;&gt;twitter &amp; friends&lt;/a&gt;"
|
535
|
+
@linker.encode("&amp;").should == "&amp;amp;"
|
536
|
+
@linker.encode(nil).should == nil
|
537
|
+
end
|
538
|
+
end
|
539
|
+
|
501
540
|
end
|
data/spec/hithighlighter_spec.rb
CHANGED
@@ -76,11 +76,15 @@ describe Twitter::HitHighlighter do
|
|
76
76
|
it "should highlight around a link" do
|
77
77
|
@highlighter.hit_highlight("test <a>test</a> test", [[3, 11]]).should == "tes<em>t <a>test</a> t</em>est"
|
78
78
|
end
|
79
|
-
|
79
|
+
|
80
80
|
it "should fail gracefully with bad hits" do
|
81
81
|
@highlighter.hit_highlight("test test", [[5, 20]]).should == "test <em>test</em>"
|
82
82
|
end
|
83
83
|
|
84
|
+
it "should not mess up with touching tags" do
|
85
|
+
@highlighter.hit_highlight("<a>foo</a><a>foo</a>", [[3,6]]).should == "<a>foo</a><a><em>foo</em></a>"
|
86
|
+
end
|
87
|
+
|
84
88
|
end
|
85
89
|
|
86
90
|
end
|
metadata
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
+
hash: 31
|
4
5
|
prerelease: false
|
5
6
|
segments:
|
6
7
|
- 1
|
7
|
-
-
|
8
|
-
-
|
9
|
-
version: 1.1.8
|
8
|
+
- 2
|
9
|
+
- 0
|
10
|
+
version: 1.2.0
|
10
11
|
platform: ruby
|
11
12
|
authors:
|
12
13
|
- Matt Sanford
|
@@ -18,16 +19,18 @@ autorequire: ""
|
|
18
19
|
bindir: bin
|
19
20
|
cert_chain: []
|
20
21
|
|
21
|
-
date: 2010-
|
22
|
+
date: 2010-10-05 00:00:00 -07:00
|
22
23
|
default_executable:
|
23
24
|
dependencies:
|
24
25
|
- !ruby/object:Gem::Dependency
|
25
26
|
name: actionpack
|
26
27
|
prerelease: false
|
27
28
|
requirement: &id001 !ruby/object:Gem::Requirement
|
29
|
+
none: false
|
28
30
|
requirements:
|
29
31
|
- - ">="
|
30
32
|
- !ruby/object:Gem::Version
|
33
|
+
hash: 3
|
31
34
|
segments:
|
32
35
|
- 0
|
33
36
|
version: "0"
|
@@ -76,23 +79,27 @@ rdoc_options: []
|
|
76
79
|
require_paths:
|
77
80
|
- lib
|
78
81
|
required_ruby_version: !ruby/object:Gem::Requirement
|
82
|
+
none: false
|
79
83
|
requirements:
|
80
84
|
- - ">="
|
81
85
|
- !ruby/object:Gem::Version
|
86
|
+
hash: 3
|
82
87
|
segments:
|
83
88
|
- 0
|
84
89
|
version: "0"
|
85
90
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
91
|
+
none: false
|
86
92
|
requirements:
|
87
93
|
- - ">="
|
88
94
|
- !ruby/object:Gem::Version
|
95
|
+
hash: 3
|
89
96
|
segments:
|
90
97
|
- 0
|
91
98
|
version: "0"
|
92
99
|
requirements: []
|
93
100
|
|
94
101
|
rubyforge_project:
|
95
|
-
rubygems_version: 1.3.
|
102
|
+
rubygems_version: 1.3.7
|
96
103
|
signing_key:
|
97
104
|
specification_version: 3
|
98
105
|
summary: Twitter text handling library
|