twitter-text 1.1.8 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/lib/autolink.rb +54 -18
- data/lib/extractor.rb +12 -10
- data/lib/hithighlighter.rb +2 -4
- data/lib/regex.rb +11 -4
- data/lib/validation.rb +1 -1
- data/spec/autolinking_spec.rb +42 -3
- data/spec/hithighlighter_spec.rb +5 -1
- metadata +12 -5
data/Rakefile
CHANGED
@@ -11,7 +11,7 @@ require 'digest'
|
|
11
11
|
|
12
12
|
spec = Gem::Specification.new do |s|
|
13
13
|
s.name = "twitter-text"
|
14
|
-
s.version = "1.1.8"
|
14
|
+
s.version = "1.2.0"
|
15
15
|
s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle", "Raffi Krikorian"]
|
16
16
|
s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com", "raffi@twitter.com"]
|
17
17
|
s.homepage = "http://twitter.com"
|
data/lib/autolink.rb
CHANGED
@@ -2,11 +2,9 @@
|
|
2
2
|
module Twitter
|
3
3
|
# A module for including Tweet auto-linking in a class. The primary use of this is for helpers/views so they can auto-link
|
4
4
|
# usernames, lists, hashtags and URLs.
|
5
|
-
module Autolink
|
5
|
+
module Autolink extend self
|
6
6
|
include ActionView::Helpers::TagHelper #tag_options needed by auto_link
|
7
7
|
|
8
|
-
WWW_REGEX = /www\./i #:nodoc:
|
9
|
-
|
10
8
|
# Default CSS class for auto-linked URLs
|
11
9
|
DEFAULT_URL_CLASS = "tweet-url"
|
12
10
|
# Default CSS class for auto-linked lists (along with the url class)
|
@@ -18,6 +16,20 @@ module Twitter
|
|
18
16
|
# HTML attribute for robot nofollow behavior (default)
|
19
17
|
HTML_ATTR_NO_FOLLOW = " rel=\"nofollow\""
|
20
18
|
|
19
|
+
HTML_ENTITIES = {
|
20
|
+
'&' => '&amp;',
|
21
|
+
'>' => '&gt;',
|
22
|
+
'<' => '&lt;',
|
23
|
+
'"' => '&quot;',
|
24
|
+
"'" => '&#39;'
|
25
|
+
}
|
26
|
+
|
27
|
+
def encode(text)
|
28
|
+
text && text.gsub(/[&"'><]/) do |character|
|
29
|
+
HTML_ENTITIES[character]
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
21
33
|
# Add <tt><a></a></tt> tags around the usernames, lists, hashtags and URLs in the provided <tt>text</tt>. The
|
22
34
|
# <tt><a></tt> tags can be controlled with the following entries in the <tt>options</tt>
|
23
35
|
# hash:
|
@@ -59,19 +71,39 @@ module Twitter
|
|
59
71
|
options[:list_url_base] ||= "http://twitter.com/"
|
60
72
|
extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow]
|
61
73
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
74
|
+
new_text = ""
|
75
|
+
|
76
|
+
# this -1 flag allows strings ending in ">" to work
|
77
|
+
text.split(/[<>]/, -1).each_with_index do |chunk, index|
|
78
|
+
if index != 0
|
79
|
+
new_text << ((index % 2 == 0) ? ">" : "<")
|
80
|
+
end
|
81
|
+
|
82
|
+
if index % 4 != 0
|
83
|
+
new_text << chunk
|
68
84
|
else
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
85
|
+
new_text << chunk.gsub(Twitter::Regex[:auto_link_usernames_or_lists]) do
|
86
|
+
before, at, user, slash_listname, after = $1, $2, $3, $4, $5
|
87
|
+
if slash_listname && !options[:suppress_lists]
|
88
|
+
# the link is a list
|
89
|
+
chunk = list = "#{user}#{slash_listname}"
|
90
|
+
chunk = yield(list) if block_given?
|
91
|
+
"#{before}#{at}<a class=\"#{options[:url_class]} #{options[:list_class]}\" href=\"#{encode(options[:list_url_base])}#{encode(list.downcase)}\"#{extra_html}>#{encode(chunk)}</a>#{after}"
|
92
|
+
else
|
93
|
+
if after =~ Twitter::Regex[:end_screen_name_match]
|
94
|
+
# Followed by something that means we don't autolink
|
95
|
+
"#{before}#{at}#{user}#{slash_listname}#{after}"
|
96
|
+
else
|
97
|
+
# this is a screen name
|
98
|
+
chunk = user
|
99
|
+
chunk = yield(chunk) if block_given?
|
100
|
+
"#{before}#{at}<a class=\"#{options[:url_class]} #{options[:username_class]}\" href=\"#{encode(options[:username_url_base])}#{encode(chunk)}\"#{extra_html}>#{encode(chunk)}</a>#{after}"
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
73
104
|
end
|
74
105
|
end
|
106
|
+
new_text
|
75
107
|
end
|
76
108
|
|
77
109
|
# Add <tt><a></a></tt> tags around the hashtags in the provided <tt>text</tt>. The
|
@@ -94,7 +126,7 @@ module Twitter
|
|
94
126
|
hash = $2
|
95
127
|
text = $3
|
96
128
|
text = yield(text) if block_given?
|
97
|
-
"#{before}<a href=\"#{options[:hashtag_url_base]}#{text}\" title=\"##{text}\" class=\"#{options[:url_class]} #{options[:hashtag_class]}\"#{extra_html}>#{hash}#{text}</a>"
|
129
|
+
"#{before}<a href=\"#{options[:hashtag_url_base]}#{encode(text)}\" title=\"##{encode(text)}\" class=\"#{options[:url_class]} #{options[:hashtag_class]}\"#{extra_html}>#{encode(hash)}#{encode(text)}</a>"
|
98
130
|
end
|
99
131
|
end
|
100
132
|
|
@@ -107,10 +139,14 @@ module Twitter
|
|
107
139
|
options[:rel] = "nofollow" unless options.delete(:suppress_no_follow)
|
108
140
|
|
109
141
|
text.gsub(Twitter::Regex[:valid_url]) do
|
110
|
-
all, before, url, protocol = $1, $2, $3, $4
|
111
|
-
|
112
|
-
|
113
|
-
|
142
|
+
all, before, url, protocol, domain, path, query_string = $1, $2, $3, $4, $5, $6, $7
|
143
|
+
if !protocol.blank? || domain =~ Twitter::Regex[:probable_tld]
|
144
|
+
html_attrs = tag_options(options.stringify_keys) || ""
|
145
|
+
full_url = ((protocol =~ Twitter::Regex[:www] || protocol.blank?) ? "http://#{url}" : url)
|
146
|
+
"#{before}<a href=\"#{encode(full_url)}\"#{html_attrs}>#{encode(url)}</a>"
|
147
|
+
else
|
148
|
+
all
|
149
|
+
end
|
114
150
|
end
|
115
151
|
end
|
116
152
|
|
data/lib/extractor.rb
CHANGED
@@ -39,7 +39,7 @@ end
|
|
39
39
|
module Twitter
|
40
40
|
# A module for including Tweet parsing in a class. This module provides function for the extraction and processing
|
41
41
|
# of usernames, lists, URLs and hashtags.
|
42
|
-
module Extractor
|
42
|
+
module Extractor extend self
|
43
43
|
|
44
44
|
# Extracts a list of all usernames mentioned in the Tweet <tt>text</tt>. If the
|
45
45
|
# <tt>text</tt> is <tt>nil</tt> or contains no username mentions an empty array
|
@@ -65,7 +65,7 @@ module Twitter
|
|
65
65
|
possible_screen_names = []
|
66
66
|
position = 0
|
67
67
|
text.to_s.scan(Twitter::Regex[:extract_mentions]) do |before, sn, after|
|
68
|
-
unless after =~ Twitter::Regex[:
|
68
|
+
unless after =~ Twitter::Regex[:end_screen_name_match]
|
69
69
|
start_position = text.to_s.sub_string_search(sn, position) - 1
|
70
70
|
position = start_position + sn.char_length + 1
|
71
71
|
possible_screen_names << {
|
@@ -117,13 +117,15 @@ module Twitter
|
|
117
117
|
urls = []
|
118
118
|
position = 0
|
119
119
|
text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, path, query|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
120
|
+
if !protocol.blank? || domain =~ Twitter::Regex[:probable_tld]
|
121
|
+
start_position = text.to_s.sub_string_search(url, position)
|
122
|
+
end_position = start_position + url.char_length
|
123
|
+
position = end_position
|
124
|
+
urls << {
|
125
|
+
:url => ((protocol =~ Twitter::Regex[:www] || protocol.blank?) ? "http://#{url}" : url),
|
126
|
+
:indices => [start_position, end_position]
|
127
|
+
}
|
128
|
+
end
|
127
129
|
end
|
128
130
|
urls.each{|url| yield url[:url], url[:indices].first, url[:indices].last } if block_given?
|
129
131
|
urls
|
@@ -153,7 +155,7 @@ module Twitter
|
|
153
155
|
tags = []
|
154
156
|
position = 0
|
155
157
|
text.scan(Twitter::Regex[:auto_link_hashtags]) do |before, hash, hash_text|
|
156
|
-
start_position = text.to_s.sub_string_search(hash, position)
|
158
|
+
start_position = text.to_s.sub_string_search(hash + hash_text, position)
|
157
159
|
position = start_position + hash_text.char_length + 1
|
158
160
|
tags << {
|
159
161
|
:hashtag => hash_text,
|
data/lib/hithighlighter.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
module Twitter
|
3
3
|
# Module for doing "hit highlighting" on tweets that have been auto-linked already.
|
4
4
|
# Useful with the results returned from the Search API.
|
5
|
-
module HitHighlighter
|
5
|
+
module HitHighlighter extend self
|
6
6
|
# Default Tag used for hit highlighting
|
7
7
|
DEFAULT_HIGHLIGHT_TAG = "em"
|
8
8
|
|
@@ -22,9 +22,7 @@ module Twitter
|
|
22
22
|
tag_name = options[:tag] || DEFAULT_HIGHLIGHT_TAG
|
23
23
|
tags = ["<" + tag_name + ">", "</" + tag_name + ">"]
|
24
24
|
|
25
|
-
chunks = text.split(
|
26
|
-
item.blank? ? item : item.split(">")
|
27
|
-
end.flatten
|
25
|
+
chunks = text.split(/[<>]/)
|
28
26
|
|
29
27
|
result = ""
|
30
28
|
chunk_index, chunk = 0, chunks[0]
|
data/lib/regex.rb
CHANGED
@@ -43,16 +43,23 @@ module Twitter
|
|
43
43
|
LATIN_ACCENTS = [(0xc0..0xd6).to_a, (0xd8..0xf6).to_a, (0xf8..0xff).to_a].flatten.pack('U*').freeze
|
44
44
|
REGEXEN[:latin_accents] = /[#{LATIN_ACCENTS}]+/o
|
45
45
|
|
46
|
+
REGEXEN[:end_screen_name_match] = /#{REGEXEN[:at_signs]}|#{REGEXEN[:latin_accents]}/o
|
47
|
+
|
46
48
|
# Characters considered valid in a hashtag but not at the beginning, where only a-z and 0-9 are valid.
|
47
49
|
HASHTAG_CHARACTERS = /[a-z0-9_#{LATIN_ACCENTS}]/io
|
48
|
-
REGEXEN[:auto_link_hashtags] = /(^|[^0-9A-Z
|
49
|
-
REGEXEN[:auto_link_usernames_or_lists] = /([^a-zA-Z0-9_]
|
50
|
+
REGEXEN[:auto_link_hashtags] = /(^|[^0-9A-Z&\/\?]+)(#|＃)([0-9A-Z_]*[A-Z_]+#{HASHTAG_CHARACTERS}*)/io
|
51
|
+
REGEXEN[:auto_link_usernames_or_lists] = /([^a-zA-Z0-9_]|^|RT:?)([@＠]+)([a-zA-Z0-9_]{1,20})(\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})?($|.)/o
|
50
52
|
REGEXEN[:auto_link_emoticon] = /(8\-\#|8\-E|\+\-\(|\`\@|\`O|\<\|:~\(|\}:o\{|:\-\[|\>o\<|X\-\/|\[:-\]\-I\-|\/\/\/\/Ö\\\\\\\\|\(\|:\|\/\)|∑:\*\)|\( \| \))/
|
51
53
|
|
52
54
|
# URL related hash regex collection
|
53
|
-
REGEXEN[:valid_preceding_chars] = /(?:[
|
55
|
+
REGEXEN[:valid_preceding_chars] = /(?:[^-\/"':!=A-Z0-9_]|^|\:)/i
|
54
56
|
REGEXEN[:valid_domain] = /(?:[^[:punct:]\s][\.-](?=[^[:punct:]\s])|[^[:punct:]\s]){1,}\.[a-z]{2,}(?::[0-9]+)?/i
|
55
57
|
|
58
|
+
# For protocol-less URLs, we'll accept them if they end in one of a handful of likely TLDs
|
59
|
+
REGEXEN[:probable_tld] = /\.(?:com|net|org|gov|edu)$/i
|
60
|
+
|
61
|
+
REGEXEN[:www] = /www\./i
|
62
|
+
|
56
63
|
REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\$\/%#\[\]\-_,~]/i
|
57
64
|
# Allow URL paths to contain balanced parens
|
58
65
|
# 1. Used in Wikipedia URLs like /Primer_(film)
|
@@ -73,7 +80,7 @@ module Twitter
|
|
73
80
|
( # $1 total match
|
74
81
|
(#{REGEXEN[:valid_preceding_chars]}) # $2 Preceeding chracter
|
75
82
|
( # $3 URL
|
76
|
-
(https?:\/\/|www\.)
|
83
|
+
((?:https?:\/\/|www\.)?) # $4 Protocol or beginning
|
77
84
|
(#{REGEXEN[:valid_domain]}) # $5 Domain(s) and optional post number
|
78
85
|
(/#{REGEXEN[:valid_url_path_chars]}*
|
79
86
|
#{REGEXEN[:valid_url_path_ending_chars]}?
|
data/lib/validation.rb
CHANGED
data/spec/autolinking_spec.rb
CHANGED
@@ -475,10 +475,30 @@ describe Twitter::Autolink do
|
|
475
475
|
end
|
476
476
|
|
477
477
|
context "with a @ in a URL" do
|
478
|
-
|
478
|
+
context "with XSS attack" do
|
479
|
+
def original_text; 'http://x.xx/@"style="color:pink"onmouseover=alert(1)//'; end
|
479
480
|
|
480
|
-
|
481
|
-
|
481
|
+
it "should not allow XSS follwing @" do
|
482
|
+
@autolinked_text.should have_autolinked_url('http://x.xx/')
|
483
|
+
end
|
484
|
+
end
|
485
|
+
|
486
|
+
context "with a username not followed by a /" do
|
487
|
+
def original_text; 'http://example.com/@foobar'; end
|
488
|
+
|
489
|
+
it "should link small url and username" do
|
490
|
+
@autolinked_text.should have_autolinked_url('http://example.com/')
|
491
|
+
@autolinked_text.should link_to_screen_name('foobar')
|
492
|
+
end
|
493
|
+
end
|
494
|
+
|
495
|
+
context "with a username followed by a /" do
|
496
|
+
def original_text; 'http://example.com/@foobar/'; end
|
497
|
+
|
498
|
+
it "should not link the username but link full url" do
|
499
|
+
@autolinked_text.should have_autolinked_url('http://example.com/@foobar/')
|
500
|
+
@autolinked_text.should_not link_to_screen_name('foobar')
|
501
|
+
end
|
482
502
|
end
|
483
503
|
end
|
484
504
|
|
@@ -498,4 +518,23 @@ describe Twitter::Autolink do
|
|
498
518
|
|
499
519
|
end
|
500
520
|
|
521
|
+
describe "encode" do
|
522
|
+
before do
|
523
|
+
@linker = TestAutolink.new
|
524
|
+
end
|
525
|
+
it "should escape html entities properly" do
|
526
|
+
@linker.encode("&").should == "&amp;"
|
527
|
+
@linker.encode(">").should == "&gt;"
|
528
|
+
@linker.encode("<").should == "&lt;"
|
529
|
+
@linker.encode("\"").should == "&quot;"
|
530
|
+
@linker.encode("'").should == "&#39;"
|
531
|
+
@linker.encode("&<>\"").should == "&amp;&lt;&gt;&quot;"
|
532
|
+
@linker.encode("<div>").should == "&lt;div&gt;"
|
533
|
+
@linker.encode("a&b").should == "a&amp;b"
|
534
|
+
@linker.encode("<a href=\"http://twitter.com\" target=\"_blank\">twitter & friends</a>").should == "&lt;a href=&quot;http://twitter.com&quot; target=&quot;_blank&quot;&gt;twitter &amp; friends&lt;/a&gt;"
|
535
|
+
@linker.encode("&amp;").should == "&amp;amp;"
|
536
|
+
@linker.encode(nil).should == nil
|
537
|
+
end
|
538
|
+
end
|
539
|
+
|
501
540
|
end
|
data/spec/hithighlighter_spec.rb
CHANGED
@@ -76,11 +76,15 @@ describe Twitter::HitHighlighter do
|
|
76
76
|
it "should highlight around a link" do
|
77
77
|
@highlighter.hit_highlight("test <a>test</a> test", [[3, 11]]).should == "tes<em>t <a>test</a> t</em>est"
|
78
78
|
end
|
79
|
-
|
79
|
+
|
80
80
|
it "should fail gracefully with bad hits" do
|
81
81
|
@highlighter.hit_highlight("test test", [[5, 20]]).should == "test <em>test</em>"
|
82
82
|
end
|
83
83
|
|
84
|
+
it "should not mess up with touching tags" do
|
85
|
+
@highlighter.hit_highlight("<a>foo</a><a>foo</a>", [[3,6]]).should == "<a>foo</a><a><em>foo</em></a>"
|
86
|
+
end
|
87
|
+
|
84
88
|
end
|
85
89
|
|
86
90
|
end
|
metadata
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
+
hash: 31
|
4
5
|
prerelease: false
|
5
6
|
segments:
|
6
7
|
- 1
|
7
|
-
- 1
|
8
|
-
- 8
|
9
|
-
version: 1.1.8
|
8
|
+
- 2
|
9
|
+
- 0
|
10
|
+
version: 1.2.0
|
10
11
|
platform: ruby
|
11
12
|
authors:
|
12
13
|
- Matt Sanford
|
@@ -18,16 +19,18 @@ autorequire: ""
|
|
18
19
|
bindir: bin
|
19
20
|
cert_chain: []
|
20
21
|
|
21
|
-
date: 2010-
|
22
|
+
date: 2010-10-05 00:00:00 -07:00
|
22
23
|
default_executable:
|
23
24
|
dependencies:
|
24
25
|
- !ruby/object:Gem::Dependency
|
25
26
|
name: actionpack
|
26
27
|
prerelease: false
|
27
28
|
requirement: &id001 !ruby/object:Gem::Requirement
|
29
|
+
none: false
|
28
30
|
requirements:
|
29
31
|
- - ">="
|
30
32
|
- !ruby/object:Gem::Version
|
33
|
+
hash: 3
|
31
34
|
segments:
|
32
35
|
- 0
|
33
36
|
version: "0"
|
@@ -76,23 +79,27 @@ rdoc_options: []
|
|
76
79
|
require_paths:
|
77
80
|
- lib
|
78
81
|
required_ruby_version: !ruby/object:Gem::Requirement
|
82
|
+
none: false
|
79
83
|
requirements:
|
80
84
|
- - ">="
|
81
85
|
- !ruby/object:Gem::Version
|
86
|
+
hash: 3
|
82
87
|
segments:
|
83
88
|
- 0
|
84
89
|
version: "0"
|
85
90
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
91
|
+
none: false
|
86
92
|
requirements:
|
87
93
|
- - ">="
|
88
94
|
- !ruby/object:Gem::Version
|
95
|
+
hash: 3
|
89
96
|
segments:
|
90
97
|
- 0
|
91
98
|
version: "0"
|
92
99
|
requirements: []
|
93
100
|
|
94
101
|
rubyforge_project:
|
95
|
-
rubygems_version: 1.3.
|
102
|
+
rubygems_version: 1.3.7
|
96
103
|
signing_key:
|
97
104
|
specification_version: 3
|
98
105
|
summary: Twitter text handling library
|