twitter-text 1.4.8 → 1.4.9
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +1 -27
- data/README.rdoc +1 -0
- data/lib/autolink.rb +55 -47
- data/lib/extractor.rb +36 -2
- data/lib/regex.rb +49 -24
- data/lib/rewriter.rb +63 -0
- data/lib/twitter-text.rb +7 -4
- data/lib/validation.rb +5 -5
- data/spec/autolinking_spec.rb +41 -3
- data/spec/extractor_spec.rb +2 -2
- data/spec/rewriter_spec.rb +558 -0
- data/spec/spec_helper.rb +10 -4
- data/test/conformance_test.rb +18 -5
- data/twitter-text.gemspec +6 -4
- metadata +12 -7
data/Gemfile.lock
CHANGED
@@ -2,40 +2,16 @@ PATH
|
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
4
|
twitter-text (1.4.8)
|
5
|
-
|
5
|
+
activesupport
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: http://rubygems.org/
|
9
9
|
specs:
|
10
|
-
abstract (1.0.0)
|
11
|
-
actionpack (3.0.3)
|
12
|
-
activemodel (= 3.0.3)
|
13
|
-
activesupport (= 3.0.3)
|
14
|
-
builder (~> 2.1.2)
|
15
|
-
erubis (~> 2.6.6)
|
16
|
-
i18n (~> 0.4)
|
17
|
-
rack (~> 1.2.1)
|
18
|
-
rack-mount (~> 0.6.13)
|
19
|
-
rack-test (~> 0.5.6)
|
20
|
-
tzinfo (~> 0.3.23)
|
21
|
-
activemodel (3.0.3)
|
22
|
-
activesupport (= 3.0.3)
|
23
|
-
builder (~> 2.1.2)
|
24
|
-
i18n (~> 0.4)
|
25
10
|
activesupport (3.0.3)
|
26
|
-
builder (2.1.2)
|
27
11
|
diff-lcs (1.1.2)
|
28
|
-
erubis (2.6.6)
|
29
|
-
abstract (>= 1.0.0)
|
30
|
-
i18n (0.5.0)
|
31
12
|
nokogiri (1.4.4)
|
32
13
|
nokogiri (1.4.4-java)
|
33
14
|
weakling (>= 0.0.3)
|
34
|
-
rack (1.2.1)
|
35
|
-
rack-mount (0.6.13)
|
36
|
-
rack (>= 1.0.0)
|
37
|
-
rack-test (0.5.6)
|
38
|
-
rack (>= 1.0)
|
39
15
|
rake (0.8.7)
|
40
16
|
rspec (2.3.0)
|
41
17
|
rspec-core (~> 2.3.0)
|
@@ -48,7 +24,6 @@ GEM
|
|
48
24
|
simplecov (0.3.7)
|
49
25
|
simplecov-html (>= 0.3.7)
|
50
26
|
simplecov-html (0.3.9)
|
51
|
-
tzinfo (0.3.23)
|
52
27
|
weakling (0.0.4-java)
|
53
28
|
|
54
29
|
PLATFORMS
|
@@ -56,7 +31,6 @@ PLATFORMS
|
|
56
31
|
ruby
|
57
32
|
|
58
33
|
DEPENDENCIES
|
59
|
-
actionpack
|
60
34
|
nokogiri
|
61
35
|
rake
|
62
36
|
rspec
|
data/README.rdoc
CHANGED
@@ -90,6 +90,7 @@ Thanks to everybody who has filed issues, provided feedback or contributed patch
|
|
90
90
|
* Jeff Smick - http://github.com/sprsquish
|
91
91
|
* Kenneth Kufluk - https://github.com/kennethkufluk
|
92
92
|
* Keita Fujii - https://github.com/keitaf
|
93
|
+
* Yoshimasa Niwa - https://github.com/niw
|
93
94
|
|
94
95
|
* Patches from the community …
|
95
96
|
* Jean-Philippe Bougie - http://github.com/jpbougie
|
data/lib/autolink.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
1
3
|
module Twitter
|
2
4
|
# A module for including Tweet auto-linking in a class. The primary use of this is for helpers/views so they can auto-link
|
3
5
|
# usernames, lists, hashtags and URLs.
|
4
6
|
module Autolink extend self
|
5
|
-
include ActionView::Helpers::TagHelper #tag_options needed by auto_link
|
6
|
-
|
7
7
|
# Default CSS class for auto-linked URLs
|
8
8
|
DEFAULT_URL_CLASS = "tweet-url"
|
9
9
|
# Default CSS class for auto-linked lists (along with the url class)
|
@@ -19,6 +19,7 @@ module Twitter
|
|
19
19
|
# Options which should not be passed as HTML attributes
|
20
20
|
OPTIONS_NOT_ATTRIBUTES = [:url_class, :list_class, :username_class, :hashtag_class,
|
21
21
|
:username_url_base, :list_url_base, :hashtag_url_base,
|
22
|
+
:username_url_block, :list_url_block, :hashtag_url_block, :link_url_block,
|
22
23
|
:suppress_lists, :suppress_no_follow]
|
23
24
|
|
24
25
|
HTML_ENTITIES = {
|
@@ -30,7 +31,7 @@ module Twitter
|
|
30
31
|
}
|
31
32
|
|
32
33
|
def html_escape(text)
|
33
|
-
text && text.gsub(/[&"'><]/) do |character|
|
34
|
+
text && text.to_s.gsub(/[&"'><]/) do |character|
|
34
35
|
HTML_ENTITIES[character]
|
35
36
|
end
|
36
37
|
end
|
@@ -68,7 +69,7 @@ module Twitter
|
|
68
69
|
# <tt>:list_url_base</tt>:: the value for <tt>href</tt> attribute on list links. The <tt>@username/list</tt> (minus the <tt>@</tt>) will be appended at the end of this.
|
69
70
|
# <tt>:suppress_lists</tt>:: disable auto-linking to lists
|
70
71
|
# <tt>:suppress_no_follow</tt>:: Do not add <tt>rel="nofollow"</tt> to auto-linked items
|
71
|
-
# <tt>:target</tt>:: add <tt>target="window_name"</tt> to auto-linked items
|
72
|
+
# <tt>:target</tt>:: add <tt>target="window_name"</tt> to auto-linked items
|
72
73
|
def auto_link_usernames_or_lists(text, options = {}) # :yields: list_or_username
|
73
74
|
options = options.dup
|
74
75
|
options[:url_class] ||= DEFAULT_URL_CLASS
|
@@ -79,39 +80,27 @@ module Twitter
|
|
79
80
|
options[:target] ||= DEFAULT_TARGET
|
80
81
|
|
81
82
|
extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow]
|
82
|
-
new_text = ""
|
83
83
|
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
new_text << ((index % 2 == 0) ? ">" : "<")
|
88
|
-
end
|
84
|
+
Twitter::Rewriter.rewrite_usernames_or_lists(text) do |at, username, slash_listname|
|
85
|
+
name = "#{username}#{slash_listname}"
|
86
|
+
chunk = block_given? ? yield(name) : name
|
89
87
|
|
90
|
-
if
|
91
|
-
|
88
|
+
if slash_listname && !options[:suppress_lists]
|
89
|
+
href = if options[:list_url_block]
|
90
|
+
options[:list_url_block].call(name.downcase)
|
91
|
+
else
|
92
|
+
"#{html_escape(options[:list_url_base])}#{html_escape(name.downcase)}"
|
93
|
+
end
|
94
|
+
%(#{at}<a class="#{options[:url_class]} #{options[:list_class]}" #{target_tag(options)}href="#{href}"#{extra_html}>#{html_escape(chunk)}</a>)
|
92
95
|
else
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
chunk = list = "#{user}#{slash_listname}"
|
98
|
-
chunk = yield(list) if block_given?
|
99
|
-
"#{before}#{at}<a class=\"#{options[:url_class]} #{options[:list_class]}\" #{target_tag(options)}href=\"#{html_escape(options[:list_url_base])}#{html_escape(list.downcase)}\"#{extra_html}>#{html_escape(chunk)}</a>"
|
100
|
-
else
|
101
|
-
if after =~ Twitter::Regex[:end_screen_name_match]
|
102
|
-
# Followed by something that means we don't autolink
|
103
|
-
"#{before}#{at}#{user}#{slash_listname}"
|
104
|
-
else
|
105
|
-
# this is a screen name
|
106
|
-
chunk = user
|
107
|
-
chunk = yield(chunk) if block_given?
|
108
|
-
"#{before}#{at}<a class=\"#{options[:url_class]} #{options[:username_class]}\" #{target_tag(options)}href=\"#{html_escape(options[:username_url_base])}#{html_escape(chunk)}\"#{extra_html}>#{html_escape(chunk)}</a>#{slash_listname}"
|
109
|
-
end
|
110
|
-
end
|
96
|
+
href = if options[:username_url_block]
|
97
|
+
options[:username_url_block].call(chunk)
|
98
|
+
else
|
99
|
+
"#{html_escape(options[:username_url_base])}#{html_escape(chunk)}"
|
111
100
|
end
|
101
|
+
%(#{at}<a class="#{options[:url_class]} #{options[:username_class]}" #{target_tag(options)}href="#{href}"#{extra_html}>#{html_escape(chunk)}</a>)
|
112
102
|
end
|
113
103
|
end
|
114
|
-
new_text
|
115
104
|
end
|
116
105
|
|
117
106
|
# Add <tt><a></a></tt> tags around the hashtags in the provided <tt>text</tt>. The
|
@@ -122,7 +111,7 @@ module Twitter
|
|
122
111
|
# <tt>:hashtag_class</tt>:: class to add to hashtag <tt><a></tt> tags
|
123
112
|
# <tt>:hashtag_url_base</tt>:: the value for <tt>href</tt> attribute. The hashtag text (minus the <tt>#</tt>) will be appended at the end of this.
|
124
113
|
# <tt>:suppress_no_follow</tt>:: Do not add <tt>rel="nofollow"</tt> to auto-linked items
|
125
|
-
# <tt>:target</tt>:: add <tt>target="window_name"</tt> to auto-linked items
|
114
|
+
# <tt>:target</tt>:: add <tt>target="window_name"</tt> to auto-linked items
|
126
115
|
def auto_link_hashtags(text, options = {}) # :yields: hashtag_text
|
127
116
|
options = options.dup
|
128
117
|
options[:url_class] ||= DEFAULT_URL_CLASS
|
@@ -131,12 +120,14 @@ module Twitter
|
|
131
120
|
options[:target] ||= DEFAULT_TARGET
|
132
121
|
extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow]
|
133
122
|
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
123
|
+
Twitter::Rewriter.rewrite_hashtags(text) do |hash, hashtag|
|
124
|
+
hashtag = yield(hashtag) if block_given?
|
125
|
+
href = if options[:hashtag_url_block]
|
126
|
+
options[:hashtag_url_block].call(hashtag)
|
127
|
+
else
|
128
|
+
"#{options[:hashtag_url_base]}#{html_escape(hashtag)}"
|
129
|
+
end
|
130
|
+
%(<a href="#{href}" title="##{html_escape(hashtag)}" #{target_tag(options)}class="#{options[:url_class]} #{options[:hashtag_class]}"#{extra_html}>#{html_escape(hash)}#{html_escape(hashtag)}</a>)
|
140
131
|
end
|
141
132
|
end
|
142
133
|
|
@@ -148,28 +139,45 @@ module Twitter
|
|
148
139
|
options = href_options.dup
|
149
140
|
options[:rel] = "nofollow" unless options.delete(:suppress_no_follow)
|
150
141
|
options[:class] = options.delete(:url_class)
|
142
|
+
html_attrs = html_attrs_for_options(options)
|
151
143
|
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
html_attrs = tag_options(options.reject{|k,v| OPTIONS_NOT_ATTRIBUTES.include?(k) }.stringify_keys) || ""
|
156
|
-
"#{before}<a href=\"#{html_escape(url)}\"#{html_attrs}>#{html_escape(url)}</a>"
|
144
|
+
Twitter::Rewriter.rewrite_urls(text) do |url|
|
145
|
+
href = if options[:link_url_block]
|
146
|
+
options.delete(:link_url_block).call(url)
|
157
147
|
else
|
158
|
-
|
148
|
+
html_escape(url)
|
159
149
|
end
|
150
|
+
%(<a href="#{href}"#{html_attrs}>#{html_escape(url)}</a>)
|
160
151
|
end
|
161
152
|
end
|
162
153
|
|
163
154
|
private
|
164
155
|
|
156
|
+
BOOLEAN_ATTRIBUTES = Set.new([:disabled, :readonly, :multiple, :checked]).freeze
|
157
|
+
|
158
|
+
def html_attrs_for_options(options)
|
159
|
+
html_attrs options.reject{|k, v| OPTIONS_NOT_ATTRIBUTES.include?(k)}
|
160
|
+
end
|
161
|
+
|
162
|
+
def html_attrs(options)
|
163
|
+
options.inject("") do |attrs, (key, value)|
|
164
|
+
if BOOLEAN_ATTRIBUTES.include?(key)
|
165
|
+
value = value ? key : nil
|
166
|
+
end
|
167
|
+
if !value.nil?
|
168
|
+
attrs << %( #{html_escape(key)}="#{html_escape(value)}")
|
169
|
+
end
|
170
|
+
attrs
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
165
174
|
def target_tag(options)
|
166
|
-
target_option = options[:target]
|
167
|
-
if target_option.
|
175
|
+
target_option = options[:target].to_s
|
176
|
+
if target_option.empty?
|
168
177
|
""
|
169
178
|
else
|
170
179
|
"target=\"#{html_escape(target_option)}\""
|
171
180
|
end
|
172
181
|
end
|
173
|
-
|
174
182
|
end
|
175
183
|
end
|
data/lib/extractor.rb
CHANGED
@@ -57,7 +57,7 @@ module Twitter
|
|
57
57
|
screen_names_only
|
58
58
|
end
|
59
59
|
|
60
|
-
# Extracts a list of all
|
60
|
+
# Extracts a list of all usernames mentioned in the Tweet <tt>text</tt>
|
61
61
|
# along with the indices for where the mention occurred. If the
|
62
62
|
# <tt>text</tt> is nil or contains no username mentions, an empty array
|
63
63
|
# will be returned.
|
@@ -87,6 +87,40 @@ module Twitter
|
|
87
87
|
possible_screen_names
|
88
88
|
end
|
89
89
|
|
90
|
+
# Extracts a list of all usernames or lists mentioned in the Tweet <tt>text</tt>
|
91
|
+
# along with the indices for where the mention occurred. If the
|
92
|
+
# <tt>text</tt> is nil or contains no username or list mentions, an empty array
|
93
|
+
# will be returned.
|
94
|
+
#
|
95
|
+
# If a block is given, then it will be called with each username, list slug, the start
|
96
|
+
# index, and the end index in the <tt>text</tt>. The list_slug will be an empty string
|
97
|
+
# if this is a username mention.
|
98
|
+
def extract_mentions_or_lists_with_indices(text) # :yields: username, list_slug, start, end
|
99
|
+
return [] unless text
|
100
|
+
|
101
|
+
possible_entries = []
|
102
|
+
text.to_s.scan(Twitter::Regex[:extract_mentions_or_lists]) do |before, sn, list_slug, after|
|
103
|
+
extract_mentions_match_data = $~
|
104
|
+
unless after =~ Twitter::Regex[:end_screen_name_match]
|
105
|
+
start_position = extract_mentions_match_data.char_begin(2) - 1
|
106
|
+
end_position = extract_mentions_match_data.char_end(list_slug.nil? ? 2 : 3)
|
107
|
+
possible_entries << {
|
108
|
+
:screen_name => sn,
|
109
|
+
:list_slug => list_slug || "",
|
110
|
+
:indices => [start_position, end_position]
|
111
|
+
}
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
if block_given?
|
116
|
+
possible_entries.each do |mention|
|
117
|
+
yield mention[:screen_name], mention[:list_slug], mention[:indices].first, mention[:indices].last
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
possible_entries
|
122
|
+
end
|
123
|
+
|
90
124
|
# Extracts the username replied to in the Tweet <tt>text</tt>. If the
|
91
125
|
# <tt>text</tt> is <tt>nil</tt> or is not a reply, nil will be returned.
|
92
126
|
#
|
@@ -123,7 +157,7 @@ module Twitter
|
|
123
157
|
position = 0
|
124
158
|
text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, path, query|
|
125
159
|
valid_url_match_data = $~
|
126
|
-
if !protocol.
|
160
|
+
if protocol && !protocol.empty?
|
127
161
|
start_position = valid_url_match_data.char_begin(3)
|
128
162
|
end_position = valid_url_match_data.char_end(3)
|
129
163
|
urls << {
|
data/lib/regex.rb
CHANGED
@@ -7,6 +7,22 @@ module Twitter
|
|
7
7
|
class Regex
|
8
8
|
REGEXEN = {} # :nodoc:
|
9
9
|
|
10
|
+
def self.regex_range(from, to = nil) # :nodoc:
|
11
|
+
if $RUBY_1_9
|
12
|
+
if to
|
13
|
+
"\\u{#{from.to_s(16).rjust(4, '0')}}-\\u{#{to.to_s(16).rjust(4, '0')}}"
|
14
|
+
else
|
15
|
+
"\\u{#{from.to_s(16).rjust(4, '0')}}"
|
16
|
+
end
|
17
|
+
else
|
18
|
+
if to
|
19
|
+
[from].pack('U') + '-' + [to].pack('U')
|
20
|
+
else
|
21
|
+
[from].pack('U')
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
10
26
|
# Space is more than %20, U+3000 for example is the full-width space used with Kanji. Provide a short-hand
|
11
27
|
# to access both the list of characters and a pattern suitable for use with String#split
|
12
28
|
# Taken from: ActiveSupport::Multibyte::Handlers::UTF8Handler::UNICODE_WHITESPACE
|
@@ -29,6 +45,7 @@ module Twitter
|
|
29
45
|
|
30
46
|
REGEXEN[:at_signs] = /[@@]/
|
31
47
|
REGEXEN[:extract_mentions] = /(^|[^a-zA-Z0-9_])#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})(?=(.|$))/o
|
48
|
+
REGEXEN[:extract_mentions_or_lists] = /(^|[^a-zA-Z0-9_])#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})(\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})?(?=(.|$))/o
|
32
49
|
REGEXEN[:extract_reply] = /^(?:#{REGEXEN[:spaces]})*#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})/o
|
33
50
|
|
34
51
|
major, minor, patch = RUBY_VERSION.split('.')
|
@@ -42,35 +59,43 @@ module Twitter
|
|
42
59
|
# Latin accented characters
|
43
60
|
# Excludes 0xd7 from the range (the multiplication sign, confusable with "x").
|
44
61
|
# Also excludes 0xf7, the division sign
|
45
|
-
LATIN_ACCENTS = [
|
62
|
+
LATIN_ACCENTS = [
|
63
|
+
regex_range(0xc0, 0xd6),
|
64
|
+
regex_range(0xd8, 0xf6),
|
65
|
+
regex_range(0xf8, 0xff),
|
66
|
+
regex_range(0x015f)
|
67
|
+
].join('').freeze
|
68
|
+
|
46
69
|
NON_LATIN_HASHTAG_CHARS = [
|
47
70
|
# Cyrillic (Russian, Ukrainian, etc.)
|
48
|
-
(0x0400
|
49
|
-
(0x0500
|
71
|
+
regex_range(0x0400, 0x04ff), # Cyrillic
|
72
|
+
regex_range(0x0500, 0x0527), # Cyrillic Supplement
|
73
|
+
regex_range(0x2de0, 0x2dff), # Cyrillic Extended A
|
74
|
+
regex_range(0xa640, 0xa69f), # Cyrillic Extended B
|
50
75
|
# Hangul (Korean)
|
51
|
-
(0x1100
|
52
|
-
(0x3130
|
53
|
-
(0xA960
|
54
|
-
(0xAC00
|
55
|
-
(0xD7B0
|
56
|
-
|
76
|
+
regex_range(0x1100, 0x11ff), # Hangul Jamo
|
77
|
+
regex_range(0x3130, 0x3185), # Hangul Compatibility Jamo
|
78
|
+
regex_range(0xA960, 0xA97F), # Hangul Jamo Extended-A
|
79
|
+
regex_range(0xAC00, 0xD7AF), # Hangul Syllables
|
80
|
+
regex_range(0xD7B0, 0xD7FF), # Hangul Jamo Extended-B
|
81
|
+
regex_range(0xFFA1, 0xFFDC) # Half-width Hangul
|
82
|
+
].join('').freeze
|
57
83
|
REGEXEN[:latin_accents] = /[#{LATIN_ACCENTS}]+/o
|
58
84
|
|
59
85
|
REGEXEN[:end_screen_name_match] = /^(?:#{REGEXEN[:at_signs]}|#{REGEXEN[:latin_accents]}|:\/\/)/o
|
60
86
|
|
61
87
|
CJ_HASHTAG_CHARACTERS = [
|
62
|
-
(0x30A1
|
63
|
-
(0xFF66
|
64
|
-
(0xFF10
|
65
|
-
(0x3041
|
66
|
-
(0x3400
|
67
|
-
(0x4E00
|
68
|
-
(0x20000
|
69
|
-
(0x2A700
|
70
|
-
(0x2B740
|
71
|
-
(0x2F800
|
72
|
-
|
73
|
-
].flatten.pack('U*').freeze
|
88
|
+
regex_range(0x30A1, 0x30FA), regex_range(0x30FC, 0x30FE), # Katakana (full-width)
|
89
|
+
regex_range(0xFF66, 0xFF9F), # Katakana (half-width)
|
90
|
+
regex_range(0xFF10, 0xFF19), regex_range(0xFF21, 0xFF3A), regex_range(0xFF41, 0xFF5A), # Latin (full-width)
|
91
|
+
regex_range(0x3041, 0x3096), regex_range(0x3099, 0x309E), # Hiragana
|
92
|
+
regex_range(0x3400, 0x4DBF), # Kanji (CJK Extension A)
|
93
|
+
regex_range(0x4E00, 0x9FFF), # Kanji (Unified)
|
94
|
+
regex_range(0x20000, 0x2A6DF), # Kanji (CJK Extension B)
|
95
|
+
regex_range(0x2A700, 0x2B73F), # Kanji (CJK Extension C)
|
96
|
+
regex_range(0x2B740, 0x2B81F), # Kanji (CJK Extension D)
|
97
|
+
regex_range(0x2F800, 0x2FA1F), regex_range(0x3005), regex_range(0x303B) # Kanji (CJK supplement)
|
98
|
+
].join('').freeze
|
74
99
|
|
75
100
|
HASHTAG_BOUNDARY = /(?:\A|\z|#{REGEXEN[:spaces]}|「|」|。|、|\.|!|\?|!|?|,)/
|
76
101
|
|
@@ -93,7 +118,7 @@ module Twitter
|
|
93
118
|
REGEXEN[:valid_domain_name] = /(?:[^#{DOMAIN_EXCLUDE_PART}](?:[-]|[^#{DOMAIN_EXCLUDE_PART}])*)?[^#{DOMAIN_EXCLUDE_PART}]/
|
94
119
|
REGEXEN[:valid_domain] = /#{REGEXEN[:valid_subdomain]}*#{REGEXEN[:valid_domain_name]}\.(?:xn--[a-z0-9]{2,}|[a-z]{2,})(?::[0-9]+)?/i
|
95
120
|
|
96
|
-
REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\$\/%#\[\]\-_
|
121
|
+
REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\$\/%#\[\]\-_~|#{LATIN_ACCENTS}]/i
|
97
122
|
# Allow URL paths to contain balanced parens
|
98
123
|
# 1. Used in Wikipedia URLs like /Primer_(film)
|
99
124
|
# 2. Used in IIS sessions like /S(dfd346)/
|
@@ -102,12 +127,12 @@ module Twitter
|
|
102
127
|
REGEXEN[:valid_url_path_chars] = /(?:
|
103
128
|
#{REGEXEN[:wikipedia_disambiguation]}|
|
104
129
|
@#{REGEXEN[:valid_general_url_path_chars]}+\/|
|
105
|
-
[\.,]#{REGEXEN[:valid_general_url_path_chars]}
|
130
|
+
[\.,]#{REGEXEN[:valid_general_url_path_chars]}?|
|
106
131
|
#{REGEXEN[:valid_general_url_path_chars]}+
|
107
132
|
)/ix
|
108
133
|
# Valid end-of-path characters (so /foo. does not gobble the period).
|
109
134
|
# 1. Allow =&# for empty URL parameters and other URL-join artifacts
|
110
|
-
REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9=_
|
135
|
+
REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9=_#\/\+\-#{LATIN_ACCENTS}]|#{REGEXEN[:wikipedia_disambiguation]}/io
|
111
136
|
REGEXEN[:valid_url_query_chars] = /[a-z0-9!\*'\(\);:&=\+\$\/%#\[\]\-_\.,~|]/i
|
112
137
|
REGEXEN[:valid_url_query_ending_chars] = /[a-z0-9_&=#\/]/i
|
113
138
|
REGEXEN[:valid_url] = %r{
|
data/lib/rewriter.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
module Twitter
|
2
|
+
# A module that provides base methods for rewriting usernames, lists, hashtags and URLs.
|
3
|
+
module Rewriter extend self
|
4
|
+
def rewrite(text, options = {})
|
5
|
+
[:hashtags, :urls, :usernames_or_lists].inject(text) do |key|
|
6
|
+
send("rewrite_#{key}", text, &options[key]) if options[key]
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
10
|
+
def rewrite_usernames_or_lists(text)
|
11
|
+
new_text = ""
|
12
|
+
|
13
|
+
# this -1 flag allows strings ending in ">" to work
|
14
|
+
text.to_s.split(/[<>]/, -1).each_with_index do |chunk, index|
|
15
|
+
if index != 0
|
16
|
+
new_text << ((index % 2 == 0) ? ">" : "<")
|
17
|
+
end
|
18
|
+
|
19
|
+
if index % 4 != 0
|
20
|
+
new_text << chunk
|
21
|
+
else
|
22
|
+
new_text << chunk.gsub(Twitter::Regex[:auto_link_usernames_or_lists]) do
|
23
|
+
before, at, user, slash_listname, after = $1, $2, $3, $4, $'
|
24
|
+
if slash_listname
|
25
|
+
# the link is a list
|
26
|
+
"#{before}#{yield(at, user, slash_listname)}"
|
27
|
+
else
|
28
|
+
if after =~ Twitter::Regex[:end_screen_name_match]
|
29
|
+
# Followed by something that means we don't autolink
|
30
|
+
"#{before}#{at}#{user}#{slash_listname}"
|
31
|
+
else
|
32
|
+
# this is a screen name
|
33
|
+
"#{before}#{yield(at, user, nil)}#{slash_listname}"
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
new_text
|
41
|
+
end
|
42
|
+
|
43
|
+
def rewrite_hashtags(text)
|
44
|
+
text.to_s.gsub(Twitter::Regex[:auto_link_hashtags]) do
|
45
|
+
before = $1
|
46
|
+
hash = $2
|
47
|
+
hashtag = $3
|
48
|
+
"#{before}#{yield(hash, hashtag)}"
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def rewrite_urls(text)
|
53
|
+
text.to_s.gsub(Twitter::Regex[:valid_url]) do
|
54
|
+
all, before, url, protocol, domain, path, query_string = $1, $2, $3, $4, $5, $6, $7
|
55
|
+
if protocol && !protocol.empty?
|
56
|
+
"#{before}#{yield(url)}"
|
57
|
+
else
|
58
|
+
all
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|