twitter-text 1.4.8 → 1.4.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +1 -27
- data/README.rdoc +1 -0
- data/lib/autolink.rb +55 -47
- data/lib/extractor.rb +36 -2
- data/lib/regex.rb +49 -24
- data/lib/rewriter.rb +63 -0
- data/lib/twitter-text.rb +7 -4
- data/lib/validation.rb +5 -5
- data/spec/autolinking_spec.rb +41 -3
- data/spec/extractor_spec.rb +2 -2
- data/spec/rewriter_spec.rb +558 -0
- data/spec/spec_helper.rb +10 -4
- data/test/conformance_test.rb +18 -5
- data/twitter-text.gemspec +6 -4
- metadata +12 -7
data/Gemfile.lock
CHANGED
@@ -2,40 +2,16 @@ PATH
|
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
4
|
twitter-text (1.4.8)
|
5
|
-
|
5
|
+
activesupport
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: http://rubygems.org/
|
9
9
|
specs:
|
10
|
-
abstract (1.0.0)
|
11
|
-
actionpack (3.0.3)
|
12
|
-
activemodel (= 3.0.3)
|
13
|
-
activesupport (= 3.0.3)
|
14
|
-
builder (~> 2.1.2)
|
15
|
-
erubis (~> 2.6.6)
|
16
|
-
i18n (~> 0.4)
|
17
|
-
rack (~> 1.2.1)
|
18
|
-
rack-mount (~> 0.6.13)
|
19
|
-
rack-test (~> 0.5.6)
|
20
|
-
tzinfo (~> 0.3.23)
|
21
|
-
activemodel (3.0.3)
|
22
|
-
activesupport (= 3.0.3)
|
23
|
-
builder (~> 2.1.2)
|
24
|
-
i18n (~> 0.4)
|
25
10
|
activesupport (3.0.3)
|
26
|
-
builder (2.1.2)
|
27
11
|
diff-lcs (1.1.2)
|
28
|
-
erubis (2.6.6)
|
29
|
-
abstract (>= 1.0.0)
|
30
|
-
i18n (0.5.0)
|
31
12
|
nokogiri (1.4.4)
|
32
13
|
nokogiri (1.4.4-java)
|
33
14
|
weakling (>= 0.0.3)
|
34
|
-
rack (1.2.1)
|
35
|
-
rack-mount (0.6.13)
|
36
|
-
rack (>= 1.0.0)
|
37
|
-
rack-test (0.5.6)
|
38
|
-
rack (>= 1.0)
|
39
15
|
rake (0.8.7)
|
40
16
|
rspec (2.3.0)
|
41
17
|
rspec-core (~> 2.3.0)
|
@@ -48,7 +24,6 @@ GEM
|
|
48
24
|
simplecov (0.3.7)
|
49
25
|
simplecov-html (>= 0.3.7)
|
50
26
|
simplecov-html (0.3.9)
|
51
|
-
tzinfo (0.3.23)
|
52
27
|
weakling (0.0.4-java)
|
53
28
|
|
54
29
|
PLATFORMS
|
@@ -56,7 +31,6 @@ PLATFORMS
|
|
56
31
|
ruby
|
57
32
|
|
58
33
|
DEPENDENCIES
|
59
|
-
actionpack
|
60
34
|
nokogiri
|
61
35
|
rake
|
62
36
|
rspec
|
data/README.rdoc
CHANGED
@@ -90,6 +90,7 @@ Thanks to everybody who has filed issues, provided feedback or contributed patch
|
|
90
90
|
* Jeff Smick - http://github.com/sprsquish
|
91
91
|
* Kenneth Kufluk - https://github.com/kennethkufluk
|
92
92
|
* Keita Fujii - https://github.com/keitaf
|
93
|
+
* Yoshimasa Niwa - https://github.com/niw
|
93
94
|
|
94
95
|
* Patches from the community …
|
95
96
|
* Jean-Philippe Bougie - http://github.com/jpbougie
|
data/lib/autolink.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
1
3
|
module Twitter
|
2
4
|
# A module for including Tweet auto-linking in a class. The primary use of this is for helpers/views so they can auto-link
|
3
5
|
# usernames, lists, hashtags and URLs.
|
4
6
|
module Autolink extend self
|
5
|
-
include ActionView::Helpers::TagHelper #tag_options needed by auto_link
|
6
|
-
|
7
7
|
# Default CSS class for auto-linked URLs
|
8
8
|
DEFAULT_URL_CLASS = "tweet-url"
|
9
9
|
# Default CSS class for auto-linked lists (along with the url class)
|
@@ -19,6 +19,7 @@ module Twitter
|
|
19
19
|
# Options which should not be passed as HTML attributes
|
20
20
|
OPTIONS_NOT_ATTRIBUTES = [:url_class, :list_class, :username_class, :hashtag_class,
|
21
21
|
:username_url_base, :list_url_base, :hashtag_url_base,
|
22
|
+
:username_url_block, :list_url_block, :hashtag_url_block, :link_url_block,
|
22
23
|
:suppress_lists, :suppress_no_follow]
|
23
24
|
|
24
25
|
HTML_ENTITIES = {
|
@@ -30,7 +31,7 @@ module Twitter
|
|
30
31
|
}
|
31
32
|
|
32
33
|
def html_escape(text)
|
33
|
-
text && text.gsub(/[&"'><]/) do |character|
|
34
|
+
text && text.to_s.gsub(/[&"'><]/) do |character|
|
34
35
|
HTML_ENTITIES[character]
|
35
36
|
end
|
36
37
|
end
|
@@ -68,7 +69,7 @@ module Twitter
|
|
68
69
|
# <tt>:list_url_base</tt>:: the value for <tt>href</tt> attribute on list links. The <tt>@username/list</tt> (minus the <tt>@</tt>) will be appended at the end of this.
|
69
70
|
# <tt>:suppress_lists</tt>:: disable auto-linking to lists
|
70
71
|
# <tt>:suppress_no_follow</tt>:: Do not add <tt>rel="nofollow"</tt> to auto-linked items
|
71
|
-
# <tt>:target</tt>:: add <tt>target="window_name"</tt> to auto-linked items
|
72
|
+
# <tt>:target</tt>:: add <tt>target="window_name"</tt> to auto-linked items
|
72
73
|
def auto_link_usernames_or_lists(text, options = {}) # :yields: list_or_username
|
73
74
|
options = options.dup
|
74
75
|
options[:url_class] ||= DEFAULT_URL_CLASS
|
@@ -79,39 +80,27 @@ module Twitter
|
|
79
80
|
options[:target] ||= DEFAULT_TARGET
|
80
81
|
|
81
82
|
extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow]
|
82
|
-
new_text = ""
|
83
83
|
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
new_text << ((index % 2 == 0) ? ">" : "<")
|
88
|
-
end
|
84
|
+
Twitter::Rewriter.rewrite_usernames_or_lists(text) do |at, username, slash_listname|
|
85
|
+
name = "#{username}#{slash_listname}"
|
86
|
+
chunk = block_given? ? yield(name) : name
|
89
87
|
|
90
|
-
if
|
91
|
-
|
88
|
+
if slash_listname && !options[:suppress_lists]
|
89
|
+
href = if options[:list_url_block]
|
90
|
+
options[:list_url_block].call(name.downcase)
|
91
|
+
else
|
92
|
+
"#{html_escape(options[:list_url_base])}#{html_escape(name.downcase)}"
|
93
|
+
end
|
94
|
+
%(#{at}<a class="#{options[:url_class]} #{options[:list_class]}" #{target_tag(options)}href="#{href}"#{extra_html}>#{html_escape(chunk)}</a>)
|
92
95
|
else
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
chunk = list = "#{user}#{slash_listname}"
|
98
|
-
chunk = yield(list) if block_given?
|
99
|
-
"#{before}#{at}<a class=\"#{options[:url_class]} #{options[:list_class]}\" #{target_tag(options)}href=\"#{html_escape(options[:list_url_base])}#{html_escape(list.downcase)}\"#{extra_html}>#{html_escape(chunk)}</a>"
|
100
|
-
else
|
101
|
-
if after =~ Twitter::Regex[:end_screen_name_match]
|
102
|
-
# Followed by something that means we don't autolink
|
103
|
-
"#{before}#{at}#{user}#{slash_listname}"
|
104
|
-
else
|
105
|
-
# this is a screen name
|
106
|
-
chunk = user
|
107
|
-
chunk = yield(chunk) if block_given?
|
108
|
-
"#{before}#{at}<a class=\"#{options[:url_class]} #{options[:username_class]}\" #{target_tag(options)}href=\"#{html_escape(options[:username_url_base])}#{html_escape(chunk)}\"#{extra_html}>#{html_escape(chunk)}</a>#{slash_listname}"
|
109
|
-
end
|
110
|
-
end
|
96
|
+
href = if options[:username_url_block]
|
97
|
+
options[:username_url_block].call(chunk)
|
98
|
+
else
|
99
|
+
"#{html_escape(options[:username_url_base])}#{html_escape(chunk)}"
|
111
100
|
end
|
101
|
+
%(#{at}<a class="#{options[:url_class]} #{options[:username_class]}" #{target_tag(options)}href="#{href}"#{extra_html}>#{html_escape(chunk)}</a>)
|
112
102
|
end
|
113
103
|
end
|
114
|
-
new_text
|
115
104
|
end
|
116
105
|
|
117
106
|
# Add <tt><a></a></tt> tags around the hashtags in the provided <tt>text</tt>. The
|
@@ -122,7 +111,7 @@ module Twitter
|
|
122
111
|
# <tt>:hashtag_class</tt>:: class to add to hashtag <tt><a></tt> tags
|
123
112
|
# <tt>:hashtag_url_base</tt>:: the value for <tt>href</tt> attribute. The hashtag text (minus the <tt>#</tt>) will be appended at the end of this.
|
124
113
|
# <tt>:suppress_no_follow</tt>:: Do not add <tt>rel="nofollow"</tt> to auto-linked items
|
125
|
-
# <tt>:target</tt>:: add <tt>target="window_name"</tt> to auto-linked items
|
114
|
+
# <tt>:target</tt>:: add <tt>target="window_name"</tt> to auto-linked items
|
126
115
|
def auto_link_hashtags(text, options = {}) # :yields: hashtag_text
|
127
116
|
options = options.dup
|
128
117
|
options[:url_class] ||= DEFAULT_URL_CLASS
|
@@ -131,12 +120,14 @@ module Twitter
|
|
131
120
|
options[:target] ||= DEFAULT_TARGET
|
132
121
|
extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow]
|
133
122
|
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
123
|
+
Twitter::Rewriter.rewrite_hashtags(text) do |hash, hashtag|
|
124
|
+
hashtag = yield(hashtag) if block_given?
|
125
|
+
href = if options[:hashtag_url_block]
|
126
|
+
options[:hashtag_url_block].call(hashtag)
|
127
|
+
else
|
128
|
+
"#{options[:hashtag_url_base]}#{html_escape(hashtag)}"
|
129
|
+
end
|
130
|
+
%(<a href="#{href}" title="##{html_escape(hashtag)}" #{target_tag(options)}class="#{options[:url_class]} #{options[:hashtag_class]}"#{extra_html}>#{html_escape(hash)}#{html_escape(hashtag)}</a>)
|
140
131
|
end
|
141
132
|
end
|
142
133
|
|
@@ -148,28 +139,45 @@ module Twitter
|
|
148
139
|
options = href_options.dup
|
149
140
|
options[:rel] = "nofollow" unless options.delete(:suppress_no_follow)
|
150
141
|
options[:class] = options.delete(:url_class)
|
142
|
+
html_attrs = html_attrs_for_options(options)
|
151
143
|
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
html_attrs = tag_options(options.reject{|k,v| OPTIONS_NOT_ATTRIBUTES.include?(k) }.stringify_keys) || ""
|
156
|
-
"#{before}<a href=\"#{html_escape(url)}\"#{html_attrs}>#{html_escape(url)}</a>"
|
144
|
+
Twitter::Rewriter.rewrite_urls(text) do |url|
|
145
|
+
href = if options[:link_url_block]
|
146
|
+
options.delete(:link_url_block).call(url)
|
157
147
|
else
|
158
|
-
|
148
|
+
html_escape(url)
|
159
149
|
end
|
150
|
+
%(<a href="#{href}"#{html_attrs}>#{html_escape(url)}</a>)
|
160
151
|
end
|
161
152
|
end
|
162
153
|
|
163
154
|
private
|
164
155
|
|
156
|
+
BOOLEAN_ATTRIBUTES = Set.new([:disabled, :readonly, :multiple, :checked]).freeze
|
157
|
+
|
158
|
+
def html_attrs_for_options(options)
|
159
|
+
html_attrs options.reject{|k, v| OPTIONS_NOT_ATTRIBUTES.include?(k)}
|
160
|
+
end
|
161
|
+
|
162
|
+
def html_attrs(options)
|
163
|
+
options.inject("") do |attrs, (key, value)|
|
164
|
+
if BOOLEAN_ATTRIBUTES.include?(key)
|
165
|
+
value = value ? key : nil
|
166
|
+
end
|
167
|
+
if !value.nil?
|
168
|
+
attrs << %( #{html_escape(key)}="#{html_escape(value)}")
|
169
|
+
end
|
170
|
+
attrs
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
165
174
|
def target_tag(options)
|
166
|
-
target_option = options[:target]
|
167
|
-
if target_option.
|
175
|
+
target_option = options[:target].to_s
|
176
|
+
if target_option.empty?
|
168
177
|
""
|
169
178
|
else
|
170
179
|
"target=\"#{html_escape(target_option)}\""
|
171
180
|
end
|
172
181
|
end
|
173
|
-
|
174
182
|
end
|
175
183
|
end
|
data/lib/extractor.rb
CHANGED
@@ -57,7 +57,7 @@ module Twitter
|
|
57
57
|
screen_names_only
|
58
58
|
end
|
59
59
|
|
60
|
-
# Extracts a list of all
|
60
|
+
# Extracts a list of all usernames mentioned in the Tweet <tt>text</tt>
|
61
61
|
# along with the indices for where the mention occurred. If the
|
62
62
|
# <tt>text</tt> is nil or contains no username mentions, an empty array
|
63
63
|
# will be returned.
|
@@ -87,6 +87,40 @@ module Twitter
|
|
87
87
|
possible_screen_names
|
88
88
|
end
|
89
89
|
|
90
|
+
# Extracts a list of all usernames or lists mentioned in the Tweet <tt>text</tt>
|
91
|
+
# along with the indices for where the mention occurred. If the
|
92
|
+
# <tt>text</tt> is nil or contains no username or list mentions, an empty array
|
93
|
+
# will be returned.
|
94
|
+
#
|
95
|
+
# If a block is given, then it will be called with each username, list slug, the start
|
96
|
+
# index, and the end index in the <tt>text</tt>. The list_slug will be an empty string
|
97
|
+
# if this is a username mention.
|
98
|
+
def extract_mentions_or_lists_with_indices(text) # :yields: username, list_slug, start, end
|
99
|
+
return [] unless text
|
100
|
+
|
101
|
+
possible_entries = []
|
102
|
+
text.to_s.scan(Twitter::Regex[:extract_mentions_or_lists]) do |before, sn, list_slug, after|
|
103
|
+
extract_mentions_match_data = $~
|
104
|
+
unless after =~ Twitter::Regex[:end_screen_name_match]
|
105
|
+
start_position = extract_mentions_match_data.char_begin(2) - 1
|
106
|
+
end_position = extract_mentions_match_data.char_end(list_slug.nil? ? 2 : 3)
|
107
|
+
possible_entries << {
|
108
|
+
:screen_name => sn,
|
109
|
+
:list_slug => list_slug || "",
|
110
|
+
:indices => [start_position, end_position]
|
111
|
+
}
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
if block_given?
|
116
|
+
possible_entries.each do |mention|
|
117
|
+
yield mention[:screen_name], mention[:list_slug], mention[:indices].first, mention[:indices].last
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
possible_entries
|
122
|
+
end
|
123
|
+
|
90
124
|
# Extracts the username replied to in the Tweet <tt>text</tt>. If the
|
91
125
|
# <tt>text</tt> is <tt>nil</tt> or is not a reply nil will be returned.
|
92
126
|
#
|
@@ -123,7 +157,7 @@ module Twitter
|
|
123
157
|
position = 0
|
124
158
|
text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, path, query|
|
125
159
|
valid_url_match_data = $~
|
126
|
-
if !protocol.
|
160
|
+
if protocol && !protocol.empty?
|
127
161
|
start_position = valid_url_match_data.char_begin(3)
|
128
162
|
end_position = valid_url_match_data.char_end(3)
|
129
163
|
urls << {
|
data/lib/regex.rb
CHANGED
@@ -7,6 +7,22 @@ module Twitter
|
|
7
7
|
class Regex
|
8
8
|
REGEXEN = {} # :nodoc:
|
9
9
|
|
10
|
+
def self.regex_range(from, to = nil) # :nodoc:
|
11
|
+
if $RUBY_1_9
|
12
|
+
if to
|
13
|
+
"\\u{#{from.to_s(16).rjust(4, '0')}}-\\u{#{to.to_s(16).rjust(4, '0')}}"
|
14
|
+
else
|
15
|
+
"\\u{#{from.to_s(16).rjust(4, '0')}}"
|
16
|
+
end
|
17
|
+
else
|
18
|
+
if to
|
19
|
+
[from].pack('U') + '-' + [to].pack('U')
|
20
|
+
else
|
21
|
+
[from].pack('U')
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
10
26
|
# Space is more than %20, U+3000 for example is the full-width space used with Kanji. Provide a short-hand
|
11
27
|
# to access both the list of characters and a pattern suitable for use with String#split
|
12
28
|
# Taken from: ActiveSupport::Multibyte::Handlers::UTF8Handler::UNICODE_WHITESPACE
|
@@ -29,6 +45,7 @@ module Twitter
|
|
29
45
|
|
30
46
|
REGEXEN[:at_signs] = /[@@]/
|
31
47
|
REGEXEN[:extract_mentions] = /(^|[^a-zA-Z0-9_])#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})(?=(.|$))/o
|
48
|
+
REGEXEN[:extract_mentions_or_lists] = /(^|[^a-zA-Z0-9_])#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})(\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})?(?=(.|$))/o
|
32
49
|
REGEXEN[:extract_reply] = /^(?:#{REGEXEN[:spaces]})*#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})/o
|
33
50
|
|
34
51
|
major, minor, patch = RUBY_VERSION.split('.')
|
@@ -42,35 +59,43 @@ module Twitter
|
|
42
59
|
# Latin accented characters
|
43
60
|
# Excludes 0xd7 from the range (the multiplication sign, confusable with "x").
|
44
61
|
# Also excludes 0xf7, the division sign
|
45
|
-
LATIN_ACCENTS = [
|
62
|
+
LATIN_ACCENTS = [
|
63
|
+
regex_range(0xc0, 0xd6),
|
64
|
+
regex_range(0xd8, 0xf6),
|
65
|
+
regex_range(0xf8, 0xff),
|
66
|
+
regex_range(0x015f)
|
67
|
+
].join('').freeze
|
68
|
+
|
46
69
|
NON_LATIN_HASHTAG_CHARS = [
|
47
70
|
# Cyrillic (Russian, Ukrainian, etc.)
|
48
|
-
(0x0400
|
49
|
-
(0x0500
|
71
|
+
regex_range(0x0400, 0x04ff), # Cyrillic
|
72
|
+
regex_range(0x0500, 0x0527), # Cyrillic Supplement
|
73
|
+
regex_range(0x2de0, 0x2dff), # Cyrillic Extended A
|
74
|
+
regex_range(0xa640, 0xa69f), # Cyrillic Extended B
|
50
75
|
# Hangul (Korean)
|
51
|
-
(0x1100
|
52
|
-
(0x3130
|
53
|
-
(0xA960
|
54
|
-
(0xAC00
|
55
|
-
(0xD7B0
|
56
|
-
|
76
|
+
regex_range(0x1100, 0x11ff), # Hangul Jamo
|
77
|
+
regex_range(0x3130, 0x3185), # Hangul Compatibility Jamo
|
78
|
+
regex_range(0xA960, 0xA97F), # Hangul Jamo Extended-A
|
79
|
+
regex_range(0xAC00, 0xD7AF), # Hangul Syllables
|
80
|
+
regex_range(0xD7B0, 0xD7FF), # Hangul Jamo Extended-B
|
81
|
+
regex_range(0xFFA1, 0xFFDC) # Half-width Hangul
|
82
|
+
].join('').freeze
|
57
83
|
REGEXEN[:latin_accents] = /[#{LATIN_ACCENTS}]+/o
|
58
84
|
|
59
85
|
REGEXEN[:end_screen_name_match] = /^(?:#{REGEXEN[:at_signs]}|#{REGEXEN[:latin_accents]}|:\/\/)/o
|
60
86
|
|
61
87
|
CJ_HASHTAG_CHARACTERS = [
|
62
|
-
(0x30A1
|
63
|
-
(0xFF66
|
64
|
-
(0xFF10
|
65
|
-
(0x3041
|
66
|
-
(0x3400
|
67
|
-
(0x4E00
|
68
|
-
(0x20000
|
69
|
-
(0x2A700
|
70
|
-
(0x2B740
|
71
|
-
(0x2F800
|
72
|
-
|
73
|
-
].flatten.pack('U*').freeze
|
88
|
+
regex_range(0x30A1, 0x30FA), regex_range(0x30FC, 0x30FE), # Katakana (full-width)
|
89
|
+
regex_range(0xFF66, 0xFF9F), # Katakana (half-width)
|
90
|
+
regex_range(0xFF10, 0xFF19), regex_range(0xFF21, 0xFF3A), regex_range(0xFF41, 0xFF5A), # Latin (full-width)
|
91
|
+
regex_range(0x3041, 0x3096), regex_range(0x3099, 0x309E), # Hiragana
|
92
|
+
regex_range(0x3400, 0x4DBF), # Kanji (CJK Extension A)
|
93
|
+
regex_range(0x4E00, 0x9FFF), # Kanji (Unified)
|
94
|
+
regex_range(0x20000, 0x2A6DF), # Kanji (CJK Extension B)
|
95
|
+
regex_range(0x2A700, 0x2B73F), # Kanji (CJK Extension C)
|
96
|
+
regex_range(0x2B740, 0x2B81F), # Kanji (CJK Extension D)
|
97
|
+
regex_range(0x2F800, 0x2FA1F), regex_range(0x3005), regex_range(0x303B) # Kanji (CJK supplement)
|
98
|
+
].join('').freeze
|
74
99
|
|
75
100
|
HASHTAG_BOUNDARY = /(?:\A|\z|#{REGEXEN[:spaces]}|「|」|。|、|\.|!|\?|!|?|,)/
|
76
101
|
|
@@ -93,7 +118,7 @@ module Twitter
|
|
93
118
|
REGEXEN[:valid_domain_name] = /(?:[^#{DOMAIN_EXCLUDE_PART}](?:[-]|[^#{DOMAIN_EXCLUDE_PART}])*)?[^#{DOMAIN_EXCLUDE_PART}]/
|
94
119
|
REGEXEN[:valid_domain] = /#{REGEXEN[:valid_subdomain]}*#{REGEXEN[:valid_domain_name]}\.(?:xn--[a-z0-9]{2,}|[a-z]{2,})(?::[0-9]+)?/i
|
95
120
|
|
96
|
-
REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\$\/%#\[\]\-_
|
121
|
+
REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\$\/%#\[\]\-_~|#{LATIN_ACCENTS}]/i
|
97
122
|
# Allow URL paths to contain balanced parens
|
98
123
|
# 1. Used in Wikipedia URLs like /Primer_(film)
|
99
124
|
# 2. Used in IIS sessions like /S(dfd346)/
|
@@ -102,12 +127,12 @@ module Twitter
|
|
102
127
|
REGEXEN[:valid_url_path_chars] = /(?:
|
103
128
|
#{REGEXEN[:wikipedia_disambiguation]}|
|
104
129
|
@#{REGEXEN[:valid_general_url_path_chars]}+\/|
|
105
|
-
[\.,]#{REGEXEN[:valid_general_url_path_chars]}
|
130
|
+
[\.,]#{REGEXEN[:valid_general_url_path_chars]}?|
|
106
131
|
#{REGEXEN[:valid_general_url_path_chars]}+
|
107
132
|
)/ix
|
108
133
|
# Valid end-of-path characters (so /foo. does not gobble the period).
|
109
134
|
# 1. Allow =&# for empty URL parameters and other URL-join artifacts
|
110
|
-
REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9=_
|
135
|
+
REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9=_#\/\+\-#{LATIN_ACCENTS}]|#{REGEXEN[:wikipedia_disambiguation]}/io
|
111
136
|
REGEXEN[:valid_url_query_chars] = /[a-z0-9!\*'\(\);:&=\+\$\/%#\[\]\-_\.,~|]/i
|
112
137
|
REGEXEN[:valid_url_query_ending_chars] = /[a-z0-9_&=#\/]/i
|
113
138
|
REGEXEN[:valid_url] = %r{
|
data/lib/rewriter.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
module Twitter
  # Provides the base methods used to rewrite usernames, lists, hashtags and
  # URLs found in a text. Each rewrite_* method locates the entities with the
  # patterns from Twitter::Regex and replaces every one with whatever the
  # given block returns.
  module Rewriter extend self
    # Applies several rewriters to <tt>text</tt> in a fixed order: hashtags,
    # then URLs, then usernames/lists.
    #
    # <tt>options</tt> maps <tt>:hashtags</tt>, <tt>:urls</tt> and/or
    # <tt>:usernames_or_lists</tt> to a callable that is used as the block of
    # the matching rewrite_* method. Kinds without an entry are skipped.
    #
    # Returns the text after every requested rewrite has been applied (the
    # original <tt>text</tt> object when no option is given).
    def rewrite(text, options = {})
      # inject yields (accumulator, element). The accumulator must be threaded
      # through each step and returned unchanged when a rewriter is not
      # requested, otherwise the result of earlier steps is lost.
      [:hashtags, :urls, :usernames_or_lists].inject(text) do |rewritten, key|
        options[key] ? send("rewrite_#{key}", rewritten, &options[key]) : rewritten
      end
    end

    # Rewrites every @username and @username/list mention in <tt>text</tt>.
    #
    # Yields the at sign, the username and the "/list" part (nil for a plain
    # username) and substitutes the block's return value for the mention.
    # Returns a new String.
    def rewrite_usernames_or_lists(text)
      new_text = ""

      # this -1 flag allows strings ending in ">" to work
      text.to_s.split(/[<>]/, -1).each_with_index do |chunk, index|
        # Re-insert the delimiter that split consumed: odd chunks were
        # preceded by "<", even chunks (other than the first) by ">".
        new_text << (index.even? ? ">" : "<") unless index.zero?

        # Only every fourth chunk is scanned for mentions; all others are
        # copied through verbatim.
        if index % 4 != 0
          new_text << chunk
          next
        end

        new_text << chunk.gsub(Twitter::Regex[:auto_link_usernames_or_lists]) do
          # Capture the match data up front: the block and the regex test
          # below both overwrite the match globals.
          before, at, user, slash_listname, after = $1, $2, $3, $4, $'

          if slash_listname
            # the link is a list
            "#{before}#{yield(at, user, slash_listname)}"
          elsif after =~ Twitter::Regex[:end_screen_name_match]
            # Followed by something that means we don't autolink
            "#{before}#{at}#{user}"
          else
            # this is a screen name
            "#{before}#{yield(at, user, nil)}"
          end
        end
      end

      new_text
    end

    # Rewrites every hashtag in <tt>text</tt>.
    #
    # Yields the hash sign and the hashtag text and substitutes the block's
    # return value for the hashtag. Returns a new String.
    def rewrite_hashtags(text)
      text.to_s.gsub(Twitter::Regex[:auto_link_hashtags]) do
        before, hash, hashtag = $1, $2, $3
        "#{before}#{yield(hash, hashtag)}"
      end
    end

    # Rewrites every URL in <tt>text</tt> that carries an explicit protocol.
    #
    # Yields the URL and substitutes the block's return value for it. Matches
    # without a protocol are left exactly as they were. Returns a new String.
    def rewrite_urls(text)
      text.to_s.gsub(Twitter::Regex[:valid_url]) do
        all, before, url, protocol = $1, $2, $3, $4
        if protocol && !protocol.empty?
          "#{before}#{yield(url)}"
        else
          all
        end
      end
    end
  end
end
|