twitter-text 1.4.8 → 1.4.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile.lock CHANGED
@@ -2,40 +2,16 @@ PATH
2
2
  remote: .
3
3
  specs:
4
4
  twitter-text (1.4.8)
5
- actionpack
5
+ activesupport
6
6
 
7
7
  GEM
8
8
  remote: http://rubygems.org/
9
9
  specs:
10
- abstract (1.0.0)
11
- actionpack (3.0.3)
12
- activemodel (= 3.0.3)
13
- activesupport (= 3.0.3)
14
- builder (~> 2.1.2)
15
- erubis (~> 2.6.6)
16
- i18n (~> 0.4)
17
- rack (~> 1.2.1)
18
- rack-mount (~> 0.6.13)
19
- rack-test (~> 0.5.6)
20
- tzinfo (~> 0.3.23)
21
- activemodel (3.0.3)
22
- activesupport (= 3.0.3)
23
- builder (~> 2.1.2)
24
- i18n (~> 0.4)
25
10
  activesupport (3.0.3)
26
- builder (2.1.2)
27
11
  diff-lcs (1.1.2)
28
- erubis (2.6.6)
29
- abstract (>= 1.0.0)
30
- i18n (0.5.0)
31
12
  nokogiri (1.4.4)
32
13
  nokogiri (1.4.4-java)
33
14
  weakling (>= 0.0.3)
34
- rack (1.2.1)
35
- rack-mount (0.6.13)
36
- rack (>= 1.0.0)
37
- rack-test (0.5.6)
38
- rack (>= 1.0)
39
15
  rake (0.8.7)
40
16
  rspec (2.3.0)
41
17
  rspec-core (~> 2.3.0)
@@ -48,7 +24,6 @@ GEM
48
24
  simplecov (0.3.7)
49
25
  simplecov-html (>= 0.3.7)
50
26
  simplecov-html (0.3.9)
51
- tzinfo (0.3.23)
52
27
  weakling (0.0.4-java)
53
28
 
54
29
  PLATFORMS
@@ -56,7 +31,6 @@ PLATFORMS
56
31
  ruby
57
32
 
58
33
  DEPENDENCIES
59
- actionpack
60
34
  nokogiri
61
35
  rake
62
36
  rspec
data/README.rdoc CHANGED
@@ -90,6 +90,7 @@ Thanks to everybody who has filed issues, provided feedback or contributed patch
90
90
  * Jeff Smick - http://github.com/sprsquish
91
91
  * Kenneth Kufluk - https://github.com/kennethkufluk
92
92
  * Keita Fujii - https://github.com/keitaf
93
+ * Yoshimasa Niwa - https://github.com/niw
93
94
 
94
95
  * Patches from the community …
95
96
  * Jean-Philippe Bougie - http://github.com/jpbougie
data/lib/autolink.rb CHANGED
@@ -1,9 +1,9 @@
1
+ require 'set'
2
+
1
3
  module Twitter
2
4
  # A module for including Tweet auto-linking in a class. The primary use of this is for helpers/views so they can auto-link
3
5
  # usernames, lists, hashtags and URLs.
4
6
  module Autolink extend self
5
- include ActionView::Helpers::TagHelper #tag_options needed by auto_link
6
-
7
7
  # Default CSS class for auto-linked URLs
8
8
  DEFAULT_URL_CLASS = "tweet-url"
9
9
  # Default CSS class for auto-linked lists (along with the url class)
@@ -19,6 +19,7 @@ module Twitter
19
19
  # Options which should not be passed as HTML attributes
20
20
  OPTIONS_NOT_ATTRIBUTES = [:url_class, :list_class, :username_class, :hashtag_class,
21
21
  :username_url_base, :list_url_base, :hashtag_url_base,
22
+ :username_url_block, :list_url_block, :hashtag_url_block, :link_url_block,
22
23
  :suppress_lists, :suppress_no_follow]
23
24
 
24
25
  HTML_ENTITIES = {
@@ -30,7 +31,7 @@ module Twitter
30
31
  }
31
32
 
32
33
  def html_escape(text)
33
- text && text.gsub(/[&"'><]/) do |character|
34
+ text && text.to_s.gsub(/[&"'><]/) do |character|
34
35
  HTML_ENTITIES[character]
35
36
  end
36
37
  end
@@ -68,7 +69,7 @@ module Twitter
68
69
  # <tt>:list_url_base</tt>:: the value for <tt>href</tt> attribute on list links. The <tt>@username/list</tt> (minus the <tt>@</tt>) will be appended at the end of this.
69
70
  # <tt>:suppress_lists</tt>:: disable auto-linking to lists
70
71
  # <tt>:suppress_no_follow</tt>:: Do not add <tt>rel="nofollow"</tt> to auto-linked items
71
- # <tt>:target</tt>:: add <tt>target="window_name"</tt> to auto-linked items
72
+ # <tt>:target</tt>:: add <tt>target="window_name"</tt> to auto-linked items
72
73
  def auto_link_usernames_or_lists(text, options = {}) # :yields: list_or_username
73
74
  options = options.dup
74
75
  options[:url_class] ||= DEFAULT_URL_CLASS
@@ -79,39 +80,27 @@ module Twitter
79
80
  options[:target] ||= DEFAULT_TARGET
80
81
 
81
82
  extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow]
82
- new_text = ""
83
83
 
84
- # this -1 flag allows strings ending in ">" to work
85
- text.split(/[<>]/, -1).each_with_index do |chunk, index|
86
- if index != 0
87
- new_text << ((index % 2 == 0) ? ">" : "<")
88
- end
84
+ Twitter::Rewriter.rewrite_usernames_or_lists(text) do |at, username, slash_listname|
85
+ name = "#{username}#{slash_listname}"
86
+ chunk = block_given? ? yield(name) : name
89
87
 
90
- if index % 4 != 0
91
- new_text << chunk
88
+ if slash_listname && !options[:suppress_lists]
89
+ href = if options[:list_url_block]
90
+ options[:list_url_block].call(name.downcase)
91
+ else
92
+ "#{html_escape(options[:list_url_base])}#{html_escape(name.downcase)}"
93
+ end
94
+ %(#{at}<a class="#{options[:url_class]} #{options[:list_class]}" #{target_tag(options)}href="#{href}"#{extra_html}>#{html_escape(chunk)}</a>)
92
95
  else
93
- new_text << chunk.gsub(Twitter::Regex[:auto_link_usernames_or_lists]) do
94
- before, at, user, slash_listname, after = $1, $2, $3, $4, $'
95
- if slash_listname && !options[:suppress_lists]
96
- # the link is a list
97
- chunk = list = "#{user}#{slash_listname}"
98
- chunk = yield(list) if block_given?
99
- "#{before}#{at}<a class=\"#{options[:url_class]} #{options[:list_class]}\" #{target_tag(options)}href=\"#{html_escape(options[:list_url_base])}#{html_escape(list.downcase)}\"#{extra_html}>#{html_escape(chunk)}</a>"
100
- else
101
- if after =~ Twitter::Regex[:end_screen_name_match]
102
- # Followed by something that means we don't autolink
103
- "#{before}#{at}#{user}#{slash_listname}"
104
- else
105
- # this is a screen name
106
- chunk = user
107
- chunk = yield(chunk) if block_given?
108
- "#{before}#{at}<a class=\"#{options[:url_class]} #{options[:username_class]}\" #{target_tag(options)}href=\"#{html_escape(options[:username_url_base])}#{html_escape(chunk)}\"#{extra_html}>#{html_escape(chunk)}</a>#{slash_listname}"
109
- end
110
- end
96
+ href = if options[:username_url_block]
97
+ options[:username_url_block].call(chunk)
98
+ else
99
+ "#{html_escape(options[:username_url_base])}#{html_escape(chunk)}"
111
100
  end
101
+ %(#{at}<a class="#{options[:url_class]} #{options[:username_class]}" #{target_tag(options)}href="#{href}"#{extra_html}>#{html_escape(chunk)}</a>)
112
102
  end
113
103
  end
114
- new_text
115
104
  end
116
105
 
117
106
  # Add <tt><a></a></tt> tags around the hashtags in the provided <tt>text</tt>. The
@@ -122,7 +111,7 @@ module Twitter
122
111
  # <tt>:hashtag_class</tt>:: class to add to hashtag <tt><a></tt> tags
123
112
  # <tt>:hashtag_url_base</tt>:: the value for <tt>href</tt> attribute. The hashtag text (minus the <tt>#</tt>) will be appended at the end of this.
124
113
  # <tt>:suppress_no_follow</tt>:: Do not add <tt>rel="nofollow"</tt> to auto-linked items
125
- # <tt>:target</tt>:: add <tt>target="window_name"</tt> to auto-linked items
114
+ # <tt>:target</tt>:: add <tt>target="window_name"</tt> to auto-linked items
126
115
  def auto_link_hashtags(text, options = {}) # :yields: hashtag_text
127
116
  options = options.dup
128
117
  options[:url_class] ||= DEFAULT_URL_CLASS
@@ -131,12 +120,14 @@ module Twitter
131
120
  options[:target] ||= DEFAULT_TARGET
132
121
  extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow]
133
122
 
134
- text.gsub(Twitter::Regex[:auto_link_hashtags]) do
135
- before = $1
136
- hash = $2
137
- text = $3
138
- text = yield(text) if block_given?
139
- "#{before}<a href=\"#{options[:hashtag_url_base]}#{html_escape(text)}\" title=\"##{html_escape(text)}\" #{target_tag(options)}class=\"#{options[:url_class]} #{options[:hashtag_class]}\"#{extra_html}>#{html_escape(hash)}#{html_escape(text)}</a>"
123
+ Twitter::Rewriter.rewrite_hashtags(text) do |hash, hashtag|
124
+ hashtag = yield(hashtag) if block_given?
125
+ href = if options[:hashtag_url_block]
126
+ options[:hashtag_url_block].call(hashtag)
127
+ else
128
+ "#{options[:hashtag_url_base]}#{html_escape(hashtag)}"
129
+ end
130
+ %(<a href="#{href}" title="##{html_escape(hashtag)}" #{target_tag(options)}class="#{options[:url_class]} #{options[:hashtag_class]}"#{extra_html}>#{html_escape(hash)}#{html_escape(hashtag)}</a>)
140
131
  end
141
132
  end
142
133
 
@@ -148,28 +139,45 @@ module Twitter
148
139
  options = href_options.dup
149
140
  options[:rel] = "nofollow" unless options.delete(:suppress_no_follow)
150
141
  options[:class] = options.delete(:url_class)
142
+ html_attrs = html_attrs_for_options(options)
151
143
 
152
- text.gsub(Twitter::Regex[:valid_url]) do
153
- all, before, url, protocol, domain, path, query_string = $1, $2, $3, $4, $5, $6, $7
154
- if !protocol.blank?
155
- html_attrs = tag_options(options.reject{|k,v| OPTIONS_NOT_ATTRIBUTES.include?(k) }.stringify_keys) || ""
156
- "#{before}<a href=\"#{html_escape(url)}\"#{html_attrs}>#{html_escape(url)}</a>"
144
+ Twitter::Rewriter.rewrite_urls(text) do |url|
145
+ href = if options[:link_url_block]
146
+ options.delete(:link_url_block).call(url)
157
147
  else
158
- all
148
+ html_escape(url)
159
149
  end
150
+ %(<a href="#{href}"#{html_attrs}>#{html_escape(url)}</a>)
160
151
  end
161
152
  end
162
153
 
163
154
  private
164
155
 
156
+ BOOLEAN_ATTRIBUTES = Set.new([:disabled, :readonly, :multiple, :checked]).freeze
157
+
158
+ def html_attrs_for_options(options)
159
+ html_attrs options.reject{|k, v| OPTIONS_NOT_ATTRIBUTES.include?(k)}
160
+ end
161
+
162
+ def html_attrs(options)
163
+ options.inject("") do |attrs, (key, value)|
164
+ if BOOLEAN_ATTRIBUTES.include?(key)
165
+ value = value ? key : nil
166
+ end
167
+ if !value.nil?
168
+ attrs << %( #{html_escape(key)}="#{html_escape(value)}")
169
+ end
170
+ attrs
171
+ end
172
+ end
173
+
165
174
  def target_tag(options)
166
- target_option = options[:target]
167
- if target_option.blank?
175
+ target_option = options[:target].to_s
176
+ if target_option.empty?
168
177
  ""
169
178
  else
170
179
  "target=\"#{html_escape(target_option)}\""
171
180
  end
172
181
  end
173
-
174
182
  end
175
183
  end
data/lib/extractor.rb CHANGED
@@ -57,7 +57,7 @@ module Twitter
57
57
  screen_names_only
58
58
  end
59
59
 
60
- # Extracts a list of all usersnames mentioned in the Tweet <tt>text</tt>
60
+ # Extracts a list of all usernames mentioned in the Tweet <tt>text</tt>
61
61
  # along with the indices for where the mention occurred. If the
62
62
  # <tt>text</tt> is nil or contains no username mentions, an empty array
63
63
  # will be returned.
@@ -87,6 +87,40 @@ module Twitter
87
87
  possible_screen_names
88
88
  end
89
89
 
90
+ # Extracts a list of all usernames or lists mentioned in the Tweet <tt>text</tt>
91
+ # along with the indices for where the mention occurred. If the
92
+ # <tt>text</tt> is nil or contains no username or list mentions, an empty array
93
+ # will be returned.
94
+ #
95
+ # If a block is given, then it will be called with each username, list slug, the start
96
+ # index, and the end index in the <tt>text</tt>. The list_slug will be an empty string
97
+ # if this is a username mention.
98
+ def extract_mentions_or_lists_with_indices(text) # :yields: username, list_slug, start, end
99
+ return [] unless text
100
+
101
+ possible_entries = []
102
+ text.to_s.scan(Twitter::Regex[:extract_mentions_or_lists]) do |before, sn, list_slug, after|
103
+ extract_mentions_match_data = $~
104
+ unless after =~ Twitter::Regex[:end_screen_name_match]
105
+ start_position = extract_mentions_match_data.char_begin(2) - 1
106
+ end_position = extract_mentions_match_data.char_end(list_slug.nil? ? 2 : 3)
107
+ possible_entries << {
108
+ :screen_name => sn,
109
+ :list_slug => list_slug || "",
110
+ :indices => [start_position, end_position]
111
+ }
112
+ end
113
+ end
114
+
115
+ if block_given?
116
+ possible_entries.each do |mention|
117
+ yield mention[:screen_name], mention[:list_slug], mention[:indices].first, mention[:indices].last
118
+ end
119
+ end
120
+
121
+ possible_entries
122
+ end
123
+
90
124
  # Extracts the username replied to in the Tweet <tt>text</tt>. If the
91
125
  # <tt>text</tt> is <tt>nil</tt> or is not a reply nil will be returned.
92
126
  #
@@ -123,7 +157,7 @@ module Twitter
123
157
  position = 0
124
158
  text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, path, query|
125
159
  valid_url_match_data = $~
126
- if !protocol.blank?
160
+ if protocol && !protocol.empty?
127
161
  start_position = valid_url_match_data.char_begin(3)
128
162
  end_position = valid_url_match_data.char_end(3)
129
163
  urls << {
data/lib/regex.rb CHANGED
@@ -7,6 +7,22 @@ module Twitter
7
7
  class Regex
8
8
  REGEXEN = {} # :nodoc:
9
9
 
10
+ def self.regex_range(from, to = nil) # :nodoc:
11
+ if $RUBY_1_9
12
+ if to
13
+ "\\u{#{from.to_s(16).rjust(4, '0')}}-\\u{#{to.to_s(16).rjust(4, '0')}}"
14
+ else
15
+ "\\u{#{from.to_s(16).rjust(4, '0')}}"
16
+ end
17
+ else
18
+ if to
19
+ [from].pack('U') + '-' + [to].pack('U')
20
+ else
21
+ [from].pack('U')
22
+ end
23
+ end
24
+ end
25
+
10
26
  # Space is more than %20, U+3000 for example is the full-width space used with Kanji. Provide a short-hand
11
27
  # to access both the list of characters and a pattern suitable for use with String#split
12
28
  # Taken from: ActiveSupport::Multibyte::Handlers::UTF8Handler::UNICODE_WHITESPACE
@@ -29,6 +45,7 @@ module Twitter
29
45
 
30
46
  REGEXEN[:at_signs] = /[@@]/
31
47
  REGEXEN[:extract_mentions] = /(^|[^a-zA-Z0-9_])#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})(?=(.|$))/o
48
+ REGEXEN[:extract_mentions_or_lists] = /(^|[^a-zA-Z0-9_])#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})(\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})?(?=(.|$))/o
32
49
  REGEXEN[:extract_reply] = /^(?:#{REGEXEN[:spaces]})*#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})/o
33
50
 
34
51
  major, minor, patch = RUBY_VERSION.split('.')
@@ -42,35 +59,43 @@ module Twitter
42
59
  # Latin accented characters
43
60
  # Excludes 0xd7 from the range (the multiplication sign, confusable with "x").
44
61
  # Also excludes 0xf7, the division sign
45
- LATIN_ACCENTS = [(0xc0..0xd6).to_a, (0xd8..0xf6).to_a, (0xf8..0xff).to_a].flatten.pack('U*').freeze
62
+ LATIN_ACCENTS = [
63
+ regex_range(0xc0, 0xd6),
64
+ regex_range(0xd8, 0xf6),
65
+ regex_range(0xf8, 0xff),
66
+ regex_range(0x015f)
67
+ ].join('').freeze
68
+
46
69
  NON_LATIN_HASHTAG_CHARS = [
47
70
  # Cyrillic (Russian, Ukrainian, etc.)
48
- (0x0400..0x04ff).to_a, # Cyrillic
49
- (0x0500..0x0527).to_a, # Cyrillic Supplement
71
+ regex_range(0x0400, 0x04ff), # Cyrillic
72
+ regex_range(0x0500, 0x0527), # Cyrillic Supplement
73
+ regex_range(0x2de0, 0x2dff), # Cyrillic Extended A
74
+ regex_range(0xa640, 0xa69f), # Cyrillic Extended B
50
75
  # Hangul (Korean)
51
- (0x1100..0x11ff).to_a, # Hangul Jamo
52
- (0x3130..0x3185).to_a, # Hangul Compatibility Jamo
53
- (0xA960..0xA97F).to_a, # Hangul Jamo Extended-A
54
- (0xAC00..0xD7AF).to_a, # Hangul Syllables
55
- (0xD7B0..0xD7FF).to_a # Hangul Jamo Extended-B
56
- ].flatten.pack('U*').freeze
76
+ regex_range(0x1100, 0x11ff), # Hangul Jamo
77
+ regex_range(0x3130, 0x3185), # Hangul Compatibility Jamo
78
+ regex_range(0xA960, 0xA97F), # Hangul Jamo Extended-A
79
+ regex_range(0xAC00, 0xD7AF), # Hangul Syllables
80
+ regex_range(0xD7B0, 0xD7FF), # Hangul Jamo Extended-B
81
+ regex_range(0xFFA1, 0xFFDC) # Half-width Hangul
82
+ ].join('').freeze
57
83
  REGEXEN[:latin_accents] = /[#{LATIN_ACCENTS}]+/o
58
84
 
59
85
  REGEXEN[:end_screen_name_match] = /^(?:#{REGEXEN[:at_signs]}|#{REGEXEN[:latin_accents]}|:\/\/)/o
60
86
 
61
87
  CJ_HASHTAG_CHARACTERS = [
62
- (0x30A1..0x30FA).to_a, 0x30FC, # Katakana (full-width)
63
- (0xFF66..0xFF9F).to_a, # Katakana (half-width)
64
- (0xFF10..0xFF19).to_a, (0xFF21..0xFF3A).to_a, (0xFF41..0xFF5A).to_a, # Latin (full-width)
65
- (0x3041..0x3096).to_a, # Hiragana
66
- (0x3400..0x4DBF).to_a, # Kanji (CJK Extension A)
67
- (0x4E00..0x9FFF).to_a, # Kanji (Unified)
68
- (0x20000..0x2A6DF).to_a, # Kanji (CJK Extension B)
69
- (0x2A700..0x2B73F).to_a, # Kanji (CJK Extension C)
70
- (0x2B740..0x2B81F).to_a, # Kanji (CJK Extension D)
71
- (0x2F800..0x2FA1F).to_a, # Kanji (CJK supplement)
72
- 0x3005 # Kanji (iteration mark)
73
- ].flatten.pack('U*').freeze
88
+ regex_range(0x30A1, 0x30FA), regex_range(0x30FC, 0x30FE), # Katakana (full-width)
89
+ regex_range(0xFF66, 0xFF9F), # Katakana (half-width)
90
+ regex_range(0xFF10, 0xFF19), regex_range(0xFF21, 0xFF3A), regex_range(0xFF41, 0xFF5A), # Latin (full-width)
91
+ regex_range(0x3041, 0x3096), regex_range(0x3099, 0x309E), # Hiragana
92
+ regex_range(0x3400, 0x4DBF), # Kanji (CJK Extension A)
93
+ regex_range(0x4E00, 0x9FFF), # Kanji (Unified)
94
+ regex_range(0x20000, 0x2A6DF), # Kanji (CJK Extension B)
95
+ regex_range(0x2A700, 0x2B73F), # Kanji (CJK Extension C)
96
+ regex_range(0x2B740, 0x2B81F), # Kanji (CJK Extension D)
97
+ regex_range(0x2F800, 0x2FA1F), regex_range(0x3005), regex_range(0x303B) # Kanji (CJK supplement)
98
+ ].join('').freeze
74
99
 
75
100
  HASHTAG_BOUNDARY = /(?:\A|\z|#{REGEXEN[:spaces]}|「|」|。|、|\.|!|\?|!|?|,)/
76
101
 
@@ -93,7 +118,7 @@ module Twitter
93
118
  REGEXEN[:valid_domain_name] = /(?:[^#{DOMAIN_EXCLUDE_PART}](?:[-]|[^#{DOMAIN_EXCLUDE_PART}])*)?[^#{DOMAIN_EXCLUDE_PART}]/
94
119
  REGEXEN[:valid_domain] = /#{REGEXEN[:valid_subdomain]}*#{REGEXEN[:valid_domain_name]}\.(?:xn--[a-z0-9]{2,}|[a-z]{2,})(?::[0-9]+)?/i
95
120
 
96
- REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\$\/%#\[\]\-_~|\.]/i
121
+ REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\$\/%#\[\]\-_~|#{LATIN_ACCENTS}]/i
97
122
  # Allow URL paths to contain balanced parens
98
123
  # 1. Used in Wikipedia URLs like /Primer_(film)
99
124
  # 2. Used in IIS sessions like /S(dfd346)/
@@ -102,12 +127,12 @@ module Twitter
102
127
  REGEXEN[:valid_url_path_chars] = /(?:
103
128
  #{REGEXEN[:wikipedia_disambiguation]}|
104
129
  @#{REGEXEN[:valid_general_url_path_chars]}+\/|
105
- [\.,]#{REGEXEN[:valid_general_url_path_chars]}+|
130
+ [\.,]#{REGEXEN[:valid_general_url_path_chars]}?|
106
131
  #{REGEXEN[:valid_general_url_path_chars]}+
107
132
  )/ix
108
133
  # Valid end-of-path characters (so /foo. does not gobble the period).
109
134
  # 1. Allow =&# for empty URL parameters and other URL-join artifacts
110
- REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9=_#\/\+\-]|#{REGEXEN[:wikipedia_disambiguation]}/io
135
+ REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9=_#\/\+\-#{LATIN_ACCENTS}]|#{REGEXEN[:wikipedia_disambiguation]}/io
111
136
  REGEXEN[:valid_url_query_chars] = /[a-z0-9!\*'\(\);:&=\+\$\/%#\[\]\-_\.,~|]/i
112
137
  REGEXEN[:valid_url_query_ending_chars] = /[a-z0-9_&=#\/]/i
113
138
  REGEXEN[:valid_url] = %r{
data/lib/rewriter.rb ADDED
@@ -0,0 +1,63 @@
1
+ module Twitter
2
+ # A module that provides base methods to rewrite usernames, lists, hashtags and URLs.
3
+ module Rewriter extend self
4
+ def rewrite(text, options = {})
5
+ [:hashtags, :urls, :usernames_or_lists].inject(text) do |key|
6
+ send("rewrite_#{key}", text, &options[key]) if options[key]
7
+ end
8
+ end
9
+
10
+ def rewrite_usernames_or_lists(text)
11
+ new_text = ""
12
+
13
+ # this -1 flag allows strings ending in ">" to work
14
+ text.to_s.split(/[<>]/, -1).each_with_index do |chunk, index|
15
+ if index != 0
16
+ new_text << ((index % 2 == 0) ? ">" : "<")
17
+ end
18
+
19
+ if index % 4 != 0
20
+ new_text << chunk
21
+ else
22
+ new_text << chunk.gsub(Twitter::Regex[:auto_link_usernames_or_lists]) do
23
+ before, at, user, slash_listname, after = $1, $2, $3, $4, $'
24
+ if slash_listname
25
+ # the link is a list
26
+ "#{before}#{yield(at, user, slash_listname)}"
27
+ else
28
+ if after =~ Twitter::Regex[:end_screen_name_match]
29
+ # Followed by something that means we don't autolink
30
+ "#{before}#{at}#{user}#{slash_listname}"
31
+ else
32
+ # this is a screen name
33
+ "#{before}#{yield(at, user, nil)}#{slash_listname}"
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
39
+
40
+ new_text
41
+ end
42
+
43
+ def rewrite_hashtags(text)
44
+ text.to_s.gsub(Twitter::Regex[:auto_link_hashtags]) do
45
+ before = $1
46
+ hash = $2
47
+ hashtag = $3
48
+ "#{before}#{yield(hash, hashtag)}"
49
+ end
50
+ end
51
+
52
+ def rewrite_urls(text)
53
+ text.to_s.gsub(Twitter::Regex[:valid_url]) do
54
+ all, before, url, protocol, domain, path, query_string = $1, $2, $3, $4, $5, $6, $7
55
+ if protocol && !protocol.empty?
56
+ "#{before}#{yield(url)}"
57
+ else
58
+ all
59
+ end
60
+ end
61
+ end
62
+ end
63
+ end