twitter-text-relative 1.6.2.pre.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
module Twitter
  # Helpers for marking methods as deprecated. +deprecate+ calls
  # +alias_method+ and +define_method+ on +self+, so it has to run in the
  # context of a class or module (for example after extending this module).
  module Deprecation
    # Wraps +method+ so that every call prints a deprecation warning and then
    # runs the original implementation.
    #
    # method::     name of the method being deprecated.
    # new_method:: optional name of the replacement; when given it is
    #              mentioned in the warning text.
    #
    # The original implementation stays reachable as <tt>deprecated_<method></tt>.
    def deprecate(method, new_method = nil)
      legacy_name = :"deprecated_#{method}"

      # The warning text is built once, at declaration time.
      notice = "Deprecation: `#{method}` is deprecated."
      notice += " Please use `#{new_method}` instead." if new_method

      # Keep the old body under the legacy name, then shadow the public name
      # with a wrapper that warns before delegating.
      alias_method(legacy_name, method)
      define_method(method) do |*args, &block|
        warn notice
        send(legacy_name, *args, &block)
      end
    end
  end
end
@@ -0,0 +1,328 @@
1
+ # encoding: UTF-8
2
+
3
class String
  # Returns the number of characters (not bytes) in this string.
  #
  # When the string responds to +codepoints+, +length+ is used directly.
  # Otherwise the count is derived from +chars+: via +to_a+ when +chars+ is
  # Enumerable, or via +chars.size+ when it is not.
  def char_length
    if respond_to? :codepoints
      length
    else
      chars.kind_of?(Enumerable) ? chars.to_a.size : chars.size
    end
  end

  # Returns this string as an array of single-character strings.
  #
  # The result is computed on every call. It is deliberately not cached in an
  # instance variable: writing an ivar raises on frozen strings, and a cached
  # array goes stale once the receiver is mutated in place.
  def to_char_a
    if chars.kind_of?(Enumerable)
      chars.to_a
    else
      # Fallback for a non-Enumerable +chars+ object: rebuild each character
      # from the value returned by +slice+ by packing it as a codepoint.
      char_array = []
      0.upto(char_length - 1) { |i| char_array << [chars.slice(i)].pack('U') }
      char_array
    end
  end
end
26
+
27
+ # Helper functions to return character offsets instead of byte offsets.
28
class MatchData
  # Character offset at which capture group +n+ starts.
  #
  # When the matched string responds to +codepoints+, +begin+ is returned
  # as-is. Otherwise +begin+ is used as a length to slice the prefix of the
  # string, and the prefix's +char_length+ is returned.
  def char_begin(n)
    return self.begin(n) if string.respond_to?(:codepoints)

    string[0, self.begin(n)].char_length
  end

  # Character offset just past the end of capture group +n+; mirrors
  # +char_begin+ but is based on +end+.
  def char_end(n)
    return self.end(n) if string.respond_to?(:codepoints)

    string[0, self.end(n)].char_length
  end
end
45
+
46
+ module Twitter
47
+ # A module for including Tweet parsing in a class. This module provides functions for the extraction and processing
48
+ # of usernames, lists, URLs and hashtags.
49
+ module Extractor extend self
50
+ # Remove overlapping entities.
51
+ # This returns a new array with no overlapping entities.
52
def remove_overlapping_entities(entities)
  # Walk the entities in order of start index, keeping one only when it
  # starts at or after the end of the most recently kept entity. A dropped
  # entity does not become the new reference point.
  last_kept = nil
  entities.sort_by { |entity| entity[:indices].first }.each_with_object([]) do |candidate, kept|
    next if last_kept && last_kept[:indices].last > candidate[:indices].first

    kept << candidate
    last_kept = candidate
  end
end
61
+
62
+ # Extracts all usernames, lists, hashtags and URLs in the Tweet <tt>text</tt>
63
+ # along with the indices for where the entity occurred.
64
+ # If the <tt>text</tt> is <tt>nil</tt> or contains no entity an empty array
65
+ # will be returned.
66
+ #
67
+ # If a block is given then it will be called for each entity.
68
def extract_entities_with_indices(text, options = {}, &block)
  # Collect every entity kind in a fixed order. Hashtags are extracted with
  # their own URL-overlap check disabled because overlaps are resolved once,
  # below, across all entity kinds together.
  collected = [
    extract_urls_with_indices(text, options),
    extract_hashtags_with_indices(text, :check_url_overlap => false),
    extract_mentions_or_lists_with_indices(text),
    extract_cashtags_with_indices(text)
  ].inject(:+)

  return [] if collected.empty?

  unique = remove_overlapping_entities(collected)
  unique.each(&block) if block_given?
  unique
end
82
+
83
+ # Extracts a list of all usernames mentioned in the Tweet <tt>text</tt>. If the
84
+ # <tt>text</tt> is <tt>nil</tt> or contains no username mentions an empty array
85
+ # will be returned.
86
+ #
87
+ # If a block is given then it will be called for each username.
88
def extract_mentioned_screen_names(text, &block) # :yields: username
  # Reduce the indexed mention hashes to just the screen names.
  mentions = extract_mentioned_screen_names_with_indices(text)
  names = mentions.map { |mention| mention[:screen_name] }
  names.each(&block) if block
  names
end
93
+
94
+ # Extracts a list of all usernames mentioned in the Tweet <tt>text</tt>
95
+ # along with the indices for where the mention occurred. If the
96
+ # <tt>text</tt> is nil or contains no username mentions, an empty array
97
+ # will be returned.
98
+ #
99
+ # If a block is given, then it will be called with each username, the start
100
+ # index, and the end index in the <tt>text</tt>.
101
def extract_mentioned_screen_names_with_indices(text) # :yields: username, start, end
  return [] unless text

  # Keep only matches whose list slug is empty; a non-empty slug means the
  # match refers to a list rather than a plain username.
  mentions = []
  extract_mentions_or_lists_with_indices(text) do |screen_name, list_slug, first_index, last_index|
    if list_slug.empty?
      mentions << { :screen_name => screen_name, :indices => [first_index, last_index] }
    end
  end

  if block_given?
    mentions.each do |mention|
      first_index, last_index = mention[:indices]
      yield mention[:screen_name], first_index, last_index
    end
  end

  mentions
end
121
+
122
+ # Extracts a list of all usernames or lists mentioned in the Tweet <tt>text</tt>
123
+ # along with the indices for where the mention occurred. If the
124
+ # <tt>text</tt> is nil or contains no username or list mentions, an empty array
125
+ # will be returned.
126
+ #
127
+ # If a block is given, then it will be called with each username, list slug, the start
128
+ # index, and the end index in the <tt>text</tt>. The list_slug will be an empty string
129
+ # if this is a username mention.
130
def extract_mentions_or_lists_with_indices(text) # :yields: username, list_slug, start, end
  # Cheap pre-check before running the full mention regex. The class accepts
  # both the ASCII "@" and the fullwidth "＠" (U+FF20); listing the ASCII
  # sign twice made texts containing only fullwidth mentions return early.
  # NOTE(review): assumes Twitter::Regex[:valid_mention_or_list] also accepts
  # the fullwidth sign -- confirm against the regex definitions.
  return [] unless text =~ /[@＠]/

  possible_entries = []
  text.to_s.scan(Twitter::Regex[:valid_mention_or_list]) do |_before, _at, screen_name, list_slug|
    match_data = Regexp.last_match

    # Skip candidates whose trailing text matches :end_mention_match.
    next if match_data.post_match =~ Twitter::Regex[:end_mention_match]

    # Group 3 is the screen name; the entity starts one character earlier,
    # at the at sign. It ends after the list slug (group 4) when one exists.
    start_position = match_data.char_begin(3) - 1
    end_position = match_data.char_end(list_slug.nil? ? 3 : 4)
    possible_entries << {
      :screen_name => screen_name,
      :list_slug => list_slug || "",
      :indices => [start_position, end_position]
    }
  end

  if block_given?
    possible_entries.each do |mention|
      yield mention[:screen_name], mention[:list_slug], mention[:indices].first, mention[:indices].last
    end
  end

  possible_entries
end
156
+
157
+ # Extracts the username replied to in the Tweet <tt>text</tt>. If the
158
+ # <tt>text</tt> is <tt>nil</tt> or is not a reply nil will be returned.
159
+ #
160
+ # If a block is given then it will be called with the username replied to (if any)
161
def extract_reply_screen_name(text) # :yields: username
  return nil unless text

  reply_match = text.match(Twitter::Regex[:valid_reply])
  # +match+ yields nil when the text is not a reply; nil has no +captures+.
  return unless reply_match.respond_to?(:captures)
  # Reject the match when the text right after it matches :end_mention_match.
  return if reply_match.post_match =~ Twitter::Regex[:end_mention_match]

  replied_to = reply_match.captures.first
  yield replied_to if block_given?
  replied_to
end
171
+
172
+ # Extracts a list of all URLs included in the Tweet <tt>text</tt>. If the
173
+ # <tt>text</tt> is <tt>nil</tt> or contains no URLs an empty array
174
+ # will be returned.
175
+ #
176
+ # If a block is given then it will be called for each URL.
177
def extract_urls(text, &block) # :yields: url
  # Reduce the indexed URL hashes to just the URL strings.
  found = extract_urls_with_indices(text)
  urls = found.map { |entry| entry[:url] }
  urls.each(&block) if block
  urls
end
182
+
183
+ # Extracts a list of all URLs included in the Tweet <tt>text</tt> along
184
+ # with the indices. If the <tt>text</tt> is <tt>nil</tt> or contains no
185
+ # URLs an empty array will be returned.
186
+ #
187
+ # If a block is given then it will be called for each URL.
188
def extract_urls_with_indices(text, options = {:extract_url_without_protocol => true}) # :yields: url, start, end
  # Cheap pre-check: a protocol-less URL needs at least a ".", and a URL with
  # a protocol needs at least a ":".
  return [] unless text && (options[:extract_url_without_protocol] ? text.index(".") : text.index(":"))

  allow_bare = options[:extract_url_without_protocol]
  found = []

  text.to_s.scan(Twitter::Regex[:valid_url]) do |_all, before, url, protocol, domain, _port, path, _query|
    url_match = Regexp.last_match
    url_start = url_match.char_begin(3)
    url_end = url_match.char_end(3)

    if protocol
      # In the case of t.co URLs, don't allow additional path characters:
      # trim the URL to the part matched by :valid_tco_url.
      if url =~ Twitter::Regex[:valid_tco_url]
        url = Regexp.last_match(0)
        url_end = url_start + url.char_length
      end
      found << { :url => url, :indices => [url_start, url_end] }
      next
    end

    # Protocol is missing: honour the option and the preceding-character rule.
    next unless allow_bare
    next if before =~ Twitter::Regex[:invalid_url_without_protocol_preceding_chars]

    # Extract ASCII-only domains from the matched domain. Each one is recorded
    # right away unless it matches :invalid_short_domain; the last one seen is
    # remembered so path/query can be attached to it afterwards.
    bare = nil
    bare_is_short = nil
    domain.scan(Twitter::Regex[:valid_ascii_domain]) do |ascii_domain|
      domain_match = Regexp.last_match
      bare = {
        :url => ascii_domain,
        :indices => [url_start + domain_match.char_begin(0),
                     url_start + domain_match.char_end(0)]
      }
      bare_is_short = ascii_domain =~ Twitter::Regex[:invalid_short_domain]
      found << bare unless bare_is_short
    end

    # No ASCII-only domain found: skip the entire URL.
    next unless bare

    # +bare+ only holds the domain so far; extend it when a path exists.
    if path
      # A short domain was held back above; having a path makes it count.
      found << bare if bare_is_short
      bare[:url] = url.sub(domain, bare[:url])
      bare[:indices][1] = url_end
    end
  end

  found.each { |entry| yield entry[:url], entry[:indices].first, entry[:indices].last } if block_given?
  found
end
240
+
241
+ # Extracts a list of all hashtags included in the Tweet <tt>text</tt>. If the
242
+ # <tt>text</tt> is <tt>nil</tt> or contains no hashtags an empty array
243
+ # will be returned. The array returned will not include the leading <tt>#</tt>
244
+ # character.
245
+ #
246
+ # If a block is given then it will be called for each hashtag.
247
def extract_hashtags(text, &block) # :yields: hashtag_text
  # Reduce the indexed hashtag hashes to just the hashtag text.
  found = extract_hashtags_with_indices(text)
  hashtags = found.map { |entry| entry[:hashtag] }
  hashtags.each(&block) if block
  hashtags
end
252
+
253
+ # Extracts a list of all hashtags included in the Tweet <tt>text</tt>. If the
254
+ # <tt>text</tt> is <tt>nil</tt> or contains no hashtags an empty array
255
+ # will be returned. The array returned will not include the leading <tt>#</tt>
256
+ # character.
257
+ #
258
+ # If a block is given then it will be called for each hashtag.
259
def extract_hashtags_with_indices(text, options = {:check_url_overlap => true}) # :yields: hashtag_text, start, end
  # Cheap pre-check before running the full hashtag regex. The class accepts
  # both the ASCII "#" and the fullwidth "＃" (U+FF03); listing the ASCII
  # sign twice made texts containing only fullwidth hashtags return early.
  # NOTE(review): assumes Twitter::Regex[:valid_hashtag] also accepts the
  # fullwidth sign -- confirm against the regex definitions.
  return [] unless text =~ /[#＃]/

  tags = []
  text.scan(Twitter::Regex[:valid_hashtag]) do |_before, _hash, hash_text|
    match_data = Regexp.last_match
    # Group 2 is the hash sign and group 3 the hashtag text, so the indices
    # cover the sign while the stored text does not.
    start_position = match_data.char_begin(2)
    end_position = match_data.char_end(3)

    # Skip candidates whose trailing text matches :end_hashtag_match.
    next if match_data.post_match =~ Twitter::Regex[:end_hashtag_match]

    tags << { :hashtag => hash_text, :indices => [start_position, end_position] }
  end

  if options[:check_url_overlap]
    urls = extract_urls_with_indices(text)
    unless urls.empty?
      # Resolve overlaps between hashtags and URLs together, then drop the
      # URL entities again so only the surviving hashtags remain.
      tags = remove_overlapping_entities(tags + urls).select { |entity| entity[:hashtag] }
    end
  end

  tags.each { |tag| yield tag[:hashtag], tag[:indices].first, tag[:indices].last } if block_given?
  tags
end
291
+
292
+ # Extracts a list of all cashtags included in the Tweet <tt>text</tt>. If the
293
+ # <tt>text</tt> is <tt>nil</tt> or contains no cashtags an empty array
294
+ # will be returned. The array returned will not include the leading <tt>$</tt>
295
+ # character.
296
+ #
297
+ # If a block is given then it will be called for each cashtag.
298
def extract_cashtags(text, &block) # :yields: cashtag_text
  # Reduce the indexed cashtag hashes to just the cashtag text.
  found = extract_cashtags_with_indices(text)
  cashtags = found.map { |entry| entry[:cashtag] }
  cashtags.each(&block) if block
  cashtags
end
303
+
304
+ # Extracts a list of all cashtags included in the Tweet <tt>text</tt>. If the
305
+ # <tt>text</tt> is <tt>nil</tt> or contains no cashtags an empty array
306
+ # will be returned. The array returned will not include the leading <tt>$</tt>
307
+ # character.
308
+ #
309
+ # If a block is given then it will be called for each cashtag.
310
def extract_cashtags_with_indices(text) # :yields: cashtag_text, start, end
  # Cheap pre-check: without a "$" there can be no cashtag.
  return [] unless text =~ /\$/

  found = []
  text.scan(Twitter::Regex[:valid_cashtag]) do |_before, _dollar, cash_text|
    match = Regexp.last_match
    # Group 2 is the dollar sign and group 3 the cashtag text, so the indices
    # cover the sign while the stored text does not.
    found << {
      :cashtag => cash_text,
      :indices => [match.char_begin(2), match.char_end(3)]
    }
  end

  if block_given?
    found.each { |entry| yield entry[:cashtag], entry[:indices].first, entry[:indices].last }
  end
  found
end
327
+ end
328
+ end
@@ -0,0 +1,21 @@
1
module Twitter
  module HashHelper
    # Return a new hash with all keys converted to symbols, as long as
    # they respond to +to_sym+. The hash passed in is left untouched.
    #
    #   Twitter::HashHelper.symbolize_keys('name' => 'Rob', 'years' => '28')
    #   #=> { :name => "Rob", :years => "28" }
    def self.symbolize_keys(hash)
      # Delegate to this module's own destructive variant on a copy. Core
      # Hash has no +symbolize_keys!+ method, so calling it on the copy
      # only works when some other library has added it.
      symbolize_keys!(hash.dup)
    end

    # Destructively convert all keys of +hash+ to symbols, as long as they
    # respond to +to_sym+. Keys whose conversion raises, or returns nil, are
    # kept unchanged. Returns the same +hash+ object.
    def self.symbolize_keys!(hash)
      # Iterate over a snapshot of the keys because the hash is modified.
      hash.keys.each do |key|
        symbol = begin
          key.to_sym
        rescue StandardError
          nil
        end
        hash[symbol || key] = hash.delete(key)
      end
      hash
    end
  end
end
@@ -0,0 +1,86 @@
1
module Twitter
  # Module for doing "hit highlighting" on tweets that have been auto-linked already.
  # Useful with the results returned from the Search API.
  module HitHighlighter extend self
    # Default tag used for hit highlighting
    DEFAULT_HIGHLIGHT_TAG = "em"

    # Add <tt><em></em></tt> tags around the <tt>hits</tt> provided in the <tt>text</tt>. The
    # <tt>hits</tt> should be an array of (start, end) index pairs, relative to the original
    # text, before auto-linking (but the <tt>text</tt> may already be auto-linked if desired).
    # When <tt>hits</tt> is nil or empty the <tt>text</tt> is returned unchanged.
    #
    # The <tt><em></em></tt> tags can be overridden using the <tt>:tag</tt> option, given as a
    # String or a Symbol. For example:
    #
    #  irb> hit_highlight("test hit here", [[5, 8]], :tag => 'strong')
    #  => "test <strong>hit</strong> here"
    def hit_highlight(text, hits = [], options = {})
      return text if hits.nil? || hits.empty?

      # Interpolate rather than concatenate so a Symbol tag name works too.
      tag_name = options[:tag] || DEFAULT_HIGHLIGHT_TAG
      tags = ["<#{tag_name}>", "</#{tag_name}>"]

      # Splitting on angle brackets makes even-indexed chunks plain text and
      # odd-indexed chunks the inside of an existing markup tag.
      chunks = text.split(/[<>]/)

      result = []
      chunk_index = 0
      chunk = chunks[0]
      chunk_chars = chunk.to_s.to_char_a
      prev_chunks_len = 0    # characters of plain text in the chunks already emitted
      chunk_cursor = 0       # next unemitted character within the current chunk
      start_in_chunk = false # true when an opening tag was placed in the current chunk

      # Flattened hits alternate start, end, start, end...; even positions
      # get the opening tag and odd positions the closing tag.
      hits.flatten.each_with_index do |hit, index|
        tag = tags[index % 2]
        placed = false

        # Emit whole chunks until the hit falls inside the current text chunk.
        until chunk.nil? || hit < prev_chunks_len + chunk_chars.length
          result << chunk_chars[chunk_cursor..-1]
          if start_in_chunk && hit == prev_chunks_len + chunk_chars.length
            result << tag
            placed = true
          end

          # Re-emit the existing markup tag that follows this text chunk, if any.
          if (tag_text = chunks[chunk_index + 1])
            result << "<#{tag_text}>"
          end

          prev_chunks_len += chunk_chars.length
          chunk_cursor = 0
          chunk_index += 2
          chunk = chunks[chunk_index]
          chunk_chars = chunk.to_s.to_char_a
          start_in_chunk = false
        end

        if !placed && !chunk.nil?
          hit_spot = hit - prev_chunks_len
          result << chunk_chars[chunk_cursor...hit_spot] << tag
          chunk_cursor = hit_spot
          start_in_chunk = index.even?
          placed = true
        end

        # ultimate fallback, hits that run off the end get a closing tag
        result << tag unless placed
      end

      # Emit whatever is left: the rest of the current chunk, then all
      # remaining chunks with markup chunks wrapped in angle brackets again.
      if chunk
        result << chunk_chars[chunk_cursor..-1] if chunk_cursor < chunk_chars.length
        (chunk_index + 1).upto(chunks.length - 1) do |i|
          result << (i.even? ? chunks[i] : "<#{chunks[i]}>")
        end
      end

      result.flatten.join
    end
  end
end