twitter-text 2.0.2 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,63 +1,65 @@
1
1
  module Twitter
2
- # A module provides base methods to rewrite usernames, lists, hashtags and URLs.
3
- module Rewriter extend self
4
- def rewrite_entities(text, entities)
5
- chars = text.to_s.to_char_a
6
-
7
- # sort by start index
8
- entities = entities.sort_by do |entity|
9
- indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices]
10
- indices.first
11
- end
2
+ module TwitterText
3
+ # A module provides base methods to rewrite usernames, lists, hashtags and URLs.
4
+ module Rewriter extend self
5
+ def rewrite_entities(text, entities)
6
+ chars = text.to_s.to_char_a
12
7
 
13
- result = []
14
- last_index = entities.inject(0) do |index, entity|
15
- indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices]
16
- result << chars[index...indices.first]
17
- result << yield(entity, chars)
18
- indices.last
19
- end
20
- result << chars[last_index..-1]
8
+ # sort by start index
9
+ entities = entities.sort_by do |entity|
10
+ indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices]
11
+ indices.first
12
+ end
21
13
 
22
- result.flatten.join
23
- end
14
+ result = []
15
+ last_index = entities.inject(0) do |index, entity|
16
+ indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices]
17
+ result << chars[index...indices.first]
18
+ result << yield(entity, chars)
19
+ indices.last
20
+ end
21
+ result << chars[last_index..-1]
22
+
23
+ result.flatten.join
24
+ end
24
25
 
25
- # These methods are deprecated, will be removed in future.
26
- extend Deprecation
26
+ # These methods are deprecated, will be removed in future.
27
+ extend Deprecation
27
28
 
28
- def rewrite(text, options = {})
29
- [:hashtags, :urls, :usernames_or_lists].inject(text) do |key|
30
- options[key] ? send(:"rewrite_#{key}", text, &options[key]) : text
29
+ def rewrite(text, options = {})
30
+ [:hashtags, :urls, :usernames_or_lists].inject(text) do |key|
31
+ options[key] ? send(:"rewrite_#{key}", text, &options[key]) : text
32
+ end
31
33
  end
32
- end
33
- deprecate :rewrite, :rewrite_entities
34
-
35
- def rewrite_usernames_or_lists(text)
36
- entities = Extractor.extract_mentions_or_lists_with_indices(text)
37
- rewrite_entities(text, entities) do |entity, chars|
38
- at = chars[entity[:indices].first]
39
- list_slug = entity[:list_slug]
40
- list_slug = nil if list_slug.empty?
41
- yield(at, entity[:screen_name], list_slug)
34
+ deprecate :rewrite, :rewrite_entities
35
+
36
+ def rewrite_usernames_or_lists(text)
37
+ entities = Extractor.extract_mentions_or_lists_with_indices(text)
38
+ rewrite_entities(text, entities) do |entity, chars|
39
+ at = chars[entity[:indices].first]
40
+ list_slug = entity[:list_slug]
41
+ list_slug = nil if list_slug.empty?
42
+ yield(at, entity[:screen_name], list_slug)
43
+ end
42
44
  end
43
- end
44
- deprecate :rewrite_usernames_or_lists, :rewrite_entities
45
+ deprecate :rewrite_usernames_or_lists, :rewrite_entities
45
46
 
46
- def rewrite_hashtags(text)
47
- entities = Extractor.extract_hashtags_with_indices(text)
48
- rewrite_entities(text, entities) do |entity, chars|
49
- hash = chars[entity[:indices].first]
50
- yield(hash, entity[:hashtag])
47
+ def rewrite_hashtags(text)
48
+ entities = Extractor.extract_hashtags_with_indices(text)
49
+ rewrite_entities(text, entities) do |entity, chars|
50
+ hash = chars[entity[:indices].first]
51
+ yield(hash, entity[:hashtag])
52
+ end
51
53
  end
52
- end
53
- deprecate :rewrite_hashtags, :rewrite_entities
54
+ deprecate :rewrite_hashtags, :rewrite_entities
54
55
 
55
- def rewrite_urls(text)
56
- entities = Extractor.extract_urls_with_indices(text, :extract_url_without_protocol => false)
57
- rewrite_entities(text, entities) do |entity, chars|
58
- yield(entity[:url])
56
+ def rewrite_urls(text)
57
+ entities = Extractor.extract_urls_with_indices(text, :extract_url_without_protocol => false)
58
+ rewrite_entities(text, entities) do |entity, chars|
59
+ yield(entity[:url])
60
+ end
59
61
  end
62
+ deprecate :rewrite_urls, :rewrite_entities
60
63
  end
61
- deprecate :rewrite_urls, :rewrite_entities
62
64
  end
63
65
  end
@@ -1,26 +1,27 @@
1
1
  module Twitter
2
- # This module lazily defines constants of the form Uxxxx for all Unicode
3
- # codepoints from U0000 to U10FFFF. The value of each constant is the
4
- # UTF-8 string for the codepoint.
5
- # Examples:
6
- # copyright = Unicode::U00A9
7
- # euro = Unicode::U20AC
8
- # infinity = Unicode::U221E
9
- #
10
- module Unicode
11
- CODEPOINT_REGEX = /^U_?([0-9a-fA-F]{4,5}|10[0-9a-fA-F]{4})$/
2
+ module TwitterText
3
+ # This module lazily defines constants of the form Uxxxx for all Unicode
4
+ # codepoints from U0000 to U10FFFF. The value of each constant is the
5
+ # UTF-8 string for the codepoint.
6
+ # Examples:
7
+ # copyright = Unicode::U00A9
8
+ # euro = Unicode::U20AC
9
+ # infinity = Unicode::U221E
10
+ #
11
+ module Unicode
12
+ CODEPOINT_REGEX = /^U_?([0-9a-fA-F]{4,5}|10[0-9a-fA-F]{4})$/
12
13
 
13
- def self.const_missing(name)
14
- # Check that the constant name is of the right form: U0000 to U10FFFF
15
- if name.to_s =~ CODEPOINT_REGEX
16
- # Convert the codepoint to an immutable UTF-8 string,
17
- # define a real constant for that value and return the value
18
- #p name, name.class
19
- const_set(name, [$1.to_i(16)].pack("U").freeze)
20
- else # Raise an error for constants that are not Unicode.
21
- raise NameError, "Uninitialized constant: Unicode::#{name}"
14
+ def self.const_missing(name)
15
+ # Check that the constant name is of the right form: U0000 to U10FFFF
16
+ if name.to_s =~ CODEPOINT_REGEX
17
+ # Convert the codepoint to an immutable UTF-8 string,
18
+ # define a real constant for that value and return the value
19
+ #p name, name.class
20
+ const_set(name, [$1.to_i(16)].pack("U").freeze)
21
+ else # Raise an error for constants that are not Unicode.
22
+ raise NameError, "Uninitialized constant: Unicode::#{name}"
23
+ end
22
24
  end
23
25
  end
24
26
  end
25
-
26
27
  end
@@ -1,225 +1,227 @@
1
1
  require 'unf'
2
2
 
3
3
  module Twitter
4
- module Validation extend self
5
- DEFAULT_TCO_URL_LENGTHS = {
6
- :short_url_length => 23,
7
- }
8
-
9
- # :weighted_length the weighted length of tweet based on weights specified in the config
10
- # :valid If tweet is valid
11
- # :permillage permillage of the tweet over the max length specified in config
12
- # :valid_range_start beginning of valid text
13
- # :valid_range_end End index of valid part of the tweet text (inclusive)
14
- # :display_range_start beginning index of display text
15
- # :display_range_end end index of display text (inclusive)
16
- class ParseResults < Hash
17
-
18
- RESULT_PARAMS = [:weighted_length, :valid, :permillage, :valid_range_start, :valid_range_end, :display_range_start, :display_range_end]
19
-
20
- def self.empty
21
- return ParseResults.new(weighted_length: 0, permillage: 0, valid: true, display_range_start: 0, display_range_end: 0, valid_range_start: 0, valid_range_end: 0)
22
- end
4
+ module TwitterText
5
+ module Validation extend self
6
+ DEFAULT_TCO_URL_LENGTHS = {
7
+ :short_url_length => 23,
8
+ }
9
+
10
+ # :weighted_length the weighted length of tweet based on weights specified in the config
11
+ # :valid If tweet is valid
12
+ # :permillage permillage of the tweet over the max length specified in config
13
+ # :valid_range_start beginning of valid text
14
+ # :valid_range_end End index of valid part of the tweet text (inclusive)
15
+ # :display_range_start beginning index of display text
16
+ # :display_range_end end index of display text (inclusive)
17
+ class ParseResults < Hash
18
+
19
+ RESULT_PARAMS = [:weighted_length, :valid, :permillage, :valid_range_start, :valid_range_end, :display_range_start, :display_range_end]
20
+
21
+ def self.empty
22
+ return ParseResults.new(weighted_length: 0, permillage: 0, valid: true, display_range_start: 0, display_range_end: 0, valid_range_start: 0, valid_range_end: 0)
23
+ end
23
24
 
24
- def initialize(params = {})
25
- RESULT_PARAMS.each do |key|
26
- super[key] = params[key] if params.key?(key)
25
+ def initialize(params = {})
26
+ RESULT_PARAMS.each do |key|
27
+ super[key] = params[key] if params.key?(key)
28
+ end
27
29
  end
28
30
  end
29
- end
30
31
 
31
- # Parse input text and return hash with descriptive parameters populated.
32
- def parse_tweet(text, options = {})
33
- options = DEFAULT_TCO_URL_LENGTHS.merge(options)
34
- config = options[:config] || Twitter::Configuration.default_configuration
35
- normalized_text = text.to_nfc
36
- normalized_text_length = normalized_text.char_length
37
- unless (normalized_text_length > 0)
38
- ParseResults.empty()
39
- end
32
+ # Parse input text and return hash with descriptive parameters populated.
33
+ def parse_tweet(text, options = {})
34
+ options = DEFAULT_TCO_URL_LENGTHS.merge(options)
35
+ config = options[:config] || Twitter::TwitterText::Configuration.default_configuration
36
+ normalized_text = text.to_nfc
37
+ normalized_text_length = normalized_text.char_length
38
+ unless (normalized_text_length > 0)
39
+ ParseResults.empty()
40
+ end
40
41
 
41
- scale = config.scale
42
- max_weighted_tweet_length = config.max_weighted_tweet_length
43
- scaled_max_weighted_tweet_length = max_weighted_tweet_length * scale
44
- transformed_url_length = config.transformed_url_length * scale
45
- ranges = config.ranges
46
-
47
- url_entities = Twitter::Extractor.extract_urls_with_indices(normalized_text)
48
-
49
- has_invalid_chars = false
50
- weighted_count = 0
51
- offset = 0
52
- display_offset = 0
53
- valid_offset = 0
54
-
55
- while offset < normalized_text_length
56
- # Reset the default char weight each pass through the loop
57
- char_weight = config.default_weight
58
- url_entities.each do |url_entity|
59
- if url_entity[:indices].first == offset
60
- url_length = url_entity[:indices].last - url_entity[:indices].first
61
- weighted_count += transformed_url_length
62
- offset += url_length
63
- display_offset += url_length
64
- if weighted_count <= scaled_max_weighted_tweet_length
65
- valid_offset += url_length
42
+ scale = config.scale
43
+ max_weighted_tweet_length = config.max_weighted_tweet_length
44
+ scaled_max_weighted_tweet_length = max_weighted_tweet_length * scale
45
+ transformed_url_length = config.transformed_url_length * scale
46
+ ranges = config.ranges
47
+
48
+ url_entities = Twitter::TwitterText::Extractor.extract_urls_with_indices(normalized_text)
49
+
50
+ has_invalid_chars = false
51
+ weighted_count = 0
52
+ offset = 0
53
+ display_offset = 0
54
+ valid_offset = 0
55
+
56
+ while offset < normalized_text_length
57
+ # Reset the default char weight each pass through the loop
58
+ char_weight = config.default_weight
59
+ url_entities.each do |url_entity|
60
+ if url_entity[:indices].first == offset
61
+ url_length = url_entity[:indices].last - url_entity[:indices].first
62
+ weighted_count += transformed_url_length
63
+ offset += url_length
64
+ display_offset += url_length
65
+ if weighted_count <= scaled_max_weighted_tweet_length
66
+ valid_offset += url_length
67
+ end
68
+ # Finding a match breaks the loop; order of ranges matters.
69
+ break
66
70
  end
67
- # Finding a match breaks the loop; order of ranges matters.
68
- break
69
71
  end
70
- end
71
72
 
72
- if offset < normalized_text_length
73
- code_point = normalized_text[offset]
73
+ if offset < normalized_text_length
74
+ code_point = normalized_text[offset]
74
75
 
75
- ranges.each do |range|
76
- if range.contains?(code_point.unpack("U").first)
77
- char_weight = range.weight
78
- break
76
+ ranges.each do |range|
77
+ if range.contains?(code_point.unpack("U").first)
78
+ char_weight = range.weight
79
+ break
80
+ end
79
81
  end
80
- end
81
82
 
82
- weighted_count += char_weight
83
+ weighted_count += char_weight
83
84
 
84
- has_invalid_chars = contains_invalid?(normalized_text[offset]) unless has_invalid_chars
85
- char_count = code_point.char_length
86
- offset += char_count
87
- display_offset += char_count
85
+ has_invalid_chars = contains_invalid?(normalized_text[offset]) unless has_invalid_chars
86
+ char_count = code_point.char_length
87
+ offset += char_count
88
+ display_offset += char_count
88
89
 
89
- if !has_invalid_chars && (weighted_count <= scaled_max_weighted_tweet_length)
90
- valid_offset += char_count
90
+ if !has_invalid_chars && (weighted_count <= scaled_max_weighted_tweet_length)
91
+ valid_offset += char_count
92
+ end
91
93
  end
92
94
  end
93
- end
94
- normalized_text_offset = text.char_length - normalized_text.char_length
95
- scaled_weighted_length = weighted_count / scale
96
- is_valid = !has_invalid_chars && (scaled_weighted_length <= max_weighted_tweet_length)
97
- permillage = scaled_weighted_length * 1000 / max_weighted_tweet_length
95
+ normalized_text_offset = text.char_length - normalized_text.char_length
96
+ scaled_weighted_length = weighted_count / scale
97
+ is_valid = !has_invalid_chars && (scaled_weighted_length <= max_weighted_tweet_length)
98
+ permillage = scaled_weighted_length * 1000 / max_weighted_tweet_length
98
99
 
99
- return ParseResults.new(weighted_length: scaled_weighted_length, permillage: permillage, valid: is_valid, display_range_start: 0, display_range_end: (display_offset + normalized_text_offset - 1), valid_range_start: 0, valid_range_end: (valid_offset + normalized_text_offset - 1))
100
- end
100
+ return ParseResults.new(weighted_length: scaled_weighted_length, permillage: permillage, valid: is_valid, display_range_start: 0, display_range_end: (display_offset + normalized_text_offset - 1), valid_range_start: 0, valid_range_end: (valid_offset + normalized_text_offset - 1))
101
+ end
101
102
 
102
- def contains_invalid?(text)
103
- return false if !text || text.empty?
104
- begin
105
- return true if Twitter::Regex::INVALID_CHARACTERS.any?{|invalid_char| text.include?(invalid_char) }
106
- rescue ArgumentError
107
- # non-Unicode value.
108
- return true
103
+ def contains_invalid?(text)
104
+ return false if !text || text.empty?
105
+ begin
106
+ return true if Twitter::TwitterText::Regex::INVALID_CHARACTERS.any?{|invalid_char| text.include?(invalid_char) }
107
+ rescue ArgumentError
108
+ # non-Unicode value.
109
+ return true
110
+ end
111
+ return false
109
112
  end
110
- return false
111
- end
112
113
 
113
- def valid_username?(username)
114
- return false if !username || username.empty?
114
+ def valid_username?(username)
115
+ return false if !username || username.empty?
115
116
 
116
- extracted = Twitter::Extractor.extract_mentioned_screen_names(username)
117
- # Should extract the username minus the @ sign, hence the [1..-1]
118
- extracted.size == 1 && extracted.first == username[1..-1]
119
- end
120
-
121
- VALID_LIST_RE = /\A#{Twitter::Regex[:valid_mention_or_list]}\z/o
122
- def valid_list?(username_list)
123
- match = username_list.match(VALID_LIST_RE)
124
- # Must have matched and had nothing before or after
125
- !!(match && match[1] == "" && match[4] && !match[4].empty?)
126
- end
117
+ extracted = Twitter::TwitterText::Extractor.extract_mentioned_screen_names(username)
118
+ # Should extract the username minus the @ sign, hence the [1..-1]
119
+ extracted.size == 1 && extracted.first == username[1..-1]
120
+ end
127
121
 
128
- def valid_hashtag?(hashtag)
129
- return false if !hashtag || hashtag.empty?
122
+ VALID_LIST_RE = /\A#{Twitter::TwitterText::Regex[:valid_mention_or_list]}\z/o
123
+ def valid_list?(username_list)
124
+ match = username_list.match(VALID_LIST_RE)
125
+ # Must have matched and had nothing before or after
126
+ !!(match && match[1] == "" && match[4] && !match[4].empty?)
127
+ end
130
128
 
131
- extracted = Twitter::Extractor.extract_hashtags(hashtag)
132
- # Should extract the hashtag minus the # sign, hence the [1..-1]
133
- extracted.size == 1 && extracted.first == hashtag[1..-1]
134
- end
129
+ def valid_hashtag?(hashtag)
130
+ return false if !hashtag || hashtag.empty?
135
131
 
136
- def valid_url?(url, unicode_domains=true, require_protocol=true)
137
- return false if !url || url.empty?
132
+ extracted = Twitter::TwitterText::Extractor.extract_hashtags(hashtag)
133
+ # Should extract the hashtag minus the # sign, hence the [1..-1]
134
+ extracted.size == 1 && extracted.first == hashtag[1..-1]
135
+ end
138
136
 
139
- url_parts = url.match(Twitter::Regex[:validate_url_unencoded])
140
- return false unless (url_parts && url_parts.to_s == url)
137
+ def valid_url?(url, unicode_domains=true, require_protocol=true)
138
+ return false if !url || url.empty?
141
139
 
142
- scheme, authority, path, query, fragment = url_parts.captures
140
+ url_parts = url.match(Twitter::TwitterText::Regex[:validate_url_unencoded])
141
+ return false unless (url_parts && url_parts.to_s == url)
143
142
 
144
- return false unless ((!require_protocol ||
145
- (valid_match?(scheme, Twitter::Regex[:validate_url_scheme]) && scheme.match(/\Ahttps?\Z/i))) &&
146
- valid_match?(path, Twitter::Regex[:validate_url_path]) &&
147
- valid_match?(query, Twitter::Regex[:validate_url_query], true) &&
148
- valid_match?(fragment, Twitter::Regex[:validate_url_fragment], true))
143
+ scheme, authority, path, query, fragment = url_parts.captures
149
144
 
150
- return (unicode_domains && valid_match?(authority, Twitter::Regex[:validate_url_unicode_authority])) ||
151
- (!unicode_domains && valid_match?(authority, Twitter::Regex[:validate_url_authority]))
152
- end
145
+ return false unless ((!require_protocol ||
146
+ (valid_match?(scheme, Twitter::TwitterText::Regex[:validate_url_scheme]) && scheme.match(/\Ahttps?\Z/i))) &&
147
+ valid_match?(path, Twitter::TwitterText::Regex[:validate_url_path]) &&
148
+ valid_match?(query, Twitter::TwitterText::Regex[:validate_url_query], true) &&
149
+ valid_match?(fragment, Twitter::TwitterText::Regex[:validate_url_fragment], true))
153
150
 
154
- # These methods are deprecated, will be removed in future.
155
- extend Deprecation
156
-
157
- MAX_LENGTH_LEGACY = 140
158
-
159
- # DEPRECATED: Please use parse_tweet instead.
160
- #
161
- # Returns the length of the string as it would be displayed. This is equivalent to the length of the Unicode NFC
162
- # (See: http://www.unicode.org/reports/tr15). This is needed in order to consistently calculate the length of a
163
- # string no matter which actual form was transmitted. For example:
164
- #
165
- # U+0065 Latin Small Letter E
166
- # + U+0301 Combining Acute Accent
167
- # ----------
168
- # = 2 bytes, 2 characters, displayed as é (1 visual glyph)
169
- # … The NFC of {U+0065, U+0301} is {U+00E9}, which is a single character and a +display_length+ of 1
170
- #
171
- # The string could also contain U+00E9 already, in which case the canonicalization will not change the value.
172
- #
173
- def tweet_length(text, options = {})
174
- options = DEFAULT_TCO_URL_LENGTHS.merge(options)
175
-
176
- length = text.to_nfc.unpack("U*").length
177
-
178
- Twitter::Extractor.extract_urls_with_indices(text) do |url, start_position, end_position|
179
- length += start_position - end_position
180
- length += options[:short_url_length] if url.length > 0
151
+ return (unicode_domains && valid_match?(authority, Twitter::TwitterText::Regex[:validate_url_unicode_authority])) ||
152
+ (!unicode_domains && valid_match?(authority, Twitter::TwitterText::Regex[:validate_url_authority]))
181
153
  end
182
154
 
183
- length
184
- end
185
- deprecate :tweet_length, :parse_tweet
186
-
187
- # DEPRECATED: Please use parse_tweet instead.
188
- #
189
- # Check the <tt>text</tt> for any reason that it may not be valid as a Tweet. This is meant as a pre-validation
190
- # before posting to api.twitter.com. There are several server-side reasons for Tweets to fail but this pre-validation
191
- # will allow quicker feedback.
192
- #
193
- # Returns <tt>false</tt> if this <tt>text</tt> is valid. Otherwise one of the following Symbols will be returned:
194
- #
195
- # <tt>:too_long</tt>:: if the <tt>text</tt> is too long
196
- # <tt>:empty</tt>:: if the <tt>text</tt> is nil or empty
197
- # <tt>:invalid_characters</tt>:: if the <tt>text</tt> contains non-Unicode or any of the disallowed Unicode characters
198
- def tweet_invalid?(text)
199
- return :empty if !text || text.empty?
200
- begin
201
- return :too_long if tweet_length(text) > MAX_LENGTH_LEGACY
202
- return :invalid_characters if Twitter::Regex::INVALID_CHARACTERS.any?{|invalid_char| text.include?(invalid_char) }
203
- rescue ArgumentError
204
- # non-Unicode value.
205
- return :invalid_characters
155
+ # These methods are deprecated, will be removed in future.
156
+ extend Deprecation
157
+
158
+ MAX_LENGTH_LEGACY = 140
159
+
160
+ # DEPRECATED: Please use parse_tweet instead.
161
+ #
162
+ # Returns the length of the string as it would be displayed. This is equivalent to the length of the Unicode NFC
163
+ # (See: http://www.unicode.org/reports/tr15). This is needed in order to consistently calculate the length of a
164
+ # string no matter which actual form was transmitted. For example:
165
+ #
166
+ # U+0065 Latin Small Letter E
167
+ # + U+0301 Combining Acute Accent
168
+ # ----------
169
+ # = 2 bytes, 2 characters, displayed as é (1 visual glyph)
170
+ # … The NFC of {U+0065, U+0301} is {U+00E9}, which is a single character and a +display_length+ of 1
171
+ #
172
+ # The string could also contain U+00E9 already, in which case the canonicalization will not change the value.
173
+ #
174
+ def tweet_length(text, options = {})
175
+ options = DEFAULT_TCO_URL_LENGTHS.merge(options)
176
+
177
+ length = text.to_nfc.unpack("U*").length
178
+
179
+ Twitter::TwitterText::Extractor.extract_urls_with_indices(text) do |url, start_position, end_position|
180
+ length += start_position - end_position
181
+ length += options[:short_url_length] if url.length > 0
182
+ end
183
+
184
+ length
206
185
  end
186
+ deprecate :tweet_length, :parse_tweet
187
+
188
+ # DEPRECATED: Please use parse_tweet instead.
189
+ #
190
+ # Check the <tt>text</tt> for any reason that it may not be valid as a Tweet. This is meant as a pre-validation
191
+ # before posting to api.twitter.com. There are several server-side reasons for Tweets to fail but this pre-validation
192
+ # will allow quicker feedback.
193
+ #
194
+ # Returns <tt>false</tt> if this <tt>text</tt> is valid. Otherwise one of the following Symbols will be returned:
195
+ #
196
+ # <tt>:too_long</tt>:: if the <tt>text</tt> is too long
197
+ # <tt>:empty</tt>:: if the <tt>text</tt> is nil or empty
198
+ # <tt>:invalid_characters</tt>:: if the <tt>text</tt> contains non-Unicode or any of the disallowed Unicode characters
199
+ def tweet_invalid?(text)
200
+ return :empty if !text || text.empty?
201
+ begin
202
+ return :too_long if tweet_length(text) > MAX_LENGTH_LEGACY
203
+ return :invalid_characters if Twitter::TwitterText::Regex::INVALID_CHARACTERS.any?{|invalid_char| text.include?(invalid_char) }
204
+ rescue ArgumentError
205
+ # non-Unicode value.
206
+ return :invalid_characters
207
+ end
207
208
 
208
- return false
209
- end
210
- deprecate :tweet_invalid?, :parse_tweet
209
+ return false
210
+ end
211
+ deprecate :tweet_invalid?, :parse_tweet
211
212
 
212
- def valid_tweet_text?(text)
213
- !tweet_invalid?(text)
214
- end
215
- deprecate :valid_tweet_text?, :parse_tweet
213
+ def valid_tweet_text?(text)
214
+ !tweet_invalid?(text)
215
+ end
216
+ deprecate :valid_tweet_text?, :parse_tweet
216
217
 
217
- private
218
+ private
218
219
 
219
- def valid_match?(string, regex, optional=false)
220
- return (string && string.match(regex) && $~.to_s == string) unless optional
220
+ def valid_match?(string, regex, optional=false)
221
+ return (string && string.match(regex) && $~.to_s == string) unless optional
221
222
 
222
- !(string && (!string.match(regex) || $~.to_s != string))
223
+ !(string && (!string.match(regex) || $~.to_s != string))
224
+ end
223
225
  end
224
226
  end
225
227
  end