twitter-text 2.0.2 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,63 +1,65 @@
1
1
  module Twitter
2
- # A module provides base methods to rewrite usernames, lists, hashtags and URLs.
3
- module Rewriter extend self
4
- def rewrite_entities(text, entities)
5
- chars = text.to_s.to_char_a
6
-
7
- # sort by start index
8
- entities = entities.sort_by do |entity|
9
- indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices]
10
- indices.first
11
- end
2
+ module TwitterText
3
+ # A module that provides base methods to rewrite usernames, lists, hashtags and URLs.
4
+ module Rewriter extend self
5
+ def rewrite_entities(text, entities)
6
+ chars = text.to_s.to_char_a
12
7
 
13
- result = []
14
- last_index = entities.inject(0) do |index, entity|
15
- indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices]
16
- result << chars[index...indices.first]
17
- result << yield(entity, chars)
18
- indices.last
19
- end
20
- result << chars[last_index..-1]
8
+ # sort by start index
9
+ entities = entities.sort_by do |entity|
10
+ indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices]
11
+ indices.first
12
+ end
21
13
 
22
- result.flatten.join
23
- end
14
+ result = []
15
+ last_index = entities.inject(0) do |index, entity|
16
+ indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices]
17
+ result << chars[index...indices.first]
18
+ result << yield(entity, chars)
19
+ indices.last
20
+ end
21
+ result << chars[last_index..-1]
22
+
23
+ result.flatten.join
24
+ end
24
25
 
25
- # These methods are deprecated, will be removed in future.
26
- extend Deprecation
26
+ # These methods are deprecated, will be removed in future.
27
+ extend Deprecation
27
28
 
28
- def rewrite(text, options = {})
29
- [:hashtags, :urls, :usernames_or_lists].inject(text) do |key|
30
- options[key] ? send(:"rewrite_#{key}", text, &options[key]) : text
29
+ def rewrite(text, options = {})
30
+ [:hashtags, :urls, :usernames_or_lists].inject(text) do |key|
31
+ options[key] ? send(:"rewrite_#{key}", text, &options[key]) : text
32
+ end
31
33
  end
32
- end
33
- deprecate :rewrite, :rewrite_entities
34
-
35
- def rewrite_usernames_or_lists(text)
36
- entities = Extractor.extract_mentions_or_lists_with_indices(text)
37
- rewrite_entities(text, entities) do |entity, chars|
38
- at = chars[entity[:indices].first]
39
- list_slug = entity[:list_slug]
40
- list_slug = nil if list_slug.empty?
41
- yield(at, entity[:screen_name], list_slug)
34
+ deprecate :rewrite, :rewrite_entities
35
+
36
+ def rewrite_usernames_or_lists(text)
37
+ entities = Extractor.extract_mentions_or_lists_with_indices(text)
38
+ rewrite_entities(text, entities) do |entity, chars|
39
+ at = chars[entity[:indices].first]
40
+ list_slug = entity[:list_slug]
41
+ list_slug = nil if list_slug.empty?
42
+ yield(at, entity[:screen_name], list_slug)
43
+ end
42
44
  end
43
- end
44
- deprecate :rewrite_usernames_or_lists, :rewrite_entities
45
+ deprecate :rewrite_usernames_or_lists, :rewrite_entities
45
46
 
46
- def rewrite_hashtags(text)
47
- entities = Extractor.extract_hashtags_with_indices(text)
48
- rewrite_entities(text, entities) do |entity, chars|
49
- hash = chars[entity[:indices].first]
50
- yield(hash, entity[:hashtag])
47
+ def rewrite_hashtags(text)
48
+ entities = Extractor.extract_hashtags_with_indices(text)
49
+ rewrite_entities(text, entities) do |entity, chars|
50
+ hash = chars[entity[:indices].first]
51
+ yield(hash, entity[:hashtag])
52
+ end
51
53
  end
52
- end
53
- deprecate :rewrite_hashtags, :rewrite_entities
54
+ deprecate :rewrite_hashtags, :rewrite_entities
54
55
 
55
- def rewrite_urls(text)
56
- entities = Extractor.extract_urls_with_indices(text, :extract_url_without_protocol => false)
57
- rewrite_entities(text, entities) do |entity, chars|
58
- yield(entity[:url])
56
+ def rewrite_urls(text)
57
+ entities = Extractor.extract_urls_with_indices(text, :extract_url_without_protocol => false)
58
+ rewrite_entities(text, entities) do |entity, chars|
59
+ yield(entity[:url])
60
+ end
59
61
  end
62
+ deprecate :rewrite_urls, :rewrite_entities
60
63
  end
61
- deprecate :rewrite_urls, :rewrite_entities
62
64
  end
63
65
  end
@@ -1,26 +1,27 @@
1
1
  module Twitter
2
- # This module lazily defines constants of the form Uxxxx for all Unicode
3
- # codepoints from U0000 to U10FFFF. The value of each constant is the
4
- # UTF-8 string for the codepoint.
5
- # Examples:
6
- # copyright = Unicode::U00A9
7
- # euro = Unicode::U20AC
8
- # infinity = Unicode::U221E
9
- #
10
- module Unicode
11
- CODEPOINT_REGEX = /^U_?([0-9a-fA-F]{4,5}|10[0-9a-fA-F]{4})$/
2
+ module TwitterText
3
+ # This module lazily defines constants of the form Uxxxx for all Unicode
4
+ # codepoints from U0000 to U10FFFF. The value of each constant is the
5
+ # UTF-8 string for the codepoint.
6
+ # Examples:
7
+ # copyright = Unicode::U00A9
8
+ # euro = Unicode::U20AC
9
+ # infinity = Unicode::U221E
10
+ #
11
+ module Unicode
12
+ CODEPOINT_REGEX = /^U_?([0-9a-fA-F]{4,5}|10[0-9a-fA-F]{4})$/
12
13
 
13
- def self.const_missing(name)
14
- # Check that the constant name is of the right form: U0000 to U10FFFF
15
- if name.to_s =~ CODEPOINT_REGEX
16
- # Convert the codepoint to an immutable UTF-8 string,
17
- # define a real constant for that value and return the value
18
- #p name, name.class
19
- const_set(name, [$1.to_i(16)].pack("U").freeze)
20
- else # Raise an error for constants that are not Unicode.
21
- raise NameError, "Uninitialized constant: Unicode::#{name}"
14
+ def self.const_missing(name)
15
+ # Check that the constant name is of the right form: U0000 to U10FFFF
16
+ if name.to_s =~ CODEPOINT_REGEX
17
+ # Convert the codepoint to an immutable UTF-8 string,
18
+ # define a real constant for that value and return the value
19
+ #p name, name.class
20
+ const_set(name, [$1.to_i(16)].pack("U").freeze)
21
+ else # Raise an error for constants that are not Unicode.
22
+ raise NameError, "Uninitialized constant: Unicode::#{name}"
23
+ end
22
24
  end
23
25
  end
24
26
  end
25
-
26
27
  end
@@ -1,225 +1,227 @@
1
1
  require 'unf'
2
2
 
3
3
  module Twitter
4
- module Validation extend self
5
- DEFAULT_TCO_URL_LENGTHS = {
6
- :short_url_length => 23,
7
- }
8
-
9
- # :weighted_length the weighted length of tweet based on weights specified in the config
10
- # :valid If tweet is valid
11
- # :permillage permillage of the tweet over the max length specified in config
12
- # :valid_range_start beginning of valid text
13
- # :valid_range_end End index of valid part of the tweet text (inclusive)
14
- # :display_range_start beginning index of display text
15
- # :display_range_end end index of display text (inclusive)
16
- class ParseResults < Hash
17
-
18
- RESULT_PARAMS = [:weighted_length, :valid, :permillage, :valid_range_start, :valid_range_end, :display_range_start, :display_range_end]
19
-
20
- def self.empty
21
- return ParseResults.new(weighted_length: 0, permillage: 0, valid: true, display_range_start: 0, display_range_end: 0, valid_range_start: 0, valid_range_end: 0)
22
- end
4
+ module TwitterText
5
+ module Validation extend self
6
+ DEFAULT_TCO_URL_LENGTHS = {
7
+ :short_url_length => 23,
8
+ }
9
+
10
+ # :weighted_length the weighted length of tweet based on weights specified in the config
11
+ # :valid If tweet is valid
12
+ # :permillage permillage of the tweet over the max length specified in config
13
+ # :valid_range_start beginning of valid text
14
+ # :valid_range_end End index of valid part of the tweet text (inclusive)
15
+ # :display_range_start beginning index of display text
16
+ # :display_range_end end index of display text (inclusive)
17
+ class ParseResults < Hash
18
+
19
+ RESULT_PARAMS = [:weighted_length, :valid, :permillage, :valid_range_start, :valid_range_end, :display_range_start, :display_range_end]
20
+
21
+ def self.empty
22
+ return ParseResults.new(weighted_length: 0, permillage: 0, valid: true, display_range_start: 0, display_range_end: 0, valid_range_start: 0, valid_range_end: 0)
23
+ end
23
24
 
24
- def initialize(params = {})
25
- RESULT_PARAMS.each do |key|
26
- super[key] = params[key] if params.key?(key)
25
+ def initialize(params = {})
26
+ RESULT_PARAMS.each do |key|
27
+ super[key] = params[key] if params.key?(key)
28
+ end
27
29
  end
28
30
  end
29
- end
30
31
 
31
- # Parse input text and return hash with descriptive parameters populated.
32
- def parse_tweet(text, options = {})
33
- options = DEFAULT_TCO_URL_LENGTHS.merge(options)
34
- config = options[:config] || Twitter::Configuration.default_configuration
35
- normalized_text = text.to_nfc
36
- normalized_text_length = normalized_text.char_length
37
- unless (normalized_text_length > 0)
38
- ParseResults.empty()
39
- end
32
+ # Parse input text and return hash with descriptive parameters populated.
33
+ def parse_tweet(text, options = {})
34
+ options = DEFAULT_TCO_URL_LENGTHS.merge(options)
35
+ config = options[:config] || Twitter::TwitterText::Configuration.default_configuration
36
+ normalized_text = text.to_nfc
37
+ normalized_text_length = normalized_text.char_length
38
+ unless (normalized_text_length > 0)
39
+ ParseResults.empty()
40
+ end
40
41
 
41
- scale = config.scale
42
- max_weighted_tweet_length = config.max_weighted_tweet_length
43
- scaled_max_weighted_tweet_length = max_weighted_tweet_length * scale
44
- transformed_url_length = config.transformed_url_length * scale
45
- ranges = config.ranges
46
-
47
- url_entities = Twitter::Extractor.extract_urls_with_indices(normalized_text)
48
-
49
- has_invalid_chars = false
50
- weighted_count = 0
51
- offset = 0
52
- display_offset = 0
53
- valid_offset = 0
54
-
55
- while offset < normalized_text_length
56
- # Reset the default char weight each pass through the loop
57
- char_weight = config.default_weight
58
- url_entities.each do |url_entity|
59
- if url_entity[:indices].first == offset
60
- url_length = url_entity[:indices].last - url_entity[:indices].first
61
- weighted_count += transformed_url_length
62
- offset += url_length
63
- display_offset += url_length
64
- if weighted_count <= scaled_max_weighted_tweet_length
65
- valid_offset += url_length
42
+ scale = config.scale
43
+ max_weighted_tweet_length = config.max_weighted_tweet_length
44
+ scaled_max_weighted_tweet_length = max_weighted_tweet_length * scale
45
+ transformed_url_length = config.transformed_url_length * scale
46
+ ranges = config.ranges
47
+
48
+ url_entities = Twitter::TwitterText::Extractor.extract_urls_with_indices(normalized_text)
49
+
50
+ has_invalid_chars = false
51
+ weighted_count = 0
52
+ offset = 0
53
+ display_offset = 0
54
+ valid_offset = 0
55
+
56
+ while offset < normalized_text_length
57
+ # Reset the default char weight each pass through the loop
58
+ char_weight = config.default_weight
59
+ url_entities.each do |url_entity|
60
+ if url_entity[:indices].first == offset
61
+ url_length = url_entity[:indices].last - url_entity[:indices].first
62
+ weighted_count += transformed_url_length
63
+ offset += url_length
64
+ display_offset += url_length
65
+ if weighted_count <= scaled_max_weighted_tweet_length
66
+ valid_offset += url_length
67
+ end
68
+ # Finding a match breaks the loop; order of ranges matters.
69
+ break
66
70
  end
67
- # Finding a match breaks the loop; order of ranges matters.
68
- break
69
71
  end
70
- end
71
72
 
72
- if offset < normalized_text_length
73
- code_point = normalized_text[offset]
73
+ if offset < normalized_text_length
74
+ code_point = normalized_text[offset]
74
75
 
75
- ranges.each do |range|
76
- if range.contains?(code_point.unpack("U").first)
77
- char_weight = range.weight
78
- break
76
+ ranges.each do |range|
77
+ if range.contains?(code_point.unpack("U").first)
78
+ char_weight = range.weight
79
+ break
80
+ end
79
81
  end
80
- end
81
82
 
82
- weighted_count += char_weight
83
+ weighted_count += char_weight
83
84
 
84
- has_invalid_chars = contains_invalid?(normalized_text[offset]) unless has_invalid_chars
85
- char_count = code_point.char_length
86
- offset += char_count
87
- display_offset += char_count
85
+ has_invalid_chars = contains_invalid?(normalized_text[offset]) unless has_invalid_chars
86
+ char_count = code_point.char_length
87
+ offset += char_count
88
+ display_offset += char_count
88
89
 
89
- if !has_invalid_chars && (weighted_count <= scaled_max_weighted_tweet_length)
90
- valid_offset += char_count
90
+ if !has_invalid_chars && (weighted_count <= scaled_max_weighted_tweet_length)
91
+ valid_offset += char_count
92
+ end
91
93
  end
92
94
  end
93
- end
94
- normalized_text_offset = text.char_length - normalized_text.char_length
95
- scaled_weighted_length = weighted_count / scale
96
- is_valid = !has_invalid_chars && (scaled_weighted_length <= max_weighted_tweet_length)
97
- permillage = scaled_weighted_length * 1000 / max_weighted_tweet_length
95
+ normalized_text_offset = text.char_length - normalized_text.char_length
96
+ scaled_weighted_length = weighted_count / scale
97
+ is_valid = !has_invalid_chars && (scaled_weighted_length <= max_weighted_tweet_length)
98
+ permillage = scaled_weighted_length * 1000 / max_weighted_tweet_length
98
99
 
99
- return ParseResults.new(weighted_length: scaled_weighted_length, permillage: permillage, valid: is_valid, display_range_start: 0, display_range_end: (display_offset + normalized_text_offset - 1), valid_range_start: 0, valid_range_end: (valid_offset + normalized_text_offset - 1))
100
- end
100
+ return ParseResults.new(weighted_length: scaled_weighted_length, permillage: permillage, valid: is_valid, display_range_start: 0, display_range_end: (display_offset + normalized_text_offset - 1), valid_range_start: 0, valid_range_end: (valid_offset + normalized_text_offset - 1))
101
+ end
101
102
 
102
- def contains_invalid?(text)
103
- return false if !text || text.empty?
104
- begin
105
- return true if Twitter::Regex::INVALID_CHARACTERS.any?{|invalid_char| text.include?(invalid_char) }
106
- rescue ArgumentError
107
- # non-Unicode value.
108
- return true
103
+ def contains_invalid?(text)
104
+ return false if !text || text.empty?
105
+ begin
106
+ return true if Twitter::TwitterText::Regex::INVALID_CHARACTERS.any?{|invalid_char| text.include?(invalid_char) }
107
+ rescue ArgumentError
108
+ # non-Unicode value.
109
+ return true
110
+ end
111
+ return false
109
112
  end
110
- return false
111
- end
112
113
 
113
- def valid_username?(username)
114
- return false if !username || username.empty?
114
+ def valid_username?(username)
115
+ return false if !username || username.empty?
115
116
 
116
- extracted = Twitter::Extractor.extract_mentioned_screen_names(username)
117
- # Should extract the username minus the @ sign, hence the [1..-1]
118
- extracted.size == 1 && extracted.first == username[1..-1]
119
- end
120
-
121
- VALID_LIST_RE = /\A#{Twitter::Regex[:valid_mention_or_list]}\z/o
122
- def valid_list?(username_list)
123
- match = username_list.match(VALID_LIST_RE)
124
- # Must have matched and had nothing before or after
125
- !!(match && match[1] == "" && match[4] && !match[4].empty?)
126
- end
117
+ extracted = Twitter::TwitterText::Extractor.extract_mentioned_screen_names(username)
118
+ # Should extract the username minus the @ sign, hence the [1..-1]
119
+ extracted.size == 1 && extracted.first == username[1..-1]
120
+ end
127
121
 
128
- def valid_hashtag?(hashtag)
129
- return false if !hashtag || hashtag.empty?
122
+ VALID_LIST_RE = /\A#{Twitter::TwitterText::Regex[:valid_mention_or_list]}\z/o
123
+ def valid_list?(username_list)
124
+ match = username_list.match(VALID_LIST_RE)
125
+ # Must have matched and had nothing before or after
126
+ !!(match && match[1] == "" && match[4] && !match[4].empty?)
127
+ end
130
128
 
131
- extracted = Twitter::Extractor.extract_hashtags(hashtag)
132
- # Should extract the hashtag minus the # sign, hence the [1..-1]
133
- extracted.size == 1 && extracted.first == hashtag[1..-1]
134
- end
129
+ def valid_hashtag?(hashtag)
130
+ return false if !hashtag || hashtag.empty?
135
131
 
136
- def valid_url?(url, unicode_domains=true, require_protocol=true)
137
- return false if !url || url.empty?
132
+ extracted = Twitter::TwitterText::Extractor.extract_hashtags(hashtag)
133
+ # Should extract the hashtag minus the # sign, hence the [1..-1]
134
+ extracted.size == 1 && extracted.first == hashtag[1..-1]
135
+ end
138
136
 
139
- url_parts = url.match(Twitter::Regex[:validate_url_unencoded])
140
- return false unless (url_parts && url_parts.to_s == url)
137
+ def valid_url?(url, unicode_domains=true, require_protocol=true)
138
+ return false if !url || url.empty?
141
139
 
142
- scheme, authority, path, query, fragment = url_parts.captures
140
+ url_parts = url.match(Twitter::TwitterText::Regex[:validate_url_unencoded])
141
+ return false unless (url_parts && url_parts.to_s == url)
143
142
 
144
- return false unless ((!require_protocol ||
145
- (valid_match?(scheme, Twitter::Regex[:validate_url_scheme]) && scheme.match(/\Ahttps?\Z/i))) &&
146
- valid_match?(path, Twitter::Regex[:validate_url_path]) &&
147
- valid_match?(query, Twitter::Regex[:validate_url_query], true) &&
148
- valid_match?(fragment, Twitter::Regex[:validate_url_fragment], true))
143
+ scheme, authority, path, query, fragment = url_parts.captures
149
144
 
150
- return (unicode_domains && valid_match?(authority, Twitter::Regex[:validate_url_unicode_authority])) ||
151
- (!unicode_domains && valid_match?(authority, Twitter::Regex[:validate_url_authority]))
152
- end
145
+ return false unless ((!require_protocol ||
146
+ (valid_match?(scheme, Twitter::TwitterText::Regex[:validate_url_scheme]) && scheme.match(/\Ahttps?\Z/i))) &&
147
+ valid_match?(path, Twitter::TwitterText::Regex[:validate_url_path]) &&
148
+ valid_match?(query, Twitter::TwitterText::Regex[:validate_url_query], true) &&
149
+ valid_match?(fragment, Twitter::TwitterText::Regex[:validate_url_fragment], true))
153
150
 
154
- # These methods are deprecated, will be removed in future.
155
- extend Deprecation
156
-
157
- MAX_LENGTH_LEGACY = 140
158
-
159
- # DEPRECATED: Please use parse_text instead.
160
- #
161
- # Returns the length of the string as it would be displayed. This is equivilent to the length of the Unicode NFC
162
- # (See: http://www.unicode.org/reports/tr15). This is needed in order to consistently calculate the length of a
163
- # string no matter which actual form was transmitted. For example:
164
- #
165
- # U+0065 Latin Small Letter E
166
- # + U+0301 Combining Acute Accent
167
- # ----------
168
- # = 2 bytes, 2 characters, displayed as é (1 visual glyph)
169
- # … The NFC of {U+0065, U+0301} is {U+00E9}, which is a single chracter and a +display_length+ of 1
170
- #
171
- # The string could also contain U+00E9 already, in which case the canonicalization will not change the value.
172
- #
173
- def tweet_length(text, options = {})
174
- options = DEFAULT_TCO_URL_LENGTHS.merge(options)
175
-
176
- length = text.to_nfc.unpack("U*").length
177
-
178
- Twitter::Extractor.extract_urls_with_indices(text) do |url, start_position, end_position|
179
- length += start_position - end_position
180
- length += options[:short_url_length] if url.length > 0
151
+ return (unicode_domains && valid_match?(authority, Twitter::TwitterText::Regex[:validate_url_unicode_authority])) ||
152
+ (!unicode_domains && valid_match?(authority, Twitter::TwitterText::Regex[:validate_url_authority]))
181
153
  end
182
154
 
183
- length
184
- end
185
- deprecate :tweet_length, :parse_tweet
186
-
187
- # DEPRECATED: Please use parse_text instead.
188
- #
189
- # Check the <tt>text</tt> for any reason that it may not be valid as a Tweet. This is meant as a pre-validation
190
- # before posting to api.twitter.com. There are several server-side reasons for Tweets to fail but this pre-validation
191
- # will allow quicker feedback.
192
- #
193
- # Returns <tt>false</tt> if this <tt>text</tt> is valid. Otherwise one of the following Symbols will be returned:
194
- #
195
- # <tt>:too_long</tt>:: if the <tt>text</tt> is too long
196
- # <tt>:empty</tt>:: if the <tt>text</tt> is nil or empty
197
- # <tt>:invalid_characters</tt>:: if the <tt>text</tt> contains non-Unicode or any of the disallowed Unicode characters
198
- def tweet_invalid?(text)
199
- return :empty if !text || text.empty?
200
- begin
201
- return :too_long if tweet_length(text) > MAX_LENGTH_LEGACY
202
- return :invalid_characters if Twitter::Regex::INVALID_CHARACTERS.any?{|invalid_char| text.include?(invalid_char) }
203
- rescue ArgumentError
204
- # non-Unicode value.
205
- return :invalid_characters
155
+ # These methods are deprecated, will be removed in future.
156
+ extend Deprecation
157
+
158
+ MAX_LENGTH_LEGACY = 140
159
+
160
+ # DEPRECATED: Please use parse_tweet instead.
161
+ #
162
+ # Returns the length of the string as it would be displayed. This is equivalent to the length of the Unicode NFC
163
+ # (See: http://www.unicode.org/reports/tr15). This is needed in order to consistently calculate the length of a
164
+ # string no matter which actual form was transmitted. For example:
165
+ #
166
+ # U+0065 Latin Small Letter E
167
+ # + U+0301 Combining Acute Accent
168
+ # ----------
169
+ # = 2 bytes, 2 characters, displayed as é (1 visual glyph)
170
+ # … The NFC of {U+0065, U+0301} is {U+00E9}, which is a single character and a +display_length+ of 1
171
+ #
172
+ # The string could also contain U+00E9 already, in which case the canonicalization will not change the value.
173
+ #
174
+ def tweet_length(text, options = {})
175
+ options = DEFAULT_TCO_URL_LENGTHS.merge(options)
176
+
177
+ length = text.to_nfc.unpack("U*").length
178
+
179
+ Twitter::TwitterText::Extractor.extract_urls_with_indices(text) do |url, start_position, end_position|
180
+ length += start_position - end_position
181
+ length += options[:short_url_length] if url.length > 0
182
+ end
183
+
184
+ length
206
185
  end
186
+ deprecate :tweet_length, :parse_tweet
187
+
188
+ # DEPRECATED: Please use parse_tweet instead.
189
+ #
190
+ # Check the <tt>text</tt> for any reason that it may not be valid as a Tweet. This is meant as a pre-validation
191
+ # before posting to api.twitter.com. There are several server-side reasons for Tweets to fail but this pre-validation
192
+ # will allow quicker feedback.
193
+ #
194
+ # Returns <tt>false</tt> if this <tt>text</tt> is valid. Otherwise one of the following Symbols will be returned:
195
+ #
196
+ # <tt>:too_long</tt>:: if the <tt>text</tt> is too long
197
+ # <tt>:empty</tt>:: if the <tt>text</tt> is nil or empty
198
+ # <tt>:invalid_characters</tt>:: if the <tt>text</tt> contains non-Unicode or any of the disallowed Unicode characters
199
+ def tweet_invalid?(text)
200
+ return :empty if !text || text.empty?
201
+ begin
202
+ return :too_long if tweet_length(text) > MAX_LENGTH_LEGACY
203
+ return :invalid_characters if Twitter::TwitterText::Regex::INVALID_CHARACTERS.any?{|invalid_char| text.include?(invalid_char) }
204
+ rescue ArgumentError
205
+ # non-Unicode value.
206
+ return :invalid_characters
207
+ end
207
208
 
208
- return false
209
- end
210
- deprecate :tweet_invalid?, :parse_tweet
209
+ return false
210
+ end
211
+ deprecate :tweet_invalid?, :parse_tweet
211
212
 
212
- def valid_tweet_text?(text)
213
- !tweet_invalid?(text)
214
- end
215
- deprecate :valid_tweet_text?, :parse_tweet
213
+ def valid_tweet_text?(text)
214
+ !tweet_invalid?(text)
215
+ end
216
+ deprecate :valid_tweet_text?, :parse_tweet
216
217
 
217
- private
218
+ private
218
219
 
219
- def valid_match?(string, regex, optional=false)
220
- return (string && string.match(regex) && $~.to_s == string) unless optional
220
+ def valid_match?(string, regex, optional=false)
221
+ return (string && string.match(regex) && $~.to_s == string) unless optional
221
222
 
222
- !(string && (!string.match(regex) || $~.to_s != string))
223
+ !(string && (!string.match(regex) || $~.to_s != string))
224
+ end
223
225
  end
224
226
  end
225
227
  end