twitter-text 1.5.0 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +1 -1
- data/lib/twitter-text/autolink.rb +7 -2
- data/lib/twitter-text/extractor.rb +4 -5
- data/lib/twitter-text/regex.rb +15 -6
- data/lib/twitter-text/validation.rb +3 -2
- data/spec/autolinking_spec.rb +7 -3
- data/spec/rewriter_spec.rb +3 -3
- data/test/conformance_test.rb +1 -1
- data/twitter-text.gemspec +3 -3
- metadata +8 -7
data/README.rdoc
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
== twitter-text {<img src="https://secure.travis-ci.org/twitter/twitter-text-rb.png" />}[http://travis-ci.org/twitter/twitter-text-rb]
|
1
|
+
== twitter-text {<img src="https://secure.travis-ci.org/twitter/twitter-text-rb.png" />}[http://travis-ci.org/twitter/twitter-text-rb] {<img src="https://codeclimate.com/badge.png" />}[https://codeclimate.com/github/twitter/twitter-text-rb]
|
2
2
|
|
3
3
|
A gem that provides text processing routines for Twitter Tweets. The major
|
4
4
|
reason for this is to unify the various auto-linking and extraction of
|
@@ -207,7 +207,7 @@ module Twitter
|
|
207
207
|
OPTIONS_NOT_ATTRIBUTES = Set.new([
|
208
208
|
:url_class, :list_class, :username_class, :hashtag_class, :cashtag_class,
|
209
209
|
:username_url_base, :list_url_base, :hashtag_url_base, :cashtag_url_base,
|
210
|
-
:username_url_block, :list_url_block, :hashtag_url_block, :link_url_block,
|
210
|
+
:username_url_block, :list_url_block, :hashtag_url_block, :cashtag_url_block, :link_url_block,
|
211
211
|
:username_include_symbol, :suppress_lists, :suppress_no_follow, :url_entities,
|
212
212
|
:invisible_tag_attrs, :symbol_tag, :text_with_symbol_tag, :url_target,
|
213
213
|
:link_attribute_block, :link_text_block
|
@@ -326,6 +326,11 @@ module Twitter
|
|
326
326
|
hash = chars[entity[:indices].first]
|
327
327
|
hashtag = entity[:hashtag]
|
328
328
|
hashtag = yield(hashtag) if block_given?
|
329
|
+
hashtag_class = options[:hashtag_class]
|
330
|
+
|
331
|
+
if hashtag.match Twitter::Regex::REGEXEN[:rtl_chars]
|
332
|
+
hashtag_class += ' rtl'
|
333
|
+
end
|
329
334
|
|
330
335
|
href = if options[:hashtag_url_block]
|
331
336
|
options[:hashtag_url_block].call(hashtag)
|
@@ -334,7 +339,7 @@ module Twitter
|
|
334
339
|
end
|
335
340
|
|
336
341
|
html_attrs = {
|
337
|
-
:class =>
|
342
|
+
:class => hashtag_class,
|
338
343
|
# FIXME As our conformance test, hash in title should be half-width,
|
339
344
|
# this should be bug of conformance data.
|
340
345
|
:title => "##{hashtag}"
|
@@ -311,13 +311,12 @@ module Twitter
|
|
311
311
|
return [] unless text =~ /\$/
|
312
312
|
|
313
313
|
tags = []
|
314
|
-
text.scan(Twitter::Regex[:valid_cashtag]) do |cash_text|
|
314
|
+
text.scan(Twitter::Regex[:valid_cashtag]) do |before, dollar, cash_text|
|
315
315
|
match_data = $~
|
316
|
-
|
317
|
-
|
318
|
-
end_position = match_data.char_end(1)
|
316
|
+
start_position = match_data.char_begin(2)
|
317
|
+
end_position = match_data.char_end(3)
|
319
318
|
tags << {
|
320
|
-
:cashtag => cash_text
|
319
|
+
:cashtag => cash_text,
|
321
320
|
:indices => [start_position, end_position]
|
322
321
|
}
|
323
322
|
end
|
data/lib/twitter-text/regex.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
module Twitter
|
4
4
|
# A collection of regular expressions for parsing Tweet text. The regular expression
|
5
|
-
# list is frozen at load time to ensure immutability. These
|
5
|
+
# list is frozen at load time to ensure immutability. These regular expressions are
|
6
6
|
# used throughout the <tt>Twitter</tt> classes. Special care has been taken to make
|
7
7
|
# sure these reular expressions work with Tweets in all languages.
|
8
8
|
class Regex
|
@@ -82,6 +82,14 @@ module Twitter
|
|
82
82
|
regex_range(0x1e00, 0x1eff)
|
83
83
|
].join('').freeze
|
84
84
|
|
85
|
+
RTL_CHARACTERS = [
|
86
|
+
regex_range(0x0600,0x06FF),
|
87
|
+
regex_range(0x0750,0x077F),
|
88
|
+
regex_range(0x0590,0x05FF),
|
89
|
+
regex_range(0xFE70,0xFEFF)
|
90
|
+
].join('').freeze
|
91
|
+
|
92
|
+
|
85
93
|
NON_LATIN_HASHTAG_CHARS = [
|
86
94
|
# Cyrillic (Russian, Ukrainian, etc.)
|
87
95
|
regex_range(0x0400, 0x04ff), # Cyrillic
|
@@ -212,7 +220,7 @@ module Twitter
|
|
212
220
|
|
213
221
|
REGEXEN[:valid_port_number] = /[0-9]+/
|
214
222
|
|
215
|
-
REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\.\$\/%#\[\]\-_
|
223
|
+
REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\.\$\/%#\[\]\-_~&|@#{LATIN_ACCENTS}]/io
|
216
224
|
# Allow URL paths to contain balanced parens
|
217
225
|
# 1. Used in Wikipedia URLs like /Primer_(film)
|
218
226
|
# 2. Used in IIS sessions like /S(dfd346)/
|
@@ -220,16 +228,15 @@ module Twitter
|
|
220
228
|
# Valid end-of-path chracters (so /foo. does not gobble the period).
|
221
229
|
# 1. Allow =&# for empty URL parameters and other URL-join artifacts
|
222
230
|
REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9=_#\/\+\-#{LATIN_ACCENTS}]|(?:#{REGEXEN[:valid_url_balanced_parens]})/io
|
223
|
-
# Allow @ in a url, but only in the middle. Catch things like http://example.com/@user/
|
224
231
|
REGEXEN[:valid_url_path] = /(?:
|
225
232
|
(?:
|
226
233
|
#{REGEXEN[:valid_general_url_path_chars]}*
|
227
234
|
(?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)*
|
228
235
|
#{REGEXEN[:valid_url_path_ending_chars]}
|
229
|
-
)|(
|
236
|
+
)|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/)
|
230
237
|
)/iox
|
231
238
|
|
232
|
-
REGEXEN[:valid_url_query_chars] = /[a-z0-9!?\*'\(\);:&=\+\$\/%#\[\]\-_
|
239
|
+
REGEXEN[:valid_url_query_chars] = /[a-z0-9!?\*'\(\);:&=\+\$\/%#\[\]\-_\.,~|@]/i
|
233
240
|
REGEXEN[:valid_url_query_ending_chars] = /[a-z0-9_&=#\/]/i
|
234
241
|
REGEXEN[:valid_url] = %r{
|
235
242
|
( # $1 total match
|
@@ -245,7 +252,7 @@ module Twitter
|
|
245
252
|
}iox;
|
246
253
|
|
247
254
|
REGEXEN[:cashtag] = /[a-z]{1,6}(?:[._][a-z]{1,2})?/i
|
248
|
-
REGEXEN[:valid_cashtag] = /(
|
255
|
+
REGEXEN[:valid_cashtag] = /(^|#{REGEXEN[:spaces]})(\$)(#{REGEXEN[:cashtag]})(?=$|\s|[#{PUNCTUATION_CHARS}])/i
|
249
256
|
|
250
257
|
# These URL validation pattern strings are based on the ABNF from RFC 3986
|
251
258
|
REGEXEN[:validate_url_unreserved] = /[a-z0-9\-._~]/i
|
@@ -342,6 +349,8 @@ module Twitter
|
|
342
349
|
)?\Z
|
343
350
|
}ix
|
344
351
|
|
352
|
+
REGEXEN[:rtl_chars] = /[#{RTL_CHARACTERS}]/io
|
353
|
+
|
345
354
|
REGEXEN.each_pair{|k,v| v.freeze }
|
346
355
|
|
347
356
|
# Return the regular expression for a given <tt>key</tt>. If the <tt>key</tt>
|
@@ -3,8 +3,9 @@ module Twitter
|
|
3
3
|
MAX_LENGTH = 140
|
4
4
|
|
5
5
|
DEFAULT_TCO_URL_LENGTHS = {
|
6
|
-
:short_url_length =>
|
7
|
-
:short_url_length_https =>
|
6
|
+
:short_url_length => 22,
|
7
|
+
:short_url_length_https => 23,
|
8
|
+
:characters_reserved_per_media => 22
|
8
9
|
}.freeze
|
9
10
|
|
10
11
|
# Returns the length of the string as it would be displayed. This is equivilent to the length of the Unicode NFC
|
data/spec/autolinking_spec.rb
CHANGED
@@ -488,9 +488,8 @@ describe Twitter::Autolink do
|
|
488
488
|
context "with a username not followed by a /" do
|
489
489
|
def original_text; 'http://example.com/@foobar'; end
|
490
490
|
|
491
|
-
it "should link
|
492
|
-
@autolinked_text.should have_autolinked_url('http://example.com
|
493
|
-
@autolinked_text.should link_to_screen_name('foobar')
|
491
|
+
it "should link url" do
|
492
|
+
@autolinked_text.should have_autolinked_url('http://example.com/@foobar')
|
494
493
|
end
|
495
494
|
end
|
496
495
|
|
@@ -678,6 +677,11 @@ describe Twitter::Autolink do
|
|
678
677
|
linked.should have_autolinked_url('dummy', '#hashtag')
|
679
678
|
end
|
680
679
|
|
680
|
+
it "should customize href by cashtag_url_block option" do
|
681
|
+
linked = @linker.auto_link("$CASH", :cashtag_url_block => lambda{|a| "dummy"})
|
682
|
+
linked.should have_autolinked_url('dummy', '$CASH')
|
683
|
+
end
|
684
|
+
|
681
685
|
it "should customize href by link_url_block option" do
|
682
686
|
linked = @linker.auto_link("http://example.com/", :link_url_block => lambda{|a| "dummy"})
|
683
687
|
linked.should have_autolinked_url('dummy', 'http://example.com/')
|
data/spec/rewriter_spec.rb
CHANGED
@@ -527,9 +527,9 @@ describe Twitter::Rewriter do
|
|
527
527
|
context "with a username not followed by a /" do
|
528
528
|
def original_text; "http://example.com/@foobar"; end
|
529
529
|
|
530
|
-
it "should link
|
531
|
-
@block_args.should == ["http://example.com
|
532
|
-
@rewritten_text.should == "[rewritten]
|
530
|
+
it "should link url" do
|
531
|
+
@block_args.should == ["http://example.com/@foobar"]
|
532
|
+
@rewritten_text.should == "[rewritten]"
|
533
533
|
end
|
534
534
|
end
|
535
535
|
|
data/test/conformance_test.rb
CHANGED
@@ -33,7 +33,7 @@ class ConformanceTest < Test::Unit::TestCase
|
|
33
33
|
def equal_nodes?(expected, actual)
|
34
34
|
return false unless expected.name == actual.name
|
35
35
|
return false unless ordered_attributes(expected) == ordered_attributes(actual)
|
36
|
-
return false if expected.text? && actual.text? &&
|
36
|
+
return false if expected.text? && actual.text? && expected.content != actual.content
|
37
37
|
|
38
38
|
expected.children.each_with_index do |child, index|
|
39
39
|
return false unless equal_nodes?(child, actual.children[index])
|
data/twitter-text.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = "twitter-text"
|
5
|
-
s.version = "1.
|
5
|
+
s.version = "1.6.0"
|
6
6
|
s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle",
|
7
|
-
"Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii"]
|
7
|
+
"Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii", "James Koval"]
|
8
8
|
s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com",
|
9
|
-
"raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at", "keita@twitter.com"]
|
9
|
+
"raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at", "keita@twitter.com", "jkoval@twitter.com"]
|
10
10
|
s.homepage = "http://twitter.com"
|
11
11
|
s.description = s.summary = "A gem that provides text handling for Twitter"
|
12
12
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 15
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
|
-
-
|
8
|
+
- 6
|
9
9
|
- 0
|
10
|
-
version: 1.
|
10
|
+
version: 1.6.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Matt Sanford
|
@@ -18,12 +18,12 @@ authors:
|
|
18
18
|
- J.P. Cummins
|
19
19
|
- Yoshimasa Niwa
|
20
20
|
- Keita Fujii
|
21
|
+
- James Koval
|
21
22
|
autorequire:
|
22
23
|
bindir: bin
|
23
24
|
cert_chain: []
|
24
25
|
|
25
|
-
date:
|
26
|
-
default_executable:
|
26
|
+
date: 2013-03-01 00:00:00 Z
|
27
27
|
dependencies:
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
29
|
name: nokogiri
|
@@ -119,6 +119,7 @@ email:
|
|
119
119
|
- jcummins@twitter.com
|
120
120
|
- niw@niw.at
|
121
121
|
- keita@twitter.com
|
122
|
+
- jkoval@twitter.com
|
122
123
|
executables: []
|
123
124
|
|
124
125
|
extensions: []
|
@@ -159,7 +160,6 @@ files:
|
|
159
160
|
- spec/validation_spec.rb
|
160
161
|
- test/conformance_test.rb
|
161
162
|
- twitter-text.gemspec
|
162
|
-
has_rdoc: true
|
163
163
|
homepage: http://twitter.com
|
164
164
|
licenses: []
|
165
165
|
|
@@ -189,7 +189,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
189
189
|
requirements: []
|
190
190
|
|
191
191
|
rubyforge_project:
|
192
|
-
rubygems_version: 1.
|
192
|
+
rubygems_version: 1.8.24
|
193
193
|
signing_key:
|
194
194
|
specification_version: 3
|
195
195
|
summary: Twitter text handling library
|
@@ -205,3 +205,4 @@ test_files:
|
|
205
205
|
- spec/unicode_spec.rb
|
206
206
|
- spec/validation_spec.rb
|
207
207
|
- test/conformance_test.rb
|
208
|
+
has_rdoc: true
|