twitter-text 1.5.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +1 -1
- data/lib/twitter-text/autolink.rb +7 -2
- data/lib/twitter-text/extractor.rb +4 -5
- data/lib/twitter-text/regex.rb +15 -6
- data/lib/twitter-text/validation.rb +3 -2
- data/spec/autolinking_spec.rb +7 -3
- data/spec/rewriter_spec.rb +3 -3
- data/test/conformance_test.rb +1 -1
- data/twitter-text.gemspec +3 -3
- metadata +8 -7
data/README.rdoc
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
== twitter-text {<img src="https://secure.travis-ci.org/twitter/twitter-text-rb.png" />}[http://travis-ci.org/twitter/twitter-text-rb]
|
1
|
+
== twitter-text {<img src="https://secure.travis-ci.org/twitter/twitter-text-rb.png" />}[http://travis-ci.org/twitter/twitter-text-rb] {<img src="https://codeclimate.com/badge.png" />}[https://codeclimate.com/github/twitter/twitter-text-rb]
|
2
2
|
|
3
3
|
A gem that provides text processing routines for Twitter Tweets. The major
|
4
4
|
reason for this is to unify the various auto-linking and extraction of
|
@@ -207,7 +207,7 @@ module Twitter
|
|
207
207
|
OPTIONS_NOT_ATTRIBUTES = Set.new([
|
208
208
|
:url_class, :list_class, :username_class, :hashtag_class, :cashtag_class,
|
209
209
|
:username_url_base, :list_url_base, :hashtag_url_base, :cashtag_url_base,
|
210
|
-
:username_url_block, :list_url_block, :hashtag_url_block, :link_url_block,
|
210
|
+
:username_url_block, :list_url_block, :hashtag_url_block, :cashtag_url_block, :link_url_block,
|
211
211
|
:username_include_symbol, :suppress_lists, :suppress_no_follow, :url_entities,
|
212
212
|
:invisible_tag_attrs, :symbol_tag, :text_with_symbol_tag, :url_target,
|
213
213
|
:link_attribute_block, :link_text_block
|
@@ -326,6 +326,11 @@ module Twitter
|
|
326
326
|
hash = chars[entity[:indices].first]
|
327
327
|
hashtag = entity[:hashtag]
|
328
328
|
hashtag = yield(hashtag) if block_given?
|
329
|
+
hashtag_class = options[:hashtag_class]
|
330
|
+
|
331
|
+
if hashtag.match Twitter::Regex::REGEXEN[:rtl_chars]
|
332
|
+
hashtag_class += ' rtl'
|
333
|
+
end
|
329
334
|
|
330
335
|
href = if options[:hashtag_url_block]
|
331
336
|
options[:hashtag_url_block].call(hashtag)
|
@@ -334,7 +339,7 @@ module Twitter
|
|
334
339
|
end
|
335
340
|
|
336
341
|
html_attrs = {
|
337
|
-
:class =>
|
342
|
+
:class => hashtag_class,
|
338
343
|
# FIXME As our conformance test, hash in title should be half-width,
|
339
344
|
# this should be bug of conformance data.
|
340
345
|
:title => "##{hashtag}"
|
@@ -311,13 +311,12 @@ module Twitter
|
|
311
311
|
return [] unless text =~ /\$/
|
312
312
|
|
313
313
|
tags = []
|
314
|
-
text.scan(Twitter::Regex[:valid_cashtag]) do |cash_text|
|
314
|
+
text.scan(Twitter::Regex[:valid_cashtag]) do |before, dollar, cash_text|
|
315
315
|
match_data = $~
|
316
|
-
|
317
|
-
|
318
|
-
end_position = match_data.char_end(1)
|
316
|
+
start_position = match_data.char_begin(2)
|
317
|
+
end_position = match_data.char_end(3)
|
319
318
|
tags << {
|
320
|
-
:cashtag => cash_text
|
319
|
+
:cashtag => cash_text,
|
321
320
|
:indices => [start_position, end_position]
|
322
321
|
}
|
323
322
|
end
|
data/lib/twitter-text/regex.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
module Twitter
|
4
4
|
# A collection of regular expressions for parsing Tweet text. The regular expression
|
5
|
-
# list is frozen at load time to ensure immutability. These
|
5
|
+
# list is frozen at load time to ensure immutability. These regular expressions are
|
6
6
|
# used throughout the <tt>Twitter</tt> classes. Special care has been taken to make
|
7
7
|
# sure these regular expressions work with Tweets in all languages.
|
8
8
|
class Regex
|
@@ -82,6 +82,14 @@ module Twitter
|
|
82
82
|
regex_range(0x1e00, 0x1eff)
|
83
83
|
].join('').freeze
|
84
84
|
|
85
|
+
RTL_CHARACTERS = [
|
86
|
+
regex_range(0x0600,0x06FF),
|
87
|
+
regex_range(0x0750,0x077F),
|
88
|
+
regex_range(0x0590,0x05FF),
|
89
|
+
regex_range(0xFE70,0xFEFF)
|
90
|
+
].join('').freeze
|
91
|
+
|
92
|
+
|
85
93
|
NON_LATIN_HASHTAG_CHARS = [
|
86
94
|
# Cyrillic (Russian, Ukrainian, etc.)
|
87
95
|
regex_range(0x0400, 0x04ff), # Cyrillic
|
@@ -212,7 +220,7 @@ module Twitter
|
|
212
220
|
|
213
221
|
REGEXEN[:valid_port_number] = /[0-9]+/
|
214
222
|
|
215
|
-
REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\.\$\/%#\[\]\-_
|
223
|
+
REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\.\$\/%#\[\]\-_~&|@#{LATIN_ACCENTS}]/io
|
216
224
|
# Allow URL paths to contain balanced parens
|
217
225
|
# 1. Used in Wikipedia URLs like /Primer_(film)
|
218
226
|
# 2. Used in IIS sessions like /S(dfd346)/
|
@@ -220,16 +228,15 @@ module Twitter
|
|
220
228
|
# Valid end-of-path characters (so /foo. does not gobble the period).
|
221
229
|
# 1. Allow =&# for empty URL parameters and other URL-join artifacts
|
222
230
|
REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9=_#\/\+\-#{LATIN_ACCENTS}]|(?:#{REGEXEN[:valid_url_balanced_parens]})/io
|
223
|
-
# Allow @ in a url, but only in the middle. Catch things like http://example.com/@user/
|
224
231
|
REGEXEN[:valid_url_path] = /(?:
|
225
232
|
(?:
|
226
233
|
#{REGEXEN[:valid_general_url_path_chars]}*
|
227
234
|
(?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)*
|
228
235
|
#{REGEXEN[:valid_url_path_ending_chars]}
|
229
|
-
)|(
|
236
|
+
)|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/)
|
230
237
|
)/iox
|
231
238
|
|
232
|
-
REGEXEN[:valid_url_query_chars] = /[a-z0-9!?\*'\(\);:&=\+\$\/%#\[\]\-_
|
239
|
+
REGEXEN[:valid_url_query_chars] = /[a-z0-9!?\*'\(\);:&=\+\$\/%#\[\]\-_\.,~|@]/i
|
233
240
|
REGEXEN[:valid_url_query_ending_chars] = /[a-z0-9_&=#\/]/i
|
234
241
|
REGEXEN[:valid_url] = %r{
|
235
242
|
( # $1 total match
|
@@ -245,7 +252,7 @@ module Twitter
|
|
245
252
|
}iox;
|
246
253
|
|
247
254
|
REGEXEN[:cashtag] = /[a-z]{1,6}(?:[._][a-z]{1,2})?/i
|
248
|
-
REGEXEN[:valid_cashtag] = /(
|
255
|
+
REGEXEN[:valid_cashtag] = /(^|#{REGEXEN[:spaces]})(\$)(#{REGEXEN[:cashtag]})(?=$|\s|[#{PUNCTUATION_CHARS}])/i
|
249
256
|
|
250
257
|
# These URL validation pattern strings are based on the ABNF from RFC 3986
|
251
258
|
REGEXEN[:validate_url_unreserved] = /[a-z0-9\-._~]/i
|
@@ -342,6 +349,8 @@ module Twitter
|
|
342
349
|
)?\Z
|
343
350
|
}ix
|
344
351
|
|
352
|
+
REGEXEN[:rtl_chars] = /[#{RTL_CHARACTERS}]/io
|
353
|
+
|
345
354
|
REGEXEN.each_pair{|k,v| v.freeze }
|
346
355
|
|
347
356
|
# Return the regular expression for a given <tt>key</tt>. If the <tt>key</tt>
|
@@ -3,8 +3,9 @@ module Twitter
|
|
3
3
|
MAX_LENGTH = 140
|
4
4
|
|
5
5
|
DEFAULT_TCO_URL_LENGTHS = {
|
6
|
-
:short_url_length =>
|
7
|
-
:short_url_length_https =>
|
6
|
+
:short_url_length => 22,
|
7
|
+
:short_url_length_https => 23,
|
8
|
+
:characters_reserved_per_media => 22
|
8
9
|
}.freeze
|
9
10
|
|
10
11
|
# Returns the length of the string as it would be displayed. This is equivalent to the length of the Unicode NFC
|
data/spec/autolinking_spec.rb
CHANGED
@@ -488,9 +488,8 @@ describe Twitter::Autolink do
|
|
488
488
|
context "with a username not followed by a /" do
|
489
489
|
def original_text; 'http://example.com/@foobar'; end
|
490
490
|
|
491
|
-
it "should link
|
492
|
-
@autolinked_text.should have_autolinked_url('http://example.com
|
493
|
-
@autolinked_text.should link_to_screen_name('foobar')
|
491
|
+
it "should link url" do
|
492
|
+
@autolinked_text.should have_autolinked_url('http://example.com/@foobar')
|
494
493
|
end
|
495
494
|
end
|
496
495
|
|
@@ -678,6 +677,11 @@ describe Twitter::Autolink do
|
|
678
677
|
linked.should have_autolinked_url('dummy', '#hashtag')
|
679
678
|
end
|
680
679
|
|
680
|
+
it "should customize href by cashtag_url_block option" do
|
681
|
+
linked = @linker.auto_link("$CASH", :cashtag_url_block => lambda{|a| "dummy"})
|
682
|
+
linked.should have_autolinked_url('dummy', '$CASH')
|
683
|
+
end
|
684
|
+
|
681
685
|
it "should customize href by link_url_block option" do
|
682
686
|
linked = @linker.auto_link("http://example.com/", :link_url_block => lambda{|a| "dummy"})
|
683
687
|
linked.should have_autolinked_url('dummy', 'http://example.com/')
|
data/spec/rewriter_spec.rb
CHANGED
@@ -527,9 +527,9 @@ describe Twitter::Rewriter do
|
|
527
527
|
context "with a username not followed by a /" do
|
528
528
|
def original_text; "http://example.com/@foobar"; end
|
529
529
|
|
530
|
-
it "should link
|
531
|
-
@block_args.should == ["http://example.com
|
532
|
-
@rewritten_text.should == "[rewritten]
|
530
|
+
it "should link url" do
|
531
|
+
@block_args.should == ["http://example.com/@foobar"]
|
532
|
+
@rewritten_text.should == "[rewritten]"
|
533
533
|
end
|
534
534
|
end
|
535
535
|
|
data/test/conformance_test.rb
CHANGED
@@ -33,7 +33,7 @@ class ConformanceTest < Test::Unit::TestCase
|
|
33
33
|
def equal_nodes?(expected, actual)
|
34
34
|
return false unless expected.name == actual.name
|
35
35
|
return false unless ordered_attributes(expected) == ordered_attributes(actual)
|
36
|
-
return false if expected.text? && actual.text? &&
|
36
|
+
return false if expected.text? && actual.text? && expected.content != actual.content
|
37
37
|
|
38
38
|
expected.children.each_with_index do |child, index|
|
39
39
|
return false unless equal_nodes?(child, actual.children[index])
|
data/twitter-text.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = "twitter-text"
|
5
|
-
s.version = "1.
|
5
|
+
s.version = "1.6.0"
|
6
6
|
s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle",
|
7
|
-
"Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii"]
|
7
|
+
"Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii", "James Koval"]
|
8
8
|
s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com",
|
9
|
-
"raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at", "keita@twitter.com"]
|
9
|
+
"raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at", "keita@twitter.com", "jkoval@twitter.com"]
|
10
10
|
s.homepage = "http://twitter.com"
|
11
11
|
s.description = s.summary = "A gem that provides text handling for Twitter"
|
12
12
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 15
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
|
-
-
|
8
|
+
- 6
|
9
9
|
- 0
|
10
|
-
version: 1.
|
10
|
+
version: 1.6.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Matt Sanford
|
@@ -18,12 +18,12 @@ authors:
|
|
18
18
|
- J.P. Cummins
|
19
19
|
- Yoshimasa Niwa
|
20
20
|
- Keita Fujii
|
21
|
+
- James Koval
|
21
22
|
autorequire:
|
22
23
|
bindir: bin
|
23
24
|
cert_chain: []
|
24
25
|
|
25
|
-
date:
|
26
|
-
default_executable:
|
26
|
+
date: 2013-03-01 00:00:00 Z
|
27
27
|
dependencies:
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
29
|
name: nokogiri
|
@@ -119,6 +119,7 @@ email:
|
|
119
119
|
- jcummins@twitter.com
|
120
120
|
- niw@niw.at
|
121
121
|
- keita@twitter.com
|
122
|
+
- jkoval@twitter.com
|
122
123
|
executables: []
|
123
124
|
|
124
125
|
extensions: []
|
@@ -159,7 +160,6 @@ files:
|
|
159
160
|
- spec/validation_spec.rb
|
160
161
|
- test/conformance_test.rb
|
161
162
|
- twitter-text.gemspec
|
162
|
-
has_rdoc: true
|
163
163
|
homepage: http://twitter.com
|
164
164
|
licenses: []
|
165
165
|
|
@@ -189,7 +189,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
189
189
|
requirements: []
|
190
190
|
|
191
191
|
rubyforge_project:
|
192
|
-
rubygems_version: 1.
|
192
|
+
rubygems_version: 1.8.24
|
193
193
|
signing_key:
|
194
194
|
specification_version: 3
|
195
195
|
summary: Twitter text handling library
|
@@ -205,3 +205,4 @@ test_files:
|
|
205
205
|
- spec/unicode_spec.rb
|
206
206
|
- spec/validation_spec.rb
|
207
207
|
- test/conformance_test.rb
|
208
|
+
has_rdoc: true
|