twitter-text 1.5.0 → 1.6.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -1,4 +1,4 @@
1
- == twitter-text {<img src="https://secure.travis-ci.org/twitter/twitter-text-rb.png" />}[http://travis-ci.org/twitter/twitter-text-rb]
1
+ == twitter-text {<img src="https://secure.travis-ci.org/twitter/twitter-text-rb.png" />}[http://travis-ci.org/twitter/twitter-text-rb] {<img src="https://codeclimate.com/badge.png" />}[https://codeclimate.com/github/twitter/twitter-text-rb]
2
2
 
3
3
  A gem that provides text processing routines for Twitter Tweets. The major
4
4
  reason for this is to unify the various auto-linking and extraction of
@@ -207,7 +207,7 @@ module Twitter
207
207
  OPTIONS_NOT_ATTRIBUTES = Set.new([
208
208
  :url_class, :list_class, :username_class, :hashtag_class, :cashtag_class,
209
209
  :username_url_base, :list_url_base, :hashtag_url_base, :cashtag_url_base,
210
- :username_url_block, :list_url_block, :hashtag_url_block, :link_url_block,
210
+ :username_url_block, :list_url_block, :hashtag_url_block, :cashtag_url_block, :link_url_block,
211
211
  :username_include_symbol, :suppress_lists, :suppress_no_follow, :url_entities,
212
212
  :invisible_tag_attrs, :symbol_tag, :text_with_symbol_tag, :url_target,
213
213
  :link_attribute_block, :link_text_block
@@ -326,6 +326,11 @@ module Twitter
326
326
  hash = chars[entity[:indices].first]
327
327
  hashtag = entity[:hashtag]
328
328
  hashtag = yield(hashtag) if block_given?
329
+ hashtag_class = options[:hashtag_class]
330
+
331
+ if hashtag.match Twitter::Regex::REGEXEN[:rtl_chars]
332
+ hashtag_class += ' rtl'
333
+ end
329
334
 
330
335
  href = if options[:hashtag_url_block]
331
336
  options[:hashtag_url_block].call(hashtag)
@@ -334,7 +339,7 @@ module Twitter
334
339
  end
335
340
 
336
341
  html_attrs = {
337
- :class => "#{options[:hashtag_class]}",
342
+ :class => hashtag_class,
338
343
  # FIXME: To match our conformance test, the hash in the title must be half-width;
338
343
  # this is likely a bug in the conformance data.
340
345
  :title => "##{hashtag}"
@@ -311,13 +311,12 @@ module Twitter
311
311
  return [] unless text =~ /\$/
312
312
 
313
313
  tags = []
314
- text.scan(Twitter::Regex[:valid_cashtag]) do |cash_text|
314
+ text.scan(Twitter::Regex[:valid_cashtag]) do |before, dollar, cash_text|
315
315
  match_data = $~
316
- # cash_text doesn't contain $ symbol, so need to decrement start_position by one
317
- start_position = match_data.char_begin(1) - 1
318
- end_position = match_data.char_end(1)
316
+ start_position = match_data.char_begin(2)
317
+ end_position = match_data.char_end(3)
319
318
  tags << {
320
- :cashtag => cash_text[0],
319
+ :cashtag => cash_text,
321
320
  :indices => [start_position, end_position]
322
321
  }
323
322
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Twitter
4
4
  # A collection of regular expressions for parsing Tweet text. The regular expression
5
- # list is frozen at load time to ensure immutability. These reular expressions are
5
+ # list is frozen at load time to ensure immutability. These regular expressions are
6
6
  # used throughout the <tt>Twitter</tt> classes. Special care has been taken to make
7
7
  # sure these regular expressions work with Tweets in all languages.
8
8
  class Regex
@@ -82,6 +82,14 @@ module Twitter
82
82
  regex_range(0x1e00, 0x1eff)
83
83
  ].join('').freeze
84
84
 
85
+ RTL_CHARACTERS = [
86
+ regex_range(0x0600,0x06FF),
87
+ regex_range(0x0750,0x077F),
88
+ regex_range(0x0590,0x05FF),
89
+ regex_range(0xFE70,0xFEFF)
90
+ ].join('').freeze
91
+
92
+
85
93
  NON_LATIN_HASHTAG_CHARS = [
86
94
  # Cyrillic (Russian, Ukrainian, etc.)
87
95
  regex_range(0x0400, 0x04ff), # Cyrillic
@@ -212,7 +220,7 @@ module Twitter
212
220
 
213
221
  REGEXEN[:valid_port_number] = /[0-9]+/
214
222
 
215
- REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\.\$\/%#\[\]\-_~&|#{LATIN_ACCENTS}]/io
223
+ REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\.\$\/%#\[\]\-_~&|@#{LATIN_ACCENTS}]/io
216
224
  # Allow URL paths to contain balanced parens
217
225
  # 1. Used in Wikipedia URLs like /Primer_(film)
218
226
  # 2. Used in IIS sessions like /S(dfd346)/
@@ -220,16 +228,15 @@ module Twitter
220
228
  # Valid end-of-path characters (so /foo. does not gobble the period).
221
229
  # 1. Allow =&# for empty URL parameters and other URL-join artifacts
222
230
  REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9=_#\/\+\-#{LATIN_ACCENTS}]|(?:#{REGEXEN[:valid_url_balanced_parens]})/io
223
- # Allow @ in a url, but only in the middle. Catch things like http://example.com/@user/
224
231
  REGEXEN[:valid_url_path] = /(?:
225
232
  (?:
226
233
  #{REGEXEN[:valid_general_url_path_chars]}*
227
234
  (?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)*
228
235
  #{REGEXEN[:valid_url_path_ending_chars]}
229
- )|(?:@#{REGEXEN[:valid_general_url_path_chars]}+\/)
236
+ )|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/)
230
237
  )/iox
231
238
 
232
- REGEXEN[:valid_url_query_chars] = /[a-z0-9!?\*'\(\);:&=\+\$\/%#\[\]\-_\.,~|]/i
239
+ REGEXEN[:valid_url_query_chars] = /[a-z0-9!?\*'\(\);:&=\+\$\/%#\[\]\-_\.,~|@]/i
233
240
  REGEXEN[:valid_url_query_ending_chars] = /[a-z0-9_&=#\/]/i
234
241
  REGEXEN[:valid_url] = %r{
235
242
  ( # $1 total match
@@ -245,7 +252,7 @@ module Twitter
245
252
  }iox;
246
253
 
247
254
  REGEXEN[:cashtag] = /[a-z]{1,6}(?:[._][a-z]{1,2})?/i
248
- REGEXEN[:valid_cashtag] = /(?:^|#{REGEXEN[:spaces]})\$(#{REGEXEN[:cashtag]})(?=$|\s|[#{PUNCTUATION_CHARS}])/i
255
+ REGEXEN[:valid_cashtag] = /(^|#{REGEXEN[:spaces]})(\$)(#{REGEXEN[:cashtag]})(?=$|\s|[#{PUNCTUATION_CHARS}])/i
249
256
 
250
257
  # These URL validation pattern strings are based on the ABNF from RFC 3986
251
258
  REGEXEN[:validate_url_unreserved] = /[a-z0-9\-._~]/i
@@ -342,6 +349,8 @@ module Twitter
342
349
  )?\Z
343
350
  }ix
344
351
 
352
+ REGEXEN[:rtl_chars] = /[#{RTL_CHARACTERS}]/io
353
+
345
354
  REGEXEN.each_pair{|k,v| v.freeze }
346
355
 
347
356
  # Return the regular expression for a given <tt>key</tt>. If the <tt>key</tt>
@@ -3,8 +3,9 @@ module Twitter
3
3
  MAX_LENGTH = 140
4
4
 
5
5
  DEFAULT_TCO_URL_LENGTHS = {
6
- :short_url_length => 20,
7
- :short_url_length_https => 21
6
+ :short_url_length => 22,
7
+ :short_url_length_https => 23,
8
+ :characters_reserved_per_media => 22
8
9
  }.freeze
9
10
 
10
11
  # Returns the length of the string as it would be displayed. This is equivalent to the length of the Unicode NFC
@@ -488,9 +488,8 @@ describe Twitter::Autolink do
488
488
  context "with a username not followed by a /" do
489
489
  def original_text; 'http://example.com/@foobar'; end
490
490
 
491
- it "should link small url and username" do
492
- @autolinked_text.should have_autolinked_url('http://example.com/')
493
- @autolinked_text.should link_to_screen_name('foobar')
491
+ it "should link url" do
492
+ @autolinked_text.should have_autolinked_url('http://example.com/@foobar')
494
493
  end
495
494
  end
496
495
 
@@ -678,6 +677,11 @@ describe Twitter::Autolink do
678
677
  linked.should have_autolinked_url('dummy', '#hashtag')
679
678
  end
680
679
 
680
+ it "should customize href by cashtag_url_block option" do
681
+ linked = @linker.auto_link("$CASH", :cashtag_url_block => lambda{|a| "dummy"})
682
+ linked.should have_autolinked_url('dummy', '$CASH')
683
+ end
684
+
681
685
  it "should customize href by link_url_block option" do
682
686
  linked = @linker.auto_link("http://example.com/", :link_url_block => lambda{|a| "dummy"})
683
687
  linked.should have_autolinked_url('dummy', 'http://example.com/')
@@ -527,9 +527,9 @@ describe Twitter::Rewriter do
527
527
  context "with a username not followed by a /" do
528
528
  def original_text; "http://example.com/@foobar"; end
529
529
 
530
- it "should link small url and username" do
531
- @block_args.should == ["http://example.com/"]
532
- @rewritten_text.should == "[rewritten]@foobar"
530
+ it "should link url" do
531
+ @block_args.should == ["http://example.com/@foobar"]
532
+ @rewritten_text.should == "[rewritten]"
533
533
  end
534
534
  end
535
535
 
@@ -33,7 +33,7 @@ class ConformanceTest < Test::Unit::TestCase
33
33
  def equal_nodes?(expected, actual)
34
34
  return false unless expected.name == actual.name
35
35
  return false unless ordered_attributes(expected) == ordered_attributes(actual)
36
- return false if expected.text? && actual.text? && !(expected.content= actual.content)
36
+ return false if expected.text? && actual.text? && expected.content != actual.content
37
37
 
38
38
  expected.children.each_with_index do |child, index|
39
39
  return false unless equal_nodes?(child, actual.children[index])
data/twitter-text.gemspec CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "twitter-text"
5
- s.version = "1.5.0"
5
+ s.version = "1.6.0"
6
6
  s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle",
7
- "Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii"]
7
+ "Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii", "James Koval"]
8
8
  s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com",
9
- "raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at", "keita@twitter.com"]
9
+ "raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at", "keita@twitter.com", "jkoval@twitter.com"]
10
10
  s.homepage = "http://twitter.com"
11
11
  s.description = s.summary = "A gem that provides text handling for Twitter"
12
12
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter-text
3
3
  version: !ruby/object:Gem::Version
4
- hash: 3
4
+ hash: 15
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
- - 5
8
+ - 6
9
9
  - 0
10
- version: 1.5.0
10
+ version: 1.6.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Matt Sanford
@@ -18,12 +18,12 @@ authors:
18
18
  - J.P. Cummins
19
19
  - Yoshimasa Niwa
20
20
  - Keita Fujii
21
+ - James Koval
21
22
  autorequire:
22
23
  bindir: bin
23
24
  cert_chain: []
24
25
 
25
- date: 2012-06-18 00:00:00 -07:00
26
- default_executable:
26
+ date: 2013-03-01 00:00:00 Z
27
27
  dependencies:
28
28
  - !ruby/object:Gem::Dependency
29
29
  name: nokogiri
@@ -119,6 +119,7 @@ email:
119
119
  - jcummins@twitter.com
120
120
  - niw@niw.at
121
121
  - keita@twitter.com
122
+ - jkoval@twitter.com
122
123
  executables: []
123
124
 
124
125
  extensions: []
@@ -159,7 +160,6 @@ files:
159
160
  - spec/validation_spec.rb
160
161
  - test/conformance_test.rb
161
162
  - twitter-text.gemspec
162
- has_rdoc: true
163
163
  homepage: http://twitter.com
164
164
  licenses: []
165
165
 
@@ -189,7 +189,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
189
189
  requirements: []
190
190
 
191
191
  rubyforge_project:
192
- rubygems_version: 1.4.1
192
+ rubygems_version: 1.8.24
193
193
  signing_key:
194
194
  specification_version: 3
195
195
  summary: Twitter text handling library
@@ -205,3 +205,4 @@ test_files:
205
205
  - spec/unicode_spec.rb
206
206
  - spec/validation_spec.rb
207
207
  - test/conformance_test.rb
208
+ has_rdoc: true