twitter-text 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -1,4 +1,4 @@
1
- == twitter-text {<img src="https://secure.travis-ci.org/twitter/twitter-text-rb.png" />}[http://travis-ci.org/twitter/twitter-text-rb]
1
+ == twitter-text {<img src="https://secure.travis-ci.org/twitter/twitter-text-rb.png" />}[http://travis-ci.org/twitter/twitter-text-rb] {<img src="https://codeclimate.com/badge.png" />}[https://codeclimate.com/github/twitter/twitter-text-rb]
2
2
 
3
3
  A gem that provides text processing routines for Twitter Tweets. The major
4
4
  reason for this is to unify the various auto-linking and extraction of
@@ -207,7 +207,7 @@ module Twitter
207
207
  OPTIONS_NOT_ATTRIBUTES = Set.new([
208
208
  :url_class, :list_class, :username_class, :hashtag_class, :cashtag_class,
209
209
  :username_url_base, :list_url_base, :hashtag_url_base, :cashtag_url_base,
210
- :username_url_block, :list_url_block, :hashtag_url_block, :link_url_block,
210
+ :username_url_block, :list_url_block, :hashtag_url_block, :cashtag_url_block, :link_url_block,
211
211
  :username_include_symbol, :suppress_lists, :suppress_no_follow, :url_entities,
212
212
  :invisible_tag_attrs, :symbol_tag, :text_with_symbol_tag, :url_target,
213
213
  :link_attribute_block, :link_text_block
@@ -326,6 +326,11 @@ module Twitter
326
326
  hash = chars[entity[:indices].first]
327
327
  hashtag = entity[:hashtag]
328
328
  hashtag = yield(hashtag) if block_given?
329
+ hashtag_class = options[:hashtag_class]
330
+
331
+ if hashtag.match Twitter::Regex::REGEXEN[:rtl_chars]
332
+ hashtag_class += ' rtl'
333
+ end
329
334
 
330
335
  href = if options[:hashtag_url_block]
331
336
  options[:hashtag_url_block].call(hashtag)
@@ -334,7 +339,7 @@ module Twitter
334
339
  end
335
340
 
336
341
  html_attrs = {
337
- :class => "#{options[:hashtag_class]}",
342
+ :class => hashtag_class,
338
343
  # FIXME: According to our conformance test, the hash in the title should be half-width;
338
343
  # this looks like a bug in the conformance data.
340
345
  :title => "##{hashtag}"
@@ -311,13 +311,12 @@ module Twitter
311
311
  return [] unless text =~ /\$/
312
312
 
313
313
  tags = []
314
- text.scan(Twitter::Regex[:valid_cashtag]) do |cash_text|
314
+ text.scan(Twitter::Regex[:valid_cashtag]) do |before, dollar, cash_text|
315
315
  match_data = $~
316
- # cash_text doesn't contain $ symbol, so need to decrement start_position by one
317
- start_position = match_data.char_begin(1) - 1
318
- end_position = match_data.char_end(1)
316
+ start_position = match_data.char_begin(2)
317
+ end_position = match_data.char_end(3)
319
318
  tags << {
320
- :cashtag => cash_text[0],
319
+ :cashtag => cash_text,
321
320
  :indices => [start_position, end_position]
322
321
  }
323
322
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Twitter
4
4
  # A collection of regular expressions for parsing Tweet text. The regular expression
5
- # list is frozen at load time to ensure immutability. These reular expressions are
5
+ # list is frozen at load time to ensure immutability. These regular expressions are
6
6
  # used throughout the <tt>Twitter</tt> classes. Special care has been taken to make
7
7
  # sure these regular expressions work with Tweets in all languages.
8
8
  class Regex
@@ -82,6 +82,14 @@ module Twitter
82
82
  regex_range(0x1e00, 0x1eff)
83
83
  ].join('').freeze
84
84
 
85
+ RTL_CHARACTERS = [
86
+ regex_range(0x0600,0x06FF),
87
+ regex_range(0x0750,0x077F),
88
+ regex_range(0x0590,0x05FF),
89
+ regex_range(0xFE70,0xFEFF)
90
+ ].join('').freeze
91
+
92
+
85
93
  NON_LATIN_HASHTAG_CHARS = [
86
94
  # Cyrillic (Russian, Ukrainian, etc.)
87
95
  regex_range(0x0400, 0x04ff), # Cyrillic
@@ -212,7 +220,7 @@ module Twitter
212
220
 
213
221
  REGEXEN[:valid_port_number] = /[0-9]+/
214
222
 
215
- REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\.\$\/%#\[\]\-_~&|#{LATIN_ACCENTS}]/io
223
+ REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\.\$\/%#\[\]\-_~&|@#{LATIN_ACCENTS}]/io
216
224
  # Allow URL paths to contain balanced parens
217
225
  # 1. Used in Wikipedia URLs like /Primer_(film)
218
226
  # 2. Used in IIS sessions like /S(dfd346)/
@@ -220,16 +228,15 @@ module Twitter
220
228
  # Valid end-of-path characters (so /foo. does not gobble the period).
221
229
  # 1. Allow =&# for empty URL parameters and other URL-join artifacts
222
230
  REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9=_#\/\+\-#{LATIN_ACCENTS}]|(?:#{REGEXEN[:valid_url_balanced_parens]})/io
223
- # Allow @ in a url, but only in the middle. Catch things like http://example.com/@user/
224
231
  REGEXEN[:valid_url_path] = /(?:
225
232
  (?:
226
233
  #{REGEXEN[:valid_general_url_path_chars]}*
227
234
  (?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)*
228
235
  #{REGEXEN[:valid_url_path_ending_chars]}
229
- )|(?:@#{REGEXEN[:valid_general_url_path_chars]}+\/)
236
+ )|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/)
230
237
  )/iox
231
238
 
232
- REGEXEN[:valid_url_query_chars] = /[a-z0-9!?\*'\(\);:&=\+\$\/%#\[\]\-_\.,~|]/i
239
+ REGEXEN[:valid_url_query_chars] = /[a-z0-9!?\*'\(\);:&=\+\$\/%#\[\]\-_\.,~|@]/i
233
240
  REGEXEN[:valid_url_query_ending_chars] = /[a-z0-9_&=#\/]/i
234
241
  REGEXEN[:valid_url] = %r{
235
242
  ( # $1 total match
@@ -245,7 +252,7 @@ module Twitter
245
252
  }iox;
246
253
 
247
254
  REGEXEN[:cashtag] = /[a-z]{1,6}(?:[._][a-z]{1,2})?/i
248
- REGEXEN[:valid_cashtag] = /(?:^|#{REGEXEN[:spaces]})\$(#{REGEXEN[:cashtag]})(?=$|\s|[#{PUNCTUATION_CHARS}])/i
255
+ REGEXEN[:valid_cashtag] = /(^|#{REGEXEN[:spaces]})(\$)(#{REGEXEN[:cashtag]})(?=$|\s|[#{PUNCTUATION_CHARS}])/i
249
256
 
250
257
  # These URL validation pattern strings are based on the ABNF from RFC 3986
251
258
  REGEXEN[:validate_url_unreserved] = /[a-z0-9\-._~]/i
@@ -342,6 +349,8 @@ module Twitter
342
349
  )?\Z
343
350
  }ix
344
351
 
352
+ REGEXEN[:rtl_chars] = /[#{RTL_CHARACTERS}]/io
353
+
345
354
  REGEXEN.each_pair{|k,v| v.freeze }
346
355
 
347
356
  # Return the regular expression for a given <tt>key</tt>. If the <tt>key</tt>
@@ -3,8 +3,9 @@ module Twitter
3
3
  MAX_LENGTH = 140
4
4
 
5
5
  DEFAULT_TCO_URL_LENGTHS = {
6
- :short_url_length => 20,
7
- :short_url_length_https => 21
6
+ :short_url_length => 22,
7
+ :short_url_length_https => 23,
8
+ :characters_reserved_per_media => 22
8
9
  }.freeze
9
10
 
10
11
  # Returns the length of the string as it would be displayed. This is equivalent to the length of the Unicode NFC
@@ -488,9 +488,8 @@ describe Twitter::Autolink do
488
488
  context "with a username not followed by a /" do
489
489
  def original_text; 'http://example.com/@foobar'; end
490
490
 
491
- it "should link small url and username" do
492
- @autolinked_text.should have_autolinked_url('http://example.com/')
493
- @autolinked_text.should link_to_screen_name('foobar')
491
+ it "should link url" do
492
+ @autolinked_text.should have_autolinked_url('http://example.com/@foobar')
494
493
  end
495
494
  end
496
495
 
@@ -678,6 +677,11 @@ describe Twitter::Autolink do
678
677
  linked.should have_autolinked_url('dummy', '#hashtag')
679
678
  end
680
679
 
680
+ it "should customize href by cashtag_url_block option" do
681
+ linked = @linker.auto_link("$CASH", :cashtag_url_block => lambda{|a| "dummy"})
682
+ linked.should have_autolinked_url('dummy', '$CASH')
683
+ end
684
+
681
685
  it "should customize href by link_url_block option" do
682
686
  linked = @linker.auto_link("http://example.com/", :link_url_block => lambda{|a| "dummy"})
683
687
  linked.should have_autolinked_url('dummy', 'http://example.com/')
@@ -527,9 +527,9 @@ describe Twitter::Rewriter do
527
527
  context "with a username not followed by a /" do
528
528
  def original_text; "http://example.com/@foobar"; end
529
529
 
530
- it "should link small url and username" do
531
- @block_args.should == ["http://example.com/"]
532
- @rewritten_text.should == "[rewritten]@foobar"
530
+ it "should link url" do
531
+ @block_args.should == ["http://example.com/@foobar"]
532
+ @rewritten_text.should == "[rewritten]"
533
533
  end
534
534
  end
535
535
 
@@ -33,7 +33,7 @@ class ConformanceTest < Test::Unit::TestCase
33
33
  def equal_nodes?(expected, actual)
34
34
  return false unless expected.name == actual.name
35
35
  return false unless ordered_attributes(expected) == ordered_attributes(actual)
36
- return false if expected.text? && actual.text? && !(expected.content= actual.content)
36
+ return false if expected.text? && actual.text? && expected.content != actual.content
37
37
 
38
38
  expected.children.each_with_index do |child, index|
39
39
  return false unless equal_nodes?(child, actual.children[index])
data/twitter-text.gemspec CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "twitter-text"
5
- s.version = "1.5.0"
5
+ s.version = "1.6.0"
6
6
  s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle",
7
- "Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii"]
7
+ "Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii", "James Koval"]
8
8
  s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com",
9
- "raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at", "keita@twitter.com"]
9
+ "raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at", "keita@twitter.com", "jkoval@twitter.com"]
10
10
  s.homepage = "http://twitter.com"
11
11
  s.description = s.summary = "A gem that provides text handling for Twitter"
12
12
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter-text
3
3
  version: !ruby/object:Gem::Version
4
- hash: 3
4
+ hash: 15
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
- - 5
8
+ - 6
9
9
  - 0
10
- version: 1.5.0
10
+ version: 1.6.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Matt Sanford
@@ -18,12 +18,12 @@ authors:
18
18
  - J.P. Cummins
19
19
  - Yoshimasa Niwa
20
20
  - Keita Fujii
21
+ - James Koval
21
22
  autorequire:
22
23
  bindir: bin
23
24
  cert_chain: []
24
25
 
25
- date: 2012-06-18 00:00:00 -07:00
26
- default_executable:
26
+ date: 2013-03-01 00:00:00 Z
27
27
  dependencies:
28
28
  - !ruby/object:Gem::Dependency
29
29
  name: nokogiri
@@ -119,6 +119,7 @@ email:
119
119
  - jcummins@twitter.com
120
120
  - niw@niw.at
121
121
  - keita@twitter.com
122
+ - jkoval@twitter.com
122
123
  executables: []
123
124
 
124
125
  extensions: []
@@ -159,7 +160,6 @@ files:
159
160
  - spec/validation_spec.rb
160
161
  - test/conformance_test.rb
161
162
  - twitter-text.gemspec
162
- has_rdoc: true
163
163
  homepage: http://twitter.com
164
164
  licenses: []
165
165
 
@@ -189,7 +189,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
189
189
  requirements: []
190
190
 
191
191
  rubyforge_project:
192
- rubygems_version: 1.4.1
192
+ rubygems_version: 1.8.24
193
193
  signing_key:
194
194
  specification_version: 3
195
195
  summary: Twitter text handling library
@@ -205,3 +205,4 @@ test_files:
205
205
  - spec/unicode_spec.rb
206
206
  - spec/validation_spec.rb
207
207
  - test/conformance_test.rb
208
+ has_rdoc: true