RubyGems - twitter-text - Versions diffs - 1.4.13 → 1.4.14 - Mend

twitter-text 1.4.13 → 1.4.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

data/lib/extractor.rb CHANGED Viewed

@@ -190,6 +190,11 @@ module Twitter
             last_url[:indices][1] = end_position
           end
         else
+          # In the case of t.co URLs, don't allow additional path characters
+          if url =~ Twitter::Regex[:valid_tco_url]
+            url = $&
+            end_position = start_position + url.char_length
+          end
           urls << {
             :url => url,
             :indices => [start_position, end_position]

data/lib/regex.rb CHANGED Viewed

@@ -106,14 +106,14 @@ module Twitter
     REGEXEN[:auto_link_hashtags] = /#{HASHTAG}/io
     # Used in Extractor and Rewriter for final filtering
-    REGEXEN[:end_hashtag_match] = /^(?:[#＃]|:\/\/)/o
+    REGEXEN[:end_hashtag_match] = /\A(?:[#＃]|:\/\/)/o
     REGEXEN[:at_signs] = /[@＠]/
     REGEXEN[:extract_mentions] = /(^|[^a-zA-Z0-9_])#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})/o
     REGEXEN[:extract_mentions_or_lists] = /(^|[^a-zA-Z0-9_])#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})(\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})?/o
     REGEXEN[:extract_reply] = /^(?:#{REGEXEN[:spaces]})*#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})/o
     # Used in Extractor and Rewriter for final filtering
-    REGEXEN[:end_screen_name_match] = /^(?:#{REGEXEN[:at_signs]}|#{REGEXEN[:latin_accents]}|:\/\/)/o
+    REGEXEN[:end_screen_name_match] = /\A(?:#{REGEXEN[:at_signs]}|#{REGEXEN[:latin_accents]}|:\/\/)/o
     REGEXEN[:auto_link_usernames_or_lists] = /([^a-zA-Z0-9_]|^|RT:?)([@＠]+)([a-zA-Z0-9_]{1,20})(\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})?/o
     REGEXEN[:auto_link_emoticon] = /(8\-\#|8\-E|\+\-\(|\`\@|\`O|\&lt;\|:~\(|\}:o\{|:\-\[|\&gt;o\&lt;|X\-\/|\[:-\]\-I\-|\/\/\/\/Ö\\\\\\\\|\(\|:\|\/\)|∑:\*\)|\( \| \))/
@@ -125,7 +125,7 @@ module Twitter
     REGEXEN[:valid_subdomain] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[_-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/io
     REGEXEN[:valid_domain_name] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/io
-    REGEXEN[:valid_gTLD] = /(?:(?:aero|asia|biz|cat|com|coop|edu|gov|info|int|jobs|mil|mobi|museum|name|net|org|pro|tel|travel)(?=[^a-z]|$))/i
+    REGEXEN[:valid_gTLD] = /(?:(?:aero|asia|biz|cat|com|coop|edu|gov|info|int|jobs|mil|mobi|museum|name|net|org|pro|tel|travel|xxx)(?=[^a-z]|$))/i
     REGEXEN[:valid_ccTLD] = %r{
       (?:
         (?:ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|
@@ -150,8 +150,11 @@ module Twitter
       (?:#{REGEXEN[:valid_gTLD]}|#{REGEXEN[:valid_ccTLD]}|#{REGEXEN[:valid_punycode]})
     /iox
+    # This is used in Extractor for stricter t.co URL extraction
+    REGEXEN[:valid_tco_url] = /^https?:\/\/t\.co\/[a-z0-9]+/i
     # This is used in Extractor to filter out unwanted URLs.
-    REGEXEN[:invalid_short_domain] = /^#{REGEXEN[:valid_domain_name]}#{REGEXEN[:valid_ccTLD]}$/io
+    REGEXEN[:invalid_short_domain] = /\A#{REGEXEN[:valid_domain_name]}#{REGEXEN[:valid_ccTLD]}\Z/io
     REGEXEN[:valid_port_number] = /[0-9]+/

data/spec/extractor_spec.rb CHANGED Viewed

@@ -169,6 +169,27 @@ describe Twitter::Extractor do
         @extractor.extract_urls("http://tld-too-short.x").should == []
       end
     end
+    describe "t.co URLS" do
+      TestUrls::TCO.each do |url|
+        it "should only extract the t.co URL from the URL #{url}" do
+          extracted_urls = @extractor.extract_urls(url)
+          extracted_urls.size.should == 1
+          extracted_url = extracted_urls.first
+          extracted_url.should_not == url
+          extracted_url.should == url[0...20]
+        end
+        it "should match the t.co URL from the URL #{url} when it's embedded in other text" do
+          text = "Sweet url: #{url} I found. #awesome"
+          extracted_urls = @extractor.extract_urls(text)
+          extracted_urls.size.should == 1
+          extracted_url = extracted_urls.first
+          extracted_url.should_not == url
+          extracted_url.should == url[0...20]
+        end
+      end
+    end
   end
   describe "urls with indices" do
@@ -200,6 +221,31 @@ describe Twitter::Extractor do
         @extractor.extract_urls_with_indices("http://tld-too-short.x").should == []
       end
     end
+    describe "t.co URLS" do
+      TestUrls::TCO.each do |url|
+        it "should only extract the t.co URL from the URL #{url} and adjust indices correctly" do
+          extracted_urls = @extractor.extract_urls_with_indices(url)
+          extracted_urls.size.should == 1
+          extracted_url = extracted_urls.first
+          extracted_url[:url].should_not include(url)
+          extracted_url[:url].should include(url[0...20])
+          extracted_url[:indices].first.should == 0
+          extracted_url[:indices].last.should == 20
+        end
+        it "should match the t.co URL from the URL #{url} when it's embedded in other text" do
+          text = "Sweet url: #{url} I found. #awesome"
+          extracted_urls = @extractor.extract_urls_with_indices(text)
+          extracted_urls.size.should == 1
+          extracted_url = extracted_urls.first
+          extracted_url[:url].should_not include(url)
+          extracted_url[:url].should include(url[0...20])
+          extracted_url[:indices].first.should == 11
+          extracted_url[:indices].last.should == 31
+        end
+      end
+    end
   end
   describe "hashtags" do

data/spec/test_urls.rb CHANGED Viewed

@@ -52,6 +52,29 @@ module TestUrls
     "http://twitt#{[0x202B].pack('U')}er.com",
     "http://twitt#{[0x202C].pack('U')}er.com",
     "http://twitt#{[0x202D].pack('U')}er.com",
-    "http://twitt#{[0x202E].pack('U')}er.com",
+    "http://twitt#{[0x202E].pack('U')}er.com"
   ] unless defined?(TestUrls::INVALID)
+  TCO = [
+    "http://t.co/P53cv5yO!",
+    "http://t.co/fQJmiPGg***",
+    "http://t.co/pbY2NfTZ's",
+    "http://t.co/2vYHpAc5;",
+    "http://t.co/ulYGBYSo:",
+    "http://t.co/GeT4bSiw=win",
+    "http://t.co/8MkmHU0k+fun",
+    "http://t.co/TKLp64dY.yes,",
+    "http://t.co/8vuO27cI$$",
+    "http://t.co/rPYTvdA8/",
+    "http://t.co/WvtMw5ku%",
+    "http://t.co/8t7G3ddS#",
+    "http://t.co/nfHNJDV2/#!",
+    "http://t.co/gK6NOXHs[good]",
+    "http://t.co/dMrT0o1Y]bad",
+    "http://t.co/FNkPfmii-",
+    "http://t.co/sMgS3pjI_oh",
+    "http://t.co/F8Dq3Plb~",
+    "http://t.co/ivvH58vC&help",
+    "http://t.co/iUBL15zD|NZ5KYLQ8"
+  ] unless defined?(TestUrls::TCO)
 end

data/twitter-text.gemspec CHANGED Viewed

@@ -2,7 +2,7 @@
 Gem::Specification.new do |s|
   s.name = "twitter-text"
-  s.version = "1.4.13"
+  s.version = "1.4.14"
   s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle",
                "Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii"]
   s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com",

metadata CHANGED Viewed

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: twitter-text
 version: !ruby/object:Gem::Version
-  hash: 29
+  hash: 27
   prerelease:
   segments:
   - 1
   - 4
-  - 13
-  version: 1.4.13
+  - 14
+  version: 1.4.14
 platform: ruby
 authors:
 - Matt Sanford
@@ -22,7 +22,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-11-02 00:00:00 -07:00
+date: 2011-12-01 00:00:00 -08:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency