RubyGems - twitter-text - Versions diffs - 1.4.9 → 1.4.10 - Mend

twitter-text 1.4.9 → 1.4.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

data/Gemfile.lock CHANGED Viewed

@@ -1,14 +1,16 @@
 PATH
   remote: .
   specs:
-    twitter-text (1.4.8)
+    twitter-text (1.4.10)
       activesupport
 GEM
   remote: http://rubygems.org/
   specs:
-    activesupport (3.0.3)
+    activesupport (3.1.0)
+      multi_json (~> 1.0)
     diff-lcs (1.1.2)
+    multi_json (1.0.4)
     nokogiri (1.4.4)
     nokogiri (1.4.4-java)
       weakling (>= 0.0.3)

data/lib/autolink.rb CHANGED Viewed

@@ -20,7 +20,7 @@ module Twitter
     OPTIONS_NOT_ATTRIBUTES = [:url_class, :list_class, :username_class, :hashtag_class,
                               :username_url_base, :list_url_base, :hashtag_url_base,
                               :username_url_block, :list_url_block, :hashtag_url_block, :link_url_block,
-                              :suppress_lists, :suppress_no_follow]
+                              :suppress_lists, :suppress_no_follow, :url_entities]
     HTML_ENTITIES = {
       '&' => '&amp;',
@@ -139,6 +139,16 @@ module Twitter
       options = href_options.dup
       options[:rel] = "nofollow" unless options.delete(:suppress_no_follow)
       options[:class] = options.delete(:url_class)
+      url_entities = {}
+      if options[:url_entities]
+        options[:url_entities].each do |entity|
+          entity = entity.with_indifferent_access
+          url_entities[entity[:url]] = entity
+        end
+        options.delete(:url_entities)
+      end
       html_attrs = html_attrs_for_options(options)
       Twitter::Rewriter.rewrite_urls(text) do |url|
@@ -147,7 +157,13 @@ module Twitter
         else
           html_escape(url)
         end
-        %(<a href="#{href}"#{html_attrs}>#{html_escape(url)}</a>)
+        display_url = url
+        if url_entities[url] && url_entities[url][:display_url]
+          display_url = url_entities[url][:display_url]
+        end
+        %(<a href="#{href}"#{html_attrs}>#{html_escape(display_url)}</a>)
       end
     end

data/lib/extractor.rb CHANGED Viewed

@@ -155,9 +155,12 @@ module Twitter
       return [] unless text
       urls = []
       position = 0
-      text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, path, query|
+      text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, port, path, query|
         valid_url_match_data = $~
-        if protocol && !protocol.empty?
+        # Regex in Ruby 1.8 doesn't support lookbehind, so we need to manually filter out
+        # the short URLs without protocol and path, i.e., [domain].[ccTLD]
+        unless !protocol && !path && domain =~ Twitter::Regex[:valid_short_domain]
           start_position = valid_url_match_data.char_begin(3)
           end_position = valid_url_match_data.char_end(3)
           urls << {

data/lib/regex.rb CHANGED Viewed

@@ -111,14 +111,35 @@ module Twitter
     REGEXEN[:auto_link_emoticon] = /(8\-\#|8\-E|\+\-\(|\`\@|\`O|\&lt;\|:~\(|\}:o\{|:\-\[|\&gt;o\&lt;|X\-\/|\[:-\]\-I\-|\/\/\/\/Ö\\\\\\\\|\(\|:\|\/\)|∑:\*\)|\( \| \))/
     # URL related hash regex collection
-    REGEXEN[:valid_preceding_chars] = /(?:[^-\/"':!=A-Z0-9_@＠]|^|\:)/i
+    REGEXEN[:valid_preceding_chars] = /(?:[^-\/"'!=A-Z0-9_@＠\.]|^)/i
-    DOMAIN_EXCLUDE_PART = "[:punct:][:space:][:blank:]#{[0x00A0].pack('U')}"
-    REGEXEN[:valid_subdomain] = /(?:[^#{DOMAIN_EXCLUDE_PART}](?:[_-]|[^#{DOMAIN_EXCLUDE_PART}])*)?[^#{DOMAIN_EXCLUDE_PART}]\./
-    REGEXEN[:valid_domain_name] = /(?:[^#{DOMAIN_EXCLUDE_PART}](?:[-]|[^#{DOMAIN_EXCLUDE_PART}])*)?[^#{DOMAIN_EXCLUDE_PART}]/
-    REGEXEN[:valid_domain] = /#{REGEXEN[:valid_subdomain]}*#{REGEXEN[:valid_domain_name]}\.(?:xn--[a-z0-9]{2,}|[a-z]{2,})(?::[0-9]+)?/i
+    DOMAIN_VALID_CHARS = "[^[:punct:][:space:][:blank:]#{[0x00A0].pack('U')}]"
+    REGEXEN[:valid_subdomain] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[_-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/i
+    REGEXEN[:valid_domain_name] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/i
-    REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\$\/%#\[\]\-_~|#{LATIN_ACCENTS}]/i
+    REGEXEN[:valid_gTLD] = /(?:(?:aero|asia|biz|cat|com|coop|edu|gov|info|int|jobs|mil|mobi|museum|name|net|org|pro|tel|travel)(?=[^[:alpha:]]|$))/i
+    REGEXEN[:valid_ccTLD] = %r{
+      (?:
+        (?:ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|
+        ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|
+        gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|
+        lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|
+        pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sy|sz|tc|td|tf|tg|th|
+        tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw)
+        (?=[^[:alpha:]]|$)
+      )
+    }ix
+    REGEXEN[:valid_punycode] = /(?:xn--[0-9a-z]+)/
+    REGEXEN[:valid_domain] = /(?:
+      #{REGEXEN[:valid_subdomain]}*#{REGEXEN[:valid_domain_name]}
+      (?:#{REGEXEN[:valid_gTLD]}|#{REGEXEN[:valid_ccTLD]}|#{REGEXEN[:valid_punycode]})
+    )/ix
+    REGEXEN[:valid_short_domain] = /^#{REGEXEN[:valid_domain_name]}#{REGEXEN[:valid_ccTLD]}$/
+    REGEXEN[:valid_port_number] = /[0-9]+/
+    REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\$\/%#\[\]\-_~&|#{LATIN_ACCENTS}]/i
     # Allow URL paths to contain balanced parens
     #  1. Used in Wikipedia URLs like /Primer_(film)
     #  2. Used in IIS sessions like /S(dfd346)/
@@ -139,16 +160,17 @@ module Twitter
       (                                                                                     #   $1 total match
         (#{REGEXEN[:valid_preceding_chars]})                                                #   $2 Preceeding chracter
         (                                                                                   #   $3 URL
-          (https?:\/\/)                                                                     #   $4 Protocol
-          (#{REGEXEN[:valid_domain]})                                                       #   $5 Domain(s) and optional post number
+          (https?:\/\/)?                                                                    #   $4 Protocol (optional)
+          (#{REGEXEN[:valid_domain]})                                                       #   $5 Domain(s)
+          (?::(#{REGEXEN[:valid_port_number]}))?                                            #   $6 Port number (optional)
           (/
             (?:
               #{REGEXEN[:valid_url_path_chars]}+#{REGEXEN[:valid_url_path_ending_chars]}|   # 1+ path chars and a valid last char
               #{REGEXEN[:valid_url_path_chars]}+#{REGEXEN[:valid_url_path_ending_chars]}?|  # Optional last char to handle /@foo/ case
               #{REGEXEN[:valid_url_path_ending_chars]}                                      # Just a # case
             )?
-          )?                                                                                #   $6 URL Path and anchor
-          (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? #   $7 Query String
+          )?                                                                                #   $7 URL Path and anchor
+          (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? #   $8 Query String
         )
       )
     }iox;
@@ -161,7 +183,7 @@ module Twitter
       #{REGEXEN[:validate_url_unreserved]}|
       #{REGEXEN[:validate_url_pct_encoded]}|
       #{REGEXEN[:validate_url_sub_delims]}|
-      :|@
+      [:\|@]
     )/iox
     REGEXEN[:validate_url_scheme] = /(?:[a-z][a-z0-9+\-.]*)/i
@@ -236,11 +258,9 @@ module Twitter
     REGEXEN[:validate_url_unencoded] = %r{
       \A                                #  Full URL
       (?:
-        ([^:/?#]+):                    #  $1 Scheme
-      )
-      (?://
-        ([^/?#]*)                      #  $2 Authority
-      )
+        ([^:/?#]+)://                  #  $1 Scheme
+      )?
+      ([^/?#]*)                        #  $2 Authority
       ([^?#]*)                         #  $3 Path
       (?:
         \?([^#]*)                      #  $4 Query

data/lib/validation.rb CHANGED Viewed

@@ -74,7 +74,7 @@ module Twitter
       extracted.size == 1 && extracted.first == hashtag[1..-1]
     end
-    def valid_url?(url, unicode_domains=true)
+    def valid_url?(url, unicode_domains=true, require_protocol=true)
       return false if !url || url.empty?
       url_parts = url.match(Twitter::Regex[:validate_url_unencoded])
@@ -82,7 +82,8 @@ module Twitter
       scheme, authority, path, query, fragment = url_parts.captures
-      return false unless (valid_match?(scheme, Twitter::Regex[:validate_url_scheme]) && scheme.match(/\Ahttps?\Z/i) &&
+      return false unless ((!require_protocol ||
+                           (valid_match?(scheme, Twitter::Regex[:validate_url_scheme]) && scheme.match(/\Ahttps?\Z/i))) &&
                            valid_match?(path, Twitter::Regex[:validate_url_path]) &&
                            valid_match?(query, Twitter::Regex[:validate_url_query], true) &&
                            valid_match?(fragment, Twitter::Regex[:validate_url_fragment], true))

data/spec/autolinking_spec.rb CHANGED Viewed

@@ -391,18 +391,18 @@ describe Twitter::Autolink do
         end
         context "balanced parens with a double quote inside" do
-          def url; "http://foo.bar/foo_(\")_bar" end
+          def url; "http://foo.com/foo_(\")_bar" end
           it "should be linked" do
-            @autolinked_text.should have_autolinked_url("http://foo.bar/foo_")
+            @autolinked_text.should have_autolinked_url("http://foo.com/foo_")
           end
         end
         context "balanced parens hiding XSS" do
-          def url; 'http://x.xx/("style="color:red"onmouseover="alert(1)' end
+          def url; 'http://x.xx.com/("style="color:red"onmouseover="alert(1)' end
           it "should be linked" do
-            @autolinked_text.should have_autolinked_url("http://x.xx/")
+            @autolinked_text.should have_autolinked_url("http://x.xx.com/")
           end
         end
       end
@@ -479,10 +479,10 @@ describe Twitter::Autolink do
       context "with a @ in a URL" do
         context "with XSS attack" do
-          def original_text; 'http://x.xx/@"style="color:pink"onmouseover=alert(1)//'; end
+          def original_text; 'http://x.xx.com/@"style="color:pink"onmouseover=alert(1)//'; end
           it "should not allow XSS follwing @" do
-            @autolinked_text.should have_autolinked_url('http://x.xx/')
+            @autolinked_text.should have_autolinked_url('http://x.xx.com/')
           end
         end

data/spec/rewriter_spec.rb CHANGED Viewed

@@ -432,19 +432,19 @@ describe Twitter::Rewriter do
       end
       context "balanced parens with a double quote inside" do
-        def url; "http://foo.bar/foo_(\")_bar" end
+        def url; "http://foo.bar.com/foo_(\")_bar" end
         it "should be rewritten" do
-          @block_args.should == ["http://foo.bar/foo_"];
+          @block_args.should == ["http://foo.bar.com/foo_"];
           @rewritten_text.should == "I found a neatness ([rewritten](\")_bar)"
         end
       end
       context "balanced parens hiding XSS" do
-        def url; 'http://x.xx/("style="color:red"onmouseover="alert(1)' end
+        def url; 'http://x.xx.com/("style="color:red"onmouseover="alert(1)' end
         it "should be rewritten" do
-          @block_args.should == ["http://x.xx/"];
+          @block_args.should == ["http://x.xx.com/"];
           @rewritten_text.should == 'I found a neatness ([rewritten]("style="color:red"onmouseover="alert(1))'
         end
       end
@@ -526,10 +526,10 @@ describe Twitter::Rewriter do
     context "with a @ in a URL" do
       context "with XSS attack" do
-        def original_text; 'http://x.xx/@"style="color:pink"onmouseover=alert(1)//'; end
+        def original_text; 'http://x.xx.com/@"style="color:pink"onmouseover=alert(1)//'; end
         it "should not allow XSS follwing @" do
-          @block_args.should == ["http://x.xx/"]
+          @block_args.should == ["http://x.xx.com/"]
           @rewritten_text.should == '[rewritten]@"style="color:pink"onmouseover=alert(1)//'
         end
       end

data/spec/test_urls.rb CHANGED Viewed

@@ -26,14 +26,16 @@ module TestUrls
     "http://a_b.c-d.com",
     "http://a-b.b.com",
     "http://twitter-dash.com",
-    # "t.co/nwcLTFF"
+    "www.foobar.com",
+    "WWW.FOOBAR.COM",
+    "www.foobar.co.jp",
+    "http://t.co",
+    "t.co/nwcLTFF"
   ] unless defined?(TestUrls::VALID)
   INVALID = [
     "http://no-tld",
     "http://tld-too-short.x",
-    "www.foobar.com",
-    "WWW.FOOBAR.COM",
     "http://-doman_dash.com",
     "http://_leadingunderscore.twitter.com",
     "http://trailingunderscore_.twitter.com",

data/test/conformance_test.rb CHANGED Viewed

@@ -50,7 +50,7 @@ class ConformanceTest < Test::Unit::TestCase
       run_conformance_test(File.join(@conformance_dir, 'extract.yml'), :urls) do |description, expected, input|
         assert_equal expected, extract_urls(input), description
         expected.each do |expected_url|
-          assert_equal true, valid_url?(expected_url), "expected url [#{expected_url}] not valid"
+          assert_equal true, valid_url?(expected_url, true, false), "expected url [#{expected_url}] not valid"
         end
       end
     end
@@ -151,6 +151,12 @@ class ConformanceTest < Test::Unit::TestCase
       end
     end
+    def test_urls_without_protocol_validation_conformance
+      run_conformance_test(File.join(@conformance_dir, 'validate.yml'), :urls_without_protocol) do |description, expected, input|
+        assert_equal expected, valid_url?(input, true, false), description
+      end
+    end
     def test_hashtags_validation_conformance
       run_conformance_test(File.join(@conformance_dir, 'validate.yml'), :hashtags) do |description, expected, input|
         assert_equal expected, valid_hashtag?(input), description

data/twitter-text.gemspec CHANGED Viewed

@@ -1,10 +1,10 @@
 spec = Gem::Specification.new do |s|
   s.name = "twitter-text"
-  s.version = "1.4.9"
+  s.version = "1.4.10"
   s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle",
-               "Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa"]
+               "Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii"]
   s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com",
-             "raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at"]
+             "raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at", "keita@twitter.com"]
   s.homepage = "http://twitter.com"
   s.description = s.summary = "A gem that provides text handling for Twitter"

metadata CHANGED Viewed

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: twitter-text
 version: !ruby/object:Gem::Version
-  hash: 21
-  prerelease: false
+  hash: 19
+  prerelease:
   segments:
   - 1
   - 4
-  - 9
-  version: 1.4.9
+  - 10
+  version: 1.4.10
 platform: ruby
 authors:
 - Matt Sanford
@@ -17,11 +17,12 @@ authors:
 - Raffi Krikorian
 - J.P. Cummins
 - Yoshimasa Niwa
+- Keita Fujii
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-08-05 00:00:00 -07:00
+date: 2011-09-20 00:00:00 -07:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -103,6 +104,7 @@ email:
 - raffi@twitter.com
 - jcummins@twitter.com
 - niw@niw.at
+- keita@twitter.com
 executables: []
 extensions: []
@@ -171,7 +173,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 requirements: []
 rubyforge_project:
-rubygems_version: 1.3.7
+rubygems_version: 1.4.1
 signing_key:
 specification_version: 3
 summary: Twitter text handling library