RubyGems - twitter-text - Versions diffs - 1.1.8 → 1.2.0 - Mend

twitter-text 1.1.8 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

data/Rakefile CHANGED

@@ -11,7 +11,7 @@ require 'digest'
 spec = Gem::Specification.new do |s|
   s.name = "twitter-text"
-  s.version = "1.1.8"
+  s.version = "1.2.0"
   s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle", "Raffi Krikorian"]
   s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com", "raffi@twitter.com"]
   s.homepage = "http://twitter.com"

data/lib/autolink.rb CHANGED

@@ -2,11 +2,9 @@
 module Twitter
   # A module for including Tweet auto-linking in a class. The primary use of this is for helpers/views so they can auto-link
   # usernames, lists, hashtags and URLs.
-  module Autolink
+  module Autolink extend self
     include ActionView::Helpers::TagHelper #tag_options needed by auto_link
-    WWW_REGEX = /www\./i #:nodoc:
     # Default CSS class for auto-linked URLs
     DEFAULT_URL_CLASS = "tweet-url"
     # Default CSS class for auto-linked lists (along with the url class)
@@ -18,6 +16,20 @@ module Twitter
     # HTML attribute for robot nofollow behavior (default)
     HTML_ATTR_NO_FOLLOW = " rel=\"nofollow\""
+    HTML_ENTITIES = {
+      '&' => '&amp;',
+      '>' => '&gt;',
+      '<' => '&lt;',
+      '"' => '&quot;',
+      "'" => '&#39;'
+    }
+    def encode(text)
+      text && text.gsub(/[&"'><]/) do |character|
+        HTML_ENTITIES[character]
+      end
+    end
     # Add <tt><a></a></tt> tags around the usernames, lists, hashtags and URLs in the provided <tt>text</tt>. The
     # <tt><a></tt> tags can be controlled with the following entries in the <tt>options</tt>
     # hash:
@@ -59,19 +71,39 @@ module Twitter
       options[:list_url_base] ||= "http://twitter.com/"
       extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow]
-      text.gsub(Twitter::Regex[:auto_link_usernames_or_lists]) do
-        if $4 && !options[:suppress_lists]
-          # the link is a list
-          text = list = "#{$3}#{$4}"
-          text = yield(list) if block_given?
-          "#{$1}#{$2}<a class=\"#{options[:url_class]} #{options[:list_class]}\" href=\"#{options[:list_url_base]}#{list.downcase}\"#{extra_html}>#{text}</a>"
+      new_text = ""
+      # this -1 flag allows strings ending in ">" to work
+      text.split(/[<>]/, -1).each_with_index do |chunk, index|
+        if index != 0
+          new_text << ((index % 2 == 0) ? ">" : "<")
+        end
+        if index % 4 != 0
+          new_text << chunk
         else
-          # this is a screen name
-          text = $3
-          text = yield(text) if block_given?
-          "#{$1}#{$2}<a class=\"#{options[:url_class]} #{options[:username_class]}\" href=\"#{options[:username_url_base]}#{text}\"#{extra_html}>#{text}</a>"
+          new_text << chunk.gsub(Twitter::Regex[:auto_link_usernames_or_lists]) do
+            before, at, user, slash_listname, after = $1, $2, $3, $4, $5
+            if slash_listname && !options[:suppress_lists]
+              # the link is a list
+              chunk = list = "#{user}#{slash_listname}"
+              chunk = yield(list) if block_given?
+              "#{before}#{at}<a class=\"#{options[:url_class]} #{options[:list_class]}\" href=\"#{encode(options[:list_url_base])}#{encode(list.downcase)}\"#{extra_html}>#{encode(chunk)}</a>#{after}"
+            else
+              if after =~ Twitter::Regex[:end_screen_name_match]
+                # Followed by something that means we don't autolink
+                "#{before}#{at}#{user}#{slash_listname}#{after}"
+              else
+                # this is a screen name
+                chunk = user
+                chunk = yield(chunk) if block_given?
+                "#{before}#{at}<a class=\"#{options[:url_class]} #{options[:username_class]}\" href=\"#{encode(options[:username_url_base])}#{encode(chunk)}\"#{extra_html}>#{encode(chunk)}</a>#{after}"
+              end
+            end
+          end
         end
       end
+      new_text
     end
     # Add <tt><a></a></tt> tags around the hashtags in the provided <tt>text</tt>. The
@@ -94,7 +126,7 @@ module Twitter
         hash = $2
         text = $3
         text = yield(text) if block_given?
-        "#{before}<a href=\"#{options[:hashtag_url_base]}#{text}\" title=\"##{text}\" class=\"#{options[:url_class]} #{options[:hashtag_class]}\"#{extra_html}>#{hash}#{text}</a>"
+        "#{before}<a href=\"#{options[:hashtag_url_base]}#{encode(text)}\" title=\"##{encode(text)}\" class=\"#{options[:url_class]} #{options[:hashtag_class]}\"#{extra_html}>#{encode(hash)}#{encode(text)}</a>"
       end
     end
@@ -107,10 +139,14 @@ module Twitter
       options[:rel] = "nofollow" unless options.delete(:suppress_no_follow)
       text.gsub(Twitter::Regex[:valid_url]) do
-        all, before, url, protocol = $1, $2, $3, $4
-        html_attrs = tag_options(options.stringify_keys) || ""
-        full_url = (protocol =~ WWW_REGEX ? "http://#{url}" : url)
-        "#{before}<a href=\"#{full_url}\"#{html_attrs}>#{url}</a>"
+        all, before, url, protocol, domain, path, query_string = $1, $2, $3, $4, $5, $6, $7
+        if !protocol.blank? || domain =~ Twitter::Regex[:probable_tld]
+          html_attrs = tag_options(options.stringify_keys) || ""
+          full_url = ((protocol =~ Twitter::Regex[:www] || protocol.blank?) ? "http://#{url}" : url)
+          "#{before}<a href=\"#{encode(full_url)}\"#{html_attrs}>#{encode(url)}</a>"
+        else
+          all
+        end
       end
     end

data/lib/extractor.rb CHANGED

@@ -39,7 +39,7 @@ end
 module Twitter
   # A module for including Tweet parsing in a class. This module provides function for the extraction and processing
   # of usernames, lists, URLs and hashtags.
-  module Extractor
+  module Extractor extend self
     # Extracts a list of all usernames mentioned in the Tweet <tt>text</tt>. If the
     # <tt>text</tt> is <tt>nil</tt> or contains no username mentions an empty array
@@ -65,7 +65,7 @@ module Twitter
       possible_screen_names = []
       position = 0
       text.to_s.scan(Twitter::Regex[:extract_mentions]) do |before, sn, after|
-        unless after =~ Twitter::Regex[:at_signs]
+        unless after =~ Twitter::Regex[:end_screen_name_match]
           start_position = text.to_s.sub_string_search(sn, position) - 1
           position = start_position + sn.char_length + 1
           possible_screen_names << {
@@ -117,13 +117,15 @@ module Twitter
       urls = []
       position = 0
       text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, path, query|
-        start_position = text.to_s.sub_string_search(url, position)
-        end_position = start_position + url.char_length
-        position = end_position
-        urls << {
-          :url => (protocol == "www." ? "http://#{url}" : url),
-          :indices => [start_position, end_position]
-        }
+        if !protocol.blank? || domain =~ Twitter::Regex[:probable_tld]
+          start_position = text.to_s.sub_string_search(url, position)
+          end_position = start_position + url.char_length
+          position = end_position
+          urls << {
+            :url => ((protocol =~ Twitter::Regex[:www] || protocol.blank?) ? "http://#{url}" : url),
+            :indices => [start_position, end_position]
+          }
+        end
       end
       urls.each{|url| yield url[:url], url[:indices].first, url[:indices].last } if block_given?
       urls
@@ -153,7 +155,7 @@ module Twitter
       tags = []
       position = 0
       text.scan(Twitter::Regex[:auto_link_hashtags]) do |before, hash, hash_text|
-        start_position = text.to_s.sub_string_search(hash, position)
+        start_position = text.to_s.sub_string_search(hash + hash_text, position)
         position = start_position + hash_text.char_length + 1
         tags << {
           :hashtag => hash_text,

data/lib/hithighlighter.rb CHANGED

@@ -2,7 +2,7 @@
 module Twitter
   # Module for doing "hit highlighting" on tweets that have been auto-linked already.
   # Useful with the results returned from the Search API.
-  module HitHighlighter
+  module HitHighlighter extend self
     # Default Tag used for hit highlighting
     DEFAULT_HIGHLIGHT_TAG = "em"
@@ -22,9 +22,7 @@ module Twitter
       tag_name = options[:tag] || DEFAULT_HIGHLIGHT_TAG
       tags = ["<" + tag_name + ">", "</" + tag_name + ">"]
-      chunks = text.split("<").map do |item|
-        item.blank? ? item : item.split(">")
-      end.flatten
+      chunks = text.split(/[<>]/)
       result = ""
       chunk_index, chunk = 0, chunks[0]

data/lib/regex.rb CHANGED

@@ -43,16 +43,23 @@ module Twitter
     LATIN_ACCENTS = [(0xc0..0xd6).to_a, (0xd8..0xf6).to_a, (0xf8..0xff).to_a].flatten.pack('U*').freeze
     REGEXEN[:latin_accents] = /[#{LATIN_ACCENTS}]+/o
+    REGEXEN[:end_screen_name_match] = /#{REGEXEN[:at_signs]}|#{REGEXEN[:latin_accents]}/o
     # Characters considered valid in a hashtag but not at the beginning, where only a-z and 0-9 are valid.
     HASHTAG_CHARACTERS = /[a-z0-9_#{LATIN_ACCENTS}]/io
-    REGEXEN[:auto_link_hashtags] = /(^|[^0-9A-Z&\/]+)(#|＃)([0-9A-Z_]*[A-Z_]+#{HASHTAG_CHARACTERS}*)/io
-    REGEXEN[:auto_link_usernames_or_lists] = /([^a-zA-Z0-9_]|^)([@＠]+)([a-zA-Z0-9_]{1,20})(\/#{REGEXEN[:list_name]})?/o
+    REGEXEN[:auto_link_hashtags] = /(^|[^0-9A-Z&\/\?]+)(#|＃)([0-9A-Z_]*[A-Z_]+#{HASHTAG_CHARACTERS}*)/io
+    REGEXEN[:auto_link_usernames_or_lists] = /([^a-zA-Z0-9_]|^|RT:?)([@＠]+)([a-zA-Z0-9_]{1,20})(\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})?($|.)/o
     REGEXEN[:auto_link_emoticon] = /(8\-\#|8\-E|\+\-\(|\`\@|\`O|\&lt;\|:~\(|\}:o\{|:\-\[|\&gt;o\&lt;|X\-\/|\[:-\]\-I\-|\/\/\/\/Ö\\\\\\\\|\(\|:\|\/\)|∑:\*\)|\( \| \))/
     # URL related hash regex collection
-    REGEXEN[:valid_preceding_chars] = /(?:[^\/"':!=]|^|\:)/
+    REGEXEN[:valid_preceding_chars] = /(?:[^-\/"':!=A-Z0-9_]|^|\:)/i
     REGEXEN[:valid_domain] = /(?:[^[:punct:]\s][\.-](?=[^[:punct:]\s])|[^[:punct:]\s]){1,}\.[a-z]{2,}(?::[0-9]+)?/i
+    # For protocol-less URLs, we'll accept them if they end in one of a handful of likely TLDs
+    REGEXEN[:probable_tld] = /\.(?:com|net|org|gov|edu)$/i
+    REGEXEN[:www] = /www\./i
     REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\$\/%#\[\]\-_,~]/i
     # Allow URL paths to contain balanced parens
     #  1. Used in Wikipedia URLs like /Primer_(film)
@@ -73,7 +80,7 @@ module Twitter
       (                                                                                     #   $1 total match
         (#{REGEXEN[:valid_preceding_chars]})                                                #   $2 Preceeding chracter
         (                                                                                   #   $3 URL
-          (https?:\/\/|www\.)                                                               #   $4 Protocol or beginning
+          ((?:https?:\/\/|www\.)?)                                                          #   $4 Protocol or beginning
           (#{REGEXEN[:valid_domain]})                                                       #   $5 Domain(s) and optional post number
           (/#{REGEXEN[:valid_url_path_chars]}*
             #{REGEXEN[:valid_url_path_ending_chars]}?

data/lib/validation.rb CHANGED

@@ -1,5 +1,5 @@
 module Twitter
-  module Validation
+  module Validation extend self
     MAX_LENGTH = 140
     # Character not allowed in Tweets

data/spec/autolinking_spec.rb CHANGED

@@ -475,10 +475,30 @@ describe Twitter::Autolink do
       end
       context "with a @ in a URL" do
-        def original_text; 'http://x.xx/@"style="color:pink"onmouseover=alert(1)//'; end
+        context "with XSS attack" do
+          def original_text; 'http://x.xx/@"style="color:pink"onmouseover=alert(1)//'; end
-        it "should not allow XSS follwing @" do
-          @autolinked_text.should have_autolinked_url('http://x.xx/')
+          it "should not allow XSS follwing @" do
+            @autolinked_text.should have_autolinked_url('http://x.xx/')
+          end
+        end
+        context "with a username not followed by a /" do
+          def original_text; 'http://example.com/@foobar'; end
+          it "should link small url and username" do
+            @autolinked_text.should have_autolinked_url('http://example.com/')
+            @autolinked_text.should link_to_screen_name('foobar')
+          end
+        end
+        context "with a username followed by a /" do
+          def original_text; 'http://example.com/@foobar/'; end
+          it "should not link the username but link full url" do
+            @autolinked_text.should have_autolinked_url('http://example.com/@foobar/')
+            @autolinked_text.should_not link_to_screen_name('foobar')
+          end
         end
       end
@@ -498,4 +518,23 @@ describe Twitter::Autolink do
   end
+  describe "encode" do
+    before do
+      @linker = TestAutolink.new
+    end
+    it "should escape html entities properly" do
+      @linker.encode("&").should == "&amp;"
+      @linker.encode(">").should == "&gt;"
+      @linker.encode("<").should == "&lt;"
+      @linker.encode("\"").should == "&quot;"
+      @linker.encode("'").should == "&#39;"
+      @linker.encode("&<>\"").should == "&amp;&lt;&gt;&quot;"
+      @linker.encode("<div>").should == "&lt;div&gt;"
+      @linker.encode("a&b").should == "a&amp;b"
+      @linker.encode("<a href=\"http://twitter.com\" target=\"_blank\">twitter & friends</a>").should == "&lt;a href=&quot;http://twitter.com&quot; target=&quot;_blank&quot;&gt;twitter &amp; friends&lt;/a&gt;"
+      @linker.encode("&amp;").should == "&amp;amp;"
+      @linker.encode(nil).should == nil
+    end
+  end
 end

data/spec/hithighlighter_spec.rb CHANGED

@@ -76,11 +76,15 @@ describe Twitter::HitHighlighter do
       it "should highlight around a link" do
         @highlighter.hit_highlight("test <a>test</a> test", [[3, 11]]).should == "tes<em>t <a>test</a> t</em>est"
       end
       it "should fail gracefully with bad hits" do
         @highlighter.hit_highlight("test test", [[5, 20]]).should == "test <em>test</em>"
       end
+      it "should not mess up with touching tags" do
+        @highlighter.hit_highlight("<a>foo</a><a>foo</a>", [[3,6]]).should == "<a>foo</a><a><em>foo</em></a>"
+      end
     end
   end

metadata CHANGED

@@ -1,12 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: twitter-text
 version: !ruby/object:Gem::Version
+  hash: 31
   prerelease: false
   segments:
   - 1
-  - 1
-  - 8
-  version: 1.1.8
+  - 2
+  - 0
+  version: 1.2.0
 platform: ruby
 authors:
 - Matt Sanford
@@ -18,16 +19,18 @@ autorequire: ""
 bindir: bin
 cert_chain: []
-date: 2010-08-23 00:00:00 -07:00
+date: 2010-10-05 00:00:00 -07:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
   name: actionpack
   prerelease: false
   requirement: &id001 !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
+        hash: 3
         segments:
         - 0
         version: "0"
@@ -76,23 +79,27 @@ rdoc_options: []
 require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
+      hash: 3
       segments:
       - 0
       version: "0"
 required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
+      hash: 3
       segments:
       - 0
       version: "0"
 requirements: []
 rubyforge_project:
-rubygems_version: 1.3.6
+rubygems_version: 1.3.7
 signing_key:
 specification_version: 3
 summary: Twitter text handling library