RubyGems - twitter-text-simpleidn - Versions diffs - 3.0.0.0 - Mend

twitter-text-simpleidn 3.0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

checksums.yaml +7 -0
data/.gemtest +0 -0
data/.gitignore +40 -0
data/.gitmodules +3 -0
data/.rspec +2 -0
data/CHANGELOG.md +35 -0
data/Gemfile +4 -0
data/LICENSE +188 -0
data/README.md +193 -0
data/Rakefile +52 -0
data/config/README.md +142 -0
data/config/v1.json +8 -0
data/config/v2.json +29 -0
data/config/v3.json +30 -0
data/lib/assets/tld_lib.yml +1571 -0
data/lib/twitter-text.rb +29 -0
data/lib/twitter-text/autolink.rb +453 -0
data/lib/twitter-text/configuration.rb +68 -0
data/lib/twitter-text/deprecation.rb +21 -0
data/lib/twitter-text/emoji_regex.rb +27 -0
data/lib/twitter-text/extractor.rb +388 -0
data/lib/twitter-text/hash_helper.rb +27 -0
data/lib/twitter-text/hit_highlighter.rb +92 -0
data/lib/twitter-text/regex.rb +381 -0
data/lib/twitter-text/rewriter.rb +69 -0
data/lib/twitter-text/unicode.rb +31 -0
data/lib/twitter-text/validation.rb +251 -0
data/lib/twitter-text/weighted_range.rb +24 -0
data/script/destroy +14 -0
data/script/generate +14 -0
data/spec/autolinking_spec.rb +848 -0
data/spec/configuration_spec.rb +136 -0
data/spec/extractor_spec.rb +392 -0
data/spec/hithighlighter_spec.rb +96 -0
data/spec/regex_spec.rb +76 -0
data/spec/rewriter_spec.rb +553 -0
data/spec/spec_helper.rb +139 -0
data/spec/test_urls.rb +90 -0
data/spec/twitter_text_spec.rb +25 -0
data/spec/unicode_spec.rb +35 -0
data/spec/validation_spec.rb +87 -0
data/test/conformance_test.rb +242 -0
data/twitter-text.gemspec +35 -0
metadata +229 -0

data/lib/twitter-text/rewriter.rb ADDED

@@ -0,0 +1,69 @@
+# Copyright 2018 Twitter, Inc.
+# Licensed under the Apache License, Version 2.0
+# http://www.apache.org/licenses/LICENSE-2.0
+module Twitter
+  module TwitterText
+    # A module provides base methods to rewrite usernames, lists, hashtags and URLs.
+    module Rewriter extend self
+      def rewrite_entities(text, entities)
+        codepoints = text.to_s.to_codepoint_a
+        # sort by start index
+        entities = entities.sort_by do |entity|
+          indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices]
+          indices.first
+        end
+        result = []
+        last_index = entities.inject(0) do |index, entity|
+          indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices]
+          result << codepoints[index...indices.first]
+          result << yield(entity, codepoints)
+          indices.last
+        end
+        result << codepoints[last_index..-1]
+        result.flatten.join
+      end
+      # These methods are deprecated, will be removed in future.
+      extend Deprecation
+      def rewrite(text, options = {})
+        [:hashtags, :urls, :usernames_or_lists].inject(text) do |key|
+          options[key] ? send(:"rewrite_#{key}", text, &options[key]) : text
+        end
+      end
+      deprecate :rewrite, :rewrite_entities
+      def rewrite_usernames_or_lists(text)
+        entities = Extractor.extract_mentions_or_lists_with_indices(text)
+        rewrite_entities(text, entities) do |entity, codepoints|
+          at = codepoints[entity[:indices].first]
+          list_slug = entity[:list_slug]
+          list_slug = nil if list_slug.empty?
+          yield(at, entity[:screen_name], list_slug)
+        end
+      end
+      deprecate :rewrite_usernames_or_lists, :rewrite_entities
+      def rewrite_hashtags(text)
+        entities = Extractor.extract_hashtags_with_indices(text)
+        rewrite_entities(text, entities) do |entity, codepoints|
+          hash = codepoints[entity[:indices].first]
+          yield(hash, entity[:hashtag])
+        end
+      end
+      deprecate :rewrite_hashtags, :rewrite_entities
+      def rewrite_urls(text)
+        entities = Extractor.extract_urls_with_indices(text, :extract_url_without_protocol => false)
+        rewrite_entities(text, entities) do |entity, codepoints|
+          yield(entity[:url])
+        end
+      end
+      deprecate :rewrite_urls, :rewrite_entities
+    end
+  end
+end

data/lib/twitter-text/unicode.rb ADDED

@@ -0,0 +1,31 @@
+# Copyright 2018 Twitter, Inc.
+# Licensed under the Apache License, Version 2.0
+# http://www.apache.org/licenses/LICENSE-2.0
+module Twitter
+  module TwitterText
+    # This module lazily defines constants of the form Uxxxx for all Unicode
+    # codepoints from U0000 to U10FFFF. The value of each constant is the
+    # UTF-8 string for the codepoint.
+    # Examples:
+    #   copyright = Unicode::U00A9
+    #   euro = Unicode::U20AC
+    #   infinity = Unicode::U221E
+    #
+    module Unicode
+      CODEPOINT_REGEX = /^U_?([0-9a-fA-F]{4,5}|10[0-9a-fA-F]{4})$/
+      def self.const_missing(name)
+        # Check that the constant name is of the right form: U0000 to U10FFFF
+        if name.to_s =~ CODEPOINT_REGEX
+          # Convert the codepoint to an immutable UTF-8 string,
+          # define a real constant for that value and return the value
+          #p name, name.class
+          const_set(name, [$1.to_i(16)].pack("U").freeze)
+        else  # Raise an error for constants that are not Unicode.
+          raise NameError, "Uninitialized constant: Unicode::#{name}"
+        end
+      end
+    end
+  end
+end

data/lib/twitter-text/validation.rb ADDED

@@ -0,0 +1,251 @@
+# Copyright 2018 Twitter, Inc.
+# Licensed under the Apache License, Version 2.0
+# http://www.apache.org/licenses/LICENSE-2.0
+require 'unf'
+module Twitter
+  module TwitterText
+    module Validation extend self
+      DEFAULT_TCO_URL_LENGTHS = {
+        :short_url_length => 23,
+      }
+      # :weighted_length the weighted length of tweet based on weights specified in the config
+      # :valid If tweet is valid
+      # :permillage permillage of the tweet over the max length specified in config
+      # :valid_range_start beginning of valid text
+      # :valid_range_end End index of valid part of the tweet text (inclusive)
+      # :display_range_start beginning index of display text
+      # :display_range_end end index of display text (inclusive)
+      class ParseResults < Hash
+        RESULT_PARAMS = [:weighted_length, :valid, :permillage, :valid_range_start, :valid_range_end, :display_range_start, :display_range_end]
+        def self.empty
+          return ParseResults.new(weighted_length: 0, permillage: 0, valid: true, display_range_start: 0, display_range_end: 0, valid_range_start: 0, valid_range_end: 0)
+        end
+        def initialize(params = {})
+          RESULT_PARAMS.each do |key|
+            super[key] = params[key] if params.key?(key)
+          end
+        end
+      end
+      # Parse input text and return hash with descriptive parameters populated.
+      #
+      # text::    the tweet text; it is NFC-normalized before being measured
+      # options:: Hash; only +:config+ is read here (falls back to
+      #           Configuration.default_configuration when absent)
+      #
+      # Returns a ParseResults with the weighted length, validity, permillage
+      # and the inclusive display/valid ranges.
+      def parse_tweet(text, options = {})
+        options = DEFAULT_TCO_URL_LENGTHS.merge(options)
+        config = options[:config] || Twitter::TwitterText::Configuration.default_configuration
+        normalized_text = text.to_nfc
+        # NOTE(review): the value built here is discarded because there is no
+        # `return`, so empty text falls through to the code below (the loop is
+        # skipped and both range ends come out as -1 plus the normalization
+        # offset). Confirm whether an early return of ParseResults.empty was
+        # intended before changing it.
+        unless (normalized_text.length > 0)
+          ParseResults.empty()
+        end
+        # All weights are compared in scaled units; the scale is divided back
+        # out only when the final length is reported.
+        scale = config.scale
+        max_weighted_tweet_length = config.max_weighted_tweet_length
+        scaled_max_weighted_tweet_length = max_weighted_tweet_length * scale
+        transformed_url_length = config.transformed_url_length * scale
+        ranges = config.ranges
+        url_entities = Twitter::TwitterText::Extractor.extract_urls_with_indices(normalized_text)
+        emoji_entities = config.emoji_parsing_enabled ? Twitter::TwitterText::Extractor.extract_emoji_with_indices(normalized_text) : []
+        has_invalid_chars = false
+        weighted_count = 0
+        offset = 0
+        display_offset = 0
+        valid_offset = 0
+        while offset < normalized_text.codepoint_length
+          # Reset the default char weight each pass through the loop
+          char_weight = config.default_weight
+          entity_length = 0
+          # A URL starting at the current offset is charged the fixed
+          # transformed URL length and skipped as a whole.
+          url_entities.each do |url_entity|
+            if url_entity[:indices].first == offset
+              entity_length = url_entity[:indices].last - url_entity[:indices].first
+              weighted_count += transformed_url_length
+              offset += entity_length
+              display_offset += entity_length
+              if weighted_count <= scaled_max_weighted_tweet_length
+                valid_offset += entity_length
+              end
+              # Finding a match breaks the loop
+              break
+            end
+          end
+          # An emoji entity starting at the current offset is charged one
+          # default weight, however many codepoints it spans.
+          emoji_entities.each do |emoji_entity|
+            if emoji_entity[:indices].first == offset
+              entity_length = emoji_entity[:indices].last - emoji_entity[:indices].first
+              weighted_count += char_weight # the default weight
+              offset += entity_length
+              display_offset += entity_length
+              if weighted_count <= scaled_max_weighted_tweet_length
+                valid_offset += entity_length
+              end
+              # Finding a match breaks the loop
+              break
+            end
+          end
+          # An entity was consumed on this pass; re-check the loop condition
+          # before looking at the next position.
+          next if entity_length > 0
+          if offset < normalized_text.codepoint_length
+            code_point = normalized_text[offset]
+            # The first configured range containing the codepoint decides its
+            # weight; otherwise the default weight set above applies.
+            ranges.each do |range|
+              if range.contains?(code_point.unpack("U").first)
+                char_weight = range.weight
+                break
+              end
+            end
+            weighted_count += char_weight
+            # Once an invalid character has been seen the flag stays set.
+            has_invalid_chars = contains_invalid?(code_point) unless has_invalid_chars
+            codepoint_length = code_point.codepoint_length
+            offset += codepoint_length
+            display_offset += codepoint_length
+            #          index += codepoint_length
+            if !has_invalid_chars && (weighted_count <= scaled_max_weighted_tweet_length)
+              valid_offset += codepoint_length
+            end
+          end
+        end
+        # Difference in codepoint count between the original and the
+        # normalized text; added to both range ends below.
+        normalized_text_offset = text.codepoint_length - normalized_text.codepoint_length
+        scaled_weighted_length = weighted_count / scale
+        is_valid = !has_invalid_chars && (scaled_weighted_length <= max_weighted_tweet_length)
+        permillage = scaled_weighted_length * 1000 / max_weighted_tweet_length
+        return ParseResults.new(weighted_length: scaled_weighted_length, permillage: permillage, valid: is_valid, display_range_start: 0, display_range_end: (display_offset + normalized_text_offset - 1), valid_range_start: 0, valid_range_end: (valid_offset + normalized_text_offset - 1))
+      end
+      def contains_invalid?(text)
+        return false if !text || text.empty?
+        begin
+          return true if Twitter::TwitterText::Regex::INVALID_CHARACTERS.any?{|invalid_char| text.include?(invalid_char) }
+        rescue ArgumentError
+          # non-Unicode value.
+          return true
+        end
+        return false
+      end
+      def valid_username?(username)
+        return false if !username || username.empty?
+        extracted = Twitter::TwitterText::Extractor.extract_mentioned_screen_names(username)
+        # Should extract the username minus the @ sign, hence the [1..-1]
+        extracted.size == 1 && extracted.first == username[1..-1]
+      end
+      VALID_LIST_RE = /\A#{Twitter::TwitterText::Regex[:valid_mention_or_list]}\z/o
+      def valid_list?(username_list)
+        match = username_list.match(VALID_LIST_RE)
+        # Must have matched and had nothing before or after
+        !!(match && match[1] == "" && match[4] && !match[4].empty?)
+      end
+      def valid_hashtag?(hashtag)
+        return false if !hashtag || hashtag.empty?
+        extracted = Twitter::TwitterText::Extractor.extract_hashtags(hashtag)
+        # Should extract the hashtag minus the # sign, hence the [1..-1]
+        extracted.size == 1 && extracted.first == hashtag[1..-1]
+      end
+      def valid_url?(url, unicode_domains=true, require_protocol=true)
+        return false if !url || url.empty?
+        url_parts = url.match(Twitter::TwitterText::Regex[:validate_url_unencoded])
+        return false unless (url_parts && url_parts.to_s == url)
+        scheme, authority, path, query, fragment = url_parts.captures
+        return false unless ((!require_protocol ||
+                              (valid_match?(scheme, Twitter::TwitterText::Regex[:validate_url_scheme]) && scheme.match(/\Ahttps?\Z/i))) &&
+                             valid_match?(path, Twitter::TwitterText::Regex[:validate_url_path]) &&
+                             valid_match?(query, Twitter::TwitterText::Regex[:validate_url_query], true) &&
+                             valid_match?(fragment, Twitter::TwitterText::Regex[:validate_url_fragment], true))
+        return (unicode_domains && valid_match?(authority, Twitter::TwitterText::Regex[:validate_url_unicode_authority])) ||
+               (!unicode_domains && valid_match?(authority, Twitter::TwitterText::Regex[:validate_url_authority]))
+      end
+      # These methods are deprecated, will be removed in future.
+      extend Deprecation
+      MAX_LENGTH_LEGACY = 140
+      # DEPRECATED: Please use parse_text instead.
+      #
+      # Returns the length of the string as it would be displayed. This is equivilent to the length of the Unicode NFC
+      # (See: http://www.unicode.org/reports/tr15). This is needed in order to consistently calculate the length of a
+      # string no matter which actual form was transmitted. For example:
+      #
+      #     U+0065  Latin Small Letter E
+      # +   U+0301  Combining Acute Accent
+      # ----------
+      # =   2 bytes, 2 characters, displayed as é (1 visual glyph)
+      #     … The NFC of {U+0065, U+0301} is {U+00E9}, which is a single chracter and a +display_length+ of 1
+      #
+      # The string could also contain U+00E9 already, in which case the canonicalization will not change the value.
+      #
+      def tweet_length(text, options = {})
+        options = DEFAULT_TCO_URL_LENGTHS.merge(options)
+        length = text.to_nfc.unpack("U*").length
+        Twitter::TwitterText::Extractor.extract_urls_with_indices(text) do |url, start_position, end_position|
+          length += start_position - end_position
+          length += options[:short_url_length] if url.length > 0
+        end
+        length
+      end
+      deprecate :tweet_length, :parse_tweet
+      # DEPRECATED: Please use parse_text instead.
+      #
+      # Check the <tt>text</tt> for any reason that it may not be valid as a Tweet. This is meant as a pre-validation
+      # before posting to api.twitter.com. There are several server-side reasons for Tweets to fail but this pre-validation
+      # will allow quicker feedback.
+      #
+      # Returns <tt>false</tt> if this <tt>text</tt> is valid. Otherwise one of the following Symbols will be returned:
+      #
+      #   <tt>:too_long</tt>:: if the <tt>text</tt> is too long
+      #   <tt>:empty</tt>:: if the <tt>text</tt> is nil or empty
+      #   <tt>:invalid_characters</tt>:: if the <tt>text</tt> contains non-Unicode or any of the disallowed Unicode characters
+      def tweet_invalid?(text)
+        return :empty if !text || text.empty?
+        begin
+          return :too_long if tweet_length(text) > MAX_LENGTH_LEGACY
+          return :invalid_characters if Twitter::TwitterText::Regex::INVALID_CHARACTERS.any?{|invalid_char| text.include?(invalid_char) }
+        rescue ArgumentError
+          # non-Unicode value.
+          return :invalid_characters
+        end
+        return false
+      end
+      deprecate :tweet_invalid?, :parse_tweet
+      def valid_tweet_text?(text)
+        !tweet_invalid?(text)
+      end
+      deprecate :valid_tweet_text?, :parse_tweet
+      private
+      def valid_match?(string, regex, optional=false)
+        return (string && string.match(regex) && $~.to_s == string) unless optional
+        !(string && (!string.match(regex) || $~.to_s != string))
+      end
+    end
+  end
+end

data/lib/twitter-text/weighted_range.rb ADDED

@@ -0,0 +1,24 @@
+# Copyright 2018 Twitter, Inc.
+# Licensed under the Apache License, Version 2.0
+# http://www.apache.org/licenses/LICENSE-2.0
+# encoding: UTF-8
+module Twitter
+  module TwitterText
+    class WeightedRange
+      attr_reader :start, :end, :weight
+      def initialize(range = {})
+        raise ArgumentError.new("Invalid range") unless [:start, :end, :weight].all? { |key| range.key?(key) && range[key].is_a?(Integer) }
+        @start = range[:start]
+        @end = range[:end]
+        @weight = range[:weight]
+      end
+      def contains?(code_point)
+        code_point >= @start && code_point <= @end
+      end
+    end
+  end
+end

data/script/destroy ADDED

@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
+begin
+  require 'rubigen'
+rescue LoadError
+  require 'rubygems'
+  require 'rubigen'
+end
+require 'rubigen/scripts/destroy'
+ARGV.shift if ['--help', '-h'].include?(ARGV[0])
+RubiGen::Base.use_component_sources! [:newgem_simple, :test_unit]
+RubiGen::Scripts::Destroy.new.run(ARGV)

data/script/generate ADDED

@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
+begin
+  require 'rubigen'
+rescue LoadError
+  require 'rubygems'
+  require 'rubigen'
+end
+require 'rubigen/scripts/generate'
+ARGV.shift if ['--help', '-h'].include?(ARGV[0])
+RubiGen::Base.use_component_sources! [:newgem_simple, :test_unit]
+RubiGen::Scripts::Generate.new.run(ARGV)

data/spec/autolinking_spec.rb ADDED

@@ -0,0 +1,848 @@
+# Copyright 2018 Twitter, Inc.
+# Licensed under the Apache License, Version 2.0
+# http://www.apache.org/licenses/LICENSE-2.0
+# encoding: utf-8
+require File.dirname(__FILE__) + '/spec_helper'
+# Minimal host class that mixes in Autolink so the specs below can call the
+# module's methods on an instance (TestAutolink.new.auto_link and friends).
+class TestAutolink
+  include Twitter::TwitterText::Autolink
+end
+describe Twitter::TwitterText::Autolink do
+  def original_text; end
+  def url; end
+  describe "auto_link_custom" do
+    before do
+      @autolinked_text = TestAutolink.new.auto_link(original_text) if original_text
+    end
+    describe "username autolinking" do
+      context "username preceded by a space" do
+        def original_text; "hello @jacob"; end
+        it "should be linked" do
+          expect(@autolinked_text).to link_to_screen_name('jacob')
+        end
+      end
+      context "username in camelCase" do
+        def original_text() "@jaCob iS cOoL" end
+        it "should be linked" do
+          expect(@autolinked_text).to link_to_screen_name('jaCob')
+        end
+      end
+      context "username at beginning of line" do
+        def original_text; "@jacob you're cool"; end
+        it "should be linked" do
+          expect(@autolinked_text).to link_to_screen_name('jacob')
+        end
+      end
+      context "username preceded by word character" do
+        def original_text; "meet@the beach"; end
+        it "should not be linked" do
+          expect(Nokogiri::HTML(@autolinked_text).search('a')).to be_empty
+        end
+      end
+      context "username preceded by non-word character" do
+        def original_text; "great.@jacob"; end
+        it "should be linked" do
+          expect(@autolinked_text).to link_to_screen_name('jacob')
+        end
+      end
+      context "username containing non-word characters" do
+        def original_text; "@zach&^$%^"; end
+        it "should not be linked" do
+          expect(@autolinked_text).to link_to_screen_name('zach')
+        end
+      end
+      context "username over twenty characters" do
+        def original_text
+          @twenty_character_username = "zach" * 5
+          "@" + @twenty_character_username + "1"
+        end
+        it "should not be linked" do
+          expect(@autolinked_text).to link_to_screen_name(@twenty_character_username)
+        end
+      end
+      context "username followed by japanese" do
+        def original_text; "@jacobの"; end
+        it "should be linked" do
+          expect(@autolinked_text).to link_to_screen_name('jacob')
+        end
+      end
+      context "username preceded by japanese" do
+        def original_text; "あ@matz"; end
+        it "should be linked" do
+          expect(@autolinked_text).to link_to_screen_name('matz')
+        end
+      end
+      context "username surrounded by japanese" do
+        def original_text; "あ@yoshimiの"; end
+        it "should be linked" do
+          expect(@autolinked_text).to link_to_screen_name('yoshimi')
+        end
+      end
+      context "username using full-width at-sign" do
+        def original_text
+          "#{[0xFF20].pack('U')}jacob"
+        end
+        it "should be linked" do
+          expect(@autolinked_text).to link_to_screen_name('jacob')
+        end
+      end
+    end
+    describe "list path autolinking" do
+      context "when List is not available" do
+        it "should not be linked" do
+          @autolinked_text = TestAutolink.new.auto_link_usernames_or_lists("hello @jacob/my-list", :suppress_lists => true)
+          expect(@autolinked_text).to_not link_to_list_path('jacob/my-list')
+          expect(@autolinked_text).to include('my-list')
+        end
+      end
+      context "slug preceded by a space" do
+        def original_text; "hello @jacob/my-list"; end
+        it "should be linked" do
+          expect(@autolinked_text).to link_to_list_path('jacob/my-list')
+        end
+      end
+      context "username followed by a slash but no list" do
+        def original_text; "hello @jacob/ my-list"; end
+        it "should NOT be linked" do
+          expect(@autolinked_text).to_not link_to_list_path('jacob/my-list')
+          expect(@autolinked_text).to link_to_screen_name('jacob')
+        end
+      end
+      context "empty username followed by a list" do
+        def original_text; "hello @/my-list"; end
+        it "should NOT be linked" do
+          expect(Nokogiri::HTML(@autolinked_text).search('a')).to be_empty
+        end
+      end
+      context "list slug at beginning of line" do
+        def original_text; "@jacob/my-list"; end
+        it "should be linked" do
+          expect(@autolinked_text).to link_to_list_path('jacob/my-list')
+        end
+      end
+      context "username preceded by alpha-numeric character" do
+        def original_text; "meet@the/beach"; end
+        it "should not be linked" do
+          expect(Nokogiri::HTML(@autolinked_text).search('a')).to be_empty
+        end
+      end
+      context "username preceded by non-word character" do
+        def original_text; "great.@jacob/my-list"; end
+        it "should be linked" do
+          @autolinked_text = TestAutolink.new.auto_link("great.@jacob/my-list")
+          expect(@autolinked_text).to link_to_list_path('jacob/my-list')
+        end
+      end
+      context "username containing non-word characters" do
+        def original_text; "@zach/test&^$%^"; end
+        it "should be linked" do
+          expect(@autolinked_text).to link_to_list_path('zach/test')
+        end
+      end
+      context "username over twenty characters" do
+        def original_text
+          @twentyfive_character_list = "jack/" + ("a" * 25)
+          "@#{@twentyfive_character_list}12345"
+        end
+        it "should be linked" do
+          expect(@autolinked_text).to link_to_list_path(@twentyfive_character_list)
+        end
+      end
+    end
+    describe "hashtag autolinking" do
+      context "with an all numeric hashtag" do
+        def original_text; "#123"; end
+        it "should not be linked" do
+          expect(@autolinked_text).to_not have_autolinked_hashtag('#123')
+        end
+      end
+      context "with a hashtag with alphanumeric characters" do
+        def original_text; "#ab1d"; end
+        it "should be linked" do
+          expect(@autolinked_text).to have_autolinked_hashtag('#ab1d')
+        end
+      end
+      context "with a hashtag with underscores" do
+        def original_text; "#a_b_c_d"; end
+        it "should be linked" do
+          expect(@autolinked_text).to have_autolinked_hashtag(original_text)
+        end
+      end
+      context "with a hashtag that is preceded by a word character" do
+        def original_text; "ab#cd"; end
+        it "should not be linked" do
+          expect(@autolinked_text).to_not have_autolinked_hashtag(original_text)
+        end
+      end
+      context "with a page anchor in a url" do
+        def original_text; "Here's my url: http://foobar.com/#home"; end
+        it "should not link the hashtag" do
+          expect(@autolinked_text).to_not have_autolinked_hashtag('#home')
+        end
+        it "should link the url" do
+          expect(@autolinked_text).to have_autolinked_url('http://foobar.com/#home')
+        end
+      end
+      context "with a hashtag that starts with a number but has word characters" do
+        def original_text; "#2ab"; end
+        it "should be linked" do
+          expect(@autolinked_text).to have_autolinked_hashtag(original_text)
+        end
+      end
+      context "with multiple valid hashtags" do
+        def original_text; "I'm frickin' awesome #ab #cd #ef"; end
+        it "links each hashtag" do
+          expect(@autolinked_text).to have_autolinked_hashtag('#ab')
+          expect(@autolinked_text).to have_autolinked_hashtag('#cd')
+          expect(@autolinked_text).to have_autolinked_hashtag('#ef')
+        end
+      end
+      context "with a hashtag preceded by a ." do
+        def original_text; "ok, great.#abc"; end
+        it "should be linked" do
+          expect(@autolinked_text).to have_autolinked_hashtag('#abc')
+        end
+      end
+      context "with a hashtag preceded by a &" do
+        def original_text; "&#nbsp;"; end
+        it "should not be linked" do
+          expect(@autolinked_text).to_not have_autolinked_hashtag('#nbsp;')
+        end
+      end
+      context "with a hashtag that ends in an !" do
+        def original_text; "#great!"; end
+        it "should be linked, but should not include the !" do
+          expect(@autolinked_text).to have_autolinked_hashtag('#great')
+        end
+      end
+      context "with a hashtag followed by Japanese" do
+         def original_text; "#twj_devの"; end
+        it "should be linked" do
+          expect(@autolinked_text).to have_autolinked_hashtag('#twj_devの')
+        end
+      end
+      context "with a hashtag preceded by a full-width space" do
+        def original_text; "#{[0x3000].pack('U')}#twj_dev"; end
+        it "should be linked" do
+          expect(@autolinked_text).to have_autolinked_hashtag('#twj_dev')
+        end
+      end
+      context "with a hashtag followed by a full-width space" do
+        def original_text; "#twj_dev#{[0x3000].pack('U')}"; end
+        it "should be linked" do
+          expect(@autolinked_text).to have_autolinked_hashtag('#twj_dev')
+        end
+      end
+      context "with a hashtag using full-width hash" do
+        def original_text; "#{[0xFF03].pack('U')}twj_dev"; end
+        it "should be linked" do
+          link = Nokogiri::HTML(@autolinked_text).search('a')
+          expect((link.inner_text.respond_to?(:force_encoding) ? link.inner_text.force_encoding("utf-8") : link.inner_text)).to be == "#{[0xFF03].pack('U')}twj_dev"
+          expect(link.first['href']).to be == 'https://twitter.com/search?q=%23twj_dev'
+        end
+      end
+      context "with a hashtag containing an accented latin character" do
+        def original_text
+          # the hashtag is #éhashtag
+          "##{[0x00e9].pack('U')}hashtag"
+        end
+        it "should be linked" do
+          expect(@autolinked_text).to be == "<a class=\"tweet-url hashtag\" href=\"https://twitter.com/search?q=%23éhashtag\" rel=\"nofollow\" title=\"#éhashtag\">#éhashtag</a>"
+        end
+      end
+    end
+    describe "URL autolinking" do
+      def url; "http://www.google.com"; end
+      context "when embedded in plain text" do
+        def original_text; "On my search engine #{url} I found good links."; end
+        it "should be linked" do
+          expect(@autolinked_text).to have_autolinked_url(url)
+        end
+      end
+      context "when surrounded by Japanese;" do
+        def original_text; "いまなにしてる#{url}いまなにしてる"; end
+        it "should be linked" do
+          expect(@autolinked_text).to have_autolinked_url(url)
+        end
+      end
+      context "with a path surrounded by parentheses;" do
+        def original_text; "I found a neatness (#{url})"; end
+        it "should be linked" do
+          expect(@autolinked_text).to have_autolinked_url(url)
+        end
+        context "when the URL ends with a slash;" do
+          def url; "http://www.google.com/"; end
+          it "should be linked" do
+            expect(@autolinked_text).to have_autolinked_url(url)
+          end
+        end
+        context "when the URL has a path;" do
+          def url; "http://www.google.com/fsdfasdf"; end
+          it "should be linked" do
+            expect(@autolinked_text).to have_autolinked_url(url)
+          end
+        end
+      end
+      context "when path contains parens" do
+        def original_text; "I found a neatness (#{url})"; end
+        it "should be linked" do
+          expect(@autolinked_text).to have_autolinked_url(url)
+        end
+        context "wikipedia" do
+          def url; "http://en.wikipedia.org/wiki/Madonna_(artist)"; end
+          it "should be linked" do
+            expect(@autolinked_text).to have_autolinked_url(url)
+          end
+        end
+        context "IIS session" do
+          def url; "http://msdn.com/S(deadbeef)/page.htm"; end
+          it "should be linked" do
+            expect(@autolinked_text).to have_autolinked_url(url)
+          end
+        end
+        context "unbalanced parens" do
+          def url; "http://example.com/i_has_a_("; end
+          it "should be linked" do
+            expect(@autolinked_text).to have_autolinked_url("http://example.com/i_has_a_")
+          end
+        end
+        context "balanced parens with a double quote inside" do
+          def url; "http://foo.com/foo_(\")_bar" end
+          it "should be linked" do
+            expect(@autolinked_text).to have_autolinked_url("http://foo.com/foo_")
+          end
+        end
+        context "balanced parens hiding XSS" do
+          def url; 'http://x.xx.com/("style="color:red"onmouseover="alert(1)' end
+          it "should be linked" do
+            expect(@autolinked_text).to have_autolinked_url("http://x.xx.com/")
+          end
+        end
+      end
+      context "when preceded by a :" do
+        def original_text; "Check this out @hoverbird:#{url}"; end
+        it "should be linked" do
+          expect(@autolinked_text).to have_autolinked_url(url)
+        end
+      end
+      context "with a URL ending in allowed punctuation" do
+        it "does not consume ending punctuation" do
+          matcher = TestAutolink.new
+          %w| ? ! , . : ; ] ) } = \ ' |.each do |char|
+            expect(matcher.auto_link("#{url}#{char}")).to have_autolinked_url(url)
+          end
+        end
+      end
+      context "with a URL preceded in forbidden characters" do
+        it "should be linked" do
+          matcher = TestAutolink.new
+          %w| \ ' / ! = |.each do |char|
+            expect(matcher.auto_link("#{char}#{url}")).to have_autolinked_url(url)
+          end
+        end
+      end
+      context "when embedded in a link tag" do
+        def original_text; "<link rel='true'>#{url}</link>"; end
+        it "should be linked" do
+          expect(@autolinked_text).to have_autolinked_url(url)
+        end
+      end
+      context "with multiple URLs" do
+        def original_text; "http://www.links.org link at start of page, link at end http://www.foo.org"; end
+        it "should autolink each one" do
+          expect(@autolinked_text).to have_autolinked_url('http://www.links.org')
+          expect(@autolinked_text).to have_autolinked_url('http://www.foo.org')
+        end
+      end
+      context "with multiple URLs in different formats" do
+        def original_text; "http://foo.com https://bar.com http://mail.foobar.org"; end
+        it "should autolink each one, in the proper order" do
+          expect(@autolinked_text).to have_autolinked_url('http://foo.com')
+          expect(@autolinked_text).to have_autolinked_url('https://bar.com')
+          expect(@autolinked_text).to have_autolinked_url('http://mail.foobar.org')
+        end
+      end
+      context "with a URL having a long TLD" do
+        def original_text; "Yahoo integriert Facebook http://golem.mobi/0912/71607.html"; end
+        it "should autolink it" do
+          expect(@autolinked_text).to have_autolinked_url('http://golem.mobi/0912/71607.html')
+        end
+      end
+      context "with a url lacking the protocol" do
+        def original_text; "I like www.foobar.com dudes"; end
+        it "does not link at all" do
+          link = Nokogiri::HTML(@autolinked_text).search('a')
+          expect(link).to be_empty
+        end
+      end
+      context "with a @ in a URL" do
+        context "with XSS attack" do
+          def original_text; 'http://x.xx.com/@"style="color:pink"onmouseover=alert(1)//'; end
+          it "should not allow XSS follwing @" do
+            expect(@autolinked_text).to have_autolinked_url('http://x.xx.com/')
+          end
+        end
+        context "with a username not followed by a /" do
+          def original_text; 'http://example.com/@foobar'; end
+          it "should link url" do
+            expect(@autolinked_text).to have_autolinked_url('http://example.com/@foobar')
+          end
+        end
+        context "with a username followed by a /" do
+          def original_text; 'http://example.com/@foobar/'; end
+          it "should not link the username but link full url" do
+            expect(@autolinked_text).to have_autolinked_url('http://example.com/@foobar/')
+            expect(@autolinked_text).to_not link_to_screen_name('foobar')
+          end
+        end
+      end
+      context "regex engine quirks" do
+        context "does not spiral out of control on repeated periods" do
+          def original_text; "Test a ton of periods http://example.com/path.........................................."; end
+          it "should autolink" do
+            expect(@autolinked_text).to have_autolinked_url('http://example.com/path')
+          end
+        end
+        context "does not spiral out of control on repeated dashes" do
+          def original_text; "Single char file ext http://www.bestbuy.com/site/Currie+Technologies+-+Ezip+400+Scooter/9885188.p?id=1218189013070&skuId=9885188"; end
+          it "should autolink" do
+            expect(@autolinked_text).to have_autolinked_url('http://www.bestbuy.com/site/Currie+Technologies+-+Ezip+400+Scooter/9885188.p?id=1218189013070&skuId=9885188')
+          end
+        end
+      end
+    end
+    describe "Autolink all" do
+      before do
+        @linker = TestAutolink.new
+      end
+      it "should allow url/hashtag overlap" do
+        auto_linked = @linker.auto_link("https://twitter.com/#search")
+        expect(auto_linked).to have_autolinked_url('https://twitter.com/#search')
+      end
+      it "should not add invalid option in HTML tags" do
+        auto_linked = @linker.auto_link("https://twitter.com/ is a URL, not a hashtag", :hashtag_class => 'hashtag_classname')
+        expect(auto_linked).to have_autolinked_url('https://twitter.com/')
+        expect(auto_linked).to_not include('hashtag_class')
+        expect(auto_linked).to_not include('hashtag_classname')
+      end
+      it "should autolink url/hashtag/mention in text with Unicode supplementary characters" do
+        auto_linked = @linker.auto_link("#{[0x10400].pack('U')} #hashtag #{[0x10400].pack('U')} @mention #{[0x10400].pack('U')} http://twitter.com/")
+        expect(auto_linked).to have_autolinked_hashtag('#hashtag')
+        expect(auto_linked).to link_to_screen_name('mention')
+        expect(auto_linked).to have_autolinked_url('http://twitter.com/')
+      end
+    end
+  end
+  describe "autolinking options" do
+    # Fresh linker for every example in this group.
+    before { @linker = TestAutolink.new }
+    it "should show display_url when :url_entities provided" do
+      linked = @linker.auto_link("http://t.co/0JG5Mcq", :url_entities => [{
+        "url" => "http://t.co/0JG5Mcq",
+        "display_url" => "blog.twitter.com/2011/05/twitte…",
+        "expanded_url" => "http://blog.twitter.com/2011/05/twitter-for-mac-update.html",
+        "indices" => [
+          84,
+          103
+        ]
+      }])
+      html = Nokogiri::HTML(linked)
+      expect(html.search('a')).to_not be_empty
+      expect(html.search('a[@href="http://t.co/0JG5Mcq"]')).to_not be_empty
+      expect(html.search('span[@class=js-display-url]').inner_text).to be == "blog.twitter.com/2011/05/twitte"
+      expect(html.inner_text).to be == " http://blog.twitter.com/2011/05/twitter-for-mac-update.html …"
+      expect(html.search('span[@style="position:absolute;left:-9999px;"]').size).to be == 4
+    end
+    it "should accept invisible_tag_attrs option" do
+      linked = @linker.auto_link("http://t.co/0JG5Mcq",
+        {
+          :url_entities => [{
+            "url" => "http://t.co/0JG5Mcq",
+            "display_url" => "blog.twitter.com/2011/05/twitte…",
+            "expanded_url" => "http://blog.twitter.com/2011/05/twitter-for-mac-update.html",
+            "indices" => [
+              0,
+              19
+            ]
+          }],
+          :invisible_tag_attrs => "style='dummy;'"
+      })
+      html = Nokogiri::HTML(linked)
+      expect(html.search('span[@style="dummy;"]').size).to be == 4
+    end
+    it "should show display_url if available in entity" do
+      linked = @linker.auto_link_entities("http://t.co/0JG5Mcq",
+        [{
+          :url => "http://t.co/0JG5Mcq",
+          :display_url => "blog.twitter.com/2011/05/twitte…",
+          :expanded_url => "http://blog.twitter.com/2011/05/twitter-for-mac-update.html",
+          :indices => [0, 19]
+        }]
+      )
+      html = Nokogiri::HTML(linked)
+      expect(html.search('a')).to_not be_empty
+      expect(html.search('a[@href="http://t.co/0JG5Mcq"]')).to_not be_empty
+      expect(html.search('span[@class=js-display-url]').inner_text).to be == "blog.twitter.com/2011/05/twitte"
+      expect(html.inner_text).to be == " http://blog.twitter.com/2011/05/twitter-for-mac-update.html …"
+    end
+    it "should apply :class as a CSS class" do
+      linked = @linker.auto_link("http://example.com/", :class => 'myclass')
+      expect(linked).to have_autolinked_url('http://example.com/')
+      expect(linked).to match(/myclass/)
+    end
+    it "should apply :url_class only on URL" do
+      linked = @linker.auto_link("http://twitter.com")
+      expect(linked).to have_autolinked_url('http://twitter.com')
+      expect(expect(linked)).to_not match(/class/)
+      linked = @linker.auto_link("http://twitter.com", :url_class => 'testClass')
+      expect(linked).to have_autolinked_url('http://twitter.com')
+      expect(linked).to match(/class=\"testClass\"/)
+      linked = @linker.auto_link("#hash @tw", :url_class => 'testClass')
+      expect(linked).to match(/class=\"tweet-url hashtag\"/)
+      expect(linked).to match(/class=\"tweet-url username\"/)
+      expect(linked).to_not match(/class=\"testClass\"/)
+    end
+    it "should add rel=nofollow by default" do
+      linked = @linker.auto_link("http://example.com/")
+      expect(linked).to have_autolinked_url('http://example.com/')
+      expect(linked).to match(/nofollow/)
+    end
+    it "should include the '@' symbol in a username when passed :username_include_symbol" do
+      linked = @linker.auto_link("@user", :username_include_symbol => true)
+      expect(linked).to link_to_screen_name('user', '@user')
+    end
+    it "should include the '@' symbol in a list when passed :username_include_symbol" do
+      linked = @linker.auto_link("@user/list", :username_include_symbol => true)
+      expect(linked).to link_to_list_path('user/list', '@user/list')
+    end
+    it "should not add rel=nofollow when passed :suppress_no_follow" do
+      linked = @linker.auto_link("http://example.com/", :suppress_no_follow => true)
+      expect(linked).to have_autolinked_url('http://example.com/')
+      expect(linked).to_not match(/nofollow/)
+    end
+    it "should not add a target attribute by default" do
+      linked = @linker.auto_link("http://example.com/")
+      expect(linked).to have_autolinked_url('http://example.com/')
+      expect(linked).to_not match(/target=/)
+    end
+    it "should respect the :target option" do
+      linked = @linker.auto_link("http://example.com/", :target => 'mywindow')
+      expect(linked).to have_autolinked_url('http://example.com/')
+      expect(linked).to match(/target="mywindow"/)
+    end
+    it "should customize href by username_url_block option" do
+      linked = @linker.auto_link("@test", :username_url_block => lambda{|a| "dummy"})
+      expect(linked).to have_autolinked_url('dummy', 'test')
+    end
+    it "should customize href by list_url_block option" do
+      linked = @linker.auto_link("@test/list", :list_url_block => lambda{|a| "dummy"})
+      expect(linked).to have_autolinked_url('dummy', 'test/list')
+    end
+    it "should customize href by hashtag_url_block option" do
+      linked = @linker.auto_link("#hashtag", :hashtag_url_block => lambda{|a| "dummy"})
+      expect(linked).to have_autolinked_url('dummy', '#hashtag')
+    end
+    it "should customize href by cashtag_url_block option" do
+      linked = @linker.auto_link("$CASH", :cashtag_url_block => lambda{|a| "dummy"})
+      expect(linked).to have_autolinked_url('dummy', '$CASH')
+    end
+    it "should customize href by link_url_block option" do
+      linked = @linker.auto_link("http://example.com/", :link_url_block => lambda{|a| "dummy"})
+      expect(linked).to have_autolinked_url('dummy', 'http://example.com/')
+    end
+    it "should modify link attributes by link_attribute_block" do
+      linked = @linker.auto_link("#hash @mention",
+        :link_attribute_block => lambda{|entity, attributes|
+          attributes[:"dummy-hash-attr"] = "test" if entity[:hashtag]
+        }
+      )
+      expect(linked).to match(/<a[^>]+hashtag[^>]+dummy-hash-attr=\"test\"[^>]+>/)
+      expect(linked).to_not match(/<a[^>]+username[^>]+dummy-hash-attr=\"test\"[^>]+>/)
+      expect(linked).to_not match(/link_attribute_block/i)
+      linked = @linker.auto_link("@mention http://twitter.com/",
+        :link_attribute_block => lambda{|entity, attributes|
+          attributes["dummy-url-attr"] = entity[:url] if entity[:url]
+        }
+      )
+      expect(linked).to_not match(/<a[^>]+username[^>]+dummy-url-attr=\"http:\/\/twitter.com\/\"[^>]*>/)
+      expect(linked).to match(/<a[^>]+dummy-url-attr=\"http:\/\/twitter.com\/\"/)
+    end
+    it "should modify link text by link_text_block" do
+      linked = @linker.auto_link("#hash @mention",
+        :link_text_block => lambda{|entity, text|
+          entity[:hashtag] ? "#replaced" : "pre_#{text}_post"
+        }
+      )
+      expect(linked).to match(/<a[^>]+>#replaced<\/a>/)
+      expect(linked).to match(/<a[^>]+>pre_mention_post<\/a>/)
+      linked = @linker.auto_link("#hash @mention", {
+        :link_text_block => lambda{|entity, text|
+          "pre_#{text}_post"
+        },
+        :symbol_tag => "s", :text_with_symbol_tag => "b", :username_include_symbol => true
+      })
+      expect(linked).to match(/<a[^>]+>pre_<s>#<\/s><b>hash<\/b>_post<\/a>/)
+      expect(linked).to match(/<a[^>]+>pre_<s>@<\/s><b>mention<\/b>_post<\/a>/)
+    end
+    it "should apply :url_target only to auto-linked URLs" do
+      auto_linked = @linker.auto_link("#hashtag @mention http://test.com/", {:url_target => '_blank'})
+      expect(auto_linked).to have_autolinked_hashtag('#hashtag')
+      expect(auto_linked).to link_to_screen_name('mention')
+      expect(auto_linked).to have_autolinked_url('http://test.com/')
+      expect(auto_linked).to_not match(/<a[^>]+hashtag[^>]+target[^>]+>/)
+      expect(auto_linked).to_not match(/<a[^>]+username[^>]+target[^>]+>/)
+      expect(auto_linked).to match(/<a[^>]+test.com[^>]+target=\"_blank\"[^>]*>/)
+    end
+    it "should apply target='_blank' only to auto-linked URLs when :target_blank is set to true" do
+      auto_linked = @linker.auto_link("#hashtag @mention http://test.com/", {:target_blank => true})
+      expect(auto_linked).to have_autolinked_hashtag('#hashtag')
+      expect(auto_linked).to link_to_screen_name('mention')
+      expect(auto_linked).to have_autolinked_url('http://test.com/')
+      expect(auto_linked).to match(/<a[^>]+hashtag[^>]+target=\"_blank\"[^>]*>/)
+      expect(auto_linked).to match(/<a[^>]+username[^>]+target=\"_blank\"[^>]*>/)
+      expect(auto_linked).to match(/<a[^>]+test.com[^>]+target=\"_blank\"[^>]*>/)
+    end
+  end
+  describe "link_url_with_entity" do
+    before do
+      @linker = TestAutolink.new
+    end
+    it "should use display_url and expanded_url" do
+      expect(@linker.send(:link_url_with_entity,
+        {
+          :url => "http://t.co/abcde",
+          :display_url => "twitter.com",
+          :expanded_url => "http://twitter.com/"},
+        {:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'")).to be == "<span class='tco-ellipsis'><span class='invisible'>&nbsp;</span></span><span class='invisible'>http://</span><span class='js-display-url'>twitter.com</span><span class='invisible'>/</span><span class='tco-ellipsis'><span class='invisible'>&nbsp;</span></span>";
+    end
+    it "should correctly handle display_url ending with '…'" do
+      expect(@linker.send(:link_url_with_entity,
+        {
+          :url => "http://t.co/abcde",
+          :display_url => "twitter.com…",
+          :expanded_url => "http://twitter.com/abcdefg"},
+        {:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'")).to be == "<span class='tco-ellipsis'><span class='invisible'>&nbsp;</span></span><span class='invisible'>http://</span><span class='js-display-url'>twitter.com</span><span class='invisible'>/abcdefg</span><span class='tco-ellipsis'><span class='invisible'>&nbsp;</span>…</span>";
+    end
+    it "should correctly handle display_url starting with '…'" do
+      expect(@linker.send(:link_url_with_entity,
+        {
+          :url => "http://t.co/abcde",
+          :display_url => "…tter.com/abcdefg",
+          :expanded_url => "http://twitter.com/abcdefg"},
+        {:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'")).to be == "<span class='tco-ellipsis'>…<span class='invisible'>&nbsp;</span></span><span class='invisible'>http://twi</span><span class='js-display-url'>tter.com/abcdefg</span><span class='invisible'></span><span class='tco-ellipsis'><span class='invisible'>&nbsp;</span></span>";
+    end
+    it "should not create spans if display_url and expanded_url are on different domains" do
+      expect(@linker.send(:link_url_with_entity,
+        {
+          :url => "http://t.co/abcde",
+          :display_url => "pic.twitter.com/xyz",
+          :expanded_url => "http://twitter.com/foo/statuses/123/photo/1"},
+        {:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'")).to be == "pic.twitter.com/xyz"
+    end
+  end
+  describe "symbol_tag" do
+    before do
+      @linker = TestAutolink.new
+    end
+    it "should put :symbol_tag around symbol" do
+      expect(@linker.auto_link("@mention", {:symbol_tag => 's', :username_include_symbol=>true})).to match(/<s>@<\/s>mention/)
+      expect(@linker.auto_link("#hash", {:symbol_tag => 's'})).to match(/<s>#<\/s>hash/)
+      result = @linker.auto_link("@mention #hash $CASH", {:symbol_tag => 'b', :username_include_symbol=>true})
+      expect(result).to match(/<b>@<\/b>mention/)
+      expect(result).to match(/<b>#<\/b>hash/)
+      expect(result).to match(/<b>\$<\/b>CASH/)
+    end
+    it "should put :text_with_symbol_tag around text" do
+      result = @linker.auto_link("@mention #hash $CASH", {:text_with_symbol_tag => 'b'})
+      expect(result).to match(/<b>mention<\/b>/)
+      expect(result).to match(/<b>hash<\/b>/)
+      expect(result).to match(/<b>CASH<\/b>/)
+    end
+    it "should put :symbol_tag around symbol and :text_with_symbol_tag around text" do
+      result = @linker.auto_link("@mention #hash $CASH", {:symbol_tag => 's', :text_with_symbol_tag => 'b', :username_include_symbol=>true})
+      expect(result).to match(/<s>@<\/s><b>mention<\/b>/)
+      expect(result).to match(/<s>#<\/s><b>hash<\/b>/)
+      expect(result).to match(/<s>\$<\/s><b>CASH<\/b>/)
+    end
+  end
+  describe "html_escape" do
+    before do
+      @linker = TestAutolink.new
+    end
+    it "should escape html entities properly" do
+      expect(@linker.html_escape("&")).to be == "&amp;"
+      expect(@linker.html_escape(">")).to be == "&gt;"
+      expect(@linker.html_escape("<")).to be == "&lt;"
+      expect(@linker.html_escape("\"")).to be == "&quot;"
+      expect(@linker.html_escape("'")).to be == "&#39;"
+      expect(@linker.html_escape("&<>\"")).to be == "&amp;&lt;&gt;&quot;"
+      expect(@linker.html_escape("<div>")).to be == "&lt;div&gt;"
+      expect(@linker.html_escape("a&b")).to be == "a&amp;b"
+      expect(@linker.html_escape("<a href=\"https://twitter.com\" target=\"_blank\">twitter & friends</a>")).to be == "&lt;a href=&quot;https://twitter.com&quot; target=&quot;_blank&quot;&gt;twitter &amp; friends&lt;/a&gt;"
+      expect(@linker.html_escape("&amp;")).to be == "&amp;amp;"
+      expect(@linker.html_escape(nil)).to be == nil
+    end
+  end
+end