RubyGems - twitter-text - Versions diffs - 1.4.8 → 1.4.9 - Mend

twitter-text 1.4.8 → 1.4.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

data/Gemfile.lock CHANGED Viewed

@@ -2,40 +2,16 @@ PATH
   remote: .
   specs:
     twitter-text (1.4.8)
-      actionpack
+      activesupport
 GEM
   remote: http://rubygems.org/
   specs:
-    abstract (1.0.0)
-    actionpack (3.0.3)
-      activemodel (= 3.0.3)
-      activesupport (= 3.0.3)
-      builder (~> 2.1.2)
-      erubis (~> 2.6.6)
-      i18n (~> 0.4)
-      rack (~> 1.2.1)
-      rack-mount (~> 0.6.13)
-      rack-test (~> 0.5.6)
-      tzinfo (~> 0.3.23)
-    activemodel (3.0.3)
-      activesupport (= 3.0.3)
-      builder (~> 2.1.2)
-      i18n (~> 0.4)
     activesupport (3.0.3)
-    builder (2.1.2)
     diff-lcs (1.1.2)
-    erubis (2.6.6)
-      abstract (>= 1.0.0)
-    i18n (0.5.0)
     nokogiri (1.4.4)
     nokogiri (1.4.4-java)
       weakling (>= 0.0.3)
-    rack (1.2.1)
-    rack-mount (0.6.13)
-      rack (>= 1.0.0)
-    rack-test (0.5.6)
-      rack (>= 1.0)
     rake (0.8.7)
     rspec (2.3.0)
       rspec-core (~> 2.3.0)
@@ -48,7 +24,6 @@ GEM
     simplecov (0.3.7)
       simplecov-html (>= 0.3.7)
     simplecov-html (0.3.9)
-    tzinfo (0.3.23)
     weakling (0.0.4-java)
 PLATFORMS
@@ -56,7 +31,6 @@ PLATFORMS
   ruby
 DEPENDENCIES
-  actionpack
   nokogiri
   rake
   rspec

data/README.rdoc CHANGED Viewed

@@ -90,6 +90,7 @@ Thanks to everybody who has filed issues, provided feedback or contributed patch
   * Jeff Smick - http://github.com/sprsquish
   * Kenneth Kufluk - https://github.com/kennethkufluk
   * Keita Fujii - https://github.com/keitaf
+  * Yoshimasa Niwa - https://github.com/niw
 * Patches from the community …
   * Jean-Philippe Bougie - http://github.com/jpbougie

data/lib/autolink.rb CHANGED Viewed

@@ -1,9 +1,9 @@
+require 'set'
 module Twitter
   # A module for including Tweet auto-linking in a class. The primary use of this is for helpers/views so they can auto-link
   # usernames, lists, hashtags and URLs.
   module Autolink extend self
-    include ActionView::Helpers::TagHelper #tag_options needed by auto_link
     # Default CSS class for auto-linked URLs
     DEFAULT_URL_CLASS = "tweet-url"
     # Default CSS class for auto-linked lists (along with the url class)
@@ -19,6 +19,7 @@ module Twitter
     # Options which should not be passed as HTML attributes
     OPTIONS_NOT_ATTRIBUTES = [:url_class, :list_class, :username_class, :hashtag_class,
                               :username_url_base, :list_url_base, :hashtag_url_base,
+                              :username_url_block, :list_url_block, :hashtag_url_block, :link_url_block,
                               :suppress_lists, :suppress_no_follow]
     HTML_ENTITIES = {
@@ -30,7 +31,7 @@ module Twitter
     }
     def html_escape(text)
-      text && text.gsub(/[&"'><]/) do |character|
+      text && text.to_s.gsub(/[&"'><]/) do |character|
         HTML_ENTITIES[character]
       end
     end
@@ -68,7 +69,7 @@ module Twitter
     # <tt>:list_url_base</tt>::      the value for <tt>href</tt> attribute on list links. The <tt>@username/list</tt> (minus the <tt>@</tt>) will be appended at the end of this.
     # <tt>:suppress_lists</tt>::    disable auto-linking to lists
     # <tt>:suppress_no_follow</tt>::   Do not add <tt>rel="nofollow"</tt> to auto-linked items
-    # <tt>:target</tt>::   add <tt>target="window_name"</tt> to auto-linked items
+    # <tt>:target</tt>::   add <tt>target="window_name"</tt> to auto-linked items
     def auto_link_usernames_or_lists(text, options = {}) # :yields: list_or_username
       options = options.dup
       options[:url_class] ||= DEFAULT_URL_CLASS
@@ -79,39 +80,27 @@ module Twitter
       options[:target] ||= DEFAULT_TARGET
       extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow]
-      new_text = ""
-      # this -1 flag allows strings ending in ">" to work
-      text.split(/[<>]/, -1).each_with_index do |chunk, index|
-        if index != 0
-          new_text << ((index % 2 == 0) ? ">" : "<")
-        end
+      Twitter::Rewriter.rewrite_usernames_or_lists(text) do |at, username, slash_listname|
+        name = "#{username}#{slash_listname}"
+        chunk = block_given? ? yield(name) : name
-        if index % 4 != 0
-          new_text << chunk
+        if slash_listname && !options[:suppress_lists]
+          href = if options[:list_url_block]
+            options[:list_url_block].call(name.downcase)
+          else
+            "#{html_escape(options[:list_url_base])}#{html_escape(name.downcase)}"
+          end
+          %(#{at}<a class="#{options[:url_class]} #{options[:list_class]}" #{target_tag(options)}href="#{href}"#{extra_html}>#{html_escape(chunk)}</a>)
         else
-          new_text << chunk.gsub(Twitter::Regex[:auto_link_usernames_or_lists]) do
-            before, at, user, slash_listname, after = $1, $2, $3, $4, $'
-            if slash_listname && !options[:suppress_lists]
-              # the link is a list
-              chunk = list = "#{user}#{slash_listname}"
-              chunk = yield(list) if block_given?
-              "#{before}#{at}<a class=\"#{options[:url_class]} #{options[:list_class]}\" #{target_tag(options)}href=\"#{html_escape(options[:list_url_base])}#{html_escape(list.downcase)}\"#{extra_html}>#{html_escape(chunk)}</a>"
-            else
-              if after =~ Twitter::Regex[:end_screen_name_match]
-                # Followed by something that means we don't autolink
-                "#{before}#{at}#{user}#{slash_listname}"
-              else
-                # this is a screen name
-                chunk = user
-                chunk = yield(chunk) if block_given?
-                "#{before}#{at}<a class=\"#{options[:url_class]} #{options[:username_class]}\" #{target_tag(options)}href=\"#{html_escape(options[:username_url_base])}#{html_escape(chunk)}\"#{extra_html}>#{html_escape(chunk)}</a>#{slash_listname}"
-              end
-            end
+          href = if options[:username_url_block]
+            options[:username_url_block].call(chunk)
+          else
+            "#{html_escape(options[:username_url_base])}#{html_escape(chunk)}"
           end
+          %(#{at}<a class="#{options[:url_class]} #{options[:username_class]}" #{target_tag(options)}href="#{href}"#{extra_html}>#{html_escape(chunk)}</a>)
         end
       end
-      new_text
     end
     # Add <tt><a></a></tt> tags around the hashtags in the provided <tt>text</tt>. The
@@ -122,7 +111,7 @@ module Twitter
     # <tt>:hashtag_class</tt>:: class to add to hashtag <tt><a></tt> tags
     # <tt>:hashtag_url_base</tt>::      the value for <tt>href</tt> attribute. The hashtag text (minus the <tt>#</tt>) will be appended at the end of this.
     # <tt>:suppress_no_follow</tt>::   Do not add <tt>rel="nofollow"</tt> to auto-linked items
-    # <tt>:target</tt>::   add <tt>target="window_name"</tt> to auto-linked items
+    # <tt>:target</tt>::   add <tt>target="window_name"</tt> to auto-linked items
     def auto_link_hashtags(text, options = {})  # :yields: hashtag_text
       options = options.dup
       options[:url_class] ||= DEFAULT_URL_CLASS
@@ -131,12 +120,14 @@ module Twitter
       options[:target] ||= DEFAULT_TARGET
       extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow]
-      text.gsub(Twitter::Regex[:auto_link_hashtags]) do
-        before = $1
-        hash = $2
-        text = $3
-        text = yield(text) if block_given?
-        "#{before}<a href=\"#{options[:hashtag_url_base]}#{html_escape(text)}\" title=\"##{html_escape(text)}\" #{target_tag(options)}class=\"#{options[:url_class]} #{options[:hashtag_class]}\"#{extra_html}>#{html_escape(hash)}#{html_escape(text)}</a>"
+      Twitter::Rewriter.rewrite_hashtags(text) do |hash, hashtag|
+        hashtag = yield(hashtag) if block_given?
+        href = if options[:hashtag_url_block]
+          options[:hashtag_url_block].call(hashtag)
+        else
+          "#{options[:hashtag_url_base]}#{html_escape(hashtag)}"
+        end
+        %(<a href="#{href}" title="##{html_escape(hashtag)}" #{target_tag(options)}class="#{options[:url_class]} #{options[:hashtag_class]}"#{extra_html}>#{html_escape(hash)}#{html_escape(hashtag)}</a>)
       end
     end
@@ -148,28 +139,45 @@ module Twitter
       options = href_options.dup
       options[:rel] = "nofollow" unless options.delete(:suppress_no_follow)
       options[:class] = options.delete(:url_class)
+      html_attrs = html_attrs_for_options(options)
-      text.gsub(Twitter::Regex[:valid_url]) do
-        all, before, url, protocol, domain, path, query_string = $1, $2, $3, $4, $5, $6, $7
-        if !protocol.blank?
-          html_attrs = tag_options(options.reject{|k,v| OPTIONS_NOT_ATTRIBUTES.include?(k) }.stringify_keys) || ""
-          "#{before}<a href=\"#{html_escape(url)}\"#{html_attrs}>#{html_escape(url)}</a>"
+      Twitter::Rewriter.rewrite_urls(text) do |url|
+        href = if options[:link_url_block]
+          options.delete(:link_url_block).call(url)
         else
-          all
+          html_escape(url)
         end
+        %(<a href="#{href}"#{html_attrs}>#{html_escape(url)}</a>)
       end
     end
     private
+    BOOLEAN_ATTRIBUTES = Set.new([:disabled, :readonly, :multiple, :checked]).freeze
+    def html_attrs_for_options(options)
+      html_attrs options.reject{|k, v| OPTIONS_NOT_ATTRIBUTES.include?(k)}
+    end
+    def html_attrs(options)
+      options.inject("") do |attrs, (key, value)|
+        if BOOLEAN_ATTRIBUTES.include?(key)
+          value = value ? key : nil
+        end
+        if !value.nil?
+          attrs << %( #{html_escape(key)}="#{html_escape(value)}")
+        end
+        attrs
+      end
+    end
     def target_tag(options)
-      target_option = options[:target]
-      if target_option.blank?
+      target_option = options[:target].to_s
+      if target_option.empty?
         ""
       else
         "target=\"#{html_escape(target_option)}\""
       end
     end
   end
 end

data/lib/extractor.rb CHANGED Viewed

@@ -57,7 +57,7 @@ module Twitter
       screen_names_only
     end
-    # Extracts a list of all usersnames mentioned in the Tweet <tt>text</tt>
+    # Extracts a list of all usernames mentioned in the Tweet <tt>text</tt>
     # along with the indices for where the mention occurred.  If the
     # <tt>text</tt> is nil or contains no username mentions, an empty array
     # will be returned.
@@ -87,6 +87,40 @@ module Twitter
       possible_screen_names
     end
+    # Extracts a list of all usernames or lists mentioned in the Tweet <tt>text</tt>
+    # along with the indices for where the mention occurred.  If the
+    # <tt>text</tt> is nil or contains no username or list mentions, an empty array
+    # will be returned.
+    #
+    # If a block is given, then it will be called with each username, list slug, the start
+    # index, and the end index in the <tt>text</tt>. The list_slug will be an empty string
+    # if this is a username mention.
+    def extract_mentions_or_lists_with_indices(text) # :yields: username, list_slug, start, end
+      return [] unless text
+      possible_entries = []
+      text.to_s.scan(Twitter::Regex[:extract_mentions_or_lists]) do |before, sn, list_slug, after|
+        extract_mentions_match_data = $~
+        unless after =~ Twitter::Regex[:end_screen_name_match]
+          start_position = extract_mentions_match_data.char_begin(2) - 1
+          end_position = extract_mentions_match_data.char_end(list_slug.nil? ? 2 : 3)
+          possible_entries << {
+            :screen_name => sn,
+            :list_slug => list_slug || "",
+            :indices => [start_position, end_position]
+          }
+        end
+      end
+      if block_given?
+        possible_entries.each do |mention|
+          yield mention[:screen_name], mention[:list_slug], mention[:indices].first, mention[:indices].last
+        end
+      end
+      possible_entries
+    end
     # Extracts the username replied to in the Tweet <tt>text</tt>. If the
     # <tt>text</tt> is <tt>nil</tt> or is not a reply nil will be returned.
     #
@@ -123,7 +157,7 @@ module Twitter
       position = 0
       text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, path, query|
         valid_url_match_data = $~
-        if !protocol.blank?
+        if protocol && !protocol.empty?
           start_position = valid_url_match_data.char_begin(3)
           end_position = valid_url_match_data.char_end(3)
           urls << {

data/lib/regex.rb CHANGED Viewed

@@ -7,6 +7,22 @@ module Twitter
   class Regex
     REGEXEN = {} # :nodoc:
+    def self.regex_range(from, to = nil) # :nodoc:
+      if $RUBY_1_9
+        if to
+          "\\u{#{from.to_s(16).rjust(4, '0')}}-\\u{#{to.to_s(16).rjust(4, '0')}}"
+        else
+          "\\u{#{from.to_s(16).rjust(4, '0')}}"
+        end
+      else
+        if to
+          [from].pack('U') + '-' + [to].pack('U')
+        else
+          [from].pack('U')
+        end
+      end
+   end
     # Space is more than %20, U+3000 for example is the full-width space used with Kanji. Provide a short-hand
     # to access both the list of characters and a pattern suitable for use with String#split
     #  Taken from: ActiveSupport::Multibyte::Handlers::UTF8Handler::UNICODE_WHITESPACE
@@ -29,6 +45,7 @@ module Twitter
     REGEXEN[:at_signs] = /[@＠]/
     REGEXEN[:extract_mentions] = /(^|[^a-zA-Z0-9_])#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})(?=(.|$))/o
+    REGEXEN[:extract_mentions_or_lists] = /(^|[^a-zA-Z0-9_])#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})(\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})?(?=(.|$))/o
     REGEXEN[:extract_reply] = /^(?:#{REGEXEN[:spaces]})*#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})/o
     major, minor, patch = RUBY_VERSION.split('.')
@@ -42,35 +59,43 @@ module Twitter
     # Latin accented characters
     # Excludes 0xd7 from the range (the multiplication sign, confusable with "x").
     # Also excludes 0xf7, the division sign
-    LATIN_ACCENTS = [(0xc0..0xd6).to_a, (0xd8..0xf6).to_a, (0xf8..0xff).to_a].flatten.pack('U*').freeze
+    LATIN_ACCENTS = [
+          regex_range(0xc0, 0xd6),
+          regex_range(0xd8, 0xf6),
+          regex_range(0xf8, 0xff),
+          regex_range(0x015f)
+    ].join('').freeze
     NON_LATIN_HASHTAG_CHARS = [
       # Cyrillic (Russian, Ukrainian, etc.)
-      (0x0400..0x04ff).to_a, # Cyrillic
-      (0x0500..0x0527).to_a, # Cyrillic Supplement
+      regex_range(0x0400, 0x04ff), # Cyrillic
+      regex_range(0x0500, 0x0527), # Cyrillic Supplement
+      regex_range(0x2de0, 0x2dff), # Cyrillic Extended A
+      regex_range(0xa640, 0xa69f), # Cyrillic Extended B
       # Hangul (Korean)
-      (0x1100..0x11ff).to_a, # Hangul Jamo
-      (0x3130..0x3185).to_a, # Hangul Compatibility Jamo
-      (0xA960..0xA97F).to_a, # Hangul Jamo Extended-A
-      (0xAC00..0xD7AF).to_a, # Hangul Syllables
-      (0xD7B0..0xD7FF).to_a # Hangul Jamo Extended-B
-    ].flatten.pack('U*').freeze
+      regex_range(0x1100, 0x11ff), # Hangul Jamo
+      regex_range(0x3130, 0x3185), # Hangul Compatibility Jamo
+      regex_range(0xA960, 0xA97F), # Hangul Jamo Extended-A
+      regex_range(0xAC00, 0xD7AF), # Hangul Syllables
+      regex_range(0xD7B0, 0xD7FF), # Hangul Jamo Extended-B
+      regex_range(0xFFA1, 0xFFDC) # Half-width Hangul
+    ].join('').freeze
     REGEXEN[:latin_accents] = /[#{LATIN_ACCENTS}]+/o
     REGEXEN[:end_screen_name_match] = /^(?:#{REGEXEN[:at_signs]}|#{REGEXEN[:latin_accents]}|:\/\/)/o
     CJ_HASHTAG_CHARACTERS = [
-      (0x30A1..0x30FA).to_a, 0x30FC, # Katakana (full-width)
-      (0xFF66..0xFF9F).to_a, # Katakana (half-width)
-      (0xFF10..0xFF19).to_a, (0xFF21..0xFF3A).to_a, (0xFF41..0xFF5A).to_a, # Latin (full-width)
-      (0x3041..0x3096).to_a, # Hiragana
-      (0x3400..0x4DBF).to_a, # Kanji (CJK Extension A)
-      (0x4E00..0x9FFF).to_a, # Kanji (Unified)
-      (0x20000..0x2A6DF).to_a, # Kanji (CJK Extension B)
-      (0x2A700..0x2B73F).to_a, # Kanji (CJK Extension C)
-      (0x2B740..0x2B81F).to_a, # Kanji (CJK Extension D)
-      (0x2F800..0x2FA1F).to_a, # Kanji (CJK supplement)
-      0x3005                   # Kanji (iteration mark)
-    ].flatten.pack('U*').freeze
+      regex_range(0x30A1, 0x30FA), regex_range(0x30FC, 0x30FE), # Katakana (full-width)
+      regex_range(0xFF66, 0xFF9F), # Katakana (half-width)
+      regex_range(0xFF10, 0xFF19), regex_range(0xFF21, 0xFF3A), regex_range(0xFF41, 0xFF5A), # Latin (full-width)
+      regex_range(0x3041, 0x3096), regex_range(0x3099, 0x309E), # Hiragana
+      regex_range(0x3400, 0x4DBF), # Kanji (CJK Extension A)
+      regex_range(0x4E00, 0x9FFF), # Kanji (Unified)
+      regex_range(0x20000, 0x2A6DF), # Kanji (CJK Extension B)
+      regex_range(0x2A700, 0x2B73F), # Kanji (CJK Extension C)
+      regex_range(0x2B740, 0x2B81F), # Kanji (CJK Extension D)
+      regex_range(0x2F800, 0x2FA1F), regex_range(0x3005), regex_range(0x303B) # Kanji (CJK supplement)
+    ].join('').freeze
     HASHTAG_BOUNDARY = /(?:\A|\z|#{REGEXEN[:spaces]}|「|」|。|、|\.|!|\?|！|？|,)/
@@ -93,7 +118,7 @@ module Twitter
     REGEXEN[:valid_domain_name] = /(?:[^#{DOMAIN_EXCLUDE_PART}](?:[-]|[^#{DOMAIN_EXCLUDE_PART}])*)?[^#{DOMAIN_EXCLUDE_PART}]/
     REGEXEN[:valid_domain] = /#{REGEXEN[:valid_subdomain]}*#{REGEXEN[:valid_domain_name]}\.(?:xn--[a-z0-9]{2,}|[a-z]{2,})(?::[0-9]+)?/i
-    REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\$\/%#\[\]\-_~|\.]/i
+    REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\$\/%#\[\]\-_~|#{LATIN_ACCENTS}]/i
     # Allow URL paths to contain balanced parens
     #  1. Used in Wikipedia URLs like /Primer_(film)
     #  2. Used in IIS sessions like /S(dfd346)/
@@ -102,12 +127,12 @@ module Twitter
     REGEXEN[:valid_url_path_chars] = /(?:
       #{REGEXEN[:wikipedia_disambiguation]}|
       @#{REGEXEN[:valid_general_url_path_chars]}+\/|
-      [\.,]#{REGEXEN[:valid_general_url_path_chars]}+|
+      [\.,]#{REGEXEN[:valid_general_url_path_chars]}?|
       #{REGEXEN[:valid_general_url_path_chars]}+
     )/ix
     # Valid end-of-path characters (so /foo. does not gobble the period).
     #   1. Allow =&# for empty URL parameters and other URL-join artifacts
-    REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9=_#\/\+\-]|#{REGEXEN[:wikipedia_disambiguation]}/io
+    REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9=_#\/\+\-#{LATIN_ACCENTS}]|#{REGEXEN[:wikipedia_disambiguation]}/io
     REGEXEN[:valid_url_query_chars] = /[a-z0-9!\*'\(\);:&=\+\$\/%#\[\]\-_\.,~|]/i
     REGEXEN[:valid_url_query_ending_chars] = /[a-z0-9_&=#\/]/i
     REGEXEN[:valid_url] = %r{

data/lib/rewriter.rb ADDED Viewed

@@ -0,0 +1,63 @@
+module Twitter
+  # A module that provides base methods to rewrite usernames, lists, hashtags and URLs.
+  module Rewriter extend self
+    def rewrite(text, options = {})
+      [:hashtags, :urls, :usernames_or_lists].inject(text) do |key|
+        send("rewrite_#{key}", text, &options[key]) if options[key]
+      end
+    end
+    def rewrite_usernames_or_lists(text)
+      new_text = ""
+      # this -1 flag allows strings ending in ">" to work
+      text.to_s.split(/[<>]/, -1).each_with_index do |chunk, index|
+        if index != 0
+          new_text << ((index % 2 == 0) ? ">" : "<")
+        end
+        if index % 4 != 0
+          new_text << chunk
+        else
+          new_text << chunk.gsub(Twitter::Regex[:auto_link_usernames_or_lists]) do
+            before, at, user, slash_listname, after = $1, $2, $3, $4, $'
+            if slash_listname
+              # the link is a list
+              "#{before}#{yield(at, user, slash_listname)}"
+            else
+              if after =~ Twitter::Regex[:end_screen_name_match]
+                # Followed by something that means we don't autolink
+                "#{before}#{at}#{user}#{slash_listname}"
+              else
+                # this is a screen name
+                "#{before}#{yield(at, user, nil)}#{slash_listname}"
+              end
+            end
+          end
+        end
+      end
+      new_text
+    end
+    def rewrite_hashtags(text)
+      text.to_s.gsub(Twitter::Regex[:auto_link_hashtags]) do
+        before = $1
+        hash = $2
+        hashtag = $3
+        "#{before}#{yield(hash, hashtag)}"
+      end
+    end
+    def rewrite_urls(text)
+      text.to_s.gsub(Twitter::Regex[:valid_url]) do
+        all, before, url, protocol, domain, path, query_string = $1, $2, $3, $4, $5, $6, $7
+        if protocol && !protocol.empty?
+          "#{before}#{yield(url)}"
+        else
+          all
+        end
+      end
+    end
+  end
+end