twitter-text 1.5.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +1 -1
- data/lib/twitter-text/autolink.rb +7 -2
- data/lib/twitter-text/extractor.rb +4 -5
- data/lib/twitter-text/regex.rb +15 -6
- data/lib/twitter-text/validation.rb +3 -2
- data/spec/autolinking_spec.rb +7 -3
- data/spec/rewriter_spec.rb +3 -3
- data/test/conformance_test.rb +1 -1
- data/twitter-text.gemspec +3 -3
- metadata +8 -7
    
        data/README.rdoc
    CHANGED
    
    | @@ -1,4 +1,4 @@ | |
| 1 | 
            -
            == twitter-text {<img src="https://secure.travis-ci.org/twitter/twitter-text-rb.png" />}[http://travis-ci.org/twitter/twitter-text-rb]
         | 
| 1 | 
            +
            == twitter-text {<img src="https://secure.travis-ci.org/twitter/twitter-text-rb.png" />}[http://travis-ci.org/twitter/twitter-text-rb] {<img src="https://codeclimate.com/badge.png" />}[https://codeclimate.com/github/twitter/twitter-text-rb]
         | 
| 2 2 |  | 
| 3 3 | 
             
            A gem that provides text processing routines for Twitter Tweets. The major
         | 
| 4 4 | 
             
            reason for this is to unify the various auto-linking and extraction of
         | 
| @@ -207,7 +207,7 @@ module Twitter | |
| 207 207 | 
             
                OPTIONS_NOT_ATTRIBUTES = Set.new([
         | 
| 208 208 | 
             
                  :url_class, :list_class, :username_class, :hashtag_class, :cashtag_class,
         | 
| 209 209 | 
             
                  :username_url_base, :list_url_base, :hashtag_url_base, :cashtag_url_base,
         | 
| 210 | 
            -
                  :username_url_block, :list_url_block, :hashtag_url_block, :link_url_block,
         | 
| 210 | 
            +
                  :username_url_block, :list_url_block, :hashtag_url_block, :cashtag_url_block, :link_url_block,
         | 
| 211 211 | 
             
                  :username_include_symbol, :suppress_lists, :suppress_no_follow, :url_entities,
         | 
| 212 212 | 
             
                  :invisible_tag_attrs, :symbol_tag, :text_with_symbol_tag, :url_target,
         | 
| 213 213 | 
             
                  :link_attribute_block, :link_text_block
         | 
| @@ -326,6 +326,11 @@ module Twitter | |
| 326 326 | 
             
                  hash = chars[entity[:indices].first]
         | 
| 327 327 | 
             
                  hashtag = entity[:hashtag]
         | 
| 328 328 | 
             
                  hashtag = yield(hashtag) if block_given?
         | 
| 329 | 
            +
                  hashtag_class = options[:hashtag_class]
         | 
| 330 | 
            +
             | 
| 331 | 
            +
                  if hashtag.match Twitter::Regex::REGEXEN[:rtl_chars]
         | 
| 332 | 
            +
                    hashtag_class += ' rtl'
         | 
| 333 | 
            +
                  end
         | 
| 329 334 |  | 
| 330 335 | 
             
                  href = if options[:hashtag_url_block]
         | 
| 331 336 | 
             
                    options[:hashtag_url_block].call(hashtag)
         | 
| @@ -334,7 +339,7 @@ module Twitter | |
| 334 339 | 
             
                  end
         | 
| 335 340 |  | 
| 336 341 | 
             
                  html_attrs = {
         | 
| 337 | 
            -
                    :class =>  | 
| 342 | 
            +
                    :class => hashtag_class,
         | 
| 338 343 | 
             
                    # FIXME As our conformance test, hash in title should be half-width,
         | 
| 339 344 | 
             
                    # this should be bug of conformance data.
         | 
| 340 345 | 
             
                    :title => "##{hashtag}"
         | 
| @@ -311,13 +311,12 @@ module Twitter | |
| 311 311 | 
             
                  return [] unless text =~ /\$/
         | 
| 312 312 |  | 
| 313 313 | 
             
                  tags = []
         | 
| 314 | 
            -
                  text.scan(Twitter::Regex[:valid_cashtag]) do |cash_text|
         | 
| 314 | 
            +
                  text.scan(Twitter::Regex[:valid_cashtag]) do |before, dollar, cash_text|
         | 
| 315 315 | 
             
                    match_data = $~
         | 
| 316 | 
            -
                     | 
| 317 | 
            -
                     | 
| 318 | 
            -
                    end_position = match_data.char_end(1)
         | 
| 316 | 
            +
                    start_position = match_data.char_begin(2)
         | 
| 317 | 
            +
                    end_position = match_data.char_end(3)
         | 
| 319 318 | 
             
                    tags << {
         | 
| 320 | 
            -
                      :cashtag => cash_text | 
| 319 | 
            +
                      :cashtag => cash_text,
         | 
| 321 320 | 
             
                      :indices => [start_position, end_position]
         | 
| 322 321 | 
             
                    }
         | 
| 323 322 | 
             
                  end
         | 
    
        data/lib/twitter-text/regex.rb
    CHANGED
    
    | @@ -2,7 +2,7 @@ | |
| 2 2 |  | 
| 3 3 | 
             
            module Twitter
         | 
| 4 4 | 
             
              # A collection of regular expressions for parsing Tweet text. The regular expression
         | 
| 5 | 
            -
              # list is frozen at load time to ensure immutability. These  | 
| 5 | 
            +
              # list is frozen at load time to ensure immutability. These regular expressions are
         | 
| 6 6 | 
             
              # used throughout the <tt>Twitter</tt> classes. Special care has been taken to make
         | 
| 7 7 | 
             
              # sure these regular expressions work with Tweets in all languages.
         | 
| 8 8 | 
             
              class Regex
         | 
| @@ -82,6 +82,14 @@ module Twitter | |
| 82 82 | 
             
                      regex_range(0x1e00, 0x1eff)
         | 
| 83 83 | 
             
                ].join('').freeze
         | 
| 84 84 |  | 
| 85 | 
            +
                RTL_CHARACTERS = [
         | 
| 86 | 
            +
                  regex_range(0x0600,0x06FF),
         | 
| 87 | 
            +
                  regex_range(0x0750,0x077F),
         | 
| 88 | 
            +
                  regex_range(0x0590,0x05FF),
         | 
| 89 | 
            +
                  regex_range(0xFE70,0xFEFF)
         | 
| 90 | 
            +
                ].join('').freeze
         | 
| 91 | 
            +
             | 
| 92 | 
            +
             | 
| 85 93 | 
             
                NON_LATIN_HASHTAG_CHARS = [
         | 
| 86 94 | 
             
                  # Cyrillic (Russian, Ukrainian, etc.)
         | 
| 87 95 | 
             
                  regex_range(0x0400, 0x04ff), # Cyrillic
         | 
| @@ -212,7 +220,7 @@ module Twitter | |
| 212 220 |  | 
| 213 221 | 
             
                REGEXEN[:valid_port_number] = /[0-9]+/
         | 
| 214 222 |  | 
| 215 | 
            -
                REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\.\$\/%#\[\]\-_ | 
| 223 | 
            +
                REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\.\$\/%#\[\]\-_~&|@#{LATIN_ACCENTS}]/io
         | 
| 216 224 | 
             
                # Allow URL paths to contain balanced parens
         | 
| 217 225 | 
             
                #  1. Used in Wikipedia URLs like /Primer_(film)
         | 
| 218 226 | 
             
                #  2. Used in IIS sessions like /S(dfd346)/
         | 
| @@ -220,16 +228,15 @@ module Twitter | |
| 220 228 | 
             
                # Valid end-of-path characters (so /foo. does not gobble the period).
         | 
| 221 229 | 
             
                #   1. Allow =&# for empty URL parameters and other URL-join artifacts
         | 
| 222 230 | 
             
                REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9=_#\/\+\-#{LATIN_ACCENTS}]|(?:#{REGEXEN[:valid_url_balanced_parens]})/io
         | 
| 223 | 
            -
                # Allow @ in a url, but only in the middle. Catch things like http://example.com/@user/
         | 
| 224 231 | 
             
                REGEXEN[:valid_url_path] = /(?:
         | 
| 225 232 | 
             
                  (?:
         | 
| 226 233 | 
             
                    #{REGEXEN[:valid_general_url_path_chars]}*
         | 
| 227 234 | 
             
                    (?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)*
         | 
| 228 235 | 
             
                    #{REGEXEN[:valid_url_path_ending_chars]}
         | 
| 229 | 
            -
                  )|( | 
| 236 | 
            +
                  )|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/)
         | 
| 230 237 | 
             
                )/iox
         | 
| 231 238 |  | 
| 232 | 
            -
                REGEXEN[:valid_url_query_chars] = /[a-z0-9!?\*'\(\);:&=\+\$\/%#\[\]\-_ | 
| 239 | 
            +
                REGEXEN[:valid_url_query_chars] = /[a-z0-9!?\*'\(\);:&=\+\$\/%#\[\]\-_\.,~|@]/i
         | 
| 233 240 | 
             
                REGEXEN[:valid_url_query_ending_chars] = /[a-z0-9_&=#\/]/i
         | 
| 234 241 | 
             
                REGEXEN[:valid_url] = %r{
         | 
| 235 242 | 
             
                  (                                                                                     #   $1 total match
         | 
| @@ -245,7 +252,7 @@ module Twitter | |
| 245 252 | 
             
                }iox;
         | 
| 246 253 |  | 
| 247 254 | 
             
                REGEXEN[:cashtag] = /[a-z]{1,6}(?:[._][a-z]{1,2})?/i
         | 
| 248 | 
            -
                REGEXEN[:valid_cashtag] = /( | 
| 255 | 
            +
                REGEXEN[:valid_cashtag] = /(^|#{REGEXEN[:spaces]})(\$)(#{REGEXEN[:cashtag]})(?=$|\s|[#{PUNCTUATION_CHARS}])/i
         | 
| 249 256 |  | 
| 250 257 | 
             
                # These URL validation pattern strings are based on the ABNF from RFC 3986
         | 
| 251 258 | 
             
                REGEXEN[:validate_url_unreserved] = /[a-z0-9\-._~]/i
         | 
| @@ -342,6 +349,8 @@ module Twitter | |
| 342 349 | 
             
                  )?\Z
         | 
| 343 350 | 
             
                }ix
         | 
| 344 351 |  | 
| 352 | 
            +
                REGEXEN[:rtl_chars] = /[#{RTL_CHARACTERS}]/io
         | 
| 353 | 
            +
             | 
| 345 354 | 
             
                REGEXEN.each_pair{|k,v| v.freeze }
         | 
| 346 355 |  | 
| 347 356 | 
             
                # Return the regular expression for a given <tt>key</tt>. If the <tt>key</tt>
         | 
| @@ -3,8 +3,9 @@ module Twitter | |
| 3 3 | 
             
                MAX_LENGTH = 140
         | 
| 4 4 |  | 
| 5 5 | 
             
                DEFAULT_TCO_URL_LENGTHS = {
         | 
| 6 | 
            -
                  :short_url_length =>  | 
| 7 | 
            -
                  :short_url_length_https =>  | 
| 6 | 
            +
                  :short_url_length => 22,
         | 
| 7 | 
            +
                  :short_url_length_https => 23,
         | 
| 8 | 
            +
                  :characters_reserved_per_media => 22
         | 
| 8 9 | 
             
                }.freeze
         | 
| 9 10 |  | 
| 10 11 | 
             
                # Returns the length of the string as it would be displayed. This is equivalent to the length of the Unicode NFC
         | 
    
        data/spec/autolinking_spec.rb
    CHANGED
    
    | @@ -488,9 +488,8 @@ describe Twitter::Autolink do | |
| 488 488 | 
             
                    context "with a username not followed by a /" do
         | 
| 489 489 | 
             
                      def original_text; 'http://example.com/@foobar'; end
         | 
| 490 490 |  | 
| 491 | 
            -
                      it "should link  | 
| 492 | 
            -
                        @autolinked_text.should have_autolinked_url('http://example.com | 
| 493 | 
            -
                        @autolinked_text.should link_to_screen_name('foobar')
         | 
| 491 | 
            +
                      it "should link url" do
         | 
| 492 | 
            +
                        @autolinked_text.should have_autolinked_url('http://example.com/@foobar')
         | 
| 494 493 | 
             
                      end
         | 
| 495 494 | 
             
                    end
         | 
| 496 495 |  | 
| @@ -678,6 +677,11 @@ describe Twitter::Autolink do | |
| 678 677 | 
             
                  linked.should have_autolinked_url('dummy', '#hashtag')
         | 
| 679 678 | 
             
                end
         | 
| 680 679 |  | 
| 680 | 
            +
                it "should customize href by cashtag_url_block option" do
         | 
| 681 | 
            +
                  linked = @linker.auto_link("$CASH", :cashtag_url_block => lambda{|a| "dummy"})
         | 
| 682 | 
            +
                  linked.should have_autolinked_url('dummy', '$CASH')
         | 
| 683 | 
            +
                end
         | 
| 684 | 
            +
             | 
| 681 685 | 
             
                it "should customize href by link_url_block option" do
         | 
| 682 686 | 
             
                  linked = @linker.auto_link("http://example.com/", :link_url_block => lambda{|a| "dummy"})
         | 
| 683 687 | 
             
                  linked.should have_autolinked_url('dummy', 'http://example.com/')
         | 
    
        data/spec/rewriter_spec.rb
    CHANGED
    
    | @@ -527,9 +527,9 @@ describe Twitter::Rewriter do | |
| 527 527 | 
             
                  context "with a username not followed by a /" do
         | 
| 528 528 | 
             
                    def original_text; "http://example.com/@foobar"; end
         | 
| 529 529 |  | 
| 530 | 
            -
                    it "should link  | 
| 531 | 
            -
                      @block_args.should == ["http://example.com | 
| 532 | 
            -
                      @rewritten_text.should == "[rewritten] | 
| 530 | 
            +
                    it "should link url" do
         | 
| 531 | 
            +
                      @block_args.should == ["http://example.com/@foobar"]
         | 
| 532 | 
            +
                      @rewritten_text.should == "[rewritten]"
         | 
| 533 533 | 
             
                    end
         | 
| 534 534 | 
             
                  end
         | 
| 535 535 |  | 
    
        data/test/conformance_test.rb
    CHANGED
    
    | @@ -33,7 +33,7 @@ class ConformanceTest < Test::Unit::TestCase | |
| 33 33 | 
             
              def equal_nodes?(expected, actual)
         | 
| 34 34 | 
             
                return false unless expected.name == actual.name
         | 
| 35 35 | 
             
                return false unless ordered_attributes(expected) == ordered_attributes(actual)
         | 
| 36 | 
            -
                return false if expected.text? && actual.text? &&  | 
| 36 | 
            +
                return false if expected.text? && actual.text? && expected.content != actual.content
         | 
| 37 37 |  | 
| 38 38 | 
             
                expected.children.each_with_index do |child, index|
         | 
| 39 39 | 
             
                  return false unless equal_nodes?(child, actual.children[index])
         | 
    
        data/twitter-text.gemspec
    CHANGED
    
    | @@ -2,11 +2,11 @@ | |
| 2 2 |  | 
| 3 3 | 
             
            Gem::Specification.new do |s|
         | 
| 4 4 | 
             
              s.name = "twitter-text"
         | 
| 5 | 
            -
              s.version = "1. | 
| 5 | 
            +
              s.version = "1.6.0"
         | 
| 6 6 | 
             
              s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle",
         | 
| 7 | 
            -
                           "Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii"]
         | 
| 7 | 
            +
                           "Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii", "James Koval"]
         | 
| 8 8 | 
             
              s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com",
         | 
| 9 | 
            -
                         "raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at", "keita@twitter.com"]
         | 
| 9 | 
            +
                         "raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at", "keita@twitter.com", "jkoval@twitter.com"]
         | 
| 10 10 | 
             
              s.homepage = "http://twitter.com"
         | 
| 11 11 | 
             
              s.description = s.summary = "A gem that provides text handling for Twitter"
         | 
| 12 12 |  | 
    
        metadata
    CHANGED
    
    | @@ -1,13 +1,13 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification 
         | 
| 2 2 | 
             
            name: twitter-text
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version 
         | 
| 4 | 
            -
              hash:  | 
| 4 | 
            +
              hash: 15
         | 
| 5 5 | 
             
              prerelease: 
         | 
| 6 6 | 
             
              segments: 
         | 
| 7 7 | 
             
              - 1
         | 
| 8 | 
            -
              -  | 
| 8 | 
            +
              - 6
         | 
| 9 9 | 
             
              - 0
         | 
| 10 | 
            -
              version: 1. | 
| 10 | 
            +
              version: 1.6.0
         | 
| 11 11 | 
             
            platform: ruby
         | 
| 12 12 | 
             
            authors: 
         | 
| 13 13 | 
             
            - Matt Sanford
         | 
| @@ -18,12 +18,12 @@ authors: | |
| 18 18 | 
             
            - J.P. Cummins
         | 
| 19 19 | 
             
            - Yoshimasa Niwa
         | 
| 20 20 | 
             
            - Keita Fujii
         | 
| 21 | 
            +
            - James Koval
         | 
| 21 22 | 
             
            autorequire: 
         | 
| 22 23 | 
             
            bindir: bin
         | 
| 23 24 | 
             
            cert_chain: []
         | 
| 24 25 |  | 
| 25 | 
            -
            date:  | 
| 26 | 
            -
            default_executable: 
         | 
| 26 | 
            +
            date: 2013-03-01 00:00:00 Z
         | 
| 27 27 | 
             
            dependencies: 
         | 
| 28 28 | 
             
            - !ruby/object:Gem::Dependency 
         | 
| 29 29 | 
             
              name: nokogiri
         | 
| @@ -119,6 +119,7 @@ email: | |
| 119 119 | 
             
            - jcummins@twitter.com
         | 
| 120 120 | 
             
            - niw@niw.at
         | 
| 121 121 | 
             
            - keita@twitter.com
         | 
| 122 | 
            +
            - jkoval@twitter.com
         | 
| 122 123 | 
             
            executables: []
         | 
| 123 124 |  | 
| 124 125 | 
             
            extensions: []
         | 
| @@ -159,7 +160,6 @@ files: | |
| 159 160 | 
             
            - spec/validation_spec.rb
         | 
| 160 161 | 
             
            - test/conformance_test.rb
         | 
| 161 162 | 
             
            - twitter-text.gemspec
         | 
| 162 | 
            -
            has_rdoc: true
         | 
| 163 163 | 
             
            homepage: http://twitter.com
         | 
| 164 164 | 
             
            licenses: []
         | 
| 165 165 |  | 
| @@ -189,7 +189,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 189 189 | 
             
            requirements: []
         | 
| 190 190 |  | 
| 191 191 | 
             
            rubyforge_project: 
         | 
| 192 | 
            -
            rubygems_version: 1. | 
| 192 | 
            +
            rubygems_version: 1.8.24
         | 
| 193 193 | 
             
            signing_key: 
         | 
| 194 194 | 
             
            specification_version: 3
         | 
| 195 195 | 
             
            summary: Twitter text handling library
         | 
| @@ -205,3 +205,4 @@ test_files: | |
| 205 205 | 
             
            - spec/unicode_spec.rb
         | 
| 206 206 | 
             
            - spec/validation_spec.rb
         | 
| 207 207 | 
             
            - test/conformance_test.rb
         | 
| 208 | 
            +
            has_rdoc: true
         |