RubyGems - twitter-text - Versions diffs - 1.14.7 → 2.0.0 - Mend

twitter-text 1.14.7 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

checksums.yaml +5 -5
data/.rspec +1 -1
data/README.md +104 -33
data/lib/assets/tld_lib.yml +1 -0
data/lib/twitter-text.rb +2 -0
data/lib/twitter-text/autolink.rb +4 -4
data/lib/twitter-text/configuration.rb +53 -0
data/lib/twitter-text/deprecation.rb +1 -1
data/lib/twitter-text/extractor.rb +31 -1
data/lib/twitter-text/regex.rb +13 -13
data/lib/twitter-text/validation.rb +155 -43
data/lib/twitter-text/weighted_range.rb +18 -0
data/spec/autolinking_spec.rb +161 -161
data/spec/configuration_spec.rb +91 -0
data/spec/extractor_spec.rb +92 -72
data/spec/hithighlighter_spec.rb +15 -15
data/spec/regex_spec.rb +7 -7
data/spec/rewriter_spec.rb +110 -109
data/spec/spec_helper.rb +13 -15
data/spec/test_urls.rb +6 -4
data/spec/twitter_text_spec.rb +2 -2
data/spec/unicode_spec.rb +10 -10
data/spec/validation_spec.rb +35 -11
data/test/conformance_test.rb +14 -0
data/twitter-text.gemspec +11 -9
metadata +53 -32
data/lib/assets/tld_lib.yml +0 -1565

data/spec/configuration_spec.rb ADDED

@@ -0,0 +1,91 @@
+# encoding: utf-8
+require File.dirname(__FILE__) + '/spec_helper'
+describe Twitter::Configuration do
+  context "configuration" do
+    context "with invalid data" do
+      it "should raise an exception" do
+        invalid_hash = Twitter::Configuration.parse_string("{\"version\":2,\"maxWeightedTweetLength\":280,\"scale\":100,\"defaultWeight\":200,\"transformedURLLength\":23,\"ranges\":[{\"start\":0,\"end\":true,\"weight\":false},{\"start\":8192,\"end\":8205,\"weight\":100},{\"start\":8208,\"end\":8223,\"weight\":100},{\"start\":8242,\"end\":8247,\"weight\":100}]}")
+        expect { Twitter::Configuration.new(invalid_hash) }.to raise_error(ArgumentError)
+      end
+    end
+    context "with defaults" do
+      before do
+        Twitter::Configuration.default_configuration = Twitter::Configuration.configuration_from_file(Twitter::Configuration::CONFIG_V2)
+      end
+      it "should define version constants" do
+        expect(Twitter::Configuration.const_defined?(:CONFIG_V1)).to be true
+        expect(Twitter::Configuration.const_defined?(:CONFIG_V2)).to be true
+      end
+      it "should define a default configuration" do
+        expect(Twitter::Configuration.default_configuration).to_not be_nil
+        expect(Twitter::Configuration.default_configuration.version).to eq(2)
+      end
+    end
+    context "with v1 configuration" do
+      before do
+        @config = Twitter::Configuration.configuration_from_file(Twitter::Configuration::CONFIG_V1)
+      end
+      it "should have a version" do
+        expect(@config.version).to eq(1)
+      end
+      it "should have a max_weighted_tweet_length" do
+        expect(@config.max_weighted_tweet_length).to eq(140)
+      end
+      it "should have a scale" do
+        expect(@config.scale).to eq(1)
+      end
+      it "should have a default_weight" do
+        expect(@config.default_weight).to eq(1)
+      end
+      it "should have a transformed_url_length" do
+        expect(@config.transformed_url_length).to eq(23)
+      end
+    end
+    context "with v2 configuration" do
+      before do
+        @config = Twitter::Configuration.configuration_from_file(Twitter::Configuration::CONFIG_V2)
+      end
+      it "should have a version" do
+        expect(@config.version).to eq(2)
+      end
+      it "should have a max_weighted_tweet_length" do
+        expect(@config.max_weighted_tweet_length).to eq(280)
+      end
+      it "should have a scale" do
+        expect(@config.scale).to eq(100)
+      end
+      it "should have a default_weight" do
+        expect(@config.default_weight).to eq(200)
+      end
+      it "should have a transformed_url_length" do
+        expect(@config.transformed_url_length).to eq(23)
+      end
+      it "should have a configured range" do
+        expect(@config.ranges).to be_kind_of(Array)
+        expect(@config.ranges.count).to be > 0
+        expect(@config.ranges[0]).to be_kind_of(Twitter::WeightedRange)
+        weighted_range = @config.ranges[0]
+        expect(weighted_range.start).to be_kind_of(Integer)
+        expect(weighted_range.end).to be_kind_of(Integer)
+        expect(weighted_range.weight).to be_kind_of(Integer)
+      end
+    end
+  end
+end

data/spec/extractor_spec.rb CHANGED

@@ -13,37 +13,37 @@ describe Twitter::Extractor do
   describe "mentions" do
     context "single screen name alone " do
       it "should be linked" do
-        @extractor.extract_mentioned_screen_names("@alice").should == ["alice"]
+        expect(@extractor.extract_mentioned_screen_names("@alice")).to be == ["alice"]
       end
       it "should be linked with _" do
-        @extractor.extract_mentioned_screen_names("@alice_adams").should == ["alice_adams"]
+        expect(@extractor.extract_mentioned_screen_names("@alice_adams")).to be == ["alice_adams"]
       end
       it "should be linked if numeric" do
-        @extractor.extract_mentioned_screen_names("@1234").should == ["1234"]
+        expect(@extractor.extract_mentioned_screen_names("@1234")).to be == ["1234"]
       end
     end
     context "multiple screen names" do
       it "should both be linked" do
-        @extractor.extract_mentioned_screen_names("@alice @bob").should == ["alice", "bob"]
+        expect(@extractor.extract_mentioned_screen_names("@alice @bob")).to be == ["alice", "bob"]
       end
     end
     context "screen names embedded in text" do
       it "should be linked in Latin text" do
-        @extractor.extract_mentioned_screen_names("waiting for @alice to arrive").should == ["alice"]
+        expect(@extractor.extract_mentioned_screen_names("waiting for @alice to arrive")).to be == ["alice"]
       end
       it "should be linked in Japanese text" do
-        @extractor.extract_mentioned_screen_names("の@aliceに到着を待っている").should == ["alice"]
+        expect(@extractor.extract_mentioned_screen_names("の@aliceに到着を待っている")).to be == ["alice"]
       end
       it "should ignore mentions preceded by !, @, #, $, %, & or *" do
         invalid_chars = ['!', '@', '#', '$', '%', '&', '*']
         invalid_chars.each do |c|
-          @extractor.extract_mentioned_screen_names("f#{c}@kn").should == []
+          expect(@extractor.extract_mentioned_screen_names("f#{c}@kn")).to be == []
         end
       end
     end
@@ -51,36 +51,36 @@ describe Twitter::Extractor do
     it "should accept a block arugment and call it in order" do
       needed = ["alice", "bob"]
       @extractor.extract_mentioned_screen_names("@alice @bob") do |sn|
-        sn.should == needed.shift
+        expect(sn).to be == needed.shift
       end
-      needed.should == []
+      expect(needed).to be == []
     end
   end
   describe "mentions with indices" do
     context "single screen name alone " do
       it "should be linked and the correct indices" do
-        @extractor.extract_mentioned_screen_names_with_indices("@alice").should == [{:screen_name => "alice", :indices => [0, 6]}]
+        expect(@extractor.extract_mentioned_screen_names_with_indices("@alice")).to be == [{:screen_name => "alice", :indices => [0, 6]}]
       end
       it "should be linked with _ and the correct indices" do
-        @extractor.extract_mentioned_screen_names_with_indices("@alice_adams").should == [{:screen_name => "alice_adams", :indices => [0, 12]}]
+        expect(@extractor.extract_mentioned_screen_names_with_indices("@alice_adams")).to be == [{:screen_name => "alice_adams", :indices => [0, 12]}]
       end
       it "should be linked if numeric and the correct indices" do
-        @extractor.extract_mentioned_screen_names_with_indices("@1234").should == [{:screen_name => "1234", :indices => [0, 5]}]
+        expect(@extractor.extract_mentioned_screen_names_with_indices("@1234")).to be == [{:screen_name => "1234", :indices => [0, 5]}]
       end
     end
     context "multiple screen names" do
       it "should both be linked with the correct indices" do
-        @extractor.extract_mentioned_screen_names_with_indices("@alice @bob").should ==
+        expect(@extractor.extract_mentioned_screen_names_with_indices("@alice @bob")).to be ==
           [{:screen_name => "alice", :indices => [0, 6]},
            {:screen_name => "bob", :indices => [7, 11]}]
       end
       it "should be linked with the correct indices even when repeated" do
-        @extractor.extract_mentioned_screen_names_with_indices("@alice @alice @bob").should ==
+        expect(@extractor.extract_mentioned_screen_names_with_indices("@alice @alice @bob")).to be ==
           [{:screen_name => "alice", :indices => [0, 6]},
            {:screen_name => "alice", :indices => [7, 13]},
            {:screen_name => "bob", :indices => [14, 18]}]
@@ -89,11 +89,11 @@ describe Twitter::Extractor do
     context "screen names embedded in text" do
       it "should be linked in Latin text with the correct indices" do
-        @extractor.extract_mentioned_screen_names_with_indices("waiting for @alice to arrive").should == [{:screen_name => "alice", :indices => [12, 18]}]
+        expect(@extractor.extract_mentioned_screen_names_with_indices("waiting for @alice to arrive")).to be == [{:screen_name => "alice", :indices => [12, 18]}]
       end
       it "should be linked in Japanese text with the correct indices" do
-        @extractor.extract_mentioned_screen_names_with_indices("の@aliceに到着を待っている").should == [{:screen_name => "alice", :indices => [1, 7]}]
+        expect(@extractor.extract_mentioned_screen_names_with_indices("の@aliceに到着を待っている")).to be == [{:screen_name => "alice", :indices => [1, 7]}]
       end
     end
@@ -101,45 +101,45 @@ describe Twitter::Extractor do
       needed = [{:screen_name => "alice", :indices => [0, 6]}, {:screen_name => "bob", :indices => [7, 11]}]
       @extractor.extract_mentioned_screen_names_with_indices("@alice @bob") do |sn, start_index, end_index|
         data = needed.shift
-        sn.should == data[:screen_name]
-        start_index.should == data[:indices].first
-        end_index.should == data[:indices].last
+        expect(sn).to be == data[:screen_name]
+        expect(start_index).to be == data[:indices].first
+        expect(end_index).to be == data[:indices].last
       end
-      needed.should == []
+      expect(needed).to be == []
     end
     it "should extract screen name in text with supplementary character" do
-      @extractor.extract_mentioned_screen_names_with_indices("#{[0x10400].pack('U')} @alice").should == [{:screen_name => "alice", :indices => [2, 8]}]
+      expect(@extractor.extract_mentioned_screen_names_with_indices("#{[0x10400].pack('U')} @alice")).to be == [{:screen_name => "alice", :indices => [2, 8]}]
     end
   end
   describe "replies" do
     context "should be extracted from" do
       it "should extract from lone name" do
-        @extractor.extract_reply_screen_name("@alice").should == "alice"
+        expect(@extractor.extract_reply_screen_name("@alice")).to be == "alice"
       end
       it "should extract from the start" do
-        @extractor.extract_reply_screen_name("@alice reply text").should == "alice"
+        expect(@extractor.extract_reply_screen_name("@alice reply text")).to be == "alice"
       end
       it "should extract preceded by a space" do
-        @extractor.extract_reply_screen_name(" @alice reply text").should == "alice"
+        expect(@extractor.extract_reply_screen_name(" @alice reply text")).to be == "alice"
       end
       it "should extract preceded by a full-width space" do
-        @extractor.extract_reply_screen_name("#{[0x3000].pack('U')}@alice reply text").should == "alice"
+        expect(@extractor.extract_reply_screen_name("#{[0x3000].pack('U')}@alice reply text")).to be == "alice"
       end
     end
     context "should not be extracted from" do
       it "should not be extracted when preceded by text" do
-        @extractor.extract_reply_screen_name("reply @alice text").should == nil
+        expect(@extractor.extract_reply_screen_name("reply @alice text")).to be == nil
       end
       it "should not be extracted when preceded by puctuation" do
         %w(. / _ - + # ! @).each do |punct|
-          @extractor.extract_reply_screen_name("#{punct}@alice text").should == nil
+          expect(@extractor.extract_reply_screen_name("#{punct}@alice text")).to be == nil
         end
       end
     end
@@ -147,7 +147,7 @@ describe Twitter::Extractor do
     context "should accept a block arugment" do
       it "should call the block on match" do
         @extractor.extract_reply_screen_name("@alice") do |sn|
-          sn.should == "alice"
+          expect(sn).to be == "alice"
         end
       end
@@ -156,7 +156,7 @@ describe Twitter::Extractor do
         @extractor.extract_reply_screen_name("not a reply") do |sn|
           calls += 1
         end
-        calls.should == 0
+        expect(calls).to be == 0
       end
     end
   end
@@ -165,19 +165,21 @@ describe Twitter::Extractor do
     describe "matching URLS" do
       TestUrls::VALID.each do |url|
         it "should extract the URL #{url} and prefix it with a protocol if missing" do
-          @extractor.extract_urls(url).first.should include(url)
+          expect(@extractor.extract_urls(url).first).to include(url)
         end
         it "should match the URL #{url} when it's embedded in other text" do
           text = "Sweet url: #{url} I found. #awesome"
-          @extractor.extract_urls(text).first.should include(url)
+          expect(@extractor.extract_urls(text).first).to include(url)
         end
       end
     end
     describe "invalid URLS" do
-      it "does not link urls with invalid domains" do
-        @extractor.extract_urls("http://tld-too-short.x").should == []
+      TestUrls::INVALID.each do |url|
+        it "does not extract URL from #{url}" do
+          expect(@extractor.extract_urls(url).first).to be nil
+        end
       end
     end
@@ -185,19 +187,19 @@ describe Twitter::Extractor do
       TestUrls::TCO.each do |url|
         it "should only extract the t.co URL from the URL #{url}" do
           extracted_urls = @extractor.extract_urls(url)
-          extracted_urls.size.should == 1
+          expect(extracted_urls.size).to be == 1
           extracted_url = extracted_urls.first
-          extracted_url.should_not == url
-          extracted_url.should == url[0...20]
+          expect(extracted_url).to_not be == url
+          expect(extracted_url).to be == url[0...20]
         end
         it "should match the t.co URL from the URL #{url} when it's embedded in other text" do
           text = "Sweet url: #{url} I found. #awesome"
           extracted_urls = @extractor.extract_urls(text)
-          extracted_urls.size.should == 1
+          expect(extracted_urls.size).to be == 1
           extracted_url = extracted_urls.first
-          extracted_url.should_not == url
-          extracted_url.should == url[0...20]
+          expect(extracted_url).to_not be == url
+          expect(extracted_url).to be == url[0...20]
         end
       end
     end
@@ -208,32 +210,50 @@ describe Twitter::Extractor do
       TestUrls::VALID.each do |url|
         it "should extract the URL #{url} and prefix it with a protocol if missing" do
           extracted_urls = @extractor.extract_urls_with_indices(url)
-          extracted_urls.size.should == 1
+          expect(extracted_urls.size).to be == 1
           extracted_url = extracted_urls.first
-          extracted_url[:url].should include(url)
-          extracted_url[:indices].first.should == 0
-          extracted_url[:indices].last.should == url.chars.to_a.size
+          expect(extracted_url[:url]).to include(url)
+          expect(extracted_url[:indices].first).to be == 0
+          expect(extracted_url[:indices].last).to be == url.chars.to_a.size
         end
         it "should match the URL #{url} when it's embedded in other text" do
           text = "Sweet url: #{url} I found. #awesome"
           extracted_urls = @extractor.extract_urls_with_indices(text)
-          extracted_urls.size.should == 1
+          expect(extracted_urls.size).to be == 1
           extracted_url = extracted_urls.first
-          extracted_url[:url].should include(url)
-          extracted_url[:indices].first.should == 11
-          extracted_url[:indices].last.should == 11 + url.chars.to_a.size
+          expect(extracted_url[:url]).to include(url)
+          expect(extracted_url[:indices].first).to be == 11
+          expect(extracted_url[:indices].last).to be == 11 + url.chars.to_a.size
         end
       end
       it "should extract URL in text with supplementary character" do
-        @extractor.extract_urls_with_indices("#{[0x10400].pack('U')} http://twitter.com").should == [{:url => "http://twitter.com", :indices => [2, 20]}]
+        expect(@extractor.extract_urls_with_indices("#{[0x10400].pack('U')} http://twitter.com")).to be == [{:url => "http://twitter.com", :indices => [2, 20]}]
       end
     end
     describe "invalid URLS" do
       it "does not link urls with invalid domains" do
-        @extractor.extract_urls_with_indices("http://tld-too-short.x").should == []
+        expect(@extractor.extract_urls_with_indices("http://tld-too-short.x")).to be == []
+      end
+      it "does not consider a long URL with protocol to be valid" do
+        # maximum length of domain label is 32 chars.
+        url = ("a" * 31) + "."
+        url *= (Twitter::Extractor::MAX_URL_LENGTH / 32)
+        url = "https://" + url + "com" # longer than 4096 (MAX_URL_LENGTH) chars
+        expect(@extractor.is_valid_domain(url.length, url, true)).to be false
+      end
+      it "does not consider a long URL without protocol to be valid" do
+        # maximum length of domain label is 32 chars.
+        url = ("a" * 31) + "."
+        url *= ((Twitter::Extractor::MAX_URL_LENGTH / 32) - 1)
+        url = url + "com" # shorter than 4096 (MAX_URL_LENGTH) chars
+        expect(@extractor.is_valid_domain(url.length, url, false)).to be true
+        url = ("a" * (31 - "https://".length)) + "." + url
+        expect(@extractor.is_valid_domain(url.length, url, false)).to be false
       end
     end
@@ -241,23 +261,23 @@ describe Twitter::Extractor do
       TestUrls::TCO.each do |url|
         it "should only extract the t.co URL from the URL #{url} and adjust indices correctly" do
           extracted_urls = @extractor.extract_urls_with_indices(url)
-          extracted_urls.size.should == 1
+          expect(extracted_urls.size).to be == 1
           extracted_url = extracted_urls.first
-          extracted_url[:url].should_not include(url)
-          extracted_url[:url].should include(url[0...20])
-          extracted_url[:indices].first.should == 0
-          extracted_url[:indices].last.should == 20
+          expect(extracted_url[:url]).to_not include(url)
+          expect(extracted_url[:url]).to include(url[0...20])
+          expect(extracted_url[:indices].first).to be == 0
+          expect(extracted_url[:indices].last).to be == 20
         end
         it "should match the t.co URL from the URL #{url} when it's embedded in other text" do
           text = "Sweet url: #{url} I found. #awesome"
           extracted_urls = @extractor.extract_urls_with_indices(text)
-          extracted_urls.size.should == 1
+          expect(extracted_urls.size).to be == 1
           extracted_url = extracted_urls.first
-          extracted_url[:url].should_not include(url)
-          extracted_url[:url].should include(url[0...20])
-          extracted_url[:indices].first.should == 11
-          extracted_url[:indices].last.should == 31
+          expect(extracted_url[:url]).to_not include(url)
+          expect(extracted_url[:url]).to include(url[0...20])
+          expect(extracted_url[:indices].first).to be == 11
+          expect(extracted_url[:indices].last).to be == 31
         end
       end
     end
@@ -267,11 +287,11 @@ describe Twitter::Extractor do
     context "extracts latin/numeric hashtags" do
       %w(text text123 123text).each do |hashtag|
         it "should extract ##{hashtag}" do
-          @extractor.extract_hashtags("##{hashtag}").should == [hashtag]
+          expect(@extractor.extract_hashtags("##{hashtag}")).to be == [hashtag]
         end
         it "should extract ##{hashtag} within text" do
-          @extractor.extract_hashtags("pre-text ##{hashtag} post-text").should == [hashtag]
+          expect(@extractor.extract_hashtags("pre-text ##{hashtag} post-text")).to be == [hashtag]
         end
       end
     end
@@ -280,47 +300,47 @@ describe Twitter::Extractor do
       context "should allow accents" do
         %w(mañana café münchen).each do |hashtag|
           it "should extract ##{hashtag}" do
-            @extractor.extract_hashtags("##{hashtag}").should == [hashtag]
+            expect(@extractor.extract_hashtags("##{hashtag}")).to be == [hashtag]
           end
           it "should extract ##{hashtag} within text" do
-            @extractor.extract_hashtags("pre-text ##{hashtag} post-text").should == [hashtag]
+            expect(@extractor.extract_hashtags("pre-text ##{hashtag} post-text")).to be == [hashtag]
           end
         end
         it "should not allow the multiplication character" do
-          @extractor.extract_hashtags("#pre#{Twitter::Unicode::U00D7}post").should == ["pre"]
+          expect(@extractor.extract_hashtags("#pre#{Twitter::Unicode::U00D7}post")).to be == ["pre"]
         end
         it "should not allow the division character" do
-          @extractor.extract_hashtags("#pre#{Twitter::Unicode::U00F7}post").should == ["pre"]
+          expect(@extractor.extract_hashtags("#pre#{Twitter::Unicode::U00F7}post")).to be == ["pre"]
         end
       end
     end
     it "should not extract numeric hashtags" do
-      @extractor.extract_hashtags("#1234").should == []
+      expect(@extractor.extract_hashtags("#1234")).to be == []
     end
     it "should extract hashtag followed by punctuations" do
-      @extractor.extract_hashtags("#test1: #test2; #test3\"").should == ["test1", "test2" ,"test3"]
+      expect(@extractor.extract_hashtags("#test1: #test2; #test3\"")).to be == ["test1", "test2" ,"test3"]
     end
   end
   describe "hashtags with indices" do
     def match_hashtag_in_text(hashtag, text, offset = 0)
       extracted_hashtags = @extractor.extract_hashtags_with_indices(text)
-      extracted_hashtags.size.should == 1
+      expect(extracted_hashtags.size).to be == 1
       extracted_hashtag = extracted_hashtags.first
-      extracted_hashtag[:hashtag].should == hashtag
-      extracted_hashtag[:indices].first.should == offset
-      extracted_hashtag[:indices].last.should == offset + hashtag.chars.to_a.size + 1
+      expect(extracted_hashtag[:hashtag]).to be == hashtag
+      expect(extracted_hashtag[:indices].first).to be == offset
+      expect(extracted_hashtag[:indices].last).to be == offset + hashtag.chars.to_a.size + 1
     end
     def not_match_hashtag_in_text(text)
       extracted_hashtags = @extractor.extract_hashtags_with_indices(text)
-      extracted_hashtags.size.should == 0
+      expect(extracted_hashtags.size).to be == 0
     end
     context "extracts latin/numeric hashtags" do