RubyGems - twitter-text-relative - Versions diffs - 1.6.2.pre.3 - Mend

twitter-text-relative 1.6.2.pre.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

checksums.yaml +15 -0
data/.gemtest +0 -0
data/.gitignore +40 -0
data/.gitmodules +3 -0
data/.rspec +2 -0
data/.travis.yml +4 -0
data/Gemfile +4 -0
data/LICENSE +188 -0
data/README.rdoc +123 -0
data/Rakefile +64 -0
data/lib/twitter-text-relative.rb +22 -0
data/lib/twitter-text-relative/autolink.rb +443 -0
data/lib/twitter-text-relative/deprecation.rb +15 -0
data/lib/twitter-text-relative/extractor.rb +328 -0
data/lib/twitter-text-relative/hash_helper.rb +21 -0
data/lib/twitter-text-relative/hit_highlighter.rb +86 -0
data/lib/twitter-text-relative/regex.rb +362 -0
data/lib/twitter-text-relative/rewriter.rb +59 -0
data/lib/twitter-text-relative/unicode.rb +26 -0
data/lib/twitter-text-relative/validation.rb +113 -0
data/script/destroy +14 -0
data/script/generate +14 -0
data/spec/autolinking_spec.rb +826 -0
data/spec/extractor_spec.rb +368 -0
data/spec/hithighlighter_spec.rb +92 -0
data/spec/regex_spec.rb +38 -0
data/spec/rewriter_spec.rb +548 -0
data/spec/spec_helper.rb +127 -0
data/spec/test_urls.rb +80 -0
data/spec/twitter_text_spec.rb +21 -0
data/spec/unicode_spec.rb +31 -0
data/spec/validation_spec.rb +43 -0
data/test/conformance_test.rb +182 -0
data/twitter-text-relative.gemspec +30 -0
metadata +203 -0

data/spec/extractor_spec.rb ADDED Viewed

@@ -0,0 +1,368 @@
+# encoding: utf-8
+require File.dirname(__FILE__) + '/spec_helper'
+class TestExtractor
+  include Twitter::Extractor
+end
+describe Twitter::Extractor do
+  before do
+    @extractor = TestExtractor.new
+  end
+  describe "mentions" do
+    context "single screen name alone " do
+      it "should be linked" do
+        @extractor.extract_mentioned_screen_names("@alice").should == ["alice"]
+      end
+      it "should be linked with _" do
+        @extractor.extract_mentioned_screen_names("@alice_adams").should == ["alice_adams"]
+      end
+      it "should be linked if numeric" do
+        @extractor.extract_mentioned_screen_names("@1234").should == ["1234"]
+      end
+    end
+    context "multiple screen names" do
+      it "should both be linked" do
+        @extractor.extract_mentioned_screen_names("@alice @bob").should == ["alice", "bob"]
+      end
+    end
+    context "screen names embedded in text" do
+      it "should be linked in Latin text" do
+        @extractor.extract_mentioned_screen_names("waiting for @alice to arrive").should == ["alice"]
+      end
+      it "should be linked in Japanese text" do
+        @extractor.extract_mentioned_screen_names("の@aliceに到着を待っている").should == ["alice"]
+      end
+      it "should ignore mentions preceded by !, @, #, $, %, & or *" do
+        invalid_chars = ['!', '@', '#', '$', '%', '&', '*']
+        invalid_chars.each do |c|
+          @extractor.extract_mentioned_screen_names("f#{c}@kn").should == []
+        end
+      end
+    end
+    it "should accept a block arugment and call it in order" do
+      needed = ["alice", "bob"]
+      @extractor.extract_mentioned_screen_names("@alice @bob") do |sn|
+        sn.should == needed.shift
+      end
+      needed.should == []
+    end
+  end
+  describe "mentions with indices" do
+    context "single screen name alone " do
+      it "should be linked and the correct indices" do
+        @extractor.extract_mentioned_screen_names_with_indices("@alice").should == [{:screen_name => "alice", :indices => [0, 6]}]
+      end
+      it "should be linked with _ and the correct indices" do
+        @extractor.extract_mentioned_screen_names_with_indices("@alice_adams").should == [{:screen_name => "alice_adams", :indices => [0, 12]}]
+      end
+      it "should be linked if numeric and the correct indices" do
+        @extractor.extract_mentioned_screen_names_with_indices("@1234").should == [{:screen_name => "1234", :indices => [0, 5]}]
+      end
+    end
+    context "multiple screen names" do
+      it "should both be linked with the correct indices" do
+        @extractor.extract_mentioned_screen_names_with_indices("@alice @bob").should ==
+          [{:screen_name => "alice", :indices => [0, 6]},
+           {:screen_name => "bob", :indices => [7, 11]}]
+      end
+      it "should be linked with the correct indices even when repeated" do
+        @extractor.extract_mentioned_screen_names_with_indices("@alice @alice @bob").should ==
+          [{:screen_name => "alice", :indices => [0, 6]},
+           {:screen_name => "alice", :indices => [7, 13]},
+           {:screen_name => "bob", :indices => [14, 18]}]
+      end
+    end
+    context "screen names embedded in text" do
+      it "should be linked in Latin text with the correct indices" do
+        @extractor.extract_mentioned_screen_names_with_indices("waiting for @alice to arrive").should == [{:screen_name => "alice", :indices => [12, 18]}]
+      end
+      it "should be linked in Japanese text with the correct indices" do
+        @extractor.extract_mentioned_screen_names_with_indices("の@aliceに到着を待っている").should == [{:screen_name => "alice", :indices => [1, 7]}]
+      end
+    end
+    it "should accept a block arugment and call it in order" do
+      needed = [{:screen_name => "alice", :indices => [0, 6]}, {:screen_name => "bob", :indices => [7, 11]}]
+      @extractor.extract_mentioned_screen_names_with_indices("@alice @bob") do |sn, start_index, end_index|
+        data = needed.shift
+        sn.should == data[:screen_name]
+        start_index.should == data[:indices].first
+        end_index.should == data[:indices].last
+      end
+      needed.should == []
+    end
+    it "should extract screen name in text with supplementary character" do
+      @extractor.extract_mentioned_screen_names_with_indices("#{[0x10400].pack('U')} @alice").should == [{:screen_name => "alice", :indices => [2, 8]}]
+    end
+  end
+  describe "replies" do
+    context "should be extracted from" do
+      it "should extract from lone name" do
+        @extractor.extract_reply_screen_name("@alice").should == "alice"
+      end
+      it "should extract from the start" do
+        @extractor.extract_reply_screen_name("@alice reply text").should == "alice"
+      end
+      it "should extract preceded by a space" do
+        @extractor.extract_reply_screen_name(" @alice reply text").should == "alice"
+      end
+      it "should extract preceded by a full-width space" do
+        @extractor.extract_reply_screen_name("#{[0x3000].pack('U')}@alice reply text").should == "alice"
+      end
+    end
+    context "should not be extracted from" do
+      it "should not be extracted when preceded by text" do
+        @extractor.extract_reply_screen_name("reply @alice text").should == nil
+      end
+      it "should not be extracted when preceded by puctuation" do
+        %w(. / _ - + # ! @).each do |punct|
+          @extractor.extract_reply_screen_name("#{punct}@alice text").should == nil
+        end
+      end
+    end
+    context "should accept a block arugment" do
+      it "should call the block on match" do
+        @extractor.extract_reply_screen_name("@alice") do |sn|
+          sn.should == "alice"
+        end
+      end
+      it "should not call the block on no match" do
+        calls = 0
+        @extractor.extract_reply_screen_name("not a reply") do |sn|
+          calls += 1
+        end
+        calls.should == 0
+      end
+    end
+  end
+  describe "urls" do
+    describe "matching URLS" do
+      TestUrls::VALID.each do |url|
+        it "should extract the URL #{url} and prefix it with a protocol if missing" do
+          @extractor.extract_urls(url).first.should include(url)
+        end
+        it "should match the URL #{url} when it's embedded in other text" do
+          text = "Sweet url: #{url} I found. #awesome"
+          @extractor.extract_urls(text).first.should include(url)
+        end
+      end
+    end
+    describe "invalid URLS" do
+      it "does not link urls with invalid domains" do
+        @extractor.extract_urls("http://tld-too-short.x").should == []
+      end
+    end
+    describe "t.co URLS" do
+      TestUrls::TCO.each do |url|
+        it "should only extract the t.co URL from the URL #{url}" do
+          extracted_urls = @extractor.extract_urls(url)
+          extracted_urls.size.should == 1
+          extracted_url = extracted_urls.first
+          extracted_url.should_not == url
+          extracted_url.should == url[0...20]
+        end
+        it "should match the t.co URL from the URL #{url} when it's embedded in other text" do
+          text = "Sweet url: #{url} I found. #awesome"
+          extracted_urls = @extractor.extract_urls(text)
+          extracted_urls.size.should == 1
+          extracted_url = extracted_urls.first
+          extracted_url.should_not == url
+          extracted_url.should == url[0...20]
+        end
+      end
+    end
+  end
+  describe "urls with indices" do
+    describe "matching URLS" do
+      TestUrls::VALID.each do |url|
+        it "should extract the URL #{url} and prefix it with a protocol if missing" do
+          extracted_urls = @extractor.extract_urls_with_indices(url)
+          extracted_urls.size.should == 1
+          extracted_url = extracted_urls.first
+          extracted_url[:url].should include(url)
+          extracted_url[:indices].first.should == 0
+          extracted_url[:indices].last.should == url.chars.to_a.size
+        end
+        it "should match the URL #{url} when it's embedded in other text" do
+          text = "Sweet url: #{url} I found. #awesome"
+          extracted_urls = @extractor.extract_urls_with_indices(text)
+          extracted_urls.size.should == 1
+          extracted_url = extracted_urls.first
+          extracted_url[:url].should include(url)
+          extracted_url[:indices].first.should == 11
+          extracted_url[:indices].last.should == 11 + url.chars.to_a.size
+        end
+      end
+      it "should extract URL in text with supplementary character" do
+        @extractor.extract_urls_with_indices("#{[0x10400].pack('U')} http://twitter.com").should == [{:url => "http://twitter.com", :indices => [2, 20]}]
+      end
+    end
+    describe "invalid URLS" do
+      it "does not link urls with invalid domains" do
+        @extractor.extract_urls_with_indices("http://tld-too-short.x").should == []
+      end
+    end
+    describe "t.co URLS" do
+      TestUrls::TCO.each do |url|
+        it "should only extract the t.co URL from the URL #{url} and adjust indices correctly" do
+          extracted_urls = @extractor.extract_urls_with_indices(url)
+          extracted_urls.size.should == 1
+          extracted_url = extracted_urls.first
+          extracted_url[:url].should_not include(url)
+          extracted_url[:url].should include(url[0...20])
+          extracted_url[:indices].first.should == 0
+          extracted_url[:indices].last.should == 20
+        end
+        it "should match the t.co URL from the URL #{url} when it's embedded in other text" do
+          text = "Sweet url: #{url} I found. #awesome"
+          extracted_urls = @extractor.extract_urls_with_indices(text)
+          extracted_urls.size.should == 1
+          extracted_url = extracted_urls.first
+          extracted_url[:url].should_not include(url)
+          extracted_url[:url].should include(url[0...20])
+          extracted_url[:indices].first.should == 11
+          extracted_url[:indices].last.should == 31
+        end
+      end
+    end
+  end
+  describe "hashtags" do
+    context "extracts latin/numeric hashtags" do
+      %w(text text123 123text).each do |hashtag|
+        it "should extract ##{hashtag}" do
+          @extractor.extract_hashtags("##{hashtag}").should == [hashtag]
+        end
+        it "should extract ##{hashtag} within text" do
+          @extractor.extract_hashtags("pre-text ##{hashtag} post-text").should == [hashtag]
+        end
+      end
+    end
+    context "international hashtags" do
+      context "should allow accents" do
+        %w(mañana café münchen).each do |hashtag|
+          it "should extract ##{hashtag}" do
+            @extractor.extract_hashtags("##{hashtag}").should == [hashtag]
+          end
+          it "should extract ##{hashtag} within text" do
+            @extractor.extract_hashtags("pre-text ##{hashtag} post-text").should == [hashtag]
+          end
+        end
+        it "should not allow the multiplication character" do
+          @extractor.extract_hashtags("#pre#{Twitter::Unicode::U00D7}post").should == ["pre"]
+        end
+        it "should not allow the division character" do
+          @extractor.extract_hashtags("#pre#{Twitter::Unicode::U00F7}post").should == ["pre"]
+        end
+      end
+    end
+    it "should not extract numeric hashtags" do
+      @extractor.extract_hashtags("#1234").should == []
+    end
+    it "should extract hashtag followed by punctuations" do
+      @extractor.extract_hashtags("#test1: #test2; #test3\"").should == ["test1", "test2" ,"test3"]
+    end
+  end
+  describe "hashtags with indices" do
+    def match_hashtag_in_text(hashtag, text, offset = 0)
+      extracted_hashtags = @extractor.extract_hashtags_with_indices(text)
+      extracted_hashtags.size.should == 1
+      extracted_hashtag = extracted_hashtags.first
+      extracted_hashtag[:hashtag].should == hashtag
+      extracted_hashtag[:indices].first.should == offset
+      extracted_hashtag[:indices].last.should == offset + hashtag.chars.to_a.size + 1
+    end
+    def not_match_hashtag_in_text(text)
+      extracted_hashtags = @extractor.extract_hashtags_with_indices(text)
+      extracted_hashtags.size.should == 0
+    end
+    context "extracts latin/numeric hashtags" do
+      %w(text text123 123text).each do |hashtag|
+        it "should extract ##{hashtag}" do
+          match_hashtag_in_text(hashtag, "##{hashtag}")
+        end
+        it "should extract ##{hashtag} within text" do
+          match_hashtag_in_text(hashtag, "pre-text ##{hashtag} post-text", 9)
+        end
+      end
+    end
+    context "international hashtags" do
+      context "should allow accents" do
+        %w(mañana café münchen).each do |hashtag|
+          it "should extract ##{hashtag}" do
+            match_hashtag_in_text(hashtag, "##{hashtag}")
+          end
+          it "should extract ##{hashtag} within text" do
+            match_hashtag_in_text(hashtag, "pre-text ##{hashtag} post-text", 9)
+          end
+        end
+        it "should not allow the multiplication character" do
+          match_hashtag_in_text("pre", "#pre#{[0xd7].pack('U')}post", 0)
+        end
+        it "should not allow the division character" do
+          match_hashtag_in_text("pre", "#pre#{[0xf7].pack('U')}post", 0)
+        end
+      end
+    end
+    it "should not extract numeric hashtags" do
+      not_match_hashtag_in_text("#1234")
+    end
+    it "should extract hashtag in text with supplementary character" do
+      match_hashtag_in_text("hashtag", "#{[0x10400].pack('U')} #hashtag", 2)
+    end
+  end
+end

data/spec/hithighlighter_spec.rb ADDED Viewed

@@ -0,0 +1,92 @@
+# encoding: utf-8
+require File.dirname(__FILE__) + '/spec_helper'
+class TestHitHighlighter
+  include Twitter::HitHighlighter
+end
+describe Twitter::HitHighlighter do
+  describe "highlight" do
+    before do
+      @highlighter = TestHitHighlighter.new
+    end
+    context "with options" do
+      before do
+        @original = "Testing this hit highliter"
+        @hits = [[13,16]]
+      end
+      it "should default to <em> tags" do
+        @highlighter.hit_highlight(@original, @hits).should == "Testing this <em>hit</em> highliter"
+      end
+      it "should allow tag override" do
+        @highlighter.hit_highlight(@original, @hits, :tag => 'b').should == "Testing this <b>hit</b> highliter"
+      end
+    end
+    context "without links" do
+      before do
+        @original = "Hey! this is a test tweet"
+      end
+      it "should return original when no hits are provided" do
+        @highlighter.hit_highlight(@original).should == @original
+      end
+      it "should highlight one hit" do
+        @highlighter.hit_highlight(@original, hits = [[5, 9]]).should == "Hey! <em>this</em> is a test tweet"
+      end
+      it "should highlight two hits" do
+        @highlighter.hit_highlight(@original, hits = [[5, 9], [15, 19]]).should == "Hey! <em>this</em> is a <em>test</em> tweet"
+      end
+      it "should correctly highlight first-word hits" do
+        @highlighter.hit_highlight(@original, hits = [[0, 3]]).should == "<em>Hey</em>! this is a test tweet"
+      end
+      it "should correctly highlight last-word hits" do
+        @highlighter.hit_highlight(@original, hits = [[20, 25]]).should == "Hey! this is a test <em>tweet</em>"
+      end
+    end
+    context "with links" do
+      it "should highlight with a single link" do
+        @highlighter.hit_highlight("@<a>bcherry</a> this was a test tweet", [[9, 13]]).should == "@<a>bcherry</a> <em>this</em> was a test tweet"
+      end
+      it "should highlight with link at the end" do
+        @highlighter.hit_highlight("test test <a>test</a>", [[5, 9]]).should == "test <em>test</em> <a>test</a>"
+      end
+      it "should highlight with a link at the beginning" do
+        @highlighter.hit_highlight("<a>test</a> test test", [[5, 9]]).should == "<a>test</a> <em>test</em> test"
+      end
+      it "should highlight an entire link" do
+        @highlighter.hit_highlight("test <a>test</a> test", [[5, 9]]).should == "test <a><em>test</em></a> test"
+      end
+      it "should highlight within a link" do
+        @highlighter.hit_highlight("test <a>test</a> test", [[6, 8]]).should == "test <a>t<em>es</em>t</a> test"
+      end
+      it "should highlight around a link" do
+        @highlighter.hit_highlight("test <a>test</a> test", [[3, 11]]).should == "tes<em>t <a>test</a> t</em>est"
+      end
+      it "should fail gracefully with bad hits" do
+        @highlighter.hit_highlight("test test", [[5, 20]]).should == "test <em>test</em>"
+      end
+      it "should not mess up with touching tags" do
+        @highlighter.hit_highlight("<a>foo</a><a>foo</a>", [[3,6]]).should == "<a>foo</a><a><em>foo</em></a>"
+      end
+    end
+  end
+end