twitter-text 1.0.4 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -57,10 +57,15 @@ Special care has been taken to be sure that auto-linking and extraction work
57
57
  in Tweets of all languages. This means that languages without spaces between
58
58
  words should work equally well.
59
59
 
60
+ === Hit Highlighting
61
+
62
+ Use this to provide emphasis around the "hits" returned from the Search API. It is built
63
+ to work against text that has been auto-linked already.
64
+
60
65
  === Conformance
61
66
 
62
67
  To run the Conformance suite, you'll need to add that project as a git submodule. From the root twitter-text-rb directory, run:
63
68
 
64
- git submodule add git@github.com:mzsanford/twitter-text-conformance.git test/twitter-text-conformance/
65
- git submodule init
66
- git submodule update
69
+ git submodule add git@github.com:mzsanford/twitter-text-conformance.git test/twitter-text-conformance/
70
+ git submodule init
71
+ git submodule update
data/Rakefile CHANGED
@@ -9,9 +9,9 @@ require 'digest'
9
9
 
10
10
  spec = Gem::Specification.new do |s|
11
11
  s.name = "twitter-text"
12
- s.version = "1.0.4"
13
- s.authors = ["Matt Sanford", "Patrick Ewing"]
14
- s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com"]
12
+ s.version = "1.1.1"
13
+ s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle"]
14
+ s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com"]
15
15
  s.homepage = "http://twitter.com"
16
16
  s.description = s.summary = "A gem that provides text handling for Twitter"
17
17
 
data/TODO CHANGED
@@ -1,3 +1,4 @@
1
1
  TODO:
2
2
 
3
- * More tests
3
+ * @mentions preceded by a dash should work. "I am great -@greatguy"
4
+ * HitHighlighter tests should be moved to the conformance suite
data/lib/autolink.rb CHANGED
@@ -38,7 +38,7 @@ module Twitter
38
38
  options),
39
39
  options)
40
40
  end
41
-
41
+
42
42
  # Add <tt><a></a></tt> tags around the usernames and lists in the provided <tt>text</tt>. The
43
43
  # <tt><a></tt> tags can be controlled with the following entries in the <tt>options</tt>
44
44
  # hash:
@@ -0,0 +1,81 @@
1
+
2
+ module Twitter
3
+ # Module for doing "hit highlighting" on tweets that have been auto-linked already.
4
+ # Useful with the results returned from the Search API.
5
+ module HitHighlighter
6
+ # Default Tag used for hit highlighting
7
+ DEFAULT_HIGHLIGHT_TAG = "em"
8
+
9
+ # Add <tt><em></em></tt> tags around the <tt>hits</tt> provided in the <tt>text</tt>. The
10
+ # <tt>hits</tt> should be an array of (start, end) index pairs, relative to the original
11
+ # text, before auto-linking (but the <tt>text</tt> may already be auto-linked if desired)
12
+ #
13
+ # The <tt><em></em></tt> tags can be overridden using the <tt>:tag</tt> option. For example:
14
+ #
15
+ # irb> hit_highlight("test hit here", [[5, 8]], :tag => 'strong')
16
+ # => "test <strong>hit</strong> here"
17
+ def hit_highlight(text, hits = [], options = {})
18
+ if hits.empty?
19
+ return text
20
+ end
21
+
22
+ tag_name = options[:tag] || DEFAULT_HIGHLIGHT_TAG
23
+ tags = ["<" + tag_name + ">", "</" + tag_name + ">"]
24
+
25
+ chunks = text.split("<").map do |item|
26
+ item.blank? ? item : item.split(">")
27
+ end.flatten
28
+
29
+ result = ""
30
+ chunk_index, chunk = 0, chunks[0]
31
+ chunk_chars = chunk.respond_to?("mb_chars") ? chunk.mb_chars : chunk.respond_to?("chars") && chunk.chars.respond_to?("[]") ? chunk.chars : chunk
32
+ prev_chunks_len = 0
33
+ chunk_cursor = 0
34
+ start_in_chunk = false
35
+ for hit, index in hits.flatten.each_with_index do
36
+ tag = tags[index % 2]
37
+
38
+ placed = false
39
+ until chunk.nil? || hit < prev_chunks_len + chunk.length do
40
+ result << chunk_chars[chunk_cursor..-1]
41
+ if start_in_chunk && hit == prev_chunks_len + chunk_chars.length
42
+ result << tag
43
+ placed = true
44
+ end
45
+
46
+ # correctly handle highlights that end on the final character.
47
+ if tag_text = chunks[chunk_index+1]
48
+ result << "<#{tag_text}>"
49
+ end
50
+
51
+ prev_chunks_len += chunk_chars.length
52
+ chunk_cursor = 0
53
+ chunk_index += 2
54
+ chunk = chunks[chunk_index]
55
+ chunk_chars = chunk.respond_to?("mb_chars") ? chunk.mb_chars : chunk.respond_to?("chars") && chunk.chars.respond_to?("[]") ? chunk.chars : chunk
56
+ start_in_chunk = false
57
+ end
58
+
59
+ if !placed && !chunk.nil?
60
+ hit_spot = hit - prev_chunks_len
61
+ result << chunk_chars[chunk_cursor...hit_spot].to_s + tag
62
+ chunk_cursor = hit_spot
63
+ if index % 2 == 0
64
+ start_in_chunk = true
65
+ end
66
+ end
67
+ end
68
+
69
+ if chunk
70
+ if chunk_cursor < chunk_chars.length
71
+ result << chunk_chars[chunk_cursor..-1]
72
+ end
73
+ (chunk_index+1).upto(chunks.length-1).each do |index|
74
+ result << (index.even? ? chunks[index] : "<#{chunks[index]}>")
75
+ end
76
+ end
77
+
78
+ result
79
+ end
80
+ end
81
+ end
data/lib/regex.rb CHANGED
@@ -44,7 +44,7 @@ module Twitter
44
44
 
45
45
  # URL related hash regex collection
46
46
  REGEXEN[:valid_preceding_chars] = /(?:[^\/"':!=]|^|\:)/
47
- REGEXEN[:valid_domain] = /(?:[^[:punct:]\s][\.-][^[:punct:]\s]|[^[:punct:]\s])+\.[a-z]{2,}(?::[0-9]+)?/i
47
+ REGEXEN[:valid_domain] = /(?:[^[:punct:]\s][\.-](?=[^[:punct:]\s])|[^[:punct:]\s]){1,}\.[a-z]{2,}(?::[0-9]+)?/i
48
48
  REGEXEN[:valid_url_path_chars] = /[\.\,]?[a-z0-9!\*'\(\);:=\+\$\/%#\[\]\-_,~@]/i
49
49
  # Valid end-of-path characters (so /foo. does not gobble the period).
50
50
  # 1. Allow ) for Wikipedia URLs.
@@ -67,7 +67,7 @@ module Twitter
67
67
  REGEXEN.each_pair{|k,v| v.freeze }
68
68
 
69
69
  # Return the regular expression for a given <tt>key</tt>. If the <tt>key</tt>
70
- # is not a known symbol a <tt>nil</tt> will be returned.
70
+ # is not a known symbol a <tt>nil</tt> will be returned.
71
71
  def self.[](key)
72
72
  REGEXEN[key]
73
73
  end
data/lib/twitter-text.rb CHANGED
@@ -10,3 +10,4 @@ require File.join(File.dirname(__FILE__), 'autolink')
10
10
  require File.join(File.dirname(__FILE__), 'extractor')
11
11
  require File.join(File.dirname(__FILE__), 'unicode')
12
12
  require File.join(File.dirname(__FILE__), 'validation')
13
+ require File.join(File.dirname(__FILE__), 'hithighlighter')
@@ -430,6 +430,7 @@ describe Twitter::Autolink do
430
430
  end
431
431
 
432
432
  end
433
+
433
434
  end
434
435
 
435
436
  end
@@ -0,0 +1,83 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
+ class TestHitHighlighter
4
+ include Twitter::HitHighlighter
5
+ end
6
+
7
+ describe Twitter::HitHighlighter do
8
+ describe "highlight" do
9
+ before do
10
+ @highlighter = TestHitHighlighter.new
11
+ end
12
+
13
+ context "with options" do
14
+ before do
15
+ @original = "Testing this hit highliter"
16
+ @hits = [[13,16]]
17
+ end
18
+
19
+ it "should default to <em> tags" do
20
+ @highlighter.hit_highlight(@original, @hits).should == "Testing this <em>hit</em> highliter"
21
+ end
22
+
23
+ it "should allow tag override" do
24
+ @highlighter.hit_highlight(@original, @hits, :tag => 'b').should == "Testing this <b>hit</b> highliter"
25
+ end
26
+ end
27
+
28
+ context "without links" do
29
+ before do
30
+ @original = "Hey! this is a test tweet"
31
+ end
32
+
33
+ it "should return original when no hits are provided" do
34
+ @highlighter.hit_highlight(@original).should == @original
35
+ end
36
+
37
+ it "should highlight one hit" do
38
+ @highlighter.hit_highlight(@original, hits = [[5, 9]]).should == "Hey! <em>this</em> is a test tweet"
39
+ end
40
+
41
+ it "should highlight two hits" do
42
+ @highlighter.hit_highlight(@original, hits = [[5, 9], [15, 19]]).should == "Hey! <em>this</em> is a <em>test</em> tweet"
43
+ end
44
+
45
+ it "should correctly highlight first-word hits" do
46
+ @highlighter.hit_highlight(@original, hits = [[0, 3]]).should == "<em>Hey</em>! this is a test tweet"
47
+ end
48
+
49
+ it "should correctly highlight last-word hits" do
50
+ @highlighter.hit_highlight(@original, hits = [[20, 25]]).should == "Hey! this is a test <em>tweet</em>"
51
+ end
52
+ end
53
+
54
+ context "with links" do
55
+ it "should highlight with a single link" do
56
+ @highlighter.hit_highlight("@<a>bcherry</a> this was a test tweet", [[9, 13]]).should == "@<a>bcherry</a> <em>this</em> was a test tweet"
57
+ end
58
+
59
+ it "should highlight with link at the end" do
60
+ @highlighter.hit_highlight("test test <a>test</a>", [[5, 9]]).should == "test <em>test</em> <a>test</a>"
61
+ end
62
+
63
+ it "should highlight with a link at the beginning" do
64
+ @highlighter.hit_highlight("<a>test</a> test test", [[5, 9]]).should == "<a>test</a> <em>test</em> test"
65
+ end
66
+
67
+ it "should highlight an entire link" do
68
+ @highlighter.hit_highlight("test <a>test</a> test", [[5, 9]]).should == "test <a><em>test</em></a> test"
69
+ end
70
+
71
+ it "should highlight within a link" do
72
+ @highlighter.hit_highlight("test <a>test</a> test", [[6, 8]]).should == "test <a>t<em>es</em>t</a> test"
73
+ end
74
+
75
+ it "should highlight around a link" do
76
+ @highlighter.hit_highlight("test <a>test</a> test", [[3, 11]]).should == "tes<em>t <a>test</a> t</em>est"
77
+ end
78
+
79
+ end
80
+
81
+ end
82
+
83
+ end
data/spec/regex_spec.rb CHANGED
@@ -16,7 +16,7 @@ describe "Twitter::Regex regular expressions" do
16
16
 
17
17
  describe "invalid URLS" do
18
18
  it "does not link urls with invalid characters" do
19
- TestUrls::INVALID.each {|url| url.should_not have_autolinked_url(url)}
19
+ TestUrls::INVALID.each {|url| url.should_not match_autolink_expression}
20
20
  end
21
21
  end
22
22
 
data/spec/test_urls.rb CHANGED
@@ -7,7 +7,7 @@ module TestUrls
7
7
  "http://twitter.com/#search?q=iphone%20-filter%3Alinks",
8
8
  "http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html",
9
9
  "http://somehost.com:3000",
10
- "http://x.com/~matthew+%-x",
10
+ "http://xo.com/~matthew+%-x",
11
11
  "http://en.wikipedia.org/wiki/Primer_(film)",
12
12
  "http://www.ams.org/bookstore-getitem/item=mbk-59",
13
13
  "http://chilp.it/?77e8fd",
@@ -16,14 +16,15 @@ module TestUrls
16
16
  "http://tell.me/why",
17
17
  "http://longtlds.info",
18
18
  "http://✪df.ws/ejp",
19
- "http://日本.com"
19
+ "http://日本.com",
20
+ "http://search.twitter.com/search?q=avro&lang=en",
21
+ "http://mrs.domain-dash.biz",
22
+ "http://x.com/has/one/char/domain",
20
23
  ]
21
24
 
22
25
  INVALID = [
23
26
  "http://no-tld",
24
27
  "http://tld-too-short.x",
25
- "http://x.com/,,,/.../@@@/;;;/:::/---/%%%x",
26
- "http://domain-dash.com",
27
28
  "http://-doman_dash.com"
28
29
  ]
29
30
 
metadata CHANGED
@@ -1,32 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ prerelease: false
5
+ segments:
6
+ - 1
7
+ - 1
8
+ - 1
9
+ version: 1.1.1
5
10
  platform: ruby
6
11
  authors:
7
12
  - Matt Sanford
8
13
  - Patrick Ewing
14
+ - Ben Cherry
15
+ - Britt Selvitelle
9
16
  autorequire: ""
10
17
  bindir: bin
11
18
  cert_chain: []
12
19
 
13
- date: 2010-03-11 00:00:00 -08:00
20
+ date: 2010-04-28 00:00:00 -07:00
14
21
  default_executable:
15
22
  dependencies:
16
23
  - !ruby/object:Gem::Dependency
17
24
  name: actionpack
18
- type: :runtime
19
- version_requirement:
20
- version_requirements: !ruby/object:Gem::Requirement
25
+ prerelease: false
26
+ requirement: &id001 !ruby/object:Gem::Requirement
21
27
  requirements:
22
28
  - - ">="
23
29
  - !ruby/object:Gem::Version
30
+ segments:
31
+ - 0
24
32
  version: "0"
25
- version:
33
+ type: :runtime
34
+ version_requirements: *id001
26
35
  description: A gem that provides text handling for Twitter
27
36
  email:
28
37
  - matt@twitter.com
29
38
  - patrick.henry.ewing@gmail.com
39
+ - bcherry@gmail.com
40
+ - bs@brittspace.com
30
41
  executables: []
31
42
 
32
43
  extensions: []
@@ -40,12 +51,14 @@ files:
40
51
  - TODO
41
52
  - lib/autolink.rb
42
53
  - lib/extractor.rb
54
+ - lib/hithighlighter.rb
43
55
  - lib/regex.rb
44
56
  - lib/twitter-text.rb
45
57
  - lib/unicode.rb
46
58
  - lib/validation.rb
47
59
  - spec/autolinking_spec.rb
48
60
  - spec/extractor_spec.rb
61
+ - spec/hithighlighter_spec.rb
49
62
  - spec/regex_spec.rb
50
63
  - spec/spec_helper.rb
51
64
  - spec/test_urls.rb
@@ -64,18 +77,20 @@ required_ruby_version: !ruby/object:Gem::Requirement
64
77
  requirements:
65
78
  - - ">="
66
79
  - !ruby/object:Gem::Version
80
+ segments:
81
+ - 0
67
82
  version: "0"
68
- version:
69
83
  required_rubygems_version: !ruby/object:Gem::Requirement
70
84
  requirements:
71
85
  - - ">="
72
86
  - !ruby/object:Gem::Version
87
+ segments:
88
+ - 0
73
89
  version: "0"
74
- version:
75
90
  requirements: []
76
91
 
77
92
  rubyforge_project:
78
- rubygems_version: 1.3.5
93
+ rubygems_version: 1.3.6
79
94
  signing_key:
80
95
  specification_version: 3
81
96
  summary: Twitter text handling library