twitter-text 1.0.4 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -57,10 +57,15 @@ Special care has been taken to be sure that auto-linking and extraction work
57
57
  in Tweets of all languages. This means that languages without spaces between
58
58
  words should work equally well.
59
59
 
60
+ === Hit Highlighting
61
+
62
+ Use hit highlighting to add emphasis around the "hits" returned from the Search API. It is built
63
+ to work against text that has already been auto-linked.
64
+
60
65
  === Conformance
61
66
 
62
67
  To run the Conformance suite, you'll need to add that project as a git submodule. From the root twitter-text-rb directory, run:
63
68
 
64
- git submodule add git@github.com:mzsanford/twitter-text-conformance.git test/twitter-text-conformance/
65
- git submodule init
66
- git submodule update
69
+ git submodule add git@github.com:mzsanford/twitter-text-conformance.git test/twitter-text-conformance/
70
+ git submodule init
71
+ git submodule update
data/Rakefile CHANGED
@@ -9,9 +9,9 @@ require 'digest'
9
9
 
10
10
  spec = Gem::Specification.new do |s|
11
11
  s.name = "twitter-text"
12
- s.version = "1.0.4"
13
- s.authors = ["Matt Sanford", "Patrick Ewing"]
14
- s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com"]
12
+ s.version = "1.1.1"
13
+ s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle"]
14
+ s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com"]
15
15
  s.homepage = "http://twitter.com"
16
16
  s.description = s.summary = "A gem that provides text handling for Twitter"
17
17
 
data/TODO CHANGED
@@ -1,3 +1,4 @@
1
1
  TODO:
2
2
 
3
- * More tests
3
+ * @mentions preceded by a dash should work. "I am great -@greatguy"
4
+ * HitHighlighter tests should be moved to the conformance suite
data/lib/autolink.rb CHANGED
@@ -38,7 +38,7 @@ module Twitter
38
38
  options),
39
39
  options)
40
40
  end
41
-
41
+
42
42
  # Add <tt><a></a></tt> tags around the usernames and lists in the provided <tt>text</tt>. The
43
43
  # <tt><a></tt> tags can be controlled with the following entries in the <tt>options</tt>
44
44
  # hash:
@@ -0,0 +1,81 @@
1
+
2
+ module Twitter
3
+ # Module for doing "hit highlighting" on tweets that have been auto-linked already.
4
+ # Useful with the results returned from the Search API. Exposes a single method, <tt>hit_highlight</tt>.
5
+ module HitHighlighter
6
+ # Default tag name used for hit highlighting when no <tt>:tag</tt> option is given
7
+ DEFAULT_HIGHLIGHT_TAG = "em"
8
+
9
+ # Add <tt><em></em></tt> tags around the <tt>hits</tt> provided in the <tt>text</tt>. The
10
+ # <tt>hits</tt> should be an array of (start, end) index pairs, relative to the original
11
+ # text, before auto-linking (but the <tt>text</tt> may already be auto-linked if desired).
12
+ # In the example below <tt>end</tt> is exclusive. The <tt>text</tt> is returned unchanged when <tt>hits</tt> is empty.
13
+ # The <tt><em></em></tt> tags can be overridden using the <tt>:tag</tt> option. For example:
14
+ #
15
+ # irb> hit_highlight("test hit here", [[5, 8]], :tag => 'strong')
16
+ # => "test <strong>hit</strong> here"
17
+ def hit_highlight(text, hits = [], options = {})
18
+ if hits.empty?
19
+ return text
20
+ end
21
+
22
+ tag_name = options[:tag] || DEFAULT_HIGHLIGHT_TAG
23
+ tags = ["<" + tag_name + ">", "</" + tag_name + ">"] # [opening tag, closing tag]
24
+
25
+ chunks = text.split("<").map do |item| # break the text apart at markup boundaries
26
+ item.blank? ? item : item.split(">") # NOTE(review): blank? is not defined in this file; presumably ActiveSupport's String#blank? -- confirm
27
+ end.flatten # the code below treats even-indexed chunks as plain text and odd-indexed chunks as tag contents
28
+
29
+ result = ""
30
+ chunk_index, chunk = 0, chunks[0]
31
+ chunk_chars = chunk.respond_to?("mb_chars") ? chunk.mb_chars : chunk.respond_to?("chars") && chunk.chars.respond_to?("[]") ? chunk.chars : chunk # indexable view of the chunk: mb_chars if available, else chars when it supports [], else the chunk itself
32
+ prev_chunks_len = 0 # combined length of the text chunks already flushed to result
33
+ chunk_cursor = 0 # offset in the current chunk up to which text has been emitted
34
+ start_in_chunk = false # true once an opening tag has been placed inside the current chunk
35
+ for hit, index in hits.flatten.each_with_index do # flattened hits alternate start, end, start, end; NOTE(review): the single forward pass looks like it expects ascending positions -- confirm
36
+ tag = tags[index % 2] # even index => opening tag, odd index => closing tag
37
+
38
+ placed = false
39
+ until chunk.nil? || hit < prev_chunks_len + chunk.length do # advance until the hit falls inside the current chunk; NOTE(review): uses chunk.length where the rest of the loop uses chunk_chars.length -- confirm the two always agree
40
+ result << chunk_chars[chunk_cursor..-1] # flush the remainder of the current text chunk
41
+ if start_in_chunk && hit == prev_chunks_len + chunk_chars.length # a highlight opened in this chunk ends exactly at its end, so the tag goes before the markup that follows
42
+ result << tag
43
+ placed = true
44
+ end
45
+
46
+ # Re-emit the tag chunk that follows, if any; there is none when a highlight ends on the final character.
47
+ if tag_text = chunks[chunk_index+1]
48
+ result << "<#{tag_text}>"
49
+ end
50
+
51
+ prev_chunks_len += chunk_chars.length
52
+ chunk_cursor = 0
53
+ chunk_index += 2 # skip the tag chunk and move on to the next text chunk
54
+ chunk = chunks[chunk_index]
55
+ chunk_chars = chunk.respond_to?("mb_chars") ? chunk.mb_chars : chunk.respond_to?("chars") && chunk.chars.respond_to?("[]") ? chunk.chars : chunk
56
+ start_in_chunk = false
57
+ end
58
+
59
+ if !placed && !chunk.nil? # the hit lies inside the current chunk; when no chunk is left, no tag is emitted for this hit
60
+ hit_spot = hit - prev_chunks_len # hit position relative to the start of the current chunk
61
+ result << chunk_chars[chunk_cursor...hit_spot].to_s + tag
62
+ chunk_cursor = hit_spot
63
+ if index % 2 == 0 # only an opening tag marks the chunk as holding an open highlight
64
+ start_in_chunk = true
65
+ end
66
+ end
67
+ end
68
+
69
+ if chunk # flush whatever text and markup remain after the last hit
70
+ if chunk_cursor < chunk_chars.length
71
+ result << chunk_chars[chunk_cursor..-1]
72
+ end
73
+ (chunk_index+1).upto(chunks.length-1).each do |index| # append the remaining chunks, restoring angle brackets around odd-indexed (tag) chunks; note the block parameter reuses the name of the for-loop variable above
74
+ result << (index.even? ? chunks[index] : "<#{chunks[index]}>")
75
+ end
76
+ end
77
+
78
+ result
79
+ end
80
+ end
81
+ end
data/lib/regex.rb CHANGED
@@ -44,7 +44,7 @@ module Twitter
44
44
 
45
45
  # URL related hash regex collection
46
46
  REGEXEN[:valid_preceding_chars] = /(?:[^\/"':!=]|^|\:)/
47
- REGEXEN[:valid_domain] = /(?:[^[:punct:]\s][\.-][^[:punct:]\s]|[^[:punct:]\s])+\.[a-z]{2,}(?::[0-9]+)?/i
47
+ REGEXEN[:valid_domain] = /(?:[^[:punct:]\s][\.-](?=[^[:punct:]\s])|[^[:punct:]\s]){1,}\.[a-z]{2,}(?::[0-9]+)?/i
48
48
  REGEXEN[:valid_url_path_chars] = /[\.\,]?[a-z0-9!\*'\(\);:=\+\$\/%#\[\]\-_,~@]/i
49
49
  # Valid end-of-path characters (so /foo. does not gobble the period).
50
50
  # 1. Allow ) for Wikipedia URLs.
@@ -67,7 +67,7 @@ module Twitter
67
67
  REGEXEN.each_pair{|k,v| v.freeze }
68
68
 
69
69
  # Return the regular expression for a given <tt>key</tt>. If the <tt>key</tt>
70
- # is not a known symbol a <tt>nil</tt> will be returned.
70
+ # is not a known symbol a <tt>nil</tt> will be returned.
71
71
  def self.[](key)
72
72
  REGEXEN[key]
73
73
  end
data/lib/twitter-text.rb CHANGED
@@ -10,3 +10,4 @@ require File.join(File.dirname(__FILE__), 'autolink')
10
10
  require File.join(File.dirname(__FILE__), 'extractor')
11
11
  require File.join(File.dirname(__FILE__), 'unicode')
12
12
  require File.join(File.dirname(__FILE__), 'validation')
13
+ require File.join(File.dirname(__FILE__), 'hithighlighter')
@@ -430,6 +430,7 @@ describe Twitter::Autolink do
430
430
  end
431
431
 
432
432
  end
433
+
433
434
  end
434
435
 
435
436
  end
@@ -0,0 +1,83 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
+ class TestHitHighlighter # bare host class for the mixin; the specs below call hit_highlight on an instance of it
4
+ include Twitter::HitHighlighter
5
+ end
6
+
7
+ describe Twitter::HitHighlighter do # specs for the hit_highlight method mixed into TestHitHighlighter
8
+ describe "highlight" do
9
+ before do
10
+ @highlighter = TestHitHighlighter.new
11
+ end
12
+
13
+ context "with options" do
14
+ before do
15
+ @original = "Testing this hit highliter"
16
+ @hits = [[13,16]] # covers the word "hit" in @original (end index exclusive)
17
+ end
18
+
19
+ it "should default to <em> tags" do
20
+ @highlighter.hit_highlight(@original, @hits).should == "Testing this <em>hit</em> highliter"
21
+ end
22
+
23
+ it "should allow tag override" do
24
+ @highlighter.hit_highlight(@original, @hits, :tag => 'b').should == "Testing this <b>hit</b> highliter"
25
+ end
26
+ end
27
+
28
+ context "without links" do # plain text with no markup
29
+ before do
30
+ @original = "Hey! this is a test tweet"
31
+ end
32
+
33
+ it "should return original when no hits are provided" do
34
+ @highlighter.hit_highlight(@original).should == @original
35
+ end
36
+
37
+ it "should highlight one hit" do
38
+ @highlighter.hit_highlight(@original, hits = [[5, 9]]).should == "Hey! <em>this</em> is a test tweet" # "hits = ..." is an ordinary local assignment passed as the second argument
39
+ end
40
+
41
+ it "should highlight two hits" do
42
+ @highlighter.hit_highlight(@original, hits = [[5, 9], [15, 19]]).should == "Hey! <em>this</em> is a <em>test</em> tweet"
43
+ end
44
+
45
+ it "should correctly highlight first-word hits" do
46
+ @highlighter.hit_highlight(@original, hits = [[0, 3]]).should == "<em>Hey</em>! this is a test tweet"
47
+ end
48
+
49
+ it "should correctly highlight last-word hits" do # the end index equals the length of the text
50
+ @highlighter.hit_highlight(@original, hits = [[20, 25]]).should == "Hey! this is a test <em>tweet</em>"
51
+ end
52
+ end
53
+
54
+ context "with links" do # hit indices below count only the visible text, not the <a> markup
55
+ it "should highlight with a single link" do
56
+ @highlighter.hit_highlight("@<a>bcherry</a> this was a test tweet", [[9, 13]]).should == "@<a>bcherry</a> <em>this</em> was a test tweet"
57
+ end
58
+
59
+ it "should highlight with link at the end" do
60
+ @highlighter.hit_highlight("test test <a>test</a>", [[5, 9]]).should == "test <em>test</em> <a>test</a>"
61
+ end
62
+
63
+ it "should highlight with a link at the beginning" do
64
+ @highlighter.hit_highlight("<a>test</a> test test", [[5, 9]]).should == "<a>test</a> <em>test</em> test"
65
+ end
66
+
67
+ it "should highlight an entire link" do # the <em> pair is expected inside the <a> element
68
+ @highlighter.hit_highlight("test <a>test</a> test", [[5, 9]]).should == "test <a><em>test</em></a> test"
69
+ end
70
+
71
+ it "should highlight within a link" do
72
+ @highlighter.hit_highlight("test <a>test</a> test", [[6, 8]]).should == "test <a>t<em>es</em>t</a> test"
73
+ end
74
+
75
+ it "should highlight around a link" do # the highlight starts before and ends after the <a> element
76
+ @highlighter.hit_highlight("test <a>test</a> test", [[3, 11]]).should == "tes<em>t <a>test</a> t</em>est"
77
+ end
78
+
79
+ end
80
+
81
+ end
82
+
83
+ end
data/spec/regex_spec.rb CHANGED
@@ -16,7 +16,7 @@ describe "Twitter::Regex regular expressions" do
16
16
 
17
17
  describe "invalid URLS" do
18
18
  it "does not link urls with invalid characters" do
19
- TestUrls::INVALID.each {|url| url.should_not have_autolinked_url(url)}
19
+ TestUrls::INVALID.each {|url| url.should_not match_autolink_expression}
20
20
  end
21
21
  end
22
22
 
data/spec/test_urls.rb CHANGED
@@ -7,7 +7,7 @@ module TestUrls
7
7
  "http://twitter.com/#search?q=iphone%20-filter%3Alinks",
8
8
  "http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html",
9
9
  "http://somehost.com:3000",
10
- "http://x.com/~matthew+%-x",
10
+ "http://xo.com/~matthew+%-x",
11
11
  "http://en.wikipedia.org/wiki/Primer_(film)",
12
12
  "http://www.ams.org/bookstore-getitem/item=mbk-59",
13
13
  "http://chilp.it/?77e8fd",
@@ -16,14 +16,15 @@ module TestUrls
16
16
  "http://tell.me/why",
17
17
  "http://longtlds.info",
18
18
  "http://✪df.ws/ejp",
19
- "http://日本.com"
19
+ "http://日本.com",
20
+ "http://search.twitter.com/search?q=avro&lang=en",
21
+ "http://mrs.domain-dash.biz",
22
+ "http://x.com/has/one/char/domain",
20
23
  ]
21
24
 
22
25
  INVALID = [
23
26
  "http://no-tld",
24
27
  "http://tld-too-short.x",
25
- "http://x.com/,,,/.../@@@/;;;/:::/---/%%%x",
26
- "http://domain-dash.com",
27
28
  "http://-doman_dash.com"
28
29
  ]
29
30
 
metadata CHANGED
@@ -1,32 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ prerelease: false
5
+ segments:
6
+ - 1
7
+ - 1
8
+ - 1
9
+ version: 1.1.1
5
10
  platform: ruby
6
11
  authors:
7
12
  - Matt Sanford
8
13
  - Patrick Ewing
14
+ - Ben Cherry
15
+ - Britt Selvitelle
9
16
  autorequire: ""
10
17
  bindir: bin
11
18
  cert_chain: []
12
19
 
13
- date: 2010-03-11 00:00:00 -08:00
20
+ date: 2010-04-28 00:00:00 -07:00
14
21
  default_executable:
15
22
  dependencies:
16
23
  - !ruby/object:Gem::Dependency
17
24
  name: actionpack
18
- type: :runtime
19
- version_requirement:
20
- version_requirements: !ruby/object:Gem::Requirement
25
+ prerelease: false
26
+ requirement: &id001 !ruby/object:Gem::Requirement
21
27
  requirements:
22
28
  - - ">="
23
29
  - !ruby/object:Gem::Version
30
+ segments:
31
+ - 0
24
32
  version: "0"
25
- version:
33
+ type: :runtime
34
+ version_requirements: *id001
26
35
  description: A gem that provides text handling for Twitter
27
36
  email:
28
37
  - matt@twitter.com
29
38
  - patrick.henry.ewing@gmail.com
39
+ - bcherry@gmail.com
40
+ - bs@brittspace.com
30
41
  executables: []
31
42
 
32
43
  extensions: []
@@ -40,12 +51,14 @@ files:
40
51
  - TODO
41
52
  - lib/autolink.rb
42
53
  - lib/extractor.rb
54
+ - lib/hithighlighter.rb
43
55
  - lib/regex.rb
44
56
  - lib/twitter-text.rb
45
57
  - lib/unicode.rb
46
58
  - lib/validation.rb
47
59
  - spec/autolinking_spec.rb
48
60
  - spec/extractor_spec.rb
61
+ - spec/hithighlighter_spec.rb
49
62
  - spec/regex_spec.rb
50
63
  - spec/spec_helper.rb
51
64
  - spec/test_urls.rb
@@ -64,18 +77,20 @@ required_ruby_version: !ruby/object:Gem::Requirement
64
77
  requirements:
65
78
  - - ">="
66
79
  - !ruby/object:Gem::Version
80
+ segments:
81
+ - 0
67
82
  version: "0"
68
- version:
69
83
  required_rubygems_version: !ruby/object:Gem::Requirement
70
84
  requirements:
71
85
  - - ">="
72
86
  - !ruby/object:Gem::Version
87
+ segments:
88
+ - 0
73
89
  version: "0"
74
- version:
75
90
  requirements: []
76
91
 
77
92
  rubyforge_project:
78
- rubygems_version: 1.3.5
93
+ rubygems_version: 1.3.6
79
94
  signing_key:
80
95
  specification_version: 3
81
96
  summary: Twitter text handling library