twitter-text 1.0.4 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +8 -3
- data/Rakefile +3 -3
- data/TODO +2 -1
- data/lib/autolink.rb +1 -1
- data/lib/hithighlighter.rb +81 -0
- data/lib/regex.rb +2 -2
- data/lib/twitter-text.rb +1 -0
- data/spec/autolinking_spec.rb +1 -0
- data/spec/hithighlighter_spec.rb +83 -0
- data/spec/regex_spec.rb +1 -1
- data/spec/test_urls.rb +5 -4
- metadata +24 -9
data/README.rdoc
CHANGED
@@ -57,10 +57,15 @@ Special care has been taken to be sure that auto-linking and extraction work
|
|
57
57
|
in Tweets of all languages. This means that languages without spaces between
|
58
58
|
words should work equally well.
|
59
59
|
|
60
|
+
=== Hit Highlighting
|
61
|
+
|
62
|
+
Use to provide emphasis around the "hits" returned from the Search API, built
|
63
|
+
to work against text that has been auto-linked already.
|
64
|
+
|
60
65
|
=== Conformance
|
61
66
|
|
62
67
|
To run the Conformance suite, you'll need to add that project as a git submodule. From the root twitter-text-rb directory, run:
|
63
68
|
|
64
|
-
git submodule add git@github.com:mzsanford/twitter-text-conformance.git test/twitter-text-conformance/
|
65
|
-
git submodule init
|
66
|
-
git submodule update
|
69
|
+
git submodule add git@github.com:mzsanford/twitter-text-conformance.git test/twitter-text-conformance/
|
70
|
+
git submodule init
|
71
|
+
git submodule update
|
data/Rakefile
CHANGED
@@ -9,9 +9,9 @@ require 'digest'
|
|
9
9
|
|
10
10
|
spec = Gem::Specification.new do |s|
|
11
11
|
s.name = "twitter-text"
|
12
|
-
s.version = "1.0.4"
|
13
|
-
s.authors = ["Matt Sanford", "Patrick Ewing"]
|
14
|
-
s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com"]
|
12
|
+
s.version = "1.1.1"
|
13
|
+
s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle"]
|
14
|
+
s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com"]
|
15
15
|
s.homepage = "http://twitter.com"
|
16
16
|
s.description = s.summary = "A gem that provides text handling for Twitter"
|
17
17
|
|
data/TODO
CHANGED
data/lib/autolink.rb
CHANGED
data/lib/hithighlighter.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
|
2
|
+
module Twitter
|
3
|
+
# Module for doing "hit highlighting" on tweets that have been auto-linked already.
|
4
|
+
# Useful with the results returned from the Search API.
|
5
|
+
module HitHighlighter
|
6
|
+
# Default Tag used for hit highlighting
|
7
|
+
DEFAULT_HIGHLIGHT_TAG = "em"
|
8
|
+
|
9
|
+
# Add <tt><em></em></tt> tags around the <tt>hits</tt> provided in the <tt>text</tt>. The
|
10
|
+
# <tt>hits</tt> should be an array of (start, end) index pairs, relative to the original
|
11
|
+
# text, before auto-linking (but the <tt>text</tt> may already be auto-linked if desired)
|
12
|
+
#
|
13
|
+
# The <tt><em></em></tt> tags can be overridden using the <tt>:tag</tt> option. For example:
|
14
|
+
#
|
15
|
+
# irb> hit_highlight("test hit here", [[5, 8]], :tag => 'strong')
|
16
|
+
# => "test <strong>hit</strong> here"
|
17
|
+
def hit_highlight(text, hits = [], options = {})
|
18
|
+
if hits.empty?
|
19
|
+
return text
|
20
|
+
end
|
21
|
+
|
22
|
+
tag_name = options[:tag] || DEFAULT_HIGHLIGHT_TAG
|
23
|
+
tags = ["<" + tag_name + ">", "</" + tag_name + ">"]
|
24
|
+
|
25
|
+
chunks = text.split("<").map do |item|
|
26
|
+
item.blank? ? item : item.split(">")
|
27
|
+
end.flatten
|
28
|
+
|
29
|
+
result = ""
|
30
|
+
chunk_index, chunk = 0, chunks[0]
|
31
|
+
chunk_chars = chunk.respond_to?("mb_chars") ? chunk.mb_chars : chunk.respond_to?("chars") && chunk.chars.respond_to?("[]") ? chunk.chars : chunk
|
32
|
+
prev_chunks_len = 0
|
33
|
+
chunk_cursor = 0
|
34
|
+
start_in_chunk = false
|
35
|
+
for hit, index in hits.flatten.each_with_index do
|
36
|
+
tag = tags[index % 2]
|
37
|
+
|
38
|
+
placed = false
|
39
|
+
until chunk.nil? || hit < prev_chunks_len + chunk.length do
|
40
|
+
result << chunk_chars[chunk_cursor..-1]
|
41
|
+
if start_in_chunk && hit == prev_chunks_len + chunk_chars.length
|
42
|
+
result << tag
|
43
|
+
placed = true
|
44
|
+
end
|
45
|
+
|
46
|
+
# correctly handle highlights that end on the final character.
|
47
|
+
if tag_text = chunks[chunk_index+1]
|
48
|
+
result << "<#{tag_text}>"
|
49
|
+
end
|
50
|
+
|
51
|
+
prev_chunks_len += chunk_chars.length
|
52
|
+
chunk_cursor = 0
|
53
|
+
chunk_index += 2
|
54
|
+
chunk = chunks[chunk_index]
|
55
|
+
chunk_chars = chunk.respond_to?("mb_chars") ? chunk.mb_chars : chunk.respond_to?("chars") && chunk.chars.respond_to?("[]") ? chunk.chars : chunk
|
56
|
+
start_in_chunk = false
|
57
|
+
end
|
58
|
+
|
59
|
+
if !placed && !chunk.nil?
|
60
|
+
hit_spot = hit - prev_chunks_len
|
61
|
+
result << chunk_chars[chunk_cursor...hit_spot].to_s + tag
|
62
|
+
chunk_cursor = hit_spot
|
63
|
+
if index % 2 == 0
|
64
|
+
start_in_chunk = true
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
if chunk
|
70
|
+
if chunk_cursor < chunk_chars.length
|
71
|
+
result << chunk_chars[chunk_cursor..-1]
|
72
|
+
end
|
73
|
+
(chunk_index+1).upto(chunks.length-1).each do |index|
|
74
|
+
result << (index.even? ? chunks[index] : "<#{chunks[index]}>")
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
result
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
data/lib/regex.rb
CHANGED
@@ -44,7 +44,7 @@ module Twitter
|
|
44
44
|
|
45
45
|
# URL related hash regex collection
|
46
46
|
REGEXEN[:valid_preceding_chars] = /(?:[^\/"':!=]|^|\:)/
|
47
|
-
REGEXEN[:valid_domain] = /(?:[^[:punct:]\s][\.-][^[:punct:]\s]|[^[:punct:]\s])
|
47
|
+
REGEXEN[:valid_domain] = /(?:[^[:punct:]\s][\.-](?=[^[:punct:]\s])|[^[:punct:]\s]){1,}\.[a-z]{2,}(?::[0-9]+)?/i
|
48
48
|
REGEXEN[:valid_url_path_chars] = /[\.\,]?[a-z0-9!\*'\(\);:=\+\$\/%#\[\]\-_,~@]/i
|
49
49
|
# Valid end-of-path characters (so /foo. does not gobble the period).
|
50
50
|
# 1. Allow ) for Wikipedia URLs.
|
@@ -67,7 +67,7 @@ module Twitter
|
|
67
67
|
REGEXEN.each_pair{|k,v| v.freeze }
|
68
68
|
|
69
69
|
# Return the regular expression for a given <tt>key</tt>. If the <tt>key</tt>
|
70
|
-
# is not a known symbol a <tt>nil</tt> will be returned.
|
70
|
+
# is not a known symbol a <tt>nil</tt> will be returned.
|
71
71
|
def self.[](key)
|
72
72
|
REGEXEN[key]
|
73
73
|
end
|
data/lib/twitter-text.rb
CHANGED
@@ -10,3 +10,4 @@ require File.join(File.dirname(__FILE__), 'autolink')
|
|
10
10
|
require File.join(File.dirname(__FILE__), 'extractor')
|
11
11
|
require File.join(File.dirname(__FILE__), 'unicode')
|
12
12
|
require File.join(File.dirname(__FILE__), 'validation')
|
13
|
+
require File.join(File.dirname(__FILE__), 'hithighlighter')
|
data/spec/autolinking_spec.rb
CHANGED
data/spec/hithighlighter_spec.rb
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
|
3
|
+
class TestHitHighlighter
|
4
|
+
include Twitter::HitHighlighter
|
5
|
+
end
|
6
|
+
|
7
|
+
describe Twitter::HitHighlighter do
|
8
|
+
describe "highlight" do
|
9
|
+
before do
|
10
|
+
@highlighter = TestHitHighlighter.new
|
11
|
+
end
|
12
|
+
|
13
|
+
context "with options" do
|
14
|
+
before do
|
15
|
+
@original = "Testing this hit highliter"
|
16
|
+
@hits = [[13,16]]
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should default to <em> tags" do
|
20
|
+
@highlighter.hit_highlight(@original, @hits).should == "Testing this <em>hit</em> highliter"
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should allow tag override" do
|
24
|
+
@highlighter.hit_highlight(@original, @hits, :tag => 'b').should == "Testing this <b>hit</b> highliter"
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
context "without links" do
|
29
|
+
before do
|
30
|
+
@original = "Hey! this is a test tweet"
|
31
|
+
end
|
32
|
+
|
33
|
+
it "should return original when no hits are provided" do
|
34
|
+
@highlighter.hit_highlight(@original).should == @original
|
35
|
+
end
|
36
|
+
|
37
|
+
it "should highlight one hit" do
|
38
|
+
@highlighter.hit_highlight(@original, hits = [[5, 9]]).should == "Hey! <em>this</em> is a test tweet"
|
39
|
+
end
|
40
|
+
|
41
|
+
it "should highlight two hits" do
|
42
|
+
@highlighter.hit_highlight(@original, hits = [[5, 9], [15, 19]]).should == "Hey! <em>this</em> is a <em>test</em> tweet"
|
43
|
+
end
|
44
|
+
|
45
|
+
it "should correctly highlight first-word hits" do
|
46
|
+
@highlighter.hit_highlight(@original, hits = [[0, 3]]).should == "<em>Hey</em>! this is a test tweet"
|
47
|
+
end
|
48
|
+
|
49
|
+
it "should correctly highlight last-word hits" do
|
50
|
+
@highlighter.hit_highlight(@original, hits = [[20, 25]]).should == "Hey! this is a test <em>tweet</em>"
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
context "with links" do
|
55
|
+
it "should highlight with a single link" do
|
56
|
+
@highlighter.hit_highlight("@<a>bcherry</a> this was a test tweet", [[9, 13]]).should == "@<a>bcherry</a> <em>this</em> was a test tweet"
|
57
|
+
end
|
58
|
+
|
59
|
+
it "should highlight with link at the end" do
|
60
|
+
@highlighter.hit_highlight("test test <a>test</a>", [[5, 9]]).should == "test <em>test</em> <a>test</a>"
|
61
|
+
end
|
62
|
+
|
63
|
+
it "should highlight with a link at the beginning" do
|
64
|
+
@highlighter.hit_highlight("<a>test</a> test test", [[5, 9]]).should == "<a>test</a> <em>test</em> test"
|
65
|
+
end
|
66
|
+
|
67
|
+
it "should highlight an entire link" do
|
68
|
+
@highlighter.hit_highlight("test <a>test</a> test", [[5, 9]]).should == "test <a><em>test</em></a> test"
|
69
|
+
end
|
70
|
+
|
71
|
+
it "should highlight within a link" do
|
72
|
+
@highlighter.hit_highlight("test <a>test</a> test", [[6, 8]]).should == "test <a>t<em>es</em>t</a> test"
|
73
|
+
end
|
74
|
+
|
75
|
+
it "should highlight around a link" do
|
76
|
+
@highlighter.hit_highlight("test <a>test</a> test", [[3, 11]]).should == "tes<em>t <a>test</a> t</em>est"
|
77
|
+
end
|
78
|
+
|
79
|
+
end
|
80
|
+
|
81
|
+
end
|
82
|
+
|
83
|
+
end
|
data/spec/regex_spec.rb
CHANGED
@@ -16,7 +16,7 @@ describe "Twitter::Regex regular expressions" do
|
|
16
16
|
|
17
17
|
describe "invalid URLS" do
|
18
18
|
it "does not link urls with invalid characters" do
|
19
|
-
TestUrls::INVALID.each {|url| url.should_not
|
19
|
+
TestUrls::INVALID.each {|url| url.should_not match_autolink_expression}
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
data/spec/test_urls.rb
CHANGED
@@ -7,7 +7,7 @@ module TestUrls
|
|
7
7
|
"http://twitter.com/#search?q=iphone%20-filter%3Alinks",
|
8
8
|
"http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html",
|
9
9
|
"http://somehost.com:3000",
|
10
|
-
"http://
|
10
|
+
"http://xo.com/~matthew+%-x",
|
11
11
|
"http://en.wikipedia.org/wiki/Primer_(film)",
|
12
12
|
"http://www.ams.org/bookstore-getitem/item=mbk-59",
|
13
13
|
"http://chilp.it/?77e8fd",
|
@@ -16,14 +16,15 @@ module TestUrls
|
|
16
16
|
"http://tell.me/why",
|
17
17
|
"http://longtlds.info",
|
18
18
|
"http://✪df.ws/ejp",
|
19
|
-
"http://日本.com"
|
19
|
+
"http://日本.com",
|
20
|
+
"http://search.twitter.com/search?q=avro&lang=en",
|
21
|
+
"http://mrs.domain-dash.biz",
|
22
|
+
"http://x.com/has/one/char/domain",
|
20
23
|
]
|
21
24
|
|
22
25
|
INVALID = [
|
23
26
|
"http://no-tld",
|
24
27
|
"http://tld-too-short.x",
|
25
|
-
"http://x.com/,,,/.../@@@/;;;/:::/---/%%%x",
|
26
|
-
"http://domain-dash.com",
|
27
28
|
"http://-doman_dash.com"
|
28
29
|
]
|
29
30
|
|
metadata
CHANGED
@@ -1,32 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 1
|
7
|
+
- 1
|
8
|
+
- 1
|
9
|
+
version: 1.1.1
|
5
10
|
platform: ruby
|
6
11
|
authors:
|
7
12
|
- Matt Sanford
|
8
13
|
- Patrick Ewing
|
14
|
+
- Ben Cherry
|
15
|
+
- Britt Selvitelle
|
9
16
|
autorequire: ""
|
10
17
|
bindir: bin
|
11
18
|
cert_chain: []
|
12
19
|
|
13
|
-
date: 2010-
|
20
|
+
date: 2010-04-28 00:00:00 -07:00
|
14
21
|
default_executable:
|
15
22
|
dependencies:
|
16
23
|
- !ruby/object:Gem::Dependency
|
17
24
|
name: actionpack
|
18
|
-
|
19
|
-
|
20
|
-
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
prerelease: false
|
26
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
21
27
|
requirements:
|
22
28
|
- - ">="
|
23
29
|
- !ruby/object:Gem::Version
|
30
|
+
segments:
|
31
|
+
- 0
|
24
32
|
version: "0"
|
25
|
-
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
26
35
|
description: A gem that provides text handling for Twitter
|
27
36
|
email:
|
28
37
|
- matt@twitter.com
|
29
38
|
- patrick.henry.ewing@gmail.com
|
39
|
+
- bcherry@gmail.com
|
40
|
+
- bs@brittspace.com
|
30
41
|
executables: []
|
31
42
|
|
32
43
|
extensions: []
|
@@ -40,12 +51,14 @@ files:
|
|
40
51
|
- TODO
|
41
52
|
- lib/autolink.rb
|
42
53
|
- lib/extractor.rb
|
54
|
+
- lib/hithighlighter.rb
|
43
55
|
- lib/regex.rb
|
44
56
|
- lib/twitter-text.rb
|
45
57
|
- lib/unicode.rb
|
46
58
|
- lib/validation.rb
|
47
59
|
- spec/autolinking_spec.rb
|
48
60
|
- spec/extractor_spec.rb
|
61
|
+
- spec/hithighlighter_spec.rb
|
49
62
|
- spec/regex_spec.rb
|
50
63
|
- spec/spec_helper.rb
|
51
64
|
- spec/test_urls.rb
|
@@ -64,18 +77,20 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
64
77
|
requirements:
|
65
78
|
- - ">="
|
66
79
|
- !ruby/object:Gem::Version
|
80
|
+
segments:
|
81
|
+
- 0
|
67
82
|
version: "0"
|
68
|
-
version:
|
69
83
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
70
84
|
requirements:
|
71
85
|
- - ">="
|
72
86
|
- !ruby/object:Gem::Version
|
87
|
+
segments:
|
88
|
+
- 0
|
73
89
|
version: "0"
|
74
|
-
version:
|
75
90
|
requirements: []
|
76
91
|
|
77
92
|
rubyforge_project:
|
78
|
-
rubygems_version: 1.3.
|
93
|
+
rubygems_version: 1.3.6
|
79
94
|
signing_key:
|
80
95
|
specification_version: 3
|
81
96
|
summary: Twitter text handling library
|