twitter-text 1.0.4 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +8 -3
- data/Rakefile +3 -3
- data/TODO +2 -1
- data/lib/autolink.rb +1 -1
- data/lib/hithighlighter.rb +81 -0
- data/lib/regex.rb +2 -2
- data/lib/twitter-text.rb +1 -0
- data/spec/autolinking_spec.rb +1 -0
- data/spec/hithighlighter_spec.rb +83 -0
- data/spec/regex_spec.rb +1 -1
- data/spec/test_urls.rb +5 -4
- metadata +24 -9
data/README.rdoc
CHANGED
@@ -57,10 +57,15 @@ Special care has been taken to be sure that auto-linking and extraction work
|
|
57
57
|
in Tweets of all languages. This means that languages without spaces between
|
58
58
|
words should work equally well.
|
59
59
|
|
60
|
+
=== Hit Highlighting
|
61
|
+
|
62
|
+
Use to provide emphasis around the "hits" returned from the Search API, built
|
63
|
+
to work against text that has been auto-linked already.
|
64
|
+
|
60
65
|
=== Conformance
|
61
66
|
|
62
67
|
To run the Conformance suite, you'll need to add that project as a git submodule. From the root twitter-text-rb directory, run:
|
63
68
|
|
64
|
-
git submodule add git@github.com:mzsanford/twitter-text-conformance.git test/twitter-text-conformance/
|
65
|
-
git submodule init
|
66
|
-
git submodule update
|
69
|
+
git submodule add git@github.com:mzsanford/twitter-text-conformance.git test/twitter-text-conformance/
|
70
|
+
git submodule init
|
71
|
+
git submodule update
|
data/Rakefile
CHANGED
@@ -9,9 +9,9 @@ require 'digest'
|
|
9
9
|
|
10
10
|
spec = Gem::Specification.new do |s|
|
11
11
|
s.name = "twitter-text"
|
12
|
-
s.version = "1.0.4"
|
13
|
-
s.authors = ["Matt Sanford", "Patrick Ewing"]
|
14
|
-
s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com"]
|
12
|
+
s.version = "1.1.1"
|
13
|
+
s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle"]
|
14
|
+
s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com"]
|
15
15
|
s.homepage = "http://twitter.com"
|
16
16
|
s.description = s.summary = "A gem that provides text handling for Twitter"
|
17
17
|
|
data/TODO
CHANGED
data/lib/autolink.rb
CHANGED
data/lib/hithighlighter.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
|
2
|
+
module Twitter
|
3
|
+
# Module for doing "hit highlighting" on tweets that have been auto-linked already.
|
4
|
+
# Useful with the results returned from the Search API.
|
5
|
+
module HitHighlighter
|
6
|
+
# Default Tag used for hit highlighting
|
7
|
+
DEFAULT_HIGHLIGHT_TAG = "em"
|
8
|
+
|
9
|
+
# Add <tt><em></em></tt> tags around the <tt>hits</tt> provided in the <tt>text</tt>. The
|
10
|
+
# <tt>hits</tt> should be an array of (start, end) index pairs, relative to the original
|
11
|
+
# text, before auto-linking (but the <tt>text</tt> may already be auto-linked if desired)
|
12
|
+
#
|
13
|
+
# The <tt><em></em></tt> tags can be overridden using the <tt>:tag</tt> option. For example:
|
14
|
+
#
|
15
|
+
# irb> hit_highlight("test hit here", [[5, 8]], :tag => 'strong')
|
16
|
+
# => "test <strong>hit</strong> here"
|
17
|
+
def hit_highlight(text, hits = [], options = {})
|
18
|
+
if hits.empty?
|
19
|
+
return text
|
20
|
+
end
|
21
|
+
|
22
|
+
tag_name = options[:tag] || DEFAULT_HIGHLIGHT_TAG
|
23
|
+
tags = ["<" + tag_name + ">", "</" + tag_name + ">"]
|
24
|
+
|
25
|
+
chunks = text.split("<").map do |item|
|
26
|
+
item.blank? ? item : item.split(">")
|
27
|
+
end.flatten
|
28
|
+
|
29
|
+
result = ""
|
30
|
+
chunk_index, chunk = 0, chunks[0]
|
31
|
+
chunk_chars = chunk.respond_to?("mb_chars") ? chunk.mb_chars : chunk.respond_to?("chars") && chunk.chars.respond_to?("[]") ? chunk.chars : chunk
|
32
|
+
prev_chunks_len = 0
|
33
|
+
chunk_cursor = 0
|
34
|
+
start_in_chunk = false
|
35
|
+
for hit, index in hits.flatten.each_with_index do
|
36
|
+
tag = tags[index % 2]
|
37
|
+
|
38
|
+
placed = false
|
39
|
+
until chunk.nil? || hit < prev_chunks_len + chunk.length do
|
40
|
+
result << chunk_chars[chunk_cursor..-1]
|
41
|
+
if start_in_chunk && hit == prev_chunks_len + chunk_chars.length
|
42
|
+
result << tag
|
43
|
+
placed = true
|
44
|
+
end
|
45
|
+
|
46
|
+
# correctly handle highlights that end on the final character.
|
47
|
+
if tag_text = chunks[chunk_index+1]
|
48
|
+
result << "<#{tag_text}>"
|
49
|
+
end
|
50
|
+
|
51
|
+
prev_chunks_len += chunk_chars.length
|
52
|
+
chunk_cursor = 0
|
53
|
+
chunk_index += 2
|
54
|
+
chunk = chunks[chunk_index]
|
55
|
+
chunk_chars = chunk.respond_to?("mb_chars") ? chunk.mb_chars : chunk.respond_to?("chars") && chunk.chars.respond_to?("[]") ? chunk.chars : chunk
|
56
|
+
start_in_chunk = false
|
57
|
+
end
|
58
|
+
|
59
|
+
if !placed && !chunk.nil?
|
60
|
+
hit_spot = hit - prev_chunks_len
|
61
|
+
result << chunk_chars[chunk_cursor...hit_spot].to_s + tag
|
62
|
+
chunk_cursor = hit_spot
|
63
|
+
if index % 2 == 0
|
64
|
+
start_in_chunk = true
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
if chunk
|
70
|
+
if chunk_cursor < chunk_chars.length
|
71
|
+
result << chunk_chars[chunk_cursor..-1]
|
72
|
+
end
|
73
|
+
(chunk_index+1).upto(chunks.length-1).each do |index|
|
74
|
+
result << (index.even? ? chunks[index] : "<#{chunks[index]}>")
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
result
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
data/lib/regex.rb
CHANGED
@@ -44,7 +44,7 @@ module Twitter
|
|
44
44
|
|
45
45
|
# URL related hash regex collection
|
46
46
|
REGEXEN[:valid_preceding_chars] = /(?:[^\/"':!=]|^|\:)/
|
47
|
-
REGEXEN[:valid_domain] = /(?:[^[:punct:]\s][\.-][^[:punct:]\s]|[^[:punct:]\s])
|
47
|
+
REGEXEN[:valid_domain] = /(?:[^[:punct:]\s][\.-](?=[^[:punct:]\s])|[^[:punct:]\s]){1,}\.[a-z]{2,}(?::[0-9]+)?/i
|
48
48
|
REGEXEN[:valid_url_path_chars] = /[\.\,]?[a-z0-9!\*'\(\);:=\+\$\/%#\[\]\-_,~@]/i
|
49
49
|
# Valid end-of-path chracters (so /foo. does not gobble the period).
|
50
50
|
# 1. Allow ) for Wikipedia URLs.
|
@@ -67,7 +67,7 @@ module Twitter
|
|
67
67
|
REGEXEN.each_pair{|k,v| v.freeze }
|
68
68
|
|
69
69
|
# Return the regular expression for a given <tt>key</tt>. If the <tt>key</tt>
|
70
|
-
# is not a known symbol a <tt>nil</tt> will be returned.
|
70
|
+
# is not a known symbol a <tt>nil</tt> will be returned.
|
71
71
|
def self.[](key)
|
72
72
|
REGEXEN[key]
|
73
73
|
end
|
data/lib/twitter-text.rb
CHANGED
@@ -10,3 +10,4 @@ require File.join(File.dirname(__FILE__), 'autolink')
|
|
10
10
|
require File.join(File.dirname(__FILE__), 'extractor')
|
11
11
|
require File.join(File.dirname(__FILE__), 'unicode')
|
12
12
|
require File.join(File.dirname(__FILE__), 'validation')
|
13
|
+
require File.join(File.dirname(__FILE__), 'hithighlighter')
|
data/spec/autolinking_spec.rb
CHANGED
data/spec/hithighlighter_spec.rb
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
|
3
|
+
class TestHitHighlighter
|
4
|
+
include Twitter::HitHighlighter
|
5
|
+
end
|
6
|
+
|
7
|
+
describe Twitter::HitHighlighter do
|
8
|
+
describe "highlight" do
|
9
|
+
before do
|
10
|
+
@highlighter = TestHitHighlighter.new
|
11
|
+
end
|
12
|
+
|
13
|
+
context "with options" do
|
14
|
+
before do
|
15
|
+
@original = "Testing this hit highliter"
|
16
|
+
@hits = [[13,16]]
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should default to <em> tags" do
|
20
|
+
@highlighter.hit_highlight(@original, @hits).should == "Testing this <em>hit</em> highliter"
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should allow tag override" do
|
24
|
+
@highlighter.hit_highlight(@original, @hits, :tag => 'b').should == "Testing this <b>hit</b> highliter"
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
context "without links" do
|
29
|
+
before do
|
30
|
+
@original = "Hey! this is a test tweet"
|
31
|
+
end
|
32
|
+
|
33
|
+
it "should return original when no hits are provided" do
|
34
|
+
@highlighter.hit_highlight(@original).should == @original
|
35
|
+
end
|
36
|
+
|
37
|
+
it "should highlight one hit" do
|
38
|
+
@highlighter.hit_highlight(@original, hits = [[5, 9]]).should == "Hey! <em>this</em> is a test tweet"
|
39
|
+
end
|
40
|
+
|
41
|
+
it "should highlight two hits" do
|
42
|
+
@highlighter.hit_highlight(@original, hits = [[5, 9], [15, 19]]).should == "Hey! <em>this</em> is a <em>test</em> tweet"
|
43
|
+
end
|
44
|
+
|
45
|
+
it "should correctly highlight first-word hits" do
|
46
|
+
@highlighter.hit_highlight(@original, hits = [[0, 3]]).should == "<em>Hey</em>! this is a test tweet"
|
47
|
+
end
|
48
|
+
|
49
|
+
it "should correctly highlight last-word hits" do
|
50
|
+
@highlighter.hit_highlight(@original, hits = [[20, 25]]).should == "Hey! this is a test <em>tweet</em>"
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
context "with links" do
|
55
|
+
it "should highlight with a single link" do
|
56
|
+
@highlighter.hit_highlight("@<a>bcherry</a> this was a test tweet", [[9, 13]]).should == "@<a>bcherry</a> <em>this</em> was a test tweet"
|
57
|
+
end
|
58
|
+
|
59
|
+
it "should highlight with link at the end" do
|
60
|
+
@highlighter.hit_highlight("test test <a>test</a>", [[5, 9]]).should == "test <em>test</em> <a>test</a>"
|
61
|
+
end
|
62
|
+
|
63
|
+
it "should highlight with a link at the beginning" do
|
64
|
+
@highlighter.hit_highlight("<a>test</a> test test", [[5, 9]]).should == "<a>test</a> <em>test</em> test"
|
65
|
+
end
|
66
|
+
|
67
|
+
it "should highlight an entire link" do
|
68
|
+
@highlighter.hit_highlight("test <a>test</a> test", [[5, 9]]).should == "test <a><em>test</em></a> test"
|
69
|
+
end
|
70
|
+
|
71
|
+
it "should highlight within a link" do
|
72
|
+
@highlighter.hit_highlight("test <a>test</a> test", [[6, 8]]).should == "test <a>t<em>es</em>t</a> test"
|
73
|
+
end
|
74
|
+
|
75
|
+
it "should highlight around a link" do
|
76
|
+
@highlighter.hit_highlight("test <a>test</a> test", [[3, 11]]).should == "tes<em>t <a>test</a> t</em>est"
|
77
|
+
end
|
78
|
+
|
79
|
+
end
|
80
|
+
|
81
|
+
end
|
82
|
+
|
83
|
+
end
|
data/spec/regex_spec.rb
CHANGED
@@ -16,7 +16,7 @@ describe "Twitter::Regex regular expressions" do
|
|
16
16
|
|
17
17
|
describe "invalid URLS" do
|
18
18
|
it "does not link urls with invalid characters" do
|
19
|
-
TestUrls::INVALID.each {|url| url.should_not
|
19
|
+
TestUrls::INVALID.each {|url| url.should_not match_autolink_expression}
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
data/spec/test_urls.rb
CHANGED
@@ -7,7 +7,7 @@ module TestUrls
|
|
7
7
|
"http://twitter.com/#search?q=iphone%20-filter%3Alinks",
|
8
8
|
"http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html",
|
9
9
|
"http://somehost.com:3000",
|
10
|
-
"http://
|
10
|
+
"http://xo.com/~matthew+%-x",
|
11
11
|
"http://en.wikipedia.org/wiki/Primer_(film)",
|
12
12
|
"http://www.ams.org/bookstore-getitem/item=mbk-59",
|
13
13
|
"http://chilp.it/?77e8fd",
|
@@ -16,14 +16,15 @@ module TestUrls
|
|
16
16
|
"http://tell.me/why",
|
17
17
|
"http://longtlds.info",
|
18
18
|
"http://✪df.ws/ejp",
|
19
|
-
"http://日本.com"
|
19
|
+
"http://日本.com",
|
20
|
+
"http://search.twitter.com/search?q=avro&lang=en",
|
21
|
+
"http://mrs.domain-dash.biz",
|
22
|
+
"http://x.com/has/one/char/domain",
|
20
23
|
]
|
21
24
|
|
22
25
|
INVALID = [
|
23
26
|
"http://no-tld",
|
24
27
|
"http://tld-too-short.x",
|
25
|
-
"http://x.com/,,,/.../@@@/;;;/:::/---/%%%x",
|
26
|
-
"http://domain-dash.com",
|
27
28
|
"http://-doman_dash.com"
|
28
29
|
]
|
29
30
|
|
metadata
CHANGED
@@ -1,32 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 1
|
7
|
+
- 1
|
8
|
+
- 1
|
9
|
+
version: 1.1.1
|
5
10
|
platform: ruby
|
6
11
|
authors:
|
7
12
|
- Matt Sanford
|
8
13
|
- Patrick Ewing
|
14
|
+
- Ben Cherry
|
15
|
+
- Britt Selvitelle
|
9
16
|
autorequire: ""
|
10
17
|
bindir: bin
|
11
18
|
cert_chain: []
|
12
19
|
|
13
|
-
date: 2010-
|
20
|
+
date: 2010-04-28 00:00:00 -07:00
|
14
21
|
default_executable:
|
15
22
|
dependencies:
|
16
23
|
- !ruby/object:Gem::Dependency
|
17
24
|
name: actionpack
|
18
|
-
|
19
|
-
|
20
|
-
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
prerelease: false
|
26
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
21
27
|
requirements:
|
22
28
|
- - ">="
|
23
29
|
- !ruby/object:Gem::Version
|
30
|
+
segments:
|
31
|
+
- 0
|
24
32
|
version: "0"
|
25
|
-
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
26
35
|
description: A gem that provides text handling for Twitter
|
27
36
|
email:
|
28
37
|
- matt@twitter.com
|
29
38
|
- patrick.henry.ewing@gmail.com
|
39
|
+
- bcherry@gmail.com
|
40
|
+
- bs@brittspace.com
|
30
41
|
executables: []
|
31
42
|
|
32
43
|
extensions: []
|
@@ -40,12 +51,14 @@ files:
|
|
40
51
|
- TODO
|
41
52
|
- lib/autolink.rb
|
42
53
|
- lib/extractor.rb
|
54
|
+
- lib/hithighlighter.rb
|
43
55
|
- lib/regex.rb
|
44
56
|
- lib/twitter-text.rb
|
45
57
|
- lib/unicode.rb
|
46
58
|
- lib/validation.rb
|
47
59
|
- spec/autolinking_spec.rb
|
48
60
|
- spec/extractor_spec.rb
|
61
|
+
- spec/hithighlighter_spec.rb
|
49
62
|
- spec/regex_spec.rb
|
50
63
|
- spec/spec_helper.rb
|
51
64
|
- spec/test_urls.rb
|
@@ -64,18 +77,20 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
64
77
|
requirements:
|
65
78
|
- - ">="
|
66
79
|
- !ruby/object:Gem::Version
|
80
|
+
segments:
|
81
|
+
- 0
|
67
82
|
version: "0"
|
68
|
-
version:
|
69
83
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
70
84
|
requirements:
|
71
85
|
- - ">="
|
72
86
|
- !ruby/object:Gem::Version
|
87
|
+
segments:
|
88
|
+
- 0
|
73
89
|
version: "0"
|
74
|
-
version:
|
75
90
|
requirements: []
|
76
91
|
|
77
92
|
rubyforge_project:
|
78
|
-
rubygems_version: 1.3.
|
93
|
+
rubygems_version: 1.3.6
|
79
94
|
signing_key:
|
80
95
|
specification_version: 3
|
81
96
|
summary: Twitter text handling library
|