twitter-text-kow 1.3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,139 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
5
+ $TESTING=true
6
+
7
+ # Ruby 1.8 encoding check
8
+ major, minor, patch = RUBY_VERSION.split('.')
9
+ if major.to_i == 1 && minor.to_i < 9
10
+ $KCODE='u'
11
+ end
12
+
13
+ $:.push File.join(File.dirname(__FILE__), '..', 'lib')
14
+
15
+ require 'nokogiri'
16
+ require 'json'
17
+ require 'simplecov'
18
+ SimpleCov.start do
19
+ add_group 'Libraries', 'lib'
20
+ end
21
+
22
+ require File.expand_path('../../lib/twitter-text', __FILE__)
23
+ require File.expand_path('../test_urls', __FILE__)
24
+
25
+ RSpec.configure do |config|
26
+ config.include TestUrls
27
+
28
+ config.filter_run_excluding :ruby => lambda { |version|
29
+ case version.to_s
30
+ when /^> (.*)/
31
+ !(RUBY_VERSION.to_s > $1)
32
+ else
33
+ !(RUBY_VERSION.to_s =~ /^#{version.to_s}/)
34
+ end
35
+ }
36
+ end
37
+
38
+ RSpec::Matchers.define :match_autolink_expression do
39
+ match do |string|
40
+ !Twitter::TwitterText::Extractor.extract_urls(string).empty?
41
+ end
42
+ end
43
+
44
+ RSpec::Matchers.define :match_autolink_expression_in do |text|
45
+ match do |url|
46
+ @match_data = Twitter::TwitterText::Regex[:valid_url].match(text)
47
+ @match_data && @match_data.to_s.strip == url
48
+ end
49
+
50
+ failure_message do |url|
51
+ "Expected to find url '#{url}' in text '#{text}', but the match was #{@match_data.captures}'"
52
+ end
53
+ end
54
+
55
+ RSpec::Matchers.define :have_autolinked_url do |url, inner_text|
56
+ match do |text|
57
+ @link = Nokogiri::HTML(text).search("a[@href='#{url}']")
58
+ @link &&
59
+ @link.inner_text &&
60
+ (inner_text && @link.inner_text == inner_text) || (!inner_text && @link.inner_text == url)
61
+ end
62
+
63
+ failure_message do |text|
64
+ "Expected url '#{url}'#{", inner_text '#{inner_text}'" if inner_text} to be autolinked in '#{text}'"
65
+ end
66
+ end
67
+
68
+ RSpec::Matchers.define :link_to_screen_name do |screen_name, inner_text|
69
+   expected = inner_text || screen_name
70
+
71
+   match do |text|
72
+     @link = Nokogiri::HTML(text).search("a.username")
73
+     text_matches = @link && @link.inner_text == expected
74
+     # Only compare the href once the link text is known to match.
75
+     text_matches && expect("https://twitter.com/#{screen_name}").to(eq(@link.first['href']))
76
+   end
77
+   failure_message do |text|
78
+     if @link.empty?
79
+       "Expected screen name '#{screen_name}' to be autolinked in '#{text}', but no link was found."
80
+     else
81
+       "Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' to match screen_name '#{expected}', but it does not."
82
+     end
83
+   end
84
+
85
+   failure_message_when_negated do |text|
86
+     "Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' not to match screen_name '#{expected}', but it does."
87
+   end
88
+
89
+   description do
90
+     "contain a link with the name and href pointing to the expected screen_name"
91
+   end
92
+ end
93
+
94
+ RSpec::Matchers.define :link_to_list_path do |list_path, inner_text|
95
+   expected = inner_text || list_path
96
+
97
+   match do |text|
98
+     @link = Nokogiri::HTML(text).search("a.list-slug")
99
+     text_matches = @link && @link.inner_text == expected
100
+     # Only compare the (downcased) href once the link text is known to match.
101
+     text_matches && expect("https://twitter.com/#{list_path}".downcase).to(eq(@link.first['href']))
102
+   end
103
+   failure_message do |text|
104
+     if @link.empty?
105
+       "Expected list path '#{list_path}' to be autolinked in '#{text}', but no link was found."
106
+     else
107
+       "Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' to match the list path '#{expected}', but it does not."
108
+     end
109
+   end
110
+
111
+   failure_message_when_negated do |text|
112
+     "Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' not to match the list path '#{expected}', but it does."
113
+   end
114
+
115
+   description do
116
+     "contain a link with the list title and an href pointing to the list path"
117
+   end
118
+ end
119
+
120
+ RSpec::Matchers.define :have_autolinked_hashtag do |hashtag|
121
+   match do |text|
122
+     # The search href carries the hashtag with its leading '#' replaced by %23.
123
+     search_href = "https://twitter.com/search?q=#{hashtag.sub(/^#/, '%23')}"
124
+     @link = Nokogiri::HTML(text).search("a[@href='#{search_href}']")
125
+     @link.inner_text == hashtag
126
+   end
127
+
128
+   failure_message do |text|
129
+     if @link.empty?
130
+       "Expected hashtag #{hashtag} to be autolinked in '#{text}', but no link was found."
131
+     else
132
+       "Expected link text to be [#{hashtag}], but it was [#{@link.inner_text}] in #{text}"
133
+     end
134
+   end
135
+
136
+   failure_message_when_negated do |text|
137
+     "Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' not to match the hashtag '#{hashtag}', but it does."
138
+   end
139
+ end
data/spec/test_urls.rb ADDED
@@ -0,0 +1,90 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
5
+ # encoding: utf-8
6
+
7
+ module TestUrls
8
+ VALID = [
9
+ "http://google.com",
10
+ "http://foobar.com/#",
11
+ "http://google.com/#foo",
12
+ "http://google.com/#search?q=iphone%20-filter%3Alinks",
13
+ "http://twitter.com/#search?q=iphone%20-filter%3Alinks",
14
+ "http://somedomain.com/index.php?path=/abc/def/",
15
+ "http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html",
16
+ "http://somehost.com:3000",
17
+ "http://a_b.c-d.com",
18
+ "http://sub_domain-dash.twitter.com",
19
+ "http://xo.com/~matthew+%-x",
20
+ "http://en.wikipedia.org/wiki/Primer_(film)",
21
+ "http://www.ams.org/bookstore-getitem/item=mbk-59",
22
+ "http://chilp.it/?77e8fd",
23
+ "http://tell.me/why",
24
+ "http://longtlds.info",
25
+ "http://✪df.ws/ejp",
26
+ "http://日本.com",
27
+ "http://search.twitter.com/search?q=avro&lang=en",
28
+ "http://mrs.domain-dash.biz",
29
+ "http://x.com/has/one/char/domain",
30
+ "http://t.co/nwcLTFF",
31
+ "http://a.b.cd",
32
+ "http://a-b.b.com",
33
+ "http://twitter-dash.com",
34
+ "http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx",
35
+ "www.foobar.com",
36
+ "WWW.FOOBAR.COM",
37
+ "www.foobar.co.jp",
38
+ "http://t.co",
39
+ "t.co/nwcLTFF",
40
+ "http://foobar.みんな",
41
+ "http://foobar.中国",
42
+ "http://foobar.پاکستان",
43
+ "https://www.youtube.com/playlist?list=PL0ZPu8XSRTB7wZzn0mLHMvyzVFeRxbWn-",
44
+ "http://ああ.com"
45
+ ] unless defined?(TestUrls::VALID)
46
+
47
+ INVALID = [
48
+ "http://no-tld",
49
+ "http://tld-too-short.x",
50
+ "http://-doman_dash.com",
51
+ "http://_leadingunderscore.twitter.com",
52
+ "http://trailingunderscore_.twitter.com",
53
+ "http://-leadingdash.twitter.com",
54
+ "http://trailingdash-.twitter.com",
55
+ "http://-leadingdash.com",
56
+ "http://trailingdash-.com",
57
+ "http://no_underscores.com",
58
+ "http://test.c_o_m",
59
+ "http://test.c-o-m",
60
+ "http://twitt#{[0x202A].pack('U')}.com",
61
+ "http://twitt#{[0x202B].pack('U')}.com",
62
+ "http://twitt#{[0x202C].pack('U')}.com",
63
+ "http://twitt#{[0x202D].pack('U')}.com",
64
+ "http://twitt#{[0x202E].pack('U')}.com",
65
+ "https://somesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomes
uperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurl.com/
foo https://somesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsome
superlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurl.com
/foo https://somesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsom
esuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurl.co
m/foo"
66
+ ] unless defined?(TestUrls::INVALID)
67
+
68
+ TCO = [
69
+ "http://t.co/P53cv5yO!",
70
+ "http://t.co/fQJmiPGg***",
71
+ "http://t.co/pbY2NfTZ's",
72
+ "http://t.co/2vYHpAc5;",
73
+ "http://t.co/ulYGBYSo:",
74
+ "http://t.co/GeT4bSiw=win",
75
+ "http://t.co/8MkmHU0k+fun",
76
+ "http://t.co/TKLp64dY.yes,",
77
+ "http://t.co/8vuO27cI$$",
78
+ "http://t.co/rPYTvdA8/",
79
+ "http://t.co/WvtMw5ku%",
80
+ "http://t.co/8t7G3ddS#",
81
+ "http://t.co/nfHNJDV2/#!",
82
+ "http://t.co/gK6NOXHs[good]",
83
+ "http://t.co/dMrT0o1Y]bad",
84
+ "http://t.co/FNkPfmii-",
85
+ "http://t.co/sMgS3pjI_oh",
86
+ "http://t.co/F8Dq3Plb~",
87
+ "http://t.co/ivvH58vC&help",
88
+ "http://t.co/iUBL15zD|NZ5KYLQ8"
89
+ ] unless defined?(TestUrls::TCO)
90
+ end
@@ -0,0 +1,25 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
5
+ # encoding: utf-8
6
+ require File.dirname(__FILE__) + '/spec_helper'
7
+
8
+ major, minor, patch = RUBY_VERSION.split('.')
9
+ if major.to_i == 1 && minor.to_i < 9
10
+ describe "base" do
11
+ before do
12
+ $KCODE = 'NONE'
13
+ end
14
+
15
+ after do
16
+ $KCODE = 'u'
17
+ end
18
+
19
+ it "should raise with invalid KCODE on Ruby < 1.9" do
20
+ expect(lambda do
21
+ require 'twitter-text'
22
+ end).to raise_error
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,35 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
5
+ # encoding: utf-8
6
+ require File.dirname(__FILE__) + '/spec_helper'
7
+
8
+ describe Twitter::TwitterText::Unicode do
9
+
10
+ it "should lazy-init constants" do
11
+ expect(Twitter::TwitterText::Unicode.const_defined?(:UFEB6)).to eq(false)
12
+ expect(Twitter::TwitterText::Unicode::UFEB6).to_not be_nil
13
+ expect(Twitter::TwitterText::Unicode::UFEB6).to be_kind_of(String)
14
+ expect(Twitter::TwitterText::Unicode.const_defined?(:UFEB6)).to eq(true)
15
+ end
16
+
17
+ it "should return corresponding character" do
18
+ expect(Twitter::TwitterText::Unicode::UFEB6).to be == [0xfeb6].pack('U')
19
+ end
20
+
21
+ it "should allow lowercase notation" do
22
+ expect(Twitter::TwitterText::Unicode::Ufeb6).to be == Twitter::TwitterText::Unicode::UFEB6
23
+ expect(Twitter::TwitterText::Unicode::Ufeb6).to be === Twitter::TwitterText::Unicode::UFEB6
24
+ end
25
+
26
+ it "should allow underscore notation" do
27
+ expect(Twitter::TwitterText::Unicode::U_FEB6).to be == Twitter::TwitterText::Unicode::UFEB6
28
+ expect(Twitter::TwitterText::Unicode::U_FEB6).to be === Twitter::TwitterText::Unicode::UFEB6
29
+ end
30
+
31
+ it "should raise on invalid codepoints" do
32
+ expect(lambda { Twitter::TwitterText::Unicode::FFFFFF }).to raise_error(NameError)
33
+ end
34
+
35
+ end
@@ -0,0 +1,87 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
5
+ # encoding: utf-8
6
+ require File.dirname(__FILE__) + '/spec_helper'
7
+
8
+ class TestValidation
9
+ include Twitter::TwitterText::Validation
10
+ end
11
+
12
+ describe Twitter::TwitterText::Validation do
13
+
14
+ it "should disallow invalid BOM character" do
15
+ expect(TestValidation.new.tweet_invalid?("Bom:#{Twitter::TwitterText::Unicode::UFFFE}")).to be == :invalid_characters
16
+ expect(TestValidation.new.tweet_invalid?("Bom:#{Twitter::TwitterText::Unicode::UFEFF}")).to be == :invalid_characters
17
+ end
18
+
19
+ it "should disallow invalid U+FFFF character" do
20
+ expect(TestValidation.new.tweet_invalid?("Bom:#{Twitter::TwitterText::Unicode::UFFFF}")).to be == :invalid_characters
21
+ end
22
+
23
+ it "should allow direction change characters" do
24
+ [0x202A, 0x202B, 0x202C, 0x202D, 0x202E].map{|cp| [cp].pack('U') }.each do |char|
25
+ expect(TestValidation.new.tweet_invalid?("Invalid:#{char}")).to be false
26
+ end
27
+ end
28
+
29
+ it "should disallow non-Unicode" do
30
+ expect(TestValidation.new.tweet_invalid?("not-Unicode:\xfff0")).to be == :invalid_characters
31
+ end
32
+
33
+ it "should allow <= 140 combined accent characters" do
34
+ char = [0x65, 0x0301].pack('U')
35
+ expect(TestValidation.new.tweet_invalid?(char * 139)).to be false
36
+ expect(TestValidation.new.tweet_invalid?(char * 140)).to be false
37
+ expect(TestValidation.new.tweet_invalid?(char * 141)).to eq(:too_long)
38
+ end
39
+
40
+ it "should allow <= 140 multi-byte characters" do
41
+ char = [ 0x1d106 ].pack('U')
42
+ expect(TestValidation.new.tweet_invalid?(char * 139)).to be false
43
+ expect(TestValidation.new.tweet_invalid?(char * 140)).to be false
44
+ expect(TestValidation.new.tweet_invalid?(char * 141)).to eq(:too_long)
45
+ end
46
+
47
+ context "when returning results" do
48
+ it "should properly create new fully-populated results from arguments" do
49
+ results = Twitter::TwitterText::Validation::ParseResults.new(weighted_length: 26, permillage: 92, valid: true, display_range_start: 0, display_range_end: 16, valid_range_start: 0, valid_range_end:16)
50
+ expect(results).to_not be nil
51
+ expect(results[:weighted_length]).to eq(26)
52
+ expect(results[:permillage]).to eq(92)
53
+ expect(results[:valid]).to be true
54
+ expect(results[:display_range_start]).to eq(0)
55
+ expect(results[:display_range_end]).to eq(16)
56
+ expect(results[:valid_range_start]).to eq(0)
57
+ expect(results[:valid_range_end]).to eq(16)
58
+ end
59
+
60
+ it "should properly create empty results" do
61
+ results = Twitter::TwitterText::Validation::ParseResults.empty()
62
+ expect(results[:weighted_length]).to eq(0)
63
+ expect(results[:permillage]).to eq(0)
64
+ expect(results[:valid]).to be true
65
+ expect(results[:display_range_start]).to eq(0)
66
+ expect(results[:display_range_end]).to eq(0)
67
+ expect(results[:valid_range_start]).to eq(0)
68
+ expect(results[:valid_range_end]).to eq(0)
69
+ end
70
+ end
71
+
72
+ context "when parsing tweet text" do
73
+ it "should properly parse ZWJ and ZWNJ when grapheme clusters are enabled", ruby: ">= 2.5.0" do
74
+ # Grapheme clustering of Devanagari script differs based on platform implementation
75
+ text = "ZWJ: क्ष -> क्\u200Dष; ZWNJ: क्ष -> क्\u200Cष"
76
+ config = Twitter::TwitterText::Configuration::configuration_from_file(Twitter::TwitterText::Configuration::CONFIG_V3)
77
+ results = Twitter::TwitterText::Validation::parse_tweet(text, config: config)
78
+ expect(results[:weighted_length]).to eq(29)
79
+ expect(results[:permillage]).to eq(103)
80
+ expect(results[:valid]).to be true
81
+ expect(results[:display_range_start]).to eq(0)
82
+ expect(results[:display_range_end]).to eq(34)
83
+ expect(results[:valid_range_start]).to eq(0)
84
+ expect(results[:valid_range_end]).to eq(34)
85
+ end
86
+ end
87
+ end
@@ -0,0 +1,242 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
5
+ require 'multi_json'
6
+ require 'nokogiri'
7
+ require 'test/unit'
8
+ require 'yaml'
9
+
10
+ # Detect Ruby 1.8 and older to apply necessary encoding fixes
11
+ major, minor, patch = RUBY_VERSION.split('.')
12
+ OLD_RUBY = major.to_i == 1 && minor.to_i < 9
13
+
14
+ if OLD_RUBY
15
+ $KCODE='u'
16
+ end
17
+
18
+ $:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
19
+ require 'twitter-text'
20
+
21
+ class ConformanceTest < Test::Unit::TestCase
22
+ include Twitter::TwitterText::Extractor
23
+ include Twitter::TwitterText::Autolink
24
+ include Twitter::TwitterText::HitHighlighter
25
+ include Twitter::TwitterText::Validation
26
+
27
+ private
28
+
29
+ %w(description expected json hits).each do |key|
30
+ define_method key.to_sym do
31
+ @test_info[key]
32
+ end
33
+ end
34
+
35
+ if OLD_RUBY
36
+ def text
37
+ @test_info['text'].gsub(/\\u([0-9a-f]{8})/i) do
38
+ [$1.to_i(16)].pack('U*')
39
+ end
40
+ end
41
+ else
42
+ def text
43
+ @test_info['text']
44
+ end
45
+ end
46
+
47
+ def assert_equal_without_attribute_order(expected, actual, failure_message = nil)
48
+ assert_block(build_message(failure_message, "<?> expected but was\n<?>", expected, actual)) do
49
+ equal_nodes?(Nokogiri::HTML(expected).root, Nokogiri::HTML(actual).root)
50
+ end
51
+ end
52
+
53
+ def equal_nodes?(expected, actual)
54
+ return false unless expected.name == actual.name
55
+ return false unless ordered_attributes(expected) == ordered_attributes(actual)
56
+ return false if expected.text? && actual.text? && expected.content != actual.content
57
+
58
+ expected.children.each_with_index do |child, index|
59
+ return false unless equal_nodes?(child, actual.children[index])
60
+ end
61
+
62
+ true
63
+ end
64
+
65
+ def ordered_attributes(element)
66
+ element.attribute_nodes.map{|attr| [attr.name, attr.value]}.sort
67
+ end
68
+
69
+ def assert_equal_parse_results(expected, actual, failure_message = nil)
70
+ e = {}
71
+ # Note that we don't assert display and valid ranges because of differences
72
+ # in how ruby counts characters (wrt surrogate pairs) vs. other platforms
73
+ range_keys = [:display_range_start, :display_range_end, :valid_range_start, :valid_range_end]
74
+ expected.keys.each { |k| e[k.gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').gsub(/([a-z\d])([A-Z])/,'\1_\2').tr("-", "_").downcase.to_sym] = expected[k] }
75
+ [e, actual].each { |a| range_keys.each { |k| a.delete(k) } }
76
+ assert_equal e, actual, failure_message
77
+ end
78
+
79
+ CONFORMANCE_DIR = ENV['CONFORMANCE_DIR'] || File.expand_path("../../../conformance", __FILE__)
80
+
81
+ def self.def_conformance_test(file, test_type, &block)
82
+ yaml = YAML.load_file(File.join(CONFORMANCE_DIR, file))
83
+ raise "No such test suite: #{test_type.to_s}" unless yaml["tests"][test_type.to_s]
84
+
85
+ file_name = file.split('.').first
86
+
87
+ yaml["tests"][test_type.to_s].each do |test_info|
88
+ name = :"test_#{file_name}_#{test_type} #{test_info['description']}"
89
+ define_method name do
90
+ @test_info = test_info
91
+ instance_eval(&block)
92
+ end
93
+ end
94
+ end
95
+
96
+ public
97
+
98
+ # Extractor Conformance
99
+ def_conformance_test("extract.yml", :replies) do
100
+ assert_equal expected, extract_reply_screen_name(text), description
101
+ end
102
+
103
+ def_conformance_test("extract.yml", :mentions) do
104
+ assert_equal expected, extract_mentioned_screen_names(text), description
105
+ end
106
+
107
+ def_conformance_test("extract.yml", :mentions_with_indices) do
108
+ e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} }
109
+ assert_equal e, extract_mentioned_screen_names_with_indices(text), description
110
+ end
111
+
112
+ def_conformance_test("extract.yml", :mentions_or_lists_with_indices) do
113
+ e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} }
114
+ assert_equal e, extract_mentions_or_lists_with_indices(text), description
115
+ end
116
+
117
+ def_conformance_test("extract.yml", :urls) do
118
+ assert_equal expected, extract_urls(text), description
119
+ expected.each do |expected_url|
120
+ assert_equal true, valid_url?(expected_url, true, false), "expected url [#{expected_url}] not valid"
121
+ end
122
+ end
123
+
124
+ def_conformance_test("tlds.yml", :generic) do
125
+ assert_equal expected, extract_urls(text), description
126
+ end
127
+
128
+ def_conformance_test("tlds.yml", :country) do
129
+ assert_equal expected, extract_urls(text), description
130
+ end
131
+
132
+ def_conformance_test("extract.yml", :urls_with_indices) do
133
+ e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} }
134
+ assert_equal e, extract_urls_with_indices(text), description
135
+ end
136
+
137
+ def_conformance_test("extract.yml", :urls_with_directional_markers) do
138
+ e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} }
139
+ assert_equal e, extract_urls_with_indices(text), description
140
+ end
141
+
142
+ def_conformance_test("extract.yml", :hashtags) do
143
+ assert_equal expected, extract_hashtags(text), description
144
+ end
145
+
146
+ def_conformance_test("extract.yml", :hashtags_from_astral) do
147
+ assert_equal expected, extract_hashtags(text), description
148
+ end
149
+
150
+ def_conformance_test("extract.yml", :hashtags_with_indices) do
151
+ e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} }
152
+ assert_equal e, extract_hashtags_with_indices(text), description
153
+ end
154
+
155
+ def_conformance_test("extract.yml", :cashtags) do
156
+ assert_equal expected, extract_cashtags(text), description
157
+ end
158
+
159
+ def_conformance_test("extract.yml", :cashtags_with_indices) do
160
+ e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} }
161
+ assert_equal e, extract_cashtags_with_indices(text), description
162
+ end
163
+
164
+ # Autolink Conformance
165
+ def_conformance_test("autolink.yml", :usernames) do
166
+ assert_equal_without_attribute_order expected, auto_link_usernames_or_lists(text, :suppress_no_follow => true), description
167
+ end
168
+
169
+ def_conformance_test("autolink.yml", :lists) do
170
+ assert_equal_without_attribute_order expected, auto_link_usernames_or_lists(text, :suppress_no_follow => true), description
171
+ end
172
+
173
+ def_conformance_test("autolink.yml", :urls) do
174
+ assert_equal_without_attribute_order expected, auto_link_urls(text, :suppress_no_follow => true), description
175
+ end
176
+
177
+ def_conformance_test("autolink.yml", :hashtags) do
178
+ assert_equal_without_attribute_order expected, auto_link_hashtags(text, :suppress_no_follow => true), description
179
+ end
180
+
181
+ def_conformance_test("autolink.yml", :cashtags) do
182
+ assert_equal_without_attribute_order expected, auto_link_cashtags(text, :suppress_no_follow => true), description
183
+ end
184
+
185
+ def_conformance_test("autolink.yml", :all) do
186
+ assert_equal_without_attribute_order expected, auto_link(text, :suppress_no_follow => true), description
187
+ end
188
+
189
+ def_conformance_test("autolink.yml", :json) do
190
+ assert_equal_without_attribute_order expected, auto_link_with_json(text, MultiJson.load(json), :suppress_no_follow => true), description
191
+ end
192
+
193
+ # HitHighlighter Conformance
194
+ def_conformance_test("hit_highlighting.yml", :plain_text) do
195
+ assert_equal expected, hit_highlight(text, hits), description
196
+ end
197
+
198
+ def_conformance_test("hit_highlighting.yml", :with_links) do
199
+ assert_equal expected, hit_highlight(text, hits), description
200
+ end
201
+
202
+ # Validation Conformance
203
+ def_conformance_test("validate.yml", :tweets) do
204
+ assert_equal expected, parse_tweet(text)[:valid], description
205
+ end
206
+
207
+ def_conformance_test("validate.yml", :usernames) do
208
+ assert_equal expected, valid_username?(text), description
209
+ end
210
+
211
+ def_conformance_test("validate.yml", :lists) do
212
+ assert_equal expected, valid_list?(text), description
213
+ end
214
+
215
+ def_conformance_test("validate.yml", :urls) do
216
+ assert_equal expected, valid_url?(text), description
217
+ end
218
+
219
+ def_conformance_test("validate.yml", :urls_without_protocol) do
220
+ assert_equal expected, valid_url?(text, true, false), description
221
+ end
222
+
223
+ def_conformance_test("validate.yml", :hashtags) do
224
+ assert_equal expected, valid_hashtag?(text), description
225
+ end
226
+
227
+ def_conformance_test("validate.yml", :WeightedTweetsCounterTest) do
228
+ # Force v2 configuration, basic weighted code point support
229
+ config = Twitter::TwitterText::Configuration::configuration_from_file(Twitter::TwitterText::Configuration::CONFIG_V2)
230
+ assert_equal_parse_results expected, parse_tweet(text, config: config), description
231
+ end
232
+
233
+ def_conformance_test("validate.yml", :WeightedTweetsWithDiscountedEmojiCounterTest) do
234
+ # Force v3 configuration, which supports discounting grapheme clusters that are emoji
235
+ config = Twitter::TwitterText::Configuration::configuration_from_file(Twitter::TwitterText::Configuration::CONFIG_V3)
236
+ assert_equal_parse_results expected, parse_tweet(text, config: config), description
237
+ end
238
+
239
+ def_conformance_test("validate.yml", :UnicodeDirectionalMarkerCounterTest) do
240
+ assert_equal_parse_results expected, parse_tweet(text), description
241
+ end
242
+ end
@@ -0,0 +1,35 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
5
+ # encoding: utf-8
6
+
7
+ Gem::Specification.new do |s|
8
+ s.name = "twitter-text-kow"
9
+ s.version = "1.3.1.0"
10
+ s.authors = ["David LaMacchia", "Sudheer Guntupalli", "Kaushik Lakshmikanth", "Jose Antonio Marquez Russo", "Lee Adams",
11
+ "Yoshimasa Niwa"]
12
+ s.email = ["opensource@twitter.com"]
13
+ s.homepage = "http://twitter.com"
14
+ s.description = s.summary = "A KOW fixed gem that provides text handling for Twitter"
15
+ s.license = "Apache 2.0"
16
+
17
+ s.platform = Gem::Platform::RUBY
18
+ s.summary = "Twitter text handling library"
19
+
20
+ s.add_development_dependency "test-unit"
21
+ s.add_development_dependency "multi_json", "~> 1.3"
22
+ s.add_development_dependency "nokogiri", "~> 1.17.2"
23
+ s.add_development_dependency "rake"
24
+ s.add_development_dependency "rdoc"
25
+ s.add_development_dependency "rspec", "~> 3.0"
26
+ s.add_development_dependency "simplecov"
27
+ s.add_runtime_dependency "unf", "~> 0.1.0"
28
+ # Use of idn-ruby requires libidn to be installed separately
29
+ s.add_runtime_dependency "idn-ruby"
30
+
31
+ s.files = `git ls-files`.split("\n") + ['lib/assets/tld_lib.yml'] + Dir['config/*']
32
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
33
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
34
+ s.require_paths = ["lib"]
35
+ end