twitter-text 1.9.0 → 1.9.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/twitter-text/extractor.rb +5 -4
- data/lib/twitter-text/regex.rb +39 -24
- data/twitter-text.gemspec +3 -3
- metadata +135 -107
- checksums.yaml +0 -7
@@ -201,15 +201,17 @@ module Twitter
|
|
201
201
|
if !protocol
|
202
202
|
next if !options[:extract_url_without_protocol] || before =~ Twitter::Regex[:invalid_url_without_protocol_preceding_chars]
|
203
203
|
last_url = nil
|
204
|
-
last_url_invalid_match = nil
|
205
204
|
domain.scan(Twitter::Regex[:valid_ascii_domain]) do |ascii_domain|
|
206
205
|
last_url = {
|
207
206
|
:url => ascii_domain,
|
208
207
|
:indices => [start_position + $~.char_begin(0),
|
209
208
|
start_position + $~.char_end(0)]
|
210
209
|
}
|
211
|
-
|
212
|
-
|
210
|
+
if path ||
|
211
|
+
ascii_domain =~ Twitter::Regex[:valid_special_short_domain] ||
|
212
|
+
ascii_domain !~ Twitter::Regex[:invalid_short_domain]
|
213
|
+
urls << last_url
|
214
|
+
end
|
213
215
|
end
|
214
216
|
|
215
217
|
# no ASCII-only domain found. Skip the entire URL
|
@@ -218,7 +220,6 @@ module Twitter
|
|
218
220
|
# last_url only contains domain. Need to add path and query if they exist.
|
219
221
|
if path
|
220
222
|
# last_url was not added. Add it to urls here.
|
221
|
-
urls << last_url if last_url_invalid_match
|
222
223
|
last_url[:url] = url.sub(domain, last_url[:url])
|
223
224
|
last_url[:indices][1] = end_position
|
224
225
|
end
|
data/lib/twitter-text/regex.rb
CHANGED
@@ -189,39 +189,53 @@ module Twitter
|
|
189
189
|
|
190
190
|
REGEXEN[:valid_gTLD] = %r{
|
191
191
|
(?:
|
192
|
-
(?:
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
192
|
+
(?:
|
193
|
+
academy|accountants|actor|aero|agency|airforce|archi|arpa|asia|associates|axa|bar|bargains|bayern|berlin|best|
|
194
|
+
bid|bike|biz|black|blackfriday|blue|boutique|build|builders|buzz|cab|camera|camp|capital|cards|care|career|
|
195
|
+
careers|cash|cat|catering|center|ceo|cheap|christmas|citic|claims|cleaning|clinic|clothing|club|codes|coffee|
|
196
|
+
college|cologne|com|community|company|computer|construction|contractors|cooking|cool|coop|country|credit|
|
197
|
+
creditcard|cruises|dance|dating|democrat|dental|desi|diamonds|digital|directory|discount|domains|edu|education|
|
198
|
+
email|engineering|enterprises|equipment|estate|eus|events|exchange|expert|exposed|fail|farm|feedback|finance|
|
199
|
+
financial|fish|fishing|fitness|flights|florist|foo|foundation|frogans|fund|furniture|futbol|gal|gallery|gift|
|
200
|
+
glass|globo|gmo|gop|gov|graphics|gratis|gripe|guitars|guru|haus|holdings|holiday|horse|house|immobilien|
|
201
|
+
industries|info|institute|insure|int|international|investments|jetzt|jobs|kaufen|kim|kitchen|kiwi|koeln|kred|
|
202
|
+
land|lease|lighting|limited|limo|link|london|luxury|management|mango|marketing|media|meet|menu|miami|mil|mobi|
|
203
|
+
moda|moe|monash|moscow|museum|nagoya|name|net|neustar|ninja|nyc|okinawa|onl|org|paris|partners|parts|photo|
|
204
|
+
photography|photos|pics|pictures|pink|plumbing|post|pro|productions|properties|pub|qpon|quebec|recipes|red|
|
205
|
+
reisen|ren|rentals|repair|report|rest|reviews|rich|rocks|rodeo|ruhr|ryukyu|saarland|schule|services|sexy|
|
206
|
+
shiksha|shoes|singles|social|sohu|solar|solutions|soy|supplies|supply|support|surgery|systems|tattoo|tax|
|
207
|
+
technology|tel|tienda|tips|today|tokyo|tools|town|toys|trade|training|travel|university|uno|vacations|vegas|
|
208
|
+
ventures|viajes|villas|vision|vodka|vote|voting|voto|voyage|wang|watch|webcam|wed|wien|wiki|works|wtc|wtf|xxx|
|
209
|
+
xyz|yokohama|zone|дети|москва|онлайн|орг|сайт|بازار|شبكة|संगठन|みんな|世界|中信|中文网|公司|公益|商城|在线|我爱你|政务|机构|游戏|移动|组织机构|
|
210
|
+
网址|网络|集团|삼성
|
211
|
+
)(?=[^0-9a-z@]|$)
|
205
212
|
)
|
206
213
|
}ix
|
207
214
|
|
208
215
|
REGEXEN[:valid_ccTLD] = %r{
|
209
216
|
(?:
|
210
|
-
(?:
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
ye|yt|za|zm|zw
|
218
|
-
|
219
|
-
|
220
|
-
(?=[^0-9a-z@]|$)
|
217
|
+
(?:
|
218
|
+
ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bl|bm|bn|bo|bq|br|bs|bt|bv|bw|
|
219
|
+
by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|
|
220
|
+
fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|
|
221
|
+
is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mf|mg|mh|mk|ml|
|
222
|
+
mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|
|
223
|
+
ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|
|
224
|
+
tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw|мкд|мон|рф|
|
225
|
+
срб|укр|қаз|الاردن|الجزائر|السعودية|المغرب|امارات|ایران|بھارت|تونس|سودان|سورية|عمان|فلسطين|قطر|مصر|مليسيا|
|
226
|
+
پاکستان|भारत|বাংলা|ভারত|ਭਾਰਤ|ભારત|இந்தியா|இலங்கை|சிங்கப்பூர்|భారత్|ලංකා|ไทย|გე|中国|中國|台湾|台灣|新加坡|香港|한국
|
227
|
+
)(?=[^0-9a-z@]|$)
|
221
228
|
)
|
222
229
|
}ix
|
223
230
|
REGEXEN[:valid_punycode] = /(?:xn--[0-9a-z]+)/i
|
224
231
|
|
232
|
+
REGEXEN[:valid_special_cctld] = %r{
|
233
|
+
(?:
|
234
|
+
(?:co|tv)
|
235
|
+
(?=[^0-9a-z@]|$)
|
236
|
+
)
|
237
|
+
}ix
|
238
|
+
|
225
239
|
REGEXEN[:valid_domain] = /(?:
|
226
240
|
#{REGEXEN[:valid_subdomain]}*#{REGEXEN[:valid_domain_name]}
|
227
241
|
(?:#{REGEXEN[:valid_gTLD]}|#{REGEXEN[:valid_ccTLD]}|#{REGEXEN[:valid_punycode]})
|
@@ -238,6 +252,7 @@ module Twitter
|
|
238
252
|
|
239
253
|
# This is used in Extractor to filter out unwanted URLs.
|
240
254
|
REGEXEN[:invalid_short_domain] = /\A#{REGEXEN[:valid_domain_name]}#{REGEXEN[:valid_ccTLD]}\Z/io
|
255
|
+
REGEXEN[:valid_special_short_domain] = /\A#{REGEXEN[:valid_domain_name]}#{REGEXEN[:valid_special_cctld]}\Z/io
|
241
256
|
|
242
257
|
REGEXEN[:valid_port_number] = /[0-9]+/
|
243
258
|
|
data/twitter-text.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = "twitter-text"
|
5
|
-
s.version = "1.9.0"
|
5
|
+
s.version = "1.9.1"
|
6
6
|
s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle",
|
7
7
|
"Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii", "James Koval"]
|
8
8
|
s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com",
|
@@ -19,8 +19,8 @@ Gem::Specification.new do |s|
|
|
19
19
|
s.add_development_dependency "nokogiri", "~> 1.5.10"
|
20
20
|
s.add_development_dependency "rake"
|
21
21
|
s.add_development_dependency "rdoc"
|
22
|
-
s.add_development_dependency "rspec"
|
23
|
-
s.add_development_dependency "simplecov"
|
22
|
+
s.add_development_dependency "rspec", "~> 2.14.0"
|
23
|
+
s.add_development_dependency "simplecov", "~> 0.8.0"
|
24
24
|
s.add_runtime_dependency "unf", "~> 0.1.0"
|
25
25
|
|
26
26
|
s.files = `git ls-files`.split("\n")
|
metadata
CHANGED
@@ -1,9 +1,15 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter-text
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 49
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 9
|
9
|
+
- 1
|
10
|
+
version: 1.9.1
|
5
11
|
platform: ruby
|
6
|
-
authors:
|
12
|
+
authors:
|
7
13
|
- Matt Sanford
|
8
14
|
- Patrick Ewing
|
9
15
|
- Ben Cherry
|
@@ -16,108 +22,118 @@ authors:
|
|
16
22
|
autorequire:
|
17
23
|
bindir: bin
|
18
24
|
cert_chain: []
|
19
|
-
|
20
|
-
|
21
|
-
|
25
|
+
|
26
|
+
date: 2014-07-18 00:00:00 Z
|
27
|
+
dependencies:
|
28
|
+
- !ruby/object:Gem::Dependency
|
22
29
|
name: multi_json
|
23
|
-
requirement: !ruby/object:Gem::Requirement
|
24
|
-
requirements:
|
25
|
-
- - "~>"
|
26
|
-
- !ruby/object:Gem::Version
|
27
|
-
version: '1.3'
|
28
|
-
type: :development
|
29
30
|
prerelease: false
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
- !ruby/object:Gem::Version
|
41
|
-
version: 1.5.10
|
31
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
32
|
+
none: false
|
33
|
+
requirements:
|
34
|
+
- - ~>
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
hash: 9
|
37
|
+
segments:
|
38
|
+
- 1
|
39
|
+
- 3
|
40
|
+
version: "1.3"
|
42
41
|
type: :development
|
42
|
+
version_requirements: *id001
|
43
|
+
- !ruby/object:Gem::Dependency
|
44
|
+
name: nokogiri
|
43
45
|
prerelease: false
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
46
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
47
|
+
none: false
|
48
|
+
requirements:
|
49
|
+
- - ~>
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
hash: 23
|
52
|
+
segments:
|
53
|
+
- 1
|
54
|
+
- 5
|
55
|
+
- 10
|
48
56
|
version: 1.5.10
|
49
|
-
- !ruby/object:Gem::Dependency
|
50
|
-
name: rake
|
51
|
-
requirement: !ruby/object:Gem::Requirement
|
52
|
-
requirements:
|
53
|
-
- - ">="
|
54
|
-
- !ruby/object:Gem::Version
|
55
|
-
version: '0'
|
56
57
|
type: :development
|
58
|
+
version_requirements: *id002
|
59
|
+
- !ruby/object:Gem::Dependency
|
60
|
+
name: rake
|
57
61
|
prerelease: false
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
- !ruby/object:Gem::Version
|
62
|
-
version: '0'
|
63
|
-
- !ruby/object:Gem::Dependency
|
64
|
-
name: rdoc
|
65
|
-
requirement: !ruby/object:Gem::Requirement
|
66
|
-
requirements:
|
62
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
63
|
+
none: false
|
64
|
+
requirements:
|
67
65
|
- - ">="
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
hash: 3
|
68
|
+
segments:
|
69
|
+
- 0
|
70
|
+
version: "0"
|
70
71
|
type: :development
|
72
|
+
version_requirements: *id003
|
73
|
+
- !ruby/object:Gem::Dependency
|
74
|
+
name: rdoc
|
71
75
|
prerelease: false
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
- !ruby/object:Gem::Version
|
76
|
-
version: '0'
|
77
|
-
- !ruby/object:Gem::Dependency
|
78
|
-
name: rspec
|
79
|
-
requirement: !ruby/object:Gem::Requirement
|
80
|
-
requirements:
|
76
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
77
|
+
none: false
|
78
|
+
requirements:
|
81
79
|
- - ">="
|
82
|
-
- !ruby/object:Gem::Version
|
83
|
-
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
hash: 3
|
82
|
+
segments:
|
83
|
+
- 0
|
84
|
+
version: "0"
|
84
85
|
type: :development
|
86
|
+
version_requirements: *id004
|
87
|
+
- !ruby/object:Gem::Dependency
|
88
|
+
name: rspec
|
85
89
|
prerelease: false
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
version: '0'
|
90
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
91
|
+
none: false
|
92
|
+
requirements:
|
93
|
+
- - ~>
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
hash: 55
|
96
|
+
segments:
|
97
|
+
- 2
|
98
|
+
- 14
|
99
|
+
- 0
|
100
|
+
version: 2.14.0
|
98
101
|
type: :development
|
102
|
+
version_requirements: *id005
|
103
|
+
- !ruby/object:Gem::Dependency
|
104
|
+
name: simplecov
|
99
105
|
prerelease: false
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
107
|
+
none: false
|
108
|
+
requirements:
|
109
|
+
- - ~>
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
hash: 63
|
112
|
+
segments:
|
113
|
+
- 0
|
114
|
+
- 8
|
115
|
+
- 0
|
116
|
+
version: 0.8.0
|
117
|
+
type: :development
|
118
|
+
version_requirements: *id006
|
119
|
+
- !ruby/object:Gem::Dependency
|
106
120
|
name: unf
|
107
|
-
requirement: !ruby/object:Gem::Requirement
|
108
|
-
requirements:
|
109
|
-
- - "~>"
|
110
|
-
- !ruby/object:Gem::Version
|
111
|
-
version: 0.1.0
|
112
|
-
type: :runtime
|
113
121
|
prerelease: false
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
122
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
123
|
+
none: false
|
124
|
+
requirements:
|
125
|
+
- - ~>
|
126
|
+
- !ruby/object:Gem::Version
|
127
|
+
hash: 27
|
128
|
+
segments:
|
129
|
+
- 0
|
130
|
+
- 1
|
131
|
+
- 0
|
118
132
|
version: 0.1.0
|
133
|
+
type: :runtime
|
134
|
+
version_requirements: *id007
|
119
135
|
description: A gem that provides text handling for Twitter
|
120
|
-
email:
|
136
|
+
email:
|
121
137
|
- matt@twitter.com
|
122
138
|
- patrick.henry.ewing@gmail.com
|
123
139
|
- bcherry@gmail.com
|
@@ -128,14 +144,17 @@ email:
|
|
128
144
|
- keita@twitter.com
|
129
145
|
- jkoval@twitter.com
|
130
146
|
executables: []
|
147
|
+
|
131
148
|
extensions: []
|
149
|
+
|
132
150
|
extra_rdoc_files: []
|
133
|
-
|
134
|
-
|
135
|
-
-
|
136
|
-
-
|
137
|
-
-
|
138
|
-
-
|
151
|
+
|
152
|
+
files:
|
153
|
+
- .gemtest
|
154
|
+
- .gitignore
|
155
|
+
- .gitmodules
|
156
|
+
- .rspec
|
157
|
+
- .travis.yml
|
139
158
|
- Gemfile
|
140
159
|
- LICENSE
|
141
160
|
- README.rdoc
|
@@ -165,30 +184,39 @@ files:
|
|
165
184
|
- test/conformance_test.rb
|
166
185
|
- twitter-text.gemspec
|
167
186
|
homepage: http://twitter.com
|
168
|
-
licenses:
|
187
|
+
licenses:
|
169
188
|
- Apache 2.0
|
170
|
-
metadata: {}
|
171
189
|
post_install_message:
|
172
190
|
rdoc_options: []
|
173
|
-
|
191
|
+
|
192
|
+
require_paths:
|
174
193
|
- lib
|
175
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
176
|
-
|
194
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
195
|
+
none: false
|
196
|
+
requirements:
|
177
197
|
- - ">="
|
178
|
-
- !ruby/object:Gem::Version
|
179
|
-
|
180
|
-
|
181
|
-
|
198
|
+
- !ruby/object:Gem::Version
|
199
|
+
hash: 3
|
200
|
+
segments:
|
201
|
+
- 0
|
202
|
+
version: "0"
|
203
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
204
|
+
none: false
|
205
|
+
requirements:
|
182
206
|
- - ">="
|
183
|
-
- !ruby/object:Gem::Version
|
184
|
-
|
207
|
+
- !ruby/object:Gem::Version
|
208
|
+
hash: 3
|
209
|
+
segments:
|
210
|
+
- 0
|
211
|
+
version: "0"
|
185
212
|
requirements: []
|
213
|
+
|
186
214
|
rubyforge_project:
|
187
|
-
rubygems_version:
|
215
|
+
rubygems_version: 1.8.15
|
188
216
|
signing_key:
|
189
|
-
specification_version:
|
217
|
+
specification_version: 3
|
190
218
|
summary: Twitter text handling library
|
191
|
-
test_files:
|
219
|
+
test_files:
|
192
220
|
- spec/autolinking_spec.rb
|
193
221
|
- spec/extractor_spec.rb
|
194
222
|
- spec/hithighlighter_spec.rb
|
checksums.yaml
DELETED
@@ -1,7 +0,0 @@
|
|
1
|
-
---
|
2
|
-
SHA1:
|
3
|
-
metadata.gz: d032fd2fa1300bbc000a2e7a8f718fc5a7b33e12
|
4
|
-
data.tar.gz: ac18985474a651ee6ca291e9cdb5fc7116f2d346
|
5
|
-
SHA512:
|
6
|
-
metadata.gz: 7f2c3e340d6768091cc0cfca016f0fbb292deae6f02b6aa4045ce353aadd0e538195b5ed71abb285a82eae40fbdf7c9751271430ea1874f8087b1f1e7b5ed3a5
|
7
|
-
data.tar.gz: d5ab53a8920d232b2b1c6cdf03e6d5a723d12923d2da461329181af0ceb166cac8c034a3f4f715061cf6d007b00aadb745a97817d47984b6f770a86ddb6a8ba6
|