twitter-text 1.9.0 → 1.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/twitter-text/extractor.rb +5 -4
- data/lib/twitter-text/regex.rb +39 -24
- data/twitter-text.gemspec +3 -3
- metadata +135 -107
- checksums.yaml +0 -7
@@ -201,15 +201,17 @@ module Twitter
|
|
201
201
|
if !protocol
|
202
202
|
next if !options[:extract_url_without_protocol] || before =~ Twitter::Regex[:invalid_url_without_protocol_preceding_chars]
|
203
203
|
last_url = nil
|
204
|
-
last_url_invalid_match = nil
|
205
204
|
domain.scan(Twitter::Regex[:valid_ascii_domain]) do |ascii_domain|
|
206
205
|
last_url = {
|
207
206
|
:url => ascii_domain,
|
208
207
|
:indices => [start_position + $~.char_begin(0),
|
209
208
|
start_position + $~.char_end(0)]
|
210
209
|
}
|
211
|
-
|
212
|
-
|
210
|
+
if path ||
|
211
|
+
ascii_domain =~ Twitter::Regex[:valid_special_short_domain] ||
|
212
|
+
ascii_domain !~ Twitter::Regex[:invalid_short_domain]
|
213
|
+
urls << last_url
|
214
|
+
end
|
213
215
|
end
|
214
216
|
|
215
217
|
# no ASCII-only domain found. Skip the entire URL
|
@@ -218,7 +220,6 @@ module Twitter
|
|
218
220
|
# last_url only contains domain. Need to add path and query if they exist.
|
219
221
|
if path
|
220
222
|
# last_url was not added. Add it to urls here.
|
221
|
-
urls << last_url if last_url_invalid_match
|
222
223
|
last_url[:url] = url.sub(domain, last_url[:url])
|
223
224
|
last_url[:indices][1] = end_position
|
224
225
|
end
|
data/lib/twitter-text/regex.rb
CHANGED
@@ -189,39 +189,53 @@ module Twitter
|
|
189
189
|
|
190
190
|
REGEXEN[:valid_gTLD] = %r{
|
191
191
|
(?:
|
192
|
-
(?:
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
192
|
+
(?:
|
193
|
+
academy|accountants|actor|aero|agency|airforce|archi|arpa|asia|associates|axa|bar|bargains|bayern|berlin|best|
|
194
|
+
bid|bike|biz|black|blackfriday|blue|boutique|build|builders|buzz|cab|camera|camp|capital|cards|care|career|
|
195
|
+
careers|cash|cat|catering|center|ceo|cheap|christmas|citic|claims|cleaning|clinic|clothing|club|codes|coffee|
|
196
|
+
college|cologne|com|community|company|computer|construction|contractors|cooking|cool|coop|country|credit|
|
197
|
+
creditcard|cruises|dance|dating|democrat|dental|desi|diamonds|digital|directory|discount|domains|edu|education|
|
198
|
+
email|engineering|enterprises|equipment|estate|eus|events|exchange|expert|exposed|fail|farm|feedback|finance|
|
199
|
+
financial|fish|fishing|fitness|flights|florist|foo|foundation|frogans|fund|furniture|futbol|gal|gallery|gift|
|
200
|
+
glass|globo|gmo|gop|gov|graphics|gratis|gripe|guitars|guru|haus|holdings|holiday|horse|house|immobilien|
|
201
|
+
industries|info|institute|insure|int|international|investments|jetzt|jobs|kaufen|kim|kitchen|kiwi|koeln|kred|
|
202
|
+
land|lease|lighting|limited|limo|link|london|luxury|management|mango|marketing|media|meet|menu|miami|mil|mobi|
|
203
|
+
moda|moe|monash|moscow|museum|nagoya|name|net|neustar|ninja|nyc|okinawa|onl|org|paris|partners|parts|photo|
|
204
|
+
photography|photos|pics|pictures|pink|plumbing|post|pro|productions|properties|pub|qpon|quebec|recipes|red|
|
205
|
+
reisen|ren|rentals|repair|report|rest|reviews|rich|rocks|rodeo|ruhr|ryukyu|saarland|schule|services|sexy|
|
206
|
+
shiksha|shoes|singles|social|sohu|solar|solutions|soy|supplies|supply|support|surgery|systems|tattoo|tax|
|
207
|
+
technology|tel|tienda|tips|today|tokyo|tools|town|toys|trade|training|travel|university|uno|vacations|vegas|
|
208
|
+
ventures|viajes|villas|vision|vodka|vote|voting|voto|voyage|wang|watch|webcam|wed|wien|wiki|works|wtc|wtf|xxx|
|
209
|
+
xyz|yokohama|zone|дети|москва|онлайн|орг|сайт|بازار|شبكة|संगठन|みんな|世界|中信|中文网|公司|公益|商城|在线|我爱你|政务|机构|游戏|移动|组织机构|
|
210
|
+
网址|网络|集团|삼성
|
211
|
+
)(?=[^0-9a-z@]|$)
|
205
212
|
)
|
206
213
|
}ix
|
207
214
|
|
208
215
|
REGEXEN[:valid_ccTLD] = %r{
|
209
216
|
(?:
|
210
|
-
(?:
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
ye|yt|za|zm|zw
|
218
|
-
|
219
|
-
|
220
|
-
(?=[^0-9a-z@]|$)
|
217
|
+
(?:
|
218
|
+
ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bl|bm|bn|bo|bq|br|bs|bt|bv|bw|
|
219
|
+
by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|
|
220
|
+
fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|
|
221
|
+
is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mf|mg|mh|mk|ml|
|
222
|
+
mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|
|
223
|
+
ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|
|
224
|
+
tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw|мкд|мон|рф|
|
225
|
+
срб|укр|қаз|الاردن|الجزائر|السعودية|المغرب|امارات|ایران|بھارت|تونس|سودان|سورية|عمان|فلسطين|قطر|مصر|مليسيا|
|
226
|
+
پاکستان|भारत|বাংলা|ভারত|ਭਾਰਤ|ભારત|இந்தியா|இலங்கை|சிங்கப்பூர்|భారత్|ලංකා|ไทย|გე|中国|中國|台湾|台灣|新加坡|香港|한국
|
227
|
+
)(?=[^0-9a-z@]|$)
|
221
228
|
)
|
222
229
|
}ix
|
223
230
|
REGEXEN[:valid_punycode] = /(?:xn--[0-9a-z]+)/i
|
224
231
|
|
232
|
+
REGEXEN[:valid_special_cctld] = %r{
|
233
|
+
(?:
|
234
|
+
(?:co|tv)
|
235
|
+
(?=[^0-9a-z@]|$)
|
236
|
+
)
|
237
|
+
}ix
|
238
|
+
|
225
239
|
REGEXEN[:valid_domain] = /(?:
|
226
240
|
#{REGEXEN[:valid_subdomain]}*#{REGEXEN[:valid_domain_name]}
|
227
241
|
(?:#{REGEXEN[:valid_gTLD]}|#{REGEXEN[:valid_ccTLD]}|#{REGEXEN[:valid_punycode]})
|
@@ -238,6 +252,7 @@ module Twitter
|
|
238
252
|
|
239
253
|
# This is used in Extractor to filter out unwanted URLs.
|
240
254
|
REGEXEN[:invalid_short_domain] = /\A#{REGEXEN[:valid_domain_name]}#{REGEXEN[:valid_ccTLD]}\Z/io
|
255
|
+
REGEXEN[:valid_special_short_domain] = /\A#{REGEXEN[:valid_domain_name]}#{REGEXEN[:valid_special_cctld]}\Z/io
|
241
256
|
|
242
257
|
REGEXEN[:valid_port_number] = /[0-9]+/
|
243
258
|
|
data/twitter-text.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = "twitter-text"
|
5
|
-
s.version = "1.9.0"
|
5
|
+
s.version = "1.9.1"
|
6
6
|
s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle",
|
7
7
|
"Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii", "James Koval"]
|
8
8
|
s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com",
|
@@ -19,8 +19,8 @@ Gem::Specification.new do |s|
|
|
19
19
|
s.add_development_dependency "nokogiri", "~> 1.5.10"
|
20
20
|
s.add_development_dependency "rake"
|
21
21
|
s.add_development_dependency "rdoc"
|
22
|
-
s.add_development_dependency "rspec"
|
23
|
-
s.add_development_dependency "simplecov"
|
22
|
+
s.add_development_dependency "rspec", "~> 2.14.0"
|
23
|
+
s.add_development_dependency "simplecov", "~> 0.8.0"
|
24
24
|
s.add_runtime_dependency "unf", "~> 0.1.0"
|
25
25
|
|
26
26
|
s.files = `git ls-files`.split("\n")
|
metadata
CHANGED
@@ -1,9 +1,15 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter-text
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 49
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 9
|
9
|
+
- 1
|
10
|
+
version: 1.9.1
|
5
11
|
platform: ruby
|
6
|
-
authors:
|
12
|
+
authors:
|
7
13
|
- Matt Sanford
|
8
14
|
- Patrick Ewing
|
9
15
|
- Ben Cherry
|
@@ -16,108 +22,118 @@ authors:
|
|
16
22
|
autorequire:
|
17
23
|
bindir: bin
|
18
24
|
cert_chain: []
|
19
|
-
|
20
|
-
|
21
|
-
|
25
|
+
|
26
|
+
date: 2014-07-18 00:00:00 Z
|
27
|
+
dependencies:
|
28
|
+
- !ruby/object:Gem::Dependency
|
22
29
|
name: multi_json
|
23
|
-
requirement: !ruby/object:Gem::Requirement
|
24
|
-
requirements:
|
25
|
-
- - "~>"
|
26
|
-
- !ruby/object:Gem::Version
|
27
|
-
version: '1.3'
|
28
|
-
type: :development
|
29
30
|
prerelease: false
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
- !ruby/object:Gem::Version
|
41
|
-
version: 1.5.10
|
31
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
32
|
+
none: false
|
33
|
+
requirements:
|
34
|
+
- - ~>
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
hash: 9
|
37
|
+
segments:
|
38
|
+
- 1
|
39
|
+
- 3
|
40
|
+
version: "1.3"
|
42
41
|
type: :development
|
42
|
+
version_requirements: *id001
|
43
|
+
- !ruby/object:Gem::Dependency
|
44
|
+
name: nokogiri
|
43
45
|
prerelease: false
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
46
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
47
|
+
none: false
|
48
|
+
requirements:
|
49
|
+
- - ~>
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
hash: 23
|
52
|
+
segments:
|
53
|
+
- 1
|
54
|
+
- 5
|
55
|
+
- 10
|
48
56
|
version: 1.5.10
|
49
|
-
- !ruby/object:Gem::Dependency
|
50
|
-
name: rake
|
51
|
-
requirement: !ruby/object:Gem::Requirement
|
52
|
-
requirements:
|
53
|
-
- - ">="
|
54
|
-
- !ruby/object:Gem::Version
|
55
|
-
version: '0'
|
56
57
|
type: :development
|
58
|
+
version_requirements: *id002
|
59
|
+
- !ruby/object:Gem::Dependency
|
60
|
+
name: rake
|
57
61
|
prerelease: false
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
- !ruby/object:Gem::Version
|
62
|
-
version: '0'
|
63
|
-
- !ruby/object:Gem::Dependency
|
64
|
-
name: rdoc
|
65
|
-
requirement: !ruby/object:Gem::Requirement
|
66
|
-
requirements:
|
62
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
63
|
+
none: false
|
64
|
+
requirements:
|
67
65
|
- - ">="
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
hash: 3
|
68
|
+
segments:
|
69
|
+
- 0
|
70
|
+
version: "0"
|
70
71
|
type: :development
|
72
|
+
version_requirements: *id003
|
73
|
+
- !ruby/object:Gem::Dependency
|
74
|
+
name: rdoc
|
71
75
|
prerelease: false
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
- !ruby/object:Gem::Version
|
76
|
-
version: '0'
|
77
|
-
- !ruby/object:Gem::Dependency
|
78
|
-
name: rspec
|
79
|
-
requirement: !ruby/object:Gem::Requirement
|
80
|
-
requirements:
|
76
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
77
|
+
none: false
|
78
|
+
requirements:
|
81
79
|
- - ">="
|
82
|
-
- !ruby/object:Gem::Version
|
83
|
-
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
hash: 3
|
82
|
+
segments:
|
83
|
+
- 0
|
84
|
+
version: "0"
|
84
85
|
type: :development
|
86
|
+
version_requirements: *id004
|
87
|
+
- !ruby/object:Gem::Dependency
|
88
|
+
name: rspec
|
85
89
|
prerelease: false
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
version: '0'
|
90
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
91
|
+
none: false
|
92
|
+
requirements:
|
93
|
+
- - ~>
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
hash: 55
|
96
|
+
segments:
|
97
|
+
- 2
|
98
|
+
- 14
|
99
|
+
- 0
|
100
|
+
version: 2.14.0
|
98
101
|
type: :development
|
102
|
+
version_requirements: *id005
|
103
|
+
- !ruby/object:Gem::Dependency
|
104
|
+
name: simplecov
|
99
105
|
prerelease: false
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
107
|
+
none: false
|
108
|
+
requirements:
|
109
|
+
- - ~>
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
hash: 63
|
112
|
+
segments:
|
113
|
+
- 0
|
114
|
+
- 8
|
115
|
+
- 0
|
116
|
+
version: 0.8.0
|
117
|
+
type: :development
|
118
|
+
version_requirements: *id006
|
119
|
+
- !ruby/object:Gem::Dependency
|
106
120
|
name: unf
|
107
|
-
requirement: !ruby/object:Gem::Requirement
|
108
|
-
requirements:
|
109
|
-
- - "~>"
|
110
|
-
- !ruby/object:Gem::Version
|
111
|
-
version: 0.1.0
|
112
|
-
type: :runtime
|
113
121
|
prerelease: false
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
122
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
123
|
+
none: false
|
124
|
+
requirements:
|
125
|
+
- - ~>
|
126
|
+
- !ruby/object:Gem::Version
|
127
|
+
hash: 27
|
128
|
+
segments:
|
129
|
+
- 0
|
130
|
+
- 1
|
131
|
+
- 0
|
118
132
|
version: 0.1.0
|
133
|
+
type: :runtime
|
134
|
+
version_requirements: *id007
|
119
135
|
description: A gem that provides text handling for Twitter
|
120
|
-
email:
|
136
|
+
email:
|
121
137
|
- matt@twitter.com
|
122
138
|
- patrick.henry.ewing@gmail.com
|
123
139
|
- bcherry@gmail.com
|
@@ -128,14 +144,17 @@ email:
|
|
128
144
|
- keita@twitter.com
|
129
145
|
- jkoval@twitter.com
|
130
146
|
executables: []
|
147
|
+
|
131
148
|
extensions: []
|
149
|
+
|
132
150
|
extra_rdoc_files: []
|
133
|
-
|
134
|
-
|
135
|
-
-
|
136
|
-
-
|
137
|
-
-
|
138
|
-
-
|
151
|
+
|
152
|
+
files:
|
153
|
+
- .gemtest
|
154
|
+
- .gitignore
|
155
|
+
- .gitmodules
|
156
|
+
- .rspec
|
157
|
+
- .travis.yml
|
139
158
|
- Gemfile
|
140
159
|
- LICENSE
|
141
160
|
- README.rdoc
|
@@ -165,30 +184,39 @@ files:
|
|
165
184
|
- test/conformance_test.rb
|
166
185
|
- twitter-text.gemspec
|
167
186
|
homepage: http://twitter.com
|
168
|
-
licenses:
|
187
|
+
licenses:
|
169
188
|
- Apache 2.0
|
170
|
-
metadata: {}
|
171
189
|
post_install_message:
|
172
190
|
rdoc_options: []
|
173
|
-
|
191
|
+
|
192
|
+
require_paths:
|
174
193
|
- lib
|
175
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
176
|
-
|
194
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
195
|
+
none: false
|
196
|
+
requirements:
|
177
197
|
- - ">="
|
178
|
-
- !ruby/object:Gem::Version
|
179
|
-
|
180
|
-
|
181
|
-
|
198
|
+
- !ruby/object:Gem::Version
|
199
|
+
hash: 3
|
200
|
+
segments:
|
201
|
+
- 0
|
202
|
+
version: "0"
|
203
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
204
|
+
none: false
|
205
|
+
requirements:
|
182
206
|
- - ">="
|
183
|
-
- !ruby/object:Gem::Version
|
184
|
-
|
207
|
+
- !ruby/object:Gem::Version
|
208
|
+
hash: 3
|
209
|
+
segments:
|
210
|
+
- 0
|
211
|
+
version: "0"
|
185
212
|
requirements: []
|
213
|
+
|
186
214
|
rubyforge_project:
|
187
|
-
rubygems_version:
|
215
|
+
rubygems_version: 1.8.15
|
188
216
|
signing_key:
|
189
|
-
specification_version:
|
217
|
+
specification_version: 3
|
190
218
|
summary: Twitter text handling library
|
191
|
-
test_files:
|
219
|
+
test_files:
|
192
220
|
- spec/autolinking_spec.rb
|
193
221
|
- spec/extractor_spec.rb
|
194
222
|
- spec/hithighlighter_spec.rb
|
checksums.yaml
DELETED
@@ -1,7 +0,0 @@
|
|
1
|
-
---
|
2
|
-
SHA1:
|
3
|
-
metadata.gz: d032fd2fa1300bbc000a2e7a8f718fc5a7b33e12
|
4
|
-
data.tar.gz: ac18985474a651ee6ca291e9cdb5fc7116f2d346
|
5
|
-
SHA512:
|
6
|
-
metadata.gz: 7f2c3e340d6768091cc0cfca016f0fbb292deae6f02b6aa4045ce353aadd0e538195b5ed71abb285a82eae40fbdf7c9751271430ea1874f8087b1f1e7b5ed3a5
|
7
|
-
data.tar.gz: d5ab53a8920d232b2b1c6cdf03e6d5a723d12923d2da461329181af0ceb166cac8c034a3f4f715061cf6d007b00aadb745a97817d47984b6f770a86ddb6a8ba6
|