url_scrubber 0.7.6 → 0.7.7
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/url_scrubber.rb +35 -6
- data/lib/url_scrubber/version.rb +1 -1
- metadata +2 -2
data/lib/url_scrubber.rb
CHANGED
@@ -16,7 +16,6 @@ module UrlScrubber
|
|
16
16
|
url.sub!('#!/', '')
|
17
17
|
url = downcase_domain(url)
|
18
18
|
remove_www!(url)
|
19
|
-
Rails.logger.debug "URLSCRUBBER - url1 = #{url}"
|
20
19
|
drop_anchor!(special_cases(url))
|
21
20
|
end
|
22
21
|
|
@@ -69,7 +68,7 @@ module UrlScrubber
|
|
69
68
|
when :facebook
|
70
69
|
!!url.match(%r{^http://facebook\.com/(profile\.php?id=\d+|[\w_\.-]+)$})
|
71
70
|
when :linkedin
|
72
|
-
!!url.match(%r{^http://linkedin\.com/(company/[\w_-]+|profile/view\?id=\d+)$})
|
71
|
+
!!url.match(%r{^http://linkedin\.com/(company/[\w_-]+|profile/view\?id=\d+)$}) || !!url.match(%r{^http://linkedin\.com/(groups\?gid=[0-9]+)$}) || !!url.match(%r{^http://linkedin\.com/(groups/[\w_-]+)$})
|
73
72
|
when :google
|
74
73
|
!!url.match(%r{^http://plus\.google\.com/(\+[\w_-]+|\d+)$})
|
75
74
|
when :slideshare
|
@@ -91,6 +90,25 @@ module UrlScrubber
|
|
91
90
|
true
|
92
91
|
end
|
93
92
|
end
|
93
|
+
|
94
|
+
def self.linkedin_company_url?(url)
|
95
|
+
url = scrub(url)
|
96
|
+
return false unless url
|
97
|
+
return url.include?('http://linkedin.com/company/')
|
98
|
+
end
|
99
|
+
|
100
|
+
def self.find_linkedin_identity_from_url(url)
|
101
|
+
return nil if url.nil?
|
102
|
+
scrubbed_url = scrub(url)
|
103
|
+
if scrubbed_url && linkedin_company_url?(scrubbed_url)
|
104
|
+
scrubbed_url.split("/").last
|
105
|
+
elsif scrubbed_url.include?('linkedin.com/groups/')
|
106
|
+
scrubbed_url.split("/").last
|
107
|
+
elsif scrubbed_url.include?('linkedin.com/groups?gid=')
|
108
|
+
id_partition = scrubbed_url.partition('linkedin.com/groups?gid=')
|
109
|
+
drop_url_ampersand!(id_partition[2])
|
110
|
+
end
|
111
|
+
end
|
94
112
|
|
95
113
|
private
|
96
114
|
|
@@ -119,6 +137,11 @@ module UrlScrubber
|
|
119
137
|
url
|
120
138
|
end
|
121
139
|
|
140
|
+
def self.drop_url_ampersand!(url)
|
141
|
+
url.sub!(/\&.*$/, '')
|
142
|
+
url
|
143
|
+
end
|
144
|
+
|
122
145
|
def self.drop_url_query!(url)
|
123
146
|
url.sub!(/\?.*$/, '')
|
124
147
|
url
|
@@ -168,7 +191,16 @@ module UrlScrubber
|
|
168
191
|
def self.linkedin(url)
|
169
192
|
|
170
193
|
url.sub!('linkedin.com/companies/', 'linkedin.com/company/')
|
171
|
-
|
194
|
+
if !!url.match(%r{com/company/})
|
195
|
+
drop_url_query!(url)
|
196
|
+
elsif url.include?('linkedin.com/groups/')
|
197
|
+
drop_url_query!(url)
|
198
|
+
elsif url.include?('linkedin.com/groups?gid=')
|
199
|
+
drop_url_ampersand!(url)
|
200
|
+
elsif url.include?('linkedin.com/groups?home=&gid=')
|
201
|
+
id_partition = url.partition('linkedin.com/groups?home=&gid=')
|
202
|
+
url = "http://linkedin.com/groups?gid=" + drop_url_ampersand!(id_partition[2])
|
203
|
+
end
|
172
204
|
url
|
173
205
|
end
|
174
206
|
|
@@ -179,13 +211,10 @@ module UrlScrubber
|
|
179
211
|
url.sub!('/photos', '')
|
180
212
|
url.sub!('/of', '')
|
181
213
|
url.sub!('/albums', '')
|
182
|
-
Rails.logger.debug "URLSCRUBBER - url3 = #{url}"
|
183
214
|
|
184
215
|
path_match = url.match(/^http:\/\/plus\.google\.com\/([^\/]+)/)
|
185
|
-
Rails.logger.debug "URLSCRUBBER - path_match = #{path_match}"
|
186
216
|
return url unless path_match
|
187
217
|
|
188
|
-
Rails.logger.debug "URLSCRUBBER - no match url = http://plus.google.com/#{path_match[1]}"
|
189
218
|
"http://plus.google.com/#{path_match[1]}"
|
190
219
|
end
|
191
220
|
|
data/lib/url_scrubber/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: url_scrubber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.6
|
4
|
+
version: 0.7.7
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2013-09-
|
14
|
+
date: 2013-09-12 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: rspec
|