url_scrubber 0.7.6 → 0.7.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/url_scrubber.rb +35 -6
- data/lib/url_scrubber/version.rb +1 -1
- metadata +2 -2
data/lib/url_scrubber.rb
CHANGED
@@ -16,7 +16,6 @@ module UrlScrubber
|
|
16
16
|
url.sub!('#!/', '')
|
17
17
|
url = downcase_domain(url)
|
18
18
|
remove_www!(url)
|
19
|
-
Rails.logger.debug "URLSCRUBBER - url1 = #{url}"
|
20
19
|
drop_anchor!(special_cases(url))
|
21
20
|
end
|
22
21
|
|
@@ -69,7 +68,7 @@ module UrlScrubber
|
|
69
68
|
when :facebook
|
70
69
|
!!url.match(%r{^http://facebook\.com/(profile\.php?id=\d+|[\w_\.-]+)$})
|
71
70
|
when :linkedin
|
72
|
-
!!url.match(%r{^http://linkedin\.com/(company/[\w_-]+|profile/view\?id=\d+)$})
|
71
|
+
!!url.match(%r{^http://linkedin\.com/(company/[\w_-]+|profile/view\?id=\d+)$}) || !!url.match(%r{^http://linkedin\.com/(groups\?gid=[0-9]+)$}) || !!url.match(%r{^http://linkedin\.com/(groups/[\w_-]+)$})
|
73
72
|
when :google
|
74
73
|
!!url.match(%r{^http://plus\.google\.com/(\+[\w_-]+|\d+)$})
|
75
74
|
when :slideshare
|
@@ -91,6 +90,25 @@ module UrlScrubber
|
|
91
90
|
true
|
92
91
|
end
|
93
92
|
end
|
93
|
+
|
94
|
+
# Reports whether +url+, once passed through +scrub+, contains the
# canonical LinkedIn company prefix 'http://linkedin.com/company/'.
#
# Returns false when +scrub+ yields nil/false; otherwise the boolean
# result of the substring check.
def self.linkedin_company_url?(url)
  scrubbed = scrub(url)
  scrubbed ? scrubbed.include?('http://linkedin.com/company/') : false
end
|
99
|
+
|
100
|
+
# Extracts the LinkedIn identity (company slug, group slug, or numeric
# group id) from +url+.
#
# The URL is first normalised with +scrub+. Then:
# * company URLs and 'linkedin.com/groups/<slug>' URLs yield the last
#   path segment;
# * 'linkedin.com/groups?gid=<id>' URLs yield everything after 'gid='
#   up to (not including) the first '&'.
#
# Returns nil when +url+ is nil, when +scrub+ produces no usable URL, or
# when the scrubbed URL matches none of the recognised LinkedIn shapes.
def self.find_linkedin_identity_from_url(url)
  return nil if url.nil?

  scrubbed_url = scrub(url)
  # scrub can apparently return nil (linkedin_company_url? guards against
  # it too); bail out here so the include? checks below never run on nil.
  return nil unless scrubbed_url

  if linkedin_company_url?(scrubbed_url) || scrubbed_url.include?('linkedin.com/groups/')
    scrubbed_url.split("/").last
  elsif scrubbed_url.include?('linkedin.com/groups?gid=')
    id_partition = scrubbed_url.partition('linkedin.com/groups?gid=')
    # Element 2 of the partition is the text after 'gid='; trim any
    # trailing '&...' parameters so only the id remains.
    drop_url_ampersand!(id_partition[2])
  end
end
|
94
112
|
|
95
113
|
private
|
96
114
|
|
@@ -119,6 +137,11 @@ module UrlScrubber
|
|
119
137
|
url
|
120
138
|
end
|
121
139
|
|
140
|
+
# Strips, in place, everything from the first '&' to the end of that line
# in +url+, and returns the same (possibly modified) string object.
# A string without '&' is returned untouched.
def self.drop_url_ampersand!(url)
  url.tap { |target| target.sub!(/\&.*$/, '') }
end
|
144
|
+
|
122
145
|
def self.drop_url_query!(url)
|
123
146
|
url.sub!(/\?.*$/, '')
|
124
147
|
url
|
@@ -168,7 +191,16 @@ module UrlScrubber
|
|
168
191
|
# Normalises a LinkedIn URL.
#
# * 'linkedin.com/companies/' is rewritten to 'linkedin.com/company/'.
# * Company URLs and 'linkedin.com/groups/<slug>' URLs lose their query
#   string.
# * 'linkedin.com/groups?gid=<id>' URLs keep the gid but lose any
#   following '&...' parameters.
# * 'linkedin.com/groups?home=&gid=<id>' URLs are rebuilt as
#   'http://linkedin.com/groups?gid=<id>'.
#
# The first three cases modify +url+ in place and return it. The last case
# returns a newly built string and leaves the argument itself unchanged, so
# callers should always use the return value.
def self.linkedin(url)
  url.sub!('linkedin.com/companies/', 'linkedin.com/company/')

  legacy_group_prefix = 'linkedin.com/groups?home=&gid='
  if url.match(%r{com/company/}) || url.include?('linkedin.com/groups/')
    drop_url_query!(url)
  elsif url.include?('linkedin.com/groups?gid=')
    drop_url_ampersand!(url)
  elsif url.include?(legacy_group_prefix)
    # Text after the legacy prefix is the group id plus any extra params.
    group_id = drop_url_ampersand!(url.partition(legacy_group_prefix)[2])
    return "http://linkedin.com/groups?gid=" + group_id
  end

  url
end
|
174
206
|
|
@@ -179,13 +211,10 @@ module UrlScrubber
|
|
179
211
|
url.sub!('/photos', '')
|
180
212
|
url.sub!('/of', '')
|
181
213
|
url.sub!('/albums', '')
|
182
|
-
Rails.logger.debug "URLSCRUBBER - url3 = #{url}"
|
183
214
|
|
184
215
|
path_match = url.match(/^http:\/\/plus\.google\.com\/([^\/]+)/)
|
185
|
-
Rails.logger.debug "URLSCRUBBER - path_match = #{path_match}"
|
186
216
|
return url unless path_match
|
187
217
|
|
188
|
-
Rails.logger.debug "URLSCRUBBER - no match url = http://plus.google.com/#{path_match[1]}"
|
189
218
|
"http://plus.google.com/#{path_match[1]}"
|
190
219
|
end
|
191
220
|
|
data/lib/url_scrubber/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: url_scrubber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.6
|
4
|
+
version: 0.7.7
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2013-09-
|
14
|
+
date: 2013-09-12 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: rspec
|