url_scrubber 0.7.6 → 0.7.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/url_scrubber.rb CHANGED
@@ -16,7 +16,6 @@ module UrlScrubber
16
16
  url.sub!('#!/', '')
17
17
  url = downcase_domain(url)
18
18
  remove_www!(url)
19
- Rails.logger.debug "URLSCRUBBER - url1 = #{url}"
20
19
  drop_anchor!(special_cases(url))
21
20
  end
22
21
 
@@ -69,7 +68,7 @@ module UrlScrubber
69
68
  when :facebook
70
69
  !!url.match(%r{^http://facebook\.com/(profile\.php?id=\d+|[\w_\.-]+)$})
71
70
  when :linkedin
72
- !!url.match(%r{^http://linkedin\.com/(company/[\w_-]+|profile/view\?id=\d+)$})
71
+ !!url.match(%r{^http://linkedin\.com/(company/[\w_-]+|profile/view\?id=\d+)$}) || !!url.match(%r{^http://linkedin\.com/(groups\?gid=[0-9]+)$}) || !!url.match(%r{^http://linkedin\.com/(groups/[\w_-]+)$})
73
72
  when :google
74
73
  !!url.match(%r{^http://plus\.google\.com/(\+[\w_-]+|\d+)$})
75
74
  when :slideshare
@@ -91,6 +90,25 @@ module UrlScrubber
91
90
  true
92
91
  end
93
92
  end
93
+
94
+ def self.linkedin_company_url?(url)
95
+ url = scrub(url)
96
+ return false unless url
97
+ return url.include?('http://linkedin.com/company/')
98
+ end
99
+
100
+ def self.find_linkedin_identity_from_url(url)
101
+ return nil if url.nil?
102
+ scrubbed_url = scrub(url)
103
+ if scrubbed_url && linkedin_company_url?(scrubbed_url)
104
+ scrubbed_url.split("/").last
105
+ elsif scrubbed_url.include?('linkedin.com/groups/')
106
+ scrubbed_url.split("/").last
107
+ elsif scrubbed_url.include?('linkedin.com/groups?gid=')
108
+ id_partition = scrubbed_url.partition('linkedin.com/groups?gid=')
109
+ drop_url_ampersand!(id_partition[2])
110
+ end
111
+ end
94
112
 
95
113
  private
96
114
 
@@ -119,6 +137,11 @@ module UrlScrubber
119
137
  url
120
138
  end
121
139
 
140
+ def self.drop_url_ampersand!(url)
141
+ url.sub!(/\&.*$/, '')
142
+ url
143
+ end
144
+
122
145
  def self.drop_url_query!(url)
123
146
  url.sub!(/\?.*$/, '')
124
147
  url
@@ -168,7 +191,16 @@ module UrlScrubber
168
191
  def self.linkedin(url)
169
192
 
170
193
  url.sub!('linkedin.com/companies/', 'linkedin.com/company/')
171
- drop_url_query!(url) if !!url.match(%r{com/company/})
194
+ if !!url.match(%r{com/company/})
195
+ drop_url_query!(url)
196
+ elsif url.include?('linkedin.com/groups/')
197
+ drop_url_query!(url)
198
+ elsif url.include?('linkedin.com/groups?gid=')
199
+ drop_url_ampersand!(url)
200
+ elsif url.include?('linkedin.com/groups?home=&gid=')
201
+ id_partition = url.partition('linkedin.com/groups?home=&gid=')
202
+ url = "http://linkedin.com/groups?gid=" + drop_url_ampersand!(id_partition[2])
203
+ end
172
204
  url
173
205
  end
174
206
 
@@ -179,13 +211,10 @@ module UrlScrubber
179
211
  url.sub!('/photos', '')
180
212
  url.sub!('/of', '')
181
213
  url.sub!('/albums', '')
182
- Rails.logger.debug "URLSCRUBBER - url3 = #{url}"
183
214
 
184
215
  path_match = url.match(/^http:\/\/plus\.google\.com\/([^\/]+)/)
185
- Rails.logger.debug "URLSCRUBBER - path_match = #{path_match}"
186
216
  return url unless path_match
187
217
 
188
- Rails.logger.debug "URLSCRUBBER - no match url = http://plus.google.com/#{path_match[1]}"
189
218
  "http://plus.google.com/#{path_match[1]}"
190
219
  end
191
220
 
@@ -1,3 +1,3 @@
1
1
  module UrlScrubber
2
- VERSION = "0.7.6"
2
+ VERSION = "0.7.7"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: url_scrubber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.6
4
+ version: 0.7.7
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2013-09-11 00:00:00.000000000 Z
14
+ date: 2013-09-12 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: rspec