url_scrubber 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,33 @@
1
+ **.orig
2
+ *.gem
3
+ *.rbc
4
+ *.sassc
5
+ .bundle
6
+ .config
7
+ .DS_Store
8
+ .rspec
9
+ .sass-cache
10
+ .yardoc
11
+ /.bundle
12
+ /coverage/
13
+ /db/*.sqlite3
14
+ /log/*
15
+ /public/system/*
16
+ /spec/tmp/*
17
+ /tmp/*
18
+ /vendor/bundle
19
+ _yardoc
20
+ capybara-*.html
21
+ coverage
22
+ doc/
23
+ Gemfile.lock
24
+ InstalledFiles
25
+ lib/bundler/man
26
+ pickle-email-*.html
27
+ pkg
28
+ rdoc
29
+ rerun.txt
30
+ spec/reports
31
+ test/tmp
32
+ test/version_tmp
33
+ tmp
data/.rvmrc ADDED
@@ -0,0 +1,48 @@
1
+ #!/usr/bin/env bash
2
+
3
+ # This is an RVM Project .rvmrc file, used to automatically load the ruby
4
+ # development environment upon cd'ing into the directory
5
+
6
+ # First we specify our desired <ruby>[@<gemset>], the @gemset name is optional,
7
+ # Only full ruby name is supported here, for short names use:
8
+ # echo "rvm use 1.9.3" > .rvmrc
9
+ environment_id="ruby-1.9.3-p194@url_scrubber"
10
+
11
+ # Uncomment the following lines if you want to verify rvm version per project
12
+ # rvmrc_rvm_version="1.16.19 (stable)" # 1.10.1 seems like a safe start
13
+ # eval "$(echo ${rvm_version}.${rvmrc_rvm_version} | awk -F. '{print "[[ "$1*65536+$2*256+$3" -ge "$4*65536+$5*256+$6" ]]"}' )" || {
14
+ # echo "This .rvmrc file requires at least RVM ${rvmrc_rvm_version}, aborting loading."
15
+ # return 1
16
+ # }
17
+
18
+ # First we attempt to load the desired environment directly from the environment
19
+ # file. This is very fast and efficient compared to running through the entire
20
+ # CLI and selector. If you want feedback on which environment was used then
21
+ # insert the word 'use' after --create as this triggers verbose mode.
22
+ if [[ -d "${rvm_path:-$HOME/.rvm}/environments"
23
+ && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]]
24
+ then
25
+ \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
26
+ [[ -s "${rvm_path:-$HOME/.rvm}/hooks/after_use" ]] &&
27
+ \. "${rvm_path:-$HOME/.rvm}/hooks/after_use" || true
28
+ else
29
+ # If the environment file has not yet been created, use the RVM CLI to select.
30
+ rvm --create "$environment_id" || {
31
+ echo "Failed to create RVM environment '${environment_id}'."
32
+ return 1
33
+ }
34
+ fi
35
+
36
+ # If you use bundler, this might be useful to you:
37
+ # if [[ -s Gemfile ]] && {
38
+ # ! builtin command -v bundle >/dev/null ||
39
+ # builtin command -v bundle | GREP_OPTIONS= \grep $rvm_path/bin/bundle >/dev/null
40
+ # }
41
+ # then
42
+ # printf "%b" "The rubygem 'bundler' is not installed. Installing it now.\n"
43
+ # gem install bundler
44
+ # fi
45
+ # if [[ -s Gemfile ]] && builtin command -v bundle >/dev/null
46
+ # then
47
+ # bundle install | GREP_OPTIONS= \grep -vE '^Using|Your bundle is complete'
48
+ # fi
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over TLS so that downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in url_scrubber.gemspec
gemspec
data/Guardfile ADDED
@@ -0,0 +1,14 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
# Watch the Gemfile with the 'bundler' guard.
guard('bundler') do
  watch('Gemfile')
  # Uncomment the next line if the Gemfile contains the `gemspec` command
  # watch(/^.+\.gemspec/)
end
9
+
10
# Watch spec files, library files and the spec helper with the 'rspec' guard.
guard 'rspec', version: 2, cli: '-c' do
  # Files under spec/ ending in _spec.rb.
  watch(%r{^spec/.+_spec\.rb$})
  # A file under lib/ is mapped to the spec path with the same relative name.
  watch(%r{^lib/(.+)\.rb$}) { |match| "spec/#{match[1]}_spec.rb" }
  # The shared helper is mapped to the whole spec directory.
  watch('spec/spec_helper.rb') { "spec" }
end
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # UrlScrubber
2
+
3
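+ UrlScrubber normalizes URLs: it forces the `http` scheme, lower-cases the domain, strips a leading `www.`, trailing slashes, `#!/` and `#anchors`, and reduces YouTube, Twitter, Facebook, LinkedIn, Google+ and Flickr URLs to a canonical profile form.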
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'url_scrubber'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install url_scrubber
18
+
19
+ ## Usage
20
+
21
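+ Call `UrlScrubber.scrub(url)` to normalize a URL (it returns `nil` when the text contains no http(s) URL), `UrlScrubber.service_of(url)` to get the service symbol (e.g. `:twitter`, or `:other`), and `UrlScrubber.ideal_form?(url)` to check whether the scrubbed URL looks like a canonical profile URL for its service.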
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+
4
+ require 'rspec'
5
+ require 'rspec/core/rake_task'
6
+
7
# Defines the `spec` Rake task with the pattern 'spec/**/*_spec.rb'.
desc "Run all examples"
RSpec::Core::RakeTask.new(:spec) { |task| task.pattern = 'spec/**/*_spec.rb' }
@@ -0,0 +1,3 @@
1
+ module UrlScrubber
2
+ VERSION = "0.7.3"
3
+ end
@@ -0,0 +1,172 @@
1
+ require "url_scrubber/version"
2
+
3
# Normalizes URLs and recognises a handful of social media services.
#
# Public entry points:
#   UrlScrubber.scrub(url)       -> normalized URL String, or nil
#   UrlScrubber.service_of(url)  -> Symbol (:youtube, :twitter, ..., :other)
#   UrlScrubber.ideal_form?(url) -> true / false
module UrlScrubber
  # Domain of each recognised service mapped to the symbol returned by
  # service_of. A host matches when it equals the domain or is a subdomain
  # of it.
  SERVICE_DOMAINS = {
    'youtube.com'     => :youtube,
    'twitter.com'     => :twitter,
    'facebook.com'    => :facebook,
    'linkedin.com'    => :linkedin,
    'plus.google.com' => :google,
    'slideshare.net'  => :slideshare,
    'flickr.com'      => :flickr,
    'pinterest.com'   => :pinterest,
    'vimeo.com'       => :vimeo,
    'yelp.com'        => :yelp
  }.freeze

  # Pattern a scrubbed URL must match to count as the "ideal" (profile-like)
  # form for its service. Services without an entry accept any URL.
  IDEAL_FORMS = {
    :youtube    => %r{\Ahttp://youtube\.com/[\w-]+\z},
    :twitter    => %r{\Ahttp://twitter\.com/\w+\z},
    # The "?" of profile.php?id= must be escaped; unescaped it made the
    # preceding "p" optional and the alternative could never match.
    :facebook   => %r{\Ahttp://facebook\.com/(profile\.php\?id=\d+|[\w.-]+)\z},
    :linkedin   => %r{\Ahttp://linkedin\.com/(company/[\w-]+|profile/view\?id=\d+)\z},
    :google     => %r{\Ahttp://plus\.google\.com/(\+[\w-]+|\d+)\z},
    :slideshare => %r{\Ahttp://slideshare\.net/[\w-]+\z},
    :flickr     => %r{\Ahttp://flickr\.com/[\w-]+\z},
    :pinterest  => %r{\Ahttp://pinterest\.com/[\w-]+\z},
    :yelp       => %r{\Ahttp://yelp\.com/[\w-]+\z},
    # A purely numeric path segment is rejected for vimeo.
    :vimeo      => %r{\Ahttp://vimeo\.com/(?!\d+\z)[\w-]+\z}
  }.freeze

  # Extracts the first http(s) URL found in +url+ and normalizes it.
  #
  # The scheme is forced to lower-case "http", trailing slashes, "#!/",
  # a leading "www." and any "#anchor" are removed, the domain is
  # lower-cased and service-specific rewrites are applied.
  #
  # url - String that contains a URL (surrounding text is ignored).
  #
  # Returns the scrubbed URL as a new String, or nil when +url+ is nil or
  # contains no http(s) URL. The argument itself is never modified.
  def self.scrub(url)
    return nil if url.nil?

    m = url.match(%r{(https?://\S*)}i)
    return nil unless m

    url = m[1] # a fresh String, so the in-place edits below are safe
    # Case-insensitive so "HTTPS://..." is normalized too; previously such
    # input reached downcase_domain unmatched and raised NoMethodError.
    url.sub!(/\Ahttps?/i, 'http')
    url.sub!(%r{/+\z}, '')
    url.sub!('#!/', '')
    url = downcase_domain(url)
    remove_www!(url)
    drop_anchor!(special_cases(url))
  end

  # Identifies the service a URL belongs to by its host name.
  #
  # url - String URL; scheme and host are compared case-insensitively and
  #       any userinfo ("user@") or port (":80") is ignored.
  #
  # Returns one of the SERVICE_DOMAINS symbols, or :other.
  def self.service_of(url)
    domain_match = url.match(%r{https?://([^/?#]+)}i)
    return :other unless domain_match

    host = domain_match[1].downcase.sub(/\A.*@/, '').sub(/:\d+\z/, '')

    # Compare whole labels: "www.youtube.com" matches "youtube.com" but a
    # look-alike such as "my-youtube.com" does not.
    SERVICE_DOMAINS.each do |domain, service|
      return service if host == domain || host.end_with?(".#{domain}")
    end

    :other
  end

  # Tells whether +url+, once scrubbed, has the ideal form for its service
  # (see IDEAL_FORMS). URLs of unrecognised services are always accepted.
  #
  # Returns true or false; false when no URL can be extracted from +url+.
  def self.ideal_form?(url)
    url = scrub(url)
    return false unless url

    pattern = IDEAL_FORMS[service_of(url)]
    return true unless pattern

    !!url.match(pattern)
  end

  # NOTE: the helpers below are internal. A bare `private` has no effect on
  # methods defined with `def self.`, so they have always been publicly
  # callable; they are left that way to avoid breaking existing callers.

  # Returns a copy of +url+ with its "http://host" part lower-cased. The
  # host ends at the first "/", "?" or "#", so the path, query and fragment
  # keep their case. Returns +url+ unchanged when it has no "http://" part.
  def self.downcase_domain(url)
    domain_match = url.match(%r{http://[^/?#]+}i)
    return url unless domain_match

    domain_match[0].downcase + domain_match.post_match
  end

  # Applies the rewrite rules of the service +url+ belongs to and returns
  # the resulting URL (possibly the same, modified, String).
  def self.special_cases(url)
    case service_of(url)
    when :youtube   then youtube(url)
    when :twitter   then twitter(url)
    when :facebook  then facebook(url)
    when :linkedin  then linkedin(url)
    when :google    then google_plus(url)
    when :flickr    then flickr(url)
    when :pinterest then pinterest(url)
    when :yelp      then yelp(url)
    else url
    end
  end

  # Removes a leading "www." / "www2." style label from the host, in place.
  # Anchored to the scheme so a "://www." that appears later in the URL
  # (for example inside a query parameter) is left alone.
  def self.remove_www!(url)
    url.sub!(%r{\A([a-z][a-z0-9+.-]*://)www\d*\.}i, '\1')
    url
  end

  # Removes everything from the first "?" onwards, in place.
  def self.drop_url_query!(url)
    url.sub!(/\?.*$/, '')
    url
  end

  # Removes everything from the first "#" onwards, in place.
  def self.drop_anchor!(url)
    url.sub!(/#.*$/, '')
    url
  end

  # Rewrites /user/NAME and /profile?user=NAME to /NAME.
  def self.youtube(url)
    url.sub!('youtube.com/user/', 'youtube.com/')
    url.sub!('youtube.com/profile?user=', 'youtube.com/')
    url
  end

  # Strips a leading "@" from the user name and reduces status and
  # @-search URLs to the profile of the user they refer to.
  def self.twitter(url)
    url.sub!('twitter.com/@', 'twitter.com/')

    # Both the legacy /statuses/ID and the /status/ID path are accepted.
    status_match = url.match(%r{(twitter\.com/[^/]+)/status(?:es)?/\d+})
    url = "http://#{status_match[1]}" if status_match

    search_match = url.match(%r{twitter\.com/search(?:/realtime)?(?:/|\?q=)(?:@|%40)(\S*)\z})
    url = "http://twitter.com/#{search_match[1]}" if search_match

    url
  end

  # Album and set URLs keep everything up to the first "&". Any other URL
  # loses the home.php prefix and its query string, except that the numeric
  # id of a profile.php?id=N URL is kept because it identifies the profile.
  def self.facebook(url)
    if url.match("/media/albums") || url.match("/media/set")
      return url.split('&', 2).first
    end

    url.sub!(%r{facebook\.com/home\.php[?#!/]+}, 'facebook.com/')

    # Capture the id before the query is dropped; "id=" must start a
    # parameter, so "uid=1" is not mistaken for it.
    id_match = url.match(%r{facebook\.com/profile\.php\?(?:[^#&]*&)*id=(\d+)})
    drop_url_query!(url)
    url << "?id=#{id_match[1]}" if id_match
    url
  end

  # Rewrites /companies/ to /company/ and drops the query of company URLs;
  # other URLs (e.g. profile/view?id=N) keep their query.
  def self.linkedin(url)
    url.sub!('linkedin.com/companies/', 'linkedin.com/company/')
    drop_url_query!(url) if url.match(%r{com/company/})
    url
  end

  # Drops the u/0/ and b/ path prefixes and everything after the first
  # remaining path component.
  def self.google_plus(url)
    url.sub!('com/u/0/b/', 'com/')
    url.sub!('com/u/0/', 'com/')
    url.sub!('com/b/', 'com/')

    path_match = url.match(%r{\Ahttp://plus\.google\.com/([^/]+)})
    return url unless path_match

    "http://plus.google.com/#{path_match[1]}"
  end

  # Reduces photos/NAME, people/NAME and NAME/anything to /NAME.
  def self.flickr(url)
    user_match = url.match(%r{flickr\.com/(photos/|people/)?([^/]+)})
    return url unless user_match

    "http://flickr.com/#{user_match[2]}"
  end

  # No pinterest-specific rewriting is performed.
  def self.pinterest(url)
    url
  end

  # No yelp-specific rewriting is performed.
  def self.yelp(url)
    url
  end
end
@@ -0,0 +1,345 @@
1
+ require 'rspec'
2
+ require 'url_scrubber'
3
+
4
+ describe UrlScrubber do
5
+ describe '.scrub' do
6
+ it 'should downcase all domain stuff' do
7
+ UrlScrubber.scrub("http://ExAmple.COM").should eq('http://example.com')
8
+ end
9
+
10
+ it 'should remove whitespace and stray characters around an url' do
11
+ UrlScrubber.scrub("d http://example.com\t").should eq("http://example.com")
12
+ end
13
+
14
+ it 'should turn an https url into http' do
15
+ UrlScrubber.scrub('https://example.com').should eq('http://example.com')
16
+ end
17
+
18
+ it 'should remove trailing slashes' do
19
+ UrlScrubber.scrub('http://example.com/').should eq('http://example.com')
20
+ end
21
+
22
+ it 'should remove #!/' do
23
+ UrlScrubber.scrub('http://example.com/#!/page').should eq('http://example.com/page')
24
+ end
25
+
26
+ it 'should drop #anchors' do
27
+ UrlScrubber.scrub('http://example.com/page#query:example+pages').should eq('http://example.com/page')
28
+ end
29
+
30
+ it 'should drop www' do
31
+ UrlScrubber.scrub('http://www.example.com/page').should eq('http://example.com/page')
32
+ end
33
+
34
+ describe 'with youtube urls' do
35
+ it 'should drop /user/ if it exists' do
36
+ UrlScrubber.scrub('http://youtube.com/user/absolutely').should eq('http://youtube.com/absolutely')
37
+ end
38
+
39
+ it 'should handle /profile?user= urls' do
40
+ UrlScrubber.scrub('http://youtube.com/profile?user=absolutely').should eq('http://youtube.com/absolutely')
41
+ end
42
+
43
+ it 'should not kick in for a non-youtube url' do
44
+ UrlScrubber.scrub('http://example.com/user/absolutely').should_not eq('http://example.com/absolutely')
45
+ end
46
+ end
47
+
48
+ describe 'with twitter urls' do
49
+ it 'should drop @ from in front of username' do
50
+ UrlScrubber.scrub('http://twitter.com/@absolutely').should eq('http://twitter.com/absolutely')
51
+ end
52
+
53
+ it 'should not kick in for a non-twitter url' do
54
+ UrlScrubber.scrub('http://example.com/@absolutely').should_not eq('http://example.com/absolutely')
55
+ end
56
+
57
+ it 'should transform /search/realtime/%40username into that username' do
58
+ UrlScrubber.scrub('http://twitter.com/search/realtime/%40absolutely').should eq('http://twitter.com/absolutely')
59
+ end
60
+
61
+ it 'should handle similar search urls' do
62
+ UrlScrubber.scrub('http://twitter.com/search/realtime/@absolutely').should eq('http://twitter.com/absolutely')
63
+ UrlScrubber.scrub('http://twitter.com/search/%40absolutely').should eq('http://twitter.com/absolutely')
64
+ UrlScrubber.scrub('http://twitter.com/search/@absolutely').should eq('http://twitter.com/absolutely')
65
+ UrlScrubber.scrub('http://twitter.com/search/realtime?q=%40absolutely').should eq('http://twitter.com/absolutely')
66
+ end
67
+
68
+ it "should transform user statuses into that user's profile" do
69
+ UrlScrubber.scrub('http://twitter.com/absolutely/statuses/135243243261312').should eq('http://twitter.com/absolutely')
70
+ end
71
+ end
72
+
73
+ describe 'with facebook urls' do
74
+ it 'should drop /home.php?#!/ from the beginning of the path' do
75
+ UrlScrubber.scrub('http://facebook.com/home.php?#!/person.name').should eq('http://facebook.com/person.name')
76
+ end
77
+
78
+ it 'should drop www. from the beginning' do
79
+ UrlScrubber.scrub('http://www.facebook.com/person.name').should eq('http://facebook.com/person.name')
80
+ end
81
+
82
+ it 'should drop an url query' do
83
+ UrlScrubber.scrub('http://facebook.com/person.name?ref=pb').should eq('http://facebook.com/person.name')
84
+ end
85
+
86
+ it 'should not kick in for a non-facebook url' do
87
+ UrlScrubber.scrub('http://example.com/home.php?#!/person.name').should_not eq('http://example.com/person.name')
88
+ end
89
+ end
90
+
91
+ describe 'with linkedin urls' do
92
+ it 'should change /companies/ to /company/' do
93
+ UrlScrubber.scrub('http://linkedin.com/companies/1337').should eq('http://linkedin.com/company/1337')
94
+ end
95
+
96
+ it 'should drop www. from the beginning' do
97
+ UrlScrubber.scrub('http://www.linkedin.com/company/1337').should eq('http://linkedin.com/company/1337')
98
+ end
99
+
100
+ it 'should drop query parameters from the end of a company url' do
101
+ UrlScrubber.scrub('http://linkedin.com/company/1337?trk=tyah').should eq('http://linkedin.com/company/1337')
102
+ end
103
+
104
+ it 'should not drop query parameters a profile view url' do
105
+ UrlScrubber.scrub('http://www.linkedin.com/profile/view?id=12341324').should eq('http://linkedin.com/profile/view?id=12341324')
106
+ end
107
+
108
+ it 'should not kick in for a non-linkedin url' do
109
+ UrlScrubber.scrub('http://example.com/companies/1337').should_not eq('http://example.com/company/1337')
110
+ end
111
+ end
112
+
113
+ describe 'with google plus urls' do
114
+ it 'should drop anything after the first significant path component' do
115
+ UrlScrubber.scrub('http://plus.google.com/+SomeName/posts').should eq('http://plus.google.com/+SomeName')
116
+ end
117
+
118
+ it 'should drop u/0/b/ from the beginning of the path' do
119
+ UrlScrubber.scrub(
120
+ 'http://plus.google.com/u/0/b/111111111111111111111'
121
+ ).should eq(
122
+ 'http://plus.google.com/111111111111111111111'
123
+ )
124
+ end
125
+
126
+ it 'should drop u/0/ from the beginning of the path' do
127
+ UrlScrubber.scrub(
128
+ 'https://plus.google.com/u/0/5432123454135/posts'
129
+ ).should eq(
130
+ 'http://plus.google.com/5432123454135'
131
+ )
132
+ end
133
+
134
+ it 'should not kick in for a non-google-plus url' do
135
+ UrlScrubber.scrub('http://example.com/+SomeName/posts').should_not eq('http://example.com/+SomeName')
136
+ end
137
+ end
138
+
139
+ describe 'with slideshare urls' do
140
+ pending
141
+ end
142
+
143
+ describe 'with flickr urls' do
144
+ it 'should drop /photos/ from profile names' do
145
+ UrlScrubber.scrub('http://flickr.com/photos/username/').should eq('http://flickr.com/username')
146
+ end
147
+
148
+ it 'should drop /people/ from profile names' do
149
+ UrlScrubber.scrub('http://flickr.com/people/username/').should eq('http://flickr.com/username')
150
+ end
151
+
152
+ it 'should drop www. from the beginning' do
153
+ UrlScrubber.scrub('http://www.flickr.com/username').should eq('http://flickr.com/username')
154
+ end
155
+
156
+ it 'should drop path components after the username' do
157
+ UrlScrubber.scrub('http://flickr.com/username/favorites').should eq('http://flickr.com/username')
158
+ end
159
+
160
+ it 'should not kick in for a non-flickr url' do
161
+ UrlScrubber.scrub('http://example.com/photos/username/').should_not eq('http://example.com/username')
162
+ end
163
+ end
164
+
165
+ describe 'with pinterest urls' do
166
+ it 'should drop www. from the beginning' do
167
+ UrlScrubber.scrub('http://www.pinterest.com/username').should eq('http://pinterest.com/username')
168
+ end
169
+ end
170
+
171
+ describe 'with yelp urls' do
172
+ it 'should drop www. from the beginning' do
173
+ UrlScrubber.scrub('http://www.yelp.com/biz/very-important-business').should eq('http://yelp.com/biz/very-important-business')
174
+ end
175
+ end
176
+
177
+ describe 'with vimeo urls' do
178
+ pending
179
+ end
180
+ end
181
+
182
+ describe '.service_of' do
183
+ it 'returns :youtube for YouTube urls' do
184
+ UrlScrubber.service_of('http://youtube.com/absolutely').should eq :youtube
185
+ end
186
+
187
+ it 'returns :twitter for Twitter urls' do
188
+ UrlScrubber.service_of('http://twitter.com/absolutely').should eq :twitter
189
+ end
190
+
191
+ it 'returns :facebook for Facebook urls' do
192
+ UrlScrubber.service_of('http://facebook.com/person.name').should eq :facebook
193
+ end
194
+
195
+ it 'returns :linkedin for LinkedIn urls' do
196
+ UrlScrubber.service_of('http://linkedin.com/company/1337').should eq :linkedin
197
+ end
198
+
199
+ it 'returns :google for Google+ urls' do
200
+ UrlScrubber.service_of('http://plus.google.com/+SomeName').should eq :google
201
+ end
202
+
203
+ it 'returns :slideshare for SlideShare urls' do
204
+ UrlScrubber.service_of('http://slideshare.net/absolutely').should eq :slideshare
205
+ end
206
+
207
+ it 'returns :flickr for Flickr urls' do
208
+ UrlScrubber.service_of('http://flickr.com/username').should eq :flickr
209
+ end
210
+
211
+ it 'returns :pinterest for Pinterest urls' do
212
+ UrlScrubber.service_of('http://pinterest.com/username').should eq :pinterest
213
+ end
214
+
215
+ it 'returns :yelp for Yelp urls' do
216
+ UrlScrubber.service_of('http://yelp.com/very-important-business').should eq :yelp
217
+ end
218
+
219
+ it 'returns :vimeo for Vimeo urls' do
220
+ UrlScrubber.service_of('http://vimeo.com/absolutely').should eq :vimeo
221
+ end
222
+
223
+ it 'returns :other for other urls' do
224
+ UrlScrubber.service_of('http://example.com/page').should eq :other
225
+ end
226
+ end
227
+
228
+ describe '.ideal_form?' do
229
+ it 'scrubs the url beforehand' do
230
+ UrlScrubber.should_receive(:scrub).with('http://example.com/some-page/').and_return('http://example.com/some-page')
231
+ UrlScrubber.ideal_form?('http://example.com/some-page/')
232
+ end
233
+
234
+ describe 'for youtube' do
235
+ it 'returns true for apparent channel urls' do
236
+ UrlScrubber.ideal_form?('http://youtube.com/absolutely').should be_true
237
+ end
238
+
239
+ it 'returns false for videos' do
240
+ UrlScrubber.ideal_form?('http://youtube.com/watch?v=vRGMAW1wzQ8').should be_false
241
+ end
242
+ end
243
+
244
+ describe 'for twitter' do
245
+ it 'returns true for apparent user urls' do
246
+ UrlScrubber.ideal_form?('http://twitter.com/absolutely').should be_true
247
+ end
248
+
249
+ it 'returns false for other pages' do
250
+ UrlScrubber.ideal_form?('http://twitter.com/').should be_false
251
+ end
252
+ end
253
+
254
+ describe 'for facebook' do
255
+ it 'returns true for apparent user urls' do
256
+ UrlScrubber.ideal_form?('http://facebook.com/person.name').should be_true
257
+ UrlScrubber.ideal_form?('http://facebook.com/profile.php?id=543123521').should be_true
258
+ end
259
+
260
+ it 'returns false for other urls' do
261
+ UrlScrubber.ideal_form?('http://facebook.com/').should be_false
262
+ end
263
+ end
264
+
265
+ describe 'for linkedin' do
266
+ it 'returns true for apparent company urls' do
267
+ UrlScrubber.ideal_form?('http://linkedin.com/company/1337').should be_true
268
+ UrlScrubber.ideal_form?('http://www.linkedin.com/profile/view?id=12341324').should be_true
269
+ UrlScrubber.ideal_form?('http://linkedin.com/company/brand-name').should be_true
270
+ end
271
+
272
+ it 'returns false for other urls' do
273
+ UrlScrubber.ideal_form?('http://linkedin.com/jobs/c-Cisco').should be_false
274
+ end
275
+ end
276
+
277
+ describe 'for google plus' do
278
+ it 'returns true for apparent person urls' do
279
+ UrlScrubber.ideal_form?('http://plus.google.com/+SomeName').should be_true
280
+ UrlScrubber.ideal_form?('http://plus.google.com/u/0/b/111111111111111111111').should be_true
281
+ UrlScrubber.ideal_form?('https://plus.google.com/u/0/5432123454135/posts').should be_true
282
+ end
283
+
284
+ it 'returns false for other urls' do
285
+ UrlScrubber.ideal_form?('http://plus.google.com/').should be_false
286
+ end
287
+ end
288
+
289
+ describe 'for slideshare' do
290
+ it 'returns true for apparent user urls' do
291
+ UrlScrubber.ideal_form?('http://slideshare.net/absolutely').should be_true
292
+ end
293
+
294
+ it 'returns false for other urls' do
295
+ UrlScrubber.ideal_form?('http://slideshare.net/absolutely/how-to-create-great-slides-for-presentations').should be_false
296
+ end
297
+ end
298
+
299
+ describe 'for flickr' do
300
+ it 'returns true for apparent user urls' do
301
+ UrlScrubber.ideal_form?('http://flickr.com/username').should be_true
302
+ end
303
+
304
+ it 'returns false for other urls' do
305
+ UrlScrubber.ideal_form?('http://flickr.com/').should be_false
306
+ end
307
+ end
308
+
309
+ describe 'for pinterest' do
310
+ it 'returns true for apparent user urls' do
311
+ UrlScrubber.ideal_form?('http://pinterest.com/username').should be_true
312
+ end
313
+
314
+ it 'returns false for other urls' do
315
+ UrlScrubber.ideal_form?('http://pinterest.com/pin/532412513451524').should be_false
316
+ end
317
+ end
318
+
319
+ describe 'for yelp' do
320
+ it 'returns true for apparent business urls' do
321
+ UrlScrubber.ideal_form?('http://yelp.com/very-important-business').should be_true
322
+ end
323
+
324
+ it 'returns false for other urls' do
325
+ UrlScrubber.ideal_form?('http://yelp.com/user_details?userid=Aheunaobuh-huoanuhbAU').should be_false
326
+ end
327
+ end
328
+
329
+ describe 'for vimeo' do
330
+ it 'returns true for apparent user urls' do
331
+ UrlScrubber.ideal_form?('http://vimeo.com/absolutely').should be_true
332
+ end
333
+
334
+ it 'returns false for video urls' do
335
+ UrlScrubber.ideal_form?('http://vimeo.com/45453874578').should be_false
336
+ end
337
+ end
338
+
339
+ describe 'for other sites' do
340
+ it 'returns true for any other site, really' do
341
+ UrlScrubber.ideal_form?('http://example.com/absolutely/anything').should be_true
342
+ end
343
+ end
344
+ end
345
+ end
@@ -0,0 +1,24 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/url_scrubber/version', __FILE__)
3
+
4
# Packaging metadata for the url_scrubber gem. The file list is taken from
# git and the version from UrlScrubber::VERSION.
Gem::Specification.new do |gem|
  gem.authors       = ["Colin Langton", "Christopher Maujean"]
  gem.email         = ["colin@hoteldelta.net", "cmaujean@brandle.net"]
  gem.description   = %q{Remove extraneous bits from URLs, follow redirects, identify social media urls, etc.}
  gem.summary       = %q{Clean up URLs.}
  gem.homepage      = "http://brandle.net"

  # `git ls-files` prints one path per line, so split on the input record
  # separator ($/). The output record separator ($\) is nil by default,
  # which makes String#split break on any whitespace and would mangle
  # paths that contain spaces.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "url_scrubber"
  gem.require_paths = ["lib"]
  gem.version       = UrlScrubber::VERSION

  # testing
  gem.add_development_dependency 'rspec', '~> 2.11.0'
  gem.add_development_dependency 'guard-bundler', "~> 0.1.3"
  gem.add_development_dependency 'guard-rspec', "~> 0.4.3"
  gem.add_development_dependency 'terminal-notifier-guard'
  gem.add_development_dependency 'rb-fsevent', '~> 0.9.1'
end
metadata ADDED
@@ -0,0 +1,114 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: url_scrubber
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.7.3
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Colin Langton
9
+ - Christopher Maujean
10
+ autorequire:
11
+ bindir: bin
12
+ cert_chain: []
13
+ date: 2013-06-06 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rspec
17
+ requirement: &2152156960 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ~>
21
+ - !ruby/object:Gem::Version
22
+ version: 2.11.0
23
+ type: :development
24
+ prerelease: false
25
+ version_requirements: *2152156960
26
+ - !ruby/object:Gem::Dependency
27
+ name: guard-bundler
28
+ requirement: &2152155880 !ruby/object:Gem::Requirement
29
+ none: false
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 0.1.3
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: *2152155880
37
+ - !ruby/object:Gem::Dependency
38
+ name: guard-rspec
39
+ requirement: &2152154720 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ~>
43
+ - !ruby/object:Gem::Version
44
+ version: 0.4.3
45
+ type: :development
46
+ prerelease: false
47
+ version_requirements: *2152154720
48
+ - !ruby/object:Gem::Dependency
49
+ name: terminal-notifier-guard
50
+ requirement: &2152153980 !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ! '>='
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ type: :development
57
+ prerelease: false
58
+ version_requirements: *2152153980
59
+ - !ruby/object:Gem::Dependency
60
+ name: rb-fsevent
61
+ requirement: &2152153300 !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ~>
65
+ - !ruby/object:Gem::Version
66
+ version: 0.9.1
67
+ type: :development
68
+ prerelease: false
69
+ version_requirements: *2152153300
70
+ description: Remove extraneous bits from URLs, follow redirects, identify social media
71
+ urls, etc.
72
+ email:
73
+ - colin@hoteldelta.net
74
+ - cmaujean@brandle.net
75
+ executables: []
76
+ extensions: []
77
+ extra_rdoc_files: []
78
+ files:
79
+ - .gitignore
80
+ - .rvmrc
81
+ - Gemfile
82
+ - Guardfile
83
+ - README.md
84
+ - Rakefile
85
+ - lib/url_scrubber.rb
86
+ - lib/url_scrubber/version.rb
87
+ - spec/url_scrubber_spec.rb
88
+ - url_scrubber.gemspec
89
+ homepage: http://brandle.net
90
+ licenses: []
91
+ post_install_message:
92
+ rdoc_options: []
93
+ require_paths:
94
+ - lib
95
+ required_ruby_version: !ruby/object:Gem::Requirement
96
+ none: false
97
+ requirements:
98
+ - - ! '>='
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
102
+ none: false
103
+ requirements:
104
+ - - ! '>='
105
+ - !ruby/object:Gem::Version
106
+ version: '0'
107
+ requirements: []
108
+ rubyforge_project:
109
+ rubygems_version: 1.8.16
110
+ signing_key:
111
+ specification_version: 3
112
+ summary: Clean up URLs.
113
+ test_files:
114
+ - spec/url_scrubber_spec.rb