url_scrubber 0.7.3

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,33 @@
1
+ **.orig
2
+ *.gem
3
+ *.rbc
4
+ *.sassc
5
+ .bundle
6
+ .config
7
+ .DS_Store
8
+ .rspec
9
+ .sass-cache
10
+ .yardoc
11
+ /.bundle
12
+ /coverage/
13
+ /db/*.sqlite3
14
+ /log/*
15
+ /public/system/*
16
+ /spec/tmp/*
17
+ /tmp/*
18
+ /vendor/bundle
19
+ _yardoc
20
+ capybara-*.html
21
+ coverage
22
+ doc/
23
+ Gemfile.lock
24
+ InstalledFiles
25
+ lib/bundler/man
26
+ pickle-email-*.html
27
+ pkg
28
+ rdoc
29
+ rerun.txt
30
+ spec/reports
31
+ test/tmp
32
+ test/version_tmp
33
+ tmp
data/.rvmrc ADDED
@@ -0,0 +1,48 @@
1
+ #!/usr/bin/env bash
2
+
3
+ # This is an RVM Project .rvmrc file, used to automatically load the ruby
4
+ # development environment upon cd'ing into the directory
5
+
6
+ # First we specify our desired <ruby>[@<gemset>], the @gemset name is optional,
7
+ # Only full ruby name is supported here, for short names use:
8
+ # echo "rvm use 1.9.3" > .rvmrc
9
+ environment_id="ruby-1.9.3-p194@url_scrubber"
10
+
11
+ # Uncomment the following lines if you want to verify rvm version per project
12
+ # rvmrc_rvm_version="1.16.19 (stable)" # 1.10.1 seems like a safe start
13
+ # eval "$(echo ${rvm_version}.${rvmrc_rvm_version} | awk -F. '{print "[[ "$1*65536+$2*256+$3" -ge "$4*65536+$5*256+$6" ]]"}' )" || {
14
+ # echo "This .rvmrc file requires at least RVM ${rvmrc_rvm_version}, aborting loading."
15
+ # return 1
16
+ # }
17
+
18
+ # First we attempt to load the desired environment directly from the environment
19
+ # file. This is very fast and efficient compared to running through the entire
20
+ # CLI and selector. If you want feedback on which environment was used then
21
+ # insert the word 'use' after --create as this triggers verbose mode.
22
+ if [[ -d "${rvm_path:-$HOME/.rvm}/environments"
23
+ && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]]
24
+ then
25
+ \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
26
+ [[ -s "${rvm_path:-$HOME/.rvm}/hooks/after_use" ]] &&
27
+ \. "${rvm_path:-$HOME/.rvm}/hooks/after_use" || true
28
+ else
29
+ # If the environment file has not yet been created, use the RVM CLI to select.
30
+ rvm --create "$environment_id" || {
31
+ echo "Failed to create RVM environment '${environment_id}'."
32
+ return 1
33
+ }
34
+ fi
35
+
36
+ # If you use bundler, this might be useful to you:
37
+ # if [[ -s Gemfile ]] && {
38
+ # ! builtin command -v bundle >/dev/null ||
39
+ # builtin command -v bundle | GREP_OPTIONS= \grep $rvm_path/bin/bundle >/dev/null
40
+ # }
41
+ # then
42
+ # printf "%b" "The rubygem 'bundler' is not installed. Installing it now.\n"
43
+ # gem install bundler
44
+ # fi
45
+ # if [[ -s Gemfile ]] && builtin command -v bundle >/dev/null
46
+ # then
47
+ # bundle install | GREP_OPTIONS= \grep -vE '^Using|Your bundle is complete'
48
+ # fi
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over TLS; plain http:// sources can be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in url_scrubber.gemspec
gemspec
data/Guardfile ADDED
@@ -0,0 +1,14 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
+ guard 'bundler' do
5
+ watch('Gemfile')
6
+ # Uncomment the next line if the Gemfile contains the `gemspec` command
7
+ # watch(/^.+\.gemspec/)
8
+ end
9
+
10
+ guard 'rspec', :version => 2, :cli => '-c' do
11
+ watch(%r{^spec/.+_spec\.rb$})
12
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
13
+ watch('spec/spec_helper.rb') { "spec" }
14
+ end
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # UrlScrubber
2
+
3
+ TODO: Write a gem description
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'url_scrubber'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install url_scrubber
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+
4
+ require 'rspec'
5
+ require 'rspec/core/rake_task'
6
+
7
+ desc "Run all examples"
8
+ RSpec::Core::RakeTask.new(:spec) do |t|
9
+ t.pattern = 'spec/**/*_spec.rb'
10
+ end
@@ -0,0 +1,3 @@
1
module UrlScrubber
  # Release version of the gem; read by url_scrubber.gemspec.
  # Frozen so the shared constant cannot be mutated in place.
  VERSION = "0.7.3".freeze
end
@@ -0,0 +1,172 @@
1
+ require "url_scrubber/version"
2
+
3
# Normalises URLs into a canonical form and recognises a handful of social
# media services so that their profile URLs can be compared reliably.
#
# Public entry points: {scrub}, {service_of} and {ideal_form?}.
module UrlScrubber
  # Host names of the recognised services paired with the symbol reported by
  # {service_of}. A host matches when it equals the name or ends with it after
  # a "." (sub-domain) or "@" (userinfo), so "my-twitter.com" is NOT Twitter.
  SERVICE_DOMAINS = [
    ['youtube.com',     :youtube],
    ['twitter.com',     :twitter],
    ['facebook.com',    :facebook],
    ['linkedin.com',    :linkedin],
    ['plus.google.com', :google],
    ['slideshare.net',  :slideshare],
    ['flickr.com',      :flickr],
    ['pinterest.com',   :pinterest],
    ['vimeo.com',       :vimeo],
    ['yelp.com',        :yelp]
  ].map { |host, service| [/(?:\A|[.@])#{Regexp.escape(host)}$/, service] }.freeze

  # Extracts the first http(s) URL found in +url+ and normalises it: the
  # scheme becomes "http", the host is downcased, a leading "www." (or
  # "www2." etc.), trailing slashes, "#!/" and any "#anchor" are removed, and
  # service-specific clean-ups are applied.
  #
  # @param url [#to_s] text containing a URL; the argument is never modified
  # @return [String, nil] the scrubbed URL, or nil when no URL with a host
  #   is present (including nil / empty input)
  def self.scrub(url)
    # Require at least one host character so "http://" alone is rejected
    # instead of blowing up further down.
    m = url.to_s.match(%r{(https?://[^/\s]\S*)}i)
    return nil unless m

    url = m[1] # a fresh string, safe to mutate
    # Case-insensitive: the match above accepts "HTTPS://", so must this.
    url.sub!(/\Ahttps?/i, 'http')
    url.sub!(/\/+$/, '')
    url.sub!('#!/', '')
    url = downcase_domain(url)
    remove_www!(url)
    drop_anchor!(special_cases(url))
  end

  # Identifies which known service a URL belongs to, based on its host.
  #
  # @param url [#to_s] a URL starting with a lower-case http:// or https://
  # @return [Symbol] one of the symbols in SERVICE_DOMAINS, or :other
  def self.service_of(url)
    domain = url.to_s[%r{https?://([^/]+)}, 1]
    return :other unless domain

    pair = SERVICE_DOMAINS.find { |pattern, _service| domain =~ pattern }
    pair ? pair.last : :other
  end

  # Tells whether +url+, once scrubbed, looks like a canonical profile/page
  # URL for its service. URLs of unrecognised services are always accepted.
  #
  # @param url [#to_s] the URL to check (scrubbed internally first)
  # @return [Boolean] false when the input contains no URL at all
  def self.ideal_form?(url)
    url = scrub(url)
    return false unless url

    case service_of(url)
    when :youtube
      !!url.match(%r{\Ahttp://youtube\.com/[\w_-]+\z})
    when :twitter
      !!url.match(%r{\Ahttp://twitter\.com/[\w_]+\z})
    when :facebook
      # "\?" must be escaped, otherwise it makes the preceding "p" optional
      # and the numeric-id form can never match.
      !!url.match(%r{\Ahttp://facebook\.com/(profile\.php\?id=\d+|[\w_\.-]+)\z})
    when :linkedin
      !!url.match(%r{\Ahttp://linkedin\.com/(company/[\w_-]+|profile/view\?id=\d+)\z})
    when :google
      !!url.match(%r{\Ahttp://plus\.google\.com/(\+[\w_-]+|\d+)\z})
    when :slideshare
      !!url.match(%r{\Ahttp://slideshare\.net/[\w_-]+\z})
    when :flickr
      !!url.match(%r{\Ahttp://flickr\.com/[\w_-]+\z})
    when :pinterest
      !!url.match(%r{\Ahttp://pinterest\.com/[\w_-]+\z})
    when :yelp
      !!url.match(%r{\Ahttp://yelp\.com/[\w_-]+\z})
    when :vimeo
      # A purely numeric path is a video, not a user page.
      !!url.match(%r{\Ahttp://vimeo\.com/[\w_-]+\z}) && !url.match(%r{/\d+\z})
    else
      true
    end
  end

  # NOTE: the helpers below were originally listed under a bare `private`,
  # which has no effect on `def self.` methods. They are internal, but remain
  # callable so existing callers keep working.

  # Downcases the scheme and host portion of +url+, leaving the path as is.
  # Returns +url+ unchanged when it has no http:// host part.
  def self.downcase_domain(url)
    domain_match = url.match(%r{http://[^/]+}i)
    return url unless domain_match

    domain_match[0].downcase + domain_match.post_match
  end

  # Applies the service-specific clean-up for +url+, if there is one.
  def self.special_cases(url)
    case service_of(url)
    when :youtube   then youtube(url)
    when :twitter   then twitter(url)
    when :facebook  then facebook(url)
    when :linkedin  then linkedin(url)
    when :google    then google_plus(url)
    when :flickr    then flickr(url)
    when :pinterest then pinterest(url)
    when :yelp      then yelp(url)
    else url
    end
  end

  # Removes a leading "www." / "wwwN." host label in place; returns +url+.
  def self.remove_www!(url)
    url.sub!(%r{://www\d*\.}, '://')
    url
  end

  # Removes everything from the first "?" onwards in place; returns +url+.
  def self.drop_url_query!(url)
    url.sub!(/\?.*$/, '')
    url
  end

  # Removes everything from the first "#" onwards in place; returns +url+.
  def self.drop_anchor!(url)
    url.sub!(/#.*$/, '')
    url
  end

  # YouTube: "/user/NAME" and "/profile?user=NAME" both become "/NAME".
  def self.youtube(url)
    url.sub!('youtube.com/user/', 'youtube.com/')
    url.sub!('youtube.com/profile?user=', 'youtube.com/')
    url
  end

  # Twitter: strips a leading "@" from the user name, and reduces status and
  # "@user" search URLs to that user's profile URL.
  def self.twitter(url)
    url.sub!('twitter.com/@', 'twitter.com/')

    status_match = url.match(%r{(twitter\.com/[^/]+)/statuses/\d+})
    url = "http://#{status_match[1]}" if status_match

    search_match = url.match(%r{twitter\.com/search(?:/realtime)?(?:/|\?q=)(?:@|%40)(\S*)$})
    url = "http://twitter.com/#{search_match[1]}" if search_match

    url
  end

  # Facebook: album/set URLs keep only their first query parameter,
  # numeric-id profiles keep only their id, and everything else loses the
  # "home.php" prefix and the whole query string.
  def self.facebook(url)
    if url.include?('/media/albums') || url.include?('/media/set')
      return url.include?('&') ? url.split('&', 2)[0] : url
    end

    # The id IS the profile; dropping the query here would collapse every
    # numeric-id profile into the same ".../profile.php" URL.
    profile = url.match(%r{\A(.*facebook\.com/profile\.php)\?(?:[^#]*&)?id=(\d+)})
    return "#{profile[1]}?id=#{profile[2]}" if profile

    url.sub!(/facebook\.com\/home\.php[\?#!\/]+/, 'facebook.com/')
    drop_url_query!(url)
  end

  # LinkedIn: "/companies/" becomes "/company/"; company URLs lose their
  # query string (profile "view?id=" URLs keep it).
  def self.linkedin(url)
    url.sub!('linkedin.com/companies/', 'linkedin.com/company/')
    drop_url_query!(url) if url.match(%r{com/company/})
    url
  end

  # Google+: drops the "u/0/", "u/0/b/" and "b/" path prefixes and everything
  # after the first remaining path component.
  def self.google_plus(url)
    url.sub!('com/u/0/b/', 'com/')
    url.sub!('com/u/0/', 'com/')
    url.sub!('com/b/', 'com/')

    path_match = url.match(/^http:\/\/plus\.google\.com\/([^\/]+)/)
    return url unless path_match

    "http://plus.google.com/#{path_match[1]}"
  end

  # Flickr: reduces "/photos/NAME/...", "/people/NAME/..." and "/NAME/..."
  # to "http://flickr.com/NAME".
  def self.flickr(url)
    user_match = url.match(%r{flickr\.com/(photos/|people/)?([^/]+)})
    return url unless user_match

    "http://flickr.com/#{user_match[2]}"
  end

  # Pinterest: no service-specific clean-up yet.
  def self.pinterest(url)
    url
  end

  # Yelp: no service-specific clean-up yet.
  def self.yelp(url)
    url
  end
end
@@ -0,0 +1,345 @@
1
+ require 'rspec'
2
+ require 'url_scrubber'
3
+
4
+ describe UrlScrubber do
5
+ describe '.scrub' do
6
+ it 'should downcase all domain stuff' do
7
+ UrlScrubber.scrub("http://ExAmple.COM").should eq('http://example.com')
8
+ end
9
+
10
+ it 'should remove whitespace and stray characters around an url' do
11
+ UrlScrubber.scrub("d http://example.com\t").should eq("http://example.com")
12
+ end
13
+
14
+ it 'should turn an https url into http' do
15
+ UrlScrubber.scrub('https://example.com').should eq('http://example.com')
16
+ end
17
+
18
+ it 'should remove trailing slashes' do
19
+ UrlScrubber.scrub('http://example.com/').should eq('http://example.com')
20
+ end
21
+
22
+ it 'should remove #!/' do
23
+ UrlScrubber.scrub('http://example.com/#!/page').should eq('http://example.com/page')
24
+ end
25
+
26
+ it 'should drop #anchors' do
27
+ UrlScrubber.scrub('http://example.com/page#query:example+pages').should eq('http://example.com/page')
28
+ end
29
+
30
+ it 'should drop www' do
31
+ UrlScrubber.scrub('http://www.example.com/page').should eq('http://example.com/page')
32
+ end
33
+
34
+ describe 'with youtube urls' do
35
+ it 'should drop /user/ if it exists' do
36
+ UrlScrubber.scrub('http://youtube.com/user/absolutely').should eq('http://youtube.com/absolutely')
37
+ end
38
+
39
+ it 'should handle /profile?user= urls' do
40
+ UrlScrubber.scrub('http://youtube.com/profile?user=absolutely').should eq('http://youtube.com/absolutely')
41
+ end
42
+
43
+ it 'should not kick in for a non-youtube url' do
44
+ UrlScrubber.scrub('http://example.com/user/absolutely').should_not eq('http://example.com/absolutely')
45
+ end
46
+ end
47
+
48
+ describe 'with twitter urls' do
49
+ it 'should drop @ from in front of username' do
50
+ UrlScrubber.scrub('http://twitter.com/@absolutely').should eq('http://twitter.com/absolutely')
51
+ end
52
+
53
+ it 'should not kick in for a non-twitter url' do
54
+ UrlScrubber.scrub('http://example.com/@absolutely').should_not eq('http://example.com/absolutely')
55
+ end
56
+
57
+ it 'should transform /search/realtime/%40username into that username' do
58
+ UrlScrubber.scrub('http://twitter.com/search/realtime/%40absolutely').should eq('http://twitter.com/absolutely')
59
+ end
60
+
61
+ it 'should handle similar search urls' do
62
+ UrlScrubber.scrub('http://twitter.com/search/realtime/@absolutely').should eq('http://twitter.com/absolutely')
63
+ UrlScrubber.scrub('http://twitter.com/search/%40absolutely').should eq('http://twitter.com/absolutely')
64
+ UrlScrubber.scrub('http://twitter.com/search/@absolutely').should eq('http://twitter.com/absolutely')
65
+ UrlScrubber.scrub('http://twitter.com/search/realtime?q=%40absolutely').should eq('http://twitter.com/absolutely')
66
+ end
67
+
68
+ it "should transform user statuses into that user's profile" do
69
+ UrlScrubber.scrub('http://twitter.com/absolutely/statuses/135243243261312').should eq('http://twitter.com/absolutely')
70
+ end
71
+ end
72
+
73
+ describe 'with facebook urls' do
74
+ it 'should drop /home.php?#!/ from the beginning of the path' do
75
+ UrlScrubber.scrub('http://facebook.com/home.php?#!/person.name').should eq('http://facebook.com/person.name')
76
+ end
77
+
78
+ it 'should drop www. from the beginning' do
79
+ UrlScrubber.scrub('http://www.facebook.com/person.name').should eq('http://facebook.com/person.name')
80
+ end
81
+
82
+ it 'should drop an url query' do
83
+ UrlScrubber.scrub('http://facebook.com/person.name?ref=pb').should eq('http://facebook.com/person.name')
84
+ end
85
+
86
+ it 'should not kick in for a non-facebook url' do
87
+ UrlScrubber.scrub('http://example.com/home.php?#!/person.name').should_not eq('http://example.com/person.name')
88
+ end
89
+ end
90
+
91
+ describe 'with linkedin urls' do
92
+ it 'should change /companies/ to /company/' do
93
+ UrlScrubber.scrub('http://linkedin.com/companies/1337').should eq('http://linkedin.com/company/1337')
94
+ end
95
+
96
+ it 'should drop www. from the beginning' do
97
+ UrlScrubber.scrub('http://www.linkedin.com/company/1337').should eq('http://linkedin.com/company/1337')
98
+ end
99
+
100
+ it 'should drop query parameters from the end of a company url' do
101
+ UrlScrubber.scrub('http://linkedin.com/company/1337?trk=tyah').should eq('http://linkedin.com/company/1337')
102
+ end
103
+
104
+ it 'should not drop query parameters a profile view url' do
105
+ UrlScrubber.scrub('http://www.linkedin.com/profile/view?id=12341324').should eq('http://linkedin.com/profile/view?id=12341324')
106
+ end
107
+
108
+ it 'should not kick in for a non-linkedin url' do
109
+ UrlScrubber.scrub('http://example.com/companies/1337').should_not eq('http://example.com/company/1337')
110
+ end
111
+ end
112
+
113
+ describe 'with google plus urls' do
114
+ it 'should drop anything after the first significant path component' do
115
+ UrlScrubber.scrub('http://plus.google.com/+SomeName/posts').should eq('http://plus.google.com/+SomeName')
116
+ end
117
+
118
+ it 'should drop u/0/b/ from the beginning of the path' do
119
+ UrlScrubber.scrub(
120
+ 'http://plus.google.com/u/0/b/111111111111111111111'
121
+ ).should eq(
122
+ 'http://plus.google.com/111111111111111111111'
123
+ )
124
+ end
125
+
126
+ it 'should drop u/0/ from the beginning of the path' do
127
+ UrlScrubber.scrub(
128
+ 'https://plus.google.com/u/0/5432123454135/posts'
129
+ ).should eq(
130
+ 'http://plus.google.com/5432123454135'
131
+ )
132
+ end
133
+
134
+ it 'should not kick in for a non-google-plus url' do
135
+ UrlScrubber.scrub('http://example.com/+SomeName/posts').should_not eq('http://example.com/+SomeName')
136
+ end
137
+ end
138
+
139
+ describe 'with slideshare urls' do
140
+ pending
141
+ end
142
+
143
+ describe 'with flickr urls' do
144
+ it 'should drop /photos/ from profile names' do
145
+ UrlScrubber.scrub('http://flickr.com/photos/username/').should eq('http://flickr.com/username')
146
+ end
147
+
148
+ it 'should drop /people/ from profile names' do
149
+ UrlScrubber.scrub('http://flickr.com/people/username/').should eq('http://flickr.com/username')
150
+ end
151
+
152
+ it 'should drop www. from the beginning' do
153
+ UrlScrubber.scrub('http://www.flickr.com/username').should eq('http://flickr.com/username')
154
+ end
155
+
156
+ it 'should drop path components after the username' do
157
+ UrlScrubber.scrub('http://flickr.com/username/favorites').should eq('http://flickr.com/username')
158
+ end
159
+
160
+ it 'should not kick in for a non-flickr url' do
161
+ UrlScrubber.scrub('http://example.com/photos/username/').should_not eq('http://example.com/username')
162
+ end
163
+ end
164
+
165
+ describe 'with pinterest urls' do
166
+ it 'should drop www. from the beginning' do
167
+ UrlScrubber.scrub('http://www.pinterest.com/username').should eq('http://pinterest.com/username')
168
+ end
169
+ end
170
+
171
+ describe 'with yelp urls' do
172
+ it 'should drop www. from the beginning' do
173
+ UrlScrubber.scrub('http://www.yelp.com/biz/very-important-business').should eq('http://yelp.com/biz/very-important-business')
174
+ end
175
+ end
176
+
177
+ describe 'with vimeo urls' do
178
+ pending
179
+ end
180
+ end
181
+
182
+ describe '.service_of' do
183
+ it 'returns :youtube for YouTube urls' do
184
+ UrlScrubber.service_of('http://youtube.com/absolutely').should eq :youtube
185
+ end
186
+
187
+ it 'returns :twitter for Twitter urls' do
188
+ UrlScrubber.service_of('http://twitter.com/absolutely').should eq :twitter
189
+ end
190
+
191
+ it 'returns :facebook for Facebook urls' do
192
+ UrlScrubber.service_of('http://facebook.com/person.name').should eq :facebook
193
+ end
194
+
195
+ it 'returns :linkedin for LinkedIn urls' do
196
+ UrlScrubber.service_of('http://linkedin.com/company/1337').should eq :linkedin
197
+ end
198
+
199
+ it 'returns :google for Google+ urls' do
200
+ UrlScrubber.service_of('http://plus.google.com/+SomeName').should eq :google
201
+ end
202
+
203
+ it 'returns :slideshare for SlideShare urls' do
204
+ UrlScrubber.service_of('http://slideshare.net/absolutely').should eq :slideshare
205
+ end
206
+
207
+ it 'returns :flickr for Flickr urls' do
208
+ UrlScrubber.service_of('http://flickr.com/username').should eq :flickr
209
+ end
210
+
211
+ it 'returns :pinterest for Pinterest urls' do
212
+ UrlScrubber.service_of('http://pinterest.com/username').should eq :pinterest
213
+ end
214
+
215
+ it 'returns :yelp for Yelp urls' do
216
+ UrlScrubber.service_of('http://yelp.com/very-important-business').should eq :yelp
217
+ end
218
+
219
+ it 'returns :vimeo for Vimeo urls' do
220
+ UrlScrubber.service_of('http://vimeo.com/absolutely').should eq :vimeo
221
+ end
222
+
223
+ it 'returns :other for other urls' do
224
+ UrlScrubber.service_of('http://example.com/page').should eq :other
225
+ end
226
+ end
227
+
228
+ describe '.ideal_form?' do
229
+ it 'scrubs the url beforehand' do
230
+ UrlScrubber.should_receive(:scrub).with('http://example.com/some-page/').and_return('http://example.com/some-page')
231
+ UrlScrubber.ideal_form?('http://example.com/some-page/')
232
+ end
233
+
234
+ describe 'for youtube' do
235
+ it 'returns true for apparent channel urls' do
236
+ UrlScrubber.ideal_form?('http://youtube.com/absolutely').should be_true
237
+ end
238
+
239
+ it 'returns false for videos' do
240
+ UrlScrubber.ideal_form?('http://youtube.com/watch?v=vRGMAW1wzQ8').should be_false
241
+ end
242
+ end
243
+
244
+ describe 'for twitter' do
245
+ it 'returns true for apparent user urls' do
246
+ UrlScrubber.ideal_form?('http://twitter.com/absolutely').should be_true
247
+ end
248
+
249
+ it 'returns false for other pages' do
250
+ UrlScrubber.ideal_form?('http://twitter.com/').should be_false
251
+ end
252
+ end
253
+
254
+ describe 'for facebook' do
255
+ it 'returns true for apparent user urls' do
256
+ UrlScrubber.ideal_form?('http://facebook.com/person.name').should be_true
257
+ UrlScrubber.ideal_form?('http://facebook.com/profile.php?id=543123521').should be_true
258
+ end
259
+
260
+ it 'returns false for other urls' do
261
+ UrlScrubber.ideal_form?('http://facebook.com/').should be_false
262
+ end
263
+ end
264
+
265
+ describe 'for linkedin' do
266
+ it 'returns true for apparent company urls' do
267
+ UrlScrubber.ideal_form?('http://linkedin.com/company/1337').should be_true
268
+ UrlScrubber.ideal_form?('http://www.linkedin.com/profile/view?id=12341324').should be_true
269
+ UrlScrubber.ideal_form?('http://linkedin.com/company/brand-name').should be_true
270
+ end
271
+
272
+ it 'returns false for other urls' do
273
+ UrlScrubber.ideal_form?('http://linkedin.com/jobs/c-Cisco').should be_false
274
+ end
275
+ end
276
+
277
+ describe 'for google plus' do
278
+ it 'returns true for apparent person urls' do
279
+ UrlScrubber.ideal_form?('http://plus.google.com/+SomeName').should be_true
280
+ UrlScrubber.ideal_form?('http://plus.google.com/u/0/b/111111111111111111111').should be_true
281
+ UrlScrubber.ideal_form?('https://plus.google.com/u/0/5432123454135/posts').should be_true
282
+ end
283
+
284
+ it 'returns false for other urls' do
285
+ UrlScrubber.ideal_form?('http://plus.google.com/').should be_false
286
+ end
287
+ end
288
+
289
+ describe 'for slideshare' do
290
+ it 'returns true for apparent user urls' do
291
+ UrlScrubber.ideal_form?('http://slideshare.net/absolutely').should be_true
292
+ end
293
+
294
+ it 'returns false for other urls' do
295
+ UrlScrubber.ideal_form?('http://slideshare.net/absolutely/how-to-create-great-slides-for-presentations').should be_false
296
+ end
297
+ end
298
+
299
+ describe 'for flickr' do
300
+ it 'returns true for apparent user urls' do
301
+ UrlScrubber.ideal_form?('http://flickr.com/username').should be_true
302
+ end
303
+
304
+ it 'returns false for other urls' do
305
+ UrlScrubber.ideal_form?('http://flickr.com/').should be_false
306
+ end
307
+ end
308
+
309
+ describe 'for pinterest' do
310
+ it 'returns true for apparent user urls' do
311
+ UrlScrubber.ideal_form?('http://pinterest.com/username').should be_true
312
+ end
313
+
314
+ it 'returns false for other urls' do
315
+ UrlScrubber.ideal_form?('http://pinterest.com/pin/532412513451524').should be_false
316
+ end
317
+ end
318
+
319
+ describe 'for yelp' do
320
+ it 'returns true for apparent business urls' do
321
+ UrlScrubber.ideal_form?('http://yelp.com/very-important-business').should be_true
322
+ end
323
+
324
+ it 'returns false for other urls' do
325
+ UrlScrubber.ideal_form?('http://yelp.com/user_details?userid=Aheunaobuh-huoanuhbAU').should be_false
326
+ end
327
+ end
328
+
329
+ describe 'for vimeo' do
330
+ it 'returns true for apparent user urls' do
331
+ UrlScrubber.ideal_form?('http://vimeo.com/absolutely').should be_true
332
+ end
333
+
334
+ it 'returns false for video urls' do
335
+ UrlScrubber.ideal_form?('http://vimeo.com/45453874578').should be_false
336
+ end
337
+ end
338
+
339
+ describe 'for other sites' do
340
+ it 'returns true for any other site, really' do
341
+ UrlScrubber.ideal_form?('http://example.com/absolutely/anything').should be_true
342
+ end
343
+ end
344
+ end
345
+ end
@@ -0,0 +1,24 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/url_scrubber/version', __FILE__)
3
+
4
# Gem packaging metadata for url_scrubber.
Gem::Specification.new do |gem|
  gem.authors       = ["Colin Langton", "Christopher Maujean"]
  gem.email         = ["colin@hoteldelta.net", "cmaujean@brandle.net"]
  gem.description   = %q{Remove extraneous bits from URLs, follow redirects, identify social media urls, etc.}
  gem.summary       = %q{Clean up URLs.}
  gem.homepage      = "http://brandle.net"

  # NUL-separated listing: splitting on $\ (nil by default) falls back to
  # whitespace splitting and mangles any path that contains a space.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "url_scrubber"
  gem.require_paths = ["lib"]
  gem.version       = UrlScrubber::VERSION

  # testing
  gem.add_development_dependency 'rspec', '~> 2.11.0'
  gem.add_development_dependency 'guard-bundler', "~> 0.1.3"
  gem.add_development_dependency 'guard-rspec', "~> 0.4.3"
  gem.add_development_dependency 'terminal-notifier-guard'
  gem.add_development_dependency 'rb-fsevent', '~> 0.9.1'
end
metadata ADDED
@@ -0,0 +1,114 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: url_scrubber
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.7.3
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Colin Langton
9
+ - Christopher Maujean
10
+ autorequire:
11
+ bindir: bin
12
+ cert_chain: []
13
+ date: 2013-06-06 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rspec
17
+ requirement: &2152156960 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ~>
21
+ - !ruby/object:Gem::Version
22
+ version: 2.11.0
23
+ type: :development
24
+ prerelease: false
25
+ version_requirements: *2152156960
26
+ - !ruby/object:Gem::Dependency
27
+ name: guard-bundler
28
+ requirement: &2152155880 !ruby/object:Gem::Requirement
29
+ none: false
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 0.1.3
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: *2152155880
37
+ - !ruby/object:Gem::Dependency
38
+ name: guard-rspec
39
+ requirement: &2152154720 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ~>
43
+ - !ruby/object:Gem::Version
44
+ version: 0.4.3
45
+ type: :development
46
+ prerelease: false
47
+ version_requirements: *2152154720
48
+ - !ruby/object:Gem::Dependency
49
+ name: terminal-notifier-guard
50
+ requirement: &2152153980 !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ! '>='
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ type: :development
57
+ prerelease: false
58
+ version_requirements: *2152153980
59
+ - !ruby/object:Gem::Dependency
60
+ name: rb-fsevent
61
+ requirement: &2152153300 !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ~>
65
+ - !ruby/object:Gem::Version
66
+ version: 0.9.1
67
+ type: :development
68
+ prerelease: false
69
+ version_requirements: *2152153300
70
+ description: Remove extraneous bits from URLs, follow redirects, identify social media
71
+ urls, etc.
72
+ email:
73
+ - colin@hoteldelta.net
74
+ - cmaujean@brandle.net
75
+ executables: []
76
+ extensions: []
77
+ extra_rdoc_files: []
78
+ files:
79
+ - .gitignore
80
+ - .rvmrc
81
+ - Gemfile
82
+ - Guardfile
83
+ - README.md
84
+ - Rakefile
85
+ - lib/url_scrubber.rb
86
+ - lib/url_scrubber/version.rb
87
+ - spec/url_scrubber_spec.rb
88
+ - url_scrubber.gemspec
89
+ homepage: http://brandle.net
90
+ licenses: []
91
+ post_install_message:
92
+ rdoc_options: []
93
+ require_paths:
94
+ - lib
95
+ required_ruby_version: !ruby/object:Gem::Requirement
96
+ none: false
97
+ requirements:
98
+ - - ! '>='
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
102
+ none: false
103
+ requirements:
104
+ - - ! '>='
105
+ - !ruby/object:Gem::Version
106
+ version: '0'
107
+ requirements: []
108
+ rubyforge_project:
109
+ rubygems_version: 1.8.16
110
+ signing_key:
111
+ specification_version: 3
112
+ summary: Clean up URLs.
113
+ test_files:
114
+ - spec/url_scrubber_spec.rb