url_scrubber 0.7.3 → 0.7.4
Sign up to get free protection for your applications and to get access to all the features.
- data/.rvmrc +1 -48
- data/lib/url_scrubber.rb +20 -1
- data/lib/url_scrubber/version.rb +1 -1
- data/spec/spec_helper.rb +2 -0
- data/spec/url_scrubber_spec.rb +47 -2
- data/url_scrubber.gemspec +2 -2
- metadata +42 -13
data/.rvmrc
CHANGED
@@ -1,48 +1 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
# This is an RVM Project .rvmrc file, used to automatically load the ruby
|
4
|
-
# development environment upon cd'ing into the directory
|
5
|
-
|
6
|
-
# First we specify our desired <ruby>[@<gemset>], the @gemset name is optional,
|
7
|
-
# Only full ruby name is supported here, for short names use:
|
8
|
-
# echo "rvm use 1.9.3" > .rvmrc
|
9
|
-
environment_id="ruby-1.9.3-p194@url_scrubber"
|
10
|
-
|
11
|
-
# Uncomment the following lines if you want to verify rvm version per project
|
12
|
-
# rvmrc_rvm_version="1.16.19 (stable)" # 1.10.1 seems as a safe start
|
13
|
-
# eval "$(echo ${rvm_version}.${rvmrc_rvm_version} | awk -F. '{print "[[ "$1*65536+$2*256+$3" -ge "$4*65536+$5*256+$6" ]]"}' )" || {
|
14
|
-
# echo "This .rvmrc file requires at least RVM ${rvmrc_rvm_version}, aborting loading."
|
15
|
-
# return 1
|
16
|
-
# }
|
17
|
-
|
18
|
-
# First we attempt to load the desired environment directly from the environment
|
19
|
-
# file. This is very fast and efficient compared to running through the entire
|
20
|
-
# CLI and selector. If you want feedback on which environment was used then
|
21
|
-
# insert the word 'use' after --create as this triggers verbose mode.
|
22
|
-
if [[ -d "${rvm_path:-$HOME/.rvm}/environments"
|
23
|
-
&& -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]]
|
24
|
-
then
|
25
|
-
\. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
|
26
|
-
[[ -s "${rvm_path:-$HOME/.rvm}/hooks/after_use" ]] &&
|
27
|
-
\. "${rvm_path:-$HOME/.rvm}/hooks/after_use" || true
|
28
|
-
else
|
29
|
-
# If the environment file has not yet been created, use the RVM CLI to select.
|
30
|
-
rvm --create "$environment_id" || {
|
31
|
-
echo "Failed to create RVM environment '${environment_id}'."
|
32
|
-
return 1
|
33
|
-
}
|
34
|
-
fi
|
35
|
-
|
36
|
-
# If you use bundler, this might be useful to you:
|
37
|
-
# if [[ -s Gemfile ]] && {
|
38
|
-
# ! builtin command -v bundle >/dev/null ||
|
39
|
-
# builtin command -v bundle | GREP_OPTIONS= \grep $rvm_path/bin/bundle >/dev/null
|
40
|
-
# }
|
41
|
-
# then
|
42
|
-
# printf "%b" "The rubygem 'bundler' is not installed. Installing it now.\n"
|
43
|
-
# gem install bundler
|
44
|
-
# fi
|
45
|
-
# if [[ -s Gemfile ]] && builtin command -v bundle >/dev/null
|
46
|
-
# then
|
47
|
-
# bundle install | GREP_OPTIONS= \grep -vE '^Using|Your bundle is complete'
|
48
|
-
# fi
|
1
|
+
rvm use 1.9.3@url_scrubber --create
|
data/lib/url_scrubber.rb
CHANGED
@@ -16,10 +16,21 @@ module UrlScrubber
|
|
16
16
|
end
|
17
17
|
|
18
18
|
def self.service_of(url)
|
19
|
+
|
19
20
|
domain_match = url.match(%r{https?://([^/]+)})
|
20
21
|
|
21
22
|
if domain_match
|
22
|
-
domain = domain_match[1]
|
23
|
+
domain = domain_match[1]
|
24
|
+
first_dot = domain.index(".")
|
25
|
+
|
26
|
+
#first_dot_position = domain.index(".")
|
27
|
+
#first_dot_position += 1 if first_dot_position
|
28
|
+
|
29
|
+
#Rails.logger.debug "domain = #{domain}, first dot = #{first_dot ? first_dot : 'none'}, first dot 1= #{first_dot ? domain[first_dot+1..domain.size] : 'NIL'}"
|
30
|
+
if first_dot
|
31
|
+
# tumblr is a unique format
|
32
|
+
return :tumblr if domain[first_dot+1..domain.size].index("tumblr.com") == 0
|
33
|
+
end
|
23
34
|
|
24
35
|
case domain
|
25
36
|
when /\byoutube\.com$/ then return :youtube
|
@@ -31,8 +42,11 @@ module UrlScrubber
|
|
31
42
|
when /\bflickr\.com$/ then return :flickr
|
32
43
|
when /\bpinterest\.com$/ then return :pinterest
|
33
44
|
when /\bvimeo\.com$/ then return :vimeo
|
45
|
+
when /\binstagram\.com$/ then return :instagram
|
34
46
|
when /\byelp\.com$/ then return :yelp
|
35
47
|
end
|
48
|
+
else
|
49
|
+
Rails.logger.debug "No Domain Match"
|
36
50
|
end
|
37
51
|
|
38
52
|
:other
|
@@ -63,6 +77,11 @@ module UrlScrubber
|
|
63
77
|
!!url.match(%r{^http://yelp\.com/[\w_-]+$})
|
64
78
|
when :vimeo
|
65
79
|
!!url.match(%r{^http://vimeo\.com/[\w_-]+$}) && !url.match(%r{/\d+$})
|
80
|
+
when :instagram
|
81
|
+
!!url.match(%r{^http://instagram\.com/[\w_]+$})
|
82
|
+
when :tumblr
|
83
|
+
#Rails.logger.debug "CCC Tumblr - url=#{url}, ideal=#{!!url.match(%r{^http://[\w_]+\.tumblr\.com$})}, www=#{url.index("://www.") ? url.index("://www.") : 'NIL'}"
|
84
|
+
!!url.match(%r{^http://[\w_]+\.tumblr\.com$}) && !url.index("://www.")
|
66
85
|
else
|
67
86
|
true
|
68
87
|
end
|
data/lib/url_scrubber/version.rb
CHANGED
data/spec/spec_helper.rb
ADDED
data/spec/url_scrubber_spec.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
|
-
require '
|
2
|
-
require 'url_scrubber'
|
1
|
+
require 'spec_helper'
|
3
2
|
|
4
3
|
describe UrlScrubber do
|
5
4
|
describe '.scrub' do
|
@@ -177,6 +176,19 @@ describe UrlScrubber do
|
|
177
176
|
describe 'with vimeo urls' do
|
178
177
|
pending
|
179
178
|
end
|
179
|
+
|
180
|
+
describe 'with instagram urls' do
|
181
|
+
it 'should drop www. from the beginning' do
|
182
|
+
UrlScrubber.scrub('http://www.instagram.com/username').should eq('http://instagram.com/username')
|
183
|
+
end
|
184
|
+
end
|
185
|
+
|
186
|
+
describe 'with tumblr urls' do
|
187
|
+
it 'should drop www. from the beginning' do
|
188
|
+
UrlScrubber.scrub('http://www.cisco.tumblr.com').should eq('http://cisco.tumblr.com')
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
180
192
|
end
|
181
193
|
|
182
194
|
describe '.service_of' do
|
@@ -192,6 +204,11 @@ describe UrlScrubber do
|
|
192
204
|
UrlScrubber.service_of('http://facebook.com/person.name').should eq :facebook
|
193
205
|
end
|
194
206
|
|
207
|
+
it 'does not return :facebook for a non-facebook url with facebook in the name' do
|
208
|
+
UrlScrubber.service_of('http://foofacebook.com/').should_not eq :facebook
|
209
|
+
UrlScrubber.service_of('http://facebookfoo.com/').should_not eq :facebook
|
210
|
+
end
|
211
|
+
|
195
212
|
it 'returns :linkedin for LinkedIn urls' do
|
196
213
|
UrlScrubber.service_of('http://linkedin.com/company/1337').should eq :linkedin
|
197
214
|
end
|
@@ -220,6 +237,14 @@ describe UrlScrubber do
|
|
220
237
|
UrlScrubber.service_of('http://vimeo.com/absolutely').should eq :vimeo
|
221
238
|
end
|
222
239
|
|
240
|
+
it 'returns :instagram for Instagram urls' do
|
241
|
+
UrlScrubber.service_of('http://instagram.com/absolutely').should eq :instagram
|
242
|
+
end
|
243
|
+
|
244
|
+
it 'returns :tumblr for Tumblr urls' do
|
245
|
+
UrlScrubber.service_of('http://cisco.tumblr.com').should eq :tumblr
|
246
|
+
end
|
247
|
+
|
223
248
|
it 'returns :other for other urls' do
|
224
249
|
UrlScrubber.service_of('http://example.com/page').should eq :other
|
225
250
|
end
|
@@ -336,6 +361,26 @@ describe UrlScrubber do
|
|
336
361
|
end
|
337
362
|
end
|
338
363
|
|
364
|
+
describe 'for instagram' do
|
365
|
+
it 'returns true for apparent user urls' do
|
366
|
+
UrlScrubber.ideal_form?('http://instagram.com/username').should be_true
|
367
|
+
end
|
368
|
+
|
369
|
+
it 'returns false for other urls' do
|
370
|
+
UrlScrubber.ideal_form?('http://instagram.com/532513451524').should be_false
|
371
|
+
end
|
372
|
+
end
|
373
|
+
|
374
|
+
describe 'for tumblr' do
|
375
|
+
it 'returns true for apparent business urls' do
|
376
|
+
UrlScrubber.ideal_form?('http://cisco.tumblr.com').should be_true
|
377
|
+
end
|
378
|
+
|
379
|
+
it 'returns false for user urls' do
|
380
|
+
UrlScrubber.ideal_form?('http://tumblr.com/joe').should be_false
|
381
|
+
end
|
382
|
+
end
|
383
|
+
|
339
384
|
describe 'for other sites' do
|
340
385
|
it 'returns true for any other site, really' do
|
341
386
|
UrlScrubber.ideal_form?('http://example.com/absolutely/anything').should be_true
|
data/url_scrubber.gemspec
CHANGED
@@ -2,8 +2,8 @@
|
|
2
2
|
require File.expand_path('../lib/url_scrubber/version', __FILE__)
|
3
3
|
|
4
4
|
Gem::Specification.new do |gem|
|
5
|
-
gem.authors = ["Colin Langton", "Christopher Maujean"]
|
6
|
-
gem.email = ["colin@hoteldelta.net", "cmaujean@brandle.net"]
|
5
|
+
gem.authors = ["Colin Langton", "Christopher Maujean", "David Hillard"]
|
6
|
+
gem.email = ["colin@hoteldelta.net", "cmaujean@brandle.net", "dhillard@brandle.net"]
|
7
7
|
gem.description = %q{Remove extraneous bits from URLs, follow redirects, identify social media urls, etc.}
|
8
8
|
gem.summary = %q{Clean up URLs.}
|
9
9
|
gem.homepage = "http://brandle.net"
|
metadata
CHANGED
@@ -1,20 +1,21 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: url_scrubber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.3
|
4
|
+
version: 0.7.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Colin Langton
|
9
9
|
- Christopher Maujean
|
10
|
+
- David Hillard
|
10
11
|
autorequire:
|
11
12
|
bindir: bin
|
12
13
|
cert_chain: []
|
13
|
-
date: 2013-
|
14
|
+
date: 2013-08-28 00:00:00.000000000 Z
|
14
15
|
dependencies:
|
15
16
|
- !ruby/object:Gem::Dependency
|
16
17
|
name: rspec
|
17
|
-
requirement:
|
18
|
+
requirement: !ruby/object:Gem::Requirement
|
18
19
|
none: false
|
19
20
|
requirements:
|
20
21
|
- - ~>
|
@@ -22,10 +23,15 @@ dependencies:
|
|
22
23
|
version: 2.11.0
|
23
24
|
type: :development
|
24
25
|
prerelease: false
|
25
|
-
version_requirements:
|
26
|
+
version_requirements: !ruby/object:Gem::Requirement
|
27
|
+
none: false
|
28
|
+
requirements:
|
29
|
+
- - ~>
|
30
|
+
- !ruby/object:Gem::Version
|
31
|
+
version: 2.11.0
|
26
32
|
- !ruby/object:Gem::Dependency
|
27
33
|
name: guard-bundler
|
28
|
-
requirement:
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
29
35
|
none: false
|
30
36
|
requirements:
|
31
37
|
- - ~>
|
@@ -33,10 +39,15 @@ dependencies:
|
|
33
39
|
version: 0.1.3
|
34
40
|
type: :development
|
35
41
|
prerelease: false
|
36
|
-
version_requirements:
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.1.3
|
37
48
|
- !ruby/object:Gem::Dependency
|
38
49
|
name: guard-rspec
|
39
|
-
requirement:
|
50
|
+
requirement: !ruby/object:Gem::Requirement
|
40
51
|
none: false
|
41
52
|
requirements:
|
42
53
|
- - ~>
|
@@ -44,10 +55,15 @@ dependencies:
|
|
44
55
|
version: 0.4.3
|
45
56
|
type: :development
|
46
57
|
prerelease: false
|
47
|
-
version_requirements:
|
58
|
+
version_requirements: !ruby/object:Gem::Requirement
|
59
|
+
none: false
|
60
|
+
requirements:
|
61
|
+
- - ~>
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: 0.4.3
|
48
64
|
- !ruby/object:Gem::Dependency
|
49
65
|
name: terminal-notifier-guard
|
50
|
-
requirement:
|
66
|
+
requirement: !ruby/object:Gem::Requirement
|
51
67
|
none: false
|
52
68
|
requirements:
|
53
69
|
- - ! '>='
|
@@ -55,10 +71,15 @@ dependencies:
|
|
55
71
|
version: '0'
|
56
72
|
type: :development
|
57
73
|
prerelease: false
|
58
|
-
version_requirements:
|
74
|
+
version_requirements: !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
76
|
+
requirements:
|
77
|
+
- - ! '>='
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
59
80
|
- !ruby/object:Gem::Dependency
|
60
81
|
name: rb-fsevent
|
61
|
-
requirement:
|
82
|
+
requirement: !ruby/object:Gem::Requirement
|
62
83
|
none: false
|
63
84
|
requirements:
|
64
85
|
- - ~>
|
@@ -66,12 +87,18 @@ dependencies:
|
|
66
87
|
version: 0.9.1
|
67
88
|
type: :development
|
68
89
|
prerelease: false
|
69
|
-
version_requirements:
|
90
|
+
version_requirements: !ruby/object:Gem::Requirement
|
91
|
+
none: false
|
92
|
+
requirements:
|
93
|
+
- - ~>
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: 0.9.1
|
70
96
|
description: Remove extraneous bits from URLs, follow redirects, identify social media
|
71
97
|
urls, etc.
|
72
98
|
email:
|
73
99
|
- colin@hoteldelta.net
|
74
100
|
- cmaujean@brandle.net
|
101
|
+
- dhillard@brandle.net
|
75
102
|
executables: []
|
76
103
|
extensions: []
|
77
104
|
extra_rdoc_files: []
|
@@ -84,6 +111,7 @@ files:
|
|
84
111
|
- Rakefile
|
85
112
|
- lib/url_scrubber.rb
|
86
113
|
- lib/url_scrubber/version.rb
|
114
|
+
- spec/spec_helper.rb
|
87
115
|
- spec/url_scrubber_spec.rb
|
88
116
|
- url_scrubber.gemspec
|
89
117
|
homepage: http://brandle.net
|
@@ -106,9 +134,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
106
134
|
version: '0'
|
107
135
|
requirements: []
|
108
136
|
rubyforge_project:
|
109
|
-
rubygems_version: 1.8.
|
137
|
+
rubygems_version: 1.8.21
|
110
138
|
signing_key:
|
111
139
|
specification_version: 3
|
112
140
|
summary: Clean up URLs.
|
113
141
|
test_files:
|
142
|
+
- spec/spec_helper.rb
|
114
143
|
- spec/url_scrubber_spec.rb
|