url_scrubber 0.7.3 → 0.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.rvmrc +1 -48
- data/lib/url_scrubber.rb +20 -1
- data/lib/url_scrubber/version.rb +1 -1
- data/spec/spec_helper.rb +2 -0
- data/spec/url_scrubber_spec.rb +47 -2
- data/url_scrubber.gemspec +2 -2
- metadata +42 -13
data/.rvmrc
CHANGED
@@ -1,48 +1 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
# This is an RVM Project .rvmrc file, used to automatically load the ruby
|
4
|
-
# development environment upon cd'ing into the directory
|
5
|
-
|
6
|
-
# First we specify our desired <ruby>[@<gemset>], the @gemset name is optional,
|
7
|
-
# Only full ruby name is supported here, for short names use:
|
8
|
-
# echo "rvm use 1.9.3" > .rvmrc
|
9
|
-
environment_id="ruby-1.9.3-p194@url_scrubber"
|
10
|
-
|
11
|
-
# Uncomment the following lines if you want to verify rvm version per project
|
12
|
-
# rvmrc_rvm_version="1.16.19 (stable)" # 1.10.1 seams as a safe start
|
13
|
-
# eval "$(echo ${rvm_version}.${rvmrc_rvm_version} | awk -F. '{print "[[ "$1*65536+$2*256+$3" -ge "$4*65536+$5*256+$6" ]]"}' )" || {
|
14
|
-
# echo "This .rvmrc file requires at least RVM ${rvmrc_rvm_version}, aborting loading."
|
15
|
-
# return 1
|
16
|
-
# }
|
17
|
-
|
18
|
-
# First we attempt to load the desired environment directly from the environment
|
19
|
-
# file. This is very fast and efficient compared to running through the entire
|
20
|
-
# CLI and selector. If you want feedback on which environment was used then
|
21
|
-
# insert the word 'use' after --create as this triggers verbose mode.
|
22
|
-
if [[ -d "${rvm_path:-$HOME/.rvm}/environments"
|
23
|
-
&& -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]]
|
24
|
-
then
|
25
|
-
\. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
|
26
|
-
[[ -s "${rvm_path:-$HOME/.rvm}/hooks/after_use" ]] &&
|
27
|
-
\. "${rvm_path:-$HOME/.rvm}/hooks/after_use" || true
|
28
|
-
else
|
29
|
-
# If the environment file has not yet been created, use the RVM CLI to select.
|
30
|
-
rvm --create "$environment_id" || {
|
31
|
-
echo "Failed to create RVM environment '${environment_id}'."
|
32
|
-
return 1
|
33
|
-
}
|
34
|
-
fi
|
35
|
-
|
36
|
-
# If you use bundler, this might be useful to you:
|
37
|
-
# if [[ -s Gemfile ]] && {
|
38
|
-
# ! builtin command -v bundle >/dev/null ||
|
39
|
-
# builtin command -v bundle | GREP_OPTIONS= \grep $rvm_path/bin/bundle >/dev/null
|
40
|
-
# }
|
41
|
-
# then
|
42
|
-
# printf "%b" "The rubygem 'bundler' is not installed. Installing it now.\n"
|
43
|
-
# gem install bundler
|
44
|
-
# fi
|
45
|
-
# if [[ -s Gemfile ]] && builtin command -v bundle >/dev/null
|
46
|
-
# then
|
47
|
-
# bundle install | GREP_OPTIONS= \grep -vE '^Using|Your bundle is complete'
|
48
|
-
# fi
|
1
|
+
rvm use 1.9.3@url_scrubber --create
|
data/lib/url_scrubber.rb
CHANGED
@@ -16,10 +16,21 @@ module UrlScrubber
|
|
16
16
|
end
|
17
17
|
|
18
18
|
def self.service_of(url)
|
19
|
+
|
19
20
|
domain_match = url.match(%r{https?://([^/]+)})
|
20
21
|
|
21
22
|
if domain_match
|
22
|
-
domain = domain_match[1]
|
23
|
+
domain = domain_match[1]
|
24
|
+
first_dot = domain.index(".")
|
25
|
+
|
26
|
+
#first_dot_position = domain.index(".")
|
27
|
+
#first_dot_position += 1 if first_dot_position
|
28
|
+
|
29
|
+
#Rails.logger.debug "domain = #{domain}, first dot = #{first_dot ? first_dot : 'none'}, first dot 1= #{first_dot ? domain[first_dot+1..domain.size] : 'NIL'}"
|
30
|
+
if first_dot
|
31
|
+
# tumblr is a unique format
|
32
|
+
return :tumblr if domain[first_dot+1..domain.size].index("tumblr.com") == 0
|
33
|
+
end
|
23
34
|
|
24
35
|
case domain
|
25
36
|
when /\byoutube\.com$/ then return :youtube
|
@@ -31,8 +42,11 @@ module UrlScrubber
|
|
31
42
|
when /\bflickr\.com$/ then return :flickr
|
32
43
|
when /\bpinterest\.com$/ then return :pinterest
|
33
44
|
when /\bvimeo\.com$/ then return :vimeo
|
45
|
+
when /\binstagram\.com$/ then return :instagram
|
34
46
|
when /\byelp\.com$/ then return :yelp
|
35
47
|
end
|
48
|
+
else
|
49
|
+
Rails.logger.debug "No Domain Match"
|
36
50
|
end
|
37
51
|
|
38
52
|
:other
|
@@ -63,6 +77,11 @@ module UrlScrubber
|
|
63
77
|
!!url.match(%r{^http://yelp\.com/[\w_-]+$})
|
64
78
|
when :vimeo
|
65
79
|
!!url.match(%r{^http://vimeo\.com/[\w_-]+$}) && !url.match(%r{/\d+$})
|
80
|
+
when :instagram
|
81
|
+
!!url.match(%r{^http://instagram\.com/[\w_]+$})
|
82
|
+
when :tumblr
|
83
|
+
#Rails.logger.debug "CCC Tumblr - url=#{url}, ideal=#{!!url.match(%r{^http://[\w_]+\.tumblr\.com$})}, www=#{url.index("://www.") ? url.index("://www.") : 'NIL'}"
|
84
|
+
!!url.match(%r{^http://[\w_]+\.tumblr\.com$}) && !url.index("://www.")
|
66
85
|
else
|
67
86
|
true
|
68
87
|
end
|
data/lib/url_scrubber/version.rb
CHANGED
data/spec/spec_helper.rb
ADDED
data/spec/url_scrubber_spec.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
|
-
require '
|
2
|
-
require 'url_scrubber'
|
1
|
+
require 'spec_helper'
|
3
2
|
|
4
3
|
describe UrlScrubber do
|
5
4
|
describe '.scrub' do
|
@@ -177,6 +176,19 @@ describe UrlScrubber do
|
|
177
176
|
describe 'with vimeo urls' do
|
178
177
|
pending
|
179
178
|
end
|
179
|
+
|
180
|
+
describe 'with instagram urls' do
|
181
|
+
it 'should drop www. from the beginning' do
|
182
|
+
UrlScrubber.scrub('http://www.instagram.com/username').should eq('http://instagram.com/username')
|
183
|
+
end
|
184
|
+
end
|
185
|
+
|
186
|
+
describe 'with tumblr urls' do
|
187
|
+
it 'should drop www. from the beginning' do
|
188
|
+
UrlScrubber.scrub('http://www.cisco.tumblr.com').should eq('http://cisco.tumblr.com')
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
180
192
|
end
|
181
193
|
|
182
194
|
describe '.service_of' do
|
@@ -192,6 +204,11 @@ describe UrlScrubber do
|
|
192
204
|
UrlScrubber.service_of('http://facebook.com/person.name').should eq :facebook
|
193
205
|
end
|
194
206
|
|
207
|
+
it 'does not return :facebook for a non-facebook url with facebook in the name' do
|
208
|
+
UrlScrubber.service_of('http://foofacebook.com/').should_not eq :facebook
|
209
|
+
UrlScrubber.service_of('http://facebookfoo.com/').should_not eq :facebook
|
210
|
+
end
|
211
|
+
|
195
212
|
it 'returns :linkedin for LinkedIn urls' do
|
196
213
|
UrlScrubber.service_of('http://linkedin.com/company/1337').should eq :linkedin
|
197
214
|
end
|
@@ -220,6 +237,14 @@ describe UrlScrubber do
|
|
220
237
|
UrlScrubber.service_of('http://vimeo.com/absolutely').should eq :vimeo
|
221
238
|
end
|
222
239
|
|
240
|
+
it 'returns :instagram for Instagram urls' do
|
241
|
+
UrlScrubber.service_of('http://instagram.com/absolutely').should eq :instagram
|
242
|
+
end
|
243
|
+
|
244
|
+
it 'returns :tumblr for Tumblr urls' do
|
245
|
+
UrlScrubber.service_of('http://cisco.tumblr.com').should eq :tumblr
|
246
|
+
end
|
247
|
+
|
223
248
|
it 'returns :other for other urls' do
|
224
249
|
UrlScrubber.service_of('http://example.com/page').should eq :other
|
225
250
|
end
|
@@ -336,6 +361,26 @@ describe UrlScrubber do
|
|
336
361
|
end
|
337
362
|
end
|
338
363
|
|
364
|
+
describe 'for instagram' do
|
365
|
+
it 'returns true for apparent user urls' do
|
366
|
+
UrlScrubber.ideal_form?('http://instagram.com/username').should be_true
|
367
|
+
end
|
368
|
+
|
369
|
+
it 'returns false for other urls' do
|
370
|
+
UrlScrubber.ideal_form?('http://instagram.com/532513451524').should be_false
|
371
|
+
end
|
372
|
+
end
|
373
|
+
|
374
|
+
describe 'for tumblr' do
|
375
|
+
it 'returns true for apparent business urls' do
|
376
|
+
UrlScrubber.ideal_form?('http://cisco.tumblr.com').should be_true
|
377
|
+
end
|
378
|
+
|
379
|
+
it 'returns false for user urls' do
|
380
|
+
UrlScrubber.ideal_form?('http://tumblr.com/joe').should be_false
|
381
|
+
end
|
382
|
+
end
|
383
|
+
|
339
384
|
describe 'for other sites' do
|
340
385
|
it 'returns true for any other site, really' do
|
341
386
|
UrlScrubber.ideal_form?('http://example.com/absolutely/anything').should be_true
|
data/url_scrubber.gemspec
CHANGED
@@ -2,8 +2,8 @@
|
|
2
2
|
require File.expand_path('../lib/url_scrubber/version', __FILE__)
|
3
3
|
|
4
4
|
Gem::Specification.new do |gem|
|
5
|
-
gem.authors = ["Colin Langton", "Christopher Maujean"]
|
6
|
-
gem.email = ["colin@hoteldelta.net", "cmaujean@brandle.net"]
|
5
|
+
gem.authors = ["Colin Langton", "Christopher Maujean", "David Hillard"]
|
6
|
+
gem.email = ["colin@hoteldelta.net", "cmaujean@brandle.net", "dhillard@brandle.net"]
|
7
7
|
gem.description = %q{Remove extraneous bits from URLs, follow redirects, identify social media urls, etc.}
|
8
8
|
gem.summary = %q{Clean up URLs.}
|
9
9
|
gem.homepage = "http://brandle.net"
|
metadata
CHANGED
@@ -1,20 +1,21 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: url_scrubber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.3
|
4
|
+
version: 0.7.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Colin Langton
|
9
9
|
- Christopher Maujean
|
10
|
+
- David Hillard
|
10
11
|
autorequire:
|
11
12
|
bindir: bin
|
12
13
|
cert_chain: []
|
13
|
-
date: 2013-
|
14
|
+
date: 2013-08-28 00:00:00.000000000 Z
|
14
15
|
dependencies:
|
15
16
|
- !ruby/object:Gem::Dependency
|
16
17
|
name: rspec
|
17
|
-
requirement:
|
18
|
+
requirement: !ruby/object:Gem::Requirement
|
18
19
|
none: false
|
19
20
|
requirements:
|
20
21
|
- - ~>
|
@@ -22,10 +23,15 @@ dependencies:
|
|
22
23
|
version: 2.11.0
|
23
24
|
type: :development
|
24
25
|
prerelease: false
|
25
|
-
version_requirements:
|
26
|
+
version_requirements: !ruby/object:Gem::Requirement
|
27
|
+
none: false
|
28
|
+
requirements:
|
29
|
+
- - ~>
|
30
|
+
- !ruby/object:Gem::Version
|
31
|
+
version: 2.11.0
|
26
32
|
- !ruby/object:Gem::Dependency
|
27
33
|
name: guard-bundler
|
28
|
-
requirement:
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
29
35
|
none: false
|
30
36
|
requirements:
|
31
37
|
- - ~>
|
@@ -33,10 +39,15 @@ dependencies:
|
|
33
39
|
version: 0.1.3
|
34
40
|
type: :development
|
35
41
|
prerelease: false
|
36
|
-
version_requirements:
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.1.3
|
37
48
|
- !ruby/object:Gem::Dependency
|
38
49
|
name: guard-rspec
|
39
|
-
requirement:
|
50
|
+
requirement: !ruby/object:Gem::Requirement
|
40
51
|
none: false
|
41
52
|
requirements:
|
42
53
|
- - ~>
|
@@ -44,10 +55,15 @@ dependencies:
|
|
44
55
|
version: 0.4.3
|
45
56
|
type: :development
|
46
57
|
prerelease: false
|
47
|
-
version_requirements:
|
58
|
+
version_requirements: !ruby/object:Gem::Requirement
|
59
|
+
none: false
|
60
|
+
requirements:
|
61
|
+
- - ~>
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: 0.4.3
|
48
64
|
- !ruby/object:Gem::Dependency
|
49
65
|
name: terminal-notifier-guard
|
50
|
-
requirement:
|
66
|
+
requirement: !ruby/object:Gem::Requirement
|
51
67
|
none: false
|
52
68
|
requirements:
|
53
69
|
- - ! '>='
|
@@ -55,10 +71,15 @@ dependencies:
|
|
55
71
|
version: '0'
|
56
72
|
type: :development
|
57
73
|
prerelease: false
|
58
|
-
version_requirements:
|
74
|
+
version_requirements: !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
76
|
+
requirements:
|
77
|
+
- - ! '>='
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
59
80
|
- !ruby/object:Gem::Dependency
|
60
81
|
name: rb-fsevent
|
61
|
-
requirement:
|
82
|
+
requirement: !ruby/object:Gem::Requirement
|
62
83
|
none: false
|
63
84
|
requirements:
|
64
85
|
- - ~>
|
@@ -66,12 +87,18 @@ dependencies:
|
|
66
87
|
version: 0.9.1
|
67
88
|
type: :development
|
68
89
|
prerelease: false
|
69
|
-
version_requirements:
|
90
|
+
version_requirements: !ruby/object:Gem::Requirement
|
91
|
+
none: false
|
92
|
+
requirements:
|
93
|
+
- - ~>
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: 0.9.1
|
70
96
|
description: Remove extraneous bits from URLs, follow redirects, identify social media
|
71
97
|
urls, etc.
|
72
98
|
email:
|
73
99
|
- colin@hoteldelta.net
|
74
100
|
- cmaujean@brandle.net
|
101
|
+
- dhillard@brandle.net
|
75
102
|
executables: []
|
76
103
|
extensions: []
|
77
104
|
extra_rdoc_files: []
|
@@ -84,6 +111,7 @@ files:
|
|
84
111
|
- Rakefile
|
85
112
|
- lib/url_scrubber.rb
|
86
113
|
- lib/url_scrubber/version.rb
|
114
|
+
- spec/spec_helper.rb
|
87
115
|
- spec/url_scrubber_spec.rb
|
88
116
|
- url_scrubber.gemspec
|
89
117
|
homepage: http://brandle.net
|
@@ -106,9 +134,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
106
134
|
version: '0'
|
107
135
|
requirements: []
|
108
136
|
rubyforge_project:
|
109
|
-
rubygems_version: 1.8.
|
137
|
+
rubygems_version: 1.8.21
|
110
138
|
signing_key:
|
111
139
|
specification_version: 3
|
112
140
|
summary: Clean up URLs.
|
113
141
|
test_files:
|
142
|
+
- spec/spec_helper.rb
|
114
143
|
- spec/url_scrubber_spec.rb
|