url_scrubber 0.7.3 → 0.7.4

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
data/.rvmrc CHANGED
@@ -1,48 +1 @@
1
- #!/usr/bin/env bash
2
-
3
- # This is an RVM Project .rvmrc file, used to automatically load the ruby
4
- # development environment upon cd'ing into the directory
5
-
6
- # First we specify our desired <ruby>[@<gemset>], the @gemset name is optional,
7
- # Only full ruby name is supported here, for short names use:
8
- # echo "rvm use 1.9.3" > .rvmrc
9
- environment_id="ruby-1.9.3-p194@url_scrubber"
10
-
11
- # Uncomment the following lines if you want to verify rvm version per project
12
- # rvmrc_rvm_version="1.16.19 (stable)" # 1.10.1 seams as a safe start
13
- # eval "$(echo ${rvm_version}.${rvmrc_rvm_version} | awk -F. '{print "[[ "$1*65536+$2*256+$3" -ge "$4*65536+$5*256+$6" ]]"}' )" || {
14
- # echo "This .rvmrc file requires at least RVM ${rvmrc_rvm_version}, aborting loading."
15
- # return 1
16
- # }
17
-
18
- # First we attempt to load the desired environment directly from the environment
19
- # file. This is very fast and efficient compared to running through the entire
20
- # CLI and selector. If you want feedback on which environment was used then
21
- # insert the word 'use' after --create as this triggers verbose mode.
22
- if [[ -d "${rvm_path:-$HOME/.rvm}/environments"
23
- && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]]
24
- then
25
- \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
26
- [[ -s "${rvm_path:-$HOME/.rvm}/hooks/after_use" ]] &&
27
- \. "${rvm_path:-$HOME/.rvm}/hooks/after_use" || true
28
- else
29
- # If the environment file has not yet been created, use the RVM CLI to select.
30
- rvm --create "$environment_id" || {
31
- echo "Failed to create RVM environment '${environment_id}'."
32
- return 1
33
- }
34
- fi
35
-
36
- # If you use bundler, this might be useful to you:
37
- # if [[ -s Gemfile ]] && {
38
- # ! builtin command -v bundle >/dev/null ||
39
- # builtin command -v bundle | GREP_OPTIONS= \grep $rvm_path/bin/bundle >/dev/null
40
- # }
41
- # then
42
- # printf "%b" "The rubygem 'bundler' is not installed. Installing it now.\n"
43
- # gem install bundler
44
- # fi
45
- # if [[ -s Gemfile ]] && builtin command -v bundle >/dev/null
46
- # then
47
- # bundle install | GREP_OPTIONS= \grep -vE '^Using|Your bundle is complete'
48
- # fi
1
+ rvm use 1.9.3@url_scrubber --create
data/lib/url_scrubber.rb CHANGED
@@ -16,10 +16,21 @@ module UrlScrubber
16
16
  end
17
17
 
18
18
  def self.service_of(url)
19
+
19
20
  domain_match = url.match(%r{https?://([^/]+)})
20
21
 
21
22
  if domain_match
22
- domain = domain_match[1]
23
+ domain = domain_match[1]
24
+ first_dot = domain.index(".")
25
+
26
+ #first_dot_position = domain.index(".")
27
+ #first_dot_position += 1 if first_dot_position
28
+
29
+ #Rails.logger.debug "domain = #{domain}, first dot = #{first_dot ? first_dot : 'none'}, first dot 1= #{first_dot ? domain[first_dot+1..domain.size] : 'NIL'}"
30
+ if first_dot
31
+ # tumblr is a unique format
32
+ return :tumblr if domain[first_dot+1..domain.size].index("tumblr.com") == 0
33
+ end
23
34
 
24
35
  case domain
25
36
  when /\byoutube\.com$/ then return :youtube
@@ -31,8 +42,11 @@ module UrlScrubber
31
42
  when /\bflickr\.com$/ then return :flickr
32
43
  when /\bpinterest\.com$/ then return :pinterest
33
44
  when /\bvimeo\.com$/ then return :vimeo
45
+ when /\binstagram\.com$/ then return :instagram
34
46
  when /\byelp\.com$/ then return :yelp
35
47
  end
48
+ else
49
+ Rails.logger.debug "No Domain Match"
36
50
  end
37
51
 
38
52
  :other
@@ -63,6 +77,11 @@ module UrlScrubber
63
77
  !!url.match(%r{^http://yelp\.com/[\w_-]+$})
64
78
  when :vimeo
65
79
  !!url.match(%r{^http://vimeo\.com/[\w_-]+$}) && !url.match(%r{/\d+$})
80
+ when :instagram
81
+ !!url.match(%r{^http://instagram\.com/[\w_]+$})
82
+ when :tumblr
83
+ #Rails.logger.debug "CCC Tumblr - url=#{url}, ideal=#{!!url.match(%r{^http://[\w_]+\.tumblr\.com$})}, www=#{url.index("://www.") ? url.index("://www.") : 'NIL'}"
84
+ !!url.match(%r{^http://[\w_]+\.tumblr\.com$}) && !url.index("://www.")
66
85
  else
67
86
  true
68
87
  end
@@ -1,3 +1,3 @@
1
1
  module UrlScrubber
2
- VERSION = "0.7.3"
2
+ VERSION = "0.7.4"
3
3
  end
@@ -0,0 +1,2 @@
1
+ require 'rspec'
2
+ require_relative '../lib/url_scrubber'
@@ -1,5 +1,4 @@
1
- require 'rspec'
2
- require 'url_scrubber'
1
+ require 'spec_helper'
3
2
 
4
3
  describe UrlScrubber do
5
4
  describe '.scrub' do
@@ -177,6 +176,19 @@ describe UrlScrubber do
177
176
  describe 'with vimeo urls' do
178
177
  pending
179
178
  end
179
+
180
+ describe 'with instagram urls' do
181
+ it 'should drop www. from the beginning' do
182
+ UrlScrubber.scrub('http://www.instagram.com/username').should eq('http://instagram.com/username')
183
+ end
184
+ end
185
+
186
+ describe 'with tumblr urls' do
187
+ it 'should drop www. from the beginning' do
188
+ UrlScrubber.scrub('http://www.cisco.tumblr.com').should eq('http://cisco.tumblr.com')
189
+ end
190
+ end
191
+
180
192
  end
181
193
 
182
194
  describe '.service_of' do
@@ -192,6 +204,11 @@ describe UrlScrubber do
192
204
  UrlScrubber.service_of('http://facebook.com/person.name').should eq :facebook
193
205
  end
194
206
 
207
+ it 'does not return :facebook for a non-facebook url with facebook in the name' do
208
+ UrlScrubber.service_of('http://foofacebook.com/').should_not eq :facebook
209
+ UrlScrubber.service_of('http://facebookfoo.com/').should_not eq :facebook
210
+ end
211
+
195
212
  it 'returns :linkedin for LinkedIn urls' do
196
213
  UrlScrubber.service_of('http://linkedin.com/company/1337').should eq :linkedin
197
214
  end
@@ -220,6 +237,14 @@ describe UrlScrubber do
220
237
  UrlScrubber.service_of('http://vimeo.com/absolutely').should eq :vimeo
221
238
  end
222
239
 
240
+ it 'returns :instagram for Instagram urls' do
241
+ UrlScrubber.service_of('http://instagram.com/absolutely').should eq :instagram
242
+ end
243
+
244
+ it 'returns :tumblr for Tumblr urls' do
245
+ UrlScrubber.service_of('http://cisco.tumblr.com).should eq :tumblr
246
+ end
247
+
223
248
  it 'returns :other for other urls' do
224
249
  UrlScrubber.service_of('http://example.com/page').should eq :other
225
250
  end
@@ -336,6 +361,26 @@ describe UrlScrubber do
336
361
  end
337
362
  end
338
363
 
364
+ describe 'for instagram' do
365
+ it 'returns true for apparent user urls' do
366
+ UrlScrubber.ideal_form?('http://instagram.com/username').should be_true
367
+ end
368
+
369
+ it 'returns false for other urls' do
370
+ UrlScrubber.ideal_form?('http://instagram.com/532513451524').should be_false
371
+ end
372
+ end
373
+
374
+ describe 'for tumblr' do
375
+ it 'returns true for apparent business urls' do
376
+ UrlScrubber.ideal_form?('http://cisco.tumblr.com').should be_true
377
+ end
378
+
379
+ it 'returns false for user urls' do
380
+ UrlScrubber.ideal_form?('http://tumblr.com/joe').should be_false
381
+ end
382
+ end
383
+
339
384
  describe 'for other sites' do
340
385
  it 'returns true for any other site, really' do
341
386
  UrlScrubber.ideal_form?('http://example.com/absolutely/anything').should be_true
data/url_scrubber.gemspec CHANGED
@@ -2,8 +2,8 @@
2
2
  require File.expand_path('../lib/url_scrubber/version', __FILE__)
3
3
 
4
4
  Gem::Specification.new do |gem|
5
- gem.authors = ["Colin Langton", "Christopher Maujean"]
6
- gem.email = ["colin@hoteldelta.net", "cmaujean@brandle.net"]
5
+ gem.authors = ["Colin Langton", "Christopher Maujean", "David Hillard"]
6
+ gem.email = ["colin@hoteldelta.net", "cmaujean@brandle.net", "dhillard@brandle.net"]
7
7
  gem.description = %q{Remove extraneous bits from URLs, follow redirects, identify social media urls, etc.}
8
8
  gem.summary = %q{Clean up URLs.}
9
9
  gem.homepage = "http://brandle.net"
metadata CHANGED
@@ -1,20 +1,21 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: url_scrubber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.3
4
+ version: 0.7.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
8
8
  - Colin Langton
9
9
  - Christopher Maujean
10
+ - David Hillard
10
11
  autorequire:
11
12
  bindir: bin
12
13
  cert_chain: []
13
- date: 2013-06-06 00:00:00.000000000 Z
14
+ date: 2013-08-28 00:00:00.000000000 Z
14
15
  dependencies:
15
16
  - !ruby/object:Gem::Dependency
16
17
  name: rspec
17
- requirement: &2152156960 !ruby/object:Gem::Requirement
18
+ requirement: !ruby/object:Gem::Requirement
18
19
  none: false
19
20
  requirements:
20
21
  - - ~>
@@ -22,10 +23,15 @@ dependencies:
22
23
  version: 2.11.0
23
24
  type: :development
24
25
  prerelease: false
25
- version_requirements: *2152156960
26
+ version_requirements: !ruby/object:Gem::Requirement
27
+ none: false
28
+ requirements:
29
+ - - ~>
30
+ - !ruby/object:Gem::Version
31
+ version: 2.11.0
26
32
  - !ruby/object:Gem::Dependency
27
33
  name: guard-bundler
28
- requirement: &2152155880 !ruby/object:Gem::Requirement
34
+ requirement: !ruby/object:Gem::Requirement
29
35
  none: false
30
36
  requirements:
31
37
  - - ~>
@@ -33,10 +39,15 @@ dependencies:
33
39
  version: 0.1.3
34
40
  type: :development
35
41
  prerelease: false
36
- version_requirements: *2152155880
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: 0.1.3
37
48
  - !ruby/object:Gem::Dependency
38
49
  name: guard-rspec
39
- requirement: &2152154720 !ruby/object:Gem::Requirement
50
+ requirement: !ruby/object:Gem::Requirement
40
51
  none: false
41
52
  requirements:
42
53
  - - ~>
@@ -44,10 +55,15 @@ dependencies:
44
55
  version: 0.4.3
45
56
  type: :development
46
57
  prerelease: false
47
- version_requirements: *2152154720
58
+ version_requirements: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ~>
62
+ - !ruby/object:Gem::Version
63
+ version: 0.4.3
48
64
  - !ruby/object:Gem::Dependency
49
65
  name: terminal-notifier-guard
50
- requirement: &2152153980 !ruby/object:Gem::Requirement
66
+ requirement: !ruby/object:Gem::Requirement
51
67
  none: false
52
68
  requirements:
53
69
  - - ! '>='
@@ -55,10 +71,15 @@ dependencies:
55
71
  version: '0'
56
72
  type: :development
57
73
  prerelease: false
58
- version_requirements: *2152153980
74
+ version_requirements: !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ! '>='
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
59
80
  - !ruby/object:Gem::Dependency
60
81
  name: rb-fsevent
61
- requirement: &2152153300 !ruby/object:Gem::Requirement
82
+ requirement: !ruby/object:Gem::Requirement
62
83
  none: false
63
84
  requirements:
64
85
  - - ~>
@@ -66,12 +87,18 @@ dependencies:
66
87
  version: 0.9.1
67
88
  type: :development
68
89
  prerelease: false
69
- version_requirements: *2152153300
90
+ version_requirements: !ruby/object:Gem::Requirement
91
+ none: false
92
+ requirements:
93
+ - - ~>
94
+ - !ruby/object:Gem::Version
95
+ version: 0.9.1
70
96
  description: Remove extraneous bits from URLs, follow redirects, identify social media
71
97
  urls, etc.
72
98
  email:
73
99
  - colin@hoteldelta.net
74
100
  - cmaujean@brandle.net
101
+ - dhillard@brandle.net
75
102
  executables: []
76
103
  extensions: []
77
104
  extra_rdoc_files: []
@@ -84,6 +111,7 @@ files:
84
111
  - Rakefile
85
112
  - lib/url_scrubber.rb
86
113
  - lib/url_scrubber/version.rb
114
+ - spec/spec_helper.rb
87
115
  - spec/url_scrubber_spec.rb
88
116
  - url_scrubber.gemspec
89
117
  homepage: http://brandle.net
@@ -106,9 +134,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
106
134
  version: '0'
107
135
  requirements: []
108
136
  rubyforge_project:
109
- rubygems_version: 1.8.16
137
+ rubygems_version: 1.8.21
110
138
  signing_key:
111
139
  specification_version: 3
112
140
  summary: Clean up URLs.
113
141
  test_files:
142
+ - spec/spec_helper.rb
114
143
  - spec/url_scrubber_spec.rb