debugher 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ html/
12
+ lib/bundler/man
13
+ pkg
14
+ rdoc
15
+ spec/reports
16
+ test/tmp
17
+ test/version_tmp
18
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,14 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in debugher.gemspec
4
+ gemspec
5
+
6
+ gem 'nokogiri'
7
+ gem "addressable"
8
+ gem 'robots'
9
+
10
+ group :development, :test do
11
+ gem 'rspec'
12
+ gem 'rack-test'
13
+ gem 'simplecov', :require => false
14
+ end
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Peter Roome
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,32 @@
1
+ # Debugher
2
+
3
+ A handy set of methods for getting various bits of information about a web page.
4
+
5
+ This is used by the Rakkit Debugger to output whatever information we can gather about individual pages on an ad hoc basis.
6
+ The library is also used by the Rakkit spider to process and index pages across the web.
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ gem 'debugher'
13
+
14
+ And then execute:
15
+
16
+ $ bundle
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install debugher
21
+
22
+ ## Usage
23
+
24
+ TODO: Write usage instructions here
25
+
26
+ ## Contributing
27
+
28
+ 1. Fork it
29
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
30
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
31
+ 4. Push to the branch (`git push origin my-new-feature`)
32
+ 5. Create new Pull Request
File without changes
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+ require 'rake/testtask'
4
+ require 'rdoc/task'
5
+
6
+ Rake::TestTask.new do |t|
7
+ t.libs << 'test'
8
+ end
9
+
10
+ desc "Run tests"
11
+ task :default => :test
12
+
13
+ Rake::RDocTask.new do |rd|
14
+ rd.main = "README.rdoc"
15
+ rd.rdoc_files.include("README.rdoc", "lib/**/*.rb")
16
+ end
17
+ desc "Generate documentation"
18
+ task :rdoc
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/debugher/version', __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.authors = ["Peter Roome"]
6
+ gem.email = ["pete@wearepandr.com"]
7
+ gem.description = %q{
8
+ A handy set of methods for getting various bits of information about a web page.
9
+ This is used by the Rakkit Debugger to output what information we can gather about various pages on an adhoc basis.
10
+ The library is also used by the Rakkit spider to process and index pages across the web.
11
+ }
12
+ gem.summary = %q{Methods for the Rakkit Debugger.}
13
+ gem.homepage = ""
14
+
15
+ gem.files = `git ls-files`.split($\)
16
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
17
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
+ gem.name = "debugher"
19
+ gem.require_paths = ["lib"]
20
+ gem.version = Debugher::VERSION
21
+
22
+ gem.add_dependency 'nokogiri'
23
+ gem.add_dependency "addressable"
24
+ gem.add_dependency 'robots'
25
+ end
@@ -0,0 +1,473 @@
1
+ require "debugher/version"
2
+
3
+ module Debugher
4
require 'cgi'
require 'open-uri'
require 'uri'

require 'addressable/uri'
require 'nokogiri'
require 'robots'
8
+
9
+ class Debugger
10
+ FILE_TYPES = ['.mp3', '.m4a', '.MP3']
11
+ attr_accessor :url
12
+
13
# Build a debugger for the given url and fetch the page straight away.
#
# Example:
#   >> Debugger.new("rakkit.com").url
#   => http://rakkit.com
#
# Arguments:
#   url: (String) an http(s) address; anything that does not parse as one
#        (e.g. a bare "rakkit.com") is prefixed with "http://"
def initialize(url)
  # Note: this makes stdout unbuffered for the whole process.
  $stdout.sync = true
  # is_a? rather than an exact class comparison: https addresses parse to
  # URI::HTTPS, a subclass of URI::HTTP, and must not get "http://" prepended.
  @url = URI.parse(url).is_a?(URI::HTTP) ? url : "http://#{url}"
  @uri = URI.parse(@url)
  @opened_url = open_url
end
21
+
22
# Fetch @uri, provided the site's robots.txt allows our user agent.
#
# Returns the object open-uri produces for the request (the other methods
# read status, content_type, charset, ... from it), or nil when robots.txt
# disallows the fetch or the request fails.
def open_url
  ua = Debugger.user_agent
  @robot = Robots.new(ua)
  return nil unless @robot.allowed?(@uri)

  begin
    # URI#open (mixed in by open-uri) instead of Kernel#open: the Kernel
    # override for URLs was removed in Ruby 3.0, URI#open works everywhere.
    @uri.open("User-Agent" => ua,
              "From"       => "hello@rakkit.com",
              "Referer"    => "http://rakkit.com")
  rescue StandardError => e
    # Most likely a 404 error. StandardError (not Exception) so that
    # interrupts and exit requests are not swallowed.
    $stderr.puts "Unable to open url: #{url} - #{e}"
    nil
  end
end
39
+
40
# Get the response code of the page
#
# Example:
#   >> Debugger.new("http://rakkit.com").response_code
#   => 200 OK
#
# Returns the status parts joined by a space, or nil when the page could
# not be fetched (open_url returned nil).
def response_code
  return nil if @opened_url.nil?

  @opened_url.status.join(" ")
end
48
+
49
+ # Return the fetched URL
50
+ #
51
+ # Example:
52
+ # >> Debugger.new("rakkit.com").fetched_url
53
+ # => http://rakkit.com
54
+ def fetched_url
55
+ @uri.to_s
56
+ end
57
+
58
# Get the canonical (root) url of the page: same scheme and host, with the
# path reduced to "/" and the query string removed.
#
# Example:
#   >> Debugger.new("http://rakkit.com/about").canonical_url
#   => http://rakkit.com/
#
# Returns the url as a String, or nil if it could not be built.
def canonical_url
  # Work on a copy: assigning path/query on @uri itself would silently
  # change what fetched_url reports and what later requests fetch.
  root = @uri.dup
  root.path = ''
  root.query = nil
  (root + "/").to_s
rescue StandardError => e
  puts "CANONICAL ERROR: #{e}"
  puts @uri.inspect.to_s
  nil
end
75
+
76
+ # Parses the fetched page into a Nokogiri HTML document, if it hasn't already been parsed
77
+ def page
78
+ @page ||= Nokogiri::HTML(@opened_url)
79
+ end
80
+
81
# Get the RSS Feed URL advertised by the page's first
# <link type="application/rss+xml"> element.
#
# Example:
#   >> Debugger.new("http://wearepandr.com").rss_feed_url
#   => http://wearepandr.com/feed
#
# Returns a String; it is empty when the page advertises no RSS feed.
# Relative hrefs are stitched onto the canonical url.
def rss_feed_url
  feed_link = page.search("link[@type='application/rss+xml']").first
  href = feed_link && feed_link['href']
  return '' if href.nil?

  href = Debugger.stitch_to_make_absolute(canonical_url, href) if Debugger.relative?(href)
  href.to_s
end
93
+
94
# Get the Atom Feed URL advertised by the page's first
# <link type="application/atom+xml"> element.
#
# Example:
#   >> Debugger.new("http://wearepandr.com").atom_feed_url
#   => http://wearepandr.com/feed
#
# Returns a String; it is empty when the page advertises no Atom feed.
# Relative hrefs are stitched onto the canonical url.
def atom_feed_url
  feed_link = page.search("link[@type='application/atom+xml']").first
  href = feed_link && feed_link['href']
  return '' if href.nil?

  href = Debugger.stitch_to_make_absolute(canonical_url, href) if Debugger.relative?(href)
  href.to_s
end
106
+
107
# Get the FEED URL, no matter if it's the Atom URL or the RSS URL.
# The RSS feed wins when the page advertises both.
#
# Example:
#   >> Debugger.new("http://wearepandr.com").feed_url
#   => http://wearepandr.com/feed
#
# Returns the feed url as a String, or nil when the page has no feed.
def feed_url
  found = rss_feed_url
  found = atom_feed_url if found == ''
  return nil if found == ''

  if Debugger.relative?(found)
    Debugger.stitch_to_make_absolute(canonical_url, found)
  else
    found
  end
end
126
+
127
# Return some meta info about the page as a Hash.
#
# Example:
#   >> Debugger.new("http://wearepandr.com").scrape_info
#   => {:response_code => "200 OK",
#       :fetched_url   => "http://wearepandr.com",
#       :canonical_url => "http://wearepandr.com/",
#       :feed_url      => "http://wearepandr.com/feed"}
def scrape_info
  # The entries are computed in this exact order; keep it when editing.
  info = {}
  info[:response_code] = response_code
  info[:fetched_url]   = fetched_url
  info[:canonical_url] = canonical_url
  info[:feed_url]      = feed_url
  info
end
141
+
142
# Get the page title
#
# Example:
#   >> Debugger.new("http://wearepandr.com").title
#   => Web Design Norwich and Norwich Ruby on Rails Web Development in Norfolk | PANDR
#
# Returns the stripped title text, or nil when the page has no <title>
# element or the title is blank.
def title
  node = page.css('title').first
  # A page without a <title> used to raise NoMethodError here.
  return nil if node.nil?

  text = node.inner_html.strip
  text.empty? ? nil : text
end
152
+
153
# Get the page description, taken from the content attribute of the
# page's <meta name="description"> element.
#
# Example:
#   >> Debugger.new("http://wearepandr.com").description
#   => A custom Web Design Norwich and Norwich Ruby on Rails Web Development agency based in Norfolk, UK
#
# Returns the stripped text, or nil when it is blank.
def description
  content = page.css("meta[name='description']/@content").inner_html.strip
  content.empty? ? nil : content
end
163
+
164
# Get the page meta data (title and description) in a Hash.
#
# Example:
#   >> Debugger.new("http://wearepandr.com").meta_data
#   => {:title => "Web Design Norwich and Norwich Ruby on Rails Web Development in Norfolk | PANDR",
#       :description => "A custom Web Design Norwich and Norwich Ruby on Rails Web Development agency based in Norfolk, UK"}
def meta_data
  data = {}
  data[:title] = title
  data[:description] = description
  data
end
174
+
175
# Get the music links from the feed found on the page
#
# Example:
#   >> Debugger.new("http://wearepandr.com").music_from_feed
#   => ["http://wearepandr.com/track_1.mp3", "http://wearepandr.com/track_2.mp3", "http://wearepandr.com/track_3.mp3"]
#
# Arguments:
#   file_types: [Array] accepted endings, compared against the last four
#               characters of each enclosure url
#
# Returns an Array of unique enclosure urls; it is empty when the page has
# no feed or the feed has no <channel>/<item>/<enclosure> elements.
def music_from_feed(file_types=FILE_TYPES)
  feed_location = feed_url
  return [] if feed_location.nil?

  # URI#open rather than Kernel#open: the feed address comes from the
  # fetched page, and Kernel#open treats a string starting with "|" as a
  # command to run.
  @feed ||= Nokogiri::XML(URI.parse(feed_location).open)
  @feed.encoding = 'utf-8'

  # One document-wide query; the absolute XPath matches the same elements
  # for every item, so there is no point repeating it per item.
  urls = @feed.search("//channel/item/enclosure").map { |enclosure| enclosure['url'] }
  urls.select { |url| file_types.include?(url.to_s[-4, 4]) }.uniq.compact
end
206
+
207
# Get the music links from the page html
#
# Example:
#   >> Debugger.new("http://wearepandr.com").music_from_html
#   => ["http://wearepandr.com/track_1.mp3", "http://wearepandr.com/track_2.mp3", "http://wearepandr.com/track_3.mp3"]
#
# Arguments:
#   file_types: [Array] accepted endings, compared against the last four
#               characters of each link's href
def music_from_html(file_types=FILE_TYPES)
  hrefs = page_links.map { |anchor| anchor['href'] }
  tracks = hrefs.select { |href| file_types.include?(href.to_s[-4, 4]) }
  tracks.uniq.compact
end
228
+
229
# Get the soundcloud music links from the page html
#
# Looks at the src attribute (or, failing that, the value attribute) of
# every <iframe> and <param> element and keeps those that
# Debugger.get_soundcloud_url recognises.
#
# Example:
#   >> Debugger.new("http://wearepandr.com").music_from_soundcloud
#   => ["http://api.soundcloud.com/playlists/2153957", "http://api.soundcloud.com/playlists/2153958"]
def music_from_soundcloud
  # Reuse the already-fetched page instead of downloading it a second time
  # without the robots.txt check and request headers.
  embeds = page.search("//iframe", "//param").map { |node| node["src"] || node["value"] }

  # Elements with neither attribute are skipped rather than handed on as nil.
  embeds.compact.map { |embed| Debugger.get_soundcloud_url(embed) }.compact.uniq
end
244
+
245
# Get the internal page links from the page, i.e. the links whose host is
# the host of the fetched url.
#
# Example:
#   >> Debugger.new("http://wearepandr.com").internal_links
#   => ["http://wearepandr.com/about", "http://wearepandr.com/blog"]
#
# Returns an Array of unique absolute urls (Strings) with anchors removed.
# mailto links, empty hrefs and hrefs that cannot be resolved are skipped.
def internal_links
  current_host = @uri.host
  links = []

  page_links.each do |link|
    href = link['href']
    next if href.nil?

    # Remove anchors from links
    target = href.split("#")[0].to_s.strip
    next if target.empty? || Debugger.mailto_link?(target)

    if Debugger.relative?(target)
      begin
        # Resolve against the fetched url. Calling Debugger.make_absolute
        # without a base url can never succeed, which used to drop every
        # relative link from the result.
        target = @uri.merge(target).to_s
      rescue URI::Error
        next
      end
    end

    # Check to see if the URL is still from the current site
    links << target if current_host == Addressable::URI.parse(target).host
  end

  links.uniq.compact
end
278
+
279
# Get all the links from the page
#
# Example:
#   >> Debugger.new("http://wearepandr.com").page_links.map { |a| a['href'] }
#   => ["http://wearepandr.com/about", "http://google.com", "http://yahoo.com"]
#
# Returns the <a> elements of the parsed page (not plain strings); read
# link['href'] to get an address.
def page_links
  # Search the already-fetched page rather than downloading it again
  # without the robots.txt check and request headers.
  page.search("//a")
end
290
+
291
+ # Get the host of the fetched URL
292
+ #
293
+ # Example:
294
+ # >> Debugger.new("http://wearepandr.com").host
295
+ # => wearepandr.com
296
+ def host
297
+ Addressable::URI.parse(@uri).host
298
+ end
299
+
300
# Get the page's content type
#
# Example:
#   >> Debugger.new("http://wearepandr.com").content_type
#   => text/html
#
# Returns nil when the page could not be fetched.
def content_type
  return nil if @opened_url.nil?

  @opened_url.content_type
end
308
+
309
# Get the page's charset
#
# Example:
#   >> Debugger.new("http://wearepandr.com").charset
#   => utf-8
#
# Returns nil when the page could not be fetched.
def charset
  return nil if @opened_url.nil?

  @opened_url.charset
end
317
+
318
# Get the page's content encoding
#
# Example:
#   >> Debugger.new("http://wearepandr.com").content_encoding
#   => []
#
# Returns nil when the page could not be fetched.
def content_encoding
  return nil if @opened_url.nil?

  @opened_url.content_encoding
end
326
+
327
# Get the page's last modified date
#
# Example:
#   >> Debugger.new("http://wearepandr.com").last_modified
#   =>
#
# Returns whatever the fetched response reports, or nil when the page
# could not be fetched.
def last_modified
  return nil if @opened_url.nil?

  @opened_url.last_modified
end
335
+
336
+ # Get the user agent
337
+ #
338
+ # Example:
339
+ # >> Debugger.user_agent("PANDR")
340
+ # => PANDR/V0.1
341
+ #
342
+ # Arguments:
343
+ # ua: (String)
344
+ def self.user_agent(ua="Rakkit")
345
+ "#{ua}/V#{Debugher::VERSION}"
346
+ end
347
+
348
+ # Get the current version
349
+ #
350
+ # Example:
351
+ # >> Debugger.version
352
+ # => V0.1
353
+ def self.version
354
+ "V#{Debugher::VERSION}"
355
+ end
356
+
357
# Check if a URL is relative or not
#
# Example:
#   >> Debugger.relative?("http://wearepandr.com")
#   => false
#   >> Debugger.relative?("/about")
#   => true
#
# Arguments:
#   url: (String)
#
# Returns false for nil and for anything Addressable cannot parse.
def self.relative?(url)
  # A local variable: the parsed url is only needed for this one call, so
  # it must not be stored on the class.
  parsed = Addressable::URI.parse(url)
  parsed.nil? ? false : parsed.relative?
rescue StandardError
  false
end
373
+
374
# Make a URL absolute
#
# Example:
#   >> Debugger.make_absolute("/about", "http://wearepandr.com")
#   => http://wearepandr.com/about
#
# Arguments:
#   url: (String)
#   base_url: (String) address of the site the url belongs to; only its
#             scheme, host and port are used
#
# Returns url unchanged when it is not relative. Returns nil (after logging)
# when a relative url cannot be resolved, including when no base_url is given.
def self.make_absolute(url, base_url=nil)
  return url unless Debugger.relative?(url)

  if base_url.nil?
    $stderr.puts "Debugger Error: #{url} - no base url given"
    puts "ERROR: Could not make this URL absolute. Set to nil."
    return nil
  end

  begin
    # Derive the site root straight from the base url. Instantiating a
    # Debugger for this would download the page just to read its address.
    base = base_url =~ %r{\A[a-z][a-z0-9+.-]*://}i ? base_url : "http://#{base_url}"
    root = (URI.parse(base) + "/").to_s
    Debugger.stitch_to_make_absolute(root, url)
  rescue StandardError => e
    $stderr.puts "Debugger Error: #{url} - #{e}"
    puts "ERROR: Could not make this URL absolute. Set to nil."
    nil
  end
end
401
+
402
# Stitch two strings together to make a single absolute url
#
# Example:
#   >> Debugger.stitch_to_make_absolute("http://wearepandr.com/", "/about")
#   => http://wearepandr.com/about
#   >> Debugger.stitch_to_make_absolute("http://wearepandr.com/", "about")
#   => http://wearepandr.com/about
#
# Arguments:
#   canonical_url: (String)
#   path: (String)
def self.stitch_to_make_absolute(canonical_url, path)
  base = canonical_url.chomp("/")
  # Exactly one slash between host and path; a path without a leading slash
  # would otherwise be glued onto the host name.
  path.start_with?("/") ? base + path : "#{base}/#{path}"
end
414
+
415
# Check if a string is a mailto link
#
# Example:
#   >> Debugger.mailto_link?("mailto:pete@wearepandr.com")
#   => true
#
# Arguments:
#   url: (String)
def self.mailto_link?(url)
  # Require the colon and ignore case: "mailtoday.html" is not a mailto
  # link, while "MAILTO:someone@example.com" is.
  !(url =~ /\Amailto:/i).nil?
end
426
+
427
# Extract the URL element of a soundcloud embed in order to grab the link to the track.
#
# Example:
#   >> Debugger.get_soundcloud_url("https://w.soundcloud.com/player/?url=http%3A%2F%2Fapi.soundcloud.com%2Ftracks%2F59422468")
#   => http://api.soundcloud.com/tracks/59422468
#
# Arguments:
#   url: (String)
#
# Returns the unescaped value of the embed's "url" query parameter when it
# is a Soundcloud URL, otherwise nil.
def self.get_soundcloud_url(url)
  query = URI.parse(url).query
  # Embeds without a query string are simply not Soundcloud players; that
  # is an ordinary case, not an error worth logging.
  return nil if query.nil?

  # Match the whole parameter name ("url="), not just a "url" prefix.
  pair = query.split("&").find { |param| param[0, 4] == "url=" }
  return nil if pair.nil?

  track_url = CGI.unescape(pair[4..-1])
  Debugger.soundcloud_url?(track_url) ? track_url : nil
rescue StandardError
  $stderr.puts "Bad URL - Soundcloud URL's don't cause errors so safe to assume it's not a Soundcloud link."
  nil
end
448
+
449
+ # Check if a string is a Soundcloud URL
450
+ #
451
+ # Example:
452
+ # >> Debugger.soundcloud_url?("http://api.soundcloud.com/tracks/59422468")
453
+ # => true
454
+ #
455
+ # Arguments:
456
+ # url: (String)
457
+ def self.soundcloud_url?(url)
458
+ url.include?("api.soundcloud.com")
459
+ end
460
+
461
# Check if a url is a valid url
#
# Example:
#   >> Debugger.valid_url?("http://wearepandr.com")
#   => true
#   >> Debugger.valid_url?("blah blah blah")
#   => false
#
# Arguments:
#   url: (String)
def self.valid_url?(url)
  # Anchor the pattern so the whole string has to be a URL; unanchored, any
  # text that merely contains one would count as valid.
  !(url =~ /\A#{URI::regexp}\z/).nil?
end
472
+ end
473
+ end
@@ -0,0 +1,3 @@
1
+ module Debugher
2
+ VERSION = "0.0.1"
3
+ end
@@ -0,0 +1,195 @@
1
+ require './lib/debugher'
2
+ require 'test/unit'
3
+ require 'rack/test'
4
+
5
+ ENV['RACK_ENV'] = 'test'
6
+
7
+ class DebugherTest < Test::Unit::TestCase
8
+ include Rack::Test::Methods
9
+ include Debugher
10
+
11
+ def test_initialize
12
+ @page = Debugger.new("http://wearepandr.com/")
13
+
14
+ assert_equal @page.url, "http://wearepandr.com/"
15
+ end
16
+
17
+ def test_rss_feed_url
18
+ @page = Debugger.new("http://funtofunky.wordpress.com/")
19
+ assert_equal @page.rss_feed_url, "http://funtofunky.wordpress.com/feed/"
20
+
21
+ @page = Debugger.new("http://blog.iso50.com/")
22
+ assert_equal @page.rss_feed_url, "http://blog.iso50.com/feed/"
23
+ end
24
+
25
+ def test_atom_feed_url
26
+ @page = Debugger.new("http://wearepandr.com/")
27
+ assert_equal @page.atom_feed_url, "http://wearepandr.com/feed"
28
+
29
+ @page = Debugger.new("http://thefourohfive.com/")
30
+ assert_equal @page.atom_feed_url, "http://thefourohfive.com/feed"
31
+ end
32
+
33
+ def test_feed_url
34
+ # Atom Feed
35
+ @page = Debugger.new("http://wearepandr.com/")
36
+ assert_equal @page.feed_url, "http://wearepandr.com/feed"
37
+
38
+ # RSS Feed
39
+ @page = Debugger.new("http://funtofunky.wordpress.com")
40
+ assert_equal @page.feed_url, "http://funtofunky.wordpress.com/feed/"
41
+ end
42
+
43
+ def test_scrape_info
44
+ @page = Debugger.new("http://rakkit.com/about")
45
+ @scrape_info = @page.scrape_info
46
+
47
+ assert_equal '200 OK', @scrape_info[:response_code]
48
+ assert_equal 'http://rakkit.com/about', @scrape_info[:fetched_url]
49
+ assert_equal 'http://rakkit.com/', @scrape_info[:canonical_url]
50
+ assert_equal nil, @scrape_info[:feed_url]
51
+ end
52
+
53
+ def test_meta_data
54
+ @page = Debugger.new("http://rakkit.com")
55
+ @meta = @page.meta_data
56
+
57
+ assert_equal 'The latest new music from websites, artists and labels you love | Rakkit', @meta[:title]
58
+ assert_equal 'The Social link between new music and the fans.', @meta[:description]
59
+ end
60
+
61
+ def test_music_from_feed
62
+ @page = Debugger.new("http://blog.iso50.com")
63
+ @music_links = @page.music_from_feed
64
+
65
+ assert @music_links.kind_of?(Array)
66
+ end
67
+
68
+ def test_music_from_html
69
+ @page = Debugger.new("http://blog.iso50.com")
70
+ @music_links = @page.music_from_html
71
+
72
+ assert @music_links.kind_of?(Array)
73
+ end
74
+
75
+ def test_music_from_soundcloud
76
+ @page = Debugger.new("http://funtofunky.wordpress.com/")
77
+ @music_links = @page.music_from_soundcloud
78
+
79
+ assert @music_links.kind_of?(Array)
80
+ end
81
+
82
+ def test_page_links
83
+ @page = Debugger.new("http://funtofunky.wordpress.com/")
84
+ @internal_links = @page.internal_links
85
+
86
+ assert @internal_links.kind_of?(Array)
87
+ end
88
+
89
+ def test_valid_url?
90
+ @valid_url = Debugger.valid_url?("http://funtofunky.wordpress.com/")
91
+ assert_equal @valid_url, true
92
+
93
+ @valid_url = Debugger.valid_url?("blah blah blah")
94
+ assert_equal @valid_url, false
95
+ end
96
+
97
+ def test_host
98
+ @page = Debugger.new("http://funtofunky.wordpress.com/")
99
+ assert_equal @page.host, "funtofunky.wordpress.com"
100
+ end
101
+
102
+ def test_content_type
103
+ @page = Debugger.new("http://wearepandr.com")
104
+ assert_equal @page.content_type, "text/html"
105
+ end
106
+
107
+ def test_charset
108
+ @page = Debugger.new("http://wearepandr.com")
109
+ assert_equal @page.charset, "utf-8"
110
+ end
111
+
112
+ def test_content_encoding
113
+ # Need to find better examples of this
114
+ @page = Debugger.new("http://wearepandr.com")
115
+ assert_equal @page.content_encoding, []
116
+ end
117
+
118
+ def test_last_modified
119
+ # Need to find better examples of this
120
+ @page = Debugger.new("http://wearepandr.com")
121
+ assert_equal @page.last_modified, nil
122
+ end
123
+
124
+ # Self Methods
125
+ #
126
+ def test_user_agent
127
+ @ua = Debugger.user_agent
128
+ assert_equal @ua, "Rakkit/V#{Debugher::VERSION}"
129
+
130
+ @ua = Debugger.user_agent("PANDR")
131
+ assert_equal @ua, "PANDR/V#{Debugher::VERSION}"
132
+ end
133
+
134
+ def test_version
135
+ @version = Debugger.version
136
+
137
+ # Enough of a test that we're getting the Version #
138
+ assert_equal @version, "V#{Debugher::VERSION}"
139
+ end
140
+
141
+ def test_mail_to_link?
142
+ @url = "http://wearepandr.com"
143
+ assert_equal Debugger.mailto_link?(@url), false
144
+
145
+ @url = "mailto:pete@wearepandr.com"
146
+ assert_equal Debugger.mailto_link?(@url), true
147
+ end
148
+
149
+ def test_relative?
150
+ @url = "/"
151
+ assert_equal Debugger.relative?(@url), true
152
+
153
+ @url = "/about"
154
+ assert_equal Debugger.relative?(@url), true
155
+
156
+ @url = "http://wearepandr.com"
157
+ assert_equal Debugger.relative?(@url), false
158
+
159
+ @url = "http://wearepandr.com/"
160
+ assert_equal Debugger.relative?(@url), false
161
+
162
+ @url = "http://staff.wearepandr.com"
163
+ assert_equal Debugger.relative?(@url), false
164
+ end
165
+
166
+ def test_make_absolute
167
+ @absolute = Debugger.make_absolute("/about", "http://blog.iso50.com")
168
+ assert_equal @absolute, "http://blog.iso50.com/about"
169
+
170
+ @absolute = Debugger.make_absolute("/about", "http://blog.iso50.com/")
171
+ assert_equal @absolute, "http://blog.iso50.com/about"
172
+ end
173
+
174
+ def test_get_soundcloud_url
175
+ @soundcloud_embed = "https://w.soundcloud.com/player/?url=http%3A%2F%2Fapi.soundcloud.com%2Ftracks%2F59422468"
176
+ assert_equal Debugger.get_soundcloud_url(@soundcloud_embed), "http://api.soundcloud.com/tracks/59422468"
177
+
178
+ @soundcloud_embed = "https://w.soundcloud.com/player/?url=http%3A%2F%2Fapi.soundcloud.com%2Fplaylists%2F2153957"
179
+ assert_equal Debugger.get_soundcloud_url(@soundcloud_embed), "http://api.soundcloud.com/playlists/2153957"
180
+
181
+ @soundcloud_embed = "http://wearepandr.com"
182
+ assert_equal Debugger.get_soundcloud_url(@soundcloud_embed), nil
183
+ end
184
+
185
+ def test_soundcloud_url?
186
+ @url = "http://wearepandr.com"
187
+ assert_equal Debugger.soundcloud_url?(@url), false
188
+
189
+ @url = "http://api.soundcloud.com/playlists/2153957"
190
+ assert_equal Debugger.soundcloud_url?(@url), true
191
+
192
+ # A further addition to the method could be to test that there
193
+ # is a unique id on the end of the url.
194
+ end
195
+ end
metadata ADDED
@@ -0,0 +1,108 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: debugher
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Peter Roome
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-09-16 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: addressable
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: robots
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ description: ! "\n A handy set of methods for getting various
63
+ bits of information about a web page.\n This is used by
64
+ the Rakkit Debugger to output what information we can gather about various pages
65
+ on an adhoc basis.\n The library is also used by the Rakkit
66
+ spider to process and index pages across the web.\n "
67
+ email:
68
+ - pete@wearepandr.com
69
+ executables: []
70
+ extensions: []
71
+ extra_rdoc_files: []
72
+ files:
73
+ - .gitignore
74
+ - Gemfile
75
+ - LICENSE
76
+ - README.md
77
+ - README.rdoc
78
+ - Rakefile
79
+ - debugher.gemspec
80
+ - lib/debugher.rb
81
+ - lib/debugher/version.rb
82
+ - test/test_debugher.rb
83
+ homepage: ''
84
+ licenses: []
85
+ post_install_message:
86
+ rdoc_options: []
87
+ require_paths:
88
+ - lib
89
+ required_ruby_version: !ruby/object:Gem::Requirement
90
+ none: false
91
+ requirements:
92
+ - - ! '>='
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ required_rubygems_version: !ruby/object:Gem::Requirement
96
+ none: false
97
+ requirements:
98
+ - - ! '>='
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ requirements: []
102
+ rubyforge_project:
103
+ rubygems_version: 1.8.19
104
+ signing_key:
105
+ specification_version: 3
106
+ summary: Methods for the Rakkit Debugger.
107
+ test_files:
108
+ - test/test_debugher.rb