debugher 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ html/
12
+ lib/bundler/man
13
+ pkg
14
+ rdoc
15
+ spec/reports
16
+ test/tmp
17
+ test/version_tmp
18
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,14 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in debugher.gemspec
4
+ gemspec
5
+
6
+ gem 'nokogiri'
7
+ gem "addressable"
8
+ gem 'robots'
9
+
10
+ group :development, :test do
11
+ gem 'rspec'
12
+ gem 'rack-test'
13
+ gem 'simplecov', :require => false
14
+ end
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Peter Roome
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,32 @@
1
+ # Debugher
2
+
3
+ A handy set of methods for getting various bits of information about a web page.
4
+
5
+ This is used by the Rakkit Debugger to output what information we can gather about various pages on an ad hoc basis.
6
+ The library is also used by the Rakkit spider to process and index pages across the web.
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ gem 'debugher'
13
+
14
+ And then execute:
15
+
16
+ $ bundle
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install debugher
21
+
22
+ ## Usage
23
+
24
+ TODO: Write usage instructions here
25
+
26
+ ## Contributing
27
+
28
+ 1. Fork it
29
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
30
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
31
+ 4. Push to the branch (`git push origin my-new-feature`)
32
+ 5. Create new Pull Request
File without changes
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+ require 'rake/testtask'
4
+ require 'rdoc/task'
5
+
6
+ Rake::TestTask.new do |t|
7
+ t.libs << 'test'
8
+ end
9
+
10
+ desc "Run tests"
11
+ task :default => :test
12
+
13
+ Rake::RDocTask.new do |rd|
14
+ rd.main = "README.rdoc"
15
+ rd.rdoc_files.include("README.rdoc", "lib/**/*.rb")
16
+ end
17
+ desc "Generate documentation"
18
+ task :rdoc
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/debugher/version', __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.authors = ["Peter Roome"]
6
+ gem.email = ["pete@wearepandr.com"]
7
+ gem.description = %q{
8
+ A handy set of methods for getting various bits of information about a web page.
9
+ This is used by the Rakkit Debugger to output what information we can gather about various pages on an adhoc basis.
10
+ The library is also used by the Rakkit spider to process and index pages across the web.
11
+ }
12
+ gem.summary = %q{Methods for the Rakkit Debugger.}
13
+ gem.homepage = ""
14
+
15
+ gem.files = `git ls-files`.split($\)
16
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
17
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
+ gem.name = "debugher"
19
+ gem.require_paths = ["lib"]
20
+ gem.version = Debugher::VERSION
21
+
22
+ gem.add_dependency 'nokogiri'
23
+ gem.add_dependency "addressable"
24
+ gem.add_dependency 'robots'
25
+ end
@@ -0,0 +1,473 @@
1
+ require "debugher/version"
2
+
3
+ module Debugher
4
+ require 'robots'
5
+ require 'nokogiri'
6
+ require 'addressable/uri'
7
+ require 'cgi'
8
+
9
+ class Debugger
10
+ FILE_TYPES = ['.mp3', '.m4a', '.MP3']
11
+ attr_accessor :url
12
+
# Build a debugger for +url+ and fetch the page straight away.
#
# A URL given without an http/https scheme (e.g. "rakkit.com") is prefixed
# with "http://" before it is parsed.
#
# Arguments:
#   url: (String)
def initialize(url)
  $stdout.sync = true
  parsed = URI.parse(url)
  # URI::HTTPS inherits from URI::HTTP, so is_a? accepts both schemes. An
  # exact class comparison would turn "https://x" into "http://https://x".
  @url = parsed.is_a?(URI::HTTP) ? url : "http://#{url}"
  @uri = URI.parse(@url)
  @opened_url = open_url
end
21
+
# Fetch @uri and return the object handed back by open-uri.
#
# Returns nil when robots.txt disallows the fetch for our user agent, or
# when the request itself fails (the failure is reported on stderr).
def open_url
  url_object = nil
  ua = Debugger.user_agent
  @robot = Robots.new(ua)
  if @robot.allowed?(@uri)
    begin
      # URI#open (mixed into HTTP/HTTPS/FTP URIs by open-uri) is used instead
      # of Kernel#open, which no longer accepts URIs from Ruby 3.0 onwards.
      url_object = @uri.open("User-Agent" => ua,
                             "From"       => "hello@rakkit.com",
                             "Referer"    => "http://rakkit.com")
    rescue StandardError => e
      # Most likely a 404 error. StandardError (not Exception) so that
      # interrupts and exit requests are not swallowed.
      $stderr.puts "Unable to open url: #{url} - #{e}"
    end
  end
  url_object
end
39
+
40
+ # Get the response code of the page
41
+ #
42
+ # Example:
43
+ # >> Debugger.new("http://rakkit.com").response_code
44
+ # => 200 OK
45
+ def response_code
46
+ @opened_url.status.join(" ")
47
+ end
48
+
49
+ # Return the fetched URL
50
+ #
51
+ # Example:
52
+ # >> Debugger.new("rakkit.com").fetched_url
53
+ # => http://rakkit.com
54
+ def fetched_url
55
+ @uri.to_s
56
+ end
57
+
# Get the canonical url of the page: scheme and host with a root path,
# without the original path or query string.
#
# Returns nil (after printing the error) if the URL cannot be resolved.
#
# Example:
#   >> Debugger.new("http://rakkit.com/about").canonical_url
#   => http://rakkit.com/
def canonical_url
  # Resolving "/" against the fetched URI drops its path and query and
  # builds a new URI object, so @uri itself is never modified.
  (@uri + "/").to_s
rescue StandardError => e
  puts "CANONICAL ERROR: #{e}"
  puts @uri.inspect.to_s
  nil
end
75
+
76
+ # Parses the fetched page into a Nokogiri HTML document if it hasn't already been parsed
77
+ def page
78
+ @page ||= Nokogiri::HTML(@opened_url)
79
+ end
80
+
81
+ # Get the RSS Feed URL
82
+ #
83
+ # Example:
84
+ # >> Debugger.new("http://wearepandr.com").rss_feed_url
85
+ # => http://wearepandr.com/feed
86
+ def rss_feed_url
87
+ rss_url = page.search("link[@type='application/rss+xml']")
88
+ rss_url = rss_url.length == 0 ? nil : rss_url.first['href']
89
+
90
+ rss_url = Debugger.stitch_to_make_absolute(canonical_url, rss_url) if Debugger.relative?(rss_url)
91
+ return rss_url.to_s
92
+ end
93
+
94
+ # Get the Atom Feed URL
95
+ #
96
+ # Example:
97
+ # >> Debugger.new("http://wearepandr.com").atom_feed_url
98
+ # => http://wearepandr.com/feed
99
+ def atom_feed_url
100
+ atom_url = page.search("link[@type='application/atom+xml']")
101
+ atom_url = atom_url.length == 0 ? nil : atom_url.first['href']
102
+
103
+ atom_url = Debugger.stitch_to_make_absolute(canonical_url, atom_url) if Debugger.relative?(atom_url)
104
+ return atom_url.to_s
105
+ end
106
+
# Get the FEED URL, no matter if it's the Atom URL or the RSS URL.
# The RSS feed wins when the page advertises both.
#
# Returns nil when the page advertises neither.
#
# Example:
#   >> Debugger.new("http://wearepandr.com").feed_url
#   => http://wearepandr.com/feed
def feed_url
  # Each lookup queries the parsed page, so ask for each feed at most once.
  # Both lookups already return absolute URLs (or "" when nothing is found).
  found = rss_feed_url
  found = atom_feed_url if found.empty?
  found.empty? ? nil : found
end
126
+
127
+ # Return some meta info about the page
128
+ #
129
+ # Example:
130
+ # >> Debugger.new("http://wearepandr.com").scrape_info
131
+ # => {:response_code => "200 OK",
132
+ # :fetched_url => "http://wearepandr.com",
133
+ # :canonical_url => "http://wearepandr.com/",
134
+ # :feed_url => "http://wearepandr.com/feed"}
135
+ def scrape_info
136
+ return {:response_code => response_code,
137
+ :fetched_url => fetched_url,
138
+ :canonical_url => canonical_url,
139
+ :feed_url => feed_url}
140
+ end
141
+
# Get the page title
#
# Returns nil when the page has no <title> element or the title is blank.
#
# Example:
#   >> Debugger.new("http://wearepandr.com").title
#   => Web Design Norwich and Norwich Ruby on Rails Web Development in Norfolk | PANDR
def title
  node = page.css('title')[0]
  # Pages without a <title> would otherwise raise NoMethodError on nil.
  return nil if node.nil?

  text = node.inner_html.strip
  text == '' ? nil : text
end
152
+
153
+ # Get the page description
154
+ #
155
+ # Example:
156
+ # >> Debugger.new("http://wearepandr.com").description
157
+ # => A custom Web Design Norwich and Norwich Ruby on Rails Web Development agency based in Norfolk, UK
158
+ def description
159
+ description = page.css("meta[name='description']/@content").inner_html.strip
160
+ description = description == '' ? nil : description
161
+ return description
162
+ end
163
+
164
+ # Get the page meta data in a hash, title and description.
165
+ #
166
+ # Example:
167
+ # >> Debugger.new("http://wearepandr.com").meta_data
168
+ # => {:title => "Web Design Norwich and Norwich Ruby on Rails Web Development in Norfolk | PANDR",
169
+ # :description => "A custom Web Design Norwich and Norwich Ruby on Rails Web Development agency based in Norfolk, UK"}
170
+ def meta_data
171
+ return {:title => title,
172
+ :description => description}
173
+ end
174
+
# Get the music links from the feed found on the page
#
# Only <enclosure> elements under //channel/item are considered; a link is
# kept when its url ends with one of the given file types.
#
# Example:
#   >> Debugger.new("http://wearepandr.com").music_from_feed
#   => ["http://wearepandr.com/track_1.mp3", "http://wearepandr.com/track_2.mp3", "http://wearepandr.com/track_3.mp3"]
#
# Arguments:
#   file_types: [Array] file extensions including the dot, e.g. ['.mp3']
def music_from_feed(file_types=FILE_TYPES)
  # Resolve the feed URL once; every call to feed_url re-queries the page.
  url = feed_url
  return [] if url.nil?

  # Open through URI#open so the page-supplied string can only ever be
  # treated as a URL, never as a local path or a "|command" pipe.
  @feed ||= Nokogiri::XML(URI.parse(url).open)
  @feed.encoding = 'utf-8'

  links = []
  # One document-wide query finds every enclosure; running an absolute
  # XPath once per item returned the same nodes over and over.
  @feed.search("//channel/item/enclosure").each do |enclosure|
    enclosure_url = enclosure['url']
    links << enclosure_url if enclosure_url.to_s.end_with?(*file_types)
  end
  links.uniq.compact
end
206
+
207
+ # Get the music links from the page html
208
+ #
209
+ # Example:
210
+ # >> Debugger.new("http://wearepandr.com").music_from_html
211
+ # => ["http://wearepandr.com/track_1.mp3", "http://wearepandr.com/track_2.mp3", "http://wearepandr.com/track_3.mp3"]
212
+ #
213
+ # Arguments:
214
+ # file_types: [Array]
215
+ def music_from_html(file_types=FILE_TYPES)
216
+ links = []
217
+
218
+ page_links.each do |track|
219
+ track_file = track['href'].to_s[-4,4]
220
+
221
+ if file_types.include?(track_file)
222
+ links << track["href"]
223
+ end
224
+ end
225
+ links = links.uniq
226
+ return links.compact
227
+ end
228
+
# Get the soundcloud music links from the page html
#
# Looks at the src of every <iframe> and the value of every <param> on the
# page and keeps the ones that embed a Soundcloud track or playlist.
#
# Example:
#   >> Debugger.new("http://wearepandr.com").music_from_soundcloud
#   => ["http://api.soundcloud.com/playlists/2153957", "http://api.soundcloud.com/playlists/2153958"]
def music_from_soundcloud
  links = []
  # Reuse the document parsed from the original, robots-checked request
  # instead of downloading the page a second time.
  page.search("//iframe", "//param").each do |node|
    embed_url = node["src"] || node["value"]
    links << Debugger.get_soundcloud_url(embed_url)
  end
  # get_soundcloud_url gives nil for anything that is not a Soundcloud embed.
  links.uniq.compact
end
244
+
# Get the internal page links from the page
#
# Anchors (#fragment) are stripped, blank and mailto links are skipped, and
# relative links are resolved against canonical_url. A link is kept when
# its host equals the host of the fetched URL.
#
# Example:
#   >> Debugger.new("http://wearepandr.com").internal_links
#   => ["http://wearepandr.com/about", "http://wearepandr.com/blog"]
def internal_links
  # Collect the anchors and the host before calling canonical_url, so the
  # page fetch never depends on anything canonical_url does to @uri.
  anchors = page_links
  current_host = @uri.host
  base_url = canonical_url
  links = []

  anchors.each do |anchor|
    href = anchor['href']
    next if href.nil?

    # Remove anchors from links
    href = href.split("#")[0]
    next if href.nil? || href.strip.empty? || Debugger.mailto_link?(href)

    # Resolve relative links here: the class-level make_absolute has no
    # access to this instance's URL, so it cannot supply a base on its own.
    href = Debugger.stitch_to_make_absolute(base_url, href) if Debugger.relative?(href)

    # Check to see if the URL is still from the current site
    links << href if current_host == Addressable::URI.parse(href).host
  end

  links.uniq.compact
end
278
+
# Get all the anchor (<a>) elements from the page
#
# Returns the matching nodes, not strings; read a node's 'href' attribute
# to get the link target.
#
# Example:
#   >> Debugger.new("http://wearepandr.com").page_links.map { |a| a['href'] }
#   => ["http://wearepandr.com/about", "http://google.com", "http://yahoo.com"]
def page_links
  # Reuse the document parsed from the original, robots-checked request
  # instead of downloading the page a second time.
  page.search("//a")
end
290
+
291
+ # Get the host of the fetched URL
292
+ #
293
+ # Example:
294
+ # >> Debugger.new("http://wearepandr.com").host
295
+ # => wearepandr.com
296
+ def host
297
+ Addressable::URI.parse(@uri).host
298
+ end
299
+
300
+ # Get the pages content type
301
+ #
302
+ # Example:
303
+ # >> Debugger.new("http://wearepandr.com").content_type
304
+ # => text/html
305
+ def content_type
306
+ @opened_url.content_type
307
+ end
308
+
309
+ # Get the pages charset
310
+ #
311
+ # Example:
312
+ # >> Debugger.new("http://wearepandr.com").charset
313
+ # => utf-8
314
+ def charset
315
+ @opened_url.charset
316
+ end
317
+
318
+ # Get the pages content encoding
319
+ #
320
+ # Example:
321
+ # >> Debugger.new("http://wearepandr.com").content_encoding
322
+ # => []
323
+ def content_encoding
324
+ @opened_url.content_encoding
325
+ end
326
+
327
+ # Get the pages last modified date
328
+ #
329
+ # Example:
330
+ # >> Debugger.new("http://wearepandr.com").last_modified
331
+ # =>
332
+ def last_modified
333
+ @opened_url.last_modified
334
+ end
335
+
336
+ # Get the user agent
337
+ #
338
+ # Example:
339
+ # >> Debugger.user_agent("PANDR")
340
+ # => PANDR/V0.1
341
+ #
342
+ # Arguments:
343
+ # ua: (String)
344
+ def self.user_agent(ua="Rakkit")
345
+ "#{ua}/V#{Debugher::VERSION}"
346
+ end
347
+
348
+ # Get the current version
349
+ #
350
+ # Example:
351
+ # >> Debugger.version
352
+ # => V0.1
353
+ def self.version
354
+ "V#{Debugher::VERSION}"
355
+ end
356
+
# Check if a URL is relative or not
#
# Returns false for nil and for anything that cannot be parsed.
#
# Example:
#   >> Debugger.relative?("http://wearepandr.com")
#   => false
#
# Arguments:
#   url: (String)
def self.relative?(url)
  # A local variable, not a class-level instance variable: nothing else
  # reads the parsed value and shared state here is only a hazard.
  parsed = Addressable::URI.parse(url)
  parsed.nil? ? false : parsed.relative?
rescue StandardError
  false
end
373
+
# Make a URL absolute
#
# An already-absolute url is returned unchanged. A relative url is joined
# onto the root of base_url (scheme and host, path and query dropped).
# Returns nil when the url is relative and no usable base_url is given.
#
# Example:
#   >> Debugger.make_absolute("/about", "http://wearepandr.com")
#   => http://wearepandr.com/about
#
# Arguments:
#   url: (String)
#   base_url: (String)
def self.make_absolute(url, base_url=nil)
  return url unless relative?(url)

  begin
    # A class method has no page of its own to fall back on.
    raise ArgumentError, "no base_url given" if base_url.nil?

    # Work out the site root locally rather than fetching base_url over
    # the network just to read its canonical URL.
    base = URI.parse(base_url)
    base = URI.parse("http://#{base_url}") unless base.is_a?(URI::HTTP)
    stitch_to_make_absolute((base + "/").to_s, url)
  rescue StandardError => e
    $stderr.puts "Debugger Error: #{url} - #{e}"
    puts "ERROR: Could not make this URL absolute. Set to nil."
    nil
  end
end
401
+
# Stitch two strings together to make a single absolute url
#
# Exactly one "/" separates the two parts, whether or not canonical_url
# ends with one or path starts with one. An empty path returns
# canonical_url without its trailing slash.
#
# Example:
#   >> Debugger.stitch_to_make_absolute("http://wearepandr.com/", "/about")
#   => http://wearepandr.com/about
#   >> Debugger.stitch_to_make_absolute("http://wearepandr.com/", "feed")
#   => http://wearepandr.com/feed
#
# Arguments:
#   canonical_url: (String)
#   path: (String)
def self.stitch_to_make_absolute(canonical_url, path)
  base = canonical_url.chomp("/")
  return base if path.empty?

  # Without the separator "feed" would be glued onto the host name.
  path.start_with?("/") ? base + path : "#{base}/#{path}"
end
414
+
# Check if a string is a mailto link
#
# The scheme is matched case-insensitively and must be followed by a colon,
# so "MAILTO:a@b.c" counts and "mailto-archive.html" does not. nil is
# treated as "not a mailto link".
#
# Example:
#   >> Debugger.mailto_link?("mailto:pete@wearepandr.com")
#   => true
#
# Arguments:
#   url: (String)
def self.mailto_link?(url)
  !(url.to_s =~ /\A\s*mailto:/i).nil?
end
426
+
# Extract the URL element of a soundcloud embed in order to grab the link to the track.
#
# Returns the decoded value of the embed's "url" query parameter when it
# points at api.soundcloud.com, and nil for anything else (nil input, no
# query string, no "url" parameter, a non-Soundcloud target, or a string
# that is not a parseable URL).
#
# Example:
#   >> Debugger.get_soundcloud_url("https://w.soundcloud.com/player/?url=http%3A%2F%2Fapi.soundcloud.com%2Ftracks%2F59422468")
#   => http://api.soundcloud.com/tracks/59422468
#
# Arguments:
#   url: (String)
def self.get_soundcloud_url(url)
  query = URI.parse(url.to_s).query
  return nil if query.nil?

  # Match the whole parameter name so "urls=..." is not picked up.
  pair = query.split("&").find { |param| param.start_with?("url=") }
  return nil if pair.nil?

  track_url = CGI.unescape(pair[4..-1])
  soundcloud_url?(track_url) ? track_url : nil
rescue URI::InvalidURIError
  $stderr.puts "Bad URL - Soundcloud URL's don't cause errors so safe to assume it's not a Soundcloud link."
  nil
end
448
+
# Check if a string is a Soundcloud URL, i.e. contains "api.soundcloud.com".
# nil is treated as "not a Soundcloud URL".
#
# Example:
#   >> Debugger.soundcloud_url?("http://api.soundcloud.com/tracks/59422468")
#   => true
#
# Arguments:
#   url: (String)
def self.soundcloud_url?(url)
  url.to_s.include?("api.soundcloud.com")
end
460
+
# Check if a url is a valid url
#
# The whole string must parse as an absolute URI (one with a scheme); text
# that merely contains a URL somewhere inside it is not valid.
#
# Example:
#   >> Debugger.valid_url?("http://wearepandr.com")
#   => true
#
# Arguments:
#   url: (String)
def self.valid_url?(url)
  URI.parse(url.to_s).absolute?
rescue URI::InvalidURIError
  false
end
472
+ end
473
+ end
@@ -0,0 +1,3 @@
1
+ module Debugher
2
+ VERSION = "0.0.1"
3
+ end
@@ -0,0 +1,195 @@
1
+ require './lib/debugher'
2
+ require 'test/unit'
3
+ require 'rack/test'
4
+
5
+ ENV['RACK_ENV'] = 'test'
6
+
7
+ class DebugherTest < Test::Unit::TestCase
8
+ include Rack::Test::Methods
9
+ include Debugher
10
+
11
+ def test_initialize
12
+ @page = Debugger.new("http://wearepandr.com/")
13
+
14
+ assert_equal @page.url, "http://wearepandr.com/"
15
+ end
16
+
17
+ def test_rss_feed_url
18
+ @page = Debugger.new("http://funtofunky.wordpress.com/")
19
+ assert_equal @page.rss_feed_url, "http://funtofunky.wordpress.com/feed/"
20
+
21
+ @page = Debugger.new("http://blog.iso50.com/")
22
+ assert_equal @page.rss_feed_url, "http://blog.iso50.com/feed/"
23
+ end
24
+
25
+ def test_atom_feed_url
26
+ @page = Debugger.new("http://wearepandr.com/")
27
+ assert_equal @page.atom_feed_url, "http://wearepandr.com/feed"
28
+
29
+ @page = Debugger.new("http://thefourohfive.com/")
30
+ assert_equal @page.atom_feed_url, "http://thefourohfive.com/feed"
31
+ end
32
+
33
+ def test_feed_url
34
+ # Atom Feed
35
+ @page = Debugger.new("http://wearepandr.com/")
36
+ assert_equal @page.feed_url, "http://wearepandr.com/feed"
37
+
38
+ # RSS Feed
39
+ @page = Debugger.new("http://funtofunky.wordpress.com")
40
+ assert_equal @page.feed_url, "http://funtofunky.wordpress.com/feed/"
41
+ end
42
+
43
+ def test_scrape_info
44
+ @page = Debugger.new("http://rakkit.com/about")
45
+ @scrape_info = @page.scrape_info
46
+
47
+ assert_equal '200 OK', @scrape_info[:response_code]
48
+ assert_equal 'http://rakkit.com/about', @scrape_info[:fetched_url]
49
+ assert_equal 'http://rakkit.com/', @scrape_info[:canonical_url]
50
+ assert_equal nil, @scrape_info[:feed_url]
51
+ end
52
+
53
+ def test_meta_data
54
+ @page = Debugger.new("http://rakkit.com")
55
+ @meta = @page.meta_data
56
+
57
+ assert_equal 'The latest new music from websites, artists and labels you love | Rakkit', @meta[:title]
58
+ assert_equal 'The Social link between new music and the fans.', @meta[:description]
59
+ end
60
+
61
+ def test_music_from_feed
62
+ @page = Debugger.new("http://blog.iso50.com")
63
+ @music_links = @page.music_from_feed
64
+
65
+ assert @music_links.kind_of?(Array)
66
+ end
67
+
68
+ def test_music_from_html
69
+ @page = Debugger.new("http://blog.iso50.com")
70
+ @music_links = @page.music_from_html
71
+
72
+ assert @music_links.kind_of?(Array)
73
+ end
74
+
75
+ def test_music_from_soundcloud
76
+ @page = Debugger.new("http://funtofunky.wordpress.com/")
77
+ @music_links = @page.music_from_soundcloud
78
+
79
+ assert @music_links.kind_of?(Array)
80
+ end
81
+
82
+ def test_page_links
83
+ @page = Debugger.new("http://funtofunky.wordpress.com/")
84
+ @internal_links = @page.internal_links
85
+
86
+ assert @internal_links.kind_of?(Array)
87
+ end
88
+
89
+ def test_valid_url?
90
+ @valid_url = Debugger.valid_url?("http://funtofunky.wordpress.com/")
91
+ assert_equal @valid_url, true
92
+
93
+ @valid_url = Debugger.valid_url?("blah blah blah")
94
+ assert_equal @valid_url, false
95
+ end
96
+
97
+ def test_host
98
+ @page = Debugger.new("http://funtofunky.wordpress.com/")
99
+ assert_equal @page.host, "funtofunky.wordpress.com"
100
+ end
101
+
102
+ def test_content_type
103
+ @page = Debugger.new("http://wearepandr.com")
104
+ assert_equal @page.content_type, "text/html"
105
+ end
106
+
107
+ def test_charset
108
+ @page = Debugger.new("http://wearepandr.com")
109
+ assert_equal @page.charset, "utf-8"
110
+ end
111
+
112
+ def test_content_encoding
113
+ # Need to find better examples of this
114
+ @page = Debugger.new("http://wearepandr.com")
115
+ assert_equal @page.content_encoding, []
116
+ end
117
+
118
+ def test_last_modified
119
+ # Need to find better examples of this
120
+ @page = Debugger.new("http://wearepandr.com")
121
+ assert_equal @page.last_modified, nil
122
+ end
123
+
124
+ # Self Methods
125
+ #
126
+ def test_user_agent
127
+ @ua = Debugger.user_agent
128
+ assert_equal @ua, "Rakkit/V#{Debugher::VERSION}"
129
+
130
+ @ua = Debugger.user_agent("PANDR")
131
+ assert_equal @ua, "PANDR/V#{Debugher::VERSION}"
132
+ end
133
+
134
+ def test_version
135
+ @version = Debugger.version
136
+
137
+ # Enough of a test that we're getting the Version #
138
+ assert_equal @version, "V#{Debugher::VERSION}"
139
+ end
140
+
141
+ def test_mail_to_link?
142
+ @url = "http://wearepandr.com"
143
+ assert_equal Debugger.mailto_link?(@url), false
144
+
145
+ @url = "mailto:pete@wearepandr.com"
146
+ assert_equal Debugger.mailto_link?(@url), true
147
+ end
148
+
149
+ def test_relative?
150
+ @url = "/"
151
+ assert_equal Debugger.relative?(@url), true
152
+
153
+ @url = "/about"
154
+ assert_equal Debugger.relative?(@url), true
155
+
156
+ @url = "http://wearepandr.com"
157
+ assert_equal Debugger.relative?(@url), false
158
+
159
+ @url = "http://wearepandr.com/"
160
+ assert_equal Debugger.relative?(@url), false
161
+
162
+ @url = "http://staff.wearepandr.com"
163
+ assert_equal Debugger.relative?(@url), false
164
+ end
165
+
166
+ def test_make_absolute
167
+ @absolute = Debugger.make_absolute("/about", "http://blog.iso50.com")
168
+ assert_equal @absolute, "http://blog.iso50.com/about"
169
+
170
+ @absolute = Debugger.make_absolute("/about", "http://blog.iso50.com/")
171
+ assert_equal @absolute, "http://blog.iso50.com/about"
172
+ end
173
+
174
+ def test_get_soundcloud_url
175
+ @soundcloud_embed = "https://w.soundcloud.com/player/?url=http%3A%2F%2Fapi.soundcloud.com%2Ftracks%2F59422468"
176
+ assert_equal Debugger.get_soundcloud_url(@soundcloud_embed), "http://api.soundcloud.com/tracks/59422468"
177
+
178
+ @soundcloud_embed = "https://w.soundcloud.com/player/?url=http%3A%2F%2Fapi.soundcloud.com%2Fplaylists%2F2153957"
179
+ assert_equal Debugger.get_soundcloud_url(@soundcloud_embed), "http://api.soundcloud.com/playlists/2153957"
180
+
181
+ @soundcloud_embed = "http://wearepandr.com"
182
+ assert_equal Debugger.get_soundcloud_url(@soundcloud_embed), nil
183
+ end
184
+
185
+ def test_soundcloud_url?
186
+ @url = "http://wearepandr.com"
187
+ assert_equal Debugger.soundcloud_url?(@url), false
188
+
189
+ @url = "http://api.soundcloud.com/playlists/2153957"
190
+ assert_equal Debugger.soundcloud_url?(@url), true
191
+
192
+ # A further addition to the method could be to test that there
193
+ # is a unique id on the end of the url.
194
+ end
195
+ end
metadata ADDED
@@ -0,0 +1,108 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: debugher
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Peter Roome
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-09-16 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: addressable
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: robots
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ description: ! "\n A handy set of methods for getting various
63
+ bits of information about a web page.\n This is used by
64
+ the Rakkit Debugger to output what information we can gather about various pages
65
+ on an adhoc basis.\n The library is also used by the Rakkit
66
+ spider to process and index pages across the web.\n "
67
+ email:
68
+ - pete@wearepandr.com
69
+ executables: []
70
+ extensions: []
71
+ extra_rdoc_files: []
72
+ files:
73
+ - .gitignore
74
+ - Gemfile
75
+ - LICENSE
76
+ - README.md
77
+ - README.rdoc
78
+ - Rakefile
79
+ - debugher.gemspec
80
+ - lib/debugher.rb
81
+ - lib/debugher/version.rb
82
+ - test/test_debugher.rb
83
+ homepage: ''
84
+ licenses: []
85
+ post_install_message:
86
+ rdoc_options: []
87
+ require_paths:
88
+ - lib
89
+ required_ruby_version: !ruby/object:Gem::Requirement
90
+ none: false
91
+ requirements:
92
+ - - ! '>='
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ required_rubygems_version: !ruby/object:Gem::Requirement
96
+ none: false
97
+ requirements:
98
+ - - ! '>='
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ requirements: []
102
+ rubyforge_project:
103
+ rubygems_version: 1.8.19
104
+ signing_key:
105
+ specification_version: 3
106
+ summary: Methods for the Rakkit Debugger.
107
+ test_files:
108
+ - test/test_debugher.rb