jm3-rubunkulous 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/rubunkulous +156 -0
- metadata +82 -0
data/bin/rubunkulous
ADDED
@@ -0,0 +1,156 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Ru-BUNK-u-lous.
|
4
|
+
# n. A reentrant link-checker for delicious power-users, stress-tested with stores of 12,000+ links.
|
5
|
+
# by jm3 (John Manoogian III)
|
6
|
+
|
7
|
+
require 'rubygems'
require 'curb'
require 'moneta'
require 'moneta/basic_file'
require 'moneta/xattr'
require 'net/https'
require 'progressbar'
require 'rexml/document'
require 'yaml'
|
15
|
+
include REXML
|
16
|
+
|
17
|
+
# Key under which the index of the last checked link is stored in the cache.
Pointer_Key = 'index_pointer'.freeze
# Failed URLs are used as xattr key names and are truncated with this bound.
Max_XAttr_Key_Length = 126 # i swear to christ...

# Extracts the run-time options from a list of command-line arguments.
#
# Recognised arguments:
#   --start=N or a bare integer N   index of the link to (re)start checking at
#   -v / --verbose                  print one line per checked link
#
# Unrecognised arguments are ignored. When several start values are given,
# the last one wins.
#
# @param args [Array<String>] raw command-line arguments
# @return [Hash] `{ :verbose => Boolean, :start => Integer }`
def parse_options(args)
  parsed = { :verbose => false, :start => 0 }

  args.each do |arg|
    case arg
    when '-v', '--verbose'
      parsed[:verbose] = true
    when /--start=(\d+)/, /^(\d+)$/
      parsed[:start] = Regexp.last_match(1).to_i
    end
  end

  parsed
end

options = parse_options(ARGV)
@verbose = options[:verbose]
@start_override = options[:start]
|
28
|
+
|
29
|
+
# Returns the path of the local XML cache of Delicious links.
#
# The cache file lives in the current directory when that directory is
# writable; otherwise it falls back to the user's home directory.
#
# @return [String] path to `.cached_links.xml`
def link_cache
  cache_name = '.cached_links.xml'
  base_dir = File.writable?('.') ? '.' : File.expand_path('~')
  File.join(base_dir, cache_name)
end
|
33
|
+
|
34
|
+
# Reads the Delicious login from `./credentials.yml`.
#
# The file is expected to hold a `delicious` section with `user` and
# `password` entries; both symbol keys (`:delicious:`) and plain string keys
# (`delicious:`) are accepted.
#
# @return [Array] `[user, password]`, or `[]` when the file or its
#   `delicious` section is missing
def credentials
  path = File.join('.', 'credentials.yml')
  return [] unless File.exist?(path)

  # Symbols must be permitted explicitly: since Psych 4 a plain YAML.load
  # refuses to deserialize the `:delicious` style keys this file uses.
  config = YAML.safe_load(File.read(path), :permitted_classes => [Symbol])
  return [] unless config.is_a?(Hash)

  section = config[:delicious] || config['delicious']
  return [] unless section.is_a?(Hash)

  [
    section[:user] || section['user'],
    section[:password] || section['password']
  ]
end
|
40
|
+
|
41
|
+
# Downloads all bookmarks from the Delicious v1 API and parses the XML reply.
#
# Side effects: the raw response body is kept in @response and the parsed
# document in @doc.
#
# @return [REXML::Document] the parsed `posts/all` response
# @raise [RuntimeError] when the connection fails, the server answers with a
#   non-2xx status, or the body is not well-formed XML
def fetch_links
  @response = ''
  url = URI.parse('https://api.del.icio.us/v1/posts/all')
  path = url.query.nil? ? url.path : "#{url.path}?#{url.query}"

  begin
    http = Net::HTTP.new(url.host, url.port)
    http.use_ssl = true
    # Basic-auth credentials travel over this connection, so the server
    # certificate has to be verified.
    http.verify_mode = OpenSSL::SSL::VERIFY_PEER

    res = http.start do |conn|
      req = Net::HTTP::Get.new(path)
      user, password = credentials
      req.basic_auth(user, password) if user
      conn.request(req)
    end
  rescue Net::HTTPError, SocketError, SystemCallError, IOError,
         Timeout::Error, OpenSSL::SSL::SSLError => e
    raise "Can't connect to the server, please check the username/password.\nError: #{e} \n"
  end

  # Net::HTTP#request does not raise on 4xx/5xx, so a rejected login has to
  # be detected here rather than by the rescue above.
  unless res.is_a?(Net::HTTPSuccess)
    raise "Can't connect to the server, please check the username/password.\nError: #{res.code} #{res.message} \n"
  end

  @response = res.body

  begin
    @doc = REXML::Document.new(@response)
  rescue REXML::ParseException => e
    raise "\nPosts#find XML parsing failed\nError: #{e}\n"
  end
end
|
70
|
+
|
71
|
+
# Loads the list of bookmarks, downloading and caching them on first use.
#
# When no cache file exists yet, the bookmarks are fetched through
# `fetch_links`, written to the `link_cache` path and, if the `tidy` binary
# is available, repaired in place. The cache file is then parsed.
#
# @return [Array<REXML::Element>] the `posts/post` elements of the cache
def links
  cache_path = link_cache

  # Check if we've got cached XML from a previous run
  unless File.exist?(cache_path)
    puts "Caching Delicious locally to avoid making slow API calls more than once."
    xml = fetch_links
    File.open(cache_path, 'w') { |f| f.write(xml) }

    # by default, the Delicious API returns responses with encoding errors; this fixes them:
    if `which tidy`.empty?
      puts "WARNING: you don't have tidy installed, which means that occasional errors in Delicious's API xml may hang up Rubunkulous. Try: sudo port install tidy to get it."
    else
      # -m rewrites the file in place (without it tidy only prints the
      # repaired XML to stdout); the argument list avoids shell quoting and
      # the bash-only `&>` redirect.
      system('tidy', '-xml', '-q', '-m', cache_path, :out => File::NULL, :err => File::NULL)
    end
  end

  puts "Loading link data saved from previous run (this could take a second...)"
  # Block form closes the file handle once the document has been parsed.
  xmldoc = File.open(cache_path) { |f| REXML::Document.new(f) }
  xmldoc.elements.to_a('posts/post')
end
|
90
|
+
|
91
|
+
# Prints how many links were checked, how many failed and how many remain,
# followed by the hint for resetting the resume pointer.
# Reads @links_checked, @num_fails and @total_links.
def print_report
  puts "\n#{@links_checked} links checked (#{@num_fails} failures) - #{@total_links - @links_checked} links to go."
  puts "\nTo clear last-checked counter and re-check all links, type: \nxattr .moneta_cache/xattr_cache -index_pointer"
end

# Records a failed link in @cache, keyed by its (truncated) URL.
#
# @param url [String, nil] the bookmarked address
# @param desc [String, nil] the bookmark's description
# @param error [Integer] HTTP status code, or 666 for connection-level failures
def log_failed(url, desc, error)
  # truncate xattr key names or suffer the consequences
  @cache.store(url.to_s[0..Max_XAttr_Key_Length], "#{desc} (#{url}) failed with #{error} at #{Time.now.to_s}")
end

# Requests a URL and reports the final HTTP status.
#
# @param url [String] address to request
# @return [Integer, nil] the response code, or nil when the request raised
#   (timeout, DNS failure, too many redirects, malformed URL, ...)
def link_status(url)
  response = Curl::Easy.perform(url) do |curl|
    curl.follow_location = true
    curl.max_redirects = 2
    curl.connect_timeout = 3
    curl.timeout = 5 # total request timeout; saves us from having to wait for the request to complete if you bookmarked a 500mb file or something...
  end
  response.response_code
rescue StandardError
  nil
end

# Checks every bookmark in `links`, resuming after the last checked index.
#
# The resume position comes from @start_override when it is positive,
# otherwise from the pointer stored in the cache. Progress is written back to
# the cache after each link, so an interrupted run (Ctrl-C prints the report
# and exits) can pick up where it left off. Every link that does not answer
# with HTTP 200 is counted in @num_fails and logged through `log_failed`.
#
# @param links [Array<REXML::Element>] elements with `href` and
#   `description` attributes
# @return [nil]
def check(links)
  return unless links.size > 0

  interrupted = false
  trap("INT") { interrupted = true }

  @links_checked = 0
  @num_fails = 0
  @total_links = 0

  @cache = Moneta::Xattr.new(:file => File.join(File.dirname(__FILE__), ".moneta_cache", "xattr_cache"))
  # NOTE(review): the store may hand the pointer back as a String; to_i keeps
  # the comparisons and slicing below numeric either way - confirm.
  last_index = @start_override.to_i > 0 ? @start_override.to_i : @cache[Pointer_Key].to_i

  puts "Left off at link ##{last_index} (of #{links.size} total links)." if last_index > 0

  # skip previously checked links (the slice is nil when the start index
  # lies beyond the end of the list)
  remaining = links[last_index..-1] || []
  @total_links = remaining.size
  puts "#{@total_links} links to check."
  @pbar = ProgressBar.new(" Checking", remaining.size)

  remaining.each do |link|
    if interrupted
      print_report
      exit
    end

    url = link.attributes['href']
    desc = link.attributes['description']
    code = link_status(url)

    @links_checked += 1
    @pbar.inc
    @cache.store(Pointer_Key, last_index + @links_checked)

    if code == 200
      puts "> OK #{url} (#{@links_checked})" if @verbose
    else
      # 666 marks requests that never produced an HTTP status at all.
      code ||= 666
      puts "x FAIL #{code} #{url} (#{@links_checked})" if @verbose
      @num_fails += 1
      log_failed(url, desc, code)
    end
  end

  print_report
end
|
155
|
+
|
156
|
+
# Script entry point: load the bookmarks via `links` and check each of them.
check(links)
|
metadata
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jm3-rubunkulous
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- John Manoogian III
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-05-16 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: curb
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: moneta
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "0"
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: progressbar
|
37
|
+
type: :runtime
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: "0"
|
44
|
+
version:
|
45
|
+
description: reentrant link-checker for del.icio.us power-users
|
46
|
+
email: jm3@jm3.net
|
47
|
+
executables: []
|
48
|
+
|
49
|
+
extensions: []
|
50
|
+
|
51
|
+
extra_rdoc_files: []
|
52
|
+
|
53
|
+
files:
|
54
|
+
- bin/rubunkulous
|
55
|
+
has_rdoc: true
|
56
|
+
homepage: http://jm3.net/
|
57
|
+
post_install_message:
|
58
|
+
rdoc_options: []
|
59
|
+
|
60
|
+
require_paths:
|
61
|
+
- lib
|
62
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: "0"
|
67
|
+
version:
|
68
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
69
|
+
requirements:
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: "0"
|
73
|
+
version:
|
74
|
+
requirements: []
|
75
|
+
|
76
|
+
rubyforge_project: rubunkulous
|
77
|
+
rubygems_version: 1.2.0
|
78
|
+
signing_key:
|
79
|
+
specification_version: 2
|
80
|
+
summary: reentrant link-checker for del.icio.us power-users
|
81
|
+
test_files: []
|
82
|
+
|