jm3-rubunkulous 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/rubunkulous +156 -0
- metadata +82 -0
data/bin/rubunkulous
ADDED
@@ -0,0 +1,156 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Ru-BUNK-u-lous.
|
4
|
+
# n. A reentrant link-checker for delicious power-users, stress-tested with stores of 12,000+ links.
|
5
|
+
# by jm3 (John Manoogian III)
|
6
|
+
|
7
|
+
require 'rubygems'
|
8
|
+
require 'curb'
|
9
|
+
require 'moneta'
|
10
|
+
require 'moneta/basic_file'
|
11
|
+
require 'moneta/xattr'
|
12
|
+
require 'net/https'
|
13
|
+
require 'progressbar'
|
14
|
+
require 'rexml/document'
|
15
|
+
include REXML
|
16
|
+
|
17
|
+
# Cache key under which the index of the last-checked link is stored.
Pointer_Key = 'index_pointer'
# Last index kept when a URL is truncated into an xattr key name.
Max_XAttr_Key_Length = 126

# Per-link output is disabled; nothing in this section switches it on.
@verbose = false
# Link index to resume from; 0 means "no override requested".
@start_override = 0

# Accept either "--start=N" or a bare integer argument. When several
# arguments match, the last one wins.
ARGV.each do |arg|
  matched = arg.match(/--start=(\d+)/) || arg.match(/^(\d+)$/)
  @start_override = matched[1].to_i if matched
end
|
28
|
+
|
29
|
+
# Returns the path of the XML file that caches the downloaded links: inside
# the current directory when it is writable, otherwise inside the user's home
# directory.
def link_cache
  cache_name = '.cached_links.xml'
  base_dir = File.writable?('.') ? '.' : File.expand_path('~')
  File.join(base_dir, cache_name)
end
|
33
|
+
|
34
|
+
# Reads the Delicious login from ./credentials.yml.
#
# The file is expected to contain a :delicious mapping with :user and
# :password symbol keys.
#
# Returns [user, password], or [] when the file is missing, empty, or has no
# :delicious mapping.
def credentials
  require 'yaml' # YAML is not among the requires at the top of the file

  path = File.join('.', 'credentials.yml')
  return [] unless File.exist?(path)

  # NOTE(review): YAML.load is tolerable here only because the file is a
  # local, user-owned config file; never point this at untrusted data.
  config = YAML.load(File.read(path))
  section = config.is_a?(Hash) ? config[:delicious] : nil
  return [] unless section.is_a?(Hash)

  [section[:user], section[:password]]
end
|
40
|
+
|
41
|
+
# Downloads all bookmarks from the Delicious "posts/all" endpoint and parses
# the reply as XML.
#
# Authenticates with HTTP basic auth using the pair returned by `credentials`.
# Side effects: stores the raw response body in @response and the parsed
# document in @doc.
#
# Returns the parsed REXML::Document.
# Raises RuntimeError when the request fails (connection problem or non-2xx
# status) or when the body is not well-formed XML.
def fetch_links
  @response = ''
  begin
    url = URI.parse('https://api.del.icio.us/v1/posts/all')
    http = Net::HTTP.new(url.host, url.port)
    http.use_ssl = true
    # Credentials are sent over this connection, so verify the certificate.
    http.verify_mode = OpenSSL::SSL::VERIFY_PEER
    http.start do |conn|
      path = url.query.nil? ? url.path : "#{url.path}?#{url.query}"
      req = Net::HTTP::Get.new(path)
      user, password = credentials
      req.basic_auth user, password
      res = conn.request(req)
      # Net::HTTP#request does not raise on 4xx/5xx replies; #value does, which
      # is what makes the error message below reachable for a bad login.
      res.value
      @response = res.body
    end
  rescue Net::HTTPExceptions, SocketError, SystemCallError, Timeout::Error, OpenSSL::SSL::SSLError => e
    raise "Can't connect to the server, please check the username/password.\nError: #{e} \n"
  end

  begin
    @doc = REXML::Document.new @response
  rescue REXML::ParseException => e
    raise "\nPosts#find XML parsing failed\nError: #{e}\n"
  end
end
|
70
|
+
|
71
|
+
# Returns the bookmarked links as an array of REXML <post> elements.
#
# When no cache file exists yet, the links are downloaded with fetch_links,
# written to the link_cache path and, if the `tidy` binary is on the PATH,
# cleaned up in place. Every call then parses the cache file.
def links
  cache_path = link_cache

  # Check if we've got cached XML from a previous run
  unless File.exist?(cache_path)
    puts "Caching Delicious locally to avoid making slow API calls more than once."
    xml = fetch_links
    File.open(cache_path, 'w') { |f| f.write(xml) }

    # by default, the Delicious API returns responses with encoding errors; this fixes them:
    if `which tidy`.empty?
      puts "WARNING: you don't have tidy installed, which means that occasional errors in Delicious's API xml may hang up Rubunkulous. Try: sudo port install tidy to get it."
    else
      # -m rewrites the cache file in place (without it tidy only prints the
      # cleaned XML to stdout); -q and the POSIX redirect keep it silent.
      `tidy -xml -m -q "#{cache_path}" > /dev/null 2>&1`
    end
  end

  puts "Loading link data saved from previous run (this could take a second...)"
  # The block form closes the file handle once the document has been parsed.
  xmldoc = File.open(cache_path) { |f| REXML::Document.new(f) }
  xmldoc.elements.to_a('posts/post')
end
|
90
|
+
|
91
|
+
# Prints how many links were checked, how many failed and how many remain,
# followed by the command that resets the resume pointer.
def print_report
  puts "\n#{@links_checked} links checked (#{@num_fails} failures) - #{@total_links - @links_checked} links to go."
  puts "\nTo clear last-checked counter and re-check all links, type: \nxattr .moneta_cache/xattr_cache -index_pointer"
end

# Records a failed link in @cache, keyed by the (truncated) URL.
#
# url   - the link's href (nil is tolerated and stored under an empty key)
# desc  - the link's description
# error - HTTP status code, or the sentinel 666 when no response was received
def log_failed(url, desc, error)
  # truncate xattr key names or suffer the consequences
  @cache.store(url.to_s[0..Max_XAttr_Key_Length], "#{desc} (#{url}) failed with #{error} at #{Time.now.to_s}")
end

# Requests every link and records the ones that do not answer with HTTP 200.
#
# links - array of elements exposing attributes['href'] and
#         attributes['description'] (as returned by `links`)
#
# Progress is persisted under Pointer_Key after every link, so an interrupted
# run resumes where it stopped; @start_override (> 0) takes precedence over the
# stored pointer. Ctrl-C prints the report and exits before the next link.
#
# Returns nil immediately when links is empty.
def check(links)
  return unless links.size > 0
  interrupted = false
  trap("INT") { interrupted = true }

  @links_checked, @num_fails, @total_links = 0, 0, 0

  @cache = Moneta::Xattr.new(:file => File.join(File.dirname(__FILE__), ".moneta_cache", "xattr_cache"))
  last_index = (@start_override > 0 ? @start_override : nil) || @cache[Pointer_Key] || 0

  puts "Left off at link ##{last_index} (of #{links.size} total links)." if last_index > 0

  # skip previously checked links; slicing past the end yields nil, so fall
  # back to an empty list instead of crashing on nil.size
  links = links[last_index..-1] || []
  @total_links = links.size
  puts "#{@total_links} links to check."
  @pbar = ProgressBar.new(" Checking", links.size)

  links.each do |link|
    if interrupted
      print_report
      exit
    end

    url = link.attributes['href']
    desc = link.attributes['description']

    begin
      response = Curl::Easy.perform(url) do |curl|
        curl.follow_location = true
        curl.max_redirects = 2
        curl.connect_timeout = 3
        curl.timeout = 5 # total request timeout; avoids waiting on huge downloads
      end
    rescue
      # perform raised before any response existed (timeout, DNS failure, ...):
      # count it as a failure and log it with the sentinel code 666.
      @num_fails += 1
      log_failed(url, desc, 666)
    end

    @links_checked += 1
    @pbar.inc
    @cache.store(Pointer_Key, last_index + @links_checked)
    next unless response

    if response.response_code != 200
      puts "x FAIL #{response.response_code} #{url} (#{@links_checked})" if @verbose
      @num_fails += 1
      log_failed(url, desc, response.response_code)
    else
      puts "> OK #{url} (#{@links_checked})" if @verbose
    end
  end
  print_report
end
|
155
|
+
|
156
|
+
# Script entry point: load the links (from the local cache or the API) and check each one.
check(links)
|
metadata
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jm3-rubunkulous
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- John Manoogian III
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-05-16 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: curb
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: moneta
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "0"
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: progressbar
|
37
|
+
type: :runtime
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: "0"
|
44
|
+
version:
|
45
|
+
description: reentrant link-checker for del.icio.us power-users
|
46
|
+
email: jm3@jm3.net
|
47
|
+
executables: []
|
48
|
+
|
49
|
+
extensions: []
|
50
|
+
|
51
|
+
extra_rdoc_files: []
|
52
|
+
|
53
|
+
files:
|
54
|
+
- bin/rubunkulous
|
55
|
+
has_rdoc: true
|
56
|
+
homepage: http://jm3.net/
|
57
|
+
post_install_message:
|
58
|
+
rdoc_options: []
|
59
|
+
|
60
|
+
require_paths:
|
61
|
+
- lib
|
62
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: "0"
|
67
|
+
version:
|
68
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
69
|
+
requirements:
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: "0"
|
73
|
+
version:
|
74
|
+
requirements: []
|
75
|
+
|
76
|
+
rubyforge_project: rubunkulous
|
77
|
+
rubygems_version: 1.2.0
|
78
|
+
signing_key:
|
79
|
+
specification_version: 2
|
80
|
+
summary: reentrant link-checker for del.icio.us power-users
|
81
|
+
test_files: []
|
82
|
+
|