jm3-rubunkulous 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/bin/rubunkulous +156 -0
  2. metadata +82 -0
data/bin/rubunkulous ADDED
@@ -0,0 +1,156 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Ru-BUNK-u-lous.
4
+ # n. A reentrant link-checker for delicious power-users, stress-tested with stores of 12,000+ links.
5
+ # by jm3 (John Manoogian III)
6
+
7
+ require 'rubygems'
8
+ require 'curb'
9
+ require 'moneta'
10
+ require 'moneta/basic_file'
11
+ require 'moneta/xattr'
12
+ require 'net/https'
13
+ require 'progressbar'
14
+ require 'rexml/document'
15
+ include REXML
16
+
17
# Cache key under which the index of the next unchecked link is persisted
# between runs.
Pointer_Key = 'index_pointer'.freeze

# Upper (inclusive) index used when truncating a failed URL into an xattr key.
Max_XAttr_Key_Length = 126 # i swear to christ...

# Parses the command-line arguments.
#
# Recognised arguments:
#   --start=N or a bare N  index of the first link to check (last one wins)
#   --verbose or -v        print one line per checked link
#
# Unrecognised arguments are ignored.
#
# Returns [start_index, verbose].
def parse_cli_options(args)
  start = 0
  verbose = false

  args.each do |arg|
    case arg
    # Anchored on the whole argument so that stray text around the number
    # is not silently accepted.
    when /\A--start=(\d+)\z/, /\A(\d+)\z/
      start = Regexp.last_match(1).to_i
    when '--verbose', '-v'
      verbose = true
    end
  end

  [start, verbose]
end

@start_override, @verbose = parse_cli_options(ARGV)
28
+
29
# Returns the path of the local XML cache of bookmarks: inside the current
# directory when that directory is writable, otherwise inside the user's
# home directory.
def link_cache
  cache_name = '.cached_links.xml'
  base_dir = if File.writable?('.')
               '.'
             else
               File.expand_path('~')
             end
  File.join(base_dir, cache_name)
end
33
+
34
# Reads the Delicious login from ./credentials.yml.
#
# The file is expected to hold a :delicious section with :user and :password
# entries (symbol keys), for example:
#
#   :delicious:
#     :user: someone
#     :password: secret
#
# Returns [user, password], or [] when the file or its :delicious section is
# missing.
def credentials
  # The script never requires YAML at the top; load it here so this method
  # does not depend on another library happening to have pulled it in.
  require 'yaml'

  path = File.join('.', 'credentials.yml')
  return [] unless File.exist?(path)

  # NOTE(review): YAML.load is kept because this is the user's own local
  # configuration file; do not point it at untrusted input.
  config = YAML.load(File.read(path))

  # An empty file parses to a non-Hash value and a file without the section
  # yields nil; treat both like a missing file instead of crashing on nil.
  section = config.is_a?(Hash) ? config[:delicious] : nil
  return [] unless section.is_a?(Hash)

  [section[:user], section[:password]]
end
40
+
41
# Downloads every bookmark from the Delicious "posts/all" API endpoint,
# authenticating with the login returned by `credentials`.
#
# Side effects: stores the raw response body in @response and the parsed
# document in @doc.
#
# Returns the parsed REXML::Document.
# Raises RuntimeError when the connection fails, when the server answers with
# a non-2xx status (e.g. bad username/password), or when the body is not
# well-formed XML.
def fetch_links
  @response = ''
  url = URI.parse('https://api.del.icio.us/v1/posts/all')
  user, password = credentials

  begin
    http = Net::HTTP.new(url.host, url.port)
    http.use_ssl = true
    # The request carries basic-auth credentials, so the server certificate
    # must be verified before they are sent.
    http.verify_mode = OpenSSL::SSL::VERIFY_PEER

    res = http.start do |conn|
      # request_uri is the path plus the query string, when there is one.
      req = Net::HTTP::Get.new(url.request_uri)
      req.basic_auth(user, password)
      conn.request(req)
    end
  rescue Net::HTTPError, SocketError, SystemCallError, Timeout::Error, OpenSSL::SSL::SSLError => e
    raise "Can't connect to the server, please check the username/password.\nError: #{e} \n"
  end

  # Net::HTTP does not raise on 4xx/5xx responses; without this check an
  # error page would be parsed (and later cached) as if it were bookmarks.
  unless res.is_a?(Net::HTTPSuccess)
    raise "Can't connect to the server, please check the username/password.\nError: #{res.code} #{res.message} \n"
  end

  @response = res.body

  begin
    @doc = REXML::Document.new(@response)
  rescue REXML::ParseException => e
    raise "\nPosts#find XML parsing failed\nError: #{e}\n"
  end
end
70
+
71
# Returns the bookmarks to check as an Array of REXML <post> elements.
#
# When no cache file exists at `link_cache`, the bookmarks are downloaded with
# `fetch_links`, written to the cache and, if the `tidy` executable is
# available, cleaned up in place. The cache file is then parsed on every run.
def links
  cache_path = link_cache

  # Check if we've got cached XML from a previous run
  unless File.exist?(cache_path)
    puts "Caching Delicious locally to avoid making slow API calls more than once."
    xml = fetch_links
    File.open(cache_path, 'w') { |f| f.write(xml) }

    # by default, the Delicious API returns responses with encoding errors; this fixes them:
    if `which tidy`.empty?
      puts "WARNING: you don't have tidy installed, which means that occasional errors in Delicious's API xml may hang up Rubunkulous. Try: sudo port install tidy to get it."
    else
      # -modify makes tidy rewrite the cache file itself; without it the
      # repaired XML only goes to stdout and is thrown away. The argument-list
      # form avoids the shell and targets the real cache path, which may live
      # in the home directory rather than the current one.
      system('tidy', '-xml', '-quiet', '-modify', cache_path, out: File::NULL, err: File::NULL)
    end
  end

  puts "Loading link data saved from previous run (this could take a second...)"
  # Block form closes the file handle once the document has been parsed.
  xmldoc = File.open(cache_path) { |io| REXML::Document.new(io) }
  xmldoc.elements.to_a('posts/post')
end
90
+
91
# Prints a summary of the current run: links checked, failures, links left,
# and how to reset the resume pointer.
def print_report
  puts "\n#{@links_checked} links checked (#{@num_fails} failures) - #{@total_links - @links_checked} links to go."
  puts "\nTo clear last-checked counter and re-check all links, type: \nxattr .moneta_cache/xattr_cache -index_pointer"
end

# Records a failed link in @cache, keyed by its (truncated) URL.
#
# url   - the bookmark URL that failed
# desc  - the bookmark description
# error - HTTP status code or error name to record
def log_failed(url, desc, error)
  # truncate xattr key names or suffer the consequences
  # NOTE(review): the range is inclusive, so up to Max_XAttr_Key_Length + 1
  # characters are kept; URLs sharing that prefix presumably overwrite each
  # other's entry.
  @cache.store(url[0..Max_XAttr_Key_Length], "#{desc} (#{url}) failed with #{error} at #{Time.now.to_s}")
end

# Requests every link and records the ones that fail.
#
# links - Array of elements whose `attributes` expose 'href' and 'description'.
#
# Progress is persisted in @cache under Pointer_Key after every link, so an
# interrupted run resumes where it stopped; @start_override (> 0) takes
# precedence over the stored pointer. Ctrl-C prints the report and exits
# before the next link is requested.
#
# A link fails when the request raises or the final status is not 200.
# Returns nil immediately when `links` is empty.
def check(links)
  return if links.empty?

  interrupted = false
  trap("INT") { interrupted = true }

  @links_checked, @num_fails, @total_links = 0, 0, 0

  @cache = Moneta::Xattr.new(:file => File.join(File.dirname(__FILE__), ".moneta_cache", "xattr_cache"))
  override = @start_override.to_i
  last_index = override > 0 ? override : (@cache[Pointer_Key] || 0).to_i

  puts "Left off at link ##{last_index} (of #{links.size} total links)." if last_index > 0

  # skip previously checked links; drop yields [] (not nil) when the pointer
  # lies beyond the end of the list, e.g. after bookmarks were deleted
  links = links.drop(last_index)
  @total_links = links.size
  puts "#{@total_links} links to check."
  @pbar = ProgressBar.new(" Checking", links.size)

  links.each do |link|
    if interrupted
      print_report
      exit
    end

    url = link.attributes['href']
    desc = link.attributes['description']

    response = nil
    request_error = nil
    begin
      response = Curl::Easy.perform(url) do |curl|
        curl.follow_location = true
        curl.max_redirects = 2
        curl.connect_timeout = 3
        curl.timeout = 5 # total request timeout; saves us from having to wait for the request to complete if you bookmarked a 500mb file or something...
      end
    rescue StandardError => e
      # No response object exists when the request itself blew up (DNS
      # failure, timeout, ...), so remember the error to report it below.
      request_error = e
    end

    @links_checked += 1
    @pbar.inc
    @cache.store(Pointer_Key, last_index + @links_checked)

    failure = if request_error
                request_error.class.name
              elsif response.response_code != 200
                response.response_code
              end

    if failure
      puts "x FAIL #{failure} #{url} (#{@links_checked})" if @verbose
      @num_fails += 1
      log_failed(url, desc, failure)
    else
      puts "> OK #{url} (#{@links_checked})" if @verbose
    end
  end

  print_report
end
155
+
156
# Script entry point: load the bookmarks (from the local cache, or from the
# API on the first run) and check each of them.
bookmarks = links
check(bookmarks)
metadata ADDED
@@ -0,0 +1,82 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jm3-rubunkulous
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - John Manoogian III
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-05-16 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: curb
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: moneta
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: progressbar
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: "0"
44
+ version:
45
+ description: reentrant link-checker for del.icio.us power-users
46
+ email: jm3@jm3.net
47
+ executables: []
48
+
49
+ extensions: []
50
+
51
+ extra_rdoc_files: []
52
+
53
+ files:
54
+ - bin/rubunkulous
55
+ has_rdoc: true
56
+ homepage: http://jm3.net/
57
+ post_install_message:
58
+ rdoc_options: []
59
+
60
+ require_paths:
61
+ - lib
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: "0"
67
+ version:
68
+ required_rubygems_version: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: "0"
73
+ version:
74
+ requirements: []
75
+
76
+ rubyforge_project: rubunkulous
77
+ rubygems_version: 1.2.0
78
+ signing_key:
79
+ specification_version: 2
80
+ summary: reentrant link-checker for del.icio.us power-users
81
+ test_files: []
82
+