jm3-rubunkulous 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/bin/rubunkulous +156 -0
  2. metadata +82 -0
data/bin/rubunkulous ADDED
@@ -0,0 +1,156 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Ru-BUNK-u-lous.
4
+ # n. A reentrant link-checker for delicious power-users, stress-tested with stores of 12,000+ links.
5
+ # by jm3 (John Manoogian III)
6
+
7
+ require 'rubygems'
8
+ require 'curb'
9
+ require 'moneta'
10
+ require 'moneta/basic_file'
11
+ require 'moneta/xattr'
12
+ require 'net/https'
13
+ require 'progressbar'
14
+ require 'rexml/document'
15
+ include REXML
16
+
17
# Name of the cache entry that stores how many links have already been checked.
Pointer_Key = 'index_pointer'
# Upper bound used when truncating URLs into xattr key names.
Max_XAttr_Key_Length = 126 # i swear to christ...

@verbose = false

# Extracts the resume index from command-line arguments.
#
# Accepts either `--start=N` or a bare non-negative integer `N`. The whole
# argument must match (anchored with \A / \z), so strings such as
# "--start=5abc" or "12\nfoo" are ignored instead of being partially parsed.
# When several arguments match, the last one wins.
#
# @param args [Array<String>] raw command-line arguments
# @return [Integer] the requested start index, or 0 when none was given
def parse_start_override(args)
  args.inject(0) do |start, arg|
    match = /\A--start=(\d+)\z/.match(arg) || /\A(\d+)\z/.match(arg)
    match ? match[1].to_i : start
  end
end

@start_override = parse_start_override(ARGV)
28
+
29
# Path of the XML file used to cache the downloaded bookmark list.
#
# The cache lives in the current directory when that directory is writable,
# and in the user's home directory otherwise.
#
# @return [String] path to `.cached_links.xml`
def link_cache
  cache_name = '.cached_links.xml'
  base_dir = File.writable?('.') ? '.' : File.expand_path('~')
  File.join(base_dir, cache_name)
end
33
+
34
# Reads the Delicious username and password from ./credentials.yml.
#
# The file is expected to contain a `delicious` section with `user` and
# `password` entries; both symbol keys (`:delicious:`) and plain string keys
# (`delicious:`) are accepted.
#
# @return [Array] `[user, password]`, or `[]` when the file is missing or has
#   no usable `delicious` section
def credentials
  # Loaded here on purpose: the script's require list does not pull in YAML.
  require 'yaml'

  path = File.join('.', 'credentials.yml')
  return [] unless File.exist?(path)

  # safe_load with Symbol permitted: plain YAML.load rejects symbol keys on
  # Psych 4+, and nothing beyond scalars/hashes is needed from this file.
  config = YAML.safe_load(File.read(path), permitted_classes: [Symbol])
  return [] unless config.is_a?(Hash)

  section = config[:delicious] || config['delicious']
  return [] unless section.is_a?(Hash)

  [section[:user] || section['user'], section[:password] || section['password']]
end
40
+
41
# Downloads every bookmark from the Delicious API and parses the XML reply.
#
# Side effects: stores the raw response body in @response and the parsed
# document in @doc.
#
# @return [REXML::Document] the parsed response
# @raise [RuntimeError] when the connection fails, the server answers with a
#   non-2xx status (e.g. 401 for bad credentials), or the body is not
#   well-formed XML
def fetch_links
  @response = ''
  url = URI.parse('https://api.del.icio.us/v1/posts/all')

  begin
    http = Net::HTTP.new(url.host, url.port)
    http.use_ssl = true
    # Basic-auth credentials travel over this connection, so the server
    # certificate must be verified.
    http.verify_mode = OpenSSL::SSL::VERIFY_PEER

    res = http.start do |conn|
      # request_uri is the path plus the query string, when one is present.
      req = Net::HTTP::Get.new(url.request_uri)
      user, password = credentials
      req.basic_auth(user, password)
      conn.request(req)
    end

    # Net::HTTP#request does not raise on 4xx/5xx; surface those explicitly
    # so an error page is never handed to the XML parser.
    unless res.is_a?(Net::HTTPSuccess)
      raise Net::HTTPError.new("#{res.code} #{res.message}", res)
    end

    @response = res.body
  rescue Net::HTTPError, SocketError, SystemCallError, Timeout::Error, OpenSSL::SSL::SSLError => e
    raise "Can't connect to the server, please check the username/password.\nError: #{e} \n"
  end

  begin
    @doc = REXML::Document.new(@response)
  rescue REXML::ParseException => e
    raise "\nPosts#find XML parsing failed\nError: #{e}\n"
  end
end
70
+
71
# Returns the bookmark entries to check, downloading and caching them first
# when no cache file from a previous run exists.
#
# On a cache miss the API response is written to the path given by
# `link_cache` and, when the `tidy` tool is installed, repaired in place.
#
# @return [Array<REXML::Element>] every `posts/post` element of the cached XML
def links
  cache_path = link_cache

  # Check if we've got cached XML from a previous run
  unless File.exist?(cache_path)
    puts "Caching Delicious locally to avoid making slow API calls more than once."
    xml = fetch_links
    File.open(cache_path, 'w') { |f| f.write(xml) }

    # by default, the Delicious API returns responses with encoding errors; this fixes them:
    if `which tidy`.empty?
      puts "WARNING: you don't have tidy installed, which means that occasional errors in Delicious's API xml may hang up Rubunkulous. Try: sudo port install tidy to get it."
    else
      # -m rewrites the file in place (without it tidy only prints to stdout).
      # Argument-list form avoids the shell, so the path needs no quoting and
      # the output redirection does not depend on bash.
      system('tidy', '-xml', '-m', cache_path, :out => File::NULL, :err => File::NULL)
    end
  end

  puts "Loading link data saved from previous run (this could take a second...)"
  # Block form closes the file handle once the document has been parsed.
  xmldoc = File.open(cache_path) { |f| REXML::Document.new(f) }
  xmldoc.elements.to_a('posts/post')
end
90
+
91
# Prints the run summary: links checked, failures, links remaining, and a
# hint for resetting the saved position.
def print_report
  puts "\n#{@links_checked} links checked (#{@num_fails} failures) - #{@total_links - @links_checked} links to go."
  puts "\nTo clear last-checked counter and re-check all links, type: \nxattr .moneta_cache/xattr_cache -index_pointer"
end

# Records a failed link in the cache, keyed by its (truncated) URL.
#
# @param url [String] the bookmarked URL
# @param desc [String] the bookmark's description
# @param error [Object] status code or error text to include in the record
def log_failed(url, desc, error)
  # truncate xattr key names or suffer the consequences
  @cache.store(url[0..Max_XAttr_Key_Length], "#{desc} (#{url}) failed with #{error} at #{Time.now.to_s}")
end

# Requests every link in turn, recording failures and saving progress after
# each one so an interrupted run can resume where it stopped.
#
# The starting position is the --start override when given, otherwise the
# pointer saved in the cache, otherwise 0. Ctrl-C is honoured at the top of
# the next iteration: the summary is printed and the process exits.
#
# @param links [Array<REXML::Element>] bookmark elements carrying `href` and
#   `description` attributes
# @return [nil]
def check(links)
  return if links.empty?

  interrupted = false
  trap("INT") { interrupted = true }

  @links_checked = 0
  @num_fails = 0
  @total_links = 0

  @cache = Moneta::Xattr.new(:file => File.join(File.dirname(__FILE__), ".moneta_cache", "xattr_cache"))
  # to_i guards against the pointer coming back from the store as a string
  # (NOTE(review): the stored type depends on Moneta's serialisation — confirm).
  last_index = @start_override > 0 ? @start_override : (@cache[Pointer_Key] || 0).to_i

  puts "Left off at link ##{last_index} (of #{links.size} total links)." if last_index > 0

  # skip previously checked links; drop yields [] (not nil) when the saved
  # position lies beyond the end of the list
  links = links.drop(last_index)
  @total_links = links.size
  puts "#{@total_links} links to check."
  @pbar = ProgressBar.new(" Checking", links.size)

  links.each do |link|
    if interrupted
      print_report
      exit
    end

    url = link.attributes['href']
    desc = link.attributes['description']

    response = nil
    begin
      response = Curl::Easy.perform(url) do |curl|
        curl.follow_location = true
        curl.max_redirects = 2
        curl.connect_timeout = 3
        curl.timeout = 5 # total request timeout; saves us from having to wait for the request to complete if you bookmarked a 500mb file or something...
      end
    rescue StandardError => e
      # No HTTP status exists when the request itself blew up (DNS failure,
      # timeout, ...): count it as a failure and log it under the sentinel
      # code 666 together with the exception.
      @num_fails += 1
      log_failed(url, desc, "666 (#{e.class}: #{e.message})")
    end

    @links_checked += 1
    @pbar.inc
    # Persist the absolute position so the next run can resume from here.
    @cache.store(Pointer_Key, last_index + @links_checked)
    next unless response

    if response.response_code != 200
      puts "x FAIL #{response.response_code} #{url} (#{@links_checked})" if @verbose
      @num_fails += 1
      log_failed(url, desc, response.response_code)
    else
      puts "> OK #{url} (#{@links_checked})" if @verbose
    end
  end

  print_report
end
155
+
156
+ check(links)
metadata ADDED
@@ -0,0 +1,82 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jm3-rubunkulous
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - John Manoogian III
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-05-16 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: curb
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: moneta
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: progressbar
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: "0"
44
+ version:
45
+ description: reentrant link-checker for del.icio.us power-users
46
+ email: jm3@jm3.net
47
+ executables: []
48
+
49
+ extensions: []
50
+
51
+ extra_rdoc_files: []
52
+
53
+ files:
54
+ - bin/rubunkulous
55
+ has_rdoc: true
56
+ homepage: http://jm3.net/
57
+ post_install_message:
58
+ rdoc_options: []
59
+
60
+ require_paths:
61
+ - lib
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: "0"
67
+ version:
68
+ required_rubygems_version: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: "0"
73
+ version:
74
+ requirements: []
75
+
76
+ rubyforge_project: rubunkulous
77
+ rubygems_version: 1.2.0
78
+ signing_key:
79
+ specification_version: 2
80
+ summary: reentrant link-checker for del.icio.us power-users
81
+ test_files: []
82
+