jeremylightsmith-actionsite 0.3 → 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/action_site/async_link_checker.rb +28 -0
- data/lib/action_site/link_checker.rb +25 -19
- data/lib/action_site.rb +2 -1
- metadata +5 -1
@@ -0,0 +1,28 @@
|
|
1
|
+
module ActionSite
|
2
|
+
class AsyncLinkChecker < LinkChecker
|
3
|
+
def check(url)
|
4
|
+
url, html = fetch(url)
|
5
|
+
do_check(host_for(url), 1, url)
|
6
|
+
end
|
7
|
+
|
8
|
+
def do_check(host, level, *urls)
|
9
|
+
return if urls.empty?
|
10
|
+
raise "too many levels down" if level > 20
|
11
|
+
urls = urls.map {|url| clean_url(url) }.
|
12
|
+
reject {|url| ignore_url?(url, host)}
|
13
|
+
urls -= @checked_urls
|
14
|
+
@checked_urls += urls
|
15
|
+
puts "checking #{level} level(s) down :\n #{urls.join("\n ")}\n"
|
16
|
+
|
17
|
+
child_urls = []
|
18
|
+
urls.map do |url|
|
19
|
+
Thread.new do
|
20
|
+
url, html = fetch(url)
|
21
|
+
child_urls << links_from(html, url) if local?(host, url)
|
22
|
+
end
|
23
|
+
end.each {|t| t.join}
|
24
|
+
|
25
|
+
do_check(host, level + 1, *child_urls.flatten)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -11,37 +11,40 @@ module ActionSite
|
|
11
11
|
end
|
12
12
|
|
13
13
|
def check(url, host = nil, indent = "")
|
14
|
-
url = url
|
15
|
-
return if url
|
16
|
-
return if @options[:local] && host && !local?(host, url)
|
14
|
+
url = clean_url(url)
|
15
|
+
return if ignore_url?(url, host)
|
17
16
|
return if @checked_urls.include?(url)
|
18
17
|
@checked_urls << url
|
19
18
|
|
20
19
|
puts "#{indent}checking #{url}"
|
21
20
|
|
22
|
-
|
23
|
-
url, html = fetch(url)
|
24
|
-
rescue
|
25
|
-
puts "#{url} not found"
|
26
|
-
raise
|
27
|
-
end
|
21
|
+
url, html = fetch(url)
|
28
22
|
|
29
23
|
host ||= host_for(url)
|
30
|
-
doc = Hpricot(html)
|
31
|
-
|
32
24
|
if local?(host, url)
|
33
|
-
(
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
raise
|
40
|
-
end
|
25
|
+
links_from(html, url).each do |child_url|
|
26
|
+
begin
|
27
|
+
check child_url, host, indent + " "
|
28
|
+
rescue
|
29
|
+
puts "from #{url} as #{link}"
|
30
|
+
raise
|
41
31
|
end
|
42
32
|
end
|
43
33
|
end
|
44
34
|
end
|
35
|
+
|
36
|
+
def clean_url(url)
|
37
|
+
url.sub(/\#.*/, '')
|
38
|
+
end
|
39
|
+
|
40
|
+
def ignore_url?(url, host = nil)
|
41
|
+
url.blank? || (@options[:local] && host && !local?(host, url))
|
42
|
+
end
|
43
|
+
|
44
|
+
def links_from(html, url)
|
45
|
+
doc = Hpricot(html)
|
46
|
+
(doc / "a").map {|link| expand_link(url, link["href"])}.compact
|
47
|
+
end
|
45
48
|
|
46
49
|
def expand_link(from, to)
|
47
50
|
case to
|
@@ -89,6 +92,9 @@ module ActionSite
|
|
89
92
|
else
|
90
93
|
response.error!
|
91
94
|
end
|
95
|
+
rescue
|
96
|
+
puts "#{url} not found"
|
97
|
+
raise
|
92
98
|
end
|
93
99
|
|
94
100
|
def host_for(url)
|
data/lib/action_site.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jeremylightsmith-actionsite
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: "0.3"
|
4
|
+
version: "0.4"
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeremy Lightsmith
|
@@ -14,6 +14,7 @@ default_executable:
|
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: activesupport
|
17
|
+
type: :runtime
|
17
18
|
version_requirement:
|
18
19
|
version_requirements: !ruby/object:Gem::Requirement
|
19
20
|
requirements:
|
@@ -23,6 +24,7 @@ dependencies:
|
|
23
24
|
version:
|
24
25
|
- !ruby/object:Gem::Dependency
|
25
26
|
name: RedCloth
|
27
|
+
type: :runtime
|
26
28
|
version_requirement:
|
27
29
|
version_requirements: !ruby/object:Gem::Requirement
|
28
30
|
requirements:
|
@@ -32,6 +34,7 @@ dependencies:
|
|
32
34
|
version:
|
33
35
|
- !ruby/object:Gem::Dependency
|
34
36
|
name: markaby
|
37
|
+
type: :runtime
|
35
38
|
version_requirement:
|
36
39
|
version_requirements: !ruby/object:Gem::Requirement
|
37
40
|
requirements:
|
@@ -51,6 +54,7 @@ files:
|
|
51
54
|
- Rakefile
|
52
55
|
- readme.txt
|
53
56
|
- lib/action_site
|
57
|
+
- lib/action_site/async_link_checker.rb
|
54
58
|
- lib/action_site/context.rb
|
55
59
|
- lib/action_site/extensions
|
56
60
|
- lib/action_site/extensions/string.rb
|