chriskite-anemone 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. data/README.txt +3 -2
  2. data/bin/anemone_cron.rb +11 -13
  3. metadata +1 -1
data/README.txt CHANGED
@@ -12,7 +12,8 @@ write your own specialized spider tasks quickly and easily.
12
12
  * Allows exclusion of URLs based on regular expressions
13
13
 
14
14
  == REQUIREMENTS
15
- * hpricot
15
+ * nokogiri
16
+ * facets
16
17
 
17
18
  == EXAMPLES
18
- See the +bin+ directory for several examples of useful Anemone tasks.
19
+ See the +bin+ directory for several examples of useful Anemone tasks.
data/bin/anemone_cron.rb CHANGED
@@ -59,29 +59,27 @@ Anemone.crawl(root, {:discard_page_bodies => true}) do |anemone|
59
59
  pages.each_value do |page|
60
60
  url = page.url.to_s
61
61
  not_found << url if page.not_found?
62
- end
63
- if !not_found.empty?
62
+ end
63
+ unless not_found.empty?
64
64
  puts "\n404's:"
65
- not_found.each do |url|
65
+
66
+ missing_links = pages.urls_linking_to(not_found)
67
+ missing_links.each do |url, links|
66
68
  if options.relative
67
69
  puts URI(url).path.to_s
68
- else
70
+ else
69
71
  puts url
70
72
  end
71
- num_linked_from = 0
72
- pages.urls_linking_to(url).each do |u|
73
+ links.slice(0..10).each do |u|
73
74
  u = u.path if options.relative
74
- num_linked_from += 1
75
75
  puts " linked from #{u}"
76
- if num_linked_from > 10
77
- puts " ..."
78
- break
79
- end
80
76
  end
77
+
78
+ puts " ..." if missing_links.size > 10
81
79
  end
82
-
80
+
83
81
  print "\n"
84
- end
82
+ end
85
83
 
86
84
  # remove redirect aliases, and calculate pagedepths
87
85
  pages = pages.shortest_paths!(root).uniq
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chriskite-anemone
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Kite