cobweb 0.0.55 → 0.0.57

Sign up to get free protection for your applications and to get access to all the features.
Files changed (177) hide show
  1. data/README.textile +2 -1
  2. data/lib/cobweb.rb +4 -1
  3. data/lib/cobweb_crawler.rb +1 -4
  4. data/lib/cobweb_version.rb +1 -1
  5. data/lib/content_link_parser.rb +1 -0
  6. data/lib/crawl_job.rb +4 -3
  7. data/lib/server.rb +2 -2
  8. data/lib/stats.rb +6 -1
  9. data/spec/cobweb/cobweb_job_spec.rb +162 -24
  10. data/spec/samples/sample_server.rb +21 -0
  11. data/spec/samples/sample_site/boxgrid.html +258 -0
  12. data/spec/samples/sample_site/css/accordion.css +45 -0
  13. data/spec/samples/sample_site/css/datatable.css +189 -0
  14. data/spec/samples/sample_site/css/datepicker.css +171 -0
  15. data/spec/samples/sample_site/css/form-buttons.css +180 -0
  16. data/spec/samples/sample_site/css/forms.css +489 -0
  17. data/spec/samples/sample_site/css/jquery.fancybox-1.3.4.css +282 -0
  18. data/spec/samples/sample_site/css/jquery.treeview.css +103 -0
  19. data/spec/samples/sample_site/css/link-buttons.css +187 -0
  20. data/spec/samples/sample_site/css/login.css +110 -0
  21. data/spec/samples/sample_site/css/menu.css +156 -0
  22. data/spec/samples/sample_site/css/messages.css +58 -0
  23. data/spec/samples/sample_site/css/modalbox.css +63 -0
  24. data/spec/samples/sample_site/css/statics.css +57 -0
  25. data/spec/samples/sample_site/css/style.css +497 -0
  26. data/spec/samples/sample_site/css/style_text.css +128 -0
  27. data/spec/samples/sample_site/css/tabs.css +58 -0
  28. data/spec/samples/sample_site/css/wysiwyg-editor.css +18 -0
  29. data/spec/samples/sample_site/css/wysiwyg.css +149 -0
  30. data/spec/samples/sample_site/css/wysiwyg.modal.css +69 -0
  31. data/spec/samples/sample_site/dashboard.html +852 -0
  32. data/spec/samples/sample_site/forms.html +329 -0
  33. data/spec/samples/sample_site/gallery.html +263 -0
  34. data/spec/samples/sample_site/gfx/back-menu.gif +0 -0
  35. data/spec/samples/sample_site/gfx/back-submenu.gif +0 -0
  36. data/spec/samples/sample_site/gfx/background.gif +0 -0
  37. data/spec/samples/sample_site/gfx/box-hide.png +0 -0
  38. data/spec/samples/sample_site/gfx/box-search.png +0 -0
  39. data/spec/samples/sample_site/gfx/box-title.gif +0 -0
  40. data/spec/samples/sample_site/gfx/code.gif +0 -0
  41. data/spec/samples/sample_site/gfx/datepicker-arrows.gif +0 -0
  42. data/spec/samples/sample_site/gfx/fancybox/blank.gif +0 -0
  43. data/spec/samples/sample_site/gfx/fancybox/fancy_close.png +0 -0
  44. data/spec/samples/sample_site/gfx/fancybox/fancy_loading.png +0 -0
  45. data/spec/samples/sample_site/gfx/fancybox/fancy_nav_left.png +0 -0
  46. data/spec/samples/sample_site/gfx/fancybox/fancy_nav_right.png +0 -0
  47. data/spec/samples/sample_site/gfx/fancybox/fancy_title_left.png +0 -0
  48. data/spec/samples/sample_site/gfx/fancybox/fancy_title_main.png +0 -0
  49. data/spec/samples/sample_site/gfx/fancybox/fancy_title_over.png +0 -0
  50. data/spec/samples/sample_site/gfx/fancybox/fancy_title_right.png +0 -0
  51. data/spec/samples/sample_site/gfx/fancybox/fancybox-x.png +0 -0
  52. data/spec/samples/sample_site/gfx/fancybox/fancybox.png +0 -0
  53. data/spec/samples/sample_site/gfx/forms/date-next.gif +0 -0
  54. data/spec/samples/sample_site/gfx/forms/date-prev.gif +0 -0
  55. data/spec/samples/sample_site/gfx/forms/forms-checkbox.gif +0 -0
  56. data/spec/samples/sample_site/gfx/forms/forms-date.gif +0 -0
  57. data/spec/samples/sample_site/gfx/forms/forms-file.gif +0 -0
  58. data/spec/samples/sample_site/gfx/forms/forms-input-big.gif +0 -0
  59. data/spec/samples/sample_site/gfx/forms/forms-input-medium.gif +0 -0
  60. data/spec/samples/sample_site/gfx/forms/forms-input-small.gif +0 -0
  61. data/spec/samples/sample_site/gfx/forms/forms-input-xl.gif +0 -0
  62. data/spec/samples/sample_site/gfx/forms/forms-radio.gif +0 -0
  63. data/spec/samples/sample_site/gfx/forms/forms-selectbox-small.gif +0 -0
  64. data/spec/samples/sample_site/gfx/forms/forms-selectbox.gif +0 -0
  65. data/spec/samples/sample_site/gfx/forms/forms-textarea-big.gif +0 -0
  66. data/spec/samples/sample_site/gfx/forms/forms-textarea-medium.gif +0 -0
  67. data/spec/samples/sample_site/gfx/forms/forms-textarea-small.gif +0 -0
  68. data/spec/samples/sample_site/gfx/forms/forms-textarea-xl.gif +0 -0
  69. data/spec/samples/sample_site/gfx/icon-delete.png +0 -0
  70. data/spec/samples/sample_site/gfx/icon-edit.png +0 -0
  71. data/spec/samples/sample_site/gfx/icon-home.gif +0 -0
  72. data/spec/samples/sample_site/gfx/img-delete.png +0 -0
  73. data/spec/samples/sample_site/gfx/img-hover.png +0 -0
  74. data/spec/samples/sample_site/gfx/img-zoom.png +0 -0
  75. data/spec/samples/sample_site/gfx/jquery.wysiwyg.gif +0 -0
  76. data/spec/samples/sample_site/gfx/label-icons.gif +0 -0
  77. data/spec/samples/sample_site/gfx/label.gif +0 -0
  78. data/spec/samples/sample_site/gfx/li-down.gif +0 -0
  79. data/spec/samples/sample_site/gfx/li.gif +0 -0
  80. data/spec/samples/sample_site/gfx/link-button-big.gif +0 -0
  81. data/spec/samples/sample_site/gfx/link-button-medium.gif +0 -0
  82. data/spec/samples/sample_site/gfx/link-button.gif +0 -0
  83. data/spec/samples/sample_site/gfx/loading-2.gif +0 -0
  84. data/spec/samples/sample_site/gfx/loading.gif +0 -0
  85. data/spec/samples/sample_site/gfx/logo.png +0 -0
  86. data/spec/samples/sample_site/gfx/modal-title.gif +0 -0
  87. data/spec/samples/sample_site/gfx/photos/00.jpg +0 -0
  88. data/spec/samples/sample_site/gfx/photos/01.jpg +0 -0
  89. data/spec/samples/sample_site/gfx/photos/01xl.jpg +0 -0
  90. data/spec/samples/sample_site/gfx/photos/02.jpg +0 -0
  91. data/spec/samples/sample_site/gfx/photos/02xl.jpg +0 -0
  92. data/spec/samples/sample_site/gfx/photos/03.jpg +0 -0
  93. data/spec/samples/sample_site/gfx/photos/03xl.jpg +0 -0
  94. data/spec/samples/sample_site/gfx/photos/04.jpg +0 -0
  95. data/spec/samples/sample_site/gfx/photos/04xl.jpg +0 -0
  96. data/spec/samples/sample_site/gfx/photos/05.jpg +0 -0
  97. data/spec/samples/sample_site/gfx/photos/05xl.jpg +0 -0
  98. data/spec/samples/sample_site/gfx/photos/06.jpg +0 -0
  99. data/spec/samples/sample_site/gfx/photos/06xl.jpg +0 -0
  100. data/spec/samples/sample_site/gfx/photos/07.jpg +0 -0
  101. data/spec/samples/sample_site/gfx/photos/07xl.jpg +0 -0
  102. data/spec/samples/sample_site/gfx/photos/08.jpg +0 -0
  103. data/spec/samples/sample_site/gfx/photos/08xl.jpg +0 -0
  104. data/spec/samples/sample_site/gfx/photos/09.jpg +0 -0
  105. data/spec/samples/sample_site/gfx/photos/09xl.jpg +0 -0
  106. data/spec/samples/sample_site/gfx/photos/10.jpg +0 -0
  107. data/spec/samples/sample_site/gfx/photos/10xl.jpg +0 -0
  108. data/spec/samples/sample_site/gfx/photos/11.jpg +0 -0
  109. data/spec/samples/sample_site/gfx/photos/11xl.jpg +0 -0
  110. data/spec/samples/sample_site/gfx/photos/12.jpg +0 -0
  111. data/spec/samples/sample_site/gfx/photos/12xl.jpg +0 -0
  112. data/spec/samples/sample_site/gfx/photos/13.jpg +0 -0
  113. data/spec/samples/sample_site/gfx/photos/13xl.jpg +0 -0
  114. data/spec/samples/sample_site/gfx/photos/14.jpg +0 -0
  115. data/spec/samples/sample_site/gfx/photos/14xl.jpg +0 -0
  116. data/spec/samples/sample_site/gfx/photos/15.jpg +0 -0
  117. data/spec/samples/sample_site/gfx/photos/15xl.jpg +0 -0
  118. data/spec/samples/sample_site/gfx/search-button.gif +0 -0
  119. data/spec/samples/sample_site/gfx/search-input.gif +0 -0
  120. data/spec/samples/sample_site/gfx/slider-button.gif +0 -0
  121. data/spec/samples/sample_site/gfx/system-messages.gif +0 -0
  122. data/spec/samples/sample_site/gfx/table-asc-arrow.gif +0 -0
  123. data/spec/samples/sample_site/gfx/table-desc-arrow.gif +0 -0
  124. data/spec/samples/sample_site/gfx/table-first.gif +0 -0
  125. data/spec/samples/sample_site/gfx/table-last.gif +0 -0
  126. data/spec/samples/sample_site/gfx/table-next.gif +0 -0
  127. data/spec/samples/sample_site/gfx/table-number.gif +0 -0
  128. data/spec/samples/sample_site/gfx/table-prev.gif +0 -0
  129. data/spec/samples/sample_site/gfx/table-rows.gif +0 -0
  130. data/spec/samples/sample_site/gfx/table-search.gif +0 -0
  131. data/spec/samples/sample_site/gfx/table-thead.gif +0 -0
  132. data/spec/samples/sample_site/gfx/tooltip.gif +0 -0
  133. data/spec/samples/sample_site/gfx/treeview/ajax-loader.gif +0 -0
  134. data/spec/samples/sample_site/gfx/treeview/file.gif +0 -0
  135. data/spec/samples/sample_site/gfx/treeview/folder-closed.gif +0 -0
  136. data/spec/samples/sample_site/gfx/treeview/folder.gif +0 -0
  137. data/spec/samples/sample_site/gfx/treeview/minus.gif +0 -0
  138. data/spec/samples/sample_site/gfx/treeview/plus.gif +0 -0
  139. data/spec/samples/sample_site/gfx/treeview/treeview-default-line.gif +0 -0
  140. data/spec/samples/sample_site/gfx/treeview/treeview-default.gif +0 -0
  141. data/spec/samples/sample_site/index.html +852 -0
  142. data/spec/samples/sample_site/js/controls/wysiwyg.image.js +284 -0
  143. data/spec/samples/sample_site/js/controls/wysiwyg.link.js +210 -0
  144. data/spec/samples/sample_site/js/controls/wysiwyg.table.js +151 -0
  145. data/spec/samples/sample_site/js/customInput.jquery.js +68 -0
  146. data/spec/samples/sample_site/js/excanvas.min.js +1 -0
  147. data/spec/samples/sample_site/js/hoverIntent.js +84 -0
  148. data/spec/samples/sample_site/js/inline.js +392 -0
  149. data/spec/samples/sample_site/js/jquery-1.7.1.min.js +4 -0
  150. data/spec/samples/sample_site/js/jquery-ui-select.js +522 -0
  151. data/spec/samples/sample_site/js/jquery-ui-timepicker-addon.js +1299 -0
  152. data/spec/samples/sample_site/js/jquery-ui.js +791 -0
  153. data/spec/samples/sample_site/js/jquery.dataTables.js +7440 -0
  154. data/spec/samples/sample_site/js/jquery.fancybox-1.3.4.js +1156 -0
  155. data/spec/samples/sample_site/js/jquery.filestyle.mini.js +2 -0
  156. data/spec/samples/sample_site/js/jquery.flot.js +2600 -0
  157. data/spec/samples/sample_site/js/jquery.flot.resize.min.js +60 -0
  158. data/spec/samples/sample_site/js/jquery.graphtable-0.2.js +179 -0
  159. data/spec/samples/sample_site/js/jquery.tipsy.js +104 -0
  160. data/spec/samples/sample_site/js/jquery.treeview.js +256 -0
  161. data/spec/samples/sample_site/js/jquery.wysiwyg.js +2454 -0
  162. data/spec/samples/sample_site/js/plugins/wysiwyg.rmFormat.js +348 -0
  163. data/spec/samples/sample_site/js/superfish.js +121 -0
  164. data/spec/samples/sample_site/js/supersubs.js +90 -0
  165. data/spec/samples/sample_site/more.html +503 -0
  166. data/spec/samples/sample_site/tables.html +845 -0
  167. data/spec/samples/sample_site/text/museosans-webfont.eot +0 -0
  168. data/spec/samples/sample_site/text/museosans-webfont.svg +231 -0
  169. data/spec/samples/sample_site/text/museosans-webfont.ttf +0 -0
  170. data/spec/samples/sample_site/text/museosans-webfont.woff +0 -0
  171. data/spec/samples/sample_site/text/museosans_bold-webfont.eot +0 -0
  172. data/spec/samples/sample_site/text/museosans_bold-webfont.svg +231 -0
  173. data/spec/samples/sample_site/text/museosans_bold-webfont.ttf +0 -0
  174. data/spec/samples/sample_site/text/museosans_bold-webfont.woff +0 -0
  175. data/spec/samples/sample_site/typography.html +192 -0
  176. data/spec/spec_helper.rb +10 -2
  177. metadata +196 -26
data/README.textile CHANGED
@@ -1,5 +1,6 @@
1
1
 
2
- h1. Cobweb v0.0.55
2
+ h1. Cobweb v0.0.57
3
+ !https://secure.travis-ci.org/stewartmckee/cobweb.png?branch=master!
3
4
 
4
5
  h2. Intro
5
6
 
data/lib/cobweb.rb CHANGED
@@ -56,7 +56,9 @@ class Cobweb
56
56
 
57
57
  if @options[:internal_urls].nil? || @options[:internal_urls].empty?
58
58
  uri = Addressable::URI.parse(base_url)
59
- @options[:internal_urls] = [[uri.scheme, "://", uri.host, "/*"].join]
59
+ @options[:internal_urls] = []
60
+ @options[:internal_urls] << [uri.scheme, "://", uri.host, "/*"].join
61
+ @options[:internal_urls] << [uri.scheme, "://", uri.host, ":", uri.inferred_port, "/*"].join
60
62
  end
61
63
 
62
64
  request.merge!(@options)
@@ -72,6 +74,7 @@ class Cobweb
72
74
  @options[:internal_urls].map{|url| @redis.sadd("internal_urls", url)}
73
75
 
74
76
  Resque.enqueue(CrawlJob, request)
77
+ request
75
78
  end
76
79
 
77
80
  # Returns array of cookies from content
data/lib/cobweb_crawler.rb CHANGED
@@ -20,7 +20,7 @@ class CobwebCrawler
20
20
  @options[:crawl_id] = @crawl_id
21
21
  end
22
22
 
23
- @redis = NamespacedRedis.new(@options[:redis_options], "cobweb-#{@crawl_id}")
23
+ @redis = NamespacedRedis.new(@options[:redis_options], "cobweb-#{Cobweb.version}-#{@crawl_id}")
24
24
  @options[:internal_urls] = [] if @options[:internal_urls].nil?
25
25
  @options[:internal_urls].map{|url| @redis.sadd("internal_urls", url)}
26
26
  @debug = @options[:debug]
@@ -42,9 +42,6 @@ class CobwebCrawler
42
42
 
43
43
  @crawl_options = crawl_options
44
44
 
45
- puts "http://localhost:4567/statistics/#{@crawl_id}"
46
- puts ""
47
-
48
45
  @redis.sadd("queued", base_url) unless @redis.sismember("crawled", base_url) || @redis.sismember("queued", base_url)
49
46
  crawl_counter = @redis.scard("crawled").to_i
50
47
  queue_counter = @redis.scard("queued").to_i
data/lib/cobweb_version.rb CHANGED
@@ -3,7 +3,7 @@ class CobwebVersion
3
3
 
4
4
  # Returns a string of the current version
5
5
  def self.version
6
- "0.0.55"
6
+ "0.0.57"
7
7
  end
8
8
 
9
9
  end
data/lib/content_link_parser.rb CHANGED
@@ -46,6 +46,7 @@ class ContentLinkParser
46
46
  data = link_data
47
47
  links = data.keys.map{|key| data[key]}.flatten.uniq
48
48
  links = links.map{|link| UriHelper.join_no_fragment(@url, link).to_s }
49
+ links = links.reject{|link| link =~ /\/([^\/]+?)\/\1\// }
49
50
  links = links.reject{|link| link =~ /([^\/]+?)\/([^\/]+?)\/.*?\1\/\2/ }
50
51
  links = links.select{|link| options[:valid_schemes].include? link.split(':')[0].to_sym}
51
52
  links
data/lib/crawl_job.rb CHANGED
@@ -36,6 +36,7 @@ class CrawlJob
36
36
  content = Cobweb.new(content_request).get(content_request[:url], content_request)
37
37
 
38
38
  ## update statistics
39
+ @stats.update_status("Crawling #{content_request[:url]}...")
39
40
  @stats.update_statistics(content)
40
41
 
41
42
  # set the base url if this is the first page
@@ -72,7 +73,7 @@ class CrawlJob
72
73
  if @redis.scard("queued") == 0
73
74
  finished(content_request)
74
75
  end
75
- elsif @queue_counter == 0 || @crawl_counter >= content_request[:crawl_limit].to_i
76
+ elsif @queue_counter == 0 || @crawl_counter > content_request[:crawl_limit].to_i
76
77
  finished(content_request)
77
78
  end
78
79
  end
@@ -109,12 +110,12 @@ class CrawlJob
109
110
 
110
111
  # Returns true if the crawl count is within limits
111
112
  def self.within_crawl_limits?(crawl_limit)
112
- crawl_limit.nil? or @crawl_counter < crawl_limit.to_i
113
+ crawl_limit.nil? or @crawl_counter <= crawl_limit.to_i
113
114
  end
114
115
 
115
116
  # Returns true if the queue count is calculated to be still within limits when complete
116
117
  def self.within_queue_limits?(crawl_limit)
117
- within_crawl_limits?(crawl_limit) and (crawl_limit.nil? or (@queue_counter + @crawl_counter) < crawl_limit.to_i)
118
+ within_crawl_limits?(crawl_limit) && (crawl_limit.nil? || (@queue_counter + @crawl_counter) < crawl_limit.to_i)
118
119
  end
119
120
 
120
121
  # Sets the base url in redis. If the first page is a redirect, it sets the base_url to the destination
data/lib/server.rb CHANGED
@@ -17,7 +17,7 @@ class Server < Sinatra::Base
17
17
 
18
18
  @crawls = []
19
19
  @full_redis.smembers("cobweb_crawls").each do |crawl_id|
20
- redis = NamespacedRedis.new({}, "cobweb-#{crawl_id}")
20
+ redis = NamespacedRedis.new({}, "cobweb-#{Cobweb.version}-#{crawl_id}")
21
21
  stats = HashUtil.deep_symbolize_keys({
22
22
  :crawl_details => redis.hgetall("crawl_details"),
23
23
  :statistics => redis.hgetall("statistics"),
@@ -31,7 +31,7 @@ class Server < Sinatra::Base
31
31
 
32
32
  # Sinatra Crawl Detail
33
33
  get '/statistics/:crawl_id' do
34
- redis = NamespacedRedis.new({}, "cobweb-#{params[:crawl_id]}")
34
+ redis = NamespacedRedis.new({}, "cobweb-#{Cobweb.version}-#{params[:crawl_id]}")
35
35
 
36
36
  @statistics = HashUtil.deep_symbolize_keys(redis.hgetall("statistics"))
37
37
  if @statistics[:status_counts].nil?
data/lib/stats.rb CHANGED
@@ -4,8 +4,9 @@ class Stats
4
4
 
5
5
  # Sets up redis usage for statistics
6
6
  def initialize(options)
7
+ options[:redis_options] = {} unless options.has_key? :redis_options
7
8
  @full_redis = Redis.new(options[:redis_options])
8
- @redis = NamespacedRedis.new(options[:redis_options], "cobweb-#{options[:crawl_id]}")
9
+ @redis = NamespacedRedis.new(options[:redis_options], "cobweb-#{Cobweb.version}-#{options[:crawl_id]}")
9
10
  end
10
11
 
11
12
  # Sets up the crawl in statistics
@@ -26,6 +27,10 @@ class Stats
26
27
  @redis.del "crawl_details"
27
28
  end
28
29
 
30
+ def get_crawled
31
+ @redis.smembers "crawled"
32
+ end
33
+
29
34
  # Returns statistics hash. update_statistics takes the content hash, extracts statistics from it and updates redis with the data.
30
35
  def update_statistics(content, crawl_counter=@redis.scard("crawled").to_i, queue_counter=@redis.scard("queued").to_i)
31
36
 
data/spec/cobweb/cobweb_job_spec.rb CHANGED
@@ -1,41 +1,179 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
2
 
3
- describe Cobweb do
3
+ describe Cobweb, :local_only => true do
4
+
5
+ before(:all) do
6
+ #store all existing resque process ids
7
+ @existing_processes = `ps aux | grep resque | grep -v grep | grep -v resque-web | awk '{print $2}'`.split("\n")
8
+
9
+ # START WORKERS ONLY FOR CRAWL QUEUE SO WE CAN COUNT ENQUEUED PROCESS AND FINISH QUEUES
10
+ puts "Starting Workers... Please Wait..."
11
+ `mkdir log`
12
+ io = IO.popen("nohup rake resque:workers PIDFILE=./tmp/pids/resque.pid COUNT=5 QUEUE=cobweb_crawl_job > log/output.log &")
13
+ puts "Workers Started."
14
+
15
+ # START THIN SERVER TO HOST THE SAMPLE SITE FOR CRAWLING
16
+ @thin = nil
17
+ Thread.new do
18
+ @thin = Thin::Server.start("0.0.0.0", 3532, SampleServer.app)
19
+ end
20
+
21
+ # WAIT FOR START TO COMPLETE
22
+ sleep 1
23
+
24
+ end
4
25
 
5
26
  before(:each) do
6
- @base_url = "http://www.baseurl.com/"
7
- @cobweb = Cobweb.new :quiet => true, :cache => nil
27
+ @base_url = "http://localhost:3532/"
28
+ @base_page_count = 77
29
+
30
+ clear_queues
8
31
  end
9
32
 
10
- describe "detect finish of crawl" do
11
-
12
- describe "with no crawl limit" do
13
- before(:each) do
14
- @request = {
15
- :crawl_id => Digest::SHA1.hexdigest("#{Time.now.to_i}.#{Time.now.usec}"),
16
- :url => @base_url,
17
- :crawl_limit => nil
18
- }
19
- end
20
-
21
- it "should not limit crawl"
22
- it "should detect the end or crawl"
33
+ describe "with no crawl limit" do
34
+ before(:each) do
35
+ @request = {
36
+ :crawl_id => Digest::SHA1.hexdigest("#{Time.now.to_i}.#{Time.now.usec}"),
37
+ :crawl_limit => nil,
38
+ :quiet => false,
39
+ :debug => false,
40
+ :cache => nil
41
+ }
42
+ @cobweb = Cobweb.new @request
23
43
  end
24
-
25
- describe "with a crawl limit" do
44
+
45
+ it "should crawl entire site" do
46
+ crawl = @cobweb.start(@base_url)
47
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
48
+ wait_for_crawl_finished crawl[:crawl_id]
49
+ Resque.size("cobweb_process_job").should == @base_page_count
50
+ end
51
+ it "detect crawl finished" do
52
+ crawl = @cobweb.start(@base_url)
53
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
54
+ wait_for_crawl_finished crawl[:crawl_id]
55
+ Resque.size("cobweb_finished_job").should == 1
56
+ end
57
+ end
58
+
59
+ describe "with a crawl limit" do
60
+ before(:each) do
26
61
  @request = {
27
62
  :crawl_id => Digest::SHA1.hexdigest("#{Time.now.to_i}.#{Time.now.usec}"),
28
- :url => @base_url,
29
- :crawl_limit => 3
63
+ :quiet => true,
64
+ :cache => nil
30
65
  }
66
+ @cobweb = Cobweb.new @request
67
+ end
68
+
69
+ describe "limit to 1" do
70
+ before(:each) do
71
+ @request[:crawl_limit] = 1
72
+ end
31
73
 
32
- it "should limit crawl"
33
- it "should detect the end or crawl based on limit"
34
-
74
+ it "should not crawl the entire site" do
75
+ crawl = @cobweb.start(@base_url)
76
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
77
+ wait_for_crawl_finished crawl[:crawl_id]
78
+ Resque.size("cobweb_process_job").should_not == @base_page_count
79
+ end
80
+ it "should only crawl 1 page" do
81
+ crawl = @cobweb.start(@base_url)
82
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
83
+ wait_for_crawl_finished crawl[:crawl_id]
84
+ Resque.size("cobweb_process_job").should == 1
85
+ end
86
+ it "should notify of crawl finished" do
87
+ crawl = @cobweb.start(@base_url)
88
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
89
+ wait_for_crawl_finished crawl[:crawl_id]
90
+ Resque.size("cobweb_finished_job").should == 1
91
+ end
92
+
35
93
  end
94
+
95
+ describe "limit to 3" do
96
+ before(:each) do
97
+ @request[:crawl_limit] = 3
98
+ end
99
+ it "should not crawl the entire site" do
100
+ crawl = @cobweb.start(@base_url)
101
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
102
+ wait_for_crawl_finished crawl[:crawl_id]
103
+ Resque.size("cobweb_process_job").should_not == @base_page_count
104
+ end
105
+ it "should notify of crawl finished" do
106
+ crawl = @cobweb.start(@base_url)
107
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
108
+ wait_for_crawl_finished crawl[:crawl_id]
109
+ Resque.size("cobweb_finished_job").should == 1
110
+ end
111
+ it "should only crawl 3 pages" do
112
+ crawl = @cobweb.start(@base_url)
113
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
114
+ wait_for_crawl_finished crawl[:crawl_id]
115
+ Resque.size("cobweb_process_job").should == 3
116
+ end
36
117
 
118
+ end
119
+
120
+ describe "limit to 100" do
121
+ before(:each) do
122
+ @request[:crawl_limit] = 100
123
+ end
37
124
 
125
+ it "should crawl the entire site" do
126
+ crawl = @cobweb.start(@base_url)
127
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
128
+ wait_for_crawl_finished crawl[:crawl_id]
129
+ Resque.size("cobweb_process_job").should == @base_page_count
130
+ end
131
+ it "should notify of crawl finished" do
132
+ crawl = @cobweb.start(@base_url)
133
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
134
+ wait_for_crawl_finished crawl[:crawl_id]
135
+ Resque.size("cobweb_finished_job").should == 1
136
+ end
137
+ it "should not crawl 100 pages" do
138
+ crawl = @cobweb.start(@base_url)
139
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
140
+ wait_for_crawl_finished crawl[:crawl_id]
141
+ Resque.size("cobweb_process_job").should_not == 100
142
+ end
143
+ end
144
+ end
145
+
146
+ after(:all) do
147
+ @all_processes = `ps aux | grep resque | grep -v grep | grep -v resque-web | awk '{print $2}'`.split("\n")
148
+ command = "kill #{(@all_processes - @existing_processes).join(" ")}"
149
+ IO.popen(command)
150
+ #@thin.stop!
151
+ end
152
+
153
+ end
154
+
155
+ def wait_for_crawl_finished(crawl_id, timeout=20)
156
+ counter = 0
157
+ while(running?(crawl_id) && counter < timeout) do
158
+ sleep 1
159
+ counter+=1
160
+ end
161
+ if counter > timeout
162
+ raise "End of crawl not detected"
163
+ end
164
+ end
165
+
166
+ def running?(crawl_id)
167
+ @stat.get_status != "Crawl Stopped"
168
+ end
169
+
170
+ def clear_queues
171
+ Resque.queues.each do |queue|
172
+ Resque.remove_queue(queue)
38
173
  end
174
+
175
+ Resque.size("cobweb_process_job").should == 0
176
+ Resque.size("cobweb_finished_job").should == 0
177
+ end
39
178
 
40
179
 
41
- end
data/spec/samples/sample_server.rb ADDED
@@ -0,0 +1,21 @@
1
+ class SampleServer
2
+
3
+ # This is the root of our app
4
+ @root = File.expand_path(File.dirname(__FILE__)) + "/sample_site"
5
+
6
+ def self.app
7
+ Proc.new { |env|
8
+ # Extract the requested path from the request
9
+ path = Rack::Utils.unescape(env['PATH_INFO'])
10
+ index_file = @root + "#{path}/index.html"
11
+
12
+ if File.exists?(index_file)
13
+ # Return the index
14
+ [200, {'Content-Type' => 'text/html'}, File.read(index_file)]
15
+ else
16
+ # Pass the request to the directory app
17
+ Rack::Directory.new(@root).call(env)
18
+ end
19
+ }
20
+ end
21
+ end
data/spec/samples/sample_site/boxgrid.html ADDED
@@ -0,0 +1,258 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
3
+
4
+ <head>
5
+
6
+ <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
7
+ <title>CleanDream</title>
8
+
9
+ <style type="text/css">
10
+ @import url("css/style.css");
11
+ @import url('css/style_text.css');
12
+ @import url('css/form-buttons.css');
13
+ @import url('css/link-buttons.css');
14
+ @import url('css/menu.css');
15
+ @import url('css/statics.css');
16
+ @import url('css/messages.css');
17
+ @import url('css/datatable.css');
18
+ @import url('css/accordion.css');
19
+ @import url('css/tabs.css');
20
+ @import url('css/forms.css');
21
+ @import url('css/datepicker.css');
22
+ @import url('css/modalbox.css');
23
+ @import url('css/jquery.fancybox-1.3.4.css');
24
+ @import url('css/jquery.treeview.css');
25
+ @import url('css/wysiwyg.css');
26
+ @import url('css/wysiwyg.modal.css');
27
+ @import url('css/wysiwyg-editor.css');
28
+ </style>
29
+
30
+ <script type="text/javascript" src="js/jquery-1.7.1.min.js"></script>
31
+
32
+ <!--[if lte IE 8]>
33
+ <script type="text/javascript" src="js/excanvas.min.js"></script>
34
+ <![endif]-->
35
+
36
+ </head>
37
+
38
+ <body>
39
+
40
+ <div class="container">
41
+
42
+ <div class="logo-labels">
43
+ <h1><a href="#">CleanDream</a></h1>
44
+ <ul>
45
+ <li><a href="#"><span>Settings</span></a></li>
46
+ <li class="usermessage"><a href="#"><span>1 new message</span></a></li>
47
+ <li class="logout"><a href="#"><span>Logout</span></a></li>
48
+ </ul>
49
+ </div>
50
+
51
+ <div class="menu-search">
52
+ <ul>
53
+ <li><a href="dashboard.html">Dashboard</a></li>
54
+ <li>
55
+ <a href="#">Dropdown</a>
56
+ <ul>
57
+ <li><a href="#">Submenu item 001</a></li>
58
+ <li><a href="#">Submenu item 002</a></li>
59
+ <li><a href="#">Submenu item 003</a></li>
60
+ <li class="current">
61
+ <a href="#">Submenu item 004</a>
62
+ <ul>
63
+ <li><a href="#">Subsubmenu item 001</a></li>
64
+ <li><a href="#">Subsubmenu item 002</a></li>
65
+ <li class="current"><a href="#">Subsubmenu item 003</a></li>
66
+ <li><a href="#">Subsubmenu item 003</a></li>
67
+ <li><a href="#">Subsubmenu item 005</a></li>
68
+ </ul>
69
+ </li>
70
+ <li> <a href="#">Submenu item 005</a> </li>
71
+ </ul>
72
+ </li>
73
+ <li><a href="typography.html">Typography</a></li>
74
+ <li class="current"><a href="boxgrid.html">Boxes Grid</a></li>
75
+ <li><a href="forms.html">Forms</a></li>
76
+ <li><a href="gallery.html">Gallery</a></li>
77
+ <li><a href="tables.html">Tables</a></li>
78
+ <li><a href="more.html">More</a></li>
79
+ </ul>
80
+ <div class="search">
81
+ <form action="" method="post">
82
+ <input type="text" value="Seachterm" />
83
+ <button type="submit"></button>
84
+ </form>
85
+ </div>
86
+ </div>
87
+
88
+ <div class="breadcrumbs">
89
+ <ul>
90
+ <li class="home"><a href="#"></a></li>
91
+ <li class="break">&#187;</li>
92
+ <li><a href="#">Menu item</a></li>
93
+ <li class="break">&#187;</li>
94
+ <li><a href="#">Menu item</a></li>
95
+ </ul>
96
+ </div>
97
+
98
+ <div class="section">
99
+ <div class="full">
100
+ <div class="box">
101
+ <div class="title">
102
+ <h2>Full box</h2>
103
+ <span class="hide"></span>
104
+ </div>
105
+ <div class="content">Place your content here.</div>
106
+ </div>
107
+ </div>
108
+ </div>
109
+
110
+ <div class="section">
111
+ <div class="small">
112
+ <div class="box">
113
+ <div class="title">
114
+ <h2>Small box</h2>
115
+ <span class="hide"></span>
116
+ </div>
117
+ <div class="content">Place your content here.</div>
118
+ </div>
119
+ </div>
120
+
121
+ <div class="big">
122
+ <div class="box">
123
+ <div class="title">
124
+ <h2>Big box</h2>
125
+ <span class="hide"></span>
126
+ </div>
127
+ <div class="content">Place your content here.</div>
128
+ </div>
129
+ </div>
130
+ </div>
131
+
132
+ <div class="section">
133
+ <div class="medium">
134
+ <div class="box">
135
+ <div class="title">
136
+ <h2>Medium box</h2>
137
+ <span class="hide"></span>
138
+ </div>
139
+ <div class="content">Place your content here.</div>
140
+ </div>
141
+ </div>
142
+
143
+ <div class="medium">
144
+ <div class="box">
145
+ <div class="title">
146
+ <h2>Medium box</h2>
147
+ <span class="hide"></span>
148
+ </div>
149
+ <div class="content">Place your content here.</div>
150
+ </div>
151
+ </div>
152
+ </div>
153
+
154
+ <div class="section">
155
+ <div class="medium">
156
+ <div class="box">
157
+ <div class="title">
158
+ <h2>Medium box</h2>
159
+ <span class="hide"></span>
160
+ </div>
161
+ <div class="content">Place your content here.</div>
162
+ </div>
163
+ </div>
164
+
165
+ <div class="small">
166
+ <div class="box">
167
+ <div class="title">
168
+ <h2>Small box</h2>
169
+ <span class="hide"></span>
170
+ </div>
171
+ <div class="content">Place your content here.</div>
172
+ </div>
173
+ </div>
174
+
175
+ <div class="small">
176
+ <div class="box">
177
+ <div class="title">
178
+ <h2>Small box</h2>
179
+ <span class="hide"></span>
180
+ </div>
181
+ <div class="content">Place your content here.</div>
182
+ </div>
183
+ </div>
184
+ </div>
185
+
186
+ <div class="section">
187
+ <div class="small">
188
+ <div class="box">
189
+ <div class="title">
190
+ <h2>Small box</h2>
191
+ <span class="hide"></span>
192
+ </div>
193
+ <div class="content">Place your content here.</div>
194
+ </div>
195
+ </div>
196
+
197
+ <div class="small">
198
+ <div class="box">
199
+ <div class="title">
200
+ <h2>Small box</h2>
201
+ <span class="hide"></span>
202
+ </div>
203
+ <div class="content">Place your content here.</div>
204
+ </div>
205
+ </div>
206
+
207
+ <div class="small">
208
+ <div class="box">
209
+ <div class="title">
210
+ <h2>Small box</h2>
211
+ <span class="hide"></span>
212
+ </div>
213
+ <div class="content">Place your content here.</div>
214
+ </div>
215
+ </div>
216
+
217
+ <div class="small">
218
+ <div class="box">
219
+ <div class="title">
220
+ <h2>Small box</h2>
221
+ <span class="hide"></span>
222
+ </div>
223
+ <div class="content">Place your content here.</div>
224
+ </div>
225
+ </div>
226
+ </div>
227
+
228
+ <div class="footer">
229
+ <div class="split">&#169; Copyright <a href="#">website.com</a></div>
230
+ <div class="split right">Powered by <a href="http://themeforest.net/item/cleandream/490140" target="_blank">CleanDream</a></div>
231
+ </div>
232
+ </div>
233
+
234
+ <script type="text/javascript" src="js/superfish.js"></script>
235
+ <script type="text/javascript" src="js/supersubs.js"></script>
236
+ <script type="text/javascript" src="js/hoverIntent.js"></script>
237
+ <script type="text/javascript" src="js/jquery.flot.js"></script>
238
+ <script type="text/javascript" src="js/jquery.graphtable-0.2.js"></script>
239
+ <script type="text/javascript" src="js/jquery.flot.resize.min.js"></script>
240
+ <script type="text/javascript" src="js/jquery-ui.js"></script>
241
+ <script type="text/javascript" src="js/jquery-ui-select.js"></script>
242
+ <script type="text/javascript" src="js/customInput.jquery.js"></script>
243
+ <script type="text/javascript" src="js/jquery.dataTables.js"></script>
244
+ <script type="text/javascript" src="js/jquery.fancybox-1.3.4.js"></script>
245
+ <script type="text/javascript" src="js/jquery.filestyle.mini.js"></script>
246
+ <script type="text/javascript" src="js/jquery-ui-timepicker-addon.js"></script>
247
+ <script type="text/javascript" src="js/jquery.treeview.js"></script>
248
+ <script type="text/javascript" src="js/jquery.tipsy.js"></script>
249
+ <script type="text/javascript" src="js/jquery.wysiwyg.js"></script>
250
+ <script type="text/javascript" src="js/plugins/wysiwyg.rmFormat.js"></script>
251
+ <script type="text/javascript" src="js/controls/wysiwyg.image.js"></script>
252
+ <script type="text/javascript" src="js/controls/wysiwyg.link.js"></script>
253
+ <script type="text/javascript" src="js/controls/wysiwyg.table.js"></script>
254
+ <script type="text/javascript" src="js/inline.js"></script>
255
+
256
+ </body>
257
+
258
+ </html>