cobweb 0.0.55 → 0.0.57

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177)
  1. data/README.textile +2 -1
  2. data/lib/cobweb.rb +4 -1
  3. data/lib/cobweb_crawler.rb +1 -4
  4. data/lib/cobweb_version.rb +1 -1
  5. data/lib/content_link_parser.rb +1 -0
  6. data/lib/crawl_job.rb +4 -3
  7. data/lib/server.rb +2 -2
  8. data/lib/stats.rb +6 -1
  9. data/spec/cobweb/cobweb_job_spec.rb +162 -24
  10. data/spec/samples/sample_server.rb +21 -0
  11. data/spec/samples/sample_site/boxgrid.html +258 -0
  12. data/spec/samples/sample_site/css/accordion.css +45 -0
  13. data/spec/samples/sample_site/css/datatable.css +189 -0
  14. data/spec/samples/sample_site/css/datepicker.css +171 -0
  15. data/spec/samples/sample_site/css/form-buttons.css +180 -0
  16. data/spec/samples/sample_site/css/forms.css +489 -0
  17. data/spec/samples/sample_site/css/jquery.fancybox-1.3.4.css +282 -0
  18. data/spec/samples/sample_site/css/jquery.treeview.css +103 -0
  19. data/spec/samples/sample_site/css/link-buttons.css +187 -0
  20. data/spec/samples/sample_site/css/login.css +110 -0
  21. data/spec/samples/sample_site/css/menu.css +156 -0
  22. data/spec/samples/sample_site/css/messages.css +58 -0
  23. data/spec/samples/sample_site/css/modalbox.css +63 -0
  24. data/spec/samples/sample_site/css/statics.css +57 -0
  25. data/spec/samples/sample_site/css/style.css +497 -0
  26. data/spec/samples/sample_site/css/style_text.css +128 -0
  27. data/spec/samples/sample_site/css/tabs.css +58 -0
  28. data/spec/samples/sample_site/css/wysiwyg-editor.css +18 -0
  29. data/spec/samples/sample_site/css/wysiwyg.css +149 -0
  30. data/spec/samples/sample_site/css/wysiwyg.modal.css +69 -0
  31. data/spec/samples/sample_site/dashboard.html +852 -0
  32. data/spec/samples/sample_site/forms.html +329 -0
  33. data/spec/samples/sample_site/gallery.html +263 -0
  34. data/spec/samples/sample_site/gfx/back-menu.gif +0 -0
  35. data/spec/samples/sample_site/gfx/back-submenu.gif +0 -0
  36. data/spec/samples/sample_site/gfx/background.gif +0 -0
  37. data/spec/samples/sample_site/gfx/box-hide.png +0 -0
  38. data/spec/samples/sample_site/gfx/box-search.png +0 -0
  39. data/spec/samples/sample_site/gfx/box-title.gif +0 -0
  40. data/spec/samples/sample_site/gfx/code.gif +0 -0
  41. data/spec/samples/sample_site/gfx/datepicker-arrows.gif +0 -0
  42. data/spec/samples/sample_site/gfx/fancybox/blank.gif +0 -0
  43. data/spec/samples/sample_site/gfx/fancybox/fancy_close.png +0 -0
  44. data/spec/samples/sample_site/gfx/fancybox/fancy_loading.png +0 -0
  45. data/spec/samples/sample_site/gfx/fancybox/fancy_nav_left.png +0 -0
  46. data/spec/samples/sample_site/gfx/fancybox/fancy_nav_right.png +0 -0
  47. data/spec/samples/sample_site/gfx/fancybox/fancy_title_left.png +0 -0
  48. data/spec/samples/sample_site/gfx/fancybox/fancy_title_main.png +0 -0
  49. data/spec/samples/sample_site/gfx/fancybox/fancy_title_over.png +0 -0
  50. data/spec/samples/sample_site/gfx/fancybox/fancy_title_right.png +0 -0
  51. data/spec/samples/sample_site/gfx/fancybox/fancybox-x.png +0 -0
  52. data/spec/samples/sample_site/gfx/fancybox/fancybox.png +0 -0
  53. data/spec/samples/sample_site/gfx/forms/date-next.gif +0 -0
  54. data/spec/samples/sample_site/gfx/forms/date-prev.gif +0 -0
  55. data/spec/samples/sample_site/gfx/forms/forms-checkbox.gif +0 -0
  56. data/spec/samples/sample_site/gfx/forms/forms-date.gif +0 -0
  57. data/spec/samples/sample_site/gfx/forms/forms-file.gif +0 -0
  58. data/spec/samples/sample_site/gfx/forms/forms-input-big.gif +0 -0
  59. data/spec/samples/sample_site/gfx/forms/forms-input-medium.gif +0 -0
  60. data/spec/samples/sample_site/gfx/forms/forms-input-small.gif +0 -0
  61. data/spec/samples/sample_site/gfx/forms/forms-input-xl.gif +0 -0
  62. data/spec/samples/sample_site/gfx/forms/forms-radio.gif +0 -0
  63. data/spec/samples/sample_site/gfx/forms/forms-selectbox-small.gif +0 -0
  64. data/spec/samples/sample_site/gfx/forms/forms-selectbox.gif +0 -0
  65. data/spec/samples/sample_site/gfx/forms/forms-textarea-big.gif +0 -0
  66. data/spec/samples/sample_site/gfx/forms/forms-textarea-medium.gif +0 -0
  67. data/spec/samples/sample_site/gfx/forms/forms-textarea-small.gif +0 -0
  68. data/spec/samples/sample_site/gfx/forms/forms-textarea-xl.gif +0 -0
  69. data/spec/samples/sample_site/gfx/icon-delete.png +0 -0
  70. data/spec/samples/sample_site/gfx/icon-edit.png +0 -0
  71. data/spec/samples/sample_site/gfx/icon-home.gif +0 -0
  72. data/spec/samples/sample_site/gfx/img-delete.png +0 -0
  73. data/spec/samples/sample_site/gfx/img-hover.png +0 -0
  74. data/spec/samples/sample_site/gfx/img-zoom.png +0 -0
  75. data/spec/samples/sample_site/gfx/jquery.wysiwyg.gif +0 -0
  76. data/spec/samples/sample_site/gfx/label-icons.gif +0 -0
  77. data/spec/samples/sample_site/gfx/label.gif +0 -0
  78. data/spec/samples/sample_site/gfx/li-down.gif +0 -0
  79. data/spec/samples/sample_site/gfx/li.gif +0 -0
  80. data/spec/samples/sample_site/gfx/link-button-big.gif +0 -0
  81. data/spec/samples/sample_site/gfx/link-button-medium.gif +0 -0
  82. data/spec/samples/sample_site/gfx/link-button.gif +0 -0
  83. data/spec/samples/sample_site/gfx/loading-2.gif +0 -0
  84. data/spec/samples/sample_site/gfx/loading.gif +0 -0
  85. data/spec/samples/sample_site/gfx/logo.png +0 -0
  86. data/spec/samples/sample_site/gfx/modal-title.gif +0 -0
  87. data/spec/samples/sample_site/gfx/photos/00.jpg +0 -0
  88. data/spec/samples/sample_site/gfx/photos/01.jpg +0 -0
  89. data/spec/samples/sample_site/gfx/photos/01xl.jpg +0 -0
  90. data/spec/samples/sample_site/gfx/photos/02.jpg +0 -0
  91. data/spec/samples/sample_site/gfx/photos/02xl.jpg +0 -0
  92. data/spec/samples/sample_site/gfx/photos/03.jpg +0 -0
  93. data/spec/samples/sample_site/gfx/photos/03xl.jpg +0 -0
  94. data/spec/samples/sample_site/gfx/photos/04.jpg +0 -0
  95. data/spec/samples/sample_site/gfx/photos/04xl.jpg +0 -0
  96. data/spec/samples/sample_site/gfx/photos/05.jpg +0 -0
  97. data/spec/samples/sample_site/gfx/photos/05xl.jpg +0 -0
  98. data/spec/samples/sample_site/gfx/photos/06.jpg +0 -0
  99. data/spec/samples/sample_site/gfx/photos/06xl.jpg +0 -0
  100. data/spec/samples/sample_site/gfx/photos/07.jpg +0 -0
  101. data/spec/samples/sample_site/gfx/photos/07xl.jpg +0 -0
  102. data/spec/samples/sample_site/gfx/photos/08.jpg +0 -0
  103. data/spec/samples/sample_site/gfx/photos/08xl.jpg +0 -0
  104. data/spec/samples/sample_site/gfx/photos/09.jpg +0 -0
  105. data/spec/samples/sample_site/gfx/photos/09xl.jpg +0 -0
  106. data/spec/samples/sample_site/gfx/photos/10.jpg +0 -0
  107. data/spec/samples/sample_site/gfx/photos/10xl.jpg +0 -0
  108. data/spec/samples/sample_site/gfx/photos/11.jpg +0 -0
  109. data/spec/samples/sample_site/gfx/photos/11xl.jpg +0 -0
  110. data/spec/samples/sample_site/gfx/photos/12.jpg +0 -0
  111. data/spec/samples/sample_site/gfx/photos/12xl.jpg +0 -0
  112. data/spec/samples/sample_site/gfx/photos/13.jpg +0 -0
  113. data/spec/samples/sample_site/gfx/photos/13xl.jpg +0 -0
  114. data/spec/samples/sample_site/gfx/photos/14.jpg +0 -0
  115. data/spec/samples/sample_site/gfx/photos/14xl.jpg +0 -0
  116. data/spec/samples/sample_site/gfx/photos/15.jpg +0 -0
  117. data/spec/samples/sample_site/gfx/photos/15xl.jpg +0 -0
  118. data/spec/samples/sample_site/gfx/search-button.gif +0 -0
  119. data/spec/samples/sample_site/gfx/search-input.gif +0 -0
  120. data/spec/samples/sample_site/gfx/slider-button.gif +0 -0
  121. data/spec/samples/sample_site/gfx/system-messages.gif +0 -0
  122. data/spec/samples/sample_site/gfx/table-asc-arrow.gif +0 -0
  123. data/spec/samples/sample_site/gfx/table-desc-arrow.gif +0 -0
  124. data/spec/samples/sample_site/gfx/table-first.gif +0 -0
  125. data/spec/samples/sample_site/gfx/table-last.gif +0 -0
  126. data/spec/samples/sample_site/gfx/table-next.gif +0 -0
  127. data/spec/samples/sample_site/gfx/table-number.gif +0 -0
  128. data/spec/samples/sample_site/gfx/table-prev.gif +0 -0
  129. data/spec/samples/sample_site/gfx/table-rows.gif +0 -0
  130. data/spec/samples/sample_site/gfx/table-search.gif +0 -0
  131. data/spec/samples/sample_site/gfx/table-thead.gif +0 -0
  132. data/spec/samples/sample_site/gfx/tooltip.gif +0 -0
  133. data/spec/samples/sample_site/gfx/treeview/ajax-loader.gif +0 -0
  134. data/spec/samples/sample_site/gfx/treeview/file.gif +0 -0
  135. data/spec/samples/sample_site/gfx/treeview/folder-closed.gif +0 -0
  136. data/spec/samples/sample_site/gfx/treeview/folder.gif +0 -0
  137. data/spec/samples/sample_site/gfx/treeview/minus.gif +0 -0
  138. data/spec/samples/sample_site/gfx/treeview/plus.gif +0 -0
  139. data/spec/samples/sample_site/gfx/treeview/treeview-default-line.gif +0 -0
  140. data/spec/samples/sample_site/gfx/treeview/treeview-default.gif +0 -0
  141. data/spec/samples/sample_site/index.html +852 -0
  142. data/spec/samples/sample_site/js/controls/wysiwyg.image.js +284 -0
  143. data/spec/samples/sample_site/js/controls/wysiwyg.link.js +210 -0
  144. data/spec/samples/sample_site/js/controls/wysiwyg.table.js +151 -0
  145. data/spec/samples/sample_site/js/customInput.jquery.js +68 -0
  146. data/spec/samples/sample_site/js/excanvas.min.js +1 -0
  147. data/spec/samples/sample_site/js/hoverIntent.js +84 -0
  148. data/spec/samples/sample_site/js/inline.js +392 -0
  149. data/spec/samples/sample_site/js/jquery-1.7.1.min.js +4 -0
  150. data/spec/samples/sample_site/js/jquery-ui-select.js +522 -0
  151. data/spec/samples/sample_site/js/jquery-ui-timepicker-addon.js +1299 -0
  152. data/spec/samples/sample_site/js/jquery-ui.js +791 -0
  153. data/spec/samples/sample_site/js/jquery.dataTables.js +7440 -0
  154. data/spec/samples/sample_site/js/jquery.fancybox-1.3.4.js +1156 -0
  155. data/spec/samples/sample_site/js/jquery.filestyle.mini.js +2 -0
  156. data/spec/samples/sample_site/js/jquery.flot.js +2600 -0
  157. data/spec/samples/sample_site/js/jquery.flot.resize.min.js +60 -0
  158. data/spec/samples/sample_site/js/jquery.graphtable-0.2.js +179 -0
  159. data/spec/samples/sample_site/js/jquery.tipsy.js +104 -0
  160. data/spec/samples/sample_site/js/jquery.treeview.js +256 -0
  161. data/spec/samples/sample_site/js/jquery.wysiwyg.js +2454 -0
  162. data/spec/samples/sample_site/js/plugins/wysiwyg.rmFormat.js +348 -0
  163. data/spec/samples/sample_site/js/superfish.js +121 -0
  164. data/spec/samples/sample_site/js/supersubs.js +90 -0
  165. data/spec/samples/sample_site/more.html +503 -0
  166. data/spec/samples/sample_site/tables.html +845 -0
  167. data/spec/samples/sample_site/text/museosans-webfont.eot +0 -0
  168. data/spec/samples/sample_site/text/museosans-webfont.svg +231 -0
  169. data/spec/samples/sample_site/text/museosans-webfont.ttf +0 -0
  170. data/spec/samples/sample_site/text/museosans-webfont.woff +0 -0
  171. data/spec/samples/sample_site/text/museosans_bold-webfont.eot +0 -0
  172. data/spec/samples/sample_site/text/museosans_bold-webfont.svg +231 -0
  173. data/spec/samples/sample_site/text/museosans_bold-webfont.ttf +0 -0
  174. data/spec/samples/sample_site/text/museosans_bold-webfont.woff +0 -0
  175. data/spec/samples/sample_site/typography.html +192 -0
  176. data/spec/spec_helper.rb +10 -2
  177. metadata +196 -26
data/README.textile CHANGED
@@ -1,5 +1,6 @@
1
1
 
2
- h1. Cobweb v0.0.55
2
+ h1. Cobweb v0.0.57
3
+ !https://secure.travis-ci.org/stewartmckee/cobweb.png?branch=master!
3
4
 
4
5
  h2. Intro
5
6
 
data/lib/cobweb.rb CHANGED
@@ -56,7 +56,9 @@ class Cobweb
56
56
 
57
57
  if @options[:internal_urls].nil? || @options[:internal_urls].empty?
58
58
  uri = Addressable::URI.parse(base_url)
59
- @options[:internal_urls] = [[uri.scheme, "://", uri.host, "/*"].join]
59
+ @options[:internal_urls] = []
60
+ @options[:internal_urls] << [uri.scheme, "://", uri.host, "/*"].join
61
+ @options[:internal_urls] << [uri.scheme, "://", uri.host, ":", uri.inferred_port, "/*"].join
60
62
  end
61
63
 
62
64
  request.merge!(@options)
@@ -72,6 +74,7 @@ class Cobweb
72
74
  @options[:internal_urls].map{|url| @redis.sadd("internal_urls", url)}
73
75
 
74
76
  Resque.enqueue(CrawlJob, request)
77
+ request
75
78
  end
76
79
 
77
80
  # Returns array of cookies from content
data/lib/cobweb_crawler.rb CHANGED
@@ -20,7 +20,7 @@ class CobwebCrawler
20
20
  @options[:crawl_id] = @crawl_id
21
21
  end
22
22
 
23
- @redis = NamespacedRedis.new(@options[:redis_options], "cobweb-#{@crawl_id}")
23
+ @redis = NamespacedRedis.new(@options[:redis_options], "cobweb-#{Cobweb.version}-#{@crawl_id}")
24
24
  @options[:internal_urls] = [] if @options[:internal_urls].nil?
25
25
  @options[:internal_urls].map{|url| @redis.sadd("internal_urls", url)}
26
26
  @debug = @options[:debug]
@@ -42,9 +42,6 @@ class CobwebCrawler
42
42
 
43
43
  @crawl_options = crawl_options
44
44
 
45
- puts "http://localhost:4567/statistics/#{@crawl_id}"
46
- puts ""
47
-
48
45
  @redis.sadd("queued", base_url) unless @redis.sismember("crawled", base_url) || @redis.sismember("queued", base_url)
49
46
  crawl_counter = @redis.scard("crawled").to_i
50
47
  queue_counter = @redis.scard("queued").to_i
data/lib/cobweb_version.rb CHANGED
@@ -3,7 +3,7 @@ class CobwebVersion
3
3
 
4
4
  # Returns a string of the current version
5
5
  def self.version
6
- "0.0.55"
6
+ "0.0.57"
7
7
  end
8
8
 
9
9
  end
data/lib/content_link_parser.rb CHANGED
@@ -46,6 +46,7 @@ class ContentLinkParser
46
46
  data = link_data
47
47
  links = data.keys.map{|key| data[key]}.flatten.uniq
48
48
  links = links.map{|link| UriHelper.join_no_fragment(@url, link).to_s }
49
+ links = links.reject{|link| link =~ /\/([^\/]+?)\/\1\// }
49
50
  links = links.reject{|link| link =~ /([^\/]+?)\/([^\/]+?)\/.*?\1\/\2/ }
50
51
  links = links.select{|link| options[:valid_schemes].include? link.split(':')[0].to_sym}
51
52
  links
data/lib/crawl_job.rb CHANGED
@@ -36,6 +36,7 @@ class CrawlJob
36
36
  content = Cobweb.new(content_request).get(content_request[:url], content_request)
37
37
 
38
38
  ## update statistics
39
+ @stats.update_status("Crawling #{content_request[:url]}...")
39
40
  @stats.update_statistics(content)
40
41
 
41
42
  # set the base url if this is the first page
@@ -72,7 +73,7 @@ class CrawlJob
72
73
  if @redis.scard("queued") == 0
73
74
  finished(content_request)
74
75
  end
75
- elsif @queue_counter == 0 || @crawl_counter >= content_request[:crawl_limit].to_i
76
+ elsif @queue_counter == 0 || @crawl_counter > content_request[:crawl_limit].to_i
76
77
  finished(content_request)
77
78
  end
78
79
  end
@@ -109,12 +110,12 @@ class CrawlJob
109
110
 
110
111
  # Returns true if the crawl count is within limits
111
112
  def self.within_crawl_limits?(crawl_limit)
112
- crawl_limit.nil? or @crawl_counter < crawl_limit.to_i
113
+ crawl_limit.nil? or @crawl_counter <= crawl_limit.to_i
113
114
  end
114
115
 
115
116
  # Returns true if the queue count is calculated to be still within limits when complete
116
117
  def self.within_queue_limits?(crawl_limit)
117
- within_crawl_limits?(crawl_limit) and (crawl_limit.nil? or (@queue_counter + @crawl_counter) < crawl_limit.to_i)
118
+ within_crawl_limits?(crawl_limit) && (crawl_limit.nil? || (@queue_counter + @crawl_counter) < crawl_limit.to_i)
118
119
  end
119
120
 
120
121
  # Sets the base url in redis. If the first page is a redirect, it sets the base_url to the destination
data/lib/server.rb CHANGED
@@ -17,7 +17,7 @@ class Server < Sinatra::Base
17
17
 
18
18
  @crawls = []
19
19
  @full_redis.smembers("cobweb_crawls").each do |crawl_id|
20
- redis = NamespacedRedis.new({}, "cobweb-#{crawl_id}")
20
+ redis = NamespacedRedis.new({}, "cobweb-#{Cobweb.version}-#{crawl_id}")
21
21
  stats = HashUtil.deep_symbolize_keys({
22
22
  :crawl_details => redis.hgetall("crawl_details"),
23
23
  :statistics => redis.hgetall("statistics"),
@@ -31,7 +31,7 @@ class Server < Sinatra::Base
31
31
 
32
32
  # Sinatra Crawl Detail
33
33
  get '/statistics/:crawl_id' do
34
- redis = NamespacedRedis.new({}, "cobweb-#{params[:crawl_id]}")
34
+ redis = NamespacedRedis.new({}, "cobweb-#{Cobweb.version}-#{params[:crawl_id]}")
35
35
 
36
36
  @statistics = HashUtil.deep_symbolize_keys(redis.hgetall("statistics"))
37
37
  if @statistics[:status_counts].nil?
data/lib/stats.rb CHANGED
@@ -4,8 +4,9 @@ class Stats
4
4
 
5
5
  # Sets up redis usage for statistics
6
6
  def initialize(options)
7
+ options[:redis_options] = {} unless options.has_key? :redis_options
7
8
  @full_redis = Redis.new(options[:redis_options])
8
- @redis = NamespacedRedis.new(options[:redis_options], "cobweb-#{options[:crawl_id]}")
9
+ @redis = NamespacedRedis.new(options[:redis_options], "cobweb-#{Cobweb.version}-#{options[:crawl_id]}")
9
10
  end
10
11
 
11
12
  # Sets up the crawl in statistics
@@ -26,6 +27,10 @@ class Stats
26
27
  @redis.del "crawl_details"
27
28
  end
28
29
 
30
+ def get_crawled
31
+ @redis.smembers "crawled"
32
+ end
33
+
29
34
  # Returns statistics hash. update_statistics takes the content hash, extracts statistics from it and updates redis with the data.
30
35
  def update_statistics(content, crawl_counter=@redis.scard("crawled").to_i, queue_counter=@redis.scard("queued").to_i)
31
36
 
data/spec/cobweb/cobweb_job_spec.rb CHANGED
@@ -1,41 +1,179 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
2
 
3
- describe Cobweb do
3
+ describe Cobweb, :local_only => true do
4
+
5
+ before(:all) do
6
+ #store all existing resque process ids
7
+ @existing_processes = `ps aux | grep resque | grep -v grep | grep -v resque-web | awk '{print $2}'`.split("\n")
8
+
9
+ # START WORKERS ONLY FOR CRAWL QUEUE SO WE CAN COUNT ENQUEUED PROCESS AND FINISH QUEUES
10
+ puts "Starting Workers... Please Wait..."
11
+ `mkdir log`
12
+ io = IO.popen("nohup rake resque:workers PIDFILE=./tmp/pids/resque.pid COUNT=5 QUEUE=cobweb_crawl_job > log/output.log &")
13
+ puts "Workers Started."
14
+
15
+ # START THIN SERVER TO HOST THE SAMPLE SITE FOR CRAWLING
16
+ @thin = nil
17
+ Thread.new do
18
+ @thin = Thin::Server.start("0.0.0.0", 3532, SampleServer.app)
19
+ end
20
+
21
+ # WAIT FOR START TO COMPLETE
22
+ sleep 1
23
+
24
+ end
4
25
 
5
26
  before(:each) do
6
- @base_url = "http://www.baseurl.com/"
7
- @cobweb = Cobweb.new :quiet => true, :cache => nil
27
+ @base_url = "http://localhost:3532/"
28
+ @base_page_count = 77
29
+
30
+ clear_queues
8
31
  end
9
32
 
10
- describe "detect finish of crawl" do
11
-
12
- describe "with no crawl limit" do
13
- before(:each) do
14
- @request = {
15
- :crawl_id => Digest::SHA1.hexdigest("#{Time.now.to_i}.#{Time.now.usec}"),
16
- :url => @base_url,
17
- :crawl_limit => nil
18
- }
19
- end
20
-
21
- it "should not limit crawl"
22
- it "should detect the end or crawl"
33
+ describe "with no crawl limit" do
34
+ before(:each) do
35
+ @request = {
36
+ :crawl_id => Digest::SHA1.hexdigest("#{Time.now.to_i}.#{Time.now.usec}"),
37
+ :crawl_limit => nil,
38
+ :quiet => false,
39
+ :debug => false,
40
+ :cache => nil
41
+ }
42
+ @cobweb = Cobweb.new @request
23
43
  end
24
-
25
- describe "with a crawl limit" do
44
+
45
+ it "should crawl entire site" do
46
+ crawl = @cobweb.start(@base_url)
47
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
48
+ wait_for_crawl_finished crawl[:crawl_id]
49
+ Resque.size("cobweb_process_job").should == @base_page_count
50
+ end
51
+ it "detect crawl finished" do
52
+ crawl = @cobweb.start(@base_url)
53
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
54
+ wait_for_crawl_finished crawl[:crawl_id]
55
+ Resque.size("cobweb_finished_job").should == 1
56
+ end
57
+ end
58
+
59
+ describe "with a crawl limit" do
60
+ before(:each) do
26
61
  @request = {
27
62
  :crawl_id => Digest::SHA1.hexdigest("#{Time.now.to_i}.#{Time.now.usec}"),
28
- :url => @base_url,
29
- :crawl_limit => 3
63
+ :quiet => true,
64
+ :cache => nil
30
65
  }
66
+ @cobweb = Cobweb.new @request
67
+ end
68
+
69
+ describe "limit to 1" do
70
+ before(:each) do
71
+ @request[:crawl_limit] = 1
72
+ end
31
73
 
32
- it "should limit crawl"
33
- it "should detect the end or crawl based on limit"
34
-
74
+ it "should not crawl the entire site" do
75
+ crawl = @cobweb.start(@base_url)
76
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
77
+ wait_for_crawl_finished crawl[:crawl_id]
78
+ Resque.size("cobweb_process_job").should_not == @base_page_count
79
+ end
80
+ it "should only crawl 1 page" do
81
+ crawl = @cobweb.start(@base_url)
82
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
83
+ wait_for_crawl_finished crawl[:crawl_id]
84
+ Resque.size("cobweb_process_job").should == 1
85
+ end
86
+ it "should notify of crawl finished" do
87
+ crawl = @cobweb.start(@base_url)
88
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
89
+ wait_for_crawl_finished crawl[:crawl_id]
90
+ Resque.size("cobweb_finished_job").should == 1
91
+ end
92
+
35
93
  end
94
+
95
+ describe "limit to 3" do
96
+ before(:each) do
97
+ @request[:crawl_limit] = 3
98
+ end
99
+ it "should not crawl the entire site" do
100
+ crawl = @cobweb.start(@base_url)
101
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
102
+ wait_for_crawl_finished crawl[:crawl_id]
103
+ Resque.size("cobweb_process_job").should_not == @base_page_count
104
+ end
105
+ it "should notify of crawl finished" do
106
+ crawl = @cobweb.start(@base_url)
107
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
108
+ wait_for_crawl_finished crawl[:crawl_id]
109
+ Resque.size("cobweb_finished_job").should == 1
110
+ end
111
+ it "should only crawl 3 pages" do
112
+ crawl = @cobweb.start(@base_url)
113
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
114
+ wait_for_crawl_finished crawl[:crawl_id]
115
+ Resque.size("cobweb_process_job").should == 3
116
+ end
36
117
 
118
+ end
119
+
120
+ describe "limit to 100" do
121
+ before(:each) do
122
+ @request[:crawl_limit] = 100
123
+ end
37
124
 
125
+ it "should crawl the entire site" do
126
+ crawl = @cobweb.start(@base_url)
127
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
128
+ wait_for_crawl_finished crawl[:crawl_id]
129
+ Resque.size("cobweb_process_job").should == @base_page_count
130
+ end
131
+ it "should notify of crawl finished" do
132
+ crawl = @cobweb.start(@base_url)
133
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
134
+ wait_for_crawl_finished crawl[:crawl_id]
135
+ Resque.size("cobweb_finished_job").should == 1
136
+ end
137
+ it "should not crawl 100 pages" do
138
+ crawl = @cobweb.start(@base_url)
139
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
140
+ wait_for_crawl_finished crawl[:crawl_id]
141
+ Resque.size("cobweb_process_job").should_not == 100
142
+ end
143
+ end
144
+ end
145
+
146
+ after(:all) do
147
+ @all_processes = `ps aux | grep resque | grep -v grep | grep -v resque-web | awk '{print $2}'`.split("\n")
148
+ command = "kill #{(@all_processes - @existing_processes).join(" ")}"
149
+ IO.popen(command)
150
+ #@thin.stop!
151
+ end
152
+
153
+ end
154
+
155
+ def wait_for_crawl_finished(crawl_id, timeout=20)
156
+ counter = 0
157
+ while(running?(crawl_id) && counter < timeout) do
158
+ sleep 1
159
+ counter+=1
160
+ end
161
+ if counter > timeout
162
+ raise "End of crawl not detected"
163
+ end
164
+ end
165
+
166
+ def running?(crawl_id)
167
+ @stat.get_status != "Crawl Stopped"
168
+ end
169
+
170
+ def clear_queues
171
+ Resque.queues.each do |queue|
172
+ Resque.remove_queue(queue)
38
173
  end
174
+
175
+ Resque.size("cobweb_process_job").should == 0
176
+ Resque.size("cobweb_finished_job").should == 0
177
+ end
39
178
 
40
179
 
41
- end
data/spec/samples/sample_server.rb ADDED
@@ -0,0 +1,21 @@
1
+ class SampleServer
2
+
3
+ # This is the root of our app
4
+ @root = File.expand_path(File.dirname(__FILE__)) + "/sample_site"
5
+
6
+ def self.app
7
+ Proc.new { |env|
8
+ # Extract the requested path from the request
9
+ path = Rack::Utils.unescape(env['PATH_INFO'])
10
+ index_file = @root + "#{path}/index.html"
11
+
12
+ if File.exists?(index_file)
13
+ # Return the index
14
+ [200, {'Content-Type' => 'text/html'}, File.read(index_file)]
15
+ else
16
+ # Pass the request to the directory app
17
+ Rack::Directory.new(@root).call(env)
18
+ end
19
+ }
20
+ end
21
+ end
data/spec/samples/sample_site/boxgrid.html ADDED
@@ -0,0 +1,258 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
3
+
4
+ <head>
5
+
6
+ <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
7
+ <title>CleanDream</title>
8
+
9
+ <style type="text/css">
10
+ @import url("css/style.css");
11
+ @import url('css/style_text.css');
12
+ @import url('css/form-buttons.css');
13
+ @import url('css/link-buttons.css');
14
+ @import url('css/menu.css');
15
+ @import url('css/statics.css');
16
+ @import url('css/messages.css');
17
+ @import url('css/datatable.css');
18
+ @import url('css/accordion.css');
19
+ @import url('css/tabs.css');
20
+ @import url('css/forms.css');
21
+ @import url('css/datepicker.css');
22
+ @import url('css/modalbox.css');
23
+ @import url('css/jquery.fancybox-1.3.4.css');
24
+ @import url('css/jquery.treeview.css');
25
+ @import url('css/wysiwyg.css');
26
+ @import url('css/wysiwyg.modal.css');
27
+ @import url('css/wysiwyg-editor.css');
28
+ </style>
29
+
30
+ <script type="text/javascript" src="js/jquery-1.7.1.min.js"></script>
31
+
32
+ <!--[if lte IE 8]>
33
+ <script type="text/javascript" src="js/excanvas.min.js"></script>
34
+ <![endif]-->
35
+
36
+ </head>
37
+
38
+ <body>
39
+
40
+ <div class="container">
41
+
42
+ <div class="logo-labels">
43
+ <h1><a href="#">CleanDream</a></h1>
44
+ <ul>
45
+ <li><a href="#"><span>Settings</span></a></li>
46
+ <li class="usermessage"><a href="#"><span>1 new message</span></a></li>
47
+ <li class="logout"><a href="#"><span>Logout</span></a></li>
48
+ </ul>
49
+ </div>
50
+
51
+ <div class="menu-search">
52
+ <ul>
53
+ <li><a href="dashboard.html">Dashboard</a></li>
54
+ <li>
55
+ <a href="#">Dropdown</a>
56
+ <ul>
57
+ <li><a href="#">Submenu item 001</a></li>
58
+ <li><a href="#">Submenu item 002</a></li>
59
+ <li><a href="#">Submenu item 003</a></li>
60
+ <li class="current">
61
+ <a href="#">Submenu item 004</a>
62
+ <ul>
63
+ <li><a href="#">Subsubmenu item 001</a></li>
64
+ <li><a href="#">Subsubmenu item 002</a></li>
65
+ <li class="current"><a href="#">Subsubmenu item 003</a></li>
66
+ <li><a href="#">Subsubmenu item 003</a></li>
67
+ <li><a href="#">Subsubmenu item 005</a></li>
68
+ </ul>
69
+ </li>
70
+ <li> <a href="#">Submenu item 005</a> </li>
71
+ </ul>
72
+ </li>
73
+ <li><a href="typography.html">Typography</a></li>
74
+ <li class="current"><a href="boxgrid.html">Boxes Grid</a></li>
75
+ <li><a href="forms.html">Forms</a></li>
76
+ <li><a href="gallery.html">Gallery</a></li>
77
+ <li><a href="tables.html">Tables</a></li>
78
+ <li><a href="more.html">More</a></li>
79
+ </ul>
80
+ <div class="search">
81
+ <form action="" method="post">
82
+ <input type="text" value="Seachterm" />
83
+ <button type="submit"></button>
84
+ </form>
85
+ </div>
86
+ </div>
87
+
88
+ <div class="breadcrumbs">
89
+ <ul>
90
+ <li class="home"><a href="#"></a></li>
91
+ <li class="break">&#187;</li>
92
+ <li><a href="#">Menu item</a></li>
93
+ <li class="break">&#187;</li>
94
+ <li><a href="#">Menu item</a></li>
95
+ </ul>
96
+ </div>
97
+
98
+ <div class="section">
99
+ <div class="full">
100
+ <div class="box">
101
+ <div class="title">
102
+ <h2>Full box</h2>
103
+ <span class="hide"></span>
104
+ </div>
105
+ <div class="content">Place your content here.</div>
106
+ </div>
107
+ </div>
108
+ </div>
109
+
110
+ <div class="section">
111
+ <div class="small">
112
+ <div class="box">
113
+ <div class="title">
114
+ <h2>Small box</h2>
115
+ <span class="hide"></span>
116
+ </div>
117
+ <div class="content">Place your content here.</div>
118
+ </div>
119
+ </div>
120
+
121
+ <div class="big">
122
+ <div class="box">
123
+ <div class="title">
124
+ <h2>Big box</h2>
125
+ <span class="hide"></span>
126
+ </div>
127
+ <div class="content">Place your content here.</div>
128
+ </div>
129
+ </div>
130
+ </div>
131
+
132
+ <div class="section">
133
+ <div class="medium">
134
+ <div class="box">
135
+ <div class="title">
136
+ <h2>Medium box</h2>
137
+ <span class="hide"></span>
138
+ </div>
139
+ <div class="content">Place your content here.</div>
140
+ </div>
141
+ </div>
142
+
143
+ <div class="medium">
144
+ <div class="box">
145
+ <div class="title">
146
+ <h2>Medium box</h2>
147
+ <span class="hide"></span>
148
+ </div>
149
+ <div class="content">Place your content here.</div>
150
+ </div>
151
+ </div>
152
+ </div>
153
+
154
+ <div class="section">
155
+ <div class="medium">
156
+ <div class="box">
157
+ <div class="title">
158
+ <h2>Medium box</h2>
159
+ <span class="hide"></span>
160
+ </div>
161
+ <div class="content">Place your content here.</div>
162
+ </div>
163
+ </div>
164
+
165
+ <div class="small">
166
+ <div class="box">
167
+ <div class="title">
168
+ <h2>Small box</h2>
169
+ <span class="hide"></span>
170
+ </div>
171
+ <div class="content">Place your content here.</div>
172
+ </div>
173
+ </div>
174
+
175
+ <div class="small">
176
+ <div class="box">
177
+ <div class="title">
178
+ <h2>Small box</h2>
179
+ <span class="hide"></span>
180
+ </div>
181
+ <div class="content">Place your content here.</div>
182
+ </div>
183
+ </div>
184
+ </div>
185
+
186
+ <div class="section">
187
+ <div class="small">
188
+ <div class="box">
189
+ <div class="title">
190
+ <h2>Small box</h2>
191
+ <span class="hide"></span>
192
+ </div>
193
+ <div class="content">Place your content here.</div>
194
+ </div>
195
+ </div>
196
+
197
+ <div class="small">
198
+ <div class="box">
199
+ <div class="title">
200
+ <h2>Small box</h2>
201
+ <span class="hide"></span>
202
+ </div>
203
+ <div class="content">Place your content here.</div>
204
+ </div>
205
+ </div>
206
+
207
+ <div class="small">
208
+ <div class="box">
209
+ <div class="title">
210
+ <h2>Small box</h2>
211
+ <span class="hide"></span>
212
+ </div>
213
+ <div class="content">Place your content here.</div>
214
+ </div>
215
+ </div>
216
+
217
+ <div class="small">
218
+ <div class="box">
219
+ <div class="title">
220
+ <h2>Small box</h2>
221
+ <span class="hide"></span>
222
+ </div>
223
+ <div class="content">Place your content here.</div>
224
+ </div>
225
+ </div>
226
+ </div>
227
+
228
+ <div class="footer">
229
+ <div class="split">&#169; Copyright <a href="#">website.com</a></div>
230
+ <div class="split right">Powered by <a href="http://themeforest.net/item/cleandream/490140" target="_blank">CleanDream</a></div>
231
+ </div>
232
+ </div>
233
+
234
+ <script type="text/javascript" src="js/superfish.js"></script>
235
+ <script type="text/javascript" src="js/supersubs.js"></script>
236
+ <script type="text/javascript" src="js/hoverIntent.js"></script>
237
+ <script type="text/javascript" src="js/jquery.flot.js"></script>
238
+ <script type="text/javascript" src="js/jquery.graphtable-0.2.js"></script>
239
+ <script type="text/javascript" src="js/jquery.flot.resize.min.js"></script>
240
+ <script type="text/javascript" src="js/jquery-ui.js"></script>
241
+ <script type="text/javascript" src="js/jquery-ui-select.js"></script>
242
+ <script type="text/javascript" src="js/customInput.jquery.js"></script>
243
+ <script type="text/javascript" src="js/jquery.dataTables.js"></script>
244
+ <script type="text/javascript" src="js/jquery.fancybox-1.3.4.js"></script>
245
+ <script type="text/javascript" src="js/jquery.filestyle.mini.js"></script>
246
+ <script type="text/javascript" src="js/jquery-ui-timepicker-addon.js"></script>
247
+ <script type="text/javascript" src="js/jquery.treeview.js"></script>
248
+ <script type="text/javascript" src="js/jquery.tipsy.js"></script>
249
+ <script type="text/javascript" src="js/jquery.wysiwyg.js"></script>
250
+ <script type="text/javascript" src="js/plugins/wysiwyg.rmFormat.js"></script>
251
+ <script type="text/javascript" src="js/controls/wysiwyg.image.js"></script>
252
+ <script type="text/javascript" src="js/controls/wysiwyg.link.js"></script>
253
+ <script type="text/javascript" src="js/controls/wysiwyg.table.js"></script>
254
+ <script type="text/javascript" src="js/inline.js"></script>
255
+
256
+ </body>
257
+
258
+ </html>