waw 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. data/bin/waw-crawl +26 -0
  2. data/lib/waw.rb +3 -3
  3. data/lib/waw/commands/command.rb +12 -4
  4. data/lib/waw/commands/crawl_command.rb +46 -0
  5. data/lib/waw/commands/profile_command.rb +5 -1
  6. data/lib/waw/controllers/action/action.rb +11 -0
  7. data/lib/waw/controllers/action/js_generation.rb +11 -2
  8. data/lib/waw/controllers/static/match.rb +4 -1
  9. data/lib/waw/controllers/static/matcher.rb +35 -0
  10. data/lib/waw/controllers/static/waw_access.rb +28 -15
  11. data/lib/waw/controllers/static/waw_access_dsl.rb +9 -0
  12. data/lib/waw/controllers/static_controller.rb +5 -2
  13. data/lib/waw/crawler.rb +176 -0
  14. data/lib/waw/crawler/crawler_listener.rb +64 -0
  15. data/lib/waw/crawler/crawler_options.rb +42 -0
  16. data/lib/waw/kern/living_state.rb +93 -0
  17. data/lib/waw/scope_utils.rb +1 -0
  18. data/lib/waw/tools/mail/mail_agent.rb +1 -1
  19. data/lib/waw/validation.rb +3 -0
  20. data/lib/waw/validation/datetime_validator.rb +53 -0
  21. data/lib/waw/wspec/browser.rb +4 -2
  22. data/test/bricks/error_handler/test/test_all.rb +20 -0
  23. data/test/bricks/static_controller/config/test.cfg +2 -0
  24. data/test/bricks/static_controller/logs/webapp.log +84 -0
  25. data/test/bricks/static_controller/test/static_controller.wspec +12 -0
  26. data/test/bricks/static_controller/test/test_all.rb +20 -0
  27. data/test/bricks/static_controller/waw.deploy +1 -0
  28. data/test/bricks/static_controller/waw.routing +5 -0
  29. data/test/bricks/test_all.rb +8 -0
  30. data/test/spec/controllers/static/waw_access_spec.rb +3 -3
  31. data/test/spec/test_all.rb +0 -2
  32. data/test/spec/validation/datetime_validation_spec.rb +92 -0
  33. data/test/unit/test_all.rb +1 -0
  34. data/test/unit/waw/controllers/static/logs/webapp.log +22 -0
  35. metadata +23 -6
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Waw - making web development simple
4
+ # (see lib/waw/waw.rb for more information)
5
+ #
6
+ # Copyright (c) 2010 University of Louvain, Bernard & Louis Lambeau
7
+ # Released under a MIT or Ruby licence
8
+ #
9
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
10
+ require 'waw'
11
+ require 'waw/commands/command'
12
+ require 'waw/commands/crawl_command'
13
+
14
# Runs the crawl command against the current directory with the command line
# arguments. Any failure is reported on stderr and then re-raised, so the
# interpreter still terminates with the original exception.
begin
  Waw::Commands::CrawlCommand.new.run('.', ARGV)
rescue Interrupt
  # Ctrl-C: emit an empty line first, then the notice
  $stderr.puts
  $stderr.puts "Interrupted"
  raise
rescue StandardError => failure
  # Option parsing errors (OptionParser::ParseError) are StandardErrors too
  # and are reported exactly like any other failure: message, then re-raise.
  $stderr.puts failure.message
  raise
end
data/lib/waw.rb CHANGED
@@ -9,13 +9,13 @@ module Waw
9
9
 
10
10
  # Requirements on gems for this version
11
11
  GEM_REQUIREMENTS = {
12
- :rack => '>= 1.1.0',
12
+ :rack => '>= 1.2.1',
13
13
  :wlang => '>= 0.9.0',
14
14
  :json => '>= 1.1.9'
15
15
  }
16
16
 
17
17
  # Waw version
18
- VERSION = "0.2.2".freeze
18
+ VERSION = "0.3.0".freeze
19
19
 
20
20
  # Waw loading mutex
21
21
  WAW_KERNELS_LOCK = Mutex.new
@@ -96,4 +96,4 @@ require 'waw/controllers/action_controller'
96
96
  require 'waw/controllers/static_controller'
97
97
  require 'waw/controllers/no_cache'
98
98
  require 'waw/controllers/error_handler'
99
- require 'waw/routing'
99
+ require 'waw/routing'
@@ -5,14 +5,14 @@ module Waw
5
5
  class Command
6
6
 
7
7
  # The verbose level
8
- attr_accessor :verbose
8
+ attr_accessor :verbosity
9
9
 
10
10
  # Show stack traces?
11
11
  attr_accessor :trace
12
12
 
13
13
  # Creates an empty command instance
14
14
  def initialize
15
- @verbose = false
15
+ @verbosity = 1
16
16
  @buffer = STDOUT
17
17
  end
18
18
 
@@ -35,8 +35,12 @@ module Waw
35
35
  @trace = true
36
36
  end
37
37
 
38
- opt.on("--verbose", "-v", "Display extra progress as we progress") do |value|
39
- @verbose = true
38
+ opt.on("--verbose", "Display extra info as we progress") do |value|
39
+ @verbosity = 2
40
+ end
41
+
42
+ opt.on("--silent", "Be quiet silent") do |value|
43
+ @verbosity = 0
40
44
  end
41
45
 
42
46
  # No argument, shows at tail. This will print an options summary.
@@ -55,6 +59,10 @@ module Waw
55
59
  opt.separator nil
56
60
  end
57
61
  end
62
+
63
# Tells whether extra progress information should be displayed.
#
# Boolean view over +verbosity+, kept for code written against the former
# +verbose+ accessor. Verbosity levels are 0 (--silent), 1 (default) and
# 2 (--verbose); this returns true only above the default level. The
# previous test, `verbosity < 0`, could never succeed with those levels.
def verbose
  verbosity > 1
end
58
66
 
59
67
  # Runs the command
60
68
  def run(requester_file, argv)
@@ -0,0 +1,46 @@
1
+ require 'waw/crawler'
2
module Waw
  module Commands
    # Command behind the `waw-crawl` executable: configures a Waw::Crawler
    # from command line options and crawls the web site given as argument.
    class CrawlCommand < Command

      # Usage banner of the command.
      # NOTE(review): the heredoc body is kept exactly as shown in the diff
      # view, which does not preserve leading whitespace -- confirm against
      # the real file.
      def banner
        <<-EOF
Usage: waw-crawl [options] WEB_SITE_URI
        EOF
      end

      # The crawl command is always considered safe
      def check_command_policy
        true
      end

      # Creates the crawler and registers the crawl-specific switches on
      # _options_. Each switch forwards its boolean value to the crawler.
      def add_options(options)
        @crawler = Waw::Crawler.new(nil)

        options.on("--[no-]check-externals", "[Don't] ping any external link") do |enabled|
          @crawler.check_externals = enabled
        end

        # switch, description and the query handed to Crawler#ping_on
        [
          ["--[no-]check-img",    "[Don't] check image <img src='...'>", 'img/@src'],
          ["--[no-]check-link",   "[Don't] check <link href='...'>",     'link/@href'],
          ["--[no-]check-script", "[Don't] check <script src='...'>",    'script/@src']
        ].each do |switch, description, query|
          options.on(switch, description) do |enabled|
            @crawler.ping_on(query, enabled)
          end
        end
      end

      # Runs the crawl: expects exactly one argument, the URI of the web site.
      # With any other argument count, `exit(nil, true)` is invoked first
      # (helper not visible here; presumably prints the usage and stops).
      # An Interrupt raised while crawling is swallowed, with a farewell
      # message when the command is verbose.
      def __run(requester_file, arguments)
        exit(nil, true) unless arguments.size == 1
        @crawler.root_uri = arguments.first
        @crawler.listener.verbosity = @verbosity
        @crawler.crawl
      rescue Interrupt
        info "waw-crawl stopping now... ciao!" if verbose
      end

    end # class CrawlCommand
  end # module Commands
end # module Waw
@@ -31,7 +31,11 @@ module Waw
31
31
  else
32
32
  puts "Visiting #{location}" if verbose
33
33
  browser.all_internal_links.each do |link|
34
- visit(browser, link[:href], visited)
34
+ begin
35
+ visit(browser, link[:href], visited)
36
+ rescue URI::InvalidURIError => ex
37
+ puts "Hohoho, I've found something really wrong #{link[:href]}"
38
+ end
35
39
  end
36
40
  end
37
41
  end
@@ -43,6 +43,17 @@ module Waw
43
43
  "javascript:#{id}({#{buffer[2..-1]}}, '##{id}')"
44
44
  end
45
45
 
46
# Returns an inline <script> element wrapping the javascript generated for
# this action by Waw::ActionController::JSGeneration#generate_js_for_action
# (called with the current kernel, the action itself and an empty string).
# NOTE(review): the heredoc body is kept exactly as shown in the diff view,
# which does not preserve leading whitespace -- confirm against the real file.
def ajax_action_code
  generator = Waw::ActionController::JSGeneration.new
  action_js = generator.generate_js_for_action(Waw::kernel, self, "")
  <<-EOF
<script type="text/javascript">
#{action_js}
</script>
  EOF
end
56
+
46
57
  # Factors the ajax code for preparing a formulary
47
58
  def ajax_form_preparer(opts = {})
48
59
  form_id = opts[:form_id] || id
@@ -1,6 +1,6 @@
1
1
  module Waw
2
2
  class ActionController < ::Waw::Controller
3
- class JSGeneration
3
+ class JSGeneration < ::Waw::Controller
4
4
 
5
5
  # Header of the generated javascript file
6
6
  HEADER = <<-EOF
@@ -18,6 +18,15 @@ module Waw
18
18
  raise ConfigurationError, msg
19
19
  end
20
20
 
21
# Rack entry point: lets the generator act as a waw controller.
#
# Generates the javascript into a fresh array through generate_js and
# returns it as the body of a 200 response typed application/javascript.
# The _env_ argument is not consulted.
def call(env)
  js_chunks = []
  generate_js(kernel, js_chunks)
  headers = {'Content-Type' => 'application/javascript'}
  [200, headers, js_chunks]
end
29
+
21
30
  # Start hook start callback required by Waw. Generates the javascript code
22
31
  # if the configuration variable 'code_at_startup' is true.
23
32
  def run(waw_kernel)
@@ -97,7 +106,7 @@ module Waw
97
106
  function #{action.id}(request_data, form) {
98
107
  $.ajax({type: "POST", url: "#{action.url}", data: request_data, dataType: "json",
99
108
  error: function(data) {
100
- window.location = '/feedback?mkey=server_error';
109
+ window.location = '/500';
101
110
  },
102
111
  success: function(data) {
103
112
  THEEND
@@ -4,6 +4,8 @@ module Waw
4
4
  class Match
5
5
  include Waw::ScopeUtils
6
6
 
7
+ attr_reader :wawaccess
8
+
7
9
  # Served file
8
10
  attr_reader :served_file
9
11
 
@@ -15,13 +17,14 @@ module Waw
15
17
  end
16
18
 
17
19
  # Executes on a wawaccess instance
18
- def __execute
20
+ def __execute(env)
19
21
  instance_exec *@args, &@block
20
22
  end
21
23
 
22
24
  # Delegated to the wawaccess that created me
23
25
  def root; @wawaccess.root; end
24
26
  def folder; @wawaccess.folder; end
27
+ def req_path; @wawaccess.req_path; end
25
28
 
26
29
  ################################################### Callbacks proposed to .wawaccess rules
27
30
 
@@ -0,0 +1,35 @@
1
module Waw
  class StaticController < ::Waw::Controller
    # A named predicate usable as a pattern in .wawaccess rules. The
    # predicate block is evaluated in the context of the matcher itself,
    # so it can call #folder, #req_path and #wawaccess directly.
    class Matcher
      include Waw::ScopeUtils

      # wawaccess: waw access on which this matcher is defined
      # predicate: block deciding whether the matcher applies
      attr_reader :wawaccess, :predicate

      # Builds a matcher bound to _wawaccess_ and driven by _predicate_
      def initialize(wawaccess, predicate)
        @wawaccess, @predicate = wawaccess, predicate
      end

      # Folder of the underlying wawaccess
      def folder; wawaccess.folder; end

      # Requested path, as seen by the underlying wawaccess
      def req_path; wawaccess.req_path; end

      # Evaluates the predicate with the matcher as self and returns its
      # result. The _env_ argument is not handed to the predicate.
      def matches?(env)
        instance_eval(&predicate)
      end

    end # class Matcher
  end # class StaticController
end # module Waw
@@ -1,9 +1,8 @@
1
- require 'uri'
2
- require 'waw/controllers/static/match'
3
1
  module Waw
4
2
  class StaticController < ::Waw::Controller
5
3
  # Waw version of .htaccess files
6
4
  class WawAccess
5
+ include Waw::ScopeUtils
7
6
 
8
7
  # The folder which is served
9
8
  attr_accessor :folder
@@ -66,7 +65,7 @@ module Waw
66
65
 
67
66
  def recognized_pattern?(pattern)
68
67
  [FalseClass, TrueClass, String,
69
- Regexp, Waw::Validation::Validator].any?{|c| c===pattern}
68
+ Regexp, Waw::Validation::Validator, StaticController::Matcher].any?{|c| c===pattern}
70
69
  end
71
70
 
72
71
  # Adds a child in the hierarchy
@@ -106,6 +105,10 @@ module Waw
106
105
 
107
106
  ################################################### Utilites about paths
108
107
 
108
# Returns the requested path: the 'REQ_PATH' entry of the rack environment
# when it is set, the normalized 'PATH_INFO' otherwise.
def req_path
  known = rack_env['REQ_PATH']
  return known if known
  normalize_req_path(rack_env['PATH_INFO'])
end
111
+
109
112
  # Returns the real path of a file
110
113
  def realpath(file)
111
114
  File.expand_path(File.join(folder, file))
@@ -164,7 +167,8 @@ module Waw
164
167
  ################################################### .waw access rules application!
165
168
 
166
169
  # Finds the matching block inside this .wawaccess handler
167
- def find_match(path)
170
+ def find_match(env)
171
+ path = env['REQ_PATH']
168
172
  @serve.each do |pattern, block|
169
173
  case pattern
170
174
  when FalseClass
@@ -186,6 +190,10 @@ module Waw
186
190
  if pattern.validate(matching_file(path))
187
191
  return Match.new(self, path, block)
188
192
  end
193
+ when StaticController::Matcher
194
+ if pattern.matches?(env)
195
+ return Match.new(self, path, block)
196
+ end
189
197
  else
190
198
  raise WawError, "Unrecognized wawaccess pattern #{pattern}"
191
199
  end
@@ -194,16 +202,16 @@ module Waw
194
202
  end
195
203
 
196
204
  # Applies the rules defined here or delegate to the parent if allowed
197
- def apply_rules(path)
198
- if match = find_match(path)
199
- match.__execute
205
+ def apply_rules(env)
206
+ if match = find_match(env)
207
+ match.__execute(env)
200
208
  elsif (parent and inherits)
201
- parent.apply_rules(path)
209
+ parent.apply_rules(env)
202
210
  else
203
- body = "File not found: #{path}\n"
211
+ body = "File not found: #{env['PATH_INFO']}\n"
204
212
  [404, {"Content-Type" => "text/plain",
205
- "Content-Length" => body.size.to_s,
206
- "X-Cascade" => "pass"},
213
+ "Content-Length" => body.size.to_s,
214
+ "X-Cascade" => "pass"},
207
215
  [body]]
208
216
  end
209
217
  end
@@ -224,10 +232,15 @@ module Waw
224
232
  end
225
233
 
226
234
  # Serves a path from a root waw access in the hierarchy
227
- def do_path_serve(path)
228
- path = normalize_req_path(path)
229
- waw_access = (find_wawaccess_for(path) || self)
230
- waw_access.apply_rules(path)
235
# Serves _path_ from this waw access: stores the normalized path under
# env['REQ_PATH'], then applies the rules of the wawaccess found for that
# path, or of self when none is found. _env_ defaults to the current rack
# environment.
def do_path_serve(path, env = rack_env)
  normalized = normalize_req_path(path)
  env['REQ_PATH'] = normalized
  handler = find_wawaccess_for(normalized) || self
  handler.apply_rules(env)
end
240
+
241
+ # Makes a Rack standard call
242
+ def call(env)
243
+ do_path_serve(env['PATH_INFO'], env)
231
244
  end
232
245
 
233
246
  end # class WawAccess
@@ -9,6 +9,7 @@ module Waw
9
9
  def initialize(wawaccess)
10
10
  raise ArgumentError, "wawaccess cannot be nil" unless WawAccess===wawaccess
11
11
  @wawaccess = wawaccess
12
+ @matchers = {}
12
13
  end
13
14
 
14
15
  # Returns a validator that matches the root of the wawaccess tree
@@ -16,6 +17,14 @@ module Waw
16
17
  Waw::Validation.validator{|served_file| File.expand_path(served_file) == File.expand_path(@wawaccess.root.folder)}
17
18
  end
18
19
 
20
# Installs a matcher under _name_: wraps _predicate_ in a Matcher bound to
# the current wawaccess, remembers it, and defines a singleton method called
# _name_ on this DSL instance that returns the remembered matcher.
def matcher(name, &predicate)
  @matchers[name] = Matcher.new(@wawaccess, predicate)
  metaclass = class << self; self; end
  metaclass.send(:define_method, name) { @matchers[name] }
end
27
+
19
28
  # Starts a wawaccess file
20
29
  def wawaccess(&block)
21
30
  raise WawError, "#{@wawaccess.identifier}: missing block in wawaccess call" unless block
@@ -1,5 +1,8 @@
1
+ require 'uri'
1
2
  require 'waw/controllers/static/waw_access'
2
3
  require 'waw/controllers/static/waw_access_dsl'
4
+ require 'waw/controllers/static/matcher'
5
+ require 'waw/controllers/static/match'
3
6
  module Waw
4
7
  #
5
8
  # A waw service that serves public pages expressed in wlang wtpl format
@@ -29,8 +32,8 @@ module Waw
29
32
  ##############################################################################################
30
33
 
31
34
  # Executes the service
32
- def execute(env, req, res)
33
- @wawaccess.do_path_serve(env['PATH_INFO'])
35
+ def call(env)
36
+ @wawaccess.call(env)
34
37
  end
35
38
 
36
39
  end # class Controller
@@ -0,0 +1,176 @@
1
+ require 'mechanize'
2
+ require 'waw/crawler/crawler_options'
3
+ require 'waw/crawler/crawler_listener'
4
module Waw
  # Crawls a web site from a root URI using a Mechanize agent: every page
  # internal to the site is checked, which means that the resources selected
  # by ping_list are pinged (HEAD request) and the links selected by
  # crawl_list are fetched and queued for checking in turn. Events are
  # reported to the listener. The listener, ping_list, crawl_list and
  # check_externals come from Crawler::Options (not shown here).
  class Crawler
    include Crawler::Options

    ###################################################################### Internal state

    # Root URI used when none is provided
    DEFAULT_ROOT_URI = "http://127.0.0.1:9292".freeze

    # Mechanize agent instance (only set while crawling)
    attr_reader :agent

    # Root URI to crawl
    attr_reader :root_uri

    # Sets the root uri.
    #
    # Accepts an URI instance (kept as is), nil (DEFAULT_ROOT_URI is used) or
    # anything else, whose to_s is parsed. The value stored is always an URI,
    # since internal_uri? relies on root_uri.host and root_uri.port; the
    # former nil default was a bare "127.0.0.1:9292" String, on which those
    # calls fail.
    def root_uri=(uri)
      @root_uri = case uri
                  when nil then URI::parse(DEFAULT_ROOT_URI)
                  when URI then uri
                  else URI::parse(uri.to_s)
                  end
    end

    # Stack of files/pages to visit (only set while crawling)
    attr_reader :stack

    ###################################################################### About URI visit state

    # URI statuses: maps an URI to an OR-combination of the flags below
    # (0 when nothing is known about the URI). Only set while crawling.
    attr_reader :uristate

    # Status flags. Their numeric order matters: `state < PENDING` is used
    # for "not queued nor checked yet" and `state < PINGED` for "unknown".
    PINGED   = 1
    PENDING  = 2
    CHECKING = 4
    CHECKED  = 8

    # Marks an URI as currently pending
    def pending!(uri)
      uristate[uri] |= PENDING
    end

    # Marks an URI as being pinged
    def pinged!(uri)
      uristate[uri] |= PINGED
    end

    ###################################################################### Initialization

    # Creates a crawler instance on a root URI (see root_uri=) and installs
    # the default options
    def initialize(root_uri = nil)
      self.root_uri = root_uri
      set_default_options
    end

    ###################################################################### Utils

    # Returns true if a given URI is internal to the website currently
    # crawled, that is, if it has no host or the same host and port as the
    # root URI
    def internal_uri?(uri)
      uri.host.nil? || ((uri.host == root_uri.host) && (uri.port == root_uri.port))
    end

    # Resolves as an absolute URI something that has been found on
    # a page. Relies on the agent's non-public `resolve` method.
    def resolve_uri(href_or_src, page)
      URI::parse(agent.send(:resolve, href_or_src, page))
    end

    ###################################################################### Crawling

    # Starts the crawling: fetches the root URI, then processes the stack
    # first-in first-out until it is empty. Web pages are checked, any other
    # kind of document is reported to the listener as skipped. The agent, the
    # URI states and the stack are dropped once the crawl completes.
    def crawl
      @agent = Mechanize.new
      @uristate = Hash.new{|h,k| h[k] = 0}
      @stack = [ agent.get(root_uri) ]
      until stack.empty?
        to_check = stack.shift
        case to_check
          when ::Mechanize::Page
            check_web_page(to_check)
          else
            listener.doc_skipped(to_check)
        end
      end
      @agent = nil
      @uristate = nil
      @stack = nil
    end

    # Crawls every location selected by _query_ on _referer_page_
    def crawl_all(query, referer_page)
      referer_page.search(query).each do |loc|
        crawl_one(loc, referer_page)
      end
    end

    # Crawls one location found on _referer_page_: unless it is already
    # pending, being checked or checked, it is marked as pending; an internal
    # URI is then fetched and pushed on the stack, an external one is
    # reported as skipped. Errors go through handle_error.
    def crawl_one(location, referer_page)
      uri = resolve_uri(location, referer_page)

      # Bypass PENDING/CHECKING/CHECKED links
      if uristate[uri] < PENDING

        # Mark it as PENDING now
        pending!(uri)

        # Mark as to crawl by pushing on the stack
        if internal_uri?(uri)
          stack.push(agent.get(uri))
        else
          listener.crawl_skipped(referer_page, location)
        end

      end
    rescue => ex
      handle_error(ex, referer_page, location)
    end

    ###################################################################### Checking

    # Checks a web page: inside the listener's `checking` block, pings what
    # ping_list selects, then crawls what crawl_list selects. The page URI is
    # flagged CHECKING before and CHECKED after.
    def check_web_page(page)
      uristate[page.uri] |= CHECKING
      listener.checking(page){
        # Make ping checks
        all_ping!(ping_list.join(', '), page)
        # Crawl all links now
        crawl_all(crawl_list.join(', '), page)
      }
      uristate[page.uri] |= CHECKED
    end

    ###################################################################### Pinging

    # Pings every location selected by _query_ on _referer_page_
    def all_ping!(query, referer_page)
      referer_page.search(query).each do |loc|
        ping!(loc, referer_page)
      end
    end

    # Pings one location found on _referer_page_ with a HEAD request, unless
    # something is already known about its URI. External URIs are only
    # pinged when check_externals is set, and are reported as skipped
    # otherwise. Errors go through handle_error.
    def ping!(loc, referer_page)
      uri = resolve_uri(loc, referer_page)

      # Only ping uri that are not PINGED/PENDING/CHECKING/CHECKED
      return unless uristate[uri] < PINGED

      # bypass externals if required
      if internal_uri?(uri) || check_externals
        agent.head(uri)   # ping!
        pinged!(uri)
        listener.ping_ok(referer_page, loc)
      else
        listener.ping_skipped(referer_page, loc)
      end

    rescue => ex
      handle_error(ex, referer_page, loc)
    end

    ###################################################################### Error handling

    # Handles errors that occur while pinging or crawling: response code,
    # unsupported scheme and socket errors are reported to the listener,
    # anything else is raised again.
    def handle_error(ex, referer_page, loc)
      case ex
        when Mechanize::ResponseCodeError
          listener.reach_failure(referer_page, loc, ex)
        when Mechanize::UnsupportedSchemeError
          listener.scheme_failure(referer_page, loc, ex)
        when SocketError
          listener.socket_error(referer_page, loc, ex)
        else
          raise ex
      end
    end

  end # class Crawler
end # module Waw