waw 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. data/bin/waw-crawl +26 -0
  2. data/lib/waw.rb +3 -3
  3. data/lib/waw/commands/command.rb +12 -4
  4. data/lib/waw/commands/crawl_command.rb +46 -0
  5. data/lib/waw/commands/profile_command.rb +5 -1
  6. data/lib/waw/controllers/action/action.rb +11 -0
  7. data/lib/waw/controllers/action/js_generation.rb +11 -2
  8. data/lib/waw/controllers/static/match.rb +4 -1
  9. data/lib/waw/controllers/static/matcher.rb +35 -0
  10. data/lib/waw/controllers/static/waw_access.rb +28 -15
  11. data/lib/waw/controllers/static/waw_access_dsl.rb +9 -0
  12. data/lib/waw/controllers/static_controller.rb +5 -2
  13. data/lib/waw/crawler.rb +176 -0
  14. data/lib/waw/crawler/crawler_listener.rb +64 -0
  15. data/lib/waw/crawler/crawler_options.rb +42 -0
  16. data/lib/waw/kern/living_state.rb +93 -0
  17. data/lib/waw/scope_utils.rb +1 -0
  18. data/lib/waw/tools/mail/mail_agent.rb +1 -1
  19. data/lib/waw/validation.rb +3 -0
  20. data/lib/waw/validation/datetime_validator.rb +53 -0
  21. data/lib/waw/wspec/browser.rb +4 -2
  22. data/test/bricks/error_handler/test/test_all.rb +20 -0
  23. data/test/bricks/static_controller/config/test.cfg +2 -0
  24. data/test/bricks/static_controller/logs/webapp.log +84 -0
  25. data/test/bricks/static_controller/test/static_controller.wspec +12 -0
  26. data/test/bricks/static_controller/test/test_all.rb +20 -0
  27. data/test/bricks/static_controller/waw.deploy +1 -0
  28. data/test/bricks/static_controller/waw.routing +5 -0
  29. data/test/bricks/test_all.rb +8 -0
  30. data/test/spec/controllers/static/waw_access_spec.rb +3 -3
  31. data/test/spec/test_all.rb +0 -2
  32. data/test/spec/validation/datetime_validation_spec.rb +92 -0
  33. data/test/unit/test_all.rb +1 -0
  34. data/test/unit/waw/controllers/static/logs/webapp.log +22 -0
  35. metadata +23 -6
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Waw - making web development simple
4
+ # (see lib/waw/waw.rb for more information)
5
+ #
6
+ # Copyright (c) 2010 University of Louvain, Bernard & Louis Lambeau
7
+ # Released under a MIT or Ruby licence
8
+ #
9
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
10
+ require 'waw'
11
+ require 'waw/commands/command'
12
+ require 'waw/commands/crawl_command'
13
+
14
# Runs the crawl command from the current directory with the command-line
# arguments. Every failure is reported on the standard error stream and then
# re-raised, so the process still ends with the original exception.
begin
  Waw::Commands::CrawlCommand.new.run('.', ARGV)
rescue Interrupt
  # An empty line first, then the interruption notice
  $stderr.puts
  $stderr.puts "Interrupted"
  raise
rescue OptionParser::ParseError, StandardError => error
  # Option parsing errors and any other standard error are reported alike
  $stderr.puts error.message
  raise
end
data/lib/waw.rb CHANGED
@@ -9,13 +9,13 @@ module Waw
9
9
 
10
10
  # Requirements on gems for this version
11
11
  GEM_REQUIREMENTS = {
12
- :rack => '>= 1.1.0',
12
+ :rack => '>= 1.2.1',
13
13
  :wlang => '>= 0.9.0',
14
14
  :json => '>= 1.1.9'
15
15
  }
16
16
 
17
17
  # Waw version
18
- VERSION = "0.2.2".freeze
18
+ VERSION = "0.3.0".freeze
19
19
 
20
20
  # Waw loading mutex
21
21
  WAW_KERNELS_LOCK = Mutex.new
@@ -96,4 +96,4 @@ require 'waw/controllers/action_controller'
96
96
  require 'waw/controllers/static_controller'
97
97
  require 'waw/controllers/no_cache'
98
98
  require 'waw/controllers/error_handler'
99
- require 'waw/routing'
99
+ require 'waw/routing'
@@ -5,14 +5,14 @@ module Waw
5
5
  class Command
6
6
 
7
7
  # The verbose level
8
- attr_accessor :verbose
8
+ attr_accessor :verbosity
9
9
 
10
10
  # Show stack traces?
11
11
  attr_accessor :trace
12
12
 
13
13
  # Creates an empty command instance
14
14
  def initialize
15
- @verbose = false
15
+ @verbosity = 1
16
16
  @buffer = STDOUT
17
17
  end
18
18
 
@@ -35,8 +35,12 @@ module Waw
35
35
  @trace = true
36
36
  end
37
37
 
38
- opt.on("--verbose", "-v", "Display extra progress as we progress") do |value|
39
- @verbose = true
38
+ opt.on("--verbose", "Display extra info as we progress") do |value|
39
+ @verbosity = 2
40
+ end
41
+
42
+ opt.on("--silent", "Be quiet silent") do |value|
43
+ @verbosity = 0
40
44
  end
41
45
 
42
46
  # No argument, shows at tail. This will print an options summary.
@@ -55,6 +59,10 @@ module Waw
55
59
  opt.separator nil
56
60
  end
57
61
  end
62
+
63
# Tells whether extra information should be displayed.
#
# The verbosity level is 0 with --silent, 1 by default and 2 with --verbose,
# so only a level above the default counts as verbose. (The previous test,
# `verbosity < 0`, could never be true for any of these levels.)
#
# Returns true when verbosity is greater than 1, false otherwise.
def verbose
  verbosity > 1
end
58
66
 
59
67
  # Runs the command
60
68
  def run(requester_file, argv)
@@ -0,0 +1,46 @@
1
+ require 'waw/crawler'
2
module Waw
  module Commands
    # Command behind the waw-crawl executable: it configures a Waw::Crawler
    # from the command-line options, then crawls the web site whose URI is
    # given as the single argument.
    class CrawlCommand < Command

      # Returns the usage banner of the command
      def banner
        <<-EOF
Usage: waw-crawl [options] WEB_SITE_URI
        EOF
      end

      # No policy restriction applies to this command: always returns true
      def check_command_policy
        true
      end

      # Creates the crawler and installs the crawl-specific options, each of
      # which forwards its boolean value to that crawler
      def add_options(options)
        @crawler = Waw::Crawler.new(nil)
        options.on("--[no-]check-externals", "[Don't] ping any external link") { |enabled|
          @crawler.check_externals = enabled
        }
        options.on("--[no-]check-img", "[Don't] check image <img src='...'>") { |enabled|
          @crawler.ping_on('img/@src', enabled)
        }
        options.on("--[no-]check-link", "[Don't] check <link href='...'>") { |enabled|
          @crawler.ping_on('link/@href', enabled)
        }
        options.on("--[no-]check-script", "[Don't] check <script src='...'>") { |enabled|
          @crawler.ping_on('script/@src', enabled)
        }
      end

      # Runs the crawl on the URI found as the only argument. An interruption
      # stops the crawl quietly, with a goodbye message when verbose.
      def __run(requester_file, arguments)
        # NOTE(review): exit is presumably the Command helper showing the usage; confirm
        exit(nil, true) if arguments.size != 1
        @crawler.root_uri = arguments[0]
        @crawler.listener.verbosity = @verbosity
        @crawler.crawl
      rescue Interrupt
        info "waw-crawl stopping now... ciao!" if verbose
      end

    end # class CrawlCommand
  end # module Commands
end # module Waw
@@ -31,7 +31,11 @@ module Waw
31
31
  else
32
32
  puts "Visiting #{location}" if verbose
33
33
  browser.all_internal_links.each do |link|
34
- visit(browser, link[:href], visited)
34
+ begin
35
+ visit(browser, link[:href], visited)
36
+ rescue URI::InvalidURIError => ex
37
+ puts "Hohoho, I've found something really wrong #{link[:href]}"
38
+ end
35
39
  end
36
40
  end
37
41
  end
@@ -43,6 +43,17 @@ module Waw
43
43
  "javascript:#{id}({#{buffer[2..-1]}}, '##{id}')"
44
44
  end
45
45
 
46
# Factors the ajax code for the action itself: the javascript generated for
# this action by a JSGeneration instance, wrapped inside a <script> tag
def ajax_action_code
  generator = Waw::ActionController::JSGeneration.new
  javascript = generator.generate_js_for_action(Waw::kernel, self, "")
  <<-EOF
<script type="text/javascript">
#{javascript}
</script>
  EOF
end
56
+
46
57
  # Factors the ajax code for preparing a formulary
47
58
  def ajax_form_preparer(opts = {})
48
59
  form_id = opts[:form_id] || id
@@ -1,6 +1,6 @@
1
1
  module Waw
2
2
  class ActionController < ::Waw::Controller
3
- class JSGeneration
3
+ class JSGeneration < ::Waw::Controller
4
4
 
5
5
  # Header of the generated javascript file
6
6
  HEADER = <<-EOF
@@ -18,6 +18,15 @@ module Waw
18
18
  raise ConfigurationError, msg
19
19
  end
20
20
 
21
# Acts as a waw controller: generates the javascript code into a fresh
# buffer and answers it as a 200 'application/javascript' Rack response
def call(env)
  javascript = []
  generate_js(kernel, javascript)
  [200, {'Content-Type' => 'application/javascript'}, javascript]
end
29
+
21
30
  # Start hook start callback required by Waw. Generates the javascript code
22
31
  # if the configuration variable 'code_at_startup' is true.
23
32
  def run(waw_kernel)
@@ -97,7 +106,7 @@ module Waw
97
106
  function #{action.id}(request_data, form) {
98
107
  $.ajax({type: "POST", url: "#{action.url}", data: request_data, dataType: "json",
99
108
  error: function(data) {
100
- window.location = '/feedback?mkey=server_error';
109
+ window.location = '/500';
101
110
  },
102
111
  success: function(data) {
103
112
  THEEND
@@ -4,6 +4,8 @@ module Waw
4
4
  class Match
5
5
  include Waw::ScopeUtils
6
6
 
7
+ attr_reader :wawaccess
8
+
7
9
  # Served file
8
10
  attr_reader :served_file
9
11
 
@@ -15,13 +17,14 @@ module Waw
15
17
  end
16
18
 
17
19
  # Executes on a wawaccess instance
18
- def __execute
20
+ def __execute(env)
19
21
  instance_exec *@args, &@block
20
22
  end
21
23
 
22
24
  # Delegated to the wawaccess that created me
23
25
  def root; @wawaccess.root; end
24
26
  def folder; @wawaccess.folder; end
27
+ def req_path; @wawaccess.req_path; end
25
28
 
26
29
  ################################################### Callbacks proposed to .wawaccess rules
27
30
 
@@ -0,0 +1,35 @@
1
module Waw
  class StaticController < ::Waw::Controller
    # A predicate attached to a wawaccess, recognized as a pattern by the
    # wawaccess rules (see WawAccess#recognized_pattern?)
    class Matcher
      include Waw::ScopeUtils

      # wawaccess: waw access on which this matcher is defined
      # predicate: block evaluated to decide whether the matcher matches
      attr_reader :wawaccess, :predicate

      # Creates a matcher instance for a wawaccess and a predicate block
      def initialize(wawaccess, predicate)
        @wawaccess, @predicate = wawaccess, predicate
      end

      # Both delegated to the wawaccess on which the matcher is defined
      def folder; wawaccess.folder; end
      def req_path; wawaccess.req_path; end

      # Does the matcher match? The predicate is evaluated in the scope of
      # this matcher (instance_eval); env itself is not handed to it.
      def matches?(env)
        instance_eval(&predicate)
      end

    end # class Matcher
  end # class StaticController
end # module Waw
@@ -1,9 +1,8 @@
1
- require 'uri'
2
- require 'waw/controllers/static/match'
3
1
  module Waw
4
2
  class StaticController < ::Waw::Controller
5
3
  # Waw version of .htaccess files
6
4
  class WawAccess
5
+ include Waw::ScopeUtils
7
6
 
8
7
  # The folder which is served
9
8
  attr_accessor :folder
@@ -66,7 +65,7 @@ module Waw
66
65
 
67
66
  def recognized_pattern?(pattern)
68
67
  [FalseClass, TrueClass, String,
69
- Regexp, Waw::Validation::Validator].any?{|c| c===pattern}
68
+ Regexp, Waw::Validation::Validator, StaticController::Matcher].any?{|c| c===pattern}
70
69
  end
71
70
 
72
71
  # Adds a child in the hierarchy
@@ -106,6 +105,10 @@ module Waw
106
105
 
107
106
  ################################################### Utilities about paths
108
107
 
108
# Returns the requested path: the 'REQ_PATH' entry of the Rack environment
# when it is set, the normalized 'PATH_INFO' otherwise
def req_path
  known = rack_env['REQ_PATH']
  return known if known
  normalize_req_path(rack_env['PATH_INFO'])
end
111
+
109
112
  # Returns the real path of a file
110
113
  def realpath(file)
111
114
  File.expand_path(File.join(folder, file))
@@ -164,7 +167,8 @@ module Waw
164
167
  ################################################### .waw access rules application!
165
168
 
166
169
  # Finds the matching block inside this .wawaccess handler
167
- def find_match(path)
170
+ def find_match(env)
171
+ path = env['REQ_PATH']
168
172
  @serve.each do |pattern, block|
169
173
  case pattern
170
174
  when FalseClass
@@ -186,6 +190,10 @@ module Waw
186
190
  if pattern.validate(matching_file(path))
187
191
  return Match.new(self, path, block)
188
192
  end
193
+ when StaticController::Matcher
194
+ if pattern.matches?(env)
195
+ return Match.new(self, path, block)
196
+ end
189
197
  else
190
198
  raise WawError, "Unrecognized wawaccess pattern #{pattern}"
191
199
  end
@@ -194,16 +202,16 @@ module Waw
194
202
  end
195
203
 
196
204
  # Applies the rules defined here or delegate to the parent if allowed
197
- def apply_rules(path)
198
- if match = find_match(path)
199
- match.__execute
205
+ def apply_rules(env)
206
+ if match = find_match(env)
207
+ match.__execute(env)
200
208
  elsif (parent and inherits)
201
- parent.apply_rules(path)
209
+ parent.apply_rules(env)
202
210
  else
203
- body = "File not found: #{path}\n"
211
+ body = "File not found: #{env['PATH_INFO']}\n"
204
212
  [404, {"Content-Type" => "text/plain",
205
- "Content-Length" => body.size.to_s,
206
- "X-Cascade" => "pass"},
213
+ "Content-Length" => body.size.to_s,
214
+ "X-Cascade" => "pass"},
207
215
  [body]]
208
216
  end
209
217
  end
@@ -224,10 +232,15 @@ module Waw
224
232
  end
225
233
 
226
234
  # Serves a path from a root waw access in the hierarchy
227
- def do_path_serve(path)
228
- path = normalize_req_path(path)
229
- waw_access = (find_wawaccess_for(path) || self)
230
- waw_access.apply_rules(path)
235
+ def do_path_serve(path, env = rack_env)
236
+ env['REQ_PATH'] = normalize_req_path(path)
237
+ waw_access = (find_wawaccess_for(env['REQ_PATH']) || self)
238
+ waw_access.apply_rules(env)
239
+ end
240
+
241
+ # Makes a Rack standard call
242
+ def call(env)
243
+ do_path_serve(env['PATH_INFO'], env)
231
244
  end
232
245
 
233
246
  end # class WawAccess
@@ -9,6 +9,7 @@ module Waw
9
9
  def initialize(wawaccess)
10
10
  raise ArgumentError, "wawaccess cannot be nil" unless WawAccess===wawaccess
11
11
  @wawaccess = wawaccess
12
+ @matchers = {}
12
13
  end
13
14
 
14
15
  # Returns a validator that matches the root of the wawaccess tree
@@ -16,6 +17,14 @@ module Waw
16
17
  Waw::Validation.validator{|served_file| File.expand_path(served_file) == File.expand_path(@wawaccess.root.folder)}
17
18
  end
18
19
 
20
# Installs a matcher: builds a Matcher on the wawaccess from the given
# predicate block, registers it under `name` and defines a method of that
# name on this very instance which returns the registered matcher
def matcher(name, &predicate)
  @matchers[name] = Matcher.new(@wawaccess, predicate)
  singleton = class << self; self; end
  singleton.send(:define_method, name) { @matchers[name] }
end
27
+
19
28
  # Starts a wawaccess file
20
29
  def wawaccess(&block)
21
30
  raise WawError, "#{@wawaccess.identifier}: missing block in wawaccess call" unless block
@@ -1,5 +1,8 @@
1
+ require 'uri'
1
2
  require 'waw/controllers/static/waw_access'
2
3
  require 'waw/controllers/static/waw_access_dsl'
4
+ require 'waw/controllers/static/matcher'
5
+ require 'waw/controllers/static/match'
3
6
  module Waw
4
7
  #
5
8
  # A waw service that serves public pages expressed in wlang wtpl format
@@ -29,8 +32,8 @@ module Waw
29
32
  ##############################################################################################
30
33
 
31
34
  # Executes the service
32
- def execute(env, req, res)
33
- @wawaccess.do_path_serve(env['PATH_INFO'])
35
+ def call(env)
36
+ @wawaccess.call(env)
34
37
  end
35
38
 
36
39
  end # class Controller
@@ -0,0 +1,176 @@
1
+ require 'mechanize'
2
+ require 'waw/crawler/crawler_options'
3
+ require 'waw/crawler/crawler_listener'
4
module Waw
  # Crawls a web site with a Mechanize agent, starting at a root URI. Each
  # visited page has some of its references pinged (HEAD request) and its
  # links followed when they stay inside the crawled site. Every outcome is
  # reported to the listener.
  #
  # NOTE(review): listener, check_externals, ping_list, crawl_list and
  # set_default_options are not defined here; presumably they come from
  # Crawler::Options -- confirm.
  class Crawler
    include Crawler::Options

    ###################################################################### Internal state

    # Mechanize agent instance (only set while crawling)
    attr_reader :agent

    # Root URI to crawl
    attr_reader :root_uri

    # Sets the root uri. URI instances are kept as is, anything else is
    # parsed from its string representation. Without uri (nil), the root
    # defaults to http://127.0.0.1:9292.
    def root_uri=(uri)
      @root_uri = if uri.nil?
        # Must be a URI as well (not a bare String): internal_uri? reads
        # root_uri.host and root_uri.port
        URI::parse("http://127.0.0.1:9292")
      else
        uri.is_a?(URI) ? uri : URI::parse(uri.to_s)
      end
    end

    # Stack of files/pages to visit (only set while crawling)
    attr_reader :stack

    ###################################################################### About URI visit state

    # URI statuses: a Hash mapping each URI to an Integer combining the
    # flags below (0 for a URI never seen so far; only set while crawling)
    attr_reader :uristate

    # State flags, combined with |=. Their increasing values also allow
    # tests such as `state < PENDING` (at most PINGED).
    PINGED   = 1
    PENDING  = 2
    CHECKING = 4
    CHECKED  = 8

    # Marks an URI as currently pending
    def pending!(uri)
      uristate[uri] |= PENDING
    end

    # Marks an URI as being pinged
    def pinged!(uri)
      uristate[uri] |= PINGED
    end

    ###################################################################### Initialization

    # Creates a crawler instance on a root URI (see root_uri= for the
    # accepted values) and installs the default options
    def initialize(root_uri = nil)
      self.root_uri = root_uri
      set_default_options
    end

    ###################################################################### Utils

    # Returns true if a given URI is internal to the website currently
    # crawled, that is, if it has no host or the same host and port as the
    # root URI
    def internal_uri?(uri)
      uri.host.nil? or ((uri.host == root_uri.host) and (uri.port == root_uri.port))
    end

    # Resolves as an absolute URI something that has been found on
    # a page (delegates to the agent's `resolve`, invoked through send)
    def resolve_uri(href_or_src, page)
      URI::parse(agent.send(:resolve, href_or_src, page))
    end

    ###################################################################### Crawling

    # Starts the crawling: fetches the root URI, then checks the stacked
    # documents one after the other until the stack is empty. Documents that
    # are not web pages are reported as skipped. Returns nil.
    def crawl
      @agent = Mechanize.new
      @uristate = Hash.new{|h,k| h[k] = 0}
      @stack = [ agent.get(root_uri) ]
      until stack.empty?
        to_check = stack.shift
        case to_check
          when ::Mechanize::Page
            check_web_page(to_check)
          else
            listener.doc_skipped(to_check)
        end
      end
    ensure
      # The crawling state is released even when the crawl is aborted by an
      # error or an interruption
      @agent = nil
      @uristate = nil
      @stack = nil
    end

    # Crawls every location found by `query` on the referer page
    def crawl_all(query, referer_page)
      referer_page.search(query).each do |loc|
        crawl_one(loc, referer_page)
      end
    end

    # Crawls one location found on the referer page: when not seen yet, an
    # internal URI is fetched and stacked for a later check, whereas any
    # other URI is reported as skipped. Errors go through handle_error.
    def crawl_one(location, referer_page)
      uri = resolve_uri(location, referer_page)

      # Bypass PENDING/CHECKING/CHECKED links
      if uristate[uri] < PENDING

        # Mark it as PENDING now
        pending!(uri)

        # Mark as to crawl by pushing on the stack
        if internal_uri?(uri)
          stack.push(agent.get(uri))
        else
          listener.crawl_skipped(referer_page, location)
        end

      end
    rescue => ex
      handle_error(ex, referer_page, location)
    end

    ###################################################################### Checking

    # Checks a web page: pings the locations selected by the ping list, then
    # crawls those selected by the crawl list, inside the listener's
    # `checking` callback. The page URI is flagged CHECKING before and
    # CHECKED after.
    def check_web_page(page)
      uristate[page.uri] |= CHECKING
      listener.checking(page){
        # Make ping checks
        all_ping!(ping_list.join(', '), page)
        # Crawl all links now
        crawl_all(crawl_list.join(', '), page)
      }
      uristate[page.uri] |= CHECKED
    end

    ###################################################################### Pinging

    # Pings every location found by `query` on the referer page
    def all_ping!(query, referer_page)
      referer_page.search(query).each do |loc|
        ping!(loc, referer_page)
      end
    end

    # Pings one location found on the referer page with a HEAD request,
    # unless its URI is already known. External URIs are only pinged when
    # check_externals is set and are reported as skipped otherwise. Errors
    # go through handle_error.
    def ping!(loc, referer_page)
      uri = resolve_uri(loc, referer_page)

      # Only ping uri that are not PINGED/PENDING/CHECKING/CHECKED
      return unless uristate[uri] < PINGED

      # bypass externals if required
      if internal_uri?(uri) || check_externals
        agent.head(uri)    # ping!
        pinged!(uri)
        listener.ping_ok(referer_page, loc)
      else
        listener.ping_skipped(referer_page, loc)
      end

    rescue => ex
      handle_error(ex, referer_page, loc)
    end

    ###################################################################### Error handling

    # Handles errors that occur: response code, unsupported scheme and
    # socket errors are reported to the listener, anything else is re-raised
    def handle_error(ex, referer_page, loc)
      case ex
        when Mechanize::ResponseCodeError
          listener.reach_failure(referer_page, loc, ex)
        when Mechanize::UnsupportedSchemeError
          listener.scheme_failure(referer_page, loc, ex)
        when SocketError
          listener.socket_error(referer_page, loc, ex)
        else
          raise ex
      end
    end

  end # class Crawler
end # module Waw