raw 0.49.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (148) hide show
  1. data/doc/CONTRIBUTORS +106 -0
  2. data/doc/LICENSE +32 -0
  3. data/doc/coding_conventions.txt +11 -0
  4. data/lib/raw.rb +42 -0
  5. data/lib/raw/adapter.rb +113 -0
  6. data/lib/raw/adapter/cgi.rb +41 -0
  7. data/lib/raw/adapter/fastcgi.rb +48 -0
  8. data/lib/raw/adapter/mongrel.rb +146 -0
  9. data/lib/raw/adapter/script.rb +94 -0
  10. data/lib/raw/adapter/webrick.rb +144 -0
  11. data/lib/raw/adapter/webrick/vcr.rb +91 -0
  12. data/lib/raw/cgi.rb +323 -0
  13. data/lib/raw/cgi/cookie.rb +47 -0
  14. data/lib/raw/cgi/http.rb +62 -0
  15. data/lib/raw/compiler.rb +138 -0
  16. data/lib/raw/compiler/filter/cleanup.rb +21 -0
  17. data/lib/raw/compiler/filter/elements.rb +166 -0
  18. data/lib/raw/compiler/filter/elements/element.rb +210 -0
  19. data/lib/raw/compiler/filter/localization.rb +23 -0
  20. data/lib/raw/compiler/filter/markup.rb +32 -0
  21. data/lib/raw/compiler/filter/morph.rb +123 -0
  22. data/lib/raw/compiler/filter/morph/each.rb +34 -0
  23. data/lib/raw/compiler/filter/morph/for.rb +11 -0
  24. data/lib/raw/compiler/filter/morph/if.rb +26 -0
  25. data/lib/raw/compiler/filter/morph/selected_if.rb +43 -0
  26. data/lib/raw/compiler/filter/morph/standard.rb +55 -0
  27. data/lib/raw/compiler/filter/morph/times.rb +27 -0
  28. data/lib/raw/compiler/filter/script.rb +116 -0
  29. data/lib/raw/compiler/filter/squeeze.rb +16 -0
  30. data/lib/raw/compiler/filter/static_include.rb +74 -0
  31. data/lib/raw/compiler/filter/template.rb +121 -0
  32. data/lib/raw/compiler/reloader.rb +96 -0
  33. data/lib/raw/context.rb +154 -0
  34. data/lib/raw/context/flash.rb +157 -0
  35. data/lib/raw/context/global.rb +88 -0
  36. data/lib/raw/context/request.rb +338 -0
  37. data/lib/raw/context/response.rb +57 -0
  38. data/lib/raw/context/session.rb +198 -0
  39. data/lib/raw/context/session/drb.rb +11 -0
  40. data/lib/raw/context/session/file.rb +15 -0
  41. data/lib/raw/context/session/memcached.rb +13 -0
  42. data/lib/raw/context/session/memory.rb +12 -0
  43. data/lib/raw/context/session/og.rb +15 -0
  44. data/lib/raw/context/session/pstore.rb +13 -0
  45. data/lib/raw/control.rb +18 -0
  46. data/lib/raw/control/attribute.rb +91 -0
  47. data/lib/raw/control/attribute/checkbox.rb +25 -0
  48. data/lib/raw/control/attribute/datetime.rb +21 -0
  49. data/lib/raw/control/attribute/file.rb +20 -0
  50. data/lib/raw/control/attribute/fixnum.rb +26 -0
  51. data/lib/raw/control/attribute/float.rb +26 -0
  52. data/lib/raw/control/attribute/options.rb +38 -0
  53. data/lib/raw/control/attribute/password.rb +16 -0
  54. data/lib/raw/control/attribute/text.rb +16 -0
  55. data/lib/raw/control/attribute/textarea.rb +16 -0
  56. data/lib/raw/control/none.rb +16 -0
  57. data/lib/raw/control/relation.rb +59 -0
  58. data/lib/raw/control/relation/belongs_to.rb +0 -0
  59. data/lib/raw/control/relation/has_many.rb +97 -0
  60. data/lib/raw/control/relation/joins_many.rb +0 -0
  61. data/lib/raw/control/relation/many_to_many.rb +0 -0
  62. data/lib/raw/control/relation/refers_to.rb +29 -0
  63. data/lib/raw/controller.rb +37 -0
  64. data/lib/raw/controller/publishable.rb +160 -0
  65. data/lib/raw/dispatcher.rb +209 -0
  66. data/lib/raw/dispatcher/format.rb +108 -0
  67. data/lib/raw/dispatcher/format/atom.rb +31 -0
  68. data/lib/raw/dispatcher/format/css.rb +0 -0
  69. data/lib/raw/dispatcher/format/html.rb +42 -0
  70. data/lib/raw/dispatcher/format/json.rb +31 -0
  71. data/lib/raw/dispatcher/format/rss.rb +33 -0
  72. data/lib/raw/dispatcher/format/xoxo.rb +31 -0
  73. data/lib/raw/dispatcher/mounter.rb +60 -0
  74. data/lib/raw/dispatcher/router.rb +111 -0
  75. data/lib/raw/errors.rb +19 -0
  76. data/lib/raw/helper.rb +86 -0
  77. data/lib/raw/helper/benchmark.rb +23 -0
  78. data/lib/raw/helper/buffer.rb +60 -0
  79. data/lib/raw/helper/cookie.rb +32 -0
  80. data/lib/raw/helper/debug.rb +28 -0
  81. data/lib/raw/helper/default.rb +16 -0
  82. data/lib/raw/helper/feed.rb +451 -0
  83. data/lib/raw/helper/form.rb +284 -0
  84. data/lib/raw/helper/javascript.rb +59 -0
  85. data/lib/raw/helper/layout.rb +40 -0
  86. data/lib/raw/helper/navigation.rb +87 -0
  87. data/lib/raw/helper/pager.rb +305 -0
  88. data/lib/raw/helper/table.rb +247 -0
  89. data/lib/raw/helper/xhtml.rb +218 -0
  90. data/lib/raw/helper/xml.rb +125 -0
  91. data/lib/raw/mixin/magick.rb +35 -0
  92. data/lib/raw/mixin/sweeper.rb +71 -0
  93. data/lib/raw/mixin/thumbnails.rb +1 -0
  94. data/lib/raw/mixin/webfile.rb +165 -0
  95. data/lib/raw/render.rb +271 -0
  96. data/lib/raw/render/builder.rb +26 -0
  97. data/lib/raw/render/caching.rb +81 -0
  98. data/lib/raw/render/call.rb +43 -0
  99. data/lib/raw/render/send_file.rb +46 -0
  100. data/lib/raw/render/stream.rb +39 -0
  101. data/lib/raw/scaffold.rb +13 -0
  102. data/lib/raw/scaffold/controller.rb +25 -0
  103. data/lib/raw/scaffold/model.rb +157 -0
  104. data/lib/raw/test.rb +5 -0
  105. data/lib/raw/test/assertions.rb +169 -0
  106. data/lib/raw/test/context.rb +55 -0
  107. data/lib/raw/test/testcase.rb +79 -0
  108. data/lib/raw/util/attr.rb +128 -0
  109. data/lib/raw/util/encode_uri.rb +149 -0
  110. data/lib/raw/util/html_filter.rb +538 -0
  111. data/lib/raw/util/markup.rb +130 -0
  112. data/test/glue/tc_webfile.rb +1 -0
  113. data/test/nitro/CONFIG.rb +3 -0
  114. data/test/nitro/adapter/raw_post1.bin +9 -0
  115. data/test/nitro/adapter/tc_webrick.rb +16 -0
  116. data/test/nitro/cgi/tc_cookie.rb +14 -0
  117. data/test/nitro/cgi/tc_request.rb +61 -0
  118. data/test/nitro/compiler/tc_client_morpher.rb +47 -0
  119. data/test/nitro/compiler/tc_compiler.rb +25 -0
  120. data/test/nitro/dispatcher/tc_mounter.rb +47 -0
  121. data/test/nitro/helper/tc_feed.rb +135 -0
  122. data/test/nitro/helper/tc_navbar.rb +74 -0
  123. data/test/nitro/helper/tc_pager.rb +35 -0
  124. data/test/nitro/helper/tc_table.rb +68 -0
  125. data/test/nitro/helper/tc_xhtml.rb +19 -0
  126. data/test/nitro/tc_caching.rb +19 -0
  127. data/test/nitro/tc_cgi.rb +222 -0
  128. data/test/nitro/tc_context.rb +17 -0
  129. data/test/nitro/tc_controller.rb +103 -0
  130. data/test/nitro/tc_controller_aspect.rb +32 -0
  131. data/test/nitro/tc_controller_params.rb +885 -0
  132. data/test/nitro/tc_dispatcher.rb +109 -0
  133. data/test/nitro/tc_element.rb +85 -0
  134. data/test/nitro/tc_flash.rb +59 -0
  135. data/test/nitro/tc_helper.rb +47 -0
  136. data/test/nitro/tc_render.rb +119 -0
  137. data/test/nitro/tc_router.rb +61 -0
  138. data/test/nitro/tc_server.rb +35 -0
  139. data/test/nitro/tc_session.rb +66 -0
  140. data/test/nitro/tc_template.rb +71 -0
  141. data/test/nitro/util/tc_encode_url.rb +87 -0
  142. data/test/nitro/util/tc_markup.rb +31 -0
  143. data/test/public/blog/another/very_litle/index.xhtml +1 -0
  144. data/test/public/blog/inc1.xhtml +2 -0
  145. data/test/public/blog/inc2.xhtml +1 -0
  146. data/test/public/blog/list.xhtml +9 -0
  147. data/test/public/dummy_mailer/registration.xhtml +5 -0
  148. metadata +244 -0
@@ -0,0 +1,55 @@
1
+ require 'test/unit'
2
+ require 'test/unit/assertions'
3
+ require 'rexml/document'
4
+
5
+ require 'raw/context'
6
+
7
module Raw

  # Re-opened so tests have a place to hang Request helpers.
  # Nothing is added at the moment.

  module Request
  end

  # Re-opened to add query methods that are handy in test
  # assertions. All of them only read instance variables of the
  # including object.

  module Response

    # True when the response status is exactly 200.

    def status_ok?
      @status == 200
    end

    # True when the response status lies in the 3xx range.

    def redirect?
      (300..399).include?(@status)
    end

    # The value of the 'location' response header.

    def redirect_uri
      @response_headers['location']
    end

    # The first response cookie with the given name, or nil when
    # there is no such cookie (or no cookies at all).

    def response_cookie(name)
      if @response_cookies
        @response_cookies.find { |cookie| cookie.name == name }
      end
    end

  end

  # Re-opened so tests can inject session and cookie data.
  # Both readers lazily fall back to an empty hash.

  class Context
    attr_writer :session, :cookies

    def session
      @session ||= {}
    end

    def cookies
      @cookies ||= {}
    end

  end

end
@@ -0,0 +1,79 @@
1
+ require 'ostruct'
2
+
3
+ require 'test/unit'
4
+ require 'test/unit/assertions'
5
+ require 'rexml/document'
6
+
7
+ require 'glue'
8
+ require 'nitro/test/context'
9
+
10
module Test::Unit

  class TestCase
    include Nitro

    # Build a fresh context (and dispatcher) for the next request.

    def reset_context
      @context_config = OpenStruct.new(
        :dispatcher => Nitro::Dispatcher.new(Nitro::Server.map)
      )
      @context = Nitro::Context.new(@context_config)
    end

    # Send a request to the controller. Alternatively you can use
    # the request method helpers (get, post, ...)
    #
    # A non-Hash argument is treated as the uri.
    #
    # === Options
    #
    # :uri, :method, :headers/:env, :params, :cookies, :session
    #
    # :method defaults to :get. Cookies set by the previous
    # response are replayed on the next request. Returns the
    # rendered body.

    def process(options = {})
      options = { :uri => options.to_s } unless options.is_a?(Hash)

      uri = options[:uri]
      uri = "/#{uri}" unless uri =~ /^\//

      # Resolve the method once so that the default (:get) is used
      # consistently for both REQUEST_METHOD and QUERY_STRING.
      http_method = options.fetch(:method, :get).to_s.upcase

      reset_context unless @context
      context = @context

      if @last_response_cookies
        @last_response_cookies.each do |cookie|
          context.cookies.merge! cookie.name => cookie.value
        end
      end

      # Work on a copy, the caller's hash must not be modified.
      context.headers = (options[:headers] || options[:env] || {}).dup
      context.headers['REQUEST_URI'] = uri
      context.headers['REQUEST_METHOD'] = http_method
      context.headers['REMOTE_ADDR'] ||= '127.0.0.1'

      if http_method == 'GET' and options[:params]
        context.headers['QUERY_STRING'] =
          options[:params].collect { |k, v| "#{k}=#{v}" }.join('&')
      end

      context.params = options[:params] || {}
      context.cookies.merge! options[:cookies] if options[:cookies]
      context.session.merge! options[:session] if options[:session]

      context.render(context.path)
      @last_response_cookies = context.response_cookies
      return context.body
    end

    #--
    # Compile some helpers: get, post, put, delete, head. Each one
    # forces :method and delegates to process.
    #++

    [:get, :post, :put, :delete, :head].each do |m|
      class_eval %{
        def #{m}(options = {})
          unless options.is_a? Hash
            options = { :uri => options.to_s }
          end
          options[:method] = :#{m}
          process(options)
        end
      }
    end

  end

end
79
+
@@ -0,0 +1,128 @@
1
+ require 'cgi'
2
+ require 'og/relation/all'
3
+
4
+ #--
5
+ # TODO: find a better name!
6
+ # TODO: this is nitro request specific, should probably get moved
7
+ # into the Nitro directory.
8
+ #++
9
+
10
class AttributeUtils
  class << self

    #--
    # TODO: Add preprocessing.
    #++

    # Assign +value+ to the attribute +name+ of +obj+.
    #
    # Tries the "__force_<name>" setter first; if that call fails
    # the instance variable is set directly. Only StandardError is
    # rescued so that interrupts and exits are not swallowed.

    def set_attr(obj, name, value)
      obj.send("__force_#{name}", value)
    rescue StandardError
      obj.instance_variable_set("@#{name}", value)
    end

    # Populate an object from a hash of values.
    # This is a truly dangerous method.
    #
    # +obj+ may be an instance or a class (in which case a new
    # instance is created). +values+ is keyed by attribute /
    # relation name as a String. Returns the populated object.
    #
    # === Options
    #
    # * all: also assign the primary key and attributes/relations
    #   whose control is disabled.
    # * force_boolean (default true): set boolean attributes that
    #   are missing from +values+ to false.
    # * assign_relations: also renew relations from +values+.

    def populate_object(obj, values, options = {})
      options = {
        :force_boolean => true
      }.update(options)

      # If a class is passed create an instance.

      obj = obj.new if obj.is_a?(Class)

      obj.class.serializable_attributes.each do |sym|
        anno = obj.class.ann(sym)

        unless options[:all]
          # THINK: should skip control none attributes?
          next if sym == obj.class.primary_key or anno[:control] == :none or anno[:disable_control]
        end

        prop_name = sym.to_s

        # See if there is an incoming request param for this prop.

        if values.has_key?(prop_name)
          prop_value = values[prop_name]

          # to_s must be called on the prop_value in case the
          # request is IOString. Hashes and arrays are passed on
          # untouched.

          scalar = !(prop_value.is_a?(Hash) or prop_value.is_a?(Array))
          prop_value = prop_value.to_s if scalar

          # If property is a Blob don't overwrite the current
          # property's data if "". Only this attribute is skipped,
          # the remaining attributes are still populated.

          next if anno[:class] == Og::Blob and prop_value.empty?

          if scalar
            # Unescape exactly once, *before* filtering. Unescaping
            # again after the filter would allow doubly-encoded
            # markup to slip through.
            prop_value = CGI.unescape(prop_value)

            if anno[:class] == String and anno[:unfiltered] != true
              # html filter all strings by default.
              prop_value = prop_value.html_filter
            end
          end

          set_attr(obj, prop_name, prop_value)

        elsif options[:force_boolean] and (anno[:class] == TrueClass or anno[:class] == FalseClass)
          # Set a boolean property to false if it is not in the
          # request. Requires force_boolean == true.

          set_attr(obj, prop_name, false)
        end
      end

      if options[:assign_relations]
        obj.class.relations.each do |rel|
          unless options[:all]
            next if rel.options[:control] == :none or rel.options[:disable_control]
          end

          rel_name = rel.name.to_s

          # Renew the relations from values

          if rel.kind_of?(Og::RefersTo)
            if foreign_oid = values[rel_name]
              foreign_oid = foreign_oid.to_s unless foreign_oid.is_a?(Hash) or foreign_oid.is_a?(Array)
              foreign_oid = nil if foreign_oid == 'nil' or foreign_oid == 'none'
            end
            # NOTE(review): the foreign key is reset to nil when the
            # relation is missing from values -- confirm this is
            # intended.
            set_attr(obj, rel.foreign_key, foreign_oid)
          elsif rel.kind_of?(Og::JoinsMany) || rel.kind_of?(Og::HasMany)
            collection = obj.send(rel_name)
            collection.remove_all
            if values.has_key?(rel_name)
              primary_keys = values[rel_name]
              primary_keys.each do |v|
                v = v.to_s
                next if v == "nil" or v == "none"
                collection << rel.target_class[v.to_i]
              end
            end
          end
        end
      end

      #--
      # gmosx, FIXME: this is a hack, will be replaced with proper
      # code soon.
      #++

      if obj.class.respond_to?(:assign_callbacks)
        obj.class.assign_callbacks.each do |callback|
          callback.call(obj, values, options)
        end
      end

      return obj
    end

  end
end
@@ -0,0 +1,149 @@
1
module Raw

  # A collection of intelligent url encoding methods.

  module EncodeURI

    private

    # Encode controller, action, params into a valid url.
    # Automatically respects nice urls and routing.
    #
    # Handles parameters as alternating (name, value) pairs.
    # Leading pairs are consumed as path segments when the action
    # method takes positional parameters ('nice' urls); the
    # remaining pairs become the query string, joined with ';'.
    # Both path segments and query names/values are CGI escaped.
    #
    # Pass Controller, action, and (param_name, param_value)
    # pairs.
    #
    # If you pass an entity (model) class as the first parameter,
    # the encoder tries to lookup the default controller for this
    # class (ie, Klass::Controller).
    #
    # === Examples
    #
    # encode_url ForaController, :post, :title, 'Hello', :body, 'World'
    # encode_url :post, :title, 'Hello', :body, 'World' # => implies controller == self
    # encode_url :kick, :oid, 4
    # encode_url article # => article.to_href
    #
    # Alternatively you can pass options with a hash:
    #
    # encode_url :controller => ForaController, :action => :delete, :params => { :title => 'Hello' }
    # encode_url :action => :delete
    #
    # NOTE(review): the hash form is not handled by the manual
    # encoding below; it presumably relies on the router's
    # encode_route -- confirm.
    #--
    # Design: The pseudo-hack method with the alternating array
    # elements is needed because Ruby hashes are not sorted.
    # FIXME: better implementation? optimize this?
    # TODO: move elsewhere.
    #++

    def encode_uri(*args)
      f = args.first

      # A standard url as string, return as is.

      if f.is_a? String
        # Attach the controller mount_path if this is a relative
        # path. Use Controller.current to make this method more
        # reusable.
        unless f =~ /^\// or f =~ /^http/
          f = "#{Controller.current.mount_path}/#{f}".squeeze('/')
        end
        return f
      end

      # If the passed param is an object that responds to :to_href
      # returns the url to this object.

      return f.to_href if f.respond_to? :to_href

      if f.is_a? Symbol
        # no controller passed, try to use self as controller.
        if self.class.respond_to? :mount_path
          args.unshift(self.class)
        else
          raise "No controller passed to encode_url"
        end
      end

      # Try to encode using the router.

      if router = Context.current.dispatcher.router
        if path = router.encode_route(*args)
          return path
        end
      end

      # No routing rule, manual encoding.

      controller = args.shift
      action = args.shift.to_sym

      if controller.is_a? Class
        # If the class argument is not a controller, try to get
        # a controller for this class.

        unless controller.respond_to? :mount_path
          # Use the standard controller convention.
          controller = controller::Controller
        end
      else
        # An entity model is passed, lookup the class, then
        # the controller and inject the oid as a parameter. For
        # example:
        #
        # a = Article[1]
        # encode_url(a, :read) == encode_url(Article::Controller, :read, :oid, a.oid)

        args.unshift :oid, controller.oid
        controller = controller.class::Controller
      end

      if action == :index
        url = "#{controller.mount_path}"
      else
        mount_path = controller.mount_path
        mount_path = nil if mount_path == '/'
        url = "#{mount_path}/#{action}"
      end

      unless args.empty?
        # Only look at the arity when the action is backed by a
        # real method; a template-only action has no positional
        # parameters.
        if controller.action_or_template?(action, Context.current.format) and
           controller.method_defined?(action)
          param_count = controller.instance_method(action).arity
          if param_count > 0
            param_count.times do
              args.shift # name
              url << "/#{CGI.escape(args.shift.to_s)}"
            end
          end
        end

        unless args.empty?
          url << '?'
          params = []
          (args.size / 2).times do
            name = CGI.escape(args.shift.to_s)
            value = CGI.escape(args.shift.to_s)
            params << "#{name}=#{value}"
          end
          url << params.join(';')
        end
      end

      return url
    end
    alias R encode_uri
    alias encode_url encode_uri # DEPRECATED.


    # Just like encode_uri, but generates an absolute URI instead
    # by prefixing request.host_url.

    def encode_absolute_uri(*args)
      return "#{request.host_url}#{encode_uri(*args)}"
    end
    alias RA encode_absolute_uri
    alias encode_absolute_url encode_absolute_uri # DEPRECATED.

  end

end
@@ -0,0 +1,538 @@
1
+ # = HTML filtering library
2
+ #
3
+ # == Port
4
+ #
5
+ # lib_filter.php, v1.15 by Cal Henderson <cal@iamcal.com>
6
+ #
7
+ # This code is licensed under a Creative Commons Attribution-ShareAlike 2.5 License
8
+ # http://creativecommons.org/licenses/by-sa/2.5/
9
+ #
10
+ # Thanks to Jang Kim for adding support for single quoted attributes
11
+ #
12
+ # == Reference
13
+ #
14
+ # http://iamcal.com/publish/articles/php/processing_html/
15
+ # http://iamcal.com/publish/articles/php/processing_html_part_2/
16
+ #
17
+ # == Author(s)
18
+ #
19
+ # * TransNoumena
20
+ # * George Moschovitis
21
+ # * James Britt
22
+ # * Cal Henderson
23
+ # * Jang Kim
24
+
25
+ require "cgi"
26
+
27
class HtmlFilter

  # tags and attributes that are allowed
  #
  # Eg.
  #
  #   {
  #     'a' => ['href', 'target'],
  #     'b' => [],
  #     'img' => ['src', 'width', 'height', 'alt']
  #   }
  attr_accessor :allowed

  # tags which should always be self-closing (e.g. "<img />")
  attr_accessor :no_close

  # tags which must always have separate opening and closing
  # tags (e.g. "<b></b>")
  attr_accessor :always_close

  # attributes which should be checked for valid protocols
  # (src,href)
  attr_accessor :protocol_attributes

  # protocols which are allowed (http, ftp, mailto)
  attr_accessor :allowed_protocols

  # tags which should be removed if they contain no content
  # (e.g. "<b></b>" or "<b />")
  attr_accessor :remove_blanks

  # should we remove comments? (true, false)
  attr_accessor :strip_comments

  # should we try and make a b tag out of "b>" (true, false)
  attr_accessor :always_make_tags

  # entity control option (true, false)
  attr_accessor :allow_numbered_entities

  # entity control option (amp, gt, lt, quot, etc.)
  attr_accessor :allowed_entities

  # default settings
  DEFAULT = {
    'allowed' => {
      'a' => ['href', 'target'],
      'b' => [],
      'i' => [],
      'ul' => [],
      'ol' => [],
      'li' => [],
      'img' => ['src', 'width', 'height', 'alt'],
      'object' => ['width', 'height'],
      'param' => ['name', 'value'],
      'embed' => ['src', 'type', 'wmode', 'name', 'value'],
    },
    'no_close' => ['img', 'br', 'hr'],
    'always_close' => ['a', 'b'],
    'protocol_attributes' => ['src', 'href'],
    'allowed_protocols' => ['http', 'ftp', 'mailto'],
    'remove_blanks' => ['a', 'b'],
    'strip_comments' => true,
    'always_make_tags' => true,
    'allow_numbered_entities' => true,
    'allowed_entities' => ['amp', 'gt', 'lt', 'quot']
  }

  #
  # new html filter
  #
  # +options+ is a hash of setting name => value (see DEFAULT).
  # When given it replaces DEFAULT as a whole, it is not merged
  # with it.
  #

  def initialize( options=nil )
    @tag_counts = {}

    (options || DEFAULT).each{ |k,v| send("#{k}=",v) }
  end

  #
  # Filter a piece of html and return the cleaned up string.
  # The open-tag counters are reset on every call so a filter
  # instance can be reused safely.
  #

  def filter(data)
    @tag_counts = {}

    data = escape_comments(data)
    data = balance_html(data)
    data = check_tags(data)
    data = process_remove_blanks(data)
    data = validate_entities(data)

    return data
  end

  private

  #
  # internal tag counter (tag name => number of unclosed tags)
  #

  attr_reader :tag_counts

  #
  # Html-escape the content of comments so that brackets inside
  # a comment cannot be mistaken for tags by the later passes.
  #

  def escape_comments(data)
    data = data.gsub(/<!--(.*?)-->/m) do
      '<!--' + CGI.escapeHTML(strip_single($1)) + '-->'
    end

    return data
  end

  #
  # Repair (always_make_tags) or escape stray angle brackets.
  #

  def balance_html(data)
    data = data.dup

    if always_make_tags
      # try and form html
      data.gsub!(/>>+/, '>')
      data.gsub!(/<<+/, '<')
      data.gsub!(/^>/, '')
      data.gsub!(/<([^>]*?)(?=<|$)/, '<\1>')
      data.gsub!(/(^|>)([^<]*?)(?=>)/, '\1<\2')
    else
      # escape stray brackets
      data.gsub!(/<([^>]*?)(?=<|$)/, '&lt;\1')
      data.gsub!(/(^|>)([^<]*?)(?=>)/, '\1\2&gt;<')
      # the last regexp causes '<>' entities to appear
      # (we need to do a lookahead assertion so that the last bracket
      # can be used in the next pass of the regexp)
      data.gsub!('<>', '')
    end

    return data
  end

  #
  # Run every tag through process_tag and append closing tags
  # for everything that is still open at the end.
  #

  def check_tags(data)
    data = data.gsub(/<(.*?)>/m) do
      process_tag(strip_single($1))
    end

    tag_counts.each do |tag, cnt|
      cnt.times{ data << "</#{tag}>" }
    end

    return data
  end

  #
  # Process the inside of a single tag (the text between '<' and
  # '>') and return the sanitized tag, or '' to drop it.
  #

  def process_tag(data)

    # ending tags

    if matches = /\A\/([a-z0-9]+)/i.match(data)
      name = matches[1].downcase
      return '' unless allowed.key?(name)

      # Only emit a closing tag when a matching tag is actually
      # open. Note that a count of 0 is truthy in Ruby, so the
      # count has to be compared explicitly.
      if !no_close.include?(name) and tag_counts[name].to_i > 0
        tag_counts[name] -= 1
        return "</#{name}>"
      end
    end

    # starting tags

    if matches = /\A([a-z0-9]+)(.*?)(\/?)\z/mi.match(data)
      name = matches[1].downcase
      body = matches[2]
      ending = matches[3]

      return '' unless allowed.key?(name)

      params = ""

      matches_2 = body.scan(/([a-z0-9]+)=(["'])(.*?)\2/mi) # <foo a="b" />
      matches_1 = body.scan(/([a-z0-9]+)(=)([^"\s']+)/i) # <foo a=b />
      matches_3 = body.scan(/([a-z0-9]+)=(["'])([^"']*?)\s*$/i) # <foo a="b />

      (matches_1 + matches_2 + matches_3).each do |match|
        pname = match[0].downcase
        next unless allowed[name].include?(pname)

        value = match[2]
        if protocol_attributes.include?(pname)
          value = process_param_protocol(value)
        end
        # The value is always re-quoted with double quotes, so a
        # double quote inside it (possible with 'single quoted'
        # input) must not terminate the attribute.
        value = value.gsub('"', '&quot;')
        params += %{ #{pname}="#{value}"}
      end

      ending = ' /' if no_close.include?(name)
      ending = '' if always_close.include?(name)

      if ending.empty?
        tag_counts[name] = tag_counts[name].to_i + 1
      else
        ending = ' /'
      end

      return '<' + name + params + ending + '>'
    end

    # comments
    if /\A!--(.*)--\z/m =~ data
      if strip_comments
        return ''
      else
        return '<' + data + '>'
      end
    end

    # garbage, ignore it
    return ''
  end

  #
  # Neutralize urls that use a protocol which is not allowed:
  # the protocol is replaced by a '#'.
  #

  def process_param_protocol(data)
    data = decode_entities(data)

    if matches = /\A([^:]+)\:/.match(data)
      unless allowed_protocols.include?(matches[1])
        # keep only what follows "protocol:"
        data = '#' + data[(matches[1].size + 1)..-1]
      end
    end

    return data
  end

  #
  # Remove empty occurrences of the tags listed in remove_blanks.
  #

  def process_remove_blanks(data)
    data = data.dup

    remove_blanks.each do |tag|
      data.gsub!(/<#{tag}(\s[^>]*)?><\/#{tag}>/, '')
      data.gsub!(/<#{tag}(\s[^>]*)?\/>/, '')
    end

    return data
  end

  #
  # Convert ALL-CAPS text to sentence case. Text that is short
  # (less than 5 letters) or already contains lowercase letters
  # is returned untouched. Not called by filter.
  #

  def fix_case(data)
    data_notags = data.gsub(/<[^>]*>/, '') # strip tags
    data_notags = data_notags.gsub(/[^a-zA-Z]/, '')

    if data_notags.size < 5
      return data
    end

    if /[a-z]/ =~ data_notags
      return data
    end

    data = data.gsub(/(>|\A)([^<]+?)(<|\z)/){
      m1, m2, m3 = $1, $2, $3
      strip_single(m1) +
      fix_case_inner(strip_single(m2)) +
      strip_single(m3)
    }

    return data
  end

  #
  # Downcase a run of text and capitalize sentence starts.
  #

  def fix_case_inner(data)
    data = data.dup

    data.downcase!

    data.gsub!(/(^|[^\w\s\';,\\-])(\s*)([a-z])/){
      m1, m2, m3 = $1, $2, $3
      strip_single("#{m1}#{m2}") + strip_single(m3).upcase
    }

    return data
  end

  #
  # Escape ampersands that do not start a valid entity and
  # encode double quotes that appear outside of tags.
  #

  def validate_entities(data)
    # validate entities throughout the string
    data = data.gsub(%r!&([^&;]*)(?=(;|&|$))!){
      check_entity(strip_single($1), strip_single($2))
    }

    # validate quotes outside of tags. \A and \z are used (not ^
    # and $) so that a line break inside a tag never makes the
    # tag's own quotes look like text.
    data = data.gsub(/(>|\A)([^<]+?)(<|\z)/){
      m1, m2, m3 = $1, $2, $3
      strip_single(m1) +
      strip_single(m2).gsub('"', '&quot;') +
      strip_single(m3)
    }

    return data
  end

  #
  # Return the replacement for "&preamble" given the terminator
  # that follows it. The terminator itself is left in the text.
  #

  def check_entity(preamble, term)
    if term != ';'
      return '&amp;' + preamble
    end

    if is_valid_entity(preamble)
      return '&' + preamble
    end

    return '&amp;' + preamble
  end

  #
  # Is the entity (without '&' and ';') allowed?
  #

  def is_valid_entity(entity)
    re = /^#([0-9]+)$/i

    if md = re.match(entity)
      if (md[1].to_i > 127)
        return true
      end
      return allow_numbered_entities
    end

    if allowed_entities.include?(entity)
      return true
    end

    return nil
  end

  # within attributes, we want to convert all hex/dec/url
  # escape sequences into their raw characters so that we can
  # check we don't get stray quotes/brackets inside strings.

  def decode_entities(data)
    data = data.gsub(/(&)#(\d+);?/){ decode_dec_entity($1, $2) }
    data = data.gsub(/(&)#x([0-9a-f]+);?/i){ decode_hex_entity($1, $2) }
    data = data.gsub(/(%)([0-9a-f]{2});?/i){ decode_hex_entity($1, $2) }

    data = validate_entities(data)

    return data
  end

  #
  # m is [type ('&' or '%'), hexadecimal digits]
  #

  def decode_hex_entity(*m)
    return decode_num_entity(m[0], m[1].hex)
  end

  #
  # m is [type ('&'), decimal digits]
  #

  def decode_dec_entity(*m)
    return decode_num_entity(m[0], m[1])
  end

  #
  # Turn a character code into its (html escaped) character.
  # Codes above 127 are re-emitted in their original notation.
  #

  def decode_num_entity(orig_type, d)
    d = d.to_i
    d = 32 if d < 0 # space

    # don't mess with high chars
    if d > 127
      return '%' + d.to_s(16) if orig_type == '%'
      return "&##{d};" if orig_type == '&'
    end

    return escape(d.chr)
  end

  #
  # Replace backslash-quote with a quote and backslash-zero with
  # a NUL character (inherited from the PHP original).
  #

  def strip_single(data)
    return data.gsub('\"', '"').gsub('\0', 0.chr)
  end

  # Certain characters have special significance in HTML, and
  # should be represented by HTML entities if they are to
  # preserve their meanings. This function returns a string
  # with some of these conversions made; the translations made
  # are those most useful for everyday web programming.

  def escape(html)
    CGI.escapeHTML(html)
  end

end
476
+
477
+ # Overload the standard String class for extra convenience.
478
+
479
class String
  # Return a copy of this string run through an HtmlFilter
  # with the default settings.
  def html_filter
    default_filter = HtmlFilter.new
    default_filter.filter(self)
  end
end
484
+
485
+
486
if $0 ==__FILE__

  require 'test/unit'

  # Self test, only run when this file is executed directly.

  class TestHtmlFilter < Test::Unit::TestCase

    # Assert that filtering +original+ yields +filtered+.

    def assert_filter(filtered, original)
      assert_equal(filtered, original.html_filter)
    end

    def test_strip_single
      hf = HtmlFilter.new
      assert_equal('"', hf.send(:strip_single, '\"'))
      assert_equal("\000", hf.send(:strip_single, '\0'))
    end

    def test_fix_quotes
      assert_filter('<img src="foo.jpg" />', "<img src=\"foo.jpg />")
    end

    def test_basics
      assert_filter('', '')
      assert_filter('hello', 'hello')
    end

    def test_balancing_tags
      assert_filter("<b>hello</b>", "<<b>hello</b>")
      assert_filter("<b>hello</b>", "<b>>hello</b>")
      assert_filter("<b>hello</b>", "<b>hello<</b>")
      assert_filter("<b>hello</b>", "<b>hello</b>>")
      assert_filter("", "<>")
    end

    def test_tag_completion
      assert_filter("hello", "hello<b>")
      assert_filter("<b>hello</b>", "<b>hello")
      assert_filter("hello<b>world</b>", "hello<b>world")
      assert_filter("hello", "hello</b>")
      assert_filter("hello", "hello<b/>")
      assert_filter("hello<b>world</b>", "hello<b/>world")
      assert_filter("<b><b><b>hello</b></b></b>", "<b><b><b>hello")
      assert_filter("", "</b><b>")
    end

    def test_end_slashes
      assert_filter('<img />', '<img>')
      assert_filter('<img />', '<img/>')
      assert_filter('', '<b/></b>')
    end

  end

end