raw 0.49.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. data/doc/CONTRIBUTORS +106 -0
  2. data/doc/LICENSE +32 -0
  3. data/doc/coding_conventions.txt +11 -0
  4. data/lib/raw.rb +42 -0
  5. data/lib/raw/adapter.rb +113 -0
  6. data/lib/raw/adapter/cgi.rb +41 -0
  7. data/lib/raw/adapter/fastcgi.rb +48 -0
  8. data/lib/raw/adapter/mongrel.rb +146 -0
  9. data/lib/raw/adapter/script.rb +94 -0
  10. data/lib/raw/adapter/webrick.rb +144 -0
  11. data/lib/raw/adapter/webrick/vcr.rb +91 -0
  12. data/lib/raw/cgi.rb +323 -0
  13. data/lib/raw/cgi/cookie.rb +47 -0
  14. data/lib/raw/cgi/http.rb +62 -0
  15. data/lib/raw/compiler.rb +138 -0
  16. data/lib/raw/compiler/filter/cleanup.rb +21 -0
  17. data/lib/raw/compiler/filter/elements.rb +166 -0
  18. data/lib/raw/compiler/filter/elements/element.rb +210 -0
  19. data/lib/raw/compiler/filter/localization.rb +23 -0
  20. data/lib/raw/compiler/filter/markup.rb +32 -0
  21. data/lib/raw/compiler/filter/morph.rb +123 -0
  22. data/lib/raw/compiler/filter/morph/each.rb +34 -0
  23. data/lib/raw/compiler/filter/morph/for.rb +11 -0
  24. data/lib/raw/compiler/filter/morph/if.rb +26 -0
  25. data/lib/raw/compiler/filter/morph/selected_if.rb +43 -0
  26. data/lib/raw/compiler/filter/morph/standard.rb +55 -0
  27. data/lib/raw/compiler/filter/morph/times.rb +27 -0
  28. data/lib/raw/compiler/filter/script.rb +116 -0
  29. data/lib/raw/compiler/filter/squeeze.rb +16 -0
  30. data/lib/raw/compiler/filter/static_include.rb +74 -0
  31. data/lib/raw/compiler/filter/template.rb +121 -0
  32. data/lib/raw/compiler/reloader.rb +96 -0
  33. data/lib/raw/context.rb +154 -0
  34. data/lib/raw/context/flash.rb +157 -0
  35. data/lib/raw/context/global.rb +88 -0
  36. data/lib/raw/context/request.rb +338 -0
  37. data/lib/raw/context/response.rb +57 -0
  38. data/lib/raw/context/session.rb +198 -0
  39. data/lib/raw/context/session/drb.rb +11 -0
  40. data/lib/raw/context/session/file.rb +15 -0
  41. data/lib/raw/context/session/memcached.rb +13 -0
  42. data/lib/raw/context/session/memory.rb +12 -0
  43. data/lib/raw/context/session/og.rb +15 -0
  44. data/lib/raw/context/session/pstore.rb +13 -0
  45. data/lib/raw/control.rb +18 -0
  46. data/lib/raw/control/attribute.rb +91 -0
  47. data/lib/raw/control/attribute/checkbox.rb +25 -0
  48. data/lib/raw/control/attribute/datetime.rb +21 -0
  49. data/lib/raw/control/attribute/file.rb +20 -0
  50. data/lib/raw/control/attribute/fixnum.rb +26 -0
  51. data/lib/raw/control/attribute/float.rb +26 -0
  52. data/lib/raw/control/attribute/options.rb +38 -0
  53. data/lib/raw/control/attribute/password.rb +16 -0
  54. data/lib/raw/control/attribute/text.rb +16 -0
  55. data/lib/raw/control/attribute/textarea.rb +16 -0
  56. data/lib/raw/control/none.rb +16 -0
  57. data/lib/raw/control/relation.rb +59 -0
  58. data/lib/raw/control/relation/belongs_to.rb +0 -0
  59. data/lib/raw/control/relation/has_many.rb +97 -0
  60. data/lib/raw/control/relation/joins_many.rb +0 -0
  61. data/lib/raw/control/relation/many_to_many.rb +0 -0
  62. data/lib/raw/control/relation/refers_to.rb +29 -0
  63. data/lib/raw/controller.rb +37 -0
  64. data/lib/raw/controller/publishable.rb +160 -0
  65. data/lib/raw/dispatcher.rb +209 -0
  66. data/lib/raw/dispatcher/format.rb +108 -0
  67. data/lib/raw/dispatcher/format/atom.rb +31 -0
  68. data/lib/raw/dispatcher/format/css.rb +0 -0
  69. data/lib/raw/dispatcher/format/html.rb +42 -0
  70. data/lib/raw/dispatcher/format/json.rb +31 -0
  71. data/lib/raw/dispatcher/format/rss.rb +33 -0
  72. data/lib/raw/dispatcher/format/xoxo.rb +31 -0
  73. data/lib/raw/dispatcher/mounter.rb +60 -0
  74. data/lib/raw/dispatcher/router.rb +111 -0
  75. data/lib/raw/errors.rb +19 -0
  76. data/lib/raw/helper.rb +86 -0
  77. data/lib/raw/helper/benchmark.rb +23 -0
  78. data/lib/raw/helper/buffer.rb +60 -0
  79. data/lib/raw/helper/cookie.rb +32 -0
  80. data/lib/raw/helper/debug.rb +28 -0
  81. data/lib/raw/helper/default.rb +16 -0
  82. data/lib/raw/helper/feed.rb +451 -0
  83. data/lib/raw/helper/form.rb +284 -0
  84. data/lib/raw/helper/javascript.rb +59 -0
  85. data/lib/raw/helper/layout.rb +40 -0
  86. data/lib/raw/helper/navigation.rb +87 -0
  87. data/lib/raw/helper/pager.rb +305 -0
  88. data/lib/raw/helper/table.rb +247 -0
  89. data/lib/raw/helper/xhtml.rb +218 -0
  90. data/lib/raw/helper/xml.rb +125 -0
  91. data/lib/raw/mixin/magick.rb +35 -0
  92. data/lib/raw/mixin/sweeper.rb +71 -0
  93. data/lib/raw/mixin/thumbnails.rb +1 -0
  94. data/lib/raw/mixin/webfile.rb +165 -0
  95. data/lib/raw/render.rb +271 -0
  96. data/lib/raw/render/builder.rb +26 -0
  97. data/lib/raw/render/caching.rb +81 -0
  98. data/lib/raw/render/call.rb +43 -0
  99. data/lib/raw/render/send_file.rb +46 -0
  100. data/lib/raw/render/stream.rb +39 -0
  101. data/lib/raw/scaffold.rb +13 -0
  102. data/lib/raw/scaffold/controller.rb +25 -0
  103. data/lib/raw/scaffold/model.rb +157 -0
  104. data/lib/raw/test.rb +5 -0
  105. data/lib/raw/test/assertions.rb +169 -0
  106. data/lib/raw/test/context.rb +55 -0
  107. data/lib/raw/test/testcase.rb +79 -0
  108. data/lib/raw/util/attr.rb +128 -0
  109. data/lib/raw/util/encode_uri.rb +149 -0
  110. data/lib/raw/util/html_filter.rb +538 -0
  111. data/lib/raw/util/markup.rb +130 -0
  112. data/test/glue/tc_webfile.rb +1 -0
  113. data/test/nitro/CONFIG.rb +3 -0
  114. data/test/nitro/adapter/raw_post1.bin +9 -0
  115. data/test/nitro/adapter/tc_webrick.rb +16 -0
  116. data/test/nitro/cgi/tc_cookie.rb +14 -0
  117. data/test/nitro/cgi/tc_request.rb +61 -0
  118. data/test/nitro/compiler/tc_client_morpher.rb +47 -0
  119. data/test/nitro/compiler/tc_compiler.rb +25 -0
  120. data/test/nitro/dispatcher/tc_mounter.rb +47 -0
  121. data/test/nitro/helper/tc_feed.rb +135 -0
  122. data/test/nitro/helper/tc_navbar.rb +74 -0
  123. data/test/nitro/helper/tc_pager.rb +35 -0
  124. data/test/nitro/helper/tc_table.rb +68 -0
  125. data/test/nitro/helper/tc_xhtml.rb +19 -0
  126. data/test/nitro/tc_caching.rb +19 -0
  127. data/test/nitro/tc_cgi.rb +222 -0
  128. data/test/nitro/tc_context.rb +17 -0
  129. data/test/nitro/tc_controller.rb +103 -0
  130. data/test/nitro/tc_controller_aspect.rb +32 -0
  131. data/test/nitro/tc_controller_params.rb +885 -0
  132. data/test/nitro/tc_dispatcher.rb +109 -0
  133. data/test/nitro/tc_element.rb +85 -0
  134. data/test/nitro/tc_flash.rb +59 -0
  135. data/test/nitro/tc_helper.rb +47 -0
  136. data/test/nitro/tc_render.rb +119 -0
  137. data/test/nitro/tc_router.rb +61 -0
  138. data/test/nitro/tc_server.rb +35 -0
  139. data/test/nitro/tc_session.rb +66 -0
  140. data/test/nitro/tc_template.rb +71 -0
  141. data/test/nitro/util/tc_encode_url.rb +87 -0
  142. data/test/nitro/util/tc_markup.rb +31 -0
  143. data/test/public/blog/another/very_litle/index.xhtml +1 -0
  144. data/test/public/blog/inc1.xhtml +2 -0
  145. data/test/public/blog/inc2.xhtml +1 -0
  146. data/test/public/blog/list.xhtml +9 -0
  147. data/test/public/dummy_mailer/registration.xhtml +5 -0
  148. metadata +244 -0
@@ -0,0 +1,55 @@
1
+ require 'test/unit'
2
+ require 'test/unit/assertions'
3
+ require 'rexml/document'
4
+
5
+ require 'raw/context'
6
+
7
module Raw

  # Test-time reopening of Request. No extra helpers are defined yet.

  module Request
  end

  # Test-time reopening of Response: predicates and lookups that make
  # assertions on a rendered response easier to write.

  module Response

    # True only when the response status is exactly 200.

    def status_ok?
      200 == @status
    end

    # True when the response status is in the 3xx range.

    def redirect?
      (300..399).member?(@status)
    end

    # The value of the 'location' response header.

    def redirect_uri
      @response_headers['location']
    end

    # The first response cookie called +name+, or nil when there is no
    # such cookie (or no response cookies at all).

    def response_cookie(name)
      @response_cookies ? @response_cookies.detect { |cookie| cookie.name == name } : nil
    end

  end

  # Test-time reopening of Context: session and cookies become writable
  # and fall back to an empty Hash the first time they are read.

  class Context
    attr_writer :session, :cookies

    # The session, lazily initialised to an empty Hash.

    def session
      @session ||= {}
    end

    # The cookies, lazily initialised to an empty Hash.

    def cookies
      @cookies ||= {}
    end

  end

end
@@ -0,0 +1,79 @@
1
+ require 'ostruct'
2
+
3
+ require 'test/unit'
4
+ require 'test/unit/assertions'
5
+ require 'rexml/document'
6
+
7
+ require 'glue'
8
+ require 'nitro/test/context'
9
+
10
module Test::Unit

  # Extends Test::Unit::TestCase with helpers for driving a controller
  # through a fake request context.
  #
  # NOTE(review): this file still refers to the Nitro namespace
  # (Nitro::Dispatcher, Nitro::Server, Nitro::Context) although it ships
  # inside the raw package -- confirm whether these should be Raw::*.

  class TestCase
    include Nitro

    # Build a fresh context (and dispatcher) and store it in @context.

    def reset_context
      @context_config = OpenStruct.new(
        :dispatcher => Nitro::Dispatcher.new(Nitro::Server.map)
      )
      @context = Nitro::Context.new(@context_config)
    end

    # Send a request to the controller. Alternatively you can use
    # the request method helpers (get, post, ...)
    #
    # A non-Hash argument is treated as the uri.
    #
    # === Options
    #
    # :uri, :method, :headers/:env, :params, :cookies, :session
    #
    # :method defaults to :get. For GET requests the :params are also
    # serialized into the QUERY_STRING header.
    #
    # Cookies set by the previous response are replayed into the
    # context before the request is rendered.
    #
    # Returns the rendered body.

    def process(options = {})
      options = { :uri => options.to_s } unless options.is_a? Hash

      uri = options[:uri]
      uri = "/#{uri}" unless uri =~ /^\//

      # Resolve the method once so that REQUEST_METHOD and the
      # QUERY_STRING decision below always agree (previously an omitted
      # :method produced a GET request without a query string).
      request_method = options.fetch(:method, :get).to_s

      reset_context unless @context
      context = @context

      if @last_response_cookies
        @last_response_cookies.each do |cookie|
          context.cookies.merge! cookie.name => cookie.value
        end
      end

      context.headers = options[:headers] || options[:env] || {}
      context.headers['REQUEST_URI'] = uri
      context.headers['REQUEST_METHOD'] = request_method.upcase
      context.headers['REMOTE_ADDR'] ||= '127.0.0.1'

      if request_method.downcase == 'get' && options[:params]
        context.headers['QUERY_STRING'] =
          options[:params].collect { |k, v| "#{k}=#{v}" }.join('&')
      end

      context.params = options[:params] || {}
      context.cookies.merge! options[:cookies] if options[:cookies]
      context.session.merge! options[:session] if options[:session]

      context.render(context.path)
      @last_response_cookies = context.response_cookies
      return context.body
    end

    #--
    # Compile some helpers: get, post, put, delete and head all forward
    # to process with :method preset. The caller's options hash is
    # copied, not modified.
    #++

    [:get, :post, :put, :delete, :head].each do |m|
      class_eval %{
        def #{m}(options = {})
          options = { :uri => options.to_s } unless options.is_a? Hash
          process(options.merge(:method => :#{m}))
        end
      }
    end

  end

end
79
+
@@ -0,0 +1,128 @@
1
+ require 'cgi'
2
+ require 'og/relation/all'
3
+
4
+ #--
5
+ # TODO: find a better name!
6
+ # TODO: this is nitro request specific, should probably get moved
7
+ # into the Nitro directory.
8
+ #++
9
+
10
# Helpers that copy incoming request values onto model objects.

class AttributeUtils
  class << self

    #--
    # TODO: Add preprocessing.
    #++

    # Assign +value+ to the attribute +name+ of +obj+.
    #
    # The object's "__force_<name>" setter is tried first. If that call
    # fails for any ordinary reason (typically the setter is missing)
    # the instance variable is written directly. Only StandardError is
    # rescued, so interrupts and exit requests still propagate.

    def set_attr(obj, name, value)
      obj.send("__force_#{name}", value)
    rescue StandardError
      obj.instance_variable_set("@#{name}", value)
    end

    # Populate an object from a hash of values.
    # This is a truly dangerous method.
    #
    # +obj+ may be an instance or a class (a new instance is created).
    # +values+ is keyed by attribute / relation name as a String.
    # Returns the populated object.
    #
    # === Options
    #
    # * :all, also assign the primary key and attributes whose
    #   control is :none or disabled.
    # * :force_boolean (default true), set boolean attributes that are
    #   missing from +values+ to false.
    # * :assign_relations, also renew relations from +values+.

    def populate_object(obj, values, options = {})
      options = {
        :force_boolean => true
      }.update(options)

      # If a class is passed create an instance.

      obj = obj.new if obj.is_a?(Class)

      obj.class.serializable_attributes.each do |sym|
        anno = obj.class.ann(sym)

        unless options[:all]
          # THINK: should skip control none attributes?
          next if sym == obj.class.primary_key || anno[:control] == :none || anno[:disable_control]
        end

        prop_name = sym.to_s

        # See if there is an incoming request param for this prop.

        if values.has_key?(prop_name)
          prop_value = values[prop_name]

          # to_s must be called on the prop_value in case the
          # request is IOString.

          unless prop_value.is_a?(Hash) || prop_value.is_a?(Array)
            prop_value = prop_value.to_s
          end

          # If property is a Blob don't overwrite the current
          # property's data if "". Only this attribute is skipped;
          # the remaining attributes must still be processed.

          next if anno[:class] == Og::Blob && prop_value.empty?

          if prop_value.is_a?(String)
            # Unescape exactly once, and before filtering: unescaping
            # again after the filter would let double-encoded markup
            # (e.g. %253Cscript%253E) slip past it.
            prop_value = CGI.unescape(prop_value)

            if anno[:class] == String && anno[:unfiltered] != true
              # html filter all strings by default.
              prop_value = prop_value.html_filter
            end
          end

          set_attr(obj, prop_name, prop_value)

        elsif options[:force_boolean] && (anno[:class] == TrueClass || anno[:class] == FalseClass)
          # Set a boolean property to false if it is not in the
          # request. Requires force_boolean == true.

          set_attr(obj, prop_name, false)
        end
      end

      if options[:assign_relations]
        obj.class.relations.each do |rel|
          unless options[:all]
            next if rel.options[:control] == :none || rel.options[:disable_control]
          end

          rel_name = rel.name.to_s

          # Renew the relations from values

          if rel.kind_of?(Og::RefersTo)
            if foreign_oid = values[rel_name]
              foreign_oid = foreign_oid.to_s unless foreign_oid.is_a?(Hash) || foreign_oid.is_a?(Array)
              foreign_oid = nil if foreign_oid == 'nil' || foreign_oid == 'none'
            end
            set_attr(obj, rel.foreign_key, foreign_oid)
          elsif rel.kind_of?(Og::JoinsMany) || rel.kind_of?(Og::HasMany)
            collection = obj.send(rel_name)
            collection.remove_all
            if values.has_key?(rel_name)
              # Array() also accepts a single scalar key or nil.
              Array(values[rel_name]).each do |v|
                v = v.to_s
                next if v == "nil" || v == "none"
                collection << rel.target_class[v.to_i]
              end
            end
          end
        end
      end

      #--
      # gmosx, FIXME: this is a hack, will be replaced with proper
      # code soon.
      #++

      if obj.class.respond_to?(:assign_callbacks)
        obj.class.assign_callbacks.each do |callback|
          callback.call(obj, values, options)
        end
      end

      return obj
    end

  end
end
@@ -0,0 +1,149 @@
1
module Raw

  # A collection of intelligent url encoding methods.

  module EncodeURI

    private

    # Encode controller, action, params into a valid url.
    # Automatically respects nice urls and routing.
    #
    # Parameters are passed as alternating (param_name, param_value)
    # elements after the controller and action. Leading pairs are
    # consumed as 'nice' path segments when the action method takes
    # positional arguments; the remaining pairs become the query
    # string (joined with ';'). Path segments as well as query names
    # and values are URL-escaped.
    #
    # If you pass an entity (model) class as the first parameter,
    # the encoder tries to lookup the default controller for this
    # class (ie, Klass::Controller).
    #
    # A String argument is returned as is, except that a relative
    # path is prefixed with the current controller's mount path.
    #
    # === Examples
    #
    # encode_url ForaController, :post, :title, 'Hello', :body, 'World'
    # encode_url :post, :title, 'Hello', :body, 'World' # => implies controller == self
    # encode_url :kick, :oid, 4
    # encode_url article # => article.to_href
    #
    # NOTE(review): earlier documentation also advertised a hash form
    # (:controller => ..., :action => ..., :params => ...). Nothing in
    # this method handles a Hash argument; unless the router does, that
    # form is not supported here -- confirm.
    #--
    # Design: The pseudo-hack method with the alternating array
    # elements is needed because Ruby hashes are not sorted.
    # FIXME: better implementation? optimize this?
    # TODO: move elsewhere.
    #++

    def encode_uri(*args)
      f = args.first

      # A standard url as string, return as is.

      if f.is_a? String
        # Attach the controller mount_path if this is a relative
        # path. Use Controller.current to make this method more
        # reusable.
        unless f =~ /^\// or f =~ /^http/
          f = "#{Controller.current.mount_path}/#{f}".squeeze('/')
        end
        return f
      end

      # If the passed param is an object that responds to :to_href
      # returns the url to this object.

      return f.to_href if f.respond_to? :to_href

      if f.is_a? Symbol
        # no controller passed, try to use self as controller.
        unless self.class.respond_to? :mount_path
          raise "No controller passed to encode_url"
        end
        args.unshift(self.class)
      end

      # Try to encode using the router.

      if router = Context.current.dispatcher.router
        if path = router.encode_route(*args)
          return path
        end
      end

      # No routing rule, manual encoding.

      controller = args.shift
      action = args.shift.to_sym

      if controller.is_a? Class
        # If the class argument is not a controller, try to get
        # a controller for this class.

        unless controller.respond_to? :mount_path
          # Use the standard controller convention.
          controller = controller::Controller
        end
      else
        # An entity model is passed, lookup the class, then
        # the controller and inject the oid as a parameter. For
        # example:
        #
        # a = Article[1]
        # encode_url(a, :read) == encode_url(Article::Controller, :read, :oid, a.oid)

        args.unshift :oid, controller.oid
        controller = controller.class::Controller
      end

      if action == :index
        url = "#{controller.mount_path}"
      else
        mount_path = controller.mount_path
        mount_path = nil if mount_path == '/'
        url = "#{mount_path}/#{action}"
      end

      unless args.empty?
        if controller.action_or_template?(action, Context.current.format)
          param_count = controller.instance_method(action).arity
          if param_count > 0
            param_count.times do
              # Stop when the caller supplied fewer pairs than the
              # action's arity instead of appending empty segments.
              break if args.empty?
              args.shift # name
              url << "/#{CGI.escape(args.shift.to_s)}"
            end
          end
        end

        # Whatever is left becomes the query string. Only complete
        # (name, value) pairs are used; a trailing unpaired element is
        # ignored. Names and values are escaped like the path segments
        # above so that '&', ';', '=' or spaces cannot corrupt the url.
        pairs = []
        (args.size / 2).times do
          name, value = args.shift, args.shift
          pairs << "#{CGI.escape(name.to_s)}=#{CGI.escape(value.to_s)}"
        end
        url << '?' << pairs.join(';') unless pairs.empty?
      end

      return url
    end
    alias R encode_uri
    alias encode_url encode_uri # DEPRECATED.


    # Just like encode_uri, but generates an absolute URI instead by
    # prefixing the result with request.host_url.

    def encode_absolute_uri(*args)
      return "#{request.host_url}#{encode_uri(*args)}"
    end
    alias RA encode_absolute_uri
    alias encode_absolute_url encode_absolute_uri # DEPRECATED.

  end

end
@@ -0,0 +1,538 @@
1
+ # = HTML filtering library
2
+ #
3
+ # == Port
4
+ #
5
+ # lib_filter.php, v1.15 by Cal Henderson <cal@iamcal.com>
6
+ #
7
+ # This code is licensed under a Creative Commons Attribution-ShareAlike 2.5 License
8
+ # http://creativecommons.org/licenses/by-sa/2.5/
9
+ #
10
+ # Thanks to Jang Kim for adding support for single quoted attributes
11
+ #
12
+ # == Reference
13
+ #
14
+ # http://iamcal.com/publish/articles/php/processing_html/
15
+ # http://iamcal.com/publish/articles/php/processing_html_part_2/
16
+ #
17
+ # == Author(s)
18
+ #
19
+ # * TransNoumena
20
+ # * George Moschovitis
21
+ # * James Britt
22
+ # * Cal Henderson
23
+ # * Jang Kim
24
+
25
+ require "cgi"
26
+
27
# Whitelist based HTML filter (a port of lib_filter.php).
#
# A filter run balances stray angle brackets, drops tags and
# attributes that are not whitelisted, neutralises URLs with a
# non-whitelisted protocol, closes tags left open, removes configured
# empty tags and validates entities.
#
#   HtmlFilter.new.filter('<b>hello')   # => '<b>hello</b>'

class HtmlFilter

  # tags and attributes that are allowed
  #
  # Eg.
  #
  # {
  #   'a' => ['href', 'target'],
  #   'b' => [],
  #   'img' => ['src', 'width', 'height', 'alt']
  # }
  attr_accessor :allowed

  # tags which should always be self-closing (e.g. "<img />")
  attr_accessor :no_close

  # tags which must always have separate opening and closing
  # tags (e.g. "<b></b>")
  attr_accessor :always_close

  # attributes which should be checked for valid protocols
  # (src,href)
  attr_accessor :protocol_attributes

  # protocols which are allowed (http, ftp, mailto)
  attr_accessor :allowed_protocols

  # tags which should be removed if they contain no content
  # (e.g. "<b></b>" or "<b />")
  attr_accessor :remove_blanks

  # should we remove comments? (true, false)
  attr_accessor :strip_comments

  # should we try and make a b tag out of "b>" (true, false)
  attr_accessor :always_make_tags

  # entity control option (true, false)
  attr_accessor :allow_numbered_entities

  # entity control option (amp, gt, lt, quot, etc.)
  attr_accessor :allowed_entities

  # default settings
  DEFAULT = {
    'allowed' => {
      'a' => ['href', 'target'],
      'b' => [],
      'i' => [],
      'ul' => [],
      'ol' => [],
      'li' => [],
      'img' => ['src', 'width', 'height', 'alt'],
      'object' => ['width', 'height'],
      'param' => ['name', 'value'],
      'embed' => ['src', 'type', 'wmode', 'name', 'value'],
    },
    'no_close' => ['img', 'br', 'hr'],
    'always_close' => ['a', 'b'],
    'protocol_attributes' => ['src', 'href'],
    'allowed_protocols' => ['http', 'ftp', 'mailto'],
    'remove_blanks' => ['a', 'b'],
    'strip_comments' => true,
    'always_make_tags' => true,
    'allow_numbered_entities' => true,
    'allowed_entities' => ['amp', 'gt', 'lt', 'quot']
  }

  #
  # new html filter
  #
  # +options+ maps setting names (see the accessors above) to values.
  # Settings that are not given fall back to DEFAULT, so a partial
  # hash is fine.
  #

  def initialize( options=nil )
    @tag_counts = {}

    DEFAULT.merge(options || {}).each{ |k,v| send("#{k}=",v) }
  end

  #
  # Filter +data+ and return the cleaned up copy. The argument is not
  # modified. The open-tag counters are reset on every call, so one
  # filter instance can be reused for many strings.
  #

  def filter(data)
    @tag_counts = {}

    data = escape_comments(data)
    data = balance_html(data)
    data = check_tags(data)
    data = process_remove_blanks(data)
    data = validate_entities(data)

    return data
  end

  private

  #
  # internal tag counter: tag name => number of currently open tags
  #

  attr_reader :tag_counts

  #
  # HTML-escape the text inside <!-- --> comments so that markup
  # characters in a comment cannot be mistaken for tags later on.
  #

  def escape_comments(data)
    # /m lets '.' match newlines (the PHP original used /s for that;
    # in Ruby /s selects an encoding instead).
    data = data.gsub(/<!--(.*?)-->/m) do
      '<!--' + escape(strip_single($1)) + '-->'
    end

    return data
  end

  #
  # Repair or escape unbalanced angle brackets, depending on
  # always_make_tags.
  #

  def balance_html(data)
    data = data.dup

    if always_make_tags
      # try and form html
      data.gsub!(/>>+/, '>')
      data.gsub!(/<<+/, '<')
      data.gsub!(/^>/, '')
      data.gsub!(/<([^>]*?)(?=<|$)/, '<\1>')
      data.gsub!(/(^|>)([^<]*?)(?=>)/, '\1<\2')
    else
      # escape stray brackets
      data.gsub!(/<([^>]*?)(?=<|$)/, '&lt;\1')
      data.gsub!(/(^|>)([^<]*?)(?=>)/, '\1\2&gt;<')
      # the last regexp causes '<>' entities to appear
      # (we need to do a lookahead assertion so that the last bracket
      # can be used in the next pass of the regexp)
      data.gsub!('<>', '')
    end

    return data
  end

  #
  # Run every <...> through process_tag, then append closing tags for
  # everything that is still open.
  #

  def check_tags(data)
    data = data.dup

    data.gsub!(/<(.*?)>/m){
      process_tag(strip_single($1))
    }

    tag_counts.each do |tag, cnt|
      cnt.times{ data << "</#{tag}>" }
    end

    return data
  end

  #
  # Rebuild a single tag from its inner text (+data+ is what stood
  # between '<' and '>'). Returns the sanitised tag, or '' when the
  # tag is to be dropped.
  #

  def process_tag(data)

    # ending tags

    if matches = /\A\/([a-z0-9]+)/i.match(data)
      name = matches[1].downcase

      return '' unless allowed.key?(name)
      return '' if no_close.include?(name)

      # Only close what is actually open. (0 is truthy in Ruby, so the
      # count has to be compared explicitly.)
      return '' unless tag_counts[name].to_i > 0

      tag_counts[name] -= 1
      return "</#{name}>"
    end

    # starting tags

    if matches = /\A([a-z0-9]+)(.*?)(\/?)\z/mi.match(data)
      name = matches[1].downcase
      body = matches[2]
      ending = matches[3]

      return '' unless allowed.key?(name)

      params = ""

      matches_2 = body.scan(/([a-z0-9]+)=(["'])(.*?)\2/mi)          # <foo a="b" />
      matches_1 = body.scan(/([a-z0-9]+)(=)([^"\s']+)/mi)          # <foo a=b />
      matches_3 = body.scan(/([a-z0-9]+)=(["'])([^"']*?)\s*$/mi)   # <foo a="b />

      (matches_1 + matches_2 + matches_3).each do |match|
        pname = match[0].downcase
        next unless allowed[name].include?(pname)

        value = match[2]
        if protocol_attributes.include?(pname)
          value = process_param_protocol(value)
        end

        # The value is always emitted inside double quotes, so a raw
        # double quote (possible when the input used single quotes)
        # must not be allowed to terminate the attribute.
        value = value.gsub('"', '&quot;')

        params += %{ #{pname}="#{value}"}
      end

      ending = ' /' if no_close.include?(name)
      ending = ''   if always_close.include?(name)

      if ending.empty?
        tag_counts[name] = tag_counts[name].to_i + 1
      else
        ending = ' /'
      end

      return '<' + name + params + ending + '>'
    end

    # comments
    if /\A!--(.*)--\z/mi =~ data
      if strip_comments
        return ''
      else
        return '<' + data + '>'
      end
    end

    # garbage, ignore it
    return ''
  end

  #
  # Neutralise an attribute value whose protocol is not whitelisted:
  # "proto:rest" becomes "#rest". Values without a colon, or with an
  # allowed protocol, are returned entity-decoded but otherwise intact.
  #

  def process_param_protocol(data)
    data = decode_entities(data)

    if matches = /\A([^:]+):/m.match(data)
      unless allowed_protocols.include?(matches[1])
        # drop the protocol and the colon, keep the remainder
        data = '#' + data[(matches[1].size + 1)..-1]
      end
    end

    return data
  end

  #
  # Remove empty occurrences of the tags listed in remove_blanks.
  #

  def process_remove_blanks(data)
    data = data.dup

    remove_blanks.each do |tag|
      data.gsub!(/<#{tag}(\s[^>]*)?><\/#{tag}>/, '')
      data.gsub!(/<#{tag}(\s[^>]*)?\/>/, '')
    end

    return data
  end

  #
  # Re-capitalise text that is written entirely in upper case.
  # NOTE(review): nothing in this class calls this method.
  #

  def fix_case(data)
    data_notags = strip_tags(data)
    data_notags = data_notags.gsub(/[^a-zA-Z]/, '')

    if data_notags.size < 5
      return data
    end

    if /[a-z]/ =~ data_notags
      return data
    end

    data = data.gsub(/(>|^)([^<]+?)(<|$)/){
      strip_single($1) +
      fix_case_inner(strip_single($2)) +
      strip_single($3)
    }

    return data
  end

  #
  # Downcase +data+ and capitalise the first letter of each sentence.
  #

  def fix_case_inner(data)
    data = data.dup

    data.downcase!

    data.gsub!(/(^|[^\w\s\';,\\-])(\s*)([a-z])/){
      strip_single("#{$1}#{$2}") + strip_single($3).upcase
    }

    return data
  end

  #
  # Remove everything that looks like a tag. Only used by fix_case.
  #

  def strip_tags(data)
    data.gsub(/<[^>]*>/, '')
  end

  #
  # Escape every '&' that does not start an allowed entity.
  #

  def validate_entities(data)
    data = data.dup

    # validate entities throughout the string
    data.gsub!(%r!&([^&;]*)(?=(;|&|$))!){
      check_entity(strip_single($1), strip_single($2))
    }

    # validate quotes outside of tags
    # NOTE(review): strip_single already removed every \" sequence, so
    # the inner gsub never matches and plain double quotes in text are
    # left alone. lib_filter.php turns them into &quot; here; that is
    # not done in this port because it would change the text output
    # callers currently get. Quotes inside emitted attribute values
    # are escaped in process_tag.
    data.gsub!(/(>|^)([^<]+?)(<|$)/){
      m1, m2, m3 = $1, $2, $3
      strip_single(m1) +
      strip_single(m2).gsub('\"', '&quot;') +
      strip_single(m3)
    }

    return data
  end

  #
  # +preamble+ is the text after '&', +term+ the character that
  # follows it. Returns the replacement for "&preamble".
  #

  def check_entity(preamble, term)
    if term != ';'
      return '&amp;' + preamble
    end

    if is_valid_entity(preamble)
      return '&' + preamble
    end

    return '&amp;' + preamble
  end

  #
  # Is +entity+ (without '&' and ';') allowed?
  #

  def is_valid_entity(entity)
    re = /^#([0-9]+)$/i

    if md = re.match(entity)
      if (md[1].to_i > 127)
        return true
      end
      return allow_numbered_entities
    end

    if allowed_entities.include?(entity)
      return true
    end

    return nil
  end

  # within attributes, we want to convert all hex/dec/url
  # escape sequences into their raw characters so that we can
  # check we don't get stray quotes/brackets inside strings.

  def decode_entities(data)
    data = data.dup

    data.gsub!(/(&)#(\d+);?/){ decode_dec_entity($1, $2) }
    data.gsub!(/(&)#x([0-9a-f]+);?/i){ decode_hex_entity($1, $2) }
    data.gsub!(/(%)([0-9a-f]{2});?/i){ decode_hex_entity($1, $2) }

    data = validate_entities(data)

    return data
  end

  #
  # m[0] is the escape type ('&' or '%'), m[1] the hexadecimal digits.
  #

  def decode_hex_entity(*m)
    return decode_num_entity(m[0], m[1].hex)
  end

  #
  # m[0] is the escape type ('&'), m[1] the decimal digits.
  #

  def decode_dec_entity(*m)
    return decode_num_entity(m[0], m[1])
  end

  #
  # Turn the character code +d+ back into text. Codes above 127 are
  # returned in their original escaped form (+orig_type+ is '&' or
  # '%'); everything else becomes the HTML-escaped character.
  #

  def decode_num_entity(orig_type, d)
    d = d.to_i
    d = 32 if d < 0 # space

    # don't mess with high chars
    if d > 127
      return '%' + d.to_s(16) if orig_type == '%'
      # '##' on purpose: a single '#' followed by '{' would start a
      # string interpolation and swallow the '#'.
      return "&##{d};" if orig_type == '&'
    end

    return escape(d.chr)
  end

  #
  # Undo the quoting that PHP's /e modifier applied in the original
  # library: \" becomes " and \0 becomes a NUL byte.
  #

  def strip_single(data)
    return data.gsub('\"', '"').gsub('\0', 0.chr)
  end

  # Certain characters have special significance in HTML, and
  # should be represented by HTML entities if they are to
  # preserve their meanings. This function returns a string
  # with some of these conversions made; the translations made
  # are those most useful for everyday web programming.

  def escape(html)
    # The trailing gsub only matters on CGI versions that do not
    # escape the apostrophe themselves.
    CGI.escapeHTML(html).gsub(/'/, '&#039;')
  end

end
476
+
477
+ # Extend the standard String class for extra convenience.
478
+
479
class String
  # Returns a copy of this string run through an HtmlFilter that was
  # created with its default settings.
  def html_filter
    sanitizer = HtmlFilter.new
    sanitizer.filter(self)
  end
end
484
+
485
+
486
# Self-test: only runs when this file is executed directly.

if __FILE__ == $0

  require 'test/unit'

  class TestHtmlFilter < Test::Unit::TestCase

    # Each entry is [expected output, input].

    BALANCING = [
      ["<b>hello</b>", "<<b>hello</b>"],
      ["<b>hello</b>", "<b>>hello</b>"],
      ["<b>hello</b>", "<b>hello<</b>"],
      ["<b>hello</b>", "<b>hello</b>>"],
      ["", "<>"]
    ]

    COMPLETION = [
      ["hello", "hello<b>"],
      ["<b>hello</b>", "<b>hello"],
      ["hello<b>world</b>", "hello<b>world"],
      ["hello", "hello</b>"],
      ["hello", "hello<b/>"],
      ["hello<b>world</b>", "hello<b/>world"],
      ["<b><b><b>hello</b></b></b>", "<b><b><b>hello"],
      ["", "</b><b>"]
    ]

    END_SLASHES = [
      ['<img />', '<img>'],
      ['<img />', '<img/>'],
      ['', '<b/></b>']
    ]

    # Assert that filtering +original+ yields +filtered+.

    def assert_filter(filtered, original)
      assert_equal(filtered, original.html_filter)
    end

    def test_strip_single
      filter = HtmlFilter.new
      assert_equal('"', filter.send(:strip_single, '\"'))
      assert_equal("\000", filter.send(:strip_single, '\0'))
    end

    def test_fix_quotes
      assert_filter '<img src="foo.jpg" />', "<img src=\"foo.jpg />"
    end

    def test_basics
      ['', 'hello'].each { |text| assert_filter text, text }
    end

    def test_balancing_tags
      BALANCING.each { |expected, input| assert_filter expected, input }
    end

    def test_tag_completion
      COMPLETION.each { |expected, input| assert_filter expected, input }
    end

    def test_end_slashes
      END_SLASHES.each { |expected, input| assert_filter expected, input }
    end

  end

end