hermeneutics 1.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,261 @@
1
+ #
2
+ # hermeneutics/css.rb -- CSS generation
3
+ #
4
+
5
+ require "hermeneutics/html"
6
+
7
+
8
+ module Hermeneutics
9
+
10
+ # == Example
11
+ #
12
+ # require "hermeneutics/css"
13
+ # require "hermeneutics/color"
14
+ # class MyCss < Css
15
+ #
16
+ # COL1 = "904f02".to_rgb
17
+ # COL2 = COL1.edit_hsv { |h,s,v| [h+15,s,v] }
18
+ #
19
+ # ATTR_COL1 = { color: COL1 }
20
+ # ATTR_COL2 = { color: COL2 }
21
+ # ATTR_DECON = { text_decoration: "none" }
22
+ # ATTR_DECOU = { text_decoration: "underline" }
23
+ #
24
+ # def build
25
+ # a ":link", ATTR_COL1, ATTR_DECON
26
+ # a ":visited", ATTR_COL2, ATTR_DECON
27
+ # a ":active", ATTR_COL1, ATTR_DECON
28
+ # a ":focus", ATTR_COL1, ATTR_DECOU
29
+ # space
30
+ #
31
+ # body "#dummy" do
32
+ # properties :background_color => "f7f7f7".to_rgb
33
+ # div ".child", :background_color => "e7e7e7".to_rgb
34
+ # @b = selector
35
+ # td do
36
+ # @bt = selector
37
+ # end
38
+ # end
39
+ # selectors @b, @bt, :font_size => :large
40
+ # end
41
+ # end
42
+ # Hermeneutics::Css.document
43
+ #
44
+ class Css
45
+
46
class << self

  # Class that +open+ instantiates; +inherited+ stores every new
  # subclass here (on Css itself).
  attr_accessor :main

  # Subclass hook: register +cls+ as the main stylesheet class.
  def inherited(cls)
    Css.main = cls
  end

  # Build an instance of the receiver's @main (or of the receiver when
  # that is unset) and yield it while it generates to +out+.
  def open(out = nil)
    sheet = (@main || self).new
    sheet.generate(out) { yield sheet }
  end

  # Open a stylesheet and forward all arguments to its #document.
  def document(*args, &block)
    open { |sheet| sheet.document(*args, &block) }
  end

  # Write the stylesheet to the file +name+; the default name is the
  # running script's basename with ".rb" replaced by ".css".
  # With a block the instance is yielded, otherwise #document is called.
  def write_file(name = nil)
    name ||= File.basename($0, ".rb") + ".css"
    File.open(name, "w") do |file|
      open(file) do |sheet|
        block_given? ? (yield sheet) : sheet.document
      end
    end
  end

end
76
+
77
# Run the block with @out set to +out+ (or $stdout when +out+ is nil);
# the previous @out is restored afterwards, even if the block raises.
def generate(out = nil)
  previous, @out = @out, (out || $stdout)
  yield
ensure
  @out = previous
end
86
+
87
+
88
# Stack of nested selector steps; +to_s+ renders the current selector.
class Selector

  def initialize
    @chain = []
  end

  # Push one selector step, yield, and pop the step again.
  #
  # descend:: nil, ">" / :child, or "+" / :sibling
  # name::    tag name; only emitted if it is "*" or a key of Html::TAGS
  # sub::     optional suffix like ".cls", "#id", ":pseudo", "[attr=val]"
  #
  # Raises RuntimeError for a combinator without a parent step, an
  # unknown combinator or a malformed +sub+.
  def tag descend, name, sub
    descend and @chain.empty? and
      raise "Descendor without previous tag: #{descend} #{name}#{sub}."
    c = []
    c.push case descend
      when ">", :child   then "> "
      when "+", :sibling then "+ "
      when nil           then
      else
        raise "Unknown descendor: #{descend}"
    end
    c.push name if name == "*" or Html::TAGS[ name]
    if sub then
      sub =~ /\A(?:
                [:.#]([a-z_0-9-]+)|
                \[([a-z0-9-]+)([~|]?=)(.*)\]
              )*\z/ix or
        raise "Improper tag specification: #{name}#{sub}."
      c.push sub
    end
    @chain.push c
    # Guard only the yield: a validation error above must not pop the
    # enclosing step that belongs to the caller's still-open block.
    begin
      yield
    ensure
      @chain.pop
    end
  end

  protected

  def replace chain
    @chain.replace chain
  end

  public

  # Snapshot of the current selector, unaffected by later push/pop.
  def dup
    s = Selector.new
    s.replace @chain
    s
  end

  def to_s
    @chain.map { |c| c.join }.join " "
  end

end
131
+
132
+ def initialize
133
+ @selector = Selector.new
134
+ end
135
+
136
+ def document *args, &block
137
+ build *args, &block
138
+ end
139
+
140
# Path of the current output stream, or nil if the stream has no #path.
def path
  # Ask the stream instead of rescuing NoMethodError, which would also
  # swallow unrelated NoMethodErrors raised inside the stream's #path.
  @out.path if @out.respond_to? :path
end
144
+
145
# Write +str+ as a CSS comment. Text containing the line separator is
# written as a block with one " * " row per line, anything else as a
# one-line comment. The text is passed through mask_comment first.
def comment(str)
  @out << "/*"
  text = mask_comment(str)
  multiline = text =~ %r(#$/)
  if multiline
    @out << $/
    text.each_line { |line| @out << " * " << line.chomp << $/ }
  else
    @out << " " << text
  end
  @out << " */"
  @out << $/ if multiline
end
161
+
162
+ def space
163
+ @out << $/
164
+ end
165
+
166
# Enter a selector step. Trailing Hash arguments are property sets and
# are written at once; without any, the block is run inside the step.
def tag(*args)
  props = []
  props.unshift(args.pop) while Hash === args.last
  @selector.tag(*args) do
    props.empty? ? yield : properties(*props)
  end
end
179
+
180
+ # remove Kernel methods of the same name: :p, :select, :sub
181
+ m = Html::TAGS.keys & (private_instance_methods +
182
+ protected_instance_methods + instance_methods)
183
+ undef_method *m
184
+
185
# Every key of Html::TAGS works as a selector method:
#
#   div ".cls", props        ->  tag nil,    :div, ".cls", props
#   td ">", ".cls", props    ->  tag ">",    :td,  ".cls", props
#   li :child, props         ->  tag :child, :li,  nil,    props
#
# A single leading argument counts as combinator when it is a Symbol
# or contains no letter, and as tag suffix otherwise.
def method_missing sym, *args, &block
  if Html::TAGS[ sym] then
    if args.any? and not Hash === args.first then
      sub = args.shift
    end
    if args.any? and not Hash === args.first then
      desc, sub = sub, args.shift
    elsif sub !~ /[a-z]/i or Symbol === sub then
      desc, sub = sub, nil
    end
    tag desc, sym, sub, *args, &block
  else
    super
  end
end

# Keep respond_to? and Object#method consistent with method_missing.
def respond_to_missing? sym, include_all = false
  Html::TAGS[ sym] ? true : super
end
200
+
201
+ def properties *args
202
+ write @selector.to_s, *args
203
+ end
204
+
205
+ def selector
206
+ @selector.dup
207
+ end
208
+
209
# Write one rule for several selectors at once: the leading Selector
# arguments are joined with ", ", the rest is passed on to write.
def selectors(*args)
  sels = args.take_while { |a| Selector === a }
  rest = args.drop(sels.size)
  write sels.join(", "), *rest
end
217
+
218
+ private
219
+
220
# Break up every "*/" so the text cannot close the surrounding comment.
def mask_comment(str)
  str.gsub("*/", "* /")
end
223
+
224
+ INDENT = " "
225
+
226
# Emit one rule: selector, braces and the declarations merged from all
# hashes in +args+. A single declaration stays on the selector's line;
# several get one INDENT-ed line each.
def write(sel, *args)
  props = args.each_with_object({}) { |a, merged| merged.update a }
  @out << sel << " {"
  if props.size > 1
    @out << $/
    lead, tail = INDENT, $/
  else
    lead = tail = " "
  end
  single(props) { |decl| @out << lead << decl << tail }
  @out << "}" << $/
end
241
+
242
# Turn a property hash into CSS declarations of the form "name: value;".
# Symbol keys get their underscores replaced by dashes; Array values
# are joined with spaces. With a block each declaration is yielded and
# the block results are returned; without one the declarations are
# returned as an Array.
def single hash
  return single( hash) { |s| s } unless block_given?
  hash.map { |k,v|
    # Core String methods only: this file requires just
    # "hermeneutics/html", so Symbol#new_string may not be loaded.
    k = k.to_s.tr "_", "-" if Symbol === k
    v = v.join " "         if Array  === v
    yield "#{k}: #{v};"
  }
end
257
+
258
+ end
259
+
260
+ end
261
+
@@ -0,0 +1,826 @@
1
+ # encoding: UTF-8
2
+
3
+ #
4
+ # hermeneutics/escape.rb -- Various encoding schemes for internet purposes
5
+ #
6
+
7
+ require "supplement"
8
+
9
+
10
+ =begin rdoc
11
+
12
+ :section: Classes defined here
13
+
14
+ Hermeneutics::Entities encodes to and decodes from HTML-Entities
15
+ (<code>&amp;</code> etc.)
16
+
17
+ Hermeneutics::URLText encodes to and decodes from URLs
18
+ (<code>%2d</code> etc.)
19
+
20
+ Hermeneutics::HeaderExt encodes to and decodes from E-Mail Header fields
21
+ (<code>=?UTF-8?Q?=C3=B6?=</code> etc.).
22
+
23
+ =end
24
+
25
+ module Hermeneutics
26
+
27
+ # Translate HTML and XML character entities: <code>"&"</code> to
28
+ # <code>"&amp;"</code> and vice versa.
29
+ #
30
+ # == What actually happens
31
+ #
32
+ # HTML pages usually come in with characters encoded <code>&lt;</code>
33
+ # for <code><</code> and <code>&euro;</code> for <code>€</code>.
34
+ #
35
+ # Further, they may contain a meta tag in the header like this:
36
+ #
37
+ # <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
38
+ # <meta charset="utf-8" /> (HTML5)
39
+ #
40
+ # or
41
+ #
42
+ # <?xml version="1.0" encoding="UTF-8" ?> (XHTML)
43
+ #
44
+ # When +charset+ is <code>utf-8</code> and the file contains the byte
45
+ # sequence <code>"\303\244"</code>/<code>"\xc3\xa4"</code> then there will
46
+ # be displayed a character <code>"ä"</code>.
47
+ #
48
+ # When +charset+ is <code>iso8859-15</code> and the file contains the byte
49
+ # sequence <code>"\344"</code>/<code>"\xe4"</code> then there will be
50
+ # displayed a character <code>"ä"</code>, too.
51
+ #
52
+ # The sequence <code>"&auml;"</code> will produce an <code>"ä"</code> in any
53
+ # case.
54
+ #
55
+ # == What you should do
56
+ #
57
+ # Generating your own HTML pages you will always be safe when you only
58
+ # produce entity tags as <code>&auml;</code> and <code>&euro;</code> or
59
+ # <code>&#x00e4;</code> and <code>&#x20ac;</code> respectively.
60
+ #
61
+ # == What this module does
62
+ #
63
+ # This module translates strings to a HTML-masked version. The encoding will
64
+ # not be changed and you may demand to keep 8-bit-characters.
65
+ #
66
+ # == Examples
67
+ #
68
+ # Entities.encode "<" #=> "&lt;"
69
+ # Entities.decode "&lt;" #=> "<"
70
+ # Entities.encode "äöü" #=> "&auml;&ouml;&uuml;"
71
+ # Entities.decode "&auml;&ouml;&uuml;" #=> "äöü"
72
+ #
73
+ class Entities
74
+
75
+ # :stopdoc:
76
+ SPECIAL_ASC = {
77
+ '"' => "quot", "&" => "amp", "<" => "lt", ">" => "gt",
78
+ }
79
+ RE_ASC = /[#{SPECIAL_ASC.keys.map { |x| Regexp.quote x }.join}]/
80
+
81
+ SPECIAL = {
82
+ "\u00a0" => "nbsp",
83
+ "¡" => "iexcl", "¢" => "cent", "£" => "pound", "€" => "euro", "¥" => "yen", "Š" => "Scaron",
84
+ "¤" => "curren", "¦" => "brvbar",
85
+ "§" => "sect", "š" => "scaron", "©" => "copy", "ª" => "ordf", "«" => "laquo", "¬" => "not", "­" => "shy",
86
+ "¨" => "uml",
87
+ "®" => "reg", "¯" => "macr",
88
+
89
+ "°" => "deg", "±" => "plusmn", "²" => "sup2", "³" => "sup3", "µ" => "micro", "¶" => "para",
90
+ "´" => "acute",
91
+ "·" => "middot", "¹" => "sup1", "º" => "ordm", "»" => "raquo", "Œ" => "OElig", "œ" => "oelig",
92
+ "¸" => "cedil", "¼" => "frac14", "½" => "frac12",
93
+ "Ÿ" => "Yuml", "¿" => "iquest",
94
+ "¾" => "frac34",
95
+
96
+ "À" => "Agrave", "Á" => "Aacute", "Â" => "Acirc", "Ã" => "Atilde", "Ä" => "Auml", "Å" => "Aring", "Æ" => "AElig",
97
+ "Ç" => "Ccedil", "È" => "Egrave", "É" => "Eacute", "Ê" => "Ecirc", "Ë" => "Euml", "Ì" => "Igrave", "Í" => "Iacute",
98
+ "Î" => "Icirc", "Ï" => "Iuml",
99
+ "Ð" => "ETH", "Ñ" => "Ntilde", "Ò" => "Ograve", "Ó" => "Oacute", "Ô" => "Ocirc", "Õ" => "Otilde", "Ö" => "Ouml",
100
+ "×" => "times", "Ø" => "Oslash", "Ù" => "Ugrave", "Ú" => "Uacute", "Û" => "Ucirc", "Ü" => "Uuml", "Ý" => "Yacute",
101
+ "Þ" => "THORN", "ß" => "szlig",
102
+
103
+ "à" => "agrave", "á" => "aacute", "â" => "acirc", "ã" => "atilde", "ä" => "auml", "å" => "aring", "æ" => "aelig",
104
+ "ç" => "ccedil", "è" => "egrave", "é" => "eacute", "ê" => "ecirc", "ë" => "euml", "ì" => "igrave", "í" => "iacute",
105
+ "î" => "icirc", "ï" => "iuml",
106
+ "ð" => "eth", "ñ" => "ntilde", "ò" => "ograve", "ó" => "oacute", "ô" => "ocirc", "õ" => "otilde", "ö" => "ouml",
107
+ "÷" => "divide", "ø" => "oslash", "ù" => "ugrave", "ú" => "uacute", "û" => "ucirc", "ü" => "uuml", "ý" => "yacute",
108
+ "þ" => "thorn", "ÿ" => "yuml",
109
+
110
+ "‚" => "bsquo", "‘" => "lsquo", "„" => "bdquo", "“" => "ldquo", "‹" => "lsaquo", "›" => "rsaquo",
111
+ "–" => "ndash", "—" => "mdash", "‰" => "permil", "…" => "hellip", "†" => "dagger", "‡" => "Dagger",
112
+ }.update SPECIAL_ASC
113
+ NAMES = SPECIAL.invert
114
+ # :startdoc:
115
+
116
+ attr_accessor :keep_8bit
117
+
118
+ # :call-seq:
119
+ # new( keep_8bit = nil) -> ent
120
+ # new( :keep_8bit => val) -> ent
121
+ #
122
+ # Creates an <code>Entities</code> converter.
123
+ #
124
+ # The parameter may be given as one value or as a hash.
125
+ #
126
+ # ent = Entities.new true
127
+ # ent = Entities.new :keep_8bit => true
128
+ #
129
+ def initialize keep_8bit = nil
130
+ @keep_8bit = case keep_8bit
131
+ when Hash then keep_8bit[ :keep_8bit]
132
+ else keep_8bit
133
+ end
134
+ end
135
+
136
+ # :call-seq:
137
+ # ent.encode( str) -> str
138
+ #
139
+ # Create a string whose characters are masked the HTML style:
140
+ #
141
+ # ent = Entities.new
142
+ # ent.encode "&<\"" #=> "&amp;&lt;&quot;"
143
+ # ent.encode "äöü" #=> "&auml;&ouml;&uuml;"
144
+ #
145
+ # The result will be in the same encoding as the source even if it will
146
+ # not contain any 8-bit characters (what can only happen when +keep_8bit+
147
+ # is set).
148
+ #
149
+ # ent = Entities.new true
150
+ #
151
+ # uml = "<ä>".encode "UTF-8"
152
+ # ent.encode uml #=> "&lt;\xc3\xa4&gt;" in UTF-8
153
+ #
154
+ # uml = "<ä>".encode "ISO-8859-1"
155
+ # ent.encode uml #=> "&lt;\xe4&gt;" in ISO-8859-1
156
+ #
157
def encode(str)
  masked = str.new_string
  # The four markup characters are always masked.
  masked.gsub!(RE_ASC) { |ch| "&#{SPECIAL_ASC[ ch]};" }
  return masked if @keep_8bit
  # Everything outside 7-bit ASCII becomes a named entity if SPECIAL
  # knows one, a hexadecimal character reference otherwise.
  masked.gsub!(/[^\0-\x7f]/) do |ch|
    ch.encode! __ENCODING__
    name = SPECIAL[ ch]
    name ||= "#x%04x" % ch.ord
    "&#{name};"
  end
  masked
end
169
+
170
+ def decode str
171
+ self.class.decode str
172
+ end
173
+
174
+ public
175
+
176
+ class <<self
177
+
178
+ def std
179
+ @std ||= new
180
+ end
181
+
182
+ def encode str
183
+ std.encode str
184
+ end
185
+
186
+ # :call-seq:
187
+ # Entities.decode( str) -> str
188
+ #
189
+ # Replace HTML-style masks by normal characters:
190
+ #
191
+ # Entities.decode "&lt;" #=> "<"
192
+ # Entities.decode "&auml;&ouml;&uuml;" #=> "äöü"
193
+ #
194
+ # Unmasked 8-bit-characters (<code>"ä"</code> instead of
195
+ # <code>"&auml;"</code>) will be kept but translated to
196
+ # a unique encoding.
197
+ #
198
+ # s = "ä &ouml; ü"
199
+ # s.encode! "utf-8"
200
+ # Entities.decode s #=> "ä ö ü"
201
+ #
202
+ # s = "\xe4 &ouml; \xfc &#x20ac;"
203
+ # s.force_encoding "iso-8859-15"
204
+ # Entities.decode s #=> "ä ö ü €"
205
+ # (in iso8859-15)
206
+ #
207
def decode str
  # Match exactly one entity: a name or a "#"-prefixed number. The
  # former /&(.+?);/ ran from any stray "&" up to the next ";" and so
  # left the real entity in "AT&T &lt;" undecoded.
  # Unknown entities are kept verbatim.
  str.gsub( /&(#?[0-9a-z]+);/i) do
    (named_decode $1) or (numeric_decode $1) or $&
  end
end
212
+
213
+ private
214
+
215
# Look up the character for entity name +s+ in NAMES and return it in
# the encoding of +s+; nil if the name is unknown.
def named_decode(s)
  char = NAMES[ s] or return
  char.encoding == s.encoding ? char : char.encode(s.encoding)
end
225
+
226
# Decode a numeric character reference body ("#228" or "#xe4") to a
# one-character string in the encoding of +s+. Returns nil if +s+ is
# not a numeric reference or does not denote a valid code point.
def numeric_decode s
  if s =~ /\A#(?:(\d+)|x([0-9a-f]+))\z/i then
    c = ($1 ? $1.to_i : ($2.to_i 0x10)).chr Encoding::UTF_8
    c.encode! s.encoding
  end
rescue RangeError
  # Integer#chr rejects surrogates and numbers beyond U+10FFFF; let
  # the caller keep such a reference verbatim instead of crashing.
  nil
end
232
+
233
+ end
234
+
235
+ end
236
+
237
+
238
+
239
+ # URL-able representation
240
+ #
241
+ # == What's actually happening
242
+ #
243
+ # URLs may not contain spaces or several other characters such as slashes, ampersands
244
+ # etc. These characters will be masked by a percent sign and two hex digits
245
+ # representing the ASCII code. Eight bit characters should be masked the
246
+ # same way.
247
+ #
248
+ # An URL line does not store encoding information by itself. A locator may
249
+ # either say one of these:
250
+ #
251
+ # http://www.example.com/subdir/index.html?umlfield=%C3%BCber+alles
252
+ # http://www.example.com/subdir/index.html?umlfield=%FCber+alles
253
+ #
254
+ # The reading CGI has to decide on itself how to treat it.
255
+ #
256
+ # == Examples
257
+ #
258
+ # URLText.encode "'Stop!' said Fred." #=> "%27Stop%21%27+said+Fred."
259
+ # URLText.decode "%27Stop%21%27+said+Fred%2e"
260
+ # #=> "'Stop!' said Fred."
261
+ #
262
+ class URLText
263
+
264
+ attr_accessor :keep_8bit, :keep_space, :mask_space
265
+
266
+ # :call-seq:
267
+ # new( hash) -> urltext
268
+ #
269
+ # Creates a <code>URLText</code> converter.
270
+ #
271
+ # The parameters may be given as values or as a hash.
272
+ #
273
+ # utx = URLText.new :keep_8bit => true, :keep_space => false
274
+ #
275
+ # See the +encode+ method for an explanation of these parameters.
276
+ #
277
+ def initialize hash = nil
278
+ if hash then
279
+ @keep_8bit = hash[ :keep_8bit ]
280
+ @keep_space = hash[ :keep_space]
281
+ @mask_space = hash[ :mask_space]
282
+ end
283
+ end
284
+
285
+ # :call-seq:
286
+ # encode( str) -> str
287
+ #
288
+ # Create a string that contains <code>%XX</code>-encoded bytes.
289
+ #
290
+ # utx = URLText.new
291
+ # utx.encode "'Stop!' said Fred." #=> "%27Stop%21%27+said+Fred."
292
+ #
293
+ # The result will not contain any 8-bit characters, except when
294
+ # +keep_8bit+ is set. The result will be in the same encoding as the
295
+ # argument although this normally has no meaning.
296
+ #
297
+ # utx = URLText.new :keep_8bit => true
298
+ # s = "< ä >".encode "UTF-8"
299
+ # utx.encode s #=> "%3C+\u{e4}+%3E" in UTF-8
300
+ #
301
+ # s = "< ä >".encode "ISO-8859-1"
302
+ # utx.encode s #=> "%3C+\xe4+%3E" in ISO-8859-1
303
+ #
304
+ # A space <code>" "</code> will not be replaced by a plus <code>"+"</code>
305
+ # if +keep_space+ is set.
306
+ #
307
+ # utx = URLText.new :keep_space => true
308
+ # s = "< x >"
309
+ # utx.encode s #=> "%3C x %3E"
310
+ #
311
+ # When +mask_space+ is set, then a space will be represented as
312
+ # <code>"%20"</code>,
313
+ #
314
def encode(str)
  work = str.new_string
  # Without keep_8bit every single byte is inspected and masked.
  work.force_encoding(Encoding::ASCII_8BIT) unless @keep_8bit
  work.gsub!(%r/([^a-zA-Z0-9_.-])/) do |ch|
    plain_space = ch == " " && !@mask_space
    next(@keep_space ? ch : "+") if plain_space
    next ch if @keep_8bit && !ch.ascii_only?
    "%%%02X" % ch.ord
  end
  work.encode!(str.encoding)
end
328
+
329
+
330
# Hash with Symbol keys (any key is converted by +to_sym+) whose
# entries can also be read and written like attributes.
class Dict < Hash

  class <<self
    # Build a Dict, yield it for filling, and return it.
    def create
      i = new
      yield i
      i
    end
  end

  def initialize
    # A bare +super+ would hand the block to Hash.new, which installs
    # it as default proc and calls it again on every missing key.
    super( &nil)
    yield self if block_given?
  end

  def [] key
    super key.to_sym
  end

  def []= key, val
    super key.to_sym, val
  end

  # Store all pairs of +hash+; returns self like Hash#update.
  def update hash
    hash.each { |k,v| self[ k] = v }
    self
  end
  alias merge! update

  # Store +val+ under +key+: nil stays nil, a String that looks like
  # an integer is converted, anything else goes through
  # +to_s.notempty?+.
  def parse key, val
    self[ key] = case val
      when nil                                 then nil
      when /\A(?:[+-]?[1-9][0-9]{,9}|0)\z/     then val.to_i
      else                                          val.to_s.notempty?
    end
  end

  # <code>dict.name</code> reads and <code>dict.name = val</code>
  # writes the entry +:name+.
  def method_missing sym, *args
    if args.empty? and not sym =~ /[!?=]\z/ then
      self[ sym]
    else
      first, *rest = args
      if rest.empty? and sym =~ /=\z/ then
        # Drop the "=" so the reader above finds the value again.
        self[ sym.to_s.chomp( "=")] = first
      else
        super
      end
    end
  end

end
372
+
373
+ # :stopdoc:
374
+ PAIR_SET = "="
375
+ PAIR_SEP = "&"
376
+ # :startdoc:
377
+
378
+ # :call-seq:
379
+ # encode_hash( hash) -> str
380
+ #
381
+ # Encode a <code>Hash</code> to a URL-style string.
382
+ #
383
+ # utx = URLText.new
384
+ #
385
+ # h = { :name => "John Doe", :age => 42 }
386
+ # utx.encode_hash h
387
+ # #=> "name=John+Doe&age=42"
388
+ #
389
+ # h = { :a => ";;;", :x => "äöü" }
390
+ # utx.encode_hash h
391
+ # #=> "a=%3B%3B%3B&x=%C3%A4%C3%B6%C3%BC"
392
+ #
393
def encode_hash(hash)
  parts = []
  hash.each do |key, value|
    value = case value
            when nil   then next      # nil entries are left out
            when true  then key       # true is written as "key=key"
            when false then ""        # false is written as "key="
            else            value
            end
    parts << [key, value].map { |item| encode item.to_s }.join(PAIR_SET)
  end
  parts.join PAIR_SEP
end
403
+
404
+ # :call-seq:
405
+ # mkurl( path, hash, anchor = nil) -> str
406
+ #
407
+ # Make an URL.
408
+ #
409
+ # utx = URLText.new
410
+ # h = { :name => "John Doe", :age => "42" }
411
+ # utx.mkurl "myscript.rb", h, "chapter"
412
+ # #=> "myscript.rb?name=John+Doe&age=42#chapter"
413
+ #
414
def mkurl(path, hash = nil, anchor = nil)
  # The second and third argument may come in either order.
  hash, anchor = anchor, hash unless Hash === hash
  url = "#{path}"
  url << "?#{encode_hash hash}" if hash
  url << "##{anchor}"           if anchor
  url
end
423
+
424
+ public
425
+
426
+ def decode str
427
+ self.class.decode str
428
+ end
429
+
430
+ def decode_hash qstr, &block
431
+ self.class.decode_hash qstr, &block
432
+ end
433
+
434
+ class <<self
435
+
436
+ def std
437
+ @std ||= new
438
+ end
439
+
440
+ def encode str
441
+ std.encode str
442
+ end
443
+
444
+ def encode_hash hash
445
+ std.encode_hash hash
446
+ end
447
+
448
# Build a URL with the standard converter. As with the instance
# method, +hash+ and +anchor+ are both optional.
def mkurl path, hash = nil, anchor = nil
  std.mkurl path, hash, anchor
end
451
+
452
+ # :call-seq:
453
+ # decode( str) -> str
454
+ # decode( str, encoding) -> str
455
+ #
456
+ # Decode the contained string.
457
+ #
458
+ # utx = URLText.new
459
+ # utx.decode "%27Stop%21%27+said+Fred%2e" #=> "'Stop!' said Fred."
460
+ #
461
+ # The encoding will be kept. That means that an invalidly encoded
462
+ # string could be produced.
463
+ #
464
+ # a = "bl%F6d"
465
+ # a.encode! "utf-8"
466
+ # d = utx.decode a
467
+ # d =~ /./ #=> "invalid byte sequence in UTF-8 (ArgumentError)"
468
+ #
469
def decode str
  # Substitute on a binary copy. Inserting a decoded high byte into a
  # string that already holds non-ASCII characters (e.g. "ä=%C3%A4")
  # would otherwise raise Encoding::CompatibilityError.
  r = str.to_s.b
  r.tr! "+", " "
  r.gsub!( /%([0-9A-F]{2})/i) { $1.hex.chr }
  # Relabel only; the bytes may be invalid in that encoding.
  r.force_encoding str.encoding
  r
end
476
+
477
+ # :call-seq:
478
+ # decode_hash( str) -> hash
479
+ # decode_hash( str) { |key,val| ... } -> nil or int
480
+ #
481
+ # Decode a URL-style encoded string to a <code>Hash</code>.
482
+ # In case a block is given, the number of key-value pairs is returned.
483
+ #
484
+ # str = "a=%3B%3B%3B&x=%26auml%3B%26ouml%3B%26uuml%3B"
485
+ # URLText.decode_hash str do |k,v|
486
+ # puts "#{k} = #{v}"
487
+ # end
488
+ #
489
+ # Output:
490
+ #
491
+ # a = ;;;
492
+ # x = &auml;&ouml;&uuml;
493
+ #
494
def decode_hash(qstr)
  unless block_given?
    # Hash form: collect all pairs into a Dict through Dict#parse.
    return Dict.create { |dict|
      each_pair(qstr) { |key, val| dict.parse key, val }
    }
  end
  # Block form: hand over every pair and count them.
  count = 0
  each_pair(qstr) do |key, val|
    yield key, val
    count += 1
  end
  count.nonzero?
end
508
+
509
+ private
510
+
511
# Split a query string at PAIR_SEP and yield each decoded key with its
# decoded value. A key without PAIR_SET is yielded alone.
def each_pair qstr
  qstr or return
  h = qstr.to_s.split PAIR_SEP
  h.each do |pair|
    # "&a=1" or "a=1&&b=2" contain empty segments. They carry no key,
    # and yielding nothing for them made Dict#parse call nil.to_sym.
    next if pair.empty?
    kv = pair.split PAIR_SET, 2
    kv.map! { |x| decode x if x }
    yield *kv
  end
end
520
+
521
+ end
522
+
523
+ end
524
+
525
+ # Header field contents (RFC 2047) encoding
526
+ #
527
+ # == Examples
528
+ #
529
+ # HeaderExt.encode "Jörg Müller"
530
+ # #=> "=?utf-8?Q?J=C3=B6rg_M=C3=BCller?="
531
+ # HeaderExt.decode "=?UTF-8?Q?J=C3=B6rg_M=C3=BCller?="
532
+ # #=> "Jörg Müller"
533
+ #
534
+ class HeaderExt
535
+
536
+ # :call-seq:
537
+ # new( [ parameters] ) -> con
538
+ #
539
+ # Creates a <code>HeaderExt</code> converter.
540
+ #
541
+ # See the +encode+ method for an explanation of the parameters.
542
+ #
543
+ # == Examples
544
+ #
545
+ # con = HeaderExt.new
546
+ # con = HeaderExt.new :base64 => true, :limit => 32, :lower => true
547
+ # con = HeaderExt.new :mask => /["'()]/
548
+ #
549
def initialize params = nil
  if params then
    # Consume a copy; deleting from the argument itself would empty
    # the caller's hash.
    params = params.dup
    @base64 = params.delete :base64
    @limit  = params.delete :limit
    @lower  = params.delete :lower
    @mask   = params.delete :mask
    # Anything left over is a key this class does not know.
    params.empty? or
      raise ArgumentError, "invalid parameter: #{params.keys.first}."
  end
end
559
+
560
+ # :call-seq:
561
+ # needs? str -> true or false
562
+ #
563
+ # Check whether a string needs encoding.
564
+ #
565
def needs?(str)
  return true unless str.ascii_only?
  str =~ @mask ? true : false
end
568
+
569
+ # :call-seq:
570
+ # encode( str) -> str
571
+ #
572
+ # Create a header field style encoded string. The following parameters
573
+ # will be evaluated:
574
+ #
575
+ # :base64 # build ?B? instead of ?Q?
576
+ # :limit # break words longer than this
577
+ # :lower # build lower case ?b? and ?q?
578
+ # :mask # a regular expression detecting characters to mask
579
+ #
580
+ # The result will not contain any 8-bit characters. The encoding will
581
+ # be kept although it won't have a meaning.
582
+ #
583
+ # The parameter <code>:mask</code> will have no influence on the masking
584
+ # itself but will guarantee characters to be masked.
585
+ #
586
+ # == Examples
587
+ #
588
+ # yodel = "Holleri du dödl di, diri diri dudl dö."
589
+ #
590
+ # con = HeaderExt.new
591
+ # con.encode yodel
592
+ # #=> "Holleri du =?UTF-8?Q?d=C3=B6dl?= di, diri diri dudl =?UTF-8?Q?d=C3=B6=2E?="
593
+ #
594
+ # yodel.encode! "iso8859-1"
595
+ # con.encode yodel
596
+ # #=> "Holleri du =?ISO8859-1?Q?d=F6dl?= di, diri diri dudl =?ISO8859-1?Q?d=F6=2E?="
597
+ #
598
+ # e = "€"
599
+ # e.encode! "utf-8" ; con.encode e #=> "=?UTF-8?Q?=E2=82=AC?="
600
+ # e.encode! "iso8859-15" ; con.encode e #=> "=?ISO8859-15?Q?=A4?="
601
+ # e.encode! "ms-ansi" ; con.encode e #=> "=?MS-ANSI?Q?=80?="
602
+ #
603
+ # con = HeaderExt.new :mask => /["'()]/
604
+ # con.encode "'Stop!' said Fred."
605
+ # #=> "=?UTF-8?Q?=27Stop=21=27?= said Fred."
606
+ #
607
def encode str
  do_encoding str do
    # +plain+ is the finished output; +pending+ collects a run of
    # adjacent words that need masking, including the gaps between them.
    plain, pending = "", ""
    while (m = /\S+/.match str)
      gap, word = m.pre_match, m[0]
      if needs? word
        (pending.empty? ? plain : pending) << gap
        pending << word
      else
        unless pending.empty?
          plain << mask(pending)
          pending.clear
        end
        plain << gap << word
      end
      str = m.post_match
    end
    # +str+ now holds only what followed the last word.
    if pending.empty?
      plain << str
    else
      pending << str
      plain << mask(pending)
    end
    plain
  end
end
633
+
634
+ # :call-seq:
635
+ # encode_whole( str) -> str
636
+ #
637
+ # Unlike +encode+, the whole string will be encoded as one piece.
638
+ #
639
+ # yodel = "Holleri du dödl di, diri diri dudl dö."
640
+ # HeaderExt.encode_whole yodel
641
+ # #=> "=?UTF-8?Q?Holleri_du_d=C3=B6dl_di,_diri_diri_dudl_d=C3=B6=2E?="
642
+ #
643
+ def encode_whole str
644
+ do_encoding str do
645
+ mask str
646
+ end
647
+ end
648
+
649
+ private
650
+
651
# Set up @charset, @type and @encoder for +mask+ while the block runs,
# and return the block's result labelled with the encoding of +str+.
def do_encoding str
  # Keep the charset as a String: an Encoding object has no downcase!,
  # so the :lower option used to raise NoMethodError here.
  @charset = str.encoding.name
  @type, @encoder = @base64 ? [ "B", :base64] : [ "Q", :quopri ]
  if @lower then
    @charset = @charset.downcase
    @type    = @type.downcase
  end
  yield.force_encoding str.encoding
ensure
  @charset = @type = @encoder = nil
end
662
+
663
+ # :stopdoc:
664
+ SPACE = " "
665
+ # :startdoc:
666
+
667
# Cut +str+ into pieces of at most @limit characters (one piece if
# @limit is unset), wrap each as "=?charset?type?...?=" using the
# current encoder, and join the pieces with SPACE.
def mask(str)
  width  = @limit || str.length
  pieces = []
  offset = 0
  until offset >= str.length
    pieces << "=?#@charset?#@type?#{send @encoder, str[ offset, width]}?="
    offset += width
  end
  pieces.join SPACE
end
676
+
677
# Base64 text of +c+ without any line breaks.
def base64(c)
  [c].pack "m0"
end
682
+
683
# "Q" encoding, applied to +c+ in place: every byte that is not an
# ASCII letter, digit or space becomes "=XX"; spaces become "_".
def quopri(c)
  c.force_encoding Encoding::ASCII_8BIT
  c.gsub!(/([^ a-zA-Z0-9])/) { |byte| format "=%02X", byte.ord }
  c.tr! " ", "_"
  c
end
689
+
690
+ public
691
+
692
+ def decode str
693
+ self.class.decode str
694
+ end
695
+
696
+ class <<self
697
+
698
+ # The standard header content encoding has a word break limit of 64.
699
+ #
700
+ def std
701
+ @std ||= new :limit => 64
702
+ end
703
+
704
+ # :call-seq:
705
+ # needs? str -> true or false
706
+ #
707
+ # Use the standard content encoding.
708
+ #
709
+ def needs? str
710
+ std.needs? str
711
+ end
712
+
713
+ # :call-seq:
714
+ # encode( str) -> str
715
+ #
716
+ # Use the standard content encoding.
717
+ #
718
+ def encode str
719
+ std.encode str
720
+ end
721
+
722
+ # :call-seq:
723
+ # encode_whole( str) -> str
724
+ #
725
+ # Use the standard content encoding.
726
+ #
727
+ def encode_whole str
728
+ std.encode_whole str
729
+ end
730
+
731
+ # :call-seq:
732
+ # decode( str) -> str
733
+ #
734
+ # Remove header field style escapes.
735
+ #
736
+ # HeaderExt.decode "=?UTF-8?Q?J=C3=B6rg_M=C3=BCller?="
737
+ # #=> "Jörg Müller"
738
+ #
739
def decode str
  pieces, encodings = [], []
  before_last, last = nil, nil
  lexer str do |type, piece|
    case type
    when :decoded
      encodings.push piece.encoding
      if last == :space and (before_last == :decoded or not before_last)
        # Drop whitespace between two encoded words and whitespace at
        # the very beginning.
        pieces.pop
      elsif last == :plain
        pieces.push SPACE
      end
    when :plain
      pieces.push SPACE if last == :decoded
    end
    pieces.push piece
    before_last, last = last, type
  end
  # Drop whitespace that trails an encoded word at the end.
  pieces.pop if last == :space and before_last == :decoded
  encodings.uniq!
  begin
    pieces.join
  rescue EncodingError
    # The pieces do not fit together: convert them all to the next
    # encoding seen in the header and try again.
    raise if encodings.empty?
    target = encodings.shift
    pieces.each { |x| x.encode! target }
    retry
  end
end
774
+
775
# Split a header value into tokens and yield each as (type, text):
# :space for whitespace runs, :decoded for the unmasked content of an
# "=?charset?Q|B?text?=" word, :plain for everything else.
def lexer(str)
  token = /(\s+)|\B=\?(\S*?)\?([QB])\?(\S*?)\?=\B/i
  while str
    m = token.match(str)
    if m.nil?
      yield :plain, str
      break
    end
    yield :plain, m.pre_match unless m.pre_match.empty?
    if m[1]
      yield :space, m[0]
    else
      yield :decoded, unmask(m[2], m[3], m[4])
    end
    rest = m.post_match
    str = rest.empty? ? nil : rest
  end
end
791
+
792
+ private
793
+
794
# Undo the transfer encoding +tp+ ("B" or "Q", any case) of +txt+ and
# label the result with charset +cs+. Both +cs+ and +txt+ may be
# modified in place.
def unmask(cs, tp, txt)
  case tp.upcase
  when "B"
    txt = txt.unpack("m*").first
  when "Q"
    txt.tr! "_", " "
    txt = txt.unpack("M*").first
  end
  cs.slice!(/\*\w+\z/)    # language as in rfc2231, 5.
  if cs =~ /\Autf-?7\z/i
    # Ruby doesn't seem to do that.
    txt.force_encoding Encoding::US_ASCII
    txt.gsub!(/\+([0-9a-zA-Z+\/]*)-?/) do
      b64 = $1
      if b64.empty?
        "+"
      else
        raw = "#{b64}==".unpack("m*").join
        raw.unpack("S>*").map { |x| x.chr Encoding::UTF_8 }.join
      end
    end
    txt.force_encoding Encoding::UTF_8
  elsif cs =~ /\Aunknown/i
    txt.force_encoding Encoding::US_ASCII
  else
    txt.force_encoding cs
  end
  txt
end
820
+
821
+ end
822
+
823
+ end
824
+
825
+ end
826
+