hermeneutics 1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,261 @@
1
+ #
2
+ # hermeneutics/css.rb -- CSS generation
3
+ #
4
+
5
+ require "hermeneutics/html"
6
+
7
+
8
+ module Hermeneutics
9
+
10
+ # == Example
11
+ #
12
+ # require "hermeneutics/css"
13
+ # require "hermeneutics/color"
14
+ # class MyCss < Css
15
+ #
16
+ # COL1 = "904f02".to_rgb
17
+ # COL2 = COL1.edit_hsv { |h,s,v| [h+15,s,v] }
18
+ #
19
+ # ATTR_COL1 = { color: COL1 }
20
+ # ATTR_COL2 = { color: COL2 }
21
+ # ATTR_DECON = { text_decoration: "none" }
22
+ # ATTR_DECOU = { text_decoration: "underline" }
23
+ #
24
+ # def build
25
+ # a ":link", ATTR_COL1, ATTR_DECON
26
+ # a ":visited", ATTR_COL2, ATTR_DECON
27
+ # a ":active", ATTR_COL1, ATTR_DECON
28
+ # a ":focus", ATTR_COL1, ATTR_DECOU
29
+ # space
30
+ #
31
+ # body "#dummy" do
32
+ # properties :background_color => "f7f7f7".to_rgb
33
+ # div ".child", :background_color => "e7e7e7".to_rgb
34
+ # @b = selector
35
+ # td do
36
+ # @bt = selector
37
+ # end
38
+ # end
39
+ # selectors @b, @bt, :font_size => :large
40
+ # end
41
+ # end
42
+ # Hermeneutics::Css.document
43
+ #
44
+ class Css
45
+
46
+ class <<self
47
+ attr_accessor :main
48
+ def inherited cls
49
+ Css.main = cls
50
+ end
51
+ def open out = nil
52
+ i = (@main||self).new
53
+ i.generate out do
54
+ yield i
55
+ end
56
+ end
57
+ def document *args, &block
58
+ open do |i|
59
+ i.document *args, &block
60
+ end
61
+ end
62
+ def write_file name = nil
63
+ name ||= (File.basename $0, ".rb") + ".css"
64
+ File.open name, "w" do |f|
65
+ open f do |i|
66
+ if block_given? then
67
+ yield i
68
+ else
69
+ i.document
70
+ end
71
+ end
72
+ end
73
+ end
74
+
75
+ end
76
+
77
+ def generate out = nil
78
+ o = @out
79
+ begin
80
+ @out = out||$stdout
81
+ yield
82
+ ensure
83
+ @out = o
84
+ end
85
+ end
86
+
87
+
88
# Keeps track of the chain of nested tag selectors while a stylesheet
# is being built. Each chain entry is an array of the pieces
# (descendor, tag name, sub-specification) of one nesting level.
class Selector

  def initialize
    @chain = []
  end

  # Push one nesting level onto the chain, yield, and remove the
  # level again afterwards.
  #
  # +descend+:: <code>">"</code> or <code>:child</code>,
  #             <code>"+"</code> or <code>:sibling</code>, or +nil+
  # +name+::    the tag name; only added to the selector if it is
  #             <code>"*"</code> or a key of <code>Html::TAGS</code>
  # +sub+::     class, id, pseudo-class or attribute specification
  #             (<code>".cls"</code>, <code>"#id"</code>,
  #             <code>":link"</code>, <code>"[a=b]"</code>) or +nil+
  #
  # Raises a RuntimeError if a descendor is given on the outermost
  # level, if the descendor is unknown or if +sub+ is malformed. In
  # these cases the chain is left untouched.
  #
  def tag descend, name, sub
    descend and @chain.empty? and
      raise "Descendor without previous tag: #{descend} #{name}#{sub}."
    c = []
    c.push case descend
      when ">", :child   then "> "
      when "+", :sibling then "+ "
      when nil           then
      else
        raise "Unknown descendor: #{descend}"
    end
    c.push name if name == "*" or Html::TAGS[ name]
    if sub then
      sub =~ /\A(?:
                [:.#]([a-z_0-9-]+)|
                \[([a-z0-9-]+)([~|]?=)(.*)\]
              )*\z/ix or
        raise "Improper tag specification: #{name}#{sub}."
      c.push sub
    end
    @chain.push c
    # Only undo what was actually pushed. A validation error above
    # must not pop the entry of the enclosing level.
    begin
      yield
    ensure
      @chain.pop
    end
  end

  protected

  # Overwrite this selector's chain with the entries of +chain+.
  def replace chain
    @chain.replace chain
  end

  public

  # Return an independent selector holding the current chain, so it
  # stays valid after the nesting levels have been left.
  def dup
    s = Selector.new
    s.replace @chain
    s
  end

  # Build the CSS selector string: the pieces of each level are
  # concatenated, the levels are separated by a space.
  def to_s
    @chain.map { |c| c.join }.join " "
  end

end
131
+
132
+ def initialize
133
+ @selector = Selector.new
134
+ end
135
+
136
+ def document *args, &block
137
+ build *args, &block
138
+ end
139
+
140
# Return the path of the current output object, or +nil+ if there is
# no output or it does not have a +path+ method (e.g. <code>$stdout</code>
# replaced by a StringIO or an Array).
#
# The capability is checked with +respond_to?+ instead of rescuing
# NoMethodError, so that a genuine NoMethodError raised inside the
# output object's +path+ implementation is not silently swallowed.
def path
  @out.path if @out.respond_to? :path
end
144
+
145
+ def comment str
146
+ @out << "/*"
147
+ str = mask_comment str
148
+ ml = str =~ %r(#$/)
149
+ if ml then
150
+ @out << $/
151
+ str.each_line { |l|
152
+ l.chomp!
153
+ @out << " * " << l << $/
154
+ }
155
+ else
156
+ @out << " " << str
157
+ end
158
+ @out << " */"
159
+ ml and @out << $/
160
+ end
161
+
162
+ def space
163
+ @out << $/
164
+ end
165
+
166
+ def tag *args
167
+ p = []
168
+ while Hash === args.last do
169
+ p.unshift args.pop
170
+ end
171
+ @selector.tag *args do
172
+ if p.empty? then
173
+ yield
174
+ else
175
+ properties *p
176
+ end
177
+ end
178
+ end
179
+
180
+ # remove Kernel methods of the same name: :p, :select, :sub
181
+ m = Html::TAGS.keys & (private_instance_methods +
182
+ protected_instance_methods + instance_methods)
183
+ undef_method *m
184
+
185
+ def method_missing sym, *args, &block
186
+ if Html::TAGS[ sym] then
187
+ if args.any? and not Hash === args.first then
188
+ sub = args.shift
189
+ end
190
+ if args.any? and not Hash === args.first then
191
+ desc, sub = sub, args.shift
192
+ elsif sub !~ /[a-z]/i or Symbol === sub then
193
+ desc, sub = sub, nil
194
+ end
195
+ tag desc, sym, sub, *args, &block
196
+ else
197
+ super
198
+ end
199
+ end
200
+
201
+ def properties *args
202
+ write @selector.to_s, *args
203
+ end
204
+
205
+ def selector
206
+ @selector.dup
207
+ end
208
+
209
+ def selectors *args
210
+ s = []
211
+ while Selector === args.first do
212
+ s.push args.shift
213
+ end
214
+ t = s.join ", "
215
+ write t, *args
216
+ end
217
+
218
+ private
219
+
220
+ def mask_comment str
221
+ str.gsub /\*\//, "* /"
222
+ end
223
+
224
+ INDENT = " "
225
+
226
+ def write sel, *args
227
+ p = {}
228
+ args.each { |a| p.update a }
229
+ @out << sel << " {"
230
+ nl, ind = if p.size > 1 then
231
+ @out << $/
232
+ [ $/, INDENT]
233
+ else
234
+ [ " ", " "]
235
+ end
236
+ single p do |s|
237
+ @out << ind << s << nl
238
+ end
239
+ @out << "}" << $/
240
+ end
241
+
242
+ def single hash
243
+ if block_given? then
244
+ hash.map { |k,v|
245
+ if Symbol === k then k = k.new_string ; k.gsub! /_/, "-" end
246
+ if Array === v then v = v.join " " end
247
+ yield "#{k}: #{v};"
248
+ }
249
+ else
250
+ r = []
251
+ single hash do |s|
252
+ r.push s
253
+ end
254
+ r
255
+ end
256
+ end
257
+
258
+ end
259
+
260
+ end
261
+
@@ -0,0 +1,826 @@
1
+ # encoding: UTF-8
2
+
3
+ #
4
+ # hermeneutics/escape.rb -- Various encoding schemes for internet purposes
5
+ #
6
+
7
+ require "supplement"
8
+
9
+
10
+ =begin rdoc
11
+
12
+ :section: Classes defined here
13
+
14
+ Hermeneutics::Entities encodes to and decodes from HTML-Entities
15
+ (<code>&amp;</code> etc.)
16
+
17
+ Hermeneutics::URLText encodes to and decodes from URLs
18
+ (<code>%2d</code> etc.)
19
+
20
+ Hermeneutics::HeaderExt encodes to and decodes from E-Mail Header fields
21
+ (<code>=?UTF-8?Q?=C3=B6?=</code> etc.).
22
+
23
+ =end
24
+
25
+ module Hermeneutics
26
+
27
+ # Translate HTML and XML character entities: <code>"&"</code> to
28
+ # <code>"&amp;"</code> and vice versa.
29
+ #
30
+ # == What actually happens
31
+ #
32
+ # HTML pages usually come in with characters encoded <code>&lt;</code>
33
+ # for <code><</code> and <code>&euro;</code> for <code>€</code>.
34
+ #
35
+ # Further, they may contain a meta tag in the header like this:
36
+ #
37
+ # <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
38
+ # <meta charset="utf-8" /> (HTML5)
39
+ #
40
+ # or
41
+ #
42
+ # <?xml version="1.0" encoding="UTF-8" ?> (XHTML)
43
+ #
44
+ # When +charset+ is <code>utf-8</code> and the file contains the byte
45
+ # sequence <code>"\303\244"</code>/<code>"\xc3\xa4"</code> then there will
46
+ # be displayed a character <code>"ä"</code>.
47
+ #
48
+ # When +charset+ is <code>iso8859-15</code> and the file contains the byte
49
+ # sequence <code>"\344"</code>/<code>"\xe4"</code> then there will be
50
+ # displayed a character <code>"ä"</code>, too.
51
+ #
52
+ # The sequence <code>"&auml;"</code> will produce an <code>"ä"</code> in any
53
+ # case.
54
+ #
55
+ # == What you should do
56
+ #
57
+ # Generating your own HTML pages you will always be safe when you only
58
+ # produce entity tags as <code>&auml;</code> and <code>&euro;</code> or
59
+ # <code>&#x00e4;</code> and <code>&#x20ac;</code> respectively.
60
+ #
61
+ # == What this module does
62
+ #
63
+ # This module translates strings to a HTML-masked version. The encoding will
64
+ # not be changed and you may demand to keep 8-bit-characters.
65
+ #
66
+ # == Examples
67
+ #
68
+ # Entities.encode "<" #=> "&lt;"
69
+ # Entities.decode "&lt;" #=> "<"
70
+ # Entities.encode "äöü" #=> "&auml;&ouml;&uuml;"
71
+ # Entities.decode "&auml;&ouml;&uuml;" #=> "äöü"
72
+ #
73
+ class Entities
74
+
75
+ # :stopdoc:
76
+ SPECIAL_ASC = {
77
+ '"' => "quot", "&" => "amp", "<" => "lt", ">" => "gt",
78
+ }
79
+ RE_ASC = /[#{SPECIAL_ASC.keys.map { |x| Regexp.quote x }.join}]/
80
+
81
+ SPECIAL = {
82
+ "\u00a0" => "nbsp",
83
+ "¡" => "iexcl", "¢" => "cent", "£" => "pound", "€" => "euro", "¥" => "yen", "Š" => "Scaron",
84
+ "¤" => "curren", "¦" => "brvbar",
85
+ "§" => "sect", "š" => "scaron", "©" => "copy", "ª" => "ordf", "«" => "laquo", "¬" => "not", "­" => "shy",
86
+ "¨" => "uml",
87
+ "®" => "reg", "¯" => "macr",
88
+
89
+ "°" => "deg", "±" => "plusmn", "²" => "sup2", "³" => "sup3", "µ" => "micro", "¶" => "para",
90
+ "´" => "acute",
91
+ "·" => "middot", "¹" => "sup1", "º" => "ordm", "»" => "raquo", "Œ" => "OElig", "œ" => "oelig",
92
+ "¸" => "cedil", "¼" => "frac14", "½" => "frac12",
93
+ "Ÿ" => "Yuml", "¿" => "iquest",
94
+ "¾" => "frac34",
95
+
96
+ "À" => "Agrave", "Á" => "Aacute", "Â" => "Acirc", "Ã" => "Atilde", "Ä" => "Auml", "Å" => "Aring", "Æ" => "AElig",
97
+ "Ç" => "Ccedil", "È" => "Egrave", "É" => "Eacute", "Ê" => "Ecirc", "Ë" => "Euml", "Ì" => "Igrave", "Í" => "Iacute",
98
+ "Î" => "Icirc", "Ï" => "Iuml",
99
+ "Ð" => "ETH", "Ñ" => "Ntilde", "Ò" => "Ograve", "Ó" => "Oacute", "Ô" => "Ocirc", "Õ" => "Otilde", "Ö" => "Ouml",
100
+ "×" => "times", "Ø" => "Oslash", "Ù" => "Ugrave", "Ú" => "Uacute", "Û" => "Ucirc", "Ü" => "Uuml", "Ý" => "Yacute",
101
+ "Þ" => "THORN", "ß" => "szlig",
102
+
103
+ "à" => "agrave", "á" => "aacute", "â" => "acirc", "ã" => "atilde", "ä" => "auml", "å" => "aring", "æ" => "aelig",
104
+ "ç" => "ccedil", "è" => "egrave", "é" => "eacute", "ê" => "ecirc", "ë" => "euml", "ì" => "igrave", "í" => "iacute",
105
+ "î" => "icirc", "ï" => "iuml",
106
+ "ð" => "eth", "ñ" => "ntilde", "ò" => "ograve", "ó" => "oacute", "ô" => "ocirc", "õ" => "otilde", "ö" => "ouml",
107
+ "÷" => "divide", "ø" => "oslash", "ù" => "ugrave", "ú" => "uacute", "û" => "ucirc", "ü" => "uuml", "ý" => "yacute",
108
+ "þ" => "thorn", "ÿ" => "yuml",
109
+
110
+ "‚" => "bsquo", "‘" => "lsquo", "„" => "bdquo", "“" => "ldquo", "‹" => "lsaquo", "›" => "rsaquo",
111
+ "–" => "ndash", "—" => "mdash", "‰" => "permil", "…" => "hellip", "†" => "dagger", "‡" => "Dagger",
112
+ }.update SPECIAL_ASC
113
+ NAMES = SPECIAL.invert
114
+ # :startdoc:
115
+
116
+ attr_accessor :keep_8bit
117
+
118
+ # :call-seq:
119
+ # new( keep_8bit = nil) -> ent
120
+ # new( :keep_8bit => val) -> ent
121
+ #
122
+ # Creates an <code>Entities</code> converter.
123
+ #
124
+ # The parameter may be given as one value or as a hash.
125
+ #
126
+ # ent = Entities.new true
127
+ # ent = Entities.new :keep_8bit => true
128
+ #
129
+ def initialize keep_8bit = nil
130
+ @keep_8bit = case keep_8bit
131
+ when Hash then keep_8bit[ :keep_8bit]
132
+ else keep_8bit
133
+ end
134
+ end
135
+
136
+ # :call-seq:
137
+ # ent.encode( str) -> str
138
+ #
139
+ # Create a string whose characters are masked the HTML way:
140
+ #
141
+ # ent = Entities.new
142
+ # ent.encode "&<\"" #=> "&amp;&lt;&quot;"
143
+ # ent.encode "äöü" #=> "&auml;&ouml;&uuml;"
144
+ #
145
+ # The result will be in the same encoding as the source even if it will
146
+ # not contain any 8-bit characters (what can only happen when +keep_8bit+
147
+ # is set).
148
+ #
149
+ # ent = Entities.new true
150
+ #
151
+ # uml = "<ä>".encode "UTF-8"
152
+ # ent.encode uml #=> "&lt;\xc3\xa4&gt;" in UTF-8
153
+ #
154
+ # uml = "<ä>".encode "ISO-8859-1"
155
+ # ent.encode uml #=> "&lt;\xe4&gt;" in ISO-8859-1
156
+ #
157
+ def encode str
158
+ r = str.new_string
159
+ r.gsub! RE_ASC do |x| "&#{SPECIAL_ASC[ x]};" end
160
+ unless @keep_8bit then
161
+ r.gsub! /[^\0-\x7f]/ do |c|
162
+ c.encode! __ENCODING__
163
+ s = SPECIAL[ c] || ("#x%04x" % c.ord)
164
+ "&#{s};"
165
+ end
166
+ end
167
+ r
168
+ end
169
+
170
+ def decode str
171
+ self.class.decode str
172
+ end
173
+
174
+ public
175
+
176
class <<self

  # The shared default converter (created lazily with default
  # settings).
  def std
    @std ||= new
  end

  # Encode +str+ using the default converter, see Entities#encode.
  def encode str
    std.encode str
  end

  # :call-seq:
  #   Entities.decode( str)     -> str
  #
  # Replace HTML-style masks by normal characters:
  #
  #   Entities.decode "&lt;"                       #=> "<"
  #   Entities.decode "&auml;&ouml;&uuml;"         #=> "äöü"
  #
  # Unmasked 8-bit-characters (<code>"ä"</code> instead of
  # <code>"&auml;"</code>) will be kept but translated to
  # a unique encoding.
  #
  #   s = "ä &ouml; ü"
  #   s.encode! "utf-8"
  #   Entities.decode s                            #=> "ä ö ü"
  #
  #   s = "\xe4 &ouml; \xfc &#x20ac;"
  #   s.force_encoding "iso-8859-15"
  #   Entities.decode s                            #=> "ä ö ü €"
  #                                                    (in iso8859-15)
  #
  # Only <code>&name;</code>, <code>&#123;</code> and
  # <code>&#x1f;</code> are looked at. A stray ampersand does not
  # swallow a following entity (<code>"AT&T &amp; co"</code> yields
  # <code>"AT&T & co"</code>). Anything that cannot be decoded (unknown
  # name, invalid code point, character not representable in the
  # encoding of +str+) is left as it is.
  #
  def decode str
    str.gsub(/&(#?[0-9a-z]+);/i) do
      (named_decode $1) or (numeric_decode $1) or $&
    end
  end

  private

  # Look up the entity name +s+ in +NAMES+ and return the character in
  # the encoding of +s+; +nil+ if the name is unknown or the character
  # cannot be converted to that encoding.
  def named_decode s
    c = NAMES[ s]
    if c then
      if c.encoding != s.encoding then
        c.encode s.encoding
      else
        c
      end
    end
  rescue EncodingError
    nil
  end

  # Decode <code>#123</code> or <code>#x7b</code> to the character in
  # the encoding of +s+; +nil+ if +s+ is not a numeric reference, the
  # number is not a valid code point (RangeError from Integer#chr) or
  # the character cannot be converted to that encoding.
  def numeric_decode s
    if s =~ /\A#(?:(\d+)|x([0-9a-f]+))\z/i then
      c = ($1 ? $1.to_i : ($2.to_i 0x10)).chr Encoding::UTF_8
      c.encode! s.encoding
    end
  rescue RangeError, EncodingError
    nil
  end

end
234
+
235
+ end
236
+
237
+
238
+
239
+ # URL-able representation
240
+ #
241
+ # == What's actually happening
242
+ #
243
+ # URLs may not contain spaces and several characters such as slashes, ampersands
244
+ # etc. These characters will be masked by a percent sign and two hex digits
245
+ # representing the ASCII code. Eight bit characters should be masked the
246
+ # same way.
247
+ #
248
+ # An URL line does not store encoding information by itself. A locator may
249
+ # either say one of these:
250
+ #
251
+ # http://www.example.com/subdir/index.html?umlfield=%C3%BCber+alles
252
+ # http://www.example.com/subdir/index.html?umlfield=%FCber+alles
253
+ #
254
+ # The reading CGI has to decide on itself how to treat it.
255
+ #
256
+ # == Examples
257
+ #
258
+ # URLText.encode "'Stop!' said Fred." #=> "%27Stop%21%27+said+Fred."
259
+ # URLText.decode "%27Stop%21%27+said+Fred%2e"
260
+ # #=> "'Stop!' said Fred."
261
+ #
262
+ class URLText
263
+
264
+ attr_accessor :keep_8bit, :keep_space, :mask_space
265
+
266
+ # :call-seq:
267
+ # new( hash) -> urltext
268
+ #
269
+ # Creates a <code>URLText</code> converter.
270
+ #
271
+ # The parameters may be given as values or as a hash.
272
+ #
273
+ # utx = URLText.new :keep_8bit => true, :keep_space => false
274
+ #
275
+ # See the +encode+ method for an explanation of these parameters.
276
+ #
277
+ def initialize hash = nil
278
+ if hash then
279
+ @keep_8bit = hash[ :keep_8bit ]
280
+ @keep_space = hash[ :keep_space]
281
+ @mask_space = hash[ :mask_space]
282
+ end
283
+ end
284
+
285
+ # :call-seq:
286
+ # encode( str) -> str
287
+ #
288
+ # Create a string that contains <code>%XX</code>-encoded bytes.
289
+ #
290
+ # utx = URLText.new
291
+ # utx.encode "'Stop!' said Fred." #=> "%27Stop%21%27+said+Fred."
292
+ #
293
+ # The result will not contain any 8-bit characters, except when
294
+ # +keep_8bit+ is set. The result will be in the same encoding as the
295
+ # argument although this normally has no meaning.
296
+ #
297
+ # utx = URLText.new :keep_8bit => true
298
+ # s = "< ä >".encode "UTF-8"
299
+ # utx.encode s #=> "%3C+\u{e4}+%3E" in UTF-8
300
+ #
301
+ # s = "< ä >".encode "ISO-8859-1"
302
+ # utx.encode s #=> "%3C+\xe4+%3E" in ISO-8859-1
303
+ #
304
+ # A space <code>" "</code> will not be replaced by a plus <code>"+"</code>
305
+ # if +keep_space+ is set.
306
+ #
307
+ # utx = URLText.new :keep_space => true
308
+ # s = "< x >"
309
+ # utx.encode s #=> "%3C x %3E"
310
+ #
311
+ # When +mask_space+ is set, then a space will be represented as
312
+ # <code>"%20"</code>.
313
+ #
314
+ def encode str
315
+ r = str.new_string
316
+ r.force_encoding Encoding::ASCII_8BIT unless @keep_8bit
317
+ r.gsub! %r/([^a-zA-Z0-9_.-])/ do |c|
318
+ if c == " " and not @mask_space then
319
+ @keep_space ? c : "+"
320
+ elsif not @keep_8bit or c.ascii_only? then
321
+ "%%%02X" % c.ord
322
+ else
323
+ c
324
+ end
325
+ end
326
+ r.encode! str.encoding
327
+ end
328
+
329
+
330
# A Hash with symbolized keys whose entries can also be reached by
# method calls (<code>dict.name</code>, <code>dict.name = val</code>).
# Used as the result of URLText.decode_hash.
class Dict < Hash

  class <<self
    # Build a new Dict, yield it for filling and return it.
    def create
      i = new
      yield i
      i
    end
  end

  # Create an empty Dict and yield it if a block is given.
  #
  # The block is explicitly kept away from <code>Hash#initialize</code>;
  # otherwise it would be installed as the hash's default proc and be
  # called again on every lookup of a missing key.
  def initialize
    super(&nil)
    yield self if block_given?
  end

  # Read access; +key+ is converted with +to_sym+.
  def [] key
    super key.to_sym
  end

  # Write access; +key+ is converted with +to_sym+.
  def []= key, val
    super key.to_sym, val
  end

  # Store all pairs of +hash+ under symbolized keys. Returns +self+,
  # as <code>Hash#update</code> does.
  def update hash
    hash.each { |k,v| self[ k] = v }
    self
  end
  alias merge! update

  # Store a value that came in as a string: +nil+ stays +nil+, a plain
  # decimal integer (optional sign, at most ten digits) is converted
  # with +to_i+, an empty string becomes +nil+, anything else is
  # stored as a string.
  def parse key, val
    self[ key] = case val
      when nil                                 then nil
      when /\A(?:[+-]?[1-9][0-9]{,9}|0)\z/     then val.to_i
      else                                          val.to_s.notempty?
    end
  end

  # <code>dict.name</code> reads <code>dict[ :name]</code>,
  # <code>dict.name = val</code> writes <code>dict[ :name]</code>.
  # Names ending in <code>!</code> or <code>?</code> and calls with
  # other argument counts are passed on to +super+.
  def method_missing sym, *args
    if args.empty? and not sym =~ /[!?=]\z/ then
      self[ sym]
    else
      first, *rest = args
      if rest.empty? and sym =~ /=\z/ then
        # Strip the trailing "=" so that the reader finds the value.
        self[ sym.to_s.chomp "="] = first
      else
        super
      end
    end
  end

end
372
+
373
+ # :stopdoc:
374
+ PAIR_SET = "="
375
+ PAIR_SEP = "&"
376
+ # :startdoc:
377
+
378
+ # :call-seq:
379
+ # encode_hash( hash) -> str
380
+ #
381
+ # Encode a <code>Hash</code> to a URL-style string.
382
+ #
383
+ # utx = URLText.new
384
+ #
385
+ # h = { :name => "John Doe", :age => 42 }
386
+ # utx.encode_hash h
387
+ # #=> "name=John+Doe&age=42"
388
+ #
389
+ # h = { :a => ";;;", :x => "äöü" }
390
+ # utx.encode_hash h
391
+ # #=> "a=%3B%3B%3B&x=%C3%A4%C3%B6%C3%BC"
392
+ #
393
+ def encode_hash hash
394
+ hash.map { |(k,v)|
395
+ case v
396
+ when nil then next
397
+ when true then v = k
398
+ when false then v = ""
399
+ end
400
+ [k, v].map { |x| encode x.to_s }.join PAIR_SET
401
+ }.compact.join PAIR_SEP
402
+ end
403
+
404
+ # :call-seq:
405
+ # mkurl( path, hash, anchor = nil) -> str
406
+ #
407
+ # Make an URL.
408
+ #
409
+ # utx = URLText.new
410
+ # h = { :name => "John Doe", :age => "42" }
411
+ # utx.mkurl "myscript.rb", h, "chapter"
412
+ # #=> "myscript.rb?name=John+Doe&age=42#chapter"
413
+ #
414
+ def mkurl path, hash = nil, anchor = nil
415
+ unless Hash === hash then
416
+ hash, anchor = anchor, hash
417
+ end
418
+ r = "#{path}"
419
+ r << "?#{encode_hash hash}" if hash
420
+ r << "##{anchor}" if anchor
421
+ r
422
+ end
423
+
424
+ public
425
+
426
+ def decode str
427
+ self.class.decode str
428
+ end
429
+
430
+ def decode_hash qstr, &block
431
+ self.class.decode_hash qstr, &block
432
+ end
433
+
434
class <<self

  # The shared default converter (created lazily with default
  # settings).
  def std
    @std ||= new
  end

  # Encode +str+ using the default converter, see URLText#encode.
  def encode str
    std.encode str
  end

  # Encode +hash+ using the default converter, see
  # URLText#encode_hash.
  def encode_hash hash
    std.encode_hash hash
  end

  # Make a URL using the default converter, see URLText#mkurl.
  # As with the instance method, +hash+ may be omitted.
  def mkurl path, hash = nil, anchor = nil
    std.mkurl path, hash, anchor
  end

  # :call-seq:
  #   decode( str)              -> str
  #
  # Decode the contained string.
  #
  #   utx = URLText.new
  #   utx.decode "%27Stop%21%27+said+Fred%2e"  #=> "'Stop!' said Fred."
  #
  # The encoding will be kept. That means that an invalidly encoded
  # string could be produced.
  #
  #   a = "bl%F6d"
  #   a.encode! "utf-8"
  #   d = utx.decode a
  #   d =~ /./      #=> "invalid byte sequence in UTF-8 (ArgumentError)"
  #
  def decode str
    r = str.new_string
    # Work on raw bytes. Inserting a decoded byte >= 0x80 into a
    # string that already holds non-ASCII characters would otherwise
    # raise Encoding::CompatibilityError.
    r.force_encoding Encoding::ASCII_8BIT
    r.tr! "+", " "
    r.gsub!(/%([0-9A-F]{2})/i) { $1.hex.chr }
    r.force_encoding str.encoding
    r
  end

  # :call-seq:
  #   decode_hash( str)                    -> hash
  #   decode_hash( str) { |key,val| ... }  -> nil or int
  #
  # Decode a URL-style encoded string to a <code>Hash</code>.
  # In case a block is given, the number of key-value pairs is
  # returned (+nil+ if there were none).
  #
  #   str = "a=%3B%3B%3B&x=%C3%A4%C3%B6%C3%BC"
  #   URLText.decode_hash str do |k,v|
  #     puts "#{k} = #{v}"
  #   end
  #
  # Output:
  #
  #   a = ;;;
  #   x = äöü
  #
  # Empty pairs (as in <code>"a=1&&b=2"</code>) are skipped.
  #
  def decode_hash qstr
    if block_given? then
      i = 0
      each_pair qstr do |k,v|
        yield k, v
        i += 1
      end
      i.nonzero?
    else
      Dict.create do |h|
        each_pair qstr do |k,v| h.parse k, v end
      end
    end
  end

  private

  # Split +qstr+ at +PAIR_SEP+ and yield the decoded key and value of
  # each pair. The value is +nil+ if the pair contains no +PAIR_SET+.
  # Does nothing if +qstr+ is +nil+.
  def each_pair qstr
    qstr or return
    qstr.to_s.split( PAIR_SEP).each do |pair|
      # An empty pair has no key at all; there is nothing to yield.
      next if pair.empty?
      k, v = pair.split PAIR_SET, 2
      yield decode( k), (v && decode( v))
    end
  end

end
522
+
523
+ end
524
+
525
+ # Header field contents (RFC 2047) encoding
526
+ #
527
+ # == Examples
528
+ #
529
+ # HeaderExt.encode "Jörg Müller"
530
+ # #=> "=?utf-8?Q?J=C3=B6rg_M=C3=BCller?="
531
+ # HeaderExt.decode "=?UTF-8?Q?J=C3=B6rg_M=C3=BCller?="
532
+ # #=> "Jörg Müller"
533
+ #
534
+ class HeaderExt
535
+
536
+ # :call-seq:
537
+ # new( [ parameters] ) -> con
538
+ #
539
+ # Creates a <code>HeaderExt</code> converter.
540
+ #
541
+ # See the +encode+ method for an explanation of the parameters.
542
+ #
543
+ # == Examples
544
+ #
545
+ # con = HeaderExt.new
546
+ # con = HeaderExt.new :base64 => true, :limit => 32, :lower => true
547
+ # con = HeaderExt.new :mask => /["'()]/
548
+ #
549
# Store the conversion parameters <code>:base64</code>,
# <code>:limit</code>, <code>:lower</code> and <code>:mask</code>.
#
# Raises ArgumentError if +params+ contains any other key.
# The hash passed in by the caller is not modified.
def initialize params = nil
  if params then
    # Work on a copy; the keys are consumed with +delete+ below.
    params = params.dup
    @base64 = params.delete :base64
    @limit  = params.delete :limit
    @lower  = params.delete :lower
    @mask   = params.delete :mask
    params.empty? or
      raise ArgumentError, "invalid parameter: #{params.keys.first}."
  end
end
559
+
560
+ # :call-seq:
561
+ # needs? str -> true or false
562
+ #
563
+ # Check whether a string needs encoding.
564
+ #
565
# A string needs encoding if it contains non-ASCII characters or if
# it matches the <code>:mask</code> expression. Always returns
# +true+ or +false+.
def needs? str
  return true unless str.ascii_only?
  (str =~ @mask) ? true : false
end
568
+
569
+ # :call-seq:
570
+ # encode( str) -> str
571
+ #
572
+ # Create a header field style encoded string. The following parameters
573
+ # will be evaluated:
574
+ #
575
+ # :base64 # build ?B? instead of ?Q?
576
+ # :limit # break words longer than this
577
+ # :lower # build lower case ?b? and ?q?
578
+ # :mask # a regular expression detecting characters to mask
579
+ #
580
+ # The result will not contain any 8-bit characters. The encoding will
581
+ # be kept although it won't have a meaning.
582
+ #
583
+ # The parameter <code>:mask</code> will have no influence on the masking
584
+ # itself but will guarantee characters to be masked.
585
+ #
586
+ # == Examples
587
+ #
588
+ # yodel = "Holleri du dödl di, diri diri dudl dö."
589
+ #
590
+ # con = HeaderExt.new
591
+ # con.encode yodel
592
+ # #=> "Holleri du =?UTF-8?Q?d=C3=B6dl?= di, diri diri dudl =?UTF-8?Q?d=C3=B6=2E?="
593
+ #
594
+ # yodel.encode! "iso8859-1"
595
+ # con.encode yodel
596
+ # #=> "Holleri du =?ISO8859-1?Q?d=F6dl?= di, diri diri dudl =?ISO8859-1?Q?d=F6=2E?="
597
+ #
598
+ # e = "€"
599
+ # e.encode! "utf-8" ; con.encode e #=> "=?UTF-8?Q?=E2=82=AC?="
600
+ # e.encode! "iso8859-15" ; con.encode e #=> "=?ISO8859-15?Q?=A4?="
601
+ # e.encode! "ms-ansi" ; con.encode e #=> "=?MS-ANSI?Q?=80?="
602
+ #
603
+ # con = HeaderExt.new :mask => /["'()]/
604
+ # con.encode "'Stop!' said Fred."
605
+ # #=> "=?UTF-8?Q?=27Stop=21=27?= said Fred."
606
+ #
607
+ def encode str
608
+ do_encoding str do
609
+ # I don't like this kind of programming style but it seems to work. BS
610
+ r, enc = "", ""
611
+ while str =~ /\S+/ do
612
+ if needs? $& then
613
+ (enc.notempty? || r) << $`
614
+ enc << $&
615
+ else
616
+ if not enc.empty? then
617
+ r << (mask enc)
618
+ enc.clear
619
+ end
620
+ r << $` << $&
621
+ end
622
+ str = $'
623
+ end
624
+ if not enc.empty? then
625
+ enc << str
626
+ r << (mask enc)
627
+ else
628
+ r << str
629
+ end
630
+ r
631
+ end
632
+ end
633
+
634
+ # :call-seq:
635
+ # encode_whole( str) -> str
636
+ #
637
+ # Unlike +encode+, the whole string will be encoded as one piece.
638
+ #
639
+ # yodel = "Holleri du dödl di, diri diri dudl dö."
640
+ # HeaderExt.encode_whole yodel
641
+ # #=> "=?UTF-8?Q?Holleri_du_d=C3=B6dl_di,_diri_diri_dudl_d=C3=B6=2E?="
642
+ #
643
+ def encode_whole str
644
+ do_encoding str do
645
+ mask str
646
+ end
647
+ end
648
+
649
+ private
650
+
651
# Set up the state used by +mask+ (<code>@charset</code>,
# <code>@type</code>, <code>@encoder</code>) for the duration of the
# block and return the block's result, forced to the encoding of
# +str+. The state is reset afterwards, even on an exception.
def do_encoding str
  # Keep the charset as a String. An Encoding object cannot be
  # downcased, so the <code>:lower</code> option used to raise
  # NoMethodError.
  @charset = str.encoding.to_s
  @type, @encoder = @base64 ? [ "B", :base64] : [ "Q", :quopri ]
  if @lower then
    @charset = @charset.downcase
    @type    = @type.downcase
  end
  yield.force_encoding str.encoding
ensure
  @charset = @type = @encoder = nil
end
662
+
663
+ # :stopdoc:
664
+ SPACE = " "
665
+ # :startdoc:
666
+
667
+ def mask str
668
+ r, i = [], 0
669
+ while i < str.length do
670
+ l = @limit||str.length
671
+ r.push "=?#@charset?#@type?#{send @encoder, str[ i, l]}?="
672
+ i += l
673
+ end
674
+ r.join SPACE
675
+ end
676
+
677
# Base64 representation of +c+ with every whitespace character
# (the line breaks inserted by +pack+) removed.
def base64 c
  packed = [c].pack "m*"
  packed.gsub(/\s/, "")
end
682
+
683
+ def quopri c
684
+ c.force_encoding Encoding::ASCII_8BIT
685
+ c.gsub! /([^ a-zA-Z0-9])/ do |s| "=%02X" % s.ord end
686
+ c.tr! " ", "_"
687
+ c
688
+ end
689
+
690
+ public
691
+
692
+ def decode str
693
+ self.class.decode str
694
+ end
695
+
696
+ class <<self
697
+
698
+ # The standard header content encoding has a word break limit of 64.
699
+ #
700
+ def std
701
+ @std ||= new :limit => 64
702
+ end
703
+
704
+ # :call-seq:
705
+ # needs? str -> true or false
706
+ #
707
+ # Use the standard content encoding.
708
+ #
709
+ def needs? str
710
+ std.needs? str
711
+ end
712
+
713
+ # :call-seq:
714
+ # encode( str) -> str
715
+ #
716
+ # Use the standard content encoding.
717
+ #
718
+ def encode str
719
+ std.encode str
720
+ end
721
+
722
+ # :call-seq:
723
+ # encode_whole( str) -> str
724
+ #
725
+ # Use the standard content encoding.
726
+ #
727
+ def encode_whole str
728
+ std.encode_whole str
729
+ end
730
+
731
+ # :call-seq:
732
+ # decode( str) -> str
733
+ #
734
+ # Remove header field style escapes.
735
+ #
736
+ # HeaderExt.decode "=?UTF-8?Q?J=C3=B6rg_M=C3=BCller?="
737
+ # #=> "Jörg Müller"
738
+ #
739
+ def decode str
740
+ r, e = [], []
741
+ v, l = nil, nil
742
+ lexer str do |type,piece|
743
+ case type
744
+ when :decoded then
745
+ e.push piece.encoding
746
+ if l == :space and (v == :decoded or not v) then
747
+ r.pop
748
+ elsif l == :plain then
749
+ r.push SPACE
750
+ end
751
+ when :space then
752
+ nil
753
+ when :plain then
754
+ if l == :decoded then
755
+ r.push SPACE
756
+ end
757
+ end
758
+ r.push piece
759
+ v, l = l, type
760
+ end
761
+ if l == :space and v == :decoded then
762
+ r.pop
763
+ end
764
+ e.uniq!
765
+ begin
766
+ r.join
767
+ rescue EncodingError
768
+ raise if e.empty?
769
+ f = e.shift
770
+ r.each { |x| x.encode! f }
771
+ retry
772
+ end
773
+ end
774
+
775
+ def lexer str
776
+ while str do
777
+ str =~ /(\s+)|\B=\?(\S*?)\?([QB])\?(\S*?)\?=\B/i
778
+ if $1 then
779
+ yield :plain, $` unless $`.empty?
780
+ yield :space, $&
781
+ elsif $2 then
782
+ yield :plain, $` unless $`.empty?
783
+ d = unmask $2, $3, $4
784
+ yield :decoded, d
785
+ else
786
+ yield :plain, str
787
+ end
788
+ str = $'.notempty?
789
+ end
790
+ end
791
+
792
+ private
793
+
794
# Decode the payload +txt+ of one encoded word.
#
# +cs+::  the charset name, optionally followed by
#         <code>*language</code> (rfc2231, 5.), which is ignored
# +tp+::  <code>"B"</code> (base64) or <code>"Q"</code>
#         (quoted-printable, <code>_</code> stands for a space),
#         case-insensitive
# +txt+:: the encoded text
#
# Returns the decoded string tagged with the given charset. UTF-7 is
# translated to UTF-8 by hand, a charset starting with "unknown"
# yields US-ASCII, and a charset name Ruby does not know yields a
# binary (ASCII-8BIT) string instead of raising ArgumentError.
# The arguments are not modified.
def unmask cs, tp, txt
  case tp.upcase
    when "B" then txt, = txt.unpack "m*"
    when "Q" then txt, = (txt.tr "_", " ").unpack "M*"
  end
  cs = cs.sub(/\*\w+\z/, "")      # language as in rfc2231, 5.
  case cs
    when /\Autf-?7\z/i then
      # Ruby doesn't seem to do that.
      txt.force_encoding Encoding::US_ASCII
      txt.gsub!(/\+([0-9a-zA-Z+\/]*)-?/) do
        if $1.empty? then
          "+"
        else
          s = ("#$1==".unpack "m*").join
          (s.unpack "S>*").map { |x| x.chr Encoding::UTF_8 }.join
        end
      end
      txt.force_encoding Encoding::UTF_8
    when /\Aunknown/i then
      txt.force_encoding Encoding::US_ASCII
    else
      begin
        txt.force_encoding cs
      rescue ArgumentError
        # Charset name unknown to Ruby (or empty): keep the raw bytes
        # rather than aborting the decoding of the whole header.
        txt.force_encoding Encoding::ASCII_8BIT
      end
  end
  txt
end
820
+
821
+ end
822
+
823
+ end
824
+
825
+ end
826
+