hermeneutics 1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +29 -0
- data/bin/hermesmail +262 -0
- data/etc/exim.conf +34 -0
- data/lib/hermeneutics/addrs.rb +687 -0
- data/lib/hermeneutics/boxes.rb +321 -0
- data/lib/hermeneutics/cgi.rb +253 -0
- data/lib/hermeneutics/cli/pop.rb +102 -0
- data/lib/hermeneutics/color.rb +275 -0
- data/lib/hermeneutics/contents.rb +351 -0
- data/lib/hermeneutics/css.rb +261 -0
- data/lib/hermeneutics/escape.rb +826 -0
- data/lib/hermeneutics/html.rb +462 -0
- data/lib/hermeneutics/mail.rb +105 -0
- data/lib/hermeneutics/message.rb +626 -0
- data/lib/hermeneutics/tags.rb +317 -0
- data/lib/hermeneutics/transports.rb +230 -0
- data/lib/hermeneutics/types.rb +137 -0
- data/lib/hermeneutics/version.rb +32 -0
- metadata +83 -0
@@ -0,0 +1,261 @@
|
|
1
|
+
#
|
2
|
+
# hermeneutics/css.rb -- CSS generation
|
3
|
+
#
|
4
|
+
|
5
|
+
require "hermeneutics/html"
|
6
|
+
|
7
|
+
|
8
|
+
module Hermeneutics
|
9
|
+
|
10
|
+
# == Example
|
11
|
+
#
|
12
|
+
# require "hermeneutics/css"
|
13
|
+
# require "hermeneutics/color"
|
14
|
+
# class MyCss < Css
|
15
|
+
#
|
16
|
+
# COL1 = "904f02".to_rgb
|
17
|
+
# COL2 = COL1.edit_hsv { |h,s,v| [h+15,s,v] }
|
18
|
+
#
|
19
|
+
# ATTR_COL1 = { color: COL1 }
|
20
|
+
# ATTR_COL2 = { color: COL2 }
|
21
|
+
# ATTR_DECON = { text_decoration: "none" }
|
22
|
+
# ATTR_DECOU = { text_decoration: "underline" }
|
23
|
+
#
|
24
|
+
# def build
|
25
|
+
# a ":link", ATTR_COL1, ATTR_DECON
|
26
|
+
# a ":visited", ATTR_COL2, ATTR_DECON
|
27
|
+
# a ":active", ATTR_COL1, ATTR_DECON
|
28
|
+
# a ":focus", ATTR_COL1, ATTR_DECOU
|
29
|
+
# space
|
30
|
+
#
|
31
|
+
# body "#dummy" do
|
32
|
+
# properties :background_color => "f7f7f7".to_rgb
|
33
|
+
# div ".child", :background_color => "e7e7e7".to_rgb
|
34
|
+
# @b = selector
|
35
|
+
# td do
|
36
|
+
# @bt = selector
|
37
|
+
# end
|
38
|
+
# end
|
39
|
+
# selectors @b, @bt, :font_size => :large
|
40
|
+
# end
|
41
|
+
# end
|
42
|
+
# Hermeneutics::Css.document
|
43
|
+
#
|
44
|
+
class Css
|
45
|
+
|
46
|
+
class <<self
|
47
|
+
attr_accessor :main
|
48
|
+
# Subclass hook: remembers the most recently defined subclass in
# +Css.main+, which +open+ prefers over the receiver when instantiating.
def inherited(cls)
  Css.main = cls
end
|
51
|
+
# Instantiate the registered main class (or the receiver when none is
# registered), direct its output to +out+ and yield the instance.
# Returns whatever the instance's +generate+ returns.
def open(out = nil)
  css = (@main || self).new
  css.generate(out) { yield css }
end
|
57
|
+
# Open a stylesheet instance on the default output and forward all
# arguments and the block to its +document+ method.
def document(*args, &block)
  open { |css| css.document(*args, &block) }
end
|
62
|
+
# Write the stylesheet to a file. When +name+ is omitted it is derived
# from the running script: basename of $0 without ".rb", plus ".css".
# With a block the instance is yielded; otherwise +document+ is called.
def write_file(name = nil)
  name ||= File.basename($0, ".rb") + ".css"
  File.open(name, "w") do |file|
    open(file) do |css|
      if block_given?
        yield css
      else
        css.document
      end
    end
  end
end
|
74
|
+
|
75
|
+
end
|
76
|
+
|
77
|
+
# Run the block with +@out+ set to +out+ (or $stdout when nil) and
# restore the previous output afterwards, even if the block raises.
def generate(out = nil)
  previous = @out
  @out = out || $stdout
  yield
ensure
  @out = previous
end
|
86
|
+
|
87
|
+
|
88
|
+
# A stack of selector components that is rendered as one CSS selector.
#
# Each nesting level pushes one component (combinator, tag name,
# sub-specification) for the duration of a block.
class Selector

  def initialize
    @chain = []
  end

  # Push a component, yield, and pop it again.
  #
  # descend:: nil, ">" / :child, or "+" / :sibling
  # name::    "*" or a key of Html::TAGS; other names are not rendered
  # sub::     nil or a specification such as ".cls", "#id", ":link",
  #           "[attr=val]"
  #
  # Raises RuntimeError for a combinator without an enclosing component,
  # for an unknown combinator and for a malformed +sub+.
  def tag descend, name, sub
    if descend and @chain.empty? then
      raise "Descendor without previous tag: #{descend} #{name}#{sub}."
    end
    prefix = case descend
      when ">", :child   then "> "
      when "+", :sibling then "+ "
      when nil           then nil
      else
        raise "Unknown descendor: #{descend}"
    end
    c = [ prefix]
    c.push name if name == "*" or Html::TAGS[ name]
    if sub then
      sub =~ /\A(?:
                [:.#]([a-z_0-9-]+)|
                \[([a-z0-9-]+)([~|]?=)(.*)\]
              )*\z/ix or
        raise "Improper tag specification: #{name}#{sub}."
      c.push sub
    end
    @chain.push c
    # Pop only what was pushed above: an error raised during validation
    # must not remove the enclosing level's component.
    begin
      yield
    ensure
      @chain.pop
    end
  end

  # Snapshot of the current chain that is unaffected by later push/pop.
  def dup
    s = Selector.new
    s.replace @chain
    s
  end

  # Components of one level are concatenated, levels are joined by a space.
  def to_s
    @chain.map { |c| c.join }.join " "
  end

  protected

  def replace chain
    @chain.replace chain
  end

end
|
131
|
+
|
132
|
+
# Start with an empty selector chain.
def initialize()
  @selector = Selector.new
end
|
135
|
+
|
136
|
+
# Produce the stylesheet by calling +build+ (expected to be supplied by
# the subclass) with the given arguments and block.
def document(*args, &block)
  build(*args, &block)
end
|
139
|
+
|
140
|
+
# Path of the current output, or nil when the output has no +path+
# method (or no output is set).
#
# Uses respond_to? rather than rescuing NoMethodError so that a
# NoMethodError raised inside a real +path+ implementation is not hidden.
def path
  @out.path if @out.respond_to? :path
end
|
144
|
+
|
145
|
+
# Emit +str+ as a CSS comment. Text containing the record separator $/
# is written one line per row, each prefixed with " * "; other text is
# written inline. Comment terminators in the text are defused by
# +mask_comment+ first.
def comment(str)
  @out << "/*"
  text = mask_comment(str)
  multiline = text =~ %r(#$/)
  if multiline
    @out << $/
    text.each_line do |line|
      @out << " * " << line.chomp << $/
    end
  else
    @out << " " << text
  end
  @out << " */"
  @out << $/ if multiline
end
|
161
|
+
|
162
|
+
# Emit one record separator ($/), i.e. an empty line between rules.
def space()
  @out.<<($/)
end
|
165
|
+
|
166
|
+
# Enter a selector level. Trailing Hash arguments are property sets; the
# remaining arguments go to Selector#tag. When properties are given they
# are written for the new selector, otherwise the block is run inside it.
def tag(*args)
  props = []
  props.unshift(args.pop) while args.last.is_a?(Hash)
  @selector.tag(*args) do
    if props.any?
      properties(*props)
    else
      yield
    end
  end
end
|
179
|
+
|
180
|
+
# Undefine every inherited method whose name equals a key of Html::TAGS
# (e.g. :p, :select, :sub) so that such calls reach +method_missing+.
clashing = Html::TAGS.keys &
  (private_instance_methods + protected_instance_methods + instance_methods)
undef_method(*clashing)
|
184
|
+
|
185
|
+
# Treat calls named after a key of Html::TAGS as selector levels.
#
# Leading non-Hash arguments are read as "sub" or as "descend, sub"; a
# single argument without letters, or a Symbol, is taken as the
# combinator. Remaining arguments (property hashes) and the block are
# forwarded to +tag+. Other names fall through to the default handling.
def method_missing(sym, *args, &block)
  return super unless Html::TAGS[sym]
  sub = args.shift if args.any? && !(Hash === args.first)
  if args.any? && !(Hash === args.first)
    desc, sub = sub, args.shift
  elsif sub !~ /[a-z]/i || Symbol === sub
    desc, sub = sub, nil
  end
  tag(desc, sym, sub, *args, &block)
end
|
200
|
+
|
201
|
+
# Write a rule for the current selector with the given property hashes.
def properties(*args)
  write(@selector.to_s, *args)
end
|
204
|
+
|
205
|
+
# A copy of the current selector, for later use with +selectors+.
def selector()
  @selector.dup
end
|
208
|
+
|
209
|
+
# Write one rule for several saved selectors: the leading Selector
# arguments are joined with ", ", the rest are property hashes.
def selectors(*args)
  leading = args.take_while { |a| Selector === a }
  write(leading.join(", "), *args.drop(leading.size))
end
|
217
|
+
|
218
|
+
private
|
219
|
+
|
220
|
+
# Break up every "*/" in +str+ so it cannot end the enclosing comment.
def mask_comment(str)
  str.gsub("*/", "* /")
end
|
223
|
+
|
224
|
+
INDENT = " "
|
225
|
+
|
226
|
+
# Write one rule. The hashes in +args+ are merged (later ones win).
# Zero or one property gives a single-line rule; more give one indented
# line per declaration.
def write(sel, *args)
  props = args.each_with_object({}) { |a, acc| acc.update(a) }
  @out << sel << " {"
  if props.size > 1
    @out << $/
    lead, trail = INDENT, $/
  else
    lead = trail = " "
  end
  single(props) { |decl| @out << lead << decl << trail }
  @out << "}" << $/
end
|
241
|
+
|
242
|
+
# Turn each property into a "name: value;" declaration. Symbol names get
# "_" replaced by "-"; Array values are joined with spaces. With a block
# each declaration is yielded; without one an Array of them is returned.
def single(hash)
  unless block_given?
    collected = []
    single(hash) { |decl| collected.push(decl) }
    return collected
  end
  hash.map do |key, value|
    if Symbol === key
      key = key.new_string
      key.gsub!(/_/, "-")
    end
    value = value.join(" ") if Array === value
    yield "#{key}: #{value};"
  end
end
|
257
|
+
|
258
|
+
end
|
259
|
+
|
260
|
+
end
|
261
|
+
|
@@ -0,0 +1,826 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
#
|
4
|
+
# hermeneutics/escape.rb -- Various encoding schemes for internet purposes
|
5
|
+
#
|
6
|
+
|
7
|
+
require "supplement"
|
8
|
+
|
9
|
+
|
10
|
+
=begin rdoc
|
11
|
+
|
12
|
+
:section: Classes defined here
|
13
|
+
|
14
|
+
Hermeneutics::Entities encodes to and decodes from HTML-Entities
|
15
|
+
(<code>&</code> etc.)
|
16
|
+
|
17
|
+
Hermeneutics::URLText encodes to and decodes from URLs
|
18
|
+
(<code>%2d</code> etc.)
|
19
|
+
|
20
|
+
Hermeneutics::HeaderExt encodes to and decodes from E-Mail Header fields
|
21
|
+
(<code>=?UTF-8?Q?=C3=B6?=</code> etc.).
|
22
|
+
|
23
|
+
=end
|
24
|
+
|
25
|
+
module Hermeneutics
|
26
|
+
|
27
|
+
# Translate HTML and XML character entities: <code>"&"</code> to
|
28
|
+
# <code>"&amp;"</code> and vice versa.
|
29
|
+
#
|
30
|
+
# == What actually happens
|
31
|
+
#
|
32
|
+
# HTML pages usually come in with characters encoded <code><</code>
|
33
|
+
# for <code><</code> and <code>€</code> for <code>€</code>.
|
34
|
+
#
|
35
|
+
# Further, they may contain a meta tag in the header like this:
|
36
|
+
#
|
37
|
+
# <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
38
|
+
# <meta charset="utf-8" /> (HTML5)
|
39
|
+
#
|
40
|
+
# or
|
41
|
+
#
|
42
|
+
# <?xml version="1.0" encoding="UTF-8" ?> (XHTML)
|
43
|
+
#
|
44
|
+
# When +charset+ is <code>utf-8</code> and the file contains the byte
|
45
|
+
# sequence <code>"\303\244"</code>/<code>"\xc3\xa4"</code> then there will
|
46
|
+
# be displayed a character <code>"ä"</code>.
|
47
|
+
#
|
48
|
+
# When +charset+ is <code>iso8859-15</code> and the file contains the byte
|
49
|
+
# sequence <code>"\344"</code>/<code>"\xe4"</code> then there will be
|
50
|
+
# displayed a character <code>"ä"</code>, too.
|
51
|
+
#
|
52
|
+
# The sequence <code>"ä"</code> will produce an <code>"ä"</code> in any
|
53
|
+
# case.
|
54
|
+
#
|
55
|
+
# == What you should do
|
56
|
+
#
|
57
|
+
# Generating your own HTML pages you will always be safe when you only
|
58
|
+
# produce entity tags as <code>ä</code> and <code>€</code> or
|
59
|
+
# <code>ä</code> and <code>€</code> respectively.
|
60
|
+
#
|
61
|
+
# == What this module does
|
62
|
+
#
|
63
|
+
# This module translates strings to a HTML-masked version. The encoding will
|
64
|
+
# not be changed and you may demand to keep 8-bit-characters.
|
65
|
+
#
|
66
|
+
# == Examples
|
67
|
+
#
|
68
|
+
# Entities.encode "<" #=> "&lt;"
|
69
|
+
# Entities.decode "&lt;" #=> "<"
|
70
|
+
# Entities.encode "äöü" #=> "&auml;&ouml;&uuml;"
|
71
|
+
# Entities.decode "&auml;&ouml;&uuml;" #=> "äöü"
|
72
|
+
#
|
73
|
+
class Entities
|
74
|
+
|
75
|
+
# :stopdoc:
|
76
|
+
SPECIAL_ASC = {
|
77
|
+
'"' => "quot", "&" => "amp", "<" => "lt", ">" => "gt",
|
78
|
+
}
|
79
|
+
RE_ASC = /[#{SPECIAL_ASC.keys.map { |x| Regexp.quote x }.join}]/
|
80
|
+
|
81
|
+
SPECIAL = {
|
82
|
+
"\u00a0" => "nbsp",
|
83
|
+
"¡" => "iexcl", "¢" => "cent", "£" => "pound", "€" => "euro", "¥" => "yen", "Š" => "Scaron",
|
84
|
+
"¤" => "curren", "¦" => "brvbar",
|
85
|
+
"§" => "sect", "š" => "scaron", "©" => "copy", "ª" => "ordf", "«" => "laquo", "¬" => "not", "" => "shy",
|
86
|
+
"¨" => "uml",
|
87
|
+
"®" => "reg", "¯" => "macr",
|
88
|
+
|
89
|
+
"°" => "deg", "±" => "plusmn", "²" => "sup2", "³" => "sup3", "µ" => "micro", "¶" => "para",
|
90
|
+
"´" => "acute",
|
91
|
+
"·" => "middot", "¹" => "sup1", "º" => "ordm", "»" => "raquo", "Œ" => "OElig", "œ" => "oelig",
|
92
|
+
"¸" => "cedil", "¼" => "frac14", "½" => "frac12",
|
93
|
+
"Ÿ" => "Yuml", "¿" => "iquest",
|
94
|
+
"¾" => "frac34",
|
95
|
+
|
96
|
+
"À" => "Agrave", "Á" => "Aacute", "Â" => "Acirc", "Ã" => "Atilde", "Ä" => "Auml", "Å" => "Aring", "Æ" => "AElig",
|
97
|
+
"Ç" => "Ccedil", "È" => "Egrave", "É" => "Eacute", "Ê" => "Ecirc", "Ë" => "Euml", "Ì" => "Igrave", "Í" => "Iacute",
|
98
|
+
"Î" => "Icirc", "Ï" => "Iuml",
|
99
|
+
"Ð" => "ETH", "Ñ" => "Ntilde", "Ò" => "Ograve", "Ó" => "Oacute", "Ô" => "Ocirc", "Õ" => "Otilde", "Ö" => "Ouml",
|
100
|
+
"×" => "times", "Ø" => "Oslash", "Ù" => "Ugrave", "Ú" => "Uacute", "Û" => "Ucirc", "Ü" => "Uuml", "Ý" => "Yacute",
|
101
|
+
"Þ" => "THORN", "ß" => "szlig",
|
102
|
+
|
103
|
+
"à" => "agrave", "á" => "aacute", "â" => "acirc", "ã" => "atilde", "ä" => "auml", "å" => "aring", "æ" => "aelig",
|
104
|
+
"ç" => "ccedil", "è" => "egrave", "é" => "eacute", "ê" => "ecirc", "ë" => "euml", "ì" => "igrave", "í" => "iacute",
|
105
|
+
"î" => "icirc", "ï" => "iuml",
|
106
|
+
"ð" => "eth", "ñ" => "ntilde", "ò" => "ograve", "ó" => "oacute", "ô" => "ocirc", "õ" => "otilde", "ö" => "ouml",
|
107
|
+
"÷" => "divide", "ø" => "oslash", "ù" => "ugrave", "ú" => "uacute", "û" => "ucirc", "ü" => "uuml", "ý" => "yacute",
|
108
|
+
"þ" => "thorn", "ÿ" => "yuml",
|
109
|
+
|
110
|
+
"‚" => "bsquo", "‘" => "lsquo", "„" => "bdquo", "“" => "ldquo", "‹" => "lsaquo", "›" => "rsaquo",
|
111
|
+
"–" => "ndash", "—" => "mdash", "‰" => "permil", "…" => "hellip", "†" => "dagger", "‡" => "Dagger",
|
112
|
+
}.update SPECIAL_ASC
|
113
|
+
NAMES = SPECIAL.invert
|
114
|
+
# :startdoc:
|
115
|
+
|
116
|
+
attr_accessor :keep_8bit
|
117
|
+
|
118
|
+
# :call-seq:
|
119
|
+
# new( keep_8bit = nil) -> ent
|
120
|
+
# new( :keep_8bit => val) -> ent
|
121
|
+
#
|
122
|
+
# Creates an <code>Entities</code> converter.
|
123
|
+
#
|
124
|
+
# The parameter may be given as one value or as a hash.
|
125
|
+
#
|
126
|
+
# ent = Entities.new true
|
127
|
+
# ent = Entities.new :keep_8bit => true
|
128
|
+
#
|
129
|
+
# Accepts the flag directly or as a Hash with the key :keep_8bit.
def initialize(keep_8bit = nil)
  @keep_8bit = Hash === keep_8bit ? keep_8bit[:keep_8bit] : keep_8bit
end
|
135
|
+
|
136
|
+
# :call-seq:
|
137
|
+
# ent.encode( str) -> str
|
138
|
+
#
|
139
|
+
# Create a string whose characters are masked the HTML style:
|
140
|
+
#
|
141
|
+
# ent = Entities.new
|
142
|
+
# ent.encode "&<\"" #=> "&<""
|
143
|
+
# ent.encode "äöü" #=> "äöü"
|
144
|
+
#
|
145
|
+
# The result will be in the same encoding as the source even if it will
|
146
|
+
# not contain any 8-bit characters (what can only happen when +keep_8bit+
|
147
|
+
# is set).
|
148
|
+
#
|
149
|
+
# ent = Entities.new true
|
150
|
+
#
|
151
|
+
# uml = "<ä>".encode "UTF-8"
|
152
|
+
# ent.encode uml #=> "<\xc3\xa4>" in UTF-8
|
153
|
+
#
|
154
|
+
# uml = "<ä>".encode "ISO-8859-1"
|
155
|
+
# ent.encode uml #=> "<\xe4>" in ISO-8859-1
|
156
|
+
#
|
157
|
+
# Return a copy of +str+ with the characters of SPECIAL_ASC replaced by
# named entities. Unless +keep_8bit+ is set, every character outside
# \0-\x7f is also replaced: by its name from SPECIAL when known, else by
# a hexadecimal numeric entity.
def encode(str)
  result = str.new_string
  result.gsub!(RE_ASC) { |ch| "&#{SPECIAL_ASC[ch]};" }
  return result if @keep_8bit
  result.gsub!(/[^\0-\x7f]/) do |ch|
    ch.encode!(__ENCODING__)
    name = SPECIAL[ch] || ("#x%04x" % ch.ord)
    "&#{name};"
  end
  result
end
|
169
|
+
|
170
|
+
# Instance-level convenience: delegates to the class method +decode+.
def decode(str)
  self.class.decode(str)
end
|
173
|
+
|
174
|
+
public
|
175
|
+
|
176
|
+
class <<self
|
177
|
+
|
178
|
+
# Shared converter with default settings, created on first use.
def std()
  @std || (@std = new)
end
|
181
|
+
|
182
|
+
# Encode +str+ using the shared standard converter.
def encode(str)
  std.encode(str)
end
|
185
|
+
|
186
|
+
# :call-seq:
|
187
|
+
# Entities.decode( str) -> str
|
188
|
+
#
|
189
|
+
# Replace HTML-style masks by normal characters:
|
190
|
+
#
|
191
|
+
# Entities.decode "<" #=> "<"
|
192
|
+
# Entities.decode "äöü" #=> "äöü"
|
193
|
+
#
|
194
|
+
# Unmasked 8-bit-characters (<code>"ä"</code> instead of
|
195
|
+
# <code>"ä"</code>) will be kept but translated to
|
196
|
+
# a unique encoding.
|
197
|
+
#
|
198
|
+
# s = "ä ö ü"
|
199
|
+
# s.encode! "utf-8"
|
200
|
+
# Entities.decode s #=> "ä ö ü"
|
201
|
+
#
|
202
|
+
# s = "\xe4 ö \xfc €"
|
203
|
+
# s.force_encoding "iso-8859-15"
|
204
|
+
# Entities.decode s #=> "ä ö ü €"
|
205
|
+
# (in iso8859-15)
|
206
|
+
#
|
207
|
+
# Replace every "&...;" sequence that names a known entity or is a
# numeric entity; anything else is left as it is.
def decode(str)
  str.gsub(/&(.+?);/) { named_decode($1) || numeric_decode($1) || $& }
end
|
212
|
+
|
213
|
+
private
|
214
|
+
|
215
|
+
# Look up the entity name +s+ in NAMES. Returns the character in the
# encoding of +s+, or nil when the name is unknown.
def named_decode(s)
  char = NAMES[s] or return
  char.encoding == s.encoding ? char : char.encode(s.encoding)
end
|
225
|
+
|
226
|
+
# Decode the inside of a numeric entity ("#228" or "#xe4") into a
# character in the encoding of +s+.
#
# Returns nil when +s+ is not a numeric entity, when the number is not a
# valid code point (too large, surrogate), or when the character cannot
# be represented in the encoding of +s+. The caller then keeps the
# original text instead of failing on malformed input.
def numeric_decode s
  if s =~ /\A#(?:(\d+)|x([0-9a-f]+))\z/i then
    code = $1 ? $1.to_i : ($2.to_i 0x10)
    begin
      c = code.chr Encoding::UTF_8
      c.encode! s.encoding
    rescue RangeError, EncodingError
      # invalid code point, or no such character in the target encoding
      nil
    end
  end
end
|
232
|
+
|
233
|
+
end
|
234
|
+
|
235
|
+
end
|
236
|
+
|
237
|
+
|
238
|
+
|
239
|
+
# URL-able representation
|
240
|
+
#
|
241
|
+
# == What's actually happening
|
242
|
+
#
|
243
|
+
# URLs may not contain spaces and several characters such as slashes, ampersands
|
244
|
+
# etc. These characters will be masked by a percent sign and two hex digits
|
245
|
+
# representing the ASCII code. Eight bit characters should be masked the
|
246
|
+
# same way.
|
247
|
+
#
|
248
|
+
# An URL line does not store encoding information by itself. A locator may
|
249
|
+
# either say one of these:
|
250
|
+
#
|
251
|
+
# http://www.example.com/subdir/index.html?umlfield=%C3%BCber+alles
|
252
|
+
# http://www.example.com/subdir/index.html?umlfield=%FCber+alles
|
253
|
+
#
|
254
|
+
# The reading CGI has to decide on itself how to treat it.
|
255
|
+
#
|
256
|
+
# == Examples
|
257
|
+
#
|
258
|
+
# URLText.encode "'Stop!' said Fred." #=> "%27Stop%21%27+said+Fred."
|
259
|
+
# URLText.decode "%27Stop%21%27+said+Fred%2e"
|
260
|
+
# #=> "'Stop!' said Fred."
|
261
|
+
#
|
262
|
+
class URLText
|
263
|
+
|
264
|
+
attr_accessor :keep_8bit, :keep_space, :mask_space
|
265
|
+
|
266
|
+
# :call-seq:
|
267
|
+
# new( hash) -> urltext
|
268
|
+
#
|
269
|
+
# Creates a <code>URLText</code> converter.
|
270
|
+
#
|
271
|
+
# The parameters may be given as values or as a hash.
|
272
|
+
#
|
273
|
+
# utx = URLText.new :keep_8bit => true, :keep_space => false
|
274
|
+
#
|
275
|
+
# See the +encode+ method for an explanation of these parameters.
|
276
|
+
#
|
277
|
+
# Read the options :keep_8bit, :keep_space and :mask_space from +hash+.
# Without a hash all options stay unset.
def initialize(hash = nil)
  return unless hash
  @keep_8bit  = hash[:keep_8bit]
  @keep_space = hash[:keep_space]
  @mask_space = hash[:mask_space]
end
|
284
|
+
|
285
|
+
# :call-seq:
|
286
|
+
# encode( str) -> str
|
287
|
+
#
|
288
|
+
# Create a string that contains <code>%XX</code>-encoded bytes.
|
289
|
+
#
|
290
|
+
# utx = URLText.new
|
291
|
+
# utx.encode "'Stop!' said Fred." #=> "%27Stop%21%27+said+Fred."
|
292
|
+
#
|
293
|
+
# The result will not contain any 8-bit characters, except when
|
294
|
+
# +keep_8bit+ is set. The result will be in the same encoding as the
|
295
|
+
# argument although this normally has no meaning.
|
296
|
+
#
|
297
|
+
# utx = URLText.new :keep_8bit => true
|
298
|
+
# s = "< ä >".encode "UTF-8"
|
299
|
+
# utx.encode s #=> "%3C+\u{e4}+%3E" in UTF-8
|
300
|
+
#
|
301
|
+
# s = "< ä >".encode "ISO-8859-1"
|
302
|
+
# utx.encode s #=> "%3C+\xe4+%3E" in ISO-8859-1
|
303
|
+
#
|
304
|
+
# A space <code>" "</code> will not be replaced by a plus <code>"+"</code>
|
305
|
+
# if +keep_space+ is set.
|
306
|
+
#
|
307
|
+
# utx = URLText.new :keep_space => true
|
308
|
+
# s = "< x >"
|
309
|
+
# utx.encode s #=> "%3C x %3E"
|
310
|
+
#
|
311
|
+
# When +mask_space+ is set, then a space will be represented as
|
312
|
+
# <code>"%20"</code>,
|
313
|
+
#
|
314
|
+
# Percent-encode a copy of +str+. Everything except letters, digits,
# "_", "." and "-" is replaced by %XX. A space becomes "+", stays a
# space with +keep_space+, or becomes %20 with +mask_space+. With
# +keep_8bit+ non-ASCII characters are left alone; otherwise the string
# is processed byte-wise. The result carries the encoding of +str+.
def encode(str)
  out = str.new_string
  out.force_encoding(Encoding::ASCII_8BIT) unless @keep_8bit
  out.gsub!(%r/([^a-zA-Z0-9_.-])/) do |ch|
    if ch == " " && !@mask_space
      @keep_space ? ch : "+"
    elsif !@keep_8bit || ch.ascii_only?
      "%%%02X" % ch.ord
    else
      ch
    end
  end
  out.encode!(str.encoding)
end
|
328
|
+
|
329
|
+
|
330
|
+
# Hash with Symbol keys whose entries can also be accessed as methods
# (<code>dict.name</code>, <code>dict.name = val</code>).
class Dict < Hash

  class <<self
    # Build a Dict, yield it for filling and return it.
    def create
      i = new
      yield i
      i
    end
  end

  # With a block, the new Dict is yielded once for filling.
  def initialize
    # Do not hand the block on to Hash#initialize: it would be installed
    # as the default proc and run again on every lookup of a missing key.
    super(&nil)
    yield self if block_given?
  end

  # Keys are converted with +to_sym+ on reading ...
  def [] key
    super key.to_sym
  end

  # ... and on writing.
  def []= key, val
    super key.to_sym, val
  end

  # Store all pairs of +hash+, converting the keys. Returns self, as
  # Hash#update does.
  def update hash
    hash.each { |k,v| self[ k] = v }
    self
  end
  alias merge! update

  # Store +val+ under +key+: nil stays nil, a decimal integer string
  # becomes an Integer, anything else goes through +to_s.notempty?+.
  def parse key, val
    self[ key] = case val
      when nil                                 then nil
      when /\A(?:[+-]?[1-9][0-9]{,9}|0)\z/ then val.to_i
      else                                          val.to_s.notempty?
    end
  end

  # <code>dict.name</code> reads and <code>dict.name = val</code> writes
  # the entry :name. Names ending in "!" or "?" are not handled.
  def method_missing sym, *args
    if args.empty? and not sym =~ /[!?=]\z/ then
      self[ sym]
    else
      first, *rest = args
      if rest.empty? and sym =~ /=\z/ then
        # Strip the trailing "=" so that the reader finds the value.
        self[ sym.to_s.chomp( "=")] = first
      else
        super
      end
    end
  end

end
|
372
|
+
|
373
|
+
# :stopdoc:
|
374
|
+
PAIR_SET = "="
|
375
|
+
PAIR_SEP = "&"
|
376
|
+
# :startdoc:
|
377
|
+
|
378
|
+
# :call-seq:
|
379
|
+
# encode_hash( hash) -> str
|
380
|
+
#
|
381
|
+
# Encode a <code>Hash</code> to a URL-style string.
|
382
|
+
#
|
383
|
+
# utx = URLText.new
|
384
|
+
#
|
385
|
+
# h = { :name => "John Doe", :age => 42 }
|
386
|
+
# utx.encode_hash h
|
387
|
+
# #=> "name=John+Doe&age=42"
|
388
|
+
#
|
389
|
+
# h = { :a => ";;;", :x => "äöü" }
|
390
|
+
# utx.encode_hash h
|
391
|
+
# #=> "a=%3B%3B%3B&x=%C3%A4%C3%B6%C3%BC"
|
392
|
+
#
|
393
|
+
# Build a query string from +hash+. Pairs with a nil value are left
# out, +true+ is written as the key itself and +false+ as an empty
# value. Keys and values are converted with +to_s+ and encoded.
def encode_hash(hash)
  pairs = hash.map do |key, value|
    value = case value
            when nil   then next
            when true  then key
            when false then ""
            else            value
            end
    [encode(key.to_s), encode(value.to_s)].join(PAIR_SET)
  end
  pairs.compact.join(PAIR_SEP)
end
|
403
|
+
|
404
|
+
# :call-seq:
|
405
|
+
# mkurl( path, hash, anchor = nil) -> str
|
406
|
+
#
|
407
|
+
# Make an URL.
|
408
|
+
#
|
409
|
+
# utx = URLText.new
|
410
|
+
# h = { :name => "John Doe", :age => "42" }
|
411
|
+
# utx.mkurl "myscript.rb", h, "chapter"
|
412
|
+
# #=> "myscript.rb?name=John+Doe&age=42#chapter"
|
413
|
+
#
|
414
|
+
# Compose "path?query#anchor". The query hash and the anchor may be
# given in either order; both are optional. The anchor is appended
# as it is.
def mkurl(path, hash = nil, anchor = nil)
  hash, anchor = anchor, hash unless Hash === hash
  url = "#{path}"
  url << "?" << encode_hash(hash) if hash
  url << "#" << anchor.to_s if anchor
  url
end
|
423
|
+
|
424
|
+
public
|
425
|
+
|
426
|
+
# Instance-level convenience: delegates to the class method +decode+.
def decode(str)
  self.class.decode(str)
end
|
429
|
+
|
430
|
+
# Instance-level convenience: delegates to the class method
# +decode_hash+, passing the block along.
def decode_hash(qstr, &block)
  self.class.decode_hash(qstr, &block)
end
|
433
|
+
|
434
|
+
class <<self
|
435
|
+
|
436
|
+
# Shared converter with default settings, created on first use.
def std()
  @std || (@std = new)
end
|
439
|
+
|
440
|
+
# Encode +str+ using the shared standard converter.
def encode(str)
  std.encode(str)
end
|
443
|
+
|
444
|
+
# Encode +hash+ using the shared standard converter.
def encode_hash(hash)
  std.encode_hash(hash)
end
|
447
|
+
|
448
|
+
# Make an URL using the shared standard converter.
#
# +hash+ is optional here as it is on the instance method, so that
# <code>URLText.mkurl path</code> works like <code>utx.mkurl path</code>.
def mkurl path, hash = nil, anchor = nil
  std.mkurl path, hash, anchor
end
|
451
|
+
|
452
|
+
# :call-seq:
|
453
|
+
# decode( str) -> str
|
454
|
+
# decode( str, encoding) -> str
|
455
|
+
#
|
456
|
+
# Decode the contained string.
|
457
|
+
#
|
458
|
+
# utx = URLText.new
|
459
|
+
# utx.decode "%27Stop%21%27+said+Fred%2e" #=> "'Stop!' said Fred."
|
460
|
+
#
|
461
|
+
# The encoding will be kept. That means that an invalidly encoded
|
462
|
+
# string could be produced.
|
463
|
+
#
|
464
|
+
# a = "bl%F6d"
|
465
|
+
# a.encode! "utf-8"
|
466
|
+
# d = utx.decode a
|
467
|
+
# d =~ /./ #=> "invalid byte sequence in UTF-8 (ArgumentError)"
|
468
|
+
#
|
469
|
+
# Decode a URL-encoded string: "+" becomes a space and every %XX becomes
# the byte with that value. The result carries the encoding of +str+ and
# may therefore be invalidly encoded.
#
# The substitution is done on a binary view of the copy. Inserting a raw
# high byte into a string that already holds non-ASCII characters would
# otherwise raise Encoding::CompatibilityError.
def decode str
  r = str.new_string
  r.force_encoding Encoding::ASCII_8BIT
  r.tr! "+", " "
  r.gsub! /(?:%([0-9A-F]{2}))/i do $1.hex.chr end
  r.force_encoding str.encoding
  r
end
|
476
|
+
|
477
|
+
# :call-seq:
|
478
|
+
# decode_hash( str) -> hash
|
479
|
+
# decode_hash( str) { |key,val| ... } -> nil or int
|
480
|
+
#
|
481
|
+
# Decode a URL-style encoded string to a <code>Hash</code>.
|
482
|
+
# In case a block is given, the number of key-value pairs is returned.
|
483
|
+
#
|
484
|
+
# str = "a=%3B%3B%3B&x=%26auml%3B%26ouml%3B%26uuml%3B"
|
485
|
+
# URLText.decode_hash str do |k,v|
|
486
|
+
# puts "#{k} = #{v}"
|
487
|
+
# end
|
488
|
+
#
|
489
|
+
# Output:
|
490
|
+
#
|
491
|
+
# a = ;;;
|
492
|
+
# x = äöü
|
493
|
+
#
|
494
|
+
# Decode a query string. With a block each decoded key/value pair is
# yielded and the number of pairs is returned (nil when there were
# none). Without a block a Dict is built via Dict#parse.
def decode_hash(qstr)
  unless block_given?
    return Dict.create { |dict|
      each_pair(qstr) { |key, val| dict.parse(key, val) }
    }
  end
  count = 0
  each_pair(qstr) do |key, val|
    yield key, val
    count += 1
  end
  count.nonzero?
end
|
508
|
+
|
509
|
+
private
|
510
|
+
|
511
|
+
# Split +qstr+ at PAIR_SEP, split each pair at the first PAIR_SET, decode
# both parts and yield them. A pair without PAIR_SET yields the key only.
# Does nothing when +qstr+ is nil or false.
#
# Empty pairs (as in "a=1&&b=2" or "&a=1") are skipped; they would
# otherwise be yielded without any argument and the consumer would see a
# nil key.
def each_pair qstr
  qstr or return
  h = qstr.to_s.split PAIR_SEP
  h.each do |pair|
    next if pair.empty?
    kv = pair.split PAIR_SET, 2
    kv.map! { |x| decode x }
    yield *kv
  end
end
|
520
|
+
|
521
|
+
end
|
522
|
+
|
523
|
+
end
|
524
|
+
|
525
|
+
# Header field contents (RFC 2047) encoding
|
526
|
+
#
|
527
|
+
# == Examples
|
528
|
+
#
|
529
|
+
# HeaderExt.encode "Jörg Müller"
|
530
|
+
# #=> "=?utf-8?Q?J=C3=B6rg_M=C3=BCller?="
|
531
|
+
# HeaderExt.decode "=?UTF-8?Q?J=C3=B6rg_M=C3=BCller?="
|
532
|
+
# #=> "Jörg Müller"
|
533
|
+
#
|
534
|
+
class HeaderExt
|
535
|
+
|
536
|
+
# :call-seq:
|
537
|
+
# new( [ parameters] ) -> con
|
538
|
+
#
|
539
|
+
# Creates a <code>HeaderExt</code> converter.
|
540
|
+
#
|
541
|
+
# See the +encode+ method for an explanation of the parameters.
|
542
|
+
#
|
543
|
+
# == Examples
|
544
|
+
#
|
545
|
+
# con = HeaderExt.new
|
546
|
+
# con = HeaderExt.new :base64 => true, :limit => 32, :lower => true
|
547
|
+
# con = HeaderExt.new :mask => /["'()]/
|
548
|
+
#
|
549
|
+
# Read the options :base64, :limit, :lower and :mask from +params+.
# Raises ArgumentError for any other key.
#
# The options are taken from a copy, so the caller's hash is left as it
# was and may be frozen.
def initialize params = nil
  if params then
    params = params.dup
    @base64 = params.delete :base64
    @limit  = params.delete :limit
    @lower  = params.delete :lower
    @mask   = params.delete :mask
    params.empty? or
      raise ArgumentError, "invalid parameter: #{params.keys.first}."
  end
end
|
559
|
+
|
560
|
+
# :call-seq:
|
561
|
+
# needs? str -> true or false
|
562
|
+
#
|
563
|
+
# Check whether a string needs encoding.
|
564
|
+
#
|
565
|
+
# true when +str+ holds non-ASCII characters or matches the :mask
# expression, false otherwise.
def needs?(str)
  (!str.ascii_only? || str =~ @mask) ? true : false
end
|
568
|
+
|
569
|
+
# :call-seq:
|
570
|
+
# encode( str) -> str
|
571
|
+
#
|
572
|
+
# Create a header field style encoded string. The following parameters
|
573
|
+
# will be evaluated:
|
574
|
+
#
|
575
|
+
# :base64 # build ?B? instead of ?Q?
|
576
|
+
# :limit # break words longer than this
|
577
|
+
# :lower # build lower case ?b? and ?q?
|
578
|
+
# :mask # a regular expression detecting characters to mask
|
579
|
+
#
|
580
|
+
# The result will not contain any 8-bit characters. The encoding will
|
581
|
+
# be kept although it won't have a meaning.
|
582
|
+
#
|
583
|
+
# The parameter <code>:mask</code> will have no influence on the masking
|
584
|
+
# itself but will guarantee characters to be masked.
|
585
|
+
#
|
586
|
+
# == Examples
|
587
|
+
#
|
588
|
+
# yodel = "Holleri du dödl di, diri diri dudl dö."
|
589
|
+
#
|
590
|
+
# con = HeaderExt.new
|
591
|
+
# con.encode yodel
|
592
|
+
# #=> "Holleri du =?UTF-8?Q?d=C3=B6dl?= di, diri diri dudl =?UTF-8?Q?d=C3=B6=2E?="
|
593
|
+
#
|
594
|
+
# yodel.encode! "iso8859-1"
|
595
|
+
# con.encode yodel
|
596
|
+
# #=> "Holleri du =?ISO8859-1?Q?d=F6dl?= di, diri diri dudl =?ISO8859-1?Q?d=F6=2E?="
|
597
|
+
#
|
598
|
+
# e = "€"
|
599
|
+
# e.encode! "utf-8" ; con.encode e #=> "=?UTF-8?Q?=E2=82=AC?="
|
600
|
+
# e.encode! "iso8859-15" ; con.encode e #=> "=?ISO8859-15?Q?=A4?="
|
601
|
+
# e.encode! "ms-ansi" ; con.encode e #=> "=?MS-ANSI?Q?=80?="
|
602
|
+
#
|
603
|
+
# con = HeaderExt.new :mask => /["'()]/
|
604
|
+
# con.encode "'Stop!' said Fred."
|
605
|
+
# #=> "=?UTF-8?Q?=27Stop=21=27?= said Fred."
|
606
|
+
#
|
607
|
+
# Encode +str+ word by word. Consecutive words that need encoding are
# collected together with the whitespace between them and masked as one
# piece; other words and their preceding whitespace are copied through.
# Text remaining after the last word is appended to a pending masked
# piece if there is one, else copied.
def encode(str)
  do_encoding(str) do
    result, pending = "", ""
    while str =~ /\S+/
      before, word, after = $`, $&, $'
      if needs?(word)
        # whitespace goes into the pending piece once one has been started
        (pending.notempty? || result) << before
        pending << word
      else
        unless pending.empty?
          result << mask(pending)
          pending.clear
        end
        result << before << word
      end
      str = after
    end
    if pending.empty?
      result << str
    else
      pending << str
      result << mask(pending)
    end
    result
  end
end
|
633
|
+
|
634
|
+
# :call-seq:
|
635
|
+
# encode_whole( str) -> str
|
636
|
+
#
|
637
|
+
# Unlike +encode+, the whole string will be encoded as one piece.
|
638
|
+
#
|
639
|
+
# yodel = "Holleri du dödl di, diri diri dudl dö."
|
640
|
+
# HeaderExt.encode_whole yodel
|
641
|
+
# #=> "=?UTF-8?Q?Holleri_du_d=C3=B6dl_di,_diri_diri_dudl_d=C3=B6=2E?="
|
642
|
+
#
|
643
|
+
# Mask the complete string, whether or not its words need it.
def encode_whole(str)
  do_encoding(str) { mask(str) }
end
|
648
|
+
|
649
|
+
private
|
650
|
+
|
651
|
+
# Set up the per-call state (@charset, @type, @encoder) for +str+, run
# the block, and force its result into the encoding of +str+. The state
# is cleared again afterwards, also when the block raises.
#
# @charset holds the encoding's name as a String, so it can be
# lower-cased for the :lower option; an Encoding object has no
# +downcase!+.
def do_encoding str
  @charset = str.encoding.to_s
  @type, @encoder = @base64 ? [ "B", :base64] : [ "Q", :quopri ]
  if @lower then
    @charset = @charset.downcase
    @type    = @type.downcase
  end
  yield.force_encoding str.encoding
ensure
  @charset = @type = @encoder = nil
end
|
662
|
+
|
663
|
+
# :stopdoc:
|
664
|
+
SPACE = " "
|
665
|
+
# :startdoc:
|
666
|
+
|
667
|
+
# Split +str+ into pieces of at most @limit characters (one piece when no
# limit is set), wrap each as "=?charset?type?data?=" using the current
# encoder, and join the pieces with SPACE.
#
# Raises ArgumentError for a limit below 1, which could never advance
# through a non-empty string.
def mask str
  step = @limit || str.length
  if step < 1 and not str.empty? then
    raise ArgumentError, "limit must be positive: #{step}."
  end
  r, i = [], 0
  while i < str.length do
    r.push "=?#@charset?#@type?#{send @encoder, str[ i, step]}?="
    i += step
  end
  r.join SPACE
end
|
676
|
+
|
677
|
+
# Base64 representation of +c+ without any line breaks.
def base64(c)
  [c].pack("m0")
end
|
682
|
+
|
683
|
+
# "Q" encoding, done in place on +c+: every byte except letters, digits
# and the space becomes =XX, then spaces become underscores.
def quopri(c)
  c.force_encoding(Encoding::ASCII_8BIT)
  c.gsub!(/([^ a-zA-Z0-9])/) { |byte| format("=%02X", byte.ord) }
  c.tr!(" ", "_")
  c
end
|
689
|
+
|
690
|
+
public
|
691
|
+
|
692
|
+
# Instance-level convenience: delegates to the class method +decode+.
def decode(str)
  self.class.decode(str)
end
|
695
|
+
|
696
|
+
class <<self
|
697
|
+
|
698
|
+
# The standard header content encoding has a word break limit of 64.
|
699
|
+
#
|
700
|
+
# Shared converter with a word break limit of 64, created on first use.
def std()
  @std || (@std = new(:limit => 64))
end
|
703
|
+
|
704
|
+
# :call-seq:
|
705
|
+
# needs? str -> true or false
|
706
|
+
#
|
707
|
+
# Use the standard content encoding.
|
708
|
+
#
|
709
|
+
# Ask the shared standard converter whether +str+ needs encoding.
def needs?(str)
  std.needs?(str)
end
|
712
|
+
|
713
|
+
# :call-seq:
|
714
|
+
# encode( str) -> str
|
715
|
+
#
|
716
|
+
# Use the standard content encoding.
|
717
|
+
#
|
718
|
+
# Encode +str+ using the shared standard converter.
def encode(str)
  std.encode(str)
end
|
721
|
+
|
722
|
+
# :call-seq:
|
723
|
+
# encode_whole( str) -> str
|
724
|
+
#
|
725
|
+
# Use the standard content encoding.
|
726
|
+
#
|
727
|
+
# Encode +str+ as one piece using the shared standard converter.
def encode_whole(str)
  std.encode_whole(str)
end
|
730
|
+
|
731
|
+
# :call-seq:
|
732
|
+
# decode( str) -> str
|
733
|
+
#
|
734
|
+
# Remove header field style escapes.
|
735
|
+
#
|
736
|
+
# HeaderExt.decode "=?UTF-8?Q?J=C3=B6rg_M=C3=BCller?="
|
737
|
+
# #=> "Jörg Müller"
|
738
|
+
#
|
739
|
+
# Decode all encoded words in +str+ and join the pieces.
#
# Whitespace between two decoded words, and before a decoded word at
# the start, is dropped; trailing whitespace after a decoded word is
# dropped as well. A decoded word directly adjacent to plain text gets a
# SPACE in between. If joining fails because of mixed encodings, all
# pieces are transcoded to the encodings of the decoded words, one after
# another, until joining succeeds; the error is re-raised when none is
# left.
def decode(str)
  pieces, encodings = [], []
  before_last, last = nil, nil
  lexer(str) do |type, piece|
    case type
    when :decoded
      encodings.push(piece.encoding)
      if last == :space && (before_last == :decoded || !before_last)
        pieces.pop
      elsif last == :plain
        pieces.push(SPACE)
      end
    when :space
      nil
    when :plain
      pieces.push(SPACE) if last == :decoded
    end
    pieces.push(piece)
    before_last, last = last, type
  end
  pieces.pop if last == :space && before_last == :decoded
  encodings.uniq!
  begin
    pieces.join
  rescue EncodingError
    raise if encodings.empty?
    target = encodings.shift
    pieces.each { |x| x.encode!(target) }
    retry
  end
end
|
774
|
+
|
775
|
+
# Cut +str+ into tokens and yield each as (type, text):
# :space for a whitespace run, :decoded for an unmasked encoded word
# ("=?charset?Q-or-B?data?="), :plain for everything else.
def lexer(str)
  while str
    m = /(\s+)|\B=\?(\S*?)\?([QB])\?(\S*?)\?=\B/i.match(str)
    if m && m[1]
      yield :plain, m.pre_match unless m.pre_match.empty?
      yield :space, m[0]
    elsif m && m[2]
      yield :plain, m.pre_match unless m.pre_match.empty?
      yield :decoded, unmask(m[2], m[3], m[4])
    else
      yield :plain, str
    end
    rest = m ? m.post_match : nil
    str = rest.notempty?
  end
end
|
791
|
+
|
792
|
+
private
|
793
|
+
|
794
|
+
# Decode the data of one encoded word.
#
# cs::  charset name, optionally followed by "*language" (RFC 2231, 5.),
#       which is cut off in place
# tp::  "B" (base64) or "Q" (quoted-printable with "_" for space)
# txt:: the encoded data
#
# The decoded bytes are tagged with the charset. UTF-7 is converted to
# UTF-8 here, and a charset starting with "unknown" is taken as US-ASCII.
def unmask(cs, tp, txt)
  case tp.upcase
  when "B"
    txt, = txt.unpack("m*")
  when "Q"
    txt.tr!("_", " ")
    txt, = txt.unpack("M*")
  end
  cs.slice!(/\*\w+\z/)   # language as in rfc2231, 5.
  if cs =~ /\Autf-?7\z/i
    # Ruby doesn't seem to do that.
    txt.force_encoding(Encoding::US_ASCII)
    txt.gsub!(/\+([0-9a-zA-Z+\/]*)-?/) do
      chunk = $1
      if chunk.empty?
        "+"
      else
        # modified base64 holding big-endian 16-bit units
        utf16 = "#{chunk}==".unpack("m*").join
        utf16.unpack("S>*").map { |cp| cp.chr(Encoding::UTF_8) }.join
      end
    end
    txt.force_encoding(Encoding::UTF_8)
  elsif cs =~ /\Aunknown/i
    txt.force_encoding(Encoding::US_ASCII)
  else
    txt.force_encoding(cs)
  end
  txt
end
|
820
|
+
|
821
|
+
end
|
822
|
+
|
823
|
+
end
|
824
|
+
|
825
|
+
end
|
826
|
+
|