hermeneutics 1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +29 -0
- data/bin/hermesmail +262 -0
- data/etc/exim.conf +34 -0
- data/lib/hermeneutics/addrs.rb +687 -0
- data/lib/hermeneutics/boxes.rb +321 -0
- data/lib/hermeneutics/cgi.rb +253 -0
- data/lib/hermeneutics/cli/pop.rb +102 -0
- data/lib/hermeneutics/color.rb +275 -0
- data/lib/hermeneutics/contents.rb +351 -0
- data/lib/hermeneutics/css.rb +261 -0
- data/lib/hermeneutics/escape.rb +826 -0
- data/lib/hermeneutics/html.rb +462 -0
- data/lib/hermeneutics/mail.rb +105 -0
- data/lib/hermeneutics/message.rb +626 -0
- data/lib/hermeneutics/tags.rb +317 -0
- data/lib/hermeneutics/transports.rb +230 -0
- data/lib/hermeneutics/types.rb +137 -0
- data/lib/hermeneutics/version.rb +32 -0
- metadata +83 -0
@@ -0,0 +1,261 @@
|
|
1
|
+
#
|
2
|
+
# hermeneutics/css.rb -- CSS generation
|
3
|
+
#
|
4
|
+
|
5
|
+
require "hermeneutics/html"
|
6
|
+
|
7
|
+
|
8
|
+
module Hermeneutics
|
9
|
+
|
10
|
+
# == Example
|
11
|
+
#
|
12
|
+
# require "hermeneutics/css"
|
13
|
+
# require "hermeneutics/color"
|
14
|
+
# class MyCss < Css
|
15
|
+
#
|
16
|
+
# COL1 = "904f02".to_rgb
|
17
|
+
# COL2 = COL1.edit_hsv { |h,s,v| [h+15,s,v] }
|
18
|
+
#
|
19
|
+
# ATTR_COL1 = { color: COL1 }
|
20
|
+
# ATTR_COL2 = { color: COL2 }
|
21
|
+
# ATTR_DECON = { text_decoration: "none" }
|
22
|
+
# ATTR_DECOU = { text_decoration: "underline" }
|
23
|
+
#
|
24
|
+
# def build
|
25
|
+
# a ":link", ATTR_COL1, ATTR_DECON
|
26
|
+
# a ":visited", ATTR_COL2, ATTR_DECON
|
27
|
+
# a ":active", ATTR_COL1, ATTR_DECON
|
28
|
+
# a ":focus", ATTR_COL1, ATTR_DECOU
|
29
|
+
# space
|
30
|
+
#
|
31
|
+
# body "#dummy" do
|
32
|
+
# properties :background_color => "f7f7f7".to_rgb
|
33
|
+
# div ".child", :background_color => "e7e7e7".to_rgb
|
34
|
+
# @b = selector
|
35
|
+
# td do
|
36
|
+
# @bt = selector
|
37
|
+
# end
|
38
|
+
# end
|
39
|
+
# selectors @b, @bt, :font_size => :large
|
40
|
+
# end
|
41
|
+
# end
|
42
|
+
# Hermeneutics::Css.document
|
43
|
+
#
|
44
|
+
class Css
|
45
|
+
|
46
|
+
class <<self

  # The class that +open+ instantiates; +inherited+ stores every new
  # subclass here (always on +Css+ itself).
  attr_accessor :main

  # Subclass hook: remember +cls+ as the stylesheet class to build.
  def inherited(cls)
    Css.main = cls
  end

  # Create an instance of +main+ (or of the receiver when +main+ is not
  # set on it), direct its output to +out+ through +generate+ and yield
  # the instance. Returns the value of the block.
  def open(out = nil)
    sheet = (@main || self).new
    sheet.generate(out) { yield sheet }
  end

  # Open a stylesheet with no explicit output and call its +document+
  # method with the given arguments and block.
  def document(*args, &block)
    open { |sheet| sheet.document(*args, &block) }
  end

  # Write the stylesheet to the file +name+. When no name is given, it is
  # the basename of the running script with ".rb" replaced by ".css".
  # With a block the instance is yielded, otherwise +document+ is called.
  def write_file(name = nil)
    name ||= File.basename($0, ".rb") + ".css"
    File.open(name, "w") do |file|
      open(file) do |sheet|
        block_given? ? (yield sheet) : sheet.document
      end
    end
  end

end
|
76
|
+
|
77
|
+
# Run the block with the output set to +out+ (<code>$stdout</code> when
# +out+ is nil). The previous output is restored afterwards, also when the
# block raises. Returns the value of the block.
def generate(out = nil)
  previous = @out
  @out = out || $stdout
  yield
ensure
  @out = previous
end
|
86
|
+
|
87
|
+
|
88
|
+
# The chain of nested selector links that is in effect while +tag+ blocks
# are being executed. Each link is an array of string parts (combinator,
# tag name, suffix); +to_s+ joins the links with a space.
class Selector

  def initialize
    @chain = []
  end

  # Add a link for the duration of the block.
  #
  # descend:: +nil+, <code>">"</code>/<code>:child</code> or
  #           <code>"+"</code>/<code>:sibling</code>
  # name::    tag name; only written if it is <code>"*"</code> or a key
  #           of <code>Html::TAGS</code>
  # sub::     optional suffix such as <code>".cls"</code>,
  #           <code>"#id"</code>, <code>":link"</code> or
  #           <code>"[attr=val]"</code>
  #
  # Raises a RuntimeError for an unknown combinator, a combinator without
  # an enclosing tag or a malformed suffix. The link is validated
  # completely before it is pushed, so a rejected link leaves the chain of
  # the enclosing tags untouched.
  def tag descend, name, sub
    link = build_link descend, name, sub
    @chain.push link
    begin
      yield
    ensure
      # Only undo what was pushed above.
      @chain.pop
    end
  end

  protected

  # Take over the links of another chain (used by +dup+).
  def replace chain
    @chain.replace chain
  end

  public

  # A copy whose chain is independent of later +tag+ calls on the
  # original.
  def dup
    s = Selector.new
    s.replace @chain
    s
  end

  def to_s
    @chain.map { |c| c.join }.join " "
  end

  private

  # Validate the arguments of +tag+ and return the parts of the new link.
  def build_link descend, name, sub
    descend and @chain.empty? and
      raise "Descendor without previous tag: #{descend} #{name}#{sub}."
    c = []
    c.push case descend
      when ">", :child   then "> "
      when "+", :sibling then "+ "
      when nil           then
      else
        raise "Unknown descendor: #{descend}"
    end
    c.push name if name == "*" or Html::TAGS[ name]
    if sub then
      sub =~ /\A(?:
                [:.#]([a-z_0-9-]+)|
                \[([a-z0-9-]+)([~|]?=)(.*)\]
              )*\z/ix or
        raise "Improper tag specification: #{name}#{sub}."
      c.push sub
    end
    c
  end

end
|
131
|
+
|
132
|
+
# A new stylesheet starts with an empty selector chain.
def initialize()
  @selector = Selector.new
end
|
135
|
+
|
136
|
+
# Produce the stylesheet: arguments and block are handed on to +build+,
# which is not defined in this class.
def document(*args, &block)
  build(*args, &block)
end
|
139
|
+
|
140
|
+
# The path of the current output, or +nil+ if the output object has no
# +path+ method (or there is no output at all).
#
# The capability is checked with +respond_to?+ instead of rescuing
# NoMethodError, so an error raised inside a real +path+ implementation
# is no longer swallowed.
def path
  @out.path if @out.respond_to? :path
end
|
144
|
+
|
145
|
+
# Write +str+ as a CSS comment. Any <code>*/</code> inside the text is
# defused by +mask_comment+.
#
# A text without a line separator (<code>$/</code>) is written on one
# line, <code>/* text */</code>, without a trailing newline. A text
# containing one becomes a block comment with every line prefixed by
# <code>" * "</code>, followed by a newline.
#
# The separator is searched for literally; it used to be interpolated
# into a regular expression unescaped. The text is masked before
# anything is written, so a bad argument leaves no dangling "/*".
def comment str
  str = mask_comment str
  multiline = str.include? $/
  @out << "/*"
  if multiline then
    @out << $/
    str.each_line { |l| @out << " * " << l.chomp << $/ }
  else
    @out << " " << str
  end
  @out << " */"
  @out << $/ if multiline
end
|
161
|
+
|
162
|
+
# Write one line separator (<code>$/</code>) to the output, e.g. to set
# groups of rules apart.
def space
  @out.<<($/)
end
|
165
|
+
|
166
|
+
# Enter a selector link. Trailing Hash arguments are property sets; the
# remaining arguments go to <code>Selector#tag</code>. When properties
# were given, a rule is written for the new selector; otherwise the
# caller's block is executed inside it.
def tag(*args)
  props = []
  props.unshift(args.pop) while Hash === args.last
  @selector.tag(*args) do
    if props.empty?
      yield
    else
      properties(*props)
    end
  end
end
|
179
|
+
|
180
|
+
# Tag names are dispatched through +method_missing+. Every method already
# defined under a tag's name (Kernel's :p, :select, :sub, ...) would be
# found first, so it is removed here.
existing = private_instance_methods +
              protected_instance_methods + instance_methods
undef_method(*(Html::TAGS.keys & existing))
|
184
|
+
|
185
|
+
# Every key of <code>Html::TAGS</code> is available as a method that
# opens a selector link for that tag:
#
#   tagname [descend,] [sub,] [properties...] [&block]
#
# A single leading non-Hash argument is taken as the combinator if it is
# a Symbol or contains no letter (<code>">"</code>, <code>"+"</code>),
# otherwise as the suffix (<code>".cls"</code>, <code>":link"</code>).
# Two leading non-Hash arguments are combinator and suffix, in that
# order. Everything is passed on to +tag+. Other names go to +super+.
def method_missing sym, *args, &block
  return super unless Html::TAGS[ sym]
  sub = args.shift if args.any? and not Hash === args.first
  if args.any? and not Hash === args.first then
    desc, sub = sub, args.shift
  elsif sub !~ /[a-z]/i or Symbol === sub then
    desc, sub = sub, nil
  end
  tag desc, sym, sub, *args, &block
end

# Keep +respond_to?+ and +method+ in line with +method_missing+: tag
# names are answered here.
def respond_to_missing? sym, include_private = false
  Html::TAGS[ sym] ? true : super
end
|
200
|
+
|
201
|
+
# Write a rule for the selector currently in effect; the arguments are
# property hashes.
def properties(*args)
  write(@selector.to_s, *args)
end

# A copy of the selector currently in effect, to be used later with
# +selectors+.
def selector
  @selector.dup
end

# Write one rule for several selectors. The leading Selector arguments
# are joined with ", "; the remaining arguments are property hashes.
def selectors(*args)
  props = args.drop_while { |a| Selector === a }
  sels = args.take(args.length - props.length)
  write(sels.join(", "), *props)
end
|
217
|
+
|
218
|
+
private
|
219
|
+
|
220
|
+
# Return a copy of +str+ in which every comment terminator
# <code>*/</code> is pulled apart, so that the text cannot end the
# comment it is written into.
def mask_comment(str)
  str.gsub("*/", "* /")
end
|
223
|
+
|
224
|
+
INDENT = " "
|
225
|
+
|
226
|
+
# Write one rule: the selector text +sel+, followed by the declarations
# from all property hashes in +args+ (merged, later ones win). A single
# declaration is written on the selector's line; several get one line
# each, prefixed by +INDENT+.
def write(sel, *args)
  props = args.inject({}) { |all, a| all.update a }
  multi = props.size > 1
  @out << sel << " {"
  @out << $/ if multi
  lead, tail = multi ? [INDENT, $/] : [" ", " "]
  single(props) { |decl| @out << lead << decl << tail }
  @out << "}" << $/
end
|
241
|
+
|
242
|
+
# Turn a property hash into declarations of the form
# <code>"name: value;"</code>.
#
# * Symbol keys have their underscores replaced by hyphens
#   (<code>:background_color</code> gives
#   <code>"background-color"</code>); other keys are used as they are.
# * Array values are joined with a space.
#
# With a block every declaration is yielded and the block's results are
# returned; without one the array of declarations is returned.
#
# Symbol keys are converted with the core <code>Symbol#to_s</code>; the
# method no longer needs an extension method on Symbol and no longer
# calls itself.
def single hash
  decls = hash.map { |k,v|
    k = k.to_s.tr "_", "-" if Symbol === k
    v = v.join " "         if Array  === v
    "#{k}: #{v};"
  }
  block_given? ? decls.map { |d| yield d } : decls
end
|
257
|
+
|
258
|
+
end
|
259
|
+
|
260
|
+
end
|
261
|
+
|
@@ -0,0 +1,826 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
#
|
4
|
+
# hermeneutics/escape.rb -- Various encoding schemes for internet purposes
|
5
|
+
#
|
6
|
+
|
7
|
+
require "supplement"
|
8
|
+
|
9
|
+
|
10
|
+
=begin rdoc
|
11
|
+
|
12
|
+
:section: Classes defined here
|
13
|
+
|
14
|
+
Hermeneutics::Entities encodes to and decodes from HTML-Entities
|
15
|
+
(<code>&amp;</code> etc.)
|
16
|
+
|
17
|
+
Hermeneutics::URLText encodes to and decodes from URLs
|
18
|
+
(<code>%2d</code> etc.)
|
19
|
+
|
20
|
+
Hermeneutics::HeaderExt encodes to and decodes from E-Mail Header fields
|
21
|
+
(<code>=?UTF-8?Q?=C3=B6?=</code> etc.).
|
22
|
+
|
23
|
+
=end
|
24
|
+
|
25
|
+
module Hermeneutics
|
26
|
+
|
27
|
+
# Translate HTML and XML character entities: <code>"&"</code> to
|
28
|
+
# <code>"&amp;"</code> and vice versa.
|
29
|
+
#
|
30
|
+
# == What actually happens
|
31
|
+
#
|
32
|
+
# HTML pages usually come in with characters encoded <code>&lt;</code>
|
33
|
+
# for <code><</code> and <code>&euro;</code> for <code>€</code>.
|
34
|
+
#
|
35
|
+
# Further, they may contain a meta tag in the header like this:
|
36
|
+
#
|
37
|
+
# <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
38
|
+
# <meta charset="utf-8" /> (HTML5)
|
39
|
+
#
|
40
|
+
# or
|
41
|
+
#
|
42
|
+
# <?xml version="1.0" encoding="UTF-8" ?> (XHTML)
|
43
|
+
#
|
44
|
+
# When +charset+ is <code>utf-8</code> and the file contains the byte
|
45
|
+
# sequence <code>"\303\244"</code>/<code>"\xc3\xa4"</code> then there will
|
46
|
+
# be displayed a character <code>"ä"</code>.
|
47
|
+
#
|
48
|
+
# When +charset+ is <code>iso8859-15</code> and the file contains the byte
|
49
|
+
# sequence <code>"\344"</code>/<code>"\xe4"</code> then there will be
|
50
|
+
# displayed a character <code>"ä"</code>, too.
|
51
|
+
#
|
52
|
+
# The sequence <code>"&auml;"</code> will produce an <code>"ä"</code> in any
|
53
|
+
# case.
|
54
|
+
#
|
55
|
+
# == What you should do
|
56
|
+
#
|
57
|
+
# Generating your own HTML pages you will always be safe when you only
|
58
|
+
# produce entity tags as <code>&auml;</code> and <code>&euro;</code> or
|
59
|
+
# <code>&#x00e4;</code> and <code>&#x20ac;</code> respectively.
|
60
|
+
#
|
61
|
+
# == What this module does
|
62
|
+
#
|
63
|
+
# This module translates strings to a HTML-masked version. The encoding will
|
64
|
+
# not be changed and you may demand to keep 8-bit-characters.
|
65
|
+
#
|
66
|
+
# == Examples
|
67
|
+
#
|
68
|
+
# Entities.encode "<" #=> "&lt;"
|
69
|
+
# Entities.decode "&lt;" #=> "<"
|
70
|
+
# Entities.encode "äöü" #=> "&auml;&ouml;&uuml;"
|
71
|
+
# Entities.decode "&auml;&ouml;&uuml;" #=> "äöü"
|
72
|
+
#
|
73
|
+
class Entities
|
74
|
+
|
75
|
+
# :stopdoc:
|
76
|
+
# The characters that have to be masked in any case, and their names.
SPECIAL_ASC = {
  '"' => "quot", "&" => "amp", "<" => "lt", ">" => "gt",
}
# Matches one character out of SPECIAL_ASC.
RE_ASC = /[#{SPECIAL_ASC.keys.map { |x| Regexp.quote x }.join}]/

# Character => entity name; used by +encode+.
#
# The two invisible characters are written as escapes: U+00A0
# (no-break space) and U+00AD (soft hyphen).
#
# U+201A is named "sbquo" in HTML; the former spelling "bsquo" is not an
# HTML entity and is only still accepted when decoding (see NAMES).
SPECIAL = {
  "\u00a0" => "nbsp",
  "¡" => "iexcl", "¢" => "cent", "£" => "pound", "€" => "euro", "¥" => "yen", "Š" => "Scaron",
  "¤" => "curren", "¦" => "brvbar",
  "§" => "sect", "š" => "scaron", "©" => "copy", "ª" => "ordf", "«" => "laquo", "¬" => "not", "\u00ad" => "shy",
  "¨" => "uml",
  "®" => "reg", "¯" => "macr",

  "°" => "deg", "±" => "plusmn", "²" => "sup2", "³" => "sup3", "µ" => "micro", "¶" => "para",
  "´" => "acute",
  "·" => "middot", "¹" => "sup1", "º" => "ordm", "»" => "raquo", "Œ" => "OElig", "œ" => "oelig",
  "¸" => "cedil", "¼" => "frac14", "½" => "frac12",
  "Ÿ" => "Yuml", "¿" => "iquest",
  "¾" => "frac34",

  "À" => "Agrave", "Á" => "Aacute", "Â" => "Acirc", "Ã" => "Atilde", "Ä" => "Auml", "Å" => "Aring", "Æ" => "AElig",
  "Ç" => "Ccedil", "È" => "Egrave", "É" => "Eacute", "Ê" => "Ecirc", "Ë" => "Euml", "Ì" => "Igrave", "Í" => "Iacute",
  "Î" => "Icirc", "Ï" => "Iuml",
  "Ð" => "ETH", "Ñ" => "Ntilde", "Ò" => "Ograve", "Ó" => "Oacute", "Ô" => "Ocirc", "Õ" => "Otilde", "Ö" => "Ouml",
  "×" => "times", "Ø" => "Oslash", "Ù" => "Ugrave", "Ú" => "Uacute", "Û" => "Ucirc", "Ü" => "Uuml", "Ý" => "Yacute",
  "Þ" => "THORN", "ß" => "szlig",

  "à" => "agrave", "á" => "aacute", "â" => "acirc", "ã" => "atilde", "ä" => "auml", "å" => "aring", "æ" => "aelig",
  "ç" => "ccedil", "è" => "egrave", "é" => "eacute", "ê" => "ecirc", "ë" => "euml", "ì" => "igrave", "í" => "iacute",
  "î" => "icirc", "ï" => "iuml",
  "ð" => "eth", "ñ" => "ntilde", "ò" => "ograve", "ó" => "oacute", "ô" => "ocirc", "õ" => "otilde", "ö" => "ouml",
  "÷" => "divide", "ø" => "oslash", "ù" => "ugrave", "ú" => "uacute", "û" => "ucirc", "ü" => "uuml", "ý" => "yacute",
  "þ" => "thorn", "ÿ" => "yuml",

  "\u201a" => "sbquo", "‘" => "lsquo", "„" => "bdquo", "“" => "ldquo", "‹" => "lsaquo", "›" => "rsaquo",
  "–" => "ndash", "—" => "mdash", "‰" => "permil", "…" => "hellip", "†" => "dagger", "‡" => "Dagger",
}.update SPECIAL_ASC

# Entity name => character; used by +decode+.
NAMES = SPECIAL.invert
# Text written by earlier versions still decodes.
NAMES[ "bsquo"] = "\u201a"
|
114
|
+
# :startdoc:
|
115
|
+
|
116
|
+
attr_accessor :keep_8bit
|
117
|
+
|
118
|
+
# :call-seq:
|
119
|
+
# new( keep_8bit = nil) -> ent
|
120
|
+
# new( :keep_8bit => val) -> ent
|
121
|
+
#
|
122
|
+
# Creates an <code>Entities</code> converter.
|
123
|
+
#
|
124
|
+
# The parameter may be given as one value or as a hash.
|
125
|
+
#
|
126
|
+
# ent = Entities.new true
|
127
|
+
# ent = Entities.new :keep_8bit => true
|
128
|
+
#
|
129
|
+
# Store the +keep_8bit+ flag. It may be given directly or as the value
# of the key <code>:keep_8bit</code> in a Hash.
def initialize(keep_8bit = nil)
  @keep_8bit = Hash === keep_8bit ? keep_8bit[ :keep_8bit] : keep_8bit
end
|
135
|
+
|
136
|
+
# :call-seq:
|
137
|
+
# ent.encode( str) -> str
|
138
|
+
#
|
139
|
+
# Create a string whose characters are masked the HTML way:
|
140
|
+
#
|
141
|
+
# ent = Entities.new
|
142
|
+
# ent.encode "&<\"" #=> "&amp;&lt;&quot;"
|
143
|
+
# ent.encode "äöü" #=> "&auml;&ouml;&uuml;"
|
144
|
+
#
|
145
|
+
# The result will be in the same encoding as the source even if it will
|
146
|
+
# not contain any 8-bit characters (what can only happen when +keep_8bit+
|
147
|
+
# is set).
|
148
|
+
#
|
149
|
+
# ent = Entities.new true
|
150
|
+
#
|
151
|
+
# uml = "<ä>".encode "UTF-8"
|
152
|
+
# ent.encode uml #=> "<\xc3\xa4>" in UTF-8
|
153
|
+
#
|
154
|
+
# uml = "<ä>".encode "ISO-8859-1"
|
155
|
+
# ent.encode uml #=> "<\xe4>" in ISO-8859-1
|
156
|
+
#
|
157
|
+
# Return a masked copy of +str+. The characters of SPECIAL_ASC are
# always replaced by their named entity. Unless +keep_8bit+ is set, every
# non-ASCII character is replaced as well: by its name from SPECIAL if it
# has one, by a hexadecimal reference (<code>&#x....;</code>) otherwise.
def encode(str)
  r = str.new_string
  r.gsub!(RE_ASC) { |x| "&#{SPECIAL_ASC[ x]};" }
  return r if @keep_8bit
  r.gsub!(/[^\0-\x7f]/) { |c|
    # The table is keyed in this file's encoding.
    c.encode! __ENCODING__
    name = SPECIAL[ c]
    name ||= "#x%04x" % c.ord
    "&#{name};"
  }
  r
end
|
169
|
+
|
170
|
+
# Decoding needs no parameters; this is a shortcut for the class method
# of the same name.
def decode(str)
  self.class.decode(str)
end
|
173
|
+
|
174
|
+
public
|
175
|
+
|
176
|
+
class <<self

  # The converter used by the class-level +encode+; it is created with
  # the default parameters on first use.
  def std
    @std ||= new
  end

  # Encode +str+ with the standard converter.
  def encode str
    std.encode str
  end

  # :call-seq:
  #   Entities.decode( str)  -> str
  #
  # Replace HTML-style masks by normal characters:
  #
  #   Entities.decode "&lt;"                   #=> "<"
  #   Entities.decode "&auml;&ouml;&uuml;"     #=> "äöü"
  #
  # Unmasked 8-bit characters (<code>"ä"</code> instead of
  # <code>"&auml;"</code>) are kept; decoded characters are converted
  # to the encoding of +str+.
  #
  #   s = "&auml; ö &uuml;"
  #   s.encode! "utf-8"
  #   Entities.decode s                        #=> "ä ö ü"
  #
  #   s = "\xe4 &ouml; \xfc &euro;"
  #   s.force_encoding "iso-8859-15"
  #   Entities.decode s                        #=> "ä ö ü €"
  #                                                (in iso8859-15)
  #
  # Whatever cannot be decoded is left as it stands: unknown names,
  # numbers that are no valid code point and characters that the encoding
  # of +str+ cannot represent.
  #
  def decode str
    str.gsub( /&(.+?);/) do
      whole, name = $&, $1
      (named_decode name) or (numeric_decode name) or whole
    end
  end

  private

  # The character for the entity name +s+ in the encoding of +s+, or
  # +nil+ if the name is unknown or the character is not representable.
  def named_decode s
    c = NAMES[ s] or return
    c.encoding == s.encoding ? c : (c.encode s.encoding)
  rescue EncodingError
    nil
  end

  # The character for a numeric reference (<code>#228</code> or
  # <code>#xe4</code>) in the encoding of +s+, or +nil+ if +s+ is not a
  # numeric reference, is no valid code point or is not representable.
  def numeric_decode s
    s =~ /\A#(?:(\d+)|x([0-9a-f]+))\z/i or return
    code = $1 ? $1.to_i : ($2.to_i 0x10)
    (code.chr Encoding::UTF_8).encode! s.encoding
  rescue RangeError, EncodingError
    nil
  end

end
|
234
|
+
|
235
|
+
end
|
236
|
+
|
237
|
+
|
238
|
+
|
239
|
+
# URL-able representation
|
240
|
+
#
|
241
|
+
# == What's actually happening
|
242
|
+
#
|
243
|
+
# URLs may not contain spaces and several characters such as slashes, ampersands
|
244
|
+
# etc. These characters will be masked by a percent sign and two hex digits
|
245
|
+
# representing the ASCII code. Eight bit characters should be masked the
|
246
|
+
# same way.
|
247
|
+
#
|
248
|
+
# An URL line does not store encoding information by itself. A locator may
|
249
|
+
# either say one of these:
|
250
|
+
#
|
251
|
+
# http://www.example.com/subdir/index.html?umlfield=%C3%BCber+alles
|
252
|
+
# http://www.example.com/subdir/index.html?umlfield=%FCber+alles
|
253
|
+
#
|
254
|
+
# The reading CGI has to decide on itself how to treat it.
|
255
|
+
#
|
256
|
+
# == Examples
|
257
|
+
#
|
258
|
+
# URLText.encode "'Stop!' said Fred." #=> "%27Stop%21%27+said+Fred."
|
259
|
+
# URLText.decode "%27Stop%21%27+said+Fred%2e"
|
260
|
+
# #=> "'Stop!' said Fred."
|
261
|
+
#
|
262
|
+
class URLText
|
263
|
+
|
264
|
+
attr_accessor :keep_8bit, :keep_space, :mask_space
|
265
|
+
|
266
|
+
# :call-seq:
|
267
|
+
# new( hash) -> urltext
|
268
|
+
#
|
269
|
+
# Creates a <code>URLText</code> converter.
|
270
|
+
#
|
271
|
+
# The parameters may be given as values or as a hash.
|
272
|
+
#
|
273
|
+
# utx = URLText.new :keep_8bit => true, :keep_space => false
|
274
|
+
#
|
275
|
+
# See the +encode+ method for an explanation of these parameters.
|
276
|
+
#
|
277
|
+
# Take the parameters <code>:keep_8bit</code>, <code>:keep_space</code>
# and <code>:mask_space</code> from +hash+. Without a hash nothing is
# set.
def initialize(hash = nil)
  return unless hash
  @keep_8bit, @keep_space, @mask_space =
    hash[ :keep_8bit], hash[ :keep_space], hash[ :mask_space]
end
|
284
|
+
|
285
|
+
# :call-seq:
|
286
|
+
# encode( str) -> str
|
287
|
+
#
|
288
|
+
# Create a string that contains <code>%XX</code>-encoded bytes.
|
289
|
+
#
|
290
|
+
# utx = URLText.new
|
291
|
+
# utx.encode "'Stop!' said Fred." #=> "%27Stop%21%27+said+Fred."
|
292
|
+
#
|
293
|
+
# The result will not contain any 8-bit characters, except when
|
294
|
+
# +keep_8bit+ is set. The result will be in the same encoding as the
|
295
|
+
# argument although this normally has no meaning.
|
296
|
+
#
|
297
|
+
# utx = URLText.new :keep_8bit => true
|
298
|
+
# s = "< ä >".encode "UTF-8"
|
299
|
+
# utx.encode s #=> "%3C+\u{e4}+%3E" in UTF-8
|
300
|
+
#
|
301
|
+
# s = "< ä >".encode "ISO-8859-1"
|
302
|
+
# utx.encode s #=> "%3C+\xe4+%3E" in ISO-8859-1
|
303
|
+
#
|
304
|
+
# A space <code>" "</code> will not be replaced by a plus <code>"+"</code>
|
305
|
+
# if +keep_space+ is set.
|
306
|
+
#
|
307
|
+
# utx = URLText.new :keep_space => true
|
308
|
+
# s = "< x >"
|
309
|
+
# utx.encode s #=> "%3C x %3E"
|
310
|
+
#
|
311
|
+
# When +mask_space+ is set, then a space will be represented as
|
312
|
+
# <code>"%20"</code>,
|
313
|
+
#
|
314
|
+
# Return a copy of +str+ in which every character except letters,
# digits, <code>_</code>, <code>.</code> and <code>-</code> is masked.
#
# * A space becomes <code>"+"</code>, stays a space if +keep_space+ is
#   set, and becomes <code>"%20"</code> if +mask_space+ is set.
# * Unless +keep_8bit+ is set, the string is processed bytewise, so
#   every byte of a non-ASCII character becomes <code>%XX</code>; with
#   +keep_8bit+, non-ASCII characters are kept.
#
# The result has the encoding of +str+.
def encode(str)
  r = str.new_string
  r.force_encoding Encoding::ASCII_8BIT unless @keep_8bit
  r.gsub!(%r/([^a-zA-Z0-9_.-])/) { |c|
    if c == " " and not @mask_space
      @keep_space ? c : "+"
    elsif @keep_8bit and not c.ascii_only?
      c
    else
      "%%%02X" % c.ord
    end
  }
  r.encode! str.encoding
end
|
328
|
+
|
329
|
+
|
330
|
+
# A Hash whose keys are always symbols and whose entries can be read and
# written like attributes:
#
#   d = Dict.new
#   d[ "name"] = "John"
#   d.name             #=> "John"
#   d.age = 42
#   d[ :age]           #=> 42
#
class Dict < Hash

  class <<self
    # Build a Dict, yield it for filling and return it.
    def create
      i = new
      yield i
      i
    end
  end

  # With a block, the new Dict is yielded once. The block is explicitly
  # kept away from <code>Hash#initialize</code>, which would otherwise
  # install it as the default proc for missing keys.
  def initialize
    super( &nil)
    yield self if block_given?
  end

  # Both accessors convert the key with +to_sym+.
  def [] key
    super key.to_sym
  end

  def []= key, val
    super key.to_sym, val
  end

  # Store all pairs of +hash+ through <code>[]=</code> so that the keys
  # get converted. Returns +self+, like <code>Hash#update</code>.
  def update hash
    hash.each { |k,v| self[ k] = v }
    self
  end
  alias merge! update

  # Store a value that comes in as text: +nil+ stays +nil+, a decimal
  # integer of up to ten digits (without leading zeroes) is converted to
  # an Integer, everything else is stored as <code>to_s.notempty?</code>.
  def parse key, val
    self[ key] = case val
      when nil                                 then nil
      when /\A(?:[+-]?[1-9][0-9]{,9}|0)\z/ then val.to_i
      else                                          val.to_s.notempty?
    end
  end

  # <code>dict.name</code> reads the entry <code>:name</code>,
  # <code>dict.name = val</code> writes it. The setter stores under the
  # name without the trailing "=", so that the reader finds the value.
  # Names ending in "!" or "?" and calls with further arguments are not
  # handled here.
  def method_missing sym, *args
    name = sym.to_s
    if args.empty? and name !~ /[!?=]\z/ then
      self[ sym]
    elsif args.length < 2 and name.end_with? "=" then
      self[ name.chomp "="] = args.first
    else
      super
    end
  end

end
|
372
|
+
|
373
|
+
# :stopdoc:
|
374
|
+
PAIR_SET = "="
|
375
|
+
PAIR_SEP = "&"
|
376
|
+
# :startdoc:
|
377
|
+
|
378
|
+
# :call-seq:
|
379
|
+
# encode_hash( hash) -> str
|
380
|
+
#
|
381
|
+
# Encode a <code>Hash</code> to a URL-style string.
|
382
|
+
#
|
383
|
+
# utx = URLText.new
|
384
|
+
#
|
385
|
+
# h = { :name => "John Doe", :age => 42 }
|
386
|
+
# utx.encode_hash h
|
387
|
+
# #=> "name=John+Doe&age=42"
|
388
|
+
#
|
389
|
+
# h = { :a => ";;;", :x => "äöü" }
|
390
|
+
# utx.encode_hash h
|
391
|
+
# #=> "a=%3B%3B%3B&x=%C3%A4%C3%B6%C3%BC"
|
392
|
+
#
|
393
|
+
# Build a query string from +hash+. Keys and values are converted with
# +to_s+ and encoded. A +nil+ value drops the pair, +true+ repeats the
# key as value, +false+ gives an empty value.
def encode_hash(hash)
  parts = []
  hash.each { |(k,v)|
    v = case v
      when nil   then next
      when true  then k
      when false then ""
      else            v
    end
    parts.push [ (encode k.to_s), (encode v.to_s)].join( PAIR_SET)
  }
  parts.join PAIR_SEP
end
|
403
|
+
|
404
|
+
# :call-seq:
|
405
|
+
# mkurl( path, hash, anchor = nil) -> str
|
406
|
+
#
|
407
|
+
# Make an URL.
|
408
|
+
#
|
409
|
+
# utx = URLText.new
|
410
|
+
# h = { :name => "John Doe", :age => "42" }
|
411
|
+
# utx.mkurl "myscript.rb", h, "chapter"
|
412
|
+
# #=> "myscript.rb?name=John+Doe&age=42#chapter"
|
413
|
+
#
|
414
|
+
# Build an URL from a path, an optional parameter hash and an optional
# anchor:
#
#   mkurl "page.rb", { :a => 1 }, "top"    #=> "page.rb?a=1#top"
#   mkurl "page.rb", { :a => 1 }           #=> "page.rb?a=1"
#   mkurl "page.rb", "top"                 #=> "page.rb#top"
#   mkurl "page.rb", nil, "top"            #=> "page.rb#top"
#
# When the second argument is not a Hash, it is taken as the anchor
# unless an anchor is given as well. An explicit +nil+ in the hash
# position no longer moves the anchor into the hash position, where it
# raised a NoMethodError.
def mkurl path, hash = nil, anchor = nil
  unless Hash === hash then
    hash, anchor = nil, (anchor || hash)
  end
  r = "#{path}"
  r << "?#{encode_hash hash}" if hash
  r << "##{anchor}"           if anchor
  r
end
|
423
|
+
|
424
|
+
public
|
425
|
+
|
426
|
+
# Decoding does not depend on the converter's parameters; both methods
# are shortcuts for the class methods of the same name.
def decode(str)
  self.class.decode(str)
end

def decode_hash(qstr, &block)
  self.class.decode_hash(qstr, &block)
end
|
433
|
+
|
434
|
+
class <<self

  # The converter used by the class-level encoding methods; created with
  # the default parameters on first use.
  def std
    @std ||= new
  end

  # Encode +str+ with the standard converter.
  def encode str
    std.encode str
  end

  # Encode +hash+ with the standard converter.
  def encode_hash hash
    std.encode_hash hash
  end

  # Build an URL with the standard converter. As in the instance method,
  # the hash may be left out.
  def mkurl path, hash = nil, anchor = nil
    std.mkurl path, hash, anchor
  end

  # :call-seq:
  #   decode( str)  -> str
  #
  # Decode the contained string.
  #
  #   utx = URLText.new
  #   utx.decode "%27Stop%21%27+said+Fred%2e"  #=> "'Stop!' said Fred."
  #
  # The encoding will be kept. That means that an invalidly encoded
  # string could be produced.
  #
  #   a = "bl%F6d"
  #   a.encode! "utf-8"
  #   d = utx.decode a
  #   d =~ /./    #=> "invalid byte sequence in UTF-8 (ArgumentError)"
  #
  # The replacement is done on a binary copy. Inserting a decoded byte
  # above 0x7f into a string that already contains non-ASCII characters
  # would otherwise raise an Encoding::CompatibilityError.
  #
  def decode str
    r = str.b
    r.tr! "+", " "
    r.gsub!( /%([0-9A-F]{2})/i) { $1.hex.chr }
    r.force_encoding str.encoding
  end

  # :call-seq:
  #   decode_hash( str)                    -> hash
  #   decode_hash( str) { |key,val| ... }  -> nil or int
  #
  # Decode a URL-style encoded string to a <code>Hash</code>.
  # In case a block is given, the number of key-value pairs is returned
  # (+nil+ if there are none).
  #
  #   str = "a=%3B%3B%3B&x=%26auml%3B%26ouml%3B%26uuml%3B"
  #   URLText.decode_hash str do |k,v|
  #     puts "#{k} = #{v}"
  #   end
  #
  # Output:
  #
  #   a = ;;;
  #   x = &auml;&ouml;&uuml;
  #
  def decode_hash qstr
    if block_given? then
      i = 0
      each_pair qstr do |k,v|
        yield k, v
        i += 1
      end
      i.nonzero?
    else
      Dict.create do |h|
        each_pair qstr do |k,v| h.parse k, v end
      end
    end
  end

  private

  # Yield the decoded key and value of every pair in +qstr+. The value is
  # +nil+ for a pair without "=". Empty pairs, as produced by "&&" or a
  # leading "&", are skipped; they used to yield a +nil+ key.
  def each_pair qstr
    qstr or return
    qstr.to_s.split( PAIR_SEP).each do |pair|
      next if pair.empty?
      k, v = pair.split PAIR_SET, 2
      yield decode( k), (v && decode( v))
    end
  end

end
|
522
|
+
|
523
|
+
end
|
524
|
+
|
525
|
+
# Header field contents (RFC 2047) encoding
|
526
|
+
#
|
527
|
+
# == Examples
|
528
|
+
#
|
529
|
+
# HeaderExt.encode "Jörg Müller"
|
530
|
+
# #=> "=?utf-8?Q?J=C3=B6rg_M=C3=BCller?="
|
531
|
+
# HeaderExt.decode "=?UTF-8?Q?J=C3=B6rg_M=C3=BCller?="
|
532
|
+
# #=> "Jörg Müller"
|
533
|
+
#
|
534
|
+
class HeaderExt
|
535
|
+
|
536
|
+
# :call-seq:
|
537
|
+
# new( [ parameters] ) -> con
|
538
|
+
#
|
539
|
+
# Creates a <code>HeaderExt</code> converter.
|
540
|
+
#
|
541
|
+
# See the +encode+ method for an explanation of the parameters.
|
542
|
+
#
|
543
|
+
# == Examples
|
544
|
+
#
|
545
|
+
# con = HeaderExt.new
|
546
|
+
# con = HeaderExt.new :base64 => true, :limit => 32, :lower => true
|
547
|
+
# con = HeaderExt.new :mask => /["'()]/
|
548
|
+
#
|
549
|
+
def initialize params = nil
|
550
|
+
if params then
|
551
|
+
@base64 = params.delete :base64
|
552
|
+
@limit = params.delete :limit
|
553
|
+
@lower = params.delete :lower
|
554
|
+
@mask = params.delete :mask
|
555
|
+
params.empty? or
|
556
|
+
raise ArgumentError, "invalid parameter: #{params.keys.first}."
|
557
|
+
end
|
558
|
+
end
|
559
|
+
|
560
|
+
# :call-seq:
|
561
|
+
# needs? str -> true or false
|
562
|
+
#
|
563
|
+
# Check whether a string needs encoding.
|
564
|
+
#
|
565
|
+
# +true+ if +str+ contains a non-ASCII character or matches the
# <code>:mask</code> expression, +false+ otherwise.
def needs?(str)
  return true unless str.ascii_only?
  str =~ @mask ? true : false
end
|
568
|
+
|
569
|
+
# :call-seq:
|
570
|
+
# encode( str) -> str
|
571
|
+
#
|
572
|
+
# Create a header field style encoded string. The following parameters
|
573
|
+
# will be evaluated:
|
574
|
+
#
|
575
|
+
# :base64 # build ?B? instead of ?Q?
|
576
|
+
# :limit # break words longer than this
|
577
|
+
# :lower # build lower case ?b? and ?q?
|
578
|
+
# :mask # a regular expression detecting characters to mask
|
579
|
+
#
|
580
|
+
# The result will not contain any 8-bit characters. The encoding will
|
581
|
+
# be kept although it won't have a meaning.
|
582
|
+
#
|
583
|
+
# The parameter <code>:mask</code> will have no influence on the masking
|
584
|
+
# itself but will guarantee characters to be masked.
|
585
|
+
#
|
586
|
+
# == Examples
|
587
|
+
#
|
588
|
+
# yodel = "Holleri du dödl di, diri diri dudl dö."
|
589
|
+
#
|
590
|
+
# con = HeaderExt.new
|
591
|
+
# con.encode yodel
|
592
|
+
# #=> "Holleri du =?UTF-8?Q?d=C3=B6dl?= di, diri diri dudl =?UTF-8?Q?d=C3=B6=2E?="
|
593
|
+
#
|
594
|
+
# yodel.encode! "iso8859-1"
|
595
|
+
# con.encode yodel
|
596
|
+
# #=> "Holleri du =?ISO8859-1?Q?d=F6dl?= di, diri diri dudl =?ISO8859-1?Q?d=F6=2E?="
|
597
|
+
#
|
598
|
+
# e = "€"
|
599
|
+
# e.encode! "utf-8" ; con.encode e #=> "=?UTF-8?Q?=E2=82=AC?="
|
600
|
+
# e.encode! "iso8859-15" ; con.encode e #=> "=?ISO8859-15?Q?=A4?="
|
601
|
+
# e.encode! "ms-ansi" ; con.encode e #=> "=?MS-ANSI?Q?=80?="
|
602
|
+
#
|
603
|
+
# con = HeaderExt.new :mask => /["'()]/
|
604
|
+
# con.encode "'Stop!' said Fred."
|
605
|
+
# #=> "=?UTF-8?Q?=27Stop=21=27?= said Fred."
|
606
|
+
#
|
607
|
+
# Encode +str+ word by word. Words that need no encoding are copied
# together with the whitespace in front of them. Adjacent words that do
# need encoding are collected, with the whitespace between them, and
# masked as one piece; whitespace at the end of the string joins a piece
# that is still open.
def encode(str)
  do_encoding str do
    result, pending = "", ""
    rest = str
    while (m = /\S+/.match rest)
      gap, word = m.pre_match, m[ 0]
      if needs? word
        # The gap belongs to the open piece if there is one.
        (pending.empty? ? result : pending) << gap
        pending << word
      else
        unless pending.empty?
          result << (mask pending)
          pending.clear
        end
        result << gap << word
      end
      rest = m.post_match
    end
    if pending.empty?
      result << rest
    else
      pending << rest
      result << (mask pending)
    end
    result
  end
end
|
633
|
+
|
634
|
+
# :call-seq:
|
635
|
+
# encode_whole( str) -> str
|
636
|
+
#
|
637
|
+
# Unlike +encode+, the whole string will be encoded as one piece.
|
638
|
+
#
|
639
|
+
# yodel = "Holleri du dödl di, diri diri dudl dö."
|
640
|
+
# HeaderExt.encode_whole yodel
|
641
|
+
# #=> "=?UTF-8?Q?Holleri_du_d=C3=B6dl_di,_diri_diri_dudl_d=C3=B6=2E?="
|
642
|
+
#
|
643
|
+
# Mask the complete string, whether or not its words need it.
def encode_whole(str)
  do_encoding(str) { mask(str) }
end
|
648
|
+
|
649
|
+
private
|
650
|
+
|
651
|
+
# Set up the state that +mask+ reads, run the block and reset the state.
#
# @charset:: name of the encoding of +str+
# @type::    "B" or "Q" (lower case if <code>:lower</code> is set)
# @encoder:: name of the method doing the work (:base64 or :quopri)
#
# The result of the block is tagged with the encoding of +str+.
#
# The charset is held as a String. It used to be the Encoding object,
# which has no <code>downcase!</code>, so any use of <code>:lower</code>
# raised a NoMethodError.
def do_encoding str
  @charset = str.encoding.to_s
  @type, @encoder = @base64 ? [ "B", :base64] : [ "Q", :quopri ]
  if @lower then
    @charset = @charset.downcase
    @type    = @type.downcase
  end
  yield.force_encoding str.encoding
ensure
  @charset = @type = @encoder = nil
end
|
662
|
+
|
663
|
+
# :stopdoc:
|
664
|
+
SPACE = " "
|
665
|
+
# :startdoc:
|
666
|
+
|
667
|
+
# Turn +str+ into one or more encoded words of the form
# <code>=?charset?type?text?=</code>, joined by a space. If
# <code>:limit</code> is set, the string is cut into pieces of at most
# that many characters and each piece becomes a word of its own.
#
# Relies on the state set up by +do_encoding+. An empty string gives an
# empty result.
#
# Raises an ArgumentError for a limit that is not positive; such a limit
# used to make the loop run forever.
def mask str
  return "" if str.empty?
  l = @limit || str.length
  l > 0 or raise ArgumentError, "limit must be positive: #{l}."
  words = (0...str.length).step( l).map { |i|
    "=?#@charset?#@type?#{send @encoder, str[ i, l]}?="
  }
  words.join SPACE
end
|
676
|
+
|
677
|
+
# Base64 text for +c+ on one line, without any whitespace.
def base64(c)
  [c].pack("m0")
end
|
682
|
+
|
683
|
+
# Q-encode +c+ in place and return it: the string is treated as bytes,
# every byte other than an ASCII letter, digit or space becomes
# <code>=XX</code>, and spaces become underscores.
def quopri(c)
  c.force_encoding(Encoding::ASCII_8BIT)
  c.gsub!(/([^ a-zA-Z0-9])/) { |byte| "=%02X" % byte.ord }
  c.tr!(" ", "_")
  c
end
|
689
|
+
|
690
|
+
public
|
691
|
+
|
692
|
+
# Decoding does not depend on the converter's parameters; this is a
# shortcut for the class method of the same name.
def decode(str)
  self.class.decode(str)
end
|
695
|
+
|
696
|
+
class <<self
|
697
|
+
|
698
|
+
# The standard header content encoding has a word break limit of 64.
|
699
|
+
#
|
700
|
+
def std
  @std ||= new(:limit => 64)
end

# :call-seq:
#   needs? str  -> true or false
#
# Ask the standard converter whether +str+ needs encoding.
#
def needs?(str)
  std.needs?(str)
end

# :call-seq:
#   encode( str)  -> str
#
# Encode word by word with the standard converter.
#
def encode(str)
  std.encode(str)
end

# :call-seq:
#   encode_whole( str)  -> str
#
# Encode as one piece with the standard converter.
#
def encode_whole(str)
  std.encode_whole(str)
end
|
730
|
+
|
731
|
+
# :call-seq:
|
732
|
+
# decode( str) -> str
|
733
|
+
#
|
734
|
+
# Remove header field style escapes.
|
735
|
+
#
|
736
|
+
# HeaderExt.decode "=?UTF-8?Q?J=C3=B6rg_M=C3=BCller?="
|
737
|
+
# #=> "Jörg Müller"
|
738
|
+
#
|
739
|
+
# Put the pieces delivered by +lexer+ together again.
#
# * Whitespace between two encoded words is dropped, as is whitespace
#   between the start of the string and an encoded word, and whitespace
#   after an encoded word at the end of the string.
# * An encoded word that directly touches plain text is set apart from
#   it by one space.
#
# If the pieces cannot be joined because of their encodings, all of them
# are converted to the encodings of the encoded words, one after the
# other, until the join succeeds; the error is re-raised when none is
# left.
def decode(str)
  pieces, encodings = [], []
  before, last = nil, nil
  lexer str do |type,piece|
    if type == :decoded
      encodings.push piece.encoding
      if last == :space
        pieces.pop if before == :decoded or not before
      elsif last == :plain
        pieces.push SPACE
      end
    elsif type == :plain and last == :decoded
      pieces.push SPACE
    end
    pieces.push piece
    before, last = last, type
  end
  pieces.pop if last == :space and before == :decoded
  encodings.uniq!
  begin
    pieces.join
  rescue EncodingError
    raise if encodings.empty?
    target = encodings.shift
    pieces.each { |x| x.encode! target }
    retry
  end
end
|
774
|
+
|
775
|
+
# Split +str+ into pieces and yield each with its type:
#
# :space::   a run of whitespace
# :decoded:: an encoded word (<code>=?charset?Q?text?=</code> or
#            <code>?B?</code>), already decoded by +unmask+
# :plain::   everything else
#
# Nothing is yielded for +nil+ or an empty remainder.
def lexer(str)
  while str
    m = /(\s+)|\B=\?(\S*?)\?([QB])\?(\S*?)\?=\B/i.match str
    unless m
      yield :plain, str
      break
    end
    yield :plain, m.pre_match unless m.pre_match.empty?
    if m[ 1]
      yield :space, m[ 0]
    else
      yield :decoded, (unmask m[ 2], m[ 3], m[ 4])
    end
    rest = m.post_match
    str = rest.empty? ? nil : rest
  end
end
|
791
|
+
|
792
|
+
private
|
793
|
+
|
794
|
+
# Decode the text of one encoded word.
#
# cs::  charset name; a language suffix (<code>*en</code>, RFC 2231,
#       section 5) is removed from it in place
# tp::  "B" (base64) or "Q" (quoted-printable with "_" for space), in
#       either case
# txt:: the encoded text
#
# Returns the decoded bytes tagged with the charset. UTF-7 is converted
# to UTF-8 here; a charset whose name starts with "unknown" is tagged
# US-ASCII.
#
# A charset name that Ruby does not know is tagged as binary. It used
# to raise an ArgumentError out of +force_encoding+ and thereby aborted
# the decoding of the whole header field.
def unmask cs, tp, txt
  case tp.upcase
    when "B" then txt, = txt.unpack "m*"
    when "Q" then txt.tr! "_", " " ; txt, = txt.unpack "M*"
  end
  cs.slice!( /\*\w+\z/)   # language as in rfc2231, 5.
  case cs
    when /\Autf-?7\z/i then
      # Ruby doesn't seem to do that.
      txt.force_encoding Encoding::US_ASCII
      txt.gsub!( /\+([0-9a-zA-Z+\/]*)-?/) do
        if $1.empty? then
          "+"
        else
          # The shifted part is base64-coded UTF-16BE.
          s = ("#$1==".unpack "m*").join
          (s.unpack "S>*").map { |x| x.chr Encoding::UTF_8 }.join
        end
      end
      txt.force_encoding Encoding::UTF_8
    when /\Aunknown/i then
      txt.force_encoding Encoding::US_ASCII
    else
      begin
        txt.force_encoding cs
      rescue ArgumentError
        # Unknown encoding name: keep the raw bytes.
        txt.force_encoding Encoding::ASCII_8BIT
      end
  end
  txt
end
|
820
|
+
|
821
|
+
end
|
822
|
+
|
823
|
+
end
|
824
|
+
|
825
|
+
end
|
826
|
+
|