hermeneutics 1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,317 @@
1
+ #
2
+ # hermeneutics/tags.rb -- Parse HTML code
3
+ #
4
+
5
+ =begin rdoc
6
+
7
+ :section: Classes defined here
8
+
9
+ Hermeneutics::Parser Parses HTML source and builds a tree
10
+
11
+ Hermeneutics::Tags Compiles parsed code to a tag tree
12
+
13
+ =end
14
+
15
+
16
+ require "hermeneutics/escape"
17
+
18
+
19
+ module Hermeneutics
20
+
21
# Parse an HTML file or string.
#
# The source is split into a list of +Tok+ structs that is available
# through +list+. Each token carries a +type+:
#
#   nil     character data (plain text and CDATA sections), text in +data+
#   :tag    an element; +tag+ is the downcased name, +attrs+ a Hash of
#           downcased attribute names and +data+ a nested Parser holding
#           the children (+nil+ for self-closed tags like <br/>)
#   :instr  processing instruction such as <?xml ... ?>; +attrs+ is a Hash
#   :comm   comment, text in +data+
#   :bang   declaration such as <!DOCTYPE ...>; +attrs+ is an Array
#   :cmd    bracketed declaration <![ ... ]>
#
class Parser

  # Raised on markup the parser cannot make sense of.
  class Error < StandardError ; end

  ren = /[a-z_][a-z0-9_.-]*/i

  RE_TAG     = %r{\A\s*(#{ren}(?::#{ren})?)\s*(.*?)\s*(/)?>}mx
  # Non-greedy body: a greedy match would run up to the _last_ "?>" of
  # the whole document.
  RE_INSTR   = %r{\A\?\s*(#{ren})\s*(.*?)\s*\?>}m
  RE_COMMENT = %r{\A!--(.*?)-->}m
  RE_CDATA   = %r{\A!\[CDATA\[(.*?)\]\]>}m
  # Case-insensitive so that "<!doctype html>" is accepted as well.
  RE_BANG    = %r{\A!\s*([A-Z]+)\s*(.*?)>}mi
  RE_CMD     = %r{\A!\s*(\[.*?\])\s*>}m

  # Attribute name, optionally followed by an equals sign that may be
  # surrounded by whitespace. Group 2 is set if a value follows.
  RE_ATTR    = %r{\A(#{ren}(?::#{ren})?)(\s*=\s*)?}

  Tok = Struct.new :type, :tag, :attrs, :data

  attr_reader :list

  # Parse +str+.
  #
  # +term+ is used internally for nested elements: it names the tag whose
  # closing counterpart ends this (sub-)parser. In that case +str+ is
  # modified in place and left holding whatever follows the closing tag,
  # so that the calling parser can continue from there. Without +term+,
  # +str+ is not touched.
  #
  # Raises Parser::Error on markup that matches none of the known forms,
  # e.g. a closing tag that does not belong to the innermost open element.
  #
  def initialize str, term = nil
    @list = []
    # Without a term there is nothing to close; +nil+ never matches below.
    closer = term && %r{\A/\s*#{Regexp.escape term}\s*>}im
    s = str
    while s =~ /</ do
      add_data $`
      s = $'
      e = case s
        when closer then
          nil
        when RE_TAG then
          s = $'
          # The sub-parser consumes the element's content from +s+.
          t = Tok[ :tag, $1.downcase, (attrs $2),
                                      (sub_parser s, $1, $3)]
          # Reset the post-match to all of +s+ for the assignment below.
          s =~ %r{\A}
          t
        when RE_INSTR   then Tok[ :instr, $1.downcase, (attrs $2), nil]
        when RE_COMMENT then Tok[ :comm,  nil, nil,        $1 ]
        when RE_CDATA   then Tok[ nil,    nil, nil,        $1 ]
        when RE_BANG    then Tok[ :bang,  $1,  (attrl $2), nil]
        when RE_CMD     then Tok[ :cmd,   $1,  nil,        nil]
        else
          raise Error, term ?
            "Unclosed standalone tag <#{term}>." :
            "Illegal markup: <#{s[ 0, 20]}"
      end
      s = $'
      e or break
      add_tok e
    end
    if term then
      str.replace s
    else
      add_data s
    end
  end

  # Look for a charset declaration: the +encoding+ attribute of an XML
  # declaration, or a <meta> tag inside <html>/<head>. Returns the
  # declared name as a String or +nil+.
  #
  def find_encoding
    find_enc @list
  end

  # Print the token tree to standard output, one token per line.
  #
  def pretty_print
    puts_tree @list, 0
  end

  private

  # Parser for an element's content, or +nil+ for self-closed tags.
  def sub_parser s, tag, close
    self.class.new s, tag unless close
  end

  def add_data str
    add_tok Tok[ nil, nil, nil, str] unless str.empty?
  end

  # Append a token; adjacent character data is merged into one token.
  def add_tok tok
    if not tok.type and (l = @list.last) and not l.type then
      l.data << tok.data
    else
      @list.push tok
    end
  end

  # Build a Hash from an attribute string. +str+ is consumed.
  # Attributes without a value get their own name as value.
  def attrs str
    a = {}
    until str.empty? do
      str.slice! RE_ATTR or
        raise Error, "Illegal attribute specification: #{str}"
      k = $1.downcase
      a[ k] = if $2 then
        attr_val str
      else
        str.lstrip!
        k
      end
    end
    a
  end

  # Build an Array from a list of bare values. +str+ is consumed.
  def attrl str
    a = []
    until str.empty? do
      a.push attr_val str
    end
    a
  end

  # Cut one (quoted or bare) value off the front of +str+.
  def attr_val str
    r = case str
      when /\A"(.*?)"/m then $1
      when /\A'(.*?)'/m then $1
      when /\A\S+/      then $&
      else raise Error, "Missing attribute value."
    end
    str.replace $'
    str.lstrip!
    r
  end

  def find_enc p
    p.each { |e|
      r = case e.type
        when :tag then
          case e.tag
            when "html", "head" then
              # Self-closed elements have no content to search.
              e.data and find_enc e.data.list
            when "meta" then
              e.attrs[ "charset"] || (meta_content_charset e)
          end
        when :instr, :query then
          e.attrs[ "encoding"]
      end
      return r if r
    }
    nil
  end

  # Charset from <meta http-equiv="Content-Type" content="...">.
  def meta_content_charset e
    if /\Acontent-type\z/i =~ e.attrs[ "http-equiv"] then
      require "hermeneutics/contents"
      c = Contents.parse e.attrs[ "content"]
      c[ "charset"]
    end
  end

  def puts_tree p, indent
    p.each { |e|
      print "%s[%s] %s " % [ " "*indent, e.type, e.tag, ]
      case e.type
        when :tag then
          puts
          puts_tree e.data.list, indent+1 if e.data
        when nil then
          puts "%s%s" % [ " "*(indent+1), e.data.inspect, ]
        else
          puts
      end
    }
  end

end
176
+
177
+
178
# A tree of tags compiled from parsed HTML. Every node has a +name+ (a
# Symbol, +nil+ for the root), an +attrs+ Hash with Symbol keys and a
# +list+ of children that are either Tags nodes or Strings.
#
# = Example
#
# This parses a table and outputs it as a CSV.
#
#   t = Tags.compile "<table><tr><td> ... </table>"
#   t.table.each :tr do |row|
#     if row.has? :th then
#       l = row.map :th do |h| h.data end.join ";"
#     else
#       l = row.map :td do |c| c.data end.join ";"
#     end
#     puts l
#   end
#
class Tags

  class <<self

    # Parse +str+ and build the tag tree; returns the root node.
    #
    # +parser+ is the parser _class_ to use (default: Parser). The text
    # is tagged with the encoding the document declares; if there is no
    # declaration, or it names an encoding Ruby does not know, the
    # encoding of +str+ is used.
    #
    def compile str, parser = nil
      p = (parser||Parser).new str
      enc = (known_encoding p.find_encoding) || str.encoding
      l = lex p, enc
      new nil, nil, l
    end

    # Convert a parser's token list into an Array of Tags nodes and
    # Strings. The parser's list is consumed. Attribute values and text
    # are decoded through Entities; processing instructions, comments
    # and declarations are dropped.
    #
    def lex parser, encoding = nil
      r = []
      until parser.list.empty? do
        e = parser.list.shift
        case e.type
          when :tag then
            a = {}
            e.attrs.each { |k,v|
              v.force_encoding encoding if encoding
              a[ k.downcase.to_sym] = Entities.new.decode v
            }
            i = new e.tag, a
            i.concat lex e.data, encoding if e.data
            r.push i
          when nil then
            d = e.data
            d.force_encoding encoding if encoding
            r.push Entities.new.decode d
          else
            # :instr, :comm, :bang, :cmd carry no content for the tree.
        end
      end
      r
    end

    private

    # The Encoding called +name+, or +nil+ if there is none.
    def known_encoding name
      Encoding.find name if name
    rescue ArgumentError, TypeError
      nil
    end

  end

  attr_reader :name, :attrs, :list

  # +elems+ may be given as single elements or as (nested) Arrays.
  def initialize name, attrs = nil, *elems
    @name = name.to_sym if name
    @attrs = {}.update attrs if attrs
    @list = []
    @list.concat elems.flatten
  end

  def push elem
    @list.push elem
  end

  def concat elems
    @list.concat elems
  end

  def inspect
    "<##{@name} [#{@list.length}]>"
  end

  # Yield every child, or, if +t+ is given, every child tag named +t+.
  # Without a block an Enumerator is returned.
  #
  def each t = nil
    block_given? or return enum_for :each, t
    if t then
      @list.each { |e| yield e if child_tag? e, t }
    else
      @list.each { |e| yield e }
    end
  end

  # Collect the block's results for every child tag named +t+.
  # +nil+ results are left out.
  #
  def map t
    @list.map { |e| yield e if child_tag? e, t }.compact
  end

  def has_tag? t
    @list.any? { |e| child_tag? e, t }
  end
  alias has? has_tag?

  # The +n+-th (counting from 0) child tag named +t+, or +nil+.
  #
  def tag t, n = nil
    n ||= 0
    @list.each { |e|
      if child_tag? e, t then
        return e if n.zero?
        n -= 1
      end
    }
    nil
  end

  # Child tags are accessible by name: <code>t.table</code> is the same
  # as <code>t.tag :table</code>.
  #
  def method_missing sym, *args
    found = begin
      tag sym, *args
    rescue StandardError
      # Arguments that #tag cannot handle mean "no such method", too.
      nil
    end
    found or super
  end

  def respond_to_missing? sym, include_private = false
    has_tag? sym or super
  end

  # The concatenated text of this node and all nodes below it.
  #
  def data
    d = String.new ""
    gather_data self, d
    d
  end

  private

  def child_tag? e, t
    Tags === e and e.name == t
  end

  def gather_data t, d
    t.list.each { |e|
      case e
        when Tags then gather_data e, d
        else           d << e
      end
    }
  end

end
315
+
316
+ end
317
+
@@ -0,0 +1,230 @@
1
+ #
2
+ # hermeneutics/transports.rb -- transporting mails
3
+ #
4
+
5
+ require "hermeneutics/mail"
6
+ require "hermeneutics/boxes"
7
+
8
+ require "supplement/locked"
9
+
10
+
11
+ module Hermeneutics
12
+
13
# Transport additions to Mail: saving into local mailboxes, piping
# through external programs, and sending via sendmail or SMTP.
#
class Mail

  SPOOLDIR = "/var/mail"
  MAILDIR  = "Mail"
  SENDMAIL = "/usr/sbin/sendmail"
  SYSDIR   = ".hermeneutics"

  # Log levels; a message is written if its level is not above the
  # configured +loglevel+.
  LEVEL = { ERR: 1, INF: 2, DBG: 3 }

  class <<self

    attr_accessor :spooldir, :spoolfile, :maildir, :sysdir, :default_format
    attr_writer   :sendmail
    attr_accessor :logfile, :loglevel

    # The mailbox for +path+; created if it does not exist yet. Results
    # are cached per +path+.
    #
    # +path+ may be a Box, +nil+ (the user's spool file), a name
    # starting with "=" (relative to the mail directory) or a path
    # relative to the home directory.
    #
    def box path = nil, default_format = nil
      @cache ||= {}
      @cache[ path] ||= find_box path, default_format
    end

    # Path of the sendmail program; defaults to SENDMAIL.
    def sendmail
      @sendmail||SENDMAIL
    end

    # Append a line to the log file. Nothing is done unless +logfile+
    # is set and +type+ is within +loglevel+. The log file's directory
    # is created on demand.
    #
    def log type, *message
      @logfile or return
      return if LEVEL[ type] > LEVEL[ @loglevel].to_i
      l = File.expand_path @logfile, expand_sysdir
      LockedFile.open l, "a" do |log|
        log.puts "[#{Time.new}] [#$$] [#{type}] #{message.join ' '}"
      end
      nil
    rescue Errno::ENOENT
      d = File.dirname l
      Dir.mkdir! d and retry
    end

    def expand_maildir
      File.expand_path @maildir||MAILDIR, "~"
    end

    def expand_sysdir
      File.expand_path @sysdir||SYSDIR, expand_maildir
    end

    private

    def find_box path, default_format
      b = case path
        when Box then
          path
        when nil then
          @spoolfile ||= getuser
          @spooldir  ||= SPOOLDIR
          m = File.expand_path @spoolfile, @spooldir
          MBox.new m
        else
          m = if path =~ /\A=/ then
            File.join expand_maildir, $'
          else
            File.expand_path path, "~"
          end
          Box.find m, default_format||@default_format
      end
      b.exists? or b.create
      b
    end

    # Login name of the user running this process.
    def getuser
      require "etc"
      (Etc.getpwuid Process.uid).name
    end

  end

  # :call-seq:
  #   obj.save( path, default_format = nil)    -> mb
  #
  # Save into local mailbox.
  #
  def save mailbox = nil, default_format = nil
    b = cls.box mailbox, default_format
    log :INF, "Delivering to", b.path
    b.deliver self
  end

  # :call-seq:
  #   obj.pipe( cmd, *args)      -> status
  #
  # Pipe into an external program. If a block is given, the program's
  # output will be yielded there. Returns the child's Process::Status;
  # a failure is logged but not raised.
  #
  def pipe cmd, *args
    log :INF, "Piping through:", cmd, *args
    ri, wi = IO.pipe
    ro, wo = IO.pipe
    child = fork do
      wi.close ; ro.close
      $stdout.reopen wo ; wo.close
      $stdin .reopen ri ; ri.close
      exec cmd, *args
    end
    ri.close ; wo.close
    # Feed the mail from a separate thread so that a program producing
    # output while still reading cannot block us.
    feeder = Thread.new wi do |w|
      begin
        w.write to_s
      rescue Errno::EPIPE
        # The program stopped reading; its exit status tells the rest.
      ensure
        w.close
      end
    end
    begin
      r = ro.read
      yield r if block_given?
    ensure
      ro.close
    end
    feeder.join
    Process.wait child
    status = $?
    unless status.success? then
      # exitstatus is nil if the child was killed by a signal.
      msg = if status.exitstatus then
        "Pipe failed with error code %d." % status.exitstatus
      else
        "Pipe terminated by signal %d." % status.termsig.to_i
      end
      log :ERR, msg
    end
    status
  end

  # :call-seq:
  #   obj.sendmail( *tos)      -> status
  #
  # Send by sendmail; leave the +tos+ list empty to
  # use Sendmail's -t option.
  #
  def sendmail *tos
    if tos.empty? then
      pipe cls.sendmail, "-t"
    else
      tos.flatten!
      # NOTE(review): a recipient starting with "-" would still be taken
      # as an option by sendmail; consider rejecting those.
      tos.map! { |t|
        case t
          when Addr then t.plain
          else           t.delete %q-,;"'<>(){}[]$&*?-   # security
        end
      }
      pipe cls.sendmail, *tos
    end
  end

  # :call-seq:
  #   obj.send!( smtp, *tos)      -> response
  #
  # Send by SMTP.
  #
  # +smtp+ may be a "host:port" String, a [host, port] Array, +nil+ for
  # localhost, an object that responds to +send_message+, or an object
  # with +host+ and +port+. If +tos+ is empty, the mail's receivers are
  # used. A leading mbox "From " line is not transmitted.
  #
  # Be aware that <code>#send</code> without bang is a
  # standard Ruby method.
  #
  def send! conn = nil, *tos
    if tos.empty? then
      tos = receivers.map { |t| t.plain }
    else
      tos.flatten!
    end
    m = to_s.sub %r{\AFrom [^\n]*\n?}, ""
    open_smtp conn do |smtp|
      log :INF, "Sending to", *tos
      frs = headers.from.map { |a| a.plain }
      smtp.send_message m, frs.first, tos
    end
  rescue NoMethodError
    raise "Missing field: #{$!.name}."
  end

  private

  # Net::SMTP, loaded on first use.
  def net_smtp
    require "net/smtp"
    Net::SMTP
  end
  alias net_smpt net_smtp    # former, misspelled name

  def open_smtp arg, &block
    case arg
      when String then h, p = arg.split ":"
      when Array  then h, p = *arg
      when nil    then h, p = "localhost", nil
      else
        if arg.respond_to? :send_message then
          yield arg
          return
        else
          h, p = arg.host, arg.port
        end
    end
    net_smtp.start h, p, &block
  end

  def log level, *msg
    cls.log level, *msg
  end

end
228
+
229
+ end
230
+