whistle 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +3 -0
- data/README.txt +38 -0
- data/bin/whistle +90 -0
- data/lib/config.rb +19 -0
- data/lib/phash.rb +16 -0
- data/lib/relay.rb +24 -0
- data/lib/resource.rb +113 -0
- data/lib/ssl_patch.rb +15 -0
- data/lib/switchbox.rb +54 -0
- data/lib/time_ext.rb +30 -0
- data/lib/version.rb +3 -0
- data/sample/config.yml +12 -0
- data/vendor/rscm-0.5.1-patched-stripped/README +218 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm.rb +14 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/abstract_log_parser.rb +35 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/base.rb +289 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/command_line.rb +146 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/difftool.rb +44 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/line_editor.rb +46 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/mockit.rb +157 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/parser.rb +39 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/path_converter.rb +60 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/platform.rb +26 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/revision.rb +103 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/revision_file.rb +85 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/revision_poller.rb +93 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/revisions.rb +79 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/clearcase.rb +182 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/cvs.rb +374 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/cvs_log_parser.rb +154 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/darcs.rb +120 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/darcs_log_parser.rb +65 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/monotone.rb +338 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/monotone_log_parser.rb +109 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/mooky.rb +6 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/perforce.rb +216 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/star_team.rb +104 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/subversion.rb +397 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/subversion_log_parser.rb +165 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/tempdir.rb +17 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/time_ext.rb +11 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/version.rb +13 -0
- data/vendor/ruby-feedparser-0.5-stripped/README +14 -0
- data/vendor/ruby-feedparser-0.5-stripped/lib/feedparser.rb +28 -0
- data/vendor/ruby-feedparser-0.5-stripped/lib/feedparser/feedparser.rb +300 -0
- data/vendor/ruby-feedparser-0.5-stripped/lib/feedparser/filesizes.rb +12 -0
- data/vendor/ruby-feedparser-0.5-stripped/lib/feedparser/html-output.rb +126 -0
- data/vendor/ruby-feedparser-0.5-stripped/lib/feedparser/html2text-parser.rb +409 -0
- data/vendor/ruby-feedparser-0.5-stripped/lib/feedparser/rexml_patch.rb +28 -0
- data/vendor/ruby-feedparser-0.5-stripped/lib/feedparser/sgml-parser.rb +332 -0
- data/vendor/ruby-feedparser-0.5-stripped/lib/feedparser/text-output.rb +83 -0
- data/vendor/ruby-feedparser-0.5-stripped/lib/feedparser/textconverters.rb +120 -0
- metadata +132 -0
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'feedparser/textconverters'
|
2
|
+
|
3
|
+
# Patch for REXML
|
4
|
+
# Very ugly patch to make REXML error-proof.
|
5
|
+
# The problem is REXML uses IConv, which isn't error-proof at all.
|
6
|
+
# With those changes, it uses unpack/pack with some error handling
|
7
|
+
# Patch for REXML: replaces its transcoding helpers so that decoding goes
# through String#toUTF8 (unpack/pack with error handling) instead of Iconv.
module REXML
  module Encoding
    # Decode +str+ into UTF-8 using the encoding recorded by #encoding=.
    #
    # str:: the String to convert; must respond to toUTF8.
    # Returns whatever String#toUTF8 returns for the recorded encoding.
    # Falls back to 'utf-8' when no encoding has been recorded yet, which is
    # the same default #encoding= applies to a nil argument.
    def decode(str)
      current = (defined?(@encoding) && @encoding) || 'utf-8'
      str.toUTF8(current)
    end

    # Encoding is deliberately a no-op: +str+ is returned untouched.
    def encode(str)
      str
    end

    # Record the encoding used by #decode.
    #
    # enc:: encoding name, or nil to select 'utf-8'.
    # Does nothing when +enc+ equals the encoding already recorded.
    def encoding=(enc)
      return if defined?(@encoding) && enc == @encoding
      @encoding = enc || 'utf-8'
    end
  end

  class Element
    # Expose the underlying @children collection itself (not a copy).
    def children
      @children
    end
  end
end
|
@@ -0,0 +1,332 @@
|
|
1
|
+
# A parser for SGML, using the derived class as static DTD.
|
2
|
+
# from http://raa.ruby-lang.org/project/html-parser
|
3
|
+
module FeedParser
  # A parser for SGML, using the derived class as static DTD.
  # from http://raa.ruby-lang.org/project/html-parser
  #
  # Subclasses describe the markup they understand by defining methods:
  #   start_TAG(attrs) / end_TAG  - container elements (tracked on a stack)
  #   do_TAG(attrs)               - elements without an end tag
  # and may override the handle_* / unknown_* hooks, which do nothing here.
  # +attrs+ is an Array of [lowercased_name, value] pairs.
  class SGMLParser
    # Regular expressions used for parsing:
    Interesting = /[&<]/
    Incomplete = Regexp.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|' +
                                '<([a-zA-Z][^<>]*|/([a-zA-Z][^<>]*)?|' +
                                '![^<>]*)?')

    Entityref = /&([a-zA-Z][-.a-zA-Z0-9]*);/
    Charref = /&#([0-9]+);/

    Starttagopen = /<[>a-zA-Z]/
    Endtagopen = /<\/[<>a-zA-Z]/
    Endbracket = /[<>]/
    Special = /<![^<>]*>/
    Commentopen = /<!--/
    Commentclose = /--[ \t\n]*>/
    Tagfind = /[a-zA-Z][a-zA-Z0-9.-]*/
    Attrfind = Regexp.compile('[\s,]*([a-zA-Z_][a-zA-Z_0-9.-]*)' +
                              '(\s*=\s*' +
                              "('[^']*'" +
                              '|"[^"]*"' +
                              '|[-~a-zA-Z0-9,./:+*%?!()_#=]*))?')

    Entitydefs =
      {'lt'=>'<', 'gt'=>'>', 'amp'=>'&', 'quot'=>'"', 'apos'=>'\''}

    # verbose:: when true, #report_unbalanced prints diagnostics.
    def initialize(verbose=false)
      @verbose = verbose
      reset
    end

    # Drop all buffered input and parsing state.
    def reset
      # dup: the buffer is appended to in #feed, so it must not be a
      # (possibly frozen) literal.
      @rawdata = ''.dup
      @stack = []
      @lasttag = '???'
      @nomoretags = false
      @literal = false
    end

    # True when an element named +gi+ is currently open.
    def has_context(gi)
      @stack.include?(gi)
    end

    # From now on treat all remaining input as plain data.
    def setnomoretags
      @nomoretags = true
      @literal = true
    end

    # Treat markup as plain data until the next end tag is parsed.
    def setliteral(*args)
      @literal = true
    end

    # Append +data+ to the buffer and parse as much of it as is complete.
    # Incomplete trailing constructs stay buffered for the next call.
    def feed(data)
      @rawdata << data
      goahead(false)
    end

    # Finish parsing: whatever is still buffered is flushed as data.
    def close
      goahead(true)
    end

    # Main scanning loop.
    #
    # _end:: true when no more input will arrive, so an unparsable tail is
    #        passed to handle_data instead of being kept in the buffer.
    def goahead(_end)
      rawdata = @rawdata
      i = 0
      n = rawdata.length
      while i < n
        if @nomoretags
          handle_data(rawdata[i..(n - 1)])
          i = n
          break
        end
        # Everything up to the next '&' or '<' is plain data.
        j = rawdata.index(Interesting, i) || n
        handle_data(rawdata[i..(j - 1)]) if i < j
        i = j
        break if i == n
        if rawdata[i, 1] == '<'
          if rawdata.index(Starttagopen, i) == i
            if @literal
              handle_data(rawdata[i, 1])
              i += 1
              next
            end
            k = parse_starttag(i)
            break unless k # tag not complete yet: wait for more input
            i = k
            next
          end
          if rawdata.index(Endtagopen, i) == i
            k = parse_endtag(i)
            break unless k
            i = k
            @literal = false
            next
          end
          if rawdata.index(Commentopen, i) == i
            if @literal
              handle_data(rawdata[i, 1])
              i += 1
              next
            end
            k = parse_comment(i) # returns a length, not an index
            break unless k
            i += k
            next
          end
          if rawdata.index(Special, i) == i
            if @literal
              handle_data(rawdata[i, 1])
              i += 1
              next
            end
            k = parse_special(i) # returns a length, not an index
            break unless k
            i += k
            next
          end
        elsif rawdata[i, 1] == '&'
          if rawdata.index(Charref, i) == i
            found = Regexp.last_match
            i += found[0].length
            handle_charref(found[1])
            next
          end
          if rawdata.index(Entityref, i) == i
            found = Regexp.last_match
            i += found[0].length
            handle_entityref(found[1])
            next
          end
        else
          raise RuntimeError, 'neither < nor & ??'
        end
        # We get here only if incomplete matches but
        # nothing else
        match = rawdata.index(Incomplete, i)
        unless match == i
          handle_data(rawdata[i, 1])
          i += 1
          next
        end
        j = match + Regexp.last_match[0].length
        break if j == n # Really incomplete
        handle_data(rawdata[i..(j - 1)])
        i = j
      end
      # end while
      if _end && i < n
        handle_data(@rawdata[i..(n - 1)])
        i = n
      end
      @rawdata = rawdata[i..-1]
    end

    # Parse the comment starting at index +i+.
    # Returns the number of characters consumed, or nil when the closing
    # '-->' has not arrived yet.
    def parse_comment(i)
      rawdata = @rawdata
      if rawdata[i, 4] != '<!--'
        raise RuntimeError, 'unexpected call to handle_comment'
      end
      match = rawdata.index(Commentclose, i)
      return nil unless match
      matched_length = Regexp.last_match[0].length
      handle_comment(rawdata[(i + 4)..(match - 1)])
      match + matched_length - i
    end

    # Parse the start tag at index +i+ and dispatch it via #finish_starttag.
    # Returns the index just past the tag, or nil when the tag is incomplete.
    def parse_starttag(i)
      rawdata = @rawdata
      j = rawdata.index(Endbracket, i + 1)
      return nil unless j
      attrs = []
      if rawdata[i + 1, 1] == '>'
        # SGML shorthand: <> == <last open tag seen>
        k = j
        tag = @lasttag
      else
        unless rawdata.index(Tagfind, i + 1)
          raise RuntimeError, 'unexpected call to parse_starttag'
        end
        name = Regexp.last_match[0]
        k = i + 1 + name.length
        tag = name.downcase
        @lasttag = tag
      end
      while k < j
        # The attribute must start exactly at k. An unanchored search would
        # skip over characters such as '/' and pick up text that lies after
        # the tag (e.g. "hello" in "<br />hello") as a bogus attribute.
        break unless rawdata.index(Attrfind, k) == k
        found = Regexp.last_match
        attrname, rest, attrvalue = found[1], found[2], found[3]
        if !rest
          attrvalue = '' # was: = attrname
        elsif attrvalue.length >= 2 &&
              ((attrvalue[0, 1] == "'" && attrvalue[-1, 1] == "'") ||
               (attrvalue[0, 1] == '"' && attrvalue[-1, 1] == '"'))
          attrvalue = attrvalue[1..-2] # strip the surrounding quotes
        end
        attrs << [attrname.downcase, attrvalue]
        k += found[0].length
      end
      j += 1 if rawdata[j, 1] == '>'
      finish_starttag(tag, attrs)
      j
    end

    # Parse the end tag at index +i+ and dispatch it via #finish_endtag.
    # Returns the index just past the tag, or nil when the tag is incomplete.
    def parse_endtag(i)
      rawdata = @rawdata
      j = rawdata.index(Endbracket, i + 1)
      return nil unless j
      tag = rawdata[(i + 2)..(j - 1)].strip.downcase
      j += 1 if rawdata[j, 1] == '>'
      finish_endtag(tag)
      j
    end

    # Dispatch a start tag.
    # Returns 1 when start_TAG handled it (element pushed on the stack),
    # 0 when do_TAG handled it, -1 when it went to #unknown_starttag.
    def finish_starttag(tag, attrs)
      method = 'start_' + tag
      if respond_to?(method)
        @stack << tag
        handle_starttag(tag, method, attrs)
        return 1
      end
      method = 'do_' + tag
      if respond_to?(method)
        handle_starttag(tag, method, attrs)
        return 0
      end
      unknown_starttag(tag, attrs)
      -1
    end

    # Dispatch an end tag, closing every element opened after the matching
    # start tag as well. An empty +tag+ ("</>") closes the innermost element.
    def finish_endtag(tag)
      if tag == ''
        found = @stack.length - 1
        if found < 0
          unknown_endtag(tag)
          return
        end
      else
        unless @stack.include?(tag)
          unknown_endtag(tag) unless respond_to?('end_' + tag)
          return
        end
        # rindex: close the innermost open element with this name; closing
        # the outermost one would tear down enclosing same-named elements
        # (e.g. nested lists) on the first end tag.
        found = @stack.rindex(tag)
      end
      while @stack.length > found
        tag = @stack[-1]
        method = 'end_' + tag
        if respond_to?(method)
          handle_endtag(tag, method)
        else
          unknown_endtag(tag)
        end
        @stack.pop
      end
    end

    # Parse the "<!...>" declaration at index +i+.
    # Returns the number of characters consumed, or nil when incomplete.
    def parse_special(i)
      rawdata = @rawdata
      match = rawdata.index(Endbracket, i + 1)
      return nil unless match
      matched_length = Regexp.last_match[0].length
      handle_special(rawdata[(i + 1)..(match - 1)])
      match - i + matched_length
    end

    # Invoke the start_TAG / do_TAG method named +method+ with +attrs+.
    def handle_starttag(tag, method, attrs)
      send(method, attrs)
    end

    # Invoke the end_TAG method named +method+.
    def handle_endtag(tag, method)
      send(method)
    end

    # Print a diagnostic about an unbalanced end tag (verbose mode only).
    def report_unbalanced(tag)
      return unless @verbose
      print '*** Unbalanced </' + tag + '>', "\n"
      # @stack: this class defines no `stack` reader to call.
      print '*** Stack:', @stack.inspect, "\n"
    end

    # Handle a numeric character reference; +name+ is its decimal digits.
    # Only codes 0..255 are turned into data; anything else is reported
    # through #unknown_charref.
    def handle_charref(name)
      n = name.to_i
      unless 0 <= n && n <= 255
        unknown_charref(name)
        return
      end
      handle_data(n.chr)
    end

    # Handle a named entity reference using Entitydefs; names missing from
    # the table are reported through #unknown_entityref.
    def handle_entityref(name)
      table = Entitydefs
      if table.include?(name)
        handle_data(table[name])
      else
        unknown_entityref(name)
        return
      end
    end

    # Hooks for subclasses; all of them ignore their input here.

    def handle_data(data)
    end

    def handle_comment(data)
    end

    def handle_special(data)
    end

    def unknown_starttag(tag, attrs)
    end

    def unknown_endtag(tag)
    end

    def unknown_charref(ref)
    end

    def unknown_entityref(ref)
    end
  end
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require 'feedparser'
|
2
|
+
require 'feedparser/html2text-parser'
|
3
|
+
require 'feedparser/filesizes'
|
4
|
+
|
5
|
+
class String
  # Convert an HTML text to plain text.
  #
  # The receiver is fed through FeedParser::HTML2TextParser and the parser's
  # +savedata+ is then normalised. Returns a new String; neither the receiver
  # nor the parser's own buffer is modified.
  def html2text
    # parse HTML
    parser = FeedParser::HTML2TextParser::new(true)
    parser.feed(self.clone)
    parser.close
    parser.savedata.
      # remove leading and trailing whitespace
      gsub(/\A\s*/m, '').
      gsub(/\s*\Z/m, '').
      # remove whitespace around \n
      gsub(/ *\n/m, "\n").
      gsub(/\n */m, "\n").
      # and duplicates \n
      gsub(/\n\n+/m, "\n\n").
      # and remove duplicated whitespace
      gsub(/[ \t]+/, ' ')
  end
end
|
27
|
+
|
28
|
+
module FeedParser
  class Feed
    # Render the feed header followed by every item as plain text.
    #
    # localtime:: passed through to each item's #to_text; when false item
    #             dates are printed in UTC.
    # Returns a new String. The description is converted with
    # String#html2text; each item is preceded by a line of 40 asterisks.
    def to_text(localtime = true)
      # Built in place with << rather than += to avoid copying the whole
      # text on every step.
      s = ''.dup
      s << "Type: #{@type}\n"
      s << "Encoding: #{@encoding}\n"
      s << "Title: #{@title}\n"
      s << "Link: #{@link}\n"
      if @description
        s << "Description: #{@description.html2text}\n"
      else
        s << "Description:\n"
      end
      s << "Creator: #{@creator}\n"
      s << "\n"
      @items.each do |item|
        s << ('*' * 40) << "\n"
        s << item.to_text(localtime)
      end
      s
    end
  end

  class FeedItem
    # Render this item as plain text.
    #
    # localtime:: when true the date is printed as stored, otherwise it is
    #             converted with getutc first.
    # Returns a new String. Optional fields (date, author, subject,
    # category, content, enclosures) are only printed when present.
    def to_text(localtime = true)
      s = ''.dup
      s << "Feed: "
      s << "#{@feed.title} " if @feed.title
      s << "<#{@feed.link}>" if @feed.link
      s << "\n"
      s << "Item: "
      s << "#{@title} " if @title
      s << "<#{@link}>" if @link
      s << "\n"
      if @date
        stamp = localtime ? @date : @date.getutc
        s << "\nDate: #{stamp.to_s}"
      end
      s << "\nAuthor: #{@creator}" if @creator
      s << "\nSubject: #{@subject}" if @subject
      s << "\nCategory: #{@category}" if @category
      s << "\n\n"
      s << "#{@content.html2text}\n" if @content
      if @enclosures and @enclosures.length > 0
        s << "Files:\n"
        # Each enclosure is indexed as [0], [1] (converted with to_i) and [2].
        @enclosures.each do |e|
          s << " #{e[0]} (#{e[1].to_i.to_human_readable}, #{e[2]})\n"
        end
      end
      s
    end
  end
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
# for URI::regexp
|
2
|
+
require 'uri'
|
3
|
+
require 'feedparser/html2text-parser'
|
4
|
+
|
5
|
+
# This class provides various converters
|
6
|
+
# This class provides various converters
class String
  # is this text HTML ? search for tags. used by String#text2html
  #
  # Returns the match position of the first pattern (in the order listed)
  # that matches, or nil. Callers compare this position numerically, so the
  # raw =~ result is returned rather than a boolean.
  def html?
    (self =~ /<p>/i) || (self =~ /<\/p>/i) || (self =~ /<br>/i) ||
      (self =~ /<br\s*(\/)?\s*>/i) || (self =~ /<\/a>/i) || (self =~ /<img.*>/i)
  end

  # returns true if the text contains escaped HTML (with HTML entities). used by String#text2html
  #
  # Like #html?, the value is the match position of the first matching
  # pattern, or nil. The patterns look for entity-escaped tags such as
  # "&lt;p&gt;", not for real tags.
  def escaped_html?
    (self =~ /&lt;img src=/i) || (self =~ /&lt;a href=/i) ||
      (self =~ /&lt;br(\/| \/|)&gt;/i) || (self =~ /&lt;p&gt;/i)
  end

  # Escape the HTML special characters &, < and > as entities.
  # '&' is replaced first so the entities produced for '<' and '>' are not
  # escaped a second time. Returns a new String.
  def escape_html
    gsub('&', '&amp;').gsub('<', '&lt;').gsub('>', '&gt;')
  end

  # Maps both the named ("&name;") and the numeric ("&#code;") form of every
  # entity known to FeedParser::HTML2TextParser to its UTF-8 character.
  MY_ENTITIES = {}
  FeedParser::HTML2TextParser::entities.each do |k, v|
    MY_ENTITIES["&#{k};"] = [v].pack('U*')
    MY_ENTITIES["&##{v};"] = [v].pack('U*')
  end

  # un-escape HTML in the text. used by String#text2html
  #
  # Every key of MY_ENTITIES is replaced by its character, one entity after
  # the other. Returns a new String.
  # NOTE(review): because replacements are applied sequentially, the output
  # of one replacement can be picked up by a later one — confirm whether
  # that is acceptable for doubly-escaped input.
  def unescape_html
    r = self
    MY_ENTITIES.each do |entity, char|
      r = r.gsub(entity, char)
    end
    r
  end

  # convert text to HTML
  #
  # feed:: object responding to +link+, or nil. When a link is available,
  #        relative src/href attribute values are rewritten against it.
  # Returns a new String; the receiver is not modified.
  def text2html(feed)
    # dup, not clone: clone would carry over a frozen flag and make the
    # in-place gsub! calls below raise.
    text = dup
    realhtml = text.html?
    eschtml = text.escaped_html?
    # fix for RSS feeds with both real and escaped html (crazy!):
    # we take the first one
    if realhtml && eschtml
      if realhtml < eschtml
        eschtml = nil
      else
        realhtml = nil
      end
    end
    unless realhtml # real HTML is left untouched
      if eschtml
        text = text.unescape_html
      else
        # paragraphs
        text.gsub!(/\A\s*(.*)\Z/m, '<p>\1</p>')
        text.gsub!(/\s*\n(\s*\n)+\s*/, "</p>\n<p>")
        # uris
        text.gsub!(/(#{URI::regexp(['http','ftp','https'])})/,
                   '<a href="\1">\1</a>')
      end
    end
    # Handle broken hrefs in <a> and <img>
    if feed and feed.link
      text.gsub!(/(\s(src|href)=['"])([^'"]*)(['"])/) do |m|
        begin
          # Capture the groups before any further =~ overwrites them.
          found = Regexp.last_match
          first, url, last = found[1], found[3], found[4]
          if (url =~ /^\s*\w+:\/\//) or (url =~ /^\s*\w+:\w/)
            # already absolute (scheme://... or scheme:...)
            m
          elsif url =~ /^\//
            # host-relative: prepend scheme and host of the feed link
            first + feed.link.split(/\//)[0..2].join('/') + url + last
          else
            t = feed.link.split(/\//)
            if t.length == 3 # http://toto with no trailing /
              first + feed.link + '/' + url + last
            elsif feed.link =~ /\/$/
              first + feed.link + url + last
            else
              # replace the last path segment of the feed link
              first + t[0...-1].join('/') + '/' + url + last
            end
          end
        rescue
          # best effort: keep the attribute as it was
          m
        end
      end
    end
    text
  end

  # Remove white space around the text
  # Modifies the receiver in place and returns it.
  def rmWhiteSpace!
    # Two separate statements: chaining gsub! calls breaks as soon as one of
    # them returns nil.
    gsub!(/\A\s*/m, '')
    gsub!(/\s*\Z/m, '')
    self
  end

  # Convert a text in inputenc to a text in UTF8
  # must take care of wrong input locales
  #
  # inputenc:: name of the declared encoding; nil is treated as unknown.
  # Returns the receiver itself when it is declared or detected to be UTF-8
  # (or when conversion fails), otherwise a new String in which every byte
  # has been re-encoded as the code point of the same number.
  def toUTF8(inputenc)
    return self if inputenc.to_s.downcase == 'utf-8'
    # it is said it is not UTF-8. Ensure it is REALLY not UTF-8
    begin
      return self if unpack('U*').pack('U*') == self
    rescue
      # not decodable as UTF-8: fall through to the byte-wise conversion
    end
    begin
      unpack('C*').pack('U*')
    rescue
      self # failsafe solution. but a dirty one :-)
    end
  end
end
|