whistle 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +3 -0
- data/README.txt +38 -0
- data/bin/whistle +90 -0
- data/lib/config.rb +19 -0
- data/lib/phash.rb +16 -0
- data/lib/relay.rb +24 -0
- data/lib/resource.rb +113 -0
- data/lib/ssl_patch.rb +15 -0
- data/lib/switchbox.rb +54 -0
- data/lib/time_ext.rb +30 -0
- data/lib/version.rb +3 -0
- data/sample/config.yml +12 -0
- data/vendor/rscm-0.5.1-patched-stripped/README +218 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm.rb +14 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/abstract_log_parser.rb +35 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/base.rb +289 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/command_line.rb +146 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/difftool.rb +44 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/line_editor.rb +46 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/mockit.rb +157 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/parser.rb +39 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/path_converter.rb +60 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/platform.rb +26 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/revision.rb +103 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/revision_file.rb +85 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/revision_poller.rb +93 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/revisions.rb +79 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/clearcase.rb +182 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/cvs.rb +374 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/cvs_log_parser.rb +154 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/darcs.rb +120 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/darcs_log_parser.rb +65 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/monotone.rb +338 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/monotone_log_parser.rb +109 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/mooky.rb +6 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/perforce.rb +216 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/star_team.rb +104 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/subversion.rb +397 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/scm/subversion_log_parser.rb +165 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/tempdir.rb +17 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/time_ext.rb +11 -0
- data/vendor/rscm-0.5.1-patched-stripped/lib/rscm/version.rb +13 -0
- data/vendor/ruby-feedparser-0.5-stripped/README +14 -0
- data/vendor/ruby-feedparser-0.5-stripped/lib/feedparser.rb +28 -0
- data/vendor/ruby-feedparser-0.5-stripped/lib/feedparser/feedparser.rb +300 -0
- data/vendor/ruby-feedparser-0.5-stripped/lib/feedparser/filesizes.rb +12 -0
- data/vendor/ruby-feedparser-0.5-stripped/lib/feedparser/html-output.rb +126 -0
- data/vendor/ruby-feedparser-0.5-stripped/lib/feedparser/html2text-parser.rb +409 -0
- data/vendor/ruby-feedparser-0.5-stripped/lib/feedparser/rexml_patch.rb +28 -0
- data/vendor/ruby-feedparser-0.5-stripped/lib/feedparser/sgml-parser.rb +332 -0
- data/vendor/ruby-feedparser-0.5-stripped/lib/feedparser/text-output.rb +83 -0
- data/vendor/ruby-feedparser-0.5-stripped/lib/feedparser/textconverters.rb +120 -0
- metadata +132 -0
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'feedparser/textconverters'
|
2
|
+
|
3
|
+
# Patch for REXML
|
4
|
+
# Very ugly patch to make REXML error-proof.
|
5
|
+
# The problem is REXML uses IConv, which isn't error-proof at all.
|
6
|
+
# With those changes, it uses unpack/pack with some error handling
|
7
|
+
# Monkey patch for REXML: swaps the Encoding helpers for versions built on
# String#toUTF8 and exposes Element's raw child collection.
module REXML
  module Encoding
    # Convert +str+ from the currently configured @encoding to UTF-8.
    def decode(str)
      str.toUTF8(@encoding)
    end

    # Encoding is a pass-through: +str+ is returned untouched.
    def encode(str)
      str
    end

    # Store the encoding name, falling back to 'utf-8' when +enc+ is nil/false.
    # Nothing happens when the very same encoding is already set.
    def encoding=(enc)
      already_set = defined?(@encoding) && enc == @encoding
      @encoding = enc || 'utf-8' unless already_set
    end
  end

  class Element
    # Returns the @children instance variable itself (no copy is made).
    attr_reader :children
  end
end
|
@@ -0,0 +1,332 @@
|
|
1
|
+
# A parser for SGML, using the derived class as static DTD.
|
2
|
+
# from http://raa.ruby-lang.org/project/html-parser
|
3
|
+
module FeedParser
|
4
|
+
class SGMLParser
|
5
|
+
# Regular expressions used for parsing:

# Next character that can start markup or a reference.
Interesting = /[&<]/

# A reference or tag prefix that may still be waiting for more input.
Incomplete = Regexp.compile(
  '&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|' \
  '<([a-zA-Z][^<>]*|/([a-zA-Z][^<>]*)?|' \
  '![^<>]*)?'
)

# Named (&name;) and decimal (&#123;) references.
Entityref = /&([a-zA-Z][-.a-zA-Z0-9]*);/
Charref = /&#([0-9]+);/

# Tag, comment and declaration delimiters.
Starttagopen = /<[>a-zA-Z]/
Endtagopen = /<\/[<>a-zA-Z]/
Endbracket = /[<>]/
Special = /<![^<>]*>/
Commentopen = /<!--/
Commentclose = /--[ \t\n]*>/

# Tag name, and one attribute (name plus optional quoted/unquoted value).
Tagfind = /[a-zA-Z][a-zA-Z0-9.-]*/
Attrfind = Regexp.compile(
  '[\s,]*([a-zA-Z_][a-zA-Z_0-9.-]*)' \
  '(\s*=\s*' \
  "('[^']*'" \
  '|"[^"]*"' \
  '|[-~a-zA-Z0-9,./:+*%?!()_#=]*))?'
)

# Entities resolved by handle_entityref.
Entitydefs = {
  'lt'   => '<',
  'gt'   => '>',
  'amp'  => '&',
  'quot' => '"',
  'apos' => "'"
}
|
29
|
+
|
30
|
+
# Create a parser. +verbose+ is kept in @verbose (consulted by
# report_unbalanced) and the parsing state is then initialised via #reset.
def initialize(verbose=false)
  @verbose = verbose
  reset()
end
|
34
|
+
|
35
|
+
# Drop all buffered input and parsing state: empty input buffer, empty
# open-element stack, placeholder last tag, and both mode flags cleared.
def reset
  @rawdata, @stack, @lasttag = '', [], '???'
  @nomoretags = @literal = false
end
|
42
|
+
|
43
|
+
# True when an element named +gi+ is currently on the open-element stack.
def has_context(gi)
  @stack.member?(gi)
end
|
46
|
+
|
47
|
+
# Switch to "no more tags" mode: goahead then passes all remaining input
# straight to handle_data. Literal mode is switched on as well.
def setnomoretags
  @nomoretags = @literal = true
end
|
51
|
+
|
52
|
+
# Enter literal mode: start tags, comments and declarations are reported as
# plain data by goahead until an end tag clears the flag. Arguments are ignored.
def setliteral(*args)
  @literal = true
end
|
55
|
+
|
56
|
+
# Append +data+ to the pending input buffer and parse as much of it as
# possible; an incomplete trailing construct stays buffered for the next call.
def feed(data)
  @rawdata.concat(data)
  goahead(false)
end
|
60
|
+
|
61
|
+
# Signal end of input: parse the buffer once more, this time flushing any
# incomplete trailing construct as plain data.
def close
  goahead(true)
end
|
64
|
+
|
65
|
+
# Main scanning loop. Walks @rawdata from the start, dispatching text to
# handle_data and markup to the parse_* / handle_*ref methods. Whatever could
# not be consumed (an incomplete construct at the end) is kept in @rawdata.
#
# _end:: when true, leftover input is flushed to handle_data instead of
#        being kept for a later call.
def goahead(_end)
  rawdata = @rawdata
  pos = 0
  limit = rawdata.length
  while pos < limit
    if @nomoretags
      # Everything that remains is plain data.
      handle_data(rawdata[pos...limit])
      pos = limit
      break
    end

    # Emit the text run up to the next '&' or '<'.
    stop = rawdata.index(Interesting, pos) || limit
    handle_data(rawdata[pos...stop]) if pos < stop
    pos = stop
    break if pos == limit

    if rawdata[pos] == ?<
      if rawdata.index(Starttagopen, pos) == pos
        if @literal
          handle_data(rawdata[pos, 1])
          pos += 1
          next
        end
        # parse_starttag returns the absolute position after the tag.
        after = parse_starttag(pos)
        break unless after
        pos = after
        next
      end
      if rawdata.index(Endtagopen, pos) == pos
        after = parse_endtag(pos)
        break unless after
        pos = after
        @literal = false
        next
      end
      if rawdata.index(Commentopen, pos) == pos
        if @literal
          handle_data(rawdata[pos, 1])
          pos += 1
          next
        end
        # parse_comment returns a length, not a position.
        consumed = parse_comment(pos)
        break unless consumed
        pos += consumed
        next
      end
      if rawdata.index(Special, pos) == pos
        if @literal
          handle_data(rawdata[pos, 1])
          pos += 1
          next
        end
        # parse_special returns a length, not a position.
        consumed = parse_special(pos)
        break unless consumed
        pos += consumed
        next
      end
    elsif rawdata[pos] == ?&
      if rawdata.index(Charref, pos) == pos
        ref = Regexp.last_match
        pos += ref[0].length
        handle_charref(ref[1])
        pos -= 1 unless rawdata[pos - 1] == ?;
        next
      end
      if rawdata.index(Entityref, pos) == pos
        ref = Regexp.last_match
        pos += ref[0].length
        handle_entityref(ref[1])
        pos -= 1 unless rawdata[pos - 1] == ?;
        next
      end
    else
      raise RuntimeError, 'neither < nor & ??'
    end

    # We get here only if incomplete matches but nothing else.
    partial_at = rawdata.index(Incomplete, pos)
    unless partial_at == pos
      handle_data(rawdata[pos, 1])
      pos += 1
      next
    end
    stop = partial_at + Regexp.last_match[0].length
    break if stop == limit # Really incomplete
    handle_data(rawdata[pos...stop])
    pos = stop
  end

  if _end and pos < limit
    handle_data(@rawdata[pos...limit])
    pos = limit
  end
  @rawdata = rawdata[pos..-1]
end
|
159
|
+
|
160
|
+
# Parse the comment that starts at index +i+ of @rawdata and pass its text
# (between '<!--' and the closing '--', optional whitespace, '>') to
# handle_comment.
#
# Returns the number of characters consumed, or nil when the comment is not
# closed yet. Raises RuntimeError if +i+ does not point at '<!--'.
def parse_comment(i)
  rawdata = @rawdata
  raise RuntimeError, 'unexpected call to handle_comment' unless rawdata[i, 4] == '<!--'

  close_at = rawdata.index(Commentclose, i)
  return nil unless close_at

  close_len = Regexp.last_match[0].length
  handle_comment(rawdata[i+4..(close_at-1)])
  close_at + close_len - i
end
|
173
|
+
|
174
|
+
# Parse the start tag that begins at index +i+ of @rawdata, collect its
# attributes as [name, value] pairs (names lower-cased, surrounding quotes
# stripped, value '' when none is given) and hand them to finish_starttag.
#
# Returns the index just past the tag, or nil when no closing bracket has
# arrived yet. Raises RuntimeError when no tag name can be found.
def parse_starttag(i)
  rawdata = @rawdata
  j = rawdata.index(Endbracket, i + 1)
  return nil unless j

  attrs = []
  if rawdata[i + 1, 1] == '>'
    # SGML shorthand: <> == <last open tag seen>
    k = j
    tag = @lasttag
  else
    unless rawdata.index(Tagfind, i + 1)
      raise RuntimeError, 'unexpected call to parse_starttag'
    end
    name = Regexp.last_match[0]
    k = i + 1 + name.length
    tag = name.downcase
    @lasttag = tag
  end

  while k < j
    found = rawdata.index(Attrfind, k)
    # The search is not anchored, so ignore matches that start beyond the
    # closing bracket: those belong to the text after the tag.
    break unless found && found < j

    attr = Regexp.last_match
    attrname, rest, attrvalue = attr[1], attr[2], attr[3]
    if !rest
      attrvalue = '' # was: = attrname
    else
      quote = attrvalue[0, 1]
      if (quote == "'" || quote == '"') && attrvalue[-1, 1] == quote
        attrvalue = attrvalue[1..-2]
      end
    end
    attrs << [attrname.downcase, attrvalue]
    # Continue right after this match; junk skipped before it counts too.
    k = found + attr[0].length
  end

  j += 1 if rawdata[j, 1] == '>'
  finish_starttag(tag, attrs)
  j
end
|
211
|
+
|
212
|
+
# Parse the end tag that begins at index +i+ of @rawdata and pass its
# stripped, lower-cased name to finish_endtag.
#
# Returns the index just past the tag, or nil when no closing bracket has
# arrived yet.
def parse_endtag(i)
  rawdata = @rawdata
  bracket_at = rawdata.index(Endbracket, i + 1)
  return nil unless bracket_at

  name = rawdata[i+2..bracket_at-1].strip.downcase
  # Only a real '>' is consumed; a '<' is left for the next round.
  bracket_at += 1 if rawdata[bracket_at] == ?>
  finish_endtag(name)
  bracket_at
end
|
223
|
+
|
224
|
+
# Dispatch a parsed start tag to the handler the object provides for it.
#
# Returns  1 when a start_<tag> method exists (the tag is pushed on @stack),
#          0 when only a do_<tag> method exists (nothing is pushed),
#         -1 when neither exists (unknown_starttag is called).
def finish_starttag(tag, attrs)
  opener = 'start_' + tag
  if respond_to?(opener)
    @stack << tag
    handle_starttag(tag, opener, attrs)
    return 1
  end

  standalone = 'do_' + tag
  if respond_to?(standalone)
    handle_starttag(tag, standalone, attrs)
    return 0
  end

  unknown_starttag(tag, attrs)
  -1
end
|
241
|
+
|
242
|
+
# Close the element named +tag+ together with every element opened after it.
#
# An empty +tag+ (SGML shorthand '</>') closes only the innermost open
# element. A tag that is not on the stack closes nothing: unknown_endtag is
# called for it unless an end_<tag> method exists.
#
# For every element being closed, end_<name> is invoked through
# handle_endtag when it exists, otherwise unknown_endtag is called.
def finish_endtag(tag)
  if tag == ''
    found = @stack.length - 1
    if found < 0
      unknown_endtag(tag)
      return
    end
  else
    unless @stack.include?(tag)
      unknown_endtag(tag) unless respond_to?('end_' + tag)
      return
    end
    # Use the innermost (last) occurrence: with nested elements of the same
    # name, an end tag must not close the outer ones as well.
    found = @stack.rindex(tag)
  end

  while @stack.length > found
    open_tag = @stack[-1]
    closer = 'end_' + open_tag
    if respond_to?(closer)
      handle_endtag(open_tag, closer)
    else
      unknown_endtag(open_tag)
    end
    @stack.pop
  end
end
|
270
|
+
|
271
|
+
# Parse the '<!...>' declaration that begins at index +i+ of @rawdata and
# pass everything between '<' and the closing bracket to handle_special.
#
# Returns the number of characters consumed, or nil when no closing bracket
# has arrived yet.
def parse_special(i)
  rawdata = @rawdata
  bracket_at = rawdata.index(Endbracket, i+1)
  return nil unless bracket_at

  bracket_len = Regexp.last_match[0].length
  handle_special(rawdata[i+1..(bracket_at-1)])
  bracket_at - i + bracket_len
end
|
279
|
+
|
280
|
+
# Invoke the start handler named +method+ with the attribute list.
# +tag+ is not used here.
def handle_starttag(tag, method, attrs)
  send(method, attrs)
end
|
283
|
+
|
284
|
+
# Invoke the end handler named +method+ (it takes no arguments).
# +tag+ is not used here.
def handle_endtag(tag, method)
  send(method)
end
|
287
|
+
|
288
|
+
# In verbose mode, print a diagnostic about an end tag without a matching
# open element, followed by the current open-element stack. Silent otherwise.
def report_unbalanced(tag)
  return unless @verbose

  print '*** Unbalanced </' + tag + '>', "\n"
  # Read the instance variable directly: this class defines no `stack` reader.
  print '*** Stack:', @stack, "\n"
end
|
294
|
+
|
295
|
+
# Handle a decimal character reference (+name+ holds the digits).
# Codes 0..255 are converted with Integer#chr and emitted through
# handle_data; any other value is reported via unknown_charref.
def handle_charref(name)
  code = name.to_i
  if code.between?(0, 255)
    handle_data(code.chr)
  else
    unknown_charref(name)
    nil
  end
end
|
303
|
+
|
304
|
+
# Handle a named entity reference: names found in Entitydefs are emitted
# through handle_data, anything else is reported via unknown_entityref.
def handle_entityref(name)
  unless Entitydefs.key?(name)
    unknown_entityref(name)
    return
  end
  handle_data(Entitydefs[name])
end
|
313
|
+
|
314
|
+
# Hook receiving each run of character data. Does nothing here; a subclass
# can override it to collect the text.
def handle_data(data)
end
|
316
|
+
|
317
|
+
# Hook receiving the text of each comment. Does nothing here; a subclass
# can override it.
def handle_comment(data)
end
|
319
|
+
|
320
|
+
# Hook receiving the content of each '<!...>' declaration. Does nothing
# here; a subclass can override it.
def handle_special(data)
end
|
322
|
+
|
323
|
+
# Hook called for a start tag that has neither a start_<tag> nor a do_<tag>
# handler. Does nothing here; a subclass can override it.
def unknown_starttag(tag, attrs)
end
|
325
|
+
# Hook called for an end tag that has no end_<tag> handler. Does nothing
# here; a subclass can override it.
def unknown_endtag(tag)
end
|
327
|
+
# Hook called for a numeric character reference that handle_charref does not
# convert. Does nothing here; a subclass can override it.
def unknown_charref(ref)
end
|
329
|
+
# Hook called for a named entity that is not listed in Entitydefs. Does
# nothing here; a subclass can override it.
def unknown_entityref(ref)
end
|
331
|
+
end
|
332
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require 'feedparser'
|
2
|
+
require 'feedparser/html2text-parser'
|
3
|
+
require 'feedparser/filesizes'
|
4
|
+
|
5
|
+
class String
  # Convert an HTML text to plain text.
  #
  # A copy of the string is run through FeedParser::HTML2TextParser and the
  # parser's saved output is then tidied up, in this order.
  def html2text
    parser = FeedParser::HTML2TextParser::new(true)
    parser.feed(clone)
    parser.close
    plain = parser.savedata
    [
      [/\A\s*/m, ''],       # remove leading whitespace
      [/\s*\Z/m, ''],       # remove trailing whitespace
      [/ *\n/m, "\n"],      # remove spaces before \n
      [/\n */m, "\n"],      # remove spaces after \n
      [/\n\n+/m, "\n\n"],   # collapse runs of blank lines
      [/[ \t]+/, ' ']       # collapse runs of spaces and tabs
    ].each do |pattern, replacement|
      plain.gsub!(pattern, replacement)
    end
    plain
  end
end
|
27
|
+
|
28
|
+
module FeedParser
|
29
|
+
class Feed
  # Render the feed as plain text: a header block (type, encoding, title,
  # link, description converted with String#html2text, creator) followed by
  # every item, each preceded by a line of 40 asterisks.
  #
  # localtime:: passed through unchanged to each item's to_text.
  def to_text(localtime = true)
    header = [
      "Type: #{@type}\n",
      "Encoding: #{@encoding}\n",
      "Title: #{@title}\n",
      "Link: #{@link}\n",
      @description ? "Description: #{@description.html2text}\n" : "Description:\n",
      "Creator: #{@creator}\n",
      "\n"
    ].join

    separator = '*' * 40 + "\n"
    text = header
    @items.each do |item|
      text = text + separator + item.to_text(localtime)
    end
    text
  end
end
|
50
|
+
|
51
|
+
class FeedItem
  # Render the item as plain text: a "Feed:" line, an "Item:" line, the
  # optional date/author/subject/category lines, the content converted with
  # String#html2text, and finally the list of enclosures.
  #
  # localtime:: when false the date is converted with getutc before printing.
  def to_text(localtime = true)
    feed_line = 'Feed: '
    feed_line += @feed.title + ' ' if @feed.title
    feed_line += "<#{@feed.link}>" if @feed.link

    item_line = 'Item: '
    item_line += @title + ' ' if @title
    item_line += "<#{@link}>" if @link

    details = ''
    if @date
      shown = localtime ? @date : @date.getutc
      details += "\nDate: #{shown.to_s}"
    end
    details += "\nAuthor: #{@creator}" if @creator
    details += "\nSubject: #{@subject}" if @subject
    details += "\nCategory: #{@category}" if @category

    body = ''
    body += "#{@content.html2text}\n" if @content
    if @enclosures and @enclosures.length > 0
      body += "Files:\n"
      @enclosures.each do |e|
        # each enclosure is indexed as [0], [1] (size, made human readable), [2]
        body += " #{e[0]} (#{e[1].to_i.to_human_readable}, #{e[2]})\n"
      end
    end

    feed_line + "\n" + item_line + "\n" + details + "\n\n" + body
  end
end
|
83
|
+
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
# for URI::regexp
|
2
|
+
require 'uri'
|
3
|
+
require 'feedparser/html2text-parser'
|
4
|
+
|
5
|
+
# This class provides various converters
|
6
|
+
class String
|
7
|
+
# is this text HTML ? search for tags. used by String#text2html
|
8
|
+
# Looks for a handful of common HTML tags. The patterns are tried in order
# and the match position of the first one that matches is returned (so the
# result is truthy); nil when none matches.
def html?
  [/<p>/i, /<\/p>/i, /<br>/i, /<br\s*(\/)?\s*>/i, /<\/a>/i, /<img.*>/i].each do |tag_pattern|
    position = self =~ tag_pattern
    return position if position
  end
  nil
end
|
11
|
+
|
12
|
+
# returns true if the text contains escaped HTML (with HTML entities). used by String#text2html
|
13
|
+
# Looks for entity-escaped HTML tags (e.g. "&lt;p&gt;"). Returns the match
# position of the first pattern that matches (truthy), or nil when there is
# none. The patterns must contain the escaped forms: with raw '<' and '>'
# this method would only duplicate String#html?.
def escaped_html?
  return (self =~ /&lt;img src=/i) || (self =~ /&lt;a href=/i) || (self =~ /&lt;br(\/| \/|)&gt;/i) || (self =~ /&lt;p&gt;/i)
end
|
16
|
+
|
17
|
+
# Return a copy of the string with the HTML-special characters '&', '<' and
# '>' replaced by their entities. '&' is handled first so the ampersands
# introduced by the other replacements are not escaped again.
def escape_html
  gsub('&', '&amp;').gsub('<', '&lt;').gsub('>', '&gt;')
end
|
23
|
+
|
24
|
+
# Lookup table used by String#unescape_html: maps both the named form
# ("&name;") and the decimal form ("&#code;") of every entry of
# FeedParser::HTML2TextParser.entities to the UTF-8 encoded character.
MY_ENTITIES = {}
FeedParser::HTML2TextParser::entities.each do |name, codepoint|
  ["&#{name};", "&##{codepoint};"].each do |reference|
    MY_ENTITIES[reference] = [codepoint].pack('U*')
  end
end
|
29
|
+
|
30
|
+
# un-escape HTML in the text. used by String#text2html
|
31
|
+
# Return a copy of the string with every entity reference listed in
# MY_ENTITIES replaced by its character; unknown references are left alone.
#
# The text is scanned once, so the result of one replacement is never
# unescaped again ("&amp;lt;" becomes "&lt;", not "<").
def unescape_html
  gsub(/&#?[A-Za-z0-9]+;/) { |reference| MY_ENTITIES.fetch(reference, reference) }
end
|
38
|
+
|
39
|
+
# convert text to HTML
|
40
|
+
# Convert the text to HTML and return the result (the receiver is not
# modified; work is done on a clone).
#
# * Text that already looks like HTML (String#html?) is kept as is.
# * Text that looks like escaped HTML (String#escaped_html?) is unescaped.
# * Anything else is wrapped in <p> paragraphs and http/ftp/https URIs are
#   turned into links.
#
# feed:: when it responds with a non-nil +link+, relative src/href values
#        are resolved against that link.
def text2html(feed)
  text = self.clone
  real_at = text.html?
  escaped_at = text.escaped_html?
  # fix for RSS feeds with both real and escaped html (crazy!):
  # whichever kind shows up first in the text wins
  if real_at && escaped_at
    if real_at < escaped_at
      escaped_at = nil
    else
      real_at = nil
    end
  end

  if real_at
    # already HTML: nothing to do
  elsif escaped_at
    text = text.unescape_html
  else
    # paragraphs
    text.gsub!(/\A\s*(.*)\Z/m, '<p>\1</p>')
    text.gsub!(/\s*\n(\s*\n)+\s*/, "</p>\n<p>")
    # uris
    text.gsub!(/(#{URI::regexp(['http','ftp','https'])})/,
               '<a href="\1">\1</a>')
  end

  # Handle broken hrefs in <a> and <img>
  return text unless feed and feed.link

  text.gsub!(/(\s(src|href)=['"])([^'"]*)(['"])/) do |whole|
    begin
      prefix, url, suffix = $1, $3, $4
      resolved =
        if url =~ /^\s*\w+:\/\// || url =~ /^\s*\w+:\w/
          nil # already absolute (or another scheme): keep the match untouched
        elsif url =~ /^\//
          # host-relative: keep only scheme and host of the feed link
          feed.link.split(/\//)[0..2].join('/') + url
        else
          parts = feed.link.split(/\//)
          if parts.length == 3 # http://toto with no trailing /
            feed.link + '/' + url
          elsif feed.link =~ /\/$/
            feed.link + url
          else
            # drop the last path segment of the feed link
            parts[0...-1].join('/') + '/' + url
          end
        end
      resolved ? prefix + resolved + suffix : whole
    rescue
      whole
    end
  end
  text
end
|
93
|
+
|
94
|
+
# Remove white space around the text
|
95
|
+
# Strip leading and trailing whitespace in place and return the receiver.
# Both patterns can match the empty string, so each gsub! always finds a
# match and returns the string rather than nil.
def rmWhiteSpace!
  gsub!(/\A\s*/m, '')
  gsub!(/\s*\Z/m,'')
end
|
98
|
+
|
99
|
+
# Convert a text in inputenc to a text in UTF8
|
100
|
+
# must take care of wrong input locales
|
101
|
+
# Convert a text declared to be in +inputenc+ to UTF-8, coping with wrong
# declarations:
#
# * declared 'utf-8' (any case): returned as is;
# * declared otherwise but surviving a UTF-8 unpack/pack round trip:
#   returned as is;
# * otherwise every byte is taken as one code point and re-packed as UTF-8;
#   if even that fails the string is returned unchanged.
def toUTF8(inputenc)
  return self if inputenc.downcase == 'utf-8'

  # it is said it is not UTF-8. Ensure it is REALLY not UTF-8
  begin
    return self if unpack('U*').pack('U*') == self
  rescue
    # not valid UTF-8: fall through to the byte-wise conversion
  end

  begin
    unpack('C*').pack('U*')
  rescue
    self # failsafe solution. but a dirty one :-)
  end
end
|
120
|
+
end
|