rfeedparser 0.9.931 → 0.9.940
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rfeedparser.rb +143 -58
- data/lib/rfeedparser/aliases.rb +1 -1
- data/lib/rfeedparser/better_attributelist.rb +11 -11
- data/lib/rfeedparser/better_sgmlparser.rb +1 -1
- data/lib/rfeedparser/encoding_helpers.rb +120 -127
- data/lib/rfeedparser/feedparserdict.rb +30 -20
- data/lib/rfeedparser/forgiving_uri.rb +9 -7
- data/lib/rfeedparser/markup_helpers.rb +11 -14
- data/lib/rfeedparser/parser_mixin.rb +16 -11
- data/lib/rfeedparser/parsers.rb +1 -2
- data/lib/rfeedparser/scrub.rb +95 -90
- data/lib/rfeedparser/time_helpers.rb +379 -379
- data/lib/rfeedparser/utilities.rb +23 -0
- data/tests/rfeedparser_test_helper.rb +262 -0
- data/tests/rfeedparserserver.rb +3 -109
- data/tests/rfeedparsertest.rb +6 -165
- data/tests/rfponly/http/200.xml +30 -0
- data/tests/rfponly/http/220.xml +28 -0
- data/tests/rfponly/http/300.xml +8 -0
- data/tests/rfponly/http/300.xml_redirect +25 -0
- data/tests/rfponly/http/301.xml +8 -0
- data/tests/rfponly/http/301.xml_redirect +25 -0
- data/tests/rfponly/http/302.xml +8 -0
- data/tests/rfponly/http/302.xml_redirect +25 -0
- data/tests/rfponly/http/307.xml +8 -0
- data/tests/rfponly/http/307.xml_redirect +25 -0
- data/tests/rfponly/http/320.xml +8 -0
- data/tests/rfponly/http/320.xml_redirect +25 -0
- data/tests/rfponly/http/400.xml +7 -0
- data/tests/rfponly/http/404.xml +7 -0
- data/tests/rfponly/http/410.xml +7 -0
- data/tests/rfponly/http/420.xml +7 -0
- data/tests/rfponly/http/500.xml +7 -0
- data/tests/rfponly/http/520.xml +7 -0
- data/tests/rfponly/http/etag.xml +28 -0
- data/tests/rfponly/http/lastmodified.xml +29 -0
- data/tests/rfponly/wellformed/date/feed_modified_with_negative_numeric_timezone.xml +9 -0
- data/tests/rfponly/wellformed/date/feed_modified_with_positive_numeric_timezone.xml +9 -0
- data/tests/rfponly/wellformed/scrub/hpricot_self_closing_tag_workaround.xml +11 -0
- metadata +31 -3
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require File.join(File.dirname(__FILE__), 'aliases')
|
3
|
+
require File.join(File.dirname(__FILE__), 'encoding_helpers')
|
4
|
+
require File.join(File.dirname(__FILE__), 'markup_helpers')
|
5
|
+
require File.join(File.dirname(__FILE__), 'scrub')
|
6
|
+
require File.join(File.dirname(__FILE__), 'time_helpers')
|
7
|
+
|
8
|
+
module FeedParserUtilities
|
9
|
+
|
10
|
+
def parse_date(date_string)
|
11
|
+
FeedParser::FeedTimeParser.parse_date(date_string)
|
12
|
+
end
|
13
|
+
module_function :parse_date
|
14
|
+
|
15
|
+
def extract_tuple(atime)
|
16
|
+
FeedParser::FeedTimeParser.extract_tuple(atime)
|
17
|
+
end
|
18
|
+
module_function :extract_tuple
|
19
|
+
|
20
|
+
def py2rtime(pytuple)
|
21
|
+
return Time.utc(*pytuple[0..5]) unless pytuple.blank?
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,262 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require File.join(File.dirname(__FILE__),'../lib/rfeedparser')
|
3
|
+
|
4
|
+
# Load Mongrel, preferring the copy managed by RubyGems and falling back to a
# plain require when RubyGems (or the mongrel gem) is unavailable.
begin
  require 'rubygems'
  gem 'mongrel'
  require 'mongrel'
rescue LoadError, StandardError
  # LoadError descends from ScriptError, not StandardError, so a bare
  # `rescue` would never catch a missing rubygems/mongrel gem and this
  # fallback would be skipped. StandardError is still listed so everything
  # the previous bare rescue handled keeps being handled.
  STDERR.puts "Whoops, had an error with loading mongrel as a gem. Trying just 'require'. Mongrel is required for testing."
  require 'mongrel'
end

# Register a reason phrase for status 220 in Mongrel's status table.
# NOTE(review): presumably needed so the 220.xml test can be served; confirm.
Mongrel::HTTP_STATUS_CODES[220] = "Unspecified success"
|
13
|
+
|
14
|
+
# Top-level shortcut for FeedParser.uconvert; the three arguments are passed
# through untouched and the result is returned as-is.
def uconvert(one, two, three)
  FeedParser.uconvert(one, two, three)
end

# Top-level shortcut for FeedParser._ebcdic_to_ascii; the argument is passed
# through untouched and the result is returned as-is.
def _ebcdic_to_ascii(one)
  FeedParser._ebcdic_to_ascii(one)
end

# Port used by the test server and the test client.
$PORT = 8097 # Not configurable, hard coded in the xml files
|
18
|
+
|
19
|
+
# Returns +data+ without its first +count+ bytes.
#
# String#[] counts characters on Ruby 1.9+, which is wrong for stripping a
# byte-order mark; byteslice is used when available, and the Ruby 1.8
# (byte-indexed) String#[] otherwise.
def _drop_leading_bytes(data, count)
  data.respond_to?(:byteslice) ? data.byteslice(count..-1) : data[count..-1]
end

# Sniffs the leading bytes of +data+ and hands it to the matching converter.
#
# The checks run in this order, first match wins:
#   EBCDIC, UTF-16BE, UTF-16BE+BOM, UTF-16LE, UTF-16LE+BOM,
#   UTF-32BE, UTF-32LE, UTF-32BE+BOM, UTF-32LE+BOM, UTF-8+BOM.
# BOM variants have the BOM bytes removed before conversion. Anything else is
# treated as ASCII-compatible and returned untouched.
#
# The sniffing works on raw byte values (String#unpack) so the result does not
# depend on the string's encoding tag or on whether String#[] indexes bytes
# (Ruby 1.8) or characters (Ruby 1.9+).
#
# data - String holding the raw file contents.
#
# Returns whatever _ebcdic_to_ascii / uconvert return for the converted
# cases, otherwise a String.
def translate_data(data)
  lead = data.unpack('C4')                 # up to four leading byte values
  four_bytes = lead.compact.length == 4    # original required size >= 4 for 2-byte BOMs

  if lead == [0x4c, 0x6f, 0xa7, 0x94]
    # EBCDIC
    data = _ebcdic_to_ascii(data)
  elsif lead == [0x00, 0x3c, 0x00, 0x3f]
    # UTF-16BE
    data = uconvert(data, 'utf-16be', 'utf-8')
  elsif four_bytes && lead[0, 2] == [0xfe, 0xff] && lead[2, 2] != [0x00, 0x00]
    # UTF-16BE with BOM
    data = uconvert(_drop_leading_bytes(data, 2), 'utf-16be', 'utf-8')
  elsif lead == [0x3c, 0x00, 0x3f, 0x00]
    # UTF-16LE
    data = uconvert(data, 'utf-16le', 'utf-8')
  elsif four_bytes && lead[0, 2] == [0xff, 0xfe] && lead[2, 2] != [0x00, 0x00]
    # UTF-16LE with BOM (FF FE 00 00 is left for the UTF-32LE BOM check below)
    data = uconvert(_drop_leading_bytes(data, 2), 'utf-16le', 'utf-8')
  elsif lead == [0x00, 0x00, 0x00, 0x3c]
    # UTF-32BE
    data = uconvert(data, 'utf-32be', 'utf-8')
  elsif lead == [0x3c, 0x00, 0x00, 0x00]
    # UTF-32LE
    data = uconvert(data, 'utf-32le', 'utf-8')
  elsif lead == [0x00, 0x00, 0xfe, 0xff]
    # UTF-32BE with BOM
    data = uconvert(_drop_leading_bytes(data, 4), 'utf-32BE', 'utf-8')
  elsif lead == [0xff, 0xfe, 0x00, 0x00]
    # UTF-32LE with BOM
    data = uconvert(_drop_leading_bytes(data, 4), 'utf-32LE', 'utf-8')
  elsif lead[0, 3] == [0xef, 0xbb, 0xbf]
    # UTF-8 with BOM
    data = _drop_leading_bytes(data, 3)
  end
  # Anything else is ASCII-compatible and passes through unchanged.
  data
end
|
55
|
+
|
56
|
+
# Collects the HTTP headers the test server should send for +xmlfile+.
# Called by the server.
#
# Two sources are combined:
# * "Header: Name: value" lines inside the (translated) file contents;
# * an AddType directive inside a <Files NAME> section of a .htaccess file in
#   the same directory, which supplies Content-Type and wins over the file's
#   own value.
#
# Side effect: when the file declares a non-empty ETag header,
# Mongrel::Const::ETAG_FORMAT is replaced with that value.
# NOTE(review): presumably so DirHandler#send_file emits exactly that ETag;
# confirm.
#
# xmlfile - String path of the test file.
#
# Returns a Hash of header name => value.
def scrape_headers(xmlfile)
  # File.read closes the handle and, unlike Kernel#open, never treats a path
  # starting with "|" as a command.
  data = File.read(xmlfile)
  htaccess = File.dirname(xmlfile) + "/.htaccess"
  xml_headers = {}
  server_headers = {}

  if File.exist?(htaccess) # File.exists? no longer exists on Ruby 3.2+
    # Escape the name so the "." in "foo.xml" only matches a literal dot.
    fn = Regexp.escape(File.basename(xmlfile))
    type_match = File.read(htaccess).match(/^\s*<Files\s+#{fn}>\s*\n\s*AddType\s+(.*?)\s+.xml/m)
    if type_match and type_match[1]
      # Drop surrounding whitespace and one leading/trailing quote character.
      the_type = type_match[1].strip.gsub(/^("|')/, '').gsub(/("|')$/, '').strip
      server_headers["Content-Type"] = the_type
    end
  end

  data = translate_data(data)
  header_regexp = /^Header:\s*([^:]+)\s*:\s*(.+)\s*$/
  # Each match is a [name, value] pair; flattening gives name, value, name, ...
  fields = data.scan(header_regexp).flatten
  unless fields.empty?
    fields.each do |field|
      field.strip!
      # Normalise the common capitalisations of Content-Type.
      field.gsub!(/(Content-type|content-type|content-Type)/, "Content-Type")
    end
    xml_headers = Hash[*fields]
  end

  etag = xml_headers['ETag']
  Mongrel::Const::const_set('ETAG_FORMAT', etag) unless etag.nil? or etag.empty?

  xml_headers.merge(server_headers)
end
|
85
|
+
|
86
|
+
# Reads the HTTP status the test server should answer with for +xmlfile+.
# Called by the server.
#
# The (translated) file contents are searched for the first line of the form
# "Status: NNN"; its value is converted with String#to_i.
#
# xmlfile - String path of the test file.
#
# Returns the Integer status, or 200 when the file has no Status line.
def scrape_status(xmlfile)
  # File.read closes the handle and, unlike Kernel#open, never treats a path
  # starting with "|" as a command.
  data = translate_data(File.read(xmlfile))
  first_status = data[/^Status:\s*(.+)\s?$/, 1]
  first_status ? first_status.to_i : 200
end
|
98
|
+
|
99
|
+
# Replaces Python-style \uXXXX escapes in +text+ with the actual Unicode
# character and returns the new String.
def _expand_python_unicode_escapes(text)
  text.gsub(/\\u([0-9a-fA-F]{4})/) { [$1.hex].pack('U*') }
end

# Extracts a test file's description and expectation, translating the
# expectation from Python syntax into something Ruby can evaluate.
# Called by the testing client.
#
# The file must contain "Description: ... Expect: ... -->"; when it does not,
# a NoMethodError is raised because there is no match to unpack.
#
# xmlfile - String path of the test file.
#
# Returns a two-element Array: [description, evalString].
def scrape_assertion_strings(xmlfile)
  # File.read closes the handle and, unlike Kernel#open, never treats a path
  # starting with "|" as a command.
  data = translate_data(File.read(xmlfile))
  test = data.scan(/Description:\s*(.*?)\s*Expect:\s*(.*)\s*-->/)
  description, evalString = test.first.map { |s| s.strip }

  # Here we translate the expected values in Python to Ruby

  # Python unicode strings starting with u'
  #   u'string'      => 'string'
  #   u'ba\u20acha'  => 'ba€ha'
  evalString.gsub!(/\bu'(.*?)'/) { " '" + _expand_python_unicode_escapes($1.to_s) + "'" }

  # Python unicode strings starting with u" (same treatment)
  evalString.gsub!(/\bu"(.*?)"/) { " \"" + _expand_python_unicode_escapes($1.to_s) + "\"" }

  evalString.gsub!(/\\x([0-9a-fA-F]{2})/) { [$1.hex].pack('U*') }  # "ba\xa3la" => "ba£la"
  evalString.gsub!(/'\s*:\s+/, "' => ")                            # {'foo': 'bar'} => {'foo' => 'bar'}
  evalString.gsub!(/"\s*:\s+/, "\" => ")                           # {"foo": 'bar'} => {"foo" => 'bar'}
  evalString.gsub!(/\=\s*\((.*?)\)/, '= [\1]')                     # = (2004, 12, 4) => = [2004, 12, 4]

  # """<a b="foo">""" => "<a b=\"foo\">"
  # Non-bang gsub on purpose: gsub! returns nil when the literal contains no
  # inner quote, which made "\"" + nil raise a TypeError.
  evalString.gsub!(/"""(.*?)"""/) { "\"" + $1.gsub(/"/) { "\\\"" } + "\"" }

  evalString.gsub!(/(\w|\])\s*\=\= 0\s*$/, '\1 == false')          # ] == 0 => ] == false
  evalString.gsub!(/(\w|\])\s*\=\= 1\s*$/, '\1 == true')           # ] == 1 => ] == true
  evalString.gsub!(/len\((.*?)\)\s*\=\=\s*(\d{1,3})/, '\1.length == \2') # len(ary) == 1 => ary.length == 1
  evalString.gsub!(/None/, "nil")                                  # None => nil
  return description, evalString
end
|
139
|
+
|
140
|
+
# True when +response_status+ is not a success status, i.e. the exact
# opposite of is_valid.
def is_invalid(response_status)
  is_valid(response_status) ? false : true
end

# True when +response_status+ lies strictly between 199 and 300.
def is_valid(response_status)
  too_low = response_status <= 199
  too_high = response_status >= 300
  !(too_low || too_high)
end
|
147
|
+
|
148
|
+
# Mongrel handler that serves the feed test files.
#
# For regular files the status code and extra headers are not taken from the
# file system but scraped out of the file itself (scrape_status /
# scrape_headers), so each test file can script the HTTP response it needs.
class FeedParserTestRequestHandler < Mongrel::DirHandler
  # Handles one request.
  #
  # * unknown path            -> 404, text/plain
  # * directory               -> DirHandler's directory listing
  # * HEAD / GET on a file    -> scripted response (see send_scraped_response)
  # * any other method        -> 403 with DirHandler's ONLY_HEAD_GET message
  #
  # Errors raised while serving a path are logged to STDERR, not re-raised.
  def process(request, response)
    req_method = request.params[Mongrel::Const::REQUEST_METHOD] || Mongrel::Const::GET
    req_path = can_serve request.params[Mongrel::Const::PATH_INFO]
    if not req_path
      # not found, return a 404
      response.start(404) do |head, out|
        head['Content-Type'] = 'text/plain'
        out << "File not found"
      end
    else
      begin
        if File.directory? req_path
          send_dir_listing(request.params[Mongrel::Const::REQUEST_URI], req_path, response)
        elsif req_method == Mongrel::Const::HEAD
          send_scraped_response(req_path, request, response, true)
        elsif req_method == Mongrel::Const::GET
          send_scraped_response(req_path, request, response, false)
        else
          response.start(403) { |head,out|
            head['Content-Type'] = 'text/plain'
            out.write(ONLY_HEAD_GET)
          }
        end
      rescue => details
        STDERR.puts "Error sending file #{req_path}: #{details}"
      end
    end
  end

  # Copy of DirHandler#send_file with two changes:
  # * `response.status = 200` became `response.status ||= 200`, so a status
  #   already chosen by #process is kept;
  # * constants are written as Mongrel::Const::..., because Const is not
  #   reachable unqualified from this class.
  # The conditional-request `case` uses `then`; the `when cond : value` colon
  # form only parses on Ruby 1.8.
  #
  # req_path    - String path of the file to send.
  # request     - the Mongrel request (its params are read for the
  #               If-Modified-Since / If-None-Match headers).
  # response    - the Mongrel response to write to.
  # header_only - when true only status and headers are sent (HEAD).
  def send_file(req_path, request, response, header_only=false)

    stat = File.stat(req_path)

    # Set the last modified times as well and etag for all files
    mtime = stat.mtime
    # Calculated the same as apache, not sure how well the works on win32
    etag = Mongrel::Const::ETAG_FORMAT % [mtime.to_i, stat.size, stat.ino]

    modified_since = request.params[Mongrel::Const::HTTP_IF_MODIFIED_SINCE]
    none_match = request.params[Mongrel::Const::HTTP_IF_NONE_MATCH]

    # test to see if this is a conditional request, and test if
    # the response would be identical to the last response
    same_response = case
                    when modified_since && !last_response_time = Time.httpdate(modified_since) rescue nil then false
                    when modified_since && last_response_time > Time.now then false
                    when modified_since && mtime > last_response_time then false
                    when none_match && none_match == '*' then false
                    when none_match && !none_match.strip.split(/\s*,\s*/).include?(etag) then false
                    else modified_since || none_match # validation successful if we get this far and at least one of the header exists
                    end

    header = response.header
    header[Mongrel::Const::ETAG] = etag

    if same_response
      response.start(304) {}
    else
      # first we setup the headers and status then we do a very fast send on the socket directly
      response.status ||= 200
      header[Mongrel::Const::LAST_MODIFIED] = mtime.httpdate

      # set the mime type from our map based on the ending
      dot_at = req_path.rindex('.')
      if dot_at
        header[Mongrel::Const::CONTENT_TYPE] = MIME_TYPES[req_path[dot_at .. -1]] || @default_content_type
      else
        header[Mongrel::Const::CONTENT_TYPE] = @default_content_type
      end

      # send a status with out content length
      response.send_status(stat.size)
      response.send_header

      if not header_only
        response.send_file(req_path, stat.size < Mongrel::Const::CHUNK_SIZE * 2)
      end
    end
  end

  private

  # Shared HEAD/GET path: starts the response with the status scraped from
  # the file and copies the scraped headers onto it. For a non-2xx status the
  # body is just the status code as text/plain and the file is not sent;
  # otherwise the file is delivered through send_file.
  def send_scraped_response(req_path, request, response, header_only)
    response_status = scrape_status(req_path)
    response.start(response_status) do |head,out|
      xml_head = scrape_headers(req_path)
      xml_head.each_key{|k| head[k] = xml_head[k] }

      if is_invalid(response_status)
        head['content-type'] = 'text/plain;'
        out << response_status
      end
    end

    send_file(req_path, request, response, header_only) unless is_invalid(response_status)
  end
end
|
254
|
+
|
255
|
+
|
256
|
+
# Test case that starts out with no tests of its own; the empty setup and
# teardown hooks are spelled out for clarity.
class XMLTests < Test::Unit::TestCase
  # Nothing to prepare.
  def setup; end

  # Nothing to clean up.
  def teardown; end
end
|
data/tests/rfeedparserserver.rb
CHANGED
@@ -1,116 +1,10 @@
|
|
1
|
-
#!/usr/bin/ruby
|
2
|
-
# This is the same server code that runs in rfeedparsertest.rb, but split
|
3
|
-
# off so that we can fully check each test individually (i.e. get the HTTP
|
4
|
-
# headers right).
|
5
|
-
# This really needs to be merged into rfeedparsertest.rb
|
6
|
-
require 'rubygems'
|
7
|
-
gem 'mongrel'
|
8
|
-
require 'mongrel'
|
9
|
-
require File.dirname(__FILE__)+'/../lib/rfeedparser'
|
10
|
-
$PORT = 8097
|
11
|
-
def translate_data(data)
|
12
|
-
if data[0..3] == "\x4c\x6f\xa7\x94"
|
13
|
-
# EBCDIC
|
14
|
-
data = _ebcdic_to_ascii(data)
|
15
|
-
elsif data[0..3] == "\x00\x3c\x00\x3f"
|
16
|
-
# UTF-16BE
|
17
|
-
data = uconvert(data, 'utf-16be', 'utf-8')
|
18
|
-
elsif data.size >= 4 and data[0..1] == "\xfe\xff" and data[2..3] != "\x00\x00"
|
19
|
-
# UTF-16BE with BOM
|
20
|
-
data = uconvert(data[2..-1], 'utf-16be', 'utf-8')
|
21
|
-
elsif data[0..3] == "\x3c\x00\x3f\x00"
|
22
|
-
# UTF-16LE
|
23
|
-
data = uconvert(data, 'utf-16le', 'utf-8')
|
24
|
-
elsif data.size >=4 and data[0..1] == "\xff\xfe" and data[2..3] != "\x00\x00"
|
25
|
-
# UTF-16LE with BOM
|
26
|
-
data = uconvert(data[2..-1], 'utf-16le', 'utf-8')
|
27
|
-
elsif data[0..3] == "\x00\x00\x00\x3c"
|
28
|
-
# UTF-32BE
|
29
|
-
data = uconvert(data, 'utf-32be', 'utf-8')
|
30
|
-
elsif data[0..3] == "\x3c\x00\x00\x00"
|
31
|
-
# UTF-32LE
|
32
|
-
data = uconvert(data, 'utf-32le', 'utf-8')
|
33
|
-
elsif data[0..3] == "\x00\x00\xfe\xff"
|
34
|
-
# UTF-32BE with BOM
|
35
|
-
data = uconvert(data[4..-1], 'utf-32BE', 'utf-8')
|
36
|
-
elsif data[0..3] == "\xef\xfe\x00\x00"
|
37
|
-
# UTF-32LE with BOM
|
38
|
-
data = uconvert(data[4..-1], 'utf-32le', 'utf-8')
|
39
|
-
elsif data[0..2] == "\xef\xbb\xbf"
|
40
|
-
# UTF-8 with BOM
|
41
|
-
data = data[3..-1]
|
42
|
-
end
|
43
|
-
return data
|
44
|
-
end
|
1
|
+
#!/usr/bin/env ruby
|
45
2
|
|
46
|
-
|
47
|
-
def scrape_headers(xmlfile)
|
48
|
-
# Called by the server
|
49
|
-
xm = open(xmlfile)
|
50
|
-
data = xm.read
|
51
|
-
htaccess = File.dirname(xmlfile)+"/.htaccess"
|
52
|
-
xml_headers = {}
|
53
|
-
server_headers = {}
|
54
|
-
the_type = nil
|
55
|
-
if File.exists? htaccess
|
56
|
-
fn = xm.path.split(File::Separator)[-1] # I can't find the right method for this
|
57
|
-
ht_file = open(htaccess)
|
58
|
-
type_match = ht_file.read.match(/^\s*<Files\s+#{fn}>\s*\n\s*AddType\s+(.*?)\s+.xml/m)
|
59
|
-
the_type = type_match[1].strip.gsub(/^("|')/,'').gsub(/("|')$/,'').strip if type_match and type_match[1]
|
60
|
-
if type_match and the_type
|
61
|
-
#content_type, charset = type_match[1].split(';')
|
62
|
-
server_headers["Content-Type"] = the_type
|
63
|
-
end
|
64
|
-
end
|
65
|
-
data = translate_data(data)
|
66
|
-
da = data.scan /^Header:\s*([^:]+):(.+)\s$/
|
67
|
-
unless da.nil? or da.empty?
|
68
|
-
da.flatten!
|
69
|
-
da.each{|e| e.strip!;e.gsub!(/(Content-type|content-type|content-Type)/, "Content-Type")}
|
70
|
-
xml_headers = Hash[*da] # Asterisk magic!
|
71
|
-
end
|
72
|
-
return xml_headers.merge(server_headers)
|
73
|
-
end
|
74
|
-
|
75
|
-
|
76
|
-
class FeedParserTestRequestHandler < Mongrel::DirHandler
|
77
|
-
|
78
|
-
def process(request, response)
|
79
|
-
req_method = request.params[Mongrel::Const::REQUEST_METHOD] || Mongrel::Const::GET
|
80
|
-
req_path = can_serve request.params[Mongrel::Const::PATH_INFO]
|
81
|
-
if not req_path
|
82
|
-
# not found, return a 404
|
83
|
-
response.start(404) do |head, out|
|
84
|
-
out << "File not found"
|
85
|
-
end
|
86
|
-
else
|
87
|
-
begin
|
88
|
-
if File.directory? req_path
|
89
|
-
send_dir_listing(request.params[Mongrel::Const::REQUEST_URI], req_path, response)
|
90
|
-
elsif req_method == Mongrel::Const::HEAD
|
91
|
-
response.start do |head,out|
|
92
|
-
xml_head = scape_headers(req_path)
|
93
|
-
xml_head.each_key{|k| head[k] = xml_head[k] }
|
94
|
-
end
|
95
|
-
send_file(req_path, request, response, true)
|
96
|
-
elsif req_method == Mongrel::Const::GET
|
97
|
-
response.start do |head,out|
|
98
|
-
xml_head = scrape_headers(req_path)
|
99
|
-
xml_head.each_key{|k| head[k] = xml_head[k] }
|
100
|
-
end
|
101
|
-
send_file(req_path, request, response, false)
|
102
|
-
else
|
103
|
-
response.start(403) {|head,out| out.write(ONLY_HEAD_GET) }
|
104
|
-
end
|
105
|
-
rescue => details
|
106
|
-
STDERR.puts "MON Error sending file #{req_path}: #{details}"
|
107
|
-
end
|
108
|
-
end
|
109
|
-
end
|
110
|
-
end
|
3
|
+
require File.join(File.dirname(__FILE__), 'rfeedparser_test_helper')
|
111
4
|
|
112
5
|
# Start up the mongrel server and tell it how to send the tests
|
113
6
|
server = Mongrel::HttpServer.new("0.0.0.0", $PORT)
|
114
7
|
Mongrel::DirHandler::add_mime_type('.xml','application/xml')
|
8
|
+
Mongrel::DirHandler::add_mime_type('.xml_redirect','application/xml')
|
115
9
|
server.register("/", FeedParserTestRequestHandler.new('.'))
|
116
10
|
server.run.join
|
data/tests/rfeedparsertest.rb
CHANGED
@@ -1,175 +1,15 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# This is based off of Sam Ruby's xml_filetest.rb
|
3
|
-
# I've adapted it for
|
3
|
+
# I've adapted it for rfeedparser
|
4
4
|
# http://intertwingly.net/blog/2005/10/30/Testing-FeedTools-Dynamically/
|
5
5
|
|
6
|
-
require 'test/unit'
|
7
|
-
require File.dirname(__FILE__)+'/../lib/rfeedparser'
|
8
|
-
|
9
|
-
begin
|
10
|
-
require 'rubygems'
|
11
|
-
gem 'mongrel'
|
12
|
-
require 'mongrel'
|
13
|
-
rescue => details
|
14
|
-
STDERR.puts "Whoops, had an error with loading mongrel as a gem. Trying just 'require'. Mongrel is required for testing."
|
15
|
-
require 'mongrel'
|
16
|
-
end
|
17
|
-
|
18
|
-
|
19
|
-
$PORT = 8097 # Not configurable, hard coded in the xml files
|
20
|
-
|
21
|
-
def translate_data(data)
|
22
|
-
if data[0..3] == "\x4c\x6f\xa7\x94"
|
23
|
-
# EBCDIC
|
24
|
-
data = _ebcdic_to_ascii(data)
|
25
|
-
elsif data[0..3] == "\x00\x3c\x00\x3f"
|
26
|
-
# UTF-16BE
|
27
|
-
data = uconvert(data, 'utf-16be', 'utf-8')
|
28
|
-
elsif data.size >= 4 and data[0..1] == "\xfe\xff" and data[2..3] != "\x00\x00"
|
29
|
-
# UTF-16BE with BOM
|
30
|
-
data = uconvert(data[2..-1], 'utf-16be', 'utf-8')
|
31
|
-
elsif data[0..3] == "\x3c\x00\x3f\x00"
|
32
|
-
# UTF-16LE
|
33
|
-
data = uconvert(data, 'utf-16le', 'utf-8')
|
34
|
-
elsif data.size >=4 and data[0..1] == "\xff\xfe" and data[2..3] != "\x00\x00"
|
35
|
-
# UTF-16LE with BOM
|
36
|
-
data = uconvert(data[2..-1], 'utf-16le', 'utf-8')
|
37
|
-
elsif data[0..3] == "\x00\x00\x00\x3c"
|
38
|
-
# UTF-32BE
|
39
|
-
data = uconvert(data, 'utf-32be', 'utf-8')
|
40
|
-
elsif data[0..3] == "\x3c\x00\x00\x00"
|
41
|
-
# UTF-32LE
|
42
|
-
data = uconvert(data, 'utf-32le', 'utf-8')
|
43
|
-
elsif data[0..3] == "\x00\x00\xfe\xff"
|
44
|
-
# UTF-32BE with BOM
|
45
|
-
data = uconvert(data[4..-1], 'utf-32BE', 'utf-8')
|
46
|
-
elsif data[0..3] == "\xff\xfe\x00\x00"
|
47
|
-
# UTF-32LE with BOM
|
48
|
-
data = uconvert(data[4..-1], 'utf-32LE', 'utf-8')
|
49
|
-
elsif data[0..2] == "\xef\xbb\xbf"
|
50
|
-
# UTF-8 with BOM
|
51
|
-
data = data[3..-1]
|
52
|
-
else
|
53
|
-
# ASCII-compatible
|
54
|
-
end
|
55
|
-
return data
|
56
|
-
end
|
57
|
-
|
58
|
-
def scrape_headers(xmlfile)
|
59
|
-
# Called by the server
|
60
|
-
xm = open(xmlfile)
|
61
|
-
data = xm.read
|
62
|
-
htaccess = File.dirname(xmlfile)+"/.htaccess"
|
63
|
-
xml_headers = {}
|
64
|
-
server_headers = {}
|
65
|
-
the_type = nil
|
66
|
-
if File.exists? htaccess
|
67
|
-
fn = xm.path.split(File::Separator)[-1] # I can't find the right method for this
|
68
|
-
ht_file = open(htaccess)
|
69
|
-
type_match = ht_file.read.match(/^\s*<Files\s+#{fn}>\s*\n\s*AddType\s+(.*?)\s+.xml/m)
|
70
|
-
the_type = type_match[1].strip.gsub(/^("|')/,'').gsub(/("|')$/,'').strip if type_match and type_match[1]
|
71
|
-
if type_match and the_type
|
72
|
-
#content_type, charset = type_match[1].split(';')
|
73
|
-
server_headers["Content-Type"] = the_type
|
74
|
-
end
|
75
|
-
end
|
76
|
-
data = translate_data(data)
|
77
|
-
da = data.scan /^Header:\s*([^:]+):(.+)\s$/
|
78
|
-
unless da.nil? or da.empty?
|
79
|
-
da.flatten!
|
80
|
-
da.each{|e| e.strip!;e.gsub!(/(Content-type|content-type|content-Type)/, "Content-Type")}
|
81
|
-
xml_headers = Hash[*da] # Asterisk magic!
|
82
|
-
end
|
83
|
-
return xml_headers.merge(server_headers)
|
84
|
-
end
|
85
|
-
|
86
|
-
def scrape_assertion_strings(xmlfile)
|
87
|
-
# Called by the testing client
|
88
|
-
data = open(xmlfile).read
|
89
|
-
data = translate_data(data)
|
90
|
-
test = data.scan /Description:\s*(.*?)\s*Expect:\s*(.*)\s*-->/
|
91
|
-
description, evalString = test.first.map{ |s| s.strip }
|
92
|
-
|
93
|
-
# Here we translate the expected values in Python to Ruby
|
94
|
-
evalString.gsub!(/\bu'(.*?)'/) do |m|
|
95
|
-
esc = $1.to_s.dup
|
96
|
-
esc.gsub!(/\\u([0-9a-fA-F]{4})/){ |m| [$1.hex].pack('U*') }
|
97
|
-
" '"+esc+"'"
|
98
|
-
end
|
99
|
-
evalString.gsub!(/\bu"(.*?)"/) do |m|
|
100
|
-
esc = $1.to_s.dup
|
101
|
-
esc.gsub!(/\\u([0-9a-fA-F]{4})/){ |m| [$1.hex].pack('U*') }
|
102
|
-
" \""+esc+"\""
|
103
|
-
end
|
104
|
-
# The above does the following: u'string' => 'string'
|
105
|
-
# u'ba\u20acha' => 'ba€ha' # Same for double quoted strings
|
106
|
-
|
107
|
-
evalString.gsub!(/\\x([0-9a-fA-F]{2})/){ |m| [$1.hex].pack('U*') } # "ba\xa3la" => "ba£la"
|
108
|
-
evalString.gsub! /'\s*:\s+/, "' => " # {'foo': 'bar'} => {'foo' => 'bar'}
|
109
|
-
evalString.gsub! /"\s*:\s+/, "\" => " # {"foo": 'bar'} => {"foo" => 'bar'}
|
110
|
-
evalString.gsub! /\=\s*\((.*?)\)/, '= [\1]' # = (2004, 12, 4) => = [2004, 12, 4]
|
111
|
-
evalString.gsub!(/"""(.*?)"""/) do # """<a b="foo">""" => "<a b="foo">"
|
112
|
-
"\""+$1.gsub!(/"/,"\\\"")+"\"" # haha, ugly!
|
113
|
-
end
|
114
|
-
evalString.gsub! /(\w|\])\s*\=\= 0\s*$/, '\1 == false' # ] == 0 => ] == false
|
115
|
-
evalString.gsub! /(\w|\])\s*\=\= 1\s*$/, '\1 == true' # ] == 1 => ] == true
|
116
|
-
evalString.gsub! /len\((.*?)\)\s*\=\=\s*(\d{1,3})/, '\1.length == \2' # len(ary) == 1 => ary.length == 1
|
117
|
-
evalString.gsub! /None/, "nil" # None => nil # well, duh
|
118
|
-
return description, evalString
|
119
|
-
end
|
120
|
-
|
121
|
-
class FeedParserTestRequestHandler < Mongrel::DirHandler
|
122
|
-
def process(request, response)
|
123
|
-
req_method = request.params[Mongrel::Const::REQUEST_METHOD] || Mongrel::Const::GET
|
124
|
-
req_path = can_serve request.params[Mongrel::Const::PATH_INFO]
|
125
|
-
if not req_path
|
126
|
-
# not found, return a 404
|
127
|
-
response.start(404) do |head, out|
|
128
|
-
out << "File not found"
|
129
|
-
end
|
130
|
-
else
|
131
|
-
begin
|
132
|
-
if File.directory? req_path
|
133
|
-
send_dir_listing(request.params[Mongrel::Const::REQUEST_URI], req_path, response)
|
134
|
-
elsif req_method == Mongrel::Const::HEAD
|
135
|
-
response.start do |head,out|
|
136
|
-
xml_head = scrape_headers(req_path)
|
137
|
-
xml_head.each_key{|k| head[k] = xml_head[k] }
|
138
|
-
end
|
139
|
-
|
140
|
-
send_file(req_path, request, response, true)
|
141
|
-
elsif req_method == Mongrel::Const::GET
|
142
|
-
response.start do |head,out|
|
143
|
-
xml_head = scrape_headers(req_path)
|
144
|
-
xml_head.each_key{|k| head[k] = xml_head[k] }
|
145
|
-
end
|
146
|
-
|
147
|
-
send_file(req_path, request, response, false)
|
148
|
-
else
|
149
|
-
response.start(403) {|head,out| out.write(ONLY_HEAD_GET) }
|
150
|
-
end
|
151
|
-
rescue => details
|
152
|
-
STDERR.puts "Error sending file #{req_path}: #{details}"
|
153
|
-
end
|
154
|
-
end
|
155
|
-
end
|
156
|
-
end
|
157
|
-
|
158
|
-
|
159
|
-
class XMLTests < Test::Unit::TestCase
|
160
|
-
# Empty, but here for clarity
|
161
|
-
def setup
|
162
|
-
end
|
163
|
-
def teardown
|
164
|
-
end
|
165
|
-
end
|
6
|
+
require File.join(File.dirname(__FILE__),'rfeedparser_test_helper')
|
166
7
|
|
167
8
|
# default methods to be public
|
168
9
|
XMLTests.send(:public)
|
169
10
|
# add one unit test for each file
|
170
|
-
Dir['**/*.xml'].each do |xmlfile|
|
171
|
-
|
172
|
-
methname = "tests_"+xmlfile.gsub('/','_').sub('.xml','')
|
11
|
+
Dir["#{File.dirname(__FILE__)}/**/*.xml"].each do |xmlfile|
|
12
|
+
methname = "tests_"+xmlfile.gsub('./', '').gsub('/','_').sub('.xml','')
|
173
13
|
XMLTests.send(:define_method, methname) {
|
174
14
|
|
175
15
|
options = {}
|
@@ -184,7 +24,7 @@ Dir['**/*.xml'].each do |xmlfile|
|
|
184
24
|
fp = FeedParser.parse("http://127.0.0.1:#{$PORT}/#{xmlfile}", options)
|
185
25
|
# I should point out that the 'compatible' arg is not necessary,
|
186
26
|
# but probably will be in the future if we decide to change the default.
|
187
|
-
|
27
|
+
|
188
28
|
description, evalString = scrape_assertion_strings(xmlfile)
|
189
29
|
assert fp.instance_eval(evalString), description.inspect
|
190
30
|
}
|
@@ -192,5 +32,6 @@ end
|
|
192
32
|
# Start up the mongrel server and tell it how to send the tests
|
193
33
|
server = Mongrel::HttpServer.new("0.0.0.0",$PORT)
|
194
34
|
Mongrel::DirHandler::add_mime_type('.xml','application/xml')
|
35
|
+
Mongrel::DirHandler::add_mime_type('.xml_redirect','application/xml')
|
195
36
|
server.register("/", FeedParserTestRequestHandler.new("."))
|
196
37
|
server.run
|