rfeedparser 0.9.931 → 0.9.940
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rfeedparser.rb +143 -58
- data/lib/rfeedparser/aliases.rb +1 -1
- data/lib/rfeedparser/better_attributelist.rb +11 -11
- data/lib/rfeedparser/better_sgmlparser.rb +1 -1
- data/lib/rfeedparser/encoding_helpers.rb +120 -127
- data/lib/rfeedparser/feedparserdict.rb +30 -20
- data/lib/rfeedparser/forgiving_uri.rb +9 -7
- data/lib/rfeedparser/markup_helpers.rb +11 -14
- data/lib/rfeedparser/parser_mixin.rb +16 -11
- data/lib/rfeedparser/parsers.rb +1 -2
- data/lib/rfeedparser/scrub.rb +95 -90
- data/lib/rfeedparser/time_helpers.rb +379 -379
- data/lib/rfeedparser/utilities.rb +23 -0
- data/tests/rfeedparser_test_helper.rb +262 -0
- data/tests/rfeedparserserver.rb +3 -109
- data/tests/rfeedparsertest.rb +6 -165
- data/tests/rfponly/http/200.xml +30 -0
- data/tests/rfponly/http/220.xml +28 -0
- data/tests/rfponly/http/300.xml +8 -0
- data/tests/rfponly/http/300.xml_redirect +25 -0
- data/tests/rfponly/http/301.xml +8 -0
- data/tests/rfponly/http/301.xml_redirect +25 -0
- data/tests/rfponly/http/302.xml +8 -0
- data/tests/rfponly/http/302.xml_redirect +25 -0
- data/tests/rfponly/http/307.xml +8 -0
- data/tests/rfponly/http/307.xml_redirect +25 -0
- data/tests/rfponly/http/320.xml +8 -0
- data/tests/rfponly/http/320.xml_redirect +25 -0
- data/tests/rfponly/http/400.xml +7 -0
- data/tests/rfponly/http/404.xml +7 -0
- data/tests/rfponly/http/410.xml +7 -0
- data/tests/rfponly/http/420.xml +7 -0
- data/tests/rfponly/http/500.xml +7 -0
- data/tests/rfponly/http/520.xml +7 -0
- data/tests/rfponly/http/etag.xml +28 -0
- data/tests/rfponly/http/lastmodified.xml +29 -0
- data/tests/rfponly/wellformed/date/feed_modified_with_negative_numeric_timezone.xml +9 -0
- data/tests/rfponly/wellformed/date/feed_modified_with_positive_numeric_timezone.xml +9 -0
- data/tests/rfponly/wellformed/scrub/hpricot_self_closing_tag_workaround.xml +11 -0
- metadata +31 -3
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require File.join(File.dirname(__FILE__), 'aliases')
|
3
|
+
require File.join(File.dirname(__FILE__), 'encoding_helpers')
|
4
|
+
require File.join(File.dirname(__FILE__), 'markup_helpers')
|
5
|
+
require File.join(File.dirname(__FILE__), 'scrub')
|
6
|
+
require File.join(File.dirname(__FILE__), 'time_helpers')
|
7
|
+
|
8
|
+
# Small date/time convenience helpers. Every helper is exposed both as a
# mixin method and as a module-level function (FeedParserUtilities.helper).
module FeedParserUtilities

  # Delegates to FeedParser::FeedTimeParser.parse_date with the given string
  # and returns whatever that parser returns.
  def parse_date(date_string)
    FeedParser::FeedTimeParser.parse_date(date_string)
  end
  module_function :parse_date

  # Delegates to FeedParser::FeedTimeParser.extract_tuple with the given time
  # value and returns whatever that helper returns.
  def extract_tuple(atime)
    FeedParser::FeedTimeParser.extract_tuple(atime)
  end
  module_function :extract_tuple

  # Builds a UTC Time from the first six elements of +pytuple+
  # (presumably year, month, day, hour, minute, second, following Python's
  # time-tuple layout -- confirm against the callers).
  #
  # Returns nil when +pytuple+ is nil, false or empty.
  def py2rtime(pytuple)
    # Plain nil/empty check instead of `blank?`, so the helper does not rely
    # on a core extension being loaded.
    return nil if !pytuple || pytuple.empty?
    Time.utc(*pytuple[0..5])
  end
  # Expose py2rtime at module level like its siblings. module_function makes
  # the mixin copy private, so it is re-published to keep existing
  # `obj.py2rtime(...)` callers working.
  module_function :py2rtime
  public :py2rtime
end
|
@@ -0,0 +1,262 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require File.join(File.dirname(__FILE__),'../lib/rfeedparser')
|
3
|
+
|
4
|
+
# Load Mongrel, preferring the copy activated through RubyGems and falling
# back to a plain `require` when that fails.
begin
  require 'rubygems'
  gem 'mongrel'
  require 'mongrel'
rescue LoadError, StandardError
  # A missing library or gem raises LoadError (Gem::LoadError is a subclass),
  # which a bare `rescue` does not catch, so it is named explicitly here.
  # StandardError is kept so anything the old handler caught is still caught.
  STDERR.puts "Whoops, had an error with loading mongrel as a gem. Trying just 'require'. Mongrel is required for testing."
  require 'mongrel'
end
|
12
|
+
# Give the non-standard 220 status a reason phrase in Mongrel's status table
# (presumably for the 220.xml HTTP fixture -- confirm against the test data).
Mongrel::HTTP_STATUS_CODES[220] = "Unspecified success"
|
13
|
+
|
14
|
+
# Top-level shortcut so helper code can call FeedParser.uconvert unqualified;
# the three arguments are passed through unchanged.
def uconvert(one, two, three)
  FeedParser.uconvert(one, two, three)
end
|
15
|
+
# Top-level shortcut so helper code can call FeedParser._ebcdic_to_ascii
# unqualified; the argument is passed through unchanged.
def _ebcdic_to_ascii(one)
  FeedParser._ebcdic_to_ascii(one)
end
|
16
|
+
|
17
|
+
$PORT = 8097 # Not configurable, hard coded in the xml files
|
18
|
+
|
19
|
+
# Sniffs the leading bytes of a raw fixture file and returns its content
# re-encoded for scanning.
#
# * EBCDIC content is passed to _ebcdic_to_ascii.
# * UTF-16/UTF-32 content (with or without a byte-order mark) is passed to
#   uconvert with 'utf-8' as the target; a BOM is dropped first.
# * A UTF-8 BOM is stripped.
# * Anything else is returned untouched.
#
# The signature checks are done on a byte view of +data+. Slicing an
# encoding-aware string by character would, for example, treat the three-byte
# UTF-8 BOM as a single character and never match it.
def translate_data(data)
  # String#b gives a binary copy where available; older Rubies already
  # index strings by byte.
  raw = data.respond_to?(:b) ? data.b : data
  # Build byte signatures with pack so they are binary too and compare
  # byte-for-byte against the binary view.
  sig = lambda { |*bytes| bytes.pack('C*') }
  # Re-tag a byte slice with the encoding the caller's string carried.
  retag = lambda do |slice|
    slice.respond_to?(:force_encoding) ? slice.force_encoding(data.encoding) : slice
  end

  head4 = raw[0, 4]
  head2 = raw[0, 2]
  long_enough = raw.size >= 4
  zero_pair = sig[0x00, 0x00]

  if head4 == sig[0x4c, 0x6f, 0xa7, 0x94]
    # EBCDIC
    data = _ebcdic_to_ascii(data)
  elsif head4 == sig[0x00, 0x3c, 0x00, 0x3f]
    # UTF-16BE
    data = uconvert(data, 'utf-16be', 'utf-8')
  elsif long_enough && head2 == sig[0xfe, 0xff] && raw[2, 2] != zero_pair
    # UTF-16BE with BOM
    data = uconvert(retag[raw[2..-1]], 'utf-16be', 'utf-8')
  elsif head4 == sig[0x3c, 0x00, 0x3f, 0x00]
    # UTF-16LE
    data = uconvert(data, 'utf-16le', 'utf-8')
  elsif long_enough && head2 == sig[0xff, 0xfe] && raw[2, 2] != zero_pair
    # UTF-16LE with BOM (the zero-pair test keeps a UTF-32LE BOM out of here)
    data = uconvert(retag[raw[2..-1]], 'utf-16le', 'utf-8')
  elsif head4 == sig[0x00, 0x00, 0x00, 0x3c]
    # UTF-32BE
    data = uconvert(data, 'utf-32be', 'utf-8')
  elsif head4 == sig[0x3c, 0x00, 0x00, 0x00]
    # UTF-32LE
    data = uconvert(data, 'utf-32le', 'utf-8')
  elsif head4 == sig[0x00, 0x00, 0xfe, 0xff]
    # UTF-32BE with BOM
    data = uconvert(retag[raw[4..-1]], 'utf-32BE', 'utf-8')
  elsif head4 == sig[0xff, 0xfe, 0x00, 0x00]
    # UTF-32LE with BOM
    data = uconvert(retag[raw[4..-1]], 'utf-32LE', 'utf-8')
  elsif raw[0, 3] == sig[0xef, 0xbb, 0xbf]
    # UTF-8 with BOM
    data = retag[raw[3..-1]]
  end
  # Anything that fell through is ASCII-compatible and returned unchanged.
  data
end
|
55
|
+
|
56
|
+
# Called by the server.
#
# Collects the HTTP response headers a fixture file asks for and returns them
# as a Hash of header name => value.
#
# Two sources are combined:
# * "Header: Name: value" lines inside the fixture itself, and
# * an `AddType` rule for this file in a sibling .htaccess, which supplies
#   Content-Type and overrides any Content-Type from the fixture.
#
# Side effect: when the fixture declares a non-empty ETag header,
# Mongrel::Const::ETAG_FORMAT is (re)set to that value.
def scrape_headers(xmlfile)
  # File.read closes the handle; the previous open(...).read pair leaked two
  # descriptors per request.
  data = File.read(xmlfile)
  htaccess = File.dirname(xmlfile)+"/.htaccess"
  server_headers = {}

  if File.exist?(htaccess)
    # Escape the file name so characters such as '.' or '+' are matched
    # literally inside the pattern.
    fn = Regexp.escape(File.basename(xmlfile))
    type_match = File.read(htaccess).match(/^\s*<Files\s+#{fn}>\s*\n\s*AddType\s+(.*?)\s+.xml/m)
    if type_match && type_match[1]
      # Drop one optional surrounding quote character from each end.
      the_type = type_match[1].strip.gsub(/^("|')/,'').gsub(/("|')$/,'').strip
      server_headers["Content-Type"] = the_type
    end
  end

  data = translate_data(data)
  header_regexp = /^Header:\s*([^:]+)\s*:\s*(.+)\s*$/
  pairs = data.scan(header_regexp)

  xml_headers = {}
  unless pairs.empty?
    flat = pairs.flatten
    # Normalise the spelling of Content-Type (applied to names and values
    # alike, as before).
    flat.each do |e|
      e.strip!
      e.gsub!(/(Content-type|content-type|content-Type)/, "Content-Type")
    end
    xml_headers = Hash[*flat]
  end

  etag = xml_headers['ETag']
  Mongrel::Const.const_set('ETAG_FORMAT', etag) unless etag.nil? || etag.empty?
  xml_headers.merge(server_headers)
end
|
85
|
+
|
86
|
+
# Called by the server.
#
# Returns the HTTP status a fixture file asks to be served with: the integer
# value of its first "Status: NNN" line, or 200 when there is no such line.
def scrape_status(xmlfile)
  # File.read closes the handle, unlike the previous open(...).read.
  data = translate_data(File.read(xmlfile))
  found = data.match(/^Status:\s*(.+)\s?$/)
  found ? found[1].to_i : 200
end
|
98
|
+
|
99
|
+
# Called by the testing client.
#
# Reads the "Description: ... Expect: ... -->" comment from a fixture file and
# returns [description, evalString]. evalString is the Python expectation
# rewritten, by textual substitution, into an expression Ruby can evaluate.
#
# Raises ArgumentError when the fixture has no such comment.
def scrape_assertion_strings(xmlfile)
  # File.read closes the handle, unlike the previous open(...).read.
  data = translate_data(File.read(xmlfile))
  test = data.scan(/Description:\s*(.*?)\s*Expect:\s*(.*)\s*-->/)
  if test.empty?
    raise ArgumentError, "no Description/Expect comment found in #{xmlfile}"
  end
  description, evalString = test.first.map { |s| s.strip }

  # Here we translate the expected values in Python to Ruby

  # Replace \uXXXX escapes with the actual Unicode character.
  unescape_u = lambda do |text|
    text.gsub(/\\u([0-9a-fA-F]{4})/) { [$1.hex].pack('U*') }
  end

  # Find Python unicode strings starting with u'
  evalString.gsub!(/\bu'(.*?)'/) { " '" + unescape_u[$1.to_s] + "'" }

  # Find Python unicode strings starting with u"
  evalString.gsub!(/\bu"(.*?)"/) { " \"" + unescape_u[$1.to_s] + "\"" }
  # The above does the following: u'string' => 'string'
  #                               u'ba\u20acha' => 'ba€ha' # Same for double quoted strings

  evalString.gsub!(/\\x([0-9a-fA-F]{2})/) { [$1.hex].pack('U*') } # "ba\xa3la" => "ba£la"
  evalString.gsub!(/'\s*:\s+/, "' => ")       # {'foo': 'bar'} => {'foo' => 'bar'}
  evalString.gsub!(/"\s*:\s+/, "\" => ")      # {"foo": 'bar'} => {"foo" => 'bar'}
  evalString.gsub!(/\=\s*\((.*?)\)/, '= [\1]') # = (2004, 12, 4) => = [2004, 12, 4]
  evalString.gsub!(/"""(.*?)"""/) do          # """<a b="foo">""" => "<a b=\"foo\">"
    # Non-destructive gsub: gsub! returns nil when the text contains no
    # double quote, which used to blow up the concatenation.
    "\"" + $1.gsub(/"/, "\\\"") + "\""
  end
  evalString.gsub!(/(\w|\])\s*\=\= 0\s*$/, '\1 == false') # ] == 0 => ] == false
  evalString.gsub!(/(\w|\])\s*\=\= 1\s*$/, '\1 == true')  # ] == 1 => ] == true
  evalString.gsub!(/len\((.*?)\)\s*\=\=\s*(\d{1,3})/, '\1.length == \2') # len(ary) == 1 => ary.length == 1
  evalString.gsub!(/None/, "nil")             # None => nil
  return description, evalString
end
|
139
|
+
|
140
|
+
# True when +response_status+ is not a 2xx status, i.e. the exact negation of
# is_valid.
def is_invalid(response_status)
  !is_valid(response_status)
end
|
143
|
+
|
144
|
+
# True when +response_status+ lies strictly between 199 and 300, i.e. it is a
# 2xx success status (this includes the non-standard 220).
def is_valid(response_status)
  response_status > 199 && response_status < 300
end
|
147
|
+
|
148
|
+
# Mongrel handler that serves the XML fixtures. The status code and extra
# response headers for each fixture come from scrape_status and
# scrape_headers.
class FeedParserTestRequestHandler < Mongrel::DirHandler
  # Answers one request:
  # * unknown path          -> 404 with a plain-text body
  # * directory             -> directory listing
  # * HEAD / GET on a file  -> fixture status and headers, then the file
  #                            itself unless the status is not 2xx
  # * any other method      -> 403
  # Errors raised while serving a file are logged to STDERR, not propagated.
  def process(request, response)
    req_method = request.params[Mongrel::Const::REQUEST_METHOD] || Mongrel::Const::GET
    req_path = can_serve request.params[Mongrel::Const::PATH_INFO]
    if not req_path
      # not found, return a 404
      response.start(404) do |head, out|
        head['Content-Type'] = 'text/plain'
        out << "File not found"
      end
    else
      begin
        if File.directory? req_path
          send_dir_listing(request.params[Mongrel::Const::REQUEST_URI], req_path, response)
        elsif req_method == Mongrel::Const::HEAD
          serve_fixture(req_path, request, response, true)
        elsif req_method == Mongrel::Const::GET
          serve_fixture(req_path, request, response, false)
        else
          response.start(403) { |head,out|
            head['Content-Type'] = 'text/plain'
            out.write(ONLY_HEAD_GET)
          }
        end
      rescue => details
        STDERR.puts "Error sending file #{req_path}: #{details}"
      end
    end
  end

  # Override of DirHandler#send_file with two differences, as stated by the
  # original author: `response.status ||= 200` replaces `response.status = 200`
  # so a status chosen earlier is kept, and the constants are referenced as
  # Mongrel::Const::... because the bare names did not resolve in the subclass.
  #
  # Sends req_path, or a 304 when the request's conditional headers show the
  # client already has the current version. With +header_only+ the body is
  # omitted.
  def send_file(req_path, request, response, header_only=false)

    stat = File.stat(req_path)

    # Set the last modified times as well and etag for all files
    mtime = stat.mtime
    # Calculated the same as apache, not sure how well the works on win32
    etag = Mongrel::Const::ETAG_FORMAT % [mtime.to_i, stat.size, stat.ino]

    modified_since = request.params[Mongrel::Const::HTTP_IF_MODIFIED_SINCE]
    none_match = request.params[Mongrel::Const::HTTP_IF_NONE_MATCH]

    # Parse If-Modified-Since up front; an unparseable date leaves
    # last_response_time nil, which counts as "not the same response" below.
    last_response_time = nil
    if modified_since
      begin
        last_response_time = Time.httpdate(modified_since)
      rescue ArgumentError
        last_response_time = nil
      end
    end

    # test to see if this is a conditional request, and test if
    # the response would be identical to the last response
    same_response =
      if modified_since && last_response_time.nil?
        false
      elsif modified_since && last_response_time > Time.now
        false
      elsif modified_since && mtime > last_response_time
        false
      elsif none_match && none_match == '*'
        false
      elsif none_match && !none_match.strip.split(/\s*,\s*/).include?(etag)
        false
      else
        # validation successful if we get this far and at least one of the header exists
        modified_since || none_match
      end

    header = response.header
    header[Mongrel::Const::ETAG] = etag

    if same_response
      response.start(304) {}
    else
      # first we setup the headers and status then we do a very fast send on the socket directly
      response.status ||= 200
      header[Mongrel::Const::LAST_MODIFIED] = mtime.httpdate

      # set the mime type from our map based on the ending
      dot_at = req_path.rindex('.')
      if dot_at
        header[Mongrel::Const::CONTENT_TYPE] = MIME_TYPES[req_path[dot_at .. -1]] || @default_content_type
      else
        header[Mongrel::Const::CONTENT_TYPE] = @default_content_type
      end

      # send a status with out content length
      response.send_status(stat.size)
      response.send_header

      if not header_only
        response.send_file(req_path, stat.size < Mongrel::Const::CHUNK_SIZE * 2)
      end
    end
  end

  private

  # Shared HEAD/GET path: start the response with the fixture's status and
  # headers, write the status code as a plain-text body when it is not 2xx,
  # and otherwise hand over to send_file.
  def serve_fixture(req_path, request, response, header_only)
    response_status = scrape_status(req_path)
    response.start(response_status) do |head,out|
      xml_head = scrape_headers(req_path)
      xml_head.each_key{|k| head[k] = xml_head[k] }

      if is_invalid(response_status)
        head['content-type'] = 'text/plain;'
        out << response_status
      end
    end

    send_file(req_path, request, response, header_only) unless is_invalid(response_status)
  end
end
|
254
|
+
|
255
|
+
|
256
|
+
# Test case shell. It defines no tests itself; the test runner script adds
# one test method per fixture file with define_method.
class XMLTests < Test::Unit::TestCase
  # Empty, but here for clarity
  def setup
  end

  def teardown
  end
end
|
data/tests/rfeedparserserver.rb
CHANGED
@@ -1,116 +1,10 @@
|
|
1
|
-
#!/usr/bin/ruby
|
2
|
-
# This is the same server code that runs in rfeedparsertest.rb, but split
|
3
|
-
# off so that we can fully check each test individually (i.e. get the HTTP
|
4
|
-
# headers right).
|
5
|
-
# This really needs to be merged into rfeedparsertest.rb
|
6
|
-
require 'rubygems'
|
7
|
-
gem 'mongrel'
|
8
|
-
require 'mongrel'
|
9
|
-
require File.dirname(__FILE__)+'/../lib/rfeedparser'
|
10
|
-
$PORT = 8097
|
11
|
-
def translate_data(data)
|
12
|
-
if data[0..3] == "\x4c\x6f\xa7\x94"
|
13
|
-
# EBCDIC
|
14
|
-
data = _ebcdic_to_ascii(data)
|
15
|
-
elsif data[0..3] == "\x00\x3c\x00\x3f"
|
16
|
-
# UTF-16BE
|
17
|
-
data = uconvert(data, 'utf-16be', 'utf-8')
|
18
|
-
elsif data.size >= 4 and data[0..1] == "\xfe\xff" and data[2..3] != "\x00\x00"
|
19
|
-
# UTF-16BE with BOM
|
20
|
-
data = uconvert(data[2..-1], 'utf-16be', 'utf-8')
|
21
|
-
elsif data[0..3] == "\x3c\x00\x3f\x00"
|
22
|
-
# UTF-16LE
|
23
|
-
data = uconvert(data, 'utf-16le', 'utf-8')
|
24
|
-
elsif data.size >=4 and data[0..1] == "\xff\xfe" and data[2..3] != "\x00\x00"
|
25
|
-
# UTF-16LE with BOM
|
26
|
-
data = uconvert(data[2..-1], 'utf-16le', 'utf-8')
|
27
|
-
elsif data[0..3] == "\x00\x00\x00\x3c"
|
28
|
-
# UTF-32BE
|
29
|
-
data = uconvert(data, 'utf-32be', 'utf-8')
|
30
|
-
elsif data[0..3] == "\x3c\x00\x00\x00"
|
31
|
-
# UTF-32LE
|
32
|
-
data = uconvert(data, 'utf-32le', 'utf-8')
|
33
|
-
elsif data[0..3] == "\x00\x00\xfe\xff"
|
34
|
-
# UTF-32BE with BOM
|
35
|
-
data = uconvert(data[4..-1], 'utf-32BE', 'utf-8')
|
36
|
-
elsif data[0..3] == "\xef\xfe\x00\x00"
|
37
|
-
# UTF-32LE with BOM
|
38
|
-
data = uconvert(data[4..-1], 'utf-32le', 'utf-8')
|
39
|
-
elsif data[0..2] == "\xef\xbb\xbf"
|
40
|
-
# UTF-8 with BOM
|
41
|
-
data = data[3..-1]
|
42
|
-
end
|
43
|
-
return data
|
44
|
-
end
|
1
|
+
#!/usr/bin/env ruby
|
45
2
|
|
46
|
-
|
47
|
-
def scrape_headers(xmlfile)
|
48
|
-
# Called by the server
|
49
|
-
xm = open(xmlfile)
|
50
|
-
data = xm.read
|
51
|
-
htaccess = File.dirname(xmlfile)+"/.htaccess"
|
52
|
-
xml_headers = {}
|
53
|
-
server_headers = {}
|
54
|
-
the_type = nil
|
55
|
-
if File.exists? htaccess
|
56
|
-
fn = xm.path.split(File::Separator)[-1] # I can't find the right method for this
|
57
|
-
ht_file = open(htaccess)
|
58
|
-
type_match = ht_file.read.match(/^\s*<Files\s+#{fn}>\s*\n\s*AddType\s+(.*?)\s+.xml/m)
|
59
|
-
the_type = type_match[1].strip.gsub(/^("|')/,'').gsub(/("|')$/,'').strip if type_match and type_match[1]
|
60
|
-
if type_match and the_type
|
61
|
-
#content_type, charset = type_match[1].split(';')
|
62
|
-
server_headers["Content-Type"] = the_type
|
63
|
-
end
|
64
|
-
end
|
65
|
-
data = translate_data(data)
|
66
|
-
da = data.scan /^Header:\s*([^:]+):(.+)\s$/
|
67
|
-
unless da.nil? or da.empty?
|
68
|
-
da.flatten!
|
69
|
-
da.each{|e| e.strip!;e.gsub!(/(Content-type|content-type|content-Type)/, "Content-Type")}
|
70
|
-
xml_headers = Hash[*da] # Asterisk magic!
|
71
|
-
end
|
72
|
-
return xml_headers.merge(server_headers)
|
73
|
-
end
|
74
|
-
|
75
|
-
|
76
|
-
class FeedParserTestRequestHandler < Mongrel::DirHandler
|
77
|
-
|
78
|
-
def process(request, response)
|
79
|
-
req_method = request.params[Mongrel::Const::REQUEST_METHOD] || Mongrel::Const::GET
|
80
|
-
req_path = can_serve request.params[Mongrel::Const::PATH_INFO]
|
81
|
-
if not req_path
|
82
|
-
# not found, return a 404
|
83
|
-
response.start(404) do |head, out|
|
84
|
-
out << "File not found"
|
85
|
-
end
|
86
|
-
else
|
87
|
-
begin
|
88
|
-
if File.directory? req_path
|
89
|
-
send_dir_listing(request.params[Mongrel::Const::REQUEST_URI], req_path, response)
|
90
|
-
elsif req_method == Mongrel::Const::HEAD
|
91
|
-
response.start do |head,out|
|
92
|
-
xml_head = scape_headers(req_path)
|
93
|
-
xml_head.each_key{|k| head[k] = xml_head[k] }
|
94
|
-
end
|
95
|
-
send_file(req_path, request, response, true)
|
96
|
-
elsif req_method == Mongrel::Const::GET
|
97
|
-
response.start do |head,out|
|
98
|
-
xml_head = scrape_headers(req_path)
|
99
|
-
xml_head.each_key{|k| head[k] = xml_head[k] }
|
100
|
-
end
|
101
|
-
send_file(req_path, request, response, false)
|
102
|
-
else
|
103
|
-
response.start(403) {|head,out| out.write(ONLY_HEAD_GET) }
|
104
|
-
end
|
105
|
-
rescue => details
|
106
|
-
STDERR.puts "MON Error sending file #{req_path}: #{details}"
|
107
|
-
end
|
108
|
-
end
|
109
|
-
end
|
110
|
-
end
|
3
|
+
require File.join(File.dirname(__FILE__), 'rfeedparser_test_helper')
|
111
4
|
|
112
5
|
# Start up the mongrel server and tell it how to send the tests
|
113
6
|
server = Mongrel::HttpServer.new("0.0.0.0", $PORT)
|
114
7
|
Mongrel::DirHandler::add_mime_type('.xml','application/xml')
|
8
|
+
Mongrel::DirHandler::add_mime_type('.xml_redirect','application/xml')
|
115
9
|
server.register("/", FeedParserTestRequestHandler.new('.'))
|
116
10
|
server.run.join
|
data/tests/rfeedparsertest.rb
CHANGED
@@ -1,175 +1,15 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# This is based off of Sam Ruby's xml_filetest.rb
|
3
|
-
# I've adapted it for
|
3
|
+
# I've adapted it for rfeedparser
|
4
4
|
# http://intertwingly.net/blog/2005/10/30/Testing-FeedTools-Dynamically/
|
5
5
|
|
6
|
-
require 'test/unit'
|
7
|
-
require File.dirname(__FILE__)+'/../lib/rfeedparser'
|
8
|
-
|
9
|
-
begin
|
10
|
-
require 'rubygems'
|
11
|
-
gem 'mongrel'
|
12
|
-
require 'mongrel'
|
13
|
-
rescue => details
|
14
|
-
STDERR.puts "Whoops, had an error with loading mongrel as a gem. Trying just 'require'. Mongrel is required for testing."
|
15
|
-
require 'mongrel'
|
16
|
-
end
|
17
|
-
|
18
|
-
|
19
|
-
$PORT = 8097 # Not configurable, hard coded in the xml files
|
20
|
-
|
21
|
-
def translate_data(data)
|
22
|
-
if data[0..3] == "\x4c\x6f\xa7\x94"
|
23
|
-
# EBCDIC
|
24
|
-
data = _ebcdic_to_ascii(data)
|
25
|
-
elsif data[0..3] == "\x00\x3c\x00\x3f"
|
26
|
-
# UTF-16BE
|
27
|
-
data = uconvert(data, 'utf-16be', 'utf-8')
|
28
|
-
elsif data.size >= 4 and data[0..1] == "\xfe\xff" and data[2..3] != "\x00\x00"
|
29
|
-
# UTF-16BE with BOM
|
30
|
-
data = uconvert(data[2..-1], 'utf-16be', 'utf-8')
|
31
|
-
elsif data[0..3] == "\x3c\x00\x3f\x00"
|
32
|
-
# UTF-16LE
|
33
|
-
data = uconvert(data, 'utf-16le', 'utf-8')
|
34
|
-
elsif data.size >=4 and data[0..1] == "\xff\xfe" and data[2..3] != "\x00\x00"
|
35
|
-
# UTF-16LE with BOM
|
36
|
-
data = uconvert(data[2..-1], 'utf-16le', 'utf-8')
|
37
|
-
elsif data[0..3] == "\x00\x00\x00\x3c"
|
38
|
-
# UTF-32BE
|
39
|
-
data = uconvert(data, 'utf-32be', 'utf-8')
|
40
|
-
elsif data[0..3] == "\x3c\x00\x00\x00"
|
41
|
-
# UTF-32LE
|
42
|
-
data = uconvert(data, 'utf-32le', 'utf-8')
|
43
|
-
elsif data[0..3] == "\x00\x00\xfe\xff"
|
44
|
-
# UTF-32BE with BOM
|
45
|
-
data = uconvert(data[4..-1], 'utf-32BE', 'utf-8')
|
46
|
-
elsif data[0..3] == "\xff\xfe\x00\x00"
|
47
|
-
# UTF-32LE with BOM
|
48
|
-
data = uconvert(data[4..-1], 'utf-32LE', 'utf-8')
|
49
|
-
elsif data[0..2] == "\xef\xbb\xbf"
|
50
|
-
# UTF-8 with BOM
|
51
|
-
data = data[3..-1]
|
52
|
-
else
|
53
|
-
# ASCII-compatible
|
54
|
-
end
|
55
|
-
return data
|
56
|
-
end
|
57
|
-
|
58
|
-
def scrape_headers(xmlfile)
|
59
|
-
# Called by the server
|
60
|
-
xm = open(xmlfile)
|
61
|
-
data = xm.read
|
62
|
-
htaccess = File.dirname(xmlfile)+"/.htaccess"
|
63
|
-
xml_headers = {}
|
64
|
-
server_headers = {}
|
65
|
-
the_type = nil
|
66
|
-
if File.exists? htaccess
|
67
|
-
fn = xm.path.split(File::Separator)[-1] # I can't find the right method for this
|
68
|
-
ht_file = open(htaccess)
|
69
|
-
type_match = ht_file.read.match(/^\s*<Files\s+#{fn}>\s*\n\s*AddType\s+(.*?)\s+.xml/m)
|
70
|
-
the_type = type_match[1].strip.gsub(/^("|')/,'').gsub(/("|')$/,'').strip if type_match and type_match[1]
|
71
|
-
if type_match and the_type
|
72
|
-
#content_type, charset = type_match[1].split(';')
|
73
|
-
server_headers["Content-Type"] = the_type
|
74
|
-
end
|
75
|
-
end
|
76
|
-
data = translate_data(data)
|
77
|
-
da = data.scan /^Header:\s*([^:]+):(.+)\s$/
|
78
|
-
unless da.nil? or da.empty?
|
79
|
-
da.flatten!
|
80
|
-
da.each{|e| e.strip!;e.gsub!(/(Content-type|content-type|content-Type)/, "Content-Type")}
|
81
|
-
xml_headers = Hash[*da] # Asterisk magic!
|
82
|
-
end
|
83
|
-
return xml_headers.merge(server_headers)
|
84
|
-
end
|
85
|
-
|
86
|
-
def scrape_assertion_strings(xmlfile)
|
87
|
-
# Called by the testing client
|
88
|
-
data = open(xmlfile).read
|
89
|
-
data = translate_data(data)
|
90
|
-
test = data.scan /Description:\s*(.*?)\s*Expect:\s*(.*)\s*-->/
|
91
|
-
description, evalString = test.first.map{ |s| s.strip }
|
92
|
-
|
93
|
-
# Here we translate the expected values in Python to Ruby
|
94
|
-
evalString.gsub!(/\bu'(.*?)'/) do |m|
|
95
|
-
esc = $1.to_s.dup
|
96
|
-
esc.gsub!(/\\u([0-9a-fA-F]{4})/){ |m| [$1.hex].pack('U*') }
|
97
|
-
" '"+esc+"'"
|
98
|
-
end
|
99
|
-
evalString.gsub!(/\bu"(.*?)"/) do |m|
|
100
|
-
esc = $1.to_s.dup
|
101
|
-
esc.gsub!(/\\u([0-9a-fA-F]{4})/){ |m| [$1.hex].pack('U*') }
|
102
|
-
" \""+esc+"\""
|
103
|
-
end
|
104
|
-
# The above does the following: u'string' => 'string'
|
105
|
-
# u'ba\u20acha' => 'ba€ha' # Same for double quoted strings
|
106
|
-
|
107
|
-
evalString.gsub!(/\\x([0-9a-fA-F]{2})/){ |m| [$1.hex].pack('U*') } # "ba\xa3la" => "ba£la"
|
108
|
-
evalString.gsub! /'\s*:\s+/, "' => " # {'foo': 'bar'} => {'foo' => 'bar'}
|
109
|
-
evalString.gsub! /"\s*:\s+/, "\" => " # {"foo": 'bar'} => {"foo" => 'bar'}
|
110
|
-
evalString.gsub! /\=\s*\((.*?)\)/, '= [\1]' # = (2004, 12, 4) => = [2004, 12, 4]
|
111
|
-
evalString.gsub!(/"""(.*?)"""/) do # """<a b="foo">""" => "<a b="foo">"
|
112
|
-
"\""+$1.gsub!(/"/,"\\\"")+"\"" # haha, ugly!
|
113
|
-
end
|
114
|
-
evalString.gsub! /(\w|\])\s*\=\= 0\s*$/, '\1 == false' # ] == 0 => ] == false
|
115
|
-
evalString.gsub! /(\w|\])\s*\=\= 1\s*$/, '\1 == true' # ] == 1 => ] == true
|
116
|
-
evalString.gsub! /len\((.*?)\)\s*\=\=\s*(\d{1,3})/, '\1.length == \2' # len(ary) == 1 => ary.length == 1
|
117
|
-
evalString.gsub! /None/, "nil" # None => nil # well, duh
|
118
|
-
return description, evalString
|
119
|
-
end
|
120
|
-
|
121
|
-
class FeedParserTestRequestHandler < Mongrel::DirHandler
|
122
|
-
def process(request, response)
|
123
|
-
req_method = request.params[Mongrel::Const::REQUEST_METHOD] || Mongrel::Const::GET
|
124
|
-
req_path = can_serve request.params[Mongrel::Const::PATH_INFO]
|
125
|
-
if not req_path
|
126
|
-
# not found, return a 404
|
127
|
-
response.start(404) do |head, out|
|
128
|
-
out << "File not found"
|
129
|
-
end
|
130
|
-
else
|
131
|
-
begin
|
132
|
-
if File.directory? req_path
|
133
|
-
send_dir_listing(request.params[Mongrel::Const::REQUEST_URI], req_path, response)
|
134
|
-
elsif req_method == Mongrel::Const::HEAD
|
135
|
-
response.start do |head,out|
|
136
|
-
xml_head = scrape_headers(req_path)
|
137
|
-
xml_head.each_key{|k| head[k] = xml_head[k] }
|
138
|
-
end
|
139
|
-
|
140
|
-
send_file(req_path, request, response, true)
|
141
|
-
elsif req_method == Mongrel::Const::GET
|
142
|
-
response.start do |head,out|
|
143
|
-
xml_head = scrape_headers(req_path)
|
144
|
-
xml_head.each_key{|k| head[k] = xml_head[k] }
|
145
|
-
end
|
146
|
-
|
147
|
-
send_file(req_path, request, response, false)
|
148
|
-
else
|
149
|
-
response.start(403) {|head,out| out.write(ONLY_HEAD_GET) }
|
150
|
-
end
|
151
|
-
rescue => details
|
152
|
-
STDERR.puts "Error sending file #{req_path}: #{details}"
|
153
|
-
end
|
154
|
-
end
|
155
|
-
end
|
156
|
-
end
|
157
|
-
|
158
|
-
|
159
|
-
class XMLTests < Test::Unit::TestCase
|
160
|
-
# Empty, but here for clarity
|
161
|
-
def setup
|
162
|
-
end
|
163
|
-
def teardown
|
164
|
-
end
|
165
|
-
end
|
6
|
+
require File.join(File.dirname(__FILE__),'rfeedparser_test_helper')
|
166
7
|
|
167
8
|
# default methods to be public
|
168
9
|
XMLTests.send(:public)
|
169
10
|
# add one unit test for each file
|
170
|
-
Dir['**/*.xml'].each do |xmlfile|
|
171
|
-
|
172
|
-
methname = "tests_"+xmlfile.gsub('/','_').sub('.xml','')
|
11
|
+
Dir["#{File.dirname(__FILE__)}/**/*.xml"].each do |xmlfile|
|
12
|
+
methname = "tests_"+xmlfile.gsub('./', '').gsub('/','_').sub('.xml','')
|
173
13
|
XMLTests.send(:define_method, methname) {
|
174
14
|
|
175
15
|
options = {}
|
@@ -184,7 +24,7 @@ Dir['**/*.xml'].each do |xmlfile|
|
|
184
24
|
fp = FeedParser.parse("http://127.0.0.1:#{$PORT}/#{xmlfile}", options)
|
185
25
|
# I should point out that the 'compatible' arg is not necessary,
|
186
26
|
# but probably will be in the future if we decide to change the default.
|
187
|
-
|
27
|
+
|
188
28
|
description, evalString = scrape_assertion_strings(xmlfile)
|
189
29
|
assert fp.instance_eval(evalString), description.inspect
|
190
30
|
}
|
@@ -192,5 +32,6 @@ end
|
|
192
32
|
# Start up the mongrel server and tell it how to send the tests
|
193
33
|
server = Mongrel::HttpServer.new("0.0.0.0",$PORT)
|
194
34
|
Mongrel::DirHandler::add_mime_type('.xml','application/xml')
|
35
|
+
Mongrel::DirHandler::add_mime_type('.xml_redirect','application/xml')
|
195
36
|
server.register("/", FeedParserTestRequestHandler.new("."))
|
196
37
|
server.run
|