feedtools 0.2.22 → 0.2.23
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +28 -0
- data/README +23 -2
- data/db/migration.rb +19 -0
- data/db/schema.mysql.sql +1 -1
- data/db/schema.postgresql.sql +1 -1
- data/db/schema.sqlite.sql +1 -1
- data/lib/feed_tools.rb +71 -388
- data/lib/feed_tools/database_feed_cache.rb +4 -3
- data/lib/feed_tools/feed.rb +809 -607
- data/lib/feed_tools/feed_item.rb +551 -574
- data/lib/feed_tools/feed_structures.rb +252 -0
- data/lib/feed_tools/helpers/feed_tools_helper.rb +6 -5
- data/lib/feed_tools/helpers/generic_helper.rb +16 -158
- data/lib/feed_tools/helpers/html_helper.rb +629 -0
- data/lib/feed_tools/helpers/retrieval_helper.rb +5 -0
- data/lib/feed_tools/helpers/uri_helper.rb +223 -0
- data/lib/feed_tools/helpers/xml_helper.rb +239 -0
- data/rakefile +10 -237
- data/test/unit/amp_test.rb +102 -94
- data/test/unit/atom_test.rb +239 -6
- data/test/unit/cache_test.rb +1 -1
- data/test/unit/encoding_test.rb +5 -5
- data/test/unit/generation_test.rb +34 -1
- data/test/unit/helper_test.rb +111 -17
- data/test/unit/rss_test.rb +21 -2
- metadata +7 -3
- data/lib/feed_tools/helpers/module_helper.rb +0 -27
data/test/unit/helper_test.rb
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
require 'test/unit'
|
2
2
|
require 'feed_tools'
|
3
|
+
require 'feed_tools/helpers/html_helper'
|
4
|
+
require 'feed_tools/helpers/xml_helper'
|
3
5
|
require 'feed_tools/helpers/feed_tools_helper'
|
4
6
|
|
5
7
|
class HelperTest < Test::Unit::TestCase
|
6
8
|
include FeedTools::FeedToolsHelper
|
7
|
-
include FeedTools::GenericHelper
|
8
9
|
|
9
10
|
def setup
|
10
11
|
FeedTools.reset_configurations
|
@@ -24,7 +25,7 @@ class HelperTest < Test::Unit::TestCase
|
|
24
25
|
</RoOt>
|
25
26
|
XML
|
26
27
|
xml_doc = REXML::Document.new(xml)
|
27
|
-
test_string = try_xpaths(xml_doc, [
|
28
|
+
test_string = FeedTools::XmlHelper.try_xpaths(xml_doc, [
|
28
29
|
"ROOT/child/text()"
|
29
30
|
], :select_result_value => true)
|
30
31
|
assert_equal("Test String #1", test_string)
|
@@ -35,7 +36,7 @@ class HelperTest < Test::Unit::TestCase
|
|
35
36
|
</root>
|
36
37
|
XML
|
37
38
|
xml_doc = REXML::Document.new(xml)
|
38
|
-
test_string = try_xpaths(xml_doc, [
|
39
|
+
test_string = FeedTools::XmlHelper.try_xpaths(xml_doc, [
|
39
40
|
"ROOT/testnamespace:child/text()"
|
40
41
|
], :select_result_value => true)
|
41
42
|
assert_equal("Test String #2", test_string)
|
@@ -46,7 +47,7 @@ class HelperTest < Test::Unit::TestCase
|
|
46
47
|
</RoOt>
|
47
48
|
XML
|
48
49
|
xml_doc = REXML::Document.new(xml)
|
49
|
-
test_string = try_xpaths(xml_doc, [
|
50
|
+
test_string = FeedTools::XmlHelper.try_xpaths(xml_doc, [
|
50
51
|
"ROOT/child/@ATTRIB"
|
51
52
|
], :select_result_value => true)
|
52
53
|
assert_equal("Test String #3", test_string)
|
@@ -57,7 +58,7 @@ class HelperTest < Test::Unit::TestCase
|
|
57
58
|
</RoOt>
|
58
59
|
XML
|
59
60
|
xml_doc = REXML::Document.new(xml)
|
60
|
-
test_string = try_xpaths(xml_doc, [
|
61
|
+
test_string = FeedTools::XmlHelper.try_xpaths(xml_doc, [
|
61
62
|
"ROOT/child/@testnamespace:ATTRIB"
|
62
63
|
], :select_result_value => true)
|
63
64
|
assert_equal("Test String #4", test_string)
|
@@ -71,36 +72,47 @@ class HelperTest < Test::Unit::TestCase
|
|
71
72
|
|
72
73
|
def test_normalize_url
|
73
74
|
assert_equal("http://slashdot.org/",
|
74
|
-
FeedTools.normalize_url("slashdot.org"))
|
75
|
+
FeedTools::UriHelper.normalize_url("slashdot.org"))
|
75
76
|
assert_equal("http://example.com/index.php",
|
76
|
-
FeedTools.normalize_url("example.com/index.php"))
|
77
|
+
FeedTools::UriHelper.normalize_url("example.com/index.php"))
|
77
78
|
|
78
79
|
# Test windows-style file: protocol normalization
|
79
80
|
assert_equal("file:///c:/windows/My%20Documents%20100%20/foo.txt",
|
80
|
-
FeedTools.normalize_url("c:\\windows\\My Documents 100%20\\foo.txt"))
|
81
|
+
FeedTools::UriHelper.normalize_url("c:\\windows\\My Documents 100%20\\foo.txt"))
|
81
82
|
assert_equal("file:///c:/windows/My%20Documents%20100%20/foo.txt",
|
82
|
-
FeedTools.normalize_url(
|
83
|
+
FeedTools::UriHelper.normalize_url(
|
83
84
|
"file://c:\\windows\\My Documents 100%20\\foo.txt"))
|
84
85
|
assert_equal("file:///c:/windows/My%20Documents%20100%20/foo.txt",
|
85
|
-
FeedTools.normalize_url(
|
86
|
+
FeedTools::UriHelper.normalize_url(
|
86
87
|
"file:///c|/windows/My%20Documents%20100%20/foo.txt"))
|
87
88
|
assert_equal("file:///c:/windows/My%20Documents%20100%20/foo.txt",
|
88
|
-
FeedTools.normalize_url(
|
89
|
+
FeedTools::UriHelper.normalize_url(
|
89
90
|
"file:///c:/windows/My%20Documents%20100%20/foo.txt"))
|
91
|
+
if FeedTools::UriHelper.idn_enabled?
|
92
|
+
# Test internationalized domain names
|
93
|
+
assert_equal(
|
94
|
+
"http://www.xn--8ws00zhy3a.com/atomtests/iri/everything.atom",
|
95
|
+
FeedTools::UriHelper.normalize_url(
|
96
|
+
"http://www.詹姆斯.com/atomtests/iri/everything.atom"))
|
97
|
+
assert_equal(
|
98
|
+
"http://www.xn--8ws00zhy3a.com/atomtests/iri/%E8%A9%B9.html",
|
99
|
+
FeedTools::UriHelper.normalize_url(
|
100
|
+
"http://www.詹姆斯.com/atomtests/iri/詹.html"))
|
101
|
+
end
|
90
102
|
end
|
91
103
|
|
92
104
|
def test_sanitize_html
|
93
|
-
assert_equal("<!--foo-->", FeedTools.sanitize_html("<!--foo-->"))
|
105
|
+
assert_equal("<!--foo-->", FeedTools::HtmlHelper.sanitize_html("<!--foo-->"))
|
94
106
|
assert_equal("<P>Upper-case tags</P>",
|
95
|
-
FeedTools.sanitize_html("<P>Upper-case tags</P>"))
|
107
|
+
FeedTools::HtmlHelper.sanitize_html("<P>Upper-case tags</P>"))
|
96
108
|
assert_equal("<A HREF='/dev/null'>Upper-case attributes</A>",
|
97
|
-
FeedTools.sanitize_html(
|
109
|
+
FeedTools::HtmlHelper.sanitize_html(
|
98
110
|
"<A HREF='/dev/null'>Upper-case attributes</A>"))
|
99
111
|
end
|
100
112
|
|
101
113
|
def test_tidy_html
|
102
114
|
FeedTools.configurations[:tidy_enabled] = true
|
103
|
-
unless FeedTools.tidy_enabled?
|
115
|
+
unless FeedTools::HtmlHelper.tidy_enabled?
|
104
116
|
puts "\nCould not test tidy support. Libtidy couldn't be found."
|
105
117
|
else
|
106
118
|
illegal_pre = <<-EOF
|
@@ -134,15 +146,97 @@ class HelperTest < Test::Unit::TestCase
|
|
134
146
|
end
|
135
147
|
</pre>
|
136
148
|
EOF
|
137
|
-
illegal_pre_after_tidy = FeedTools.tidy_html(illegal_pre)
|
149
|
+
illegal_pre_after_tidy = FeedTools::HtmlHelper.tidy_html(illegal_pre)
|
138
150
|
assert_not_equal(nil, illegal_pre_after_tidy =~ /class HTTPIO < HTTP/,
|
139
151
|
"Tidy failed to clean up illegal chars in <pre> block.")
|
152
|
+
|
153
|
+
unescaped_utf8_characters = <<-EOF
|
154
|
+
\302\240
|
155
|
+
EOF
|
156
|
+
unescaped_utf8_characters_after_tidy =
|
157
|
+
FeedTools::HtmlHelper.tidy_html(unescaped_utf8_characters)
|
158
|
+
assert_not_equal(" ", unescaped_utf8_characters_after_tidy,
|
159
|
+
"Tidy failed to escape the unicode characters correctly.")
|
160
|
+
assert_not_equal("Â ", unescaped_utf8_characters_after_tidy,
|
161
|
+
"Tidy failed to escape the unicode characters correctly.")
|
140
162
|
end
|
141
163
|
FeedTools.configurations[:tidy_enabled] = false
|
142
164
|
end
|
143
165
|
|
144
166
|
def test_build_urn_uri
|
145
167
|
assert_equal("urn:uuid:fa6d0b87-3f36-517d-b9b7-1349f8c3fc6b",
|
146
|
-
FeedTools.build_urn_uri('http://sporkmonger.com/'))
|
168
|
+
FeedTools::UriHelper.build_urn_uri('http://sporkmonger.com/'))
|
169
|
+
end
|
170
|
+
|
171
|
+
def test_build_merged_feed
|
172
|
+
merged_feed = FeedTools.build_merged_feed([
|
173
|
+
"http://rss.slashdot.org/Slashdot/slashdot"
|
174
|
+
])
|
175
|
+
end
|
176
|
+
|
177
|
+
def test_extract_xhtml
|
178
|
+
FeedTools.configurations[:tidy_enabled] = false
|
179
|
+
|
180
|
+
xml = <<-XML
|
181
|
+
<content>
|
182
|
+
<div xmlns='http://www.w3.org/1999/xhtml'><em>Testing.</em></div>
|
183
|
+
</content>
|
184
|
+
XML
|
185
|
+
doc = REXML::Document.new(xml)
|
186
|
+
assert_equal(
|
187
|
+
"<div><em>Testing.</em></div>",
|
188
|
+
FeedTools::HtmlHelper.extract_xhtml(doc.root))
|
189
|
+
xml = <<-XML
|
190
|
+
<content xmlns:xhtml='http://www.w3.org/1999/xhtml'>
|
191
|
+
<xhtml:div><xhtml:em>Testing.</xhtml:em></xhtml:div>
|
192
|
+
</content>
|
193
|
+
XML
|
194
|
+
doc = REXML::Document.new(xml)
|
195
|
+
assert_equal(
|
196
|
+
"<div><em>Testing.</em></div>",
|
197
|
+
FeedTools::HtmlHelper.extract_xhtml(doc.root))
|
198
|
+
xml = <<-XML
|
199
|
+
<content type="xhtml" xmlns:xhtml='http://www.w3.org/1999/xhtml'>
|
200
|
+
<xhtml:div xmlns='http://hsivonen.iki.fi/FooML'>
|
201
|
+
<xhtml:ul>
|
202
|
+
<xhtml:li>XHTML List Item</xhtml:li>
|
203
|
+
</xhtml:ul>
|
204
|
+
<ul>
|
205
|
+
<li>FooML List Item</li>
|
206
|
+
</ul>
|
207
|
+
</xhtml:div>
|
208
|
+
</content>
|
209
|
+
XML
|
210
|
+
doc = REXML::Document.new(xml)
|
211
|
+
xhtml = FeedTools::HtmlHelper.extract_xhtml(doc.root)
|
212
|
+
assert((xhtml =~ /<div>/) && (xhtml =~ /<\/div>/),
|
213
|
+
"XHTML divs were not normalized properly.")
|
214
|
+
assert((xhtml =~ /hsivonen\.iki\.fi/),
|
215
|
+
"FooML namespace was not preserved.")
|
216
|
+
assert((xhtml =~ /<ul xmlns=/),
|
217
|
+
"Namespace was not placed correctly.")
|
218
|
+
|
219
|
+
FeedTools.configurations[:tidy_enabled] = true
|
220
|
+
|
221
|
+
xml = <<-XML
|
222
|
+
<content type="xhtml" xmlns:xhtml='http://www.w3.org/1999/xhtml'>
|
223
|
+
<xhtml:div xmlns='http://hsivonen.iki.fi/FooML'>
|
224
|
+
<xhtml:ul>
|
225
|
+
<xhtml:li>XHTML List Item</xhtml:li>
|
226
|
+
</xhtml:ul>
|
227
|
+
<ul>
|
228
|
+
<li>FooML List Item</li>
|
229
|
+
</ul>
|
230
|
+
</xhtml:div>
|
231
|
+
</content>
|
232
|
+
XML
|
233
|
+
doc = REXML::Document.new(xml)
|
234
|
+
xhtml = FeedTools::HtmlHelper.extract_xhtml(doc.root)
|
235
|
+
assert((xhtml =~ /<div>/) && (xhtml =~ /<\/div>/),
|
236
|
+
"XHTML divs were not normalized properly.")
|
237
|
+
assert((xhtml =~ /hsivonen\.iki\.fi/),
|
238
|
+
"FooML namespace was not preserved.")
|
239
|
+
assert((xhtml =~ /<ul xmlns=/),
|
240
|
+
"Namespace was not placed correctly.")
|
147
241
|
end
|
148
242
|
end
|
data/test/unit/rss_test.rb
CHANGED
@@ -476,8 +476,8 @@ class RssTest < Test::Unit::TestCase
|
|
476
476
|
|
477
477
|
def test_item_source
|
478
478
|
with_feed(:from_file => 'wellformed/rss/item_source.xml') { |feed|
|
479
|
-
assert_equal("http://example.com/", feed.items.first.source.
|
480
|
-
assert_equal("Example source", feed.items.first.source.
|
479
|
+
assert_equal("http://example.com/", feed.items.first.source.href)
|
480
|
+
assert_equal("Example source", feed.items.first.source.title)
|
481
481
|
}
|
482
482
|
end
|
483
483
|
|
@@ -736,4 +736,23 @@ class RssTest < Test::Unit::TestCase
|
|
736
736
|
assert_equal(1, feed.items.size)
|
737
737
|
}
|
738
738
|
end
|
739
|
+
|
740
|
+
def test_feed_item_description_plus_content_encoded
|
741
|
+
with_feed(:from_data => <<-FEED
|
742
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
743
|
+
<rss version="2.0">
|
744
|
+
<channel>
|
745
|
+
<item>
|
746
|
+
<description>Excerpt</description>
|
747
|
+
<content:encoded><![CDATA[Full Content]]></content:encoded>
|
748
|
+
</item>
|
749
|
+
</channel>
|
750
|
+
</rss>
|
751
|
+
FEED
|
752
|
+
) { |feed|
|
753
|
+
assert_equal(1, feed.items.size)
|
754
|
+
assert_equal("Excerpt", feed.items[0].summary)
|
755
|
+
assert_equal("Full Content", feed.items[0].content)
|
756
|
+
}
|
757
|
+
end
|
739
758
|
end
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: feedtools
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.2.22
|
7
|
-
date: 2006-
|
6
|
+
version: 0.2.23
|
7
|
+
date: 2006-03-03 00:00:00 -05:00
|
8
8
|
summary: "Parsing, generation, and caching system for xml news feeds."
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -38,6 +38,7 @@ files:
|
|
38
38
|
- lib/feed_tools/database_feed_cache.rb
|
39
39
|
- lib/feed_tools/feed.rb
|
40
40
|
- lib/feed_tools/feed_item.rb
|
41
|
+
- lib/feed_tools/feed_structures.rb
|
41
42
|
- lib/feed_tools/helpers
|
42
43
|
- lib/feed_tools/vendor
|
43
44
|
- lib/feed_tools/helpers/debug_helper.rb
|
@@ -45,8 +46,10 @@ files:
|
|
45
46
|
- lib/feed_tools/helpers/feed_item_helper.rb
|
46
47
|
- lib/feed_tools/helpers/feed_tools_helper.rb
|
47
48
|
- lib/feed_tools/helpers/generic_helper.rb
|
48
|
-
- lib/feed_tools/helpers/module_helper.rb
|
49
|
+
- lib/feed_tools/helpers/html_helper.rb
|
49
50
|
- lib/feed_tools/helpers/retrieval_helper.rb
|
51
|
+
- lib/feed_tools/helpers/uri_helper.rb
|
52
|
+
- lib/feed_tools/helpers/xml_helper.rb
|
50
53
|
- lib/feed_tools/vendor/htree
|
51
54
|
- lib/feed_tools/vendor/htree.rb
|
52
55
|
- lib/feed_tools/vendor/htree/container.rb
|
@@ -2856,6 +2859,7 @@ files:
|
|
2856
2859
|
- test/unit/nonstandard_test.rb
|
2857
2860
|
- test/unit/rdf_test.rb
|
2858
2861
|
- test/unit/rss_test.rb
|
2862
|
+
- db/migration.rb
|
2859
2863
|
- db/schema.mysql.sql
|
2860
2864
|
- db/schema.postgresql.sql
|
2861
2865
|
- db/schema.sqlite.sql
|
@@ -1,27 +0,0 @@
|
|
1
|
-
#--
|
2
|
-
# Copyright (c) 2005 Robert Aman
|
3
|
-
#
|
4
|
-
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
-
# a copy of this software and associated documentation files (the
|
6
|
-
# "Software"), to deal in the Software without restriction, including
|
7
|
-
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
-
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
-
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
-
# the following conditions:
|
11
|
-
#
|
12
|
-
# The above copyright notice and this permission notice shall be
|
13
|
-
# included in all copies or substantial portions of the Software.
|
14
|
-
#
|
15
|
-
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
-
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
-
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
-
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
-
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
-
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
-
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
-
#++
|
23
|
-
|
24
|
-
module FeedTools
|
25
|
-
module FeedToolsModuleHelper
|
26
|
-
end
|
27
|
-
end
|