feedtools 0.2.22 → 0.2.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +28 -0
- data/README +23 -2
- data/db/migration.rb +19 -0
- data/db/schema.mysql.sql +1 -1
- data/db/schema.postgresql.sql +1 -1
- data/db/schema.sqlite.sql +1 -1
- data/lib/feed_tools.rb +71 -388
- data/lib/feed_tools/database_feed_cache.rb +4 -3
- data/lib/feed_tools/feed.rb +809 -607
- data/lib/feed_tools/feed_item.rb +551 -574
- data/lib/feed_tools/feed_structures.rb +252 -0
- data/lib/feed_tools/helpers/feed_tools_helper.rb +6 -5
- data/lib/feed_tools/helpers/generic_helper.rb +16 -158
- data/lib/feed_tools/helpers/html_helper.rb +629 -0
- data/lib/feed_tools/helpers/retrieval_helper.rb +5 -0
- data/lib/feed_tools/helpers/uri_helper.rb +223 -0
- data/lib/feed_tools/helpers/xml_helper.rb +239 -0
- data/rakefile +10 -237
- data/test/unit/amp_test.rb +102 -94
- data/test/unit/atom_test.rb +239 -6
- data/test/unit/cache_test.rb +1 -1
- data/test/unit/encoding_test.rb +5 -5
- data/test/unit/generation_test.rb +34 -1
- data/test/unit/helper_test.rb +111 -17
- data/test/unit/rss_test.rb +21 -2
- metadata +7 -3
- data/lib/feed_tools/helpers/module_helper.rb +0 -27
data/test/unit/helper_test.rb
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
require 'test/unit'
|
2
2
|
require 'feed_tools'
|
3
|
+
require 'feed_tools/helpers/html_helper'
|
4
|
+
require 'feed_tools/helpers/xml_helper'
|
3
5
|
require 'feed_tools/helpers/feed_tools_helper'
|
4
6
|
|
5
7
|
class HelperTest < Test::Unit::TestCase
|
6
8
|
include FeedTools::FeedToolsHelper
|
7
|
-
include FeedTools::GenericHelper
|
8
9
|
|
9
10
|
def setup
|
10
11
|
FeedTools.reset_configurations
|
@@ -24,7 +25,7 @@ class HelperTest < Test::Unit::TestCase
|
|
24
25
|
</RoOt>
|
25
26
|
XML
|
26
27
|
xml_doc = REXML::Document.new(xml)
|
27
|
-
test_string = try_xpaths(xml_doc, [
|
28
|
+
test_string = FeedTools::XmlHelper.try_xpaths(xml_doc, [
|
28
29
|
"ROOT/child/text()"
|
29
30
|
], :select_result_value => true)
|
30
31
|
assert_equal("Test String #1", test_string)
|
@@ -35,7 +36,7 @@ class HelperTest < Test::Unit::TestCase
|
|
35
36
|
</root>
|
36
37
|
XML
|
37
38
|
xml_doc = REXML::Document.new(xml)
|
38
|
-
test_string = try_xpaths(xml_doc, [
|
39
|
+
test_string = FeedTools::XmlHelper.try_xpaths(xml_doc, [
|
39
40
|
"ROOT/testnamespace:child/text()"
|
40
41
|
], :select_result_value => true)
|
41
42
|
assert_equal("Test String #2", test_string)
|
@@ -46,7 +47,7 @@ class HelperTest < Test::Unit::TestCase
|
|
46
47
|
</RoOt>
|
47
48
|
XML
|
48
49
|
xml_doc = REXML::Document.new(xml)
|
49
|
-
test_string = try_xpaths(xml_doc, [
|
50
|
+
test_string = FeedTools::XmlHelper.try_xpaths(xml_doc, [
|
50
51
|
"ROOT/child/@ATTRIB"
|
51
52
|
], :select_result_value => true)
|
52
53
|
assert_equal("Test String #3", test_string)
|
@@ -57,7 +58,7 @@ class HelperTest < Test::Unit::TestCase
|
|
57
58
|
</RoOt>
|
58
59
|
XML
|
59
60
|
xml_doc = REXML::Document.new(xml)
|
60
|
-
test_string = try_xpaths(xml_doc, [
|
61
|
+
test_string = FeedTools::XmlHelper.try_xpaths(xml_doc, [
|
61
62
|
"ROOT/child/@testnamespace:ATTRIB"
|
62
63
|
], :select_result_value => true)
|
63
64
|
assert_equal("Test String #4", test_string)
|
@@ -71,36 +72,47 @@ class HelperTest < Test::Unit::TestCase
|
|
71
72
|
|
72
73
|
def test_normalize_url
|
73
74
|
assert_equal("http://slashdot.org/",
|
74
|
-
FeedTools.normalize_url("slashdot.org"))
|
75
|
+
FeedTools::UriHelper.normalize_url("slashdot.org"))
|
75
76
|
assert_equal("http://example.com/index.php",
|
76
|
-
FeedTools.normalize_url("example.com/index.php"))
|
77
|
+
FeedTools::UriHelper.normalize_url("example.com/index.php"))
|
77
78
|
|
78
79
|
# Test windows-style file: protocol normalization
|
79
80
|
assert_equal("file:///c:/windows/My%20Documents%20100%20/foo.txt",
|
80
|
-
FeedTools.normalize_url("c:\\windows\\My Documents 100%20\\foo.txt"))
|
81
|
+
FeedTools::UriHelper.normalize_url("c:\\windows\\My Documents 100%20\\foo.txt"))
|
81
82
|
assert_equal("file:///c:/windows/My%20Documents%20100%20/foo.txt",
|
82
|
-
FeedTools.normalize_url(
|
83
|
+
FeedTools::UriHelper.normalize_url(
|
83
84
|
"file://c:\\windows\\My Documents 100%20\\foo.txt"))
|
84
85
|
assert_equal("file:///c:/windows/My%20Documents%20100%20/foo.txt",
|
85
|
-
FeedTools.normalize_url(
|
86
|
+
FeedTools::UriHelper.normalize_url(
|
86
87
|
"file:///c|/windows/My%20Documents%20100%20/foo.txt"))
|
87
88
|
assert_equal("file:///c:/windows/My%20Documents%20100%20/foo.txt",
|
88
|
-
FeedTools.normalize_url(
|
89
|
+
FeedTools::UriHelper.normalize_url(
|
89
90
|
"file:///c:/windows/My%20Documents%20100%20/foo.txt"))
|
91
|
+
if FeedTools::UriHelper.idn_enabled?
|
92
|
+
# Test internationalized domain names
|
93
|
+
assert_equal(
|
94
|
+
"http://www.xn--8ws00zhy3a.com/atomtests/iri/everything.atom",
|
95
|
+
FeedTools::UriHelper.normalize_url(
|
96
|
+
"http://www.詹姆斯.com/atomtests/iri/everything.atom"))
|
97
|
+
assert_equal(
|
98
|
+
"http://www.xn--8ws00zhy3a.com/atomtests/iri/%E8%A9%B9.html",
|
99
|
+
FeedTools::UriHelper.normalize_url(
|
100
|
+
"http://www.詹姆斯.com/atomtests/iri/詹.html"))
|
101
|
+
end
|
90
102
|
end
|
91
103
|
|
92
104
|
def test_sanitize_html
|
93
|
-
assert_equal("<!--foo-->", FeedTools.sanitize_html("<!--foo-->"))
|
105
|
+
assert_equal("<!--foo-->", FeedTools::HtmlHelper.sanitize_html("<!--foo-->"))
|
94
106
|
assert_equal("<P>Upper-case tags</P>",
|
95
|
-
FeedTools.sanitize_html("<P>Upper-case tags</P>"))
|
107
|
+
FeedTools::HtmlHelper.sanitize_html("<P>Upper-case tags</P>"))
|
96
108
|
assert_equal("<A HREF='/dev/null'>Upper-case attributes</A>",
|
97
|
-
FeedTools.sanitize_html(
|
109
|
+
FeedTools::HtmlHelper.sanitize_html(
|
98
110
|
"<A HREF='/dev/null'>Upper-case attributes</A>"))
|
99
111
|
end
|
100
112
|
|
101
113
|
def test_tidy_html
|
102
114
|
FeedTools.configurations[:tidy_enabled] = true
|
103
|
-
unless FeedTools.tidy_enabled?
|
115
|
+
unless FeedTools::HtmlHelper.tidy_enabled?
|
104
116
|
puts "\nCould not test tidy support. Libtidy couldn't be found."
|
105
117
|
else
|
106
118
|
illegal_pre = <<-EOF
|
@@ -134,15 +146,97 @@ class HelperTest < Test::Unit::TestCase
|
|
134
146
|
end
|
135
147
|
</pre>
|
136
148
|
EOF
|
137
|
-
illegal_pre_after_tidy = FeedTools.tidy_html(illegal_pre)
|
149
|
+
illegal_pre_after_tidy = FeedTools::HtmlHelper.tidy_html(illegal_pre)
|
138
150
|
assert_not_equal(nil, illegal_pre_after_tidy =~ /class HTTPIO < HTTP/,
|
139
151
|
"Tidy failed to clean up illegal chars in <pre> block.")
|
152
|
+
|
153
|
+
unescaped_utf8_characters = <<-EOF
|
154
|
+
\302\240
|
155
|
+
EOF
|
156
|
+
unescaped_utf8_characters_after_tidy =
|
157
|
+
FeedTools::HtmlHelper.tidy_html(unescaped_utf8_characters)
|
158
|
+
assert_not_equal(" ", unescaped_utf8_characters_after_tidy,
|
159
|
+
"Tidy failed to escape the unicode characters correctly.")
|
160
|
+
assert_not_equal("Â ", unescaped_utf8_characters_after_tidy,
|
161
|
+
"Tidy failed to escape the unicode characters correctly.")
|
140
162
|
end
|
141
163
|
FeedTools.configurations[:tidy_enabled] = false
|
142
164
|
end
|
143
165
|
|
144
166
|
def test_build_urn_uri
|
145
167
|
assert_equal("urn:uuid:fa6d0b87-3f36-517d-b9b7-1349f8c3fc6b",
|
146
|
-
FeedTools.build_urn_uri('http://sporkmonger.com/'))
|
168
|
+
FeedTools::UriHelper.build_urn_uri('http://sporkmonger.com/'))
|
169
|
+
end
|
170
|
+
|
171
|
+
def test_build_merged_feed
|
172
|
+
merged_feed = FeedTools.build_merged_feed([
|
173
|
+
"http://rss.slashdot.org/Slashdot/slashdot"
|
174
|
+
])
|
175
|
+
end
|
176
|
+
|
177
|
+
def test_extract_xhtml
|
178
|
+
FeedTools.configurations[:tidy_enabled] = false
|
179
|
+
|
180
|
+
xml = <<-XML
|
181
|
+
<content>
|
182
|
+
<div xmlns='http://www.w3.org/1999/xhtml'><em>Testing.</em></div>
|
183
|
+
</content>
|
184
|
+
XML
|
185
|
+
doc = REXML::Document.new(xml)
|
186
|
+
assert_equal(
|
187
|
+
"<div><em>Testing.</em></div>",
|
188
|
+
FeedTools::HtmlHelper.extract_xhtml(doc.root))
|
189
|
+
xml = <<-XML
|
190
|
+
<content xmlns:xhtml='http://www.w3.org/1999/xhtml'>
|
191
|
+
<xhtml:div><xhtml:em>Testing.</xhtml:em></xhtml:div>
|
192
|
+
</content>
|
193
|
+
XML
|
194
|
+
doc = REXML::Document.new(xml)
|
195
|
+
assert_equal(
|
196
|
+
"<div><em>Testing.</em></div>",
|
197
|
+
FeedTools::HtmlHelper.extract_xhtml(doc.root))
|
198
|
+
xml = <<-XML
|
199
|
+
<content type="xhtml" xmlns:xhtml='http://www.w3.org/1999/xhtml'>
|
200
|
+
<xhtml:div xmlns='http://hsivonen.iki.fi/FooML'>
|
201
|
+
<xhtml:ul>
|
202
|
+
<xhtml:li>XHTML List Item</xhtml:li>
|
203
|
+
</xhtml:ul>
|
204
|
+
<ul>
|
205
|
+
<li>FooML List Item</li>
|
206
|
+
</ul>
|
207
|
+
</xhtml:div>
|
208
|
+
</content>
|
209
|
+
XML
|
210
|
+
doc = REXML::Document.new(xml)
|
211
|
+
xhtml = FeedTools::HtmlHelper.extract_xhtml(doc.root)
|
212
|
+
assert((xhtml =~ /<div>/) && (xhtml =~ /<\/div>/),
|
213
|
+
"XHTML divs were not normalized properly.")
|
214
|
+
assert((xhtml =~ /hsivonen\.iki\.fi/),
|
215
|
+
"FooML namespace was not preserved.")
|
216
|
+
assert((xhtml =~ /<ul xmlns=/),
|
217
|
+
"Namespace was not placed correctly.")
|
218
|
+
|
219
|
+
FeedTools.configurations[:tidy_enabled] = true
|
220
|
+
|
221
|
+
xml = <<-XML
|
222
|
+
<content type="xhtml" xmlns:xhtml='http://www.w3.org/1999/xhtml'>
|
223
|
+
<xhtml:div xmlns='http://hsivonen.iki.fi/FooML'>
|
224
|
+
<xhtml:ul>
|
225
|
+
<xhtml:li>XHTML List Item</xhtml:li>
|
226
|
+
</xhtml:ul>
|
227
|
+
<ul>
|
228
|
+
<li>FooML List Item</li>
|
229
|
+
</ul>
|
230
|
+
</xhtml:div>
|
231
|
+
</content>
|
232
|
+
XML
|
233
|
+
doc = REXML::Document.new(xml)
|
234
|
+
xhtml = FeedTools::HtmlHelper.extract_xhtml(doc.root)
|
235
|
+
assert((xhtml =~ /<div>/) && (xhtml =~ /<\/div>/),
|
236
|
+
"XHTML divs were not normalized properly.")
|
237
|
+
assert((xhtml =~ /hsivonen\.iki\.fi/),
|
238
|
+
"FooML namespace was not preserved.")
|
239
|
+
assert((xhtml =~ /<ul xmlns=/),
|
240
|
+
"Namespace was not placed correctly.")
|
147
241
|
end
|
148
242
|
end
|
data/test/unit/rss_test.rb
CHANGED
@@ -476,8 +476,8 @@ class RssTest < Test::Unit::TestCase
|
|
476
476
|
|
477
477
|
def test_item_source
|
478
478
|
with_feed(:from_file => 'wellformed/rss/item_source.xml') { |feed|
|
479
|
-
assert_equal("http://example.com/", feed.items.first.source.
|
480
|
-
assert_equal("Example source", feed.items.first.source.
|
479
|
+
assert_equal("http://example.com/", feed.items.first.source.href)
|
480
|
+
assert_equal("Example source", feed.items.first.source.title)
|
481
481
|
}
|
482
482
|
end
|
483
483
|
|
@@ -736,4 +736,23 @@ class RssTest < Test::Unit::TestCase
|
|
736
736
|
assert_equal(1, feed.items.size)
|
737
737
|
}
|
738
738
|
end
|
739
|
+
|
740
|
+
def test_feed_item_description_plus_content_encoded
|
741
|
+
with_feed(:from_data => <<-FEED
|
742
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
743
|
+
<rss version="2.0">
|
744
|
+
<channel>
|
745
|
+
<item>
|
746
|
+
<description>Excerpt</description>
|
747
|
+
<content:encoded><![CDATA[Full Content]]></content:encoded>
|
748
|
+
</item>
|
749
|
+
</channel>
|
750
|
+
</rss>
|
751
|
+
FEED
|
752
|
+
) { |feed|
|
753
|
+
assert_equal(1, feed.items.size)
|
754
|
+
assert_equal("Excerpt", feed.items[0].summary)
|
755
|
+
assert_equal("Full Content", feed.items[0].content)
|
756
|
+
}
|
757
|
+
end
|
739
758
|
end
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: feedtools
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.2.22
|
7
|
-
date: 2006-
|
6
|
+
version: 0.2.23
|
7
|
+
date: 2006-03-03 00:00:00 -05:00
|
8
8
|
summary: "Parsing, generation, and caching system for xml news feeds."
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -38,6 +38,7 @@ files:
|
|
38
38
|
- lib/feed_tools/database_feed_cache.rb
|
39
39
|
- lib/feed_tools/feed.rb
|
40
40
|
- lib/feed_tools/feed_item.rb
|
41
|
+
- lib/feed_tools/feed_structures.rb
|
41
42
|
- lib/feed_tools/helpers
|
42
43
|
- lib/feed_tools/vendor
|
43
44
|
- lib/feed_tools/helpers/debug_helper.rb
|
@@ -45,8 +46,10 @@ files:
|
|
45
46
|
- lib/feed_tools/helpers/feed_item_helper.rb
|
46
47
|
- lib/feed_tools/helpers/feed_tools_helper.rb
|
47
48
|
- lib/feed_tools/helpers/generic_helper.rb
|
48
|
-
- lib/feed_tools/helpers/module_helper.rb
|
49
|
+
- lib/feed_tools/helpers/html_helper.rb
|
49
50
|
- lib/feed_tools/helpers/retrieval_helper.rb
|
51
|
+
- lib/feed_tools/helpers/uri_helper.rb
|
52
|
+
- lib/feed_tools/helpers/xml_helper.rb
|
50
53
|
- lib/feed_tools/vendor/htree
|
51
54
|
- lib/feed_tools/vendor/htree.rb
|
52
55
|
- lib/feed_tools/vendor/htree/container.rb
|
@@ -2856,6 +2859,7 @@ files:
|
|
2856
2859
|
- test/unit/nonstandard_test.rb
|
2857
2860
|
- test/unit/rdf_test.rb
|
2858
2861
|
- test/unit/rss_test.rb
|
2862
|
+
- db/migration.rb
|
2859
2863
|
- db/schema.mysql.sql
|
2860
2864
|
- db/schema.postgresql.sql
|
2861
2865
|
- db/schema.sqlite.sql
|
@@ -1,27 +0,0 @@
|
|
1
|
-
#--
|
2
|
-
# Copyright (c) 2005 Robert Aman
|
3
|
-
#
|
4
|
-
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
-
# a copy of this software and associated documentation files (the
|
6
|
-
# "Software"), to deal in the Software without restriction, including
|
7
|
-
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
-
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
-
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
-
# the following conditions:
|
11
|
-
#
|
12
|
-
# The above copyright notice and this permission notice shall be
|
13
|
-
# included in all copies or substantial portions of the Software.
|
14
|
-
#
|
15
|
-
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
-
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
-
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
-
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
-
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
-
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
-
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
-
#++
|
23
|
-
|
24
|
-
module FeedTools
|
25
|
-
module FeedToolsModuleHelper
|
26
|
-
end
|
27
|
-
end
|