feed-normalizer 1.5.1 → 1.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +48 -48
 - data/License.txt +27 -27
 - data/Manifest.txt +18 -19
 - data/README.txt +63 -63
 - data/Rakefile +29 -25
 - data/lib/feed-normalizer.rb +149 -149
 - data/lib/html-cleaner.rb +181 -190
 - data/lib/parsers/rss.rb +110 -95
 - data/lib/parsers/simple-rss.rb +138 -137
 - data/lib/structures.rb +245 -244
 - data/test/data/atom03.xml +128 -127
 - data/test/data/atom10.xml +114 -112
 - data/test/data/rdf10.xml +1498 -1498
 - data/test/data/rss20.xml +64 -63
 - data/test/data/rss20diff.xml +59 -59
 - data/test/data/rss20diff_short.xml +51 -51
 - data/test/test_feednormalizer.rb +265 -267
 - data/test/test_htmlcleaner.rb +156 -155
 - metadata +99 -63
 - data/test/test_all.rb +0 -6
 
    
        data/test/data/rss20.xml
    CHANGED
    
    | 
         @@ -1,63 +1,64 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            <?xml version="1.0" encoding="ISO-8859-1" ?>
         
     | 
| 
       2 
     | 
    
         
            -
            <?xml-stylesheet title="XSL_formatting" type="text/xsl" href="/shared/bsp/xsl/rss/nolsol.xsl"?>
         
     | 
| 
       3 
     | 
    
         
            -
            <rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
         
     | 
| 
       4 
     | 
    
         
            -
              <channel>
         
     | 
| 
       5 
     | 
    
         
            -
                <title>BBC News | Technology | UK Edition</title>
         
     | 
| 
       6 
     | 
    
         
            -
                <link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/default.stm</link>
         
     | 
| 
       7 
     | 
    
         
            -
                <description>Visit BBC News for up-to-the-minute news, breaking news, video, audio and feature stories. BBC News provides trusted World and UK news as well as local and regional perspectives. Also entertainment, business, science, technology and health news.</description>
         
     | 
| 
       8 
     | 
    
         
            -
                <language>en-gb</language>
         
     | 
| 
       9 
     | 
    
         
            -
                <lastBuildDate>Sat, 09 Sep 2006 14:57:06 GMT</lastBuildDate>
         
     | 
| 
       10 
     | 
    
         
            -
                <copyright>Copyright: (C) British Broadcasting Corporation, see http://news.bbc.co.uk/1/hi/help/rss/4498287.stm for terms and conditions of reuse</copyright>
         
     | 
| 
       11 
     | 
    
         
            -
                <docs>http://www.bbc.co.uk/syndication/</docs>
         
     | 
| 
       12 
     | 
    
         
            -
                <ttl>15</ttl>
         
     | 
| 
       13 
     | 
    
         
            -
                <skipHours>
         
     | 
| 
       14 
     | 
    
         
            -
                   <hour>6</hour>
         
     | 
| 
       15 
     | 
    
         
            -
                   <hour>7</hour>
         
     | 
| 
       16 
     | 
    
         
            -
                   <hour>8</hour>
         
     | 
| 
       17 
     | 
    
         
            -
                   <hour>9</hour>
         
     | 
| 
       18 
     | 
    
         
            -
                   <hour>10</hour>
         
     | 
| 
       19 
     | 
    
         
            -
                   <hour>11</hour>
         
     | 
| 
       20 
     | 
    
         
            -
                </skipHours>
         
     | 
| 
       21 
     | 
    
         
            -
                <skipDays>
         
     | 
| 
       22 
     | 
    
         
            -
                   <day>Sunday</day>
         
     | 
| 
       23 
     | 
    
         
            -
                </skipDays>
         
     | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
       25 
     | 
    
         
            -
                <image>
         
     | 
| 
       26 
     | 
    
         
            -
                  <title>BBC News</title>
         
     | 
| 
       27 
     | 
    
         
            -
                  <url>http://news.bbc.co.uk/nol/shared/img/bbc_news_120x60.gif</url>
         
     | 
| 
       28 
     | 
    
         
            -
                  <link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/default.stm</link>
         
     | 
| 
       29 
     | 
    
         
            -
                </image>
         
     | 
| 
       30 
     | 
    
         
            -
             
     | 
| 
       31 
     | 
    
         
            -
                <item>
         
     | 
| 
       32 
     | 
    
         
            -
                  <title>Concerns over security software</title>
         
     | 
| 
       33 
     | 
    
         
            -
                  <description><![CDATA[BBC Click investigates free security software and finds out who will protect PCs when Microsoft launches Vista.]]></description>
         
     | 
| 
       34 
     | 
    
         
            -
                  <content:encoded 
     | 
| 
       35 
     | 
    
         
            -
                  <link>http://news.bbc.co.uk/go/rss/-/1/hi/programmes/click_online/5326654.stm</link>
         
     | 
| 
       36 
     | 
    
         
            -
                  <guid isPermaLink="false">http://news.bbc.co.uk/1/hi/programmes/click_online/5326654.stm</guid>
         
     | 
| 
       37 
     | 
    
         
            -
                  <pubDate>Sat, 09 Sep 2006 12:45:35 GMT</pubDate>
         
     | 
| 
       38 
     | 
    
         
            -
                  <category>Click</category>
         
     | 
| 
       39 
     | 
    
         
            -
                </item>
         
     | 
| 
       40 
     | 
    
         
            -
             
     | 
| 
       41 
     | 
    
         
            -
                <item>
         
     | 
| 
       42 
     | 
    
         
            -
                  <title>Top prize for 'light' inventor</title>
         
     | 
| 
       43 
     | 
    
         
            -
                  <description>A Japanese scientist who invented a sustainable form of light is awarded the Millennium Technology Prize.</description>
         
     | 
| 
       44 
     | 
    
         
            -
                  <content:encoded><![CDATA[<p>test2</p>]]></content:encoded>
         
     | 
| 
       45 
     | 
    
         
            -
                  <link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/5328446.stm</link>
         
     | 
| 
       46 
     | 
    
         
            -
                  < 
     | 
| 
       47 
     | 
    
         
            -
                  < 
     | 
| 
       48 
     | 
    
         
            -
                  < 
     | 
| 
       49 
     | 
    
         
            -
                  <category> 
     | 
| 
       50 
     | 
    
         
            -
             
     | 
| 
       51 
     | 
    
         
            -
             
     | 
| 
       52 
     | 
    
         
            -
             
     | 
| 
       53 
     | 
    
         
            -
             
     | 
| 
       54 
     | 
    
         
            -
                  < 
     | 
| 
       55 
     | 
    
         
            -
                  < 
     | 
| 
       56 
     | 
    
         
            -
                  < 
     | 
| 
       57 
     | 
    
         
            -
                  < 
     | 
| 
       58 
     | 
    
         
            -
                  < 
     | 
| 
       59 
     | 
    
         
            -
             
     | 
| 
       60 
     | 
    
         
            -
             
     | 
| 
       61 
     | 
    
         
            -
             
     | 
| 
       62 
     | 
    
         
            -
            </ 
     | 
| 
       63 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            <?xml version="1.0" encoding="ISO-8859-1" ?>
         
     | 
| 
      
 2 
     | 
    
         
            +
            <?xml-stylesheet title="XSL_formatting" type="text/xsl" href="/shared/bsp/xsl/rss/nolsol.xsl"?>
         
     | 
| 
      
 3 
     | 
    
         
            +
            <rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
         
     | 
| 
      
 4 
     | 
    
         
            +
              <channel>
         
     | 
| 
      
 5 
     | 
    
         
            +
                <title>BBC News | Technology | UK Edition</title>
         
     | 
| 
      
 6 
     | 
    
         
            +
                <link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/default.stm</link>
         
     | 
| 
      
 7 
     | 
    
         
            +
                <description>Visit BBC News for up-to-the-minute news, breaking news, video, audio and feature stories. BBC News provides trusted World and UK news as well as local and regional perspectives. Also entertainment, business, science, technology and health news.</description>
         
     | 
| 
      
 8 
     | 
    
         
            +
                <language>en-gb</language>
         
     | 
| 
      
 9 
     | 
    
         
            +
                <lastBuildDate>Sat, 09 Sep 2006 14:57:06 GMT</lastBuildDate>
         
     | 
| 
      
 10 
     | 
    
         
            +
                <copyright>Copyright: (C) British Broadcasting Corporation, see http://news.bbc.co.uk/1/hi/help/rss/4498287.stm for terms and conditions of reuse</copyright>
         
     | 
| 
      
 11 
     | 
    
         
            +
                <docs>http://www.bbc.co.uk/syndication/</docs>
         
     | 
| 
      
 12 
     | 
    
         
            +
                <ttl>15</ttl>
         
     | 
| 
      
 13 
     | 
    
         
            +
                <skipHours>
         
     | 
| 
      
 14 
     | 
    
         
            +
                   <hour>6</hour>
         
     | 
| 
      
 15 
     | 
    
         
            +
                   <hour>7</hour>
         
     | 
| 
      
 16 
     | 
    
         
            +
                   <hour>8</hour>
         
     | 
| 
      
 17 
     | 
    
         
            +
                   <hour>9</hour>
         
     | 
| 
      
 18 
     | 
    
         
            +
                   <hour>10</hour>
         
     | 
| 
      
 19 
     | 
    
         
            +
                   <hour>11</hour>
         
     | 
| 
      
 20 
     | 
    
         
            +
                </skipHours>
         
     | 
| 
      
 21 
     | 
    
         
            +
                <skipDays>
         
     | 
| 
      
 22 
     | 
    
         
            +
                   <day>Sunday</day>
         
     | 
| 
      
 23 
     | 
    
         
            +
                </skipDays>
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
                <image>
         
     | 
| 
      
 26 
     | 
    
         
            +
                  <title>BBC News</title>
         
     | 
| 
      
 27 
     | 
    
         
            +
                  <url>http://news.bbc.co.uk/nol/shared/img/bbc_news_120x60.gif</url>
         
     | 
| 
      
 28 
     | 
    
         
            +
                  <link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/default.stm</link>
         
     | 
| 
      
 29 
     | 
    
         
            +
                </image>
         
     | 
| 
      
 30 
     | 
    
         
            +
             
     | 
| 
      
 31 
     | 
    
         
            +
                <item>
         
     | 
| 
      
 32 
     | 
    
         
            +
                  <title>Concerns over security software</title>
         
     | 
| 
      
 33 
     | 
    
         
            +
                  <description><![CDATA[BBC Click investigates free security software and finds out who will protect PCs when Microsoft launches Vista.]]></description>
         
     | 
| 
      
 34 
     | 
    
         
            +
                  <content:encoded>  <![CDATA[<p>test1</p>]]>  </content:encoded>
         
     | 
| 
      
 35 
     | 
    
         
            +
                  <link>http://news.bbc.co.uk/go/rss/-/1/hi/programmes/click_online/5326654.stm</link>
         
     | 
| 
      
 36 
     | 
    
         
            +
                  <guid isPermaLink="false">http://news.bbc.co.uk/1/hi/programmes/click_online/5326654.stm</guid>
         
     | 
| 
      
 37 
     | 
    
         
            +
                  <pubDate>Sat, 09 Sep 2006 12:45:35 GMT</pubDate>
         
     | 
| 
      
 38 
     | 
    
         
            +
                  <category>Click</category>
         
     | 
| 
      
 39 
     | 
    
         
            +
                </item>
         
     | 
| 
      
 40 
     | 
    
         
            +
             
     | 
| 
      
 41 
     | 
    
         
            +
                <item>
         
     | 
| 
      
 42 
     | 
    
         
            +
                  <title>Top prize for 'light' inventor</title>
         
     | 
| 
      
 43 
     | 
    
         
            +
                  <description>A Japanese scientist who invented a sustainable form of light is awarded the Millennium Technology Prize.</description>
         
     | 
| 
      
 44 
     | 
    
         
            +
                  <content:encoded><![CDATA[<p>test2</p>]]></content:encoded>
         
     | 
| 
      
 45 
     | 
    
         
            +
                  <link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/5328446.stm</link>
         
     | 
| 
      
 46 
     | 
    
         
            +
                  <enclosure url="http://websrvr60ny.audiovideoweb.com/ny60web16519/LTN/POA/POA_042905.mp3" length="12619776" type="audio/mpeg"></enclosure>
         
     | 
| 
      
 47 
     | 
    
         
            +
                  <guid isPermaLink="false">http://news.bbc.co.uk/1/hi/technology/5328446.stm</guid>
         
     | 
| 
      
 48 
     | 
    
         
            +
                  <pubDate>Fri, 08 Sep 2006 16:18:08 GMT</pubDate>
         
     | 
| 
      
 49 
     | 
    
         
            +
                  <category>Technology</category>
         
     | 
| 
      
 50 
     | 
    
         
            +
                  <category>Japan</category>
         
     | 
| 
      
 51 
     | 
    
         
            +
                </item>
         
     | 
| 
      
 52 
     | 
    
         
            +
             
     | 
| 
      
 53 
     | 
    
         
            +
                <item>
         
     | 
| 
      
 54 
     | 
    
         
            +
                  <title>MP3 player court order overturned</title>
         
     | 
| 
      
 55 
     | 
    
         
            +
                  <description><b>SanDisk</b> puts its MP3 players back on display at a German electronics show after overturning a court injunction.</description>
         
     | 
| 
      
 56 
     | 
    
         
            +
                  <content:encoded><![CDATA[<p>test3</p>]]></content:encoded>
         
     | 
| 
      
 57 
     | 
    
         
            +
                  <link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/5326660.stm</link>
         
     | 
| 
      
 58 
     | 
    
         
            +
                  <guid isPermaLink="false">http://news.bbc.co.uk/1/hi/technology/5326660.stm</guid>
         
     | 
| 
      
 59 
     | 
    
         
            +
                  <pubDate>Fri, 08 Sep 2006 10:14:41 GMT</pubDate>
         
     | 
| 
      
 60 
     | 
    
         
            +
                </item>
         
     | 
| 
      
 61 
     | 
    
         
            +
             
     | 
| 
      
 62 
     | 
    
         
            +
              </channel>
         
     | 
| 
      
 63 
     | 
    
         
            +
            </rss>
         
     | 
| 
      
 64 
     | 
    
         
            +
             
     | 
    
        data/test/data/rss20diff.xml
    CHANGED
    
    | 
         @@ -1,59 +1,59 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            <?xml version="1.0" encoding="ISO-8859-1" ?>
         
     | 
| 
       2 
     | 
    
         
            -
            <?xml-stylesheet title="XSL_formatting" type="text/xsl" href="/shared/bsp/xsl/rss/nolsol.xsl"?>
         
     | 
| 
       3 
     | 
    
         
            -
            <rss version="2.0">
         
     | 
| 
       4 
     | 
    
         
            -
              <channel>
         
     | 
| 
       5 
     | 
    
         
            -
                <title>diff</title>
         
     | 
| 
       6 
     | 
    
         
            -
                <link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/default.stm</link>
         
     | 
| 
       7 
     | 
    
         
            -
                <description>Visit BBC News for up-to-the-minute news, breaking news, video, audio and feature stories. BBC News provides trusted World and UK news as well as local and regional perspectives. Also entertainment, business, science, technology and health news.</description>
         
     | 
| 
       8 
     | 
    
         
            -
                <language>en-gb</language>
         
     | 
| 
       9 
     | 
    
         
            -
                <lastBuildDate>Sat, 09 Sep 2006 14:57:06 GMT</lastBuildDate>
         
     | 
| 
       10 
     | 
    
         
            -
                <copyright>Copyright: (C) British Broadcasting Corporation, see http://news.bbc.co.uk/1/hi/help/rss/4498287.stm for terms and conditions of reuse</copyright>
         
     | 
| 
       11 
     | 
    
         
            -
                <docs>http://www.bbc.co.uk/syndication/</docs>
         
     | 
| 
       12 
     | 
    
         
            -
                <ttl>15</ttl>
         
     | 
| 
       13 
     | 
    
         
            -
                <skipHours>
         
     | 
| 
       14 
     | 
    
         
            -
                   <hour>6</hour>
         
     | 
| 
       15 
     | 
    
         
            -
                   <hour>7</hour>
         
     | 
| 
       16 
     | 
    
         
            -
                   <hour>8</hour>
         
     | 
| 
       17 
     | 
    
         
            -
                   <hour>9</hour>
         
     | 
| 
       18 
     | 
    
         
            -
                   <hour>10</hour>
         
     | 
| 
       19 
     | 
    
         
            -
                   <hour>11</hour>
         
     | 
| 
       20 
     | 
    
         
            -
                </skipHours>
         
     | 
| 
       21 
     | 
    
         
            -
                <skipDays>
         
     | 
| 
       22 
     | 
    
         
            -
                   <day>Sunday</day>
         
     | 
| 
       23 
     | 
    
         
            -
                </skipDays>
         
     | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
       25 
     | 
    
         
            -
                <image>
         
     | 
| 
       26 
     | 
    
         
            -
                  <title>BBC News</title>
         
     | 
| 
       27 
     | 
    
         
            -
                  <url>http://news.bbc.co.uk/nol/shared/img/bbc_news_120x60.gif</url>
         
     | 
| 
       28 
     | 
    
         
            -
                  <link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/default.stm</link>
         
     | 
| 
       29 
     | 
    
         
            -
                </image>
         
     | 
| 
       30 
     | 
    
         
            -
             
     | 
| 
       31 
     | 
    
         
            -
                <item>
         
     | 
| 
       32 
     | 
    
         
            -
                  <title>diff</title>
         
     | 
| 
       33 
     | 
    
         
            -
                  <description>BBC Click investigates free security software and finds out who will protect PCs when Microsoft launches Vista.</description>
         
     | 
| 
       34 
     | 
    
         
            -
                  <link>http://news.bbc.co.uk/go/rss/-/1/hi/programmes/click_online/5326654.stm</link>
         
     | 
| 
       35 
     | 
    
         
            -
                  <guid isPermaLink="false">http://news.bbc.co.uk/1/hi/programmes/click_online/5326654.stm</guid>
         
     | 
| 
       36 
     | 
    
         
            -
                  <pubDate>Sat, 09 Sep 2006 12:45:35 GMT</pubDate>
         
     | 
| 
       37 
     | 
    
         
            -
                  <category>Click</category>
         
     | 
| 
       38 
     | 
    
         
            -
                </item>
         
     | 
| 
       39 
     | 
    
         
            -
             
     | 
| 
       40 
     | 
    
         
            -
                <item>
         
     | 
| 
       41 
     | 
    
         
            -
                  <title>diff</title>
         
     | 
| 
       42 
     | 
    
         
            -
                  <description>diff</description>
         
     | 
| 
       43 
     | 
    
         
            -
                  <link>diff</link>
         
     | 
| 
       44 
     | 
    
         
            -
                  <guid isPermaLink="false">http://news.bbc.co.uk/1/hi/technology/5328446.stm</guid>
         
     | 
| 
       45 
     | 
    
         
            -
                  <pubDate>Fri, 08 Sep 2006 16:18:08 GMT</pubDate>
         
     | 
| 
       46 
     | 
    
         
            -
                  <category>diff</category>
         
     | 
| 
       47 
     | 
    
         
            -
                </item>
         
     | 
| 
       48 
     | 
    
         
            -
             
     | 
| 
       49 
     | 
    
         
            -
                <item>
         
     | 
| 
       50 
     | 
    
         
            -
                  <title>MP3 player court order overturned</title>
         
     | 
| 
       51 
     | 
    
         
            -
                  <description>SanDisk puts its MP3 players back on display at a German electronics show after overturning a court injunction.</description>
         
     | 
| 
       52 
     | 
    
         
            -
                  <link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/5326660.stm</link>
         
     | 
| 
       53 
     | 
    
         
            -
                  <guid isPermaLink="false">http://news.bbc.co.uk/1/hi/technology/5326660.stm</guid>
         
     | 
| 
       54 
     | 
    
         
            -
                  <pubDate>Fri, 08 Sep 2006 10:14:41 GMT</pubDate>
         
     | 
| 
       55 
     | 
    
         
            -
                </item>
         
     | 
| 
       56 
     | 
    
         
            -
             
     | 
| 
       57 
     | 
    
         
            -
              </channel>
         
     | 
| 
       58 
     | 
    
         
            -
            </rss>
         
     | 
| 
       59 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            <?xml version="1.0" encoding="ISO-8859-1" ?>
         
     | 
| 
      
 2 
     | 
    
         
            +
            <?xml-stylesheet title="XSL_formatting" type="text/xsl" href="/shared/bsp/xsl/rss/nolsol.xsl"?>
         
     | 
| 
      
 3 
     | 
    
         
            +
            <rss version="2.0">
         
     | 
| 
      
 4 
     | 
    
         
            +
              <channel>
         
     | 
| 
      
 5 
     | 
    
         
            +
                <title>diff</title>
         
     | 
| 
      
 6 
     | 
    
         
            +
                <link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/default.stm</link>
         
     | 
| 
      
 7 
     | 
    
         
            +
                <description>Visit BBC News for up-to-the-minute news, breaking news, video, audio and feature stories. BBC News provides trusted World and UK news as well as local and regional perspectives. Also entertainment, business, science, technology and health news.</description>
         
     | 
| 
      
 8 
     | 
    
         
            +
                <language>en-gb</language>
         
     | 
| 
      
 9 
     | 
    
         
            +
                <lastBuildDate>Sat, 09 Sep 2006 14:57:06 GMT</lastBuildDate>
         
     | 
| 
      
 10 
     | 
    
         
            +
                <copyright>Copyright: (C) British Broadcasting Corporation, see http://news.bbc.co.uk/1/hi/help/rss/4498287.stm for terms and conditions of reuse</copyright>
         
     | 
| 
      
 11 
     | 
    
         
            +
                <docs>http://www.bbc.co.uk/syndication/</docs>
         
     | 
| 
      
 12 
     | 
    
         
            +
                <ttl>15</ttl>
         
     | 
| 
      
 13 
     | 
    
         
            +
                <skipHours>
         
     | 
| 
      
 14 
     | 
    
         
            +
                   <hour>6</hour>
         
     | 
| 
      
 15 
     | 
    
         
            +
                   <hour>7</hour>
         
     | 
| 
      
 16 
     | 
    
         
            +
                   <hour>8</hour>
         
     | 
| 
      
 17 
     | 
    
         
            +
                   <hour>9</hour>
         
     | 
| 
      
 18 
     | 
    
         
            +
                   <hour>10</hour>
         
     | 
| 
      
 19 
     | 
    
         
            +
                   <hour>11</hour>
         
     | 
| 
      
 20 
     | 
    
         
            +
                </skipHours>
         
     | 
| 
      
 21 
     | 
    
         
            +
                <skipDays>
         
     | 
| 
      
 22 
     | 
    
         
            +
                   <day>Sunday</day>
         
     | 
| 
      
 23 
     | 
    
         
            +
                </skipDays>
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
                <image>
         
     | 
| 
      
 26 
     | 
    
         
            +
                  <title>BBC News</title>
         
     | 
| 
      
 27 
     | 
    
         
            +
                  <url>http://news.bbc.co.uk/nol/shared/img/bbc_news_120x60.gif</url>
         
     | 
| 
      
 28 
     | 
    
         
            +
                  <link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/default.stm</link>
         
     | 
| 
      
 29 
     | 
    
         
            +
                </image>
         
     | 
| 
      
 30 
     | 
    
         
            +
             
     | 
| 
      
 31 
     | 
    
         
            +
                <item>
         
     | 
| 
      
 32 
     | 
    
         
            +
                  <title>diff</title>
         
     | 
| 
      
 33 
     | 
    
         
            +
                  <description>BBC Click investigates free security software and finds out who will protect PCs when Microsoft launches Vista.</description>
         
     | 
| 
      
 34 
     | 
    
         
            +
                  <link>http://news.bbc.co.uk/go/rss/-/1/hi/programmes/click_online/5326654.stm</link>
         
     | 
| 
      
 35 
     | 
    
         
            +
                  <guid isPermaLink="false">http://news.bbc.co.uk/1/hi/programmes/click_online/5326654.stm</guid>
         
     | 
| 
      
 36 
     | 
    
         
            +
                  <pubDate>Sat, 09 Sep 2006 12:45:35 GMT</pubDate>
         
     | 
| 
      
 37 
     | 
    
         
            +
                  <category>Click</category>
         
     | 
| 
      
 38 
     | 
    
         
            +
                </item>
         
     | 
| 
      
 39 
     | 
    
         
            +
             
     | 
| 
      
 40 
     | 
    
         
            +
                <item>
         
     | 
| 
      
 41 
     | 
    
         
            +
                  <title>diff</title>
         
     | 
| 
      
 42 
     | 
    
         
            +
                  <description>diff</description>
         
     | 
| 
      
 43 
     | 
    
         
            +
                  <link>diff</link>
         
     | 
| 
      
 44 
     | 
    
         
            +
                  <guid isPermaLink="false">http://news.bbc.co.uk/1/hi/technology/5328446.stm</guid>
         
     | 
| 
      
 45 
     | 
    
         
            +
                  <pubDate>Fri, 08 Sep 2006 16:18:08 GMT</pubDate>
         
     | 
| 
      
 46 
     | 
    
         
            +
                  <category>diff</category>
         
     | 
| 
      
 47 
     | 
    
         
            +
                </item>
         
     | 
| 
      
 48 
     | 
    
         
            +
             
     | 
| 
      
 49 
     | 
    
         
            +
                <item>
         
     | 
| 
      
 50 
     | 
    
         
            +
                  <title>MP3 player court order overturned</title>
         
     | 
| 
      
 51 
     | 
    
         
            +
                  <description>SanDisk puts its MP3 players back on display at a German electronics show after overturning a court injunction.</description>
         
     | 
| 
      
 52 
     | 
    
         
            +
                  <link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/5326660.stm</link>
         
     | 
| 
      
 53 
     | 
    
         
            +
                  <guid isPermaLink="false">http://news.bbc.co.uk/1/hi/technology/5326660.stm</guid>
         
     | 
| 
      
 54 
     | 
    
         
            +
                  <pubDate>Fri, 08 Sep 2006 10:14:41 GMT</pubDate>
         
     | 
| 
      
 55 
     | 
    
         
            +
                </item>
         
     | 
| 
      
 56 
     | 
    
         
            +
             
     | 
| 
      
 57 
     | 
    
         
            +
              </channel>
         
     | 
| 
      
 58 
     | 
    
         
            +
            </rss>
         
     | 
| 
      
 59 
     | 
    
         
            +
             
     | 
| 
         @@ -1,51 +1,51 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            <?xml version="1.0" encoding="ISO-8859-1" ?>
         
     | 
| 
       2 
     | 
    
         
            -
            <?xml-stylesheet title="XSL_formatting" type="text/xsl" href="/shared/bsp/xsl/rss/nolsol.xsl"?>
         
     | 
| 
       3 
     | 
    
         
            -
            <rss version="2.0">
         
     | 
| 
       4 
     | 
    
         
            -
              <channel>
         
     | 
| 
       5 
     | 
    
         
            -
                <title>diff</title>
         
     | 
| 
       6 
     | 
    
         
            -
                <link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/default.stm</link>
         
     | 
| 
       7 
     | 
    
         
            -
                <description>Visit BBC News for up-to-the-minute news, breaking news, video, audio and feature stories. BBC News provides trusted World and UK news as well as local and regional perspectives. Also entertainment, business, science, technology and health news.</description>
         
     | 
| 
       8 
     | 
    
         
            -
                <language>en-gb</language>
         
     | 
| 
       9 
     | 
    
         
            -
                <lastBuildDate>Sat, 09 Sep 2006 14:57:06 GMT</lastBuildDate>
         
     | 
| 
       10 
     | 
    
         
            -
                <copyright>Copyright: (C) British Broadcasting Corporation, see http://news.bbc.co.uk/1/hi/help/rss/4498287.stm for terms and conditions of reuse</copyright>
         
     | 
| 
       11 
     | 
    
         
            -
                <docs>http://www.bbc.co.uk/syndication/</docs>
         
     | 
| 
       12 
     | 
    
         
            -
                <ttl>15</ttl>
         
     | 
| 
       13 
     | 
    
         
            -
                <skipHours>
         
     | 
| 
       14 
     | 
    
         
            -
                   <hour>6</hour>
         
     | 
| 
       15 
     | 
    
         
            -
                   <hour>7</hour>
         
     | 
| 
       16 
     | 
    
         
            -
                   <hour>8</hour>
         
     | 
| 
       17 
     | 
    
         
            -
                   <hour>9</hour>
         
     | 
| 
       18 
     | 
    
         
            -
                   <hour>10</hour>
         
     | 
| 
       19 
     | 
    
         
            -
                   <hour>11</hour>
         
     | 
| 
       20 
     | 
    
         
            -
                </skipHours>
         
     | 
| 
       21 
     | 
    
         
            -
                <skipDays>
         
     | 
| 
       22 
     | 
    
         
            -
                   <day>Sunday</day>
         
     | 
| 
       23 
     | 
    
         
            -
                </skipDays>
         
     | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
       25 
     | 
    
         
            -
                <image>
         
     | 
| 
       26 
     | 
    
         
            -
                  <title>BBC News</title>
         
     | 
| 
       27 
     | 
    
         
            -
                  <url>http://news.bbc.co.uk/nol/shared/img/bbc_news_120x60.gif</url>
         
     | 
| 
       28 
     | 
    
         
            -
                  <link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/default.stm</link>
         
     | 
| 
       29 
     | 
    
         
            -
                </image>
         
     | 
| 
       30 
     | 
    
         
            -
             
     | 
| 
       31 
     | 
    
         
            -
                <item>
         
     | 
| 
       32 
     | 
    
         
            -
                  <title>diff</title>
         
     | 
| 
       33 
     | 
    
         
            -
                  <description>BBC Click investigates free security software and finds out who will protect PCs when Microsoft launches Vista.</description>
         
     | 
| 
       34 
     | 
    
         
            -
                  <link>http://news.bbc.co.uk/go/rss/-/1/hi/programmes/click_online/5326654.stm</link>
         
     | 
| 
       35 
     | 
    
         
            -
                  <guid isPermaLink="false">http://news.bbc.co.uk/1/hi/programmes/click_online/5326654.stm</guid>
         
     | 
| 
       36 
     | 
    
         
            -
                  <pubDate>Sat, 09 Sep 2006 12:45:35 GMT</pubDate>
         
     | 
| 
       37 
     | 
    
         
            -
                  <category>Click</category>
         
     | 
| 
       38 
     | 
    
         
            -
                </item>
         
     | 
| 
       39 
     | 
    
         
            -
             
     | 
| 
       40 
     | 
    
         
            -
                <item>
         
     | 
| 
       41 
     | 
    
         
            -
                  <title>diff</title>
         
     | 
| 
       42 
     | 
    
         
            -
                  <description>A Japanese scientist who invented a sustainable form of light is awarded the Millennium Technology Prize.</description>
         
     | 
| 
       43 
     | 
    
         
            -
                  <link>diff</link>
         
     | 
| 
       44 
     | 
    
         
            -
                  <guid isPermaLink="false">http://news.bbc.co.uk/1/hi/technology/5328446.stm</guid>
         
     | 
| 
       45 
     | 
    
         
            -
                  <pubDate>Fri, 08 Sep 2006 16:18:08 GMT</pubDate>
         
     | 
| 
       46 
     | 
    
         
            -
                  <category>diff</category>
         
     | 
| 
       47 
     | 
    
         
            -
                </item>
         
     | 
| 
       48 
     | 
    
         
            -
             
     | 
| 
       49 
     | 
    
         
            -
              </channel>
         
     | 
| 
       50 
     | 
    
         
            -
            </rss>
         
     | 
| 
       51 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            <?xml version="1.0" encoding="ISO-8859-1" ?>
         
     | 
| 
      
 2 
     | 
    
         
            +
            <?xml-stylesheet title="XSL_formatting" type="text/xsl" href="/shared/bsp/xsl/rss/nolsol.xsl"?>
         
     | 
| 
      
 3 
     | 
    
         
            +
            <rss version="2.0">
         
     | 
| 
      
 4 
     | 
    
         
            +
              <channel>
         
     | 
| 
      
 5 
     | 
    
         
            +
                <title>diff</title>
         
     | 
| 
      
 6 
     | 
    
         
            +
                <link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/default.stm</link>
         
     | 
| 
      
 7 
     | 
    
         
            +
                <description>Visit BBC News for up-to-the-minute news, breaking news, video, audio and feature stories. BBC News provides trusted World and UK news as well as local and regional perspectives. Also entertainment, business, science, technology and health news.</description>
         
     | 
| 
      
 8 
     | 
    
         
            +
                <language>en-gb</language>
         
     | 
| 
      
 9 
     | 
    
         
            +
                <lastBuildDate>Sat, 09 Sep 2006 14:57:06 GMT</lastBuildDate>
         
     | 
| 
      
 10 
     | 
    
         
            +
                <copyright>Copyright: (C) British Broadcasting Corporation, see http://news.bbc.co.uk/1/hi/help/rss/4498287.stm for terms and conditions of reuse</copyright>
         
     | 
| 
      
 11 
     | 
    
         
            +
                <docs>http://www.bbc.co.uk/syndication/</docs>
         
     | 
| 
      
 12 
     | 
    
         
            +
                <ttl>15</ttl>
         
     | 
| 
      
 13 
     | 
    
         
            +
                <skipHours>
         
     | 
| 
      
 14 
     | 
    
         
            +
                   <hour>6</hour>
         
     | 
| 
      
 15 
     | 
    
         
            +
                   <hour>7</hour>
         
     | 
| 
      
 16 
     | 
    
         
            +
                   <hour>8</hour>
         
     | 
| 
      
 17 
     | 
    
         
            +
                   <hour>9</hour>
         
     | 
| 
      
 18 
     | 
    
         
            +
                   <hour>10</hour>
         
     | 
| 
      
 19 
     | 
    
         
            +
                   <hour>11</hour>
         
     | 
| 
      
 20 
     | 
    
         
            +
                </skipHours>
         
     | 
| 
      
 21 
     | 
    
         
            +
                <skipDays>
         
     | 
| 
      
 22 
     | 
    
         
            +
                   <day>Sunday</day>
         
     | 
| 
      
 23 
     | 
    
         
            +
                </skipDays>
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
                <image>
         
     | 
| 
      
 26 
     | 
    
         
            +
                  <title>BBC News</title>
         
     | 
| 
      
 27 
     | 
    
         
            +
                  <url>http://news.bbc.co.uk/nol/shared/img/bbc_news_120x60.gif</url>
         
     | 
| 
      
 28 
     | 
    
         
            +
                  <link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/default.stm</link>
         
     | 
| 
      
 29 
     | 
    
         
            +
                </image>
         
     | 
| 
      
 30 
     | 
    
         
            +
             
     | 
| 
      
 31 
     | 
    
         
            +
                <item>
         
     | 
| 
      
 32 
     | 
    
         
            +
                  <title>diff</title>
         
     | 
| 
      
 33 
     | 
    
         
            +
                  <description>BBC Click investigates free security software and finds out who will protect PCs when Microsoft launches Vista.</description>
         
     | 
| 
      
 34 
     | 
    
         
            +
                  <link>http://news.bbc.co.uk/go/rss/-/1/hi/programmes/click_online/5326654.stm</link>
         
     | 
| 
      
 35 
     | 
    
         
            +
                  <guid isPermaLink="false">http://news.bbc.co.uk/1/hi/programmes/click_online/5326654.stm</guid>
         
     | 
| 
      
 36 
     | 
    
         
            +
                  <pubDate>Sat, 09 Sep 2006 12:45:35 GMT</pubDate>
         
     | 
| 
      
 37 
     | 
    
         
            +
                  <category>Click</category>
         
     | 
| 
      
 38 
     | 
    
         
            +
                </item>
         
     | 
| 
      
 39 
     | 
    
         
            +
             
     | 
| 
      
 40 
     | 
    
         
            +
                <item>
         
     | 
| 
      
 41 
     | 
    
         
            +
                  <title>diff</title>
         
     | 
| 
      
 42 
     | 
    
         
            +
                  <description>A Japanese scientist who invented a sustainable form of light is awarded the Millennium Technology Prize.</description>
         
     | 
| 
      
 43 
     | 
    
         
            +
                  <link>diff</link>
         
     | 
| 
      
 44 
     | 
    
         
            +
                  <guid isPermaLink="false">http://news.bbc.co.uk/1/hi/technology/5328446.stm</guid>
         
     | 
| 
      
 45 
     | 
    
         
            +
                  <pubDate>Fri, 08 Sep 2006 16:18:08 GMT</pubDate>
         
     | 
| 
      
 46 
     | 
    
         
            +
                  <category>diff</category>
         
     | 
| 
      
 47 
     | 
    
         
            +
                </item>
         
     | 
| 
      
 48 
     | 
    
         
            +
             
     | 
| 
      
 49 
     | 
    
         
            +
              </channel>
         
     | 
| 
      
 50 
     | 
    
         
            +
            </rss>
         
     | 
| 
      
 51 
     | 
    
         
            +
             
     | 
    
        data/test/test_feednormalizer.rb
    CHANGED
    
    | 
         @@ -1,267 +1,265 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
       4 
     | 
    
         
            -
             
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
     | 
    
         
            -
             
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
       10 
     | 
    
         
            -
             
     | 
| 
       11 
     | 
    
         
            -
               
     | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
       13 
     | 
    
         
            -
               
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
             
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
             
     | 
| 
       19 
     | 
    
         
            -
               
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
       21 
     | 
    
         
            -
               
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
     | 
    
         
            -
               
     | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
       25 
     | 
    
         
            -
             
     | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
       27 
     | 
    
         
            -
             
     | 
| 
       28 
     | 
    
         
            -
               
     | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
     | 
    
         
            -
             
     | 
| 
       31 
     | 
    
         
            -
             
     | 
| 
       32 
     | 
    
         
            -
             
     | 
| 
       33 
     | 
    
         
            -
               
     | 
| 
       34 
     | 
    
         
            -
             
     | 
| 
       35 
     | 
    
         
            -
             
     | 
| 
       36 
     | 
    
         
            -
                assert_kind_of Feed, FeedNormalizer::FeedNormalizer.parse(XML_FILES[: 
     | 
| 
       37 
     | 
    
         
            -
                  :force_parser => RubyRssParser, :try_others => false)
         
     | 
| 
       38 
     | 
    
         
            -
             
     | 
| 
       39 
     | 
    
         
            -
             
     | 
| 
       40 
     | 
    
         
            -
               
     | 
| 
       41 
     | 
    
         
            -
             
     | 
| 
       42 
     | 
    
         
            -
             
     | 
| 
       43 
     | 
    
         
            -
                assert_kind_of Feed, FeedNormalizer::FeedNormalizer.parse(XML_FILES[: 
     | 
| 
       44 
     | 
    
         
            -
                  :force_parser => SimpleRssParser, :try_others => false)
         
     | 
| 
       45 
     | 
    
         
            -
             
     | 
| 
       46 
     | 
    
         
            -
             
     | 
| 
       47 
     | 
    
         
            -
               
     | 
| 
       48 
     | 
    
         
            -
             
     | 
| 
       49 
     | 
    
         
            -
               
     | 
| 
       50 
     | 
    
         
            -
             
     | 
| 
       51 
     | 
    
         
            -
               
     | 
| 
       52 
     | 
    
         
            -
             
     | 
| 
       53 
     | 
    
         
            -
               
     | 
| 
       54 
     | 
    
         
            -
             
     | 
| 
       55 
     | 
    
         
            -
               
     | 
| 
       56 
     | 
    
         
            -
             
     | 
| 
       57 
     | 
    
         
            -
               
     | 
| 
       58 
     | 
    
         
            -
             
     | 
| 
       59 
     | 
    
         
            -
               
     | 
| 
       60 
     | 
    
         
            -
             
     | 
| 
       61 
     | 
    
         
            -
             
     | 
| 
       62 
     | 
    
         
            -
             
     | 
| 
       63 
     | 
    
         
            -
             
     | 
| 
       64 
     | 
    
         
            -
               
     | 
| 
       65 
     | 
    
         
            -
             
     | 
| 
       66 
     | 
    
         
            -
             
     | 
| 
       67 
     | 
    
         
            -
                 
     | 
| 
       68 
     | 
    
         
            -
             
     | 
| 
       69 
     | 
    
         
            -
                assert_equal  
     | 
| 
       70 
     | 
    
         
            -
                assert_equal [ 
     | 
| 
       71 
     | 
    
         
            -
                assert_equal  
     | 
| 
       72 
     | 
    
         
            -
                assert_equal  
     | 
| 
       73 
     | 
    
         
            -
                assert_equal  
     | 
| 
       74 
     | 
    
         
            -
                 
     | 
| 
       75 
     | 
    
         
            -
                 
     | 
| 
       76 
     | 
    
         
            -
             
     | 
| 
       77 
     | 
    
         
            -
             
     | 
| 
       78 
     | 
    
         
            -
               
     | 
| 
       79 
     | 
    
         
            -
             
     | 
| 
       80 
     | 
    
         
            -
             
     | 
| 
       81 
     | 
    
         
            -
                 
     | 
| 
       82 
     | 
    
         
            -
             
     | 
| 
       83 
     | 
    
         
            -
                assert_equal  
     | 
| 
       84 
     | 
    
         
            -
                assert_equal  
     | 
| 
       85 
     | 
    
         
            -
                assert_equal  
     | 
| 
       86 
     | 
    
         
            -
                assert_equal  
     | 
| 
       87 
     | 
    
         
            -
                assert_equal  
     | 
| 
       88 
     | 
    
         
            -
             
     | 
| 
       89 
     | 
    
         
            -
                 
     | 
| 
       90 
     | 
    
         
            -
             
     | 
| 
       91 
     | 
    
         
            -
             
     | 
| 
       92 
     | 
    
         
            -
             
     | 
| 
       93 
     | 
    
         
            -
               
     | 
| 
       94 
     | 
    
         
            -
             
     | 
| 
       95 
     | 
    
         
            -
             
     | 
| 
       96 
     | 
    
         
            -
             
     | 
| 
       97 
     | 
    
         
            -
                  feed  
     | 
| 
       98 
     | 
    
         
            -
             
     | 
| 
       99 
     | 
    
         
            -
             
     | 
| 
       100 
     | 
    
         
            -
             
     | 
| 
       101 
     | 
    
         
            -
               
     | 
| 
       102 
     | 
    
         
            -
             
     | 
| 
       103 
     | 
    
         
            -
             
     | 
| 
       104 
     | 
    
         
            -
                 
     | 
| 
       105 
     | 
    
         
            -
                 
     | 
| 
       106 
     | 
    
         
            -
                assert_not_equal FeedNormalizer::FeedNormalizer.parse(XML_FILES[: 
     | 
| 
       107 
     | 
    
         
            -
             
     | 
| 
       108 
     | 
    
         
            -
             
     | 
| 
       109 
     | 
    
         
            -
               
     | 
| 
       110 
     | 
    
         
            -
             
     | 
| 
       111 
     | 
    
         
            -
             
     | 
| 
       112 
     | 
    
         
            -
                 
     | 
| 
       113 
     | 
    
         
            -
             
     | 
| 
       114 
     | 
    
         
            -
                 
     | 
| 
       115 
     | 
    
         
            -
             
     | 
| 
       116 
     | 
    
         
            -
                 
     | 
| 
       117 
     | 
    
         
            -
             
     | 
| 
       118 
     | 
    
         
            -
             
     | 
| 
       119 
     | 
    
         
            -
                 
     | 
| 
       120 
     | 
    
         
            -
             
     | 
| 
       121 
     | 
    
         
            -
             
     | 
| 
       122 
     | 
    
         
            -
                 
     | 
| 
       123 
     | 
    
         
            -
             
     | 
| 
       124 
     | 
    
         
            -
             
     | 
| 
       125 
     | 
    
         
            -
               
     | 
| 
       126 
     | 
    
         
            -
             
     | 
| 
       127 
     | 
    
         
            -
             
     | 
| 
       128 
     | 
    
         
            -
                 
     | 
| 
       129 
     | 
    
         
            -
             
     | 
| 
       130 
     | 
    
         
            -
             
     | 
| 
       131 
     | 
    
         
            -
               
     | 
| 
       132 
     | 
    
         
            -
             
     | 
| 
       133 
     | 
    
         
            -
             
     | 
| 
       134 
     | 
    
         
            -
             
     | 
| 
       135 
     | 
    
         
            -
             
     | 
| 
       136 
     | 
    
         
            -
               
     | 
| 
       137 
     | 
    
         
            -
             
     | 
| 
       138 
     | 
    
         
            -
               
     | 
| 
       139 
     | 
    
         
            -
             
     | 
| 
       140 
     | 
    
         
            -
               
     | 
| 
       141 
     | 
    
         
            -
             
     | 
| 
       142 
     | 
    
         
            -
             
     | 
| 
       143 
     | 
    
         
            -
                feed 
     | 
| 
       144 
     | 
    
         
            -
             
     | 
| 
       145 
     | 
    
         
            -
                 
     | 
| 
       146 
     | 
    
         
            -
                 
     | 
| 
       147 
     | 
    
         
            -
                feed. 
     | 
| 
       148 
     | 
    
         
            -
             
     | 
| 
       149 
     | 
    
         
            -
             
     | 
| 
       150 
     | 
    
         
            -
               
     | 
| 
       151 
     | 
    
         
            -
             
     | 
| 
       152 
     | 
    
         
            -
               
     | 
| 
       153 
     | 
    
         
            -
             
     | 
| 
       154 
     | 
    
         
            -
               
     | 
| 
       155 
     | 
    
         
            -
             
     | 
| 
       156 
     | 
    
         
            -
             
     | 
| 
       157 
     | 
    
         
            -
             
     | 
| 
       158 
     | 
    
         
            -
             
     | 
| 
       159 
     | 
    
         
            -
               
     | 
| 
       160 
     | 
    
         
            -
             
     | 
| 
       161 
     | 
    
         
            -
             
     | 
| 
       162 
     | 
    
         
            -
             
     | 
| 
       163 
     | 
    
         
            -
             
     | 
| 
       164 
     | 
    
         
            -
               
     | 
| 
       165 
     | 
    
         
            -
             
     | 
| 
       166 
     | 
    
         
            -
             
     | 
| 
       167 
     | 
    
         
            -
             
     | 
| 
       168 
     | 
    
         
            -
             
     | 
| 
       169 
     | 
    
         
            -
               
     | 
| 
       170 
     | 
    
         
            -
             
     | 
| 
       171 
     | 
    
         
            -
             
     | 
| 
       172 
     | 
    
         
            -
             
     | 
| 
       173 
     | 
    
         
            -
             
     | 
| 
       174 
     | 
    
         
            -
               
     | 
| 
       175 
     | 
    
         
            -
             
     | 
| 
       176 
     | 
    
         
            -
             
     | 
| 
       177 
     | 
    
         
            -
             
     | 
| 
       178 
     | 
    
         
            -
             
     | 
| 
       179 
     | 
    
         
            -
               
     | 
| 
       180 
     | 
    
         
            -
             
     | 
| 
       181 
     | 
    
         
            -
             
     | 
| 
       182 
     | 
    
         
            -
             
     | 
| 
       183 
     | 
    
         
            -
             
     | 
| 
       184 
     | 
    
         
            -
               
     | 
| 
       185 
     | 
    
         
            -
             
     | 
| 
       186 
     | 
    
         
            -
             
     | 
| 
       187 
     | 
    
         
            -
             
     | 
| 
       188 
     | 
    
         
            -
             
     | 
| 
       189 
     | 
    
         
            -
               
     | 
| 
       190 
     | 
    
         
            -
             
     | 
| 
       191 
     | 
    
         
            -
             
     | 
| 
       192 
     | 
    
         
            -
             
     | 
| 
       193 
     | 
    
         
            -
             
     | 
| 
       194 
     | 
    
         
            -
               
     | 
| 
       195 
     | 
    
         
            -
             
     | 
| 
       196 
     | 
    
         
            -
             
     | 
| 
       197 
     | 
    
         
            -
                feed 
     | 
| 
       198 
     | 
    
         
            -
             
     | 
| 
       199 
     | 
    
         
            -
                 
     | 
| 
       200 
     | 
    
         
            -
             
     | 
| 
       201 
     | 
    
         
            -
             
     | 
| 
       202 
     | 
    
         
            -
               
     | 
| 
       203 
     | 
    
         
            -
             
     | 
| 
       204 
     | 
    
         
            -
             
     | 
| 
       205 
     | 
    
         
            -
                feed 
     | 
| 
       206 
     | 
    
         
            -
             
     | 
| 
       207 
     | 
    
         
            -
                 
     | 
| 
       208 
     | 
    
         
            -
             
     | 
| 
       209 
     | 
    
         
            -
             
     | 
| 
       210 
     | 
    
         
            -
               
     | 
| 
       211 
     | 
    
         
            -
             
     | 
| 
       212 
     | 
    
         
            -
             
     | 
| 
       213 
     | 
    
         
            -
                 
     | 
| 
       214 
     | 
    
         
            -
             
     | 
| 
       215 
     | 
    
         
            -
             
     | 
| 
       216 
     | 
    
         
            -
               
     | 
| 
       217 
     | 
    
         
            -
             
     | 
| 
       218 
     | 
    
         
            -
             
     | 
| 
       219 
     | 
    
         
            -
             
     | 
| 
       220 
     | 
    
         
            -
                 
     | 
| 
       221 
     | 
    
         
            -
             
     | 
| 
       222 
     | 
    
         
            -
                Time. 
     | 
| 
       223 
     | 
    
         
            -
             
     | 
| 
       224 
     | 
    
         
            -
             
     | 
| 
       225 
     | 
    
         
            -
             
     | 
| 
       226 
     | 
    
         
            -
               
     | 
| 
       227 
     | 
    
         
            -
             
     | 
| 
       228 
     | 
    
         
            -
             
     | 
| 
       229 
     | 
    
         
            -
                 
     | 
| 
       230 
     | 
    
         
            -
             
     | 
| 
       231 
     | 
    
         
            -
                 
     | 
| 
       232 
     | 
    
         
            -
             
     | 
| 
       233 
     | 
    
         
            -
                 
     | 
| 
       234 
     | 
    
         
            -
             
     | 
| 
       235 
     | 
    
         
            -
             
     | 
| 
       236 
     | 
    
         
            -
               
     | 
| 
       237 
     | 
    
         
            -
             
     | 
| 
       238 
     | 
    
         
            -
             
     | 
| 
       239 
     | 
    
         
            -
             
     | 
| 
       240 
     | 
    
         
            -
                 
     | 
| 
       241 
     | 
    
         
            -
             
     | 
| 
       242 
     | 
    
         
            -
             
     | 
| 
       243 
     | 
    
         
            -
             
     | 
| 
       244 
     | 
    
         
            -
               
     | 
| 
       245 
     | 
    
         
            -
             
     | 
| 
       246 
     | 
    
         
            -
             
     | 
| 
       247 
     | 
    
         
            -
             
     | 
| 
       248 
     | 
    
         
            -
             
     | 
| 
       249 
     | 
    
         
            -
                   
     | 
| 
       250 
     | 
    
         
            -
             
     | 
| 
       251 
     | 
    
         
            -
             
     | 
| 
       252 
     | 
    
         
            -
               
     | 
| 
       253 
     | 
    
         
            -
             
     | 
| 
       254 
     | 
    
         
            -
             
     | 
| 
       255 
     | 
    
         
            -
                 
     | 
| 
       256 
     | 
    
         
            -
             
     | 
| 
       257 
     | 
    
         
            -
             
     | 
| 
       258 
     | 
    
         
            -
               
     | 
| 
       259 
     | 
    
         
            -
             
     | 
| 
       260 
     | 
    
         
            -
             
     | 
| 
       261 
     | 
    
         
            -
                feed 
     | 
| 
       262 
     | 
    
         
            -
             
     | 
| 
       263 
     | 
    
         
            -
             
     | 
| 
       264 
     | 
    
         
            -
             
     | 
| 
       265 
     | 
    
         
            -
             
     | 
| 
       266 
     | 
    
         
            -
            end
         
     | 
| 
       267 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            $LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__), '../lib')))
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'test/unit'
         
     | 
| 
      
 3 
     | 
    
         
            +
            require 'feed-normalizer'
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            # Exercises FeedNormalizer against the XML fixtures stored in the "data"
            # directory that sits next to this file. Every "*.xml" fixture is read once,
            # when the class body is evaluated, and is addressed by its base name as a
            # symbol (e.g. "rss20.xml" => :rss20).
            class FeedNormalizerTest < Test::Unit::TestCase

              # Fixture name (Symbol) => raw XML (String).
              XML_FILES = {}

              # Short alias for the library's namespace module.
              Fn = FeedNormalizer

              data_dir = File.dirname(__FILE__) + '/data'

              # Load up the xml files.
              # Dir.foreach closes the directory handle itself, and File.basename yields
              # the same key on every Ruby version (Array#to_s stopped joining its
              # elements in Ruby 1.9, which broke the previous scan(...).to_s.to_sym).
              Dir.foreach(data_dir) do |fn|
                next unless File.extname(fn) == '.xml'
                XML_FILES[File.basename(fn, '.xml').to_sym] = File.read(File.join(data_dir, fn))
              end

              def test_basic_parse
                assert_kind_of Fn::Feed, parse_fixture(:rss20)
              end

              def test_force_parser
                assert_kind_of Fn::Feed, parse_fixture(:rss20,
                  :force_parser => Fn::RubyRssParser, :try_others => true)
              end

              def test_force_parser_exclusive
                assert_kind_of Fn::Feed, parse_only_with(Fn::RubyRssParser, :rss20)
              end

              def test_ruby_rss_parser
                assert_kind_of Fn::Feed, parse_only_with(Fn::RubyRssParser, :rss20)
                assert_kind_of Fn::Feed, parse_only_with(Fn::RubyRssParser, :rdf10)
              end

              def test_simple_rss_parser
                assert_kind_of Fn::Feed, parse_only_with(Fn::SimpleRssParser, :rss20)
                assert_kind_of Fn::Feed, parse_only_with(Fn::SimpleRssParser, :atom10)
              end

              # :try_others is left unset here, so the result is expected to come from
              # a different parser than the forced one.
              def test_parser_failover_order
                assert_equal 'SimpleRSS', parse_fixture(:atom10, :force_parser => Fn::RubyRssParser).parser
              end

              def test_force_parser_fail
                assert_nil parse_only_with(Fn::RubyRssParser, :atom10)
              end

              def test_all_parsers_fail
                assert_nil Fn::FeedNormalizer.parse("This isn't RSS or Atom!")
              end

              def test_correct_parser_used
                assert_equal 'RSS::Parser', parse_fixture(:rss20).parser
                assert_equal 'SimpleRSS', parse_fixture(:atom10).parser
              end

              def test_rss
                feed = parse_fixture(:rss20)

                assert_equal "BBC News | Technology | UK Edition", feed.title
                assert_equal ["http://news.bbc.co.uk/go/rss/-/1/hi/technology/default.stm"], feed.urls
                assert_equal 15, feed.ttl
                assert_equal [6, 7, 8, 9, 10, 11], feed.skip_hours
                assert_equal ["Sunday"], feed.skip_days
                assert_equal "MP3 player court order overturned", feed.entries.last.title
                assert_equal "<b>SanDisk</b> puts its MP3 players back on display at a German electronics show after overturning a court injunction.", feed.entries.last.description
                assert_match(/test\d/, feed.entries.last.content)
                assert_instance_of Time, feed.entries.last.date_published
              end

              def test_simplerss
                feed = parse_fixture(:atom10)

                assert_equal "~:caboose", feed.title
                assert_equal "http://habtm.com/xml/atom10/feed.xml", feed.url
                assert_nil feed.ttl
                assert_equal [], feed.skip_hours
                assert_equal [], feed.skip_days
                assert_equal "Starfish - Easy Distribution of Site Maintenance", feed.entries.last.title
                assert_equal "urn:uuid:6c028f36-f87a-4f53-b7e3-1f943d2341f0", feed.entries.last.id

                assert !feed.entries.last.description.include?("google fame")
                assert feed.entries.last.content.include?("google fame")
              end

              # Every fixture must parse, and its key fields must come back as Strings.
              def test_sanity_check
                XML_FILES.each do |xml_file, xml|
                  feed = Fn::FeedNormalizer.parse(xml)

                  # Report an unparseable fixture by name instead of a NoMethodError on nil.
                  assert_not_nil feed, "Could not parse #{xml_file}"

                  fields = [feed.parser, feed.title, feed.url, feed.entries.first.url]
                  assert fields.all? { |e| e.is_a?(String) }, "Not everything was a String in #{xml_file}"
                end
              end

              def test_feed_equality
                assert_equal parse_fixture(:rss20), parse_fixture(:rss20)
                assert_equal parse_fixture(:atom10), parse_fixture(:atom10)
                assert_not_equal parse_fixture(:atom03), parse_fixture(:atom10)
                assert_not_equal parse_fixture(:rss20), parse_fixture(:atom10)
                assert_not_equal parse_fixture(:rss20), parse_fixture(:rss20diff)
              end

              def test_feed_diff
                feed = parse_fixture(:rss20)

                diff = feed.diff(parse_fixture(:rss20diff))
                diff_short = feed.diff(parse_fixture(:rss20diff_short))
                no_diff = feed.diff(feed)

                assert diff.keys.all? { |key| [:title, :items].include?(key) }
                assert_equal 3, diff[:items].size

                assert diff_short.keys.all? { |key| [:title, :items].include?(key) }
                assert_equal [3,2], diff_short[:items]

                assert no_diff.empty?
              end

              def test_marshal
                feed = parse_fixture(:rss20)

                assert_nothing_raised { Marshal.load(Marshal.dump(feed)) }
              end

              def test_yaml
                feed = parse_fixture(:rss20)
                assert_nothing_raised { YAML.load(YAML.dump(feed)) }
              end

              def test_method_missing
                assert_raise(NoMethodError) { Fn::Feed.new(nil).nonexistant }
              end

              # The first atom10 entry contains a <plaintext> tag before cleaning and
              # must not contain it after clean!.
              def test_clean
                feed = parse_fixture(:atom10)

                assert_match(/<plaintext>/, feed.entries.first.content)
                assert_match(/<plaintext>/, feed.entries.first.description)
                feed.clean!
                assert_no_match(/<plaintext>/, feed.entries.first.content)
                assert_no_match(/<plaintext>/, feed.entries.first.description)
              end

              def test_malformed_feed
                assert_nothing_raised { Fn::FeedNormalizer.parse('<feed></feed>') }
              end

              def test_dublin_core_date_ruby_rss
                feed = parse_only_with(Fn::RubyRssParser, :rdf10)
                assert_instance_of Time, feed.entries.first.date_published
              end

              def test_dublin_core_date_simple_rss
                feed = parse_only_with(Fn::SimpleRssParser, :rdf10)
                assert_instance_of Time, feed.entries.first.date_published
              end

              def test_dublin_core_creator_ruby_rss
                feed = parse_only_with(Fn::RubyRssParser, :rdf10)
                assert_equal 'Jeff Hecht', feed.entries.last.author
              end

              def test_dublin_core_creator_simple_rss
                feed = parse_only_with(Fn::SimpleRssParser, :rdf10)
                assert_equal 'Jeff Hecht', feed.entries.last.author
              end

              def test_entry_categories_ruby_rss
                feed = parse_only_with(Fn::RubyRssParser, :rss20)
                assert_equal [['Click'],['Technology'],[]], feed.items.map { |i| i.categories }
              end

              def test_entry_categories_simple_rss
                feed = parse_only_with(Fn::SimpleRssParser, :rss20)
                assert_equal [['Click'],['Technology'],[]], feed.items.map { |i| i.categories }
              end

              def test_loose_categories_ruby_rss
                feed = parse_only_with(Fn::RubyRssParser, :rss20, :loose => true)
                assert_equal [1,2,0], feed.entries.map { |e| e.categories.size }
              end

              def test_loose_categories_simple_rss
                feed = parse_only_with(Fn::SimpleRssParser, :rss20, :loose => true)
                assert_equal [1,1,0], feed.entries.map { |e| e.categories.size }
              end

              def test_content_encoded_simple_rss
                feed = parse_only_with(Fn::SimpleRssParser, :rss20)

                feed.entries.each_with_index do |e, i|
                  assert_match(/\s*<p>test#{i+1}<\/p>\s*/, e.content)
                end
              end

              def test_content_encoded_ruby_rss
                feed = parse_only_with(Fn::RubyRssParser, :rss20)

                feed.entries.each_with_index do |e, i|
                  assert_match(/\s*<p>test#{i+1}<\/p>\s*/, e.content)
                end
              end

              def test_atom_content_contains_pluses
                feed = parse_only_with(Fn::SimpleRssParser, :atom10)

                assert_equal 2, feed.entries.last.content.scan(/\+/).size
              end

              # http://code.google.com/p/feed-normalizer/issues/detail?id=13
              #
              # Temporarily gives Time#to_s an optional argument and checks that the
              # feed's timestamps accept a to_s call that passes one.
              def test_times_are_reparsed
                feed = parse_only_with(Fn::RubyRssParser, :rss20)

                Time.class_eval "alias :old_to_s :to_s; def to_s(x=1); old_to_s; end"
                begin
                  assert_equal Time.parse("Sat Sep 09 10:57:06 -0400 2006").to_s, feed.last_updated.to_s(:foo)
                  assert_equal Time.parse("Sat Sep 09 08:45:35 -0400 2006").to_s, feed.entries.first.date_published.to_s(:foo)
                ensure
                  # Undo the global patch so it cannot leak into other tests.
                  Time.class_eval "alias :to_s :old_to_s; remove_method :old_to_s"
                end
              end

              # With :issued removed from SimpleRSS's item tags the atom03 entry has no
              # publication date; with it present the date is parsed.
              def test_atom03_has_issued
                SimpleRSS.class_eval "@@item_tags.delete(:issued)"
                begin
                  feed = parse_only_with(Fn::SimpleRssParser, :atom03)
                  assert_nil feed.entries.first.date_published
                ensure
                  # Always put the tag back, even if the assertion above fails.
                  SimpleRSS.class_eval "@@item_tags << :issued"
                end

                feed = parse_only_with(Fn::SimpleRssParser, :atom03)
                # Compare Time values rather than Time#to_s output, whose format
                # differs between Ruby 1.8 and later versions.
                assert_equal Time.utc(2006, 8, 29, 2, 31, 3), feed.entries.first.date_published
              end

              def test_html_should_be_escaped_by_default
                feed = parse_only_with(Fn::RubyRssParser, :rss20)
                assert_match "<b>SanDisk</b>", feed.items.last.description

                feed = parse_only_with(Fn::SimpleRssParser, :rss20)
                assert_match "<b>SanDisk</b>", feed.items.last.description
              end

              def test_relative_links_and_images_should_be_rewritten_with_url_base
                feed = parse_fixture(:atom03)
                expected = '<a href="http://www.cheapstingybargains.com/link/tplclick?lid=41000000011334249&pubid=21000000000053626"' +
                  ' target=_"blank"><img  src="http://www.cheapstingybargains.com/assets/images/product/productDetail/9990000058546711.jpg"' +
                  ' width="150" height="150" border="0" style="float: right; margin: 0px 0px 5px 5px;" /></a>'

                assert_match expected, feed.items.first.content
              end

              def test_last_updated_simple_rss
                feed = parse_only_with(Fn::SimpleRssParser, :atom10)

                assert_equal Time.parse("Wed Aug 16 09:59:44 -0700 2006"), feed.entries.first.last_updated
              end

              def test_last_updated_ruby_rss
                feed = parse_only_with(Fn::RubyRssParser, :rss20)

                assert_equal feed.entries.first.date_published, feed.entries.first.last_updated
              end

              private

              # Parses the fixture registered under +name+, forwarding any further
              # arguments (normally an options hash) to FeedNormalizer.parse unchanged.
              def parse_fixture(name, *opts)
                Fn::FeedNormalizer.parse(XML_FILES[name], *opts)
              end

              # Parses the fixture +name+ using +parser+ exclusively (:try_others is
              # false). +extra+ is merged into the options, e.g. :loose => true.
              def parse_only_with(parser, name, extra = {})
                parse_fixture(name, { :force_parser => parser, :try_others => false }.merge(extra))
              end

            end
         
     | 
| 
      
 265 
     | 
    
         
            +
             
     |