raakt 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. data/lib/raakt.rb +454 -0
  2. data/tests/empty.htm +1 -0
  3. data/tests/emptytitledoc.htm +8 -0
  4. data/tests/fielddoc1.htm +2 -0
  5. data/tests/fielddoc2.htm +11 -0
  6. data/tests/fielddoc3.htm +14 -0
  7. data/tests/flickerdoc1.htm +0 -0
  8. data/tests/framedoc1.htm +22 -0
  9. data/tests/framedoc2.htm +8 -0
  10. data/tests/full_google.htm +17 -0
  11. data/tests/headingsdoc1.htm +17 -0
  12. data/tests/headingsdoc2.htm +14 -0
  13. data/tests/headingsdoc3.htm +6 -0
  14. data/tests/headingsdoc4.htm +9 -0
  15. data/tests/headingsdoc5.htm +9 -0
  16. data/tests/headingsdoc6.htm +6 -0
  17. data/tests/headingsdoc7.htm +8 -0
  18. data/tests/headingsdoc8.htm +12 -0
  19. data/tests/headingsdoc9.htm +20 -0
  20. data/tests/imagedoc1.htm +8 -0
  21. data/tests/imagedoc2.htm +1 -0
  22. data/tests/imagedoc3.htm +11 -0
  23. data/tests/imagedoc4.htm +7 -0
  24. data/tests/invalidelements1.htm +18 -0
  25. data/tests/invalidhtmldoc1.htm +10 -0
  26. data/tests/invalidhtmldoc2.htm +20 -0
  27. data/tests/invalidxhtmldoc1.htm +17 -0
  28. data/tests/linkdoc1.htm +18 -0
  29. data/tests/linkdoc2.htm +12 -0
  30. data/tests/linkdoc3.htm +16 -0
  31. data/tests/linkdoc4.htm +10 -0
  32. data/tests/metarefreshdoc1.htm +10 -0
  33. data/tests/metarefreshdoc2.htm +14 -0
  34. data/tests/metarefreshdoc3.htm +10 -0
  35. data/tests/nestedcomment.htm +7 -0
  36. data/tests/newlinetext.txt +3 -0
  37. data/tests/raakt_test.rb +224 -0
  38. data/tests/scriptdoc1.htm +15 -0
  39. data/tests/scriptdoc2.htm +10 -0
  40. data/tests/tabledoc1.htm +5 -0
  41. data/tests/tabledoc2.htm +9 -0
  42. data/tests/tabledoc3.htm +6 -0
  43. data/tests/tabledoc4.htm +17 -0
  44. data/tests/tabledoc5.htm +11 -0
  45. data/tests/tabledoc6.htm +11 -0
  46. data/tests/tablelayoutdoc.htm +16 -0
  47. data/tests/test_helper.rb +21 -0
  48. data/tests/xhtmldoc1.htm +14 -0
  49. metadata +100 -0
@@ -0,0 +1 @@
1
+ <empty>Blank document</empty>
@@ -0,0 +1,8 @@
1
+ <html lang="en">
2
+ <head>
3
+ <title></title>
4
+ </head>
5
+ <body>
6
+ <p>This is a document with en amty title element.</p>
7
+ </body>
8
+ </html>
@@ -0,0 +1,2 @@
1
+ <label for="myid">My label</label>
2
+ <input type="text" id="myid" />
@@ -0,0 +1,11 @@
1
+ <html>
2
+ <body>
3
+ <h3>First heading</h3>
4
+ <label for="textid">My label</label>
5
+ <p id="pid">This is a minimal <a href="http://www.w3.org/TR/xhtml1/">image document.</a>
6
+ <input type="text" id="textid" />
7
+ <input type="hidden" id="hiddenid"/>
8
+ <p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">image document.</a>
9
+ <INPUT TYPE='text' ID='myid'>
10
+ </body>
11
+ </html>
@@ -0,0 +1,14 @@
1
+ <LABEL for='myid' lang='en'>My input
2
+ </LABEL>
3
+ <input
4
+ type="text"
5
+ id="myid"
6
+ >
7
+ <textarea>value</textarea>
8
+ <label for="selectid">
9
+ My select
10
+ </label>
11
+ <select lang='en'
12
+ id='selectid'>
13
+ <option>My value</opiton>
14
+ </select>
File without changes
@@ -0,0 +1,22 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN"
2
+ "http://www.w3.org/TR/html4/frameset.dtd">
3
+ <HTML>
4
+ <HEAD>
5
+ <TITLE>A simple frameset document</TITLE>
6
+ </HEAD>
7
+ <FRAMESET cols="20%, 80%">
8
+ <FRAMESET rows="100, 200">
9
+ <FRAME src="contents_of_frame1.html">
10
+ <FRAME src="contents_of_frame2.gif">
11
+ </FRAMESET>
12
+ <FRAME src="contents_of_frame3.html">
13
+ <NOFRAMES>
14
+ <P>This frameset document contains:
15
+ <UL>
16
+ <LI><A href="contents_of_frame1.html">Some neat contents</A>
17
+ <LI><IMG src="contents_of_frame2.gif" alt="A neat image">
18
+ <LI><A href="contents_of_frame3.html">Some other neat contents</A>
19
+ </UL>
20
+ </NOFRAMES>
21
+ </FRAMESET>
22
+ </HTML>
@@ -0,0 +1,8 @@
1
+ <frameset
2
+ cols="20%, 80%">
3
+ <FRAMESET rows="100, 200">
4
+ <FRAME src="contents_of_frame1.html" title="menu">
5
+ <FRAME src="contents_of_frame2.gif" title="content">
6
+ </FRAMESET>
7
+ </frameset>
8
+ </HTML>
@@ -0,0 +1,17 @@
1
+ <html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"><title>Google</title><style><!--
2
+ body,td,a,p,.h{font-family:arial,sans-serif;}
3
+ .h{font-size: 20px;}
4
+ .q{color:#0000cc;}
5
+ -->
6
+ </style>
7
+ <script>
8
+ <!--
9
+ function sf(){document.f.q.focus();}
10
+ function asq(event,el,oi,cad,ct,cd,sg){if(window.XMLHttpRequest){if(el.handledFirstTime){el.handledFirstTime=false;return false;}el.handledFirstTime=true;var e = window.encodeURIComponent ? encodeURIComponent : escape;var oi_param="";var cad_param="";if (oi) oi_param="&oi="+e(oi);if (cad) cad_param="&cad="+e(cad);var x=new XMLHttpRequest();x.open("GET","/url?sa=T"+oi_param+cad_param+"&ct="+e(ct)+"&cd="+e(cd)+"&url="+e(el.href).replace(/\+/g,"%2B")+"&ei="+sg,true);var m=event.altKey||event.metaKey;if(!m){x.onreadystatechange=function(){if(x.readyState==4){clearTimeout(timeoutid);el.dispatchEvent(event);}};var timeoutid=setTimeout(function(){x.abort();el.dispatchEvent(event);},2000);}x.send(null);return m;}return true;}
11
+ // -->
12
+ </script>
13
+ </head><body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onLoad=sf() topmargin=3 marginheight=3><center><table border=0 cellspacing=0 cellpadding=0 width=100%><tr><td align=right nowrap><font size=-1><b>...</b>&nbsp;|&nbsp;<a href="/url?sa=p&pref=ig&pval=3&q=http://www.google.com/" onmousedown="return asq(event,this,'promos','hppphou:def','pro','1','&sig2=')">Personalized Home</a>&nbsp;|&nbsp;<a href="/searchhistory/?hl=en">Search History</a>&nbsp;|&nbsp;<a href="https://www.google.com/accounts/ManageAccount">My Account</a>&nbsp;|&nbsp;<a href="http://www.google.com/accounts/Logout?continue=http://www.google.com/">Sign out</a></font></td></tr><tr height=4><td><img alt="" width=1 height=1></td></tr></table><img src="/intl/en/images/logo.gif" width=276 height=110 alt="Google"><br><br>
14
+ <form action=/search name=f><script><!--
15
+ function qs(el) {if (window.RegExp && window.encodeURIComponent) {var ue=el.href;var qe=encodeURIComponent(document.f.q.value);if(ue.indexOf("q=")!=-1){el.href=ue.replace(new RegExp("q=[^&$]*"),"q="+qe);}else{el.href=ue+"&q="+qe;}}return 1;}
16
+ // -->
17
+ </script><table border=0 cellspacing=0 cellpadding=4><tr><td nowrap><font size=-1><b>Web</b>&nbsp;&nbsp;&nbsp;&nbsp;<a id=1a class=q href="/imghp?hl=en&tab=wi" onClick="return qs(this);">Images</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=2a class=q href="http://groups.google.com/grphp?hl=en&tab=wg" onClick="return qs(this);">Groups</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=4a class=q href="http://news.google.com/nwshp?hl=en&tab=wn" onClick="return qs(this);">News</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=5a class=q href="http://froogle.google.com/frghp?hl=en&tab=wf" onClick="return qs(this);">Froogle</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=7a class=q href="/maphp?hl=en&tab=wl" onClick="return qs(this);">Maps</a>&nbsp;&nbsp;&nbsp;&nbsp;<b><a href="/intl/en/options/" class=q>more&nbsp;&raquo;</a></b></font></td></tr></table><table cellspacing=0 cellpadding=0><tr><td width=25%>&nbsp;</td><td align=center><input type=hidden name=hl value=en><input maxlength=2048 size=55 name=q value="" title="Google Search"><br><input type=submit value="Google Search" name=btnG><input type=submit value="I'm Feeling Lucky" name=btnI></td><td valign=top nowrap width=25%><font size=-2>&nbsp;&nbsp;<a href=/advanced_search?hl=en>Advanced Search</a><br>&nbsp;&nbsp;<a href=/preferences?hl=en>Preferences</a><br>&nbsp;&nbsp;<a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><br><font size=-1><a href="/intl/en/ads/">Advertising&nbsp;Programs</a> - <a href=/intl/en/services/>Business Solutions</a> - <a href=/intl/en/about.html>About Google</a> - <b><a href=http://www.google.se/>Go to Google Sverige</a></b></font><p><font size=-2>&copy;2006 Google</font></p></center></body></html>
@@ -0,0 +1,17 @@
1
+ <!DOCTYPE html
2
+ PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3
+ "DTD/xhtml1-strict.dtd">
4
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5
+ <head>
6
+ <title>This is the title</title>
7
+ </head>
8
+ <body>
9
+ <h1>First h1 heading</h1>
10
+ <p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
11
+ document.</p>
12
+ <h1>Second h1
13
+ heading</h1>
14
+ <table><tr><td>Test table</td></tr></table>
15
+ <H1>Third h1 heading</H1>
16
+ </body>
17
+ </html>
@@ -0,0 +1,14 @@
1
+ <html>
2
+ <head>
3
+ <title>This is the title</title>
4
+ </head>
5
+ <body>
6
+ <h1>First h1 heading</h1>
7
+ <p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
8
+ document.</p>
9
+ <h2>First h2
10
+ heading</h2>
11
+ <table><tr><td>Test table</td></tr></table>
12
+ <H3>First h3 heading</H3>
13
+ </body>
14
+ </html>
@@ -0,0 +1,6 @@
1
+ <html>
2
+ <body>
3
+ <h3>First heading</h3>
4
+ <p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
5
+ </body>
6
+ </html>
@@ -0,0 +1,9 @@
1
+ <html>
2
+ <body>
3
+ <h1>First h1 heading</h1>
4
+ <p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
5
+ document.</p>
6
+ <table><tr><td>Test table</td></tr></table>
7
+ <H3>First h3 heading</H3>
8
+ </body>
9
+ </html>
@@ -0,0 +1,9 @@
1
+ <html>
2
+ <body>
3
+ <h2>First h1 heading</h2>
4
+ <p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
5
+ document.</p>
6
+ <table><tr><td>Test table</td></tr></table>
7
+ <H4>First h3 heading</H4>
8
+ </body>
9
+ </html>
@@ -0,0 +1,6 @@
1
+ <h1>h1 heading</h1>
2
+ <h2>h2 heading</h2>
3
+ <h1>h1 heading 2</h1>
4
+ <h2>h2 heading 2</h2>
5
+ <h3>h3 heading</h3>
6
+ <h1>h1 heading 3</h1>
@@ -0,0 +1,8 @@
1
+ <html>
2
+ <body>
3
+ <h2></h2>
4
+ <p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
5
+ document.</p>
6
+ <table><tr><td>Test table</td></tr></table>
7
+ </body>
8
+ </html>
@@ -0,0 +1,12 @@
1
+ <script type="text/javascript">
2
+ function printpopup(url){
3
+ MyWin = window.open("","","scrollbars=no,resizable=yes,toolbar=no,location=no,directories=no,status=no,menubar=no,width=100,height=100");
4
+ with(MyWin.document){
5
+ open();
6
+ write("<html>\n<head><scr"+"ipt>awidth=(document.layers)?0:8;awidth+=(document.all)?4:0;aheight=(document.layers)?0:29;</scr"+"ipt>\n<title>This is not the title</title>\n</head>\n<body onLoad=\"window.resizeTo(document.images[0].width+awidth,document.images[0].height+aheight)\"marginwidth=0 marginheight=0 leftmargin=0 topmargin=0 rightmargin=0 style=\"overflow:hidden;\">\n");
7
+ write("<h1>This is not a document heading</h1>\n");
8
+ write("</body>\n</html>\n");
9
+ close();
10
+ }
11
+ }
12
+ </script>
@@ -0,0 +1,20 @@
1
+ <html lang="en">
2
+ <head>
3
+ <title>Sample</title>
4
+ </head>
5
+ <body>
6
+ <p>This is a document with an empty title element.</p>
7
+ <script type="text/javascript">
8
+ function printpopup(url){
9
+ MyWin = window.open("","","");
10
+ with(MyWin.document){
11
+ open();
12
+ write("<html><head><title>This is not the title</title>\n</head>\n<body>\n");
13
+ write("<h1>This is not a document heading</h1>\n");
14
+ write("</body>\n</html>\n");
15
+ close();
16
+ }
17
+ }
18
+ </script>
19
+ </body>
20
+ </html>
@@ -0,0 +1,8 @@
1
+ <img
2
+ src="image.png" />
3
+ <img src="image.png" alt="">
4
+ <img src=image.png alt=mytext>
5
+ <IMG
6
+ SRC="image.png"
7
+ ALT="mytext"
8
+ />
@@ -0,0 +1 @@
1
+ <p><img src="image.png" alt=''></p>
@@ -0,0 +1,11 @@
1
+ <html>
2
+ <body>
3
+ <h3>First heading</h3>
4
+ <p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">image document.</a>
5
+ <img src="noimagealt.png" border="0">
6
+ <p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">image document.</a>
7
+ <img src="/noimagealt2.png">
8
+ <p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">image document.</a>
9
+ <img src="../folder/noimagealt3.png" border="0">
10
+ </body>
11
+ </html>
@@ -0,0 +1,7 @@
1
+ <html>
2
+ <body>
3
+ <h3>First heading</h3>
4
+ <p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">image document.</a>
5
+ <IMG src="noimagealt.png" border="0">
6
+ </body>
7
+ </html>
@@ -0,0 +1,18 @@
1
+ <html lang="en">
2
+ <head>
3
+ <title>This is the title</title>
4
+ </head>
5
+ <body>
6
+ <h1>First h1 heading</h1>
7
+ <p>This is a <b>minimal</b> <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
8
+ document.</p>
9
+ <p>This is a <font style="bold">bold</font> <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
10
+ document.</p>
11
+ <h1>Second h1
12
+ heading</h1>
13
+ <blink>No blinking text!</blink>
14
+ <table><tr><td>Test table</td></tr></table>
15
+ <H1>Third h1 heading</H1>
16
+ <marquee>Marquees are so nineties!</marquee>
17
+ </body>
18
+ </html>
@@ -0,0 +1,10 @@
1
+ <HTML>
2
+ <head>
3
+ <TITLE>This is the title</title>
4
+ </head>
5
+ <body>
6
+ <H1>Heading</H1>
7
+ <p>S small document</p>
8
+ <!-- comment -->
9
+ </body>
10
+ </HTML>
@@ -0,0 +1,20 @@
1
+ <HTML>
2
+ <head>
3
+ <TITLE>This is the
4
+ title
5
+
6
+ </title>
7
+ <link rel="schema.DC" href="http://purl.org/DC/elements/1.0">
8
+ </head>
9
+ <body>
10
+
11
+ <!-- h3>Heading 3
12
+ </h3>
13
+ <h1>Heading 1</h1 -->
14
+ <p>S small document</p>
15
+ <p> more content </p>
16
+ <!-- second comment
17
+
18
+ -->
19
+ </body>
20
+ </HTML>
@@ -0,0 +1,17 @@
1
+ <!DOCTYPE html
2
+ PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3
+ "DTD/xhtml1-strict.dtd">
4
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5
+ <head>
6
+ <?xml version="1.0" encoding="UTF-8"?>
7
+ <title>This is the title</title>
8
+ <link rel="schema.DC" href="http://purl.org/DC/elements/1.0/" />
9
+ </head>
10
+ <body>
11
+ <h1>First h1 heading</h1>
12
+ <p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
13
+ document.</p>
14
+ <h1>Second h1 heading</h1>
15
+ <table><tr><td>Test table</td></tr></table>
16
+ </body>
17
+ </html>
@@ -0,0 +1,18 @@
1
+ <html>
2
+ <head>
3
+ <title>This is a link document</title>
4
+ </head>
5
+ <body>
6
+ <h1>The first heading</h1>
7
+ <h2><a href="/news1">New sitem 1</a></h2>
8
+ <p>This is the text for the first news item. <a href="/news1">Read more </a></p>
9
+ <h2><a href="/news2">New sitem 2</a></h2>
10
+ <p>This is the text for the second news item. <a href="/news2">
11
+ Read more</a></p>
12
+ <h2><a href="/news1">New sitem 3</a></h2>
13
+ <p>This is the text for the third news item. <a href="/news3">Read
14
+ more</a></p>
15
+ <h2><a href="/news1">New sitem 4</a></h2>
16
+ <p>This is the text for the fourth news item. <a href="/news4">Read&nbsp;more</a></p>
17
+ </body>
18
+ </html>
@@ -0,0 +1,12 @@
1
+ <html>
2
+ <head>
3
+ <title>This is a link document</title>
4
+ </head>
5
+ <body>
6
+ <h1>The first heading</h1>
7
+ <h2><a href="/news1">New sitem 1</a></h2>
8
+ <p>This is the text for the first news item. <a href="/news1" title="More on item 1">Read more</a></p>
9
+ <h2><a href="/news2">New sitem 2</a></h2>
10
+ <p>This is the text for the second news item. <a href="/news2">Read more</a></p>
11
+ </body>
12
+ </html>
@@ -0,0 +1,16 @@
1
+ <html>
2
+ <head>
3
+ <title>This is a link document</title>
4
+ </head>
5
+ <body>
6
+ <h1>The first heading</h1>
7
+ <h2><a href="/news1">New sitem 1</a></h2>
8
+ <p>This is the text for the first news item.</p>
9
+ <h2><a href="/news2">New sitem 2</a></h2>
10
+ <p>This is the text for the second news item.</p>
11
+ <h2><a href="/news1">New sitem 3</a></h2>
12
+ <p>This is the text for the third news item.</p>
13
+ <h2><a href="/news1">New sitem 4</a></h2>
14
+ <p>This is the text for the fourth news item.</p>
15
+ </body>
16
+ </html>
@@ -0,0 +1,10 @@
1
+ <html>
2
+ <head>
3
+ <title>This is a link document</title>
4
+ </head>
5
+ <body>
6
+ <h1>The first heading</h1>
7
+ <p><a href="/news1"><img src="/example.gif" alt="Read more"></a></p>
8
+ <p><a href="/news2">Read more</a></p>
9
+ </body>
10
+ </html>
@@ -0,0 +1,10 @@
1
+ <html lang="en">
2
+ <head>
3
+ <title>This is the title</title>
4
+ <meta http-equiv="refresh" content="5"/>
5
+ </head>
6
+ <body>
7
+ <p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
8
+ document.</p>
9
+ </body>
10
+ </html>
@@ -0,0 +1,14 @@
1
+ <html lang="en">
2
+ <head>
3
+ <title>This is the title</title>
4
+ <meta http-equiv="cache-control" content="no-cache" >
5
+ <meta http-equiv=
6
+ "refresh"
7
+ content=
8
+ "5" />
9
+ </head>
10
+ <body>
11
+ <p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
12
+ document.</p>
13
+ </body>
14
+ </html>
@@ -0,0 +1,10 @@
1
+ <html lang="en">
2
+ <head>
3
+ <title>This is the title</title>
4
+ <META HTTP-equiv="refresh" CONTENT="5">
5
+ </head>
6
+ <body>
7
+ <p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
8
+ document.</p>
9
+ </body>
10
+ </html>
@@ -0,0 +1,7 @@
1
+ <script language="JavaScript">
2
+ <!-- function BrowserCheck() {
3
+ test = "<!--my" + "broken"
4
+ + " comment" +
5
+ "-->
6
+ -->
7
+ </script>
@@ -0,0 +1,3 @@
1
+ line 1
2
+ line 2
3
+ line 3
@@ -0,0 +1,224 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+ require File.dirname(__FILE__) + '/../lib/raakt'
3
+ require 'rubyful_soup'
4
+
5
+ class RaaktTest < Test::Unit::TestCase
6
+
7
+ def setup
8
+ @raakt = Raakt::Test.new
9
+ end
10
+
11
+ def test_all
12
+ puts @raakt.all(data_full_google)
13
+ end
14
+
15
+ def test_check_images
16
+ assert_equal 1, @raakt.check_images(data_imagedoc1).length
17
+ assert_equal "missingalt", @raakt.check_images(data_imagedoc1)[0].eid
18
+
19
+ assert_equal 0, @raakt.check_images(data_imagedoc2).length
20
+
21
+ assert_equal 3, @raakt.check_images(data_imagedoc3).length
22
+
23
+ assert_equal 1, @raakt.check_images(data_imagedoc4).length
24
+ end
25
+
26
+ def test_check_images_in_blank_doc
27
+ assert_equal 0, @raakt.check_images(data_empty).length
28
+ end
29
+
30
+
31
+ def test_check_title
32
+ assert_equal 0, @raakt.check_title(data_xhtmldoc1).length
33
+ assert_equal 1, @raakt.check_title(data_empty).length
34
+ assert_equal "missingtitle", @raakt.check_title(data_empty)[0].eid
35
+
36
+ assert_equal 1, @raakt.check_title(data_emptytitledoc).length
37
+ assert_equal "emptytitle", @raakt.check_title(data_emptytitledoc)[0].eid
38
+
39
+ assert_equal 0, @raakt.check_title(data_invalidhtmldoc1).length
40
+ assert_equal 0, @raakt.check_title(data_invalidhtmldoc2).length
41
+ end
42
+
43
+
44
+ def test_headings
45
+ assert_equal 3, @raakt.headings(data_headingsdoc1).length
46
+ assert_equal 0, @raakt.headings(data_invalidhtmldoc2).length
47
+ end
48
+
49
+ def test_level
50
+ assert_equal 1, @raakt.level("h1")
51
+ assert_equal 2, @raakt.level("h2")
52
+ assert_equal 6, @raakt.level("h6")
53
+ end
54
+
55
+ def test_check_has_heading
56
+ assert_equal 1, @raakt.check_has_heading(data_empty).length
57
+ assert_equal "missingheading", @raakt.check_has_heading(data_empty)[0].eid
58
+ assert_equal 0, @raakt.check_has_heading(data_headingsdoc1).length
59
+ assert_equal 0, @raakt.check_has_heading(data_headingsdoc9).length
60
+
61
+ assert_equal 1, @raakt.check_has_heading(data_invalidhtmldoc2).length
62
+ assert_equal "missingheading", @raakt.check_has_heading(data_invalidhtmldoc2)[0].eid
63
+ end
64
+
65
+ def test_check_document_structure
66
+ assert_equal 0, @raakt.check_document_structure(data_headingsdoc1).length
67
+ assert_equal 1, @raakt.check_document_structure(data_headingsdoc3).length
68
+ assert_equal "firsthnoth1", @raakt.check_document_structure(data_headingsdoc3)[0].eid
69
+ assert_equal "wronghstructure", @raakt.check_document_structure(data_headingsdoc4)[0].eid
70
+ assert_equal "firsthnoth1", @raakt.check_document_structure(data_headingsdoc5)[0].eid
71
+ assert_equal "wronghstructure", @raakt.check_document_structure(data_headingsdoc5)[1].eid
72
+ assert_equal 0, @raakt.check_document_structure(data_headingsdoc6).length
73
+ assert_equal 0, @raakt.check_document_structure("").length
74
+ end
75
+
76
+
77
+ def test_check_for_nested_tables
78
+ assert_equal 0, @raakt.check_for_nested_tables(data_tabledoc1).length
79
+ assert_equal 0, @raakt.check_for_nested_tables(data_tabledoc2).length
80
+ assert_equal 1, @raakt.check_for_nested_tables(data_tabledoc3).length
81
+ assert_equal 0, @raakt.check_for_nested_tables(data_tabledoc4).length
82
+ assert_equal 1, @raakt.check_for_nested_tables(data_tabledoc5).length
83
+ assert_equal "hasnestedtables", @raakt.check_for_nested_tables(data_tabledoc3)[0].eid
84
+ end
85
+
86
+ def test_check_tables
87
+ puts @raakt.check_tables(data_tabledoc4).to_s
88
+ assert_equal 0, @raakt.check_tables(data_tabledoc4).length
89
+ assert_equal 0, @raakt.check_tables(data_tabledoc1).length
90
+ assert_equal 2, @raakt.check_tables(data_tabledoc2).length
91
+ end
92
+
93
+
94
+ def test_check_for_formatting_elements
95
+ assert_equal 1, @raakt.check_for_formatting_elements(data_invalidelements1).length
96
+ assert_equal "boldused", @raakt.check_for_formatting_elements(data_invalidelements1)[0].eid
97
+ end
98
+
99
+
100
+ def test_check_for_language_info
101
+ assert_equal 0, @raakt.check_for_language_info(data_xhtmldoc1).length
102
+ assert_equal 1, @raakt.check_for_language_info(data_tabledoc2).length
103
+ assert_equal 1, @raakt.check_for_language_info(data_tablelayoutdoc).length
104
+ end
105
+
106
+
107
+ def test_check_link_text
108
+ assert_equal 1, @raakt.check_link_text(data_linkdoc1).length
109
+ assert_equal "ambiguouslinktext", @raakt.check_link_text(data_linkdoc1)[0].eid
110
+ assert_equal 0, @raakt.check_link_text(data_linkdoc3).length
111
+ assert_equal 0, @raakt.check_link_text(data_linkdoc2).length
112
+ assert_equal 1, @raakt.check_link_text(data_linkdoc4).length
113
+ end
114
+
115
+
116
+ def test_get_links
117
+ assert_equal 8, @raakt.get_links(data_linkdoc1).length
118
+ assert_equal 2, @raakt.get_links(data_linkdoc4).length
119
+ assert_equal "Read more", @raakt.get_links(data_linkdoc4)[0][3]
120
+ end
121
+
122
+ def test_img_to_text
123
+ assert_equal "Read more", @raakt.img_to_text(BeautifulSoup.new("<img src='123' alt='Read more' />").img)
124
+ end
125
+
126
+ def test_elements_to_text
127
+ assert_equal "Read more about", @raakt.elements_to_text(BeautifulSoup.new("<a href='rrr'>Read <img src='123' alt='more' /> about</a>").a)
128
+ assert_equal "A sample text here", @raakt.elements_to_text(BeautifulSoup.new("<a href='r'><strong>A</strong> sample <img src='123' alt='text' /> <b>here</b></a>").a)
129
+ end
130
+
131
+ def test_normalize_text
132
+ assert_equal "Read more", @raakt.normalize_text("Read&nbsp;more")
133
+ assert_equal "Read more", @raakt.normalize_text("Read&#160;more")
134
+ assert_equal "Read more", @raakt.normalize_text("Read more")
135
+ assert_equal "Read more", @raakt.normalize_text("Read more")
136
+ assert_equal "Read more", @raakt.normalize_text("Read more")
137
+ assert_equal "Read more", @raakt.normalize_text("Read\n more")
138
+ assert_equal "Läs mer", @raakt.normalize_text("Läs\n mer")
139
+ assert_equal "Läs mer", @raakt.normalize_text("Läs \nmer")
140
+ assert_equal "Read more", @raakt.normalize_text("Read \n\n\nmore")
141
+ assert_equal "Read more", @raakt.normalize_text("Read \tmore")
142
+ assert_equal "Read more", @raakt.normalize_text(" Read more")
143
+ end
144
+
145
+ def test_is_ambiguous_link
146
+ link_a = [1, "/news/1", "", "Read more"]
147
+ link_b = [2, "/news/2", "", "Read more"]
148
+ assert_equal true, @raakt.is_ambiguous_link(link_a, link_b)
149
+
150
+ link_c = [1, "/news/1", "More about first news item", "Read more"]
151
+ link_d = [2, "/news/2", "More about second news item", "Read more"]
152
+ assert_equal false, @raakt.is_ambiguous_link(link_c, link_d)
153
+
154
+ link_a = [1, "/news/1", nil, "Read more"]
155
+ link_b = [2, "/news/2", nil, "Read more"]
156
+ assert_equal true, @raakt.is_ambiguous_link(link_a, link_b)
157
+
158
+ link_g = [1, "/news/1", "", "Read more"]
159
+ link_h = [2, "/news/1", "", "Read more"]
160
+ assert_equal false, @raakt.is_ambiguous_link(link_g, link_h)
161
+
162
+ link_i = [1, "/news/1", "", "Läs mer"]
163
+ link_j = [2, "/news/2", "", "Läs\n mer"]
164
+ assert_equal true, @raakt.is_ambiguous_link(link_i, link_j)
165
+
166
+ link_k = [1, "/news/1", "", "Läs mer"]
167
+ link_l = [2, "/news/2", "", "Läs \nmer"]
168
+ assert_equal true, @raakt.is_ambiguous_link(link_k, link_l)
169
+ end
170
+
171
+
172
+ def test_get_labels
173
+ assert_equal 1, @raakt.get_labels(data_fielddoc1).length
174
+ assert_equal 1, @raakt.get_labels(data_fielddoc2).length
175
+ assert_equal 2, @raakt.get_labels(data_fielddoc3).length
176
+ end
177
+
178
+
179
+ def test_get_editable_fields
180
+ assert_equal 1, @raakt.get_editable_fields(data_fielddoc1).length
181
+ assert_equal 2, @raakt.get_editable_fields(data_fielddoc2).length
182
+ assert_equal 3, @raakt.get_editable_fields(data_fielddoc3).length
183
+ end
184
+
185
+
186
+ def test_check_form
187
+ assert_equal 0, @raakt.check_form(data_fielddoc1).length
188
+ assert_equal 1, @raakt.check_form(data_fielddoc2).length
189
+ assert_equal "fieldmissinglabel", @raakt.check_form(data_fielddoc2)[0].eid
190
+ assert_equal 1, @raakt.check_form(data_fielddoc3).length
191
+ assert_equal "fieldmissinglabel", @raakt.check_form(data_fielddoc3)[0].eid
192
+ end
193
+
194
+
195
+ def test_is_frameset
196
+ assert @raakt.is_frameset(data_framedoc1)
197
+ assert @raakt.is_frameset(data_framedoc2)
198
+ assert !@raakt.is_frameset(data_xhtmldoc1)
199
+ end
200
+
201
+
202
+ def test_check_frames
203
+ assert_equal 3, @raakt.check_frames(data_framedoc1).length
204
+ assert_equal 0, @raakt.check_frames(data_framedoc2).length
205
+ end
206
+
207
+
208
+ def test_check_for_formatting_elements
209
+ invaliderrs = @raakt.check_for_formatting_elements(data_invalidelements1)
210
+ assert_equal 2, invaliderrs.length
211
+ assert_equal "missingsemantics", invaliderrs[0].eid
212
+ assert_equal "hasflicker", invaliderrs[1].eid
213
+ assert_equal 0, @raakt.check_for_formatting_elements(data_xhtmldoc1).length
214
+ end
215
+
216
+
217
+ def test_refresh
218
+ assert_equal 1, @raakt.check_refresh(data_metarefreshdoc1).length
219
+ assert_equal 1, @raakt.check_refresh(data_metarefreshdoc2).length
220
+ assert_equal 1, @raakt.check_refresh(data_metarefreshdoc3).length
221
+ assert_equal 0, @raakt.check_refresh(data_xhtmldoc1).length
222
+ end
223
+
224
+ end