raakt 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/raakt.rb +454 -0
- data/tests/empty.htm +1 -0
- data/tests/emptytitledoc.htm +8 -0
- data/tests/fielddoc1.htm +2 -0
- data/tests/fielddoc2.htm +11 -0
- data/tests/fielddoc3.htm +14 -0
- data/tests/flickerdoc1.htm +0 -0
- data/tests/framedoc1.htm +22 -0
- data/tests/framedoc2.htm +8 -0
- data/tests/full_google.htm +17 -0
- data/tests/headingsdoc1.htm +17 -0
- data/tests/headingsdoc2.htm +14 -0
- data/tests/headingsdoc3.htm +6 -0
- data/tests/headingsdoc4.htm +9 -0
- data/tests/headingsdoc5.htm +9 -0
- data/tests/headingsdoc6.htm +6 -0
- data/tests/headingsdoc7.htm +8 -0
- data/tests/headingsdoc8.htm +12 -0
- data/tests/headingsdoc9.htm +20 -0
- data/tests/imagedoc1.htm +8 -0
- data/tests/imagedoc2.htm +1 -0
- data/tests/imagedoc3.htm +11 -0
- data/tests/imagedoc4.htm +7 -0
- data/tests/invalidelements1.htm +18 -0
- data/tests/invalidhtmldoc1.htm +10 -0
- data/tests/invalidhtmldoc2.htm +20 -0
- data/tests/invalidxhtmldoc1.htm +17 -0
- data/tests/linkdoc1.htm +18 -0
- data/tests/linkdoc2.htm +12 -0
- data/tests/linkdoc3.htm +16 -0
- data/tests/linkdoc4.htm +10 -0
- data/tests/metarefreshdoc1.htm +10 -0
- data/tests/metarefreshdoc2.htm +14 -0
- data/tests/metarefreshdoc3.htm +10 -0
- data/tests/nestedcomment.htm +7 -0
- data/tests/newlinetext.txt +3 -0
- data/tests/raakt_test.rb +224 -0
- data/tests/scriptdoc1.htm +15 -0
- data/tests/scriptdoc2.htm +10 -0
- data/tests/tabledoc1.htm +5 -0
- data/tests/tabledoc2.htm +9 -0
- data/tests/tabledoc3.htm +6 -0
- data/tests/tabledoc4.htm +17 -0
- data/tests/tabledoc5.htm +11 -0
- data/tests/tabledoc6.htm +11 -0
- data/tests/tablelayoutdoc.htm +16 -0
- data/tests/test_helper.rb +21 -0
- data/tests/xhtmldoc1.htm +14 -0
- metadata +100 -0
data/tests/empty.htm
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
<empty>Blank document</empty>
|
data/tests/fielddoc1.htm
ADDED
data/tests/fielddoc2.htm
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
<html>
|
2
|
+
<body>
|
3
|
+
<h3>First heading</h3>
|
4
|
+
<label for="textid">My label</label>
|
5
|
+
<p id="pid">This is a minimal <a href="http://www.w3.org/TR/xhtml1/">image document.</a>
|
6
|
+
<input type="text" id="textid" />
|
7
|
+
<input type="hidden" id="hiddenid"/>
|
8
|
+
<p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">image document.</a>
|
9
|
+
<INPUT TYPE='text' ID='myid'>
|
10
|
+
</body>
|
11
|
+
</html>
|
data/tests/fielddoc3.htm
ADDED
File without changes
|
data/tests/framedoc1.htm
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN"
|
2
|
+
"http://www.w3.org/TR/html4/frameset.dtd">
|
3
|
+
<HTML>
|
4
|
+
<HEAD>
|
5
|
+
<TITLE>A simple frameset document</TITLE>
|
6
|
+
</HEAD>
|
7
|
+
<FRAMESET cols="20%, 80%">
|
8
|
+
<FRAMESET rows="100, 200">
|
9
|
+
<FRAME src="contents_of_frame1.html">
|
10
|
+
<FRAME src="contents_of_frame2.gif">
|
11
|
+
</FRAMESET>
|
12
|
+
<FRAME src="contents_of_frame3.html">
|
13
|
+
<NOFRAMES>
|
14
|
+
<P>This frameset document contains:
|
15
|
+
<UL>
|
16
|
+
<LI><A href="contents_of_frame1.html">Some neat contents</A>
|
17
|
+
<LI><IMG src="contents_of_frame2.gif" alt="A neat image">
|
18
|
+
<LI><A href="contents_of_frame3.html">Some other neat contents</A>
|
19
|
+
</UL>
|
20
|
+
</NOFRAMES>
|
21
|
+
</FRAMESET>
|
22
|
+
</HTML>
|
data/tests/framedoc2.htm
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"><title>Google</title><style><!--
|
2
|
+
body,td,a,p,.h{font-family:arial,sans-serif;}
|
3
|
+
.h{font-size: 20px;}
|
4
|
+
.q{color:#0000cc;}
|
5
|
+
-->
|
6
|
+
</style>
|
7
|
+
<script>
|
8
|
+
<!--
|
9
|
+
function sf(){document.f.q.focus();}
|
10
|
+
function asq(event,el,oi,cad,ct,cd,sg){if(window.XMLHttpRequest){if(el.handledFirstTime){el.handledFirstTime=false;return false;}el.handledFirstTime=true;var e = window.encodeURIComponent ? encodeURIComponent : escape;var oi_param="";var cad_param="";if (oi) oi_param="&oi="+e(oi);if (cad) cad_param="&cad="+e(cad);var x=new XMLHttpRequest();x.open("GET","/url?sa=T"+oi_param+cad_param+"&ct="+e(ct)+"&cd="+e(cd)+"&url="+e(el.href).replace(/\+/g,"%2B")+"&ei="+sg,true);var m=event.altKey||event.metaKey;if(!m){x.onreadystatechange=function(){if(x.readyState==4){clearTimeout(timeoutid);el.dispatchEvent(event);}};var timeoutid=setTimeout(function(){x.abort();el.dispatchEvent(event);},2000);}x.send(null);return m;}return true;}
|
11
|
+
// -->
|
12
|
+
</script>
|
13
|
+
</head><body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onLoad=sf() topmargin=3 marginheight=3><center><table border=0 cellspacing=0 cellpadding=0 width=100%><tr><td align=right nowrap><font size=-1><b>...</b> | <a href="/url?sa=p&pref=ig&pval=3&q=http://www.google.com/" onmousedown="return asq(event,this,'promos','hppphou:def','pro','1','&sig2=')">Personalized Home</a> | <a href="/searchhistory/?hl=en">Search History</a> | <a href="https://www.google.com/accounts/ManageAccount">My Account</a> | <a href="http://www.google.com/accounts/Logout?continue=http://www.google.com/">Sign out</a></font></td></tr><tr height=4><td><img alt="" width=1 height=1></td></tr></table><img src="/intl/en/images/logo.gif" width=276 height=110 alt="Google"><br><br>
|
14
|
+
<form action=/search name=f><script><!--
|
15
|
+
function qs(el) {if (window.RegExp && window.encodeURIComponent) {var ue=el.href;var qe=encodeURIComponent(document.f.q.value);if(ue.indexOf("q=")!=-1){el.href=ue.replace(new RegExp("q=[^&$]*"),"q="+qe);}else{el.href=ue+"&q="+qe;}}return 1;}
|
16
|
+
// -->
|
17
|
+
</script><table border=0 cellspacing=0 cellpadding=4><tr><td nowrap><font size=-1><b>Web</b> <a id=1a class=q href="/imghp?hl=en&tab=wi" onClick="return qs(this);">Images</a> <a id=2a class=q href="http://groups.google.com/grphp?hl=en&tab=wg" onClick="return qs(this);">Groups</a> <a id=4a class=q href="http://news.google.com/nwshp?hl=en&tab=wn" onClick="return qs(this);">News</a> <a id=5a class=q href="http://froogle.google.com/frghp?hl=en&tab=wf" onClick="return qs(this);">Froogle</a> <a id=7a class=q href="/maphp?hl=en&tab=wl" onClick="return qs(this);">Maps</a> <b><a href="/intl/en/options/" class=q>more »</a></b></font></td></tr></table><table cellspacing=0 cellpadding=0><tr><td width=25%> </td><td align=center><input type=hidden name=hl value=en><input maxlength=2048 size=55 name=q value="" title="Google Search"><br><input type=submit value="Google Search" name=btnG><input type=submit value="I'm Feeling Lucky" name=btnI></td><td valign=top nowrap width=25%><font size=-2> <a href=/advanced_search?hl=en>Advanced Search</a><br> <a href=/preferences?hl=en>Preferences</a><br> <a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><br><font size=-1><a href="/intl/en/ads/">Advertising Programs</a> - <a href=/intl/en/services/>Business Solutions</a> - <a href=/intl/en/about.html>About Google</a> - <b><a href=http://www.google.se/>Go to Google Sverige</a></b></font><p><font size=-2>©2006 Google</font></p></center></body></html>
|
@@ -0,0 +1,17 @@
|
|
1
|
+
<!DOCTYPE html
|
2
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
3
|
+
"DTD/xhtml1-strict.dtd">
|
4
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
5
|
+
<head>
|
6
|
+
<title>This is the title</title>
|
7
|
+
</head>
|
8
|
+
<body>
|
9
|
+
<h1>First h1 heading</h1>
|
10
|
+
<p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
|
11
|
+
document.</p>
|
12
|
+
<h1>Second h1
|
13
|
+
heading</h1>
|
14
|
+
<table><tr><td>Test table</td></tr></table>
|
15
|
+
<H1>Third h1 heading</H1>
|
16
|
+
</body>
|
17
|
+
</html>
|
@@ -0,0 +1,14 @@
|
|
1
|
+
<html>
|
2
|
+
<head>
|
3
|
+
<title>This is the title</title>
|
4
|
+
</head>
|
5
|
+
<body>
|
6
|
+
<h1>First h1 heading</h1>
|
7
|
+
<p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
|
8
|
+
document.</p>
|
9
|
+
<h2>First h2
|
10
|
+
heading</h2>
|
11
|
+
<table><tr><td>Test table</td></tr></table>
|
12
|
+
<H3>First h3 heading</H3>
|
13
|
+
</body>
|
14
|
+
</html>
|
@@ -0,0 +1,12 @@
|
|
1
|
+
<script type="text/javascript">
|
2
|
+
function printpopup(url){
|
3
|
+
MyWin = window.open("","","scrollbars=no,resizable=yes,toolbar=no,location=no,directories=no,status=no,menubar=no,width=100,height=100");
|
4
|
+
with(MyWin.document){
|
5
|
+
open();
|
6
|
+
write("<html>\n<head><scr"+"ipt>awidth=(document.layers)?0:8;awidth+=(document.all)?4:0;aheight=(document.layers)?0:29;</scr"+"ipt>\n<title>This is not the title</title>\n</head>\n<body onLoad=\"window.resizeTo(document.images[0].width+awidth,document.images[0].height+aheight)\"marginwidth=0 marginheight=0 leftmargin=0 topmargin=0 rightmargin=0 style=\"overflow:hidden;\">\n");
|
7
|
+
write("<h1>This is not a document heading</h1>\n");
|
8
|
+
write("</body>\n</html>\n");
|
9
|
+
close();
|
10
|
+
}
|
11
|
+
}
|
12
|
+
</script>
|
@@ -0,0 +1,20 @@
|
|
1
|
+
<html lang="en">
|
2
|
+
<head>
|
3
|
+
<title>Sample</title>
|
4
|
+
</head>
|
5
|
+
<body>
|
6
|
+
<p>This is a document with an empty title element.</p>
|
7
|
+
<script type="text/javascript">
|
8
|
+
function printpopup(url){
|
9
|
+
MyWin = window.open("","","");
|
10
|
+
with(MyWin.document){
|
11
|
+
open();
|
12
|
+
write("<html><head><title>This is not the title</title>\n</head>\n<body>\n");
|
13
|
+
write("<h1>This is not a document heading</h1>\n");
|
14
|
+
write("</body>\n</html>\n");
|
15
|
+
close();
|
16
|
+
}
|
17
|
+
}
|
18
|
+
</script>
|
19
|
+
</body>
|
20
|
+
</html>
|
data/tests/imagedoc1.htm
ADDED
data/tests/imagedoc2.htm
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
<p><img src="image.png" alt=''></p>
|
data/tests/imagedoc3.htm
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
<html>
|
2
|
+
<body>
|
3
|
+
<h3>First heading</h3>
|
4
|
+
<p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">image document.</a>
|
5
|
+
<img src="noimagealt.png" border="0">
|
6
|
+
<p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">image document.</a>
|
7
|
+
<img src="/noimagealt2.png">
|
8
|
+
<p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">image document.</a>
|
9
|
+
<img src="../folder/noimagealt3.png" border="0">
|
10
|
+
</body>
|
11
|
+
</html>
|
data/tests/imagedoc4.htm
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
<html lang="en">
|
2
|
+
<head>
|
3
|
+
<title>This is the title</title>
|
4
|
+
</head>
|
5
|
+
<body>
|
6
|
+
<h1>First h1 heading</h1>
|
7
|
+
<p>This is a <b>minimal</b> <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
|
8
|
+
document.</p>
|
9
|
+
<p>This is a <font style="bold">bold</font> <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
|
10
|
+
document.</p>
|
11
|
+
<h1>Second h1
|
12
|
+
heading</h1>
|
13
|
+
<blink>No blinking text!</blink>
|
14
|
+
<table><tr><td>Test table</td></tr></table>
|
15
|
+
<H1>Third h1 heading</H1>
|
16
|
+
<marquee>Marquees are so nineties!</marquee>
|
17
|
+
</body>
|
18
|
+
</html>
|
@@ -0,0 +1,20 @@
|
|
1
|
+
<HTML>
|
2
|
+
<head>
|
3
|
+
<TITLE>This is the
|
4
|
+
title
|
5
|
+
|
6
|
+
</title>
|
7
|
+
<link rel="schema.DC" href="http://purl.org/DC/elements/1.0">
|
8
|
+
</head>
|
9
|
+
<body>
|
10
|
+
|
11
|
+
<!-- h3>Heading 3
|
12
|
+
</h3>
|
13
|
+
<h1>Heading 1</h1 -->
|
14
|
+
<p>S small document</p>
|
15
|
+
<p> more content </p>
|
16
|
+
<!-- second comment
|
17
|
+
|
18
|
+
-->
|
19
|
+
</body>
|
20
|
+
</HTML>
|
@@ -0,0 +1,17 @@
|
|
1
|
+
<!DOCTYPE html
|
2
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
3
|
+
"DTD/xhtml1-strict.dtd">
|
4
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
5
|
+
<head>
|
6
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
7
|
+
<title>This is the title</title>
|
8
|
+
<link rel="schema.DC" href="http://purl.org/DC/elements/1.0/" />
|
9
|
+
</head>
|
10
|
+
<body>
|
11
|
+
<h1>First h1 heading</h1>
|
12
|
+
<p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
|
13
|
+
document.</p>
|
14
|
+
<h1>Second h1 heading</h1>
|
15
|
+
<table><tr><td>Test table</td></tr></table>
|
16
|
+
</body>
|
17
|
+
</html>
|
data/tests/linkdoc1.htm
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
<html>
|
2
|
+
<head>
|
3
|
+
<title>This is a link document</title>
|
4
|
+
</head>
|
5
|
+
<body>
|
6
|
+
<h1>The first heading</h1>
|
7
|
+
<h2><a href="/news1">New sitem 1</a></h2>
|
8
|
+
<p>This is the text for the first news item. <a href="/news1">Read more </a></p>
|
9
|
+
<h2><a href="/news2">New sitem 2</a></h2>
|
10
|
+
<p>This is the text for the second news item. <a href="/news2">
|
11
|
+
Read more</a></p>
|
12
|
+
<h2><a href="/news1">New sitem 3</a></h2>
|
13
|
+
<p>This is the text for the third news item. <a href="/news3">Read
|
14
|
+
more</a></p>
|
15
|
+
<h2><a href="/news1">New sitem 4</a></h2>
|
16
|
+
<p>This is the text for the fourth news item. <a href="/news4">Read more</a></p>
|
17
|
+
</body>
|
18
|
+
</html>
|
data/tests/linkdoc2.htm
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
<html>
|
2
|
+
<head>
|
3
|
+
<title>This is a link document</title>
|
4
|
+
</head>
|
5
|
+
<body>
|
6
|
+
<h1>The first heading</h1>
|
7
|
+
<h2><a href="/news1">New sitem 1</a></h2>
|
8
|
+
<p>This is the text for the first news item. <a href="/news1" title="More on item 1">Read more</a></p>
|
9
|
+
<h2><a href="/news2">New sitem 2</a></h2>
|
10
|
+
<p>This is the text for the second news item. <a href="/news2">Read more</a></p>
|
11
|
+
</body>
|
12
|
+
</html>
|
data/tests/linkdoc3.htm
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
<html>
|
2
|
+
<head>
|
3
|
+
<title>This is a link document</title>
|
4
|
+
</head>
|
5
|
+
<body>
|
6
|
+
<h1>The first heading</h1>
|
7
|
+
<h2><a href="/news1">New sitem 1</a></h2>
|
8
|
+
<p>This is the text for the first news item.</p>
|
9
|
+
<h2><a href="/news2">New sitem 2</a></h2>
|
10
|
+
<p>This is the text for the second news item.</p>
|
11
|
+
<h2><a href="/news1">New sitem 3</a></h2>
|
12
|
+
<p>This is the text for the third news item.</p>
|
13
|
+
<h2><a href="/news1">New sitem 4</a></h2>
|
14
|
+
<p>This is the text for the fourth news item.</p>
|
15
|
+
</body>
|
16
|
+
</html>
|
data/tests/linkdoc4.htm
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
<html lang="en">
|
2
|
+
<head>
|
3
|
+
<title>This is the title</title>
|
4
|
+
<meta http-equiv="cache-control" content="no-cache" >
|
5
|
+
<meta http-equiv=
|
6
|
+
"refresh"
|
7
|
+
content=
|
8
|
+
"5" />
|
9
|
+
</head>
|
10
|
+
<body>
|
11
|
+
<p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
|
12
|
+
document.</p>
|
13
|
+
</body>
|
14
|
+
</html>
|
data/tests/raakt_test.rb
ADDED
@@ -0,0 +1,224 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
require File.dirname(__FILE__) + '/../lib/raakt'
|
3
|
+
require 'rubyful_soup'
|
4
|
+
|
5
|
+
# Test suite for Raakt::Test.
#
# Each test hands one or more fixture documents to a single check on the
# Raakt::Test instance created in #setup and asserts on the number of
# messages returned and, where relevant, on the +eid+ of a message.
#
# The data_* helpers used throughout are not defined in this file; they are
# presumably supplied by test_helper and return the contents of the matching
# fixture under tests/ -- TODO confirm against test_helper.rb.
class RaaktTest < Test::Unit::TestCase

  # Build a fresh checker before every test so no state leaks between tests.
  def setup
    @raakt = Raakt::Test.new
  end

  # Smoke test: runs every check against a large real-world page and prints
  # the result. There is intentionally no assertion; the test only fails if
  # the call raises.
  def test_all
    puts @raakt.all(data_full_google)
  end

  # check_images reports images without usable alt text.
  def test_check_images
    assert_equal 1, @raakt.check_images(data_imagedoc1).length
    assert_equal "missingalt", @raakt.check_images(data_imagedoc1)[0].eid

    assert_equal 0, @raakt.check_images(data_imagedoc2).length

    assert_equal 3, @raakt.check_images(data_imagedoc3).length

    assert_equal 1, @raakt.check_images(data_imagedoc4).length
  end

  # A document without any images must not produce image messages.
  def test_check_images_in_blank_doc
    assert_equal 0, @raakt.check_images(data_empty).length
  end

  # check_title distinguishes a missing title from an empty one.
  def test_check_title
    assert_equal 0, @raakt.check_title(data_xhtmldoc1).length
    assert_equal 1, @raakt.check_title(data_empty).length
    assert_equal "missingtitle", @raakt.check_title(data_empty)[0].eid

    assert_equal 1, @raakt.check_title(data_emptytitledoc).length
    assert_equal "emptytitle", @raakt.check_title(data_emptytitledoc)[0].eid

    assert_equal 0, @raakt.check_title(data_invalidhtmldoc1).length
    assert_equal 0, @raakt.check_title(data_invalidhtmldoc2).length
  end

  # headings returns the heading elements found in the document.
  def test_headings
    assert_equal 3, @raakt.headings(data_headingsdoc1).length
    assert_equal 0, @raakt.headings(data_invalidhtmldoc2).length
  end

  # level maps a heading tag name to its numeric level.
  def test_level
    assert_equal 1, @raakt.level("h1")
    assert_equal 2, @raakt.level("h2")
    assert_equal 6, @raakt.level("h6")
  end

  # check_has_heading reports documents that contain no heading at all.
  def test_check_has_heading
    assert_equal 1, @raakt.check_has_heading(data_empty).length
    assert_equal "missingheading", @raakt.check_has_heading(data_empty)[0].eid
    assert_equal 0, @raakt.check_has_heading(data_headingsdoc1).length
    assert_equal 0, @raakt.check_has_heading(data_headingsdoc9).length

    assert_equal 1, @raakt.check_has_heading(data_invalidhtmldoc2).length
    assert_equal "missingheading", @raakt.check_has_heading(data_invalidhtmldoc2)[0].eid
  end

  # check_document_structure reports a first heading that is not h1 and
  # heading levels that are out of order.
  def test_check_document_structure
    assert_equal 0, @raakt.check_document_structure(data_headingsdoc1).length
    assert_equal 1, @raakt.check_document_structure(data_headingsdoc3).length
    assert_equal "firsthnoth1", @raakt.check_document_structure(data_headingsdoc3)[0].eid
    assert_equal "wronghstructure", @raakt.check_document_structure(data_headingsdoc4)[0].eid
    assert_equal "firsthnoth1", @raakt.check_document_structure(data_headingsdoc5)[0].eid
    assert_equal "wronghstructure", @raakt.check_document_structure(data_headingsdoc5)[1].eid
    assert_equal 0, @raakt.check_document_structure(data_headingsdoc6).length
    assert_equal 0, @raakt.check_document_structure("").length
  end

  # check_for_nested_tables reports tables nested inside other tables.
  def test_check_for_nested_tables
    assert_equal 0, @raakt.check_for_nested_tables(data_tabledoc1).length
    assert_equal 0, @raakt.check_for_nested_tables(data_tabledoc2).length
    assert_equal 1, @raakt.check_for_nested_tables(data_tabledoc3).length
    assert_equal 0, @raakt.check_for_nested_tables(data_tabledoc4).length
    assert_equal 1, @raakt.check_for_nested_tables(data_tabledoc5).length
    assert_equal "hasnestedtables", @raakt.check_for_nested_tables(data_tabledoc3)[0].eid
  end

  # check_tables: message counts per table fixture.
  # (A leftover debug `puts` of the tabledoc4 result was removed; the same
  # call is asserted on directly below.)
  def test_check_tables
    assert_equal 0, @raakt.check_tables(data_tabledoc4).length
    assert_equal 0, @raakt.check_tables(data_tabledoc1).length
    assert_equal 2, @raakt.check_tables(data_tabledoc2).length
  end

  # NOTE: an earlier definition of test_check_for_formatting_elements
  # (expecting a single "boldused" message for invalidelements1) used to sit
  # here. Ruby keeps only the last definition of a method name within a
  # class, so that version was silently replaced by the definition further
  # down and never executed. Its expectations also contradicted the live
  # test, so it has been dropped rather than renamed.

  # check_for_language_info reports documents lacking language information.
  def test_check_for_language_info
    assert_equal 0, @raakt.check_for_language_info(data_xhtmldoc1).length
    assert_equal 1, @raakt.check_for_language_info(data_tabledoc2).length
    assert_equal 1, @raakt.check_for_language_info(data_tablelayoutdoc).length
  end

  # check_link_text reports link texts that are ambiguous within a document.
  def test_check_link_text
    assert_equal 1, @raakt.check_link_text(data_linkdoc1).length
    assert_equal "ambiguouslinktext", @raakt.check_link_text(data_linkdoc1)[0].eid
    assert_equal 0, @raakt.check_link_text(data_linkdoc3).length
    assert_equal 0, @raakt.check_link_text(data_linkdoc2).length
    assert_equal 1, @raakt.check_link_text(data_linkdoc4).length
  end

  # get_links returns one entry per link; index 3 of an entry is the link
  # text (see the [0][3] lookup below).
  def test_get_links
    assert_equal 8, @raakt.get_links(data_linkdoc1).length
    assert_equal 2, @raakt.get_links(data_linkdoc4).length
    assert_equal "Read more", @raakt.get_links(data_linkdoc4)[0][3]
  end

  # img_to_text yields the alt text of an image element.
  def test_img_to_text
    assert_equal "Read more", @raakt.img_to_text(BeautifulSoup.new("<img src='123' alt='Read more' />").img)
  end

  # elements_to_text flattens nested markup (including image alt text) into
  # plain text.
  def test_elements_to_text
    assert_equal "Read more about", @raakt.elements_to_text(BeautifulSoup.new("<a href='rrr'>Read <img src='123' alt='more' /> about</a>").a)
    assert_equal "A sample text here", @raakt.elements_to_text(BeautifulSoup.new("<a href='r'><strong>A</strong> sample <img src='123' alt='text' /> <b>here</b></a>").a)
  end

  # normalize_text collapses whitespace runs (spaces, tabs, newlines) and
  # trims leading whitespace.
  def test_normalize_text
    # NOTE(review): the first five inputs are identical as recovered here.
    # They presumably differed in the number/position of spaces before the
    # whitespace was collapsed by the page this file was recovered from --
    # TODO confirm against the original gem source.
    assert_equal "Read more", @raakt.normalize_text("Read more")
    assert_equal "Read more", @raakt.normalize_text("Read more")
    assert_equal "Read more", @raakt.normalize_text("Read more")
    assert_equal "Read more", @raakt.normalize_text("Read more")
    assert_equal "Read more", @raakt.normalize_text("Read more")
    assert_equal "Read more", @raakt.normalize_text("Read\n more")
    # Non-ASCII text must survive normalization. The "ä" was recovered from
    # a mis-encoded replacement character (Swedish "Läs mer" = "Read more").
    assert_equal "Läs mer", @raakt.normalize_text("Läs\n mer")
    assert_equal "Läs mer", @raakt.normalize_text("Läs \nmer")
    assert_equal "Read more", @raakt.normalize_text("Read \n\n\nmore")
    assert_equal "Read more", @raakt.normalize_text("Read \tmore")
    assert_equal "Read more", @raakt.normalize_text(" Read more")
  end

  # is_ambiguous_link compares two link entries. Judging by the cases below
  # the entries are [index, href, title, text]: same text with different
  # targets is ambiguous unless distinguishing titles are present.
  def test_is_ambiguous_link
    # Same text, different targets, empty titles.
    link_a = [1, "/news/1", "", "Read more"]
    link_b = [2, "/news/2", "", "Read more"]
    assert_equal true, @raakt.is_ambiguous_link(link_a, link_b)

    # Same text, different targets, but distinct titles disambiguate.
    link_c = [1, "/news/1", "More about first news item", "Read more"]
    link_d = [2, "/news/2", "More about second news item", "Read more"]
    assert_equal false, @raakt.is_ambiguous_link(link_c, link_d)

    # nil titles behave like empty titles.
    link_a = [1, "/news/1", nil, "Read more"]
    link_b = [2, "/news/2", nil, "Read more"]
    assert_equal true, @raakt.is_ambiguous_link(link_a, link_b)

    # Same text pointing at the same target is not ambiguous.
    link_g = [1, "/news/1", "", "Read more"]
    link_h = [2, "/news/1", "", "Read more"]
    assert_equal false, @raakt.is_ambiguous_link(link_g, link_h)

    # Texts that differ only in whitespace still count as the same text.
    link_i = [1, "/news/1", "", "Läs mer"]
    link_j = [2, "/news/2", "", "Läs\n mer"]
    assert_equal true, @raakt.is_ambiguous_link(link_i, link_j)

    link_k = [1, "/news/1", "", "Läs mer"]
    link_l = [2, "/news/2", "", "Läs \nmer"]
    assert_equal true, @raakt.is_ambiguous_link(link_k, link_l)
  end

  # get_labels returns the label elements of a document.
  def test_get_labels
    assert_equal 1, @raakt.get_labels(data_fielddoc1).length
    assert_equal 1, @raakt.get_labels(data_fielddoc2).length
    assert_equal 2, @raakt.get_labels(data_fielddoc3).length
  end

  # get_editable_fields returns the user-editable form fields of a document.
  def test_get_editable_fields
    assert_equal 1, @raakt.get_editable_fields(data_fielddoc1).length
    assert_equal 2, @raakt.get_editable_fields(data_fielddoc2).length
    assert_equal 3, @raakt.get_editable_fields(data_fielddoc3).length
  end

  # check_form reports editable fields that have no associated label.
  def test_check_form
    assert_equal 0, @raakt.check_form(data_fielddoc1).length
    assert_equal 1, @raakt.check_form(data_fielddoc2).length
    assert_equal "fieldmissinglabel", @raakt.check_form(data_fielddoc2)[0].eid
    assert_equal 1, @raakt.check_form(data_fielddoc3).length
    assert_equal "fieldmissinglabel", @raakt.check_form(data_fielddoc3)[0].eid
  end

  # is_frameset is truthy for frameset documents only.
  def test_is_frameset
    assert @raakt.is_frameset(data_framedoc1)
    assert @raakt.is_frameset(data_framedoc2)
    assert !@raakt.is_frameset(data_xhtmldoc1)
  end

  # check_frames: message counts for the two frameset fixtures.
  def test_check_frames
    assert_equal 3, @raakt.check_frames(data_framedoc1).length
    assert_equal 0, @raakt.check_frames(data_framedoc2).length
  end

  # check_for_formatting_elements reports presentational and flickering
  # elements, in that order for the invalidelements1 fixture.
  def test_check_for_formatting_elements
    invaliderrs = @raakt.check_for_formatting_elements(data_invalidelements1)
    assert_equal 2, invaliderrs.length
    assert_equal "missingsemantics", invaliderrs[0].eid
    assert_equal "hasflicker", invaliderrs[1].eid
    assert_equal 0, @raakt.check_for_formatting_elements(data_xhtmldoc1).length
  end

  # check_refresh reports meta refresh usage.
  def test_refresh
    assert_equal 1, @raakt.check_refresh(data_metarefreshdoc1).length
    assert_equal 1, @raakt.check_refresh(data_metarefreshdoc2).length
    assert_equal 1, @raakt.check_refresh(data_metarefreshdoc3).length
    assert_equal 0, @raakt.check_refresh(data_xhtmldoc1).length
  end

end
|