raakt 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/raakt.rb +454 -0
- data/tests/empty.htm +1 -0
- data/tests/emptytitledoc.htm +8 -0
- data/tests/fielddoc1.htm +2 -0
- data/tests/fielddoc2.htm +11 -0
- data/tests/fielddoc3.htm +14 -0
- data/tests/flickerdoc1.htm +0 -0
- data/tests/framedoc1.htm +22 -0
- data/tests/framedoc2.htm +8 -0
- data/tests/full_google.htm +17 -0
- data/tests/headingsdoc1.htm +17 -0
- data/tests/headingsdoc2.htm +14 -0
- data/tests/headingsdoc3.htm +6 -0
- data/tests/headingsdoc4.htm +9 -0
- data/tests/headingsdoc5.htm +9 -0
- data/tests/headingsdoc6.htm +6 -0
- data/tests/headingsdoc7.htm +8 -0
- data/tests/headingsdoc8.htm +12 -0
- data/tests/headingsdoc9.htm +20 -0
- data/tests/imagedoc1.htm +8 -0
- data/tests/imagedoc2.htm +1 -0
- data/tests/imagedoc3.htm +11 -0
- data/tests/imagedoc4.htm +7 -0
- data/tests/invalidelements1.htm +18 -0
- data/tests/invalidhtmldoc1.htm +10 -0
- data/tests/invalidhtmldoc2.htm +20 -0
- data/tests/invalidxhtmldoc1.htm +17 -0
- data/tests/linkdoc1.htm +18 -0
- data/tests/linkdoc2.htm +12 -0
- data/tests/linkdoc3.htm +16 -0
- data/tests/linkdoc4.htm +10 -0
- data/tests/metarefreshdoc1.htm +10 -0
- data/tests/metarefreshdoc2.htm +14 -0
- data/tests/metarefreshdoc3.htm +10 -0
- data/tests/nestedcomment.htm +7 -0
- data/tests/newlinetext.txt +3 -0
- data/tests/raakt_test.rb +224 -0
- data/tests/scriptdoc1.htm +15 -0
- data/tests/scriptdoc2.htm +10 -0
- data/tests/tabledoc1.htm +5 -0
- data/tests/tabledoc2.htm +9 -0
- data/tests/tabledoc3.htm +6 -0
- data/tests/tabledoc4.htm +17 -0
- data/tests/tabledoc5.htm +11 -0
- data/tests/tabledoc6.htm +11 -0
- data/tests/tablelayoutdoc.htm +16 -0
- data/tests/test_helper.rb +21 -0
- data/tests/xhtmldoc1.htm +14 -0
- metadata +100 -0
data/tests/empty.htm
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
<empty>Blank document</empty>
|
data/tests/fielddoc1.htm
ADDED
data/tests/fielddoc2.htm
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
<html>
|
2
|
+
<body>
|
3
|
+
<h3>First heading</h3>
|
4
|
+
<label for="textid">My label</label>
|
5
|
+
<p id="pid">This is a minimal <a href="http://www.w3.org/TR/xhtml1/">image document.</a>
|
6
|
+
<input type="text" id="textid" />
|
7
|
+
<input type="hidden" id="hiddenid"/>
|
8
|
+
<p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">image document.</a>
|
9
|
+
<INPUT TYPE='text' ID='myid'>
|
10
|
+
</body>
|
11
|
+
</html>
|
data/tests/fielddoc3.htm
ADDED
File without changes
|
data/tests/framedoc1.htm
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN"
|
2
|
+
"http://www.w3.org/TR/html4/frameset.dtd">
|
3
|
+
<HTML>
|
4
|
+
<HEAD>
|
5
|
+
<TITLE>A simple frameset document</TITLE>
|
6
|
+
</HEAD>
|
7
|
+
<FRAMESET cols="20%, 80%">
|
8
|
+
<FRAMESET rows="100, 200">
|
9
|
+
<FRAME src="contents_of_frame1.html">
|
10
|
+
<FRAME src="contents_of_frame2.gif">
|
11
|
+
</FRAMESET>
|
12
|
+
<FRAME src="contents_of_frame3.html">
|
13
|
+
<NOFRAMES>
|
14
|
+
<P>This frameset document contains:
|
15
|
+
<UL>
|
16
|
+
<LI><A href="contents_of_frame1.html">Some neat contents</A>
|
17
|
+
<LI><IMG src="contents_of_frame2.gif" alt="A neat image">
|
18
|
+
<LI><A href="contents_of_frame3.html">Some other neat contents</A>
|
19
|
+
</UL>
|
20
|
+
</NOFRAMES>
|
21
|
+
</FRAMESET>
|
22
|
+
</HTML>
|
data/tests/framedoc2.htm
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"><title>Google</title><style><!--
|
2
|
+
body,td,a,p,.h{font-family:arial,sans-serif;}
|
3
|
+
.h{font-size: 20px;}
|
4
|
+
.q{color:#0000cc;}
|
5
|
+
-->
|
6
|
+
</style>
|
7
|
+
<script>
|
8
|
+
<!--
|
9
|
+
function sf(){document.f.q.focus();}
|
10
|
+
function asq(event,el,oi,cad,ct,cd,sg){if(window.XMLHttpRequest){if(el.handledFirstTime){el.handledFirstTime=false;return false;}el.handledFirstTime=true;var e = window.encodeURIComponent ? encodeURIComponent : escape;var oi_param="";var cad_param="";if (oi) oi_param="&oi="+e(oi);if (cad) cad_param="&cad="+e(cad);var x=new XMLHttpRequest();x.open("GET","/url?sa=T"+oi_param+cad_param+"&ct="+e(ct)+"&cd="+e(cd)+"&url="+e(el.href).replace(/\+/g,"%2B")+"&ei="+sg,true);var m=event.altKey||event.metaKey;if(!m){x.onreadystatechange=function(){if(x.readyState==4){clearTimeout(timeoutid);el.dispatchEvent(event);}};var timeoutid=setTimeout(function(){x.abort();el.dispatchEvent(event);},2000);}x.send(null);return m;}return true;}
|
11
|
+
// -->
|
12
|
+
</script>
|
13
|
+
</head><body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onLoad=sf() topmargin=3 marginheight=3><center><table border=0 cellspacing=0 cellpadding=0 width=100%><tr><td align=right nowrap><font size=-1><b>...</b> | <a href="/url?sa=p&pref=ig&pval=3&q=http://www.google.com/" onmousedown="return asq(event,this,'promos','hppphou:def','pro','1','&sig2=')">Personalized Home</a> | <a href="/searchhistory/?hl=en">Search History</a> | <a href="https://www.google.com/accounts/ManageAccount">My Account</a> | <a href="http://www.google.com/accounts/Logout?continue=http://www.google.com/">Sign out</a></font></td></tr><tr height=4><td><img alt="" width=1 height=1></td></tr></table><img src="/intl/en/images/logo.gif" width=276 height=110 alt="Google"><br><br>
|
14
|
+
<form action=/search name=f><script><!--
|
15
|
+
function qs(el) {if (window.RegExp && window.encodeURIComponent) {var ue=el.href;var qe=encodeURIComponent(document.f.q.value);if(ue.indexOf("q=")!=-1){el.href=ue.replace(new RegExp("q=[^&$]*"),"q="+qe);}else{el.href=ue+"&q="+qe;}}return 1;}
|
16
|
+
// -->
|
17
|
+
</script><table border=0 cellspacing=0 cellpadding=4><tr><td nowrap><font size=-1><b>Web</b> <a id=1a class=q href="/imghp?hl=en&tab=wi" onClick="return qs(this);">Images</a> <a id=2a class=q href="http://groups.google.com/grphp?hl=en&tab=wg" onClick="return qs(this);">Groups</a> <a id=4a class=q href="http://news.google.com/nwshp?hl=en&tab=wn" onClick="return qs(this);">News</a> <a id=5a class=q href="http://froogle.google.com/frghp?hl=en&tab=wf" onClick="return qs(this);">Froogle</a> <a id=7a class=q href="/maphp?hl=en&tab=wl" onClick="return qs(this);">Maps</a> <b><a href="/intl/en/options/" class=q>more »</a></b></font></td></tr></table><table cellspacing=0 cellpadding=0><tr><td width=25%> </td><td align=center><input type=hidden name=hl value=en><input maxlength=2048 size=55 name=q value="" title="Google Search"><br><input type=submit value="Google Search" name=btnG><input type=submit value="I'm Feeling Lucky" name=btnI></td><td valign=top nowrap width=25%><font size=-2> <a href=/advanced_search?hl=en>Advanced Search</a><br> <a href=/preferences?hl=en>Preferences</a><br> <a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><br><font size=-1><a href="/intl/en/ads/">Advertising Programs</a> - <a href=/intl/en/services/>Business Solutions</a> - <a href=/intl/en/about.html>About Google</a> - <b><a href=http://www.google.se/>Go to Google Sverige</a></b></font><p><font size=-2>©2006 Google</font></p></center></body></html>
|
@@ -0,0 +1,17 @@
|
|
1
|
+
<!DOCTYPE html
|
2
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
3
|
+
"DTD/xhtml1-strict.dtd">
|
4
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
5
|
+
<head>
|
6
|
+
<title>This is the title</title>
|
7
|
+
</head>
|
8
|
+
<body>
|
9
|
+
<h1>First h1 heading</h1>
|
10
|
+
<p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
|
11
|
+
document.</p>
|
12
|
+
<h1>Second h1
|
13
|
+
heading</h1>
|
14
|
+
<table><tr><td>Test table</td></tr></table>
|
15
|
+
<H1>Third h1 heading</H1>
|
16
|
+
</body>
|
17
|
+
</html>
|
@@ -0,0 +1,14 @@
|
|
1
|
+
<html>
|
2
|
+
<head>
|
3
|
+
<title>This is the title</title>
|
4
|
+
</head>
|
5
|
+
<body>
|
6
|
+
<h1>First h1 heading</h1>
|
7
|
+
<p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
|
8
|
+
document.</p>
|
9
|
+
<h2>First h2
|
10
|
+
heading</h2>
|
11
|
+
<table><tr><td>Test table</td></tr></table>
|
12
|
+
<H3>First h3 heading</H3>
|
13
|
+
</body>
|
14
|
+
</html>
|
@@ -0,0 +1,12 @@
|
|
1
|
+
<script type="text/javascript">
|
2
|
+
function printpopup(url){
|
3
|
+
MyWin = window.open("","","scrollbars=no,resizable=yes,toolbar=no,location=no,directories=no,status=no,menubar=no,width=100,height=100");
|
4
|
+
with(MyWin.document){
|
5
|
+
open();
|
6
|
+
write("<html>\n<head><scr"+"ipt>awidth=(document.layers)?0:8;awidth+=(document.all)?4:0;aheight=(document.layers)?0:29;</scr"+"ipt>\n<title>This is not the title</title>\n</head>\n<body onLoad=\"window.resizeTo(document.images[0].width+awidth,document.images[0].height+aheight)\"marginwidth=0 marginheight=0 leftmargin=0 topmargin=0 rightmargin=0 style=\"overflow:hidden;\">\n");
|
7
|
+
write("<h1>This is not a document heading</h1>\n");
|
8
|
+
write("</body>\n</html>\n");
|
9
|
+
close();
|
10
|
+
}
|
11
|
+
}
|
12
|
+
</script>
|
@@ -0,0 +1,20 @@
|
|
1
|
+
<html lang="en">
|
2
|
+
<head>
|
3
|
+
<title>Sample</title>
|
4
|
+
</head>
|
5
|
+
<body>
|
6
|
+
<p>This is a document with an empty title element.</p>
|
7
|
+
<script type="text/javascript">
|
8
|
+
function printpopup(url){
|
9
|
+
MyWin = window.open("","","");
|
10
|
+
with(MyWin.document){
|
11
|
+
open();
|
12
|
+
write("<html><head><title>This is not the title</title>\n</head>\n<body>\n");
|
13
|
+
write("<h1>This is not a document heading</h1>\n");
|
14
|
+
write("</body>\n</html>\n");
|
15
|
+
close();
|
16
|
+
}
|
17
|
+
}
|
18
|
+
</script>
|
19
|
+
</body>
|
20
|
+
</html>
|
data/tests/imagedoc1.htm
ADDED
data/tests/imagedoc2.htm
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
<p><img src="image.png" alt=''></p>
|
data/tests/imagedoc3.htm
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
<html>
|
2
|
+
<body>
|
3
|
+
<h3>First heading</h3>
|
4
|
+
<p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">image document.</a>
|
5
|
+
<img src="noimagealt.png" border="0">
|
6
|
+
<p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">image document.</a>
|
7
|
+
<img src="/noimagealt2.png">
|
8
|
+
<p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">image document.</a>
|
9
|
+
<img src="../folder/noimagealt3.png" border="0">
|
10
|
+
</body>
|
11
|
+
</html>
|
data/tests/imagedoc4.htm
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
<html lang="en">
|
2
|
+
<head>
|
3
|
+
<title>This is the title</title>
|
4
|
+
</head>
|
5
|
+
<body>
|
6
|
+
<h1>First h1 heading</h1>
|
7
|
+
<p>This is a <b>minimal</b> <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
|
8
|
+
document.</p>
|
9
|
+
<p>This is a <font style="bold">bold</font> <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
|
10
|
+
document.</p>
|
11
|
+
<h1>Second h1
|
12
|
+
heading</h1>
|
13
|
+
<blink>No blinking text!</blink>
|
14
|
+
<table><tr><td>Test table</td></tr></table>
|
15
|
+
<H1>Third h1 heading</H1>
|
16
|
+
<marquee>Marquees are so nineties!</marquee>
|
17
|
+
</body>
|
18
|
+
</html>
|
@@ -0,0 +1,20 @@
|
|
1
|
+
<HTML>
|
2
|
+
<head>
|
3
|
+
<TITLE>This is the
|
4
|
+
title
|
5
|
+
|
6
|
+
</title>
|
7
|
+
<link rel="schema.DC" href="http://purl.org/DC/elements/1.0">
|
8
|
+
</head>
|
9
|
+
<body>
|
10
|
+
|
11
|
+
<!-- h3>Heading 3
|
12
|
+
</h3>
|
13
|
+
<h1>Heading 1</h1 -->
|
14
|
+
<p>S small document</p>
|
15
|
+
<p> more content </p>
|
16
|
+
<!-- second comment
|
17
|
+
|
18
|
+
-->
|
19
|
+
</body>
|
20
|
+
</HTML>
|
@@ -0,0 +1,17 @@
|
|
1
|
+
<!DOCTYPE html
|
2
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
3
|
+
"DTD/xhtml1-strict.dtd">
|
4
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
5
|
+
<head>
|
6
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
7
|
+
<title>This is the title</title>
|
8
|
+
<link rel="schema.DC" href="http://purl.org/DC/elements/1.0/" />
|
9
|
+
</head>
|
10
|
+
<body>
|
11
|
+
<h1>First h1 heading</h1>
|
12
|
+
<p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
|
13
|
+
document.</p>
|
14
|
+
<h1>Second h1 heading</h1>
|
15
|
+
<table><tr><td>Test table</td></tr></table>
|
16
|
+
</body>
|
17
|
+
</html>
|
data/tests/linkdoc1.htm
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
<html>
|
2
|
+
<head>
|
3
|
+
<title>This is a link document</title>
|
4
|
+
</head>
|
5
|
+
<body>
|
6
|
+
<h1>The first heading</h1>
|
7
|
+
<h2><a href="/news1">New sitem 1</a></h2>
|
8
|
+
<p>This is the text for the first news item. <a href="/news1">Read more </a></p>
|
9
|
+
<h2><a href="/news2">New sitem 2</a></h2>
|
10
|
+
<p>This is the text for the second news item. <a href="/news2">
|
11
|
+
Read more</a></p>
|
12
|
+
<h2><a href="/news1">New sitem 3</a></h2>
|
13
|
+
<p>This is the text for the third news item. <a href="/news3">Read
|
14
|
+
more</a></p>
|
15
|
+
<h2><a href="/news1">New sitem 4</a></h2>
|
16
|
+
<p>This is the text for the fourth news item. <a href="/news4">Read more</a></p>
|
17
|
+
</body>
|
18
|
+
</html>
|
data/tests/linkdoc2.htm
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
<html>
|
2
|
+
<head>
|
3
|
+
<title>This is a link document</title>
|
4
|
+
</head>
|
5
|
+
<body>
|
6
|
+
<h1>The first heading</h1>
|
7
|
+
<h2><a href="/news1">New sitem 1</a></h2>
|
8
|
+
<p>This is the text for the first news item. <a href="/news1" title="More on item 1">Read more</a></p>
|
9
|
+
<h2><a href="/news2">New sitem 2</a></h2>
|
10
|
+
<p>This is the text for the second news item. <a href="/news2">Read more</a></p>
|
11
|
+
</body>
|
12
|
+
</html>
|
data/tests/linkdoc3.htm
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
<html>
|
2
|
+
<head>
|
3
|
+
<title>This is a link document</title>
|
4
|
+
</head>
|
5
|
+
<body>
|
6
|
+
<h1>The first heading</h1>
|
7
|
+
<h2><a href="/news1">New sitem 1</a></h2>
|
8
|
+
<p>This is the text for the first news item.</p>
|
9
|
+
<h2><a href="/news2">New sitem 2</a></h2>
|
10
|
+
<p>This is the text for the second news item.</p>
|
11
|
+
<h2><a href="/news1">New sitem 3</a></h2>
|
12
|
+
<p>This is the text for the third news item.</p>
|
13
|
+
<h2><a href="/news1">New sitem 4</a></h2>
|
14
|
+
<p>This is the text for the fourth news item.</p>
|
15
|
+
</body>
|
16
|
+
</html>
|
data/tests/linkdoc4.htm
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
<html lang="en">
|
2
|
+
<head>
|
3
|
+
<title>This is the title</title>
|
4
|
+
<meta http-equiv="cache-control" content="no-cache" >
|
5
|
+
<meta http-equiv=
|
6
|
+
"refresh"
|
7
|
+
content=
|
8
|
+
"5" />
|
9
|
+
</head>
|
10
|
+
<body>
|
11
|
+
<p>This is a minimal <a href="http://www.w3.org/TR/xhtml1/">XHTML 1.0</a>
|
12
|
+
document.</p>
|
13
|
+
</body>
|
14
|
+
</html>
|
data/tests/raakt_test.rb
ADDED
@@ -0,0 +1,224 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
require File.dirname(__FILE__) + '/../lib/raakt'
|
3
|
+
require 'rubyful_soup'
|
4
|
+
|
5
|
+
class RaaktTest < Test::Unit::TestCase
|
6
|
+
|
7
|
+
def setup
|
8
|
+
@raakt = Raakt::Test.new
|
9
|
+
end
|
10
|
+
|
11
|
+
def test_all
|
12
|
+
puts @raakt.all(data_full_google)
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_check_images
|
16
|
+
assert_equal 1, @raakt.check_images(data_imagedoc1).length
|
17
|
+
assert_equal "missingalt", @raakt.check_images(data_imagedoc1)[0].eid
|
18
|
+
|
19
|
+
assert_equal 0, @raakt.check_images(data_imagedoc2).length
|
20
|
+
|
21
|
+
assert_equal 3, @raakt.check_images(data_imagedoc3).length
|
22
|
+
|
23
|
+
assert_equal 1, @raakt.check_images(data_imagedoc4).length
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_check_images_in_blank_doc
|
27
|
+
assert_equal 0, @raakt.check_images(data_empty).length
|
28
|
+
end
|
29
|
+
|
30
|
+
|
31
|
+
def test_check_title
|
32
|
+
assert_equal 0, @raakt.check_title(data_xhtmldoc1).length
|
33
|
+
assert_equal 1, @raakt.check_title(data_empty).length
|
34
|
+
assert_equal "missingtitle", @raakt.check_title(data_empty)[0].eid
|
35
|
+
|
36
|
+
assert_equal 1, @raakt.check_title(data_emptytitledoc).length
|
37
|
+
assert_equal "emptytitle", @raakt.check_title(data_emptytitledoc)[0].eid
|
38
|
+
|
39
|
+
assert_equal 0, @raakt.check_title(data_invalidhtmldoc1).length
|
40
|
+
assert_equal 0, @raakt.check_title(data_invalidhtmldoc2).length
|
41
|
+
end
|
42
|
+
|
43
|
+
|
44
|
+
def test_headings
|
45
|
+
assert_equal 3, @raakt.headings(data_headingsdoc1).length
|
46
|
+
assert_equal 0, @raakt.headings(data_invalidhtmldoc2).length
|
47
|
+
end
|
48
|
+
|
49
|
+
def test_level
|
50
|
+
assert_equal 1, @raakt.level("h1")
|
51
|
+
assert_equal 2, @raakt.level("h2")
|
52
|
+
assert_equal 6, @raakt.level("h6")
|
53
|
+
end
|
54
|
+
|
55
|
+
def test_check_has_heading
|
56
|
+
assert_equal 1, @raakt.check_has_heading(data_empty).length
|
57
|
+
assert_equal "missingheading", @raakt.check_has_heading(data_empty)[0].eid
|
58
|
+
assert_equal 0, @raakt.check_has_heading(data_headingsdoc1).length
|
59
|
+
assert_equal 0, @raakt.check_has_heading(data_headingsdoc9).length
|
60
|
+
|
61
|
+
assert_equal 1, @raakt.check_has_heading(data_invalidhtmldoc2).length
|
62
|
+
assert_equal "missingheading", @raakt.check_has_heading(data_invalidhtmldoc2)[0].eid
|
63
|
+
end
|
64
|
+
|
65
|
+
def test_check_document_structure
|
66
|
+
assert_equal 0, @raakt.check_document_structure(data_headingsdoc1).length
|
67
|
+
assert_equal 1, @raakt.check_document_structure(data_headingsdoc3).length
|
68
|
+
assert_equal "firsthnoth1", @raakt.check_document_structure(data_headingsdoc3)[0].eid
|
69
|
+
assert_equal "wronghstructure", @raakt.check_document_structure(data_headingsdoc4)[0].eid
|
70
|
+
assert_equal "firsthnoth1", @raakt.check_document_structure(data_headingsdoc5)[0].eid
|
71
|
+
assert_equal "wronghstructure", @raakt.check_document_structure(data_headingsdoc5)[1].eid
|
72
|
+
assert_equal 0, @raakt.check_document_structure(data_headingsdoc6).length
|
73
|
+
assert_equal 0, @raakt.check_document_structure("").length
|
74
|
+
end
|
75
|
+
|
76
|
+
|
77
|
+
def test_check_for_nested_tables
|
78
|
+
assert_equal 0, @raakt.check_for_nested_tables(data_tabledoc1).length
|
79
|
+
assert_equal 0, @raakt.check_for_nested_tables(data_tabledoc2).length
|
80
|
+
assert_equal 1, @raakt.check_for_nested_tables(data_tabledoc3).length
|
81
|
+
assert_equal 0, @raakt.check_for_nested_tables(data_tabledoc4).length
|
82
|
+
assert_equal 1, @raakt.check_for_nested_tables(data_tabledoc5).length
|
83
|
+
assert_equal "hasnestedtables", @raakt.check_for_nested_tables(data_tabledoc3)[0].eid
|
84
|
+
end
|
85
|
+
|
86
|
+
def test_check_tables
|
87
|
+
puts @raakt.check_tables(data_tabledoc4).to_s
|
88
|
+
assert_equal 0, @raakt.check_tables(data_tabledoc4).length
|
89
|
+
assert_equal 0, @raakt.check_tables(data_tabledoc1).length
|
90
|
+
assert_equal 2, @raakt.check_tables(data_tabledoc2).length
|
91
|
+
end
|
92
|
+
|
93
|
+
|
94
|
+
def test_check_for_formatting_elements
|
95
|
+
assert_equal 1, @raakt.check_for_formatting_elements(data_invalidelements1).length
|
96
|
+
assert_equal "boldused", @raakt.check_for_formatting_elements(data_invalidelements1)[0].eid
|
97
|
+
end
|
98
|
+
|
99
|
+
|
100
|
+
def test_check_for_language_info
|
101
|
+
assert_equal 0, @raakt.check_for_language_info(data_xhtmldoc1).length
|
102
|
+
assert_equal 1, @raakt.check_for_language_info(data_tabledoc2).length
|
103
|
+
assert_equal 1, @raakt.check_for_language_info(data_tablelayoutdoc).length
|
104
|
+
end
|
105
|
+
|
106
|
+
|
107
|
+
def test_check_link_text
|
108
|
+
assert_equal 1, @raakt.check_link_text(data_linkdoc1).length
|
109
|
+
assert_equal "ambiguouslinktext", @raakt.check_link_text(data_linkdoc1)[0].eid
|
110
|
+
assert_equal 0, @raakt.check_link_text(data_linkdoc3).length
|
111
|
+
assert_equal 0, @raakt.check_link_text(data_linkdoc2).length
|
112
|
+
assert_equal 1, @raakt.check_link_text(data_linkdoc4).length
|
113
|
+
end
|
114
|
+
|
115
|
+
|
116
|
+
def test_get_links
|
117
|
+
assert_equal 8, @raakt.get_links(data_linkdoc1).length
|
118
|
+
assert_equal 2, @raakt.get_links(data_linkdoc4).length
|
119
|
+
assert_equal "Read more", @raakt.get_links(data_linkdoc4)[0][3]
|
120
|
+
end
|
121
|
+
|
122
|
+
def test_img_to_text
|
123
|
+
assert_equal "Read more", @raakt.img_to_text(BeautifulSoup.new("<img src='123' alt='Read more' />").img)
|
124
|
+
end
|
125
|
+
|
126
|
+
def test_elements_to_text
|
127
|
+
assert_equal "Read more about", @raakt.elements_to_text(BeautifulSoup.new("<a href='rrr'>Read <img src='123' alt='more' /> about</a>").a)
|
128
|
+
assert_equal "A sample text here", @raakt.elements_to_text(BeautifulSoup.new("<a href='r'><strong>A</strong> sample <img src='123' alt='text' /> <b>here</b></a>").a)
|
129
|
+
end
|
130
|
+
|
131
|
+
def test_normalize_text
|
132
|
+
assert_equal "Read more", @raakt.normalize_text("Read more")
|
133
|
+
assert_equal "Read more", @raakt.normalize_text("Read more")
|
134
|
+
assert_equal "Read more", @raakt.normalize_text("Read more")
|
135
|
+
assert_equal "Read more", @raakt.normalize_text("Read more")
|
136
|
+
assert_equal "Read more", @raakt.normalize_text("Read more")
|
137
|
+
assert_equal "Read more", @raakt.normalize_text("Read\n more")
|
138
|
+
assert_equal "Läs mer", @raakt.normalize_text("Läs\n mer")
|
139
|
+
assert_equal "Läs mer", @raakt.normalize_text("Läs \nmer")
|
140
|
+
assert_equal "Read more", @raakt.normalize_text("Read \n\n\nmore")
|
141
|
+
assert_equal "Read more", @raakt.normalize_text("Read \tmore")
|
142
|
+
assert_equal "Read more", @raakt.normalize_text(" Read more")
|
143
|
+
end
|
144
|
+
|
145
|
+
def test_is_ambiguous_link
|
146
|
+
link_a = [1, "/news/1", "", "Read more"]
|
147
|
+
link_b = [2, "/news/2", "", "Read more"]
|
148
|
+
assert_equal true, @raakt.is_ambiguous_link(link_a, link_b)
|
149
|
+
|
150
|
+
link_c = [1, "/news/1", "More about first news item", "Read more"]
|
151
|
+
link_d = [2, "/news/2", "More about second news item", "Read more"]
|
152
|
+
assert_equal false, @raakt.is_ambiguous_link(link_c, link_d)
|
153
|
+
|
154
|
+
link_a = [1, "/news/1", nil, "Read more"]
|
155
|
+
link_b = [2, "/news/2", nil, "Read more"]
|
156
|
+
assert_equal true, @raakt.is_ambiguous_link(link_a, link_b)
|
157
|
+
|
158
|
+
link_g = [1, "/news/1", "", "Read more"]
|
159
|
+
link_h = [2, "/news/1", "", "Read more"]
|
160
|
+
assert_equal false, @raakt.is_ambiguous_link(link_g, link_h)
|
161
|
+
|
162
|
+
link_i = [1, "/news/1", "", "Läs mer"]
|
163
|
+
link_j = [2, "/news/2", "", "Läs\n mer"]
|
164
|
+
assert_equal true, @raakt.is_ambiguous_link(link_i, link_j)
|
165
|
+
|
166
|
+
link_k = [1, "/news/1", "", "Läs mer"]
|
167
|
+
link_l = [2, "/news/2", "", "Läs \nmer"]
|
168
|
+
assert_equal true, @raakt.is_ambiguous_link(link_k, link_l)
|
169
|
+
end
|
170
|
+
|
171
|
+
|
172
|
+
def test_get_labels
|
173
|
+
assert_equal 1, @raakt.get_labels(data_fielddoc1).length
|
174
|
+
assert_equal 1, @raakt.get_labels(data_fielddoc2).length
|
175
|
+
assert_equal 2, @raakt.get_labels(data_fielddoc3).length
|
176
|
+
end
|
177
|
+
|
178
|
+
|
179
|
+
def test_get_editable_fields
|
180
|
+
assert_equal 1, @raakt.get_editable_fields(data_fielddoc1).length
|
181
|
+
assert_equal 2, @raakt.get_editable_fields(data_fielddoc2).length
|
182
|
+
assert_equal 3, @raakt.get_editable_fields(data_fielddoc3).length
|
183
|
+
end
|
184
|
+
|
185
|
+
|
186
|
+
def test_check_form
|
187
|
+
assert_equal 0, @raakt.check_form(data_fielddoc1).length
|
188
|
+
assert_equal 1, @raakt.check_form(data_fielddoc2).length
|
189
|
+
assert_equal "fieldmissinglabel", @raakt.check_form(data_fielddoc2)[0].eid
|
190
|
+
assert_equal 1, @raakt.check_form(data_fielddoc3).length
|
191
|
+
assert_equal "fieldmissinglabel", @raakt.check_form(data_fielddoc3)[0].eid
|
192
|
+
end
|
193
|
+
|
194
|
+
|
195
|
+
def test_is_frameset
|
196
|
+
assert @raakt.is_frameset(data_framedoc1)
|
197
|
+
assert @raakt.is_frameset(data_framedoc2)
|
198
|
+
assert !@raakt.is_frameset(data_xhtmldoc1)
|
199
|
+
end
|
200
|
+
|
201
|
+
|
202
|
+
def test_check_frames
|
203
|
+
assert_equal 3, @raakt.check_frames(data_framedoc1).length
|
204
|
+
assert_equal 0, @raakt.check_frames(data_framedoc2).length
|
205
|
+
end
|
206
|
+
|
207
|
+
|
208
|
+
def test_check_for_formatting_elements
|
209
|
+
invaliderrs = @raakt.check_for_formatting_elements(data_invalidelements1)
|
210
|
+
assert_equal 2, invaliderrs.length
|
211
|
+
assert_equal "missingsemantics", invaliderrs[0].eid
|
212
|
+
assert_equal "hasflicker", invaliderrs[1].eid
|
213
|
+
assert_equal 0, @raakt.check_for_formatting_elements(data_xhtmldoc1).length
|
214
|
+
end
|
215
|
+
|
216
|
+
|
217
|
+
def test_refresh
|
218
|
+
assert_equal 1, @raakt.check_refresh(data_metarefreshdoc1).length
|
219
|
+
assert_equal 1, @raakt.check_refresh(data_metarefreshdoc2).length
|
220
|
+
assert_equal 1, @raakt.check_refresh(data_metarefreshdoc3).length
|
221
|
+
assert_equal 0, @raakt.check_refresh(data_xhtmldoc1).length
|
222
|
+
end
|
223
|
+
|
224
|
+
end
|