pismo 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,152 +0,0 @@
1
- require 'helper'
2
-
3
# Unit tests for Readability::Document, organised as nested context/setup/should
# blocks on top of Test::Unit::TestCase.
# This text is a removal diff: every source line carries a leading "- " marker
# and is preceded by a line holding its original line number.
- class TestReadability < Test::Unit::TestCase
4
- context "Readability" do
5
# Shared HTML fixture: a <body class='comment'> holding a paragraph with class
# "comment", a div with class "comment" and id "body", and a div that wraps a
# <blockquote>. Several contexts below build their document from it.
- setup do
6
- @simple_html_fixture = <<-HTML
7
- <html>
8
- <head>
9
- <title>title!</title>
10
- </head>
11
- <body class='comment'>
12
- <div>
13
- <p class='comment'>a comment</p>
14
- <div class='comment' id='body'>real content</div>
15
- <div id="contains_blockquote"><blockquote>something in a table</blockquote></div>
16
- </div>
17
- </body>
18
- </html>
19
- HTML
20
- end
21
-
22
# Runs transform_misused_divs_into_paragraphs! on the shared fixture before each
# check. The context label is camelCase while the method under test is
# snake_case; the label is only a description string.
- context "transformMisusedDivsIntoParagraphs" do
23
- setup do
24
- @doc = Readability::Document.new(@simple_html_fixture)
25
- @doc.transform_misused_divs_into_paragraphs!
26
- end
27
-
28
# "#body" holds only text in the fixture, so it is expected to come back as a <p>.
- should "transform divs containing no block elements into <p>s" do
29
- assert_equal "p", @doc.html.css("#body").first.name
30
- end
31
-
32
# "#contains_blockquote" wraps a <blockquote>, so it is expected to stay a <div>.
- should "not transform divs that contain block elements" do
33
- assert_equal "div", @doc.html.css("#contains_blockquote").first.name
34
- end
35
- end
36
-
37
# Compares score_node results for a <div> and a <th> that each wrap one paragraph.
- context "score_node" do
38
- setup do
39
- @doc = Readability::Document.new(<<-HTML)
40
- <html>
41
- <body>
42
- <div id='elem1'>
43
- <p>some content</p>
44
- </div>
45
- <th id='elem2'>
46
- <p>some other content</p>
47
- </th>
48
- </body>
49
- </html>
50
- HTML
51
- @elem1 = @doc.html.css("#elem1").first
52
- @elem2 = @doc.html.css("#elem2").first
53
- end
54
-
55
# With no class attributes set, only the tag name differs between the two nodes.
- should "like <div>s more than <th>s" do
56
- assert @doc.score_node(@elem1)[:content_score] > @doc.score_node(@elem2)[:content_score]
57
- end
58
-
59
# elem2 is first renamed to "div" and both scores are asserted equal; only then
# are the class attributes set, so the final comparison isolates the class effect.
- should "like classes like text more than classes like comment" do
60
- @elem2.name = "div"
61
- assert_equal @doc.score_node(@elem2)[:content_score], @doc.score_node(@elem1)[:content_score]
62
- @elem1['class'] = "text"
63
- @elem2['class'] = "comment"
64
- assert @doc.score_node(@elem1)[:content_score] > @doc.score_node(@elem2)[:content_score]
65
- end
66
- end
67
-
68
# Runs remove_unlikely_candidates! on the shared fixture, then inspects the
# remaining markup through @doc.html.inner_html.
- context "remove_unlikely_candidates!" do
69
- setup do
70
- @doc = Readability::Document.new(@simple_html_fixture)
71
- @doc.remove_unlikely_candidates!
72
- end
73
-
74
# The text "a comment" (from <p class='comment'>) must no longer appear.
- should "remove things that have class comment" do
75
- assert @doc.html.inner_html !~ /a comment/
76
- end
77
-
78
# The fixture's <body> also has class 'comment'; its closing tag must still be present.
- should "not remove body tags" do
79
- assert @doc.html.inner_html =~ /<\/body>/
80
- end
81
-
82
# The div with class 'comment' and id 'body' must keep its "real content" text.
- should "not remove things with class comment and id body" do
83
- assert @doc.html.inner_html =~ /real content/
84
- end
85
- end
86
-
87
# Exercises score_paragraphs on a document with nested divs and three paragraphs.
- context "score_paragraphs" do
88
# NOTE(review): the fixture below opens div2 as id="div2> with no closing quote.
# It looks like a typo, but the expected candidate count of 3 may depend on how
# the HTML parser recovers from it. Confirm before correcting the markup.
- setup do
89
- @doc = Readability::Document.new(%{
90
- <html>
91
- <head>
92
- <title>title!</title>
93
- </head>
94
- <body id="body">
95
- <div id="div1">
96
- <div id="div2>
97
- <p id="some_comment">a comment</p>
98
- </div>
99
- <p id="some_text">some text</p>
100
- </div>
101
- <div id="div3">
102
- <p id="some_text2">some more text</p>
103
- </div>
104
- </body>
105
- </html>
106
- })
107
# The argument 0 is presumably a minimum text length; verify against score_paragraphs.
- @candidates = @doc.score_paragraphs(0)
108
- end
109
-
110
# @candidates is used as a hash here; the check is on the number of its values.
- should "score elements in the document" do
111
- assert_equal 3, @candidates.values.length
112
- end
113
-
114
# Sorts the candidates by :content_score, highest first, and checks that the
# top candidate's element has id "body".
- should "prefer the body in this particular example" do
115
- assert_equal "body", @candidates.values.sort { |a, b|
116
- b[:content_score] <=> a[:content_score]
117
- }.first[:elem][:id]
118
- end
119
- end
120
-
121
# Runs against an HTML file loaded from disk rather than an inline fixture.
- context "the cant_read.html fixture" do
122
# HTML_DIRECTORY is not defined in this file (presumably it comes from 'helper').
# The document is built with :tags and :attributes options and its content must
# still contain the quoted sentence.
- should "work on the cant_read.html fixture with some allowed tags" do
123
- allowed_tags = %w[div span table tr td p i strong u h1 h2 h3 h4 pre code br a]
124
- allowed_attributes = %w[href]
125
- html = File.read(HTML_DIRECTORY + "/cant_read.html")
126
- assert Readability::Document.new(html, :tags => allowed_tags, :attributes => allowed_attributes).content.match(/Can you talk a little about how you developed the looks for the/)
127
- end
128
- end
129
-
130
# Smallest end-to-end check: a single paragraph inside a div.
- context "general functionality" do
131
# :min_text_length => 0 and :retry_length => 1 are presumably needed so that this
# very short fixture is not rejected; confirm against Readability::Document.
- setup do
132
- @doc = Readability::Document.new("<html><head><title>title!</title></head><body><div><p>Some content</p></div></body>",
133
- :min_text_length => 0, :retry_length => 1)
134
- end
135
-
136
- should "return the main page content" do
137
- assert @doc.content.match("Some content")
138
- end
139
- end
140
-
141
# Same document as "general functionality" plus a second div with class 'sidebar'.
- context "ignoring sidebars" do
142
# NOTE(review): the sidebar markup is "<p>sidebar<p>"; the second <p> was probably
# meant to be </p>. Left unchanged because the fixture text is part of the test.
- setup do
143
- @doc = Readability::Document.new("<html><head><title>title!</title></head><body><div><p>Some content</p></div><div class='sidebar'><p>sidebar<p></div></body>",
144
- :min_text_length => 0, :retry_length => 1)
145
- end
146
-
147
# The extracted content must not contain the word "sidebar".
- should "not return the sidebar" do
148
- assert !@doc.content.match("sidebar")
149
- end
150
- end
151
- end
152
- end