pismo 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,152 +0,0 @@
1
- require 'helper'
2
-
3
- class TestReadability < Test::Unit::TestCase
4
- context "Readability" do
5
- setup do
6
- @simple_html_fixture = <<-HTML
7
- <html>
8
- <head>
9
- <title>title!</title>
10
- </head>
11
- <body class='comment'>
12
- <div>
13
- <p class='comment'>a comment</p>
14
- <div class='comment' id='body'>real content</div>
15
- <div id="contains_blockquote"><blockquote>something in a table</blockquote></div>
16
- </div>
17
- </body>
18
- </html>
19
- HTML
20
- end
21
-
22
- context "transformMisusedDivsIntoParagraphs" do
23
- setup do
24
- @doc = Readability::Document.new(@simple_html_fixture)
25
- @doc.transform_misused_divs_into_paragraphs!
26
- end
27
-
28
- should "transform divs containing no block elements into <p>s" do
29
- assert_equal "p", @doc.html.css("#body").first.name
30
- end
31
-
32
- should "not transform divs that contain block elements" do
33
- assert_equal "div", @doc.html.css("#contains_blockquote").first.name
34
- end
35
- end
36
-
37
- context "score_node" do
38
- setup do
39
- @doc = Readability::Document.new(<<-HTML)
40
- <html>
41
- <body>
42
- <div id='elem1'>
43
- <p>some content</p>
44
- </div>
45
- <th id='elem2'>
46
- <p>some other content</p>
47
- </th>
48
- </body>
49
- </html>
50
- HTML
51
- @elem1 = @doc.html.css("#elem1").first
52
- @elem2 = @doc.html.css("#elem2").first
53
- end
54
-
55
- should "like <div>s more than <th>s" do
56
- assert @doc.score_node(@elem1)[:content_score] > @doc.score_node(@elem2)[:content_score]
57
- end
58
-
59
- should "like classes like text more than classes like comment" do
60
- @elem2.name = "div"
61
- assert_equal @doc.score_node(@elem2)[:content_score], @doc.score_node(@elem1)[:content_score]
62
- @elem1['class'] = "text"
63
- @elem2['class'] = "comment"
64
- assert @doc.score_node(@elem1)[:content_score] > @doc.score_node(@elem2)[:content_score]
65
- end
66
- end
67
-
68
- context "remove_unlikely_candidates!" do
69
- setup do
70
- @doc = Readability::Document.new(@simple_html_fixture)
71
- @doc.remove_unlikely_candidates!
72
- end
73
-
74
- should "remove things that have class comment" do
75
- assert @doc.html.inner_html !~ /a comment/
76
- end
77
-
78
- should "not remove body tags" do
79
- assert @doc.html.inner_html =~ /<\/body>/
80
- end
81
-
82
- should "not remove things with class comment and id body" do
83
- assert @doc.html.inner_html =~ /real content/
84
- end
85
- end
86
-
87
- context "score_paragraphs" do
88
- setup do
89
- @doc = Readability::Document.new(%{
90
- <html>
91
- <head>
92
- <title>title!</title>
93
- </head>
94
- <body id="body">
95
- <div id="div1">
96
- <div id="div2>
97
- <p id="some_comment">a comment</p>
98
- </div>
99
- <p id="some_text">some text</p>
100
- </div>
101
- <div id="div3">
102
- <p id="some_text2">some more text</p>
103
- </div>
104
- </body>
105
- </html>
106
- })
107
- @candidates = @doc.score_paragraphs(0)
108
- end
109
-
110
- should "score elements in the document" do
111
- assert_equal 3, @candidates.values.length
112
- end
113
-
114
- should "prefer the body in this particular example" do
115
- assert_equal "body", @candidates.values.sort { |a, b|
116
- b[:content_score] <=> a[:content_score]
117
- }.first[:elem][:id]
118
- end
119
- end
120
-
121
- context "the cant_read.html fixture" do
122
- should "work on the cant_read.html fixture with some allowed tags" do
123
- allowed_tags = %w[div span table tr td p i strong u h1 h2 h3 h4 pre code br a]
124
- allowed_attributes = %w[href]
125
- html = File.read(HTML_DIRECTORY + "/cant_read.html")
126
- assert Readability::Document.new(html, :tags => allowed_tags, :attributes => allowed_attributes).content.match(/Can you talk a little about how you developed the looks for the/)
127
- end
128
- end
129
-
130
- context "general functionality" do
131
- setup do
132
- @doc = Readability::Document.new("<html><head><title>title!</title></head><body><div><p>Some content</p></div></body>",
133
- :min_text_length => 0, :retry_length => 1)
134
- end
135
-
136
- should "return the main page content" do
137
- assert @doc.content.match("Some content")
138
- end
139
- end
140
-
141
- context "ignoring sidebars" do
142
- setup do
143
- @doc = Readability::Document.new("<html><head><title>title!</title></head><body><div><p>Some content</p></div><div class='sidebar'><p>sidebar<p></div></body>",
144
- :min_text_length => 0, :retry_length => 1)
145
- end
146
-
147
- should "not return the sidebar" do
148
- assert !@doc.content.match("sidebar")
149
- end
150
- end
151
- end
152
- end