ruby-readability 0.7.0 → 0.7.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/workflows/ruby.yml +25 -0
- data/.rspec +1 -1
- data/README.md +2 -5
- data/lib/readability.rb +9 -4
- data/ruby-readability.gemspec +1 -4
- data/spec/fixtures/codinghorror.html +189 -0
- data/spec/fixtures/images/Confusion_of_Tongues.png +0 -0
- data/spec/fixtures/images/JohnPinhole.jpg +0 -0
- data/spec/fixtures/nested_images.html +11 -0
- data/spec/readability_spec.rb +254 -99
- data/spec/spec_helper.rb +0 -6
- metadata +28 -35
- data/.travis.yml +0 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 906a25fd00e8fc221c84aa41fedf38bbd3045aa0e4a543ff16a1d494e59c3a92
|
4
|
+
data.tar.gz: bf28e458f7fb7f87a49ea71f16e736191c53130b91bdf2203cf260e6dce99aee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e2d262b6c4f0d7a2146718d3e16c0dd8973b217a9fe0ba850d03a456c68b7bd4355cbdd0a78454b09f6f50717c87ac8da524d42d99e78e0f362830c554376fdd
|
7
|
+
data.tar.gz: 6306f195c8d40842c0a4ed8ab2cfab1648fc562b03ba3137a0fd8c68ecb7a3668357c83abefd2b76bcac06efc961cdd042be10f44760aa102e34cdce2fe5d6d4
|
@@ -0,0 +1,25 @@
|
|
1
|
+
name: Ruby
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches: [ master ]
|
6
|
+
pull_request:
|
7
|
+
branches: [ master ]
|
8
|
+
|
9
|
+
jobs:
|
10
|
+
test:
|
11
|
+
|
12
|
+
runs-on: ubuntu-latest
|
13
|
+
strategy:
|
14
|
+
matrix:
|
15
|
+
ruby-version: ['2.7']
|
16
|
+
|
17
|
+
steps:
|
18
|
+
- uses: actions/checkout@v2
|
19
|
+
- name: Set up Ruby
|
20
|
+
uses: ruby/setup-ruby@v1
|
21
|
+
with:
|
22
|
+
ruby-version: ${{ matrix.ruby-version }}
|
23
|
+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
24
|
+
- name: Run tests
|
25
|
+
run: bundle exec rspec
|
data/.rspec
CHANGED
@@ -1,2 +1,2 @@
|
|
1
1
|
--colour
|
2
|
-
--format
|
2
|
+
--format documentation -c
|
data/README.md
CHANGED
@@ -7,7 +7,7 @@ webpage. It is a Ruby port of arc90's readability project.
|
|
7
7
|
Build Status
|
8
8
|
------------
|
9
9
|
|
10
|
-
[![
|
10
|
+
[![Ruby](https://github.com/cantino/ruby-readability/actions/workflows/ruby.yml/badge.svg?branch=master)](https://github.com/cantino/ruby-readability/actions/workflows/ruby.yml)
|
11
11
|
|
12
12
|
Install
|
13
13
|
-------
|
@@ -78,6 +78,7 @@ feature requires that the `fastimage` gem be installed.
|
|
78
78
|
Related Projects
|
79
79
|
----------------
|
80
80
|
|
81
|
+
* [readability.cr](https://github.com/joenas/readability.cr) - Port of ruby-readability's port of arc90's readability project to Crystal
|
81
82
|
* [newspaper](https://github.com/codelucas/newspaper) is an advanced news extraction, article extraction, and content curation library for Python.
|
82
83
|
|
83
84
|
Potential Issues
|
@@ -102,7 +103,3 @@ License
|
|
102
103
|
This code is under the Apache License 2.0. See <http://www.apache.org/licenses/LICENSE-2.0>.
|
103
104
|
|
104
105
|
Ruby port by cantino, starrhorne, libc, and iterationlabs. Special thanks to fizx and marcosinger.
|
105
|
-
|
106
|
-
|
107
|
-
[![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/cantino/ruby-readability/trend.png)](https://bitdeli.com/free "Bitdeli Badge")
|
108
|
-
|
data/lib/readability.rb
CHANGED
@@ -17,7 +17,9 @@ module Readability
|
|
17
17
|
:min_image_height => 80,
|
18
18
|
:ignore_image_format => [],
|
19
19
|
:blacklist => nil,
|
20
|
-
:whitelist => nil
|
20
|
+
:whitelist => nil,
|
21
|
+
:elements_to_score => ["p", "td", "pre"],
|
22
|
+
:likely_siblings => ["p"]
|
21
23
|
}.freeze
|
22
24
|
|
23
25
|
REGEXES = {
|
@@ -260,13 +262,14 @@ module Readability
|
|
260
262
|
# Things like preambles, content split by ads that we removed, etc.
|
261
263
|
|
262
264
|
sibling_score_threshold = [10, best_candidate[:content_score] * 0.2].max
|
265
|
+
downcased_likely_siblings = options[:likely_siblings].map(&:downcase)
|
263
266
|
output = Nokogiri::XML::Node.new('div', @html)
|
264
267
|
best_candidate[:elem].parent.children.each do |sibling|
|
265
268
|
append = false
|
266
269
|
append = true if sibling == best_candidate[:elem]
|
267
270
|
append = true if candidates[sibling] && candidates[sibling][:content_score] >= sibling_score_threshold
|
268
271
|
|
269
|
-
if sibling.name.downcase
|
272
|
+
if downcased_likely_siblings.include?(sibling.name.downcase)
|
270
273
|
link_density = get_link_density(sibling)
|
271
274
|
node_content = sibling.text
|
272
275
|
node_length = node_content.length
|
@@ -310,7 +313,7 @@ module Readability
|
|
310
313
|
|
311
314
|
def score_paragraphs(min_text_length)
|
312
315
|
candidates = {}
|
313
|
-
@html.css(
|
316
|
+
@html.css(options[:elements_to_score].join(',')).each do |elem|
|
314
317
|
parent_node = elem.parent
|
315
318
|
grand_parent_node = parent_node.respond_to?(:parent) ? parent_node.parent : nil
|
316
319
|
inner_text = elem.text
|
@@ -423,6 +426,8 @@ module Readability
|
|
423
426
|
|
424
427
|
# We'll sanitize all elements using a whitelist
|
425
428
|
base_whitelist = @options[:tags] || %w[div p]
|
429
|
+
all_whitelisted = base_whitelist.include?("*")
|
430
|
+
|
426
431
|
# We'll add whitespace instead of block elements,
|
427
432
|
# so a<br>b will have a nice space between them
|
428
433
|
base_replace_with_whitespace = %w[br hr h1 h2 h3 h4 h5 h6 dl dd ol li ul address blockquote center]
|
@@ -435,7 +440,7 @@ module Readability
|
|
435
440
|
|
436
441
|
([node] + node.css("*")).each do |el|
|
437
442
|
# If element is in whitelist, delete all its attributes
|
438
|
-
if whitelist[el.node_name]
|
443
|
+
if all_whitelisted || whitelist[el.node_name]
|
439
444
|
el.attributes.each { |a, x| el.delete(a) unless @options[:attributes] && @options[:attributes].include?(a.to_s) }
|
440
445
|
|
441
446
|
# Otherwise, replace the element with its contents
|
data/ruby-readability.gemspec
CHANGED
@@ -3,15 +3,13 @@ $:.push File.expand_path("../lib", __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "ruby-readability"
|
6
|
-
s.version = '0.7.
|
6
|
+
s.version = '0.7.1'
|
7
7
|
s.authors = ["Andrew Cantino", "starrhorne", "libc", "Kyle Maxwell"]
|
8
8
|
s.email = ["andrew@iterationlabs.com"]
|
9
9
|
s.homepage = "http://github.com/cantino/ruby-readability"
|
10
10
|
s.summary = %q{Port of arc90's readability project to ruby}
|
11
11
|
s.description = %q{Port of arc90's readability project to ruby}
|
12
12
|
|
13
|
-
s.rubyforge_project = "ruby-readability"
|
14
|
-
|
15
13
|
s.files = `git ls-files`.split("\n")
|
16
14
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
17
15
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
@@ -19,7 +17,6 @@ Gem::Specification.new do |s|
|
|
19
17
|
|
20
18
|
s.add_development_dependency "rspec", ">= 2.8"
|
21
19
|
s.add_development_dependency "rspec-expectations", ">= 2.8"
|
22
|
-
s.add_development_dependency "rr", ">= 1.0"
|
23
20
|
s.add_dependency 'nokogiri', '>= 1.6.0'
|
24
21
|
s.add_dependency 'guess_html_encoding', '>= 0.0.4'
|
25
22
|
end
|
@@ -0,0 +1,189 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<meta charset="utf-8"/>
|
5
|
+
<title>Standard Flavored Markdown</title>
|
6
|
+
<meta name="description" content=""/>
|
7
|
+
<meta name="HandheldFriendly" content="True"/>
|
8
|
+
<meta name="MobileOptimized" content="320"/>
|
9
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
|
10
|
+
<link rel="shortcut icon" href="/assets/images/favicon.ico?v=8684b6a35e">
|
11
|
+
<link rel="apple-touch-icon" href="/assets/images/codinghorror-app-icon.png?v=8684b6a35e">
|
12
|
+
<meta name="google-site-verification" content="sl0m9SU_4V0JcvjWlOX4dUFBR6VS2P4tlxjJMo0gphU"/>
|
13
|
+
<link rel="stylesheet" type="text/css" href="/assets/css/screen.css?v=8684b6a35e"/>
|
14
|
+
<link rel="stylesheet" type="text/css" href="//fonts.googleapis.com/css?family=Open+Sans:400italic,700italic,400,700"/>
|
15
|
+
<link rel="alternate" type="application/rss+xml" title="Coding Horror" href="http://feeds.feedburner.com/codinghorror">
|
16
|
+
</head>
|
17
|
+
<body class="post-template">
|
18
|
+
<header class="site-head">
|
19
|
+
<div class="site-head-content">
|
20
|
+
<a class="blog-logo" href="http://blog.codinghorror.com"><img src="/assets/images/codinghorror-app-icon.png?v=8684b6a35e" alt="Coding Horror Logo" width="158" height="158"/></a>
|
21
|
+
<h1 class="blog-title"><a href="http://blog.codinghorror.com">Coding Horror</a></h1>
|
22
|
+
<h2 class="blog-description">programming and human factors</h2>
|
23
|
+
<div class="site-search">
|
24
|
+
<script>
|
25
|
+
(function() {
|
26
|
+
var cx = '016956275695630057531:lqveu9tah7y';
|
27
|
+
var gcse = document.createElement('script');
|
28
|
+
gcse.type = 'text/javascript';
|
29
|
+
gcse.async = true;
|
30
|
+
gcse.src = (document.location.protocol == 'https:' ? 'https:' : 'http:') + '//www.google.com/cse/cse.js?cx=' + cx;
|
31
|
+
var s = document.getElementsByTagName('script')[0];
|
32
|
+
s.parentNode.insertBefore(gcse, s);
|
33
|
+
})();
|
34
|
+
</script>
|
35
|
+
<gcse:search></gcse:search>
|
36
|
+
</div>
|
37
|
+
</div>
|
38
|
+
</header>
|
39
|
+
<div class="wrap clearfix">
|
40
|
+
<div class="clearfix"></div>
|
41
|
+
<main class="content" role="main">
|
42
|
+
<article class="post">
|
43
|
+
<span class="post-meta"><time datetime="2014-09-03">03 Sep 2014</time> </span>
|
44
|
+
<h1 class="post-title">Standard Flavored Markdown</h1>
|
45
|
+
<section class="post-content">
|
46
|
+
<p>In 2009 I <a href="http://blog.codinghorror.com/responsible-open-source-code-parenting/">lamented the state of Markdown</a>:</p>
|
47
|
+
<blockquote>
|
48
|
+
<p>Right now we have the worst of both worlds. Lack of leadership from the top, and a bunch of fragmented, poorly coordinated community efforts to advance Markdown, none of which are officially canon. This isn't merely incovenient for anyone trying to find accurate information about Markdown; it's actually harming the project's future. </p>
|
49
|
+
</blockquote>
|
50
|
+
<p>In late 2012, David Greenspan from <a href="https://www.meteor.com/">Meteor</a> approached me and proposed we move forward, and <a href="http://blog.codinghorror.com/the-future-of-markdown/">a project crystallized</a>:</p>
|
51
|
+
<blockquote>
|
52
|
+
<p>I propose that Stack Exchange, GitHub, Meteor, Reddit, and any other company with lots of traffic and a strategic investment in Markdown, all work together to <strong>come up with an official Markdown specification, and standard test suites to validate Markdown implementations</strong>. We've all been working at cross purposes for too long, accidentally fragmenting Markdown while popularizing it.</p>
|
53
|
+
</blockquote>
|
54
|
+
<p>We formed a small private working group with key representatives from GitHub, from Reddit, from Stack Exchange, from the open source community. We spent months hashing out the details and agreeing on the necessary changes to turn Markdown into a language you can parse without feeling like you just walked through a sewer – while preserving the simple, clear, ASCII email inspired spirit of Markdown.</p>
|
55
|
+
<p>We really struggled with this at <a href="http://www.discourse.org">Discourse</a>, which is also based on Markdown, but an even more complex dialect than the one we built at Stack Overflow. In Discourse, you can mix <em>three</em> forms of markup interchangeably:</p>
|
56
|
+
<ul>
|
57
|
+
<li>Markdown</li>
|
58
|
+
<li>HTML (safe subset)</li>
|
59
|
+
<li>BBCode (subset)</li>
|
60
|
+
</ul>
|
61
|
+
<p>Discourse is primarily a JavaScript app, so naturally we needed a nice, compliant implementation of Markdown in JavaScript. Surely such a thing exists, yes? Nope. Even in 2012, we found <em>zero</em> JavaScript implementations of Markdown that could pass the only Markdown test suite I know of, <a href="https://github.com/michelf/mdtest/">MDTest</a>. It isn't authoritative, it's a community created initiative that embodies its own decisions about rendering ambiguities in Markdown, but it's all we've got. We contributed many <a href="https://github.com/evilstreak/markdown-js/commits/master">upstream fixes to markdown.js</a> to make it pass MDTest – but it still only passes in our locally extended version.</p>
|
62
|
+
<p>As an open source project ourselves, we're perfectly happy contributing upstream code to improve it for everyone. But it's an indictment of the state of the Markdown ecosystem that any remotely popular implementation wasn't already testing itself against a formal spec and test suite. But who can blame them, because <i>it didn't exist!</i></p>
|
63
|
+
<p>Well, now it does.</p>
|
64
|
+
<p>It took a while, but I'm pleased to announce that <a href="http://standardmarkdown.com"><strong>Standard Markdown</strong></a> is now finally ready for public review.</p>
|
65
|
+
<p><strong><a href="http://standardmarkdown.com">standardmarkdown.com</a></strong></p>
|
66
|
+
<p>It's a spec, including embedded examples, and implementations in portable C and JavaScript. We strived mightily to stay true to the spirit of Markdown in writing it. The primary author, John MacFarlane, <a href="http://spec.standardmarkdown.com">explains in the introduction to the spec</a>:</p>
|
67
|
+
<blockquote>
|
68
|
+
<p>Because Gruber’s syntax description leaves many aspects of the syntax undetermined, writing a precise spec requires making a large number of decisions, many of them somewhat arbitrary. In making them, I have appealed to existing conventions and considerations of simplicity, readability, expressive power, and consistency. I have tried to ensure that “normal” documents in the many incompatible existing implementations of markdown will render, as far as possible, as their authors intended. And I have tried to make the rules for different elements work together harmoniously. In places where different decisions could have been made (for example, the rules governing list indentation), I have explained the rationale for my choices. In a few cases, I have departed slightly from the canonical syntax description, in ways that I think further the goals of markdown as stated in that description.</p>
|
69
|
+
</blockquote>
|
70
|
+
<p>Part of my contribution to the project is to host the discussion / mailing list for Standard Markdown in a Discourse instance. </p>
|
71
|
+
<p><strong><a href="http://talk.standardmarkdown.com">talk.standardmarkdown.com</a></strong></p>
|
72
|
+
<p>Fortunately, Discourse itself <a href="http://blog.discourse.org/2014/08/introducing-discourse-1-0/">just reached version 1.0</a>. If the only thing Standard Markdown does is help save a few users from the continuing horror that is mailing list web UI, we all win.</p>
|
73
|
+
<p>What I'm most excited about is that we got a massive contribution from the one person who, in my mind, was the most perfect person in the world to work on this project: <a href="http://johnmacfarlane.net/">John MacFarlane</a>. He took our feedback and wrote the entire Standard Markdown spec and both implementations.</p>
|
74
|
+
<p><a href="http://johnmacfarlane.net/"><img src="/content/images/2014/Sep/JohnPinhole.jpg" alt="" title=""/></a></p>
|
75
|
+
<p>A lot of people know of John through his <a href="http://johnmacfarlane.net/pandoc/">Pandoc</a> project, which is amazing in its own right, but I found out about him because he built <a href="http://johnmacfarlane.net/babelmark2/faq.html">Babelmark</a>. I learned to refer to Babelmark extensively while working on Stack Overflow and MarkdownSharp, a C# implementation of Markdown.</p>
|
76
|
+
<p>Here's how crazy Markdown is: to decide what the "correct" behavior is, you provide sample Markdown input to 20+ different Markdown parsers … and then pray that some consensus emerges in all their output. That's what Babelmark does.</p>
|
77
|
+
<p>Consider this simple Markdown example:</p>
|
78
|
+
<pre><code># Hello there
|
79
|
+
|
80
|
+
This is a paragraph.
|
81
|
+
|
82
|
+
- one
|
83
|
+
- two
|
84
|
+
- three
|
85
|
+
- four
|
86
|
+
|
87
|
+
1. pirate
|
88
|
+
2. ninja
|
89
|
+
3. zombie
|
90
|
+
</code></pre>
|
91
|
+
<p>Just for that, I count <a href="http://johnmacfarlane.net/babelmark2/?text=%23+Hello+there%0A%0AThis+is+a+paragraph.%0A%0A-+one%0A-+two%0A-+three%0A-+four%0A%0A1.+pirate%0A2.+ninja%0A3.+zombie"><em>fifteen</em> different rendered outputs</a> from 22 different Markdown parsers.</p>
|
92
|
+
<p><a href="http://en.wikipedia.org/wiki/Tower_of_Babel"><img src="/content/images/2014/Sep/Confusion_of_Tongues.png" alt="" title=""/></a></p>
|
93
|
+
<p>In Markdown, we <em>literally</em> built a <a href="http://en.wikipedia.org/wiki/Tower_of_Babel">Tower of Babel</a>. </p>
|
94
|
+
<p>Have I mentioned that it's a good idea for a language to have a formal specification and test suites? Maybe now you can see why that is.</p>
|
95
|
+
<p>Oh, and in his spare time, John is also the chair of the department of philosophy at the University of California, Berkeley. <em>No big deal.</em> While I don't mean to minimize the contributions of anyone to the Standard Markdown project, we all owe a special thanks to John.</p>
|
96
|
+
<p>Markdown is indeed everywhere. And that's a good thing. But it needs to be sane, parseable, and standard. That's the goal of <a href="http://standardmarkdown.com/">Standard Markdown</a> — but we need your help to get there. If you use Markdown on a website, <strong>ask what it would take for that site to become compatible with Standard Markdown</strong>; when you see the word "Markdown" you have the right to expect consistent rendering across all the websites you visit. If you implement Markdown, <a href="http://spec.standardmarkdown.com">take a look at the spec</a>, try to <strong>make your parser compatible with Standard Markdown</strong>, and <a href="http://talk.standardmarkdown.com">discuss improvements or refinements</a> to the spec.</p>
|
97
|
+
<p><span style="color:red;">Update:</span> The project was renamed <a href="http://commonmark.org">CommonMark</a>. See <a href="http://blog.codinghorror.com/standard-markdown-is-now-common-markdown/">my subsequent blog post</a>.</p>
|
98
|
+
<table>
|
99
|
+
<tr><td class="welovecodinghorror">
|
100
|
+
[advertisement] How are you showing off your awesome? Create a <a href="http://careers.stackoverflow.com/cv" rel="nofollow">Stack Overflow Careers profile</a> and show off all of your hard work from Stack Overflow, Github, and virtually every other coding site. Who knows, you might even get recruited for a great <a href="http://careers.stackoverflow.com/jobs" rel="nofollow">new position</a>!
|
101
|
+
</td></tr>
|
102
|
+
</table>
|
103
|
+
</section>
|
104
|
+
<footer class="post-footer">
|
105
|
+
<section class="author">
|
106
|
+
<h4>Written by Jeff Atwood</h4>
|
107
|
+
<p>Indoor enthusiast. Co-founder of Stack Exchange and Discourse. Disclaimer: I have no idea what I'm talking about. Find me here: <a href="http://twitter.com/codinghorror">http://twitter.com/codinghorror</a></p>
|
108
|
+
</section>
|
109
|
+
</footer>
|
110
|
+
<div id="nrelate_related_placeholder"></div> <script async id="nrelate_loader_script" type="text/javascript" src="http://static.nrelate.com/common_js/0.52.1/loader.min.js"></script>
|
111
|
+
</article>
|
112
|
+
<div id="discourse-comments"></div>
|
113
|
+
<script type="text/javascript">
|
114
|
+
var discourseUrl = "http://discourse.codinghorror.com/",
|
115
|
+
discourseEmbedUrl = 'http://blog.codinghorror.com/standard-flavored-markdown/';
|
116
|
+
|
117
|
+
(function() {
|
118
|
+
var d = document.createElement('script'); d.type = 'text/javascript'; d.async = true;
|
119
|
+
d.src = discourseUrl + 'javascripts/embed.js';
|
120
|
+
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(d);
|
121
|
+
})();
|
122
|
+
</script>
|
123
|
+
</main>
|
124
|
+
<aside class="sidebar">
|
125
|
+
|
126
|
+
<div id="carbonads-container"><div class="carbonad"><div id="azcarbon"></div><script type="text/javascript">var z = document.createElement("script"); z.type = "text/javascript"; z.async = true; z.src = "http://engine.carbonads.com/z/56742/azcarbon_2_1_0_VERT"; var s = document.getElementsByTagName("script")[0]; s.parentNode.insertBefore(z, s);</script></div></div>
|
127
|
+
<div id="hireme" class="hireme codinghorror" style="min-height: 220px; margin-bottom: 15px;"></div>
|
128
|
+
<script>
|
129
|
+
setTimeout(function () {
|
130
|
+
var a = document.createElement("script");
|
131
|
+
var b = document.getElementsByTagName('script')[0];
|
132
|
+
a.src = "http://careers.stackoverflow.com/ad/js";
|
133
|
+
a.async = true;
|
134
|
+
a.type = "text/javascript";
|
135
|
+
b.parentNode.insertBefore(a, b);
|
136
|
+
}, 5);
|
137
|
+
</script>
|
138
|
+
<div class="welovecodinghorror" style="margin-bottom:15px">
|
139
|
+
[ad] Enjoy the blog? Read <b><a href="http://www.hyperink.com/Effective-Programming-More-Than-Writing-Code-b1559">Effective Programming: More than Writing Code</a></b> and <b><a href="http://www.hyperink.com/How-To-Stop-Sucking-And-Be-Awesome-Instead-b9A74B5CBA6">How to Stop Sucking and Be Awesome Instead</a></b> on your Kindle, iPad, Nook, or as a PDF.
|
140
|
+
</div>
|
141
|
+
<h3>Resources</h3>
|
142
|
+
<ul>
|
143
|
+
<li><a href="/about-me/">About Me</a></li>
|
144
|
+
<li><a href="http://twitter.com/codinghorror">@codinghorror</a></li>
|
145
|
+
<li><a href="http://www.discourse.org/">discourse.org</a></li>
|
146
|
+
<li><a href="http://stackexchange.com/">stackexchange.com</a></li>
|
147
|
+
<li><a href="/recommended-reading-for-developers/">Recommended Reading</a></li>
|
148
|
+
</ul>
|
149
|
+
<ul>
|
150
|
+
<li><a href="http://feeds.feedburner.com/codinghorror" class="icon-feed"> Subscribe in a reader</a></li>
|
151
|
+
<li><a href="http://feedburner.google.com/fb/a/mailverify?uri=codinghorror&loc=en_US" class="icon-email"> Subscribe via email</a></li>
|
152
|
+
</ul>
|
153
|
+
<p>Coding Horror has been continuously published since 2004</p>
|
154
|
+
<ul>
|
155
|
+
<li><img src="http://feeds.feedburner.com/~fc/codinghorror?bg=EEEEEE&fg=111111&anim=0" height="26" width="88" style="border:0" alt="Count of RSS readers"></li>
|
156
|
+
<li><a href="http://my.statcounter.com/project/standard/stats.php?project_id=2600027&guest=1">Traffic Stats</a></li>
|
157
|
+
</ul>
|
158
|
+
<footer class="site-footer">
|
159
|
+
<section class="copyright">Copyright <a rel="author" href="https://profiles.google.com/codinghorror1">Jeff Atwood</a> © 2014<br/>
|
160
|
+
Logo image © 1993 Steven C. McConnell <br/>
|
161
|
+
Proudly published with <a class="icon-ghost" href="http://ghost.org">Ghost</a></section>
|
162
|
+
</footer></aside>
|
163
|
+
</div>
|
164
|
+
<script src="/public/jquery.min.js?v=8684b6a35e"></script>
|
165
|
+
<script type="text/javascript" src="/assets/js/jquery.fitvids.js?v=8684b6a35e"></script>
|
166
|
+
<script type="text/javascript" src="/assets/js/index.js?v=8684b6a35e"></script>
|
167
|
+
<script async src="http://www.statcounter.com/counter/counter.js"></script>
|
168
|
+
<noscript><a href="http://www.statcounter.com/"><img src="http://c26.statcounter.com/counter.php?sc_project=2600027&java=0&security=dcff5548&invisible=0" alt="web metrics"></a> </noscript>
|
169
|
+
|
170
|
+
<script>
|
171
|
+
document.write(unescape("%3Cscript src='" + (document.location.protocol == "https:" ? "https://sb" : "http://b") + ".scorecardresearch.com/beacon.js'%3E%3C/script%3E"));
|
172
|
+
</script>
|
173
|
+
<script>
|
174
|
+
COMSCORE.beacon({
|
175
|
+
c1: 2,
|
176
|
+
c2: "6035669",
|
177
|
+
c3: "",
|
178
|
+
c4: "http://www.codinghorror.com/blog/",
|
179
|
+
c5: "",
|
180
|
+
c6: "",
|
181
|
+
c15: ""
|
182
|
+
});
|
183
|
+
</script>
|
184
|
+
<noscript>
|
185
|
+
<img src="http://b.scorecardresearch.com/b?c1=2&c2=6035669&c3=&c4=http%3A%2F%2Fwww.codinghorror.com%2Fblog%2F&c5=&c6=&c15=&cv=1.3&cj=1" style="display:none" width="0" height="0" alt=""/>
|
186
|
+
</noscript>
|
187
|
+
<img src="/view.gif?page=/standard-flavored-markdown/" alt="" style="display:none" hidden />
|
188
|
+
</body>
|
189
|
+
</html>
|
Binary file
|
Binary file
|
data/spec/readability_spec.rb
CHANGED
@@ -19,7 +19,7 @@ describe Readability do
|
|
19
19
|
</body>
|
20
20
|
</html>
|
21
21
|
HTML
|
22
|
-
|
22
|
+
|
23
23
|
@simple_html_with_img_no_text = <<-HTML
|
24
24
|
<html>
|
25
25
|
<head>
|
@@ -32,7 +32,7 @@ describe Readability do
|
|
32
32
|
</body>
|
33
33
|
</html>
|
34
34
|
HTML
|
35
|
-
|
35
|
+
|
36
36
|
@simple_html_with_img_in_noscript = <<-HTML
|
37
37
|
<html>
|
38
38
|
<head>
|
@@ -40,8 +40,8 @@ describe Readability do
|
|
40
40
|
</head>
|
41
41
|
<body class='main'>
|
42
42
|
<div class="article-img">
|
43
|
-
<img src="http://img.thesun.co.uk/multimedia/archive/00703/sign_up_emails_682__703711a.gif" width="660"
|
44
|
-
height="317" alt="test" class="lazy"
|
43
|
+
<img src="http://img.thesun.co.uk/multimedia/archive/00703/sign_up_emails_682__703711a.gif" width="660"
|
44
|
+
height="317" alt="test" class="lazy"
|
45
45
|
data-original="http://img.thesun.co.uk/multimedia/archive/01416/dim_1416768a.jpg">
|
46
46
|
<noscript><img src="http://img.thesun.co.uk/multimedia/archive/01416/dim_1416768a.jpg"></noscript>
|
47
47
|
</div>
|
@@ -54,30 +54,65 @@ describe Readability do
|
|
54
54
|
before do
|
55
55
|
@bbc = File.read(File.dirname(__FILE__) + "/fixtures/bbc.html")
|
56
56
|
@nytimes = File.read(File.dirname(__FILE__) + "/fixtures/nytimes.html")
|
57
|
-
@
|
57
|
+
@thesun = File.read(File.dirname(__FILE__) + "/fixtures/thesun.html")
|
58
|
+
@ch = File.read(File.dirname(__FILE__) + "/fixtures/codinghorror.html")
|
59
|
+
@nested = File.read(File.dirname(__FILE__) + "/fixtures/nested_images.html")
|
58
60
|
|
59
61
|
FakeWeb::Registry.instance.clean_registry
|
62
|
+
|
60
63
|
FakeWeb.register_uri(:get, "http://img.thesun.co.uk/multimedia/archive/01416/dim_1416768a.jpg",
|
61
64
|
:body => File.read(File.dirname(__FILE__) + "/fixtures/images/dim_1416768a.jpg"))
|
62
|
-
|
65
|
+
|
63
66
|
FakeWeb.register_uri(:get, "http://img.thesun.co.uk/multimedia/archive/00703/sign_up_emails_682__703711a.gif",
|
64
67
|
:body => File.read(File.dirname(__FILE__) + "/fixtures/images/sign_up_emails_682__703711a.gif"))
|
65
|
-
|
66
|
-
FakeWeb.register_uri(:get, "http://img.thesun.co.uk/multimedia/archive/00703/sign_up_emails_682__703712a.gif",
|
68
|
+
|
69
|
+
FakeWeb.register_uri(:get, "http://img.thesun.co.uk/multimedia/archive/00703/sign_up_emails_682__703712a.gif",
|
67
70
|
:body => File.read(File.dirname(__FILE__) + "/fixtures/images/sign_up_emails_682__703712a.gif"))
|
68
|
-
|
71
|
+
|
72
|
+
# Register images for codinghorror
|
73
|
+
FakeWeb.register_uri(:get, 'http://blog.codinghorror.com/content/images/2014/Sep/JohnPinhole.jpg',
|
74
|
+
:body => File.read(File.dirname(__FILE__) + "/fixtures/images/JohnPinhole.jpg"))
|
75
|
+
FakeWeb.register_uri(:get, 'http://blog.codinghorror.com/content/images/2014/Sep/Confusion_of_Tongues.png',
|
76
|
+
:body => File.read(File.dirname(__FILE__) + "/fixtures/images/Confusion_of_Tongues.png"))
|
69
77
|
end
|
70
78
|
|
71
79
|
it "should show one image, but outside of the best candidate" do
|
72
|
-
@doc = Readability::Document.new(@
|
73
|
-
@doc.images.
|
74
|
-
@doc.best_candidate_has_image.
|
80
|
+
@doc = Readability::Document.new(@thesun)
|
81
|
+
expect(@doc.images).to eq(["http://img.thesun.co.uk/multimedia/archive/01416/dim_1416768a.jpg", "http://img.thesun.co.uk/multimedia/archive/00703/sign_up_emails_682__703711a.gif", "http://img.thesun.co.uk/multimedia/archive/00703/sign_up_emails_682__703712a.gif"])
|
82
|
+
expect(@doc.best_candidate_has_image).to eq(false)
|
75
83
|
end
|
76
84
|
|
77
85
|
it "should show one image inside of the best candidate" do
|
78
86
|
@doc = Readability::Document.new(@nytimes)
|
79
|
-
@doc.images.
|
80
|
-
@doc.best_candidate_has_image.
|
87
|
+
expect(@doc.images).to eq(["http://graphics8.nytimes.com/images/2011/12/02/opinion/02fixes-freelancersunion/02fixes-freelancersunion-blog427.jpg"])
|
88
|
+
expect(@doc.best_candidate_has_image).to eq(true)
|
89
|
+
end
|
90
|
+
|
91
|
+
it "should expand relative image url" do
|
92
|
+
url = 'http://blog.codinghorror.com/standard-flavored-markdown/'
|
93
|
+
@doc = Readability::Document.new(@ch, tags: %w[div p img a],
|
94
|
+
attributes: %w[src href],
|
95
|
+
remove_empty_nodes: false)
|
96
|
+
@doc.images_with_fqdn_uris!(url)
|
97
|
+
|
98
|
+
expect(@doc.content).to include('http://blog.codinghorror.com/content/images/2014/Sep/JohnPinhole.jpg')
|
99
|
+
expect(@doc.content).to include('http://blog.codinghorror.com/content/images/2014/Sep/Confusion_of_Tongues.png')
|
100
|
+
|
101
|
+
expect(@doc.images).to match_array([
|
102
|
+
'http://blog.codinghorror.com/content/images/2014/Sep/JohnPinhole.jpg',
|
103
|
+
'http://blog.codinghorror.com/content/images/2014/Sep/Confusion_of_Tongues.png'
|
104
|
+
])
|
105
|
+
end
|
106
|
+
|
107
|
+
it "should be able to preserve deeply nested image tags in the article's content by whitelisting all tags" do
|
108
|
+
@doc = Readability::Document.new(@nested, attributes: ["src"])
|
109
|
+
expect(@doc.images).to be_empty
|
110
|
+
|
111
|
+
@doc = Readability::Document.new(@nested, attributes: ["src"], tags: ["figure", "image"])
|
112
|
+
expect(@doc.images).to be_empty
|
113
|
+
|
114
|
+
@doc = Readability::Document.new(@nested, attributes: ["src"], tags: ["*"])
|
115
|
+
expect(@doc.content).to include('<img src="http://example.com/image.jpeg" />')
|
81
116
|
end
|
82
117
|
|
83
118
|
it "should not try to download local images" do
|
@@ -93,69 +128,69 @@ describe Readability do
|
|
93
128
|
</body>
|
94
129
|
</html>
|
95
130
|
HTML
|
96
|
-
|
97
|
-
@doc.images.
|
131
|
+
expect(@doc).not_to receive(:get_image_size)
|
132
|
+
expect(@doc.images).to eq([])
|
98
133
|
end
|
99
134
|
|
100
135
|
describe "no images" do
|
101
136
|
it "shouldn't show images" do
|
102
137
|
@doc = Readability::Document.new(@bbc, :min_image_height => 600)
|
103
|
-
@doc.images.
|
104
|
-
@doc.best_candidate_has_image.
|
138
|
+
expect(@doc.images).to eq([])
|
139
|
+
expect(@doc.best_candidate_has_image).to eq(false)
|
105
140
|
end
|
106
141
|
end
|
107
142
|
|
108
143
|
describe "poll of images" do
|
109
144
|
it "should show some images inside of the best candidate" do
|
110
145
|
@doc = Readability::Document.new(@bbc)
|
111
|
-
@doc.images.
|
146
|
+
expect(@doc.images).to match_array(["http://news.bbcimg.co.uk/media/images/57027000/jpg/_57027794_perseus_getty.jpg",
|
112
147
|
"http://news.bbcimg.co.uk/media/images/57027000/jpg/_57027786_john_capes229_rnsm.jpg",
|
113
148
|
"http://news.bbcimg.co.uk/media/images/57060000/gif/_57060487_sub_escapes304x416.gif",
|
114
|
-
"http://news.bbcimg.co.uk/media/images/57055000/jpg/_57055063_perseus_thoctarides.jpg"]
|
115
|
-
@doc.best_candidate_has_image.
|
149
|
+
"http://news.bbcimg.co.uk/media/images/57055000/jpg/_57055063_perseus_thoctarides.jpg"])
|
150
|
+
expect(@doc.best_candidate_has_image).to eq(true)
|
116
151
|
end
|
117
152
|
|
118
153
|
it "should show some images inside of the best candidate, include gif format" do
|
119
154
|
@doc = Readability::Document.new(@bbc, :ignore_image_format => [])
|
120
|
-
@doc.images.
|
121
|
-
@doc.best_candidate_has_image.
|
155
|
+
expect(@doc.images).to eq(["http://news.bbcimg.co.uk/media/images/57027000/jpg/_57027794_perseus_getty.jpg", "http://news.bbcimg.co.uk/media/images/57027000/jpg/_57027786_john_capes229_rnsm.jpg", "http://news.bbcimg.co.uk/media/images/57060000/gif/_57060487_sub_escapes304x416.gif", "http://news.bbcimg.co.uk/media/images/57055000/jpg/_57055063_perseus_thoctarides.jpg"])
|
156
|
+
expect(@doc.best_candidate_has_image).to eq(true)
|
122
157
|
end
|
123
158
|
|
124
159
|
describe "width, height and format" do
|
125
160
|
it "should show some images inside of the best candidate, but with width most equal to 400px" do
|
126
161
|
@doc = Readability::Document.new(@bbc, :min_image_width => 400, :ignore_image_format => [])
|
127
|
-
@doc.images.
|
128
|
-
@doc.best_candidate_has_image.
|
162
|
+
expect(@doc.images).to eq(["http://news.bbcimg.co.uk/media/images/57027000/jpg/_57027794_perseus_getty.jpg"])
|
163
|
+
expect(@doc.best_candidate_has_image).to eq(true)
|
129
164
|
end
|
130
165
|
|
131
166
|
it "should show some images inside of the best candidate, but with width most equal to 304px" do
|
132
167
|
@doc = Readability::Document.new(@bbc, :min_image_width => 304, :ignore_image_format => [])
|
133
|
-
@doc.images.
|
134
|
-
@doc.best_candidate_has_image.
|
168
|
+
expect(@doc.images).to eq(["http://news.bbcimg.co.uk/media/images/57027000/jpg/_57027794_perseus_getty.jpg", "http://news.bbcimg.co.uk/media/images/57060000/gif/_57060487_sub_escapes304x416.gif", "http://news.bbcimg.co.uk/media/images/57055000/jpg/_57055063_perseus_thoctarides.jpg"])
|
169
|
+
expect(@doc.best_candidate_has_image).to eq(true)
|
135
170
|
end
|
136
171
|
|
137
172
|
it "should show some images inside of the best candidate, but with width most equal to 304px and ignoring JPG format" do
|
138
173
|
@doc = Readability::Document.new(@bbc, :min_image_width => 304, :ignore_image_format => ["jpg"])
|
139
|
-
@doc.images.
|
140
|
-
@doc.best_candidate_has_image.
|
174
|
+
expect(@doc.images).to eq(["http://news.bbcimg.co.uk/media/images/57060000/gif/_57060487_sub_escapes304x416.gif"])
|
175
|
+
expect(@doc.best_candidate_has_image).to eq(true)
|
141
176
|
end
|
142
177
|
|
143
178
|
it "should show some images inside of the best candidate, but with height most equal to 400px, no ignoring no format" do
|
144
179
|
@doc = Readability::Document.new(@bbc, :min_image_height => 400, :ignore_image_format => [])
|
145
|
-
@doc.images.
|
146
|
-
@doc.best_candidate_has_image.
|
180
|
+
expect(@doc.images).to eq(["http://news.bbcimg.co.uk/media/images/57060000/gif/_57060487_sub_escapes304x416.gif"])
|
181
|
+
expect(@doc.best_candidate_has_image).to eq(true)
|
147
182
|
end
|
148
|
-
|
183
|
+
|
149
184
|
it "should not miss an image if it exists by itself in a div without text" do
|
150
185
|
@doc = Readability::Document.new(@simple_html_with_img_no_text,:tags => %w[div p img a], :attributes => %w[src href], :remove_empty_nodes => false, :do_not_guess_encoding => true)
|
151
|
-
@doc.images.
|
186
|
+
expect(@doc.images).to eq(["http://img.thesun.co.uk/multimedia/archive/01416/dim_1416768a.jpg"])
|
152
187
|
end
|
153
|
-
|
188
|
+
|
154
189
|
it "should not double count an image between script and noscript" do
|
155
190
|
@doc = Readability::Document.new(@simple_html_with_img_in_noscript,:tags => %w[div p img a], :attributes => %w[src href], :remove_empty_nodes => false, :do_not_guess_encoding => true)
|
156
|
-
@doc.images.
|
191
|
+
expect(@doc.images).to eq(["http://img.thesun.co.uk/multimedia/archive/00703/sign_up_emails_682__703711a.gif", "http://img.thesun.co.uk/multimedia/archive/01416/dim_1416768a.jpg"])
|
157
192
|
end
|
158
|
-
|
193
|
+
|
159
194
|
end
|
160
195
|
end
|
161
196
|
end
|
@@ -167,11 +202,11 @@ describe Readability do
|
|
167
202
|
end
|
168
203
|
|
169
204
|
it "should transform divs containing no block elements into <p>s" do
|
170
|
-
@doc.html.css("#body").first.name.
|
205
|
+
expect(@doc.html.css("#body").first.name).to eq("p")
|
171
206
|
end
|
172
207
|
|
173
208
|
it "should not transform divs that contain block elements" do
|
174
|
-
@doc.html.css("#contains_blockquote").first.name.
|
209
|
+
expect(@doc.html.css("#contains_blockquote").first.name).to eq("div")
|
175
210
|
end
|
176
211
|
end
|
177
212
|
|
@@ -185,9 +220,9 @@ describe Readability do
|
|
185
220
|
<body></body>
|
186
221
|
</html>
|
187
222
|
HTML
|
188
|
-
doc.author.
|
223
|
+
expect(doc.author).to eql("Austin Fonacier")
|
189
224
|
end
|
190
|
-
|
225
|
+
|
191
226
|
it "should pick up readability's recommended author format" do
|
192
227
|
doc = Readability::Document.new(<<-HTML)
|
193
228
|
<html>
|
@@ -200,9 +235,9 @@ describe Readability do
|
|
200
235
|
</body>
|
201
236
|
</html>
|
202
237
|
HTML
|
203
|
-
doc.author.
|
238
|
+
expect(doc.author).to eql("Austin Fonacier")
|
204
239
|
end
|
205
|
-
|
240
|
+
|
206
241
|
it "should pick up vcard fn" do
|
207
242
|
doc = Readability::Document.new(<<-HTML)
|
208
243
|
<html>
|
@@ -216,9 +251,9 @@ describe Readability do
|
|
216
251
|
</body>
|
217
252
|
</html>
|
218
253
|
HTML
|
219
|
-
doc.author.
|
254
|
+
expect(doc.author).to eql("Austin Fonacier")
|
220
255
|
end
|
221
|
-
|
256
|
+
|
222
257
|
it "should pick up <a rel='author'>" do
|
223
258
|
doc = Readability::Document.new(<<-HTML)
|
224
259
|
<html>
|
@@ -228,9 +263,9 @@ describe Readability do
|
|
228
263
|
</body>
|
229
264
|
</html>
|
230
265
|
HTML
|
231
|
-
doc.author.
|
266
|
+
expect(doc.author).to eql("Danny Banks (rel)")
|
232
267
|
end
|
233
|
-
|
268
|
+
|
234
269
|
it "should pick up <div id='author'>" do
|
235
270
|
doc = Readability::Document.new(<<-HTML)
|
236
271
|
<html>
|
@@ -240,7 +275,7 @@ describe Readability do
|
|
240
275
|
</body>
|
241
276
|
</html>
|
242
277
|
HTML
|
243
|
-
doc.author.
|
278
|
+
expect(doc.author).to eql("Austin Fonacier (author)")
|
244
279
|
end
|
245
280
|
end
|
246
281
|
|
@@ -263,15 +298,15 @@ describe Readability do
|
|
263
298
|
end
|
264
299
|
|
265
300
|
it "should like <div>s more than <th>s" do
|
266
|
-
@doc.score_node(@elem1)[:content_score].
|
301
|
+
expect(@doc.score_node(@elem1)[:content_score]).to be > @doc.score_node(@elem2)[:content_score]
|
267
302
|
end
|
268
303
|
|
269
304
|
it "should like classes like text more than classes like comment" do
|
270
305
|
@elem2.name = "div"
|
271
|
-
@doc.score_node(@elem1)[:content_score].
|
306
|
+
expect(@doc.score_node(@elem1)[:content_score]).to eq(@doc.score_node(@elem2)[:content_score])
|
272
307
|
@elem1['class'] = "text"
|
273
308
|
@elem2['class'] = "comment"
|
274
|
-
@doc.score_node(@elem1)[:content_score].
|
309
|
+
expect(@doc.score_node(@elem1)[:content_score]).to be > @doc.score_node(@elem2)[:content_score]
|
275
310
|
end
|
276
311
|
end
|
277
312
|
|
@@ -282,15 +317,15 @@ describe Readability do
|
|
282
317
|
end
|
283
318
|
|
284
319
|
it "should remove things that have class comment" do
|
285
|
-
@doc.html.inner_html.
|
320
|
+
expect(@doc.html.inner_html).not_to match(/a comment/)
|
286
321
|
end
|
287
322
|
|
288
323
|
it "should not remove body tags" do
|
289
|
-
@doc.html.inner_html.
|
324
|
+
expect(@doc.html.inner_html).to match(/<\/body>/)
|
290
325
|
end
|
291
326
|
|
292
327
|
it "should not remove things with class comment and id body" do
|
293
|
-
@doc.html.inner_html.
|
328
|
+
expect(@doc.html.inner_html).to match(/real content/)
|
294
329
|
end
|
295
330
|
end
|
296
331
|
|
@@ -318,13 +353,13 @@ describe Readability do
|
|
318
353
|
end
|
319
354
|
|
320
355
|
it "should score elements in the document" do
|
321
|
-
@candidates.values.length.
|
356
|
+
expect(@candidates.values.length).to eq(3)
|
322
357
|
end
|
323
358
|
|
324
359
|
it "should prefer the body in this particular example" do
|
325
|
-
@candidates.values.sort { |a, b|
|
360
|
+
expect(@candidates.values.sort { |a, b|
|
326
361
|
b[:content_score] <=> a[:content_score]
|
327
|
-
}.first[:elem][:id].
|
362
|
+
}.first[:elem][:id]).to eq("body")
|
328
363
|
end
|
329
364
|
|
330
365
|
context "when two consequent br tags are used instead of p" do
|
@@ -349,9 +384,129 @@ describe Readability do
|
|
349
384
|
</html>
|
350
385
|
HTML
|
351
386
|
@candidates = @doc.score_paragraphs(0)
|
352
|
-
@candidates.values.sort_by { |a| -a[:content_score] }.first[:elem][:id].
|
387
|
+
expect(@candidates.values.sort_by { |a| -a[:content_score] }.first[:elem][:id]).to eq('post1')
|
353
388
|
end
|
354
389
|
end
|
390
|
+
|
391
|
+
it "does not include short paragraphs as related siblings in the output" do
|
392
|
+
@doc = Readability::Document.new(<<-HTML, min_text_length: 1, elements_to_score: ["h1", "p"])
|
393
|
+
<html>
|
394
|
+
<head>
|
395
|
+
<title>title!</title>
|
396
|
+
</head>
|
397
|
+
<body>
|
398
|
+
<section>
|
399
|
+
<p>Paragraph 1</p>
|
400
|
+
<p>Paragraph 2</p>
|
401
|
+
</section>
|
402
|
+
<section>
|
403
|
+
<p>Too short</p>
|
404
|
+
</section>
|
405
|
+
#{'<a href="/">This link lowers the body score.</a>' * 5}
|
406
|
+
</body>
|
407
|
+
</html>
|
408
|
+
HTML
|
409
|
+
|
410
|
+
expect(@doc.content).to include("Paragraph 1")
|
411
|
+
expect(@doc.content).to include("Paragraph 2")
|
412
|
+
expect(@doc.content).not_to include("Too short")
|
413
|
+
end
|
414
|
+
|
415
|
+
it "includes long paragraphs as related siblings in the output" do
|
416
|
+
@doc = Readability::Document.new(<<-HTML, min_text_length: 1, elements_to_score: ["h1", "p"])
|
417
|
+
<html>
|
418
|
+
<head>
|
419
|
+
<title>title!</title>
|
420
|
+
</head>
|
421
|
+
<body>
|
422
|
+
<section>
|
423
|
+
<p>Paragraph 1</p>
|
424
|
+
<p>Paragraph 2</p>
|
425
|
+
</section>
|
426
|
+
<p>This paragraph is longer than 80 characters so should be included as a sibling in the output.</p>
|
427
|
+
#{'<a href="/">This link lowers the body score.</a>' * 5}
|
428
|
+
</body>
|
429
|
+
</html>
|
430
|
+
HTML
|
431
|
+
|
432
|
+
expect(@doc.content).to include("Paragraph 1")
|
433
|
+
expect(@doc.content).to include("Paragraph 2")
|
434
|
+
expect(@doc.content).to include("This paragraph is longer")
|
435
|
+
end
|
436
|
+
|
437
|
+
it "does not include non-paragraph tags in the output, even when longer than 80 characters" do
|
438
|
+
@doc = Readability::Document.new(<<-HTML, min_text_length: 1, elements_to_score: ["h1", "p"])
|
439
|
+
<html>
|
440
|
+
<head>
|
441
|
+
<title>title!</title>
|
442
|
+
</head>
|
443
|
+
<body>
|
444
|
+
<section>
|
445
|
+
<p>Paragraph 1</p>
|
446
|
+
<p>Paragraph 2</p>
|
447
|
+
</section>
|
448
|
+
<section>
|
449
|
+
<p>Although this paragraph is longer than 80 characters, the sibling is the section so it should not be included.</p>
|
450
|
+
</section>
|
451
|
+
#{'<a href="/">This link lowers the body score.</a>' * 5}
|
452
|
+
</body>
|
453
|
+
</html>
|
454
|
+
HTML
|
455
|
+
|
456
|
+
expect(@doc.content).to include("Paragraph 1")
|
457
|
+
expect(@doc.content).to include("Paragraph 2")
|
458
|
+
expect(@doc.content).not_to include("Although this paragraph")
|
459
|
+
end
|
460
|
+
|
461
|
+
it "does include non-paragraph tags in the output if their content score is high enough" do
|
462
|
+
@doc = Readability::Document.new(<<-HTML, min_text_length: 1, elements_to_score: ["h1", "p"])
|
463
|
+
<html>
|
464
|
+
<head>
|
465
|
+
<title>title!</title>
|
466
|
+
</head>
|
467
|
+
<body>
|
468
|
+
<section>
|
469
|
+
<p>Paragraph 1</p>
|
470
|
+
#{'<p>Paragraph 2</p>' * 10} <!-- Ensure this section remains the best_candidate. -->
|
471
|
+
</section>
|
472
|
+
<section>
|
473
|
+
<p>This should be included in the output because the content is score is high enough.<p>
|
474
|
+
<p>The, inclusion, of, lots, of, commas, increases, the, score, of, an, element.</p>
|
475
|
+
</section>
|
476
|
+
#{'<a href="/">This link lowers the body score.</a>' * 5}
|
477
|
+
</body>
|
478
|
+
</html>
|
479
|
+
HTML
|
480
|
+
|
481
|
+
expect(@doc.content).to include("Paragraph 1")
|
482
|
+
expect(@doc.content).to include("Paragraph 2")
|
483
|
+
expect(@doc.content).to include("This should be included")
|
484
|
+
end
|
485
|
+
|
486
|
+
it "can optionally include other related siblings in the output if they meet the 80 character threshold" do
|
487
|
+
@doc = Readability::Document.new(<<-HTML, min_text_length: 1, elements_to_score: ["h1", "p"], likely_siblings: ["section"])
|
488
|
+
<html>
|
489
|
+
<head>
|
490
|
+
<title>title!</title>
|
491
|
+
</head>
|
492
|
+
<body>
|
493
|
+
<section>
|
494
|
+
<p>Paragraph 1</p>
|
495
|
+
#{'<p>Paragraph 2</p>' * 10} <!-- Ensure this section remains the best_candidate. -->
|
496
|
+
</section>
|
497
|
+
<section>
|
498
|
+
<p>This paragraph is longer than 80 characters and inside a section that is a sibling of the best_candidate.</p>
|
499
|
+
<p>The likely_siblings now include the section tag so it should be included in the output.</p>
|
500
|
+
</section>
|
501
|
+
#{'<a href="/">This link lowers the body score.</a>' * 5}
|
502
|
+
</body>
|
503
|
+
</html>
|
504
|
+
HTML
|
505
|
+
|
506
|
+
expect(@doc.content).to include("Paragraph 1")
|
507
|
+
expect(@doc.content).to include("Paragraph 2")
|
508
|
+
expect(@doc.content).to include("should be included")
|
509
|
+
end
|
355
510
|
end
|
356
511
|
|
357
512
|
describe "the cant_read.html fixture" do
|
@@ -359,7 +514,7 @@ describe Readability do
|
|
359
514
|
allowed_tags = %w[div span table tr td p i strong u h1 h2 h3 h4 pre code br a]
|
360
515
|
allowed_attributes = %w[href]
|
361
516
|
html = File.read(File.dirname(__FILE__) + "/fixtures/cant_read.html")
|
362
|
-
Readability::Document.new(html, :tags => allowed_tags, :attributes => allowed_attributes).content.
|
517
|
+
expect(Readability::Document.new(html, :tags => allowed_tags, :attributes => allowed_attributes).content).to match(/Can you talk a little about how you developed the looks for the/)
|
363
518
|
end
|
364
519
|
end
|
365
520
|
|
@@ -370,15 +525,15 @@ describe Readability do
|
|
370
525
|
end
|
371
526
|
|
372
527
|
it "should return the main page content" do
|
373
|
-
@doc.content.
|
528
|
+
expect(@doc.content).to match("Some content")
|
374
529
|
end
|
375
530
|
|
376
531
|
it "should return the page title if present" do
|
377
|
-
@doc.title.
|
532
|
+
expect(@doc.title).to match("title!")
|
378
533
|
|
379
534
|
doc = Readability::Document.new("<html><head></head><body><div><p>Some content</p></div></body>",
|
380
535
|
:min_text_length => 0, :retry_length => 1)
|
381
|
-
doc.title.
|
536
|
+
expect(doc.title).to be_nil
|
382
537
|
end
|
383
538
|
end
|
384
539
|
|
@@ -389,7 +544,7 @@ describe Readability do
|
|
389
544
|
end
|
390
545
|
|
391
546
|
it "should not return the sidebar" do
|
392
|
-
@doc.content.
|
547
|
+
expect(@doc.content).not_to match("sidebar")
|
393
548
|
end
|
394
549
|
end
|
395
550
|
|
@@ -407,7 +562,7 @@ describe Readability do
|
|
407
562
|
end
|
408
563
|
|
409
564
|
it "should not return the sidebar" do
|
410
|
-
@doc.content.
|
565
|
+
expect(@doc.content).not_to match("a b c d f")
|
411
566
|
end
|
412
567
|
end
|
413
568
|
|
@@ -427,12 +582,12 @@ describe Readability do
|
|
427
582
|
#puts "testing #{sample}..."
|
428
583
|
|
429
584
|
$required_fragments.each do |required_text|
|
430
|
-
doc.
|
585
|
+
expect(doc).to include(required_text)
|
431
586
|
checks += 1
|
432
587
|
end
|
433
588
|
|
434
589
|
$excluded_fragments.each do |text_to_avoid|
|
435
|
-
doc.
|
590
|
+
expect(doc).not_to include(text_to_avoid)
|
436
591
|
checks += 1
|
437
592
|
end
|
438
593
|
end
|
@@ -446,18 +601,18 @@ describe Readability do
|
|
446
601
|
it "should correctly guess and enforce HTML encoding" do
|
447
602
|
doc = Readability::Document.new("<html><head><meta http-equiv='content-type' content='text/html; charset=LATIN1'></head><body><div>hi!</div></body></html>")
|
448
603
|
content = doc.content
|
449
|
-
content.encoding.to_s.
|
450
|
-
content.
|
604
|
+
expect(content.encoding.to_s).to eq("ISO-8859-1")
|
605
|
+
expect(content).to be_valid_encoding
|
451
606
|
end
|
452
607
|
|
453
608
|
it "should allow encoding guessing to be skipped" do
|
454
|
-
|
609
|
+
expect(GuessHtmlEncoding).to_not receive(:encode)
|
455
610
|
doc = Readability::Document.new(@simple_html_fixture, :do_not_guess_encoding => true)
|
456
611
|
doc.content
|
457
612
|
end
|
458
613
|
|
459
614
|
it "should allow encoding guessing to be overridden" do
|
460
|
-
|
615
|
+
expect(GuessHtmlEncoding).to_not receive(:encode)
|
461
616
|
doc = Readability::Document.new(@simple_html_fixture, :encoding => "UTF-8")
|
462
617
|
doc.content
|
463
618
|
end
|
@@ -469,54 +624,54 @@ describe Readability do
|
|
469
624
|
it "should strip the html comments tag" do
|
470
625
|
doc = Readability::Document.new("<html><head><meta http-equiv='content-type' content='text/html; charset=LATIN1'></head><body><div>hi!<!-- bye~ --></div></body></html>")
|
471
626
|
content = doc.content
|
472
|
-
content.
|
473
|
-
content.
|
627
|
+
expect(content).to include("hi!")
|
628
|
+
expect(content).not_to include("bye")
|
474
629
|
end
|
475
630
|
|
476
631
|
it "should not error with empty content" do
|
477
|
-
Readability::Document.new('').content.
|
632
|
+
expect(Readability::Document.new('').content).to eq('<div><div></div></div>')
|
478
633
|
end
|
479
634
|
|
480
635
|
it "should not error with a document with no <body>" do
|
481
|
-
Readability::Document.new('<html><head><meta http-equiv="refresh" content="0;URL=http://example.com"></head></html>').content.
|
636
|
+
expect(Readability::Document.new('<html><head><meta http-equiv="refresh" content="0;URL=http://example.com"></head></html>').content).to eq('<div><div></div></div>')
|
482
637
|
end
|
483
638
|
end
|
484
|
-
|
639
|
+
|
485
640
|
describe "No side-effects" do
|
486
641
|
before do
|
487
642
|
@bbc = File.read(File.dirname(__FILE__) + "/fixtures/bbc.html")
|
488
643
|
@nytimes = File.read(File.dirname(__FILE__) + "/fixtures/nytimes.html")
|
489
|
-
@
|
644
|
+
@thesun = File.read(File.dirname(__FILE__) + "/fixtures/thesun.html")
|
490
645
|
end
|
491
|
-
|
646
|
+
|
492
647
|
it "should not have any side-effects when calling content() and then images()" do
|
493
|
-
@doc=Readability::Document.new(@nytimes, :tags => %w[div p img a], :attributes => %w[src href], :remove_empty_nodes => false,
|
648
|
+
@doc=Readability::Document.new(@nytimes, :tags => %w[div p img a], :attributes => %w[src href], :remove_empty_nodes => false,
|
494
649
|
:do_not_guess_encoding => true)
|
495
|
-
@doc.images.
|
650
|
+
expect(@doc.images).to eq(["http://graphics8.nytimes.com/images/2011/12/02/opinion/02fixes-freelancersunion/02fixes-freelancersunion-blog427.jpg"])
|
496
651
|
@doc.content
|
497
|
-
@doc.images.
|
652
|
+
expect(@doc.images).to eq(["http://graphics8.nytimes.com/images/2011/12/02/opinion/02fixes-freelancersunion/02fixes-freelancersunion-blog427.jpg"])
|
498
653
|
end
|
499
|
-
|
654
|
+
|
500
655
|
it "should not have any side-effects when calling content() multiple times" do
|
501
|
-
@doc=Readability::Document.new(@nytimes, :tags => %w[div p img a], :attributes => %w[src href], :remove_empty_nodes => false,
|
656
|
+
@doc=Readability::Document.new(@nytimes, :tags => %w[div p img a], :attributes => %w[src href], :remove_empty_nodes => false,
|
502
657
|
:do_not_guess_encoding => true)
|
503
|
-
@doc.content.
|
658
|
+
expect(@doc.content).to eq(@doc.content)
|
504
659
|
end
|
505
|
-
|
660
|
+
|
506
661
|
it "should not have any side-effects when calling content and images multiple times" do
|
507
|
-
@doc=Readability::Document.new(@nytimes, :tags => %w[div p img a], :attributes => %w[src href], :remove_empty_nodes => false,
|
662
|
+
@doc=Readability::Document.new(@nytimes, :tags => %w[div p img a], :attributes => %w[src href], :remove_empty_nodes => false,
|
508
663
|
:do_not_guess_encoding => true)
|
509
|
-
@doc.images.
|
510
|
-
@doc.content.
|
511
|
-
@doc.images.
|
664
|
+
expect(@doc.images).to eq(["http://graphics8.nytimes.com/images/2011/12/02/opinion/02fixes-freelancersunion/02fixes-freelancersunion-blog427.jpg"])
|
665
|
+
expect(@doc.content).to eq(@doc.content)
|
666
|
+
expect(@doc.images).to eq(["http://graphics8.nytimes.com/images/2011/12/02/opinion/02fixes-freelancersunion/02fixes-freelancersunion-blog427.jpg"])
|
512
667
|
end
|
513
|
-
|
668
|
+
|
514
669
|
end
|
515
|
-
|
670
|
+
|
516
671
|
describe "Code blocks" do
|
517
672
|
before do
|
518
673
|
@code = File.read(File.dirname(__FILE__) + "/fixtures/code.html")
|
519
|
-
@content = Readability::Document.new(@code,
|
674
|
+
@content = Readability::Document.new(@code,
|
520
675
|
:tags => %w[div p img a ul ol li h1 h2 h3 h4 h5 h6 blockquote strong em b code pre],
|
521
676
|
:attributes => %w[src href],
|
522
677
|
:remove_empty_nodes => false).content
|
@@ -524,29 +679,29 @@ describe Readability do
|
|
524
679
|
end
|
525
680
|
|
526
681
|
it "preserve the code blocks" do
|
527
|
-
@doc.css("code pre").text.
|
682
|
+
expect(@doc.css("code pre").text).to eq("\nroot\n indented\n ")
|
528
683
|
end
|
529
684
|
|
530
685
|
it "preserve backwards code blocks" do
|
531
|
-
@doc.css("pre code").text.
|
686
|
+
expect(@doc.css("pre code").text).to eq("\nsecond\n indented\n ")
|
532
687
|
end
|
533
688
|
end
|
534
689
|
|
535
690
|
describe "remove all tags" do
|
536
691
|
it "should work for an incomplete piece of HTML" do
|
537
692
|
doc = Readability::Document.new('<div>test</div', :tags => [])
|
538
|
-
doc.content.
|
693
|
+
expect(doc.content).to eq('test')
|
539
694
|
end
|
540
695
|
|
541
696
|
it "should work for a HTML document" do
|
542
697
|
doc = Readability::Document.new('<html><head><title>title!</title></head><body><div><p>test</p></div></body></html>',
|
543
698
|
:tags => [])
|
544
|
-
doc.content.
|
699
|
+
expect(doc.content).to eq('test')
|
545
700
|
end
|
546
701
|
|
547
702
|
it "should work for a plain text" do
|
548
703
|
doc = Readability::Document.new('test', :tags => [])
|
549
|
-
doc.content.
|
704
|
+
expect(doc.content).to eq('test')
|
550
705
|
end
|
551
706
|
end
|
552
707
|
|
@@ -563,8 +718,8 @@ describe Readability do
|
|
563
718
|
doc = Readability::Document.new(boing_boing)
|
564
719
|
|
565
720
|
content = doc.content
|
566
|
-
(content !~ /Bees and Bombs/).
|
567
|
-
content.
|
721
|
+
expect(content !~ /Bees and Bombs/).to eq(true)
|
722
|
+
expect(content).to match(/ADVERTISE/)
|
568
723
|
end
|
569
724
|
|
570
725
|
it "should apply whitelist" do
|
@@ -572,13 +727,13 @@ describe Readability do
|
|
572
727
|
doc = Readability::Document.new(boing_boing,
|
573
728
|
whitelist: ".post-content")
|
574
729
|
content = doc.content
|
575
|
-
content.
|
730
|
+
expect(content).to match(/Bees and Bombs/)
|
576
731
|
end
|
577
732
|
|
578
733
|
it "should apply blacklist" do
|
579
734
|
doc = Readability::Document.new(boing_boing, blacklist: "#sidebar_adblock")
|
580
735
|
content = doc.content
|
581
|
-
(content !~ /ADVERTISE/).
|
736
|
+
expect(content !~ /ADVERTISE/).to eq(true)
|
582
737
|
|
583
738
|
end
|
584
739
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,86 +1,72 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-readability
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Cantino
|
8
8
|
- starrhorne
|
9
9
|
- libc
|
10
10
|
- Kyle Maxwell
|
11
|
-
autorequire:
|
11
|
+
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date:
|
14
|
+
date: 2024-06-11 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: rspec
|
18
18
|
requirement: !ruby/object:Gem::Requirement
|
19
19
|
requirements:
|
20
|
-
- -
|
20
|
+
- - ">="
|
21
21
|
- !ruby/object:Gem::Version
|
22
22
|
version: '2.8'
|
23
23
|
type: :development
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
26
26
|
requirements:
|
27
|
-
- -
|
27
|
+
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: '2.8'
|
30
30
|
- !ruby/object:Gem::Dependency
|
31
31
|
name: rspec-expectations
|
32
32
|
requirement: !ruby/object:Gem::Requirement
|
33
33
|
requirements:
|
34
|
-
- -
|
34
|
+
- - ">="
|
35
35
|
- !ruby/object:Gem::Version
|
36
36
|
version: '2.8'
|
37
37
|
type: :development
|
38
38
|
prerelease: false
|
39
39
|
version_requirements: !ruby/object:Gem::Requirement
|
40
40
|
requirements:
|
41
|
-
- -
|
41
|
+
- - ">="
|
42
42
|
- !ruby/object:Gem::Version
|
43
43
|
version: '2.8'
|
44
|
-
- !ruby/object:Gem::Dependency
|
45
|
-
name: rr
|
46
|
-
requirement: !ruby/object:Gem::Requirement
|
47
|
-
requirements:
|
48
|
-
- - '>='
|
49
|
-
- !ruby/object:Gem::Version
|
50
|
-
version: '1.0'
|
51
|
-
type: :development
|
52
|
-
prerelease: false
|
53
|
-
version_requirements: !ruby/object:Gem::Requirement
|
54
|
-
requirements:
|
55
|
-
- - '>='
|
56
|
-
- !ruby/object:Gem::Version
|
57
|
-
version: '1.0'
|
58
44
|
- !ruby/object:Gem::Dependency
|
59
45
|
name: nokogiri
|
60
46
|
requirement: !ruby/object:Gem::Requirement
|
61
47
|
requirements:
|
62
|
-
- -
|
48
|
+
- - ">="
|
63
49
|
- !ruby/object:Gem::Version
|
64
50
|
version: 1.6.0
|
65
51
|
type: :runtime
|
66
52
|
prerelease: false
|
67
53
|
version_requirements: !ruby/object:Gem::Requirement
|
68
54
|
requirements:
|
69
|
-
- -
|
55
|
+
- - ">="
|
70
56
|
- !ruby/object:Gem::Version
|
71
57
|
version: 1.6.0
|
72
58
|
- !ruby/object:Gem::Dependency
|
73
59
|
name: guess_html_encoding
|
74
60
|
requirement: !ruby/object:Gem::Requirement
|
75
61
|
requirements:
|
76
|
-
- -
|
62
|
+
- - ">="
|
77
63
|
- !ruby/object:Gem::Version
|
78
64
|
version: 0.0.4
|
79
65
|
type: :runtime
|
80
66
|
prerelease: false
|
81
67
|
version_requirements: !ruby/object:Gem::Requirement
|
82
68
|
requirements:
|
83
|
-
- -
|
69
|
+
- - ">="
|
84
70
|
- !ruby/object:Gem::Version
|
85
71
|
version: 0.0.4
|
86
72
|
description: Port of arc90's readability project to ruby
|
@@ -91,10 +77,10 @@ executables:
|
|
91
77
|
extensions: []
|
92
78
|
extra_rdoc_files: []
|
93
79
|
files:
|
94
|
-
- .
|
95
|
-
- .
|
96
|
-
- .
|
97
|
-
- .yardopts
|
80
|
+
- ".github/workflows/ruby.yml"
|
81
|
+
- ".gitignore"
|
82
|
+
- ".rspec"
|
83
|
+
- ".yardopts"
|
98
84
|
- Gemfile
|
99
85
|
- Guardfile
|
100
86
|
- LICENSE
|
@@ -108,9 +94,13 @@ files:
|
|
108
94
|
- spec/fixtures/boing_boing.html
|
109
95
|
- spec/fixtures/cant_read.html
|
110
96
|
- spec/fixtures/code.html
|
97
|
+
- spec/fixtures/codinghorror.html
|
98
|
+
- spec/fixtures/images/Confusion_of_Tongues.png
|
99
|
+
- spec/fixtures/images/JohnPinhole.jpg
|
111
100
|
- spec/fixtures/images/dim_1416768a.jpg
|
112
101
|
- spec/fixtures/images/sign_up_emails_682__703711a.gif
|
113
102
|
- spec/fixtures/images/sign_up_emails_682__703712a.gif
|
103
|
+
- spec/fixtures/nested_images.html
|
114
104
|
- spec/fixtures/nytimes.html
|
115
105
|
- spec/fixtures/sample.html
|
116
106
|
- spec/fixtures/samples/blogpost_with_links-fragments.rb
|
@@ -129,24 +119,23 @@ files:
|
|
129
119
|
homepage: http://github.com/cantino/ruby-readability
|
130
120
|
licenses: []
|
131
121
|
metadata: {}
|
132
|
-
post_install_message:
|
122
|
+
post_install_message:
|
133
123
|
rdoc_options: []
|
134
124
|
require_paths:
|
135
125
|
- lib
|
136
126
|
required_ruby_version: !ruby/object:Gem::Requirement
|
137
127
|
requirements:
|
138
|
-
- -
|
128
|
+
- - ">="
|
139
129
|
- !ruby/object:Gem::Version
|
140
130
|
version: '0'
|
141
131
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
142
132
|
requirements:
|
143
|
-
- -
|
133
|
+
- - ">="
|
144
134
|
- !ruby/object:Gem::Version
|
145
135
|
version: '0'
|
146
136
|
requirements: []
|
147
|
-
|
148
|
-
|
149
|
-
signing_key:
|
137
|
+
rubygems_version: 3.5.10
|
138
|
+
signing_key:
|
150
139
|
specification_version: 4
|
151
140
|
summary: Port of arc90's readability project to ruby
|
152
141
|
test_files:
|
@@ -154,9 +143,13 @@ test_files:
|
|
154
143
|
- spec/fixtures/boing_boing.html
|
155
144
|
- spec/fixtures/cant_read.html
|
156
145
|
- spec/fixtures/code.html
|
146
|
+
- spec/fixtures/codinghorror.html
|
147
|
+
- spec/fixtures/images/Confusion_of_Tongues.png
|
148
|
+
- spec/fixtures/images/JohnPinhole.jpg
|
157
149
|
- spec/fixtures/images/dim_1416768a.jpg
|
158
150
|
- spec/fixtures/images/sign_up_emails_682__703711a.gif
|
159
151
|
- spec/fixtures/images/sign_up_emails_682__703712a.gif
|
152
|
+
- spec/fixtures/nested_images.html
|
160
153
|
- spec/fixtures/nytimes.html
|
161
154
|
- spec/fixtures/sample.html
|
162
155
|
- spec/fixtures/samples/blogpost_with_links-fragments.rb
|