ruby-readability 0.7.0 → 0.7.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: b9f4f443e32b774c8c2b14856c78e7c593c6ef41
4
- data.tar.gz: 3f6916bfc9b1c88c3c45f5e839fe0e2a4b882ab5
2
+ SHA256:
3
+ metadata.gz: f83eb55e4c0c4c30ad54e8e7104d68da8a5eb2b4d9cc76b45255055d89bf4b5c
4
+ data.tar.gz: 4d003c39b589477449bedd34634c5482dd503e94bfe24b9a5c29ea94f9b49f83
5
5
  SHA512:
6
- metadata.gz: fdf2bb73b0ff4db4617c34996e72f23465d33d90a7631eaaa979235fd8f1f8c529dcf39f7930dc447df72e35e640726b0a3567e3cf0abdafb1ab88e46eb4e3ac
7
- data.tar.gz: e75ebfeb153e89fbe52e94e0eab2f33865b32c75ed89e5411387d2cfa6a2f92d0671ecc000229d1ac3cf2027d18e7b7050053c32ab44dca05c8f9a35b20a1194
6
+ metadata.gz: e799e831297b18b381c3b1caad19531f99fe084f640afbddd1cf91e75fe234d3af4618f07e02a0c6214824726e3afe79accbb8ea5f0d66d9117b13112d22e8ef
7
+ data.tar.gz: 404d3a1bc702f3bd609e8c3ba8e37d6f023b2a3c126c278e7463a3dfee1cc5bf683f6c0c75cfabbb14e477f582b33cc8204d8682f33ed9a235b6fac8e90d9ad2
@@ -0,0 +1,25 @@
1
+ name: Ruby
2
+
3
+ on:
4
+ push:
5
+ branches: [ master ]
6
+ pull_request:
7
+ branches: [ master ]
8
+
9
+ jobs:
10
+ test:
11
+
12
+ runs-on: ubuntu-latest
13
+ strategy:
14
+ matrix:
15
+ ruby-version: ['2.7']
16
+
17
+ steps:
18
+ - uses: actions/checkout@v2
19
+ - name: Set up Ruby
20
+ uses: ruby/setup-ruby@v1
21
+ with:
22
+ ruby-version: ${{ matrix.ruby-version }}
23
+ bundler-cache: true # runs 'bundle install' and caches installed gems automatically
24
+ - name: Run tests
25
+ run: bundle exec rspec
data/.rspec CHANGED
@@ -1,2 +1,2 @@
1
1
  --colour
2
- --format s -c
2
+ --format documentation -c
data/README.md CHANGED
@@ -7,7 +7,7 @@ webpage. It is a Ruby port of arc90's readability project.
7
7
  Build Status
8
8
  ------------
9
9
 
10
- [![Build Status](https://travis-ci.org/cantino/ruby-readability.png)](https://travis-ci.org/cantino/ruby-readability)
10
+ [![Ruby](https://github.com/cantino/ruby-readability/actions/workflows/ruby.yml/badge.svg?branch=master)](https://github.com/cantino/ruby-readability/actions/workflows/ruby.yml)
11
11
 
12
12
  Install
13
13
  -------
@@ -41,7 +41,7 @@ You may provide options to `Readability::Document.new`, including:
41
41
  * `:remove_empty_nodes`: remove `<p>` tags that have no text content; also
42
42
  removes `<p>` tags that contain only images;
43
43
  * `:attributes`: whitelist of allowed attributes;
44
- * `:debug`: provide debugging output, defaults false;
44
+ * `:debug`: provide debugging output, defaults false; supports setting a Proc;
45
45
  * `:encoding`: if the page is of a known encoding, you can specify it; if left
46
46
  unspecified, the encoding will be guessed (only in Ruby 1.9.x). If you wish
47
47
  to disable guessing, supply `:do_not_guess_encoding => true`;
@@ -78,6 +78,7 @@ feature requires that the `fastimage` gem be installed.
78
78
  Related Projects
79
79
  ----------------
80
80
 
81
+ * [readability.cr](https://github.com/joenas/readability.cr) - Port of ruby-readability's port of arc90's readability project to Crystal
81
82
  * [newspaper](https://github.com/codelucas/newspaper) is an advanced news extraction, article extraction, and content curation library for Python.
82
83
 
83
84
  Potential Issues
@@ -102,7 +103,3 @@ License
102
103
  This code is under the Apache License 2.0. See <http://www.apache.org/licenses/LICENSE-2.0>.
103
104
 
104
105
  Ruby port by cantino, starrhorne, libc, and iterationlabs. Special thanks to fizx and marcosinger.
105
-
106
-
107
- [![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/cantino/ruby-readability/trend.png)](https://bitdeli.com/free "Bitdeli Badge")
108
-
data/lib/readability.rb CHANGED
@@ -17,9 +17,12 @@ module Readability
17
17
  :min_image_height => 80,
18
18
  :ignore_image_format => [],
19
19
  :blacklist => nil,
20
- :whitelist => nil
20
+ :whitelist => nil,
21
+ :elements_to_score => ["p", "td", "pre"],
22
+ :likely_siblings => ["p"],
23
+ :ignore_redundant_nesting => false
21
24
  }.freeze
22
-
25
+
23
26
  REGEXES = {
24
27
  :unlikelyCandidatesRe => /combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup/i,
25
28
  :okMaybeItsACandidateRe => /and|article|body|column|main|shadow/i,
@@ -33,7 +36,7 @@ module Readability
33
36
  :killBreaksRe => /(<br\s*\/?>(\s|&nbsp;?)*){1,}/,
34
37
  :videoRe => /http:\/\/(www\.)?(youtube|vimeo)\.com/i
35
38
  }
36
-
39
+
37
40
  attr_accessor :options, :html, :best_candidate, :candidates, :best_candidate_has_image
38
41
 
39
42
  def initialize(input, options = {})
@@ -48,7 +51,7 @@ module Readability
48
51
  @input = @input.gsub(REGEXES[:replaceBrsRe], '</p><p>').gsub(REGEXES[:replaceFontsRe], '<\1span>')
49
52
  @remove_unlikely_candidates = @options[:remove_unlikely_candidates]
50
53
  @weight_classes = @options[:weight_classes]
51
- @clean_conditionally = @options[:clean_conditionally]
54
+ @clean_conditionally = !!@options[:clean_conditionally]
52
55
  @best_candidate_has_image = true
53
56
  make_html
54
57
  handle_exclusions!(@options[:whitelist], @options[:blacklist])
@@ -143,11 +146,11 @@ module Readability
143
146
 
144
147
  (list_images.empty? and content != @html) ? images(@html, true) : list_images
145
148
  end
146
-
149
+
147
150
  def images_with_fqdn_uris!(source_uri)
148
151
  images_with_fqdn_uris(@html, source_uri)
149
152
  end
150
-
153
+
151
154
  def images_with_fqdn_uris(document = @html.dup, source_uri)
152
155
  uri = URI.parse(source_uri)
153
156
  host = uri.host
@@ -159,7 +162,7 @@ module Readability
159
162
  images = []
160
163
  document.css("img").each do |elem|
161
164
  begin
162
- elem['src'] = URI.join(base,elem['src']).to_s if URI.parse(elem['src']).host == nil
165
+ elem['src'] = URI.join(base,elem['src']).to_s if URI.parse(elem['src']).host == nil
163
166
  images << elem['src'].to_s
164
167
  rescue URI::InvalidURIError => exc
165
168
  elem.remove
@@ -260,15 +263,27 @@ module Readability
260
263
  # Things like preambles, content split by ads that we removed, etc.
261
264
 
262
265
  sibling_score_threshold = [10, best_candidate[:content_score] * 0.2].max
266
+ downcased_likely_siblings = options[:likely_siblings].map(&:downcase)
263
267
  output = Nokogiri::XML::Node.new('div', @html)
264
- best_candidate[:elem].parent.children.each do |sibling|
268
+
269
+ # If the best candidate is the only element in its parent then we will never find any siblings. Therefore,
270
+ # find the closest ancestor that has siblings (if :ignore_redundant_nesting is true). This improves the
271
+ # related content detection, but could lead to false positives. Not supported in arc90's readability.
272
+ node =
273
+ if options[:ignore_redundant_nesting]
274
+ closest_node_with_siblings(best_candidate[:elem])
275
+ else
276
+ best_candidate[:elem] # This is the default behaviour for consistency with arc90's readability.
277
+ end
278
+
279
+ node.parent.children.each do |sibling|
265
280
  append = false
266
- append = true if sibling == best_candidate[:elem]
281
+ append = true if sibling == node
267
282
  append = true if candidates[sibling] && candidates[sibling][:content_score] >= sibling_score_threshold
268
283
 
269
- if sibling.name.downcase == "p"
284
+ if downcased_likely_siblings.include?(sibling.name.downcase)
270
285
  link_density = get_link_density(sibling)
271
- node_content = sibling.text
286
+ node_content = sibling.text.strip
272
287
  node_length = node_content.length
273
288
 
274
289
  append = if node_length > 80 && link_density < 0.25
@@ -288,6 +303,23 @@ module Readability
288
303
  output
289
304
  end
290
305
 
306
+ def closest_node_with_siblings(element)
307
+ node = element
308
+
309
+ until node.node_name == 'body'
310
+ siblings = node.parent.children
311
+ non_empty = siblings.reject { |sibling| sibling.text? && sibling.text.strip.empty? }
312
+
313
+ if non_empty.size > 1
314
+ return node
315
+ else
316
+ node = node.parent
317
+ end
318
+ end
319
+
320
+ node
321
+ end
322
+
291
323
  def select_best_candidate(candidates)
292
324
  sorted_candidates = candidates.values.sort { |a, b| b[:content_score] <=> a[:content_score] }
293
325
 
@@ -310,7 +342,7 @@ module Readability
310
342
 
311
343
  def score_paragraphs(min_text_length)
312
344
  candidates = {}
313
- @html.css("p,td").each do |elem|
345
+ @html.css(options[:elements_to_score].join(',')).each do |elem|
314
346
  parent_node = elem.parent
315
347
  grand_parent_node = parent_node.respond_to?(:parent) ? parent_node.parent : nil
316
348
  inner_text = elem.text
@@ -369,7 +401,11 @@ module Readability
369
401
  end
370
402
 
371
403
  def debug(str)
372
- puts str if options[:debug]
404
+ if options[:debug].respond_to?(:call)
405
+ options[:debug].call(str)
406
+ elsif options[:debug]
407
+ puts str
408
+ end
373
409
  end
374
410
 
375
411
  def remove_unlikely_candidates!
@@ -423,6 +459,9 @@ module Readability
423
459
 
424
460
  # We'll sanitize all elements using a whitelist
425
461
  base_whitelist = @options[:tags] || %w[div p]
462
+ all_tags_whitelisted = base_whitelist.include?("*")
463
+ all_attr_whitelisted = @options[:attributes] && @options[:attributes].include?("*")
464
+
426
465
  # We'll add whitespace instead of block elements,
427
466
  # so a<br>b will have a nice space between them
428
467
  base_replace_with_whitespace = %w[br hr h1 h2 h3 h4 h5 h6 dl dd ol li ul address blockquote center]
@@ -435,8 +474,8 @@ module Readability
435
474
 
436
475
  ([node] + node.css("*")).each do |el|
437
476
  # If element is in whitelist, delete all its attributes
438
- if whitelist[el.node_name]
439
- el.attributes.each { |a, x| el.delete(a) unless @options[:attributes] && @options[:attributes].include?(a.to_s) }
477
+ if all_tags_whitelisted || whitelist[el.node_name]
478
+ el.attributes.each { |a, x| el.delete(a) unless @options[:attributes] && @options[:attributes].include?(a.to_s) } unless all_attr_whitelisted
440
479
 
441
480
  # Otherwise, replace the element with its contents
442
481
  else
@@ -465,30 +504,43 @@ module Readability
465
504
 
466
505
  def clean_conditionally(node, candidates, selector)
467
506
  return unless @clean_conditionally
507
+
468
508
  node.css(selector).each do |el|
469
509
  weight = class_weight(el)
470
510
  content_score = candidates[el] ? candidates[el][:content_score] : 0
471
511
  name = el.name.downcase
472
-
512
+ remove = false
513
+ message = nil
514
+
473
515
  if weight + content_score < 0
474
- el.remove
475
- debug("Conditionally cleaned #{name}##{el[:id]}.#{el[:class]} with weight #{weight} and content score #{content_score} because score + content score was less than zero.")
516
+ remove = true
517
+ message = "Conditionally cleaned #{name}##{el[:id]}.#{el[:class]} with weight #{weight} and content score #{content_score} because score + content score was less than zero."
476
518
  elsif el.text.count(",") < 10
477
519
  counts = %w[p img li a embed input].inject({}) { |m, kind| m[kind] = el.css(kind).length; m }
478
520
  counts["li"] -= 100
479
521
 
480
522
  # For every img under a noscript tag discount one from the count to avoid double counting
481
523
  counts["img"] -= el.css("noscript").css("img").length
482
-
524
+
483
525
  content_length = el.text.strip.length # Count the text length excluding any surrounding whitespace
484
526
  link_density = get_link_density(el)
485
527
 
486
528
  reason = clean_conditionally_reason?(name, counts, content_length, options, weight, link_density)
487
529
  if reason
488
- debug("Conditionally cleaned #{name}##{el[:id]}.#{el[:class]} with weight #{weight} and content score #{content_score} because it has #{reason}.")
489
- el.remove
530
+ message = "Conditionally cleaned #{name}##{el[:id]}.#{el[:class]} with weight #{weight} and content score #{content_score} because it has #{reason}."
531
+ remove = true
490
532
  end
491
533
  end
534
+
535
+ if options[:clean_conditionally].respond_to?(:call)
536
+ context = { remove: remove, message: message, weight: weight, content_score: content_score, el: el }
537
+ remove = options[:clean_conditionally].call(context) # Allow the user to override the decision for whether to remove the element.
538
+ end
539
+
540
+ if remove
541
+ debug(message || "Conditionally cleaned by user-specified function.")
542
+ el.remove
543
+ end
492
544
  end
493
545
  end
494
546
 
@@ -3,15 +3,13 @@ $:.push File.expand_path("../lib", __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "ruby-readability"
6
- s.version = '0.7.0'
6
+ s.version = '0.7.2'
7
7
  s.authors = ["Andrew Cantino", "starrhorne", "libc", "Kyle Maxwell"]
8
8
  s.email = ["andrew@iterationlabs.com"]
9
9
  s.homepage = "http://github.com/cantino/ruby-readability"
10
10
  s.summary = %q{Port of arc90's readability project to ruby}
11
11
  s.description = %q{Port of arc90's readability project to ruby}
12
12
 
13
- s.rubyforge_project = "ruby-readability"
14
-
15
13
  s.files = `git ls-files`.split("\n")
16
14
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
17
15
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
@@ -19,7 +17,6 @@ Gem::Specification.new do |s|
19
17
 
20
18
  s.add_development_dependency "rspec", ">= 2.8"
21
19
  s.add_development_dependency "rspec-expectations", ">= 2.8"
22
- s.add_development_dependency "rr", ">= 1.0"
23
20
  s.add_dependency 'nokogiri', '>= 1.6.0'
24
21
  s.add_dependency 'guess_html_encoding', '>= 0.0.4'
25
22
  end
@@ -0,0 +1,189 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta charset="utf-8"/>
5
+ <title>Standard Flavored Markdown</title>
6
+ <meta name="description" content=""/>
7
+ <meta name="HandheldFriendly" content="True"/>
8
+ <meta name="MobileOptimized" content="320"/>
9
+ <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
10
+ <link rel="shortcut icon" href="/assets/images/favicon.ico?v=8684b6a35e">
11
+ <link rel="apple-touch-icon" href="/assets/images/codinghorror-app-icon.png?v=8684b6a35e">
12
+ <meta name="google-site-verification" content="sl0m9SU_4V0JcvjWlOX4dUFBR6VS2P4tlxjJMo0gphU"/>
13
+ <link rel="stylesheet" type="text/css" href="/assets/css/screen.css?v=8684b6a35e"/>
14
+ <link rel="stylesheet" type="text/css" href="//fonts.googleapis.com/css?family=Open+Sans:400italic,700italic,400,700"/>
15
+ <link rel="alternate" type="application/rss+xml" title="Coding Horror" href="http://feeds.feedburner.com/codinghorror">
16
+ </head>
17
+ <body class="post-template">
18
+ <header class="site-head">
19
+ <div class="site-head-content">
20
+ <a class="blog-logo" href="http://blog.codinghorror.com"><img src="/assets/images/codinghorror-app-icon.png?v=8684b6a35e" alt="Coding Horror Logo" width="158" height="158"/></a>
21
+ <h1 class="blog-title"><a href="http://blog.codinghorror.com">Coding Horror</a></h1>
22
+ <h2 class="blog-description">programming and human factors</h2>
23
+ <div class="site-search">
24
+ <script>
25
+ (function() {
26
+ var cx = '016956275695630057531:lqveu9tah7y';
27
+ var gcse = document.createElement('script');
28
+ gcse.type = 'text/javascript';
29
+ gcse.async = true;
30
+ gcse.src = (document.location.protocol == 'https:' ? 'https:' : 'http:') + '//www.google.com/cse/cse.js?cx=' + cx;
31
+ var s = document.getElementsByTagName('script')[0];
32
+ s.parentNode.insertBefore(gcse, s);
33
+ })();
34
+ </script>
35
+ <gcse:search></gcse:search>
36
+ </div>
37
+ </div>
38
+ </header>
39
+ <div class="wrap clearfix">
40
+ <div class="clearfix"></div>
41
+ <main class="content" role="main">
42
+ <article class="post">
43
+ <span class="post-meta"><time datetime="2014-09-03">03 Sep 2014</time> </span>
44
+ <h1 class="post-title">Standard Flavored Markdown</h1>
45
+ <section class="post-content">
46
+ <p>In 2009 I <a href="http://blog.codinghorror.com/responsible-open-source-code-parenting/">lamented the state of Markdown</a>:</p>
47
+ <blockquote>
48
+ <p>Right now we have the worst of both worlds. Lack of leadership from the top, and a bunch of fragmented, poorly coordinated community efforts to advance Markdown, none of which are officially canon. This isn't merely incovenient for anyone trying to find accurate information about Markdown; it's actually harming the project's future. </p>
49
+ </blockquote>
50
+ <p>In late 2012, David Greenspan from <a href="https://www.meteor.com/">Meteor</a> approached me and proposed we move forward, and <a href="http://blog.codinghorror.com/the-future-of-markdown/">a project crystallized</a>:</p>
51
+ <blockquote>
52
+ <p>I propose that Stack Exchange, GitHub, Meteor, Reddit, and any other company with lots of traffic and a strategic investment in Markdown, all work together to <strong>come up with an official Markdown specification, and standard test suites to validate Markdown implementations</strong>. We've all been working at cross purposes for too long, accidentally fragmenting Markdown while popularizing it.</p>
53
+ </blockquote>
54
+ <p>We formed a small private working group with key representatives from GitHub, from Reddit, from Stack Exchange, from the open source community. We spent months hashing out the details and agreeing on the necessary changes to turn Markdown into a language you can parse without feeling like you just walked through a sewer &ndash; while preserving the simple, clear, ASCII email inspired spirit of Markdown.</p>
55
+ <p>We really struggled with this at <a href="http://www.discourse.org">Discourse</a>, which is also based on Markdown, but an even more complex dialect than the one we built at Stack Overflow. In Discourse, you can mix <em>three</em> forms of markup interchangeably:</p>
56
+ <ul>
57
+ <li>Markdown</li>
58
+ <li>HTML (safe subset)</li>
59
+ <li>BBCode (subset)</li>
60
+ </ul>
61
+ <p>Discourse is primarily a JavaScript app, so naturally we needed a nice, compliant implementation of Markdown in JavaScript. Surely such a thing exists, yes? Nope. Even in 2012, we found <em>zero</em> JavaScript implementations of Markdown that could pass the only Markdown test suite I know of, <a href="https://github.com/michelf/mdtest/">MDTest</a>. It isn't authoritative, it's a community created initiative that embodies its own decisions about rendering ambiguities in Markdown, but it's all we've got. We contributed many <a href="https://github.com/evilstreak/markdown-js/commits/master">upstream fixes to markdown.js</a> to make it pass MDTest &ndash; but it still only passes in our locally extended version.</p>
62
+ <p>As an open source project ourselves, we're perfectly happy contributing upstream code to improve it for everyone. But it's an indictment of the state of the Markdown ecosystem that any remotely popular implementation wasn't already testing itself against a formal spec and test suite. But who can blame them, because <i>it didn't exist!</i></p>
63
+ <p>Well, now it does.</p>
64
+ <p>It took a while, but I'm pleased to announce that <a href="http://standardmarkdown.com"><strong>Standard Markdown</strong></a> is now finally ready for public review.</p>
65
+ <p><strong><a href="http://standardmarkdown.com">standardmarkdown.com</a></strong></p>
66
+ <p>It's a spec, including embedded examples, and implementations in portable C and JavaScript. We strived mightily to stay true to the spirit of Markdown in writing it. The primary author, John MacFarlane, <a href="http://spec.standardmarkdown.com">explains in the introduction to the spec</a>:</p>
67
+ <blockquote>
68
+ <p>Because Gruber’s syntax description leaves many aspects of the syntax undetermined, writing a precise spec requires making a large number of decisions, many of them somewhat arbitrary. In making them, I have appealed to existing conventions and considerations of simplicity, readability, expressive power, and consistency. I have tried to ensure that “normal” documents in the many incompatible existing implementations of markdown will render, as far as possible, as their authors intended. And I have tried to make the rules for different elements work together harmoniously. In places where different decisions could have been made (for example, the rules governing list indentation), I have explained the rationale for my choices. In a few cases, I have departed slightly from the canonical syntax description, in ways that I think further the goals of markdown as stated in that description.</p>
69
+ </blockquote>
70
+ <p>Part of my contribution to the project is to host the discussion / mailing list for Standard Markdown in a Discourse instance. </p>
71
+ <p><strong><a href="http://talk.standardmarkdown.com">talk.standardmarkdown.com</a></strong></p>
72
+ <p>Fortunately, Discourse itself <a href="http://blog.discourse.org/2014/08/introducing-discourse-1-0/">just reached version 1.0</a>. If the only thing Standard Markdown does is help save a few users from the continuing horror that is mailing list web UI, we all win.</p>
73
+ <p>What I'm most excited about is that we got a massive contribution from the one person who, in my mind, was the most perfect person in the world to work on this project: <a href="http://johnmacfarlane.net/">John MacFarlane</a>. He took our feedback and wrote the entire Standard Markdown spec and both implementations.</p>
74
+ <p><a href="http://johnmacfarlane.net/"><img src="/content/images/2014/Sep/JohnPinhole.jpg" alt="" title=""/></a></p>
75
+ <p>A lot of people know of John through his <a href="http://johnmacfarlane.net/pandoc/">Pandoc</a> project, which is amazing in its own right, but I found out about him because he built <a href="http://johnmacfarlane.net/babelmark2/faq.html">Babelmark</a>. I learned to refer to Babelmark extensively while working on Stack Overflow and MarkdownSharp, a C# implementation of Markdown.</p>
76
+ <p>Here's how crazy Markdown is: to decide what the "correct" behavior is, you provide sample Markdown input to 20+ different Markdown parsers &hellip; and then pray that some consensus emerges in all their output. That's what Babelmark does.</p>
77
+ <p>Consider this simple Markdown example:</p>
78
+ <pre><code># Hello there
79
+
80
+ This is a paragraph.
81
+
82
+ - one
83
+ - two
84
+ - three
85
+ - four
86
+
87
+ 1. pirate
88
+ 2. ninja
89
+ 3. zombie
90
+ </code></pre>
91
+ <p>Just for that, I count <a href="http://johnmacfarlane.net/babelmark2/?text=%23+Hello+there%0A%0AThis+is+a+paragraph.%0A%0A-+one%0A-+two%0A-+three%0A-+four%0A%0A1.+pirate%0A2.+ninja%0A3.+zombie"><em>fifteen</em> different rendered outputs</a> from 22 different Markdown parsers.</p>
92
+ <p><a href="http://en.wikipedia.org/wiki/Tower_of_Babel"><img src="/content/images/2014/Sep/Confusion_of_Tongues.png" alt="" title=""/></a></p>
93
+ <p>In Markdown, we <em>literally</em> built a <a href="http://en.wikipedia.org/wiki/Tower_of_Babel">Tower of Babel</a>. </p>
94
+ <p>Have I mentioned that it's a good idea for a language to have a formal specification and test suites? Maybe now you can see why that is.</p>
95
+ <p>Oh, and in his spare time, John is also the chair of the department of philosophy at the University of California, Berkeley. <em>No big deal.</em> While I don't mean to minimize the contributions of anyone to the Standard Markdown project, we all owe a special thanks to John.</p>
96
+ <p>Markdown is indeed everywhere. And that's a good thing. But it needs to be sane, parseable, and standard. That's the goal of <a href="http://standardmarkdown.com/">Standard Markdown</a> &mdash; but we need your help to get there. If you use Markdown on a website, <strong>ask what it would take for that site to become compatible with Standard Markdown</strong>; when you see the word "Markdown" you have the right to expect consistent rendering across all the websites you visit. If you implement Markdown, <a href="http://spec.standardmarkdown.com">take a look at the spec</a>, try to <strong>make your parser compatible with Standard Markdown</strong>, and <a href="http://talk.standardmarkdown.com">discuss improvements or refinements</a> to the spec.</p>
97
+ <p><span style="color:red;">Update:</span> The project was renamed <a href="http://commonmark.org">CommonMark</a>. See <a href="http://blog.codinghorror.com/standard-markdown-is-now-common-markdown/">my subsequent blog post</a>.</p>
98
+ <table>
99
+ <tr><td class="welovecodinghorror">
100
+ [advertisement] How are you showing off your awesome? Create a <a href="http://careers.stackoverflow.com/cv" rel="nofollow">Stack Overflow Careers profile</a> and show off all of your hard work from Stack Overflow, Github, and virtually every other coding site. Who knows, you might even get recruited for a great <a href="http://careers.stackoverflow.com/jobs" rel="nofollow">new position</a>!
101
+ </td></tr>
102
+ </table>
103
+ </section>
104
+ <footer class="post-footer">
105
+ <section class="author">
106
+ <h4>Written by Jeff Atwood</h4>
107
+ <p>Indoor enthusiast. Co-founder of Stack Exchange and Discourse. Disclaimer: I have no idea what I&#x27;m talking about. Find me here: <a href="http://twitter.com/codinghorror">http://twitter.com/codinghorror</a></p>
108
+ </section>
109
+ </footer>
110
+ <div id="nrelate_related_placeholder"></div> <script async id="nrelate_loader_script" type="text/javascript" src="http://static.nrelate.com/common_js/0.52.1/loader.min.js"></script>
111
+ </article>
112
+ <div id="discourse-comments"></div>
113
+ <script type="text/javascript">
114
+ var discourseUrl = "http://discourse.codinghorror.com/",
115
+ discourseEmbedUrl = 'http://blog.codinghorror.com/standard-flavored-markdown/';
116
+
117
+ (function() {
118
+ var d = document.createElement('script'); d.type = 'text/javascript'; d.async = true;
119
+ d.src = discourseUrl + 'javascripts/embed.js';
120
+ (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(d);
121
+ })();
122
+ </script>
123
+ </main>
124
+ <aside class="sidebar">
125
+
126
+ <div id="carbonads-container"><div class="carbonad"><div id="azcarbon"></div><script type="text/javascript">var z = document.createElement("script"); z.type = "text/javascript"; z.async = true; z.src = "http://engine.carbonads.com/z/56742/azcarbon_2_1_0_VERT"; var s = document.getElementsByTagName("script")[0]; s.parentNode.insertBefore(z, s);</script></div></div>
127
+ <div id="hireme" class="hireme codinghorror" style="min-height: 220px; margin-bottom: 15px;"></div>
128
+ <script>
129
+ setTimeout(function () {
130
+ var a = document.createElement("script");
131
+ var b = document.getElementsByTagName('script')[0];
132
+ a.src = "http://careers.stackoverflow.com/ad/js";
133
+ a.async = true;
134
+ a.type = "text/javascript";
135
+ b.parentNode.insertBefore(a, b);
136
+ }, 5);
137
+ </script>
138
+ <div class="welovecodinghorror" style="margin-bottom:15px">
139
+ [ad] Enjoy the blog? Read <b><a href="http://www.hyperink.com/Effective-Programming-More-Than-Writing-Code-b1559">Effective Programming: More than Writing Code</a></b> and <b><a href="http://www.hyperink.com/How-To-Stop-Sucking-And-Be-Awesome-Instead-b9A74B5CBA6">How to Stop Sucking and Be Awesome Instead</a></b> on your Kindle, iPad, Nook, or as a PDF.
140
+ </div>
141
+ <h3>Resources</h3>
142
+ <ul>
143
+ <li><a href="/about-me/">About Me</a></li>
144
+ <li><a href="http://twitter.com/codinghorror">@codinghorror</a></li>
145
+ <li><a href="http://www.discourse.org/">discourse.org</a></li>
146
+ <li><a href="http://stackexchange.com/">stackexchange.com</a></li>
147
+ <li><a href="/recommended-reading-for-developers/">Recommended Reading</a></li>
148
+ </ul>
149
+ <ul>
150
+ <li><a href="http://feeds.feedburner.com/codinghorror" class="icon-feed">&nbsp;Subscribe in a reader</a></li>
151
+ <li><a href="http://feedburner.google.com/fb/a/mailverify?uri=codinghorror&amp;loc=en_US" class="icon-email">&nbsp;Subscribe via email</a></li>
152
+ </ul>
153
+ <p>Coding Horror has been continuously published since 2004</p>
154
+ <ul>
155
+ <li><img src="http://feeds.feedburner.com/~fc/codinghorror?bg=EEEEEE&amp;fg=111111&amp;anim=0" height="26" width="88" style="border:0" alt="Count of RSS readers"></li>
156
+ <li><a href="http://my.statcounter.com/project/standard/stats.php?project_id=2600027&amp;guest=1">Traffic Stats</a></li>
157
+ </ul>
158
+ <footer class="site-footer">
159
+ <section class="copyright">Copyright <a rel="author" href="https://profiles.google.com/codinghorror1">Jeff Atwood</a> &copy; 2014<br/>
160
+ Logo image &copy; 1993 Steven C. McConnell <br/>
161
+ Proudly published with <a class="icon-ghost" href="http://ghost.org">Ghost</a></section>
162
+ </footer></aside>
163
+ </div>
164
+ <script src="/public/jquery.min.js?v=8684b6a35e"></script>
165
+ <script type="text/javascript" src="/assets/js/jquery.fitvids.js?v=8684b6a35e"></script>
166
+ <script type="text/javascript" src="/assets/js/index.js?v=8684b6a35e"></script>
167
+ <script async src="http://www.statcounter.com/counter/counter.js"></script>
168
+ <noscript><a href="http://www.statcounter.com/"><img src="http://c26.statcounter.com/counter.php?sc_project=2600027&amp;java=0&amp;security=dcff5548&amp;invisible=0" alt="web metrics"></a> </noscript>
169
+
170
+ <script>
171
+ document.write(unescape("%3Cscript src='" + (document.location.protocol == "https:" ? "https://sb" : "http://b") + ".scorecardresearch.com/beacon.js'%3E%3C/script%3E"));
172
+ </script>
173
+ <script>
174
+ COMSCORE.beacon({
175
+ c1: 2,
176
+ c2: "6035669",
177
+ c3: "",
178
+ c4: "http://www.codinghorror.com/blog/",
179
+ c5: "",
180
+ c6: "",
181
+ c15: ""
182
+ });
183
+ </script>
184
+ <noscript>
185
+ <img src="http://b.scorecardresearch.com/b?c1=2&amp;c2=6035669&amp;c3=&amp;c4=http%3A%2F%2Fwww.codinghorror.com%2Fblog%2F&amp;c5=&amp;c6=&amp;c15=&amp;cv=1.3&amp;cj=1" style="display:none" width="0" height="0" alt=""/>
186
+ </noscript>
187
+ <img src="/view.gif?page=/standard-flavored-markdown/" alt="" style="display:none" hidden />
188
+ </body>
189
+ </html>
@@ -0,0 +1,11 @@
1
+ <html>
2
+ <body>
3
+ <article>
4
+ <section>
5
+ <figure>
6
+ <img src="http://example.com/image.jpeg" />
7
+ </figure>
8
+ </section>
9
+ </article>
10
+ </body>
11
+ </html>