md-noko 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 9fde11dc2fda8402592b512221cf66545b1d17a9fcb79183ae4cc299449685d8
4
+ data.tar.gz: ea5ac14c87fde2826ad792007a2f38df8be6c4890b93a3915700fefb94cf6efe
5
+ SHA512:
6
+ metadata.gz: 80d7b6bbf6cf730a42780e8dcb2740f8e98b753303e736103734b38d7add4fa2d9cc4cb6911c7480e1b066cf279df6639b85469667d534c424c6ae9f9010c1d4
7
+ data.tar.gz: dad35e41c0a72f8af134896444f277bef50db97af43bfdacfa9484b9bf21753e2bb5578aeeff673387d1d585b72d17bb5bf7032003cbbb62a961eb61dde9241b
@@ -0,0 +1,13 @@
1
+ syntax: glob
2
+ /.bundle/
3
+ /.yardoc
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ Gemfile.lock
11
+
12
+ # rspec failure tracking
13
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.5.1
5
+ before_install: gem install bundler -v 1.16.1
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in md-noko.gemspec
6
+ gemspec
@@ -0,0 +1,73 @@
1
+ # MD::Noko: in goes Markdown, out comes Nokogiri
2
+
3
+ ## Synopsis
4
+
5
+ ```ruby
6
+ require 'md-noko'
7
+
8
+ mdnk = MD::Noko.new
9
+ doc = mdnk.ingest File.open('lolwut.md')
10
+
11
+ # or
12
+
13
+ doc = mdnk.ingest <<EOT
14
+ # Hi
15
+
16
+ Markdown here!
17
+
18
+ ![lulz](meme.jpeg)
19
+
20
+ EOT
21
+
22
+ # doc is a Nokogiri::XML::Document
23
+ ```
24
+
25
+ ## Description
26
+
27
+ This is a simple module that encapsulates a set of desirable
28
+ manipulations to the (X)HTML output
29
+ of [Redcarpet](https://www.rubydoc.info/gems/redcarpet/). It exposes
30
+ (for now) a single method, `ingest`, which returns a [`Nokogiri::XML::Document`](https://www.rubydoc.info/gems/nokogiri/Nokogiri/XML/Document), for further manipulation. In particular, this module:
31
+
32
+ * Adds HTML preamble to produce a valid document,
33
+ * Creates a `<base href=""/>` element whose `href` is set from the base URI you pass in,
34
+ * Creates a hierarchy of `<section>` elements and places headings
35
+ and content inside,
36
+ * If the document contains exactly one `<h1>` which is the very first
37
+ thing in the file, it is copied into the `<title>`, and removed from
38
+ the document body if determined to be redundant (i.e. unless it
39
+ contains markup elements rather than plain text alone),
40
+ * A `<blockquote>` element containing exactly one nested
41
+ `<blockquote>` element is converted into a single `<aside role="note">`,
42
+ * Images on their own paragraph are transformed into a `<figure>`,
43
+ * Text nodes not descendants of `<pre>` are whitespace-normalized and
44
+ indentation is repaired.
45
+
46
+ The embedded `Redcarpet::Markdown` instance has the following flags set:
47
+
48
+ * `:tables`
49
+ * `:fenced_code_blocks`
50
+ * `:quote`
51
+ * `:highlight`
52
+
53
+ These flags are hard-coded and cannot currently be changed by the caller.
54
+
55
+ ## Installation
56
+
57
+ The usual:
58
+
59
+ $ gem install md-noko
60
+
61
+ Or, [download it off rubygems.org](https://rubygems.org/gems/md-noko).
62
+
63
+ ## Contributing
64
+
65
+ Bug reports and pull requests are welcome at
66
+ [the GitHub repository](https://github.com/doriantaylor/rb-md-noko).
67
+
68
+ ## Copyright & License
69
+
70
+ ©2018 [Dorian Taylor](https://doriantaylor.com/)
71
+
72
+ This software is provided under
73
+ the [Apache License, 2.0](https://www.apache.org/licenses/LICENSE-2.0).
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "md/noko"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1 @@
1
+ require 'md/noko'
@@ -0,0 +1,168 @@
1
+ require 'md/noko/version'
2
+ require 'redcarpet'
3
+ require 'xml-mixup'
4
+
5
# Turns Markdown into a tidied-up XHTML document held in a
# Nokogiri::XML::Document.
#
# The Markdown is rendered by an embedded Redcarpet instance, wrapped in
# an XHTML skeleton, parsed, and then post-processed: a lone leading
# <h1> becomes the <title>, headings are nested into <section>
# elements, doubled blockquotes become <aside role="note">, paragraphs
# holding only an image become <figure>, and whitespace is normalized.
class MD::Noko
  include XML::Mixup

  # XHTML skeleton. The first %s is the <base> href (filled in through
  # the DOM after parsing, so it is always passed as ''), the second is
  # the rendered Markdown body.
  XHTML_BP = <<-BP.freeze
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title/>
<base href="%s"/>
</head>
<body>
%s
</body>
</html>
  BP

  # Prefix-to-namespace map used by every XPath query in this class.
  XPATH_NS = { html: 'http://www.w3.org/1999/xhtml' }.freeze

  # Shared renderer; the flags are fixed and not exposed to callers.
  @@markdown = Redcarpet::Markdown.new(
    Redcarpet::Render::XHTML.new(prettify: false),
    tables: true, fenced_code_blocks: true, quote: true, highlight: true)

  # Ingest a markdown file, with sensible defaults
  #
  # @param obj [String, IO] the content to be ingested; anything that
  #   responds to +read+ is read, anything else is rendered as-is
  #
  # @param uri [String, #to_s, nil] the document's base URI
  #
  # @return [Nokogiri::XML::Document]

  def ingest obj, uri=nil
    source = obj.respond_to?(:read) ? obj.read : obj

    # the parser wants a String (or nil), so coerce up front
    base = uri ? uri.to_s : nil

    # leave the href slot empty here: interpolating the URI into the
    # markup would break on characters such as & or "
    html = XHTML_BP % ['', @@markdown.render(source)]
    doc  = Nokogiri::XML.parse html, base

    # set the base href through the DOM so it gets escaped properly
    if base
      doc.at_xpath('/html:html/html:head/html:base[1]', XPATH_NS)['href'] = base
    end

    body = doc.at_xpath('/html:html/html:body[1]', XPATH_NS)

    # default all six headers unless there's a lone h1
    ranks = h1_title(body) ? 2..6 : 1..6

    # markdown just makes a flat list of elements so let's plump it up
    make_sections body, ranks

    # redo double blockquotes as <aside role="note">
    bq_aside body

    # redo paragraphs containing only images as figures
    img_figure body

    # fix the text nodes
    prune_text body

    doc
  end

  private

  # Copy a lone, leading <h1> into the document <title>.
  #
  # The heading is removed from the body when it holds no child
  # elements (i.e. it is plain text and would merely repeat the title).
  #
  # @param body [Nokogiri::XML::Element] the <body> element
  #
  # @return [true, false] true when the title was set, in which case
  #   sectioning should start at <h2>
  def h1_title body
    heads = body.xpath('html:h1', XPATH_NS)

    # only act on a single h1 that is also the first element
    return false unless heads.length == 1 && heads[0].previous_element.nil?

    h1 = heads[0]

    # assign document title to the header content
    title = body.document.at_xpath(
      '/html:html/html:head/html:title[1]', XPATH_NS)
    title.content = h1.content

    # unlink redundant h1 if it has no children (ie unformatted)
    h1.unlink unless h1.at_xpath('*')

    true
  end

  # Recursively wrap each heading and the content that follows it in a
  # <section>, working from the highest heading rank downwards.
  #
  # @param node [Nokogiri::XML::Node] element whose children are grouped
  #
  # @param ranks [Range] heading levels still to be processed
  #
  # @raise [RuntimeError] if +ranks+ is nil or empty
  def make_sections node, ranks=1..6
    raise 'fail range' if ranks.nil? || ranks.size < 1

    h = "html:h#{ranks.first}"

    # whatever ranks remain below the current one (may be empty)
    ranks = Range.new(ranks.first + 1, ranks.last)

    headers = node.xpath(h, XPATH_NS).to_a
    if headers.empty?
      # nothing at this rank; try the next one down, if any
      return make_sections node, ranks if ranks.size > 0
    else
      headers.each_with_index do |hdr, i|
        xp = "following-sibling::node()[not(self::#{h})]"
        if i < headers.length - 1
          # stop at the next header of this rank. note that the count
          # is always 1 because all preceding headers will already have
          # been replaced by their sections
          xp += "[following-sibling::#{h}" +
            "[count(preceding-sibling::#{h}) = 1]]"
        end

        # duplicate and nuke these elements
        siblings = hdr.xpath(xp, XPATH_NS).to_a.map do |s|
          # note that :unlink returns the node, but with a
          # garbaged-up set of namespaces
          o = s.dup
          s.unlink
          o
        end

        # add the header to the front of the list
        siblings.unshift hdr.dup
        siblings.unshift "\n"

        # now construct the section
        section = markup replace: hdr, spec: { nil => :section }
        markup after: section, spec: "\n"
        markup parent: section, spec: siblings

        # now recurse, unless this was the lowest rank: recursing with
        # an empty range would trip the guard at the top and raise for
        # any document that contains an <h6>
        make_sections section, ranks if ranks.size > 0
      end
    end
  end

  # Turn a <blockquote> whose only child element is another
  # <blockquote> into a single <aside role="note">.
  #
  # @param body [Nokogiri::XML::Element] the <body> element
  def bq_aside body
    bq = 'html:blockquote'
    x  = ".//#{bq}[not(parent::#{bq})][#{bq}][count(*) = 1]"
    body.xpath(x, XPATH_NS).each do |outer|
      inner = outer.first_element_child
      inner.name   = 'aside'
      inner[:role] = 'note'
      outer.replace inner
    end
  end

  # Rename paragraphs that contain a single image and nothing else to
  # <figure>.
  #
  # @param body [Nokogiri::XML::Element] the <body> element
  def img_figure body
    # the last predicate skips paragraphs that also carry real text, so
    # an inline image in a sentence is left alone
    xp = './/html:p[html:img][count(*) = 1][not(text()[normalize-space()])]'
    body.xpath(xp, XPATH_NS).each do |para|
      para.name = 'figure'
    end
  end

  # Collapse runs of whitespace in every text node outside <pre>, and
  # rewrite whitespace-only nodes as a newline plus indentation.
  #
  # @param body [Nokogiri::XML::Element] the <body> element; the whole
  #   owning document is processed
  def prune_text body
    doc = body.document
    doc.xpath('//text()[not(ancestor::html:pre)]', XPATH_NS).each do |n|
      n.content = n.content.gsub(/[ \t\r\n]+/, ' ')

      # only whitespace-only nodes take part in re-indentation
      next unless n.content == ' '

      depth = n.ancestors.count - 1
      prev  = n.previous_sibling

      if prev.nil?
        # first text node
        n.content = "\n" + (' ' * depth)
      elsif prev.text? && prev.content =~ /\A\s+\z/
        # second of two adjacent whitespace nodes; drop it
        n.unlink
      else
        # the last node in its parent sits before the closing tag, so
        # it is indented one level shallower
        depth -= 1 unless n.next_sibling
        n.content = "\n" + (' ' * depth)
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
# Top-level namespace for the md-noko gem.
module MD
  # Markdown-to-Nokogiri converter; this file only supplies its version.
  class Noko
    # Release version of the gem. Frozen so that the shared constant
    # cannot be mutated in place.
    VERSION = "0.1.0".freeze
  end
end
@@ -0,0 +1,33 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'md/noko/version'
5
+
6
# Packaging metadata for the md-noko gem.
Gem::Specification.new do |gem|
  # identity
  gem.name     = 'md-noko'
  gem.version  = MD::Noko::VERSION
  gem.authors  = ['Dorian Taylor']
  gem.email    = ['code@doriantaylor.com']
  gem.license  = 'Apache-2.0'
  gem.homepage = 'https://github.com/doriantaylor/rb-md-noko'

  # blurbs
  gem.summary     = 'In goes Markdown, out pops Nokogiri.'
  gem.description = <<-DESC
This is a simple module that encapsulates a set of desirable
manipulations to the (X)HTML output of Redcarpet, producing a
Nokogiri::XML::Document which is amenable to further manipulation.
  DESC

  # ship everything git tracks, minus the test trees
  tracked = `git ls-files -z`.split("\x0")
  gem.files = tracked.reject do |path|
    path.match(%r{^(test|spec|features)/})
  end

  gem.bindir        = 'exe'
  gem.executables   = gem.files.grep(%r{^exe/}) { |path| File.basename(path) }
  gem.require_paths = ['lib']

  gem.required_ruby_version = '~> 2.0'

  # needed only to build and test the gem
  {
    "bundler" => "~> 1.11",
    "rake"    => "~> 10.0",
    "rspec"   => "~> 3.0",
  }.each do |name, constraint|
    gem.add_development_dependency name, constraint
  end

  # needed at run time
  {
    'redcarpet' => '~> 3.4.0',
    'xml-mixup' => '~> 0.1.5',
  }.each do |name, constraint|
    gem.add_runtime_dependency name, constraint
  end
end
metadata ADDED
@@ -0,0 +1,129 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: md-noko
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Dorian Taylor
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2018-05-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.11'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.11'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: redcarpet
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 3.4.0
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 3.4.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: xml-mixup
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: 0.1.5
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 0.1.5
83
+ description: |
84
+ This is a simple module that encapsulates a set of desirable
85
+ manipulations to the (X)HTML output of Redcarpet, producing a
86
+ Nokogiri::XML::Document which is amenable to further manipulation.
87
+ email:
88
+ - code@doriantaylor.com
89
+ executables: []
90
+ extensions: []
91
+ extra_rdoc_files: []
92
+ files:
93
+ - ".gitignore"
94
+ - ".rspec"
95
+ - ".travis.yml"
96
+ - Gemfile
97
+ - README.md
98
+ - Rakefile
99
+ - bin/console
100
+ - bin/setup
101
+ - lib/md-noko.rb
102
+ - lib/md/noko.rb
103
+ - lib/md/noko/version.rb
104
+ - md-noko.gemspec
105
+ homepage: https://github.com/doriantaylor/rb-md-noko
106
+ licenses:
107
+ - Apache-2.0
108
+ metadata: {}
109
+ post_install_message:
110
+ rdoc_options: []
111
+ require_paths:
112
+ - lib
113
+ required_ruby_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '2.0'
118
+ required_rubygems_version: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
123
+ requirements: []
124
+ rubyforge_project:
125
+ rubygems_version: 2.7.6
126
+ signing_key:
127
+ specification_version: 4
128
+ summary: In goes Markdown, out pops Nokogiri.
129
+ test_files: []