md-noko 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.rspec +3 -0
- data/.travis.yml +5 -0
- data/Gemfile +6 -0
- data/README.md +73 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/md-noko.rb +1 -0
- data/lib/md/noko.rb +168 -0
- data/lib/md/noko/version.rb +5 -0
- data/md-noko.gemspec +33 -0
- metadata +129 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 9fde11dc2fda8402592b512221cf66545b1d17a9fcb79183ae4cc299449685d8
|
4
|
+
data.tar.gz: ea5ac14c87fde2826ad792007a2f38df8be6c4890b93a3915700fefb94cf6efe
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 80d7b6bbf6cf730a42780e8dcb2740f8e98b753303e736103734b38d7add4fa2d9cc4cb6911c7480e1b066cf279df6639b85469667d534c424c6ae9f9010c1d4
|
7
|
+
data.tar.gz: dad35e41c0a72f8af134896444f277bef50db97af43bfdacfa9484b9bf21753e2bb5578aeeff673387d1d585b72d17bb5bf7032003cbbb62a961eb61dde9241b
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
# MD::Noko: in goes Markdown, out comes Nokogiri
|
2
|
+
|
3
|
+
## Synopsis
|
4
|
+
|
5
|
+
```ruby
|
6
|
+
require 'md-noko'
|
7
|
+
|
8
|
+
mdnk = MD::Noko.new
|
9
|
+
doc = mdnk.ingest File.open('lolwut.md')
|
10
|
+
|
11
|
+
# or
|
12
|
+
|
13
|
+
doc = mdnk.ingest <<EOT
|
14
|
+
# Hi
|
15
|
+
|
16
|
+
Markdown here!
|
17
|
+
|
18
|
+

|
19
|
+
|
20
|
+
EOT
|
21
|
+
|
22
|
+
# doc is a Nokogiri::XML::Document
|
23
|
+
```
|
24
|
+
|
25
|
+
## Description
|
26
|
+
|
27
|
+
This is a simple module that encapsulates a set of desirable
|
28
|
+
manipulations to the (X)HTML output
|
29
|
+
of [Redcarpet](https://www.rubydoc.info/gems/redcarpet/). It exposes
|
30
|
+
(for now) a single method, `ingest`, which returns a [`Nokogiri::XML::Document`](https://www.rubydoc.info/gems/nokogiri/Nokogiri/XML/Document), for further manipulation. In particular, this module:
|
31
|
+
|
32
|
+
* Adds HTML preamble to produce a valid document,
|
33
|
+
* Creates a `<base href=""/>` element, whose `href` is set to the base URL you pass to `ingest`,
|
34
|
+
* Creates a hierarchy of `<section>` elements and places headings
|
35
|
+
and content inside,
|
36
|
+
* If the document contains exactly one `<h1>` which is the very first
|
37
|
+
thing in the file, it is copied into the `<title>`, and removed from
|
38
|
+
the document body if determined to be redundant (i.e. when it
|
39
|
+
contains only text and no markup elements),
|
40
|
+
* A `<blockquote>` element containing exactly one nested
|
41
|
+
`<blockquote>` element is converted into a single `<aside role="note">`,
|
42
|
+
* Paragraphs whose only child element is an image are transformed into a `<figure>`,
|
43
|
+
* Text nodes not descendants of `<pre>` are whitespace-normalized and
|
44
|
+
indentation is repaired.
|
45
|
+
|
46
|
+
The embedded `Redcarpet::Markdown` instance has the following flags set:
|
47
|
+
|
48
|
+
* `:tables`
|
49
|
+
* `:fenced_code_blocks`
|
50
|
+
* `:quote`
|
51
|
+
* `:highlight`
|
52
|
+
|
53
|
+
These are currently not exposed.
|
54
|
+
|
55
|
+
## Installation
|
56
|
+
|
57
|
+
The usual:
|
58
|
+
|
59
|
+
$ gem install md-noko
|
60
|
+
|
61
|
+
Or, [download it off rubygems.org](https://rubygems.org/gems/md-noko).
|
62
|
+
|
63
|
+
## Contributing
|
64
|
+
|
65
|
+
Bug reports and pull requests are welcome at
|
66
|
+
[the GitHub repository](https://github.com/doriantaylor/rb-md-noko).
|
67
|
+
|
68
|
+
## Copyright & License
|
69
|
+
|
70
|
+
©2018 [Dorian Taylor](https://doriantaylor.com/)
|
71
|
+
|
72
|
+
This software is provided under
|
73
|
+
the [Apache License, 2.0](https://www.apache.org/licenses/LICENSE-2.0).
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Development console: loads Bundler's setup and this gem, then starts IRB.
require "bundler/setup"
|
4
|
+
require "md/noko"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
# Hand control to an interactive IRB session.
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/lib/md-noko.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'md/noko'
|
data/lib/md/noko.rb
ADDED
@@ -0,0 +1,168 @@
|
|
1
|
+
require 'md/noko/version'
|
2
|
+
require 'redcarpet'
|
3
|
+
require 'xml-mixup'
|
4
|
+
|
5
|
+
# Renders Markdown with Redcarpet and post-processes the resulting XHTML
# into a more structured Nokogiri document.
#
# The only public entry point is #ingest. It wraps the rendered markup in
# an XHTML skeleton, parses it, and then:
#
# * promotes a lone leading <h1> to the document <title>,
# * nests headings and their content inside <section> elements,
# * turns doubled blockquotes into <aside role="note">,
# * turns paragraphs whose only child element is an image into <figure>,
# * normalizes whitespace in text nodes outside <pre>.
class MD::Noko
  include XML::Mixup

  private

  # Skeleton of the generated document. The first `%s` slot is the
  # `href` of <base>, the second receives the rendered Markdown.
  XHTML_BP = <<-BP
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title/>
<base href="%s"/>
</head>
<body>
%s
</body>
</html>
  BP

  # Prefix-to-namespace map handed to every XPath query in this class.
  XPATH_NS = { html: 'http://www.w3.org/1999/xhtml' }.freeze

  # A single renderer shared by all instances.
  @@markdown = Redcarpet::Markdown.new(
    Redcarpet::Render::XHTML.new(prettify: false),
    tables: true, fenced_code_blocks: true, quote: true, highlight: true)

  # Copy a lone, leading <h1> into the document <title>.
  #
  # The heading is also removed from the body when it has no child
  # elements (i.e. it is plain text and therefore redundant with the
  # title).
  #
  # @param body [Nokogiri::XML::Element] the <body> element
  #
  # @return [true, false] true if the <h1> was used as the title, in
  #  which case sectioning should start at <h2>
  def h1_title(body)
    heads = body.xpath('html:h1', XPATH_NS)

    # only act on exactly one h1 that has no element before it
    return false unless heads.length == 1 && !heads[0].previous_element

    h1 = heads[0]

    # assign document title to the header content
    title = body.document.at_xpath(
      '/html:html/html:head/html:title[1]', XPATH_NS)
    title.content = h1.content

    # unlink redundant h1 if it has no children (ie unformatted)
    h1.unlink unless h1.at_xpath('*')

    true
  end

  # Recursively wrap each heading, and the content that follows it up to
  # the next heading of the same rank, in a <section> element.
  #
  # @param node [Nokogiri::XML::Node] the element whose children are
  #  to be sectioned
  # @param ranks [Range] the heading ranks still to be processed,
  #  highest-level first
  #
  # @raise [RuntimeError] if ranks is nil or empty
  def make_sections(node, ranks = 1..6)
    raise 'fail range' if ranks.nil? || ranks.size < 1

    h = "html:h#{ranks.first}"

    # whatever is left over is handled by the recursive calls
    ranks = Range.new(ranks.first + 1, ranks.last)

    headers = node.xpath(h, XPATH_NS).to_a
    if headers.empty?
      # no headings of this rank here: try the next rank on the same node
      return make_sections(node, ranks) if ranks.size > 0
    else
      headers.each_with_index do |hdr, i|
        xp = "following-sibling::node()[not(self::#{h})]"
        if i < headers.length - 1
          # stop at the next heading of the same rank; note that the
          # count is always 1 because all preceding siblings prior to
          # that will have been removed
          xp += "[following-sibling::#{h}" \
            "[count(preceding-sibling::#{h}) = 1]]"
        end

        # duplicate and nuke these elements
        siblings = hdr.xpath(xp, XPATH_NS).to_a.map do |s|
          # note that :unlink returns the node, but with a
          # garbaged-up set of namespaces
          o = s.dup; s.unlink; o
        end

        # add the header to the front of the list
        siblings.unshift hdr.dup
        siblings.unshift "\n"

        # now construct the section
        section = markup replace: hdr, spec: { nil => :section }
        markup after: section, spec: "\n"
        markup parent: section, spec: siblings

        # now recurse, unless the lowest rank has just been handled:
        # calling with an empty range would trip the guard above and
        # abort the whole conversion
        make_sections(section, ranks) if ranks.size > 0
      end
    end
  end

  # Replace every <blockquote> whose sole child element is another
  # <blockquote> with a single <aside role="note">.
  #
  # @param body [Nokogiri::XML::Element] the <body> element
  def bq_aside(body)
    x = './/%s[not(parent::%s)][%s][count(*) = 1]' % (%w{html:blockquote} * 3)
    body.xpath(x, XPATH_NS).each do |node|
      # the inner blockquote is renamed and takes the outer one's place
      fc = node.first_element_child
      fc.name = 'aside'
      fc[:role] = 'note'
      node.replace fc
    end
  end

  # Rename every <p> that has an <img> child and exactly one child
  # element to <figure>.
  #
  # @param body [Nokogiri::XML::Element] the <body> element
  def img_figure(body)
    body.xpath('.//html:p[html:img][count(*) = 1]', XPATH_NS).each do |node|
      node.name = 'figure'
    end
  end

  # Collapse runs of whitespace in all text nodes outside <pre>, and
  # rewrite whitespace-only nodes as a newline plus indentation.
  #
  # @param body [Nokogiri::XML::Element] any node of the document; the
  #  whole document it belongs to is processed
  def prune_text(body)
    doc = body.document
    doc.xpath('//text()[not(ancestor::html:pre)]', XPATH_NS).each do |n|
      n.content = n.content.gsub(/(?: |\t|\r|\n)+/, ' ')

      # might as well fix the damn indentation while we're here
      next unless n.content == ' '

      depth = n.ancestors.count - 1
      prev  = n.previous_sibling

      if prev && prev.text? && prev.content =~ /\A\s+\z/
        # second of two adjacent whitespace-only nodes: drop it
        n.unlink
      else
        # a trailing node (one with something before it and nothing
        # after) is indented one level less
        depth -= 1 if prev && !n.next_sibling
        n.content = "\n" + (' ' * depth)
      end
    end
  end

  public

  # Ingest a markdown file, with sensible defaults
  #
  # @param obj [String, IO] the content to be ingested; anything that
  #  responds to #read is read, anything else is rendered as-is
  #
  # @param uri [String, #to_s, nil] the document's base URI
  #
  # @return [Nokogiri::XML::Document]

  def ingest(obj, uri = nil)
    # the parser wants a String (or nil), not an arbitrary URI object
    uri = uri ? uri.to_s : nil

    text = obj.respond_to?(:read) ? obj.read : obj

    # the href slot is left blank here and filled in through the DOM
    # below, so that characters such as & or " in the URI are escaped
    # instead of corrupting the markup
    xml = XHTML_BP % ['', @@markdown.render(text)]
    doc = Nokogiri::XML.parse xml, uri

    if uri
      base = doc.at_xpath('/html:html/html:head/html:base[1]', XPATH_NS)
      base['href'] = uri if base
    end

    body = doc.at_xpath('/html:html/html:body[1]', XPATH_NS)

    # default all six headers unless there's a lone h1
    ranks = h1_title(body) ? 2..6 : 1..6

    # markdown just makes a flat list of elements so let's plump it up
    make_sections body, ranks

    # redo double blockquotes as <aside role="note">
    bq_aside body

    # redo paragraphs containing only images as figures
    img_figure body

    # fix the damn text nodes
    prune_text body

    doc
  end
end
|
data/md-noko.gemspec
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# Gem specification for md-noko.
# Put the gem's own lib/ directory on the load path so that the version
# file required below can be found.
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'md/noko/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = 'md-noko'
|
8
|
+
spec.version = MD::Noko::VERSION
|
9
|
+
spec.authors = ['Dorian Taylor']
|
10
|
+
spec.email = ['code@doriantaylor.com']
|
11
|
+
spec.license = 'Apache-2.0'
|
12
|
+
spec.summary = %q{In goes Markdown, out pops Nokogiri.}
|
13
|
+
spec.description = <<-DESC
|
14
|
+
This is a simple module that encapsulates a set of desirable
|
15
|
+
manipulations to the (X)HTML output of Redcarpet, producing a
|
16
|
+
Nokogiri::XML::Document which is amenable to further manipulation.
|
17
|
+
DESC
|
18
|
+
spec.homepage = 'https://github.com/doriantaylor/rb-md-noko'
|
19
|
+
|
20
|
+
# Package every file git tracks, except anything under test/, spec/ or
# features/.
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
21
|
+
# Executables are collected from exe/ only; bin/ is not scanned.
spec.bindir = 'exe'
|
22
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
23
|
+
spec.require_paths = ['lib']
|
24
|
+
|
25
|
+
# Pessimistic constraint: any Ruby 2.x release (>= 2.0, < 3.0).
spec.required_ruby_version = '~> 2.0'
|
26
|
+
|
27
|
+
# Tooling needed only for development and testing.
spec.add_development_dependency "bundler", "~> 1.11"
|
28
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
29
|
+
spec.add_development_dependency "rspec", "~> 3.0"
|
30
|
+
|
31
|
+
# Libraries required at run time, each held to a single minor series.
spec.add_runtime_dependency 'redcarpet', '~> 3.4.0'
|
32
|
+
spec.add_runtime_dependency 'xml-mixup', '~> 0.1.5'
|
33
|
+
end
|
metadata
ADDED
@@ -0,0 +1,129 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: md-noko
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Dorian Taylor
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-05-16 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.11'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.11'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: redcarpet
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 3.4.0
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 3.4.0
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: xml-mixup
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 0.1.5
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 0.1.5
|
83
|
+
description: |
|
84
|
+
This is a simple module that encapsulates a set of desirable
|
85
|
+
manipulations to the (X)HTML output of Redcarpet, producing a
|
86
|
+
Nokogiri::XML::Document which is amenable to further manipulation.
|
87
|
+
email:
|
88
|
+
- code@doriantaylor.com
|
89
|
+
executables: []
|
90
|
+
extensions: []
|
91
|
+
extra_rdoc_files: []
|
92
|
+
files:
|
93
|
+
- ".gitignore"
|
94
|
+
- ".rspec"
|
95
|
+
- ".travis.yml"
|
96
|
+
- Gemfile
|
97
|
+
- README.md
|
98
|
+
- Rakefile
|
99
|
+
- bin/console
|
100
|
+
- bin/setup
|
101
|
+
- lib/md-noko.rb
|
102
|
+
- lib/md/noko.rb
|
103
|
+
- lib/md/noko/version.rb
|
104
|
+
- md-noko.gemspec
|
105
|
+
homepage: https://github.com/doriantaylor/rb-md-noko
|
106
|
+
licenses:
|
107
|
+
- Apache-2.0
|
108
|
+
metadata: {}
|
109
|
+
post_install_message:
|
110
|
+
rdoc_options: []
|
111
|
+
require_paths:
|
112
|
+
- lib
|
113
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '2.0'
|
118
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
119
|
+
requirements:
|
120
|
+
- - ">="
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: '0'
|
123
|
+
requirements: []
|
124
|
+
rubyforge_project:
|
125
|
+
rubygems_version: 2.7.6
|
126
|
+
signing_key:
|
127
|
+
specification_version: 4
|
128
|
+
summary: In goes Markdown, out pops Nokogiri.
|
129
|
+
test_files: []
|