md-noko 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 9fde11dc2fda8402592b512221cf66545b1d17a9fcb79183ae4cc299449685d8
4
+ data.tar.gz: ea5ac14c87fde2826ad792007a2f38df8be6c4890b93a3915700fefb94cf6efe
5
+ SHA512:
6
+ metadata.gz: 80d7b6bbf6cf730a42780e8dcb2740f8e98b753303e736103734b38d7add4fa2d9cc4cb6911c7480e1b066cf279df6639b85469667d534c424c6ae9f9010c1d4
7
+ data.tar.gz: dad35e41c0a72f8af134896444f277bef50db97af43bfdacfa9484b9bf21753e2bb5578aeeff673387d1d585b72d17bb5bf7032003cbbb62a961eb61dde9241b
@@ -0,0 +1,13 @@
1
+ syntax: glob
2
+ /.bundle/
3
+ /.yardoc
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ Gemfile.lock
11
+
12
+ # rspec failure tracking
13
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.5.1
5
+ before_install: gem install bundler -v 1.16.1
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in md-noko.gemspec
6
+ gemspec
@@ -0,0 +1,73 @@
1
+ # MD::Noko: in goes Markdown, out comes Nokogiri
2
+
3
+ ## Synopsis
4
+
5
+ ```ruby
6
+ require 'md-noko'
7
+
8
+ mdnk = MD::Noko.new
9
+ doc = mdnk.ingest File.open('lolwut.md')
10
+
11
+ # or
12
+
13
+ doc = mdnk.ingest <<EOT
14
+ # Hi
15
+
16
+ Markdown here!
17
+
18
+ ![lulz](meme.jpeg)
19
+
20
+ EOT
21
+
22
+ # doc is a Nokogiri::XML::Document
23
+ ```
24
+
25
+ ## Description
26
+
27
+ This is a simple module that encapsulates a set of desirable
28
+ manipulations to the (X)HTML output
29
+ of [Redcarpet](https://www.rubydoc.info/gems/redcarpet/). It exposes
30
+ (for now) a single method, `ingest`, which returns a [`Nokogiri::XML::Document`](https://www.rubydoc.info/gems/nokogiri/Nokogiri/XML/Document), for further manipulation. In particular, this module:
31
+
32
+ * Adds HTML preamble to produce a valid document,
33
+ * Creates a `<base href=""/>` element, filled in with the base URL you can optionally pass to `ingest`,
34
+ * Creates a hierarchy of `<section>` elements and places headings
35
+ and content inside,
36
+ * If the document contains exactly one `<h1>` which is the very first
37
+ thing in the file, it is copied into the `<title>`, and removed from
38
+ the document body if determined to be redundant (i.e. when it
39
+ contains only text and no markup elements),
40
+ * A `<blockquote>` element containing exactly one nested
41
+ `<blockquote>` element is converted into a single `<aside role="note">`,
42
+ * An image that is alone in its own paragraph is transformed into a `<figure>`,
43
+ * Text nodes not descendants of `<pre>` are whitespace-normalized and
44
+ indentation is repaired.
45
+
46
+ The embedded `Redcarpet::Markdown` instance has the following flags set:
47
+
48
+ * `:tables`
49
+ * `:fenced_code_blocks`
50
+ * `:quote`
51
+ * `:highlight`
52
+
53
+ These are currently not exposed.
54
+
55
+ ## Installation
56
+
57
+ The usual:
58
+
59
+ $ gem install md-noko
60
+
61
+ Or, [download it off rubygems.org](https://rubygems.org/gems/md-noko).
62
+
63
+ ## Contributing
64
+
65
+ Bug reports and pull requests are welcome at
66
+ [the GitHub repository](https://github.com/doriantaylor/rb-md-noko).
67
+
68
+ ## Copyright & License
69
+
70
+ ©2018 [Dorian Taylor](https://doriantaylor.com/)
71
+
72
+ This software is provided under
73
+ the [Apache License, 2.0](https://www.apache.org/licenses/LICENSE-2.0).
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "md/noko"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1 @@
1
+ require 'md/noko'
@@ -0,0 +1,168 @@
1
+ require 'md/noko/version'
2
+ require 'redcarpet'
3
+ require 'xml-mixup'
4
+
5
+ class MD::Noko
6
+ include XML::Mixup
7
+
8
+ private
9
+
10
+ XHTML_BP = <<-BP
11
+ <!DOCTYPE html>
12
+ <html xmlns="http://www.w3.org/1999/xhtml">
13
+ <head>
14
+ <title/>
15
+ <base href="%s"/>
16
+ </head>
17
+ <body>
18
+ %s
19
+ </body>
20
+ </html>
21
+ BP
22
+
23
+ XPATH_NS = { html: 'http://www.w3.org/1999/xhtml' }.freeze
24
+
25
+ @@markdown = Redcarpet::Markdown.new(
26
+ Redcarpet::Render::XHTML.new(prettify: false),
27
+ tables: true, fenced_code_blocks: true, quote: true, highlight: true)
28
+
29
# Promote a lone, leading <h1> to the document <title>.
#
# The promotion only happens when +body+ has exactly one <h1> child and
# no element precedes it. The heading's text content is copied into the
# first <title> of the document head. The heading itself is then removed
# from the body when it has no element children (plain text, hence
# redundant with the title); a heading that contains markup stays put.
#
# @param body [Nokogiri::XML::Node] the <body> element being processed
#
# @return [true, false] true when the title was taken from the heading,
#  which tells the caller to build sections from h2 through h6 only
#
def h1_title(body)
  heads = body.xpath('html:h1', XPATH_NS)
  h1 = heads.first

  # only a single h1 that is also the first element qualifies
  return false unless heads.length == 1 && !h1.previous_element

  # assign the heading content to the document title
  title = body.document.at_xpath(
    '/html:html/html:head/html:title[1]', XPATH_NS)
  title.content = h1.content

  # unlink the redundant h1 if it has no child elements (ie unformatted)
  h1.unlink unless h1.at_xpath('*')

  true
end
51
+
52
# Recursively group headings and the content that follows them into
# <section> elements.
#
# Headings of the first rank in +ranks+ that are children of +node+ are
# handled one at a time: the nodes between a heading and the next
# heading of the same rank are copied and unlinked, the heading is
# swapped for a new <section> (via +markup+), and a copy of the heading
# plus the copied nodes are placed inside it. Each new section is then
# processed with the remaining ranks. When +node+ has no heading of the
# current rank, the next rank is tried on +node+ itself.
#
# @param node [Nokogiri::XML::Node] the element whose children are grouped
#
# @param ranks [Range] the heading levels to process, e.g. 1..6
#
# @raise [RuntimeError] if +ranks+ is nil or empty
#
def make_sections(node, ranks = 1..6)
  raise 'fail range' if ranks.nil? || ranks.size < 1

  h = "html:h#{ranks.first}"

  # whatever is left once the current rank is dealt with; this is empty
  # when the current rank is the last one
  deeper = Range.new(ranks.first + 1, ranks.last)

  headers = node.xpath(h, XPATH_NS).to_a
  if headers.empty?
    return make_sections(node, deeper) if deeper.size > 0
  else
    headers.each_with_index do |hdr, i|
      xp = "following-sibling::node()[not(self::#{h})]"
      unless i == headers.length - 1
        # stop at the next heading of this rank; note that its count of
        # preceding headings is always 1 because all preceding siblings
        # prior to that will have been removed
        xp += "[following-sibling::#{h}[count(preceding-sibling::#{h}) = 1]]"
      end

      # duplicate and nuke these elements; note that :unlink returns
      # the node, but with a garbaged-up set of namespaces
      siblings = hdr.xpath(xp, XPATH_NS).to_a.map do |sib|
        copy = sib.dup
        sib.unlink
        copy
      end

      # add the header to the front of the list
      siblings.unshift hdr.dup
      siblings.unshift "\n"

      # now construct the section
      section = markup replace: hdr, spec: { nil => :section }
      markup after: section, spec: "\n"
      markup parent: section, spec: siblings

      # now recurse, unless this was the last rank: recursing with the
      # exhausted range would trip the 'fail range' check above and
      # abort on any document that contains a heading of the final rank
      make_sections(section, deeper) if deeper.size > 0
    end
  end
end
94
+
95
# Turn doubly nested blockquotes into <aside role="note">.
#
# Selects every <blockquote> under +body+ that is not itself the child
# of a <blockquote>, has a <blockquote> child, and has exactly one child
# element. That single child is renamed to <aside>, given role="note",
# and put in place of the outer blockquote.
#
# @param body [Nokogiri::XML::Node] the element to search beneath
#
def bq_aside(body)
  bq = 'html:blockquote'
  query = ".//#{bq}[not(parent::#{bq})][#{bq}][count(*) = 1]"

  body.xpath(query, XPATH_NS).each do |outer|
    inner = outer.first_element_child
    inner.name = 'aside'
    inner[:role] = 'note'
    outer.replace inner
  end
end
104
+
105
# Rename paragraphs that hold nothing but a single image to <figure>.
#
# A paragraph qualifies when it has an <img> child, that image is its
# only child element, and none of its text children contain anything
# other than whitespace. Without the last condition a paragraph of
# running text with one inline image would also be turned into a figure.
#
# @param body [Nokogiri::XML::Node] the element to search beneath
#
def img_figure(body)
  xp = './/html:p[html:img][count(*) = 1][not(text()[normalize-space()])]'
  body.xpath(xp, XPATH_NS).each do |para|
    para.name = 'figure'
  end
end
110
+
111
# Normalize whitespace in every text node outside of <pre>.
#
# Runs of spaces, tabs, carriage returns and newlines are collapsed to a
# single space. A text node that ends up as just one space is treated as
# inter-element whitespace: it is dropped when the node before it is
# also whitespace-only text, and otherwise rewritten as a newline plus
# indentation derived from the number of ancestors. The indentation is
# one step shallower for a node that has a previous sibling but no next
# sibling.
#
# @param body [Nokogiri::XML::Node] any node of the document; the whole
#  document it belongs to is processed
#
def prune_text(body)
  texts = body.document.xpath('//text()[not(ancestor::html:pre)]', XPATH_NS)

  texts.each do |text|
    text.content = text.content.gsub(/(?: |\t|\r|\n)+/, ' ')

    # might as well fix the damn indentation while we're here
    next unless text.content == ' '

    depth = text.ancestors.count - 1
    prev = text.previous_sibling

    if prev && prev.text? && prev.content =~ /^\s+$/
      # whitespace directly after whitespace is surplus
      text.unlink
    else
      # a trailing (but not leading) node lines up with the closing tag
      depth -= 1 if prev && !text.next_sibling
      text.content = "\n" + (' ' * depth)
    end
  end
end
133
+
134
+ public
135
+
136
# Ingest a markdown file, with sensible defaults
#
# The Markdown is rendered, wrapped in the XHTML boilerplate and parsed,
# then post-processed in this order: a lone leading h1 is promoted to
# the title, headings are grouped into sections, double blockquotes
# become asides, image-only paragraphs become figures, and text nodes
# are whitespace-normalized.
#
# @param obj [String, IO] the content to be ingested; anything that
#  responds to #read is read, anything else is rendered as-is
#
# @param uri [String, #to_s, nil] the document's base URI
#
# @return [Nokogiri::XML::Document]

def ingest(obj, uri = nil)
  source = obj.respond_to?(:read) ? obj.read : obj

  # the base URI is substituted into an attribute value, so it has to be
  # XML-escaped: a bare ampersand (any query string with two parameters)
  # or a double quote would otherwise produce malformed markup
  base = uri ? uri.to_s : ''
  escapes = { '&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;' }
  href = base.gsub(/[&<>"]/, escapes)

  xhtml = XHTML_BP % [href, @@markdown.render(source)]

  # give the parser a plain string (or nil) for the document URL so that
  # URI objects and other #to_s-able values are accepted as documented
  doc = Nokogiri::XML.parse xhtml, (uri ? base : nil)

  body = doc.at_xpath('/html:html/html:body[1]', XPATH_NS)

  # default all six headers unless there's a lone h1
  ranks = h1_title(body) ? 2..6 : 1..6

  # markdown just makes a flat list of elements so let's plump it up
  make_sections body, ranks

  # redo double blockquotes as <aside role="note">
  bq_aside body

  # redo paragraphs containing only images as figures
  img_figure body

  # fix the damn text nodes
  prune_text body

  doc
end
168
+ end
@@ -0,0 +1,5 @@
1
module MD
  class Noko
    # Release version of the gem. Frozen so that the one shared string
    # cannot be modified in place by code that reads it.
    VERSION = '0.1.0'.freeze
  end
end
@@ -0,0 +1,33 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'md/noko/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = 'md-noko'
8
+ spec.version = MD::Noko::VERSION
9
+ spec.authors = ['Dorian Taylor']
10
+ spec.email = ['code@doriantaylor.com']
11
+ spec.license = 'Apache-2.0'
12
+ spec.summary = %q{In goes Markdown, out pops Nokogiri.}
13
+ spec.description = <<-DESC
14
+ This is a simple module that encapsulates a set of desirable
15
+ manipulations to the (X)HTML output of Redcarpet, producing a
16
+ Nokogiri::XML::Document which is amenable to further manipulation.
17
+ DESC
18
+ spec.homepage = 'https://github.com/doriantaylor/rb-md-noko'
19
+
20
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
21
+ spec.bindir = 'exe'
22
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
23
+ spec.require_paths = ['lib']
24
+
25
+ spec.required_ruby_version = '~> 2.0'
26
+
27
+ spec.add_development_dependency "bundler", "~> 1.11"
28
+ spec.add_development_dependency "rake", "~> 10.0"
29
+ spec.add_development_dependency "rspec", "~> 3.0"
30
+
31
+ spec.add_runtime_dependency 'redcarpet', '~> 3.4.0'
32
+ spec.add_runtime_dependency 'xml-mixup', '~> 0.1.5'
33
+ end
metadata ADDED
@@ -0,0 +1,129 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: md-noko
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Dorian Taylor
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2018-05-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.11'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.11'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: redcarpet
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 3.4.0
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 3.4.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: xml-mixup
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: 0.1.5
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 0.1.5
83
+ description: |
84
+ This is a simple module that encapsulates a set of desirable
85
+ manipulations to the (X)HTML output of Redcarpet, producing a
86
+ Nokogiri::XML::Document which is amenable to further manipulation.
87
+ email:
88
+ - code@doriantaylor.com
89
+ executables: []
90
+ extensions: []
91
+ extra_rdoc_files: []
92
+ files:
93
+ - ".gitignore"
94
+ - ".rspec"
95
+ - ".travis.yml"
96
+ - Gemfile
97
+ - README.md
98
+ - Rakefile
99
+ - bin/console
100
+ - bin/setup
101
+ - lib/md-noko.rb
102
+ - lib/md/noko.rb
103
+ - lib/md/noko/version.rb
104
+ - md-noko.gemspec
105
+ homepage: https://github.com/doriantaylor/rb-md-noko
106
+ licenses:
107
+ - Apache-2.0
108
+ metadata: {}
109
+ post_install_message:
110
+ rdoc_options: []
111
+ require_paths:
112
+ - lib
113
+ required_ruby_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '2.0'
118
+ required_rubygems_version: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
123
+ requirements: []
124
+ rubyforge_project:
125
+ rubygems_version: 2.7.6
126
+ signing_key:
127
+ specification_version: 4
128
+ summary: In goes Markdown, out pops Nokogiri.
129
+ test_files: []