paru 0.2.4.3 → 0.2.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/do-pandoc.rb +5 -0
- data/lib/paru.rb +3 -0
- data/lib/paru/filter.rb +126 -18
- data/lib/paru/filter/document.rb +3 -0
- data/lib/paru/filter/link.rb +1 -1
- data/lib/paru/filter/meta_map.rb +10 -0
- data/lib/paru/filter/str.rb +5 -0
- data/lib/paru/pandoc.rb +57 -20
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 49f91b95228c398d4ed7e752c9780d4a56bdb128
|
4
|
+
data.tar.gz: 6096c105a736e686a02e2fc16f1d1985c55dfb05
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5cf262bca810cbae0f4dffae889de6cc4d53fcc466cc6ea3ad342484aef86a54878933cca5e120fac569b6938d92e5d1c14627f412546349b8bae898a282f77f
|
7
|
+
data.tar.gz: a79687986c7709c5489efdb59bca496669d3ae1dd7b31397e64a9b5092d1f3b55a673c41463e68e400814066e49483c1db74243775e802affc85a4ffbaea9154
|
data/bin/do-pandoc.rb
CHANGED
@@ -51,6 +51,11 @@ if metadata.has_key? "pandoc" then
|
|
51
51
|
pandoc = Paru::Pandoc.new
|
52
52
|
to_stdout = true
|
53
53
|
metadata["pandoc"].each do |option, value|
|
54
|
+
if value.is_a? String then
|
55
|
+
value = value.gsub '\\', ''
|
56
|
+
elsif value.is_a? Array then
|
57
|
+
# Strip backslashes from String elements only; any other element (number, boolean, ...) is passed through unchanged instead of being replaced by nil.
value = value.map {|v| v.is_a?(String) ? v.gsub('\\', '') : v}
|
58
|
+
end
|
54
59
|
pandoc.send option, value
|
55
60
|
to_stdout = false if option == "output"
|
56
61
|
end
|
data/lib/paru.rb
CHANGED
data/lib/paru/filter.rb
CHANGED
@@ -70,24 +70,132 @@ module Paru
|
|
70
70
|
|
71
71
|
|
72
72
|
# Filter is used to write your own pandoc filter in Ruby. A Filter is
|
73
|
-
# almost always created and immediately executed via the +run+ method
|
74
|
-
#
|
75
|
-
#
|
76
|
-
#
|
77
|
-
#
|
78
|
-
#
|
79
|
-
#
|
80
|
-
#
|
81
|
-
#
|
82
|
-
#
|
83
|
-
#
|
84
|
-
#
|
85
|
-
#
|
86
|
-
#
|
87
|
-
#
|
88
|
-
#
|
89
|
-
#
|
90
|
-
#
|
73
|
+
# almost always created and immediately executed via the +run+ method. The
|
74
|
+
# simplest filter you can write in paru is the so-called "identity":
|
75
|
+
#
|
76
|
+
# {include:file:examples/filters/identity.rb}
|
77
|
+
#
|
78
|
+
# It runs the filter, but it makes no selection nor performs an action.
|
79
|
+
# This is pretty useless, of course—although it makes for a great way to
|
80
|
+
# test the filter functionality—, but it shows the general setup of a
|
81
|
+
# filter well.
|
82
|
+
#
|
83
|
+
# = Writing a simple filter: numbering figures
|
84
|
+
#
|
85
|
+
# Inside a {Filter#run} block, you specify *selectors* with *actions*. For
|
86
|
+
# example, to number all figures in a document and prefix their captions
|
87
|
+
# with "Figure", the following filter would work:
|
88
|
+
#
|
89
|
+
# {include:file:examples/filters/number_figures.rb}
|
90
|
+
#
|
91
|
+
# This filter selects all {PandocFilter::Image} nodes. For each
|
92
|
+
# {PandocFilter::Image} node it increments the figure counter
|
93
|
+
# +figure_counter+ and then sets the figure's caption to "Figure" followed
|
94
|
+
# by the figure count and the original caption. In other words, the
|
95
|
+
# following input document
|
96
|
+
#
|
97
|
+
# 
|
98
|
+
#
|
99
|
+
# 
|
100
|
+
#
|
101
|
+
# will be transformed into
|
102
|
+
#
|
103
|
+
# 
|
104
|
+
#
|
105
|
+
# 
|
106
|
+
#
|
107
|
+
# The method {PandocFilter::Node#inner_markdown} and its counterpart
|
108
|
+
# {PandocFilter::Node#outer_markdown} are a great way to manipulate the
|
109
|
+
# contents of a selected {PandocFilter::Node}. No messing about creating
|
110
|
+
# and filling {PandocFilter::Node}s, you can just use pandoc's own
|
111
|
+
# markdown format!
|
112
|
+
#
|
113
|
+
# = Writing more involved filters
|
114
|
+
#
|
115
|
+
# == Using the "follows" selector: Numbering figures and chapters
|
116
|
+
#
|
117
|
+
# The previous example can be extended to also number chapters and to
|
118
|
+
# start numbering figures anew per chapter. As you would expect, we need
|
119
|
+
# two counters, one for the figures and one for the chapters:
|
120
|
+
#
|
121
|
+
# {include:file:examples/filters/number_figures_per_chapter.rb}
|
122
|
+
#
|
123
|
+
# What is new in this filter, however, is the selector "Header + Image"
|
124
|
+
# which selects all {PandocFilter::Image} nodes that *follow* a
|
125
|
+
# {PandocFilter::Header} node. Documents in pandoc have a _flat_ structure
|
126
|
+
# where chapters do not exist as separate concepts. Instead, a chapter is
|
127
|
+
# implied by a header of a certain level and everything that follows until
|
128
|
+
# the next header of that level.
|
129
|
+
#
|
130
|
+
# == Using the "child of" selector: Annotate custom blocks
|
131
|
+
#
|
132
|
+
# Hierarchical structures do exist in a pandoc document, however. For
|
133
|
+
# example, the contents of a paragraph ({PandocFilter::Para}), which
|
134
|
+
# itself is a {PandocFilter::Block} level node, are {PandocFilter::Inline}
|
135
|
+
# level nodes. Other examples are custom block or {PandocFilter::Div}
|
136
|
+
# nodes. You select a child node by using the +>+ selector as in the
|
137
|
+
# example below:
|
138
|
+
#
|
139
|
+
# {include:file:examples/filters/example.rb}
|
140
|
+
#
|
141
|
+
# Here all {PandocFilter::Header} nodes that are inside a
|
142
|
+
# {PandocFilter::Div} node are selected. Furthermore, if these headers are
|
143
|
+
# of level 3, they are prefixed by the string "Example" followed by a
|
144
|
+
# count.
|
145
|
+
#
|
146
|
+
# In this example, "important" {PandocFilter::Div} nodes are annotated by
|
147
|
+
# putting the string *important* before the contents of the node.
|
148
|
+
#
|
149
|
+
# == Using a distance in a selector: Capitalize the first N characters of
|
150
|
+
# a paragraph
|
151
|
+
#
|
152
|
+
# Given the flat structure of a pandoc document, the "follows" selector
|
153
|
+
# has quite a reach. For example, "Header + Para" selects all paragraphs
|
154
|
+
# that follow a header. In most well-structured documents, this would
|
155
|
+
# select basically all paragraphs.
|
156
|
+
#
|
157
|
+
# But what if you need to be more specific? For example, if you would like
|
158
|
+
# to capitalize the first sentence of each first paragraph of a chapter,
|
159
|
+
# you need a way to specify a sequence number of sorts. To that end, paru
|
160
|
+
# filter selectors take an optional *distance* parameter. A filter for
|
161
|
+
# this example could look like:
|
162
|
+
#
|
163
|
+
# {include:file:examples/filters/capitalize_first_sentence.rb}
|
164
|
+
#
|
165
|
+
# The distance is denoted after a selector by an integer. In this case
|
166
|
+
# "Header +1 Para" selects all {PandocFilter::Para} nodes that directly
|
167
|
+
# follow a {PandocFilter::Header} node. You can use a distance with any
|
168
|
+
# selector.
|
169
|
+
#
|
170
|
+
# == Manipulating nodes: Removing horizontal lines
|
171
|
+
#
|
172
|
+
# Although the {PandocFilter::Node#inner_markdown} and
|
173
|
+
# {PandocFilter::Node#outer_markdown} methods work in most situations, sometimes
|
174
|
+
# direct manipulation of the pandoc document AST is useful. These
|
175
|
+
# {PandocFilter::ASTManipulation} methods are mixed in
|
176
|
+
# {PandocFilter::Node} and can be used on any node in your filter. For
|
177
|
+
# example, to delete all {PandocFilter::HorizontalRule} nodes, you can use a
|
178
|
+
# filter like:
|
179
|
+
#
|
180
|
+
# {include:file:examples/filters/delete_horizontal_rules.rb}
|
181
|
+
#
|
182
|
+
# Note that you could have arrived at the same effect by using:
|
183
|
+
#
|
184
|
+
# rule.outer_markdown = ""
|
185
|
+
#
|
186
|
+
#
|
187
|
+
#
|
188
|
+
# == Manipulating metadata: Adding the date of today
|
189
|
+
#
|
190
|
+
# One of the interesting features of the pandoc markdown format is the
|
191
|
+
# ability to add metadata to a document via a YAML block or command line
|
192
|
+
# options. For example, if you use a template that uses the metadata
|
193
|
+
# property +$date$+ to write a date on a title page, it is quite useful to
|
194
|
+
# automatically add the date of _today_ to the metadata. You can do so
|
195
|
+
# with a filter like:
|
196
|
+
#
|
197
|
+
# {include:file:examples/filters/add_today.rb}
|
198
|
+
#
|
91
199
|
class Filter
|
92
200
|
|
93
201
|
# Run the filter specified by block. In the block you specify
|
data/lib/paru/filter/document.rb
CHANGED
@@ -25,8 +25,11 @@ module Paru
|
|
25
25
|
require_relative "./meta"
|
26
26
|
require_relative "./version"
|
27
27
|
|
28
|
+
# Pandoc type version key
|
28
29
|
VERSION = "pandoc-api-version"
|
30
|
+
# Pandoc type meta key
|
29
31
|
META = "meta"
|
32
|
+
# Pandoc type block key
|
30
33
|
BLOCKS = "blocks"
|
31
34
|
|
32
35
|
# The current pandoc type version
|
data/lib/paru/filter/link.rb
CHANGED
data/lib/paru/filter/meta_map.rb
CHANGED
@@ -50,6 +50,16 @@ module Paru
|
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
53
|
+
# Set a value with a key
|
54
|
+
#
|
55
|
+
# @param key [String] the key to set
|
56
|
+
# @param value
|
57
|
+
# [MetaBlocks|MetaBool|MetaInline|MetaList|MetaMap|MetaString|MetaValue]
|
58
|
+
# the value to set
|
59
|
+
def []=(key, value)
|
60
|
+
@children[key] = value
|
61
|
+
end
|
62
|
+
|
53
63
|
# Does this MetaMap node have key?
|
54
64
|
#
|
55
65
|
# @param key [String] the key to find
|
data/lib/paru/filter/str.rb
CHANGED
@@ -21,8 +21,13 @@ module Paru
|
|
21
21
|
require_relative "./inline"
|
22
22
|
|
23
23
|
# A Str node represents a string
|
24
|
+
#
|
25
|
+
# @!attribute string
|
26
|
+
# @return [String] the value of this Str node.
|
24
27
|
class Str < Inline
|
25
28
|
|
29
|
+
attr_accessor :string
|
30
|
+
|
26
31
|
# Create a new Str node based on the value
|
27
32
|
#
|
28
33
|
# @param value [String]
|
data/lib/paru/pandoc.rb
CHANGED
@@ -22,32 +22,69 @@ module Paru
|
|
22
22
|
require "yaml"
|
23
23
|
|
24
24
|
# Pandoc is a wrapper around the pandoc document converter. See
|
25
|
-
# <http://pandoc.org/README.html> for details about pandoc.
|
26
|
-
# basically a straightforward translation from the pandoc command
|
27
|
-
# program to
|
25
|
+
# <http://pandoc.org/README.html> for details about pandoc. The Pandoc
|
26
|
+
# class is basically a straightforward translation from the pandoc command
|
27
|
+
# line program to Ruby. It is a Rubyesque API to work with pandoc.
|
28
|
+
#
|
29
|
+
# For information about writing pandoc filters in Ruby see {Filter}.
|
30
|
+
#
|
31
|
+
# Creating a Paru pandoc converter in Ruby is quite straightforward: you
|
32
|
+
# create a new Paru::Pandoc object with a block that configures that
|
33
|
+
# Pandoc object with pandoc options. Each command-line option to pandoc is
|
34
|
+
# a method on the Pandoc object. Command-line options with dashes in them,
|
35
|
+
# such as "--reference-docx", can be called by replacing the dash with an
|
36
|
+
# underscore. So, "--reference-docx" becomes the method +reference_docx+.
|
37
|
+
#
|
38
|
+
# Pandoc command-line flags, such as "--parse-raw", "--chapters", or
|
39
|
+
# "--toc", have been translated to Paru::Pandoc methods that take an
|
40
|
+
# optional Boolean parameter; +true+ is the default value. Therefore, if
|
41
|
+
# you want to enable a flag, no parameter is needed.
|
42
|
+
#
|
43
|
+
# All other pandoc command-line options are translated to Paru::Pandoc
|
44
|
+
# methods that take either one String or Number argument, or a list of
|
45
|
+
# String arguments if that command-line option can occur more than once
|
46
|
+
# (such as "--include-before-header" or "--filter").
|
47
|
+
#
|
48
|
+
# Once you have configured a Paru::Pandoc converter, you can call
|
49
|
+
# +convert+ or +<<+ (which is an alias for +convert+) with a string to
|
50
|
+
# convert. You can call +convert+ as often as you like and, if you like,
|
51
|
+
# reconfigure the converter in between!
|
28
52
|
#
|
29
|
-
# @example Convert the markdown string 'hello *world*' to HTML
|
30
|
-
# converter = Paru::Pandoc.new
|
31
|
-
# converter.configure do
|
32
|
-
# from "markdown"
|
33
|
-
# to "html"
|
34
|
-
# end
|
35
|
-
# converter.convert 'hello *world*'
|
36
53
|
#
|
37
|
-
# @example Convert markdown
|
54
|
+
# @example Convert the markdown string 'hello *world*' to HTML
|
38
55
|
# Paru::Pandoc.new do
|
39
|
-
# from markdown
|
40
|
-
# to html
|
56
|
+
# from 'markdown'
|
57
|
+
# to 'html'
|
41
58
|
# end << 'hello *world*'
|
42
59
|
#
|
60
|
+
# @example Convert a HTML file to DOCX with a reference file
|
61
|
+
# Paru::Pandoc.new do
|
62
|
+
# from "html"
|
63
|
+
# to "docx"
|
64
|
+
# reference_docx "styled_output.docx"
|
65
|
+
# output "output.docx"
|
66
|
+
# end.convert File.read("input.html")
|
67
|
+
#
|
68
|
+
# @example Convert a markdown file to html but add in references in APA style
|
69
|
+
# Paru::Pandoc.new do
|
70
|
+
# from "markdown"
|
71
|
+
# toc
|
72
|
+
# bibliography "literature.bib"
|
73
|
+
# to "html"
|
74
|
+
# csl "apa.csl"
|
75
|
+
# output "report_with_references.html"
|
76
|
+
# end << File.read("report.md")
|
77
|
+
#
|
43
78
|
#
|
44
79
|
class Pandoc
|
45
80
|
|
46
|
-
# Gather information about pandoc. It runs
|
47
|
-
# pandoc's version number and default data
|
81
|
+
# Gather information about the pandoc installation. It runs +pandoc
|
82
|
+
# --version+ and extracts pandoc's version number and default data
|
83
|
+
# directory. This method is typically used in scripts that use Paru to
|
84
|
+
# automate the use of pandoc.
|
48
85
|
#
|
49
|
-
# @return [Hash
|
50
|
-
# pandoc
|
86
|
+
# @return [Hash{:version => String, :data_dir => String}] Pandoc's
|
87
|
+
# version, such as "1.17.0.4" and the data directory, such as "/home/huub/.pandoc".
|
51
88
|
def self.info()
|
52
89
|
output = ''
|
53
90
|
IO.popen('pandoc --version', 'r+') do |p|
|
@@ -63,10 +100,10 @@ module Paru
|
|
63
100
|
}
|
64
101
|
end
|
65
102
|
|
66
|
-
# Create a new Pandoc converter, optionally configured by block
|
103
|
+
# Create a new Pandoc converter, optionally configured by a block with
|
104
|
+
# pandoc options. See {#configure} on how to configure a converter.
|
67
105
|
#
|
68
|
-
# @param block [Proc] an optional configuration block.
|
69
|
-
# for how to configure a Pandoc converter
|
106
|
+
# @param block [Proc] an optional configuration block.
|
70
107
|
def initialize(&block)
|
71
108
|
@options = {}
|
72
109
|
configure(&block) if block_given?
|