paru 1.5.0 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/paru/error.rb +6 -4
- data/lib/paru/filter/ast_manipulation.rb +90 -91
- data/lib/paru/filter/attr.rb +75 -69
- data/lib/paru/filter/block.rb +15 -14
- data/lib/paru/filter/block_quote.rb +14 -12
- data/lib/paru/filter/bullet_list.rb +17 -16
- data/lib/paru/filter/caption.rb +50 -48
- data/lib/paru/filter/cell.rb +52 -50
- data/lib/paru/filter/citation.rb +53 -51
- data/lib/paru/filter/cite.rb +34 -33
- data/lib/paru/filter/code.rb +51 -49
- data/lib/paru/filter/code_block.rb +76 -76
- data/lib/paru/filter/col_spec.rb +58 -56
- data/lib/paru/filter/definition_list.rb +51 -52
- data/lib/paru/filter/definition_list_item.rb +45 -43
- data/lib/paru/filter/div.rb +37 -35
- data/lib/paru/filter/document.rb +112 -115
- data/lib/paru/filter/emph.rb +7 -5
- data/lib/paru/filter/empty_block.rb +17 -16
- data/lib/paru/filter/empty_inline.rb +23 -22
- data/lib/paru/filter/figure.rb +41 -39
- data/lib/paru/filter/header.rb +41 -39
- data/lib/paru/filter/horizontal_rule.rb +7 -5
- data/lib/paru/filter/image.rb +13 -12
- data/lib/paru/filter/inline.rb +27 -26
- data/lib/paru/filter/inner_markdown.rb +60 -62
- data/lib/paru/filter/int_value.rb +19 -18
- data/lib/paru/filter/line_block.rb +13 -11
- data/lib/paru/filter/line_break.rb +7 -5
- data/lib/paru/filter/link.rb +34 -33
- data/lib/paru/filter/list.rb +37 -37
- data/lib/paru/filter/list_attributes.rb +52 -51
- data/lib/paru/filter/math.rb +66 -64
- data/lib/paru/filter/meta.rb +40 -39
- data/lib/paru/filter/meta_blocks.rb +7 -5
- data/lib/paru/filter/meta_bool.rb +7 -5
- data/lib/paru/filter/meta_inlines.rb +9 -7
- data/lib/paru/filter/meta_list.rb +7 -5
- data/lib/paru/filter/meta_map.rb +50 -49
- data/lib/paru/filter/meta_string.rb +7 -6
- data/lib/paru/filter/meta_value.rb +26 -25
- data/lib/paru/filter/metadata.rb +150 -88
- data/lib/paru/filter/node.rb +400 -406
- data/lib/paru/filter/note.rb +29 -29
- data/lib/paru/filter/null.rb +7 -5
- data/lib/paru/filter/ordered_list.rb +50 -49
- data/lib/paru/filter/para.rb +21 -20
- data/lib/paru/filter/plain.rb +23 -21
- data/lib/paru/filter/quoted.rb +28 -26
- data/lib/paru/filter/short_caption.rb +7 -5
- data/lib/paru/filter/small_caps.rb +8 -7
- data/lib/paru/filter/soft_break.rb +7 -5
- data/lib/paru/filter/space.rb +7 -5
- data/lib/paru/filter/span.rb +29 -27
- data/lib/paru/filter/str.rb +33 -32
- data/lib/paru/filter/strikeout.rb +7 -6
- data/lib/paru/filter/strong.rb +7 -6
- data/lib/paru/filter/subscript.rb +7 -6
- data/lib/paru/filter/superscript.rb +7 -6
- data/lib/paru/filter/table.rb +201 -210
- data/lib/paru/filter/table_body.rb +67 -67
- data/lib/paru/filter/table_end.rb +53 -55
- data/lib/paru/filter/table_foot.rb +8 -7
- data/lib/paru/filter/table_head.rb +8 -7
- data/lib/paru/filter/target.rb +29 -27
- data/lib/paru/filter/underline.rb +7 -5
- data/lib/paru/filter/value.rb +74 -75
- data/lib/paru/filter/version.rb +23 -22
- data/lib/paru/filter.rb +355 -331
- data/lib/paru/filter_error.rb +7 -5
- data/lib/paru/info.rb +29 -30
- data/lib/paru/pandoc.rb +241 -248
- data/lib/paru/pandoc2yaml.rb +51 -42
- data/lib/paru/selector.rb +193 -184
- data/lib/paru.rb +3 -1
- metadata +4 -73
data/lib/paru/filter.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
#--
|
2
|
-
# Copyright 2015
|
4
|
+
# Copyright 2015--2025 Huub de Beer <Huub@heerdebeer.org>
|
3
5
|
#
|
4
6
|
# This file is part of Paru
|
5
7
|
#
|
@@ -16,360 +18,382 @@
|
|
16
18
|
# You should have received a copy of the GNU General Public License
|
17
19
|
# along with Paru. If not, see <http://www.gnu.org/licenses/>.
|
18
20
|
#++
|
19
|
-
require_relative
|
20
|
-
require_relative
|
21
|
-
require_relative
|
21
|
+
require_relative 'selector'
|
22
|
+
require_relative 'filter/document'
|
23
|
+
require_relative 'filter/metadata'
|
22
24
|
|
23
25
|
module Paru
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
26
|
+
# Paru filter is a wrapper around pandoc's JSON api, which is based on
|
27
|
+
# {pandoc-types}[https://hackage.haskell.org/package/pandoc-types-1.23/docs/Text-Pandoc-Definition.html].
|
28
|
+
# Pandoc treats block elements and inline elements differently.
|
29
|
+
#
|
30
|
+
# Pandoc's block elements are:
|
31
|
+
PANDOC_BLOCK = %w[
|
32
|
+
Plain
|
33
|
+
Para
|
34
|
+
LineBlock
|
35
|
+
CodeBlock
|
36
|
+
RawBlock
|
37
|
+
BlockQuote
|
38
|
+
OrderedList
|
39
|
+
BulletList
|
40
|
+
DefinitionList
|
41
|
+
Header
|
42
|
+
HorizontalRule
|
43
|
+
Table
|
44
|
+
TableHead
|
45
|
+
TableFoot
|
46
|
+
TableBody
|
47
|
+
Row
|
48
|
+
Cell
|
49
|
+
Figure
|
50
|
+
Caption
|
51
|
+
Div
|
52
|
+
Null
|
53
|
+
].freeze
|
52
54
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
55
|
+
# Pandoc's inline elements are
|
56
|
+
PANDOC_INLINE = %w[
|
57
|
+
Str
|
58
|
+
Emph
|
59
|
+
Underline
|
60
|
+
Strong
|
61
|
+
Strikeout
|
62
|
+
Superscript
|
63
|
+
Subscript
|
64
|
+
SmallCaps
|
65
|
+
Quoted
|
66
|
+
Cite
|
67
|
+
Code
|
68
|
+
Space
|
69
|
+
SoftBreak
|
70
|
+
LineBreak
|
71
|
+
Math
|
72
|
+
RawInline
|
73
|
+
Link
|
74
|
+
Image
|
75
|
+
Note
|
76
|
+
Span
|
77
|
+
].freeze
|
76
78
|
|
77
|
-
|
78
|
-
|
79
|
+
# All of pandoc's types together:
|
80
|
+
PANDOC_TYPES = PANDOC_BLOCK + PANDOC_INLINE
|
79
81
|
|
82
|
+
# Filter is used to write your own pandoc filter in Ruby. A Filter is
|
83
|
+
# almost always created and immediately executed via the +run+ method. The
|
84
|
+
# most simple filter you can write in paru is the so-called "identity":
|
85
|
+
#
|
86
|
+
# {include:file:examples/filters/identity.rb}
|
87
|
+
#
|
88
|
+
# It runs the filter, but it makes no selection nor performs an action.
|
89
|
+
# This is pretty useless, of course—although it makes for a great way to
|
90
|
+
# test the filter functionality—, but it shows the general setup of a
|
91
|
+
# filter well.
|
92
|
+
#
|
93
|
+
# = Writing a simple filter: numbering figures
|
94
|
+
#
|
95
|
+
# Inside a {Filter#run} block, you specify *selectors* with *actions*. For
|
96
|
+
# example, to number all figures in a document and prefix their captions
|
97
|
+
# with "Figure", the following filter would work:
|
98
|
+
#
|
99
|
+
# {include:file:examples/filters/number_figures.rb}
|
100
|
+
#
|
101
|
+
# This filter selects all {PandocFilter::Image} nodes. For each
|
102
|
+
# {PandocFilter::Image} node it increments the figure counter
|
103
|
+
# +figure_counter+ and then sets the figure's caption to "Figure" followed
|
104
|
+
# by the figure count and the original caption. In other words, the
|
105
|
+
# following input document
|
106
|
+
#
|
107
|
+
# 
|
108
|
+
#
|
109
|
+
# 
|
110
|
+
#
|
111
|
+
# will be transformed into
|
112
|
+
#
|
113
|
+
# 
|
114
|
+
#
|
115
|
+
# 
|
116
|
+
#
|
117
|
+
# The method {PandocFilter::InnerMarkdown#inner_markdown} and its counterpart
|
118
|
+
# {PandocFilter::Node#markdown} are a great way to manipulate the
|
119
|
+
# contents of a selected {PandocFilter::Node}. No messing about creating
|
120
|
+
# and filling {PandocFilter::Node}s, you can just use pandoc's own
|
121
|
+
# markdown format!
|
122
|
+
#
|
123
|
+
# = Writing more involved filters
|
124
|
+
#
|
125
|
+
# == Using the "follows" selector: Numbering figures and chapters
|
126
|
+
#
|
127
|
+
# The previous example can be extended to also number chapters and to
|
128
|
+
# start numbering figures anew per chapter. As you would expect, we need
|
129
|
+
# two counters, one for the figures and one for the chapters:
|
130
|
+
#
|
131
|
+
# {include:file:examples/filters/number_figures_per_chapter.rb}
|
132
|
+
#
|
133
|
+
# What is new in this filter, however, is the selector "Header + Image"
|
134
|
+
# which selects all {PandocFilter::Image} nodes that *follow* a
|
135
|
+
# {PandocFilter::Header} node. Documents in pandoc have a _flat_ structure
|
136
|
+
# where chapters do not exist as separate concepts. Instead, a chapter is
|
137
|
+
# implied by a header of a certain level and everything that follows until
|
138
|
+
# the next header of that level.
|
139
|
+
#
|
140
|
+
# == Using the "child of" selector: Annotate custom blocks
|
141
|
+
#
|
142
|
+
# Hierarchical structures do exist in a pandoc document, however. For
|
143
|
+
# example, the contents of a paragraph ({PandocFilter::Para}), which
|
144
|
+
# itself is a {PandocFilter::Block} level node, are {PandocFilter::Inline}
|
145
|
+
# level nodes. Another example are custom block or {PandocFilter::Div}
|
146
|
+
# nodes. You select a child node by using the +>+ selector as in the
|
147
|
+
# example below:
|
148
|
+
#
|
149
|
+
# {include:file:examples/filters/example.rb}
|
150
|
+
#
|
151
|
+
# Here all {PandocFilter::Header} nodes that are inside a
|
152
|
+
# {PandocFilter::Div} node are selected. Furthermore, if these headers are
|
153
|
+
# of level 3, they are prefixed by the string "Example" followed by a
|
154
|
+
# count.
|
155
|
+
#
|
156
|
+
# In this example, "important" {PandocFilter::Div} nodes are annotated by
|
157
|
+
# putting the string *important* before the contents of the node.
|
158
|
+
#
|
159
|
+
# == Using a distance in a selector: Capitalize the first N characters of
|
160
|
+
# a paragraph
|
161
|
+
#
|
162
|
+
# Given the flat structure of a pandoc document, the "follows" selector
|
163
|
+
# has quite a reach. For example, "Header + Para" selects all paragraphs
|
164
|
+
# that follow a header. In most well-structured documents, this would
|
165
|
+
# select basically all paragraphs.
|
166
|
+
#
|
167
|
+
# But what if you need to be more specific? For example, if you would like
|
168
|
+
# to capitalize the first sentence of each first paragraph of a chapter,
|
169
|
+
# you need a way to specify a sequence number of sorts. To that end, paru
|
170
|
+
# filter selectors take an optional *distance* parameter. A filter for
|
171
|
+
# this example could look like:
|
172
|
+
#
|
173
|
+
# {include:file:examples/filters/capitalize_first_sentence.rb}
|
174
|
+
#
|
175
|
+
# The distance is denoted after a selector by an integer. In this case
|
176
|
+
# "Header +1 Para" selects all {PandocFilter::Para} nodes that directly
|
177
|
+
# follow a {PandocFilter::Header} node. You can use a distance with any
|
178
|
+
# selector.
|
179
|
+
#
|
180
|
+
# == Manipulating nodes: Removing horizontal lines
|
181
|
+
#
|
182
|
+
# Although the {PandocFilter::InnerMarkdown#inner_markdown} and
|
183
|
+
# {PandocFilter::Node#markdown} work in most situations, sometimes
|
184
|
+
# direct manipulation of the pandoc document AST is useful. These
|
185
|
+
# {PandocFilter::ASTManipulation} methods are mixed in
|
186
|
+
# {PandocFilter::Node} and can be used on any node in your filter. For
|
187
|
+
# example, to delete all {PandocFilter::HorizontalRule} nodes, you can use a
|
188
|
+
# filter like:
|
189
|
+
#
|
190
|
+
# {include:file:examples/filters/delete_horizontal_rules.rb}
|
191
|
+
#
|
192
|
+
# Note that you could have arrived at the same effect by using:
|
193
|
+
#
|
194
|
+
# rule.markdown = ""
|
195
|
+
#
|
196
|
+
# == Manipulating metadata:
|
197
|
+
#
|
198
|
+
# One of the interesting features of the pandoc markdown format is the
|
199
|
+
# ability to add metadata to a document via a YAML block or command line
|
200
|
+
# options. For example, if you use a template that uses the metadata
|
201
|
+
# property +$date$+ to write a date on a title page, it is quite useful to
|
202
|
+
# automatically add the date of _today_ to the metadata. You can do so
|
203
|
+
# with a filter like:
|
204
|
+
#
|
205
|
+
# {include:file:examples/filters/add_today.rb}
|
206
|
+
#
|
207
|
+
# In a filter, the +metadata+ property is a Ruby Hash of Strings, Numbers,
|
208
|
+
# Booleans, Arrays, and Hashes. You can manipulate it like any other Ruby
|
209
|
+
# Hash.
|
210
|
+
#
|
211
|
+
# @!attribute metadata
|
212
|
+
# @return [Hash] The metadata of the document being filtered as a Ruby
|
213
|
+
# Hash
|
214
|
+
#
|
215
|
+
# @!attribute document
|
216
|
+
# @return [Document] The document being filtered
|
217
|
+
#
|
218
|
+
# @!attribute current_node
|
219
|
+
# @return [Node] The node in the AST of the document being filtered that
|
220
|
+
# is currently being inspected by the filter.
|
221
|
+
#
|
222
|
+
class Filter
|
223
|
+
attr_reader :metadata, :document, :current_node
|
80
224
|
|
81
|
-
#
|
82
|
-
#
|
83
|
-
#
|
84
|
-
#
|
85
|
-
#
|
86
|
-
#
|
87
|
-
#
|
88
|
-
#
|
89
|
-
#
|
90
|
-
#
|
91
|
-
#
|
92
|
-
#
|
93
|
-
#
|
94
|
-
#
|
95
|
-
#
|
96
|
-
#
|
97
|
-
#
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
# by the figure count and the original caption. In other words, the
|
104
|
-
# following input document
|
105
|
-
#
|
106
|
-
# 
|
107
|
-
#
|
108
|
-
# 
|
109
|
-
#
|
110
|
-
# will be transformed into
|
111
|
-
#
|
112
|
-
# 
|
113
|
-
#
|
114
|
-
# 
|
115
|
-
#
|
116
|
-
# The method {PandocFilter::InnerMarkdown#inner_markdown} and its counterpart
|
117
|
-
# {PandocFilter::Node#markdown} are a great way to manipulate the
|
118
|
-
# contents of a selected {PandocFilter::Node}. No messing about creating
|
119
|
-
# and filling {PandocFilter::Node}s, you can just use pandoc's own
|
120
|
-
# markdown format!
|
121
|
-
#
|
122
|
-
# = Writing a more involved filters
|
123
|
-
#
|
124
|
-
# == Using the "follows" selector: Numbering figures and chapters
|
125
|
-
#
|
126
|
-
# The previous example can be extended to also number chapters and to
|
127
|
-
# start numbering figures anew per chapter. As you would expect, we need
|
128
|
-
# two counters, one for the figures and one for the chapters:
|
129
|
-
#
|
130
|
-
# {include:file:examples/filters/number_figures_per_chapter.rb}
|
131
|
-
#
|
132
|
-
# What is new in this filter, however, is the selector "Header + Image"
|
133
|
-
# which selects all {PandocFilter::Image} nodes that *follow* a
|
134
|
-
# {PandocFilter::Header} node. Documents in pandoc have a _flat_ structure
|
135
|
-
# where chapters do not exists as separate concepts. Instead, a chapter is
|
136
|
-
# implied by a header of a certain level and everything that follows until
|
137
|
-
# the next header of that level.
|
138
|
-
#
|
139
|
-
# == Using the "child of" selector: Annotate custom blocks
|
140
|
-
#
|
141
|
-
# Hierarchical structures do exist in a pandoc document, however. For
|
142
|
-
# example, the contents of a paragraph ({PandocFilter::Para}), which
|
143
|
-
# itself is a {PandocFilter::Block} level node, are {PandocFilter::Inline}
|
144
|
-
# level nodes. Another example are custom block or {PandocFilter::Div}
|
145
|
-
# nodes. You select a child node by using the +>+ selector as in the
|
146
|
-
# example below:
|
147
|
-
#
|
148
|
-
# {include:file:examples/filters/example.rb}
|
149
|
-
#
|
150
|
-
# Here all {PandocFilter::Header} nodes that are inside a
|
151
|
-
# {PandocFilter::Div} node are selected. Furthermore, if these headers are
|
152
|
-
# of level 3, they are prefixed by the string "Example" followed by a
|
153
|
-
# count.
|
154
|
-
#
|
155
|
-
# In this example, "important" {PandocFilter::Div} nodes are annotated by
|
156
|
-
# putting the string *important* before the contents of the node.
|
157
|
-
#
|
158
|
-
# == Using a distance in a selector: Capitalize the first N characters of
|
159
|
-
# a paragraph
|
160
|
-
#
|
161
|
-
# Given the flat structure of a pandoc document, the "follows" selector
|
162
|
-
# has quite a reach. For example, "Header + Para" selects all paragraphs
|
163
|
-
# that follow a header. In most well-structured documents, this would
|
164
|
-
# select basically all paragraphs.
|
165
|
-
#
|
166
|
-
# But what if you need to be more specific? For example, if you would like
|
167
|
-
# to capitalize the first sentence of each first paragraph of a chapter,
|
168
|
-
# you need a way to specify a sequence number of sorts. To that end, paru
|
169
|
-
# filter selectors take an optional *distance* parameter. A filter for
|
170
|
-
# this example could look like:
|
171
|
-
#
|
172
|
-
# {include:file:examples/filters/capitalize_first_sentence.rb}
|
173
|
-
#
|
174
|
-
# The distance is denoted after a selector by an integer. In this case
|
175
|
-
# "Header +1 Para" selects all {PandocFilter::Para} nodes that directly
|
176
|
-
# follow an {PandocFilter::Header} node. You can use a distance with any
|
177
|
-
# selector.
|
178
|
-
#
|
179
|
-
# == Manipulating nodes: Removing horizontal lines
|
180
|
-
#
|
181
|
-
# Although the {PandocFilter::InnerMarkdown#inner_markdown} and
|
182
|
-
# {PandocFilter::Node#markdown} work in most situations, sometimes
|
183
|
-
# direct manipulation of the pandoc document AST is useful. These
|
184
|
-
# {PandocFilter::ASTManipulation} methods are mixed in
|
185
|
-
# {PandocFilter::Node} and can be used on any node in your filter. For
|
186
|
-
# example, to delete all {PandocFilter::HorizontalRule} nodes, can use a
|
187
|
-
# filter like:
|
188
|
-
#
|
189
|
-
# {include:file:examples/filters/delete_horizontal_rules.rb}
|
190
|
-
#
|
191
|
-
# Note that you could have arrived at the same effect by using:
|
192
|
-
#
|
193
|
-
# rule.markdown = ""
|
194
|
-
#
|
195
|
-
# == Manipulating metadata:
|
196
|
-
#
|
197
|
-
# One of the interesting features of the pandoc markdown format is the
|
198
|
-
# ability to add metadata to a document via a YAML block or command line
|
199
|
-
# options. For example, if you use a template that uses the metadata
|
200
|
-
# property +$date$+ to write a date on a title page, it is quite useful to
|
201
|
-
# automatically add the date of _today_ to the metadata. You can do so
|
202
|
-
# with a filter like:
|
203
|
-
#
|
204
|
-
# {include:file:examples/filters/add_today.rb}
|
205
|
-
#
|
206
|
-
# In a filter, the +metadata+ property is a Ruby Hash of Strings, Numbers,
|
207
|
-
# Booleans, Arrays, and Hashes. You can manipulate it like any other Ruby
|
208
|
-
# Hash.
|
209
|
-
#
|
210
|
-
# @!attribute metadata
|
211
|
-
# @return [Hash] The metadata of the document being filtered as a Ruby
|
212
|
-
# Hash
|
213
|
-
#
|
214
|
-
# @!attribute document
|
215
|
-
# @return [Document] The document being filtered
|
216
|
-
#
|
217
|
-
# @!attribute current_node
|
218
|
-
# @return [Node] The node in the AST of the document being filtered that
|
219
|
-
# is currently being inspected by the filter.
|
220
|
-
#
|
221
|
-
class Filter
|
222
|
-
|
223
|
-
attr_reader :metadata, :document, :current_node
|
224
|
-
|
225
|
-
# Create a new Filter instance. For convenience, {run} creates a new
|
226
|
-
# {Filter} and runs it immediately. Use this constructor if you want
|
227
|
-
# to run a filter on different input and output streams that STDIN and
|
228
|
-
# STDOUT respectively.
|
229
|
-
#
|
230
|
-
# @param input [IO = $stdin] the input stream to read, defaults to
|
231
|
-
# STDIN
|
232
|
-
# @param output [IO = $stdout] the output stream to write, defaults to
|
233
|
-
# STDOUT
|
234
|
-
def initialize(input = $stdin, output = $stdout)
|
235
|
-
@input = input
|
236
|
-
@output = output
|
237
|
-
end
|
225
|
+
# Create a new Filter instance. For convenience, {run} creates a new
|
226
|
+
# {Filter} and runs it immediately. Use this constructor if you want
|
227
|
+
# to run a filter on different input and output streams than STDIN and
|
228
|
+
# STDOUT respectively.
|
229
|
+
#
|
230
|
+
# @param input [IO = $stdin] the input stream to read, defaults to
|
231
|
+
# STDIN
|
232
|
+
# @param output [IO = $stdout] the output stream to write, defaults to
|
233
|
+
# STDOUT
|
234
|
+
# @param treat_metadata_strings_as_plain_strings [Boolean = false] feature
|
235
|
+
# toggle to treat metadata string values as plain strings instead of
|
236
|
+
# markdown strings if all AST leaf metadata string values have pandoc type
|
237
|
+
# "MetaString". This option is only relevant when you **only** set metadata
|
238
|
+
# string values via command-line option `--metadata` and not also via a
|
239
|
+
# YAML or title block. Using this option improves performance in this
|
240
|
+
# specific situation because metadata values don't have to be converted to
|
241
|
+
# string by pandoc in a separate process but can be collected as is.
|
242
|
+
def initialize(input = $stdin, output = $stdout, treat_metadata_strings_as_plain_strings: false)
|
243
|
+
@input = input
|
244
|
+
@output = output
|
245
|
+
@treat_metadata_strings_as_plain_strings = treat_metadata_strings_as_plain_strings
|
246
|
+
end
|
238
247
|
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
248
|
+
# Run the filter specified by block. This is a convenience method that
|
249
|
+
# creates a new {Filter} using input stream STDIN and output stream
|
250
|
+
# STDOUT and immediately runs {filter} with the block supplied.
|
251
|
+
#
|
252
|
+
# @param treat_metadata_strings_as_plain_strings [Boolean = false] feature
|
253
|
+
# toggle to treat metadata string values as plain strings instead of
|
254
|
+
# markdown strings if all AST leaf metadata string values have pandoc type
|
255
|
+
# "MetaString". This option is only relevant when you **only** set metadata
|
256
|
+
# string values via command-line option `--metadata` and not also via a
|
257
|
+
# YAML or title block. Using this option improves performance in this
|
258
|
+
# specific situation because metadata values don't have to be converted to
|
259
|
+
# string by pandoc in a separate process but can be collected as is.
|
260
|
+
# @param block [Proc] the filter specification
|
261
|
+
#
|
262
|
+
# @example Add 'Figure' to each image's caption
|
263
|
+
# Paru::Filter.run do
|
264
|
+
# with "Image" do |image|
|
265
|
+
# image.inner_markdown = "Figure. #{image.inner_markdown}"
|
266
|
+
# end
|
267
|
+
# end
|
268
|
+
def self.run(treat_metadata_strings_as_plain_strings: false, &block)
|
269
|
+
Filter.new(
|
270
|
+
$stdin,
|
271
|
+
$stdout,
|
272
|
+
treat_metadata_strings_as_plain_strings: treat_metadata_strings_as_plain_strings
|
273
|
+
).filter(&block)
|
274
|
+
end
|
254
275
|
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
276
|
+
# Create a filter using +block+. In the block you specify
|
277
|
+
# selectors and actions to be performed on selected nodes. In the
|
278
|
+
# example below, the selector is "Image", which selects all image
|
279
|
+
# nodes. The action is to prepend the contents of the image's caption
|
280
|
+
# by the string "Figure. ".
|
281
|
+
#
|
282
|
+
# @param block [Proc] the filter specification
|
283
|
+
#
|
284
|
+
# @return [JSON] a JSON string with the filtered pandoc AST
|
285
|
+
#
|
286
|
+
# @example Add 'Figure' to each image's caption
|
287
|
+
# input = StringIO.new(File.read("my_report.md"))
|
288
|
+
# output = StringIO.new
|
289
|
+
#
|
290
|
+
# Paru::Filter.new(input, output).filter do
|
291
|
+
# with "Image" do |image|
|
292
|
+
# image.inner_markdown = "Figure. #{image.inner_markdown}"
|
293
|
+
# end
|
294
|
+
# end
|
295
|
+
#
|
296
|
+
def filter(&block)
|
297
|
+
@selectors = {}
|
298
|
+
@filtered_nodes = []
|
299
|
+
@document = read_document
|
279
300
|
|
280
|
-
|
301
|
+
@metadata = PandocFilter::Metadata.new(
|
302
|
+
@document.meta,
|
303
|
+
treat_metadata_strings_as_plain_strings: @treat_metadata_strings_as_plain_strings
|
304
|
+
)
|
281
305
|
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
306
|
+
nodes_to_filter = Enumerator.new do |node_list|
|
307
|
+
@document.each_depth_first do |node|
|
308
|
+
node_list << node
|
309
|
+
end
|
310
|
+
end
|
287
311
|
|
288
|
-
|
312
|
+
@current_node = @document
|
289
313
|
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
314
|
+
@ran_before = false
|
315
|
+
@ran_after = false
|
316
|
+
instance_eval(&block) # run filter with before block
|
317
|
+
@ran_before = true
|
294
318
|
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
319
|
+
nodes_to_filter.each do |node|
|
320
|
+
if @current_node.has_been_replaced?
|
321
|
+
@current_node = @current_node.get_replacement
|
322
|
+
@filtered_nodes.pop
|
323
|
+
else
|
324
|
+
@current_node = node
|
325
|
+
end
|
302
326
|
|
303
|
-
|
327
|
+
@filtered_nodes.push @current_node
|
304
328
|
|
305
|
-
|
306
|
-
|
329
|
+
instance_eval(&block) # run the actual filter code
|
330
|
+
end
|
307
331
|
|
308
|
-
|
309
|
-
|
332
|
+
@ran_after = true
|
333
|
+
instance_eval(&block) # run filter with after block
|
310
334
|
|
311
|
-
|
312
|
-
|
335
|
+
write_document
|
336
|
+
end
|
313
337
|
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
@selectors[selector] = Selector.new selector unless @selectors.has_key? selector
|
322
|
-
yield @current_node if @selectors[selector].matches? @current_node, @filtered_nodes
|
323
|
-
end
|
324
|
-
end
|
338
|
+
# Specify what nodes to filter with a +selector+. If the +current_node+
|
339
|
+
# matches that selector, it is passed to the block to this +with+ method.
|
340
|
+
#
|
341
|
+
# @param selector [String] a selector string
|
342
|
+
# @yield [Node] the current node if it matches the selector
|
343
|
+
def with(selector)
|
344
|
+
return unless @ran_before && !@ran_after
|
325
345
|
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
# @yield [Document] the document
|
330
|
-
def before()
|
331
|
-
yield @document unless @ran_before
|
332
|
-
end
|
346
|
+
@selectors[selector] = Selector.new selector unless @selectors.key? selector
|
347
|
+
yield @current_node if @selectors[selector].matches? @current_node, @filtered_nodes
|
348
|
+
end
|
333
349
|
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
350
|
+
# Before running the filter on all nodes, the +document+ is passed to
|
351
|
+
# the block to this +before+ method. This method is run exactly once.
|
352
|
+
#
|
353
|
+
# @yield [Document] the document
|
354
|
+
def before
|
355
|
+
yield @document unless @ran_before
|
356
|
+
end
|
341
357
|
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
def stop!()
|
350
|
-
write_document
|
351
|
-
exit true
|
352
|
-
end
|
358
|
+
# After running the filter on all nodes, the +document+ is passed to
|
359
|
+
# the block to this +after+ method. This method is run exactly once.
|
360
|
+
#
|
361
|
+
# @yield [Document] the document
|
362
|
+
def after
|
363
|
+
yield @document if @ran_after
|
364
|
+
end
|
353
365
|
|
354
|
-
|
366
|
+
# Stop processing the document any further and output it as it is now.
|
367
|
+
# This is a great timesaver for filters that only act on a small
|
368
|
+
# number of nodes in a large document, or when you only want to set
|
369
|
+
# the metadata.
|
370
|
+
#
|
371
|
+
# Note, stop will break off the filter immediately after outputting
|
372
|
+
# the document in its current state.
|
373
|
+
def stop!
|
374
|
+
write_document
|
375
|
+
exit
|
376
|
+
end
|
355
377
|
|
356
|
-
|
357
|
-
# on STDIN that is being filtered
|
358
|
-
#
|
359
|
-
# @return [Document] create a new Document node from a pandoc AST from
|
360
|
-
# JSON from STDIN
|
361
|
-
def read_document()
|
362
|
-
PandocFilter::Document.from_JSON @input.read
|
363
|
-
end
|
378
|
+
private
|
364
379
|
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
380
|
+
# The Document node from JSON formatted pandoc document structure
|
381
|
+
# on STDIN that is being filtered
|
382
|
+
#
|
383
|
+
# @return [Document] create a new Document node from a pandoc AST from
|
384
|
+
# JSON from STDIN
|
385
|
+
def read_document
|
386
|
+
PandocFilter::Document.from_JSON @input.read
|
370
387
|
end
|
371
|
-
|
372
|
-
#
|
373
|
-
|
388
|
+
|
389
|
+
# Write the document being filtered to STDOUT
|
390
|
+
def write_document
|
391
|
+
@document.meta = @metadata.to_meta
|
392
|
+
@output.write @document.to_JSON
|
374
393
|
end
|
394
|
+
end
|
395
|
+
|
396
|
+
# FilterError is raised when there is an error during filtering
|
397
|
+
class FilterError < Error
|
398
|
+
end
|
375
399
|
end
|