paru 1.5.0 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. checksums.yaml +4 -4
  2. data/lib/paru/error.rb +6 -4
  3. data/lib/paru/filter/ast_manipulation.rb +90 -91
  4. data/lib/paru/filter/attr.rb +75 -69
  5. data/lib/paru/filter/block.rb +15 -14
  6. data/lib/paru/filter/block_quote.rb +14 -12
  7. data/lib/paru/filter/bullet_list.rb +17 -16
  8. data/lib/paru/filter/caption.rb +50 -48
  9. data/lib/paru/filter/cell.rb +52 -50
  10. data/lib/paru/filter/citation.rb +53 -51
  11. data/lib/paru/filter/cite.rb +34 -33
  12. data/lib/paru/filter/code.rb +51 -49
  13. data/lib/paru/filter/code_block.rb +76 -76
  14. data/lib/paru/filter/col_spec.rb +58 -56
  15. data/lib/paru/filter/definition_list.rb +51 -52
  16. data/lib/paru/filter/definition_list_item.rb +45 -43
  17. data/lib/paru/filter/div.rb +37 -35
  18. data/lib/paru/filter/document.rb +112 -115
  19. data/lib/paru/filter/emph.rb +7 -5
  20. data/lib/paru/filter/empty_block.rb +17 -16
  21. data/lib/paru/filter/empty_inline.rb +23 -22
  22. data/lib/paru/filter/figure.rb +41 -39
  23. data/lib/paru/filter/header.rb +41 -39
  24. data/lib/paru/filter/horizontal_rule.rb +7 -5
  25. data/lib/paru/filter/image.rb +13 -12
  26. data/lib/paru/filter/inline.rb +27 -26
  27. data/lib/paru/filter/inner_markdown.rb +60 -62
  28. data/lib/paru/filter/int_value.rb +19 -18
  29. data/lib/paru/filter/line_block.rb +13 -11
  30. data/lib/paru/filter/line_break.rb +7 -5
  31. data/lib/paru/filter/link.rb +34 -33
  32. data/lib/paru/filter/list.rb +37 -37
  33. data/lib/paru/filter/list_attributes.rb +52 -51
  34. data/lib/paru/filter/math.rb +66 -64
  35. data/lib/paru/filter/meta.rb +40 -39
  36. data/lib/paru/filter/meta_blocks.rb +7 -5
  37. data/lib/paru/filter/meta_bool.rb +7 -5
  38. data/lib/paru/filter/meta_inlines.rb +9 -7
  39. data/lib/paru/filter/meta_list.rb +7 -5
  40. data/lib/paru/filter/meta_map.rb +50 -49
  41. data/lib/paru/filter/meta_string.rb +7 -6
  42. data/lib/paru/filter/meta_value.rb +26 -25
  43. data/lib/paru/filter/metadata.rb +150 -88
  44. data/lib/paru/filter/node.rb +400 -406
  45. data/lib/paru/filter/note.rb +29 -29
  46. data/lib/paru/filter/null.rb +7 -5
  47. data/lib/paru/filter/ordered_list.rb +50 -49
  48. data/lib/paru/filter/para.rb +21 -20
  49. data/lib/paru/filter/plain.rb +23 -21
  50. data/lib/paru/filter/quoted.rb +28 -26
  51. data/lib/paru/filter/short_caption.rb +7 -5
  52. data/lib/paru/filter/small_caps.rb +8 -7
  53. data/lib/paru/filter/soft_break.rb +7 -5
  54. data/lib/paru/filter/space.rb +7 -5
  55. data/lib/paru/filter/span.rb +29 -27
  56. data/lib/paru/filter/str.rb +33 -32
  57. data/lib/paru/filter/strikeout.rb +7 -6
  58. data/lib/paru/filter/strong.rb +7 -6
  59. data/lib/paru/filter/subscript.rb +7 -6
  60. data/lib/paru/filter/superscript.rb +7 -6
  61. data/lib/paru/filter/table.rb +201 -210
  62. data/lib/paru/filter/table_body.rb +67 -67
  63. data/lib/paru/filter/table_end.rb +53 -55
  64. data/lib/paru/filter/table_foot.rb +8 -7
  65. data/lib/paru/filter/table_head.rb +8 -7
  66. data/lib/paru/filter/target.rb +29 -27
  67. data/lib/paru/filter/underline.rb +7 -5
  68. data/lib/paru/filter/value.rb +74 -75
  69. data/lib/paru/filter/version.rb +23 -22
  70. data/lib/paru/filter.rb +355 -331
  71. data/lib/paru/filter_error.rb +7 -5
  72. data/lib/paru/info.rb +29 -30
  73. data/lib/paru/pandoc.rb +241 -248
  74. data/lib/paru/pandoc2yaml.rb +51 -42
  75. data/lib/paru/selector.rb +193 -184
  76. data/lib/paru.rb +3 -1
  77. metadata +4 -73
data/lib/paru/filter.rb CHANGED
@@ -1,5 +1,7 @@
1
+ # frozen_string_literal: true
2
+
1
3
  #--
2
- # Copyright 2015, 2016, 2017, 2022, 2023 Huub de Beer <Huub@heerdebeer.org>
4
+ # Copyright 2015--2025 Huub de Beer <Huub@heerdebeer.org>
3
5
  #
4
6
  # This file is part of Paru
5
7
  #
@@ -16,360 +18,382 @@
16
18
  # You should have received a copy of the GNU General Public License
17
19
  # along with Paru. If not, see <http://www.gnu.org/licenses/>.
18
20
  #++
19
- require_relative "./selector.rb"
20
- require_relative "./filter/document.rb"
21
- require_relative "./filter/metadata.rb"
21
+ require_relative 'selector'
22
+ require_relative 'filter/document'
23
+ require_relative 'filter/metadata'
22
24
 
23
25
  module Paru
24
- # Paru filter is a wrapper around pandoc's JSON api, which is based on
25
- # {pandoc-types}[https://hackage.haskell.org/package/pandoc-types-1.23/docs/Text-Pandoc-Definition.html].
26
- # Pandoc treats block elements and inline elements differently.
27
- #
28
- # Pandoc's block elements are:
29
- PANDOC_BLOCK = [
30
- "Plain",
31
- "Para",
32
- "LineBlock",
33
- "CodeBlock",
34
- "RawBlock",
35
- "BlockQuote",
36
- "OrderedList",
37
- "BulletList",
38
- "DefinitionList",
39
- "Header",
40
- "HorizontalRule",
41
- "Table",
42
- "TableHead",
43
- "TableFoot",
44
- "TableBody",
45
- "Row",
46
- "Cell",
47
- "Figure",
48
- "Caption",
49
- "Div",
50
- "Null"
51
- ]
26
+ # Paru filter is a wrapper around pandoc's JSON api, which is based on
27
+ # {pandoc-types}[https://hackage.haskell.org/package/pandoc-types-1.23/docs/Text-Pandoc-Definition.html].
28
+ # Pandoc treats block elements and inline elements differently.
29
+ #
30
+ # Pandoc's block elements are:
31
+ PANDOC_BLOCK = %w[
32
+ Plain
33
+ Para
34
+ LineBlock
35
+ CodeBlock
36
+ RawBlock
37
+ BlockQuote
38
+ OrderedList
39
+ BulletList
40
+ DefinitionList
41
+ Header
42
+ HorizontalRule
43
+ Table
44
+ TableHead
45
+ TableFoot
46
+ TableBody
47
+ Row
48
+ Cell
49
+ Figure
50
+ Caption
51
+ Div
52
+ Null
53
+ ].freeze
52
54
 
53
- # Pandoc's inline elements are
54
- PANDOC_INLINE = [
55
- "Str",
56
- "Emph",
57
- "Underline",
58
- "Strong",
59
- "Strikeout",
60
- "Superscript",
61
- "Subscript",
62
- "SmallCaps",
63
- "Quoted",
64
- "Cite",
65
- "Code",
66
- "Space",
67
- "SoftBreak",
68
- "LineBreak",
69
- "Math",
70
- "RawInline",
71
- "Link",
72
- "Image",
73
- "Note",
74
- "Span"
75
- ]
55
+ # Pandoc's inline elements are
56
+ PANDOC_INLINE = %w[
57
+ Str
58
+ Emph
59
+ Underline
60
+ Strong
61
+ Strikeout
62
+ Superscript
63
+ Subscript
64
+ SmallCaps
65
+ Quoted
66
+ Cite
67
+ Code
68
+ Space
69
+ SoftBreak
70
+ LineBreak
71
+ Math
72
+ RawInline
73
+ Link
74
+ Image
75
+ Note
76
+ Span
77
+ ].freeze
76
78
 
77
- # All of pandoc's type together:
78
- PANDOC_TYPES = PANDOC_BLOCK + PANDOC_INLINE
79
+ # All of pandoc's types together:
80
+ PANDOC_TYPES = PANDOC_BLOCK + PANDOC_INLINE
79
81
 
82
+ # Filter is used to write your own pandoc filter in Ruby. A Filter is
83
+ # almost always created and immediately executed via the +run+ method. The
84
+ # most simple filter you can write in paru is the so-called "identity":
85
+ #
86
+ # {include:file:examples/filters/identity.rb}
87
+ #
88
+ # It runs the filter, but it makes no selection nor performs an action.
89
+ # This is pretty useless, of course—although it makes for a great way to
90
+ # test the filter functionality—, but it shows the general setup of a
91
+ # filter well.
92
+ #
93
+ # = Writing a simple filter: numbering figures
94
+ #
95
+ # Inside a {Filter#run} block, you specify *selectors* with *actions*. For
96
+ # example, to number all figures in a document and prefix their captions
97
+ # with "Figure", the following filter would work:
98
+ #
99
+ # {include:file:examples/filters/number_figures.rb}
100
+ #
101
+ # This filter selects all {PandocFilter::Image} nodes. For each
102
+ # {PandocFilter::Image} node it increments the figure counter
103
+ # +figure_counter+ and then sets the figure's caption to "Figure" followed
104
+ # by the figure count and the original caption. In other words, the
105
+ # following input document
106
+ #
107
+ # ![My first image](img/horse.png)
108
+ #
109
+ # ![My second image](img/rabbit.jpeg)
110
+ #
111
+ # will be transformed into
112
+ #
113
+ # ![Figure 1. My first image](img/horse.png)
114
+ #
115
+ # ![Figure 2. My second image](img/rabbit.jpeg)
116
+ #
117
+ # The method {PandocFilter::InnerMarkdown#inner_markdown} and its counterpart
118
+ # {PandocFilter::Node#markdown} are a great way to manipulate the
119
+ # contents of a selected {PandocFilter::Node}. No messing about creating
120
+ # and filling {PandocFilter::Node}s, you can just use pandoc's own
121
+ # markdown format!
122
+ #
123
+ # = Writing more involved filters
124
+ #
125
+ # == Using the "follows" selector: Numbering figures and chapters
126
+ #
127
+ # The previous example can be extended to also number chapters and to
128
+ # start numbering figures anew per chapter. As you would expect, we need
129
+ # two counters, one for the figures and one for the chapters:
130
+ #
131
+ # {include:file:examples/filters/number_figures_per_chapter.rb}
132
+ #
133
+ # What is new in this filter, however, is the selector "Header + Image"
134
+ # which selects all {PandocFilter::Image} nodes that *follow* a
135
+ # {PandocFilter::Header} node. Documents in pandoc have a _flat_ structure
136
+ # where chapters do not exist as separate concepts. Instead, a chapter is
137
+ # implied by a header of a certain level and everything that follows until
138
+ # the next header of that level.
139
+ #
140
+ # == Using the "child of" selector: Annotate custom blocks
141
+ #
142
+ # Hierarchical structures do exist in a pandoc document, however. For
143
+ # example, the contents of a paragraph ({PandocFilter::Para}), which
144
+ # itself is a {PandocFilter::Block} level node, are {PandocFilter::Inline}
145
+ # level nodes. Another example are custom block or {PandocFilter::Div}
146
+ # nodes. You select a child node by using the +>+ selector as in the
147
+ # example below:
148
+ #
149
+ # {include:file:examples/filters/example.rb}
150
+ #
151
+ # Here all {PandocFilter::Header} nodes that are inside a
152
+ # {PandocFilter::Div} node are selected. Furthermore, if these headers are
153
+ # of level 3, they are prefixed by the string "Example" followed by a
154
+ # count.
155
+ #
156
+ # In this example, "important" {PandocFilter::Div} nodes are annotated by
157
+ # putting the string *important* before the contents of the node.
158
+ #
159
+ # == Using a distance in a selector: Capitalize the first N characters of
160
+ # a paragraph
161
+ #
162
+ # Given the flat structure of a pandoc document, the "follows" selector
163
+ # has quite a reach. For example, "Header + Para" selects all paragraphs
164
+ # that follow a header. In most well-structured documents, this would
165
+ # select basically all paragraphs.
166
+ #
167
+ # But what if you need to be more specific? For example, if you would like
168
+ # to capitalize the first sentence of each first paragraph of a chapter,
169
+ # you need a way to specify a sequence number of sorts. To that end, paru
170
+ # filter selectors take an optional *distance* parameter. A filter for
171
+ # this example could look like:
172
+ #
173
+ # {include:file:examples/filters/capitalize_first_sentence.rb}
174
+ #
175
+ # The distance is denoted after a selector by an integer. In this case
176
+ # "Header +1 Para" selects all {PandocFilter::Para} nodes that directly
177
+ # follow a {PandocFilter::Header} node. You can use a distance with any
178
+ # selector.
179
+ #
180
+ # == Manipulating nodes: Removing horizontal lines
181
+ #
182
+ # Although the {PandocFilter::InnerMarkdown#inner_markdown} and
183
+ # {PandocFilter::Node#markdown} work in most situations, sometimes
184
+ # direct manipulation of the pandoc document AST is useful. These
185
+ # {PandocFilter::ASTManipulation} methods are mixed in
186
+ # {PandocFilter::Node} and can be used on any node in your filter. For
187
+ # example, to delete all {PandocFilter::HorizontalRule} nodes, you can use a
188
+ # filter like:
189
+ #
190
+ # {include:file:examples/filters/delete_horizontal_rules.rb}
191
+ #
192
+ # Note that you could have arrived at the same effect by using:
193
+ #
194
+ # rule.markdown = ""
195
+ #
196
+ # == Manipulating metadata:
197
+ #
198
+ # One of the interesting features of the pandoc markdown format is the
199
+ # ability to add metadata to a document via a YAML block or command line
200
+ # options. For example, if you use a template that uses the metadata
201
+ # property +$date$+ to write a date on a title page, it is quite useful to
202
+ # automatically add the date of _today_ to the metadata. You can do so
203
+ # with a filter like:
204
+ #
205
+ # {include:file:examples/filters/add_today.rb}
206
+ #
207
+ # In a filter, the +metadata+ property is a Ruby Hash of Strings, Numbers,
208
+ # Booleans, Arrays, and Hashes. You can manipulate it like any other Ruby
209
+ # Hash.
210
+ #
211
+ # @!attribute metadata
212
+ # @return [Hash] The metadata of the document being filtered as a Ruby
213
+ # Hash
214
+ #
215
+ # @!attribute document
216
+ # @return [Document] The document being filtered
217
+ #
218
+ # @!attribute current_node
219
+ # @return [Node] The node in the AST of the document being filtered that
220
+ # is currently being inspected by the filter.
221
+ #
222
+ class Filter
223
+ attr_reader :metadata, :document, :current_node
80
224
 
81
- # Filter is used to write your own pandoc filter in Ruby. A Filter is
82
- # almost always created and immediately executed via the +run+ method. The
83
- # most simple filter you can write in paru is the so-called "identity":
84
- #
85
- # {include:file:examples/filters/identity.rb}
86
- #
87
- # It runs the filter, but it makes no selection nor performs an action.
88
- # This is pretty useless, of course—although it makes for a great way to
89
- # test the filter functionality—, but it shows the general setup of a
90
- # filter well.
91
- #
92
- # = Writing a simple filter: numbering figures
93
- #
94
- # Inside a {Filter#run} block, you specify *selectors* with *actions*. For
95
- # example, to number all figures in a document and prefix their captions
96
- # with "Figure", the following filter would work:
97
- #
98
- # {include:file:examples/filters/number_figures.rb}
99
- #
100
- # This filter selects all {PandocFilter::Image} nodes. For each
101
- # {PandocFilter::Image} node it increments the figure counter
102
- # +figure_counter+ and then sets the figure's caption to "Figure" followed
103
- # by the figure count and the original caption. In other words, the
104
- # following input document
105
- #
106
- # ![My first image](img/horse.png)
107
- #
108
- # ![My second image](img/rabbit.jpeg)
109
- #
110
- # will be transformed into
111
- #
112
- # ![Figure 1. My first image](img/horse.png)
113
- #
114
- # ![Figure 2. My second image](img/rabbit.jpeg)
115
- #
116
- # The method {PandocFilter::InnerMarkdown#inner_markdown} and its counterpart
117
- # {PandocFilter::Node#markdown} are a great way to manipulate the
118
- # contents of a selected {PandocFilter::Node}. No messing about creating
119
- # and filling {PandocFilter::Node}s, you can just use pandoc's own
120
- # markdown format!
121
- #
122
- # = Writing a more involved filters
123
- #
124
- # == Using the "follows" selector: Numbering figures and chapters
125
- #
126
- # The previous example can be extended to also number chapters and to
127
- # start numbering figures anew per chapter. As you would expect, we need
128
- # two counters, one for the figures and one for the chapters:
129
- #
130
- # {include:file:examples/filters/number_figures_per_chapter.rb}
131
- #
132
- # What is new in this filter, however, is the selector "Header + Image"
133
- # which selects all {PandocFilter::Image} nodes that *follow* a
134
- # {PandocFilter::Header} node. Documents in pandoc have a _flat_ structure
135
- # where chapters do not exists as separate concepts. Instead, a chapter is
136
- # implied by a header of a certain level and everything that follows until
137
- # the next header of that level.
138
- #
139
- # == Using the "child of" selector: Annotate custom blocks
140
- #
141
- # Hierarchical structures do exist in a pandoc document, however. For
142
- # example, the contents of a paragraph ({PandocFilter::Para}), which
143
- # itself is a {PandocFilter::Block} level node, are {PandocFilter::Inline}
144
- # level nodes. Another example are custom block or {PandocFilter::Div}
145
- # nodes. You select a child node by using the +>+ selector as in the
146
- # example below:
147
- #
148
- # {include:file:examples/filters/example.rb}
149
- #
150
- # Here all {PandocFilter::Header} nodes that are inside a
151
- # {PandocFilter::Div} node are selected. Furthermore, if these headers are
152
- # of level 3, they are prefixed by the string "Example" followed by a
153
- # count.
154
- #
155
- # In this example, "important" {PandocFilter::Div} nodes are annotated by
156
- # putting the string *important* before the contents of the node.
157
- #
158
- # == Using a distance in a selector: Capitalize the first N characters of
159
- # a paragraph
160
- #
161
- # Given the flat structure of a pandoc document, the "follows" selector
162
- # has quite a reach. For example, "Header + Para" selects all paragraphs
163
- # that follow a header. In most well-structured documents, this would
164
- # select basically all paragraphs.
165
- #
166
- # But what if you need to be more specific? For example, if you would like
167
- # to capitalize the first sentence of each first paragraph of a chapter,
168
- # you need a way to specify a sequence number of sorts. To that end, paru
169
- # filter selectors take an optional *distance* parameter. A filter for
170
- # this example could look like:
171
- #
172
- # {include:file:examples/filters/capitalize_first_sentence.rb}
173
- #
174
- # The distance is denoted after a selector by an integer. In this case
175
- # "Header +1 Para" selects all {PandocFilter::Para} nodes that directly
176
- # follow an {PandocFilter::Header} node. You can use a distance with any
177
- # selector.
178
- #
179
- # == Manipulating nodes: Removing horizontal lines
180
- #
181
- # Although the {PandocFilter::InnerMarkdown#inner_markdown} and
182
- # {PandocFilter::Node#markdown} work in most situations, sometimes
183
- # direct manipulation of the pandoc document AST is useful. These
184
- # {PandocFilter::ASTManipulation} methods are mixed in
185
- # {PandocFilter::Node} and can be used on any node in your filter. For
186
- # example, to delete all {PandocFilter::HorizontalRule} nodes, can use a
187
- # filter like:
188
- #
189
- # {include:file:examples/filters/delete_horizontal_rules.rb}
190
- #
191
- # Note that you could have arrived at the same effect by using:
192
- #
193
- # rule.markdown = ""
194
- #
195
- # == Manipulating metadata:
196
- #
197
- # One of the interesting features of the pandoc markdown format is the
198
- # ability to add metadata to a document via a YAML block or command line
199
- # options. For example, if you use a template that uses the metadata
200
- # property +$date$+ to write a date on a title page, it is quite useful to
201
- # automatically add the date of _today_ to the metadata. You can do so
202
- # with a filter like:
203
- #
204
- # {include:file:examples/filters/add_today.rb}
205
- #
206
- # In a filter, the +metadata+ property is a Ruby Hash of Strings, Numbers,
207
- # Booleans, Arrays, and Hashes. You can manipulate it like any other Ruby
208
- # Hash.
209
- #
210
- # @!attribute metadata
211
- # @return [Hash] The metadata of the document being filtered as a Ruby
212
- # Hash
213
- #
214
- # @!attribute document
215
- # @return [Document] The document being filtered
216
- #
217
- # @!attribute current_node
218
- # @return [Node] The node in the AST of the document being filtered that
219
- # is currently being inspected by the filter.
220
- #
221
- class Filter
222
-
223
- attr_reader :metadata, :document, :current_node
224
-
225
- # Create a new Filter instance. For convenience, {run} creates a new
226
- # {Filter} and runs it immediately. Use this constructor if you want
227
- # to run a filter on different input and output streams that STDIN and
228
- # STDOUT respectively.
229
- #
230
- # @param input [IO = $stdin] the input stream to read, defaults to
231
- # STDIN
232
- # @param output [IO = $stdout] the output stream to write, defaults to
233
- # STDOUT
234
- def initialize(input = $stdin, output = $stdout)
235
- @input = input
236
- @output = output
237
- end
225
+ # Create a new Filter instance. For convenience, {run} creates a new
226
+ # {Filter} and runs it immediately. Use this constructor if you want
227
+ # to run a filter on different input and output streams than STDIN and
228
+ # STDOUT respectively.
229
+ #
230
+ # @param input [IO = $stdin] the input stream to read, defaults to
231
+ # STDIN
232
+ # @param output [IO = $stdout] the output stream to write, defaults to
233
+ # STDOUT
234
+ # @param treat_metadata_strings_as_plain_strings [Boolean = false] feature
235
+ # toggle to treat metadata string values as plain strings instead of
236
+ # markdown strings if all AST leaf metadata string values have pandoc type
237
+ # "MetaString". This option is only relevant when you **only** set metadata
238
+ # string values via command-line option `--metadata` and not also via a
239
+ # YAML or title block. Using this option improves performance in this
240
+ # specific situation because metadata values don't have to be converted to
241
+ # string by pandoc in a separate process but can be collected as is.
242
+ def initialize(input = $stdin, output = $stdout, treat_metadata_strings_as_plain_strings: false)
243
+ @input = input
244
+ @output = output
245
+ @treat_metadata_strings_as_plain_strings = treat_metadata_strings_as_plain_strings
246
+ end
238
247
 
239
- # Run the filter specified by block. This is a convenience method that
240
- # creates a new {Filter} using input stream STDIN and output stream
241
- # STDOUT and immediately runs {filter} with the block supplied.
242
- #
243
- # @param block [Proc] the filter specification
244
- #
245
- # @example Add 'Figure' to each image's caption
246
- # Paru::Filter.run do
247
- # with "Image" do |image|
248
- # image.inner_markdown = "Figure. #{image.inner_markdown}"
249
- # end
250
- # end
251
- def self.run(&block)
252
- Filter.new($stdin, $stdout).filter(&block)
253
- end
248
+ # Run the filter specified by block. This is a convenience method that
249
+ # creates a new {Filter} using input stream STDIN and output stream
250
+ # STDOUT and immediately runs {filter} with the block supplied.
251
+ #
252
+ # @param treat_metadata_strings_as_plain_strings [Boolean = false] feature
253
+ # toggle to treat metadata string values as plain strings instead of
254
+ # markdown strings if all AST leaf metadata string values have pandoc type
255
+ # "MetaString". This option is only relevant when you **only** set metadata
256
+ # string values via command-line option `--metadata` and not also via a
257
+ # YAML or title block. Using this option improves performance in this
258
+ # specific situation because metadata values don't have to be converted to
259
+ # string by pandoc in a separate process but can be collected as is.
260
+ # @param block [Proc] the filter specification
261
+ #
262
+ # @example Add 'Figure' to each image's caption
263
+ # Paru::Filter.run do
264
+ # with "Image" do |image|
265
+ # image.inner_markdown = "Figure. #{image.inner_markdown}"
266
+ # end
267
+ # end
268
+ def self.run(treat_metadata_strings_as_plain_strings: false, &block)
269
+ Filter.new(
270
+ $stdin,
271
+ $stdout,
272
+ treat_metadata_strings_as_plain_strings: treat_metadata_strings_as_plain_strings
273
+ ).filter(&block)
274
+ end
254
275
 
255
- # Create a filter using +block+. In the block you specify
256
- # selectors and actions to be performed on selected nodes. In the
257
- # example below, the selector is "Image", which selects all image
258
- # nodes. The action is to prepend the contents of the image's caption
259
- # by the string "Figure. ".
260
- #
261
- # @param block [Proc] the filter specification
262
- #
263
- # @return [JSON] a JSON string with the filtered pandoc AST
264
- #
265
- # @example Add 'Figure' to each image's caption
266
- # input = IOString.new(File.read("my_report.md")
267
- # output = IOString.new
268
- #
269
- # Paru::Filter.new(input, output).filter do
270
- # with "Image" do |image|
271
- # image.inner_markdown = "Figure. #{image.inner_markdown}"
272
- # end
273
- # end
274
- #
275
- def filter(&block)
276
- @selectors = Hash.new
277
- @filtered_nodes = []
278
- @document = read_document
276
+ # Create a filter using +block+. In the block you specify
277
+ # selectors and actions to be performed on selected nodes. In the
278
+ # example below, the selector is "Image", which selects all image
279
+ # nodes. The action is to prepend the contents of the image's caption
280
+ # by the string "Figure. ".
281
+ #
282
+ # @param block [Proc] the filter specification
283
+ #
284
+ # @return [JSON] a JSON string with the filtered pandoc AST
285
+ #
286
+ # @example Add 'Figure' to each image's caption
287
+ # input = StringIO.new(File.read("my_report.md"))
288
+ # output = StringIO.new
289
+ #
290
+ # Paru::Filter.new(input, output).filter do
291
+ # with "Image" do |image|
292
+ # image.inner_markdown = "Figure. #{image.inner_markdown}"
293
+ # end
294
+ # end
295
+ #
296
+ def filter(&block)
297
+ @selectors = {}
298
+ @filtered_nodes = []
299
+ @document = read_document
279
300
 
280
- @metadata = PandocFilter::Metadata.new @document.meta
301
+ @metadata = PandocFilter::Metadata.new(
302
+ @document.meta,
303
+ treat_metadata_strings_as_plain_strings: @treat_metadata_strings_as_plain_strings
304
+ )
281
305
 
282
- nodes_to_filter = Enumerator.new do |node_list|
283
- @document.each_depth_first do |node|
284
- node_list << node
285
- end
286
- end
306
+ nodes_to_filter = Enumerator.new do |node_list|
307
+ @document.each_depth_first do |node|
308
+ node_list << node
309
+ end
310
+ end
287
311
 
288
- @current_node = @document
312
+ @current_node = @document
289
313
 
290
- @ran_before = false
291
- @ran_after = false
292
- instance_eval(&block) # run filter with before block
293
- @ran_before = true
314
+ @ran_before = false
315
+ @ran_after = false
316
+ instance_eval(&block) # run filter with before block
317
+ @ran_before = true
294
318
 
295
- nodes_to_filter.each do |node|
296
- if @current_node.has_been_replaced?
297
- @current_node = @current_node.get_replacement
298
- @filtered_nodes.pop
299
- else
300
- @current_node = node
301
- end
319
+ nodes_to_filter.each do |node|
320
+ if @current_node.has_been_replaced?
321
+ @current_node = @current_node.get_replacement
322
+ @filtered_nodes.pop
323
+ else
324
+ @current_node = node
325
+ end
302
326
 
303
- @filtered_nodes.push @current_node
327
+ @filtered_nodes.push @current_node
304
328
 
305
- instance_eval(&block) # run the actual filter code
306
- end
329
+ instance_eval(&block) # run the actual filter code
330
+ end
307
331
 
308
- @ran_after = true
309
- instance_eval(&block) # run filter with after block
332
+ @ran_after = true
333
+ instance_eval(&block) # run filter with after block
310
334
 
311
- write_document
312
- end
335
+ write_document
336
+ end
313
337
 
314
- # Specify what nodes to filter with a +selector+. If the +current_node+
315
- # matches that selector, it is passed to the block to this +with+ method.
316
- #
317
- # @param selector [String] a selector string
318
- # @yield [Node] the current node if it matches the selector
319
- def with(selector)
320
- if @ran_before and !@ran_after
321
- @selectors[selector] = Selector.new selector unless @selectors.has_key? selector
322
- yield @current_node if @selectors[selector].matches? @current_node, @filtered_nodes
323
- end
324
- end
338
+ # Specify what nodes to filter with a +selector+. If the +current_node+
339
+ # matches that selector, it is passed to the block to this +with+ method.
340
+ #
341
+ # @param selector [String] a selector string
342
+ # @yield [Node] the current node if it matches the selector
343
+ def with(selector)
344
+ return unless @ran_before && !@ran_after
325
345
 
326
- # Before running the filter on all nodes, the +document+ is passed to
327
- # the block to this +before+ method. This method is run exactly once.
328
- #
329
- # @yield [Document] the document
330
- def before()
331
- yield @document unless @ran_before
332
- end
346
+ @selectors[selector] = Selector.new selector unless @selectors.key? selector
347
+ yield @current_node if @selectors[selector].matches? @current_node, @filtered_nodes
348
+ end
333
349
 
334
- # After running the filter on all nodes, the +document+ is passed to
335
- # the block to this +after+ method. This method is run exactly once.
336
- #
337
- # @yield [Document] the document
338
- def after()
339
- yield @document if @ran_after
340
- end
350
+ # Before running the filter on all nodes, the +document+ is passed to
351
+ # the block to this +before+ method. This method is run exactly once.
352
+ #
353
+ # @yield [Document] the document
354
+ def before
355
+ yield @document unless @ran_before
356
+ end
341
357
 
342
- # Stop processing the document any further and output it as it is now.
343
- # This is a great timesaver for filters that only act on a small
344
- # number of nodes in a large document, or when you only want to set
345
- # the metadata.
346
- #
347
- # Note, stop will break off the filter immediately after outputting
348
- # the document in its current state.
349
- def stop!()
350
- write_document
351
- exit true
352
- end
358
+ # After running the filter on all nodes, the +document+ is passed to
359
+ # the block to this +after+ method. This method is run exactly once.
360
+ #
361
+ # @yield [Document] the document
362
+ def after
363
+ yield @document if @ran_after
364
+ end
353
365
 
354
- private
366
+ # Stop processing the document any further and output it as it is now.
367
+ # This is a great timesaver for filters that only act on a small
368
+ # number of nodes in a large document, or when you only want to set
369
+ # the metadata.
370
+ #
371
+ # Note, stop will break off the filter immediately after outputting
372
+ # the document in its current state.
373
+ def stop!
374
+ write_document
375
+ exit
376
+ end
355
377
 
356
- # The Document node from JSON formatted pandoc document structure
357
- # on STDIN that is being filtered
358
- #
359
- # @return [Document] create a new Document node from a pandoc AST from
360
- # JSON from STDIN
361
- def read_document()
362
- PandocFilter::Document.from_JSON @input.read
363
- end
378
+ private
364
379
 
365
- # Write the document being filtered to STDOUT
366
- def write_document()
367
- @document.meta = @metadata.to_meta
368
- @output.write @document.to_JSON
369
- end
380
+ # The Document node from JSON formatted pandoc document structure
381
+ # on STDIN that is being filtered
382
+ #
383
+ # @return [Document] create a new Document node from a pandoc AST from
384
+ # JSON from STDIN
385
+ def read_document
386
+ PandocFilter::Document.from_JSON @input.read
370
387
  end
371
-
372
- # FilterError is thrown when there is an error during filtering
373
- class FilterError < Error
388
+
389
+ # Write the document being filtered to STDOUT
390
+ def write_document
391
+ @document.meta = @metadata.to_meta
392
+ @output.write @document.to_JSON
374
393
  end
394
+ end
395
+
396
+ # FilterError is thrown when there is an error during filtering
397
+ class FilterError < Error
398
+ end
375
399
  end