pandoc_refeq_mathml 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +51 -0
- data/ChangeLog +5 -0
- data/LICENSE.txt +20 -0
- data/Makefile +26 -0
- data/README.en.rdoc +227 -0
- data/Rakefile +9 -0
- data/bin/pandoc_refeq_mathml +195 -0
- data/lib/pandoc_refeq_mathml.rb +189 -0
- data/pandoc_refeq_mathml.gemspec +59 -0
- data/test/test_pandoc_refeq_mathml.rb +188 -0
- metadata +86 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: c025068f2083e35c2d82be9dadf5260b70f2823ce4ab9607238395388d4b84dc
|
4
|
+
data.tar.gz: bb47e488a24a8a23b1f93be4c001225492c26465ec77fc425aab9d34876f1113
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 53f285f6113a7173c112f06bb06bbe27d7aa8ab6a2288ca98059fd33550c5dc7ff932a62de8ad7dbe7f11cddad818fbec795959a9c569717ae6df515e41e8ea2
|
7
|
+
data.tar.gz: eec508b49714a0a76fde53883459d41940a3814c763902aee73887603a4883a6d1d378461504af9fb01ccd31ab62bbe270770e3663abee4d8ae34cfcdddbb2f5
|
data/.gitignore
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# See https://help.github.com/articles/ignoring-files for more about ignoring files.
|
2
|
+
#
|
3
|
+
# If you find yourself ignoring temporary files generated by your text editor
|
4
|
+
# or operating system, you probably want to add a global ignore instead:
|
5
|
+
# git config --global core.excludesfile '~/.gitignore_global'
|
6
|
+
|
7
|
+
# Ignore bundler config.
|
8
|
+
/.bundle
|
9
|
+
/vendor/bundle
|
10
|
+
|
11
|
+
# Ignore all logfiles and tempfiles.
|
12
|
+
/log/*
|
13
|
+
/tmp/*
|
14
|
+
!/log/.keep
|
15
|
+
!/tmp/.keep
|
16
|
+
|
17
|
+
.rbenv-version
|
18
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
19
|
+
.rvmrc
|
20
|
+
|
21
|
+
/node_modules
|
22
|
+
/yarn-error.log
|
23
|
+
|
24
|
+
.byebug_history
|
25
|
+
|
26
|
+
*.[oa]
|
27
|
+
*.so
|
28
|
+
*~
|
29
|
+
*.nogem
|
30
|
+
*nogem.*
|
31
|
+
*.bak
|
32
|
+
*.BAK
|
33
|
+
*.backup
|
34
|
+
*.org
|
35
|
+
*.orig
|
36
|
+
*.elc
|
37
|
+
*.pyc
|
38
|
+
\#*\#
|
39
|
+
|
40
|
+
# Elastic Beanstalk Files
|
41
|
+
.elasticbeanstalk/*
|
42
|
+
!.elasticbeanstalk/*.cfg.yml
|
43
|
+
!.elasticbeanstalk/*.global.yml
|
44
|
+
|
45
|
+
# yard
|
46
|
+
*.yardoc
|
47
|
+
|
48
|
+
# Ruby Gem doc
|
49
|
+
*.gem
|
50
|
+
doc/*
|
51
|
+
|
data/ChangeLog
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2022 Masa Sakano
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Makefile
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
ALL =

objs =

# The "doc" recipe uses the [[ ... ]] conditional, which is a bash feature;
# the default /bin/sh is not guaranteed to support it.
SHELL := bash

# Markdown post-processor shipped with the plain_text gem
# (https://rubygems.org/gems/plain_text).  It is looked up on PATH by default;
# override it when it lives elsewhere, e.g.
#   make doc YARD2MD=/path/to/plain_text/bin/yard2md_afterclean
YARD2MD ?= yard2md_afterclean

.SUFFIXES: .so .o .c .f

all: ${ALL}

.PHONY: all clean test doc

# Removes editor backup files left in bin/.
clean:
	$(RM) bin/*~

## You may need RUBYLIB=`pwd`/lib:$RUBYLIB
test:
	rake test

# Builds the yard documentation and, when .github/ exists and README.en.rdoc
# is newer than .github/README.md, regenerates the latter from the former.
# The Markdown is written to a temporary file first ($$$$ reaches the shell
# as $$, i.e. its PID) and only moved into place on success.  The trailing
# "|| exit 0" keeps "make doc" successful when the README step is skipped.
doc:
	yard doc; [[ -x ".github" && ( "README.en.rdoc" -nt ".github/README.md" ) ]] && ( ruby -r rdoc -e 'puts RDoc::Markup::ToMarkdown.new.convert ARGF.read' < README.en.rdoc | $(YARD2MD) --lang= > .github/README.md.$$$$ && ( mv -f .github/README.md.$$$$ .github/README.md && echo ".github/README.md is updated." ) || ( echo "ERROR: failed to create .github/README.md" >&2 ) ) || exit 0
|
25
|
+
|
26
|
+
|
data/README.en.rdoc
ADDED
@@ -0,0 +1,227 @@
|
|
1
|
+
|
2
|
+
= PandocRefeqMathml - ad hoc tool to modify pandoc-converted MathML from LaTeX
|
3
|
+
|
4
|
+
== Summary
|
5
|
+
|
6
|
+
This Ruby command-line command modifies a MathML file converted with <tt>pandoc</tt> from LaTeX.
|
7
|
+
|
8
|
+
Whereas <tt>pandoc</tt> is a great text-ish file converter, there are a
|
9
|
+
few caveats, at the time of writing, in converting a LaTeX file to MathML.
|
10
|
+
|
11
|
+
A major caveat is the generated MathML does not display
|
12
|
+
the equation numbers that are auto-generated by LaTeX in default for
|
13
|
+
the <tt>equation</tt> and <tt>eqnarray</tt> environments, nor their (LaTeX) labels.
|
14
|
+
All the (LaTeX) <tt>ref</tt> remain as they are, which is a coded message
|
15
|
+
for readers.
|
16
|
+
|
17
|
+
Another caveat is the alignments of equations in the <tt>eqnarray</tt> environment.
|
18
|
+
|
19
|
+
This tool is a bit of ad hoc (dirty) hack to correct these points
|
20
|
+
*in some basic situations*. "Basic" here means just the standard
|
21
|
+
LaTeX commands, not some package-specific commands. Also, this may not
|
22
|
+
correctly handle complicated formats of equations, using arrays etc.
|
23
|
+
|
24
|
+
The full package of this module is found in
|
25
|
+
{PandocRefeqMathml Ruby Gems page}[http://rubygems.org/gems/pandoc_refeq_mathml]
|
26
|
+
(with document created from source annotation with yard) and
|
27
|
+
in {Github}[https://github.com/masasakano/pandoc_refeq_mathml]
|
28
|
+
|
29
|
+
== Background and constraints
|
30
|
+
|
31
|
+
Pandoc-converted MathML.html from LaTeX lacks equation numbers that
|
32
|
+
are present in the original LaTeX. This tool offers a very crude fix,
|
33
|
+
adding equation numbers based on the annotation fields in +<math>+
|
34
|
+
and LaTeX aux file (which is automatically generated as a byproduct
|
35
|
+
when you compile a LaTeX document). Not all the numbers are recovered
|
36
|
+
but only those that are referenced somewhere in the MathML/LaTeX
|
37
|
+
file.
|
38
|
+
|
39
|
+
(Note that in principle, it should not be too difficult to modify the program so
|
40
|
+
that all the labelled equations in LaTeX are labelled again in
|
41
|
+
MathML. However, it would be tricky to label equations that are not
|
42
|
+
explicitly labelled in LaTeX because implicit numbering information is not
|
43
|
+
available in the LaTeX aux file.)
|
44
|
+
|
45
|
+
The algorithm assumes a LaTeX standard aux file-format, the MathML having
|
46
|
+
a link tag +<a>+ with the attributes "data-reference-type=ref" and href to the label
|
47
|
+
of the exact reference label in LaTeX (and the label should have no duplicates
|
48
|
+
in the MathML) and also having the +'annotation[ encoding="application/x-tex"]'+ tag
|
49
|
+
in each math tag containing the original LaTeX code. The LaTeX code must have
|
50
|
+
either the standard "equation" or "eqnarray" structures associated with
|
51
|
+
the standard "label" tag with a simple content (if it contains, apart from the label string,
|
52
|
+
something more than preceding or trailing white spaces, such as a comment,
|
53
|
+
this algorithm would likely fail). If equations in an eqnarray environment have
|
54
|
+
complicated nested structures like a matrix, I do not know how the algorithm
|
55
|
+
of this routine handles them. Also, the LaTeX section numbering must
|
56
|
+
be combinations of Arabic numbers, full-stops, and maybe capital
|
57
|
+
letters (for Appendix) only.
|
58
|
+
|
59
|
+
Essentially, LaTeX has a huge amount of freedom and so I am afraid it would be a somewhat
|
60
|
+
futile effort to deal with every possibility...
|
61
|
+
|
62
|
+
=== Output MathML by pandoc-2.19 converted from LaTeX
|
63
|
+
|
64
|
+
Ordinary LaTeX inline maths expressions (e.g., +$5\pi$+) are expressed
|
65
|
+
as follows:
|
66
|
+
|
67
|
+
<math display="inline" xmlns="http://w..."><semantics>
|
68
|
+
<mrow><mn>5</mn><mi>π</mi></mrow>
|
69
|
+
<annotation encoding="application/x-tex">5\pi</annotation>
|
70
|
+
</semantics></math>
|
71
|
+
|
72
|
+
LaTeX's +begin{equation}+ is as follows (n.b., the +<p>+ tag may not
|
73
|
+
be closed immediately after +</math>+ but other ordinary sentences
|
74
|
+
may follow):
|
75
|
+
|
76
|
+
<p><math display="block" xmlns="http://w..."><semantics>
|
77
|
+
<mrow><mi>x</mi><mo>±</mo><mi>ϵ</mi></mrow>
|
78
|
+
<annotation encoding="application/x-tex">x \pm \epsilon \label{my_xe}</annotation>
|
79
|
+
</semantics></math>
|
80
|
+
|
81
|
+
LaTeX's +begin{eqnarray}+ is as follows:
|
82
|
+
|
83
|
+
<p><math display="block" xmlns="http://w..."><semantics><mtable>
|
84
|
+
<mtr><mtd columnalign="right"><mrow><mn>1</mn><mo>+</mo><mi>x</mi></mrow></mtd>
|
85
|
+
<mtd columnalign="left"><mo>=</mo></mtd>
|
86
|
+
<mtd columnalign="right"><mrow><mn>1</mn><mo>−</mo><mi>x</mi></mrow></mtd></mtr>
|
87
|
+
<mtr><mtd columnalign="right"></mtd>
|
88
|
+
<mtd columnalign="left"><mo>=</mo></mtd>
|
89
|
+
<mtd columnalign="right"><mfrac><mn>2</mn><mrow><mn>1</mn><mi>x</mi></mrow></mfrac></mtd></mtr>
|
90
|
+
</mtable><annotation encoding="application/x-tex">\begin{aligned}
|
91
|
+
1+x & = & 1-x \nonumber\\
|
92
|
+
& = & \frac{2}{1x} \label{eq_trivial}
|
93
|
+
\end{aligned}</annotation></semantics></math></p>
|
94
|
+
|
95
|
+
They are referred to from other text as follows:
|
96
|
+
|
97
|
+
<p>Eq.<a href="#eq_trivial" data-reference-type="ref"
|
98
|
+
data-reference="eq_trivial">[eq_trivial]</a> was easy...
|
99
|
+
|
100
|
+
=== Algorithm
|
101
|
+
|
102
|
+
For fixing the alignments to follow the standard eqnarray alignments
|
103
|
+
(right, centre, and left in this order), the program searches for
|
104
|
+
+<mtable>+ and rewrites the <tt>columnalign</tt> attributes in the +<mtd>+ tags.
|
105
|
+
|
106
|
+
For fixing the equation numbers and links, the program
|
107
|
+
|
108
|
+
1. first reads a LaTeX aux file and lists all the labels for equations and their numbers.
|
109
|
+
2. Then, it picks up an internally-pointing HTML anchor,
|
110
|
+
3. matches it with the list generated from the LaTeX aux file and identifies the equation number,
|
111
|
+
4. searches labels in +<annotation>+ tags for the identical string for the HTML/MathML-anchor,
|
112
|
+
5. identifies the exact equation corresponding to the label (if in the eqnarray environment),
|
113
|
+
6. inserts the identified equation number next to the MathML equation,
|
114
|
+
7. and finally modifies the plain text for the HTML anchor.
|
115
|
+
|
116
|
+
Each inserted equation number next to the corresponding
|
117
|
+
equation is inside the +<mtext>+ tags. In +<mtable>+ (for LaTeX +\eqnarray{}+),
|
118
|
+
it is inserted as a new +<mtd>+ cell.
|
119
|
+
In both cases, the text is right-aligned with some padding to the
|
120
|
+
left. However, the position is relative to either the equation or the set of the
|
121
|
+
equations that contains the relevant equation (for LaTeX
|
122
|
+
+\eqnarray{}+) and is not like the original LaTeX, where equation
|
123
|
+
numbers inside a pair of parentheses are always located at the right
|
124
|
+
edge of a page in default.
|
125
|
+
|
126
|
+
== How to use this command
|
127
|
+
|
128
|
+
Once you have installed it according to the standard RubyGems procedure
|
129
|
+
(see section Install), the main Ruby executable (command) <tt>pandoc_refeq_mathml</tt>
|
130
|
+
should be in your command-search path.
|
131
|
+
|
132
|
+
It basically reads a MathML file from either the first command-line
|
133
|
+
argument or STDIN and also a LaTeX aux file specified in a
|
134
|
+
command-line, and then outputs the modified (corrected) MathML to STDOUT.
|
135
|
+
|
136
|
+
Any warnings are printed to either STDERR or a log-file specified in a
|
137
|
+
command-line as an option.
|
138
|
+
|
139
|
+
Failures in matching the labels from an HTML tag with any of the MathML
|
140
|
+
equations are printed as a warning (to STDERR in default).
|
141
|
+
Although it may genuinely mean the non-existent labels in the original
|
142
|
+
LaTeX source, it is far more likely that the labels belong to one of
|
143
|
+
the sections (or tables or figures), because the algorithm cannot tell
|
144
|
+
what the type (section, table, figure, or equation or else) of each label's origin is.
|
145
|
+
|
146
|
+
=== Help doc
|
147
|
+
|
148
|
+
The help doc for the command-line interface is displayed with +-h+ (or +--help+) option:
|
149
|
+
|
150
|
+
% pandoc_refeq_mathml -h
|
151
|
+
Usage: pandoc_refeq_mathml [options] [--] [MathML.html] > STDOUT
|
152
|
+
pandoc_refeq_mathml [options] [--] < STDIN > STDOUT
|
153
|
+
|
154
|
+
Description (Version=0.1):
|
155
|
+
This fixes issues, label-references of equations and eqnarray alignments, of pandoc-converted MathML from LaTeX.
|
156
|
+
|
157
|
+
Specific options:
|
158
|
+
-a, --aux [FILENAME] (mandatory) LaTeX aux filename
|
159
|
+
--log [FILENAME] Log filename (Default: STDERR). /dev/null to disable it.
|
160
|
+
--[no-]fixalign Fix eqnarray-alignment problems? (Def: true)
|
161
|
+
-v, --[no-]verbose Run verbosely (Def: true)
|
162
|
+
|
163
|
+
Common options:
|
164
|
+
-h, --help Show this message
|
165
|
+
--version Show version
|
166
|
+
|
167
|
+
=== Examples
|
168
|
+
|
169
|
+
% pandoc_refeq_mathml --aux=mydoc.aux --log=error.log mydoc.html > revised1.html
|
170
|
+
% head -n 90 mydoc.html | pandoc_refeq_mathml --aux=mydoc.aux --no-fixalign > revised2.html
|
171
|
+
|
172
|
+
== Install
|
173
|
+
|
174
|
+
The standard Ruby-gem install procedure suffices:
|
175
|
+
|
176
|
+
% gem install pandoc_refeq_mathml
|
177
|
+
|
178
|
+
which should also install its dependency, the
|
179
|
+
{Nokogiri gem}[https://rubygems.org/gems/nokogiri/].
|
180
|
+
|
181
|
+
Alternatively, it is possible to download the library file
|
182
|
+
<tt>lib/pandoc_refeq_mathml.rb</tt> somewhere in your local directory,
|
183
|
+
set the environmental variable <tt>RUBYLIB</tt> to also point to the
|
184
|
+
directory for the library, and execute
|
185
|
+
|
186
|
+
% ruby bin/pandoc_refeq_mathml
|
187
|
+
|
188
|
+
where <tt>ruby</tt> is optional. Note that {Nokogiri gem}[https://rubygems.org/gems/nokogiri/]
|
189
|
+
must be available in your RUBY library path.
|
190
|
+
|
191
|
+
In the developer's environment {diff-lcs gem}[https://rubygems.org/gems/diff-lcs] is also required.
|
192
|
+
|
193
|
+
This tool requires {Ruby}[http://www.ruby-lang.org] Version 2.0
|
194
|
+
or above.
|
195
|
+
|
196
|
+
== Developer's note
|
197
|
+
|
198
|
+
The source code is maintained also in
|
199
|
+
{Github}[https://github.com/masasakano/pandoc_refeq_mathml]
|
200
|
+
with no intuitive interface for annotations.
|
201
|
+
|
202
|
+
=== Tests
|
203
|
+
|
204
|
+
The Ruby codes under the directory <tt>test/</tt> are the test scripts.
|
205
|
+
You can run them from the top directory as <tt>ruby test/test_****.rb</tt>
|
206
|
+
or simply run <tt>make test</tt> or <tt>rake test</tt>.
|
207
|
+
|
208
|
+
== Known bugs and ToDo items
|
209
|
+
|
210
|
+
* pandoc-generated HTMLs do not contain Table/Figure numbers in their +<caption>+, even
|
211
|
+
though each anchored text refers to the corresponding number, such
|
212
|
+
as, +see Table "2"+, where "2" is the anchor.
|
213
|
+
* In fact, pandoc-generated HTMLs do not generate +<figure>+ tags, let
|
214
|
+
alone +<figurecaption>+ for
|
215
|
+
the LaTeX figure environments that contain more than one figure
|
216
|
+
(with +\includegraphics+)...
|
217
|
+
|
218
|
+
|
219
|
+
== Copyright
|
220
|
+
|
221
|
+
Author:: Masa Sakano < info a_t wisebabel dot com >
|
222
|
+
Versions:: The versions of this package follow Semantic Versioning (2.0.0) http://semver.org/
|
223
|
+
License:: MIT
|
224
|
+
Warranty:: No warranty.
|
225
|
+
|
226
|
+
----------
|
227
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,195 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# coding: utf-8
|
3
|
+
|
4
|
+
require 'optparse'
|
5
|
+
require 'nokogiri'
|
6
|
+
|
7
|
+
require 'pandoc_refeq_mathml'
|
8
|
+
|
9
|
+
# Optparse-handling Template: <https://gist.github.com/masasakano/45ee0d737f2d33122e8ff44007693b40>
#
# Command-line option handler of the pandoc_refeq_mathml executable.
#
# Call {#parse} with the argument Array (usually ARGV); the recognised
# options are removed from it and a {ScriptOptions} object is returned.
class OptparsePandocRefeqMathml
  begin
    ver = nil
    gemfile = __dir__+'/../pandoc_refeq_mathml.gemspec'
    # File.open (not Kernel#open) because the argument is always a plain path.
    File.open(gemfile){ |ioin|
      while line=ioin.gets
        if /^\s*[^.]+.version\s*=\s*(['"])([\d\._a-zA-Z\-]+)\1/ =~ line
          ver = $2
          break $2
        end
      end
    }
  rescue
    warn "gemspec file (#{gemfile}) is not found or its format is unexpected."  #if $DEBUG
  ensure
    # Reads Version from the gemspec file.
    # Make sure the format of the version in the gemspec file is standard,
    # using quotations (not like "%q@1.2.3@").
    Version = (ver || "0.1")  # Just in case, update this hard-coded fallback version number, too!
  end


  # Default values for the command-line options.
  # A nil value means the option is mandatory: {#parse} aborts with exit
  # status 1 when the value is still nil after the command line is parsed.
  # The Hash keys (and the attribute names of ScriptOptions) are identical
  # to the command-line option names, except +auxstr+, which is not an option
  # but is filled from the file given to --aux.
  DEF_OPTS = {
    aux: nil, # mandatory LaTeX aux filename
    auxstr: "", # Contents (String) of the LaTeX aux (extracted from --aux option)
    fixalign: true, # Fix eqnarray-originating alignment issues, if true
    #fixref: true, # Fix equation-labels and refs (Def: true); this option is not implemented.
    log: "", # Log filename, where warning messages are recorded. Default is "", meaning STDERR
    verbose: true,
  }

  # Container of the option values, with one accessor per key of DEF_OPTS.
  class ScriptOptions

    # attr_accessor :aux, :auxstr, :verbose
    DEF_OPTS.each_key do |ek|
      attr_accessor ek
    end

    # Sets every attribute to its default value in DEF_OPTS.
    def initialize
      DEF_OPTS.each_pair do |ek, ev|
        self.public_send(ek.to_s+"=", ev)
      end
    end

    # Registers the banner, the description and all the options.
    #
    # @param parser [OptionParser]
    # @return [void]
    def define_options(parser)
      parser.banner = "Usage: pandoc_refeq_mathml [options] [--] [MathML.html] > STDOUT"
      parser.separator " pandoc_refeq_mathml [options] [--] < STDIN > STDOUT"

      descriptions = []
      descriptions.push <<EOF
This fixes issues, label-references of equations and eqnarray alignments, of pandoc-converted MathML from LaTeX.
EOF

      parser.separator ""
      parser.separator "Description (Version=#{Version}):"
      descriptions.each do |et|
        parser.separator et.gsub(/\s*\n\*/, " ")
        parser.separator ""
      end
      parser.separator "Specific options:"

      # add additional options
      read_aux_option(parser)  # mandatory
      read_log_option(parser)  # optional (Default: STDERR)
      fixalign_option(parser)
      boolean_verbose_option(parser)

      parser.separator ""
      parser.separator "Common options:"
      # No argument, shows at tail.  This will print an options summary.
      parser.on_tail("-h", "--help", "Show this message") do
        puts parser
        exit
      end
      # Another typical switch to print the version.
      parser.on_tail("--version", "Show version") do
        puts Version
        exit
      end
    end

    # Specifies a LaTeX aux filename; the file is read at once into +auxstr+.
    #
    # @param parser [OptionParser]
    def read_aux_option(parser)  # mandatory option
      parser.on("-a", "--aux [FILENAME]",  # Do not change this into "--aux FILENAME".
                "(mandatory) LaTeX aux filename") do |fname|
        raise OptionParser::MissingArgument if !fname  # This is necessary as fname is mandatory!
        self.aux = fname
        self.auxstr = File.read fname  # may raise an Exception!
      end
    end

    # Specifies a log file
    #
    # if self.log.empty? is true, it should be treated as STDERR later.
    # The literal argument "STDERR" is converted into "".
    #
    # @param parser [OptionParser]
    def read_log_option(parser)
      parser.on("--log [FILENAME]",  # Do not change this into "--log FILENAME".
                "Log filename (Default: STDERR). /dev/null to disable it.") do |fname|
        raise OptionParser::MissingArgument if !fname  # --log without a filename is an error.
        self.log = ((fname == "STDERR") ? "" : fname)
      end
    end

    # Fix alignment-problems? (Boolean)
    #
    # @param parser [OptionParser]
    def fixalign_option(parser)
      parser.on("--[no-]fixalign", "Fix eqnarray-alignment problems? (Def: #{self.fixalign.inspect})") do |v|
        self.fixalign = v
      end
    end

    # Boolean switch.
    #
    # @param parser [OptionParser]
    def boolean_verbose_option(parser)
      parser.on("-v", "--[no-]verbose", "Run verbosely (Def: #{self.verbose.inspect})") do |v|
        self.verbose = v
      end
    end
  end  # class ScriptOptions

  #
  # Return a structure describing the options.
  #
  # The recognised options are removed from +args+ (destructively).
  # The process exits with status 1 if a mandatory option is missing or
  # if an option lacks its argument.
  #
  # @param args [Array<String>] command-line arguments, usually ARGV
  # @return [ScriptOptions]
  # @raise [OptionParser::ParseError] for the other kinds of invalid options
  def parse(args)
    # The options specified on the command line will be collected in
    # *options*.
    #
    # NOTE: if undefined options are specified, OptionParser::InvalidOption is raised.

    @options = ScriptOptions.new
    # The parser is stored in @parser so that the reader "parser" below
    # returns it (@args is kept as well, as in the earlier implementation).
    @args = @parser = OptionParser.new do |parser|
      @options.define_options(parser)
      begin
        parser.parse!(args)

        # Error if mandatory options are not specified.
        DEF_OPTS.each_key do |ek|
          if @options.public_send(ek).nil?  # *.nil? is used b/c "false" must be accepted.
            # Assuming the Hash keyword (and attribute) name is identical to the option name!
            warn "ERROR: Keyword argument --#{ek.to_s} is mandatory."
            exit 1
          end
        end

      rescue OptionParser::MissingArgument => err
        # Missing argument for optional arguments.
        warn sprintf "ERROR: %s: %s", err.reason, err.args.join(" ")
        exit 1

      rescue OptionParser::ParseError
        # Other types of argument-handling errors
        raise
      end
    end
    @options
  end

  attr_reader :parser, :options
end  # class OptparsePandocRefeqMathml
|
172
|
+
|
173
|
+
####################################
|
174
|
+
# MAIN
|
175
|
+
####################################
|
176
|
+
|
177
|
+
# Runs only when this file is executed directly: reads the MathML/HTML from
# the files named on the command line (or STDIN), alters it and prints the
# result to STDOUT.  Warnings go to STDERR or to the file given with --log.
if $0 == __FILE__
  cmdarg = OptparsePandocRefeqMathml.new
  cmdopts = cmdarg.parse(ARGV)  # Optional command-line options, apart from ARGV
  # cmdopts.auxstr : String of *.aux

  # File.open, not Kernel#open: the --log value comes from the command line
  # and must always be treated as a filename (Kernel#open would run a value
  # beginning with "|" as a command).
  logio = (cmdopts.log.empty? ? $stderr : File.open(cmdopts.log, "w"))
  logio.sync = true

  begin
    apmre = PandocRefeqMathml.new( Nokogiri::HTML(ARGF.read), cmdopts.auxstr, logio: logio, is_verbose: cmdopts.verbose)
    apmre.alter_html!(fixalign: cmdopts.fixalign)

    # outputs the altered HTML to STDOUT
    puts apmre.page.to_s
  ensure
    # The log file is closed even if an exception is raised; $stderr is left open.
    logio.close if logio != $stderr
  end
end
|
195
|
+
|
@@ -0,0 +1,189 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
|
5
|
+
# Class to handle MathML and LaTeX aux
|
6
|
+
#
|
7
|
+
class PandocRefeqMathml
|
8
|
+
|
9
|
+
attr_reader :page, :aux
|
10
|
+
|
11
|
+
# Alignment for LaTeX eqnarray.
|
12
|
+
EQNARRAY_ALIGNS = %w(right center left)
|
13
|
+
|
14
|
+
# @param page [Nokogiri::HTML4::Document]
|
15
|
+
# @param auxstr [String] String of the contents of a LaTeX aux file
|
16
|
+
# @param logio [IO] Output IO for logs. You may give +IO.open("/dev/null", "w")+. Def: $stderr
|
17
|
+
# @param is_verbose [Boolean] If true (Default), verbose.
|
18
|
+
def initialize(page, auxstr, logio: $stderr, is_verbose: true)
|
19
|
+
@page = page
|
20
|
+
@auxstr = auxstr
|
21
|
+
@logio = logio
|
22
|
+
@is_verbose = is_verbose
|
23
|
+
@hslabel = {} # String(label) => String(EqNumber), which have been detected.
|
24
|
+
end
|
25
|
+
|
26
|
+
# @param taga [Nokogiri::HTML4::Document] Nokogiri <A> object.
|
27
|
+
# @return [String, NilClass] Label string used in +<a href=..>+. Nil if something is wrong.
|
28
|
+
def get_label(taga)
|
29
|
+
kwdlink = taga['href'].split('#')[1]
|
30
|
+
return kwdlink if kwdlink == taga['data-reference']
|
31
|
+
|
32
|
+
@logio.puts "WARNING: Inconsistent href and data-reference: "+a.to_s if @is_verbose
|
33
|
+
return nil
|
34
|
+
end
|
35
|
+
|
36
|
+
# Return the equation number.
|
37
|
+
#
|
38
|
+
# The number is assumed to contain only numbers and maybe full-stops +[\d.]+.
|
39
|
+
#
|
40
|
+
# @return [String, NilClass] Equation number guessed from the Aux file. nil if something goes wrong.
|
41
|
+
def get_eq_num(kwdlink)
|
42
|
+
mat = /^\\newlabel\{#{kwdlink}\}\{\{([\d\.]+)\}\{(\d+)\}\{(.*)\}\{equation.([a-zA-Z\d\.]+)\}/.match @auxstr # it may be like equation.B.193 (for Appendix B).
|
43
|
+
# => #<MatchData "\\newlabel{eq_my_lab}{{65}{35}{割り算}{equation.4.62}" 1:"65" 2:"35" 3:"割り算" 4:"4.62">
|
44
|
+
return mat[1] if mat && mat[1] && !mat[1].empty?
|
45
|
+
|
46
|
+
## Something is wrong.
|
47
|
+
str = sprintf 'WARNING: Not found equation number for label="%s" (maybe it is for a section etc?): MatchData=%s', kwdlink, mat.inspect
|
48
|
+
@logio.puts str
|
49
|
+
return nil
|
50
|
+
end
|
51
|
+
|
52
|
+
# @param kwdlink [String] label
|
53
|
+
# @param n_eq [String] Equation number like "58", maybe "52.3"
|
54
|
+
# @return [Integer, NilClass] If something goes wrong,
|
55
|
+
def find_insert_n_eq(kwdlink, n_eq)
|
56
|
+
# Select the <math> tag component that hs the kwdlink
|
57
|
+
maths = @page.css('math').select{|ep|
|
58
|
+
/\\label\{\s*#{Regexp.quote(kwdlink)}\s*\}/ =~ (ep.css('annotation[encoding="application/x-tex"]').children[0].text.strip rescue "X")
|
59
|
+
}
|
60
|
+
if maths.size != 1
|
61
|
+
if maths.size == 0
|
62
|
+
@logio.puts 'WARNING: no math tag contains label="#{kwdlink}"'
|
63
|
+
else
|
64
|
+
@logio.print 'WARNING: Multiple math tags contain label="#{kwdlink}"'
|
65
|
+
@logio.puts (@is_verbose ? ": maths="+maths.inspect : "")
|
66
|
+
end
|
67
|
+
return nil
|
68
|
+
end
|
69
|
+
|
70
|
+
mtext = sprintf '<mtext id="%s" style="padding-left:1em; text-align:right;">(%s)</mtext>', kwdlink, n_eq
|
71
|
+
|
72
|
+
if maths[0].css('mtable').empty?
|
73
|
+
# \begin{equation}
|
74
|
+
#
|
75
|
+
# Insert the new node (<mrow><mtext...>(65)</mtext></mrow>) as the last child of
|
76
|
+
# the last top-level existing <mrow>; if it is added AFTER the <mrow>
|
77
|
+
# the <mtext> number would not be displayed!
|
78
|
+
newnode = '<mrow>' + mtext + '</mrow>'
|
79
|
+
begin
|
80
|
+
maths[0].css("mrow")[0].parent.css("> mrow")[-1].add_child(newnode)
|
81
|
+
# Between the last <mrow> and <annotation>
|
82
|
+
# n.b., css('mrow')[-1] would give an mrow inside another mrow!
|
83
|
+
rescue
|
84
|
+
msg = "FATAL: contact the code developer: equation: maths[0]="+maths[0].inspect
|
85
|
+
@logio.puts msg
|
86
|
+
raise msg
|
87
|
+
end
|
88
|
+
return 0
|
89
|
+
else
|
90
|
+
# \begin{eqnarray}
|
91
|
+
newnode = '<mtd columnalign="right">' + mtext + '</mtd>'
|
92
|
+
annot_node= maths[0].css('annotation[encoding="application/x-tex"]')[0]
|
93
|
+
i_eq_annot = annot_node.children[0].text.split(/\\\\\s*(?:\%[^\n]*)?\n?/).find_index{|ev| /\\label\{#{kwdlink}\}/ =~ ev} # Index of the equation (starting from 0) in the eqnarray
|
94
|
+
raise "FATAL: contact the code developer: eqnarray: "+annot_node.inspect if !i_eq_annot
|
95
|
+
|
96
|
+
# Insert the new node (<mtd><mtext...>(65)</mtext></mtd>)
|
97
|
+
mtrnode = maths[0].css('mtable mtr')[i_eq_annot]
|
98
|
+
# mtrnode.css('mtd')[-1].add_next_sibling(newnode) # not considering multi-layer tables
|
99
|
+
find_last_shallowest(mtrnode, 'mtd').add_next_sibling(newnode)
|
100
|
+
end
|
101
|
+
return i_eq_annot+1
|
102
|
+
end
|
103
|
+
|
104
|
+
# @param kwdlink [String] label
|
105
|
+
# @param taga [Nokogiri::HTML4::Document] Nokogiri <A> object.
|
106
|
+
# @param n_eq_str [String] equation number string
|
107
|
+
# @return [void]
|
108
|
+
def alter_link_text(kwdlink, taga, n_eq_str)
|
109
|
+
textnode = taga.children[0]
|
110
|
+
if !textnode.text?
|
111
|
+
@logio.puts "WARNING: Inconsistent text inside href: "+taga.to_s
|
112
|
+
return nil
|
113
|
+
end
|
114
|
+
|
115
|
+
if /\A\[?#{Regexp.quote(kwdlink)}\]?\z/ !~ textnode.to_s.strip
|
116
|
+
@logio.puts "WARNING: Strange linked-text=(#{textnode.to_s}) inside <a>: "+a.to_s if @is_verbose
|
117
|
+
end
|
118
|
+
|
119
|
+
taga.content=n_eq_str
|
120
|
+
end
|
121
|
+
|
122
|
+
# Alter the alignments of mtable originating from eqnarray
|
123
|
+
#
|
124
|
+
# Original is all right-aligned.
|
125
|
+
# After alteration, it will be right, center, left (which is the specification of eqnarray).
|
126
|
+
def alter_align_eqnarray!
|
127
|
+
@page.css("math mtable mtr").each do |ea_mtr|
|
128
|
+
ea_mtr.css("mtd").each_with_index do |ea_mtd, i|
|
129
|
+
break if i >= EQNARRAY_ALIGNS.size
|
130
|
+
ea_mtd["columnalign"]=EQNARRAY_ALIGNS[i]
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
# Alter the existing HTML Nokogiri content
|
136
|
+
def alter_reflinks!
|
137
|
+
all_ref_href = @page.css("a[data-reference-type=ref]")
|
138
|
+
all_ref_href.each do |ea_nodes|
|
139
|
+
# Gets a label from MathML
|
140
|
+
(kwdlink = get_label(ea_nodes)) || next
|
141
|
+
|
142
|
+
if !@hslabel.keys.include? kwdlink
|
143
|
+
# Gets the number of the equation from Aux
|
144
|
+
n_eq = get_eq_num(kwdlink)
|
145
|
+
next if !n_eq
|
146
|
+
@hslabel[kwdlink] = n_eq
|
147
|
+
|
148
|
+
# Finds the equation in MathML and adds the number of the equation.
|
149
|
+
find_insert_n_eq(kwdlink, n_eq) # this returns nil if something goes wrong
|
150
|
+
elsif !@hslabel[kwdlink]
|
151
|
+
# the label "kwdlink" has been detected, but no Equation-number was found.
|
152
|
+
next
|
153
|
+
end
|
154
|
+
|
155
|
+
# Alter the original link text in MathML to Equation number.
|
156
|
+
alter_link_text(kwdlink, ea_nodes, @hslabel[kwdlink])
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
# @param fixalign [Boolean] fix alignments if true
|
161
|
+
def alter_html!(fixalign: true)
|
162
|
+
alter_align_eqnarray! if fixalign
|
163
|
+
alter_reflinks!
|
164
|
+
end
|
165
|
+
|
166
|
+
# Returns the last shallowest node with the given tag-name
#
# The tree is searched breadth-first, and the children of each visited node
# are queued in reverse order.
#
# @see https://stackoverflow.com/a/73459162/3577922
#
# @param root [NokogiriXmlNode] root node
# @param tagname [String] Tag-name like "mrow" for which the last-shallowest node is searched
# @return [NokogiriXmlNode, nil] the first node accepted by {#node_matching?}, or nil if there is none
def find_last_shallowest(root, tagname)
  pending = [root]
  until pending.none?
    candidate = pending.shift
    if node_matching?(candidate, tagname)
      return candidate
    end

    # Reversed, so that later siblings are examined before earlier ones.
    pending.concat(candidate.children.reverse)
  end
end
private :find_last_shallowest
|
181
|
+
|
182
|
+
# Judges whether the node is the one that is looked for.
#
# @param element [NokogiriXmlNode] including Nokogiri::XML::Element
# @param tagname [String] Tag-name to compare the name of the node with
# @return [Boolean] true if the name of the node equals tagname
def node_matching?(element, tagname)
  # Put your matching logic here
  node_name = element.name
  node_name == tagname
end
private :node_matching?
|
188
|
+
end # PandocRefeqMathml
|
189
|
+
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'rake'
|
4
|
+
require 'date'
|
5
|
+
|
6
|
+
# Gem specification of pandoc_refeq_mathml.
#
# NOTE: This file contains a few deliberate release-time sanity checks, which
# raise an exception when the gem name differs from the name of the current
# directory or when s.date is not today, and which warn when the version is
# already mentioned in the ChangeLog.
Gem::Specification.new do |s|
  s.name = 'pandoc_refeq_mathml'.sub(/.*/){|c| (c == File.basename(Dir.pwd)) ? c : raise("ERROR: s.name=(#{c}) in gemspec seems wrong!")}
  s.version = "0.1".sub(/.*/){|c| fs = Dir.glob('changelog{,.*}', File::FNM_CASEFOLD); raise('More than one ChangeLog exist!') if fs.size > 1; warn("WARNING: Version(s.version=#{c}) already exists in #{fs[0]} - ok?") if fs.size == 1 && !IO.readlines(fs[0]).grep(/^\(Version: #{Regexp.quote c}\)$/).empty? ; c }  # n.b., In macOS, changelog and ChangeLog are identical in default.
  # s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=

  # Registers the command-line tool so that RubyGems installs it on PATH;
  # without this the file bin/pandoc_refeq_mathml is shipped but never installed.
  s.bindir = 'bin'
  s.executables << 'pandoc_refeq_mathml'

  s.authors = ["Masa Sakano"]
  s.date = %q{2022-08-26}.sub(/.*/){|c| (Date.parse(c) == Date.today) ? c : raise("ERROR: s.date=(#{c}) is not today!")}
  s.summary = %q{Add equation numbers to a pandoc-output MathML converted from LaTeX}
  s.description = <<-EOF
Add equation numbers in a crude way to a pandoc-output MathML converted from LaTeX, utilising its LaTeX aux file, and also adjust math-table alignments.
  EOF
  # s.email = %q{abc@example.com}
  s.extra_rdoc_files = [
    #"LICENSE.txt",
    "README.en.rdoc",
  ]
  s.license = 'MIT'

  # Each line of .gitignore is used as a fnmatch pattern against the basename
  # of a candidate file.  The file is read only once, before the filtering.
  gitignore_patterns = IO.readlines('.gitignore').map{|i| i.chomp}
  s.files = FileList['.gitignore','lib/**/*.rb','[A-Z]*','test/**/*.rb', '*.gemspec', 'bin/pandoc_refeq_mathml'].to_a.delete_if{ |f|
    gitignore_patterns.any?{ |pattern| File.fnmatch(pattern, File.basename(f)) }
  }
  s.files.reject! { |fn| File.symlink? fn }

  s.add_runtime_dependency 'nokogiri', '>= 1.13'
  s.add_development_dependency 'diff-lcs', '>= 1.5'

  s.homepage = "https://www.wisebabel.com"
  # s.rdoc_options = ["--charset=UTF-8"]  # "-e UTF-8" is now Default...

  # s.require_paths = ["lib"]  # Default "lib"
  s.required_ruby_version = '>= 2.0'  # respond_to_missing?
  s.test_files = Dir['test/**/*.rb']
  s.test_files.reject! { |fn| File.symlink? fn }
  # s.requirements << 'libmagick, v6.0'  # Simply, info to users.
  # s.rubygems_version = %q{1.3.5}  # This is always set automatically!!

  ## cf. https://guides.rubygems.org/specification-reference/#metadata
  s.metadata["yard.run"] = "yri"  # use "yard" to build full HTML docs.
  # s.metadata["changelog_uri"] = "https://github.com/masasakano/slim_string/blob/master/ChangeLog"
  # s.metadata["source_code_uri"] = "https://github.com/masasakano/slim_string"
  # s.metadata["documentation_uri"] = "https://www.example.info/gems/bestgemever/0.0.1"
end
|
59
|
+
|
@@ -0,0 +1,188 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# Usage(at-Parent-directory): RUBYLIB=$RUBYLIB:./lib ruby test/test_pandoc_refeq_mathml.rb
|
4
|
+
|
5
|
+
require 'open3'
|
6
|
+
require 'tempfile'
|
7
|
+
require 'diff/lcs'
|
8
|
+
require 'diff/lcs/string'
|
9
|
+
|
10
|
+
$stdout.sync = true
$stderr.sync = true
# print '$LOAD_PATH=';p $LOAD_PATH

# Relative paths with which the libraries have been successfully required.
loaded_relpaths = []
# Write those that are loaded inside a library to make their absolute paths be displayed.
library_basenames = %w(pandoc_refeq_mathml)

library_basenames.each do |libbase|
  # Candidate directories, tried in this order until one "require" succeeds.
  candidate_dirs = ['../lib/', 'lib/', ''].map{ |prefix| prefix + libbase + '/' } + ['']
  attempted_paths = []
  last_error = nil
  loaded_path = nil

  candidate_dirs.each do |dir|
    trial = dir + File.basename(libbase)
    attempted_paths << trial
    begin
      require trial
    rescue LoadError => last_error
      next  # tries the next candidate
    end
    loaded_path = trial
    break
  end

  unless loaded_path.nil?
    #print loaded_path," is loaded!\n"
    loaded_relpaths << loaded_path
    next
  end

  # None of the candidates could be loaded; reports them and re-raises the last error.
  warn "Warning: All the attempts to load the following files have failed. Abort..."
  warn attempted_paths.inspect
  warn " NOTE: It may be because a require statement in that file failed,
rather than requiring the file itself.
Check with % ruby -r#{File.basename(libbase)} -e p
or maybe add env RUBYLIB=$RUBYLIB:`pwd`"
  # p $LOADED_FEATURES.grep(/#{Regexp.quote(File.basename(libbase)+'.rb')}$/)
  raise last_error
end

print "NOTE: Library relative paths: "
p loaded_relpaths
print "NOTE: Library full paths:\n"
library_basenames.each do |libbase|
  suffix = File.basename(libbase) + '.rb'
  p $LOADED_FEATURES.grep(/#{Regexp.quote(suffix)}$/)
end
|
51
|
+
|
52
|
+
|
53
|
+
|
54
|
+
#################################################
|
55
|
+
# Unit Test
|
56
|
+
#################################################
|
57
|
+
|
58
|
+
gem "minitest"
|
59
|
+
# require 'minitest/unit'
|
60
|
+
require 'minitest/autorun'
|
61
|
+
# MiniTest::Unit.autorun
|
62
|
+
|
63
|
+
# Unit and integration tests for PandocRefeqMathml and its command-line tool.
#
# The parent class is referred to as +Minitest::Test+, because the legacy
# +MiniTest+ constant is not defined by default in recent minitest versions.
class TestUnitPandocRefeqMathml < Minitest::Test
  T = true
  F = false

  # Sets the paths of the executable and the sample data, relative to this file.
  def setup
    @exefile  = __dir__ + "/../bin/pandoc_refeq_mathml"
    @auxfile  = __dir__ + "/data/try01_latex.aux"
    @htmlfile = __dir__ + "/data/try01.html"

    # Array of IOs for temporary files (automatically set in generate_tmpfile())
    @tmpfiles = []
  end

  # Closes and removes all the temporary files created in generate_tmpfile().
  def teardown
    @tmpfiles.each do |ef|
      ef.close if !ef.closed?
      File.unlink(ef.path)
    end
  end

  # Creates a temporary file, which is registered to be removed in teardown().
  #
  # @option root [#to_s] Root-name of the temporary filename
  # @return [Array<Tempfile, String>] the opened temporary file and its path
  def generate_tmpfile(root=File.basename($0))
    io_tmpfile = Tempfile.open(root.to_s)
    $stderr.print "TEST: Tmpfile="+io_tmpfile.path if ENV.key?('PRINT_TMPFILE')  # To display Filename (NOTE the file will be removed when the script ends anyway.)
    @tmpfiles.push io_tmpfile
    [io_tmpfile, io_tmpfile.path]
  end

  # Tests the library directly: insertion of an equation number, the altered
  # column alignments, and the warning written to the log IO.
  def test_pandoc_refeq_mathml
    auxstr  = File.read @auxfile
    htmlstr = File.read @htmlfile
    page00 = Nokogiri::HTML(htmlstr)  # kept unaltered, for comparison
    page   = Nokogiri::HTML(htmlstr)
    io_tmp, _ = generate_tmpfile(__method__)

    prm = PandocRefeqMathml.new page, auxstr, logio: io_tmp, is_verbose: true
    prm.alter_html!

    # an Equation (LaTeX: \begin{equation})
    math1_org = page00.css("math:first-of-type")[0]
    math1_rev = prm.page.css("math:first-of-type")[0]
    lcs = math1_org.to_s.diff(math1_rev.to_s)
    assert_equal 1, lcs.size, 'Diff-size should be 1 (one continuous addition only)'
    assert_operator 90, '<', lcs[0].size, 'Number of different characters should be larger than 90'
    assert_operator 99, '>', lcs[0].size, 'Number of different characters should be smaller than 99: Diff='+mk_str_diff_chg(lcs).inspect  # "mrow><mtext id=\"square_pm\" style=\"padding-left:1em; text-align:right;\">(36)</mtext></mrow><"
    assert((%r@</mtext>@ !~ math1_org.to_s), '</mtext> should not be included')
    assert_match(%r@</mtext></mrow></mrow>@, math1_rev.to_s, '</mtext></mrow></mrow> should be included')

    mtds = prm.page.css("math mtable mtr")[2].css("mtd")
    assert_equal "right",  mtds[0]["columnalign"]
    # to_s is explicit, so the message does not rely on an implicit conversion of the node to String.
    assert_equal "center", mtds[1]["columnalign"], "align should be center: "+mtds[1].to_s
    assert_equal "left",   mtds[2]["columnalign"]
    # NOTE: --no-fixalign is tested in test_integrated()

    io_tmp.rewind
    msg_log = io_tmp.read
    assert_match(%r@label=.?sec_@, msg_log, "Warning message should be present in the log file because Equation-ID is not found for a label for a Section: \n> "+msg_log)
  end

  # Integrated tests
  #
  # Runs the executable with --no-fixalign, feeding the HTML through STDIN.
  def test_integrated
    #com = sprintf "%s --aux=%s --log=%s ", @exefile, @auxfile, @logfilename
    com = sprintf "%s --aux=%s --no-fixalign", @exefile, @auxfile  # Logfile => STDERR, fixalign=no

    ## From STDIN, out to STDOUT, log-file to STDERR
    out, err, stat = Open3.capture3(com, stdin_data: File.read(@htmlfile))
    assert_equal 0, stat
    assert_match(%r@label=.?sec_@, err, "Warning message should be present in STDERR because Equation-ID is not found for a label for a Section: \n> "+err)
    assert_operator 5, '<=', out.scan(%r@(?=</mtext>)@).count, 'There should be many </mtext>. out[0..100]='+out[0..100]
    assert_match(%r@\bcolumnalign="right"@, out, 'Sanity check columnalign')
    refute_match(%r@\bcolumnalign="center"@, out, 'With --no-fixalign center columnalign should not exist, but..')
  end

  # Read a 2-dim Array of Diff::LCS::Change and convert it to a single Array of them
  #
  # Changes with the same action at consecutive positions are merged into one,
  # so each returned Diff may have (and usually has) more than 1 character,
  # which makes it far more readable for humans.
  # Here is an example.
  #
  #   # [#<Diff::LCS::Change: ["+", 1, "x"]>, #<Diff::LCS::Change: ["+", 2, "y"]>]
  #   # => <Diff::LCS::Change: ["+", 1, "xy"]>
  #
  # You can still patch it.
  #
  #   s2 == s1.patch( [mk_str_diff_chg(Diff::LCS.diff(s1, s2))] )
  #
  # However, +s2.unpatch [mk_str_diff_chg(...)]+ raises RuntimeError.
  # I think it works by starting from the beginning, swapping "`+`" and "`-`",
  # where interpreting "`-`"+0 as inserting before pos=0 and "`+`"+1 as deleting after pos=1.
  #
  # @param ar2lcs [Array<Array<Diff::LCS::Change>>]
  # @return [Array<Diff::LCS::Change>] empty if there is no change at all
  def mk_str_diff_chg(ar2lcs)
    arlcs = []  # ar2lcs.flatten actually also flattens the contents of Diff::LCS::Change !
    # Therefore, this is a custom Array#flatten
    ar2lcs.each do |ea1|
      ea1.each do |ea2|
        arlcs.push ea2
      end
    end

    return arlcs if arlcs.empty?

    # The initial position is set to be non-adjacent to the first change,
    # so that the first iteration below always starts a new series.
    pos = pos_ini = arlcs[0].position - 99
    strdiff = nil
    action_now = nil

    arret = []
    arlcs.each do |ed|  # ed: EachDiff
      if (pos != ed.position - 1) || (ed.action != action_now)
        # The previous series has ended.
        arret.push Diff::LCS::Change.new(action_now, pos_ini, strdiff) if action_now  # unless the very first one
        pos = pos_ini = ed.position
        strdiff = ed.element.dup
        action_now = ed.action
        next
      end

      # Continuation of the current series.
      pos = ed.position
      strdiff << ed.element
    end
    arret.push Diff::LCS::Change.new(action_now, pos_ini, strdiff)  # the last series
    arret
  end
end # class TestUnitPandocRefeqMathml < Minitest::Test
|
188
|
+
|
metadata
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pandoc_refeq_mathml
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.1'
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Masa Sakano
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-08-26 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.13'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.13'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: diff-lcs
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.5'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.5'
|
41
|
+
description: 'Add equation numbers in a crude way to a pandoc-output MathML converted
|
42
|
+
from LaTeX, utilising its LaTeX aux file, and also adjust math-table alignments.
|
43
|
+
|
44
|
+
'
|
45
|
+
email:
|
46
|
+
executables: []
|
47
|
+
extensions: []
|
48
|
+
extra_rdoc_files:
|
49
|
+
- README.en.rdoc
|
50
|
+
files:
|
51
|
+
- ".gitignore"
|
52
|
+
- ChangeLog
|
53
|
+
- LICENSE.txt
|
54
|
+
- Makefile
|
55
|
+
- README.en.rdoc
|
56
|
+
- Rakefile
|
57
|
+
- bin/pandoc_refeq_mathml
|
58
|
+
- lib/pandoc_refeq_mathml.rb
|
59
|
+
- pandoc_refeq_mathml.gemspec
|
60
|
+
- test/test_pandoc_refeq_mathml.rb
|
61
|
+
homepage: https://www.wisebabel.com
|
62
|
+
licenses:
|
63
|
+
- MIT
|
64
|
+
metadata:
|
65
|
+
yard.run: yri
|
66
|
+
post_install_message:
|
67
|
+
rdoc_options: []
|
68
|
+
require_paths:
|
69
|
+
- lib
|
70
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '2.0'
|
75
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
requirements: []
|
81
|
+
rubygems_version: 3.3.7
|
82
|
+
signing_key:
|
83
|
+
specification_version: 4
|
84
|
+
summary: Add equation numbers to a pandoc-output MathML converted from LaTeX
|
85
|
+
test_files:
|
86
|
+
- test/test_pandoc_refeq_mathml.rb
|