mathtype_to_mathml 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +3 -0
  6. data/Gemfile +6 -0
  7. data/LICENSE.txt +22 -0
  8. data/README.md +45 -0
  9. data/Rakefile +7 -0
  10. data/lib/mathtype_to_mathml.rb +28 -0
  11. data/lib/mathtype_to_mathml/char_replacer.rb +311 -0
  12. data/lib/mathtype_to_mathml/mover.rb +151 -0
  13. data/lib/mathtype_to_mathml/version.rb +3 -0
  14. data/lib/transform.xsl +104 -0
  15. data/lib/xsl/arrow.xsl +319 -0
  16. data/lib/xsl/brace.xsl +55 -0
  17. data/lib/xsl/char.xsl +35 -0
  18. data/lib/xsl/embellishment.xsl +389 -0
  19. data/lib/xsl/matrix.xsl +116 -0
  20. data/lib/xsl/pile.xsl +54 -0
  21. data/lib/xsl/subsup.xsl +55 -0
  22. data/lib/xsl/sum.xsl +57 -0
  23. data/lib/xsl/union_intersection.xsl +104 -0
  24. data/mathtype_to_mathml.gemspec +28 -0
  25. data/spec/fixtures/expected/arrows.xml +389 -0
  26. data/spec/fixtures/expected/embellishments.xml +178 -0
  27. data/spec/fixtures/expected/equation1.xml +52 -0
  28. data/spec/fixtures/expected/equation10.xml +19 -0
  29. data/spec/fixtures/expected/equation11.xml +17 -0
  30. data/spec/fixtures/expected/equation12.xml +34 -0
  31. data/spec/fixtures/expected/equation13.xml +113 -0
  32. data/spec/fixtures/expected/equation2.xml +33 -0
  33. data/spec/fixtures/expected/equation3.xml +324 -0
  34. data/spec/fixtures/expected/equation4.xml +14 -0
  35. data/spec/fixtures/expected/equation5.xml +23 -0
  36. data/spec/fixtures/expected/equation6.xml +13 -0
  37. data/spec/fixtures/expected/equation7.xml +19 -0
  38. data/spec/fixtures/expected/equation8.xml +17 -0
  39. data/spec/fixtures/expected/equation9.xml +15 -0
  40. data/spec/fixtures/input/arrows.bin +0 -0
  41. data/spec/fixtures/input/embellishments.bin +0 -0
  42. data/spec/fixtures/input/equation1.bin +0 -0
  43. data/spec/fixtures/input/equation10.bin +0 -0
  44. data/spec/fixtures/input/equation11.bin +0 -0
  45. data/spec/fixtures/input/equation12.bin +0 -0
  46. data/spec/fixtures/input/equation13.bin +0 -0
  47. data/spec/fixtures/input/equation2.bin +0 -0
  48. data/spec/fixtures/input/equation3.bin +0 -0
  49. data/spec/fixtures/input/equation4.bin +0 -0
  50. data/spec/fixtures/input/equation5.bin +0 -0
  51. data/spec/fixtures/input/equation6.bin +0 -0
  52. data/spec/fixtures/input/equation7.bin +0 -0
  53. data/spec/fixtures/input/equation8.bin +0 -0
  54. data/spec/fixtures/input/equation9.bin +0 -0
  55. data/spec/html_output.rb +28 -0
  56. data/spec/mathtype_to_mathml_spec.rb +19 -0
  57. data/spec/spec_helper.rb +2 -0
  58. metadata +220 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: c01e41b7c35f3c71aa5cc88424320297a1d5366d
4
+ data.tar.gz: bd36ef37f1900eca7617503bcd6c84ab79ad7755
5
+ SHA512:
6
+ metadata.gz: 4d116d2674ee17634a20e1dbc02f9a92e188ab30940d6f5ffa8cf8f99863d0d857351d57474020f4b7c41bd1d422785683ec2f0a66fe683585a9145581933567
7
+ data.tar.gz: d9668ff0ccc786b75833175b953d086d881d0ab9230ba683d29fcac1dcc18b19c0419aa001fbb468d4f5e26acaee9d29330a1a81776b24747bc90c8af88fc18f
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.2.2
data/.travis.yml ADDED
@@ -0,0 +1,3 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.2
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+
2
+ source 'https://rubygems.org'
3
+
4
+ # Specify your gem's dependencies in mathtype_to_mathml.gemspec
5
+
6
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 PLOS
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,45 @@
1
+ # MathTypeToMathML
2
+
3
+ This gem can be used to convert MathType equations from a binary format (e.g. embedded in Word documents) to an open MathML representation. It does so in two stages: first, the "mathtype" gem converts the binary into an XML form of MTEF; second, XSLT stylesheets convert that XML to MathML.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'mathtype_to_mathml'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install mathtype_to_mathml
20
+
21
+ ## Usage
22
+
23
+ To convert a MathType equation embedded in a Word document (the file is usually named something like `oleObject1.bin`):
24
+
25
+ ```
26
+ mathml = MathTypeToMathML::Converter.new("oleObject1.bin").convert
27
+ ```
28
+
29
+ This will return a MathML string of the MathType equation.
30
+
31
+ ## Testing
32
+
33
+ Run `bundle exec rspec` to run specs. Additionally, you can create a visual output using `html_output.rb`, like so:
34
+
35
+ ```
36
+ bundle exec ruby spec/html_output.rb > test.html
37
+ ```
38
+
39
+ ## Contributing
40
+
41
+ 1. Fork it ( https://github.com/[my-github-username]/mathtype_to_mathml/fork )
42
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
43
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
44
+ 4. Push to the branch (`git push origin my-new-feature`)
45
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,7 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
7
+
@@ -0,0 +1,28 @@
1
+ require "mathtype_to_mathml/version"
2
+ require "nokogiri"
3
+ require "mathtype"
4
+ require_relative "mathtype_to_mathml/mover"
5
+ require_relative "mathtype_to_mathml/char_replacer"
6
+ require "pry"
7
+
8
module MathTypeToMathML
  # Converts a binary MathType equation into a MathML string.
  #
  # The work happens in three steps: the "mathtype" gem turns the binary into
  # an XML document, two Ruby passes (Mover and CharReplacer) rewrite that
  # document in place, and an XSLT stylesheet produces the final MathML.
  class Converter
    # Absolute path of the top-level stylesheet. It is resolved against this
    # source file rather than the process's working directory, so the gem also
    # works when it is loaded from somewhere other than its own root.
    XSLT_PATH = File.expand_path("../transform.xsl", __FILE__)

    # Parses the stylesheet, converts the equation to XML and pre-processes it.
    #
    # @param mathtype handed unchanged to Mathtype::Converter.new; presumably
    #   the path of the binary equation file (e.g. "oleObject1.bin") - confirm
    #   against the "mathtype" gem.
    def initialize(mathtype)
      # NOTE(review): the stylesheet is still parsed from a string, so any
      # relative xsl:include/xsl:import inside it is resolved exactly as
      # before; only the lookup of transform.xsl itself changed.
      @xslt = Nokogiri::XSLT(File.read(XSLT_PATH))

      @mathtype = Mathtype::Converter.new(mathtype).xml.doc

      # Addresses lack of scanning mode in our translator. See "Mover" for more.
      Mover.new(@mathtype).move

      # Character ranges are tricky in XSLT 1.0, so we deal with them in Ruby
      CharReplacer.new(@mathtype).replace
    end

    # Runs the XSLT transformation over the pre-processed document.
    #
    # @return [String] the serialized result of the transformation
    def convert
      @xslt.transform(@mathtype).to_s
    end
  end
end
@@ -0,0 +1,311 @@
1
+ require "nokogiri"
2
+
3
+ # XSLT 1.0 has virtually non-existent capabilities for character ranges,
4
+ # codepoints, hex to decimal, etc., so we replace characters within character
5
+ # ranges with Ruby. Single character translations are still done with XSLT.
6
+
7
module MathTypeToMathML
  # Replaces <char> elements whose code point falls inside one of the ranges
  # in REPLACEMENTS with markup built from a per-range template.
  #
  # Templates may contain two placeholders:
  #   (Char)    - the character itself
  #   (CharHex) - a hexadecimal character reference such as &#x2229;
  class CharReplacer
    UNSUPPORTED = "Unsupported (Char)"

    # Code point range => templates keyed by mode (:mathmode / :textmode).
    # A range may define only one of the two modes; characters in the other
    # mode are then left untouched by this class.
    # The :number key on the digit range is not read anywhere in this class.
    REPLACEMENTS = {
      0x0000..0x0008 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED }, # C0 controls
      0x000B..0x001F => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED }, # C0 controls
      0x0030..0x0039 => { mathmode: "<mn>(Char)</mn>", number: "(Char)", textmode: "(Char)" }, # digits 0-9
      0x003A..0x003B => { mathmode: "<mo>(Char)</mo>", textmode: "(Char)" }, # colon, semicolon
      0x0041..0x005A => { mathmode: "<mi>(Char)</mi>", textmode: "(Char)" }, # Basic Latin, A-Z
      0x0061..0x007A => { mathmode: "<mi>(Char)</mi>", textmode: "(Char)" }, # Basic Latin, a-z
      0x0080..0x009F => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED }, # C1 Controls
      0x00A0..0x00B0 => { mathmode: "<mo>(CharHex)</mo>" }, # Latin-1 Supplement
      0x00B2..0x00BB => { mathmode: "<mo>(CharHex)</mo>" }, # Latin-1 Supplement
      0x00BC..0x00BE => { mathmode: "<mn>(CharHex)</mn>" }, # Latin-1 Supplement
      0x02C6..0x02FF => { mathmode: "<mo>(CharHex)</mo>" }, # Spacing Modifier Letters
      0x0300..0x036F => { mathmode: "<mo>(CharHex)</mo>" }, # Combining Diacritical Marks
      0x2000..0x200B => { mathmode: "<mtext>(CharHex)</mtext>" }, # General Punctuation (spaces)
      0x200C..0x200F => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED }, # formatting characters
      0x2010..0x2027 => { mathmode: "<mo>(CharHex)</mo>" }, # General Punctuation
      0x2028..0x202F => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED }, # formatting characters
      0x2030..0x2069 => { mathmode: "<mo>(CharHex)</mo>" }, # General Punctuation
      0x206A..0x206F => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED }, # formatting characters
      0x2070..0x209F => { mathmode: "<mo>(CharHex)</mo>" }, # Superscripts and Subscripts
      0x20A0..0x20CF => { mathmode: "<mi>(CharHex)</mi>" }, # Currency Symbols
      0x20D0..0x20FF => { mathmode: "<mo>(CharHex)</mo>" }, # Combining Diacritical Marks for Symbols
      0x2100..0x2101 => { mathmode: "<mo>(CharHex)</mo>" }, # Letterlike Symbols
      0x2103..0x210A => { mathmode: "<mo>(CharHex)</mo>" }, # Letterlike Symbols
      0x2116..0x2117 => { mathmode: "<mo>(CharHex)</mo>" }, # Letterlike Symbols
      0x213C..0x2146 => { mathmode: "<mo>(CharHex)</mo>" }, # Letterlike Symbols
      0x2150..0x218F => { mathmode: "<mn>(CharHex)</mn>" }, # Number Forms
      0x2190..0x21FF => { mathmode: "<mo>(CharHex)</mo>" }, # Arrows
      0x2200..0x2211 => { mathmode: "<mo>(CharHex)</mo>" }, # Mathematical Operators
      0x2213..0x221D => { mathmode: "<mo>(CharHex)</mo>" }, # Mathematical Operators
      0x221F..0x22FF => { mathmode: "<mo>(CharHex)</mo>" }, # Mathematical Operators
      0x2300..0x23FF => { mathmode: "<mo>(CharHex)</mo>" }, # Miscellaneous Technical
      0x2400..0x243F => { mathmode: "<mo>(CharHex)</mo>" }, # Control Pictures
      0x2500..0x257F => { mathmode: "<mo>(CharHex)</mo>" }, # Box Drawing
      0x2580..0x259F => { mathmode: "<mo>(CharHex)</mo>" }, # Block Elements
      0x25A0..0x25FF => { mathmode: "<mo>(CharHex)</mo>" }, # Geometric Shapes
      0x2600..0x267F => { mathmode: "<mo>(CharHex)</mo>" }, # Miscellaneous Symbols
      0x2700..0x27BF => { mathmode: "<mo>(CharHex)</mo>" }, # Dingbats
      0x27F0..0x27FF => { mathmode: "<mo>(CharHex)</mo>" }, # Supplemental Arrows-A
      0x2900..0x297F => { mathmode: "<mo>(CharHex)</mo>" }, # Supplemental Arrows-B
      0x2980..0x29AF => { mathmode: "<mo>(CharHex)</mo>" }, # Miscellaneous Mathematical Symbols-B
      0x29B1..0x29DB => { mathmode: "<mo>(CharHex)</mo>" }, # Miscellaneous Mathematical Symbols-B
      0x29DD..0x29FF => { mathmode: "<mo>(CharHex)</mo>" }, # Miscellaneous Mathematical Symbols-B
      0x2A00..0x2AFF => { mathmode: "<mo>(CharHex)</mo>" }, # Supplemental Mathematical Operators
      0x3000..0x303F => { mathmode: "<mo>(CharHex)</mo>" }, # CJK Symbols and Punctuation
      # Everything from 0xE000 through 0xF8FF lies in the Unicode Private Use Area.
      0xE000..0xE900 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xE905..0xE90A => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xE90D..0xE921 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xE926..0xE92C => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xE92E..0xE931 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xE934..0xE939 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xE93C..0xE98E => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xE990..0xEA05 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xEA08..0xEA0A => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xEA0D..0xEA31 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xEA36..0xEA39 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xEA3C..0xEA3F => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xEA46..0xEB00 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xEB03..0xEB04 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xEB07..0xED09 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xED14..0xED15 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xED17..0xEE03 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xEE04..0xEE0C => { textmode: UNSUPPORTED },
      0xEE0D..0xEE18 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xEE1A..0xEEFF => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xEF09..0xEFFF => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xF000..0xF033 => { textmode: UNSUPPORTED },
      0xF034..0xF07F => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xF080..0xF0B3 => { textmode: UNSUPPORTED },
      0xF0B4..0xF0BF => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xF0C0..0xF0C9 => { textmode: UNSUPPORTED },
      0xF0CA..0xF0FF => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xF100..0xF133 => { textmode: UNSUPPORTED },
      0xF134..0xF8FF => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xFB00..0xFB4F => { mathmode: "<mtext>(CharHex)</mtext>" }, # Alphabetic Presentation Forms
      0xFE35..0xFE4F => { mathmode: "<mo>(CharHex)</mo>" } # CJK Compatibility Forms
    }.freeze

    attr_accessor :mathtype

    # @param mathtype the document to rewrite in place; it must respond to
    #   #css (a Nokogiri document in the converter).
    def initialize(mathtype)
      @mathtype = mathtype
    end

    # Walks every <char> element and replaces those whose mt_code_value falls
    # inside a REPLACEMENTS range. Characters outside every range are left for
    # the XSLT stage.
    def replace
      @mathtype.css("char").each do |char|
        code = char.xpath("mt_code_value").text.hex
        replacement = REPLACEMENTS.find { |range, _| range.cover?(code) }
        replace_character(replacement, char) if replacement
      end
    end

    # Swaps +char+ for the markup built from the template of its mode.
    #
    # @param replacement [Array] a [range, templates] pair from REPLACEMENTS
    # @param char the <char> node to replace
    # @return nil (and leaves +char+ untouched) when the range defines no
    #   template for the character's mode, instead of failing on a nil template
    def replace_character(replacement, char)
      # Relies on the XPath boolean expression being returned as a Ruby
      # true/false value.
      mode = char.xpath("variation = 'textmode'") ? :textmode : :mathmode
      template = replacement[1][mode]
      return unless template

      char.replace Nokogiri::HTML::DocumentFragment.parse(replacement_xml(template, char))
    end

    # Fills the (Char) and (CharHex) placeholders of +string+ from the
    # mt_code_value of +char+.
    #
    # @param string [String] a template from REPLACEMENTS
    # @param char the <char> node supplying mt_code_value (e.g. "0x2229")
    # @return [String] the template with its placeholders substituted
    def replacement_xml(string, char)
      code_text = char.xpath("mt_code_value").text

      string
        # An explicit encoding is required: a bare Integer#chr raises
        # RangeError above 0xFF and yields a binary string for 0x80..0xFF.
        .gsub("(Char)") { code_text.hex.chr(Encoding::UTF_8) } # e.g. π
        # Drop the "0x" prefix only when it is actually there.
        .gsub("(CharHex)") { "&#x#{code_text.sub(/\A0x/i, '')};" } # e.g. &#x2229;
    end
  end
end
308
+
309
+
310
+
311
+