ritex 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README +96 -0
- data/ReleaseNotes +11 -0
- data/lib/ritex.rb +200 -0
- data/lib/ritex/lexer.rb +140 -0
- data/lib/ritex/mathml/entities.rb +688 -0
- data/lib/ritex/mathml/functions.rb +88 -0
- data/lib/ritex/mathml/markup.rb +30 -0
- data/lib/ritex/parser.rb +845 -0
- data/lib/ritex/parser.y +140 -0
- data/test/all.rb +7 -0
- data/test/itex2mml-key.yaml +292 -0
- data/test/mathml.rb +265 -0
- data/test/parser.rb +36 -0
- metadata +55 -0
data/README
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
Author:: William Morgan (mailto: wmorgan-ritex@masanjin.net)
|
2
|
+
Copyright:: Copyright 2005 William Morgan
|
3
|
+
License:: GNU GPL version 2
|
4
|
+
|
5
|
+
= Introduction
|
6
|
+
|
7
|
+
Ritex converts expressions from WebTeX into MathML. WebTeX is an
|
8
|
+
adaptation of TeX math syntax for web display.
|
9
|
+
|
10
|
+
Ritex makes inserting math into HTML pages easy. It supports most TeX
|
11
|
+
math syntax as well as macros.
|
12
|
+
|
13
|
+
For example, Ritex turns
|
14
|
+
\alpha^\beta
|
15
|
+
into
|
16
|
+
<math xmlns="http://www.w3.org/1998/Math/MathML">
|
17
|
+
<msup>
|
18
|
+
<mi>α</mi>
|
19
|
+
<mi>β</mi>
|
20
|
+
</msup>
|
21
|
+
</math>
|
22
|
+
|
23
|
+
Ritex is based heavily on itex2mml
|
24
|
+
(http://pear.math.pitt.edu/mathzilla/itex2mmlItex.html), a popular TeX
|
25
|
+
math to MathML converter--so much so that the default correct answer
|
26
|
+
to unit tests is to do whatever itex2mml does!
|
27
|
+
|
28
|
+
Ritex features several advantages over itex2mml:
|
29
|
+
|
30
|
+
* It's written in Ruby (hey, I consider that an advantage).
|
31
|
+
* It supports macros.
|
32
|
+
* It handles unary minus better.
|
33
|
+
* It's easier to extend.
|
34
|
+
|
35
|
+
= Synopsis
|
36
|
+
|
37
|
+
require 'ritex'
|
38
|
+
p = Ritex::Parser.new
|
39
|
+
ARGF.each { |l| puts p.parse(l) }
|
40
|
+
|
41
|
+
## or ...
|
42
|
+
|
43
|
+
ARGF.each do |l|
|
44
|
+
begin
|
45
|
+
puts p.parse(l)
|
46
|
+
rescue Racc::ParseError
|
47
|
+
$stderr.puts "invalid input"
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
= Using Ritex
|
52
|
+
|
53
|
+
Calling Ritex from Ruby is very simple. If the synopsis above isn't
|
54
|
+
enough, see the documentation for Ritex::Parser for the gory details.
|
55
|
+
|
56
|
+
= Creating MathML with Ritex
|
57
|
+
|
58
|
+
Ritex parses WebTeX. WebTeX is an adaptation of the TeX math syntax
|
59
|
+
which is designed for web page display. The WebTeX documentation can
|
60
|
+
be found at
|
61
|
+
http://stuff.mit.edu/afs/athena/software/webeq/currenthome/docs/webtex/toc.html.
|
62
|
+
|
63
|
+
If you're familiar with TeX math syntax, you'll feel right at
|
64
|
+
home. But there are several important differences between it and
|
65
|
+
WebTeX. Most notably:
|
66
|
+
|
67
|
+
* arrays: different \array syntax; no \eqnarray or \align
|
68
|
+
* macro definitions: \define; no \newcommand or \def
|
69
|
+
* \left and \right no longer need "invisible" delimiters
|
70
|
+
|
71
|
+
These differences are explained in the WebTeX documentation.
|
72
|
+
|
73
|
+
Ritex is based heavily on Itex2mml. Itex2mml accepts what it calls
|
74
|
+
"Itex", an extension of WebTeX which adds a few aliases (like
|
75
|
+
\infinity for \infty) and markups (like \underoverset). Ritex supports
|
76
|
+
these extensions. Regardless, I've chosen to say that Ritex parses
|
77
|
+
WebTeX rather than Itex, mainly because the former includes macros and
|
78
|
+
is better documented.
|
79
|
+
|
80
|
+
Itex is described at
|
81
|
+
http://pear.math.pitt.edu/mathzilla/itex2mmlItex.html.
|
82
|
+
|
83
|
+
See the ReleaseNotes for features in WebTeX that are currently
|
84
|
+
unimplemented in Ritex.
|
85
|
+
|
86
|
+
= Differences between Ritex and itex2mml
|
87
|
+
|
88
|
+
If you're familiar with itex2mml, there are a few subtle differences
|
89
|
+
between the two:
|
90
|
+
* A sequence of letters like "abc" is treated as three separate
|
91
|
+
variables and not as one variable. I believe that's the TeX Way (tm).
|
92
|
+
* \ (backslash space) is a medium space, not an undefined character.
|
93
|
+
* Sequences like "x--3" will correctly mark the second operator as a
|
94
|
+
unary minus.
|
95
|
+
* And of course, macros.
|
96
|
+
|
data/ReleaseNotes
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
version 0.1, 9/15/2005
|
2
|
+
----------------------
|
3
|
+
|
4
|
+
First version. Highly experimental!
|
5
|
+
|
6
|
+
Unimplemented features:
|
7
|
+
* \floatleft, \floatright
|
8
|
+
* array options
|
9
|
+
* \tensor and \multiscripts
|
10
|
+
* macros with 4 or more arguments
|
11
|
+
* \bghighlight, \fghighlight, \statusline
|
data/lib/ritex.rb
ADDED
@@ -0,0 +1,200 @@
|
|
1
|
+
## lib/ritex.rb -- contains Ritex::Parser
|
2
|
+
## Author:: William Morgan (mailto: wmorgan-ritex@masanjin.net)
|
3
|
+
## Copyright:: Copyright 2005 William Morgan
|
4
|
+
## License:: GNU GPL version 2
|
5
|
+
##
|
6
|
+
## :title:Ritex: a Ruby itex to mathml converter
|
7
|
+
## :main:README
|
8
|
+
|
9
|
+
require "ritex/parser"
|
10
|
+
require "ritex/lexer"
|
11
|
+
require "ritex/mathml/entities"
|
12
|
+
require "ritex/mathml/functions"
|
13
|
+
require "ritex/mathml/markup"
|
14
|
+
require 'racc/parser' # just for Racc::ParseError
|
15
|
+
|
16
|
+
## Container module for all Ritex stuff. The entry point is
|
17
|
+
## Ritex::Parser.
|
18
|
+
module Ritex

  ## Raised when an unknown entity, function, macro or environment is
  ## encountered (unless Parser#merror is set), and when a macro is
  ## defined or invoked with an unsupported number of arguments.
  class Error < Racc::ParseError; end

  ## This is not ideal by any means. Until we can call a Proc with an
  ## arbitrary binding (Ruby 1.9?), we will relay all #markup and
  ## #lookup calls within the module to a registered parser, so that the
  ## "functions" in lib/ritex/mathml/functions.rb can be written more
  ## easily. Any better ideas?
  ##
  ## In the mean time, I'd recommend not having more than one parser at
  ## a time going: every Parser.new overwrites this value.
  attr_accessor :global_parser
  module_function :global_parser, :global_parser=

  ## The parser for itex and the main entry point for Ritex. This class
  ## is partially defined here and partially generated by Racc from
  ## lib/ritex/parser.y.
  ##
  ## Create the parser with #new. Parse strings with #parse. That's all
  ## there is to it.
  class Parser
    FORMATS = [:mathml]

    ## If true, Ritex will output a <merror>...</merror> message in the
    ## MathML if an unknown entity is encountered. If false (the default),
    ## Ritex will throw a Ritex::Error.
    attr_accessor :merror

    ## The current output format (see #format=).
    attr_reader :format

    ## _format_ is the desired output format and must be in the FORMATS
    ## list. Right now that's just :mathml.
    ##
    ## Raises ArgumentError for any other format. As a side effect the
    ## new parser registers itself as Ritex.global_parser.
    def initialize(format = :mathml)
      self.format = format
      @macros = {}
      Ritex.global_parser = self # lame
      @merror = false
    end

    ## Parse a string. Returns the MathML output in string form. Note
    ## that macro definitions are cumulative and persistent across calls
    ## to #parse. If you don't want this behavior, you must explicitly
    ## call #flush_macros after every #parse call.
    ##
    ## _wrap_ denotes whether you want the output wrapped in the
    ## top-level XML math tag. Unless you're generating these tags
    ## yourself, you want this.
    ##
    ## _inline_ denotes whether you want inline markup versus block or
    ## "display" markup. For mathml output this only has an effect if
    ## _wrap_ is true.
    def parse(s, wrap = true, inline = true)
      @lex = Lexer.new(self, s)
      r = yyparse @lex, :lex
      # Empty output is never wrapped, so blank input yields "".
      return r if r.empty? || !wrap
      markup r, (inline ? :math : :displaymath)
    end

    ## Set the output format. Raises ArgumentError unless _format_ is
    ## one of FORMATS.
    def format=(format)
      raise ArgumentError, "format must be one of #{FORMATS * ', '}" unless FORMATS.include? format
      @format = format
    end

    ## Delete all macros
    def flush_macros; @macros = {}; end

    ## Wrap _what_ in an element. A String _tag_ is used verbatim as
    ## the element name, with _opts_ (if any) as its attribute text. A
    ## Symbol _tag_ is looked up in MathML::MARKUP, and the result
    ## supplies both the element name and the attribute text.
    def markup(what, tag, opts = nil) #:nodoc:
      case @format
      when :mathml
        tag, opts =
          case tag
          when String
            [tag, opts]
          when Symbol
            MathML::MARKUP[tag]
          end
        if opts
          "<#{tag} #{opts}>#{what}</#{tag}>"
        else
          "<#{tag}>#{what}</#{tag}>"
        end
      end
    end

    ## Return the output text for entity _sym_ from MathML::ENTITIES.
    ## Unknown entities are reported through #error.
    def lookup(sym) #:nodoc:
      case @format
      when :mathml
        return error("unknown entity #{sym.inspect}") unless MathML::ENTITIES.member? sym
        MathML::ENTITIES[sym]
      end
    end

    ## Table of function names to callables for the current format.
    def funcs #:nodoc:
      case @format
      when :mathml
        MathML::FUNCTIONS
      end
    end

    ## Table of environment names to callables for the current format.
    def envs #:nodoc:
      case @format
      when :mathml
        MathML::ENVS
      end
    end

    ## Table of currently defined macros, keyed by name.
    def macros #:nodoc:
      @macros
    end

    ## Merged table of every symbol after which a '-' is to be read as
    ## a unary minus (used by the lexer).
    def op_symbols #:nodoc:
      case @format
      when :mathml
        MathML::OPERATORS.merge(MathML::UNARY_OPERATORS).merge(MathML::MATH_FUNCTIONS)
      end
    end

    private

    ## Report the problem _e_: returns an <merror> element containing
    ## the message when #merror is set, raises Ritex::Error otherwise.
    def error(e)
      if @merror
        "<merror>#{e}</merror>"
      else
        raise Error, e
      end
    end

    ## Escape the XML-significant characters in _s_. The ampersand has
    ## to go first so the entities produced for '<' and '>' are not
    ## escaped a second time.
    def safe(s)
      case @format
      when :mathml
        s.gsub("&", "&amp;").gsub(">", "&gt;").gsub("<", "&lt;")
      end
    end

    ## Concatenate output fragments.
    def join(*a)
      case @format
      when :mathml
        a.join ""
      end
    end

    ## Evaluate the macro, function or environment called _name_ with
    ## arguments _a_. A macro's expansion is pushed back onto the lexer
    ## to be parsed in turn, so the macro call itself contributes "".
    ## Functions and environments return their output directly.
    def special(name, *a)
      if @macros.member? name
        @lex.push @macros[name][*a]
        ""
      elsif funcs.member? name
        funcs[name][*a]
      elsif envs.member? name
        envs[name][*a]
      else
        error "unknown function, macro or environment #{name}"
      end
    end

    ## Define (or redefine, with a warning) macro _sym_ taking _arity_
    ## arguments and expanding to _exp_, in which "#1".."#3" stand for
    ## the arguments. Returns "" since a definition produces no output.
    ## Raises Ritex::Error if _arity_ is outside 0..3.
    def define(sym, arity, exp)
      arity = arity.to_i
      raise Error, "macro arity must be <= 3" unless arity <= 3
      raise Error, "macro arity must be >= 0" unless arity >= 0

      warn "overriding definition for #{sym}" if @macros.member? sym
      @macros[sym] = lambda do |*a|
        raise Error, "expecting #{arity} arguments, got #{a.length}" unless a.length == arity
        (0...arity).inject(exp) do |s, i|
          # Block form: the argument is inserted literally. With a
          # replacement *string*, gsub would treat backslash sequences
          # in the argument (e.g. "\\\\" or "\\1") as back-references,
          # and TeX arguments are full of backslashes.
          s.gsub("#" + (i + 1).to_s) { a[i] }
        end
      end
      # The lambda takes *a, so its real arity is -1; the lexer picks
      # the MACROn token from #arity, hence this override. hack!
      @macros[sym].instance_eval "def arity; #{arity}; end"
      ""
    end

    ## Print a warning to standard error.
    def warn(s)
      $stderr.puts "warning: #{s}"
    end
  end

end
|
data/lib/ritex/lexer.rb
ADDED
@@ -0,0 +1,140 @@
|
|
1
|
+
## lib/ritex/lexer.rb -- contains Ritex::Lexer
|
2
|
+
## Author:: William Morgan (mailto: wmorgan-ritex@masanjin.net)
|
3
|
+
## Copyright:: Copyright 2005 William Morgan
|
4
|
+
## License:: GNU GPL version 2
|
5
|
+
|
6
|
+
require 'racc/parser' # just for Racc::ParseError
|
7
|
+
|
8
|
+
module Ritex

  ## thrown upon lexing errors
  class LexError < Racc::ParseError; end

  ## The lexer splits input stream into tokens. These are handed to the
  ## parser. Ritex::Parser takes care of setting up and configuring the
  ## lexer.
  ##
  ## In order to support macros, the lexer maintains a stack of
  ## strings. Pushing a string onto the stack will cause #lex to yield
  ## tokens from that string, until it reaches the end, at which point
  ## it will discard the string and resume yielding tokens from the
  ## previous string.
  ##
  ## The lexer has two states. Normally it ignores all spacing. After
  ## hitting an ENV token it will start returning SPACE tokens for each
  ## space until it hits a '}'.
  ##
  ## The lexer also handles unary minus. It decides whether a '-' is
  ## unary or binary by considering the previous token.
  class Lexer
    TOKENS = '+-\/\*|\.,;:<>=()#&\[\]^_!?~%\'{} ' # passed as themselves

    ## Previous token values after which a '-' is a unary minus. nil
    ## means "no previous token", i.e. the start of the input. The
    ## parser's op_symbols are consulted in addition to this list.
    UNARY_MINUS_CONTEXT = [nil, '{', '(', '[', '+', '-', '/', '*', '=', '<', '>', '&']

    ## _parser_ must respond to #funcs, #envs, #macros and #op_symbols.
    ## _s_ is an initial string to push on the stack, or nil.
    def initialize(parser, s = nil)
      @parser = parser
      @s = [] # stack of [string, read position] pairs, top first
      push s unless s.nil?
    end

    ## push an additional string on to the stack.
    def push(s); @s.unshift [s, 0]; end

    ## Yield token and value pairs from the string stack. The final
    ## pair is [false, false], marking the end of input.
    def lex #:yields: token, value
      @lastop = nil
      lex_inner do |sym, val|
        @lastop = val # remembered for the unary-minus decision
        yield sym, val
      end
    end

    ## For debugging purposes.
    def dlex #:nodoc:
      lex do |sym, val|
        puts "** got #{sym.inspect}: [#{val}]"
        yield sym, val
      end
    end

    private

    ## Tokenizer loop. The read position is always advanced *before*
    ## the token is yielded, because the consumer may #push a macro
    ## expansion onto the stack while handling the token.
    def lex_inner
      state = :normal

      until @s.empty?
        s, i = @s.first
        if i >= s.length
          @s.shift # exhausted; resume the string underneath
          next
        end

        case s[i..-1]
        when /\A(\s+)/
          text = $1
          @s.first[1] += text.length
          yield [:SPACE, text] if state == :env
        # The keyword patterns must not match a mere prefix of a
        # longer command name: \rightarrow is a symbol, not \right
        # followed by "arrow".
        when /\A(\\array)(?![a-zA-Z])/
          text = $1
          @s.first[1] += text.length
          yield [:ARRAY, text]
        when /\A(\\define)(?![a-zA-Z])/
          text = $1
          @s.first[1] += text.length
          yield [:DEFINE, text]
        when /\A(\\left)(?![a-zA-Z])/
          text = $1
          @s.first[1] += text.length
          yield [:LEFT, text]
        when /\A(\\right)(?![a-zA-Z])/
          text = $1
          @s.first[1] += text.length
          yield [:RIGHT, text]
        when /\A-/
          @s.first[1] += 1
          if [UNARY_MINUS_CONTEXT, @parser.op_symbols].any? { |x| x.member? @lastop }
            yield [:UNARYMINUS, '-']
          else
            yield ['-', '-']
          end
        when /\A([#{TOKENS}])/
          text = $1
          @s.first[1] += 1
          state = :normal if (state == :env) && (text == '}')
          yield [text, text]
        when /\A(\\\\)/
          text = $1
          @s.first[1] += text.length
          yield [:DOUBLEBACK, text]
        when /\A\\([#{TOKENS}\\\\])/
          text = $1
          @s.first[1] += text.length + 1 # + 1 for the leading backslash
          yield [:SYMBOL, text]
        when /\A\\([a-zA-Z][a-zA-Z*\d]+)/
          name = $1
          type = :SYMBOL
          if @parser.funcs.member? name
            fn = @parser.funcs[name]
            # NOTE(review): a negative arity indexes from the end of
            # this table instead of raising -- confirm no registered
            # function has one.
            type = [:FUNC0, :FUNC1, :FUNC2, :FUNC3][fn.arity]
            raise LexError, "functions of arity '#{fn.arity}' unsupported" if type.nil?
          elsif @parser.envs[name]
            type = :ENV
            state = :env
          elsif @parser.macros.member? name
            fn = @parser.macros[name]
            type = [:MACRO0, :MACRO1, :MACRO2, :MACRO3][fn.arity]
            raise LexError, "macro of arity '#{fn.arity}' unsupported" if type.nil?
          end
          @s.first[1] += name.length + 1 # + 1 for the leading backslash
          yield [type, name]
        # NOTE(review): '-' and '.' are consumed by earlier branches,
        # and \d+ is tried first, so "1.5" comes out as NUMBER 1, '.',
        # NUMBER 5. Left as is; confirm against the grammar before
        # changing.
        when /\A(-?(\d+|\d*\.\d+))/
          text = $1
          @s.first[1] += text.length
          yield [:NUMBER, text]
        when /\A(\w)/
          text = $1
          @s.first[1] += text.length
          yield [:VAR, text]
        else
          raise LexError, "unlexable at position #{i}: #{s[i .. [s.length, i + 20].min]}"
        end
      end
      yield [false, false] # end-of-input marker
    end

  end

end
|