ritex 0.3 → 1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -1,5 +1,5 @@
1
1
  Author:: William Morgan (mailto: wmorgan-ritex@masanjin.net)
2
- Copyright:: Copyright 2005--2009 William Morgan
2
+ Copyright:: Copyright 2005--2010 William Morgan
3
3
  License:: GNU GPL version 2
4
4
 
5
5
  = Introduction
@@ -1,3 +1,12 @@
1
+ version 1.0, 04/13/2010
2
+ =======================
3
+ - API change: Parser#parse now takes an options hash instead of
4
+ the two boolean arguments.
5
+ - Add a :raw format which outputs LaTeX math. You can use this to
6
+ take advantage of Ritex's macro resolution.
7
+ - Remove the requirement that only one Parser exist. This has the
8
+ advantage of also making Ritex thread-safe.
9
+
1
10
  version 0.3, 06/17/2009
2
11
  =======================
3
12
  Various Ruby 1.9 updates. No API changes.
@@ -1,6 +1,6 @@
1
1
  ## lib/ritex.rb -- contains Ritex::Parser
2
2
  ## Author:: William Morgan (mailto: wmorgan-ritex@masanjin.net)
3
- ## Copyright:: Copyright 2005-2009 William Morgan
3
+ ## Copyright:: Copyright 2005-2010 William Morgan
4
4
  ## License:: GNU GPL version 2
5
5
  ##
6
6
  ## :title:Ritex: a Ruby WebTeX to MathML converter
@@ -17,17 +17,6 @@ require 'racc/parser' # just for Racc::ParserError
17
17
  ## Ritex::Parser.
18
18
  module Ritex
19
19
 
20
- ## This is not ideal by any means. Until we can call a Proc with an
21
- ## arbitrary binding (Ruby 1.9?), we will relay all #markup and
22
- ## #lookup calls within the module to a registered parser, so that the
23
- ## "functions" in lib/functions.rb can be written more easily. Any
24
- ## better ideas?
25
- ##
26
- ## In the mean time, I'd recommend not having more than one parser at
27
- ## a time going.
28
- attr_accessor :global_parser
29
- module_function :global_parser, :global_parser=
30
-
31
20
  ## Thrown by Parser upon errors. See Parser#merror=.
32
21
  class Error < StandardError; end
33
22
 
@@ -38,7 +27,7 @@ class Error < StandardError; end
38
27
  ## Create the parser with #new. Parse strings with #parse. That's all
39
28
  ## there is to it.
40
29
  class Parser
41
- FORMATS = [:mathml]
30
+ FORMATS = [:mathml, :raw]
42
31
 
43
32
  ## If true, Ritex will output a <merror>...</merror> message in the
44
33
  ## MathML if an unknown entity is encountered. If false (the default),
@@ -50,26 +39,28 @@ class Parser
50
39
  def initialize format = :mathml
51
40
  self.format = format
52
41
  @macros = {}
53
- Ritex.global_parser = self # lame
54
42
  @merror = false
55
43
  end
56
44
 
57
- ## Parse a string. Returns the MathML output in string form. Note
58
- ## that macro definitios are cumulative and persistent across calls
59
- ## to #parse. If you don't want this behavior, you must explicitly
60
- ## call #flush_macros after every #parse call.
45
+ ## Parse a string. Returns the MathML output in string form. Note that macro
46
+ ## definitions are cumulative and persistent across calls to #parse. If you
47
+ ## don't want this behavior, you must explicitly call #flush_macros after
48
+ ## every #parse call.
49
+ ##
50
+ ## _opts_ is a hash of options:
61
51
  ##
62
- ## _wrap_ denotes whether you want the output wrapped in the
63
- ## top-level XML math tag. Unless you're generating these tags
64
- ## yourself, you want this.
52
+ ## _nowrap_, if true, will omit wrapping the output in a top-level XML math
53
+ ## tag. Only useful if you're generating these tags yourself.
65
54
  ##
66
- ## _inline_ denotes whether you want inline markup versus block or
67
- ## "display" markup. For mathml output this only has an effect if
68
- ## _wrap_ is true.
69
- def parse s, wrap = true, inline = true
70
- @lex = Lexer.new(self, s)
55
+ ## _display_, if true, emits display markup, as opposed to inline markup.
56
+ ## For mathml output this only has an effect if _nowrap_ is false (the default).
57
+ def parse s, opts={}
58
+ nowrap = opts[:nowrap]
59
+ display = opts[:display]
60
+ @lex = Lexer.new self, s
71
61
  r = yyparse @lex, :lex
72
- r = markup r, (inline ? :math : :displaymath) if wrap
62
+ r = markup r, (display ? :displaymath : :math) unless nowrap
63
+ r = raw_blob_to_string(r) if @format == :raw
73
64
  r
74
65
  end
75
66
 
@@ -84,19 +75,36 @@ class Parser
84
75
 
85
76
  def markup what, tag, opts=[] #:nodoc:
86
77
  case @format
87
- when :mathml
88
- tag, opts = case tag
89
- when String
90
- [tag, opts]
91
- when Symbol
92
- a, b = MathML::MARKUP[tag]
93
- [a, [b, opts].flatten.compact.join(" ")]
94
- end
95
- unless opts.empty?
96
- "<#{tag} #{opts}>#{what}</#{tag}>"
97
- else
98
- "<#{tag}>#{what}</#{tag}>"
78
+ when :mathml; handle_mathml_markup what, tag, opts
79
+ when :raw; handle_raw_markup what, tag, opts
80
+ end
81
+ end
82
+
83
+ def handle_mathml_markup what, tag, opts
84
+ tag, opts = case tag
85
+ when String
86
+ [tag, opts]
87
+ when Symbol
88
+ a, b = MathML::MARKUP[tag]
89
+ [a, [b, opts].flatten.compact.join(" ")]
99
90
  end
91
+ unless opts.empty?
92
+ "<#{tag} #{opts}>#{what}</#{tag}>"
93
+ else
94
+ "<#{tag}>#{what}</#{tag}>"
95
+ end
96
+ end
97
+
98
+ ## this is a great example of how much of a horrible hack raw mode is
99
+ def handle_raw_markup what, tag, opts #:nodoc:
100
+ case tag
101
+ when :var; what
102
+ when :subsup; "#{what[0]}_#{what[1]}^#{what[2]}"
103
+ when :sub; "#{what[0]}_#{what[1]}"
104
+ when :sup; "#{what[0]}^#{what[1]}"
105
+ when :unaryminus; "-#{what[0]}"
106
+ when :group; "{#{raw_blob_to_string what}}"
107
+ else; what
100
108
  end
101
109
  end
102
110
 
@@ -105,34 +113,34 @@ class Parser
105
113
  when :mathml
106
114
  return error("unknown entity #{sym.inspect}") unless MathML::ENTITIES.member? sym
107
115
  MathML::ENTITIES[sym]
116
+ when :raw
117
+ "\\" + sym
108
118
  end
109
119
  end
110
120
 
111
121
  def token o #:nodoc:
112
122
  case @format
113
- when :mathml
114
- MathML::TOKENS[o] || o
123
+ when :mathml; MathML::TOKENS[o] || o
124
+ when :raw; o
115
125
  end
116
126
  end
117
127
 
118
128
  def op o, opts=[]
119
129
  case @format
120
- when :mathml
121
- markup(token(o), "mo", opts)
130
+ when :mathml; markup(token(o), "mo", opts)
131
+ when :raw; o
122
132
  end
123
133
  end
124
134
 
125
135
  def funcs #:nodoc:
126
136
  case @format
127
- when :mathml
128
- MathML::FUNCTIONS
137
+ when :mathml, :raw; MathML::FUNCTIONS
129
138
  end
130
139
  end
131
140
 
132
141
  def envs #:nodoc:
133
142
  case @format
134
- when :mathml
135
- MathML::ENVS
143
+ when :mathml, :raw; MathML::ENVS
136
144
  end
137
145
  end
138
146
 
@@ -142,12 +150,12 @@ class Parser
142
150
 
143
151
  def op_symbols #:nodoc:
144
152
  case @format
145
- when :mathml
146
- MathML::OPERATORS.merge(MathML::UNARY_OPERATORS).merge(MathML::MATH_FUNCTIONS)
153
+ when :mathml, :raw; MathML::OPERATORS.merge(MathML::UNARY_OPERATORS).merge(MathML::MATH_FUNCTIONS)
147
154
  end
148
155
  end
149
156
 
150
157
  private
158
+
151
159
  def error e
152
160
  if @merror
153
161
  "<merror>e</merror>"
@@ -158,35 +166,80 @@ private
158
166
 
159
167
  def safe s
160
168
  case @format
161
- when :mathml
162
- s.gsub("&", "&amp;").gsub(">", "&gt;").gsub("<", "&lt;")
169
+ when :mathml; s.gsub("&", "&amp;").gsub(">", "&gt;").gsub("<", "&lt;")
170
+ when :raw; s
163
171
  end
164
172
  end
165
173
 
166
174
  def join *a
167
175
  case @format
168
- when :mathml
169
- a.join ""
176
+ when :mathml; a.join
177
+ when :raw # horrible hack for raw "blobs"
178
+ if a.size == 1
179
+ a[0]
180
+ elsif a.size == 2 && a.first == ""
181
+ a[1]
182
+ else
183
+ a.flatten
184
+ end
170
185
  end
171
186
  end
172
187
 
173
188
  def special name, *a
174
189
  if @macros.member? name
175
- # puts "evaluating macro (arity #{@macros[name].arity}): type #{name.inspect}, #{a.length} args #{a.inspect}"
190
+ #puts "evaluating macro (arity #{@macros[name].arity}): type #{name.inspect}, #{a.length} args #{a.inspect}"
176
191
  res = @macros[name][*a]
177
- # puts "got #{res}"
192
+ res = raw_blob_to_string res if @format == :raw
193
+ #puts "got #{res}"
178
194
  @lex.push res
179
195
  ""
180
196
  elsif funcs.member? name
181
197
  # puts "*** running func #{name}"
182
- funcs[name][*a]
198
+ if @format == :raw
199
+ "\\#{name}" + a.map { |x| raw_funarg(x) }.join
200
+ else
201
+ interpret funcs[name][*a]
202
+ end
183
203
  elsif envs.member? name
184
- envs[name][*a]
204
+ if @format == :raw
205
+ "\\#{name}" + a.map { |x| raw_funarg(x) }.join
206
+ else
207
+ interpret envs[name][*a]
208
+ end
185
209
  else
186
210
  error "unknown function, macro or environment #{name.inspect}"
187
211
  end
188
212
  end
189
213
 
214
+ def raw_funarg f
215
+ f = raw_blob_to_string f
216
+ f[0, 1] == '{' ? f : "{#{f}}"
217
+ end
218
+
219
+ def raw_blob_to_string x #:nodoc:
220
+ case x
221
+ when String; x
222
+ when Array; x.join
223
+ else; x # ?
224
+ end
225
+ end
226
+
227
+ ## functions and environments return either a [method, [args]] array, or a
228
+ ## string. if the former, do the call; if the latter, just use the string
229
+ ## directly.
230
+ ##
231
+ ## possibly one level of indirection too many going on here, but it makes
232
+ ## writing the functions really simple.
233
+ def interpret x # :nodoc:
234
+ case x
235
+ when Array
236
+ m, args = x
237
+ send m, *args
238
+ else
239
+ x
240
+ end
241
+ end
242
+
190
243
  def define sym, arity, exp
191
244
  arity = arity.to_i
192
245
  raise Error, "macro arity must be <= 3" unless arity <= 3
@@ -196,10 +249,11 @@ private
196
249
  warn "overriding definition for #{sym}" if @macros.member? sym
197
250
  @macros[sym] = lambda do |*a|
198
251
  raise Error, "expecting #{arity} arguments, got #{a.length}" unless a.length == arity
199
- # puts "evaluating macro #{sym}, args #{a.inspect}"
200
- x = (0 ... arity).inject(exp) { |s, i| s.gsub(/\##{i + 1}/, a[i]) }
201
- # puts "macro evals to: #{x.inspect}"
202
- x
252
+ if @format == :raw
253
+ a = a.map { |x| raw_blob_to_string x }
254
+ exp = raw_blob_to_string(exp)
255
+ end
256
+ (0 ... arity).inject(exp) { |s, i| s.gsub(/\##{i + 1}/, a[i]) }
203
257
  end
204
258
  @macros[sym].instance_eval "def arity; #{arity}; end" # hack!
205
259
  ""
@@ -1,6 +1,6 @@
1
1
  ## lib/ritex/lexer.rb -- contains Ritex::Lexer
2
2
  ## Author:: William Morgan (mailto: wmorgan-ritex@masanjin.net)
3
- ## Copyright:: Copyright 2005--2009 William Morgan
3
+ ## Copyright:: Copyright 2005--2010 William Morgan
4
4
  ## License:: GNU GPL version 2
5
5
 
6
6
  require 'racc/parser' # just for Racc::ParseError
@@ -77,7 +77,7 @@ private
77
77
  name = $1
78
78
  @s.first[1] += name.length + 1
79
79
  yield [token, name]
80
- state = :env if @parser.envs[name]
80
+ state = :env if @parser.envs.member?(name)
81
81
  true
82
82
  end
83
83
  end
@@ -106,7 +106,7 @@ private
106
106
  proc = @parser.funcs[name]
107
107
  type = [:FUNC0, :FUNC1, :FUNC2, :FUNC3][proc.arity]
108
108
  raise LexError, "functions of arity '#{proc.arity}' unsupported" if type.nil?
109
- elsif @parser.envs[name]
109
+ elsif @parser.envs.member? name
110
110
  type = :ENV
111
111
  state = :env
112
112
  elsif @parser.macros.member? name
@@ -17,11 +17,8 @@ end
17
17
  module Ritex
18
18
  module MathML
19
19
 
20
- ## this is not ideal by any means. until we can call lambda with
21
- ## arbitrary bindings, though, we will relay markup and lookup calls
22
- ## to a registered parser, so that "functions" can be written easily.
23
- def markup *a; Ritex::global_parser.markup(*a); end
24
- def lookup *a; Ritex::global_parser.lookup(*a); end
20
+ def markup *a; [:markup, a] end
21
+ def lookup *a; [:lookup, a] end
25
22
  module_function :markup, :lookup
26
23
 
27
24
  FUNCTIONS = {
@@ -2,7 +2,7 @@
2
2
  ## See Ritex::MathML.
3
3
  ##
4
4
  ## Author:: William Morgan (mailto: wmorgan-ritex@masanjin.net)
5
- ## Copyright:: Copyright 2005--2009 William Morgan
5
+ ## Copyright:: Copyright 2005--2010 William Morgan
6
6
  ## License:: GNU GPL version 2
7
7
 
8
8
  module Ritex
@@ -1,6 +1,6 @@
1
1
  ## test/mathml.rb -- contains Ritex::Test::MathML
2
2
  ## Author:: William Morgan (mailto: wmorgan-ritex@masanjin.net)
3
- ## Copyright:: Copyright 2005--2009 William Morgan
3
+ ## Copyright:: Copyright 2005--2010 William Morgan
4
4
  ## License:: GNU GPL version 2
5
5
 
6
6
  require 'test/unit'
@@ -51,17 +51,18 @@ class MathML < ::Test::Unit::TestCase
51
51
  ## by the binary answer-key generator.
52
52
  def cmp s, same_as=nil
53
53
  gold = gold_standard(same_as || s)
54
- test = @p.parse s, false
54
+ test = @p.parse s, :nowrap => true
55
55
 
56
- if gold.gsub('"', "'") != test.gsub('"', "'")
57
- puts "BADBADBADBAD"
58
- p gold
59
- p test
60
- exit
61
- end
62
56
  assert_equal gold.gsub('"', "'"), test.gsub('"', "'"), "Difference in MathML output of #{s.inspect}"
63
57
  end
64
58
 
59
+ ## compare in raw mode
60
+ def cmp_raw s, gold
61
+ @raw_parser ||= Ritex::Parser.new :raw
62
+ x = @raw_parser.parse s
63
+ assert_equal gold, x
64
+ end
65
+
65
66
  ## fix up itex2MML's common known errors
66
67
  def fix_itex2mml_output l
67
68
  l.gsub(/<mi>&(lt|gt);<\/mi>/, "<mo>&\\1;</mo>").
@@ -226,6 +227,19 @@ class MathML < ::Test::Unit::TestCase
226
227
  cmp '\float{\goat}{\boat{\goat}}', '{\frac{\alpha-{\mathbb{GOAT}}}{\beta-{{\mathbb{GOAT}}-x}}}'
227
228
  end
228
229
 
230
+ def test_macros_in_raw_mode
231
+ cmp_raw '\define{\goat}{\mathbb{GOAT}}', ''
232
+ cmp_raw '\goat_3', '{\mathbb{GOAT}}_3'
233
+ cmp_raw '\define{\boat}[1]{#1-x}', ''
234
+ cmp_raw '\boat{y}', '{y-x}'
235
+ cmp_raw '\boat{\boat{N}}', '{{N-x}-x}'
236
+ cmp_raw '\boat{\boat{\boat{N}}}', '{{{N-x}-x}-x}'
237
+ cmp_raw '\frac{\boat{N}}{2}', '\frac{{N-x}}{2}'
238
+ cmp_raw '\define{\float}[2]{\frac{\alpha-#1}{\beta-#2}}', ''
239
+ cmp_raw '\float{\delta}{\gamma}', '{\frac{\alpha-\delta}{\beta-\gamma}}'
240
+ cmp_raw '\float{\goat}{\boat{\goat}}', '{\frac{\alpha-{\mathbb{GOAT}}}{\beta-{{\mathbb{GOAT}}-x}}}'
241
+ end
242
+
229
243
  def test_more_macros
230
244
  cmp '\define{\exp}[1]{E_\theta\left[#1\right]} \exp{\hat{\theta}}', '{E_\theta\left[\hat{\theta}\right]}'
231
245
  end
@@ -286,7 +300,7 @@ private
286
300
  ret = normalize_itex2mml_output(fix_itex2mml_output(ret))
287
301
 
288
302
  if ENV["ALLOW_OVERRIDE"]
289
- test = Ritex::Parser.new.parse s, false
303
+ test = Ritex::Parser.new.parse s, :nowrap => true
290
304
 
291
305
  if ret.gsub('"', "'") != test.gsub('"', "'")
292
306
  puts
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ritex
3
3
  version: !ruby/object:Gem::Version
4
- version: "0.3"
4
+ version: "1.0"
5
5
  platform: ruby
6
6
  authors:
7
7
  - William Morgan
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-06-17 00:00:00 -04:00
12
+ date: 2010-04-13 00:00:00 -04:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -28,12 +28,12 @@ files:
28
28
  - lib/ritex/mathml/functions.rb
29
29
  - lib/ritex/mathml/markup.rb
30
30
  - lib/ritex/lexer.rb
31
+ - lib/ritex/parser.rb
31
32
  - test/all.rb
32
33
  - test/parser.rb
33
34
  - test/mathml.rb
34
35
  - README
35
36
  - ReleaseNotes
36
- - lib/ritex/parser.rb
37
37
  - test/answer-key.yaml
38
38
  has_rdoc: true
39
39
  homepage: http://masanjin.net/ritex/