html2doc 0.8.6 → 0.8.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -7
- data/Gemfile +1 -2
- data/Gemfile.lock +20 -22
- data/html2doc.gemspec +2 -2
- data/lib/asciimath/cli.rb +18 -0
- data/lib/asciimath/html.rb +222 -0
- data/lib/asciimath/mathml.rb +131 -0
- data/lib/asciimath/parser.rb +591 -0
- data/lib/asciimath/version.rb +3 -0
- data/lib/html2doc.rb +3 -0
- data/lib/html2doc/base.rb +12 -1
- data/lib/html2doc/mime.rb +3 -1
- data/lib/html2doc/version.rb +1 -1
- data/spec/html2doc_spec.rb +2 -1
- metadata +13 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8b4a4257cfea9d3621715b98d906f8eb0ffdc7ce49f38b732dc2aada2e7bf122
|
4
|
+
data.tar.gz: a767085867c7dc1eb105e90eeb221ba4c89383dad79a61615df7e03615f890fb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0d47018c5d6c3d9432f860485994462cfc6634d487de4a72375df7974b9af51d0409c5ac0bdc1ffcac585c51fc329b5d39cb4c1e6810821ba6489dfbd4b4e557
|
7
|
+
data.tar.gz: 5067f79bb2dca4836021d960d807a9ec81a97b5ec3545588fe60120e98d2d46f26665f7a5ac0c6ffd5bed50e7f858017f1fd6677edd8e10958240afae11c23bc
|
data/.rubocop.yml
CHANGED
@@ -3,13 +3,8 @@
|
|
3
3
|
# All project-specific additions and overrides should be specified in this file.
|
4
4
|
|
5
5
|
inherit_from:
|
6
|
-
|
7
|
-
- ".rubocop.tb.yml"
|
8
|
-
# Overrides from Ribose
|
9
|
-
- ".rubocop.ribose.yml"
|
6
|
+
- https://raw.githubusercontent.com/riboseinc/oss-guides/master/ci/rubocop.yml
|
10
7
|
AllCops:
|
11
|
-
|
12
|
-
StyleGuideCopsOnly: false
|
13
|
-
TargetRubyVersion: 2.4
|
8
|
+
TargetRubyVersion: 2.3
|
14
9
|
Rails:
|
15
10
|
Enabled: true
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
html2doc (0.8.
|
4
|
+
html2doc (0.8.7)
|
5
5
|
asciimath
|
6
6
|
htmlentities (~> 4.3.4)
|
7
7
|
image_size
|
@@ -14,7 +14,7 @@ PATH
|
|
14
14
|
GEM
|
15
15
|
remote: https://rubygems.org/
|
16
16
|
specs:
|
17
|
-
asciimath (1.0.
|
17
|
+
asciimath (1.0.7)
|
18
18
|
ast (2.4.0)
|
19
19
|
byebug (9.1.0)
|
20
20
|
coderay (1.1.2)
|
@@ -22,9 +22,9 @@ GEM
|
|
22
22
|
docile (1.3.1)
|
23
23
|
equivalent-xml (0.6.0)
|
24
24
|
nokogiri (>= 1.4.3)
|
25
|
-
ffi (1.
|
25
|
+
ffi (1.10.0)
|
26
26
|
formatador (0.2.5)
|
27
|
-
guard (2.
|
27
|
+
guard (2.15.0)
|
28
28
|
formatador (>= 0.2.4)
|
29
29
|
listen (>= 2.7, < 4.0)
|
30
30
|
lumberjack (>= 1.0.12, < 2.0)
|
@@ -40,21 +40,20 @@ GEM
|
|
40
40
|
rspec (>= 2.99.0, < 4.0)
|
41
41
|
htmlentities (4.3.4)
|
42
42
|
image_size (2.0.0)
|
43
|
-
jaro_winkler (1.5.1)
|
44
43
|
json (2.1.0)
|
45
44
|
listen (3.1.5)
|
46
45
|
rb-fsevent (~> 0.9, >= 0.9.4)
|
47
46
|
rb-inotify (~> 0.9, >= 0.9.7)
|
48
47
|
ruby_dep (~> 1.2)
|
49
48
|
lumberjack (1.0.13)
|
50
|
-
method_source (0.9.
|
49
|
+
method_source (0.9.2)
|
51
50
|
mime-types (3.2.2)
|
52
51
|
mime-types-data (~> 3.2015)
|
53
52
|
mime-types-data (3.2018.0812)
|
54
|
-
mini_portile2 (2.
|
53
|
+
mini_portile2 (2.4.0)
|
55
54
|
nenv (0.3.0)
|
56
|
-
nokogiri (1.
|
57
|
-
mini_portile2 (~> 2.
|
55
|
+
nokogiri (1.10.0)
|
56
|
+
mini_portile2 (~> 2.4.0)
|
58
57
|
notiffany (0.1.1)
|
59
58
|
nenv (~> 0.1)
|
60
59
|
shellany (~> 0.0)
|
@@ -62,14 +61,14 @@ GEM
|
|
62
61
|
parser (2.5.3.0)
|
63
62
|
ast (~> 2.4.0)
|
64
63
|
powerpack (0.1.2)
|
65
|
-
pry (0.
|
64
|
+
pry (0.12.2)
|
66
65
|
coderay (~> 1.1.0)
|
67
66
|
method_source (~> 0.9.0)
|
68
67
|
rainbow (3.0.0)
|
69
|
-
rake (12.3.
|
68
|
+
rake (12.3.2)
|
70
69
|
rb-fsevent (0.10.3)
|
71
|
-
rb-inotify (0.
|
72
|
-
ffi (
|
70
|
+
rb-inotify (0.10.0)
|
71
|
+
ffi (~> 1.0)
|
73
72
|
rspec (3.8.0)
|
74
73
|
rspec-core (~> 3.8.0)
|
75
74
|
rspec-expectations (~> 3.8.0)
|
@@ -85,14 +84,13 @@ GEM
|
|
85
84
|
diff-lcs (>= 1.2.0, < 2.0)
|
86
85
|
rspec-support (~> 3.8.0)
|
87
86
|
rspec-support (3.8.0)
|
88
|
-
rubocop (0.
|
89
|
-
jaro_winkler (~> 1.5.1)
|
87
|
+
rubocop (0.54.0)
|
90
88
|
parallel (~> 1.10)
|
91
|
-
parser (>= 2.5
|
89
|
+
parser (>= 2.5)
|
92
90
|
powerpack (~> 0.1)
|
93
91
|
rainbow (>= 2.2.2, < 4.0)
|
94
92
|
ruby-progressbar (~> 1.7)
|
95
|
-
unicode-display_width (~> 1.
|
93
|
+
unicode-display_width (~> 1.0, >= 1.0.1)
|
96
94
|
ruby-progressbar (1.10.0)
|
97
95
|
ruby-xslt (0.9.10)
|
98
96
|
ruby_dep (1.5.0)
|
@@ -102,17 +100,17 @@ GEM
|
|
102
100
|
json (>= 1.8, < 3)
|
103
101
|
simplecov-html (~> 0.10.0)
|
104
102
|
simplecov-html (0.10.2)
|
105
|
-
thor (0.20.
|
103
|
+
thor (0.20.3)
|
106
104
|
thread_safe (0.3.6)
|
107
105
|
timecop (0.9.1)
|
108
|
-
unicode-display_width (1.4.
|
106
|
+
unicode-display_width (1.4.1)
|
109
107
|
uuidtools (2.1.5)
|
110
108
|
|
111
109
|
PLATFORMS
|
112
110
|
ruby
|
113
111
|
|
114
112
|
DEPENDENCIES
|
115
|
-
bundler (~> 1
|
113
|
+
bundler (~> 2.0.1)
|
116
114
|
byebug (~> 9.1)
|
117
115
|
equivalent-xml (~> 0.6)
|
118
116
|
guard (~> 2.14)
|
@@ -121,9 +119,9 @@ DEPENDENCIES
|
|
121
119
|
rake (~> 12.0)
|
122
120
|
rspec
|
123
121
|
rspec-match_fuzzy (~> 0.1.3)
|
124
|
-
rubocop (
|
122
|
+
rubocop (= 0.54.0)
|
125
123
|
simplecov (~> 0.15)
|
126
124
|
timecop (~> 0.9)
|
127
125
|
|
128
126
|
BUNDLED WITH
|
129
|
-
|
127
|
+
2.0.1
|
data/html2doc.gemspec
CHANGED
@@ -34,14 +34,14 @@ Gem::Specification.new do |spec|
|
|
34
34
|
spec.add_dependency "ruby-xslt"
|
35
35
|
spec.add_dependency "asciimath"
|
36
36
|
|
37
|
-
spec.add_development_dependency "bundler", "~> 1
|
37
|
+
spec.add_development_dependency "bundler", "~> 2.0.1"
|
38
38
|
spec.add_development_dependency "byebug", "~> 9.1"
|
39
39
|
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
40
40
|
spec.add_development_dependency "guard", "~> 2.14"
|
41
41
|
spec.add_development_dependency "guard-rspec", "~> 4.7"
|
42
42
|
spec.add_development_dependency "rake", "~> 12.0"
|
43
43
|
spec.add_development_dependency "rspec", "~> 3.6"
|
44
|
-
spec.add_development_dependency "rubocop", "
|
44
|
+
spec.add_development_dependency "rubocop", "= 0.54.0"
|
45
45
|
spec.add_development_dependency "simplecov", "~> 0.15"
|
46
46
|
spec.add_development_dependency "timecop", "~> 0.9"
|
47
47
|
spec.add_development_dependency "rspec-match_fuzzy", "~> 0.1.3"
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require_relative 'parser'
|
2
|
+
require_relative 'mathml'
|
3
|
+
require_relative 'html'
|
4
|
+
|
5
|
+
module AsciiMath
|
6
|
+
module CLI
|
7
|
+
def self.run(args)
|
8
|
+
asciimath = args.last
|
9
|
+
output = ''
|
10
|
+
if args.length == 1 || args.first == "mathml"
|
11
|
+
output = AsciiMath.parse(asciimath).to_mathml
|
12
|
+
elsif args.first == "html"
|
13
|
+
output = AsciiMath.parse(asciimath).to_html
|
14
|
+
end
|
15
|
+
puts output
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,222 @@
|
|
1
|
+
module AsciiMath
|
2
|
+
class HTMLBuilder
|
3
|
+
def initialize(prefix)
|
4
|
+
@prefix = prefix
|
5
|
+
@html = ''
|
6
|
+
end
|
7
|
+
|
8
|
+
def to_s
|
9
|
+
@html
|
10
|
+
end
|
11
|
+
|
12
|
+
def append_expression(expression, inline, attrs = {})
|
13
|
+
if inline
|
14
|
+
inline('', attrs) do
|
15
|
+
append(expression, :single_child => true)
|
16
|
+
end
|
17
|
+
else
|
18
|
+
block('', attrs) do
|
19
|
+
append(expression, :single_child => true)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
private
|
25
|
+
|
26
|
+
ZWJ = "\u200D" # zero-width joiner: code point U+200D (decimal 8205); "\u8205" would be an unrelated CJK ideograph
|
27
|
+
|
28
|
+
def append(expression, opts = {})
|
29
|
+
case expression
|
30
|
+
when Array
|
31
|
+
row do
|
32
|
+
expression.each { |e| append(e) }
|
33
|
+
end
|
34
|
+
when Hash
|
35
|
+
case expression[:type]
|
36
|
+
when :operator
|
37
|
+
operator(expression[:c])
|
38
|
+
when :identifier
|
39
|
+
identifier(expression[:c])
|
40
|
+
when :number
|
41
|
+
number(expression[:c])
|
42
|
+
when :text
|
43
|
+
text(expression[:c])
|
44
|
+
when :paren
|
45
|
+
paren = !opts[:strip_paren]
|
46
|
+
if paren
|
47
|
+
if opts[:single_child]
|
48
|
+
brace(expression[:lparen]) if expression[:lparen]
|
49
|
+
append(expression[:e], :single_child => true)
|
50
|
+
brace(expression[:rparen]) if expression[:rparen]
|
51
|
+
else
|
52
|
+
row do
|
53
|
+
brace(expression[:lparen]) if expression[:lparen]
|
54
|
+
append(expression[:e], :single_child => true)
|
55
|
+
brace(expression[:rparen]) if expression[:rparen]
|
56
|
+
end
|
57
|
+
end
|
58
|
+
else
|
59
|
+
append(expression[:e])
|
60
|
+
end
|
61
|
+
when :font
|
62
|
+
#TODO - currently ignored
|
63
|
+
when :unary
|
64
|
+
operator = expression[:operator]
|
65
|
+
tag(operator) do
|
66
|
+
append(expression[:s], :single_child => true, :strip_paren => true)
|
67
|
+
end
|
68
|
+
when :binary
|
69
|
+
operator = expression[:operator]
|
70
|
+
if operator == :frac
|
71
|
+
append_fraction(expression[:s1],expression[:s2])
|
72
|
+
elsif operator == :sub
|
73
|
+
append_subsup(expression[:s1],expression[:s2],nil)
|
74
|
+
elsif operator == :sup
|
75
|
+
append_subsup(expression[:s1],nil,expression[:s2])
|
76
|
+
elsif operator == :under
|
77
|
+
append_underover(expression[:s1],expression[:s2],nil)
|
78
|
+
elsif operator == :over
|
79
|
+
append_underover(expression[:s1],nil,expression[:s2])
|
80
|
+
else
|
81
|
+
tag(operator) do
|
82
|
+
append(expression[:s1], :strip_paren => true)
|
83
|
+
append(expression[:s2], :strip_paren => true)
|
84
|
+
end
|
85
|
+
end
|
86
|
+
when :ternary
|
87
|
+
operator = expression[:operator]
|
88
|
+
if operator == :subsup
|
89
|
+
append_subsup(expression[:s1],expression[:s2],expression[:s3])
|
90
|
+
elsif operator == :underover
|
91
|
+
# TODO: Handle over/under braces in some way? SVG maybe?
|
92
|
+
append_underover(expression[:s1],expression[:s2],expression[:s3])
|
93
|
+
end
|
94
|
+
when :matrix
|
95
|
+
row do
|
96
|
+
# Figures out a font size for the braces, based on the height of the matrix.
|
97
|
+
# NOTE: This does not currently consider the size of each element within the matrix.
|
98
|
+
brace_height = "font-size: " + expression[:rows].length.to_s + "00%;"
|
99
|
+
|
100
|
+
if expression[:lparen]
|
101
|
+
brace(expression[:lparen], {:style => brace_height})
|
102
|
+
else
|
103
|
+
blank(ZWJ)
|
104
|
+
end
|
105
|
+
matrix_width = "grid-template-columns:repeat(" + expression[:rows][0].length.to_s + ",1fr);"
|
106
|
+
matrix_height = "grid-template-rows:repeat(" + expression[:rows].length.to_s + ",1fr);"
|
107
|
+
|
108
|
+
matrix({:style => (matrix_width + matrix_height)}) do
|
109
|
+
expression[:rows].each do |row|
|
110
|
+
row.each do |col|
|
111
|
+
row do
|
112
|
+
append(col)
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
if expression[:rparen]
|
118
|
+
brace(expression[:rparen], {:style => brace_height})
|
119
|
+
else
|
120
|
+
blank(ZWJ)
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
def append_subsup(base, sub, sup)
|
128
|
+
append(base)
|
129
|
+
subsup do
|
130
|
+
if sup
|
131
|
+
smaller do
|
132
|
+
append(sup, :strip_paren => true)
|
133
|
+
end
|
134
|
+
else
|
135
|
+
smaller(ZWJ)
|
136
|
+
end
|
137
|
+
if sub
|
138
|
+
smaller do
|
139
|
+
append(sub, :strip_paren => true)
|
140
|
+
end
|
141
|
+
else
|
142
|
+
smaller(ZWJ)
|
143
|
+
end
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
def append_underover(base, under, over)
|
148
|
+
blank(ZWJ)
|
149
|
+
underover do
|
150
|
+
smaller do
|
151
|
+
if over
|
152
|
+
append(over, :strip_paren => true)
|
153
|
+
else
|
154
|
+
blank(ZWJ)
|
155
|
+
end
|
156
|
+
end
|
157
|
+
append(base)
|
158
|
+
smaller do
|
159
|
+
if under
|
160
|
+
append(under, :strip_paren => true)
|
161
|
+
else
|
162
|
+
blank(ZWJ)
|
163
|
+
end
|
164
|
+
end
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|
168
|
+
def append_fraction(numerator, denominator)
|
169
|
+
blank(ZWJ)
|
170
|
+
fraction do
|
171
|
+
fraction_row do
|
172
|
+
fraction_cell do
|
173
|
+
smaller do
|
174
|
+
row do
|
175
|
+
append(numerator, :strip_paren => true)
|
176
|
+
end
|
177
|
+
end
|
178
|
+
end
|
179
|
+
end
|
180
|
+
fraction_row do
|
181
|
+
fraction_cell do
|
182
|
+
smaller do
|
183
|
+
row do
|
184
|
+
append(denominator, :strip_paren => true)
|
185
|
+
end
|
186
|
+
end
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
def method_missing(meth, *args, &block)
|
193
|
+
tag(meth, *args, &block)
|
194
|
+
end
|
195
|
+
|
196
|
+
def tag(tag, *args)
|
197
|
+
attrs = args.last.is_a?(Hash) ? args.pop : {}
|
198
|
+
text = args.last.is_a?(String) ? args.pop : ''
|
199
|
+
|
200
|
+
@html << '<span class="math-' << @prefix << tag.to_s << '"'
|
201
|
+
|
202
|
+
attrs.each_pair do |key, value|
|
203
|
+
@html << ' ' << key.to_s << '="' << value.to_s << '"'
|
204
|
+
end
|
205
|
+
|
206
|
+
if block_given? || text
|
207
|
+
@html << '>'
|
208
|
+
@html << text.encode(Encoding::US_ASCII, :xml => :text) if text
|
209
|
+
yield if block_given?
|
210
|
+
@html << '</span>'
|
211
|
+
else
|
212
|
+
@html << '/>'
|
213
|
+
end
|
214
|
+
end
|
215
|
+
end
|
216
|
+
|
217
|
+
class Expression
|
218
|
+
def to_html(prefix = "", inline = true, attrs = {})
|
219
|
+
HTMLBuilder.new(prefix).append_expression(@parsed_expression, inline, attrs).to_s
|
220
|
+
end
|
221
|
+
end
|
222
|
+
end
|
@@ -0,0 +1,131 @@
|
|
1
|
+
module AsciiMath
|
2
|
+
class MathMLBuilder
|
3
|
+
def initialize(prefix)
|
4
|
+
@prefix = prefix
|
5
|
+
@mathml = ''
|
6
|
+
end
|
7
|
+
|
8
|
+
def to_s
|
9
|
+
@mathml
|
10
|
+
end
|
11
|
+
|
12
|
+
def append_expression(expression, attrs = {})
|
13
|
+
math('', attrs) do
|
14
|
+
append(expression, :single_child => true)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
def append(expression, opts = {})
|
21
|
+
case expression
|
22
|
+
when Array
|
23
|
+
if expression.length <= 1 || opts[:single_child]
|
24
|
+
expression.each { |e| append(e) }
|
25
|
+
else
|
26
|
+
mrow do
|
27
|
+
expression.each { |e| append(e) }
|
28
|
+
end
|
29
|
+
end
|
30
|
+
when Hash
|
31
|
+
case expression[:type]
|
32
|
+
when :operator
|
33
|
+
mo(expression[:c])
|
34
|
+
when :identifier
|
35
|
+
mi(expression[:c])
|
36
|
+
when :number
|
37
|
+
mn(expression[:c])
|
38
|
+
when :text
|
39
|
+
mtext(expression[:c])
|
40
|
+
when :paren
|
41
|
+
paren = !opts[:strip_paren]
|
42
|
+
if paren
|
43
|
+
if opts[:single_child]
|
44
|
+
mo(expression[:lparen]) if expression[:lparen]
|
45
|
+
append(expression[:e], :single_child => true)
|
46
|
+
mo(expression[:rparen]) if expression[:rparen]
|
47
|
+
else
|
48
|
+
mrow do
|
49
|
+
mo(expression[:lparen]) if expression[:lparen]
|
50
|
+
append(expression[:e], :single_child => true)
|
51
|
+
mo(expression[:rparen]) if expression[:rparen]
|
52
|
+
end
|
53
|
+
end
|
54
|
+
else
|
55
|
+
append(expression[:e])
|
56
|
+
end
|
57
|
+
when :font
|
58
|
+
style = expression[:operator]
|
59
|
+
tag("mstyle", :mathvariant => style.to_s.gsub('_', '-')) do
|
60
|
+
append(expression[:s], :single_child => true, :strip_paren => true)
|
61
|
+
end
|
62
|
+
when :unary
|
63
|
+
operator = expression[:operator]
|
64
|
+
tag("m#{operator}") do
|
65
|
+
append(expression[:s], :single_child => true, :strip_paren => true)
|
66
|
+
end
|
67
|
+
when :binary
|
68
|
+
operator = expression[:operator]
|
69
|
+
tag("m#{operator}") do
|
70
|
+
append(expression[:s1], :strip_paren => (operator != :sub && operator != :sup))
|
71
|
+
append(expression[:s2], :strip_paren => true)
|
72
|
+
end
|
73
|
+
when :ternary
|
74
|
+
operator = expression[:operator]
|
75
|
+
tag("m#{operator}") do
|
76
|
+
append(expression[:s1])
|
77
|
+
append(expression[:s2], :strip_paren => true)
|
78
|
+
append(expression[:s3], :strip_paren => true)
|
79
|
+
end
|
80
|
+
when :matrix
|
81
|
+
mrow do
|
82
|
+
mo(expression[:lparen]) if expression[:lparen]
|
83
|
+
mtable do
|
84
|
+
expression[:rows].each do |row|
|
85
|
+
mtr do
|
86
|
+
row.each do |col|
|
87
|
+
mtd do
|
88
|
+
append(col)
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
mo(expression[:rparen]) if expression[:rparen]
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
def method_missing(meth, *args, &block)
|
101
|
+
tag(meth, *args, &block)
|
102
|
+
end
|
103
|
+
|
104
|
+
def tag(tag, *args)
|
105
|
+
attrs = args.last.is_a?(Hash) ? args.pop : {}
|
106
|
+
text = args.last.is_a?(String) ? args.pop : ''
|
107
|
+
|
108
|
+
@mathml << '<' << @prefix << tag.to_s
|
109
|
+
|
110
|
+
attrs.each_pair do |key, value|
|
111
|
+
@mathml << ' ' << key.to_s << '="' << value.to_s << '"'
|
112
|
+
end
|
113
|
+
|
114
|
+
|
115
|
+
if block_given? || text
|
116
|
+
@mathml << '>'
|
117
|
+
@mathml << text.encode(Encoding::US_ASCII, :xml => :text) if text
|
118
|
+
yield self if block_given?
|
119
|
+
@mathml << '</' << @prefix << tag.to_s << '>'
|
120
|
+
else
|
121
|
+
@mathml << '/>'
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
class Expression
|
127
|
+
def to_mathml(prefix = "", attrs = {})
|
128
|
+
MathMLBuilder.new(prefix).append_expression(@parsed_expression, attrs).to_s
|
129
|
+
end
|
130
|
+
end
|
131
|
+
end
|
@@ -0,0 +1,591 @@
|
|
1
|
+
require 'strscan'
|
2
|
+
|
3
|
+
# Parser for ASCIIMath expressions.
|
4
|
+
#
|
5
|
+
# The syntax for ASCIIMath in EBNF style notation is
|
6
|
+
#
|
7
|
+
# expr = ( simp ( fraction | sub | super ) )+
|
8
|
+
# simp = constant | paren_expr | unary_expr | binary_expr | text
|
9
|
+
# fraction = '/' simp
|
10
|
+
# super = '^' simp
|
11
|
+
# sub = '_' simp super?
|
12
|
+
# paren_expr = lparen expr rparen
|
13
|
+
# lparen = '(' | '[' | '{' | '(:' | '{:'
|
14
|
+
# rparen = ')' | ']' | '}' | ':)' | ':}'
|
15
|
+
# unary_expr = unary_op simp
|
16
|
+
# unary_op = 'sqrt' | 'text'
|
17
|
+
# binary_expr = binary_op simp simp
|
18
|
+
# binary_op = 'frac' | 'root' | 'stackrel'
|
19
|
+
# text = '"' [^"]* '"'
|
20
|
+
# constant = number | symbol | identifier
|
21
|
+
# number = '-'? [0-9]+ ( '.' [0-9]+ )?
|
22
|
+
# symbol = /* any string in the symbol table */
|
23
|
+
# identifier = [A-z]
|
24
|
+
#
|
25
|
+
# ASCIIMath is parsed left to right without any form of operator precedence.
|
26
|
+
# When parsing the 'constant' the parser will try to find the longest matching string in the symbol
|
27
|
+
# table starting at the current position of the parser. If no matching string can be found the
|
28
|
+
# character at the current position of the parser is interpreted as an identifier instead.
|
29
|
+
module AsciiMath
|
30
|
+
# Internal: Splits an ASCIIMath expression into a sequence of tokens.
|
31
|
+
# Each token is represented as a Hash containing the keys :value and :type.
|
32
|
+
# The :value key is used to store the text associated with each token.
|
33
|
+
# The :type key indicates the semantics of the token. The value for :type will be one
|
34
|
+
# of the following symbols:
|
35
|
+
#
|
36
|
+
# - :identifier a symbolic name or a bit of text without any further semantics
|
37
|
+
# - :text a bit of arbitrary text
|
38
|
+
# - :number a number
|
39
|
+
# - :operator a mathematical operator symbol
|
40
|
+
# - :unary a unary operator (e.g., sqrt, text, ...)
|
41
|
+
# - :font a unary font command (e.g., bb, cc, ...)
|
42
|
+
# - :infix an infix operator (e.g., /, _, ^, ...)
|
43
|
+
# - :binary a binary operator (e.g., frac, root, ...)
|
44
|
+
# - :accent an accent character
|
45
|
+
# - :eof indicates no more tokens are available
|
46
|
+
#
|
47
|
+
# Each token type may also have an :underover modifier. When present and set to true
|
48
|
+
# sub- and superscript expressions associated with the token will be rendered as
|
49
|
+
# under- and overscript (above and below) rather than as sub- or superscript.
|
50
|
+
#
|
51
|
+
# :accent tokens additionally have a :position value which is set to either :over or :under.
|
52
|
+
# This determines if the accent should be rendered over or under the expression to which
|
53
|
+
# it applies.
|
54
|
+
#
|
55
|
+
class Tokenizer
|
56
|
+
WHITESPACE = /^\s+/
|
57
|
+
NUMBER = /-?[0-9]+(?:\.[0-9]+)?/
|
58
|
+
QUOTED_TEXT = /"[^"]*"/
|
59
|
+
TEX_TEXT = /text\([^)]*\)/
|
60
|
+
|
61
|
+
# Public: Initializes an ASCIIMath tokenizer.
|
62
|
+
#
|
63
|
+
# string - The ASCIIMath expression to tokenize
|
64
|
+
# symbols - The symbol table to use while tokenizing
|
65
|
+
def initialize(string, symbols)
|
66
|
+
@string = StringScanner.new(string)
|
67
|
+
@symbols = symbols
|
68
|
+
lookahead = @symbols.keys.map { |k| k.length }.max
|
69
|
+
@symbol_regexp = /([^\s0-9]{1,#{lookahead}})/
|
70
|
+
@push_back = nil
|
71
|
+
end
|
72
|
+
|
73
|
+
# Public: Read the next token from the ASCIIMath expression and move the tokenizer
|
74
|
+
# ahead by one token.
|
75
|
+
#
|
76
|
+
# Returns the next token as a Hash
|
77
|
+
def next_token
|
78
|
+
if @push_back
|
79
|
+
t = @push_back
|
80
|
+
@push_back = nil
|
81
|
+
return t
|
82
|
+
end
|
83
|
+
|
84
|
+
@string.scan(WHITESPACE)
|
85
|
+
|
86
|
+
return {:value => nil, :type => :eof} if @string.eos?
|
87
|
+
|
88
|
+
case @string.peek(1)
|
89
|
+
when '"'
|
90
|
+
read_quoted_text
|
91
|
+
when 't'
|
92
|
+
case @string.peek(5)
|
93
|
+
when 'text('
|
94
|
+
read_tex_text
|
95
|
+
else
|
96
|
+
read_symbol
|
97
|
+
end
|
98
|
+
when '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'
|
99
|
+
read_number || read_symbol
|
100
|
+
else
|
101
|
+
read_symbol
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
# Public: Pushes the given token back to the tokenizer. A subsequent call to next_token
|
106
|
+
# will return the given token rather than generating a new one. At most one
|
107
|
+
# token can be pushed back.
|
108
|
+
#
|
109
|
+
# token - The token to push back
|
110
|
+
def push_back(token)
|
111
|
+
@push_back = token unless token[:type] == :eof
|
112
|
+
end
|
113
|
+
|
114
|
+
private
|
115
|
+
|
116
|
+
# Private: Reads a double-quoted text token ("...") from the input string
|
117
|
+
#
|
118
|
+
# Returns the text token or nil if a text token could not be matched at
|
119
|
+
# the current position
|
120
|
+
def read_quoted_text
|
121
|
+
read_value(QUOTED_TEXT) do |text|
|
122
|
+
{:value => text[1..-2], :type => :text}
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
# Private: Reads a TeX-style text token (text(...)) from the input string
|
127
|
+
#
|
128
|
+
# Returns the text token or nil if a text token could not be matched at
|
129
|
+
# the current position
|
130
|
+
def read_tex_text
|
131
|
+
read_value(TEX_TEXT) do |text|
|
132
|
+
{:value => text[5..-2], :type => :text}
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
# Private: Reads a number token from the input string
|
137
|
+
#
|
138
|
+
# Returns the number token or nil if a number token could not be matched at
|
139
|
+
# the current position
|
140
|
+
def read_number
|
141
|
+
read_value(NUMBER) do |number|
|
142
|
+
{:value => number, :type => :number}
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
if String.method_defined?(:bytesize)
|
147
|
+
def bytesize(s)
|
148
|
+
s.bytesize
|
149
|
+
end
|
150
|
+
else
|
151
|
+
def bytesize(s)
|
152
|
+
s.length
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
|
157
|
+
# Private: Reads a symbol token from the input string. This method first creates
|
158
|
+
# a String from the input String starting from the current position with a length
|
159
|
+
# that matches that of the longest key in the symbol table. It then looks up that
|
160
|
+
# substring in the symbol table. If the substring is present in the symbol table, the
|
161
|
+
# associated value is returned and the position is moved ahead by the length of the
|
162
|
+
# substring. Otherwise this method chops one character off the end of the substring
|
163
|
+
# and repeats the symbol lookup. This continues until a single character is left.
|
164
|
+
# If that character can still not be found in the symbol table, then an identifier
|
165
|
+
# token is returned whose value is the remaining single character string.
|
166
|
+
#
|
167
|
+
# Returns the token that was read or nil if a token could not be matched at
|
168
|
+
# the current position
|
169
|
+
def read_symbol
|
170
|
+
position = @string.pos
|
171
|
+
read_value(@symbol_regexp) do |s|
|
172
|
+
until s.length == 1 || @symbols.include?(s)
|
173
|
+
s.chop!
|
174
|
+
end
|
175
|
+
@string.pos = position + bytesize(s)
|
176
|
+
@symbols[s] || {:value => s, :type => :identifier}
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
# Private: Reads a String from the input String that matches the given RegExp
|
181
|
+
#
|
182
|
+
# regexp - a RegExp that will be used to match the token
|
183
|
+
# block - if a block is provided the matched token will be passed to the block
|
184
|
+
#
|
185
|
+
# Returns the matched String or the value returned by the block if one was given
|
186
|
+
def read_value(regexp)
|
187
|
+
s = @string.scan(regexp)
|
188
|
+
if s
|
189
|
+
yield s
|
190
|
+
else
|
191
|
+
s
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
if String.respond_to?(:byte_size)
|
196
|
+
def byte_size(s)
|
197
|
+
s.byte_size
|
198
|
+
end
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
202
|
+
# Parses AsciiMath source text into a tree of Hash nodes.
#
# Tokens are pulled from a Tokenizer that is handed the SYMBOLS table; the
# resulting node list is wrapped in an Expression together with the original
# input.
class Parser
  # Maps AsciiMath symbol text to a token description. Every entry has a
  # :value and a :type; some carry :underover (limits are placed under/over
  # the symbol) or :position (for accents).
  SYMBOLS = {
    # Operation symbols
    '+' => {:value => '+', :type => :operator},
    '-' => {:value => "\u2212", :type => :operator},
    '*' => {:value => "\u22C5", :type => :operator},
    '**' => {:value => "\u002A", :type => :operator},
    '***' => {:value => "\u22C6", :type => :operator},
    '//' => {:value => '/', :type => :operator},
    '\\\\' => {:value => '\\', :type => :operator},
    'xx' => {:value => "\u00D7", :type => :operator},
    '-:' => {:value => "\u00F7", :type => :operator},
    '|><' => {:value => "\u22C9", :type => :operator},
    '><|' => {:value => "\u22CA", :type => :operator},
    '|><|' => {:value => "\u22C8", :type => :operator},
    '@' => {:value => "\u26AC", :type => :operator},
    'o+' => {:value => "\u2295", :type => :operator},
    'ox' => {:value => "\u2297", :type => :operator},
    'o.' => {:value => "\u2299", :type => :operator},
    'sum' => {:value => "\u2211", :type => :operator, :underover => true},
    'prod' => {:value => "\u220F", :type => :operator, :underover => true},
    '^^' => {:value => "\u2227", :type => :operator},
    '^^^' => {:value => "\u22C0", :type => :operator, :underover => true},
    'vv' => {:value => "\u2228", :type => :operator},
    'vvv' => {:value => "\u22C1", :type => :operator, :underover => true},
    'nn' => {:value => "\u2229", :type => :operator},
    'nnn' => {:value => "\u22C2", :type => :operator, :underover => true},
    'uu' => {:value => "\u222A", :type => :operator},
    'uuu' => {:value => "\u22C3", :type => :operator, :underover => true},

    # Relation symbols
    '=' => {:value => '=', :type => :operator},
    '!=' => {:value => "\u2260", :type => :operator},
    ':=' => {:value => ':=', :type => :operator},
    '<' => {:value => "\u003C", :type => :operator},
    'lt' => {:value => "\u003C", :type => :operator},
    '>' => {:value => "\u003E", :type => :operator},
    'gt' => {:value => "\u003E", :type => :operator},
    '<=' => {:value => "\u2264", :type => :operator},
    'le' => {:value => "\u2264", :type => :operator},
    '>=' => {:value => "\u2265", :type => :operator},
    'ge' => {:value => "\u2265", :type => :operator},
    '-<' => {:value => "\u227A", :type => :operator},
    '>-' => {:value => "\u227B", :type => :operator},
    '-<=' => {:value => "\u2AAF", :type => :operator},
    '>-=' => {:value => "\u2AB0", :type => :operator},
    'in' => {:value => "\u2208", :type => :operator},
    '!in' => {:value => "\u2209", :type => :operator},
    'sub' => {:value => "\u2282", :type => :operator},
    'sup' => {:value => "\u2283", :type => :operator},
    'sube' => {:value => "\u2286", :type => :operator},
    'supe' => {:value => "\u2287", :type => :operator},
    '-=' => {:value => "\u2261", :type => :operator},
    '~=' => {:value => "\u2245", :type => :operator},
    '~~' => {:value => "\u2248", :type => :operator},
    'prop' => {:value => "\u221D", :type => :operator},

    # Logical symbols
    'and' => {:value => 'and', :type => :text},
    'or' => {:value => 'or', :type => :text},
    'not' => {:value => "\u00AC", :type => :operator},
    '=>' => {:value => "\u21D2", :type => :operator},
    'if' => {:value => 'if', :type => :operator},
    '<=>' => {:value => "\u21D4", :type => :operator},
    'AA' => {:value => "\u2200", :type => :operator},
    'EE' => {:value => "\u2203", :type => :operator},
    '_|_' => {:value => "\u22A5", :type => :operator},
    'TT' => {:value => "\u22A4", :type => :operator},
    '|--' => {:value => "\u22A2", :type => :operator},
    '|==' => {:value => "\u22A8", :type => :operator},

    # Grouping brackets
    '(' => {:value => '(', :type => :lparen},
    ')' => {:value => ')', :type => :rparen},
    '[' => {:value => '[', :type => :lparen},
    ']' => {:value => ']', :type => :rparen},
    '{' => {:value => '{', :type => :lparen},
    '}' => {:value => '}', :type => :rparen},
    '(:' => {:value => "\u2329", :type => :lparen},
    ':)' => {:value => "\u232A", :type => :rparen},
    '<<' => {:value => "\u2329", :type => :lparen},
    '>>' => {:value => "\u232A", :type => :rparen},
    '|' => {:value => '|', :type => :lrparen},
    '||' => {:value => '||', :type => :lrparen},
    '{:' => {:value => nil, :type => :lparen},
    ':}' => {:value => nil, :type => :rparen},

    # Miscellaneous symbols
    'int' => {:value => "\u222B", :type => :operator},
    'dx' => {:value => 'dx', :type => :identifier},
    'dy' => {:value => 'dy', :type => :identifier},
    'dz' => {:value => 'dz', :type => :identifier},
    'dt' => {:value => 'dt', :type => :identifier},
    'oint' => {:value => "\u222E", :type => :operator},
    'del' => {:value => "\u2202", :type => :operator},
    'grad' => {:value => "\u2207", :type => :operator},
    '+-' => {:value => "\u00B1", :type => :operator},
    'O/' => {:value => "\u2205", :type => :operator},
    'oo' => {:value => "\u221E", :type => :operator},
    'aleph' => {:value => "\u2135", :type => :operator},
    '...' => {:value => '...', :type => :operator},
    ':.' => {:value => "\u2234", :type => :operator},
    '/_' => {:value => "\u2220", :type => :operator},
    '\\ ' => {:value => "\u00A0", :type => :operator},
    # Double quotes are required here: in a single-quoted literal \u00A0 is
    # six literal characters, not a no-break space.
    'quad' => {:value => "\u00A0\u00A0", :type => :operator},
    'qquad' => {:value => "\u00A0\u00A0\u00A0\u00A0", :type => :operator},
    'cdots' => {:value => "\u22EF", :type => :operator},
    'vdots' => {:value => "\u22EE", :type => :operator},
    'ddots' => {:value => "\u22F1", :type => :operator},
    'diamond' => {:value => "\u22C4", :type => :operator},
    'square' => {:value => "\u25A1", :type => :operator},
    '|__' => {:value => "\u230A", :type => :operator},
    '__|' => {:value => "\u230B", :type => :operator},
    '|~' => {:value => "\u2308", :type => :operator},
    '~|' => {:value => "\u2309", :type => :operator},
    'CC' => {:value => "\u2102", :type => :operator},
    'NN' => {:value => "\u2115", :type => :operator},
    'QQ' => {:value => "\u211A", :type => :operator},
    'RR' => {:value => "\u211D", :type => :operator},
    'ZZ' => {:value => "\u2124", :type => :operator},

    'lim' => {:value => 'lim', :type => :operator, :underover => true},
    'Lim' => {:value => 'Lim', :type => :operator, :underover => true},

    # Standard functions
    'sin' => {:value => 'sin', :type => :identifier},
    'cos' => {:value => 'cos', :type => :identifier},
    'tan' => {:value => 'tan', :type => :identifier},
    'sec' => {:value => 'sec', :type => :identifier},
    'csc' => {:value => 'csc', :type => :identifier},
    'cot' => {:value => 'cot', :type => :identifier},
    'arcsin' => {:value => 'arcsin', :type => :identifier},
    'arccos' => {:value => 'arccos', :type => :identifier},
    'arctan' => {:value => 'arctan', :type => :identifier},
    'sinh' => {:value => 'sinh', :type => :identifier},
    'cosh' => {:value => 'cosh', :type => :identifier},
    'tanh' => {:value => 'tanh', :type => :identifier},
    'sech' => {:value => 'sech', :type => :identifier},
    'csch' => {:value => 'csch', :type => :identifier},
    'coth' => {:value => 'coth', :type => :identifier},
    'exp' => {:value => 'exp', :type => :identifier},
    'log' => {:value => 'log', :type => :identifier},
    'ln' => {:value => 'ln', :type => :identifier},
    'det' => {:value => 'det', :type => :identifier},
    'dim' => {:value => 'dim', :type => :identifier},
    'mod' => {:value => 'mod', :type => :identifier},
    'gcd' => {:value => 'gcd', :type => :identifier},
    'lcm' => {:value => 'lcm', :type => :identifier},
    'lub' => {:value => 'lub', :type => :identifier},
    'glb' => {:value => 'glb', :type => :identifier},
    'min' => {:value => 'min', :type => :identifier, :underover => true},
    'max' => {:value => 'max', :type => :identifier, :underover => true},
    'f' => {:value => 'f', :type => :identifier},
    'g' => {:value => 'g', :type => :identifier},

    # Accents
    'hat' => {:value => "\u005E", :type => :accent, :position => :over},
    'bar' => {:value => "\u00AF", :type => :accent, :position => :over},
    'ul' => {:value => '_', :type => :accent, :position => :under},
    'vec' => {:value => "\u2192", :type => :accent, :position => :over},
    'dot' => {:value => '.', :type => :accent, :position => :over},
    'ddot' => {:value => '..', :type => :accent, :position => :over},
    'obrace' => {:value => "\u23DE", :type => :accent, :position => :over},
    'ubrace' => {:value => "\u23DF", :type => :accent, :position => :under},

    # Arrows
    'uarr' => {:value => "\u2191", :type => :operator},
    'darr' => {:value => "\u2193", :type => :operator},
    'rarr' => {:value => "\u2192", :type => :operator},
    '->' => {:value => "\u2192", :type => :operator},
    '>->' => {:value => "\u21A3", :type => :operator},
    '->>' => {:value => "\u21A0", :type => :operator},
    '>->>' => {:value => "\u2916", :type => :operator},
    '|->' => {:value => "\u21A6", :type => :operator},
    'larr' => {:value => "\u2190", :type => :operator},
    'harr' => {:value => "\u2194", :type => :operator},
    'rArr' => {:value => "\u21D2", :type => :operator},
    'lArr' => {:value => "\u21D0", :type => :operator},
    'hArr' => {:value => "\u21D4", :type => :operator},

    # Other
    'sqrt' => {:value => :sqrt, :type => :unary},
    'text' => {:value => :text, :type => :unary},
    'bb' => {:value => :bold, :type => :font},
    'bbb' => {:value => :double_struck, :type => :font},
    'ii' => {:value => :italic, :type => :font},
    'bii' => {:value => :bold_italic, :type => :font},
    'cc' => {:value => :script, :type => :font},
    'bcc' => {:value => :bold_script, :type => :font},
    'tt' => {:value => :monospace, :type => :font},
    'fr' => {:value => :fraktur, :type => :font},
    'bfr' => {:value => :bold_fraktur, :type => :font},
    'sf' => {:value => :sans_serif, :type => :font},
    'bsf' => {:value => :bold_sans_serif, :type => :font},
    'sfi' => {:value => :sans_serif_italic, :type => :font},
    'sfbi' => {:value => :sans_serif_bold_italic, :type => :font},
    'frac' => {:value => :frac, :type => :binary},
    'root' => {:value => :root, :type => :binary},
    'stackrel' => {:value => :over, :type => :binary},
    '/' => {:value => :frac, :type => :infix},
    '_' => {:value => :sub, :type => :infix},
    '^' => {:value => :sup, :type => :infix},

    # Greek letters
    'alpha' => {:value => "\u03b1", :type => :identifier},
    'Alpha' => {:value => "\u0391", :type => :identifier},
    'beta' => {:value => "\u03b2", :type => :identifier},
    'Beta' => {:value => "\u0392", :type => :identifier},
    'gamma' => {:value => "\u03b3", :type => :identifier},
    'Gamma' => {:value => "\u0393", :type => :operator},
    'delta' => {:value => "\u03b4", :type => :identifier},
    'Delta' => {:value => "\u0394", :type => :operator},
    'epsilon' => {:value => "\u03b5", :type => :identifier},
    'Epsilon' => {:value => "\u0395", :type => :identifier},
    'varepsilon' => {:value => "\u025b", :type => :identifier},
    'zeta' => {:value => "\u03b6", :type => :identifier},
    'Zeta' => {:value => "\u0396", :type => :identifier},
    'eta' => {:value => "\u03b7", :type => :identifier},
    'Eta' => {:value => "\u0397", :type => :identifier},
    'theta' => {:value => "\u03b8", :type => :identifier},
    'Theta' => {:value => "\u0398", :type => :operator},
    'vartheta' => {:value => "\u03d1", :type => :identifier},
    'iota' => {:value => "\u03b9", :type => :identifier},
    'Iota' => {:value => "\u0399", :type => :identifier},
    'kappa' => {:value => "\u03ba", :type => :identifier},
    'Kappa' => {:value => "\u039a", :type => :identifier},
    'lambda' => {:value => "\u03bb", :type => :identifier},
    'Lambda' => {:value => "\u039b", :type => :operator},
    'mu' => {:value => "\u03bc", :type => :identifier},
    'Mu' => {:value => "\u039c", :type => :identifier},
    'nu' => {:value => "\u03bd", :type => :identifier},
    'Nu' => {:value => "\u039d", :type => :identifier},
    'xi' => {:value => "\u03be", :type => :identifier},
    'Xi' => {:value => "\u039e", :type => :operator},
    'omicron' => {:value => "\u03bf", :type => :identifier},
    'Omicron' => {:value => "\u039f", :type => :identifier},
    'pi' => {:value => "\u03c0", :type => :identifier},
    'Pi' => {:value => "\u03a0", :type => :operator},
    'rho' => {:value => "\u03c1", :type => :identifier},
    'Rho' => {:value => "\u03a1", :type => :identifier},
    'sigma' => {:value => "\u03c3", :type => :identifier},
    'Sigma' => {:value => "\u03a3", :type => :operator},
    'tau' => {:value => "\u03c4", :type => :identifier},
    'Tau' => {:value => "\u03a4", :type => :identifier},
    'upsilon' => {:value => "\u03c5", :type => :identifier},
    'Upsilon' => {:value => "\u03a5", :type => :identifier},
    'phi' => {:value => "\u03c6", :type => :identifier},
    'Phi' => {:value => "\u03a6", :type => :identifier},
    'varphi' => {:value => "\u03d5", :type => :identifier},
    # chi/Chi previously held single-quoted, malformed escapes
    # ('\u03b3c7', '\u0393a7'); the Greek letters are U+03C7 and U+03A7.
    'chi' => {:value => "\u03c7", :type => :identifier},
    'Chi' => {:value => "\u03a7", :type => :identifier},
    'psi' => {:value => "\u03c8", :type => :identifier},
    'Psi' => {:value => "\u03a8", :type => :identifier},
    'omega' => {:value => "\u03c9", :type => :identifier},
    'Omega' => {:value => "\u03a9", :type => :operator},
  }

  # Public: Parses an AsciiMath String.
  #
  # input - the AsciiMath source text
  #
  # Returns an Expression wrapping the input and its parsed node list.
  def parse(input)
    Expression.new(
      input,
      parse_expression(Tokenizer.new(input, SYMBOLS), 0)
    )
  end

  private

  # Private: Parses a run of simple expressions, combining them with infix
  # operators (:frac, :sub, :sup) where present.
  #
  # tok   - the tokenizer to read from
  # depth - bracket nesting depth; at depth > 0 parsing stops as soon as the
  #         next token is a right or ambiguous (:lrparen) bracket so the
  #         caller can consume it
  #
  # Returns an Array of node Hashes (empty when no expression could be read).
  def parse_expression(tok, depth)
    e = []

    while (s1 = parse_simple_expression(tok, depth))
      t1 = tok.next_token

      if t1[:type] == :infix
        s2 = parse_simple_expression(tok, depth)
        t2 = tok.next_token
        if t1[:value] == :sub && t2[:value] == :sup
          # a_b^c: subscript immediately followed by superscript
          s3 = parse_simple_expression(tok, depth)
          operator = s1[:underover] ? :underover : :subsup
          e << {:type => :ternary, :operator => operator, :s1 => s1, :s2 => s2, :s3 => s3}
        else
          # Symbols flagged :underover place their limits under/over rather
          # than as sub/superscripts.
          operator = s1[:underover] ? (t1[:value] == :sub ? :under : :over) : t1[:value]
          e << {:type => :binary, :operator => operator, :s1 => s1, :s2 => s2}
          tok.push_back(t2)
          break if (t2[:type] == :lrparen || t2[:type] == :rparen) && depth > 0
        end
      elsif t1[:type] == :eof
        e << s1
        break
      else
        e << s1
        tok.push_back(t1)
        break if (t1[:type] == :lrparen || t1[:type] == :rparen) && depth > 0
      end
    end

    e
  end

  # Private: Parses a single operand: a bracketed group, an accent, unary,
  # font or binary construct applied to its argument(s), or a plain token.
  #
  # tok   - the tokenizer to read from
  # depth - current bracket nesting depth
  #
  # Returns a node Hash, or nil at end of input.
  def parse_simple_expression(tok, depth)
    t1 = tok.next_token

    case t1[:type]
    when :lparen, :lrparen
      t2 = tok.next_token
      case t2[:type]
      when :rparen, :lrparen
        # Empty bracket pair
        {:type => :paren, :e => nil, :lparen => t1[:value], :rparen => t2[:value]}
      else
        tok.push_back(t2)

        e = parse_expression(tok, depth + 1)

        t2 = tok.next_token
        case t2[:type]
        when :rparen, :lrparen
          convert_to_matrix({:type => :paren, :e => e, :lparen => t1[:value], :rparen => t2[:value]})
        else
          # Unclosed bracket: keep the group but record no right bracket
          tok.push_back(t2)
          {:type => :paren, :e => e, :lparen => t1[:value]}
        end
      end
    when :accent
      s = parse_simple_expression(tok, depth)
      {:type => :binary, :s1 => s, :s2 => {:type => :operator, :c => t1[:value]}, :operator => t1[:position]}
    when :unary, :font
      s = parse_simple_expression(tok, depth)
      {:type => t1[:type], :s => s, :operator => t1[:value]}
    when :binary
      s1 = parse_simple_expression(tok, depth)
      s2 = parse_simple_expression(tok, depth)
      {:type => :binary, :s1 => s1, :s2 => s2, :operator => t1[:value]}
    when :eof
      nil
    else
      {:type => t1[:type], :c => t1[:value], :underover => t1[:underover]}
    end
  end

  # Private: Rewrites a bracketed group that has the shape of a matrix
  # (comma-separated rows, each a comma-separated bracketed list) into a
  # :matrix node, dropping the comma separators.
  #
  # expression - a :paren node Hash
  #
  # Returns a :matrix node Hash, or the expression itself when it is not a
  # matrix.
  def convert_to_matrix(expression)
    return expression unless matrix? expression

    # Even indices hold rows/cells, odd indices hold the ',' separators.
    rows = expression[:e].select.with_index { |_obj, i| i.even? }.map do |row|
      row[:e].select.with_index { |_obj, i| i.even? }
    end

    {:type => :matrix, :rows => rows, :lparen => expression[:lparen], :rparen => expression[:rparen]}
  end

  # Private: Tests whether a :paren node looks like a matrix: more than one
  # row, rows separated by ',' identifiers, every row bracketed uniformly
  # with () or with [], and all rows well-formed and of equal length.
  #
  # Returns true or false.
  def matrix?(expression)
    return false unless expression.is_a?(Hash) && expression[:type] == :paren
    return false unless expression[:e].is_a?(Array)

    rows, separators = expression[:e].partition.with_index { |_obj, i| i.even? }

    rows.length > 1 &&
      rows.length > separators.length &&
      separators.all? { |item| item[:type] == :identifier && item[:c] == ',' } &&
      (rows.all? { |item| item[:type] == :paren && item[:lparen] == '(' && item[:rparen] == ')' } ||
        rows.all? { |item| item[:type] == :paren && item[:lparen] == '[' && item[:rparen] == ']' }) &&
      # matrix_cols? must run before the length comparison: it rejects rows
      # whose :e is nil (empty brackets), on which #length would raise.
      rows.all? { |item| matrix_cols?(item[:e]) } &&
      rows.all? { |item| item[:e].length == rows[0][:e].length }
  end

  # Private: Tests whether a row's contents alternate between cells (anything
  # but a ',' identifier) and ',' identifier separators.
  #
  # Returns true or false; false when expression is not an Array.
  def matrix_cols?(expression)
    return false unless expression.is_a?(Array)

    cols, separators = expression.partition.with_index { |_obj, i| i.even? }

    cols.all? { |item| item[:type] != :identifier || item[:c] != ',' } &&
      separators.all? { |item| item[:type] == :identifier && item[:c] == ',' }
  end
end
|
576
|
+
|
577
|
+
# Pairs an AsciiMath source String with the parse result produced for it.
class Expression
  # asciimath         - the original AsciiMath source String
  # parsed_expression - the parsed representation of that source
  def initialize(asciimath, parsed_expression)
    @asciimath, @parsed_expression = asciimath, parsed_expression
  end

  # Returns the original AsciiMath source given to the constructor.
  def to_s
    @asciimath
  end
end
|
587
|
+
|
588
|
+
# Public: Convenience entry point that parses an AsciiMath String with a
# freshly created Parser.
#
# asciimath - the AsciiMath source String
#
# Returns whatever Parser#parse returns for the input.
def self.parse(asciimath)
  parser = Parser.new
  parser.parse(asciimath)
end
|
591
|
+
end
|
data/lib/html2doc.rb
CHANGED
data/lib/html2doc/base.rb
CHANGED
@@ -44,6 +44,7 @@ module Html2Doc
|
|
44
44
|
end
|
45
45
|
|
46
46
|
def self.cleanup(docxml, hash)
|
47
|
+
namespace(docxml.root)
|
47
48
|
image_cleanup(docxml, hash[:dir1], File.dirname(hash[:filename]))
|
48
49
|
mathml_to_ooml(docxml)
|
49
50
|
lists(docxml, hash[:liststyles])
|
@@ -91,6 +92,12 @@ module Html2Doc
|
|
91
92
|
r.gsub!(%r{<meta http-equiv="Content-Type"},
|
92
93
|
"<meta http-equiv=Content-Type")
|
93
94
|
r.gsub!(%r{></m:jc>}, "/>")
|
95
|
+
r.gsub!(%r{></v:stroke>}, "/>")
|
96
|
+
r.gsub!(%r{></v:f>}, "/>")
|
97
|
+
r.gsub!(%r{></v:path>}, "/>")
|
98
|
+
r.gsub!(%r{></o:lock>}, "/>")
|
99
|
+
r.gsub!(%r{></v:imagedata>}, "/>")
|
100
|
+
r.gsub!(%r{></w:wrap>}, "/>")
|
94
101
|
r.gsub!(%r{&tab;|&tab;}, '<span style="mso-tab-count:1">  </span>')
|
95
102
|
r
|
96
103
|
end
|
@@ -142,7 +149,7 @@ module Html2Doc
|
|
142
149
|
css = stylesheet(hash[:filename], hash[:header_file], hash[:stylesheet])
|
143
150
|
add_stylesheet(head, title, css)
|
144
151
|
define_head1(docxml, hash[:dir1])
|
145
|
-
|
152
|
+
rootnamespace(docxml.root)
|
146
153
|
end
|
147
154
|
|
148
155
|
def self.add_stylesheet(head, title, css)
|
@@ -162,12 +169,16 @@ module Html2Doc
|
|
162
169
|
v: "urn:schemas-microsoft-com:vml",
|
163
170
|
m: "http://schemas.microsoft.com/office/2004/12/omml",
|
164
171
|
}.each { |k, v| root.add_namespace_definition(k.to_s, v) }
|
172
|
+
end
|
173
|
+
|
174
|
+
def self.rootnamespace(root)
|
165
175
|
root.add_namespace(nil, "http://www.w3.org/TR/REC-html40")
|
166
176
|
end
|
167
177
|
|
168
178
|
def self.bookmarks(docxml)
|
169
179
|
docxml.xpath("//*[@id][not(@name)][not(@style = 'mso-element:footnote')]").each do |x|
|
170
180
|
next if x["id"].empty?
|
181
|
+
next if %w(shapetype v:shapetype shape v:shape).include? x.name
|
171
182
|
if x.children.empty?
|
172
183
|
x.add_child("<a name='#{x["id"]}'></a>")
|
173
184
|
else
|
data/lib/html2doc/mime.rb
CHANGED
@@ -83,7 +83,9 @@ module Html2Doc
|
|
83
83
|
|
84
84
|
# only processes locally stored images
|
85
85
|
def self.image_cleanup(docxml, dir, localdir)
|
86
|
-
docxml.xpath(IMAGE_PATH).each do |i|
|
86
|
+
#docxml.xpath(IMAGE_PATH).each do |i|
|
87
|
+
docxml.traverse do |i|
|
88
|
+
next unless i.element? && %w(img v:imagedata).include?(i.name)
|
87
89
|
warnsvg(i["src"])
|
88
90
|
next if /^http/.match i["src"]
|
89
91
|
local_filename = File.join(localdir, i["src"])
|
data/lib/html2doc/version.rb
CHANGED
data/spec/html2doc_spec.rb
CHANGED
@@ -383,7 +383,7 @@ RSpec.describe Html2Doc do
|
|
383
383
|
end
|
384
384
|
|
385
385
|
it "processes AsciiMath" do
|
386
|
-
Html2Doc.process(html_input(
|
386
|
+
Html2Doc.process(html_input(%[<div>{{sum_(i=1)^n i^3=((n(n+1))/2)^2 text("integer"))}}</div>]), filename: "test", asciimathdelims: ["{{", "}}"])
|
387
387
|
expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
|
388
388
|
to match_fuzzy(<<~OUTPUT)
|
389
389
|
#{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
|
@@ -401,6 +401,7 @@ RSpec.describe Html2Doc do
|
|
401
401
|
</m:num><m:den><m:r><m:t>2</m:t></m:r></m:den></m:f>
|
402
402
|
<m:r><m:t>)</m:t></m:r>
|
403
403
|
</m:e><m:sup><m:r><m:t>2</m:t></m:r></m:sup></m:sSup>
|
404
|
+
<m:r><m:rPr><m:nor></m:nor></m:rPr><m:t>"integer"</m:t></m:r>
|
404
405
|
</m:oMath>
|
405
406
|
</div>', '<div style="mso-element:footnote-list"/>')}
|
406
407
|
#{WORD_FTR1}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2doc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.6
|
4
|
+
version: 0.8.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-01-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -128,14 +128,14 @@ dependencies:
|
|
128
128
|
requirements:
|
129
129
|
- - "~>"
|
130
130
|
- !ruby/object:Gem::Version
|
131
|
-
version:
|
131
|
+
version: 2.0.1
|
132
132
|
type: :development
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
136
|
- - "~>"
|
137
137
|
- !ruby/object:Gem::Version
|
138
|
-
version:
|
138
|
+
version: 2.0.1
|
139
139
|
- !ruby/object:Gem::Dependency
|
140
140
|
name: byebug
|
141
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -224,16 +224,16 @@ dependencies:
|
|
224
224
|
name: rubocop
|
225
225
|
requirement: !ruby/object:Gem::Requirement
|
226
226
|
requirements:
|
227
|
-
- -
|
227
|
+
- - '='
|
228
228
|
- !ruby/object:Gem::Version
|
229
|
-
version:
|
229
|
+
version: 0.54.0
|
230
230
|
type: :development
|
231
231
|
prerelease: false
|
232
232
|
version_requirements: !ruby/object:Gem::Requirement
|
233
233
|
requirements:
|
234
|
-
- -
|
234
|
+
- - '='
|
235
235
|
- !ruby/object:Gem::Version
|
236
|
-
version:
|
236
|
+
version: 0.54.0
|
237
237
|
- !ruby/object:Gem::Dependency
|
238
238
|
name: simplecov
|
239
239
|
requirement: !ruby/object:Gem::Requirement
|
@@ -306,6 +306,11 @@ files:
|
|
306
306
|
- bin/rspec
|
307
307
|
- bin/setup
|
308
308
|
- html2doc.gemspec
|
309
|
+
- lib/asciimath/cli.rb
|
310
|
+
- lib/asciimath/html.rb
|
311
|
+
- lib/asciimath/mathml.rb
|
312
|
+
- lib/asciimath/parser.rb
|
313
|
+
- lib/asciimath/version.rb
|
309
314
|
- lib/html2doc.rb
|
310
315
|
- lib/html2doc/base.rb
|
311
316
|
- lib/html2doc/lists.rb
|