rsyntaxtree 0.8.8 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.tags +203 -0
- data/Gemfile +2 -0
- data/README.md +0 -1
- data/Rakefile +7 -0
- data/bin/rsyntaxtree +38 -31
- data/fonts/NotoSansJP-Bold.otf +0 -0
- data/fonts/NotoSansJP-Regular.otf +0 -0
- data/fonts/NotoSansMath-Regular.ttf +0 -0
- data/fonts/NotoSerifJP-Bold.otf +0 -0
- data/fonts/NotoSerifJP-Regular.otf +0 -0
- data/fonts/OpenMoji-Black.ttf +0 -0
- data/fonts/OpenMoji-Color.ttf +0 -0
- data/lib/rsyntaxtree/base_graph.rb +262 -0
- data/lib/rsyntaxtree/element.rb +155 -25
- data/lib/rsyntaxtree/elementlist.rb +16 -13
- data/lib/rsyntaxtree/markup_parser.rb +208 -0
- data/lib/rsyntaxtree/string_parser.rb +190 -197
- data/lib/rsyntaxtree/svg_graph.rb +450 -260
- data/lib/rsyntaxtree/utils.rb +49 -6
- data/lib/rsyntaxtree/version.rb +1 -1
- data/lib/rsyntaxtree.rb +143 -154
- data/rsyntaxtree.gemspec +2 -0
- data/test/markup_parser_test.rb +207 -0
- metadata +41 -13
- data/fonts/NotoSansCJKjp-Regular.otf +0 -0
- data/fonts/NotoSerifCJKjp-Regular.otf +0 -0
- data/fonts/latinmodern-math.otf +0 -0
- data/fonts/lmroman10-bold.otf +0 -0
- data/fonts/lmroman10-bolditalic.otf +0 -0
- data/fonts/lmroman10-italic.otf +0 -0
- data/fonts/lmroman10-regular.otf +0 -0
- data/lib/rsyntaxtree/error_message.rb +0 -68
- data/lib/rsyntaxtree/graph.rb +0 -307
- data/lib/rsyntaxtree/tree_graph.rb +0 -309
@@ -7,241 +7,234 @@
|
|
7
7
|
#
|
8
8
|
# Parses a phrase into leafs and nodes and store the result in an element list
|
9
9
|
# (see element_list.rb)
|
10
|
-
#
|
11
|
-
# This file is part of RSyntaxTree, which is a ruby port of Andre Eisenbach's
|
12
|
-
# excellent program phpSyntaxTree.
|
13
|
-
#
|
14
10
|
# Copyright (c) 2007-2021 Yoichiro Hasebe <yohasebe@gmail.com>
|
15
|
-
# Copyright (c) 2003-2004 Andre Eisenbach <andre@ironcreek.net>
|
16
11
|
|
17
12
|
require 'elementlist'
|
18
13
|
require 'element'
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
#
|
27
|
-
#
|
28
|
-
|
29
|
-
#
|
30
|
-
|
31
|
-
#
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
@elist = ElementList.new # Initialize internal element list
|
45
|
-
@pos = 0 # Position in the sentence
|
46
|
-
@id = 1 # ID for the next element
|
47
|
-
@level = 0 # Level in the diagram
|
48
|
-
@tncnt = Hash.new # Node type counts
|
49
|
-
end
|
50
|
-
|
51
|
-
# caution: quick and dirty solution
|
52
|
-
def valid?
|
53
|
-
if(@data.length < 1)
|
54
|
-
return false
|
55
|
-
end
|
56
|
-
|
57
|
-
if /\[\s*\]/m =~ @data
|
58
|
-
return false
|
59
|
-
end
|
60
|
-
|
61
|
-
if /\[\_/ =~ @data
|
62
|
-
return false
|
63
|
-
end
|
64
|
-
|
65
|
-
text = @data.strip
|
66
|
-
text_r = text.split(//)
|
67
|
-
open_br, close_br = [], []
|
68
|
-
escape = false
|
69
|
-
text_r.each do |chr|
|
70
|
-
if chr == "\\"
|
71
|
-
escape = true
|
72
|
-
elsif chr == '[' && !escape
|
73
|
-
open_br.push(chr)
|
74
|
-
elsif chr == ']' && !escape
|
75
|
-
close_br.push(chr)
|
76
|
-
if open_br.length < close_br.length
|
77
|
-
break
|
14
|
+
require 'utils'
|
15
|
+
|
16
|
+
module RSyntaxTree
|
17
|
+
class StringParser
|
18
|
+
|
19
|
+
attr_accessor :data, :elist, :pos, :id, :level
|
20
|
+
def initialize(str, fontset, fontsize)
|
21
|
+
# Clean up the data a little to make processing easier
|
22
|
+
# repeated newlines => a newline
|
23
|
+
string = str.gsub(/[\n\r]+/m, "\n")
|
24
|
+
# a backslash followed by a newline => a backslash followed by an 'n'
|
25
|
+
string.gsub!(/\\\n\s*/m, "\\n")
|
26
|
+
# repeated whitespace characters => " "
|
27
|
+
string.gsub!(/\s+/, " ")
|
28
|
+
string.gsub!(/\]\s+\[/, "][")
|
29
|
+
string.gsub!(/\s+\[/, "[")
|
30
|
+
string.gsub!(/\[\s+/, "[")
|
31
|
+
string.gsub!(/\s+\]/, "]")
|
32
|
+
string.gsub!(/\]\s+/, "]")
|
33
|
+
string.gsub!(/<(\d*)>/) do
|
34
|
+
num_padding = $1.to_i
|
35
|
+
if num_padding > 0
|
36
|
+
result = WHITESPACE_BLOCK * num_padding
|
37
|
+
else
|
38
|
+
result = WHITESPACE_BLOCK
|
78
39
|
end
|
79
|
-
|
80
|
-
escape = false
|
40
|
+
result
|
81
41
|
end
|
82
|
-
end
|
83
|
-
|
84
|
-
return false unless open_br.length == close_br.length
|
85
|
-
# make_tree(0)
|
86
|
-
# return false if @tncnt.empty?
|
87
|
-
# @tncnt.each do |key, value|
|
88
|
-
# return false if key == ""
|
89
|
-
# end
|
90
|
-
return true
|
91
|
-
end
|
92
|
-
|
93
|
-
def parse
|
94
|
-
make_tree(0);
|
95
|
-
end
|
96
42
|
|
97
|
-
|
98
|
-
|
99
|
-
|
43
|
+
@data = string # Store it for later...
|
44
|
+
if @data.contains_cjk?
|
45
|
+
fontset[:normal] = fontset[:cjk]
|
46
|
+
end
|
47
|
+
@elist = ElementList.new # Initialize internal element list
|
48
|
+
@pos = 0 # Position in the sentence
|
49
|
+
@id = 1 # ID for the next element
|
50
|
+
@level = 0 # Level in the diagram
|
51
|
+
@fontset = fontset
|
52
|
+
@fontsize = fontsize
|
53
|
+
end
|
100
54
|
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
if(element.type == ETYPE_NODE)
|
106
|
-
count = 1
|
107
|
-
content = element.content
|
55
|
+
def self.valid?(data)
|
56
|
+
if(data.length < 1)
|
57
|
+
raise RSTError, "Error: input text is empty"
|
58
|
+
end
|
108
59
|
|
109
|
-
|
110
|
-
|
111
|
-
|
60
|
+
if /\[\s*\]/m =~ data
|
61
|
+
raise RSTError, "Error: inside the brackets is empty"
|
62
|
+
end
|
112
63
|
|
113
|
-
|
114
|
-
|
115
|
-
|
64
|
+
text = data.strip
|
65
|
+
text_r = text.split(//)
|
66
|
+
open_br, close_br = [], []
|
67
|
+
escape = false
|
68
|
+
text_r.each do |chr|
|
69
|
+
if chr == "\\"
|
70
|
+
if escape
|
71
|
+
escape = false
|
116
72
|
else
|
117
|
-
|
73
|
+
escape = true
|
118
74
|
end
|
75
|
+
next
|
76
|
+
end
|
119
77
|
|
120
|
-
|
78
|
+
if escape && /[\[\]]/ =~ chr
|
79
|
+
escape = false
|
80
|
+
next
|
81
|
+
elsif chr == '['
|
82
|
+
open_br.push(chr)
|
83
|
+
elsif chr == ']'
|
84
|
+
close_br.push(chr)
|
85
|
+
if open_br.length < close_br.length
|
86
|
+
break
|
87
|
+
end
|
121
88
|
end
|
89
|
+
escape = false
|
90
|
+
end
|
122
91
|
|
92
|
+
if open_br.empty? && close_br.empty?
|
93
|
+
raise RSTError, "Error: input text does not contain paired brackets"
|
94
|
+
elsif open_br.length == close_br.length
|
95
|
+
return true
|
96
|
+
else
|
97
|
+
raise RSTError, "Error: open and close brackets do not match"
|
123
98
|
end
|
124
|
-
end
|
125
|
-
@tncnt
|
126
|
-
end
|
127
|
-
|
128
|
-
def count_node(name)
|
129
|
-
name = name.strip
|
130
|
-
if @tncnt[name]
|
131
|
-
@tncnt[name] += 1
|
132
|
-
else
|
133
|
-
@tncnt[name] = 1
|
134
99
|
end
|
135
|
-
end
|
136
100
|
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
i = 0
|
101
|
+
def parse
|
102
|
+
make_tree(0);
|
103
|
+
@elist.set_hierarchy
|
104
|
+
end
|
142
105
|
|
143
|
-
|
144
|
-
|
106
|
+
def get_elementlist
|
107
|
+
@elist;
|
145
108
|
end
|
146
109
|
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
110
|
+
def get_next_token
|
111
|
+
data = @data.split(//)
|
112
|
+
gottoken = false
|
113
|
+
token = ""
|
114
|
+
i = 0
|
115
|
+
|
116
|
+
if((@pos + 1) >= data.length)
|
117
|
+
return ""
|
118
|
+
end
|
119
|
+
|
120
|
+
escape = false
|
121
|
+
while(((@pos + i) < data.length) && !gottoken)
|
122
|
+
ch = data[@pos + i];
|
123
|
+
case ch
|
124
|
+
when "["
|
125
|
+
if escape
|
126
|
+
token += ch
|
127
|
+
escape = false
|
128
|
+
else
|
129
|
+
if(i > 0)
|
130
|
+
gottoken = true
|
131
|
+
else
|
132
|
+
token += ch
|
133
|
+
end
|
134
|
+
end
|
135
|
+
when "]"
|
136
|
+
if escape
|
137
|
+
token += ch
|
138
|
+
escape = false
|
139
|
+
else
|
140
|
+
if(i == 0 )
|
141
|
+
token += ch
|
142
|
+
end
|
157
143
|
gottoken = true
|
144
|
+
end
|
145
|
+
when "\\"
|
146
|
+
if escape
|
147
|
+
token += '\\\\'
|
148
|
+
escape = false
|
149
|
+
else
|
150
|
+
escape = true
|
151
|
+
end
|
152
|
+
when " "
|
153
|
+
if escape
|
154
|
+
token += '\\n'
|
155
|
+
escape = false
|
156
|
+
else
|
157
|
+
token += ch
|
158
|
+
end
|
159
|
+
when /[n{}<>^+*_=~\|\-]/
|
160
|
+
if escape
|
161
|
+
token += '\\' + ch
|
162
|
+
escape = false
|
158
163
|
else
|
159
164
|
token += ch
|
160
165
|
end
|
161
|
-
end
|
162
|
-
when "]"
|
163
|
-
if escape
|
164
|
-
token += ch
|
165
|
-
escape = false
|
166
166
|
else
|
167
|
-
if
|
167
|
+
if escape
|
168
|
+
token += ch
|
169
|
+
escape = false
|
170
|
+
else
|
168
171
|
token += ch
|
169
172
|
end
|
170
|
-
gottoken = true
|
171
173
|
end
|
172
|
-
|
173
|
-
escape = true
|
174
|
-
when /[\n\r]/
|
175
|
-
gottoken = false # same as do nothing
|
176
|
-
else
|
177
|
-
token += ch
|
178
|
-
escape = false if escape
|
174
|
+
i += 1
|
179
175
|
end
|
180
|
-
i += 1
|
181
|
-
end
|
182
176
|
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
177
|
+
if(i > 1)
|
178
|
+
@pos += (i - 1)
|
179
|
+
else
|
180
|
+
@pos += 1
|
181
|
+
end
|
182
|
+
return token
|
187
183
|
end
|
188
|
-
return token
|
189
|
-
end
|
190
184
|
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
end
|
185
|
+
def make_tree(parent)
|
186
|
+
token = get_next_token.strip
|
187
|
+
parts = Array.new
|
188
|
+
|
189
|
+
while(token != "" && token != "]" )
|
190
|
+
token_r = token.split(//)
|
191
|
+
case token_r[0]
|
192
|
+
when "["
|
193
|
+
tl = token_r.length
|
194
|
+
token_r = token_r[1, tl - 1]
|
195
|
+
spaceat = token_r.index(" ")
|
196
|
+
newparent = -1
|
197
|
+
|
198
|
+
if spaceat
|
199
|
+
parts[0] = token_r[0, spaceat].join
|
200
|
+
# parts[0] = parts[0].gsub("\\>", "﹥").gsub("\\<", "﹤")
|
201
|
+
|
202
|
+
tl =token_r.length
|
203
|
+
parts[1] = token_r[spaceat, tl - spaceat].join
|
204
|
+
# parts[1] = parts[1].gsub("\\>", "﹥").gsub("\\<", "﹤")
|
205
|
+
|
206
|
+
element = Element.new(@id, parent, parts[0], @level, @fontset, @fontsize)
|
207
|
+
@id += 1
|
208
|
+
@elist.add(element)
|
209
|
+
newparent = element.id
|
210
|
+
|
211
|
+
element = Element.new(@id, @id - 1, parts[1], @level + 1, @fontset, @fontsize)
|
212
|
+
@id += 1
|
213
|
+
@elist.add(element)
|
214
|
+
else
|
215
|
+
# joined = token_r.join.gsub("\\>", "﹥").gsub("\\<", "﹤")
|
216
|
+
joined = token_r.join
|
217
|
+
element = Element.new(@id, parent, joined, @level, @fontset, @fontsize)
|
218
|
+
@id += 1
|
219
|
+
newparent = element.id
|
220
|
+
@elist.add(element)
|
221
|
+
end
|
229
222
|
|
230
|
-
|
231
|
-
|
223
|
+
@level += 1
|
224
|
+
make_tree(newparent)
|
232
225
|
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
226
|
+
else
|
227
|
+
if token.strip != ""
|
228
|
+
element = Element.new(@id, parent, token, @level, @fontset, @fontsize)
|
229
|
+
@id += 1
|
230
|
+
@elist.add(element)
|
231
|
+
end
|
239
232
|
end
|
240
|
-
end
|
241
233
|
|
242
|
-
|
234
|
+
token = get_next_token
|
235
|
+
end
|
236
|
+
@level -= 1
|
243
237
|
end
|
244
|
-
@level -= 1
|
245
238
|
end
|
246
239
|
end
|
247
240
|
|