rsyntaxtree 1.0.8 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,237 +1,221 @@
1
- #!/usr/bin/env ruby
2
- # -*- coding: utf-8 -*-
3
-
4
- #==========================
5
- # string_parser.rb
6
- #==========================
7
- #
8
- # Parses a phrase into leafs and nodes and store the result in an element list
9
- # (see element_list.rb)
10
- # Copyright (c) 2007-2021 Yoichiro Hasebe <yohasebe@gmail.com>
11
-
12
- require 'elementlist'
13
- require 'element'
14
- require 'utils'
15
-
16
- module RSyntaxTree
17
- class StringParser
18
-
19
- attr_accessor :data, :elist, :pos, :id, :level
20
- def initialize(str, fontset, fontsize)
21
- # Clean up the data a little to make processing easier
22
- # repeated newlines => a newline
23
- string = str.gsub(/[\n\r]+/m, "\n")
24
- # a backslash followed by a newline => a backslash followed by an 'n'
25
- string.gsub!(/\\\n\s*/m, "\\n")
26
- # repeated whitespace characters => " "
27
- string.gsub!(/\s+/, " ")
28
- string.gsub!(/\]\s+\[/, "][")
29
- string.gsub!(/\s+\[/, "[")
30
- string.gsub!(/\[\s+/, "[")
31
- string.gsub!(/\s+\]/, "]")
32
- string.gsub!(/\]\s+/, "]")
33
- string.gsub!(/<(\d*)>/) do
34
- num_padding = $1.to_i
35
- if num_padding > 0
36
- result = WHITESPACE_BLOCK * num_padding
37
- else
38
- result = WHITESPACE_BLOCK
39
- end
40
- result
41
- end
42
-
43
- @data = string # Store it for later...
44
- if @data.contains_cjk?
45
- fontset[:normal] = fontset[:cjk]
46
- end
47
- @elist = ElementList.new # Initialize internal element list
48
- @pos = 0 # Position in the sentence
49
- @id = 1 # ID for the next element
50
- @level = 0 # Level in the diagram
51
- @fontset = fontset
52
- @fontsize = fontsize
53
- end
54
-
55
- def self.valid?(data)
56
- if(data.length < 1)
57
- raise RSTError, "Error: input text is empty"
58
- end
59
-
60
- if /\[\s*\]/m =~ data
61
- raise RSTError, "Error: inside the brackets is empty"
62
- end
63
-
64
- text = data.strip
65
- text_r = text.split(//)
66
- open_br, close_br = [], []
67
- escape = false
68
- text_r.each do |chr|
69
- if chr == "\\"
70
- if escape
71
- escape = false
72
- else
73
- escape = true
74
- end
75
- next
76
- end
77
-
78
- if escape && /[\[\]]/ =~ chr
79
- escape = false
80
- next
81
- elsif chr == '['
82
- open_br.push(chr)
83
- elsif chr == ']'
84
- close_br.push(chr)
85
- if open_br.length < close_br.length
86
- break
87
- end
88
- end
89
- escape = false
90
- end
91
-
92
- if open_br.empty? && close_br.empty?
93
- raise RSTError, "Error: input text does not contain paired brackets"
94
- elsif open_br.length == close_br.length
95
- return true
96
- else
97
- raise RSTError, "Error: open and close brackets do not match"
98
- end
99
- end
100
-
101
- def parse
102
- make_tree(0);
103
- @elist.set_hierarchy
104
- end
105
-
106
- def get_elementlist
107
- @elist;
108
- end
109
-
110
- def get_next_token
111
- data = @data.split(//)
112
- gottoken = false
113
- token = ""
114
- i = 0
115
-
116
- if((@pos + 1) >= data.length)
117
- return ""
118
- end
119
-
120
- escape = false
121
- while(((@pos + i) < data.length) && !gottoken)
122
- ch = data[@pos + i];
123
- case ch
124
- when "["
125
- if escape
126
- token += ch
127
- escape = false
128
- else
129
- if(i > 0)
130
- gottoken = true
131
- else
132
- token += ch
133
- end
134
- end
135
- when "]"
136
- if escape
137
- token += ch
138
- escape = false
139
- else
140
- if(i == 0 )
141
- token += ch
142
- end
143
- gottoken = true
144
- end
145
- when "\\"
146
- if escape
147
- token += '\\\\'
148
- escape = false
149
- else
150
- escape = true
151
- end
152
- when " "
153
- if escape
154
- token += '\\n'
155
- escape = false
156
- else
157
- token += ch
158
- end
159
- when /[n{}<>^+*_=~\|\-]/
160
- if escape
161
- token += '\\' + ch
162
- escape = false
163
- else
164
- token += ch
165
- end
166
- else
167
- if escape
168
- token += ch
169
- escape = false
170
- else
171
- token += ch
172
- end
173
- end
174
- i += 1
175
- end
176
-
177
- if(i > 1)
178
- @pos += (i - 1)
179
- else
180
- @pos += 1
181
- end
182
- return token
183
- end
184
-
185
- def make_tree(parent)
186
- token = get_next_token.strip
187
- parts = Array.new
188
-
189
- while(token != "" && token != "]" )
190
- token_r = token.split(//)
191
- case token_r[0]
192
- when "["
193
- tl = token_r.length
194
- token_r = token_r[1, tl - 1]
195
- spaceat = token_r.index(" ")
196
- newparent = -1
197
-
198
- if spaceat
199
- parts[0] = token_r[0, spaceat].join
200
-
201
- tl =token_r.length
202
- parts[1] = token_r[spaceat, tl - spaceat].join
203
-
204
- element = Element.new(@id, parent, parts[0], @level, @fontset, @fontsize)
205
- @id += 1
206
- @elist.add(element)
207
- newparent = element.id
208
-
209
- element = Element.new(@id, @id - 1, parts[1], @level + 1, @fontset, @fontsize)
210
- @id += 1
211
- @elist.add(element)
212
- else
213
- joined = token_r.join
214
- element = Element.new(@id, parent, joined, @level, @fontset, @fontsize)
215
- @id += 1
216
- newparent = element.id
217
- @elist.add(element)
218
- end
219
-
220
- @level += 1
221
- make_tree(newparent)
222
-
223
- else
224
- if token.strip != ""
225
- element = Element.new(@id, parent, token, @level, @fontset, @fontsize)
226
- @id += 1
227
- @elist.add(element)
228
- end
229
- end
230
-
231
- token = get_next_token
232
- end
233
- @level -= 1
234
- end
235
- end
236
- end
237
-
1
+ # frozen_string_literal: true
2
+
3
+ #==========================
4
+ # string_parser.rb
5
+ #==========================
6
+ #
7
+ # Parses a phrase into leaves and nodes and stores the result in an element list
8
+ # (see elementlist.rb)
9
+ # Copyright (c) 2007-2023 Yoichiro Hasebe <yohasebe@gmail.com>
10
+
11
+ require_relative 'elementlist'
12
+ require_relative 'element'
13
+ require_relative 'utils'
14
+
15
module RSyntaxTree
  # Turns a bracket-notation phrase such as "[S [NP a] [VP b]]" into a flat
  # list of Element objects (stored in an ElementList) that reference each
  # other by numeric parent id. Element, ElementList, RSTError,
  # WHITESPACE_BLOCK and String#contains_cjk? are expected to be provided by
  # the rest of the library.
  class StringParser
    attr_accessor :data, :elist, :pos, :id, :level

    # str      - raw bracket-notation text.
    # fontset  - Hash of fonts. NOTE: it is modified in place: when the cleaned
    #            input contains CJK text, fontset[:normal] is overwritten with
    #            fontset[:cjk], so the caller's Hash sees the change as well.
    # fontsize - handed unchanged to every Element.
    # global   - handed unchanged to every Element.
    def initialize(str, fontset, fontsize, global)
      @global = global
      @data = normalize_input(str) # Store it for later...
      fontset[:normal] = fontset[:cjk] if @data.contains_cjk?
      @elist = ElementList.new # Internal element list
      @pos = 0                 # Position in the sentence
      @id = 1                  # ID for the next element
      @level = 0               # Level in the diagram
      @fontset = fontset
      @fontsize = fontsize
    end

    # Checks that +data+ is non-empty, has no empty bracket pair and that its
    # unescaped brackets are balanced. A backslash escapes the character that
    # follows it, so "\[" and "\]" are ordinary text.
    #
    # Returns true when the input is acceptable.
    # Raises RSTError with a descriptive message otherwise.
    def self.valid?(data)
      raise RSTError, "Error: input text is empty" if data.empty?
      raise RSTError, "Error: inside the brackets is empty" if empty_brackets?(data)

      opened = 0
      closed = 0
      escape = false
      data.strip.each_char do |chr|
        if chr == "\\"
          escape = !escape
          next
        end

        if escape
          # Whatever follows a backslash is literal text, brackets included.
          escape = false
          next
        end

        case chr
        when "["
          opened += 1
        when "]"
          closed += 1
          # A closer without a matching opener can never be repaired later.
          break if closed > opened
        end
      end

      raise RSTError, "Error: input text does not contain paired brackets" if opened.zero? && closed.zero?
      raise RSTError, "Error: open and close brackets do not match" unless opened == closed

      true
    end

    # True when an unescaped "[" is followed, after optional whitespace, by an
    # unescaped "]". Escaped characters count as content, so "[A \[]" (a node
    # whose text is a literal "[") is not reported as empty.
    def self.empty_brackets?(text)
      escape = false
      just_opened = false
      text.each_char do |chr|
        if escape
          escape = false
          just_opened = false
        elsif chr == "\\"
          escape = true
          just_opened = false
        elsif chr == "["
          just_opened = true
        elsif chr == "]"
          return true if just_opened
        elsif !/\s/.match?(chr)
          just_opened = false
        end
      end
      false
    end
    private_class_method :empty_brackets?

    # Builds the element list from @data and returns whatever
    # ElementList#set_hierarchy returns.
    def parse
      make_tree(0)
      @elist.set_hierarchy
    end

    # Returns the ElementList filled by #parse.
    def get_elementlist
      @elist
    end

    # Reads the next token starting at @pos and advances @pos.
    #
    # A token is either a lone "]", or a run of text that may begin with "["
    # and ends just before the next unescaped bracket. Returns "" once @pos
    # has reached the last character of @data.
    def get_next_token
      chars = @data.chars
      return "" if (@pos + 1) >= chars.length

      token = +""
      escape = false
      finished = false
      consumed = 0

      while (@pos + consumed) < chars.length && !finished
        ch = chars[@pos + consumed]
        if escape
          token << escaped_form(ch)
          escape = false
        else
          case ch
          when "\\"
            escape = true
          when "["
            # An opener starts a token but terminates one already in progress.
            if consumed.zero?
              token << ch
            else
              finished = true
            end
          when "]"
            # A closer is a token of its own; otherwise it ends the current one.
            token << ch if consumed.zero?
            finished = true
          else
            token << ch
          end
        end
        consumed += 1
      end

      # Leave @pos on the character that ended the token so that the next call
      # starts there; always move forward by at least one character.
      @pos += (consumed > 1 ? consumed - 1 : 1)
      token
    end

    # Consumes tokens until a "]" or the end of input, adding one Element per
    # node/leaf under +parent+ and recursing for every "[" token. @level is
    # incremented before each recursion and decremented on return.
    def make_tree(parent)
      token = get_next_token.strip

      while token != "" && token != "]"
        if token.start_with?("[")
          body = token[1..-1]
          space_at = body.index(" ")

          if space_at
            # "[label content": the label becomes a node, and the rest of the
            # token (leading space included) becomes its first child.
            node = add_element(parent, body[0, space_at], @level)
            new_parent = node.id
            add_element(@id - 1, body[space_at..-1], @level + 1)
          else
            node = add_element(parent, body, @level)
            new_parent = node.id
          end

          @level += 1
          make_tree(new_parent)
        elsif token.strip != ""
          add_element(parent, token, @level)
        end

        token = get_next_token
      end
      @level -= 1
    end

    private

    # Collapses whitespace, removes whitespace around brackets and expands
    # "<N>" / "<>" padding markers into WHITESPACE_BLOCK repetitions.
    def normalize_input(str)
      # repeated newlines => a newline
      text = str.gsub(/[\n\r]+/m, "\n")
      # a backslash followed by a newline => a backslash followed by an 'n'
      text.gsub!(/\\\n\s*/m, "\\n")
      # repeated whitespace characters => " "
      text.gsub!(/\s+/, " ")
      text.gsub!(/\]\s+\[/, "][")
      text.gsub!(/\s+\[/, "[")
      text.gsub!(/\[\s+/, "[")
      text.gsub!(/\s+\]/, "]")
      text.gsub!(/\]\s+/, "]")
      text.gsub!(/<(\d*)>/) do
        count = Regexp.last_match(1).to_i
        # "<>" and "<0>" still produce a single padding block.
        WHITESPACE_BLOCK * (count.positive? ? count : 1)
      end
      text
    end

    # Text appended to a token for a character that followed a backslash.
    def escaped_form(char)
      case char
      when "\\" then "\\\\"
      when " " then "\\n"
      when /[n{}<>^+*_=~|-]/ then "\\#{char}"
      else char
      end
    end

    # Creates an Element with the next free id, registers it in @elist and
    # returns it.
    def add_element(parent, content, level)
      element = Element.new(@id, parent, content, level, @fontset, @fontsize, @global)
      @id += 1
      @elist.add(element)
      element
    end
  end
end