rsyntaxtree 1.0.8 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,237 +1,221 @@
1
- #!/usr/bin/env ruby
2
- # -*- coding: utf-8 -*-
3
-
4
- #==========================
5
- # string_parser.rb
6
- #==========================
7
- #
8
- # Parses a phrase into leafs and nodes and store the result in an element list
9
- # (see element_list.rb)
10
- # Copyright (c) 2007-2021 Yoichiro Hasebe <yohasebe@gmail.com>
11
-
12
- require 'elementlist'
13
- require 'element'
14
- require 'utils'
15
-
16
- module RSyntaxTree
17
- class StringParser
18
-
19
- attr_accessor :data, :elist, :pos, :id, :level
20
- def initialize(str, fontset, fontsize)
21
- # Clean up the data a little to make processing easier
22
- # repeated newlines => a newline
23
- string = str.gsub(/[\n\r]+/m, "\n")
24
- # a backslash followed by a newline => a backslash followed by an 'n'
25
- string.gsub!(/\\\n\s*/m, "\\n")
26
- # repeated whitespace characters => " "
27
- string.gsub!(/\s+/, " ")
28
- string.gsub!(/\]\s+\[/, "][")
29
- string.gsub!(/\s+\[/, "[")
30
- string.gsub!(/\[\s+/, "[")
31
- string.gsub!(/\s+\]/, "]")
32
- string.gsub!(/\]\s+/, "]")
33
- string.gsub!(/<(\d*)>/) do
34
- num_padding = $1.to_i
35
- if num_padding > 0
36
- result = WHITESPACE_BLOCK * num_padding
37
- else
38
- result = WHITESPACE_BLOCK
39
- end
40
- result
41
- end
42
-
43
- @data = string # Store it for later...
44
- if @data.contains_cjk?
45
- fontset[:normal] = fontset[:cjk]
46
- end
47
- @elist = ElementList.new # Initialize internal element list
48
- @pos = 0 # Position in the sentence
49
- @id = 1 # ID for the next element
50
- @level = 0 # Level in the diagram
51
- @fontset = fontset
52
- @fontsize = fontsize
53
- end
54
-
55
- def self.valid?(data)
56
- if(data.length < 1)
57
- raise RSTError, "Error: input text is empty"
58
- end
59
-
60
- if /\[\s*\]/m =~ data
61
- raise RSTError, "Error: inside the brackets is empty"
62
- end
63
-
64
- text = data.strip
65
- text_r = text.split(//)
66
- open_br, close_br = [], []
67
- escape = false
68
- text_r.each do |chr|
69
- if chr == "\\"
70
- if escape
71
- escape = false
72
- else
73
- escape = true
74
- end
75
- next
76
- end
77
-
78
- if escape && /[\[\]]/ =~ chr
79
- escape = false
80
- next
81
- elsif chr == '['
82
- open_br.push(chr)
83
- elsif chr == ']'
84
- close_br.push(chr)
85
- if open_br.length < close_br.length
86
- break
87
- end
88
- end
89
- escape = false
90
- end
91
-
92
- if open_br.empty? && close_br.empty?
93
- raise RSTError, "Error: input text does not contain paired brackets"
94
- elsif open_br.length == close_br.length
95
- return true
96
- else
97
- raise RSTError, "Error: open and close brackets do not match"
98
- end
99
- end
100
-
101
- def parse
102
- make_tree(0);
103
- @elist.set_hierarchy
104
- end
105
-
106
- def get_elementlist
107
- @elist;
108
- end
109
-
110
- def get_next_token
111
- data = @data.split(//)
112
- gottoken = false
113
- token = ""
114
- i = 0
115
-
116
- if((@pos + 1) >= data.length)
117
- return ""
118
- end
119
-
120
- escape = false
121
- while(((@pos + i) < data.length) && !gottoken)
122
- ch = data[@pos + i];
123
- case ch
124
- when "["
125
- if escape
126
- token += ch
127
- escape = false
128
- else
129
- if(i > 0)
130
- gottoken = true
131
- else
132
- token += ch
133
- end
134
- end
135
- when "]"
136
- if escape
137
- token += ch
138
- escape = false
139
- else
140
- if(i == 0 )
141
- token += ch
142
- end
143
- gottoken = true
144
- end
145
- when "\\"
146
- if escape
147
- token += '\\\\'
148
- escape = false
149
- else
150
- escape = true
151
- end
152
- when " "
153
- if escape
154
- token += '\\n'
155
- escape = false
156
- else
157
- token += ch
158
- end
159
- when /[n{}<>^+*_=~\|\-]/
160
- if escape
161
- token += '\\' + ch
162
- escape = false
163
- else
164
- token += ch
165
- end
166
- else
167
- if escape
168
- token += ch
169
- escape = false
170
- else
171
- token += ch
172
- end
173
- end
174
- i += 1
175
- end
176
-
177
- if(i > 1)
178
- @pos += (i - 1)
179
- else
180
- @pos += 1
181
- end
182
- return token
183
- end
184
-
185
- def make_tree(parent)
186
- token = get_next_token.strip
187
- parts = Array.new
188
-
189
- while(token != "" && token != "]" )
190
- token_r = token.split(//)
191
- case token_r[0]
192
- when "["
193
- tl = token_r.length
194
- token_r = token_r[1, tl - 1]
195
- spaceat = token_r.index(" ")
196
- newparent = -1
197
-
198
- if spaceat
199
- parts[0] = token_r[0, spaceat].join
200
-
201
- tl =token_r.length
202
- parts[1] = token_r[spaceat, tl - spaceat].join
203
-
204
- element = Element.new(@id, parent, parts[0], @level, @fontset, @fontsize)
205
- @id += 1
206
- @elist.add(element)
207
- newparent = element.id
208
-
209
- element = Element.new(@id, @id - 1, parts[1], @level + 1, @fontset, @fontsize)
210
- @id += 1
211
- @elist.add(element)
212
- else
213
- joined = token_r.join
214
- element = Element.new(@id, parent, joined, @level, @fontset, @fontsize)
215
- @id += 1
216
- newparent = element.id
217
- @elist.add(element)
218
- end
219
-
220
- @level += 1
221
- make_tree(newparent)
222
-
223
- else
224
- if token.strip != ""
225
- element = Element.new(@id, parent, token, @level, @fontset, @fontsize)
226
- @id += 1
227
- @elist.add(element)
228
- end
229
- end
230
-
231
- token = get_next_token
232
- end
233
- @level -= 1
234
- end
235
- end
236
- end
237
-
1
+ # frozen_string_literal: true
2
+
3
+ #==========================
4
+ # string_parser.rb
5
+ #==========================
6
+ #
7
+ # Parses a phrase into leafs and nodes and store the result in an element list
8
+ # (see element_list.rb)
9
+ # Copyright (c) 2007-2023 Yoichiro Hasebe <yohasebe@gmail.com>
10
+
11
+ require_relative 'elementlist'
12
+ require_relative 'element'
13
+ require_relative 'utils'
14
+
15
module RSyntaxTree
  # Parses a labelled-bracket phrase (e.g. "[S [NP a][VP b]]") into leaf and
  # node elements and stores them, in reading order, in an ElementList.
  #
  # Relies on names defined outside this class: Element, ElementList,
  # RSTError, WHITESPACE_BLOCK and String#contains_cjk?.
  class StringParser
    attr_accessor :data, :elist, :pos, :id, :level

    # Characters that keep their leading backslash when escaped inside a token.
    ESCAPED_MARKUP = /[n{}<>^+*_=~|-]/.freeze

    # Normalizes the input text and resets the parser state.
    #
    # @param str      [String] raw bracket notation
    # @param fontset  [Hash] font table; NOTE: it is modified in place
    #   (fontset[:normal] is replaced by fontset[:cjk]) when the normalized
    #   text reports contains_cjk?
    # @param fontsize [Object] passed through unchanged to every Element
    # @param global   [Object] passed through unchanged to every Element
    def initialize(str, fontset, fontsize, global)
      @global = global
      # Clean up the data a little to make processing easier
      # repeated newlines => a newline
      string = str.gsub(/[\n\r]+/m, "\n")
      # a backslash followed by a newline => a backslash followed by an 'n'
      string.gsub!(/\\\n\s*/m, "\\n")
      # repeated whitespace characters => " "
      string.gsub!(/\s+/, " ")
      # whitespace touching a bracket carries no meaning; drop it
      string.gsub!(/\]\s+\[/, "][")
      string.gsub!(/\s+\[/, "[")
      string.gsub!(/\[\s+/, "[")
      string.gsub!(/\s+\]/, "]")
      string.gsub!(/\]\s+/, "]")
      # "<N>" => N copies of WHITESPACE_BLOCK; "<>" and "<0>" => one copy
      string.gsub!(/<(\d*)>/) do
        num_padding = Regexp.last_match(1).to_i
        num_padding.positive? ? WHITESPACE_BLOCK * num_padding : WHITESPACE_BLOCK
      end

      @data = string # Store it for later...
      fontset[:normal] = fontset[:cjk] if @data.contains_cjk?
      @elist = ElementList.new # Initialize internal element list
      @pos = 0                 # Position in the sentence
      @id = 1                  # ID for the next element
      @level = 0               # Level in the diagram
      @fontset = fontset
      @fontsize = fontsize
    end

    # Checks that the text is non-empty, has no empty bracket pair and that
    # its unescaped brackets are balanced.
    #
    # @param data [String] raw bracket notation
    # @return [true] when the text passes every check
    # @raise [RSTError] with a message naming the first failed check
    def self.valid?(data)
      raise RSTError, "Error: input text is empty" if data.empty?
      raise RSTError, "Error: inside the brackets is empty" if /\[\s*\]/m.match?(data)

      open_count = 0
      close_count = 0
      escape = false
      data.strip.each_char do |chr|
        if chr == "\\"
          # a second backslash cancels the first
          escape = !escape
          next
        end

        if escape && (chr == "[" || chr == "]")
          # escaped brackets are literal text, not structure
          escape = false
          next
        end

        case chr
        when "["
          open_count += 1
        when "]"
          close_count += 1
          # a closer without a matching opener can never be repaired later
          break if open_count < close_count
        end
        escape = false
      end

      if open_count.zero? && close_count.zero?
        raise RSTError, "Error: input text does not contain paired brackets"
      end
      raise RSTError, "Error: open and close brackets do not match" unless open_count == close_count

      true
    end

    # Builds the element list from the normalized text, then asks the list to
    # compute its hierarchy.
    #
    # @return [Object] whatever ElementList#set_hierarchy returns
    def parse
      make_tree(0)
      @elist.set_hierarchy
    end

    # @return [ElementList] the list populated by #parse
    def get_elementlist
      @elist
    end

    # Reads the next token starting at @pos and advances @pos.
    #
    # A token is either a lone "]", or an optional leading "[" followed by all
    # text up to (not including) the next unescaped bracket. Escape sequences
    # are rewritten as described in #escaped_text.
    #
    # @return [String] the token, or "" once at most one character remains
    def get_next_token
      chars = @data.chars
      return "" if (@pos + 1) >= chars.length

      token = +""
      consumed = 0
      escape = false
      finished = false

      while (@pos + consumed) < chars.length && !finished
        ch = chars[@pos + consumed]
        if escape
          token << escaped_text(ch)
          escape = false
        elsif ch == "["
          # an opening bracket starts a token but terminates a running one
          if consumed.positive?
            finished = true
          else
            token << ch
          end
        elsif ch == "]"
          # a closing bracket is a token of its own only at the start
          token << ch if consumed.zero?
          finished = true
        elsif ch == "\\"
          escape = true
        else
          token << ch
        end
        consumed += 1
      end

      # Step back onto the terminating bracket so the next call sees it;
      # always move forward by at least one character.
      @pos += consumed > 1 ? consumed - 1 : 1
      token
    end

    # Recursively consumes tokens and appends the matching elements to @elist.
    # Returns when the text is exhausted or a "]" token closes the current
    # node; @level is decremented on the way out.
    #
    # @param parent [Integer] id of the element new children are attached to
    def make_tree(parent)
      token = get_next_token.strip

      while token != "" && token != "]"
        if token.start_with?("[")
          body = token[1..-1]
          spaceat = body.index(" ")

          if spaceat
            # "[label text" => a node for the label plus a child for the text
            # (the text keeps its leading space)
            node = add_element(parent, body[0, spaceat], @level)
            add_element(@id - 1, body[spaceat..-1], @level + 1)
          else
            node = add_element(parent, body, @level)
          end

          @level += 1
          make_tree(node.id)
        elsif token.strip != ""
          add_element(parent, token, @level)
        end
        token = get_next_token
      end
      @level -= 1
    end

    private

    # Creates an Element with the next free id, records it and returns it.
    def add_element(parent, content, level)
      element = Element.new(@id, parent, content, level, @fontset, @fontsize, @global)
      @id += 1
      @elist.add(element)
      element
    end

    # Text appended to a token for a character that follows a backslash:
    # backslash => two backslashes, space => backslash + "n", markup
    # characters keep their backslash, anything else loses it.
    def escaped_text(char)
      case char
      when "\\" then "\\\\"
      when " " then "\\n"
      when ESCAPED_MARKUP then "\\#{char}"
      else char
      end
    end
  end
end