meteor 0.9.12 → 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,143 @@
1
+ # -* coding: UTF-8 -*-
2
+ # frozen_string_literal: true
3
+
4
+ module Meteor
5
+ module Ml
6
+ module Xhtml
7
+ #
8
+ # XHTML parser (XHTMLパーサ)
9
+ #
10
+ class ParserImpl < Meteor::Ml::Xhtml4::ParserImpl
11
+ #[Array] attributes whose value is given as a boolean (logical) value
12
+ ATTR_LOGIC = ["disabled", "readonly", "checked", "selected", "multiple", "required"]
13
+
14
+ #[Array] elements that have the disabled attribute
15
+ DISABLE_ELEMENT = ["input", "textarea", "select", "optgroup", "fieldset"]
16
+
17
+ #[Array] elements that have the required attribute
18
+ REQUIRE_ELEMENT = ["input", "textarea"]
19
+
20
+ REQUIRED_M = "\\srequired=\"[^\"]*\"\\s|\\srequired=\"[^\"]*\"$"
21
+ REQUIRED_M1 = "\\srequired=\"([^\"]*)\"\\s|\\srequired=\"([^\"]*)\"$"
22
+ REQUIRED_R = "required=\"[^\"]*\""
23
+ REQUIRED_U = "required=\"required\""
24
+
25
+ @@pattern_required_m = Regexp.new(REQUIRED_M)
26
+ @@pattern_required_m1 = Regexp.new(REQUIRED_M1)
27
+ @@pattern_required_r = Regexp.new(REQUIRED_R)
28
+
29
#
# Initializer.
#
# Runs the parent initializer without arguments, installs ATTR_LOGIC as
# @@attr_logic, sets the document type to Parser::XHTML and, when one
# argument is given, copies that parser's state through initialize_1.
#
# NOTE(review): @@attr_logic is also defined on Meteor::Ml::Xhtml4::ParserImpl
# (the superclass). Ruby shares class variables along the inheritance chain,
# so this assignment appears to change the list the parent class sees as
# well - confirm that this is intended.
#
# @overload initialize
# @overload initialize(ps)
#   @param [Meteor::Parser] ps parser whose state is copied
# @raise [ArgumentError] when the argument count is neither ZERO nor ONE
#
def initialize(*args)
  super()
  @@attr_logic = ATTR_LOGIC
  @doc_type = Parser::XHTML

  argc = args.length
  if argc == ZERO
    # no source parser: nothing to copy
  elsif argc == ONE
    initialize_1(args[0])
  else
    raise ArgumentError
  end
end
48
+
49
+ #
50
+ # initializer (イニシャライザ)
51
+ #
52
+ # def initialize_0
53
+ # end
54
+ #
55
+ # private :initialize_0
56
+
57
#
# Copy-initializer: takes over the state of another parser.
# @param [Meteor::Parser] ps parser to copy from
#
def initialize_1(ps)
  source_root = ps.root_element

  # String.new produces separate String objects for the copied values
  @root.document = String.new(ps.document)
  self.document_hook = String.new(ps.document_hook)
  @root.content_type = String.new(source_root.content_type)

  # charset and newline code are assigned as-is
  @root.charset = source_root.charset
  @root.kaigyo_code = source_root.kaigyo_code
end

private :initialize_1
70
+
71
#
# Analyze the document and set the charset and content type on @root.
#
# Looks up a <meta charset="..."> element through element_3 with
# @error_check switched off for the duration of the lookup. The charset
# attribute of the element found is used; "utf-8" is the fallback when no
# element or no attribute value is available. The content type is always
# set to "text/html".
#
# NOTE(review): the retry below passes exactly the same arguments as the
# first lookup - confirm whether a different lookup was intended.
#
def analyze_content_type
  @error_check = false
  element_3("meta", "charset", "[a-zA-Z-]+", false)
  element_3("meta", "charset", "[a-zA-Z-]+", false) unless @elm_
  @error_check = true

  declared = @elm_ ? @elm_.attr("charset") : nil
  @root.charset = declared || "utf-8"

  @root.content_type = "text/html"
end

private :analyze_content_type
98
+
99
#
# Set an attribute, routing boolean-style attributes on their matching
# elements to edit_attrs_5 and everything else to the parent implementation.
#
# NOTE(review): the "checked" rule only applies to input elements whose type
# matches "radio" - confirm whether other input types should be covered.
#
# @param elm element being edited
# @param attr_name attribute name
# @param attr_value attribute value
#
def edit_attrs_(elm, attr_name, attr_value)
  # [pattern detecting the attribute, pattern to replace, canonical attribute text]
  rule =
    if is_match("selected", attr_name) && is_match("option", elm.name)
      [@@pattern_selected_m, @@pattern_selected_r, SELECTED_U]
    elsif is_match("multiple", attr_name) && is_match("select", elm.name)
      [@@pattern_multiple_m, @@pattern_multiple_r, MULTIPLE_U]
    elsif is_match("disabled", attr_name) && is_match(DISABLE_ELEMENT, elm.name)
      [@@pattern_disabled_m, @@pattern_disabled_r, DISABLED_U]
    elsif is_match("checked", attr_name) && is_match("input", elm.name) && is_match("radio", get_type(elm))
      [@@pattern_checked_m, @@pattern_checked_r, CHECKED_U]
    elsif is_match("readonly", attr_name) &&
          (is_match("textarea", elm.name) || (is_match("input", elm.name) && is_match(READONLY_TYPE, get_type(elm))))
      [@@pattern_readonly_m, @@pattern_readonly_r, READONLY_U]
    elsif is_match("required", attr_name) && is_match(REQUIRE_ELEMENT, elm.name)
      [@@pattern_required_m, @@pattern_required_r, REQUIRED_U]
    end

  return super(elm, attr_name, attr_value) unless rule

  edit_attrs_5(elm, attr_value, *rule)
end

private :edit_attrs_
119
+
120
#
# Read an attribute value, routing boolean-style attributes on their
# matching elements to get_attr_value_r and everything else to the parent
# implementation.
#
# @param elm element to read from
# @param attr_name attribute name
#
def get_attr_value_(elm, attr_name)
  capture_pattern =
    if is_match("selected", attr_name) && is_match("option", elm.name)
      @@pattern_selected_m1
    elsif is_match("multiple", attr_name) && is_match("select", elm.name)
      @@pattern_multiple_m1
    elsif is_match("disabled", attr_name) && is_match(DISABLE_ELEMENT, elm.name)
      @@pattern_disabled_m1
    elsif is_match("checked", attr_name) && is_match("input", elm.name) && is_match("radio", get_type(elm))
      @@pattern_checked_m1
    elsif is_match("readonly", attr_name) &&
          (is_match("textarea", elm.name) || (is_match("input", elm.name) && is_match(READONLY_TYPE, get_type(elm))))
      @@pattern_readonly_m1
    elsif is_match("required", attr_name) && is_match(REQUIRE_ELEMENT, elm.name)
      @@pattern_required_m1
    end

  return super(elm, attr_name) unless capture_pattern

  get_attr_value_r(elm, attr_name, capture_pattern)
end

private :get_attr_value_
140
+ end
141
+ end
142
+ end
143
+ end
@@ -0,0 +1,406 @@
1
+ # -* coding: UTF-8 -*-
2
+ # frozen_string_literal: true
3
+
4
+ module Meteor
5
+ module Ml
6
+ module Xhtml4
7
+ #
8
+ # XHTML4 parser (XHTML4パーサ)
9
+ #
10
+ class ParserImpl < Meteor::Core::Kernel
11
+
12
# Newline sequences, listed with CRLF before the single-character forms.
# (earlier form: KAIGYO_CODE = "\r?\n|\r")
KAIGYO_CODE = ["\r\n", "\n", "\r"]
BR = "<br/>"

# (earlier form: @@match_tag_2 = "textarea|option|pre")
#[Array] elements whose newlines do not need to be converted to <br/>
@@match_tag_2 = ["textarea", "option", "pre"]

#[Array] attributes whose value is given as a boolean (logical) value
@@attr_logic = ["disabled", "readonly", "checked", "selected", "multiple"]

# (earlier form: DISABLE_ELEMENT = "input|textarea|select|optgroup")
#[Array] elements that have the disabled attribute
DISABLE_ELEMENT = ["input", "textarea", "select", "optgroup"]
# (earlier form: READONLY_TYPE = "text|password")
#[Array] input element types that have the readonly attribute
READONLY_TYPE = ["text", "password"]

# For each boolean attribute:
#   *_M  detects the attribute inside an attribute string
#   *_M1 same as *_M, but captures the attribute value
#   *_R  locates the attribute text to replace or remove
#   *_U  canonical attribute text written on update
SELECTED_M = "\\sselected=\"[^\"]*\"\\s|\\sselected=\"[^\"]*\"$"
SELECTED_M1 = "\\sselected=\"([^\"]*)\"\\s|\\sselected=\"([^\"]*)\"$"
SELECTED_R = "selected=\"[^\"]*\""
SELECTED_U = "selected=\"selected\""
CHECKED_M = "\\schecked=\"[^\"]*\"\\s|\\schecked=\"[^\"]*\"$"
CHECKED_M1 = "\\schecked=\"([^\"]*)\"\\s|\\schecked=\"([^\"]*)\"$"
CHECKED_R = "checked=\"[^\"]*\""
CHECKED_U = "checked=\"checked\""
DISABLED_M = "\\sdisabled=\"[^\"]*\"\\s|\\sdisabled=\"[^\"]*\"$"
DISABLED_M1 = "\\sdisabled=\"([^\"]*)\"\\s|\\sdisabled=\"([^\"]*)\"$"
DISABLED_R = "disabled=\"[^\"]*\""
DISABLED_U = "disabled=\"disabled\""
READONLY_M = "\\sreadonly=\"[^\"]*\"\\s|\\sreadonly=\"[^\"]*\"$"
READONLY_M1 = "\\sreadonly=\"([^\"]*)\"\\s|\\sreadonly=\"([^\"]*)\"$"
READONLY_R = "readonly=\"[^\"]*\""
READONLY_U = "readonly=\"readonly\""
MULTIPLE_M = "\\smultiple=\"[^\"]*\"\\s|\\smultiple=\"[^\"]*\"$"
MULTIPLE_M1 = "\\smultiple=\"([^\"]*)\"\\s|\\smultiple=\"([^\"]*)\"$"
MULTIPLE_R = "multiple=\"[^\"]*\""
MULTIPLE_U = "multiple=\"multiple\""

# Character entity references recognised when unescaping
PATTERN_UNESCAPE = "&(amp|quot|apos|gt|lt|nbsp);"

@@pattern_selected_m = Regexp.new(SELECTED_M)
@@pattern_selected_m1 = Regexp.new(SELECTED_M1)
@@pattern_selected_r = Regexp.new(SELECTED_R)
@@pattern_checked_m = Regexp.new(CHECKED_M)
@@pattern_checked_m1 = Regexp.new(CHECKED_M1)
@@pattern_checked_r = Regexp.new(CHECKED_R)
@@pattern_disabled_m = Regexp.new(DISABLED_M)
@@pattern_disabled_m1 = Regexp.new(DISABLED_M1)
@@pattern_disabled_r = Regexp.new(DISABLED_R)
@@pattern_readonly_m = Regexp.new(READONLY_M)
@@pattern_readonly_m1 = Regexp.new(READONLY_M1)
@@pattern_readonly_r = Regexp.new(READONLY_R)
@@pattern_multiple_m = Regexp.new(MULTIPLE_M)
@@pattern_multiple_m1 = Regexp.new(MULTIPLE_M1)
@@pattern_multiple_r = Regexp.new(MULTIPLE_R)

@@pattern_unescape = Regexp.new(PATTERN_UNESCAPE)

@@pattern_br_2 = Regexp.new("<br\\/>")

# @@pattern_match_tag = Regexp.new(@@match_tag)
# @@pattern_match_tag2 = Regexp.new(@@match_tag_2)

# Replacement table for attribute values
TABLE_FOR_ESCAPE_ = {
  "&" => "&amp;",
  "\"" => "&quot;",
  "'" => "&apos;",
  "<" => "&lt;",
  ">" => "&gt;",
  " " => "&nbsp;"
}

# Replacement table for element content; every newline form becomes <br/>
TABLE_FOR_ESCAPE_CONTENT_ = {
  "&" => "&amp;",
  "\"" => "&quot;",
  "'" => "&apos;",
  "<" => "&lt;",
  ">" => "&gt;",
  " " => "&nbsp;",
  "\r\n" => "<br/>",
  "\r" => "<br/>",
  "\n" => "<br/>"
}

PATTERN_ESCAPE = "[&\"'<> ]"
# CRLF comes first so it is consumed as one match and maps to a single
# <br/> through TABLE_FOR_ESCAPE_CONTENT_; a lone CR or LF is matched by the
# character class. (The earlier pattern matched only "\n", which left the
# table's "\r\n" and "\r" entries unreachable.)
PATTERN_ESCAPE_CONTENT = "\\r\\n|[&\"'<> \\r\\n]"
@@pattern_escape = Regexp.new(PATTERN_ESCAPE)
@@pattern_escape_content = Regexp.new(PATTERN_ESCAPE_CONTENT)
101
+
102
#
# Initializer.
#
# Runs the parent initializer without arguments, sets the document type to
# Parser::XHTML4 and, when one argument is given, copies that parser's state
# through initialize_1.
#
# @overload initialize
# @overload initialize(ps)
#   @param [Meteor::Parser] ps parser whose state is copied
# @raise [ArgumentError] when the argument count is neither ZERO nor ONE
#
def initialize(*args)
  super()
  @doc_type = Parser::XHTML4

  argc = args.length
  if argc == ZERO
    # no source parser: nothing to copy
  elsif argc == ONE
    initialize_1(args[0])
  else
    raise ArgumentError
  end
end
120
+
121
+ #
122
+ # initializer (イニシャライザ)
123
+ #
124
+ # def initialize_0
125
+ # end
126
+ #
127
+ # private :initialize_0
128
+
129
#
# Copy-initializer: takes over the state of another parser.
# @param [Meteor::Parser] ps parser to copy from
#
def initialize_1(ps)
  source_root = ps.root_element

  # String.new produces separate String objects for the copied values
  @root.document = String.new(ps.document)
  self.document_hook = String.new(ps.document_hook)
  @root.content_type = String.new(source_root.content_type)

  # charset and newline code are assigned as-is
  @root.charset = source_root.charset
  @root.kaigyo_code = source_root.kaigyo_code
end

private :initialize_1
142
+
143
+ #
144
+ # Parse the document; delegates to analyze_ml.
145
+ #
146
+ def parse
147
+ analyze_ml
148
+ end
149
+
150
+ #
150
+ # Analyze the document: runs analyze_content_type, then analyze_kaigyo_code, and finally resets @res to nil.
151
+ #
152
+ def analyze_ml
153
+ analyze_content_type
154
+ analyze_kaigyo_code
155
+ @res = nil
156
+ end
157
+
158
+ private :analyze_ml
160
+
161
+ #
161
+ # Get the content type stored on the root element (@root).
162
+ # @return [String] content type
163
+ #
164
+ def content_type
165
+ @root.content_type
166
+ end
168
+
169
#
# Analyze the document and set the content type and charset on @root.
#
# Looks up <meta http-equiv="Content-Type"> through element_3 with
# @error_check switched off for the duration of the lookup. The element's
# content attribute is split on ";": the first part becomes the content
# type, and the text after "=" in the second part becomes the charset.
# Missing pieces are stored as "". When no element is found only the
# content type is set (to ""); the charset is not assigned.
#
# NOTE(review): the retry below passes exactly the same arguments as the
# first lookup - confirm whether a different lookup was intended.
#
def analyze_content_type
  @error_check = false
  element_3("meta", "http-equiv", "Content-Type")
  element_3("meta", "http-equiv", "Content-Type") unless @elm_
  @error_check = true

  if @elm_
    media_type, charset_param = @elm_.attr("content")&.split(";")
    @root.content_type = media_type || ""
    @root.charset = charset_param&.split("=")&.at(1) || ""
  else
    @root.content_type = ""
  end
end

private :analyze_content_type
194
+
195
#
# Analyze the document and set the newline code on @root.
#
# KAIGYO_CODE is scanned in order and the FIRST sequence contained in the
# document is stored. With CRLF listed ahead of LF and CR, a CRLF document
# is recorded as "\r\n" rather than as one of its component characters.
# (A loop that keeps overwriting on every hit ends with the last matching
# entry, i.e. "\r" for a CRLF document.)
# When none of the sequences occurs, @root.kaigyo_code is left untouched.
#
def analyze_kaigyo_code
  detected = KAIGYO_CODE.find { |code| @root.document.include?(code) }
  @root.kaigyo_code = detected if detected
end

private :analyze_kaigyo_code
207
+
208
#
# Set an attribute, routing boolean-style attributes on their matching
# elements to edit_attrs_5 and everything else to the parent implementation.
#
# NOTE(review): the "checked" rule only applies to input elements whose type
# matches "radio" - confirm whether other input types should be covered.
#
# @param elm element being edited
# @param attr_name attribute name
# @param attr_value attribute value
#
def edit_attrs_(elm, attr_name, attr_value)
  # [pattern detecting the attribute, pattern to replace, canonical attribute text]
  rule =
    if is_match("selected", attr_name) && is_match("option", elm.name)
      [@@pattern_selected_m, @@pattern_selected_r, SELECTED_U]
    elsif is_match("multiple", attr_name) && is_match("select", elm.name)
      [@@pattern_multiple_m, @@pattern_multiple_r, MULTIPLE_U]
    elsif is_match("disabled", attr_name) && is_match(DISABLE_ELEMENT, elm.name)
      [@@pattern_disabled_m, @@pattern_disabled_r, DISABLED_U]
    elsif is_match("checked", attr_name) && is_match("input", elm.name) && is_match("radio", get_type(elm))
      [@@pattern_checked_m, @@pattern_checked_r, CHECKED_U]
    elsif is_match("readonly", attr_name) &&
          (is_match("textarea", elm.name) || (is_match("input", elm.name) && is_match(READONLY_TYPE, get_type(elm))))
      [@@pattern_readonly_m, @@pattern_readonly_r, READONLY_U]
    end

  return super(elm, attr_name, attr_value) unless rule

  edit_attrs_5(elm, attr_value, *rule)
end

private :edit_attrs_
226
+
227
#
# Switch a boolean-style attribute on or off inside elm.attributes.
#
# A value of true (or one that is_match treats as "true") writes
# replace_update: an existing occurrence is replaced, otherwise the
# attribute text is appended. A value of false (or one that is_match treats
# as "false") removes the text matched by replace_regex. Any other value
# leaves the attributes untouched. The match result is kept in @res.
#
# @param elm element whose attribute string is edited
# @param attr_value requested state
# @param [Regexp] match_p detects whether the attribute is already present
# @param [Regexp] replace_regex locates the attribute text to replace/remove
# @param [String] replace_update attribute text to write
#
def edit_attrs_5(elm, attr_value, match_p, replace_regex, replace_update)
  if true.equal?(attr_value) || is_match("true", attr_value)
    @res = match_p.match(elm.attributes)

    if @res
      # already present: replace the attribute text
      elm.attributes.gsub!(replace_regex, replace_update)
    else
      # not present: normalise the existing text to one leading space,
      # then append the new attribute at the end
      elm.attributes = String.new(" ") << elm.attributes.strip if elm.attributes != ""
      elm.attributes << " " << replace_update
    end
  elsif false.equal?(attr_value) || is_match("false", attr_value)
    # remove the attribute text if it exists
    elm.attributes.gsub!(replace_regex, "")
  end
end

private :edit_attrs_5
253
+
254
#
# Read an attribute value, routing boolean-style attributes on their
# matching elements to get_attr_value_r and everything else to the parent
# implementation.
#
# The disabled rule compares against "disabled"; a misspelt literal here
# ("diabled") makes the rule unreachable and sends disabled lookups to the
# parent implementation instead.
#
# @param elm element to read from
# @param attr_name attribute name
#
def get_attr_value_(elm, attr_name)
  capture_pattern =
    if is_match("selected", attr_name) && is_match("option", elm.name)
      @@pattern_selected_m1
    elsif is_match("multiple", attr_name) && is_match("select", elm.name)
      @@pattern_multiple_m1
    elsif is_match("disabled", attr_name) && is_match(DISABLE_ELEMENT, elm.name)
      @@pattern_disabled_m1
    elsif is_match("checked", attr_name) && is_match("input", elm.name) && is_match("radio", get_type(elm))
      @@pattern_checked_m1
    elsif is_match("readonly", attr_name) &&
          (is_match("textarea", elm.name) || (is_match("input", elm.name) && is_match(READONLY_TYPE, get_type(elm))))
      @@pattern_readonly_m1
    end

  return super(elm, attr_name) unless capture_pattern

  get_attr_value_r(elm, attr_name, capture_pattern)
end

private :get_attr_value_
272
+
273
#
# Get the element's type, caching it in elm.type_value.
#
# When no value is cached, the "type" attribute is read through
# get_attr_value, falling back to "TYPE" when that yields nothing.
#
# @param elm element to inspect
# @return cached or freshly read type value
#
def get_type(elm)
  unless elm.type_value
    elm.type_value = get_attr_value(elm, "type") || get_attr_value(elm, "TYPE")
  end

  elm.type_value
end

private :get_type
285
+
286
#
# Read a boolean-style attribute from elm.attributes using match_p.
#
# The first non-nil capture among groups 1..4 is the attribute value.
# The match result is kept in @res.
#
# @param elm element to read from
# @param attr_name attribute name
# @param [Regexp] match_p pattern capturing the attribute value
# @return [String, nil] "false" when the pattern does not match; "true" when
#   the captured value equals attr_name; otherwise the captured value
#   (nil when the pattern matched but none of the groups captured anything)
#
def get_attr_value_r(elm, attr_name, match_p)
  @res = match_p.match(elm.attributes)
  return "false" unless @res

  captured = @res.values_at(1, 2, 3, 4).compact.first
  return nil if captured.nil?

  attr_name == captured ? "true" : captured
end

private :get_attr_value_r
321
+
322
#
# Get the attribute map of an element.
#
# Every name/value pair scanned from elm.attributes is stored; a pair whose
# name is one of the @@attr_logic attributes and whose value equals its name
# is stored as "true", any other value is passed through unescape.
#
# @param [Meteor::Element] elm element
# @return [Meteor::AttributeMap] attribute map (with recordable set to true)
#
def get_attr_map(elm)
  attr_map = Meteor::AttributeMap.new

  elm.attributes.scan(@@pattern_get_attrs_map) do |name, raw_value|
    boolean_form = is_match(@@attr_logic, name) && name == raw_value
    attr_map.store(name, boolean_form ? "true" : unescape(raw_value))
  end

  attr_map.recordable = true
  attr_map
end

private :get_attr_map
344
+
345
#
# Escape special characters: each match of @@pattern_escape is replaced by
# its entry in TABLE_FOR_ESCAPE_.
# @param [String] content text to escape
# @return [String] escaped copy
#
def escape(content)
  content.gsub(@@pattern_escape, TABLE_FOR_ESCAPE_)
end
351
+
352
#
# Escape special characters in element content: each match of
# @@pattern_escape_content is replaced by its entry in
# TABLE_FOR_ESCAPE_CONTENT_.
# @param [String] content text to escape
# @param elm element the content belongs to (not used by this method)
# @return [String] escaped copy
#
def escape_content(content, elm)
  content.gsub(@@pattern_escape_content, TABLE_FOR_ESCAPE_CONTENT_)
end
358
+
359
+ private :escape
360
+ private :escape_content
361
+
362
#
# Replace character entity references with the characters they stand for:
#   &lt; -> <    &gt; -> >    &quot; -> "    &apos; -> '
#   &nbsp; -> (space)    &amp; -> &
#
# String#gsub is non-destructive, so its result is the return value;
# returning the argument itself would hand back the text unchanged.
#
# @param [String] content text to unescape
# @return [String] unescaped copy
#
def unescape(content)
  content.gsub(@@pattern_unescape) do
    case Regexp.last_match(1)
    when "amp"  then "&"
    when "quot" then "\""
    when "apos" then "'"
    when "gt"   then ">"
    when "lt"   then "<"
    when "nbsp" then " "
    end
  end
end

private :unescape
390
+
391
#
# Unescape element content.
#
# After unescape, every <br/> is replaced with @root.kaigyo_code when the
# element has cx set or its name is not one of @@match_tag_2, and the given
# content contains BR.
#
# @param [String] content text to unescape
# @param elm element the content belongs to
# @return [String] unescaped text
#
def unescape_content(content, elm)
  unescaped = unescape(content)

  br_applies = elm.cx || !is_match(@@match_tag_2, elm.name)
  # <br/> -> newline code of the document
  unescaped.gsub!(@@pattern_br_2, @root.kaigyo_code) if br_applies && content.include?(BR)

  unescaped
end

private :unescape_content
403
+ end
404
+ end
405
+ end
406
+ end