meteor 0.9.13 → 0.9.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +4 -0
- data/Gemfile.lock +3 -3
- data/demo/html.rb +49 -39
- data/demo/html4.rb +40 -33
- data/demo/ml/sample.xml +36 -38
- data/demo/ml/sample_html.html +7 -2
- data/demo/ml/sample_html4.html +9 -2
- data/demo/ml/sample_xhtml.html +7 -2
- data/demo/ml/sample_xhtml4.html +10 -3
- data/demo/xhtml.rb +41 -30
- data/demo/xhtml4.rb +35 -26
- data/demo/xml.rb +25 -20
- data/lib/meteor/attribute_map.rb +8 -6
- data/lib/meteor/core/kernel.rb +288 -260
- data/lib/meteor/core/util/pattern_cache.rb +16 -15
- data/lib/meteor/element.rb +65 -63
- data/lib/meteor/exception/no_such_element_exception.rb +15 -15
- data/lib/meteor/ml/html/parser_impl.rb +58 -33
- data/lib/meteor/ml/html4/parser_impl.rb +118 -90
- data/lib/meteor/ml/xhtml/parser_impl.rb +33 -29
- data/lib/meteor/ml/xhtml4/parser_impl.rb +98 -90
- data/lib/meteor/ml/xml/parser_impl.rb +35 -25
- data/lib/meteor/parser_factory.rb +125 -125
- data/lib/meteor.rb +2 -2
- metadata +2 -2
|
@@ -11,40 +11,44 @@ module Meteor
|
|
|
11
11
|
|
|
12
12
|
# KAIGYO_CODE = "\r?\n|\r"
|
|
13
13
|
KAIGYO_CODE = ["\r\n", "\n", "\r"]
|
|
14
|
-
BR =
|
|
14
|
+
BR = "<br/>"
|
|
15
15
|
|
|
16
16
|
# @@match_tag_2 = "textarea|option|pre"
|
|
17
|
-
|
|
17
|
+
#[Array] 改行を<br/>に変換する必要のない要素
|
|
18
|
+
@@match_tag_2 = ["textarea", "option", "pre"]
|
|
18
19
|
|
|
19
|
-
|
|
20
|
+
#[Array] 論理値で指定する属性
|
|
21
|
+
@@attr_logic = ["disabled", "readonly", "checked", "selected", "multiple"]
|
|
20
22
|
|
|
21
23
|
# DISABLE_ELEMENT = "input|textarea|select|optgroup"
|
|
22
|
-
|
|
24
|
+
#[Array] element with disabled attribute (disabled属性のある要素)
|
|
25
|
+
DISABLE_ELEMENT = ["input", "textarea", "select", "optgroup"]
|
|
23
26
|
# READONLY_TYPE = "text|password"
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
27
|
+
#[Array] readonly属性のあるinput要素のタイプ
|
|
28
|
+
READONLY_TYPE = ["text", "password"]
|
|
29
|
+
|
|
30
|
+
SELECTED_M = "\\sselected=\"[^\"]*\"\\s|\\sselected=\"[^\"]*\"$"
|
|
31
|
+
SELECTED_M1 = "\\sselected=\"([^\"]*)\"\\s|\\sselected=\"([^\"]*)\"$"
|
|
32
|
+
SELECTED_R = "selected=\"[^\"]*\""
|
|
33
|
+
SELECTED_U = "selected=\"selected\""
|
|
34
|
+
CHECKED_M = "\\schecked=\"[^\"]*\"\\s|\\schecked=\"[^\"]*\"$"
|
|
35
|
+
CHECKED_M1 = "\\schecked=\"([^\"]*)\"\\s|\\schecked=\"([^\"]*)\"$"
|
|
36
|
+
CHECKED_R = "checked=\"[^\"]*\""
|
|
37
|
+
CHECKED_U = "checked=\"checked\""
|
|
38
|
+
DISABLED_M = "\\sdisabled=\"[^\"]*\"\\s|\\sdisabled=\"[^\"]*\"$"
|
|
39
|
+
DISABLED_M1 = "\\sdisabled=\"([^\"]*)\"\\s|\\sdisabled=\"([^\"]*)\"$"
|
|
40
|
+
DISABLED_R = "disabled=\"[^\"]*\""
|
|
41
|
+
DISABLED_U = "disabled=\"disabled\""
|
|
42
|
+
READONLY_M = "\\sreadonly=\"[^\"]*\"\\s|\\sreadonly=\"[^\"]*\"$"
|
|
43
|
+
READONLY_M1 = "\\sreadonly=\"([^\"]*)\"\\s|\\sreadonly=\"([^\"]*)\"$"
|
|
44
|
+
READONLY_R = "readonly=\"[^\"]*\""
|
|
45
|
+
READONLY_U = "readonly=\"readonly\""
|
|
46
|
+
MULTIPLE_M = "\\smultiple=\"[^\"]*\"\\s|\\smultiple=\"[^\"]*\"$"
|
|
47
|
+
MULTIPLE_M1 = "\\smultiple=\"([^\"]*)\"\\s|\\smultiple=\"([^\"]*)\"$"
|
|
48
|
+
MULTIPLE_R = "multiple=\"[^\"]*\""
|
|
49
|
+
MULTIPLE_U = "multiple=\"multiple\""
|
|
50
|
+
|
|
51
|
+
PATTERN_UNESCAPE = "&(amp|quot|apos|gt|lt|nbsp);"
|
|
48
52
|
|
|
49
53
|
@@pattern_selected_m = Regexp.new(SELECTED_M)
|
|
50
54
|
@@pattern_selected_m1 = Regexp.new(SELECTED_M1)
|
|
@@ -64,34 +68,34 @@ module Meteor
|
|
|
64
68
|
|
|
65
69
|
@@pattern_unescape = Regexp.new(PATTERN_UNESCAPE)
|
|
66
70
|
|
|
67
|
-
@@pattern_br_2 = Regexp.new(
|
|
71
|
+
@@pattern_br_2 = Regexp.new("<br\\/>")
|
|
68
72
|
|
|
69
73
|
# @@pattern_match_tag = Regexp.new(@@match_tag)
|
|
70
74
|
# @@pattern_match_tag2 = Regexp.new(@@match_tag_2)
|
|
71
75
|
|
|
72
76
|
TABLE_FOR_ESCAPE_ = {
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
'
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
77
|
+
"&" => "&amp;",
|
|
78
|
+
"\"" => "&quot;",
|
|
79
|
+
"'" => "&apos;",
|
|
80
|
+
"<" => "&lt;",
|
|
81
|
+
">" => "&gt;",
|
|
82
|
+
" " => "&nbsp;"
|
|
79
83
|
}
|
|
80
84
|
|
|
81
85
|
TABLE_FOR_ESCAPE_CONTENT_ = {
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
'
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
"\r\n" =>
|
|
89
|
-
"\r" =>
|
|
90
|
-
"\n" =>
|
|
86
|
+
"&" => "&amp;",
|
|
87
|
+
"\"" => "&quot;",
|
|
88
|
+
"'" => "&apos;",
|
|
89
|
+
"<" => "&lt;",
|
|
90
|
+
">" => "&gt;",
|
|
91
|
+
" " => "&nbsp;",
|
|
92
|
+
"\r\n" => "<br/>",
|
|
93
|
+
"\r" => "<br/>",
|
|
94
|
+
"\n" => "<br/>"
|
|
91
95
|
}
|
|
92
96
|
|
|
93
|
-
PATTERN_ESCAPE =
|
|
94
|
-
PATTERN_ESCAPE_CONTENT =
|
|
97
|
+
PATTERN_ESCAPE = "[&\"'<> ]"
|
|
98
|
+
PATTERN_ESCAPE_CONTENT = "[&\"'<> \\n]"
|
|
95
99
|
@@pattern_escape = Regexp.new(PATTERN_ESCAPE)
|
|
96
100
|
@@pattern_escape_content = Regexp.new(PATTERN_ESCAPE_CONTENT)
|
|
97
101
|
|
|
@@ -105,12 +109,12 @@ module Meteor
|
|
|
105
109
|
super()
|
|
106
110
|
@doc_type = Parser::XHTML4
|
|
107
111
|
case args.length
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
112
|
+
when ZERO
|
|
113
|
+
# initialize_0
|
|
114
|
+
when ONE
|
|
115
|
+
initialize_1(args[0])
|
|
116
|
+
else
|
|
117
|
+
raise ArgumentError
|
|
114
118
|
end
|
|
115
119
|
end
|
|
116
120
|
|
|
@@ -168,21 +172,21 @@ module Meteor
|
|
|
168
172
|
def analyze_content_type
|
|
169
173
|
@error_check = false
|
|
170
174
|
|
|
171
|
-
element_3(
|
|
175
|
+
element_3("meta", "http-equiv", "Content-Type")
|
|
172
176
|
|
|
173
177
|
if !@elm_
|
|
174
|
-
element_3(
|
|
178
|
+
element_3("meta", "http-equiv", "Content-Type")
|
|
175
179
|
end
|
|
176
180
|
|
|
177
181
|
@error_check = true
|
|
178
182
|
|
|
179
183
|
if @elm_
|
|
180
|
-
content = @elm_.attr(
|
|
181
|
-
content_arr = content&.split(
|
|
182
|
-
@root.content_type = content_arr&.at(0) ||
|
|
183
|
-
@root.charset = content_arr&.at(1)&.split(
|
|
184
|
+
content = @elm_.attr("content")
|
|
185
|
+
content_arr = content&.split(";")
|
|
186
|
+
@root.content_type = content_arr&.at(0) || ""
|
|
187
|
+
@root.charset = content_arr&.at(1)&.split("=")&.at(1) || ""
|
|
184
188
|
else
|
|
185
|
-
@root.content_type =
|
|
189
|
+
@root.content_type = ""
|
|
186
190
|
end
|
|
187
191
|
end
|
|
188
192
|
|
|
@@ -202,20 +206,20 @@ module Meteor
|
|
|
202
206
|
private :analyze_kaigyo_code
|
|
203
207
|
|
|
204
208
|
def edit_attrs_(elm, attr_name, attr_value)
|
|
205
|
-
if is_match(
|
|
209
|
+
if is_match("selected", attr_name) && is_match("option", elm.name)
|
|
206
210
|
edit_attrs_5(elm, attr_value, @@pattern_selected_m, @@pattern_selected_r, SELECTED_U)
|
|
207
|
-
elsif is_match(
|
|
211
|
+
elsif is_match("multiple", attr_name) && is_match("select", elm.name)
|
|
208
212
|
edit_attrs_5(elm, attr_value, @@pattern_multiple_m, @@pattern_multiple_r, MULTIPLE_U)
|
|
209
|
-
elsif is_match(
|
|
213
|
+
elsif is_match("disabled", attr_name) && is_match(DISABLE_ELEMENT, elm.name)
|
|
210
214
|
edit_attrs_5(elm, attr_value, @@pattern_disabled_m, @@pattern_disabled_r, DISABLED_U)
|
|
211
|
-
elsif is_match(
|
|
215
|
+
elsif is_match("checked", attr_name) && is_match("input", elm.name) && is_match("radio", get_type(elm))
|
|
212
216
|
edit_attrs_5(elm, attr_value, @@pattern_checked_m, @@pattern_checked_r, CHECKED_U)
|
|
213
|
-
elsif is_match(
|
|
217
|
+
elsif is_match("readonly", attr_name) &&
|
|
218
|
+
(is_match("textarea", elm.name) || (is_match("input", elm.name) && is_match(READONLY_TYPE, get_type(elm))))
|
|
214
219
|
edit_attrs_5(elm, attr_value, @@pattern_readonly_m, @@pattern_readonly_r, READONLY_U)
|
|
215
220
|
else
|
|
216
221
|
super(elm, attr_name, attr_value)
|
|
217
222
|
end
|
|
218
|
-
|
|
219
223
|
end
|
|
220
224
|
|
|
221
225
|
private :edit_attrs_
|
|
@@ -228,11 +232,12 @@ module Meteor
|
|
|
228
232
|
|
|
229
233
|
if !@res
|
|
230
234
|
# add an attribute to the end of the attributes string (属性文字列の最後に新規の属性を追加する)
|
|
231
|
-
if elm.attributes !=
|
|
232
|
-
elm.attributes = String.new(
|
|
235
|
+
if elm.attributes != ""
|
|
236
|
+
elm.attributes = String.new("") << " " << elm.attributes.strip
|
|
233
237
|
# else
|
|
234
238
|
end
|
|
235
|
-
|
|
239
|
+
|
|
240
|
+
elm.attributes << " " << replace_update
|
|
236
241
|
else
|
|
237
242
|
# replace attribute (属性の置換)
|
|
238
243
|
elm.attributes.gsub!(replace_regex, replace_update)
|
|
@@ -240,22 +245,23 @@ module Meteor
|
|
|
240
245
|
elsif false.equal?(attr_value) || is_match("false", attr_value)
|
|
241
246
|
# delete the attr_name attribute if it exists (attr_name属性が存在するなら削除)
|
|
242
247
|
# replace attribute (属性の置換)
|
|
243
|
-
elm.attributes.gsub!(replace_regex,
|
|
248
|
+
elm.attributes.gsub!(replace_regex, "")
|
|
244
249
|
end
|
|
245
250
|
end
|
|
246
251
|
|
|
247
252
|
private :edit_attrs_5
|
|
248
253
|
|
|
249
254
|
def get_attr_value_(elm, attr_name)
|
|
250
|
-
if is_match(
|
|
255
|
+
if is_match("selected", attr_name) && is_match("option", elm.name)
|
|
251
256
|
get_attr_value_r(elm, attr_name, @@pattern_selected_m1)
|
|
252
|
-
elsif is_match(
|
|
257
|
+
elsif is_match("multiple", attr_name) && is_match("select", elm.name)
|
|
253
258
|
get_attr_value_r(elm, attr_name, @@pattern_multiple_m1)
|
|
254
|
-
elsif is_match(
|
|
259
|
+
elsif is_match("diabled", attr_name) && is_match(DISABLE_ELEMENT, elm.name)
|
|
255
260
|
get_attr_value_r(elm, attr_name, @@pattern_disabled_m1)
|
|
256
|
-
elsif is_match(
|
|
261
|
+
elsif is_match("checked", attr_name) && is_match("input", elm.name) && is_match("radio", get_type(elm))
|
|
257
262
|
get_attr_value_r(elm, attr_name, @@pattern_checked_m1)
|
|
258
|
-
elsif is_match(
|
|
263
|
+
elsif is_match("readonly", attr_name) &&
|
|
264
|
+
(is_match("textarea", elm.name) || (is_match("input", elm.name) && is_match(READONLY_TYPE, get_type(elm))))
|
|
259
265
|
get_attr_value_r(elm, attr_name, @@pattern_readonly_m1)
|
|
260
266
|
else
|
|
261
267
|
super(elm, attr_name)
|
|
@@ -266,11 +272,12 @@ module Meteor
|
|
|
266
272
|
|
|
267
273
|
def get_type(elm)
|
|
268
274
|
if !elm.type_value
|
|
269
|
-
elm.type_value = get_attr_value(elm,
|
|
275
|
+
elm.type_value = get_attr_value(elm, "type")
|
|
270
276
|
if !elm.type_value
|
|
271
277
|
elm.type_value = get_attr_value(elm, "TYPE")
|
|
272
278
|
end
|
|
273
279
|
end
|
|
280
|
+
|
|
274
281
|
elm.type_value
|
|
275
282
|
end
|
|
276
283
|
|
|
@@ -321,12 +328,13 @@ module Meteor
|
|
|
321
328
|
attrs = Meteor::AttributeMap.new
|
|
322
329
|
|
|
323
330
|
elm.attributes.scan(@@pattern_get_attrs_map) do |a, b|
|
|
324
|
-
if is_match(@@attr_logic, a) && a==b
|
|
331
|
+
if is_match(@@attr_logic, a) && a == b
|
|
325
332
|
attrs.store(a, "true")
|
|
326
333
|
else
|
|
327
334
|
attrs.store(a, unescape(b))
|
|
328
335
|
end
|
|
329
336
|
end
|
|
337
|
+
|
|
330
338
|
attrs.recordable = true
|
|
331
339
|
|
|
332
340
|
attrs
|
|
@@ -360,18 +368,18 @@ module Meteor
|
|
|
360
368
|
# 「&」<-「&amp;」
|
|
361
369
|
content.gsub(@@pattern_unescape) do
|
|
362
370
|
case $1
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
371
|
+
when "amp"
|
|
372
|
+
"&"
|
|
373
|
+
when "quot"
|
|
374
|
+
"\""
|
|
375
|
+
when "apos"
|
|
376
|
+
"'"
|
|
377
|
+
when "gt"
|
|
378
|
+
">"
|
|
379
|
+
when "lt"
|
|
380
|
+
"<"
|
|
381
|
+
when "nbsp"
|
|
382
|
+
" "
|
|
375
383
|
end
|
|
376
384
|
end
|
|
377
385
|
|
|
@@ -384,8 +392,8 @@ module Meteor
|
|
|
384
392
|
content_ = unescape(content)
|
|
385
393
|
|
|
386
394
|
if (elm.cx || !is_match(@@match_tag_2, elm.name)) && content.include?(BR)
|
|
387
|
-
|
|
388
|
-
|
|
395
|
+
# 「<br>」->「¥r?¥n」
|
|
396
|
+
content_.gsub!(@@pattern_br_2, @root.kaigyo_code)
|
|
389
397
|
end
|
|
390
398
|
|
|
391
399
|
content_
|
|
@@ -11,18 +11,18 @@ module Meteor
|
|
|
11
11
|
# KAIGYO_CODE = "\r?\n|\r"
|
|
12
12
|
KAIGYO_CODE = ["\r\n", "\n", "\r"]
|
|
13
13
|
|
|
14
|
-
PATTERN_UNESCAPE =
|
|
14
|
+
PATTERN_UNESCAPE = "&(amp|quot|apos|gt|lt);"
|
|
15
15
|
|
|
16
16
|
@@pattern_unescape = Regexp.new(PATTERN_UNESCAPE)
|
|
17
17
|
|
|
18
18
|
TABLE_FOR_ESCAPE_ = {
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
19
|
+
"&" => "&amp;",
|
|
20
|
+
"\"" => "&quot;",
|
|
21
|
+
"'" => "&apos;",
|
|
22
|
+
"<" => "&lt;",
|
|
23
|
+
">" => "&gt;"
|
|
24
24
|
}
|
|
25
|
-
PATTERN_ESCAPE =
|
|
25
|
+
PATTERN_ESCAPE = "[&\\\"'<>]"
|
|
26
26
|
@@pattern_escape = Regexp.new(PATTERN_ESCAPE)
|
|
27
27
|
|
|
28
28
|
#
|
|
@@ -35,12 +35,12 @@ module Meteor
|
|
|
35
35
|
super()
|
|
36
36
|
@doc_type = Parser::XML
|
|
37
37
|
case args.length
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
38
|
+
when ZERO
|
|
39
|
+
# initialize_0
|
|
40
|
+
when ONE
|
|
41
|
+
initialize_1(args[0])
|
|
42
|
+
else
|
|
43
|
+
raise ArgumentError
|
|
44
44
|
end
|
|
45
45
|
end
|
|
46
46
|
|
|
@@ -61,11 +61,12 @@ module Meteor
|
|
|
61
61
|
def initialize_1(ps)
|
|
62
62
|
@root.document = String.new(ps.document)
|
|
63
63
|
ps.document_hook = String.new(ps.document_hook)
|
|
64
|
+
@root.content_type = String.new(ps.root_element.content_type)
|
|
65
|
+
@root.kaigyo_code = ps.root_element.kaigyo_code
|
|
64
66
|
end
|
|
65
67
|
|
|
66
68
|
private :initialize_1
|
|
67
69
|
|
|
68
|
-
|
|
69
70
|
#
|
|
70
71
|
# parse document (ドキュメントを解析する)
|
|
71
72
|
#
|
|
@@ -78,6 +79,7 @@ module Meteor
|
|
|
78
79
|
#
|
|
79
80
|
def analyze_ml
|
|
80
81
|
analyze_kaigyo_code
|
|
82
|
+
analyze_content_type
|
|
81
83
|
|
|
82
84
|
@res = nil
|
|
83
85
|
end
|
|
@@ -102,11 +104,19 @@ module Meteor
|
|
|
102
104
|
# puts "kaigyo:" << @root.kaigyo_code
|
|
103
105
|
end
|
|
104
106
|
end
|
|
105
|
-
|
|
106
107
|
end
|
|
107
108
|
|
|
108
109
|
private :analyze_kaigyo_code
|
|
109
110
|
|
|
111
|
+
#
|
|
112
|
+
# analyze document , set content type (ドキュメントをパースし、コンテントタイプをセットする)
|
|
113
|
+
#
|
|
114
|
+
def analyze_content_type
|
|
115
|
+
@root.content_type = "text/xml"
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
private :analyze_content_type
|
|
119
|
+
|
|
110
120
|
def escape(content)
|
|
111
121
|
# replace special character (特殊文字の置換)
|
|
112
122
|
content = content.gsub(@@pattern_escape, TABLE_FOR_ESCAPE_)
|
|
@@ -131,16 +141,16 @@ module Meteor
|
|
|
131
141
|
# 「&」<-「&amp;」
|
|
132
142
|
content.gsub(@@pattern_unescape) do
|
|
133
143
|
case $1
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
+
when "amp"
|
|
145
|
+
"&"
|
|
146
|
+
when "quot"
|
|
147
|
+
"\""
|
|
148
|
+
when "apos"
|
|
149
|
+
"'"
|
|
150
|
+
when "gt"
|
|
151
|
+
">"
|
|
152
|
+
when "lt"
|
|
153
|
+
"<"
|
|
144
154
|
end
|
|
145
155
|
end
|
|
146
156
|
|