meteor 0.9.12 → 0.9.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +5 -1
- data/Gemfile.lock +2 -2
- data/README.md +5 -3
- data/demo/html.rb +41 -38
- data/demo/html4.rb +53 -50
- data/demo/ml/sample_html.html +5 -4
- data/demo/ml/sample_html4.html +4 -4
- data/demo/ml/sample_xhtml.html +6 -5
- data/demo/ml/sample_xhtml4.html +3 -3
- data/demo/xhtml.rb +39 -36
- data/demo/xhtml4.rb +37 -34
- data/demo/xml.rb +63 -63
- data/lib/meteor/attribute.rb +33 -0
- data/lib/meteor/attribute_map.rb +146 -0
- data/lib/meteor/core/kernel.rb +2182 -0
- data/lib/meteor/core/util/pattern_cache.rb +107 -0
- data/lib/meteor/element.rb +532 -0
- data/lib/meteor/element_factory.rb +68 -0
- data/lib/meteor/exception/no_such_element_exception.rb +84 -0
- data/lib/meteor/ml/html/parser_impl.rb +142 -0
- data/lib/meteor/ml/html4/parser_impl.rb +684 -0
- data/lib/meteor/ml/xhtml/parser_impl.rb +139 -0
- data/lib/meteor/ml/xhtml4/parser_impl.rb +398 -0
- data/lib/meteor/ml/xml/parser_impl.rb +160 -0
- data/lib/meteor/parser.rb +15 -0
- data/lib/meteor/parser_factory.rb +493 -0
- data/lib/meteor/root_element.rb +24 -0
- data/lib/meteor.rb +20 -5593
- data/meteor.gemspec +3 -3
- metadata +21 -6
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# -* coding: UTF-8 -*-
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
module Meteor
|
|
5
|
+
module Ml
|
|
6
|
+
module Xhtml
|
|
7
|
+
#
|
|
8
|
+
# XHTML parser (XHTMLパーサ)
|
|
9
|
+
#
|
|
10
|
+
class ParserImpl < Meteor::Ml::Xhtml4::ParserImpl
|
|
11
|
+
# Attributes that are specified as boolean flags.
ATTR_LOGIC = ['disabled', 'readonly', 'checked', 'selected', 'multiple', 'required']

# Elements that have a disabled attribute.
DISABLE_ELEMENT = ['input', 'textarea', 'select', 'optgroup', 'fieldset']

# Elements that have a required attribute.
REQUIRE_ELEMENT = ['input', 'textarea']

# Regexp sources for the required attribute:
#   _M  finds the attribute followed by whitespace or at the end of the attribute string,
#   _M1 is the same expression with the value captured,
#   _R  is the span that gets replaced,
#   _U  is the canonical text written when the flag is switched on.
REQUIRED_M = '\\srequired="[^"]*"\\s|\\srequired="[^"]*"$'
@@pattern_required_m = Regexp.new(REQUIRED_M)

REQUIRED_M1 = '\\srequired="([^"]*)"\\s|\\srequired="([^"]*)"$'
@@pattern_required_m1 = Regexp.new(REQUIRED_M1)

REQUIRED_R = 'required="[^"]*"'
@@pattern_required_r = Regexp.new(REQUIRED_R)

REQUIRED_U = 'required="required"'
|
|
25
|
+
|
|
26
|
+
#
# Initializer. Takes either no argument or one parser to copy from.
# @overload initialize
# @overload initialize(ps)
#  @param [Meteor::Parser] ps parser whose state is copied
# @raise [ArgumentError] when more than one argument is passed
#
def initialize(*args)
  super()
  # NOTE(review): the superclass body also assigns @@attr_logic, and Ruby shares class
  # variables along the inheritance chain, so this looks like it also changes the list
  # XHTML4 parsers use -- confirm that is intended.
  @@attr_logic = ATTR_LOGIC
  @doc_type = Parser::XHTML

  case args.length
  when ZERO then nil # nothing to copy
  when ONE then initialize_1(args[0])
  else raise ArgumentError
  end
end
|
|
45
|
+
|
|
46
|
+
#
|
|
47
|
+
# initializer (イニシャライザ)
|
|
48
|
+
#
|
|
49
|
+
# def initialize_0
|
|
50
|
+
# end
|
|
51
|
+
#
|
|
52
|
+
# private :initialize_0
|
|
53
|
+
|
|
54
|
+
#
# Copy initializer: takes over document, document hook, content type, charset and
# newline code from another parser. The three string values are duplicated with String.new.
# @param [Meteor::Parser] ps parser to copy from
#
def initialize_1(ps)
  @root.document = String.new(ps.document)
  self.document_hook = String.new(ps.document_hook)

  source_root = ps.root_element
  @root.content_type = String.new(source_root.content_type)
  @root.charset = source_root.charset
  @root.kaigyo_code = source_root.kaigyo_code
end
|
|
65
|
+
|
|
66
|
+
private :initialize_1
|
|
67
|
+
|
|
68
|
+
#
# Looks up the <meta charset="..."> element and records the charset on the root element.
# Falls back to "utf-8" when no element or no charset value is found; the content type
# is always set to an empty string.
#
def analyze_content_type
  # Error checking is switched off for the duration of the lookup.
  @error_check = false
  element_3('meta', 'charset', '[a-zA-Z-]+', false)
  # NOTE(review): the retry repeats the identical lookup -- confirm whether a different
  # spelling was meant here.
  element_3('meta', 'charset', '[a-zA-Z-]+', false) unless @elm_
  @error_check = true

  if @elm_
    @root.charset = @elm_.attr("charset")
    @root.charset = "utf-8" unless @root.charset
  else
    @root.charset = "utf-8"
  end

  @root.content_type = ''
end
|
|
93
|
+
|
|
94
|
+
private :analyze_content_type
|
|
95
|
+
|
|
96
|
+
# Sets an attribute on elm. The boolean attributes selected, multiple, disabled, checked,
# readonly and required are routed to edit_attrs_5 when the element (and, for input, its
# type) matches; every other combination is handled by the superclass.
def edit_attrs_(elm, attr_name, attr_value)
  # [match pattern, replace pattern, canonical text] for the first rule that applies.
  rule =
    if is_match('selected', attr_name) && is_match('option', elm.name)
      [@@pattern_selected_m, @@pattern_selected_r, SELECTED_U]
    elsif is_match('multiple', attr_name) && is_match('select', elm.name)
      [@@pattern_multiple_m, @@pattern_multiple_r, MULTIPLE_U]
    elsif is_match('disabled', attr_name) && is_match(DISABLE_ELEMENT, elm.name)
      [@@pattern_disabled_m, @@pattern_disabled_r, DISABLED_U]
    elsif is_match('checked', attr_name) && is_match('input', elm.name) && is_match('radio', get_type(elm))
      [@@pattern_checked_m, @@pattern_checked_r, CHECKED_U]
    elsif is_match('readonly', attr_name) && (is_match('textarea', elm.name) || (is_match('input', elm.name) && is_match(READONLY_TYPE, get_type(elm))))
      [@@pattern_readonly_m, @@pattern_readonly_r, READONLY_U]
    elsif is_match('required', attr_name) && is_match(REQUIRE_ELEMENT, elm.name)
      [@@pattern_required_m, @@pattern_required_r, REQUIRED_U]
    end

  return super(elm, attr_name, attr_value) unless rule

  edit_attrs_5(elm, attr_value, *rule)
end
|
|
114
|
+
|
|
115
|
+
private :edit_attrs_
|
|
116
|
+
|
|
117
|
+
# Reads an attribute value from elm. The boolean attributes selected, multiple, disabled,
# checked, readonly and required are read through get_attr_value_r when the element
# (and, for input, its type) matches; everything else goes to the superclass.
def get_attr_value_(elm, attr_name)
  # Capturing pattern of the first rule that applies, or nil.
  pattern =
    if is_match('selected', attr_name) && is_match('option', elm.name)
      @@pattern_selected_m1
    elsif is_match('multiple', attr_name) && is_match('select', elm.name)
      @@pattern_multiple_m1
    elsif is_match('disabled', attr_name) && is_match(DISABLE_ELEMENT, elm.name)
      @@pattern_disabled_m1
    elsif is_match('checked', attr_name) && is_match('input', elm.name) && is_match('radio', get_type(elm))
      @@pattern_checked_m1
    elsif is_match('readonly', attr_name) && (is_match('textarea', elm.name) || (is_match('input', elm.name) && is_match(READONLY_TYPE, get_type(elm))))
      @@pattern_readonly_m1
    elsif is_match('required', attr_name) && is_match(REQUIRE_ELEMENT, elm.name)
      @@pattern_required_m1
    end

  return super(elm, attr_name) unless pattern

  get_attr_value_r(elm, attr_name, pattern)
end
|
|
134
|
+
|
|
135
|
+
private :get_attr_value_
|
|
136
|
+
end
|
|
137
|
+
end
|
|
138
|
+
end
|
|
139
|
+
end
|
|
@@ -0,0 +1,398 @@
|
|
|
1
|
+
# -* coding: UTF-8 -*-
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
module Meteor
|
|
5
|
+
module Ml
|
|
6
|
+
module Xhtml4
|
|
7
|
+
#
|
|
8
|
+
# XHTML4 parser (XHTML4パーサ)
|
|
9
|
+
#
|
|
10
|
+
class ParserImpl < Meteor::Core::Kernel
|
|
11
|
+
|
|
12
|
+
# Newline sequences looked for in a document.
KAIGYO_CODE = ["\r\n", "\n", "\r"]
BR = '<br/>'
@@pattern_br_2 = Regexp.new('<br\\/>')

# Elements whose content does not need newlines converted to <br/>.
@@match_tag_2 = ['textarea', 'option', 'pre']

# Attributes that are specified as boolean flags.
@@attr_logic = ['disabled', 'readonly', 'checked', 'selected', 'multiple']

# Elements that have a disabled attribute.
DISABLE_ELEMENT = ['input', 'textarea', 'select', 'optgroup']
# Types of input element that have a readonly attribute.
READONLY_TYPE = ['text', 'password']

# For every boolean attribute below:
#   _M  finds the attribute followed by whitespace or at the end of the attribute string,
#   _M1 is the same expression with the value captured,
#   _R  is the span that gets replaced,
#   _U  is the canonical text written when the flag is switched on.
SELECTED_M = '\\sselected="[^"]*"\\s|\\sselected="[^"]*"$'
SELECTED_M1 = '\\sselected="([^"]*)"\\s|\\sselected="([^"]*)"$'
SELECTED_R = 'selected="[^"]*"'
SELECTED_U = 'selected="selected"'
@@pattern_selected_m = Regexp.new(SELECTED_M)
@@pattern_selected_m1 = Regexp.new(SELECTED_M1)
@@pattern_selected_r = Regexp.new(SELECTED_R)

CHECKED_M = '\\schecked="[^"]*"\\s|\\schecked="[^"]*"$'
CHECKED_M1 = '\\schecked="([^"]*)"\\s|\\schecked="([^"]*)"$'
CHECKED_R = 'checked="[^"]*"'
CHECKED_U = 'checked="checked"'
@@pattern_checked_m = Regexp.new(CHECKED_M)
@@pattern_checked_m1 = Regexp.new(CHECKED_M1)
@@pattern_checked_r = Regexp.new(CHECKED_R)

DISABLED_M = '\\sdisabled="[^"]*"\\s|\\sdisabled="[^"]*"$'
DISABLED_M1 = '\\sdisabled="([^"]*)"\\s|\\sdisabled="([^"]*)"$'
DISABLED_R = 'disabled="[^"]*"'
DISABLED_U = 'disabled="disabled"'
@@pattern_disabled_m = Regexp.new(DISABLED_M)
@@pattern_disabled_m1 = Regexp.new(DISABLED_M1)
@@pattern_disabled_r = Regexp.new(DISABLED_R)

READONLY_M = '\\sreadonly="[^"]*"\\s|\\sreadonly="[^"]*"$'
READONLY_M1 = '\\sreadonly="([^"]*)"\\s|\\sreadonly="([^"]*)"$'
READONLY_R = 'readonly="[^"]*"'
READONLY_U = 'readonly="readonly"'
@@pattern_readonly_m = Regexp.new(READONLY_M)
@@pattern_readonly_m1 = Regexp.new(READONLY_M1)
@@pattern_readonly_r = Regexp.new(READONLY_R)

MULTIPLE_M = '\\smultiple="[^"]*"\\s|\\smultiple="[^"]*"$'
MULTIPLE_M1 = '\\smultiple="([^"]*)"\\s|\\smultiple="([^"]*)"$'
MULTIPLE_R = 'multiple="[^"]*"'
MULTIPLE_U = 'multiple="multiple"'
@@pattern_multiple_m = Regexp.new(MULTIPLE_M)
@@pattern_multiple_m1 = Regexp.new(MULTIPLE_M1)
@@pattern_multiple_r = Regexp.new(MULTIPLE_R)

# Regexp source matching the entity references that unescape decodes.
PATTERN_UNESCAPE = '&(amp|quot|apos|gt|lt|nbsp);'
@@pattern_unescape = Regexp.new(PATTERN_UNESCAPE)
|
|
68
|
+
|
|
69
|
+
# @@pattern_match_tag = Regexp.new(@@match_tag)
|
|
70
|
+
# @@pattern_match_tag2 = Regexp.new(@@match_tag_2)
|
|
71
|
+
|
|
72
|
+
# Character => entity reference, used when escaping attribute values.
# The entity names mirror the ones listed in PATTERN_UNESCAPE.
TABLE_FOR_ESCAPE_ = {
  '&' => '&amp;',
  '"' => '&quot;',
  '\'' => '&apos;',
  '<' => '&lt;',
  '>' => '&gt;',
  ' ' => '&nbsp;',
}

# Character => replacement, used when escaping element content.
# Same as TABLE_FOR_ESCAPE_, plus every newline sequence mapped to <br/>.
TABLE_FOR_ESCAPE_CONTENT_ = {
  '&' => '&amp;',
  '"' => '&quot;',
  '\'' => '&apos;',
  '<' => '&lt;',
  '>' => '&gt;',
  ' ' => '&nbsp;',
  "\r\n" => '<br/>',
  "\r" => '<br/>',
  "\n" => '<br/>',
}

PATTERN_ESCAPE = '[&"\'<> ]'
# "\r\n" is tried first so a CRLF pair becomes a single <br/>; a lone "\r" or "\n" is
# matched by the character class. All three sequences are keys of the content table.
PATTERN_ESCAPE_CONTENT = '\\r\\n|[&"\'<> \\r\\n]'
@@pattern_escape = Regexp.new(PATTERN_ESCAPE)
@@pattern_escape_content = Regexp.new(PATTERN_ESCAPE_CONTENT)
|
|
97
|
+
|
|
98
|
+
#
# Initializer. Takes either no argument or one parser to copy from.
# @overload initialize
# @overload initialize(ps)
#  @param [Meteor::Parser] ps parser whose state is copied
# @raise [ArgumentError] when more than one argument is passed
#
def initialize(*args)
  super()
  @doc_type = Parser::XHTML4

  case args.length
  when ZERO then nil # nothing to copy
  when ONE then initialize_1(args[0])
  else raise ArgumentError
  end
end
|
|
116
|
+
|
|
117
|
+
#
|
|
118
|
+
# initializer (イニシャライザ)
|
|
119
|
+
#
|
|
120
|
+
# def initialize_0
|
|
121
|
+
# end
|
|
122
|
+
#
|
|
123
|
+
# private :initialize_0
|
|
124
|
+
|
|
125
|
+
#
# Copy initializer: takes over document, document hook, content type, charset and
# newline code from another parser. The three string values are duplicated with String.new.
# @param [Meteor::Parser] ps parser to copy from
#
def initialize_1(ps)
  @root.document = String.new(ps.document)
  self.document_hook = String.new(ps.document_hook)

  source_root = ps.root_element
  @root.content_type = String.new(source_root.content_type)
  @root.charset = source_root.charset
  @root.kaigyo_code = source_root.kaigyo_code
end
|
|
136
|
+
|
|
137
|
+
private :initialize_1
|
|
138
|
+
|
|
139
|
+
#
# Parses the document. Public entry point; the work is done by analyze_ml.
#
def parse
  analyze_ml
end
|
|
145
|
+
|
|
146
|
+
#
# Analyzes the document: detects the content type, then the newline code, and finally
# clears the last match result held in @res.
#
def analyze_ml
  analyze_content_type
  analyze_kaigyo_code

  @res = nil
end
|
|
154
|
+
|
|
155
|
+
private :analyze_ml
|
|
156
|
+
|
|
157
|
+
#
# Returns the content type stored on the root element.
# @return [String] content type
#
def content_type
  @root.content_type
end
|
|
164
|
+
|
|
165
|
+
#
# Looks up <meta http-equiv="Content-Type"> and splits its content attribute on ';':
# the first part becomes the content type, the text after '=' in the second part becomes
# the charset. Missing pieces become ''. When no meta element is found only the content
# type is reset to ''; the charset is left as it was.
#
def analyze_content_type
  # Error checking is switched off for the duration of the lookup.
  @error_check = false
  element_3('meta', 'http-equiv', 'Content-Type')
  # NOTE(review): the retry repeats the identical lookup -- confirm whether a different
  # spelling was meant here.
  element_3('meta', 'http-equiv', 'Content-Type') unless @elm_
  @error_check = true

  unless @elm_
    @root.content_type = ''
    return
  end

  parts = @elm_.attr('content')&.split(';') || []
  @root.content_type = parts.at(0) || ''
  @root.charset = parts.at(1)&.split('=')&.at(1) || ''
end
|
|
188
|
+
|
|
189
|
+
private :analyze_content_type
|
|
190
|
+
|
|
191
|
+
#
# Detects the newline sequence used by the document and stores it on the root element.
# KAIGYO_CODE is searched in order and the first sequence found wins, so a CRLF document
# is reported as "\r\n" instead of being overwritten by the later "\n" and "\r" entries.
# When the document contains no newline the stored value is left untouched.
#
def analyze_kaigyo_code
  detected = KAIGYO_CODE.find { |code| @root.document.include?(code) }
  @root.kaigyo_code = detected if detected
end
|
|
201
|
+
|
|
202
|
+
private :analyze_kaigyo_code
|
|
203
|
+
|
|
204
|
+
# Sets an attribute on elm. The boolean attributes selected, multiple, disabled, checked
# and readonly are routed to edit_attrs_5 when the element (and, for input, its type)
# matches; every other combination is handled by the superclass.
def edit_attrs_(elm, attr_name, attr_value)
  # [match pattern, replace pattern, canonical text] for the first rule that applies.
  rule =
    if is_match('selected', attr_name) && is_match('option', elm.name)
      [@@pattern_selected_m, @@pattern_selected_r, SELECTED_U]
    elsif is_match('multiple', attr_name) && is_match('select', elm.name)
      [@@pattern_multiple_m, @@pattern_multiple_r, MULTIPLE_U]
    elsif is_match('disabled', attr_name) && is_match(DISABLE_ELEMENT, elm.name)
      [@@pattern_disabled_m, @@pattern_disabled_r, DISABLED_U]
    elsif is_match('checked', attr_name) && is_match('input', elm.name) && is_match('radio', get_type(elm))
      [@@pattern_checked_m, @@pattern_checked_r, CHECKED_U]
    elsif is_match('readonly', attr_name) && (is_match('textarea', elm.name) || (is_match('input', elm.name) && is_match(READONLY_TYPE, get_type(elm))))
      [@@pattern_readonly_m, @@pattern_readonly_r, READONLY_U]
    end

  return super(elm, attr_name, attr_value) unless rule

  edit_attrs_5(elm, attr_value, *rule)
end
|
|
220
|
+
|
|
221
|
+
private :edit_attrs_
|
|
222
|
+
|
|
223
|
+
# Switches a boolean attribute on or off inside elm.attributes.
# attr_value of true / "true" writes replace_update (appending it when match_p finds no
# existing attribute, replacing the existing one otherwise); false / "false" blanks out
# whatever replace_regex matches. Any other value leaves the attributes alone.
def edit_attrs_5(elm, attr_value, match_p, replace_regex, replace_update)
  if true.equal?(attr_value) || is_match("true", attr_value)
    @res = match_p.match(elm.attributes)

    if @res
      # attribute already present: overwrite it with the canonical form
      elm.attributes.gsub!(replace_regex, replace_update)
    else
      # attribute missing: normalise the existing text to one leading space, then append
      elm.attributes = String.new(' ') << elm.attributes.strip unless elm.attributes == ''
      elm.attributes << ' ' << replace_update
    end
  elsif false.equal?(attr_value) || is_match("false", attr_value)
    # remove the attribute if it is there
    elm.attributes.gsub!(replace_regex, '')
  end
end
|
|
246
|
+
|
|
247
|
+
private :edit_attrs_5
|
|
248
|
+
|
|
249
|
+
# Reads an attribute value from elm. The boolean attributes selected, multiple, disabled,
# checked and readonly are read through get_attr_value_r when the element (and, for input,
# its type) matches; everything else goes to the superclass.
#
# The disabled rule previously tested for the misspelled name 'diabled', so it could not
# match the attribute name that edit_attrs_ writes.
def get_attr_value_(elm, attr_name)
  # Capturing pattern of the first rule that applies, or nil.
  pattern =
    if is_match('selected', attr_name) && is_match('option', elm.name)
      @@pattern_selected_m1
    elsif is_match('multiple', attr_name) && is_match('select', elm.name)
      @@pattern_multiple_m1
    elsif is_match('disabled', attr_name) && is_match(DISABLE_ELEMENT, elm.name)
      @@pattern_disabled_m1
    elsif is_match('checked', attr_name) && is_match('input', elm.name) && is_match('radio', get_type(elm))
      @@pattern_checked_m1
    elsif is_match('readonly', attr_name) && (is_match('textarea', elm.name) || (is_match('input', elm.name) && is_match(READONLY_TYPE, get_type(elm))))
      @@pattern_readonly_m1
    end

  return super(elm, attr_name) unless pattern

  get_attr_value_r(elm, attr_name, pattern)
end
|
|
264
|
+
|
|
265
|
+
private :get_attr_value_
|
|
266
|
+
|
|
267
|
+
# Returns the element's type attribute, caching it in elm.type_value.
# The lowercase name 'type' is tried first, then the uppercase 'TYPE'.
def get_type(elm)
  return elm.type_value if elm.type_value

  elm.type_value = get_attr_value(elm, 'type')
  elm.type_value = get_attr_value(elm, "TYPE") unless elm.type_value
  elm.type_value
end
|
|
276
|
+
|
|
277
|
+
private :get_type
|
|
278
|
+
|
|
279
|
+
# Reads a boolean attribute through match_p.
# Returns "false" when match_p does not match elm.attributes. Otherwise the first non-nil
# capture among groups 1..4 is taken: "true" is returned when it equals attr_name
# (e.g. selected="selected"), the captured text itself otherwise. A match without any
# capture yields nil.
def get_attr_value_r(elm, attr_name, match_p)
  @res = match_p.match(elm.attributes)
  return "false" unless @res

  captured = (1..4).map { |index| @res[index] }.find { |text| text }
  return nil unless captured

  attr_name == captured ? "true" : captured
end
|
|
312
|
+
|
|
313
|
+
private :get_attr_value_r
|
|
314
|
+
|
|
315
|
+
#
# Builds the attribute map of an element.
# A boolean attribute written as name="name" is stored as "true"; every other value is
# passed through unescape before it is stored.
# @param [Meteor::Element] elm element
# @return [Meteor::AttributeMap] attribute map, flagged recordable
#
def get_attr_map(elm)
  map = Meteor::AttributeMap.new

  elm.attributes.scan(@@pattern_get_attrs_map).each do |name, value|
    flag = is_match(@@attr_logic, name) && name == value
    map.store(name, flag ? "true" : unescape(value))
  end

  map.recordable = true
  map
end
|
|
334
|
+
|
|
335
|
+
private :get_attr_map
|
|
336
|
+
|
|
337
|
+
# Returns a copy of content in which every character matched by @@pattern_escape is
# replaced with its entry in TABLE_FOR_ESCAPE_.
def escape(content)
  content.gsub(@@pattern_escape, TABLE_FOR_ESCAPE_)
end
|
|
343
|
+
|
|
344
|
+
# Returns a copy of content in which every match of @@pattern_escape_content is replaced
# with its entry in TABLE_FOR_ESCAPE_CONTENT_. The elm argument is not used here.
def escape_content(content, elm)
  content.gsub(@@pattern_escape_content, TABLE_FOR_ESCAPE_CONTENT_)
end
|
|
350
|
+
|
|
351
|
+
private :escape
|
|
352
|
+
private :escape_content
|
|
353
|
+
|
|
354
|
+
# Decodes the entity references matched by @@pattern_unescape:
#   &amp; -> &   &quot; -> "   &apos; -> '   &gt; -> >   &lt; -> <   &nbsp; -> space
# @param [String] content text to decode
# @return [String] a new, decoded string; content itself is not modified
def unescape(content)
  # String#gsub is non-destructive, so its result must be the return value. Returning
  # content after the call handed the input back undecoded.
  content.gsub(@@pattern_unescape) do
    case Regexp.last_match(1)
    when 'amp' then '&'
    when 'quot' then '"'
    when 'apos' then "'"
    when 'gt' then '>'
    when 'lt' then '<'
    when 'nbsp' then ' '
    end
  end
end
|
|
380
|
+
|
|
381
|
+
private :unescape
|
|
382
|
+
|
|
383
|
+
# Decodes element content: runs unescape, then turns every <br/> into the document's
# newline code. The <br/> step runs only when content contains <br/> and either elm.cx is
# set or the element name is not listed in @@match_tag_2.
def unescape_content(content, elm)
  decoded = unescape(content)

  restore_newlines = (elm.cx || !is_match(@@match_tag_2, elm.name)) && content.include?(BR)
  decoded.gsub!(@@pattern_br_2, @root.kaigyo_code) if restore_newlines

  decoded
end
|
|
393
|
+
|
|
394
|
+
private :unescape_content
|
|
395
|
+
end
|
|
396
|
+
end
|
|
397
|
+
end
|
|
398
|
+
end
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# -* coding: UTF-8 -*-
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
module Meteor
|
|
5
|
+
module Ml
|
|
6
|
+
module Xml
|
|
7
|
+
#
|
|
8
|
+
# XML parser (XMLパーサ)
|
|
9
|
+
#
|
|
10
|
+
class ParserImpl < Meteor::Core::Kernel
|
|
11
|
+
# Newline sequences looked for in a document.
KAIGYO_CODE = ["\r\n", "\n", "\r"]

# Regexp source matching the entity references that unescape decodes.
PATTERN_UNESCAPE = '&(amp|quot|apos|gt|lt);'

@@pattern_unescape = Regexp.new(PATTERN_UNESCAPE)

# Character => entity reference used by escape.
# The entity names mirror the ones listed in PATTERN_UNESCAPE.
TABLE_FOR_ESCAPE_ = {
  '&' => '&amp;',
  '"' => '&quot;',
  '\'' => '&apos;',
  '<' => '&lt;',
  '>' => '&gt;',
}
PATTERN_ESCAPE = '[&\"\'<>]'
@@pattern_escape = Regexp.new(PATTERN_ESCAPE)
|
|
27
|
+
|
|
28
|
+
#
# Initializer. Takes either no argument or one parser to copy from.
# @overload initialize
# @overload initialize(ps)
#  @param [Meteor::Parser] ps parser whose state is copied
# @raise [ArgumentError] when more than one argument is passed
#
def initialize(*args)
  super()
  @doc_type = Parser::XML

  case args.length
  when ZERO then nil # nothing to copy
  when ONE then initialize_1(args[0])
  else raise ArgumentError
  end
end
|
|
46
|
+
|
|
47
|
+
#
|
|
48
|
+
# initializer (イニシャライザ)
|
|
49
|
+
#
|
|
50
|
+
# def initialize_0
|
|
51
|
+
# end
|
|
52
|
+
#
|
|
53
|
+
# private :initialize_0
|
|
54
|
+
#
|
|
55
|
+
# private :initialize_0
|
|
56
|
+
|
|
57
|
+
#
# Copy initializer: takes over the document and the document hook of another parser.
# Both strings are duplicated with String.new.
# @param [Meteor::Parser] ps parser to copy from
#
def initialize_1(ps)
  @root.document = String.new(ps.document)
  # The copy belongs on this parser. It used to be assigned back onto ps, which left this
  # parser's hook unset; the XHTML and XHTML4 copy initializers assign to self as well.
  self.document_hook = String.new(ps.document_hook)
end
|
|
65
|
+
|
|
66
|
+
private :initialize_1
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
#
# Parses the document. Public entry point; the work is done by analyze_ml.
#
def parse
  analyze_ml
end
|
|
75
|
+
|
|
76
|
+
#
# Analyzes the document: detects the newline code, then clears the last match result
# held in @res.
#
def analyze_ml
  analyze_kaigyo_code

  @res = nil
end
|
|
84
|
+
|
|
85
|
+
private :analyze_ml
|
|
86
|
+
|
|
87
|
+
#
# Returns the content type stored on the root element.
# @return [String] content type
#
def content_type
  @root.content_type
end
|
|
94
|
+
|
|
95
|
+
#
# Detects the newline sequence used by the document and stores it on the root element.
# KAIGYO_CODE is searched in order and the first sequence found wins, so a CRLF document
# is reported as "\r\n" instead of being overwritten by the later "\n" and "\r" entries.
# When the document contains no newline the stored value is left untouched.
#
def analyze_kaigyo_code
  detected = KAIGYO_CODE.find { |code| @root.document.include?(code) }
  @root.kaigyo_code = detected if detected
end
|
|
107
|
+
|
|
108
|
+
private :analyze_kaigyo_code
|
|
109
|
+
|
|
110
|
+
# Returns a copy of content in which every character matched by @@pattern_escape is
# replaced with its entry in TABLE_FOR_ESCAPE_.
def escape(content)
  content.gsub(@@pattern_escape, TABLE_FOR_ESCAPE_)
end
|
|
116
|
+
|
|
117
|
+
private :escape
|
|
118
|
+
|
|
119
|
+
# Escapes element content. Only the first argument is used; it is passed to escape and
# any further arguments are ignored.
def escape_content(*args)
  escape(args.first)
end
|
|
122
|
+
|
|
123
|
+
private :escape_content
|
|
124
|
+
|
|
125
|
+
# Decodes the entity references matched by @@pattern_unescape:
#   &amp; -> &   &quot; -> "   &apos; -> '   &gt; -> >   &lt; -> <
# @param [String] content text to decode
# @return [String] a new, decoded string; content itself is not modified
def unescape(content)
  # String#gsub is non-destructive, so its result must be the return value. Returning
  # content after the call handed the input back undecoded.
  content.gsub(@@pattern_unescape) do
    case Regexp.last_match(1)
    when 'amp' then '&'
    when 'quot' then '"'
    when 'apos' then "'"
    when 'gt' then '>'
    when 'lt' then '<'
    end
  end
end
|
|
149
|
+
|
|
150
|
+
private :unescape
|
|
151
|
+
|
|
152
|
+
# Unescapes element content. Only the first argument is used; it is passed to unescape
# and any further arguments are ignored.
def unescape_content(*args)
  unescape(args.first)
end
|
|
155
|
+
|
|
156
|
+
private :unescape_content
|
|
157
|
+
end
|
|
158
|
+
end
|
|
159
|
+
end
|
|
160
|
+
end
|