meteor 0.9.12 → 0.9.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +5 -1
- data/Gemfile.lock +2 -2
- data/README.md +5 -3
- data/demo/html.rb +41 -38
- data/demo/html4.rb +53 -50
- data/demo/ml/sample_html.html +5 -4
- data/demo/ml/sample_html4.html +4 -4
- data/demo/ml/sample_xhtml.html +6 -5
- data/demo/ml/sample_xhtml4.html +3 -3
- data/demo/xhtml.rb +39 -36
- data/demo/xhtml4.rb +37 -34
- data/demo/xml.rb +63 -63
- data/lib/meteor/attribute.rb +33 -0
- data/lib/meteor/attribute_map.rb +146 -0
- data/lib/meteor/core/kernel.rb +2182 -0
- data/lib/meteor/core/util/pattern_cache.rb +107 -0
- data/lib/meteor/element.rb +532 -0
- data/lib/meteor/element_factory.rb +68 -0
- data/lib/meteor/exception/no_such_element_exception.rb +84 -0
- data/lib/meteor/ml/html/parser_impl.rb +142 -0
- data/lib/meteor/ml/html4/parser_impl.rb +684 -0
- data/lib/meteor/ml/xhtml/parser_impl.rb +139 -0
- data/lib/meteor/ml/xhtml4/parser_impl.rb +398 -0
- data/lib/meteor/ml/xml/parser_impl.rb +160 -0
- data/lib/meteor/parser.rb +15 -0
- data/lib/meteor/parser_factory.rb +493 -0
- data/lib/meteor/root_element.rb +24 -0
- data/lib/meteor.rb +20 -5593
- data/meteor.gemspec +3 -3
- metadata +21 -6
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
module Meteor
  module Exception
    #
    # Element search exception: a message holder describing an element
    # lookup (by tag name and/or attribute pairs) that found nothing.
    #
    # No superclass is declared here, so instances are plain objects that
    # expose the generated text through #message.
    #
    # @!attribute [rw] message
    #  @return [String] message
    #
    class NoSuchElementException
      attr_accessor :message

      #
      # initializer
      # @overload initialize(name)
      #  @param [String,Symbol] name tag name
      # @overload initialize(attr_name,attr_value)
      #  @param [String,Symbol] attr_name attribute name
      #  @param [String] attr_value attribute value
      # @overload initialize(name,attr_name,attr_value)
      #  @param [String,Symbol] name tag name
      #  @param [String,Symbol] attr_name attribute name
      #  @param [String] attr_value attribute value
      # @overload initialize(attr_name1,attr_value1,attr_name2,attr_value2)
      #  @param [String,Symbol] attr_name1 attribute name1
      #  @param [String] attr_value1 attribute value1
      #  @param [String,Symbol] attr_name2 attribute name2
      #  @param [String] attr_value2 attribute value2
      # @overload initialize(name,attr_name1,attr_value1,attr_name2,attr_value2)
      #  @param [String,Symbol] name tag name
      #  @param [String,Symbol] attr_name1 attribute name1
      #  @param [String] attr_value1 attribute value1
      #  @param [String,Symbol] attr_name2 attribute name2
      #  @param [String] attr_value2 attribute value2
      # @raise [ArgumentError] when the argument count matches none of the overloads
      #
      def initialize(*args)
        # ONE..FIVE are resolved from an enclosing scope; they are not defined in this file.
        case args.length
        when ONE
          initialize_1(args[0])
        when TWO
          initialize_2(args[0], args[1])
        when THREE
          initialize_3(args[0], args[1], args[2])
        when FOUR
          initialize_4(args[0], args[1], args[2], args[3])
        when FIVE
          initialize_5(args[0], args[1], args[2], args[3], args[4])
        else
          # Previously an unsupported count fell through silently and left
          # the message nil; fail loudly instead, as ParserImpl#initialize does.
          raise ArgumentError, "wrong number of arguments (given #{args.length}, expected 1..5)"
        end
      end

      # Builds the message for a lookup by tag name only.
      def initialize_1(name)
        self.message = "element not found : #{name}"
      end

      private :initialize_1

      # Builds the message for a lookup by a single attribute pair.
      def initialize_2(attr_name, attr_value)
        self.message = "element not found : [#{attr_name}=#{attr_value}]"
      end

      private :initialize_2

      # Builds the message for a lookup by tag name and one attribute pair.
      def initialize_3(name, attr_name, attr_value)
        self.message = "element not found : #{name}[#{attr_name}=#{attr_value}]"
      end

      private :initialize_3

      # Builds the message for a lookup by two attribute pairs.
      def initialize_4(attr_name1, attr_value1, attr_name2, attr_value2)
        self.message = "element not found : [#{attr_name1}=#{attr_value1}][#{attr_name2}=#{attr_value2}]"
      end

      private :initialize_4

      # Builds the message for a lookup by tag name and two attribute pairs.
      def initialize_5(name, attr_name1, attr_value1, attr_name2, attr_value2)
        self.message = "element not found : #{name}[#{attr_name1}=#{attr_value1}][#{attr_name2}=#{attr_value2}]"
      end

      private :initialize_5
    end
  end
end
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
module Meteor
  module Ml
    module Html
      #
      # HTML parser.
      #
      # Extends the HTML4 parser with its own element/attribute tables and
      # with handling for the boolean "required" attribute.
      #
      class ParserImpl < Meteor::Ml::Html4::ParserImpl
        # [Array] void elements
        MATCH_TAG = ['br', 'hr', 'img', 'input', 'meta', 'base', 'embed', 'command', 'keygen']

        # [Array] non-nestable elements
        # ('textarea' was previously misspelled 'texarea', so it could never match)
        MATCH_TAG_SNG = ['textarea', 'select', 'option', 'form', 'fieldset', 'figure', 'figcaption', 'video', 'audio', 'progress', 'meter', 'time', 'ruby', 'rt', 'rp', 'datalist', 'output']

        # [Array] boolean attributes
        ATTR_LOGIC = ['disabled', 'readonly', 'checked', 'selected', 'multiple', 'required']

        # [Array] elements with the disabled attribute
        DISABLE_ELEMENT = ['input', 'textarea', 'select', 'optgroup', 'fieldset']

        # [Array] elements with the required attribute
        REQUIRE_ELEMENT = ['input', 'textarea']

        # Pattern source: "required" preceded by whitespace and followed by whitespace or end of line.
        REQUIRED_M = '\\srequired\\s|\\srequired$|\\sREQUIRED\\s|\\sREQUIRED$'
        # Pattern source: "required" followed by whitespace or end of line.
        REQUIRED_R = 'required\\s|required$|REQUIRED\\s|REQUIRED$'

        @@pattern_required_m = Regexp.new(REQUIRED_M)
        @@pattern_required_r = Regexp.new(REQUIRED_R)

        #
        # initializer
        # @overload initialize
        # @overload initialize(ps)
        #  @param [Meteor::Parser] ps parser
        # @raise [ArgumentError] when more than one argument is given
        #
        def initialize(*args)
          super()
          # NOTE(review): these class variables are reassigned on every
          # instantiation; if the parent class declares the same class
          # variables they are shared with it -- confirm this is intended.
          @@match_tag = MATCH_TAG
          @@match_tag_sng = MATCH_TAG_SNG
          @@attr_logic = ATTR_LOGIC
          @doc_type = Parser::HTML
          case args.length
          when ZERO
            # no further setup for the no-argument form
          when ONE
            initialize_1(args[0])
          else
            raise ArgumentError, "wrong number of arguments (given #{args.length}, expected 0..1)"
          end
        end

        #
        # initializer: copies the document state of an existing parser
        # @param [Meteor::Parser] ps parser
        #
        def initialize_1(ps)
          # String.new yields separate String objects for the text fields
          @root.document = String.new(ps.document)
          self.document_hook = String.new(ps.document_hook)
          @root.content_type = String.new(ps.root_element.content_type)
          @root.charset = ps.root_element.charset
          @root.kaigyo_code = ps.root_element.kaigyo_code
        end

        private :initialize_1

        #
        # analyze document, set content type and charset
        #
        # Looks up a <meta charset="..."> element and stores its charset on
        # the root element, falling back to "utf-8"; the content type is
        # always set to 'text/html'.
        #
        def analyze_content_type
          # presumably turns off not-found reporting while probing -- TODO confirm
          @error_check = false

          element_3('meta', 'charset', '[a-zA-Z-]+', false)

          # NOTE(review): this retry repeats the first lookup with identical
          # arguments; it may have been meant to search a different spelling.
          if !@elm_
            element_3('meta', 'charset', '[a-zA-Z-]+', false)
          end

          @error_check = true

          if @elm_
            @root.charset = @elm_.attr("charset")
            if !@root.charset
              @root.charset = "utf-8"
            end
          else
            @root.charset = "utf-8"
          end

          @root.content_type = 'text/html'
        end

        private :analyze_content_type

        #
        # Edits an attribute of an element. Boolean attributes on the
        # elements that support them are routed to edit_attrs_5 with the
        # matching pattern pair; everything else goes to the parent class.
        # @param elm element to edit
        # @param attr_name attribute name
        # @param attr_value attribute value
        #
        def edit_attrs_(elm, attr_name, attr_value)
          if is_match('selected', attr_name) && is_match('option', elm.name)
            edit_attrs_5(elm, attr_name, attr_value, @@pattern_selected_m, @@pattern_selected_r)
          elsif is_match('multiple', attr_name) && is_match('select', elm.name)
            edit_attrs_5(elm, attr_name, attr_value, @@pattern_multiple_m, @@pattern_multiple_r)
          elsif is_match('disabled', attr_name) && is_match(DISABLE_ELEMENT, elm.name)
            edit_attrs_5(elm, attr_name, attr_value, @@pattern_disabled_m, @@pattern_disabled_r)
          # NOTE(review): only type "radio" is handled here; confirm whether
          # "checkbox" inputs are meant to fall through to the parent class.
          elsif is_match('checked', attr_name) && is_match('input', elm.name) && is_match('radio', get_type(elm))
            edit_attrs_5(elm, attr_name, attr_value, @@pattern_checked_m, @@pattern_checked_r)
          elsif is_match('readonly', attr_name) && (is_match('textarea', elm.name) || (is_match('input', elm.name) && is_match(READONLY_TYPE, get_type(elm))))
            edit_attrs_5(elm, attr_name, attr_value, @@pattern_readonly_m, @@pattern_readonly_r)
          elsif is_match('required', attr_name) && is_match(REQUIRE_ELEMENT, elm.name)
            edit_attrs_5(elm, attr_name, attr_value, @@pattern_required_m, @@pattern_required_r)
          else
            super(elm, attr_name, attr_value)
          end
        end

        private :edit_attrs_

        #
        # Gets the value of an attribute of an element. Boolean attributes
        # on the elements that support them are read via get_attr_value_r
        # with the matching pattern; everything else goes to the parent class.
        # @param elm element to read
        # @param attr_name attribute name
        #
        def get_attr_value_(elm, attr_name)
          if is_match('selected', attr_name) && is_match('option', elm.name)
            get_attr_value_r(elm, @@pattern_selected_m)
          elsif is_match('multiple', attr_name) && is_match('select', elm.name)
            get_attr_value_r(elm, @@pattern_multiple_m)
          elsif is_match('disabled', attr_name) && is_match(DISABLE_ELEMENT, elm.name)
            get_attr_value_r(elm, @@pattern_disabled_m)
          elsif is_match('checked', attr_name) && is_match('input', elm.name) && is_match('radio', get_type(elm))
            get_attr_value_r(elm, @@pattern_checked_m)
          elsif is_match('readonly', attr_name) && (is_match('textarea', elm.name) || (is_match('input', elm.name) && is_match(READONLY_TYPE, get_type(elm))))
            get_attr_value_r(elm, @@pattern_readonly_m)
          elsif is_match('required', attr_name) && is_match(REQUIRE_ELEMENT, elm.name)
            get_attr_value_r(elm, @@pattern_required_m)
          else
            super(elm, attr_name)
          end
        end

        private :get_attr_value_
      end
    end
  end
end
|