superfeedr-nokogiri 1.4.0.20091116183308
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +27 -0
- data/CHANGELOG.ja.rdoc +330 -0
- data/CHANGELOG.rdoc +314 -0
- data/Manifest.txt +269 -0
- data/README.ja.rdoc +105 -0
- data/README.rdoc +118 -0
- data/Rakefile +244 -0
- data/bin/nokogiri +49 -0
- data/ext/nokogiri/extconf.rb +145 -0
- data/ext/nokogiri/html_document.c +145 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_element_description.c +272 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +32 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser_context.c +92 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/nokogiri.c +89 -0
- data/ext/nokogiri/nokogiri.h +145 -0
- data/ext/nokogiri/xml_attr.c +92 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_attribute_decl.c +67 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +54 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +52 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +388 -0
- data/ext/nokogiri/xml_document.h +24 -0
- data/ext/nokogiri/xml_document_fragment.c +46 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +192 -0
- data/ext/nokogiri/xml_dtd.h +10 -0
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_entity_decl.c +97 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +31 -0
- data/ext/nokogiri/xml_io.h +11 -0
- data/ext/nokogiri/xml_namespace.c +74 -0
- data/ext/nokogiri/xml_namespace.h +12 -0
- data/ext/nokogiri/xml_node.c +1060 -0
- data/ext/nokogiri/xml_node.h +13 -0
- data/ext/nokogiri/xml_node_set.c +397 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +593 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_relax_ng.c +159 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +286 -0
- data/ext/nokogiri/xml_sax_parser.h +43 -0
- data/ext/nokogiri/xml_sax_parser_context.c +155 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +114 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +156 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +261 -0
- data/ext/nokogiri/xml_syntax_error.h +13 -0
- data/ext/nokogiri/xml_text.c +48 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath.c +53 -0
- data/ext/nokogiri/xml_xpath.h +11 -0
- data/ext/nokogiri/xml_xpath_context.c +239 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +131 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/lib/nokogiri.rb +116 -0
- data/lib/nokogiri/css.rb +25 -0
- data/lib/nokogiri/css/generated_parser.rb +646 -0
- data/lib/nokogiri/css/generated_tokenizer.rb +142 -0
- data/lib/nokogiri/css/node.rb +99 -0
- data/lib/nokogiri/css/parser.rb +82 -0
- data/lib/nokogiri/css/parser.y +227 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rex +54 -0
- data/lib/nokogiri/css/xpath_visitor.rb +162 -0
- data/lib/nokogiri/decorators/slop.rb +33 -0
- data/lib/nokogiri/ffi/html/document.rb +28 -0
- data/lib/nokogiri/ffi/html/element_description.rb +85 -0
- data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
- data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
- data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
- data/lib/nokogiri/ffi/libxml.rb +356 -0
- data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
- data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
- data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
- data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
- data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
- data/lib/nokogiri/ffi/xml/attr.rb +41 -0
- data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
- data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
- data/lib/nokogiri/ffi/xml/comment.rb +18 -0
- data/lib/nokogiri/ffi/xml/document.rb +135 -0
- data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
- data/lib/nokogiri/ffi/xml/dtd.rb +69 -0
- data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
- data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
- data/lib/nokogiri/ffi/xml/entity_decl.rb +27 -0
- data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
- data/lib/nokogiri/ffi/xml/node.rb +444 -0
- data/lib/nokogiri/ffi/xml/node_set.rb +133 -0
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
- data/lib/nokogiri/ffi/xml/reader.rb +227 -0
- data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
- data/lib/nokogiri/ffi/xml/sax/parser.rb +142 -0
- data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +39 -0
- data/lib/nokogiri/ffi/xml/schema.rb +92 -0
- data/lib/nokogiri/ffi/xml/syntax_error.rb +91 -0
- data/lib/nokogiri/ffi/xml/text.rb +18 -0
- data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
- data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
- data/lib/nokogiri/html.rb +35 -0
- data/lib/nokogiri/html/builder.rb +35 -0
- data/lib/nokogiri/html/document.rb +88 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +48 -0
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +33 -0
- data/lib/nokogiri/version_warning.rb +11 -0
- data/lib/nokogiri/xml.rb +67 -0
- data/lib/nokogiri/xml/attr.rb +14 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +405 -0
- data/lib/nokogiri/xml/cdata.rb +11 -0
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +131 -0
- data/lib/nokogiri/xml/document_fragment.rb +69 -0
- data/lib/nokogiri/xml/dtd.rb +11 -0
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +15 -0
- data/lib/nokogiri/xml/fragment_handler.rb +71 -0
- data/lib/nokogiri/xml/namespace.rb +13 -0
- data/lib/nokogiri/xml/node.rb +665 -0
- data/lib/nokogiri/xml/node/save_options.rb +42 -0
- data/lib/nokogiri/xml/node_set.rb +307 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/parse_options.rb +85 -0
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +74 -0
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +4 -0
- data/lib/nokogiri/xml/sax/document.rb +160 -0
- data/lib/nokogiri/xml/sax/parser.rb +115 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/schema.rb +61 -0
- data/lib/nokogiri/xml/syntax_error.rb +38 -0
- data/lib/nokogiri/xml/xpath.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xslt.rb +48 -0
- data/lib/nokogiri/xslt/stylesheet.rb +25 -0
- data/lib/xsd/xmlparser/nokogiri.rb +71 -0
- data/tasks/test.rb +100 -0
- data/test/css/test_nthiness.rb +159 -0
- data/test/css/test_parser.rb +277 -0
- data/test/css/test_tokenizer.rb +183 -0
- data/test/css/test_xpath_visitor.rb +76 -0
- data/test/ffi/test_document.rb +35 -0
- data/test/files/2ch.html +108 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/bar/bar.xsd +4 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/foo/foo.xsd +4 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/shift_jis.html +10 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/files/snuggles.xml +3 -0
- data/test/files/staff.dtd +10 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/files/valid_bar.xml +2 -0
- data/test/helper.rb +136 -0
- data/test/html/sax/test_parser.rb +64 -0
- data/test/html/sax/test_parser_context.rb +48 -0
- data/test/html/test_builder.rb +164 -0
- data/test/html/test_document.rb +390 -0
- data/test/html/test_document_encoding.rb +77 -0
- data/test/html/test_document_fragment.rb +132 -0
- data/test/html/test_element_description.rb +94 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +228 -0
- data/test/html/test_node_encoding.rb +27 -0
- data/test/test_convert_xpath.rb +135 -0
- data/test/test_css_cache.rb +45 -0
- data/test/test_gc.rb +15 -0
- data/test/test_memory_leak.rb +77 -0
- data/test/test_nokogiri.rb +134 -0
- data/test/test_reader.rb +358 -0
- data/test/test_xslt_transforms.rb +131 -0
- data/test/xml/node/test_save_options.rb +20 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +307 -0
- data/test/xml/sax/test_parser_context.rb +56 -0
- data/test/xml/sax/test_push_parser.rb +131 -0
- data/test/xml/test_attr.rb +38 -0
- data/test/xml/test_attribute_decl.rb +82 -0
- data/test/xml/test_builder.rb +167 -0
- data/test/xml/test_cdata.rb +38 -0
- data/test/xml/test_comment.rb +29 -0
- data/test/xml/test_document.rb +607 -0
- data/test/xml/test_document_encoding.rb +26 -0
- data/test/xml/test_document_fragment.rb +138 -0
- data/test/xml/test_dtd.rb +82 -0
- data/test/xml/test_dtd_encoding.rb +33 -0
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +83 -0
- data/test/xml/test_entity_reference.rb +21 -0
- data/test/xml/test_namespace.rb +68 -0
- data/test/xml/test_node.rb +889 -0
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +107 -0
- data/test/xml/test_node_set.rb +531 -0
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader_encoding.rb +126 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +89 -0
- data/test/xml/test_syntax_error.rb +27 -0
- data/test/xml/test_text.rb +30 -0
- data/test/xml/test_unparented_node.rb +381 -0
- data/test/xml/test_xpath.rb +106 -0
- metadata +430 -0
@@ -0,0 +1,272 @@
|
|
1
|
+
#include <html_element_description.h>
|
2
|
+
|
3
|
+
/*
|
4
|
+
* call-seq:
|
5
|
+
* required_attributes
|
6
|
+
*
|
7
|
+
* A list of required attributes for this element
|
8
|
+
*/
|
9
|
+
static VALUE required_attributes(VALUE self)
|
10
|
+
{
|
11
|
+
htmlElemDesc * description;
|
12
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
13
|
+
|
14
|
+
VALUE list = rb_ary_new();
|
15
|
+
|
16
|
+
if(NULL == description->attrs_req) return list;
|
17
|
+
|
18
|
+
int i = 0;
|
19
|
+
while(description->attrs_req[i]) {
|
20
|
+
rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_req[i]));
|
21
|
+
i++;
|
22
|
+
}
|
23
|
+
|
24
|
+
return list;
|
25
|
+
}
|
26
|
+
|
27
|
+
/*
|
28
|
+
* call-seq:
|
29
|
+
* deprecated_attributes
|
30
|
+
*
|
31
|
+
* A list of deprecated attributes for this element
|
32
|
+
*/
|
33
|
+
static VALUE deprecated_attributes(VALUE self)
|
34
|
+
{
|
35
|
+
htmlElemDesc * description;
|
36
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
37
|
+
|
38
|
+
VALUE list = rb_ary_new();
|
39
|
+
|
40
|
+
if(NULL == description->attrs_depr) return list;
|
41
|
+
|
42
|
+
int i = 0;
|
43
|
+
while(description->attrs_depr[i]) {
|
44
|
+
rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_depr[i]));
|
45
|
+
i++;
|
46
|
+
}
|
47
|
+
|
48
|
+
return list;
|
49
|
+
}
|
50
|
+
|
51
|
+
/*
|
52
|
+
* call-seq:
|
53
|
+
* optional_attributes
|
54
|
+
*
|
55
|
+
* A list of optional attributes for this element
|
56
|
+
*/
|
57
|
+
static VALUE optional_attributes(VALUE self)
|
58
|
+
{
|
59
|
+
htmlElemDesc * description;
|
60
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
61
|
+
|
62
|
+
VALUE list = rb_ary_new();
|
63
|
+
|
64
|
+
if(NULL == description->attrs_opt) return list;
|
65
|
+
|
66
|
+
int i = 0;
|
67
|
+
while(description->attrs_opt[i]) {
|
68
|
+
rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_opt[i]));
|
69
|
+
i++;
|
70
|
+
}
|
71
|
+
|
72
|
+
return list;
|
73
|
+
}
|
74
|
+
|
75
|
+
/*
|
76
|
+
* call-seq:
|
77
|
+
* default_sub_element
|
78
|
+
*
|
79
|
+
* The default sub element for this element
|
80
|
+
*/
|
81
|
+
static VALUE default_sub_element(VALUE self)
|
82
|
+
{
|
83
|
+
htmlElemDesc * description;
|
84
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
85
|
+
|
86
|
+
return NOKOGIRI_STR_NEW2(description->defaultsubelt);
|
87
|
+
}
|
88
|
+
|
89
|
+
/*
|
90
|
+
* call-seq:
|
91
|
+
* sub_elements
|
92
|
+
*
|
93
|
+
* A list of allowed sub elements for this element.
|
94
|
+
*/
|
95
|
+
static VALUE sub_elements(VALUE self)
|
96
|
+
{
|
97
|
+
htmlElemDesc * description;
|
98
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
99
|
+
|
100
|
+
VALUE list = rb_ary_new();
|
101
|
+
|
102
|
+
if(NULL == description->subelts) return list;
|
103
|
+
|
104
|
+
int i = 0;
|
105
|
+
while(description->subelts[i]) {
|
106
|
+
rb_ary_push(list, NOKOGIRI_STR_NEW2(description->subelts[i]));
|
107
|
+
i++;
|
108
|
+
}
|
109
|
+
|
110
|
+
return list;
|
111
|
+
}
|
112
|
+
|
113
|
+
/*
|
114
|
+
* call-seq:
|
115
|
+
* description
|
116
|
+
*
|
117
|
+
* The description for this element
|
118
|
+
*/
|
119
|
+
static VALUE description(VALUE self)
|
120
|
+
{
|
121
|
+
htmlElemDesc * description;
|
122
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
123
|
+
|
124
|
+
return NOKOGIRI_STR_NEW2(description->desc);
|
125
|
+
}
|
126
|
+
|
127
|
+
/*
|
128
|
+
* call-seq:
|
129
|
+
* inline?
|
130
|
+
*
|
131
|
+
* Is this element an inline element?
|
132
|
+
*/
|
133
|
+
static VALUE inline_eh(VALUE self)
|
134
|
+
{
|
135
|
+
htmlElemDesc * description;
|
136
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
137
|
+
|
138
|
+
if(description->isinline) return Qtrue;
|
139
|
+
return Qfalse;
|
140
|
+
}
|
141
|
+
|
142
|
+
/*
|
143
|
+
* call-seq:
|
144
|
+
* deprecated?
|
145
|
+
*
|
146
|
+
* Is this element deprecated?
|
147
|
+
*/
|
148
|
+
static VALUE deprecated_eh(VALUE self)
|
149
|
+
{
|
150
|
+
htmlElemDesc * description;
|
151
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
152
|
+
|
153
|
+
if(description->depr) return Qtrue;
|
154
|
+
return Qfalse;
|
155
|
+
}
|
156
|
+
|
157
|
+
/*
|
158
|
+
* call-seq:
|
159
|
+
* empty?
|
160
|
+
*
|
161
|
+
* Is this an empty element?
|
162
|
+
*/
|
163
|
+
static VALUE empty_eh(VALUE self)
|
164
|
+
{
|
165
|
+
htmlElemDesc * description;
|
166
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
167
|
+
|
168
|
+
if(description->empty) return Qtrue;
|
169
|
+
return Qfalse;
|
170
|
+
}
|
171
|
+
|
172
|
+
/*
|
173
|
+
* call-seq:
|
174
|
+
* save_end_tag?
|
175
|
+
*
|
176
|
+
* Should the end tag be saved?
|
177
|
+
*/
|
178
|
+
static VALUE save_end_tag_eh(VALUE self)
|
179
|
+
{
|
180
|
+
htmlElemDesc * description;
|
181
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
182
|
+
|
183
|
+
if(description->saveEndTag) return Qtrue;
|
184
|
+
return Qfalse;
|
185
|
+
}
|
186
|
+
|
187
|
+
/*
|
188
|
+
* call-seq:
|
189
|
+
* implied_end_tag?
|
190
|
+
*
|
191
|
+
* Can the end tag be implied for this tag?
|
192
|
+
*/
|
193
|
+
static VALUE implied_end_tag_eh(VALUE self)
|
194
|
+
{
|
195
|
+
htmlElemDesc * description;
|
196
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
197
|
+
|
198
|
+
if(description->endTag) return Qtrue;
|
199
|
+
return Qfalse;
|
200
|
+
}
|
201
|
+
|
202
|
+
/*
|
203
|
+
* call-seq:
|
204
|
+
* implied_start_tag?
|
205
|
+
*
|
206
|
+
* Can the start tag be implied for this tag?
|
207
|
+
*/
|
208
|
+
static VALUE implied_start_tag_eh(VALUE self)
|
209
|
+
{
|
210
|
+
htmlElemDesc * description;
|
211
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
212
|
+
|
213
|
+
if(description->startTag) return Qtrue;
|
214
|
+
return Qfalse;
|
215
|
+
}
|
216
|
+
|
217
|
+
/*
|
218
|
+
* call-seq:
|
219
|
+
* name
|
220
|
+
*
|
221
|
+
* Get the tag name for this ElemementDescription
|
222
|
+
*/
|
223
|
+
static VALUE name(VALUE self)
|
224
|
+
{
|
225
|
+
htmlElemDesc * description;
|
226
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
227
|
+
|
228
|
+
if(NULL == description->name) return Qnil;
|
229
|
+
return NOKOGIRI_STR_NEW2(description->name);
|
230
|
+
}
|
231
|
+
|
232
|
+
/*
|
233
|
+
* call-seq:
|
234
|
+
* [](tag_name)
|
235
|
+
*
|
236
|
+
* Get ElemementDescription for +tag_name+
|
237
|
+
*/
|
238
|
+
static VALUE get_description(VALUE klass, VALUE tag_name)
|
239
|
+
{
|
240
|
+
const htmlElemDesc * description = htmlTagLookup(
|
241
|
+
(const xmlChar *)StringValuePtr(tag_name)
|
242
|
+
);
|
243
|
+
|
244
|
+
if(NULL == description) return Qnil;
|
245
|
+
return Data_Wrap_Struct(klass, 0, 0, description);
|
246
|
+
}
|
247
|
+
|
248
|
+
VALUE cNokogiriHtmlElementDescription ;
|
249
|
+
void init_html_element_description()
|
250
|
+
{
|
251
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
252
|
+
VALUE html = rb_define_module_under(nokogiri, "HTML");
|
253
|
+
VALUE klass = rb_define_class_under(html, "ElementDescription",rb_cObject);
|
254
|
+
|
255
|
+
cNokogiriHtmlElementDescription = klass;
|
256
|
+
|
257
|
+
rb_define_singleton_method(klass, "[]", get_description, 1);
|
258
|
+
|
259
|
+
rb_define_method(klass, "name", name, 0);
|
260
|
+
rb_define_method(klass, "implied_start_tag?", implied_start_tag_eh, 0);
|
261
|
+
rb_define_method(klass, "implied_end_tag?", implied_end_tag_eh, 0);
|
262
|
+
rb_define_method(klass, "save_end_tag?", save_end_tag_eh, 0);
|
263
|
+
rb_define_method(klass, "empty?", empty_eh, 0);
|
264
|
+
rb_define_method(klass, "deprecated?", deprecated_eh, 0);
|
265
|
+
rb_define_method(klass, "inline?", inline_eh, 0);
|
266
|
+
rb_define_method(klass, "description", description, 0);
|
267
|
+
rb_define_method(klass, "sub_elements", sub_elements, 0);
|
268
|
+
rb_define_method(klass, "default_sub_element", default_sub_element, 0);
|
269
|
+
rb_define_method(klass, "optional_attributes", optional_attributes, 0);
|
270
|
+
rb_define_method(klass, "deprecated_attributes", deprecated_attributes, 0);
|
271
|
+
rb_define_method(klass, "required_attributes", required_attributes, 0);
|
272
|
+
}
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#include <html_entity_lookup.h>
|
2
|
+
|
3
|
+
/*
|
4
|
+
* call-seq:
|
5
|
+
* get(key)
|
6
|
+
*
|
7
|
+
* Get the HTML::EntityDescription for +key+
|
8
|
+
*/
|
9
|
+
static VALUE get(VALUE self, VALUE key)
|
10
|
+
{
|
11
|
+
const htmlEntityDesc * desc =
|
12
|
+
htmlEntityLookup((const xmlChar *)StringValuePtr(key));
|
13
|
+
|
14
|
+
if(NULL == desc) return Qnil;
|
15
|
+
VALUE klass = rb_const_get(mNokogiriHtml, rb_intern("EntityDescription"));
|
16
|
+
|
17
|
+
VALUE args[3];
|
18
|
+
args[0] = INT2NUM((long)desc->value);
|
19
|
+
args[1] = NOKOGIRI_STR_NEW2(desc->name);
|
20
|
+
args[2] = NOKOGIRI_STR_NEW2(desc->desc);
|
21
|
+
|
22
|
+
return rb_class_new_instance(3, args, klass);
|
23
|
+
}
|
24
|
+
|
25
|
+
void init_html_entity_lookup()
|
26
|
+
{
|
27
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
28
|
+
VALUE html = rb_define_module_under(nokogiri, "HTML");
|
29
|
+
VALUE klass = rb_define_class_under(html, "EntityLookup", rb_cObject);
|
30
|
+
|
31
|
+
rb_define_method(klass, "get", get, 1);
|
32
|
+
}
|
@@ -0,0 +1,92 @@
|
|
1
|
+
#include <html_sax_parser_context.h>
|
2
|
+
|
3
|
+
VALUE cNokogiriHtmlSaxParserContext ;
|
4
|
+
|
5
|
+
/*
 * Free callback registered with Data_Wrap_Struct for parser contexts.
 *
 * ctxt->sax is cleared before the context is freed: parse_with points it at
 * the struct wrapped by the Ruby SAX parser object, so it must not be
 * released together with the context.
 */
static void deallocate(xmlParserCtxtPtr ctxt)
{
  /* The debug macros previously received `handler`, which is not declared
   * in this function; pass the context that is actually being freed. */
  NOKOGIRI_DEBUG_START(ctxt);

  ctxt->sax = NULL;

  htmlFreeParserCtxt(ctxt);

  NOKOGIRI_DEBUG_END(ctxt);
}
|
15
|
+
|
16
|
+
/*
 * call-seq:
 *  memory(data, encoding)
 *
 * Builds a parser context over the in-memory String +data+.  When +encoding+
 * is truthy and names an encoding libxml2 recognises, the context is
 * switched to it.
 *
 * Raises ArgumentError when +data+ is nil, RuntimeError when +data+ is
 * empty, when the context cannot be created, or when the encoding is
 * reported as unsupported, and TypeError when an argument cannot be
 * converted to a String.
 */
static VALUE parse_memory(VALUE klass, VALUE data, VALUE encoding)
{
  htmlParserCtxtPtr ctxt;
  const char * enc_name = NULL;

  if(NIL_P(data)) rb_raise(rb_eArgError, "data cannot be nil");

  /* Make sure data really is a String before its length is inspected. */
  StringValue(data);

  if(!(int)RSTRING_LEN(data))
    rb_raise(rb_eRuntimeError, "data cannot be empty");

  /* Convert the encoding name first: a TypeError raised here must not
   * strand an already allocated parser context. */
  if(RTEST(encoding)) enc_name = StringValuePtr(encoding);

  ctxt = htmlCreateMemoryParserCtxt(
      StringValuePtr(data),
      (int)RSTRING_LEN(data)
  );

  if(NULL == ctxt)
    rb_raise(rb_eRuntimeError, "Could not create a parser context");

  if(NULL != enc_name) {
    xmlCharEncoding enc = xmlParseCharEncoding(enc_name);
    if(enc != XML_CHAR_ENCODING_ERROR) {
      xmlSwitchEncoding(ctxt, enc);
      if(ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
        /* rb_raise does not return, so release the context first. */
        htmlFreeParserCtxt(ctxt);
        rb_raise(rb_eRuntimeError, "Unsupported encoding %s", enc_name);
      }
    }
  }

  return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
}
|
40
|
+
|
41
|
+
/*
 * call-seq:
 *  file(filename, encoding)
 *
 * Builds a parser context that reads from +filename+.  +encoding+ may be
 * nil, in which case NULL is handed to htmlCreateFileParserCtxt.
 *
 * Raises RuntimeError when the context cannot be created and TypeError when
 * an argument cannot be converted to a String.
 */
static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding)
{
  const char * path = StringValuePtr(filename);
  const char * enc_name = NULL;
  htmlParserCtxtPtr ctxt;

  /* A nil encoding used to raise TypeError; treat it as "not specified". */
  if(!NIL_P(encoding)) enc_name = StringValuePtr(encoding);

  ctxt = htmlCreateFileParserCtxt(path, enc_name);

  /* Refuse to wrap a NULL context: parse_with dereferences it. */
  if(NULL == ctxt)
    rb_raise(rb_eRuntimeError,
        "Could not create a parser context for %s", path);

  return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
}
|
49
|
+
|
50
|
+
/* Body for rb_ensure: runs the HTML parser over the given context. */
static VALUE html_sax_parse_doc(VALUE ctxt_val)
{
  htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;

  htmlParseDocument(ctxt);

  return Qnil;
}

/* Ensure clause for rb_ensure: releases the per-parse document and tuple. */
static VALUE html_sax_parse_doc_cleanup(VALUE ctxt_val)
{
  htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;

  if(NULL != ctxt->myDoc) {
    xmlFreeDoc(ctxt->myDoc);
    ctxt->myDoc = NULL;      /* do not leave a dangling document pointer */
  }

  NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
  ctxt->userData = NULL;     /* the tuple is gone; forget its address */

  return Qnil;
}

/*
 * call-seq:
 *  parse_with(sax_handler)
 *
 * Installs the SAX callbacks wrapped by +sax_handler+ on this context and
 * parses the document.  Returns self.
 *
 * Raises ArgumentError unless +sax_handler+ is a Nokogiri::XML::SAX::Parser
 * and RuntimeError when this object wraps no parser context.
 */
static VALUE parse_with(VALUE self, VALUE sax_handler)
{
  htmlParserCtxtPtr ctxt;
  htmlSAXHandlerPtr sax;

  if(!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser))
    rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");

  Data_Get_Struct(self, htmlParserCtxt, ctxt);
  if(NULL == ctxt)
    rb_raise(rb_eRuntimeError, "parser context is not initialized");

  Data_Get_Struct(sax_handler, htmlSAXHandler, sax);

  /*
   * Free the sax handler since we'll assign our own.  Skip the free when the
   * context already points at this handler's struct (parse_with called again
   * with the same handler): that memory belongs to the handler object.
   * NOTE(review): a second call with a *different* handler would still free
   * the struct installed by the first call -- confirm callers never do that.
   */
  if(ctxt->sax
      && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler
      && ctxt->sax != (xmlSAXHandlerPtr)sax)
    xmlFree(ctxt->sax);

  ctxt->sax = sax;
  ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);

  /*
   * Run the cleanup even when parsing is left through a Ruby exception
   * (for example one raised inside a handler callback), so the tuple and any
   * document built by the parser are always released.
   */
  rb_ensure(html_sax_parse_doc, (VALUE)ctxt,
            html_sax_parse_doc_cleanup, (VALUE)ctxt);

  return self;
}
|
75
|
+
|
76
|
+
void init_html_sax_parser_context()
|
77
|
+
{
|
78
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
79
|
+
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
80
|
+
VALUE html = rb_define_module_under(nokogiri, "HTML");
|
81
|
+
VALUE sax = rb_define_module_under(xml, "SAX");
|
82
|
+
VALUE hsax = rb_define_module_under(html, "SAX");
|
83
|
+
VALUE pc = rb_define_class_under(sax, "ParserContext", rb_cObject);
|
84
|
+
VALUE klass = rb_define_class_under(hsax, "ParserContext", pc);
|
85
|
+
|
86
|
+
cNokogiriHtmlSaxParserContext = klass;
|
87
|
+
|
88
|
+
rb_define_singleton_method(klass, "memory", parse_memory, 2);
|
89
|
+
rb_define_singleton_method(klass, "file", parse_file, 2);
|
90
|
+
|
91
|
+
rb_define_method(klass, "parse_with", parse_with, 1);
|
92
|
+
}
|
@@ -0,0 +1,89 @@
|
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
VALUE mNokogiri ;
|
4
|
+
VALUE mNokogiriXml ;
|
5
|
+
VALUE mNokogiriHtml ;
|
6
|
+
VALUE mNokogiriXslt ;
|
7
|
+
VALUE mNokogiriXmlSax ;
|
8
|
+
VALUE mNokogiriHtmlSax ;
|
9
|
+
|
10
|
+
#ifdef USE_INCLUDED_VASPRINTF
|
11
|
+
/*
 * I srsly hate windows.  it doesn't have vasprintf.
 * Thank you Geoffroy Couprie for this implementation of vasprintf!
 *
 * Formats +fmt+ with the arguments in +ap+ into a freshly malloc'ed buffer
 * stored in *strp.  Returns the number of characters written (excluding the
 * terminating NUL), or -1 when the length cannot be determined or the
 * allocation fails; *strp is left untouched in those cases.  The caller
 * owns the buffer and must free() it.
 */
int vasprintf (char **strp, const char *fmt, va_list ap)
{
  va_list measure_ap;
  int needed;
  char *res;

  /* A va_list may only be traversed once, so measure with a copy. */
  va_copy(measure_ap, ap);
  needed = vsnprintf(NULL, 0, fmt, measure_ap);
  va_end(measure_ap);

  if (needed < 0)
    return -1;

  res = malloc((size_t)needed + 1);
  if (res == NULL)
    return -1;

  *strp = res;
  return vsnprintf(res, (size_t)needed + 1, fmt, ap);
}
|
24
|
+
#endif
|
25
|
+
|
26
|
+
int is_2_6_16(void)
|
27
|
+
{
|
28
|
+
return (strcmp(xmlParserVersion, "20616") <= 0) ? 1 : 0 ;
|
29
|
+
}
|
30
|
+
|
31
|
+
void Init_nokogiri()
|
32
|
+
{
|
33
|
+
xmlMemSetup(
|
34
|
+
(xmlFreeFunc)ruby_xfree,
|
35
|
+
(xmlMallocFunc)ruby_xmalloc,
|
36
|
+
(xmlReallocFunc)ruby_xrealloc,
|
37
|
+
strdup
|
38
|
+
);
|
39
|
+
|
40
|
+
mNokogiri = rb_define_module("Nokogiri");
|
41
|
+
mNokogiriXml = rb_define_module_under(mNokogiri, "XML");
|
42
|
+
mNokogiriHtml = rb_define_module_under(mNokogiri, "HTML");
|
43
|
+
mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
|
44
|
+
mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX");
|
45
|
+
mNokogiriHtmlSax = rb_define_module_under(mNokogiriHtml, "SAX");
|
46
|
+
|
47
|
+
rb_const_set( mNokogiri,
|
48
|
+
rb_intern("LIBXML_VERSION"),
|
49
|
+
NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION)
|
50
|
+
);
|
51
|
+
rb_const_set( mNokogiri,
|
52
|
+
rb_intern("LIBXML_PARSER_VERSION"),
|
53
|
+
NOKOGIRI_STR_NEW2(xmlParserVersion)
|
54
|
+
);
|
55
|
+
|
56
|
+
xmlInitParser();
|
57
|
+
|
58
|
+
init_xml_document();
|
59
|
+
init_html_document();
|
60
|
+
init_xml_node();
|
61
|
+
init_xml_document_fragment();
|
62
|
+
init_xml_text();
|
63
|
+
init_xml_cdata();
|
64
|
+
init_xml_processing_instruction();
|
65
|
+
init_xml_attr();
|
66
|
+
init_xml_entity_reference();
|
67
|
+
init_xml_comment();
|
68
|
+
init_xml_node_set();
|
69
|
+
init_xml_xpath_context();
|
70
|
+
init_xml_xpath();
|
71
|
+
init_xml_sax_parser_context();
|
72
|
+
init_xml_sax_parser();
|
73
|
+
init_xml_sax_push_parser();
|
74
|
+
init_xml_reader();
|
75
|
+
init_xml_dtd();
|
76
|
+
init_xml_element_content();
|
77
|
+
init_xml_attribute_decl();
|
78
|
+
init_xml_element_decl();
|
79
|
+
init_xml_entity_decl();
|
80
|
+
init_xml_namespace();
|
81
|
+
init_html_sax_parser_context();
|
82
|
+
init_xslt_stylesheet();
|
83
|
+
init_xml_syntax_error();
|
84
|
+
init_html_entity_lookup();
|
85
|
+
init_html_element_description();
|
86
|
+
init_xml_schema();
|
87
|
+
init_xml_relax_ng();
|
88
|
+
init_nokogiri_io();
|
89
|
+
}
|