nokogiri-maglev- 1.5.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +26 -0
- data/.gemtest +0 -0
- data/CHANGELOG.ja.rdoc +544 -0
- data/CHANGELOG.rdoc +532 -0
- data/Manifest.txt +283 -0
- data/README.ja.rdoc +106 -0
- data/README.rdoc +174 -0
- data/Rakefile +171 -0
- data/bin/nokogiri +53 -0
- data/ext/nokogiri/depend +358 -0
- data/ext/nokogiri/extconf.rb +124 -0
- data/ext/nokogiri/html_document.c +154 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_element_description.c +276 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +32 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser_context.c +94 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/nokogiri.c +115 -0
- data/ext/nokogiri/nokogiri.h +160 -0
- data/ext/nokogiri/st.c +576 -0
- data/ext/nokogiri/xml_attr.c +94 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +56 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +54 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +478 -0
- data/ext/nokogiri/xml_document.h +23 -0
- data/ext/nokogiri/xml_document_fragment.c +48 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +202 -0
- data/ext/nokogiri/xml_dtd.h +10 -0
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_encoding_handler.c +79 -0
- data/ext/nokogiri/xml_encoding_handler.h +8 -0
- data/ext/nokogiri/xml_entity_decl.c +110 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +52 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +56 -0
- data/ext/nokogiri/xml_io.h +11 -0
- data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
- data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
- data/ext/nokogiri/xml_namespace.c +84 -0
- data/ext/nokogiri/xml_namespace.h +13 -0
- data/ext/nokogiri/xml_node.c +1397 -0
- data/ext/nokogiri/xml_node.h +13 -0
- data/ext/nokogiri/xml_node_set.c +418 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_processing_instruction.c +56 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +684 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_relax_ng.c +162 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +293 -0
- data/ext/nokogiri/xml_sax_parser.h +39 -0
- data/ext/nokogiri/xml_sax_parser_context.c +199 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +115 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +205 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +58 -0
- data/ext/nokogiri/xml_syntax_error.h +13 -0
- data/ext/nokogiri/xml_text.c +50 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath_context.c +315 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +265 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/lib/nokogiri.rb +127 -0
- data/lib/nokogiri/css.rb +27 -0
- data/lib/nokogiri/css/node.rb +99 -0
- data/lib/nokogiri/css/parser.rb +677 -0
- data/lib/nokogiri/css/parser.y +237 -0
- data/lib/nokogiri/css/parser_extras.rb +91 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +152 -0
- data/lib/nokogiri/css/tokenizer.rex +55 -0
- data/lib/nokogiri/css/xpath_visitor.rb +171 -0
- data/lib/nokogiri/decorators/slop.rb +35 -0
- data/lib/nokogiri/html.rb +36 -0
- data/lib/nokogiri/html/builder.rb +35 -0
- data/lib/nokogiri/html/document.rb +213 -0
- data/lib/nokogiri/html/document_fragment.rb +41 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/element_description_defaults.rb +671 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +52 -0
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +88 -0
- data/lib/nokogiri/xml.rb +67 -0
- data/lib/nokogiri/xml/attr.rb +14 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +426 -0
- data/lib/nokogiri/xml/cdata.rb +11 -0
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +234 -0
- data/lib/nokogiri/xml/document_fragment.rb +98 -0
- data/lib/nokogiri/xml/dtd.rb +22 -0
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +19 -0
- data/lib/nokogiri/xml/namespace.rb +13 -0
- data/lib/nokogiri/xml/node.rb +915 -0
- data/lib/nokogiri/xml/node/save_options.rb +61 -0
- data/lib/nokogiri/xml/node_set.rb +357 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/parse_options.rb +93 -0
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +112 -0
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +4 -0
- data/lib/nokogiri/xml/sax/document.rb +164 -0
- data/lib/nokogiri/xml/sax/parser.rb +115 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/schema.rb +63 -0
- data/lib/nokogiri/xml/syntax_error.rb +47 -0
- data/lib/nokogiri/xml/text.rb +9 -0
- data/lib/nokogiri/xml/xpath.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xslt.rb +52 -0
- data/lib/nokogiri/xslt/stylesheet.rb +25 -0
- data/lib/xsd/xmlparser/nokogiri.rb +90 -0
- data/nokogiri_help_responses.md +40 -0
- data/tasks/cross_compile.rb +152 -0
- data/tasks/nokogiri.org.rb +18 -0
- data/tasks/test.rb +94 -0
- data/test/css/test_nthiness.rb +159 -0
- data/test/css/test_parser.rb +303 -0
- data/test/css/test_tokenizer.rb +198 -0
- data/test/css/test_xpath_visitor.rb +85 -0
- data/test/decorators/test_slop.rb +16 -0
- data/test/files/2ch.html +108 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/bar/bar.xsd +4 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/encoding.html +82 -0
- data/test/files/encoding.xhtml +84 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/foo/foo.xsd +4 -0
- data/test/files/metacharset.html +10 -0
- data/test/files/noencoding.html +47 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/shift_jis.html +10 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/files/snuggles.xml +3 -0
- data/test/files/staff.dtd +10 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/files/valid_bar.xml +2 -0
- data/test/helper.rb +173 -0
- data/test/html/sax/test_parser.rb +139 -0
- data/test/html/sax/test_parser_context.rb +48 -0
- data/test/html/test_builder.rb +165 -0
- data/test/html/test_document.rb +472 -0
- data/test/html/test_document_encoding.rb +138 -0
- data/test/html/test_document_fragment.rb +255 -0
- data/test/html/test_element_description.rb +101 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +193 -0
- data/test/html/test_node_encoding.rb +27 -0
- data/test/test_convert_xpath.rb +135 -0
- data/test/test_css_cache.rb +45 -0
- data/test/test_encoding_handler.rb +46 -0
- data/test/test_memory_leak.rb +72 -0
- data/test/test_nokogiri.rb +133 -0
- data/test/test_reader.rb +425 -0
- data/test/test_soap4r_sax.rb +52 -0
- data/test/test_xslt_transforms.rb +193 -0
- data/test/xml/node/test_save_options.rb +28 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +338 -0
- data/test/xml/sax/test_parser_context.rb +113 -0
- data/test/xml/sax/test_push_parser.rb +156 -0
- data/test/xml/test_attr.rb +65 -0
- data/test/xml/test_attribute_decl.rb +86 -0
- data/test/xml/test_builder.rb +227 -0
- data/test/xml/test_cdata.rb +50 -0
- data/test/xml/test_comment.rb +29 -0
- data/test/xml/test_document.rb +697 -0
- data/test/xml/test_document_encoding.rb +26 -0
- data/test/xml/test_document_fragment.rb +192 -0
- data/test/xml/test_dtd.rb +107 -0
- data/test/xml/test_dtd_encoding.rb +33 -0
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +122 -0
- data/test/xml/test_entity_reference.rb +21 -0
- data/test/xml/test_namespace.rb +70 -0
- data/test/xml/test_node.rb +917 -0
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +107 -0
- data/test/xml/test_node_reparenting.rb +334 -0
- data/test/xml/test_node_set.rb +742 -0
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader_encoding.rb +126 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +94 -0
- data/test/xml/test_syntax_error.rb +12 -0
- data/test/xml/test_text.rb +47 -0
- data/test/xml/test_unparented_node.rb +381 -0
- data/test/xml/test_xpath.rb +237 -0
- data/test/xslt/test_custom_functions.rb +94 -0
- data/test/xslt/test_exception_handling.rb +37 -0
- metadata +548 -0
@@ -0,0 +1,265 @@
|
|
1
|
+
#include <xslt_stylesheet.h>
|
2
|
+
|
3
|
+
#include <libxslt/xsltInternals.h>
|
4
|
+
#include <libxslt/xsltutils.h>
|
5
|
+
#include <libxslt/transform.h>
|
6
|
+
#include <libexslt/exslt.h>
|
7
|
+
|
8
|
+
VALUE xslt;
|
9
|
+
|
10
|
+
int vasprintf (char **strp, const char *fmt, va_list ap);
|
11
|
+
void vasprintf_free (void *p);
|
12
|
+
|
13
|
+
/*
 * Free function for wrapped stylesheets: releases the libxslt stylesheet
 * held by a Stylesheet object.
 */
static void dealloc(xsltStylesheetPtr doc)
{
  NOKOGIRI_DEBUG_START(doc);
  xsltFreeStylesheet(doc);
  NOKOGIRI_DEBUG_END(doc);
}
|
19
|
+
|
20
|
+
NORETURN(static void xslt_generic_error_handler(void * ctx, const char *msg, ...));
|
21
|
+
static void xslt_generic_error_handler(void * ctx, const char *msg, ...)
|
22
|
+
{
|
23
|
+
char * message;
|
24
|
+
VALUE exception;
|
25
|
+
|
26
|
+
va_list args;
|
27
|
+
va_start(args, msg);
|
28
|
+
vasprintf(&message, msg, args);
|
29
|
+
va_end(args);
|
30
|
+
|
31
|
+
exception = rb_exc_new2(rb_eRuntimeError, message);
|
32
|
+
vasprintf_free(message);
|
33
|
+
rb_exc_raise(exception);
|
34
|
+
}
|
35
|
+
|
36
|
+
/*
|
37
|
+
* call-seq:
|
38
|
+
* parse_stylesheet_doc(document)
|
39
|
+
*
|
40
|
+
* Parse a stylesheet from +document+.
|
41
|
+
*/
|
42
|
+
static VALUE parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
43
|
+
{
|
44
|
+
xmlDocPtr xml ;
|
45
|
+
xsltStylesheetPtr ss ;
|
46
|
+
Data_Get_Struct(xmldocobj, xmlDoc, xml);
|
47
|
+
exsltRegisterAll();
|
48
|
+
|
49
|
+
xsltSetGenericErrorFunc(NULL, xslt_generic_error_handler);
|
50
|
+
|
51
|
+
ss = xsltParseStylesheetDoc(xmlCopyDoc(xml, 1)); /* 1 => recursive */
|
52
|
+
|
53
|
+
xsltSetGenericErrorFunc(NULL, NULL);
|
54
|
+
|
55
|
+
return Data_Wrap_Struct(klass, NULL, dealloc, ss);
|
56
|
+
}
|
57
|
+
|
58
|
+
|
59
|
+
/*
|
60
|
+
* call-seq:
|
61
|
+
* serialize(document)
|
62
|
+
*
|
63
|
+
* Serialize +document+ to an xml string.
|
64
|
+
*/
|
65
|
+
static VALUE serialize(VALUE self, VALUE xmlobj)
|
66
|
+
{
|
67
|
+
xmlDocPtr xml ;
|
68
|
+
xsltStylesheetPtr ss ;
|
69
|
+
xmlChar* doc_ptr ;
|
70
|
+
int doc_len ;
|
71
|
+
VALUE rval ;
|
72
|
+
|
73
|
+
Data_Get_Struct(xmlobj, xmlDoc, xml);
|
74
|
+
Data_Get_Struct(self, xsltStylesheet, ss);
|
75
|
+
xsltSaveResultToString(&doc_ptr, &doc_len, xml, ss);
|
76
|
+
rval = NOKOGIRI_STR_NEW(doc_ptr, doc_len);
|
77
|
+
xmlFree(doc_ptr);
|
78
|
+
return rval ;
|
79
|
+
}
|
80
|
+
|
81
|
+
/*
|
82
|
+
* call-seq:
|
83
|
+
* transform(document, params = [])
|
84
|
+
*
|
85
|
+
* Apply an XSLT stylesheet to an XML::Document.
|
86
|
+
* +params+ is an array of strings used as XSLT parameters.
|
87
|
+
* returns Nokogiri::XML::Document
|
88
|
+
*
|
89
|
+
* Example:
|
90
|
+
*
|
91
|
+
* doc = Nokogiri::XML(File.read(ARGV[0]))
|
92
|
+
* xslt = Nokogiri::XSLT(File.read(ARGV[1]))
|
93
|
+
* puts xslt.transform(doc, ['key', 'value'])
|
94
|
+
*
|
95
|
+
*/
|
96
|
+
static VALUE transform(int argc, VALUE* argv, VALUE self)
|
97
|
+
{
|
98
|
+
VALUE xmldoc, paramobj ;
|
99
|
+
xmlDocPtr xml ;
|
100
|
+
xmlDocPtr result ;
|
101
|
+
xsltStylesheetPtr ss ;
|
102
|
+
const char** params ;
|
103
|
+
long param_len, j ;
|
104
|
+
|
105
|
+
rb_scan_args(argc, argv, "11", &xmldoc, ¶mobj);
|
106
|
+
if (NIL_P(paramobj)) { paramobj = rb_ary_new2(0L) ; }
|
107
|
+
if (!RTEST(rb_obj_is_kind_of(xmldoc, cNokogiriXmlDocument)))
|
108
|
+
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::Document");
|
109
|
+
|
110
|
+
/* handle hashes as arguments. */
|
111
|
+
if(T_HASH == TYPE(paramobj)) {
|
112
|
+
paramobj = rb_funcall(paramobj, rb_intern("to_a"), 0);
|
113
|
+
paramobj = rb_funcall(paramobj, rb_intern("flatten"), 0);
|
114
|
+
}
|
115
|
+
|
116
|
+
Check_Type(paramobj, T_ARRAY);
|
117
|
+
|
118
|
+
Data_Get_Struct(xmldoc, xmlDoc, xml);
|
119
|
+
Data_Get_Struct(self, xsltStylesheet, ss);
|
120
|
+
|
121
|
+
param_len = RARRAY_LEN(paramobj);
|
122
|
+
params = calloc((size_t)param_len+1, sizeof(char*));
|
123
|
+
for (j = 0 ; j < param_len ; j++) {
|
124
|
+
VALUE entry = rb_ary_entry(paramobj, j);
|
125
|
+
const char * ptr = StringValuePtr(entry);
|
126
|
+
params[j] = ptr;
|
127
|
+
}
|
128
|
+
params[param_len] = 0 ;
|
129
|
+
|
130
|
+
result = xsltApplyStylesheet(ss, xml, params);
|
131
|
+
free(params);
|
132
|
+
|
133
|
+
if (!result) rb_raise(rb_eRuntimeError, "could not perform xslt transform on document");
|
134
|
+
|
135
|
+
return Nokogiri_wrap_xml_document((VALUE)0, result) ;
|
136
|
+
}
|
137
|
+
|
138
|
+
/*
 * XPath extension-function trampoline registered by initFunc for every
 * method of a custom handler class.
 *
 * Pops the +nargs+ call arguments from the XPath value stack, converts them
 * to Ruby objects (strings and node sets are supported), calls the Ruby
 * method named after the XPath function on the handler instance stored as
 * extension data, and pushes the converted return value back.
 *
 * Raises RuntimeError for unsupported argument or return types.
 */
static void method_caller(xmlXPathParserContextPtr ctxt, int nargs)
{
  const xmlChar * function;
  const xmlChar * functionURI;
  size_t i, count;

  xsltTransformContextPtr transform;
  xmlXPathObjectPtr xpath;
  VALUE obj;
  VALUE *args = NULL;
  VALUE result;

  transform = xsltXPathGetTransformContext(ctxt);

  function = ctxt->context->function;
  functionURI = ctxt->context->functionURI;
  obj = (VALUE)xsltGetExtData(transform, functionURI);

  /* Only this call's arguments belong to us; the value stack may also hold
   * operands of an enclosing expression, so valueNr is not the arity. */
  count = nargs > 0 ? (size_t)nargs : 0;
  if (count > 0) {
    args = calloc(count, sizeof(VALUE));
    if (NULL == args)
      rb_raise(rb_eNoMemError, "could not allocate XSLT function arguments");
  }

  /* Arguments are pushed left to right, so the last one is popped first:
   * fill the array from the back to keep the caller's order. */
  for(i = count; i > 0; i--) {
    VALUE thing;

    xpath = valuePop(ctxt);
    if (NULL == xpath) {
      free(args);
      rb_raise(rb_eRuntimeError, "missing argument for XSLT extension function");
    }

    switch(xpath->type) {
      case XPATH_STRING:
        thing = NOKOGIRI_STR_NEW2(xpath->stringval);
        break;
      case XPATH_NODESET:
        if(NULL == xpath->nodesetval) {
          thing = Nokogiri_wrap_xml_node_set(
              xmlXPathNodeSetCreate(NULL),
              DOC_RUBY_OBJECT(ctxt->context->doc));
        } else {
          thing = Nokogiri_wrap_xml_node_set(xpath->nodesetval,
              DOC_RUBY_OBJECT(ctxt->context->doc));
        }
        break;
      default:
        {
          int type = (int)xpath->type;
          free(args); /* rb_raise does not return */
          rb_raise(rb_eRuntimeError, "do not handle type: %d", type);
        }
    }
    args[i - 1] = thing;
    /* NOTE(review): the popped xmlXPathObject itself is not released here;
     * confirm node-set ownership in Nokogiri_wrap_xml_node_set before
     * freeing it. */
  }

  /* NOTE(review): if the Ruby method raises, args is not released. */
  result = rb_funcall3(obj, rb_intern((const char *)function), (int)count, args);
  free(args);

  switch(TYPE(result)) {
    case T_FLOAT:
    case T_BIGNUM:
    case T_FIXNUM:
      xmlXPathReturnNumber(ctxt, NUM2DBL(result));
      break;
    case T_STRING:
      xmlXPathReturnString(
          ctxt,
          xmlStrdup((xmlChar *)StringValuePtr(result))
      );
      break;
    case T_TRUE:
      xmlXPathReturnTrue(ctxt);
      break;
    case T_FALSE:
      xmlXPathReturnFalse(ctxt);
      break;
    case T_NIL:
      break;
    default:
      rb_raise(rb_eRuntimeError, "Invalid return type");
  }
}
|
207
|
+
|
208
|
+
/*
 * Extension-module initialiser passed to xsltRegisterExtModule.
 *
 * Looks up the handler class registered for +uri+ in XSLT's @modules hash,
 * registers each of the class's own instance methods as an XPath extension
 * function dispatched through method_caller, and returns a new instance of
 * the class as the module's extension data.
 */
static void * initFunc(xsltTransformContextPtr ctxt, const xmlChar *uri)
{
  VALUE modules = rb_iv_get(xslt, "@modules");
  VALUE obj = rb_hash_aref(modules, rb_str_new2((const char *)uri));
  /* instance_methods(false): skip inherited methods */
  VALUE methods = rb_funcall(obj, rb_intern("instance_methods"), 1, Qfalse);
  long i;

  for(i = 0; i < RARRAY_LEN(methods); i++) {
    VALUE elem = rb_ary_entry(methods, i); /* maglev, replaces RARRAY_PTR */
    VALUE method_name = rb_obj_as_string(elem);
    xsltRegisterExtFunction(ctxt,
        (unsigned char *)StringValuePtr(method_name), uri, method_caller);
  }

  return (void *)rb_class_new_instance(0, NULL, obj);
}
|
225
|
+
|
226
|
+
static void shutdownFunc(xsltTransformContextPtr ctxt,
|
227
|
+
const xmlChar *uri, void *data)
|
228
|
+
{
|
229
|
+
}
|
230
|
+
|
231
|
+
/*
|
232
|
+
* call-seq:
|
233
|
+
* register(uri, custom_handler_class)
|
234
|
+
*
|
235
|
+
* Register a class that implements custom XLST transformation functions.
|
236
|
+
*/
|
237
|
+
static VALUE registr(VALUE self, VALUE uri, VALUE obj)
|
238
|
+
{
|
239
|
+
VALUE modules = rb_iv_get(self, "@modules");
|
240
|
+
if(NIL_P(modules)) rb_raise(rb_eRuntimeError, "wtf! @modules isn't set");
|
241
|
+
|
242
|
+
rb_hash_aset(modules, uri, obj);
|
243
|
+
xsltRegisterExtModule((unsigned char *)StringValuePtr(uri), initFunc, shutdownFunc);
|
244
|
+
return self;
|
245
|
+
}
|
246
|
+
|
247
|
+
VALUE cNokogiriXsltStylesheet ;
|
248
|
+
void init_xslt_stylesheet()
|
249
|
+
{
|
250
|
+
VALUE nokogiri;
|
251
|
+
VALUE klass;
|
252
|
+
|
253
|
+
nokogiri = rb_define_module("Nokogiri");
|
254
|
+
xslt = rb_define_module_under(nokogiri, "XSLT");
|
255
|
+
klass = rb_define_class_under(xslt, "Stylesheet", rb_cObject);
|
256
|
+
|
257
|
+
rb_iv_set(xslt, "@modules", rb_hash_new());
|
258
|
+
|
259
|
+
cNokogiriXsltStylesheet = klass;
|
260
|
+
|
261
|
+
rb_define_singleton_method(klass, "parse_stylesheet_doc", parse_stylesheet_doc, 1);
|
262
|
+
rb_define_singleton_method(xslt, "register", registr, 2);
|
263
|
+
rb_define_method(klass, "serialize", serialize, 1);
|
264
|
+
rb_define_method(klass, "transform", transform, -1);
|
265
|
+
}
|
data/lib/nokogiri.rb
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# Modify the PATH on windows so that the external DLLs will get loaded.
|
3
|
+
|
4
|
+
require 'rbconfig'
|
5
|
+
ENV['PATH'] = [File.expand_path(
|
6
|
+
File.join(File.dirname(__FILE__), "..", "ext", "nokogiri")
|
7
|
+
), ENV['PATH']].compact.join(';') if RbConfig::CONFIG['host_os'] =~ /(mswin|mingw)/i
|
8
|
+
|
9
|
+
if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
|
10
|
+
# The line below caused a problem on non-GAE rack environment.
|
11
|
+
# unless defined?(JRuby::Rack::VERSION) || defined?(AppEngine::ApiProxy)
|
12
|
+
#
|
13
|
+
# However, simply cutting defined?(JRuby::Rack::VERSION) off resulted in
|
14
|
+
# an unable-to-load-nokogiri problem. Thus, now, Nokogiri checks the presence
|
15
|
+
# of appengine-rack.jar in $LOAD_PATH. If Nokogiri is on GAE, Nokogiri
|
16
|
+
# should skip loading xml jars. This is because those are in WEB-INF/lib and
|
17
|
+
# already set in the classpath.
|
18
|
+
unless $LOAD_PATH.to_s.include?("appengine-rack")
|
19
|
+
require 'isorelax.jar'
|
20
|
+
require 'jing.jar'
|
21
|
+
require 'nekohtml.jar'
|
22
|
+
require 'nekodtd.jar'
|
23
|
+
require 'xercesImpl.jar'
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
require 'nokogiri/nokogiri'
|
28
|
+
require 'nokogiri/version'
|
29
|
+
require 'nokogiri/syntax_error'
|
30
|
+
require 'nokogiri/xml'
|
31
|
+
require 'nokogiri/xslt'
|
32
|
+
require 'nokogiri/html'
|
33
|
+
require 'nokogiri/decorators/slop'
|
34
|
+
require 'nokogiri/css'
|
35
|
+
require 'nokogiri/html/builder'
|
36
|
+
|
37
|
+
# Nokogiri parses and searches XML/HTML very quickly, and also has
|
38
|
+
# correctly implemented CSS3 selector support as well as XPath support.
|
39
|
+
#
|
40
|
+
# Parsing a document returns either a Nokogiri::XML::Document, or a
|
41
|
+
# Nokogiri::HTML::Document depending on the kind of document you parse.
|
42
|
+
#
|
43
|
+
# Here is an example:
|
44
|
+
#
|
45
|
+
# require 'nokogiri'
|
46
|
+
# require 'open-uri'
|
47
|
+
#
|
48
|
+
# # Get a Nokogiri::HTML::Document for the page we’re interested in...
|
49
|
+
#
|
50
|
+
# doc = Nokogiri::HTML(open('http://www.google.com/search?q=tenderlove'))
|
51
|
+
#
|
52
|
+
# # Do funky things with it using Nokogiri::XML::Node methods...
|
53
|
+
#
|
54
|
+
# ####
|
55
|
+
# # Search for nodes by css
|
56
|
+
# doc.css('h3.r a.l').each do |link|
|
57
|
+
# puts link.content
|
58
|
+
# end
|
59
|
+
#
|
60
|
+
# See Nokogiri::XML::Node#css for more information about CSS searching.
|
61
|
+
# See Nokogiri::XML::Node#xpath for more information about XPath searching.
|
62
|
+
module Nokogiri
  class << self
    ###
    # Parse an HTML or XML document.  +string+ contains the document.
    #
    # +string+ is treated as HTML when it responds to +read+ or when its
    # text looks like it opens an html tag; otherwise it is parsed as XML.
    # +url+, +encoding+ and +options+ are passed through to the parser;
    # when +options+ is nil the default parse options for the chosen
    # document type are used.
    #
    # Yields the parsed document when a block is given, and returns it.
    def parse(string, url = nil, encoding = nil, options = nil)
      doc =
        if string.respond_to?(:read) ||
            string =~ /^\s*<[^Hh>]*html/i # Probably html
          Nokogiri.HTML(
            string,
            url,
            encoding, options || XML::ParseOptions::DEFAULT_HTML
          )
        else
          Nokogiri.XML(string, url, encoding,
                       options || XML::ParseOptions::DEFAULT_XML)
        end
      yield doc if block_given?
      doc
    end

    ###
    # With +input+, parse it as an HTML fragment and return the fragment's
    # first child.  Without +input+, build a document from the given block
    # (see Kernel#Nokogiri).  +opts+ is accepted but not used.
    def make(input = nil, opts = {}, &blk)
      if input
        Nokogiri::HTML.fragment(input).children.first
      else
        Nokogiri(&blk)
      end
    end

    ###
    # Parse a document and add the Slop decorator.  The Slop decorator
    # implements method_missing such that methods may be used instead of CSS
    # or XPath.  For example:
    #
    #   doc = Nokogiri::Slop(<<-eohtml)
    #     <html>
    #       <body>
    #         <p>first</p>
    #         <p>second</p>
    #       </body>
    #     </html>
    #   eohtml
    #   assert_equal('second', doc.html.body.p[1].text)
    #
    def Slop(*args, &block)
      Nokogiri(*args, &block).slop!
    end
  end
end
|
113
|
+
|
114
|
+
###
# Parse a document contained in +args+.  Nokogiri will try to guess what
# type of document you are attempting to parse.  For more information, see
# Nokogiri.parse
#
# When a block is given, +args+ is ignored: the block is handed to
# Nokogiri::HTML::Builder and the root of the built document is returned.
#
# To specify the type of document, use Nokogiri.XML or Nokogiri.HTML.
def Nokogiri(*args, &block)
  if block_given?
    builder = Nokogiri::HTML::Builder.new(&block)
    return builder.doc.root
  else
    Nokogiri.parse(*args)
  end
end
|
data/lib/nokogiri/css.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'nokogiri/css/node'
|
2
|
+
require 'nokogiri/css/xpath_visitor'
|
3
|
+
x = $-w
|
4
|
+
$-w = false
|
5
|
+
require 'nokogiri/css/parser'
|
6
|
+
$-w = x
|
7
|
+
|
8
|
+
require 'nokogiri/css/tokenizer'
|
9
|
+
require 'nokogiri/css/syntax_error'
|
10
|
+
|
11
|
+
module Nokogiri
  module CSS
    class << self
      ###
      # Parse this CSS selector in +selector+.  Returns an AST.
      def parse(selector)
        Parser.new.parse selector
      end

      ###
      # Get the XPath for +selector+.
      #
      # +options+ is passed on to the parser's +xpath_for+; its :ns entry
      # (defaulting to an empty Hash) is given to the Parser constructor.
      def xpath_for(selector, options = {})
        Parser.new(options[:ns] || {}).xpath_for selector, options
      end
    end
  end
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
module Nokogiri
  module CSS
    # A node of the CSS selector AST: a +type+ symbol plus an Array +value+
    # whose entries are child Nodes and/or plain values such as strings.
    class Node
      # Get the type of this node
      attr_accessor :type
      # Get the value of this node
      attr_accessor :value

      # Create a new Node with +type+ and +value+
      def initialize(type, value)
        @type = type
        @value = value
      end

      # Accept +visitor+: dispatches to the visitor's
      # visit_<downcased type> method, passing this node.
      def accept(visitor)
        visitor.send(:"visit_#{type.to_s.downcase}", self)
      end

      ###
      # Convert this CSS node to xpath with +prefix+ using +visitor+
      #
      # The tree is preprocessed in place first (see #preprocess!).
      def to_xpath(prefix = '//', visitor = XPathVisitor.new)
        self.preprocess!
        prefix + visitor.accept(self)
      end

      # Preprocess this node tree
      #
      # Rewrites, in place, conditional selectors of the form
      # <tag>:nth-child(...), :nth-last-child(...), :first-child,
      # :last-child and :only-child: the element name becomes '*' and the
      # pseudo class is replaced by a COMBINATOR pairing the positional
      # function with a 'self(' function that carries the original tag
      # name.  Returns self.
      def preprocess!
        ### Deal with nth-child
        matches = find_by_type(
          [:CONDITIONAL_SELECTOR,
            [:ELEMENT_NAME],
            [:PSEUDO_CLASS,
              [:FUNCTION]
            ]
          ]
        )
        matches.each do |match|
          if match.value[1].value[0].value[0] =~ /^nth-(last-)?child/
            tag_name = match.value[0].value.first
            match.value[0].value = ['*']
            match.value[1] = Node.new(:COMBINATOR, [
              match.value[1].value[0],
              Node.new(:FUNCTION, ['self(', tag_name])
            ])
          end
        end

        ### Deal with first-child, last-child
        matches = find_by_type(
          [:CONDITIONAL_SELECTOR,
            [:ELEMENT_NAME], [:PSEUDO_CLASS]
          ])
        matches.each do |match|
          if ['first-child', 'last-child'].include?(match.value[1].value.first)
            # 'first-child' => 'first', 'last-child' => 'last'
            which = match.value[1].value.first.gsub(/-\w*$/, '')
            tag_name = match.value[0].value.first
            match.value[0].value = ['*']
            match.value[1] = Node.new(:COMBINATOR, [
              Node.new(:FUNCTION, ["#{which}("]),
              Node.new(:FUNCTION, ['self(', tag_name])
            ])
          elsif 'only-child' == match.value[1].value.first
            tag_name = match.value[0].value.first
            match.value[0].value = ['*']
            match.value[1] = Node.new(:COMBINATOR, [
              Node.new(:FUNCTION, ["#{match.value[1].value.first}("]),
              Node.new(:FUNCTION, ['self(', tag_name])
            ])
          end
        end

        self
      end

      # Find a node by type using +types+
      #
      # +types+ is a nested type signature as produced by #to_type.
      # Returns every node in this subtree (this node included) whose
      # signature equals +types+.
      def find_by_type(types)
        matches = []
        matches << self if to_type == types
        @value.each do |v|
          matches.concat(v.find_by_type(types)) if v.respond_to?(:find_by_type)
        end
        matches
      end

      # Convert to_type
      #
      # Returns the nested type signature of this subtree: this node's type
      # followed by the signatures of its child nodes (plain values are
      # skipped).
      def to_type
        [@type] + @value.map { |n|
          n.to_type if n.respond_to?(:to_type)
        }.compact
      end

      # Convert to array
      #
      # Like #to_type, but plain values are kept, each wrapped in its own
      # one-element Array unless it responds to +to_a+.
      def to_a
        [@type] + @value.map { |n| n.respond_to?(:to_a) ? n.to_a : [n] }
      end
    end
  end
end
|