fastxml 0.1.91
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +0 -0
- data/README +9 -0
- data/ext/Makefile +153 -0
- data/ext/extconf.rb +13 -0
- data/ext/fastxml.bundle +0 -0
- data/ext/fastxml.c +280 -0
- data/ext/fastxml.h +61 -0
- data/ext/fastxml.o +0 -0
- data/ext/fastxml_attrlist.c +60 -0
- data/ext/fastxml_attrlist.h +11 -0
- data/ext/fastxml_attrlist.o +0 -0
- data/ext/fastxml_doc.c +190 -0
- data/ext/fastxml_doc.h +16 -0
- data/ext/fastxml_doc.o +0 -0
- data/ext/fastxml_node.c +240 -0
- data/ext/fastxml_node.h +21 -0
- data/ext/fastxml_node.o +0 -0
- data/ext/fastxml_nodelist.c +146 -0
- data/ext/fastxml_nodelist.h +13 -0
- data/ext/fastxml_nodelist.o +0 -0
- data/ext/mkmf.log +119 -0
- data/lib/fastxml_lib.rb +93 -0
- data/specs/basic_html_spec.rb +70 -0
- data/specs/basic_parsing_spec.rb +48 -0
- data/specs/fastxml_doc_spec.rb +82 -0
- data/specs/fastxml_node_spec.rb +110 -0
- data/specs/fastxml_nodelist_spec.rb +46 -0
- data/test_data/cnn_main.html +1348 -0
- data/test_data/hasno_feed.html +364 -0
- data/test_data/hasno_feed.xml +301 -0
- data/test_data/hasno_test.xml +301 -0
- data/test_data/labels.xml +23 -0
- data/test_data/simple.xml +10 -0
- data/test_data/twitter_public.html +949 -0
- data/test_data/unicode.xml +10596 -0
- data/test_data/xslspec.xml +24682 -0
- metadata +114 -0
data/ext/fastxml_node.h
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
/*
|
2
|
+
* $Id$
|
3
|
+
*/
|
4
|
+
|
5
|
+
#ifndef fastxml_node_h
|
6
|
+
#define fastxml_node_h
|
7
|
+
RUBY_EXTERN VALUE fastxml_node_initialize(VALUE self);
|
8
|
+
RUBY_EXTERN VALUE fastxml_node_search(VALUE self,VALUE raw_xpath, VALUE blk);
|
9
|
+
RUBY_EXTERN VALUE fastxml_node_name(VALUE self);
|
10
|
+
RUBY_EXTERN VALUE fastxml_node_value(VALUE self);
|
11
|
+
RUBY_EXTERN VALUE fastxml_node_value_set(VALUE self, VALUE new_val);
|
12
|
+
RUBY_EXTERN VALUE fastxml_node_innerxml(VALUE self);
|
13
|
+
RUBY_EXTERN VALUE fastxml_node_to_s(VALUE self);
|
14
|
+
RUBY_EXTERN VALUE fastxml_node_xpath(VALUE self);
|
15
|
+
RUBY_EXTERN VALUE fastxml_node_attr(VALUE self);
|
16
|
+
RUBY_EXTERN VALUE fastxml_node_children(VALUE self);
|
17
|
+
RUBY_EXTERN VALUE fastxml_node_next(VALUE self);
|
18
|
+
RUBY_EXTERN VALUE fastxml_node_prev(VALUE self);
|
19
|
+
RUBY_EXTERN VALUE fastxml_node_parent(VALUE self);
|
20
|
+
RUBY_EXTERN VALUE fastxml_node_inspect(VALUE self);
|
21
|
+
#endif
|
data/ext/fastxml_node.o
ADDED
Binary file
|
@@ -0,0 +1,146 @@
|
|
1
|
+
/*
|
2
|
+
* $Id: fastxml_node.c 29 2007-08-16 05:16:47Z segfault $
|
3
|
+
*/
|
4
|
+
|
5
|
+
#include "fastxml.h"
|
6
|
+
#include "fastxml_node.h"
|
7
|
+
#include "fastxml_doc.h"
|
8
|
+
#include "fastxml_nodelist.h"
|
9
|
+
|
10
|
+
/* {{{ fastxml_nodelist
|
11
|
+
*/
|
12
|
+
/*
 * Build the inspect string for a node list by formatting the receiver's
 * class, object id and list length through the "#<%s:0x%x %d>" template.
 */
VALUE fastxml_nodelist_inspect(VALUE self)
{
    fxml_data_t *data;
    VALUE fmt_args[4];
    VALUE doc_val = rb_iv_get( self, "@lxml_doc" );

    /* the unwrapped struct is not read below; the unwrap of @lxml_doc is
     * kept exactly as the original performed it */
    Data_Get_Struct( doc_val, fxml_data_t, data );

    fmt_args[0] = rb_str_new2( "#<%s:0x%x %d>" );
    fmt_args[1] = CLASS_OF( self );
    fmt_args[2] = rb_obj_id( self );
    fmt_args[3] = fastxml_nodelist_length( self );

    return rb_f_sprintf( 4, fmt_args );
}
|
28
|
+
|
29
|
+
/*
 * Initializer for node list objects: performs no setup and hands the
 * receiver straight back.
 */
VALUE fastxml_nodelist_initialize(VALUE self)
{
    return self;
}
|
33
|
+
|
34
|
+
/*
 * Return the number of nodes in the list as a Ruby integer.
 *
 * The count is cached in data->list_len; the value -1 marks it as not yet
 * computed, in which case the data->list chain is walked once via ->next.
 */
VALUE fastxml_nodelist_length(VALUE self)
{
    fxml_data_t *data;
    xmlNodePtr node;
    VALUE doc_val = rb_iv_get( self, "@lxml_doc" );

    Data_Get_Struct( doc_val, fxml_data_t, data );

    if (data->list_len == -1) {
        data->list_len = 0;
        for (node = data->list; node != NULL; node = node->next)
            data->list_len++;
    }

    return rb_int2inum( data->list_len );
}
|
56
|
+
|
57
|
+
/*
 * Convert the sibling chain starting at root->list into a Ruby Array,
 * wrapping every node with fastxml_raw_node_to_obj().
 */
VALUE fastxml_nodelist_obj_to_ary(fxml_data_t *root)
{
    xmlNodePtr node;
    VALUE result = rb_ary_new();

    for (node = root->list; node != NULL; node = node->next)
        rb_ary_push( result, fastxml_raw_node_to_obj( node ) );

    return result;
}
|
70
|
+
|
71
|
+
/*
 * Convert the node set of an XPath result (root->xpath_obj) into a Ruby
 * Array, wrapping every node with fastxml_raw_node_to_obj().
 *
 * In libxml2 a node set's nodeTab is an array of xmlNodePtr, so each slot
 * must be read as nodeTab[i]. The previous code stored nodeTab in a plain
 * xmlNodePtr and advanced it with cur++, which strides by sizeof(xmlNode)
 * and hands the address of the table itself to the wrapper instead of the
 * nodes it contains.
 *
 * An empty Array is returned when there is no XPath object or no node set.
 * Iteration is bounded by root->list_len (as before) and additionally by
 * the node set's own nodeNr so the table is never read past its end.
 */
VALUE fastxml_nodeset_obj_to_ary(fxml_data_t *root)
{
    VALUE ret;
    xmlNodeSetPtr set;
    int i;

    ret = rb_ary_new();

    if (root->xpath_obj == NULL)
        return ret;

    set = root->xpath_obj->nodesetval;
    if (set == NULL || set->nodeTab == NULL)
        return ret;

    for (i = 0; i < root->list_len && i < set->nodeNr; i++) {
        rb_ary_push( ret, fastxml_raw_node_to_obj( set->nodeTab[i] ) );
    }

    return ret;
}
|
85
|
+
|
86
|
+
/*
 * Return the Ruby Array of nodes for this list, building it on first use
 * and caching it in the @list instance variable.
 *
 * When data->xpath_obj is set the array is built from the XPath node set,
 * otherwise from the plain sibling chain.
 */
VALUE fastxml_nodelist_gen_list(VALUE self, fxml_data_t *data)
{
    VALUE cached = rb_iv_get( self, "@list" );

    if (cached != Qnil)
        return cached;

    cached = (data->xpath_obj != NULL)
        ? fastxml_nodeset_obj_to_ary( data )
        : fastxml_nodelist_obj_to_ary( data );
    rb_iv_set( self, "@list", cached );

    return cached;
}
|
102
|
+
|
103
|
+
/*
 * Return the list's nodes as a Ruby Array (the same cached array that
 * fastxml_nodelist_gen_list() produces).
 */
VALUE fastxml_nodelist_to_ary(VALUE self)
{
    fxml_data_t *data;
    VALUE doc_val = rb_iv_get( self, "@lxml_doc" );

    Data_Get_Struct( doc_val, fxml_data_t, data );

    return fastxml_nodelist_gen_list( self, data );
}
|
112
|
+
|
113
|
+
/*
 * Yield every node of the list, in order, to the caller's block and
 * return the receiver.
 */
VALUE fastxml_nodelist_each(VALUE self)
{
    fxml_data_t *data;
    VALUE doc_val, nodes;
    int pos = 0;

    doc_val = rb_iv_get( self, "@lxml_doc" );
    Data_Get_Struct( doc_val, fxml_data_t, data );
    nodes = fastxml_nodelist_gen_list( self, data );

    /* the length is re-read on every pass, exactly as the original loop did */
    while (pos < RARRAY(nodes)->len) {
        rb_yield( RARRAY(nodes)->ptr[pos] );
        pos++;
    }

    return self;
}
|
129
|
+
|
130
|
+
/*
 * Look up a single element of the list's node array via rb_ary_entry().
 *
 * Positive idx values are shifted down by one before the lookup; zero and
 * negative values are passed through unchanged.
 */
VALUE fastxml_nodelist_entry(VALUE self, long idx)
{
    fxml_data_t *data;
    VALUE nodes;
    long pos;
    VALUE doc_val = rb_iv_get( self, "@lxml_doc" );

    Data_Get_Struct( doc_val, fxml_data_t, data );
    nodes = fastxml_nodelist_gen_list( self, data );

    // this comes in offset by 1
    // TODO: find out why this is provided offset by 1 and not 0-based
    // NOTE(review): if this function is registered directly as a Ruby method,
    // idx presumably arrives as a tagged Fixnum VALUE (2n+1) rather than a
    // plain long, which would explain 0 showing up as 1 - and would make every
    // index above 0 resolve to the wrong slot. Confirm against the
    // rb_define_method call; if so, decode with NUM2LONG instead of idx-1.
    pos = (idx > 0) ? idx - 1 : idx;

    return rb_ary_entry( nodes, pos );
}
|
144
|
+
|
145
|
+
/* }}} fastxml_nodelist
|
146
|
+
*/
|
@@ -0,0 +1,13 @@
|
|
1
|
+
/*
|
2
|
+
* $Id$
|
3
|
+
*/
|
4
|
+
|
5
|
+
#ifndef fastxml_nodelist_h
|
6
|
+
#define fastxml_nodelist_h
|
7
|
+
RUBY_EXTERN VALUE fastxml_nodelist_initialize(VALUE self);
|
8
|
+
RUBY_EXTERN VALUE fastxml_nodelist_inspect(VALUE self);
|
9
|
+
RUBY_EXTERN VALUE fastxml_nodelist_length(VALUE self);
|
10
|
+
RUBY_EXTERN VALUE fastxml_nodelist_entry(VALUE self, long idx);
|
11
|
+
RUBY_EXTERN VALUE fastxml_nodelist_each(VALUE self);
|
12
|
+
RUBY_EXTERN VALUE fastxml_nodelist_to_ary(VALUE self);
|
13
|
+
#endif /*fastxml_nodelist_h*/
|
Binary file
|
data/ext/mkmf.log
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
find_header: checking for #include <libxml/tree.h>
|
2
|
+
... -------------------- yes
|
3
|
+
|
4
|
+
"/usr/bin/cpp-4.0 -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.2.2 -I. -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -o conftest.i"
|
5
|
+
conftest.c:1:25: error: libxml/tree.h: No such file or directory
|
6
|
+
checked program was:
|
7
|
+
/* begin */
|
8
|
+
1: #include <libxml/tree.h>
|
9
|
+
/* end */
|
10
|
+
|
11
|
+
"/usr/bin/cpp-4.0 -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.2.2 -I. -I/opt/local/include -O2 -fno-common -pipe -fno-common -I/usr/include/libxml2 conftest.c -o conftest.i"
|
12
|
+
checked program was:
|
13
|
+
/* begin */
|
14
|
+
1: #include <libxml/tree.h>
|
15
|
+
/* end */
|
16
|
+
|
17
|
+
--------------------
|
18
|
+
|
19
|
+
find_header: checking for #include <libxslt/xslt.h>
|
20
|
+
... -------------------- yes
|
21
|
+
|
22
|
+
"/usr/bin/cpp-4.0 -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.2.2 -I. -I/usr/include/libxml2 -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -o conftest.i"
|
23
|
+
checked program was:
|
24
|
+
/* begin */
|
25
|
+
1: #include <libxslt/xslt.h>
|
26
|
+
/* end */
|
27
|
+
|
28
|
+
--------------------
|
29
|
+
|
30
|
+
find_library: checking for xmlInitParser() in -lxml2... -------------------- yes
|
31
|
+
|
32
|
+
"/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.2.2 -I. -I/usr/include/libxml2 -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L"." -L"/opt/local/lib" -L. -L/opt/local/lib -lruby-static -lxml2 -lpthread -ldl -lobjc "
|
33
|
+
conftest.c: In function ‘t’:
|
34
|
+
conftest.c:3: error: ‘xmlInitParser’ undeclared (first use in this function)
|
35
|
+
conftest.c:3: error: (Each undeclared identifier is reported only once
|
36
|
+
conftest.c:3: error: for each function it appears in.)
|
37
|
+
checked program was:
|
38
|
+
/* begin */
|
39
|
+
1: /*top*/
|
40
|
+
2: int main() { return 0; }
|
41
|
+
3: int t() { void ((*volatile p)()); p = (void ((*)()))xmlInitParser; return 0; }
|
42
|
+
/* end */
|
43
|
+
|
44
|
+
"/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.2.2 -I. -I/usr/include/libxml2 -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L"." -L"/opt/local/lib" -L. -L/opt/local/lib -lruby-static -lxml2 -lpthread -ldl -lobjc "
|
45
|
+
checked program was:
|
46
|
+
/* begin */
|
47
|
+
1: /*top*/
|
48
|
+
2: int main() { return 0; }
|
49
|
+
3: int t() { xmlInitParser(); return 0; }
|
50
|
+
/* end */
|
51
|
+
|
52
|
+
--------------------
|
53
|
+
|
54
|
+
have_library: checking for xmlInitParser() in -lxml2... -------------------- yes
|
55
|
+
|
56
|
+
"/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.2.2 -I. -I/usr/include/libxml2 -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L"." -L"/opt/local/lib" -L. -L/opt/local/lib -lxml2 -lruby-static -lxml2 -lxml2 -lpthread -ldl -lobjc "
|
57
|
+
checked program was:
|
58
|
+
/* begin */
|
59
|
+
1: #include <libxml/parser.h>
|
60
|
+
2:
|
61
|
+
3: /*top*/
|
62
|
+
4: int main() { return 0; }
|
63
|
+
5: int t() { void ((*volatile p)()); p = (void ((*)()))xmlInitParser; return 0; }
|
64
|
+
/* end */
|
65
|
+
|
66
|
+
--------------------
|
67
|
+
|
68
|
+
find_library: checking for xmlInitParser() in -lxslt... -------------------- yes
|
69
|
+
|
70
|
+
"/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.2.2 -I. -I/usr/include/libxml2 -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L"." -L"/opt/local/lib" -L. -L/opt/local/lib -lxml2 -lxml2 -lruby-static -lxslt -lxml2 -lxml2 -lpthread -ldl -lobjc "
|
71
|
+
conftest.c: In function ‘t’:
|
72
|
+
conftest.c:3: error: ‘xmlInitParser’ undeclared (first use in this function)
|
73
|
+
conftest.c:3: error: (Each undeclared identifier is reported only once
|
74
|
+
conftest.c:3: error: for each function it appears in.)
|
75
|
+
checked program was:
|
76
|
+
/* begin */
|
77
|
+
1: /*top*/
|
78
|
+
2: int main() { return 0; }
|
79
|
+
3: int t() { void ((*volatile p)()); p = (void ((*)()))xmlInitParser; return 0; }
|
80
|
+
/* end */
|
81
|
+
|
82
|
+
"/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.2.2 -I. -I/usr/include/libxml2 -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L"." -L"/opt/local/lib" -L. -L/opt/local/lib -lxml2 -lxml2 -lruby-static -lxslt -lxml2 -lxml2 -lpthread -ldl -lobjc "
|
83
|
+
checked program was:
|
84
|
+
/* begin */
|
85
|
+
1: /*top*/
|
86
|
+
2: int main() { return 0; }
|
87
|
+
3: int t() { xmlInitParser(); return 0; }
|
88
|
+
/* end */
|
89
|
+
|
90
|
+
--------------------
|
91
|
+
|
92
|
+
have_library: checking for xsltParseStylesheetFile() in -lxslt... -------------------- yes
|
93
|
+
|
94
|
+
"/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.2.2 -I. -I/usr/include/libxml2 -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L"." -L"/opt/local/lib" -L. -L/opt/local/lib -lxslt -lxml2 -lxml2 -lruby-static -lxslt -lxslt -lxml2 -lxml2 -lpthread -ldl -lobjc "
|
95
|
+
conftest.c: In function ‘t’:
|
96
|
+
conftest.c:5: error: ‘xsltParseStylesheetFile’ undeclared (first use in this function)
|
97
|
+
conftest.c:5: error: (Each undeclared identifier is reported only once
|
98
|
+
conftest.c:5: error: for each function it appears in.)
|
99
|
+
checked program was:
|
100
|
+
/* begin */
|
101
|
+
1: #include <libxslt/xslt.h>
|
102
|
+
2:
|
103
|
+
3: /*top*/
|
104
|
+
4: int main() { return 0; }
|
105
|
+
5: int t() { void ((*volatile p)()); p = (void ((*)()))xsltParseStylesheetFile; return 0; }
|
106
|
+
/* end */
|
107
|
+
|
108
|
+
"/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.2.2 -I. -I/usr/include/libxml2 -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L"." -L"/opt/local/lib" -L. -L/opt/local/lib -lxslt -lxml2 -lxml2 -lruby-static -lxslt -lxslt -lxml2 -lxml2 -lpthread -ldl -lobjc "
|
109
|
+
checked program was:
|
110
|
+
/* begin */
|
111
|
+
1: #include <libxslt/xslt.h>
|
112
|
+
2:
|
113
|
+
3: /*top*/
|
114
|
+
4: int main() { return 0; }
|
115
|
+
5: int t() { xsltParseStylesheetFile(); return 0; }
|
116
|
+
/* end */
|
117
|
+
|
118
|
+
--------------------
|
119
|
+
|
data/lib/fastxml_lib.rb
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
# $Id$
|
2
|
+
# Top-level namespace of the FastXml library.
module FastXml
  # Version string of this release.
  VERSION = '0.1.91'
end
|
5
|
+
|
6
|
+
# Search helpers mixed into FastXml::Doc and FastXml::Node. Every helper is
# built on the including object's own +search+ and +children+ methods.
module FastXml::Common
  # Runs +search+ with the XPath "//<type>".
  def children_of_type(type)
    search( "//#{type}" )
  end

  # Yields each element of +children+ to the caller's block.
  def each_child(&blk)
    children.each do |child|
      yield child
    end
  end

  # Runs +search+ with +xpath+ prefixed by a single "/".
  def /(xpath)
    search( "/#{xpath}" )
  end

  # Returns the first result of searching for +xpath+, or nil when the search
  # returns nothing (or an empty result).
  def at(xpath)
    found = search( xpath )
    if found && found.length > 0
      found[0]
    else
      nil
    end
  end

  # NOTE(review): no +display+ is defined in this module, so this presumably
  # binds +to_s+ to the built-in Object#display - confirm that is intended.
  alias :to_s :display
end
|
27
|
+
|
28
|
+
# Ruby-side additions to the document class.
class FastXml::Doc
  include FastXml::Common

  # Always true for documents.
  def doc?; true; end

  # Always nil.
  def doctype?; nil; end

  # Value of @forgiving; the variable is set to false when it is unset/falsy.
  def forgiving?
    @forgiving ||= false
  end

  # Value of @validate_dtd; the variable is set to false when it is unset/falsy.
  def validate?
    @validate_dtd ||= false
  end

  # XPath of the document itself: always "/".
  def xpath; "/"; end
end
|
51
|
+
|
52
|
+
# Ruby-side additions to the node class.
class FastXml::Node
  include FastXml::Common

  # Always false: a node is not a document.
  def doc?; false; end
end
|
58
|
+
|
59
|
+
# Ruby-side conveniences for node lists, built on the list's own +entry+,
# +length+ and +each+ methods.
class FastXml::NodeList
  # Element at +idx+; delegates to +entry+.
  def [](idx)
    entry(idx)
  end

  # First element (entry 0).
  def first
    entry(0)
  end

  # Last element (entry -1).
  def last
    entry(-1)
  end

  # True when the list has no elements.
  def empty?
    length == 0
  end

  # With an Integer, or a string consisting only of digits, returns the element
  # at that index. Otherwise +tgt+ is treated as a path: it is applied to every
  # node with Node#/ and all matches are returned in one flat Array.
  #
  # The path form always returns an Array. The previous implementation ended
  # with +flatten!+, which returns nil when nothing was flattened, so an empty
  # list produced nil instead of []. Integer arguments were also never treated
  # as indexes because +Integer =~ Regexp+ does not match.
  def at(tgt)
    # \A and \z anchor the whole string; ^ and $ would accept "1\nfoo"
    return entry(tgt.to_i) if tgt.is_a?(Integer) || tgt.to_s =~ /\A\d+\z/

    ret = []
    each { |nd| ret.concat((nd / tgt).to_ary) }
    ret
  end
end
|
83
|
+
|
84
|
+
|
85
|
+
# Shorthand constructor: passes +data+, +opts+ and the block unchanged to
# FastXml::Doc.new and returns the result.
def FastXml(data=nil, opts = {}, &blk)
  doc_class = FastXml::Doc
  doc_class.new( data, opts, &blk )
end
|
88
|
+
|
89
|
+
# Shorthand constructor for HTML input: builds a FastXml::Doc from +data+ with
# the :html option forced to true.
#
# +opts+ may be nil. The caller's hash is left untouched: the :html flag is
# set on a merged copy (the previous version wrote opts[:html] directly into
# the hash that was passed in).
def FastHtml(data=nil, opts = {}, &blk)
  html_opts = (opts || {}).merge( :html => true )
  FastXml::Doc.new( data, html_opts, &blk )
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# $Id$
|
2
|
+
$: << '../ext'
|
3
|
+
$: << './ext'
|
4
|
+
|
5
|
+
require 'fastxml'
|
6
|
+
|
7
|
+
# HTML parsing specs for FastXml::Doc. The fixture files are read relative to
# the current working directory ("./test_data/...").
describe FastXml::Doc, " doing html parsing" do
  # One shared handle on the hasno fixture, plus its content as an array of
  # lines and as a single string.
  before(:all) do
    @data_raw = open( "./test_data/hasno_feed.html" )
    @data_ary = @data_raw.readlines
    @data_str = @data_ary.join('')
  end

  before do
    @data_raw.rewind if @data_raw
  end

  after(:all) do
    @data_raw.close if @data_raw
  end

  it 'should parse string input' do
    @data_str.should_not be_nil
    doc = FastXml::Doc.new( @data_str, {:html=>true} )
    doc.should_not be_nil
    doc.to_s.should_not be_nil
  end

  it 'should parse array input' do
    @data_ary.should_not be_nil
    doc = FastXml::Doc.new( @data_ary, {:html=>true} )
    doc.should_not be_nil
    doc.to_s.should_not be_nil
  end

  it 'should be able to parse hasno and search' do
    doc = FastHtml( @data_str )
    descs = (doc/"p[class=description]")
    descs.should_not be_nil
    descs.each do |d|
      d.should_not be_nil
      # `should >=` matches the operator form used by the other examples here;
      # this line previously used the older `should_be` spelling
      d.length.should >= 1
    end
  end

  it 'should handle the twitter public timeline' do
    # File.read closes the file; open(...).readlines left the handle open
    raw_data = File.read( "./test_data/twitter_public.html" )
    doc = FastHtml( raw_data )
    doc.should_not be_nil
    doc.to_s.should_not be_nil
    doc.to_s.length.should >= 30000
    doc.root.should_not be_nil
    (doc/"").should_not be_nil
    doc.root.children.should_not be_nil
  end

  it 'should be able to handle the cnn site' do
    raw_data = File.read( "./test_data/cnn_main.html" )
    doc = FastHtml( raw_data )
    doc.should_not be_nil
    doc.to_s.should_not be_nil
    doc.to_s.length.should >= 10000
    (doc/"").should_not be_nil
    doc.root.children.should_not be_nil
  end
end
|