fastxml 0.1.91

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ /*
2
+ * $Id$
3
+ */
4
+
5
+ #ifndef fastxml_node_h
6
+ #define fastxml_node_h
7
+ RUBY_EXTERN VALUE fastxml_node_initialize(VALUE self);
8
+ RUBY_EXTERN VALUE fastxml_node_search(VALUE self,VALUE raw_xpath, VALUE blk);
9
+ RUBY_EXTERN VALUE fastxml_node_name(VALUE self);
10
+ RUBY_EXTERN VALUE fastxml_node_value(VALUE self);
11
+ RUBY_EXTERN VALUE fastxml_node_value_set(VALUE self, VALUE new_val);
12
+ RUBY_EXTERN VALUE fastxml_node_innerxml(VALUE self);
13
+ RUBY_EXTERN VALUE fastxml_node_to_s(VALUE self);
14
+ RUBY_EXTERN VALUE fastxml_node_xpath(VALUE self);
15
+ RUBY_EXTERN VALUE fastxml_node_attr(VALUE self);
16
+ RUBY_EXTERN VALUE fastxml_node_children(VALUE self);
17
+ RUBY_EXTERN VALUE fastxml_node_next(VALUE self);
18
+ RUBY_EXTERN VALUE fastxml_node_prev(VALUE self);
19
+ RUBY_EXTERN VALUE fastxml_node_parent(VALUE self);
20
+ RUBY_EXTERN VALUE fastxml_node_inspect(VALUE self);
21
+ #endif
Binary file
@@ -0,0 +1,146 @@
1
+ /*
2
+ * $Id: fastxml_node.c 29 2007-08-16 05:16:47Z segfault $
3
+ */
4
+
5
+ #include "fastxml.h"
6
+ #include "fastxml_node.h"
7
+ #include "fastxml_doc.h"
8
+ #include "fastxml_nodelist.h"
9
+
10
+ /* {{{ fastxml_nodelist
11
+ */
12
+ VALUE fastxml_nodelist_inspect(VALUE self)
13
+ {
14
+ VALUE dv;
15
+ VALUE *argv;
16
+ fxml_data_t *data;
17
+
18
+ dv = rb_iv_get( self, "@lxml_doc" );
19
+ Data_Get_Struct( dv, fxml_data_t, data );
20
+
21
+ argv = ALLOCA_N( VALUE, 4 );
22
+ argv[0] = rb_str_new2( "#<%s:0x%x %d>" );
23
+ argv[1] = CLASS_OF( self );
24
+ argv[2] = rb_obj_id( self );
25
+ argv[3] = fastxml_nodelist_length( self );
26
+ return rb_f_sprintf( 4, argv );
27
+ }
28
+
29
+ VALUE fastxml_nodelist_initialize(VALUE self)
30
+ {
31
+ return self;
32
+ }
33
+
34
+ VALUE fastxml_nodelist_length(VALUE self)
35
+ {
36
+ VALUE dv;
37
+ xmlNodePtr cur;
38
+ fxml_data_t *data;
39
+
40
+ dv = rb_iv_get( self, "@lxml_doc" );
41
+ Data_Get_Struct( dv, fxml_data_t, data );
42
+
43
+ if (data->list_len == -1)
44
+ {
45
+ data->list_len = 0;
46
+ cur = data->list;
47
+ while (cur != NULL)
48
+ {
49
+ data->list_len++;
50
+ cur = cur->next;
51
+ }
52
+ }
53
+
54
+ return rb_int2inum( data->list_len );
55
+ }
56
+
57
+ VALUE fastxml_nodelist_obj_to_ary(fxml_data_t *root)
58
+ {
59
+ VALUE ret;
60
+ xmlNodePtr cur = root->list;
61
+
62
+ ret = rb_ary_new();
63
+ while (cur != NULL) {
64
+ rb_ary_push( ret, fastxml_raw_node_to_obj( cur ) );
65
+ cur = cur->next;
66
+ }
67
+
68
+ return ret;
69
+ }
70
+
71
+ VALUE fastxml_nodeset_obj_to_ary(fxml_data_t *root)
72
+ {
73
+ VALUE ret;
74
+ xmlNodePtr cur = root->xpath_obj->nodesetval->nodeTab;
75
+ int i;
76
+
77
+ ret = rb_ary_new();
78
+ for (i = 0; i < root->list_len; i++) {
79
+ rb_ary_push( ret, fastxml_raw_node_to_obj( cur ) );
80
+ cur++;
81
+ }
82
+
83
+ return ret;
84
+ }
85
+
86
+ VALUE fastxml_nodelist_gen_list(VALUE self, fxml_data_t *data)
87
+ {
88
+ VALUE lst = rb_iv_get( self, "@list" );
89
+
90
+ if (lst == Qnil) {
91
+ if (data->xpath_obj != NULL) {
92
+ lst = fastxml_nodeset_obj_to_ary( data );
93
+ rb_iv_set( self, "@list", lst );
94
+ } else {
95
+ lst = fastxml_nodelist_obj_to_ary( data );
96
+ rb_iv_set( self, "@list", lst );
97
+ }
98
+ }
99
+
100
+ return lst;
101
+ }
102
+
103
+ VALUE fastxml_nodelist_to_ary(VALUE self)
104
+ {
105
+ VALUE dv;
106
+ fxml_data_t *data;
107
+
108
+ dv = rb_iv_get( self, "@lxml_doc" );
109
+ Data_Get_Struct( dv, fxml_data_t, data );
110
+ return fastxml_nodelist_gen_list( self, data );
111
+ }
112
+
113
+ VALUE fastxml_nodelist_each(VALUE self)
114
+ {
115
+ VALUE lst, dv;
116
+ fxml_data_t *data;
117
+ int i;
118
+
119
+ dv = rb_iv_get( self, "@lxml_doc" );
120
+ Data_Get_Struct( dv, fxml_data_t, data );
121
+ lst = fastxml_nodelist_gen_list( self, data );
122
+
123
+ for (i=0; i<RARRAY(lst)->len; i++) {
124
+ rb_yield( RARRAY(lst)->ptr[i] );
125
+ }
126
+
127
+ return self;
128
+ }
129
+
130
+ VALUE fastxml_nodelist_entry(VALUE self, long idx)
131
+ {
132
+ VALUE lst, dv;
133
+ fxml_data_t *data;
134
+
135
+ dv = rb_iv_get( self, "@lxml_doc" );
136
+ Data_Get_Struct( dv, fxml_data_t, data );
137
+ lst = fastxml_nodelist_gen_list( self, data );
138
+ if (idx > 0) // this comes in offset by 1
139
+ idx = idx-1;
140
+ // TODO: find out why this is provided offset by 1 and not 0-based
141
+
142
+ return rb_ary_entry( lst, idx );
143
+ }
144
+
145
+ /* }}} fastxml_nodelist
146
+ */
@@ -0,0 +1,13 @@
1
+ /*
2
+ * $Id$
3
+ */
4
+
5
+ #ifndef fastxml_nodelist_h
6
+ #define fastxml_nodelist_h
7
+ RUBY_EXTERN VALUE fastxml_nodelist_initialize(VALUE self);
8
+ RUBY_EXTERN VALUE fastxml_nodelist_inspect(VALUE self);
9
+ RUBY_EXTERN VALUE fastxml_nodelist_length(VALUE self);
10
+ RUBY_EXTERN VALUE fastxml_nodelist_entry(VALUE self, long idx);
11
+ RUBY_EXTERN VALUE fastxml_nodelist_each(VALUE self);
12
+ RUBY_EXTERN VALUE fastxml_nodelist_to_ary(VALUE self);
13
+ #endif /*fastxml_nodelist_h*/
Binary file
data/ext/mkmf.log ADDED
@@ -0,0 +1,119 @@
1
+ find_header: checking for #include <libxml/tree.h>
2
+ ... -------------------- yes
3
+
4
+ "/usr/bin/cpp-4.0 -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.2.2 -I. -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -o conftest.i"
5
+ conftest.c:1:25: error: libxml/tree.h: No such file or directory
6
+ checked program was:
7
+ /* begin */
8
+ 1: #include <libxml/tree.h>
9
+ /* end */
10
+
11
+ "/usr/bin/cpp-4.0 -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.2.2 -I. -I/opt/local/include -O2 -fno-common -pipe -fno-common -I/usr/include/libxml2 conftest.c -o conftest.i"
12
+ checked program was:
13
+ /* begin */
14
+ 1: #include <libxml/tree.h>
15
+ /* end */
16
+
17
+ --------------------
18
+
19
+ find_header: checking for #include <libxslt/xslt.h>
20
+ ... -------------------- yes
21
+
22
+ "/usr/bin/cpp-4.0 -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.2.2 -I. -I/usr/include/libxml2 -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -o conftest.i"
23
+ checked program was:
24
+ /* begin */
25
+ 1: #include <libxslt/xslt.h>
26
+ /* end */
27
+
28
+ --------------------
29
+
30
+ find_library: checking for xmlInitParser() in -lxml2... -------------------- yes
31
+
32
+ "/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.2.2 -I. -I/usr/include/libxml2 -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L"." -L"/opt/local/lib" -L. -L/opt/local/lib -lruby-static -lxml2 -lpthread -ldl -lobjc "
33
+ conftest.c: In function ‘t’:
34
+ conftest.c:3: error: ‘xmlInitParser’ undeclared (first use in this function)
35
+ conftest.c:3: error: (Each undeclared identifier is reported only once
36
+ conftest.c:3: error: for each function it appears in.)
37
+ checked program was:
38
+ /* begin */
39
+ 1: /*top*/
40
+ 2: int main() { return 0; }
41
+ 3: int t() { void ((*volatile p)()); p = (void ((*)()))xmlInitParser; return 0; }
42
+ /* end */
43
+
44
+ "/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.2.2 -I. -I/usr/include/libxml2 -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L"." -L"/opt/local/lib" -L. -L/opt/local/lib -lruby-static -lxml2 -lpthread -ldl -lobjc "
45
+ checked program was:
46
+ /* begin */
47
+ 1: /*top*/
48
+ 2: int main() { return 0; }
49
+ 3: int t() { xmlInitParser(); return 0; }
50
+ /* end */
51
+
52
+ --------------------
53
+
54
+ have_library: checking for xmlInitParser() in -lxml2... -------------------- yes
55
+
56
+ "/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.2.2 -I. -I/usr/include/libxml2 -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L"." -L"/opt/local/lib" -L. -L/opt/local/lib -lxml2 -lruby-static -lxml2 -lxml2 -lpthread -ldl -lobjc "
57
+ checked program was:
58
+ /* begin */
59
+ 1: #include <libxml/parser.h>
60
+ 2:
61
+ 3: /*top*/
62
+ 4: int main() { return 0; }
63
+ 5: int t() { void ((*volatile p)()); p = (void ((*)()))xmlInitParser; return 0; }
64
+ /* end */
65
+
66
+ --------------------
67
+
68
+ find_library: checking for xmlInitParser() in -lxslt... -------------------- yes
69
+
70
+ "/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.2.2 -I. -I/usr/include/libxml2 -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L"." -L"/opt/local/lib" -L. -L/opt/local/lib -lxml2 -lxml2 -lruby-static -lxslt -lxml2 -lxml2 -lpthread -ldl -lobjc "
71
+ conftest.c: In function ‘t’:
72
+ conftest.c:3: error: ‘xmlInitParser’ undeclared (first use in this function)
73
+ conftest.c:3: error: (Each undeclared identifier is reported only once
74
+ conftest.c:3: error: for each function it appears in.)
75
+ checked program was:
76
+ /* begin */
77
+ 1: /*top*/
78
+ 2: int main() { return 0; }
79
+ 3: int t() { void ((*volatile p)()); p = (void ((*)()))xmlInitParser; return 0; }
80
+ /* end */
81
+
82
+ "/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.2.2 -I. -I/usr/include/libxml2 -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L"." -L"/opt/local/lib" -L. -L/opt/local/lib -lxml2 -lxml2 -lruby-static -lxslt -lxml2 -lxml2 -lpthread -ldl -lobjc "
83
+ checked program was:
84
+ /* begin */
85
+ 1: /*top*/
86
+ 2: int main() { return 0; }
87
+ 3: int t() { xmlInitParser(); return 0; }
88
+ /* end */
89
+
90
+ --------------------
91
+
92
+ have_library: checking for xsltParseStylesheetFile() in -lxslt... -------------------- yes
93
+
94
+ "/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.2.2 -I. -I/usr/include/libxml2 -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L"." -L"/opt/local/lib" -L. -L/opt/local/lib -lxslt -lxml2 -lxml2 -lruby-static -lxslt -lxslt -lxml2 -lxml2 -lpthread -ldl -lobjc "
95
+ conftest.c: In function ‘t’:
96
+ conftest.c:5: error: ‘xsltParseStylesheetFile’ undeclared (first use in this function)
97
+ conftest.c:5: error: (Each undeclared identifier is reported only once
98
+ conftest.c:5: error: for each function it appears in.)
99
+ checked program was:
100
+ /* begin */
101
+ 1: #include <libxslt/xslt.h>
102
+ 2:
103
+ 3: /*top*/
104
+ 4: int main() { return 0; }
105
+ 5: int t() { void ((*volatile p)()); p = (void ((*)()))xsltParseStylesheetFile; return 0; }
106
+ /* end */
107
+
108
+ "/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.2.2 -I. -I/usr/include/libxml2 -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L"." -L"/opt/local/lib" -L. -L/opt/local/lib -lxslt -lxml2 -lxml2 -lruby-static -lxslt -lxslt -lxml2 -lxml2 -lpthread -ldl -lobjc "
109
+ checked program was:
110
+ /* begin */
111
+ 1: #include <libxslt/xslt.h>
112
+ 2:
113
+ 3: /*top*/
114
+ 4: int main() { return 0; }
115
+ 5: int t() { xsltParseStylesheetFile(); return 0; }
116
+ /* end */
117
+
118
+ --------------------
119
+
@@ -0,0 +1,93 @@
1
+ # $Id$
2
# Top-level namespace of the FastXml library.
module FastXml
  # Release number of this library. Frozen so the shared constant cannot be
  # modified in place by accident.
  VERSION = "0.1.91".freeze
end
5
+
6
module FastXml
  # Search helpers mixed into FastXml::Doc and FastXml::Node.
  # The including class must provide #search and #children.
  module Common
    # Runs a "//<type>" search and returns whatever #search returns.
    def children_of_type(type)
      search("//#{type}")
    end

    # Yields each element of #children to the given block.
    def each_child(&blk)
      children.each { |child| yield child }
    end

    # Runs a search for "/<xpath>" and returns whatever #search returns.
    def /(xpath)
      search("/#{xpath}")
    end

    # Returns the first result of searching for +xpath+, or nil when the
    # search returns nil or an empty result.
    def at(xpath)
      found = search(xpath)
      (found && found.length > 0) ? found[0] : nil
    end

    # NOTE(review): `display` is not defined in this module, so this alias
    # binds to whatever `display` resolves to when the file is loaded.
    # Confirm that this is the intended target.
    alias :to_s :display
  end
end
27
+
28
module FastXml
  # Ruby-side additions to the document class.
  class Doc
    include FastXml::Common

    # A Doc is always a document.
    def doc?
      true
    end

    # Always nil.
    def doctype?
      nil
    end

    # Value of @forgiving, initialised to false when it is unset or falsy.
    def forgiving?
      @forgiving ||= false
    end

    # Value of @validate_dtd, initialised to false when it is unset or falsy.
    def validate?
      @validate_dtd ||= false
    end

    # The XPath of a document is always the root path.
    def xpath
      "/"
    end
  end
end
51
+
52
module FastXml
  # Ruby-side additions to the node class.
  class Node
    include FastXml::Common

    # A Node is never a document.
    def doc?
      false
    end
  end
end
58
+
59
module FastXml
  # Ruby-side conveniences for node lists. They rely on #entry, #each and
  # #length being provided elsewhere on this class.
  class NodeList
    # Returns the element at +idx+ (delegates to #entry).
    def [](idx)
      entry(idx)
    end

    # First element of the list.
    def first
      entry(0)
    end

    # Last element of the list.
    def last
      entry(-1)
    end

    # True when the list holds no elements.
    def empty?
      length == 0
    end

    # Looks up +tgt+ in the list.
    #
    # * An Integer, or a string made up only of digits, is treated as a
    #   position and the element at that position is returned.
    # * Anything else is passed to every element's #/ and the results are
    #   collected into one flat Array.
    #
    # The search form always returns an Array, including an empty one for
    # an empty list. Previously `flatten!` made it return nil in that case,
    # because `flatten!` returns nil when it changes nothing.
    def at(tgt)
      # \A and \z anchor the whole string; ^ and $ would accept "1\nfoo".
      return entry(tgt.to_i) if tgt.is_a?(Integer) || tgt.to_s =~ /\A\d+\z/

      matches = []
      each { |nd| matches << (nd / tgt).to_ary }
      matches.flatten
    end
  end
end
83
+
84
+
85
# Shorthand for FastXml::Doc.new.
#
# data - input handed to FastXml::Doc.new unchanged.
# opts - options Hash; nil is treated as an empty Hash, matching FastHtml.
# blk  - optional block forwarded to FastXml::Doc.new.
#
# Returns the new FastXml::Doc.
def FastXml(data = nil, opts = {}, &blk)
  FastXml::Doc.new(data, opts || {}, &blk)
end
88
+
89
# Shorthand for FastXml::Doc.new with the :html option switched on.
#
# data - input handed to FastXml::Doc.new unchanged.
# opts - options Hash (nil is treated as empty). The caller's Hash is left
#        untouched: :html => true is set on a copy, so frozen or shared
#        option hashes are safe to pass.
# blk  - optional block forwarded to FastXml::Doc.new.
#
# Returns the new FastXml::Doc.
def FastHtml(data = nil, opts = {}, &blk)
  html_opts = (opts || {}).merge(:html => true)
  FastXml::Doc.new(data, html_opts, &blk)
end
@@ -0,0 +1,70 @@
1
+ # $Id$
2
+ $: << '../ext'
3
+ $: << './ext'
4
+
5
+ require 'fastxml'
6
+
7
# Specs for HTML parsing through FastXml::Doc and the FastHtml helper.
# Fixture paths are relative to the directory the specs are run from.
describe FastXml::Doc, " doing html parsing" do
  before(:all) do
    @data_raw = File.open( "./test_data/hasno_feed.html" )
    @data_ary = @data_raw.readlines
    @data_str = @data_ary.join('')
  end

  before do
    @data_raw.rewind if @data_raw
  end

  after(:all) do
    @data_raw.close if @data_raw
  end

  it 'should parse string input' do
    @data_str.should_not be_nil
    doc = FastXml::Doc.new( @data_str, {:html=>true} )
    doc.should_not be_nil
    doc.to_s.should_not be_nil
  end

  it 'should parse array input' do
    @data_ary.should_not be_nil
    doc = FastXml::Doc.new( @data_ary, {:html=>true} )
    doc.should_not be_nil
    doc.to_s.should_not be_nil
  end

  it 'should be able to parse hasno and search' do
    doc = FastHtml( @data_str )
    descs = (doc/"p[class=description]")
    descs.should_not be_nil
    descs.each do |d|
      d.should_not be_nil
      # Same operator-matcher form as the other length checks in this file.
      d.length.should >= 1
    end
  end

  it 'should handle the twitter public timeline' do
    # File.read closes the file itself; open(...).readlines left it open.
    raw_data = File.read( "./test_data/twitter_public.html" )
    doc = FastHtml( raw_data )
    doc.should_not be_nil
    doc.to_s.should_not be_nil
    doc.to_s.length.should >= 30000
    doc.root.should_not be_nil
    (doc/"").should_not be_nil
    doc.root.children.should_not be_nil
  end

  it 'should be able to handle the cnn site' do
    raw_data = File.read( "./test_data/cnn_main.html" )
    doc = FastHtml( raw_data )
    doc.should_not be_nil
    doc.to_s.should_not be_nil
    doc.to_s.length.should >= 10000
    (doc/"").should_not be_nil
    doc.root.children.should_not be_nil
  end
end