scrubyt 0.1.0 → 0.1.9
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +34 -0
- data/COPYING +340 -0
- data/README +34 -5
- data/Rakefile +6 -5
- data/lib/scrubyt.rb +1 -0
- data/lib/scrubyt/constraint.rb +12 -24
- data/lib/scrubyt/constraint_adder.rb +3 -17
- data/lib/scrubyt/export.rb +33 -17
- data/lib/scrubyt/extractor.rb +74 -23
- data/lib/scrubyt/filter.rb +52 -37
- data/lib/scrubyt/pattern.rb +74 -30
- data/lib/scrubyt/post_processor.rb +58 -0
- data/lib/scrubyt/result.rb +2 -2
- data/lib/scrubyt/result_dumper.rb +6 -0
- data/lib/scrubyt/xpathutils.rb +52 -15
- data/test/unittests/constraint_test.rb +0 -3
- data/test/unittests/extractor_test.rb +11 -13
- data/test/unittests/xpathutils_test.rb +31 -31
- metadata +8 -5
@@ -94,15 +94,15 @@ class XPathUtilsTest < Test::Unit::TestCase
|
|
94
94
|
end
|
95
95
|
|
96
96
|
def test_find_index
|
97
|
-
assert_equal(Scrubyt::XPathUtils.find_index(@a),
|
98
|
-
assert_equal(Scrubyt::XPathUtils.find_index(@b),
|
99
|
-
assert_equal(Scrubyt::XPathUtils.find_index(@c),
|
100
|
-
assert_equal(Scrubyt::XPathUtils.find_index(@d),
|
101
|
-
assert_equal(Scrubyt::XPathUtils.find_index(@n_1),
|
102
|
-
assert_equal(Scrubyt::XPathUtils.find_index(@n_2),
|
103
|
-
assert_equal(Scrubyt::XPathUtils.find_index(@n_3),
|
104
|
-
assert_equal(Scrubyt::XPathUtils.find_index(@n_4),
|
105
|
-
assert_equal(Scrubyt::XPathUtils.find_index(@r),
|
97
|
+
assert_equal(Scrubyt::XPathUtils.find_index(@a), 1)
|
98
|
+
assert_equal(Scrubyt::XPathUtils.find_index(@b), 1)
|
99
|
+
assert_equal(Scrubyt::XPathUtils.find_index(@c), 1)
|
100
|
+
assert_equal(Scrubyt::XPathUtils.find_index(@d), 1)
|
101
|
+
assert_equal(Scrubyt::XPathUtils.find_index(@n_1), 1)
|
102
|
+
assert_equal(Scrubyt::XPathUtils.find_index(@n_2), 2)
|
103
|
+
assert_equal(Scrubyt::XPathUtils.find_index(@n_3), 3)
|
104
|
+
assert_equal(Scrubyt::XPathUtils.find_index(@n_4), 4)
|
105
|
+
assert_equal(Scrubyt::XPathUtils.find_index(@r), 1)
|
106
106
|
end
|
107
107
|
|
108
108
|
def test_generate_XPath
|
@@ -121,32 +121,32 @@ class XPathUtilsTest < Test::Unit::TestCase
|
|
121
121
|
end
|
122
122
|
|
123
123
|
def test_generate_XPath_with_indices
|
124
|
-
assert_equal(Scrubyt::XPathUtils.generate_XPath(@a, nil, true), "/a[
|
125
|
-
assert_equal(Scrubyt::XPathUtils.generate_XPath(@b, nil, true), "/a[
|
126
|
-
assert_equal(Scrubyt::XPathUtils.generate_XPath(@c, nil, true), "/a[
|
127
|
-
assert_equal(Scrubyt::XPathUtils.generate_XPath(@d, nil, true), "/a[
|
128
|
-
assert_equal(Scrubyt::XPathUtils.generate_XPath(@e, nil, true), "/a[
|
129
|
-
assert_equal(Scrubyt::XPathUtils.generate_XPath(@f, nil, true), "/a[
|
130
|
-
assert_equal(Scrubyt::XPathUtils.generate_XPath(@n_1, nil, true), "/a[
|
131
|
-
assert_equal(Scrubyt::XPathUtils.generate_XPath(@n_2, nil, true), "/a[
|
132
|
-
assert_equal(Scrubyt::XPathUtils.generate_XPath(@n_3, nil, true), "/a[
|
133
|
-
assert_equal(Scrubyt::XPathUtils.generate_XPath(@n_4, nil, true), "/a[
|
134
|
-
assert_equal(Scrubyt::XPathUtils.generate_XPath(@p, nil, true), "/a[
|
135
|
-
assert_equal(Scrubyt::XPathUtils.generate_XPath(@r, nil, true), "/a[
|
124
|
+
assert_equal(Scrubyt::XPathUtils.generate_XPath(@a, nil, true), "/a[1]")
|
125
|
+
assert_equal(Scrubyt::XPathUtils.generate_XPath(@b, nil, true), "/a[1]/b[1]")
|
126
|
+
assert_equal(Scrubyt::XPathUtils.generate_XPath(@c, nil, true), "/a[1]/b[1]/c[1]")
|
127
|
+
assert_equal(Scrubyt::XPathUtils.generate_XPath(@d, nil, true), "/a[1]/b[1]/d[1]")
|
128
|
+
assert_equal(Scrubyt::XPathUtils.generate_XPath(@e, nil, true), "/a[1]/b[1]/e[1]")
|
129
|
+
assert_equal(Scrubyt::XPathUtils.generate_XPath(@f, nil, true), "/a[1]/b[1]/e[1]/f[1]")
|
130
|
+
assert_equal(Scrubyt::XPathUtils.generate_XPath(@n_1, nil, true), "/a[1]/b[1]/e[1]/n[1]")
|
131
|
+
assert_equal(Scrubyt::XPathUtils.generate_XPath(@n_2, nil, true), "/a[1]/b[1]/e[1]/n[2]")
|
132
|
+
assert_equal(Scrubyt::XPathUtils.generate_XPath(@n_3, nil, true), "/a[1]/b[1]/e[1]/n[3]")
|
133
|
+
assert_equal(Scrubyt::XPathUtils.generate_XPath(@n_4, nil, true), "/a[1]/b[1]/e[1]/n[4]")
|
134
|
+
assert_equal(Scrubyt::XPathUtils.generate_XPath(@p, nil, true), "/a[1]/b[1]/e[1]/p[1]")
|
135
|
+
assert_equal(Scrubyt::XPathUtils.generate_XPath(@r, nil, true), "/a[1]/b[1]/e[1]/n[3]/r[1]")
|
136
136
|
end
|
137
137
|
|
138
138
|
def test_generate_relative_XPath
|
139
139
|
assert_nil(Scrubyt::XPathUtils.generate_relative_XPath(@a,@a))
|
140
|
-
assert_equal(Scrubyt::XPathUtils.generate_relative_XPath(@b, @a), "/b[
|
141
|
-
assert_equal(Scrubyt::XPathUtils.generate_relative_XPath(@c, @a), "/b[
|
142
|
-
assert_equal(Scrubyt::XPathUtils.generate_relative_XPath(@d, @a), "/b[
|
143
|
-
assert_equal(Scrubyt::XPathUtils.generate_relative_XPath(@f, @a), "/b[
|
144
|
-
assert_equal(Scrubyt::XPathUtils.generate_relative_XPath(@n_1, @a), "/b[
|
145
|
-
assert_equal(Scrubyt::XPathUtils.generate_relative_XPath(@n_2, @a), "/b[
|
146
|
-
assert_equal(Scrubyt::XPathUtils.generate_relative_XPath(@n_3, @a), "/b[
|
147
|
-
assert_equal(Scrubyt::XPathUtils.generate_relative_XPath(@n_4, @a), "/b[
|
148
|
-
assert_equal(Scrubyt::XPathUtils.generate_relative_XPath(@r, @b), "/e[
|
149
|
-
assert_equal(Scrubyt::XPathUtils.generate_relative_XPath(@q, @e), "/n[
|
140
|
+
assert_equal(Scrubyt::XPathUtils.generate_relative_XPath(@b, @a), "/b[1]")
|
141
|
+
assert_equal(Scrubyt::XPathUtils.generate_relative_XPath(@c, @a), "/b[1]/c[1]")
|
142
|
+
assert_equal(Scrubyt::XPathUtils.generate_relative_XPath(@d, @a), "/b[1]/d[1]")
|
143
|
+
assert_equal(Scrubyt::XPathUtils.generate_relative_XPath(@f, @a), "/b[1]/e[1]/f[1]")
|
144
|
+
assert_equal(Scrubyt::XPathUtils.generate_relative_XPath(@n_1, @a), "/b[1]/e[1]/n[1]")
|
145
|
+
assert_equal(Scrubyt::XPathUtils.generate_relative_XPath(@n_2, @a), "/b[1]/e[1]/n[2]")
|
146
|
+
assert_equal(Scrubyt::XPathUtils.generate_relative_XPath(@n_3, @a), "/b[1]/e[1]/n[3]")
|
147
|
+
assert_equal(Scrubyt::XPathUtils.generate_relative_XPath(@n_4, @a), "/b[1]/e[1]/n[4]")
|
148
|
+
assert_equal(Scrubyt::XPathUtils.generate_relative_XPath(@r, @b), "/e[1]/n[3]/r[1]")
|
149
|
+
assert_equal(Scrubyt::XPathUtils.generate_relative_XPath(@q, @e), "/n[3]/q[1]")
|
150
150
|
|
151
151
|
assert_nil(Scrubyt::XPathUtils.generate_relative_XPath(@r, @n_2))
|
152
152
|
assert_nil(Scrubyt::XPathUtils.generate_relative_XPath(@q, @g))
|
metadata
CHANGED
@@ -3,19 +3,19 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: scrubyt
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.0
|
7
|
-
date: 2007-01-
|
6
|
+
version: 0.1.9
|
7
|
+
date: 2007-01-28 00:00:00 +01:00
|
8
8
|
summary: A powerful Web-scraping framework
|
9
9
|
require_paths:
|
10
10
|
- lib
|
11
11
|
email: peter@rubyrailways.com
|
12
|
-
homepage: http://www.scrubyt.
|
12
|
+
homepage: http://www.scrubyt.org
|
13
13
|
rubyforge_project:
|
14
14
|
description: scRUBYt! is an easy to learn and use, yet powerful and effective web scraping framework. It's most interesting part is a Web-scraping DSL built on HPricot and WWW::Mechanize, which allows to navigate to the page of interest, then extract and query data records with a few lines of code. It is hard to describe scRUBYt! in a few sentences - you have to see it for yourself!
|
15
15
|
autorequire:
|
16
16
|
default_executable:
|
17
17
|
bindir: bin
|
18
|
-
has_rdoc:
|
18
|
+
has_rdoc: "true"
|
19
19
|
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
20
|
requirements:
|
21
21
|
- - ">"
|
@@ -30,6 +30,8 @@ authors:
|
|
30
30
|
- Peter Szinek
|
31
31
|
files:
|
32
32
|
- README
|
33
|
+
- COPYING
|
34
|
+
- CHANGELOG
|
33
35
|
- Rakefile
|
34
36
|
- lib/scrubyt.rb
|
35
37
|
- lib/scrubyt/constraint_adder.rb
|
@@ -40,10 +42,11 @@ files:
|
|
40
42
|
- lib/scrubyt/filter.rb
|
41
43
|
- lib/scrubyt/pattern.rb
|
42
44
|
- lib/scrubyt/result.rb
|
45
|
+
- lib/scrubyt/post_processor.rb
|
43
46
|
- lib/scrubyt/xpathutils.rb
|
44
47
|
test_files:
|
45
|
-
- test/unittests/filter_test.rb
|
46
48
|
- test/unittests/input
|
49
|
+
- test/unittests/filter_test.rb
|
47
50
|
- test/unittests/extractor_test.rb
|
48
51
|
- test/unittests/xpathutils_test.rb
|
49
52
|
- test/unittests/constraint_test.rb
|