scrubyt 0.4.05 → 0.4.06
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +2 -2
- data/Rakefile +3 -1
- data/lib/scrubyt/core/navigation/agents/mechanize.rb +2 -1
- data/lib/scrubyt/core/scraping/pattern.rb +1 -0
- metadata +2 -2
data/CHANGELOG
CHANGED
@@ -4,8 +4,8 @@
|
|
4
4
|
== 14th November
|
5
5
|
|
6
6
|
=<tt>changes:</tt>
|
7
|
-
- [NEW] possibility to use FireWatir as the agent for scraping (credit:
|
8
|
-
- [FIX] navigation doesn't crash if a 404/500 is returned (credit:
|
7
|
+
- [NEW] possibility to use FireWatir as the agent for scraping (credit: Glenn Gillen)
|
8
|
+
- [FIX] navigation doesn't crash if a 404/500 is returned (credit: Glenn Gillen)
|
9
9
|
- [NEW] navigation actions: click_by_xpath, click_link_and_wait
|
10
10
|
- [MOD] dropped dependencies: RubyInline, ParseTree, Ruby2Ruby (hooray for win32 users)
|
11
11
|
- [MOD] exporting temporarily doesn't work - for now, generated XPaths are printed to the screen
|
data/Rakefile
CHANGED
@@ -17,7 +17,7 @@ task "cleanup_readme" => ["rdoc"]
|
|
17
17
|
|
18
18
|
gem_spec = Gem::Specification.new do |s|
|
19
19
|
s.name = 'scrubyt'
|
20
|
-
s.version = '0.4.05'
|
20
|
+
s.version = '0.4.06'
|
21
21
|
s.summary = 'A powerful Web-scraping framework built on Mechanize and Hpricot (and FireWatir)'
|
22
22
|
s.description = %{scRUBYt! is an easy to learn and use, yet powerful and effective web scraping framework. It's most interesting part is a Web-scraping DSL built on HPricot and WWW::Mechanize, which allows to navigate to the page of interest, then extract and query data records with a few lines of code. It is hard to describe scRUBYt! in a few sentences - you have to see it for yourself!}
|
23
23
|
# Files containing Test::Unit test cases.
|
@@ -94,8 +94,10 @@ Rake::GemPackageTask.new(gem_spec) do |pkg|
|
|
94
94
|
pkg.need_tar = false
|
95
95
|
end
|
96
96
|
|
97
|
+
=begin
|
97
98
|
Rake::PackageTask.new('scrubyt-examples', gem_spec.version) do |pkg|
|
98
99
|
pkg.need_zip = true
|
99
100
|
pkg.need_tar = true
|
100
101
|
pkg.package_files.include("examples/**/*")
|
101
102
|
end
|
103
|
+
=end
|
data/lib/scrubyt/core/navigation/agents/mechanize.rb
CHANGED
@@ -86,13 +86,14 @@ module Scrubyt
|
|
86
86
|
|
87
87
|
##
|
88
88
|
#Click the link specified by the text
|
89
|
-
def self.click_link(link_spec,index = 0)
|
89
|
+
def self.click_link(link_spec,index = 0,wait_secs=0)
|
90
90
|
Scrubyt.log :ACTION, "Clicking link specified by: %p" % link_spec
|
91
91
|
if link_spec.is_a? Hash
|
92
92
|
clicked_elem = CompoundExampleLookup.find_node_from_compund_example(@@hpricot_doc, link_spec, false, index)
|
93
93
|
else
|
94
94
|
clicked_elem = SimpleExampleLookup.find_node_from_text(@@hpricot_doc, link_spec, false, index)
|
95
95
|
end
|
96
|
+
sleep(wait_secs) if wait_secs > 0
|
96
97
|
clicked_elem = XPathUtils.find_nearest_node_with_attribute(clicked_elem, 'href')
|
97
98
|
result_page = @@agent.click(clicked_elem)
|
98
99
|
@@current_doc_url = result_page.uri.to_s
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrubyt
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.05
|
4
|
+
version: 0.4.06
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Szinek
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-11-
|
12
|
+
date: 2008-11-24 00:00:00 +01:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|