scrubyt 0.4.05 → 0.4.06
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +2 -2
- data/Rakefile +3 -1
- data/lib/scrubyt/core/navigation/agents/mechanize.rb +2 -1
- data/lib/scrubyt/core/scraping/pattern.rb +1 -0
- metadata +2 -2
data/CHANGELOG
CHANGED
@@ -4,8 +4,8 @@
|
|
4
4
|
== 14th November
|
5
5
|
|
6
6
|
=<tt>changes:</tt>
|
7
|
-
- [NEW] possibility to use FireWatir as the agent for scraping (credit:
|
8
|
-
- [FIX] navigation doesn't crash if a 404/500 is returned (credit:
|
7
|
+
- [NEW] possibility to use FireWatir as the agent for scraping (credit: Glenn Gillen)
|
8
|
+
- [FIX] navigation doesn't crash if a 404/500 is returned (credit: Glenn Gillen)
|
9
9
|
- [NEW] navigation actions: click_by_xpath, click_link_and_wait
|
10
10
|
- [MOD] dropped dependencies: RubyInline, ParseTree, Ruby2Ruby (hooray for win32 users)
|
11
11
|
- [MOD] exporting temporarily doesn't work - for now, generated XPaths are printed to the screen
|
data/Rakefile
CHANGED
@@ -17,7 +17,7 @@ task "cleanup_readme" => ["rdoc"]
|
|
17
17
|
|
18
18
|
gem_spec = Gem::Specification.new do |s|
|
19
19
|
s.name = 'scrubyt'
|
20
|
-
s.version = '0.4.05'
|
20
|
+
s.version = '0.4.06'
|
21
21
|
s.summary = 'A powerful Web-scraping framework built on Mechanize and Hpricot (and FireWatir)'
|
22
22
|
s.description = %{scRUBYt! is an easy to learn and use, yet powerful and effective web scraping framework. It's most interesting part is a Web-scraping DSL built on HPricot and WWW::Mechanize, which allows to navigate to the page of interest, then extract and query data records with a few lines of code. It is hard to describe scRUBYt! in a few sentences - you have to see it for yourself!}
|
23
23
|
# Files containing Test::Unit test cases.
|
@@ -94,8 +94,10 @@ Rake::GemPackageTask.new(gem_spec) do |pkg|
|
|
94
94
|
pkg.need_tar = false
|
95
95
|
end
|
96
96
|
|
97
|
+
=begin
|
97
98
|
Rake::PackageTask.new('scrubyt-examples', gem_spec.version) do |pkg|
|
98
99
|
pkg.need_zip = true
|
99
100
|
pkg.need_tar = true
|
100
101
|
pkg.package_files.include("examples/**/*")
|
101
102
|
end
|
103
|
+
=end
|
data/lib/scrubyt/core/navigation/agents/mechanize.rb
CHANGED
@@ -86,13 +86,14 @@ module Scrubyt
|
|
86
86
|
|
87
87
|
##
|
88
88
|
#Click the link specified by the text
|
89
|
-
def self.click_link(link_spec,index = 0)
|
89
|
+
def self.click_link(link_spec,index = 0,wait_secs=0)
|
90
90
|
Scrubyt.log :ACTION, "Clicking link specified by: %p" % link_spec
|
91
91
|
if link_spec.is_a? Hash
|
92
92
|
clicked_elem = CompoundExampleLookup.find_node_from_compund_example(@@hpricot_doc, link_spec, false, index)
|
93
93
|
else
|
94
94
|
clicked_elem = SimpleExampleLookup.find_node_from_text(@@hpricot_doc, link_spec, false, index)
|
95
95
|
end
|
96
|
+
sleep(wait_secs) if wait_secs > 0
|
96
97
|
clicked_elem = XPathUtils.find_nearest_node_with_attribute(clicked_elem, 'href')
|
97
98
|
result_page = @@agent.click(clicked_elem)
|
98
99
|
@@current_doc_url = result_page.uri.to_s
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrubyt
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.05
|
4
|
+
version: 0.4.06
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Szinek
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-11-
|
12
|
+
date: 2008-11-24 00:00:00 +01:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|