scrubyt 0.3.4 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +31 -0
- data/README +1 -1
- data/Rakefile +4 -9
- data/lib/scrubyt.rb +37 -56
- data/lib/scrubyt/core/navigation/agents/firewatir.rb +249 -0
- data/lib/scrubyt/core/navigation/agents/mechanize.rb +253 -0
- data/lib/scrubyt/core/navigation/fetch_action.rb +2 -183
- data/lib/scrubyt/core/navigation/navigation_actions.rb +30 -48
- data/lib/scrubyt/core/scraping/filters/attribute_filter.rb +0 -3
- data/lib/scrubyt/core/scraping/filters/base_filter.rb +6 -6
- data/lib/scrubyt/core/scraping/filters/constant_filter.rb +0 -3
- data/lib/scrubyt/core/scraping/filters/detail_page_filter.rb +6 -6
- data/lib/scrubyt/core/scraping/filters/download_filter.rb +0 -4
- data/lib/scrubyt/core/scraping/filters/html_subtree_filter.rb +0 -3
- data/lib/scrubyt/core/scraping/filters/regexp_filter.rb +0 -4
- data/lib/scrubyt/core/scraping/filters/script_filter.rb +0 -3
- data/lib/scrubyt/core/scraping/filters/text_filter.rb +1 -5
- data/lib/scrubyt/core/scraping/filters/tree_filter.rb +0 -8
- data/lib/scrubyt/core/scraping/pattern.rb +6 -27
- data/lib/scrubyt/core/scraping/result_indexer.rb +0 -4
- data/lib/scrubyt/core/shared/extractor.rb +15 -1
- data/lib/scrubyt/output/result_node.rb +42 -6
- data/lib/scrubyt/output/scrubyt_result.rb +35 -30
- data/lib/scrubyt/utils/ruby_extensions.rb +0 -53
- data/lib/scrubyt/utils/xpathutils.rb +2 -1
- metadata +84 -119
- data/lib/scrubyt/output/export.rb +0 -157
@@ -134,13 +134,5 @@ module Scrubyt
|
|
134
134
|
@xpath = XPathUtils.generate_relative_XPath_from_XPaths(parent_xpath, @xpath) if (@xpath =~ /^\/html/) #TODO: should not rely on <html> being the root node
|
135
135
|
end
|
136
136
|
|
137
|
-
def to_sexp
|
138
|
-
if @example =~ /.+\[@.+\]$/
|
139
|
-
[:str, "#{@xpath}/@#{@example.scan(/\[@(.+?)\]/)[0][0]}"]
|
140
|
-
else
|
141
|
-
[:str, @xpath]
|
142
|
-
end
|
143
|
-
end
|
144
|
-
|
145
137
|
end #End of class TreeFilter
|
146
138
|
end #End of module Scrubyt
|
@@ -35,6 +35,10 @@ module Scrubyt
|
|
35
35
|
|
36
36
|
VALID_PATTERN_TYPES = [:tree, :attribute, :regexp, :detail_page, :download, :html_subtree, :constant, :script, :text]
|
37
37
|
|
38
|
+
# :determine - default value; indicates that the example's type still has to be determined
|
39
|
+
# :xpath - presumably marks an example given directly as an XPath (this comment used to name :string / EXAMPLE_TYPE_STRING, which is not a valid example type)
|
40
|
+
VALID_PATTERN_EXAMPLE_TYPES = [:determine, :xpath]
|
41
|
+
|
38
42
|
#The pattern can be either a model pattern (in this case it is
|
39
43
|
#written to the output) or a temp pattern (in this case it is skipped)
|
40
44
|
#Will be implemented in a higher version (i.e. not 0.1.0) - for now, everything
|
@@ -59,7 +63,7 @@ module Scrubyt
|
|
59
63
|
|
60
64
|
option_reader(:type => :tree, :output_type => :model, :generalize => false,
|
61
65
|
:write_text => lambda { @children.size == 0 }, :limit => nil,
|
62
|
-
:default => nil, :resolve => :full, :except => nil, :example_type =>
|
66
|
+
:default => nil, :resolve => :full, :except => nil, :example_type => :determine)
|
63
67
|
|
64
68
|
def initialize(name, args=[], extractor=nil, parent=nil, &block)
|
65
69
|
#init attributes
|
@@ -305,32 +309,6 @@ module Scrubyt
|
|
305
309
|
end
|
306
310
|
end
|
307
311
|
|
308
|
-
def to_sexp
|
309
|
-
#collect arguments
|
310
|
-
args = []
|
311
|
-
args.push(*@filters.to_sexp_array) if type != :detail_page #TODO: this if shouldn't be there
|
312
|
-
args.push(@options.to_sexp) if !@options.empty?
|
313
|
-
|
314
|
-
#build main call
|
315
|
-
sexp = [:fcall, @name, [:array, *args]]
|
316
|
-
|
317
|
-
if type == :detail_page
|
318
|
-
#add detail page extractor
|
319
|
-
sexp = [:iter, sexp, nil, @filters[0].get_detail_sexp]
|
320
|
-
else
|
321
|
-
#add child block if the pattern has children
|
322
|
-
sexp = [:iter, sexp, nil, [:block, *@children.to_sexp_array ]] if !@children.empty?
|
323
|
-
end
|
324
|
-
|
325
|
-
#add modifier calls - TODO: remove when everything is exported to the options hash
|
326
|
-
@modifier_calls.each do |modifier_sexp|
|
327
|
-
sexp = [:call, sexp, *modifier_sexp]
|
328
|
-
end
|
329
|
-
|
330
|
-
#return complete sexp
|
331
|
-
sexp
|
332
|
-
end
|
333
|
-
|
334
312
|
private
|
335
313
|
def parse_options_hash(hash)
|
336
314
|
#merge provided hash
|
@@ -339,6 +317,7 @@ module Scrubyt
|
|
339
317
|
hash.each { |key, value| check_option(key.to_sym) }
|
340
318
|
raise "Invalid pattern type: #{type.to_s}" if VALID_PATTERN_TYPES.index(type.to_sym).nil?
|
341
319
|
raise "Invalid output type: #{output_type.to_s}" if VALID_OUTPUT_TYPES.index(output_type.to_sym).nil?
|
320
|
+
raise "Invalid example type: #{example_type.to_s}" if VALID_PATTERN_EXAMPLE_TYPES.index(example_type.to_sym).nil?
|
342
321
|
end
|
343
322
|
|
344
323
|
def check_option(option)
|
@@ -14,6 +14,21 @@ module Scrubyt
|
|
14
14
|
|
15
15
|
#The definition of the extractor is passed through this method
|
16
16
|
def self.define(mode=nil, &extractor_definition)
|
17
|
+
if mode.is_a?(Hash)
|
18
|
+
if mode[:agent]==:firefox
|
19
|
+
FetchAction.class_eval do
|
20
|
+
include Navigation::Firewatir
|
21
|
+
end
|
22
|
+
else
|
23
|
+
FetchAction.class_eval do
|
24
|
+
include Navigation::Mechanize
|
25
|
+
end
|
26
|
+
end
|
27
|
+
else
|
28
|
+
FetchAction.class_eval do
|
29
|
+
include Navigation::Mechanize
|
30
|
+
end
|
31
|
+
end
|
17
32
|
extractor = self.new(mode, extractor_definition)
|
18
33
|
extractor.result
|
19
34
|
end
|
@@ -117,7 +132,6 @@ module Scrubyt
|
|
117
132
|
catch :quit_next_page_loop do
|
118
133
|
loop do
|
119
134
|
url = get_current_doc_url #TODO need absolute address here 2/4
|
120
|
-
puts url
|
121
135
|
@processed_pages << url
|
122
136
|
@root_patterns.each do |root_pattern|
|
123
137
|
root_results.push(*root_pattern.evaluate(get_hpricot_doc, nil))
|
@@ -23,7 +23,11 @@ module Scrubyt
|
|
23
23
|
text = (@result.is_a? String) ? @result : @result.inner_html.gsub(/<.*?>/, '')
|
24
24
|
text = SharedUtils.unescape_entities(text)
|
25
25
|
text.strip!
|
26
|
-
text
|
26
|
+
if (@options[:default] && ((text == '') || (text == @options[:default])))
|
27
|
+
@options[:default]
|
28
|
+
else
|
29
|
+
text
|
30
|
+
end
|
27
31
|
end
|
28
32
|
|
29
33
|
def to_libxml
|
@@ -41,26 +45,54 @@ module Scrubyt
|
|
41
45
|
def to_hash(delimiter=',')
|
42
46
|
result = []
|
43
47
|
flat_hash_inner = lambda {|e, hash|
|
44
|
-
hash[e.name.to_sym] = hash[e.name.to_sym] ? hash[e.name.to_sym] + delimiter + e.to_s : e.to_s if e.write_text && !e.to_s.empty?
|
48
|
+
hash[e.name.to_sym] = hash[e.name.to_sym] ? hash[e.name.to_sym] + delimiter + e.to_s : e.to_s if ((e.write_text && !e.to_s.empty?) || e.options[:default])
|
45
49
|
e.each {|c| flat_hash_inner.call(c, hash) }
|
46
50
|
hash
|
47
51
|
}
|
48
52
|
self.each {|e| result << flat_hash_inner.call(e, {}) }
|
49
53
|
result
|
50
54
|
end
|
55
|
+
|
56
|
+
def to_flat_hash()
|
57
|
+
hash_result = self.to_hash('@@@@@@')
|
58
|
+
merged_hash = hash_result.delete_at 0
|
59
|
+
hash_result.each do |hash|
|
60
|
+
merged_hash.keys.each do |key|
|
61
|
+
merged_hash[key] += "@@@@@@#{hash[key]}"
|
62
|
+
end
|
63
|
+
end
|
64
|
+
result_sets = merged_hash.values.map!{|x| x.split('@@@@@@')}.transpose
|
65
|
+
final_result = []
|
66
|
+
|
67
|
+
result_sets.each do |rs|
|
68
|
+
temp_result = {}
|
69
|
+
merged_hash.keys.each do |k|
|
70
|
+
temp_result[k] = rs[merged_hash.keys.index(k)]
|
71
|
+
end
|
72
|
+
final_result << temp_result
|
73
|
+
end
|
74
|
+
final_result
|
75
|
+
end
|
51
76
|
|
52
77
|
def to_flat_xml(delimiter=nil)
|
53
78
|
lines = []
|
54
79
|
hash_result = delimiter ? self.to_hash(delimiter) : self.to_hash
|
80
|
+
merged_hash = hash_result.delete_at 0
|
81
|
+
|
82
|
+
hash_result.each do |hash|
|
83
|
+
merged_hash.keys.each do |key|
|
84
|
+
merged_hash[key] += "#{delimiter}#{hash[key]}"
|
85
|
+
end
|
86
|
+
end
|
55
87
|
|
56
88
|
if delimiter
|
57
|
-
result_sets =
|
89
|
+
result_sets = merged_hash.values.map!{|x| x.split(delimiter)}.transpose
|
58
90
|
final_result = []
|
59
91
|
|
60
92
|
result_sets.each do |rs|
|
61
93
|
temp_result = {}
|
62
|
-
|
63
|
-
temp_result[k] = rs[
|
94
|
+
merged_hash.keys.each do |k|
|
95
|
+
temp_result[k] = rs[merged_hash.keys.index(k)]
|
64
96
|
end
|
65
97
|
final_result << temp_result
|
66
98
|
end
|
@@ -89,7 +121,11 @@ module Scrubyt
|
|
89
121
|
elsif write_text && !to_s.empty?
|
90
122
|
lines << "<#{name}>#{ERB::Util.html_escape(to_s)}</#{name}>"
|
91
123
|
else
|
92
|
-
|
124
|
+
if @options[:default]
|
125
|
+
lines << "<#{name}>#{@options[:default]}</#{name}>"
|
126
|
+
else
|
127
|
+
lines << "<#{name}/>"
|
128
|
+
end
|
93
129
|
end
|
94
130
|
else
|
95
131
|
lines << "<#{name}>"
|
@@ -2,36 +2,41 @@ module Scrubyt
|
|
2
2
|
class ScrubytResult < ResultNode
|
3
3
|
attr_accessor :root_patterns, :source_file, :source_proc
|
4
4
|
|
5
|
-
def export
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
5
|
+
def export
|
6
|
+
#Temporary solution; the real one will be back later - or not
|
7
|
+
result = <<-EXPLANATION
|
8
|
+
|
9
|
+
=== Extractor tree ===
|
10
|
+
|
11
|
+
export() is not working at the moment, due to the removal or ParseTree, ruby2ruby and RubyInline.
|
12
|
+
For now, in case you are using examples, you can replace them by hand based on the output below.
|
13
|
+
So if your pattern in the learning extractor looks like
|
14
|
+
|
15
|
+
book "Ruby Cookbook"
|
16
|
+
|
17
|
+
and you see the following below:
|
18
|
+
|
19
|
+
[book] /table[1]/tr/td[2]
|
20
|
+
|
21
|
+
then replace "Ruby Cookbook" with "/table[1]/tr/td[2]" (and all the other XPaths) and you are ready!
|
22
|
+
|
23
|
+
EXPLANATION
|
24
|
+
|
25
|
+
tree_builder = lambda do |node, level|
|
26
|
+
result += current_level = (" " * (level == 0 ? 0 : level-1) +
|
27
|
+
"|\n" * (level == 0 ? 0 : 1) +
|
28
|
+
" " * (level == 0 ? 0 : level-1) +
|
29
|
+
"+-- " * (level == 0 ? 0 : 1) +
|
30
|
+
"[#{node.name}]")
|
31
|
+
result += " #{node.filters[0].xpath}" if node.type == :tree
|
32
|
+
result += "\n"
|
33
|
+
|
34
|
+
node.children.each {|c| tree_builder[c, level+1]}
|
35
|
+
end
|
36
|
+
|
37
|
+
tree_builder[root_patterns[0],0]
|
38
|
+
|
39
|
+
result += "\n"
|
35
40
|
end
|
36
41
|
end
|
37
42
|
end
|
@@ -65,57 +65,4 @@ class String
|
|
65
65
|
def write(stringio, add_indent=0)
|
66
66
|
stringio.write((self.split("\n").collect { |line| (' ' * add_indent) + line }).join("\n"))
|
67
67
|
end
|
68
|
-
end
|
69
|
-
|
70
|
-
class Array
|
71
|
-
def to_sexp
|
72
|
-
[:array, *to_sexp_array]
|
73
|
-
end
|
74
|
-
|
75
|
-
def to_sexp_array
|
76
|
-
collect { |element| element.to_sexp }
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
class Hash
|
81
|
-
def to_sexp
|
82
|
-
[:hash, *to_sexp_array]
|
83
|
-
end
|
84
|
-
|
85
|
-
def to_sexp_array
|
86
|
-
sexp = []
|
87
|
-
each { |key, value| sexp.push(key.to_sexp, value.to_sexp) }
|
88
|
-
sexp
|
89
|
-
end
|
90
|
-
end
|
91
|
-
|
92
|
-
class Symbol
|
93
|
-
def to_sexp
|
94
|
-
[:lit, self]
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
class String
|
99
|
-
def to_sexp
|
100
|
-
[:str, self]
|
101
|
-
end
|
102
|
-
end
|
103
|
-
|
104
|
-
class TrueClass
|
105
|
-
def to_sexp
|
106
|
-
[:true]
|
107
|
-
end
|
108
|
-
end
|
109
|
-
|
110
|
-
class FalseClass
|
111
|
-
def to_sexp
|
112
|
-
[:false]
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
class Proc
|
117
|
-
alias_method :parse_tree_to_sexp, :to_sexp
|
118
|
-
def to_sexp
|
119
|
-
[:iter, [:fcall, :lambda], nil, parse_tree_to_sexp[1] ]
|
120
|
-
end
|
121
68
|
end
|
@@ -107,7 +107,8 @@ module Scrubyt
|
|
107
107
|
#find the <form> node which is the parent of the <input> node
|
108
108
|
def self.traverse_up_until_name(node, name)
|
109
109
|
while node.class != Hpricot::Doc do
|
110
|
-
raise "The element is nil! This probably means the widget with the specified name ('#{name}') does not exist" unless node
|
110
|
+
#raise "The element is nil! This probably means the widget with the specified name ('#{name}') does not exist" unless node
|
111
|
+
return nil unless node
|
111
112
|
break if node.name == name
|
112
113
|
node = node.parent
|
113
114
|
end
|
metadata
CHANGED
@@ -1,87 +1,22 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.0
|
3
|
-
specification_version: 1
|
4
2
|
name: scrubyt
|
5
3
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.
|
7
|
-
date: 2007-09-26 00:00:00 +02:00
|
8
|
-
summary: A powerful Web-scraping framework built on Mechanize and Hpricot
|
9
|
-
require_paths:
|
10
|
-
- lib
|
11
|
-
email: peter@rubyrailways.com
|
12
|
-
homepage: http://www.scrubyt.org
|
13
|
-
rubyforge_project:
|
14
|
-
description: scRUBYt! is an easy to learn and use, yet powerful and effective web scraping framework. It's most interesting part is a Web-scraping DSL built on HPricot and WWW::Mechanize, which allows to navigate to the page of interest, then extract and query data records with a few lines of code. It is hard to describe scRUBYt! in a few sentences - you have to see it for yourself!
|
15
|
-
autorequire:
|
16
|
-
default_executable:
|
17
|
-
bindir: bin
|
18
|
-
has_rdoc: false
|
19
|
-
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
-
requirements:
|
21
|
-
- - ">"
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version: 0.0.0
|
24
|
-
version:
|
4
|
+
version: 0.4.1
|
25
5
|
platform: ruby
|
26
|
-
signing_key:
|
27
|
-
cert_chain:
|
28
|
-
post_install_message:
|
29
6
|
authors:
|
30
7
|
- Peter Szinek
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
- CHANGELOG
|
35
|
-
- Rakefile
|
36
|
-
- lib/scrubyt.rb
|
37
|
-
- lib/scrubyt/logging.rb
|
38
|
-
- lib/scrubyt/output/result_dumper.rb
|
39
|
-
- lib/scrubyt/output/result.rb
|
40
|
-
- lib/scrubyt/output/export.rb
|
41
|
-
- lib/scrubyt/output/post_processor.rb
|
42
|
-
- lib/scrubyt/output/result_node.rb
|
43
|
-
- lib/scrubyt/output/scrubyt_result.rb
|
44
|
-
- lib/scrubyt/utils/compound_example_lookup.rb
|
45
|
-
- lib/scrubyt/utils/simple_example_lookup.rb
|
46
|
-
- lib/scrubyt/utils/ruby_extensions.rb
|
47
|
-
- lib/scrubyt/utils/xpathutils.rb
|
48
|
-
- lib/scrubyt/utils/shared_utils.rb
|
49
|
-
- lib/scrubyt/core/navigation/navigation_actions.rb
|
50
|
-
- lib/scrubyt/core/navigation/fetch_action.rb
|
51
|
-
- lib/scrubyt/core/scraping/constraint.rb
|
52
|
-
- lib/scrubyt/core/scraping/pattern.rb
|
53
|
-
- lib/scrubyt/core/scraping/pre_filter_document.rb
|
54
|
-
- lib/scrubyt/core/scraping/compound_example.rb
|
55
|
-
- lib/scrubyt/core/scraping/constraint_adder.rb
|
56
|
-
- lib/scrubyt/core/scraping/result_indexer.rb
|
57
|
-
- lib/scrubyt/core/scraping/filters/attribute_filter.rb
|
58
|
-
- lib/scrubyt/core/scraping/filters/base_filter.rb
|
59
|
-
- lib/scrubyt/core/scraping/filters/regexp_filter.rb
|
60
|
-
- lib/scrubyt/core/scraping/filters/tree_filter.rb
|
61
|
-
- lib/scrubyt/core/scraping/filters/html_subtree_filter.rb
|
62
|
-
- lib/scrubyt/core/scraping/filters/detail_page_filter.rb
|
63
|
-
- lib/scrubyt/core/scraping/filters/download_filter.rb
|
64
|
-
- lib/scrubyt/core/scraping/filters/text_filter.rb
|
65
|
-
- lib/scrubyt/core/scraping/filters/constant_filter.rb
|
66
|
-
- lib/scrubyt/core/scraping/filters/script_filter.rb
|
67
|
-
- lib/scrubyt/core/shared/extractor.rb
|
68
|
-
test_files: []
|
69
|
-
|
70
|
-
rdoc_options: []
|
71
|
-
|
72
|
-
extra_rdoc_files: []
|
73
|
-
|
74
|
-
executables: []
|
75
|
-
|
76
|
-
extensions: []
|
77
|
-
|
78
|
-
requirements: []
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
79
11
|
|
12
|
+
date: 2008-12-10 00:00:00 +01:00
|
13
|
+
default_executable:
|
80
14
|
dependencies:
|
81
15
|
- !ruby/object:Gem::Dependency
|
82
16
|
name: hpricot
|
17
|
+
type: :runtime
|
83
18
|
version_requirement:
|
84
|
-
version_requirements: !ruby/object:Gem::
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
85
20
|
requirements:
|
86
21
|
- - ">="
|
87
22
|
- !ruby/object:Gem::Version
|
@@ -89,55 +24,85 @@ dependencies:
|
|
89
24
|
version:
|
90
25
|
- !ruby/object:Gem::Dependency
|
91
26
|
name: mechanize
|
27
|
+
type: :runtime
|
92
28
|
version_requirement:
|
93
|
-
version_requirements: !ruby/object:Gem::
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
94
30
|
requirements:
|
95
31
|
- - ">="
|
96
32
|
- !ruby/object:Gem::Version
|
97
33
|
version: 0.6.3
|
98
34
|
version:
|
99
|
-
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
35
|
+
description: scRUBYt! is an easy to learn and use, yet powerful and effective web scraping framework. It's most interesting part is a Web-scraping DSL built on HPricot and WWW::Mechanize, which allows to navigate to the page of interest, then extract and query data records with a few lines of code. It is hard to describe scRUBYt! in a few sentences - you have to see it for yourself!
|
36
|
+
email: peter@rubyrailways.com
|
37
|
+
executables: []
|
38
|
+
|
39
|
+
extensions: []
|
40
|
+
|
41
|
+
extra_rdoc_files: []
|
42
|
+
|
43
|
+
files:
|
44
|
+
- COPYING
|
45
|
+
- README
|
46
|
+
- CHANGELOG
|
47
|
+
- Rakefile
|
48
|
+
- lib/scrubyt/core/navigation/agents/firewatir.rb
|
49
|
+
- lib/scrubyt/core/navigation/agents/mechanize.rb
|
50
|
+
- lib/scrubyt/core/navigation/fetch_action.rb
|
51
|
+
- lib/scrubyt/core/navigation/navigation_actions.rb
|
52
|
+
- lib/scrubyt/core/scraping/compound_example.rb
|
53
|
+
- lib/scrubyt/core/scraping/constraint.rb
|
54
|
+
- lib/scrubyt/core/scraping/constraint_adder.rb
|
55
|
+
- lib/scrubyt/core/scraping/filters/attribute_filter.rb
|
56
|
+
- lib/scrubyt/core/scraping/filters/base_filter.rb
|
57
|
+
- lib/scrubyt/core/scraping/filters/constant_filter.rb
|
58
|
+
- lib/scrubyt/core/scraping/filters/detail_page_filter.rb
|
59
|
+
- lib/scrubyt/core/scraping/filters/download_filter.rb
|
60
|
+
- lib/scrubyt/core/scraping/filters/html_subtree_filter.rb
|
61
|
+
- lib/scrubyt/core/scraping/filters/regexp_filter.rb
|
62
|
+
- lib/scrubyt/core/scraping/filters/script_filter.rb
|
63
|
+
- lib/scrubyt/core/scraping/filters/text_filter.rb
|
64
|
+
- lib/scrubyt/core/scraping/filters/tree_filter.rb
|
65
|
+
- lib/scrubyt/core/scraping/pattern.rb
|
66
|
+
- lib/scrubyt/core/scraping/pre_filter_document.rb
|
67
|
+
- lib/scrubyt/core/scraping/result_indexer.rb
|
68
|
+
- lib/scrubyt/core/shared/extractor.rb
|
69
|
+
- lib/scrubyt/logging.rb
|
70
|
+
- lib/scrubyt/output/post_processor.rb
|
71
|
+
- lib/scrubyt/output/result.rb
|
72
|
+
- lib/scrubyt/output/result_dumper.rb
|
73
|
+
- lib/scrubyt/output/result_node.rb
|
74
|
+
- lib/scrubyt/output/scrubyt_result.rb
|
75
|
+
- lib/scrubyt/utils/compound_example_lookup.rb
|
76
|
+
- lib/scrubyt/utils/ruby_extensions.rb
|
77
|
+
- lib/scrubyt/utils/shared_utils.rb
|
78
|
+
- lib/scrubyt/utils/simple_example_lookup.rb
|
79
|
+
- lib/scrubyt/utils/xpathutils.rb
|
80
|
+
- lib/scrubyt.rb
|
81
|
+
has_rdoc: "true"
|
82
|
+
homepage: http://www.scrubyt.org
|
83
|
+
post_install_message:
|
84
|
+
rdoc_options: []
|
85
|
+
|
86
|
+
require_paths:
|
87
|
+
- lib
|
88
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
89
|
+
requirements:
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: "0"
|
93
|
+
version:
|
94
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
95
|
+
requirements:
|
96
|
+
- - ">="
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: "0"
|
99
|
+
version:
|
100
|
+
requirements: []
|
101
|
+
|
102
|
+
rubyforge_project:
|
103
|
+
rubygems_version: 1.3.1
|
104
|
+
signing_key:
|
105
|
+
specification_version: 2
|
106
|
+
summary: A powerful Web-scraping framework built on Mechanize and Hpricot (and FireWatir)
|
107
|
+
test_files: []
|
108
|
+
|