scrubyt 0.3.4 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +31 -0
- data/README +1 -1
- data/Rakefile +4 -9
- data/lib/scrubyt.rb +37 -56
- data/lib/scrubyt/core/navigation/agents/firewatir.rb +249 -0
- data/lib/scrubyt/core/navigation/agents/mechanize.rb +253 -0
- data/lib/scrubyt/core/navigation/fetch_action.rb +2 -183
- data/lib/scrubyt/core/navigation/navigation_actions.rb +30 -48
- data/lib/scrubyt/core/scraping/filters/attribute_filter.rb +0 -3
- data/lib/scrubyt/core/scraping/filters/base_filter.rb +6 -6
- data/lib/scrubyt/core/scraping/filters/constant_filter.rb +0 -3
- data/lib/scrubyt/core/scraping/filters/detail_page_filter.rb +6 -6
- data/lib/scrubyt/core/scraping/filters/download_filter.rb +0 -4
- data/lib/scrubyt/core/scraping/filters/html_subtree_filter.rb +0 -3
- data/lib/scrubyt/core/scraping/filters/regexp_filter.rb +0 -4
- data/lib/scrubyt/core/scraping/filters/script_filter.rb +0 -3
- data/lib/scrubyt/core/scraping/filters/text_filter.rb +1 -5
- data/lib/scrubyt/core/scraping/filters/tree_filter.rb +0 -8
- data/lib/scrubyt/core/scraping/pattern.rb +6 -27
- data/lib/scrubyt/core/scraping/result_indexer.rb +0 -4
- data/lib/scrubyt/core/shared/extractor.rb +15 -1
- data/lib/scrubyt/output/result_node.rb +42 -6
- data/lib/scrubyt/output/scrubyt_result.rb +35 -30
- data/lib/scrubyt/utils/ruby_extensions.rb +0 -53
- data/lib/scrubyt/utils/xpathutils.rb +2 -1
- metadata +84 -119
- data/lib/scrubyt/output/export.rb +0 -157
@@ -134,13 +134,5 @@ module Scrubyt
|
|
134
134
|
@xpath = XPathUtils.generate_relative_XPath_from_XPaths(parent_xpath, @xpath) if (@xpath =~ /^\/html/) #TODO: should not rely on <html> being the root node
|
135
135
|
end
|
136
136
|
|
137
|
-
def to_sexp
|
138
|
-
if @example =~ /.+\[@.+\]$/
|
139
|
-
[:str, "#{@xpath}/@#{@example.scan(/\[@(.+?)\]/)[0][0]}"]
|
140
|
-
else
|
141
|
-
[:str, @xpath]
|
142
|
-
end
|
143
|
-
end
|
144
|
-
|
145
137
|
end #End of class TreeFilter
|
146
138
|
end #End of module Scrubyt
|
@@ -35,6 +35,10 @@ module Scrubyt
|
|
35
35
|
|
36
36
|
VALID_PATTERN_TYPES = [:tree, :attribute, :regexp, :detail_page, :download, :html_subtree, :constant, :script, :text]
|
37
37
|
|
38
|
+
# :determine - default value; indicates that the type of the example still needs to be determined
|
39
|
+
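# :string - represents a node whose example type is EXAMPLE_TYPE_STRING (NOTE(review): VALID_PATTERN_EXAMPLE_TYPES lists :xpath, not :string - confirm which is intended)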
|
40
|
+
VALID_PATTERN_EXAMPLE_TYPES = [:determine, :xpath]
|
41
|
+
|
38
42
|
#The pattern can be either a model pattern (in this case it is
|
39
43
|
#written to the output) or a temp pattern (in this case it is skipped)
|
40
44
|
#Will be implemented in a higher version (i.e. not 0.1.0) - for now, everything
|
@@ -59,7 +63,7 @@ module Scrubyt
|
|
59
63
|
|
60
64
|
option_reader(:type => :tree, :output_type => :model, :generalize => false,
|
61
65
|
:write_text => lambda { @children.size == 0 }, :limit => nil,
|
62
|
-
:default => nil, :resolve => :full, :except => nil, :example_type =>
|
66
|
+
:default => nil, :resolve => :full, :except => nil, :example_type => :determine)
|
63
67
|
|
64
68
|
def initialize(name, args=[], extractor=nil, parent=nil, &block)
|
65
69
|
#init attributes
|
@@ -305,32 +309,6 @@ module Scrubyt
|
|
305
309
|
end
|
306
310
|
end
|
307
311
|
|
308
|
-
def to_sexp
|
309
|
-
#collect arguments
|
310
|
-
args = []
|
311
|
-
args.push(*@filters.to_sexp_array) if type != :detail_page #TODO: this if shouldn't be there
|
312
|
-
args.push(@options.to_sexp) if !@options.empty?
|
313
|
-
|
314
|
-
#build main call
|
315
|
-
sexp = [:fcall, @name, [:array, *args]]
|
316
|
-
|
317
|
-
if type == :detail_page
|
318
|
-
#add detail page extractor
|
319
|
-
sexp = [:iter, sexp, nil, @filters[0].get_detail_sexp]
|
320
|
-
else
|
321
|
-
#add child block if the pattern has children
|
322
|
-
sexp = [:iter, sexp, nil, [:block, *@children.to_sexp_array ]] if !@children.empty?
|
323
|
-
end
|
324
|
-
|
325
|
-
#add modifier calls - TODO: remove when everything is exported to the options hash
|
326
|
-
@modifier_calls.each do |modifier_sexp|
|
327
|
-
sexp = [:call, sexp, *modifier_sexp]
|
328
|
-
end
|
329
|
-
|
330
|
-
#return complete sexp
|
331
|
-
sexp
|
332
|
-
end
|
333
|
-
|
334
312
|
private
|
335
313
|
def parse_options_hash(hash)
|
336
314
|
#merge provided hash
|
@@ -339,6 +317,7 @@ module Scrubyt
|
|
339
317
|
hash.each { |key, value| check_option(key.to_sym) }
|
340
318
|
raise "Invalid pattern type: #{type.to_s}" if VALID_PATTERN_TYPES.index(type.to_sym).nil?
|
341
319
|
raise "Invalid output type: #{output_type.to_s}" if VALID_OUTPUT_TYPES.index(output_type.to_sym).nil?
|
320
|
+
raise "Invalid example type: #{example_type.to_s}" if VALID_PATTERN_EXAMPLE_TYPES.index(example_type.to_sym).nil?
|
342
321
|
end
|
343
322
|
|
344
323
|
def check_option(option)
|
@@ -14,6 +14,21 @@ module Scrubyt
|
|
14
14
|
|
15
15
|
#The definition of the extractor is passed through this method
|
16
16
|
def self.define(mode=nil, &extractor_definition)
|
17
|
+
if mode.is_a?(Hash)
|
18
|
+
if mode[:agent]==:firefox
|
19
|
+
FetchAction.class_eval do
|
20
|
+
include Navigation::Firewatir
|
21
|
+
end
|
22
|
+
else
|
23
|
+
FetchAction.class_eval do
|
24
|
+
include Navigation::Mechanize
|
25
|
+
end
|
26
|
+
end
|
27
|
+
else
|
28
|
+
FetchAction.class_eval do
|
29
|
+
include Navigation::Mechanize
|
30
|
+
end
|
31
|
+
end
|
17
32
|
extractor = self.new(mode, extractor_definition)
|
18
33
|
extractor.result
|
19
34
|
end
|
@@ -117,7 +132,6 @@ module Scrubyt
|
|
117
132
|
catch :quit_next_page_loop do
|
118
133
|
loop do
|
119
134
|
url = get_current_doc_url #TODO need absolute address here 2/4
|
120
|
-
puts url
|
121
135
|
@processed_pages << url
|
122
136
|
@root_patterns.each do |root_pattern|
|
123
137
|
root_results.push(*root_pattern.evaluate(get_hpricot_doc, nil))
|
@@ -23,7 +23,11 @@ module Scrubyt
|
|
23
23
|
text = (@result.is_a? String) ? @result : @result.inner_html.gsub(/<.*?>/, '')
|
24
24
|
text = SharedUtils.unescape_entities(text)
|
25
25
|
text.strip!
|
26
|
-
text
|
26
|
+
if (@options[:default] && ((text == '') || (text == @options[:default])))
|
27
|
+
@options[:default]
|
28
|
+
else
|
29
|
+
text
|
30
|
+
end
|
27
31
|
end
|
28
32
|
|
29
33
|
def to_libxml
|
@@ -41,26 +45,54 @@ module Scrubyt
|
|
41
45
|
def to_hash(delimiter=',')
|
42
46
|
result = []
|
43
47
|
flat_hash_inner = lambda {|e, hash|
|
44
|
-
hash[e.name.to_sym] = hash[e.name.to_sym] ? hash[e.name.to_sym] + delimiter + e.to_s : e.to_s if e.write_text && !e.to_s.empty?
|
48
|
+
hash[e.name.to_sym] = hash[e.name.to_sym] ? hash[e.name.to_sym] + delimiter + e.to_s : e.to_s if ((e.write_text && !e.to_s.empty?) || e.options[:default])
|
45
49
|
e.each {|c| flat_hash_inner.call(c, hash) }
|
46
50
|
hash
|
47
51
|
}
|
48
52
|
self.each {|e| result << flat_hash_inner.call(e, {}) }
|
49
53
|
result
|
50
54
|
end
|
55
|
+
|
56
|
+
def to_flat_hash()
|
57
|
+
hash_result = self.to_hash('@@@@@@')
|
58
|
+
merged_hash = hash_result.delete_at 0
|
59
|
+
hash_result.each do |hash|
|
60
|
+
merged_hash.keys.each do |key|
|
61
|
+
merged_hash[key] += "@@@@@@#{hash[key]}"
|
62
|
+
end
|
63
|
+
end
|
64
|
+
result_sets = merged_hash.values.map!{|x| x.split('@@@@@@')}.transpose
|
65
|
+
final_result = []
|
66
|
+
|
67
|
+
result_sets.each do |rs|
|
68
|
+
temp_result = {}
|
69
|
+
merged_hash.keys.each do |k|
|
70
|
+
temp_result[k] = rs[merged_hash.keys.index(k)]
|
71
|
+
end
|
72
|
+
final_result << temp_result
|
73
|
+
end
|
74
|
+
final_result
|
75
|
+
end
|
51
76
|
|
52
77
|
def to_flat_xml(delimiter=nil)
|
53
78
|
lines = []
|
54
79
|
hash_result = delimiter ? self.to_hash(delimiter) : self.to_hash
|
80
|
+
merged_hash = hash_result.delete_at 0
|
81
|
+
|
82
|
+
hash_result.each do |hash|
|
83
|
+
merged_hash.keys.each do |key|
|
84
|
+
merged_hash[key] += "#{delimiter}#{hash[key]}"
|
85
|
+
end
|
86
|
+
end
|
55
87
|
|
56
88
|
if delimiter
|
57
|
-
result_sets =
|
89
|
+
result_sets = merged_hash.values.map!{|x| x.split(delimiter)}.transpose
|
58
90
|
final_result = []
|
59
91
|
|
60
92
|
result_sets.each do |rs|
|
61
93
|
temp_result = {}
|
62
|
-
|
63
|
-
temp_result[k] = rs[
|
94
|
+
merged_hash.keys.each do |k|
|
95
|
+
temp_result[k] = rs[merged_hash.keys.index(k)]
|
64
96
|
end
|
65
97
|
final_result << temp_result
|
66
98
|
end
|
@@ -89,7 +121,11 @@ module Scrubyt
|
|
89
121
|
elsif write_text && !to_s.empty?
|
90
122
|
lines << "<#{name}>#{ERB::Util.html_escape(to_s)}</#{name}>"
|
91
123
|
else
|
92
|
-
|
124
|
+
if @options[:default]
|
125
|
+
lines << "<#{name}>#{@options[:default]}</#{name}>"
|
126
|
+
else
|
127
|
+
lines << "<#{name}/>"
|
128
|
+
end
|
93
129
|
end
|
94
130
|
else
|
95
131
|
lines << "<#{name}>"
|
@@ -2,36 +2,41 @@ module Scrubyt
|
|
2
2
|
class ScrubytResult < ResultNode
|
3
3
|
attr_accessor :root_patterns, :source_file, :source_proc
|
4
4
|
|
5
|
-
def export
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
5
|
+
def export
|
6
|
+
#Temporary solution; the real one will be back later - or not
|
7
|
+
result = <<-EXPLANATION
|
8
|
+
|
9
|
+
=== Extractor tree ===
|
10
|
+
|
11
|
+
export() is not working at the moment, due to the removal or ParseTree, ruby2ruby and RubyInline.
|
12
|
+
For now, in case you are using examples, you can replace them by hand based on the output below.
|
13
|
+
So if your pattern in the learning extractor looks like
|
14
|
+
|
15
|
+
book "Ruby Cookbook"
|
16
|
+
|
17
|
+
and you see the following below:
|
18
|
+
|
19
|
+
[book] /table[1]/tr/td[2]
|
20
|
+
|
21
|
+
then replace "Ruby Cookbook" with "/table[1]/tr/td[2]" (and all the other XPaths) and you are ready!
|
22
|
+
|
23
|
+
EXPLANATION
|
24
|
+
|
25
|
+
tree_builder = lambda do |node, level|
|
26
|
+
result += current_level = (" " * (level == 0 ? 0 : level-1) +
|
27
|
+
"|\n" * (level == 0 ? 0 : 1) +
|
28
|
+
" " * (level == 0 ? 0 : level-1) +
|
29
|
+
"+-- " * (level == 0 ? 0 : 1) +
|
30
|
+
"[#{node.name}]")
|
31
|
+
result += " #{node.filters[0].xpath}" if node.type == :tree
|
32
|
+
result += "\n"
|
33
|
+
|
34
|
+
node.children.each {|c| tree_builder[c, level+1]}
|
35
|
+
end
|
36
|
+
|
37
|
+
tree_builder[root_patterns[0],0]
|
38
|
+
|
39
|
+
result += "\n"
|
35
40
|
end
|
36
41
|
end
|
37
42
|
end
|
@@ -65,57 +65,4 @@ class String
|
|
65
65
|
def write(stringio, add_indent=0)
|
66
66
|
stringio.write((self.split("\n").collect { |line| (' ' * add_indent) + line }).join("\n"))
|
67
67
|
end
|
68
|
-
end
|
69
|
-
|
70
|
-
class Array
|
71
|
-
def to_sexp
|
72
|
-
[:array, *to_sexp_array]
|
73
|
-
end
|
74
|
-
|
75
|
-
def to_sexp_array
|
76
|
-
collect { |element| element.to_sexp }
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
class Hash
|
81
|
-
def to_sexp
|
82
|
-
[:hash, *to_sexp_array]
|
83
|
-
end
|
84
|
-
|
85
|
-
def to_sexp_array
|
86
|
-
sexp = []
|
87
|
-
each { |key, value| sexp.push(key.to_sexp, value.to_sexp) }
|
88
|
-
sexp
|
89
|
-
end
|
90
|
-
end
|
91
|
-
|
92
|
-
class Symbol
|
93
|
-
def to_sexp
|
94
|
-
[:lit, self]
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
class String
|
99
|
-
def to_sexp
|
100
|
-
[:str, self]
|
101
|
-
end
|
102
|
-
end
|
103
|
-
|
104
|
-
class TrueClass
|
105
|
-
def to_sexp
|
106
|
-
[:true]
|
107
|
-
end
|
108
|
-
end
|
109
|
-
|
110
|
-
class FalseClass
|
111
|
-
def to_sexp
|
112
|
-
[:false]
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
class Proc
|
117
|
-
alias_method :parse_tree_to_sexp, :to_sexp
|
118
|
-
def to_sexp
|
119
|
-
[:iter, [:fcall, :lambda], nil, parse_tree_to_sexp[1] ]
|
120
|
-
end
|
121
68
|
end
|
@@ -107,7 +107,8 @@ module Scrubyt
|
|
107
107
|
#find the <form> node which is the parent of the <input> node
|
108
108
|
def self.traverse_up_until_name(node, name)
|
109
109
|
while node.class != Hpricot::Doc do
|
110
|
-
raise "The element is nil! This probably means the widget with the specified name ('#{name}') does not exist" unless node
|
110
|
+
#raise "The element is nil! This probably means the widget with the specified name ('#{name}') does not exist" unless node
|
111
|
+
return nil unless node
|
111
112
|
break if node.name == name
|
112
113
|
node = node.parent
|
113
114
|
end
|
metadata
CHANGED
@@ -1,87 +1,22 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.0
|
3
|
-
specification_version: 1
|
4
2
|
name: scrubyt
|
5
3
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.
|
7
|
-
date: 2007-09-26 00:00:00 +02:00
|
8
|
-
summary: A powerful Web-scraping framework built on Mechanize and Hpricot
|
9
|
-
require_paths:
|
10
|
-
- lib
|
11
|
-
email: peter@rubyrailways.com
|
12
|
-
homepage: http://www.scrubyt.org
|
13
|
-
rubyforge_project:
|
14
|
-
description: scRUBYt! is an easy to learn and use, yet powerful and effective web scraping framework. It's most interesting part is a Web-scraping DSL built on HPricot and WWW::Mechanize, which allows to navigate to the page of interest, then extract and query data records with a few lines of code. It is hard to describe scRUBYt! in a few sentences - you have to see it for yourself!
|
15
|
-
autorequire:
|
16
|
-
default_executable:
|
17
|
-
bindir: bin
|
18
|
-
has_rdoc: false
|
19
|
-
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
-
requirements:
|
21
|
-
- - ">"
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version: 0.0.0
|
24
|
-
version:
|
4
|
+
version: 0.4.1
|
25
5
|
platform: ruby
|
26
|
-
signing_key:
|
27
|
-
cert_chain:
|
28
|
-
post_install_message:
|
29
6
|
authors:
|
30
7
|
- Peter Szinek
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
- CHANGELOG
|
35
|
-
- Rakefile
|
36
|
-
- lib/scrubyt.rb
|
37
|
-
- lib/scrubyt/logging.rb
|
38
|
-
- lib/scrubyt/output/result_dumper.rb
|
39
|
-
- lib/scrubyt/output/result.rb
|
40
|
-
- lib/scrubyt/output/export.rb
|
41
|
-
- lib/scrubyt/output/post_processor.rb
|
42
|
-
- lib/scrubyt/output/result_node.rb
|
43
|
-
- lib/scrubyt/output/scrubyt_result.rb
|
44
|
-
- lib/scrubyt/utils/compound_example_lookup.rb
|
45
|
-
- lib/scrubyt/utils/simple_example_lookup.rb
|
46
|
-
- lib/scrubyt/utils/ruby_extensions.rb
|
47
|
-
- lib/scrubyt/utils/xpathutils.rb
|
48
|
-
- lib/scrubyt/utils/shared_utils.rb
|
49
|
-
- lib/scrubyt/core/navigation/navigation_actions.rb
|
50
|
-
- lib/scrubyt/core/navigation/fetch_action.rb
|
51
|
-
- lib/scrubyt/core/scraping/constraint.rb
|
52
|
-
- lib/scrubyt/core/scraping/pattern.rb
|
53
|
-
- lib/scrubyt/core/scraping/pre_filter_document.rb
|
54
|
-
- lib/scrubyt/core/scraping/compound_example.rb
|
55
|
-
- lib/scrubyt/core/scraping/constraint_adder.rb
|
56
|
-
- lib/scrubyt/core/scraping/result_indexer.rb
|
57
|
-
- lib/scrubyt/core/scraping/filters/attribute_filter.rb
|
58
|
-
- lib/scrubyt/core/scraping/filters/base_filter.rb
|
59
|
-
- lib/scrubyt/core/scraping/filters/regexp_filter.rb
|
60
|
-
- lib/scrubyt/core/scraping/filters/tree_filter.rb
|
61
|
-
- lib/scrubyt/core/scraping/filters/html_subtree_filter.rb
|
62
|
-
- lib/scrubyt/core/scraping/filters/detail_page_filter.rb
|
63
|
-
- lib/scrubyt/core/scraping/filters/download_filter.rb
|
64
|
-
- lib/scrubyt/core/scraping/filters/text_filter.rb
|
65
|
-
- lib/scrubyt/core/scraping/filters/constant_filter.rb
|
66
|
-
- lib/scrubyt/core/scraping/filters/script_filter.rb
|
67
|
-
- lib/scrubyt/core/shared/extractor.rb
|
68
|
-
test_files: []
|
69
|
-
|
70
|
-
rdoc_options: []
|
71
|
-
|
72
|
-
extra_rdoc_files: []
|
73
|
-
|
74
|
-
executables: []
|
75
|
-
|
76
|
-
extensions: []
|
77
|
-
|
78
|
-
requirements: []
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
79
11
|
|
12
|
+
date: 2008-12-10 00:00:00 +01:00
|
13
|
+
default_executable:
|
80
14
|
dependencies:
|
81
15
|
- !ruby/object:Gem::Dependency
|
82
16
|
name: hpricot
|
17
|
+
type: :runtime
|
83
18
|
version_requirement:
|
84
|
-
version_requirements: !ruby/object:Gem::
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
85
20
|
requirements:
|
86
21
|
- - ">="
|
87
22
|
- !ruby/object:Gem::Version
|
@@ -89,55 +24,85 @@ dependencies:
|
|
89
24
|
version:
|
90
25
|
- !ruby/object:Gem::Dependency
|
91
26
|
name: mechanize
|
27
|
+
type: :runtime
|
92
28
|
version_requirement:
|
93
|
-
version_requirements: !ruby/object:Gem::
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
94
30
|
requirements:
|
95
31
|
- - ">="
|
96
32
|
- !ruby/object:Gem::Version
|
97
33
|
version: 0.6.3
|
98
34
|
version:
|
99
|
-
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
35
|
+
description: scRUBYt! is an easy to learn and use, yet powerful and effective web scraping framework. It's most interesting part is a Web-scraping DSL built on HPricot and WWW::Mechanize, which allows to navigate to the page of interest, then extract and query data records with a few lines of code. It is hard to describe scRUBYt! in a few sentences - you have to see it for yourself!
|
36
|
+
email: peter@rubyrailways.com
|
37
|
+
executables: []
|
38
|
+
|
39
|
+
extensions: []
|
40
|
+
|
41
|
+
extra_rdoc_files: []
|
42
|
+
|
43
|
+
files:
|
44
|
+
- COPYING
|
45
|
+
- README
|
46
|
+
- CHANGELOG
|
47
|
+
- Rakefile
|
48
|
+
- lib/scrubyt/core/navigation/agents/firewatir.rb
|
49
|
+
- lib/scrubyt/core/navigation/agents/mechanize.rb
|
50
|
+
- lib/scrubyt/core/navigation/fetch_action.rb
|
51
|
+
- lib/scrubyt/core/navigation/navigation_actions.rb
|
52
|
+
- lib/scrubyt/core/scraping/compound_example.rb
|
53
|
+
- lib/scrubyt/core/scraping/constraint.rb
|
54
|
+
- lib/scrubyt/core/scraping/constraint_adder.rb
|
55
|
+
- lib/scrubyt/core/scraping/filters/attribute_filter.rb
|
56
|
+
- lib/scrubyt/core/scraping/filters/base_filter.rb
|
57
|
+
- lib/scrubyt/core/scraping/filters/constant_filter.rb
|
58
|
+
- lib/scrubyt/core/scraping/filters/detail_page_filter.rb
|
59
|
+
- lib/scrubyt/core/scraping/filters/download_filter.rb
|
60
|
+
- lib/scrubyt/core/scraping/filters/html_subtree_filter.rb
|
61
|
+
- lib/scrubyt/core/scraping/filters/regexp_filter.rb
|
62
|
+
- lib/scrubyt/core/scraping/filters/script_filter.rb
|
63
|
+
- lib/scrubyt/core/scraping/filters/text_filter.rb
|
64
|
+
- lib/scrubyt/core/scraping/filters/tree_filter.rb
|
65
|
+
- lib/scrubyt/core/scraping/pattern.rb
|
66
|
+
- lib/scrubyt/core/scraping/pre_filter_document.rb
|
67
|
+
- lib/scrubyt/core/scraping/result_indexer.rb
|
68
|
+
- lib/scrubyt/core/shared/extractor.rb
|
69
|
+
- lib/scrubyt/logging.rb
|
70
|
+
- lib/scrubyt/output/post_processor.rb
|
71
|
+
- lib/scrubyt/output/result.rb
|
72
|
+
- lib/scrubyt/output/result_dumper.rb
|
73
|
+
- lib/scrubyt/output/result_node.rb
|
74
|
+
- lib/scrubyt/output/scrubyt_result.rb
|
75
|
+
- lib/scrubyt/utils/compound_example_lookup.rb
|
76
|
+
- lib/scrubyt/utils/ruby_extensions.rb
|
77
|
+
- lib/scrubyt/utils/shared_utils.rb
|
78
|
+
- lib/scrubyt/utils/simple_example_lookup.rb
|
79
|
+
- lib/scrubyt/utils/xpathutils.rb
|
80
|
+
- lib/scrubyt.rb
|
81
|
+
has_rdoc: "true"
|
82
|
+
homepage: http://www.scrubyt.org
|
83
|
+
post_install_message:
|
84
|
+
rdoc_options: []
|
85
|
+
|
86
|
+
require_paths:
|
87
|
+
- lib
|
88
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
89
|
+
requirements:
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: "0"
|
93
|
+
version:
|
94
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
95
|
+
requirements:
|
96
|
+
- - ">="
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: "0"
|
99
|
+
version:
|
100
|
+
requirements: []
|
101
|
+
|
102
|
+
rubyforge_project:
|
103
|
+
rubygems_version: 1.3.1
|
104
|
+
signing_key:
|
105
|
+
specification_version: 2
|
106
|
+
summary: A powerful Web-scraping framework built on Mechanize and Hpricot (and FireWatir)
|
107
|
+
test_files: []
|
108
|
+
|