proto 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/proto/scraper.rb +18 -25
- data/lib/proto/version.rb +1 -1
- metadata +1 -1
data/lib/proto/scraper.rb
CHANGED
@@ -3,7 +3,7 @@ module Proto
|
|
3
3
|
attr_accessor :url, :doc, :url_collection
|
4
4
|
|
5
5
|
def initialize(url)
|
6
|
-
@url = url.chomp '/'
|
6
|
+
@url = url.chomp '/' #remove trailing slash
|
7
7
|
@doc = Nokogiri::HTML(open(url))
|
8
8
|
end
|
9
9
|
|
@@ -15,45 +15,38 @@ module Proto
|
|
15
15
|
|
16
16
|
def fetch(name='Type', args)
|
17
17
|
if url_collection
|
18
|
-
attributes =
|
19
|
-
protos = create_return_objects(name, attributes)
|
20
|
-
return protos
|
18
|
+
attributes = scrape_multiple_pages(args)
|
21
19
|
else
|
22
|
-
attributes =
|
23
|
-
protos = create_return_objects(name, attributes)
|
24
|
-
return protos
|
20
|
+
attributes = scrape_single_page(args)
|
25
21
|
end
|
22
|
+
protos = create_return_objects(name, attributes)
|
23
|
+
return protos
|
26
24
|
end
|
27
25
|
alias_method :fetch_and_create!, :fetch
|
28
26
|
|
29
27
|
private
|
30
28
|
|
31
|
-
def
|
32
|
-
|
33
|
-
|
34
|
-
page = Nokogiri::HTML(open(url))
|
35
|
-
attrs_hash = gather_data(page, attributes)
|
36
|
-
hash_array << attrs_hash
|
29
|
+
def scrape_multiple_pages(attributes)
|
30
|
+
url_collection.each_with_object([]).map do |url, hash_array|
|
31
|
+
gather_data(url, attributes)
|
37
32
|
end
|
38
|
-
return hash_array
|
39
33
|
end
|
40
34
|
|
41
|
-
def gather_data(
|
42
|
-
|
35
|
+
def gather_data(url, attributes)
|
36
|
+
page = Nokogiri::HTML(open(url))
|
37
|
+
attributes.each_with_object({}) do |(key, selector), attrs|
|
43
38
|
attrs[key] = page.css(selector).text.strip
|
44
39
|
end
|
45
40
|
end
|
46
41
|
|
47
|
-
def
|
48
|
-
length_of_scrape =
|
42
|
+
def scrape_single_page(attributes)
|
43
|
+
length_of_scrape = doc.css(attributes.first[1]).count
|
49
44
|
|
50
|
-
|
51
|
-
attributes.inject(
|
52
|
-
hash.merge(attr_name =>
|
45
|
+
length_of_scrape.times.map do |index|
|
46
|
+
attributes.inject({}) do |hash, (attr_name, selector)|
|
47
|
+
hash.merge(attr_name => doc.css(selector)[index].text.strip) if doc.css(selector)[index]
|
53
48
|
end
|
54
|
-
end
|
55
|
-
|
56
|
-
final_array.compact
|
49
|
+
end.compact
|
57
50
|
end
|
58
51
|
|
59
52
|
def create_return_objects(name, attributes)
|
@@ -62,4 +55,4 @@ module Proto
|
|
62
55
|
attributes.map { |hash| Proto.const_get(name).new(hash) }
|
63
56
|
end
|
64
57
|
end
|
65
|
-
end
|
58
|
+
end
|
data/lib/proto/version.rb
CHANGED