proto 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. data/lib/proto/scraper.rb +18 -25
  2. data/lib/proto/version.rb +1 -1
  3. metadata +1 -1
@@ -3,7 +3,7 @@ module Proto
3
3
  attr_accessor :url, :doc, :url_collection
4
4
 
5
5
  def initialize(url)
6
- @url = url.chomp '/'
6
+ @url = url.chomp '/' #remove trailing slash
7
7
  @doc = Nokogiri::HTML(open(url))
8
8
  end
9
9
 
@@ -15,45 +15,38 @@ module Proto
15
15
 
16
16
  def fetch(name='Type', args)
17
17
  if url_collection
18
- attributes = visit_urls_and_fetch(args)
19
- protos = create_return_objects(name, attributes)
20
- return protos
18
+ attributes = scrape_multiple_pages(args)
21
19
  else
22
- attributes = scrape_attribute_data(args)
23
- protos = create_return_objects(name, attributes)
24
- return protos
20
+ attributes = scrape_single_page(args)
25
21
  end
22
+ protos = create_return_objects(name, attributes)
23
+ return protos
26
24
  end
27
25
  alias_method :fetch_and_create!, :fetch
28
26
 
29
27
  private
30
28
 
31
- def visit_urls_and_fetch(attributes)
32
- hash_array = []
33
- final_array = url_collection.map do |url|
34
- page = Nokogiri::HTML(open(url))
35
- attrs_hash = gather_data(page, attributes)
36
- hash_array << attrs_hash
29
+ def scrape_multiple_pages(attributes)
30
+ url_collection.each_with_object([]).map do |url, hash_array|
31
+ gather_data(url, attributes)
37
32
  end
38
- return hash_array
39
33
  end
40
34
 
41
- def gather_data(page, attributes)
42
- job_hash = attributes.each_with_object({}) do |(key, selector), attrs|
35
+ def gather_data(url, attributes)
36
+ page = Nokogiri::HTML(open(url))
37
+ attributes.each_with_object({}) do |(key, selector), attrs|
43
38
  attrs[key] = page.css(selector).text.strip
44
39
  end
45
40
  end
46
41
 
47
- def scrape_attribute_data(document=self.doc, attributes)
48
- length_of_scrape = document.css(attributes.first[1]).count
42
+ def scrape_single_page(attributes)
43
+ length_of_scrape = doc.css(attributes.first[1]).count
49
44
 
50
- final_array = length_of_scrape.times.map do |index|
51
- attributes.inject(Hash.new) do |hash, (attr_name, selector)|
52
- hash.merge(attr_name => document.css(selector)[index].text.strip) if document.css(selector)[index]
45
+ length_of_scrape.times.map do |index|
46
+ attributes.inject({}) do |hash, (attr_name, selector)|
47
+ hash.merge(attr_name => doc.css(selector)[index].text.strip) if doc.css(selector)[index]
53
48
  end
54
- end
55
-
56
- final_array.compact
49
+ end.compact
57
50
  end
58
51
 
59
52
  def create_return_objects(name, attributes)
@@ -62,4 +55,4 @@ module Proto
62
55
  attributes.map { |hash| Proto.const_get(name).new(hash) }
63
56
  end
64
57
  end
65
- end
58
+ end
@@ -1,3 +1,3 @@
1
1
  module Proto
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: proto
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: