proto 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. data/lib/proto/scraper.rb +18 -25
  2. data/lib/proto/version.rb +1 -1
  3. metadata +1 -1
@@ -3,7 +3,7 @@ module Proto
3
3
  attr_accessor :url, :doc, :url_collection
4
4
 
5
5
  def initialize(url)
6
- @url = url.chomp '/'
6
+ @url = url.chomp '/' #remove trailing slash
7
7
  @doc = Nokogiri::HTML(open(url))
8
8
  end
9
9
 
@@ -15,45 +15,38 @@ module Proto
15
15
 
16
16
  def fetch(name='Type', args)
17
17
  if url_collection
18
- attributes = visit_urls_and_fetch(args)
19
- protos = create_return_objects(name, attributes)
20
- return protos
18
+ attributes = scrape_multiple_pages(args)
21
19
  else
22
- attributes = scrape_attribute_data(args)
23
- protos = create_return_objects(name, attributes)
24
- return protos
20
+ attributes = scrape_single_page(args)
25
21
  end
22
+ protos = create_return_objects(name, attributes)
23
+ return protos
26
24
  end
27
25
  alias_method :fetch_and_create!, :fetch
28
26
 
29
27
  private
30
28
 
31
- def visit_urls_and_fetch(attributes)
32
- hash_array = []
33
- final_array = url_collection.map do |url|
34
- page = Nokogiri::HTML(open(url))
35
- attrs_hash = gather_data(page, attributes)
36
- hash_array << attrs_hash
29
+ def scrape_multiple_pages(attributes)
30
+ url_collection.each_with_object([]).map do |url, hash_array|
31
+ gather_data(url, attributes)
37
32
  end
38
- return hash_array
39
33
  end
40
34
 
41
- def gather_data(page, attributes)
42
- job_hash = attributes.each_with_object({}) do |(key, selector), attrs|
35
+ def gather_data(url, attributes)
36
+ page = Nokogiri::HTML(open(url))
37
+ attributes.each_with_object({}) do |(key, selector), attrs|
43
38
  attrs[key] = page.css(selector).text.strip
44
39
  end
45
40
  end
46
41
 
47
- def scrape_attribute_data(document=self.doc, attributes)
48
- length_of_scrape = document.css(attributes.first[1]).count
42
+ def scrape_single_page(attributes)
43
+ length_of_scrape = doc.css(attributes.first[1]).count
49
44
 
50
- final_array = length_of_scrape.times.map do |index|
51
- attributes.inject(Hash.new) do |hash, (attr_name, selector)|
52
- hash.merge(attr_name => document.css(selector)[index].text.strip) if document.css(selector)[index]
45
+ length_of_scrape.times.map do |index|
46
+ attributes.inject({}) do |hash, (attr_name, selector)|
47
+ hash.merge(attr_name => doc.css(selector)[index].text.strip) if doc.css(selector)[index]
53
48
  end
54
- end
55
-
56
- final_array.compact
49
+ end.compact
57
50
  end
58
51
 
59
52
  def create_return_objects(name, attributes)
@@ -62,4 +55,4 @@ module Proto
62
55
  attributes.map { |hash| Proto.const_get(name).new(hash) }
63
56
  end
64
57
  end
65
- end
58
+ end
@@ -1,3 +1,3 @@
1
1
  module Proto
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: proto
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: