getter_cyndi5 0.0.5 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7c8aca3dbcf1b13892236cb942a221c439183a8e0f4da646fb5611497ee45856
4
- data.tar.gz: fd7476bf359058cde81d6bad771696da1afd6b538fdd21d9b99fdfb2348c6c84
3
+ metadata.gz: f7933d7cb91df16f9c9848171515a7864e3f2422d42d08ba900606f8af0e72dd
4
+ data.tar.gz: 938d12d96067df1f1b4ca24aca6687ba289d0e2f5bfab5d136e85597aa7489f9
5
5
  SHA512:
6
- metadata.gz: b58eb918784e8e1b38fdc503efbf1ef5d798d9657b9838d320f8c88a155a98e34b6484d8b91f0babf764d6404275a1e74e4961bca74fbccd866fae69bd5ff512
7
- data.tar.gz: 3c5d0673dca79e796f457b2f1b74121f5c79191e386643d8d59d803dde63ce4d0465800d4e6ef9a3c1ba52d909778ecd31bf814a0c8675c6d1ab8b3980aed123
6
+ metadata.gz: 55c69b23808f5fbeed61730525d8b00db0a3008e4e7de93d0245bc9e7cd8fad3372e52f02060ea3af184a91546b71e21c40255aad49f6272b5a2e504a25255fd
7
+ data.tar.gz: 97258bb348db262ae19d9d0d9ca49283ee0f2ae5809bc8d81d034110b5cfcafe2fbb85c901eb8b6091df4ed7739ae0f9f3d6e7cd4c225ac1f90812ba7a25dc66
data/bin/getter_cyndi5 CHANGED
@@ -1,10 +1,14 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'getter_cyndi5'
4
- base_url = ARGV[0] || 'https://thehappyco.com'
5
- products_page_path = ARGV[1] || '/kelly/products'
6
- item_row_selector = ARGV[2] || '.item-row'
7
- item_anchor_selector = ARGV[3] || 'div > div.product-desc.text-center > div.product-title > h3 > a'
8
- mode = (ARGV[4] || '2').to_i
9
- filename = ARGV[5] || './tmp/document1.html'
10
- GetterCyndi5.go(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
4
+
5
+ options = {
6
+ base_url: ARGV[0] || 'https://thehappyco.com',
7
+ products_page_path: ARGV[1] || '/kelly/products',
8
+ item_row_selector: ARGV[2] || '.item-row',
9
+ item_anchor_selector: ARGV[3] || 'div > div.product-desc.text-center > div.product-title > h3 > a',
10
+ item_price_selector: ARGV[4] || 'div > div.product-desc.text-center > div.product-price',
11
+ mode: (ARGV[7] || '2').to_i,
12
+ filename: ARGV[8] || './tmp/test_document.html'
13
+ }
14
+ GetterCyndi5.go(**options)
data/lib/getter_cyndi5.rb CHANGED
@@ -3,26 +3,43 @@ class GetterCyndi5
3
3
  # Getter
4
4
  #
5
5
  # Example:
6
- # >> GetterCyndi5.go(base_url = 'https://thehappyco.com', products_page_path = '/kelly/products', item_row_selector = '.item-row', item_anchor_selector = 'div > div.product-desc.text-center > div.product-title > h3 > a', mode = 2, './tmp/document1.html')
6
+ # >> GetterCyndi5.go(
7
+ # base_url: 'https://thehappyco.com',
8
+ # products_page_path: '/kelly/products',
9
+ # item_row_selector: '.item-row',
10
+ # item_anchor_selector: 'div > div.product-desc.text-center > div.product-title > h3 > a',
11
+ # item_price_selector: 'div > div.product-desc.text-center > div.product-price',
12
+ # price_a_text: 'One Time Purchase',
13
+ # price_b_text: 'SmartShip',
14
+ # mode: 2,
15
+ # filename: './tmp/test_document.html'
16
+ # )
7
17
  # Arguments:
8
18
  # base_url: (String)
9
19
  # products_page_path: (String)
10
20
  # item_row_selector: (String)
11
21
  # item_anchor_selector: (String)
12
- # mode: (Integer) 0 = retrieve and parse without saving HTML document to file, 1 = retrieve and parse saving HTML document to file, 2 = load and parse HTML document from file
22
+ # item_price_selector: (String)
23
+ # price_a_text: (String)
24
+ # price_b_text: (String)
25
+ # mode: (Integer)
26
+ # 0 = retrieve and parse without saving HTML document to file,
27
+ # 1 = retrieve and parse saving HTML document to file,
28
+ # 2 = load and parse HTML document from file
13
29
  # filename: (String)
14
30
 
15
- def self.go(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
16
- all_products = products(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
31
+ def self.go(options = {})
32
+ all_products = products(**options)
17
33
  all_products.each do |product|
18
- puts "#{product.name} :-: #{product.url}"
34
+ puts "#{product.inspect}"
19
35
  end
20
36
  end
21
37
 
22
- def self.products(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
23
- retriever = Retriever.new(base_url, products_page_path, item_row_selector, mode, filename)
38
+ def self.products(options = {})
39
+ retriever = Retriever.new(**options)
24
40
  retriever.retrieve
25
- parser = Parser.new(retriever.document, base_url, item_row_selector, item_anchor_selector)
41
+
42
+ parser = Parser.new(document: retriever.document, **options)
26
43
  products = parser.parse
27
44
  end
28
45
  end
@@ -1,21 +1,32 @@
1
1
  require 'getter_cyndi5/product'
2
2
  class GetterCyndi5::Parser
3
3
  attr_reader :products
4
- def initialize(document, base_url, item_row_selector, item_anchor_selector)
5
- @document = document
6
- @base_url = base_url
7
- @item_row_selector = item_row_selector
8
- @item_anchor_selector = item_anchor_selector
4
+ def initialize(options = {})
5
+ @options = options
9
6
  @products = []
10
7
  end
11
8
 
12
- def parse
13
- item_rows = @document.css(@item_row_selector)
9
+ def parse
10
+ document = @options.fetch(:document)
11
+ base_url = @options.fetch(:base_url)
12
+ item_row_selector = @options.fetch(:item_row_selector)
13
+ item_anchor_selector = @options.fetch(:item_anchor_selector)
14
+ item_price_selector = @options.fetch(:item_price_selector)
15
+ item_rows = document.css(item_row_selector)
14
16
  item_rows.each do |item_row|
15
- product_element = item_row.css(@item_anchor_selector)[0]
16
- product_name = product_element.text
17
- product_url = "#{@base_url}#{product_element.attributes['href']}"
18
- product = GetterCyndi5::Product.new(product_name, product_url)
17
+ product_element = item_row.css(item_anchor_selector)[0]
18
+ price_elements = item_row.css(item_price_selector)
19
+ prices = {}
20
+ price_elements.each do |price_element|
21
+ prices[price_element.children[1].text] = price_element.children[0].text.gsub(/[^\d\.]/, '').to_f
22
+ end
23
+ attributes = {
24
+ name: product_element.text,
25
+ url: "#{base_url}#{product_element.attributes['href']}",
26
+ item_code: item_row.attributes['data-itemcode'].value,
27
+ prices: prices
28
+ }
29
+ product = GetterCyndi5::Product.new(attributes)
19
30
  products.append(product)
20
31
  end
21
32
  products
@@ -1,10 +1,6 @@
1
1
  class GetterCyndi5::Product
2
- attr_accessor :name, :url, :price_a, :price_b, :price_c
3
- def initialize(name, url, price_a=0.0, price_b=0.0, price_c=0.0)
4
- @name = name
5
- @url = url
6
- @price_a = price_a
7
- @price_b = price_b
8
- @price_c = price_c
2
+ attr_accessor :attributes
3
+ def initialize(attributes = {})
4
+ @attributes = attributes
9
5
  end
10
6
  end
@@ -4,33 +4,30 @@ require 'watir'
4
4
  require 'webdrivers'
5
5
 
6
6
  class GetterCyndi5::Retriever
7
- def initialize(base_url, products_page_path, item_row_selector, mode, filename)
8
- @base_url = base_url
9
- @products_page_path = products_page_path
10
- @products_page_url = "#{@base_url}#{@products_page_path}"
11
- @item_row_selector = item_row_selector
12
- @mode = mode
13
- @filename = filename
7
+ def initialize(options = {})
8
+ @options = options
14
9
  end
15
10
 
11
+ attr_reader :document
12
+ attr_reader :item_row_elements
16
13
  def retrieve()
17
- if @mode == 0 || @mode == 1
18
- browser = Watir::Browser.new
19
- browser.goto(@products_page_url)
20
- item_row_elements = browser.elements(css: @item_row_selector)
14
+ products_page_url = "#{@options.fetch(:base_url)}#{@options.fetch(:products_page_path)}"
15
+ mode = @options.fetch(:mode)
16
+ if mode == 0 || mode == 1
17
+ browser = Watir::Browser.new :chrome, args: %w[--headless --no-sandbox --disable-dev-shm-usage --disable-gpu --remote-debugging-port=9222]
18
+ browser.goto(products_page_url)
19
+ @item_row_elements = browser.elements(css: @options.fetch(:item_row_selector))
21
20
  end
22
- if @mode == 1
23
- File.write(@filename, browser.html)
21
+ if mode == 1
22
+ File.write(@options.fetch(:filename), browser.html)
23
+ browser.close
24
24
  end
25
- if @mode == 0
25
+ if mode == 0
26
26
  @document = Nokogiri::HTML(browser.html)
27
+ browser.close
27
28
  end
28
- if @mode == 1 || @mode == 2
29
- @document = File.open(@filename) { |f| Nokogiri::HTML(f) }
29
+ if mode == 1 || mode == 2
30
+ @document = File.open(@options.fetch(:filename)) { |f| Nokogiri::HTML(f) }
30
31
  end
31
32
  end
32
-
33
- def document
34
- @document
35
- end
36
33
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: getter_cyndi5
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cyndi Cavanaugh
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-12 00:00:00.000000000 Z
11
+ date: 2021-03-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty