getter_cyndi5 0.0.6 → 2.0.1

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1cd951e59fcde272f1b72e45d7a7f935a966e193bdf9b91bd219864cdf00db22
4
- data.tar.gz: 60023272d347c7d374ec501559d547c5d5fcdfd0998bc3936b23935633a5b86a
3
+ metadata.gz: 8897522fa8be7a71ee0a93636ad6c8250dc8ceb4b774e4682b6add74440e3561
4
+ data.tar.gz: 2f457d708b7c100b56a354101efe367299e6bdb97dbaa6c9f9b019b9f68b9318
5
5
  SHA512:
6
- metadata.gz: 24f20cbe4de2956352b27c1603d726c6797b2022db27b9afd0a531058645d5cd481dd283bb57e823d0faa223d67f02bb58e7e54af2b333752dbaafb18bb0d298
7
- data.tar.gz: d175ff2ce3b5bb583dd574455c73dd63d098780c6f5d304ee9d84744be9901ea4b7025a49450973aa3c979671c3f7864ac958c3c062747b1540b5e1a59df2a2c
6
+ metadata.gz: 5d627257e6e981258d321032a5959c0ddee38193606ae0d1f1538a2bebbc19b31c1570b477131ed29075b0c71cb1de9c1811eebf431c73d118f135689535383d
7
+ data.tar.gz: 3280ea73a619aea5be935a3fd7f2e622deecf7786e1d3967f3c22cc38e38678f804e21cc909e7e132791fe9ab5a69af36274f6b9010301113dcf1ae8525c0f23
data/bin/getter_cyndi5 CHANGED
@@ -1,10 +1,14 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'getter_cyndi5'
4
- base_url = ARGV[0] || 'https://thehappyco.com'
5
- products_page_path = ARGV[1] || '/kelly/products'
6
- item_row_selector = ARGV[2] || '.item-row'
7
- item_anchor_selector = ARGV[3] || 'div > div.product-desc.text-center > div.product-title > h3 > a'
8
- mode = (ARGV[4] || '2').to_i
9
- filename = ARGV[5] || './tmp/document1.html'
10
- GetterCyndi5.go(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
4
+
5
+ options = {
6
+ base_url: ARGV[0] || 'https://thehappyco.com',
7
+ products_page_path: ARGV[1] || '/kelly/products',
8
+ item_row_selector: ARGV[2] || '.item-row',
9
+ item_anchor_selector: ARGV[3] || 'div > div.product-desc.text-center > div.product-title > h3 > a',
10
+ item_price_selector: ARGV[4] || 'div > div.product-desc.text-center > div.product-price',
11
+ mode: (ARGV[7] || '2').to_i,
12
+ filename: ARGV[8] || './tmp/test_document.html'
13
+ }
14
+ GetterCyndi5.go(**options)
data/lib/getter_cyndi5.rb CHANGED
@@ -3,26 +3,43 @@ class GetterCyndi5
3
3
  # Getter
4
4
  #
5
5
  # Example:
6
- # >> GetterCyndi5.go(base_url = 'https://thehappyco.com', products_page_path = '/kelly/products', item_row_selector = '.item-row', item_anchor_selector = 'div > div.product-desc.text-center > div.product-title > h3 > a', mode = 2, './tmp/document1.html')
6
+ # >> GetterCyndi5.go(
7
+ # base_url = 'https://thehappyco.com',
8
+ # products_page_path = '/kelly/products',
9
+ # item_row_selector = '.item-row',
10
+ # item_anchor_selector = 'div > div.product-desc.text-center > div.product-title > h3 > a',
11
+ # item_price_selector = 'div > div.product-desc.text-center > div.product-price',
12
+ # price_a_text: 'One Time Purchase',
13
+ # price_b_text: 'SmartShip',
14
+ # mode = 2,
15
+ # filename = './tmp/test_document.html'
16
+ # )
7
17
  # Arguments:
8
18
  # base_url: (String)
9
19
  # products_page_path: (String)
10
20
  # item_row_selector: (String)
11
21
  # item_anchor_selector: (String)
12
- # mode: (Integer) 0 = retrieve and parse without saving HTML document to file, 1 = retrieve and parse saving HTML document to file, 2 = load and parse HTML document from file
22
+ # item_price_selector: (String)
23
+ # price_a_text: (String)
24
+ # price_b_text: (String)
25
+ # mode: (Integer)
26
+ # 0 = retrieve and parse without saving HTML document to file,
27
+ # 1 = retrieve and parse saving HTML document to file,
28
+ # 2 = load and parse HTML document from file
13
29
  # filename: (String)
14
30
 
15
- def self.go(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
16
- all_products = products(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
31
+ def self.go(options = {})
32
+ all_products = products(**options)
17
33
  all_products.each do |product|
18
- puts "#{product.name} :-: #{product.url}"
34
+ puts "#{product.inspect}"
19
35
  end
20
36
  end
21
37
 
22
- def self.products(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
23
- retriever = Retriever.new(base_url, products_page_path, item_row_selector, mode, filename)
38
+ def self.products(options = {})
39
+ retriever = Retriever.new(**options)
24
40
  retriever.retrieve
25
- parser = Parser.new(retriever.document, base_url, item_row_selector, item_anchor_selector)
41
+
42
+ parser = Parser.new(document: retriever.document, **options)
26
43
  products = parser.parse
27
44
  end
28
45
  end
@@ -1,21 +1,34 @@
1
1
  require 'getter_cyndi5/product'
2
2
  class GetterCyndi5::Parser
3
+ DATA_PREFIX = 'data-'.freeze
3
4
  attr_reader :products
4
- def initialize(document, base_url, item_row_selector, item_anchor_selector)
5
- @document = document
6
- @base_url = base_url
7
- @item_row_selector = item_row_selector
8
- @item_anchor_selector = item_anchor_selector
5
+ def initialize(options = {})
6
+ @options = options
9
7
  @products = []
10
8
  end
11
9
 
12
- def parse
13
- item_rows = @document.css(@item_row_selector)
10
+ def parse
11
+ document = @options.fetch(:document)
12
+ base_url = @options.fetch(:base_url)
13
+ item_row_selector = @options.fetch(:item_row_selector)
14
+ item_anchor_selector = @options.fetch(:item_anchor_selector)
15
+ item_price_selector = @options.fetch(:item_price_selector)
16
+ item_rows = document.css(item_row_selector)
14
17
  item_rows.each do |item_row|
15
- product_element = item_row.css(@item_anchor_selector)[0]
16
- product_name = product_element.text
17
- product_url = "#{@base_url}#{product_element.attributes['href']}"
18
- product = GetterCyndi5::Product.new(product_name, product_url)
18
+ item_data = item_row.attributes.select { |k, v| k.start_with? DATA_PREFIX }.map { |k, v| [k.delete_prefix(DATA_PREFIX), v.value]}.to_h
19
+ product_element = item_row.css(item_anchor_selector)[0]
20
+ price_elements = item_row.css(item_price_selector)
21
+ prices = {}
22
+ price_elements.each do |price_element|
23
+ prices[price_element.children[1].text] = price_element.children[0].text.gsub(/[^\d\.]/, '').to_f
24
+ end
25
+ attributes = {
26
+ name: product_element.text,
27
+ url: "#{base_url}#{product_element.attributes['href']}",
28
+ item_data: item_data,
29
+ prices: prices
30
+ }
31
+ product = GetterCyndi5::Product.new(attributes)
19
32
  products.append(product)
20
33
  end
21
34
  products
@@ -1,10 +1,6 @@
1
1
  class GetterCyndi5::Product
2
- attr_accessor :name, :url, :price_a, :price_b, :price_c
3
- def initialize(name, url, price_a=0.0, price_b=0.0, price_c=0.0)
4
- @name = name
5
- @url = url
6
- @price_a = price_a
7
- @price_b = price_b
8
- @price_c = price_c
2
+ attr_accessor :attributes
3
+ def initialize(attributes = {})
4
+ @attributes = attributes
9
5
  end
10
6
  end
@@ -4,34 +4,30 @@ require 'watir'
4
4
  require 'webdrivers'
5
5
 
6
6
  class GetterCyndi5::Retriever
7
- def initialize(base_url, products_page_path, item_row_selector, mode, filename)
8
- @base_url = base_url
9
- @products_page_path = products_page_path
10
- @products_page_url = "#{@base_url}#{@products_page_path}"
11
- @item_row_selector = item_row_selector
12
- @mode = mode
13
- @filename = filename
7
+ def initialize(options = {})
8
+ @options = options
14
9
  end
15
10
 
11
+ attr_reader :document
12
+ attr_reader :item_row_elements
16
13
  def retrieve()
17
- if @mode == 0 || @mode == 1
18
- browser = Watir::Browser.new
19
- browser.goto(@products_page_url)
20
- item_row_elements = browser.elements(css: @item_row_selector)
21
- browser.close
14
+ products_page_url = "#{@options.fetch(:base_url)}#{@options.fetch(:products_page_path)}"
15
+ mode = @options.fetch(:mode)
16
+ if mode == 0 || mode == 1
17
+ browser = Watir::Browser.new :chrome, args: %w[--headless --no-sandbox --disable-dev-shm-usage --disable-gpu --remote-debugging-port=9222]
18
+ browser.goto(products_page_url)
19
+ @item_row_elements = browser.elements(css: @options.fetch(:item_row_selector))
22
20
  end
23
- if @mode == 1
24
- File.write(@filename, browser.html)
21
+ if mode == 1
22
+ File.write(@options.fetch(:filename), browser.html)
23
+ browser.close
25
24
  end
26
- if @mode == 0
25
+ if mode == 0
27
26
  @document = Nokogiri::HTML(browser.html)
27
+ browser.close
28
28
  end
29
- if @mode == 1 || @mode == 2
30
- @document = File.open(@filename) { |f| Nokogiri::HTML(f) }
29
+ if mode == 1 || mode == 2
30
+ @document = File.open(@options.fetch(:filename)) { |f| Nokogiri::HTML(f) }
31
31
  end
32
32
  end
33
-
34
- def document
35
- @document
36
- end
37
33
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: getter_cyndi5
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 2.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cyndi Cavanaugh
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-12 00:00:00.000000000 Z
11
+ date: 2021-03-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty