getter_cyndi5 0.0.4 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2614abec8711942eeef9deb4e322fa112719f29e936371de5b7ac3f03f767833
4
- data.tar.gz: 53d049b3b8115bb0bfc859482bd1e27322b95cc3d93ec64927992b1b608e0151
3
+ metadata.gz: b4dcc442ee6a82b1bfbe21cd5b644b169000e6793f83dc30be7c24801e1ee0da
4
+ data.tar.gz: 326ec69f36cbb6a124029f824df09198328025e96b67203e3b758b8705d36801
5
5
  SHA512:
6
- metadata.gz: 177291453efc5d02b9259869d785b327e397d2c3b71d4ea97edf4d1c8b060b627614b15948bb6598398a687a315734a9a64a02e6b1bf06e4f3b5902087a1c140
7
- data.tar.gz: 2c7dd3e3ff2e91fddeadf79033976385c7a15e12334a553bc20a0f0232b529f4d211e37e804119057d4feac01f045894df1a64d4212ea639fcebc35895de1d7e
6
+ metadata.gz: ba57a3dd888a5c578ec699ec3fbccb613ddcc2a865d533e954aad6f0f19d8b2eb84c349f3a24eca359d0f3f608e2eaaa7c4a0bc01c601ab9871972805d8eb4dd
7
+ data.tar.gz: c709a5a1011055388456e7b6c2232364b0fe521fea2fe21943888b2de823c0c4eef4c47f5709231722fac662f04f65fc7364ad6db922d210f62ab3ff68f6b538
data/bin/getter_cyndi5 CHANGED
@@ -5,6 +5,17 @@ base_url = ARGV[0] || 'https://thehappyco.com'
5
5
  products_page_path = ARGV[1] || '/kelly/products'
6
6
  item_row_selector = ARGV[2] || '.item-row'
7
7
  item_anchor_selector = ARGV[3] || 'div > div.product-desc.text-center > div.product-title > h3 > a'
8
- mode = (ARGV[4] || '2').to_i
9
- filename = ARGV[5] || './tmp/document1.html'
10
- GetterCyndi5.go(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
8
+ item_price_selector = ARGV[4] || 'div > div.product-desc.text-center > div.product-price'
9
+ price_a_text = ARGV[5] || 'One Time Purchase'
10
+ price_b_text = ARGV[6] || 'SmartShip'
11
+ mode = (ARGV[7] || '2').to_i
12
+ filename = ARGV[8] || './tmp/test_document.html'
13
+ GetterCyndi5.go(base_url,
14
+ products_page_path,
15
+ item_row_selector,
16
+ item_anchor_selector,
17
+ item_price_selector,
18
+ price_a_text,
19
+ price_b_text,
20
+ mode,
21
+ filename)
data/lib/getter_cyndi5.rb CHANGED
@@ -3,26 +3,42 @@ class GetterCyndi5
3
3
  # Getter
4
4
  #
5
5
  # Example:
6
- # >> GetterCyndi5.go(base_url = 'https://thehappyco.com', products_page_path = '/kelly/products', item_row_selector = '.item-row', item_anchor_selector = 'div > div.product-desc.text-center > div.product-title > h3 > a', mode = 2, './tmp/document1.html')
6
+ # >> GetterCyndi5.go(
7
+ # base_url = 'https://thehappyco.com',
8
+ # products_page_path = '/kelly/products',
9
+ # item_row_selector = '.item-row',
10
+ # item_anchor_selector = 'div > div.product-desc.text-center > div.product-title > h3 > a',
11
+ # item_price_selector = 'div > div.product-desc.text-center > div.product-price',
12
+ # price_a_text = 'One Time Purchase',
13
+ # price_b_text = 'SmartShip',
14
+ # mode = 2,
15
+ # filename = './tmp/test_document.html'
16
+ # )
7
17
  # Arguments:
8
18
  # base_url: (String)
9
19
  # products_page_path: (String)
10
20
  # item_row_selector: (String)
11
21
  # item_anchor_selector: (String)
12
- # mode: (Integer) 0 = retrieve and parse without saving HTML document to file, 1 = retrieve and parse saving HTML document to file, 2 = load and parse HTML document from file
22
+ # item_price_selector: (String)
23
+ # price_a_text: (String)
24
+ # price_b_text: (String)
25
+ # mode: (Integer)
26
+ # 0 = retrieve and parse without saving HTML document to file,
27
+ # 1 = retrieve and parse saving HTML document to file,
28
+ # 2 = load and parse HTML document from file
13
29
  # filename: (String)
14
30
 
15
- def self.go(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
16
- all_products = products(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
31
+ def self.go(base_url, products_page_path, item_row_selector, item_anchor_selector, item_price_selector, price_a_text, price_b_text, mode, filename)
32
+ all_products = products(base_url, products_page_path, item_row_selector, item_anchor_selector, item_price_selector, price_a_text, price_b_text, mode, filename)
17
33
  all_products.each do |product|
18
- puts "#{product.name} :-: #{product.url}"
34
+ puts "#{product.item_code}, #{product.name}, #{product.url}, #{product.price_a}, #{product.price_b}"
19
35
  end
20
36
  end
21
37
 
22
- def self.products(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
38
+ def self.products(base_url, products_page_path, item_row_selector, item_anchor_selector, item_price_selector, price_a_text, price_b_text, mode, filename)
23
39
  retriever = Retriever.new(base_url, products_page_path, item_row_selector, mode, filename)
24
40
  retriever.retrieve
25
- parser = Parser.new(retriever.document, base_url, item_row_selector, item_anchor_selector)
41
+ parser = Parser.new(retriever.document, base_url, item_row_selector, item_anchor_selector, item_price_selector, price_a_text, price_b_text)
26
42
  products = parser.parse
27
43
  end
28
44
  end
@@ -1,11 +1,14 @@
1
1
  require 'getter_cyndi5/product'
2
2
  class GetterCyndi5::Parser
3
3
  attr_reader :products
4
- def initialize(document, base_url, item_row_selector, item_anchor_selector)
4
+ def initialize(document, base_url, item_row_selector, item_anchor_selector, item_price_selector, price_a_text, price_b_text)
5
5
  @document = document
6
6
  @base_url = base_url
7
7
  @item_row_selector = item_row_selector
8
8
  @item_anchor_selector = item_anchor_selector
9
+ @item_price_selector = item_price_selector
10
+ @price_a_text = price_a_text
11
+ @price_b_text = price_b_text
9
12
  @products = []
10
13
  end
11
14
 
@@ -15,7 +18,13 @@ class GetterCyndi5::Parser
15
18
  product_element = item_row.css(@item_anchor_selector)[0]
16
19
  product_name = product_element.text
17
20
  product_url = "#{@base_url}#{product_element.attributes['href']}"
18
- product = GetterCyndi5::Product.new(product_name, product_url)
21
+ product_item_code = item_row.attributes['data-itemcode'].value
22
+ product_prices = item_row.css(@item_price_selector)
23
+ price_a_element = product_prices.find { |price| price.children[1].text == @price_a_text }
24
+ price_a = price_a_element.nil? ? 0.0 : price_a_element.children[0].text.gsub(/[^\d\.]/, '').to_f
25
+ price_b_element = product_prices.find { |price| price.children[1].text == @price_b_text }
26
+ price_b = price_b_element.nil? ? 0.0 : price_b_element.children[0].text.gsub(/[^\d\.]/, '').to_f
27
+ product = GetterCyndi5::Product.new(product_name, product_url, product_item_code, price_a, price_b)
19
28
  products.append(product)
20
29
  end
21
30
  products
@@ -1,8 +1,9 @@
1
1
  class GetterCyndi5::Product
2
- attr_accessor :name, :url, :price_a, :price_b, :price_c
3
- def initialize(name, url, price_a=0.0, price_b=0.0, price_c=0.0)
2
+ attr_accessor :name, :url, :item_code, :price_a, :price_b, :price_c
3
+ def initialize(name, url, item_code, price_a=0.0, price_b=0.0, price_c=0.0)
4
4
  @name = name
5
5
  @url = url
6
+ @item_code = item_code
6
7
  @price_a = price_a
7
8
  @price_b = price_b
8
9
  @price_c = price_c
@@ -15,15 +15,17 @@ class GetterCyndi5::Retriever
15
15
 
16
16
  def retrieve()
17
17
  if @mode == 0 || @mode == 1
18
- browser = Watir::Browser.new
18
+ browser = Watir::Browser.new :chrome, args: %w[--headless --no-sandbox --disable-dev-shm-usage --disable-gpu --remote-debugging-port=9222]
19
19
  browser.goto(@products_page_url)
20
20
  item_row_elements = browser.elements(css: @item_row_selector)
21
21
  end
22
22
  if @mode == 1
23
23
  File.write(@filename, browser.html)
24
+ browser.close
24
25
  end
25
26
  if @mode == 0
26
27
  @document = Nokogiri::HTML(browser.html)
28
+ browser.close
27
29
  end
28
30
  if @mode == 1 || @mode == 2
29
31
  @document = File.open(@filename) { |f| Nokogiri::HTML(f) }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: getter_cyndi5
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cyndi Cavanaugh
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-12 00:00:00.000000000 Z
11
+ date: 2021-03-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty