getter_cyndi5 0.0.4 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/getter_cyndi5 +14 -3
- data/lib/getter_cyndi5.rb +23 -7
- data/lib/getter_cyndi5/parser.rb +11 -2
- data/lib/getter_cyndi5/product.rb +3 -2
- data/lib/getter_cyndi5/retriever.rb +3 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b4dcc442ee6a82b1bfbe21cd5b644b169000e6793f83dc30be7c24801e1ee0da
|
4
|
+
data.tar.gz: 326ec69f36cbb6a124029f824df09198328025e96b67203e3b758b8705d36801
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ba57a3dd888a5c578ec699ec3fbccb613ddcc2a865d533e954aad6f0f19d8b2eb84c349f3a24eca359d0f3f608e2eaaa7c4a0bc01c601ab9871972805d8eb4dd
|
7
|
+
data.tar.gz: c709a5a1011055388456e7b6c2232364b0fe521fea2fe21943888b2de823c0c4eef4c47f5709231722fac662f04f65fc7364ad6db922d210f62ab3ff68f6b538
|
data/bin/getter_cyndi5
CHANGED
@@ -5,6 +5,17 @@ base_url = ARGV[0] || 'https://thehappyco.com'
|
|
5
5
|
products_page_path = ARGV[1] || '/kelly/products'
|
6
6
|
item_row_selector = ARGV[2] || '.item-row'
|
7
7
|
item_anchor_selector = ARGV[3] || 'div > div.product-desc.text-center > div.product-title > h3 > a'
|
8
|
-
|
9
|
-
|
10
|
-
|
8
|
+
item_price_selector = ARGV[4] || 'div > div.product-desc.text-center > div.product-price'
|
9
|
+
price_a_text = ARGV[5] || 'One Time Purchase'
|
10
|
+
price_b_text = ARGV[6] || 'SmartShip'
|
11
|
+
mode = (ARGV[7] || '2').to_i
|
12
|
+
filename = ARGV[8] || './tmp/test_document.html'
|
13
|
+
GetterCyndi5.go(base_url,
|
14
|
+
products_page_path,
|
15
|
+
item_row_selector,
|
16
|
+
item_anchor_selector,
|
17
|
+
item_price_selector,
|
18
|
+
price_a_text,
|
19
|
+
price_b_text,
|
20
|
+
mode,
|
21
|
+
filename)
|
data/lib/getter_cyndi5.rb
CHANGED
@@ -3,26 +3,42 @@ class GetterCyndi5
|
|
3
3
|
# Getter
|
4
4
|
#
|
5
5
|
# Example:
|
6
|
-
# >> GetterCyndi5.go(
|
6
|
+
# >> GetterCyndi5.go(
|
7
|
+
# base_url = 'https://thehappyco.com',
|
8
|
+
# products_page_path = '/kelly/products',
|
9
|
+
# item_row_selector = '.item-row',
|
10
|
+
# item_anchor_selector = 'div > div.product-desc.text-center > div.product-title > h3 > a',
|
11
|
+
# item_price_selector = 'div > div.product-desc.text-center > div.product-price',
|
12
|
+
# price_a_text: 'One Time Purchase',
|
13
|
+
# price_b_text: 'SmartShip',
|
14
|
+
# mode = 2,
|
15
|
+
# filename = './tmp/test_document.html'
|
16
|
+
# )
|
7
17
|
# Arguments:
|
8
18
|
# base_url: (String)
|
9
19
|
# products_page_path: (String)
|
10
20
|
# item_row_selector: (String)
|
11
21
|
# item_anchor_selector: (String)
|
12
|
-
#
|
22
|
+
# item_price_selector: (String)
|
23
|
+
# price_a_text: (String)
|
24
|
+
# price_b_text: (String)
|
25
|
+
# mode: (Integer)
|
26
|
+
# 0 = retrieve and parse without saving HTML document to file,
|
27
|
+
# 1 = retrieve and parse saving HTML document to file,
|
28
|
+
# 2 = load and parse HTML document from file
|
13
29
|
# filename: (String)
|
14
30
|
|
15
|
-
def self.go(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
|
16
|
-
all_products = products(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
|
31
|
+
def self.go(base_url, products_page_path, item_row_selector, item_anchor_selector, item_price_selector, price_a_text, price_b_text, mode, filename)
|
32
|
+
all_products = products(base_url, products_page_path, item_row_selector, item_anchor_selector, item_price_selector, price_a_text, price_b_text, mode, filename)
|
17
33
|
all_products.each do |product|
|
18
|
-
puts "#{product.name}
|
34
|
+
puts "#{product.item_code}, #{product.name}, #{product.url}, #{product.price_a}, #{product.price_b}"
|
19
35
|
end
|
20
36
|
end
|
21
37
|
|
22
|
-
def self.products(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
|
38
|
+
def self.products(base_url, products_page_path, item_row_selector, item_anchor_selector, item_price_selector, price_a_text, price_b_text, mode, filename)
|
23
39
|
retriever = Retriever.new(base_url, products_page_path, item_row_selector, mode, filename)
|
24
40
|
retriever.retrieve
|
25
|
-
parser = Parser.new(retriever.document, base_url, item_row_selector, item_anchor_selector)
|
41
|
+
parser = Parser.new(retriever.document, base_url, item_row_selector, item_anchor_selector, item_price_selector, price_a_text, price_b_text)
|
26
42
|
products = parser.parse
|
27
43
|
end
|
28
44
|
end
|
data/lib/getter_cyndi5/parser.rb
CHANGED
@@ -1,11 +1,14 @@
|
|
1
1
|
require 'getter_cyndi5/product'
|
2
2
|
class GetterCyndi5::Parser
|
3
3
|
attr_reader :products
|
4
|
-
def initialize(document, base_url, item_row_selector, item_anchor_selector)
|
4
|
+
def initialize(document, base_url, item_row_selector, item_anchor_selector, item_price_selector, price_a_text, price_b_text)
|
5
5
|
@document = document
|
6
6
|
@base_url = base_url
|
7
7
|
@item_row_selector = item_row_selector
|
8
8
|
@item_anchor_selector = item_anchor_selector
|
9
|
+
@item_price_selector = item_price_selector
|
10
|
+
@price_a_text = price_a_text
|
11
|
+
@price_b_text = price_b_text
|
9
12
|
@products = []
|
10
13
|
end
|
11
14
|
|
@@ -15,7 +18,13 @@ class GetterCyndi5::Parser
|
|
15
18
|
product_element = item_row.css(@item_anchor_selector)[0]
|
16
19
|
product_name = product_element.text
|
17
20
|
product_url = "#{@base_url}#{product_element.attributes['href']}"
|
18
|
-
|
21
|
+
product_item_code = item_row.attributes['data-itemcode'].value
|
22
|
+
product_prices = item_row.css(@item_price_selector)
|
23
|
+
price_a_element = product_prices.find { |price| price.children[1].text == @price_a_text }
|
24
|
+
price_a = price_a_element.nil? ? 0.0 : price_a_element.children[0].text.gsub(/[^\d\.]/, '').to_f
|
25
|
+
price_b_element = product_prices.find { |price| price.children[1].text == @price_b_text }
|
26
|
+
price_b = price_b_element.nil? ? 0.0 : price_b_element.children[0].text.gsub(/[^\d\.]/, '').to_f
|
27
|
+
product = GetterCyndi5::Product.new(product_name, product_url, product_item_code, price_a, price_b)
|
19
28
|
products.append(product)
|
20
29
|
end
|
21
30
|
products
|
data/lib/getter_cyndi5/product.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
class GetterCyndi5::Product
|
2
|
-
attr_accessor :name, :url, :price_a, :price_b, :price_c
|
3
|
-
def initialize(name, url, price_a=0.0, price_b=0.0, price_c=0.0)
|
2
|
+
attr_accessor :name, :url, :item_code, :price_a, :price_b, :price_c
|
3
|
+
def initialize(name, url, item_code, price_a=0.0, price_b=0.0, price_c=0.0)
|
4
4
|
@name = name
|
5
5
|
@url = url
|
6
|
+
@item_code = item_code
|
6
7
|
@price_a = price_a
|
7
8
|
@price_b = price_b
|
8
9
|
@price_c = price_c
|
data/lib/getter_cyndi5/retriever.rb
CHANGED
@@ -15,15 +15,17 @@ class GetterCyndi5::Retriever
|
|
15
15
|
|
16
16
|
def retrieve()
|
17
17
|
if @mode == 0 || @mode == 1
|
18
|
-
browser = Watir::Browser.new
|
18
|
+
browser = Watir::Browser.new :chrome, args: %w[--headless --no-sandbox --disable-dev-shm-usage --disable-gpu --remote-debugging-port=9222]
|
19
19
|
browser.goto(@products_page_url)
|
20
20
|
item_row_elements = browser.elements(css: @item_row_selector)
|
21
21
|
end
|
22
22
|
if @mode == 1
|
23
23
|
File.write(@filename, browser.html)
|
24
|
+
browser.close
|
24
25
|
end
|
25
26
|
if @mode == 0
|
26
27
|
@document = Nokogiri::HTML(browser.html)
|
28
|
+
browser.close
|
27
29
|
end
|
28
30
|
if @mode == 1 || @mode == 2
|
29
31
|
@document = File.open(@filename) { |f| Nokogiri::HTML(f) }
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: getter_cyndi5
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Cyndi Cavanaugh
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-03-
|
11
|
+
date: 2021-03-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|