getter_cyndi5 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/getter_cyndi5 +11 -18
- data/lib/getter_cyndi5.rb +7 -6
- data/lib/getter_cyndi5/parser.rb +22 -20
- data/lib/getter_cyndi5/product.rb +3 -8
- data/lib/getter_cyndi5/retriever.rb +14 -19
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f7933d7cb91df16f9c9848171515a7864e3f2422d42d08ba900606f8af0e72dd
|
4
|
+
data.tar.gz: 938d12d96067df1f1b4ca24aca6687ba289d0e2f5bfab5d136e85597aa7489f9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 55c69b23808f5fbeed61730525d8b00db0a3008e4e7de93d0245bc9e7cd8fad3372e52f02060ea3af184a91546b71e21c40255aad49f6272b5a2e504a25255fd
|
7
|
+
data.tar.gz: 97258bb348db262ae19d9d0d9ca49283ee0f2ae5809bc8d81d034110b5cfcafe2fbb85c901eb8b6091df4ed7739ae0f9f3d6e7cd4c225ac1f90812ba7a25dc66
|
data/bin/getter_cyndi5
CHANGED
@@ -1,21 +1,14 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'getter_cyndi5'
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
mode
|
12
|
-
filename
|
13
|
-
|
14
|
-
|
15
|
-
item_row_selector,
|
16
|
-
item_anchor_selector,
|
17
|
-
item_price_selector,
|
18
|
-
price_a_text,
|
19
|
-
price_b_text,
|
20
|
-
mode,
|
21
|
-
filename)
|
4
|
+
|
5
|
+
options = {
|
6
|
+
base_url: ARGV[0] || 'https://thehappyco.com',
|
7
|
+
products_page_path: ARGV[1] || '/kelly/products',
|
8
|
+
item_row_selector: ARGV[2] || '.item-row',
|
9
|
+
item_anchor_selector: ARGV[3] || 'div > div.product-desc.text-center > div.product-title > h3 > a',
|
10
|
+
item_price_selector: ARGV[4] || 'div > div.product-desc.text-center > div.product-price',
|
11
|
+
mode: (ARGV[7] || '2').to_i,
|
12
|
+
filename: ARGV[8] || './tmp/test_document.html'
|
13
|
+
}
|
14
|
+
GetterCyndi5.go(**options)
|
data/lib/getter_cyndi5.rb
CHANGED
@@ -28,17 +28,18 @@ class GetterCyndi5
|
|
28
28
|
# 2 = load and parse HTML document from file
|
29
29
|
# filename: (String)
|
30
30
|
|
31
|
-
def self.go(
|
32
|
-
all_products = products(
|
31
|
+
def self.go(options = {})
|
32
|
+
all_products = products(**options)
|
33
33
|
all_products.each do |product|
|
34
|
-
puts "#{product.
|
34
|
+
puts "#{product.inspect}"
|
35
35
|
end
|
36
36
|
end
|
37
37
|
|
38
|
-
def self.products(
|
39
|
-
retriever = Retriever.new(
|
38
|
+
def self.products(options = {})
|
39
|
+
retriever = Retriever.new(**options)
|
40
40
|
retriever.retrieve
|
41
|
-
|
41
|
+
|
42
|
+
parser = Parser.new(document: retriever.document, **options)
|
42
43
|
products = parser.parse
|
43
44
|
end
|
44
45
|
end
|
data/lib/getter_cyndi5/parser.rb
CHANGED
@@ -1,30 +1,32 @@
|
|
1
1
|
require 'getter_cyndi5/product'
|
2
2
|
class GetterCyndi5::Parser
|
3
3
|
attr_reader :products
|
4
|
-
def initialize(
|
5
|
-
@
|
6
|
-
@base_url = base_url
|
7
|
-
@item_row_selector = item_row_selector
|
8
|
-
@item_anchor_selector = item_anchor_selector
|
9
|
-
@item_price_selector = item_price_selector
|
10
|
-
@price_a_text = price_a_text
|
11
|
-
@price_b_text = price_b_text
|
4
|
+
def initialize(options = {})
|
5
|
+
@options = options
|
12
6
|
@products = []
|
13
7
|
end
|
14
8
|
|
15
|
-
def parse
|
16
|
-
|
9
|
+
def parse
|
10
|
+
document = @options.fetch(:document)
|
11
|
+
base_url = @options.fetch(:base_url)
|
12
|
+
item_row_selector = @options.fetch(:item_row_selector)
|
13
|
+
item_anchor_selector = @options.fetch(:item_anchor_selector)
|
14
|
+
item_price_selector = @options.fetch(:item_price_selector)
|
15
|
+
item_rows = document.css(item_row_selector)
|
17
16
|
item_rows.each do |item_row|
|
18
|
-
product_element = item_row.css(
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
17
|
+
product_element = item_row.css(item_anchor_selector)[0]
|
18
|
+
price_elements = item_row.css(item_price_selector)
|
19
|
+
prices = {}
|
20
|
+
price_elements.each do |price_element|
|
21
|
+
prices[price_element.children[1].text] = price_element.children[0].text.gsub(/[^\d\.]/, '').to_f
|
22
|
+
end
|
23
|
+
attributes = {
|
24
|
+
name: product_element.text,
|
25
|
+
url: "#{base_url}#{product_element.attributes['href']}",
|
26
|
+
item_code: item_row.attributes['data-itemcode'].value,
|
27
|
+
prices: prices
|
28
|
+
}
|
29
|
+
product = GetterCyndi5::Product.new(attributes)
|
28
30
|
products.append(product)
|
29
31
|
end
|
30
32
|
products
|
data/lib/getter_cyndi5/product.rb
CHANGED
@@ -1,11 +1,6 @@
|
|
1
1
|
class GetterCyndi5::Product
|
2
|
-
attr_accessor :
|
3
|
-
def initialize(
|
4
|
-
@
|
5
|
-
@url = url
|
6
|
-
@item_code = item_code
|
7
|
-
@price_a = price_a
|
8
|
-
@price_b = price_b
|
9
|
-
@price_c = price_c
|
2
|
+
attr_accessor :attributes
|
3
|
+
def initialize(attributes = {})
|
4
|
+
@attributes = attributes
|
10
5
|
end
|
11
6
|
end
|
data/lib/getter_cyndi5/retriever.rb
CHANGED
@@ -4,35 +4,30 @@ require 'watir'
|
|
4
4
|
require 'webdrivers'
|
5
5
|
|
6
6
|
class GetterCyndi5::Retriever
|
7
|
-
def initialize(
|
8
|
-
@
|
9
|
-
@products_page_path = products_page_path
|
10
|
-
@products_page_url = "#{@base_url}#{@products_page_path}"
|
11
|
-
@item_row_selector = item_row_selector
|
12
|
-
@mode = mode
|
13
|
-
@filename = filename
|
7
|
+
def initialize(options = {})
|
8
|
+
@options = options
|
14
9
|
end
|
15
10
|
|
11
|
+
attr_reader :document
|
12
|
+
attr_reader :item_row_elements
|
16
13
|
def retrieve()
|
17
|
-
|
14
|
+
products_page_url = "#{@options.fetch(:base_url)}#{@options.fetch(:products_page_path)}"
|
15
|
+
mode = @options.fetch(:mode)
|
16
|
+
if mode == 0 || mode == 1
|
18
17
|
browser = Watir::Browser.new :chrome, args: %w[--headless --no-sandbox --disable-dev-shm-usage --disable-gpu --remote-debugging-port=9222]
|
19
|
-
browser.goto(
|
20
|
-
item_row_elements = browser.elements(css: @item_row_selector)
|
18
|
+
browser.goto(products_page_url)
|
19
|
+
@item_row_elements = browser.elements(css: @options.fetch(:item_row_selector))
|
21
20
|
end
|
22
|
-
if
|
23
|
-
File.write(@filename, browser.html)
|
21
|
+
if mode == 1
|
22
|
+
File.write(@options.fetch(:filename), browser.html)
|
24
23
|
browser.close
|
25
24
|
end
|
26
|
-
if
|
25
|
+
if mode == 0
|
27
26
|
@document = Nokogiri::HTML(browser.html)
|
28
27
|
browser.close
|
29
28
|
end
|
30
|
-
if
|
31
|
-
@document = File.open(@filename) { |f| Nokogiri::HTML(f) }
|
29
|
+
if mode == 1 || mode == 2
|
30
|
+
@document = File.open(@options.fetch(:filename)) { |f| Nokogiri::HTML(f) }
|
32
31
|
end
|
33
32
|
end
|
34
|
-
|
35
|
-
def document
|
36
|
-
@document
|
37
|
-
end
|
38
33
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: getter_cyndi5
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Cyndi Cavanaugh
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-03-
|
11
|
+
date: 2021-03-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|