getter_cyndi5 0.0.5 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/getter_cyndi5 +11 -7
- data/lib/getter_cyndi5.rb +25 -8
- data/lib/getter_cyndi5/parser.rb +22 -11
- data/lib/getter_cyndi5/product.rb +3 -7
- data/lib/getter_cyndi5/retriever.rb +17 -20
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f7933d7cb91df16f9c9848171515a7864e3f2422d42d08ba900606f8af0e72dd
|
4
|
+
data.tar.gz: 938d12d96067df1f1b4ca24aca6687ba289d0e2f5bfab5d136e85597aa7489f9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 55c69b23808f5fbeed61730525d8b00db0a3008e4e7de93d0245bc9e7cd8fad3372e52f02060ea3af184a91546b71e21c40255aad49f6272b5a2e504a25255fd
|
7
|
+
data.tar.gz: 97258bb348db262ae19d9d0d9ca49283ee0f2ae5809bc8d81d034110b5cfcafe2fbb85c901eb8b6091df4ed7739ae0f9f3d6e7cd4c225ac1f90812ba7a25dc66
|
data/bin/getter_cyndi5
CHANGED
@@ -1,10 +1,14 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'getter_cyndi5'
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
4
|
+
|
5
|
+
options = {
|
6
|
+
base_url: ARGV[0] || 'https://thehappyco.com',
|
7
|
+
products_page_path: ARGV[1] || '/kelly/products',
|
8
|
+
item_row_selector: ARGV[2] || '.item-row',
|
9
|
+
item_anchor_selector: ARGV[3] || 'div > div.product-desc.text-center > div.product-title > h3 > a',
|
10
|
+
item_price_selector: ARGV[4] || 'div > div.product-desc.text-center > div.product-price',
|
11
|
+
mode: (ARGV[7] || '2').to_i,
|
12
|
+
filename: ARGV[8] || './tmp/test_document.html'
|
13
|
+
}
|
14
|
+
GetterCyndi5.go(**options)
|
data/lib/getter_cyndi5.rb
CHANGED
@@ -3,26 +3,43 @@ class GetterCyndi5
|
|
3
3
|
# Getter
|
4
4
|
#
|
5
5
|
# Example:
|
6
|
-
# >> GetterCyndi5.go(
|
6
|
+
# >> GetterCyndi5.go(
|
7
|
+
# base_url = 'https://thehappyco.com',
|
8
|
+
# products_page_path = '/kelly/products',
|
9
|
+
# item_row_selector = '.item-row',
|
10
|
+
# item_anchor_selector = 'div > div.product-desc.text-center > div.product-title > h3 > a',
|
11
|
+
# item_price_selector = 'div > div.product-desc.text-center > div.product-price',
|
12
|
+
# price_a_text: 'One Time Purchase',
|
13
|
+
# price_b_text: 'SmartShip',
|
14
|
+
# mode = 2,
|
15
|
+
# filename = './tmp/test_document.html'
|
16
|
+
# )
|
7
17
|
# Arguments:
|
8
18
|
# base_url: (String)
|
9
19
|
# products_page_path: (String)
|
10
20
|
# item_row_selector: (String)
|
11
21
|
# item_anchor_selector: (String)
|
12
|
-
#
|
22
|
+
# item_price_selector: (String)
|
23
|
+
# price_a_text: (String)
|
24
|
+
# price_b_text: (String)
|
25
|
+
# mode: (Integer)
|
26
|
+
# 0 = retrieve and parse without saving HTML document to file,
|
27
|
+
# 1 = retrieve and parse saving HTML document to file,
|
28
|
+
# 2 = load and parse HTML document from file
|
13
29
|
# filename: (String)
|
14
30
|
|
15
|
-
def self.go(
|
16
|
-
all_products = products(
|
31
|
+
def self.go(options = {})
|
32
|
+
all_products = products(**options)
|
17
33
|
all_products.each do |product|
|
18
|
-
puts "#{product.
|
34
|
+
puts "#{product.inspect}"
|
19
35
|
end
|
20
36
|
end
|
21
37
|
|
22
|
-
def self.products(
|
23
|
-
retriever = Retriever.new(
|
38
|
+
def self.products(options = {})
|
39
|
+
retriever = Retriever.new(**options)
|
24
40
|
retriever.retrieve
|
25
|
-
|
41
|
+
|
42
|
+
parser = Parser.new(document: retriever.document, **options)
|
26
43
|
products = parser.parse
|
27
44
|
end
|
28
45
|
end
|
data/lib/getter_cyndi5/parser.rb
CHANGED
@@ -1,21 +1,32 @@
|
|
1
1
|
require 'getter_cyndi5/product'
|
2
2
|
class GetterCyndi5::Parser
|
3
3
|
attr_reader :products
|
4
|
-
def initialize(
|
5
|
-
@
|
6
|
-
@base_url = base_url
|
7
|
-
@item_row_selector = item_row_selector
|
8
|
-
@item_anchor_selector = item_anchor_selector
|
4
|
+
def initialize(options = {})
|
5
|
+
@options = options
|
9
6
|
@products = []
|
10
7
|
end
|
11
8
|
|
12
|
-
def parse
|
13
|
-
|
9
|
+
def parse
|
10
|
+
document = @options.fetch(:document)
|
11
|
+
base_url = @options.fetch(:base_url)
|
12
|
+
item_row_selector = @options.fetch(:item_row_selector)
|
13
|
+
item_anchor_selector = @options.fetch(:item_anchor_selector)
|
14
|
+
item_price_selector = @options.fetch(:item_price_selector)
|
15
|
+
item_rows = document.css(item_row_selector)
|
14
16
|
item_rows.each do |item_row|
|
15
|
-
product_element = item_row.css(
|
16
|
-
|
17
|
-
|
18
|
-
|
17
|
+
product_element = item_row.css(item_anchor_selector)[0]
|
18
|
+
price_elements = item_row.css(item_price_selector)
|
19
|
+
prices = {}
|
20
|
+
price_elements.each do |price_element|
|
21
|
+
prices[price_element.children[1].text] = price_element.children[0].text.gsub(/[^\d\.]/, '').to_f
|
22
|
+
end
|
23
|
+
attributes = {
|
24
|
+
name: product_element.text,
|
25
|
+
url: "#{base_url}#{product_element.attributes['href']}",
|
26
|
+
item_code: item_row.attributes['data-itemcode'].value,
|
27
|
+
prices: prices
|
28
|
+
}
|
29
|
+
product = GetterCyndi5::Product.new(attributes)
|
19
30
|
products.append(product)
|
20
31
|
end
|
21
32
|
products
|
data/lib/getter_cyndi5/product.rb
CHANGED
@@ -1,10 +1,6 @@
|
|
1
1
|
class GetterCyndi5::Product
|
2
|
-
attr_accessor :
|
3
|
-
def initialize(
|
4
|
-
@
|
5
|
-
@url = url
|
6
|
-
@price_a = price_a
|
7
|
-
@price_b = price_b
|
8
|
-
@price_c = price_c
|
2
|
+
attr_accessor :attributes
|
3
|
+
def initialize(attributes = {})
|
4
|
+
@attributes = attributes
|
9
5
|
end
|
10
6
|
end
|
data/lib/getter_cyndi5/retriever.rb
CHANGED
@@ -4,33 +4,30 @@ require 'watir'
|
|
4
4
|
require 'webdrivers'
|
5
5
|
|
6
6
|
class GetterCyndi5::Retriever
|
7
|
-
def initialize(
|
8
|
-
@
|
9
|
-
@products_page_path = products_page_path
|
10
|
-
@products_page_url = "#{@base_url}#{@products_page_path}"
|
11
|
-
@item_row_selector = item_row_selector
|
12
|
-
@mode = mode
|
13
|
-
@filename = filename
|
7
|
+
def initialize(options = {})
|
8
|
+
@options = options
|
14
9
|
end
|
15
10
|
|
11
|
+
attr_reader :document
|
12
|
+
attr_reader :item_row_elements
|
16
13
|
def retrieve()
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
14
|
+
products_page_url = "#{@options.fetch(:base_url)}#{@options.fetch(:products_page_path)}"
|
15
|
+
mode = @options.fetch(:mode)
|
16
|
+
if mode == 0 || mode == 1
|
17
|
+
browser = Watir::Browser.new :chrome, args: %w[--headless --no-sandbox --disable-dev-shm-usage --disable-gpu --remote-debugging-port=9222]
|
18
|
+
browser.goto(products_page_url)
|
19
|
+
@item_row_elements = browser.elements(css: @options.fetch(:item_row_selector))
|
21
20
|
end
|
22
|
-
if
|
23
|
-
File.write(@filename, browser.html)
|
21
|
+
if mode == 1
|
22
|
+
File.write(@options.fetch(:filename), browser.html)
|
23
|
+
browser.close
|
24
24
|
end
|
25
|
-
if
|
25
|
+
if mode == 0
|
26
26
|
@document = Nokogiri::HTML(browser.html)
|
27
|
+
browser.close
|
27
28
|
end
|
28
|
-
if
|
29
|
-
@document = File.open(@filename) { |f| Nokogiri::HTML(f) }
|
29
|
+
if mode == 1 || mode == 2
|
30
|
+
@document = File.open(@options.fetch(:filename)) { |f| Nokogiri::HTML(f) }
|
30
31
|
end
|
31
32
|
end
|
32
|
-
|
33
|
-
def document
|
34
|
-
@document
|
35
|
-
end
|
36
33
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: getter_cyndi5
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Cyndi Cavanaugh
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-03-
|
11
|
+
date: 2021-03-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|