getter_cyndi5 0.0.6 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/getter_cyndi5 +11 -7
- data/lib/getter_cyndi5.rb +25 -8
- data/lib/getter_cyndi5/parser.rb +24 -11
- data/lib/getter_cyndi5/product.rb +3 -7
- data/lib/getter_cyndi5/retriever.rb +17 -21
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8897522fa8be7a71ee0a93636ad6c8250dc8ceb4b774e4682b6add74440e3561
|
4
|
+
data.tar.gz: 2f457d708b7c100b56a354101efe367299e6bdb97dbaa6c9f9b019b9f68b9318
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5d627257e6e981258d321032a5959c0ddee38193606ae0d1f1538a2bebbc19b31c1570b477131ed29075b0c71cb1de9c1811eebf431c73d118f135689535383d
|
7
|
+
data.tar.gz: 3280ea73a619aea5be935a3fd7f2e622deecf7786e1d3967f3c22cc38e38678f804e21cc909e7e132791fe9ab5a69af36274f6b9010301113dcf1ae8525c0f23
|
data/bin/getter_cyndi5
CHANGED
@@ -1,10 +1,14 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'getter_cyndi5'
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
4
|
+
|
5
|
+
options = {
|
6
|
+
base_url: ARGV[0] || 'https://thehappyco.com',
|
7
|
+
products_page_path: ARGV[1] || '/kelly/products',
|
8
|
+
item_row_selector: ARGV[2] || '.item-row',
|
9
|
+
item_anchor_selector: ARGV[3] || 'div > div.product-desc.text-center > div.product-title > h3 > a',
|
10
|
+
item_price_selector: ARGV[4] || 'div > div.product-desc.text-center > div.product-price',
|
11
|
+
mode: (ARGV[7] || '2').to_i,
|
12
|
+
filename: ARGV[8] || './tmp/test_document.html'
|
13
|
+
}
|
14
|
+
GetterCyndi5.go(**options)
|
data/lib/getter_cyndi5.rb
CHANGED
@@ -3,26 +3,43 @@ class GetterCyndi5
|
|
3
3
|
# Getter
|
4
4
|
#
|
5
5
|
# Example:
|
6
|
-
# >> GetterCyndi5.go(
|
6
|
+
# >> GetterCyndi5.go(
|
7
|
+
# base_url: 'https://thehappyco.com',
|
8
|
+
# products_page_path: '/kelly/products',
|
9
|
+
# item_row_selector: '.item-row',
|
10
|
+
# item_anchor_selector: 'div > div.product-desc.text-center > div.product-title > h3 > a',
|
11
|
+
# item_price_selector: 'div > div.product-desc.text-center > div.product-price',
|
12
|
+
# price_a_text: 'One Time Purchase',
|
13
|
+
# price_b_text: 'SmartShip',
|
14
|
+
# mode: 2,
|
15
|
+
# filename: './tmp/test_document.html'
|
16
|
+
# )
|
7
17
|
# Arguments:
|
8
18
|
# base_url: (String)
|
9
19
|
# products_page_path: (String)
|
10
20
|
# item_row_selector: (String)
|
11
21
|
# item_anchor_selector: (String)
|
12
|
-
#
|
22
|
+
# item_price_selector: (String)
|
23
|
+
# price_a_text: (String)
|
24
|
+
# price_b_text: (String)
|
25
|
+
# mode: (Integer)
|
26
|
+
# 0 = retrieve and parse without saving HTML document to file,
|
27
|
+
# 1 = retrieve and parse saving HTML document to file,
|
28
|
+
# 2 = load and parse HTML document from file
|
13
29
|
# filename: (String)
|
14
30
|
|
15
|
-
def self.go(
|
16
|
-
all_products = products(
|
31
|
+
def self.go(options = {})
|
32
|
+
all_products = products(**options)
|
17
33
|
all_products.each do |product|
|
18
|
-
puts "#{product.
|
34
|
+
puts "#{product.inspect}"
|
19
35
|
end
|
20
36
|
end
|
21
37
|
|
22
|
-
def self.products(
|
23
|
-
retriever = Retriever.new(
|
38
|
+
def self.products(options = {})
|
39
|
+
retriever = Retriever.new(**options)
|
24
40
|
retriever.retrieve
|
25
|
-
|
41
|
+
|
42
|
+
parser = Parser.new(document: retriever.document, **options)
|
26
43
|
products = parser.parse
|
27
44
|
end
|
28
45
|
end
|
data/lib/getter_cyndi5/parser.rb
CHANGED
@@ -1,21 +1,34 @@
|
|
1
1
|
require 'getter_cyndi5/product'
|
2
2
|
class GetterCyndi5::Parser
|
3
|
+
DATA_PREFIX = 'data-'.freeze
|
3
4
|
attr_reader :products
|
4
|
-
def initialize(
|
5
|
-
@
|
6
|
-
@base_url = base_url
|
7
|
-
@item_row_selector = item_row_selector
|
8
|
-
@item_anchor_selector = item_anchor_selector
|
5
|
+
def initialize(options = {})
|
6
|
+
@options = options
|
9
7
|
@products = []
|
10
8
|
end
|
11
9
|
|
12
|
-
def parse
|
13
|
-
|
10
|
+
def parse
|
11
|
+
document = @options.fetch(:document)
|
12
|
+
base_url = @options.fetch(:base_url)
|
13
|
+
item_row_selector = @options.fetch(:item_row_selector)
|
14
|
+
item_anchor_selector = @options.fetch(:item_anchor_selector)
|
15
|
+
item_price_selector = @options.fetch(:item_price_selector)
|
16
|
+
item_rows = document.css(item_row_selector)
|
14
17
|
item_rows.each do |item_row|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
18
|
+
item_data = item_row.attributes.select { |k, v| k.start_with? DATA_PREFIX }.map { |k, v| [k.delete_prefix(DATA_PREFIX), v.value]}.to_h
|
19
|
+
product_element = item_row.css(item_anchor_selector)[0]
|
20
|
+
price_elements = item_row.css(item_price_selector)
|
21
|
+
prices = {}
|
22
|
+
price_elements.each do |price_element|
|
23
|
+
prices[price_element.children[1].text] = price_element.children[0].text.gsub(/[^\d\.]/, '').to_f
|
24
|
+
end
|
25
|
+
attributes = {
|
26
|
+
name: product_element.text,
|
27
|
+
url: "#{base_url}#{product_element.attributes['href']}",
|
28
|
+
item_data: item_data,
|
29
|
+
prices: prices
|
30
|
+
}
|
31
|
+
product = GetterCyndi5::Product.new(attributes)
|
19
32
|
products.append(product)
|
20
33
|
end
|
21
34
|
products
|
@@ -1,10 +1,6 @@
|
|
1
1
|
class GetterCyndi5::Product
|
2
|
-
attr_accessor :
|
3
|
-
def initialize(
|
4
|
-
@
|
5
|
-
@url = url
|
6
|
-
@price_a = price_a
|
7
|
-
@price_b = price_b
|
8
|
-
@price_c = price_c
|
2
|
+
attr_accessor :attributes
|
3
|
+
def initialize(attributes = {})
|
4
|
+
@attributes = attributes
|
9
5
|
end
|
10
6
|
end
|
@@ -4,34 +4,30 @@ require 'watir'
|
|
4
4
|
require 'webdrivers'
|
5
5
|
|
6
6
|
class GetterCyndi5::Retriever
|
7
|
-
def initialize(
|
8
|
-
@
|
9
|
-
@products_page_path = products_page_path
|
10
|
-
@products_page_url = "#{@base_url}#{@products_page_path}"
|
11
|
-
@item_row_selector = item_row_selector
|
12
|
-
@mode = mode
|
13
|
-
@filename = filename
|
7
|
+
def initialize(options = {})
|
8
|
+
@options = options
|
14
9
|
end
|
15
10
|
|
11
|
+
attr_reader :document
|
12
|
+
attr_reader :item_row_elements
|
16
13
|
def retrieve()
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
browser.
|
14
|
+
products_page_url = "#{@options.fetch(:base_url)}#{@options.fetch(:products_page_path)}"
|
15
|
+
mode = @options.fetch(:mode)
|
16
|
+
if mode == 0 || mode == 1
|
17
|
+
browser = Watir::Browser.new :chrome, args: %w[--headless --no-sandbox --disable-dev-shm-usage --disable-gpu --remote-debugging-port=9222]
|
18
|
+
browser.goto(products_page_url)
|
19
|
+
@item_row_elements = browser.elements(css: @options.fetch(:item_row_selector))
|
22
20
|
end
|
23
|
-
if
|
24
|
-
File.write(@filename, browser.html)
|
21
|
+
if mode == 1
|
22
|
+
File.write(@options.fetch(:filename), browser.html)
|
23
|
+
browser.close
|
25
24
|
end
|
26
|
-
if
|
25
|
+
if mode == 0
|
27
26
|
@document = Nokogiri::HTML(browser.html)
|
27
|
+
browser.close
|
28
28
|
end
|
29
|
-
if
|
30
|
-
@document = File.open(@filename) { |f| Nokogiri::HTML(f) }
|
29
|
+
if mode == 1 || mode == 2
|
30
|
+
@document = File.open(@options.fetch(:filename)) { |f| Nokogiri::HTML(f) }
|
31
31
|
end
|
32
32
|
end
|
33
|
-
|
34
|
-
def document
|
35
|
-
@document
|
36
|
-
end
|
37
33
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: getter_cyndi5
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Cyndi Cavanaugh
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-03-
|
11
|
+
date: 2021-03-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|