getter_cyndi5 0.0.3 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/getter_cyndi5 +7 -1
- data/lib/getter_cyndi5.rb +10 -3
- data/lib/getter_cyndi5/parser.rb +7 -2
- data/lib/getter_cyndi5/product.rb +10 -0
- data/lib/getter_cyndi5/retriever.rb +3 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 75f153b3644ea1beac5c5a3967734d3d03d1a2b8ac5e08740dd2236beffcaa53
|
4
|
+
data.tar.gz: 979d808e9a7eaf40843eef450c0b776058bd306c14a71102d57783fc2f34acf4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a321509cbdf2cf0bd51f09d0490597e63e84dd5614ac656255e6ce36b84d97fbe462309a5cac3a46f59c9b7fdcd151075c1a28d2ec8ca75af0d2da8cca37b363
|
7
|
+
data.tar.gz: fbb193d9559ee820fbec6cd96e5e5a297daea5b88ab9ae7306e2ca03931edc63687c21d6e4795a3e0717c1c7488f001fb1381841a6cd7e76e0c121fbf2da7ec2
|
data/bin/getter_cyndi5
CHANGED
@@ -1,4 +1,10 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'getter_cyndi5'
|
4
|
-
|
4
|
+
base_url = ARGV[0] || 'https://thehappyco.com'
|
5
|
+
products_page_path = ARGV[1] || '/kelly/products'
|
6
|
+
item_row_selector = ARGV[2] || '.item-row'
|
7
|
+
item_anchor_selector = ARGV[3] || 'div > div.product-desc.text-center > div.product-title > h3 > a'
|
8
|
+
mode = (ARGV[4] || '2').to_i
|
9
|
+
filename = ARGV[5] || './tmp/document1.html'
|
10
|
+
GetterCyndi5.go(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
|
data/lib/getter_cyndi5.rb
CHANGED
@@ -3,7 +3,7 @@ class GetterCyndi5
|
|
3
3
|
# Getter
|
4
4
|
#
|
5
5
|
# Example:
|
6
|
-
# >> GetterCyndi5.go(base_url = 'https://thehappyco.com', products_page_path = '/kelly/products', item_row_selector = '.item-row', item_anchor_selector = 'div > div.product-desc.text-center > div.product-title > h3 > a', mode =
|
6
|
+
# >> GetterCyndi5.go(base_url = 'https://thehappyco.com', products_page_path = '/kelly/products', item_row_selector = '.item-row', item_anchor_selector = 'div > div.product-desc.text-center > div.product-title > h3 > a', mode = 2, './tmp/document1.html')
|
7
7
|
# Arguments:
|
8
8
|
# base_url: (String)
|
9
9
|
# products_page_path: (String)
|
@@ -12,11 +12,18 @@ class GetterCyndi5
|
|
12
12
|
# mode: (Integer) 0 = retrieve and parse without saving HTML document to file, 1 = retrieve and parse saving HTML document to file, 2 = load and parse HTML document from file
|
13
13
|
# filename: (String)
|
14
14
|
|
15
|
-
def self.go(base_url
|
15
|
+
def self.go(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
|
16
|
+
all_products = products(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
|
17
|
+
all_products.each do |product|
|
18
|
+
puts "#{product.name} :-: #{product.url}"
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def self.products(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
|
16
23
|
retriever = Retriever.new(base_url, products_page_path, item_row_selector, mode, filename)
|
17
24
|
retriever.retrieve
|
18
25
|
parser = Parser.new(retriever.document, base_url, item_row_selector, item_anchor_selector)
|
19
|
-
parser.parse
|
26
|
+
products = parser.parse
|
20
27
|
end
|
21
28
|
end
|
22
29
|
|
data/lib/getter_cyndi5/parser.rb
CHANGED
@@ -1,9 +1,12 @@
|
|
1
|
+
require 'getter_cyndi5/product'
|
1
2
|
class GetterCyndi5::Parser
|
3
|
+
attr_reader :products
|
2
4
|
def initialize(document, base_url, item_row_selector, item_anchor_selector)
|
3
5
|
@document = document
|
4
6
|
@base_url = base_url
|
5
7
|
@item_row_selector = item_row_selector
|
6
8
|
@item_anchor_selector = item_anchor_selector
|
9
|
+
@products = []
|
7
10
|
end
|
8
11
|
|
9
12
|
def parse
|
@@ -12,7 +15,9 @@ class GetterCyndi5::Parser
|
|
12
15
|
product_element = item_row.css(@item_anchor_selector)[0]
|
13
16
|
product_name = product_element.text
|
14
17
|
product_url = "#{@base_url}#{product_element.attributes['href']}"
|
15
|
-
|
18
|
+
product = GetterCyndi5::Product.new(product_name, product_url)
|
19
|
+
products.append(product)
|
16
20
|
end
|
21
|
+
products
|
17
22
|
end
|
18
|
-
end
|
23
|
+
end
|
data/lib/getter_cyndi5/retriever.rb
CHANGED
@@ -15,15 +15,17 @@ class GetterCyndi5::Retriever
|
|
15
15
|
|
16
16
|
def retrieve()
|
17
17
|
if @mode == 0 || @mode == 1
|
18
|
-
browser = Watir::Browser.new
|
18
|
+
browser = Watir::Browser.new :chrome, args: %w[--headless --no-sandbox --disable-dev-shm-usage --disable-gpu --remote-debugging-port=9222]
|
19
19
|
browser.goto(@products_page_url)
|
20
20
|
item_row_elements = browser.elements(css: @item_row_selector)
|
21
21
|
end
|
22
22
|
if @mode == 1
|
23
23
|
File.write(@filename, browser.html)
|
24
|
+
browser.close
|
24
25
|
end
|
25
26
|
if @mode == 0
|
26
27
|
@document = Nokogiri::HTML(browser.html)
|
28
|
+
browser.close
|
27
29
|
end
|
28
30
|
if @mode == 1 || @mode == 2
|
29
31
|
@document = File.open(@filename) { |f| Nokogiri::HTML(f) }
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: getter_cyndi5
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Cyndi Cavanaugh
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-03-
|
11
|
+
date: 2021-03-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|
@@ -76,6 +76,7 @@ files:
|
|
76
76
|
- bin/getter_cyndi5
|
77
77
|
- lib/getter_cyndi5.rb
|
78
78
|
- lib/getter_cyndi5/parser.rb
|
79
|
+
- lib/getter_cyndi5/product.rb
|
79
80
|
- lib/getter_cyndi5/retriever.rb
|
80
81
|
homepage: https://cyndicavanaugh.com
|
81
82
|
licenses:
|