getter_cyndi5 0.0.7 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/getter_cyndi5 +11 -7
- data/lib/getter_cyndi5.rb +27 -9
- data/lib/getter_cyndi5/importer.rb +13 -0
- data/lib/getter_cyndi5/parser.rb +24 -11
- data/lib/getter_cyndi5/product.rb +3 -7
- data/lib/getter_cyndi5/retriever.rb +15 -20
- metadata +17 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 41b26c485b1a30f7c06914529d8c5a191cf48598524612f320848c479e6e1410
|
4
|
+
data.tar.gz: e81a4c7846c2b8a222828482a237083b1721546f6ee7e87c4ec79c71302930cd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ea53f1c726b4d4d5d97fcfd6e7e52bc003f2369b11c0047e2b6ebde387c6d13df53014f14fe8a6704a40b770e7a6ab504ecadf209e1bbcb99337b060160f6078
|
7
|
+
data.tar.gz: cb5e36cea8f71d160f7e1232c812c4b9613662d8570c0bdaae59502f440153268995d66632dca8d36d23ab0f61e30324d4636bf41fbba973de0ae1c8a62cfe88
|
data/bin/getter_cyndi5
CHANGED
@@ -1,10 +1,14 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'getter_cyndi5'
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
4
|
+
|
5
|
+
options = {
|
6
|
+
base_url: ARGV[0] || 'https://thehappyco.com',
|
7
|
+
products_page_path: ARGV[1] || '/kelly/products',
|
8
|
+
item_row_selector: ARGV[2] || '.item-row',
|
9
|
+
item_anchor_selector: ARGV[3] || 'div > div.product-desc.text-center > div.product-title > h3 > a',
|
10
|
+
item_price_selector: ARGV[4] || 'div > div.product-desc.text-center > div.product-price',
|
11
|
+
mode: (ARGV[7] || '2').to_i,
|
12
|
+
filename: ARGV[8] || './tmp/test_document.html'
|
13
|
+
}
|
14
|
+
GetterCyndi5.go(**options)
|
data/lib/getter_cyndi5.rb
CHANGED
@@ -3,29 +3,47 @@ class GetterCyndi5
|
|
3
3
|
# Getter
|
4
4
|
#
|
5
5
|
# Example:
|
6
|
-
# >> GetterCyndi5.go(
|
6
|
+
# >> GetterCyndi5.go(
|
7
|
+
# base_url = 'https://thehappyco.com',
|
8
|
+
# products_page_path = '/kelly/products',
|
9
|
+
# item_row_selector = '.item-row',
|
10
|
+
# item_anchor_selector = 'div > div.product-desc.text-center > div.product-title > h3 > a',
|
11
|
+
# item_price_selector = 'div > div.product-desc.text-center > div.product-price',
|
12
|
+
# price_a_text: 'One Time Purchase',
|
13
|
+
# price_b_text: 'SmartShip',
|
14
|
+
# mode = 2,
|
15
|
+
# filename = './tmp/test_document.html'
|
16
|
+
# )
|
7
17
|
# Arguments:
|
8
18
|
# base_url: (String)
|
9
19
|
# products_page_path: (String)
|
10
20
|
# item_row_selector: (String)
|
11
21
|
# item_anchor_selector: (String)
|
12
|
-
#
|
22
|
+
# item_price_selector: (String)
|
23
|
+
# price_a_text: (String)
|
24
|
+
# price_b_text: (String)
|
25
|
+
# mode: (Integer)
|
26
|
+
# 0 = retrieve and parse without saving HTML document to file,
|
27
|
+
# 1 = retrieve and parse saving HTML document to file,
|
28
|
+
# 2 = load and parse HTML document from file
|
13
29
|
# filename: (String)
|
14
30
|
|
15
|
-
def self.go(
|
16
|
-
all_products = products(
|
31
|
+
def self.go(options = {})
|
32
|
+
all_products = products(**options)
|
17
33
|
all_products.each do |product|
|
18
|
-
puts "#{product.
|
34
|
+
puts "#{product.inspect}"
|
19
35
|
end
|
20
36
|
end
|
21
37
|
|
22
|
-
def self.products(
|
23
|
-
retriever = Retriever.new(
|
38
|
+
def self.products(options = {})
|
39
|
+
retriever = Retriever.new(**options)
|
24
40
|
retriever.retrieve
|
25
|
-
|
41
|
+
|
42
|
+
parser = Parser.new(document: retriever.document, **options)
|
26
43
|
products = parser.parse
|
27
44
|
end
|
28
45
|
end
|
29
46
|
|
30
47
|
require 'getter_cyndi5/retriever'
|
31
|
-
require 'getter_cyndi5/parser'
|
48
|
+
require 'getter_cyndi5/parser'
|
49
|
+
require 'getter_cyndi5/importer'
|
@@ -0,0 +1,13 @@
|
|
1
|
+
class Importer
|
2
|
+
def initialize(importer_options = {})
|
3
|
+
@importer_options = importer_options
|
4
|
+
end
|
5
|
+
|
6
|
+
def import(product)
|
7
|
+
{ :item_code => product.attributes[:item_data].fetch(@importer_options.fetch(:itemcode_key), ''),
|
8
|
+
:name => product.attributes[:name],
|
9
|
+
:url => product.attributes[:url],
|
10
|
+
:price_a => product.attributes[:prices].fetch(@importer_options.fetch(:price_a_key), 0.0),
|
11
|
+
:price_b => product.attributes[:prices].fetch(@importer_options.fetch(:price_b_key), 0.0) }
|
12
|
+
end
|
13
|
+
end
|
data/lib/getter_cyndi5/parser.rb
CHANGED
@@ -1,21 +1,34 @@
|
|
1
1
|
require 'getter_cyndi5/product'
|
2
2
|
class GetterCyndi5::Parser
|
3
|
+
DATA_PREFIX = 'data-'.freeze
|
3
4
|
attr_reader :products
|
4
|
-
def initialize(
|
5
|
-
@
|
6
|
-
@base_url = base_url
|
7
|
-
@item_row_selector = item_row_selector
|
8
|
-
@item_anchor_selector = item_anchor_selector
|
5
|
+
def initialize(options = {})
|
6
|
+
@options = options
|
9
7
|
@products = []
|
10
8
|
end
|
11
9
|
|
12
|
-
def parse
|
13
|
-
|
10
|
+
def parse
|
11
|
+
document = @options.fetch(:document)
|
12
|
+
base_url = @options.fetch(:base_url)
|
13
|
+
item_row_selector = @options.fetch(:item_row_selector)
|
14
|
+
item_anchor_selector = @options.fetch(:item_anchor_selector)
|
15
|
+
item_price_selector = @options.fetch(:item_price_selector)
|
16
|
+
item_rows = document.css(item_row_selector)
|
14
17
|
item_rows.each do |item_row|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
18
|
+
item_data = item_row.attributes.select { |k, v| k.start_with? DATA_PREFIX }.map { |k, v| [k.delete_prefix(DATA_PREFIX), v.value]}.to_h
|
19
|
+
product_element = item_row.css(item_anchor_selector)[0]
|
20
|
+
price_elements = item_row.css(item_price_selector)
|
21
|
+
prices = {}
|
22
|
+
price_elements.each do |price_element|
|
23
|
+
prices[price_element.children[1].text] = price_element.children[0].text.gsub(/[^\d\.]/, '').to_f
|
24
|
+
end
|
25
|
+
attributes = {
|
26
|
+
name: product_element.text,
|
27
|
+
url: "#{base_url}#{product_element.attributes['href']}",
|
28
|
+
item_data: item_data,
|
29
|
+
prices: prices
|
30
|
+
}
|
31
|
+
product = GetterCyndi5::Product.new(attributes)
|
19
32
|
products.append(product)
|
20
33
|
end
|
21
34
|
products
|
@@ -1,10 +1,6 @@
|
|
1
1
|
class GetterCyndi5::Product
|
2
|
-
attr_accessor :
|
3
|
-
def initialize(
|
4
|
-
@
|
5
|
-
@url = url
|
6
|
-
@price_a = price_a
|
7
|
-
@price_b = price_b
|
8
|
-
@price_c = price_c
|
2
|
+
attr_accessor :attributes
|
3
|
+
def initialize(attributes = {})
|
4
|
+
@attributes = attributes
|
9
5
|
end
|
10
6
|
end
|
@@ -4,35 +4,30 @@ require 'watir'
|
|
4
4
|
require 'webdrivers'
|
5
5
|
|
6
6
|
class GetterCyndi5::Retriever
|
7
|
-
def initialize(
|
8
|
-
@
|
9
|
-
@products_page_path = products_page_path
|
10
|
-
@products_page_url = "#{@base_url}#{@products_page_path}"
|
11
|
-
@item_row_selector = item_row_selector
|
12
|
-
@mode = mode
|
13
|
-
@filename = filename
|
7
|
+
def initialize(options = {})
|
8
|
+
@options = options
|
14
9
|
end
|
15
10
|
|
11
|
+
attr_reader :document
|
12
|
+
attr_reader :item_row_elements
|
16
13
|
def retrieve()
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
14
|
+
products_page_url = "#{@options.fetch(:base_url)}#{@options.fetch(:products_page_path)}"
|
15
|
+
mode = @options.fetch(:mode)
|
16
|
+
if mode == 0 || mode == 1
|
17
|
+
browser = Watir::Browser.new :chrome, args: %w[--headless --no-sandbox --disable-dev-shm-usage --disable-gpu --remote-debugging-port=9222]
|
18
|
+
browser.goto(products_page_url)
|
19
|
+
@item_row_elements = browser.elements(css: @options.fetch(:item_row_selector))
|
21
20
|
end
|
22
|
-
if
|
23
|
-
File.write(@filename, browser.html)
|
21
|
+
if mode == 1
|
22
|
+
File.write(@options.fetch(:filename), browser.html)
|
24
23
|
browser.close
|
25
24
|
end
|
26
|
-
if
|
25
|
+
if mode == 0
|
27
26
|
@document = Nokogiri::HTML(browser.html)
|
28
27
|
browser.close
|
29
28
|
end
|
30
|
-
if
|
31
|
-
@document = File.open(@filename) { |f| Nokogiri::HTML(f) }
|
29
|
+
if mode == 1 || mode == 2
|
30
|
+
@document = File.open(@options.fetch(:filename)) { |f| Nokogiri::HTML(f) }
|
32
31
|
end
|
33
32
|
end
|
34
|
-
|
35
|
-
def document
|
36
|
-
@document
|
37
|
-
end
|
38
33
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: getter_cyndi5
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Cyndi Cavanaugh
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-03-
|
11
|
+
date: 2021-03-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - '='
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: 4.6.0
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: minitest
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 5.13.0
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 5.13.0
|
69
83
|
description: A simple getter cyndi5 gem
|
70
84
|
email: cynthiacavanaugh1@gmail.com
|
71
85
|
executables:
|
@@ -75,6 +89,7 @@ extra_rdoc_files: []
|
|
75
89
|
files:
|
76
90
|
- bin/getter_cyndi5
|
77
91
|
- lib/getter_cyndi5.rb
|
92
|
+
- lib/getter_cyndi5/importer.rb
|
78
93
|
- lib/getter_cyndi5/parser.rb
|
79
94
|
- lib/getter_cyndi5/product.rb
|
80
95
|
- lib/getter_cyndi5/retriever.rb
|