getter_cyndi5 0.0.7 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f375f528599295fb8680b981577d5f4696a4f0a82dc6cd98c0e8e71c38adb900
4
- data.tar.gz: 1e68edbf8781aa1d1ee17318d4a5cfb074cd4dc0b4b7fef78d5b8802ad15e37a
3
+ metadata.gz: 41b26c485b1a30f7c06914529d8c5a191cf48598524612f320848c479e6e1410
4
+ data.tar.gz: e81a4c7846c2b8a222828482a237083b1721546f6ee7e87c4ec79c71302930cd
5
5
  SHA512:
6
- metadata.gz: 3bed2a5ed995bafaaa11a157dd9c19888ae5ed6989ddbbe0bf10200e2b3cfb520717f176238c2299ae3ca8ff0fe4455c258f5b671c3b23bcd11ff1ade050e7e8
7
- data.tar.gz: cf5821fea29ee00a65031413a62affa6f3fea672580b5a8c81c905f4fc9fc1f10ab2e54ea33cc95fa6f8eafc8b04d13bb42f7c09de38ced5c08fa2dbb899c21d
6
+ metadata.gz: ea53f1c726b4d4d5d97fcfd6e7e52bc003f2369b11c0047e2b6ebde387c6d13df53014f14fe8a6704a40b770e7a6ab504ecadf209e1bbcb99337b060160f6078
7
+ data.tar.gz: cb5e36cea8f71d160f7e1232c812c4b9613662d8570c0bdaae59502f440153268995d66632dca8d36d23ab0f61e30324d4636bf41fbba973de0ae1c8a62cfe88
data/bin/getter_cyndi5 CHANGED
@@ -1,10 +1,14 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'getter_cyndi5'
4
- base_url = ARGV[0] || 'https://thehappyco.com'
5
- products_page_path = ARGV[1] || '/kelly/products'
6
- item_row_selector = ARGV[2] || '.item-row'
7
- item_anchor_selector = ARGV[3] || 'div > div.product-desc.text-center > div.product-title > h3 > a'
8
- mode = (ARGV[4] || '2').to_i
9
- filename = ARGV[5] || './tmp/document1.html'
10
- GetterCyndi5.go(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
4
+
5
+ options = {
6
+ base_url: ARGV[0] || 'https://thehappyco.com',
7
+ products_page_path: ARGV[1] || '/kelly/products',
8
+ item_row_selector: ARGV[2] || '.item-row',
9
+ item_anchor_selector: ARGV[3] || 'div > div.product-desc.text-center > div.product-title > h3 > a',
10
+ item_price_selector: ARGV[4] || 'div > div.product-desc.text-center > div.product-price',
11
+ mode: (ARGV[7] || '2').to_i,
12
+ filename: ARGV[8] || './tmp/test_document.html'
13
+ }
14
+ GetterCyndi5.go(**options)
data/lib/getter_cyndi5.rb CHANGED
@@ -3,29 +3,47 @@ class GetterCyndi5
3
3
  # Getter
4
4
  #
5
5
  # Example:
6
- # >> GetterCyndi5.go(base_url = 'https://thehappyco.com', products_page_path = '/kelly/products', item_row_selector = '.item-row', item_anchor_selector = 'div > div.product-desc.text-center > div.product-title > h3 > a', mode = 2, './tmp/document1.html')
6
+ # >> GetterCyndi5.go(
7
+ # base_url = 'https://thehappyco.com',
8
+ # products_page_path = '/kelly/products',
9
+ # item_row_selector = '.item-row',
10
+ # item_anchor_selector = 'div > div.product-desc.text-center > div.product-title > h3 > a',
11
+ # item_price_selector = 'div > div.product-desc.text-center > div.product-price',
12
+ # price_a_text: 'One Time Purchase',
13
+ # price_b_text: 'SmartShip',
14
+ # mode = 2,
15
+ # filename = './tmp/test_document.html'
16
+ # )
7
17
  # Arguments:
8
18
  # base_url: (String)
9
19
  # products_page_path: (String)
10
20
  # item_row_selector: (String)
11
21
  # item_anchor_selector: (String)
12
- # mode: (Integer) 0 = retrieve and parse without saving HTML document to file, 1 = retrieve and parse saving HTML document to file, 2 = load and parse HTML document from file
22
+ # item_price_selector: (String)
23
+ # price_a_text: (String)
24
+ # price_b_text: (String)
25
+ # mode: (Integer)
26
+ # 0 = retrieve and parse without saving HTML document to file,
27
+ # 1 = retrieve and parse saving HTML document to file,
28
+ # 2 = load and parse HTML document from file
13
29
  # filename: (String)
14
30
 
15
- def self.go(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
16
- all_products = products(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
31
+ def self.go(options = {})
32
+ all_products = products(**options)
17
33
  all_products.each do |product|
18
- puts "#{product.name} :-: #{product.url}"
34
+ puts "#{product.inspect}"
19
35
  end
20
36
  end
21
37
 
22
- def self.products(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
23
- retriever = Retriever.new(base_url, products_page_path, item_row_selector, mode, filename)
38
+ def self.products(options = {})
39
+ retriever = Retriever.new(**options)
24
40
  retriever.retrieve
25
- parser = Parser.new(retriever.document, base_url, item_row_selector, item_anchor_selector)
41
+
42
+ parser = Parser.new(document: retriever.document, **options)
26
43
  products = parser.parse
27
44
  end
28
45
  end
29
46
 
30
47
  require 'getter_cyndi5/retriever'
31
- require 'getter_cyndi5/parser'
48
+ require 'getter_cyndi5/parser'
49
+ require 'getter_cyndi5/importer'
@@ -0,0 +1,13 @@
1
+ class Importer
2
+ def initialize(importer_options = {})
3
+ @importer_options = importer_options
4
+ end
5
+
6
+ def import(product)
7
+ { :item_code => product.attributes[:item_data].fetch(@importer_options.fetch(:itemcode_key), ''),
8
+ :name => product.attributes[:name],
9
+ :url => product.attributes[:url],
10
+ :price_a => product.attributes[:prices].fetch(@importer_options.fetch(:price_a_key), 0.0),
11
+ :price_b => product.attributes[:prices].fetch(@importer_options.fetch(:price_b_key), 0.0) }
12
+ end
13
+ end
@@ -1,21 +1,34 @@
1
1
  require 'getter_cyndi5/product'
2
2
  class GetterCyndi5::Parser
3
+ DATA_PREFIX = 'data-'.freeze
3
4
  attr_reader :products
4
- def initialize(document, base_url, item_row_selector, item_anchor_selector)
5
- @document = document
6
- @base_url = base_url
7
- @item_row_selector = item_row_selector
8
- @item_anchor_selector = item_anchor_selector
5
+ def initialize(options = {})
6
+ @options = options
9
7
  @products = []
10
8
  end
11
9
 
12
- def parse
13
- item_rows = @document.css(@item_row_selector)
10
+ def parse
11
+ document = @options.fetch(:document)
12
+ base_url = @options.fetch(:base_url)
13
+ item_row_selector = @options.fetch(:item_row_selector)
14
+ item_anchor_selector = @options.fetch(:item_anchor_selector)
15
+ item_price_selector = @options.fetch(:item_price_selector)
16
+ item_rows = document.css(item_row_selector)
14
17
  item_rows.each do |item_row|
15
- product_element = item_row.css(@item_anchor_selector)[0]
16
- product_name = product_element.text
17
- product_url = "#{@base_url}#{product_element.attributes['href']}"
18
- product = GetterCyndi5::Product.new(product_name, product_url)
18
+ item_data = item_row.attributes.select { |k, v| k.start_with? DATA_PREFIX }.map { |k, v| [k.delete_prefix(DATA_PREFIX), v.value]}.to_h
19
+ product_element = item_row.css(item_anchor_selector)[0]
20
+ price_elements = item_row.css(item_price_selector)
21
+ prices = {}
22
+ price_elements.each do |price_element|
23
+ prices[price_element.children[1].text] = price_element.children[0].text.gsub(/[^\d\.]/, '').to_f
24
+ end
25
+ attributes = {
26
+ name: product_element.text,
27
+ url: "#{base_url}#{product_element.attributes['href']}",
28
+ item_data: item_data,
29
+ prices: prices
30
+ }
31
+ product = GetterCyndi5::Product.new(attributes)
19
32
  products.append(product)
20
33
  end
21
34
  products
@@ -1,10 +1,6 @@
1
1
  class GetterCyndi5::Product
2
- attr_accessor :name, :url, :price_a, :price_b, :price_c
3
- def initialize(name, url, price_a=0.0, price_b=0.0, price_c=0.0)
4
- @name = name
5
- @url = url
6
- @price_a = price_a
7
- @price_b = price_b
8
- @price_c = price_c
2
+ attr_accessor :attributes
3
+ def initialize(attributes = {})
4
+ @attributes = attributes
9
5
  end
10
6
  end
@@ -4,35 +4,30 @@ require 'watir'
4
4
  require 'webdrivers'
5
5
 
6
6
  class GetterCyndi5::Retriever
7
- def initialize(base_url, products_page_path, item_row_selector, mode, filename)
8
- @base_url = base_url
9
- @products_page_path = products_page_path
10
- @products_page_url = "#{@base_url}#{@products_page_path}"
11
- @item_row_selector = item_row_selector
12
- @mode = mode
13
- @filename = filename
7
+ def initialize(options = {})
8
+ @options = options
14
9
  end
15
10
 
11
+ attr_reader :document
12
+ attr_reader :item_row_elements
16
13
  def retrieve()
17
- if @mode == 0 || @mode == 1
18
- browser = Watir::Browser.new
19
- browser.goto(@products_page_url)
20
- item_row_elements = browser.elements(css: @item_row_selector)
14
+ products_page_url = "#{@options.fetch(:base_url)}#{@options.fetch(:products_page_path)}"
15
+ mode = @options.fetch(:mode)
16
+ if mode == 0 || mode == 1
17
+ browser = Watir::Browser.new :chrome, args: %w[--headless --no-sandbox --disable-dev-shm-usage --disable-gpu --remote-debugging-port=9222]
18
+ browser.goto(products_page_url)
19
+ @item_row_elements = browser.elements(css: @options.fetch(:item_row_selector))
21
20
  end
22
- if @mode == 1
23
- File.write(@filename, browser.html)
21
+ if mode == 1
22
+ File.write(@options.fetch(:filename), browser.html)
24
23
  browser.close
25
24
  end
26
- if @mode == 0
25
+ if mode == 0
27
26
  @document = Nokogiri::HTML(browser.html)
28
27
  browser.close
29
28
  end
30
- if @mode == 1 || @mode == 2
31
- @document = File.open(@filename) { |f| Nokogiri::HTML(f) }
29
+ if mode == 1 || mode == 2
30
+ @document = File.open(@options.fetch(:filename)) { |f| Nokogiri::HTML(f) }
32
31
  end
33
32
  end
34
-
35
- def document
36
- @document
37
- end
38
33
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: getter_cyndi5
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 2.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cyndi Cavanaugh
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-12 00:00:00.000000000 Z
11
+ date: 2021-03-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - '='
67
67
  - !ruby/object:Gem::Version
68
68
  version: 4.6.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: minitest
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '='
74
+ - !ruby/object:Gem::Version
75
+ version: 5.13.0
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '='
81
+ - !ruby/object:Gem::Version
82
+ version: 5.13.0
69
83
  description: A simple getter cyndi5 gem
70
84
  email: cynthiacavanaugh1@gmail.com
71
85
  executables:
@@ -75,6 +89,7 @@ extra_rdoc_files: []
75
89
  files:
76
90
  - bin/getter_cyndi5
77
91
  - lib/getter_cyndi5.rb
92
+ - lib/getter_cyndi5/importer.rb
78
93
  - lib/getter_cyndi5/parser.rb
79
94
  - lib/getter_cyndi5/product.rb
80
95
  - lib/getter_cyndi5/retriever.rb