getter_cyndi5 0.0.8 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 75f153b3644ea1beac5c5a3967734d3d03d1a2b8ac5e08740dd2236beffcaa53
4
- data.tar.gz: 979d808e9a7eaf40843eef450c0b776058bd306c14a71102d57783fc2f34acf4
3
+ metadata.gz: c9bfded80458e5ce9dce37a23bdcea8bf1fb2594eb43dfdd82ab5273d5ed8fd3
4
+ data.tar.gz: 69e337e0f111cd8c603a0f5d5367afb827c5fb9b582a773b84c59112412abba6
5
5
  SHA512:
6
- metadata.gz: a321509cbdf2cf0bd51f09d0490597e63e84dd5614ac656255e6ce36b84d97fbe462309a5cac3a46f59c9b7fdcd151075c1a28d2ec8ca75af0d2da8cca37b363
7
- data.tar.gz: fbb193d9559ee820fbec6cd96e5e5a297daea5b88ab9ae7306e2ca03931edc63687c21d6e4795a3e0717c1c7488f001fb1381841a6cd7e76e0c121fbf2da7ec2
6
+ metadata.gz: d5880f7c7e73ee8bac8fcec1273bc2957fb941ec3d08c11ef4ba9156a94142e35296f8413856a42c202a61ac6b96e030feee13091d3ae0977f669e5110c3e77d
7
+ data.tar.gz: afeb7eeab86d2501921596849aab93eb87a3dbbe0cf4fa4a5ef4c015adcfc994ac08b5ab50efa1e8c0efdc89068870474822b7e1b238b67d75baa71c6cea8547
data/bin/getter_cyndi5 CHANGED
@@ -1,10 +1,14 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'getter_cyndi5'
4
- base_url = ARGV[0] || 'https://thehappyco.com'
5
- products_page_path = ARGV[1] || '/kelly/products'
6
- item_row_selector = ARGV[2] || '.item-row'
7
- item_anchor_selector = ARGV[3] || 'div > div.product-desc.text-center > div.product-title > h3 > a'
8
- mode = (ARGV[4] || '2').to_i
9
- filename = ARGV[5] || './tmp/document1.html'
10
- GetterCyndi5.go(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
4
+
5
+ options = {
6
+ base_url: ARGV[0] || 'https://thehappyco.com',
7
+ products_page_path: ARGV[1] || '/kelly/products',
8
+ item_row_selector: ARGV[2] || '.item-row',
9
+ item_anchor_selector: ARGV[3] || 'div > div.product-desc.text-center > div.product-title > h3 > a',
10
+ item_price_selector: ARGV[4] || 'div > div.product-desc.text-center > div.product-price',
11
+ mode: (ARGV[7] || '2').to_i,
12
+ filename: ARGV[8] || './tmp/test_document.html'
13
+ }
14
+ GetterCyndi5.go(**options)
data/lib/getter_cyndi5.rb CHANGED
@@ -3,29 +3,47 @@ class GetterCyndi5
3
3
  # Getter
4
4
  #
5
5
  # Example:
6
- # >> GetterCyndi5.go(base_url = 'https://thehappyco.com', products_page_path = '/kelly/products', item_row_selector = '.item-row', item_anchor_selector = 'div > div.product-desc.text-center > div.product-title > h3 > a', mode = 2, './tmp/document1.html')
6
+ # >> GetterCyndi5.go(
7
+ # base_url = 'https://thehappyco.com',
8
+ # products_page_path = '/kelly/products',
9
+ # item_row_selector = '.item-row',
10
+ # item_anchor_selector = 'div > div.product-desc.text-center > div.product-title > h3 > a',
11
+ # item_price_selector = 'div > div.product-desc.text-center > div.product-price',
12
+ # price_a_text: 'One Time Purchase',
13
+ # price_b_text: 'SmartShip',
14
+ # mode = 2,
15
+ # filename = './tmp/test_document.html'
16
+ # )
7
17
  # Arguments:
8
18
  # base_url: (String)
9
19
  # products_page_path: (String)
10
20
  # item_row_selector: (String)
11
21
  # item_anchor_selector: (String)
12
- # mode: (Integer) 0 = retrieve and parse without saving HTML document to file, 1 = retrieve and parse saving HTML document to file, 2 = load and parse HTML document from file
22
+ # item_price_selector: (String)
23
+ # price_a_text: (String)
24
+ # price_b_text: (String)
25
+ # mode: (Integer)
26
+ # 0 = retrieve and parse without saving HTML document to file,
27
+ # 1 = retrieve and parse saving HTML document to file,
28
+ # 2 = load and parse HTML document from file
13
29
  # filename: (String)
14
30
 
15
- def self.go(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
16
- all_products = products(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
31
+ def self.go(options = {})
32
+ all_products = products(**options)
17
33
  all_products.each do |product|
18
- puts "#{product.name} :-: #{product.url}"
34
+ puts "#{product.inspect}"
19
35
  end
20
36
  end
21
37
 
22
- def self.products(base_url, products_page_path, item_row_selector, item_anchor_selector, mode, filename)
23
- retriever = Retriever.new(base_url, products_page_path, item_row_selector, mode, filename)
38
+ def self.products(options = {})
39
+ retriever = Retriever.new(**options)
24
40
  retriever.retrieve
25
- parser = Parser.new(retriever.document, base_url, item_row_selector, item_anchor_selector)
41
+
42
+ parser = Parser.new(document: retriever.document, **options)
26
43
  products = parser.parse
27
44
  end
28
45
  end
29
46
 
30
47
  require 'getter_cyndi5/retriever'
31
- require 'getter_cyndi5/parser'
48
+ require 'getter_cyndi5/parser'
49
+ require 'getter_cyndi5/importer'
@@ -0,0 +1,13 @@
1
+ class Importer
2
+ def initialize(importer_options = {})
3
+ @importer_options = importer_options
4
+ end
5
+
6
+ def import(product)
7
+ { :item_code => product.attributes[:item_data].fetch(@importer_options.fetch(:itemcode_key), ''),
8
+ :name => product.attributes[:name],
9
+ :url => product.attributes[:url],
10
+ :price_a => product.attributes[:prices].fetch(@importer_options.fetch(:price_a_key), 0.0),
11
+ :price_b => product.attributes[:prices].fetch(@importer_options.fetch(:price_b_key), 0.0) }
12
+ end
13
+ end
@@ -1,23 +1,39 @@
1
1
  require 'getter_cyndi5/product'
2
2
  class GetterCyndi5::Parser
3
+ DATA_PREFIX = 'data-'.freeze
3
4
  attr_reader :products
4
- def initialize(document, base_url, item_row_selector, item_anchor_selector)
5
- @document = document
6
- @base_url = base_url
7
- @item_row_selector = item_row_selector
8
- @item_anchor_selector = item_anchor_selector
5
+ def initialize(options = {})
6
+ @options = options
9
7
  @products = []
10
8
  end
11
9
 
12
- def parse
13
- item_rows = @document.css(@item_row_selector)
10
+ def parse
11
+ base_url = @options.fetch(:base_url)
12
+ item_anchor_selector = @options.fetch(:item_anchor_selector)
13
+ item_price_selector = @options.fetch(:item_price_selector)
14
14
  item_rows.each do |item_row|
15
- product_element = item_row.css(@item_anchor_selector)[0]
16
- product_name = product_element.text
17
- product_url = "#{@base_url}#{product_element.attributes['href']}"
18
- product = GetterCyndi5::Product.new(product_name, product_url)
15
+ item_data = item_row.attributes.select { |k, v| k.start_with? DATA_PREFIX }.map { |k, v| [k.delete_prefix(DATA_PREFIX), v.value]}.to_h
16
+ product_element = item_row.css(item_anchor_selector)[0]
17
+ price_elements = item_row.css(item_price_selector)
18
+ prices = {}
19
+ price_elements.each do |price_element|
20
+ prices[price_element.children[1].text] = price_element.children[0].text.gsub(/[^\d\.]/, '').to_f
21
+ end
22
+ attributes = {
23
+ name: product_element.text,
24
+ url: "#{base_url}#{product_element.attributes['href']}",
25
+ item_data: item_data,
26
+ prices: prices
27
+ }
28
+ product = GetterCyndi5::Product.new(attributes)
19
29
  products.append(product)
20
30
  end
21
31
  products
22
32
  end
33
+
34
+ def item_rows
35
+ document = @options.fetch(:document)
36
+ item_row_selector = @options.fetch(:item_row_selector)
37
+ document.css(item_row_selector)
38
+ end
23
39
  end
@@ -1,10 +1,6 @@
1
1
  class GetterCyndi5::Product
2
- attr_accessor :name, :url, :price_a, :price_b, :price_c
3
- def initialize(name, url, price_a=0.0, price_b=0.0, price_c=0.0)
4
- @name = name
5
- @url = url
6
- @price_a = price_a
7
- @price_b = price_b
8
- @price_c = price_c
2
+ attr_accessor :attributes
3
+ def initialize(attributes = {})
4
+ @attributes = attributes
9
5
  end
10
6
  end
@@ -4,35 +4,30 @@ require 'watir'
4
4
  require 'webdrivers'
5
5
 
6
6
  class GetterCyndi5::Retriever
7
- def initialize(base_url, products_page_path, item_row_selector, mode, filename)
8
- @base_url = base_url
9
- @products_page_path = products_page_path
10
- @products_page_url = "#{@base_url}#{@products_page_path}"
11
- @item_row_selector = item_row_selector
12
- @mode = mode
13
- @filename = filename
7
+ def initialize(options = {})
8
+ @options = options
14
9
  end
15
10
 
11
+ attr_reader :document
12
+ attr_reader :item_row_elements
16
13
  def retrieve()
17
- if @mode == 0 || @mode == 1
14
+ products_page_url = "#{@options.fetch(:base_url)}#{@options.fetch(:products_page_path)}"
15
+ mode = @options.fetch(:mode)
16
+ if mode == 0 || mode == 1
18
17
  browser = Watir::Browser.new :chrome, args: %w[--headless --no-sandbox --disable-dev-shm-usage --disable-gpu --remote-debugging-port=9222]
19
- browser.goto(@products_page_url)
20
- item_row_elements = browser.elements(css: @item_row_selector)
18
+ browser.goto(products_page_url)
19
+ @item_row_elements = browser.elements(css: @options.fetch(:item_row_selector))
21
20
  end
22
- if @mode == 1
23
- File.write(@filename, browser.html)
21
+ if mode == 1
22
+ File.write(@options.fetch(:filename), browser.html)
24
23
  browser.close
25
24
  end
26
- if @mode == 0
25
+ if mode == 0
27
26
  @document = Nokogiri::HTML(browser.html)
28
27
  browser.close
29
28
  end
30
- if @mode == 1 || @mode == 2
31
- @document = File.open(@filename) { |f| Nokogiri::HTML(f) }
29
+ if mode == 1 || mode == 2
30
+ @document = File.open(@options.fetch(:filename)) { |f| Nokogiri::HTML(f) }
32
31
  end
33
32
  end
34
-
35
- def document
36
- @document
37
- end
38
33
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: getter_cyndi5
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 2.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cyndi Cavanaugh
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-13 00:00:00.000000000 Z
11
+ date: 2021-03-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - '='
67
67
  - !ruby/object:Gem::Version
68
68
  version: 4.6.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: minitest
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '='
74
+ - !ruby/object:Gem::Version
75
+ version: 5.13.0
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '='
81
+ - !ruby/object:Gem::Version
82
+ version: 5.13.0
69
83
  description: A simple getter cyndi5 gem
70
84
  email: cynthiacavanaugh1@gmail.com
71
85
  executables:
@@ -75,6 +89,7 @@ extra_rdoc_files: []
75
89
  files:
76
90
  - bin/getter_cyndi5
77
91
  - lib/getter_cyndi5.rb
92
+ - lib/getter_cyndi5/importer.rb
78
93
  - lib/getter_cyndi5/parser.rb
79
94
  - lib/getter_cyndi5/product.rb
80
95
  - lib/getter_cyndi5/retriever.rb