jaleb 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,68 @@
1
+ # = Jaleb
2
+ # Author:: Hasan Basheer - 2017
3
+ require "uri"
4
+ require_relative "jaleb/version"
5
+ require_relative "jaleb/data"
6
+ require_relative "jaleb/model"
7
+ require_relative "jaleb/string"
8
+ require_relative "jaleb/nokogiri"
9
+
10
+
11
module Jaleb
  LIBRARY_PATH = File.join(File.dirname(__FILE__), 'jaleb')
  MODEL_PATH = File.join(LIBRARY_PATH, 'models')

  # Module Methods
  class << self
    # Fetch product information for a url.
    #
    # Picks the model identified for the url (Jaleb::Model::Base when none
    # matches), reads the page, lets the model parse it and stores the url
    # on the result.
    #
    # url - String address of the page to fetch.
    #
    # Returns the populated model instance.
    def fetch(url)
      # Look for a model based on the url, falling back to the Base class
      model_class = Jaleb::Model.identify(url) || Jaleb::Model::Base

      data = Jaleb::Data.read(url)

      product = model_class.new
      product.parse(data)

      # Save url
      product.url = url

      product
    end

    # Get the available models as symbols, one per file in MODEL_PATH.
    # The base model is never listed.
    #
    # Returns an Array of Symbols, e.g. [:amazon, :ebay].
    def models
      model_names.map(&:to_sym)
    end

    # Get the available model classes.
    #   Jaleb.model_classes # => [Amazon, Ebay, ThinkGeek]
    #
    # Returns an Array of classes under Jaleb::Model (base excluded).
    def model_classes
      model_names.map do |name|
        ["Jaleb", "Model", name.camelize].join("::").constantize
      end
    end

    private

    # Basenames (without ".rb") of every model file except "base".
    # Sorted so the order does not depend on the filesystem; callers that
    # pick the last matching model therefore behave deterministically.
    def model_names
      Dir[File.join(MODEL_PATH, "*.rb")].sort
        .map { |path| File.basename(path, ".rb") }
        .reject { |name| name == "base" }
    end
  end

  # Autoload Models
  module Model
    autoload :Base, File.join(MODEL_PATH, "base")
    Jaleb.models.each do |model|
      autoload model.to_s.camelize.to_sym, File.join(MODEL_PATH, model.to_s)
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require "nokogiri"
2
+ require "open-uri"
3
+
4
+
5
module Jaleb
  # This class is responsible for fetching and parsing data
  class Data
    # Browser-like User-Agent header sent with every request.
    USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.57 Safari/537.36'.freeze

    # Read a url and return its contents as a parsed document.
    #
    # The raw response body is also written to a "last_response" file two
    # directories above this file, for debugging.
    #
    # url - String address to download.
    #
    # Returns the document built by ::Nokogiri::HTML from the response body.
    def self.read(url)
      # URI#open (open-uri) rather than Kernel#open: Kernel#open would run
      # a shell command for input starting with "|", and no longer fetches
      # urls on Ruby 3. The block form closes the response when done.
      body = ::URI.parse(url).open('User-Agent' => USER_AGENT) { |response| response.read }

      doc = ::Nokogiri::HTML(body)
      save_last_response(body)
      doc
    end

    # Save contents of URL/Remote File for debugging.
    # Best effort: a location that cannot be written must not make the
    # fetch itself fail.
    def self.save_last_response(body)
      last_response_file = File.expand_path(File.join("..", "..", "last_response"), File.dirname(__FILE__))
      File.write(last_response_file, body)
    rescue SystemCallError
      nil
    end
    private_class_method :save_last_response
  end
end
@@ -0,0 +1,21 @@
1
module Jaleb
  module Model
    class << self
      # Detect the model class for a url.
      #
      # The url's host is matched against the regexp of every class in
      # Jaleb.model_classes; when several match, the last one wins.
      #
      # url - String to inspect.
      #
      # Returns the matching model class, or nil when no model matches.
      # Raises ArgumentError when url does not look like a url.
      def identify(url)
        # DEFAULT_PARSER.make_regexp is the replacement for the obsolete
        # URI.regexp.
        raise ArgumentError, "not a url" unless url =~ ::URI::DEFAULT_PARSER.make_regexp

        host = ::URI.parse(url).host
        Jaleb.model_classes.select { |model| host =~ model.regexp }.last
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
module Jaleb
  module Model
    # Model for amazon.com product pages.
    class Amazon < Jaleb::Model::Base
      # A regular expression for determining if a url comes from a specific service/website
      def self.regexp
        /amazon\.com/
      end

      # Parse data and look for object attributes to give to object.
      #
      # Sets name, description, price, images and image. Each attribute
      # is looked up in several places; the first one that yields a value
      # is kept.
      #
      # data - the fetched page; attributes are only extracted when it is
      #        an HTML document.
      def parse(data)
        super(data)

        case doc
        when Nokogiri::HTML::Document
          # Get Name, falling back to the first part of the page title
          self.name = doc.css("h1#title").first_string
          self.name = doc.xpath("string(//title)").split(" - ").first unless self.name

          # Get Description
          self.description = doc.css(".productDescriptionWrapper").first_string

          # Get description from meta title if not found
          self.description = doc.xpath("//meta[@name='description']/@content").first_string if description.nil?

          # Get Price
          parse_price(doc.css("#actualPriceValue").first_string)
          parse_price(doc.css("#priceblock_ourprice").first_string) unless self.price
          parse_price(doc.css("#priceblock_saleprice").first_string) unless self.price
          parse_price(doc.xpath("//span[contains(@id, 'price')]").first_string) unless self.price

          # Get Unqualified Price
          parse_price(doc.xpath("//*[contains(@id, 'unqualifiedBuyBox')]//span").first_string) unless self.price

          # Get Used Price
          parse_price(doc.xpath("//*[contains(@id, 'secondaryUsedAndNew')]//*[@class='price']").first_string) unless self.price

          # Get Images
          self.images = doc.xpath("//*[@data-action='main-image-click']//img").attribute_array

          # attribute_array always returns an Array, so "nothing found" has
          # to be tested with empty? (an empty Array is truthy).
          self.images = doc.xpath("//*[@id='imageBlock']//img").attribute_array if self.images.empty?

          # Get images for in-house products (kindle, etc.)
          self.images = doc.xpath("//*[@id='kib-ma-container-0']//img").attribute_array if self.images.empty?

          # Get images for third-party sellers
          self.images = doc.xpath("//*[@id='prodImageContainer']//img").attribute_array if self.images.empty?

          self.image = images.first
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ require "hashie"
2
+ require "money"
3
+ require "monetize"
4
+
5
module Jaleb
  module Model
    # Generic model; site-specific models inherit from it.
    class Base < ::Hashie::Mash
      # Store the fetched data and derive the default attributes from it.
      #
      # data - the fetched page; kept under +doc+ so the user can still
      #        access it later.
      def parse(data)
        self.doc = data

        # The page title serves as the name
        title_nodes = doc.css("head > title")
        self.name = title_nodes.first_string
      end

      # Turn a raw price string into usable data and store it as +price+.
      #   model.parse_price("$5.00") # => #<Money cents:500 currency:USD>
      #   model.price.to_f # => 5.0
      #   model.price.currency.symbol # => '$'
      #
      # raw_price - String to parse; nothing happens when it is nil.
      def parse_price(raw_price = nil)
        self.price = ::Monetize.parse(raw_price) unless raw_price.nil?
      end
    end # Base
  end # Model
end # Jaleb
@@ -0,0 +1,32 @@
1
module Jaleb
  module Model
    # Model for ebay.com item pages.
    class Ebay < Jaleb::Model::Base
      # A regular expression for determining if a url comes from a specific service/website
      def self.regexp
        /ebay\.com/
      end

      # Parse data and look for object attributes to give to object.
      #
      # Sets name, price, images and image.
      #
      # data - the fetched page; attributes are only extracted when it is
      #        an HTML document.
      def parse(data)
        super(data)

        case doc
        when Nokogiri::HTML::Document
          # Get Name
          self.name = doc.xpath("//h1[@itemprop='name']").first_string

          # Get Description
          # OMITTED: This is tough to get because ebay item descriptions are custom html/content created by sellers

          # Get Price (the "US" prefix is stripped before parsing)
          raw_price = doc.xpath("//span[@itemprop='price']").first_string
          parse_price(raw_price.gsub(/US/, "")) if raw_price

          # Get Image
          # Select the src attribute itself: first_string on the <img>
          # element would return its text content, which is always empty,
          # instead of the image url.
          self.images = [{:src => doc.xpath("//span[@itemprop='image']/img/@src").first_string}]
          self.image = images.first
        end
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
module Jaleb
  module Model
    # Model for etsy.com listing pages.
    class Etsy < Jaleb::Model::Base
      # Pattern used to decide whether a url belongs to this service/website
      def self.regexp
        /etsy\.com/
      end

      # Read name, description, price and images out of the fetched data.
      #
      # data - the fetched page; nothing beyond the base attributes is
      #        extracted unless it is an HTML document.
      def parse(data)
        super(data)
        return unless Nokogiri::HTML::Document === doc

        # Name and description
        self.name = doc.xpath("//span[@itemprop='name']").first_string
        self.description = doc.xpath("//div[@id='description-text']").first_string

        # Price
        listing_price = doc.css("span#listing-price").first_string
        parse_price(listing_price)

        # Images; the first one doubles as the main image
        self.images = doc.xpath("//div[@id='image-main']//img").attribute_array
        self.image = images.first
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
require 'date'

module Jaleb
  module Model
    # Model for game.co.uk product pages.
    class Gamecouk < Jaleb::Model::Base
      # A regular expression for determining if a url comes from a specific service/website
      def self.regexp
        /game\.co\.uk/
      end

      # Parse data and look for object attributes to give to object.
      #
      # Sets name, description, release_date, price, images and image.
      #
      # data - the fetched page; attributes are only extracted when it is
      #        an HTML document.
      def parse(data)
        super(data)

        case doc
        when Nokogiri::HTML::Document
          # Get Name
          self.name = doc.xpath("//meta[@property='og:title']/@content").first_string

          # Get Description (kept as the markup of the description panel)
          self.description = doc.css('div#primary div#details.panel div.description').to_s

          # Get Release Date, e.g. "Released on 01-Jan-2017"
          released = doc.css('div#primary div#details.panel p.releaseDate').first_string
          self.release_date = released && Date.strptime(released.gsub('Released on ', ''), '%d-%b-%Y')

          # Get Price
          # The currency is passed to the parser directly instead of
          # reassigning Money.default_currency, which is process-wide and
          # would change how every later price is parsed.
          raw_price = doc.css("span.price").first_string
          self.price = ::Monetize.parse(raw_price.gsub(/Only /, ""), Money::Currency.new("GBP")) if raw_price

          # Get Image
          self.images = [{:src => doc.xpath("//meta[@property='og:image']/@content").first_string}]
          self.image = images.first
        end
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
module Jaleb
  module Model
    # Model for product pages served from google.com.
    class Googleshopping < Jaleb::Model::Base
      # Pattern used to decide whether a url belongs to this service/website
      def self.regexp
        /google\.com/
      end

      # Read name, description, price and images out of the fetched data.
      #
      # data - the fetched page; nothing beyond the base attributes is
      #        extracted unless it is an HTML document.
      def parse(data)
        super(data)
        return unless Nokogiri::HTML::Document === doc

        # Name: only replaces the name set by the base class when found
        main_title = doc.css('h1#product-name span.main-title').first_string
        self.name = main_title if main_title

        # Description
        self.description = doc.css("#product-description-full").first_string

        # Price
        summary_price = doc.css('#summary-prices .price').first_string
        parse_price(summary_price)

        # Images; the first one doubles as the main image
        self.images = doc.css('div#product-basic-info img').attribute_array
        self.image = images.first
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
module Jaleb
  module Model
    # Model for newegg.com product pages.
    class Newegg < Jaleb::Model::Base
      # A regular expression for determining if a url comes from a specific service/website
      def self.regexp
        /newegg\.com/
      end

      # Parse data and look for object attributes to give to object.
      #
      # Sets name, description, images and image. The price is not parsed.
      #
      # data - the fetched page; attributes are only extracted when it is
      #        an HTML document.
      def parse(data)
        super(data)
        case doc
        when Nokogiri::HTML::Document
          # Get Name
          self.name = doc.css("#synopsis .grpDesc .wrapper h1 > span").first_string
          #self.name = doc.css("#synopsis .grpDesc .wrapper > .span").first_string

          # Description - Not always reliable.
          self.description = doc.css(".itmDesc > p").first_string

          # Get Price (currently disabled because price is displayed after page load)
          #parse_price doc.css("li.price-current").attribute("content").content

          # Get Images
          # A page without the main slide image is handled explicitly, so
          # no blanket rescue is needed and unrelated errors are no longer
          # hidden.
          main_image = doc.css('.mainSlide > img').first
          src = main_image && main_image['src']
          self.images = src ? [{:src => src}] : []
          self.image = images.first
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
module Jaleb
  module Model
    # Model for souq.com product pages.
    class Souq < Jaleb::Model::Base
      # Pattern used to decide whether a url belongs to this service/website
      def self.regexp
        /souq\.com/
      end

      # Read name, description, price and image out of the fetched data.
      #
      # data - the fetched page; nothing beyond the base attributes is
      #        extracted unless it is an HTML document.
      def parse(data)
        super(data)
        return unless Nokogiri::HTML::Document === doc

        # Name and description come from the Open Graph meta tags
        self.name = doc.xpath("//meta[@property='og:title']/@content").first_string
        self.description = doc.xpath("//meta[@property='og:description']/@content").first_string

        # Price: commas are removed before parsing
        listed_price = doc.css('h3.price').first_string
        parse_price(listed_price.gsub(/,/, "")) if listed_price

        # Image, also taken from the Open Graph meta tags
        og_image = doc.xpath("//meta[@property='og:image']/@content").first_string
        self.images = [{:src => og_image}]
        self.image = images.first
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
module Jaleb
  module Model
    # Model for thinkgeek.com product pages.
    class Thinkgeek < Jaleb::Model::Base
      # Pattern used to decide whether a url belongs to this service/website
      def self.regexp
        /thinkgeek\.com/
      end

      # Read name, description, price and image out of the fetched data.
      #
      # data - the fetched page; nothing beyond the base attributes is
      #        extracted unless it is an HTML document.
      def parse(data)
        super(data)
        return unless Nokogiri::HTML::Document === doc

        # Name and description come from the Open Graph meta tags
        self.name = doc.xpath("//meta[@property='og:title']/@content").first_string
        self.description = doc.xpath("//meta[@property='og:description']/@content").first_string

        # Price: any error while parsing it is ignored
        begin
          parse_price(doc.xpath("//form[@id='buy']/h3").first_string)
        rescue StandardError
          nil
        end

        # Image, also taken from the Open Graph meta tags
        og_image = doc.xpath("//meta[@property='og:image']/@content").first_string
        self.images = [{:src => og_image}]
        self.image = images.first
      end
    end
  end
end
@@ -0,0 +1,53 @@
1
+ require "cgi"
2
+
3
module Jaleb
  # Helper methods for Nokogiri interactions
  module Nokogiri
    module HTML
      # Currently empty.
      module Document
      end # Document
    end # HTML

    module XML
      module NodeSet
        # Sanitized string for the first node of the set.
        #
        # Returns the sanitized content of an element, the sanitized value
        # of an attribute, or nil for anything else (including an empty set).
        def first_string
          head = first
          if ::Nokogiri::XML::Element === head
            head.content.sanitize
          elsif ::Nokogiri::XML::Attr === head
            head.value.sanitize
          end
        end

        # Convert the nodes of the set to an array of hashes.
        #   @doc.xpath("//img").attribute_array # => [{:element => "img", :src => ".../someimage.png"}]
        #
        # Each element contributes its tag name under :element plus one
        # symbol key per attribute; any other node contributes an empty hash.
        def attribute_array
          map do |node|
            attrs = {}
            if ::Nokogiri::XML::Element === node
              attrs[:element] = node.name
              node.attributes.each do |key, value|
                attrs[key.to_sym] = value.value.sanitize if ::Nokogiri::XML::Attr === value
              end
            end
            attrs
          end
        end
      end # NodeSet
    end # XML
  end # Nokogiri
end # Jaleb
51
+
52
# Mix the Jaleb helper modules into the matching Nokogiri classes.
::Nokogiri::HTML::Document.include(::Jaleb::Nokogiri::HTML::Document)
::Nokogiri::XML::NodeSet.include(::Jaleb::Nokogiri::XML::NodeSet)