queenshop 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4ccb94cb12167097b2f0d30b5602d911d37c4295
4
+ data.tar.gz: 68c743d55718026e871bf2b041adfe2e2b2329df
5
+ SHA512:
6
+ metadata.gz: 721a7e7e251081cbdbe220481ffa5038b2bd4a80f6ae6e393207081b106730b1f91ee732dbaa779a9797c24f0ee3e025f0a2d0295aa8418d333c01694aa34df7
7
+ data.tar.gz: 5a925e85811e0f6eeba1848e8a63d0f6604d59a98aaf220e899f9b1b6cf3f9a0b99bef679037c3c591153a94f982c265919c7510a46c40266467d112e6f28567
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+ # require 'queenshop' # for production
3
+ require_relative '../lib/queenshop.rb' # for testing
4
+
5
# Build a scraper, run it against the command-line arguments and print
# whatever `scrape` returns.
puts QueenShopScraper::Filter.new.scrape
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative 'queenshop/config'
3
+ require_relative 'queenshop/scraper'
@@ -0,0 +1,58 @@
1
+ #!/usr/bin/env ruby
2
+
3
# Command-line argument handling for the queenshop executable.
#
# Mixed into a host object, it parses `key=value` arguments into
# `@parameters` and expands the `pages` parameter into a list of page
# numbers. Invalid input terminates the process through Kernel#abort.
module Validate
  attr_reader :parameters

  # Keys accepted on the command line.
  VALID_ARGS = [:item, :price, :pages].freeze

  # Splits one argument into key and value at the first "=". The /m flag lets
  # the value capture everything up to the end of the argument, so nothing can
  # hide behind a line break and escape validation.
  ARG_PATTERN = /\A(?<key>[^=]*)=(?<value>.*)\z/m

  # Whole-value patterns (anchored with \A and \z, not the per-line ^ and $)
  # for the keys whose values are validated.
  #   price: a comparison operator followed by a plain number, e.g. ">=300"
  #   pages: one page number or an inclusive range, e.g. "3" or "1..5"
  VALUE_PATTERNS = {
    'price' => /\A(?:>=|<=|==|>|<)\d+(?:\.\d+)?\z/,
    'pages' => /\A\d+(?:\.\.\d+)?\z/
  }.freeze

  # Parses `args` (an enumerable of "key=value" strings) into @parameters,
  # starting from the defaults below.
  #
  # Aborts with the usage text when an argument has no "=" or names a key
  # outside VALID_ARGS, and with "invalid parameters" when a price or pages
  # value is malformed (see #check).
  def validate_args(args)
    @parameters = { item: '', price: '', pages: '1..2' }
    args.each do |arg|
      match = ARG_PATTERN.match(arg.to_s)
      # Look the key up among the known symbols instead of calling to_sym on
      # arbitrary user input.
      key = match && VALID_ARGS.find { |name| name.to_s == match[:key] }
      abort "invalid usage...\n#{usage}\n\n" unless key
      @parameters[key] = check(match)
    end
  end

  # Validates the value of a parsed argument.
  #
  # `match` is anything answering `[:key]` and `[:value]` (normally the
  # MatchData produced by #validate_args). Returns the value unchanged when it
  # is acceptable for its key; aborts with "invalid parameters" otherwise.
  # Keys without an entry in VALUE_PATTERNS (such as item) are not restricted.
  def check(match)
    value = match[:value]
    pattern = VALUE_PATTERNS[match[:key].to_s]
    return value if pattern.nil? || pattern =~ value.to_s

    abort 'invalid parameters'
  end

  # Expands the pages parameter into an array of page numbers, e.g.
  # "2..4" => [2, 3, 4] and "3" => [3]. The result is also stored in @pages.
  def pages
    numbers = @parameters[:pages].scan(/\d+/)
    @pages = (numbers.first.to_i..numbers.last.to_i).to_a
  end

  # Help text shown when the arguments cannot be parsed. Every example is
  # written so that it passes #validate_args; the price values are quoted
  # because ">" and "<" are redirection operators in a shell.
  def usage
    [
      'Usage: queenshop [options]',
      '  item=(string)',
      '  price=(operator)(number)   operator: > < >= <= ==',
      '  pages=(number[..number])',
      '  examples:',
      '    queenshop item="blouse" price=">=300"',
      '    queenshop price="<100" pages=1..3',
      '    queenshop item="skirt"'
    ].join("\n")
  end
end
51
+
52
# Configuration object for one scraper run: validates the raw arguments on
# construction and exposes them through the readers supplied by Validate.
class QConfig
  include Validate

  # Validates `args` via Validate#validate_args, then calls #pages so the
  # page list is computed up front.
  def initialize(args)
    validate_args(args)
    pages
  end
end
@@ -0,0 +1,79 @@
1
+ #!/usr/bin/env ruby
2
+ require 'oga'
3
+ require 'open-uri'
4
+ require_relative './config'
5
+
6
# Scrapes product data from the Queenshop mobile site.
module QueenShopScraper
  # Fetches listing pages and uses XPath selectors to pull each product's
  # title and price, optionally keeping only the prices that satisfy a
  # comparison such as ">=300".
  class Filter
    # Splits a price filter into its comparison operator and numeric operand.
    PRICE_FILTER = /\A\s*(>=|<=|==|>|<)\s*(\d+(?:\.\d+)?)\s*\z/

    attr_reader :result
    attr_writer :item_selector, :title_selector, :price_selector, :site_url

    def initialize
      @result = []
      # xpath selectors that will be used to scrape data
      @item_selector = "//div[@class='pditem']/div[@class='pdicon']"
      @title_selector = "div[@class='pdicon_name']/a"
      @price_selector = "div[@class='pdicon_price']/div[@style='font-weight:bold;']"
      @site_url = 'https://www.queenshop.com.tw/m/PDList2.asp?'
      @price_filter = nil
    end

    # Scrapes every configured page and returns the accumulated results as an
    # array of `{ title:, price: }` hashes (both values are strings).
    #
    # `params` are "key=value" strings in the same format as the command
    # line; ARGV is appended to them. The caller's array is left untouched.
    # Argument validation is delegated to QConfig.
    def scrape(params = [])
      conf = QConfig.new(params + ARGV)
      @price_filter = conf.parameters[:price]

      conf.pages.each { |page| fetch_result("&page=#{page}") }
      @result
    end

    private

    # Downloads `url` and parses it as HTML. Returns the parsed document, or
    # nil when the download or the parse raises a StandardError.
    #
    # OpenURI.open_uri is called directly because Kernel#open stopped handling
    # URLs in Ruby 3.0; the block form closes the handle once parsing is done.
    def get_xmldata(url)
      OpenURI.open_uri(url) { |io| Oga.parse_html(io) }
    rescue StandardError
      nil
    end

    # Fetches one listing page (`uri` is appended to the site URL) and feeds
    # every product found on it through #strip_filter.
    # Returns [] when the page could not be loaded, @result otherwise.
    def fetch_result(uri = '')
      document = get_xmldata(@site_url + uri)
      return [] unless document

      document.xpath(@item_selector).each do |item|
        title = item.xpath(@title_selector).text.force_encoding('UTF-8')
        price = item.xpath(@price_selector).text
        strip_filter(title, price)
      end
      @result
    end

    # Removes the "NT. " currency prefix from `price` and records the item in
    # @result unless its title is empty or its price fails the active filter.
    def strip_filter(title, price)
      # Non-bang gsub: gsub! returns nil when nothing was replaced, which
      # used to wipe out prices that carried no prefix.
      price = price.to_s.gsub(/NT. /, '')
      return if title.empty?
      return unless price_matches?(price)

      @result << { title: title, price: price }
    end

    # True when no price filter is active, or when `price` is numeric and
    # satisfies it.
    #
    # NOTE(review): this replaces the earlier `eval("#{price} #{filter}")`.
    # The filter is parsed and applied as an explicit comparison, so text
    # from the command line or the remote page is never executed as Ruby.
    #
    # Raises ArgumentError when the filter is not "<operator><number>".
    def price_matches?(price)
      filter = @price_filter.to_s
      return true if filter.empty?

      parsed = PRICE_FILTER.match(filter)
      raise ArgumentError, "invalid price filter: #{filter.inspect}" unless parsed

      amount = to_number(price)
      !amount.nil? && amount.public_send(parsed[1], Float(parsed[2]))
    end

    # Converts scraped price text to a Float, or nil when it is not numeric.
    def to_number(text)
      Float(text)
    rescue ArgumentError, TypeError
      nil
    end
  end
end
metadata ADDED
@@ -0,0 +1,56 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: queenshop
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Even Chang
8
+ - Luis Herrera
9
+ - Katy Lee
10
+ - Frank Lee
11
+ autorequire:
12
+ bindir: bin
13
+ cert_chain: []
14
+ date: 2015-10-23 00:00:00.000000000 Z
15
+ dependencies: []
16
+ description: This gem scrapes Queenshop's website and returns the items with
17
+ their corresponding prices
18
+ email:
19
+ - kiki44552002@gmail.com
20
+ - lmherrera86@gmail.com
21
+ - katylee41024@yahoo.com.tw
22
+ - frank1234211@gmail.com
23
+ executables:
24
+ - queenshop
25
+ extensions: []
26
+ extra_rdoc_files: []
27
+ files:
28
+ - bin/queenshop
29
+ - lib/queenshop.rb
30
+ - lib/queenshop/config.rb
31
+ - lib/queenshop/scraper.rb
32
+ homepage: http://rubygems.org/gems/pinkoi
33
+ licenses:
34
+ - MIT
35
+ metadata: {}
36
+ post_install_message:
37
+ rdoc_options: []
38
+ require_paths:
39
+ - lib
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ required_rubygems_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: '0'
50
+ requirements: []
51
+ rubyforge_project:
52
+ rubygems_version: 2.5.0
53
+ signing_key:
54
+ specification_version: 4
55
+ summary: Scraper for Queenshop
56
+ test_files: []