queenshop 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4ccb94cb12167097b2f0d30b5602d911d37c4295
4
+ data.tar.gz: 68c743d55718026e871bf2b041adfe2e2b2329df
5
+ SHA512:
6
+ metadata.gz: 721a7e7e251081cbdbe220481ffa5038b2bd4a80f6ae6e393207081b106730b1f91ee732dbaa779a9797c24f0ee3e025f0a2d0295aa8418d333c01694aa34df7
7
+ data.tar.gz: 5a925e85811e0f6eeba1848e8a63d0f6604d59a98aaf220e899f9b1b6cf3f9a0b99bef679037c3c591153a94f982c265919c7510a46c40266467d112e6f28567
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+ # require 'queenshop' # for production
3
+ require_relative '../lib/queenshop.rb' # for testing
4
+
5
# Build a scraper, run it (scrape itself picks up the command-line
# arguments from ARGV) and print each collected result.
puts QueenShopScraper::Filter.new.scrape
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative 'queenshop/config'
3
+ require_relative 'queenshop/scraper'
@@ -0,0 +1,58 @@
1
+ #!/usr/bin/env ruby
2
+
3
# Mixin that parses and validates the `key=value` parameters given on the
# command line.
#
# An including object gains #parameters (the validated values, filled in by
# #validate_args) and #pages (the page numbers derived from the :pages value).
# Invalid input terminates the process through Kernel#abort.
module Validate
  # @return [Hash{Symbol=>String}] values stored by the last #validate_args call
  attr_reader :parameters

  # Keys that may appear on the left-hand side of `key=value`.
  VALID_ARGS = [:item, :price, :pages].freeze

  # One whole argument: everything before the first "=" is the key.
  ARG_FORMAT = /\A(?<key>[^=]*)=(?<value>.*)\z/

  # A comparison operator followed by a number, e.g. "<=300" or ">99.5".
  # Anchored with \A/\z and free of wildcards so that nothing except an
  # operator and a numeric literal can get through.
  PRICE_FORMAT = /\A(?:>=|<=|==|>|<)\d+(?:\.\d+)?\z/

  # A single page ("3") or an inclusive range ("1..5").
  PAGES_FORMAT = /\A\d+(?:\.\.\d+)?\z/

  # Parses +args+ into #parameters, starting from the defaults
  # (no item, no price filter, pages 1..2).
  #
  # Aborts with the usage text when an argument is not `key=value` or uses
  # an unknown key, and with "invalid parameters" when a value is malformed.
  #
  # @param args [Array<String>] raw arguments such as "price=<=300"
  # @return [Array<String>] +args+ itself
  def validate_args(args)
    @parameters = { item: '', price: '', pages: '1..2' }
    args.each do |arg|
      match = ARG_FORMAT.match(arg.to_s)
      # Look the key up in the whitelist rather than calling to_sym on
      # arbitrary user input.
      key = match && VALID_ARGS.find { |name| name.to_s == match[:key] }
      abort "invalid usage...\n#{usage}\n\n" unless key
      @parameters[key] = check(match)
    end
  end

  # Checks the value of one parsed argument against the format required by
  # its key. Only :price and :pages have a format; other values pass as is.
  #
  # @param match [MatchData, #[]] object answering to [:key] and [:value]
  # @return [String] the validated value
  def check(match)
    value = match[:value]
    accepted =
      case match[:key].to_s
      when 'price' then value =~ PRICE_FORMAT
      when 'pages' then value =~ PAGES_FORMAT
      else true
      end
    abort 'invalid parameters' unless accepted
    value
  rescue StandardError
    # A malformed +match+ object is reported the same way as a bad value.
    abort 'invalid parameters'
  end

  # Expands the :pages parameter into the list of page numbers to visit.
  # The first and last integers found in the value are the inclusive bounds;
  # a single number yields a one-element list.
  #
  # @return [Array<Integer>]
  def pages
    bounds = @parameters[:pages].scan(/\d+/)
    @pages = (bounds.first.to_i..bounds.last.to_i).to_a
  end

  # @return [String] help text describing the accepted parameters
  def usage
    [
      'Usage: queenshop [options]',
      '  item=(string)',
      '  price=(operator)(number)   operator is one of > < >= <= ==',
      '  pages=(first[..last])',
      '  examples:',
      '    queenshop item="blouse" "price=<=300"',
      '    queenshop "price=>100" pages=1..3',
      '    queenshop item="skirt"'
    ].join("\n")
  end
end
51
+
52
# Configuration object built from raw command-line arguments.
# All parsing behaviour comes from the Validate mixin.
class QConfig
  include Validate

  # Validates +args+ immediately and then computes the page list once,
  # so both readers are populated as soon as the object exists.
  def initialize(args)
    validate_args(args)
    pages
  end
end
@@ -0,0 +1,79 @@
1
+ #!/usr/bin/env ruby
2
+ require 'oga'
3
+ require 'open-uri'
4
+ require_relative './config'
5
+
6
# Scrapes product data from the Queenshop mobile site.
module QueenShopScraper
  # Walks the product list pages and collects the title and price of every
  # item, optionally keeping only the items whose price satisfies a
  # comparison such as "<=300".
  #
  # Results are appended to #result and are never cleared, so repeated calls
  # to #scrape on the same instance accumulate.
  class Filter
    # Accepted shape of a price filter: comparison operator, then a number.
    PRICE_FILTER = /\A(?<operator>>=|<=|==|>|<)\s*(?<amount>\d+(?:\.\d+)?)\z/

    # @return [Array<Hash>] collected items as { title: String, price: String }
    attr_reader :result
    attr_writer :item_selector, :title_selector, :price_selector, :site_url

    def initialize
      @result = []
      # xpath selectors that will be used to scrape data
      @item_selector = "//div[@class='pditem']/div[@class='pdicon']"
      @title_selector = "div[@class='pdicon_name']/a"
      @price_selector = "div[@class='pdicon_price']/div[@style='font-weight:bold;']"
      @site_url = 'https://www.queenshop.com.tw/m/PDList2.asp?'
      @price_filter = nil
    end

    # Scrapes every configured page.
    #
    # +params+ and ARGV are combined and validated by QConfig; only the
    # :price and :pages values are used here. The caller's array is left
    # untouched.
    #
    # @param params [Array<String>] extra `key=value` parameters
    # @return [Array<Hash>] the accumulated #result
    def scrape(params = [])
      conf = QConfig.new(params + ARGV)
      @price_filter = conf.parameters[:price]

      conf.pages.each { |page| fetch_result("&page=#{page}") }
      @result
    end

    private

    # Downloads +url+ and parses it as HTML.
    #
    # URI#open is used instead of Kernel#open: Kernel#open stopped fetching
    # URLs in Ruby 3.0 and would also treat a leading "|" as a command.
    # The block form closes the handle once parsing is done.
    #
    # @return [Object, nil] the parsed document, or nil when the page could
    #   not be fetched or parsed
    def get_xmldata(url)
      URI.parse(url).open { |io| Oga.parse_html(io) }
    rescue StandardError => e
      # Best effort: a failing page is reported and skipped, not fatal.
      warn "queenshop: could not load #{url} (#{e.message})"
      nil
    end

    # Fetches one page and records its items.
    #
    # @param uri [String] query-string suffix appended to the site URL
    # @return [Array] #result, or an empty array when the page failed to load
    def fetch_result(uri = '')
      document = get_xmldata(@site_url + uri)
      return [] unless document

      document.xpath(@item_selector).each do |item|
        title = item.xpath(@title_selector).text.force_encoding('UTF-8')
        price = item.xpath(@price_selector).text
        strip_filter(title, price)
      end
      @result
    end

    # Removes the currency prefix from +price+ and appends the item to
    # #result when it has a title and passes the price filter, if one is set.
    def strip_filter(title, price)
      # Non-bang gsub: gsub! returns nil when there is nothing to replace,
      # which used to discard prices that lack the prefix.
      price = price.gsub(/NT. /, '')
      return if title.empty?
      return unless @price_filter.to_s.empty? || price_matches?(price)

      @result << { title: title, price: price }
    end

    # Applies the price filter to +price+ without eval: the operator comes
    # from a fixed whitelist and both operands are converted to numbers, so
    # neither scraped text nor user input is ever executed as code.
    #
    # @raise [ArgumentError] when the filter is not "<operator><number>"
    # @return [Boolean] false when +price+ is not numeric
    def price_matches?(price)
      filter = PRICE_FILTER.match(@price_filter.to_s)
      raise ArgumentError, "invalid price filter: #{@price_filter.inspect}" unless filter

      amount = to_number(price)
      return false if amount.nil?

      amount.public_send(filter[:operator], Float(filter[:amount]))
    end

    # @return [Float, nil] +text+ as a number, or nil when it is not numeric
    def to_number(text)
      Float(text)
    rescue ArgumentError, TypeError
      nil
    end
  end
end
metadata ADDED
@@ -0,0 +1,56 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: queenshop
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Even Chang
8
+ - Luis Herrera
9
+ - Katy Lee
10
+ - Frank Lee
11
+ autorequire:
12
+ bindir: bin
13
+ cert_chain: []
14
+ date: 2015-10-23 00:00:00.000000000 Z
15
+ dependencies: []
16
+ description: This gem scrapes Queenshop's website and returns the items with
17
+ corresponding prices
18
+ email:
19
+ - kiki44552002@gmail.com
20
+ - lmherrera86@gmail.com
21
+ - katylee41024@yahoo.com.tw
22
+ - frank1234211@gmail.com
23
+ executables:
24
+ - queenshop
25
+ extensions: []
26
+ extra_rdoc_files: []
27
+ files:
28
+ - bin/queenshop
29
+ - lib/queenshop.rb
30
+ - lib/queenshop/config.rb
31
+ - lib/queenshop/scraper.rb
32
+ homepage: http://rubygems.org/gems/queenshop
33
+ licenses:
34
+ - MIT
35
+ metadata: {}
36
+ post_install_message:
37
+ rdoc_options: []
38
+ require_paths:
39
+ - lib
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ required_rubygems_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: '0'
50
+ requirements: []
51
+ rubyforge_project:
52
+ rubygems_version: 2.5.0
53
+ signing_key:
54
+ specification_version: 4
55
+ summary: Scraper for Queenshop
56
+ test_files: []