queenshop 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/queenshop +6 -0
- data/lib/queenshop.rb +3 -0
- data/lib/queenshop/config.rb +58 -0
- data/lib/queenshop/scraper.rb +79 -0
- metadata +56 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4ccb94cb12167097b2f0d30b5602d911d37c4295
|
4
|
+
data.tar.gz: 68c743d55718026e871bf2b041adfe2e2b2329df
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 721a7e7e251081cbdbe220481ffa5038b2bd4a80f6ae6e393207081b106730b1f91ee732dbaa779a9797c24f0ee3e025f0a2d0295aa8418d333c01694aa34df7
|
7
|
+
data.tar.gz: 5a925e85811e0f6eeba1848e8a63d0f6604d59a98aaf220e899f9b1b6cf3f9a0b99bef679037c3c591153a94f982c265919c7510a46c40266467d112e6f28567
|
data/bin/queenshop
ADDED
data/lib/queenshop.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# this module takes care of
|
4
|
+
# parsing the parameters
|
5
|
+
# Parses and validates the "key=value" command-line arguments of the
# queenshop executable.
#
# Classes that include this module get +validate_args+, which fills
# +parameters+, and +pages+, which expands the requested page range.
module Validate
  # Hash with the :item, :price and :pages values (all Strings) produced by
  # the last call to +validate_args+.
  attr_reader :parameters

  # Names that may appear on the left-hand side of a "key=value" argument.
  VALID_ARGS = [:item, :price, :pages].freeze

  # Shape of one argument: everything before the first "=" is the key.
  ARG_PATTERN = /\A(?<key>[^=]*)=(?<value>.*)\z/

  # Per-key value formats. Keys without an entry (:item) accept any text.
  # The patterns are anchored with \A and \z so the whole value must match,
  # and the decimal point is escaped so that only a real number can follow
  # the comparison operator.
  VALUE_PATTERNS = {
    price: /\A(>=|<=|==|>|<)\d+(\.\d+)?\z/,
    pages: /\A\d+(\.\.\d+)?\z/
  }.freeze

  # Reads every "key=value" string in +args+ into +parameters+, starting from
  # the defaults (empty item, empty price, pages "1..2").
  #
  # Aborts the process (SystemExit) with the usage text when an argument is
  # not of the form key=value or names an unknown key, and with
  # "invalid parameters" when a price or pages value is malformed.
  #
  # Returns +args+.
  def validate_args(args)
    @parameters = { item: '', price: '', pages: '1..2' }
    args.each do |arg|
      match = ARG_PATTERN.match(arg.to_s)
      # Look the key up by name instead of calling to_sym on user input.
      key = match && VALID_ARGS.find { |name| name.to_s == match[:key] }
      abort "invalid usage...\n#{usage}\n\n" unless key
      @parameters[key] = check(match)
    end
  end # end validate_args

  # Returns the value captured in +match+ after checking it against the
  # format registered for its key; aborts with "invalid parameters" when the
  # value does not have that format.
  def check(match)
    value = match[:value]
    pattern = VALUE_PATTERNS[match[:key].to_sym]
    abort 'invalid parameters' if pattern && pattern !~ value
    value
  end

  # Expands the :pages parameter ("3" or "1..4") into an Array of page
  # numbers and remembers it in @pages. A single number yields one page.
  def pages
    numbers = @parameters[:pages].scan(/\d+/)
    @pages = (numbers.first.to_i..numbers.last.to_i).to_a
  end

  # Help text shown when the arguments cannot be understood. It describes the
  # formats that +check+ accepts; price values need quoting because < and >
  # are shell redirection characters.
  def usage
    [
      'Usage: queenshop [options]',
      '  item=(string)',
      '  price=(>|<|>=|<=|==)(number)',
      '  pages=(number[..number])',
      'examples:',
      '  queenshop item="blouse" price="<300"',
      '  queenshop price=">=100" pages=1..3',
      '  queenshop item="skirt"'
    ].join("\n")
  end
end
|
51
|
+
|
52
|
+
# Configuration object built from the command-line arguments; all of its
# behaviour comes from the Validate mixin.
class QConfig
  include Validate

  # Validates +args+ and then calls +pages+, as the original constructor did.
  def initialize(args)
    validate_args(args)
    pages
  end
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'oga'
|
3
|
+
require 'open-uri'
|
4
|
+
require_relative './config'
|
5
|
+
|
6
|
+
# scrape data
|
7
|
+
# Scrapes item titles and prices from the Queenshop product listing pages.
module QueenShopScraper
  # Downloads listing pages, picks items out of them with XPath selectors and
  # optionally keeps only the items whose price satisfies a filter such as
  # "<300".
  class Filter
    # Accepted price filter: a comparison operator followed by a number.
    PRICE_FILTER = /\A(?<operator>>=|<=|==|>|<)\s*(?<amount>\d+(?:\.\d+)?)\z/

    # Array of { title:, price: } hashes (both Strings) collected by the most
    # recent call to +scrape+.
    attr_reader :result

    attr_writer :item_selector, :title_selector, :price_selector, :site_url

    def initialize
      @result = []
      # XPath selectors that will be used to scrape data
      @item_selector = "//div[@class='pditem']/div[@class='pdicon']"
      @title_selector = "div[@class='pdicon_name']/a"
      @price_selector = "div[@class='pdicon_price']/div[@style='font-weight:bold;']"
      @site_url = 'https://www.queenshop.com.tw/m/PDList2.asp?'
      @price_filter = nil
    end

    # Scrapes every page requested by the arguments and returns the matching
    # items.
    #
    # params - Array of "key=value" Strings; the process arguments (ARGV) are
    #          appended to a copy, so the caller's array is left untouched.
    #
    # Returns an Array of { title:, price: } hashes. Each call starts from an
    # empty result, so repeated calls do not pile up duplicates.
    # QConfig aborts the process when the arguments are invalid.
    def scrape(params = [])
      conf = QConfig.new(params + ARGV)
      @price_filter = conf.parameters[:price]
      @result = []

      conf.pages.each { |page| fetch_result("&page=#{page}") }
      @result
    end

    private

    # Downloads +url+ and parses it as HTML.
    #
    # The URL is opened through its URI object rather than Kernel#open, which
    # stopped handling URLs in Ruby 3.0; the block form closes the handle
    # once parsing is done.
    #
    # Returns the parsed document, or nil (after a warning on stderr) when the
    # page cannot be fetched or parsed.
    def get_xmldata(url)
      URI.parse(url).open { |io| Oga.parse_html(io) }
    rescue StandardError => e
      warn "queenshop: could not load #{url} (#{e.class}: #{e.message})"
      nil
    end

    # Fetches the listing at @site_url + +uri+ and feeds every item found by
    # the selectors to +strip_filter+.
    #
    # Returns [] when the page could not be loaded, otherwise the
    # accumulated result.
    def fetch_result(uri = '')
      document = get_xmldata(@site_url + uri)
      return [] unless document

      document.xpath(@item_selector).each do |item|
        title = item.xpath(@title_selector).text.force_encoding('UTF-8')
        price = item.xpath(@price_selector).text
        strip_filter(title, price)
      end
      @result
    end

    # Removes the currency prefix from +price+ and appends the item to the
    # result when it has a title and passes the price filter.
    def strip_filter(title, price)
      # Non-bang gsub: gsub! returns nil when nothing was replaced, which
      # used to lose prices that carry no prefix.
      price = price.to_s.gsub(/NT. /, '')
      # Untitled entries are skipped whether or not a filter is active.
      return if title.empty?
      return unless price_matches?(price)

      @result << { title: title.to_s, price: price }
    end

    # Tells whether +price+ satisfies @price_filter. An empty or unset filter
    # accepts everything; a price that is not a number never matches.
    #
    # The comparison is done on parsed numbers instead of eval, so neither
    # scraped page text nor the command line is ever executed as Ruby code.
    #
    # Raises ArgumentError when the filter is not an operator plus a number.
    def price_matches?(price)
      filter = @price_filter.to_s
      return true if filter.empty?

      rule = PRICE_FILTER.match(filter)
      raise ArgumentError, "invalid price filter: #{filter.inspect}" unless rule

      amount = to_number(price)
      return false unless amount

      # The operator can only be one of the five whitelisted by PRICE_FILTER.
      amount.public_send(rule[:operator], rule[:amount].to_f)
    end

    # Converts +text+ to a Float, or returns nil when it is not numeric.
    def to_number(text)
      Float(text)
    rescue ArgumentError, TypeError
      nil
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: queenshop
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Even Chang
|
8
|
+
- Luis Herrera
|
9
|
+
- Katy Lee
|
10
|
+
- Frank Lee
|
11
|
+
autorequire:
|
12
|
+
bindir: bin
|
13
|
+
cert_chain: []
|
14
|
+
date: 2015-10-23 00:00:00.000000000 Z
|
15
|
+
dependencies: []
|
16
|
+
description: This is a gem scraping queenshop's website and returns the items with
|
17
|
+
corresponding prices
|
18
|
+
email:
|
19
|
+
- kiki44552002@gmail.com
|
20
|
+
- lmherrera86@gmail.com
|
21
|
+
- katylee41024@yahoo.com.tw
|
22
|
+
- frank1234211@gmail.com
|
23
|
+
executables:
|
24
|
+
- queenshop
|
25
|
+
extensions: []
|
26
|
+
extra_rdoc_files: []
|
27
|
+
files:
|
28
|
+
- bin/queenshop
|
29
|
+
- lib/queenshop.rb
|
30
|
+
- lib/queenshop/config.rb
|
31
|
+
- lib/queenshop/scraper.rb
|
32
|
+
homepage: http://rubygems.org/gems/pinkoi
|
33
|
+
licenses:
|
34
|
+
- MIT
|
35
|
+
metadata: {}
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options: []
|
38
|
+
require_paths:
|
39
|
+
- lib
|
40
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
45
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0'
|
50
|
+
requirements: []
|
51
|
+
rubyforge_project:
|
52
|
+
rubygems_version: 2.5.0
|
53
|
+
signing_key:
|
54
|
+
specification_version: 4
|
55
|
+
summary: Scraper for Queenshop
|
56
|
+
test_files: []
|