queenshop 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/queenshop +6 -0
- data/lib/queenshop.rb +3 -0
- data/lib/queenshop/config.rb +58 -0
- data/lib/queenshop/scraper.rb +79 -0
- metadata +56 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4ccb94cb12167097b2f0d30b5602d911d37c4295
|
4
|
+
data.tar.gz: 68c743d55718026e871bf2b041adfe2e2b2329df
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 721a7e7e251081cbdbe220481ffa5038b2bd4a80f6ae6e393207081b106730b1f91ee732dbaa779a9797c24f0ee3e025f0a2d0295aa8418d333c01694aa34df7
|
7
|
+
data.tar.gz: 5a925e85811e0f6eeba1848e8a63d0f6604d59a98aaf220e899f9b1b6cf3f9a0b99bef679037c3c591153a94f982c265919c7510a46c40266467d112e6f28567
|
data/bin/queenshop
ADDED
data/lib/queenshop.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Mixin that parses and validates the `key=value` command-line options
# understood by the queenshop executable.
#
# After #validate_args has run, the accepted options are available through
# #parameters and the requested page numbers through #pages.
module Validate
  # Option names accepted on the command line.
  VALID_ARGS = %i[item price pages].freeze

  # A price filter is a comparison operator followed by a plain number.
  # Anchored with \A/\z and with a literal dot, so nothing but an operator
  # and a number can slip through.
  PRICE_FORMAT = /\A(?:>=|<=|==|>|<)\d+(?:\.\d+)?\z/

  # A page selection is a single page ("3") or an inclusive range ("1..5").
  PAGES_FORMAT = /\A\d+(?:\.\.\d+)?\z/

  # Hash of option name (Symbol) => raw option value (String).
  attr_reader :parameters

  # Parses every "key=value" string in +args+ into #parameters, starting
  # from the defaults (no item, no price filter, pages 1..2).
  #
  # Aborts the process with the usage text when an argument has no "=" or
  # names an unknown option, and with "invalid parameters" when a price or
  # pages value is malformed.
  def validate_args(args)
    @parameters = { item: '', price: '', pages: '1..2' }
    known_keys = VALID_ARGS.map(&:to_s)

    args.each do |arg|
      # Split on the first "=" only, so values such as ">=300" stay intact.
      key, separator, value = arg.to_s.partition('=')
      unless separator == '=' && known_keys.include?(key)
        abort "invalid usage...\n#{usage}\n\n"
      end

      @parameters[key.to_sym] = check(key: key, value: value)
    end
  end

  # Validates one parsed option and returns its value unchanged.
  #
  # +match+ is anything that answers [:key] and [:value] (a MatchData with
  # those named captures or a plain Hash). Only the price and pages options
  # are format-checked; a malformed value aborts the process with
  # "invalid parameters".
  def check(match)
    key = match[:key].to_s
    value = match[:value]

    abort 'invalid parameters' if key == 'price' && value.to_s !~ PRICE_FORMAT
    abort 'invalid parameters' if key == 'pages' && value.to_s !~ PAGES_FORMAT
    value
  end

  # Expands the pages option into an Array of page numbers, from the first
  # number found in the option to the last one (inclusive). The array is
  # also stored in @pages.
  def pages
    numbers = @parameters[:pages].scan(/\d+/)
    first_page = numbers.first.to_i
    last_page = numbers.last.to_i
    @pages = (first_page..last_page).to_a
  end

  # Help text shown when the command line cannot be parsed.
  # The price examples are quoted because ">" and "<" are shell redirections.
  def usage
    [
      'Usage: queenshop [options]',
      '  item=(string)',
      '  price=(>|<|>=|<=|==)(number)',
      '  pages=(int[..int])',
      'examples:',
      '  queenshop item="blouse" price=">300"',
      '  queenshop price="<=100" pages=1..3',
      '  queenshop item="skirt"'
    ].join("\n")
  end
end
|
51
|
+
|
52
|
+
# Holds the command-line configuration: constructing an instance validates
# the given arguments and computes the list of pages to visit, both via the
# Validate mixin.
class QConfig
  include Validate

  # args - the "key=value" option strings handed to validate_args.
  def initialize(args)
    validate_args(args)
    pages
  end
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'oga'
|
3
|
+
require 'open-uri'
|
4
|
+
require_relative './config'
|
5
|
+
|
6
|
+
# Scrapes product data from the Queenshop mobile product list.
module QueenShopScraper
  # Fetches product-list pages, extracts each item's title and price with
  # XPath selectors, and collects them as { title:, price: } hashes in
  # #result. An optional price filter (for example ">=300") restricts which
  # items are kept.
  class Filter
    # Accepted price filter: a comparison operator followed by a number.
    PRICE_FILTER_FORMAT = /\A(>=|<=|==|>|<)(\d+(?:\.\d+)?)\z/
    # Scraped price text that can safely be treated as a number.
    NUMERIC_FORMAT = /\A\d+(?:\.\d+)?\z/

    attr_reader :result
    attr_writer :item_selector
    attr_writer :title_selector
    attr_writer :price_selector
    attr_writer :site_url

    def initialize
      @result = []
      # XPath selectors used to locate items and their fields.
      @item_selector = "//div[@class='pditem']/div[@class='pdicon']"
      @title_selector = "div[@class='pdicon_name']/a"
      @price_selector = "div[@class='pdicon_price']/div[@style='font-weight:bold;']"
      @site_url = 'https://www.queenshop.com.tw/m/PDList2.asp?'
      @price_filter = nil
    end

    # Scrapes every page selected by the options in +params+ plus ARGV and
    # returns the accumulated #result array.
    #
    # +params+ is left untouched; the options are combined into a new array.
    #
    # NOTE(review): @result is not cleared here, so calling scrape twice on
    # the same instance appends to the earlier results - confirm whether
    # that is intended.
    def scrape(params = [])
      conf = QConfig.new(params + ARGV)
      @price_filter = conf.parameters[:price]

      conf.pages.each { |page| fetch_result("&page=#{page}") }
      @result
    end

    private

    # Downloads +url+ and returns the parsed document, or nil when the page
    # cannot be fetched or parsed (scraping is best-effort per page).
    #
    # URI#open is used instead of Kernel#open so that the string is only
    # ever treated as a URL, and the block form closes the download.
    def get_xmldata(url)
      URI.parse(url).open { |io| Oga.parse_html(io) }
    rescue StandardError
      nil
    end

    # Fetches the page at @site_url + +uri+ and feeds each item found by
    # @item_selector through strip_filter. Returns [] when the page could
    # not be loaded, otherwise the accumulated @result.
    def fetch_result(uri = '')
      document = get_xmldata(@site_url + uri)
      return [] unless document

      document.xpath(@item_selector).each do |item|
        title = item.xpath(@title_selector).text.force_encoding('UTF-8')
        price = item.xpath(@price_selector).text
        strip_filter(title, price)
      end
      @result
    end

    # Removes the "NT. " prefix from +price+ and appends the item to @result
    # unless it has no title or fails the price filter.
    def strip_filter(title, price)
      # Non-destructive gsub: gsub! returns nil when nothing was replaced.
      price = price.to_s.gsub(/NT. /, '')
      return if title.to_s.empty?
      return unless price_matches?(price)

      @result << { title: title.to_s, price: price }
    end

    # True when no price filter is set or when +price+ satisfies it.
    #
    # The comparison is done numerically with an operator taken from a fixed
    # list; scraped text is never evaluated as code. A price that is not a
    # plain number never matches.
    #
    # Raises ArgumentError when the configured filter is malformed.
    def price_matches?(price)
      filter = @price_filter.to_s
      return true if filter.empty?

      parsed = PRICE_FILTER_FORMAT.match(filter)
      raise ArgumentError, "invalid price filter: #{filter.inspect}" unless parsed

      amount = price.strip
      return false unless amount =~ NUMERIC_FORMAT

      amount.to_f.public_send(parsed[1], parsed[2].to_f)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: queenshop
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Even Chang
|
8
|
+
- Luis Herrera
|
9
|
+
- Katy Lee
|
10
|
+
- Frank Lee
|
11
|
+
autorequire:
|
12
|
+
bindir: bin
|
13
|
+
cert_chain: []
|
14
|
+
date: 2015-10-23 00:00:00.000000000 Z
|
15
|
+
dependencies: []
|
16
|
+
description: This is a gem scraping queenshop's website and returns the items with
|
17
|
+
corresponding prices
|
18
|
+
email:
|
19
|
+
- kiki44552002@gmail.com
|
20
|
+
- lmherrera86@gmail.com
|
21
|
+
- katylee41024@yahoo.com.tw
|
22
|
+
- frank1234211@gmail.com
|
23
|
+
executables:
|
24
|
+
- queenshop
|
25
|
+
extensions: []
|
26
|
+
extra_rdoc_files: []
|
27
|
+
files:
|
28
|
+
- bin/queenshop
|
29
|
+
- lib/queenshop.rb
|
30
|
+
- lib/queenshop/config.rb
|
31
|
+
- lib/queenshop/scraper.rb
|
32
|
+
homepage: http://rubygems.org/gems/pinkoi
|
33
|
+
licenses:
|
34
|
+
- MIT
|
35
|
+
metadata: {}
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options: []
|
38
|
+
require_paths:
|
39
|
+
- lib
|
40
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
45
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0'
|
50
|
+
requirements: []
|
51
|
+
rubyforge_project:
|
52
|
+
rubygems_version: 2.5.0
|
53
|
+
signing_key:
|
54
|
+
specification_version: 4
|
55
|
+
summary: Scraper for Queenshop
|
56
|
+
test_files: []
|