pinkoi 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/pinkoi +1 -0
- data/lib/pinkoi/parameter_parser.rb +51 -0
- data/lib/pinkoi/pinkoi_scraper.rb +52 -0
- data/lib/pinkoi.rb +3 -0
- metadata +56 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: fea9cf5201f4af10ea864be7388516e8379ae3f6
|
4
|
+
data.tar.gz: 7252bfb82bf9d0d45ccd934781cd49b8aed25689
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 176945132328d8f1084970df9aeaf0ffc4d478fe128c4a8f328218e055f8f29e99ec81ce240363c5fcf54fdd6de08f4979ffd938aed2761536f15741bf463760
|
7
|
+
data.tar.gz: 86b02d047ca377b6408a6e938619da33519b4af4fde86d11f861b2b77c08071907b2d53f2acc7022d648cb68e01d65c377a31e7afac98b852a7d13bd0111e6f8
|
data/bin/pinkoi
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
pinkoi.rb
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Parses pinkoi command-line options of the form --key=value and
# turns them into the query-string part of a browse URL.
class ParameterParser
  # Option names that may appear on the command line.
  VALID_ARGS = %w[category price subcategory location material].freeze

  # One complete "--key=value" argument. Anchored at both ends so that
  # text before the leading dashes makes the argument invalid; the key
  # stops at the first "=", so the value itself may contain "=".
  ARG_PATTERN = /\A--(?<key>[^=]+)=(?<value>.*)\z/

  # Lines of the help text returned by #usage.
  USAGE_LINES = [
    'Usage: pinkoi [options]',
    '--category=(int)',
    '--price=[int[,int]]',
    '--location=[US, JP, HK, CN, TW]',
    '--subcategory=(int)',
    '--material=(int[,int,int,...])',
    'examples:',
    'pinkoi --category=1 --price=300',
    'pinkoi --category=1 --price=0,100',
    'pinkoi --category=1 --subcategory=205 --price=0,100'
  ].freeze

  # Parses and validates the given arguments. Aborts the process with
  # the usage text (via Kernel#abort) when an argument is malformed or
  # names an unknown option.
  #
  # @param args [Array<String>] arguments to parse; defaults to ARGV
  def initialize(args = ARGV)
    @parameters = {}
    validate_args(args)
  end

  # Joins the parsed options into a query string, e.g.
  # "category=1&price=0,100". Values are inserted as given; they are
  # neither validated nor URL-escaped here.
  #
  # @return [String]
  def build_uri
    @parameters.map { |key, value| "#{key}=#{value}" }.join('&')
  end

  # @return [String] a new copy of the command-line help text
  def usage
    USAGE_LINES.join("\n")
  end

  private

  # Fills @parameters from args, keyed by option name (String).
  # A later occurrence of an option overrides an earlier one.
  def validate_args(args)
    # the home page is ajax loaded so a default parameter is set
    # to force the scraper to get data; args itself is left untouched
    args = ['--category=1'] if args.empty?

    args.each do |arg|
      match = ARG_PATTERN.match(arg)
      # Compare as strings so user input is never turned into symbols.
      valid = match && VALID_ARGS.include?(match[:key])
      abort "invalid usage...\n#{usage}\n\n" unless valid

      # should also validate value to match numbers
      # in other function
      @parameters[match[:key]] = match[:value]
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'oga'
|
3
|
+
require 'open-uri'
|
4
|
+
|
5
|
+
# scrape data
module PinkoiScraper
  # Downloads a browse page and uses xpath selectors to pull the title
  # and price out of every listed item.
  class Filter
    # Items collected so far, as hashes with :title and :price keys.
    attr_reader :result
    # The selectors and the base URL can be replaced after construction.
    attr_writer :item_selector, :title_selector, :price_selector, :site_url

    def initialize
      @result = []
      # xml selectors that will be used to scrape data
      @item_selector = "//div[contains(@class,'items')]/div"
      @title_selector = "div[contains(@class,'title')]"
      @price_selector = "div[@class='info']/div[@class='price']"
      @site_url = 'http://www.pinkoi.com/browse?'
    end

    # Fetches the page at site_url + uri and appends one hash per item
    # with a non-empty title to #result. Results accumulate across
    # calls on the same instance.
    #
    # @param uri [String] query string appended to the site URL
    # @return [Array<Hash>] the accumulated #result array, or a new
    #   empty array when the page could not be fetched or parsed
    def fetch_result(uri = 'category=1')
      document = get_xmldata(@site_url + uri)
      # hard return on an error
      return [] if document.nil?

      document.xpath(@item_selector).each do |item|
        title = item.xpath(@title_selector).text
        next if title.empty?

        @result << { title: title, price: item.xpath(@price_selector).text }
      end
      result
    end

    private

    # Downloads url and parses it with Oga.
    #
    # The URL is opened through URI#open (provided by open-uri) rather
    # than Kernel#open, so only real URLs are fetched; the block form
    # closes the download handle once parsing is done.
    #
    # @return [Object, nil] the parsed document, or nil on any
    #   StandardError raised while parsing the URL, fetching or parsing
    def get_xmldata(url)
      URI.parse(url).open { |page| Oga.parse_html(page) }
    rescue StandardError
      nil
    end
  end
end
|
data/lib/pinkoi.rb
ADDED
metadata
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pinkoi
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Even Chang
|
8
|
+
- Luis Herrera
|
9
|
+
- Katy Lee
|
10
|
+
- Frank Lee
|
11
|
+
autorequire:
|
12
|
+
bindir: bin
|
13
|
+
cert_chain: []
|
14
|
+
date: 2015-10-17 00:00:00.000000000 Z
|
15
|
+
dependencies: []
|
16
|
+
description: This is a gem that scrapes pinkoi's website and returns the first two pages
|
17
|
+
of items
|
18
|
+
email:
|
19
|
+
- kiki44552002@gmail.com
|
20
|
+
- lmherrera86@gmail.com
|
21
|
+
- katylee41024@yahoo.com.tw
|
22
|
+
- frank1234211@gmail.com
|
23
|
+
executables:
|
24
|
+
- pinkoi
|
25
|
+
extensions: []
|
26
|
+
extra_rdoc_files: []
|
27
|
+
files:
|
28
|
+
- bin/pinkoi
|
29
|
+
- lib/pinkoi.rb
|
30
|
+
- lib/pinkoi/parameter_parser.rb
|
31
|
+
- lib/pinkoi/pinkoi_scraper.rb
|
32
|
+
homepage: http://rubygems.org/gems/pinkoi
|
33
|
+
licenses:
|
34
|
+
- MIT
|
35
|
+
metadata: {}
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options: []
|
38
|
+
require_paths:
|
39
|
+
- lib
|
40
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
45
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0'
|
50
|
+
requirements: []
|
51
|
+
rubyforge_project:
|
52
|
+
rubygems_version: 2.5.0
|
53
|
+
signing_key:
|
54
|
+
specification_version: 4
|
55
|
+
summary: Scraper for Pinkoi
|
56
|
+
test_files: []
|