pinkoi 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/pinkoi +1 -0
- data/lib/pinkoi/parameter_parser.rb +51 -0
- data/lib/pinkoi/pinkoi_scraper.rb +52 -0
- data/lib/pinkoi.rb +3 -0
- metadata +56 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: fea9cf5201f4af10ea864be7388516e8379ae3f6
|
4
|
+
data.tar.gz: 7252bfb82bf9d0d45ccd934781cd49b8aed25689
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 176945132328d8f1084970df9aeaf0ffc4d478fe128c4a8f328218e055f8f29e99ec81ce240363c5fcf54fdd6de08f4979ffd938aed2761536f15741bf463760
|
7
|
+
data.tar.gz: 86b02d047ca377b6408a6e938619da33519b4af4fde86d11f861b2b77c08071907b2d53f2acc7022d648cb68e01d65c377a31e7afac98b852a7d13bd0111e6f8
|
data/bin/pinkoi
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
pinkoi.rb
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Parses the command-line options given to the pinkoi executable and turns
# them into the query string that is appended to the browse URL.
#
# Options are read from ARGV in the form --key=value. Only the keys listed in
# VALID_ARGS are accepted; a malformed or unknown option aborts the process
# after printing the usage text.
class ParameterParser
  # Option names accepted on the command line.
  VALID_ARGS = %w[category price subcategory location material].freeze

  # Used when no options are given: the home page is ajax loaded, so a
  # default parameter is set to force the scraper to get data.
  DEFAULT_ARG = '--category=1'.freeze

  # Anchored at both ends so text in front of the leading dashes
  # (e.g. "junk--category=1") is rejected instead of silently accepted.
  ARG_PATTERN = /\A--(?<key>[^=]+)=(?<value>.*)\z/

  # Lines of the help text returned by #usage.
  USAGE_LINES = [
    'Usage: pinkoi [options]',
    '--category=(int)',
    '--price=[int[,int]]',
    '--location=[US, JP, HK, CN, TW]',
    '--subcategory=(int)',
    '--material=(int[,int,int,...])',
    'examples:',
    'pinkoi --category=1 --price=300',
    'pinkoi --category=1 --price=0,100',
    'pinkoi --category=1 --subcategory=205 --price=0,100'
  ].freeze

  # Reads and validates the options currently in ARGV.
  # Aborts the process (exit status 1) when any option is invalid.
  def initialize
    @parameters = {}
    validate_args
  end

  # Builds the query string from the parsed options.
  #
  # Values are inserted as given; they are not validated or URL-encoded here.
  #
  # @return [String] "key=value" pairs joined with "&", in the order the
  #   options were given
  def build_uri
    @parameters.map { |key, value| "#{key}=#{value}" }.join('&')
  end

  # @return [String] the help text listing the supported options
  def usage
    USAGE_LINES.join("\n")
  end

  private

  # Fills @parameters from ARGV, keyed by option name (String).
  # When an option is repeated the last value wins.
  def validate_args
    # Kept from the original behaviour: the default is written into ARGV
    # itself, so code reading ARGV afterwards sees it as well.
    ARGV << DEFAULT_ARG.dup if ARGV.empty?

    ARGV.each do |arg|
      match = ARG_PATTERN.match(arg)
      # Keys are compared as strings so user input is never turned into
      # symbols.
      unless match && VALID_ARGS.include?(match[:key])
        abort "invalid usage...\n#{usage}\n\n"
      end
      # NOTE: values are stored as given; numeric validation is still to do.
      @parameters[match[:key]] = match[:value]
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'oga'
|
3
|
+
require 'open-uri'
|
4
|
+
|
5
|
+
# Scrapes item listings from the Pinkoi browse pages.
module PinkoiScraper
  # Uses XPath selectors to pull the title and price of every listed item
  # out of a browse page.
  #
  # The selectors and the base URL can be overridden through the writers.
  # Scraped items are appended to #result, so calling #fetch_result several
  # times on one instance accumulates the items of every call.
  class Filter
    attr_reader :result
    attr_writer :item_selector
    attr_writer :title_selector
    attr_writer :price_selector
    attr_writer :site_url

    def initialize
      @result = []
      # xml selectors that will be used to scrape data
      @item_selector = "//div[contains(@class,'items')]/div"
      @title_selector = "div[contains(@class,'title')]"
      @price_selector = "div[@class='info']/div[@class='price']"
      @site_url = 'http://www.pinkoi.com/browse?'
    end

    # Fetches one browse page and records the items found on it.
    #
    # @param uri [String] query string appended to the site URL
    # @return [Array<Hash>] the accumulated {title:, price:} hashes, or an
    #   empty array when the page could not be fetched or parsed
    def fetch_result(uri = 'category=1')
      document = get_xmldata(@site_url + uri)
      # hard return on an error
      return [] unless document

      document.xpath(@item_selector).each do |item|
        title = item.xpath(@title_selector).text
        # nodes without a title are not items
        next if title.empty?

        price = item.xpath(@price_selector).text
        @result << { title: title, price: price }
      end
      result
    end

    private

    # Downloads and parses the page at +url+.
    #
    # The URL is opened through its parsed URI object instead of
    # Kernel#open: Kernel#open no longer handles URLs on Ruby 3.0+ and would
    # execute "|command" strings or open local files. The block form closes
    # the response once it has been read.
    #
    # @return [Object, nil] whatever Oga.parse_html returns, or nil on any
    #   error (invalid URL, network failure, parse failure)
    def get_xmldata(url)
      html = URI.parse(url).open(&:read)
      Oga.parse_html(html)
    rescue StandardError
      nil
    end
  end
end
|
data/lib/pinkoi.rb
ADDED
metadata
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pinkoi
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Even Chang
|
8
|
+
- Luis Herrera
|
9
|
+
- Katy Lee
|
10
|
+
- Frank Lee
|
11
|
+
autorequire:
|
12
|
+
bindir: bin
|
13
|
+
cert_chain: []
|
14
|
+
date: 2015-10-17 00:00:00.000000000 Z
|
15
|
+
dependencies: []
|
16
|
+
description: This is a gem that scrapes Pinkoi's website and returns the first two pages
|
17
|
+
of items
|
18
|
+
email:
|
19
|
+
- kiki44552002@gmail.com
|
20
|
+
- lmherrera86@gmail.com
|
21
|
+
- katylee41024@yahoo.com.tw
|
22
|
+
- frank1234211@gmail.com
|
23
|
+
executables:
|
24
|
+
- pinkoi
|
25
|
+
extensions: []
|
26
|
+
extra_rdoc_files: []
|
27
|
+
files:
|
28
|
+
- bin/pinkoi
|
29
|
+
- lib/pinkoi.rb
|
30
|
+
- lib/pinkoi/parameter_parser.rb
|
31
|
+
- lib/pinkoi/pinkoi_scraper.rb
|
32
|
+
homepage: http://rubygems.org/gems/pinkoi
|
33
|
+
licenses:
|
34
|
+
- MIT
|
35
|
+
metadata: {}
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options: []
|
38
|
+
require_paths:
|
39
|
+
- lib
|
40
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
45
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0'
|
50
|
+
requirements: []
|
51
|
+
rubyforge_project:
|
52
|
+
rubygems_version: 2.5.0
|
53
|
+
signing_key:
|
54
|
+
specification_version: 4
|
55
|
+
summary: Scraper for Pinkoi
|
56
|
+
test_files: []
|