pinkoi 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: fea9cf5201f4af10ea864be7388516e8379ae3f6
4
+ data.tar.gz: 7252bfb82bf9d0d45ccd934781cd49b8aed25689
5
+ SHA512:
6
+ metadata.gz: 176945132328d8f1084970df9aeaf0ffc4d478fe128c4a8f328218e055f8f29e99ec81ce240363c5fcf54fdd6de08f4979ffd938aed2761536f15741bf463760
7
+ data.tar.gz: 86b02d047ca377b6408a6e938619da33519b4af4fde86d11f861b2b77c08071907b2d53f2acc7022d648cb68e01d65c377a31e7afac98b852a7d13bd0111e6f8
data/bin/pinkoi ADDED
@@ -0,0 +1 @@
1
+ pinkoi.rb
@@ -0,0 +1,51 @@
1
# Parses pinkoi command-line options of the form `--key=value` and turns
# them into the query string that is appended to the browse URL.
#
# Only the option names in VALID_ARGS are accepted. A malformed or unknown
# argument aborts the process (Kernel#abort) after printing the usage text.
class ParameterParser
  # Option names accepted on the command line, without the leading `--`.
  VALID_ARGS = %w[category price subcategory location material].freeze

  # Matches one whole argument. Anchored with \A / \z so that stray text
  # around an otherwise valid option (e.g. "junk--category=1") is rejected.
  ARG_PATTERN = /\A--(?<key>[^=]*)=(?<value>.*)\z/

  # The home page is ajax loaded, so a default parameter is used when no
  # arguments are given to force the scraper to get data.
  DEFAULT_ARGS = ['--category=1'].freeze

  private_constant :VALID_ARGS, :ARG_PATTERN, :DEFAULT_ARGS

  # @param argv [Array<String>] raw arguments; defaults to the process ARGV.
  #   The array is only read, never modified.
  def initialize(argv = ARGV)
    @parameters = {}
    validate_args(argv)
  end

  # Builds the query string from the parsed parameters.
  # Values are inserted verbatim (no numeric validation or URL escaping).
  #
  # @return [String] e.g. "category=1&price=0,100"
  def build_uri
    @parameters.map { |key, value| "#{key}=#{value}" }.join('&')
  end

  # @return [String] the multi-line help text shown on invalid usage.
  def usage
    [
      'Usage: pinkoi [options]',
      '--category=(int)',
      '--price=[int[,int]]',
      '--location=[US, JP, HK, CN, TW]',
      '--subcategory=(int)',
      '--material=(int[,int,int,...])',
      'examples:',
      'pinkoi --category=1 --price=300',
      'pinkoi --category=1 --price=0,100',
      'pinkoi --category=1 --subcategory=205 --price=0,100'
    ].join("\n")
  end

  private

  # Validates every argument and stores it in @parameters (string keys).
  # When the same option is given more than once, the last value wins.
  # Values themselves are not validated here.
  def validate_args(argv = ARGV)
    args = argv.empty? ? DEFAULT_ARGS : argv
    args.each do |arg|
      match = ARG_PATTERN.match(arg)
      abort_with_usage unless match && VALID_ARGS.include?(match[:key])
      @parameters[match[:key]] = match[:value]
    end
  end

  # Terminates the process with the usage text on stderr.
  def abort_with_usage
    abort "invalid usage...\n#{usage}\n\n"
  end
end
@@ -0,0 +1,52 @@
1
+ #!/usr/bin/env ruby
2
+ require 'oga'
3
+ require 'open-uri'
4
+
5
# Scrapes item listings from Pinkoi browse pages.
module PinkoiScraper
  # Downloads a browse page and extracts the title and price of each item
  # using XPath selectors. The selectors and the base URL can be overridden
  # through the attribute writers.
  class Filter
    # Items collected so far. Every successful fetch_result call appends to
    # this array, so results accumulate across calls.
    attr_reader :result
    attr_writer :item_selector, :title_selector, :price_selector, :site_url

    def initialize
      @result = []
      # xpath selectors that will be used to scrape data
      @item_selector = "//div[contains(@class,'items')]/div"
      @title_selector = "div[contains(@class,'title')]"
      @price_selector = "div[@class='info']/div[@class='price']"
      @site_url = 'http://www.pinkoi.com/browse?'
    end

    # Fetches one browse page and appends its items to #result.
    #
    # @param uri [String] query string appended to the site URL
    # @return [Array<Hash>] the accumulated items as
    #   `{ title: String, price: String }`, or an empty array when the page
    #   could not be fetched or parsed
    def fetch_result(uri = 'category=1')
      document = get_xmldata(@site_url + uri)
      # hard return on an error
      return [] unless document

      document.xpath(@item_selector).each do |item|
        title = item.xpath(@title_selector).text
        # entries without a title are skipped
        next if title.empty?

        @result << { title: title, price: item.xpath(@price_selector).text }
      end
      result
    end

    private

    # Downloads and parses the page at +url+.
    #
    # The URL is opened through URI#open (open-uri) instead of Kernel#open,
    # so a string such as "| command" can never be run as a subprocess, and
    # the block form closes the response stream once parsing is done.
    #
    # @return [Object, nil] the parsed document, or nil on any StandardError
    #   (invalid URL, network failure, parse error)
    def get_xmldata(url)
      URI.parse(url).open { |io| Oga.parse_html(io) }
    rescue StandardError
      nil
    end
  end
end
data/lib/pinkoi.rb ADDED
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative 'pinkoi/parameter_parser'
3
+ require_relative 'pinkoi/pinkoi_scraper'
metadata ADDED
@@ -0,0 +1,56 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pinkoi
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Even Chang
8
+ - Luis Herrera
9
+ - Katy Lee
10
+ - Frank Lee
11
+ autorequire:
12
+ bindir: bin
13
+ cert_chain: []
14
+ date: 2015-10-17 00:00:00.000000000 Z
15
+ dependencies: []
16
+ description: This is a gem that scrapes pinkoi's website and returns the first two pages
17
+ of items
18
+ email:
19
+ - kiki44552002@gmail.com
20
+ - lmherrera86@gmail.com
21
+ - katylee41024@yahoo.com.tw
22
+ - frank1234211@gmail.com
23
+ executables:
24
+ - pinkoi
25
+ extensions: []
26
+ extra_rdoc_files: []
27
+ files:
28
+ - bin/pinkoi
29
+ - lib/pinkoi.rb
30
+ - lib/pinkoi/parameter_parser.rb
31
+ - lib/pinkoi/pinkoi_scraper.rb
32
+ homepage: http://rubygems.org/gems/pinkoi
33
+ licenses:
34
+ - MIT
35
+ metadata: {}
36
+ post_install_message:
37
+ rdoc_options: []
38
+ require_paths:
39
+ - lib
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ required_rubygems_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: '0'
50
+ requirements: []
51
+ rubyforge_project:
52
+ rubygems_version: 2.5.0
53
+ signing_key:
54
+ specification_version: 4
55
+ summary: Scraper for Pinkoi
56
+ test_files: []