joyceshop 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a3acee65aadff3a04affd99a74cdf5504632c695
4
+ data.tar.gz: 2709281e4846fe17d1a70649554aa87ddbea2b81
5
+ SHA512:
6
+ metadata.gz: 2f512e054122cfe33a784207d417de2539687646cf23127dbefe5719270c78c6e7f8740c7103c4242a1ab506b1af2f934a733389c48ba05624243e3cfc3d0d5f
7
+ data.tar.gz: af79d4953a8ad049e69f7074ca7bbc575ecc5e3df6518a9c6b53b08b0006f6b67ab46a94a625de4be3cc608f47999e7dc00bc9d2ec6f74912dd44dc249953632
@@ -0,0 +1,6 @@
1
#!/usr/bin/env ruby
# Command-line entry point for the gem: scrapes page 1 of the accessories
# listing and prints the result to standard output.
#
# require 'joyceshop' # for production
require_relative '../lib/joyceshop.rb' # for testing

# Scraper#accessories returns the parsed items for the requested page number;
# the temporary scraper object is not needed afterwards, so it is built inline.
puts JoyceShop::Scraper.new.accessories(1)
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative 'joyceshop/scraper'
@@ -0,0 +1,101 @@
1
+ #!/usr/bin/env ruby
2
+ require 'oga'
3
+ require 'open-uri'
4
+
5
# Scrapes product listings from the JoyceShop web store.
module JoyceShop
  # Downloads one listing page of a given category and converts every product
  # tile found on it into a Hash with the keys :title, :price, :images, :link.
  #
  # Each public method takes a page number (interpolated into the `pageno`
  # query parameter) and returns an Array of such Hashes.
  class Scraper
    # URIs
    BASE_URI = 'https://www.joyce-shop.com'.freeze
    LATEST_URI = "#{BASE_URI}/PDList.asp?brand=01&item1=&item2=&ya19=&keyword=&recommand=1412170001&ob=F".freeze
    POPULAR_URI = "#{BASE_URI}/PDList.asp?brand=01&item1=&item2=&ya19=&keyword=&recommand=1305080002&ob=F".freeze
    TOPS_URI = "#{BASE_URI}/PDList.asp?brand=01&item1=110&item2=111&ya19=&keyword=&recommand=&ob=F".freeze
    PANTS_URI = "#{BASE_URI}/PDList.asp?brand=01&item1=120&item2=121&ya19=&keyword=&recommand=&ob=F".freeze
    ACCESSORIES_URI = "#{BASE_URI}/PDList.asp?brand=01&item1=140&item2=141&ya19=&keyword=&recommand=&ob=F".freeze

    # Selectors (XPath). ITEM_SELECTOR is applied to the whole document; the
    # others are applied relative to each node matched by ITEM_SELECTOR.
    ITEM_SELECTOR = "//div[contains(@class, 'NEW_shop_list')]/ul/li/div[contains(@class, 'NEW_shop_list_pic')]".freeze
    LINK_SELECTOR = 'a'.freeze
    IMAGE_SELECTOR = "a/img[contains(@class, 'lazyload')]".freeze
    ITEM_INFO_SELECTOR = "div[contains(@class, 'NEW_shop_list_info')]".freeze
    TITLE_SELECTOR = "#{ITEM_INFO_SELECTOR}/div[1]".freeze
    PRICE_SELECTOR = "#{ITEM_INFO_SELECTOR}/span".freeze

    # Regular expressions
    # Captures the first run of dots, Han characters and ASCII letters.
    TITLE_REGEX = /([.\p{Han}[a-zA-Z]]+)/

    # @param page [Integer, String] page number of the "latest" listing
    # @return [Array<Hash>] parsed items
    def latest(page)
      scrape(LATEST_URI, page)
    end

    # @param page [Integer, String] page number of the "popular" listing
    # @return [Array<Hash>] parsed items
    def popular(page)
      scrape(POPULAR_URI, page)
    end

    # @param page [Integer, String] page number of the tops listing
    # @return [Array<Hash>] parsed items
    def tops(page)
      scrape(TOPS_URI, page)
    end

    # @param page [Integer, String] page number of the pants listing
    # @return [Array<Hash>] parsed items
    def pants(page)
      scrape(PANTS_URI, page)
    end

    # @param page [Integer, String] page number of the accessories listing
    # @return [Array<Hash>] parsed items
    def accessories(page)
      scrape(ACCESSORIES_URI, page)
    end

    private

    # Shared pipeline behind every public method: build the paged URI,
    # download it, and parse the items out of the HTML.
    def scrape(listing_uri, page)
      filter(fetch_data(uri_with_page(listing_uri, page)))
    end

    # Appends the page number as the `pageno` query parameter.
    def uri_with_page(uri, page)
      "#{uri}&pageno=#{page}"
    end

    # Downloads the document at +uri+ and returns its body as a String.
    #
    # Goes through URI#open (mixed in by open-uri) rather than Kernel#open:
    # Kernel#open no longer accepts URLs on Ruby 3.0+, and it interprets a
    # leading "|" as a shell command. URI.parse rejects such input instead.
    #
    # @raise [URI::InvalidURIError] if +uri+ is not a syntactically valid URI
    def fetch_data(uri)
      URI.parse(uri).open(&:read)
    end

    # Parses raw HTML and maps every node matching ITEM_SELECTOR to a Hash.
    def filter(raw)
      Oga.parse_html(raw)
         .xpath(ITEM_SELECTOR)
         .map { |item| parse(item) }
    end

    # Builds the result Hash for a single product node.
    def parse(item)
      {
        title: extract_title(item),
        price: extract_price(item),
        images: extract_images(item),
        link: extract_link(item)
      }
    end

    # Returns the first TITLE_REGEX match in the title node's text,
    # or nil when nothing matches.
    def extract_title(item)
      item.xpath(TITLE_SELECTOR).text[TITLE_REGEX, 1]
    end

    # Returns the price node's text converted with String#to_i
    # (so 0 when the text does not start with a number).
    def extract_price(item)
      item.xpath(PRICE_SELECTOR).text.to_i
    end

    # Returns two absolute image URLs: the image's `src`, and the same path
    # with the first ".jpg" replaced by "-h.jpg" (presumably the hover
    # variant served by the site - not verifiable from this file).
    def extract_images(item)
      image = item.xpath(IMAGE_SELECTOR).attribute(:src).first.value
      image_hover = image.sub(/\.jpg/, '-h.jpg')
      ["#{BASE_URI}#{image}", "#{BASE_URI}#{image_hover}"]
    end

    # Returns the absolute URL built from the first link's `href`.
    def extract_link(item)
      "#{BASE_URI}/#{item.xpath(LINK_SELECTOR).attribute(:href).first.value}"
    end
  end
end
metadata ADDED
@@ -0,0 +1,55 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: joyceshop
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Even Chang
8
+ - Luis Herrera
9
+ - Katy Lee
10
+ - Frank Lee
11
+ autorequire:
12
+ bindir: bin
13
+ cert_chain: []
14
+ date: 2015-12-14 00:00:00.000000000 Z
15
+ dependencies: []
16
+ description: This is a gem scraping joyceshop's website and returns the popular/latest
17
+ items
18
+ email:
19
+ - kiki44552002@gmail.com
20
+ - lmherrera86@gmail.com
21
+ - katylee41024@yahoo.com.tw
22
+ - frank1234211@gmail.com
23
+ executables:
24
+ - joyceshop
25
+ extensions: []
26
+ extra_rdoc_files: []
27
+ files:
28
+ - bin/joyceshop
29
+ - lib/joyceshop.rb
30
+ - lib/joyceshop/scraper.rb
31
+ homepage: http://rubygems.org/gems/joyceshop
32
+ licenses:
33
+ - MIT
34
+ metadata: {}
35
+ post_install_message:
36
+ rdoc_options: []
37
+ require_paths:
38
+ - lib
39
+ required_ruby_version: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ required_rubygems_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ requirements: []
50
+ rubyforge_project:
51
+ rubygems_version: 2.4.7
52
+ signing_key:
53
+ specification_version: 4
54
+ summary: Scraper for JoyceShop
55
+ test_files: []