joyceshop 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a3acee65aadff3a04affd99a74cdf5504632c695
4
+ data.tar.gz: 2709281e4846fe17d1a70649554aa87ddbea2b81
5
+ SHA512:
6
+ metadata.gz: 2f512e054122cfe33a784207d417de2539687646cf23127dbefe5719270c78c6e7f8740c7103c4242a1ab506b1af2f934a733389c48ba05624243e3cfc3d0d5f
7
+ data.tar.gz: af79d4953a8ad049e69f7074ca7bbc575ecc5e3df6518a9c6b53b08b0006f6b67ab46a94a625de4be3cc608f47999e7dc00bc9d2ec6f74912dd44dc249953632
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+ # require 'joyceshop' # for production
3
+ require_relative '../lib/joyceshop.rb' # for testing
4
+
5
# Scrape page 1 of the accessories listing and print the resulting items.
puts JoyceShop::Scraper.new.accessories(1)
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative 'joyceshop/scraper'
@@ -0,0 +1,101 @@
1
+ #!/usr/bin/env ruby
2
+ require 'oga'
3
+ require 'open-uri'
4
+
5
# Namespace for the joyce-shop.com scraper.
module JoyceShop
  # Downloads listing pages from https://www.joyce-shop.com and converts every
  # item found on a page into a Hash with the keys +:title+, +:price+,
  # +:images+ and +:link+.
  #
  # The file that defines this class is expected to have loaded 'oga' (HTML
  # parsing) and 'open-uri' (HTTP download) beforehand.
  class Scraper
    # Listing URIs. The page number is appended per request as "&pageno=N".
    BASE_URI = 'https://www.joyce-shop.com'.freeze
    LATEST_URI = "#{BASE_URI}/PDList.asp?brand=01&item1=&item2=&ya19=&keyword=&recommand=1412170001&ob=F".freeze
    POPULAR_URI = "#{BASE_URI}/PDList.asp?brand=01&item1=&item2=&ya19=&keyword=&recommand=1305080002&ob=F".freeze
    TOPS_URI = "#{BASE_URI}/PDList.asp?brand=01&item1=110&item2=111&ya19=&keyword=&recommand=&ob=F".freeze
    PANTS_URI = "#{BASE_URI}/PDList.asp?brand=01&item1=120&item2=121&ya19=&keyword=&recommand=&ob=F".freeze
    ACCESSORIES_URI = "#{BASE_URI}/PDList.asp?brand=01&item1=140&item2=141&ya19=&keyword=&recommand=&ob=F".freeze

    # XPath selectors. ITEM_SELECTOR is evaluated against the whole document;
    # all the others are evaluated relative to a single item node.
    ITEM_SELECTOR = "//div[contains(@class, 'NEW_shop_list')]/ul/li/div[contains(@class, 'NEW_shop_list_pic')]".freeze
    LINK_SELECTOR = 'a'.freeze
    IMAGE_SELECTOR = "a/img[contains(@class, 'lazyload')]".freeze
    ITEM_INFO_SELECTOR = "div[contains(@class, 'NEW_shop_list_info')]".freeze
    TITLE_SELECTOR = "#{ITEM_INFO_SELECTOR}/div[1]".freeze
    PRICE_SELECTOR = "#{ITEM_INFO_SELECTOR}/span".freeze

    # Captures the first run of Han characters, ASCII letters and dots.
    TITLE_REGEX = /([.\p{Han}[a-zA-Z]]+)/

    # Items of the "latest" listing.
    #
    # @param page [#to_s] value placed in the +pageno+ query parameter
    # @return [Array<Hash>] one Hash per item found on the page
    def latest(page)
      scrape(LATEST_URI, page)
    end

    # Items of the "popular" listing.
    #
    # @param page [#to_s] value placed in the +pageno+ query parameter
    # @return [Array<Hash>] one Hash per item found on the page
    def popular(page)
      scrape(POPULAR_URI, page)
    end

    # Items of the "tops" listing.
    #
    # @param page [#to_s] value placed in the +pageno+ query parameter
    # @return [Array<Hash>] one Hash per item found on the page
    def tops(page)
      scrape(TOPS_URI, page)
    end

    # Items of the "pants" listing.
    #
    # @param page [#to_s] value placed in the +pageno+ query parameter
    # @return [Array<Hash>] one Hash per item found on the page
    def pants(page)
      scrape(PANTS_URI, page)
    end

    # Items of the "accessories" listing.
    #
    # @param page [#to_s] value placed in the +pageno+ query parameter
    # @return [Array<Hash>] one Hash per item found on the page
    def accessories(page)
      scrape(ACCESSORIES_URI, page)
    end

    private

    # Shared pipeline behind every public listing method:
    # build the page URI, download it, extract the items.
    def scrape(listing_uri, page)
      filter(fetch_data(uri_with_page(listing_uri, page)))
    end

    # Appends the page number to a listing URI.
    def uri_with_page(uri, page)
      "#{uri}&pageno=#{page}"
    end

    # Downloads +uri+ and returns the response body as a String.
    #
    # Goes through OpenURI::OpenRead#open on the parsed URI instead of
    # Kernel#open: since Ruby 3.0 Kernel#open no longer accepts URLs, and this
    # also keeps the string away from Kernel#open's file/pipe handling.
    # Errors raised while parsing or downloading are not rescued here.
    def fetch_data(uri)
      URI.parse(uri).open { |io| io.read }
    end

    # Parses the raw HTML and maps every node matched by ITEM_SELECTOR
    # to an item Hash.
    def filter(raw)
      Oga.parse_html(raw)
         .xpath(ITEM_SELECTOR)
         .map { |item| parse(item) }
    end

    # Builds the Hash describing a single item node.
    def parse(item)
      {
        title: extract_title(item),
        price: extract_price(item),
        images: extract_images(item),
        link: extract_link(item)
      }
    end

    # First TITLE_REGEX capture in the title node's text, or nil if none.
    def extract_title(item)
      item.xpath(TITLE_SELECTOR).text[TITLE_REGEX, 1]
    end

    # Price node text converted with String#to_i (0 when the text does not
    # start with digits).
    def extract_price(item)
      item.xpath(PRICE_SELECTOR).text.to_i
    end

    # Returns two absolute image URIs: the image's +src+ and the same path
    # with the first ".jpg" replaced by "-h.jpg".
    # NOTE(review): the "-h" variant is presumably the hover image - confirm.
    def extract_images(item)
      image = item.xpath(IMAGE_SELECTOR).attribute(:src).first.value
      hover_image = image.sub(/\.jpg/, '-h.jpg')
      ["#{BASE_URI}#{image}", "#{BASE_URI}#{hover_image}"]
    end

    # Absolute URI built from the +href+ of the item's first link.
    def extract_link(item)
      "#{BASE_URI}/#{item.xpath(LINK_SELECTOR).attribute(:href).first.value}"
    end
  end
end
metadata ADDED
@@ -0,0 +1,55 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: joyceshop
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Even Chang
8
+ - Luis Herrera
9
+ - Katy Lee
10
+ - Frank Lee
11
+ autorequire:
12
+ bindir: bin
13
+ cert_chain: []
14
+ date: 2015-12-14 00:00:00.000000000 Z
15
+ dependencies: []
16
+ description: This is a gem that scrapes joyceshop's website and returns the popular/latest
17
+ items
18
+ email:
19
+ - kiki44552002@gmail.com
20
+ - lmherrera86@gmail.com
21
+ - katylee41024@yahoo.com.tw
22
+ - frank1234211@gmail.com
23
+ executables:
24
+ - joyceshop
25
+ extensions: []
26
+ extra_rdoc_files: []
27
+ files:
28
+ - bin/joyceshop
29
+ - lib/joyceshop.rb
30
+ - lib/joyceshop/scraper.rb
31
+ homepage: http://rubygems.org/gems/joyceshop
32
+ licenses:
33
+ - MIT
34
+ metadata: {}
35
+ post_install_message:
36
+ rdoc_options: []
37
+ require_paths:
38
+ - lib
39
+ required_ruby_version: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ required_rubygems_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ requirements: []
50
+ rubyforge_project:
51
+ rubygems_version: 2.4.7
52
+ signing_key:
53
+ specification_version: 4
54
+ summary: Scraper for JoyceShop
55
+ test_files: []