joyceshop 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/joyceshop +2 -1
- data/lib/joyceshop/scraper.rb +26 -4
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2c4b143619c3fe55c48d39a9b419f39d6923fce0
|
4
|
+
data.tar.gz: 1b0fd41b0f9aff1067b680b43e2260a3f7f83cae
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1b7810cbdc56d47096794d986417082dbe14dc84122e8d0539dbbc2e1f0e4c243f98ba451be45642fafb93fc0f053c0c258e8e4c6fcc6b288caded95c1b961c7
|
7
|
+
data.tar.gz: bd71e91f8adda7003cd73357e1eb2495c4e262d30e39169242bca461f4baaf0caffc5925849c0a5526f3a9fc3c438e2c05b3df9eb72170b1036e40ef063aa02f
|
data/bin/joyceshop
CHANGED
data/lib/joyceshop/scraper.rb
CHANGED
@@ -1,10 +1,14 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
require 'oga'
|
3
|
+
require 'uri'
|
3
4
|
require 'open-uri'
|
4
5
|
|
5
6
|
# scrape data
|
6
7
|
module JoyceShop
|
7
8
|
class Scraper
|
9
|
+
# Types
|
10
|
+
# Type names accepted by #scrape, which dispatches to the method of the same
# name. Each type is listed once (the previous list repeated :pants).
@@VALID_TYPES = [:tops, :popular, :pants, :accessories, :latest]
|
11
|
+
|
8
12
|
# URI
|
9
13
|
@@BASE_URI = 'https://www.joyce-shop.com'
|
10
14
|
@@LATEST_URI = "#{@@BASE_URI}/PDList.asp?brand=01&item1=&item2=&ya19=&keyword=&recommand=1412170001&ob=F"
|
@@ -12,11 +16,12 @@ module JoyceShop
|
|
12
16
|
@@TOPS_URI = "#{@@BASE_URI}/PDList.asp?brand=01&item1=110&item2=111&ya19=&keyword=&recommand=&ob=F"
|
13
17
|
@@PANTS_URI = "#{@@BASE_URI}/PDList.asp?brand=01&item1=120&item2=121&ya19=&keyword=&recommand=&ob=F"
|
14
18
|
@@ACCESSORIES_URI = "#{@@BASE_URI}/PDList.asp?brand=01&item1=140&item2=141&ya19=&keyword=&recommand=&ob=F"
|
19
|
+
@@SEARCH_URI = "#{@@BASE_URI}/PDList.asp?"
|
15
20
|
|
16
21
|
# Selectors
|
17
22
|
@@ITEM_SELECTOR = "//div[contains(@class, 'NEW_shop_list')]/ul/li/div[contains(@class, 'NEW_shop_list_pic')]"
|
18
|
-
@@LINK_SELECTOR = 'a'
|
19
|
-
@@IMAGE_SELECTOR = "a/img[contains(@class, 'lazyload')]"
|
23
|
+
@@LINK_SELECTOR = 'a[1]/@href'
|
24
|
+
@@IMAGE_SELECTOR = "a/img[contains(@class, 'lazyload')]/@src"
|
20
25
|
@@ITEM_INFO_SELECTOR = "div[contains(@class, 'NEW_shop_list_info')]"
|
21
26
|
@@TITLE_SELECTOR = "#{@@ITEM_INFO_SELECTOR}/div[1]"
|
22
27
|
@@PRICE_SELECTOR = "#{@@ITEM_INFO_SELECTOR}/span"
|
@@ -58,6 +63,19 @@ module JoyceShop
|
|
58
63
|
data = parse_html(body)
|
59
64
|
filter(data, options)
|
60
65
|
end
|
66
|
+
|
67
|
+
# Runs a keyword search: builds the search URI, downloads the page, parses
# it and hands the parsed data to #filter together with +options+.
#
# keyword - the search term; it is escaped by #uri_with_search.
# options - passed through unchanged to #filter.
#
# Returns whatever #filter returns for the parsed page.
def search(keyword, options={})
  search_uri = uri_with_search(keyword)
  page_body  = fetch_data(search_uri)
  filter(parse_html(page_body), options)
end
|
73
|
+
|
74
|
+
# Dispatches to the scraping method named by +type+.
#
# type    - a Symbol or String naming one of @@VALID_TYPES.
# page    - forwarded as the first argument of the selected method.
# options - forwarded as the second argument of the selected method.
#
# Aborts the process (Kernel#abort: message on stderr, SystemExit) when
# +type+ is not supported. Values that cannot be converted to a Symbol
# (e.g. nil) are treated as unsupported instead of raising NoMethodError.
#
# Returns the result of the selected method.
def scrape(type, page, options = {})
  type_name = type.respond_to?(:to_sym) ? type.to_sym : nil
  abort "only supports #{@@VALID_TYPES}" unless @@VALID_TYPES.include?(type_name)

  # The whitelist check above bounds which methods can be reached here.
  send(type_name, page, options)
end
|
62
80
|
|
63
81
|
private
|
@@ -65,6 +83,10 @@ module JoyceShop
|
|
65
83
|
"#{uri}&pageno=#{page}"
|
66
84
|
end
|
67
85
|
|
86
|
+
# Builds the search URI for +keyword+.
#
# The keyword is form-encoded so that reserved characters such as "&", "="
# and "#" cannot add or truncate query parameters. URI.escape left those
# characters untouched and no longer exists in Ruby 3.0+.
#
# keyword - the search term; converted with #to_s before encoding.
#
# Returns the URI as a String.
def uri_with_search(keyword)
  "#{@@SEARCH_URI}keyword=#{URI.encode_www_form_component(keyword)}"
end
|
89
|
+
|
68
90
|
# Downloads the document at +uri+ and returns its body.
#
# Parses the string into a URI object and opens it through open-uri rather
# than calling Kernel#open on a bare string: the Kernel#open hook for URLs
# was removed in Ruby 3.0, and Kernel#open treats a leading "|" as a command.
#
# uri - the address to fetch, as a String.
#
# Returns the response body as read from the opened stream.
def fetch_data(uri)
  URI.parse(uri).open { |file| file.read }
end
|
@@ -116,13 +138,13 @@ module JoyceShop
|
|
116
138
|
end
|
117
139
|
|
118
140
|
def extract_images(item)
|
119
|
-
image = item.xpath(@@IMAGE_SELECTOR).
|
141
|
+
image = item.xpath(@@IMAGE_SELECTOR).text
|
120
142
|
image_hover = image.sub(/\.jpg/, '-h.jpg')
|
121
143
|
["#{@@BASE_URI}#{image}", "#{@@BASE_URI}#{image_hover}"]
|
122
144
|
end
|
123
145
|
|
124
146
|
def extract_link(item)
|
125
|
-
"#{@@BASE_URI}/#{item.xpath(@@LINK_SELECTOR).
|
147
|
+
"#{@@BASE_URI}/#{item.xpath(@@LINK_SELECTOR).text}"
|
126
148
|
end
|
127
149
|
end
|
128
150
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: joyceshop
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Even Chang
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2016-01-
|
14
|
+
date: 2016-01-04 00:00:00.000000000 Z
|
15
15
|
dependencies: []
|
16
16
|
description: This is a gem scraping joyceshop's website and returns the popular/latest
|
17
17
|
items
|