shopee 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -0
- data/README.md +7 -7
- data/lib/data/mobile_category.rb +1 -0
- data/lib/shopee.rb +70 -30
- data/lib/shopee/version.rb +2 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0dea88ac985e2a00925725ba22d00524f20f5686
|
4
|
+
data.tar.gz: cc396b07bf301732655b22c836f007f3e076cb52
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5960417b9ea1160df982eba61c4f10389d2afed77f278b038a820af1bf45570f9221ac7d8fe0e959c5b3843e52bc487602b40b115eeceef57db0a953fcfc356a
|
7
|
+
data.tar.gz: 6024ea6d9e1aac0d2d72c32ececa5592e0f607e3509646da1a4e9edfe4dbd4511b8fe0835d6b8d1c9d93a02fa426543b46d94dff8612aa909822f33dfdaf9fb4
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -20,11 +20,11 @@ $ gem install shopee
|
|
20
20
|
## 1. List all categories of market of Mobile01
|
21
21
|
$ shopee allcate
|
22
22
|
|
23
|
-
## 2. List
|
23
|
+
## 2. List goods by a category
|
24
24
|
$ shopee list [category_name]
|
25
25
|
|
26
|
-
## 3. Find top
|
27
|
-
$ shopee search [category_name] [keyword]
|
26
|
+
## 3. Find top number goods in a category by the keyword
|
27
|
+
$ shopee search [category_name] [keyword] [num]
|
28
28
|
|
29
29
|
```
|
30
30
|
|
@@ -186,7 +186,7 @@ GPS
|
|
186
186
|
```
|
187
187
|
|
188
188
|
|
189
|
-
**2. List
|
189
|
+
**2. List goods by a category**
|
190
190
|
```ruby
|
191
191
|
require 'shopee'
|
192
192
|
##Example :category is '電腦資訊'
|
@@ -231,13 +231,13 @@ puts shopeecate.goods
|
|
231
231
|
{"name"=>"全新未拆 羅技MX Master fervidity (5) 商品所在地:台中市 (可交換商品)", "price"=>"9,500元 10,000元", "num"=>"0", "update_time"=>"2015-11-15"}
|
232
232
|
{"name"=>"賣 99.99 新Samsung Galaxy Tab S 8.4 白3G/16G LTE ( 可通話平... 星空下的雨 (31) 商品所在地:新北市 (可交換商品)", "price"=>nil, "num"=>nil, "update_time"=>nil}
|
233
233
|
```
|
234
|
-
**3. Find top
|
234
|
+
**3. Find top num goods in a category by the keyword**
|
235
235
|
```ruby
|
236
236
|
require 'shopee'
|
237
237
|
##Example :category is '電腦資訊'; keyword is 'samsung'
|
238
|
-
|
238
|
+
shopeecate = ShopeeScrape::ShopeeListGoodsByCate.new(category)
|
239
239
|
goods = shopeecate.goods
|
240
|
-
puts shopeecate.search_keyword(goods, keyword)
|
240
|
+
puts shopeecate.search_keyword(goods, keyword, num)
|
241
241
|
```
|
242
242
|
- Expected output
|
243
243
|
```
|
data/lib/data/mobile_category.rb
CHANGED
@@ -140,3 +140,4 @@ ALL_LINK = { "電腦資訊"=>"http://www.mobile01.com/mpcatlist.php?c=1",
|
|
140
140
|
"南部地區"=>"http://www.mobile01.com/mpcatlist.php?c=90",
|
141
141
|
"東部與外島地區"=>"http://www.mobile01.com/mpcatlist.php?c=91"
|
142
142
|
}
|
143
|
+
CATEGORY_LIST = ["電腦資訊", "手持通訊", "攝影器材", "數位家電", "休閒旅遊", "生活用品", "汽車", "機車", "自行車", "男性時尚", "女性流行", "代購與虛擬物品", "房屋地產"]
|
data/lib/shopee.rb
CHANGED
@@ -7,6 +7,7 @@ module ShopeeScrape
|
|
7
7
|
class ShopeeListGoodsByCate
|
8
8
|
|
9
9
|
require_relative './data/mobile_category'
|
10
|
+
GOOD_INFO = "//div[contains(@class, 'img')]"
|
10
11
|
GOOD_NAME = "//div[contains(@class, 'subject')]"
|
11
12
|
GOOD_PRICE = "//div[contains(@class, 'price')]"
|
12
13
|
GOOD_NUM = "//div[contains(@class, 'num')]"
|
@@ -27,46 +28,85 @@ module ShopeeScrape
|
|
27
28
|
private
|
28
29
|
|
29
30
|
def parse_html(id)
|
31
|
+
@document = []
|
30
32
|
url = ALL_LINK[id]
|
31
|
-
@document
|
32
|
-
end
|
33
|
+
@document << Oga.parse_html(open(url))
|
33
34
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
num = []
|
38
|
-
update_time = []
|
39
|
-
@document.xpath(GOOD_NAME).map do |good|
|
40
|
-
name << good.text
|
35
|
+
page_num = 2
|
36
|
+
if CATEGORY_LIST.include?(id) == true
|
37
|
+
page_num = 5
|
41
38
|
end
|
42
39
|
|
43
|
-
|
44
|
-
|
40
|
+
check = 1
|
41
|
+
i = 1
|
42
|
+
|
43
|
+
while check == 1 && i < page_num
|
44
|
+
i += 1
|
45
|
+
begin
|
46
|
+
url_t = url +'&p='+i.to_s
|
47
|
+
open url_t, :proxy=>true
|
48
|
+
@document << Oga.parse_html(open(url_t))
|
49
|
+
rescue
|
50
|
+
check = 0
|
51
|
+
end
|
45
52
|
end
|
46
53
|
|
47
|
-
|
48
|
-
num << good.text
|
49
|
-
end
|
54
|
+
end
|
50
55
|
|
51
|
-
|
52
|
-
update_time << good.text
|
53
|
-
end
|
56
|
+
def extract_goods
|
54
57
|
|
55
|
-
number = name.length
|
56
58
|
results = []
|
57
|
-
# puts number
|
58
|
-
if number > 32
|
59
|
-
number = 32
|
60
|
-
end
|
61
|
-
for i in 2..number-1
|
62
|
-
element = {}
|
63
|
-
element['name'] = name[i]
|
64
|
-
element['price'] = price[i]
|
65
|
-
element['num'] = num[i]
|
66
|
-
element['update_time'] = update_time[i]
|
67
|
-
results << element
|
68
|
-
end
|
69
59
|
|
60
|
+
@document.each do |doc|
|
61
|
+
|
62
|
+
name = []
|
63
|
+
price = []
|
64
|
+
num = []
|
65
|
+
update_time = []
|
66
|
+
pic = []
|
67
|
+
link = []
|
68
|
+
|
69
|
+
doc.xpath(GOOD_NAME).map do |good|
|
70
|
+
name << good.text
|
71
|
+
end
|
72
|
+
|
73
|
+
doc.xpath(GOOD_PRICE).map do |good|
|
74
|
+
price << good.text
|
75
|
+
end
|
76
|
+
|
77
|
+
doc.xpath(GOOD_NUM).map do |good|
|
78
|
+
num << good.text
|
79
|
+
end
|
80
|
+
|
81
|
+
doc.xpath(GOOD_UPTIME).map do |good|
|
82
|
+
update_time << good.text
|
83
|
+
end
|
84
|
+
|
85
|
+
doc.xpath(GOOD_INFO).map do |good|
|
86
|
+
link << good.css('a').attribute('href')[0].to_s
|
87
|
+
# puts good.css('img').attribute('src')[0].to_s
|
88
|
+
pic << good.css('img').attribute('src')[0].to_s
|
89
|
+
end
|
90
|
+
|
91
|
+
number = name.length
|
92
|
+
|
93
|
+
# puts number
|
94
|
+
if number > 32
|
95
|
+
number = 32
|
96
|
+
end
|
97
|
+
for i in 2..number-1
|
98
|
+
element = {}
|
99
|
+
element['name'] = name[i]
|
100
|
+
element['price'] = price[i]
|
101
|
+
element['num'] = num[i]
|
102
|
+
element['update_time'] = update_time[i]
|
103
|
+
element['link'] = link[i]
|
104
|
+
element['pic'] = pic[i]
|
105
|
+
results << element
|
106
|
+
end
|
107
|
+
|
108
|
+
end
|
109
|
+
puts results.length
|
70
110
|
results
|
71
111
|
end
|
72
112
|
|
data/lib/shopee/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: shopee
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sheng Jung Wu
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date:
|
14
|
+
date: 2016-01-09 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: minitest
|
@@ -125,7 +125,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
125
125
|
version: '0'
|
126
126
|
requirements: []
|
127
127
|
rubyforge_project:
|
128
|
-
rubygems_version: 2.4.
|
128
|
+
rubygems_version: 2.4.6
|
129
129
|
signing_key:
|
130
130
|
specification_version: 4
|
131
131
|
summary: Scrape categories of mobile01!
|