shopee 0.2.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +6 -6
- data/bin/shopee +5 -6
- data/lib/shopee.rb +38 -32
- data/lib/shopee/version.rb +1 -1
- data/spec/shopee_spec.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 755028be4d2f5bdc40ddbcd3d607bb4bae24e8ad
|
4
|
+
data.tar.gz: a5428a405378f2370d7756a345ae3a37ab0ac34d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c12cb36534a9357af5aacc0a631ebac53137b02f3e9f41af9866901a4e88243ef1741db50d7184bc011233ec2394034b91f676579dcfb8b78a3efac6f4dfe60a
|
7
|
+
data.tar.gz: 0035557bdd8d291913fe8b60fa2298b3966baafca47d79f6762ab6c5011ca54dffd8f23f3c57969d3cdce481fa62357933ad2d713a3b00051f61ceab5a68c022
|
data/README.md
CHANGED
@@ -21,10 +21,10 @@ $ gem install shopee
|
|
21
21
|
$ shopee allcate
|
22
22
|
|
23
23
|
## 2. List goods by a category
|
24
|
-
$ shopee list [category_name]
|
24
|
+
$ shopee list [category_name] [page_number]
|
25
25
|
|
26
26
|
## 3. Find top number goods in a category by the keyword
|
27
|
-
$ shopee search [category_name] [keyword] [
|
27
|
+
$ shopee search [category_name] [keyword] [number_of_product]
|
28
28
|
|
29
29
|
```
|
30
30
|
|
@@ -191,7 +191,8 @@ GPS
|
|
191
191
|
require 'shopee'
|
192
192
|
##Example :category is '電腦資訊'
|
193
193
|
category_name = '電腦資訊'
|
194
|
-
|
194
|
+
page = 1
|
195
|
+
shopeecate = ShopeeScrape::ShopeeListGoodsByCate.new(category_name, page)
|
195
196
|
puts shopeecate.goods
|
196
197
|
```
|
197
198
|
|
@@ -235,9 +236,8 @@ puts shopeecate.goods
|
|
235
236
|
```ruby
|
236
237
|
require 'shopee'
|
237
238
|
##Example :category is '電腦資訊'; keyword is 'samsung'
|
238
|
-
shopeecate = ShopeeScrape::ShopeeListGoodsByCate.new(
|
239
|
-
|
240
|
-
puts shopeecate.search_keyword(goods, keyword, num)
|
239
|
+
shopeecate = ShopeeScrape::ShopeeListGoodsByCate.new()
|
240
|
+
puts shopeecate.search_keyword('電腦資訊', 'Asus', '10')
|
241
241
|
```
|
242
242
|
- Expected output
|
243
243
|
```
|
data/bin/shopee
CHANGED
@@ -12,11 +12,11 @@ Commander.configure do
|
|
12
12
|
program :description, 'Scrape informations from Mobile01'
|
13
13
|
|
14
14
|
command :list do |c|
|
15
|
-
c.syntax = 'shopee list [
|
15
|
+
c.syntax = 'shopee list [category] [page]'
|
16
16
|
c.description = 'List goods by the category'
|
17
17
|
|
18
18
|
c.action do |args, options|
|
19
|
-
shopeecate = ShopeeScrape::ShopeeListGoodsByCate.new(args[0])
|
19
|
+
shopeecate = ShopeeScrape::ShopeeListGoodsByCate.new(args[0], args[1])
|
20
20
|
puts shopeecate.goods
|
21
21
|
end
|
22
22
|
end
|
@@ -32,13 +32,12 @@ Commander.configure do
|
|
32
32
|
end
|
33
33
|
|
34
34
|
command :search do |c|
|
35
|
-
c.syntax = 'shopee search [
|
35
|
+
c.syntax = 'shopee search [category] [keyword] [number of products]'
|
36
36
|
c.description = 'search similar good'
|
37
37
|
|
38
38
|
c.action do |args, options|
|
39
|
-
shopeecate = ShopeeScrape::ShopeeListGoodsByCate.new(
|
40
|
-
|
41
|
-
puts shopeecate.search_keyword(goods, args[1], args[2])
|
39
|
+
shopeecate = ShopeeScrape::ShopeeListGoodsByCate.new()
|
40
|
+
puts shopeecate.search_keyword(args[0], args[1], args[2])
|
42
41
|
end
|
43
42
|
end
|
44
43
|
|
data/lib/shopee.rb
CHANGED
@@ -4,6 +4,10 @@ require 'json'
|
|
4
4
|
require 'fuzzystringmatch'
|
5
5
|
|
6
6
|
module ShopeeScrape
|
7
|
+
|
8
|
+
# =============================
|
9
|
+
# Class For List products by category and search products with keyword
|
10
|
+
# =============================
|
7
11
|
class ShopeeListGoodsByCate
|
8
12
|
|
9
13
|
require_relative './data/mobile_category'
|
@@ -13,51 +17,40 @@ module ShopeeScrape
|
|
13
17
|
GOOD_NUM = "//div[contains(@class, 'num')]"
|
14
18
|
GOOD_UPTIME = "//div[contains(@class, 'updated')]"
|
15
19
|
|
16
|
-
def initialize(category)
|
17
|
-
|
20
|
+
def initialize(category=nil, page=nil)
|
21
|
+
puts category, page
|
22
|
+
if !category.nil? && !page.nil?
|
23
|
+
get_page_html(category, page)
|
24
|
+
end
|
18
25
|
end
|
19
26
|
|
20
27
|
def goods
|
21
28
|
@goods ||= extract_goods
|
22
29
|
end
|
23
30
|
|
24
|
-
def search_keyword(
|
25
|
-
@similar ||= find_similiar_goods(
|
31
|
+
def search_keyword(category, keyword, list_num)
|
32
|
+
@similar ||= find_similiar_goods(category, keyword, list_num)
|
26
33
|
end
|
27
34
|
|
28
35
|
private
|
29
36
|
|
30
|
-
def
|
37
|
+
def get_page_html(cate, page)
|
31
38
|
@document = []
|
32
|
-
url = ALL_LINK[
|
33
|
-
@document << Oga.parse_html(open(url))
|
34
|
-
|
35
|
-
page_num = 2
|
36
|
-
if CATEGORY_LIST.include?(id) == true
|
37
|
-
page_num = 5
|
38
|
-
end
|
39
|
+
url = ALL_LINK[cate]
|
39
40
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
begin
|
46
|
-
url_t = url +'&p='+i.to_s
|
47
|
-
@document << Oga.parse_html(open(url_t))
|
48
|
-
rescue
|
49
|
-
check = 0
|
41
|
+
if CATEGORY_LIST.include?(cate)
|
42
|
+
if page != 1
|
43
|
+
@document << Oga.parse_html(open(url << '&p=' << page.to_s))
|
44
|
+
else
|
45
|
+
@document << Oga.parse_html(open(url))
|
50
46
|
end
|
51
47
|
end
|
52
|
-
|
53
48
|
end
|
54
49
|
|
55
50
|
def extract_goods
|
56
|
-
|
57
51
|
results = []
|
58
52
|
|
59
53
|
@document.each do |doc|
|
60
|
-
|
61
54
|
name = []
|
62
55
|
price = []
|
63
56
|
num = []
|
@@ -83,7 +76,6 @@ module ShopeeScrape
|
|
83
76
|
|
84
77
|
doc.xpath(GOOD_INFO).map do |good|
|
85
78
|
link << good.css('a').attribute('href')[0].to_s
|
86
|
-
# puts good.css('img').attribute('src')[0].to_s
|
87
79
|
pic << good.css('img').attribute('src')[0].to_s
|
88
80
|
end
|
89
81
|
|
@@ -93,6 +85,7 @@ module ShopeeScrape
|
|
93
85
|
if number > 32
|
94
86
|
number = 32
|
95
87
|
end
|
88
|
+
|
96
89
|
for i in 2..number-1
|
97
90
|
element = {}
|
98
91
|
element['name'] = name[i]
|
@@ -105,17 +98,32 @@ module ShopeeScrape
|
|
105
98
|
end
|
106
99
|
|
107
100
|
end
|
101
|
+
|
108
102
|
puts results.length
|
109
103
|
results
|
110
104
|
end
|
111
105
|
|
112
|
-
def find_similiar_goods(
|
106
|
+
def find_similiar_goods(category, keyword, list_num)
|
107
|
+
goods = []
|
108
|
+
goodsSet = []
|
109
|
+
|
110
|
+
for i in 1..5
|
111
|
+
shopeecate = ShopeeScrape::ShopeeListGoodsByCate.new(category, i)
|
112
|
+
goodsSet << shopeecate.goods
|
113
|
+
end
|
114
|
+
|
115
|
+
goodsSet.each do |good|
|
116
|
+
good.each do |g|
|
117
|
+
goods << g
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
113
121
|
jarow = FuzzyStringMatch::JaroWinkler.create( :native )
|
114
122
|
rank = {}
|
123
|
+
|
115
124
|
goods.each do |good|
|
116
|
-
|
117
|
-
|
118
|
-
rank[good_name] = value
|
125
|
+
value = jarow.getDistance(good['name'] ,keyword)
|
126
|
+
rank[good['name']] = value
|
119
127
|
end
|
120
128
|
|
121
129
|
rank_after_sort = Hash[rank.sort_by{|k, v| v}.reverse]
|
@@ -136,8 +144,6 @@ module ShopeeScrape
|
|
136
144
|
end
|
137
145
|
end
|
138
146
|
|
139
|
-
|
140
|
-
|
141
147
|
# =============================
|
142
148
|
# List all category of mobile01
|
143
149
|
# =============================
|
data/lib/shopee/version.rb
CHANGED
data/spec/shopee_spec.rb
CHANGED
@@ -157,7 +157,7 @@ end
|
|
157
157
|
|
158
158
|
VCR.use_cassette('shopee') do
|
159
159
|
# //It should be placed outside.
|
160
|
-
shopeecate = ShopeeScrape::ShopeeListGoodsByCate.new('電腦資訊')
|
160
|
+
shopeecate = ShopeeScrape::ShopeeListGoodsByCate.new('電腦資訊', 1)
|
161
161
|
describe 'Shopee testament using vcr' do
|
162
162
|
it 'should return an array of string and name of categories' do
|
163
163
|
good_list = shopeecate.goods
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: shopee
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sheng Jung Wu
|
@@ -125,7 +125,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
125
125
|
version: '0'
|
126
126
|
requirements: []
|
127
127
|
rubyforge_project:
|
128
|
-
rubygems_version: 2.
|
128
|
+
rubygems_version: 2.5.1
|
129
129
|
signing_key:
|
130
130
|
specification_version: 4
|
131
131
|
summary: Scrape categories of mobile01!
|