shopee 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3aff6cf07785ae43aa42c1f7d802ed94a74a1f53
4
- data.tar.gz: c9ac1da5895b9ed8d9e823b4f9a77bb6630aa83e
3
+ metadata.gz: 0dea88ac985e2a00925725ba22d00524f20f5686
4
+ data.tar.gz: cc396b07bf301732655b22c836f007f3e076cb52
5
5
  SHA512:
6
- metadata.gz: 902449372a269b53f95fadb46d434ada76547d44bf67242e2feb2b62482126bc9e176894fef5aa4314ad5054fc60c9aecac0189b368d9b07076dc8019d7aa63a
7
- data.tar.gz: da698c2812a2dccb9f88f1c39df063c1a1db922e00077a23108bf6c11b0d610f6930034aab6d5787da20c529e96d8b704fcd87d6807d894ced913b97f4c5e8b6
6
+ metadata.gz: 5960417b9ea1160df982eba61c4f10389d2afed77f278b038a820af1bf45570f9221ac7d8fe0e959c5b3843e52bc487602b40b115eeceef57db0a953fcfc356a
7
+ data.tar.gz: 6024ea6d9e1aac0d2d72c32ececa5592e0f607e3509646da1a4e9edfe4dbd4511b8fe0835d6b8d1c9d93a02fa426543b46d94dff8612aa909822f33dfdaf9fb4
data/Gemfile CHANGED
@@ -6,6 +6,7 @@ gem 'minitest'
6
6
  gem 'vcr'
7
7
  gem 'webmock'
8
8
  gem 'fuzzy-string-match'
9
+ gem 'commander', '~> 4.3', '>= 4.3.5'
9
10
 
10
11
  group :test do
11
12
  gem 'rake'
data/README.md CHANGED
@@ -20,11 +20,11 @@ $ gem install shopee
20
20
  ## 1. List all categories of market of Mobile01
21
21
  $ shopee allcate
22
22
 
23
- ## 2. List top 30 goods by a category
23
+ ## 2. List goods by a category
24
24
  $ shopee list [category_name]
25
25
 
26
- ## 3. Find top 3 goods in a category by the keyword
27
- $ shopee search [category_name] [keyword]
26
+ ## 3. Find top number goods in a category by the keyword
27
+ $ shopee search [category_name] [keyword] [num]
28
28
 
29
29
  ```
30
30
 
@@ -186,7 +186,7 @@ GPS
186
186
  ```
187
187
 
188
188
 
189
- **2. List top 30 goods by a category**
189
+ **2. List goods by a category**
190
190
  ```ruby
191
191
  require 'shopee'
192
192
  ##Example :category is '電腦資訊'
@@ -231,13 +231,13 @@ puts shopeecate.goods
231
231
  {"name"=>"全新未拆 羅技MX Master fervidity (5) 商品所在地:台中市 (可交換商品)", "price"=>"9,500元 10,000元", "num"=>"0", "update_time"=>"2015-11-15"}
232
232
  {"name"=>"賣 99.99 新Samsung Galaxy Tab S 8.4 白3G/16G LTE ( 可通話平... 星空下的雨 (31) 商品所在地:新北市 (可交換商品)", "price"=>nil, "num"=>nil, "update_time"=>nil}
233
233
  ```
234
- **3. Find top 3 goods in a category by the keyword**
234
+ **3. Find top num goods in a category by the keyword**
235
235
  ```ruby
236
236
  require 'shopee'
237
237
  ##Example :category is '電腦資訊'; keyword is 'samsung'
238
- cshopeecate = ShopeeScrape::ShopeeListGoodsByCate.new(category)
238
+ shopeecate = ShopeeScrape::ShopeeListGoodsByCate.new(category)
239
239
  goods = shopeecate.goods
240
- puts shopeecate.search_keyword(goods, keyword)
240
+ puts shopeecate.search_keyword(goods, keyword, num)
241
241
  ```
242
242
  - Expected output
243
243
  ```
@@ -140,3 +140,4 @@ ALL_LINK = { "電腦資訊"=>"http://www.mobile01.com/mpcatlist.php?c=1",
140
140
  "南部地區"=>"http://www.mobile01.com/mpcatlist.php?c=90",
141
141
  "東部與外島地區"=>"http://www.mobile01.com/mpcatlist.php?c=91"
142
142
  }
143
+ CATEGORY_LIST = ["電腦資訊", "手持通訊", "攝影器材", "數位家電", "休閒旅遊", "生活用品", "汽車", "機車", "自行車", "男性時尚", "女性流行", "代購與虛擬物品", "房屋地產"]
@@ -7,6 +7,7 @@ module ShopeeScrape
7
7
  class ShopeeListGoodsByCate
8
8
 
9
9
  require_relative './data/mobile_category'
10
+ GOOD_INFO = "//div[contains(@class, 'img')]"
10
11
  GOOD_NAME = "//div[contains(@class, 'subject')]"
11
12
  GOOD_PRICE = "//div[contains(@class, 'price')]"
12
13
  GOOD_NUM = "//div[contains(@class, 'num')]"
@@ -27,46 +28,85 @@ module ShopeeScrape
27
28
  private
28
29
 
29
30
  def parse_html(id)
31
+ @document = []
30
32
  url = ALL_LINK[id]
31
- @document = Oga.parse_html(open(url))
32
- end
33
+ @document << Oga.parse_html(open(url))
33
34
 
34
- def extract_goods
35
- name = []
36
- price = []
37
- num = []
38
- update_time = []
39
- @document.xpath(GOOD_NAME).map do |good|
40
- name << good.text
35
+ page_num = 2
36
+ if CATEGORY_LIST.include?(id) == true
37
+ page_num = 5
41
38
  end
42
39
 
43
- @document.xpath(GOOD_PRICE).map do |good|
44
- price << good.text
40
+ check = 1
41
+ i = 1
42
+
43
+ while check == 1 && i < page_num
44
+ i += 1
45
+ begin
46
+ url_t = url +'&p='+i.to_s
47
+ open url_t, :proxy=>true
48
+ @document << Oga.parse_html(open(url_t))
49
+ rescue
50
+ check = 0
51
+ end
45
52
  end
46
53
 
47
- @document.xpath(GOOD_NUM).map do |good|
48
- num << good.text
49
- end
54
+ end
50
55
 
51
- @document.xpath(GOOD_UPTIME).map do |good|
52
- update_time << good.text
53
- end
56
+ def extract_goods
54
57
 
55
- number = name.length
56
58
  results = []
57
- # puts number
58
- if number > 32
59
- number = 32
60
- end
61
- for i in 2..number-1
62
- element = {}
63
- element['name'] = name[i]
64
- element['price'] = price[i]
65
- element['num'] = num[i]
66
- element['update_time'] = update_time[i]
67
- results << element
68
- end
69
59
 
60
+ @document.each do |doc|
61
+
62
+ name = []
63
+ price = []
64
+ num = []
65
+ update_time = []
66
+ pic = []
67
+ link = []
68
+
69
+ doc.xpath(GOOD_NAME).map do |good|
70
+ name << good.text
71
+ end
72
+
73
+ doc.xpath(GOOD_PRICE).map do |good|
74
+ price << good.text
75
+ end
76
+
77
+ doc.xpath(GOOD_NUM).map do |good|
78
+ num << good.text
79
+ end
80
+
81
+ doc.xpath(GOOD_UPTIME).map do |good|
82
+ update_time << good.text
83
+ end
84
+
85
+ doc.xpath(GOOD_INFO).map do |good|
86
+ link << good.css('a').attribute('href')[0].to_s
87
+ # puts good.css('img').attribute('src')[0].to_s
88
+ pic << good.css('img').attribute('src')[0].to_s
89
+ end
90
+
91
+ number = name.length
92
+
93
+ # puts number
94
+ if number > 32
95
+ number = 32
96
+ end
97
+ for i in 2..number-1
98
+ element = {}
99
+ element['name'] = name[i]
100
+ element['price'] = price[i]
101
+ element['num'] = num[i]
102
+ element['update_time'] = update_time[i]
103
+ element['link'] = link[i]
104
+ element['pic'] = pic[i]
105
+ results << element
106
+ end
107
+
108
+ end
109
+ puts results.length
70
110
  results
71
111
  end
72
112
 
@@ -1,4 +1,4 @@
1
1
  module ShopeeScrape
2
- VERSION = '0.1.2'
3
- DATE = '2015-12-06'
2
+ VERSION = '0.2.0'
3
+ DATE = '2016-01-09'
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: shopee
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sheng Jung Wu
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2015-12-06 00:00:00.000000000 Z
14
+ date: 2016-01-09 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: minitest
@@ -125,7 +125,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
125
125
  version: '0'
126
126
  requirements: []
127
127
  rubyforge_project:
128
- rubygems_version: 2.4.5.1
128
+ rubygems_version: 2.4.6
129
129
  signing_key:
130
130
  specification_version: 4
131
131
  summary: Scrape categories of mobile01!