shopee 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/.travis.yml +4 -0
- data/Gemfile +10 -0
- data/README.md +23 -0
- data/Rakefile +10 -0
- data/bin/shopee_titles +15 -0
- data/lib/shopee_scrap.rb +45 -0
- data/spec/shopee_spec.rb +25 -0
- data/vcr/vcr.rb +7 -0
- metadata +127 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 8162d4eda0b852b61f80f873d566f046df929f5a
|
4
|
+
data.tar.gz: e8cfdea68b4163292dc62113e4ed92c3fc1e8622
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 92fb9cd073ebc2dd6014e865afb70cf790106a2dd10c4c09d4e7ec79b35268eb53ea2345d8ad73174d2fb94037819b6f1fc4ee1faba1855b708115425cf5503e
|
7
|
+
data.tar.gz: e814c518173af4929411433132063c6b37b8e5c7844b8c89feff6b92a57e5af96d28b4030fc28ff37bf970205b03f1a056d96db8745460bed4c32c3814600d0b
|
data/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Gemfile.lock
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Ideate-and-Scrape
|
2
|
+
|
3
|
+
[![Build Status](https://travis-ci.org/Smartibuy/ideate-and-scrape.svg?branch=master)](https://travis-ci.org/Smartibuy/ideate-and-scrape)
|
4
|
+
|
5
|
+
## Description
|
6
|
+
|
7
|
+
Scrapes the category names from [Shopee](http://shopee.tw/mobile/).
|
8
|
+
|
9
|
+
### Installation
|
10
|
+
- Install packages
|
11
|
+
```
|
12
|
+
$ bundle install
|
13
|
+
```
|
14
|
+
- Run
|
15
|
+
```
|
16
|
+
$ ./bin/shopee_titles
|
17
|
+
```
|
18
|
+
|
19
|
+
### Test
|
20
|
+
```shell
|
21
|
+
$ cd spec
|
22
|
+
$ ruby shopee_spec.rb
|
23
|
+
```
|
data/Rakefile
ADDED
data/bin/shopee_titles
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Command-line entry point: prints every title scraped from Shopee, one per
# line. On failure the error is reported on stderr and the exit status is 1.
require 'json'
# Resolve the library relative to this file so the command works from any
# working directory, not only from the project root.
require_relative '../lib/shopee_scrap'

begin
  scraper = ShopeeTileScrape::ShopeeTile.new
  # `titles` returns a JSON string; decode it back into an array of hashes.
  entries = JSON.parse(scraper.titles)

  entries.each do |entry|
    puts "#{entry['title']} "
  end
rescue => e
  # Report on stderr and signal failure to the calling shell.
  warn "Error occured - see details: #{e}"
  exit 1
end
|
data/lib/shopee_scrap.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'oga'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'json'
|
4
|
+
|
5
|
+
|
6
|
+
# Namespace for the Shopee title scraper.
module ShopeeTileScrape
  # Downloads the page at URL once, on construction, and exposes the
  # non-empty title texts found on it as a JSON string.
  class ShopeeTile
    # Page that is downloaded and parsed.
    URL = 'http://mall.shopee.tw/?utm_source=OrganicA&utm_medium=OrganicA&utm_campaign=lp_home_mall'
    # Selects <div class="title"> elements.
    # NOTE(review): the leading `//` is an absolute XPath, so when evaluated
    # on each wrapper it presumably matches title divs in the whole document
    # rather than only that wrapper's descendants (`.//div[...]`) -- confirm
    # against Oga's behaviour before changing.
    XPATH_CARD = "//div[(@class='title')]"
    # Selects the <div class="list-wrapper"> containers.
    CARD_TITLE_XPATH = "//div[(@class='list-wrapper')]"

    # Fetches and parses the page immediately (performs network I/O).
    def initialize
      parse_html
    end

    # @return [String] JSON array of {"title": text} objects, one per
    #   non-empty title; computed on first call and memoized afterwards.
    def titles
      @titles ||= extract_titles
    end

    private

    # Downloads URL and stores the parsed document in @document.
    #
    # Goes through URI#open (provided by open-uri) instead of Kernel#open,
    # which no longer accepts URLs on Ruby 3.0+. The block form closes the
    # response stream once parsing is done.
    def parse_html
      @document = URI.parse(URL).open { |page| Oga.parse_html(page) }
    end

    # Collects the text of every title node under every wrapper, drops the
    # empty ones and serializes the rest.
    #
    # @return [String] JSON array of {"title": text} objects
    def extract_titles
      texts = @document.xpath(CARD_TITLE_XPATH).flat_map do |card|
        card.xpath(XPATH_CARD).map(&:text)
      end

      texts.reject(&:empty?).map { |text| { 'title' => text } }.to_json
    end
  end
end
|
data/spec/shopee_spec.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'minitest/autorun'
require 'webmock/minitest'
require 'json'
require_relative '../lib/shopee_scrap'
require_relative '../vcr/vcr.rb'

# Category names the scraper output is compared against, in order.
CATEGORY_LIST = [
  '女生衣著', '男生衣著', '女生包包與配件', '男生包包與配件', '女鞋', '男鞋', '3C相關',
  '手機平板與周邊', '居家生活', '娛樂、收藏', '代買代購', '服務、票券', '化妝、保養品',
  '嬰幼童與母親', '美食、伴手禮', '寵物', '戶外、運動', '家電影音', '電玩遊戲相關', '其他類別'
]

# The same names wrapped as {'title' => name} hashes, matching the shape of
# the scraper's decoded JSON output.
TESTAMENT_ARR = CATEGORY_LIST.map { |name| { 'title' => name } }

VCR.use_cassette('shopee', record: :once) do
  # The constructor downloads the page, so the scraper is created while the
  # cassette is active.
  scraper = ShopeeTileScrape::ShopeeTile.new

  describe 'Shopee testament using vcr' do
    it 'should return an array of string and name of categories' do
      JSON.parse(scraper.titles).must_equal TESTAMENT_ARR
    end
  end
end
|
data/vcr/vcr.rb
ADDED
metadata
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: shopee
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Sheng Jung Wu
|
8
|
+
- Calvin Jeng
|
9
|
+
- Henry Chang
|
10
|
+
- Yi Wei Huang
|
11
|
+
autorequire:
|
12
|
+
bindir: bin
|
13
|
+
cert_chain: []
|
14
|
+
date: 2015-10-17 00:00:00.000000000 Z
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: minitest
|
18
|
+
requirement: !ruby/object:Gem::Requirement
|
19
|
+
requirements:
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '0'
|
23
|
+
type: :development
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: vcr
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
requirements:
|
34
|
+
- - ">="
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: '0'
|
37
|
+
type: :development
|
38
|
+
prerelease: false
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
- !ruby/object:Gem::Dependency
|
45
|
+
name: webmock
|
46
|
+
requirement: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0'
|
51
|
+
type: :development
|
52
|
+
prerelease: false
|
53
|
+
version_requirements: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '0'
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: oga
|
60
|
+
requirement: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '0'
|
65
|
+
type: :runtime
|
66
|
+
prerelease: false
|
67
|
+
version_requirements: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '0'
|
72
|
+
- !ruby/object:Gem::Dependency
|
73
|
+
name: json
|
74
|
+
requirement: !ruby/object:Gem::Requirement
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: '0'
|
79
|
+
type: :runtime
|
80
|
+
prerelease: false
|
81
|
+
version_requirements: !ruby/object:Gem::Requirement
|
82
|
+
requirements:
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
description: Scrape categories of shopee!
|
87
|
+
email:
|
88
|
+
executables:
|
89
|
+
- shopee_titles
|
90
|
+
extensions: []
|
91
|
+
extra_rdoc_files: []
|
92
|
+
files:
|
93
|
+
- ".gitignore"
|
94
|
+
- ".travis.yml"
|
95
|
+
- Gemfile
|
96
|
+
- README.md
|
97
|
+
- Rakefile
|
98
|
+
- bin/shopee_titles
|
99
|
+
- lib/shopee_scrap.rb
|
100
|
+
- spec/shopee_spec.rb
|
101
|
+
- vcr/vcr.rb
|
102
|
+
homepage: https://github.com/Smartibuy/ideate-and-scrape
|
103
|
+
licenses:
|
104
|
+
- MIT
|
105
|
+
metadata: {}
|
106
|
+
post_install_message:
|
107
|
+
rdoc_options: []
|
108
|
+
require_paths:
|
109
|
+
- lib
|
110
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
111
|
+
requirements:
|
112
|
+
- - ">="
|
113
|
+
- !ruby/object:Gem::Version
|
114
|
+
version: '0'
|
115
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
116
|
+
requirements:
|
117
|
+
- - ">="
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: '0'
|
120
|
+
requirements: []
|
121
|
+
rubyforge_project:
|
122
|
+
rubygems_version: 2.4.6
|
123
|
+
signing_key:
|
124
|
+
specification_version: 4
|
125
|
+
summary: Scrape categories of shopee!
|
126
|
+
test_files:
|
127
|
+
- spec/shopee_spec.rb
|