kiwicourse 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +58 -7
- data/bin/kiwicourse +38 -9
- data/lib/coursesdesc/courses.rb +31 -34
- data/lib/coursesdesc/version.rb +1 -1
- data/spec/courses_date_data.rb +1 -1
- data/spec/courses_id_data.rb +220 -0
- data/spec/courses_name_data.rb +220 -0
- data/spec/courses_url_data.rb +220 -0
- data/spec/coursesdesc_spec.rb +6 -7
- metadata +7 -7
- data/spec/courses_id_test_data.rb +0 -219
- data/spec/courses_name_test_data.rb +0 -219
- data/spec/urls_test_data.rb +0 -219
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cf325fef6f456da234f7b16a171df31edf564fe3
|
4
|
+
data.tar.gz: 6ff8b1e49e58cfc77549283114f152b1416c7ac3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4f45126b2484ba0cf979ee3c2ac75d0891ee2eafe5692d43c4295184f024cdd9ae2f4fce3008351e4d707e46de5638eddd718135c053f4f390a3fc352a2743cc
|
7
|
+
data.tar.gz: d07c2bbd7ccee331b2fff2d3195556becb1b21ce683dd2361fdc4ae57013d821eb0235b5da53cf3eda47a08bf72cc9d8258d33c15713bbc7d269450a8a143b54
|
data/README.md
CHANGED
@@ -3,7 +3,9 @@
|
|
3
3
|
[![Build Status](https://travis-ci.org/Kiwi-Learn/kiwi-scraper.svg?branch=master)](https://travis-ci.org/Kiwi-Learn/kiwi-scraper)
|
4
4
|
[![Gem Version](https://badge.fury.io/rb/kiwicourse.svg)](https://badge.fury.io/rb/kiwicourse)
|
5
5
|
|
6
|
-
Kiwi Scraper is a great tool to get [Sharecourse](http://sharecourse.net/sharecourse/general/home/)
|
6
|
+
Kiwi Scraper is a great tool to get [Sharecourse](http://sharecourse.net/sharecourse/general/home/) information!
|
7
|
+
|
8
|
+
Yes, you can use the CLI to browse course information even when you are **offline**!!
|
7
9
|
|
8
10
|
We respect Sharecourse's `robots.txt`
|
9
11
|
|
@@ -14,23 +16,72 @@ Install it with the following command:
|
|
14
16
|
$ gem install kiwicourse
|
15
17
|
```
|
16
18
|
|
17
|
-
Run it from the command line.
|
19
|
+
Run it from the command line. We provide 5 features for browsing course information on [Sharecourse](http://sharecourse.net/sharecourse/general/home/).
|
20
|
+
|
21
|
+
- list
|
22
|
+
- search
|
23
|
+
- info
|
24
|
+
- open
|
25
|
+
- update
|
26
|
+
|
27
|
+
|
28
|
+
Help
|
29
|
+
```sh
|
30
|
+
$ kiwicourse
|
31
|
+
Commands:
|
32
|
+
kiwicourse help [COMMAND] # Describe available commands or one specific command
|
33
|
+
kiwicourse info ID # Display information about course.
|
34
|
+
kiwicourse list # List all courses on ShareCourse
|
35
|
+
kiwicourse open ID # Open the course page on browser with course id
|
36
|
+
kiwicourse search COURSENAME # Search a course on ShareCourse
|
37
|
+
kiwicourse update # Update the offline courses data
|
38
|
+
```
|
39
|
+
|
40
|
+
|
41
|
+
List all courses on [Sharecourse](http://sharecourse.net/sharecourse/general/home/)
|
42
|
+
```sh
|
43
|
+
$ kiwicourse list
|
44
|
+
MA02004 - 會計學原理
|
45
|
+
AO35004 - 行動磨課師【曠世名琴訴說的故事】
|
46
|
+
DM91002 - 方法對了,人人都可以是設計師
|
47
|
+
|
48
|
+
...
|
49
|
+
|
50
|
+
CS01001 - 網路安全 Network Security
|
51
|
+
CS01002 - 作業系統 Operating Systems
|
52
|
+
EE62002 - 小型風力機系統與國際認證 (104 秋季班)
|
53
|
+
```
|
18
54
|
|
19
55
|
Search a course with a keyword
|
20
56
|
```sh
|
21
|
-
$ kiwicourse search
|
22
|
-
|
57
|
+
$ kiwicourse search 會計
|
58
|
+
MA02004 - 會計學原理
|
23
59
|
```
|
24
60
|
|
25
|
-
|
61
|
+
Display information about the course.
|
62
|
+
|
26
63
|
```sh
|
27
|
-
$ kiwicourse
|
64
|
+
$ kiwicourse info CS06001
|
65
|
+
Course ID: CS06001
|
66
|
+
Course: 資料結構
|
67
|
+
Course time: 2014-09-14 - 2015-02-28
|
68
|
+
Course webpagehttp://www.sharecourse.net/sharecourse/course/view/courseInfo/28
|
28
69
|
```
|
29
70
|
|
71
|
+
Open the course webpage in browser
|
72
|
+
```sh
|
73
|
+
$ kiwicourse open MA02004
|
74
|
+
```
|
75
|
+
|
76
|
+
Update the offline courses data
|
77
|
+
```sh
|
78
|
+
$ kiwicourse update
|
79
|
+
Already up to date.
|
80
|
+
```
|
30
81
|
|
31
82
|
Use it from your Ruby code:
|
32
83
|
````ruby
|
33
|
-
require '
|
84
|
+
require 'coursesdesc'
|
34
85
|
sc = KiwiScraper::ShareCourse.new
|
35
86
|
|
36
87
|
course_found = sc.course_name
|
data/bin/kiwicourse
CHANGED
@@ -4,12 +4,19 @@ require 'thor'
|
|
4
4
|
require 'fuzzy_match'
|
5
5
|
require 'launchy'
|
6
6
|
require 'coursesdesc' # for production
|
7
|
-
# require '
|
7
|
+
# require './lib/coursesdesc/courses.rb' # for cmd line testing purposes
|
8
|
+
|
9
|
+
DUMP_FILE_PATH = Dir.home + '/.kiwicourse.dump'
|
8
10
|
|
9
11
|
class KiwiCLI < Thor
|
10
12
|
desc 'search COURSENAME', 'Search a course on ShareCourse'
|
11
13
|
def search(coursename)
|
12
|
-
|
14
|
+
if File.exist?(DUMP_FILE_PATH)
|
15
|
+
sc = Marshal.load(File.read(DUMP_FILE_PATH))
|
16
|
+
else
|
17
|
+
sc = KiwiScraper::ShareCourse.new
|
18
|
+
File.open(DUMP_FILE_PATH, 'w') { |f| f.write(Marshal.dump(sc)) }
|
19
|
+
end
|
13
20
|
result = FuzzyMatch.new(sc.course_name).find(coursename)
|
14
21
|
input_key = Digest::SHA256.digest result
|
15
22
|
id = sc.courses_name_to_id_mapping[input_key]
|
@@ -18,28 +25,50 @@ class KiwiCLI < Thor
|
|
18
25
|
|
19
26
|
desc 'list', 'List all courses on ShareCourse'
|
20
27
|
def list
|
21
|
-
|
22
|
-
|
28
|
+
if File.exist?(DUMP_FILE_PATH)
|
29
|
+
sc = Marshal.load(File.read(DUMP_FILE_PATH))
|
30
|
+
else
|
31
|
+
sc = KiwiScraper::ShareCourse.new
|
32
|
+
File.open(DUMP_FILE_PATH, 'w') { |f| f.write(Marshal.dump(sc)) }
|
33
|
+
end
|
34
|
+
sc.courses_id_to_all_mapping.each do |key, info|
|
23
35
|
puts "#{key} - #{info['name']}"
|
24
36
|
end
|
25
37
|
end
|
26
38
|
|
27
|
-
desc 'open ID', '
|
39
|
+
desc 'open ID', 'Open the course page on browser with course id'
|
28
40
|
def open(id)
|
29
|
-
|
30
|
-
|
41
|
+
if File.exist?(DUMP_FILE_PATH)
|
42
|
+
sc = Marshal.load(File.read(DUMP_FILE_PATH))
|
43
|
+
else
|
44
|
+
sc = KiwiScraper::ShareCourse.new
|
45
|
+
File.open(DUMP_FILE_PATH, 'w') { |f| f.write(Marshal.dump(sc)) }
|
46
|
+
end
|
47
|
+
course = sc.courses_id_to_all_mapping[id]
|
31
48
|
Launchy.open(course['url'])
|
32
49
|
end
|
33
50
|
|
34
51
|
desc 'info ID', 'Display information about course.'
|
35
52
|
def info(id)
|
36
|
-
|
37
|
-
|
53
|
+
if File.exist?(DUMP_FILE_PATH)
|
54
|
+
sc = Marshal.load(File.read(DUMP_FILE_PATH))
|
55
|
+
else
|
56
|
+
sc = KiwiScraper::ShareCourse.new
|
57
|
+
File.open(DUMP_FILE_PATH, 'w') { |f| f.write(Marshal.dump(sc)) }
|
58
|
+
end
|
59
|
+
course = sc.courses_id_to_all_mapping[id]
|
38
60
|
puts "Course ID: #{course['id']}"
|
39
61
|
puts "Course: #{course['name']}"
|
40
62
|
puts "Course time: #{course['date']}"
|
41
63
|
puts "Course webpage#{course['url']}"
|
42
64
|
end
|
65
|
+
|
66
|
+
desc 'update', 'Update the offline courses data'
|
67
|
+
def update
|
68
|
+
sc = KiwiScraper::ShareCourse.new
|
69
|
+
File.open(DUMP_FILE_PATH, 'w') { |f| f.write(Marshal.dump(sc)) }
|
70
|
+
puts 'Already up to date.'
|
71
|
+
end
|
43
72
|
end
|
44
73
|
|
45
74
|
KiwiCLI.start(ARGV)
|
data/lib/coursesdesc/courses.rb
CHANGED
@@ -9,11 +9,7 @@ module KiwiScraper
|
|
9
9
|
URL = 'http://www.sharecourse.net/sharecourse/course/view/courseList'
|
10
10
|
|
11
11
|
def initialize
|
12
|
-
|
13
|
-
end
|
14
|
-
|
15
|
-
def get_html
|
16
|
-
@document = Oga.parse_html(open(URL))
|
12
|
+
parse_html
|
17
13
|
end
|
18
14
|
|
19
15
|
def course_name
|
@@ -21,7 +17,7 @@ module KiwiScraper
|
|
21
17
|
end
|
22
18
|
|
23
19
|
def course_date
|
24
|
-
@course_date ||=
|
20
|
+
@course_date ||= parse_course_date
|
25
21
|
end
|
26
22
|
|
27
23
|
def course_url
|
@@ -29,19 +25,31 @@ module KiwiScraper
|
|
29
25
|
end
|
30
26
|
|
31
27
|
def courses_name_to_id_mapping
|
32
|
-
@course_map ||=
|
28
|
+
@course_map ||= map_course_name_to_id
|
33
29
|
end
|
34
30
|
|
35
31
|
def course_id
|
36
32
|
@course_id ||= parse_course_id
|
37
33
|
end
|
38
34
|
|
39
|
-
def
|
40
|
-
@course_id_map ||=
|
35
|
+
def courses_id_to_all_mapping
|
36
|
+
@course_id_map ||= map_course_id_to_all
|
41
37
|
end
|
42
38
|
|
43
39
|
private
|
44
40
|
|
41
|
+
def parse_html
|
42
|
+
@document = Oga.parse_html(open(URL))
|
43
|
+
end
|
44
|
+
|
45
|
+
def parse_course_id
|
46
|
+
course_id = []
|
47
|
+
@document.xpath("//div[@class='gb_mid']").each do |course|
|
48
|
+
course_id << course.children[4].children[0].children.text.split(':')[1]
|
49
|
+
end
|
50
|
+
course_id
|
51
|
+
end
|
52
|
+
|
45
53
|
def parse_course_name
|
46
54
|
result = []
|
47
55
|
@document.xpath("//h4[@id='courseName']").each do |course|
|
@@ -58,7 +66,17 @@ module KiwiScraper
|
|
58
66
|
result
|
59
67
|
end
|
60
68
|
|
61
|
-
def
|
69
|
+
def parse_course_date
|
70
|
+
# date type yyyy-mm-dd - yyyy-mm-dd
|
71
|
+
date = []
|
72
|
+
@document.xpath("//p[@id='courseDate']").each do |course|
|
73
|
+
longdate = course.text
|
74
|
+
date << longdate.split(':')[1]
|
75
|
+
end
|
76
|
+
date
|
77
|
+
end
|
78
|
+
|
79
|
+
def map_course_name_to_id
|
62
80
|
name = parse_course_name
|
63
81
|
course_id = parse_course_id
|
64
82
|
|
@@ -67,15 +85,13 @@ module KiwiScraper
|
|
67
85
|
hash_name[index] = Digest::SHA256.digest name[index]
|
68
86
|
end
|
69
87
|
|
70
|
-
|
71
|
-
|
72
|
-
result
|
88
|
+
Hash[hash_name.zip(course_id)] # return value
|
73
89
|
end
|
74
90
|
|
75
|
-
def
|
91
|
+
def map_course_id_to_all
|
76
92
|
name = parse_course_name
|
77
93
|
course_id = parse_course_id
|
78
|
-
date =
|
94
|
+
date = parse_course_date
|
79
95
|
url = parse_course_url
|
80
96
|
info = {}
|
81
97
|
|
@@ -90,24 +106,5 @@ module KiwiScraper
|
|
90
106
|
end
|
91
107
|
info
|
92
108
|
end
|
93
|
-
|
94
|
-
def get_course_date
|
95
|
-
#date type yyyy-mm-dd - yyyy-mm-dd
|
96
|
-
date = []
|
97
|
-
@document.xpath("//p[@id='courseDate']").each do |course|
|
98
|
-
longdate = course.text
|
99
|
-
date << longdate.split(":")[1]
|
100
|
-
end
|
101
|
-
date
|
102
|
-
end
|
103
|
-
|
104
|
-
def parse_course_id
|
105
|
-
course_id = []
|
106
|
-
@document.xpath("//div[@class='gb_mid']").each do |course|
|
107
|
-
course_id << course.children[4].children[0].children.text.split(":")[1]
|
108
|
-
end
|
109
|
-
course_id
|
110
|
-
end
|
111
|
-
|
112
109
|
end
|
113
110
|
end
|
data/lib/coursesdesc/version.rb
CHANGED
data/spec/courses_date_data.rb
CHANGED
@@ -0,0 +1,220 @@
|
|
1
|
+
COURSES_ID_LIST = [
|
2
|
+
'MA02004',
|
3
|
+
'AO35004',
|
4
|
+
'DM91002',
|
5
|
+
'WL33002',
|
6
|
+
'SP31002',
|
7
|
+
'SO31003',
|
8
|
+
'SO31004',
|
9
|
+
'AN54005',
|
10
|
+
'CM81009',
|
11
|
+
'MM39012',
|
12
|
+
'MM39013',
|
13
|
+
'ML22006',
|
14
|
+
'CS08006',
|
15
|
+
'OL48008',
|
16
|
+
'OL48009',
|
17
|
+
'OL48010',
|
18
|
+
'CM99001',
|
19
|
+
'AN99002',
|
20
|
+
'WL32009',
|
21
|
+
'OL32010',
|
22
|
+
'JL91001',
|
23
|
+
'MM93001',
|
24
|
+
'PH05009',
|
25
|
+
'CS01023',
|
26
|
+
'CS92001',
|
27
|
+
'CS121000',
|
28
|
+
'CS01022',
|
29
|
+
'CS01024',
|
30
|
+
'CS08004',
|
31
|
+
'SC46005',
|
32
|
+
'SC46006',
|
33
|
+
'OL33001',
|
34
|
+
'CS01024',
|
35
|
+
'HS05007',
|
36
|
+
'PH05008',
|
37
|
+
'AN54004',
|
38
|
+
'CS13001',
|
39
|
+
'CS81006',
|
40
|
+
'CM81005',
|
41
|
+
'MM81007',
|
42
|
+
'CM81008',
|
43
|
+
'CS08005',
|
44
|
+
'ML22007',
|
45
|
+
'ML34002',
|
46
|
+
'SS39009',
|
47
|
+
'MM39010',
|
48
|
+
'AM39011',
|
49
|
+
'ML98001',
|
50
|
+
'CS01021',
|
51
|
+
'CS08003',
|
52
|
+
'CM81004',
|
53
|
+
'OL48007',
|
54
|
+
'CS32008',
|
55
|
+
'LI01023',
|
56
|
+
'MM81003',
|
57
|
+
'CS81002',
|
58
|
+
'WL32006',
|
59
|
+
'ML22005',
|
60
|
+
'CM81001',
|
61
|
+
'CS08002',
|
62
|
+
'CS05005',
|
63
|
+
'CH54003',
|
64
|
+
'CS08001',
|
65
|
+
'ML22004',
|
66
|
+
'CS01019',
|
67
|
+
'OL31001',
|
68
|
+
'CS02005',
|
69
|
+
'CS05006',
|
70
|
+
'AN54002',
|
71
|
+
'EE57005',
|
72
|
+
'OL48005',
|
73
|
+
'OL48006',
|
74
|
+
'SC46004',
|
75
|
+
'SC46003',
|
76
|
+
'MM10037',
|
77
|
+
'EE10038',
|
78
|
+
'AO35003',
|
79
|
+
'AO35002',
|
80
|
+
'CM55004',
|
81
|
+
'AO35001',
|
82
|
+
'CM55003',
|
83
|
+
'EE57004',
|
84
|
+
'SC46002',
|
85
|
+
'EC05001',
|
86
|
+
'ML22003',
|
87
|
+
'LE58001',
|
88
|
+
'CE02002',
|
89
|
+
'CS02003',
|
90
|
+
'TT10201',
|
91
|
+
'TO60001',
|
92
|
+
'OL59001',
|
93
|
+
'EE57003',
|
94
|
+
'MM16002',
|
95
|
+
'ML22002',
|
96
|
+
'MS57002',
|
97
|
+
'OL48003',
|
98
|
+
'AN54001',
|
99
|
+
'OL48002',
|
100
|
+
'NB13001',
|
101
|
+
'MM16003',
|
102
|
+
'MM16004',
|
103
|
+
'OL48004',
|
104
|
+
'EE10031',
|
105
|
+
'CS10032',
|
106
|
+
'EE10033',
|
107
|
+
'EE10034',
|
108
|
+
'EE10035',
|
109
|
+
'EE10036',
|
110
|
+
'CS05003',
|
111
|
+
'CS05004',
|
112
|
+
'EP57001',
|
113
|
+
'WL32005',
|
114
|
+
'ML22001',
|
115
|
+
'OL32004',
|
116
|
+
'EE62001',
|
117
|
+
'CS51001',
|
118
|
+
'PH05002',
|
119
|
+
'EE50001',
|
120
|
+
'OL48001',
|
121
|
+
'CM55002',
|
122
|
+
'CS01017',
|
123
|
+
'SC46001',
|
124
|
+
'CM55001',
|
125
|
+
'GE10206',
|
126
|
+
'CS06001',
|
127
|
+
'GE10204',
|
128
|
+
'EM01018',
|
129
|
+
'CS32003',
|
130
|
+
'GE10203',
|
131
|
+
'HR10018',
|
132
|
+
'HR10007',
|
133
|
+
'HR10008',
|
134
|
+
'HR10009',
|
135
|
+
'HR10010',
|
136
|
+
'HR10011',
|
137
|
+
'HR10012',
|
138
|
+
'HR10013',
|
139
|
+
'HR10014',
|
140
|
+
'HR10015',
|
141
|
+
'HR10016',
|
142
|
+
'HR10017',
|
143
|
+
'CS32002',
|
144
|
+
'HR10001',
|
145
|
+
'HR10002',
|
146
|
+
'HR10003',
|
147
|
+
'HR10004',
|
148
|
+
'HR10005',
|
149
|
+
'HR10006',
|
150
|
+
'CS10007',
|
151
|
+
'CS10003',
|
152
|
+
'CS05003',
|
153
|
+
'EE10028',
|
154
|
+
'CS05002',
|
155
|
+
'EE10024',
|
156
|
+
'EE10025',
|
157
|
+
'EE10026',
|
158
|
+
'EE10020',
|
159
|
+
'EE10021',
|
160
|
+
'EE10022',
|
161
|
+
'EE10023',
|
162
|
+
'EE10027',
|
163
|
+
'CS10004',
|
164
|
+
'EE10018',
|
165
|
+
'CS10005',
|
166
|
+
'EE10019',
|
167
|
+
'EE10014',
|
168
|
+
'EE10015',
|
169
|
+
'EE10017',
|
170
|
+
'EE10013',
|
171
|
+
'EE10016',
|
172
|
+
'CS10006',
|
173
|
+
'EE10012',
|
174
|
+
'EE10010',
|
175
|
+
'EE10011',
|
176
|
+
'EE10008',
|
177
|
+
'EE10009',
|
178
|
+
'EE10005',
|
179
|
+
'CS32001',
|
180
|
+
'EE10004',
|
181
|
+
'EE10002',
|
182
|
+
'EE10003',
|
183
|
+
'EE10007',
|
184
|
+
'EE10029',
|
185
|
+
'EE10030',
|
186
|
+
'EE10001',
|
187
|
+
'EE10006',
|
188
|
+
'CS10001',
|
189
|
+
'CM10001',
|
190
|
+
'ML10001',
|
191
|
+
'CS25001',
|
192
|
+
'CS10002',
|
193
|
+
'MN09009',
|
194
|
+
'CS01016',
|
195
|
+
'CS02001',
|
196
|
+
'CS01013',
|
197
|
+
'CS01014',
|
198
|
+
'CS01015',
|
199
|
+
'CS18001',
|
200
|
+
'CS01007',
|
201
|
+
'CS16001',
|
202
|
+
'CS05001',
|
203
|
+
'CS01009',
|
204
|
+
'CS01010',
|
205
|
+
'CS07001',
|
206
|
+
'MN09007',
|
207
|
+
'HSS19001',
|
208
|
+
'CS01012',
|
209
|
+
'CS01011',
|
210
|
+
'EE01003',
|
211
|
+
'CS01004',
|
212
|
+
'CS04001',
|
213
|
+
'CS01005',
|
214
|
+
'CS03001',
|
215
|
+
'CS04001',
|
216
|
+
'CS01008',
|
217
|
+
'CS01006',
|
218
|
+
'CS01001',
|
219
|
+
'CS01002',
|
220
|
+
'EE62002']
|