kiwicourse 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +35 -0
- data/.travis.yml +9 -0
- data/Gemfile +10 -0
- data/LICENSE +22 -0
- data/README.md +51 -0
- data/Rakefile +8 -0
- data/bin/coursesdesc +29 -0
- data/lib/coursesdesc/courses.rb +68 -0
- data/lib/coursesdesc/version.rb +5 -0
- data/lib/coursesdesc.rb +2 -0
- data/spec/courses_name_test_data.rb +1 -0
- data/spec/coursesdesc_spec.rb +25 -0
- data/spec/fixtures/vcr_cassettes/courses.yml +7960 -0
- data/spec/urls_test_data.rb +1 -0
- metadata +121 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 04ccb550e08d08396510f42e62e153fe8a701976
|
4
|
+
data.tar.gz: ca8578ad21980ca74bcdb4ebbc44883761b5f9b0
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 2309243dfdd67c71b192168406e6a3a4a7396d6ba1fe4f023c858b6126deb29a256d561d00679ad0d97d08c6ceeb335a9e760492aaa8442abe679e4b1a6661ee
|
7
|
+
data.tar.gz: 881eb12a360f3aeaaa444cb71d9070adf8ce95fbf4185a5f4994e2c8000b1dea15c62c0dba6f0f846a5cd0468e5e5e5074e819e2dc9bab0b315be36b70e95845
|
data/.gitignore
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
/.config
|
4
|
+
/coverage/
|
5
|
+
/InstalledFiles
|
6
|
+
/pkg/
|
7
|
+
/spec/reports/
|
8
|
+
/test/tmp/
|
9
|
+
/test/version_tmp/
|
10
|
+
/tmp/
|
11
|
+
|
12
|
+
## Specific to RubyMotion:
|
13
|
+
.dat*
|
14
|
+
.repl_history
|
15
|
+
build/
|
16
|
+
|
17
|
+
## Documentation cache and generated files:
|
18
|
+
/.yardoc/
|
19
|
+
/_yardoc/
|
20
|
+
/doc/
|
21
|
+
/rdoc/
|
22
|
+
|
23
|
+
## Environment normalisation:
|
24
|
+
/.bundle/
|
25
|
+
/vendor/bundle
|
26
|
+
/lib/bundler/man/
|
27
|
+
|
28
|
+
# for a library or gem, you might want to ignore these files since the code is
|
29
|
+
# intended to run in multiple environments; otherwise, check them in:
|
30
|
+
Gemfile.lock
|
31
|
+
# .ruby-version
|
32
|
+
# .ruby-gemset
|
33
|
+
|
34
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
35
|
+
.rvmrc
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Kiwi-Learn
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
22
|
+
|
data/README.md
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# kiwi-scraper
|
2
|
+
|
3
|
+
[![Build Status](https://travis-ci.org/Kiwi-Learn/kiwi-scraper.svg?branch=master)](https://travis-ci.org/Kiwi-Learn/kiwi-scraper)
|
4
|
+
|
5
|
+
|
6
|
+
Kiwi Scraper is a great tool for getting [Sharecourse](http://sharecourse.net/sharecourse/general/home/) course descriptions!
|
7
|
+
|
8
|
+
We respect Sharecourse's `robots.txt`.
|
9
|
+
|
10
|
+
## Usage
|
11
|
+
|
12
|
+
Install it with the following command:
|
13
|
+
```sh
|
14
|
+
$ bundle install
|
15
|
+
```
|
16
|
+
|
17
|
+
Run it from the command line.
|
18
|
+
```sh
|
19
|
+
$ ./bin/coursesdesc
|
20
|
+
```
|
21
|
+
|
22
|
+
Screenshots
|
23
|
+
|
24
|
+
```sh
|
25
|
+
_________ _______ ____
|
26
|
+
/ __/ ___/ ____ / ___/ / / _/
|
27
|
+
_\ \/ /__ /___/ / /__/ /___/ /
|
28
|
+
/___/\___/ \___/____/___/
|
29
|
+
|
30
|
+
> courses
|
31
|
+
```
|
32
|
+
|
33
|
+
Use it from your Ruby code:
|
34
|
+
````ruby
|
35
|
+
require './lib/coursesdesc/courses.rb'
|
36
|
+
sc = KiwiScraper::ShareCourse.new
|
37
|
+
|
38
|
+
course_found = sc.course_name
|
39
|
+
puts course_found
|
40
|
+
|
41
|
+
url_found = sc.course_url
|
42
|
+
puts url_found
|
43
|
+
|
44
|
+
````
|
45
|
+
|
46
|
+
## Test
|
47
|
+
|
48
|
+
```sh
|
49
|
+
$ cd spec
|
50
|
+
$ ruby coursesdesc_spec.rb
|
51
|
+
```
|
data/Rakefile
ADDED
data/bin/coursesdesc
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Interactive console for the ShareCourse scraper.
#
# Prints a banner, builds one scraper instance, then reads commands from the
# '> ' prompt until Readline reports end of input:
#   courses - print every scraped course name, one per line
#   url     - print every scraped course URL, one per line
#   test    - print 'testing'
# Any other input prints 'Command not found!'.
require 'readline'
# require_relative is resolved against this file's directory, so the library
# loads no matter which directory the script is started from. A plain
# `require '../lib/...'` is resolved against the working directory instead and
# fails when the script is run as ./bin/coursesdesc from the project root.
require_relative '../lib/coursesdesc/courses.rb'

puts ' _________ _______ ____'
puts ' / __/ ___/ ____ / ___/ / / _/'
puts ' _\\ \\/ /__ /___/ / /__/ /___/ / '
puts ' /___/\\___/ \\___/____/___/ '
puts ''

# The constructor downloads and parses the course list page once; every
# command below is answered from that single instance.
sc = KiwiScraper::ShareCourse.new

# Parentheses mark the assignment in the loop condition as intentional.
while (buf = Readline.readline('> ', true))
  case buf
  when 'courses'
    sc.course_name.each { |name| puts name }
  when 'url'
    sc.course_url.each { |url| puts url }
  when 'test'
    puts 'testing'
  else
    puts 'Command not found!'
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'oga'
|
2
|
+
require 'open-uri'
|
3
|
+
|
4
|
+
module KiwiScraper
  # Scrapes course names and course URLs from the ShareCourse course list page.
  class ShareCourse
    URL = 'http://www.sharecourse.net/sharecourse/course/view/courseList'

    # Downloads and parses the course list page as soon as the object is built.
    def initialize
      get_html
    end

    # Fetches URL and stores the parsed page in @document.
    #
    # URI.open is used instead of a bare `open`: open-uri no longer patches
    # Kernel#open on Ruby 3.0+, where `open(URL)` would look for a local file.
    # The block form closes the downloaded stream once parsing has finished.
    #
    # @return the parsed document (also kept in @document)
    def get_html
      @document = URI.open(URL) { |io| Oga.parse_html(io) }
    end

    # @return [Array<String>] text of every <h4 id="courseName"> element;
    #   computed on first call, then memoized
    def course_name
      @course_name ||= get_course_name
    end

    # @return [Array<String>] the URL embedded in the onclick handler of every
    #   <div onclick=...> element; computed on first call, then memoized
    def course_url
      @course_url ||= get_course_url
    end

    # @return [Array<Hash>] one hash per course name, in page order, shaped as
    #   { 'course_name' => name, 'course_url' => url }; the url is nil when
    #   fewer URLs than names were found
    def courses_name_to_url_mapping
      @course_map ||= get_course
    end

    private

    # Collects the text of every course title heading.
    def get_course_name
      @document.xpath("//h4[@id='courseName']").map(&:text)
    end

    # Collects the course URL from every clickable <div>.
    def get_course_url
      @document.xpath('//div[@onclick]').map do |course|
        # Look the attribute up by name rather than by position so a change in
        # attribute order on the page cannot select the wrong attribute. The
        # URL is the first single-quoted segment of the onclick handler.
        course.get('onclick').split("'")[1]
      end
    end

    # Pairs each course name with the URL at the same position. Reuses the
    # memoized readers instead of scraping the document a second time.
    def get_course
      course_name.zip(course_url).map do |name, url|
        { 'course_name' => name, 'course_url' => url }
      end
    end
  end
end
|
data/lib/coursesdesc.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
COURSES_NAME_LIST = [" 方法對了,人人都可以是設計師", "103全球化/在地化莎士比亞", "2011 國際醫療器材專業法規趨勢研討會", "2013年 創業風清華", "2015 全球化/ 在地化 莎士比亞 Global/Local Shakespeare", "2015 生態體系與全球變遷Eco-system and Global Changes", "2015 程式設計導論Introduction to Computer Programming", "2015 細胞神經科學Cellular Neuroscience", "2015-秋季-電腦網路概論Introduction to Computer Networks", "2015微積分導論Introduction to Calculus", "2D動畫製作", "2D動畫製作-2015春季班", "2D動畫製作-2015秋季班 ", "3D動畫製作:Maya", "Android行動裝置程式設計證照班第一期(正式)", "Android行動裝置程式設計證照班第一期(試聽)", "Android行動裝置程式設計證照班第二期(正式)", "Android行動裝置程式設計證照班第二期(試聽)", "D010 基本電子電路", "D011 半導體元件物理 ", "D014 半導體製程、元件結構及可靠度整合課程 ", "D028 ESD Basic and Circuit Design ", "D029 黃光微影製程技術(Photolithography)", "D031 蝕刻技術(Etching) ", "D035 薄膜工程 ", "D039 先進電子封裝設計導論 ", "D042 電子電路學概論 ", "D043 積體電路設計概論 ", "D044 IC設計流程概論-數位篇 ", "D045 半導體製程整合原理 ", "D060 Android介紹 ", "D060 Android介紹 ", "D061 觸控面板技術與發展 ", "D062 Linux Device Drivers: 架構與運作原理 ", "D063 Android Device Drivers: HAL架構與運作原理 ", "D064 電子學第一篇:Semiconductors、BJTs", "D065 電子學第二篇:MOS Field-Effect Transistors(MOSFET)", "D066 電子學第三篇:Building Blocks of Integrated-Circuit Amplifiers、Differential Amplifiers ", "D067 電子學第四篇:Frequency Response、Feedback ", "D068 電子學第五篇:Operational-Amplifier、Digital CMOS Logic Circuits ", "D074 射頻微機電元件及應用", "D075 品管七大手法", "D103 二極體元件物理", "D114 LED原理,製程及結構分析 ", "D117 太陽能電池技術 ", "D119 電池電源技術 ", "D123 TFT-LCD製程模組-Array製程 ", "D127 有機發光二極體材料及顯示器技術 ", "D204 影像壓縮原理 ", "D212 無線射頻身份辨別標籤晶片之原理與應用 ", "D215 智慧手機技術發展與市場實務 ", "D217 行動數位電視技術發展與市場實務 ", "D218 無線通訊晶片設計", "D305 Linux 網路伺服器架設", "D306 Linux 應用伺服器架設", "D308 雲端計算", "D402 太陽能電池概論 ", "D404 品質8D課程", "D406 「2013全球化人力資本高峰會議(SGHC)」─主題演講:判斷、協商與說服的心理學(英文) ", "D407 「2013全球化人力資本高峰會議(SGHC)」─HR論壇:以全球視野打造創新職場(中文)", "D408 「2013全球化人力資本高峰會議(SGHC)」─全球論壇:全球市場下的人才管理(英文)", "D409 「2013全球化人力資本高峰會議(SGHC)」─全球人力資源效能比較:最新「韜睿惠悅2013人力資源服務調查」結果發現(英文)", "D410 「2013全球化人力資本高峰會議(SGHC)」─人才浪潮:徹底重新評估人才管理(英文)", "D411 「2013全球化人力資本高峰會議(SGHC)」─領導的多樣性:使用心理測量資料建立頂尖團隊(英文)", "D412 「2012全球化人力資本高峰會議(SGHC)」─策略性人力資源發展模型-以三星診斷系統為例(英文)", "D413 
「2012全球化人力資本高峰會議(SGHC)」─如何運用雲端運算與行動學習,建構企業的領導力發展中心(英文)", "D414 「2012全球化人力資本高峰會議(SGHC)」─主題演講:整合性人才管理之執行指南(英文)", "D415 「2012全球化人力資本高峰會議(SGHC)」─主題演講:人力資本的奧秘(英文)", "D416 「2012全球化人力資本高峰會議(SGHC)」─貴賓演講:勞動力發展署之展望與規劃(中文) ", "D417 「2012全球化人力資本高峰會議(SGHC)」─HR 論壇:亞太地區人才發展的機會與挑戰(英文) ", "D418 「2012全球化人力資本高峰會議(SGHC)」─鼓舞員工參與及激勵人才(英文) ", "D419 「2012全球化人力資本高峰會議(SGHC)」─策略性全球人才移動管理(英文)", "D420 「2012全球化人力資本高峰會議(SGHC)」─主題演講:企業創造力與人力資源(英文)", "D421 「2012全球化人力資本高峰會議(SGHC)」─大師講座:解讀高效領導力發展失落檔案(英文) ", "D422 「2012全球化人力資本高峰會議(SGHC)」─大師講座:人力資源功能的全球化(英文) ", "D423 「2012全球化人力資本高峰會議(SGHC)」─CEO 論壇:透過學習及人才策略推動創新(英文)", "D424 精選熱門32堂課(企業專案)_半導體、資通訊、光電、綠能、科技管理", "IPv6 一 新一代網際網路通訊協議原理與應用", "IPv6 網路管理與安全 一 企業網路", "IT企業升級策略&思考技術", "Java 程式設計 OCPJP 認證課程", "MEMS系列講座", "MOOCs/SPOCs課程平臺研討會", "SDN 網路技術與發展趨勢研討會", "ShareCourse 學聯網行動磨課師 Apps說明會", "『2012年專家講堂』系列講座", "『中藥養生保健』系列講座", "【化化世界】現代社會的化學", "人魚線研究院-從認識骨骼、肌肉與關節的構造和運動開始", "企業管理課程", "作業系統", "作業系統 (2015 秋季班)", "作業系統 Operating Systems ", "作業系統 Operating Systems 10209", "作業系統(夏季班)", "光電工程一", "全球化的進程與趨勢(104-1)", "全球化與多元文化", "全球化與多元文化(103-2)", "共善的社會設計", "創意思解 (2015 秋季班) ", "創意思解(夏季班)", "創新管理", "動作分析—了解身體,了解自己", "動力學(冬季班)", "動力學(春季班)", "動力學(秋季班)", "化學", "化學 - 第二波!", "古韻新妍:大家作伙來吟詩", "台灣歷史與文化", "台灣歷史與文化(103-2)", "台灣鳥獸誌", "國際英文新聞導讀", "在食安風暴下如何趨吉避凶", "地球從誕生到演化", "天才李白", "宜蘭歷史踏查", "小型風力機系統與國際認證", "小型風力機系統與國際認證 (104 秋季班)", "居家照顧實務行動磨課師課程服務應用", "工程數學 (Engineering Mathematics)", "從社企「力」到社企「利」", "微積分預備課程", "微積分預備課程 - 第二波!", "愛情心理學", "攝影趣", "教師專業素養", "數位匯流 Digital Convergence", "數位學習導論與實務", "數位邏輯設計 (Digital Logic Design)", "新世代網際網路(IPv6)整合技術", "易數邏輯", "曠世名琴訴說的故事", "書法e動—文字的生命律動", "會計學原理", "楚漢相爭之職場競爭力", "海洋之窗-台灣的海洋故事書", "海洋之窗-海洋博物館和它們所聯結的海洋世界", "海洋之窗-海洋博物館和它們所聯結的海洋世界 ", "漫步在雲端", "漫話漫畫", "為公司把把脈", "烘焙學概論", "無線感測網路概論", "無線網路與行動計算", "片刻體物-行動中學物理", "物聯網創新應用架構與案例", "物聯網基礎架構與應用簡介", "物聯網感測器與感測網路設計", "物聯網概論", "物聯網概論2015", "物聯網無線傳輸技術與應用", "環境科技與保護", "生命教育-生命不設限", "生態旅遊Ecotourism", "生活中無所不在的物理", "用藥快餐車", "科學與倫理 (2015秋季班)", "科學與倫理(夏季班)", "科學計算", "科技世代之職場人際關係與應變", "程式設計(三)", "第十一屆徵文獎頒獎典禮", 
"統計學", "統計學(一)", "綠能生活", "網路安全 Network Security", "網路安全 Network Security 10209", "網際網路素養科普講座", "臨床藥學講座", "臨床藥學講座(II)", "臨床藥學講座III", "臺灣歷史與文化(104-1)", "臺灣當代藝術與視覺文化鑑賞", "臺灣美食的日語筆記", "色彩照明影像科技概論", "英語課室的戲劇表演", "華文戲劇概論", "華越雙語教學", "華越雙語教學(103-2)", "行動磨課師【曠世名琴訴說的故事】", "西遊記", "西遊記(2015秋季班)", "計算方法設計與分析2012", "計算方法設計與分析201309", "計算機程式設計 C Programming", "計算機程式設計(一)", "計算機程式設計(二)", "計算機結構 Computer Architecture", "計算機網路 (Computer Networks)", "計算機網路概論 (Introduction to Computer Networks)", "計算機網路概論 Intro. to Computer Networks", "設計愛上IT:IT產品創新策略的設計思考(Design Thinking) 觀念與應用", "論語中的生活大師", "資料庫系統 Database Systems", "資料結構", "走進臺灣戲曲大觀園", "運用智慧系統創造企業新價值(NEW)", "醫學資訊學", "閱讀以色列", "閱讀研究 Reading Studies for Schools", "防天災保平安", "雲端技術及網路服務認證", "雲端教學設計與實務應用", "雲端運算", "電腦安全概論", "音樂基礎訓練 (2015秋季班)", "音樂基礎訓練(夏季班)", "飲食與生活文化", "飲食與生活文化(104-1)", "養氣方程式", "餐旅會計學", "高等字串比對演算法 Advanced String Matching Algorithms", ""自媒體"傳播", "(104春季班)海洋之窗-海洋博物館和它們所聯結的海洋世界", "(104秋季班)海洋之窗-台灣的海洋故事書", "(104秋季班)海洋之窗-海洋博物館和它們所聯結的海洋世界"]
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Spec for KiwiScraper::ShareCourse: compares the scraped course names and
# URLs with the recorded fixture lists, replaying HTTP through VCR.
require 'minitest/autorun'
require 'vcr'
require 'webmock/minitest'
# All project files are loaded relative to this file, so the spec runs from
# the project root and from inside spec/ alike.
require_relative '../lib/coursesdesc.rb'
require_relative 'courses_name_test_data.rb'
require_relative 'urls_test_data.rb'

VCR.configure do |config|
  # Anchor the cassette directory to this file instead of the working directory.
  config.cassette_library_dir = File.expand_path('fixtures/vcr_cassettes', __dir__)
  config.hook_into :webmock
end

VCR.use_cassette('courses') do
  # Built inside the cassette block so the page request made by the
  # constructor is served from the 'courses' cassette.
  sc = KiwiScraper::ShareCourse.new

  describe 'Courses desc scrap' do
    it 'should return an array include name of courses' do
      courses_names = sc.course_name
      # Both sides are sorted, so only the set of names is compared, not order.
      courses_names.sort.must_equal COURSES_NAME_LIST.sort
    end

    it 'should return an array include name of url' do
      courses_urls = sc.course_url
      courses_urls.sort.must_equal URLS_LIST.sort
    end
  end
end
|