osu-cc-scraper 1.0.4 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/osu-cc-scraper.rb +1 -0
- data/lib/osu-cc-scraper/course.rb +80 -33
- data/lib/osu-cc-scraper/department.rb +33 -35
- data/lib/osu-cc-scraper/section.rb +3 -74
- data/lib/osu-cc-scraper/university.rb +37 -0
- metadata +40 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 71df7f91c3049b5d62323435cb066f324c2882ab
|
4
|
+
data.tar.gz: 02eb25347ade8940adabd831f87cbd631c943f1b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1a2017bbf88c24da39b941bbd69bacc3a8cf5fe1309e28a530649bbac78de660a17d60b6b068e91e7d037f05950d28cd33e7f530b9765acc1ee4081c683061fd
|
7
|
+
data.tar.gz: 9573bdcb5255459cdfda6072ec943d9001915ef6fc97860177fa0bd3358acd145bbb75335372d7f8267bad1bccbad60df247cc4fc32b399f41f4643d7657e127
|
data/lib/osu-cc-scraper.rb
CHANGED
data/lib/osu-cc-scraper/course.rb
CHANGED
@@ -1,43 +1,90 @@
|
|
1
1
|
require "open-uri"
|
2
|
-
require "nokogiri"
|
2
|
+
require "oga"
|
3
|
+
|
3
4
|
require "osu-cc-scraper/section"
|
4
5
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
attr_accessor :name
|
12
|
-
|
13
|
-
# @return [OsuCcScraper::Course]
|
14
|
-
def initialize(args)
|
15
|
-
args.each do |k,v|
|
16
|
-
instance_variable_set("@#{k}", v) unless v.nil?
|
6
|
+
module OsuCcScraper
|
7
|
+
class Course < Struct.new(:subject_code, :course_number, :name)
|
8
|
+
|
9
|
+
def sections
|
10
|
+
html = fetch_sections
|
11
|
+
parse_sections(html)
|
17
12
|
end
|
18
|
-
end
|
19
13
|
|
20
|
-
|
21
|
-
def sections
|
22
|
-
html = OsuCcScraper::Section::fetch(self.subject_code, self.course_number)
|
23
|
-
OsuCcScraper::Section::parse(html)
|
24
|
-
end
|
14
|
+
private
|
25
15
|
|
26
|
-
|
16
|
+
SECTIONS_XPATH = "//table[@id='ctl00_ContentPlaceHolder1_SOCListUC1_gvOfferings']//tr[position() > 1]"
|
17
|
+
TITLE_XPATH = "//form/h3"
|
27
18
|
|
28
|
-
|
29
|
-
|
30
|
-
|
19
|
+
def fetch_sections
|
20
|
+
open("#{ENDPOINT}/CourseDetail.aspx?subjectcode=#{subject_code}&coursenumber=#{course_number}").read
|
21
|
+
end
|
22
|
+
|
23
|
+
def parse_sections(html)
|
24
|
+
document = Oga.parse_html(html)
|
25
|
+
|
26
|
+
document.xpath(SECTIONS_XPATH).map { |row|
|
27
|
+
Section.new(
|
28
|
+
parse_department(document),
|
29
|
+
parse_number(document),
|
30
|
+
parse_name(document),
|
31
|
+
parse_term(row),
|
32
|
+
parse_section(row),
|
33
|
+
parse_instructor(row),
|
34
|
+
parse_type(row),
|
35
|
+
parse_status(row),
|
36
|
+
parse_capacity(row),
|
37
|
+
parse_current(row)
|
38
|
+
)
|
39
|
+
}
|
40
|
+
end
|
41
|
+
|
42
|
+
def fetch_column(document, selector)
|
43
|
+
document.xpath(selector)&.text&.delete("\r\n")&.strip
|
44
|
+
end
|
45
|
+
|
46
|
+
def parse_department(document)
|
47
|
+
title = document.xpath(TITLE_XPATH).text.split("\n")
|
48
|
+
title[2].delete("\r\n").strip[0..-2].split(" ")[0]
|
49
|
+
end
|
50
|
+
|
51
|
+
def parse_number(document)
|
52
|
+
title = document.xpath(TITLE_XPATH).text.split("\n")
|
53
|
+
title[2].split(" ")[1][0..-2]
|
54
|
+
end
|
55
|
+
|
56
|
+
def parse_name(document)
|
57
|
+
title = document.xpath(TITLE_XPATH).text.split("\n")
|
58
|
+
title[3].strip
|
59
|
+
end
|
60
|
+
|
61
|
+
def parse_term(row)
|
62
|
+
fetch_column(row, "td[position() = 1]")
|
63
|
+
end
|
64
|
+
|
65
|
+
def parse_section(row)
|
66
|
+
fetch_column(row, "td[position() = 3]")
|
67
|
+
end
|
68
|
+
|
69
|
+
def parse_instructor(row)
|
70
|
+
fetch_column(row, "td[position() = 6]")
|
71
|
+
end
|
72
|
+
|
73
|
+
def parse_type(row)
|
74
|
+
fetch_column(row, "td[position() = 11]")
|
75
|
+
end
|
76
|
+
|
77
|
+
def parse_status(row)
|
78
|
+
fetch_column(row, "td[position() = 12]")
|
79
|
+
end
|
80
|
+
|
81
|
+
def parse_capacity(row)
|
82
|
+
fetch_column(row, "td[position() = 13]")&.to_i
|
83
|
+
end
|
84
|
+
|
85
|
+
def parse_current(row)
|
86
|
+
fetch_column(row, "td[position() = 14]")&.to_i
|
87
|
+
end
|
31
88
|
|
32
|
-
def self.parse(html)
|
33
|
-
ng = Nokogiri::HTML(html)
|
34
|
-
ng.css("tr td strong a:last").map{ |course|
|
35
|
-
# NE 311H INTRODUCTION TO THERMAL-FLUID SCIENCES (4)
|
36
|
-
parts = course.text.split(' ')
|
37
|
-
OsuCcScraper::Course.new \
|
38
|
-
subject_code: parts[0],
|
39
|
-
course_number: parts[1],
|
40
|
-
name: parts[2..parts.length-2].join(" ")
|
41
|
-
}
|
42
89
|
end
|
43
90
|
end
|
data/lib/osu-cc-scraper/department.rb
CHANGED
@@ -1,47 +1,45 @@
|
|
1
1
|
require "open-uri"
|
2
|
-
require "nokogiri"
|
2
|
+
require "oga"
|
3
|
+
|
3
4
|
require "osu-cc-scraper/course"
|
4
5
|
|
5
|
-
|
6
|
-
|
7
|
-
attr_accessor :name
|
8
|
-
# @return [String]
|
9
|
-
attr_accessor :subject_code
|
6
|
+
module OsuCcScraper
|
7
|
+
class Department < Struct.new(:name, :subject_code)
|
10
8
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
instance_variable_set("@#{k}", v) unless v.nil?
|
9
|
+
def courses
|
10
|
+
html = fetch_courses
|
11
|
+
parse_courses(html)
|
15
12
|
end
|
16
|
-
end
|
17
13
|
|
18
|
-
|
19
|
-
def self.all
|
20
|
-
html = OsuCcScraper::Department::fetch
|
21
|
-
OsuCcScraper::Department::parse(html)
|
22
|
-
end
|
14
|
+
private
|
23
15
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
OsuCcScraper::Course::parse(html)
|
28
|
-
end
|
16
|
+
def fetch_courses
|
17
|
+
open("#{ENDPOINT}/CourseList.aspx?subjectcode=#{subject_code}&level=undergrad&campus=corvallis").read
|
18
|
+
end
|
29
19
|
|
30
|
-
|
20
|
+
def parse_courses(html)
|
21
|
+
document = Oga.parse_html(html)
|
22
|
+
document.xpath("//tr//td//strong/a[last()]").map { |row|
|
23
|
+
Course.new(
|
24
|
+
parse_course_subject_code(row),
|
25
|
+
parse_course_course_number(row),
|
26
|
+
parse_course_name(row)
|
27
|
+
)
|
28
|
+
}
|
29
|
+
end
|
31
30
|
|
32
|
-
|
33
|
-
|
34
|
-
|
31
|
+
# NE 311H INTRODUCTION TO THERMAL-FLUID SCIENCES (4)
|
32
|
+
def parse_course_subject_code(row)
|
33
|
+
row.text.split(' ')[0]
|
34
|
+
end
|
35
|
+
|
36
|
+
def parse_course_course_number(row)
|
37
|
+
row.text.split(' ')[1]
|
38
|
+
end
|
39
|
+
|
40
|
+
def parse_course_name(row)
|
41
|
+
row.text.split(' ')[2..-2].join(" ")
|
42
|
+
end
|
35
43
|
|
36
|
-
def self.parse(html)
|
37
|
-
ng = Nokogiri::HTML(html)
|
38
|
-
ng.xpath("//tr/td/font/a").map{ |department|
|
39
|
-
OsuCcScraper::Department.new({
|
40
|
-
subject_code: department.content[/\(.*?\)/][1..-2],
|
41
|
-
name: department.content[/([^(]+)/].strip
|
42
|
-
})
|
43
|
-
}.sort { |a,b|
|
44
|
-
a.subject_code.downcase <=> b.subject_code.downcase
|
45
|
-
}
|
46
44
|
end
|
47
45
|
end
|
data/lib/osu-cc-scraper/section.rb
CHANGED
@@ -1,76 +1,5 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
class OsuCcScraper::Section
|
5
|
-
# @return [String]
|
6
|
-
attr_accessor :department
|
7
|
-
# @return [String]
|
8
|
-
attr_accessor :number
|
9
|
-
# @return [String]
|
10
|
-
attr_accessor :name
|
11
|
-
# @return [String]
|
12
|
-
attr_accessor :term
|
13
|
-
# @return [String]
|
14
|
-
attr_accessor :section
|
15
|
-
# @return [String]
|
16
|
-
attr_accessor :instructor
|
17
|
-
# @return [String]
|
18
|
-
attr_accessor :type
|
19
|
-
# @return [String]
|
20
|
-
attr_accessor :status
|
21
|
-
# @return [String]
|
22
|
-
attr_accessor :capacity
|
23
|
-
# @return [String]
|
24
|
-
attr_accessor :availability
|
25
|
-
|
26
|
-
# @return [OsuCcScraper::Section]
|
27
|
-
def initialize(args)
|
28
|
-
args.each do |k,v|
|
29
|
-
instance_variable_set("@#{k}", v) unless v.nil?
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
# @return [Array]
|
34
|
-
def to_a
|
35
|
-
[@department, @number, @name, @term, @section, @instructor, @type, @status, @capacity, @availability]
|
36
|
-
end
|
37
|
-
|
38
|
-
private
|
39
|
-
|
40
|
-
def self.fetch(subject_code, course_number)
|
41
|
-
open("#{OsuCcScraper::ENDPOINT}/CourseDetail.aspx?subjectcode=#{subject_code}&coursenumber=#{course_number}").read
|
42
|
-
end
|
43
|
-
|
44
|
-
def self.parse(html)
|
45
|
-
sections = []
|
46
|
-
|
47
|
-
ng = Nokogiri::HTML(html)
|
48
|
-
rows = ng.xpath('//table[@id="ctl00_ContentPlaceHolder1_SOCListUC1_gvOfferings"]/tr[position() > 1]')
|
49
|
-
rows.each_with_index do |row, key|
|
50
|
-
title = ng.at_xpath('//form/h3').content.split("\n")
|
51
|
-
query = "//table[@id='ctl00_ContentPlaceHolder1_SOCListUC1_gvOfferings']"
|
52
|
-
|
53
|
-
sections << OsuCcScraper::Section.new({
|
54
|
-
:department => title[2].delete("\r\n").strip[0..-2].split(" ")[0],
|
55
|
-
:number => title[2].split(" ")[1][0..-2],
|
56
|
-
:name => title[3].strip,
|
57
|
-
:term => at_xpath_or_null(ng, "#{query}/tr[position() = #{key+2}]/td[position() = 1]"),
|
58
|
-
:section => at_xpath_or_null(ng, "#{query}/tr[position() = #{key+2}]/td[position() = 3]"),
|
59
|
-
:instructor => at_xpath_or_null(ng, "#{query}/tr[position() = #{key+2}]/td[position() = 6]"),
|
60
|
-
:type => at_xpath_or_null(ng, "#{query}/tr[position() = #{key+2}]/td[position() = 11]"),
|
61
|
-
:status => at_xpath_or_null(ng, "#{query}/tr[position() = #{key+2}]/td[position() = 12]"),
|
62
|
-
:capacity => at_xpath_or_null(ng, "#{query}/tr[position() = #{key+2}]/td[position() = 13]"),
|
63
|
-
:availability => at_xpath_or_null(ng, "#{query}/tr[position() = #{key+2}]/td[position() = 14]")
|
64
|
-
})
|
65
|
-
end
|
66
|
-
sections
|
67
|
-
end
|
68
|
-
|
69
|
-
def self.at_xpath_or_null(document, selector)
|
70
|
-
if document.at_xpath(selector) != nil
|
71
|
-
return document.at_xpath(selector).content.delete("\r\n").strip
|
72
|
-
else
|
73
|
-
return nil
|
74
|
-
end
|
1
|
+
module OsuCcScraper
|
2
|
+
class Section < Struct.new(:department, :number, :name, :term, :section,
|
3
|
+
:instructor, :type, :status, :capacity, :current)
|
75
4
|
end
|
76
5
|
end
|
data/lib/osu-cc-scraper/university.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
require "open-uri"
|
2
|
+
require "oga"
|
3
|
+
|
4
|
+
require "osu-cc-scraper/department"
|
5
|
+
|
6
|
+
module OsuCcScraper
|
7
|
+
class University
|
8
|
+
|
9
|
+
def departments
|
10
|
+
html = fetch_departments
|
11
|
+
parse_departments(html)
|
12
|
+
end
|
13
|
+
|
14
|
+
def fetch_departments
|
15
|
+
open("#{ENDPOINT}/CourseDescription.aspx?level=undergrad").read
|
16
|
+
end
|
17
|
+
|
18
|
+
def parse_departments(html)
|
19
|
+
ng = Oga.parse_html(html)
|
20
|
+
ng.xpath("//tr/td/font/a").map { |row|
|
21
|
+
Department.new(
|
22
|
+
parse_department_name(row),
|
23
|
+
parse_department_subject_code(row),
|
24
|
+
)
|
25
|
+
}
|
26
|
+
end
|
27
|
+
|
28
|
+
def parse_department_subject_code(row)
|
29
|
+
row.text[/\(.*?\)/][1..-2]
|
30
|
+
end
|
31
|
+
|
32
|
+
def parse_department_name(row)
|
33
|
+
row.text[/([^(]+)/].strip
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
37
|
+
end
|
metadata
CHANGED
@@ -1,30 +1,59 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: osu-cc-scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.4
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonah George
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-02-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: nokogiri
|
14
|
+
name: oga
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 1.
|
19
|
+
version: 2.1.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 1.
|
27
|
-
|
26
|
+
version: 2.1.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: A script to gather course data from Oregon State University's Course
|
56
|
+
Catalog.
|
28
57
|
email: jonah.george@me.com
|
29
58
|
executables: []
|
30
59
|
extensions: []
|
@@ -34,6 +63,7 @@ files:
|
|
34
63
|
- lib/osu-cc-scraper/course.rb
|
35
64
|
- lib/osu-cc-scraper/department.rb
|
36
65
|
- lib/osu-cc-scraper/section.rb
|
66
|
+
- lib/osu-cc-scraper/university.rb
|
37
67
|
homepage: https://github.com/jonahgeorge/osu-cc-scraper
|
38
68
|
licenses:
|
39
69
|
- MIT
|
@@ -46,7 +76,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
46
76
|
requirements:
|
47
77
|
- - ">="
|
48
78
|
- !ruby/object:Gem::Version
|
49
|
-
version:
|
79
|
+
version: 2.3.0
|
50
80
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
81
|
requirements:
|
52
82
|
- - ">="
|
@@ -54,7 +84,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
54
84
|
version: '0'
|
55
85
|
requirements: []
|
56
86
|
rubyforge_project:
|
57
|
-
rubygems_version: 2.
|
87
|
+
rubygems_version: 2.5.1
|
58
88
|
signing_key:
|
59
89
|
specification_version: 4
|
60
90
|
summary: OSU Course Catalog Scraper
|