osu-cc-scraper 1.0.4 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 00e547e50b759e05a38eb3251f333ef13acda4bb
4
- data.tar.gz: 2fe51e85192c1e79df01eba6392d680c1979c8ae
3
+ metadata.gz: 71df7f91c3049b5d62323435cb066f324c2882ab
4
+ data.tar.gz: 02eb25347ade8940adabd831f87cbd631c943f1b
5
5
  SHA512:
6
- metadata.gz: 6b07ba4686e5d1ff8fad54f92d4dbf13372fc4cba300d3c71fa76683fcb832703a04f7db8e1b0b0e39b263df67cc6ee780c49f303b4f6b14ab1d5bc40918948e
7
- data.tar.gz: 2ac1f329ec36729cbc347fe3cd59cb639a5867dde8ed9e13e284e1b3e37d71b3c67cdf497687915241f46ced0cf185e18ad1344dcaac4917ddf2357b6ce9edbe
6
+ metadata.gz: 1a2017bbf88c24da39b941bbd69bacc3a8cf5fe1309e28a530649bbac78de660a17d60b6b068e91e7d037f05950d28cd33e7f530b9765acc1ee4081c683061fd
7
+ data.tar.gz: 9573bdcb5255459cdfda6072ec943d9001915ef6fc97860177fa0bd3358acd145bbb75335372d7f8267bad1bccbad60df247cc4fc32b399f41f4643d7657e127
@@ -2,6 +2,7 @@ module OsuCcScraper
2
2
  ENDPOINT = "http://catalog.oregonstate.edu"
3
3
  end
4
4
 
5
+ require "osu-cc-scraper/university"
5
6
  require "osu-cc-scraper/department"
6
7
  require "osu-cc-scraper/course"
7
8
  require "osu-cc-scraper/section"
@@ -1,43 +1,90 @@
1
1
  require "open-uri"
2
- require "nokogiri"
2
+ require "oga"
3
+
3
4
  require "osu-cc-scraper/section"
4
5
 
5
- class OsuCcScraper::Course
6
- # @return [String]
7
- attr_accessor :subject_code
8
- # @return [String]
9
- attr_accessor :course_number
10
- # @return [String]
11
- attr_accessor :name
12
-
13
- # @return [OsuCcScraper::Course]
14
- def initialize(args)
15
- args.each do |k,v|
16
- instance_variable_set("@#{k}", v) unless v.nil?
6
+ module OsuCcScraper
7
+ class Course < Struct.new(:subject_code, :course_number, :name)
8
+
9
+ def sections
10
+ html = fetch_sections
11
+ parse_sections(html)
17
12
  end
18
- end
19
13
 
20
- # @return [Array<OsuCcScraper::Section>]
21
- def sections
22
- html = OsuCcScraper::Section::fetch(self.subject_code, self.course_number)
23
- OsuCcScraper::Section::parse(html)
24
- end
14
+ private
25
15
 
26
- private
16
+ SECTIONS_XPATH = "//table[@id='ctl00_ContentPlaceHolder1_SOCListUC1_gvOfferings']//tr[position() > 1]"
17
+ TITLE_XPATH = "//form/h3"
27
18
 
28
- def self.fetch(subject_code)
29
- open("#{OsuCcScraper::ENDPOINT}/CourseList.aspx?subjectcode=#{subject_code}&level=undergrad&campus=corvallis").read
30
- end
19
+ def fetch_sections
20
+ open("#{ENDPOINT}/CourseDetail.aspx?subjectcode=#{subject_code}&coursenumber=#{course_number}").read
21
+ end
22
+
23
+ def parse_sections(html)
24
+ document = Oga.parse_html(html)
25
+
26
+ document.xpath(SECTIONS_XPATH).map { |row|
27
+ Section.new(
28
+ parse_department(document),
29
+ parse_number(document),
30
+ parse_name(document),
31
+ parse_term(row),
32
+ parse_section(row),
33
+ parse_instructor(row),
34
+ parse_type(row),
35
+ parse_status(row),
36
+ parse_capacity(row),
37
+ parse_current(row)
38
+ )
39
+ }
40
+ end
41
+
42
+ def fetch_column(document, selector)
43
+ document.xpath(selector)&.text&.delete("\r\n")&.strip
44
+ end
45
+
46
+ def parse_department(document)
47
+ title = document.xpath(TITLE_XPATH).text.split("\n")
48
+ title[2].delete("\r\n").strip[0..-2].split(" ")[0]
49
+ end
50
+
51
+ def parse_number(document)
52
+ title = document.xpath(TITLE_XPATH).text.split("\n")
53
+ title[2].split(" ")[1][0..-2]
54
+ end
55
+
56
+ def parse_name(document)
57
+ title = document.xpath(TITLE_XPATH).text.split("\n")
58
+ title[3].strip
59
+ end
60
+
61
+ def parse_term(row)
62
+ fetch_column(row, "td[position() = 1]")
63
+ end
64
+
65
+ def parse_section(row)
66
+ fetch_column(row, "td[position() = 3]")
67
+ end
68
+
69
+ def parse_instructor(row)
70
+ fetch_column(row, "td[position() = 6]")
71
+ end
72
+
73
+ def parse_type(row)
74
+ fetch_column(row, "td[position() = 11]")
75
+ end
76
+
77
+ def parse_status(row)
78
+ fetch_column(row, "td[position() = 12]")
79
+ end
80
+
81
+ def parse_capacity(row)
82
+ fetch_column(row, "td[position() = 13]")&.to_i
83
+ end
84
+
85
+ def parse_current(row)
86
+ fetch_column(row, "td[position() = 14]")&.to_i
87
+ end
31
88
 
32
- def self.parse(html)
33
- ng = Nokogiri::HTML(html)
34
- ng.css("tr td strong a:last").map{ |course|
35
- # NE 311H INTRODUCTION TO THERMAL-FLUID SCIENCES (4)
36
- parts = course.text.split(' ')
37
- OsuCcScraper::Course.new \
38
- subject_code: parts[0],
39
- course_number: parts[1],
40
- name: parts[2..parts.length-2].join(" ")
41
- }
42
89
  end
43
90
  end
@@ -1,47 +1,45 @@
1
1
  require "open-uri"
2
- require "nokogiri"
2
+ require "oga"
3
+
3
4
  require "osu-cc-scraper/course"
4
5
 
5
- class OsuCcScraper::Department
6
- # @return [String]
7
- attr_accessor :name
8
- # @return [String]
9
- attr_accessor :subject_code
6
+ module OsuCcScraper
7
+ class Department < Struct.new(:name, :subject_code)
10
8
 
11
- # @return [OsuCcScraper::Department]
12
- def initialize(args)
13
- args.each do |k,v|
14
- instance_variable_set("@#{k}", v) unless v.nil?
9
+ def courses
10
+ html = fetch_courses
11
+ parse_courses(html)
15
12
  end
16
- end
17
13
 
18
- # @return [Array<OsuCcScraper::Department>]
19
- def self.all
20
- html = OsuCcScraper::Department::fetch
21
- OsuCcScraper::Department::parse(html)
22
- end
14
+ private
23
15
 
24
- # @return [Array<OsuCcScraper::Course>]
25
- def courses
26
- html = OsuCcScraper::Course::fetch(self.subject_code)
27
- OsuCcScraper::Course::parse(html)
28
- end
16
+ def fetch_courses
17
+ open("#{ENDPOINT}/CourseList.aspx?subjectcode=#{subject_code}&level=undergrad&campus=corvallis").read
18
+ end
29
19
 
30
- private
20
+ def parse_courses(html)
21
+ document = Oga.parse_html(html)
22
+ document.xpath("//tr//td//strong/a[last()]").map { |row|
23
+ Course.new(
24
+ parse_course_subject_code(row),
25
+ parse_course_course_number(row),
26
+ parse_course_name(row)
27
+ )
28
+ }
29
+ end
31
30
 
32
- def self.fetch
33
- open("#{OsuCcScraper::ENDPOINT}/CourseDescription.aspx?level=undergrad").read
34
- end
31
+ # NE 311H INTRODUCTION TO THERMAL-FLUID SCIENCES (4)
32
+ def parse_course_subject_code(row)
33
+ row.text.split(' ')[0]
34
+ end
35
+
36
+ def parse_course_course_number(row)
37
+ row.text.split(' ')[1]
38
+ end
39
+
40
+ def parse_course_name(row)
41
+ row.text.split(' ')[2..-2].join(" ")
42
+ end
35
43
 
36
- def self.parse(html)
37
- ng = Nokogiri::HTML(html)
38
- ng.xpath("//tr/td/font/a").map{ |department|
39
- OsuCcScraper::Department.new({
40
- subject_code: department.content[/\(.*?\)/][1..-2],
41
- name: department.content[/([^(]+)/].strip
42
- })
43
- }.sort { |a,b|
44
- a.subject_code.downcase <=> b.subject_code.downcase
45
- }
46
44
  end
47
45
  end
@@ -1,76 +1,5 @@
1
- require "open-uri"
2
- require "nokogiri"
3
-
4
- class OsuCcScraper::Section
5
- # @return [String]
6
- attr_accessor :department
7
- # @return [String]
8
- attr_accessor :number
9
- # @return [String]
10
- attr_accessor :name
11
- # @return [String]
12
- attr_accessor :term
13
- # @return [String]
14
- attr_accessor :section
15
- # @return [String]
16
- attr_accessor :instructor
17
- # @return [String]
18
- attr_accessor :type
19
- # @return [String]
20
- attr_accessor :status
21
- # @return [String]
22
- attr_accessor :capacity
23
- # @return [String]
24
- attr_accessor :availability
25
-
26
- # @return [OsuCcScraper::Section]
27
- def initialize(args)
28
- args.each do |k,v|
29
- instance_variable_set("@#{k}", v) unless v.nil?
30
- end
31
- end
32
-
33
- # @return [Array]
34
- def to_a
35
- [@department, @number, @name, @term, @section, @instructor, @type, @status, @capacity, @availability]
36
- end
37
-
38
- private
39
-
40
- def self.fetch(subject_code, course_number)
41
- open("#{OsuCcScraper::ENDPOINT}/CourseDetail.aspx?subjectcode=#{subject_code}&coursenumber=#{course_number}").read
42
- end
43
-
44
- def self.parse(html)
45
- sections = []
46
-
47
- ng = Nokogiri::HTML(html)
48
- rows = ng.xpath('//table[@id="ctl00_ContentPlaceHolder1_SOCListUC1_gvOfferings"]/tr[position() > 1]')
49
- rows.each_with_index do |row, key|
50
- title = ng.at_xpath('//form/h3').content.split("\n")
51
- query = "//table[@id='ctl00_ContentPlaceHolder1_SOCListUC1_gvOfferings']"
52
-
53
- sections << OsuCcScraper::Section.new({
54
- :department => title[2].delete("\r\n").strip[0..-2].split(" ")[0],
55
- :number => title[2].split(" ")[1][0..-2],
56
- :name => title[3].strip,
57
- :term => at_xpath_or_null(ng, "#{query}/tr[position() = #{key+2}]/td[position() = 1]"),
58
- :section => at_xpath_or_null(ng, "#{query}/tr[position() = #{key+2}]/td[position() = 3]"),
59
- :instructor => at_xpath_or_null(ng, "#{query}/tr[position() = #{key+2}]/td[position() = 6]"),
60
- :type => at_xpath_or_null(ng, "#{query}/tr[position() = #{key+2}]/td[position() = 11]"),
61
- :status => at_xpath_or_null(ng, "#{query}/tr[position() = #{key+2}]/td[position() = 12]"),
62
- :capacity => at_xpath_or_null(ng, "#{query}/tr[position() = #{key+2}]/td[position() = 13]"),
63
- :availability => at_xpath_or_null(ng, "#{query}/tr[position() = #{key+2}]/td[position() = 14]")
64
- })
65
- end
66
- sections
67
- end
68
-
69
- def self.at_xpath_or_null(document, selector)
70
- if document.at_xpath(selector) != nil
71
- return document.at_xpath(selector).content.delete("\r\n").strip
72
- else
73
- return nil
74
- end
1
+ module OsuCcScraper
2
+ class Section < Struct.new(:department, :number, :name, :term, :section,
3
+ :instructor, :type, :status, :capacity, :current)
75
4
  end
76
5
  end
@@ -0,0 +1,37 @@
1
+ require "open-uri"
2
+ require "oga"
3
+
4
+ require "osu-cc-scraper/department"
5
+
6
+ module OsuCcScraper
7
+ class University
8
+
9
+ def departments
10
+ html = fetch_departments
11
+ parse_departments(html)
12
+ end
13
+
14
+ def fetch_departments
15
+ open("#{ENDPOINT}/CourseDescription.aspx?level=undergrad").read
16
+ end
17
+
18
+ def parse_departments(html)
19
+ ng = Oga.parse_html(html)
20
+ ng.xpath("//tr/td/font/a").map { |row|
21
+ Department.new(
22
+ parse_department_name(row),
23
+ parse_department_subject_code(row),
24
+ )
25
+ }
26
+ end
27
+
28
+ def parse_department_subject_code(row)
29
+ row.text[/\(.*?\)/][1..-2]
30
+ end
31
+
32
+ def parse_department_name(row)
33
+ row.text[/([^(]+)/].strip
34
+ end
35
+
36
+ end
37
+ end
metadata CHANGED
@@ -1,30 +1,59 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: osu-cc-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonah George
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-18 00:00:00.000000000 Z
11
+ date: 2016-02-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: nokogiri
14
+ name: oga
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '='
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 1.6.6.2
19
+ version: 2.1.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '='
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 1.6.6.2
27
- description: A script to gather course data from Oregon State's Course Catalog.
26
+ version: 2.1.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: A script to gather course data from Oregon State University's Course
56
+ Catalog.
28
57
  email: jonah.george@me.com
29
58
  executables: []
30
59
  extensions: []
@@ -34,6 +63,7 @@ files:
34
63
  - lib/osu-cc-scraper/course.rb
35
64
  - lib/osu-cc-scraper/department.rb
36
65
  - lib/osu-cc-scraper/section.rb
66
+ - lib/osu-cc-scraper/university.rb
37
67
  homepage: https://github.com/jonahgeorge/osu-cc-scraper
38
68
  licenses:
39
69
  - MIT
@@ -46,7 +76,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
46
76
  requirements:
47
77
  - - ">="
48
78
  - !ruby/object:Gem::Version
49
- version: '0'
79
+ version: 2.3.0
50
80
  required_rubygems_version: !ruby/object:Gem::Requirement
51
81
  requirements:
52
82
  - - ">="
@@ -54,7 +84,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
54
84
  version: '0'
55
85
  requirements: []
56
86
  rubyforge_project:
57
- rubygems_version: 2.4.5.1
87
+ rubygems_version: 2.5.1
58
88
  signing_key:
59
89
  specification_version: 4
60
90
  summary: OSU Course Catalog Scraper