osu-cc-scraper 1.0.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 00e547e50b759e05a38eb3251f333ef13acda4bb
4
- data.tar.gz: 2fe51e85192c1e79df01eba6392d680c1979c8ae
3
+ metadata.gz: 71df7f91c3049b5d62323435cb066f324c2882ab
4
+ data.tar.gz: 02eb25347ade8940adabd831f87cbd631c943f1b
5
5
  SHA512:
6
- metadata.gz: 6b07ba4686e5d1ff8fad54f92d4dbf13372fc4cba300d3c71fa76683fcb832703a04f7db8e1b0b0e39b263df67cc6ee780c49f303b4f6b14ab1d5bc40918948e
7
- data.tar.gz: 2ac1f329ec36729cbc347fe3cd59cb639a5867dde8ed9e13e284e1b3e37d71b3c67cdf497687915241f46ced0cf185e18ad1344dcaac4917ddf2357b6ce9edbe
6
+ metadata.gz: 1a2017bbf88c24da39b941bbd69bacc3a8cf5fe1309e28a530649bbac78de660a17d60b6b068e91e7d037f05950d28cd33e7f530b9765acc1ee4081c683061fd
7
+ data.tar.gz: 9573bdcb5255459cdfda6072ec943d9001915ef6fc97860177fa0bd3358acd145bbb75335372d7f8267bad1bccbad60df247cc4fc32b399f41f4643d7657e127
@@ -2,6 +2,7 @@ module OsuCcScraper
2
2
  ENDPOINT = "http://catalog.oregonstate.edu"
3
3
  end
4
4
 
5
+ require "osu-cc-scraper/university"
5
6
  require "osu-cc-scraper/department"
6
7
  require "osu-cc-scraper/course"
7
8
  require "osu-cc-scraper/section"
@@ -1,43 +1,90 @@
1
1
  require "open-uri"
2
- require "nokogiri"
2
+ require "oga"
3
+
3
4
  require "osu-cc-scraper/section"
4
5
 
5
- class OsuCcScraper::Course
6
- # @return [String]
7
- attr_accessor :subject_code
8
- # @return [String]
9
- attr_accessor :course_number
10
- # @return [String]
11
- attr_accessor :name
12
-
13
- # @return [OsuCcScraper::Course]
14
- def initialize(args)
15
- args.each do |k,v|
16
- instance_variable_set("@#{k}", v) unless v.nil?
6
+ module OsuCcScraper
7
+ class Course < Struct.new(:subject_code, :course_number, :name)
8
+
9
+ def sections
10
+ html = fetch_sections
11
+ parse_sections(html)
17
12
  end
18
- end
19
13
 
20
- # @return [Array<OsuCcScraper::Section>]
21
- def sections
22
- html = OsuCcScraper::Section::fetch(self.subject_code, self.course_number)
23
- OsuCcScraper::Section::parse(html)
24
- end
14
+ private
25
15
 
26
- private
16
+ SECTIONS_XPATH = "//table[@id='ctl00_ContentPlaceHolder1_SOCListUC1_gvOfferings']//tr[position() > 1]"
17
+ TITLE_XPATH = "//form/h3"
27
18
 
28
- def self.fetch(subject_code)
29
- open("#{OsuCcScraper::ENDPOINT}/CourseList.aspx?subjectcode=#{subject_code}&level=undergrad&campus=corvallis").read
30
- end
19
+ def fetch_sections
20
+ open("#{ENDPOINT}/CourseDetail.aspx?subjectcode=#{subject_code}&coursenumber=#{course_number}").read
21
+ end
22
+
23
+ def parse_sections(html)
24
+ document = Oga.parse_html(html)
25
+
26
+ document.xpath(SECTIONS_XPATH).map { |row|
27
+ Section.new(
28
+ parse_department(document),
29
+ parse_number(document),
30
+ parse_name(document),
31
+ parse_term(row),
32
+ parse_section(row),
33
+ parse_instructor(row),
34
+ parse_type(row),
35
+ parse_status(row),
36
+ parse_capacity(row),
37
+ parse_current(row)
38
+ )
39
+ }
40
+ end
41
+
42
+ def fetch_column(document, selector)
43
+ document.xpath(selector)&.text&.delete("\r\n")&.strip
44
+ end
45
+
46
+ def parse_department(document)
47
+ title = document.xpath(TITLE_XPATH).text.split("\n")
48
+ title[2].delete("\r\n").strip[0..-2].split(" ")[0]
49
+ end
50
+
51
+ def parse_number(document)
52
+ title = document.xpath(TITLE_XPATH).text.split("\n")
53
+ title[2].split(" ")[1][0..-2]
54
+ end
55
+
56
+ def parse_name(document)
57
+ title = document.xpath(TITLE_XPATH).text.split("\n")
58
+ title[3].strip
59
+ end
60
+
61
+ def parse_term(row)
62
+ fetch_column(row, "td[position() = 1]")
63
+ end
64
+
65
+ def parse_section(row)
66
+ fetch_column(row, "td[position() = 3]")
67
+ end
68
+
69
+ def parse_instructor(row)
70
+ fetch_column(row, "td[position() = 6]")
71
+ end
72
+
73
+ def parse_type(row)
74
+ fetch_column(row, "td[position() = 11]")
75
+ end
76
+
77
+ def parse_status(row)
78
+ fetch_column(row, "td[position() = 12]")
79
+ end
80
+
81
+ def parse_capacity(row)
82
+ fetch_column(row, "td[position() = 13]")&.to_i
83
+ end
84
+
85
+ def parse_current(row)
86
+ fetch_column(row, "td[position() = 14]")&.to_i
87
+ end
31
88
 
32
- def self.parse(html)
33
- ng = Nokogiri::HTML(html)
34
- ng.css("tr td strong a:last").map{ |course|
35
- # NE 311H INTRODUCTION TO THERMAL-FLUID SCIENCES (4)
36
- parts = course.text.split(' ')
37
- OsuCcScraper::Course.new \
38
- subject_code: parts[0],
39
- course_number: parts[1],
40
- name: parts[2..parts.length-2].join(" ")
41
- }
42
89
  end
43
90
  end
@@ -1,47 +1,45 @@
1
1
  require "open-uri"
2
- require "nokogiri"
2
+ require "oga"
3
+
3
4
  require "osu-cc-scraper/course"
4
5
 
5
- class OsuCcScraper::Department
6
- # @return [String]
7
- attr_accessor :name
8
- # @return [String]
9
- attr_accessor :subject_code
6
+ module OsuCcScraper
7
+ class Department < Struct.new(:name, :subject_code)
10
8
 
11
- # @return [OsuCcScraper::Department]
12
- def initialize(args)
13
- args.each do |k,v|
14
- instance_variable_set("@#{k}", v) unless v.nil?
9
+ def courses
10
+ html = fetch_courses
11
+ parse_courses(html)
15
12
  end
16
- end
17
13
 
18
- # @return [Array<OsuCcScraper::Department>]
19
- def self.all
20
- html = OsuCcScraper::Department::fetch
21
- OsuCcScraper::Department::parse(html)
22
- end
14
+ private
23
15
 
24
- # @return [Array<OsuCcScraper::Course>]
25
- def courses
26
- html = OsuCcScraper::Course::fetch(self.subject_code)
27
- OsuCcScraper::Course::parse(html)
28
- end
16
+ def fetch_courses
17
+ open("#{ENDPOINT}/CourseList.aspx?subjectcode=#{subject_code}&level=undergrad&campus=corvallis").read
18
+ end
29
19
 
30
- private
20
+ def parse_courses(html)
21
+ document = Oga.parse_html(html)
22
+ document.xpath("//tr//td//strong/a[last()]").map { |row|
23
+ Course.new(
24
+ parse_course_subject_code(row),
25
+ parse_course_course_number(row),
26
+ parse_course_name(row)
27
+ )
28
+ }
29
+ end
31
30
 
32
- def self.fetch
33
- open("#{OsuCcScraper::ENDPOINT}/CourseDescription.aspx?level=undergrad").read
34
- end
31
+ # NE 311H INTRODUCTION TO THERMAL-FLUID SCIENCES (4)
32
+ def parse_course_subject_code(row)
33
+ row.text.split(' ')[0]
34
+ end
35
+
36
+ def parse_course_course_number(row)
37
+ row.text.split(' ')[1]
38
+ end
39
+
40
+ def parse_course_name(row)
41
+ row.text.split(' ')[2..-2].join(" ")
42
+ end
35
43
 
36
- def self.parse(html)
37
- ng = Nokogiri::HTML(html)
38
- ng.xpath("//tr/td/font/a").map{ |department|
39
- OsuCcScraper::Department.new({
40
- subject_code: department.content[/\(.*?\)/][1..-2],
41
- name: department.content[/([^(]+)/].strip
42
- })
43
- }.sort { |a,b|
44
- a.subject_code.downcase <=> b.subject_code.downcase
45
- }
46
44
  end
47
45
  end
@@ -1,76 +1,5 @@
1
- require "open-uri"
2
- require "nokogiri"
3
-
4
- class OsuCcScraper::Section
5
- # @return [String]
6
- attr_accessor :department
7
- # @return [String]
8
- attr_accessor :number
9
- # @return [String]
10
- attr_accessor :name
11
- # @return [String]
12
- attr_accessor :term
13
- # @return [String]
14
- attr_accessor :section
15
- # @return [String]
16
- attr_accessor :instructor
17
- # @return [String]
18
- attr_accessor :type
19
- # @return [String]
20
- attr_accessor :status
21
- # @return [String]
22
- attr_accessor :capacity
23
- # @return [String]
24
- attr_accessor :availability
25
-
26
- # @return [OsuCcScraper::Section]
27
- def initialize(args)
28
- args.each do |k,v|
29
- instance_variable_set("@#{k}", v) unless v.nil?
30
- end
31
- end
32
-
33
- # @return [Array]
34
- def to_a
35
- [@department, @number, @name, @term, @section, @instructor, @type, @status, @capacity, @availability]
36
- end
37
-
38
- private
39
-
40
- def self.fetch(subject_code, course_number)
41
- open("#{OsuCcScraper::ENDPOINT}/CourseDetail.aspx?subjectcode=#{subject_code}&coursenumber=#{course_number}").read
42
- end
43
-
44
- def self.parse(html)
45
- sections = []
46
-
47
- ng = Nokogiri::HTML(html)
48
- rows = ng.xpath('//table[@id="ctl00_ContentPlaceHolder1_SOCListUC1_gvOfferings"]/tr[position() > 1]')
49
- rows.each_with_index do |row, key|
50
- title = ng.at_xpath('//form/h3').content.split("\n")
51
- query = "//table[@id='ctl00_ContentPlaceHolder1_SOCListUC1_gvOfferings']"
52
-
53
- sections << OsuCcScraper::Section.new({
54
- :department => title[2].delete("\r\n").strip[0..-2].split(" ")[0],
55
- :number => title[2].split(" ")[1][0..-2],
56
- :name => title[3].strip,
57
- :term => at_xpath_or_null(ng, "#{query}/tr[position() = #{key+2}]/td[position() = 1]"),
58
- :section => at_xpath_or_null(ng, "#{query}/tr[position() = #{key+2}]/td[position() = 3]"),
59
- :instructor => at_xpath_or_null(ng, "#{query}/tr[position() = #{key+2}]/td[position() = 6]"),
60
- :type => at_xpath_or_null(ng, "#{query}/tr[position() = #{key+2}]/td[position() = 11]"),
61
- :status => at_xpath_or_null(ng, "#{query}/tr[position() = #{key+2}]/td[position() = 12]"),
62
- :capacity => at_xpath_or_null(ng, "#{query}/tr[position() = #{key+2}]/td[position() = 13]"),
63
- :availability => at_xpath_or_null(ng, "#{query}/tr[position() = #{key+2}]/td[position() = 14]")
64
- })
65
- end
66
- sections
67
- end
68
-
69
- def self.at_xpath_or_null(document, selector)
70
- if document.at_xpath(selector) != nil
71
- return document.at_xpath(selector).content.delete("\r\n").strip
72
- else
73
- return nil
74
- end
1
+ module OsuCcScraper
2
+ class Section < Struct.new(:department, :number, :name, :term, :section,
3
+ :instructor, :type, :status, :capacity, :current)
75
4
  end
76
5
  end
@@ -0,0 +1,37 @@
1
+ require "open-uri"
2
+ require "oga"
3
+
4
+ require "osu-cc-scraper/department"
5
+
6
+ module OsuCcScraper
7
+ class University
8
+
9
+ def departments
10
+ html = fetch_departments
11
+ parse_departments(html)
12
+ end
13
+
14
+ def fetch_departments
15
+ open("#{ENDPOINT}/CourseDescription.aspx?level=undergrad").read
16
+ end
17
+
18
+ def parse_departments(html)
19
+ ng = Oga.parse_html(html)
20
+ ng.xpath("//tr/td/font/a").map { |row|
21
+ Department.new(
22
+ parse_department_name(row),
23
+ parse_department_subject_code(row),
24
+ )
25
+ }
26
+ end
27
+
28
+ def parse_department_subject_code(row)
29
+ row.text[/\(.*?\)/][1..-2]
30
+ end
31
+
32
+ def parse_department_name(row)
33
+ row.text[/([^(]+)/].strip
34
+ end
35
+
36
+ end
37
+ end
metadata CHANGED
@@ -1,30 +1,59 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: osu-cc-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonah George
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-18 00:00:00.000000000 Z
11
+ date: 2016-02-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: nokogiri
14
+ name: oga
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '='
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 1.6.6.2
19
+ version: 2.1.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '='
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 1.6.6.2
27
- description: A script to gather course data from Oregon State's Course Catalog.
26
+ version: 2.1.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: A script to gather course data from Oregon State University's Course
56
+ Catalog.
28
57
  email: jonah.george@me.com
29
58
  executables: []
30
59
  extensions: []
@@ -34,6 +63,7 @@ files:
34
63
  - lib/osu-cc-scraper/course.rb
35
64
  - lib/osu-cc-scraper/department.rb
36
65
  - lib/osu-cc-scraper/section.rb
66
+ - lib/osu-cc-scraper/university.rb
37
67
  homepage: https://github.com/jonahgeorge/osu-cc-scraper
38
68
  licenses:
39
69
  - MIT
@@ -46,7 +76,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
46
76
  requirements:
47
77
  - - ">="
48
78
  - !ruby/object:Gem::Version
49
- version: '0'
79
+ version: 2.3.0
50
80
  required_rubygems_version: !ruby/object:Gem::Requirement
51
81
  requirements:
52
82
  - - ">="
@@ -54,7 +84,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
54
84
  version: '0'
55
85
  requirements: []
56
86
  rubyforge_project:
57
- rubygems_version: 2.4.5.1
87
+ rubygems_version: 2.5.1
58
88
  signing_key:
59
89
  specification_version: 4
60
90
  summary: OSU Course Catalog Scraper