osu-ctl-scraper 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 0f61f5674ffa411e662d676ca6a1a5c68c3c35b8
4
+ data.tar.gz: 08336a7892b67f47adf6ad823005f23b057ff9b6
5
+ SHA512:
6
+ metadata.gz: b9283653ac17768853f815ecad02e26011df72bf67b099e70b5469c60a4873e73ca717457ba568948b4f473ac646905521d1a5c48298831c63494c8ea144ecb1
7
+ data.tar.gz: d427cdc637e0eecf8072ba812b7d1d08b44b8614f09e25537c415b39b94c634a74e3027ab77fd47d9281ad797ad65763d85d987c6d09a0f583c86f7adc964ad8
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "csv"
4
+ require "osu-ctl-scraper"
5
+
6
# Look up the "BA" textbook list for spring 2015 and write each book's values
# to standard output as one CSV row.
OsuCtlScraper::Book.where("BA", 2015, :spring).each { |book| $stdout.puts(book.values.to_csv) }
@@ -0,0 +1,10 @@
1
+ require "net/http"
2
+ require "nokogiri"
3
+ require "json"
4
+ require "open-uri"
5
+
6
# Top-level namespace of the gem. It is declared empty here; the files required
# after it (osu-ctl-scraper/department and osu-ctl-scraper/book) reopen it.
+ module OsuCtlScraper
7
+ end
8
+
9
+ require "osu-ctl-scraper/department"
10
+ require "osu-ctl-scraper/book"
@@ -0,0 +1,88 @@
1
module OsuCtlScraper
  # Fetches the textbook list for one department and term from the OSU Beaver
  # Store and turns every table row of the response into a Hash.
  class Book
    # Endpoint the textbook search form is posted to.
    TEXTBOOKS_URL = "http://osubeaverstore.com/Faculty/GetTextbooks/".freeze

    # Letter the server expects after the year for each term.
    TERM_CODES = { winter: "A", spring: "B", summer: "C", fall: "D" }.freeze

    # Keys of a book Hash, listed in the order of the <td> cells of a row
    # (the first key is read from td:nth-child(1), the second from
    # td:nth-child(2), and so on).
    COLUMNS = [
      :course, :section, :instructor, :title, :edition, :author,
      :isbn, :publisher, :requirement, :sku, :comments, :req_date
    ].freeze

    # Returns the books listed for a department in a given term.
    #
    # @param [String] subject_code
    # @param [Integer] year
    # @param [Symbol, String] term One of: winter, spring, summer, fall
    # @return [Array<Hash>]
    # @raise [ArgumentError] if term is not a known term
    def self.where(subject_code, year, term)
      response = get_html(subject_code, year, term)
      process_html(subject_code, format_response(response.body))
    end

    # Posts the search form and returns the raw HTTP response; callers read
    # the payload through its #body.
    #
    # @param [String] subject_code
    # @param [Integer] year
    # @param [Symbol, String] term One of: winter, spring, summer, fall
    # @return [Net::HTTPResponse]
    # @raise [ArgumentError] if term is not a known term
    def self.get_html(subject_code, year, term)
      Net::HTTP.post_form(URI.parse(TEXTBOOKS_URL), form_params(subject_code, year, term))
    end

    # Builds the form fields sent with the search request.
    #
    # @param [String] subject_code
    # @param [Integer] year
    # @param [Symbol, String] term One of: winter, spring, summer, fall
    # @return [Hash]
    # @raise [ArgumentError] if term is not a known term
    def self.form_params(subject_code, year, term)
      {
        termcode: termcode(year, term),
        dept: subject_code
      }
    end

    # Formats the year and term into the expected server request format,
    # e.g. (2015, :spring) => "2015B".
    #
    # @param [Integer] year
    # @param [Symbol, String] term One of: winter, spring, summer, fall
    #   (Strings are accepted case-insensitively)
    # @return [String]
    # @raise [ArgumentError] if term is not a known term
    def self.termcode(year, term)
      code = TERM_CODES[term.to_s.downcase.to_sym]
      # Fail loudly: without this an unknown term would silently produce a
      # code consisting of the year alone.
      raise ArgumentError, "unknown term: #{term.inspect}" unless code

      "#{year}#{code}"
    end

    # Marshals the server response into a format understandable by Nokogiri
    # by undoing the escaping of angle brackets and double quotes.
    #
    # @param [String] body
    # @return [String]
    def self.format_response(body)
      # The patterns are single-quoted on purpose: they match the literal
      # six-character sequences \u003c / \u003e and a backslash-escaped quote.
      body.
        gsub('\u003c', '<').
        gsub('\u003e', '>').
        gsub('\\"', '"')
    end

    # Converts every table row except first-child rows (presumably the header
    # row - confirm against the live markup) into a book Hash tagged with the
    # subject code.
    #
    # @param [String] subject_code
    # @param [String] html
    # @return [Array<Hash>]
    def self.process_html(subject_code, html)
      Nokogiri::HTML(html).css("tr:not(:first-child)").map do |row|
        # merge appends :subject_code after the column keys, so the order of
        # Hash#values stays: columns first, subject code last.
        process_row(row).merge(subject_code: subject_code)
      end
    end

    # Reads the twelve cells of a row into a Hash keyed by COLUMNS, with
    # surrounding whitespace stripped from every value.
    #
    # @param [Nokogiri::XML::Element] row
    # @return [Hash]
    def self.process_row(row)
      COLUMNS.each_with_index.each_with_object({}) do |(key, index), book|
        # CSS :nth-child is 1-based, the array index is 0-based.
        book[key] = row.css("td:nth-child(#{index + 1})").text.strip
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
module OsuCtlScraper
  # Scrapes the department choices offered by the OSU Beaver Store faculty
  # textbook page.
  class Department
    # Page whose "Dept" <select> lists the departments.
    DEPARTMENTS_URL = "http://osubeaverstore.com/faculty/textbooks/".freeze

    # Downloads the page and returns every department found on it.
    #
    # @return [Array<Hash>]
    def self.all
      process_html(get_html)
    end

    # Downloads the textbook page.
    #
    # @return [String] html
    def self.get_html
      # Kernel#open no longer opens URLs on Ruby 3.0+; URI#read (added by
      # open-uri) works on old and new Rubies alike.
      URI.parse(DEPARTMENTS_URL).read
    end

    # Builds one Hash per <option> of the "Dept" select, skipping the first
    # option (presumably a placeholder entry - confirm against the live page).
    #
    # @param [String] html
    # @return [Array<Hash>]
    def self.process_html(html)
      Nokogiri::HTML(html).css("select[id='Dept'] option:not(:first-child)").map do |option|
        process_option(option)
      end
    end

    # @param [Nokogiri::XML::Element] option
    # @return [Hash] with :subject_code (the option's value attribute) and
    #   :title (the cleaned-up option text)
    def self.process_option(option)
      {
        subject_code: option["value"],
        title: process_title(option.text)
      }
    end

    # Extracts the department name from an option label by dropping everything
    # up to and including the first colon (labels look like "CODE: Name" -
    # assumption based on the split, confirm against the live page). Colons
    # inside the name are kept, and a label without a colon is returned
    # stripped rather than raising.
    #
    # @param [String] title
    # @return [String]
    def self.process_title(title)
      _code, separator, name = title.partition(":")
      (separator.empty? ? title : name).strip
    end
  end
end
metadata ADDED
@@ -0,0 +1,63 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: osu-ctl-scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Jonah George
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-08-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.6.6.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 1.6.6.2
27
+ description: A script to gather course data from Oregon State's Current Textbook List.
28
+ email: jonah.george@me.com
29
+ executables:
30
+ - osu-ctl-scraper
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - bin/osu-ctl-scraper
35
+ - lib/osu-ctl-scraper.rb
36
+ - lib/osu-ctl-scraper/book.rb
37
+ - lib/osu-ctl-scraper/department.rb
38
+ homepage: https://github.com/jonahgeorge/osu-ctl-scraper
39
+ licenses:
40
+ - MIT
41
+ metadata: {}
42
+ post_install_message:
43
+ rdoc_options: []
44
+ require_paths:
45
+ - lib
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ requirements: []
57
+ rubyforge_project:
58
+ rubygems_version: 2.4.8
59
+ signing_key:
60
+ specification_version: 4
61
+ summary: OSU Current Textbook List Scraper
62
+ test_files: []
63
+ has_rdoc: