scry 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +123 -0
- data/bin/console +14 -0
- data/bin/monitor +8 -0
- data/bin/scrape_blackboard +6 -0
- data/lib/scry.rb +57 -0
- data/lib/scry/course.rb +194 -0
- data/lib/scry/export_failed.rb +7 -0
- data/lib/scry/helpers.rb +19 -0
- data/lib/scry/scraper.rb +62 -0
- data/lib/scry/sidekiq/config.ru +8 -0
- data/lib/scry/sidekiq/workers/export_downloader.rb +39 -0
- data/lib/scry/sidekiq/workers/export_generator.rb +45 -0
- data/lib/scry/sidekiq/workers/log_writer.rb +22 -0
- data/lib/scry/tasks.rb +32 -0
- data/lib/scry/version.rb +3 -0
- data/lib/scry/workers.rb +4 -0
- metadata +196 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 8498f89f31168881c6697f7cdc1a9425096f1162
|
4
|
+
data.tar.gz: f4edcdd786e68962a9edf92ab98c0b12d5731386
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 71cb018b167abfd16e0375392791665c30d9ec1deef82a8cbf57f8c3eaf24f83e5e901108e149d32e9f28e549d5c1000dd172b857c6b64417847504591cc156a
|
7
|
+
data.tar.gz: db13dd81f55baeee873b7344b168b1c8db14765019c42b26eea2202c7bec2873c391dd08588238e80395e7cf44689052f74b93a42ef7e78bb4b6d05d9c7ad1b6
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2016 AtomicJolt
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
# Scry [![Build Status](https://travis-ci.org/atomicjolt/scry.svg?branch=master)](https://travis-ci.org/atomicjolt/scry)
|
2
|
+
|
3
|
+
Scrapes courses from blackboard.
|
4
|
+
|
5
|
+
## Dependencies
|
6
|
+
ruby >= 2.2.2
|
7
|
+
|
8
|
+
redis-server >= 3.0.7
|
9
|
+
|
10
|
+
## Installation
|
11
|
+
|
12
|
+
Add this line to your application's Gemfile:
|
13
|
+
|
14
|
+
```ruby
|
15
|
+
gem "scry"
|
16
|
+
```
|
17
|
+
|
18
|
+
And then execute:
|
19
|
+
```sh
|
20
|
+
$ bundle
|
21
|
+
```
|
22
|
+
|
23
|
+
Or install it yourself as:
|
24
|
+
```sh
|
25
|
+
$ gem install scry
|
26
|
+
```
|
27
|
+
|
28
|
+
Create a ruby file `workers.rb` and add
|
29
|
+
```ruby
|
30
|
+
require "scry/workers"
|
31
|
+
```
|
32
|
+
|
33
|
+
Create a `Rakefile` and add
|
34
|
+
```ruby
|
35
|
+
require "scry/tasks"
|
36
|
+
Scry::Tasks.install_tasks
|
37
|
+
```
|
38
|
+
|
39
|
+
Create a `sidekiq.yml` file and add
|
40
|
+
```yml
|
41
|
+
:concurrency: 20
|
42
|
+
|
43
|
+
:queues:
|
44
|
+
- [scry_export_generator, 1]
|
45
|
+
- [scry_export_downloader, 1]
|
46
|
+
- [scry_log_writer, 1]
|
47
|
+
|
48
|
+
:limits:
|
49
|
+
scry_export_generator: 5
|
50
|
+
scry_export_downloader: 15
|
51
|
+
scry_log_writer: 1
|
52
|
+
```
|
53
|
+
_note: limits is available through the [sidekiq-limit_fetch](https://github.com/brainopia/sidekiq-limit_fetch) gem_
|
54
|
+
|
55
|
+
Create a "scry.yml" file and add
|
56
|
+
```yml
|
57
|
+
:url: https://<blackboard_url>/
|
58
|
+
:login: <user_name>
|
59
|
+
:passwd: <user_password>
|
60
|
+
```
|
61
|
+
|
62
|
+
### Optional
|
63
|
+
If different log file names are desired, in the `scry.yml` add the file names.
|
64
|
+
This is the default configuration:
|
65
|
+
```yml
|
66
|
+
:export_generation_good: export_generation_good.txt
|
67
|
+
:export_generation_bad: export_generation_bad.txt
|
68
|
+
:export_download_good: export_download_good.txt
|
69
|
+
:export_download_bad: export_download_bad.txt
|
70
|
+
:export_generation_no_export_button: export_generation_no_export_button.txt
|
71
|
+
```
|
72
|
+
And if a different export folder is desired:
|
73
|
+
```yml
|
74
|
+
:default_dir: blackboard_exports
|
75
|
+
```
|
76
|
+
|
77
|
+
## Usage
|
78
|
+
|
79
|
+
Start up sidekiq
|
80
|
+
```sh
|
81
|
+
bundle exec sidekiq -r ./workers.rb -C sidekiq.yml
|
82
|
+
```
|
83
|
+
|
84
|
+
Run the rake task to download all the courses.
|
85
|
+
```sh
|
86
|
+
bundle exec rake scry:scrape
|
87
|
+
```
|
88
|
+
This will download each cartridge zip into the default directory, `blackboard_exports`.
|
89
|
+
|
90
|
+
Delete the entire default `blackboard_exports` folder
|
91
|
+
```sh
|
92
|
+
bundle exec rake scry:clean
|
93
|
+
```
|
94
|
+
|
95
|
+
Monitor sidekiq
|
96
|
+
```sh
|
97
|
+
bundle exec monitor
|
98
|
+
```
|
99
|
+
|
100
|
+
# Development
|
101
|
+
|
102
|
+
After checking out the repo, run `bundle install` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
103
|
+
|
104
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
105
|
+
|
106
|
+
## Running sidekiq
|
107
|
+
|
108
|
+
Need redis running first: `redis-server`
|
109
|
+
|
110
|
+
Run `bundle exec sidekiq -r ./lib/scry/workers.rb -C sidekiq.yml`
|
111
|
+
|
112
|
+
To get access to the workers in code, `require "scry/workers"`.
|
113
|
+
|
114
|
+
To monitor sidekiq using the web UI, run `bin/monitor`
|
115
|
+
|
116
|
+
## Contributing
|
117
|
+
|
118
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/atomicjolt/scry. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
119
|
+
|
120
|
+
|
121
|
+
## License
|
122
|
+
|
123
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "scry"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
require "pry"
|
11
|
+
Pry.start
|
12
|
+
|
13
|
+
# require "irb"
|
14
|
+
# IRB.start
|
data/bin/monitor
ADDED
data/lib/scry.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require "scry/scraper"
|
2
|
+
|
3
|
+
EXPORT_GENERATION_GOOD = "export_generation_good.txt".freeze
|
4
|
+
EXPORT_GENERATION_BAD = "export_generation_bad.txt".freeze
|
5
|
+
EXPORT_DOWNLOAD_GOOD = "export_download_good.txt".freeze
|
6
|
+
EXPORT_DOWNLOAD_BAD = "export_download_bad.txt".freeze
|
7
|
+
EXPORT_GENERATION_NO_EXPORT_BUTTON =
|
8
|
+
"export_generation_no_export_button.txt".freeze
|
9
|
+
|
10
|
+
DEFAULT_DIR = "blackboard_exports".freeze
|
11
|
+
|
12
|
+
##
# Configuration accessors for Scry.
#
# Settings are read from an optional `scry.yml` in the current working
# directory. Log file names and the export directory fall back to the
# constants defined at the top of this file when not configured.
##
module Scry
  ##
  # Loads `scry.yml` from the current working directory.
  #
  # Returns the parsed contents, or an empty Hash when the file is missing
  # or empty. The file is re-read on every call.
  ##
  def self.config
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    if File.exist? "scry.yml"
      # YAML.load returns false for an empty document; fall back to {} so
      # the accessors below can always index the result.
      YAML::load(File.read("scry.yml")) || {}
    else
      {}
    end
  end

  ##
  # Configured `:url` value, or nil when not configured.
  ##
  def self.url
    Scry.config[:url]
  end

  ##
  # Configured `:login` value, or nil when not configured.
  ##
  def self.login
    Scry.config[:login]
  end

  ##
  # Configured `:passwd` value, or nil when not configured.
  ##
  def self.passwd
    Scry.config[:passwd]
  end

  ##
  # Configured `:default_dir`, defaulting to DEFAULT_DIR.
  ##
  def self.default_dir
    Scry.config[:default_dir] || DEFAULT_DIR
  end

  ##
  # Configured `:export_generation_good` log name,
  # defaulting to EXPORT_GENERATION_GOOD.
  ##
  def self.export_generation_good
    Scry.config[:export_generation_good] || EXPORT_GENERATION_GOOD
  end

  ##
  # Configured `:export_generation_bad` log name,
  # defaulting to EXPORT_GENERATION_BAD.
  ##
  def self.export_generation_bad
    Scry.config[:export_generation_bad] || EXPORT_GENERATION_BAD
  end

  ##
  # Configured `:export_download_good` log name,
  # defaulting to EXPORT_DOWNLOAD_GOOD.
  ##
  def self.export_download_good
    Scry.config[:export_download_good] || EXPORT_DOWNLOAD_GOOD
  end

  ##
  # Configured `:export_download_bad` log name,
  # defaulting to EXPORT_DOWNLOAD_BAD.
  ##
  def self.export_download_bad
    Scry.config[:export_download_bad] || EXPORT_DOWNLOAD_BAD
  end

  ##
  # Configured `:export_generation_no_export_button` log name,
  # defaulting to EXPORT_GENERATION_NO_EXPORT_BUTTON.
  ##
  def self.export_generation_no_export_button
    Scry.config[:export_generation_no_export_button] ||
      EXPORT_GENERATION_NO_EXPORT_BUTTON
  end
end
|
data/lib/scry/course.rb
ADDED
@@ -0,0 +1,194 @@
|
|
1
|
+
require "mechanize"
|
2
|
+
require "scry"
|
3
|
+
require "scry/helpers"
|
4
|
+
require "scry/export_failed"
|
5
|
+
|
6
|
+
TWO_HOURS = 7200
|
7
|
+
|
8
|
+
module Scry
  ##
  # This class represents a course for which we are extracting data
  ##
  class Course
    include Scry::Helpers

    ##
    # A new course accepts a Mechanize Agent
    # and a Mechanize::Page::Link object for a course link
    ##
    def initialize(agent, course_link)
      @agent = agent
      @course_link = course_link
    end

    ##
    # Creates a new instance of a course
    # from given cookies so the user is signed in
    #
    # +cookie_crumbs+ is a YAML-serialized cookie jar (as produced by
    # `agent.cookie_jar.to_yaml`); +course_url+ becomes the link's href.
    ##
    def self.from_cookies(cookie_crumbs, course_url)
      agent = Mechanize.new
      # NOTE(review): this deserializes arbitrary Ruby objects, so
      # cookie_crumbs must only ever come from a trusted producer.
      # Psych 4 made YAML.load reject non-basic classes; prefer unsafe_load
      # where it exists and fall back to load on older Psych versions.
      agent.cookie_jar =
        if YAML.respond_to?(:unsafe_load)
          YAML.unsafe_load(cookie_crumbs)
        else
          YAML::load(cookie_crumbs)
        end
      course_link = Mechanize::Page::Link.new(
        {
          "href" => course_url,
        },
        agent,
        nil,
      )
      Course.new(agent, course_link)
    end

    ##
    # Creates an export file for the course
    #
    # Navigates from the course page to the export course page
    # and creates an export for the course.
    #
    # First it deletes all existing exports, then attempts to create a new
    # export.
    #
    # It will wait a specified amount of time for the export to be created.
    #
    # Returns the exports page once an export is listed. When the export
    # button is missing, the course is logged and the result of write_log is
    # returned; when the utilities link is missing, nil is returned.
    #
    # Raises Scry::ExportFailed if the export never shows up.
    ##
    def create_export
      course_page = @agent.click(@course_link)
      package_links = course_page.links_with(
        text: /Packages & Utilities Overview Page/,
      )
      if package_links.any?
        utilities_page = click_link(
          agent: @agent,
          page: course_page,
          text: /Packages & Utilities Overview Page/,
        )
        exports_page = click_link(
          agent: @agent,
          page: utilities_page,
          text: /Export\/Archive Course/,
        )
        export_button_link = exports_page.links_with(
          text: /Export Package/,
        )
        if export_button_link.any?
          course_id =
            exports_page.form_with(name: "selectFileToDelete")["courseId"]
          _delete_existing_exports(exports_page, course_id, nil)
          export_page = click_link(
            agent: @agent,
            page: exports_page,
            text: /Export Package/,
          )
          exports_page = _process_export_form(export_page)
          exports = exports_page.links_with(
            text: "View Basic Log",
          )
          _wait_for_export(exports, utilities_page, exports_page, course_id)
        else
          write_log(
            Scry.export_generation_no_export_button,
            @course_link.href.strip,
          )
        end
      end
    end

    ##
    # Opens the log for an export and checks if it was successful.
    #
    # Returns true when the log container text does not mention "error"
    # (case-insensitive), false otherwise.
    #
    # Raises Scry::ExportFailed when the page has no "View Basic Log" link.
    ##
    def validate_export(exports_page)
      links = exports_page.links_with(text: "View Basic Log")
      if links.empty?
        raise Scry::ExportFailed, "Links empty #{exports_page.uri}"
      end
      # The log URL is the quoted argument inside the link's onclick handler.
      url = links.last.attributes["onclick"][/'(.*)'/, 1]
      log_page = @agent.get(url)
      text = Nokogiri::HTML(log_page.body).css("div#containerdiv").text
      !text.match(/error/i)
    end

    ##
    # Extracts the download URL for an export
    #
    # Uses the last "Open" link on the page.
    #
    # Raises Scry::ExportFailed when the page has no "Open" link, instead of
    # failing with a NoMethodError on nil.
    ##
    def download_url(page)
      download_link = page.links_with(text: "Open").last
      if download_link.nil?
        raise Scry::ExportFailed, "No download link #{page.uri}"
      end
      download_link.href
    end

    ##
    # Downloads the export into the configured directory (Scry.default_dir),
    # named after the last path segment of the URL.
    ##
    def download_export(url)
      puts "Start downloading #{url}"
      time = Time.now
      # Have zip responses handled by Mechanize::Download.
      @agent.pluggable_parser["application/zip"] = Mechanize::Download
      filename = File.basename(URI.parse(url).path)
      @agent.get(url).save(File.join(Scry.default_dir, filename))
      elapsed = Time.now - time
      puts "Done downloading #{url} took #{elapsed} seconds"
    end

    ##
    # :nodoc:
    # Fills out the export form and submits it.
    #
    # Checks both "copy links and copies of content" radio buttons and every
    # checkbox on the form, then returns the page produced by the submit.
    ##
    def _process_export_form(export_page)
      export_page.form_with(name: "selectCourse") do |export_form|
        export_form.radiobutton_with(
          id: "copyLinkToCourseFilesAndCopiesOfContent",
        ).check
        export_form.radiobutton_with(
          id: "copyLinkToExternalCourseFilesAndCopiesOfContent",
        ).check
        export_form.checkboxes.each(&:check)
      end.submit
    end

    ##
    # :nodoc:
    # Waits a specified amount of time for an export
    # to show up on the exports page.
    #
    # Polls the exports page every 30 seconds until a "View Basic Log" link
    # is present or TWO_HOURS have elapsed. Returns the latest exports page.
    #
    # Raises Scry::ExportFailed when no export link appeared in time.
    ##
    def _wait_for_export(exports, utilities_page, exports_page, course_id)
      time = Time.now
      elapsed = 0
      puts "Begin waiting for export link for #{course_id}"
      while exports.count.zero? && elapsed < TWO_HOURS
        sleep 30
        exports_page = click_link(
          agent: @agent,
          page: utilities_page,
          text: /Export\/Archive Course/,
        )
        exports = exports_page.links_with(
          text: "View Basic Log",
        )
        elapsed = Time.now - time
        puts "#{course_id} waited #{elapsed.to_i} seconds for link"
      end
      # The loop ends on success or on timeout. Decide by whether a link was
      # found, so an export that appears on the final poll is not reported
      # as a timeout.
      if exports.count.zero?
        raise Scry::ExportFailed, "Export timeout for #{course_id}"
      end
      puts "#{course_id} done after #{(Time.now - time).to_i} seconds"
      exports_page
    end

    ##
    # :nodoc:
    # Deletes all existing exports from a page.
    #
    # Submits the delete form for the last "Delete" link, then recurses on
    # the resulting page until no "Delete" links remain. +links+ may be nil,
    # in which case the links are looked up on +page+.
    ##
    def _delete_existing_exports(page, course_id, links)
      links ||= page.links_with(text: "Delete")
      puts "#{course_id} Deleting exports... #{links.count} remaining"
      if links.any?
        # The file name is the first quoted argument in the link's href.
        filename = links.last.href[/'(.*)'\,/, 1]

        page = page.form_with(name: "selectFileToDelete") do |form|
          form.field_with(name: "filename").value = filename
        end.submit

        links = page.links_with(text: "Delete")
        if links.any?
          _delete_existing_exports(page, course_id, links)
        end
      end
    end
  end
end
|
data/lib/scry/helpers.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
require "scry/sidekiq/workers/log_writer"
|
2
|
+
|
3
|
+
module Scry
  ##
  # Small utilities mixed into the scraper, the course and the workers.
  ##
  module Helpers
    ##
    # Looks up a link on +page+ by +text+ (via link_with) and follows it
    # with +agent+, returning whatever agent.click returns.
    ##
    def click_link(agent:, page:, text:)
      target = page.link_with(text: text)
      agent.click(target)
    end

    ##
    # Hands +data+ to Scry::LogWriter so it is written to +log+
    # by a background job.
    ##
    def write_log(log, data)
      Scry::LogWriter.perform_async(log, data)
    end
  end
end
|
data/lib/scry/scraper.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
require "mechanize"
|
3
|
+
require "scry"
|
4
|
+
require "scry/sidekiq/workers/export_generator"
|
5
|
+
require "scry/helpers"
|
6
|
+
require "scry/course"
|
7
|
+
|
8
|
+
module Scry
  extend Scry::Helpers

  ##
  # Creates sidekiq jobs for each course to generate an export.
  #
  # Logs in the user and goes over every course
  # and creates a sidekiq job to generate an export for it.
  # Courses whose URL already appears in the successful-download log
  # are skipped.
  ##
  def self.scrape
    url = Scry.url
    login = Scry.login
    passwd = Scry.passwd

    agent = Mechanize.new do |secret_agent_man|
      secret_agent_man.follow_meta_refresh = true
    end

    agent.get(url) do |home_page|
      index_page = home_page.form_with(name: "login") do |form|
        form["user_id"] = login
        form["password"] = passwd
      end.submit
      courses_page = click_link(
        agent: agent,
        page: index_page,
        text: /Open Bb Course List/,
      )

      courses_downloaded = Scry.courses_downloaded

      # The agent makes no further requests below, so the serialized cookie
      # jar is the same for every course; build it once.
      cookie_crumbs = agent.cookie_jar.to_yaml

      # It is currently unknown if the links on the courses_page are paginated
      # This gets them as if they are not.
      course_links = courses_page.links_with(href: /type=Course/)
      course_links.each do |course_link|
        course_url = course_link.href.strip
        # Substring check against the raw contents of the download log.
        if !courses_downloaded.include? course_url
          Scry::ExportGenerator.perform_async(
            cookie_crumbs,
            File.join(url, course_url),
          )
        end
      end
    end
  end

  ##
  # Returns the contents of the successful-download log as a String,
  # or an empty String when the log does not exist yet.
  ##
  def self.courses_downloaded
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    if File.exist?(Scry.export_download_good)
      File.read(Scry.export_download_good)
    else
      ""
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require "mechanize"
|
2
|
+
require "sidekiq"
|
3
|
+
require "scry"
|
4
|
+
require "scry/course"
|
5
|
+
require "scry/export_failed"
|
6
|
+
require "scry/helpers"
|
7
|
+
|
8
|
+
module Scry
  ##
  # Sidekiq worker that downloads a generated export.
  #
  # Runs on the scry_export_downloader queue with retry: 5.
  ##
  class ExportDownloader
    include Sidekiq::Worker
    include Scry::Helpers
    sidekiq_options queue: :scry_export_downloader, retry: 5

    ##
    # Downloads one export.
    #
    # Rebuilds a signed-in course from the serialized cookies, swaps the
    # path of +course_url+ for +download_url+ and downloads that address.
    # Success and failure are each recorded in their own log; failures are
    # re-raised after being logged.
    ##
    def perform(cookie_crumbs, course_url, download_url)
      begin
        course = Course.from_cookies(cookie_crumbs, course_url)
        target = URI.parse(course_url)
        target.path = download_url
        course.download_export(target.to_s)
        write_log(Scry.export_download_good, course_url)
      rescue SocketError, Mechanize::Error, Net::HTTPClientError => failure
        detail = "#{course_url} #{failure.class} #{failure.message}"
        write_log(Scry.export_download_bad, detail)
        raise
      end
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require "sidekiq"
|
2
|
+
require "scry"
|
3
|
+
require "scry/course"
|
4
|
+
require "scry/export_failed"
|
5
|
+
require "scry/helpers"
|
6
|
+
require "scry/sidekiq/workers/export_downloader"
|
7
|
+
|
8
|
+
module Scry
  ##
  # Sidekiq worker that generates the export for one course.
  #
  # Runs on the scry_export_generator queue with retry: 5.
  ##
  class ExportGenerator
    include Sidekiq::Worker
    include Scry::Helpers
    sidekiq_options queue: :scry_export_generator, retry: 5

    ##
    # Generates an export and, when it validates, queues its download.
    #
    # Rebuilds a signed-in course from the serialized cookies and asks it to
    # create an export. Nothing more happens unless that yields a
    # Mechanize::Page. A valid export is logged and handed to
    # Scry::ExportDownloader; an invalid one is logged and raises
    # Scry::ExportFailed.
    ##
    def perform(cookie_crumbs, course_url)
      course = Course.from_cookies(cookie_crumbs, course_url)
      exports_page = course.create_export
      return unless exports_page.is_a? Mechanize::Page

      unless course.validate_export(exports_page)
        write_log(Scry.export_generation_bad, course_url)
        raise Scry::ExportFailed, "Something failed"
      end

      write_log(Scry.export_generation_good, course_url)
      Scry::ExportDownloader.perform_async(
        cookie_crumbs,
        course_url,
        course.download_url(exports_page),
      )
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require "sidekiq"
|
2
|
+
|
3
|
+
module Scry
  ##
  # Sidekiq worker that writes log files.
  #
  # Runs on the scry_log_writer queue with retry: 5.
  ##
  class LogWriter
    include Sidekiq::Worker
    sidekiq_options queue: :scry_log_writer, retry: 5

    ##
    # Appends +data+ as a line to the file named by +log+.
    ##
    def perform(log, data)
      File.open(log, "a") { |file| file.puts(data) }
    end
  end
end
|
data/lib/scry/tasks.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require "rake/clean"
|
2
|
+
require "scry"
|
3
|
+
|
4
|
+
module Scry
  ##
  # Defines the gem's rake tasks in the host project.
  ##
  class Tasks
    extend Rake::DSL if defined? Rake::DSL

    class << self
      ##
      # Creates rake tasks that can be run from the gem.
      #
      # Add this to your Rakefile
      #
      #   require "scry/tasks"
      #   Scry::Tasks.install_tasks
      #
      # Defines `scry:scrape`, which creates the export directory and runs
      # Scry.scrape, and `scry:clean`, which removes the export directory.
      ##
      def install_tasks
        namespace(:scry) do
          desc("Scrape the configured url for course data")
          task(:scrape) do
            mkdir_p(Scry.default_dir)
            Scry.scrape
          end

          desc("Completely delete all downloaded files")
          task(:clean) { rm_rf(Scry.default_dir) }
        end
      end
    end
  end
end
|
data/lib/scry/version.rb
ADDED
data/lib/scry/workers.rb
ADDED
metadata
ADDED
@@ -0,0 +1,196 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: scry
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Atomic Jolt
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-12-07 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: pry-byebug
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '3.4'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '3.4'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '3.5'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '3.5'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: webmock
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '2.1'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '2.1'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '11.3'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '11.3'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: mechanize
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '2.7'
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: 2.7.5
|
79
|
+
type: :runtime
|
80
|
+
prerelease: false
|
81
|
+
version_requirements: !ruby/object:Gem::Requirement
|
82
|
+
requirements:
|
83
|
+
- - "~>"
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '2.7'
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: 2.7.5
|
89
|
+
- !ruby/object:Gem::Dependency
|
90
|
+
name: fileutils
|
91
|
+
requirement: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - "~>"
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '0.7'
|
96
|
+
type: :runtime
|
97
|
+
prerelease: false
|
98
|
+
version_requirements: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - "~>"
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '0.7'
|
103
|
+
- !ruby/object:Gem::Dependency
|
104
|
+
name: sidekiq
|
105
|
+
requirement: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - "~>"
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '4.2'
|
110
|
+
type: :runtime
|
111
|
+
prerelease: false
|
112
|
+
version_requirements: !ruby/object:Gem::Requirement
|
113
|
+
requirements:
|
114
|
+
- - "~>"
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
version: '4.2'
|
117
|
+
- !ruby/object:Gem::Dependency
|
118
|
+
name: sidekiq-limit_fetch
|
119
|
+
requirement: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - "~>"
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '3.4'
|
124
|
+
type: :runtime
|
125
|
+
prerelease: false
|
126
|
+
version_requirements: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - "~>"
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '3.4'
|
131
|
+
- !ruby/object:Gem::Dependency
|
132
|
+
name: thin
|
133
|
+
requirement: !ruby/object:Gem::Requirement
|
134
|
+
requirements:
|
135
|
+
- - "~>"
|
136
|
+
- !ruby/object:Gem::Version
|
137
|
+
version: '1.7'
|
138
|
+
type: :runtime
|
139
|
+
prerelease: false
|
140
|
+
version_requirements: !ruby/object:Gem::Requirement
|
141
|
+
requirements:
|
142
|
+
- - "~>"
|
143
|
+
- !ruby/object:Gem::Version
|
144
|
+
version: '1.7'
|
145
|
+
description: Commandline tool that downloads blackboard cartridges
|
146
|
+
email: joel@atomicjolt.com
|
147
|
+
executables:
|
148
|
+
- console
|
149
|
+
- monitor
|
150
|
+
- scrape_blackboard
|
151
|
+
extensions: []
|
152
|
+
extra_rdoc_files:
|
153
|
+
- README.md
|
154
|
+
files:
|
155
|
+
- LICENSE.txt
|
156
|
+
- README.md
|
157
|
+
- bin/console
|
158
|
+
- bin/monitor
|
159
|
+
- bin/scrape_blackboard
|
160
|
+
- lib/scry.rb
|
161
|
+
- lib/scry/course.rb
|
162
|
+
- lib/scry/export_failed.rb
|
163
|
+
- lib/scry/helpers.rb
|
164
|
+
- lib/scry/scraper.rb
|
165
|
+
- lib/scry/sidekiq/config.ru
|
166
|
+
- lib/scry/sidekiq/workers/export_downloader.rb
|
167
|
+
- lib/scry/sidekiq/workers/export_generator.rb
|
168
|
+
- lib/scry/sidekiq/workers/log_writer.rb
|
169
|
+
- lib/scry/tasks.rb
|
170
|
+
- lib/scry/version.rb
|
171
|
+
- lib/scry/workers.rb
|
172
|
+
homepage: https://github.com/atomicjolt/scry
|
173
|
+
licenses:
|
174
|
+
- MIT
|
175
|
+
metadata: {}
|
176
|
+
post_install_message:
|
177
|
+
rdoc_options: []
|
178
|
+
require_paths:
|
179
|
+
- lib
|
180
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
181
|
+
requirements:
|
182
|
+
- - ">="
|
183
|
+
- !ruby/object:Gem::Version
|
184
|
+
version: '0'
|
185
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
186
|
+
requirements:
|
187
|
+
- - ">="
|
188
|
+
- !ruby/object:Gem::Version
|
189
|
+
version: '0'
|
190
|
+
requirements: []
|
191
|
+
rubyforge_project:
|
192
|
+
rubygems_version: 2.5.1
|
193
|
+
signing_key:
|
194
|
+
specification_version: 4
|
195
|
+
summary: Downloads Blackboard Cartridges
|
196
|
+
test_files: []
|