mwcrawler 0.1.0 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/mwcrawler/classes.rb +13 -11
- data/lib/mwcrawler/courses.rb +3 -1
- data/lib/mwcrawler/crawler.rb +5 -1
- data/lib/mwcrawler/curriculum.rb +4 -2
- data/lib/mwcrawler/departments.rb +3 -1
- data/lib/mwcrawler/subjects.rb +7 -5
- data/lib/mwcrawler/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f261cbbdfe2fcc489def8bbc14a36967ef454190e201b5c1f2c62c160a602ccc
|
4
|
+
data.tar.gz: 37253ef7297d23721b6f2a006568227581a2b3423032c98ba431b364d737ec25
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '02422271583bc57dbfad728ef0897b573fde92020c65bc28bcf1dea3c552bf41cb09f211ed150b4c70196355b70334084d379d892fc91a99594d12af7112e8cf'
|
7
|
+
data.tar.gz: e9f8e1ee8b32cd646e5cfb06fa1de71c273518ae3c75fa5338b1c240e29d2cc6d84bba87ed25e8b9a09e6c08f7429d28ebcf11a08bc44f3dc30251851ede7547
|
data/Gemfile.lock
CHANGED
data/lib/mwcrawler/classes.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
module Mwcrawler
|
4
4
|
# Scraps Classes by campus
|
5
5
|
module Classes
|
6
|
-
def self.scrap(department_code)
|
6
|
+
def self.scrap(department_code, options)
|
7
7
|
courses_links = scrap_courses_links(department_code)
|
8
8
|
rows = []
|
9
9
|
courses_links.each do |course_link|
|
@@ -12,13 +12,15 @@ module Mwcrawler
|
|
12
12
|
rows
|
13
13
|
end
|
14
14
|
|
15
|
-
|
15
|
+
private
|
16
|
+
|
17
|
+
def self.scrap_courses_links(department_code)
|
16
18
|
page = Helpers.set_crawler(department_code, 'graduacao/oferta_dis.aspx?cod=', exact: true)
|
17
19
|
page.css('#datatable tr td:nth-child(2) a')
|
18
20
|
.map { |link| link['href'] }
|
19
21
|
end
|
20
22
|
|
21
|
-
|
23
|
+
def self.scrap_classes(course_link)
|
22
24
|
rows = []
|
23
25
|
|
24
26
|
page = Helpers.set_crawler(course_link, 'graduacao/', exact: true)
|
@@ -31,7 +33,7 @@ module Mwcrawler
|
|
31
33
|
end
|
32
34
|
rows
|
33
35
|
end
|
34
|
-
|
36
|
+
def self.class_row_init(page, name)
|
35
37
|
{ department: page.css('#datatable tr:first-child a').text,
|
36
38
|
code: page.css('#datatable')[0].css('tr:nth-child(2) td').text.to_i,
|
37
39
|
course_code: scrap_course_code(page),
|
@@ -39,18 +41,18 @@ module Mwcrawler
|
|
39
41
|
name: name }
|
40
42
|
end
|
41
43
|
|
42
|
-
|
44
|
+
def self.scrap_course_code(page)
|
43
45
|
course_uri = page.css('#datatable')[0].css('tr:nth-child(3) td a').first['href']
|
44
46
|
Helpers.uri_query_params(course_uri)['cod'].to_i
|
45
47
|
end
|
46
48
|
|
47
|
-
|
49
|
+
def self.scrap_credit_hash(page)
|
48
50
|
credit_string = page.css('#datatable')[0].css('tr:nth-child(4) td').text
|
49
51
|
credits = credit_string.split('-').map(&:to_i)
|
50
52
|
{ theory: credits[0], practical: credits[1], extension: credits[2], study: credits[3] }
|
51
53
|
end
|
52
54
|
|
53
|
-
|
55
|
+
def self.scrap_row(row_init, page, count)
|
54
56
|
row = row_init
|
55
57
|
row.merge(scrap_vacancies(page, count))
|
56
58
|
# HORARIOS
|
@@ -60,7 +62,7 @@ module Mwcrawler
|
|
60
62
|
row
|
61
63
|
end
|
62
64
|
|
63
|
-
|
65
|
+
def self.scrap_schedules(page, count)
|
64
66
|
schedules = page.css('.tabela-oferta')[count]
|
65
67
|
.css('tr td:nth-child(4) .table')
|
66
68
|
.css('td').map(&:text)
|
@@ -68,7 +70,7 @@ module Mwcrawler
|
|
68
70
|
Helpers.format_hours(schedules)
|
69
71
|
end
|
70
72
|
|
71
|
-
|
73
|
+
def self.scrap_teachers(page, count)
|
72
74
|
teachers = page.css('.tabela-oferta')[count]
|
73
75
|
.css('tr td:nth-child(5) td')
|
74
76
|
.map(&:text)
|
@@ -76,7 +78,7 @@ module Mwcrawler
|
|
76
78
|
Helpers.format_teachers(teachers)
|
77
79
|
end
|
78
80
|
|
79
|
-
|
81
|
+
def self.scrap_vacancies(page, count)
|
80
82
|
{
|
81
83
|
vacancies_total: scrap_vacancy(1, page, count),
|
82
84
|
vacancies_occupied: scrap_vacancy(2, page, count),
|
@@ -84,7 +86,7 @@ module Mwcrawler
|
|
84
86
|
}
|
85
87
|
end
|
86
88
|
|
87
|
-
|
89
|
+
def self.scrap_vacancy(vacancy_row, page, count)
|
88
90
|
page.css('.tabela-oferta')[count]
|
89
91
|
.css(".tabela-vagas tr:nth-child(#{vacancy_row}) td:nth-child(3)").text
|
90
92
|
end
|
data/lib/mwcrawler/courses.rb
CHANGED
data/lib/mwcrawler/crawler.rb
CHANGED
@@ -7,7 +7,6 @@ module Mwcrawler
|
|
7
7
|
|
8
8
|
SCRAPPERS = {
|
9
9
|
courses: Courses,
|
10
|
-
classes: Classes,
|
11
10
|
departments: Departments
|
12
11
|
}.freeze
|
13
12
|
|
@@ -18,6 +17,11 @@ module Mwcrawler
|
|
18
17
|
end
|
19
18
|
end
|
20
19
|
|
20
|
+
def classes(department_code, options = { log: false })
|
21
|
+
Options.init(options)
|
22
|
+
Classes.scrap department_code, options
|
23
|
+
end
|
24
|
+
|
21
25
|
def subjects(department, options = { log: false })
|
22
26
|
Options.init(options)
|
23
27
|
Subjects.scrap department, options
|
data/lib/mwcrawler/curriculum.rb
CHANGED
@@ -6,14 +6,16 @@ module Mwcrawler
|
|
6
6
|
def self.scrap(code)
|
7
7
|
rows = []
|
8
8
|
page = Helpers.set_crawler(code, 'graduacao/curso_dados.aspx?cod=', exact: true)
|
9
|
-
curriculums = page.css('.table-responsive h4').map { |item| item.children[0].text }
|
9
|
+
curriculums = page.css('.table-responsive h4').map { |item| item.children[0]&.text }
|
10
10
|
page.css('.table-responsive .table').each do |table|
|
11
11
|
rows << scrap_row(curriculums.shift, table)
|
12
12
|
end
|
13
13
|
rows
|
14
14
|
end
|
15
15
|
|
16
|
-
|
16
|
+
private
|
17
|
+
|
18
|
+
def self.scrap_row(curriculum_name, table)
|
17
19
|
row = {}
|
18
20
|
row['name'] = curriculum_name
|
19
21
|
row['degree'] = table.css('tr:first td').text
|
data/lib/mwcrawler/subjects.rb
CHANGED
@@ -13,24 +13,26 @@ module Mwcrawler
|
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
|
-
|
16
|
+
private
|
17
|
+
|
18
|
+
def self.subject_by_department(department)
|
17
19
|
page = Helpers.set_crawler(department, 'graduacao/oferta_dis.aspx?cod=', exact: true)
|
18
20
|
scrap_row(department, page)
|
19
21
|
end
|
20
22
|
|
21
|
-
|
23
|
+
def self.subject_by_id(id)
|
22
24
|
page = Helpers.set_crawler(id, 'graduacao/oferta_dados.aspx?cod=', exact: true)
|
23
25
|
row_init_by_id(page)
|
24
26
|
end
|
25
27
|
|
26
|
-
|
28
|
+
def self.row_init_by_id(page)
|
27
29
|
{ code: page.css('#datatable')[0].css('tr:nth-child(2) td').text.to_i,
|
28
30
|
name: page.css('#datatable')[0].css('tr:nth-child(3) td').text,
|
29
31
|
department: page.css('#datatable tr:first-child a').first['href'].scan(/\d+/)[0].to_i,
|
30
32
|
level: 'graduação' }
|
31
33
|
end
|
32
34
|
|
33
|
-
|
35
|
+
def self.scrap_row(dep_code, page)
|
34
36
|
subjects = []
|
35
37
|
length = page.css('#datatable tr td:nth-child(1)').count
|
36
38
|
length.times do |i|
|
@@ -39,7 +41,7 @@ module Mwcrawler
|
|
39
41
|
subjects
|
40
42
|
end
|
41
43
|
|
42
|
-
|
44
|
+
def self.row_init_by_department(page, dep_code, index)
|
43
45
|
{ code: page.css('#datatable tr td:nth-child(1)').map(&:text)[index].to_i,
|
44
46
|
name: page.css('#datatable tr td:nth-child(2)').map(&:text)[index],
|
45
47
|
department: dep_code.to_i,
|
data/lib/mwcrawler/version.rb
CHANGED