mwcrawler 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/mwcrawler/classes.rb +13 -11
- data/lib/mwcrawler/courses.rb +3 -1
- data/lib/mwcrawler/crawler.rb +5 -1
- data/lib/mwcrawler/curriculum.rb +4 -2
- data/lib/mwcrawler/departments.rb +3 -1
- data/lib/mwcrawler/subjects.rb +7 -5
- data/lib/mwcrawler/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f261cbbdfe2fcc489def8bbc14a36967ef454190e201b5c1f2c62c160a602ccc
|
|
4
|
+
data.tar.gz: 37253ef7297d23721b6f2a006568227581a2b3423032c98ba431b364d737ec25
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: '02422271583bc57dbfad728ef0897b573fde92020c65bc28bcf1dea3c552bf41cb09f211ed150b4c70196355b70334084d379d892fc91a99594d12af7112e8cf'
|
|
7
|
+
data.tar.gz: e9f8e1ee8b32cd646e5cfb06fa1de71c273518ae3c75fa5338b1c240e29d2cc6d84bba87ed25e8b9a09e6c08f7429d28ebcf11a08bc44f3dc30251851ede7547
|
data/Gemfile.lock
CHANGED
data/lib/mwcrawler/classes.rb
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
module Mwcrawler
|
|
4
4
|
# Scraps Classes by campus
|
|
5
5
|
module Classes
|
|
6
|
-
def self.scrap(department_code)
|
|
6
|
+
def self.scrap(department_code, options)
|
|
7
7
|
courses_links = scrap_courses_links(department_code)
|
|
8
8
|
rows = []
|
|
9
9
|
courses_links.each do |course_link|
|
|
@@ -12,13 +12,15 @@ module Mwcrawler
|
|
|
12
12
|
rows
|
|
13
13
|
end
|
|
14
14
|
|
|
15
|
-
|
|
15
|
+
private
|
|
16
|
+
|
|
17
|
+
def self.scrap_courses_links(department_code)
|
|
16
18
|
page = Helpers.set_crawler(department_code, 'graduacao/oferta_dis.aspx?cod=', exact: true)
|
|
17
19
|
page.css('#datatable tr td:nth-child(2) a')
|
|
18
20
|
.map { |link| link['href'] }
|
|
19
21
|
end
|
|
20
22
|
|
|
21
|
-
|
|
23
|
+
def self.scrap_classes(course_link)
|
|
22
24
|
rows = []
|
|
23
25
|
|
|
24
26
|
page = Helpers.set_crawler(course_link, 'graduacao/', exact: true)
|
|
@@ -31,7 +33,7 @@ module Mwcrawler
|
|
|
31
33
|
end
|
|
32
34
|
rows
|
|
33
35
|
end
|
|
34
|
-
|
|
36
|
+
def self.class_row_init(page, name)
|
|
35
37
|
{ department: page.css('#datatable tr:first-child a').text,
|
|
36
38
|
code: page.css('#datatable')[0].css('tr:nth-child(2) td').text.to_i,
|
|
37
39
|
course_code: scrap_course_code(page),
|
|
@@ -39,18 +41,18 @@ module Mwcrawler
|
|
|
39
41
|
name: name }
|
|
40
42
|
end
|
|
41
43
|
|
|
42
|
-
|
|
44
|
+
def self.scrap_course_code(page)
|
|
43
45
|
course_uri = page.css('#datatable')[0].css('tr:nth-child(3) td a').first['href']
|
|
44
46
|
Helpers.uri_query_params(course_uri)['cod'].to_i
|
|
45
47
|
end
|
|
46
48
|
|
|
47
|
-
|
|
49
|
+
def self.scrap_credit_hash(page)
|
|
48
50
|
credit_string = page.css('#datatable')[0].css('tr:nth-child(4) td').text
|
|
49
51
|
credits = credit_string.split('-').map(&:to_i)
|
|
50
52
|
{ theory: credits[0], practical: credits[1], extension: credits[2], study: credits[3] }
|
|
51
53
|
end
|
|
52
54
|
|
|
53
|
-
|
|
55
|
+
def self.scrap_row(row_init, page, count)
|
|
54
56
|
row = row_init
|
|
55
57
|
row.merge(scrap_vacancies(page, count))
|
|
56
58
|
# HORARIOS
|
|
@@ -60,7 +62,7 @@ module Mwcrawler
|
|
|
60
62
|
row
|
|
61
63
|
end
|
|
62
64
|
|
|
63
|
-
|
|
65
|
+
def self.scrap_schedules(page, count)
|
|
64
66
|
schedules = page.css('.tabela-oferta')[count]
|
|
65
67
|
.css('tr td:nth-child(4) .table')
|
|
66
68
|
.css('td').map(&:text)
|
|
@@ -68,7 +70,7 @@ module Mwcrawler
|
|
|
68
70
|
Helpers.format_hours(schedules)
|
|
69
71
|
end
|
|
70
72
|
|
|
71
|
-
|
|
73
|
+
def self.scrap_teachers(page, count)
|
|
72
74
|
teachers = page.css('.tabela-oferta')[count]
|
|
73
75
|
.css('tr td:nth-child(5) td')
|
|
74
76
|
.map(&:text)
|
|
@@ -76,7 +78,7 @@ module Mwcrawler
|
|
|
76
78
|
Helpers.format_teachers(teachers)
|
|
77
79
|
end
|
|
78
80
|
|
|
79
|
-
|
|
81
|
+
def self.scrap_vacancies(page, count)
|
|
80
82
|
{
|
|
81
83
|
vacancies_total: scrap_vacancy(1, page, count),
|
|
82
84
|
vacancies_occupied: scrap_vacancy(2, page, count),
|
|
@@ -84,7 +86,7 @@ module Mwcrawler
|
|
|
84
86
|
}
|
|
85
87
|
end
|
|
86
88
|
|
|
87
|
-
|
|
89
|
+
def self.scrap_vacancy(vacancy_row, page, count)
|
|
88
90
|
page.css('.tabela-oferta')[count]
|
|
89
91
|
.css(".tabela-vagas tr:nth-child(#{vacancy_row}) td:nth-child(3)").text
|
|
90
92
|
end
|
data/lib/mwcrawler/courses.rb
CHANGED
data/lib/mwcrawler/crawler.rb
CHANGED
|
@@ -7,7 +7,6 @@ module Mwcrawler
|
|
|
7
7
|
|
|
8
8
|
SCRAPPERS = {
|
|
9
9
|
courses: Courses,
|
|
10
|
-
classes: Classes,
|
|
11
10
|
departments: Departments
|
|
12
11
|
}.freeze
|
|
13
12
|
|
|
@@ -18,6 +17,11 @@ module Mwcrawler
|
|
|
18
17
|
end
|
|
19
18
|
end
|
|
20
19
|
|
|
20
|
+
def classes(department_code, options = { log: false })
|
|
21
|
+
Options.init(options)
|
|
22
|
+
Classes.scrap department_code, options
|
|
23
|
+
end
|
|
24
|
+
|
|
21
25
|
def subjects(department, options = { log: false })
|
|
22
26
|
Options.init(options)
|
|
23
27
|
Subjects.scrap department, options
|
data/lib/mwcrawler/curriculum.rb
CHANGED
|
@@ -6,14 +6,16 @@ module Mwcrawler
|
|
|
6
6
|
def self.scrap(code)
|
|
7
7
|
rows = []
|
|
8
8
|
page = Helpers.set_crawler(code, 'graduacao/curso_dados.aspx?cod=', exact: true)
|
|
9
|
-
curriculums = page.css('.table-responsive h4').map { |item| item.children[0]
|
|
9
|
+
curriculums = page.css('.table-responsive h4').map { |item| item.children[0]&.text }
|
|
10
10
|
page.css('.table-responsive .table').each do |table|
|
|
11
11
|
rows << scrap_row(curriculums.shift, table)
|
|
12
12
|
end
|
|
13
13
|
rows
|
|
14
14
|
end
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
private
|
|
17
|
+
|
|
18
|
+
def self.scrap_row(curriculum_name, table)
|
|
17
19
|
row = {}
|
|
18
20
|
row['name'] = curriculum_name
|
|
19
21
|
row['degree'] = table.css('tr:first td').text
|
data/lib/mwcrawler/subjects.rb
CHANGED
|
@@ -13,24 +13,26 @@ module Mwcrawler
|
|
|
13
13
|
end
|
|
14
14
|
end
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
private
|
|
17
|
+
|
|
18
|
+
def self.subject_by_department(department)
|
|
17
19
|
page = Helpers.set_crawler(department, 'graduacao/oferta_dis.aspx?cod=', exact: true)
|
|
18
20
|
scrap_row(department, page)
|
|
19
21
|
end
|
|
20
22
|
|
|
21
|
-
|
|
23
|
+
def self.subject_by_id(id)
|
|
22
24
|
page = Helpers.set_crawler(id, 'graduacao/oferta_dados.aspx?cod=', exact: true)
|
|
23
25
|
row_init_by_id(page)
|
|
24
26
|
end
|
|
25
27
|
|
|
26
|
-
|
|
28
|
+
def self.row_init_by_id(page)
|
|
27
29
|
{ code: page.css('#datatable')[0].css('tr:nth-child(2) td').text.to_i,
|
|
28
30
|
name: page.css('#datatable')[0].css('tr:nth-child(3) td').text,
|
|
29
31
|
department: page.css('#datatable tr:first-child a').first['href'].scan(/\d+/)[0].to_i,
|
|
30
32
|
level: 'graduação' }
|
|
31
33
|
end
|
|
32
34
|
|
|
33
|
-
|
|
35
|
+
def self.scrap_row(dep_code, page)
|
|
34
36
|
subjects = []
|
|
35
37
|
length = page.css('#datatable tr td:nth-child(1)').count
|
|
36
38
|
length.times do |i|
|
|
@@ -39,7 +41,7 @@ module Mwcrawler
|
|
|
39
41
|
subjects
|
|
40
42
|
end
|
|
41
43
|
|
|
42
|
-
|
|
44
|
+
def self.row_init_by_department(page, dep_code, index)
|
|
43
45
|
{ code: page.css('#datatable tr td:nth-child(1)').map(&:text)[index].to_i,
|
|
44
46
|
name: page.css('#datatable tr td:nth-child(2)').map(&:text)[index],
|
|
45
47
|
department: dep_code.to_i,
|
data/lib/mwcrawler/version.rb
CHANGED