@mvegter/scrapedin 1.0.26 → 1.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/dependabot.yml +11 -0
- package/LICENSE +201 -201
- package/README.md +36 -36
- package/package.json +34 -43
- package/src/company/company.js +35 -35
- package/src/company/companyScraperTemplate.js +30 -30
- package/src/logger.js +29 -29
- package/src/login.js +68 -69
- package/src/openPage.js +33 -33
- package/src/package.js +11 -11
- package/src/profile/cleanProfileData.js +99 -94
- package/src/profile/contactInfo.js +48 -48
- package/src/profile/profile.js +81 -81
- package/src/profile/profileScraperTemplate.js +189 -190
- package/src/profile/scrapAccomplishmentPanel.js +17 -17
- package/src/profile/scrollToPageBottom.js +24 -24
- package/src/profile/seeMoreButtons.js +42 -42
- package/src/scrapSection.js +49 -49
- package/src/scrapedin.js +41 -41
- package/.travis.yml +0 -5
- package/src/scrapedin.test.js +0 -338
package/src/profile/profile.js
CHANGED
|
@@ -1,81 +1,81 @@
|
|
|
1
|
-
const openPage = require('../openPage')
|
|
2
|
-
const scrapSection = require('../scrapSection')
|
|
3
|
-
const scrapAccomplishmentPanel = require('./scrapAccomplishmentPanel')
|
|
4
|
-
const scrollToPageBottom = require('./scrollToPageBottom')
|
|
5
|
-
const seeMoreButtons = require('./seeMoreButtons')
|
|
6
|
-
const contactInfo = require('./contactInfo')
|
|
7
|
-
const template = require('./profileScraperTemplate')
|
|
8
|
-
const cleanProfileData = require('./cleanProfileData')
|
|
9
|
-
|
|
10
|
-
const logger = require('../logger')(__filename)
|
|
11
|
-
|
|
12
|
-
module.exports = async (browser, cookies, url, waitTimeToScrapMs = 500, hasToGetContactInfo = false, puppeteerAuthenticate = undefined) => {
|
|
13
|
-
logger.info(`starting scraping url: ${url}`)
|
|
14
|
-
|
|
15
|
-
const page = await openPage({ browser, cookies, url, puppeteerAuthenticate })
|
|
16
|
-
const profilePageIndicatorSelector = '.pv-profile-section'
|
|
17
|
-
await page.waitForSelector(profilePageIndicatorSelector, { timeout: 5000 })
|
|
18
|
-
.catch(() => {
|
|
19
|
-
//why doesn't throw error instead of continuing scraping?
|
|
20
|
-
//because it can be just a false negative meaning LinkedIn only changed that selector but everything else is fine :)
|
|
21
|
-
logger.warn('profile selector was not found')
|
|
22
|
-
})
|
|
23
|
-
|
|
24
|
-
logger.info('scrolling page to the bottom')
|
|
25
|
-
await scrollToPageBottom(page)
|
|
26
|
-
|
|
27
|
-
if(waitTimeToScrapMs) {
|
|
28
|
-
logger.info(`applying 1st delay`)
|
|
29
|
-
await new Promise((resolve) => { setTimeout(() => { resolve() }, waitTimeToScrapMs / 2)})
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
await seeMoreButtons.clickAll(page)
|
|
33
|
-
|
|
34
|
-
if(waitTimeToScrapMs) {
|
|
35
|
-
logger.info(`applying 2nd (and last) delay`)
|
|
36
|
-
await new Promise((resolve) => { setTimeout(() => { resolve() }, waitTimeToScrapMs / 2)})
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
const [profile] = await scrapSection(page, template.profile)
|
|
40
|
-
const [about] = await scrapSection(page, template.about)
|
|
41
|
-
const positions = await scrapSection(page, template.positions)
|
|
42
|
-
const educations = await scrapSection(page, template.educations)
|
|
43
|
-
const [recommendationsCount] = await scrapSection(page, template.recommendationsCount)
|
|
44
|
-
const recommendationsReceived = await scrapSection(page, template.recommendationsReceived)
|
|
45
|
-
const recommendationsGiven = await scrapSection(page, template.recommendationsGiven)
|
|
46
|
-
const skills = await scrapSection(page, template.skills)
|
|
47
|
-
const accomplishments = await scrapSection(page, template.accomplishments)
|
|
48
|
-
const courses = await scrapAccomplishmentPanel(page, 'courses')
|
|
49
|
-
const languages = await scrapAccomplishmentPanel(page, 'languages')
|
|
50
|
-
const projects = await scrapAccomplishmentPanel(page, 'projects')
|
|
51
|
-
const volunteerExperience = await scrapSection(page, template.volunteerExperience)
|
|
52
|
-
const peopleAlsoViewed = await scrapSection(page, template.peopleAlsoViewed)
|
|
53
|
-
const contact = hasToGetContactInfo ? await contactInfo(page) : []
|
|
54
|
-
|
|
55
|
-
await page.close()
|
|
56
|
-
logger.info(`finished scraping url: ${url}`)
|
|
57
|
-
|
|
58
|
-
const rawProfile = {
|
|
59
|
-
profile,
|
|
60
|
-
about,
|
|
61
|
-
positions,
|
|
62
|
-
educations,
|
|
63
|
-
skills,
|
|
64
|
-
recommendations: {
|
|
65
|
-
givenCount: recommendationsCount ? recommendationsCount.given : "0",
|
|
66
|
-
receivedCount: recommendationsCount ? recommendationsCount.received : "0",
|
|
67
|
-
given: recommendationsReceived,
|
|
68
|
-
received: recommendationsGiven
|
|
69
|
-
},
|
|
70
|
-
accomplishments,
|
|
71
|
-
courses,
|
|
72
|
-
languages,
|
|
73
|
-
projects,
|
|
74
|
-
peopleAlsoViewed,
|
|
75
|
-
volunteerExperience,
|
|
76
|
-
contact
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
const cleanedProfile = cleanProfileData(rawProfile)
|
|
80
|
-
return cleanedProfile
|
|
81
|
-
}
|
|
1
|
+
const openPage = require('../openPage')
|
|
2
|
+
const scrapSection = require('../scrapSection')
|
|
3
|
+
const scrapAccomplishmentPanel = require('./scrapAccomplishmentPanel')
|
|
4
|
+
const scrollToPageBottom = require('./scrollToPageBottom')
|
|
5
|
+
const seeMoreButtons = require('./seeMoreButtons')
|
|
6
|
+
const contactInfo = require('./contactInfo')
|
|
7
|
+
const template = require('./profileScraperTemplate')
|
|
8
|
+
const cleanProfileData = require('./cleanProfileData')
|
|
9
|
+
|
|
10
|
+
const logger = require('../logger')(__filename)
|
|
11
|
+
|
|
12
|
+
module.exports = async (browser, cookies, url, waitTimeToScrapMs = 500, hasToGetContactInfo = false, puppeteerAuthenticate = undefined) => {
|
|
13
|
+
logger.info(`starting scraping url: ${url}`)
|
|
14
|
+
|
|
15
|
+
const page = await openPage({ browser, cookies, url, puppeteerAuthenticate })
|
|
16
|
+
const profilePageIndicatorSelector = '.pv-profile-section'
|
|
17
|
+
await page.waitForSelector(profilePageIndicatorSelector, { timeout: 5000 })
|
|
18
|
+
.catch(() => {
|
|
19
|
+
//why doesn't throw error instead of continuing scraping?
|
|
20
|
+
//because it can be just a false negative meaning LinkedIn only changed that selector but everything else is fine :)
|
|
21
|
+
logger.warn('profile selector was not found')
|
|
22
|
+
})
|
|
23
|
+
|
|
24
|
+
logger.info('scrolling page to the bottom')
|
|
25
|
+
await scrollToPageBottom(page)
|
|
26
|
+
|
|
27
|
+
if(waitTimeToScrapMs) {
|
|
28
|
+
logger.info(`applying 1st delay`)
|
|
29
|
+
await new Promise((resolve) => { setTimeout(() => { resolve() }, waitTimeToScrapMs / 2)})
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
await seeMoreButtons.clickAll(page)
|
|
33
|
+
|
|
34
|
+
if(waitTimeToScrapMs) {
|
|
35
|
+
logger.info(`applying 2nd (and last) delay`)
|
|
36
|
+
await new Promise((resolve) => { setTimeout(() => { resolve() }, waitTimeToScrapMs / 2)})
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
const [profile] = await scrapSection(page, template.profile)
|
|
40
|
+
const [about] = await scrapSection(page, template.about)
|
|
41
|
+
const positions = await scrapSection(page, template.positions)
|
|
42
|
+
const educations = await scrapSection(page, template.educations)
|
|
43
|
+
const [recommendationsCount] = await scrapSection(page, template.recommendationsCount)
|
|
44
|
+
const recommendationsReceived = await scrapSection(page, template.recommendationsReceived)
|
|
45
|
+
const recommendationsGiven = await scrapSection(page, template.recommendationsGiven)
|
|
46
|
+
const skills = await scrapSection(page, template.skills)
|
|
47
|
+
const accomplishments = await scrapSection(page, template.accomplishments)
|
|
48
|
+
const courses = await scrapAccomplishmentPanel(page, 'courses')
|
|
49
|
+
const languages = await scrapAccomplishmentPanel(page, 'languages')
|
|
50
|
+
const projects = await scrapAccomplishmentPanel(page, 'projects')
|
|
51
|
+
const volunteerExperience = await scrapSection(page, template.volunteerExperience)
|
|
52
|
+
const peopleAlsoViewed = await scrapSection(page, template.peopleAlsoViewed)
|
|
53
|
+
const contact = hasToGetContactInfo ? await contactInfo(page) : []
|
|
54
|
+
|
|
55
|
+
await page.close()
|
|
56
|
+
logger.info(`finished scraping url: ${url}`)
|
|
57
|
+
|
|
58
|
+
const rawProfile = {
|
|
59
|
+
profile,
|
|
60
|
+
about,
|
|
61
|
+
positions,
|
|
62
|
+
educations,
|
|
63
|
+
skills,
|
|
64
|
+
recommendations: {
|
|
65
|
+
givenCount: recommendationsCount ? recommendationsCount.given : "0",
|
|
66
|
+
receivedCount: recommendationsCount ? recommendationsCount.received : "0",
|
|
67
|
+
given: recommendationsReceived,
|
|
68
|
+
received: recommendationsGiven
|
|
69
|
+
},
|
|
70
|
+
accomplishments,
|
|
71
|
+
courses,
|
|
72
|
+
languages,
|
|
73
|
+
projects,
|
|
74
|
+
peopleAlsoViewed,
|
|
75
|
+
volunteerExperience,
|
|
76
|
+
contact
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
const cleanedProfile = cleanProfileData(rawProfile)
|
|
80
|
+
return cleanedProfile
|
|
81
|
+
}
|
|
@@ -1,190 +1,189 @@
|
|
|
1
|
-
const profileSelector = '.core-rail > *:first-child section >'
|
|
2
|
-
|
|
3
|
-
const template = {
|
|
4
|
-
profile: {
|
|
5
|
-
selector: '.pv-top-card',
|
|
6
|
-
fields: {
|
|
7
|
-
name: `.pv-text-details__left-panel:first-child h1`,
|
|
8
|
-
headline: `.pv-text-details__left-panel:first-child .text-body-medium`,
|
|
9
|
-
location: `.pv-text-details__left-panel:first-child .pb2`,
|
|
10
|
-
connections: `.pv-top-card--list span`,
|
|
11
|
-
imageurl: {
|
|
12
|
-
selector: `img.pv-top-card__photo`,
|
|
13
|
-
attribute: 'src'
|
|
14
|
-
}
|
|
15
|
-
}
|
|
16
|
-
},
|
|
17
|
-
about: {
|
|
18
|
-
selector: '.pv-about-section',
|
|
19
|
-
fields: {
|
|
20
|
-
text: 'div'
|
|
21
|
-
}
|
|
22
|
-
},
|
|
23
|
-
positions: {
|
|
24
|
-
selector: '
|
|
25
|
-
fields: {
|
|
26
|
-
title: 'h3',
|
|
27
|
-
link: {
|
|
28
|
-
selector: 'a',
|
|
29
|
-
attribute: 'href',
|
|
30
|
-
},
|
|
31
|
-
url: {
|
|
32
|
-
selector: 'a',
|
|
33
|
-
attribute: 'href'
|
|
34
|
-
},
|
|
35
|
-
companyName: '.
|
|
36
|
-
location: '.pv-entity__location span:last-child',
|
|
37
|
-
description: '.pv-entity__description',
|
|
38
|
-
date1: '.pv-entity__date-range span:last-child',
|
|
39
|
-
date2: '.pv-entity__bullet-item-v2',
|
|
40
|
-
roles: {
|
|
41
|
-
selector: 'li',
|
|
42
|
-
hasChildrenFields: true,
|
|
43
|
-
fields: {
|
|
44
|
-
title: '
|
|
45
|
-
description: '.pv-entity__description',
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
module.exports = template
|
|
1
|
+
const profileSelector = '.core-rail > *:first-child section >'
|
|
2
|
+
|
|
3
|
+
// Builds a `{ selector, attribute }` descriptor, used for fields that read an
// element attribute (for example `href` or `src`).
const attributeOf = (selector, attribute) => ({ selector, attribute })

// Field map shared by both recommendation sections. A fresh object is built on
// every call so the two sections never share the same instance.
const buildRecommendationFields = () => ({
  user: attributeOf('.pv-recommendation-entity__member', 'href'),
  text: 'blockquote.pv-recommendation-entity__text',
  profileImage: attributeOf('a img', 'src'),
  name: { selector: 'a h3' },
  userDescription: { selector: '.pv-recommendation-entity__headline' }
})

// CSS selectors describing each profile section. Every entry has a `selector`
// for the section and a `fields` map with the selectors of its values.
const template = {
  // Top card of the profile page.
  profile: {
    selector: '.pv-top-card',
    fields: {
      name: '.pv-text-details__left-panel:first-child h1',
      headline: '.pv-text-details__left-panel:first-child .text-body-medium',
      location: '.pv-text-details__left-panel:first-child .pb2 span',
      connections: '.pv-top-card--list span',
      imageurl: attributeOf('img.pv-top-card__photo', 'src')
    }
  },

  about: {
    selector: '.pv-about-section',
    fields: { text: 'div' }
  },

  // Work experience entries; `roles` describes the nested list items.
  positions: {
    selector: 'section[id$="EXPERIENCE-en-US"] li.artdeco-list__item',
    fields: {
      title: 'h3',
      link: attributeOf('a', 'href'),
      url: attributeOf('a', 'href'),
      companyName: 'span.t-bold span',
      location: '.pv-entity__location span:last-child',
      description: '.pv-entity__description',
      date1: '.pv-entity__date-range span:last-child',
      date2: '.pv-entity__bullet-item-v2',
      roles: {
        selector: 'li',
        hasChildrenFields: true,
        fields: {
          title: 'span.t-bold span:last-child',
          description: '.pv-entity__description',
          date: '.t-14.t-normal:last-child span:first-child',
          location: '.pv-entity__location span:last-child'
        }
      }
    }
  },

  educations: {
    selector: '#education-section li',
    fields: {
      title: 'h3',
      degree: 'span[class=pv-entity__comma-item]',
      url: attributeOf('a', 'href'),
      fieldOfStudy: 'p.pv-entity__fos span:nth-child(2)',
      date1: '.pv-entity__dates time:nth-child(1)',
      date2: '.pv-entity__dates time:nth-child(2)',
      description: '.pv-entity__description'
    }
  },

  skills: {
    selector: '.pv-skill-category-entity__skill-wrapper',
    fields: {
      title: '.pv-skill-category-entity__name-text',
      count: '.pv-skill-category-entity__endorsement-count'
    }
  },

  // The first tab is read as "received", the second one as "given".
  recommendationsCount: {
    selector: '.recommendations-inlining',
    fields: {
      received: '.artdeco-tab:nth-child(1)',
      given: '.artdeco-tab:nth-child(2)'
    }
  },

  recommendationsReceived: {
    selector: '.recommendations-inlining',
    fields: buildRecommendationFields()
  },

  recommendationsGiven: {
    selector: '.artdeco-tabpanel li.pv-recommendation-entity',
    fields: buildRecommendationFields()
  },

  accomplishments: {
    selector: '.pv-accomplishments-section > div',
    fields: {
      count: 'h3 span:last-child',
      title: '.pv-accomplishments-block__title',
      items: {
        selector: 'li',
        isMultipleFields: true
      }
    }
  },

  peopleAlsoViewed: {
    selector: 'li.pv-browsemap-section__member-container',
    fields: {
      user: attributeOf('a', 'href'),
      text: 'p',
      profileImage: attributeOf('a img', 'src'),
      name: { selector: '.name' }
    }
  },

  volunteerExperience: {
    selector: 'section.volunteering-section li',
    fields: {
      title: 'h3',
      experience: 'span[class=pv-entity__secondary-title]',
      location: '.pv-entity__location span:nth-child(2)',
      description: '.pv-volunteer-causes',
      date1: '.pv-entity__date-range span:nth-child(2)',
      date2: '.pv-entity__bullet-item'
    }
  },

  // NOTE(review): unlike `languages` and `projects`, this selector targets the
  // whole accomplishments section instead of the list items of one block -
  // confirm whether '.pv-accomplishments-block.courses li' was intended.
  courses: {
    selector: '.pv-accomplishments-section',
    fields: {
      name: '.pv-accomplishment-entity__title',
      year: '.pv-accomplishment-entity__course-number'
    }
  },

  languages: {
    selector: '.pv-accomplishments-block.languages li',
    fields: {
      name: '.pv-accomplishment-entity__title',
      proficiency: '.pv-accomplishment-entity__proficiency'
    }
  },

  projects: {
    selector: '.pv-accomplishments-block.projects li',
    fields: {
      name: '.pv-accomplishment-entity__title',
      date: '.pv-accomplishment-entity__date',
      description: '.pv-accomplishment-entity__description',
      link: attributeOf('.mt4', 'href')
    }
  }
}
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
module.exports = template
|
|
@@ -1,18 +1,18 @@
|
|
|
1
|
-
const scrapSection = require('../scrapSection');
|
|
2
|
-
const template = require('./profileScraperTemplate');
|
|
3
|
-
|
|
4
|
-
const scrapAccomplishmentPanel = async (page, section) => {
|
|
5
|
-
const queryString = `.pv-accomplishments-block.${section} button`
|
|
6
|
-
|
|
7
|
-
const openingButton = await page.$(queryString);
|
|
8
|
-
|
|
9
|
-
if (openingButton) {
|
|
10
|
-
await page.evaluate((q) => {
|
|
11
|
-
document.querySelector(q).click();
|
|
12
|
-
}, queryString);
|
|
13
|
-
|
|
14
|
-
return scrapSection(page, template[section]);
|
|
15
|
-
}
|
|
16
|
-
};
|
|
17
|
-
|
|
1
|
+
const scrapSection = require('../scrapSection');
|
|
2
|
+
const template = require('./profileScraperTemplate');
|
|
3
|
+
|
|
4
|
+
/**
 * Opens one accomplishments panel and scrapes it with the matching template.
 *
 * Looks for the button inside `.pv-accomplishments-block.<section>`. When the
 * button exists it is clicked inside the page context and the section is
 * scraped with `template[section]`.
 *
 * @param {object} page - page object exposing `$` and `evaluate`.
 * @param {string} section - panel name, also used as the key into `template`.
 * @returns {Promise<*>} the result of `scrapSection`, or `undefined` when the
 *   panel button is not present on the page.
 */
const scrapAccomplishmentPanel = async (page, section) => {
  const panelButtonSelector = `.pv-accomplishments-block.${section} button`;
  const panelButton = await page.$(panelButtonSelector);

  // No button for this panel: nothing is clicked or scraped.
  if (!panelButton) {
    return undefined;
  }

  // The click is performed by the page's own DOM API, looked up by selector.
  await page.evaluate((selector) => {
    document.querySelector(selector).click();
  }, panelButtonSelector);

  return scrapSection(page, template[section]);
};
|
|
17
|
+
|
|
18
18
|
module.exports = scrapAccomplishmentPanel;
|
|
@@ -1,24 +1,24 @@
|
|
|
1
|
-
const logger = require('../logger')(__filename)
|
|
2
|
-
|
|
3
|
-
module.exports = async (page) => {
|
|
4
|
-
const MAX_TIMES_TO_SCROLL = 25
|
|
5
|
-
const TIMEOUT_BETWEEN_SCROLLS = 500
|
|
6
|
-
const PAGE_BOTTOM_SELECTOR_STRING = '#expanded-footer'
|
|
7
|
-
|
|
8
|
-
for (let i = 0; i < MAX_TIMES_TO_SCROLL; i++) {
|
|
9
|
-
await page.evaluate(() => window.scrollBy(0, window.innerHeight))
|
|
10
|
-
|
|
11
|
-
const hasReachedEnd = await page.waitForSelector(PAGE_BOTTOM_SELECTOR_STRING, {
|
|
12
|
-
visible: true,
|
|
13
|
-
timeout: TIMEOUT_BETWEEN_SCROLLS
|
|
14
|
-
}).catch(() => {
|
|
15
|
-
logger.info(`scrolling to page bottom (${i + 1})`)
|
|
16
|
-
})
|
|
17
|
-
|
|
18
|
-
if (hasReachedEnd) {
|
|
19
|
-
return
|
|
20
|
-
}
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
logger.warn('page bottom not found')
|
|
24
|
-
}
|
|
1
|
+
const logger = require('../logger')(__filename)
|
|
2
|
+
|
|
3
|
+
module.exports = async (page) => {
|
|
4
|
+
const MAX_TIMES_TO_SCROLL = 25
|
|
5
|
+
const TIMEOUT_BETWEEN_SCROLLS = 500
|
|
6
|
+
const PAGE_BOTTOM_SELECTOR_STRING = '#expanded-footer'
|
|
7
|
+
|
|
8
|
+
for (let i = 0; i < MAX_TIMES_TO_SCROLL; i++) {
|
|
9
|
+
await page.evaluate(() => window.scrollBy(0, window.innerHeight))
|
|
10
|
+
|
|
11
|
+
const hasReachedEnd = await page.waitForSelector(PAGE_BOTTOM_SELECTOR_STRING, {
|
|
12
|
+
visible: true,
|
|
13
|
+
timeout: TIMEOUT_BETWEEN_SCROLLS
|
|
14
|
+
}).catch(() => {
|
|
15
|
+
logger.info(`scrolling to page bottom (${i + 1})`)
|
|
16
|
+
})
|
|
17
|
+
|
|
18
|
+
if (hasReachedEnd) {
|
|
19
|
+
return
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
logger.warn('page bottom not found')
|
|
24
|
+
}
|