@mvegter/scrapedin 1.0.28 → 1.0.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/dependabot.yml +11 -11
- package/LICENSE +201 -201
- package/README.md +36 -36
- package/package.json +34 -34
- package/src/company/company.js +35 -35
- package/src/company/companyScraperTemplate.js +30 -30
- package/src/logger.js +29 -29
- package/src/login.js +68 -68
- package/src/openPage.js +33 -33
- package/src/package.js +11 -11
- package/src/profile/cleanProfileData.js +94 -94
- package/src/profile/contactInfo.js +48 -48
- package/src/profile/profile.js +81 -81
- package/src/profile/profileScraperTemplate.js +190 -190
- package/src/profile/scrapAccomplishmentPanel.js +17 -17
- package/src/profile/scrollToPageBottom.js +24 -24
- package/src/profile/seeMoreButtons.js +42 -42
- package/src/scrapSection.js +49 -49
- package/src/scrapedin.js +41 -41
|
@@ -1,30 +1,30 @@
|
|
|
1
|
-
// Scraping template for a company page. Each top-level section pairs a root
// `selector` with the `fields` to read for it.
// NOTE(review): how `selector`, `attribute` and `isMultipleFields` are
// interpreted is decided by the template consumer, which is not visible here.

// Fields read from the top card of the company page.
const topCardFields = {
  name: 'h1',
  headline: 'p',
  imageurl: {
    selector: 'img.org-top-card-primary-content__logo',
    attribute: 'src'
  }
}

// Fields read from the "about" rail: free-text overview plus the dt/dd pairs
// of the details list (the employees-on-LinkedIn count <dd> is excluded).
const aboutFields = {
  overview: 'p',
  types: {
    selector: 'dl dt',
    isMultipleFields: true
  },
  values: {
    selector: 'dl dd:not(.org-page-details__employees-on-linkedin-count)',
    isMultipleFields: true
  }
}

const template = {
  profile: {
    selector: '.org-top-card',
    fields: topCardFields
  },
  about: {
    selector: '.org-grid__core-rail--no-margin-left',
    fields: aboutFields
  }
}
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
module.exports = template
|
|
1
|
+
// Scraping template for a company page. Each top-level section pairs a root
// `selector` with the `fields` to read for it.
// NOTE(review): how `selector`, `attribute` and `isMultipleFields` are
// interpreted is decided by the template consumer, which is not visible here.

// Fields read from the top card of the company page.
const topCardFields = {
  name: 'h1',
  headline: 'p',
  imageurl: {
    selector: 'img.org-top-card-primary-content__logo',
    attribute: 'src'
  }
}

// Fields read from the "about" rail: free-text overview plus the dt/dd pairs
// of the details list (the employees-on-LinkedIn count <dd> is excluded).
const aboutFields = {
  overview: 'p',
  types: {
    selector: 'dl dt',
    isMultipleFields: true
  },
  values: {
    selector: 'dl dd:not(.org-page-details__employees-on-linkedin-count)',
    isMultipleFields: true
  }
}

const template = {
  profile: {
    selector: '.org-top-card',
    fields: topCardFields
  },
  about: {
    selector: '.org-grid__core-rail--no-margin-left',
    fields: aboutFields
  }
}
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
module.exports = template
|
package/src/logger.js
CHANGED
|
@@ -1,29 +1,29 @@
|
|
|
1
|
-
const path = require('path')
|
|
2
|
-
const pkg = require('./package')
|
|
3
|
-
const winston = require('winston')
|
|
4
|
-
// Final formatter of the chain: renders each entry as
// "<package name>: <timestamp> <level>: <message>".
const renderLine = winston.format.printf(
  ({ timestamp, level, message }) => `${pkg.name}: ${timestamp} ${level}: ${message}`
)

// Single shared winston logger writing to the console only.
const logger = winston.createLogger({
  format: winston.format.combine(
    winston.format.splat(),
    winston.format.simple(),
    winston.format.timestamp(),
    winston.format.colorize(),
    renderLine
  ),
  transports: [new winston.transports.Console()]
})

/**
 * Builds a logger facade that prefixes every message with the calling file.
 *
 * @param {string} absoluteFilePath - path of the calling module (callers pass `__filename`).
 * @returns {{info: Function, warn: Function, error: Function, stopLogging: Function}}
 */
const loggerWrapper = (absoluteFilePath) => {
  // This module lives in the source root, so a path relative to __dirname is
  // exactly the short per-file tag wanted in the log output.
  const file = path.relative(__dirname, absoluteFilePath)
  const tagged = (message) => `[${file}] ${message}`

  // Prefer the stack trace when present, otherwise the raw value (or nothing).
  const errorSuffix = (error) => {
    if (error && error.stack) {
      return error.stack
    }
    return error || ''
  }

  return {
    info: (message) => logger.info(tagged(message)),
    warn: (message) => logger.warn(tagged(message)),
    error: (message, error) => logger.error(`${tagged(message)}${errorSuffix(error)}`),
    // Silences the shared logger for the rest of the process.
    stopLogging: () => {
      logger.silent = true
    }
  }
}
|
|
28
|
-
|
|
29
|
-
module.exports = loggerWrapper
|
|
1
|
+
const path = require('path')
|
|
2
|
+
const pkg = require('./package')
|
|
3
|
+
const winston = require('winston')
|
|
4
|
+
// Final formatter of the chain: renders each entry as
// "<package name>: <timestamp> <level>: <message>".
const renderLine = winston.format.printf(
  ({ timestamp, level, message }) => `${pkg.name}: ${timestamp} ${level}: ${message}`
)

// Single shared winston logger writing to the console only.
const logger = winston.createLogger({
  format: winston.format.combine(
    winston.format.splat(),
    winston.format.simple(),
    winston.format.timestamp(),
    winston.format.colorize(),
    renderLine
  ),
  transports: [new winston.transports.Console()]
})

/**
 * Builds a logger facade that prefixes every message with the calling file.
 *
 * @param {string} absoluteFilePath - path of the calling module (callers pass `__filename`).
 * @returns {{info: Function, warn: Function, error: Function, stopLogging: Function}}
 */
const loggerWrapper = (absoluteFilePath) => {
  // This module lives in the source root, so a path relative to __dirname is
  // exactly the short per-file tag wanted in the log output.
  const file = path.relative(__dirname, absoluteFilePath)
  const tagged = (message) => `[${file}] ${message}`

  // Prefer the stack trace when present, otherwise the raw value (or nothing).
  const errorSuffix = (error) => {
    if (error && error.stack) {
      return error.stack
    }
    return error || ''
  }

  return {
    info: (message) => logger.info(tagged(message)),
    warn: (message) => logger.warn(tagged(message)),
    error: (message, error) => logger.error(`${tagged(message)}${errorSuffix(error)}`),
    // Silences the shared logger for the rest of the process.
    stopLogging: () => {
      logger.silent = true
    }
  }
}
|
|
28
|
+
|
|
29
|
+
module.exports = loggerWrapper
|
package/src/login.js
CHANGED
|
@@ -1,68 +1,68 @@
|
|
|
1
|
-
const openPage = require('./openPage')
|
|
2
|
-
const logger = require('./logger')(__filename)
|
|
3
|
-
const pkg = require('./package')
|
|
4
|
-
|
|
5
|
-
module.exports = async (browser, email, password) => {
|
|
6
|
-
const url = 'https://www.linkedin.com/login'
|
|
7
|
-
const page = await openPage({ browser, url })
|
|
8
|
-
logger.info(`logging at: ${url}`)
|
|
9
|
-
|
|
10
|
-
await page.waitForSelector('#username')
|
|
11
|
-
|
|
12
|
-
await page.$('#username')
|
|
13
|
-
.then((emailElement) => emailElement.type(email))
|
|
14
|
-
await page.$('#password')
|
|
15
|
-
.then((passwordElement) => passwordElement.type(password))
|
|
16
|
-
|
|
17
|
-
await page.$x("//button[contains(text(), 'Sign in')]")
|
|
18
|
-
.then((button) => button[0].click())
|
|
19
|
-
|
|
20
|
-
return page.waitForSelector('input[role=combobox]', {
|
|
21
|
-
timeout: 15000
|
|
22
|
-
})
|
|
23
|
-
.then(async () => {
|
|
24
|
-
logger.info('logged feed page selector found')
|
|
25
|
-
await page.close()
|
|
26
|
-
})
|
|
27
|
-
.catch(async () => {
|
|
28
|
-
logger.warn('successful login element was not found')
|
|
29
|
-
const emailError = await page.evaluate(() => {
|
|
30
|
-
const e = document.querySelector('div[error-for=username]')
|
|
31
|
-
if (!e) { return false }
|
|
32
|
-
const style = window.getComputedStyle(e)
|
|
33
|
-
return style && style.display !== 'none' && style.visibility !== 'hidden' && style.opacity !== '0'
|
|
34
|
-
})
|
|
35
|
-
|
|
36
|
-
const passwordError = await page.evaluate(() => {
|
|
37
|
-
const e = document.querySelector('div[error-for=password]')
|
|
38
|
-
if (!e) { return false }
|
|
39
|
-
const style = window.getComputedStyle(e)
|
|
40
|
-
return style && style.display !== 'none' && style.visibility !== 'hidden' && style.opacity !== '0'
|
|
41
|
-
})
|
|
42
|
-
|
|
43
|
-
const manualChallengeRequested = await page.evaluate(() => {
|
|
44
|
-
const e = document.querySelector('.flow-challenge-content')
|
|
45
|
-
if (!e) { return false }
|
|
46
|
-
const style = window.getComputedStyle(e)
|
|
47
|
-
return style && style.display !== 'none' && style.visibility !== 'hidden' && style.opacity !== '0'
|
|
48
|
-
})
|
|
49
|
-
|
|
50
|
-
if (emailError) {
|
|
51
|
-
logger.info('wrong username element found')
|
|
52
|
-
return Promise.reject(new Error(`linkedin: invalid username: ${email}`))
|
|
53
|
-
}
|
|
54
|
-
|
|
55
|
-
if (passwordError) {
|
|
56
|
-
logger.info('wrong password element found')
|
|
57
|
-
return Promise.reject(new Error('linkedin: invalid password'))
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
if (page.$(manualChallengeRequested)) {
|
|
61
|
-
logger.warn('manual check was required')
|
|
62
|
-
return Promise.reject(new Error(`linkedin: manual check was required, verify if your login is properly working manually or report this issue: ${pkg.name} ${pkg.version} ${pkg.bugs.url}`))
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
logger.error('could not find any element to retrieve a proper error')
|
|
66
|
-
return Promise.reject(new Error(`${pkg.name} ${pkg.version} login is not working, please report: ${pkg.bugs.url}`))
|
|
67
|
-
})
|
|
68
|
-
}
|
|
1
|
+
const openPage = require('./openPage')
|
|
2
|
+
const logger = require('./logger')(__filename)
|
|
3
|
+
const pkg = require('./package')
|
|
4
|
+
|
|
5
|
+
module.exports = async (browser, email, password) => {
|
|
6
|
+
const url = 'https://www.linkedin.com/login'
|
|
7
|
+
const page = await openPage({ browser, url })
|
|
8
|
+
logger.info(`logging at: ${url}`)
|
|
9
|
+
|
|
10
|
+
await page.waitForSelector('#username')
|
|
11
|
+
|
|
12
|
+
await page.$('#username')
|
|
13
|
+
.then((emailElement) => emailElement.type(email))
|
|
14
|
+
await page.$('#password')
|
|
15
|
+
.then((passwordElement) => passwordElement.type(password))
|
|
16
|
+
|
|
17
|
+
await page.$x("//button[contains(text(), 'Sign in')]")
|
|
18
|
+
.then((button) => button[0].click())
|
|
19
|
+
|
|
20
|
+
return page.waitForSelector('input[role=combobox]', {
|
|
21
|
+
timeout: 15000
|
|
22
|
+
})
|
|
23
|
+
.then(async () => {
|
|
24
|
+
logger.info('logged feed page selector found')
|
|
25
|
+
await page.close()
|
|
26
|
+
})
|
|
27
|
+
.catch(async () => {
|
|
28
|
+
logger.warn('successful login element was not found')
|
|
29
|
+
const emailError = await page.evaluate(() => {
|
|
30
|
+
const e = document.querySelector('div[error-for=username]')
|
|
31
|
+
if (!e) { return false }
|
|
32
|
+
const style = window.getComputedStyle(e)
|
|
33
|
+
return style && style.display !== 'none' && style.visibility !== 'hidden' && style.opacity !== '0'
|
|
34
|
+
})
|
|
35
|
+
|
|
36
|
+
const passwordError = await page.evaluate(() => {
|
|
37
|
+
const e = document.querySelector('div[error-for=password]')
|
|
38
|
+
if (!e) { return false }
|
|
39
|
+
const style = window.getComputedStyle(e)
|
|
40
|
+
return style && style.display !== 'none' && style.visibility !== 'hidden' && style.opacity !== '0'
|
|
41
|
+
})
|
|
42
|
+
|
|
43
|
+
const manualChallengeRequested = await page.evaluate(() => {
|
|
44
|
+
const e = document.querySelector('.flow-challenge-content')
|
|
45
|
+
if (!e) { return false }
|
|
46
|
+
const style = window.getComputedStyle(e)
|
|
47
|
+
return style && style.display !== 'none' && style.visibility !== 'hidden' && style.opacity !== '0'
|
|
48
|
+
})
|
|
49
|
+
|
|
50
|
+
if (emailError) {
|
|
51
|
+
logger.info('wrong username element found')
|
|
52
|
+
return Promise.reject(new Error(`linkedin: invalid username: ${email}`))
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
if (passwordError) {
|
|
56
|
+
logger.info('wrong password element found')
|
|
57
|
+
return Promise.reject(new Error('linkedin: invalid password'))
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
if (page.$(manualChallengeRequested)) {
|
|
61
|
+
logger.warn('manual check was required')
|
|
62
|
+
return Promise.reject(new Error(`linkedin: manual check was required, verify if your login is properly working manually or report this issue: ${pkg.name} ${pkg.version} ${pkg.bugs.url}`))
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
logger.error('could not find any element to retrieve a proper error')
|
|
66
|
+
return Promise.reject(new Error(`${pkg.name} ${pkg.version} login is not working, please report: ${pkg.bugs.url}`))
|
|
67
|
+
})
|
|
68
|
+
}
|
package/src/openPage.js
CHANGED
|
@@ -1,33 +1,33 @@
|
|
|
1
|
-
const agents = [
|
|
2
|
-
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'
|
|
3
|
-
// "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
|
|
4
|
-
// "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
|
|
5
|
-
// "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:34.0) Gecko/20100101 Firefox/34.0",
|
|
6
|
-
// "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
|
|
7
|
-
// "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
|
|
8
|
-
// "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
|
|
9
|
-
// "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
|
|
10
|
-
// "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36"
|
|
11
|
-
]
|
|
12
|
-
|
|
13
|
-
module.exports = async ({ browser, cookies, url, puppeteerAuthenticate }) => {
|
|
14
|
-
const page = await browser.newPage()
|
|
15
|
-
await page.setDefaultNavigationTimeout(0)
|
|
16
|
-
|
|
17
|
-
if (cookies) {
|
|
18
|
-
await page.setCookie(...cookies)
|
|
19
|
-
}
|
|
20
|
-
await page.setUserAgent(agents[Math.floor(Math.random() * agents.length)])
|
|
21
|
-
await page.setExtraHTTPHeaders({ 'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8' })
|
|
22
|
-
await page.setViewport({
|
|
23
|
-
width: 1920,
|
|
24
|
-
height: 1080
|
|
25
|
-
})
|
|
26
|
-
|
|
27
|
-
if (puppeteerAuthenticate) {
|
|
28
|
-
await page.authenticate(puppeteerAuthenticate)
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
await page.goto(url, { waitUntil: 'load' })
|
|
32
|
-
return page
|
|
33
|
-
}
|
|
1
|
+
const agents = [
|
|
2
|
+
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'
|
|
3
|
+
// "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
|
|
4
|
+
// "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
|
|
5
|
+
// "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:34.0) Gecko/20100101 Firefox/34.0",
|
|
6
|
+
// "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
|
|
7
|
+
// "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
|
|
8
|
+
// "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
|
|
9
|
+
// "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
|
|
10
|
+
// "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36"
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
module.exports = async ({ browser, cookies, url, puppeteerAuthenticate }) => {
|
|
14
|
+
const page = await browser.newPage()
|
|
15
|
+
await page.setDefaultNavigationTimeout(0)
|
|
16
|
+
|
|
17
|
+
if (cookies) {
|
|
18
|
+
await page.setCookie(...cookies)
|
|
19
|
+
}
|
|
20
|
+
await page.setUserAgent(agents[Math.floor(Math.random() * agents.length)])
|
|
21
|
+
await page.setExtraHTTPHeaders({ 'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8' })
|
|
22
|
+
await page.setViewport({
|
|
23
|
+
width: 1920,
|
|
24
|
+
height: 1080
|
|
25
|
+
})
|
|
26
|
+
|
|
27
|
+
if (puppeteerAuthenticate) {
|
|
28
|
+
await page.authenticate(puppeteerAuthenticate)
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
await page.goto(url, { waitUntil: 'load' })
|
|
32
|
+
return page
|
|
33
|
+
}
|
package/src/package.js
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
const pkg = require('../package.json')
|
|
2
|
-
|
|
3
|
-
// Only specific keys are needed, not the whole file.
|
|
4
|
-
|
|
5
|
-
module.exports = {
|
|
6
|
-
bugs: {
|
|
7
|
-
url: pkg.bugs.url
|
|
8
|
-
},
|
|
9
|
-
name: pkg.name,
|
|
10
|
-
version: pkg.version
|
|
11
|
-
}
|
|
1
|
+
const pkg = require('../package.json')
|
|
2
|
+
|
|
3
|
+
// Only specific keys are needed, not the whole file.
|
|
4
|
+
|
|
5
|
+
module.exports = {
|
|
6
|
+
bugs: {
|
|
7
|
+
url: pkg.bugs.url
|
|
8
|
+
},
|
|
9
|
+
name: pkg.name,
|
|
10
|
+
version: pkg.version
|
|
11
|
+
}
|
|
@@ -1,94 +1,94 @@
|
|
|
1
|
-
const logger = require('../logger')(__filename)
|
|
2
|
-
const pkg = require('../package')
|
|
3
|
-
|
|
4
|
-
module.exports = (profile) => {
|
|
5
|
-
if(!profile.profile.name) {
|
|
6
|
-
const messageError = `LinkedIn website changed and ${pkg.name} ${pkg.version} can't read basic data. Please report this issue at ${pkg.bugs.url}`
|
|
7
|
-
logger.error(messageError, '')
|
|
8
|
-
throw new Error(messageError)
|
|
9
|
-
}
|
|
10
|
-
|
|
11
|
-
profile.profile.summary = profile.about.text
|
|
12
|
-
|
|
13
|
-
profile.positions.forEach((position) => {
|
|
14
|
-
if(position.title){
|
|
15
|
-
position.title = position.title.replace('Company Name\n', '')
|
|
16
|
-
}
|
|
17
|
-
if(position.description) {
|
|
18
|
-
position.description = position.description.replace('See more', '');
|
|
19
|
-
position.description = position.description.replace('see more', '');
|
|
20
|
-
position.description = position.description.replace('See less', '');
|
|
21
|
-
}
|
|
22
|
-
if(position.roles) {
|
|
23
|
-
position.roles.forEach((role) => {
|
|
24
|
-
if(role.title) {
|
|
25
|
-
role.title = role.title.replace('Title\n', '')
|
|
26
|
-
}
|
|
27
|
-
if(role.description) {
|
|
28
|
-
role.description = role.description.replace('See more', '')
|
|
29
|
-
role.description = role.description.replace('see more', '')
|
|
30
|
-
}
|
|
31
|
-
})
|
|
32
|
-
}
|
|
33
|
-
})
|
|
34
|
-
|
|
35
|
-
if(profile.recommendations.receivedCount) {
|
|
36
|
-
profile.recommendations.receivedCount = profile.recommendations.receivedCount.replace(/[^\d]/g, '')
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
if(profile.recommendations.givenCount) {
|
|
40
|
-
profile.recommendations.givenCount = profile.recommendations.givenCount.replace(/[^\d]/g, '')
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
if(profile.recommendations.received) {
|
|
44
|
-
profile.recommendations.received.forEach((recommendation) => {
|
|
45
|
-
if(recommendation.summary){
|
|
46
|
-
recommendation.summary = recommendation.summary.replace('See more', '')
|
|
47
|
-
recommendation.summary = recommendation.summary.replace('See less', '')
|
|
48
|
-
}
|
|
49
|
-
})
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
if(profile.recommendations.given) {
|
|
53
|
-
profile.recommendations.given.forEach((recommendation) => {
|
|
54
|
-
if(recommendation.summary){
|
|
55
|
-
recommendation.summary = recommendation.summary.replace('See more', '')
|
|
56
|
-
recommendation.summary = recommendation.summary.replace('See less', '')
|
|
57
|
-
}
|
|
58
|
-
})
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
if(profile.courses){
|
|
62
|
-
profile.courses = profile.courses.map(({ name, year }) => {
|
|
63
|
-
const coursesObj = {}
|
|
64
|
-
if(name) {
|
|
65
|
-
coursesObj.name = name.replace('Course name\n', '')
|
|
66
|
-
}
|
|
67
|
-
if(year) {
|
|
68
|
-
coursesObj.year = year.replace('Course number\n', '')
|
|
69
|
-
}
|
|
70
|
-
return coursesObj
|
|
71
|
-
}
|
|
72
|
-
);
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
if(profile.languages){
|
|
76
|
-
profile.languages = profile.languages.map(({ name, proficiency }) => ({
|
|
77
|
-
name: name ? name.replace('Language name\n', '') : undefined,
|
|
78
|
-
proficiency,
|
|
79
|
-
}));
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
if(profile.projects){
|
|
83
|
-
profile.projects = profile.projects.map(
|
|
84
|
-
({ name, date, description, link }) => ({
|
|
85
|
-
name: name ? name.replace('Project name\n', '') : undefined,
|
|
86
|
-
date,
|
|
87
|
-
description: description ? description.replace('Project description\n', '') : undefined,
|
|
88
|
-
link,
|
|
89
|
-
}),
|
|
90
|
-
);
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
return profile
|
|
94
|
-
}
|
|
1
|
+
const logger = require('../logger')(__filename)
|
|
2
|
+
const pkg = require('../package')
|
|
3
|
+
|
|
4
|
+
module.exports = (profile) => {
|
|
5
|
+
if(!profile.profile.name) {
|
|
6
|
+
const messageError = `LinkedIn website changed and ${pkg.name} ${pkg.version} can't read basic data. Please report this issue at ${pkg.bugs.url}`
|
|
7
|
+
logger.error(messageError, '')
|
|
8
|
+
throw new Error(messageError)
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
profile.profile.summary = profile.about.text
|
|
12
|
+
|
|
13
|
+
profile.positions.forEach((position) => {
|
|
14
|
+
if(position.title){
|
|
15
|
+
position.title = position.title.replace('Company Name\n', '')
|
|
16
|
+
}
|
|
17
|
+
if(position.description) {
|
|
18
|
+
position.description = position.description.replace('See more', '');
|
|
19
|
+
position.description = position.description.replace('see more', '');
|
|
20
|
+
position.description = position.description.replace('See less', '');
|
|
21
|
+
}
|
|
22
|
+
if(position.roles) {
|
|
23
|
+
position.roles.forEach((role) => {
|
|
24
|
+
if(role.title) {
|
|
25
|
+
role.title = role.title.replace('Title\n', '')
|
|
26
|
+
}
|
|
27
|
+
if(role.description) {
|
|
28
|
+
role.description = role.description.replace('See more', '')
|
|
29
|
+
role.description = role.description.replace('see more', '')
|
|
30
|
+
}
|
|
31
|
+
})
|
|
32
|
+
}
|
|
33
|
+
})
|
|
34
|
+
|
|
35
|
+
if(profile.recommendations.receivedCount) {
|
|
36
|
+
profile.recommendations.receivedCount = profile.recommendations.receivedCount.replace(/[^\d]/g, '')
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
if(profile.recommendations.givenCount) {
|
|
40
|
+
profile.recommendations.givenCount = profile.recommendations.givenCount.replace(/[^\d]/g, '')
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
if(profile.recommendations.received) {
|
|
44
|
+
profile.recommendations.received.forEach((recommendation) => {
|
|
45
|
+
if(recommendation.summary){
|
|
46
|
+
recommendation.summary = recommendation.summary.replace('See more', '')
|
|
47
|
+
recommendation.summary = recommendation.summary.replace('See less', '')
|
|
48
|
+
}
|
|
49
|
+
})
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
if(profile.recommendations.given) {
|
|
53
|
+
profile.recommendations.given.forEach((recommendation) => {
|
|
54
|
+
if(recommendation.summary){
|
|
55
|
+
recommendation.summary = recommendation.summary.replace('See more', '')
|
|
56
|
+
recommendation.summary = recommendation.summary.replace('See less', '')
|
|
57
|
+
}
|
|
58
|
+
})
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
if(profile.courses){
|
|
62
|
+
profile.courses = profile.courses.map(({ name, year }) => {
|
|
63
|
+
const coursesObj = {}
|
|
64
|
+
if(name) {
|
|
65
|
+
coursesObj.name = name.replace('Course name\n', '')
|
|
66
|
+
}
|
|
67
|
+
if(year) {
|
|
68
|
+
coursesObj.year = year.replace('Course number\n', '')
|
|
69
|
+
}
|
|
70
|
+
return coursesObj
|
|
71
|
+
}
|
|
72
|
+
);
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
if(profile.languages){
|
|
76
|
+
profile.languages = profile.languages.map(({ name, proficiency }) => ({
|
|
77
|
+
name: name ? name.replace('Language name\n', '') : undefined,
|
|
78
|
+
proficiency,
|
|
79
|
+
}));
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
if(profile.projects){
|
|
83
|
+
profile.projects = profile.projects.map(
|
|
84
|
+
({ name, date, description, link }) => ({
|
|
85
|
+
name: name ? name.replace('Project name\n', '') : undefined,
|
|
86
|
+
date,
|
|
87
|
+
description: description ? description.replace('Project description\n', '') : undefined,
|
|
88
|
+
link,
|
|
89
|
+
}),
|
|
90
|
+
);
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
return profile
|
|
94
|
+
}
|
|
@@ -1,48 +1,48 @@
|
|
|
1
|
-
const logger = require('../logger')(__filename)
|
|
2
|
-
const scrapSection = require('../scrapSection')
|
|
3
|
-
|
|
4
|
-
const SEE_MORE_SELECTOR = 'a[data-control-name=contact_see_more]'
|
|
5
|
-
const CLOSE_MODAL_SELECTOR = '.artdeco-modal__dismiss';
|
|
6
|
-
|
|
7
|
-
// Template handed to scrapSection for the contact-info modal: one entry per
// contact type, with its header text, its value containers and its link hrefs.
const template = {
  selector: '.pv-contact-info__contact-type',
  fields: {
    type: 'header',
    values: {
      selector: '.pv-contact-info__ci-container',
      isMultipleFields: true
    },
    links: {
      selector: 'a',
      attribute: 'href',
      isMultipleFields: true
    }
  }
}

/**
 * Opens the contact-info modal of a profile page, scrapes it and closes it.
 *
 * @param {object} page - Puppeteer page showing a profile.
 * @returns {Promise<*>} whatever scrapSection yields for the modal, or
 *   `undefined` when the "see more" contact link is not on the page.
 */
const getContactInfo = async (page) => {
  // Best-effort wait: a timeout is only logged, the lookup below decides.
  // waitForSelector replaces the deprecated page.waitFor, as login.js does.
  await page.waitForSelector(SEE_MORE_SELECTOR, { timeout: 2000 })
    .catch(() => {
      // The logger takes a single message; the old two-argument call lost
      // its second half.
      logger.warn('contact-info: selector not found')
    })

  const element = await page.$(SEE_MORE_SELECTOR)
  if (!element) {
    return undefined
  }

  await element.click()
  const contactInfoIndicatorSelector = '#pv-contact-info'
  await page.waitForSelector(contactInfoIndicatorSelector, { timeout: 5000 })
    .catch(() => {
      logger.warn('contact info was not found')
    })

  const contactInfo = await scrapSection(page, template)
  const closeButton = await page.$(CLOSE_MODAL_SELECTOR)
  if (closeButton) {
    await closeButton.click()
  }

  return contactInfo
}
|
|
47
|
-
|
|
48
|
-
module.exports = getContactInfo
|
|
1
|
+
const logger = require('../logger')(__filename)
|
|
2
|
+
const scrapSection = require('../scrapSection')
|
|
3
|
+
|
|
4
|
+
const SEE_MORE_SELECTOR = 'a[data-control-name=contact_see_more]'
|
|
5
|
+
const CLOSE_MODAL_SELECTOR = '.artdeco-modal__dismiss';
|
|
6
|
+
|
|
7
|
+
// Template handed to scrapSection for the contact-info modal: one entry per
// contact type, with its header text, its value containers and its link hrefs.
const template = {
  selector: '.pv-contact-info__contact-type',
  fields: {
    type: 'header',
    values: {
      selector: '.pv-contact-info__ci-container',
      isMultipleFields: true
    },
    links: {
      selector: 'a',
      attribute: 'href',
      isMultipleFields: true
    }
  }
}

/**
 * Opens the contact-info modal of a profile page, scrapes it and closes it.
 *
 * @param {object} page - Puppeteer page showing a profile.
 * @returns {Promise<*>} whatever scrapSection yields for the modal, or
 *   `undefined` when the "see more" contact link is not on the page.
 */
const getContactInfo = async (page) => {
  // Best-effort wait: a timeout is only logged, the lookup below decides.
  // waitForSelector replaces the deprecated page.waitFor, as login.js does.
  await page.waitForSelector(SEE_MORE_SELECTOR, { timeout: 2000 })
    .catch(() => {
      // The logger takes a single message; the old two-argument call lost
      // its second half.
      logger.warn('contact-info: selector not found')
    })

  const element = await page.$(SEE_MORE_SELECTOR)
  if (!element) {
    return undefined
  }

  await element.click()
  const contactInfoIndicatorSelector = '#pv-contact-info'
  await page.waitForSelector(contactInfoIndicatorSelector, { timeout: 5000 })
    .catch(() => {
      logger.warn('contact info was not found')
    })

  const contactInfo = await scrapSection(page, template)
  const closeButton = await page.$(CLOSE_MODAL_SELECTOR)
  if (closeButton) {
    await closeButton.click()
  }

  return contactInfo
}
|
|
47
|
+
|
|
48
|
+
module.exports = getContactInfo
|