@mvegter/scrapedin 1.0.26 → 1.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/dependabot.yml +11 -0
- package/LICENSE +201 -201
- package/README.md +36 -36
- package/package.json +34 -43
- package/src/company/company.js +35 -35
- package/src/company/companyScraperTemplate.js +30 -30
- package/src/logger.js +29 -29
- package/src/login.js +68 -69
- package/src/openPage.js +33 -33
- package/src/package.js +11 -11
- package/src/profile/cleanProfileData.js +99 -94
- package/src/profile/contactInfo.js +48 -48
- package/src/profile/profile.js +81 -81
- package/src/profile/profileScraperTemplate.js +189 -190
- package/src/profile/scrapAccomplishmentPanel.js +17 -17
- package/src/profile/scrollToPageBottom.js +24 -24
- package/src/profile/seeMoreButtons.js +42 -42
- package/src/scrapSection.js +49 -49
- package/src/scrapedin.js +41 -41
- package/.travis.yml +0 -5
- package/src/scrapedin.test.js +0 -338
|
@@ -1,42 +1,42 @@
|
|
|
1
|
-
const logger = require('../logger')(__filename)
|
|
2
|
-
const seeMoreButtons = [
|
|
3
|
-
{
|
|
4
|
-
id: 'SHOW_MORE_ABOUT',
|
|
5
|
-
selector: '#line-clamp-show-more-button'
|
|
6
|
-
},{
|
|
7
|
-
id: 'SHOW_MORE_EXPERIENCES',
|
|
8
|
-
selector: '#experience-section .pv-profile-section__see-more-inline'
|
|
9
|
-
},{
|
|
10
|
-
id: 'SEE_MORE_EXPERIENCES',
|
|
11
|
-
selector: '#experience-section .inline-show-more-text__button'
|
|
12
|
-
},{
|
|
13
|
-
id: 'SHOW_MORE_CERTIFICATIONS',
|
|
14
|
-
selector: '#certifications-section .pv-profile-section__see-more-inline'
|
|
15
|
-
},{
|
|
16
|
-
id: 'SHOW_MORE_SKILLS',
|
|
17
|
-
selector: '.pv-skills-section__additional-skills'
|
|
18
|
-
},{
|
|
19
|
-
id: 'SEE_MORE_RECOMMENDATIONS',
|
|
20
|
-
selector: '.recommendations-inlining #line-clamp-show-more-button'
|
|
21
|
-
}
|
|
22
|
-
]
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
const clickAll = async(page) => {
|
|
26
|
-
for(let i = 0; i < seeMoreButtons.length; i++){
|
|
27
|
-
const button = seeMoreButtons[i]
|
|
28
|
-
const elems = await page.$$(button.selector)
|
|
29
|
-
|
|
30
|
-
for(let j = 0; j < elems.length; j++){
|
|
31
|
-
const elem = elems[j]
|
|
32
|
-
if (elem) {
|
|
33
|
-
await elem.click()
|
|
34
|
-
.catch((e) => logger.warn(`couldn't click on ${button.selector}, it's probably invisible`))
|
|
35
|
-
}
|
|
36
|
-
}
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
return
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
module.exports = { clickAll }
|
|
1
|
+
const logger = require('../logger')(__filename)
|
|
2
|
+
// Selectors of the "show more" / "see more" controls that clickAll tries to click.
// `id` is a human-readable label; only `selector` is read by the code below.
const seeMoreButtons = [
  {
    id: 'SHOW_MORE_ABOUT',
    selector: '#line-clamp-show-more-button'
  },
  {
    id: 'SHOW_MORE_EXPERIENCES',
    selector: '#experience-section .pv-profile-section__see-more-inline'
  },
  {
    id: 'SEE_MORE_EXPERIENCES',
    selector: '#experience-section .inline-show-more-text__button'
  },
  {
    id: 'SHOW_MORE_CERTIFICATIONS',
    selector: '#certifications-section .pv-profile-section__see-more-inline'
  },
  {
    id: 'SHOW_MORE_SKILLS',
    selector: '.pv-skills-section__additional-skills'
  },
  {
    id: 'SEE_MORE_RECOMMENDATIONS',
    selector: '.recommendations-inlining #line-clamp-show-more-button'
  }
]

/**
 * Clicks every element matched by each selector in `seeMoreButtons`, one after another.
 *
 * A failed click is treated as best-effort: it is logged as a warning and the
 * remaining elements are still processed.
 *
 * @param {object} page - object exposing `$$(selector)` that resolves to an array of
 *   clickable handles (presumably a puppeteer Page — confirm against callers)
 * @returns {Promise<undefined>} resolves once every matched element has been tried
 */
const clickAll = async (page) => {
  for (const button of seeMoreButtons) {
    const elems = await page.$$(button.selector)

    // Clicks stay sequential on purpose: the original awaited each click before the next.
    for (const elem of elems) {
      try {
        await elem.click()
      } catch (e) {
        logger.warn(`couldn't click on ${button.selector}, it's probably invisible`)
      }
    }
  }
}
|
|
41
|
+
|
|
42
|
+
module.exports = { clickAll }
|
package/src/scrapSection.js
CHANGED
|
@@ -1,49 +1,49 @@
|
|
|
1
|
-
const scrapSelectorFields = (selector, section) => async (scrapedObjectPromise, fieldKey) => {
|
|
2
|
-
const scrapedObject = await scrapedObjectPromise
|
|
3
|
-
const field = section.fields[fieldKey]
|
|
4
|
-
|
|
5
|
-
// currently field can be a selector string, or an object containing a selector field
|
|
6
|
-
const fieldSelectorString = await field.selector
|
|
7
|
-
? field.selector
|
|
8
|
-
: field
|
|
9
|
-
|
|
10
|
-
const isFieldPresent = await selector.$(fieldSelectorString)
|
|
11
|
-
|
|
12
|
-
if (!isFieldPresent) { return scrapedObject }
|
|
13
|
-
|
|
14
|
-
if (field.isMultipleFields) {
|
|
15
|
-
if (field.attribute === 'href') {
|
|
16
|
-
scrapedObject[fieldKey] = await selector.$$eval(fieldSelectorString, (elems) => elems.map(elem => elem.href ? elem.href.trim() : elem.innerHTML.trim()))
|
|
17
|
-
} else if(field.attribute === 'src'){
|
|
18
|
-
scrapedObject[fieldKey] = await selector.$$eval(fieldSelectorString, (elems) => elems.map(elem => elem.src ? elem.src.trim() : elem.innerHTML.trim()))
|
|
19
|
-
}else{
|
|
20
|
-
scrapedObject[fieldKey] = await selector.$$eval(fieldSelectorString, (elems) => elems.map(elem => elem.innerText.trim()))
|
|
21
|
-
}
|
|
22
|
-
} else if (field.hasChildrenFields) {
|
|
23
|
-
const fieldChildrenSelectors = await selector.$$(field.selector)
|
|
24
|
-
|
|
25
|
-
scrapedObject[fieldKey] = await Promise.all(
|
|
26
|
-
fieldChildrenSelectors.map((s) => scrapSelector(s, field))
|
|
27
|
-
)
|
|
28
|
-
} else if (field.attribute && field.attribute === 'href') {
|
|
29
|
-
scrapedObject[fieldKey] = await selector.$eval(fieldSelectorString, (elem) => elem && elem.href ? elem.href.trim() : '')
|
|
30
|
-
} else if (field.attribute && field.attribute === 'src') {
|
|
31
|
-
scrapedObject[fieldKey] = await selector.$eval(fieldSelectorString, (elem) => elem && elem.src ? elem.src.trim() : '')
|
|
32
|
-
} else {
|
|
33
|
-
scrapedObject[fieldKey] = await selector.$eval(fieldSelectorString, (elem) => elem && elem.innerText ? elem.innerText.trim() : '')
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
return scrapedObject
|
|
37
|
-
}
|
|
38
|
-
const scrapSelector = (selector, section) =>
|
|
39
|
-
Object.keys(section.fields)
|
|
40
|
-
.reduce(scrapSelectorFields(selector, section), Promise.resolve({}))
|
|
41
|
-
|
|
42
|
-
module.exports = async (page, section) => {
|
|
43
|
-
const sectionSelectors = await page.$$(section.selector)
|
|
44
|
-
|
|
45
|
-
const scrapedPromises = sectionSelectors
|
|
46
|
-
.map((selector) => scrapSelector(selector, section))
|
|
47
|
-
|
|
48
|
-
return Promise.all(scrapedPromises)
|
|
49
|
-
}
|
|
1
|
+
/**
 * Creates the reducer that `scrapSelector` uses to fill in one field of a section per step.
 *
 * @param {object} selector - handle exposing `$`, `$$`, `$eval` and `$$eval`
 *   (presumably a puppeteer ElementHandle — confirm against callers)
 * @param {object} section - template whose `fields` maps each result key to either a
 *   selector string or a descriptor object (`selector`, and optionally `attribute`,
 *   `isMultipleFields`, `hasChildrenFields` plus nested `fields`)
 * @returns {function} async reducer `(scrapedObjectPromise, fieldKey) => scrapedObject`
 */
const scrapSelectorFields = (selector, section) => async (scrapedObjectPromise, fieldKey) => {
  const scrapedObject = await scrapedObjectPromise
  const field = section.fields[fieldKey]

  // A field is either a bare selector string or an object carrying a `selector` property.
  const fieldSelectorString = field.selector ? field.selector : field

  // Fields with no match are left out of the result entirely.
  const isFieldPresent = await selector.$(fieldSelectorString)
  if (!isFieldPresent) { return scrapedObject }

  // NOTE(review): the callbacks given to $eval/$$eval look like they are evaluated by
  // puppeteer inside the page, so they are kept self-contained (no closures) — confirm.
  if (field.isMultipleFields) {
    if (field.attribute === 'href') {
      scrapedObject[fieldKey] = await selector.$$eval(fieldSelectorString, (elems) => elems.map(elem => elem.href ? elem.href.trim() : elem.innerHTML.trim()))
    } else if (field.attribute === 'src') {
      scrapedObject[fieldKey] = await selector.$$eval(fieldSelectorString, (elems) => elems.map(elem => elem.src ? elem.src.trim() : elem.innerHTML.trim()))
    } else {
      // Guarded like the single-field branch below: an element without innerText yields ''
      // instead of throwing on `.trim()`.
      scrapedObject[fieldKey] = await selector.$$eval(fieldSelectorString, (elems) => elems.map(elem => elem.innerText ? elem.innerText.trim() : ''))
    }
  } else if (field.hasChildrenFields) {
    // Every matched child is scraped recursively, using the field itself as the template.
    const fieldChildrenSelectors = await selector.$$(fieldSelectorString)

    scrapedObject[fieldKey] = await Promise.all(
      fieldChildrenSelectors.map((s) => scrapSelector(s, field))
    )
  } else if (field.attribute === 'href') {
    scrapedObject[fieldKey] = await selector.$eval(fieldSelectorString, (elem) => elem && elem.href ? elem.href.trim() : '')
  } else if (field.attribute === 'src') {
    scrapedObject[fieldKey] = await selector.$eval(fieldSelectorString, (elem) => elem && elem.src ? elem.src.trim() : '')
  } else {
    scrapedObject[fieldKey] = await selector.$eval(fieldSelectorString, (elem) => elem && elem.innerText ? elem.innerText.trim() : '')
  }

  return scrapedObject
}

/**
 * Scrapes every field declared in `section.fields` from `selector`, one field after
 * another, accumulating the values into a single object.
 *
 * @param {object} selector - handle to scrape from (same shape as in `scrapSelectorFields`)
 * @param {object} section - template providing the `fields` map
 * @returns {Promise<object>} object keyed by field name; fields with no match are absent
 */
const scrapSelector = (selector, section) =>
  Object.keys(section.fields)
    .reduce(scrapSelectorFields(selector, section), Promise.resolve({}))
|
|
41
|
+
|
|
42
|
+
module.exports = async (page, section) => {
|
|
43
|
+
const sectionSelectors = await page.$$(section.selector)
|
|
44
|
+
|
|
45
|
+
const scrapedPromises = sectionSelectors
|
|
46
|
+
.map((selector) => scrapSelector(selector, section))
|
|
47
|
+
|
|
48
|
+
return Promise.all(scrapedPromises)
|
|
49
|
+
}
|
package/src/scrapedin.js
CHANGED
|
@@ -1,41 +1,41 @@
|
|
|
1
|
-
const puppeteer = require('puppeteer')
|
|
2
|
-
const login = require('./login')
|
|
3
|
-
const profile = require('./profile/profile')
|
|
4
|
-
const company = require('./company/company')
|
|
5
|
-
const logger = require('./logger')(__filename)
|
|
6
|
-
|
|
7
|
-
module.exports = async ({ cookies, email, password, isHeadless, hasToLog, hasToGetContactInfo, puppeteerArgs, puppeteerAuthenticate, endpoint } = { isHeadless: true, hasToLog: false }) => {
|
|
8
|
-
if (!hasToLog) {
|
|
9
|
-
logger.stopLogging()
|
|
10
|
-
}
|
|
11
|
-
logger.info('initializing')
|
|
12
|
-
|
|
13
|
-
let browser
|
|
14
|
-
if(endpoint){
|
|
15
|
-
browser = await puppeteer.connect({
|
|
16
|
-
browserWSEndpoint: endpoint
|
|
17
|
-
})
|
|
18
|
-
}else{
|
|
19
|
-
const args = Object.assign({ headless: isHeadless, args: ['--no-sandbox'] }, puppeteerArgs)
|
|
20
|
-
browser = await puppeteer.launch(args)
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
if (cookies) {
|
|
24
|
-
logger.info('using cookies, login will be bypassed')
|
|
25
|
-
} else if (email && password) {
|
|
26
|
-
logger.info('email and password was provided, we\'re going to login...')
|
|
27
|
-
|
|
28
|
-
try {
|
|
29
|
-
await login(browser, email, password, logger)
|
|
30
|
-
} catch (e) {
|
|
31
|
-
if(!endpoint){
|
|
32
|
-
await browser.close()
|
|
33
|
-
}
|
|
34
|
-
throw e
|
|
35
|
-
}
|
|
36
|
-
} else {
|
|
37
|
-
logger.warn('email/password and cookies wasn\'t provided, only public data will be collected')
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
return (url, waitMs) => url.includes('/school/') || url.includes('/company/') ? company(browser, cookies, url, waitMs, hasToGetContactInfo, puppeteerAuthenticate) :profile(browser, cookies, url, waitMs, hasToGetContactInfo, puppeteerAuthenticate)
|
|
41
|
-
}
|
|
1
|
+
const puppeteer = require('puppeteer')
|
|
2
|
+
const login = require('./login')
|
|
3
|
+
const profile = require('./profile/profile')
|
|
4
|
+
const company = require('./company/company')
|
|
5
|
+
const logger = require('./logger')(__filename)
|
|
6
|
+
|
|
7
|
+
module.exports = async ({ cookies, email, password, isHeadless, hasToLog, hasToGetContactInfo, puppeteerArgs, puppeteerAuthenticate, endpoint } = { isHeadless: true, hasToLog: false }) => {
|
|
8
|
+
if (!hasToLog) {
|
|
9
|
+
logger.stopLogging()
|
|
10
|
+
}
|
|
11
|
+
logger.info('initializing')
|
|
12
|
+
|
|
13
|
+
let browser
|
|
14
|
+
if (endpoint) {
|
|
15
|
+
browser = await puppeteer.connect({
|
|
16
|
+
browserWSEndpoint: endpoint
|
|
17
|
+
})
|
|
18
|
+
} else {
|
|
19
|
+
const args = Object.assign({ headless: isHeadless, args: ['--no-sandbox'] }, puppeteerArgs)
|
|
20
|
+
browser = await puppeteer.launch(args)
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
if (cookies) {
|
|
24
|
+
logger.info('using cookies, login will be bypassed')
|
|
25
|
+
} else if (email && password) {
|
|
26
|
+
logger.info('email and password was provided, we\'re going to login...')
|
|
27
|
+
|
|
28
|
+
try {
|
|
29
|
+
await login(browser, email, password, logger)
|
|
30
|
+
} catch (e) {
|
|
31
|
+
if (!endpoint) {
|
|
32
|
+
await browser.close()
|
|
33
|
+
}
|
|
34
|
+
throw e
|
|
35
|
+
}
|
|
36
|
+
} else {
|
|
37
|
+
logger.warn('email/password and cookies wasn\'t provided, only public data will be collected')
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
return (url, waitMs) => url.includes('/school/') || url.includes('/company/') ? company(browser, cookies, url, waitMs, hasToGetContactInfo, puppeteerAuthenticate) : profile(browser, cookies, url, waitMs, hasToGetContactInfo, puppeteerAuthenticate)
|
|
41
|
+
}
|
package/.travis.yml
DELETED
package/src/scrapedin.test.js
DELETED
|
@@ -1,338 +0,0 @@
|
|
|
1
|
-
const faker = require('faker')
|
|
2
|
-
const { expect } = require('chai')
|
|
3
|
-
const profile = require('./profile/profile')
|
|
4
|
-
const logger = require('./logger')(__filename)
|
|
5
|
-
const { mock, match } = require('sinon')
|
|
6
|
-
const profileScraperTemplate = require('./profile/profileScraperTemplate')
|
|
7
|
-
const url = faker.internet.url()
|
|
8
|
-
const fakeEvalResult = faker.lorem.words(1)
|
|
9
|
-
|
|
10
|
-
// Make the linter happy.
|
|
11
|
-
var mocha = require('mocha')
|
|
12
|
-
var it = mocha.it
|
|
13
|
-
|
|
14
|
-
logger.stopLogging()
|
|
15
|
-
|
|
16
|
-
it('should get complete profile', async () => {
|
|
17
|
-
const browserMock = prepareBrowserMock()
|
|
18
|
-
const result = await profile(browserMock, [], url, 0)
|
|
19
|
-
const expectedResult = {
|
|
20
|
-
aboutAlternative: {
|
|
21
|
-
text: fakeEvalResult
|
|
22
|
-
},
|
|
23
|
-
aboutLegacy: {
|
|
24
|
-
text: fakeEvalResult
|
|
25
|
-
},
|
|
26
|
-
accomplishments: [
|
|
27
|
-
{
|
|
28
|
-
count: fakeEvalResult,
|
|
29
|
-
items: [fakeEvalResult],
|
|
30
|
-
title: fakeEvalResult
|
|
31
|
-
}
|
|
32
|
-
],
|
|
33
|
-
contact: {},
|
|
34
|
-
courses: [
|
|
35
|
-
{
|
|
36
|
-
name: fakeEvalResult,
|
|
37
|
-
year: fakeEvalResult
|
|
38
|
-
}
|
|
39
|
-
],
|
|
40
|
-
educations: [
|
|
41
|
-
{
|
|
42
|
-
date1: fakeEvalResult,
|
|
43
|
-
date2: fakeEvalResult,
|
|
44
|
-
degree: fakeEvalResult,
|
|
45
|
-
fieldOfStudy: fakeEvalResult,
|
|
46
|
-
url: fakeEvalResult,
|
|
47
|
-
title: fakeEvalResult
|
|
48
|
-
}
|
|
49
|
-
],
|
|
50
|
-
languages: [
|
|
51
|
-
{
|
|
52
|
-
name: fakeEvalResult,
|
|
53
|
-
proficiency: fakeEvalResult
|
|
54
|
-
}
|
|
55
|
-
],
|
|
56
|
-
peopleAlsoViewed: [
|
|
57
|
-
{
|
|
58
|
-
text: fakeEvalResult,
|
|
59
|
-
user: fakeEvalResult
|
|
60
|
-
}
|
|
61
|
-
],
|
|
62
|
-
positions: [
|
|
63
|
-
{
|
|
64
|
-
companyName: fakeEvalResult,
|
|
65
|
-
date1: fakeEvalResult,
|
|
66
|
-
date2: fakeEvalResult,
|
|
67
|
-
description: fakeEvalResult,
|
|
68
|
-
link: fakeEvalResult,
|
|
69
|
-
location: fakeEvalResult,
|
|
70
|
-
roles: [
|
|
71
|
-
{
|
|
72
|
-
date1: fakeEvalResult,
|
|
73
|
-
date2: fakeEvalResult,
|
|
74
|
-
description: fakeEvalResult,
|
|
75
|
-
location: fakeEvalResult,
|
|
76
|
-
title: fakeEvalResult
|
|
77
|
-
}
|
|
78
|
-
],
|
|
79
|
-
title: fakeEvalResult,
|
|
80
|
-
url: fakeEvalResult
|
|
81
|
-
}
|
|
82
|
-
],
|
|
83
|
-
profile: {
|
|
84
|
-
connections: fakeEvalResult,
|
|
85
|
-
headline: fakeEvalResult,
|
|
86
|
-
location: fakeEvalResult,
|
|
87
|
-
name: fakeEvalResult,
|
|
88
|
-
summary: fakeEvalResult
|
|
89
|
-
},
|
|
90
|
-
profileAlternative: {
|
|
91
|
-
connections: fakeEvalResult,
|
|
92
|
-
headline: fakeEvalResult,
|
|
93
|
-
imageurl: fakeEvalResult,
|
|
94
|
-
location: fakeEvalResult,
|
|
95
|
-
name: fakeEvalResult
|
|
96
|
-
},
|
|
97
|
-
profileLegacy: {
|
|
98
|
-
connections: fakeEvalResult,
|
|
99
|
-
headline: fakeEvalResult,
|
|
100
|
-
location: fakeEvalResult,
|
|
101
|
-
name: fakeEvalResult,
|
|
102
|
-
summary: fakeEvalResult
|
|
103
|
-
},
|
|
104
|
-
projects: [
|
|
105
|
-
{
|
|
106
|
-
date: fakeEvalResult,
|
|
107
|
-
description: fakeEvalResult,
|
|
108
|
-
link: fakeEvalResult,
|
|
109
|
-
name: fakeEvalResult
|
|
110
|
-
}
|
|
111
|
-
],
|
|
112
|
-
recommendations: {
|
|
113
|
-
given: [
|
|
114
|
-
{
|
|
115
|
-
text: fakeEvalResult,
|
|
116
|
-
user: fakeEvalResult
|
|
117
|
-
}
|
|
118
|
-
],
|
|
119
|
-
givenCount: '',
|
|
120
|
-
received: [
|
|
121
|
-
{
|
|
122
|
-
text: fakeEvalResult,
|
|
123
|
-
user: fakeEvalResult
|
|
124
|
-
}
|
|
125
|
-
],
|
|
126
|
-
receivedCount: ''
|
|
127
|
-
},
|
|
128
|
-
skills: [
|
|
129
|
-
{
|
|
130
|
-
count: fakeEvalResult,
|
|
131
|
-
title: fakeEvalResult
|
|
132
|
-
}
|
|
133
|
-
],
|
|
134
|
-
volunteerExperience: [
|
|
135
|
-
{
|
|
136
|
-
date1: fakeEvalResult,
|
|
137
|
-
date2: fakeEvalResult,
|
|
138
|
-
description: fakeEvalResult,
|
|
139
|
-
experience: fakeEvalResult,
|
|
140
|
-
location: fakeEvalResult,
|
|
141
|
-
title: fakeEvalResult
|
|
142
|
-
}
|
|
143
|
-
]
|
|
144
|
-
}
|
|
145
|
-
|
|
146
|
-
expect(result).to.deep.equals(expectedResult)
|
|
147
|
-
})
|
|
148
|
-
|
|
149
|
-
it('should get an incomplete profile', async () => {
|
|
150
|
-
const browser = prepareBrowserMock(true)
|
|
151
|
-
|
|
152
|
-
const result = await profile(browser, [], url, 0)
|
|
153
|
-
const expectedResult = {
|
|
154
|
-
aboutAlternative: {
|
|
155
|
-
text: ''
|
|
156
|
-
},
|
|
157
|
-
aboutLegacy: {
|
|
158
|
-
text: ''
|
|
159
|
-
},
|
|
160
|
-
accomplishments: [
|
|
161
|
-
{
|
|
162
|
-
count: '',
|
|
163
|
-
items: [fakeEvalResult],
|
|
164
|
-
title: ''
|
|
165
|
-
}
|
|
166
|
-
],
|
|
167
|
-
contact: {},
|
|
168
|
-
courses: [{}],
|
|
169
|
-
educations: [
|
|
170
|
-
{
|
|
171
|
-
date1: '',
|
|
172
|
-
date2: '',
|
|
173
|
-
degree: '',
|
|
174
|
-
fieldOfStudy: '',
|
|
175
|
-
url: ''
|
|
176
|
-
}
|
|
177
|
-
],
|
|
178
|
-
languages: [
|
|
179
|
-
{
|
|
180
|
-
name: undefined,
|
|
181
|
-
proficiency: ''
|
|
182
|
-
}
|
|
183
|
-
],
|
|
184
|
-
peopleAlsoViewed: [
|
|
185
|
-
{
|
|
186
|
-
text: '',
|
|
187
|
-
user: ''
|
|
188
|
-
}
|
|
189
|
-
],
|
|
190
|
-
positions: [
|
|
191
|
-
{
|
|
192
|
-
companyName: '',
|
|
193
|
-
date1: '',
|
|
194
|
-
date2: '',
|
|
195
|
-
description: '',
|
|
196
|
-
link: '',
|
|
197
|
-
location: '',
|
|
198
|
-
roles: [
|
|
199
|
-
{
|
|
200
|
-
date1: '',
|
|
201
|
-
date2: '',
|
|
202
|
-
description: '',
|
|
203
|
-
location: '',
|
|
204
|
-
title: ''
|
|
205
|
-
}
|
|
206
|
-
],
|
|
207
|
-
url: ''
|
|
208
|
-
}
|
|
209
|
-
],
|
|
210
|
-
profile: {
|
|
211
|
-
connections: '',
|
|
212
|
-
headline: '',
|
|
213
|
-
location: '',
|
|
214
|
-
name: ''
|
|
215
|
-
},
|
|
216
|
-
profileAlternative: {
|
|
217
|
-
connections: '',
|
|
218
|
-
headline: '',
|
|
219
|
-
imageurl: '',
|
|
220
|
-
location: '',
|
|
221
|
-
name: ''
|
|
222
|
-
},
|
|
223
|
-
profileLegacy: {
|
|
224
|
-
connections: '',
|
|
225
|
-
headline: '',
|
|
226
|
-
location: '',
|
|
227
|
-
name: ''
|
|
228
|
-
},
|
|
229
|
-
projects: [
|
|
230
|
-
{
|
|
231
|
-
date: '',
|
|
232
|
-
description: undefined,
|
|
233
|
-
link: '',
|
|
234
|
-
name: undefined
|
|
235
|
-
}
|
|
236
|
-
],
|
|
237
|
-
recommendations: {
|
|
238
|
-
given: [
|
|
239
|
-
{
|
|
240
|
-
text: '',
|
|
241
|
-
user: ''
|
|
242
|
-
}
|
|
243
|
-
],
|
|
244
|
-
givenCount: '',
|
|
245
|
-
received: [
|
|
246
|
-
{
|
|
247
|
-
text: '',
|
|
248
|
-
user: ''
|
|
249
|
-
}
|
|
250
|
-
],
|
|
251
|
-
receivedCount: ''
|
|
252
|
-
},
|
|
253
|
-
skills: [
|
|
254
|
-
{
|
|
255
|
-
count: '',
|
|
256
|
-
title: ''
|
|
257
|
-
}
|
|
258
|
-
],
|
|
259
|
-
volunteerExperience: [
|
|
260
|
-
{
|
|
261
|
-
date1: '',
|
|
262
|
-
date2: '',
|
|
263
|
-
description: '',
|
|
264
|
-
experience: '',
|
|
265
|
-
location: ''
|
|
266
|
-
}
|
|
267
|
-
]
|
|
268
|
-
}
|
|
269
|
-
|
|
270
|
-
expect(result).to.deep.equals(expectedResult)
|
|
271
|
-
})
|
|
272
|
-
|
|
273
|
-
const prepareBrowserMock = (isIncompleteProfile) => {
|
|
274
|
-
const Page = function () {
|
|
275
|
-
this.goto = mock().once().withExactArgs(url).resolves()
|
|
276
|
-
this.setUserAgent = mock().once().resolves()
|
|
277
|
-
this.setExtraHTTPHeaders = mock().once().resolves()
|
|
278
|
-
this.setViewport = mock().once().resolves()
|
|
279
|
-
this.waitFor = mock().once().resolves()
|
|
280
|
-
|
|
281
|
-
this.evaluate = mock()
|
|
282
|
-
.twice()
|
|
283
|
-
.withExactArgs(match.func)
|
|
284
|
-
.atLeast(1)
|
|
285
|
-
.resolves()
|
|
286
|
-
this.waitForSelector = mock()
|
|
287
|
-
.withExactArgs(match.string, match.object)
|
|
288
|
-
.twice()
|
|
289
|
-
.onCall(0)
|
|
290
|
-
.rejects()
|
|
291
|
-
.onCall(1)
|
|
292
|
-
.resolves(true)
|
|
293
|
-
|
|
294
|
-
this.setCookie = mock().once().withExactArgs().resolves()
|
|
295
|
-
|
|
296
|
-
this.click = mock().atLeast(1).withExactArgs().resolves()
|
|
297
|
-
this.$$eval = mock()
|
|
298
|
-
.withExactArgs(match.string, match.func)
|
|
299
|
-
.atLeast(1)
|
|
300
|
-
.callsArgWith(1, [{ innerText: fakeEvalResult }])
|
|
301
|
-
.resolves([fakeEvalResult])
|
|
302
|
-
|
|
303
|
-
this.$eval = mock()
|
|
304
|
-
.withExactArgs(match.string, match.func)
|
|
305
|
-
.atLeast(1)
|
|
306
|
-
.callsArgWith(
|
|
307
|
-
1,
|
|
308
|
-
isIncompleteProfile
|
|
309
|
-
? undefined
|
|
310
|
-
: {
|
|
311
|
-
innerText: fakeEvalResult,
|
|
312
|
-
src: fakeEvalResult,
|
|
313
|
-
href: fakeEvalResult
|
|
314
|
-
}
|
|
315
|
-
)
|
|
316
|
-
.resolves(isIncompleteProfile ? '' : fakeEvalResult)
|
|
317
|
-
|
|
318
|
-
this.close = mock().once().resolves()
|
|
319
|
-
}
|
|
320
|
-
|
|
321
|
-
Page.prototype.$ = () => new Page()
|
|
322
|
-
|
|
323
|
-
if (isIncompleteProfile) {
|
|
324
|
-
// I couldn't do that with sinon :(
|
|
325
|
-
Page.prototype.$ = (arg) =>
|
|
326
|
-
arg === profileScraperTemplate.positions.fields.title
|
|
327
|
-
? undefined
|
|
328
|
-
: Promise.resolve(new Page())
|
|
329
|
-
}
|
|
330
|
-
|
|
331
|
-
Page.prototype.$$ = () => [new Page()]
|
|
332
|
-
|
|
333
|
-
const browser = {
|
|
334
|
-
newPage: mock().once().withExactArgs().resolves(new Page())
|
|
335
|
-
}
|
|
336
|
-
|
|
337
|
-
return browser
|
|
338
|
-
}
|