@mvegter/scrapedin 1.0.32 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/dependabot.yml +11 -11
- package/LICENSE +201 -201
- package/README.md +36 -36
- package/package.json +34 -34
- package/src/company/company.js +34 -35
- package/src/company/companyScraperTemplate.js +29 -30
- package/src/logger.js +29 -29
- package/src/login.js +70 -68
- package/src/openPage.js +23 -33
- package/src/package.js +11 -11
- package/src/profile/cleanProfileData.js +98 -99
- package/src/profile/contactInfo.js +46 -48
- package/src/profile/profile.js +221 -81
- package/src/profile/profileScraperTemplate.js +186 -189
- package/src/profile/scrapAccomplishmentPanel.js +11 -18
- package/src/profile/scrollToPageBottom.js +23 -24
- package/src/profile/seeMoreButtons.js +18 -42
- package/src/scrapSection.js +49 -49
- package/src/scrapedin.js +41 -41
package/src/scrapedin.js
CHANGED
|
@@ -1,41 +1,41 @@
|
|
|
1
|
-
const puppeteer = require('puppeteer')
|
|
2
|
-
const login = require('./login')
|
|
3
|
-
const profile = require('./profile/profile')
|
|
4
|
-
const company = require('./company/company')
|
|
5
|
-
const logger = require('./logger')(__filename)
|
|
6
|
-
|
|
7
|
-
module.exports = async ({ cookies, email, password, isHeadless, hasToLog, hasToGetContactInfo, puppeteerArgs, puppeteerAuthenticate, endpoint } = { isHeadless: true, hasToLog: false }) => {
|
|
8
|
-
if (!hasToLog) {
|
|
9
|
-
logger.stopLogging()
|
|
10
|
-
}
|
|
11
|
-
logger.info('initializing')
|
|
12
|
-
|
|
13
|
-
let browser
|
|
14
|
-
if (endpoint) {
|
|
15
|
-
browser = await puppeteer.connect({
|
|
16
|
-
browserWSEndpoint: endpoint
|
|
17
|
-
})
|
|
18
|
-
} else {
|
|
19
|
-
const args = Object.assign({ headless: isHeadless, args: ['--no-sandbox'] }, puppeteerArgs)
|
|
20
|
-
browser = await puppeteer.launch(args)
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
if (cookies) {
|
|
24
|
-
logger.info('using cookies, login will be bypassed')
|
|
25
|
-
} else if (email && password) {
|
|
26
|
-
logger.info('email and password was provided, we\'re going to login...')
|
|
27
|
-
|
|
28
|
-
try {
|
|
29
|
-
await login(browser, email, password, logger)
|
|
30
|
-
} catch (e) {
|
|
31
|
-
if (!endpoint) {
|
|
32
|
-
await browser.close()
|
|
33
|
-
}
|
|
34
|
-
throw e
|
|
35
|
-
}
|
|
36
|
-
} else {
|
|
37
|
-
logger.warn('email/password and cookies wasn\'t provided, only public data will be collected')
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
return (url, waitMs) => url.includes('/school/') || url.includes('/company/') ? company(browser, cookies, url, waitMs, hasToGetContactInfo, puppeteerAuthenticate) : profile(browser, cookies, url, waitMs, hasToGetContactInfo, puppeteerAuthenticate)
|
|
41
|
-
}
|
|
1
|
+
const puppeteer = require('puppeteer')
|
|
2
|
+
const login = require('./login')
|
|
3
|
+
const profile = require('./profile/profile')
|
|
4
|
+
const company = require('./company/company')
|
|
5
|
+
const logger = require('./logger')(__filename)
|
|
6
|
+
|
|
7
|
+
module.exports = async ({ cookies, email, password, isHeadless, hasToLog, hasToGetContactInfo, puppeteerArgs, puppeteerAuthenticate, endpoint } = { isHeadless: true, hasToLog: false }) => {
|
|
8
|
+
if (!hasToLog) {
|
|
9
|
+
logger.stopLogging()
|
|
10
|
+
}
|
|
11
|
+
logger.info('initializing')
|
|
12
|
+
|
|
13
|
+
let browser
|
|
14
|
+
if (endpoint) {
|
|
15
|
+
browser = await puppeteer.connect({
|
|
16
|
+
browserWSEndpoint: endpoint
|
|
17
|
+
})
|
|
18
|
+
} else {
|
|
19
|
+
const args = Object.assign({ headless: isHeadless, args: ['--no-sandbox'] }, puppeteerArgs)
|
|
20
|
+
browser = await puppeteer.launch(args)
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
if (cookies) {
|
|
24
|
+
logger.info('using cookies, login will be bypassed')
|
|
25
|
+
} else if (email && password) {
|
|
26
|
+
logger.info('email and password was provided, we\'re going to login...')
|
|
27
|
+
|
|
28
|
+
try {
|
|
29
|
+
await login(browser, email, password, logger)
|
|
30
|
+
} catch (e) {
|
|
31
|
+
if (!endpoint) {
|
|
32
|
+
await browser.close()
|
|
33
|
+
}
|
|
34
|
+
throw e
|
|
35
|
+
}
|
|
36
|
+
} else {
|
|
37
|
+
logger.warn('email/password and cookies wasn\'t provided, only public data will be collected')
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
return (url, waitMs) => url.includes('/school/') || url.includes('/company/') ? company(browser, cookies, url, waitMs, hasToGetContactInfo, puppeteerAuthenticate) : profile(browser, cookies, url, waitMs, hasToGetContactInfo, puppeteerAuthenticate)
|
|
41
|
+
}
|