@things-factory/integration-base 9.0.35 → 9.0.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist-server/engine/connector/headless-connector.js +231 -18
- package/dist-server/engine/connector/headless-connector.js.map +1 -1
- package/dist-server/engine/task/headless-scrap.js +139 -58
- package/dist-server/engine/task/headless-scrap.js.map +1 -1
- package/dist-server/engine/task/utils/headless-request-with-recovery.d.ts +4 -0
- package/dist-server/engine/task/utils/headless-request-with-recovery.js +197 -38
- package/dist-server/engine/task/utils/headless-request-with-recovery.js.map +1 -1
- package/dist-server/tsconfig.tsbuildinfo +1 -1
- package/package.json +7 -7
@@ -3,82 +3,157 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
const url_1 = require("url");
|
4
4
|
const task_registry_1 = require("../task-registry");
|
5
5
|
const connection_manager_1 = require("../connection-manager");
|
6
|
+
const headless_request_with_recovery_1 = require("./utils/headless-request-with-recovery");
|
6
7
|
async function HeadlessScrap(step, { logger, data, domain }) {
|
7
8
|
const { connection: connectionName, params: stepOptions } = step;
|
8
|
-
const { headers: requestHeaders, path, selectors = [], waitForSelectors, waitForTimeout } = stepOptions || {};
|
9
|
+
const { headers: requestHeaders, path, selectors = [], waitForSelectors, waitForTimeout, maxRetries = 2 } = stepOptions || {};
|
9
10
|
const connection = await connection_manager_1.ConnectionManager.getConnectionInstanceByName(domain, connectionName);
|
10
11
|
if (!connection) {
|
11
12
|
throw new Error(`Connection '${connectionName}' is not established.`);
|
12
13
|
}
|
13
|
-
const { endpoint, params: connectionParams, acquireSessionPage, releasePage } = connection;
|
14
|
+
const { endpoint, params: connectionParams, acquireSessionPage, releasePage, validateSession, reAuthenticateSession } = connection;
|
15
|
+
const loginPagePath = connectionParams?.loginPagePath || '/login';
|
14
16
|
const headers = {
|
15
17
|
...requestHeaders
|
16
18
|
};
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
if (request.postData()) {
|
30
|
-
console.log(`- Post Data: ${request.postData()}`);
|
19
|
+
let page = null;
|
20
|
+
let pageResource = null; // 리소스 추적 객체
|
21
|
+
let lastError = null;
|
22
|
+
// 재시도 로직 추가
|
23
|
+
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
24
|
+
try {
|
25
|
+
// 페이지 획득
|
26
|
+
const sessionResult = await acquireSessionPage();
|
27
|
+
// reAuthenticateSession의 반환 형태 확인
|
28
|
+
if (sessionResult && typeof sessionResult === 'object' && sessionResult.page) {
|
29
|
+
pageResource = sessionResult; // {page, browser, requiresManualRelease}
|
30
|
+
page = sessionResult.page;
|
31
31
|
}
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
// waitForSelectors, waitForTimeout 처리 추가
|
36
|
-
if (waitForSelectors) {
|
37
|
-
try {
|
38
|
-
await page.waitForFunction(selectorsString => {
|
39
|
-
const selectors = selectorsString.split(',').map(s => s.trim());
|
40
|
-
return selectors.every(selector => {
|
41
|
-
const el = document.querySelector(selector);
|
42
|
-
return el && el.textContent && el.textContent.trim().length > 0;
|
43
|
-
});
|
44
|
-
}, { timeout: waitForTimeout ? Number(waitForTimeout) : 10000 }, waitForSelectors // 콤마로 구분된 셀렉터 문자열
|
45
|
-
);
|
46
|
-
}
|
47
|
-
catch (e) {
|
48
|
-
logger.error(`waitForSelectors(${waitForSelectors}) 값이 모두 채워지지 않음:`, e);
|
49
|
-
throw e;
|
32
|
+
else {
|
33
|
+
page = sessionResult;
|
34
|
+
pageResource = { page, requiresManualRelease: false };
|
50
35
|
}
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
36
|
+
page.on('console', async (msg) => {
|
37
|
+
console.log(`[browser ${msg.type()}] ${msg.text()}`);
|
38
|
+
});
|
39
|
+
page.on('requestfailed', request => {
|
40
|
+
console.log('Request failed:');
|
41
|
+
console.log(`- URL: ${request.url()}`);
|
42
|
+
console.log(`- Method: ${request.method()}`);
|
43
|
+
console.log(`- Failure Text: ${request.failure()?.errorText}`);
|
44
|
+
console.log(`- Headers:`, request.headers());
|
45
|
+
// POST 데이터 (필요한 경우)
|
46
|
+
if (request.postData()) {
|
47
|
+
console.log(`- Post Data: ${request.postData()}`);
|
48
|
+
}
|
49
|
+
});
|
50
|
+
// 302 리디렉션 감지 추가 - 이벤트 핸들러는 비동기 리소스 정리 불가하므로 단순 로깅만
|
51
|
+
page.on('response', response => {
|
52
|
+
try {
|
53
|
+
if ([301, 302, 307, 308].includes(response.status())) {
|
54
|
+
const location = response.headers()['location'] || '';
|
55
|
+
if (location.includes(loginPagePath) ||
|
56
|
+
location.includes('/login') ||
|
57
|
+
location.includes('/signin') ||
|
58
|
+
location.includes('/auth')) {
|
59
|
+
logger.warn(`Login redirect detected during response: ${location}`);
|
60
|
+
// Note: Cannot throw from event handler - will be caught in main flow
|
61
|
+
}
|
62
|
+
}
|
63
|
+
}
|
64
|
+
catch (eventError) {
|
65
|
+
logger.error('Error in response event handler:', eventError);
|
66
|
+
}
|
67
|
+
});
|
68
|
+
// 첫 번째 시도가 아니면 세션 검증
|
69
|
+
if (attempt > 0 && validateSession) {
|
70
|
+
const isSessionValid = await validateSession(page);
|
71
|
+
if (!isSessionValid) {
|
72
|
+
logger.warn(`Session invalid for connection '${connectionName}', attempting re-authentication`);
|
73
|
+
await (0, headless_request_with_recovery_1.safeReleasePageResource)(pageResource, releasePage, logger);
|
74
|
+
const reauthResult = await reAuthenticateSession();
|
75
|
+
if (reauthResult && typeof reauthResult === 'object' && reauthResult.page) {
|
76
|
+
pageResource = reauthResult;
|
77
|
+
page = reauthResult.page;
|
64
78
|
}
|
65
79
|
else {
|
66
|
-
|
80
|
+
page = reauthResult;
|
81
|
+
pageResource = { page, requiresManualRelease: false };
|
67
82
|
}
|
83
|
+
}
|
84
|
+
}
|
85
|
+
await page.setExtraHTTPHeaders(headers);
|
86
|
+
await page.goto(new url_1.URL(path, endpoint), { waitUntil: 'networkidle2' });
|
87
|
+
// 페이지 로드 후 로그인 페이지로 리디렉션되었는지 확인
|
88
|
+
const currentUrl = page.url();
|
89
|
+
if (currentUrl.includes(loginPagePath) ||
|
90
|
+
currentUrl.includes('/login') ||
|
91
|
+
currentUrl.includes('/signin') ||
|
92
|
+
currentUrl.includes('/auth')) {
|
93
|
+
throw new Error(`Page redirected to login: ${currentUrl}`);
|
94
|
+
}
|
95
|
+
// waitForSelectors, waitForTimeout 처리 추가
|
96
|
+
if (waitForSelectors) {
|
97
|
+
try {
|
98
|
+
await page.waitForFunction(selectorsString => {
|
99
|
+
const selectors = selectorsString.split(',').map(s => s.trim());
|
100
|
+
return selectors.every(selector => {
|
101
|
+
const el = document.querySelector(selector);
|
102
|
+
return el && el.textContent && el.textContent.trim().length > 0;
|
103
|
+
});
|
104
|
+
}, { timeout: waitForTimeout ? Number(waitForTimeout) : 10000 }, waitForSelectors // 콤마로 구분된 셀렉터 문자열
|
105
|
+
);
|
106
|
+
}
|
107
|
+
catch (e) {
|
108
|
+
logger.error(`waitForSelectors(${waitForSelectors}) 값이 모두 채워지지 않음:`, e);
|
109
|
+
throw e;
|
110
|
+
}
|
111
|
+
}
|
112
|
+
else if (waitForTimeout) {
|
113
|
+
await page.waitForTimeout(Number(waitForTimeout));
|
114
|
+
}
|
115
|
+
const result = {};
|
116
|
+
for (const selector of selectors) {
|
117
|
+
const { text, value } = selector;
|
118
|
+
result[text] = await page.$$eval(value, elements => {
|
119
|
+
return elements.map(element => {
|
120
|
+
if (element instanceof HTMLInputElement ||
|
121
|
+
element instanceof HTMLTextAreaElement ||
|
122
|
+
element instanceof HTMLSelectElement) {
|
123
|
+
return element.value;
|
124
|
+
}
|
125
|
+
else {
|
126
|
+
return element.textContent?.trim();
|
127
|
+
}
|
128
|
+
});
|
68
129
|
});
|
69
|
-
}
|
130
|
+
}
|
131
|
+
// 성공시 페이지 릴리즈 후 결과 반환
|
132
|
+
await (0, headless_request_with_recovery_1.safeReleasePageResource)(pageResource, releasePage, logger);
|
133
|
+
return {
|
134
|
+
data: result
|
135
|
+
};
|
136
|
+
}
|
137
|
+
catch (error) {
|
138
|
+
lastError = error;
|
139
|
+
logger.error(`HeadlessScrap attempt ${attempt + 1} failed:`, error);
|
140
|
+
await (0, headless_request_with_recovery_1.safeReleasePageResource)(pageResource, releasePage, logger);
|
141
|
+
page = null;
|
142
|
+
pageResource = null;
|
143
|
+
// 로그인 관련 에러나 복구 가능한 에러가 아니거나 마지막 재시도면 에러 발생
|
144
|
+
const errorMessage = error.message?.toLowerCase() || '';
|
145
|
+
const isRecoverableError = errorMessage.includes('login') || errorMessage.includes('redirect') || errorMessage.includes('unauthorized') || errorMessage.includes('forbidden') || errorMessage.includes('session');
|
146
|
+
if (!isRecoverableError || attempt === maxRetries) {
|
147
|
+
throw error;
|
148
|
+
}
|
149
|
+
logger.info(`Retrying HeadlessScrap... (${attempt + 2}/${maxRetries + 1})`);
|
70
150
|
}
|
71
|
-
return {
|
72
|
-
data: result
|
73
|
-
};
|
74
|
-
}
|
75
|
-
catch (e) {
|
76
|
-
logger.error('Error in HeadlessScrap:', e);
|
77
|
-
throw e;
|
78
151
|
}
|
79
|
-
|
80
|
-
|
152
|
+
// 모든 재시도가 실패한 경우 - 혹시 남은 리소스가 있으면 정리
|
153
|
+
if (pageResource) {
|
154
|
+
await (0, headless_request_with_recovery_1.safeReleasePageResource)(pageResource, releasePage, logger);
|
81
155
|
}
|
156
|
+
throw lastError || new Error('HeadlessScrap failed after all retry attempts');
|
82
157
|
}
|
83
158
|
HeadlessScrap.parameterSpec = [
|
84
159
|
{
|
@@ -105,6 +180,12 @@ HeadlessScrap.parameterSpec = [
|
|
105
180
|
type: 'string',
|
106
181
|
name: 'waitForTimeout',
|
107
182
|
label: 'wait-for-timeout'
|
183
|
+
},
|
184
|
+
{
|
185
|
+
type: 'number',
|
186
|
+
name: 'maxRetries',
|
187
|
+
label: 'maximum-retries',
|
188
|
+
value: 2
|
108
189
|
}
|
109
190
|
];
|
110
191
|
HeadlessScrap.help = 'integration/task/headless-scrap';
|
@@ -1 +1 @@
|
|
1
|
-
{"version":3,"file":"headless-scrap.js","sourceRoot":"","sources":["../../../server/engine/task/headless-scrap.ts"],"names":[],"mappings":";;AAAA,6BAAyB;AAEzB,oDAA+C;AAC/C,8DAAyD;AAEzD,KAAK,UAAU,aAAa,CAAC,IAAI,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE;IACzD,MAAM,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,IAAI,CAAA;IAChE,MAAM,EAAE,OAAO,EAAE,cAAc,EAAE,IAAI,EAAE,SAAS,GAAG,EAAE,EAAE,gBAAgB,EAAE,cAAc,EAAE,GAAG,WAAW,IAAI,EAAE,CAAA;IAE7G,MAAM,UAAU,GAAG,MAAM,sCAAiB,CAAC,2BAA2B,CAAC,MAAM,EAAE,cAAc,CAAC,CAAA;IAE9F,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,MAAM,IAAI,KAAK,CAAC,eAAe,cAAc,uBAAuB,CAAC,CAAA;IACvE,CAAC;IAED,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,gBAAgB,EAAE,kBAAkB,EAAE,WAAW,EAAE,GAAG,UAAU,CAAA;IAE1F,MAAM,OAAO,GAAG;QACd,GAAG,cAAc;KAClB,CAAA;IAED,MAAM,IAAI,GAAG,MAAM,kBAAkB,EAAE,CAAA;IAEvC,IAAI,CAAC;QACH,IAAI,CAAC,EAAE,CAAC,SAAS,EAAE,KAAK,EAAC,GAAG,EAAC,EAAE;YAC7B,OAAO,CAAC,GAAG,CAAC,YAAY,GAAG,CAAC,IAAI,EAAE,KAAK,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC,CAAA;QACtD,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,EAAE,CAAC,eAAe,EAAE,OAAO,CAAC,EAAE;YACjC,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAA;YAC9B,OAAO,CAAC,GAAG,CAAC,UAAU,OAAO,CAAC,GAAG,EAAE,EAAE,CAAC,CAAA;YACtC,OAAO,CAAC,GAAG,CAAC,aAAa,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAA;YAC5C,OAAO,CAAC,GAAG,CAAC,mBAAmB,OAAO,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,CAAC,CAAA;YAC9D,OAAO,CAAC,GAAG,CAAC,YAAY,EAAE,OAAO,CAAC,OAAO,EAAE,CAAC,CAAA;YAE5C,oBAAoB;YACpB,IAAI,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;gBACvB,OAAO,CAAC,GAAG,CAAC,gBAAgB,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;YACnD,CAAC;QACH,CAAC,CAAC,CAAA;QAEF,MAAM,IAAI,CAAC,mBAAmB,CAAC,OAAO,CAAC,CAAA;QACvC,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,SAAG,CAAC,IAAI,EAAE,QAAQ,CAAC,EAAE,EAAE,SAAS,EAAE,cAAc,EAAE,CAAC,CAAA;QAEvE,yCAAyC;QACzC,IAAI,gBAAgB,EAAE,CAAC;YACrB,IAAI,CAAC;gBACH,MAAM,IAAI,CAAC,eAAe,CACxB,eAAe,CAAC,EAAE;oBAChB,MAAM,SAAS,GAAG,eAAe,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAA;oBAC/D,OAAO,SAAS,CAAC,KAAK,CAAC,QAAQ,CAAC,EAAE;wBAChC,MAAM,EAAE,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAA;wBAC3C,OAAO,EAAE,IAAI,EAAE,CAAC,WA
AW,IAAI,EAAE,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAA;oBACjE,CAAC,CAAC,CAAA;gBACJ,CAAC,EACD,EAAE,OAAO,EAAE,cAAc,CAAC,CAAC,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,EAC5D,gBAAgB,CAAC,kBAAkB;iBACpC,CAAA;YACH,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,MAAM,CAAC,KAAK,CAAC,oBAAoB,gBAAgB,kBAAkB,EAAE,CAAC,CAAC,CAAA;gBACvE,MAAM,CAAC,CAAA;YACT,CAAC;QACH,CAAC;aAAM,IAAI,cAAc,EAAE,CAAC;YAC1B,MAAM,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAA;QACnD,CAAC;QAED,MAAM,MAAM,GAAG,EAAE,CAAA;QAEjB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YACjC,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,QAAQ,CAAA;YAChC,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,QAAQ,CAAC,EAAE;gBACjD,OAAO,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE;oBAC5B,IACE,OAAO,YAAY,gBAAgB;wBACnC,OAAO,YAAY,mBAAmB;wBACtC,OAAO,YAAY,iBAAiB,EACpC,CAAC;wBACD,OAAO,OAAO,CAAC,KAAK,CAAA;oBACtB,CAAC;yBAAM,CAAC;wBACN,OAAO,OAAO,CAAC,WAAW,EAAE,IAAI,EAAE,CAAA;oBACpC,CAAC;gBACH,CAAC,CAAC,CAAA;YACJ,CAAC,CAAC,CAAA;QACJ,CAAC;QAED,OAAO;YACL,IAAI,EAAE,MAAM;SACb,CAAA;IACH,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,CAAC,KAAK,CAAC,yBAAyB,EAAE,CAAC,CAAC,CAAA;QAC1C,MAAM,CAAC,CAAA;IACT,CAAC;YAAS,CAAC;QACT,MAAM,WAAW,CAAC,IAAI,CAAC,CAAA;IACzB,CAAC;AACH,CAAC;AAED,aAAa,CAAC,aAAa,GAAG;IAC5B;QACE,IAAI,EAAE,QAAQ;QACd,IAAI,EAAE,MAAM;QACZ,KAAK,EAAE,MAAM;KACd;IACD;QACE,IAAI,EAAE,cAAc;QACpB,IAAI,EAAE,SAAS;QACf,KAAK,EAAE,SAAS;KACjB;IACD;QACE,IAAI,EAAE,SAAS;QACf,IAAI,EAAE,WAAW;QACjB,KAAK,EAAE,WAAW;KACnB;IACD;QACE,IAAI,EAAE,QAAQ;QACd,IAAI,EAAE,kBAAkB;QACxB,KAAK,EAAE,oBAAoB;KAC5B;IACD;QACE,IAAI,EAAE,QAAQ;QACd,IAAI,EAAE,gBAAgB;QACtB,KAAK,EAAE,kBAAkB;KAC1B;CACF,CAAA;AAED,aAAa,CAAC,IAAI,GAAG,iCAAiC,CAAA;AAEtD,4BAAY,CAAC,mBAAmB,CAAC,gBAAgB,EAAE,aAAa,CAAC,CAAA","sourcesContent":["import { URL } from 'url'\n\nimport { TaskRegistry } from '../task-registry'\nimport { ConnectionManager } from '../connection-manager'\n\nasync function HeadlessScrap(step, { logger, data, domain }) {\n const { connection: connectionName, params: stepOptions } = step\n const { headers: 
requestHeaders, path, selectors = [], waitForSelectors, waitForTimeout } = stepOptions || {}\n\n const connection = await ConnectionManager.getConnectionInstanceByName(domain, connectionName)\n\n if (!connection) {\n throw new Error(`Connection '${connectionName}' is not established.`)\n }\n\n const { endpoint, params: connectionParams, acquireSessionPage, releasePage } = connection\n\n const headers = {\n ...requestHeaders\n }\n\n const page = await acquireSessionPage()\n\n try {\n page.on('console', async msg => {\n console.log(`[browser ${msg.type()}] ${msg.text()}`)\n })\n\n page.on('requestfailed', request => {\n console.log('Request failed:')\n console.log(`- URL: ${request.url()}`)\n console.log(`- Method: ${request.method()}`)\n console.log(`- Failure Text: ${request.failure()?.errorText}`)\n console.log(`- Headers:`, request.headers())\n\n // POST 데이터 (필요한 경우)\n if (request.postData()) {\n console.log(`- Post Data: ${request.postData()}`)\n }\n })\n\n await page.setExtraHTTPHeaders(headers)\n await page.goto(new URL(path, endpoint), { waitUntil: 'networkidle2' })\n\n // waitForSelectors, waitForTimeout 처리 추가\n if (waitForSelectors) {\n try {\n await page.waitForFunction(\n selectorsString => {\n const selectors = selectorsString.split(',').map(s => s.trim())\n return selectors.every(selector => {\n const el = document.querySelector(selector)\n return el && el.textContent && el.textContent.trim().length > 0\n })\n },\n { timeout: waitForTimeout ? 
Number(waitForTimeout) : 10000 },\n waitForSelectors // 콤마로 구분된 셀렉터 문자열\n )\n } catch (e) {\n logger.error(`waitForSelectors(${waitForSelectors}) 값이 모두 채워지지 않음:`, e)\n throw e\n }\n } else if (waitForTimeout) {\n await page.waitForTimeout(Number(waitForTimeout))\n }\n\n const result = {}\n\n for (const selector of selectors) {\n const { text, value } = selector\n result[text] = await page.$$eval(value, elements => {\n return elements.map(element => {\n if (\n element instanceof HTMLInputElement ||\n element instanceof HTMLTextAreaElement ||\n element instanceof HTMLSelectElement\n ) {\n return element.value\n } else {\n return element.textContent?.trim()\n }\n })\n })\n }\n\n return {\n data: result\n }\n } catch (e) {\n logger.error('Error in HeadlessScrap:', e)\n throw e\n } finally {\n await releasePage(page)\n }\n}\n\nHeadlessScrap.parameterSpec = [\n {\n type: 'string',\n name: 'path',\n label: 'path'\n },\n {\n type: 'http-headers',\n name: 'headers',\n label: 'headers'\n },\n {\n type: 'options',\n name: 'selectors',\n label: 'selectors'\n },\n {\n type: 'string',\n name: 'waitForSelectors',\n label: 'wait-for-selectors'\n },\n {\n type: 'string',\n name: 'waitForTimeout',\n label: 'wait-for-timeout'\n }\n]\n\nHeadlessScrap.help = 'integration/task/headless-scrap'\n\nTaskRegistry.registerTaskHandler('headless-scrap', HeadlessScrap)\n"]}
|
1
|
+
{"version":3,"file":"headless-scrap.js","sourceRoot":"","sources":["../../../server/engine/task/headless-scrap.ts"],"names":[],"mappings":";;AAAA,6BAAyB;AAEzB,oDAA+C;AAC/C,8DAAyD;AACzD,2FAAgF;AAEhF,KAAK,UAAU,aAAa,CAAC,IAAI,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE;IACzD,MAAM,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,IAAI,CAAA;IAChE,MAAM,EAAE,OAAO,EAAE,cAAc,EAAE,IAAI,EAAE,SAAS,GAAG,EAAE,EAAE,gBAAgB,EAAE,cAAc,EAAE,UAAU,GAAG,CAAC,EAAE,GAAG,WAAW,IAAI,EAAE,CAAA;IAE7H,MAAM,UAAU,GAAG,MAAM,sCAAiB,CAAC,2BAA2B,CAAC,MAAM,EAAE,cAAc,CAAC,CAAA;IAE9F,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,MAAM,IAAI,KAAK,CAAC,eAAe,cAAc,uBAAuB,CAAC,CAAA;IACvE,CAAC;IAED,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,gBAAgB,EAAE,kBAAkB,EAAE,WAAW,EAAE,eAAe,EAAE,qBAAqB,EAAE,GAAG,UAAU,CAAA;IAClI,MAAM,aAAa,GAAG,gBAAgB,EAAE,aAAa,IAAI,QAAQ,CAAA;IAEjE,MAAM,OAAO,GAAG;QACd,GAAG,cAAc;KAClB,CAAA;IAED,IAAI,IAAI,GAAG,IAAI,CAAA;IACf,IAAI,YAAY,GAAG,IAAI,CAAA,CAAE,YAAY;IACrC,IAAI,SAAS,GAAG,IAAI,CAAA;IAEpB,YAAY;IACZ,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,UAAU,EAAE,OAAO,EAAE,EAAE,CAAC;QACvD,IAAI,CAAC;YACH,SAAS;YACT,MAAM,aAAa,GAAG,MAAM,kBAAkB,EAAE,CAAA;YAEhD,kCAAkC;YAClC,IAAI,aAAa,IAAI,OAAO,aAAa,KAAK,QAAQ,IAAI,aAAa,CAAC,IAAI,EAAE,CAAC;gBAC7E,YAAY,GAAG,aAAa,CAAA,CAAE,yCAAyC;gBACvE,IAAI,GAAG,aAAa,CAAC,IAAI,CAAA;YAC3B,CAAC;iBAAM,CAAC;gBACN,IAAI,GAAG,aAAa,CAAA;gBACpB,YAAY,GAAG,EAAE,IAAI,EAAE,qBAAqB,EAAE,KAAK,EAAE,CAAA;YACvD,CAAC;YAED,IAAI,CAAC,EAAE,CAAC,SAAS,EAAE,KAAK,EAAC,GAAG,EAAC,EAAE;gBAC7B,OAAO,CAAC,GAAG,CAAC,YAAY,GAAG,CAAC,IAAI,EAAE,KAAK,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC,CAAA;YACtD,CAAC,CAAC,CAAA;YAEF,IAAI,CAAC,EAAE,CAAC,eAAe,EAAE,OAAO,CAAC,EAAE;gBACjC,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAA;gBAC9B,OAAO,CAAC,GAAG,CAAC,UAAU,OAAO,CAAC,GAAG,EAAE,EAAE,CAAC,CAAA;gBACtC,OAAO,CAAC,GAAG,CAAC,aAAa,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAA;gBAC5C,OAAO,CAAC,GAAG,CAAC,mBAAmB,OAAO,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,CAAC,CAAA;gBAC9D,OAAO,CAAC,GAAG,CAAC,YAAY,EAAE,OAAO,CAAC,OAAO,EAAE,CAAC,CAAA;gBAE5C,oBAAoB;gBACpB,IAAI,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;oBACvB,OAAO,CAAC,GAAG
,CAAC,gBAAgB,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;gBACnD,CAAC;YACH,CAAC,CAAC,CAAA;YAEF,oDAAoD;YACpD,IAAI,CAAC,EAAE,CAAC,UAAU,EAAE,QAAQ,CAAC,EAAE;gBAC7B,IAAI,CAAC;oBACH,IAAI,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,CAAC;wBACrD,MAAM,QAAQ,GAAG,QAAQ,CAAC,OAAO,EAAE,CAAC,UAAU,CAAC,IAAI,EAAE,CAAA;wBACrD,IAAI,QAAQ,CAAC,QAAQ,CAAC,aAAa,CAAC;4BAChC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC;4BAC3B,QAAQ,CAAC,QAAQ,CAAC,SAAS,CAAC;4BAC5B,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;4BAC/B,MAAM,CAAC,IAAI,CAAC,4CAA4C,QAAQ,EAAE,CAAC,CAAA;4BACnE,sEAAsE;wBACxE,CAAC;oBACH,CAAC;gBACH,CAAC;gBAAC,OAAO,UAAU,EAAE,CAAC;oBACpB,MAAM,CAAC,KAAK,CAAC,kCAAkC,EAAE,UAAU,CAAC,CAAA;gBAC9D,CAAC;YACH,CAAC,CAAC,CAAA;YAEF,qBAAqB;YACrB,IAAI,OAAO,GAAG,CAAC,IAAI,eAAe,EAAE,CAAC;gBACnC,MAAM,cAAc,GAAG,MAAM,eAAe,CAAC,IAAI,CAAC,CAAA;gBAClD,IAAI,CAAC,cAAc,EAAE,CAAC;oBACpB,MAAM,CAAC,IAAI,CAAC,mCAAmC,cAAc,iCAAiC,CAAC,CAAA;oBAC/F,MAAM,IAAA,wDAAuB,EAAC,YAAY,EAAE,WAAW,EAAE,MAAM,CAAC,CAAA;oBAEhE,MAAM,YAAY,GAAG,MAAM,qBAAqB,EAAE,CAAA;oBAClD,IAAI,YAAY,IAAI,OAAO,YAAY,KAAK,QAAQ,IAAI,YAAY,CAAC,IAAI,EAAE,CAAC;wBAC1E,YAAY,GAAG,YAAY,CAAA;wBAC3B,IAAI,GAAG,YAAY,CAAC,IAAI,CAAA;oBAC1B,CAAC;yBAAM,CAAC;wBACN,IAAI,GAAG,YAAY,CAAA;wBACnB,YAAY,GAAG,EAAE,IAAI,EAAE,qBAAqB,EAAE,KAAK,EAAE,CAAA;oBACvD,CAAC;gBACH,CAAC;YACH,CAAC;YAED,MAAM,IAAI,CAAC,mBAAmB,CAAC,OAAO,CAAC,CAAA;YACvC,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,SAAG,CAAC,IAAI,EAAE,QAAQ,CAAC,EAAE,EAAE,SAAS,EAAE,cAAc,EAAE,CAAC,CAAA;YAEvE,gCAAgC;YAChC,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAA;YAC7B,IAAI,UAAU,CAAC,QAAQ,CAAC,aAAa,CAAC;gBAClC,UAAU,CAAC,QAAQ,CAAC,QAAQ,CAAC;gBAC7B,UAAU,CAAC,QAAQ,CAAC,SAAS,CAAC;gBAC9B,UAAU,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;gBACjC,MAAM,IAAI,KAAK,CAAC,6BAA6B,UAAU,EAAE,CAAC,CAAA;YAC5D,CAAC;YAED,yCAAyC;YACzC,IAAI,gBAAgB,EAAE,CAAC;gBACrB,IAAI,CAAC;oBACH,MAAM,IAAI,CAAC,eAAe,CACxB,eAAe,CAAC,EAAE;wBAChB,MAAM,SAAS,GAAG,eAAe,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAA;wBAC/D,OAAO,SAAS,CAAC,KAAK,CAAC,QAAQ,CAAC,
EAAE;4BAChC,MAAM,EAAE,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAA;4BAC3C,OAAO,EAAE,IAAI,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAA;wBACjE,CAAC,CAAC,CAAA;oBACJ,CAAC,EACD,EAAE,OAAO,EAAE,cAAc,CAAC,CAAC,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,EAC5D,gBAAgB,CAAC,kBAAkB;qBACpC,CAAA;gBACH,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC;oBACX,MAAM,CAAC,KAAK,CAAC,oBAAoB,gBAAgB,kBAAkB,EAAE,CAAC,CAAC,CAAA;oBACvE,MAAM,CAAC,CAAA;gBACT,CAAC;YACH,CAAC;iBAAM,IAAI,cAAc,EAAE,CAAC;gBAC1B,MAAM,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAA;YACnD,CAAC;YAED,MAAM,MAAM,GAAG,EAAE,CAAA;YAEjB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;gBACjC,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,QAAQ,CAAA;gBAChC,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,QAAQ,CAAC,EAAE;oBACjD,OAAO,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE;wBAC5B,IACE,OAAO,YAAY,gBAAgB;4BACnC,OAAO,YAAY,mBAAmB;4BACtC,OAAO,YAAY,iBAAiB,EACpC,CAAC;4BACD,OAAO,OAAO,CAAC,KAAK,CAAA;wBACtB,CAAC;6BAAM,CAAC;4BACN,OAAO,OAAO,CAAC,WAAW,EAAE,IAAI,EAAE,CAAA;wBACpC,CAAC;oBACH,CAAC,CAAC,CAAA;gBACJ,CAAC,CAAC,CAAA;YACJ,CAAC;YAED,sBAAsB;YACtB,MAAM,IAAA,wDAAuB,EAAC,YAAY,EAAE,WAAW,EAAE,MAAM,CAAC,CAAA;YAEhE,OAAO;gBACL,IAAI,EAAE,MAAM;aACb,CAAA;QAEH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,SAAS,GAAG,KAAK,CAAA;YACjB,MAAM,CAAC,KAAK,CAAC,yBAAyB,OAAO,GAAG,CAAC,UAAU,EAAE,KAAK,CAAC,CAAA;YAEnE,MAAM,IAAA,wDAAuB,EAAC,YAAY,EAAE,WAAW,EAAE,MAAM,CAAC,CAAA;YAChE,IAAI,GAAG,IAAI,CAAA;YACX,YAAY,GAAG,IAAI,CAAA;YAEnB,4CAA4C;YAC5C,MAAM,YAAY,GAAG,KAAK,CAAC,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,CAAA;YACvD,MAAM,kBAAkB,GAAG,YAAY,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,YAAY,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,YAAY,CAAC,QAAQ,CAAC,cAAc,CAAC,IAAI,YAAY,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,YAAY,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAA;YAEjN,IAAI,CAAC,kBAAkB,IAAI,OAAO,KAAK,UAAU,EAAE,CAAC;gBAClD,MAAM,KAAK,CAAA;YACb,CAAC;YAED,MAAM,CAAC,IAAI,CAAC,8BAA8B,OAAO,GAAG,CAAC,IAAI,UAAU,GAAG,CAAC,GAAG,CAAC,CAAA;QAC7E,CAAC;IACH,CAAC;IAED,qCAAqC;IACrC,IAAI,YAAY,EAAE,CAAC;QACjB,MAAM,IAAA,wDAAuB,EAAC,YAAY,EAAE,WAAW,EAAE,MAAM,CAAC,CAAA;IAClE,
CAAC;IACD,MAAM,SAAS,IAAI,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAA;AAC/E,CAAC;AAED,aAAa,CAAC,aAAa,GAAG;IAC5B;QACE,IAAI,EAAE,QAAQ;QACd,IAAI,EAAE,MAAM;QACZ,KAAK,EAAE,MAAM;KACd;IACD;QACE,IAAI,EAAE,cAAc;QACpB,IAAI,EAAE,SAAS;QACf,KAAK,EAAE,SAAS;KACjB;IACD;QACE,IAAI,EAAE,SAAS;QACf,IAAI,EAAE,WAAW;QACjB,KAAK,EAAE,WAAW;KACnB;IACD;QACE,IAAI,EAAE,QAAQ;QACd,IAAI,EAAE,kBAAkB;QACxB,KAAK,EAAE,oBAAoB;KAC5B;IACD;QACE,IAAI,EAAE,QAAQ;QACd,IAAI,EAAE,gBAAgB;QACtB,KAAK,EAAE,kBAAkB;KAC1B;IACD;QACE,IAAI,EAAE,QAAQ;QACd,IAAI,EAAE,YAAY;QAClB,KAAK,EAAE,iBAAiB;QACxB,KAAK,EAAE,CAAC;KACT;CACF,CAAA;AAED,aAAa,CAAC,IAAI,GAAG,iCAAiC,CAAA;AAEtD,4BAAY,CAAC,mBAAmB,CAAC,gBAAgB,EAAE,aAAa,CAAC,CAAA","sourcesContent":["import { URL } from 'url'\n\nimport { TaskRegistry } from '../task-registry'\nimport { ConnectionManager } from '../connection-manager'\nimport { safeReleasePageResource } from './utils/headless-request-with-recovery'\n\nasync function HeadlessScrap(step, { logger, data, domain }) {\n const { connection: connectionName, params: stepOptions } = step\n const { headers: requestHeaders, path, selectors = [], waitForSelectors, waitForTimeout, maxRetries = 2 } = stepOptions || {}\n\n const connection = await ConnectionManager.getConnectionInstanceByName(domain, connectionName)\n\n if (!connection) {\n throw new Error(`Connection '${connectionName}' is not established.`)\n }\n\n const { endpoint, params: connectionParams, acquireSessionPage, releasePage, validateSession, reAuthenticateSession } = connection\n const loginPagePath = connectionParams?.loginPagePath || '/login'\n\n const headers = {\n ...requestHeaders\n }\n\n let page = null\n let pageResource = null // 리소스 추적 객체\n let lastError = null\n\n // 재시도 로직 추가\n for (let attempt = 0; attempt <= maxRetries; attempt++) {\n try {\n // 페이지 획득\n const sessionResult = await acquireSessionPage()\n \n // reAuthenticateSession의 반환 형태 확인\n if (sessionResult && typeof sessionResult === 'object' && sessionResult.page) {\n pageResource = sessionResult // {page, 
browser, requiresManualRelease}\n page = sessionResult.page\n } else {\n page = sessionResult\n pageResource = { page, requiresManualRelease: false }\n }\n\n page.on('console', async msg => {\n console.log(`[browser ${msg.type()}] ${msg.text()}`)\n })\n\n page.on('requestfailed', request => {\n console.log('Request failed:')\n console.log(`- URL: ${request.url()}`)\n console.log(`- Method: ${request.method()}`)\n console.log(`- Failure Text: ${request.failure()?.errorText}`)\n console.log(`- Headers:`, request.headers())\n\n // POST 데이터 (필요한 경우)\n if (request.postData()) {\n console.log(`- Post Data: ${request.postData()}`)\n }\n })\n\n // 302 리디렉션 감지 추가 - 이벤트 핸들러는 비동기 리소스 정리 불가하므로 단순 로깅만\n page.on('response', response => {\n try {\n if ([301, 302, 307, 308].includes(response.status())) {\n const location = response.headers()['location'] || ''\n if (location.includes(loginPagePath) || \n location.includes('/login') || \n location.includes('/signin') || \n location.includes('/auth')) {\n logger.warn(`Login redirect detected during response: ${location}`)\n // Note: Cannot throw from event handler - will be caught in main flow\n }\n }\n } catch (eventError) {\n logger.error('Error in response event handler:', eventError)\n }\n })\n\n // 첫 번째 시도가 아니면 세션 검증\n if (attempt > 0 && validateSession) {\n const isSessionValid = await validateSession(page)\n if (!isSessionValid) {\n logger.warn(`Session invalid for connection '${connectionName}', attempting re-authentication`)\n await safeReleasePageResource(pageResource, releasePage, logger)\n \n const reauthResult = await reAuthenticateSession()\n if (reauthResult && typeof reauthResult === 'object' && reauthResult.page) {\n pageResource = reauthResult\n page = reauthResult.page\n } else {\n page = reauthResult\n pageResource = { page, requiresManualRelease: false }\n }\n }\n }\n\n await page.setExtraHTTPHeaders(headers)\n await page.goto(new URL(path, endpoint), { waitUntil: 'networkidle2' })\n\n // 페이지 로드 후 로그인 페이지로 
리디렉션되었는지 확인\n const currentUrl = page.url()\n if (currentUrl.includes(loginPagePath) || \n currentUrl.includes('/login') || \n currentUrl.includes('/signin') || \n currentUrl.includes('/auth')) {\n throw new Error(`Page redirected to login: ${currentUrl}`)\n }\n\n // waitForSelectors, waitForTimeout 처리 추가\n if (waitForSelectors) {\n try {\n await page.waitForFunction(\n selectorsString => {\n const selectors = selectorsString.split(',').map(s => s.trim())\n return selectors.every(selector => {\n const el = document.querySelector(selector)\n return el && el.textContent && el.textContent.trim().length > 0\n })\n },\n { timeout: waitForTimeout ? Number(waitForTimeout) : 10000 },\n waitForSelectors // 콤마로 구분된 셀렉터 문자열\n )\n } catch (e) {\n logger.error(`waitForSelectors(${waitForSelectors}) 값이 모두 채워지지 않음:`, e)\n throw e\n }\n } else if (waitForTimeout) {\n await page.waitForTimeout(Number(waitForTimeout))\n }\n\n const result = {}\n\n for (const selector of selectors) {\n const { text, value } = selector\n result[text] = await page.$$eval(value, elements => {\n return elements.map(element => {\n if (\n element instanceof HTMLInputElement ||\n element instanceof HTMLTextAreaElement ||\n element instanceof HTMLSelectElement\n ) {\n return element.value\n } else {\n return element.textContent?.trim()\n }\n })\n })\n }\n\n // 성공시 페이지 릴리즈 후 결과 반환\n await safeReleasePageResource(pageResource, releasePage, logger)\n\n return {\n data: result\n }\n\n } catch (error) {\n lastError = error\n logger.error(`HeadlessScrap attempt ${attempt + 1} failed:`, error)\n\n await safeReleasePageResource(pageResource, releasePage, logger)\n page = null\n pageResource = null\n\n // 로그인 관련 에러나 복구 가능한 에러가 아니거나 마지막 재시도면 에러 발생\n const errorMessage = error.message?.toLowerCase() || ''\n const isRecoverableError = errorMessage.includes('login') || errorMessage.includes('redirect') || errorMessage.includes('unauthorized') || errorMessage.includes('forbidden') || errorMessage.includes('session')\n\n 
if (!isRecoverableError || attempt === maxRetries) {\n throw error\n }\n\n logger.info(`Retrying HeadlessScrap... (${attempt + 2}/${maxRetries + 1})`)\n }\n }\n\n // 모든 재시도가 실패한 경우 - 혹시 남은 리소스가 있으면 정리\n if (pageResource) {\n await safeReleasePageResource(pageResource, releasePage, logger)\n }\n throw lastError || new Error('HeadlessScrap failed after all retry attempts')\n}\n\nHeadlessScrap.parameterSpec = [\n {\n type: 'string',\n name: 'path',\n label: 'path'\n },\n {\n type: 'http-headers',\n name: 'headers',\n label: 'headers'\n },\n {\n type: 'options',\n name: 'selectors',\n label: 'selectors'\n },\n {\n type: 'string',\n name: 'waitForSelectors',\n label: 'wait-for-selectors'\n },\n {\n type: 'string',\n name: 'waitForTimeout',\n label: 'wait-for-timeout'\n },\n {\n type: 'number',\n name: 'maxRetries',\n label: 'maximum-retries',\n value: 2\n }\n]\n\nHeadlessScrap.help = 'integration/task/headless-scrap'\n\nTaskRegistry.registerTaskHandler('headless-scrap', HeadlessScrap)\n"]}
|
@@ -16,3 +16,7 @@ export declare function executeHeadlessRequestWithRecovery(connectionName: strin
|
|
16
16
|
data: any;
|
17
17
|
domain: any;
|
18
18
|
}): Promise<any>;
|
19
|
+
/**
|
20
|
+
* Safely release page resource with comprehensive error handling
|
21
|
+
*/
|
22
|
+
export declare function safeReleasePageResource(pageResource: any, releasePage: Function, logger: any): Promise<void>;
|
@@ -1,6 +1,7 @@
|
|
1
1
|
"use strict";
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
3
3
|
exports.executeHeadlessRequestWithRecovery = executeHeadlessRequestWithRecovery;
|
4
|
+
exports.safeReleasePageResource = safeReleasePageResource;
|
4
5
|
const utils_1 = require("@things-factory/utils");
|
5
6
|
const connection_manager_1 = require("../../connection-manager");
|
6
7
|
/**
|
@@ -18,14 +19,25 @@ async function executeHeadlessRequestWithRecovery(connectionName, options, conte
|
|
18
19
|
if (!connection) {
|
19
20
|
throw new Error(`Connection '${connectionName}' is not established.`);
|
20
21
|
}
|
21
|
-
const { endpoint, acquireSessionPage, releasePage, reAuthenticateSession, validateSession } = connection;
|
22
|
+
const { endpoint, params: connectionParams, acquireSessionPage, releasePage, reAuthenticateSession, validateSession } = connection;
|
23
|
+
const loginPagePath = connectionParams?.loginPagePath || '/login';
|
22
24
|
let page = null;
|
25
|
+
let pageResource = null; // 리소스 추적 객체
|
23
26
|
let lastError = null;
|
24
27
|
// 재시도 로직
|
25
28
|
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
26
29
|
try {
|
27
30
|
// 페이지 획득
|
28
|
-
|
31
|
+
const sessionResult = await acquireSessionPage();
|
32
|
+
// reAuthenticateSession의 반환 형태 확인
|
33
|
+
if (sessionResult && typeof sessionResult === 'object' && sessionResult.page) {
|
34
|
+
pageResource = sessionResult; // {page, browser, requiresManualRelease}
|
35
|
+
page = sessionResult.page;
|
36
|
+
}
|
37
|
+
else {
|
38
|
+
page = sessionResult;
|
39
|
+
pageResource = { page, requiresManualRelease: false };
|
40
|
+
}
|
29
41
|
// 페이지가 올바른 도메인에 있는지 확인
|
30
42
|
const currentUrl = page.url();
|
31
43
|
const targetDomain = new URL(endpoint).origin;
|
@@ -33,13 +45,30 @@ async function executeHeadlessRequestWithRecovery(connectionName, options, conte
|
|
33
45
|
logger.info(`Navigating to target domain: ${targetDomain}`);
|
34
46
|
await page.goto(targetDomain, { waitUntil: 'networkidle2' });
|
35
47
|
}
|
36
|
-
//
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
48
|
+
// 세션 검증 (모든 시도에서 수행)
|
49
|
+
const isSessionValid = await validateSession(page);
|
50
|
+
if (!isSessionValid) {
|
51
|
+
logger.warn(`Session invalid for connection '${connectionName}', attempting re-authentication (attempt: ${attempt + 1})`);
|
52
|
+
await safeReleasePageResource(pageResource, releasePage, logger);
|
53
|
+
try {
|
54
|
+
const reauthResult = await reAuthenticateSession();
|
55
|
+
if (reauthResult && typeof reauthResult === 'object' && reauthResult.page) {
|
56
|
+
pageResource = reauthResult;
|
57
|
+
page = reauthResult.page;
|
58
|
+
}
|
59
|
+
else {
|
60
|
+
page = reauthResult;
|
61
|
+
pageResource = { page, requiresManualRelease: false };
|
62
|
+
}
|
63
|
+
logger.info(`Re-authentication successful for connection '${connectionName}'`);
|
64
|
+
}
|
65
|
+
catch (reauthError) {
|
66
|
+
logger.error(`Re-authentication failed for connection '${connectionName}':`, reauthError);
|
67
|
+
// 재인증 실패 시 이번 시도는 실패로 처리하고 다음 시도로 넘어감
|
68
|
+
if (attempt === maxRetries) {
|
69
|
+
throw new Error(`Re-authentication failed after ${maxRetries + 1} attempts: ${reauthError.message}`);
|
70
|
+
}
|
71
|
+
continue;
|
43
72
|
}
|
44
73
|
}
|
45
74
|
// URL 구성
|
@@ -78,16 +107,43 @@ async function executeHeadlessRequestWithRecovery(connectionName, options, conte
|
|
78
107
|
requestOptions.body = typeof requestBody === 'string' ? requestBody : JSON.stringify(requestBody);
|
79
108
|
}
|
80
109
|
}
|
81
|
-
// fetch 요청 실행 - try-catch로 네트워크 에러 처리
|
82
|
-
const response = await page.evaluate(async (urlString, opts) => {
|
110
|
+
// fetch 요청 실행 - try-catch로 네트워크 에러 처리
|
111
|
+
const response = await page.evaluate(async (urlString, opts, loginPagePath) => {
|
83
112
|
try {
|
84
|
-
const response = await fetch(urlString, opts);
|
113
|
+
const response = await fetch(urlString, { ...opts, redirect: 'manual' });
|
85
114
|
const result = {
|
86
115
|
ok: response.ok,
|
87
116
|
status: response.status,
|
88
117
|
statusText: response.statusText,
|
89
118
|
headers: Object.fromEntries(response.headers.entries())
|
90
119
|
};
|
120
|
+
// 302 리디렉션 감지 - 로그인 페이지로의 리디렉션 체크
|
121
|
+
if ([301, 302, 307, 308].includes(response.status)) {
|
122
|
+
const location = response.headers.get('location') || '';
|
123
|
+
// connection의 loginPagePath와 일반적인 로그인 경로들을 체크
|
124
|
+
if (location.includes(loginPagePath) ||
|
125
|
+
location.includes('/login') ||
|
126
|
+
location.includes('/signin') ||
|
127
|
+
location.includes('/auth')) {
|
128
|
+
return {
|
129
|
+
...result,
|
130
|
+
error: `Redirected to login page: ${location}`,
|
131
|
+
redirectedToLogin: true,
|
132
|
+
location,
|
133
|
+
data: null
|
134
|
+
};
|
135
|
+
}
|
136
|
+
// 다른 리디렉션은 follow
|
137
|
+
const redirectResponse = await fetch(urlString, opts);
|
138
|
+
return {
|
139
|
+
ok: redirectResponse.ok,
|
140
|
+
status: redirectResponse.status,
|
141
|
+
statusText: redirectResponse.statusText,
|
142
|
+
headers: Object.fromEntries(redirectResponse.headers.entries()),
|
143
|
+
data: redirectResponse.ok ? await redirectResponse.json().catch(() => redirectResponse.text()).catch(() => null) : null,
|
144
|
+
error: redirectResponse.ok ? null : `HTTP ${redirectResponse.status}: ${redirectResponse.statusText}`
|
145
|
+
};
|
146
|
+
}
|
91
147
|
if (!response.ok) {
|
92
148
|
return {
|
93
149
|
...result,
|
@@ -120,38 +176,82 @@ async function executeHeadlessRequestWithRecovery(connectionName, options, conte
|
|
120
176
|
networkError: true
|
121
177
|
};
|
122
178
|
}
|
123
|
-
}, url.toString(), requestOptions);
|
179
|
+
}, url.toString(), requestOptions, loginPagePath);
|
124
180
|
// 네트워크 에러 체크 (fetch 자체가 실패한 경우)
|
125
181
|
if (response.networkError) {
|
126
182
|
if (attempt < maxRetries) {
|
127
183
|
logger.warn(`Network error detected: ${response.error}, retrying... (${attempt + 1}/${maxRetries + 1})`);
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
}
|
184
|
+
await safeReleasePageResource(pageResource, releasePage, logger);
|
185
|
+
page = null;
|
186
|
+
pageResource = null;
|
132
187
|
continue;
|
133
188
|
}
|
134
189
|
else {
|
135
|
-
|
136
|
-
await releasePage(page);
|
137
|
-
}
|
190
|
+
await safeReleasePageResource(pageResource, releasePage, logger);
|
138
191
|
throw new Error(`Network error after ${maxRetries + 1} attempts: ${response.error}`);
|
139
192
|
}
|
140
193
|
}
|
194
|
+
// 로그인 리디렉션 감지 처리
|
195
|
+
if (response.redirectedToLogin) {
|
196
|
+
if (attempt < maxRetries) {
|
197
|
+
logger.warn(`Login redirect detected: ${response.location}, performing re-authentication... (${attempt + 1}/${maxRetries + 1})`);
|
198
|
+
await safeReleasePageResource(pageResource, releasePage, logger);
|
199
|
+
// CRITICAL: 실제 재인증 수행
|
200
|
+
try {
|
201
|
+
const reauthResult = await reAuthenticateSession();
|
202
|
+
if (reauthResult && typeof reauthResult === 'object' && reauthResult.page) {
|
203
|
+
pageResource = reauthResult;
|
204
|
+
page = reauthResult.page;
|
205
|
+
}
|
206
|
+
else {
|
207
|
+
page = reauthResult;
|
208
|
+
pageResource = { page, requiresManualRelease: false };
|
209
|
+
}
|
210
|
+
logger.info(`Re-authentication successful after login redirect for connection '${connectionName}'`);
|
211
|
+
}
|
212
|
+
catch (reauthError) {
|
213
|
+
logger.error(`Re-authentication failed after login redirect for connection '${connectionName}':`, reauthError);
|
214
|
+
if (attempt === maxRetries) {
|
215
|
+
throw new Error(`Re-authentication failed after login redirect: ${reauthError.message}`);
|
216
|
+
}
|
217
|
+
continue;
|
218
|
+
}
|
219
|
+
continue;
|
220
|
+
}
|
221
|
+
else {
|
222
|
+
await safeReleasePageResource(pageResource, releasePage, logger);
|
223
|
+
throw new Error(`Login redirect after ${maxRetries + 1} attempts: ${response.error}`);
|
224
|
+
}
|
225
|
+
}
|
141
226
|
// 세션 타임아웃 관련 에러 체크
|
142
227
|
if (!response.ok && isSessionTimeoutError(response.status)) {
|
143
228
|
if (attempt < maxRetries) {
|
144
|
-
logger.warn(`Session timeout detected (${response.status}),
|
145
|
-
|
146
|
-
|
147
|
-
|
229
|
+
logger.warn(`Session timeout detected (${response.status}), performing re-authentication... (${attempt + 1}/${maxRetries + 1})`);
|
230
|
+
await safeReleasePageResource(pageResource, releasePage, logger);
|
231
|
+
// CRITICAL: 실제 재인증 수행
|
232
|
+
try {
|
233
|
+
const reauthResult = await reAuthenticateSession();
|
234
|
+
if (reauthResult && typeof reauthResult === 'object' && reauthResult.page) {
|
235
|
+
pageResource = reauthResult;
|
236
|
+
page = reauthResult.page;
|
237
|
+
}
|
238
|
+
else {
|
239
|
+
page = reauthResult;
|
240
|
+
pageResource = { page, requiresManualRelease: false };
|
241
|
+
}
|
242
|
+
logger.info(`Re-authentication successful after session timeout for connection '${connectionName}'`);
|
243
|
+
}
|
244
|
+
catch (reauthError) {
|
245
|
+
logger.error(`Re-authentication failed after session timeout for connection '${connectionName}':`, reauthError);
|
246
|
+
if (attempt === maxRetries) {
|
247
|
+
throw new Error(`Re-authentication failed after session timeout: ${reauthError.message}`);
|
248
|
+
}
|
249
|
+
continue;
|
148
250
|
}
|
149
251
|
continue;
|
150
252
|
}
|
151
253
|
else {
|
152
|
-
|
153
|
-
await releasePage(page);
|
154
|
-
}
|
254
|
+
await safeReleasePageResource(pageResource, releasePage, logger);
|
155
255
|
throw new Error(`Session timeout after ${maxRetries + 1} attempts: ${response.error}`);
|
156
256
|
}
|
157
257
|
}
|
@@ -165,18 +265,15 @@ async function executeHeadlessRequestWithRecovery(connectionName, options, conte
|
|
165
265
|
status: response.status,
|
166
266
|
headers: response.headers
|
167
267
|
};
|
168
|
-
|
169
|
-
await releasePage(page);
|
170
|
-
}
|
268
|
+
await safeReleasePageResource(pageResource, releasePage, logger);
|
171
269
|
return result;
|
172
270
|
}
|
173
271
|
catch (error) {
|
174
272
|
lastError = error;
|
175
273
|
logger.error(`Headless request attempt ${attempt + 1} failed:`, error);
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
}
|
274
|
+
await safeReleasePageResource(pageResource, releasePage, logger);
|
275
|
+
page = null;
|
276
|
+
pageResource = null;
|
180
277
|
// 세션 관련 에러가 아니거나 마지막 재시도면 에러 발생
|
181
278
|
if (!isRecoverableError(error) || attempt === maxRetries) {
|
182
279
|
throw error;
|
@@ -184,9 +281,9 @@ async function executeHeadlessRequestWithRecovery(connectionName, options, conte
|
|
184
281
|
logger.info(`Retrying request... (${attempt + 2}/${maxRetries + 1})`);
|
185
282
|
}
|
186
283
|
}
|
187
|
-
// 모든 재시도가 실패한 경우 - 혹시 남은
|
188
|
-
if (
|
189
|
-
await releasePage
|
284
|
+
// 모든 재시도가 실패한 경우 - 혹시 남은 리소스가 있으면 정리
|
285
|
+
if (pageResource) {
|
286
|
+
await safeReleasePageResource(pageResource, releasePage, logger);
|
190
287
|
}
|
191
288
|
throw lastError || new Error('Request failed after all retry attempts');
|
192
289
|
}
|
@@ -214,8 +311,70 @@ function isRecoverableError(error) {
|
|
214
311
|
'session',
|
215
312
|
'authentication',
|
216
313
|
'login',
|
217
|
-
'expired'
|
314
|
+
'expired',
|
315
|
+
'redirected to login' // 로그인 리디렉션 추가
|
218
316
|
];
|
219
317
|
return recoverableErrorKeywords.some(keyword => errorMessage.includes(keyword));
|
220
318
|
}
|
319
|
+
/**
 * Release a headless page resource without letting cleanup failures escape.
 *
 * Accepts either a bare page object or a wrapper of the form
 * `{ page, browser, requiresManualRelease }`. Every release step runs inside a
 * try/catch and failures are reported through `logger` instead of being rethrown.
 *
 * @param {object|null|undefined} pageResource - Bare page or wrapper object; a falsy value is a no-op.
 * @param {Function} releasePage - Invoked with the page on the standard release path.
 * @param {object} logger - Must provide `info`, `warn` and `error`.
 * @returns {Promise<void>}
 */
async function safeReleasePageResource(pageResource, releasePage, logger) {
    if (!pageResource) {
        return;
    }
    // A wrapper is recognised by its `requiresManualRelease` marker as well as by a
    // truthy `page`. Checking only `pageResource.page` misclassifies
    // `{ page: null, requiresManualRelease: false }` as a bare page and hands the
    // wrapper itself to releasePage().
    const isWrapper = typeof pageResource === 'object' &&
        (Boolean(pageResource.page) || 'requiresManualRelease' in pageResource);
    const page = isWrapper ? pageResource.page : pageResource;
    try {
        if (isWrapper && pageResource.requiresManualRelease && pageResource.browser) {
            // Manual release: close the page first, then hand the browser back to the pool.
            // Each step has its own try/catch so a failed close does not prevent the release.
            try {
                if (page && !page.isClosed()) {
                    await page.close();
                    logger.info('Page closed during manual resource release');
                }
            }
            catch (closeError) {
                logger.error('Failed to close page during manual release:', closeError);
            }
            try {
                // NOTE(review): required lazily, presumably to avoid a circular import — confirm.
                const { getHeadlessPool } = require('../../resource-pool/headless-pool');
                const pool = getHeadlessPool();
                await pool.release(pageResource.browser);
                logger.info('Browser manually released to pool');
            }
            catch (releaseError) {
                logger.error('Failed to manually release browser to pool:', releaseError);
            }
            return;
        }
        if (!page) {
            // Wrapper that never received a page: there is nothing to hand to releasePage().
            logger.warn('Page resource has no page to release; skipping release');
            return;
        }
        await releasePage(page);
        logger.info(isWrapper
            ? 'Page released through standard releasePage method'
            : 'Simple page resource released');
    }
    catch (error) {
        logger.error('Critical error during page resource release:', error);
        // Last resort: try to force close the page if it exists and looks like a page.
        try {
            if (page && typeof page.isClosed === 'function' && !page.isClosed()) {
                await page.close();
                logger.warn('Force closed page as last resort');
            }
        }
        catch (forceCloseError) {
            logger.error('Failed to force close page:', forceCloseError);
        }
    }
}
|
221
380
|
//# sourceMappingURL=headless-request-with-recovery.js.map
|