@clazic/urban 0.2.9 → 0.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent/skills/_dbpia-access-filter.js +109 -0
- package/src/agent/skills/_normalize.js +228 -0
- package/src/agent/skills/_registry.js +79 -0
- package/src/agent/skills/base.js +90 -0
- package/src/agent/skills/dbpia.js +678 -0
- package/src/agent/skills/nanet.js +485 -0
- package/src/agent/skills/prism.js +150 -0
|
@@ -0,0 +1,678 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DBpia 학술 DB 외부검색 스킬 (B2B 기관 라이선스)
|
|
3
|
+
*
|
|
4
|
+
* - 세션: Bootstrap → b2bLoginProc → 쿠키 보관 (TTL 25분)
|
|
5
|
+
* - 검색: POST /api/search/list (JSON)
|
|
6
|
+
* - Capabilities: search + normalize + download (via the B2B institution session); discover is not supported in Phase 1 and returns an empty list
|
|
7
|
+
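* - Result filter: thesis records (nodeId starting with "T") are dropped and the optional access cache is applied; externalLink is NOT used as a filter condition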
|
|
8
|
+
*/
|
|
9
|
+
import { Agent, fetch, request } from 'undici';
|
|
10
|
+
import zlib from 'node:zlib';
|
|
11
|
+
import { promisify } from 'node:util';
|
|
12
|
+
import { Skill } from './base.js';
|
|
13
|
+
import { normalizeDbpiaItem } from './_normalize.js';
|
|
14
|
+
import { getSecureConfig } from '../../kb/db.js';
|
|
15
|
+
|
|
16
|
+
// ── 상수 ─────────────────────────────────────────────────────────────
|
|
17
|
+
|
|
18
|
+
const BASE_URL = 'https://www.dbpia.co.kr';
|
|
19
|
+
const URLS = {
|
|
20
|
+
bootstrap: `${BASE_URL}/`,
|
|
21
|
+
loginPage: `${BASE_URL}/member/b2bLogin`,
|
|
22
|
+
loginProc: `${BASE_URL}/member/b2bLoginProc`,
|
|
23
|
+
searchPage: `${BASE_URL}/search/topSearch`,
|
|
24
|
+
searchList: `${BASE_URL}/api/search/list`,
|
|
25
|
+
me: `${BASE_URL}/member/me`,
|
|
26
|
+
articleDetail: (nodeId) => `${BASE_URL}/journal/articleDetail?nodeId=${encodeURIComponent(nodeId)}`,
|
|
27
|
+
pdfView: (nodeId) => `${BASE_URL}/pdf/pdfView.do?nodeId=${encodeURIComponent(nodeId)}&width=1920`,
|
|
28
|
+
downloadData: `${BASE_URL}/download/downloadData`,
|
|
29
|
+
};
|
|
30
|
+
|
|
31
|
+
const SESSION_TTL_MS = 25 * 60 * 1_000; // 25분
|
|
32
|
+
const REQUEST_DELAY_MS = 800; // 요청 간 최소 간격 (ms)
|
|
33
|
+
const DEFAULT_TIMEOUT_MS = 15_000;
|
|
34
|
+
|
|
35
|
+
// Chrome 124 기반 공통 브라우저 헤더
|
|
36
|
+
const CHROME_UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36';
|
|
37
|
+
const HEADERS_NAV = {
|
|
38
|
+
'user-agent': CHROME_UA,
|
|
39
|
+
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
|
|
40
|
+
'accept-language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
|
|
41
|
+
'accept-encoding': 'gzip, deflate, br',
|
|
42
|
+
'sec-ch-ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
|
|
43
|
+
'sec-ch-ua-mobile': '?0',
|
|
44
|
+
'sec-ch-ua-platform': '"macOS"',
|
|
45
|
+
'upgrade-insecure-requests': '1',
|
|
46
|
+
};
|
|
47
|
+
const HEADERS_XHR = {
|
|
48
|
+
'user-agent': CHROME_UA,
|
|
49
|
+
'accept': 'application/json, text/plain, */*',
|
|
50
|
+
'accept-language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
|
|
51
|
+
'accept-encoding': 'gzip, deflate, br',
|
|
52
|
+
'sec-ch-ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
|
|
53
|
+
'sec-ch-ua-mobile': '?0',
|
|
54
|
+
'sec-ch-ua-platform': '"macOS"',
|
|
55
|
+
'sec-fetch-dest': 'empty',
|
|
56
|
+
'sec-fetch-mode': 'cors',
|
|
57
|
+
'sec-fetch-site': 'same-origin',
|
|
58
|
+
'x-requested-with': 'XMLHttpRequest',
|
|
59
|
+
};
|
|
60
|
+
|
|
61
|
+
// ── 압축 해제 ─────────────────────────────────────────────────────────
|
|
62
|
+
|
|
63
|
+
const _gunzip = promisify(zlib.gunzip);
const _brotliDecompress = promisify(zlib.brotliDecompress);
const _inflate = promisify(zlib.inflate);
const _inflateRaw = promisify(zlib.inflateRaw);

/**
 * Drain a response body and return it as UTF-8 text, undoing the
 * Content-Encoding (br, gzip or deflate) when one is declared.
 *
 * Decoding is best-effort: if decompression fails, the raw bytes are decoded
 * as UTF-8 and returned instead of throwing.
 *
 * @param {{ body: AsyncIterable<Buffer>, headers?: Record<string, string | string[]> }} res
 *   Response-like object whose `body` yields Buffer chunks.
 * @returns {Promise<string>} Decoded body text.
 */
async function readBody(res) {
  const chunks = [];
  for await (const chunk of res.body) chunks.push(chunk);
  const buf = Buffer.concat(chunks);

  // A repeated header can arrive as an array; flatten it so the encoding
  // sniffing below never calls a string method on a non-string.
  const header = res.headers?.['content-encoding'] ?? '';
  const enc = (Array.isArray(header) ? header.join(',') : String(header)).toLowerCase();

  try {
    if (enc.includes('br')) return (await _brotliDecompress(buf)).toString('utf-8');
    if (enc.includes('gzip')) return (await _gunzip(buf)).toString('utf-8');
    if (enc.includes('deflate')) {
      // "deflate" should be zlib-wrapped, but some servers send a raw deflate
      // stream under the same label — retry without the zlib header check.
      try {
        return (await _inflate(buf)).toString('utf-8');
      } catch {
        return (await _inflateRaw(buf)).toString('utf-8');
      }
    }
  } catch { /* decompression failed — fall through and return the raw bytes */ }
  return buf.toString('utf-8');
}
|
|
79
|
+
|
|
80
|
+
// ── 쿠키 유틸 ────────────────────────────────────────────────────────
|
|
81
|
+
|
|
82
|
+
/**
 * Return the Set-Cookie header lines of a response as an array.
 *
 * @param {Record<string, string | string[] | undefined>} headers Response headers keyed by lower-case name.
 * @returns {string[]} The header lines; an empty array when the header is absent.
 */
function parseSetCookies(headers) {
  const { 'set-cookie': setCookie } = headers;
  // A single header arrives as a bare string — wrap it so callers can always iterate.
  if (setCookie && !Array.isArray(setCookie)) return [setCookie];
  return setCookie || [];
}
|
|
87
|
+
|
|
88
|
+
/**
 * Fold Set-Cookie header lines into a name → value cookie jar (mutated in place).
 *
 * Only the leading `name=value` pair of each line is considered; attributes
 * (Path, Expires, …) are ignored. A line is skipped when:
 *  - its first segment has no `=` or an empty name, or
 *  - its value is empty or the literal string "null", so that a reset cookie
 *    such as `dbp_pxy=null` does not overwrite a live one.
 *
 * @param {Record<string, string>} jar Cookie jar to update.
 * @param {string[]} setCookieArray Raw Set-Cookie header lines.
 * @returns {void}
 */
function mergeCookies(jar, setCookieArray) {
  for (const line of setCookieArray) {
    // Isolate the cookie pair first: searching the whole line for "=" would
    // mistake an attribute (e.g. "flag; Path=/") for the cookie itself.
    const pair = String(line).split(';', 1)[0];
    const eq = pair.indexOf('=');
    if (eq < 0) continue;

    const name = pair.slice(0, eq).trim();
    const value = pair.slice(eq + 1).trim();
    if (!name || value === 'null' || value === '') continue;

    jar[name] = value;
  }
}
|
|
99
|
+
|
|
100
|
+
/**
 * Serialize a cookie jar into a Cookie request-header value.
 * Entries with a falsy value or the literal string "null" are left out.
 *
 * @param {Record<string, string>} jar Cookie name → value map.
 * @returns {string} `name=value` pairs joined with "; " (empty string for an empty jar).
 */
function cookieStr(jar) {
  const pairs = [];
  for (const [name, value] of Object.entries(jar)) {
    if (!value || value === 'null') continue;
    pairs.push(`${name}=${value}`);
  }
  return pairs.join('; ');
}
|
|
106
|
+
|
|
107
|
+
// ── 인증정보 로드 ─────────────────────────────────────────────────────
|
|
108
|
+
|
|
109
|
+
/**
 * Read the DBpia B2B login parameters from secure config.
 *
 * `b2bAcc` and `userPass` are mandatory. The institution fields (`b2bId`,
 * `b2bName`, `b2bLoginType`) fall back to built-in defaults when unset; if
 * reading any of them throws, all three use the defaults.
 *
 * @returns {{ b2bAcc: string, userPass: string, b2bId: string, b2bName: string, b2bLoginType: string }}
 * @throws {Error} When the account id or password is missing or unreadable.
 */
function loadCredentials() {
  const account = {};
  try {
    account.b2bAcc = getSecureConfig('source.dbpia.b2bAcc');
    account.userPass = getSecureConfig('source.dbpia.userPass');
  } catch { /* config store unavailable — reported as missing credentials below */ }

  const { b2bAcc, userPass } = account;
  if (!(b2bAcc && userPass)) {
    throw new Error(
      'DBpia 인증정보가 없습니다. 설정 → 계정 정보 화면에서 DBpia 기관 계정을 저장하세요.'
    );
  }

  // Institution constants: secure config wins, otherwise the built-in defaults.
  const fallback = {
    b2bId: 'ICST00001014',
    b2bName: '울산연구원',
    b2bLoginType: '151004',
  };
  let institution;
  try {
    institution = {
      b2bId: getSecureConfig('source.dbpia.b2bId') || fallback.b2bId,
      b2bName: getSecureConfig('source.dbpia.b2bName') || fallback.b2bName,
      b2bLoginType: getSecureConfig('source.dbpia.b2bLoginType') || fallback.b2bLoginType,
    };
  } catch {
    // Any read failure discards partial results and uses every default.
    institution = fallback;
  }

  return { b2bAcc, userPass, ...institution };
}
|
|
136
|
+
|
|
137
|
+
// ── 세션 클래스 ──────────────────────────────────────────────────────
|
|
138
|
+
|
|
139
|
+
/**
 * Cookie-backed B2B (institution) session for www.dbpia.co.kr.
 *
 * Owns an undici Agent, a name → value cookie jar, the time of the last
 * successful login and the time slot of the last throttled request.
 * Other code in this module reads `_agent` and `_cookies` and calls
 * `_throttle()` directly, so those members keep their names.
 */
class DbpiaSession {
  constructor() {
    this._agent = new Agent({
      connect: { connectTimeout: 15_000 },
      connections: 3,
      pipelining: 1,
      keepAliveTimeout: 30_000,
      keepAliveMaxTimeout: 60_000,
      maxRedirections: 5,
      headersTimeout: DEFAULT_TIMEOUT_MS,
      bodyTimeout: DEFAULT_TIMEOUT_MS,
    });
    this._cookies = {};
    this._loginTime = null;
    this._lastRequestTime = 0;
    this._bootstrapReady = false;
    this._credentials = null;
  }

  /** True when no login has happened yet or the last one is older than SESSION_TTL_MS. */
  get isExpired() {
    return !this._loginTime || (Date.now() - this._loginTime) > SESSION_TTL_MS;
  }

  /**
   * Wait until at least REQUEST_DELAY_MS has passed since the previous
   * throttled request.
   *
   * The next slot is reserved synchronously, before awaiting, so callers that
   * arrive concurrently are queued REQUEST_DELAY_MS apart instead of all
   * waking up at the same moment.
   *
   * @returns {Promise<void>}
   */
  async _throttle() {
    const now = Date.now();
    const slot = Math.max(now, this._lastRequestTime + REQUEST_DELAY_MS);
    this._lastRequestTime = slot;
    if (slot > now) {
      await new Promise((resolve) => setTimeout(resolve, slot - now));
    }
  }

  /**
   * Visit the main page and then the B2B login page once, collecting the
   * cookies each response sets. Later calls are no-ops until `invalidate()`.
   *
   * @returns {Promise<void>}
   */
  async _ensureBootstrap() {
    if (this._bootstrapReady) return;

    // Main page (entered "directly", hence sec-fetch-site: none).
    const r1 = await request(URLS.bootstrap, {
      method: 'GET',
      dispatcher: this._agent,
      headers: {
        ...HEADERS_NAV,
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'none',
      },
    });
    await readBody(r1);
    mergeCookies(this._cookies, parseSetCookies(r1.headers));

    // Login page, reached from the main page with the cookies gathered so far.
    const r2 = await request(URLS.loginPage, {
      method: 'GET',
      dispatcher: this._agent,
      headers: {
        ...HEADERS_NAV,
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        referer: URLS.bootstrap,
        cookie: cookieStr(this._cookies),
      },
    });
    await readBody(r2);
    mergeCookies(this._cookies, parseSetCookies(r2.headers));

    this._bootstrapReady = true;
  }

  /**
   * Log in with the institution credentials unless a non-expired login
   * already exists. Credentials are loaded lazily and cached until
   * `invalidate()` is called.
   *
   * @returns {Promise<void>}
   * @throws {Error} When credentials are missing, the request fails or times
   *   out, or neither a SESSION nor an NSSO cookie is present afterwards.
   */
  async login() {
    if (this._loginTime && !this.isExpired) return;

    if (!this._credentials) this._credentials = loadCredentials();
    const creds = this._credentials;

    await this._ensureBootstrap();

    const formBody = new URLSearchParams({
      b2bAcc: creds.b2bAcc,
      userPass: creds.userPass,
      b2bLoginType: creds.b2bLoginType,
      b2bId: creds.b2bId,
      b2bName: creds.b2bName,
      reAuth: '',
    }).toString();

    const ctrl = new AbortController();
    const timer = setTimeout(() => ctrl.abort(), DEFAULT_TIMEOUT_MS);
    let text;
    try {
      const res = await request(URLS.loginProc, {
        method: 'POST',
        dispatcher: this._agent,
        signal: ctrl.signal,
        headers: {
          ...HEADERS_XHR,
          'content-type': 'application/x-www-form-urlencoded',
          referer: URLS.loginPage,
          origin: BASE_URL,
          cookie: cookieStr(this._cookies),
        },
        body: formBody,
      });
      text = await readBody(res);
      mergeCookies(this._cookies, parseSetCookies(res.headers));
    } finally {
      clearTimeout(timer);
    }

    // Success check: fails only when BOTH cookies are absent.
    // NOTE(review): the original comment read "check SESSION + NSSO", which
    // suggests both may be required — confirm against the server's behavior
    // before tightening this to `||`.
    if (!this._cookies.SESSION && !this._cookies.NSSO) {
      const preview = text?.startsWith('<') ? '(HTML 응답)' : text?.slice(0, 120);
      throw new Error(`DBpia 로그인 실패: 세션 쿠키 없음. ${preview}`);
    }

    // Inspect the JSON login response; a mismatch is only logged, not fatal.
    try {
      const json = JSON.parse(text);
      if (json.loginSucYN !== 'Y') {
        console.warn(`[dbpia] 로그인 응답 loginSucYN=${json.loginSucYN}`);
      }
    } catch { /* the response is not necessarily JSON */ }

    this._loginTime = Date.now();
  }

  /** @returns {string} The current cookie jar serialized as a Cookie header value. */
  cookieHeader() { return cookieStr(this._cookies); }

  /**
   * Forget all session state so the next `login()` starts from scratch.
   *
   * The cached credentials are dropped as well: this method is the hook used
   * when the stored account changes, and keeping the cache would make the
   * next login reuse the old account.
   */
  invalidate() {
    this._cookies = {};
    this._loginTime = null;
    this._bootstrapReady = false;
    this._credentials = null;
  }

  /** Close the underlying undici Agent. */
  async close() { await this._agent.close(); }
}
|
|
273
|
+
|
|
274
|
+
// ── 싱글턴 세션 ──────────────────────────────────────────────────────
|
|
275
|
+
|
|
276
|
+
// Module-wide session instance, created on first use.
let _session = null;

/**
 * Return the shared DbpiaSession, constructing it on the first call.
 *
 * @returns {DbpiaSession}
 */
function getSession() {
  _session ??= new DbpiaSession();
  return _session;
}
|
|
281
|
+
|
|
282
|
+
/**
 * Invalidate the shared session, if one has been created. Intended to be
 * called (from server.js) when the stored credentials change. Does nothing
 * when no session exists yet.
 */
export function invalidateDbpiaSession() {
  _session?.invalidate();
}
|
|
286
|
+
|
|
287
|
+
// ── 검색 실행 ────────────────────────────────────────────────────────
|
|
288
|
+
|
|
289
|
+
/**
 * Run one search request against POST /api/search/list using the shared session.
 *
 * Logs in when needed, waits for the throttle, posts the JSON query and
 * parses the JSON reply. An HTML reply is treated as an expired session:
 * the session is invalidated and an error whose message contains "세션 만료"
 * is thrown (callers use that marker to retry).
 *
 * @param {string} query Search terms.
 * @param {number} limit Number of results the caller wants; the request asks
 *   for three times as many (capped at 100) to leave room for later filtering.
 * @param {number} page Page number passed through to the API.
 * @returns {Promise<{ rawList: object[], total: number }>} Raw result items
 *   and the total count reported by the API (or the list length when absent).
 * @throws {Error} On HTTP status >= 400, session expiry, timeout, or unparsable JSON.
 */
async function _doSearch(query, limit, page) {
  const session = getSession();
  await session.login();
  await session._throttle();

  const payload = JSON.stringify({
    query,
    page,
    pageSize: Math.min(limit * 3, 100), // over-fetch so enough items survive filtering
    sort: 'RANK',
    searchType: 'ALL',
    dataTypes: [],
    language: 'KO',
    isFullText: false,
    isAccessible: false,
    accessOption: 'ALL',
    subjectCodes: [],
    pbshYears: [],
    plctNames: [],
    publicationInfos: [],
    collection: 'ALL',
    includeAr: false,
    collectionQuery: [],
    filter: [],
    prefix: [],
    realQuery: '',
    startDate: '1970.01.01',
    endDate: '',
    originText: false,
  });

  const referer = `${URLS.searchPage}?query=${encodeURIComponent(query)}`;
  const ctrl = new AbortController();
  const timer = setTimeout(() => ctrl.abort(), DEFAULT_TIMEOUT_MS);

  let text;
  try {
    const res = await request(URLS.searchList, {
      method: 'POST',
      dispatcher: session._agent,
      signal: ctrl.signal,
      headers: {
        ...HEADERS_XHR,
        'content-type': 'application/json',
        referer,
        origin: BASE_URL,
        cookie: session.cookieHeader(),
      },
      body: payload,
    });
    text = await readBody(res);
    mergeCookies(session._cookies, parseSetCookies(res.headers));

    if (!res.statusCode || res.statusCode >= 400) {
      throw new Error(`DBpia 검색 HTTP ${res.statusCode}`);
    }
  } finally {
    clearTimeout(timer);
  }

  // Session-expiry detection: the server answers with an HTML page instead of
  // JSON. Leading whitespace / BOM and tag case are tolerated so such pages
  // are not misreported as a JSON parse failure.
  if (/^\s*<(?:!|html)/i.test(text)) {
    session.invalidate();
    throw new Error('DBpia 세션 만료: 서버가 HTML 페이지를 반환했습니다');
  }

  let json;
  try {
    json = JSON.parse(text);
  } catch {
    throw new Error(`DBpia 검색 JSON 파싱 실패: ${text.slice(0, 120)}`);
  }

  // Callers iterate rawList with array methods, so never hand back a non-array.
  const rawList = Array.isArray(json?.list) ? json.list : [];
  const total = json?.pageInfo?.totalCount ?? rawList.length;

  return { rawList, total };
}
|
|
364
|
+
|
|
365
|
+
// ── Skill 클래스 ─────────────────────────────────────────────────────
|
|
366
|
+
|
|
367
|
+
/**
 * DBpia external-search skill: live search, item normalization and full-text
 * PDF download through the shared B2B institution session.
 */
export class DbpiaSkill extends Skill {
  static manifest = {
    id: 'dbpia',
    name: 'DBpia (학술 DB)',
    version: '1.0.0',
    defaultCron: null, // not a scheduled-collection source
    defaultKeywords: [
      '도시기본계획', '도시재생', '스마트시티', '지속가능발전',
      '지역경제', '주택정책', '교통계획',
    ],
    rateLimit: { rps: 1, burst: 2 },
    requiredEnv: [],
    capabilities: ['search', 'normalize', 'download'], // full-text download uses the B2B session
  };

  /** Operational parameters exposed to the caller. */
  get meta() {
    return {
      sessionTtlMs: SESSION_TTL_MS,
      minRequestIntervalMs: REQUEST_DELAY_MS,
      earlyStopConsecutiveEmpty: 3,
      supportsRangeRequest: false,
    };
  }

  /**
   * Inject the database handle used for access-cache upserts during downloads
   * (called from server.js).
   *
   * @param {object} db Database handle passed through to the access-cache helpers.
   */
  setDb(db) {
    this._db = db;
  }

  /**
   * Institution id from secure config, or the built-in default when it is
   * unset or the config store cannot be read.
   *
   * @returns {string}
   */
  _b2bId() {
    try {
      return getSecureConfig('source.dbpia.b2bId') || 'ICST00001014';
    } catch {
      return 'ICST00001014';
    }
  }

  /** Make sure the shared session is logged in. */
  async ensureSession() {
    await getSession().login();
  }

  /** Drop the shared session's state so the next request logs in again. */
  invalidateSession() {
    getSession().invalidate();
  }

  /**
   * Live DBpia search (used by the UI).
   *
   * - Items whose nodeId starts with "T" (theses) are dropped, because their
   *   download step fails with HTTP 500.
   * - Items are normalized and de-duplicated by `docid`.
   * - When `db` is given, the access-cache filter is applied; if it cannot be
   *   applied, a warning is logged and the unfiltered items are returned.
   * - `externalLink` is not used as a filter condition.
   *
   * A session-expired failure is retried exactly once.
   *
   * @param {string} query Search terms.
   * @param {object} [options]
   * @param {number} [options.limit=20] Maximum number of items returned.
   * @param {number} [options.page=1] Page number passed to the API.
   * @param {object|null} [options.db=null] Database handle enabling the access-cache filter.
   * @param {boolean} [options.strict=true] Passed through to the access-cache filter.
   * @returns {Promise<object[]>} Normalized items, at most `limit`.
   */
  async search(query, { limit = 20, page = 1, db = null, strict = true } = {}) {
    let rawList;
    try {
      ({ rawList } = await _doSearch(query, limit, page));
    } catch (err) {
      // Anything other than session expiry is rethrown untouched (including
      // errors that carry no message).
      if (!err?.message?.includes('세션 만료')) throw err;
      // One automatic retry; _doSearch logs in again because the session was invalidated.
      ({ rawList } = await _doSearch(query, limit, page));
    }

    const seen = new Set();
    let items = rawList
      .filter((it) => !String(it.nodeId || '').startsWith('T')) // block thesis records
      .map((it) => normalizeDbpiaItem(it, query))
      .filter((item) => {
        if (seen.has(item.docid)) return false;
        seen.add(item.docid);
        return true;
      });

    // Access-cache filter (no-op without a db handle).
    if (db) {
      try {
        const { filterByAccessCache } = await import('./_dbpia-access-filter.js');
        // Same lookup as the download path: falls back to the default id
        // instead of disabling the filter when secure config is unreadable.
        items = filterByAccessCache(items, { db, strict, b2bId: this._b2bId() });
      } catch (e) {
        console.warn('[dbpia] access cache 필터 미적용:', e.message);
      }
    }

    return items.slice(0, limit);
  }

  /** discover is not supported in Phase 1 — always resolves to an empty array. */
  async discover({ keywords = [], limit = 50 } = {}) {
    return [];
  }

  /**
   * Open the full-text PDF of an item through the B2B session.
   *
   * Flow:
   *  1) GET articleDetail (referer chain / session warm-up)
   *  2) GET pdfView (viewer page)
   *  3) POST /download/downloadData (form-urlencoded) → JSON `{ link }`
   *  4) GET /pdfviewer/web/viewer.html?file=<link> (iframe viewer warm-up;
   *     per the original author's note this sets the cookie the download host
   *     requires, and skipping it yields an HTML access-restricted page)
   *  5) GET link → the PDF response, returned unread for the caller to stream
   *
   * When a db handle was injected with `setDb`, each outcome is recorded in
   * the access cache; cache write failures are logged and ignored.
   *
   * @param {object} item Item with `nodeId`, or a `docid` of the form "dbpia-<nodeId>".
   * @param {object} [options]
   * @param {AbortSignal} [options.signal] Caller-supplied abort signal for the
   *   final download; without it a 300-second timeout is applied.
   * @returns {Promise<Response>} The fetch Response of the final download.
   * @throws {Error} Errors for unsupported, missing or out-of-license
   *   documents carry `nonRetryable = true` and a `reason` code.
   */
  async openDownloadResponse(item, { signal } = {}) {
    const session = getSession();
    await session.login();

    const nodeId = item.nodeId || (item.docid?.startsWith('dbpia-') ? item.docid.slice(6) : '');
    if (!nodeId) {
      throw new Error(`DBpia 다운로드: nodeId 없음 (docid=${item.docid})`);
    }

    // Access-cache recorder (no-op unless a db handle was injected).
    const db = this._db;
    const b2bId = this._b2bId();
    const cacheUpsert = db
      ? async (status, reason, extra = {}) => {
          try {
            const { upsertAccessCache } = await import('./_dbpia-access-filter.js');
            upsertAccessCache(db, { nodeId, b2bId, status, reason, ...extra });
          } catch (e) { console.warn('[dbpia] cache upsert 실패:', e.message); }
        }
      : async () => {};

    const articleUrl = URLS.articleDetail(nodeId);
    const viewerPageUrl = URLS.pdfView(nodeId);

    // 1) Article detail page.
    await visitForCookies(session, articleUrl, { referer: URLS.searchPage });
    if (!session._cookies.SESSION && !session._cookies.NSSO) {
      session.invalidate();
      throw new Error('DBpia 세션 만료: articleDetail 방문 실패');
    }

    // 2) PDF viewer page.
    await visitForCookies(session, viewerPageUrl, { referer: articleUrl });

    // 3) Ask for the download link.
    await session._throttle();
    const linkRes = await request(URLS.downloadData, {
      method: 'POST',
      dispatcher: session._agent,
      headers: {
        ...HEADERS_XHR,
        'content-type': 'application/x-www-form-urlencoded',
        origin: BASE_URL,
        referer: viewerPageUrl,
        'x-referer': articleUrl,
        cookie: session.cookieHeader(),
      },
      body: new URLSearchParams({
        nodeId,
        systemCode: '',
        depth: 'Article',
        shape: 'pdf',
      }).toString(),
    });
    const linkText = await readBody(linkRes);
    mergeCookies(session._cookies, parseSetCookies(linkRes.headers));

    // Theses and similar record types answer with HTTP 5xx — treated as
    // structurally unsupported, so not worth retrying.
    if (linkRes.statusCode >= 500) {
      await cacheUpsert('thesis_unsupported', `HTTP ${linkRes.statusCode}`);
      throw nonRetryableError(
        `DBpia downloadData HTTP ${linkRes.statusCode}: 이 자료 타입은 원문 다운로드 미지원 (nodeId=${nodeId})`,
        'thesis_unsupported',
      );
    }

    let json;
    try {
      json = JSON.parse(linkText);
    } catch {
      await cacheUpsert('link_missing', 'JSON 파싱 실패');
      throw nonRetryableError(
        `DBpia downloadData JSON 파싱 실패: ${linkText.slice(0, 120)}`,
        'link_missing',
      );
    }

    const downloadLink = json.link;
    if (!downloadLink) {
      const msg = json.msg || json.resultCode || '원문 링크 없음';
      if (json.isNotDownloadPlct === 'true') {
        await cacheUpsert('publisher_disallow', msg);
        throw nonRetryableError(
          `DBpia 다운로드 불가: 해당 출판사가 다운로드를 허용하지 않음 (nodeId=${nodeId})`,
          'publisher_disallow',
        );
      }
      await cacheUpsert('link_missing', msg);
      throw nonRetryableError(
        `DBpia 다운로드 불가: ${msg} (nodeId=${nodeId}, showAlert=${json.showAlert_YN})`,
        'link_missing',
      );
    }

    // 4) Iframe viewer warm-up.
    const viewerFullUrl = `${BASE_URL}/pdfviewer/web/viewer.html?file=${encodeURIComponent(downloadLink)}`;
    await visitForCookies(session, viewerFullUrl, { referer: viewerPageUrl, dest: 'iframe' });

    // 5) The download itself. The timeout only exists when the caller did not
    // bring a signal, and is cleared as soon as the response headers arrive.
    await session._throttle();
    const ctrl = signal ? null : new AbortController();
    const timer = ctrl ? setTimeout(() => ctrl.abort(), 300_000) : null;
    try {
      const res = await fetch(downloadLink, {
        method: 'GET',
        dispatcher: session._agent,
        redirect: 'follow',
        signal: signal ?? ctrl.signal,
        headers: {
          'user-agent': CHROME_UA,
          'accept': 'application/pdf,application/octet-stream,*/*',
          'accept-language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
          'accept-encoding': 'identity',
          'sec-fetch-dest': 'empty', // imitates pdf.js fetching in cors mode
          'sec-fetch-mode': 'cors',
          'sec-fetch-site': 'same-site',
          origin: BASE_URL,
          referer: viewerFullUrl, // the iframe viewer is the referer
          cookie: session.cookieHeader(),
        },
      });

      if (res.status === 401 || res.status === 403) {
        session.invalidate();
        await cacheUpsert('license_out', `HTTP ${res.status}`);
        await discardBody(res);
        throw nonRetryableError(`DBpia 원문 권한 없음 (HTTP ${res.status})`, 'license_out_of_scope');
      }

      // 200 + text/html is an access-restricted page, not a PDF. It is
      // classified here so the downloader does not re-detect it and
      // invalidate a session that is still valid.
      const ctype = res.headers.get('content-type') || '';
      if (/^text\/html/i.test(ctype)) {
        await cacheUpsert('license_out', 'HTML 응답', { contentType: ctype });
        await discardBody(res);
        // The session itself is fine — deliberately not invalidated.
        throw nonRetryableError('DBpia: 기관 라이선스 범위 밖 또는 외부 연계자료', 'license_out_of_scope');
      }

      // Success: the body is streamed by the caller, so 'ok' is recorded up
      // front using the declared Content-Length (null when absent or invalid).
      const cl = Number(res.headers.get('content-length')) || null;
      await cacheUpsert('ok', null, { contentType: ctype, bytes: cl });

      return res;
    } finally {
      if (timer) clearTimeout(timer);
    }
  }

  /**
   * Normalize a raw DBpia item.
   *
   * @param {object} item Raw search result item.
   * @param {*} _unused Ignored.
   * @param {string} query The query that produced the item.
   * @returns {object} Result of `normalizeDbpiaItem(item, query)`.
   */
  normalize(item, _unused, query) {
    return normalizeDbpiaItem(item, query);
  }
}

/**
 * Throttled same-origin navigation GET whose body is read and thrown away;
 * only the cookies set by the response are kept in the session jar.
 *
 * @param {DbpiaSession} session Logged-in session.
 * @param {string} url Page to visit.
 * @param {{ referer: string, dest?: string }} options Referer header and
 *   sec-fetch-dest value (defaults to "document").
 * @returns {Promise<void>}
 */
async function visitForCookies(session, url, { referer, dest = 'document' }) {
  await session._throttle();
  const res = await request(url, {
    method: 'GET',
    dispatcher: session._agent,
    headers: {
      ...HEADERS_NAV,
      'sec-fetch-dest': dest,
      'sec-fetch-mode': 'navigate',
      'sec-fetch-site': 'same-origin',
      referer,
      cookie: session.cookieHeader(),
    },
  });
  await readBody(res);
  mergeCookies(session._cookies, parseSetCookies(res.headers));
}

/**
 * Build an Error flagged as not worth retrying.
 *
 * @param {string} message Error message.
 * @param {string} reason Machine-readable reason code stored on `err.reason`.
 * @returns {Error}
 */
function nonRetryableError(message, reason) {
  const err = new Error(message);
  err.nonRetryable = true;
  err.reason = reason;
  return err;
}

/**
 * Cancel the body of a fetch Response that will not be read, so the pooled
 * connection is released right away instead of waiting for garbage collection.
 *
 * @param {Response} res Response whose body is abandoned.
 * @returns {Promise<void>}
 */
async function discardBody(res) {
  try {
    await res.body?.cancel();
  } catch { /* best effort — the request is being rejected anyway */ }
}

export default DbpiaSkill;
|