search_paper 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,210 @@
1
+ import { GhostClient } from 'ghostfetch';
2
+
3
+ type SourceType = 'google_scholar' | 'semantic_scholar' | 'arxiv'; // Identifier of a paper source; one of three string literals.
4
+ interface Author { // One entry of Paper.authors; only name is required.
5
+ name: string;
6
+ affiliations?: string[];
7
+ scholarId?: string;
8
+ isFirstAuthor?: boolean;
9
+ isCorresponding?: boolean;
10
+ }
11
+ interface JournalInfo { // Journal details referenced by Paper.journal; every field is optional.
12
+ name?: string;
13
+ volume?: string;
14
+ issue?: string;
15
+ }
16
+ interface Section { // One entry of Paper.sections: a heading, a numeric level, and optional content.
17
+ heading: string;
18
+ level: number;
19
+ content?: string;
20
+ }
21
+ interface Figure { // One entry of Paper.figures: a required label with optional caption and file path.
22
+ label: string;
23
+ caption?: string;
24
+ filePath?: string;
25
+ }
26
+ interface Table { // One entry of Paper.tables: a required label with optional caption, headers, and rows.
27
+ label: string;
28
+ caption?: string;
29
+ headers?: string[];
30
+ rows?: string[][]; // Array of string arrays.
31
+ }
32
+ interface Paper { // Paper record; title, authors, url, and source are required, every other field is optional.
33
+ title: string;
34
+ authors: Author[];
35
+ abstract?: string;
36
+ year?: number;
37
+ publicationDate?: string; // NOTE(review): the string format is not specified in this declaration — confirm.
38
+ venue?: string;
39
+ doi?: string;
40
+ url: string;
41
+ canonicalUrl?: string;
42
+ pdfUrl?: string;
43
+ citationCount?: number;
44
+ impactFactor?: number;
45
+ source: SourceType;
46
+ sourceId?: string;
47
+ keywords?: string[];
48
+ tags?: string[];
49
+ references?: string[]; // Plain strings, not Paper objects; NOTE(review): what each string identifies is not visible here.
50
+ journal?: JournalInfo;
51
+ sections?: Section[];
52
+ figures?: Figure[];
53
+ tables?: Table[];
54
+ }
55
+ interface SearchResult { // Value resolved by search, getCitations, getReferences, and searchPapers; query, papers, and source are required.
56
+ query: string;
57
+ totalResults?: number;
58
+ papers: Paper[];
59
+ nextPageToken?: string;
60
+ source: SourceType;
61
+ errors?: SourceError[];
62
+ }
63
+ interface SourceError { // Error entry carried in SearchResult.errors: a source, a message, and an optional code.
64
+ source: SourceType;
65
+ message: string;
66
+ code?: 'RATE_LIMITED' | 'CAPTCHA' | 'TIMEOUT' | 'NETWORK_ERROR' | 'PARSE_ERROR' | 'UNKNOWN';
67
+ }
68
+ interface SearchOptions { // Options accepted by search, getCitations, getReferences, and searchPapers; every field is optional.
69
+ sources?: SourceType[];
70
+ limit?: number;
71
+ offset?: number;
72
+ year?: { // Either end may be omitted.
73
+ from?: number;
74
+ to?: number;
75
+ };
76
+ sort?: 'relevance' | 'date' | 'citations';
77
+ fields?: (keyof Paper)[]; // Property names of Paper; NOTE(review): presumably limits which fields are returned — confirm.
78
+ }
79
+ interface ClientOptions { // Options accepted by createClient and the SemanticScholarSource constructor, and as the client option of the top-level functions; every field is optional.
80
+ browser?: string;
81
+ timeout?: number; // NOTE(review): the unit is not stated in this declaration — confirm.
82
+ proxy?: string;
83
+ proxyPool?: string[];
84
+ semanticScholarApiKey?: string;
85
+ }
86
+
87
+ interface PaperSource { // Contract implemented by the source classes: a read-only name plus promise-returning search and getPaper.
88
+ readonly name: SourceType;
89
+ search(query: string, options?: SearchOptions): Promise<SearchResult>;
90
+ getPaper(id: string): Promise<Paper | null>;
91
+ }
92
+ interface CitationSource extends PaperSource { // A PaperSource that additionally exposes getCitations and getReferences.
93
+ getCitations(id: string, options?: SearchOptions): Promise<SearchResult>;
94
+ getReferences(id: string, options?: SearchOptions): Promise<SearchResult>;
95
+ }
96
+
97
+ declare function searchPapers(query: string, options?: SearchOptions & { // Resolves to a SearchResult; options combine SearchOptions with an optional client configuration.
98
+ client?: ClientOptions;
99
+ }): Promise<SearchResult>;
100
+
101
+ declare function getPaper(doi: string, options?: { // Takes a doi string and resolves to a Paper or null; NOTE(review): null presumably means not found — confirm.
102
+ client?: ClientOptions;
103
+ }): Promise<Paper | null>;
104
+
105
+ /**
106
+ * Fetches the full detailed information for a paper.
107
+ * - S2 API: metadata (DOI, journal, keywords, citation count, etc.)
108
+ * - ar5iv HTML: body structure (sections, figures, tables, author affiliations)
109
+ *
110
+ * @param id - arXiv ID or DOI
111
+ */
112
+ declare function getFullPaper(id: string, options?: {
113
+ client?: ClientOptions;
114
+ }): Promise<Paper | null>;
115
+
116
+ declare function createClient(options?: ClientOptions): GhostClient; // Takes optional ClientOptions and returns a GhostClient (type imported from 'ghostfetch').
117
+
118
+ declare class SemanticScholarSource implements CitationSource { // CitationSource implementation whose name is fixed to "semantic_scholar".
119
+ readonly name: "semantic_scholar";
120
+ private readonly client;
121
+ private readonly apiKey?;
122
+ private readonly rateLimiter;
123
+ constructor(client: GhostClient, options?: ClientOptions);
124
+ search(query: string, options?: SearchOptions): Promise<SearchResult>;
125
+ getPaper(id: string): Promise<Paper | null>;
126
+ getPapers(ids: string[]): Promise<(Paper | null)[]>; // Takes an array of ids; NOTE(review): presumably one Paper-or-null entry per id, in order — confirm.
127
+ getCitations(id: string, options?: SearchOptions): Promise<SearchResult>;
128
+ getReferences(id: string, options?: SearchOptions): Promise<SearchResult>;
129
+ private fetchApi;
130
+ }
131
+
132
+ declare class RateLimiter { // Constructed from a minimum interval and an optional jitter (ms, per the parameter names).
133
+ private minIntervalMs;
134
+ private jitterMs;
135
+ private lastRequest;
136
+ constructor(minIntervalMs: number, jitterMs?: number);
137
+ wait(): Promise<void>; // NOTE(review): presumably resolves once the minimum interval has elapsed — confirm against the implementation.
138
+ }
139
+ /**
140
+ * Adaptive rate limiter
141
+ * - Increases the delay exponentially when a block (CAPTCHA, etc.) is detected
142
+ * - Gradually returns to the base value on success
143
+ */
144
+ declare class AdaptiveRateLimiter {
145
+ private readonly baseIntervalMs;
146
+ private readonly baseJitterMs;
147
+ private readonly maxIntervalMs;
148
+ private readonly backoffMultiplier;
149
+ /** Number of consecutive successes after which the delay is reduced */
150
+ private readonly cooldownAfter;
151
+ /** Delay reduction ratio (0.7 = 30% reduction) */
152
+ private readonly cooldownFactor;
153
+ private lastRequest;
154
+ private currentIntervalMs;
155
+ private consecutiveSuccesses;
156
+ constructor(baseIntervalMs: number, baseJitterMs?: number, maxIntervalMs?: number, backoffMultiplier?: number,
157
+ /** Number of consecutive successes after which the delay is reduced */
158
+ cooldownAfter?: number,
159
+ /** Delay reduction ratio (0.7 = 30% reduction) */
160
+ cooldownFactor?: number);
161
+ private consecutiveBlocks;
162
+ /** Interval currently in effect (ms) */
163
+ get interval(): number;
164
+ /** Whether the limiter is currently in a blocked state (true after 2 or more consecutive blocks) */
165
+ get isBlocked(): boolean;
166
+ wait(): Promise<void>;
167
+ /** Call when a request succeeds → gradually decreases the delay */
168
+ success(): void;
169
+ /** Call when a block is detected → increases the delay exponentially */
170
+ backoff(): void;
171
+ }
172
+
173
+ declare class GoogleScholarSource implements PaperSource { // PaperSource implementation whose name is fixed to "google_scholar".
174
+ readonly name: "google_scholar";
175
+ private readonly client;
176
+ private readonly rateLimiter;
177
+ constructor(client: GhostClient, rateLimiter?: AdaptiveRateLimiter); // The rate limiter argument is optional.
178
+ search(query: string, options?: SearchOptions): Promise<SearchResult>;
179
+ getPaper(id: string): Promise<Paper | null>;
180
+ }
181
+
182
+ declare class ArxivSource implements PaperSource { // PaperSource implementation whose name is fixed to "arxiv".
183
+ readonly name: "arxiv";
184
+ private readonly client;
185
+ private readonly rateLimiter;
186
+ constructor(client: GhostClient);
187
+ search(query: string, options?: SearchOptions): Promise<SearchResult>;
188
+ getPaper(id: string): Promise<Paper | null>;
189
+ /**
190
+ * Fetches the full content of an arXiv paper (metadata + ar5iv HTML parsing)
191
+ * - Looks up the basic metadata from the API
192
+ * - Parses authors (with affiliations), sections, figures, and tables from the ar5iv HTML
193
+ */
194
+ getFullPaper(id: string): Promise<Paper | null>;
195
+ }
196
+
197
+ declare function withRetry<T>(fn: () => Promise<T>, options?: { // Takes a promise-returning fn and resolves to its result type T; NOTE(review): the retry and delay policy is not visible in this declaration.
198
+ maxRetries?: number;
199
+ baseDelayMs?: number;
200
+ }): Promise<T>;
201
+
202
+ declare class HttpError extends Error { // Error subclass exposing a read-only numeric status and a read-only statusText.
203
+ readonly status: number;
204
+ readonly statusText: string;
205
+ constructor(status: number, statusText: string);
206
+ }
207
+
208
+ declare function deduplicatePapers(papers: Paper[]): Paper[]; // Takes a Paper array and returns a Paper array; NOTE(review): the criterion for treating two papers as duplicates is not visible here.
209
+
210
+ export { AdaptiveRateLimiter, ArxivSource, type Author, type CitationSource, type ClientOptions, type Figure, GoogleScholarSource, HttpError, type JournalInfo, type Paper, type PaperSource, RateLimiter, type SearchOptions, type SearchResult, type Section, SemanticScholarSource, type SourceError, type SourceType, type Table, createClient, deduplicatePapers, getFullPaper, getPaper, searchPapers, withRetry };
@@ -0,0 +1,210 @@
1
+ import { GhostClient } from 'ghostfetch';
2
+
3
+ type SourceType = 'google_scholar' | 'semantic_scholar' | 'arxiv'; // Identifier of a paper source; one of three string literals.
4
+ interface Author { // One entry of Paper.authors; only name is required.
5
+ name: string;
6
+ affiliations?: string[];
7
+ scholarId?: string;
8
+ isFirstAuthor?: boolean;
9
+ isCorresponding?: boolean;
10
+ }
11
+ interface JournalInfo { // Journal details referenced by Paper.journal; every field is optional.
12
+ name?: string;
13
+ volume?: string;
14
+ issue?: string;
15
+ }
16
+ interface Section { // One entry of Paper.sections: a heading, a numeric level, and optional content.
17
+ heading: string;
18
+ level: number;
19
+ content?: string;
20
+ }
21
+ interface Figure { // One entry of Paper.figures: a required label with optional caption and file path.
22
+ label: string;
23
+ caption?: string;
24
+ filePath?: string;
25
+ }
26
+ interface Table { // One entry of Paper.tables: a required label with optional caption, headers, and rows.
27
+ label: string;
28
+ caption?: string;
29
+ headers?: string[];
30
+ rows?: string[][]; // Array of string arrays.
31
+ }
32
+ interface Paper { // Paper record; title, authors, url, and source are required, every other field is optional.
33
+ title: string;
34
+ authors: Author[];
35
+ abstract?: string;
36
+ year?: number;
37
+ publicationDate?: string; // NOTE(review): the string format is not specified in this declaration — confirm.
38
+ venue?: string;
39
+ doi?: string;
40
+ url: string;
41
+ canonicalUrl?: string;
42
+ pdfUrl?: string;
43
+ citationCount?: number;
44
+ impactFactor?: number;
45
+ source: SourceType;
46
+ sourceId?: string;
47
+ keywords?: string[];
48
+ tags?: string[];
49
+ references?: string[]; // Plain strings, not Paper objects; NOTE(review): what each string identifies is not visible here.
50
+ journal?: JournalInfo;
51
+ sections?: Section[];
52
+ figures?: Figure[];
53
+ tables?: Table[];
54
+ }
55
+ interface SearchResult { // Value resolved by search, getCitations, getReferences, and searchPapers; query, papers, and source are required.
56
+ query: string;
57
+ totalResults?: number;
58
+ papers: Paper[];
59
+ nextPageToken?: string;
60
+ source: SourceType;
61
+ errors?: SourceError[];
62
+ }
63
+ interface SourceError { // Error entry carried in SearchResult.errors: a source, a message, and an optional code.
64
+ source: SourceType;
65
+ message: string;
66
+ code?: 'RATE_LIMITED' | 'CAPTCHA' | 'TIMEOUT' | 'NETWORK_ERROR' | 'PARSE_ERROR' | 'UNKNOWN';
67
+ }
68
+ interface SearchOptions { // Options accepted by search, getCitations, getReferences, and searchPapers; every field is optional.
69
+ sources?: SourceType[];
70
+ limit?: number;
71
+ offset?: number;
72
+ year?: { // Either end may be omitted.
73
+ from?: number;
74
+ to?: number;
75
+ };
76
+ sort?: 'relevance' | 'date' | 'citations';
77
+ fields?: (keyof Paper)[]; // Property names of Paper; NOTE(review): presumably limits which fields are returned — confirm.
78
+ }
79
+ interface ClientOptions { // Options accepted by createClient and the SemanticScholarSource constructor, and as the client option of the top-level functions; every field is optional.
80
+ browser?: string;
81
+ timeout?: number; // NOTE(review): the unit is not stated in this declaration — confirm.
82
+ proxy?: string;
83
+ proxyPool?: string[];
84
+ semanticScholarApiKey?: string;
85
+ }
86
+
87
+ interface PaperSource { // Contract implemented by the source classes: a read-only name plus promise-returning search and getPaper.
88
+ readonly name: SourceType;
89
+ search(query: string, options?: SearchOptions): Promise<SearchResult>;
90
+ getPaper(id: string): Promise<Paper | null>;
91
+ }
92
+ interface CitationSource extends PaperSource { // A PaperSource that additionally exposes getCitations and getReferences.
93
+ getCitations(id: string, options?: SearchOptions): Promise<SearchResult>;
94
+ getReferences(id: string, options?: SearchOptions): Promise<SearchResult>;
95
+ }
96
+
97
+ declare function searchPapers(query: string, options?: SearchOptions & { // Resolves to a SearchResult; options combine SearchOptions with an optional client configuration.
98
+ client?: ClientOptions;
99
+ }): Promise<SearchResult>;
100
+
101
+ declare function getPaper(doi: string, options?: { // Takes a doi string and resolves to a Paper or null; NOTE(review): null presumably means not found — confirm.
102
+ client?: ClientOptions;
103
+ }): Promise<Paper | null>;
104
+
105
+ /**
106
+ * Fetches the full detailed information for a paper.
107
+ * - S2 API: metadata (DOI, journal, keywords, citation count, etc.)
108
+ * - ar5iv HTML: body structure (sections, figures, tables, author affiliations)
109
+ *
110
+ * @param id - arXiv ID or DOI
111
+ */
112
+ declare function getFullPaper(id: string, options?: {
113
+ client?: ClientOptions;
114
+ }): Promise<Paper | null>;
115
+
116
+ declare function createClient(options?: ClientOptions): GhostClient; // Takes optional ClientOptions and returns a GhostClient (type imported from 'ghostfetch').
117
+
118
+ declare class SemanticScholarSource implements CitationSource { // CitationSource implementation whose name is fixed to "semantic_scholar".
119
+ readonly name: "semantic_scholar";
120
+ private readonly client;
121
+ private readonly apiKey?;
122
+ private readonly rateLimiter;
123
+ constructor(client: GhostClient, options?: ClientOptions);
124
+ search(query: string, options?: SearchOptions): Promise<SearchResult>;
125
+ getPaper(id: string): Promise<Paper | null>;
126
+ getPapers(ids: string[]): Promise<(Paper | null)[]>; // Takes an array of ids; NOTE(review): presumably one Paper-or-null entry per id, in order — confirm.
127
+ getCitations(id: string, options?: SearchOptions): Promise<SearchResult>;
128
+ getReferences(id: string, options?: SearchOptions): Promise<SearchResult>;
129
+ private fetchApi;
130
+ }
131
+
132
+ declare class RateLimiter { // Constructed from a minimum interval and an optional jitter (ms, per the parameter names).
133
+ private minIntervalMs;
134
+ private jitterMs;
135
+ private lastRequest;
136
+ constructor(minIntervalMs: number, jitterMs?: number);
137
+ wait(): Promise<void>; // NOTE(review): presumably resolves once the minimum interval has elapsed — confirm against the implementation.
138
+ }
139
+ /**
140
+ * Adaptive rate limiter
141
+ * - Increases the delay exponentially when a block (CAPTCHA, etc.) is detected
142
+ * - Gradually returns to the base value on success
143
+ */
144
+ declare class AdaptiveRateLimiter {
145
+ private readonly baseIntervalMs;
146
+ private readonly baseJitterMs;
147
+ private readonly maxIntervalMs;
148
+ private readonly backoffMultiplier;
149
+ /** Number of consecutive successes after which the delay is reduced */
150
+ private readonly cooldownAfter;
151
+ /** Delay reduction ratio (0.7 = 30% reduction) */
152
+ private readonly cooldownFactor;
153
+ private lastRequest;
154
+ private currentIntervalMs;
155
+ private consecutiveSuccesses;
156
+ constructor(baseIntervalMs: number, baseJitterMs?: number, maxIntervalMs?: number, backoffMultiplier?: number,
157
+ /** Number of consecutive successes after which the delay is reduced */
158
+ cooldownAfter?: number,
159
+ /** Delay reduction ratio (0.7 = 30% reduction) */
160
+ cooldownFactor?: number);
161
+ private consecutiveBlocks;
162
+ /** Interval currently in effect (ms) */
163
+ get interval(): number;
164
+ /** Whether the limiter is currently in a blocked state (true after 2 or more consecutive blocks) */
165
+ get isBlocked(): boolean;
166
+ wait(): Promise<void>;
167
+ /** Call when a request succeeds → gradually decreases the delay */
168
+ success(): void;
169
+ /** Call when a block is detected → increases the delay exponentially */
170
+ backoff(): void;
171
+ }
172
+
173
+ declare class GoogleScholarSource implements PaperSource { // PaperSource implementation whose name is fixed to "google_scholar".
174
+ readonly name: "google_scholar";
175
+ private readonly client;
176
+ private readonly rateLimiter;
177
+ constructor(client: GhostClient, rateLimiter?: AdaptiveRateLimiter); // The rate limiter argument is optional.
178
+ search(query: string, options?: SearchOptions): Promise<SearchResult>;
179
+ getPaper(id: string): Promise<Paper | null>;
180
+ }
181
+
182
+ declare class ArxivSource implements PaperSource { // PaperSource implementation whose name is fixed to "arxiv".
183
+ readonly name: "arxiv";
184
+ private readonly client;
185
+ private readonly rateLimiter;
186
+ constructor(client: GhostClient);
187
+ search(query: string, options?: SearchOptions): Promise<SearchResult>;
188
+ getPaper(id: string): Promise<Paper | null>;
189
+ /**
190
+ * Fetches the full content of an arXiv paper (metadata + ar5iv HTML parsing)
191
+ * - Looks up the basic metadata from the API
192
+ * - Parses authors (with affiliations), sections, figures, and tables from the ar5iv HTML
193
+ */
194
+ getFullPaper(id: string): Promise<Paper | null>;
195
+ }
196
+
197
+ declare function withRetry<T>(fn: () => Promise<T>, options?: { // Takes a promise-returning fn and resolves to its result type T; NOTE(review): the retry and delay policy is not visible in this declaration.
198
+ maxRetries?: number;
199
+ baseDelayMs?: number;
200
+ }): Promise<T>;
201
+
202
+ declare class HttpError extends Error { // Error subclass exposing a read-only numeric status and a read-only statusText.
203
+ readonly status: number;
204
+ readonly statusText: string;
205
+ constructor(status: number, statusText: string);
206
+ }
207
+
208
+ declare function deduplicatePapers(papers: Paper[]): Paper[]; // Takes a Paper array and returns a Paper array; NOTE(review): the criterion for treating two papers as duplicates is not visible here.
209
+
210
+ export { AdaptiveRateLimiter, ArxivSource, type Author, type CitationSource, type ClientOptions, type Figure, GoogleScholarSource, HttpError, type JournalInfo, type Paper, type PaperSource, RateLimiter, type SearchOptions, type SearchResult, type Section, SemanticScholarSource, type SourceError, type SourceType, type Table, createClient, deduplicatePapers, getFullPaper, getPaper, searchPapers, withRetry };