scholar-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,387 @@
1
+ import { normalizeDoi, normalizeWhitespace, parseYear, tokenizeForRanking } from './utils.js';
2
+ import { ResearchHttpClient } from './http-client.js';
3
+ import { OpenAlexClient } from './providers/openalex-client.js';
4
+ import { CrossrefClient } from './providers/crossref-client.js';
5
+ import { SemanticScholarClient } from './providers/semantic-scholar-client.js';
6
/**
 * Trust weight per provider. `searchGraph` records it as the provenance
 * `confidence` and feeds it into the per-work relevance score; providers that
 * are not listed here fall back to 0.8 at the lookup site.
 *
 * Frozen so that no code path can mutate the shared module-level table.
 */
const providerWeight = Object.freeze({
    openalex: 1,
    crossref: 0.9,
    semantic_scholar: 1.1,
    scholar_scrape: 0.7
});
/** Providers queried when a search request does not name its sources. */
const DEFAULT_SOURCES = Object.freeze(['openalex', 'crossref', 'semantic_scholar']);
13
/**
 * Maps a citation count onto a 0..1 score on a log10 scale; the score
 * saturates at 1 once the count reaches 9,999 (log10(10,000) / 4).
 *
 * @param {number|string|null|undefined} citations - Raw citation count.
 * @returns {number} 0 for missing, non-numeric or non-positive counts,
 *   otherwise a value in (0, 1].
 */
const scoreFromCitations = (citations) => {
    // Coerce first: a missing count would otherwise produce NaN, and a numeric
    // string would be concatenated by `citations + 1` instead of added.
    const count = Number(citations);
    if (Number.isNaN(count) || count <= 0) {
        return 0;
    }
    return Math.min(1, Math.log10(count + 1) / 4);
};
19
/**
 * Builds the exact-match deduplication key for a title: whitespace is
 * normalised, the text is lower-cased, and everything that is not a letter,
 * digit or whitespace is removed.
 *
 * The text is NFKD-decomposed first and the filter uses Unicode letter/number
 * classes, so accented letters fold to their base letter ("Café" -> "cafe")
 * and titles in non-Latin scripts keep their letters. An ASCII-only filter
 * would collapse every such title to the same empty key.
 *
 * @param {string} title - Raw work title.
 * @returns {string} Normalised key; ASCII titles yield the same key as an
 *   ASCII-only `[a-z0-9\s]` filter would.
 */
const normalizeTitleKey = (title) => normalizeWhitespace(title)
    .normalize('NFKD')
    .toLowerCase()
    .replace(/[^\p{L}\p{N}\s]/gu, '');
22
/**
 * Collects the distinct ranking tokens of a title.
 *
 * @param {string} title - Raw work title.
 * @returns {Set<string>} De-duplicated output of `tokenizeForRanking`.
 */
const tokenSetFromTitle = (title) => {
    const tokens = tokenizeForRanking(title);
    return new Set(tokens);
};
23
/**
 * Jaccard similarity of two sets: |A ∩ B| / |A ∪ B|.
 *
 * @param {Set<*>} a - First set.
 * @param {Set<*>} b - Second set.
 * @returns {number} A value in [0, 1]; 0 when either set is empty.
 */
const setJaccard = (a, b) => {
    const eitherEmpty = a.size === 0 || b.size === 0;
    if (eitherEmpty) {
        return 0;
    }
    const shared = [...a].reduce((count, token) => (b.has(token) ? count + 1 : count), 0);
    // |A ∪ B| = |A| + |B| - |A ∩ B|
    const unionSize = a.size + b.size - shared;
    return shared / unionSize;
};
35
/**
 * Unions two field lists, keeping first-seen order.
 *
 * Every entry of `current` is kept as-is; entries of `incoming` are appended
 * only when truthy and not already present.
 *
 * @param {Iterable<string>} current - Fields already recorded.
 * @param {Iterable<string>} incoming - Fields to add.
 * @returns {string[]} New array with duplicates removed.
 */
const mergeFields = (current, incoming) => {
    const additions = [...incoming].filter((value) => Boolean(value));
    return Array.from(new Set([...current, ...additions]));
};
44
/**
 * Checks a publication year against an inclusive `[from, to]` range.
 *
 * @param {number|null|undefined} year - Publication year, if known.
 * @param {[number, number]|null|undefined} range - Inclusive bounds.
 * @returns {boolean} `true` when no range is given or the year is unknown
 *   (works are not excluded for lacking a year), otherwise whether the year
 *   lies within the bounds.
 */
const isWithinYearRange = (year, range) => {
    if (range && year) {
        const lower = range[0];
        const upper = range[1];
        return lower <= year && year <= upper;
    }
    return true;
};
50
/**
 * Tests whether a work is tagged with at least one requested field of study.
 * Comparison ignores case and surrounding whitespace.
 *
 * @param {{fieldsOfStudy: string[]}} work - Work to test.
 * @param {string[]|null|undefined} requestedFields - Fields asked for.
 * @returns {boolean} `true` when no fields were requested or any one matches.
 */
const matchesFieldOfStudy = (work, requestedFields) => {
    if (!requestedFields || requestedFields.length === 0) {
        return true;
    }
    const canonical = (field) => field.trim().toLowerCase();
    const available = new Set(work.fieldsOfStudy.map((field) => canonical(field)));
    for (const wanted of requestedFields) {
        if (available.has(canonical(wanted))) {
            return true;
        }
    }
    return false;
};
57
/**
 * Decides whether two publication years may belong to the same work.
 *
 * @param {number|null|undefined} a - First year.
 * @param {number|null|undefined} b - Second year.
 * @returns {boolean} `true` when either year is missing or they differ by at
 *   most two years.
 */
const yearsCompatible = (a, b) => {
    if (!a || !b) {
        return true;
    }
    const gap = Math.abs(a - b);
    return gap <= 2;
};
58
/**
 * Builds the comparison key for an author name: whitespace is normalised, the
 * text is lower-cased, and everything that is not a letter, digit or
 * whitespace is removed.
 *
 * The name is NFKD-decomposed first and filtered with Unicode letter/number
 * classes, so "José García" and "Jose Garcia" produce the same key and names
 * in non-Latin scripts are not reduced to an empty string.
 *
 * @param {string} name - Author display name.
 * @returns {string} Normalised key; ASCII names yield the same key as an
 *   ASCII-only `[a-z0-9\s]` filter would.
 */
const normalizeAuthorName = (name) => normalizeWhitespace(name)
    .normalize('NFKD')
    .toLowerCase()
    .replace(/[^\p{L}\p{N}\s]/gu, '');
61
/**
 * Checks whether two author lists plausibly describe the same work.
 *
 * @param {Array<{name: string, authorId?: string|null}>} left - First list.
 * @param {Array<{name: string, authorId?: string|null}>} right - Second list.
 * @returns {boolean} `true` when either list is empty (nothing to contradict),
 *   when any author id is shared, or when any normalised name is shared.
 */
const sharesAuthorSignal = (left, right) => {
    if (left.length === 0 || right.length === 0) {
        return true;
    }
    // Pass 1: identifier overlap.
    const knownIds = new Set();
    for (const author of left) {
        if (author.authorId) {
            knownIds.add(author.authorId);
        }
    }
    if (knownIds.size > 0) {
        for (const author of right) {
            if (author.authorId && knownIds.has(author.authorId)) {
                return true;
            }
        }
    }
    // Pass 2: normalised-name overlap; empty keys on the left are ignored.
    const knownNames = new Set();
    for (const author of left) {
        const nameKey = normalizeAuthorName(author.name);
        if (nameKey.length > 0) {
            knownNames.add(nameKey);
        }
    }
    for (const author of right) {
        if (knownNames.has(normalizeAuthorName(author.name))) {
            return true;
        }
    }
    return false;
};
72
/**
 * Deep-copies a value so callers and the cache never share references.
 *
 * Tries `structuredClone` first; if that throws for any reason, falls back to
 * a JSON round-trip.
 *
 * @param {*} value - Value to copy.
 * @returns {*} An independent deep copy.
 */
const cloneResult = (value) => {
    let copy;
    try {
        copy = structuredClone(value);
    }
    catch {
        copy = JSON.parse(JSON.stringify(value));
    }
    return copy;
};
80
/**
 * Coerces a provider-supplied counter into a non-negative finite number.
 * Missing or non-numeric values become 0 so they cannot turn into NaN inside
 * `Math.max` or the scoring arithmetic.
 */
const toCount = (value) => {
    const count = Number(value);
    return Number.isFinite(count) && count > 0 ? count : 0;
};
/**
 * Brings raw provider relevance scores into [0, 1].
 *
 * Some providers pass their raw relevance value straight through, and it may
 * be far larger than 1; the blend below combines the score with terms that
 * are all bounded by 1. For each provider whose largest score in this batch
 * exceeds 1, scores are divided by that maximum; scores already within [0, 1]
 * are left alone. Non-numeric scores fall back to 0.5.
 *
 * @returns {Array<object>} Shallow copies of `works` with a bounded `score`.
 */
const normalizeProviderScores = (works) => {
    const peakByProvider = new Map();
    for (const work of works) {
        const score = Number(work.score);
        if (Number.isFinite(score) && score > (peakByProvider.get(work.provider) ?? 0)) {
            peakByProvider.set(work.provider, score);
        }
    }
    return works.map((work) => {
        const raw = Number(work.score);
        if (!Number.isFinite(raw)) {
            return { ...work, score: 0.5 };
        }
        const peak = peakByProvider.get(work.provider) ?? 0;
        const scaled = peak > 1 ? raw / peak : raw;
        return { ...work, score: Math.min(1, Math.max(0, scaled)) };
    });
};
/**
 * Returns `baseKey`, or `baseKey#N` with the smallest N >= 2 that is not yet
 * a key of `taken`.
 */
const reserveUniqueKey = (taken, baseKey) => {
    if (!taken.has(baseKey)) {
        return baseKey;
    }
    let suffix = 2;
    while (taken.has(`${baseKey}#${suffix}`)) {
        suffix += 1;
    }
    return `${baseKey}#${suffix}`;
};
/**
 * Searches several literature providers in parallel, merges records that
 * describe the same work, ranks the merged works and caches the result.
 */
export class LiteratureService {
    config;
    logger;
    scholarService;
    httpClient;
    openAlexClient;
    crossrefClient;
    semanticScholarClient;
    /** Cache of search results keyed by the string from `createCacheKey`. */
    searchCache = new Map();
    /**
     * @param {object} config - Settings; this class reads the
     *   `researchGraph*` keys and `scholarLanguage`, and passes the object on
     *   to the HTTP and provider clients.
     * @param {object} logger - Logger; only `debug` is called here.
     * @param {object} scholarService - Service whose `searchKeywords` backs
     *   the `scholar_scrape` source.
     */
    constructor(config, logger, scholarService) {
        this.config = config;
        this.logger = logger;
        this.scholarService = scholarService;
        this.httpClient = new ResearchHttpClient(config);
        this.openAlexClient = new OpenAlexClient(config, this.httpClient);
        this.crossrefClient = new CrossrefClient(config, this.httpClient);
        this.semanticScholarClient = new SemanticScholarClient(config, this.httpClient);
    }
    /**
     * Runs a federated literature search.
     *
     * @param {object} input
     * @param {string} input.query - Search text.
     * @param {number} input.limit - Maximum number of merged works to return.
     * @param {string[]} [input.sources] - Providers to query; defaults to
     *   `DEFAULT_SOURCES`.
     * @param {[number, number]} [input.yearRange] - Inclusive year bounds.
     * @param {string[]} [input.fieldsOfStudy] - Keep works tagged with any of these.
     * @returns {Promise<{query: string, totalResults: number, results: object[],
     *   providerErrors: Array<{provider: string, message: string}>}>}
     *   A provider that fails contributes no works and one `providerErrors`
     *   entry; it does not reject the search.
     */
    async searchGraph(input) {
        const requestedSources = new Set(input.sources ?? DEFAULT_SOURCES);
        const cacheKey = this.createCacheKey(input, requestedSources);
        const cached = this.getCache(cacheKey);
        if (cached) {
            this.logger.debug('Returning literature graph result from cache', {
                query: input.query,
                sources: [...requestedSources],
                limit: input.limit
            });
            return cached;
        }
        const providerErrors = [];
        // Over-fetch per provider so that merging and filtering still leave
        // enough works to fill `input.limit`.
        const providerLimit = Math.max(input.limit, Math.ceil(input.limit * this.config.researchGraphProviderResultMultiplier));
        const runProvider = (provider, search) => search().catch((error) => {
            providerErrors.push({ provider, message: error instanceof Error ? error.message : String(error) });
            return [];
        });
        const providerPromises = [];
        if (requestedSources.has('openalex')) {
            providerPromises.push(runProvider('openalex', () => this.openAlexClient.searchWorks(input.query, providerLimit)));
        }
        if (requestedSources.has('crossref')) {
            providerPromises.push(runProvider('crossref', () => this.crossrefClient.searchWorks(input.query, providerLimit)));
        }
        if (requestedSources.has('semantic_scholar')) {
            providerPromises.push(runProvider('semantic_scholar', () => this.semanticScholarClient.searchWorks(input.query, providerLimit)));
        }
        if (requestedSources.has('scholar_scrape')) {
            providerPromises.push(runProvider('scholar_scrape', () => this.searchWithScholarScrape(input.query, providerLimit)));
        }
        const providerResults = normalizeProviderScores((await Promise.all(providerPromises)).flat());
        const filtered = providerResults.filter((work) => isWithinYearRange(work.year, input.yearRange) && matchesFieldOfStudy(work, input.fieldsOfStudy));
        const merged = new Map();
        const doiToKey = new Map();
        const titleToKeys = new Map();
        // Token set of each merged entry's title. An entry's title never
        // changes after insertion, so the set can be computed once.
        const titleTokensByKey = new Map();
        const indexTitle = (titleKey, key) => {
            const existing = titleToKeys.get(titleKey) ?? new Set();
            existing.add(key);
            titleToKeys.set(titleKey, existing);
        };
        // Finds the merged entry an incoming work belongs to: by DOI first,
        // then by exact normalised title, then by best fuzzy title match.
        // Title matches also require compatible years and an author signal.
        const resolveTargetKey = (work, normalizedDoi, titleKey) => {
            if (normalizedDoi && doiToKey.has(normalizedDoi)) {
                return doiToKey.get(normalizedDoi) ?? null;
            }
            for (const key of titleToKeys.get(titleKey) ?? []) {
                const candidate = merged.get(key);
                if (!candidate) {
                    continue;
                }
                if (yearsCompatible(candidate.year, work.year) && sharesAuthorSignal(candidate.authors, work.authors)) {
                    return key;
                }
            }
            const incomingTokens = tokenSetFromTitle(work.title);
            let bestKey = null;
            let bestSimilarity = 0;
            for (const [key, candidate] of merged.entries()) {
                if (!yearsCompatible(candidate.year, work.year)) {
                    continue;
                }
                if (!sharesAuthorSignal(candidate.authors, work.authors)) {
                    continue;
                }
                const candidateTokens = titleTokensByKey.get(key) ?? tokenSetFromTitle(candidate.title);
                const similarity = setJaccard(candidateTokens, incomingTokens);
                if (similarity > bestSimilarity) {
                    bestSimilarity = similarity;
                    bestKey = key;
                }
            }
            if (bestKey && bestSimilarity >= this.config.researchGraphFuzzyTitleThreshold) {
                return bestKey;
            }
            return null;
        };
        const fetchedAt = new Date().toISOString();
        for (const work of filtered) {
            const normalizedDoi = normalizeDoi(work.doi);
            const titleKey = normalizeTitleKey(work.title);
            const targetKey = resolveTargetKey(work, normalizedDoi, titleKey);
            const confidence = providerWeight[work.provider] ?? 0.8;
            const provenance = {
                provider: work.provider,
                sourceUrl: work.sourceUrl,
                fetchedAt,
                confidence
            };
            const citationCount = toCount(work.citationCount);
            const influentialCitationCount = toCount(work.influentialCitationCount);
            const referenceCount = toCount(work.referenceCount);
            const citationScore = citationCount > 0 ? Math.min(1, Math.log10(citationCount + 1) / 4) : 0;
            const relevanceScore = 0.6 * work.score + 0.3 * citationScore + 0.1 * confidence;
            if (!targetKey) {
                // A DOI-less work can share title and year with an entry it was
                // NOT merged into (different authors). Reserve a distinct key so
                // the earlier entry is not overwritten.
                const generatedKey = reserveUniqueKey(merged, normalizedDoi ?? `title:${titleKey}:year:${work.year ?? 'na'}`);
                merged.set(generatedKey, {
                    title: work.title,
                    abstract: work.abstract,
                    year: work.year,
                    venue: work.venue,
                    doi: normalizedDoi,
                    url: work.url,
                    paperId: work.providerId,
                    citationCount,
                    influentialCitationCount,
                    referenceCount,
                    authors: work.authors,
                    openAccess: {
                        isOpenAccess: work.openAccess.isOpenAccess,
                        pdfUrl: work.openAccess.pdfUrl,
                        license: work.openAccess.license
                    },
                    externalIds: work.externalIds,
                    fieldsOfStudy: work.fieldsOfStudy,
                    score: relevanceScore,
                    provenance: [provenance]
                });
                titleTokensByKey.set(generatedKey, tokenSetFromTitle(work.title));
                if (normalizedDoi) {
                    doiToKey.set(normalizedDoi, generatedKey);
                }
                indexTitle(titleKey, generatedKey);
                continue;
            }
            const existing = merged.get(targetKey);
            if (!existing) {
                continue;
            }
            // Merge: keep what the first provider supplied, fill gaps from
            // later ones, and take the maximum of every counter.
            existing.abstract = existing.abstract ?? work.abstract;
            existing.year = existing.year ?? work.year;
            existing.venue = existing.venue ?? work.venue;
            existing.url = existing.url ?? work.url;
            existing.doi = existing.doi ?? normalizedDoi;
            existing.citationCount = Math.max(toCount(existing.citationCount), citationCount);
            existing.influentialCitationCount = Math.max(toCount(existing.influentialCitationCount), influentialCitationCount);
            existing.referenceCount = Math.max(toCount(existing.referenceCount), referenceCount);
            existing.authors = existing.authors.length > 0 ? existing.authors : work.authors;
            existing.fieldsOfStudy = mergeFields(existing.fieldsOfStudy, work.fieldsOfStudy);
            existing.externalIds = {
                ...existing.externalIds,
                ...work.externalIds
            };
            existing.openAccess = {
                isOpenAccess: existing.openAccess.isOpenAccess || work.openAccess.isOpenAccess,
                pdfUrl: existing.openAccess.pdfUrl ?? work.openAccess.pdfUrl,
                license: existing.openAccess.license ?? work.openAccess.license
            };
            existing.provenance.push(provenance);
            existing.score = Math.max(existing.score, relevanceScore);
            if (existing.doi) {
                doiToKey.set(existing.doi, targetKey);
            }
            // Index the incoming DOI as well, so that later records carrying
            // it resolve straight to this entry.
            if (normalizedDoi) {
                doiToKey.set(normalizedDoi, targetKey);
            }
            indexTitle(titleKey, targetKey);
        }
        const currentYear = new Date().getFullYear();
        const ranked = [...merged.values()]
            .map((work) => {
                const citationScore = work.citationCount > 0 ? Math.min(1, Math.log10(work.citationCount + 1) / 4) : 0;
                const recencyScore = work.year ? 1 / Math.max(1, currentYear - work.year + 1) : 0.15;
                // Share of the requested providers that returned this work.
                const diversityScore = Math.min(1, new Set(work.provenance.map((record) => record.provider)).size / Math.max(1, requestedSources.size));
                const blended = 0.5 * work.score + 0.25 * citationScore + 0.15 * diversityScore + 0.1 * Math.min(1, recencyScore * 2);
                return {
                    ...work,
                    score: blended
                };
            })
            .sort((a, b) => b.score - a.score || (b.citationCount ?? 0) - (a.citationCount ?? 0))
            .slice(0, input.limit);
        const result = {
            query: input.query,
            totalResults: ranked.length,
            results: ranked,
            providerErrors
        };
        this.setCache(cacheKey, result);
        this.logger.debug('Literature graph search complete', {
            query: input.query,
            providers: [...requestedSources],
            providerLimit,
            mergedCount: ranked.length,
            providerErrors
        });
        return cloneResult(result);
    }
    /**
     * Looks a work up by DOI through `searchGraph` (OpenAlex, Crossref and
     * Semantic Scholar, limit 10).
     *
     * @param {string} doi - DOI in any form `normalizeDoi` accepts.
     * @returns {Promise<object|null>} `null` when the DOI does not normalise
     *   or the search returns nothing. Otherwise the first result whose `doi`
     *   or `externalIds.doi` matches, else the top-ranked result.
     */
    async resolveByDoi(doi) {
        const normalized = normalizeDoi(doi);
        if (!normalized) {
            return null;
        }
        const result = await this.searchGraph({
            query: normalized,
            limit: 10,
            sources: ['openalex', 'crossref', 'semantic_scholar']
        });
        // NOTE(review): the final fallback returns the top hit even when its
        // DOI differs from the one requested; callers may rely on this
        // best-effort behaviour, so it is kept. Confirm it is intended.
        return (result.results.find((item) => normalizeDoi(item.doi) === normalized) ??
            result.results.find((item) => normalizeDoi(item.externalIds?.doi) === normalized) ??
            result.results[0] ??
            null);
    }
    /**
     * Builds the cache key for a search. Query case and whitespace, field
     * case/whitespace/order, and source order do not affect the key.
     *
     * @param {object} input - Same shape as the `searchGraph` input.
     * @param {Iterable<string>} sources - Effective provider set.
     * @returns {string} JSON-encoded key.
     */
    createCacheKey(input, sources) {
        const normalizedFields = (input.fieldsOfStudy ?? []).map((field) => field.trim().toLowerCase()).sort();
        const normalizedSources = [...sources].sort();
        const normalizedYearRange = input.yearRange ? `${input.yearRange[0]}:${input.yearRange[1]}` : 'none';
        return JSON.stringify({
            query: normalizeWhitespace(input.query).toLowerCase(),
            limit: input.limit,
            yearRange: normalizedYearRange,
            fields: normalizedFields,
            sources: normalizedSources
        });
    }
    /**
     * Reads a cache entry.
     *
     * @param {string} cacheKey - Key from `createCacheKey`.
     * @returns {object|null} A deep copy of the cached result, or `null` when
     *   caching is disabled (`researchGraphCacheTtlMs <= 0`), the key is
     *   absent, or the entry has expired (expired entries are deleted).
     */
    getCache(cacheKey) {
        if (this.config.researchGraphCacheTtlMs <= 0) {
            return null;
        }
        const cached = this.searchCache.get(cacheKey);
        if (!cached) {
            return null;
        }
        if (cached.expiresAt <= Date.now()) {
            this.searchCache.delete(cacheKey);
            return null;
        }
        return cloneResult(cached.value);
    }
    /**
     * Stores a deep copy of `value`. Expired entries are purged first; after
     * the insert, entries are evicted in insertion order until the cache holds
     * at most `researchGraphMaxCacheEntries`. Does nothing when caching is
     * disabled (`researchGraphCacheTtlMs <= 0`).
     *
     * @param {string} cacheKey - Key from `createCacheKey`.
     * @param {object} value - Search result to cache.
     */
    setCache(cacheKey, value) {
        if (this.config.researchGraphCacheTtlMs <= 0) {
            return;
        }
        const now = Date.now();
        for (const [key, cached] of this.searchCache.entries()) {
            if (cached.expiresAt <= now) {
                this.searchCache.delete(key);
            }
        }
        this.searchCache.set(cacheKey, {
            value: cloneResult(value),
            expiresAt: now + this.config.researchGraphCacheTtlMs
        });
        while (this.searchCache.size > this.config.researchGraphMaxCacheEntries) {
            const oldestKey = this.searchCache.keys().next().value;
            if (!oldestKey) {
                break;
            }
            this.searchCache.delete(oldestKey);
        }
    }
    /**
     * Queries `scholarService.searchKeywords` and maps each paper onto the
     * work shape the provider clients return. Scraped works carry no DOI,
     * venue, external ids or fields of study and use a fixed score of 0.4.
     * Missing optional paper fields map to `null`, `0` or an empty list
     * instead of throwing or leaking `undefined`.
     *
     * @param {string} query - Search text.
     * @param {number} limit - Requested number of results.
     * @returns {Promise<object[]>} Works tagged `provider: 'scholar_scrape'`.
     */
    async searchWithScholarScrape(query, limit) {
        const result = await this.scholarService.searchKeywords({
            query,
            numResults: limit,
            start: 0,
            language: this.config.scholarLanguage
        });
        return (result.papers ?? []).map((paper) => ({
            provider: 'scholar_scrape',
            providerId: paper.url ?? `scholar:${paper.title}`,
            title: paper.title,
            abstract: paper.abstract || null,
            year: parseYear(paper.year),
            venue: null,
            doi: null,
            url: paper.url ?? null,
            citationCount: toCount(paper.citedByCount),
            influentialCitationCount: 0,
            referenceCount: 0,
            authors: (paper.authorsLine ?? '')
                .split(',')
                .map((name) => ({ name: name.trim(), authorId: null }))
                .filter((author) => author.name.length > 0),
            openAccess: {
                isOpenAccess: Boolean(paper.pdfUrl),
                pdfUrl: paper.pdfUrl ?? null,
                license: null
            },
            externalIds: {},
            fieldsOfStudy: [],
            score: 0.4,
            sourceUrl: result.requestedUrl
        }));
    }
}
@@ -0,0 +1,73 @@
1
+ import { normalizeDoi, parseYear } from '../utils.js';
2
/**
 * Extracts the publication year from a Crossref work item.
 *
 * Reads the first element of the first `date-parts` entry of `issued`, then
 * of `published`, and passes the first numeric one through `parseYear`.
 *
 * @param {object} item - Crossref work item.
 * @returns {number|null} Whatever `parseYear` returns for the year found, or
 *   `null` when neither field carries a numeric year.
 */
const parseCrossrefYear = (item) => {
    for (const field of ['issued', 'published']) {
        const candidate = item[field]?.['date-parts']?.[0]?.[0];
        if (typeof candidate === 'number') {
            return parseYear(candidate);
        }
    }
    return null;
};
13
/**
 * Turns a marked-up abstract into plain text: every `<...>` tag is replaced
 * by a space, runs of whitespace are collapsed and the result is trimmed.
 *
 * @param {string|null|undefined} value - Abstract as delivered by the API.
 * @returns {string|null} Plain text, or `null` when the input is empty or
 *   contains nothing but markup and whitespace.
 */
const toPlainAbstract = (value) => {
    if (!value) {
        return null;
    }
    const withoutTags = value.replace(/<[^>]+>/g, ' ');
    const collapsed = withoutTags.replace(/\s+/g, ' ').trim();
    if (collapsed.length === 0) {
        return null;
    }
    return collapsed;
};
20
/** Matches the URL prefix of an ORCID iD delivered as a URL, http or https. */
const ORCID_URL_PREFIX = /^https?:\/\/orcid\.org\//i;
/**
 * Client for the Crossref works search endpoint.
 */
export class CrossrefClient {
    config;
    httpClient;
    /**
     * @param {object} config - Settings; `researchCrossrefBaseUrl` is read here.
     * @param {object} httpClient - Client exposing `fetchJson({ provider, url, headers })`.
     */
    constructor(config, httpClient) {
        this.config = config;
        this.httpClient = httpClient;
    }
    /**
     * Searches Crossref with a bibliographic query and maps each item onto
     * the shared work shape.
     *
     * @param {string} query - Sent as `query.bibliographic`.
     * @param {number} limit - Sent as `rows`.
     * @returns {Promise<object[]>} Works tagged `provider: 'crossref'`; empty
     *   when the payload carries no items.
     */
    async searchWorks(query, limit) {
        const url = new URL('/works', this.config.researchCrossrefBaseUrl);
        url.searchParams.set('query.bibliographic', query);
        url.searchParams.set('rows', String(limit));
        const payload = await this.httpClient.fetchJson({
            provider: 'crossref',
            url,
            headers: {
                accept: 'application/json'
            }
        });
        const sourceUrl = url.toString();
        return (payload?.message?.items ?? []).map((item) => {
            const doi = normalizeDoi(item.DOI ?? null);
            const linkPdf = (item.link ?? []).find((link) => (link['content-type'] ?? '').includes('pdf'))?.URL ?? null;
            return {
                provider: 'crossref',
                providerId: doi ? `doi:${doi}` : `crossref:${item.URL ?? 'unknown'}`,
                title: item.title?.[0] ?? 'Untitled',
                abstract: toPlainAbstract(item.abstract),
                year: parseCrossrefYear(item),
                venue: item['container-title']?.[0] ?? null,
                doi,
                url: item.URL ?? null,
                citationCount: item['is-referenced-by-count'] ?? 0,
                influentialCitationCount: 0,
                // Prefer the count fields: the `reference` list itself is only
                // present when the references were deposited.
                referenceCount: item['references-count'] ?? item['reference-count'] ?? item.reference?.length ?? 0,
                authors: (item.author ?? [])
                    .map((author) => ({
                        name: [author.given ?? '', author.family ?? ''].join(' ').trim(),
                        // Strip either scheme so the bare iD is stored.
                        authorId: author.ORCID?.replace(ORCID_URL_PREFIX, '') ?? null
                    }))
                    .filter((author) => author.name.length > 0),
                openAccess: {
                    isOpenAccess: Boolean(linkPdf),
                    pdfUrl: linkPdf,
                    license: item.license?.[0]?.URL ?? null
                },
                externalIds: {
                    ...(doi ? { doi } : {})
                },
                fieldsOfStudy: item.subject ?? [],
                score: item.score ?? 0.5,
                sourceUrl
            };
        });
    }
}
@@ -0,0 +1,80 @@
1
+ import { normalizeDoi, parseYear } from '../utils.js';
2
/**
 * Rebuilds abstract text from an inverted index that maps each token to the
 * word positions at which it occurs.
 *
 * @param {Object<string, number[]>|null|undefined} inverted - Token -> positions.
 * @returns {string|null} Tokens joined by single spaces in position order
 *   (unfilled positions leave no extra whitespace), or `null` when the index
 *   is missing, empty, or yields no text.
 */
const decodeInvertedAbstract = (inverted) => {
    if (!inverted || Object.keys(inverted).length === 0) {
        return null;
    }
    // Flatten to [position, token] pairs, preserving the index's own order so
    // that a position listed twice keeps the later token.
    const placements = [];
    for (const [token, positions] of Object.entries(inverted)) {
        for (const position of positions) {
            placements.push([position, token]);
        }
    }
    let highest = 0;
    for (const [position] of placements) {
        if (position > highest) {
            highest = position;
        }
    }
    const slots = new Array(highest + 1).fill('');
    for (const [position, token] of placements) {
        slots[position] = token;
    }
    const text = slots.join(' ').replace(/\s+/g, ' ').trim();
    return text.length > 0 ? text : null;
};
23
/**
 * Client for the OpenAlex works search endpoint.
 */
export class OpenAlexClient {
    config;
    httpClient;
    /**
     * @param {object} config - Settings; `researchOpenAlexBaseUrl` and the
     *   optional `researchOpenAlexApiKey` are read here.
     * @param {object} httpClient - Client exposing `fetchJson({ provider, url })`.
     */
    constructor(config, httpClient) {
        this.config = config;
        this.httpClient = httpClient;
    }
    /**
     * Searches OpenAlex and maps each result onto the shared work shape.
     *
     * @param {string} query - Sent as `search`.
     * @param {number} limit - Sent as `per-page`.
     * @returns {Promise<object[]>} Works tagged `provider: 'openalex'`; empty
     *   when the payload carries no results. The `sourceUrl` of each work
     *   never contains the API key.
     */
    async searchWorks(query, limit) {
        const url = new URL('/works', this.config.researchOpenAlexBaseUrl);
        url.searchParams.set('search', query);
        url.searchParams.set('per-page', String(limit));
        if (this.config.researchOpenAlexApiKey) {
            url.searchParams.set('api_key', this.config.researchOpenAlexApiKey);
        }
        const payload = await this.httpClient.fetchJson({
            provider: 'openalex',
            url
        });
        // `sourceUrl` is returned to callers as provenance, so the credential
        // must not travel with it.
        const publicUrl = new URL(url);
        publicUrl.searchParams.delete('api_key');
        const sourceUrl = publicUrl.toString();
        return (payload?.results ?? []).map((item) => {
            const doi = normalizeDoi(item.ids?.doi ?? item.doi ?? null);
            return {
                provider: 'openalex',
                providerId: item.id ?? `openalex:${item.display_name ?? 'unknown'}`,
                title: item.display_name ?? 'Untitled',
                abstract: decodeInvertedAbstract(item.abstract_inverted_index),
                year: parseYear(item.publication_year),
                venue: item.primary_location?.source?.display_name ?? null,
                doi,
                url: item.primary_location?.landing_page_url ?? item.id ?? null,
                citationCount: item.cited_by_count ?? 0,
                influentialCitationCount: 0,
                referenceCount: item.referenced_works_count ?? 0,
                authors: (item.authorships ?? [])
                    .map((auth) => ({
                        name: auth.author?.display_name ?? '',
                        authorId: auth.author?.id ?? null
                    }))
                    .filter((author) => author.name.length > 0),
                openAccess: {
                    isOpenAccess: item.open_access?.is_oa ?? item.open_access?.any_repository_has_fulltext ?? Boolean(item.primary_location?.pdf_url),
                    pdfUrl: item.primary_location?.pdf_url ?? item.open_access?.oa_url ?? null,
                    // `oa_status` describes the access route, not a license,
                    // so it is not used as a fallback here.
                    license: item.primary_location?.license ?? null
                },
                externalIds: {
                    ...(item.ids?.openalex ? { openalex: item.ids.openalex } : {}),
                    ...(doi ? { doi } : {}),
                    ...(item.ids?.pmid ? { pmid: item.ids.pmid } : {}),
                    ...(item.ids?.pmcid ? { pmcid: item.ids.pmcid } : {})
                },
                fieldsOfStudy: (item.concepts ?? [])
                    .map((concept) => concept.display_name ?? '')
                    .filter((value) => value.length > 0),
                score: item.relevance_score ?? 0.5,
                sourceUrl
            };
        });
    }
}
@@ -0,0 +1,60 @@
1
+ import { normalizeDoi, parseYear } from '../utils.js';
2
/** Largest `limit` the paper relevance search accepts per request. */
const SEMANTIC_SCHOLAR_MAX_LIMIT = 100;
/**
 * Client for the Semantic Scholar paper search endpoint.
 */
export class SemanticScholarClient {
    config;
    httpClient;
    /**
     * @param {object} config - Settings; `researchSemanticScholarBaseUrl` and
     *   the optional `researchSemanticScholarApiKey` are read here.
     * @param {object} httpClient - Client exposing `fetchJson({ provider, url, headers })`.
     */
    constructor(config, httpClient) {
        this.config = config;
        this.httpClient = httpClient;
    }
    /**
     * Searches Semantic Scholar and maps each paper onto the shared work shape.
     *
     * @param {string} query - Sent as `query`.
     * @param {number} limit - Sent as `limit`, capped at
     *   `SEMANTIC_SCHOLAR_MAX_LIMIT`.
     * @returns {Promise<object[]>} Works tagged `provider: 'semantic_scholar'`
     *   with a fixed score of 0.7; empty when the payload carries no data.
     */
    async searchWorks(query, limit) {
        const baseUrl = this.config.researchSemanticScholarBaseUrl;
        // Resolve a RELATIVE path against a base that ends in '/', so a base
        // such as ".../graph/v1" keeps its path. A leading slash would replace
        // the base path entirely.
        const url = new URL('paper/search', baseUrl.endsWith('/') ? baseUrl : `${baseUrl}/`);
        url.searchParams.set('query', query);
        url.searchParams.set('limit', String(Math.min(limit, SEMANTIC_SCHOLAR_MAX_LIMIT)));
        url.searchParams.set('fields', 'paperId,title,abstract,year,venue,externalIds,url,citationCount,influentialCitationCount,referenceCount,isOpenAccess,openAccessPdf,fieldsOfStudy,authors');
        const headers = { accept: 'application/json' };
        if (this.config.researchSemanticScholarApiKey) {
            headers['x-api-key'] = this.config.researchSemanticScholarApiKey;
        }
        const payload = await this.httpClient.fetchJson({
            provider: 'semantic_scholar',
            url,
            headers
        });
        const sourceUrl = url.toString();
        return (payload?.data ?? []).map((item) => {
            const doi = normalizeDoi(item.externalIds?.DOI ?? null);
            return {
                provider: 'semantic_scholar',
                providerId: item.paperId ?? `semantic:${item.title ?? 'unknown'}`,
                title: item.title ?? 'Untitled',
                abstract: item.abstract ?? null,
                year: parseYear(item.year),
                venue: item.venue ?? null,
                doi,
                url: item.url ?? null,
                citationCount: item.citationCount ?? 0,
                influentialCitationCount: item.influentialCitationCount ?? 0,
                referenceCount: item.referenceCount ?? 0,
                authors: (item.authors ?? [])
                    .map((author) => ({
                        name: author.name ?? '',
                        authorId: author.authorId ?? null
                    }))
                    .filter((author) => author.name.length > 0),
                openAccess: {
                    isOpenAccess: item.isOpenAccess ?? Boolean(item.openAccessPdf?.url),
                    pdfUrl: item.openAccessPdf?.url ?? null,
                    license: item.openAccessPdf?.license ?? null
                },
                // The normalised `doi` comes first; the provider's own id
                // keys are spread after it and kept as delivered.
                externalIds: {
                    ...(doi ? { doi } : {}),
                    ...(item.externalIds ?? {})
                },
                fieldsOfStudy: item.fieldsOfStudy ?? [],
                score: 0.7,
                sourceUrl
            };
        });
    }
}
@@ -0,0 +1,53 @@
1
+ import { CitationService } from './citation-service.js';
2
+ import { ExtractionService } from './extraction-service.js';
3
+ import { LiteratureService } from './literature-service.js';
4
+ import { IngestionService } from './ingestion-service.js';
5
/**
 * Facade over the literature, ingestion, extraction and citation services.
 * Every method forwards to exactly one of them (or, for extraction, to two).
 */
export class ResearchService {
    config;
    logger;
    scholarService;
    literatureService;
    ingestionService;
    extractionService;
    citationService;
    /**
     * Wires up the collaborating services. The literature service is shared
     * with the ingestion and citation services.
     *
     * @param {object} config - Passed to the literature and ingestion services.
     * @param {object} logger - Passed to the literature and ingestion services.
     * @param {object} scholarService - Passed to the literature service.
     */
    constructor(config, logger, scholarService) {
        const literature = new LiteratureService(config, logger, scholarService);
        const ingestion = new IngestionService(config, logger, literature);
        const extraction = new ExtractionService();
        const citation = new CitationService(literature);
        this.config = config;
        this.logger = logger;
        this.scholarService = scholarService;
        this.literatureService = literature;
        this.ingestionService = ingestion;
        this.extractionService = extraction;
        this.citationService = citation;
    }
    /** Factory equivalent to calling the constructor with the same arguments. */
    static fromConfig(config, logger, scholarService) {
        return new ResearchService(config, logger, scholarService);
    }
    /** Forwards to `literatureService.searchGraph`. */
    async searchLiteratureGraph(input) {
        return this.literatureService.searchGraph(input);
    }
    /** Forwards to `literatureService.resolveByDoi`. */
    async resolveWorkByDoi(doi) {
        return this.literatureService.resolveByDoi(doi);
    }
    /** Forwards to `ingestionService.enqueueIngestion`. */
    ingestPaperFullText(input) {
        return this.ingestionService.enqueueIngestion(input);
    }
    /** Forwards to `ingestionService.getJob`. */
    getIngestionStatus(jobId) {
        return this.ingestionService.getJob(jobId);
    }
    /** Forwards to `ingestionService.getDocument`. */
    getParsedDocument(documentId) {
        return this.ingestionService.getDocument(documentId);
    }
    /**
     * Fetches the parsed document from the ingestion service and hands it,
     * together with `input`, to `extractionService.extract`.
     */
    extractGranularPaperDetails(documentId, input) {
        const parsedDocument = this.ingestionService.getDocument(documentId);
        return this.extractionService.extract(parsedDocument, input);
    }
    /** Forwards to `citationService.suggestContextualCitations`. */
    suggestContextualCitations(input) {
        return this.citationService.suggestContextualCitations(input);
    }
    /** Forwards to `citationService.buildReferenceList`. */
    buildReferenceList(input) {
        return this.citationService.buildReferenceList(input);
    }
    /** Forwards to `citationService.validateManuscriptCitations`. */
    validateManuscriptCitations(manuscriptText, references, options) {
        return this.citationService.validateManuscriptCitations(manuscriptText, references, options);
    }
}
@@ -0,0 +1 @@
1
+ export {};