@longtable/research-search 0.1.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,11 @@
1
+ # LongTable Search
2
+
3
+ Scholar-first research search and evidence card workflow for LongTable.
4
+
5
+ The package converts a research prompt into deterministic query variants, routes
6
+ them through scholarly metadata sources, normalizes results as evidence cards,
7
+ deduplicates overlapping records, and assigns lightweight relevance and citation
8
+ support signals.
9
+
10
+ It does not scrape paywalled full text, bypass access controls, or claim
11
+ full-paper verification from metadata or abstracts.
@@ -0,0 +1,5 @@
1
+ export * from "./types.js";
2
+ export * from "./query.js";
3
+ export * from "./sources.js";
4
+ export * from "./rank.js";
5
+ export * from "./run.js";
package/dist/index.js ADDED
@@ -0,0 +1,5 @@
1
+ export * from "./types.js";
2
+ export * from "./query.js";
3
+ export * from "./sources.js";
4
+ export * from "./rank.js";
5
+ export * from "./run.js";
@@ -0,0 +1,6 @@
1
+ import { type BuildSearchIntentInput, type ResearchSearchIntent, type SearchSource } from "./types.js";
2
+ export declare function normalizeSearchText(value: string): string;
3
+ export declare function splitCsvTerms(value?: string): string[];
4
+ export declare function extractSearchKeywords(text: string, limit?: number): string[];
5
+ export declare function parseSearchSources(value?: string): SearchSource[];
6
+ export declare function buildResearchSearchIntent(input: BuildSearchIntentInput): ResearchSearchIntent;
package/dist/query.js ADDED
@@ -0,0 +1,179 @@
1
+ import { SEARCH_SOURCES } from "./types.js";
2
/**
 * Words that carry no search signal and are skipped by keyword extraction.
 * Mostly English function words, plus the generic term "study".
 */
const STOP_WORDS = new Set([
    "a", "about", "an", "and", "are", "as", "at",
    "be", "before", "between", "but", "by",
    "can", "do", "does",
    "for", "from",
    "has", "have", "how",
    "in", "into", "is", "it", "its",
    "of", "on", "or",
    "should", "study",
    "that", "the", "their", "this", "to",
    "what", "when", "where", "whether", "which", "with"
]);
45
/**
 * Builds an identifier for a search intent of the form
 * `search_<base36 timestamp>_<6 base36 chars>`.
 *
 * @returns {string} A new identifier; not guaranteed globally unique.
 */
function searchId() {
    const timestamp = Date.now().toString(36);
    // Math.random() may render to fewer than six base-36 digits (0.5 -> "0.i"),
    // so pad the suffix to keep the identifier shape fixed.
    const suffix = Math.random().toString(36).slice(2, 8).padEnd(6, "0");
    return `search_${timestamp}_${suffix}`;
}
48
/**
 * Normalizes free text for matching and keyword extraction.
 *
 * Lower-cases the input, drops http(s) URLs, rewrites a `doi:` prefix to
 * `doi `, replaces every run of characters other than ASCII letters, digits,
 * precomposed Hangul syllables and `. _ : / -` with a single space, and trims.
 *
 * @param {string} value Raw text.
 * @returns {string} The normalized text (possibly empty).
 */
export function normalizeSearchText(value) {
    return value
        // Compose Hangul first: decomposed (NFD) jamo fall outside the 가-힣
        // range below and would otherwise be stripped entirely.
        .normalize("NFC")
        .toLowerCase()
        .replace(/https?:\/\/\S+/g, " ")
        .replace(/doi:\s*/g, "doi ")
        .replace(/[^a-z0-9가-힣._:/-]+/gi, " ")
        .replace(/\s+/g, " ")
        .trim();
}
57
/**
 * Splits a comma-separated string into normalized, non-empty terms.
 *
 * @param {string} [value] Comma-separated terms; a falsy value yields `[]`.
 * @returns {string[]} Each entry passed through `normalizeSearchText`, with
 *   entries that normalize to the empty string removed.
 */
export function splitCsvTerms(value) {
    const terms = [];
    if (value) {
        for (const piece of value.split(",")) {
            const cleaned = normalizeSearchText(piece);
            if (cleaned) {
                terms.push(cleaned);
            }
        }
    }
    return terms;
}
66
/**
 * Extracts the most frequent keywords from a text.
 *
 * The text is normalized, split on spaces, and tokens shorter than three
 * characters or present in `STOP_WORDS` are dropped. Remaining tokens are
 * ordered by descending frequency, ties broken with `localeCompare`.
 *
 * @param {string} text Source text.
 * @param {number} [limit=12] Maximum number of keywords returned.
 * @returns {string[]} Keywords, most frequent first.
 */
export function extractSearchKeywords(text, limit = 12) {
    const frequency = new Map();
    for (const rawToken of normalizeSearchText(text).split(" ")) {
        // normalizeSearchText keeps ". _ : / -" so identifiers such as DOIs
        // survive, but sentence punctuation then sticks to words ("trust.").
        // Strip it from the token edges so "trust." and "trust" count together.
        const token = rawToken.replace(/^[._:/-]+|[._:/-]+$/g, "");
        if (token.length < 3 || STOP_WORDS.has(token)) {
            continue;
        }
        frequency.set(token, (frequency.get(token) ?? 0) + 1);
    }
    return [...frequency.entries()]
        .sort((a, b) => {
            const countDelta = b[1] - a[1];
            return countDelta !== 0 ? countDelta : a[0].localeCompare(b[0]);
        })
        .slice(0, limit)
        .map(([token]) => token);
}
83
/**
 * Parses a comma-separated list of search source names.
 *
 * @param {string} [value] Source list, or `"all"` (any case, surrounding
 *   whitespace ignored). Missing or blank input selects every source.
 * @returns {string[]} The requested sources in first-seen order, without
 *   duplicates; a copy of `SEARCH_SOURCES` when nothing specific was requested.
 * @throws {Error} When an entry is not a known source.
 */
export function parseSearchSources(value) {
    const trimmed = value?.trim();
    if (!trimmed || trimmed.toLowerCase() === "all") {
        return [...SEARCH_SOURCES];
    }
    const sources = [];
    const entries = trimmed
        .split(",")
        .map((entry) => entry.trim())
        .filter(Boolean);
    for (const entry of entries) {
        // Accept an exact match first, then fall back to a lower-cased match
        // so "OpenAlex" is not rejected purely for its casing.
        const source = SEARCH_SOURCES.includes(entry) ? entry : entry.toLowerCase();
        if (!SEARCH_SOURCES.includes(source)) {
            throw new Error(`Unknown search source: ${entry}`);
        }
        // A repeated source would otherwise be queried more than once.
        if (!sources.includes(source)) {
            sources.push(source);
        }
    }
    return sources.length > 0 ? sources : [...SEARCH_SOURCES];
}
100
/**
 * Determines the kind of a search intent.
 *
 * An explicit kind is validated and returned as-is. Otherwise the normalized
 * text is tested against an ordered list of English/Korean cue patterns and
 * the first match wins; `"literature"` is the fallback.
 *
 * @param {string} text Text to inspect when no explicit kind is given.
 * @param {string} [explicit] Caller-supplied kind.
 * @returns {string} One of literature, theory, measurement, citation, metadata, venue.
 * @throws {Error} When `explicit` is truthy but not a known kind.
 */
function inferIntentKind(text, explicit) {
    if (explicit) {
        const knownKinds = ["literature", "theory", "measurement", "citation", "metadata", "venue"];
        if (!knownKinds.includes(explicit)) {
            throw new Error(`Unknown search intent: ${explicit}`);
        }
        return explicit;
    }
    const haystack = normalizeSearchText(text);
    // Order matters: earlier rules take precedence over later ones.
    const rules = [
        ["citation", /\b(citation|reference|doi|source|hallucination|verify|support)\b|인용|레퍼런스|출처|근거/],
        ["measurement", /\b(scale|measure|measurement|instrument|validity|reliability)\b|측정|척도|타당도|도구/],
        ["theory", /\b(theory|theoretical|framework|construct|conceptual)\b|이론|개념|프레임워크/],
        ["venue", /\b(journal|venue|conference|submission|scope|fit)\b|저널|학회|투고/],
        ["metadata", /\b(metadata|pmid|arxiv|openalex|semantic scholar)\b/]
    ];
    for (const [kind, cue] of rules) {
        if (cue.test(haystack)) {
            return kind;
        }
    }
    return "literature";
}
130
/**
 * Builds up to three distinct query strings for a search.
 *
 * Variants, in order: the base query; a compact query made of the first eight
 * distinct must-terms and keywords; and the compact query prefixed with the
 * field. Empty and duplicate variants are dropped.
 *
 * @param {string} baseQuery Primary query string.
 * @param {string[]} keywords Extracted keywords.
 * @param {string} [field] Optional research field.
 * @param {string[]} [mustTerms=[]] Terms that must appear; placed first.
 * @returns {string[]} Between zero and three query variants.
 */
function buildQueryVariants(baseQuery, keywords, field, mustTerms = []) {
    // Keywords are extracted from text that already includes the must terms,
    // so de-duplicate to avoid variants such as "trust trust ai".
    const compact = [...new Set([...mustTerms, ...keywords])].slice(0, 8).join(" ");
    const variants = new Set([baseQuery]);
    if (compact) {
        variants.add(compact);
        if (field) {
            variants.add(`${field} ${compact}`);
        }
    }
    return [...variants].filter(Boolean).slice(0, 3);
}
142
/**
 * Turns raw search input into a structured search intent.
 *
 * The base text is the trimmed explicit query when present, otherwise the
 * non-blank prompt, project goal and project blocker joined by spaces.
 * Keywords come from the base text, field and must terms; the query is the
 * first ten keywords, or the normalized base text when there are none. The
 * limit defaults to 10 and is capped at 50.
 *
 * @param {object} input Search input (query, prompt, projectGoal,
 *   projectBlocker, must, exclude, field, limit, intent, sources, source).
 * @returns {object} The search intent.
 * @throws {Error} When no usable text is supplied, or when the intent kind or
 *   a requested source is unknown.
 */
export function buildResearchSearchIntent(input) {
    let baseText = input.query?.trim();
    if (!baseText) {
        const fallbackParts = [input.prompt, input.projectGoal, input.projectBlocker];
        baseText = fallbackParts
            .filter((part) => Boolean(part && part.trim()))
            .join(" ");
    }
    if (!baseText.trim()) {
        throw new Error("A search query is required. Pass --query or run inside a workspace with a current goal.");
    }
    const mustTerms = splitCsvTerms(input.must);
    const excludeTerms = splitCsvTerms(input.exclude);
    const field = input.field?.trim() || undefined;
    const keywords = extractSearchKeywords([baseText, field, ...mustTerms].filter(Boolean).join(" "));
    const normalizedQuery = normalizeSearchText(baseText);
    const query = keywords.length > 0 ? keywords.slice(0, 10).join(" ") : normalizedQuery;
    let limit = 10;
    if (Number.isInteger(input.limit) && input.limit > 0) {
        limit = Math.min(input.limit, 50);
    }
    return {
        id: searchId(),
        createdAt: new Date().toISOString(),
        kind: inferIntentKind(baseText, input.intent),
        query,
        normalizedQuery,
        queryVariants: buildQueryVariants(query, keywords, field, mustTerms),
        keywords,
        ...(field ? { field } : {}),
        mustTerms,
        excludeTerms,
        requestedSources: parseSearchSources(input.sources),
        limit,
        source: input.source ?? "cli"
    };
}
package/dist/rank.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ import type { EvidenceCard, ResearchSearchIntent } from "./types.js";
2
+ export declare function dedupeAndRankCards(cards: EvidenceCard[], intent: ResearchSearchIntent): EvidenceCard[];
package/dist/rank.js ADDED
@@ -0,0 +1,145 @@
1
+ import { normalizeSearchText } from "./query.js";
2
/**
 * Computes the de-duplication key for an evidence card.
 *
 * The first available identifier wins, in this order: DOI, PMID, arXiv id,
 * OpenAlex id, Semantic Scholar id. Cards without any identifier are keyed by
 * normalized title plus year.
 *
 * @param {object} card Evidence card.
 * @returns {string} A key of the form `<scheme>:<value>`.
 */
function keyForCard(card) {
    const { doi, pmid, arxivId, openAlexId, semanticScholarId } = card;
    if (doi) {
        return "doi:" + doi.toLowerCase();
    }
    if (pmid) {
        return "pmid:" + pmid;
    }
    if (arxivId) {
        return "arxiv:" + arxivId.toLowerCase();
    }
    if (openAlexId) {
        return "openalex:" + openAlexId;
    }
    if (semanticScholarId) {
        return "s2:" + semanticScholarId;
    }
    const slug = normalizeSearchText(card.title).replace(/\s+/g, "-");
    const yearPart = card.year ?? "unknown";
    return `title:${slug}:${yearPart}`;
}
16
/**
 * Lists the keywords that occur in a card's searchable text.
 *
 * The searchable text is the normalized concatenation of title, abstract,
 * venue, research design and main finding; matching is by substring.
 *
 * @param {object} card Evidence card.
 * @param {string[]} keywords Keywords to look for.
 * @returns {string[]} The matching keywords, in input order.
 */
function keywordMatches(card, keywords) {
    const textFields = [card.title, card.abstract, card.venue, card.researchDesign, card.mainFinding];
    const searchable = normalizeSearchText(textFields.filter(Boolean).join(" "));
    const hits = [];
    for (const keyword of keywords) {
        if (searchable.includes(normalizeSearchText(keyword))) {
            hits.push(keyword);
        }
    }
    return hits;
}
26
/**
 * Returns the ranking bonus for a card based on the sources that reported it.
 *
 * Routes are checked in a fixed priority order and the first one present in
 * `card.sourceRoutes` decides the bonus; unknown routes score 1.
 *
 * @param {object} card Evidence card with a `sourceRoutes` array.
 * @returns {number} The bonus added to the relevance score.
 */
function sourceBoost(card) {
    const prioritized = [
        ["openalex", 6],
        ["semantic_scholar", 5],
        ["pubmed", 5],
        ["crossref", 4],
        ["arxiv", 3],
        ["doaj", 3],
        ["eric", 3]
    ];
    const hit = prioritized.find(([route]) => card.sourceRoutes.includes(route));
    return hit ? hit[1] : 1;
}
43
/**
 * Classifies how well a card supports the query, from keyword coverage.
 *
 * Cards without an abstract are always `"not_verified"`. Otherwise the share
 * of keywords matched decides: >= 0.65 direct, >= 0.35 indirect, > 0
 * background, and 0 questionable fit.
 *
 * @param {object} card Evidence card (reads `abstractAvailable`).
 * @param {string[]} matches Keywords found in the card.
 * @param {string[]} keywords All query keywords.
 * @returns {string} The citation support status.
 */
function supportStatus(card, matches, keywords) {
    if (!card.abstractAvailable) {
        return "not_verified";
    }
    let coverage = 0;
    if (keywords.length !== 0) {
        coverage = matches.length / keywords.length;
    }
    const tiers = [
        [0.65, "direct_support"],
        [0.35, "indirect_support"]
    ];
    for (const [minimum, label] of tiers) {
        if (coverage >= minimum) {
            return label;
        }
    }
    return coverage > 0 ? "background" : "questionable_fit";
}
56
/**
 * Computes a card's relevance score for a search intent.
 *
 * Contributions, in order: +8 / +3 per keyword found in title / abstract;
 * +10 per must term present in either, -20 when absent; -30 per exclude term
 * present; up to +10 for recency (one point lost per year of age); up to +12
 * from the citation count (log scale); +4 for legal full text; the source
 * bonus; and +2 per matched keyword. The result is rounded to one decimal and
 * never negative.
 *
 * @param {object} card Evidence card.
 * @param {object} intent Search intent (keywords, mustTerms, excludeTerms).
 * @param {string[]} matches Keywords already matched against the card.
 * @returns {number} The relevance score.
 */
function scoreCard(card, intent, matches) {
    const titleText = normalizeSearchText(card.title);
    const abstractText = normalizeSearchText(card.abstract ?? "");
    const mentions = (phrase) => {
        const needle = normalizeSearchText(phrase);
        return titleText.includes(needle) || abstractText.includes(needle);
    };
    let total = 0;
    intent.keywords.forEach((keyword) => {
        const needle = normalizeSearchText(keyword);
        if (titleText.includes(needle)) {
            total += 8;
        }
        if (abstractText.includes(needle)) {
            total += 3;
        }
    });
    intent.mustTerms.forEach((term) => {
        total += mentions(term) ? 10 : -20;
    });
    intent.excludeTerms.forEach((term) => {
        if (mentions(term)) {
            total -= 30;
        }
    });
    if (card.year) {
        const currentYear = new Date().getUTCFullYear();
        const age = Math.max(0, currentYear - card.year);
        total += Math.max(0, 10 - Math.min(age, 10));
    }
    if (card.citationCount) {
        total += Math.min(12, Math.log10(card.citationCount + 1) * 4);
    }
    if (card.legalFullTextAvailable) {
        total += 4;
    }
    total += sourceBoost(card);
    total += matches.length * 2;
    const rounded = Math.round(total * 10) / 10;
    return Math.max(0, rounded);
}
95
/**
 * Merges two cards that describe the same work.
 *
 * Scalar fields prefer `existing` and fall back to `incoming`; list fields are
 * unioned; boolean availability flags are OR-ed; citation count and relevance
 * score take the maximum.
 *
 * `evidenceDepth` is recomputed from the merged abstract and full-text flag,
 * and `citationSupportStatus` is taken from `incoming` when `existing` was
 * never verified. Without this, a card that gained its abstract from the
 * incoming record would still claim "metadata_only" / "not_verified".
 *
 * @param {object} existing Card already stored for this key.
 * @param {object} incoming Newly scored card with the same key.
 * @returns {object} A new merged card; neither input is mutated.
 */
function mergeCards(existing, incoming) {
    const sourceRoutes = [...new Set([...existing.sourceRoutes, ...incoming.sourceRoutes])];
    const abstract = existing.abstract ?? incoming.abstract;
    const abstractAvailable = existing.abstractAvailable || incoming.abstractAvailable;
    const legalFullTextAvailable = existing.legalFullTextAvailable || incoming.legalFullTextAvailable;
    let evidenceDepth = "metadata_only";
    if (legalFullTextAvailable) {
        evidenceDepth = "legal_full_text_available";
    }
    else if (abstract) {
        evidenceDepth = "abstract_only";
    }
    const existingStatus = existing.citationSupportStatus;
    const citationSupportStatus = !existingStatus || existingStatus === "not_verified"
        ? incoming.citationSupportStatus ?? existingStatus
        : existingStatus;
    return {
        ...existing,
        authors: existing.authors.length > 0 ? existing.authors : incoming.authors,
        year: existing.year ?? incoming.year,
        venue: existing.venue ?? incoming.venue,
        doi: existing.doi ?? incoming.doi,
        pmid: existing.pmid ?? incoming.pmid,
        arxivId: existing.arxivId ?? incoming.arxivId,
        openAlexId: existing.openAlexId ?? incoming.openAlexId,
        semanticScholarId: existing.semanticScholarId ?? incoming.semanticScholarId,
        ericId: existing.ericId ?? incoming.ericId,
        url: existing.url ?? incoming.url,
        abstract,
        abstractAvailable,
        evidenceDepth,
        legalFullTextAvailable,
        fullTextUrl: existing.fullTextUrl ?? incoming.fullTextUrl,
        // `|| undefined` keeps "no count known" distinct from a real count.
        citationCount: Math.max(existing.citationCount ?? 0, incoming.citationCount ?? 0) || undefined,
        researchDesign: existing.researchDesign ?? incoming.researchDesign,
        constructsOrMeasures: [...new Set([...(existing.constructsOrMeasures ?? []), ...(incoming.constructsOrMeasures ?? [])])],
        mainFinding: existing.mainFinding ?? incoming.mainFinding,
        citationSupportStatus,
        sourceRoutes,
        limitations: [...new Set([...existing.limitations, ...incoming.limitations])],
        matchedKeywords: [...new Set([...existing.matchedKeywords, ...incoming.matchedKeywords])],
        relevanceScore: Math.max(existing.relevanceScore, incoming.relevanceScore)
    };
}
123
/**
 * Scores, de-duplicates and ranks evidence cards for a search intent.
 *
 * Each card is annotated with matched keywords, a citation support status and
 * a relevance score, then merged with any earlier card sharing its key. The
 * result is sorted by descending score, then descending year, and truncated
 * to `intent.limit`.
 *
 * @param {object[]} cards Cards collected from all sources.
 * @param {object} intent Search intent (reads `keywords` and `limit`).
 * @returns {object[]} The ranked cards.
 */
export function dedupeAndRankCards(cards, intent) {
    const merged = new Map();
    for (const card of cards) {
        const matchedKeywords = keywordMatches(card, intent.keywords);
        const citationSupportStatus = supportStatus(card, matchedKeywords, intent.keywords);
        const relevanceScore = scoreCard(card, intent, matchedKeywords);
        const scored = { ...card, matchedKeywords, citationSupportStatus, relevanceScore };
        const key = keyForCard(scored);
        const previous = merged.get(key);
        merged.set(key, previous ? mergeCards(previous, scored) : scored);
    }
    const ranked = [...merged.values()];
    ranked.sort((left, right) => {
        const scoreDelta = right.relevanceScore - left.relevanceScore;
        if (scoreDelta !== 0) {
            return scoreDelta;
        }
        return (right.year ?? 0) - (left.year ?? 0);
    });
    return ranked.slice(0, intent.limit);
}
package/dist/run.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ import type { EvidenceRun, RunResearchSearchInput } from "./types.js";
2
+ export declare function runResearchSearch(input: RunResearchSearchInput): Promise<EvidenceRun>;
package/dist/run.js ADDED
@@ -0,0 +1,106 @@
1
+ import { buildResearchSearchIntent } from "./query.js";
2
+ import { dedupeAndRankCards } from "./rank.js";
3
+ import { assessSearchSourceCapabilities, runSourceSearch } from "./sources.js";
4
/**
 * Builds an identifier for an evidence run of the form
 * `evidence_<base36 timestamp>_<6 base36 chars>`.
 *
 * @returns {string} A new identifier; not guaranteed globally unique.
 */
function runId() {
    const timestamp = Date.now().toString(36);
    // Math.random() may render to fewer than six base-36 digits (0.5 -> "0.i"),
    // so pad the suffix to keep the identifier shape fixed.
    const suffix = Math.random().toString(36).slice(2, 8).padEnd(6, "0");
    return `evidence_${timestamp}_${suffix}`;
}
7
/**
 * Returns the current time as an ISO 8601 string.
 *
 * @returns {string} The timestamp produced by `Date.prototype.toISOString`.
 */
function now() {
    const current = new Date();
    return current.toISOString();
}
10
/**
 * Returns the runtime's global `fetch` implementation.
 *
 * @returns {Function} The global `fetch`.
 * @throws {Error} When the runtime does not provide a `fetch` function.
 */
function defaultFetch() {
    if (typeof fetch === "function") {
        return fetch;
    }
    throw new Error("LongTable search requires a fetch-capable Node runtime.");
}
16
/**
 * Runs a research search across the requested scholarly sources.
 *
 * Builds the search intent, checks which sources are usable with the given
 * environment, and returns a "blocked" run when any source is unavailable
 * unless `input.allowPartial` is `true`. Otherwise each enabled source is
 * queried one after another; per-source failures are recorded rather than
 * thrown. The collected cards are de-duplicated and ranked. The run is
 * "partial" when any source failed or was skipped, else "completed".
 *
 * @param {object} input Search input; may also carry `env`, `fetch` and
 *   `allowPartial`.
 * @returns {Promise<object>} The evidence run.
 */
export async function runResearchSearch(input) {
    const createdAt = now();
    const id = runId();
    const intent = buildResearchSearchIntent(input);
    const env = input.env ?? process.env;
    const capabilities = assessSearchSourceCapabilities(intent.requestedSources, env);
    const skippedSources = capabilities.filter((capability) => !capability.enabled);
    const skippedReport = (capability) => ({
        source: capability.source,
        status: "skipped",
        count: 0,
        elapsedMs: 0,
        reason: capability.reason
    });
    const skipWarnings = () => skippedSources.map((capability) => capability.reason ?? `${capability.source} unavailable.`);
    const mayProceed = skippedSources.length === 0 || input.allowPartial === true;
    if (!mayProceed) {
        const updatedAt = now();
        return {
            id,
            createdAt,
            updatedAt,
            status: "blocked",
            intent,
            sourceReports: skippedSources.map((capability) => skippedReport(capability)),
            cards: [],
            skippedSources,
            warnings: skipWarnings(),
            blockedReason: "One or more requested scholarly sources are unavailable. Confirm partial search or configure credentials."
        };
    }
    const httpFetch = input.fetch ?? defaultFetch();
    const sourceReports = [];
    const cards = [];
    for (const capability of capabilities) {
        const { source } = capability;
        if (!capability.enabled) {
            sourceReports.push(skippedReport(capability));
            continue;
        }
        const started = Date.now();
        try {
            const result = await runSourceSearch({ intent, source, limit: intent.limit }, { fetch: httpFetch, env });
            cards.push(...result.cards);
            sourceReports.push({
                source,
                status: "completed",
                count: result.cards.length,
                elapsedMs: Date.now() - started,
                endpoint: result.endpoint
            });
        }
        catch (error) {
            // A failing source is reported, not rethrown, so other sources still run.
            sourceReports.push({
                source,
                status: "failed",
                count: 0,
                elapsedMs: Date.now() - started,
                reason: error instanceof Error ? error.message : String(error)
            });
        }
    }
    const rankedCards = dedupeAndRankCards(cards, intent);
    const incomplete = sourceReports.some((report) => report.status === "failed" || report.status === "skipped");
    const failureWarnings = sourceReports
        .filter((report) => report.status === "failed")
        .map((report) => `${report.source} failed: ${report.reason ?? "unknown error"}`);
    return {
        id,
        createdAt,
        updatedAt: now(),
        status: incomplete ? "partial" : "completed",
        intent,
        sourceReports,
        cards: rankedCards,
        skippedSources,
        warnings: [...skipWarnings(), ...failureWarnings]
    };
}
@@ -0,0 +1,5 @@
1
+ import { type SearchSource, type SearchSourceCapability, type SourceSearchContext, type SourceSearchRequest, type SourceSearchResult } from "./types.js";
2
+ export declare function assessSearchSourceCapabilities(sources: SearchSource[], env?: Record<string, string | undefined>): SearchSourceCapability[];
3
+ export declare function enabledSearchSources(sources: SearchSource[], env?: Record<string, string | undefined>): SearchSource[];
4
+ export declare function runSourceSearch(request: SourceSearchRequest, context: SourceSearchContext): Promise<SourceSearchResult>;
5
+ export declare function allSearchSources(): SearchSource[];
@@ -0,0 +1,510 @@
1
+ import { SEARCH_SOURCES } from "./types.js";
2
/**
 * Builds a request URL from a base URL and query parameters.
 *
 * Parameters whose value is `undefined` or the empty string are omitted; all
 * other values are stringified and set on the URL's query string.
 *
 * @param {string} url Absolute base URL.
 * @param {object} params Query parameters keyed by name.
 * @returns {string} The serialized URL.
 */
function endpoint(url, params) {
    const target = new URL(url);
    Object.entries(params)
        .filter(([, value]) => value !== undefined && value !== "")
        .forEach(([key, value]) => {
            target.searchParams.set(key, String(value));
        });
    return target.toString();
}
11
/**
 * Strips markup from a source-provided string and decodes basic entities.
 *
 * @param {unknown} value Raw value from a source payload.
 * @returns {string|undefined} Cleaned single-line text, or `undefined` when
 *   the value is not a string or nothing remains after cleaning.
 */
function cleanText(value) {
    if (typeof value !== "string") {
        return undefined;
    }
    const cleaned = value
        // Only strip things that look like tags (name, "/", "!" or "?" right
        // after "<"), so text such as "p < 0.05 and r > 0.3" is preserved.
        .replace(/<\/?[a-z!?][^>]*>/gi, " ")
        .replace(/&lt;/g, "<")
        .replace(/&gt;/g, ">")
        .replace(/&quot;/g, "\"")
        .replace(/&(?:#39|#x27|apos);/gi, "'")
        // Decode "&amp;" last: decoding it first turns "&amp;lt;" into "&lt;"
        // and then wrongly into "<" (double unescaping).
        .replace(/&amp;/g, "&")
        .replace(/\s+/g, " ")
        .trim();
    return cleaned || undefined;
}
26
/**
 * Coerces a value to an object suitable for property lookups.
 *
 * @param {unknown} value Any value.
 * @returns {object} The value itself when it is a non-null, non-array object;
 *   otherwise a fresh empty object.
 */
function asRecord(value) {
    const isObject = typeof value === "object" && value !== null && !Array.isArray(value);
    return isObject ? value : {};
}
29
/**
 * Coerces a value to an array.
 *
 * @param {unknown} value Any value.
 * @returns {unknown[]} The value itself when it is an array, else a new empty array.
 */
function asArray(value) {
    if (!Array.isArray(value)) {
        return [];
    }
    return value;
}
32
/**
 * Returns a trimmed, non-empty string or `undefined`.
 *
 * @param {unknown} value Any value.
 * @returns {string|undefined} The trimmed string when the value is a string
 *   with non-whitespace content; otherwise `undefined`.
 */
function asString(value) {
    if (typeof value !== "string") {
        return undefined;
    }
    const trimmed = value.trim();
    return trimmed ? trimmed : undefined;
}
35
/**
 * Returns a finite number or `undefined`.
 *
 * @param {unknown} value Any value.
 * @returns {number|undefined} The value when it is a finite number; otherwise
 *   `undefined` (including for NaN, infinities and numeric strings).
 */
function asNumber(value) {
    // Number.isFinite never coerces, so non-number inputs are rejected too.
    if (Number.isFinite(value)) {
        return value;
    }
    return undefined;
}
38
/**
 * Normalizes a DOI to its bare, lower-case form.
 *
 * Removes a leading `http(s)://doi.org/` or `http(s)://dx.doi.org/` resolver
 * prefix and a leading `doi:` label.
 *
 * @param {unknown} value DOI in any of the accepted forms.
 * @returns {string|undefined} The bare DOI, or `undefined` when the value is
 *   missing, not a string, or empty after normalization.
 */
function normalizeDoi(value) {
    if (typeof value !== "string" || !value) {
        return undefined;
    }
    const bare = value
        // Trim first: the prefix patterns are anchored with "^", so leading
        // whitespace would otherwise leave the resolver URL in place.
        .trim()
        .replace(/^https?:\/\/(dx\.)?doi\.org\//i, "")
        .replace(/^doi:\s*/i, "")
        .trim()
        .toLowerCase();
    return bare || undefined;
}
48
/**
 * Returns the first element of an array-like payload value as a string.
 *
 * @param {unknown} value Expected to be an array.
 * @returns {string|undefined} The first element when it is a non-blank
 *   string; otherwise `undefined`.
 */
function firstString(value) {
    const [head] = asArray(value);
    return asString(head);
}
52
/**
 * Reads the year from a `{ "date-parts": [[year, ...], ...] }` structure.
 *
 * @param {unknown} value Object carrying a `date-parts` array.
 * @returns {number|undefined} The first number of the first date part, or
 *   `undefined` when the structure is missing or malformed.
 */
function yearFromParts(value) {
    const dateParts = asArray(asRecord(value)["date-parts"]);
    const leadingPart = asArray(dateParts[0]);
    return asNumber(leadingPart[0]);
}
58
/**
 * Describes how much of a work is available as evidence.
 *
 * @param {string} [abstract] Abstract text, if any.
 * @param {boolean} [legalFullTextAvailable=false] Whether legal full text exists.
 * @returns {string} `"legal_full_text_available"`, `"abstract_only"` or
 *   `"metadata_only"`; full text takes precedence over the abstract.
 */
function inferEvidenceDepth(abstract, legalFullTextAvailable = false) {
    if (legalFullTextAvailable) {
        return "legal_full_text_available";
    }
    return abstract ? "abstract_only" : "metadata_only";
}
65
/**
 * Guesses a study's research design from cue words in its abstract.
 *
 * Cue patterns are tried in a fixed order and the first match wins.
 *
 * @param {string} [abstract] Abstract text.
 * @returns {string|undefined} A design label, or `undefined` when the
 *   abstract is missing or contains no known cue.
 */
function inferResearchDesign(abstract) {
    const lowered = abstract?.toLowerCase() ?? "";
    if (!lowered) {
        return undefined;
    }
    const designCues = [
        [/\bsystematic review\b|\bmeta-analysis\b/, "review or meta-analysis"],
        [/\brandomi[sz]ed\b|\bexperiment\b|\btrial\b/, "experimental or trial design"],
        [/\bsurvey\b|\bquestionnaire\b|\bscale\b/, "survey or scale-based design"],
        [/\binterview\b|\bqualitative\b|\bcase study\b/, "qualitative design"],
        [/\blongitudinal\b|\bpanel data\b/, "longitudinal design"]
    ];
    for (const [cue, label] of designCues) {
        if (cue.test(lowered)) {
            return label;
        }
    }
    return undefined;
}
81
/**
 * Picks a representative sentence from an abstract.
 *
 * The abstract is split after sentence-ending punctuation and the first
 * sentence longer than 40 characters is returned, cut to 360 characters.
 *
 * @param {string} [abstract] Abstract text.
 * @returns {string|undefined} The chosen sentence, or `undefined` when there
 *   is no abstract or no sentence is long enough.
 */
function inferMainFinding(abstract) {
    if (!abstract) {
        return undefined;
    }
    for (const candidate of abstract.split(/(?<=[.!?])\s+/)) {
        if (candidate.length > 40) {
            return candidate.slice(0, 360);
        }
    }
    return undefined;
}
87
/**
 * Builds an evidence card from fields extracted from one source record.
 *
 * Title, abstract and venue are cleaned, the DOI is normalized, and the
 * evidence depth, research design and main finding are derived from the
 * abstract. The card id is `<source>:<key>` where the key is the source
 * record id, else the normalized DOI, else a slug of the title. New cards
 * start unscored and with citation support "not_verified".
 *
 * @param {object} input Extracted record fields, including `source`.
 * @returns {object} The evidence card.
 */
function baseCard(input) {
    const title = cleanText(input.title) ?? "Untitled scholarly record";
    const abstract = cleanText(input.abstract);
    const doi = normalizeDoi(input.doi);
    const legalFullTextAvailable = input.legalFullTextAvailable === true;
    const limitations = [];
    if (!abstract) {
        limitations.push("No abstract was available from this source.");
    }
    if (!legalFullTextAvailable) {
        limitations.push("LongTable did not retrieve full text for this card.");
    }
    const titleSlug = () => title.toLowerCase().replace(/\s+/g, "-").slice(0, 80);
    const recordKey = input.sourceRecordId ?? doi ?? titleSlug();
    return {
        id: `${input.source}:${recordKey}`,
        title,
        authors: input.authors ?? [],
        year: input.year,
        venue: cleanText(input.venue),
        doi,
        pmid: input.pmid,
        arxivId: input.arxivId,
        openAlexId: input.openAlexId,
        semanticScholarId: input.semanticScholarId,
        ericId: input.ericId,
        url: input.url,
        sourceRoute: input.source,
        sourceRoutes: [input.source],
        sourceRecordId: input.sourceRecordId,
        abstract,
        abstractAvailable: Boolean(abstract),
        evidenceDepth: inferEvidenceDepth(abstract, legalFullTextAvailable),
        legalFullTextAvailable,
        fullTextUrl: input.fullTextUrl,
        citationCount: input.citationCount,
        researchDesign: inferResearchDesign(abstract),
        constructsOrMeasures: undefined,
        mainFinding: inferMainFinding(abstract),
        relevanceToProject: undefined,
        citationSupportStatus: "not_verified",
        limitations,
        matchedKeywords: [],
        relevanceScore: 0
    };
}
127
/**
 * Performs a GET request and parses the response body as JSON.
 *
 * @param {object} context Search context providing `fetch`.
 * @param {string} url Request URL.
 * @returns {Promise<unknown>} The parsed JSON payload.
 * @throws {Error} `HTTP <status> <statusText>` when the response is not ok.
 */
async function fetchJson(context, url) {
    const headers = {
        "accept": "application/json",
        "user-agent": "LongTable/0.1.28 (https://github.com/HosungYou/LongTable)"
    };
    const response = await context.fetch(url, { headers });
    if (response.ok) {
        return response.json();
    }
    throw new Error(`HTTP ${response.status} ${response.statusText}`);
}
139
/**
 * Performs a GET request and returns the response body as text.
 *
 * @param {object} context Search context providing `fetch`.
 * @param {string} url Request URL.
 * @returns {Promise<string>} The response body.
 * @throws {Error} `HTTP <status> <statusText>` when the response is not ok.
 */
async function fetchText(context, url) {
    const headers = {
        "accept": "application/xml, text/xml, application/atom+xml, text/plain",
        "user-agent": "LongTable/0.1.28 (https://github.com/HosungYou/LongTable)"
    };
    const response = await context.fetch(url, { headers });
    if (response.ok) {
        return response.text();
    }
    throw new Error(`HTTP ${response.status} ${response.statusText}`);
}
151
/**
 * Chooses the query string sent to a source.
 *
 * @param {object} intent Search intent.
 * @returns {string} The first query variant, or `intent.query` when the
 *   first variant is null or undefined.
 */
function queryForSource(intent) {
    const primaryVariant = intent.queryVariants[0];
    if (primaryVariant === undefined || primaryVariant === null) {
        return intent.query;
    }
    return primaryVariant;
}
154
/**
 * Reports whether a source can be used with the given environment.
 *
 * OpenAlex needs `OPENALEX_API_KEY` and Unpaywall needs
 * `LONGTABLE_CONTACT_EMAIL`; every other source is always enabled.
 *
 * @param {string} source Source name.
 * @param {object} env Environment variables.
 * @returns {object} Capability record; disabled records also carry `reason`
 *   and `setupHint`.
 */
function getCapability(source, env) {
    const requirements = new Map([
        ["openalex", {
                variable: "OPENALEX_API_KEY",
                reason: "OpenAlex route is disabled because OPENALEX_API_KEY is missing.",
                setupHint: "Set OPENALEX_API_KEY to enable reliable OpenAlex API use."
            }],
        ["unpaywall", {
                variable: "LONGTABLE_CONTACT_EMAIL",
                reason: "Unpaywall route is disabled because LONGTABLE_CONTACT_EMAIL is missing.",
                setupHint: "Set LONGTABLE_CONTACT_EMAIL so Unpaywall can receive the required email parameter."
            }]
    ]);
    const requirement = requirements.get(source);
    if (requirement && !env[requirement.variable]) {
        return {
            source,
            enabled: false,
            requiredEnv: [requirement.variable],
            missingEnv: [requirement.variable],
            reason: requirement.reason,
            setupHint: requirement.setupHint
        };
    }
    return { source, enabled: true, requiredEnv: [], missingEnv: [] };
}
182
/**
 * Evaluates the capability of each requested source.
 *
 * @param {string[]} sources Source names, in request order.
 * @param {object} [env=process.env] Environment variables.
 * @returns {object[]} One capability record per source, in the same order.
 */
export function assessSearchSourceCapabilities(sources, env = process.env) {
    const capabilities = [];
    for (const source of sources) {
        capabilities.push(getCapability(source, env));
    }
    return capabilities;
}
185
/**
 * Filters a list of sources down to those currently usable.
 *
 * @param {string[]} sources Source names, in request order.
 * @param {object} [env=process.env] Environment variables.
 * @returns {string[]} The names of the enabled sources, order preserved.
 */
export function enabledSearchSources(sources, env = process.env) {
    const usable = [];
    for (const capability of assessSearchSourceCapabilities(sources, env)) {
        if (capability.enabled) {
            usable.push(capability.source);
        }
    }
    return usable;
}
190
/**
 * Formats a Crossref author entry as a display name.
 *
 * @param {unknown} value Author entry from a Crossref work.
 * @returns {string|undefined} "given family" built from whichever parts are
 *   present, else the entry's `name`, else `undefined`.
 */
function authorNameFromCrossref(value) {
    const author = asRecord(value);
    const nameParts = [asString(author.given), asString(author.family)].filter(Boolean);
    const personalName = nameParts.join(" ");
    if (personalName) {
        return personalName;
    }
    return asString(author.name);
}
196
/**
 * Searches Crossref works and converts the results to evidence cards.
 *
 * @param {object} request Source request (reads `intent` and `limit`).
 * @param {object} context Search context providing `fetch` and `env`;
 *   `LONGTABLE_CONTACT_EMAIL` is sent as `mailto` when set.
 * @returns {Promise<object>} `{ source, endpoint, cards }`.
 */
async function searchCrossref(request, context) {
    const url = endpoint("https://api.crossref.org/works", {
        "query.bibliographic": queryForSource(request.intent),
        rows: request.limit,
        mailto: context.env.LONGTABLE_CONTACT_EMAIL
    });
    const payload = asRecord(await fetchJson(context, url));
    const works = asArray(asRecord(payload.message).items);
    const cards = [];
    for (const entry of works) {
        const work = asRecord(entry);
        const doi = asString(work.DOI);
        const authors = asArray(work.author)
            .map((author) => authorNameFromCrossref(author))
            .filter((name) => Boolean(name));
        cards.push(baseCard({
            source: "crossref",
            title: firstString(work.title),
            authors,
            year: yearFromParts(work.issued),
            venue: firstString(work["container-title"]),
            doi,
            url: asString(work.URL),
            sourceRecordId: doi,
            abstract: asString(work.abstract),
            citationCount: asNumber(work["is-referenced-by-count"])
        }));
    }
    return { source: "crossref", endpoint: url, cards };
}
224
/**
 * Collects the inner content of every `<tag>...</tag>` element in an XML
 * string using a non-nesting, case-insensitive regular expression.
 *
 * @param {string} xml XML text.
 * @param {string} tag Element name, matched literally.
 * @returns {string[]} Inner content of each match, in document order.
 */
function extractXmlBlocks(xml, tag) {
    // Escape the tag so names containing regex metacharacters match literally,
    // consistent with extractXmlTag.
    const escaped = tag.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const pattern = new RegExp(`<${escaped}\\b[^>]*>([\\s\\S]*?)</${escaped}>`, "gi");
    return Array.from(xml.matchAll(pattern), (match) => match[1] ?? "");
}
233
/**
 * Returns the cleaned text content of the first `<tag>...</tag>` element.
 *
 * @param {string} xml XML text.
 * @param {string} tag Element name, matched literally and case-insensitively.
 * @returns {string|undefined} Cleaned inner text, or `undefined` when the
 *   element is absent or empty after cleaning.
 */
function extractXmlTag(xml, tag) {
    const safeTag = tag.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const matcher = new RegExp(`<${safeTag}\\b[^>]*>([\\s\\S]*?)</${safeTag}>`, "i");
    const found = matcher.exec(xml);
    const inner = found ? found[1] : undefined;
    return cleanText(inner);
}
238
/**
 * Searches the arXiv Atom API and converts entries to evidence cards.
 *
 * @param {object} request Source request (reads `intent` and `limit`).
 * @param {object} context Search context providing `fetch`.
 * @returns {Promise<object>} `{ source, endpoint, cards }`.
 */
async function searchArxiv(request, context) {
    const url = endpoint("https://export.arxiv.org/api/query", {
        search_query: `all:${queryForSource(request.intent)}`,
        start: 0,
        max_results: request.limit,
        sortBy: "relevance"
    });
    const xml = await fetchText(context, url);
    const cards = extractXmlBlocks(xml, "entry").map((entry) => {
        const idUrl = extractXmlTag(entry, "id");
        // The entry id ends in a version suffix (e.g. "2101.00001v2").
        const versionedId = idUrl?.split("/abs/")[1];
        // Drop the version for arxivId so the card de-duplicates against
        // sources that report the versionless id; the source record id keeps
        // the exact value arXiv returned.
        const arxivId = versionedId?.replace(/v\d+$/i, "");
        const authors = extractXmlBlocks(entry, "author")
            .map((author) => extractXmlTag(author, "name"))
            .filter((author) => Boolean(author));
        const published = extractXmlTag(entry, "published");
        const publishedYear = published ? Number(published.slice(0, 4)) : undefined;
        return baseCard({
            source: "arxiv",
            title: extractXmlTag(entry, "title"),
            authors,
            // An unparseable date must not leak NaN into the card.
            year: Number.isFinite(publishedYear) ? publishedYear : undefined,
            venue: "arXiv",
            doi: extractXmlTag(entry, "arxiv:doi"),
            arxivId,
            url: idUrl,
            sourceRecordId: versionedId,
            abstract: extractXmlTag(entry, "summary")
        });
    });
    return { source: "arxiv", endpoint: url, cards };
}
268
/**
 * Searches OpenAlex works and converts the results to evidence cards.
 *
 * @param {object} request Source request (reads `intent` and `limit`).
 * @param {object} context Search context providing `fetch` and `env`
 *   (`OPENALEX_API_KEY`, `LONGTABLE_CONTACT_EMAIL`).
 * @returns {Promise<object>} `{ source, endpoint, cards }`.
 */
async function searchOpenAlex(request, context) {
    const { OPENALEX_API_KEY, LONGTABLE_CONTACT_EMAIL } = context.env;
    const url = endpoint("https://api.openalex.org/works", {
        search: queryForSource(request.intent),
        "per-page": request.limit,
        api_key: OPENALEX_API_KEY,
        mailto: LONGTABLE_CONTACT_EMAIL
    });
    const payload = asRecord(await fetchJson(context, url));
    const cards = [];
    for (const entry of asArray(payload.results)) {
        const work = asRecord(entry);
        const location = asRecord(work.primary_location);
        const venueSource = asRecord(location.source);
        const openAccess = asRecord(work.open_access);
        const workId = asString(work.id);
        const authors = asArray(work.authorships)
            .map((authorship) => asString(asRecord(asRecord(authorship).author).display_name))
            .filter((name) => Boolean(name));
        // is_oa is accepted both as a boolean and as the string "true".
        const isOpenAccess = asString(openAccess.is_oa) === "true" || openAccess.is_oa === true;
        cards.push(baseCard({
            source: "openalex",
            title: asString(work.display_name),
            authors,
            year: asNumber(work.publication_year),
            venue: asString(venueSource.display_name),
            doi: asString(work.doi),
            openAlexId: workId,
            url: workId,
            sourceRecordId: workId,
            abstract: invertedIndexToAbstract(work.abstract_inverted_index),
            legalFullTextAvailable: isOpenAccess,
            fullTextUrl: asString(location.pdf_url) ?? asString(location.landing_page_url),
            citationCount: asNumber(work.cited_by_count)
        }));
    }
    return { source: "openalex", endpoint: url, cards };
}
303
/**
 * Rebuilds abstract text from an inverted index (word -> list of positions).
 *
 * @param {unknown} value Inverted index object.
 * @returns {string|undefined} The words joined by spaces in ascending
 *   position order, or `undefined` when no numeric position is found.
 */
function invertedIndexToAbstract(value) {
    const placements = Object.entries(asRecord(value)).flatMap(([word, rawPositions]) => asArray(rawPositions)
        .map((position) => asNumber(position))
        .filter((position) => position !== undefined)
        .map((position) => [position, word]));
    if (placements.length === 0) {
        return undefined;
    }
    placements.sort((left, right) => left[0] - right[0]);
    return placements.map((placement) => placement[1]).join(" ");
}
319
/**
 * Searches the Semantic Scholar Graph API and converts results to cards.
 *
 * @param {object} request Source request (reads `intent` and `limit`).
 * @param {object} context Search context providing `fetch` and `env`;
 *   `SEMANTIC_SCHOLAR_API_KEY` is sent as `x-api-key` when set.
 * @returns {Promise<object>} `{ source, endpoint, cards }`.
 * @throws {Error} `HTTP <status> <statusText>` when the response is not ok.
 */
async function searchSemanticScholar(request, context) {
    // The paper relevance-search route is /graph/v1/paper/search; the previous
    // ".../paper/research-search" path is not a Graph API endpoint.
    const url = endpoint("https://api.semanticscholar.org/graph/v1/paper/search", {
        query: queryForSource(request.intent),
        limit: request.limit,
        fields: "title,authors,year,venue,externalIds,abstract,citationCount,openAccessPdf,url"
    });
    const headers = {};
    if (context.env.SEMANTIC_SCHOLAR_API_KEY) {
        headers["x-api-key"] = context.env.SEMANTIC_SCHOLAR_API_KEY;
    }
    const response = await context.fetch(url, {
        headers: {
            accept: "application/json",
            ...headers
        }
    });
    if (!response.ok) {
        throw new Error(`HTTP ${response.status} ${response.statusText}`);
    }
    const payload = asRecord(await response.json());
    const cards = asArray(payload.data).map((item) => {
        const record = asRecord(item);
        const externalIds = asRecord(record.externalIds);
        const oaPdf = asRecord(record.openAccessPdf);
        const authors = asArray(record.authors)
            .map((author) => asString(asRecord(author).name))
            .filter((author) => Boolean(author));
        return baseCard({
            source: "semantic_scholar",
            title: asString(record.title),
            authors,
            year: asNumber(record.year),
            venue: asString(record.venue),
            doi: asString(externalIds.DOI),
            pmid: asString(externalIds.PubMed),
            arxivId: asString(externalIds.ArXiv),
            semanticScholarId: asString(record.paperId),
            url: asString(record.url),
            sourceRecordId: asString(record.paperId),
            abstract: asString(record.abstract),
            // An open-access PDF URL is treated as legally available full text.
            legalFullTextAvailable: Boolean(asString(oaPdf.url)),
            fullTextUrl: asString(oaPdf.url),
            citationCount: asNumber(record.citationCount)
        });
    });
    return { source: "semantic_scholar", endpoint: url, cards };
}
366
/**
 * Searches PubMed through NCBI E-utilities and converts results to cards.
 *
 * Runs ESearch for matching PMIDs, then ESummary for their metadata. No
 * abstract is requested, so the cards carry metadata only.
 *
 * @param {object} request Source request (reads `intent` and `limit`).
 * @param {object} context Search context providing `fetch` and `env`;
 *   `NCBI_API_KEY` is sent as `api_key` when set.
 * @returns {Promise<object>} `{ source, endpoint, cards }`; the endpoint is
 *   the ESearch URL when nothing matched, else the ESummary URL.
 */
async function searchPubMed(request, context) {
    const searchUrl = endpoint("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi", {
        db: "pubmed",
        retmode: "json",
        term: queryForSource(request.intent),
        retmax: request.limit,
        api_key: context.env.NCBI_API_KEY
    });
    const searchPayload = asRecord(await fetchJson(context, searchUrl));
    const ids = asArray(asRecord(searchPayload.esearchresult).idlist)
        .map((id) => asString(id))
        .filter((id) => Boolean(id));
    if (ids.length === 0) {
        return { source: "pubmed", endpoint: searchUrl, cards: [] };
    }
    const summaryUrl = endpoint("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi", {
        db: "pubmed",
        retmode: "json",
        id: ids.join(","),
        api_key: context.env.NCBI_API_KEY
    });
    const summaryPayload = asRecord(await fetchJson(context, summaryUrl));
    const result = asRecord(summaryPayload.result);
    const cards = ids.map((id) => {
        const record = asRecord(result[id]);
        const authors = asArray(record.authors)
            .map((author) => asString(asRecord(author).name))
            .filter((author) => Boolean(author));
        const pubdate = asString(record.pubdate);
        const pubYear = pubdate ? Number(pubdate.slice(0, 4)) : undefined;
        // ESummary lists alternative identifiers in `articleids`; picking up
        // the DOI lets this card de-duplicate against DOI-keyed sources.
        const doiEntry = asArray(record.articleids)
            .map((entry) => asRecord(entry))
            .find((entry) => asString(entry.idtype)?.toLowerCase() === "doi");
        return baseCard({
            source: "pubmed",
            title: asString(record.title),
            authors,
            // An unparseable date must not leak NaN into the card.
            year: Number.isFinite(pubYear) ? pubYear : undefined,
            venue: asString(record.fulljournalname) ?? asString(record.source),
            doi: asString(doiEntry?.value),
            pmid: id,
            url: `https://pubmed.ncbi.nlm.nih.gov/${id}/`,
            sourceRecordId: id
        });
    });
    return { source: "pubmed", endpoint: summaryUrl, cards };
}
408
/**
 * Queries the ERIC API and turns every returned document into an evidence card.
 *
 * @param request - Source search request; `intent` becomes the `search`
 *   parameter and `limit` becomes `rows`.
 * @param context - Search context forwarded to `fetchJson`.
 * @returns `{ source: "eric", endpoint, cards }` where `endpoint` is the
 *   requested URL.
 */
async function searchEric(request, context) {
    const requestUrl = endpoint("https://api.ies.ed.gov/eric/", {
        search: queryForSource(request.intent),
        format: "json",
        rows: request.limit
    });
    const body = asRecord(await fetchJson(context, requestUrl));
    const responseSection = asRecord(body.response);
    const toCard = (entry) => {
        const doc = asRecord(entry);
        // Prefer `id`, falling back to `ericNumber` when it is absent.
        const ericId = asString(doc.id) ?? asString(doc.ericNumber);
        let recordUrl;
        if (ericId) {
            recordUrl = `https://eric.ed.gov/?id=${ericId}`;
        }
        return baseCard({
            source: "eric",
            title: asString(doc.title),
            authors: asArray(doc.author).map(asString).filter((name) => Boolean(name)),
            year: asNumber(doc.publicationdateyear) ?? asNumber(doc.year),
            venue: asString(doc.source),
            ericId,
            url: recordUrl,
            sourceRecordId: ericId,
            abstract: asString(doc.description) ?? asString(doc.abstract)
        });
    };
    const cards = asArray(responseSection.docs).map((entry) => toCard(entry));
    return { source: "eric", endpoint: requestUrl, cards };
}
433
/**
 * Searches the DOAJ article index and converts each hit into an evidence card.
 *
 * The query is URL-encoded into the path of the article search route; paging
 * is passed as the `page` / `pageSize` query parameters.
 *
 * @param request - Source search request; `intent` is turned into the query
 *   and `limit` becomes `pageSize`.
 * @param context - Search context forwarded to `fetchJson`.
 * @returns `{ source: "doaj", endpoint, cards }` where `endpoint` is the
 *   requested URL.
 */
async function searchDoaj(request, context) {
    // DOAJ serves article search at `/api/v4/search/articles/{query}`.
    const url = endpoint(`https://doaj.org/api/v4/search/articles/${encodeURIComponent(queryForSource(request.intent))}`, {
        page: 1,
        pageSize: request.limit
    });
    const payload = asRecord(await fetchJson(context, url));
    const cards = asArray(payload.results).map((item) => {
        const record = asRecord(item);
        const recordId = asString(record.id);
        const bibjson = asRecord(record.bibjson);
        const journal = asRecord(bibjson.journal);
        const identifiers = asArray(bibjson.identifier).map(asRecord);
        const doi = identifiers.find((identifier) => asString(identifier.type)?.toLowerCase() === "doi");
        const links = asArray(bibjson.link).map(asRecord);
        const fullText = links.find((link) => asString(link.type)?.toLowerCase().includes("fulltext"));
        return baseCard({
            source: "doaj",
            title: asString(bibjson.title),
            authors: asArray(bibjson.author).map((author) => asString(asRecord(author).name)).filter((author) => Boolean(author)),
            year: asNumber(bibjson.year),
            venue: asString(journal.title),
            doi: asString(doi?.id),
            // Build the landing-page link from the narrowed id, never the raw value.
            url: recordId ? `https://doaj.org/article/${recordId}` : undefined,
            sourceRecordId: recordId,
            abstract: asString(bibjson.abstract),
            legalFullTextAvailable: Boolean(fullText),
            fullTextUrl: asString(fullText?.url)
        });
    });
    return { source: "doaj", endpoint: url, cards };
}
463
/**
 * Runs a title search against Unpaywall and converts each hit into an
 * evidence card.
 *
 * @param request - Source search request; `intent` is turned into the `query`
 *   parameter.
 * @param context - Supplies `env.LONGTABLE_CONTACT_EMAIL` (sent as `email`)
 *   and is forwarded to `fetchJson`.
 * @returns `{ source: "unpaywall", endpoint, cards }` where `endpoint` is the
 *   requested URL. Cards carry no authors because none are read from the
 *   response.
 */
async function searchUnpaywall(request, context) {
    // Unpaywall's search endpoint is `/v2/search?query=...&email=...`.
    const url = endpoint("https://api.unpaywall.org/v2/search", {
        query: queryForSource(request.intent),
        email: context.env.LONGTABLE_CONTACT_EMAIL
    });
    const payload = asRecord(await fetchJson(context, url));
    const cards = asArray(payload.results).map((item) => {
        const result = asRecord(item);
        // Each hit wraps the record under `response`.
        const response = asRecord(result.response);
        const best = asRecord(response.best_oa_location);
        return baseCard({
            source: "unpaywall",
            title: asString(response.title),
            authors: [],
            year: asNumber(response.year),
            venue: asString(response.journal_name),
            doi: asString(response.doi),
            url: asString(response.doi_url),
            sourceRecordId: asString(response.doi),
            legalFullTextAvailable: Boolean(asString(best.url)),
            fullTextUrl: asString(best.url)
        });
    });
    return { source: "unpaywall", endpoint: url, cards };
}
488
/**
 * Dispatches a source search request to the adapter that matches
 * `request.source`.
 *
 * @param request - Source search request; `source` selects the adapter.
 * @param context - Search context handed unchanged to the adapter.
 * @returns The result produced by the selected adapter.
 * @throws Error when `request.source` is not one of the handled sources, so an
 *   unknown value fails loudly instead of resolving to `undefined`.
 */
export async function runSourceSearch(request, context) {
    switch (request.source) {
        case "crossref":
            return searchCrossref(request, context);
        case "arxiv":
            return searchArxiv(request, context);
        case "openalex":
            return searchOpenAlex(request, context);
        case "semantic_scholar":
            return searchSemanticScholar(request, context);
        case "pubmed":
            return searchPubMed(request, context);
        case "eric":
            return searchEric(request, context);
        case "doaj":
            return searchDoaj(request, context);
        case "unpaywall":
            return searchUnpaywall(request, context);
        default:
            throw new Error(`Unsupported search source: ${String(request.source)}`);
    }
}
508
/**
 * Returns a fresh array holding every entry of `SEARCH_SOURCES`, so callers
 * can modify the result without touching the shared constant.
 */
export function allSearchSources() {
    const sources = Array.from(SEARCH_SOURCES);
    return sources;
}
@@ -0,0 +1,123 @@
1
/** Readonly tuple listing every supported search source identifier. */
+ export declare const SEARCH_SOURCES: readonly ["crossref", "arxiv", "openalex", "semantic_scholar", "pubmed", "eric", "doaj", "unpaywall"];
2
/** Union of the identifiers contained in `SEARCH_SOURCES`. */
+ export type SearchSource = typeof SEARCH_SOURCES[number];
3
/** Category labels that a `ResearchSearchIntent` can carry in its `kind` field. */
+ export type ResearchSearchIntentKind = "literature" | "theory" | "measurement" | "citation" | "metadata" | "venue";
4
/** Labels for `EvidenceCard.evidenceDepth`; presumably how much of a record's content backs the card (confirm against the code that assigns it). */
+ export type EvidenceDepth = "metadata_only" | "abstract_only" | "legal_full_text_available" | "legal_full_text_unavailable" | "secondary_summary_only";
5
/** Labels for `EvidenceCard.citationSupportStatus`. */
+ export type CitationSupportStatus = "direct_support" | "indirect_support" | "background" | "questionable_fit" | "not_verified";
6
/** Status values used by `SourceReport.status`. */
+ export type SourceRunStatus = "completed" | "skipped" | "failed";
7
/** Status values used by `EvidenceRun.status`. */
+ export type EvidenceRunStatus = "completed" | "partial" | "blocked";
8
/**
 * Structured description of one search request: the query text with its
 * normalized form, variants and keywords, required/excluded terms, the
 * sources requested, and a result limit.
 */
+ export interface ResearchSearchIntent {
9
+ id: string;
10
+ createdAt: string;
11
+ kind: ResearchSearchIntentKind;
12
+ query: string;
13
+ normalizedQuery: string;
14
+ queryVariants: string[];
15
+ keywords: string[];
16
+ field?: string;
17
+ mustTerms: string[];
18
+ excludeTerms: string[];
19
+ requestedSources: SearchSource[];
20
+ limit: number;
21
/** NOTE(review): looks like a label for where the intent was created; confirm with the builder. */
+ source: "cli" | "runtime" | "test";
22
+ }
23
/**
 * Normalized record for a single search result. Bibliographic fields and
 * external identifiers are optional; the source route, evidence depth,
 * citation-support status, limitations, matched keywords and relevance score
 * are always present.
 */
+ export interface EvidenceCard {
24
+ id: string;
25
+ title: string;
26
+ authors: string[];
27
+ year?: number;
28
+ venue?: string;
29
+ doi?: string;
30
+ pmid?: string;
31
+ arxivId?: string;
32
+ openAlexId?: string;
33
+ semanticScholarId?: string;
34
+ ericId?: string;
35
+ url?: string;
36
/** NOTE(review): presumably the source that produced this card, with `sourceRoutes` listing every source that returned it; confirm against the dedup code. */
+ sourceRoute: SearchSource;
37
+ sourceRoutes: SearchSource[];
38
+ sourceRecordId?: string;
39
+ abstract?: string;
40
+ abstractAvailable: boolean;
41
+ evidenceDepth: EvidenceDepth;
42
+ legalFullTextAvailable: boolean;
43
+ fullTextUrl?: string;
44
+ citationCount?: number;
45
+ researchDesign?: string;
46
+ constructsOrMeasures?: string[];
47
+ mainFinding?: string;
48
+ relevanceToProject?: string;
49
+ citationSupportStatus: CitationSupportStatus;
50
+ limitations: string[];
51
+ matchedKeywords: string[];
52
+ relevanceScore: number;
53
+ }
54
/**
 * Availability record for one search source: whether it is enabled, which
 * environment variables it requires, which of them are missing, and optional
 * explanatory text.
 */
+ export interface SearchSourceCapability {
55
+ source: SearchSource;
56
+ enabled: boolean;
57
+ requiredEnv: string[];
58
+ missingEnv: string[];
59
+ reason?: string;
60
+ setupHint?: string;
61
+ }
62
/**
 * Per-source outcome of a search run: status, a count, elapsed milliseconds,
 * and optionally a reason and the endpoint that was used.
 */
+ export interface SourceReport {
63
+ source: SearchSource;
64
+ status: SourceRunStatus;
65
/** NOTE(review): presumably the number of cards the source returned; confirm with the runner. */
+ count: number;
66
+ elapsedMs: number;
67
+ reason?: string;
68
+ endpoint?: string;
69
+ }
70
/**
 * Result of one search run: the intent it was executed for, the per-source
 * reports, the evidence cards, the skipped sources, warnings, and an overall
 * status with an optional blocked reason.
 */
+ export interface EvidenceRun {
71
+ id: string;
72
+ createdAt: string;
73
+ updatedAt: string;
74
+ status: EvidenceRunStatus;
75
+ intent: ResearchSearchIntent;
76
+ sourceReports: SourceReport[];
77
+ cards: EvidenceCard[];
78
+ skippedSources: SearchSourceCapability[];
79
+ warnings: string[];
80
+ blockedReason?: string;
81
+ }
82
/**
 * Input accepted by `buildResearchSearchIntent`. Every field is optional and
 * most are plain strings.
 */
+ export interface BuildSearchIntentInput {
83
+ query?: string;
84
+ prompt?: string;
85
+ projectGoal?: string;
86
+ projectBlocker?: string;
87
+ intent?: string;
88
+ field?: string;
89
/** NOTE(review): `must`, `exclude` and `sources` are single strings here but arrays on `ResearchSearchIntent`; presumably comma-separated lists. Confirm with the builder. */
+ must?: string;
90
+ exclude?: string;
91
+ sources?: string;
92
+ limit?: number;
93
+ source?: ResearchSearchIntent["source"];
94
+ }
95
/**
 * Minimal HTTP response shape the package relies on: a success flag, status
 * code and text, plus asynchronous `text()` and `json()` body readers.
 */
+ export interface HttpResponseLike {
96
+ ok: boolean;
97
+ status: number;
98
+ statusText: string;
99
+ text(): Promise<string>;
100
+ json(): Promise<unknown>;
101
+ }
102
/**
 * Fetch-style function signature: takes a URL and optional request headers
 * and resolves to an `HttpResponseLike`.
 */
+ export type SearchFetch = (url: string, init?: {
103
+ headers?: Record<string, string>;
104
+ }) => Promise<HttpResponseLike>;
105
/**
 * Dependencies handed to each source search: the fetch implementation and a
 * map of environment variables, any of which may be undefined.
 */
+ export interface SourceSearchContext {
106
+ fetch: SearchFetch;
107
+ env: Record<string, string | undefined>;
108
+ }
109
/**
 * Request for a search against a single source: the intent to search for,
 * the source to query, and a numeric limit.
 */
+ export interface SourceSearchRequest {
110
+ intent: ResearchSearchIntent;
111
+ source: SearchSource;
112
+ limit: number;
113
+ }
114
/**
 * Result of a search against a single source: the source, the endpoint URL
 * that was used, and the evidence cards built from the response.
 */
+ export interface SourceSearchResult {
115
+ source: SearchSource;
116
+ endpoint: string;
117
+ cards: EvidenceCard[];
118
+ }
119
/**
 * `BuildSearchIntentInput` extended with optional run-time overrides: an
 * environment map, a fetch implementation, and an `allowPartial` flag.
 */
+ export interface RunResearchSearchInput extends BuildSearchIntentInput {
120
+ env?: Record<string, string | undefined>;
121
+ fetch?: SearchFetch;
122
/** NOTE(review): presumably lets a run finish when only some sources succeed; confirm with the runner. */
+ allowPartial?: boolean;
123
+ }
package/dist/types.js ADDED
@@ -0,0 +1,10 @@
1
/** Identifiers of the supported search sources, in the same order as the tuple declared in the type declarations. */
+ export const SEARCH_SOURCES = [
2
+ "crossref",
3
+ "arxiv",
4
+ "openalex",
5
+ "semantic_scholar",
6
+ "pubmed",
7
+ "eric",
8
+ "doaj",
9
+ "unpaywall"
10
+ ];
package/package.json ADDED
@@ -0,0 +1,46 @@
1
+ {
2
+ "name": "@longtable/research-search",
3
+ "version": "0.1.28",
4
+ "private": false,
5
+ "description": "Scholar-first research search and evidence card workflow for LongTable",
6
+ "type": "module",
7
+ "main": "./dist/index.js",
8
+ "types": "./dist/index.d.ts",
9
+ "exports": {
10
+ ".": {
11
+ "types": "./dist/index.d.ts",
12
+ "import": "./dist/index.js"
13
+ }
14
+ },
15
+ "files": [
16
+ "dist",
17
+ "README.md"
18
+ ],
19
+ "scripts": {
20
+ "build": "tsc -p tsconfig.json",
21
+ "typecheck": "tsc -p tsconfig.json --noEmit"
22
+ },
23
+ "devDependencies": {
24
+ "typescript": "^5.6.0"
25
+ },
26
+ "keywords": [
27
+ "longtable",
28
+ "research",
29
+ "search",
30
+ "evidence",
31
+ "scholarly"
32
+ ],
33
+ "author": "Hosung You",
34
+ "license": "MIT",
35
+ "repository": {
36
+ "type": "git",
37
+ "url": "git+https://github.com/HosungYou/LongTable.git"
38
+ },
39
+ "homepage": "https://github.com/HosungYou/LongTable#readme",
40
+ "publishConfig": {
41
+ "access": "public"
42
+ },
43
+ "engines": {
44
+ "node": ">=18.0.0"
45
+ }
46
+ }