@longtable/scholar-research 0.1.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,7 @@
1
+ # @longtable/scholar-research
2
+
3
+ LongTable scholarly evidence recovery and citation-slot research support.
4
+
5
+ This package exposes the legal scholarly search adapters, connector readiness,
6
+ smoke fixtures, and `.longtable/research-runs/<run-id>/` scaffold used by the
7
+ `scholar-research` skill.
@@ -0,0 +1,7 @@
1
+ export * from "./types.js";
2
+ export * from "./query.js";
3
+ export * from "./sources.js";
4
+ export * from "./rank.js";
5
+ export * from "./run.js";
6
+ export * from "./publisher-access.js";
7
+ export * from "./protocol.js";
package/dist/index.js ADDED
@@ -0,0 +1,7 @@
1
+ export * from "./types.js";
2
+ export * from "./query.js";
3
+ export * from "./sources.js";
4
+ export * from "./rank.js";
5
+ export * from "./run.js";
6
+ export * from "./publisher-access.js";
7
+ export * from "./protocol.js";
@@ -0,0 +1,56 @@
1
/** Skill identifier; reported as `skillName` in the readiness result. */
+ export declare const SCHOLAR_RESEARCH_SKILL_NAME = "scholar-research";
2
/** Closed list of failure reason codes; the union type below is derived from this tuple. */
+ export declare const SCHOLAR_RESEARCH_FAILURE_REASONS: readonly ["not_found", "no_full_text", "restricted_access", "robots_or_terms_blocked", "ambiguous_match", "download_failed", "parse_failed", "weak_evidence"];
3
/** One of the codes listed in SCHOLAR_RESEARCH_FAILURE_REASONS. */
+ export type ScholarResearchFailureReason = typeof SCHOLAR_RESEARCH_FAILURE_REASONS[number];
4
/** Closed list of citation-slot statuses; the union type below is derived from this tuple. */
+ export declare const SCHOLAR_RESEARCH_SLOT_STATUSES: readonly ["filled", "provisional", "unfilled", "blocked"];
5
/** One of the statuses listed in SCHOLAR_RESEARCH_SLOT_STATUSES. */
+ export type ScholarResearchSlotStatus = typeof SCHOLAR_RESEARCH_SLOT_STATUSES[number];
6
/** Closed list of smoke-fixture categories; used as the `category` of each smoke item. */
+ export declare const SCHOLAR_RESEARCH_SMOKE_CATEGORIES: readonly ["oa_pdf", "publisher_landing", "preprint_and_published", "restricted_fallback", "korean_institutional_report"];
7
/** One of the categories listed in SCHOLAR_RESEARCH_SMOKE_CATEGORIES. */
+ export type ScholarResearchSmokeCategory = typeof SCHOLAR_RESEARCH_SMOKE_CATEGORIES[number];
8
/**
 * Readiness state of a connector.
 * NOTE(review): the readiness assessment in this package only ever produces
 * "ready" or "missing"; nothing visible here emits "optional" - confirm whether
 * another module uses it.
 */
+ export type ScholarResearchConnectorStatus = "ready" | "optional" | "missing";
9
/** Readiness report for a single scholarly source connector. */
+ export interface ScholarResearchConnectorReadiness {
10
/** Connector display name (for example "Crossref" or "Unpaywall"). */
+ readonly name: string;
11
/** "ready" when no required environment variable is missing, otherwise "missing". */
+ readonly status: ScholarResearchConnectorStatus;
12
/** Names of the environment variables this connector requires; may be empty. */
+ readonly requiredEnv: readonly string[];
13
/** Subset of `requiredEnv` that is unset or blank in the inspected environment. */
+ readonly missingEnv: readonly string[];
14
/** Human-readable description of what the connector is used for. */
+ readonly purpose: string;
15
+ }
16
/**
 * Safety policy flags reported with every readiness result.
 * Every flag is typed as the literal `false`, so a value of this type can never
 * declare any of these behaviours as allowed.
 */
+ export interface ScholarResearchSafetyStatus {
17
/** Paywall bypass is never allowed. */
+ readonly paywallBypassAllowed: false;
18
/** Automating institutional logins is never allowed. */
+ readonly institutionLoginAutomationAllowed: false;
19
/** Reusing cookies is never allowed. */
+ readonly cookieReuseAllowed: false;
20
/** Bypassing robots rules or terms of use is never allowed. */
+ readonly robotsOrTermsBypassAllowed: false;
21
+ }
22
/** Result of assessing whether the scholar-research skill can run in a given environment. */
+ export interface ScholarResearchReadiness {
23
/** Always the SCHOLAR_RESEARCH_SKILL_NAME constant. */
+ readonly skillName: typeof SCHOLAR_RESEARCH_SKILL_NAME;
24
/** One readiness entry per known connector. */
+ readonly connectors: readonly ScholarResearchConnectorReadiness[];
25
/** Safety policy flags; all are the literal `false`. */
+ readonly safety: ScholarResearchSafetyStatus;
26
/** Always `true`: a fallback ledger is required. */
+ readonly fallbackLedgerRequired: true;
27
/** Always `true`: a citation slot may only be marked filled with a full-text quote. */
+ readonly citationSlotFilledRequiresFullTextQuote: true;
28
+ }
29
/** One entry of the smoke fixture returned by buildScholarResearchSmokeFixture. */
+ export interface ScholarResearchSmokeItem {
30
/** Short identifier of the fixture entry (for example "oa-1"). */
+ readonly id: string;
31
/** Scenario category the entry belongs to. */
+ readonly category: ScholarResearchSmokeCategory;
32
/** Human-readable name of the seed. */
+ readonly label: string;
33
/** Prose description of what the skill is expected to do for this seed. */
+ readonly expectedBehavior: string;
34
+ }
35
/** Input for building or writing a research-run scaffold. */
+ export interface ScholarResearchRunScaffoldInput {
36
/** Base directory; the run directory is resolved as `<cwd>/.longtable/research-runs/<runId>`. */
+ readonly cwd: string;
37
/**
 * Optional run identifier. It is sanitised before use; when omitted, an id of
 * the form `scholar-<digits>` is derived from `createdAt`.
 */
+ readonly runId?: string;
38
/** Optional creation timestamp; defaults to the current time as an ISO string. */
+ readonly createdAt?: string;
39
+ }
40
/** Resolved identifier, directory and file paths of one research run. */
+ export interface ScholarResearchRunScaffold {
41
/** Sanitised run identifier actually used for the directory name. */
+ readonly runId: string;
42
/** Absolute path of the run directory. */
+ readonly runDir: string;
43
/** Paths of the files that make up the scaffold, all located under `runDir`. */
+ readonly files: {
44
/** Path of `journal.md`. */
+ readonly journal: string;
45
/** Path of `expansion-log.md`. */
+ readonly expansionLog: string;
46
/** Path of `claim-ledger.md`. */
+ readonly claimLedger: string;
47
/** Path of `evidence-ledger.md`. */
+ readonly evidenceLedger: string;
48
/** Path of `fallback-ledger.md`. */
+ readonly fallbackLedger: string;
49
/** Path of `citation-slot-matrix.md`. */
+ readonly citationSlotMatrix: string;
50
/** Path of `sources/manifest.jsonl`. */
+ readonly manifest: string;
51
+ };
52
+ }
53
/**
 * Reports connector readiness and the fixed safety policy.
 * @param env Environment to inspect; the implementation defaults it to `process.env`.
 */
+ export declare function assessScholarResearchReadiness(env?: Record<string, string | undefined>): ScholarResearchReadiness;
54
/** Returns the built-in list of smoke-test seeds (static data, no I/O). */
+ export declare function buildScholarResearchSmokeFixture(): readonly ScholarResearchSmokeItem[];
55
/** Computes the run id, run directory and file paths for a run without touching the file system. */
+ export declare function buildScholarResearchRunScaffold(input: ScholarResearchRunScaffoldInput): ScholarResearchRunScaffold;
56
/**
 * Creates the run directory (including `sources/`) and writes the scaffold
 * files, then resolves with the same description that
 * buildScholarResearchRunScaffold returns.
 */
+ export declare function writeScholarResearchRunScaffold(input: ScholarResearchRunScaffoldInput): Promise<ScholarResearchRunScaffold>;
@@ -0,0 +1,159 @@
1
+ import { mkdir, writeFile } from "node:fs/promises";
2
+ import { join, resolve } from "node:path";
3
/** Skill identifier reported as `skillName` in readiness results. */
export const SCHOLAR_RESEARCH_SKILL_NAME = "scholar-research";

/** Reason codes available for recording why a source could not be used. */
export const SCHOLAR_RESEARCH_FAILURE_REASONS = [
  "not_found", "no_full_text", "restricted_access", "robots_or_terms_blocked",
  "ambiguous_match", "download_failed", "parse_failed", "weak_evidence",
];

/** Statuses a citation slot can be in. */
export const SCHOLAR_RESEARCH_SLOT_STATUSES = ["filled", "provisional", "unfilled", "blocked"];

/** Scenario categories covered by the smoke fixture. */
export const SCHOLAR_RESEARCH_SMOKE_CATEGORIES = [
  "oa_pdf", "publisher_landing", "preprint_and_published",
  "restricted_fallback", "korean_institutional_report",
];
27
/**
 * Known scholarly source connectors, one row per connector:
 * [name, required environment variable names, purpose].
 * Rows are expanded into `{ name, requiredEnv, purpose }` records.
 */
const CONNECTORS = [
  ["Crossref", [], "DOI and publisher metadata resolution."],
  ["OpenAlex", [], "Open scholarly metadata and citation graph lookup; OPENALEX_API_KEY remains optional."],
  ["Semantic Scholar", [], "Paper metadata, abstracts, citation counts, and open PDF hints."],
  ["Unpaywall", ["LONGTABLE_CONTACT_EMAIL"], "Open-access location discovery for DOI seeds."],
  ["arXiv", [], "Preprint metadata and open PDF route discovery."],
  ["PubMed/PMC", [], "Biomedical metadata and PubMed Central open full text discovery."],
  ["CORE", ["CORE_API_KEY"], "Repository sweep for legal full text and institutional copies."],
  ["DOAJ", [], "Open-access journal metadata and full-text links."],
  ["Local PDF folder/manual upload", [], "Researcher-provided files when the researcher has legitimate access."],
].map(([name, requiredEnv, purpose]) => ({ name, requiredEnv, purpose }));
74
+ const SAFETY_STATUS = {
75
+ paywallBypassAllowed: false,
76
+ institutionLoginAutomationAllowed: false,
77
+ cookieReuseAllowed: false,
78
+ robotsOrTermsBypassAllowed: false
79
+ };
80
/**
 * Tells whether `key` is set in `env` to something other than blank text.
 * Absent (nullish) entries and whitespace-only values both count as not set.
 */
function hasEnv(env, key) {
  const raw = env[key];
  if (raw === undefined || raw === null) {
    return false;
  }
  return raw.trim() ? true : false;
}
83
/**
 * Builds the readiness record for one connector.
 *
 * @param requirement Connector entry with `name`, `requiredEnv` and `purpose`.
 * @param env Environment map to check the required variables against.
 * @returns The connector's name and purpose, the variables it requires, the
 *   ones that are unset or blank, and "ready" when nothing is missing
 *   (otherwise "missing").
 */
function connectorStatus(requirement, env) {
  const missingEnv = requirement.requiredEnv.filter((key) => !hasEnv(env, key));
  return {
    name: requirement.name,
    status: missingEnv.length === 0 ? "ready" : "missing",
    // Hand out a copy: the requirement's array lives in the module-level
    // connector table, so returning it directly would let a caller's mutation
    // change every later readiness result.
    requiredEnv: [...requirement.requiredEnv],
    missingEnv,
    purpose: requirement.purpose
  };
}
93
/**
 * Reports whether the scholar-research skill can run in the given environment.
 *
 * @param env Environment map to inspect; defaults to `process.env`.
 * @returns The skill name, one readiness record per known connector, the fixed
 *   safety policy, and the two always-true policy flags.
 */
export function assessScholarResearchReadiness(env = process.env) {
  const connectors = [];
  for (const requirement of CONNECTORS) {
    connectors.push(connectorStatus(requirement, env));
  }
  const readiness = {
    skillName: SCHOLAR_RESEARCH_SKILL_NAME,
    connectors,
    safety: SAFETY_STATUS,
    fallbackLedgerRequired: true,
    citationSlotFilledRequiresFullTextQuote: true,
  };
  return readiness;
}
102
/**
 * Returns the built-in smoke fixture: ten seeds spread over the smoke
 * categories, each with the behaviour the skill is expected to show.
 * A fresh array of fresh objects is produced on every call.
 */
export function buildScholarResearchSmokeFixture() {
  // One row per seed: [id, category, label, expectedBehavior].
  const seeds = [
    ["oa-1", "oa_pdf", "Known open-access PDF seed", "Recover legal PDF/full text and mark citation slots filled only after quote extraction."],
    ["oa-2", "oa_pdf", "Open repository PDF seed", "Prefer repository PDF when publisher full text is not needed."],
    ["oa-3", "oa_pdf", "DOAJ full-text seed", "Resolve journal metadata and full-text route."],
    ["landing-1", "publisher_landing", "DOI landing page seed", "Resolve metadata and record landing-page-only fallback until full text is legal."],
    ["landing-2", "publisher_landing", "Publisher metadata seed", "Avoid claiming filled citation support from metadata alone."],
    ["preprint-1", "preprint_and_published", "arXiv plus published version", "Link preprint and version-of-record without treating them as identical evidence."],
    ["preprint-2", "preprint_and_published", "Repository manuscript plus DOI", "Record both routes and choose the evidence source explicitly."],
    ["restricted-1", "restricted_fallback", "Restricted publisher article", "Create fallback ledger and Researcher Checkpoint instead of bypassing access control."],
    ["restricted-2", "restricted_fallback", "Login-required full text", "Request manual upload only when the researcher has legitimate access."],
    ["kr-report-1", "korean_institutional_report", "Korean institutional PDF/report", "Recover legal report PDF and preserve Korean metadata."],
  ];
  return seeds.map(([id, category, label, expectedBehavior]) => ({ id, category, label, expectedBehavior }));
}
116
/**
 * Derives a default run id from a timestamp string: every non-digit character
 * is dropped and at most the first 14 digits are kept, prefixed with
 * "scholar-". For an ISO timestamp this gives `scholar-YYYYMMDDhhmmss`.
 */
function generatedRunId(createdAt) {
  const digitsOnly = createdAt.replace(/[^0-9]/g, "");
  const stamp = digitsOnly.slice(0, 14);
  return `scholar-${stamp}`;
}
119
/**
 * Turns an arbitrary run id into a name that is safe to use as a single
 * directory name.
 *
 * Surrounding whitespace is trimmed, every run of characters other than word
 * characters, "." and "-" becomes one "-", repeated dashes are collapsed, and
 * leading/trailing dashes are removed.
 *
 * @param value Requested run id.
 * @returns The sanitised id, or "scholar-run" when nothing usable remains.
 */
function normalizeRunId(value) {
  const slug = value
    .trim()
    .replace(/[^\w.-]+/g, "-")
    .replace(/-+/g, "-")
    .replace(/^-+|-+$/g, "");
  // Dots pass the character filter, so "." and ".." would otherwise be returned
  // unchanged and, once resolved as a path segment, point at the parent
  // directories instead of a run directory. Treat dot-only ids as unusable.
  if (slug === "" || /^\.+$/.test(slug)) {
    return "scholar-run";
  }
  return slug;
}
126
/**
 * Computes where a research run lives on disk without creating anything.
 *
 * @param input `cwd` is the base directory; `runId` and `createdAt` are
 *   optional. A missing `createdAt` defaults to the current time (ISO string);
 *   a missing `runId` is generated from that timestamp. The id is sanitised
 *   either way.
 * @returns The final run id, the absolute run directory
 *   (`<cwd>/.longtable/research-runs/<runId>`) and the path of every scaffold
 *   file inside it.
 */
export function buildScholarResearchRunScaffold(input) {
  const timestamp = input.createdAt ?? new Date().toISOString();
  const requestedId = input.runId ?? generatedRunId(timestamp);
  const runId = normalizeRunId(requestedId);
  const runDir = resolve(input.cwd, ".longtable", "research-runs", runId);
  // Small helper so each entry below only names its location inside the run directory.
  const inRunDir = (...segments) => join(runDir, ...segments);
  const files = {
    journal: inRunDir("journal.md"),
    expansionLog: inRunDir("expansion-log.md"),
    claimLedger: inRunDir("claim-ledger.md"),
    evidenceLedger: inRunDir("evidence-ledger.md"),
    fallbackLedger: inRunDir("fallback-ledger.md"),
    citationSlotMatrix: inRunDir("citation-slot-matrix.md"),
    manifest: inRunDir("sources", "manifest.jsonl"),
  };
  return { runId, runDir, files };
}
144
/**
 * Renders the initial content of a scaffold markdown file: a level-one heading,
 * a blank line, a "Created:" line, a blank line, then the optional extra lines.
 * The result always ends with a newline.
 */
function markdownFile(title, createdAt, lines = []) {
  const parts = [`# ${title}`, "", `Created: ${createdAt}`, ""];
  for (const line of lines) {
    parts.push(line);
  }
  // The trailing empty entry makes the joined text end with "\n".
  parts.push("");
  return parts.join("\n");
}
147
/**
 * Creates the run directory (including `sources/`) and writes every scaffold
 * file with its initial content, one after another. Files are written with
 * `writeFile`, so existing files at those paths are replaced.
 *
 * @param input Same input as buildScholarResearchRunScaffold; a missing
 *   `createdAt` is fixed once here so the paths and all file headers share it.
 * @returns The scaffold description for the run that was written.
 */
export async function writeScholarResearchRunScaffold(input) {
  const createdAt = input.createdAt ?? new Date().toISOString();
  const scaffold = buildScholarResearchRunScaffold({ ...input, createdAt });
  const { files } = scaffold;

  await mkdir(join(scaffold.runDir, "sources"), { recursive: true });

  // [target path, initial content] in the order the files are written.
  const documents = [
    [files.journal, markdownFile("Scholar Research Journal", createdAt)],
    [files.expansionLog, markdownFile("Expansion Log", createdAt)],
    [files.claimLedger, markdownFile("Claim Ledger", createdAt, ["| claim | risk | sources | status |", "| --- | --- | --- | --- |"])],
    [files.evidenceLedger, markdownFile("Evidence Ledger", createdAt, ["| source | slot | status | note |", "| --- | --- | --- | --- |"])],
    [files.fallbackLedger, markdownFile("Fallback Ledger", createdAt, ["| source | reason | fallback | checkpoint |", "| --- | --- | --- | --- |"])],
    [files.citationSlotMatrix, markdownFile("Citation Slot Matrix", createdAt, ["| slot | claim | status | quote/claim | source |", "| --- | --- | --- | --- | --- |"])],
    // The manifest starts out empty.
    [files.manifest, ""],
  ];
  for (const [target, contents] of documents) {
    await writeFile(target, contents, "utf8");
  }
  return scaffold;
}
@@ -0,0 +1,21 @@
1
+ import { type CrossrefTdmDiscovery, type EvidenceCard, type Publisher, type PublisherAccessRecord, type PublisherProbeInput, type PublisherProbeTarget, type SearchFetch } from "./types.js";
2
/**
 * Static description of one publisher integration, as returned by publisherConfigs().
 * NOTE(review): the implementation is not part of this declaration file; the
 * field notes below are read off the names and types only - confirm against
 * publisher-access.js.
 */
+ interface PublisherConfig {
3
/** Publisher this entry describes (`Publisher` is declared in ./types.js). */
+ publisher: Publisher;
4
/** Display label for the publisher. */
+ label: string;
5
/** Presumably the environment variable names that must be set for this publisher - TODO confirm. */
+ requiredEnv: string[];
6
/** Presumably environment variable names that are used when present but not required - TODO confirm. */
+ optionalEnv: string[];
7
/** Presumably a human-readable hint on how to configure access - TODO confirm. */
+ setupHint: string;
8
+ }
9
// NOTE(review): only the signatures are visible here; the behaviour notes below
// are inferred from names and types and must be confirmed against
// publisher-access.js.

/** Takes a DOI-like string and returns a string; presumably a canonical DOI form - TODO confirm. */
+ export declare function normalizeDoi(value: string): string;
10
/** Converts an optional string or boolean (looks like a CLI flag value - confirm) into a probe target. */
+ export declare function parsePublisherTarget(value?: string | boolean): PublisherProbeTarget;
11
/**
 * Asynchronously produces a CrossrefTdmDiscovery for a DOI.
 * `env` and `httpFetch` are optional; their defaults are not visible here.
 */
+ export declare function discoverCrossrefTdm(doi: string, env?: Record<string, string | undefined>, httpFetch?: SearchFetch): Promise<CrossrefTdmDiscovery>;
12
/** Returns the list of publisher configuration entries. */
+ export declare function publisherConfigs(): PublisherConfig[];
13
/** Asynchronously produces one access record for the given probe input. */
+ export declare function probePublisherAccess(input: PublisherProbeInput): Promise<PublisherAccessRecord>;
14
/** Synchronously returns access records derived from an optional environment map. */
+ export declare function summarizeConfiguredPublisherAccess(env?: Record<string, string | undefined>): PublisherAccessRecord[];
15
/**
 * Asynchronously maps evidence cards to evidence cards; presumably attaches
 * publisher access information to them - TODO confirm. `env`, `fetch` and
 * `limit` are optional; the meaning of `limit` is not visible here.
 */
+ export declare function enrichCardsWithPublisherAccess(input: {
16
+ cards: EvidenceCard[];
17
+ env?: Record<string, string | undefined>;
18
+ fetch?: SearchFetch;
19
+ limit?: number;
20
+ }): Promise<EvidenceCard[]>;
21
+ export {};