@ahkohd/yagami 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.beads/.beads-credential-key +1 -0
- package/.beads/README.md +81 -0
- package/.beads/config.yaml +54 -0
- package/.beads/hooks/post-checkout +24 -0
- package/.beads/hooks/post-merge +24 -0
- package/.beads/hooks/pre-commit +24 -0
- package/.beads/hooks/pre-push +24 -0
- package/.beads/hooks/prepare-commit-msg +24 -0
- package/.beads/metadata.json +7 -0
- package/.github/workflows/ci.yml +43 -0
- package/.github/workflows/release.yml +115 -0
- package/AGENTS.md +150 -0
- package/README.md +210 -0
- package/biome.json +36 -0
- package/config/mcporter.json +8 -0
- package/dist/cli/theme.js +202 -0
- package/dist/cli/theme.js.map +1 -0
- package/dist/cli.js +1883 -0
- package/dist/cli.js.map +1 -0
- package/dist/config.js +223 -0
- package/dist/config.js.map +1 -0
- package/dist/daemon.js +745 -0
- package/dist/daemon.js.map +1 -0
- package/dist/engine/constants.js +131 -0
- package/dist/engine/constants.js.map +1 -0
- package/dist/engine/deep-research.js +167 -0
- package/dist/engine/deep-research.js.map +1 -0
- package/dist/engine/defuddle-utils.js +57 -0
- package/dist/engine/defuddle-utils.js.map +1 -0
- package/dist/engine/github-fetch.js +232 -0
- package/dist/engine/github-fetch.js.map +1 -0
- package/dist/engine/helpers.js +372 -0
- package/dist/engine/helpers.js.map +1 -0
- package/dist/engine/limiter.js +75 -0
- package/dist/engine/limiter.js.map +1 -0
- package/dist/engine/policy.js +313 -0
- package/dist/engine/policy.js.map +1 -0
- package/dist/engine/runtime-utils.js +65 -0
- package/dist/engine/runtime-utils.js.map +1 -0
- package/dist/engine/search-discovery.js +275 -0
- package/dist/engine/search-discovery.js.map +1 -0
- package/dist/engine/url-utils.js +72 -0
- package/dist/engine/url-utils.js.map +1 -0
- package/dist/engine.js +2030 -0
- package/dist/engine.js.map +1 -0
- package/dist/mcp.js +282 -0
- package/dist/mcp.js.map +1 -0
- package/dist/types/cli.js +2 -0
- package/dist/types/cli.js.map +1 -0
- package/dist/types/config.js +2 -0
- package/dist/types/config.js.map +1 -0
- package/dist/types/daemon.js +2 -0
- package/dist/types/daemon.js.map +1 -0
- package/dist/types/engine.js +2 -0
- package/dist/types/engine.js.map +1 -0
- package/package.json +66 -0
- package/packages/pi-yagami-search/README.md +39 -0
- package/packages/pi-yagami-search/extensions/yagami-search.ts +273 -0
- package/packages/pi-yagami-search/package.json +41 -0
- package/src/cli/theme.ts +260 -0
- package/src/cli.ts +2226 -0
- package/src/config.ts +250 -0
- package/src/daemon.ts +990 -0
- package/src/engine/constants.ts +147 -0
- package/src/engine/deep-research.ts +207 -0
- package/src/engine/defuddle-utils.ts +75 -0
- package/src/engine/github-fetch.ts +265 -0
- package/src/engine/helpers.ts +394 -0
- package/src/engine/limiter.ts +97 -0
- package/src/engine/policy.ts +392 -0
- package/src/engine/runtime-utils.ts +79 -0
- package/src/engine/search-discovery.ts +351 -0
- package/src/engine/url-utils.ts +86 -0
- package/src/engine.ts +2516 -0
- package/src/mcp.ts +337 -0
- package/src/shims-cli.d.ts +3 -0
- package/src/types/cli.ts +7 -0
- package/src/types/config.ts +53 -0
- package/src/types/daemon.ts +22 -0
- package/src/types/engine.ts +194 -0
- package/tsconfig.json +18 -0
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import type { CountryProfile, DeepEffort } from "../types/engine.js";
|
|
2
|
+
|
|
3
|
+
// Matches http(s) URLs embedded in free text. A match stops at whitespace,
// `)`, or `]`. Note the `g` flag: the regex is stateful (`lastIndex`) when
// used with `.exec()`/`.test()`; use `.match()`/`.matchAll()` on strings.
export const URL_REGEX = /https?:\/\/[^\s)\]]+/g;

// The recognized deep-research effort levels; used to validate
// caller-supplied effort strings (see resolveDeepEffort).
export const DEEP_EFFORT_LEVELS: ReadonlySet<DeepEffort> = new Set(["fast", "balanced", "thorough"]);

// Preferred domains for code/programming-related queries.
export const CODE_PREFERRED_DOMAINS = [
  "github.com",
  "stackoverflow.com",
  "developer.mozilla.org",
  "docs.python.org",
  "npmjs.com",
] as const;
|
|
14
|
+
|
|
15
|
+
// Preferred domains for company/business research: commercial data providers
// (Crunchbase, PitchBook, D&B), cross-border aggregators (OpenCorporates,
// North Data), LinkedIn, and official national company registers
// (UK, US, AU, NZ, IE, CA, JP, FR, NL, SG, DE).
export const COMPANY_PREFERRED_DOMAINS = [
  "crunchbase.com",
  "pitchbook.com",
  "dnb.com",
  "dnb.co.uk",
  "opencorporates.com",
  "find-and-update.company-information.service.gov.uk",
  "endole.co.uk",
  "companycheck.co.uk",
  "northdata.com",
  "sec.gov",
  "linkedin.com",
  "abr.business.gov.au",
  "companies-register.companiesoffice.govt.nz",
  "core.cro.ie",
  "cro.ie",
  "ised-isde.canada.ca",
  "houjin-bangou.nta.go.jp",
  "sirene.fr",
  "kvk.nl",
  "bizfile.gov.sg",
  "handelsregister.de",
  "unternehmensregister.de",
] as const;
|
|
39
|
+
|
|
40
|
+
// Maps user-supplied country spellings (ISO codes and kebab-case common
// names, lowercase) to the canonical keys of COMPANY_COUNTRY_PROFILES.
export const COMPANY_COUNTRY_ALIASES: Readonly<Record<string, string>> = {
  // United Kingdom
  uk: "uk",
  gb: "uk",
  "united-kingdom": "uk",
  "great-britain": "uk",

  // United States
  us: "us",
  usa: "us",
  "united-states": "us",

  au: "au",
  australia: "au",

  nz: "nz",
  "new-zealand": "nz",

  ie: "ie",
  ireland: "ie",

  ca: "ca",
  canada: "ca",

  fr: "fr",
  france: "fr",

  de: "de",
  germany: "de",

  nl: "nl",
  netherlands: "nl",

  sg: "sg",
  singapore: "sg",

  jp: "jp",
  japan: "jp",
};
|
|
77
|
+
|
|
78
|
+
// Per-country research profiles for company lookups, keyed by the canonical
// codes produced by COMPANY_COUNTRY_ALIASES. Each profile carries a display
// label, the registry domains to prefer for that jurisdiction, and a
// seedUrls() factory producing starting URLs. Some registries accept a query
// parameter directly; others only expose a search landing page, so their
// seedUrls() ignore the query argument.
export const COMPANY_COUNTRY_PROFILES: Readonly<Record<string, CountryProfile>> = {
  uk: {
    label: "United Kingdom",
    domains: [
      "find-and-update.company-information.service.gov.uk",
      "opencorporates.com",
      "endole.co.uk",
      "companycheck.co.uk",
    ],
    seedUrls: (query: string) => [
      `https://find-and-update.company-information.service.gov.uk/search?q=${encodeURIComponent(query)}`,
    ],
  },
  us: {
    label: "United States",
    domains: ["sec.gov", "opencorporates.com"],
    // EDGAR full-text search passes the query in the URL fragment.
    seedUrls: (query: string) => [`https://www.sec.gov/edgar/search/#/q=${encodeURIComponent(query)}`],
  },
  au: {
    label: "Australia",
    domains: ["abr.business.gov.au", "abn.business.gov.au", "opencorporates.com"],
    seedUrls: (query: string) => [`https://abr.business.gov.au/Search/Index?SearchText=${encodeURIComponent(query)}`],
  },
  nz: {
    label: "New Zealand",
    domains: ["companies-register.companiesoffice.govt.nz", "opencorporates.com"],
    seedUrls: () => ["https://companies-register.companiesoffice.govt.nz/search/"],
  },
  ie: {
    label: "Ireland",
    domains: ["core.cro.ie", "cro.ie", "opencorporates.com"],
    seedUrls: () => ["https://core.cro.ie/"],
  },
  ca: {
    label: "Canada",
    domains: ["ised-isde.canada.ca", "opencorporates.com"],
    seedUrls: () => ["https://ised-isde.canada.ca/cc/lgcy/fdrlCrpSrch.html"],
  },
  fr: {
    label: "France",
    domains: ["sirene.fr", "opencorporates.com"],
    seedUrls: (query: string) => [
      "https://www.sirene.fr/sirene/public/static/recherche?sirene_locale=en",
      `https://www.sirene.fr/sirene/public/recherche?nom=${encodeURIComponent(query)}`,
    ],
  },
  de: {
    label: "Germany",
    domains: ["handelsregister.de", "unternehmensregister.de", "northdata.com"],
    seedUrls: () => [
      "https://www.handelsregister.de/rp_web/normalesuche/welcome.xhtml",
      "https://www.unternehmensregister.de/en",
    ],
  },
  nl: {
    label: "Netherlands",
    domains: ["kvk.nl", "opencorporates.com"],
    seedUrls: () => ["https://www.kvk.nl/en/search/"],
  },
  sg: {
    label: "Singapore",
    domains: ["bizfile.gov.sg", "opencorporates.com"],
    seedUrls: () => ["https://www.bizfile.gov.sg/"],
  },
  jp: {
    label: "Japan",
    domains: ["houjin-bangou.nta.go.jp", "opencorporates.com"],
    seedUrls: () => ["https://www.houjin-bangou.nta.go.jp/en/index.html"],
  },
};
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
import { randomUUID } from "node:crypto";
|
|
2
|
+
|
|
3
|
+
import { DEEP_EFFORT_LEVELS } from "./constants.js";
|
|
4
|
+
import { clampInteger, normalizeWhitespace } from "./helpers.js";
|
|
5
|
+
import { normalizeUniqueUrls } from "./url-utils.js";
|
|
6
|
+
import type {
|
|
7
|
+
DeepEffort,
|
|
8
|
+
DeepEffortProfile,
|
|
9
|
+
DeepResearchTaskRecord,
|
|
10
|
+
SearchResultEntry,
|
|
11
|
+
WebSearchLikeResult,
|
|
12
|
+
} from "../types/engine.js";
|
|
13
|
+
|
|
14
|
+
export function resolveDeepEffort(value: unknown): DeepEffort {
|
|
15
|
+
const requested = String(value ?? "balanced")
|
|
16
|
+
.trim()
|
|
17
|
+
.toLowerCase() as DeepEffort;
|
|
18
|
+
return DEEP_EFFORT_LEVELS.has(requested) ? requested : "balanced";
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
export function getDeepEffortProfile(effort: DeepEffort): DeepEffortProfile {
|
|
22
|
+
if (effort === "thorough") {
|
|
23
|
+
return {
|
|
24
|
+
numResults: 24,
|
|
25
|
+
maxHops: 5,
|
|
26
|
+
refinementPasses: 3,
|
|
27
|
+
minPrimarySources: 8,
|
|
28
|
+
thinkingLevel: "high",
|
|
29
|
+
queryTimeoutMs: 8 * 60 * 1000,
|
|
30
|
+
textMaxCharacters: 5600,
|
|
31
|
+
contextMaxCharacters: 42000,
|
|
32
|
+
};
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
if (effort === "balanced") {
|
|
36
|
+
return {
|
|
37
|
+
numResults: 14,
|
|
38
|
+
maxHops: 3,
|
|
39
|
+
refinementPasses: 1,
|
|
40
|
+
minPrimarySources: 4,
|
|
41
|
+
thinkingLevel: "medium",
|
|
42
|
+
queryTimeoutMs: 5 * 60 * 1000,
|
|
43
|
+
textMaxCharacters: 4400,
|
|
44
|
+
contextMaxCharacters: 28000,
|
|
45
|
+
};
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
return {
|
|
49
|
+
numResults: 8,
|
|
50
|
+
maxHops: 2,
|
|
51
|
+
refinementPasses: 1,
|
|
52
|
+
minPrimarySources: 3,
|
|
53
|
+
thinkingLevel: "low",
|
|
54
|
+
queryTimeoutMs: 3 * 60 * 1000,
|
|
55
|
+
textMaxCharacters: 3400,
|
|
56
|
+
contextMaxCharacters: 18000,
|
|
57
|
+
};
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
export function buildDeepCustomInstruction(effort: DeepEffort, profile: DeepEffortProfile): string {
|
|
61
|
+
const hint =
|
|
62
|
+
effort === "thorough"
|
|
63
|
+
? "Prioritize completeness and source triangulation over speed."
|
|
64
|
+
: effort === "balanced"
|
|
65
|
+
? "Balance thoroughness with speed."
|
|
66
|
+
: "Prioritize speed. Focus on the strongest sources.";
|
|
67
|
+
|
|
68
|
+
return `${hint} Target at least ${profile.minPrimarySources} primary sources (official docs, papers, first-party).`;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
export function buildDeepFollowUpPrompts(effort: DeepEffort, profile: DeepEffortProfile): string[] {
|
|
72
|
+
const prompts: string[] = [];
|
|
73
|
+
|
|
74
|
+
if (profile.refinementPasses >= 1) {
|
|
75
|
+
prompts.push(
|
|
76
|
+
[
|
|
77
|
+
"Run a gap-check pass now.",
|
|
78
|
+
"Identify the weakest-supported claims in your current draft and browse additional sources to strengthen or revise them.",
|
|
79
|
+
"Prioritize primary sources and contradiction checks.",
|
|
80
|
+
"Then provide a revised report with confidence notes and updated sources.",
|
|
81
|
+
].join(" "),
|
|
82
|
+
);
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
if (profile.refinementPasses >= 2 || effort === "thorough") {
|
|
86
|
+
prompts.push(
|
|
87
|
+
[
|
|
88
|
+
"Run an adversarial verification pass.",
|
|
89
|
+
"Challenge your top conclusions, verify key dates/numbers against independent sources, and resolve remaining ambiguities where possible.",
|
|
90
|
+
"Revise any weakly supported claims and keep disagreements explicit.",
|
|
91
|
+
].join(" "),
|
|
92
|
+
);
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
if (profile.refinementPasses >= 3) {
|
|
96
|
+
prompts.push(
|
|
97
|
+
[
|
|
98
|
+
"Run a final quality polish pass.",
|
|
99
|
+
"Re-read the full report for clarity and evidence traceability.",
|
|
100
|
+
"Ensure every major claim has supporting sources, confidence notes are explicit, and unresolved unknowns remain clearly marked.",
|
|
101
|
+
"Then produce the final report in the required deep structure.",
|
|
102
|
+
].join(" "),
|
|
103
|
+
);
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
return prompts;
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
export function createDeepResearchTask(instructions: string, effort: DeepEffort): DeepResearchTaskRecord {
|
|
110
|
+
return {
|
|
111
|
+
researchId: randomUUID(),
|
|
112
|
+
status: "pending",
|
|
113
|
+
instructions,
|
|
114
|
+
effort,
|
|
115
|
+
createdAt: new Date().toISOString(),
|
|
116
|
+
startedAt: null,
|
|
117
|
+
completedAt: null,
|
|
118
|
+
durationMs: null,
|
|
119
|
+
report: null,
|
|
120
|
+
citations: [],
|
|
121
|
+
error: null,
|
|
122
|
+
costDollars: 0,
|
|
123
|
+
};
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
export function evictOldDeepResearchTasks(tasks: Map<string, DeepResearchTaskRecord>, maxTasks = 100): void {
|
|
127
|
+
const cap = Math.max(1, clampInteger(maxTasks, 100, { min: 1, max: 1000 }));
|
|
128
|
+
while (tasks.size > cap) {
|
|
129
|
+
const oldest = tasks.keys().next().value;
|
|
130
|
+
if (!oldest) break;
|
|
131
|
+
tasks.delete(oldest);
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
/**
 * Assembles a markdown deep-research report from a web-search-like result.
 * Sections, in order: Prompt, Summary, Findings, (Failed Crawls if any),
 * Sources. Entries with a truthy `error` count as failed crawls; the rest
 * are treated as successful sources.
 *
 * @param instructions The original research prompt, echoed into the report.
 * @param effort Effort level used only for the human-readable depth note.
 * @param searchResult Result whose `results` array is partitioned into
 *   successful/failed entries; a missing array is treated as empty.
 * @returns The full report as a single newline-joined markdown string.
 */
export function composeDeepResearchReport(
  instructions: string,
  effort: DeepEffort,
  searchResult: WebSearchLikeResult,
): string {
  // Partition once; both lists are reused across sections.
  const successful = (searchResult.results ?? []).filter((entry) => !entry.error);
  const failed = (searchResult.results ?? []).filter((entry) => entry.error);

  const lines: string[] = [];
  lines.push("# Deep Research Report");
  lines.push("");
  lines.push("## Prompt");
  lines.push(instructions);
  lines.push("");
  lines.push("## Summary");

  if (successful.length === 0) {
    lines.push("No successful pages were extracted for this run.");
  } else {
    lines.push(
      `Collected ${successful.length} source(s)${failed.length ? `, with ${failed.length} failed crawl(s)` : ""}.`,
    );
    const depthLabel = effort === "thorough" ? "high depth" : effort === "balanced" ? "standard depth" : "fast depth";
    lines.push(`Depth profile: ${effort} (${depthLabel}).`);
  }

  lines.push("");
  lines.push("## Findings");

  if (successful.length === 0) {
    lines.push("- Unable to extract reliable findings from available pages.");
  } else {
    for (const entry of successful) {
      const title = entry.title || entry.url;
      const snippet = entry.snippet || "No snippet available.";
      // Whitespace-normalized content preview, capped at 260 characters.
      const contentPreview = normalizeWhitespace(entry.content || "").slice(0, 260);

      lines.push(`- **${title}** (${entry.url})`);
      lines.push(` - Snippet: ${snippet}`);
      if (contentPreview) {
        // Ellipsis appended when the preview hit the 260-character cap.
        lines.push(` - Extracted: ${contentPreview}${contentPreview.length >= 260 ? "…" : ""}`);
      }
    }
  }

  if (failed.length > 0) {
    lines.push("");
    lines.push("## Failed Crawls");
    for (const entry of failed) {
      lines.push(`- ${entry.url}: ${entry.error}`);
    }
  }

  lines.push("");
  lines.push("## Sources");
  if (successful.length === 0) {
    lines.push("- none");
  } else {
    for (const entry of successful) {
      lines.push(`- ${entry.title || entry.url}: ${entry.url}`);
    }
  }

  return lines.join("\n");
}
|
|
200
|
+
|
|
201
|
+
export function extractDeepResearchCitations(searchResult: WebSearchLikeResult): string[] {
|
|
202
|
+
const urls = (searchResult.results ?? [])
|
|
203
|
+
.filter((entry: SearchResultEntry) => !entry.error)
|
|
204
|
+
.map((entry: SearchResultEntry) => entry.url);
|
|
205
|
+
|
|
206
|
+
return normalizeUniqueUrls(urls);
|
|
207
|
+
}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
// Signature compatible with console.warn.
type WarnFn = (...args: unknown[]) => void;

// Bookkeeping for the process-wide console.warn filter:
// - installed: whether console.warn has been monkey-patched yet
// - activeScopes: count of currently running withSuppressedDefuddleWarnings scopes
// - suppressedCount: total warnings swallowed since the filter was installed
// - originalWarn: where non-suppressed warnings are forwarded
interface DefuddleWarnFilterState {
  installed: boolean;
  activeScopes: number;
  suppressedCount: number;
  originalWarn: WarnFn;
}

// Singleton filter state shared by all suppression scopes in this process.
// originalWarn starts as a plain console.warn passthrough and is replaced
// with the bound pre-patch console.warn at install time.
const state: DefuddleWarnFilterState = {
  installed: false,
  activeScopes: 0,
  suppressedCount: 0,
  originalWarn: (...args: unknown[]) => {
    console.warn(...args);
  },
};
|
|
18
|
+
|
|
19
|
+
function hasInvalidUrlShape(value: unknown): value is { code?: unknown; input?: unknown } {
|
|
20
|
+
return Boolean(value) && typeof value === "object";
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
function isDefuddleInvalidUrlWarning(args: unknown[]): boolean {
|
|
24
|
+
const head = String(args[0] ?? "")
|
|
25
|
+
.trim()
|
|
26
|
+
.toLowerCase();
|
|
27
|
+
|
|
28
|
+
if (!head.includes("failed to parse url")) return false;
|
|
29
|
+
|
|
30
|
+
const details = args.find((value) => hasInvalidUrlShape(value));
|
|
31
|
+
if (!details) return true;
|
|
32
|
+
|
|
33
|
+
const code = String(details.code ?? "").trim();
|
|
34
|
+
if (code && code !== "ERR_INVALID_URL") return false;
|
|
35
|
+
|
|
36
|
+
const input = String(details.input ?? "").trim();
|
|
37
|
+
if (!input) return true;
|
|
38
|
+
|
|
39
|
+
return input.includes(",");
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
function ensureWarnFilterInstalled(): void {
|
|
43
|
+
if (state.installed) return;
|
|
44
|
+
|
|
45
|
+
state.originalWarn = console.warn.bind(console);
|
|
46
|
+
console.warn = (...args: unknown[]) => {
|
|
47
|
+
if (state.activeScopes > 0 && isDefuddleInvalidUrlWarning(args)) {
|
|
48
|
+
state.suppressedCount += 1;
|
|
49
|
+
return;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
state.originalWarn(...args);
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
state.installed = true;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
export async function withSuppressedDefuddleWarnings<T>(
|
|
59
|
+
operation: () => Promise<T>,
|
|
60
|
+
): Promise<{ value: T; suppressedCount: number }> {
|
|
61
|
+
ensureWarnFilterInstalled();
|
|
62
|
+
|
|
63
|
+
const startSuppressedCount = state.suppressedCount;
|
|
64
|
+
state.activeScopes += 1;
|
|
65
|
+
|
|
66
|
+
try {
|
|
67
|
+
const value = await operation();
|
|
68
|
+
return {
|
|
69
|
+
value,
|
|
70
|
+
suppressedCount: state.suppressedCount - startSuppressedCount,
|
|
71
|
+
};
|
|
72
|
+
} finally {
|
|
73
|
+
state.activeScopes = Math.max(0, state.activeScopes - 1);
|
|
74
|
+
}
|
|
75
|
+
}
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
import { execFile } from "node:child_process";
|
|
2
|
+
import { randomUUID } from "node:crypto";
|
|
3
|
+
import { promisify } from "node:util";
|
|
4
|
+
|
|
5
|
+
import { countWords, truncateText } from "./helpers.js";
|
|
6
|
+
import { normalizeUrl } from "./url-utils.js";
|
|
7
|
+
|
|
8
|
+
// Promise-based wrapper around child_process.execFile; used to shell out to `gh`.
const execFileAsync = promisify(execFile);
// Memoized result of the `gh --version` availability probe; null until first check.
let ghCliAvailablePromise: Promise<boolean> | null = null;

// Owner/repo pair parsed from a github.com URL, plus the canonical repo URL.
interface GitHubRepoReference {
  owner: string;
  repo: string;
  repoUrl: string;
}
|
|
16
|
+
|
|
17
|
+
function asObject(value: unknown): Record<string, unknown> | null {
|
|
18
|
+
if (!value || typeof value !== "object" || Array.isArray(value)) return null;
|
|
19
|
+
return value as Record<string, unknown>;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
function parseGitHubRepoReference(input: string): GitHubRepoReference | null {
|
|
23
|
+
let normalizedInput: string;
|
|
24
|
+
try {
|
|
25
|
+
normalizedInput = normalizeUrl(input);
|
|
26
|
+
} catch {
|
|
27
|
+
return null;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
try {
|
|
31
|
+
const url = new URL(normalizedInput);
|
|
32
|
+
const host = String(url.hostname || "")
|
|
33
|
+
.trim()
|
|
34
|
+
.toLowerCase();
|
|
35
|
+
|
|
36
|
+
if (host !== "github.com" && host !== "www.github.com") {
|
|
37
|
+
return null;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
const parts = url.pathname.split("/").filter(Boolean);
|
|
41
|
+
if (parts.length !== 2) {
|
|
42
|
+
return null;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
const owner = String(parts[0] || "").trim();
|
|
46
|
+
const repo = String(parts[1] || "")
|
|
47
|
+
.replace(/\.git$/i, "")
|
|
48
|
+
.trim();
|
|
49
|
+
|
|
50
|
+
if (!owner || !repo) return null;
|
|
51
|
+
|
|
52
|
+
return {
|
|
53
|
+
owner,
|
|
54
|
+
repo,
|
|
55
|
+
repoUrl: `https://github.com/${owner}/${repo}`,
|
|
56
|
+
};
|
|
57
|
+
} catch {
|
|
58
|
+
return null;
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
function decodeBase64ToUtf8(rawValue: unknown): string {
|
|
63
|
+
const encoded = String(rawValue ?? "")
|
|
64
|
+
.replace(/\s+/g, "")
|
|
65
|
+
.trim();
|
|
66
|
+
|
|
67
|
+
if (!encoded) return "";
|
|
68
|
+
|
|
69
|
+
try {
|
|
70
|
+
return Buffer.from(encoded, "base64").toString("utf8");
|
|
71
|
+
} catch {
|
|
72
|
+
return "";
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
async function hasGhCli(): Promise<boolean> {
|
|
77
|
+
if (!ghCliAvailablePromise) {
|
|
78
|
+
ghCliAvailablePromise = execFileAsync("gh", ["--version"], {
|
|
79
|
+
timeout: 2000,
|
|
80
|
+
windowsHide: true,
|
|
81
|
+
})
|
|
82
|
+
.then(() => true)
|
|
83
|
+
.catch(() => false);
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
return await ghCliAvailablePromise;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
async function runGhApi(endpoint: string, timeoutMs = 12000): Promise<unknown> {
|
|
90
|
+
const { stdout } = await execFileAsync("gh", ["api", endpoint], {
|
|
91
|
+
timeout: timeoutMs,
|
|
92
|
+
maxBuffer: 10 * 1024 * 1024,
|
|
93
|
+
windowsHide: true,
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
const raw = String(stdout || "").trim();
|
|
97
|
+
if (!raw) return null;
|
|
98
|
+
|
|
99
|
+
return JSON.parse(raw) as unknown;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
/**
 * Fetches a GitHub repository's metadata, README, and top-level file listing
 * via the `gh` CLI and renders them into a markdown-ish document shaped like
 * a crawled-page result.
 *
 * Returns null (so the caller can fall back to a browser fetch) when the URL
 * is not a bare github.com/owner/repo reference, the `gh` CLI is missing, or
 * the repository metadata request fails. README and contents-listing
 * failures are tolerated and simply omitted from the output.
 *
 * @param requestedUrl URL the caller asked for; echoed in the output when it
 *   differs from the canonical repo URL.
 * @param maxCharacters Cap applied to the assembled content via truncateText.
 * @param options Optional `log` callback for fallback diagnostics.
 */
export async function tryFetchGitHubRepoContent(
  requestedUrl: string,
  maxCharacters: number,
  options: { log?: (message: string) => void } = {},
): Promise<Record<string, unknown> | null> {
  const repoRef = parseGitHubRepoReference(requestedUrl);
  if (!repoRef) return null;

  if (!(await hasGhCli())) {
    return null;
  }

  const startedAt = Date.now();
  const baseEndpoint = `repos/${encodeURIComponent(repoRef.owner)}/${encodeURIComponent(repoRef.repo)}`;

  try {
    const repoPayload = asObject(await runGhApi(baseEndpoint));
    if (!repoPayload) {
      return null;
    }

    // Prefer API-reported values; fall back to what was parsed from the URL.
    const repoUrl = String(repoPayload.html_url || repoRef.repoUrl).trim() || repoRef.repoUrl;
    const repoName = String(repoPayload.full_name || `${repoRef.owner}/${repoRef.repo}`).trim();
    const description = String(repoPayload.description || "").trim();
    const defaultBranch = String(repoPayload.default_branch || "").trim();
    const language = String(repoPayload.language || "").trim();
    const homepage = String(repoPayload.homepage || "").trim();
    const pushedAt = String(repoPayload.pushed_at || "").trim();
    const updatedAt = String(repoPayload.updated_at || "").trim();
    // Counters are sanitized to non-negative integers (0 on anything odd).
    const starsRaw = Number(repoPayload.stargazers_count || 0);
    const forksRaw = Number(repoPayload.forks_count || 0);
    const openIssuesRaw = Number(repoPayload.open_issues_count || 0);
    const stars = Number.isFinite(starsRaw) ? Math.max(0, Math.trunc(starsRaw)) : 0;
    const forks = Number.isFinite(forksRaw) ? Math.max(0, Math.trunc(forksRaw)) : 0;
    const openIssues = Number.isFinite(openIssuesRaw) ? Math.max(0, Math.trunc(openIssuesRaw)) : 0;

    const licensePayload = asObject(repoPayload.license);
    const license = String(licensePayload?.spdx_id || licensePayload?.name || "").trim();

    // At most 20 non-empty topic strings.
    const topics = Array.isArray(repoPayload.topics)
      ? repoPayload.topics
          .map((value) => String(value || "").trim())
          .filter(Boolean)
          .slice(0, 20)
      : [];

    let readmeName = "";
    let readmeContent = "";
    try {
      const readmePayload = asObject(await runGhApi(`${baseEndpoint}/readme`));
      if (readmePayload) {
        readmeName = String(readmePayload.name || "README").trim() || "README";
        // The readme endpoint returns base64-encoded content.
        readmeContent = decodeBase64ToUtf8(readmePayload.content).trim();
      }
    } catch {
      // README can be missing; continue with metadata-only output.
    }

    let topLevelEntries: string[] = [];
    try {
      const contentsPayload = await runGhApi(`${baseEndpoint}/contents`);
      if (Array.isArray(contentsPayload)) {
        // Render up to 30 entries, marking dirs "/", symlinks "@", submodules.
        topLevelEntries = contentsPayload
          .map((entry) => {
            const parsed = asObject(entry);
            if (!parsed) return "";

            const name = String(parsed.name || "").trim();
            if (!name) return "";

            const type = String(parsed.type || "").trim();
            if (type === "dir") return `${name}/`;
            if (type === "symlink") return `${name}@`;
            if (type === "submodule") return `${name} (submodule)`;
            return name;
          })
          .filter(Boolean)
          .slice(0, 30);
      }
    } catch {
      // Top-level listing is optional.
    }

    // Assemble the markdown document: title, description, metadata lines,
    // optional file listing, then the README body.
    const lines: string[] = [`# ${repoName}`];
    if (description) {
      lines.push("", description);
    }

    lines.push("", `Repository: ${repoUrl}`);
    if (requestedUrl !== repoUrl) {
      lines.push(`Requested URL: ${requestedUrl}`);
    }

    if (defaultBranch) {
      lines.push(`Default branch: ${defaultBranch}`);
    }

    lines.push(`Stars: ${stars}`);
    lines.push(`Forks: ${forks}`);
    lines.push(`Open issues: ${openIssues}`);

    if (language) {
      lines.push(`Primary language: ${language}`);
    }
    if (license) {
      lines.push(`License: ${license}`);
    }
    if (homepage) {
      lines.push(`Homepage: ${homepage}`);
    }
    if (updatedAt) {
      lines.push(`Updated at: ${updatedAt}`);
    }
    if (pushedAt) {
      lines.push(`Pushed at: ${pushedAt}`);
    }
    if (topics.length > 0) {
      lines.push(`Topics: ${topics.join(", ")}`);
    }

    if (topLevelEntries.length > 0) {
      lines.push("", "Top-level files:");
      lines.push(...topLevelEntries.map((entry) => `- ${entry}`));
    }

    if (readmeContent) {
      lines.push("", `## ${readmeName || "README"}`, "", readmeContent);
    }

    const rawContent = lines.join("\n").trim();
    const content = truncateText(rawContent, maxCharacters);
    const truncated = content.length < rawContent.length;
    const durationMs = Date.now() - startedAt;

    // Shape mirrors the browser-fetch result records used elsewhere.
    return {
      url: repoUrl,
      requestedUrl,
      title: repoName,
      author: repoRef.owner,
      published: pushedAt || updatedAt || "Unknown",
      wordCount: countWords(content),
      content,
      truncated,
      documentId: `gh-${randomUUID()}`,
      status: 200,
      cache: {
        browse: "gh",
        present: "gh",
      },
      timing: {
        totalMs: durationMs,
        browseMs: null,
        presentMs: null,
        ghMs: durationMs,
      },
    };
  } catch (error) {
    // Metadata fetch failed — log (truncated) and signal fallback via null.
    const message = error instanceof Error ? error.message : String(error);
    options.log?.(
      `gh fetch failed for ${repoRef.owner}/${repoRef.repo}; falling back to browser fetch (${message.slice(0, 220)})`,
    );
    return null;
  }
}
|