@bothat-io/molenkopf 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. package/.env.example +2 -0
  2. package/LICENSE +21 -0
  3. package/README.md +199 -0
  4. package/SECURITY.md +36 -0
  5. package/bin/launcher.js +76 -0
  6. package/bin/molenkopf.js +4 -0
  7. package/docs/DEPLOYMENT.md +104 -0
  8. package/docs/MOLENKOPF_PLUGIN_API.md +113 -0
  9. package/docs/MOLENKOPF_PROVIDER_ENV.md +123 -0
  10. package/docs/MOLENKOPF_USAGE.md +195 -0
  11. package/docs/PRODUCT_INTENT.md +36 -0
  12. package/docs/THREAT_MODEL.md +94 -0
  13. package/molenkopf.config.example.json +68 -0
  14. package/package.json +98 -0
  15. package/packages/core/src/auth/password.ts +47 -0
  16. package/packages/core/src/auth/session.ts +64 -0
  17. package/packages/core/src/ci/ci-mode.ts +71 -0
  18. package/packages/core/src/compression/content-classifier.ts +25 -0
  19. package/packages/core/src/compression/context-compressor.ts +48 -0
  20. package/packages/core/src/compression/json-compressor.ts +54 -0
  21. package/packages/core/src/compression/log-compressor.ts +32 -0
  22. package/packages/core/src/compression/operational-block-compressor.ts +43 -0
  23. package/packages/core/src/compression/stacktrace-compressor.ts +23 -0
  24. package/packages/core/src/config/config-policies.ts +146 -0
  25. package/packages/core/src/config/molenkopf-config.ts +137 -0
  26. package/packages/core/src/config/provider-config.ts +139 -0
  27. package/packages/core/src/events/event-bus.ts +88 -0
  28. package/packages/core/src/identity/api-keys.ts +149 -0
  29. package/packages/core/src/identity/budget.ts +51 -0
  30. package/packages/core/src/identity/db.ts +68 -0
  31. package/packages/core/src/identity/identity-store.ts +175 -0
  32. package/packages/core/src/identity/identity-validation.ts +102 -0
  33. package/packages/core/src/identity/key-permissions.ts +18 -0
  34. package/packages/core/src/identity/pricing.ts +11 -0
  35. package/packages/core/src/identity/types.ts +87 -0
  36. package/packages/core/src/identity/usage-snapshot.ts +116 -0
  37. package/packages/core/src/manifest/audit-activity.ts +74 -0
  38. package/packages/core/src/manifest/audit-metrics.ts +7 -0
  39. package/packages/core/src/manifest/audit-safety.ts +113 -0
  40. package/packages/core/src/manifest/audit-store.ts +189 -0
  41. package/packages/core/src/manifest/audit-summary.ts +184 -0
  42. package/packages/core/src/manifest/usage-meter.ts +105 -0
  43. package/packages/core/src/memory/memory-extractor.ts +57 -0
  44. package/packages/core/src/memory/memory-graph.ts +55 -0
  45. package/packages/core/src/pipeline/json-string-spans.ts +143 -0
  46. package/packages/core/src/pipeline/openai-request-rewriter.ts +66 -0
  47. package/packages/core/src/plugins/builtin-plugin-descriptors.ts +10 -0
  48. package/packages/core/src/plugins/builtin-plugin-modules.ts +9 -0
  49. package/packages/core/src/plugins/plugin-api.ts +96 -0
  50. package/packages/core/src/plugins/plugin-catalog.ts +42 -0
  51. package/packages/core/src/plugins/plugin-descriptor.ts +51 -0
  52. package/packages/core/src/plugins/plugin-sdk.ts +47 -0
  53. package/packages/core/src/plugins/static-pipeline.ts +5 -0
  54. package/packages/core/src/profiles/profile-router.ts +45 -0
  55. package/packages/core/src/providers/provider-catalog.ts +186 -0
  56. package/packages/core/src/routing/distribution.ts +31 -0
  57. package/packages/core/src/security/secret-redactor.ts +139 -0
  58. package/packages/core/src/security/target-policy.ts +61 -0
  59. package/packages/core/src/storage/local-paths.ts +6 -0
  60. package/packages/core/src/storage/private-state.ts +30 -0
  61. package/packages/core/src/storage/purge-dir.ts +10 -0
  62. package/packages/core/src/store/retrieval-store.ts +114 -0
  63. package/packages/core/src/utils/hash.ts +9 -0
  64. package/packages/core/src/utils/text.ts +18 -0
  65. package/packages/core/src/utils/tokens.ts +3 -0
  66. package/packages/dashboard/dist/assets/index-B_aSPgHx.js +11 -0
  67. package/packages/dashboard/dist/assets/index-D6z2TEL2.css +1 -0
  68. package/packages/dashboard/dist/favicon.png +0 -0
  69. package/packages/dashboard/dist/index.html +15 -0
  70. package/packages/dashboard/dist/molenkopf-logo.png +0 -0
  71. package/packages/dashboard/public/favicon.png +0 -0
  72. package/packages/dashboard/public/molenkopf-logo.png +0 -0
  73. package/packages/plugins/context-compressor-plugin/descriptor.ts +19 -0
  74. package/packages/plugins/context-compressor-plugin/page.html +191 -0
  75. package/packages/plugins/context-compressor-plugin/plugin.ts +40 -0
  76. package/packages/plugins/obsidian-graph-plugin/descriptor.ts +19 -0
  77. package/packages/plugins/obsidian-graph-plugin/page.html +68 -0
  78. package/packages/plugins/obsidian-graph-plugin/plugin.ts +27 -0
  79. package/packages/plugins/shared/audit-projects.ts +32 -0
  80. package/packages/proxy/src/cli/args.ts +34 -0
  81. package/packages/proxy/src/cli/config-loader.ts +43 -0
  82. package/packages/proxy/src/cli/env-file.ts +43 -0
  83. package/packages/proxy/src/cli/main.ts +132 -0
  84. package/packages/proxy/src/cli/profile-server.ts +176 -0
  85. package/packages/proxy/src/cli/target.ts +7 -0
  86. package/packages/proxy/src/http/agent-drafts.ts +103 -0
  87. package/packages/proxy/src/http/agent-router.ts +69 -0
  88. package/packages/proxy/src/http/audit-view.ts +15 -0
  89. package/packages/proxy/src/http/auth-state.ts +44 -0
  90. package/packages/proxy/src/http/budget-gate.ts +45 -0
  91. package/packages/proxy/src/http/budget-warnings.ts +7 -0
  92. package/packages/proxy/src/http/cli-stream-response.ts +51 -0
  93. package/packages/proxy/src/http/client-identity.ts +51 -0
  94. package/packages/proxy/src/http/communication-graph.ts +139 -0
  95. package/packages/proxy/src/http/control-plane-guard.ts +56 -0
  96. package/packages/proxy/src/http/dashboard-assets.ts +115 -0
  97. package/packages/proxy/src/http/encoded-usage-meter.ts +32 -0
  98. package/packages/proxy/src/http/header-utils.ts +65 -0
  99. package/packages/proxy/src/http/identity-id.ts +11 -0
  100. package/packages/proxy/src/http/local-api-agent-actions.ts +17 -0
  101. package/packages/proxy/src/http/local-api-auth.ts +120 -0
  102. package/packages/proxy/src/http/local-api-consumer-actions.ts +20 -0
  103. package/packages/proxy/src/http/local-api-identity.ts +194 -0
  104. package/packages/proxy/src/http/local-api-io.ts +82 -0
  105. package/packages/proxy/src/http/local-api-keys.ts +126 -0
  106. package/packages/proxy/src/http/local-api-pipeline.ts +41 -0
  107. package/packages/proxy/src/http/local-api-plugin-actions.ts +31 -0
  108. package/packages/proxy/src/http/local-api-provider-actions.ts +181 -0
  109. package/packages/proxy/src/http/local-api-retention.ts +28 -0
  110. package/packages/proxy/src/http/local-api-runtime-auth.ts +119 -0
  111. package/packages/proxy/src/http/local-api-scope.ts +47 -0
  112. package/packages/proxy/src/http/local-api-state.ts +180 -0
  113. package/packages/proxy/src/http/local-api.ts +166 -0
  114. package/packages/proxy/src/http/password-policy.ts +5 -0
  115. package/packages/proxy/src/http/plugin-data.ts +38 -0
  116. package/packages/proxy/src/http/plugin-host.ts +87 -0
  117. package/packages/proxy/src/http/plugin-modules.ts +1 -0
  118. package/packages/proxy/src/http/plugin-page-loader.ts +24 -0
  119. package/packages/proxy/src/http/plugin-pipeline.ts +125 -0
  120. package/packages/proxy/src/http/provider-access.ts +33 -0
  121. package/packages/proxy/src/http/provider-http-test.ts +133 -0
  122. package/packages/proxy/src/http/provider-input.ts +39 -0
  123. package/packages/proxy/src/http/provider-routing-snapshot.ts +28 -0
  124. package/packages/proxy/src/http/provider-test.ts +149 -0
  125. package/packages/proxy/src/http/proxy-identity.ts +78 -0
  126. package/packages/proxy/src/http/public-bind.ts +8 -0
  127. package/packages/proxy/src/http/request-finish.ts +62 -0
  128. package/packages/proxy/src/http/request-path.ts +8 -0
  129. package/packages/proxy/src/http/request-policy.ts +46 -0
  130. package/packages/proxy/src/http/runtime-auth-proof.ts +55 -0
  131. package/packages/proxy/src/http/runtime-auth-registry.ts +105 -0
  132. package/packages/proxy/src/http/runtime-settings.ts +199 -0
  133. package/packages/proxy/src/http/runtime-state.ts +198 -0
  134. package/packages/proxy/src/http/server-io.ts +80 -0
  135. package/packages/proxy/src/http/server-types.ts +17 -0
  136. package/packages/proxy/src/http/server.ts +190 -0
  137. package/packages/proxy/src/http/session-secret.ts +19 -0
  138. package/packages/proxy/src/http/streaming-proxy.ts +88 -0
  139. package/packages/proxy/src/http/usage-accounting.ts +100 -0
  140. package/packages/proxy/src/http/usage-restore.ts +15 -0
  141. package/packages/proxy/src/runtime/cli-diagnostics.ts +64 -0
  142. package/packages/proxy/src/runtime/cli-env.ts +22 -0
  143. package/packages/proxy/src/runtime/cli-executor.ts +134 -0
  144. package/packages/proxy/src/runtime/cli-provider.ts +162 -0
  145. package/packages/proxy/src/runtime/cli-request.ts +79 -0
  146. package/packages/proxy/src/runtime/codex-runtime-config.ts +37 -0
  147. package/packages/proxy/src/runtime/runtime-profile.ts +170 -0
@@ -0,0 +1,186 @@
1
+ import { validateProviderTarget } from "../security/target-policy.ts";
2
+
3
/** Category of upstream a provider entry represents. */
export type ProviderKind = "api" | "local" | "cli";

/** Runtime-profile settings that can be attached to a provider; every field is optional. */
export type RuntimeProfileConfig = {
  settingsRef?: string;
  configRef?: string;
  permissionMode?: string;
  allowedTools?: string[];
  disallowedTools?: string[];
  addDirs?: string[];
  sandbox?: string;
  approval?: string;
  // Human-readable lines; viewRuntimeProfile truncates each to 80 characters before exposing them.
  summary?: string[];
};

/** Count-only projection of a RuntimeProfileConfig (tool and directory lists are reduced to lengths). */
export type RuntimeProfileDiagnostics = {
  settingsSource?: string;
  configSource?: string;
  permissionMode?: string;
  sandbox?: string;
  approval?: string;
  allowedToolCount: number;
  deniedToolCount: number;
  addDirCount: number;
  outerHarness: "unknown";
  remediation: string;
};

/** Shape returned by viewRuntimeProfile: the clipped summary lines plus optional diagnostics. */
export type RuntimeProfileView = { summary: string[]; diagnostics?: RuntimeProfileDiagnostics };

/** Full provider definition, including fields that viewProviders strips before returning a ProviderView. */
export type ProviderConfig = {
  id: string;
  name: string;
  kind: ProviderKind;
  target: string;
  // Name of the environment variable holding the credential (looked up by viewProviders).
  credentialEnv?: string;
  credentialRef?: string;
  // Inline credential; removed from ProviderView.
  credentialValue?: string;
  authScheme?: "bearer" | "x-api-key" | "none";
  runtime?: "claude" | "codex";
  cliCommand?: string;
  cliArgs?: string[];
  cliInputMode?: "stdin" | "argument";
  cliTimeoutMs?: number;
  runtimeAuthDir?: string;
  authRef?: string;
  runtimeProfile?: RuntimeProfileConfig;
  protocol?: "openai-responses" | "anthropic-messages" | "openai-chat" | "ollama-tags";
  allowDistribution?: boolean;
  allowClientCredentialForwarding?: boolean;
  // Treated as enabled unless explicitly `false` (see buildProviderCatalog / viewProviders).
  enabled?: boolean;
};

/**
 * Provider shape produced by viewProviders: ProviderConfig without credentialValue, runtimeAuthDir,
 * authRef, runtimeProfile and cliArgs, plus derived status flags and a reduced runtime profile.
 */
export type ProviderView = Omit<ProviderConfig, "credentialValue" | "runtimeAuthDir" | "authRef" | "runtimeProfile" | "cliArgs"> & {
  active: boolean;
  credentialConfigured: boolean;
  runtimeAuthConfigured: boolean;
  runtimeProfile?: RuntimeProfileView;
  selectable: boolean;
};

/** Switches for buildProviderCatalog; each source is included unless its flag is explicitly `false`. */
export type ProviderCatalogOptions = { includeBuiltIns?: boolean; includeEnvProviders?: boolean };
64
+
65
/**
 * Assembles the provider catalog from up to three sources, in priority order:
 * built-in profiles, providers declared through environment variables, then `extra`.
 *
 * @param target - Upstream URL used for the built-in "default" provider.
 * @param extra - Caller-supplied providers; `enabled` is normalised to a boolean (true unless `false`).
 * @param env - Environment map to read from (defaults to `process.env`).
 * @param options - Set a flag to `false` to leave out the built-in or env-declared providers.
 * @returns The merged list with duplicate ids removed; the first occurrence of an id wins.
 */
export function buildProviderCatalog(target: string, extra: ProviderConfig[] = [], env: Record<string, string | undefined> = process.env, options: ProviderCatalogOptions = {}): ProviderConfig[] {
  const { includeBuiltIns, includeEnvProviders } = options;
  const catalog: ProviderConfig[] = [];
  if (includeBuiltIns !== false) catalog.push(...builtInProviders(target, env));
  if (includeEnvProviders !== false) catalog.push(...configuredEnvProviders(env));
  for (const entry of extra) catalog.push({ ...entry, enabled: entry.enabled !== false });
  return uniqueById(catalog);
}
75
+
76
/**
 * Projects provider configs into the shape that omits credential and runtime-auth details.
 *
 * @param providers - Full provider configurations.
 * @param activeProviderId - Id of the provider to flag as `active`.
 * @param env - Environment map used to check whether `credentialEnv` is populated.
 * @returns One view per provider, in input order.
 */
export function viewProviders(providers: ProviderConfig[], activeProviderId: string, env: Record<string, string | undefined> = process.env): ProviderView[] {
  const views: ProviderView[] = [];
  for (const provider of providers) {
    // Pull the withheld fields out so the rest-spread below cannot carry them into the view.
    const { credentialValue, runtimeAuthDir, authRef, runtimeProfile, cliArgs, ...exposed } = provider;
    const envCredential = provider.credentialEnv ? env[provider.credentialEnv] : undefined;
    views.push({
      ...exposed,
      active: provider.id === activeProviderId,
      credentialConfigured: Boolean(credentialValue || envCredential),
      runtimeAuthConfigured: Boolean(runtimeAuthDir),
      runtimeProfile: viewRuntimeProfile(runtimeProfile),
      selectable: provider.enabled !== false
    });
  }
  return views;
}
89
+
90
/**
 * Reduces a runtime profile to its displayable form.
 *
 * @param profile - Profile to project; may be undefined.
 * @returns `undefined` when there is no non-empty summary line, otherwise the summary lines
 *          (each clipped to 80 characters) together with the profile diagnostics.
 */
export function viewRuntimeProfile(profile: RuntimeProfileConfig | undefined): RuntimeProfileView | undefined {
  const lines: string[] = [];
  for (const entry of profile?.summary ?? []) {
    const clipped = entry.slice(0, 80);
    if (clipped) lines.push(clipped);
  }
  if (lines.length === 0) return undefined;
  return { summary: lines, diagnostics: runtimeDiagnostics(profile) };
}
95
+
96
/**
 * Builds the diagnostics record for a runtime profile: scalar settings are copied through,
 * tool and directory lists are reported only as counts.
 *
 * @returns `undefined` when no profile is given.
 */
function runtimeDiagnostics(profile: RuntimeProfileConfig | undefined): RuntimeProfileDiagnostics | undefined {
  if (!profile) return undefined;
  const { settingsRef, configRef, permissionMode, sandbox, approval, allowedTools, disallowedTools, addDirs } = profile;
  const sizeOf = (list: string[] | undefined): number => list?.length ?? 0;
  return {
    settingsSource: settingsRef,
    configSource: configRef,
    permissionMode,
    sandbox,
    approval,
    allowedToolCount: sizeOf(allowedTools),
    deniedToolCount: sizeOf(disallowedTools),
    addDirCount: sizeOf(addDirs),
    outerHarness: "unknown",
    remediation: "If the host client still asks, approve that prompt or configure this project in .claude/settings.json; Molenkopf cannot bypass a separate Claude/Codex harness."
  };
}
111
+
112
/**
 * Returns the five built-in provider profiles: the default upstream plus OpenAI, Anthropic,
 * Ollama and LM Studio profiles driven by their `*_BASE_URL` / `*_API_KEY` environment variables.
 *
 * A profile is enabled only when its target passed `safeTarget` and the relevant variable is set;
 * the two local profiles are validated with private addresses allowed.
 */
function builtInProviders(target: string, env: Record<string, string | undefined>): ProviderConfig[] {
  const openai = safeTarget(env.OPENAI_BASE_URL, "https://api.openai.com/v1");
  const anthropic = safeTarget(env.ANTHROPIC_BASE_URL, "https://api.anthropic.com/v1");
  const ollama = safeTarget(env.OLLAMA_BASE_URL, "http://127.0.0.1:11434/v1", true);
  const lmstudio = safeTarget(env.LMSTUDIO_BASE_URL, "http://127.0.0.1:1234/v1", true);

  const defaultUpstream: ProviderConfig = {
    id: "default",
    name: "Default upstream",
    kind: "api",
    target,
    authScheme: "none",
    enabled: true
  };
  const openaiProfile: ProviderConfig = {
    id: "openai-env",
    name: "OpenAI env profile",
    kind: "api",
    target: openai.value,
    credentialEnv: "OPENAI_API_KEY",
    credentialRef: "env:OPENAI_API_KEY",
    authScheme: "bearer",
    protocol: "openai-responses",
    enabled: openai.safe && Boolean(env.OPENAI_BASE_URL || env.OPENAI_API_KEY)
  };
  const anthropicProfile: ProviderConfig = {
    id: "anthropic-env",
    name: "Anthropic env profile",
    kind: "api",
    target: anthropic.value,
    credentialEnv: "ANTHROPIC_API_KEY",
    credentialRef: "env:ANTHROPIC_API_KEY",
    authScheme: "x-api-key",
    protocol: "anthropic-messages",
    enabled: anthropic.safe && Boolean(env.ANTHROPIC_BASE_URL || env.ANTHROPIC_API_KEY)
  };
  const ollamaProfile: ProviderConfig = {
    id: "ollama-local",
    name: "Local Ollama compatible",
    kind: "local",
    target: ollama.value,
    authScheme: "none",
    protocol: "ollama-tags",
    enabled: ollama.safe && Boolean(env.OLLAMA_BASE_URL)
  };
  const lmstudioProfile: ProviderConfig = {
    id: "lmstudio-local",
    name: "Local LM Studio compatible",
    kind: "local",
    target: lmstudio.value,
    authScheme: "none",
    protocol: "openai-chat",
    enabled: lmstudio.safe && Boolean(env.LMSTUDIO_BASE_URL)
  };
  return [defaultUpstream, openaiProfile, anthropicProfile, ollamaProfile, lmstudioProfile];
}
125
+
126
/**
 * Reads the comma-separated ids in `MOLENKOPF_PROVIDER_IDS` and builds one provider per id,
 * skipping ids for which `providerFromEnv` yields nothing.
 */
function configuredEnvProviders(env: Record<string, string | undefined>): ProviderConfig[] {
  const declared: ProviderConfig[] = [];
  for (const id of splitCsv(env.MOLENKOPF_PROVIDER_IDS)) {
    const provider = providerFromEnv(id, env);
    if (provider) declared.push(provider);
  }
  return declared;
}
129
+
130
/**
 * Builds a provider from `MOLENKOPF_PROVIDER_<ID>_*` environment variables.
 *
 * The variable prefix is the upper-cased id with every character outside A-Z / 0-9 replaced by `_`.
 *
 * @returns `undefined` when the id is malformed, or the TARGET variable is missing or fails `safeTarget`
 *          (private addresses are accepted only for kind "local").
 */
function providerFromEnv(id: string, env: Record<string, string | undefined>): ProviderConfig | undefined {
  const validId = /^[a-z0-9][a-z0-9._:-]{0,63}$/i;
  if (!validId.test(id)) return undefined;

  const prefix = `MOLENKOPF_PROVIDER_${id.toUpperCase().replace(/[^A-Z0-9]/g, "_")}_`;
  const read = (suffix: string): string | undefined => env[`${prefix}${suffix}`];

  // Anything other than the exact string "local" falls back to "api".
  const kind = read("KIND") === "local" ? "local" : "api";
  const { value: target, safe } = safeTarget(read("TARGET"), "", kind === "local");
  if (!target || !safe) return undefined;

  const credentialEnv = read("CREDENTIAL_ENV")?.trim();
  return {
    id,
    name: read("NAME")?.trim() || id,
    kind,
    target,
    credentialEnv: credentialEnv || undefined,
    credentialRef: credentialEnv ? `env:${credentialEnv}` : "none",
    authScheme: authScheme(read("AUTH"), target, credentialEnv),
    protocol: protocol(read("PROTOCOL"), kind, target),
    enabled: read("ENABLED")?.toLowerCase() !== "false"
  };
}
150
+
151
/**
 * Picks the trimmed `value` (or `fallback` when it is blank) and runs it through `validateProviderTarget`.
 *
 * @returns `{ value, safe: true }` for an accepted candidate; `{ value: fallback, safe: false }` when
 *          validation throws; `{ value: "", safe: false }` when there is no candidate at all.
 */
function safeTarget(value: string | undefined, fallback = "", allowPrivate = false): { value: string; safe: boolean } {
  const trimmed = value?.trim();
  const candidate = trimmed ? trimmed : fallback;
  if (!candidate) return { value: "", safe: false };

  let accepted = true;
  try {
    validateProviderTarget(candidate, { allowPrivate });
  } catch {
    accepted = false;
  }
  return accepted ? { value: candidate, safe: true } : { value: fallback, safe: false };
}
161
+
162
/**
 * Resolves the auth scheme for an env-declared provider.
 *
 * An explicit, recognised `value` wins. Otherwise: "none" without a credential variable,
 * "x-api-key" when the target contains "anthropic", and "bearer" for everything else.
 */
function authScheme(value: string | undefined, target: string, credentialEnv?: string): ProviderConfig["authScheme"] {
  switch (value) {
    case "bearer":
    case "x-api-key":
    case "none":
      return value;
  }
  if (!credentialEnv) return "none";
  if (target.includes("anthropic")) return "x-api-key";
  return "bearer";
}
167
+
168
/**
 * Resolves the wire protocol for an env-declared provider.
 *
 * An explicit, recognised `value` wins. Otherwise local providers get "ollama-tags" when the target
 * contains "11434" and "openai-chat" when it does not; API providers get "anthropic-messages" when
 * the target contains "anthropic" and "openai-responses" otherwise.
 */
function protocol(value: string | undefined, kind: ProviderKind, target: string): ProviderConfig["protocol"] {
  switch (value) {
    case "openai-responses":
    case "anthropic-messages":
    case "openai-chat":
    case "ollama-tags":
      return value;
  }
  if (kind === "local") {
    return target.includes("11434") ? "ollama-tags" : "openai-chat";
  }
  if (target.includes("anthropic")) return "anthropic-messages";
  return "openai-responses";
}
174
+
175
/** Splits a comma-separated string into trimmed, non-empty items; `undefined` yields an empty list. */
function splitCsv(value: string | undefined): string[] {
  const items: string[] = [];
  for (const piece of (value ?? "").split(",")) {
    const trimmed = piece.trim();
    if (trimmed) items.push(trimmed);
  }
  return items;
}
178
+
179
/** Drops providers whose id was already seen; the first occurrence is kept and order is preserved. */
function uniqueById(providers: ProviderConfig[]): ProviderConfig[] {
  const firstById = new Map<string, ProviderConfig>();
  for (const provider of providers) {
    if (!firstById.has(provider.id)) firstById.set(provider.id, provider);
  }
  return Array.from(firstById.values());
}
@@ -0,0 +1,31 @@
1
+ // Weighted token distribution across providers. The provider currently furthest
2
+ // below its target share of total tokens is chosen next, so equal weights give a
3
+ // fair split and unequal weights (e.g. 80/20) hold the configured ratio over time.
4
+
5
/** One provider's configured weight and the tokens it has consumed so far. */
export type ProviderShare = { id: string; weight: number; usedTokens: number };

/**
 * Picks the provider that is furthest below its weighted share of the total token usage.
 *
 * Each provider's target is `weight / totalWeight * (totalUsed + 1)`; the `+ 1` gives every
 * provider a positive target before any tokens have been used. Ties go to the earliest entry.
 *
 * Only providers with a finite, positive weight take part. A non-finite `usedTokens` (NaN or
 * ±Infinity) is treated as 0 so a single corrupt counter cannot turn every deficit into NaN and
 * leave the caller without a provider; negative counters are clamped to 0.
 *
 * @param shares - Candidate providers.
 * @returns The chosen provider id, or `undefined` when no provider has a usable weight.
 */
export function chooseByDistribution(shares: ProviderShare[]): string | undefined {
  const enabled = shares.filter((share) => Number.isFinite(share.weight) && share.weight > 0);
  if (!enabled.length) return undefined;

  const usedOf = (share: ProviderShare): number => (Number.isFinite(share.usedTokens) ? Math.max(0, share.usedTokens) : 0);
  const totalWeight = enabled.reduce((sum, share) => sum + share.weight, 0);
  const totalUsed = enabled.reduce((sum, share) => sum + usedOf(share), 0);

  // Seed with the first enabled provider so an arithmetic overflow can never yield `undefined`.
  let bestId: string | undefined = enabled[0]?.id;
  let bestDeficit = -Infinity;
  for (const share of enabled) {
    const target = (share.weight / totalWeight) * (totalUsed + 1);
    const deficit = target - usedOf(share);
    if (deficit > bestDeficit) {
      bestDeficit = deficit;
      bestId = share.id;
    }
  }
  return bestId;
}
24
+
25
// Converts provider weights into display percentages of the total weight, rounded to one decimal.
// Negative weights count as 0; when the total is not positive every share is reported as 0.
export function weightShares(shares: { id: string; weight: number }[]): Record<string, number> {
  let total = 0;
  for (const { weight } of shares) total += Math.max(0, weight);

  const percentages: Record<string, number> = {};
  for (const { id, weight } of shares) {
    if (total > 0) {
      percentages[id] = Math.round((Math.max(0, weight) / total) * 1000) / 10;
    } else {
      percentages[id] = 0;
    }
  }
  return percentages;
}
@@ -0,0 +1,139 @@
1
+ import { shortHash } from "../utils/hash.ts";
2
+ import { replaceJsonStrings, scanJsonStringValues, type JsonStringReplacement } from "../pipeline/json-string-spans.ts";
3
+
4
/** One redaction that was applied: the rule kind and the short hash embedded in the marker. */
export type Redaction = { kind: string; hash: string };
/** Output of redactSecrets: the rewritten text plus every redaction recorded along the way. */
export type RedactionResult = { text: string; redactions: Redaction[] };

// A redaction rule. When `value` is present it selects the capture group that is hashed and replaced,
// and redactSecrets keeps capture group 1 (the prefix) in the output; without `value` the whole match
// is hashed and replaced.
type Rule = { kind: string; pattern: RegExp; value?: (match: RegExpExecArray) => string };

// Rules are applied one after another in array order by redactSecrets, each on the output of the
// previous one. Several patterns therefore carry a `(?!\[REDACTED_SECRET:)` lookahead so they do not
// re-match a marker produced by an earlier rule.
const rules: Rule[] = [
  { kind: "private_key", pattern: /-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/g },
  // Header-style credentials: group 1 is the header prefix, group 2 the secret.
  { kind: "authorization_bearer", pattern: /(Authorization:\s*Bearer\s+)([^\s\r\n]+)/gi, value: (m) => m[2] },
  { kind: "authorization_basic", pattern: /(Authorization:\s*Basic\s+)([A-Za-z0-9+/=]+)/gi, value: (m) => m[2] },
  { kind: "cookie", pattern: /(Cookie:\s*)([^\r\n]+)/gi, value: (m) => m[2] },
  // Vendor token formats. "sk-ant-" keys also fit the openai pattern below, so the anthropic rule
  // must stay ahead of it for the kind to be reported correctly.
  { kind: "anthropic_api_key", pattern: /\bsk-ant-[A-Za-z0-9_-]{32,}\b/g },
  { kind: "openai_api_key", pattern: /\bsk-(?:proj-|)[A-Za-z0-9_-]{32,}\b/g },
  { kind: "github_token", pattern: /\b(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9_]{30,}\b/g },
  { kind: "molenkopf_api_key", pattern: /(?<![A-Za-z0-9_-])mk_[A-Za-z0-9_-]{24,}(?![A-Za-z0-9_-])/g },
  { kind: "gitlab_token", pattern: /\bglpat-[A-Za-z0-9_-]{20,}\b/g },
  { kind: "npm_token", pattern: /\bnpm_[A-Za-z0-9]{32,}\b/g },
  { kind: "slack_token", pattern: /\bxox[baprs]-[A-Za-z0-9-]{20,}\b/g },
  { kind: "stripe_secret", pattern: /\b(?:sk|rk)_(?:live|test)_[A-Za-z0-9]{16,}\b/g },
  { kind: "google_api_key", pattern: /\bAIza[0-9A-Za-z_-]{35}\b/g },
  { kind: "aws_access_key_id", pattern: /\bAKIA[0-9A-Z]{16}\b/g },
  { kind: "jwt", pattern: /\beyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\b/g },
  // URLs carrying `user:password@`; the whole URL is replaced. db_url runs before the generic rule.
  { kind: "db_url", pattern: /\b(?:postgres(?:ql)?|mysql|mariadb|mongodb(?:\+srv)?|redis):\/\/[^:\s/@]+:[^@\s]+@[^\s"'<>]+/gi },
  { kind: "basic_auth_url", pattern: /\b[a-z][a-z0-9+.-]*:\/\/[^:\s/@]+:[^@\s]+@[^\s"'<>]+/gi },
  { kind: "sentry_dsn", pattern: /\bhttps:\/\/[A-Za-z0-9]+@[^/\s"'<>]*sentry\.io\/[0-9A-Za-z_-]+/gi },
  // `name=value` / `name: value` assignments: group 1 is the name and separator, group 2 the secret.
  { kind: "account_key", pattern: /\b(AccountKey=)(?!\[REDACTED_SECRET:)([^;\s]+)/gi, value: (m) => m[2] },
  { kind: "sensitive_assignment", pattern: /(?<!REDACTED_SECRET:)\b((?:access[_-]?token|refresh[_-]?token|client[_-]?secret|session[_-]?token|auth[_-]?token|db[_-]?url)\s*[:=]\s*)(?!\[REDACTED_SECRET:)([^\s&"'`,;}\\\]]+)/gi, value: (m) => m[2] },
  { kind: "password", pattern: /\b(password=)(?!\[REDACTED_SECRET:)([^\s&"'`,;}\\\]]+)/gi, value: (m) => m[2] },
  { kind: "token", pattern: /\b(token=)(?!\[REDACTED_SECRET:)([^\s&"'`,;}\\\]]+)/gi, value: (m) => m[2] },
  { kind: "secret", pattern: /\b(secret=)(?!\[REDACTED_SECRET:)([^\s&"'`,;}\\\]]+)/gi, value: (m) => m[2] },
  { kind: "api_key", pattern: /\b(api_key=)(?!\[REDACTED_SECRET:)([^\s&"'`,;}\\\]]+)/gi, value: (m) => m[2] },
  // Upper-case environment-style names. Three groups here: leading delimiter, name plus separator,
  // secret — redactSecrets special-cases this kind to re-emit groups 1 and 2.
  { kind: "env_secret", pattern: /(^|[\s{[,;])((?!REDACTED_SECRET\b)[A-Z][A-Z0-9_]*(?:PASSWORD|PASSWD|PWD|TOKEN|SECRET|API_KEY|PRIVATE_KEY|CREDENTIAL)[A-Z0-9_]*\s*[:=]\s*)(?!\[REDACTED_SECRET:)([^\s&"'`,;}\\\]]+)/g, value: (m) => m[3] }
];
// Matches a sensitive word that forms a whole `_`/`-` delimited segment of a key name
// (isSensitiveJsonKey inserts `_` at camelCase boundaries before testing).
const sensitiveJsonKeys = /(?:^|[_-])(?:password|passwd|pwd|token|authorization|auth|cookie|secret|api[_-]?key|credential|private[_-]?key)(?:$|[_-])/i;
37
+
38
/**
 * Replaces secrets in `input` with `[REDACTED_SECRET:<kind>:sha256:<hash>]` markers.
 *
 * JSON-looking input is first processed by key name (`redactJsonKeys`); afterwards every entry of
 * `rules` is applied in order to the running text.
 *
 * @param input - Text to scrub.
 * @returns The scrubbed text and one `Redaction` entry per replacement, in the order they were made.
 */
export function redactSecrets(input: string): RedactionResult {
  const redactions: Redaction[] = [];
  let text = redactJsonKeys(input, redactions) ?? input;
  for (const rule of rules) {
    text = text.replace(rule.pattern, (...args) => {
      // replace() calls back with (match, ...captures, offset, input[, namedGroups]). Captures are
      // strings or undefined, so the first numeric argument is the offset. Locating it this way keeps
      // `match` pointing at the full match even for patterns with capture groups or named groups,
      // where counting from the end of the argument list would pick the wrong element.
      const offsetIndex = args.findIndex((arg) => typeof arg === "number");
      const exec = args.slice(0, offsetIndex) as unknown as RegExpExecArray;
      const match = args[0] as string;
      const secret = rule.value ? rule.value(exec) : match;
      const marker = redactionMarker(rule.kind, secret, redactions);
      // env_secret has two prefix groups (delimiter, then name and separator) that must be kept.
      if (rule.kind === "env_secret") return `${exec[1]}${exec[2]}${marker}`;
      // Other value-based rules keep their single prefix group.
      if (rule.value && exec[1]) return `${exec[1]}${marker}`;
      return marker;
    });
  }
  return { text, redactions };
}
54
+
55
/**
 * Redacts values stored under sensitive keys when `input` looks like JSON (starts with `{` or `[`).
 *
 * Steps: refuse over-deep documents (replacing the whole input with a `json_too_deep` marker when a
 * sensitive key is present), redact non-string values structurally, then redact string values by
 * span, recursing into string values that themselves look like JSON.
 *
 * @returns The rewritten text, or `undefined` when nothing was changed or the input is not JSON-like.
 */
function redactJsonKeys(input: string, redactions: Redaction[]): string | undefined {
  const looksLikeJson = /^\s*[\[{]/.test(input);
  if (!looksLikeJson) return undefined;

  if (exceedsSafeJsonDepth(input)) {
    if (!containsSensitiveJsonKey(input)) return undefined;
    return redactionMarker("json_too_deep", input, redactions);
  }

  const structural = redactSensitiveJsonValues(input, redactions);
  const working = structural ?? input;
  const spans = scanJsonStringValues(working);
  if (!spans) return structural;

  const edits: JsonStringReplacement[] = [];
  for (const span of spans) {
    const { start, end, value } = span;
    if (span.key && isSensitiveJsonKey(span.key)) {
      // Values that already are markers are left untouched.
      if (!isRedactionMarker(value)) {
        edits.push({ start, end, value: redactionMarker(`json_${safeKind(span.key)}`, value, redactions) });
      }
    } else {
      // A string value may itself hold serialised JSON; scrub it recursively.
      const inner = redactJsonKeys(value, redactions);
      if (inner && inner !== value) edits.push({ start, end, value: inner });
    }
  }
  if (edits.length === 0) return structural;
  return replaceJsonStrings(working, edits);
}
74
+
75
/**
 * Parses `input` as JSON and replaces every non-string value stored under a sensitive key with a
 * redaction marker (string values are left for the span-based pass in redactJsonKeys).
 *
 * The walk is iterative with an explicit stack, so nesting depth does not consume call stack.
 *
 * @returns The re-serialised document when at least one value was replaced, otherwise `undefined`
 *          (also when `input` is not valid JSON).
 */
function redactSensitiveJsonValues(input: string, redactions: Redaction[]): string | undefined {
  let root: unknown;
  try {
    root = JSON.parse(input) as unknown;
  } catch {
    return undefined;
  }

  type Frame = { value: unknown; key?: string; parent?: Record<string, unknown> | unknown[]; index?: string | number };
  const pending: Frame[] = [{ value: root }];
  let mutated = false;

  for (let frame = pending.pop(); frame !== undefined; frame = pending.pop()) {
    const { value, key, parent, index } = frame;

    if (key && isSensitiveJsonKey(key) && typeof value !== "string") {
      const marker = redactionMarker(`json_${safeKind(key)}`, JSON.stringify(value), redactions);
      if (Array.isArray(parent) && typeof index === "number") parent[index] = marker;
      else if (parent && typeof index === "string") parent[index] = marker;
      mutated = true;
      continue;
    }

    if (!value || typeof value !== "object") continue;

    if (Array.isArray(value)) {
      // Array elements carry no key of their own.
      for (let position = 0; position < value.length; position++) {
        pending.push({ value: value[position], parent: value, index: position });
      }
    } else {
      const record = value as Record<string, unknown>;
      for (const [name, child] of Object.entries(record)) {
        pending.push({ value: child, key: name, parent: record, index: name });
      }
    }
  }

  return mutated ? JSON.stringify(root) : undefined;
}
99
+
100
/**
 * Records a redaction and returns the marker that stands in for the secret.
 *
 * @param kind - Rule or key kind to embed in the marker.
 * @param secret - Secret text; only its `shortHash` is kept.
 * @param redactions - Accumulator that receives the `{ kind, hash }` entry.
 */
function redactionMarker(kind: string, secret: string, redactions: Redaction[]): string {
  const digest = shortHash(secret);
  redactions.push({ kind, hash: digest });
  return "[REDACTED_SECRET:" + kind + ":sha256:" + digest + "]";
}
105
+
106
/** Lower-cases a key and collapses every run of characters outside a-z / 0-9 into a single `_`. */
function safeKind(key: string): string {
  const lowered = key.toLowerCase();
  return lowered.split(/[^a-z0-9]+/).join("_");
}
109
+
110
/**
 * Tells whether a JSON key names a sensitive field. camelCase boundaries are turned into `_` first
 * so that e.g. `apiKey` is tested as `api_Key` against the segment-based `sensitiveJsonKeys` pattern.
 */
function isSensitiveJsonKey(key: string): boolean {
  const segmented = key.replace(/([a-z0-9])([A-Z])/g, (_whole, before: string, after: string) => `${before}_${after}`);
  return sensitiveJsonKeys.test(segmented);
}
114
+
115
/**
 * Text-level check for a quoted key (a string literal followed by `:`) whose name contains a
 * sensitive word anywhere inside it; no JSON parsing is involved.
 */
function containsSensitiveJsonKey(input: string): boolean {
  const sensitiveKeyLiteral = /"(?:[^"\\]|\\.)*(?:password|passwd|pwd|token|authorization|auth|cookie|secret|api[_-]?key|credential|private[_-]?key)(?:[^"\\]|\\.)*"\s*:/i;
  return input.search(sensitiveKeyLiteral) !== -1;
}
118
+
119
/** True when `value` is exactly one redaction marker with a 12-character lowercase hex hash. */
function isRedactionMarker(value: string): boolean {
  const markerShape = /^\[REDACTED_SECRET:[a-z0-9_-]+:sha256:[a-f0-9]{12}\]$/;
  return value.match(markerShape) !== null;
}
122
+
123
/**
 * Scans `text` once and reports whether bracket nesting (`{`/`[`) outside of string literals ever
 * goes deeper than 1000 levels. The text is not validated as JSON.
 */
function exceedsSafeJsonDepth(text: string): boolean {
  let nesting = 0;
  let insideString = false;
  for (let pos = 0; pos < text.length; pos += 1) {
    const ch = text[pos];
    // An unescaped quote toggles string mode; brackets inside strings do not count.
    if (ch === '"' && !escaped(text, pos)) insideString = !insideString;
    if (insideString) continue;
    switch (ch) {
      case "{":
      case "[":
        nesting += 1;
        if (nesting > 1000) return true;
        break;
      case "}":
      case "]":
        nesting -= 1;
        break;
    }
  }
  return false;
}

/** True when the character at `quoteIndex` is preceded by an odd number of consecutive backslashes. */
function escaped(text: string, quoteIndex: number): boolean {
  let cursor = quoteIndex - 1;
  while (cursor >= 0 && text[cursor] === "\\") cursor -= 1;
  const backslashes = quoteIndex - 1 - cursor;
  return backslashes % 2 === 1;
}
@@ -0,0 +1,61 @@
1
+ import { lookup } from "node:dns/promises";
2
+ import { isIP } from "node:net";
3
+
4
// Validation switches: `allowPrivate` permits private/loopback hosts, `allowSearch` permits a query
// string, and `path` is the label used in error messages (defaults to "target" at the call sites).
export type TargetPolicy = { allowPrivate?: boolean; allowSearch?: boolean; path?: string };
// Result of resolveConnectTarget: the parsed URL plus the first resolved address and its IP family.
export type ConnectTarget = { url: URL; address: string; family: 4 | 6 };
6
+
7
/**
 * Synchronously validates a provider target URL without resolving DNS.
 *
 * @param value - URL string to check.
 * @param policy - Validation switches; `policy.path` labels error messages (default "target").
 * @returns `value` unchanged when it is acceptable.
 * @throws Error when the URL is not http(s), carries a username/password, carries a query string
 *         while `allowSearch` is off, or names a private host while `allowPrivate` is off.
 */
export function validateProviderTarget(value: string, policy: TargetPolicy = {}): string {
  const label = policy.path ?? "target";
  const url = parseHttpTarget(value, label);

  const hasUserInfo = Boolean(url.username || url.password);
  const hasForbiddenQuery = !policy.allowSearch && Boolean(url.search);
  if (hasUserInfo || hasForbiddenQuery) {
    throw new Error(`unsafe URL: ${label}`);
  }
  if (!policy.allowPrivate && isPrivateHost(url.hostname)) {
    throw new Error(`unsafe private URL: ${label}`);
  }
  return value;
}
13
+
14
/**
 * Validates a target URL and resolves its host, returning the address the caller should connect to.
 *
 * Unless `allowPrivate` is set, the target is rejected when ANY resolved address is private, so a
 * name that resolves to both public and private addresses is refused.
 *
 * @param value - URL string to check and resolve.
 * @param policy - Validation switches; `policy.path` labels error messages (default "target").
 * @returns The parsed URL with the first resolved address and its family.
 * @throws Error for a non-http(s) URL, embedded credentials, a disallowed query string, a private
 *         resolution result, or an empty result; DNS failures from `lookup` propagate unchanged.
 */
export async function resolveConnectTarget(value: string, policy: TargetPolicy = {}): Promise<ConnectTarget> {
  const url = parseHttpTarget(value, policy.path ?? "target");
  if (url.username || url.password || (!policy.allowSearch && url.search)) throw new Error(`unsafe URL: ${policy.path ?? "target"}`);
  // URL#hostname keeps the square brackets of an IPv6 literal ("[::1]"); the resolver does not
  // accept that form, so strip them before the lookup.
  const host = url.hostname.replace(/^\[|\]$/g, "");
  const resolved = await lookup(host, { all: true, verbatim: true });
  if (!policy.allowPrivate && resolved.some((item) => isPrivateHost(item.address))) throw new Error(`unsafe private URL: ${policy.path ?? "target"}`);
  const first = resolved[0];
  if (!first) throw new Error(`invalid URL: ${policy.path ?? "target"}`);
  return { url, address: first.address, family: first.family === 6 ? 6 : 4 };
}
23
+
24
/**
 * Classifies a host name or IP literal as private / non-public.
 *
 * Recognised as private: `localhost` and any `*.localhost` name (with or without a trailing root
 * dot), `metadata.google.internal`, and IP literals judged private by the IPv4 / IPv6 helpers below.
 * Other DNS names return `false` here; they have to be checked after resolution.
 *
 * @param hostname - Host as found in a URL (IPv6 may be bracketed) or a resolved address.
 */
export function isPrivateHost(hostname: string): boolean {
  // Normalise: case, IPv6 brackets, and the fully-qualified trailing dot ("localhost.").
  const host = hostname.toLowerCase().replace(/^\[|\]$/g, "").replace(/\.$/, "");
  if (host === "localhost" || host.endsWith(".localhost") || host === "metadata.google.internal") return true;
  const ip = isIP(host);
  if (ip === 6) return isPrivateIpv6(host);
  if (ip === 4) return isPrivateIpv4(host);
  return false;
}

/**
 * Parses `value` as a URL and requires the http or https scheme.
 *
 * @param path - Label used in the error message.
 * @throws Error `invalid URL: <path>` or `invalid URL protocol: <path>`.
 */
function parseHttpTarget(value: string, path: string): URL {
  let url: URL;
  try { url = new URL(value); } catch { throw new Error(`invalid URL: ${path}`); }
  if (url.protocol !== "http:" && url.protocol !== "https:") throw new Error(`invalid URL protocol: ${path}`);
  return url;
}

/**
 * True for dotted IPv4 addresses in 0/8, 10/8, 100.64/10, 127/8, 169.254/16, 172.16/12,
 * 192.168/16 and 224/3 (multicast and reserved). Anything that is not four 0-255 octets is `false`.
 */
function isPrivateIpv4(host: string): boolean {
  const parts = host.split(".").map(Number);
  if (parts.length !== 4 || parts.some((part) => !Number.isInteger(part) || part < 0 || part > 255)) return false;
  const [a = 0, b = 0] = parts;
  return a === 10 || a === 127 || a === 0 || a >= 224 || (a === 100 && b >= 64 && b <= 127)
    || (a === 169 && b === 254) || (a === 172 && b >= 16 && b <= 31) || (a === 192 && b === 168);
}

/**
 * True for IPv6 addresses that are not publicly routable.
 *
 * The address is expanded to its eight 16-bit groups first, so the result does not depend on how
 * the literal was written (compressed, fully expanded, or with a dotted IPv4 tail):
 * - IPv4-mapped (::ffff:0:0/96) and NAT64 (64:ff9b::/96) addresses are judged by the embedded IPv4;
 * - everything outside global unicast 2000::/3 is private — this covers :: and ::1, link-local
 *   fe80::/10, unique-local fc00::/7 and multicast ff00::/8;
 * - a literal that cannot be expanded is treated as private (fail closed).
 */
function isPrivateIpv6(host: string): boolean {
  const groups = expandIpv6(host);
  if (!groups) return true;
  const leadingZeros = (count: number): boolean => groups.slice(0, count).every((group) => group === 0);
  if (leadingZeros(5) && groups[5] === 0xffff) return isPrivateIpv4(embeddedIpv4(groups));
  if (groups[0] === 0x64 && groups[1] === 0xff9b && groups.slice(2, 6).every((group) => group === 0)) return isPrivateIpv4(embeddedIpv4(groups));
  return ((groups[0] ?? 0) & 0xe000) !== 0x2000;
}

/** Renders the low 32 bits (groups 6 and 7) of an expanded IPv6 address as a dotted IPv4 string. */
function embeddedIpv4(groups: number[]): string {
  const high = groups[6] ?? 0;
  const low = groups[7] ?? 0;
  return [high >> 8, high & 255, low >> 8, low & 255].join(".");
}

/**
 * Expands an IPv6 literal into eight numeric groups. A zone id (`%eth0`) is ignored and a dotted
 * IPv4 tail is converted into two groups. Returns `undefined` when the text is not a well-formed
 * IPv6 literal.
 */
function expandIpv6(host: string): number[] | undefined {
  let text = host.split("%", 1)[0] ?? "";
  const lastColon = text.lastIndexOf(":");
  const tail = text.slice(lastColon + 1);
  if (tail.includes(".")) {
    const octets = tail.split(".").map(Number);
    if (octets.length !== 4 || octets.some((octet) => !Number.isInteger(octet) || octet < 0 || octet > 255)) return undefined;
    const [a = 0, b = 0, c = 0, d = 0] = octets;
    text = `${text.slice(0, lastColon + 1)}${((a << 8) | b).toString(16)}:${((c << 8) | d).toString(16)}`;
  }
  const halves = text.split("::");
  if (halves.length > 2) return undefined;
  const toParts = (side: string | undefined): string[] => (side ? side.split(":") : []);
  const head = toParts(halves[0]);
  const rest = toParts(halves[1]);
  // "::" stands for at least one zero group; without it all eight groups must be present.
  const missing = 8 - head.length - rest.length;
  if (halves.length === 2 ? missing < 1 : missing !== 0) return undefined;
  const parts = [...head, ...Array.from({ length: missing }, () => "0"), ...rest];
  const groups = parts.map((part) => (/^[0-9a-f]{1,4}$/i.test(part) ? Number.parseInt(part, 16) : Number.NaN));
  return groups.some((group) => Number.isNaN(group)) ? undefined : groups;
}
@@ -0,0 +1,6 @@
1
/** Directory name used when no override is configured. */
export const DEFAULT_DATA_DIR = ".molenkopf";

/** Returns `MOLENKOPF_DATA_DIR` from the environment when it is set and non-empty, else the default. */
export function defaultDataDir(): string {
  const override = process.env.MOLENKOPF_DATA_DIR;
  return override ? override : DEFAULT_DATA_DIR;
}
@@ -0,0 +1,30 @@
1
+ import { chmod, mkdir, writeFile } from "node:fs/promises";
2
+ import { chmodSync, mkdirSync } from "node:fs";
3
+
4
+ export const PRIVATE_DIR_MODE = 0o700; // Directory permission bits: read/write/execute for the owner only.
5
+ export const PRIVATE_FILE_MODE = 0o600; // File permission bits: read/write for the owner only.
6
+
7
/**
 * Creates `path` (and any missing parents) and restricts it to the owner.
 *
 * On Windows no mode is passed to `mkdir`. The follow-up `chmodPrivate` call
 * applies the mode even when the directory already existed.
 *
 * @param path - Directory to create.
 */
export async function ensurePrivateDir(path: string): Promise<void> {
  const onWindows = process.platform === "win32";
  const options = onWindows ? { recursive: true } : { recursive: true, mode: PRIVATE_DIR_MODE };
  await mkdir(path, options);
  await chmodPrivate(path, PRIVATE_DIR_MODE);
}
11
+
12
/**
 * Synchronous counterpart of `ensurePrivateDir`: creates `path` recursively
 * and then applies the private directory mode.
 *
 * @param path - Directory to create.
 */
export function ensurePrivateDirSync(path: string): void {
  const onWindows = process.platform === "win32";
  const options = onWindows ? { recursive: true } : { recursive: true, mode: PRIVATE_DIR_MODE };
  mkdirSync(path, options);
  chmodPrivateSync(path, PRIVATE_DIR_MODE);
}
16
+
17
/**
 * Writes `data` to `path` and restricts the file to the owner.
 *
 * On Windows no mode is passed to `writeFile`. The follow-up `chmodPrivate`
 * call applies the mode after the write in either case.
 *
 * @param path - Destination file.
 * @param data - Content to write.
 */
export async function writePrivateFile(path: string, data: string | Buffer): Promise<void> {
  const onWindows = process.platform === "win32";
  const options = onWindows ? undefined : { mode: PRIVATE_FILE_MODE };
  await writeFile(path, data, options);
  await chmodPrivate(path, PRIVATE_FILE_MODE);
}
21
+
22
/**
 * Best-effort `chmod`: does nothing on Windows and ignores any failure.
 *
 * @param path - File or directory to update.
 * @param mode - Permission bits to apply.
 */
export async function chmodPrivate(path: string, mode: number): Promise<void> {
  if (process.platform !== "win32") {
    try {
      await chmod(path, mode);
    } catch {
      /* best effort for non-POSIX filesystems */
    }
  }
}
26
+
27
/**
 * Synchronous best-effort `chmod`: does nothing on Windows and ignores any failure.
 *
 * @param path - File or directory to update.
 * @param mode - Permission bits to apply.
 */
export function chmodPrivateSync(path: string, mode: number): void {
  if (process.platform !== "win32") {
    try {
      chmodSync(path, mode);
    } catch {
      /* best effort for non-POSIX filesystems */
    }
  }
}
@@ -0,0 +1,10 @@
1
+ import { rm } from "node:fs/promises";
2
+ import { isAbsolute, relative, resolve } from "node:path";
3
+
4
/**
 * Recursively deletes `child` inside `root`, refusing any target that is not
 * strictly below `root`.
 *
 * @param root - Directory that must contain the target.
 * @param child - Path of the target, resolved relative to `root`.
 * @throws Error `unsafe_purge_path` when the target resolves to `root` itself,
 *   or its path relative to `root` starts with `..` or is absolute.
 */
export async function purgeChildDir(root: string, child: string): Promise<void> {
  const rootDir = resolve(root);
  const victim = resolve(rootDir, child);
  const offset = relative(rootDir, victim);
  const staysInside = offset !== "" && !offset.startsWith("..") && !isAbsolute(offset);
  if (!staysInside) throw new Error("unsafe_purge_path");
  // `force` makes a missing target a no-op rather than an error.
  await rm(victim, { recursive: true, force: true });
}
@@ -0,0 +1,114 @@
1
+ import { readFile, rename, rm } from "node:fs/promises";
2
+ import { join } from "node:path";
3
+ import { defaultDataDir } from "../storage/local-paths.ts";
4
+ import { chmodPrivate, ensurePrivateDir, PRIVATE_FILE_MODE, writePrivateFile } from "../storage/private-state.ts";
5
+ import { purgeChildDir } from "../storage/purge-dir.ts";
6
+ import { sha256 } from "../utils/hash.ts";
7
+ import { byteLength } from "../utils/text.ts";
8
+
9
+ export type RetrievalMeta = { // Metadata record written as JSON next to each stored excerpt.
10
+ hash: string; // Lowercase hex SHA-256 of the original text (64 characters).
11
+ createdAt: string; // ISO-8601 timestamp taken when save() runs.
12
+ contentKind: string; // Caller-supplied; presumably the detected content category — confirm.
13
+ originalBytes: number; // UTF-8 byte length of the original text, computed by save().
14
+ compressedBytes: number; // Caller-supplied; presumably the size after compression — confirm.
15
+ compressorName: string; // Caller-supplied; presumably names the compressor that ran — confirm.
16
+ redacted: boolean; // Caller-supplied; presumably whether redaction was applied — confirm.
17
+ requestId?: string; // Optional caller-supplied request identifier.
18
+ };
19
+
20
+ const EXCERPT_CHARS = 320; // Maximum number of leading characters of the original text kept in a stored excerpt.
21
+ const RETRIEVAL_PREFIX = "molenkopf://sha256/"; // Prefix of every retrieval id; the 64-character hex hash follows it.
22
+
23
/**
 * Content-addressed store for context excerpts.
 *
 * Each entry is a `<hash>.txt` / `<hash>.json` pair under
 * `<root>/store/sha256/<hh>/<hh>/`, where the two directory levels are the
 * first four hex characters of the hash. Only a bounded excerpt of the text
 * is written, never the full original.
 */
export class RetrievalStore {
  private root: string;

  /**
   * @param root - Base data directory; defaults to `defaultDataDir()`.
   */
  constructor(root = defaultDataDir()) {
    this.root = root;
  }

  /**
   * Stores an excerpt of `text` together with its metadata.
   *
   * @param text - Original content; its SHA-256 becomes the entry key.
   * @param meta - Caller-supplied metadata fields.
   * @returns The retrieval id and the complete metadata record that was written.
   */
  async save(text: string, meta: Omit<RetrievalMeta, "hash" | "createdAt" | "originalBytes">): Promise<{ id: string; meta: RetrievalMeta }> {
    const digest = sha256(text);
    const record: RetrievalMeta = {
      hash: digest,
      createdAt: new Date().toISOString(),
      originalBytes: byteLength(text),
      ...meta,
    };
    const bucket = this.dirFor(digest);
    await ensurePrivateDir(bucket);
    const body = boundedExcerpt(text);
    await atomicPairWrite(bucket, digest, body, JSON.stringify(record, null, 2));
    return { id: `${RETRIEVAL_PREFIX}${digest}`, meta: record };
  }

  /** Computes the retrieval id `text` would receive, without writing anything. */
  idFor(text: string): string {
    const digest = sha256(text);
    return `${RETRIEVAL_PREFIX}${digest}`;
  }

  /**
   * Reads back the stored text for `id` after validating its metadata file.
   *
   * @throws Error when the id is malformed or the metadata is invalid;
   *   file-system errors propagate.
   */
  async retrieve(id: string): Promise<string> {
    const digest = this.hashFromId(id);
    await this.checkedMetadata(digest);
    const textPath = join(this.dirFor(digest), `${digest}.txt`);
    return readFile(textPath, "utf8");
  }

  /**
   * Returns the validated metadata record for `id`.
   *
   * @throws Error when the id is malformed or the metadata is invalid.
   */
  async metadata(id: string): Promise<RetrievalMeta> {
    return this.checkedMetadata(this.hashFromId(id));
  }

  /** Deletes the whole `store` directory below the root. */
  async purgeAll(): Promise<void> {
    await purgeChildDir(this.root, "store");
  }

  /** Validates a retrieval id and returns its lowercase 64-character hex hash. */
  private hashFromId(id: string): string {
    if (!id.startsWith(RETRIEVAL_PREFIX)) throw new Error("invalid retrieval id");
    const digest = id.slice(RETRIEVAL_PREFIX.length).toLowerCase();
    if (!/^[a-f0-9]{64}$/.test(digest)) throw new Error("invalid retrieval id");
    return digest;
  }

  /** Maps a hash to its two-level fan-out directory. */
  private dirFor(hash: string): string {
    const level1 = hash.slice(0, 2);
    const level2 = hash.slice(2, 4);
    return join(this.root, "store", "sha256", level1, level2);
  }

  /** Loads `<hash>.json` and checks both its shape and that it records the same hash. */
  private async checkedMetadata(hash: string): Promise<RetrievalMeta> {
    const jsonPath = join(this.dirFor(hash), `${hash}.json`);
    const raw = await readFile(jsonPath, "utf8");
    const parsed = JSON.parse(raw) as RetrievalMeta;
    if (!isRetrievalMeta(parsed) || parsed.hash !== hash) throw new Error("invalid retrieval metadata");
    return parsed;
  }
}
75
+
76
/**
 * Writes the `<hash>.txt` / `<hash>.json` pair via temporary files that are
 * renamed into place, so readers never observe a half-written file.
 *
 * The text file is finalized first, then the JSON file. On failure the
 * temporary files are removed, and a text file that was finalized without
 * its JSON partner is removed as well.
 *
 * @param dir - Existing directory that receives both files.
 * @param hash - Base name shared by both files.
 * @param text - Content of `<hash>.txt`.
 * @param json - Content of `<hash>.json`.
 * @throws Re-throws the original write or rename error after cleanup.
 */
async function atomicPairWrite(dir: string, hash: string, text: string, json: string): Promise<void> {
  // pid + timestamp alone is not unique: two saves of the same hash started in
  // the same millisecond would share temp files and remove each other's data.
  // The random component keeps concurrent writers apart.
  const suffix = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
  const textTmp = join(dir, `${hash}.${suffix}.txt.tmp`);
  const jsonTmp = join(dir, `${hash}.${suffix}.json.tmp`);
  const textPath = join(dir, `${hash}.txt`);
  const jsonPath = join(dir, `${hash}.json`);
  let textFinalized = false;
  let jsonFinalized = false;
  try {
    await writePrivateFile(textTmp, text);
    await writePrivateFile(jsonTmp, json);
    await rename(textTmp, textPath);
    textFinalized = true;
    await chmodPrivate(textPath, PRIVATE_FILE_MODE);
    await rename(jsonTmp, jsonPath);
    jsonFinalized = true;
    await chmodPrivate(jsonPath, PRIVATE_FILE_MODE);
  } catch (err) {
    // Cleanup is best effort; the original error is what the caller needs.
    await rm(textTmp, { force: true }).catch(() => {});
    await rm(jsonTmp, { force: true }).catch(() => {});
    // JSON is only finalized after the text file, so the only possible
    // half-written pair is "text present, JSON missing".
    if (textFinalized && !jsonFinalized) await rm(textPath, { force: true }).catch(() => {});
    throw err;
  }
}
101
+
102
/**
 * Runtime type guard for metadata parsed from disk.
 *
 * @param value - Arbitrary parsed JSON.
 * @returns `true` only when every required field has the expected primitive
 *   type, `hash` is a 64-character lowercase hex string, and `requestId` is
 *   either absent or a string.
 */
function isRetrievalMeta(value: unknown): value is RetrievalMeta {
  if (!value || typeof value !== "object") return false;
  const item = value as Record<string, unknown>;
  const { hash, requestId } = item;
  // RegExp.test() coerces its argument to a string, so a non-string such as a
  // one-element array would satisfy the pattern; check the type first.
  if (typeof hash !== "string" || !/^[a-f0-9]{64}$/.test(hash)) return false;
  return typeof item.createdAt === "string" && typeof item.contentKind === "string"
    && typeof item.originalBytes === "number" && typeof item.compressedBytes === "number"
    && typeof item.compressorName === "string" && typeof item.redacted === "boolean"
    && (requestId === undefined || typeof requestId === "string");
}
110
+
111
/**
 * Builds the text that is persisted for an entry: a fixed notice line followed
 * by at most `EXCERPT_CHARS` leading characters of `text`. When the text is
 * longer, a marker stating how many characters were dropped is appended.
 *
 * @param text - Original content.
 * @returns Notice line plus the possibly truncated excerpt.
 */
function boundedExcerpt(text: string): string {
  const header = "Context excerpt only. Full original content is not persisted.";
  if (text.length <= EXCERPT_CHARS) return `${header}\n${text}`;

  const dropped = text.length - EXCERPT_CHARS;
  const kept = text.slice(0, EXCERPT_CHARS);
  return `${header}\n${kept}\n[TRUNCATED_CONTEXT:${dropped}_CHARS]`;
}
@@ -0,0 +1,9 @@
1
+ import { createHash } from "node:crypto";
2
+
3
/**
 * Hashes `text` with SHA-256.
 *
 * @param text - Input string.
 * @returns The digest as 64 lowercase hex characters.
 */
export function sha256(text: string): string {
  const hasher = createHash("sha256");
  hasher.update(text);
  return hasher.digest("hex");
}
6
+
7
/**
 * Returns an abbreviated hash: the first 12 hex characters of the SHA-256 of `text`.
 *
 * @param text - Input string.
 * @returns A 12-character lowercase hex prefix.
 */
export function shortHash(text: string): string {
  const digest = sha256(text);
  return digest.substring(0, 12);
}
@@ -0,0 +1,18 @@
1
/**
 * Measures how many bytes `text` occupies when encoded as UTF-8.
 *
 * @param text - Input string.
 * @returns Encoded size in bytes.
 */
export function byteLength(text: string): number {
  const encoding = "utf8";
  return Buffer.byteLength(text, encoding);
}
4
+
5
/**
 * Removes ANSI CSI escape sequences (ESC `[` … final byte), such as colour
 * codes, from `text`.
 *
 * @param text - Possibly escape-laden terminal output.
 * @returns The text with every matched sequence deleted.
 */
export function stripAnsi(text: string): string {
  const csiSequence = /\u001b\[[0-9;?]*[ -/]*[@-~]/g;
  return text.replace(csiSequence, "");
}
8
+
9
/**
 * Recursively shortens long strings inside an arbitrary value.
 *
 * Strings longer than `max` are cut to `max` characters and suffixed with
 * `...[truncated N chars]`. Arrays and objects are rebuilt with every element
 * or property value processed the same way. Anything else is returned as-is.
 *
 * @param value - Value to process.
 * @param max - Maximum string length to keep (default 240).
 * @returns A copy with long strings shortened, or `value` itself when it is
 *   neither a long string, an array, nor an object.
 */
export function truncateValue(value: unknown, max = 240): unknown {
  if (typeof value === "string") {
    const overflow = value.length - max;
    return overflow > 0 ? `${value.slice(0, max)}...[truncated ${overflow} chars]` : value;
  }
  if (Array.isArray(value)) {
    return value.map((element) => truncateValue(element, max));
  }
  if (typeof value === "object" && value !== null) {
    const pairs = Object.entries(value).map(([key, inner]) => [key, truncateValue(inner, max)]);
    return Object.fromEntries(pairs);
  }
  return value;
}
@@ -0,0 +1,3 @@
1
/**
 * Rough token estimate: one token per four characters, rounded up.
 *
 * @param text - Input string.
 * @returns Estimated token count; 0 for an empty string.
 */
export function estimateTokens(text: string): number {
  const charsPerToken = 4;
  return Math.ceil(text.length / charsPerToken);
}