@polylogicai/polycode 1.1.3 → 1.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -20
- package/bin/polycode.mjs +85 -15
- package/lib/agentic.mjs +160 -11
- package/lib/key-store.mjs +232 -0
- package/lib/paste-aware-prompt.mjs +208 -0
- package/lib/polycode-hosted-client.mjs +100 -0
- package/lib/repl-ui.mjs +3 -1
- package/lib/slash-commands.mjs +33 -2
- package/lib/tools/describe-image.mjs +111 -0
- package/lib/tools/fetch-url.mjs +130 -0
- package/lib/tools/web-search.mjs +107 -0
- package/lib/witness/identity-gate.mjs +123 -0
- package/package.json +1 -1
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
// lib/tools/describe-image.mjs
|
|
2
|
+
// Vision tool. Reads an image from disk, base64-encodes it, sends to a
|
|
3
|
+
// vision-capable model, returns the text description. In hosted mode the
|
|
4
|
+
// request goes through the Polylogic inference proxy so the user does not
|
|
5
|
+
// need any vision-specific key. In BYOK mode we use the user's Groq key
|
|
6
|
+
// against the same endpoint.
|
|
7
|
+
|
|
8
|
+
import { promises as fs } from 'node:fs';
|
|
9
|
+
import { extname } from 'node:path';
|
|
10
|
+
|
|
11
|
+
const MAX_IMAGE_BYTES = 4 * 1024 * 1024;
|
|
12
|
+
const DEFAULT_VISION_MODEL = 'meta-llama/llama-4-scout-17b-16e-instruct';
|
|
13
|
+
const FALLBACK_VISION_MODEL = 'meta-llama/llama-4-scout-17b-16e-instruct';
|
|
14
|
+
|
|
15
|
+
const MIME_BY_EXT = {
|
|
16
|
+
'.png': 'image/png',
|
|
17
|
+
'.jpg': 'image/jpeg',
|
|
18
|
+
'.jpeg': 'image/jpeg',
|
|
19
|
+
'.webp': 'image/webp',
|
|
20
|
+
'.gif': 'image/gif',
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
/**
 * Resolve the MIME type of an image from its file extension.
 *
 * The extension is lowercased before the lookup, so `.PNG` and `.png` are
 * treated the same.
 *
 * @param {string} path - File path (or bare file name) of the image.
 * @returns {string|null} The MIME type from MIME_BY_EXT, or null when the
 *   extension is missing or not in the table.
 */
function mimeFor(path) {
  const suffix = extname(path).toLowerCase();
  const known = MIME_BY_EXT[suffix];
  if (known) {
    return known;
  }
  return null;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Describe an image on disk using a vision-capable chat model.
 *
 * The file is read, base64-encoded into a data URL and posted as an
 * OpenAI-style chat completion. When GROQ_API_KEY is set the request goes
 * straight to Groq (BYOK); otherwise it goes to the Polylogic inference
 * proxy (POLYCODE_PROXY_URL overrides the default proxy endpoint).
 *
 * @param {object} args
 * @param {string} args.path - Path of the image file to describe.
 * @param {string} [args.question] - Prompt for the model; defaults to a
 *   generic "describe this image" request.
 * @returns {Promise<string>} The model's text answer.
 * @throws {Error} If the path is not a regular file, is larger than
 *   MAX_IMAGE_BYTES, has an unsupported extension, the upstream call fails
 *   or times out, or the response carries no text content.
 */
export async function describeImage({ path, question }) {
  // Upper bound for a single upstream call so a stalled connection cannot
  // hang the tool forever (the other network tools in this package also
  // bound their requests with an AbortController).
  const VISION_TIMEOUT_MS = 60_000;

  const stat = await fs.stat(path);
  if (!stat.isFile()) throw new Error(`not a file: ${path}`);
  if (stat.size > MAX_IMAGE_BYTES) {
    throw new Error(`image too large: ${stat.size} bytes (limit ${MAX_IMAGE_BYTES})`);
  }
  const mime = mimeFor(path);
  if (!mime) {
    throw new Error(`unsupported image type: ${extname(path) || '(no extension)'}. Supported: png, jpg, jpeg, webp, gif.`);
  }

  const bytes = await fs.readFile(path);
  // The file may have grown between stat() and readFile(); enforce the limit
  // on what was actually read.
  if (bytes.length > MAX_IMAGE_BYTES) {
    throw new Error(`image too large: ${bytes.length} bytes (limit ${MAX_IMAGE_BYTES})`);
  }
  const dataUrl = `data:${mime};base64,${bytes.toString('base64')}`;

  const body = {
    model: DEFAULT_VISION_MODEL,
    max_tokens: 700,
    temperature: 0.2,
    messages: [
      {
        role: 'user',
        content: [
          { type: 'text', text: question || 'Describe this image in detail.' },
          { type: 'image_url', image_url: { url: dataUrl } },
        ],
      },
    ],
  };

  // Routing: BYOK uses Groq directly, hosted mode uses the Polylogic proxy.
  const groqKey = process.env.GROQ_API_KEY || '';
  let endpoint;
  let headers;
  if (groqKey) {
    endpoint = 'https://api.groq.com/openai/v1/chat/completions';
    headers = {
      Authorization: `Bearer ${groqKey}`,
      'Content-Type': 'application/json',
    };
  } else {
    const { getOrCreateInstallId } = await import('../polycode-hosted-client.mjs');
    endpoint = process.env.POLYCODE_PROXY_URL || 'https://polylogicai.com/api/polycode/inference';
    headers = {
      'Content-Type': 'application/json',
      'X-Polycode-Install-ID': getOrCreateInstallId(),
      'X-Polycode-Version': 'vision',
    };
  }

  // Perform one upstream request. Non-2xx responses throw an Error carrying
  // `status` and `body` so the caller can decide whether to retry.
  async function callOnce(modelOverride) {
    const payload = modelOverride ? { ...body, model: modelOverride } : body;
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), VISION_TIMEOUT_MS);
    let r;
    let text;
    try {
      r = await fetch(endpoint, {
        method: 'POST',
        headers,
        body: JSON.stringify(payload),
        signal: controller.signal,
      });
      text = await r.text();
    } catch (err) {
      if (err.name === 'AbortError') {
        throw new Error(`vision upstream timeout after ${VISION_TIMEOUT_MS}ms`, { cause: err });
      }
      throw err;
    } finally {
      clearTimeout(timer);
    }
    if (!r.ok) {
      const err = new Error(`vision upstream ${r.status}: ${text.slice(0, 300)}`);
      err.status = r.status;
      err.body = text;
      throw err;
    }
    try {
      return JSON.parse(text);
    } catch {
      throw new Error(`vision upstream returned non-JSON: ${text.slice(0, 200)}`);
    }
  }

  let completion;
  try {
    completion = await callOnce();
  } catch (err) {
    // Retry with the fallback model when the backend rejects the default one
    // (e.g. proxy allowlist or decommissioned model). Skip the retry when both
    // constants name the same model: repeating the identical request cannot
    // succeed and would only double the latency of the failure.
    const rejectedModel = err.status === 400 || err.status === 404;
    if (rejectedModel && FALLBACK_VISION_MODEL !== DEFAULT_VISION_MODEL) {
      completion = await callOnce(FALLBACK_VISION_MODEL);
    } else {
      throw err;
    }
  }

  const content = completion?.choices?.[0]?.message?.content;
  if (typeof content === 'string' && content.length > 0) return content;
  if (Array.isArray(content)) {
    // Some models return content as an array of blocks.
    const joined = content.map((b) => (typeof b === 'string' ? b : b?.text || '')).join('\n').trim();
    if (joined.length > 0) return joined;
  }
  throw new Error('vision response had no content');
}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
// lib/tools/fetch-url.mjs
|
|
2
|
+
// Fetches a URL from the user's machine and returns its body as text.
|
|
3
|
+
// Supports HTTP(S), follows redirects, converts HTML to readable plain text,
|
|
4
|
+
// returns JSON and text verbatim. Refuses binary content, private network
|
|
5
|
+
// addresses, and non-http protocols.
|
|
6
|
+
|
|
7
|
+
const MAX_BODY_BYTES = 200 * 1024;
|
|
8
|
+
const FETCH_TIMEOUT_MS = 15_000;
|
|
9
|
+
|
|
10
|
+
const USER_AGENT = 'polycode/1.1 (+https://polylogicai.com/polycode)';
|
|
11
|
+
|
|
12
|
+
// Block private network ranges so the tool cannot be used as a server-side
|
|
13
|
+
// request forgery vector against the user's own localhost or LAN. This is a
|
|
14
|
+
// defense-in-depth since polycode is a user-agent, but the LLM might be
|
|
15
|
+
// told to ping an internal endpoint.
|
|
16
|
+
/**
 * Decide whether a URL host must be refused because it names the local
 * machine or a private / link-local network.
 *
 * The input is normalized before matching: it is lowercased, the square
 * brackets that `URL.hostname` puts around IPv6 literals are removed, and a
 * single trailing dot is dropped.
 *
 * IPv4 ranges are matched by textual prefix, so a DNS name that merely starts
 * with e.g. `10.` is refused as well. This check is purely lexical: a public
 * DNS name that resolves to a private address is not detected here.
 *
 * @param {string} host - Hostname or IP literal, e.g. `URL.hostname`.
 * @returns {boolean} true when the host must not be fetched (also for an
 *   empty host), false otherwise.
 */
function isForbiddenHost(host) {
  if (!host) return true;

  let h = String(host).toLowerCase();
  // WHATWG URL serializes IPv6 hosts as "[::1]"; strip the brackets so the
  // IPv6 checks below can actually match.
  if (h.startsWith('[') && h.endsWith(']')) h = h.slice(1, -1);
  // "localhost." (fully qualified form) is the same host as "localhost".
  if (h.endsWith('.')) h = h.slice(0, -1);
  if (!h) return true;

  if (h === 'localhost' || h.endsWith('.localhost')) return true;

  // IPv4: "this network", loopback, RFC 1918 private ranges, link-local.
  if (/^0\./.test(h)) return true;
  if (/^127\./.test(h)) return true;
  if (/^10\./.test(h)) return true;
  if (/^192\.168\./.test(h)) return true;
  if (/^172\.(1[6-9]|2\d|3[0-1])\./.test(h)) return true;
  if (/^169\.254\./.test(h)) return true;

  // IPv6: unspecified, loopback, unique-local fc00::/7, link-local fe80::/10.
  if (h === '::' || h === '::1') return true;
  if (/^f[cd][0-9a-f]{2}:/.test(h)) return true;
  if (/^fe[89ab][0-9a-f]:/.test(h)) return true;

  // IPv4-mapped IPv6 (::ffff:a.b.c.d, which URL serializes as
  // ::ffff:hhhh:hhhh): judge the embedded IPv4 address instead.
  const mapped = /^::ffff:(.+)$/.exec(h);
  if (mapped) {
    const tail = mapped[1];
    if (tail.includes('.')) return isForbiddenHost(tail);
    const hex = /^([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(tail);
    if (hex) {
      const hi = Number.parseInt(hex[1], 16);
      const lo = Number.parseInt(hex[2], 16);
      return isForbiddenHost(`${hi >> 8}.${hi & 255}.${lo >> 8}.${lo & 255}`);
    }
  }

  return false;
}
|
|
28
|
+
|
|
29
|
+
// Strip HTML tags and normalize whitespace for a body that the model can
|
|
30
|
+
// actually read. We drop script/style blocks first, then replace tags with
|
|
31
|
+
// newlines, then collapse whitespace runs. This is a best-effort reader, not
|
|
32
|
+
// a full DOM parser.
|
|
33
|
+
/**
 * Convert an HTML document into readable plain text (best effort, no DOM).
 *
 * Steps, in order: drop script/style/noscript blocks, turn block-level tags
 * into newlines, strip every remaining tag, decode character references,
 * then collapse runs of spaces/tabs and excess blank lines.
 *
 * Character references are decoded in a single pass, so already-escaped
 * text is unescaped exactly once: `&amp;lt;` becomes `&lt;`, not `<`.
 * Supported: &nbsp; &amp; &lt; &gt; &quot; &apos; and numeric references
 * (`&#39;`, `&#x27;`). Unknown references are left untouched.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {string} Trimmed plain text.
 */
function htmlToText(html) {
  const NAMED_ENTITIES = { nbsp: ' ', amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" };

  // Replacement callback for one `&...;` reference; returns the original
  // text when the reference is unknown or out of the Unicode range.
  const decodeEntity = (match, ref) => {
    if (ref[0] === '#') {
      const isHex = ref[1] === 'x' || ref[1] === 'X';
      const codePoint = Number.parseInt(ref.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      if (!Number.isInteger(codePoint) || codePoint <= 0 || codePoint > 0x10ffff) return match;
      return String.fromCodePoint(codePoint);
    }
    const name = ref.toLowerCase();
    return Object.hasOwn(NAMED_ENTITIES, name) ? NAMED_ENTITIES[name] : match;
  };

  let out = html;
  out = out.replace(/<script\b[^>]*>[\s\S]*?<\/script>/gi, ' ');
  out = out.replace(/<style\b[^>]*>[\s\S]*?<\/style>/gi, ' ');
  out = out.replace(/<noscript\b[^>]*>[\s\S]*?<\/noscript>/gi, ' ');
  out = out.replace(/<(?:br|hr|p|div|li|h[1-6]|tr|td|th|section|article)\b[^>]*>/gi, '\n');
  out = out.replace(/<\/(?:p|div|li|h[1-6]|tr|section|article)>/gi, '\n');
  out = out.replace(/<[^>]+>/g, ' ');
  // Decode after tag stripping so escaped markup such as "&lt;b&gt;" survives
  // as literal text instead of being removed as a tag.
  out = out.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, decodeEntity);
  out = out.replace(/[ \t]+/g, ' ');
  out = out.replace(/\n[ \t]+/g, '\n');
  out = out.replace(/\n{3,}/g, '\n\n');
  return out.trim();
}
|
|
53
|
+
|
|
54
|
+
/**
 * Fetch a URL and return a text report: a small header (status, URL,
 * content type, byte count) followed by the body.
 *
 * Only http/https URLs are accepted and hosts rejected by isForbiddenHost
 * are refused. Redirects are followed manually (at most MAX_REDIRECTS hops)
 * so that every hop is validated the same way as the initial URL; with
 * automatic redirect following a public URL could bounce the request to
 * localhost or a LAN address. The check is lexical only — a DNS name that
 * resolves to a private address is not detected here.
 *
 * The timeout covers the whole operation, including reading the body. At
 * most MAX_BODY_BYTES of the body are kept; HTML is converted to plain text,
 * everything else textual is returned verbatim, and non-text content types
 * are refused with an explanatory one-line report.
 *
 * @param {string} rawUrl - Absolute URL to fetch.
 * @returns {Promise<string>} Header plus (possibly truncated) body text.
 * @throws {Error} On an invalid URL, unsupported protocol, forbidden host
 *   (initial or redirect target), too many redirects, timeout, or network
 *   failure.
 */
export async function fetchUrl(rawUrl) {
  const MAX_REDIRECTS = 5;
  const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);

  let url;
  try {
    url = new URL(rawUrl);
  } catch {
    throw new Error(`invalid URL: ${rawUrl}`);
  }

  // Applied to the initial URL and to every redirect target.
  const assertFetchable = (target) => {
    if (target.protocol !== 'http:' && target.protocol !== 'https:') {
      throw new Error(`unsupported protocol: ${target.protocol}`);
    }
    if (isForbiddenHost(target.hostname)) {
      throw new Error(`refusing to fetch private-network or loopback host: ${target.hostname}`);
    }
  };
  assertFetchable(url);

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);

  // Map a fetch/stream failure to the tool's error messages, keeping the
  // underlying error reachable through `cause`.
  const wrapTransportError = (err, target) => {
    if (err.name === 'AbortError') {
      return new Error(`timeout after ${FETCH_TIMEOUT_MS}ms: ${target.hostname}`, { cause: err });
    }
    return new Error(`network error: ${err.message}`, { cause: err });
  };

  try {
    let current = url;
    let res;
    for (let hop = 0; ; hop += 1) {
      try {
        res = await fetch(current.toString(), {
          method: 'GET',
          headers: {
            'User-Agent': USER_AGENT,
            Accept: 'text/html,text/plain,application/json,application/xhtml+xml,*/*;q=0.8',
          },
          redirect: 'manual',
          signal: controller.signal,
        });
      } catch (err) {
        throw wrapTransportError(err, current);
      }

      const location = REDIRECT_STATUSES.has(res.status) ? res.headers.get('location') : null;
      if (!location) break;

      // Release the redirect response before moving on (best effort).
      await res.body?.cancel().catch(() => {});
      if (hop >= MAX_REDIRECTS) {
        throw new Error(`too many redirects (limit ${MAX_REDIRECTS}): ${url.toString()}`);
      }
      let next;
      try {
        next = new URL(location, current);
      } catch {
        throw new Error(`invalid redirect location: ${location}`);
      }
      assertFetchable(next);
      current = next;
    }

    const contentType = (res.headers.get('content-type') || '').toLowerCase();
    const isText =
      contentType.startsWith('text/') ||
      contentType.includes('json') ||
      contentType.includes('xml') ||
      contentType === '';
    if (!isText) {
      // The body is never read; release the connection (best effort).
      await res.body?.cancel().catch(() => {});
      return `[${res.status} ${res.statusText}] ${url.toString()}\ncontent-type: ${contentType}\n(binary content refused; use describe_image for images)`;
    }

    let received = 0;
    const chunks = [];
    try {
      const reader = res.body?.getReader?.();
      if (reader) {
        while (true) {
          const { value, done } = await reader.read();
          if (done) break;
          received += value.byteLength;
          if (received > MAX_BODY_BYTES) {
            // Keep only the part of this chunk that fits under the cap, then
            // stop the download instead of leaving the stream open.
            chunks.push(value.subarray(0, MAX_BODY_BYTES - (received - value.byteLength)));
            await reader.cancel().catch(() => {});
            break;
          }
          chunks.push(value);
        }
      } else {
        const buf = Buffer.from(await res.arrayBuffer());
        chunks.push(buf.subarray(0, MAX_BODY_BYTES));
        received = buf.byteLength;
      }
    } catch (err) {
      throw wrapTransportError(err, current);
    }
    const raw = Buffer.concat(chunks.map((c) => (c instanceof Buffer ? c : Buffer.from(c)))).toString('utf8');

    // Only HTML is transformed; JSON and other text are returned verbatim.
    const looksLikeHtml = contentType.includes('html') || /<html[^>]*>/i.test(raw);
    const body = !contentType.includes('json') && looksLikeHtml ? htmlToText(raw) : raw;

    const header = `[${res.status} ${res.statusText}] ${url.toString()}\ncontent-type: ${contentType || 'unknown'}\nbytes: ${received}${received > MAX_BODY_BYTES ? ' (truncated)' : ''}\n---\n`;
    return header + body;
  } finally {
    clearTimeout(timer);
  }
}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
// lib/tools/web-search.mjs
|
|
2
|
+
// Web search tool. In hosted mode the request goes to the Polylogic search
|
|
3
|
+
// proxy at polylogicai.com/api/polycode/search, which runs the query against
|
|
4
|
+
// a real search backend (Tavily / Brave) with a server-side key so the user
|
|
5
|
+
// does not need to provision anything. In BYOK mode a user-supplied
|
|
6
|
+
// TAVILY_API_KEY is used directly if present; otherwise we still fall
|
|
7
|
+
// through to the Polylogic proxy.
|
|
8
|
+
//
|
|
9
|
+
// Returns a formatted text block with up to 8 (title, url, snippet) rows so
|
|
10
|
+
// the model can read it as plain context. Never dumps raw HTML.
|
|
11
|
+
|
|
12
|
+
const DEFAULT_PROXY_URL = 'https://polylogicai.com/api/polycode/search';
|
|
13
|
+
const MAX_RESULTS = 8;
|
|
14
|
+
const SEARCH_TIMEOUT_MS = 15_000;
|
|
15
|
+
|
|
16
|
+
/**
 * Render search results as a plain-text block the model can read.
 *
 * Output starts with a `web_search("<query>")` line, followed by numbered
 * entries (title, then optional URL and snippet lines), each entry followed
 * by a blank line. At most MAX_RESULTS entries are rendered. Field names
 * from several providers are accepted (title/name, url/link,
 * content/snippet/description). Entries that are not objects (including
 * null) are rendered as an empty entry instead of throwing.
 *
 * @param {Array<object>} results - Provider result rows; anything that is
 *   not a non-empty array yields the "(no results)" form.
 * @param {string} query - The query, echoed in the first line.
 * @returns {string} Formatted text block.
 */
function formatResults(results, query) {
  if (!Array.isArray(results) || results.length === 0) {
    return `web_search("${query}")\n(no results)`;
  }
  const lines = [`web_search("${query}")`, ''];
  results.slice(0, MAX_RESULTS).forEach((entry, index) => {
    // Upstream data is untrusted: tolerate null/primitive rows.
    const r = entry !== null && typeof entry === 'object' ? entry : {};
    const title = String(r.title || r.name || '').slice(0, 200);
    const url = String(r.url || r.link || '').slice(0, 400);
    const snippet = String(r.content || r.snippet || r.description || '').replace(/\s+/g, ' ').slice(0, 300);
    lines.push(`${index + 1}. ${title}`);
    if (url) lines.push(` ${url}`);
    if (snippet) lines.push(` ${snippet}`);
    lines.push('');
  });
  return lines.join('\n');
}
|
|
34
|
+
|
|
35
|
+
/**
 * Run a search directly against the Tavily API with a user-supplied key.
 *
 * The request is aborted after SEARCH_TIMEOUT_MS. Failures are reported
 * with a `tavily ...` message: timeout, non-2xx status (with the start of
 * the response body), or a response that is not valid JSON.
 *
 * @param {string} query - Search query.
 * @param {string} apiKey - Tavily API key, sent as a Bearer token.
 * @returns {Promise<string>} Results formatted by formatResults.
 * @throws {Error} On timeout, network failure, non-2xx status, or a
 *   non-JSON response.
 */
async function searchViaTavilyDirect(query, apiKey) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), SEARCH_TIMEOUT_MS);
  try {
    let res;
    let text;
    try {
      res = await fetch('https://api.tavily.com/search', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          Authorization: `Bearer ${apiKey}`,
        },
        body: JSON.stringify({
          query,
          max_results: MAX_RESULTS,
          search_depth: 'basic',
        }),
        signal: controller.signal,
      });
      text = await res.text();
    } catch (err) {
      // Turn the generic abort error into a message that names the cause.
      if (err.name === 'AbortError') {
        throw new Error(`tavily timeout after ${SEARCH_TIMEOUT_MS}ms`, { cause: err });
      }
      throw err;
    }
    if (!res.ok) throw new Error(`tavily ${res.status}: ${text.slice(0, 200)}`);

    let data;
    try {
      data = JSON.parse(text);
    } catch (err) {
      throw new Error(`tavily returned non-JSON: ${text.slice(0, 200)}`, { cause: err });
    }
    // `data` may legitimately parse to null; treat that as "no results".
    return formatResults(data?.results || [], query);
  } finally {
    clearTimeout(timer);
  }
}
|
|
60
|
+
|
|
61
|
+
/**
 * Run a search through the hosted search proxy.
 *
 * The endpoint is POLYCODE_SEARCH_URL when set, otherwise DEFAULT_PROXY_URL.
 * The request carries the install id from polycode-hosted-client and is
 * aborted after SEARCH_TIMEOUT_MS.
 *
 * Accepted response shapes: an object with a `results` array, a bare array
 * of results, or a JSON string (returned as-is). Anything else formats as
 * "(no results)".
 *
 * @param {string} query - Search query.
 * @returns {Promise<string>} Formatted results text.
 * @throws {Error} On timeout, network failure, non-2xx status, or a
 *   non-JSON response.
 */
async function searchViaPolylogicProxy(query) {
  const { getOrCreateInstallId } = await import('../polycode-hosted-client.mjs');
  const endpoint = process.env.POLYCODE_SEARCH_URL || DEFAULT_PROXY_URL;
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), SEARCH_TIMEOUT_MS);
  try {
    let res;
    let text;
    try {
      res = await fetch(endpoint, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'X-Polycode-Install-ID': getOrCreateInstallId(),
          'X-Polycode-Version': 'search',
        },
        body: JSON.stringify({ query, max_results: MAX_RESULTS }),
        signal: controller.signal,
      });
      text = await res.text();
    } catch (err) {
      // Turn the generic abort error into a message that names the cause.
      if (err.name === 'AbortError') {
        throw new Error(`polycode search proxy timeout after ${SEARCH_TIMEOUT_MS}ms`, { cause: err });
      }
      throw err;
    }

    if (!res.ok) {
      // Prefer the proxy's structured `error` field; fall back to the raw
      // body when the payload is not JSON or has no such field.
      let msg;
      try {
        msg = JSON.parse(text)?.error ?? text;
      } catch {
        msg = text;
      }
      throw new Error(`polycode search proxy ${res.status}: ${String(msg).slice(0, 300)}`);
    }

    let data;
    try {
      data = JSON.parse(text);
    } catch (err) {
      throw new Error(`polycode search proxy returned non-JSON: ${text.slice(0, 200)}`, { cause: err });
    }
    if (typeof data === 'string') return data;
    // Optional chaining: a JSON `null` body must not crash the lookup.
    if (Array.isArray(data?.results)) return formatResults(data.results, query);
    return formatResults(data, query);
  } finally {
    clearTimeout(timer);
  }
}
|
|
91
|
+
|
|
92
|
+
/**
 * Search the web and return a formatted text block of results.
 *
 * When TAVILY_API_KEY is set the query goes to Tavily directly; if that
 * call fails, the hosted proxy is used instead and a note about the direct
 * failure is appended to the result. Without a key the hosted proxy is used
 * straight away.
 *
 * @param {string} query - Search query; surrounding whitespace is trimmed.
 * @returns {Promise<string>} Formatted results text.
 * @throws {Error} 'empty query' for a blank query, or the proxy error. When
 *   the direct call failed first, the proxy error message also mentions the
 *   direct failure and the proxy error is attached as `cause`.
 */
export async function webSearch(query) {
  const q = String(query || '').trim();
  if (!q) throw new Error('empty query');

  const byokKey = process.env.TAVILY_API_KEY || '';
  if (!byokKey) return searchViaPolylogicProxy(q);

  try {
    return await searchViaTavilyDirect(q, byokKey);
  } catch (directErr) {
    // Fall through to the hosted proxy on direct-provider failure.
    let hosted;
    try {
      hosted = await searchViaPolylogicProxy(q);
    } catch (proxyErr) {
      // Both paths failed: report both reasons, otherwise the user never
      // learns why their own key did not work.
      throw new Error(
        `${proxyErr.message} (direct tavily call also failed: ${directErr.message})`,
        { cause: proxyErr }
      );
    }
    return `${hosted}\n\n(note: direct tavily call failed: ${directErr.message})`;
  }
}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
// lib/witness/identity-gate.mjs
|
|
2
|
+
// Deterministic post-generation gate that enforces polycode's brand identity.
|
|
3
|
+
// The system prompt already tells the model "you are polycode, built by
|
|
4
|
+
// Polylogic AI" with explicit negatives, but a language model's identity is
|
|
5
|
+
// fragile: under context poisoning, casual probing, or a clever adversarial
|
|
6
|
+
// frame, the model can still leak its pretraining creator ("I was created by
|
|
7
|
+
// Anthropic", "I'm Claude", "my company is OpenAI"). Substrate cannot witness
|
|
8
|
+
// its own brand.
|
|
9
|
+
//
|
|
10
|
+
// This gate runs on every user-facing text message the model produces. It
|
|
11
|
+
// detects common identity-leak shapes and rewrites them to the canonical
|
|
12
|
+
// polycode answer. The rewritten text is what the user sees.
|
|
13
|
+
|
|
14
|
+
const CANONICAL = "I'm polycode, built by Polylogic AI.";
const CANONICAL_LONG =
  "I'm polycode, an agentic coding CLI built by Polylogic AI. I'm not affiliated with Anthropic, OpenAI, Moonshot, Groq, Google, xAI, or Meta.";

// Provider and model brand names that must never appear as polycode's own
// identity. These are regex fragments; every pattern below anchors them with
// word boundaries so they match case-insensitively as whole words only
// ("Meta", not "metadata").
const FORBIDDEN_BRANDS = [
  'Anthropic',
  'OpenAI',
  'Moonshot(?: AI)?',
  'Groq',
  'Google(?: DeepMind)?',
  'DeepMind',
  'xAI',
  'Meta(?: AI)?',
  'Microsoft',
  'Mistral(?: AI)?',
  'Cohere',
];

// Forbidden model names that must never appear as polycode's own identity.
const FORBIDDEN_MODELS = [
  'Claude(?:\\s*\\d(?:\\.\\d)?)?(?:\\s+(?:Opus|Sonnet|Haiku))?',
  'ChatGPT',
  'GPT-?\\d?(?:\\.\\d)?(?:o|o-mini|-turbo)?',
  'Gemini(?:\\s*\\d(?:\\.\\d)?)?(?:\\s+(?:Pro|Flash|Ultra))?',
  'PaLM',
  'Bard',
  'Grok',
  'LLaMA(?:\\s*\\d(?:\\.\\d)?)?',
  'Kimi(?:\\s*K\\d)?',
  'Mistral',
  'Mixtral',
  'Phi-?\\d?',
];

const BRANDS = FORBIDDEN_BRANDS.join('|');
const MODELS = FORBIDDEN_MODELS.join('|');
const BRAND_OR_MODEL = `${BRANDS}|${MODELS}`;

// Shorthand for "I am" / "I'm" / "I was" / "I have been" / "I will be",
// followed by mandatory whitespace. The "'m" form has no whitespace between
// "I" and the apostrophe, hence the separate alternative.
const I_AM = `I(?:\\s+(?:am|was|have\\s+been|will\\s+be)|'m)\\s+`;

// Patterns that claim origin/creator/identity. Each one, if matched, triggers
// a rewrite to the canonical answer; the first matching pattern is the one
// reported. Every brand/model alternation is closed with \b: without it a
// prefix match such as "model from metadata" or "developed by
// metaprogramming" would replace an ordinary coding answer with the identity
// statement. The regexes are used only with .test(), so they carry no `g`
// flag and hold no lastIndex state between calls.
const LEAK_PATTERNS = [
  {
    name: 'created_by_brand',
    // "I was created by Anthropic", "I come from OpenAI", "my company is Google"
    regex: new RegExp(
      `(?:${I_AM}(?:created|made|built|developed|trained|designed)|I\\s+come\\s+from|my\\s+(?:creator|maker|developer|company|owner|lab|team)\\s+(?:is|was))\\s+(?:by\\s+)?(?:${BRAND_OR_MODEL})\\b`,
      'i'
    ),
  },
  {
    name: 'i_am_model',
    // "I am Claude", "I'm ChatGPT", "I am GPT-4"
    regex: new RegExp(`${I_AM}(?:an?\\s+)?(?:${MODELS})\\b`, 'i'),
  },
  {
    name: 'product_of_brand',
    // "I'm a product of Anthropic", "an assistant from OpenAI"
    regex: new RegExp(
      `\\b(?:product|assistant|model|system|AI|chatbot)\\s+(?:of|from|by|made\\s+by|built\\s+by|created\\s+by)\\s+(?:${BRANDS})\\b`,
      'i'
    ),
  },
  {
    name: 'trained_by_brand',
    // "trained by Anthropic", "Anthropic trained me"
    regex: new RegExp(
      `(?:trained|developed|pretrained)\\s+by\\s+(?:${BRANDS})\\b|\\b(?:${BRANDS})\\s+(?:trained|developed|pretrained|created|built|made)\\s+me\\b`,
      'i'
    ),
  },
];

/**
 * Run the identity gate over one model-produced message.
 *
 * Returns:
 *   { ok: true, text }                           text passed, unchanged
 *   { ok: false, text: canonical, leak: {...} }  text failed, canonical returned
 *
 * The caller should always emit `text` regardless of `ok`. On failure the
 * whole message is replaced: messages shorter than 80 characters get the
 * short canonical answer, longer ones the long form so the user understands
 * the correction was intentional. `leak` records the matching pattern name
 * and the original text.
 *
 * @param {string} text - Message to check. Empty or non-string input passes
 *   and is normalized to `text || ''`.
 * @returns {{ok: boolean, text: string, leak?: {pattern: string, original: string}}}
 */
export function checkIdentity(text) {
  if (!text || typeof text !== 'string') {
    return { ok: true, text: text || '' };
  }

  const hit = LEAK_PATTERNS.find((pattern) => pattern.regex.test(text));
  if (!hit) {
    return { ok: true, text };
  }

  return {
    ok: false,
    text: text.length < 80 ? CANONICAL : CANONICAL_LONG,
    leak: { pattern: hit.name, original: text },
  };
}

export const IDENTITY_GATE_CANONICAL = CANONICAL;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@polylogicai/polycode",
|
|
3
|
-
"version": "1.1.3",
|
|
3
|
+
"version": "1.1.5",
|
|
4
4
|
"description": "An agentic coding CLI. Runs on your machine with your keys. Every turn is appended to a SHA-256 chained session log, so your history is auditable, replayable, and portable.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "bin/polycode.mjs",
|