@remnic/export-weclone 1.0.1 → 9.3.515

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -91,6 +91,10 @@ declare function sweepPii(records: TrainingExportRecord[]): PrivacySweepResult;
91
91
  * compatible with WeClone / LLaMA Factory.
92
92
  */
93
93
 
94
+ interface TrainingExportRegistry {
95
+ getTrainingExportAdapter(name: string): TrainingExportAdapter | undefined;
96
+ registerTrainingExportAdapter(adapter: TrainingExportAdapter): void;
97
+ }
94
98
  /**
95
99
  * Idempotently register the WeClone adapter with the core training-export
96
100
  * registry. Callable multiple times without throwing (CLAUDE.md #13:
@@ -100,6 +104,6 @@ declare function sweepPii(records: TrainingExportRecord[]): PrivacySweepResult;
100
104
  * Returns true when the adapter was newly registered, false when an adapter
101
105
  * with the same name already exists.
102
106
  */
103
- declare function ensureWecloneExportAdapterRegistered(): boolean;
107
+ declare function ensureWecloneExportAdapterRegistered(registry?: TrainingExportRegistry): boolean;
104
108
 
105
- export { type PrivacySweepResult, type StyleMarkers, type SynthesizerOptions, ensureWecloneExportAdapterRegistered, extractStyleMarkers, sweepPii, synthesizeTrainingPairs, wecloneExportAdapter };
109
+ export { type PrivacySweepResult, type StyleMarkers, type SynthesizerOptions, type TrainingExportRegistry, ensureWecloneExportAdapterRegistered, extractStyleMarkers, sweepPii, synthesizeTrainingPairs, wecloneExportAdapter };
package/dist/index.js CHANGED
@@ -63,7 +63,7 @@ var CATEGORY_TO_TEMPLATE = {
63
63
  moment: "personal"
64
64
  };
65
65
  function synthesizeTrainingPairs(records, options) {
66
- const maxPairs = options?.maxPairsPerRecord ?? DEFAULT_MAX_PAIRS;
66
+ const maxPairs = resolveMaxPairsPerRecord(options?.maxPairsPerRecord);
67
67
  const style = options?.styleMarkers;
68
68
  const result = [];
69
69
  for (let i = 0; i < records.length; i++) {
@@ -91,6 +91,13 @@ function synthesizeTrainingPairs(records, options) {
91
91
  }
92
92
  return result;
93
93
  }
94
+ function resolveMaxPairsPerRecord(maxPairsPerRecord) {
95
+ const maxPairs = maxPairsPerRecord ?? DEFAULT_MAX_PAIRS;
96
+ if (!Number.isFinite(maxPairs) || !Number.isInteger(maxPairs) || maxPairs <= 0) {
97
+ throw new RangeError("maxPairsPerRecord must be a finite positive integer");
98
+ }
99
+ return maxPairs;
100
+ }
94
101
  function resolveTemplateKey(category) {
95
102
  if (!category) return "";
96
103
  return CATEGORY_TO_TEMPLATE[category.toLowerCase()] ?? "";
@@ -250,6 +257,35 @@ function findCommonPhrases(samples) {
250
257
  }
251
258
 
252
259
  // src/privacy.ts
260
+ function normalizeCardCandidate(value) {
261
+ return value.replace(/[-\s]/g, "");
262
+ }
263
+ function passesLuhn(value) {
264
+ let sum = 0;
265
+ let doubleDigit = false;
266
+ for (let i = value.length - 1; i >= 0; i -= 1) {
267
+ let digit = Number(value[i]);
268
+ if (!Number.isInteger(digit)) return false;
269
+ if (doubleDigit) {
270
+ digit *= 2;
271
+ if (digit > 9) digit -= 9;
272
+ }
273
+ sum += digit;
274
+ doubleDigit = !doubleDigit;
275
+ }
276
+ return sum > 0 && sum % 10 === 0;
277
+ }
278
+ function isCreditCardCandidate(match) {
279
+ const digits = normalizeCardCandidate(match);
280
+ return digits.length >= 13 && digits.length <= 19 && passesLuhn(digits);
281
+ }
282
+ var CREDIT_CARD_PATTERNS = [19, 18, 17, 16, 15, 14, 13].map(
283
+ (digitCount) => ({
284
+ name: "credit_card",
285
+ regex: new RegExp(`\\b\\d(?:[-\\s]?\\d){${digitCount - 1}}\\b`, "g"),
286
+ validate: isCreditCardCandidate
287
+ })
288
+ );
253
289
  var PII_PATTERNS = [
254
290
  {
255
291
  // Email: user@domain.tld
@@ -261,11 +297,10 @@ var PII_PATTERNS = [
261
297
  name: "ssn",
262
298
  regex: /\b\d{3}-\d{2}-\d{4}\b/g
263
299
  },
264
- {
265
- // Credit card: 4 groups of 4 digits separated by dashes or spaces
266
- name: "credit_card",
267
- regex: /\b\d{4}[-\s]\d{4}[-\s]\d{4}[-\s]\d{4}\b/g
268
- },
300
+ // Credit card: 13-19 digits, optionally separated by dashes or spaces.
301
+ // Try longest candidates first so valid 19-digit cards are preserved while
302
+ // shorter cards next to numeric metadata can still be reconsidered.
303
+ ...CREDIT_CARD_PATTERNS,
269
304
  {
270
305
  // IP address: four octets 0-255
271
306
  name: "ip_address",
@@ -293,16 +328,16 @@ function sweepPii(records) {
293
328
  if (!value) continue;
294
329
  for (const pattern of PII_PATTERNS) {
295
330
  pattern.regex.lastIndex = 0;
296
- if (pattern.regex.test(value)) {
297
- pattern.regex.lastIndex = 0;
298
- value = value.replace(pattern.regex, "[REDACTED]");
331
+ value = value.replace(pattern.regex, (match) => {
332
+ if (pattern.validate && !pattern.validate(match)) return match;
299
333
  recordHasRedaction.add(idx);
300
334
  redactionDetails.push({
301
335
  index: idx,
302
336
  field,
303
337
  pattern: pattern.name
304
338
  });
305
- }
339
+ return "[REDACTED]";
340
+ });
306
341
  }
307
342
  cleaned[field] = value;
308
343
  }
@@ -316,11 +351,14 @@ function sweepPii(records) {
316
351
  }
317
352
 
318
353
  // src/index.ts
319
- function ensureWecloneExportAdapterRegistered() {
320
- if (getTrainingExportAdapter(wecloneExportAdapter.name) !== void 0) {
354
+ function ensureWecloneExportAdapterRegistered(registry = {
355
+ getTrainingExportAdapter,
356
+ registerTrainingExportAdapter
357
+ }) {
358
+ if (registry.getTrainingExportAdapter(wecloneExportAdapter.name) !== void 0) {
321
359
  return false;
322
360
  }
323
- registerTrainingExportAdapter(wecloneExportAdapter);
361
+ registry.registerTrainingExportAdapter(wecloneExportAdapter);
324
362
  return true;
325
363
  }
326
364
  try {
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts","../src/adapter.ts","../src/synthesizer.ts","../src/style-extractor.ts","../src/privacy.ts"],"sourcesContent":["/**\n * @remnic/export-weclone\n *\n * WeClone-specific training-data export adapter that converts\n * Remnic memories into Alpaca-format fine-tuning datasets\n * compatible with WeClone / LLaMA Factory.\n */\n\nimport {\n getTrainingExportAdapter,\n registerTrainingExportAdapter,\n} from \"@remnic/core\";\n\nimport { wecloneExportAdapter } from \"./adapter.js\";\n\nexport { wecloneExportAdapter } from \"./adapter.js\";\nexport { synthesizeTrainingPairs, type SynthesizerOptions } from \"./synthesizer.js\";\nexport { extractStyleMarkers, type StyleMarkers } from \"./style-extractor.js\";\nexport { sweepPii, type PrivacySweepResult } from \"./privacy.js\";\n\n/**\n * Idempotently register the WeClone adapter with the core training-export\n * registry. Callable multiple times without throwing (CLAUDE.md #13:\n * secondary calls must not crash host processes that pre-register the\n * adapter for test fixtures).\n *\n * Returns true when the adapter was newly registered, false when an adapter\n * with the same name already exists.\n */\nexport function ensureWecloneExportAdapterRegistered(): boolean {\n if (getTrainingExportAdapter(wecloneExportAdapter.name) !== undefined) {\n return false;\n }\n registerTrainingExportAdapter(wecloneExportAdapter);\n return true;\n}\n\n// Side-effect registration: importing this module registers the adapter.\n// Callers that need to manage registration manually (e.g. tests that call\n// `clearTrainingExportAdapters()`) can re-invoke\n// `ensureWecloneExportAdapterRegistered()` after clearing.\n//\n// The try/catch keeps import-time errors from breaking unrelated callers —\n// the adapter surfaces `formatRecords` purely, so a failure here would be\n// surprising, but defensive coding keeps CLI startup resilient.\ntry {\n ensureWecloneExportAdapterRegistered();\n} catch {\n // Swallow — explicit callers can re-invoke ensureWecloneExportAdapterRegistered().\n}\n","/**\n * WeClone Alpaca-format training export adapter.\n *\n * Converts TrainingExportRecord[] into the JSON format that\n * WeClone / LLaMA Factory expects for fine-tuning:\n *\n * [{ \"instruction\": \"...\", \"input\": \"\", \"output\": \"...\" }, ...]\n *\n * Only the three Alpaca fields are emitted; Remnic-specific\n * metadata (category, confidence, sourceIds) is stripped.\n */\n\nimport type { TrainingExportAdapter, TrainingExportRecord } from \"@remnic/core\";\n\nexport const wecloneExportAdapter: TrainingExportAdapter = {\n name: \"weclone\",\n fileExtension: \".json\",\n\n formatRecords(records: TrainingExportRecord[]): string {\n const alpacaRecords = records.map((r) => ({\n instruction: r.instruction,\n input: r.input,\n output: r.output,\n }));\n return JSON.stringify(alpacaRecords, null, 2);\n },\n};\n","/**\n * Training-pair synthesizer.\n *\n * Converts Remnic's flat TrainingExportRecord[] — where\n * `instruction` is a natural-language description and\n * `category` identifies the memory type — into natural\n * conversational question-answer pairs suitable for\n * WeClone / LLaMA Factory fine-tuning.\n *\n * Uses template-based question generation (no LLM calls).\n */\n\nimport type { TrainingExportRecord } from \"@remnic/core\";\nimport type { StyleMarkers } from \"./style-extractor.js\";\n\nexport interface SynthesizerOptions {\n styleMarkers?: StyleMarkers;\n maxPairsPerRecord?: number;\n}\n\n/** Default limit for pairs generated per input record. */\nconst DEFAULT_MAX_PAIRS = 1;\n\n/**\n * Question templates keyed by template group.\n * Each array provides variety; the synthesizer picks\n * based on record index for deterministic output.\n */\nconst QUESTION_TEMPLATES: Record<string, string[]> = {\n preferences: [\n \"What kind of {topic} do you like?\",\n \"What's your preference for {topic}?\",\n \"What are your favorite {topic}?\",\n ],\n opinions: [\n \"What do you think about {topic}?\",\n \"How do you feel about {topic}?\",\n \"What's your opinion on {topic}?\",\n ],\n expertise: [\n \"Tell me about {topic}.\",\n \"What do you know about {topic}?\",\n \"Can you explain {topic}?\",\n ],\n personal: [\n \"Can you tell me about your {topic}?\",\n \"Tell me about your {topic}.\",\n \"What can you share about your {topic}?\",\n ],\n};\n\nconst DEFAULT_TEMPLATES = [\n \"Tell me about {topic}.\",\n \"What can you share about {topic}?\",\n];\n\n/**\n * Maps record.category values (from core converter) to\n * QUESTION_TEMPLATES keys. Categories not listed here\n * fall through to DEFAULT_TEMPLATES.\n */\nconst CATEGORY_TO_TEMPLATE: Record<string, string> = {\n preference: \"preferences\",\n fact: \"expertise\",\n entity: \"expertise\",\n skill: \"expertise\",\n correction: \"opinions\",\n decision: \"opinions\",\n principle: \"opinions\",\n rule: \"opinions\",\n personal: \"personal\",\n relationship: \"personal\",\n commitment: \"personal\",\n moment: \"personal\",\n};\n\n/**\n * Synthesize natural conversational training pairs from\n * category-tagged memory records.\n */\nexport function synthesizeTrainingPairs(\n records: TrainingExportRecord[],\n options?: SynthesizerOptions,\n): TrainingExportRecord[] {\n const maxPairs = options?.maxPairsPerRecord ?? DEFAULT_MAX_PAIRS;\n const style = options?.styleMarkers;\n const result: TrainingExportRecord[] = [];\n\n for (let i = 0; i < records.length; i++) {\n const record = records[i];\n const templateKey = resolveTemplateKey(record.category);\n const topic = extractTopic(record.instruction);\n const templates = QUESTION_TEMPLATES[templateKey] ?? DEFAULT_TEMPLATES;\n\n const pairCount = Math.min(maxPairs, templates.length);\n\n for (let j = 0; j < pairCount; j++) {\n const templateIndex = (i + j) % templates.length;\n const question = templates[templateIndex].replace(\"{topic}\", topic);\n let output = record.output;\n\n if (style?.usesLowercase) {\n output = output.toLowerCase();\n }\n\n result.push({\n instruction: question,\n input: \"\",\n output,\n category: record.category,\n confidence: record.confidence,\n sourceIds: record.sourceIds,\n });\n }\n }\n\n return result;\n}\n\n// ── Internals ────────────────────────────────────────────\n\n/**\n * Resolve a record's category field to a QUESTION_TEMPLATES key.\n * Falls back to empty string (which triggers DEFAULT_TEMPLATES).\n */\nfunction resolveTemplateKey(category: string | undefined): string {\n if (!category) return \"\";\n return CATEGORY_TO_TEMPLATE[category.toLowerCase()] ?? \"\";\n}\n\n/**\n * Extract a human-readable topic from the instruction string.\n *\n * The core converter produces instructions like:\n * \"Recall a factual memory (food, cooking)\"\n * \"Recall a user preference\"\n *\n * When parenthesized tags are present, use them as the topic.\n * Otherwise fall back to \"this\".\n */\nfunction extractTopic(instruction: string): string {\n const tagMatch = instruction.match(/\\(([^()]+)\\)/);\n if (tagMatch) {\n return tagMatch[1].trim().toLowerCase();\n }\n return \"this\";\n}\n","/**\n * Communication style marker extraction.\n *\n * Analyzes text samples using simple heuristics to produce\n * a StyleMarkers profile. No LLM calls — pure regex and\n * counting.\n */\n\nexport interface StyleMarkers {\n avgSentenceLength: number;\n usesEmoji: boolean;\n formality: \"formal\" | \"casual\" | \"mixed\";\n usesLowercase: boolean;\n commonPhrases: string[];\n}\n\n/**\n * Regex matching most common emoji code-point ranges.\n * Covers Emoticons, Dingbats, Transport/Map symbols,\n * Misc symbols, and supplemental blocks.\n */\nconst EMOJI_RE =\n /[\\u{1F600}-\\u{1F64F}\\u{1F300}-\\u{1F5FF}\\u{1F680}-\\u{1F6FF}\\u{1F1E0}-\\u{1F1FF}\\u{2600}-\\u{27BF}\\u{2702}-\\u{27B0}\\u{FE00}-\\u{FE0F}\\u{1FA00}-\\u{1FA6F}\\u{1FA70}-\\u{1FAFF}\\u{2328}\\u{23CF}\\u{23E9}-\\u{23F3}\\u{23F8}-\\u{23FA}\\u{200D}\\u{20E3}\\u{FE0F}\\u{E0020}-\\u{E007F}\\u{2B50}\\u{2B55}\\u{2934}\\u{2935}\\u{25AA}-\\u{25FE}\\u{2600}-\\u{26FF}\\u{2700}-\\u{27BF}\\u{231A}\\u{231B}\\u{23E9}-\\u{23F3}\\u{23F8}-\\u{23FA}\\u{25FB}-\\u{25FE}\\u{2614}\\u{2615}\\u{2648}-\\u{2653}\\u{267F}\\u{2693}\\u{26A1}\\u{26AA}\\u{26AB}\\u{26BD}\\u{26BE}\\u{26C4}\\u{26C5}\\u{26CE}\\u{26D4}\\u{26EA}\\u{26F2}\\u{26F3}\\u{26F5}\\u{26FA}\\u{26FD}\\u{2702}\\u{2705}\\u{2708}-\\u{270D}\\u{270F}\\u{2712}\\u{2714}\\u{2716}\\u{271D}\\u{2721}\\u{2728}\\u{2733}\\u{2734}\\u{2744}\\u{2747}\\u{274C}\\u{274E}\\u{2753}-\\u{2755}\\u{2757}\\u{2763}\\u{2764}\\u{2795}-\\u{2797}\\u{27A1}\\u{27B0}\\u{27BF}\\u{2934}\\u{2935}]/u;\n\n/** Words/phrases that signal formal register. */\nconst FORMAL_MARKERS = [\n \"furthermore\",\n \"however\",\n \"therefore\",\n \"moreover\",\n \"consequently\",\n \"nevertheless\",\n \"in addition\",\n \"accordingly\",\n \"subsequently\",\n \"regarding\",\n \"pertaining\",\n \"shall\",\n \"hereby\",\n \"whereas\",\n \"notwithstanding\",\n \"henceforth\",\n \"aforementioned\",\n \"please consider\",\n \"would like to\",\n \"i would\",\n \"appreciation\",\n \"recommendations\",\n \"thoroughly\",\n \"documentation\",\n];\n\n/** Words/phrases that signal casual register. */\nconst CASUAL_MARKERS = [\n \"gonna\",\n \"wanna\",\n \"kinda\",\n \"sorta\",\n \"gotta\",\n \"dunno\",\n \"lemme\",\n \"yeah\",\n \"yep\",\n \"nah\",\n \"lol\",\n \"omg\",\n \"tbh\",\n \"imo\",\n \"btw\",\n \"nope\",\n \"cuz\",\n \"tho\",\n \"ain't\",\n \"y'all\",\n \"awesome\",\n \"cool\",\n \"dude\",\n \"bro\",\n \"bruh\",\n];\n\n/** Minimum occurrences for a phrase to count as \"common\". */\nconst MIN_PHRASE_FREQUENCY = 2;\n\n/** Maximum number of common phrases to return. */\nconst MAX_COMMON_PHRASES = 10;\n\n/**\n * Analyse text samples and extract communication style markers.\n */\nexport function extractStyleMarkers(samples: string[]): StyleMarkers {\n if (samples.length === 0) {\n return {\n avgSentenceLength: 0,\n usesEmoji: false,\n formality: \"mixed\",\n usesLowercase: false,\n commonPhrases: [],\n };\n }\n\n const joined = samples.join(\" \");\n\n return {\n avgSentenceLength: calcAvgSentenceLength(joined),\n usesEmoji: detectEmoji(joined),\n formality: detectFormality(joined),\n usesLowercase: detectLowercase(joined),\n commonPhrases: findCommonPhrases(samples),\n };\n}\n\n// ── Internals ────────────────────────────────────────────\n\nfunction calcAvgSentenceLength(text: string): number {\n // Split on sentence-ending punctuation, filter empties\n const sentences = text\n .split(/[.!?]+/)\n .map((s) => s.trim())\n .filter((s) => s.length > 0);\n\n if (sentences.length === 0) return 0;\n\n const totalWords = sentences.reduce((sum, s) => {\n const words = s.split(/\\s+/).filter((w) => w.length > 0);\n return sum + words.length;\n }, 0);\n\n return Math.round((totalWords / sentences.length) * 10) / 10;\n}\n\nfunction detectEmoji(text: string): boolean {\n return EMOJI_RE.test(text);\n}\n\nfunction detectFormality(text: string): \"formal\" | \"casual\" | \"mixed\" {\n const lower = text.toLowerCase();\n\n let formalScore = 0;\n for (const marker of FORMAL_MARKERS) {\n // Word-boundary matching prevents false positives\n // (e.g., \"tho\" matching inside \"those\" or \"method\")\n if (new RegExp(`\\\\b${marker}\\\\b`, \"i\").test(lower)) formalScore++;\n }\n\n let casualScore = 0;\n for (const marker of CASUAL_MARKERS) {\n if (new RegExp(`\\\\b${marker}\\\\b`, \"i\").test(lower)) casualScore++;\n }\n\n // Threshold: need at least 2 markers to declare a style\n const THRESHOLD = 2;\n\n if (formalScore >= THRESHOLD && formalScore > casualScore) return \"formal\";\n if (casualScore >= THRESHOLD && casualScore > formalScore) return \"casual\";\n return \"mixed\";\n}\n\nfunction detectLowercase(text: string): boolean {\n // Split into sentences and check what fraction start with lowercase\n const sentences = text\n .split(/[.!?]+/)\n .map((s) => s.trim())\n .filter((s) => s.length > 0);\n\n if (sentences.length === 0) return false;\n\n const lowercaseStarts = sentences.filter((s) => {\n const firstChar = s.charAt(0);\n return firstChar === firstChar.toLowerCase() && firstChar !== firstChar.toUpperCase();\n }).length;\n\n // Majority (>50%) of sentences start lowercase\n return lowercaseStarts / sentences.length > 0.5;\n}\n\n/**\n * Check whether a character is alphanumeric (ASCII a-z, A-Z, 0-9) using\n * code-point comparison. Pure function — no regex, no backtracking.\n */\nfunction isAlnum(ch: string): boolean {\n const c = ch.charCodeAt(0);\n return (\n (c >= 48 && c <= 57) || // 0-9\n (c >= 65 && c <= 90) || // A-Z\n (c >= 97 && c <= 122) // a-z\n );\n}\n\n/**\n * Strip leading and trailing non-alphanumeric characters from `word` using\n * a single linear scan on each side. This replaces the previous\n * `/^[^a-zA-Z0-9]+/` / `/[^a-zA-Z0-9]+$/` regexes, which CodeQL flagged as\n * polynomial ReDoS on uncontrolled input (e.g. long `///...///` runs).\n */\nfunction trimNonAlnum(word: string): string {\n let start = 0;\n let end = word.length;\n while (start < end && !isAlnum(word.charAt(start))) start++;\n while (end > start && !isAlnum(word.charAt(end - 1))) end--;\n return start === 0 && end === word.length ? word : word.slice(start, end);\n}\n\nfunction findCommonPhrases(samples: string[]): string[] {\n const phraseCount = new Map<string, number>();\n\n for (const sample of samples) {\n // Tokenize: split on whitespace, strip edge punctuation with a linear\n // scan (no regex) to eliminate the polynomial backtracking that the\n // previous `replace(/^[^a-zA-Z0-9]+/, \"\")` chain exposed.\n const words = sample\n .split(/\\s+/)\n .map((w) => trimNonAlnum(w))\n .filter((w) => w.length > 0);\n\n // Build 2-gram and 3-gram phrases\n const seenInSample = new Set<string>();\n for (let ngramSize = 2; ngramSize <= 3; ngramSize++) {\n for (let i = 0; i <= words.length - ngramSize; i++) {\n const phrase = words.slice(i, i + ngramSize).join(\" \").toLowerCase();\n // Only count once per sample to avoid inflating from repetition within one sample\n if (!seenInSample.has(phrase)) {\n seenInSample.add(phrase);\n phraseCount.set(phrase, (phraseCount.get(phrase) ?? 0) + 1);\n }\n }\n }\n }\n\n // Filter by minimum frequency and sort by count descending, then alphabetical for stability\n return [...phraseCount.entries()]\n .filter(([, count]) => count >= MIN_PHRASE_FREQUENCY)\n .sort((a, b) => {\n if (b[1] !== a[1]) return b[1] - a[1];\n return a[0].localeCompare(b[0]);\n })\n .slice(0, MAX_COMMON_PHRASES)\n .map(([phrase]) => phrase);\n}\n","/**\n * PII privacy sweep for training export records.\n *\n * Belt-and-suspenders check that runs after Remnic's own\n * privacy controls. Scans instruction, input, and output\n * fields for common PII patterns and replaces matches with\n * [REDACTED].\n */\n\nimport type { TrainingExportRecord } from \"@remnic/core\";\n\nexport interface PrivacySweepResult {\n cleanRecords: TrainingExportRecord[];\n redactedCount: number;\n redactionDetails: { index: number; field: string; pattern: string }[];\n}\n\ninterface PiiPattern {\n name: string;\n regex: RegExp;\n}\n\n/**\n * Ordered list of PII patterns.\n *\n * Order matters: more specific patterns (SSN, credit card)\n * come before broader ones (phone) to avoid partial matches.\n */\nconst PII_PATTERNS: PiiPattern[] = [\n {\n // Email: user@domain.tld\n name: \"email\",\n regex: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}/g,\n },\n {\n // SSN: 123-45-6789 (exactly 3-2-4 digit groups)\n name: \"ssn\",\n regex: /\\b\\d{3}-\\d{2}-\\d{4}\\b/g,\n },\n {\n // Credit card: 4 groups of 4 digits separated by dashes or spaces\n name: \"credit_card\",\n regex: /\\b\\d{4}[-\\s]\\d{4}[-\\s]\\d{4}[-\\s]\\d{4}\\b/g,\n },\n {\n // IP address: four octets 0-255\n name: \"ip_address\",\n regex: /\\b(?:(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\b/g,\n },\n {\n // Phone: optional +1- prefix, then 3-3-4 with dashes, dots, or spaces\n // Also matches (555) 123-4567 format\n name: \"phone\",\n regex: /(?:\\+\\d{1,3}[-.\\s]?)?\\(?\\d{3}\\)?[-.\\s]\\d{3}[-.\\s]\\d{4}\\b/g,\n },\n];\n\nconst SCANNED_FIELDS: (keyof Pick<TrainingExportRecord, \"instruction\" | \"input\" | \"output\">)[] = [\n \"instruction\",\n \"input\",\n \"output\",\n];\n\n/**\n * Scan and redact PII from training export records.\n *\n * Returns a new array of cleaned records, leaving the originals\n * unmodified. The `redactedCount` is the number of records that\n * had at least one redaction. `redactionDetails` lists every\n * individual match with its record index, field, and pattern name.\n */\nexport function sweepPii(records: TrainingExportRecord[]): PrivacySweepResult {\n const redactionDetails: PrivacySweepResult[\"redactionDetails\"] = [];\n const recordHasRedaction = new Set<number>();\n\n const cleanRecords = records.map((record, idx) => {\n const cleaned: TrainingExportRecord = { ...record };\n\n for (const field of SCANNED_FIELDS) {\n let value = record[field];\n if (!value) continue;\n\n for (const pattern of PII_PATTERNS) {\n // Reset lastIndex for global regex reuse\n pattern.regex.lastIndex = 0;\n if (pattern.regex.test(value)) {\n pattern.regex.lastIndex = 0;\n value = value.replace(pattern.regex, \"[REDACTED]\");\n recordHasRedaction.add(idx);\n redactionDetails.push({\n index: idx,\n field,\n pattern: pattern.name,\n });\n }\n }\n\n cleaned[field] = value;\n }\n\n return cleaned;\n });\n\n return {\n cleanRecords,\n redactedCount: recordHasRedaction.size,\n redactionDetails,\n };\n}\n"],"mappings":";;;AAQA;AAAA,EACE;AAAA,EACA;AAAA,OACK;;;ACGA,IAAM,uBAA8C;AAAA,EACzD,MAAM;AAAA,EACN,eAAe;AAAA,EAEf,cAAc,SAAyC;AACrD,UAAM,gBAAgB,QAAQ,IAAI,CAAC,OAAO;AAAA,MACxC,aAAa,EAAE;AAAA,MACf,OAAO,EAAE;AAAA,MACT,QAAQ,EAAE;AAAA,IACZ,EAAE;AACF,WAAO,KAAK,UAAU,eAAe,MAAM,CAAC;AAAA,EAC9C;AACF;;;ACLA,IAAM,oBAAoB;AAO1B,IAAM,qBAA+C;AAAA,EACnD,aAAa;AAAA,IACX;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAAA,EACA,UAAU;AAAA,IACR;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAAA,EACA,WAAW;AAAA,IACT;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAAA,EACA,UAAU;AAAA,IACR;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAEA,IAAM,oBAAoB;AAAA,EACxB;AAAA,EACA;AACF;AAOA,IAAM,uBAA+C;AAAA,EACnD,YAAY;AAAA,EACZ,MAAM;AAAA,EACN,QAAQ;AAAA,EACR,OAAO;AAAA,EACP,YAAY;AAAA,EACZ,UAAU;AAAA,EACV,WAAW;AAAA,EACX,MAAM;AAAA,EACN,UAAU;AAAA,EACV,cAAc;AAAA,EACd,YAAY;AAAA,EACZ,QAAQ;AACV;AAMO,SAAS,wBACd,SACA,SACwB;AACxB,QAAM,WAAW,SAAS,qBAAqB;AAC/C,QAAM,QAAQ,SAAS;AACvB,QAAM,SAAiC,CAAC;AAExC,WAAS,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AACvC,UAAM,SAAS,QAAQ,CAAC;AACxB,UAAM,cAAc,mBAAmB,OAAO,QAAQ;AACtD,UAAM,QAAQ,aAAa,OAAO,WAAW;AAC7C,UAAM,YAAY,mBAAmB,WAAW,KAAK;AAErD,UAAM,YAAY,KAAK,IAAI,UAAU,UAAU,MAAM;AAErD,aAAS,IAAI,GAAG,IAAI,WAAW,KAAK;AAClC,YAAM,iBAAiB,IAAI,KAAK,UAAU;AAC1C,YAAM,WAAW,UAAU,aAAa,EAAE,QAAQ,WAAW,KAAK;AAClE,UAAI,SAAS,OAAO;AAEpB,UAAI,OAAO,eAAe;AACxB,iBAAS,OAAO,YAAY;AAAA,MAC9B;AAEA,aAAO,KAAK;AAAA,QACV,aAAa;AAAA,QACb,OAAO;AAAA,QACP;AAAA,QACA,UAAU,OAAO;AAAA,QACjB,YAAY,OAAO;AAAA,QACnB,WAAW,OAAO;AAAA,MACpB,CAAC;AAAA,IACH;AAAA,EACF;AAEA,SAAO;AACT;AAQA,SAAS,mBAAmB,UAAsC;AAChE,MAAI,CAAC,SAAU,QAAO;AACtB,SAAO,qBAAqB,SAAS,YAAY,CAAC,KAAK;AACzD;AAYA,SAAS,aAAa,aAA6B;AACjD,QAAM,WAAW,YAAY,MAAM,cAAc;AACjD,MAAI,UAAU;AACZ,WAAO,SAAS,CAAC,EAAE,KAAK,EAAE,YAAY;AAAA,EACxC;AACA,SAAO;AACT;;;AC7HA,IAAM,WACJ;AAGF,IAAM,iBAAiB;AAAA,EACrB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAGA,IAAM,iBAAiB;AAAA,EACrB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAGA,IAAM,uBAAuB;AAG7B,IAAM,qBAAqB;AAKpB,SAAS,oBAAoB,SAAiC;AACnE,MAAI,QAAQ,WAAW,GAAG;AACxB,WAAO;AAAA,MACL,mBAAmB;AAAA,MACnB,WAAW;AAAA,MACX,WAAW;AAAA,MACX,eAAe;AAAA,MACf,eAAe,CAAC;AAAA,IAClB;AAAA,EACF;AAEA,QAAM,SAAS,QAAQ,KAAK,GAAG;AAE/B,SAAO;AAAA,IACL,mBAAmB,sBAAsB,MAAM;AAAA,IAC/C,WAAW,YAAY,MAAM;AAAA,IAC7B,WAAW,gBAAgB,MAAM;AAAA,IACjC,eAAe,gBAAgB,MAAM;AAAA,IACrC,eAAe,kBAAkB,OAAO;AAAA,EAC1C;AACF;AAIA,SAAS,sBAAsB,MAAsB;AAEnD,QAAM,YAAY,KACf,MAAM,QAAQ,EACd,IAAI,CAAC,MAAM,EAAE,KAAK,CAAC,EACnB,OAAO,CAAC,MAAM,EAAE,SAAS,CAAC;AAE7B,MAAI,UAAU,WAAW,EAAG,QAAO;AAEnC,QAAM,aAAa,UAAU,OAAO,CAAC,KAAK,MAAM;AAC9C,UAAM,QAAQ,EAAE,MAAM,KAAK,EAAE,OAAO,CAAC,MAAM,EAAE,SAAS,CAAC;AACvD,WAAO,MAAM,MAAM;AAAA,EACrB,GAAG,CAAC;AAEJ,SAAO,KAAK,MAAO,aAAa,UAAU,SAAU,EAAE,IAAI;AAC5D;AAEA,SAAS,YAAY,MAAuB;AAC1C,SAAO,SAAS,KAAK,IAAI;AAC3B;AAEA,SAAS,gBAAgB,MAA6C;AACpE,QAAM,QAAQ,KAAK,YAAY;AAE/B,MAAI,cAAc;AAClB,aAAW,UAAU,gBAAgB;AAGnC,QAAI,IAAI,OAAO,MAAM,MAAM,OAAO,GAAG,EAAE,KAAK,KAAK,EAAG;AAAA,EACtD;AAEA,MAAI,cAAc;AAClB,aAAW,UAAU,gBAAgB;AACnC,QAAI,IAAI,OAAO,MAAM,MAAM,OAAO,GAAG,EAAE,KAAK,KAAK,EAAG;AAAA,EACtD;AAGA,QAAM,YAAY;AAElB,MAAI,eAAe,aAAa,cAAc,YAAa,QAAO;AAClE,MAAI,eAAe,aAAa,cAAc,YAAa,QAAO;AAClE,SAAO;AACT;AAEA,SAAS,gBAAgB,MAAuB;AAE9C,QAAM,YAAY,KACf,MAAM,QAAQ,EACd,IAAI,CAAC,MAAM,EAAE,KAAK,CAAC,EACnB,OAAO,CAAC,MAAM,EAAE,SAAS,CAAC;AAE7B,MAAI,UAAU,WAAW,EAAG,QAAO;AAEnC,QAAM,kBAAkB,UAAU,OAAO,CAAC,MAAM;AAC9C,UAAM,YAAY,EAAE,OAAO,CAAC;AAC5B,WAAO,cAAc,UAAU,YAAY,KAAK,cAAc,UAAU,YAAY;AAAA,EACtF,CAAC,EAAE;AAGH,SAAO,kBAAkB,UAAU,SAAS;AAC9C;AAMA,SAAS,QAAQ,IAAqB;AACpC,QAAM,IAAI,GAAG,WAAW,CAAC;AACzB,SACG,KAAK,MAAM,KAAK;AAAA,EAChB,KAAK,MAAM,KAAK;AAAA,EAChB,KAAK,MAAM,KAAK;AAErB;AAQA,SAAS,aAAa,MAAsB;AAC1C,MAAI,QAAQ;AACZ,MAAI,MAAM,KAAK;AACf,SAAO,QAAQ,OAAO,CAAC,QAAQ,KAAK,OAAO,KAAK,CAAC,EAAG;AACpD,SAAO,MAAM,SAAS,CAAC,QAAQ,KAAK,OAAO,MAAM,CAAC,CAAC,EAAG;AACtD,SAAO,UAAU,KAAK,QAAQ,KAAK,SAAS,OAAO,KAAK,MAAM,OAAO,GAAG;AAC1E;AAEA,SAAS,kBAAkB,SAA6B;AACtD,QAAM,cAAc,oBAAI,IAAoB;AAE5C,aAAW,UAAU,SAAS;AAI5B,UAAM,QAAQ,OACX,MAAM,KAAK,EACX,IAAI,CAAC,MAAM,aAAa,CAAC,CAAC,EAC1B,OAAO,CAAC,MAAM,EAAE,SAAS,CAAC;AAG7B,UAAM,eAAe,oBAAI,IAAY;AACrC,aAAS,YAAY,GAAG,aAAa,GAAG,aAAa;AACnD,eAAS,IAAI,GAAG,KAAK,MAAM,SAAS,WAAW,KAAK;AAClD,cAAM,SAAS,MAAM,MAAM,GAAG,IAAI,SAAS,EAAE,KAAK,GAAG,EAAE,YAAY;AAEnE,YAAI,CAAC,aAAa,IAAI,MAAM,GAAG;AAC7B,uBAAa,IAAI,MAAM;AACvB,sBAAY,IAAI,SAAS,YAAY,IAAI,MAAM,KAAK,KAAK,CAAC;AAAA,QAC5D;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAGA,SAAO,CAAC,GAAG,YAAY,QAAQ,CAAC,EAC7B,OAAO,CAAC,CAAC,EAAE,KAAK,MAAM,SAAS,oBAAoB,EACnD,KAAK,CAAC,GAAG,MAAM;AACd,QAAI,EAAE,CAAC,MAAM,EAAE,CAAC,EAAG,QAAO,EAAE,CAAC,IAAI,EAAE,CAAC;AACpC,WAAO,EAAE,CAAC,EAAE,cAAc,EAAE,CAAC,CAAC;AAAA,EAChC,CAAC,EACA,MAAM,GAAG,kBAAkB,EAC3B,IAAI,CAAC,CAAC,MAAM,MAAM,MAAM;AAC7B;;;AClNA,IAAM,eAA6B;AAAA,EACjC;AAAA;AAAA,IAEE,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA;AAAA,IAEE,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA;AAAA,IAEE,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA;AAAA,IAEE,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA;AAAA;AAAA,IAGE,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AACF;AAEA,IAAM,iBAA2F;AAAA,EAC/F;AAAA,EACA;AAAA,EACA;AACF;AAUO,SAAS,SAAS,SAAqD;AAC5E,QAAM,mBAA2D,CAAC;AAClE,QAAM,qBAAqB,oBAAI,IAAY;AAE3C,QAAM,eAAe,QAAQ,IAAI,CAAC,QAAQ,QAAQ;AAChD,UAAM,UAAgC,EAAE,GAAG,OAAO;AAElD,eAAW,SAAS,gBAAgB;AAClC,UAAI,QAAQ,OAAO,KAAK;AACxB,UAAI,CAAC,MAAO;AAEZ,iBAAW,WAAW,cAAc;AAElC,gBAAQ,MAAM,YAAY;AAC1B,YAAI,QAAQ,MAAM,KAAK,KAAK,GAAG;AAC7B,kBAAQ,MAAM,YAAY;AAC1B,kBAAQ,MAAM,QAAQ,QAAQ,OAAO,YAAY;AACjD,6BAAmB,IAAI,GAAG;AAC1B,2BAAiB,KAAK;AAAA,YACpB,OAAO;AAAA,YACP;AAAA,YACA,SAAS,QAAQ;AAAA,UACnB,CAAC;AAAA,QACH;AAAA,MACF;AAEA,cAAQ,KAAK,IAAI;AAAA,IACnB;AAEA,WAAO;AAAA,EACT,CAAC;AAED,SAAO;AAAA,IACL;AAAA,IACA,eAAe,mBAAmB;AAAA,IAClC;AAAA,EACF;AACF;;;AJ/EO,SAAS,uCAAgD;AAC9D,MAAI,yBAAyB,qBAAqB,IAAI,MAAM,QAAW;AACrE,WAAO;AAAA,EACT;AACA,gCAA8B,oBAAoB;AAClD,SAAO;AACT;AAUA,IAAI;AACF,uCAAqC;AACvC,QAAQ;AAER;","names":[]}
1
+ {"version":3,"sources":["../src/index.ts","../src/adapter.ts","../src/synthesizer.ts","../src/style-extractor.ts","../src/privacy.ts"],"sourcesContent":["/**\n * @remnic/export-weclone\n *\n * WeClone-specific training-data export adapter that converts\n * Remnic memories into Alpaca-format fine-tuning datasets\n * compatible with WeClone / LLaMA Factory.\n */\n\nimport {\n getTrainingExportAdapter,\n registerTrainingExportAdapter,\n type TrainingExportAdapter,\n} from \"@remnic/core\";\n\nimport { wecloneExportAdapter } from \"./adapter.js\";\n\nexport { wecloneExportAdapter } from \"./adapter.js\";\nexport { synthesizeTrainingPairs, type SynthesizerOptions } from \"./synthesizer.js\";\nexport { extractStyleMarkers, type StyleMarkers } from \"./style-extractor.js\";\nexport { sweepPii, type PrivacySweepResult } from \"./privacy.js\";\n\nexport interface TrainingExportRegistry {\n getTrainingExportAdapter(name: string): TrainingExportAdapter | undefined;\n registerTrainingExportAdapter(adapter: TrainingExportAdapter): void;\n}\n\n/**\n * Idempotently register the WeClone adapter with the core training-export\n * registry. Callable multiple times without throwing (CLAUDE.md #13:\n * secondary calls must not crash host processes that pre-register the\n * adapter for test fixtures).\n *\n * Returns true when the adapter was newly registered, false when an adapter\n * with the same name already exists.\n */\nexport function ensureWecloneExportAdapterRegistered(\n registry: TrainingExportRegistry = {\n getTrainingExportAdapter,\n registerTrainingExportAdapter,\n },\n): boolean {\n if (registry.getTrainingExportAdapter(wecloneExportAdapter.name) !== undefined) {\n return false;\n }\n registry.registerTrainingExportAdapter(wecloneExportAdapter);\n return true;\n}\n\n// Side-effect registration: importing this module registers the adapter.\n// Callers that need to manage registration manually (e.g. tests that call\n// `clearTrainingExportAdapters()`) can re-invoke\n// `ensureWecloneExportAdapterRegistered()` after clearing.\n//\n// The try/catch keeps import-time errors from breaking unrelated callers —\n// the adapter surfaces `formatRecords` purely, so a failure here would be\n// surprising, but defensive coding keeps CLI startup resilient.\ntry {\n ensureWecloneExportAdapterRegistered();\n} catch {\n // Swallow — explicit callers can re-invoke ensureWecloneExportAdapterRegistered().\n}\n","/**\n * WeClone Alpaca-format training export adapter.\n *\n * Converts TrainingExportRecord[] into the JSON format that\n * WeClone / LLaMA Factory expects for fine-tuning:\n *\n * [{ \"instruction\": \"...\", \"input\": \"\", \"output\": \"...\" }, ...]\n *\n * Only the three Alpaca fields are emitted; Remnic-specific\n * metadata (category, confidence, sourceIds) is stripped.\n */\n\nimport type { TrainingExportAdapter, TrainingExportRecord } from \"@remnic/core\";\n\nexport const wecloneExportAdapter: TrainingExportAdapter = {\n name: \"weclone\",\n fileExtension: \".json\",\n\n formatRecords(records: TrainingExportRecord[]): string {\n const alpacaRecords = records.map((r) => ({\n instruction: r.instruction,\n input: r.input,\n output: r.output,\n }));\n return JSON.stringify(alpacaRecords, null, 2);\n },\n};\n","/**\n * Training-pair synthesizer.\n *\n * Converts Remnic's flat TrainingExportRecord[] — where\n * `instruction` is a natural-language description and\n * `category` identifies the memory type — into natural\n * conversational question-answer pairs suitable for\n * WeClone / LLaMA Factory fine-tuning.\n *\n * Uses template-based question generation (no LLM calls).\n */\n\nimport type { TrainingExportRecord } from \"@remnic/core\";\nimport type { StyleMarkers } from \"./style-extractor.js\";\n\nexport interface SynthesizerOptions {\n styleMarkers?: StyleMarkers;\n maxPairsPerRecord?: number;\n}\n\n/** Default limit for pairs generated per input record. */\nconst DEFAULT_MAX_PAIRS = 1;\n\n/**\n * Question templates keyed by template group.\n * Each array provides variety; the synthesizer picks\n * based on record index for deterministic output.\n */\nconst QUESTION_TEMPLATES: Record<string, string[]> = {\n preferences: [\n \"What kind of {topic} do you like?\",\n \"What's your preference for {topic}?\",\n \"What are your favorite {topic}?\",\n ],\n opinions: [\n \"What do you think about {topic}?\",\n \"How do you feel about {topic}?\",\n \"What's your opinion on {topic}?\",\n ],\n expertise: [\n \"Tell me about {topic}.\",\n \"What do you know about {topic}?\",\n \"Can you explain {topic}?\",\n ],\n personal: [\n \"Can you tell me about your {topic}?\",\n \"Tell me about your {topic}.\",\n \"What can you share about your {topic}?\",\n ],\n};\n\nconst DEFAULT_TEMPLATES = [\n \"Tell me about {topic}.\",\n \"What can you share about {topic}?\",\n];\n\n/**\n * Maps record.category values (from core converter) to\n * QUESTION_TEMPLATES keys. Categories not listed here\n * fall through to DEFAULT_TEMPLATES.\n */\nconst CATEGORY_TO_TEMPLATE: Record<string, string> = {\n preference: \"preferences\",\n fact: \"expertise\",\n entity: \"expertise\",\n skill: \"expertise\",\n correction: \"opinions\",\n decision: \"opinions\",\n principle: \"opinions\",\n rule: \"opinions\",\n personal: \"personal\",\n relationship: \"personal\",\n commitment: \"personal\",\n moment: \"personal\",\n};\n\n/**\n * Synthesize natural conversational training pairs from\n * category-tagged memory records.\n */\nexport function synthesizeTrainingPairs(\n records: TrainingExportRecord[],\n options?: SynthesizerOptions,\n): TrainingExportRecord[] {\n const maxPairs = resolveMaxPairsPerRecord(options?.maxPairsPerRecord);\n const style = options?.styleMarkers;\n const result: TrainingExportRecord[] = [];\n\n for (let i = 0; i < records.length; i++) {\n const record = records[i];\n const templateKey = resolveTemplateKey(record.category);\n const topic = extractTopic(record.instruction);\n const templates = QUESTION_TEMPLATES[templateKey] ?? DEFAULT_TEMPLATES;\n\n const pairCount = Math.min(maxPairs, templates.length);\n\n for (let j = 0; j < pairCount; j++) {\n const templateIndex = (i + j) % templates.length;\n const question = templates[templateIndex].replace(\"{topic}\", topic);\n let output = record.output;\n\n if (style?.usesLowercase) {\n output = output.toLowerCase();\n }\n\n result.push({\n instruction: question,\n input: \"\",\n output,\n category: record.category,\n confidence: record.confidence,\n sourceIds: record.sourceIds,\n });\n }\n }\n\n return result;\n}\n\n// ── Internals ────────────────────────────────────────────\n\nfunction resolveMaxPairsPerRecord(maxPairsPerRecord: number | undefined): number {\n const maxPairs = maxPairsPerRecord ?? DEFAULT_MAX_PAIRS;\n if (\n !Number.isFinite(maxPairs) ||\n !Number.isInteger(maxPairs) ||\n maxPairs <= 0\n ) {\n throw new RangeError(\"maxPairsPerRecord must be a finite positive integer\");\n }\n return maxPairs;\n}\n\n/**\n * Resolve a record's category field to a QUESTION_TEMPLATES key.\n * Falls back to empty string (which triggers DEFAULT_TEMPLATES).\n */\nfunction resolveTemplateKey(category: string | undefined): string {\n if (!category) return \"\";\n return CATEGORY_TO_TEMPLATE[category.toLowerCase()] ?? \"\";\n}\n\n/**\n * Extract a human-readable topic from the instruction string.\n *\n * The core converter produces instructions like:\n * \"Recall a factual memory (food, cooking)\"\n * \"Recall a user preference\"\n *\n * When parenthesized tags are present, use them as the topic.\n * Otherwise fall back to \"this\".\n */\nfunction extractTopic(instruction: string): string {\n const tagMatch = instruction.match(/\\(([^()]+)\\)/);\n if (tagMatch) {\n return tagMatch[1].trim().toLowerCase();\n }\n return \"this\";\n}\n","/**\n * Communication style marker extraction.\n *\n * Analyzes text samples using simple heuristics to produce\n * a StyleMarkers profile. No LLM calls — pure regex and\n * counting.\n */\n\nexport interface StyleMarkers {\n avgSentenceLength: number;\n usesEmoji: boolean;\n formality: \"formal\" | \"casual\" | \"mixed\";\n usesLowercase: boolean;\n commonPhrases: string[];\n}\n\n/**\n * Regex matching most common emoji code-point ranges.\n * Covers Emoticons, Dingbats, Transport/Map symbols,\n * Misc symbols, and supplemental blocks.\n */\nconst EMOJI_RE =\n /[\\u{1F600}-\\u{1F64F}\\u{1F300}-\\u{1F5FF}\\u{1F680}-\\u{1F6FF}\\u{1F1E0}-\\u{1F1FF}\\u{2600}-\\u{27BF}\\u{2702}-\\u{27B0}\\u{FE00}-\\u{FE0F}\\u{1FA00}-\\u{1FA6F}\\u{1FA70}-\\u{1FAFF}\\u{2328}\\u{23CF}\\u{23E9}-\\u{23F3}\\u{23F8}-\\u{23FA}\\u{200D}\\u{20E3}\\u{FE0F}\\u{E0020}-\\u{E007F}\\u{2B50}\\u{2B55}\\u{2934}\\u{2935}\\u{25AA}-\\u{25FE}\\u{2600}-\\u{26FF}\\u{2700}-\\u{27BF}\\u{231A}\\u{231B}\\u{23E9}-\\u{23F3}\\u{23F8}-\\u{23FA}\\u{25FB}-\\u{25FE}\\u{2614}\\u{2615}\\u{2648}-\\u{2653}\\u{267F}\\u{2693}\\u{26A1}\\u{26AA}\\u{26AB}\\u{26BD}\\u{26BE}\\u{26C4}\\u{26C5}\\u{26CE}\\u{26D4}\\u{26EA}\\u{26F2}\\u{26F3}\\u{26F5}\\u{26FA}\\u{26FD}\\u{2702}\\u{2705}\\u{2708}-\\u{270D}\\u{270F}\\u{2712}\\u{2714}\\u{2716}\\u{271D}\\u{2721}\\u{2728}\\u{2733}\\u{2734}\\u{2744}\\u{2747}\\u{274C}\\u{274E}\\u{2753}-\\u{2755}\\u{2757}\\u{2763}\\u{2764}\\u{2795}-\\u{2797}\\u{27A1}\\u{27B0}\\u{27BF}\\u{2934}\\u{2935}]/u;\n\n/** Words/phrases that signal formal register. */\nconst FORMAL_MARKERS = [\n \"furthermore\",\n \"however\",\n \"therefore\",\n \"moreover\",\n \"consequently\",\n \"nevertheless\",\n \"in addition\",\n \"accordingly\",\n \"subsequently\",\n \"regarding\",\n \"pertaining\",\n \"shall\",\n \"hereby\",\n \"whereas\",\n \"notwithstanding\",\n \"henceforth\",\n \"aforementioned\",\n \"please consider\",\n \"would like to\",\n \"i would\",\n \"appreciation\",\n \"recommendations\",\n \"thoroughly\",\n \"documentation\",\n];\n\n/** Words/phrases that signal casual register. */\nconst CASUAL_MARKERS = [\n \"gonna\",\n \"wanna\",\n \"kinda\",\n \"sorta\",\n \"gotta\",\n \"dunno\",\n \"lemme\",\n \"yeah\",\n \"yep\",\n \"nah\",\n \"lol\",\n \"omg\",\n \"tbh\",\n \"imo\",\n \"btw\",\n \"nope\",\n \"cuz\",\n \"tho\",\n \"ain't\",\n \"y'all\",\n \"awesome\",\n \"cool\",\n \"dude\",\n \"bro\",\n \"bruh\",\n];\n\n/** Minimum occurrences for a phrase to count as \"common\". */\nconst MIN_PHRASE_FREQUENCY = 2;\n\n/** Maximum number of common phrases to return. */\nconst MAX_COMMON_PHRASES = 10;\n\n/**\n * Analyse text samples and extract communication style markers.\n */\nexport function extractStyleMarkers(samples: string[]): StyleMarkers {\n if (samples.length === 0) {\n return {\n avgSentenceLength: 0,\n usesEmoji: false,\n formality: \"mixed\",\n usesLowercase: false,\n commonPhrases: [],\n };\n }\n\n const joined = samples.join(\" \");\n\n return {\n avgSentenceLength: calcAvgSentenceLength(joined),\n usesEmoji: detectEmoji(joined),\n formality: detectFormality(joined),\n usesLowercase: detectLowercase(joined),\n commonPhrases: findCommonPhrases(samples),\n };\n}\n\n// ── Internals ────────────────────────────────────────────\n\nfunction calcAvgSentenceLength(text: string): number {\n // Split on sentence-ending punctuation, filter empties\n const sentences = text\n .split(/[.!?]+/)\n .map((s) => s.trim())\n .filter((s) => s.length > 0);\n\n if (sentences.length === 0) return 0;\n\n const totalWords = sentences.reduce((sum, s) => {\n const words = s.split(/\\s+/).filter((w) => w.length > 0);\n return sum + words.length;\n }, 0);\n\n return Math.round((totalWords / sentences.length) * 10) / 10;\n}\n\nfunction detectEmoji(text: string): boolean {\n return EMOJI_RE.test(text);\n}\n\nfunction detectFormality(text: string): \"formal\" | \"casual\" | \"mixed\" {\n const lower = text.toLowerCase();\n\n let formalScore = 0;\n for (const marker of FORMAL_MARKERS) {\n // Word-boundary matching prevents false positives\n // (e.g., \"tho\" matching inside \"those\" or \"method\")\n if (new RegExp(`\\\\b${marker}\\\\b`, \"i\").test(lower)) formalScore++;\n }\n\n let casualScore = 0;\n for (const marker of CASUAL_MARKERS) {\n if (new RegExp(`\\\\b${marker}\\\\b`, \"i\").test(lower)) casualScore++;\n }\n\n // Threshold: need at least 2 markers to declare a style\n const THRESHOLD = 2;\n\n if (formalScore >= THRESHOLD && formalScore > casualScore) return \"formal\";\n if (casualScore >= THRESHOLD && casualScore > formalScore) return \"casual\";\n return \"mixed\";\n}\n\nfunction detectLowercase(text: string): boolean {\n // Split into sentences and check what fraction start with lowercase\n const sentences = text\n .split(/[.!?]+/)\n .map((s) => s.trim())\n .filter((s) => s.length > 0);\n\n if (sentences.length === 0) return false;\n\n const lowercaseStarts = sentences.filter((s) => {\n const firstChar = s.charAt(0);\n return firstChar === firstChar.toLowerCase() && firstChar !== firstChar.toUpperCase();\n }).length;\n\n // Majority (>50%) of sentences start lowercase\n return lowercaseStarts / sentences.length > 0.5;\n}\n\n/**\n * Check whether a character is alphanumeric (ASCII a-z, A-Z, 0-9) using\n * code-point comparison. Pure function — no regex, no backtracking.\n */\nfunction isAlnum(ch: string): boolean {\n const c = ch.charCodeAt(0);\n return (\n (c >= 48 && c <= 57) || // 0-9\n (c >= 65 && c <= 90) || // A-Z\n (c >= 97 && c <= 122) // a-z\n );\n}\n\n/**\n * Strip leading and trailing non-alphanumeric characters from `word` using\n * a single linear scan on each side. This replaces the previous\n * `/^[^a-zA-Z0-9]+/` / `/[^a-zA-Z0-9]+$/` regexes, which CodeQL flagged as\n * polynomial ReDoS on uncontrolled input (e.g. long `///...///` runs).\n */\nfunction trimNonAlnum(word: string): string {\n let start = 0;\n let end = word.length;\n while (start < end && !isAlnum(word.charAt(start))) start++;\n while (end > start && !isAlnum(word.charAt(end - 1))) end--;\n return start === 0 && end === word.length ? word : word.slice(start, end);\n}\n\nfunction findCommonPhrases(samples: string[]): string[] {\n const phraseCount = new Map<string, number>();\n\n for (const sample of samples) {\n // Tokenize: split on whitespace, strip edge punctuation with a linear\n // scan (no regex) to eliminate the polynomial backtracking that the\n // previous `replace(/^[^a-zA-Z0-9]+/, \"\")` chain exposed.\n const words = sample\n .split(/\\s+/)\n .map((w) => trimNonAlnum(w))\n .filter((w) => w.length > 0);\n\n // Build 2-gram and 3-gram phrases\n const seenInSample = new Set<string>();\n for (let ngramSize = 2; ngramSize <= 3; ngramSize++) {\n for (let i = 0; i <= words.length - ngramSize; i++) {\n const phrase = words.slice(i, i + ngramSize).join(\" \").toLowerCase();\n // Only count once per sample to avoid inflating from repetition within one sample\n if (!seenInSample.has(phrase)) {\n seenInSample.add(phrase);\n phraseCount.set(phrase, (phraseCount.get(phrase) ?? 0) + 1);\n }\n }\n }\n }\n\n // Filter by minimum frequency and sort by count descending, then alphabetical for stability\n return [...phraseCount.entries()]\n .filter(([, count]) => count >= MIN_PHRASE_FREQUENCY)\n .sort((a, b) => {\n if (b[1] !== a[1]) return b[1] - a[1];\n return a[0].localeCompare(b[0]);\n })\n .slice(0, MAX_COMMON_PHRASES)\n .map(([phrase]) => phrase);\n}\n","/**\n * PII privacy sweep for training export records.\n *\n * Belt-and-suspenders check that runs after Remnic's own\n * privacy controls. Scans instruction, input, and output\n * fields for common PII patterns and replaces matches with\n * [REDACTED].\n */\n\nimport type { TrainingExportRecord } from \"@remnic/core\";\n\nexport interface PrivacySweepResult {\n cleanRecords: TrainingExportRecord[];\n redactedCount: number;\n redactionDetails: { index: number; field: string; pattern: string }[];\n}\n\ninterface PiiPattern {\n name: string;\n regex: RegExp;\n validate?: (match: string) => boolean;\n}\n\nfunction normalizeCardCandidate(value: string): string {\n return value.replace(/[-\\s]/g, \"\");\n}\n\nfunction passesLuhn(value: string): boolean {\n let sum = 0;\n let doubleDigit = false;\n\n for (let i = value.length - 1; i >= 0; i -= 1) {\n let digit = Number(value[i]);\n if (!Number.isInteger(digit)) return false;\n if (doubleDigit) {\n digit *= 2;\n if (digit > 9) digit -= 9;\n }\n sum += digit;\n doubleDigit = !doubleDigit;\n }\n\n return sum > 0 && sum % 10 === 0;\n}\n\nfunction isCreditCardCandidate(match: string): boolean {\n const digits = normalizeCardCandidate(match);\n return digits.length >= 13 && digits.length <= 19 && passesLuhn(digits);\n}\n\nconst CREDIT_CARD_PATTERNS: PiiPattern[] = [19, 18, 17, 16, 15, 14, 13].map(\n (digitCount) => ({\n name: \"credit_card\",\n regex: new RegExp(`\\\\b\\\\d(?:[-\\\\s]?\\\\d){${digitCount - 1}}\\\\b`, \"g\"),\n validate: isCreditCardCandidate,\n }),\n);\n\n/**\n * Ordered list of PII patterns.\n *\n * Order matters: more specific patterns (SSN, credit card)\n * come before broader ones (phone) to avoid partial matches.\n */\nconst PII_PATTERNS: PiiPattern[] = [\n {\n // Email: user@domain.tld\n name: \"email\",\n regex: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}/g,\n },\n {\n // SSN: 123-45-6789 (exactly 3-2-4 digit groups)\n name: \"ssn\",\n regex: /\\b\\d{3}-\\d{2}-\\d{4}\\b/g,\n },\n // Credit card: 13-19 digits, optionally separated by dashes or spaces.\n // Try longest candidates first so valid 19-digit cards are preserved while\n // shorter cards next to numeric metadata can still be reconsidered.\n ...CREDIT_CARD_PATTERNS,\n {\n // IP address: four octets 0-255\n name: \"ip_address\",\n regex: /\\b(?:(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\b/g,\n },\n {\n // Phone: optional +1- prefix, then 3-3-4 with dashes, dots, or spaces\n // Also matches (555) 123-4567 format\n name: \"phone\",\n regex: /(?:\\+\\d{1,3}[-.\\s]?)?\\(?\\d{3}\\)?[-.\\s]\\d{3}[-.\\s]\\d{4}\\b/g,\n },\n];\n\nconst SCANNED_FIELDS: (keyof Pick<TrainingExportRecord, \"instruction\" | \"input\" | \"output\">)[] = [\n \"instruction\",\n \"input\",\n \"output\",\n];\n\n/**\n * Scan and redact PII from training export records.\n *\n * Returns a new array of cleaned records, leaving the originals\n * unmodified. The `redactedCount` is the number of records that\n * had at least one redaction. `redactionDetails` lists every\n * individual match with its record index, field, and pattern name.\n */\nexport function sweepPii(records: TrainingExportRecord[]): PrivacySweepResult {\n const redactionDetails: PrivacySweepResult[\"redactionDetails\"] = [];\n const recordHasRedaction = new Set<number>();\n\n const cleanRecords = records.map((record, idx) => {\n const cleaned: TrainingExportRecord = { ...record };\n\n for (const field of SCANNED_FIELDS) {\n let value = record[field];\n if (!value) continue;\n\n for (const pattern of PII_PATTERNS) {\n // Reset lastIndex for global regex reuse\n pattern.regex.lastIndex = 0;\n value = value.replace(pattern.regex, (match) => {\n if (pattern.validate && !pattern.validate(match)) return match;\n recordHasRedaction.add(idx);\n redactionDetails.push({\n index: idx,\n field,\n pattern: pattern.name,\n });\n return \"[REDACTED]\";\n });\n }\n\n cleaned[field] = value;\n }\n\n return cleaned;\n });\n\n return {\n cleanRecords,\n redactedCount: recordHasRedaction.size,\n redactionDetails,\n };\n}\n"],"mappings":";;;AAQA;AAAA,EACE;AAAA,EACA;AAAA,OAEK;;;ACEA,IAAM,uBAA8C;AAAA,EACzD,MAAM;AAAA,EACN,eAAe;AAAA,EAEf,cAAc,SAAyC;AACrD,UAAM,gBAAgB,QAAQ,IAAI,CAAC,OAAO;AAAA,MACxC,aAAa,EAAE;AAAA,MACf,OAAO,EAAE;AAAA,MACT,QAAQ,EAAE;AAAA,IACZ,EAAE;AACF,WAAO,KAAK,UAAU,eAAe,MAAM,CAAC;AAAA,EAC9C;AACF;;;ACLA,IAAM,oBAAoB;AAO1B,IAAM,qBAA+C;AAAA,EACnD,aAAa;AAAA,IACX;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAAA,EACA,UAAU;AAAA,IACR;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAAA,EACA,WAAW;AAAA,IACT;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAAA,EACA,UAAU;AAAA,IACR;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAEA,IAAM,oBAAoB;AAAA,EACxB;AAAA,EACA;AACF;AAOA,IAAM,uBAA+C;AAAA,EACnD,YAAY;AAAA,EACZ,MAAM;AAAA,EACN,QAAQ;AAAA,EACR,OAAO;AAAA,EACP,YAAY;AAAA,EACZ,UAAU;AAAA,EACV,WAAW;AAAA,EACX,MAAM;AAAA,EACN,UAAU;AAAA,EACV,cAAc;AAAA,EACd,YAAY;AAAA,EACZ,QAAQ;AACV;AAMO,SAAS,wBACd,SACA,SACwB;AACxB,QAAM,WAAW,yBAAyB,SAAS,iBAAiB;AACpE,QAAM,QAAQ,SAAS;AACvB,QAAM,SAAiC,CAAC;AAExC,WAAS,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AACvC,UAAM,SAAS,QAAQ,CAAC;AACxB,UAAM,cAAc,mBAAmB,OAAO,QAAQ;AACtD,UAAM,QAAQ,aAAa,OAAO,WAAW;AAC7C,UAAM,YAAY,mBAAmB,WAAW,KAAK;AAErD,UAAM,YAAY,KAAK,IAAI,UAAU,UAAU,MAAM;AAErD,aAAS,IAAI,GAAG,IAAI,WAAW,KAAK;AAClC,YAAM,iBAAiB,IAAI,KAAK,UAAU;AAC1C,YAAM,WAAW,UAAU,aAAa,EAAE,QAAQ,WAAW,KAAK;AAClE,UAAI,SAAS,OAAO;AAEpB,UAAI,OAAO,eAAe;AACxB,iBAAS,OAAO,YAAY;AAAA,MAC9B;AAEA,aAAO,KAAK;AAAA,QACV,aAAa;AAAA,QACb,OAAO;AAAA,QACP;AAAA,QACA,UAAU,OAAO;AAAA,QACjB,YAAY,OAAO;AAAA,QACnB,WAAW,OAAO;AAAA,MACpB,CAAC;AAAA,IACH;AAAA,EACF;AAEA,SAAO;AACT;AAIA,SAAS,yBAAyB,mBAA+C;AAC/E,QAAM,WAAW,qBAAqB;AACtC,MACE,CAAC,OAAO,SAAS,QAAQ,KACzB,CAAC,OAAO,UAAU,QAAQ,KAC1B,YAAY,GACZ;AACA,UAAM,IAAI,WAAW,qDAAqD;AAAA,EAC5E;AACA,SAAO;AACT;AAMA,SAAS,mBAAmB,UAAsC;AAChE,MAAI,CAAC,SAAU,QAAO;AACtB,SAAO,qBAAqB,SAAS,YAAY,CAAC,KAAK;AACzD;AAYA,SAAS,aAAa,aAA6B;AACjD,QAAM,WAAW,YAAY,MAAM,cAAc;AACjD,MAAI,UAAU;AACZ,WAAO,SAAS,CAAC,EAAE,KAAK,EAAE,YAAY;AAAA,EACxC;AACA,SAAO;AACT;;;ACzIA,IAAM,WACJ;AAGF,IAAM,iBAAiB;AAAA,EACrB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAGA,IAAM,iBAAiB;AAAA,EACrB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAGA,IAAM,uBAAuB;AAG7B,IAAM,qBAAqB;AAKpB,SAAS,oBAAoB,SAAiC;AACnE,MAAI,QAAQ,WAAW,GAAG;AACxB,WAAO;AAAA,MACL,mBAAmB;AAAA,MACnB,WAAW;AAAA,MACX,WAAW;AAAA,MACX,eAAe;AAAA,MACf,eAAe,CAAC;AAAA,IAClB;AAAA,EACF;AAEA,QAAM,SAAS,QAAQ,KAAK,GAAG;AAE/B,SAAO;AAAA,IACL,mBAAmB,sBAAsB,MAAM;AAAA,IAC/C,WAAW,YAAY,MAAM;AAAA,IAC7B,WAAW,gBAAgB,MAAM;AAAA,IACjC,eAAe,gBAAgB,MAAM;AAAA,IACrC,eAAe,kBAAkB,OAAO;AAAA,EAC1C;AACF;AAIA,SAAS,sBAAsB,MAAsB;AAEnD,QAAM,YAAY,KACf,MAAM,QAAQ,EACd,IAAI,CAAC,MAAM,EAAE,KAAK,CAAC,EACnB,OAAO,CAAC,MAAM,EAAE,SAAS,CAAC;AAE7B,MAAI,UAAU,WAAW,EAAG,QAAO;AAEnC,QAAM,aAAa,UAAU,OAAO,CAAC,KAAK,MAAM;AAC9C,UAAM,QAAQ,EAAE,MAAM,KAAK,EAAE,OAAO,CAAC,MAAM,EAAE,SAAS,CAAC;AACvD,WAAO,MAAM,MAAM;AAAA,EACrB,GAAG,CAAC;AAEJ,SAAO,KAAK,MAAO,aAAa,UAAU,SAAU,EAAE,IAAI;AAC5D;AAEA,SAAS,YAAY,MAAuB;AAC1C,SAAO,SAAS,KAAK,IAAI;AAC3B;AAEA,SAAS,gBAAgB,MAA6C;AACpE,QAAM,QAAQ,KAAK,YAAY;AAE/B,MAAI,cAAc;AAClB,aAAW,UAAU,gBAAgB;AAGnC,QAAI,IAAI,OAAO,MAAM,MAAM,OAAO,GAAG,EAAE,KAAK,KAAK,EAAG;AAAA,EACtD;AAEA,MAAI,cAAc;AAClB,aAAW,UAAU,gBAAgB;AACnC,QAAI,IAAI,OAAO,MAAM,MAAM,OAAO,GAAG,EAAE,KAAK,KAAK,EAAG;AAAA,EACtD;AAGA,QAAM,YAAY;AAElB,MAAI,eAAe,aAAa,cAAc,YAAa,QAAO;AAClE,MAAI,eAAe,aAAa,cAAc,YAAa,QAAO;AAClE,SAAO;AACT;AAEA,SAAS,gBAAgB,MAAuB;AAE9C,QAAM,YAAY,KACf,MAAM,QAAQ,EACd,IAAI,CAAC,MAAM,EAAE,KAAK,CAAC,EACnB,OAAO,CAAC,MAAM,EAAE,SAAS,CAAC;AAE7B,MAAI,UAAU,WAAW,EAAG,QAAO;AAEnC,QAAM,kBAAkB,UAAU,OAAO,CAAC,MAAM;AAC9C,UAAM,YAAY,EAAE,OAAO,CAAC;AAC5B,WAAO,cAAc,UAAU,YAAY,KAAK,cAAc,UAAU,YAAY;AAAA,EACtF,CAAC,EAAE;AAGH,SAAO,kBAAkB,UAAU,SAAS;AAC9C;AAMA,SAAS,QAAQ,IAAqB;AACpC,QAAM,IAAI,GAAG,WAAW,CAAC;AACzB,SACG,KAAK,MAAM,KAAK;AAAA,EAChB,KAAK,MAAM,KAAK;AAAA,EAChB,KAAK,MAAM,KAAK;AAErB;AAQA,SAAS,aAAa,MAAsB;AAC1C,MAAI,QAAQ;AACZ,MAAI,MAAM,KAAK;AACf,SAAO,QAAQ,OAAO,CAAC,QAAQ,KAAK,OAAO,KAAK,CAAC,EAAG;AACpD,SAAO,MAAM,SAAS,CAAC,QAAQ,KAAK,OAAO,MAAM,CAAC,CAAC,EAAG;AACtD,SAAO,UAAU,KAAK,QAAQ,KAAK,SAAS,OAAO,KAAK,MAAM,OAAO,GAAG;AAC1E;AAEA,SAAS,kBAAkB,SAA6B;AACtD,QAAM,cAAc,oBAAI,IAAoB;AAE5C,aAAW,UAAU,SAAS;AAI5B,UAAM,QAAQ,OACX,MAAM,KAAK,EACX,IAAI,CAAC,MAAM,aAAa,CAAC,CAAC,EAC1B,OAAO,CAAC,MAAM,EAAE,SAAS,CAAC;AAG7B,UAAM,eAAe,oBAAI,IAAY;AACrC,aAAS,YAAY,GAAG,aAAa,GAAG,aAAa;AACnD,eAAS,IAAI,GAAG,KAAK,MAAM,SAAS,WAAW,KAAK;AAClD,cAAM,SAAS,MAAM,MAAM,GAAG,IAAI,SAAS,EAAE,KAAK,GAAG,EAAE,YAAY;AAEnE,YAAI,CAAC,aAAa,IAAI,MAAM,GAAG;AAC7B,uBAAa,IAAI,MAAM;AACvB,sBAAY,IAAI,SAAS,YAAY,IAAI,MAAM,KAAK,KAAK,CAAC;AAAA,QAC5D;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAGA,SAAO,CAAC,GAAG,YAAY,QAAQ,CAAC,EAC7B,OAAO,CAAC,CAAC,EAAE,KAAK,MAAM,SAAS,oBAAoB,EACnD,KAAK,CAAC,GAAG,MAAM;AACd,QAAI,EAAE,CAAC,MAAM,EAAE,CAAC,EAAG,QAAO,EAAE,CAAC,IAAI,EAAE,CAAC;AACpC,WAAO,EAAE,CAAC,EAAE,cAAc,EAAE,CAAC,CAAC;AAAA,EAChC,CAAC,EACA,MAAM,GAAG,kBAAkB,EAC3B,IAAI,CAAC,CAAC,MAAM,MAAM,MAAM;AAC7B;;;ACvNA,SAAS,uBAAuB,OAAuB;AACrD,SAAO,MAAM,QAAQ,UAAU,EAAE;AACnC;AAEA,SAAS,WAAW,OAAwB;AAC1C,MAAI,MAAM;AACV,MAAI,cAAc;AAElB,WAAS,IAAI,MAAM,SAAS,GAAG,KAAK,GAAG,KAAK,GAAG;AAC7C,QAAI,QAAQ,OAAO,MAAM,CAAC,CAAC;AAC3B,QAAI,CAAC,OAAO,UAAU,KAAK,EAAG,QAAO;AACrC,QAAI,aAAa;AACf,eAAS;AACT,UAAI,QAAQ,EAAG,UAAS;AAAA,IAC1B;AACA,WAAO;AACP,kBAAc,CAAC;AAAA,EACjB;AAEA,SAAO,MAAM,KAAK,MAAM,OAAO;AACjC;AAEA,SAAS,sBAAsB,OAAwB;AACrD,QAAM,SAAS,uBAAuB,KAAK;AAC3C,SAAO,OAAO,UAAU,MAAM,OAAO,UAAU,MAAM,WAAW,MAAM;AACxE;AAEA,IAAM,uBAAqC,CAAC,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,EAAE,EAAE;AAAA,EACtE,CAAC,gBAAgB;AAAA,IACf,MAAM;AAAA,IACN,OAAO,IAAI,OAAO,wBAAwB,aAAa,CAAC,QAAQ,GAAG;AAAA,IACnE,UAAU;AAAA,EACZ;AACF;AAQA,IAAM,eAA6B;AAAA,EACjC;AAAA;AAAA,IAEE,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA;AAAA,IAEE,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA,EAIA,GAAG;AAAA,EACH;AAAA;AAAA,IAEE,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA;AAAA;AAAA,IAGE,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AACF;AAEA,IAAM,iBAA2F;AAAA,EAC/F;AAAA,EACA;AAAA,EACA;AACF;AAUO,SAAS,SAAS,SAAqD;AAC5E,QAAM,mBAA2D,CAAC;AAClE,QAAM,qBAAqB,oBAAI,IAAY;AAE3C,QAAM,eAAe,QAAQ,IAAI,CAAC,QAAQ,QAAQ;AAChD,UAAM,UAAgC,EAAE,GAAG,OAAO;AAElD,eAAW,SAAS,gBAAgB;AAClC,UAAI,QAAQ,OAAO,KAAK;AACxB,UAAI,CAAC,MAAO;AAEZ,iBAAW,WAAW,cAAc;AAElC,gBAAQ,MAAM,YAAY;AAC1B,gBAAQ,MAAM,QAAQ,QAAQ,OAAO,CAAC,UAAU;AAC9C,cAAI,QAAQ,YAAY,CAAC,QAAQ,SAAS,KAAK,EAAG,QAAO;AACzD,6BAAmB,IAAI,GAAG;AAC1B,2BAAiB,KAAK;AAAA,YACpB,OAAO;AAAA,YACP;AAAA,YACA,SAAS,QAAQ;AAAA,UACnB,CAAC;AACD,iBAAO;AAAA,QACT,CAAC;AAAA,MACH;AAEA,cAAQ,KAAK,IAAI;AAAA,IACnB;AAEA,WAAO;AAAA,EACT,CAAC;AAED,SAAO;AAAA,IACL;AAAA,IACA,eAAe,mBAAmB;AAAA,IAClC;AAAA,EACF;AACF;;;AJ5GO,SAAS,qCACd,WAAmC;AAAA,EACjC;AAAA,EACA;AACF,GACS;AACT,MAAI,SAAS,yBAAyB,qBAAqB,IAAI,MAAM,QAAW;AAC9E,WAAO;AAAA,EACT;AACA,WAAS,8BAA8B,oBAAoB;AAC3D,SAAO;AACT;AAUA,IAAI;AACF,uCAAqC;AACvC,QAAQ;AAER;","names":[]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@remnic/export-weclone",
3
- "version": "1.0.1",
3
+ "version": "9.3.515",
4
4
  "description": "Export Remnic memories as WeClone-compatible Alpaca-format fine-tuning datasets",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -19,13 +19,14 @@
19
19
  "access": "public",
20
20
  "provenance": true
21
21
  },
22
- "dependencies": {
23
- "@remnic/core": "^1.0.3"
22
+ "peerDependencies": {
23
+ "@remnic/core": "^9.3.515"
24
24
  },
25
25
  "devDependencies": {
26
26
  "tsup": "^8.0.0",
27
27
  "typescript": "^5.7.0",
28
- "tsx": "^4.0.0"
28
+ "tsx": "^4.0.0",
29
+ "@remnic/core": "9.3.515"
29
30
  },
30
31
  "license": "MIT",
31
32
  "repository": {
@@ -43,6 +44,8 @@
43
44
  ],
44
45
  "scripts": {
45
46
  "build": "tsup src/index.ts --format esm --dts",
47
+ "precheck-types": "node ../../scripts/ensure-bench-build-deps.mjs",
48
+ "check-types": "tsc --noEmit",
46
49
  "test": "tsx --test 'src/**/*.test.ts'"
47
50
  }
48
51
  }