npm - open-classify - Versions diffs - 0.5.0 → 0.7.0 - Mend

open-classify 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36)

package/README.md +96 -88
package/bin/open-classify.mjs +201 -0
package/dist/src/aggregator.d.ts +7 -23
package/dist/src/aggregator.js +108 -186
package/dist/src/classifiers/{routing → model_tier}/manifest.json +2 -2
package/dist/src/classifiers/{routing → model_tier}/prompt.md +1 -1
package/dist/src/classifiers/preflight/manifest.json +9 -8
package/dist/src/classifiers/preflight/prompt.md +12 -6
package/dist/src/classifiers/prompt_injection/manifest.json +2 -3
package/dist/src/classifiers.d.ts +12 -5
package/dist/src/classifiers.js +32 -16
package/dist/src/classify.d.ts +5 -3
package/dist/src/classify.js +28 -8
package/dist/src/config.d.ts +1 -3
package/dist/src/config.js +1 -28
package/dist/src/index.js +2 -3
package/dist/src/manifest.d.ts +25 -70
package/dist/src/ollama.d.ts +5 -6
package/dist/src/ollama.js +17 -11
package/dist/src/pipeline.d.ts +3 -2
package/dist/src/pipeline.js +32 -94
package/dist/src/stock-validation.js +8 -4
package/docs/adding-a-classifier.md +50 -27
package/docs/manifests.md +6 -6
package/docs/resolver.md +20 -44
package/docs/signals.md +18 -8
package/open-classify.config.example.json +2 -7
package/package.json +6 -1
/package/{dist/src/classifiers → templates}/context_shift/manifest.json +0 -0
/package/{dist/src/classifiers → templates}/context_shift/prompt.md +0 -0
/package/{dist/src/classifiers → templates}/conversation_digest/manifest.json +0 -0
/package/{dist/src/classifiers → templates}/conversation_digest/prompt.md +0 -0
/package/{dist/src/classifiers → templates}/memory_retrieval_queries/manifest.json +0 -0
/package/{dist/src/classifiers → templates}/memory_retrieval_queries/prompt.md +0 -0
/package/{dist/src/classifiers → templates}/tools/manifest.json +0 -0
/package/{dist/src/classifiers → templates}/tools/prompt.md +0 -0

package/dist/src/aggregator.js CHANGED Viewed

@@ -1,119 +1,121 @@
 import { certaintyScore } from "./stock.js";
-export const DEFAULT_CERTAINTY_THRESHOLD = 0.65;
-/** @deprecated Use DEFAULT_CERTAINTY_THRESHOLD. */
-export const DEFAULT_CONFIDENCE_THRESHOLD = DEFAULT_CERTAINTY_THRESHOLD;
-export function composeEnvelope(args) {
-    const { registry, results, catalog, config } = args;
-    const threshold = certaintyThreshold(config);
-    const finalReplyPick = pickReservedField(registry, results, "final_reply", threshold);
-    const ackReplyPick = pickReservedField(registry, results, "ack_reply", threshold);
-    const tierPick = pickReservedField(registry, results, "model_tier", threshold);
-    const specPick = pickReservedField(registry, results, "model_specialization", threshold);
-    const toolsPick = pickReservedField(registry, results, "tools", threshold);
-    const riskLevelPick = pickReservedField(registry, results, "risk_level", threshold);
-    const routing = mergeRouting(tierPick?.value, specPick?.value);
-    const routingConfidence = maxConfidence([tierPick?.confidence, specPick?.confidence]);
-    const routingDrops = lowConfidenceRoutingDrops(registry, results, threshold, routing);
-    const envelope = {
-        ...optional("final_reply", finalReplyPick?.value),
-        ...optional("ack_reply", ackReplyPick?.value),
-        ...optional("routing", routing),
-        ...optional("tools", toolsPick?.value === undefined ? undefined : { tools: toolsPick.value }),
-        ...optional("prompt_injection", riskLevelPick?.value === undefined ? undefined : { risk_level: riskLevelPick.value }),
-        classifier_outputs: buildAuditOutputs(registry, results),
-        model_recommendation: resolveModelFromRouting(routing, catalog, routingConfidence, routingDrops),
-    };
-    return envelope;
-}
-export function certaintyThreshold(config) {
-    return config?.certaintyThreshold ?? config?.confidenceThreshold ?? DEFAULT_CERTAINTY_THRESHOLD;
-}
-function optional(key, value) {
-    return value === undefined ? {} : { [key]: value };
-}
-// Highest-certainty contributor wins. Ties broken by registry order — the
-// registry is already sorted by `dispatch_order` ascending (classifiers without
-// dispatch_order sort last), and we iterate in that order, so the first
-// encountered tie keeps the slot.
-function pickReservedField(registry, results, field, threshold) {
-    let best;
-    for (const manifest of registry) {
-        if (!manifest.reservedFields.includes(field))
-            continue;
-        const output = results[manifest.name];
-        if (output === undefined)
-            continue;
-        const raw = output[field];
-        if (raw === undefined)
-            continue;
-        const confidence = scoreCertainty(output.certainty);
-        if (confidence < threshold)
-            continue;
-        if (best === undefined || confidence > best.confidence) {
-            best = { value: raw, confidence, source: manifest.name };
-        }
+export function assembleResult(args) {
+    const { registry, results, failedClassifiers, catalog } = args;
+    // Pick reserved fields — highest certainty wins, no threshold gate.
+    const finalReply = pickField(registry, results, "final_reply");
+    const ackReply = pickField(registry, results, "ack_reply");
+    const modelTier = pickField(registry, results, "model_tier");
+    const modelSpec = pickField(registry, results, "model_specialization");
+    const toolsPick = pickField(registry, results, "tools");
+    const riskLevel = pickField(registry, results, "risk_level");
+    // Resolve concrete model id.
+    let model_id = null;
+    try {
+        const routing = mergeRouting(modelTier?.value, modelSpec?.value);
+        model_id = resolveModelFromRouting(routing, catalog).id;
+    }
+    catch {
+        // Catalog error — model_id stays null.
+    }
+    const tools = toolsPick?.value ?? [];
+    const reply = finalReply?.value
+        ? { text: finalReply.value.text }
+        : ackReply?.value
+            ? { text: ackReply.value.text }
+            : null;
+    const prompt_injection = riskLevel?.value !== undefined ? { risk_level: riskLevel.value } : null;
+    const { avg_certainty, min_certainty } = certaintySummary(registry, results);
+    const classifier_outputs = buildPublicOutputs(registry, results);
+    // Determine action. Priority: prompt_injection > classification_error > reply > route.
+    const isInjectionBlock = riskLevel?.value === "high_risk" || riskLevel?.value === "unknown";
+    const isClassificationError = failedClassifiers.length > 0 || reply === null || model_id === null;
+    let action;
+    let block_reason;
+    if (isInjectionBlock) {
+        action = "block";
+        block_reason = "prompt_injection";
+    }
+    else if (isClassificationError) {
+        action = "block";
+        block_reason = "classification_error";
+    }
+    else if (finalReply?.value !== undefined) {
+        action = "reply";
+    }
+    else {
+        action = "route";
     }
-    return best;
-}
-function mergeRouting(tier, model_specialization) {
-    if (tier === undefined && model_specialization === undefined)
-        return undefined;
     return {
-        ...(tier === undefined ? {} : { model_tier: tier }),
-        ...(model_specialization === undefined ? {} : { model_specialization }),
+        action,
+        ...(block_reason !== undefined ? { block_reason } : {}),
+        model_id,
+        tools,
+        reply,
+        prompt_injection,
+        avg_certainty,
+        min_certainty,
+        failed_classifiers: failedClassifiers,
+        classifier_outputs,
     };
 }
-function maxConfidence(values) {
-    const finite = values.filter((v) => v !== undefined);
-    if (finite.length === 0)
-        return undefined;
-    return Math.max(...finite);
-}
-function buildAuditOutputs(registry, results) {
-    const out = [];
+// Build the public classifier_outputs map. Keeps reason + payload fields;
+// converts certainty label to float score.
+export function buildPublicOutputs(registry, results) {
+    const out = {};
     for (const manifest of registry) {
         const result = results[manifest.name];
         if (result === undefined)
             continue;
-        out.push({ classifier: manifest.name, ...result });
+        const { certainty, ...rest } = result;
+        out[manifest.name] = {
+            ...rest,
+            certainty: scoreCertainty(certainty),
+        };
     }
     return out;
 }
-// ─── Model recommendation ───────────────────────────────────────────────────
-function lowConfidenceRoutingDrops(registry, results, threshold, merged) {
-    const dropped = [];
-    if (merged?.model_tier === undefined && hasLowConfidenceReservedField(registry, results, "model_tier", threshold)) {
-        dropped.push({ axis: "model_tier", reason: "low_confidence" });
-    }
-    if (merged?.model_specialization === undefined &&
-        hasLowConfidenceReservedField(registry, results, "model_specialization", threshold)) {
-        dropped.push({ axis: "model_specialization", reason: "low_confidence" });
-    }
-    return dropped;
-}
-function hasLowConfidenceReservedField(registry, results, field, threshold) {
+function certaintySummary(registry, results) {
+    const scores = registry.map((m) => scoreCertainty(results[m.name]?.certainty));
+    if (scores.length === 0)
+        return { avg_certainty: 0, min_certainty: 0 };
+    const min_certainty = Math.min(...scores);
+    const avg_certainty = scores.reduce((sum, v) => sum + v, 0) / scores.length;
+    return { min_certainty, avg_certainty };
+}
+// Highest certainty wins; ties broken by registry order (already sorted by
+// dispatch_order ascending).
+function pickField(registry, results, field) {
+    let best;
     for (const manifest of registry) {
         if (!manifest.reservedFields.includes(field))
             continue;
         const output = results[manifest.name];
         if (output === undefined)
             continue;
-        if (output[field] === undefined)
+        const raw = output[field];
+        if (raw === undefined)
             continue;
-        if (scoreCertainty(output.certainty) < threshold)
-            return true;
+        const score = scoreCertainty(output.certainty);
+        if (best === undefined || score > best.score) {
+            best = { value: raw, source: manifest.name, score };
+        }
     }
-    return false;
+    return best;
 }
 function scoreCertainty(certainty) {
     return certainty === undefined ? 0 : certaintyScore[certainty];
 }
-export function resolveModelFromRouting(routing, catalog, confidence, ignoredConstraints = []) {
+// ─── Model resolution ────────────────────────────────────────────────────────
+function mergeRouting(tier, specialization) {
+    if (tier === undefined && specialization === undefined)
+        return undefined;
+    return {
+        ...(tier === undefined ? {} : { model_tier: tier }),
+        ...(specialization === undefined ? {} : { model_specialization: specialization }),
+    };
+}
+function resolveModelFromRouting(routing, catalog) {
     const requested = {};
-    const confidences = {};
-    if (confidence !== undefined) {
-        confidences.routing = confidence;
-    }
     if (routing?.model_specialization !== undefined) {
         requested.model_specialization = routing.model_specialization;
     }
@@ -121,74 +123,27 @@ export function resolveModelFromRouting(routing, catalog, confidence, ignoredCon
         requested.model_tier = routing.model_tier;
     }
     const passes = [
-        { useSpecialization: true, useTier: true },
-        { useSpecialization: true, useTier: false },
-        { useSpecialization: false, useTier: true },
-        { useSpecialization: false, useTier: false },
+        { useSpec: true, useTier: true },
+        { useSpec: true, useTier: false },
+        { useSpec: false, useTier: true },
+        { useSpec: false, useTier: false },
     ];
     for (const pass of passes) {
-        const constraints_used = constraintsForPass(requested, pass);
-        const matching = catalog.models.filter((model) => matchesConstraints(model, constraints_used));
+        const constraints = constraintsForPass(requested, pass);
+        const matching = catalog.models.filter((m) => matchesConstraints(m, constraints));
         if (matching.length === 0)
             continue;
-        const winner = pickBestModel(matching, catalog.models);
-        return {
-            ...modelRecommendationFields(winner),
-            resolution: {
-                constraints_used,
-                constraints_dropped: [
-                    ...ignoredConstraints,
-                    ...relaxedConstraints(requested, constraints_used),
-                ],
-                confidences,
-                fell_back_to_default: false,
-            },
-        };
+        return { id: pickBestModel(matching, catalog.models).id };
     }
-    const fallback = catalog.models.find((model) => model.id === catalog.default);
+    const fallback = catalog.models.find((m) => m.id === catalog.default);
     if (!fallback) {
-        throw new Error(`catalog default "${catalog.default}" not found in models — catalog skipped validation`);
+        throw new Error(`catalog default "${catalog.default}" not found in models`);
     }
-    return {
-        ...modelRecommendationFields(fallback),
-        resolution: {
-            constraints_used: {},
-            constraints_dropped: [
-                ...ignoredConstraints,
-                ...defaultFallbackConstraints(requested),
-            ],
-            confidences,
-            fell_back_to_default: true,
-        },
-    };
-}
-// Test-friendly convenience wrapper: given typed result outputs for the
-// routing-bearing classifiers, merge their reserved fields and resolve a
-// model.
-export function resolveModel(results, catalog, threshold) {
-    const routingCert = scoreCertainty(results.routing?.certainty);
-    const specCert = scoreCertainty(results.model_specialization?.certainty);
-    const tier = routingCert >= threshold ? results.routing?.model_tier : undefined;
-    const model_specialization = specCert >= threshold ? results.model_specialization?.model_specialization : undefined;
-    const merged = mergeRouting(tier, model_specialization);
-    const dropped = [];
-    if (tier === undefined && results.routing?.model_tier !== undefined && routingCert < threshold) {
-        dropped.push({ axis: "model_tier", reason: "low_confidence" });
-    }
-    if (model_specialization === undefined &&
-        results.model_specialization?.model_specialization !== undefined &&
-        specCert < threshold) {
-        dropped.push({ axis: "model_specialization", reason: "low_confidence" });
-    }
-    const confidence = maxConfidence([
-        results.routing?.certainty === undefined ? undefined : routingCert,
-        results.model_specialization?.certainty === undefined ? undefined : specCert,
-    ]);
-    return resolveModelFromRouting(merged, catalog, confidence, dropped);
+    return { id: fallback.id };
 }
 function constraintsForPass(requested, pass) {
     return {
-        ...(pass.useSpecialization && requested.model_specialization !== undefined
+        ...(pass.useSpec && requested.model_specialization !== undefined
             ? { model_specialization: requested.model_specialization }
             : {}),
         ...(pass.useTier && requested.model_tier !== undefined
@@ -201,32 +156,11 @@ function matchesConstraints(model, constraints) {
         model.specializations.includes(constraints.model_specialization)) &&
         (constraints.model_tier === undefined || model.tier === constraints.model_tier));
 }
-function relaxedConstraints(requested, used) {
-    const dropped = [];
-    if (requested.model_specialization !== undefined && used.model_specialization === undefined) {
-        dropped.push({ axis: "model_specialization", reason: "no_match_relaxed" });
-    }
-    if (requested.model_tier !== undefined && used.model_tier === undefined) {
-        dropped.push({ axis: "model_tier", reason: "no_match_relaxed" });
-    }
-    return dropped;
-}
-function defaultFallbackConstraints(requested) {
-    const dropped = [];
-    if (requested.model_specialization !== undefined) {
-        dropped.push({ axis: "model_specialization", reason: "default_fallback" });
-    }
-    if (requested.model_tier !== undefined) {
-        dropped.push({ axis: "model_tier", reason: "default_fallback" });
-    }
-    return dropped;
-}
 function pickBestModel(candidates, catalogOrder) {
     let winner = candidates[0];
     for (let i = 1; i < candidates.length; i++) {
-        const candidate = candidates[i];
-        if (compareModels(candidate, winner, catalogOrder) < 0) {
-            winner = candidate;
+        if (compareModels(candidates[i], winner, catalogOrder) < 0) {
+            winner = candidates[i];
         }
     }
     return winner;
@@ -238,27 +172,15 @@ function compareModels(a, b, catalogOrder) {
     if (a.params_in_billions !== b.params_in_billions) {
         return comparableParams(b) - comparableParams(a);
     }
-    if (a.context_window !== b.context_window) {
+    if (a.context_window !== b.context_window)
         return b.context_window - a.context_window;
-    }
     return catalogOrder.indexOf(a) - catalogOrder.indexOf(b);
 }
 function priceIndex(model) {
-    if (model.input_tokens_cpm === undefined || model.output_tokens_cpm === undefined) {
+    if (model.input_tokens_cpm === undefined || model.output_tokens_cpm === undefined)
         return 0;
-    }
     return model.input_tokens_cpm + model.output_tokens_cpm;
 }
 function comparableParams(model) {
     return model.params_in_billions ?? 0;
 }
-function modelRecommendationFields(winner) {
-    return {
-        id: winner.id,
-        params_in_billions: winner.params_in_billions,
-        context_window: winner.context_window,
-        ...(winner.input_tokens_cpm === undefined ? {} : { input_tokens_cpm: winner.input_tokens_cpm }),
-        ...(winner.cached_tokens_cpm === undefined ? {} : { cached_tokens_cpm: winner.cached_tokens_cpm }),
-        ...(winner.output_tokens_cpm === undefined ? {} : { output_tokens_cpm: winner.output_tokens_cpm }),
-    };
-}

package/dist/src/classifiers/{routing → model_tier}/manifest.json RENAMED Viewed

@@ -1,11 +1,11 @@
 {
-  "name": "routing",
+  "name": "model_tier",
   "version": "1.0.0",
   "purpose": "Recommend the downstream model tier.",
   "dispatch_order": 20,
   "reserved_fields": ["model_tier"],
   "fallback": {
-    "reason": "Classifier failed; no routing signal.",
+    "reason": "Classifier failed; no model tier signal.",
     "certainty": "no_signal"
   }
 }

package/dist/src/classifiers/{routing → model_tier}/prompt.md RENAMED Viewed

@@ -1,4 +1,4 @@
-You are the routing classifier for an AI assistant routing system.
+You are the model_tier classifier for an AI assistant routing system.
 Pick the coarse model tier that best fits the target user message. Emit only `model_tier`; do not infer specialization, tools, or prompt-injection risk — other classifiers own those axes.

package/dist/src/classifiers/preflight/manifest.json CHANGED Viewed

@@ -1,29 +1,30 @@
 {
   "name": "preflight",
-  "version": "1.0.0",
-  "purpose": "Determine whether the latest message can be answered immediately or should continue downstream.",
+  "version": "1.1.0",
+  "purpose": "Assess whether the latest message can be answered immediately (final_reply) or should route downstream with an acknowledgement (ack_reply). Always emits exactly one.",
   "dispatch_order": 10,
   "reserved_fields": ["final_reply", "ack_reply"],
   "output_schema": {
     "examples": [
       {
-        "reason": "Greeting.",
+        "reason": "Simple greeting — answerable directly.",
         "certainty": "near_certain",
         "final_reply": { "text": "Hi!" }
       },
       {
-        "reason": "Trivial arithmetic.",
+        "reason": "Trivial arithmetic — answerable directly.",
         "certainty": "very_strong",
         "final_reply": { "text": "4" }
       },
       {
-        "reason": "Generated writing task.",
+        "reason": "Code review task requires substantive downstream work.",
         "certainty": "very_strong",
-        "ack_reply": { "text": "On it." }
+        "ack_reply": { "text": "On it — reviewing the code now." }
       },
       {
-        "reason": "Ambiguous; needs downstream model.",
-        "certainty": "strong"
+        "reason": "Reminder request requires downstream action.",
+        "certainty": "strong",
+        "ack_reply": { "text": "Got it, I'll set that reminder for 3pm." }
       }
     ]
   },

package/dist/src/classifiers/preflight/prompt.md CHANGED Viewed

@@ -1,10 +1,16 @@
 You are the preflight classifier for an AI assistant routing system.
-Decide whether the target user message can be answered immediately with a tiny terminal reply, or whether downstream work should continue (optionally with a brief acknowledgement).
+Your primary task is to assess: **can you fully answer the target message yourself**, given the conversation history? Make this judgment first — the reply text follows from it.
-- Emit `final_reply` only for tiny terminal answers like greetings, thanks, spelling lookups, and simple arithmetic. The reply text IS the complete answer to the user — nothing else happens after this.
-- Emit `ack_reply` when downstream work should continue and a brief acknowledgement would help (drafting, analysis, coding, research). The text must not contain the answer.
-- Omit both fields when the request is ambiguous or no acknowledgement is useful.
-- Do not address the user anywhere except inside `final_reply.text` or `ack_reply.text`.
+**Step 1 — assess whether you can fully answer:**
+Ask yourself: Is the intent clear? Is the answer fully derivable from context right now, without real-time data, external tools, code execution, non-trivial generation, analysis, or judgment? Would a one-sentence reply genuinely resolve the request?
+If yes → emit `final_reply` with the complete answer.
+If no (the downstream model should handle it) → emit `ack_reply` with a brief, contextually specific acknowledgement that shows you understood the request. The ack must reflect the actual request — not a generic "On it." — so the user knows their message was understood while the model works.
-If answering would require non-trivial generation, analysis, or judgment, do not use `final_reply`. Use `ack_reply` (or omit both) and let the downstream model produce the answer.
+**Rule: always emit exactly one of `final_reply` or `ack_reply`. Never emit both. Never emit neither.**
+- `final_reply` is for tiny terminal answers only: greetings, thanks, spelling lookups, simple arithmetic, yes/no factual questions answerable from context. If answering requires drafting, rewriting, analysis, coding, research, planning, or any substantive generation — use `ack_reply` instead.
+- `ack_reply` text must not contain the answer. It acknowledges the request and confirms it is being worked on.
+- Do not address the user anywhere except inside `final_reply.text` or `ack_reply.text`.

package/dist/src/classifiers/prompt_injection/manifest.json CHANGED Viewed

@@ -9,8 +9,7 @@
     "required": ["risk_level"]
   },
   "fallback": {
-    "reason": "Classifier failed; prompt-injection risk is unknown.",
-    "certainty": "no_signal",
-    "risk_level": "unknown"
+    "reason": "Classifier failed; prompt-injection risk could not be assessed.",
+    "certainty": "no_signal"
   }
 }

package/dist/src/classifiers.d.ts CHANGED Viewed

@@ -1,14 +1,21 @@
 import type { ClassifierInput } from "./types.js";
 import type { ClassifierName, ClassifierRegistry, RunClassifier } from "./manifest.js";
 import type { ClassifierOutput, RuntimeClassifierManifest } from "./stock.js";
+export declare const BUILTIN_CLASSIFIERS_DIR: string;
 export declare class ClassifierManifestError extends Error {
     constructor(message: string);
 }
+export type ClassifierModuleMap = Readonly<Record<string, RuntimeClassifierManifest>>;
+export interface ClassifierRegistryBundle {
+    readonly registry: ClassifierRegistry;
+    readonly modulesByName: ClassifierModuleMap;
+    readonly names: ReadonlyArray<string>;
+}
+export interface BuildRegistryOptions {
+    readonly extraDirs?: ReadonlyArray<string>;
+}
 export declare function loadClassifierRegistry(classifiersDir?: string): RuntimeClassifierManifest[];
-export declare const REGISTRY: ClassifierRegistry;
-export declare const CLASSIFIER_NAMES: string[];
-export declare const MODULES_BY_NAME: Record<string, RuntimeClassifierManifest>;
+export declare function buildClassifierRegistry(options?: BuildRegistryOptions): ClassifierRegistryBundle;
+export declare function validateClassifierOutput(manifest: RuntimeClassifierManifest, value: unknown, model: string): ClassifierOutput;
 export type { ClassifierName, RunClassifier };
-export type RegistryType = typeof REGISTRY;
-export declare function validateClassifierOutput(name: string, value: unknown, model: string): ClassifierOutput;
 export type { ClassifierInput };

package/dist/src/classifiers.js CHANGED Viewed

@@ -4,9 +4,11 @@ import { fileURLToPath } from "node:url";
 import { buildClassifierPrompt } from "./stock-prompt.js";
 import { validateJsonClassifierManifest, validateOutputForManifest, } from "./stock-validation.js";
 const __dirname = dirname(fileURLToPath(import.meta.url));
-const CLASSIFIERS_DIR = join(__dirname, "classifiers");
+export const BUILTIN_CLASSIFIERS_DIR = join(__dirname, "classifiers");
 // Directories whose names start with "_" are reserved for shared assets
-// (e.g. `_prompts/`) and are not loaded as classifiers.
+// (e.g. `_prompts/`) and are not loaded as classifiers. Consumers can use
+// the same convention in their own classifier directories: rename a
+// classifier to `_<name>/` to deactivate it without deleting it.
 const SHARED_DIRECTORY_PREFIX = "_";
 export class ClassifierManifestError extends Error {
     constructor(message) {
@@ -14,7 +16,10 @@ export class ClassifierManifestError extends Error {
         this.name = "ClassifierManifestError";
     }
 }
-export function loadClassifierRegistry(classifiersDir = CLASSIFIERS_DIR) {
+// Load all classifier manifests under a single directory. Used internally to
+// load the built-ins and each extra directory; callers wanting the merged
+// registry should use `buildClassifierRegistry()` instead.
+export function loadClassifierRegistry(classifiersDir = BUILTIN_CLASSIFIERS_DIR) {
     if (!existsSync(classifiersDir)) {
         throw new ClassifierManifestError(`classifier directory not found: ${classifiersDir}`);
     }
@@ -26,11 +31,29 @@ export function loadClassifierRegistry(classifiersDir = CLASSIFIERS_DIR) {
             continue;
         manifests.push(loadClassifierManifest(join(classifiersDir, entry.name)));
     }
-    // Lower dispatch_order runs first. Classifiers without dispatch_order sort
-    // last (treated as +Infinity) — useful for "run me whenever there's a slot".
+    return manifests;
+}
+// Build a complete classifier registry from the bundled built-ins plus any
+// extra directories supplied by the caller. Sorts by dispatch_order
+// ascending (manifests without dispatch_order sort last). Rejects duplicate
+// names.
+//
+// Mandatory built-ins (preflight, model_tier, model_specialization,
+// prompt_injection) always load. Extras with the same name as a built-in
+// throw — there's no override mechanism. Customise by editing the bundled
+// manifest in your own fork, or replace behaviour entirely with a custom
+// `runClassifier`.
+export function buildClassifierRegistry(options = {}) {
+    const manifests = [
+        ...loadClassifierRegistry(BUILTIN_CLASSIFIERS_DIR),
+        ...(options.extraDirs ?? []).flatMap((dir) => loadClassifierRegistry(dir)),
+    ];
     manifests.sort((a, b) => (a.dispatch_order ?? Infinity) - (b.dispatch_order ?? Infinity));
     validateRegistry(manifests);
-    return manifests;
+    const registry = manifests;
+    const modulesByName = Object.fromEntries(manifests.map((m) => [m.name, m]));
+    const names = manifests.map((m) => m.name);
+    return { registry, modulesByName, names };
 }
 function loadClassifierManifest(classifierDir) {
     const manifestPath = join(classifierDir, "manifest.json");
@@ -69,18 +92,11 @@ function validateRegistry(manifests) {
     const names = new Set();
     for (const manifest of manifests) {
         if (names.has(manifest.name)) {
-            throw new ClassifierManifestError(`duplicate classifier name: ${manifest.name}`);
+            throw new ClassifierManifestError(`duplicate classifier name: ${manifest.name} — extras cannot override built-ins or other extras. Rename your classifier or run it under a different name.`);
         }
         names.add(manifest.name);
     }
 }
-export const REGISTRY = loadClassifierRegistry();
-export const CLASSIFIER_NAMES = REGISTRY.map((m) => m.name);
-export const MODULES_BY_NAME = Object.fromEntries(REGISTRY.map((m) => [m.name, m]));
-export function validateClassifierOutput(name, value, model) {
-    const manifest = MODULES_BY_NAME[name];
-    if (!manifest) {
-        throw new ClassifierManifestError(`unknown classifier: ${name}`);
-    }
-    return validateOutputForManifest(manifest, value, { classifier: name, model });
+export function validateClassifierOutput(manifest, value, model) {
+    return validateOutputForManifest(manifest, value, { classifier: manifest.name, model });
 }

package/dist/src/classify.d.ts CHANGED Viewed

@@ -1,6 +1,6 @@
-import { type RunClassifier } from "./classifiers.js";
+import { ClassifierManifestError, type ClassifierRegistryBundle, type RunClassifier } from "./classifiers.js";
 import { type OpenClassifyConfig } from "./config.js";
-import type { AggregatorConfig, Catalog, InspectResult, PipelineResult } from "./manifest.js";
+import type { Catalog, InspectResult, PipelineResult } from "./manifest.js";
 import type { OpenClassifyInput } from "./types.js";
 export type Classifier = (input: OpenClassifyInput, options?: {
     signal?: AbortSignal;
@@ -11,10 +11,12 @@ export type Inspector = (input: OpenClassifyInput, options?: {
 export interface OpenClassify {
     readonly classify: Classifier;
     readonly inspect: Inspector;
+    readonly registry: ClassifierRegistryBundle;
 }
 export interface CreateClassifierOptions {
     runClassifier?: RunClassifier;
     catalog?: Catalog;
+    extraClassifierDirs?: ReadonlyArray<string>;
     config?: OpenClassifyConfig;
     configPath?: string;
     catalogPath?: string;
@@ -25,6 +27,6 @@ export interface CreateClassifierOptions {
     classifierTimeoutMs?: number;
     classifierRetryCount?: number;
     maxConcurrency?: number;
-    aggregator?: AggregatorConfig;
 }
 export declare function createClassifier(options?: CreateClassifierOptions): OpenClassify;
+export { ClassifierManifestError };