@mnemom/agent-alignment-protocol 0.1.8 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/index.d.mts +156 -1
- package/dist/index.d.ts +156 -1
- package/dist/index.js +235 -0
- package/dist/index.mjs +229 -0
- package/package.json +1 -1
- package/src/compliance.ts +56 -0
- package/src/constants.ts +8 -0
- package/src/index.ts +15 -1
- package/src/verification/api.ts +266 -0
- package/src/verification/models.ts +94 -0
package/README.md
CHANGED
package/dist/index.d.mts
CHANGED
|
@@ -415,6 +415,92 @@ interface CoherenceResult {
|
|
|
415
415
|
reason: string;
|
|
416
416
|
} | null;
|
|
417
417
|
}
|
|
418
|
+
/** A single pairwise coherence entry in the fleet matrix. */
|
|
419
|
+
interface PairwiseEntry {
|
|
420
|
+
/** First agent ID */
|
|
421
|
+
agent_a: string;
|
|
422
|
+
/** Second agent ID */
|
|
423
|
+
agent_b: string;
|
|
424
|
+
/** Pairwise coherence result */
|
|
425
|
+
result: CoherenceResult;
|
|
426
|
+
}
|
|
427
|
+
/** An agent flagged as an outlier in fleet coherence. */
|
|
428
|
+
interface FleetOutlier {
|
|
429
|
+
/** Agent ID */
|
|
430
|
+
agent_id: string;
|
|
431
|
+
/** Agent's mean pairwise score */
|
|
432
|
+
agent_mean_score: number;
|
|
433
|
+
/** Fleet-wide mean score */
|
|
434
|
+
fleet_mean_score: number;
|
|
435
|
+
/** Standard deviations below fleet mean */
|
|
436
|
+
deviation: number;
|
|
437
|
+
/** Values causing primary conflicts */
|
|
438
|
+
primary_conflicts: string[];
|
|
439
|
+
}
|
|
440
|
+
/** A cluster of compatible agents. */
|
|
441
|
+
interface FleetCluster {
|
|
442
|
+
/** Cluster identifier */
|
|
443
|
+
cluster_id: number;
|
|
444
|
+
/** Agent IDs in this cluster */
|
|
445
|
+
agent_ids: string[];
|
|
446
|
+
/** Mean coherence score within the cluster */
|
|
447
|
+
internal_coherence: number;
|
|
448
|
+
/** Values shared by all agents in the cluster */
|
|
449
|
+
shared_values: string[];
|
|
450
|
+
/** Values that distinguish this cluster from others */
|
|
451
|
+
distinguishing_values: string[];
|
|
452
|
+
}
|
|
453
|
+
/** A value dimension where agents diverge. */
|
|
454
|
+
interface ValueDivergence {
|
|
455
|
+
/** The value in question */
|
|
456
|
+
value: string;
|
|
457
|
+
/** Agent IDs that declare this value */
|
|
458
|
+
agents_declaring: string[];
|
|
459
|
+
/** Agent IDs missing this value */
|
|
460
|
+
agents_missing: string[];
|
|
461
|
+
/** Agent IDs whose conflicts_with includes this value */
|
|
462
|
+
agents_conflicting: string[];
|
|
463
|
+
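/** Estimated impact on fleet score if resolved: (agents missing the value + agents conflicting with it) / agent count, rounded to 4 decimals. An agent that both lacks and conflicts with the value is counted twice, so this can exceed 1. */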
|
|
464
|
+
impact_on_fleet_score: number;
|
|
465
|
+
}
|
|
466
|
+
/** Summary of one agent's position in the fleet. */
|
|
467
|
+
interface AgentCoherenceSummary {
|
|
468
|
+
/** Agent ID */
|
|
469
|
+
agent_id: string;
|
|
470
|
+
/** Mean pairwise score with all other agents */
|
|
471
|
+
mean_score: number;
|
|
472
|
+
/** Number of compatible pairs */
|
|
473
|
+
compatible_count: number;
|
|
474
|
+
/** Number of conflicting pairs */
|
|
475
|
+
conflict_count: number;
|
|
476
|
+
/** Cluster this agent belongs to */
|
|
477
|
+
cluster_id: number;
|
|
478
|
+
/** Whether this agent is flagged as an outlier */
|
|
479
|
+
is_outlier: boolean;
|
|
480
|
+
}
|
|
481
|
+
/** Result of N-way fleet coherence analysis. */
|
|
482
|
+
interface FleetCoherenceResult {
|
|
483
|
+
/** Mean of all pairwise coherence scores */
|
|
484
|
+
fleet_score: number;
|
|
485
|
+
/** Minimum pairwise score (weakest link) */
|
|
486
|
+
min_pair_score: number;
|
|
487
|
+
/** Maximum pairwise score */
|
|
488
|
+
max_pair_score: number;
|
|
489
|
+
/** Number of agents analyzed */
|
|
490
|
+
agent_count: number;
|
|
491
|
+
/** Number of pairwise comparisons */
|
|
492
|
+
pair_count: number;
|
|
493
|
+
/** All pairwise coherence results */
|
|
494
|
+
pairwise_matrix: PairwiseEntry[];
|
|
495
|
+
/** Agents flagged as outliers */
|
|
496
|
+
outliers: FleetOutlier[];
|
|
497
|
+
/** Clusters of compatible agents */
|
|
498
|
+
clusters: FleetCluster[];
|
|
499
|
+
/** Value dimensions where agents diverge */
|
|
500
|
+
divergence_report: ValueDivergence[];
|
|
501
|
+
/** Per-agent coherence summaries */
|
|
502
|
+
agent_summaries: AgentCoherenceSummary[];
|
|
503
|
+
}
|
|
418
504
|
|
|
419
505
|
/**
|
|
420
506
|
* AAP Verification API - The three public entry points.
|
|
@@ -454,6 +540,24 @@ declare function verifyTrace(trace: APTrace, card: AlignmentCard): VerificationR
|
|
|
454
540
|
* @returns CoherenceResult with compatibility assessment
|
|
455
541
|
*/
|
|
456
542
|
declare function checkCoherence(myCard: AlignmentCard, theirCard: AlignmentCard, taskValues?: string[]): CoherenceResult;
|
|
543
|
+
/**
|
|
544
|
+
* Check fleet-level value coherence across N agents.
|
|
545
|
+
*
|
|
546
|
+
* Computes all C(n,2) pairwise coherence scores, then derives:
|
|
547
|
+
* - Fleet score: mean of all pairwise scores
|
|
548
|
+
* - Outlier detection: agents whose mean pairwise score is at least OUTLIER_STD_DEV_THRESHOLD (1) std dev below the fleet mean
|
|
549
|
+
* - Cluster analysis: connected components of the graph linking every pair that checkCoherence reports as compatible
|
|
550
|
+
* - Divergence report: values where agents disagree
|
|
551
|
+
*
|
|
552
|
+
* @param cards - Array of agent cards with their IDs
|
|
553
|
+
* @param taskValues - Optional list of values required for the task
|
|
554
|
+
* @returns FleetCoherenceResult with full analysis
|
|
555
|
+
* @throws Error if fewer than 2 agents provided
|
|
556
|
+
*/
|
|
557
|
+
declare function checkFleetCoherence(cards: Array<{
|
|
558
|
+
agentId: string;
|
|
559
|
+
card: AlignmentCard;
|
|
560
|
+
}>, taskValues?: string[]): FleetCoherenceResult;
|
|
457
561
|
/**
|
|
458
562
|
* Detect behavioral drift from declared alignment.
|
|
459
563
|
*
|
|
@@ -693,6 +797,57 @@ declare const CONFLICT_PENALTY_MULTIPLIER = 0.5;
|
|
|
693
797
|
declare const MIN_WORD_LENGTH = 3;
|
|
694
798
|
/** Maximum features to extract from TF-IDF vectorization */
|
|
695
799
|
declare const MAX_TFIDF_FEATURES = 500;
|
|
800
|
+
/** Standard deviations below fleet mean to flag an agent as outlier */
|
|
801
|
+
declare const OUTLIER_STD_DEV_THRESHOLD = 1;
|
|
802
|
+
/** Minimum pairwise score to consider agents compatible for cluster analysis */
|
|
803
|
+
declare const CLUSTER_COMPATIBILITY_THRESHOLD = 0.7;
|
|
696
804
|
declare const ALGORITHM_VERSION = "1.2.0";
|
|
697
805
|
|
|
698
|
-
|
|
806
|
+
/**
|
|
807
|
+
* EU AI Act Article 50 compliance presets for AAP.
|
|
808
|
+
*
|
|
809
|
+
* These presets provide recommended configuration values for deploying
|
|
810
|
+
* AAP-instrumented agents in EU jurisdictions subject to AI Act
|
|
811
|
+
* transparency obligations. Spread them into your AlignmentCard fields.
|
|
812
|
+
*
|
|
813
|
+
* @example
|
|
814
|
+
* ```typescript
|
|
815
|
+
* import {
|
|
816
|
+
* EU_COMPLIANCE_AUDIT_COMMITMENT,
|
|
817
|
+
* EU_COMPLIANCE_EXTENSIONS,
|
|
818
|
+
* EU_COMPLIANCE_VALUES,
|
|
819
|
+
* } from "@mnemom/agent-alignment-protocol";
|
|
820
|
+
*
|
|
821
|
+
* const card: AlignmentCard = {
|
|
822
|
+
* ...,
|
|
823
|
+
* audit_commitment: { ...EU_COMPLIANCE_AUDIT_COMMITMENT },
|
|
824
|
+
* values: { declared: EU_COMPLIANCE_VALUES, ... },
|
|
825
|
+
* extensions: { ...EU_COMPLIANCE_EXTENSIONS },
|
|
826
|
+
* };
|
|
827
|
+
* ```
|
|
828
|
+
*
|
|
829
|
+
* DISCLAIMER: These presets reflect a technical mapping of AAP features to
|
|
830
|
+
* Article 50 requirements. They do not constitute legal advice. Consult
|
|
831
|
+
* qualified legal counsel for your specific compliance obligations.
|
|
832
|
+
*/
|
|
833
|
+
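/** Audit commitment values intended to satisfy Article 50(4) audit trail requirements. `query_endpoint` is a placeholder (example.com); override it with your own audit endpoint after spreading. */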
|
|
834
|
+
declare const EU_COMPLIANCE_AUDIT_COMMITMENT: {
|
|
835
|
+
readonly retention_days: 90;
|
|
836
|
+
readonly queryable: true;
|
|
837
|
+
readonly query_endpoint: "https://audit.example.com/traces";
|
|
838
|
+
readonly tamper_evidence: "append_only";
|
|
839
|
+
readonly trace_format: "ap-trace-v1";
|
|
840
|
+
};
|
|
841
|
+
/** Extension block for EU AI Act metadata on the Alignment Card. */
|
|
842
|
+
declare const EU_COMPLIANCE_EXTENSIONS: {
|
|
843
|
+
readonly eu_ai_act: {
|
|
844
|
+
readonly article_50_compliant: true;
|
|
845
|
+
readonly ai_system_classification: "general_purpose";
|
|
846
|
+
readonly disclosure_text: string;
|
|
847
|
+
readonly compliance_version: "2026-08";
|
|
848
|
+
};
|
|
849
|
+
};
|
|
850
|
+
/** Recommended declared values for Article 50 transparency obligations. */
|
|
851
|
+
declare const EU_COMPLIANCE_VALUES: readonly ["transparency", "honesty", "user_control", "principal_benefit"];
|
|
852
|
+
|
|
853
|
+
export { ALGORITHM_VERSION, type APTrace, type Action, type ActionCategory, type ActionTarget, type ActionType, type AgentCoherenceSummary, type AlignmentCard, type AlignmentCardRequest, type AlignmentCardResponse, type Alternative, type AuditCommitment, type AuditStorage, type AutonomyEnvelope, type AutonomyScope, CLUSTER_COMPATIBILITY_THRESHOLD, CONFLICT_PENALTY_MULTIPLIER, type Coherence, type CoherenceResult, type CoherenceResultMessage, DEFAULT_SIMILARITY_THRESHOLD, DEFAULT_SUSTAINED_TURNS_THRESHOLD, type DataSharing, type Decision, type DriftAlert, type DriftAnalysis, type DriftDirection, type DriftIndicator, EU_COMPLIANCE_AUDIT_COMMITMENT, EU_COMPLIANCE_EXTENSIONS, EU_COMPLIANCE_VALUES, type Escalation, type EscalationStatus, type EscalationTrigger, type FleetCluster, type FleetCoherenceResult, type FleetOutlier, type HierarchyType, MAX_TFIDF_FEATURES, MIN_COHERENCE_FOR_PROCEED, MIN_WORD_LENGTH, type MonetaryValue, NEAR_BOUNDARY_THRESHOLD, OUTLIER_STD_DEV_THRESHOLD, type PairwiseEntry, type Principal, type PrincipalResponse, type PrincipalType, type ProposedCollaboration, type ProposedResolution, type RelationshipType, type RequesterInfo, type Severity, type Signature, type StorageType, type TamperEvidence, type TaskContext, type TraceContext, type TriggerAction, type TriggerCheck, VIOLATION_SEVERITY, type ValueAlignment, type ValueAlignmentDetail, type ValueCoherenceCheck, type ValueCoherenceMessage, type ValueConflict, type ValueConflictResult, type ValueDefinition, type ValueDivergence, type Values, type VerificationMetadata, type VerificationResult, type Violation, type ViolationType, type Warning, checkCoherence, checkFleetCoherence, computeCentroid, cosineSimilarity, createViolation, detectDrift, extractCardFeatures, extractTraceFeatures, getSelectedAlternative, hadViolations, hasValue, isActionBounded, isActionForbidden, isCardExpired, verifyTrace, wasEscalated };
|
package/dist/index.d.ts
CHANGED
|
@@ -415,6 +415,92 @@ interface CoherenceResult {
|
|
|
415
415
|
reason: string;
|
|
416
416
|
} | null;
|
|
417
417
|
}
|
|
418
|
+
/** A single pairwise coherence entry in the fleet matrix. */
|
|
419
|
+
interface PairwiseEntry {
|
|
420
|
+
/** First agent ID */
|
|
421
|
+
agent_a: string;
|
|
422
|
+
/** Second agent ID */
|
|
423
|
+
agent_b: string;
|
|
424
|
+
/** Pairwise coherence result */
|
|
425
|
+
result: CoherenceResult;
|
|
426
|
+
}
|
|
427
|
+
/** An agent flagged as an outlier in fleet coherence. */
|
|
428
|
+
interface FleetOutlier {
|
|
429
|
+
/** Agent ID */
|
|
430
|
+
agent_id: string;
|
|
431
|
+
/** Agent's mean pairwise score */
|
|
432
|
+
agent_mean_score: number;
|
|
433
|
+
/** Fleet-wide mean score */
|
|
434
|
+
fleet_mean_score: number;
|
|
435
|
+
/** Standard deviations below fleet mean */
|
|
436
|
+
deviation: number;
|
|
437
|
+
/** Values causing primary conflicts */
|
|
438
|
+
primary_conflicts: string[];
|
|
439
|
+
}
|
|
440
|
+
/** A cluster of compatible agents. */
|
|
441
|
+
interface FleetCluster {
|
|
442
|
+
/** Cluster identifier */
|
|
443
|
+
cluster_id: number;
|
|
444
|
+
/** Agent IDs in this cluster */
|
|
445
|
+
agent_ids: string[];
|
|
446
|
+
/** Mean coherence score within the cluster */
|
|
447
|
+
internal_coherence: number;
|
|
448
|
+
/** Values shared by all agents in the cluster */
|
|
449
|
+
shared_values: string[];
|
|
450
|
+
/** Values that distinguish this cluster from others */
|
|
451
|
+
distinguishing_values: string[];
|
|
452
|
+
}
|
|
453
|
+
/** A value dimension where agents diverge. */
|
|
454
|
+
interface ValueDivergence {
|
|
455
|
+
/** The value in question */
|
|
456
|
+
value: string;
|
|
457
|
+
/** Agent IDs that declare this value */
|
|
458
|
+
agents_declaring: string[];
|
|
459
|
+
/** Agent IDs missing this value */
|
|
460
|
+
agents_missing: string[];
|
|
461
|
+
/** Agent IDs whose conflicts_with includes this value */
|
|
462
|
+
agents_conflicting: string[];
|
|
463
|
+
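/** Estimated impact on fleet score if resolved: (agents missing the value + agents conflicting with it) / agent count, rounded to 4 decimals. An agent that both lacks and conflicts with the value is counted twice, so this can exceed 1. */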
|
|
464
|
+
impact_on_fleet_score: number;
|
|
465
|
+
}
|
|
466
|
+
/** Summary of one agent's position in the fleet. */
|
|
467
|
+
interface AgentCoherenceSummary {
|
|
468
|
+
/** Agent ID */
|
|
469
|
+
agent_id: string;
|
|
470
|
+
/** Mean pairwise score with all other agents */
|
|
471
|
+
mean_score: number;
|
|
472
|
+
/** Number of compatible pairs */
|
|
473
|
+
compatible_count: number;
|
|
474
|
+
/** Number of conflicting pairs */
|
|
475
|
+
conflict_count: number;
|
|
476
|
+
/** Cluster this agent belongs to */
|
|
477
|
+
cluster_id: number;
|
|
478
|
+
/** Whether this agent is flagged as an outlier */
|
|
479
|
+
is_outlier: boolean;
|
|
480
|
+
}
|
|
481
|
+
/** Result of N-way fleet coherence analysis. */
|
|
482
|
+
interface FleetCoherenceResult {
|
|
483
|
+
/** Mean of all pairwise coherence scores */
|
|
484
|
+
fleet_score: number;
|
|
485
|
+
/** Minimum pairwise score (weakest link) */
|
|
486
|
+
min_pair_score: number;
|
|
487
|
+
/** Maximum pairwise score */
|
|
488
|
+
max_pair_score: number;
|
|
489
|
+
/** Number of agents analyzed */
|
|
490
|
+
agent_count: number;
|
|
491
|
+
/** Number of pairwise comparisons */
|
|
492
|
+
pair_count: number;
|
|
493
|
+
/** All pairwise coherence results */
|
|
494
|
+
pairwise_matrix: PairwiseEntry[];
|
|
495
|
+
/** Agents flagged as outliers */
|
|
496
|
+
outliers: FleetOutlier[];
|
|
497
|
+
/** Clusters of compatible agents */
|
|
498
|
+
clusters: FleetCluster[];
|
|
499
|
+
/** Value dimensions where agents diverge */
|
|
500
|
+
divergence_report: ValueDivergence[];
|
|
501
|
+
/** Per-agent coherence summaries */
|
|
502
|
+
agent_summaries: AgentCoherenceSummary[];
|
|
503
|
+
}
|
|
418
504
|
|
|
419
505
|
/**
|
|
420
506
|
* AAP Verification API - The three public entry points.
|
|
@@ -454,6 +540,24 @@ declare function verifyTrace(trace: APTrace, card: AlignmentCard): VerificationR
|
|
|
454
540
|
* @returns CoherenceResult with compatibility assessment
|
|
455
541
|
*/
|
|
456
542
|
declare function checkCoherence(myCard: AlignmentCard, theirCard: AlignmentCard, taskValues?: string[]): CoherenceResult;
|
|
543
|
+
/**
|
|
544
|
+
* Check fleet-level value coherence across N agents.
|
|
545
|
+
*
|
|
546
|
+
* Computes all C(n,2) pairwise coherence scores, then derives:
|
|
547
|
+
* - Fleet score: mean of all pairwise scores
|
|
548
|
+
* - Outlier detection: agents whose mean pairwise score is at least OUTLIER_STD_DEV_THRESHOLD (1) std dev below the fleet mean
|
|
549
|
+
* - Cluster analysis: connected components of the graph linking every pair that checkCoherence reports as compatible
|
|
550
|
+
* - Divergence report: values where agents disagree
|
|
551
|
+
*
|
|
552
|
+
* @param cards - Array of agent cards with their IDs
|
|
553
|
+
* @param taskValues - Optional list of values required for the task
|
|
554
|
+
* @returns FleetCoherenceResult with full analysis
|
|
555
|
+
* @throws Error if fewer than 2 agents provided
|
|
556
|
+
*/
|
|
557
|
+
declare function checkFleetCoherence(cards: Array<{
|
|
558
|
+
agentId: string;
|
|
559
|
+
card: AlignmentCard;
|
|
560
|
+
}>, taskValues?: string[]): FleetCoherenceResult;
|
|
457
561
|
/**
|
|
458
562
|
* Detect behavioral drift from declared alignment.
|
|
459
563
|
*
|
|
@@ -693,6 +797,57 @@ declare const CONFLICT_PENALTY_MULTIPLIER = 0.5;
|
|
|
693
797
|
declare const MIN_WORD_LENGTH = 3;
|
|
694
798
|
/** Maximum features to extract from TF-IDF vectorization */
|
|
695
799
|
declare const MAX_TFIDF_FEATURES = 500;
|
|
800
|
+
/** Standard deviations below fleet mean to flag an agent as outlier */
|
|
801
|
+
declare const OUTLIER_STD_DEV_THRESHOLD = 1;
|
|
802
|
+
/** Minimum pairwise score to consider agents compatible for cluster analysis */
|
|
803
|
+
declare const CLUSTER_COMPATIBILITY_THRESHOLD = 0.7;
|
|
696
804
|
declare const ALGORITHM_VERSION = "1.2.0";
|
|
697
805
|
|
|
698
|
-
|
|
806
|
+
/**
|
|
807
|
+
* EU AI Act Article 50 compliance presets for AAP.
|
|
808
|
+
*
|
|
809
|
+
* These presets provide recommended configuration values for deploying
|
|
810
|
+
* AAP-instrumented agents in EU jurisdictions subject to AI Act
|
|
811
|
+
* transparency obligations. Spread them into your AlignmentCard fields.
|
|
812
|
+
*
|
|
813
|
+
* @example
|
|
814
|
+
* ```typescript
|
|
815
|
+
* import {
|
|
816
|
+
* EU_COMPLIANCE_AUDIT_COMMITMENT,
|
|
817
|
+
* EU_COMPLIANCE_EXTENSIONS,
|
|
818
|
+
* EU_COMPLIANCE_VALUES,
|
|
819
|
+
* } from "@mnemom/agent-alignment-protocol";
|
|
820
|
+
*
|
|
821
|
+
* const card: AlignmentCard = {
|
|
822
|
+
* ...,
|
|
823
|
+
* audit_commitment: { ...EU_COMPLIANCE_AUDIT_COMMITMENT },
|
|
824
|
+
* values: { declared: EU_COMPLIANCE_VALUES, ... },
|
|
825
|
+
* extensions: { ...EU_COMPLIANCE_EXTENSIONS },
|
|
826
|
+
* };
|
|
827
|
+
* ```
|
|
828
|
+
*
|
|
829
|
+
* DISCLAIMER: These presets reflect a technical mapping of AAP features to
|
|
830
|
+
* Article 50 requirements. They do not constitute legal advice. Consult
|
|
831
|
+
* qualified legal counsel for your specific compliance obligations.
|
|
832
|
+
*/
|
|
833
|
+
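/** Audit commitment values intended to satisfy Article 50(4) audit trail requirements. `query_endpoint` is a placeholder (example.com); override it with your own audit endpoint after spreading. */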
|
|
834
|
+
declare const EU_COMPLIANCE_AUDIT_COMMITMENT: {
|
|
835
|
+
readonly retention_days: 90;
|
|
836
|
+
readonly queryable: true;
|
|
837
|
+
readonly query_endpoint: "https://audit.example.com/traces";
|
|
838
|
+
readonly tamper_evidence: "append_only";
|
|
839
|
+
readonly trace_format: "ap-trace-v1";
|
|
840
|
+
};
|
|
841
|
+
/** Extension block for EU AI Act metadata on the Alignment Card. */
|
|
842
|
+
declare const EU_COMPLIANCE_EXTENSIONS: {
|
|
843
|
+
readonly eu_ai_act: {
|
|
844
|
+
readonly article_50_compliant: true;
|
|
845
|
+
readonly ai_system_classification: "general_purpose";
|
|
846
|
+
readonly disclosure_text: string;
|
|
847
|
+
readonly compliance_version: "2026-08";
|
|
848
|
+
};
|
|
849
|
+
};
|
|
850
|
+
/** Recommended declared values for Article 50 transparency obligations. */
|
|
851
|
+
declare const EU_COMPLIANCE_VALUES: readonly ["transparency", "honesty", "user_control", "principal_benefit"];
|
|
852
|
+
|
|
853
|
+
export { ALGORITHM_VERSION, type APTrace, type Action, type ActionCategory, type ActionTarget, type ActionType, type AgentCoherenceSummary, type AlignmentCard, type AlignmentCardRequest, type AlignmentCardResponse, type Alternative, type AuditCommitment, type AuditStorage, type AutonomyEnvelope, type AutonomyScope, CLUSTER_COMPATIBILITY_THRESHOLD, CONFLICT_PENALTY_MULTIPLIER, type Coherence, type CoherenceResult, type CoherenceResultMessage, DEFAULT_SIMILARITY_THRESHOLD, DEFAULT_SUSTAINED_TURNS_THRESHOLD, type DataSharing, type Decision, type DriftAlert, type DriftAnalysis, type DriftDirection, type DriftIndicator, EU_COMPLIANCE_AUDIT_COMMITMENT, EU_COMPLIANCE_EXTENSIONS, EU_COMPLIANCE_VALUES, type Escalation, type EscalationStatus, type EscalationTrigger, type FleetCluster, type FleetCoherenceResult, type FleetOutlier, type HierarchyType, MAX_TFIDF_FEATURES, MIN_COHERENCE_FOR_PROCEED, MIN_WORD_LENGTH, type MonetaryValue, NEAR_BOUNDARY_THRESHOLD, OUTLIER_STD_DEV_THRESHOLD, type PairwiseEntry, type Principal, type PrincipalResponse, type PrincipalType, type ProposedCollaboration, type ProposedResolution, type RelationshipType, type RequesterInfo, type Severity, type Signature, type StorageType, type TamperEvidence, type TaskContext, type TraceContext, type TriggerAction, type TriggerCheck, VIOLATION_SEVERITY, type ValueAlignment, type ValueAlignmentDetail, type ValueCoherenceCheck, type ValueCoherenceMessage, type ValueConflict, type ValueConflictResult, type ValueDefinition, type ValueDivergence, type Values, type VerificationMetadata, type VerificationResult, type Violation, type ViolationType, type Warning, checkCoherence, checkFleetCoherence, computeCentroid, cosineSimilarity, createViolation, detectDrift, extractCardFeatures, extractTraceFeatures, getSelectedAlternative, hadViolations, hasValue, isActionBounded, isActionForbidden, isCardExpired, verifyTrace, wasEscalated };
|
package/dist/index.js
CHANGED
|
@@ -21,15 +21,21 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
21
21
|
var index_exports = {};
|
|
22
22
|
__export(index_exports, {
|
|
23
23
|
ALGORITHM_VERSION: () => ALGORITHM_VERSION,
|
|
24
|
+
CLUSTER_COMPATIBILITY_THRESHOLD: () => CLUSTER_COMPATIBILITY_THRESHOLD,
|
|
24
25
|
CONFLICT_PENALTY_MULTIPLIER: () => CONFLICT_PENALTY_MULTIPLIER,
|
|
25
26
|
DEFAULT_SIMILARITY_THRESHOLD: () => DEFAULT_SIMILARITY_THRESHOLD,
|
|
26
27
|
DEFAULT_SUSTAINED_TURNS_THRESHOLD: () => DEFAULT_SUSTAINED_TURNS_THRESHOLD,
|
|
28
|
+
EU_COMPLIANCE_AUDIT_COMMITMENT: () => EU_COMPLIANCE_AUDIT_COMMITMENT,
|
|
29
|
+
EU_COMPLIANCE_EXTENSIONS: () => EU_COMPLIANCE_EXTENSIONS,
|
|
30
|
+
EU_COMPLIANCE_VALUES: () => EU_COMPLIANCE_VALUES,
|
|
27
31
|
MAX_TFIDF_FEATURES: () => MAX_TFIDF_FEATURES,
|
|
28
32
|
MIN_COHERENCE_FOR_PROCEED: () => MIN_COHERENCE_FOR_PROCEED,
|
|
29
33
|
MIN_WORD_LENGTH: () => MIN_WORD_LENGTH,
|
|
30
34
|
NEAR_BOUNDARY_THRESHOLD: () => NEAR_BOUNDARY_THRESHOLD,
|
|
35
|
+
OUTLIER_STD_DEV_THRESHOLD: () => OUTLIER_STD_DEV_THRESHOLD,
|
|
31
36
|
VIOLATION_SEVERITY: () => VIOLATION_SEVERITY,
|
|
32
37
|
checkCoherence: () => checkCoherence,
|
|
38
|
+
checkFleetCoherence: () => checkFleetCoherence,
|
|
33
39
|
computeCentroid: () => computeCentroid,
|
|
34
40
|
cosineSimilarity: () => cosineSimilarity,
|
|
35
41
|
createViolation: () => createViolation,
|
|
@@ -55,6 +61,8 @@ var MIN_COHERENCE_FOR_PROCEED = 0.7;
|
|
|
55
61
|
var CONFLICT_PENALTY_MULTIPLIER = 0.5;
|
|
56
62
|
var MIN_WORD_LENGTH = 3;
|
|
57
63
|
var MAX_TFIDF_FEATURES = 500;
|
|
64
|
+
var OUTLIER_STD_DEV_THRESHOLD = 1;
|
|
65
|
+
var CLUSTER_COMPATIBILITY_THRESHOLD = 0.7;
|
|
58
66
|
var ALGORITHM_VERSION = "1.2.0";
|
|
59
67
|
|
|
60
68
|
// src/verification/features.ts
|
|
@@ -430,6 +438,204 @@ function checkCoherence(myCard, theirCard, taskValues) {
|
|
|
430
438
|
proposed_resolution: proposedResolution
|
|
431
439
|
};
|
|
432
440
|
}
|
|
441
|
+
function checkFleetCoherence(cards, taskValues) {
|
|
442
|
+
if (cards.length < 2) {
|
|
443
|
+
throw new Error("Fleet coherence requires at least 2 agents");
|
|
444
|
+
}
|
|
445
|
+
const pairwiseMatrix = [];
|
|
446
|
+
for (let i = 0; i < cards.length; i++) {
|
|
447
|
+
for (let j = i + 1; j < cards.length; j++) {
|
|
448
|
+
pairwiseMatrix.push({
|
|
449
|
+
agent_a: cards[i].agentId,
|
|
450
|
+
agent_b: cards[j].agentId,
|
|
451
|
+
result: checkCoherence(cards[i].card, cards[j].card, taskValues)
|
|
452
|
+
});
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
const allScores = pairwiseMatrix.map((p) => p.result.score);
|
|
456
|
+
const fleetScore = allScores.reduce((a, b) => a + b, 0) / allScores.length;
|
|
457
|
+
const minPairScore = Math.min(...allScores);
|
|
458
|
+
const maxPairScore = Math.max(...allScores);
|
|
459
|
+
const agentIds = cards.map((c) => c.agentId);
|
|
460
|
+
const agentScoreMap = /* @__PURE__ */ new Map();
|
|
461
|
+
const agentCompatibleCount = /* @__PURE__ */ new Map();
|
|
462
|
+
const agentConflictCount = /* @__PURE__ */ new Map();
|
|
463
|
+
for (const id of agentIds) {
|
|
464
|
+
agentScoreMap.set(id, []);
|
|
465
|
+
agentCompatibleCount.set(id, 0);
|
|
466
|
+
agentConflictCount.set(id, 0);
|
|
467
|
+
}
|
|
468
|
+
for (const pair of pairwiseMatrix) {
|
|
469
|
+
agentScoreMap.get(pair.agent_a).push(pair.result.score);
|
|
470
|
+
agentScoreMap.get(pair.agent_b).push(pair.result.score);
|
|
471
|
+
if (pair.result.compatible) {
|
|
472
|
+
agentCompatibleCount.set(pair.agent_a, agentCompatibleCount.get(pair.agent_a) + 1);
|
|
473
|
+
agentCompatibleCount.set(pair.agent_b, agentCompatibleCount.get(pair.agent_b) + 1);
|
|
474
|
+
}
|
|
475
|
+
if (pair.result.value_alignment.conflicts.length > 0) {
|
|
476
|
+
agentConflictCount.set(pair.agent_a, agentConflictCount.get(pair.agent_a) + 1);
|
|
477
|
+
agentConflictCount.set(pair.agent_b, agentConflictCount.get(pair.agent_b) + 1);
|
|
478
|
+
}
|
|
479
|
+
}
|
|
480
|
+
const agentMeans = /* @__PURE__ */ new Map();
|
|
481
|
+
for (const id of agentIds) {
|
|
482
|
+
const scores = agentScoreMap.get(id);
|
|
483
|
+
agentMeans.set(id, scores.reduce((a, b) => a + b, 0) / scores.length);
|
|
484
|
+
}
|
|
485
|
+
const meanValues = [...agentMeans.values()];
|
|
486
|
+
const fleetMeanOfMeans = meanValues.reduce((a, b) => a + b, 0) / meanValues.length;
|
|
487
|
+
const variance = meanValues.reduce((sum, v) => sum + (v - fleetMeanOfMeans) ** 2, 0) / meanValues.length;
|
|
488
|
+
const stddev = Math.sqrt(variance);
|
|
489
|
+
const outliers = [];
|
|
490
|
+
if (stddev > 0 && agentIds.length >= 3) {
|
|
491
|
+
for (const id of agentIds) {
|
|
492
|
+
const agentMean = agentMeans.get(id);
|
|
493
|
+
const deviation = (fleetMeanOfMeans - agentMean) / stddev;
|
|
494
|
+
if (deviation >= OUTLIER_STD_DEV_THRESHOLD) {
|
|
495
|
+
const primaryConflicts = /* @__PURE__ */ new Set();
|
|
496
|
+
for (const pair of pairwiseMatrix) {
|
|
497
|
+
if (pair.agent_a === id || pair.agent_b === id) {
|
|
498
|
+
for (const conflict of pair.result.value_alignment.conflicts) {
|
|
499
|
+
if (conflict.initiator_value !== "(conflicts_with)") {
|
|
500
|
+
primaryConflicts.add(conflict.initiator_value);
|
|
501
|
+
}
|
|
502
|
+
if (conflict.responder_value !== "(conflicts_with)") {
|
|
503
|
+
primaryConflicts.add(conflict.responder_value);
|
|
504
|
+
}
|
|
505
|
+
}
|
|
506
|
+
}
|
|
507
|
+
}
|
|
508
|
+
outliers.push({
|
|
509
|
+
agent_id: id,
|
|
510
|
+
agent_mean_score: Math.round(agentMean * 1e4) / 1e4,
|
|
511
|
+
fleet_mean_score: Math.round(fleetMeanOfMeans * 1e4) / 1e4,
|
|
512
|
+
deviation: Math.round(deviation * 1e4) / 1e4,
|
|
513
|
+
primary_conflicts: [...primaryConflicts]
|
|
514
|
+
});
|
|
515
|
+
}
|
|
516
|
+
}
|
|
517
|
+
}
|
|
518
|
+
const adjacency = /* @__PURE__ */ new Map();
|
|
519
|
+
for (const id of agentIds) {
|
|
520
|
+
adjacency.set(id, /* @__PURE__ */ new Set());
|
|
521
|
+
}
|
|
522
|
+
for (const pair of pairwiseMatrix) {
|
|
523
|
+
if (pair.result.compatible) {
|
|
524
|
+
adjacency.get(pair.agent_a).add(pair.agent_b);
|
|
525
|
+
adjacency.get(pair.agent_b).add(pair.agent_a);
|
|
526
|
+
}
|
|
527
|
+
}
|
|
528
|
+
const visited = /* @__PURE__ */ new Set();
|
|
529
|
+
const clusters = [];
|
|
530
|
+
let clusterId = 0;
|
|
531
|
+
for (const id of agentIds) {
|
|
532
|
+
if (visited.has(id)) continue;
|
|
533
|
+
const component = [];
|
|
534
|
+
const queue = [id];
|
|
535
|
+
visited.add(id);
|
|
536
|
+
while (queue.length > 0) {
|
|
537
|
+
const current = queue.shift();
|
|
538
|
+
component.push(current);
|
|
539
|
+
for (const neighbor of adjacency.get(current)) {
|
|
540
|
+
if (!visited.has(neighbor)) {
|
|
541
|
+
visited.add(neighbor);
|
|
542
|
+
queue.push(neighbor);
|
|
543
|
+
}
|
|
544
|
+
}
|
|
545
|
+
}
|
|
546
|
+
let internalSum = 0;
|
|
547
|
+
let internalCount = 0;
|
|
548
|
+
for (let i = 0; i < component.length; i++) {
|
|
549
|
+
for (let j = i + 1; j < component.length; j++) {
|
|
550
|
+
const pair = pairwiseMatrix.find(
|
|
551
|
+
(p) => p.agent_a === component[i] && p.agent_b === component[j] || p.agent_a === component[j] && p.agent_b === component[i]
|
|
552
|
+
);
|
|
553
|
+
if (pair) {
|
|
554
|
+
internalSum += pair.result.score;
|
|
555
|
+
internalCount++;
|
|
556
|
+
}
|
|
557
|
+
}
|
|
558
|
+
}
|
|
559
|
+
const internalCoherence = internalCount > 0 ? internalSum / internalCount : 1;
|
|
560
|
+
const clusterCards = component.map((cid) => cards.find((c) => c.agentId === cid));
|
|
561
|
+
const sharedValues = clusterCards.reduce((shared, entry, idx) => {
|
|
562
|
+
const declared = entry.card.values.declared ?? [];
|
|
563
|
+
if (idx === 0) return [...declared];
|
|
564
|
+
return shared.filter((v) => declared.includes(v));
|
|
565
|
+
}, []);
|
|
566
|
+
const allOtherValues = /* @__PURE__ */ new Set();
|
|
567
|
+
for (const entry of cards) {
|
|
568
|
+
if (!component.includes(entry.agentId)) {
|
|
569
|
+
for (const v of entry.card.values.declared ?? []) {
|
|
570
|
+
allOtherValues.add(v);
|
|
571
|
+
}
|
|
572
|
+
}
|
|
573
|
+
}
|
|
574
|
+
const distinguishingValues = sharedValues.filter((v) => !allOtherValues.has(v));
|
|
575
|
+
clusters.push({
|
|
576
|
+
cluster_id: clusterId++,
|
|
577
|
+
agent_ids: component,
|
|
578
|
+
internal_coherence: Math.round(internalCoherence * 1e4) / 1e4,
|
|
579
|
+
shared_values: sharedValues,
|
|
580
|
+
distinguishing_values: distinguishingValues
|
|
581
|
+
});
|
|
582
|
+
}
|
|
583
|
+
const allValues = /* @__PURE__ */ new Set();
|
|
584
|
+
const agentValueMap = /* @__PURE__ */ new Map();
|
|
585
|
+
const agentConflictMap = /* @__PURE__ */ new Map();
|
|
586
|
+
for (const entry of cards) {
|
|
587
|
+
const declared = new Set(entry.card.values.declared ?? []);
|
|
588
|
+
const conflicts = new Set(entry.card.values.conflicts_with ?? []);
|
|
589
|
+
agentValueMap.set(entry.agentId, declared);
|
|
590
|
+
agentConflictMap.set(entry.agentId, conflicts);
|
|
591
|
+
for (const v of declared) allValues.add(v);
|
|
592
|
+
}
|
|
593
|
+
const divergenceReport = [];
|
|
594
|
+
for (const value of allValues) {
|
|
595
|
+
const declaring = agentIds.filter((id) => agentValueMap.get(id).has(value));
|
|
596
|
+
const missing = agentIds.filter((id) => !agentValueMap.get(id).has(value));
|
|
597
|
+
const conflicting = agentIds.filter((id) => agentConflictMap.get(id).has(value));
|
|
598
|
+
if (missing.length === 0 && conflicting.length === 0) continue;
|
|
599
|
+
const impactOnFleetScore = Math.round(
|
|
600
|
+
(missing.length + conflicting.length) / agentIds.length * 1e4
|
|
601
|
+
) / 1e4;
|
|
602
|
+
divergenceReport.push({
|
|
603
|
+
value,
|
|
604
|
+
agents_declaring: declaring,
|
|
605
|
+
agents_missing: missing,
|
|
606
|
+
agents_conflicting: conflicting,
|
|
607
|
+
impact_on_fleet_score: impactOnFleetScore
|
|
608
|
+
});
|
|
609
|
+
}
|
|
610
|
+
divergenceReport.sort((a, b) => b.impact_on_fleet_score - a.impact_on_fleet_score);
|
|
611
|
+
const agentClusterMap = /* @__PURE__ */ new Map();
|
|
612
|
+
for (const cluster of clusters) {
|
|
613
|
+
for (const id of cluster.agent_ids) {
|
|
614
|
+
agentClusterMap.set(id, cluster.cluster_id);
|
|
615
|
+
}
|
|
616
|
+
}
|
|
617
|
+
const outlierIds = new Set(outliers.map((o) => o.agent_id));
|
|
618
|
+
const agentSummaries = agentIds.map((id) => ({
|
|
619
|
+
agent_id: id,
|
|
620
|
+
mean_score: Math.round(agentMeans.get(id) * 1e4) / 1e4,
|
|
621
|
+
compatible_count: agentCompatibleCount.get(id),
|
|
622
|
+
conflict_count: agentConflictCount.get(id),
|
|
623
|
+
cluster_id: agentClusterMap.get(id) ?? 0,
|
|
624
|
+
is_outlier: outlierIds.has(id)
|
|
625
|
+
}));
|
|
626
|
+
return {
|
|
627
|
+
fleet_score: Math.round(fleetScore * 1e4) / 1e4,
|
|
628
|
+
min_pair_score: Math.round(minPairScore * 1e4) / 1e4,
|
|
629
|
+
max_pair_score: Math.round(maxPairScore * 1e4) / 1e4,
|
|
630
|
+
agent_count: cards.length,
|
|
631
|
+
pair_count: pairwiseMatrix.length,
|
|
632
|
+
pairwise_matrix: pairwiseMatrix,
|
|
633
|
+
outliers,
|
|
634
|
+
clusters,
|
|
635
|
+
divergence_report: divergenceReport,
|
|
636
|
+
agent_summaries: agentSummaries
|
|
637
|
+
};
|
|
638
|
+
}
|
|
433
639
|
function detectDrift(card, traces, similarityThreshold = DEFAULT_SIMILARITY_THRESHOLD, sustainedThreshold = DEFAULT_SUSTAINED_TURNS_THRESHOLD) {
|
|
434
640
|
const sorted = [...traces].sort(
|
|
435
641
|
(a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()
|
|
@@ -636,18 +842,47 @@ function wasEscalated(trace) {
|
|
|
636
842
|
function hadViolations(trace) {
|
|
637
843
|
return trace.action.category === "forbidden";
|
|
638
844
|
}
|
|
845
|
+
|
|
846
|
+
// src/compliance.ts
|
|
847
|
+
var EU_COMPLIANCE_AUDIT_COMMITMENT = {
|
|
848
|
+
retention_days: 90,
|
|
849
|
+
queryable: true,
|
|
850
|
+
query_endpoint: "https://audit.example.com/traces",
|
|
851
|
+
tamper_evidence: "append_only",
|
|
852
|
+
trace_format: "ap-trace-v1"
|
|
853
|
+
};
|
|
854
|
+
var EU_COMPLIANCE_EXTENSIONS = {
|
|
855
|
+
eu_ai_act: {
|
|
856
|
+
article_50_compliant: true,
|
|
857
|
+
ai_system_classification: "general_purpose",
|
|
858
|
+
disclosure_text: "This system is powered by an AI agent. Its decisions are logged and auditable. You may request a human review of any decision.",
|
|
859
|
+
compliance_version: "2026-08"
|
|
860
|
+
}
|
|
861
|
+
};
|
|
862
|
+
var EU_COMPLIANCE_VALUES = [
|
|
863
|
+
"transparency",
|
|
864
|
+
"honesty",
|
|
865
|
+
"user_control",
|
|
866
|
+
"principal_benefit"
|
|
867
|
+
];
|
|
639
868
|
// Annotate the CommonJS export names for ESM import in node:
|
|
640
869
|
0 && (module.exports = {
|
|
641
870
|
ALGORITHM_VERSION,
|
|
871
|
+
CLUSTER_COMPATIBILITY_THRESHOLD,
|
|
642
872
|
CONFLICT_PENALTY_MULTIPLIER,
|
|
643
873
|
DEFAULT_SIMILARITY_THRESHOLD,
|
|
644
874
|
DEFAULT_SUSTAINED_TURNS_THRESHOLD,
|
|
875
|
+
EU_COMPLIANCE_AUDIT_COMMITMENT,
|
|
876
|
+
EU_COMPLIANCE_EXTENSIONS,
|
|
877
|
+
EU_COMPLIANCE_VALUES,
|
|
645
878
|
MAX_TFIDF_FEATURES,
|
|
646
879
|
MIN_COHERENCE_FOR_PROCEED,
|
|
647
880
|
MIN_WORD_LENGTH,
|
|
648
881
|
NEAR_BOUNDARY_THRESHOLD,
|
|
882
|
+
OUTLIER_STD_DEV_THRESHOLD,
|
|
649
883
|
VIOLATION_SEVERITY,
|
|
650
884
|
checkCoherence,
|
|
885
|
+
checkFleetCoherence,
|
|
651
886
|
computeCentroid,
|
|
652
887
|
cosineSimilarity,
|
|
653
888
|
createViolation,
|