npm - @railtownai/railtracks-visualizer - Versions diffs - 0.0.49 → 0.0.50 - Mend

@railtownai/railtracks-visualizer 0.0.49 → 0.0.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/dist/cjs/index.js +17967 -5939
package/dist/esm/index.js +17972 -5951
package/dist/types/agenthub/components/EvaluationsTable.d.ts +34 -0
package/dist/types/agenthub/components/ToolUseEvaluatorMetricsTable.d.ts +17 -0
package/dist/types/agenthub/context/EvaluationsMockProvider.d.ts +3 -31
package/dist/types/agenthub/hooks/useEvaluations.d.ts +2 -2
package/dist/types/agenthub/pages/evaluation-details-drawer.d.ts +10 -0
package/dist/types/agenthub/pages/evaluations.d.ts +5 -1
package/dist/types/agenthub/pages/evaluations.types.d.ts +26 -0
package/dist/types/agenthub/pages/evaluator-result-page.d.ts +11 -0
package/dist/types/agenthub/pages/evaluator-result.d.ts +15 -0
package/dist/types/agenthub/pages/index.d.ts +2 -0
package/dist/types/agenthub/utils/transformEvaluation.d.ts +9 -0
package/dist/types/dto/Evaluation.d.ts +140 -0
package/dist/types/dto/index.d.ts +1 -0
package/dist/types/index.d.ts +4 -0
package/dist/types/lib/theme.d.ts +16 -0
package/dist/types/lib/utils.d.ts +32 -0
package/package.json +1 -1
package/dist/types/agenthub/pages/evaluation-details.d.ts +0 -9

package/dist/types/agenthub/components/EvaluationsTable.d.ts ADDED Viewed

@@ -0,0 +1,34 @@
+/**
+ * Reusable evaluations table that can be embedded in other React projects: pass the
+ * evaluations data via the `evaluations` prop.
+ *
+ * Accepts either raw DTO (API) data or pre-transformed data. DTO is auto-detected
+ * and transformed internally.
+ *
+ * Example (raw API/DTO):
+ * <EvaluationsTable evaluations={apiData} />
+ *
+ * Example (transformed):
+ * <EvaluationsTable evaluations={evaluations} onRowClick={handleRowClick} />
+ *
+ * Example (with compare handler):
+ * <EvaluationsTable evaluations={evaluations} onCompare={handleCompare} />
+ */
+import React from "react";
+import type { Evaluation } from "../pages/evaluations.types";
+import type { Evaluation as EvaluationDto } from "../../dto/Evaluation";
+export interface EvaluationsTableProps {
+    /** Raw DTO (API) or pre-transformed evaluations. DTO is auto-detected and transformed. */
+    evaluations: (Evaluation | EvaluationDto)[];
+    loading?: boolean;
+    error?: string | null;
+    onRefresh?: () => void;
+    onRowClick?: (evaluation: Evaluation) => void;
+    onCompare?: (sourceId: string, targetId: string) => void;
+    showFilters?: boolean;
+    showCompare?: boolean;
+    emptyMessage?: React.ReactNode;
+    /** Optional title rendered to the left of the toolbar (e.g. "Evaluations") */
+    title?: React.ReactNode;
+}
+export declare const EvaluationsTable: React.FC<EvaluationsTableProps>;

package/dist/types/agenthub/components/ToolUseEvaluatorMetricsTable.d.ts ADDED Viewed

@@ -0,0 +1,17 @@
+import React from "react";
+import type { EvaluationResultItem } from "../../dto/Evaluation";
+export interface ToolUseTreeRow {
+    key: string;
+    name: string;
+    invocations?: number;
+    runtimeMs?: number;
+    failureRate?: string | "Success" | "Failed"; // NOTE(review): the literals are absorbed by `string`, so this type is effectively just `string`
+    level: 1 | 2 | 3;
+    children?: ToolUseTreeRow[];
+}
+export interface ToolUseEvaluatorMetricsTableProps {
+    rawResults: EvaluationResultItem[];
+    /** Title shown above the table. When provided, an expand/collapse-all button is shown next to it. */
+    title?: string;
+}
+export declare const ToolUseEvaluatorMetricsTable: React.FC<ToolUseEvaluatorMetricsTableProps>;

package/dist/types/agenthub/context/EvaluationsMockProvider.d.ts CHANGED Viewed

@@ -1,39 +1,11 @@
 import React, { ReactNode } from "react";
-interface ApiEvaluatorResult {
-    name: string;
-    evaluator_id: string;
-    config_hash: string;
-    results: Array<{
-        metric: {
-            name: string;
-        };
-        value: number;
-    }>;
-    agent_runs: Array<{
-        session_id: string;
-        run_id: string;
-    }>;
-}
-export interface ApiEvaluation {
-    evaluation_id: string;
-    evaluation_name: string;
-    agent_name: string;
-    created_at: string;
-    agent_run_ids: string[];
-    results: ApiEvaluatorResult[];
-    metrics: Array<{
-        name: string;
-        min_value?: number;
-        max_value?: number;
-        options?: string[];
-    }>;
-}
+import type { Evaluation } from "../../dto/Evaluation";
 interface EvaluationsMockContextValue {
-    mockEvaluations: ApiEvaluation[] | null;
+    mockEvaluations: Evaluation[] | null;
 }
 export interface EvaluationsMockProviderProps {
     children: ReactNode;
-    mockEvaluations: ApiEvaluation[] | null;
+    mockEvaluations: Evaluation[] | null;
 }
 export declare const EvaluationsMockProvider: React.FC<EvaluationsMockProviderProps>;
 export declare const useEvaluationsMock: () => EvaluationsMockContextValue | undefined;

package/dist/types/agenthub/hooks/useEvaluations.d.ts CHANGED Viewed

@@ -1,6 +1,6 @@
-import type { Evaluation } from "../pages/evaluations.types";
+import type { Evaluation as EvaluationsPageEvaluation } from "../pages/evaluations.types";
 export declare const useEvaluations: () => {
-    evaluations: Evaluation[];
+    evaluations: EvaluationsPageEvaluation[];
     loading: boolean;
     error: string | null;
     refetch: () => Promise<void>;

package/dist/types/agenthub/pages/evaluation-details-drawer.d.ts ADDED Viewed

@@ -0,0 +1,10 @@
+import React from "react";
+import type { Evaluation } from "./evaluations.types";
+interface EvaluationDetailsDrawerProps {
+    evaluation: Evaluation | null;
+    open: boolean;
+    onClose: () => void;
+    getEvaluatorResultsHref?: (evaluationId: string, evaluatorId: string) => string;
+}
+export declare const EvaluationDetailsDrawer: React.FC<EvaluationDetailsDrawerProps>;
+export {};

package/dist/types/agenthub/pages/evaluations.d.ts CHANGED Viewed

@@ -1,2 +1,6 @@
 import React from "react";
-export declare const EvaluationsPage: React.FC;
+interface EvaluationsPageProps {
+    onRefresh?: () => void;
+}
+export declare const EvaluationsPage: React.FC<EvaluationsPageProps>;
+export {};

package/dist/types/agenthub/pages/evaluations.types.d.ts CHANGED Viewed

@@ -1,9 +1,11 @@
+import type { EvaluationResultItem } from "../../dto/Evaluation";
 export type MetricType = "Categorical" | "Continuous";
 export interface CategoricalMetric {
     name: string;
     sha: string;
     type: "Categorical";
     options: string[];
+    description?: string | null;
 }
 export interface ContinuousMetric {
     name: string;
@@ -11,6 +13,7 @@ export interface ContinuousMetric {
     type: "Continuous";
     min_value: number;
     max_value: number;
+    description?: string | null;
 }
 export type Metric = CategoricalMetric | ContinuousMetric;
 export interface Evaluator {
@@ -24,12 +27,35 @@ export interface EvaluationRun {
     run_id: string;
     results: Record<string, Record<string, number>>;
 }
+/** Detailed stats for a single metric (e.g. Latency/dice_roll) */
+export interface DetailedMetricStat {
+    metricLabel: string;
+    mean: number;
+    median: number;
+    minimum: number;
+    maximum: number;
+    std: number;
+}
 export interface Evaluation {
     evaluation_id: string;
     evaluators: Evaluator[];
     name: string;
     runs: EvaluationRun[];
     results: Record<string, Record<string, number>>;
+    /** Per-evaluator detailed stats (mean, median, min, max, std) for each metric */
+    detailedStats?: Record<string, DetailedMetricStat[]>;
+    /** Per-evaluator status (evaluator_id -> status) */
+    evaluatorStatuses?: Record<string, string>;
+    /** Per-evaluator raw results from DTO (for ToolUseEvaluator tree table) */
+    rawEvaluatorResults?: Record<string, EvaluationResultItem[]>;
+    /** Display name(s) - comma-separated when multiple agents */
     agent_name: string;
+    /** Agents in the evaluation (name per agent) */
+    agents: {
+        agent_name: string;
+    }[];
+    /** Number of agents in the evaluation */
+    agents_count: number;
     created_at: string;
+    completed_at?: string;
 }

package/dist/types/agenthub/pages/evaluator-result-page.d.ts ADDED Viewed

@@ -0,0 +1,11 @@
+import React from "react";
+export interface EvaluatorResultPageProps {
+    /** Optional href for back link. Pass from the host app (e.g. "#/evaluations" for HashRouter). */
+    backHref?: string;
+}
+/**
+ * Page that resolves evaluationId and evaluatorId from the route,
+ * fetches evaluation data, and renders EvaluatorResult.
+ * backHref should be passed from the outer/host app for portability.
+ */
+export declare const EvaluatorResultPage: React.FC<EvaluatorResultPageProps>;

package/dist/types/agenthub/pages/evaluator-result.d.ts ADDED Viewed

@@ -0,0 +1,15 @@
+import React from "react";
+import type { Evaluation } from "./evaluations.types";
+export interface EvaluatorResultProps {
+    /** Full evaluation. Pass null to show empty/loading state. */
+    evaluation: Evaluation | null;
+    /** Evaluator ID to show results for. */
+    evaluatorId: string;
+    /** Optional href for back link. When provided, a "Back to Evaluations" control is shown. */
+    backHref?: string;
+}
+/**
+ * Full-page component that renders a single evaluator's result.
+ * Exportable and reusable: pass Evaluation and evaluatorId from any React app.
+ */
+export declare const EvaluatorResult: React.FC<EvaluatorResultProps>;

package/dist/types/agenthub/pages/index.d.ts CHANGED Viewed

@@ -2,4 +2,6 @@ export { VisualizerPage } from "./visualizer";
 export { SessionsPage } from "./sessions";
 export { EvaluationsPage } from "./evaluations";
 export { EvaluationsComparePage } from "./evaluations-compare";
+export { EvaluatorResult } from "./evaluator-result";
+export { EvaluatorResultPage } from "./evaluator-result-page";
 export { SessionDetails } from "./session-details";

package/dist/types/agenthub/utils/transformEvaluation.d.ts ADDED Viewed

@@ -0,0 +1,9 @@
+import type { Evaluation as EvaluationsPageEvaluation } from "../pages/evaluations.types";
+import type { Evaluation as EvaluationDto } from "../../dto/Evaluation";
+/**
+ * Transform Evaluation DTO to evaluations page Evaluation type.
+ * Use when passing raw API/JSON data to EvaluationsTable or when building a data hook.
+ */
+export declare function transformEvaluation(evalDto: EvaluationDto): EvaluationsPageEvaluation;
+/** Type guard: true if the value is an Evaluation DTO (raw API shape) rather than transformed. */
+export declare function isEvaluationDto(x: EvaluationsPageEvaluation | EvaluationDto): x is EvaluationDto;

package/dist/types/dto/Evaluation.d.ts ADDED Viewed

@@ -0,0 +1,140 @@
+/**
+ * DTOs for evaluation data from .railtracks/data/evaluations/*.json
+ */
+/** Numeric metric definition (LLMMetric or ToolMetric) */
+export type EvaluationNumericMetricDefinition = {
+    name: string;
+    metric_type: "LLMMetric" | "ToolMetric";
+    identifier: string;
+    description: string | null;
+    min_value: number | null;
+    max_value: number | null;
+};
+/** Categorical metric definition */
+export type EvaluationCategoricalMetricDefinition = {
+    name: string;
+    metric_type: "Categorical";
+    identifier: string;
+    description: string | null;
+    categories: string[];
+};
+/** Metric definition in the evaluation's metrics_map */
+export type EvaluationMetricDefinition = EvaluationNumericMetricDefinition | EvaluationCategoricalMetricDefinition;
+/** LLM result (per-call metric value) */
+export type EvaluationLLMResult = {
+    type: "LLM";
+    result_name: string;
+    metric_id: string;
+    agent_data_id: string[];
+    value: number;
+    llm_call_index: number;
+    model_name: string;
+    model_provider: string;
+};
+/** Tool result (per-tool-call metric value) */
+export type EvaluationToolResult = {
+    type: "Tool";
+    result_name: string;
+    metric_id: string;
+    agent_data_id: string[];
+    value: number;
+    tool_name: string;
+    tool_node_id: string | null;
+};
+/** Base result (e.g. JudgeResult, JudgeReasoning - value can be string for categorical) */
+export type EvaluationBaseResult = {
+    type: "Base";
+    result_name: string;
+    metric_id: string;
+    agent_data_id: string[];
+    value: number | string;
+};
+/** Metric reference used in aggregated stats */
+export type EvaluationMetricReference = {
+    name: string;
+    identifier: string;
+    min_value?: number; // NOTE(review): unlike max_value below, null is not accepted here — confirm against the JSON data
+    max_value?: number | null;
+    metric_type?: "LLMMetric" | "ToolMetric" | "Categorical";
+    description?: string | null;
+    categories?: string[];
+};
+/** LLM inference aggregate (mean/median/etc per llm_call_index) */
+export type EvaluationLLMInferenceAggregate = {
+    type: "LLMInferenceAggregate";
+    metric: EvaluationMetricReference;
+    values: number[];
+    mean: number;
+    minimum: number;
+    maximum: number;
+    median: number;
+    std: number;
+    mode: number;
+    llm_call_index: number;
+    model_name: string;
+    model_provider: string;
+};
+/** Tool aggregate (mean/median/etc per tool) */
+export type EvaluationToolAggregate = {
+    type: "ToolAggregate";
+    metric: EvaluationMetricReference;
+    values: number[];
+    mean: number;
+    minimum: number;
+    maximum: number;
+    median: number;
+    std: number;
+    mode: number;
+    tool_name: string;
+};
+/** Categorical aggregate (counts and labels) */
+export type EvaluationAggregateCategorical = {
+    type: "AggregateCategorical";
+    metric: EvaluationMetricReference & {
+        metric_type: "Categorical";
+        categories: string[];
+    };
+    labels: string[];
+    most_common_label: string;
+    least_common_label: string;
+    counts: Record<string, number>;
+};
+/** Union of possible items in evaluator results array */
+export type EvaluationResultItem = EvaluationLLMResult | EvaluationToolResult | EvaluationBaseResult | EvaluationLLMInferenceAggregate | EvaluationToolAggregate | EvaluationAggregateCategorical;
+/** Type guard for LLM result */
+export declare function isLLMResult(item: EvaluationResultItem): item is EvaluationLLMResult;
+/** Type guard for Tool result */
+export declare function isToolResult(item: EvaluationResultItem): item is EvaluationToolResult;
+/** Type guard for Base result */
+export declare function isBaseResult(item: EvaluationResultItem): item is EvaluationBaseResult;
+/** Type guard for LLM inference aggregate */
+export declare function isLLMInferenceAggregate(item: EvaluationResultItem): item is EvaluationLLMInferenceAggregate;
+/** Type guard for Tool aggregate */
+export declare function isToolAggregate(item: EvaluationResultItem): item is EvaluationToolAggregate;
+/** Type guard for AggregateCategorical */
+export declare function isAggregateCategorical(item: EvaluationResultItem): item is EvaluationAggregateCategorical;
+/** Type guard for aggregated stats (numeric - LLMInferenceAggregate or ToolAggregate) */
+export declare function isAggregatedStats(item: EvaluationResultItem): item is EvaluationLLMInferenceAggregate | EvaluationToolAggregate;
+/** Result set from a single evaluator. Array of these in evaluator_results. */
+export type EvaluationEvaluatorResult = {
+    evaluator_name: string;
+    evaluator_id: string;
+    status?: string;
+    results: EvaluationResultItem[];
+};
+/** Agent entry in the evaluation (each has name and associated data IDs) */
+export type EvaluationAgent = {
+    agent_name: string;
+    agent_data_ids: string[];
+};
+/** Root evaluation document */
+export type Evaluation = {
+    evaluation_id: string;
+    evaluation_name: string;
+    created_at: string;
+    completed_at?: string;
+    agents: EvaluationAgent[];
+    metrics_map: Record<string, EvaluationMetricDefinition>;
+    /** Array of evaluator results. One evaluation can have multiple evaluator results. */
+    evaluator_results: EvaluationEvaluatorResult[];
+};

package/dist/types/dto/index.d.ts CHANGED Viewed

@@ -7,3 +7,4 @@ import { AgentRunStamp } from "./AgentRunStamp";
 import { AgentRunStep } from "./AgentRunStep";
 import { AgentSession } from "./AgentSession";
 export type { AgentRun, AgentSession, AgentRunNode, AgentRunEdge, AgentRunEdgeDetails, AgentRunEdgeStamp, AgentRunStamp, AgentRunStep };
+export { type Evaluation, type EvaluationMetricDefinition, type EvaluationNumericMetricDefinition, type EvaluationCategoricalMetricDefinition, type EvaluationLLMResult, type EvaluationToolResult, type EvaluationBaseResult, type EvaluationMetricReference, type EvaluationLLMInferenceAggregate, type EvaluationToolAggregate, type EvaluationAggregateCategorical, type EvaluationResultItem, type EvaluationEvaluatorResult, isLLMResult, isToolResult, isBaseResult, isLLMInferenceAggregate, isToolAggregate, isAggregateCategorical, isAggregatedStats } from "./Evaluation";

package/dist/types/index.d.ts CHANGED Viewed

@@ -1,6 +1,10 @@
 export { default as AgenticFlowVisualizer } from "./components/AgenticFlowVisualizer";
 export { default as Visualizer } from "./components/Visualizer";
 export { SessionDetails } from "./agenthub/pages/session-details";
+export { EvaluationsTable } from "./agenthub/components/EvaluationsTable";
+export { transformEvaluation, isEvaluationDto } from "./agenthub/utils/transformEvaluation";
+export type { Evaluation } from "./agenthub/pages/evaluations.types";
+export type { Evaluation as EvaluationDto } from "./dto/Evaluation";
 export { Node } from "./components/Node";
 export { Edge } from "./components/Edge";
 export { Timeline } from "./components/Timeline";

package/dist/types/lib/theme.d.ts CHANGED Viewed

@@ -22,6 +22,22 @@ export interface Theme {
         mutedBorder: string;
         input: string;
         ring: string;
+        /** Evaluator metric tag variants (tooluse=blue, llminference=purple, judge=red) */
+        tagTooluse?: {
+            bg: string;
+            border: string;
+            color: string;
+        };
+        tagLlminference?: {
+            bg: string;
+            border: string;
+            color: string;
+        };
+        tagJudge?: {
+            bg: string;
+            border: string;
+            color: string;
+        };
     };
     spacing: {
         xs: string;

package/dist/types/lib/utils.d.ts CHANGED Viewed

@@ -25,6 +25,17 @@ export declare function formatNumberWithCommas(value: number | null | undefined)
  * @returns The formatted latency string
  */
 export declare function formatLatency(latency: number): string;
+/**
+ * Formats the duration between two timestamps as a compact string (e.g. "2m 4s", "1h 5m 30s")
+ * @param createdAt - Start timestamp (ISO string)
+ * @param completedAt - End timestamp (ISO string, optional)
+ * @returns Formatted duration or "-" if incomplete
+ */
+export declare function formatDuration(createdAt: string, completedAt?: string): string;
+/**
+ * Formats an ISO date string to a short locale string (e.g. "Jan 15, 3:45 PM PST")
+ */
+export declare function formatDateShort(iso: string): string;
 /**
  * Truncates text to a specified length
  * @param text - The text to truncate
@@ -91,6 +102,27 @@ export declare const detectContentType: (output: {
     role: string;
     content: any;
 }) => "string" | "toolResponse" | "toolCallList" | "other";
+/**
+ * Minimal evaluator shape for formatMetricValue (avoids coupling to agenthub types)
+ */
+export interface EvaluatorMetricInfo {
+    metrics: Array<{
+        name: string;
+        type: string;
+        options?: string[];
+    }>;
+}
+/**
+ * Formats a metric value for display based on evaluator metric definitions.
+ * Handles Categorical (index → option label), Continuous (decimal), and fallback.
+ */
+export declare function formatMetricValue(evaluator: EvaluatorMetricInfo | null | undefined, metricName: string, value: number | undefined): string;
+/**
+ * Maps a status string to an Ant Design Tag color
+ * @param status - The status string (e.g., completed, failed, running)
+ * @returns The Ant Design color name for Tag component
+ */
+export declare function getStatusColor(status: string): string;
 /**
  * Formats tool calls for display in a code block
  */

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@railtownai/railtracks-visualizer",
-  "version": "0.0.49",
+  "version": "0.0.50",
   "license": "MIT",
   "author": "Railtown AI",
   "description": "A visualizer for Railtracks agentic flows",

package/dist/types/agenthub/pages/evaluation-details.d.ts DELETED Viewed

@@ -1,9 +0,0 @@
-import React from "react";
-import type { Evaluation } from "./evaluations.types";
-interface EvaluationDetailsProps {
-    evaluation: Evaluation | null;
-    open: boolean;
-    onClose: () => void;
-}
-export declare const EvaluationDetails: React.FC<EvaluationDetailsProps>;
-export {};