@tracemarketplace/shared 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/extractors/claude-code.d.ts +3 -0
- package/dist/extractors/claude-code.d.ts.map +1 -0
- package/dist/extractors/claude-code.js +158 -0
- package/dist/extractors/claude-code.js.map +1 -0
- package/dist/extractors/codex.d.ts +3 -0
- package/dist/extractors/codex.d.ts.map +1 -0
- package/dist/extractors/codex.js +192 -0
- package/dist/extractors/codex.js.map +1 -0
- package/dist/extractors/cursor.d.ts +3 -0
- package/dist/extractors/cursor.d.ts.map +1 -0
- package/dist/extractors/cursor.js +99 -0
- package/dist/extractors/cursor.js.map +1 -0
- package/dist/hash.d.ts +4 -0
- package/dist/hash.d.ts.map +1 -0
- package/dist/hash.js +13 -0
- package/dist/hash.js.map +1 -0
- package/dist/hash.test.d.ts +2 -0
- package/dist/hash.test.d.ts.map +1 -0
- package/dist/hash.test.js +67 -0
- package/dist/hash.test.js.map +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +9 -0
- package/dist/index.js.map +1 -0
- package/dist/scoring.d.ts +5 -0
- package/dist/scoring.d.ts.map +1 -0
- package/dist/scoring.js +114 -0
- package/dist/scoring.js.map +1 -0
- package/dist/scoring.test.d.ts +2 -0
- package/dist/scoring.test.d.ts.map +1 -0
- package/dist/scoring.test.js +157 -0
- package/dist/scoring.test.js.map +1 -0
- package/dist/types.d.ts +98 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/dist/utils.d.ts +3 -0
- package/dist/utils.d.ts.map +1 -0
- package/dist/utils.js +11 -0
- package/dist/utils.js.map +1 -0
- package/dist/validators.d.ts +247 -0
- package/dist/validators.d.ts.map +1 -0
- package/dist/validators.js +36 -0
- package/dist/validators.js.map +1 -0
- package/dist/validators.test.d.ts +2 -0
- package/dist/validators.test.d.ts.map +1 -0
- package/dist/validators.test.js +52 -0
- package/dist/validators.test.js.map +1 -0
- package/package.json +42 -0
- package/src/extractors/claude-code.ts +178 -0
- package/src/extractors/codex.ts +208 -0
- package/src/extractors/cursor.ts +118 -0
- package/src/hash.test.ts +72 -0
- package/src/hash.ts +15 -0
- package/src/index.ts +8 -0
- package/src/scoring.test.ts +173 -0
- package/src/scoring.ts +149 -0
- package/src/types.ts +96 -0
- package/src/utils.ts +9 -0
- package/src/validators.test.ts +61 -0
- package/src/validators.ts +41 -0
- package/tsconfig.json +8 -0
- package/vitest.config.ts +8 -0
package/src/scoring.ts
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import type { NormalizedTrace, TraceScore, FailureMode } from "./types.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Heuristically labels a trace with the failure modes it exhibits.
 *
 * Labels are added in a fixed order — `tool_call_failure`,
 * `repeated_tool_calls`, `context_limit_approached`, `graceful_recovery`,
 * `catastrophic_failure` — and the returned array keeps that order.
 * `no_failure` is returned on its own when nothing else matched.
 *
 * @param trace - Trace whose turns and content blocks are inspected.
 * @returns The distinct failure modes detected; never empty.
 */
export function detectFailureModes(trace: NormalizedTrace): FailureMode[] {
  const { turns } = trace;
  const found = new Set<FailureMode>();

  // One pass over every block: record tool names in call order and note
  // whether any tool result was flagged as an error.
  const calledTools: string[] = [];
  let sawToolError = false;
  for (const turn of turns) {
    for (const block of turn.content) {
      if (block.type === "tool_use") {
        calledTools.push(block.tool_name);
      } else if (block.type === "tool_result" && block.is_error) {
        sawToolError = true;
      }
    }
  }

  // True when any text block within the given turns matches the pattern.
  const textMatches = (subset: typeof turns, pattern: RegExp): boolean =>
    subset.some((turn) =>
      turn.content.some(
        (block) => block.type === "text" && pattern.test(block.text)
      )
    );

  if (sawToolError) found.add("tool_call_failure");

  // The same tool invoked three times back to back. Tool calls are compared
  // across turn boundaries; blocks of other types in between are ignored.
  const hasTripleRun = calledTools.some(
    (name, i) =>
      i >= 2 && name === calledTools[i - 1] && name === calledTools[i - 2]
  );
  if (hasTripleRun) found.add("repeated_tool_calls");

  if (textMatches(turns, /context.*(limit|window|maximum)|context limit/i)) {
    found.add("context_limit_approached");
  }

  // Recovery wording only counts when a tool error occurred, and only when
  // it appears in the second half of the trace.
  if (
    sawToolError &&
    textMatches(
      turns.slice(Math.floor(turns.length / 2)),
      /let me try|instead|alternative|another approach|different way/i
    )
  ) {
    found.add("graceful_recovery");
  }

  // A repeated-call streak also counts as recovered when the trace ends on
  // an assistant turn holding a text block longer than 50 characters.
  if (found.has("repeated_tool_calls")) {
    const finalTurn = turns[turns.length - 1];
    if (
      finalTurn?.role === "assistant" &&
      finalTurn.content.some(
        (block) => block.type === "text" && block.text.length > 50
      )
    ) {
      found.add("graceful_recovery");
    }
  }

  // Trailing window of up to three turns. At least two turns are required,
  // and every one of them must contain an errored tool result.
  const tail = turns.slice(-3);
  const tailAllErrored =
    tail.length >= 2 &&
    tail.every((turn) =>
      turn.content.some(
        (block) => block.type === "tool_result" && block.is_error
      )
    );
  if (tailAllErrored && !found.has("graceful_recovery")) {
    found.add("catastrophic_failure");
  }

  if (found.size === 0) found.add("no_failure");

  return Array.from(found);
}
|
|
81
|
+
|
|
82
|
+
/**
 * Classifies how a trace ends.
 *
 * - `"malformed"`: the trace has no turns, or any turn has an empty
 *   `content` array.
 * - `"complete"`: the final turn is from the assistant, contains a text
 *   block, and does not end on a tool call.
 * - `"incomplete"`: everything else — the final turn is from the user, or
 *   the assistant's final turn ends on a `tool_use` block (no result
 *   follows it), or it contains no text at all.
 *
 * @param trace - Trace whose turns are inspected.
 * @returns The completeness classification.
 */
export function checkCompleteness(
  trace: NormalizedTrace
): "complete" | "incomplete" | "malformed" {
  const { turns } = trace;
  if (turns.length === 0) return "malformed";

  // malformed: any turn with empty content
  if (turns.some((t) => t.content.length === 0)) return "malformed";

  const lastTurn = turns[turns.length - 1];
  if (!lastTurn) return "malformed";

  // last turn is user (e.g., tool_result with no following assistant)
  if (lastTurn.role !== "assistant") return "incomplete";

  // A trailing tool call is checked before the text check: a turn shaped
  // like [text, tool_use] requested a tool whose result never arrived, so
  // the preceding text must not make the trace look finished.
  const lastBlock = lastTurn.content[lastTurn.content.length - 1];
  if (lastBlock?.type === "tool_use") return "incomplete";

  const hasText = lastTurn.content.some((b) => b.type === "text");
  return hasText ? "complete" : "incomplete";
}
|
|
105
|
+
|
|
106
|
+
/**
 * Scores a trace with the v0 heuristic and derives its payout.
 *
 * The total is the sum of three parts, capped at 1.0:
 * - a fidelity base: 0.4 for `"full"` content, 0.15 otherwise;
 * - an interest bonus driven by the detected failure modes;
 * - a length bonus that grows with log10 of the token count in thousands
 *   and is capped at 0.15 (reached at one million tokens).
 *
 * The payout is `total * 500` cents, rounded, and never more than 500.
 * Completeness is reported on the score but does not affect the total.
 *
 * @param trace - Trace to score.
 * @returns A score stamped with the current time and scorer version
 *   `"v0-heuristic"`; taxonomy, rarity, cluster and duplicate fields are
 *   left unset (`null` / `false`).
 */
export function scoreTrace(trace: NormalizedTrace): TraceScore {
  const completeness = checkCompleteness(trace);
  const failureModes = detectFailureModes(trace);

  const recovered = failureModes.includes("graceful_recovery");
  const repeated = failureModes.includes("repeated_tool_calls");

  // Signals are summed in this fixed order so the floating-point result is
  // reproducible.
  const weightedSignals: Array<[boolean, number]> = [
    [recovered, 0.3],
    [repeated, 0.2],
    [failureModes.includes("catastrophic_failure"), 0.15],
    // A tool failure only earns its own bonus when it was not recovered from.
    [failureModes.includes("tool_call_failure") && !recovered, 0.1],
  ];
  let interestBonus = 0;
  for (const [present, weight] of weightedSignals) {
    if (present) interestBonus += weight;
  }

  const fidelityBase = trace.content_fidelity === "full" ? 0.4 : 0.15;

  // Missing token counts are treated as zero.
  const inputTokens = trace.total_input_tokens ?? 0;
  const outputTokens = trace.total_output_tokens ?? 0;
  const kiloTokens = (inputTokens + outputTokens) / 1000;
  const lengthBonus = Math.min(
    0.15,
    Math.log10(Math.max(1, kiloTokens)) * 0.05
  );

  const total = Math.min(1.0, fidelityBase + interestBonus + lengthBonus);
  const payoutCents = Math.min(500, Math.round(total * 500));

  return {
    completeness,
    failure_modes: failureModes,
    has_error_recovery: recovered,
    has_repeated_calls: repeated,
    content_fidelity: trace.content_fidelity,
    total,
    payout_cents: payoutCents,
    failure_taxonomy_label: null,
    failure_taxonomy_explanation: null,
    rarity_score: null,
    cluster_id: null,
    is_duplicate: false,
    duplicate_of: null,
    scored_at: new Date().toISOString(),
    scorer_version: "v0-heuristic",
  };
}
|
package/src/types.ts
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/** Identifier of the tool a trace was captured from (see `NormalizedTrace.source_tool`). */
export type SourceTool = "claude_code" | "codex_cli" | "cursor";
|
|
2
|
+
|
|
3
|
+
/**
 * Labels attached to a trace by failure-mode detection.
 *
 * NOTE(review): `detectFailureModes` in scoring.ts never emits
 * `unexpected_capability` or `wrong_tool_sequence`; presumably they are
 * assigned by another scorer — confirm.
 */
export type FailureMode =
  | "tool_call_failure"
  | "repeated_tool_calls"
  | "context_limit_approached"
  | "catastrophic_failure"
  | "graceful_recovery"
  | "unexpected_capability"
  | "wrong_tool_sequence"
  | "no_failure";
|
|
12
|
+
|
|
13
|
+
/**
 * Token counters attached to a turn (see `Turn.usage`).
 * The cache and reasoning counters are `null` when no value is available.
 */
export interface TokenUsage {
  input_tokens: number;
  output_tokens: number;
  cache_read_input_tokens: number | null;
  cache_creation_input_tokens: number | null;
  reasoning_tokens: number | null;
}
|
|
20
|
+
|
|
21
|
+
/**
 * One piece of a turn's content, discriminated by `type`.
 *
 * - `text` / `thinking`: free text.
 * - `tool_use`: a tool invocation with its name and input.
 * - `tool_result`: outcome of a tool invocation; `is_error` flags failure.
 *   Presumably `tool_call_id` matches the originating `tool_use` — confirm.
 * - `image`: image reference by media type and storage key.
 */
export type ContentBlock =
  | { type: "text"; text: string }
  | { type: "thinking"; text: string }
  | { type: "tool_use"; tool_call_id: string; tool_name: string; tool_input: Record<string, unknown> }
  | { type: "tool_result"; tool_call_id: string; is_error: boolean; result_content: string | null; exit_code: number | null }
  | { type: "image"; media_type: string; data_r2_key: string };
|
|
27
|
+
|
|
28
|
+
/**
 * A single user or assistant turn in a trace.
 * `content` holds the turn's blocks in order; scoring treats a turn with an
 * empty `content` array as malformed.
 */
export interface Turn {
  turn_id: string;
  parent_turn_id: string | null;
  role: "user" | "assistant";
  timestamp: string | null;
  content: ContentBlock[];
  model: string | null;
  usage: TokenUsage | null;
  source_metadata: Record<string, unknown>;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Environment information recorded alongside a trace (see
 * `NormalizedTrace.env_state`). Every field except `extraction_method` may
 * be `null`.
 * NOTE(review): the `inferred_` prefix suggests these lists are derived
 * from trace content rather than observed directly — confirm.
 */
export interface EnvState {
  git_branch: string | null;
  inferred_file_tree: string[] | null;
  inferred_changed_files: string[] | null;
  inferred_error_files: string[] | null;
  shell_exit_codes: number[] | null;
  open_files_in_editor: string[] | null;
  extraction_method: "passive" | "active";
}
|
|
48
|
+
|
|
49
|
+
/**
 * Result of scoring a trace.
 *
 * As produced by `scoreTrace` in scoring.ts: `total` is capped at 1.0,
 * `payout_cents` is `total * 500` rounded and capped at 500, `scored_at` is
 * an ISO-8601 timestamp, and the taxonomy, rarity, cluster and duplicate
 * fields are left unset (`null` / `false`).
 */
export interface TraceScore {
  completeness: "complete" | "incomplete" | "malformed";
  failure_modes: FailureMode[];
  has_error_recovery: boolean;
  has_repeated_calls: boolean;
  content_fidelity: "full" | "chat_only";
  total: number;
  payout_cents: number;
  failure_taxonomy_label: string | null;
  failure_taxonomy_explanation: string | null;
  rarity_score: number | null;
  cluster_id: string | null;
  is_duplicate: boolean;
  duplicate_of: string | null;
  scored_at: string;
  scorer_version: string;
}
|
|
66
|
+
|
|
67
|
+
/**
 * A trace in the shared, tool-independent shape.
 *
 * Scoring reads `turns`, `content_fidelity`, `total_input_tokens` and
 * `total_output_tokens` (a `null` token count is treated as zero).
 *
 * NOTE(review): `NormalizedTraceSchema` in validators.ts requires only
 * `source_tool` and `source_session_id`, so input that passes validation is
 * not guaranteed to satisfy this interface.
 */
export interface NormalizedTrace {
  trace_id: string;
  schema_version: string;
  source_tool: SourceTool;
  source_session_id: string;
  source_version: string | null;
  submitted_by: string;
  submitted_at: string;
  extracted_at: string;
  git_branch: string | null;
  cwd_hash: string | null;
  working_language: string | null;
  started_at: string;
  ended_at: string;
  turns: Turn[];
  turn_count: number;
  tool_call_count: number;
  has_tool_calls: boolean;
  has_thinking_blocks: boolean;
  has_file_changes: boolean;
  has_shell_commands: boolean;
  total_input_tokens: number | null;
  total_output_tokens: number | null;
  total_cache_read_tokens: number | null;
  content_fidelity: "full" | "chat_only";
  env_state: EnvState | null;
  score: TraceScore | null;
  raw_r2_key: string;
  normalized_r2_key: string;
}
|
package/src/utils.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
 * Formats an amount in cents as a dollar string with two decimals.
 *
 * @param cents - Amount in cents, or `null` when there is no amount.
 * @returns `"—"` for a missing or non-finite amount; otherwise e.g.
 *   `"$12.34"`, with the sign placed before the currency symbol for
 *   negative amounts (`"-$1.50"`).
 */
export function formatCents(cents: number | null): string {
  // `== null` also covers `undefined` arriving from untyped callers; NaN and
  // ±Infinity would otherwise render as "$NaN" / "$Infinity".
  if (cents == null || !Number.isFinite(cents)) return "—";

  const amount = (cents / 100).toFixed(2);
  // Move a leading minus in front of the dollar sign.
  return amount.startsWith("-") ? `-$${amount.slice(1)}` : `$${amount}`;
}
|
|
5
|
+
|
|
6
|
+
/**
 * Formats a timestamp given in seconds as a locale-dependent date string.
 *
 * @param ts - Timestamp in seconds (it is multiplied by 1000 before being
 *   handed to `Date`), or `null`.
 * @returns `"—"` for any falsy value (`null`, `0`, `NaN`); otherwise the
 *   result of `Date.prototype.toLocaleString()`.
 */
export function formatTimestamp(ts: number | null): string {
  return ts ? new Date(ts * 1000).toLocaleString() : "—";
}
|
|
package/src/validators.test.ts
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { BatchSubmitSchema, NormalizedTraceSchema } from "./validators.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Builds the smallest trace payload used by these tests: a source tool and
 * a session id. Entries in `overrides` are applied last, so they can add
 * fields or replace the two defaults.
 */
function makeTrace(overrides: Record<string, unknown> = {}) {
  const minimal = {
    source_tool: "claude_code",
    source_session_id: "session-abc-123",
  };
  return { ...minimal, ...overrides };
}
|
|
11
|
+
|
|
12
|
+
// NormalizedTraceSchema: the minimal payload parses; a missing required
// field or an out-of-range value is rejected. `safeParse` reports failure
// through `success` rather than throwing.
describe("NormalizedTraceSchema", () => {
  const accepts = (input: unknown): boolean =>
    NormalizedTraceSchema.safeParse(input).success;

  it("valid minimal trace → success", () => {
    expect(accepts(makeTrace())).toBe(true);
  });

  it("missing source_tool → ZodError", () => {
    expect(accepts({ source_session_id: "abc" })).toBe(false);
  });

  it("missing source_session_id → ZodError", () => {
    expect(accepts({ source_tool: "claude_code" })).toBe(false);
  });

  it("invalid content_fidelity → ZodError", () => {
    expect(accepts(makeTrace({ content_fidelity: "high_fidelity" }))).toBe(false);
  });

  it("negative turn_count → ZodError", () => {
    expect(accepts(makeTrace({ turn_count: -1 }))).toBe(false);
  });
});
|
|
38
|
+
|
|
39
|
+
// BatchSubmitSchema: a batch needs a non-empty `traces` array whose entries
// each satisfy the trace schema.
describe("BatchSubmitSchema", () => {
  const accepts = (input: unknown): boolean =>
    BatchSubmitSchema.safeParse(input).success;

  it("valid batch → success", () => {
    expect(accepts({ traces: [makeTrace()] })).toBe(true);
  });

  it("empty traces array → ZodError", () => {
    expect(accepts({ traces: [] })).toBe(false);
  });

  it("missing traces → ZodError", () => {
    expect(accepts({})).toBe(false);
  });

  it("invalid source_tool in batch → ZodError", () => {
    const batch = {
      traces: [{ source_tool: "vscode", source_session_id: "abc" }],
    };
    expect(accepts(batch)).toBe(false);
  });
});
|
|
package/src/validators.ts
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
|
|
3
|
+
// Field shapes that repeat throughout the trace schema.
const traceOptionalString = z.string().optional();
const traceNullableString = z.string().nullable().optional();
const traceOptionalCount = z.number().int().nonnegative().optional();
const traceOptionalFlag = z.boolean().optional();
const traceNullableNumber = z.number().nullable().optional();
const traceNullableAny = z.any().nullable().optional();

/**
 * Validation schema for a submitted trace.
 *
 * Only `source_tool` (one of the three known tools) and a non-empty
 * `source_session_id` are required; every other field is optional.
 * `turns`, `env_state` and `score` are accepted without structural checks.
 */
export const NormalizedTraceSchema = z.object({
  trace_id: traceOptionalString,
  schema_version: traceOptionalString,
  source_tool: z.enum(["claude_code", "codex_cli", "cursor"]),
  source_session_id: z.string().min(1),
  source_version: traceNullableString,
  submitted_by: traceOptionalString,
  submitted_at: traceOptionalString,
  extracted_at: traceOptionalString,
  git_branch: traceNullableString,
  cwd_hash: traceNullableString,
  working_language: traceNullableString,
  started_at: traceOptionalString,
  ended_at: traceOptionalString,
  turns: z.array(z.any()).optional(),
  turn_count: traceOptionalCount,
  tool_call_count: traceOptionalCount,
  has_tool_calls: traceOptionalFlag,
  has_thinking_blocks: traceOptionalFlag,
  has_file_changes: traceOptionalFlag,
  has_shell_commands: traceOptionalFlag,
  total_input_tokens: traceNullableNumber,
  total_output_tokens: traceNullableNumber,
  total_cache_read_tokens: traceNullableNumber,
  content_fidelity: z.enum(["full", "chat_only"]).optional(),
  env_state: traceNullableAny,
  score: traceNullableAny,
  raw_r2_key: traceOptionalString,
  normalized_r2_key: traceOptionalString,
});

/** Shape of a value that has passed `NormalizedTraceSchema`. */
export type NormalizedTraceInput = z.infer<typeof NormalizedTraceSchema>;
|
|
35
|
+
|
|
36
|
+
// At least one trace is required per batch.
const batchTraces = z.array(NormalizedTraceSchema).min(1);
// Optional source tool given at batch level.
const batchSourceTool = z.enum(["claude_code", "codex_cli", "cursor"]).optional();

/**
 * Validation schema for a batch submission: a non-empty list of traces plus
 * an optional batch-level `source_tool`.
 */
export const BatchSubmitSchema = z.object({
  traces: batchTraces,
  source_tool: batchSourceTool,
});

/** Shape of a value that has passed `BatchSubmitSchema`. */
export type BatchSubmitInput = z.infer<typeof BatchSubmitSchema>;
|
package/tsconfig.json
ADDED