@candor.sh/cli 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.cjs +125 -36
  2. package/package.json +1 -1
package/dist/index.cjs CHANGED
@@ -10025,7 +10025,57 @@ function getConfigPath() {
10025
10025
  }
10026
10026
 
10027
10027
  // src/skill-content.ts
10028
- var SKILL_CONTENT = '# Candor \u2014 AI-Moderated User Studies & Human Evaluation\n\nCandor lets you create and manage AI-moderated user studies and human evaluation jobs directly from the terminal. Use `candor study` for product research (AI voice interviews with real users) and `candor eval` for quick human labeling, ranking, and evaluation tasks.\n\n## Available Commands\n\nRun these via the Bash tool. Always use `--json` for machine-readable output. Run any command with `--help` for full usage details (e.g. `candor eval create --help`).\n\n### Studies (product research)\n\n- `candor study create --url <url> --goal "<goal>" [--participants N] --json` \u2014 Create a study\n- `candor study list --json` \u2014 List all studies (or `candor study --json`)\n- `candor study status <study-id> --json` \u2014 Check participant progress\n- `candor study findings <study-id> --json` \u2014 Get prioritized feedback (P0\u2013P3)\n- `candor study approve <study-id> --json` \u2014 Approve recruitment (human-in-the-loop gate)\n\n### Evals (quick human evaluation / labeling)\n\n- `candor eval create --goal "<goal>" --items "<item1,item2,...>" [--type pairwise_comparison] [--workers N] [--reward <cents>] --json` \u2014 Create an eval. Items can be text labels, file paths (audio/image/video), glob patterns (`*.mp3`), or a CSV file.\n- `candor eval list --json` \u2014 List all evals\n- `candor eval approve <eval-id> --json` \u2014 Launch eval\n- `candor eval status <eval-id> [--live] --json` \u2014 Check eval progress (use `--live` for real-time monitoring)\n- `candor eval results <eval-id> --json` \u2014 View results and rankings\n- `candor eval cancel <eval-id> --json` \u2014 Cancel eval and stop recruiting (keeps collected responses)\n\n## Workflow\n\n### Creating a Study\n\n1. **Infer context** \u2014 If the user doesn\'t provide a URL, check:\n - `package.json` `homepage` field\n - README for a deployed URL\n - Ask the user if nothing is found\n2. 
**Run `candor study create`** with the product URL and goal\n3. **Show the study summary** including:\n - Drafted study script (sections, questions, tasks)\n - Participant count and demographic filters\n - Estimated cost per session\n4. **Wait for explicit approval** \u2014 Do NOT run `candor study approve` without the user saying "yes", "approve", "go ahead", or similar\n5. **On approval**, run `candor study approve <study-id>` to begin recruiting\n\n### Creating an Eval\n\nEvals are for quick human evaluation tasks like labeling, ranking, rating, or comparing items. They return results in minutes.\n\n**When to use eval vs study:**\n- Use `candor eval` when the user wants to label, rank, rate, compare, or categorize items (audio files, images, text, etc.)\n- Use `candor study` when the user wants qualitative product feedback from real users\n\n**Choosing the right task type:**\n- `pairwise_comparison` \u2014 Best for ranking. Workers compare pairs and pick a winner. Produces a stack ranking with win rates. Use when the user says "rank", "compare", "which is better", "stack rank".\n- `categorical_label` \u2014 Workers assign a label from a set of categories. Use when the user says "label", "categorize", "classify", "tag".\n- `rating_scale` \u2014 Workers rate items on a numeric scale. Use when the user says "rate", "score", "quality score".\n- `free_text` \u2014 Workers provide open-ended text feedback.\n\n**Workflow:**\n1. **Identify the items** \u2014 Find the files or values the user wants evaluated. The `--items` flag is comma-separated and the CLI auto-detects the type of each token:\n - **Text labels**: `--items "option_a,option_b,option_c"` \u2014 plain strings, no upload\n - **File paths**: `--items "sample_a.mp3,sample_b.mp3"` \u2014 if the path exists on disk and has a supported media extension, it\'s uploaded automatically\n - **Glob patterns**: `--items "*.mp3"` or `--items "outputs/*.png"` \u2014 expanded to matching files, then uploaded. 
Use this when the user says "these audio files" or "all the PNGs in that folder"\n - **CSV file**: `--items "items.csv"` \u2014 a single CSV path. Rows are parsed into items. See CSV format below.\n - **Mixed**: You can mix types in one command: `--items "baseline.mp3,challenger.mp3,control_text"`\n\n **Supported media types** (auto-detected by extension): audio (mp3, wav, ogg, flac, m4a, aac, webm, wma), image (png, jpg, jpeg, gif, webp, svg, bmp, tiff), video (mp4, mov, avi, mkv, wmv). Files with unsupported extensions are treated as text labels.\n\n **Size limits**: 100 MB per file, 1 GB total, max 50 files per eval.\n\n2. **Infer the task type** \u2014 The API infers the task type from the `--goal` string using keyword matching. You don\'t need to pass `--type` unless you want to override. If the goal is vague, it defaults to `pairwise_comparison`.\n3. **Design the eval** \u2014 The API auto-generates the experiment design (randomized pairs, worker counts, etc.) from the goal and items. If the user is prescriptive about methodology, pass their preferences.\n4. **Create the eval** \u2014 Run `candor eval create` with goal and items. Files are uploaded during creation (before approval), so the approve step is fast.\n5. **Show the plan** \u2014 Display task type, item count, pair count, estimated cost and time.\n6. **Wait for approval** \u2014 Do NOT launch without explicit confirmation.\n7. **Launch** \u2014 Run `candor eval approve <id>` to begin recruiting workers.\n8. **Monitor** \u2014 Run `candor eval status <id> --live` (without `--json`) so the user sees a live-updating progress bar, worker activity feed, and completion status in their terminal. This is the best way to keep the user informed while workers complete tasks.\n9. **Results** \u2014 Use `candor eval results <id>` to show rankings and agreement metrics.\n10. **Cancel** (if needed) \u2014 Use `candor eval cancel <id>` to stop recruiting while keeping collected responses. 
Uploaded files are cleaned up automatically on completion or cancel.\n\n**CSV format:**\n\nWhen `--items` points to a `.csv` file, the CLI parses it row by row:\n- **Single column** \u2014 each value is auto-detected (URL, file path, or text label)\n- **Two+ columns** \u2014 first column is the label, second is the URL or file path\n- **Header row** \u2014 auto-detected if first row contains "label", "url", "path", or "file"\n- **URLs** (`https://...`) are passed through as asset URLs directly \u2014 no upload needed\n- **Local file paths** in the CSV are uploaded just like inline file paths\n\nExample CSV with URLs (no upload):\n```csv\nlabel,url\nModel A,https://example.com/audio/model_a.mp3\nModel B,https://example.com/audio/model_b.mp3\n```\n\nExample CSV with local files (uploaded automatically):\n```csv\noutputs/model_a.mp3\noutputs/model_b.mp3\n```\n\n**How to build the --items flag:**\n\nWhen the user mentions files, use Bash tools to find them first, then pass them to `candor eval create`:\n- User says "evaluate the audio files in outputs/" \u2192 run `ls outputs/*.mp3` to confirm they exist, then `--items "outputs/*.mp3"`\n- User says "compare these two images" and you see `a.png`, `b.png` in cwd \u2192 `--items "a.png,b.png"`\n- User has a spreadsheet of URLs \u2192 save as CSV, then `--items "urls.csv"`\n- User says "rank A, B, C" (no files) \u2192 `--items "A,B,C"` (text labels)\n\n**Example natural language \u2192 eval mapping:**\n\n| User says | Task type | Items |\n|-----------|-----------|-------|\n| "rank these audio samples by TTS quality" | pairwise_comparison | `--items "*.mp3"` (glob) |\n| "label these images as cat or dog" | categorical_label | `--items "*.png"` (glob) |\n| "rate these UI mockups on a 1-5 scale" | rating_scale | `--items "mockup_a.png,mockup_b.png"` (files) |\n| "which of these headlines is more engaging" | pairwise_comparison | `--items "Headline A,Headline B"` (text) |\n| "evaluate the samples in results.csv" | 
pairwise_comparison | `--items "results.csv"` (CSV) |\n\n### Pricing evals (`--reward`)\n\nThe `--reward` flag sets the payment **per assignment** (a batch of tasks a single worker completes) in cents. Getting this right is critical \u2014 underpaying means workers ignore your HIT and results take hours; paying well means results in minutes.\n\n**How to estimate reward:**\n\n1. **Estimate time per task** based on what the worker has to do:\n - Read two short text labels and pick one: ~5 seconds\n - View two images and compare: ~10\u201315 seconds\n - Listen to two audio clips (each 5\u201315s) and compare: ~20\u201340 seconds\n - Watch two short videos and compare: ~30\u201360 seconds\n - Rate or label a single item (text/image): ~5\u201310 seconds\n - Rate or label a single audio/video item: ~15\u201330 seconds\n\n2. **Multiply by tasks per assignment.** The API auto-sets batch size, but typical values:\n - Pairwise with N items \u2192 N\xD7(N-1)/2 pairs, batched into groups of ~5\u201310\n - Single-item tasks \u2192 batched into groups of ~5\u201310\n\n3. **Target $15\u201320/hour equivalent** for fast pickup. This is above the MTurk average and ensures your HITs are grabbed immediately.\n\n4. **Calculate:** `reward_cents = (time_per_task_seconds \xD7 tasks_per_batch / 60) \xD7 (hourly_rate / 60) \xD7 100`\n\n**Quick reference (for fast pickup):**\n\n| Scenario | Items | Est. 
time/assignment | Recommended `--reward` |\n|----------|-------|---------------------|----------------------|\n| Compare short text labels | 3\u20135 items | ~30 sec | `15` (15\xA2) |\n| Compare images | 3\u20135 items | ~1\u20132 min | `40` (40\xA2) |\n| Compare audio clips (<15s each) | 3\u20135 items | ~2\u20133 min | `60` (60\xA2) |\n| Compare audio clips (<15s each) | 6\u201310 items | ~5\u20138 min | `150` ($1.50) |\n| Compare short videos | 3\u20135 items | ~3\u20135 min | `100` ($1.00) |\n| Label/rate images | 10 items | ~2 min | `50` (50\xA2) |\n| Label/rate audio clips | 10 items | ~5 min | `125` ($1.25) |\n\n**Rules of thumb:**\n- When in doubt, round up \u2014 the cost difference is small but pickup speed difference is large\n- Audio/video evals cost more because workers spend real time consuming media\n- More items = more pairs (pairwise grows quadratically) = longer assignments = higher reward needed\n- If the user doesn\'t specify a budget, use these guidelines. If they ask for cheaper, warn that lower rewards mean slower results.\n- The default of 5\xA2 is only appropriate for very quick text-only tasks with few items. **Always override it** for media evals.\n\n### Checking Results\n\n- Use `candor study status <id> --json` to show completion progress\n- Use `candor study findings <id> --json` to show prioritized findings\n- Use `candor eval status <id> --json` to show eval progress\n- Use `candor eval results <id> --json` to show rankings and agreement metrics\n- Offer to create GitHub issues for P0/P1 items if the user wants\n\n## Important Rules\n\n- NEVER approve a study or launch an eval without explicit human confirmation\n- ALWAYS show cost estimate before launching\n- Evals are billed per assignment. Use the pricing guide above to set `--reward` appropriately \u2014 this directly affects how fast workers pick up the job\n- If Candor is not initialized, tell the user to run `candor init` first\n';
10028
+ var SKILL_CONTENT = '# Candor \u2014 AI-Moderated User Studies & Human Evaluation\n\nCandor lets you create and manage AI-moderated user studies and human evaluation jobs directly from the terminal. Use `candor study` for product research (AI voice interviews with real users) and `candor eval` for quick human labeling, ranking, and evaluation tasks.\n\n## Available Commands\n\nRun these via the Bash tool. Always use `--json` for machine-readable output. Run any command with `--help` for full usage details (e.g. `candor eval create --help`).\n\n### Studies (product research)\n\n- `candor study create --url <url> --goal "<goal>" [--participants N] --json` \u2014 Create a study\n- `candor study list --json` \u2014 List all studies (or `candor study --json`)\n- `candor study status <study-id> --json` \u2014 Check participant progress\n- `candor study findings <study-id> --json` \u2014 Get prioritized feedback (P0\u2013P3)\n- `candor study approve <study-id> --json` \u2014 Approve recruitment (human-in-the-loop gate)\n\n### Evals (quick human evaluation / labeling)\n\n- `candor eval create --goal "<goal>" --items "<item1,item2,...>" [--type pairwise_comparison] [--workers N] [--reward <cents>] --json` \u2014 Create an eval. Items can be text labels, file paths (audio/image/video), glob patterns (`*.mp3`), or a CSV file.\n- `candor eval list --json` \u2014 List all evals\n- `candor eval approve <eval-id> --json` \u2014 Launch eval\n- `candor eval status <eval-id> [--live] --json` \u2014 Check eval progress (use `--live` for real-time monitoring)\n- `candor eval results <eval-id> --json` \u2014 View results and rankings\n- `candor eval cancel <eval-id> --json` \u2014 Cancel eval and stop recruiting (keeps collected responses)\n\n## Workflow\n\n### Creating a Study\n\n1. **Infer context** \u2014 If the user doesn\'t provide a URL, check:\n - `package.json` `homepage` field\n - README for a deployed URL\n - Ask the user if nothing is found\n2. 
**Run `candor study create`** with the product URL and goal\n3. **Show the study summary** including:\n - Drafted study script (sections, questions, tasks)\n - Participant count and demographic filters\n - Estimated cost per session\n4. **Wait for explicit approval** \u2014 Do NOT run `candor study approve` without the user saying "yes", "approve", "go ahead", or similar\n5. **On approval**, run `candor study approve <study-id>` to begin recruiting\n\n### Creating an Eval\n\nEvals are for quick human evaluation tasks like labeling, ranking, rating, or comparing items. They return results in minutes.\n\n**When to use eval vs study:**\n- Use `candor eval` when the user wants to label, rank, rate, compare, or categorize items (audio files, images, text, etc.)\n- Use `candor study` when the user wants qualitative product feedback from real users\n\n**Choosing the right task type:**\n- `pairwise_comparison` \u2014 Best for ranking. Workers compare pairs and pick a winner. Produces a stack ranking with win rates. Use when the user says "rank", "compare", "which is better", "stack rank".\n- `categorical_label` \u2014 Workers assign a label from a set of categories. Use when the user says "label", "categorize", "classify", "tag".\n- `rating_scale` \u2014 Workers rate items on a numeric scale. Use when the user says "rate", "score", "quality score".\n- `free_text` \u2014 Workers provide open-ended text feedback.\n\n**Workflow:**\n1. **Identify the items** \u2014 Find the files or values the user wants evaluated. The `--items` flag is comma-separated and the CLI auto-detects the type of each token:\n - **Text labels**: `--items "option_a,option_b,option_c"` \u2014 plain strings, no upload\n - **File paths**: `--items "sample_a.mp3,sample_b.mp3"` \u2014 if the path exists on disk and has a supported media extension, it\'s uploaded automatically\n - **Glob patterns**: `--items "*.mp3"` or `--items "outputs/*.png"` \u2014 expanded to matching files, then uploaded. 
Use this when the user says "these audio files" or "all the PNGs in that folder"\n - **CSV file**: `--items "items.csv"` \u2014 a single CSV path. Rows are parsed into items. See CSV format below.\n - **Mixed**: You can mix types in one command: `--items "baseline.mp3,challenger.mp3,control_text"`\n\n **Supported media types** (auto-detected by extension): audio (mp3, wav, ogg, flac, m4a, aac, webm, wma), image (png, jpg, jpeg, gif, webp, svg, bmp, tiff), video (mp4, mov, avi, mkv, wmv). Files with unsupported extensions are treated as text labels.\n\n **Size limits**: 100 MB per file, 1 GB total, max 50 files per eval.\n\n2. **Infer the task type** \u2014 The API infers the task type from the `--goal` string using keyword matching. You don\'t need to pass `--type` unless you want to override. If the goal is vague, it defaults to `pairwise_comparison`.\n3. **Design the eval** \u2014 The API auto-generates the experiment design (randomized pairs, worker counts, etc.) from the goal and items. If the user is prescriptive about methodology, pass their preferences.\n4. **Create the eval** \u2014 Run `candor eval create` with goal and items. Files are uploaded during creation (before approval), so the approve step is fast.\n5. **Show the plan** \u2014 Display task type, item count, pair count, estimated cost and time.\n6. **Wait for approval** \u2014 Do NOT launch without explicit confirmation.\n7. **Launch** \u2014 Run `candor eval approve <id>` to begin recruiting workers.\n8. **Monitor** \u2014 Run `candor eval status <id> --live` (without `--json`) so the user sees a live-updating progress bar, worker activity feed, and completion status in their terminal. This is the best way to keep the user informed while workers complete tasks.\n9. **Results** \u2014 Use `candor eval results <id>` to show rankings and agreement metrics.\n10. **Cancel** (if needed) \u2014 Use `candor eval cancel <id>` to stop recruiting while keeping collected responses. 
Uploaded files are cleaned up automatically on completion or cancel.\n\n**CSV format:**\n\nWhen `--items` points to a `.csv` file, the CLI parses it row by row:\n- **Single column** \u2014 each value is auto-detected (URL, file path, or text label)\n- **Two+ columns** \u2014 first column is the label, second is the URL or file path\n- **Header row** \u2014 auto-detected if first row contains "label", "url", "path", or "file"\n- **URLs** (`https://...`) are passed through as asset URLs directly \u2014 no upload needed\n- **Local file paths** in the CSV are uploaded just like inline file paths\n\nExample CSV with URLs (no upload):\n```csv\nlabel,url\nModel A,https://example.com/audio/model_a.mp3\nModel B,https://example.com/audio/model_b.mp3\n```\n\nExample CSV with local files (uploaded automatically):\n```csv\noutputs/model_a.mp3\noutputs/model_b.mp3\n```\n\n**How to build the --items flag:**\n\nWhen the user mentions files, use Bash tools to find them first, then pass them to `candor eval create`:\n- User says "evaluate the audio files in outputs/" \u2192 run `ls outputs/*.mp3` to confirm they exist, then `--items "outputs/*.mp3"`\n- User says "compare these two images" and you see `a.png`, `b.png` in cwd \u2192 `--items "a.png,b.png"`\n- User has a spreadsheet of URLs \u2192 save as CSV, then `--items "urls.csv"`\n- User says "rank A, B, C" (no files) \u2192 `--items "A,B,C"` (text labels)\n\n**Example natural language \u2192 eval mapping:**\n\n| User says | Task type | Items |\n|-----------|-----------|-------|\n| "rank these audio samples by TTS quality" | pairwise_comparison | `--items "*.mp3"` (glob) |\n| "label these images as cat or dog" | categorical_label | `--items "*.png"` (glob) |\n| "rate these UI mockups on a 1-5 scale" | rating_scale | `--items "mockup_a.png,mockup_b.png"` (files) |\n| "which of these headlines is more engaging" | pairwise_comparison | `--items "Headline A,Headline B"` (text) |\n| "evaluate the samples in results.csv" | 
pairwise_comparison | `--items "results.csv"` (CSV) |\n\n### Pricing evals (`--reward`)\n\nThe `--reward` flag sets the payment **per assignment** (a batch of tasks a single worker completes) in cents. Getting this right is critical \u2014 underpaying means workers ignore your HIT and results take hours; paying well means results in minutes.\n\n**How to estimate reward:**\n\n1. **Estimate time per task** based on what the worker has to do:\n - Read two short text labels and pick one: ~5 seconds\n - View two images and compare: ~10\u201315 seconds\n - Listen to two audio clips (each 5\u201315s) and compare: ~20\u201340 seconds\n - Watch two short videos and compare: ~30\u201360 seconds\n - Rate or label a single item (text/image): ~5\u201310 seconds\n - Rate or label a single audio/video item: ~15\u201330 seconds\n\n2. **Multiply by tasks per assignment.** The API auto-sets batch size, but typical values:\n - Pairwise with N items \u2192 N\xD7(N-1)/2 pairs, batched into groups of ~5\u201310\n - Single-item tasks \u2192 batched into groups of ~5\u201310\n\n3. **Target $15\u201320/hour equivalent** for fast pickup. This is above the MTurk average and ensures your HITs are grabbed immediately.\n\n4. **Calculate:** `reward_cents = (time_per_task_seconds \xD7 tasks_per_batch / 60) \xD7 (hourly_rate / 60) \xD7 100`\n\n**Quick reference (for fast pickup):**\n\n| Scenario | Items | Est. 
time/assignment | Recommended `--reward` |\n|----------|-------|---------------------|----------------------|\n| Compare short text labels | 3\u20135 items | ~30 sec | `15` (15\xA2) |\n| Compare images | 3\u20135 items | ~1\u20132 min | `40` (40\xA2) |\n| Compare audio clips (<15s each) | 3\u20135 items | ~2\u20133 min | `60` (60\xA2) |\n| Compare audio clips (<15s each) | 6\u201310 items | ~5\u20138 min | `150` ($1.50) |\n| Compare short videos | 3\u20135 items | ~3\u20135 min | `100` ($1.00) |\n| Label/rate images | 10 items | ~2 min | `50` (50\xA2) |\n| Label/rate audio clips | 10 items | ~5 min | `125` ($1.25) |\n\n**Rules of thumb:**\n- When in doubt, round up \u2014 the cost difference is small but pickup speed difference is large\n- Audio/video evals cost more because workers spend real time consuming media\n- More items = more pairs (pairwise grows quadratically) = longer assignments = higher reward needed\n- If the user doesn\'t specify a budget, use these guidelines. If they ask for cheaper, warn that lower rewards mean slower results.\n- The default of 5\xA2 is only appropriate for very quick text-only tasks with few items. **Always override it** for media evals.\n\n### Checking Results\n\n- Use `candor study status <id> --json` to show completion progress\n- Use `candor study findings <id> --json` to show prioritized findings\n- Use `candor eval status <id> --json` to show eval progress\n- Use `candor eval results <id> --json` to show rankings and agreement metrics\n- Offer to create GitHub issues for P0/P1 items if the user wants\n\n## Billing & Balance\n\nLaunching evals and studies requires a funded balance. Costs include a 40% Candor platform fee on top of worker payments.\n\n- **First time**: `candor init` opens Stripe to add a card. New users get **$5 free credit**.\n- **Balance check**: Balance is shown during `candor init` and at `candor eval create` (as `Est. 
cost`).\n- **Charge timing**: Balance is debited when you run `candor eval approve` or `candor study approve` \u2014 not at draft creation.\n- **Insufficient funds**: If balance is too low, the CLI shows the shortfall and a URL to add funds. The user pays via Stripe Checkout (minimum $10 top-up), then re-runs the approve command.\n- **Auto-reload**: Users can configure auto-reload at https://candor.sh/billing \u2014 set a threshold and target, and the saved card is charged automatically.\n- **Cancellation refund**: `candor eval cancel` refunds the proportional cost of incomplete tasks.\n- **Billing dashboard**: https://candor.sh/billing shows balance, transaction history, and auto-reload settings.\n\nWhen a 402 error occurs, show the balance shortfall and the top-up URL. Do not retry \u2014 the user needs to add funds first.\n\n## Important Rules\n\n- NEVER approve a study or launch an eval without explicit human confirmation\n- ALWAYS show cost estimate before launching\n- Evals are billed per assignment. Use the pricing guide above to set `--reward` appropriately \u2014 this directly affects how fast workers pick up the job\n- If Candor is not initialized, tell the user to run `candor init` first\n';
10029
+
10030
+ // src/api.ts
10031
+ async function apiRequest(path2, options = {}) {
10032
+ const config = loadConfig();
10033
+ if (!config.apiKey) {
10034
+ throw new AuthError("Not authenticated. Run 'candor init' first.");
10035
+ }
10036
+ const res = await fetch(`${config.apiUrl}${path2}`, {
10037
+ method: options.method || "GET",
10038
+ headers: {
10039
+ Authorization: `Bearer ${config.apiKey}`,
10040
+ "Content-Type": "application/json"
10041
+ },
10042
+ ...options.body ? { body: JSON.stringify(options.body) } : {}
10043
+ });
10044
+ if (res.status === 401) {
10045
+ throw new AuthError("API key expired or invalid. Run 'candor login' to re-authenticate.");
10046
+ }
10047
+ if (res.status === 402) {
10048
+ const data = await res.json().catch(() => ({}));
10049
+ throw new InsufficientBalanceError(
10050
+ data.balanceCents ?? 0,
10051
+ data.requiredCents ?? 0,
10052
+ data.topUpUrl ?? "https://candor.sh/billing/add-funds"
10053
+ );
10054
+ }
10055
+ if (!res.ok) {
10056
+ const text = await res.text().catch(() => "");
10057
+ throw new Error(`API error: ${res.status} ${res.statusText}${text ? ` \u2014 ${text}` : ""}`);
10058
+ }
10059
+ return res.json();
10060
+ }
10061
+ var AuthError = class extends Error {
10062
+ constructor(message) {
10063
+ super(message);
10064
+ this.name = "AuthError";
10065
+ }
10066
+ };
10067
+ var InsufficientBalanceError = class extends Error {
10068
+ balanceCents;
10069
+ requiredCents;
10070
+ topUpUrl;
10071
+ constructor(balanceCents, requiredCents, topUpUrl) {
10072
+ super("Insufficient balance");
10073
+ this.name = "InsufficientBalanceError";
10074
+ this.balanceCents = balanceCents;
10075
+ this.requiredCents = requiredCents;
10076
+ this.topUpUrl = topUpUrl;
10077
+ }
10078
+ };
10029
10079
 
10030
10080
  // src/commands/init.ts
10031
10081
  var CLAUDE_SKILLS_DIR = (0, import_path2.join)((0, import_os2.homedir)(), ".claude", "skills", "candor");
@@ -10076,7 +10126,7 @@ async function initCommand(options) {
10076
10126
  } else {
10077
10127
  const tempToken = (0, import_crypto.randomBytes)(16).toString("hex");
10078
10128
  const authUrl = `${config.apiUrl.replace("api.", "")}/auth/cli?token=${tempToken}`;
10079
- console.log(chalk2.dim(" -> Opening candor.sh in your browser to authenticate..."));
10129
+ console.log(chalk2.dim(" -> Opening candor.sh to sign in & add your card..."));
10080
10130
  try {
10081
10131
  await openBrowser(authUrl);
10082
10132
  } catch {
@@ -10092,6 +10142,31 @@ async function initCommand(options) {
10092
10142
  saveConfig(config);
10093
10143
  }
10094
10144
  }
10145
+ const currentConfig = loadConfig();
10146
+ if (currentConfig.apiKey) {
10147
+ try {
10148
+ const balanceData = await apiRequest("/api/billing/balance");
10149
+ if (!balanceData.hasPaymentMethod) {
10150
+ console.log(chalk2.dim(" -> Waiting for card setup (complete in your browser)..."));
10151
+ for (let i = 0; i < 120; i++) {
10152
+ await new Promise((r) => setTimeout(r, 2e3));
10153
+ try {
10154
+ const poll = await apiRequest("/api/billing/balance");
10155
+ if (poll.balanceCents > 0) {
10156
+ console.log(chalk2.green(` + Balance: $${(poll.balanceCents / 100).toFixed(2)} \u2014 you're ready to go!`));
10157
+ break;
10158
+ }
10159
+ } catch {
10160
+ }
10161
+ }
10162
+ } else if (balanceData.balanceCents > 0) {
10163
+ console.log(chalk2.green(` + Balance: $${(balanceData.balanceCents / 100).toFixed(2)}`));
10164
+ } else {
10165
+ console.log(chalk2.yellow(` + Balance: $0.00 \u2014 add funds at https://candor.sh/billing`));
10166
+ }
10167
+ } catch {
10168
+ }
10169
+ }
10095
10170
  if ((0, import_fs2.existsSync)((0, import_path2.join)((0, import_os2.homedir)(), ".claude"))) {
10096
10171
  if (installSkill()) {
10097
10172
  console.log(chalk2.green(" + Installed Candor skill -> ~/.claude/skills/candor/"));
@@ -10206,36 +10281,6 @@ async function doctorCommand() {
10206
10281
  console.log();
10207
10282
  }
10208
10283
 
10209
- // src/api.ts
10210
- async function apiRequest(path2, options = {}) {
10211
- const config = loadConfig();
10212
- if (!config.apiKey) {
10213
- throw new AuthError("Not authenticated. Run 'candor init' first.");
10214
- }
10215
- const res = await fetch(`${config.apiUrl}${path2}`, {
10216
- method: options.method || "GET",
10217
- headers: {
10218
- Authorization: `Bearer ${config.apiKey}`,
10219
- "Content-Type": "application/json"
10220
- },
10221
- ...options.body ? { body: JSON.stringify(options.body) } : {}
10222
- });
10223
- if (res.status === 401) {
10224
- throw new AuthError("API key expired or invalid. Run 'candor login' to re-authenticate.");
10225
- }
10226
- if (!res.ok) {
10227
- const text = await res.text().catch(() => "");
10228
- throw new Error(`API error: ${res.status} ${res.statusText}${text ? ` \u2014 ${text}` : ""}`);
10229
- }
10230
- return res.json();
10231
- }
10232
- var AuthError = class extends Error {
10233
- constructor(message) {
10234
- super(message);
10235
- this.name = "AuthError";
10236
- }
10237
- };
10238
-
10239
10284
  // src/commands/studies.ts
10240
10285
  async function studiesCommand(options) {
10241
10286
  const chalk2 = (await Promise.resolve().then(() => (init_source(), source_exports))).default;
@@ -10482,8 +10527,8 @@ async function updateCommand() {
10482
10527
  }
10483
10528
  }
10484
10529
  function getCurrentVersion() {
10485
- if ("0.4.0") {
10486
- return "0.4.0";
10530
+ if ("0.5.0") {
10531
+ return "0.5.0";
10487
10532
  }
10488
10533
  try {
10489
10534
  const pkgPath = (0, import_path4.join)((0, import_path4.dirname)(new URL(import_meta2.url).pathname), "..", "package.json");
@@ -10835,9 +10880,10 @@ async function evalCreateCommand(options) {
10835
10880
  ` Items: ${result.items.length} | Tasks: ${result.estimate.totalTasks}`
10836
10881
  )
10837
10882
  );
10883
+ const costCents = result.estimate.totalCostCents || result.estimate.estimatedCostCents;
10838
10884
  console.log(
10839
10885
  chalk2.dim(
10840
- ` Est. cost: $${(result.estimate.estimatedCostCents / 100).toFixed(2)} | Est. time: ~${result.estimate.estimatedMinutes} min`
10886
+ ` Est. cost: $${(costCents / 100).toFixed(2)} | Est. time: ~${result.estimate.estimatedMinutes} min`
10841
10887
  )
10842
10888
  );
10843
10889
  console.log(chalk2.dim(` Status: ${result.eval.status}`));
@@ -10881,6 +10927,23 @@ async function evalLaunchCommand(id, options) {
10881
10927
  console.log(chalk2.dim(` View results: candor eval results ${id}`));
10882
10928
  console.log();
10883
10929
  } catch (err) {
10930
+ if (err instanceof InsufficientBalanceError) {
10931
+ if (options.json) {
10932
+ console.log(JSON.stringify({
10933
+ error: "Insufficient balance",
10934
+ balanceCents: err.balanceCents,
10935
+ requiredCents: err.requiredCents,
10936
+ topUpUrl: err.topUpUrl
10937
+ }));
10938
+ process.exit(1);
10939
+ }
10940
+ console.log();
10941
+ console.log(chalk2.red(` Insufficient balance`));
10942
+ console.log(chalk2.dim(` Balance: $${(err.balanceCents / 100).toFixed(2)} | Required: $${(err.requiredCents / 100).toFixed(2)}`));
10943
+ console.log(chalk2.cyan(` Add funds: ${err.topUpUrl}`));
10944
+ console.log();
10945
+ process.exit(1);
10946
+ }
10884
10947
  if (options.json) {
10885
10948
  console.log(
10886
10949
  JSON.stringify({ error: err instanceof Error ? err.message : String(err) })
@@ -11201,9 +11264,34 @@ async function evalCancelCommand(id, options) {
11201
11264
  }
11202
11265
  }
11203
11266
 
11267
+ // src/commands/balance.ts
11268
+ async function balanceCommand(options) {
11269
+ const chalk2 = (await Promise.resolve().then(() => (init_source(), source_exports))).default;
11270
+ try {
11271
+ const data = await apiRequest("/api/billing/balance");
11272
+ if (options.json) {
11273
+ console.log(JSON.stringify(data, null, 2));
11274
+ return;
11275
+ }
11276
+ console.log();
11277
+ console.log(chalk2.bold(` $${(data.balanceCents / 100).toFixed(2)}`));
11278
+ if (!data.hasPaymentMethod) {
11279
+ console.log(chalk2.yellow(` No card on file \u2014 run candor init to add one`));
11280
+ }
11281
+ console.log();
11282
+ } catch (err) {
11283
+ if (options.json) {
11284
+ console.log(JSON.stringify({ error: err instanceof Error ? err.message : String(err) }));
11285
+ process.exit(1);
11286
+ }
11287
+ console.log(chalk2.red(`Error: ${err instanceof Error ? err.message : err}`));
11288
+ process.exit(1);
11289
+ }
11290
+ }
11291
+
11204
11292
  // src/index.ts
11205
11293
  var program2 = new Command();
11206
- program2.name("candor").description("AI-moderated user studies, wired into your dev workflow").version("0.4.0").enablePositionalOptions();
11294
+ program2.name("candor").description("AI-moderated user studies, wired into your dev workflow").version("0.5.0").enablePositionalOptions();
11207
11295
  program2.command("init").description("Set up Candor: authenticate and configure Claude Code integration").option("--skill-only", "Only reinstall the skill file").action(initCommand);
11208
11296
  program2.command("login").description("Re-authenticate with Candor").action(loginCommand);
11209
11297
  program2.command("logout").description("Log out and clear stored credentials").action(logoutCommand);
@@ -11221,6 +11309,7 @@ evalCmd.command("approve <id>").description("Approve and launch an eval").option
11221
11309
  evalCmd.command("status <id>").description("Check eval progress").option("--json", "Output as JSON").option("--live", "Poll for updates until complete").action(evalStatusCommand);
11222
11310
  evalCmd.command("results <id>").description("View eval results and rankings").option("--json", "Output as JSON").action(evalResultsCommand);
11223
11311
  evalCmd.command("cancel <id>").description("Cancel an active eval and stop recruiting").option("--json", "Output as JSON").action(evalCancelCommand);
11312
+ program2.command("balance").description("Check your current balance").option("--json", "Output as JSON").action(balanceCommand);
11224
11313
  program2.command("update").description("Update the Candor CLI to the latest version").action(updateCommand);
11225
11314
  program2.command("doctor", { hidden: true }).description("Run diagnostic checks on your Candor installation").action(doctorCommand);
11226
11315
  program2.parse();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@candor.sh/cli",
3
- "version": "0.4.0",
3
+ "version": "0.5.0",
4
4
  "private": false,
5
5
  "type": "module",
6
6
  "bin": {