@candor.sh/cli 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.cjs +4 -4
  2. package/package.json +1 -1
package/dist/index.cjs CHANGED
@@ -4513,7 +4513,7 @@ function getConfigPath() {
4513
4513
  }
4514
4514
 
4515
4515
  // src/skill-content.ts
4516
- var SKILL_CONTENT = '# Candor \u2014 AI-Moderated User Studies & Human Evaluation\n\nCandor lets you create and manage AI-moderated user studies and human evaluation jobs directly from the terminal. Use `candor study` for product research (AI voice interviews with real users) and `candor eval` for quick human labeling, ranking, and evaluation tasks via Mechanical Turk.\n\n## Available Commands\n\nRun these via the Bash tool. Always use `--json` for machine-readable output.\n\n### Studies (product research)\n\n- `candor study create --url <url> --goal "<goal>" [--participants N] --json` \u2014 Create a study\n- `candor study list --json` \u2014 List all studies (or `candor study --json`)\n- `candor study status <study-id> --json` \u2014 Check participant progress\n- `candor study findings <study-id> --json` \u2014 Get prioritized feedback (P0\u2013P3)\n- `candor study approve <study-id> --json` \u2014 Approve recruitment (human-in-the-loop gate)\n\n### Evals (quick human evaluation / labeling)\n\n- `candor eval create --goal "<goal>" --items "<item1,item2,...>" [--type pairwise_comparison] [--workers N] [--reward <cents>] --json` \u2014 Create an eval\n- `candor eval list --json` \u2014 List all evals\n- `candor eval approve <eval-id> --json` \u2014 Launch eval on MTurk\n- `candor eval status <eval-id> --json` \u2014 Check eval progress\n- `candor eval results <eval-id> --json` \u2014 View results and rankings\n\n## Workflow\n\n### Creating a Study\n\n1. **Infer context** \u2014 If the user doesn\'t provide a URL, check:\n - `package.json` `homepage` field\n - README for a deployed URL\n - Ask the user if nothing is found\n2. **Run `candor study create`** with the product URL and goal\n3. **Show the study summary** including:\n - Drafted study script (sections, questions, tasks)\n - Participant count and demographic filters\n - Estimated cost per session\n4. 
**Wait for explicit approval** \u2014 Do NOT run `candor study approve` without the user saying "yes", "approve", "go ahead", or similar\n5. **On approval**, run `candor study approve <study-id>` to begin recruiting\n\n### Creating an Eval\n\nEvals are for quick human evaluation tasks like labeling, ranking, rating, or comparing items. They run on Mechanical Turk and return results in minutes.\n\n**When to use eval vs study:**\n- Use `candor eval` when the user wants to label, rank, rate, compare, or categorize items (audio files, images, text, etc.)\n- Use `candor study` when the user wants qualitative product feedback from real users\n\n**Choosing the right task type:**\n- `pairwise_comparison` \u2014 Best for ranking. Workers compare pairs and pick a winner. Produces a stack ranking with win rates. Use when the user says "rank", "compare", "which is better", "stack rank".\n- `categorical_label` \u2014 Workers assign a label from a set of categories. Use when the user says "label", "categorize", "classify", "tag".\n- `rating_scale` \u2014 Workers rate items on a numeric scale. Use when the user says "rate", "score", "quality score".\n- `free_text` \u2014 Workers provide open-ended text feedback.\n\n**Workflow:**\n1. **Identify the items** \u2014 Look for files the user mentions (audio, images, etc.). Extract labels from filenames.\n2. **Infer the task type** \u2014 Based on the user\'s goal, pick the most appropriate type. If vague (e.g. "evaluate these"), default to `pairwise_comparison` for ranking or `rating_scale` for quality assessment.\n3. **Design the eval** \u2014 The API auto-generates the experiment design (randomized pairs, worker counts, etc.) from the goal and items. If the user is prescriptive about methodology, pass their preferences.\n4. **Create the eval** \u2014 Run `candor eval create` with goal and items.\n5. **Show the plan** \u2014 Display task type, item count, pair count, estimated cost and time.\n6. 
**Wait for approval** \u2014 Do NOT launch without explicit confirmation.\n7. **Launch** \u2014 Run `candor eval approve <id>` to create HITs on MTurk.\n8. **Poll and display results** \u2014 Use `candor eval status <id>` and `candor eval results <id>` to show progress and rankings.\n\n**Example natural language \u2192 eval mapping:**\n\n| User says | Task type | Items |\n|-----------|-----------|-------|\n| "rank these audio samples by TTS quality" | pairwise_comparison | Audio file names |\n| "label these images as cat or dog" | categorical_label | Image file names |\n| "rate these UI mockups on a 1-5 scale" | rating_scale | Mockup file names |\n| "which of these headlines is more engaging" | pairwise_comparison | Headline text |\n| "evaluate these samples" (vague) | pairwise_comparison | File names |\n\n### Checking Results\n\n- Use `candor study status <id> --json` to show completion progress\n- Use `candor study findings <id> --json` to show prioritized findings\n- Use `candor eval status <id> --json` to show eval progress\n- Use `candor eval results <id> --json` to show rankings and agreement metrics\n- Offer to create GitHub issues for P0/P1 items if the user wants\n\n## Important Rules\n\n- NEVER approve a study or launch an eval without explicit human confirmation\n- ALWAYS show cost estimate before launching\n- Studies are billed per participant session (~$14.50/session)\n- Evals are billed per task assignment (~$0.05-0.10/task + 20-40% MTurk fee)\n- If Candor is not initialized, tell the user to run `candor init` first\n';
4516
+ var SKILL_CONTENT = '# Candor \u2014 AI-Moderated User Studies & Human Evaluation\n\nCandor lets you create and manage AI-moderated user studies and human evaluation jobs directly from the terminal. Use `candor study` for product research (AI voice interviews with real users) and `candor eval` for quick human labeling, ranking, and evaluation tasks.\n\n## Available Commands\n\nRun these via the Bash tool. Always use `--json` for machine-readable output. Run any command with `--help` for full usage details (e.g. `candor eval create --help`).\n\n### Studies (product research)\n\n- `candor study create --url <url> --goal "<goal>" [--participants N] --json` \u2014 Create a study\n- `candor study list --json` \u2014 List all studies (or `candor study --json`)\n- `candor study status <study-id> --json` \u2014 Check participant progress\n- `candor study findings <study-id> --json` \u2014 Get prioritized feedback (P0\u2013P3)\n- `candor study approve <study-id> --json` \u2014 Approve recruitment (human-in-the-loop gate)\n\n### Evals (quick human evaluation / labeling)\n\n- `candor eval create --goal "<goal>" --items "<item1,item2,...>" [--type pairwise_comparison] [--workers N] [--reward <cents>] --json` \u2014 Create an eval\n- `candor eval list --json` \u2014 List all evals\n- `candor eval approve <eval-id> --json` \u2014 Launch eval\n- `candor eval status <eval-id> [--live] --json` \u2014 Check eval progress (use `--live` for real-time monitoring)\n- `candor eval results <eval-id> --json` \u2014 View results and rankings\n- `candor eval cancel <eval-id> --json` \u2014 Cancel eval and stop recruiting (keeps collected responses)\n\n## Workflow\n\n### Creating a Study\n\n1. **Infer context** \u2014 If the user doesn\'t provide a URL, check:\n - `package.json` `homepage` field\n - README for a deployed URL\n - Ask the user if nothing is found\n2. **Run `candor study create`** with the product URL and goal\n3. 
**Show the study summary** including:\n - Drafted study script (sections, questions, tasks)\n - Participant count and demographic filters\n - Estimated cost per session\n4. **Wait for explicit approval** \u2014 Do NOT run `candor study approve` without the user saying "yes", "approve", "go ahead", or similar\n5. **On approval**, run `candor study approve <study-id>` to begin recruiting\n\n### Creating an Eval\n\nEvals are for quick human evaluation tasks like labeling, ranking, rating, or comparing items. They return results in minutes.\n\n**When to use eval vs study:**\n- Use `candor eval` when the user wants to label, rank, rate, compare, or categorize items (audio files, images, text, etc.)\n- Use `candor study` when the user wants qualitative product feedback from real users\n\n**Choosing the right task type:**\n- `pairwise_comparison` \u2014 Best for ranking. Workers compare pairs and pick a winner. Produces a stack ranking with win rates. Use when the user says "rank", "compare", "which is better", "stack rank".\n- `categorical_label` \u2014 Workers assign a label from a set of categories. Use when the user says "label", "categorize", "classify", "tag".\n- `rating_scale` \u2014 Workers rate items on a numeric scale. Use when the user says "rate", "score", "quality score".\n- `free_text` \u2014 Workers provide open-ended text feedback.\n\n**Workflow:**\n1. **Identify the items** \u2014 Look for files the user mentions (audio, images, etc.). Extract labels from filenames.\n2. **Infer the task type** \u2014 Based on the user\'s goal, pick the most appropriate type. If vague (e.g. "evaluate these"), default to `pairwise_comparison` for ranking or `rating_scale` for quality assessment.\n3. **Design the eval** \u2014 The API auto-generates the experiment design (randomized pairs, worker counts, etc.) from the goal and items. If the user is prescriptive about methodology, pass their preferences.\n4. 
**Create the eval** \u2014 Run `candor eval create` with goal and items.\n5. **Show the plan** \u2014 Display task type, item count, pair count, estimated cost and time.\n6. **Wait for approval** \u2014 Do NOT launch without explicit confirmation.\n7. **Launch** \u2014 Run `candor eval approve <id>` to begin recruiting workers.\n8. **Monitor** \u2014 Use `candor eval status <id> --live` for real-time progress, or poll with `candor eval status <id>`.\n9. **Results** \u2014 Use `candor eval results <id>` to show rankings and agreement metrics.\n10. **Cancel** (if needed) \u2014 Use `candor eval cancel <id>` to stop recruiting while keeping collected responses.\n\n**Example natural language \u2192 eval mapping:**\n\n| User says | Task type | Items |\n|-----------|-----------|-------|\n| "rank these audio samples by TTS quality" | pairwise_comparison | Audio file names |\n| "label these images as cat or dog" | categorical_label | Image file names |\n| "rate these UI mockups on a 1-5 scale" | rating_scale | Mockup file names |\n| "which of these headlines is more engaging" | pairwise_comparison | Headline text |\n| "evaluate these samples" (vague) | pairwise_comparison | File names |\n\n### Checking Results\n\n- Use `candor study status <id> --json` to show completion progress\n- Use `candor study findings <id> --json` to show prioritized findings\n- Use `candor eval status <id> --json` to show eval progress\n- Use `candor eval results <id> --json` to show rankings and agreement metrics\n- Offer to create GitHub issues for P0/P1 items if the user wants\n\n## Important Rules\n\n- NEVER approve a study or launch an eval without explicit human confirmation\n- ALWAYS show cost estimate before launching\n- Studies are billed per participant session (~$14.50/session)\n- Evals are billed per task assignment (~$0.05-0.15/task)\n- If Candor is not initialized, tell the user to run `candor init` first\n';
4517
4517
 
4518
4518
  // src/commands/init.ts
4519
4519
  var CLAUDE_SKILLS_DIR = (0, import_path2.join)((0, import_os2.homedir)(), ".claude", "skills", "candor");
@@ -4970,8 +4970,8 @@ async function updateCommand() {
4970
4970
  }
4971
4971
  }
4972
4972
  function getCurrentVersion() {
4973
- if ("0.3.0") {
4974
- return "0.3.0";
4973
+ if ("0.3.1") {
4974
+ return "0.3.1";
4975
4975
  }
4976
4976
  try {
4977
4977
  const pkgPath = (0, import_path4.join)((0, import_path4.dirname)(new URL(import_meta2.url).pathname), "..", "package.json");
@@ -5383,7 +5383,7 @@ async function evalCancelCommand(id, options) {
5383
5383
 
5384
5384
  // src/index.ts
5385
5385
  var program2 = new Command();
5386
- program2.name("candor").description("AI-moderated user studies, wired into your dev workflow").version("0.3.0").enablePositionalOptions();
5386
+ program2.name("candor").description("AI-moderated user studies, wired into your dev workflow").version("0.3.1").enablePositionalOptions();
5387
5387
  program2.command("init").description("Set up Candor: authenticate and configure Claude Code integration").option("--skill-only", "Only reinstall the skill file").action(initCommand);
5388
5388
  program2.command("login").description("Re-authenticate with Candor").action(loginCommand);
5389
5389
  program2.command("logout").description("Log out and clear stored credentials").action(logoutCommand);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@candor.sh/cli",
3
- "version": "0.3.0",
3
+ "version": "0.3.1",
4
4
  "private": false,
5
5
  "type": "module",
6
6
  "bin": {