ellf-cli 5.0.8__tar.gz → 5.0.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ellf_cli-5.0.8/ellf_cli.egg-info → ellf_cli-5.0.14}/PKG-INFO +1 -1
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/about.json +1 -1
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/auth.py +18 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/clusters.py +4 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/config.py +5 -1
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/general.py +27 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf.json +1 -1
- ellf_cli-5.0.14/ellf_cli/ellf_skills/skills/ellf-annotate.assistant/SKILL.md +170 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-annotate.coding/SKILL.md +8 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-handoff/SKILL.md +3 -1
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-monitor.assistant/SKILL.md +41 -23
- {ellf_cli-5.0.8/ellf_cli/ellf_skills/skills/ellf-monitor.coding → ellf_cli-5.0.14/ellf_cli/ellf_skills/skills/ellf-monitor.assistant}/references/annotation_metrics.md +7 -4
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-monitor.assistant/references/training_monitoring.md +1 -1
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-monitor.coding/SKILL.md +26 -2
- ellf_cli-5.0.14/ellf_cli/ellf_skills/skills/ellf-ops.assistant/SKILL.md +369 -0
- ellf_cli-5.0.14/ellf_cli/ellf_skills/skills/ellf-ops.coding/SKILL.md +161 -0
- ellf_cli-5.0.14/ellf_cli/ellf_skills/skills/ellf-ops.coding/references/data_infra_cli.md +156 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-patterns/references/pattern_strategies.md +3 -1
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-project.assistant/SKILL.md +42 -20
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-project.coding/SKILL.md +43 -21
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-support.assistant/SKILL.md +5 -4
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-support.coding/SKILL.md +13 -12
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-todo/SKILL.md +2 -2
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-train.assistant/SKILL.md +7 -7
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-train.assistant/references/workflow.md +21 -40
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-train.coding/SKILL.md +1 -1
- {ellf_cli-5.0.8 → ellf_cli-5.0.14/ellf_cli.egg-info}/PKG-INFO +1 -1
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli.egg-info/SOURCES.txt +1 -2
- ellf_cli-5.0.8/ellf_cli/ellf_skills/bin/write-current-session.py +0 -71
- ellf_cli-5.0.8/ellf_cli/ellf_skills/hooks/hooks.json +0 -37
- ellf_cli-5.0.8/ellf_cli/ellf_skills/skills/ellf-annotate.assistant/SKILL.md +0 -168
- ellf_cli-5.0.8/ellf_cli/ellf_skills/skills/ellf-ops.assistant/SKILL.md +0 -210
- ellf_cli-5.0.8/ellf_cli/ellf_skills/skills/ellf-ops.coding/SKILL.md +0 -321
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/LICENSE +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/MANIFEST.in +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/README.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/__init__.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/__main__.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/about.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/appdirs.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/cli.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/cloud/__init__.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/cloud/gcp.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/cluster_config.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/__init__.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/_cluster_select.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/_recipe_file.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/_recipe_subcommand.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/_state.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/actions.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/agents.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/assets.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/auth.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/datasets.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/files/__init__.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/files/cp.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/files/ls.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/files/rm.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/files/rsync.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/files/stats.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/import_export.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/infra/__init__.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/infra/_helpers.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/infra/deploy.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/infra/init_values.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/infra/provision.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/infra/register.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/infra/setup.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/infra/start.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/infra/terraform.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/infra/tls.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/jobs.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/packages.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/paths.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/plans.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/projects.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/publish_code.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/publish_data.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/recipes.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/secrets.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/support.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/tasks.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/commands/todos.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/config.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/.claude-plugin/plugin.json +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/.gitignore +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skill_variants.json +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-annotate.assistant/references/annotation_audit.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-annotate.assistant/references/builtin_ellf_annotation_recipes.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-annotate.coding/references/annotation_audit.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-annotate.coding/references/builtin_ellf_annotation_recipes.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-annotate.coding/references/builtin_prodigy_recipes.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-ask/SKILL.md +0 -0
- {ellf_cli-5.0.8/ellf_cli/ellf_skills/skills/ellf-monitor.assistant → ellf_cli-5.0.14/ellf_cli/ellf_skills/skills/ellf-monitor.coding}/references/annotation_metrics.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-monitor.coding/references/training_monitoring.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-monitor.coding/scripts/check_training.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-ops.coding/scripts/run_job.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-patterns/SKILL.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-prodigy/SKILL.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-prodigy/assets/templates/template_action_recipe.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-prodigy/assets/templates/template_agent_recipe.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-prodigy/assets/templates/template_blocks_ui.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-prodigy/assets/templates/template_correct.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-prodigy/assets/templates/template_custom_ui.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-prodigy/assets/templates/template_manual.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-prodigy/assets/templates/template_pages_ui.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-prodigy/assets/templates/template_routing.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-prodigy/assets/templates/template_task_recipe.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-prodigy/assets/templates/template_teach.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-prodigy/references/builtin_recipes.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-prodigy/references/ellf_recipe_sdk.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-prodigy/references/lint_recipe.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-prodigy/references/prodigy_recipe_api.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-prodigy/references/template_index.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-project.assistant/references/consulting_patterns.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-project.assistant/references/explosion_strategy.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-project.assistant/references/prodigy_llm_bot.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-project.coding/references/consulting_patterns.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-project.coding/references/explosion_strategy.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-project.coding/references/prodigy_llm_bot.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-train.assistant/references/diagnostics.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-train.assistant/references/evaluation_guide.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-train.assistant/references/model_selection.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-train.assistant/references/training_paradigms.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-train.coding/references/config_advanced.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-train.coding/references/config_architectures.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-train.coding/references/config_training.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-train.coding/references/diagnostics.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-train.coding/references/evaluation_guide.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-train.coding/references/experiment_patterns.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-train.coding/references/model_selection.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-train.coding/references/training_paradigms.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-train.coding/references/training_troubleshooting.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-train.coding/references/workflow.md +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-train.coding/scripts/ellf_logger.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/errors.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/helm.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/key_pair.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/main.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/messages.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/query.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/recipes_cookiecutter/cookiecutter.json +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/recipes_cookiecutter/{{cookiecutter.package_dir}}/.gitignore +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/recipes_cookiecutter/{{cookiecutter.package_dir}}/README.md.tmpl +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/recipes_cookiecutter/{{cookiecutter.package_dir}}/requirements-dev.in +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/recipes_cookiecutter/{{cookiecutter.package_dir}}/requirements.in +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/recipes_cookiecutter/{{cookiecutter.package_dir}}/setup.py.tmpl +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/recipes_cookiecutter/{{cookiecutter.package_dir}}/{{cookiecutter.package_name}}/__init__.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/recipes_cookiecutter/{{cookiecutter.package_dir}}/{{cookiecutter.package_name}}/about.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/recipes_cookiecutter/{{cookiecutter.package_dir}}/{{cookiecutter.package_name}}/recipes/__init__.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/recipes_cookiecutter/{{cookiecutter.package_dir}}/{{cookiecutter.package_name}}/recipes/example_task.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/testing/__init__.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ty.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ui.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/url.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/util.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli.egg-info/dependency_links.txt +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli.egg-info/entry_points.txt +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli.egg-info/not-zip-safe +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli.egg-info/requires.txt +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli.egg-info/top_level.txt +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/pyproject.toml +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/setup.cfg +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/setup.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/tests/test_appdirs.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/tests/test_auth.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/tests/test_config.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/tests/test_errors.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/tests/test_files_cp.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/tests/test_files_cp_helpers.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/tests/test_info.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/tests/test_invalid_secrets.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/tests/test_key_pair.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/tests/test_login.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/tests/test_main.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/tests/test_plans.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/tests/test_projects.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/tests/test_query.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/tests/test_recipe_file.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/tests/test_recipes.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/tests/test_state.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/tests/test_support.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/tests/test_ty.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/tests/test_ui.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/tests/test_ui_extras.py +0 -0
- {ellf_cli-5.0.8 → ellf_cli-5.0.14}/tests/test_util.py +0 -0
|
@@ -145,6 +145,9 @@ class AuthState(Protocol):
|
|
|
145
145
|
@property
|
|
146
146
|
def org_id(self) -> UUID: ...
|
|
147
147
|
|
|
148
|
+
@property
def user_id(self) -> UUID:
    """Return the UUID of the current user."""
    ...
|
|
150
|
+
|
|
148
151
|
@property
|
|
149
152
|
def pam_host(self) -> str: ...
|
|
150
153
|
|
|
@@ -269,6 +272,21 @@ class AuthStateImpl:
|
|
|
269
272
|
self._org_id = orgs[0].id
|
|
270
273
|
return self._org_id
|
|
271
274
|
|
|
275
|
+
@property
def user_id(self) -> UUID:
    """Return the current user's UUID as recorded in the PAM api token.

    The value is looked up under the ``uid`` key of the token returned by
    ``get_api_token()`` (set by pam.tokens.encode_pam_token). Using the
    token spares callers a /v1/user/read round-trip when all they need is
    an identifier to attach state to, e.g. persisting last_cluster_id
    after `clusters use`.

    Raises:
        EllfError: if the token carries no ``uid`` entry.
    """
    # NOTE(review): the lookup goes through ``token.header``; confirm the
    # uid is stored there rather than among the payload claims.
    raw_uid = self.get_api_token().header.get("uid")
    if raw_uid is not None:
        return UUID(raw_uid)
    raise EllfError(message="PAM token missing uid claim")
|
|
289
|
+
|
|
272
290
|
@property
|
|
273
291
|
def pam_host(self) -> str:
|
|
274
292
|
return self.pam_url.netloc
|
|
@@ -33,6 +33,7 @@ from ..ui import print_info_table, print_mutation_result, print_table_with_selec
|
|
|
33
33
|
from ..util import URL
|
|
34
34
|
from ._cluster_select import select_cluster
|
|
35
35
|
from ._state import get_auth_state, get_root_cfg, get_saved_settings
|
|
36
|
+
from .general import persist_last_active_context
|
|
36
37
|
|
|
37
38
|
|
|
38
39
|
def uuid7() -> uuid.UUID:
|
|
@@ -156,6 +157,9 @@ def use(
|
|
|
156
157
|
settings.update("cluster_id", chosen.id)
|
|
157
158
|
settings.save(get_root_cfg().saved_settings_path)
|
|
158
159
|
auth.set_active_cluster(chosen.id, broker_url)
|
|
160
|
+
# Mirror to the user's PAM record so the web app — and other CLI
|
|
161
|
+
# sessions — see the same active cluster after an org switch.
|
|
162
|
+
persist_last_active_context(auth, last_cluster_id=chosen.id)
|
|
159
163
|
print_mutation_result(
|
|
160
164
|
{
|
|
161
165
|
"status": "ok",
|
|
@@ -16,7 +16,8 @@ from ..query import (
|
|
|
16
16
|
)
|
|
17
17
|
from ..ui import print_mutation_result
|
|
18
18
|
from ..util import URL
|
|
19
|
-
from ._state import get_root_cfg, get_saved_settings
|
|
19
|
+
from ._state import get_auth_state, get_root_cfg, get_saved_settings
|
|
20
|
+
from .general import persist_last_active_context
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
@cli.subcommand(
|
|
@@ -53,6 +54,9 @@ def project(name_or_id: Union[str, UUID], as_json: bool = False) -> UUID:
|
|
|
53
54
|
settings = get_saved_settings()
|
|
54
55
|
settings.update("project", project_id)
|
|
55
56
|
settings.save(root_cfg.saved_settings_path)
|
|
57
|
+
# Mirror to the user's PAM record so the web app — and other CLI
|
|
58
|
+
# sessions — see the same active project after an org switch.
|
|
59
|
+
persist_last_active_context(get_auth_state(), last_project_id=project_id)
|
|
56
60
|
print_mutation_result(
|
|
57
61
|
{"status": "ok", "project_id": str(project_id)},
|
|
58
62
|
Messages.T019.format(noun="project", name=project_id),
|
|
@@ -114,9 +114,36 @@ def _select_and_persist_cluster(
|
|
|
114
114
|
settings.update("cluster_id", chosen.id)
|
|
115
115
|
settings.save(get_root_cfg().saved_settings_path)
|
|
116
116
|
auth.set_active_cluster(chosen.id, broker_url)
|
|
117
|
+
persist_last_active_context(auth, last_cluster_id=chosen.id)
|
|
117
118
|
return chosen.id
|
|
118
119
|
|
|
119
120
|
|
|
121
|
+
def persist_last_active_context(
    auth: AuthState,
    *,
    last_cluster_id: Optional[UUID] = None,
    last_project_id: Optional[UUID] = None,
) -> None:
    """Copy the locally selected cluster/project onto the user's PAM record.

    The web app reads ``User.last_cluster_id`` / ``last_project_id`` to
    restore context after org switches; writing them from the CLI means a
    web session opened after a ``clusters use`` lands on the same cluster.

    Args:
        auth: Auth state supplying the PAM client and the current user id.
        last_cluster_id: Cluster to record, or ``None`` to leave it out.
        last_project_id: Project to record, or ``None`` to leave it out.

    Does nothing when both ids are ``None``.
    """
    # Imported lazily, inside the function, as in the original code.
    from ellf_pam_sdk.models import UserUpdating

    anything_to_persist = (
        last_cluster_id is not None or last_project_id is not None
    )
    if anything_to_persist:
        push_update = auth.client.user.update
        # NOTE(review): the id that was not supplied is still passed as an
        # explicit None; confirm UserUpdating / the PAM API treat that as
        # "leave unchanged" rather than "clear the stored value".
        push_update(
            UserUpdating(
                id=auth.user_id,
                last_cluster_id=last_cluster_id,
                last_project_id=last_project_id,
            )
        )
|
|
145
|
+
|
|
146
|
+
|
|
120
147
|
@cli.command("info", field=Arg(help=Messages.select_field))
|
|
121
148
|
def info(field: Optional[Literal["config-dir", "code", "defaults"]] = None) -> Any:
|
|
122
149
|
"""Print information about the CLI"""
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: ellf-annotate
|
|
3
|
+
description: "Prepares annotation for launch in the Ellf cluster: audits readiness, selects the right recipe, and resolves arguments. Delegates to `/ellf-ops` to actually create the task (and optionally start it) once the audit passes. Use when the user is setting up annotation from scratch, choosing the right built-in recipe, verifying readiness, or planning an annotation agent — not for `run X` / `start Y` requests on a known recipe, which go directly to `/ellf-ops`. Use `/ellf-handoff` when a new custom recipe is needed (routes the implementation to the coding agent), and `/ellf-project` when broader project planning is required."
|
|
4
|
+
argument-hint: "[describe what you want to annotate]"
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Prepare Annotation In Ellf
|
|
8
|
+
|
|
9
|
+
Help the user get from an annotation-ready plan to a running annotation task.
|
|
10
|
+
|
|
11
|
+
$ARGUMENTS
|
|
12
|
+
|
|
13
|
+
## Your role
|
|
14
|
+
|
|
15
|
+
You are responsible for:
|
|
16
|
+
- checking that annotation is ready to launch (the audit)
|
|
17
|
+
- choosing the right built-in Ellf annotation and/or agent recipe
|
|
18
|
+
- deciding whether an existing custom recipe is sufficient
|
|
19
|
+
- deciding when a new custom recipe is required
|
|
20
|
+
- resolving the natural-scalar arguments the user can answer in chat
|
|
21
|
+
- delegating to `/ellf-ops` to actually create (and optionally start) the task
|
|
22
|
+
|
|
23
|
+
You do not call `*_create` tools yourself — that is `/ellf-ops`'s job. After
|
|
24
|
+
the audit and arg resolution, delegate. You also do not implement new recipes.
|
|
25
|
+
If the workflow requires a new custom recipe or custom interface, use
|
|
26
|
+
`/ellf-handoff` to route the implementation to the coding workflow. If broader
|
|
27
|
+
methodology or schema work is needed, use `/ellf-project`.
|
|
28
|
+
|
|
29
|
+
**Never narrate routing or argument inference to the user.** The user does
|
|
30
|
+
not need to know that you're delegating to `/ellf-ops`, what `auto_start`
|
|
31
|
+
value you inferred from their verb, or which cli-name you chose for which
|
|
32
|
+
field. Say only what the user can act on: the outcome, the next step, the
|
|
33
|
+
link. The audit summary is useful when something failed and the user needs
|
|
34
|
+
to fix it; otherwise skip it and let the create tool's artifact (card,
|
|
35
|
+
form link, or prerequisite link) do the talking.
|
|
36
|
+
|
|
37
|
+
## Required readiness audit
|
|
38
|
+
|
|
39
|
+
Before choosing a recipe or building a launch spec, read:
|
|
40
|
+
|
|
41
|
+
- `${CLAUDE_SKILL_DIR}/references/annotation_audit.md`
|
|
42
|
+
- `${CLAUDE_SKILL_DIR}/references/builtin_ellf_annotation_recipes.md`
|
|
43
|
+
|
|
44
|
+
The audit exists because launching with a poorly designed schema or a
|
|
45
|
+
mismatched recipe wastes annotation effort and produces training data the
|
|
46
|
+
model can't learn from. Use it to catch problems before you touch the
|
|
47
|
+
platform.
|
|
48
|
+
|
|
49
|
+
Do not launch until you have confirmed:
|
|
50
|
+
- the annotation objective is clear
|
|
51
|
+
- the schema or review target is stable enough
|
|
52
|
+
- the recipe choice actually matches the task
|
|
53
|
+
- the input data is ready
|
|
54
|
+
- the target dataset is clear
|
|
55
|
+
|
|
56
|
+
If the audit surfaces a problem:
|
|
57
|
+
- methodological issues (schema design, task decomposition) → route to `/ellf-project`
|
|
58
|
+
- recipe implementation needs (custom UI, routing logic) → route to `/ellf-handoff`
|
|
59
|
+
|
|
60
|
+
## Recipe selection
|
|
61
|
+
|
|
62
|
+
### Built-in task recipe first
|
|
63
|
+
|
|
64
|
+
Prefer a built-in Ellf task recipe whenever it fits cleanly. The built-in
|
|
65
|
+
recipes are documented in
|
|
66
|
+
`${CLAUDE_SKILL_DIR}/references/builtin_ellf_annotation_recipes.md`.
|
|
67
|
+
|
|
68
|
+
Call `mcp__pam__recipe_list` to confirm the recipe is available in the
|
|
69
|
+
current environment before committing to it.
|
|
70
|
+
|
|
71
|
+
### Existing custom recipe
|
|
72
|
+
|
|
73
|
+
If the user names an existing cluster recipe and it matches the workflow,
|
|
74
|
+
use it.
|
|
75
|
+
|
|
76
|
+
### New custom recipe required
|
|
77
|
+
|
|
78
|
+
If the audit shows the user needs a custom interface, routing logic, or
|
|
79
|
+
annotation flow that built-ins cannot express cleanly:
|
|
80
|
+
- do not force a bad built-in fit
|
|
81
|
+
- use `/ellf-handoff` to assign custom recipe implementation to the coding agent
|
|
82
|
+
- describe what the custom recipe must do
|
|
83
|
+
|
|
84
|
+
## Annotation agents
|
|
85
|
+
|
|
86
|
+
If the user wants automated annotation:
|
|
87
|
+
- first ensure the base task is methodologically sound
|
|
88
|
+
- prepare the task spec first
|
|
89
|
+
- then identify an annotation-capable `agent_recipe` and prepare its args
|
|
90
|
+
- both task and agent get created via `/ellf-ops` (separate `*_create` calls,
|
|
91
|
+
then `mcp__pam__task_assign_bot` to attach the agent to the task)
|
|
92
|
+
|
|
93
|
+
Do not treat the agent as a replacement for task setup. The task is the
|
|
94
|
+
base annotation workflow.
|
|
95
|
+
|
|
96
|
+
## Recipe arguments
|
|
97
|
+
|
|
98
|
+
Once the recipe is selected:
|
|
99
|
+
- call `mcp__pam__recipe_list` to find the recipe ID
|
|
100
|
+
- call `mcp__pam__recipe_schema` with the recipe ID
|
|
101
|
+
- treat the returned field spec as the authoritative source for: exact arg keys, types, required vs optional fields, union variants, and cli-name remappings
|
|
102
|
+
- fill args from context and the project plan where possible
|
|
103
|
+
- ask only for natural-scalar values the user can answer in a sentence (a name, a language, a label list); object-typed args belong to the form
|
|
104
|
+
- the create tool runs validation internally and decides what the user sees — do not narrate the schema response back to the user
|
|
105
|
+
|
|
106
|
+
The annotation interface itself (what annotators see) cannot be previewed
|
|
107
|
+
from the assistant. If the user needs to verify the annotation UI before
|
|
108
|
+
cluster launch, they should use `ellf-dev run <recipe> [args]` in their
|
|
109
|
+
local coding environment.
|
|
110
|
+
|
|
111
|
+
## Execute via /ellf-ops
|
|
112
|
+
|
|
113
|
+
After audit + recipe selection + arg resolution, delegate to `/ellf-ops` to
|
|
114
|
+
create the task (and optionally start it):
|
|
115
|
+
|
|
116
|
+
```text
|
|
117
|
+
mcp__pam__task_create(
|
|
118
|
+
recipe_id=<from recipe_list>,
|
|
119
|
+
args=<resolved scalar args>,
|
|
120
|
+
name=<optional; PAM auto-names from recipe + timestamp when omitted>,
|
|
121
|
+
auto_start=<true if the user said run/start/launch, false for create/save/set up>,
|
|
122
|
+
)
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
If an annotation agent was also planned, delegate `mcp__pam__agent_create(...)` the same way
|
|
126
|
+
for the agent. After the agent is created and started, attach it to the
|
|
127
|
+
task with `mcp__pam__task_assign_bot(task_id=..., agent_id=...)`.
|
|
128
|
+
|
|
129
|
+
The create tool's internal validation routes every case to a useful
|
|
130
|
+
user-facing artifact:
|
|
131
|
+
- confirmation card (three buttons: `Create and start` / `Create only` / `Cancel`) on clean validation
|
|
132
|
+
- form-handoff link on missing scalars or complex JSON args
|
|
133
|
+
- missing-prerequisite link on a referenced asset / dataset / secret that doesn't exist on the cluster
|
|
134
|
+
|
|
135
|
+
Do not stop early to flag schema issues, preview validation problems, or
|
|
136
|
+
direct the user elsewhere. The create tool produces a more useful artifact
|
|
137
|
+
than your commentary. See `/ellf-ops` for the full create workflow and the
|
|
138
|
+
forbidden patterns ("this won't validate, please fix", "the schema requires
|
|
139
|
+
X, Y, Z", any pre-emptive validation reasoning).
|
|
140
|
+
|
|
141
|
+
## After creation
|
|
142
|
+
|
|
143
|
+
Once the user confirms the create card (or the form-handoff link is followed
|
|
144
|
+
through to completion), help verify and run any follow-ups:
|
|
145
|
+
|
|
146
|
+
- if the task wasn't auto-started (user clicked `Create only`): use the `/ellf-ops`
|
|
147
|
+
start workflow when the user is ready
|
|
148
|
+
- check cluster status with `mcp__cluster__job_status(id="<id>")` to confirm the
|
|
149
|
+
task is running and healthy
|
|
150
|
+
- provide the task link and tell the user where to open it in the app
|
|
151
|
+
- if an agent was added, confirm it is assigned with `mcp__pam__task_bots_read`
|
|
152
|
+
|
|
153
|
+
If startup or assignment fails:
|
|
154
|
+
- inspect the cluster error or logs with `mcp__cluster__job_logs(id="<id>")` /
|
|
155
|
+
`mcp__cluster__job_errors(id="<id>")`
|
|
156
|
+
- if this is an operational problem, use `/ellf-monitor` (or consult `/ellf-ops`)
|
|
157
|
+
- if this is a recipe-capability problem, use `/ellf-handoff`
|
|
158
|
+
|
|
159
|
+
When you finish:
|
|
160
|
+
- state whether the setup passed the readiness audit (one line; give the full audit summary only when something failed)
|
|
161
|
+
- state which recipe path you selected
|
|
162
|
+
- summarize the launch outcome (created / created and started / handed off to form / blocked on missing prerequisite)
|
|
163
|
+
- if not launched, explain exactly what is missing and where you are routing the user next
|
|
164
|
+
|
|
165
|
+
## Reference files
|
|
166
|
+
|
|
167
|
+
| File | What it covers | When to read |
|
|
168
|
+
|------|---------------|--------------|
|
|
169
|
+
| `${CLAUDE_SKILL_DIR}/references/annotation_audit.md` | Readiness checklist: objective, schema, recipe fit, data, dataset | Before every launch |
|
|
170
|
+
| `${CLAUDE_SKILL_DIR}/references/builtin_ellf_annotation_recipes.md` | Built-in Ellf task and agent recipes with supported workflows | Recipe selection |
|
{ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-annotate.coding/SKILL.md
RENAMED
|
@@ -183,3 +183,11 @@ When you finish:
|
|
|
183
183
|
- state which runtime target will be used
|
|
184
184
|
- summarize the launch spec clearly
|
|
185
185
|
- if not launching, explain exactly what is missing and where you are routing the user next
|
|
186
|
+
|
|
187
|
+
## Reference files
|
|
188
|
+
|
|
189
|
+
| File | What it covers | When to read |
|
|
190
|
+
|------|---------------|--------------|
|
|
191
|
+
| `${CLAUDE_SKILL_DIR}/references/annotation_audit.md` | Readiness checklist: objective, schema, recipe fit, data, dataset | Before every launch |
|
|
192
|
+
| `${CLAUDE_SKILL_DIR}/references/builtin_ellf_annotation_recipes.md` | Built-in Ellf task and agent recipes with supported workflows | Recipe selection for cluster |
|
|
193
|
+
| `${CLAUDE_SKILL_DIR}/references/builtin_prodigy_recipes.md` | Built-in standalone Prodigy recipes | Recipe selection for local |
|
|
@@ -21,6 +21,7 @@ the **AskUserQuestion** tool to ask — do NOT ask in plain text.
|
|
|
21
21
|
|---|---|
|
|
22
22
|
| **description** | Specific and actionable. Not "implement NER" but "Create a custom ner.correct recipe with Ctrl+Enter keybinding for accept_best." |
|
|
23
23
|
| **context_summary** | 2-3 paragraphs condensing the conversation: what was discussed, decisions made, constraints, label schemes, data formats. |
|
|
24
|
+
| **plan_docs** | List of plan names the coding agent should read for project context. Use `project_plan_list` to see what exists, then pass only the plans relevant to this handoff (e.g. the overview plan plus the component plan the work targets). Leave empty if no plans exist yet. |
|
|
24
25
|
|
|
25
26
|
## Step 2: Create the request
|
|
26
27
|
|
|
@@ -29,7 +30,8 @@ Call the `todo_create` PAM tool — do NOT use Bash or ellf:
|
|
|
29
30
|
```
|
|
30
31
|
todo_create(
|
|
31
32
|
description="<description>",
|
|
32
|
-
context_summary="<context_summary>"
|
|
33
|
+
context_summary="<context_summary>",
|
|
34
|
+
plan_docs=["project_plan", ...]
|
|
33
35
|
)
|
|
34
36
|
```
|
|
35
37
|
|
{ellf_cli-5.0.8 → ellf_cli-5.0.14}/ellf_cli/ellf_skills/skills/ellf-monitor.assistant/SKILL.md
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: ellf-monitor
|
|
3
|
-
description: "
|
|
3
|
+
description: "Monitors cluster jobs, annotation activity, training progress, and cluster health — produces structured summaries instead of raw log dumps. Includes alert classification (overfitting, plateau, NaN loss, spikes), annotation metrics, and diagnostic routing. Use proactively after launching any job, not just when the user asks. Also trigger on status checks, log inspection, 'how's the task doing', 'what failed', or training metric questions."
|
|
4
4
|
argument-hint: "[job name, job type, or 'cluster']"
|
|
5
5
|
---
|
|
6
6
|
|
|
@@ -29,8 +29,8 @@ If an action is needed:
|
|
|
29
29
|
## Tool surface
|
|
30
30
|
|
|
31
31
|
Use:
|
|
32
|
-
- `
|
|
33
|
-
- PAM read/list tools when you need persisted object details such as task, action, or agent identity
|
|
32
|
+
- The named cluster tools — `job_status`, `job_logs`, `job_errors`, `cluster_status`, `nodes_list`, `worker_types_list`, `dataset_example_count`, `dataset_session_counts` — for runtime state. There is no free-form proxy; each tool has a typed schema.
|
|
33
|
+
- PAM read/list tools when you need persisted object details such as task, action, or agent identity.
|
|
34
34
|
|
|
35
35
|
Do not guess cluster state. Always check.
|
|
36
36
|
|
|
@@ -44,7 +44,7 @@ Before monitoring, determine what to monitor.
|
|
|
44
44
|
|
|
45
45
|
Use:
|
|
46
46
|
```text
|
|
47
|
-
|
|
47
|
+
mcp__cluster__job_status()
|
|
48
48
|
```
|
|
49
49
|
|
|
50
50
|
If there are multiple plausible jobs, use `AskUserQuestion` to let the user choose.
|
|
@@ -56,11 +56,11 @@ If there are multiple plausible jobs, use `AskUserQuestion` to let the user choo
|
|
|
56
56
|
Read:
|
|
57
57
|
- `${CLAUDE_SKILL_DIR}/references/training_monitoring.md`
|
|
58
58
|
- `${CLAUDE_SKILL_DIR}/../ellf-train/references/training_troubleshooting.md`
|
|
59
|
-
Use
|
|
59
|
+
Use cluster calls such as:
|
|
60
60
|
```text
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
61
|
+
mcp__cluster__job_status(id="<id>")
|
|
62
|
+
mcp__cluster__job_logs(id="<id>", tail_lines=100)
|
|
63
|
+
mcp__cluster__job_errors(id="<id>")
|
|
64
64
|
```
|
|
65
65
|
|
|
66
66
|
Your job is to:
|
|
@@ -75,19 +75,29 @@ If follow-up action is needed, route to `/ellf-ops` or `/ellf-handoff`.
|
|
|
75
75
|
|
|
76
76
|
### Annotation tasks
|
|
77
77
|
|
|
78
|
-
Use
|
|
78
|
+
Use cluster calls for job status and per-dataset counts; use PAM for dataset
|
|
79
|
+
discovery:
|
|
79
80
|
```text
|
|
80
|
-
|
|
81
|
-
|
|
81
|
+
mcp__cluster__job_status(id="<id>")
|
|
82
|
+
mcp__pam__dataset_list(cluster_id="<id>") # discover datasets (org-wide, registered with PAM)
|
|
83
|
+
mcp__cluster__dataset_example_count(name="<name>") # total examples in the dataset (per-cluster, via broker)
|
|
84
|
+
mcp__cluster__dataset_session_counts(name="<name>") # per-annotator breakdown (keys are session_ids; null = bulk import)
|
|
82
85
|
```
|
|
83
86
|
|
|
87
|
+
`dataset_list` is a PAM read (org-wide registry), while the count tools talk to
|
|
88
|
+
the broker because example counts live in the per-cluster Prodigy database.
|
|
89
|
+
All three are read-only and do not require user confirmation. They do not
|
|
90
|
+
read annotation contents — only counts and metadata.
|
|
91
|
+
|
|
84
92
|
Read when needed:
|
|
85
93
|
- `${CLAUDE_SKILL_DIR}/references/annotation_metrics.md`
|
|
86
94
|
|
|
87
95
|
Report:
|
|
88
96
|
- task state
|
|
89
97
|
- whether the task appears reachable and healthy
|
|
90
|
-
- annotation count
|
|
98
|
+
- annotation count (from `dataset_example_count`)
|
|
99
|
+
- per-annotator activity (from `dataset_session_counts`) when the user asks
|
|
100
|
+
who annotated what, or which annotators are active
|
|
91
101
|
- dataset growth
|
|
92
102
|
- whether agent assignment appears to be producing data if applicable
|
|
93
103
|
|
|
@@ -95,11 +105,11 @@ If useful, combine task status with task detail from PAM reads to confirm the da
|
|
|
95
105
|
|
|
96
106
|
### Agents
|
|
97
107
|
|
|
98
|
-
Use
|
|
108
|
+
Use cluster calls such as:
|
|
99
109
|
```text
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
110
|
+
mcp__cluster__job_status(id="<id>")
|
|
111
|
+
mcp__cluster__job_logs(id="<id>", tail_lines=50)
|
|
112
|
+
mcp__cluster__job_errors(id="<id>")
|
|
103
113
|
```
|
|
104
114
|
|
|
105
115
|
Report:
|
|
@@ -114,9 +124,9 @@ If the agent is failing repeatedly, recommend `/ellf-ops` for stop/restart or `/
|
|
|
114
124
|
|
|
115
125
|
For non-training actions:
|
|
116
126
|
```text
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
127
|
+
mcp__cluster__job_status(id="<id>")
|
|
128
|
+
mcp__cluster__job_logs(id="<id>", tail_lines=50)
|
|
129
|
+
mcp__cluster__job_errors(id="<id>")
|
|
120
130
|
```
|
|
121
131
|
|
|
122
132
|
Report:
|
|
@@ -129,9 +139,9 @@ Report:
|
|
|
129
139
|
|
|
130
140
|
When the user asks about the cluster itself, use:
|
|
131
141
|
```text
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
142
|
+
mcp__cluster__cluster_status()
|
|
143
|
+
mcp__cluster__nodes_list()
|
|
144
|
+
mcp__cluster__worker_types_list()
|
|
135
145
|
```
|
|
136
146
|
|
|
137
147
|
Report:
|
|
@@ -144,7 +154,7 @@ Report:
|
|
|
144
154
|
|
|
145
155
|
Use compact summaries and tables.
|
|
146
156
|
|
|
147
|
-
Never dump raw JSON or raw
|
|
157
|
+
Never dump raw JSON or raw cluster responses.
|
|
148
158
|
|
|
149
159
|
When presenting logs:
|
|
150
160
|
- summarize the important lines
|
|
@@ -173,3 +183,11 @@ When you finish, state:
|
|
|
173
183
|
- the most important evidence
|
|
174
184
|
- whether intervention is needed
|
|
175
185
|
- the next action, if any
|
|
186
|
+
|
|
187
|
+
## Reference files
|
|
188
|
+
|
|
189
|
+
| File | What it covers | When to read |
|
|
190
|
+
|------|---------------|--------------|
|
|
191
|
+
| `${CLAUDE_SKILL_DIR}/references/training_monitoring.md` | Training log interpretation, alert classification, metric extraction | Training actions |
|
|
192
|
+
| `${CLAUDE_SKILL_DIR}/references/annotation_metrics.md` | Annotation progress signals, dataset growth, annotator activity | Annotation tasks |
|
|
193
|
+
| `${CLAUDE_SKILL_DIR}/../ellf-train.assistant/references/diagnostics.md` | Six problem classes with detection signals and fix guidance | Diagnosing training issues |
|
|
@@ -4,10 +4,13 @@ Use this reference to interpret annotation activity and quality signals.
|
|
|
4
4
|
|
|
5
5
|
## What to watch
|
|
6
6
|
|
|
7
|
-
- Total annotation count
|
|
8
|
-
- Dataset growth over time
|
|
9
|
-
- Number of active annotators or agents
|
|
10
|
-
-
|
|
7
|
+
- Total annotation count — from `mcp__cluster__dataset_example_count`
|
|
8
|
+
- Dataset growth over time — compare counts across checks
|
|
9
|
+
- Number of active annotators or agents — from `mcp__cluster__dataset_session_counts`
|
|
10
|
+
(each non-null key is a session_id; `null` means examples written directly via
|
|
11
|
+
`db-in` rather than through an annotation session)
|
|
12
|
+
- Whether a running task is producing new examples — combine job status with
|
|
13
|
+
example count over time
|
|
11
14
|
|
|
12
15
|
## Warning signals
|
|
13
16
|
|
|
@@ -4,7 +4,7 @@ How to monitor training jobs from the web assistant and interpret their signals.
|
|
|
4
4
|
|
|
5
5
|
## What the web assistant can do
|
|
6
6
|
|
|
7
|
-
Use
|
|
7
|
+
Use cluster-backed status, logs, and errors to determine:
|
|
8
8
|
- whether the training action is running
|
|
9
9
|
- whether it completed or failed
|
|
10
10
|
- whether logs show loss, score, or alert-like signals
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: ellf-monitor
|
|
3
|
-
description: "
|
|
3
|
+
description: "Monitors Ellf jobs, local Prodigy servers, training progress, and cluster health — keeps raw logs out of context and produces structured summaries. Includes a structured training check script, alert classification (overfitting, plateau, NaN loss, spikes), annotation metrics, and diagnostic routing. Use proactively after launching any job, not just when the user asks. Also trigger on status checks, log inspection, 'how's the task doing', 'what failed', or training metric questions."
|
|
4
4
|
argument-hint: "[job name, job type, output dir, or 'cluster']"
|
|
5
5
|
---
|
|
6
6
|
|
|
@@ -10,6 +10,21 @@ You are the monitoring and diagnosis skill for the coding environment.
|
|
|
10
10
|
|
|
11
11
|
$ARGUMENTS
|
|
12
12
|
|
|
13
|
+
## Contents
|
|
14
|
+
- Scope — what this skill does and doesn't do
|
|
15
|
+
- Monitoring surfaces — cluster jobs, local runs, Prodigy servers, cluster health
|
|
16
|
+
- Job discovery — finding the right job to monitor
|
|
17
|
+
- Training monitoring — structured check script, alerts, log interpretation
|
|
18
|
+
- Annotation tasks — task state, dataset growth
|
|
19
|
+
- Agents — state, errors, assignment health
|
|
20
|
+
- Generic actions — state, duration, failures
|
|
21
|
+
- Standalone local Prodigy — process and URL checks
|
|
22
|
+
- Cluster health — connectivity, nodes, worker classes
|
|
23
|
+
- Presenting results — summaries, not raw dumps
|
|
24
|
+
- Continuous monitoring — `/loop` pattern
|
|
25
|
+
- When to escalate — routing to ops, train, project
|
|
26
|
+
- Reference files
|
|
27
|
+
|
|
13
28
|
## Scope
|
|
14
29
|
|
|
15
30
|
You monitor and diagnose.
|
|
@@ -152,7 +167,7 @@ Use the captured output or process info from the launch context when available.
|
|
|
152
167
|
# Connectivity and service health
|
|
153
168
|
ellf clusters check
|
|
154
169
|
|
|
155
|
-
# Deeper
|
|
170
|
+
# Deeper cluster-side checks (K8s, NFS, database)
|
|
156
171
|
ellf clusters check --deep
|
|
157
172
|
|
|
158
173
|
# Node capacity and utilization (cpu, memory, gpu, pod count)
|
|
@@ -206,3 +221,12 @@ When you finish, state:
|
|
|
206
221
|
- the key evidence
|
|
207
222
|
- whether intervention is needed
|
|
208
223
|
- the next operational or implementation step
|
|
224
|
+
|
|
225
|
+
## Reference files
|
|
226
|
+
|
|
227
|
+
| File | What it covers | When to read |
|
|
228
|
+
|------|---------------|--------------|
|
|
229
|
+
| `${CLAUDE_SKILL_DIR}/references/training_monitoring.md` | Training log interpretation, alert classification, metric extraction | Training actions |
|
|
230
|
+
| `${CLAUDE_SKILL_DIR}/references/annotation_metrics.md` | Annotation progress signals, dataset growth, annotator activity | Annotation tasks |
|
|
231
|
+
| `${CLAUDE_SKILL_DIR}/../ellf-train.coding/references/diagnostics.md` | Six problem classes with detection signals and fix guidance | Diagnosing training issues |
|
|
232
|
+
| `${CLAUDE_SKILL_DIR}/../ellf-train.coding/references/training_troubleshooting.md` | Error taxonomy with concrete fixes | Concrete setup or execution errors |
|