data-refinery-cli 0.3.2__tar.gz → 0.3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/CHANGELOG.md +30 -0
- data_refinery_cli-0.3.3/CLAUDE.md +247 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/PKG-INFO +5 -5
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/README.md +2 -2
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/data_refinery/cli/__init__.py +2 -2
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/data_refinery/cli/_commands/cli.py +5 -5
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/data_refinery/cli/_commands/doctor.py +2 -2
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/data_refinery/cli/_commands/explain.py +2 -2
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/data_refinery/cli/_commands/learn.py +17 -16
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/data_refinery/cli/_commands/overview.py +2 -2
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/data_refinery/cli/_commands/whoami.py +1 -1
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/data_refinery/cli/_errors.py +2 -2
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/data_refinery/explain/__init__.py +1 -1
- data_refinery_cli-0.3.3/data_refinery/explain/catalog.py +136 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/pyproject.toml +3 -3
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/tests/test_cli.py +13 -5
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/tests/test_cli_introspection.py +5 -5
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/uv.lock +1 -1
- data_refinery_cli-0.3.2/CLAUDE.md +0 -28
- data_refinery_cli-0.3.2/data_refinery/explain/catalog.py +0 -129
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/agent-config/SKILL.md +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/agent-config/data/backend-fingerprints.yaml +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/agent-config/scripts/show.sh +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/ask-colleague/SKILL.md +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/ask-colleague/prompts/explore.md +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/ask-colleague/prompts/review.md +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/ask-colleague/prompts/write.md +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/ask-colleague/scripts/ask-colleague.sh +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/assign-to-workforce/SKILL.md +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/assign-to-workforce/scripts/assign-to-workforce.sh +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/cicd/SKILL.md +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/cicd/scripts/_resolve-nick.sh +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/cicd/scripts/portability-lint.sh +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/cicd/scripts/pr-reply.sh +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/cicd/scripts/pr-status.sh +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/cicd/scripts/workflow.sh +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/communicate/SKILL.md +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/communicate/scripts/fetch-issues.sh +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/communicate/scripts/mesh-message.sh +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/communicate/scripts/post-comment.sh +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/communicate/scripts/post-issue.sh +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/communicate/scripts/templates/skill-new-brief.md +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/communicate/scripts/templates/skill-update-brief.md +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/doc-test-alignment/SKILL.md +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/doc-test-alignment/scripts/check.sh +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/pypi-maintainer/SKILL.md +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/pypi-maintainer/scripts/switch-source.sh +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/run-tests/SKILL.md +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/run-tests/scripts/test.sh +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/sonarclaude/SKILL.md +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/sonarclaude/scripts/sonar.sh +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/spec-to-plan/SKILL.md +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/spec-to-plan/scripts/spec-to-plan.sh +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/think/SKILL.md +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/think/scripts/think.sh +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/version-bump/SKILL.md +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/version-bump/scripts/bump.py +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills.local.yaml.example +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.flake8 +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.github/workflows/publish.yml +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.github/workflows/tests.yml +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.gitignore +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.markdownlint-cli2.yaml +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/AGENTS.colleague.md +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/LICENSE +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/culture.yaml +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/data_refinery/__init__.py +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/data_refinery/__main__.py +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/data_refinery/cli/_commands/__init__.py +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/data_refinery/cli/_output.py +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/docs/skill-sources.md +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/sonar-project.properties +0 -0
- {data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/tests/__init__.py +0 -0
|
@@ -5,6 +5,36 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
Format follows [Keep a Changelog](https://keepachangelog.com/). This project
|
|
6
6
|
adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.3.3] - 2026-06-20
|
|
9
|
+
|
|
10
|
+
### Changed
|
|
11
|
+
|
|
12
|
+
- `/init`: rewrote the seed `CLAUDE.md` placeholder into a full Claude Code
|
|
13
|
+
runtime prompt — the three-name split (`data-refinery` command vs
|
|
14
|
+
`data-refinery-cli` dist/nick vs `data_refinery` package), the CLI contracts
|
|
15
|
+
(`CliError`/exit-codes, stdout/stderr split, the explain catalog, the `doctor`
|
|
16
|
+
invariants), the `colleague` backend + dual prompt files, version-bump-every-PR,
|
|
17
|
+
the CI gates, the skills kit, and the issue #1 storage/data-quality domain
|
|
18
|
+
roadmap.
|
|
19
|
+
- Reconciled the CLI command-surface text from `data-refinery-cli` to the actual
|
|
20
|
+
binary name `data-refinery` (argparse `prog`/description, `learn`, the `explain`
|
|
21
|
+
catalog bodies + headings, `overview`/`cli` subjects, `doctor` output, and the
|
|
22
|
+
README quickstart). `data-refinery-cli` is kept as `whoami`'s nick and a
|
|
23
|
+
back-compat `explain` alias.
|
|
24
|
+
- CLI self-description (`learn`/`explain`/`overview`) no longer calls the agent
|
|
25
|
+
"a clonable template" — it names the data-quality domain and notes the domain
|
|
26
|
+
verbs are not built yet.
|
|
27
|
+
|
|
28
|
+
### Fixed
|
|
29
|
+
|
|
30
|
+
- Agent-first rubric (`teken cli doctor . --strict`) was red on `explain_self`:
|
|
31
|
+
it runs `explain data-refinery` (the `[project.scripts]` command) but the
|
|
32
|
+
catalog only keyed `("data-refinery-cli",)`. Added the `("data-refinery",)` root
|
|
33
|
+
entry (alias kept) — rubric back to 26/26.
|
|
34
|
+
- `pyproject.toml` license `MIT` → `Apache-2.0` (+ matching classifier) to match
|
|
35
|
+
the `LICENSE` file and README (org-wide scaffold bug inherited from the
|
|
36
|
+
template; a correction, not a relicense).
|
|
37
|
+
|
|
8
38
|
## [0.3.2] - 2026-06-18
|
|
9
39
|
|
|
10
40
|
### Added
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## What this is
|
|
6
|
+
|
|
7
|
+
**data-refinery-cli** is an AgentCulture mesh agent: a CLI for **data quality in
|
|
8
|
+
storage and retrieval** — validating, deduplicating, and checking the integrity
|
|
9
|
+
and freshness of data as it is stored and fetched. It is being split out of
|
|
10
|
+
**eidetic-cli** so eidetic keeps the agent-memory layer; it is a sibling to
|
|
11
|
+
**daria** (the Data Refinery Intelligent Agent).
|
|
12
|
+
|
|
13
|
+
**Current state — read this first.** The data-quality/storage domain is **not
|
|
14
|
+
built yet**. Runtime `dependencies = []`; the code on disk today is the inherited
|
|
15
|
+
*agent-first introspection scaffold* (`whoami` / `learn` / `explain` / `overview`
|
|
16
|
+
/ `doctor` + a `cli` noun), cloned from `culture-agent-template` and cited from
|
|
17
|
+
[teken](https://github.com/agentculture/teken)'s `python-cli` reference. Its
|
|
18
|
+
self-description (`learn`, `explain`, `overview`) now names the data-quality
|
|
19
|
+
domain honestly — "the data-quality verbs are not built yet" — rather than the
|
|
20
|
+
old "clonable template" scaffold framing. The repo's true purpose is the
|
|
21
|
+
data-quality agent above and the build order in **issue #1** (see "Domain
|
|
22
|
+
roadmap").
|
|
23
|
+
|
|
24
|
+
## Names: there are three, and they differ on purpose
|
|
25
|
+
|
|
26
|
+
This trips up every change to the CLI surface. Keep them straight:
|
|
27
|
+
|
|
28
|
+
| Name | Value | Where it lives |
|
|
29
|
+
|------|-------|----------------|
|
|
30
|
+
| **CLI command** (the binary) | `data-refinery` | `[project.scripts]` in `pyproject.toml` — this is what you invoke: `uv run data-refinery whoami` |
|
|
31
|
+
| **PyPI dist + mesh nick** | `data-refinery-cli` | `pyproject.toml` `name`, `culture.yaml` `suffix`, the Sonar key `agentculture_data-refinery-cli`, `__version__` lookup, `_ISSUES_URL` |
|
|
32
|
+
| **Python package / import** | `data_refinery` | the `data_refinery/` dir, `import data_refinery`, `sonar.sources` |
|
|
33
|
+
|
|
34
|
+
`data-refinery-cli` is **not** an executable — `uv run data-refinery-cli …`
|
|
35
|
+
fails; the binary is `data-refinery`. The CLI's command-surface text (help,
|
|
36
|
+
`learn`, the `explain` catalog, `overview` subjects) and the README quickstart
|
|
37
|
+
all use `data-refinery`. The dist/nick name `data-refinery-cli` is kept as a
|
|
38
|
+
back-compat **alias** in the explain catalog (`explain data-refinery-cli` still
|
|
39
|
+
resolves) and as `whoami`'s nick. When you add a verb, keep this split: command
|
|
40
|
+
text → `data-refinery`, nick/dist/URL/`_pkg_version`/`_FALLBACK_NICK` →
|
|
41
|
+
`data-refinery-cli`.
|
|
42
|
+
|
|
43
|
+
## Common commands
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
uv sync # create .venv, install runtime + dev deps
|
|
47
|
+
uv run pytest -n auto # full test suite (xdist parallel)
|
|
48
|
+
uv run pytest tests/test_cli.py::test_whoami_json # a single test
|
|
49
|
+
uv run pytest -k explain # tests matching a name
|
|
50
|
+
uv run data-refinery whoami # run the CLI (note: data-refinery, not -cli)
|
|
51
|
+
uv run data-refinery learn --json # every verb supports --json
|
|
52
|
+
python -m data_refinery whoami # equivalent module entry point
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Lint / quality gates (each is its own CI job — run all before a PR):
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
uv run black --check data_refinery tests
|
|
59
|
+
uv run isort --check-only data_refinery tests
|
|
60
|
+
uv run flake8 data_refinery tests
|
|
61
|
+
uv run bandit -c pyproject.toml -r data_refinery
|
|
62
|
+
markdownlint-cli2 "**/*.md" "#node_modules" "#.local" "#.claude/skills" "#.teken"
|
|
63
|
+
uv run teken cli doctor . --strict # the agent-first rubric gate (see below)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
`black` / `isort` use line-length 100 (`pyproject.toml`); `flake8` matches it via
|
|
67
|
+
`.flake8`. Run `black`/`isort` without `--check` to auto-fix. For bulk markdown
|
|
68
|
+
fixes use the `lint-fix` agent.
|
|
69
|
+
|
|
70
|
+
## Architecture
|
|
71
|
+
|
|
72
|
+
The whole CLI is built around one contract: **an agent reading the output can
|
|
73
|
+
rely on it.** Structured errors, a strict stream split, `--json` everywhere, and
|
|
74
|
+
documented exit codes. The pieces that enforce this span several files:
|
|
75
|
+
|
|
76
|
+
- **`data_refinery/cli/__init__.py`** — `main(argv) -> int` is the entry point
|
|
77
|
+
(`main(argv: list[str] | None = None)`, the contract teken checks). It builds an
|
|
78
|
+
argparse tree, dispatches, and translates every failure to an exit code.
|
|
79
|
+
- `_CliArgumentParser` overrides `.error()` so **argparse-level** failures
|
|
80
|
+
(unknown verb, bad flag) route through the same `error:` / `hint:` structured
|
|
81
|
+
format and exit 1 — not argparse's default `stderr` + exit 2. `parser_class`
|
|
82
|
+
is propagated to every subparser so nested parse errors behave the same.
|
|
83
|
+
Because parse errors happen *before* `args.json` exists, `main()` pre-scans
|
|
84
|
+
raw argv for `--json` and sets the class-level `_json_hint`.
|
|
85
|
+
- `_dispatch()` calls the handler; catches `CliError` → `emit_error`; wraps
|
|
86
|
+
**any other exception** into a `CliError` so **no Python traceback ever
|
|
87
|
+
leaks** to stderr (a hard rubric requirement).
|
|
88
|
+
- **`_errors.py`** — `CliError{code, message, remediation}` and the exit-code
|
|
89
|
+
policy: `0` success, `1` user-input error, `2` environment/setup error, `3+`
|
|
90
|
+
reserved. Every failure path raises `CliError`.
|
|
91
|
+
- **`_output.py`** — the strict stream split: `emit_result` → stdout,
|
|
92
|
+
`emit_error` / `emit_diagnostic` → stderr, **never mixed**. In JSON mode each
|
|
93
|
+
goes to its own stream as one JSON line. The `hint:` prefix on errors is
|
|
94
|
+
load-bearing (agents and the rubric grep for it).
|
|
95
|
+
- **Command modules** in `cli/_commands/` each expose `register(sub)` and a
|
|
96
|
+
handler returning `int | None`. To add a verb/noun: write the module, then call
|
|
97
|
+
its `register()` in `_build_parser()`. Nouns with action-verbs must also expose
|
|
98
|
+
an `overview` (rubric `overview_cli_noun_exists`) — the `cli` noun exists purely
|
|
99
|
+
to model that pattern (it has no action-verbs yet, only `cli overview`).
|
|
100
|
+
- **`data_refinery/explain/`** — `catalog.py` holds verbatim markdown keyed by
|
|
101
|
+
**command-path tuples** (`("whoami",)`, `("cli","overview")`); `resolve()`
|
|
102
|
+
raises `CliError` on an unknown path. Every registered noun/verb needs an
|
|
103
|
+
entry, and the root must be keyed under the **command name** `("data-refinery",)`
|
|
104
|
+
(the rubric's `explain_self` runs `explain data-refinery`) — with
|
|
105
|
+
`("data-refinery-cli",)` kept as a back-compat alias.
|
|
106
|
+
- **Identity without a YAML dependency.** `whoami.py` and `doctor.py` parse
|
|
107
|
+
`culture.yaml` by hand (line-scanning, not PyYAML) to preserve the deps-empty
|
|
108
|
+
invariant. `find_culture_yaml()` walks up from `__file__` to find *this agent's
|
|
109
|
+
own* `culture.yaml` (not whatever is in the caller's CWD); in a wheel install
|
|
110
|
+
none ships, so identity falls back to literal defaults.
|
|
111
|
+
- **`doctor`** mirrors the `steward doctor` invariants for a mesh agent:
|
|
112
|
+
*prompt-file-present* + *backend-consistency* (`backend → prompt file`:
|
|
113
|
+
`claude→CLAUDE.md`, `colleague→AGENTS.colleague.md`, `acp→AGENTS.md`,
|
|
114
|
+
`gemini→GEMINI.md`) and a *skills-present* check. It returns the rubric-shaped
|
|
115
|
+
`{healthy, checks:[{id,passed,severity,message,remediation}]}`.
|
|
116
|
+
|
|
117
|
+
### The agent-first rubric (`teken cli doctor . --strict`)
|
|
118
|
+
|
|
119
|
+
This is the **lint job's gate** and the design spec for the whole CLI. It checks
|
|
120
|
+
seven bundles — structure, learnability, json, errors, explain, overview, doctor
|
|
121
|
+
— against the *actual running CLI* (it invokes `data-refinery <verb>` and asserts
|
|
122
|
+
on output/exit codes). When you change the CLI surface, this is the fast check:
|
|
123
|
+
keep `learn` ≥200 chars with all markers, keep errors traceback-free with hints,
|
|
124
|
+
keep `explain <command-name>` resolving, keep every noun's `overview`.
|
|
125
|
+
|
|
126
|
+
## Identity & the two prompt files
|
|
127
|
+
|
|
128
|
+
`culture.yaml` declares the agent: `suffix: data-refinery-cli`,
|
|
129
|
+
**`backend: colleague`**, model `sakamakismile/Qwen3.6-27B-Text-NVFP4-MTP`. Two
|
|
130
|
+
prompt files coexist and serve different runtimes:
|
|
131
|
+
|
|
132
|
+
- **`CLAUDE.md`** (this file) — the prompt for **Claude Code** when a human (or
|
|
133
|
+
you) operates in the repo interactively.
|
|
134
|
+
- **`AGENTS.colleague.md`** — the resident prompt for the **colleague backend**
|
|
135
|
+
(the Qwen tool-loop peer that actually *runs* as this mesh agent). Because the
|
|
136
|
+
declared backend is `colleague`, this is the file `doctor`/`steward` require,
|
|
137
|
+
and it is currently a thin generic stub.
|
|
138
|
+
|
|
139
|
+
When you change durable agent behavior that should hold regardless of who runs
|
|
140
|
+
the agent, update **both** files (they target different runtimes but describe the
|
|
141
|
+
same agent). The seed version of this file claimed `backend: claude` — that was
|
|
142
|
+
stale; the live backend is `colleague`.
|
|
143
|
+
|
|
144
|
+
## Conventions that gate merges
|
|
145
|
+
|
|
146
|
+
- **Version-bump-every-PR.** Every PR — even docs/config/CI-only — must bump the
|
|
147
|
+
`pyproject.toml` version, or the `version-check` CI job fails. Use the
|
|
148
|
+
`version-bump` skill (updates `pyproject.toml` + prepends a Keep-a-Changelog
|
|
149
|
+
entry to `CHANGELOG.md`). The check compares your version to `origin/main`.
|
|
150
|
+
- **Runtime deps stay empty.** `dependencies = []` is an invariant of the current
|
|
151
|
+
scaffold; `teken` and the linters are **dev-only**. When the domain lands and
|
|
152
|
+
needs `neo4j` / `pymongo` (issue #1), follow the sibling pattern proven across
|
|
153
|
+
this org: put heavy deps behind an **optional extra**, **lazy-import** them
|
|
154
|
+
inside function bodies, exit `CliError(code=2)` with an install `hint:` when
|
|
155
|
+
absent, and add a static test asserting no top-level import of the optional dep.
|
|
156
|
+
- **No traceback, ever.** Failures raise `CliError`; the dispatcher wraps stray
|
|
157
|
+
exceptions. Don't `print()` to stdout for errors or let exceptions escape.
|
|
158
|
+
- **`--json` on every command**, stdout/stderr never mixed.
|
|
159
|
+
- **SonarCloud coverage uses repo-relative paths.** `pyproject.toml`
|
|
160
|
+
`[tool.coverage.run] relative_files = true, source = ["data_refinery"]` is what
|
|
161
|
+
makes `coverage.xml` map onto `sonar.sources` — don't remove it. `omit` ≠ Sonar
|
|
162
|
+
coverage exclusion; mirror any `omit` into `sonar.coverage.exclusions`.
|
|
163
|
+
|
|
164
|
+
### CI jobs (what must be green)
|
|
165
|
+
|
|
166
|
+
- `.github/workflows/tests.yml` → **test** (pytest + coverage, then SonarCloud
|
|
167
|
+
scan gated on `SONAR_TOKEN`; `sonar.qualitygate.wait=true` fails the job on a
|
|
168
|
+
red gate), **lint** (black, isort, flake8, bandit, markdownlint, **the teken
|
|
169
|
+
rubric gate**), **version-check** (PR-only; enforces the bump).
|
|
170
|
+
- `.github/workflows/publish.yml` → TestPyPI on PRs, PyPI on push to `main`, via
|
|
171
|
+
Trusted Publishing (OIDC, no tokens). Triggered by changes to `pyproject.toml`
|
|
172
|
+
or `data_refinery/**`.
|
|
173
|
+
|
|
174
|
+
Use the **`cicd`** skill for the PR lifecycle (open / read / reply / status /
|
|
175
|
+
await SonarCloud); **`sonarclaude`** for quality-gate queries; **`run-tests`** to
|
|
176
|
+
run pytest. PR comments/issue posts auto-sign as `- data-refinery-cli (Claude)`.
|
|
177
|
+
|
|
178
|
+
## Skills (cite-don't-import)
|
|
179
|
+
|
|
180
|
+
`.claude/skills/` carries 12 skills vendored from **guildmaster** (the
|
|
181
|
+
AgentCulture skills supplier; `steward` keeps only the alignment role). They are
|
|
182
|
+
copied, not depended on — each consumer owns its copy. Provenance, per-skill
|
|
183
|
+
adaptation notes, and the re-sync procedure live in
|
|
184
|
+
[`docs/skill-sources.md`](docs/skill-sources.md). Two tracked divergences:
|
|
185
|
+
`agex→devex` and `outsource→ask-colleague` (vendored directly from `colleague`
|
|
186
|
+
until guildmaster re-broadcasts). Every `SKILL.md` must keep `type: command` in
|
|
187
|
+
frontmatter — `core.skill_loader` silently skips any that omit it.
|
|
188
|
+
|
|
189
|
+
Reach for **`ask-colleague`** reflexively: `review` for a diverse second opinion
|
|
190
|
+
on a committed diff before a PR, `explore` for a fresh read of an unfamiliar area
|
|
191
|
+
(both read-only, isolated in a throwaway worktree — always safe); `write --apply`
|
|
192
|
+
/ `--pr` mutates and needs the user's go-ahead. Optional `colleague` CLI on PATH.
|
|
193
|
+
|
|
194
|
+
## Domain roadmap (issue #1)
|
|
195
|
+
|
|
196
|
+
The agent's actual build order lives in
|
|
197
|
+
[issue #1](https://github.com/agentculture/data-refinery-cli/issues/1): take
|
|
198
|
+
ownership of the **storage + data-quality layer** split from eidetic-cli (this
|
|
199
|
+
returns the store substrate to its origin — eidetic's store/cypher/embedding
|
|
200
|
+
logic was *cited from* `data-refinery` in the first place). data-refinery-cli is
|
|
201
|
+
to own:
|
|
202
|
+
|
|
203
|
+
- a **`docker-compose.yml`** bringing up `mongo:8.0` on host port **27018** (not
|
|
204
|
+
27017 — deliberate collision-avoidance, eidetic's defaults already point there)
|
|
205
|
+
and `neo4j:5-community` (bolt 7687, apoc), and a **multi-arch GHCR image**
|
|
206
|
+
bundling that stack;
|
|
207
|
+
- the **files backend** + **cypher/mongo store adapters**, and the
|
|
208
|
+
**consumer-agnostic data-quality surface**: validate, dedup (by `id`/`hash`,
|
|
209
|
+
idempotent upsert), integrity, freshness — with **no eidetic memory semantics
|
|
210
|
+
leaking in**;
|
|
211
|
+
- the `neo4j` + `pymongo` runtime deps.
|
|
212
|
+
|
|
213
|
+
eidetic-cli keeps the record schema, the `remember`/`recall`/`sweep`/`migrate`
|
|
214
|
+
verbs, relevance scoring + the freshness *signal*, the no-hard-delete lifecycle,
|
|
215
|
+
and the per-scope public/private no-leak invariant — and becomes the **first
|
|
216
|
+
consumer** over a process (subprocess-not-import) boundary. Invariants your layer
|
|
217
|
+
must not break: idempotent dedup, and the public/private scope no-leak. The
|
|
218
|
+
proposed boundary is **phased**: ship the GHCR image first, move adapters + the
|
|
219
|
+
CLI surface second. Naming of the image and the store/quality verbs is this
|
|
220
|
+
repo's call (it owns the surface) but must be documented so eidetic can pin.
|
|
221
|
+
|
|
222
|
+
## Remaining gaps / next steps
|
|
223
|
+
|
|
224
|
+
The `/init` PR reconciled the three scaffold defects (the `explain_self` rubric
|
|
225
|
+
failure via the `("data-refinery",)` catalog key + command-surface sweep; the
|
|
226
|
+
`pyproject` license `MIT` → `Apache-2.0`; the "clonable template" self-description
|
|
227
|
+
→ the data-quality domain). What is still open:
|
|
228
|
+
|
|
229
|
+
1. **The domain itself is unbuilt** — implement issue #1 (the storage +
|
|
230
|
+
data-quality layer). This is the substantive work.
|
|
231
|
+
2. **`AGENTS.colleague.md` is a thin generic stub.** Since the agent runs on the
|
|
232
|
+
`colleague` backend, its resident prompt should be fleshed out to match this
|
|
233
|
+
file's identity/invariants (the sibling cloudai-cli did this during its init).
|
|
234
|
+
3. **README + `overview` still carry some template framing** ("Make it your own",
|
|
235
|
+
the "sibling-pattern artifacts" section) — minor doc drift to retire as the
|
|
236
|
+
domain lands.
|
|
237
|
+
|
|
238
|
+
## Renaming / scaffold lineage
|
|
239
|
+
|
|
240
|
+
This repo descends from `culture-agent-template`; the project's three names are hard-coded
|
|
241
|
+
in ~100 places. To find every occurrence before a sweep:
|
|
242
|
+
|
|
243
|
+
```bash
|
|
244
|
+
git grep -nw data-refinery-cli # dist/nick name
|
|
245
|
+
git grep -nw data_refinery # python package
|
|
246
|
+
git grep -nw data-refinery # CLI command (also matches data-refinery-cli: `-` is a word boundary)
|
|
247
|
+
```
|
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: data-refinery-cli
|
|
3
|
-
Version: 0.3.2
|
|
3
|
+
Version: 0.3.3
|
|
4
4
|
Summary: Agent and CLI for data quality in storage and retrieval — validating, deduplicating, and checking the integrity and freshness of data as it is stored and fetched. Split out of eidetic-cli so eidetic keeps agent-memory; sibling to daria, the Data Refinery Intelligent Agent.
|
|
5
5
|
Project-URL: Homepage, https://github.com/agentculture/data-refinery-cli
|
|
6
6
|
Project-URL: Issues, https://github.com/agentculture/data-refinery-cli/issues
|
|
7
7
|
Author: AgentCulture
|
|
8
|
-
License-Expression: MIT
|
|
8
|
+
License-Expression: Apache-2.0
|
|
9
9
|
License-File: LICENSE
|
|
10
10
|
Classifier: Development Status :: 3 - Alpha
|
|
11
11
|
Classifier: Intended Audience :: Developers
|
|
12
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.12
|
|
14
14
|
Classifier: Topic :: Software Development
|
|
15
15
|
Requires-Python: >=3.12
|
|
@@ -35,8 +35,8 @@ Agent and CLI for data quality in storage and retrieval — validating, deduplic
|
|
|
35
35
|
```bash
|
|
36
36
|
uv sync
|
|
37
37
|
uv run pytest -n auto # run the test suite
|
|
38
|
-
uv run data-refinery
|
|
39
|
-
uv run data-refinery
|
|
38
|
+
uv run data-refinery whoami # identity from culture.yaml
|
|
39
|
+
uv run data-refinery learn # self-teaching prompt (add --json)
|
|
40
40
|
uv run teken cli doctor . --strict # the agent-first rubric gate CI runs
|
|
41
41
|
```
|
|
42
42
|
|
|
@@ -18,8 +18,8 @@ Agent and CLI for data quality in storage and retrieval — validating, deduplic
|
|
|
18
18
|
```bash
|
|
19
19
|
uv sync
|
|
20
20
|
uv run pytest -n auto # run the test suite
|
|
21
|
-
uv run data-refinery
|
|
22
|
-
uv run data-refinery
|
|
21
|
+
uv run data-refinery whoami # identity from culture.yaml
|
|
22
|
+
uv run data-refinery learn # self-teaching prompt (add --json)
|
|
23
23
|
uv run teken cli doctor . --strict # the agent-first rubric gate CI runs
|
|
24
24
|
```
|
|
25
25
|
|
|
@@ -70,8 +70,8 @@ def _build_parser() -> argparse.ArgumentParser:
|
|
|
70
70
|
from data_refinery.cli._commands import whoami as _whoami_cmd
|
|
71
71
|
|
|
72
72
|
parser = _CliArgumentParser(
|
|
73
|
-
prog="data-refinery
|
|
74
|
-
description="data-refinery
|
|
73
|
+
prog="data-refinery",
|
|
74
|
+
description="data-refinery — agent and CLI for data quality in storage and retrieval.",
|
|
75
75
|
)
|
|
76
76
|
parser.add_argument(
|
|
77
77
|
"--version",
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""``data-refinery
|
|
1
|
+
"""``data-refinery cli`` — noun grouping CLI-surface introspection.
|
|
2
2
|
|
|
3
3
|
Exists to satisfy the agent-first rubric's ``overview_cli_noun_exists`` check:
|
|
4
4
|
any noun with action-verbs must also expose ``overview``. There are no
|
|
@@ -15,7 +15,7 @@ from data_refinery.cli._commands.overview import cli_sections, emit_overview
|
|
|
15
15
|
|
|
16
16
|
def cmd_cli_overview(args: argparse.Namespace) -> int:
|
|
17
17
|
emit_overview(
|
|
18
|
-
"data-refinery
|
|
18
|
+
"data-refinery cli",
|
|
19
19
|
cli_sections(),
|
|
20
20
|
json_mode=bool(getattr(args, "json", False)),
|
|
21
21
|
)
|
|
@@ -23,14 +23,14 @@ def cmd_cli_overview(args: argparse.Namespace) -> int:
|
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
def _no_verb(args: argparse.Namespace) -> int:
|
|
26
|
-
# `data-refinery
|
|
26
|
+
# `data-refinery cli` with no sub-verb prints the noun's overview.
|
|
27
27
|
return cmd_cli_overview(args)
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
def register(sub: argparse._SubParsersAction) -> None:
|
|
31
31
|
p = sub.add_parser(
|
|
32
32
|
"cli",
|
|
33
|
-
help="CLI-surface introspection (see 'data-refinery
|
|
33
|
+
help="CLI-surface introspection (see 'data-refinery cli overview').",
|
|
34
34
|
)
|
|
35
35
|
p.add_argument("--json", action="store_true", help="Emit structured JSON.")
|
|
36
36
|
p.set_defaults(func=_no_verb, json=False)
|
|
@@ -38,6 +38,6 @@ def register(sub: argparse._SubParsersAction) -> None:
|
|
|
38
38
|
# parser_class); propagate it so `cli overview` parse errors route through
|
|
39
39
|
# the structured error contract instead of argparse's default stderr/exit 2.
|
|
40
40
|
noun_sub = p.add_subparsers(dest="cli_command", parser_class=type(p))
|
|
41
|
-
ov = noun_sub.add_parser("overview", help="Describe the data-refinery
|
|
41
|
+
ov = noun_sub.add_parser("overview", help="Describe the data-refinery CLI surface.")
|
|
42
42
|
ov.add_argument("--json", action="store_true", help="Emit structured JSON.")
|
|
43
43
|
ov.set_defaults(func=cmd_cli_overview)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""``data-refinery
|
|
1
|
+
"""``data-refinery doctor`` — check the agent-identity invariants.
|
|
2
2
|
|
|
3
3
|
Mirrors the two invariants ``steward doctor`` verifies for a mesh agent:
|
|
4
4
|
|
|
@@ -105,7 +105,7 @@ def cmd_doctor(args: argparse.Namespace) -> int:
|
|
|
105
105
|
emit_result(report, json_mode=True)
|
|
106
106
|
else:
|
|
107
107
|
status = "healthy" if report["healthy"] else "unhealthy"
|
|
108
|
-
lines = [f"data-refinery
|
|
108
|
+
lines = [f"data-refinery doctor: {status}", ""]
|
|
109
109
|
for check in report["checks"]:
|
|
110
110
|
mark = "ok" if check["passed"] else "FAIL"
|
|
111
111
|
lines.append(f"[{mark}] {check['id']}: {check['message']}")
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""``data-refinery
|
|
1
|
+
"""``data-refinery explain <path>...`` — global markdown catalog lookup (stable-contract).
|
|
2
2
|
|
|
3
3
|
``explain`` is global (not nested under a noun). It takes zero or more path
|
|
4
4
|
tokens and resolves them via the catalog in :mod:`data_refinery.explain`.
|
|
@@ -32,7 +32,7 @@ def register(sub: argparse._SubParsersAction) -> None:
|
|
|
32
32
|
p.add_argument(
|
|
33
33
|
"path",
|
|
34
34
|
nargs="*",
|
|
35
|
-
help="Command path tokens; empty = root (same as 'data-refinery-cli').",
|
|
35
|
+
help="Command path tokens; empty = root (same as 'data-refinery').",
|
|
36
36
|
)
|
|
37
37
|
p.add_argument("--json", action="store_true", help="Emit structured JSON.")
|
|
38
38
|
p.set_defaults(func=cmd_explain)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""``data-refinery-cli learn`` — the learnability affordance.
|
|
1
|
+
"""``data-refinery learn`` — the learnability affordance.
|
|
2
2
|
|
|
3
3
|
Prints a structured self-teaching prompt. Must satisfy the agent-first rubric:
|
|
4
4
|
>=200 chars and mention purpose, command map, exit codes, --json, and explain.
|
|
@@ -12,23 +12,24 @@ from data_refinery import __version__
|
|
|
12
12
|
from data_refinery.cli._output import emit_result
|
|
13
13
|
|
|
14
14
|
_TEXT = """\
|
|
15
|
-
data-refinery
|
|
15
|
+
data-refinery — agent and CLI for data quality in storage and retrieval.
|
|
16
16
|
|
|
17
17
|
Purpose
|
|
18
18
|
-------
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
19
|
+
Validate, deduplicate, and check the integrity and freshness of data as it is
|
|
20
|
+
stored and fetched. Split out of eidetic-cli so eidetic keeps agent-memory;
|
|
21
|
+
sibling to daria. The data-quality verbs are not built yet (see issue #1) —
|
|
22
|
+
today this exposes the agent-first introspection surface below on a
|
|
23
|
+
self-contained runtime (no third-party dependencies).
|
|
23
24
|
|
|
24
25
|
Commands
|
|
25
26
|
--------
|
|
26
|
-
data-refinery
|
|
27
|
-
data-refinery
|
|
28
|
-
data-refinery
|
|
29
|
-
data-refinery
|
|
30
|
-
data-refinery
|
|
31
|
-
data-refinery
|
|
27
|
+
data-refinery whoami Identity from culture.yaml.
|
|
28
|
+
data-refinery learn This self-teaching prompt.
|
|
29
|
+
data-refinery explain <path>... Markdown docs for any noun/verb path.
|
|
30
|
+
data-refinery overview Descriptive snapshot of the agent.
|
|
31
|
+
data-refinery doctor Check the agent-identity invariants.
|
|
32
|
+
data-refinery cli overview Describe the CLI surface itself.
|
|
32
33
|
|
|
33
34
|
Machine-readable output
|
|
34
35
|
-----------------------
|
|
@@ -44,15 +45,15 @@ Exit-code policy
|
|
|
44
45
|
|
|
45
46
|
More detail
|
|
46
47
|
-----------
|
|
47
|
-
data-refinery
|
|
48
|
+
data-refinery explain data-refinery
|
|
48
49
|
"""
|
|
49
50
|
|
|
50
51
|
|
|
51
52
|
def _as_json_payload() -> dict[str, object]:
|
|
52
53
|
return {
|
|
53
|
-
"tool": "data-refinery-cli",
|
|
54
|
+
"tool": "data-refinery",
|
|
54
55
|
"version": __version__,
|
|
55
|
-
"purpose": "
|
|
56
|
+
"purpose": "Agent and CLI for data quality in storage and retrieval.",
|
|
56
57
|
"commands": [
|
|
57
58
|
{"path": ["whoami"], "summary": "Identity probe from culture.yaml."},
|
|
58
59
|
{"path": ["learn"], "summary": "Self-teaching prompt."},
|
|
@@ -67,7 +68,7 @@ def _as_json_payload() -> dict[str, object]:
|
|
|
67
68
|
"2": "environment/setup error",
|
|
68
69
|
},
|
|
69
70
|
"json_support": True,
|
|
70
|
-
"explain_pointer": "data-refinery-cli explain <path>",
|
|
71
|
+
"explain_pointer": "data-refinery explain <path>",
|
|
71
72
|
}
|
|
72
73
|
|
|
73
74
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""``data-refinery-cli overview`` — read-only descriptive snapshot of the agent.
|
|
1
|
+
"""``data-refinery overview`` — read-only descriptive snapshot of the agent.
|
|
2
2
|
|
|
3
3
|
Describes the agent to an agent reader: identity (from culture.yaml), the verb
|
|
4
4
|
surface, and the sibling-pattern artifacts this template carries. The shared
|
|
@@ -90,7 +90,7 @@ def cmd_overview(args: argparse.Namespace) -> int:
|
|
|
90
90
|
# `target` is accepted for rubric compatibility (descriptive verbs must not
|
|
91
91
|
# hard-fail on a missing path) but overview describes this agent itself.
|
|
92
92
|
emit_overview(
|
|
93
|
-
"data-refinery-cli",
|
|
93
|
+
"data-refinery",
|
|
94
94
|
agent_sections(),
|
|
95
95
|
json_mode=bool(getattr(args, "json", False)),
|
|
96
96
|
)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""``data-refinery-cli whoami`` — the smallest identity probe.
|
|
1
|
+
"""``data-refinery whoami`` — the smallest identity probe.
|
|
2
2
|
|
|
3
3
|
Reports the agent's identity as declared in ``culture.yaml``: its nick
|
|
4
4
|
(``suffix``), the backend it runs on, and the served model (if any) — plus the
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""CliError and exit-code policy (stable-contract).
|
|
2
2
|
|
|
3
|
-
Every failure inside data-refinery-cli raises :class:`CliError`. The
|
|
3
|
+
Every failure inside data-refinery raises :class:`CliError`. The
|
|
4
4
|
top-level ``main()`` catches it, formats via :mod:`data_refinery.cli._output`,
|
|
5
5
|
and exits with :attr:`CliError.code`. This guarantees:
|
|
6
6
|
|
|
@@ -13,7 +13,7 @@ from __future__ import annotations
|
|
|
13
13
|
|
|
14
14
|
from dataclasses import dataclass
|
|
15
15
|
|
|
16
|
-
# Exit-code policy. Documented in ``data-refinery-cli learn`` output.
|
|
16
|
+
# Exit-code policy. Documented in ``data-refinery learn`` output.
|
|
17
17
|
# 0 = success
|
|
18
18
|
# 1 = user-input error (bad flag, missing required arg, unknown path)
|
|
19
19
|
# 2 = environment / setup error (tool not installed, file unreadable)
|
|
@@ -16,7 +16,7 @@ def resolve(path: tuple[str, ...]) -> str:
|
|
|
16
16
|
raise CliError(
|
|
17
17
|
code=EXIT_USER_ERROR,
|
|
18
18
|
message=f"no explain entry for: {display}",
|
|
19
|
-
remediation="list entries with: data-refinery-cli explain data-refinery-cli",
|
|
19
|
+
remediation="list entries with: data-refinery explain data-refinery",
|
|
20
20
|
)
|
|
21
21
|
|
|
22
22
|
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""Markdown catalog for ``data-refinery explain <path>``.
|
|
2
|
+
|
|
3
|
+
Each entry is verbatim markdown. Keys are command-path tuples. The empty tuple
|
|
4
|
+
and ``("data-refinery",)`` both resolve to the root entry; ``("data-refinery-cli",)``
|
|
5
|
+
is kept as a back-compat alias for the dist/nick name.
|
|
6
|
+
|
|
7
|
+
Keep bodies self-contained: an agent reading one entry should get enough
|
|
8
|
+
context without chaining reads.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
_ROOT = """\
|
|
14
|
+
# data-refinery
|
|
15
|
+
|
|
16
|
+
Agent and CLI for **data quality in storage and retrieval** — validating,
|
|
17
|
+
deduplicating, and checking the integrity and freshness of data as it is stored
|
|
18
|
+
and fetched. Split out of eidetic-cli so eidetic keeps agent-memory; sibling to
|
|
19
|
+
daria, the Data Refinery Intelligent Agent.
|
|
20
|
+
|
|
21
|
+
The data-quality verbs are not built yet (see issue #1). Today this exposes the
|
|
22
|
+
agent-first introspection surface below on a self-contained runtime (no
|
|
23
|
+
third-party dependencies). The binary is `data-refinery` (the PyPI dist and mesh
|
|
24
|
+
nick are `data-refinery-cli`).
|
|
25
|
+
|
|
26
|
+
## Verbs
|
|
27
|
+
|
|
28
|
+
- `data-refinery whoami` — identity probe from `culture.yaml`.
|
|
29
|
+
- `data-refinery learn` — structured self-teaching prompt.
|
|
30
|
+
- `data-refinery explain <path>` — markdown docs for any noun/verb.
|
|
31
|
+
- `data-refinery overview` — descriptive snapshot of the agent.
|
|
32
|
+
- `data-refinery doctor` — check the agent-identity invariants.
|
|
33
|
+
- `data-refinery cli overview` — describe the CLI surface.
|
|
34
|
+
|
|
35
|
+
## Exit-code policy
|
|
36
|
+
|
|
37
|
+
- `0` success
|
|
38
|
+
- `1` user-input error
|
|
39
|
+
- `2` environment / setup error
|
|
40
|
+
- `3+` reserved
|
|
41
|
+
|
|
42
|
+
## See also
|
|
43
|
+
|
|
44
|
+
- `data-refinery explain whoami`
|
|
45
|
+
- `data-refinery explain doctor`
|
|
46
|
+
"""
|
|
47
|
+
|
|
48
|
+
_WHOAMI = """\
|
|
49
|
+
# data-refinery whoami
|
|
50
|
+
|
|
51
|
+
Reports the agent's identity from `culture.yaml`: nick (`suffix`), backend,
|
|
52
|
+
served model, and the package version. Read-only.
|
|
53
|
+
|
|
54
|
+
## Usage
|
|
55
|
+
|
|
56
|
+
data-refinery whoami
|
|
57
|
+
data-refinery whoami --json
|
|
58
|
+
"""
|
|
59
|
+
|
|
60
|
+
_LEARN = """\
|
|
61
|
+
# data-refinery learn
|
|
62
|
+
|
|
63
|
+
Prints a structured self-teaching prompt covering purpose, command map,
|
|
64
|
+
exit-code policy, `--json` support, and the `explain` pointer.
|
|
65
|
+
|
|
66
|
+
## Usage
|
|
67
|
+
|
|
68
|
+
data-refinery learn
|
|
69
|
+
data-refinery learn --json
|
|
70
|
+
"""
|
|
71
|
+
|
|
72
|
+
_EXPLAIN = """\
|
|
73
|
+
# data-refinery explain <path>
|
|
74
|
+
|
|
75
|
+
Prints markdown documentation for any noun/verb path. Unlike `--help` (terse,
|
|
76
|
+
positional), `explain` is global and addressable by path.
|
|
77
|
+
|
|
78
|
+
## Usage
|
|
79
|
+
|
|
80
|
+
data-refinery explain data-refinery
|
|
81
|
+
data-refinery explain whoami
|
|
82
|
+
data-refinery explain --json <path>
|
|
83
|
+
"""
|
|
84
|
+
|
|
85
|
+
_OVERVIEW = """\
|
|
86
|
+
# data-refinery overview
|
|
87
|
+
|
|
88
|
+
Read-only descriptive snapshot of the agent: identity (from `culture.yaml`), the
|
|
89
|
+
verb surface, and the sibling-pattern artifacts the agent carries. Accepts an
|
|
90
|
+
ignored `target` so a stray path never hard-fails.
|
|
91
|
+
|
|
92
|
+
## Usage
|
|
93
|
+
|
|
94
|
+
data-refinery overview
|
|
95
|
+
data-refinery overview --json
|
|
96
|
+
"""
|
|
97
|
+
|
|
98
|
+
_DOCTOR = """\
|
|
99
|
+
# data-refinery doctor
|
|
100
|
+
|
|
101
|
+
Checks the agent-identity invariants `steward doctor` verifies:
|
|
102
|
+
prompt-file-present and backend-consistency (`colleague` → `AGENTS.colleague.md`),
|
|
103
|
+
plus a skills-present check. Exits 1 when unhealthy.
|
|
104
|
+
|
|
105
|
+
## Usage
|
|
106
|
+
|
|
107
|
+
data-refinery doctor
|
|
108
|
+
data-refinery doctor --json
|
|
109
|
+
"""
|
|
110
|
+
|
|
111
|
+
_CLI = """\
|
|
112
|
+
# data-refinery cli
|
|
113
|
+
|
|
114
|
+
Noun group for CLI-surface introspection. `cli overview` describes the CLI
|
|
115
|
+
itself (distinct from the global `overview`, which describes the agent).
|
|
116
|
+
|
|
117
|
+
## Usage
|
|
118
|
+
|
|
119
|
+
data-refinery cli overview
|
|
120
|
+
data-refinery cli overview --json
|
|
121
|
+
"""
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
ENTRIES: dict[tuple[str, ...], str] = {
|
|
125
|
+
(): _ROOT,
|
|
126
|
+
("data-refinery",): _ROOT,
|
|
127
|
+
# Back-compat alias for the dist/nick name (the binary is `data-refinery`).
|
|
128
|
+
("data-refinery-cli",): _ROOT,
|
|
129
|
+
("whoami",): _WHOAMI,
|
|
130
|
+
("learn",): _LEARN,
|
|
131
|
+
("explain",): _EXPLAIN,
|
|
132
|
+
("overview",): _OVERVIEW,
|
|
133
|
+
("doctor",): _DOCTOR,
|
|
134
|
+
("cli",): _CLI,
|
|
135
|
+
("cli", "overview"): _CLI,
|
|
136
|
+
}
|
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "data-refinery-cli"
|
|
3
|
-
version = "0.3.2"
|
|
3
|
+
version = "0.3.3"
|
|
4
4
|
description = "Agent and CLI for data quality in storage and retrieval — validating, deduplicating, and checking the integrity and freshness of data as it is stored and fetched. Split out of eidetic-cli so eidetic keeps agent-memory; sibling to daria, the Data Refinery Intelligent Agent."
|
|
5
5
|
readme = "README.md"
|
|
6
|
-
license = "
|
|
6
|
+
license = "Apache-2.0"
|
|
7
7
|
requires-python = ">=3.12"
|
|
8
8
|
authors = [{name = "AgentCulture"}]
|
|
9
9
|
classifiers = [
|
|
10
10
|
"Development Status :: 3 - Alpha",
|
|
11
11
|
"Programming Language :: Python :: 3.12",
|
|
12
|
-
"License :: OSI Approved ::
|
|
12
|
+
"License :: OSI Approved :: Apache Software License",
|
|
13
13
|
"Topic :: Software Development",
|
|
14
14
|
"Intended Audience :: Developers",
|
|
15
15
|
]
|
|
@@ -21,7 +21,7 @@ def test_version_flag(capsys: pytest.CaptureFixture[str]) -> None:
|
|
|
21
21
|
def test_no_args_prints_help(capsys: pytest.CaptureFixture[str]) -> None:
|
|
22
22
|
rc = main([])
|
|
23
23
|
assert rc == 0
|
|
24
|
-
assert "usage: data-refinery-cli" in capsys.readouterr().out
|
|
24
|
+
assert "usage: data-refinery" in capsys.readouterr().out
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
def test_unknown_command_errors(capsys: pytest.CaptureFixture[str]) -> None:
|
|
@@ -62,7 +62,7 @@ def test_learn_text(capsys: pytest.CaptureFixture[str]) -> None:
|
|
|
62
62
|
assert rc == 0
|
|
63
63
|
out = capsys.readouterr().out
|
|
64
64
|
assert len(out) >= 200
|
|
65
|
-
assert "data-refinery-cli" in out
|
|
65
|
+
assert "data-refinery" in out
|
|
66
66
|
assert "Exit-code policy" in out
|
|
67
67
|
assert "--json" in out
|
|
68
68
|
assert "explain" in out
|
|
@@ -72,7 +72,7 @@ def test_learn_json(capsys: pytest.CaptureFixture[str]) -> None:
|
|
|
72
72
|
rc = main(["learn", "--json"])
|
|
73
73
|
assert rc == 0
|
|
74
74
|
payload = json.loads(capsys.readouterr().out)
|
|
75
|
-
assert payload["tool"] == "data-refinery-cli"
|
|
75
|
+
assert payload["tool"] == "data-refinery"
|
|
76
76
|
assert payload["version"] == __version__
|
|
77
77
|
assert payload["json_support"] is True
|
|
78
78
|
|
|
@@ -83,10 +83,18 @@ def test_learn_json(capsys: pytest.CaptureFixture[str]) -> None:
|
|
|
83
83
|
def test_explain_root(capsys: pytest.CaptureFixture[str]) -> None:
|
|
84
84
|
rc = main(["explain"])
|
|
85
85
|
assert rc == 0
|
|
86
|
-
assert "# data-refinery-cli" in capsys.readouterr().out
|
|
86
|
+
assert "# data-refinery" in capsys.readouterr().out
|
|
87
87
|
|
|
88
88
|
|
|
89
89
|
def test_explain_self(capsys: pytest.CaptureFixture[str]) -> None:
|
|
90
|
+
# `explain <command-name>` must resolve (the rubric's explain_self check).
|
|
91
|
+
rc = main(["explain", "data-refinery"])
|
|
92
|
+
assert rc == 0
|
|
93
|
+
assert capsys.readouterr().out.startswith("#")
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def test_explain_dist_name_alias(capsys: pytest.CaptureFixture[str]) -> None:
|
|
97
|
+
# The dist/nick name stays a back-compat alias for the root entry.
|
|
90
98
|
rc = main(["explain", "data-refinery-cli"])
|
|
91
99
|
assert rc == 0
|
|
92
100
|
assert capsys.readouterr().out.startswith("#")
|
|
@@ -97,7 +105,7 @@ def test_explain_json(capsys: pytest.CaptureFixture[str]) -> None:
|
|
|
97
105
|
assert rc == 0
|
|
98
106
|
payload = json.loads(capsys.readouterr().out)
|
|
99
107
|
assert payload["path"] == ["whoami"]
|
|
100
|
-
assert "data-refinery-cli whoami" in payload["markdown"]
|
|
108
|
+
assert "data-refinery whoami" in payload["markdown"]
|
|
101
109
|
|
|
102
110
|
|
|
103
111
|
def test_explain_unknown_path_errors(capsys: pytest.CaptureFixture[str]) -> None:
|
|
@@ -15,7 +15,7 @@ def test_overview_text(capsys: pytest.CaptureFixture[str]) -> None:
|
|
|
15
15
|
rc = main(["overview"])
|
|
16
16
|
assert rc == 0
|
|
17
17
|
out = capsys.readouterr().out
|
|
18
|
-
assert "# data-refinery-cli" in out
|
|
18
|
+
assert "# data-refinery" in out
|
|
19
19
|
assert "Identity" in out
|
|
20
20
|
|
|
21
21
|
|
|
@@ -23,7 +23,7 @@ def test_overview_json_shape(capsys: pytest.CaptureFixture[str]) -> None:
|
|
|
23
23
|
rc = main(["overview", "--json"])
|
|
24
24
|
assert rc == 0
|
|
25
25
|
payload = json.loads(capsys.readouterr().out)
|
|
26
|
-
assert payload["subject"] == "data-refinery-cli"
|
|
26
|
+
assert payload["subject"] == "data-refinery"
|
|
27
27
|
assert isinstance(payload["sections"], list)
|
|
28
28
|
assert payload["sections"]
|
|
29
29
|
|
|
@@ -41,14 +41,14 @@ def test_overview_graceful_on_bad_path(capsys: pytest.CaptureFixture[str]) -> No
|
|
|
41
41
|
def test_cli_overview_text(capsys: pytest.CaptureFixture[str]) -> None:
|
|
42
42
|
rc = main(["cli", "overview"])
|
|
43
43
|
assert rc == 0
|
|
44
|
-
assert "# data-refinery-cli cli" in capsys.readouterr().out
|
|
44
|
+
assert "# data-refinery cli" in capsys.readouterr().out
|
|
45
45
|
|
|
46
46
|
|
|
47
47
|
def test_cli_overview_json_shape(capsys: pytest.CaptureFixture[str]) -> None:
|
|
48
48
|
rc = main(["cli", "overview", "--json"])
|
|
49
49
|
assert rc == 0
|
|
50
50
|
payload = json.loads(capsys.readouterr().out)
|
|
51
|
-
assert payload["subject"] == "data-refinery-cli cli"
|
|
51
|
+
assert payload["subject"] == "data-refinery cli"
|
|
52
52
|
assert isinstance(payload["sections"], list)
|
|
53
53
|
|
|
54
54
|
|
|
@@ -77,7 +77,7 @@ def test_cli_overview_unknown_flag_structured_error(
|
|
|
77
77
|
def test_doctor_text(capsys: pytest.CaptureFixture[str]) -> None:
|
|
78
78
|
rc = main(["doctor"])
|
|
79
79
|
assert rc in (0, 1)
|
|
80
|
-
assert "data-refinery-cli doctor" in capsys.readouterr().out
|
|
80
|
+
assert "data-refinery doctor" in capsys.readouterr().out
|
|
81
81
|
|
|
82
82
|
|
|
83
83
|
def test_doctor_json_shape(capsys: pytest.CaptureFixture[str]) -> None:
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
# CLAUDE.md — seed / bootstrap placeholder
|
|
2
|
-
|
|
3
|
-
> **This is a self-initializing seed, not a finished runtime prompt.**
|
|
4
|
-
> Run `/init` (or describe the agent's domain to your AI assistant) to
|
|
5
|
-
> re-initialize this file into a full runtime prompt, using the description
|
|
6
|
-
> below and the scaffolded repo as context.
|
|
7
|
-
|
|
8
|
-
## Agent
|
|
9
|
-
|
|
10
|
-
This repository hosts the **data-refinery-cli** agent.
|
|
11
|
-
|
|
12
|
-
## Description
|
|
13
|
-
|
|
14
|
-
Agent and CLI for data quality in storage and retrieval — validating, deduplicating, and checking the integrity and freshness of data as it is stored and fetched. Split out of eidetic-cli so eidetic keeps agent-memory; sibling to daria, the Data Refinery Intelligent Agent.
|
|
15
|
-
|
|
16
|
-
## Re-init instruction
|
|
17
|
-
|
|
18
|
-
This file is a seed. To expand it into your full runtime prompt:
|
|
19
|
-
|
|
20
|
-
1. Open this repo in Claude Code (or your preferred AI assistant).
|
|
21
|
-
2. Run `/init` — the assistant will read the repo, incorporate the description
|
|
22
|
-
above, and replace this seed with a complete `CLAUDE.md`.
|
|
23
|
-
3. Commit the result.
|
|
24
|
-
|
|
25
|
-
Until you run `/init`, `data-refinery-cli` satisfies the `steward doctor`
|
|
26
|
-
`prompt-file-present` and `backend-consistency` invariants (a `CLAUDE.md`
|
|
27
|
-
exists and `culture.yaml` declares `backend: claude`) but the prompt is not
|
|
28
|
-
yet tailored to this agent's domain.
|
|
@@ -1,129 +0,0 @@
|
|
|
1
|
-
"""Markdown catalog for ``data-refinery-cli explain <path>``.
|
|
2
|
-
|
|
3
|
-
Each entry is verbatim markdown. Keys are command-path tuples. The empty tuple
|
|
4
|
-
and ``("data-refinery-cli",)`` both resolve to the root entry.
|
|
5
|
-
|
|
6
|
-
Keep bodies self-contained: an agent reading one entry should get enough
|
|
7
|
-
context without chaining reads.
|
|
8
|
-
"""
|
|
9
|
-
|
|
10
|
-
from __future__ import annotations
|
|
11
|
-
|
|
12
|
-
_ROOT = """\
|
|
13
|
-
# data-refinery-cli
|
|
14
|
-
|
|
15
|
-
A clonable template for AgentCulture mesh agents. It carries an agent-first CLI
|
|
16
|
-
(cited from the teken `python-cli` reference), a mesh identity (`culture.yaml` +
|
|
17
|
-
`CLAUDE.md`), the canonical guildmaster skill kit under `.claude/skills/`, and a
|
|
18
|
-
buildable/deployable package baseline. Clone it, rename the package, edit
|
|
19
|
-
`culture.yaml`, and you have a new agent.
|
|
20
|
-
|
|
21
|
-
## Verbs
|
|
22
|
-
|
|
23
|
-
- `data-refinery-cli whoami` — identity probe from `culture.yaml`.
|
|
24
|
-
- `data-refinery-cli learn` — structured self-teaching prompt.
|
|
25
|
-
- `data-refinery-cli explain <path>` — markdown docs for any noun/verb.
|
|
26
|
-
- `data-refinery-cli overview` — descriptive snapshot of the agent.
|
|
27
|
-
- `data-refinery-cli doctor` — check the agent-identity invariants.
|
|
28
|
-
- `data-refinery-cli cli overview` — describe the CLI surface.
|
|
29
|
-
|
|
30
|
-
## Exit-code policy
|
|
31
|
-
|
|
32
|
-
- `0` success
|
|
33
|
-
- `1` user-input error
|
|
34
|
-
- `2` environment / setup error
|
|
35
|
-
- `3+` reserved
|
|
36
|
-
|
|
37
|
-
## See also
|
|
38
|
-
|
|
39
|
-
- `data-refinery-cli explain whoami`
|
|
40
|
-
- `data-refinery-cli explain doctor`
|
|
41
|
-
"""
|
|
42
|
-
|
|
43
|
-
_WHOAMI = """\
|
|
44
|
-
# data-refinery-cli whoami
|
|
45
|
-
|
|
46
|
-
Reports the agent's identity from `culture.yaml`: nick (`suffix`), backend,
|
|
47
|
-
served model, and the package version. Read-only.
|
|
48
|
-
|
|
49
|
-
## Usage
|
|
50
|
-
|
|
51
|
-
data-refinery-cli whoami
|
|
52
|
-
data-refinery-cli whoami --json
|
|
53
|
-
"""
|
|
54
|
-
|
|
55
|
-
_LEARN = """\
|
|
56
|
-
# data-refinery-cli learn
|
|
57
|
-
|
|
58
|
-
Prints a structured self-teaching prompt covering purpose, command map,
|
|
59
|
-
exit-code policy, `--json` support, and the `explain` pointer.
|
|
60
|
-
|
|
61
|
-
## Usage
|
|
62
|
-
|
|
63
|
-
data-refinery-cli learn
|
|
64
|
-
data-refinery-cli learn --json
|
|
65
|
-
"""
|
|
66
|
-
|
|
67
|
-
_EXPLAIN = """\
|
|
68
|
-
# data-refinery-cli explain <path>
|
|
69
|
-
|
|
70
|
-
Prints markdown documentation for any noun/verb path. Unlike `--help` (terse,
|
|
71
|
-
positional), `explain` is global and addressable by path.
|
|
72
|
-
|
|
73
|
-
## Usage
|
|
74
|
-
|
|
75
|
-
data-refinery-cli explain data-refinery-cli
|
|
76
|
-
data-refinery-cli explain whoami
|
|
77
|
-
data-refinery-cli explain --json <path>
|
|
78
|
-
"""
|
|
79
|
-
|
|
80
|
-
_OVERVIEW = """\
|
|
81
|
-
# data-refinery-cli overview
|
|
82
|
-
|
|
83
|
-
Read-only descriptive snapshot of the agent: identity (from `culture.yaml`), the
|
|
84
|
-
verb surface, and the sibling-pattern artifacts the template carries. Accepts an
|
|
85
|
-
ignored `target` so a stray path never hard-fails.
|
|
86
|
-
|
|
87
|
-
## Usage
|
|
88
|
-
|
|
89
|
-
data-refinery-cli overview
|
|
90
|
-
data-refinery-cli overview --json
|
|
91
|
-
"""
|
|
92
|
-
|
|
93
|
-
_DOCTOR = """\
|
|
94
|
-
# data-refinery-cli doctor
|
|
95
|
-
|
|
96
|
-
Checks the agent-identity invariants `steward doctor` verifies:
|
|
97
|
-
prompt-file-present and backend-consistency (`claude` → `CLAUDE.md`), plus a
|
|
98
|
-
skills-present check. Exits 1 when unhealthy.
|
|
99
|
-
|
|
100
|
-
## Usage
|
|
101
|
-
|
|
102
|
-
data-refinery-cli doctor
|
|
103
|
-
data-refinery-cli doctor --json
|
|
104
|
-
"""
|
|
105
|
-
|
|
106
|
-
_CLI = """\
|
|
107
|
-
# data-refinery-cli cli
|
|
108
|
-
|
|
109
|
-
Noun group for CLI-surface introspection. `cli overview` describes the CLI
|
|
110
|
-
itself (distinct from the global `overview`, which describes the agent).
|
|
111
|
-
|
|
112
|
-
## Usage
|
|
113
|
-
|
|
114
|
-
data-refinery-cli cli overview
|
|
115
|
-
data-refinery-cli cli overview --json
|
|
116
|
-
"""
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
ENTRIES: dict[tuple[str, ...], str] = {
|
|
120
|
-
(): _ROOT,
|
|
121
|
-
("data-refinery-cli",): _ROOT,
|
|
122
|
-
("whoami",): _WHOAMI,
|
|
123
|
-
("learn",): _LEARN,
|
|
124
|
-
("explain",): _EXPLAIN,
|
|
125
|
-
("overview",): _OVERVIEW,
|
|
126
|
-
("doctor",): _DOCTOR,
|
|
127
|
-
("cli",): _CLI,
|
|
128
|
-
("cli", "overview"): _CLI,
|
|
129
|
-
}
|
|
File without changes
|
|
File without changes
|
{data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/agent-config/scripts/show.sh
RENAMED
|
File without changes
|
|
File without changes
|
{data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/ask-colleague/prompts/explore.md
RENAMED
|
File without changes
|
{data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/ask-colleague/prompts/review.md
RENAMED
|
File without changes
|
{data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/ask-colleague/prompts/write.md
RENAMED
|
File without changes
|
|
File without changes
|
{data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/assign-to-workforce/SKILL.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/cicd/scripts/_resolve-nick.sh
RENAMED
|
File without changes
|
{data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/cicd/scripts/portability-lint.sh
RENAMED
|
File without changes
|
|
File without changes
|
{data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/cicd/scripts/pr-status.sh
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/communicate/scripts/post-issue.sh
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/doc-test-alignment/SKILL.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/run-tests/scripts/test.sh
RENAMED
|
File without changes
|
|
File without changes
|
{data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/sonarclaude/scripts/sonar.sh
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{data_refinery_cli-0.3.2 → data_refinery_cli-0.3.3}/.claude/skills/version-bump/scripts/bump.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|