agentme 0.18.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.filedist-package.yml +1 -36
- package/.xdrs/agentme/edrs/application/021-ai-workflow-development-standards.md +74 -5
- package/.xdrs/agentme/edrs/application/028-ai-eval-standards.md +112 -16
- package/.xdrs/agentme/edrs/application/skills/001-create-javascript-project/SKILL.md +1 -1
- package/.xdrs/agentme/edrs/application/skills/003-create-golang-project/SKILL.md +1 -1
- package/.xdrs/agentme/edrs/application/skills/004-select-relevant-xdrs/SKILL.md +18 -44
- package/.xdrs/agentme/edrs/application/skills/005-create-python-project/SKILL.md +1 -1
- package/.xdrs/agentme/edrs/devops/skills/002-monorepo-setup/SKILL.md +1 -1
- package/.xdrs/index.md +3 -1
- package/README.md +3 -16
- package/package.json +4 -8
- package/.github/agents/speckit.analyze.agent.md +0 -184
- package/.github/agents/speckit.checklist.agent.md +0 -295
- package/.github/agents/speckit.clarify.agent.md +0 -181
- package/.github/agents/speckit.constitution.agent.md +0 -84
- package/.github/agents/speckit.implement.agent.md +0 -198
- package/.github/agents/speckit.plan.agent.md +0 -90
- package/.github/agents/speckit.specify.agent.md +0 -237
- package/.github/agents/speckit.tasks.agent.md +0 -200
- package/.github/agents/speckit.taskstoissues.agent.md +0 -30
- package/.github/prompts/speckit.analyze.prompt.md +0 -3
- package/.github/prompts/speckit.checklist.prompt.md +0 -3
- package/.github/prompts/speckit.clarify.prompt.md +0 -3
- package/.github/prompts/speckit.constitution.prompt.md +0 -3
- package/.github/prompts/speckit.implement.prompt.md +0 -3
- package/.github/prompts/speckit.plan.prompt.md +0 -3
- package/.github/prompts/speckit.specify.prompt.md +0 -3
- package/.github/prompts/speckit.tasks.prompt.md +0 -3
- package/.github/prompts/speckit.taskstoissues.prompt.md +0 -3
- package/.specify/memory/constitution.md +0 -99
- package/.specify/scripts/bash/check-prerequisites.sh +0 -190
- package/.specify/scripts/bash/common.sh +0 -253
- package/.specify/scripts/bash/create-new-feature.sh +0 -333
- package/.specify/scripts/bash/setup-plan.sh +0 -73
- package/.specify/scripts/bash/update-agent-context.sh +0 -808
- package/.specify/templates/agent-file-template.md +0 -28
- package/.specify/templates/checklist-template.md +0 -40
- package/.specify/templates/constitution-template.md +0 -50
- package/.specify/templates/plan-template.md +0 -110
- package/.specify/templates/spec-template.md +0 -115
- package/.specify/templates/tasks-template.md +0 -251
- package/.vscode/settings.json +0 -14
package/.filedist-package.yml
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
sets:
|
|
2
|
-
- package: xdrs-core@0.
|
|
2
|
+
- package: xdrs-core@0.30.0
|
|
3
3
|
# - package: git:https://github.com/flaviostutz/xdrs-core.git@main
|
|
4
4
|
selector:
|
|
5
5
|
files:
|
|
@@ -41,38 +41,3 @@ sets:
|
|
|
41
41
|
presets:
|
|
42
42
|
- basic
|
|
43
43
|
|
|
44
|
-
- selector:
|
|
45
|
-
files:
|
|
46
|
-
- .github/agents/speckit*
|
|
47
|
-
- .github/prompts/speckit*
|
|
48
|
-
- .specify/**
|
|
49
|
-
exclude:
|
|
50
|
-
- .specify/templates/**
|
|
51
|
-
output:
|
|
52
|
-
path: .
|
|
53
|
-
gitignore: false
|
|
54
|
-
readonly: true
|
|
55
|
-
presets:
|
|
56
|
-
- speckit
|
|
57
|
-
|
|
58
|
-
- selector:
|
|
59
|
-
files:
|
|
60
|
-
- .specify/templates/**
|
|
61
|
-
output:
|
|
62
|
-
path: .
|
|
63
|
-
gitignore: false
|
|
64
|
-
readonly: false
|
|
65
|
-
presets:
|
|
66
|
-
- speckit
|
|
67
|
-
|
|
68
|
-
- selector:
|
|
69
|
-
files:
|
|
70
|
-
- .vscode/settings.json
|
|
71
|
-
output:
|
|
72
|
-
path: .
|
|
73
|
-
managed: false
|
|
74
|
-
skipIfExists: true
|
|
75
|
-
readonly: false
|
|
76
|
-
gitignore: false
|
|
77
|
-
presets:
|
|
78
|
-
- speckit
|
|
@@ -107,12 +107,13 @@ Eval folder structure and script requirements are defined in [agentme-edr-028](0
|
|
|
107
107
|
|
|
108
108
|
LangGraph node names MUST follow a naming convention — a suffix for most node types, or the `evaluate_` prefix for judge nodes — that communicates the node's role at a glance. Names MUST be action-oriented and descriptive.
|
|
109
109
|
|
|
110
|
-
|
|
|
110
|
+
| Convention | Node type | When to use |
|
|
111
111
|
|---|---|---|
|
|
112
|
-
| `_llm` | LLM call | Any node whose primary action is a direct LLM inference call (see [agentme-edr-018](018-ai-llm-development-standards.md)) |
|
|
113
|
-
| `_step` | Algorithmic step | Deterministic logic with no LLM involvement (transformation, validation, routing) |
|
|
114
|
-
| `_tool` | Tool/API call | A node that wraps a single external tool or API (e.g. a REST endpoint, DB query) |
|
|
115
|
-
| `_agent` | Subgraph agent | A node that invokes a nested subgraph containing its own tool-invocation cycle and LLM calls; use the **deepagents** library for these nodes (see [agentme-edr-019](019-ai-agents-development-standards.md)) |
|
|
112
|
+
| suffix `_llm` | LLM call | Any node whose primary action is a direct LLM inference call (see [agentme-edr-018](018-ai-llm-development-standards.md)) |
|
|
113
|
+
| suffix `_step` | Algorithmic step | Deterministic logic with no LLM involvement (transformation, validation, routing) |
|
|
114
|
+
| suffix `_tool` | Tool/API call | A node that wraps a single external tool or API (e.g. a REST endpoint, DB query) |
|
|
115
|
+
| suffix `_agent` | Subgraph agent | A node that invokes a nested subgraph containing its own tool-invocation cycle and LLM calls; use the **deepagents** library for these nodes (see [agentme-edr-019](019-ai-agents-development-standards.md)) |
|
|
116
|
+
| prefix `evaluate_` | Judge node | A node that evaluates the quality, correctness, completeness, or progress of prior outputs and returns a structured verdict; MUST follow rule `13-judge-node-output-format` |
|
|
116
117
|
|
|
117
118
|
The Python function implementing the node SHOULD share the same name as the node alias passed to `add_node`, so that graph definitions and stack traces remain unambiguous:
|
|
118
119
|
|
|
@@ -131,6 +132,8 @@ graph.add_node("code_reviewer_agent", code_reviewer_agent)
|
|
|
131
132
|
|
|
132
133
|
Names MUST NOT use generic labels such as `node1`, `process`, or `run`. Each name must clearly express what action the node performs.
|
|
133
134
|
|
|
135
|
+
Judge nodes use a **prefix** convention instead of a suffix: the name MUST start with `evaluate_` followed by the subject being judged (e.g. `evaluate_progress`, `evaluate_quality`, `evaluate_completeness`, `evaluate_relevance`). This makes judge nodes immediately distinguishable from all other node types at a glance.
|
|
136
|
+
|
|
134
137
|
#### 10-workflow-unit-testing
|
|
135
138
|
|
|
136
139
|
All LLM calls within workflow nodes are external API calls and MUST be mocked in unit tests per [agentme-edr-018](018-ai-llm-development-standards.md) rule `04-unit-test-mocking`. Workflow unit tests must run fully offline with no real LLM provider calls.
|
|
@@ -222,6 +225,72 @@ Choose a name that summarises what the workflow consumes, processes, and produce
|
|
|
222
225
|
|
|
223
226
|
**Bad names** (FORBIDDEN): `MainWorkflow`, `AgentGraph`, `ProcessFlow`, `Workflow1`, `RunGraph`.
|
|
224
227
|
|
|
228
|
+
#### 13-judge-node-output-format
|
|
229
|
+
|
|
230
|
+
Every node whose name starts with `evaluate_` (a judge node) MUST return a structured verdict object as its output. This ensures all judge nodes are interchangeable and their results can be uniformly consumed by downstream routing logic, logged, and compared across runs.
|
|
231
|
+
|
|
232
|
+
**Required output schema:**
|
|
233
|
+
|
|
234
|
+
```python
|
|
235
|
+
from typing import Literal, Optional
|
|
236
|
+
from dataclasses import dataclass, field
|
|
237
|
+
|
|
238
|
+
FindingLevel = Literal["OK", "INFO", "WARNING", "ERROR"]
|
|
239
|
+
|
|
240
|
+
@dataclass
|
|
241
|
+
class JudgeFinding:
|
|
242
|
+
level: FindingLevel
|
|
243
|
+
# MUST: short action-oriented label; < 10 words
|
|
244
|
+
title: str
|
|
245
|
+
# MUST when level != "OK": why this is an issue; < 30 words
|
|
246
|
+
reason: Optional[str] = None
|
|
247
|
+
# MUST when level != "OK": notes/findings using mandatory (MUST) or advisory (SHOULD) language; < 400 words
|
|
248
|
+
details: Optional[str] = None
|
|
249
|
+
# OPTIONAL: possible fixes, only when directly inferrable from the finding without further analysis; < 200 words
|
|
250
|
+
fix: Optional[str] = None
|
|
251
|
+
|
|
252
|
+
@dataclass
|
|
253
|
+
class JudgeVerdict:
|
|
254
|
+
# MUST: highest severity level across all findings; "OK" only when every finding is "OK"
|
|
255
|
+
verdict: FindingLevel
|
|
256
|
+
# MUST: at least one finding present
|
|
257
|
+
findings: list[JudgeFinding] = field(default_factory=list)
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
Example (for logging, state storage, and inter-node communication):
|
|
261
|
+
|
|
262
|
+
```json
|
|
263
|
+
{
|
|
264
|
+
"verdict": "WARNING",
|
|
265
|
+
"findings": [
|
|
266
|
+
{
|
|
267
|
+
"level": "OK",
|
|
268
|
+
"title": "All required sections present"
|
|
269
|
+
},
|
|
270
|
+
{
|
|
271
|
+
"level": "WARNING",
|
|
272
|
+
"title": "Code coverage below threshold",
|
|
273
|
+
"reason": "Current coverage is 62%, minimum required is 80%.",
|
|
274
|
+
"details": "The following modules have no test coverage: auth.py, payments.py. SHOULD add unit tests for all public methods in these modules.",
|
|
275
|
+
"fix": "Add unit tests for auth.py and payments.py. Run `make test-coverage` to verify the threshold is met."
|
|
276
|
+
}
|
|
277
|
+
]
|
|
278
|
+
}
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
**Routing from judge nodes:**
|
|
282
|
+
|
|
283
|
+
Downstream conditional edges MUST route on `verdict` only:
|
|
284
|
+
|
|
285
|
+
```python
|
|
286
|
+
def route_after_evaluate_quality(state) -> str:
|
|
287
|
+
if state["evaluate_quality_result"].verdict in ("ERROR", "WARNING"):
|
|
288
|
+
return "revise_draft_llm"
|
|
289
|
+
return "publish_step"
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
**Logging:** Log `verdict` and the count of each level as MLflow metrics on the current run per rule `03-observability-and-experiment-tracking`.
|
|
293
|
+
|
|
225
294
|
#### 15-workflow-state-persistence
|
|
226
295
|
|
|
227
296
|
For long-running workflows that may need to be paused and resumed:
|
|
@@ -23,41 +23,47 @@ For when evals are required per AI tier, see [agentme-edr-007](../principles/007
|
|
|
23
23
|
|
|
24
24
|
#### 01-eval-folder-structure
|
|
25
25
|
|
|
26
|
-
|
|
26
|
+
Each named eval is a self-contained unit. Create one directory per eval under `evals/` at the same level as `lib/` and `examples/`:
|
|
27
27
|
|
|
28
28
|
```text
|
|
29
29
|
evals/
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
30
|
+
eval-<name>/
|
|
31
|
+
dataset/ # EDR-024 compliant dataset (README.md, dataset.schema.json, data/)
|
|
32
|
+
eval-<name>.py # evaluation script
|
|
33
|
+
eval-report.md # generated report (overwritten on each run — see rule 03)
|
|
34
|
+
Makefile # eval and run targets
|
|
35
|
+
eval-<name2>/
|
|
36
|
+
...
|
|
34
37
|
```
|
|
35
38
|
|
|
36
|
-
Where `<
|
|
39
|
+
Where `<name>` identifies the specific evaluation scenario (e.g., `eval-basic`, `eval-complex`, `eval-edge-cases`).
|
|
37
40
|
|
|
38
|
-
The
|
|
41
|
+
The `dataset/` subfolder MUST be a valid [agentme-edr-024](024-ml-dataset-structure.md) dataset — it MUST include `README.md` and `dataset.schema.json` at its root. For input/output pairs, use JSONL files per `agentme-edr-024.04-complex-structured-datasets-must-use-jsonl`.
|
|
42
|
+
|
|
43
|
+
Each `evals/eval-<name>/Makefile` MUST define:
|
|
39
44
|
|
|
40
45
|
| Target | Behaviour |
|
|
41
46
|
|---|---|
|
|
42
|
-
| `eval` | Runs
|
|
43
|
-
| `
|
|
47
|
+
| `eval` | Runs the eval with threshold enforcement; exits non-zero on failure (CI-safe) |
|
|
48
|
+
| `run` | Runs the eval without threshold enforcement (exploration / debugging) |
|
|
44
49
|
|
|
45
|
-
The module root Makefile MUST expose a `make eval` target that delegates to `eval` in every `evals
|
|
50
|
+
The module root Makefile MUST expose a `make eval` target that delegates to `eval` in every `evals/eval-<name>/Makefile`:
|
|
46
51
|
|
|
47
52
|
```makefile
|
|
48
53
|
eval:
|
|
49
|
-
$(MAKE) -C evals/
|
|
50
|
-
$(MAKE) -C evals/
|
|
54
|
+
$(MAKE) -C evals/eval-basic eval
|
|
55
|
+
$(MAKE) -C evals/eval-complex eval
|
|
51
56
|
```
|
|
52
57
|
|
|
53
58
|
#### 02-eval-script-requirements
|
|
54
59
|
|
|
55
|
-
Each `
|
|
60
|
+
Each `eval-<name>.py` script MUST:
|
|
56
61
|
|
|
57
|
-
- Load the dataset from `
|
|
62
|
+
- Load the dataset from `dataset/` in the same eval folder, following [agentme-edr-024](024-ml-dataset-structure.md). For input/output pairs, use the JSONL format per `agentme-edr-024.04-complex-structured-datasets-must-use-jsonl`.
|
|
58
63
|
- Run every input through the live component against **real LLM providers** (not mocked responses), to capture model drift.
|
|
59
64
|
- Log per-sample and aggregate metrics to an MLflow experiment that runs **locally** — a remote MLflow server MUST NOT be required.
|
|
60
65
|
- Compare outputs to expected values using project-defined quality thresholds. Thresholds MUST be declared explicitly (e.g., in a Makefile variable or README).
|
|
66
|
+
- Write `eval-report.md` in the same folder per rule `03-eval-report-file`.
|
|
61
67
|
- Exit with a non-zero status when any metric falls below its defined threshold, consistent with [agentme-edr-007](../principles/007-project-quality-standards.md) rule `07-statistical-models-must-have-eval-targets`.
|
|
62
68
|
|
|
63
69
|
**Example:**
|
|
@@ -68,19 +74,109 @@ from my_package.app.workflows.document_review_workflow.graph import graph
|
|
|
68
74
|
|
|
69
75
|
EVAL_MIN_ACCURACY = 0.85
|
|
70
76
|
|
|
71
|
-
with mlflow.start_run():
|
|
77
|
+
with mlflow.start_run() as run:
|
|
72
78
|
results = []
|
|
73
|
-
for sample in load_dataset("
|
|
79
|
+
for sample in load_dataset("dataset/"):
|
|
74
80
|
output = graph.invoke({"document": sample["input"]})
|
|
75
81
|
results.append(output["label"] == sample["expected_label"])
|
|
76
82
|
|
|
77
83
|
accuracy = sum(results) / len(results)
|
|
78
84
|
mlflow.log_metric("accuracy", accuracy)
|
|
79
85
|
|
|
86
|
+
write_eval_report(run, results, thresholds={"accuracy": EVAL_MIN_ACCURACY})
|
|
87
|
+
|
|
80
88
|
if accuracy < EVAL_MIN_ACCURACY:
|
|
81
89
|
raise SystemExit(f"Eval failed: accuracy {accuracy:.2f} < {EVAL_MIN_ACCURACY}")
|
|
82
90
|
```
|
|
83
91
|
|
|
92
|
+
#### 03-eval-report-file
|
|
93
|
+
|
|
94
|
+
Each eval script MUST produce `eval-report.md` in the same `evals/eval-<name>/` folder and overwrite it on every run.
|
|
95
|
+
|
|
96
|
+
**Generation constraint:** The report MUST be produced programmatically, reading raw metric values directly from MLflow. No LLM or generative model may write, summarize, or paraphrase any section of the report, to prevent hallucinated metric values.
|
|
97
|
+
|
|
98
|
+
The report MUST follow this template:
|
|
99
|
+
|
|
100
|
+
```markdown
|
|
101
|
+
# Eval Report: <name>
|
|
102
|
+
|
|
103
|
+
**Date:** <ISO date>
|
|
104
|
+
**Dataset:** dataset/
|
|
105
|
+
**Script:** eval-<name>.py
|
|
106
|
+
**Thresholds:** accuracy ≥ <value>, F1 ≥ <value>
|
|
107
|
+
|
|
108
|
+
## Overall Results
|
|
109
|
+
|
|
110
|
+
| Metric | Value | 95% CI | Threshold | Status |
|
|
111
|
+
|-----------|--------|----------------|-----------|---------|
|
|
112
|
+
| Accuracy | <val> | [<low>, <high>]| ≥ <thr> | ✓/✗ PASS/FAIL |
|
|
113
|
+
| F1 Score | <val> | — | ≥ <thr> | ✓/✗ PASS/FAIL |
|
|
114
|
+
| Precision | <val> | — | — | — |
|
|
115
|
+
| Recall | <val> | — | — | — |
|
|
116
|
+
| Samples | <n> | — | — | — |
|
|
117
|
+
|
|
118
|
+
**Overall: PASS / FAIL**
|
|
119
|
+
|
|
120
|
+
## Per-item Results
|
|
121
|
+
|
|
122
|
+
| ID | Input Summary | Expected | Actual | Correct |
|
|
123
|
+
|-----|---------------|----------|--------|---------|
|
|
124
|
+
| 001 | <summary> | <label> | <label>| ✓ |
|
|
125
|
+
| 002 | <summary> | <label> | <label>| ✗ |
|
|
126
|
+
|
|
127
|
+
## Notes
|
|
128
|
+
|
|
129
|
+
- <observations, failure patterns, MLflow run link>
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
**Confidence interval:** The 95% CI for accuracy MUST be computed using the **Wilson score interval** (preferred over the normal approximation for small $n$). A wide interval signals that the dataset is too small to support confident conclusions and the sample count should be increased.
|
|
133
|
+
|
|
134
|
+
The Wilson score bounds at 95% confidence ($z = 1.96$) are:
|
|
135
|
+
|
|
136
|
+
$$\frac{\hat{p} + \frac{z^2}{2n} \pm z\sqrt{\frac{\hat{p}(1-\hat{p})}{n} + \frac{z^2}{4n^2}}}{1 + \frac{z^2}{n}}$$
|
|
137
|
+
|
|
138
|
+
Where $\hat{p}$ is observed accuracy and $n$ is sample count. Accuracy and F1 are required; precision and recall are recommended.
|
|
139
|
+
|
|
140
|
+
**Filled-in example** (`evals/eval-basic/eval-report.md` for a document review workflow):
|
|
141
|
+
|
|
142
|
+
```markdown
|
|
143
|
+
# Eval Report: eval-basic
|
|
144
|
+
|
|
145
|
+
**Date:** 2026-06-12
|
|
146
|
+
**Dataset:** dataset/
|
|
147
|
+
**Script:** eval-basic.py
|
|
148
|
+
**Thresholds:** accuracy ≥ 0.85, F1 ≥ 0.80
|
|
149
|
+
|
|
150
|
+
## Overall Results
|
|
151
|
+
|
|
152
|
+
| Metric | Value | 95% CI | Threshold | Status |
|
|
153
|
+
|-----------|-------|--------------|-----------|-------------|
|
|
154
|
+
| Accuracy | 0.88 | [0.70, 0.96] | ≥ 0.85 | ✓ PASS |
|
|
155
|
+
| F1 Score | 0.86 | — | ≥ 0.80 | ✓ PASS |
|
|
156
|
+
| Precision | 0.89 | — | — | — |
|
|
157
|
+
| Recall | 0.84 | — | — | — |
|
|
158
|
+
| Samples | 25 | — | — | — |
|
|
159
|
+
|
|
160
|
+
**Overall: PASS**
|
|
161
|
+
|
|
162
|
+
> Note: CI [0.70, 0.96] is wide — 25 samples may be insufficient for high confidence. Consider expanding the dataset.
|
|
163
|
+
|
|
164
|
+
## Per-item Results
|
|
165
|
+
|
|
166
|
+
| ID | Input Summary | Expected | Actual | Correct |
|
|
167
|
+
|-----|-------------------------------------|----------|----------|---------|
|
|
168
|
+
| 001 | Contract renewal, 3 pages, standard | approve | approve | ✓ |
|
|
169
|
+
| 002 | NDA with unusual liability clause | escalate | escalate | ✓ |
|
|
170
|
+
| 003 | Vendor invoice, missing PO number | reject | reject | ✓ |
|
|
171
|
+
| 004 | Employment agreement, standard terms| approve | approve | ✓ |
|
|
172
|
+
| 005 | Amendment with redlined IP clause | escalate | approve | ✗ |
|
|
173
|
+
|
|
174
|
+
## Notes
|
|
175
|
+
|
|
176
|
+
- Sample 005 misclassified: redlined IP clause not flagged as escalation trigger. Possible model drift.
|
|
177
|
+
- MLflow run: experiment `eval_basic` — view with `mlflow ui`
|
|
178
|
+
```
|
|
179
|
+
|
|
84
180
|
## References
|
|
85
181
|
|
|
86
182
|
- [agentme-edr-007](../principles/007-project-quality-standards.md) — Project quality standards: when evals are required per AI tier (rule `09-ai-project-testing-requirements`) and statistical model eval targets (rule `07-statistical-models-must-have-eval-targets`)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
---
|
|
2
|
-
name:
|
|
2
|
+
name: 001-create-javascript-project
|
|
3
3
|
description: >
|
|
4
4
|
Scaffolds the initial boilerplate structure for a JavaScript/TypeScript project following
|
|
5
5
|
the standard tooling and layout defined in agentme-edr-003. Activate this skill when the user
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
---
|
|
2
|
-
name:
|
|
2
|
+
name: 003-create-golang-project
|
|
3
3
|
description: >
|
|
4
4
|
Scaffolds the initial boilerplate structure for a Go (Golang) CLI or library project following
|
|
5
5
|
the standard tooling and layout defined in agentme-edr-010. Activate this skill when the user
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
---
|
|
2
|
-
name:
|
|
2
|
+
name: 004-select-relevant-xdrs
|
|
3
3
|
description: >
|
|
4
4
|
Analyzes a client repository, extracts the full agentme XDR set, and excludes the records that do
|
|
5
5
|
not fit the project's structure and workflow needs. Activate this skill when the user asks to
|
|
@@ -15,8 +15,7 @@ compatibility: Node.js 18+
|
|
|
15
15
|
|
|
16
16
|
Installs the full agentme XDR set for a repository through the published CLI, then removes only the
|
|
17
17
|
records that clearly do not fit the target project by passing explicit `--exclude` flags during
|
|
18
|
-
extraction.
|
|
19
|
-
separately instead of using presets to narrow the XDR set.
|
|
18
|
+
extraction.
|
|
20
19
|
|
|
21
20
|
## Instructions
|
|
22
21
|
|
|
@@ -25,7 +24,7 @@ separately instead of using presets to narrow the XDR set.
|
|
|
25
24
|
1. Discover how the current published package exposes extraction. Prefer the package CLI help,
|
|
26
25
|
installed package metadata, and the agentme README so you know how to invoke:
|
|
27
26
|
- the full XDR extraction path with `extract --all`
|
|
28
|
-
- any optional workflow artifacts
|
|
27
|
+
- any optional workflow artifacts
|
|
29
28
|
2. Build an explicit inventory of the shipped agentme XDR files so exclusions can be chosen by
|
|
30
29
|
stable path.
|
|
31
30
|
3. If the runtime environment cannot enumerate the shipped XDRs directly, fall back to the package
|
|
@@ -41,16 +40,13 @@ separately instead of using presets to narrow the XDR set.
|
|
|
41
40
|
- project shape (single package, monorepo, library, application, CLI, service)
|
|
42
41
|
- frameworks and tooling (`next.config.*`, `vite.config.*`, `jest.config.*`, `.mise.toml`,
|
|
43
42
|
`docker-compose.*`, CI workflows, Makefiles)
|
|
44
|
-
- existing agent workflow files (`.xdrs/`, `AGENTS.md`, `.github/agents/`, `.github/prompts
|
|
45
|
-
`.specify/`, `.vscode/settings.json`)
|
|
43
|
+
- existing agent workflow files (`.xdrs/`, `AGENTS.md`, `.github/agents/`, `.github/prompts/`)
|
|
46
44
|
2. Determine whether the repository already has:
|
|
47
45
|
- XDR-driven guidance in `.xdrs/`
|
|
48
|
-
- spec-driven workflow artifacts from speckit
|
|
49
46
|
- local conventions that would make a preset redundant or conflicting
|
|
50
47
|
3. Summarize the project in a short decision note before selecting exclusions:
|
|
51
48
|
- what the project is
|
|
52
49
|
- which languages and frameworks are present
|
|
53
|
-
- whether it appears to want only XDRs or also agent workflow scaffolding such as `speckit`
|
|
54
50
|
4. From that analysis, build a candidate list of XDRs that are obviously out of scope for the
|
|
55
51
|
repository. Only include exclusions when the mismatch is concrete, for example:
|
|
56
52
|
- language-specific XDRs for languages not used in the repository
|
|
@@ -59,7 +55,7 @@ separately instead of using presets to narrow the XDR set.
|
|
|
59
55
|
5. Do not exclude baseline or broadly applicable guidance just because the project is small or
|
|
60
56
|
simple. Exclusions must remove only XDRs that would clearly mislead the repository.
|
|
61
57
|
|
|
62
|
-
### Phase 3: Select exclusions
|
|
58
|
+
### Phase 3: Select exclusions
|
|
63
59
|
|
|
64
60
|
1. Start from the full shipped agentme XDR set as the default installation target.
|
|
65
61
|
2. Reduce that set only by excluding the XDRs that clearly do not fit the repository. Use
|
|
@@ -68,18 +64,10 @@ separately instead of using presets to narrow the XDR set.
|
|
|
68
64
|
- `.xdrs/agentme/edrs/devops/005-monorepo-structure.md` for non-monorepos
|
|
69
65
|
- `.xdrs/agentme/edrs/observability/011-service-health-check-endpoint.md` for projects without
|
|
70
66
|
a long-running service surface
|
|
71
|
-
3.
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
baseline decision framework.
|
|
75
|
-
- Add `speckit` only when the repository already uses speckit artifacts, explicitly asks for
|
|
76
|
-
specification-driven workflow support, or clearly needs the `.github/agents/`, `.github/prompts/`,
|
|
77
|
-
`.specify/`, and related workflow files shipped by that preset.
|
|
78
|
-
5. If the repository does not want agentme XDRs and does not want any optional workflow artifacts,
|
|
79
|
-
stop and explain why instead of forcing an installation.
|
|
80
|
-
6. State the final decision with:
|
|
67
|
+
3. If the repository does not want agentme XDRs, stop and explain why instead of forcing an
|
|
68
|
+
installation.
|
|
69
|
+
4. State the final decision with:
|
|
81
70
|
- whether full XDR extraction will run
|
|
82
|
-
- whether `speckit` will be added
|
|
83
71
|
- the final exclude list with one-line rationale per excluded XDR
|
|
84
72
|
|
|
85
73
|
### Phase 4: Install the selected presets
|
|
@@ -91,28 +79,18 @@ separately instead of using presets to narrow the XDR set.
|
|
|
91
79
|
npx -y agentme extract --output . --all --exclude <xdr-path> --exclude <xdr-path>
|
|
92
80
|
```
|
|
93
81
|
|
|
94
|
-
3. If the
|
|
95
|
-
stays explicit:
|
|
96
|
-
|
|
97
|
-
```sh
|
|
98
|
-
npx -y agentme extract --output . --presets speckit
|
|
99
|
-
```
|
|
100
|
-
|
|
101
|
-
4. If the user wants a pinned dependency instead of one-off extraction, prefer:
|
|
82
|
+
3. If the user wants a pinned dependency instead of one-off extraction, prefer:
|
|
102
83
|
|
|
103
84
|
```sh
|
|
104
85
|
pnpm add -D agentme
|
|
105
86
|
pnpm exec agentme extract --output . --all --exclude <xdr-path> --exclude <xdr-path>
|
|
106
|
-
pnpm exec agentme extract --output . --presets speckit
|
|
107
87
|
```
|
|
108
88
|
|
|
109
|
-
|
|
89
|
+
4. After extraction, verify that the expected XDR files now exist:
|
|
110
90
|
- `.xdrs/index.md`, `.xdrs/agentme/`, `AGENTS.md`
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
8. Report whether full XDR extraction ran, whether `speckit` was added, which XDRs were excluded
|
|
115
|
-
with `--exclude`, and the paths that were added or updated.
|
|
91
|
+
5. Also verify that each excluded XDR path is absent from the extracted output.
|
|
92
|
+
6. Report whether full XDR extraction ran, which XDRs were excluded with `--exclude`, and the
|
|
93
|
+
paths that were added or updated.
|
|
116
94
|
|
|
117
95
|
### Phase 5: Handle conflicts and repeat runs
|
|
118
96
|
|
|
@@ -130,28 +108,24 @@ separately instead of using presets to narrow the XDR set.
|
|
|
130
108
|
|
|
131
109
|
Input: "Install the right agentme XDR presets for this Node.js library"
|
|
132
110
|
- Inventory the shipped agentme XDR files
|
|
133
|
-
- Analyze the repository and detect a JavaScript library with Makefiles
|
|
111
|
+
- Analyze the repository and detect a JavaScript library with Makefiles
|
|
134
112
|
- Exclude `.xdrs/agentme/edrs/application/010-golang-project-tooling.md` and `.xdrs/agentme/edrs/observability/011-service-health-check-endpoint.md`
|
|
135
113
|
- Run `npx -y agentme extract --output . --all --exclude .xdrs/agentme/edrs/application/010-golang-project-tooling.md --exclude .xdrs/agentme/edrs/observability/011-service-health-check-endpoint.md`
|
|
136
114
|
|
|
137
|
-
Input: "Set up agentme for this repo
|
|
115
|
+
Input: "Set up agentme for this repo"
|
|
138
116
|
- Inventory the shipped agentme XDR files
|
|
139
|
-
- Detect `.
|
|
140
|
-
- Choose full XDR extraction
|
|
117
|
+
- Detect `.xdrs/`, `.github/agents/`, and `.github/prompts/`
|
|
118
|
+
- Choose full XDR extraction
|
|
141
119
|
- Exclude only the XDRs that are concretely irrelevant for the repository shape
|
|
142
120
|
- Run `npx -y agentme extract --output . --all --exclude <irrelevant-xdr-path>`
|
|
143
|
-
- Run `npx -y agentme extract --output . --presets speckit`
|
|
144
121
|
|
|
145
122
|
## Edge Cases
|
|
146
123
|
|
|
147
124
|
- If the CLI cannot directly list the shipped XDRs, use the package's published metadata or README
|
|
148
125
|
as a fallback instead of failing immediately.
|
|
149
|
-
- If the repository already has `.xdrs
|
|
150
|
-
set unless the user requests the workflow files.
|
|
126
|
+
- If the repository already has `.xdrs/`, install or update the XDR set.
|
|
151
127
|
- If a candidate exclusion is debatable, keep the XDR. The skill should exclude only records that
|
|
152
128
|
clearly do not make sense for the project.
|
|
153
|
-
- If the user asks specifically for XDRs, do not add `speckit` unless they also request that
|
|
154
|
-
workflow.
|
|
155
129
|
- If preset extraction would overwrite locally customized agent files, warn the user and describe the
|
|
156
130
|
likely merge points.
|
|
157
131
|
- If the repository is a spike or intentionally minimal experiment, still prefer the smallest preset
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
---
|
|
2
|
-
name:
|
|
2
|
+
name: 005-create-python-project
|
|
3
3
|
description: >
|
|
4
4
|
Scaffolds the initial boilerplate structure for a Python project following the standard tooling
|
|
5
5
|
and layout defined in agentme-edr-014. Activate this skill when the user asks to create,
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
---
|
|
2
|
-
name:
|
|
2
|
+
name: 002-monorepo-setup
|
|
3
3
|
description: >
|
|
4
4
|
Step-by-step instructions for setting up and scaffolding a new monorepo following the standard
|
|
5
5
|
layout, naming conventions, Makefiles, Mise tooling, and README requirements defined in
|
package/.xdrs/index.md
CHANGED
|
@@ -25,4 +25,6 @@ Opinionated set of decisions and skills for common development tasks
|
|
|
25
25
|
|
|
26
26
|
### _local (reserved)
|
|
27
27
|
|
|
28
|
-
|
|
28
|
+
The `_local` scope is the default scope for new XDRs and may override decisions from other scopes. These decisions are local and are not meant to be shared with other contexts.
|
|
29
|
+
|
|
30
|
+
Read _local scope index at `_local/index.md` when it exists.
|
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# agentme
|
|
2
2
|
|
|
3
|
-
Curated distribution package of XDRs and
|
|
3
|
+
Curated distribution package of XDRs and AI agent workflow files for AI-assisted software development.
|
|
4
4
|
|
|
5
5
|
This collection is being updated as we develop applications and feel the need for new instructions and skills to help with AI agents.
|
|
6
6
|
|
|
@@ -8,7 +8,7 @@ For guidance on turning recurring delivery friction into reusable decision recor
|
|
|
8
8
|
|
|
9
9
|
## Getting Started
|
|
10
10
|
|
|
11
|
-
This will extract all the features of agentme (skills, github configurations,
|
|
11
|
+
This will extract all the features of agentme (skills, github configurations, xdrs collection):
|
|
12
12
|
|
|
13
13
|
```sh
|
|
14
14
|
npx agentme
|
|
@@ -27,7 +27,6 @@ mise exec -- pnpm exec agentme check --output . --presets basic
|
|
|
27
27
|
agentme is published as an npm package and consumed through `filedist`-based extraction. It ships a curated set of reusable artifacts for other repositories:
|
|
28
28
|
|
|
29
29
|
- XDRs in `.xdrs/agentme/` for engineering, architecture, testing, tooling, and CI/CD standards.
|
|
30
|
-
- speckit agent files in `.github/`, `.specify/`, and `.vscode/` for specification-driven AI development workflows.
|
|
31
30
|
|
|
32
31
|
The package is intentionally static: consumers install it as a development dependency, extract files into their own repository, and commit the generated output.
|
|
33
32
|
|
|
@@ -36,7 +35,6 @@ The package is intentionally static: consumers install it as a development depen
|
|
|
36
35
|
| Preset | Contents |
|
|
37
36
|
| --- | --- |
|
|
38
37
|
| `basic` | `xdrs-core` baseline ADRs, `AGENTS.md`, and agentme XDRs |
|
|
39
|
-
| `speckit` | speckit agents, prompts, templates, scripts, memory files, and VS Code settings |
|
|
40
38
|
| no preset | all shipped artifacts combined |
|
|
41
39
|
|
|
42
40
|
Typical consumer workflow:
|
|
@@ -68,16 +66,6 @@ This is useful when you want feature work to leave behind maintainable documenta
|
|
|
68
66
|
- product requirements and business constraints;
|
|
69
67
|
- business decisions that explain why the feature behaves the way it does.
|
|
70
68
|
|
|
71
|
-
### Keep project documentation current through the speckit workflow
|
|
72
|
-
|
|
73
|
-
Use the `speckit` distribution when you want the delivery workflow itself to instruct agents to maintain ADRs, EDRs, and BDRs as implementation evolves. In this model, feature development and project documentation happen together: major technical and business decisions are continuously written back into XDRs instead of being deferred to a separate documentation pass.
|
|
74
|
-
|
|
75
|
-
This is useful when you want to:
|
|
76
|
-
|
|
77
|
-
- keep the project decision log up to date as new features are specified and implemented;
|
|
78
|
-
- have architecture, engineering, and business documentation generated in a consistent XDR format;
|
|
79
|
-
- reuse the same XDR-based approach across other repositories that consume agentme.
|
|
80
|
-
|
|
81
69
|
## Development
|
|
82
70
|
|
|
83
71
|
Install [Mise](https://mise.jdx.dev/getting-started.html), then bootstrap the repository through the root `Makefile`:
|
|
@@ -110,8 +98,7 @@ What these targets do:
|
|
|
110
98
|
├── bin/ CLI entrypoint delegated to filedist
|
|
111
99
|
├── dist/ Generated npm package tarballs
|
|
112
100
|
├── examples/ Runnable verification of consumer extraction behavior
|
|
113
|
-
├── .github/ Shipped
|
|
114
|
-
├── .specify/ Shipped speckit memory, scripts, and templates
|
|
101
|
+
├── .github/ Shipped agent and prompt files
|
|
115
102
|
└── .xdrs/ Shipped XDRs plus local project-only decision records
|
|
116
103
|
```
|
|
117
104
|
|
package/package.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agentme",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.20.0",
|
|
4
4
|
"description": "",
|
|
5
5
|
"dependencies": {
|
|
6
|
-
"filedist": "^0.
|
|
6
|
+
"filedist": "^0.36.0"
|
|
7
7
|
},
|
|
8
8
|
"bin": "bin/filedist.js",
|
|
9
9
|
"files": [
|
|
@@ -11,17 +11,13 @@
|
|
|
11
11
|
"bin/filedist.js",
|
|
12
12
|
".filedist-package.yml",
|
|
13
13
|
".xdrs/agentme/**",
|
|
14
|
-
".xdrs/index.md"
|
|
15
|
-
".github/agents/speckit*",
|
|
16
|
-
".github/prompts/speckit*",
|
|
17
|
-
".specify/**",
|
|
18
|
-
".vscode/settings.json"
|
|
14
|
+
".xdrs/index.md"
|
|
19
15
|
],
|
|
20
16
|
"repository": {
|
|
21
17
|
"type": "git",
|
|
22
18
|
"url": "https://github.com/flaviostutz/agentme.git"
|
|
23
19
|
},
|
|
24
20
|
"devDependencies": {
|
|
25
|
-
"xdrs-core": "^0.
|
|
21
|
+
"xdrs-core": "^0.30.1"
|
|
26
22
|
}
|
|
27
23
|
}
|