skopus 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skopus-0.1.1/.github/workflows/bench-smoke.yml +52 -0
- skopus-0.1.1/.github/workflows/test.yml +44 -0
- skopus-0.1.1/.gitignore +41 -0
- skopus-0.1.1/CHANGELOG.md +170 -0
- skopus-0.1.1/LICENSE +21 -0
- skopus-0.1.1/Makefile +31 -0
- skopus-0.1.1/PKG-INFO +145 -0
- skopus-0.1.1/README.md +102 -0
- skopus-0.1.1/bench/__init__.py +29 -0
- skopus-0.1.1/bench/config.py +91 -0
- skopus-0.1.1/bench/context.py +96 -0
- skopus-0.1.1/bench/correction_persistence/__init__.py +15 -0
- skopus-0.1.1/bench/correction_persistence/dataset.json +287 -0
- skopus-0.1.1/bench/correction_persistence/runner.py +138 -0
- skopus-0.1.1/bench/driver.py +180 -0
- skopus-0.1.1/bench/harness.py +169 -0
- skopus-0.1.1/bench/locomo/__init__.py +11 -0
- skopus-0.1.1/bench/longmemeval/__init__.py +14 -0
- skopus-0.1.1/bench/msc/__init__.py +10 -0
- skopus-0.1.1/bench/ruler/__init__.py +12 -0
- skopus-0.1.1/docs/DESIGN.md +264 -0
- skopus-0.1.1/pyproject.toml +103 -0
- skopus-0.1.1/skopus/__init__.py +6 -0
- skopus-0.1.1/skopus/__main__.py +6 -0
- skopus-0.1.1/skopus/adapters/__init__.py +45 -0
- skopus-0.1.1/skopus/adapters/aider.py +36 -0
- skopus-0.1.1/skopus/adapters/base.py +242 -0
- skopus-0.1.1/skopus/adapters/claude_code.py +159 -0
- skopus-0.1.1/skopus/adapters/codex.py +16 -0
- skopus-0.1.1/skopus/adapters/copilot.py +21 -0
- skopus-0.1.1/skopus/adapters/cursor.py +117 -0
- skopus-0.1.1/skopus/adapters/gemini.py +16 -0
- skopus-0.1.1/skopus/cli.py +607 -0
- skopus-0.1.1/skopus/evolve.py +281 -0
- skopus-0.1.1/skopus/graphify_bridge.py +297 -0
- skopus-0.1.1/skopus/renderer.py +288 -0
- skopus-0.1.1/skopus/templates/charter/CLAUDE.md.j2 +88 -0
- skopus-0.1.1/skopus/templates/charter/user_profile.md.j2 +60 -0
- skopus-0.1.1/skopus/templates/charter/workflow_partnership.md.j2 +201 -0
- skopus-0.1.1/skopus/templates/memory/MEMORY.md.j2 +46 -0
- skopus-0.1.1/skopus/templates/memory/feedback_seed_blank.md +18 -0
- skopus-0.1.1/skopus/templates/memory/feedback_seed_bug_hunter.md +33 -0
- skopus-0.1.1/skopus/templates/memory/feedback_seed_founder.md +32 -0
- skopus-0.1.1/skopus/templates/memory/feedback_seed_research.md +31 -0
- skopus-0.1.1/skopus/templates/memory/feedback_seed_solo_dev.md +33 -0
- skopus-0.1.1/skopus/templates/memory/feedback_seed_team_lead.md +31 -0
- skopus-0.1.1/skopus/templates/vault/.claude/commands/compile.md +48 -0
- skopus-0.1.1/skopus/templates/vault/.claude/commands/ingest.md +39 -0
- skopus-0.1.1/skopus/templates/vault/.claude/commands/lint.md +54 -0
- skopus-0.1.1/skopus/templates/vault/.claude/commands/query.md +37 -0
- skopus-0.1.1/skopus/templates/vault/.claude/commands/wiki.md +37 -0
- skopus-0.1.1/skopus/templates/vault/CLAUDE.md.j2 +103 -0
- skopus-0.1.1/skopus/templates/vault/log.md.j2 +18 -0
- skopus-0.1.1/skopus/templates/vault/wiki/index.md.j2 +59 -0
- skopus-0.1.1/skopus/wizard.py +229 -0
- skopus-0.1.1/tests/__init__.py +0 -0
- skopus-0.1.1/tests/test_adapter_path_resolution.py +112 -0
- skopus-0.1.1/tests/test_adapters.py +154 -0
- skopus-0.1.1/tests/test_bench_cp.py +107 -0
- skopus-0.1.1/tests/test_bench_harness.py +176 -0
- skopus-0.1.1/tests/test_cli.py +86 -0
- skopus-0.1.1/tests/test_evolve.py +158 -0
- skopus-0.1.1/tests/test_graphify_bridge.py +195 -0
- skopus-0.1.1/tests/test_multi_adapters.py +240 -0
- skopus-0.1.1/tests/test_renderer.py +169 -0
- skopus-0.1.1/tests/test_wizard.py +37 -0
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
name: bench-smoke
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request:
|
|
5
|
+
branches: [main]
|
|
6
|
+
|
|
7
|
+
# Runs the Correction-Persistence benchmark with the mock driver on every PR.
|
|
8
|
+
# The mock driver is deterministic and free (no API cost), so this is safe
|
|
9
|
+
# to run on every PR as a smoke test. Real benchmark runs (with an actual
|
|
10
|
+
# LLM driver) happen manually or on a release cadence.
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
bench-smoke:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Set up Python
|
|
20
|
+
uses: actions/setup-python@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: "3.12"
|
|
23
|
+
cache: pip
|
|
24
|
+
|
|
25
|
+
- name: Install skopus
|
|
26
|
+
run: |
|
|
27
|
+
python -m pip install --upgrade pip
|
|
28
|
+
pip install -e ".[dev]"
|
|
29
|
+
|
|
30
|
+
- name: Initialize skopus (non-interactive)
|
|
31
|
+
run: |
|
|
32
|
+
mkdir -p /tmp/smoke-home
|
|
33
|
+
HOME=/tmp/smoke-home skopus init \
|
|
34
|
+
--non-interactive \
|
|
35
|
+
--name "CI" \
|
|
36
|
+
--profile bug-hunter \
|
|
37
|
+
--vault /tmp/smoke-home/Vault
|
|
38
|
+
|
|
39
|
+
- name: Run CP benchmark (mock driver, full ablation)
|
|
40
|
+
run: |
|
|
41
|
+
HOME=/tmp/smoke-home skopus bench run cp \
|
|
42
|
+
--ablation \
|
|
43
|
+
--driver mock \
|
|
44
|
+
--limit 5 \
|
|
45
|
+
--no-save
|
|
46
|
+
|
|
47
|
+
- name: Run single CP with mock (sanity)
|
|
48
|
+
run: |
|
|
49
|
+
HOME=/tmp/smoke-home skopus bench run cp \
|
|
50
|
+
--driver mock \
|
|
51
|
+
--limit 3 \
|
|
52
|
+
--no-save
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
name: test
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
fail-fast: false
|
|
14
|
+
matrix:
|
|
15
|
+
python-version: ["3.10", "3.11", "3.12"]
|
|
16
|
+
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
|
|
20
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
21
|
+
uses: actions/setup-python@v5
|
|
22
|
+
with:
|
|
23
|
+
python-version: ${{ matrix.python-version }}
|
|
24
|
+
cache: pip
|
|
25
|
+
|
|
26
|
+
- name: Install dependencies (skopus + dev + graph-full)
|
|
27
|
+
run: |
|
|
28
|
+
python -m pip install --upgrade pip
|
|
29
|
+
pip install -e ".[dev]"
|
|
30
|
+
|
|
31
|
+
- name: Ruff lint
|
|
32
|
+
run: ruff check skopus tests bench
|
|
33
|
+
|
|
34
|
+
- name: Ruff format check
|
|
35
|
+
run: ruff format --check skopus tests bench
|
|
36
|
+
|
|
37
|
+
- name: Run pytest
|
|
38
|
+
run: pytest tests/ -v --tb=short
|
|
39
|
+
|
|
40
|
+
- name: Smoke test CLI
|
|
41
|
+
run: |
|
|
42
|
+
skopus version
|
|
43
|
+
skopus --help
|
|
44
|
+
skopus bench list
|
skopus-0.1.1/.gitignore
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
build/
|
|
8
|
+
dist/
|
|
9
|
+
*.egg-info/
|
|
10
|
+
*.egg
|
|
11
|
+
.eggs/
|
|
12
|
+
.pytest_cache/
|
|
13
|
+
.mypy_cache/
|
|
14
|
+
.ruff_cache/
|
|
15
|
+
.coverage
|
|
16
|
+
htmlcov/
|
|
17
|
+
.tox/
|
|
18
|
+
|
|
19
|
+
# venv
|
|
20
|
+
.venv/
|
|
21
|
+
venv/
|
|
22
|
+
env/
|
|
23
|
+
ENV/
|
|
24
|
+
|
|
25
|
+
# IDE
|
|
26
|
+
.vscode/
|
|
27
|
+
.idea/
|
|
28
|
+
*.swp
|
|
29
|
+
*.swo
|
|
30
|
+
*~
|
|
31
|
+
|
|
32
|
+
# OS
|
|
33
|
+
.DS_Store
|
|
34
|
+
Thumbs.db
|
|
35
|
+
|
|
36
|
+
# Benchmark outputs
|
|
37
|
+
bench/results/
|
|
38
|
+
bench/datasets/raw/
|
|
39
|
+
|
|
40
|
+
# Skopus runtime artifacts (when testing)
|
|
41
|
+
.skopus-test/
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to Skopus are documented here. Format follows
|
|
4
|
+
[Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and the project
|
|
5
|
+
uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
6
|
+
|
|
7
|
+
## [0.1.1] — 2026-04-10
|
|
8
|
+
|
|
9
|
+
Bug-fix release: `/graphify` was not actually invokable as a slash command
|
|
10
|
+
in Claude Code after `skopus init`.
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
- **`/graphify` slash command missing in Claude Code** — graphify has two
|
|
14
|
+
install commands that do different things: `graphify install` (global,
|
|
15
|
+
one-time, copies the skill file to `~/.claude/skills/graphify/SKILL.md`)
|
|
16
|
+
and `graphify claude install` (per-project, writes CLAUDE.md block +
|
|
17
|
+
PreToolUse hook). Skopus was only calling the per-project one, which
|
|
18
|
+
meant `/graphify` was never a real slash command in Claude Code — only
|
|
19
|
+
the hook and CLAUDE.md block were installed. Fix: skopus now calls
|
|
20
|
+
`graphify install` (the global one-time step) before
|
|
21
|
+
`graphify claude install` during `skopus init`.
|
|
22
|
+
|
|
23
|
+
### Added
|
|
24
|
+
- `skopus.graphify_bridge.ensure_graphify_skill_installed()` — idempotent
|
|
25
|
+
helper that copies the graphify skill file if missing. Safe to call on
|
|
26
|
+
every `skopus init`. Short-circuits when the skill file already exists.
|
|
27
|
+
- Two new tests covering the short-circuit and bool-return contract.
|
|
28
|
+
|
|
29
|
+
### Testing
|
|
30
|
+
- **97 tests passing**, 1 skipped (up from 95 in v0.1.0)
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
## [0.1.0] — 2026-04-10
|
|
35
|
+
|
|
36
|
+
The **benchmark release**. Skopus is now measurable, reproducible, and
|
|
37
|
+
compound-over-sessions with a full benchmark harness and a novel
|
|
38
|
+
Correction-Persistence dataset.
|
|
39
|
+
|
|
40
|
+
### Added
|
|
41
|
+
- `bench/` package with a unified benchmark harness
|
|
42
|
+
- **Correction-Persistence** benchmark (novel, skopus's research contribution):
|
|
43
|
+
- 20 scenarios at v0.1.0-alpha (target: 100+ for v1.0)
|
|
44
|
+
- Domains: code, prose, reasoning, tool-use
|
|
45
|
+
- Runner, scorer, and dataset loader
|
|
46
|
+
- `LensConfig` — 5-config ablation framework:
|
|
47
|
+
- `vanilla` (no skopus)
|
|
48
|
+
- `charter` (+charter only)
|
|
49
|
+
- `charter+memory` (+charter +feedback memory)
|
|
50
|
+
- `charter+memory+vault` (+decisions and learnings)
|
|
51
|
+
- `full` (+graph via graphify MCP)
|
|
52
|
+
- `LLMDriver` abstraction with two implementations:
|
|
53
|
+
- `MockDriver` — deterministic responses for testing without API cost
|
|
54
|
+
- `AnthropicDriver` — real Claude API calls (requires `ANTHROPIC_API_KEY`)
|
|
55
|
+
- `bench.harness` — `run_benchmark`, `run_all`, `run_ablation`, `save_report`,
|
|
56
|
+
`format_markdown_report`
|
|
57
|
+
- `bench.context.build_system_prompt` — converts a `LensConfig` to a system
|
|
58
|
+
prompt that encodes the corresponding amount of skopus context
|
|
59
|
+
- Stub wrappers for `LongMemEval`, `LoCoMo`, `MSC`, and `RULER` with
|
|
60
|
+
integration paths documented (full runners planned for v0.1.1)
|
|
61
|
+
- New CLI subcommands under `skopus bench`:
|
|
62
|
+
- `skopus bench list` — show available benchmarks
|
|
63
|
+
- `skopus bench run <name> [--lens | --ablation] [--driver] [--limit]`
|
|
64
|
+
- JSON results auto-saved to `bench/results/`
|
|
65
|
+
- GitHub Actions workflow `.github/workflows/test.yml` — lint + format check +
|
|
66
|
+
full pytest on every push and PR
|
|
67
|
+
- CHANGELOG.md
|
|
68
|
+
|
|
69
|
+
### Changed
|
|
70
|
+
- Package layout: `bench/` now shipped as a sibling package inside the wheel
|
|
71
|
+
- `skopus init` auto-tracks the current directory as a linked project when
|
|
72
|
+
an adapter is wired (previously only `skopus link` did)
|
|
73
|
+
|
|
74
|
+
### Testing
|
|
75
|
+
- **95 tests passing**, 1 skipped (up from 72 in v0.0.3)
|
|
76
|
+
- New test files:
|
|
77
|
+
- `tests/test_bench_cp.py` — Correction-Persistence coverage
|
|
78
|
+
- `tests/test_bench_harness.py` — harness dispatch, ablation, reports
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## [0.0.3] — 2026-04-10
|
|
83
|
+
|
|
84
|
+
Multi-agent expansion — five new platform adapters plus the session-end
|
|
85
|
+
reflection loop.
|
|
86
|
+
|
|
87
|
+
### Added
|
|
88
|
+
- `MarkdownAdapter` DRY base class in `skopus.adapters.base`
|
|
89
|
+
- Five new platform adapters:
|
|
90
|
+
- `CursorAdapter` — `.cursor/rules/skopus.mdc` with `alwaysApply: true`
|
|
91
|
+
- `CodexAdapter` — `AGENTS.md`
|
|
92
|
+
- `AiderAdapter` — `AGENTS.md` with custom detect for `.aider.conf.yml`
|
|
93
|
+
- `GeminiCliAdapter` — `GEMINI.md`
|
|
94
|
+
- `CopilotCliAdapter` — `AGENTS.md` with gh/copilot binary detection
|
|
95
|
+
- `skopus.evolve` — session-end reflection loop:
|
|
96
|
+
- Interactive 3-question prompt (validated calls, drifts, rules)
|
|
97
|
+
- Writes feedback files to `~/.skopus/memory/feedback/YYYY-MM-DD-<slug>.md`
|
|
98
|
+
- Appends drifts to `workflow_partnership.md §7`, wins to `§8`
|
|
99
|
+
- Auto-commits to `~/.skopus/.git`
|
|
100
|
+
- Programmatic mode for testing: `run_evolve(entries=[...])`
|
|
101
|
+
- `skopus charter evolve` CLI command
|
|
102
|
+
- `SKOPUS_SECTION_START/END` markers and `build_skopus_block()` moved from
|
|
103
|
+
`claude_code.py` to `base.py` (shared by all adapters)
|
|
104
|
+
- 22 new tests covering the multi-adapter pattern + evolve
|
|
105
|
+
|
|
106
|
+
### Changed
|
|
107
|
+
- `claude_code.py` refactored to import shared helpers from `base.py`
|
|
108
|
+
- Registry includes aliases (`gemini` → `gemini-cli`, `copilot` → `copilot-cli`)
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## [0.0.2] — 2026-04-10
|
|
113
|
+
|
|
114
|
+
Graphify integration + non-destructive init + `.claude/CLAUDE.md` preference.
|
|
115
|
+
|
|
116
|
+
### Added
|
|
117
|
+
- **Graphify as a hard dependency** (`graphifyy>=0.1`) — the fourth lens
|
|
118
|
+
ships with every install
|
|
119
|
+
- `skopus.graphify_bridge` — installation helpers:
|
|
120
|
+
- `install_graphify_for_claude()` — runs `graphify claude install` +
|
|
121
|
+
`graphify hook install` in a project
|
|
122
|
+
- `_consolidate_graphify_block()` — moves graphify's block from root
|
|
123
|
+
`CLAUDE.md` into `.claude/CLAUDE.md` (skopus convention)
|
|
124
|
+
- `first_build_hint()` — reads scope hint for the first-build reminder
|
|
125
|
+
- `skopus init` auto-wires graphify into linked projects
|
|
126
|
+
- `skopus doctor` reports per-project graph status
|
|
127
|
+
- `--force` flag on `skopus init` for explicit overwrite
|
|
128
|
+
- `MaterializeReport` return type with `written` and `skipped` lists
|
|
129
|
+
- `claude_md_path()` helper preferring `.claude/CLAUDE.md` over root
|
|
130
|
+
- 11 new tests covering graphify bridge, path resolution, non-destructive merge
|
|
131
|
+
|
|
132
|
+
### Changed
|
|
133
|
+
- `renderer.materialize()` is non-destructive by default — existing files are
|
|
134
|
+
preserved unless `force=True` is passed
|
|
135
|
+
- `claude_code.adapter.install()` now prefers `<project>/.claude/CLAUDE.md`
|
|
136
|
+
- `adapters.lock` now tracks the vault location explicitly
|
|
137
|
+
|
|
138
|
+
### Fixed
|
|
139
|
+
- `git commit` during init now uses inline identity flags so it works
|
|
140
|
+
without a configured global git user/email
|
|
141
|
+
- Initial branch set to `main` (instead of `master`)
|
|
142
|
+
- `skopus init` + `skopus link` both update `projects.json` via
|
|
143
|
+
the shared `_track_linked_project()` helper
|
|
144
|
+
|
|
145
|
+
---
|
|
146
|
+
|
|
147
|
+
## [0.0.1] — 2026-04-10
|
|
148
|
+
|
|
149
|
+
Initial bootstrap. The four-lens model as runnable code.
|
|
150
|
+
|
|
151
|
+
### Added
|
|
152
|
+
- Core Python package scaffold (`skopus/`, `pyproject.toml`, `Makefile`,
|
|
153
|
+
`docs/DESIGN.md`, MIT `LICENSE`, `README.md`)
|
|
154
|
+
- Bundled Jinja2 markdown templates for charter, memory, and vault
|
|
155
|
+
- `skopus init` — interactive wizard (10 questions, `questionary`)
|
|
156
|
+
- `skopus link` / `skopus unlink` — per-project adapter wiring
|
|
157
|
+
- `skopus doctor` — health check
|
|
158
|
+
- `Adapter` ABC + `ClaudeCodeAdapter` reference implementation
|
|
159
|
+
- 24 initial tests
|
|
160
|
+
|
|
161
|
+
### Design decisions locked
|
|
162
|
+
- Four-lens mental model: charter, memory, vault, graph
|
|
163
|
+
- Monorepo with Python package + bench subpackage
|
|
164
|
+
- Non-destructive-by-default init
|
|
165
|
+
- Multi-agent from day one
|
|
166
|
+
- Benchmarks as first-class CI-gated deliverable
|
|
167
|
+
- Graphify integration as the structural knowledge layer
|
|
168
|
+
- Personalization via interactive wizard + `/charter-evolve` loop
|
|
169
|
+
|
|
170
|
+
See [`docs/DESIGN.md`](docs/DESIGN.md) for the full spec.
|
skopus-0.1.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Carlos Valentin and Skopus contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
skopus-0.1.1/Makefile
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
.PHONY: install dev test lint typecheck fmt clean build
|
|
2
|
+
|
|
3
|
+
install:
|
|
4
|
+
pip install -e .
|
|
5
|
+
|
|
6
|
+
dev:
|
|
7
|
+
pip install -e ".[dev]"
|
|
8
|
+
|
|
9
|
+
test:
|
|
10
|
+
pytest
|
|
11
|
+
|
|
12
|
+
lint:
|
|
13
|
+
ruff check skopus tests
|
|
14
|
+
|
|
15
|
+
fmt:
|
|
16
|
+
ruff format skopus tests
|
|
17
|
+
|
|
18
|
+
typecheck:
|
|
19
|
+
mypy skopus
|
|
20
|
+
|
|
21
|
+
ci: lint typecheck test
|
|
22
|
+
|
|
23
|
+
clean:
|
|
24
|
+
rm -rf build dist *.egg-info
|
|
25
|
+
find . -type d -name __pycache__ -exec rm -rf {} +
|
|
26
|
+
find . -type d -name .pytest_cache -exec rm -rf {} +
|
|
27
|
+
find . -type d -name .mypy_cache -exec rm -rf {} +
|
|
28
|
+
find . -type d -name .ruff_cache -exec rm -rf {} +
|
|
29
|
+
|
|
30
|
+
build:
|
|
31
|
+
python -m build
|
skopus-0.1.1/PKG-INFO
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: skopus
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Persistent four-lens context for AI coding assistants — charter, memory, vault, graph. One install, multi-agent, benchmark-driven.
|
|
5
|
+
Project-URL: Homepage, https://github.com/cvalentinhorizontes/skopus
|
|
6
|
+
Project-URL: Documentation, https://github.com/cvalentinhorizontes/skopus/blob/main/docs/DESIGN.md
|
|
7
|
+
Project-URL: Issues, https://github.com/cvalentinhorizontes/skopus/issues
|
|
8
|
+
Project-URL: Repository, https://github.com/cvalentinhorizontes/skopus
|
|
9
|
+
Author: Carlos Valentin
|
|
10
|
+
License: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: agent,ai,claude-code,codex,cursor,karpathy,knowledge-graph,llm-wiki,memory
|
|
13
|
+
Classifier: Development Status :: 3 - Alpha
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: MacOS
|
|
17
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
23
|
+
Classifier: Topic :: Software Development
|
|
24
|
+
Classifier: Topic :: Software Development :: Documentation
|
|
25
|
+
Requires-Python: >=3.10
|
|
26
|
+
Requires-Dist: graphifyy[leiden,mcp,pdf,watch]>=0.1
|
|
27
|
+
Requires-Dist: jinja2>=3.1
|
|
28
|
+
Requires-Dist: pyyaml>=6.0
|
|
29
|
+
Requires-Dist: questionary>=2.0
|
|
30
|
+
Requires-Dist: rich>=13.7
|
|
31
|
+
Requires-Dist: typer>=0.12
|
|
32
|
+
Provides-Extra: bench
|
|
33
|
+
Requires-Dist: datasets>=2.14; extra == 'bench'
|
|
34
|
+
Requires-Dist: numpy>=1.26; extra == 'bench'
|
|
35
|
+
Provides-Extra: dev
|
|
36
|
+
Requires-Dist: mypy>=1.10; extra == 'dev'
|
|
37
|
+
Requires-Dist: pytest-mock>=3.12; extra == 'dev'
|
|
38
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
39
|
+
Requires-Dist: ruff>=0.5; extra == 'dev'
|
|
40
|
+
Provides-Extra: video
|
|
41
|
+
Requires-Dist: graphifyy[video]>=0.1; extra == 'video'
|
|
42
|
+
Description-Content-Type: text/markdown
|
|
43
|
+
|
|
44
|
+
# Skopus
|
|
45
|
+
|
|
46
|
+
**Persistent four-lens context for AI coding assistants.** Charter, memory, vault, graph — one install, multi-agent, benchmark-driven.
|
|
47
|
+
|
|
48
|
+
> σκοπός *(skopos)* — Greek for *watcher*, *lookout*, *target*, *purpose*. The root of *scope*, *telescope*, *episcopal*. A system that gives agents durable scope across sessions.
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## The problem
|
|
53
|
+
|
|
54
|
+
Every AI coding assistant — Claude Code, Cursor, Codex, Aider, Gemini CLI, Copilot CLI — loses context at the end of every session. You teach them your preferences, they forget. You correct them, they repeat the mistake next week. Your hard-won lessons evaporate into chat history.
|
|
55
|
+
|
|
56
|
+
The few persistent-memory systems that exist (claude-mem, Mem0, MemPalace, OpenAI memory) record conversations but don't encode *how you work* — the non-negotiables, the drift log, the anti-rationalization rules, the "do this, not that" patterns that actually make a collaboration compound. Meanwhile, structural knowledge about a codebase gets rediscovered every session via grep because nothing persists the map.
|
|
57
|
+
|
|
58
|
+
The result: agents that are smart per-message but stupid across sessions, and humans who spend half their time re-teaching.
|
|
59
|
+
|
|
60
|
+
## The promise
|
|
61
|
+
|
|
62
|
+
A unified **four-lens context system** any AI coding assistant can load at session start:
|
|
63
|
+
|
|
64
|
+
1. **Charter** — how you work together (non-negotiables, anti-rationalization table, drift log)
|
|
65
|
+
2. **Memory** — what happened before (feedback, corrections, project state)
|
|
66
|
+
3. **Vault** — what you decided and learned (narrative wiki, Karpathy `/raw` pattern)
|
|
67
|
+
4. **Graph** — what the code looks like (via [graphify](https://github.com/safishamsi/graphify))
|
|
68
|
+
|
|
69
|
+
One install. Works with 6+ agents. Ships with a benchmark suite that proves it works.
|
|
70
|
+
|
|
71
|
+
## Quickstart
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
pip install skopus
|
|
75
|
+
skopus init # interactive wizard (10 questions, ~5 min)
|
|
76
|
+
cd my-project && skopus link # wire the current project to your charter + vault
|
|
77
|
+
skopus doctor # health check all four lenses
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## What ships at v0.1.1 (alpha)
|
|
81
|
+
|
|
82
|
+
- ✅ **Charter templates** — high-level `CLAUDE.md`, full `workflow_partnership.md`, `user_profile.md`
|
|
83
|
+
- ✅ **Memory scaffold** — `MEMORY.md` index, feedback/project templates, 6 seed profiles
|
|
84
|
+
- ✅ **Vault scaffold** — Karpathy `raw/wiki/output` layout with `/ingest`, `/compile`, `/query`, `/lint`, `/wiki` slash commands
|
|
85
|
+
- ✅ **Interactive wizard** — 10-question personalization flow (+ `--non-interactive` for CI)
|
|
86
|
+
- ✅ **Non-destructive init** — re-running `skopus init` preserves user edits by default; `--force` to overwrite
|
|
87
|
+
- ✅ **Six platform adapters** — Claude Code, Cursor, Codex, Aider, Gemini CLI, Copilot CLI. All idempotent with automatic backup.
|
|
88
|
+
- ✅ **Graphify integration** — hard dependency, automatic wiring of graphify's PreToolUse hook + git post-commit hook, consolidation of graphify's block into `.claude/CLAUDE.md`
|
|
89
|
+
- ✅ **`skopus charter evolve`** — session-end reflection loop. Three-question interactive prompt captures validated calls, drifts, and new rules into feedback memory and the charter's drift log. The mechanism that makes the charter compound.
|
|
90
|
+
- ✅ **`skopus doctor`** — health check across all four lenses plus linked projects
|
|
91
|
+
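- ✅ **Benchmark harness** — shipped in v0.1.0: the Correction-Persistence benchmark with a 5-config lens ablation. LongMemEval, LoCoMo, MSC, and RULER are currently stub wrappers; their full runners are still planned.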
|
|
92
|
+
|
|
93
|
+
### Supported platforms (v0.1.1)
|
|
94
|
+
|
|
95
|
+
| Platform | Context file | Detection |
|
|
96
|
+
|---|---|---|
|
|
97
|
+
| **Claude Code** | `.claude/CLAUDE.md` (preferred) or root `CLAUDE.md` | `~/.claude/` |
|
|
98
|
+
| **Cursor** | `.cursor/rules/skopus.mdc` (alwaysApply: true) | `cursor` binary or `~/.cursor/` |
|
|
99
|
+
| **Codex** (OpenAI) | `AGENTS.md` | `codex` binary or `~/.codex/` |
|
|
100
|
+
| **Aider** | `AGENTS.md` | `aider` binary or `~/.aider.conf.yml` |
|
|
101
|
+
| **Gemini CLI** | `GEMINI.md` | `gemini` binary or `~/.gemini/` |
|
|
102
|
+
| **Copilot CLI** | `AGENTS.md` | `gh` / `copilot` binary or `~/.copilot/` |
|
|
103
|
+
|
|
104
|
+
See [`docs/DESIGN.md`](docs/DESIGN.md) for the full spec and roadmap.
|
|
105
|
+
|
|
106
|
+
## The benchmark pillar
|
|
107
|
+
|
|
108
|
+
Skopus is designed to be **measurable**. The charter's core non-negotiable — *evidence over assumption* — is applied reflexively to the project itself. Every PR that touches the charter templates, adapter wiring, or wizard flow must move a benchmark number or explain why it's orthogonal.
|
|
109
|
+
|
|
110
|
+
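The `skopus bench run` harness targets the following benchmarks (as of v0.1.1, only Correction-Persistence has a full runner; the others are stub wrappers):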
|
|
111
|
+
|
|
112
|
+
| Benchmark | What it tests |
|
|
113
|
+
|---|---|
|
|
114
|
+
| **LongMemEval** (Wu et al. 2024) | 6 memory abilities: single-session, multi-session, knowledge update, temporal reasoning, explicit/implicit refs |
|
|
115
|
+
| **LoCoMo** (Maharana et al. 2024) | Long multi-session conversations |
|
|
116
|
+
| **MSC** (Facebook 2021) | Persona consistency across sessions |
|
|
117
|
+
| **RULER** (NVIDIA 2024) | Long-context retrieval, up to 128K ctx |
|
|
118
|
+
| **Skopus Correction-Persistence** (novel) | Does the agent apply yesterday's corrections to today's tasks? |
|
|
119
|
+
|
|
120
|
+
The ablation mode measures the additive contribution of each lens:
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
skopus bench run all --ablation --agent claude-code
|
|
124
|
+
# Runs vanilla / +charter / +memory / +vault / +graph — shows delta per lens
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## Philosophy
|
|
128
|
+
|
|
129
|
+
Skopus combines three existing patterns into one coherent system:
|
|
130
|
+
|
|
131
|
+
- **Karpathy's LLM Knowledge Base** ([tweet](https://x.com/karpathy/status/2039805659525644595), [gist](https://gist.github.com/karpathy/442a6bf555914893e9891c11519de94f)) — the `raw/wiki/output` three-folder split with Ingest/Query/Lint operations, framed as a modern Vannevar Bush Memex.
|
|
132
|
+
- **The Partnership Charter** — evidence over assumption, premium quality, anti-rationalization tables, drift logs. A meta-workflow layer most agent setups don't have.
|
|
133
|
+
- **Graphify** ([safishamsi/graphify](https://github.com/safishamsi/graphify)) — automatic structural knowledge graph extraction from any codebase with honest audit trails (`EXTRACTED` / `INFERRED` / `AMBIGUOUS`).
|
|
134
|
+
|
|
135
|
+
Nothing here is new on its own. The contribution is the **coherent integration** plus the **benchmark commitment** to prove it works.
|
|
136
|
+
|
|
137
|
+
## Contributing
|
|
138
|
+
|
|
139
|
+
New platform adapters welcome. Each adapter is a single Python file implementing a 5-method ABC (`detect`, `install`, `uninstall`, `status`, `session_end_hook`). See `skopus/adapters/base.py` and `skopus/adapters/claude_code.py` for the reference implementation.
|
|
140
|
+
|
|
141
|
+
Benchmark dataset contributions welcome — especially for the Correction-Persistence benchmark, which currently ships with 20 scenarios in `bench/correction_persistence/dataset.json` (target: 100+ for v1.0).
|
|
142
|
+
|
|
143
|
+
## License
|
|
144
|
+
|
|
145
|
+
MIT — see [LICENSE](LICENSE).
|
skopus-0.1.1/README.md
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# Skopus
|
|
2
|
+
|
|
3
|
+
**Persistent four-lens context for AI coding assistants.** Charter, memory, vault, graph — one install, multi-agent, benchmark-driven.
|
|
4
|
+
|
|
5
|
+
> σκοπός *(skopos)* — Greek for *watcher*, *lookout*, *target*, *purpose*. The root of *scope*, *telescope*, *episcopal*. A system that gives agents durable scope across sessions.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## The problem
|
|
10
|
+
|
|
11
|
+
Every AI coding assistant — Claude Code, Cursor, Codex, Aider, Gemini CLI, Copilot CLI — loses context at the end of every session. You teach them your preferences, they forget. You correct them, they repeat the mistake next week. Your hard-won lessons evaporate into chat history.
|
|
12
|
+
|
|
13
|
+
The few persistent-memory systems that exist (claude-mem, Mem0, MemPalace, OpenAI memory) record conversations but don't encode *how you work* — the non-negotiables, the drift log, the anti-rationalization rules, the "do this, not that" patterns that actually make a collaboration compound. Meanwhile, structural knowledge about a codebase gets rediscovered every session via grep because nothing persists the map.
|
|
14
|
+
|
|
15
|
+
The result: agents that are smart per-message but stupid across sessions, and humans who spend half their time re-teaching.
|
|
16
|
+
|
|
17
|
+
## The promise
|
|
18
|
+
|
|
19
|
+
A unified **four-lens context system** any AI coding assistant can load at session start:
|
|
20
|
+
|
|
21
|
+
1. **Charter** — how you work together (non-negotiables, anti-rationalization table, drift log)
|
|
22
|
+
2. **Memory** — what happened before (feedback, corrections, project state)
|
|
23
|
+
3. **Vault** — what you decided and learned (narrative wiki, Karpathy `raw/wiki/output` pattern)
|
|
24
|
+
4. **Graph** — what the code looks like (via [graphify](https://github.com/safishamsi/graphify))
|
|
25
|
+
|
|
26
|
+
One install. Works with 6+ agents. Ships with a benchmark suite that proves it works.
|
|
27
|
+
|
|
28
|
+
## Quickstart
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install skopus
|
|
32
|
+
skopus init # interactive wizard (10 questions, ~5 min)
|
|
33
|
+
cd my-project && skopus link # wire the current project to your charter + vault
|
|
34
|
+
skopus doctor # health check all four lenses
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## What ships at v0.0.3 (alpha)
|
|
38
|
+
|
|
39
|
+
- ✅ **Charter templates** — high-level `CLAUDE.md`, full `workflow_partnership.md`, `user_profile.md`
|
|
40
|
+
- ✅ **Memory scaffold** — `MEMORY.md` index, feedback/project templates, 6 seed profiles
|
|
41
|
+
- ✅ **Vault scaffold** — Karpathy `raw/wiki/output` layout with `/ingest`, `/compile`, `/query`, `/lint`, `/wiki` slash commands
|
|
42
|
+
- ✅ **Interactive wizard** — 10-question personalization flow (+ `--non-interactive` for CI)
|
|
43
|
+
- ✅ **Non-destructive init** — re-running `skopus init` preserves user edits by default; `--force` to overwrite
|
|
44
|
+
- ✅ **Six platform adapters** — Claude Code, Cursor, Codex, Aider, Gemini CLI, Copilot CLI. All idempotent with automatic backup.
|
|
45
|
+
- ✅ **Graphify integration** — hard dependency, automatic wiring of graphify's PreToolUse hook + git post-commit hook, consolidation of graphify's block into `.claude/CLAUDE.md`
|
|
46
|
+
- ✅ **`skopus charter evolve`** — session-end reflection loop. Three-question interactive prompt captures validated calls, drifts, and new rules into feedback memory and the charter's drift log. The mechanism that makes the charter compound.
|
|
47
|
+
- ✅ **`skopus doctor`** — health check across all four lenses plus linked projects
|
|
48
|
+
- 🚧 **Benchmark harness** — LongMemEval, LoCoMo, MSC, RULER, Correction-Persistence — planned for v0.1.0
|
|
49
|
+
|
|
50
|
+
### Supported platforms (v0.0.3)
|
|
51
|
+
|
|
52
|
+
| Platform | Context file | Detection |
|
|
53
|
+
|---|---|---|
|
|
54
|
+
| **Claude Code** | `.claude/CLAUDE.md` (preferred) or root `CLAUDE.md` | `~/.claude/` |
|
|
55
|
+
| **Cursor** | `.cursor/rules/skopus.mdc` (alwaysApply: true) | `cursor` binary or `~/.cursor/` |
|
|
56
|
+
| **Codex** (OpenAI) | `AGENTS.md` | `codex` binary or `~/.codex/` |
|
|
57
|
+
| **Aider** | `AGENTS.md` | `aider` binary or `~/.aider.conf.yml` |
|
|
58
|
+
| **Gemini CLI** | `GEMINI.md` | `gemini` binary or `~/.gemini/` |
|
|
59
|
+
| **Copilot CLI** | `AGENTS.md` | `gh` / `copilot` binary or `~/.copilot/` |
|
|
60
|
+
|
|
61
|
+
See [`docs/DESIGN.md`](docs/DESIGN.md) for the full spec and roadmap.
|
|
62
|
+
|
|
63
|
+
## The benchmark pillar
|
|
64
|
+
|
|
65
|
+
Skopus is designed to be **measurable**. The charter's core non-negotiable — *evidence over assumption* — is applied reflexively to the project itself. Every PR that touches the charter templates, adapter wiring, or wizard flow must move a benchmark number or explain why it's orthogonal.
|
|
66
|
+
|
|
67
|
+
At v0.1.0, the `skopus bench run` harness will run:
|
|
68
|
+
|
|
69
|
+
| Benchmark | What it tests |
|
|
70
|
+
|---|---|
|
|
71
|
+
| **LongMemEval** (Wu et al. 2024) | 5 core memory abilities: information extraction, multi-session reasoning, knowledge updates, temporal reasoning, abstention |
|
|
72
|
+
| **LoCoMo** (Maharana et al. 2024) | Long multi-session conversations |
|
|
73
|
+
| **MSC** (Facebook 2021) | Persona consistency across sessions |
|
|
74
|
+
| **RULER** (NVIDIA 2024) | Long-context retrieval, up to 128K ctx |
|
|
75
|
+
| **Skopus Correction-Persistence** (novel) | Does the agent apply yesterday's corrections to today's tasks? |
|
|
76
|
+
|
|
77
|
+
The ablation mode measures the additive contribution of each lens:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
skopus bench run all --ablation --agent claude-code
|
|
81
|
+
# Runs vanilla / +charter / +memory / +vault / +graph — shows delta per lens
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Philosophy
|
|
85
|
+
|
|
86
|
+
Skopus combines three existing patterns into one coherent system:
|
|
87
|
+
|
|
88
|
+
- **Karpathy's LLM Knowledge Base** ([tweet](https://x.com/karpathy/status/2039805659525644595), [gist](https://gist.github.com/karpathy/442a6bf555914893e9891c11519de94f)) — the `raw/wiki/output` three-folder split with Ingest/Query/Lint operations, framed as a modern Vannevar Bush Memex.
|
|
89
|
+
- **The Partnership Charter** — evidence over assumption, premium quality, anti-rationalization tables, drift logs. A meta-workflow layer most agent setups don't have.
|
|
90
|
+
- **Graphify** ([safishamsi/graphify](https://github.com/safishamsi/graphify)) — automatic structural knowledge graph extraction from any codebase with honest audit trails (`EXTRACTED` / `INFERRED` / `AMBIGUOUS`).
|
|
91
|
+
|
|
92
|
+
Nothing here is new on its own. The contribution is the **coherent integration** plus the **benchmark commitment** to prove it works.
|
|
93
|
+
|
|
94
|
+
## Contributing
|
|
95
|
+
|
|
96
|
+
New platform adapters welcome. Each adapter is a single Python file implementing a 5-method ABC (`detect`, `install`, `uninstall`, `status`, `session_end_hook`). See `skopus/adapters/base.py` and `skopus/adapters/claude_code.py` for the reference implementation.
|
|
97
|
+
|
|
98
|
+
Benchmark dataset contributions welcome — especially for the Correction-Persistence benchmark, which ships with 100+ scenarios in `bench/correction_persistence/dataset.json`.
|
|
99
|
+
|
|
100
|
+
## License
|
|
101
|
+
|
|
102
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Skopus benchmark harness.
|
|
2
|
+
|
|
3
|
+
Measures the additive contribution of each lens (charter, memory, vault,
|
|
4
|
+
graph) against an LLM baseline across five benchmarks:
|
|
5
|
+
|
|
6
|
+
- LongMemEval (Wu et al. 2024) — cross-session memory
|
|
7
|
+
- LoCoMo (Maharana et al. 2024) — long multi-session conversations
|
|
8
|
+
- MSC (Facebook 2021) — persona consistency
|
|
9
|
+
- RULER (NVIDIA 2024) — long-context retrieval
|
|
10
|
+
- Correction-Persistence (novel) — does yesterday's correction persist today?
|
|
11
|
+
|
|
12
|
+
The harness ships with five lens configurations for ablation:
|
|
13
|
+
|
|
14
|
+
1. vanilla — LLM call, no skopus context
|
|
15
|
+
2. +charter — charter loaded into system prompt
|
|
16
|
+
3. +charter +memory — + feedback/project memory files
|
|
17
|
+
4. +charter +memory +vault — + relevant vault pages
|
|
18
|
+
5. full skopus — + graphify MCP tools available
|
|
19
|
+
|
|
20
|
+
Running:
|
|
21
|
+
|
|
22
|
+
skopus bench run cp # Correction-Persistence only
|
|
23
|
+
skopus bench run all # All 5 benchmarks
|
|
24
|
+
skopus bench run all --ablation # Ablation across 5 configs
|
|
25
|
+
skopus bench list # Show available benchmarks
|
|
26
|
+
skopus bench report # Render markdown report from results
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
__version__ = "0.1.0-alpha"
|