comptext-codex 5.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- comptext_codex/__init__.py +50 -0
- comptext_codex/cli.py +91 -0
- comptext_codex/cli_v5.py +330 -0
- comptext_codex/executor.py +248 -0
- comptext_codex/mcp_server_v5.py +343 -0
- comptext_codex/modules/__init__.py +20 -0
- comptext_codex/modules/base.py +33 -0
- comptext_codex/modules/module_a.py +55 -0
- comptext_codex/modules/module_b.py +84 -0
- comptext_codex/modules/module_c.py +199 -0
- comptext_codex/modules/module_d.py +31 -0
- comptext_codex/modules/module_e.py +33 -0
- comptext_codex/modules/module_f.py +31 -0
- comptext_codex/modules/module_g.py +31 -0
- comptext_codex/modules/module_h.py +31 -0
- comptext_codex/modules/module_i.py +42 -0
- comptext_codex/modules/module_j.py +38 -0
- comptext_codex/modules/module_k.py +497 -0
- comptext_codex/modules/module_l.py +536 -0
- comptext_codex/modules/module_m.py +34 -0
- comptext_codex/parser.py +353 -0
- comptext_codex/parser_v5.py +330 -0
- comptext_codex/registry.py +236 -0
- comptext_codex/repl.py +410 -0
- comptext_codex/store.py +363 -0
- comptext_codex/token_reduction.py +115 -0
- comptext_codex/token_report.py +148 -0
- comptext_codex-5.0.0.dist-info/METADATA +466 -0
- comptext_codex-5.0.0.dist-info/RECORD +33 -0
- comptext_codex-5.0.0.dist-info/WHEEL +5 -0
- comptext_codex-5.0.0.dist-info/entry_points.txt +4 -0
- comptext_codex-5.0.0.dist-info/licenses/LICENSE +21 -0
- comptext_codex-5.0.0.dist-info/top_level.txt +1 -0
comptext_codex/store.py
ADDED
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
"""CodexStore - SQLite-based data layer replacing YAML/Notion flat files.
|
|
2
|
+
|
|
3
|
+
Provides a single-file, zero-config, queryable data store for all codex
|
|
4
|
+
definitions: modules, commands, profiles, and catalog entries. Uses WAL
|
|
5
|
+
mode for concurrent readers.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import sqlite3
|
|
10
|
+
import threading
|
|
11
|
+
from contextlib import contextmanager
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any, Dict, List, Optional
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Default DB location: three directory levels above this file, i.e. outside the
# package itself.  NOTE(review): this assumes an in-repo checkout layout; for a
# site-packages install this lands outside the package tree — confirm intended.
_DEFAULT_DB_PATH = Path(__file__).resolve().parent.parent.parent / "codex.db"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class CodexStore:
    """Embedded SQLite store for CompText Codex data.

    Single-file, zero-config, queryable store for modules, commands,
    profiles and catalog entries.  One connection is kept per thread
    (``threading.local``) and the database runs in WAL journal mode so
    concurrent readers do not block each other.
    """

    def __init__(self, db_path: Optional[str] = None):
        """Open (or create) the store at *db_path*, defaulting to ``_DEFAULT_DB_PATH``."""
        self._db_path = str(db_path or _DEFAULT_DB_PATH)
        self._local = threading.local()
        self._ensure_schema()

    # -- connection management ------------------------------------------------

    @property
    def _conn(self) -> sqlite3.Connection:
        """Return this thread's connection, creating and configuring it lazily."""
        conn = getattr(self._local, "conn", None)
        if conn is None:
            conn = sqlite3.connect(self._db_path)
            conn.row_factory = sqlite3.Row
            conn.execute("PRAGMA journal_mode=WAL")
            conn.execute("PRAGMA foreign_keys=ON")
            self._local.conn = conn
        return conn

    @contextmanager
    def _cursor(self):
        """Yield a cursor; commit on success, roll back on error, always close."""
        cur = self._conn.cursor()
        try:
            yield cur
            self._conn.commit()
        except Exception:
            self._conn.rollback()
            raise
        finally:
            cur.close()

    def close(self):
        """Close the current thread's connection, if one was opened."""
        conn = getattr(self._local, "conn", None)
        if conn is not None:
            try:
                conn.close()
            finally:
                # Reset even if close() raised so a later call reconnects
                # instead of reusing a broken handle.
                self._local.conn = None

    # -- schema ---------------------------------------------------------------

    def _ensure_schema(self):
        """Create all tables and indexes if they do not already exist (idempotent)."""
        with self._cursor() as cur:
            cur.executescript("""
            CREATE TABLE IF NOT EXISTS modules (
                code TEXT PRIMARY KEY,
                name TEXT NOT NULL,
                purpose TEXT NOT NULL DEFAULT '',
                token_priority TEXT NOT NULL DEFAULT 'medium',
                mcp_exposed INTEGER NOT NULL DEFAULT 1,
                security TEXT NOT NULL DEFAULT '{}',
                privacy TEXT NOT NULL DEFAULT '{}'
            );

            CREATE TABLE IF NOT EXISTS commands (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                module TEXT NOT NULL REFERENCES modules(code),
                command TEXT NOT NULL,
                syntax TEXT NOT NULL DEFAULT '',
                description TEXT NOT NULL DEFAULT '',
                examples TEXT NOT NULL DEFAULT '[]',
                aliases TEXT NOT NULL DEFAULT '[]',
                token_cost_hint INTEGER NOT NULL DEFAULT 0,
                mcp_exposed INTEGER NOT NULL DEFAULT 1,
                UNIQUE(module, command)
            );

            CREATE TABLE IF NOT EXISTS profiles (
                name TEXT PRIMARY KEY,
                description TEXT NOT NULL DEFAULT '',
                enabled_modules TEXT NOT NULL DEFAULT '[]',
                default_settings TEXT NOT NULL DEFAULT '{}'
            );

            CREATE TABLE IF NOT EXISTS catalog (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                title TEXT NOT NULL,
                description TEXT NOT NULL DEFAULT '',
                module_ref TEXT NOT NULL DEFAULT '',
                tags TEXT NOT NULL DEFAULT '[]',
                entry_type TEXT NOT NULL DEFAULT 'documentation'
            );

            CREATE INDEX IF NOT EXISTS idx_commands_module ON commands(module);
            CREATE INDEX IF NOT EXISTS idx_catalog_module ON catalog(module_ref);
            CREATE INDEX IF NOT EXISTS idx_catalog_type ON catalog(entry_type);
            """)

    # -- search helpers -------------------------------------------------------

    @staticmethod
    def _like_pattern(query: str) -> str:
        """Build a substring LIKE pattern with SQL wildcards in *query* escaped.

        Without this, a '%' or '_' typed by the user acts as a wildcard and
        returns unrelated rows.  Must be used with ``ESCAPE '\\'``.
        """
        escaped = (
            query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        )
        return f"%{escaped}%"

    # -- module CRUD ----------------------------------------------------------

    def upsert_module(self, module: Dict[str, Any]) -> None:
        """Insert or update a module row keyed by ``module['code']``.

        ``security`` / ``privacy`` dicts are JSON-encoded into TEXT columns.
        """
        with self._cursor() as cur:
            cur.execute(
                """INSERT INTO modules (code, name, purpose, token_priority,
                                        mcp_exposed, security, privacy)
                   VALUES (?, ?, ?, ?, ?, ?, ?)
                   ON CONFLICT(code) DO UPDATE SET
                       name=excluded.name,
                       purpose=excluded.purpose,
                       token_priority=excluded.token_priority,
                       mcp_exposed=excluded.mcp_exposed,
                       security=excluded.security,
                       privacy=excluded.privacy""",
                (
                    module["code"],
                    module["name"],
                    module.get("purpose", ""),
                    module.get("token_priority", "medium"),
                    1 if module.get("mcp_exposed", True) else 0,
                    json.dumps(module.get("security", {})),
                    json.dumps(module.get("privacy", {})),
                ),
            )

    def get_module(self, code: str) -> Optional[Dict[str, Any]]:
        """Return the module with *code*, or None if absent."""
        with self._cursor() as cur:
            cur.execute("SELECT * FROM modules WHERE code = ?", (code,))
            row = cur.fetchone()
            return self._row_to_module(row) if row else None

    def list_modules(self) -> List[Dict[str, Any]]:
        """Return all modules ordered by code."""
        with self._cursor() as cur:
            cur.execute("SELECT * FROM modules ORDER BY code")
            return [self._row_to_module(r) for r in cur.fetchall()]

    @staticmethod
    def _row_to_module(row: sqlite3.Row) -> Dict[str, Any]:
        """Decode a modules row back into a plain dict (JSON columns parsed)."""
        return {
            "code": row["code"],
            "name": row["name"],
            "purpose": row["purpose"],
            "token_priority": row["token_priority"],
            "mcp_exposed": bool(row["mcp_exposed"]),
            "security": json.loads(row["security"]),
            "privacy": json.loads(row["privacy"]),
        }

    # -- command CRUD ---------------------------------------------------------

    def upsert_command(self, cmd: Dict[str, Any]) -> None:
        """Insert or update a command keyed by (module, command)."""
        with self._cursor() as cur:
            cur.execute(
                """INSERT INTO commands (module, command, syntax, description,
                                         examples, aliases, token_cost_hint,
                                         mcp_exposed)
                   VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                   ON CONFLICT(module, command) DO UPDATE SET
                       syntax=excluded.syntax,
                       description=excluded.description,
                       examples=excluded.examples,
                       aliases=excluded.aliases,
                       token_cost_hint=excluded.token_cost_hint,
                       mcp_exposed=excluded.mcp_exposed""",
                (
                    cmd["module"],
                    cmd["command"],
                    cmd.get("syntax", ""),
                    cmd.get("description", ""),
                    json.dumps(cmd.get("examples", [])),
                    json.dumps(cmd.get("aliases", [])),
                    cmd.get("token_cost_hint", 0),
                    1 if cmd.get("mcp_exposed", True) else 0,
                ),
            )

    def get_command(self, module: str, command: str) -> Optional[Dict[str, Any]]:
        """Return one command by (module, command), or None if absent."""
        with self._cursor() as cur:
            cur.execute(
                "SELECT * FROM commands WHERE module = ? AND command = ?",
                (module, command),
            )
            row = cur.fetchone()
            return self._row_to_command(row) if row else None

    def list_commands(self, module: Optional[str] = None) -> List[Dict[str, Any]]:
        """Return all commands, optionally restricted to one *module*."""
        with self._cursor() as cur:
            if module:
                cur.execute(
                    "SELECT * FROM commands WHERE module = ? ORDER BY command",
                    (module,),
                )
            else:
                cur.execute("SELECT * FROM commands ORDER BY module, command")
            return [self._row_to_command(r) for r in cur.fetchall()]

    def search_commands(self, query: str) -> List[Dict[str, Any]]:
        """Substring search over command name, description and aliases.

        *query* is treated literally: '%' and '_' are escaped so user input
        cannot act as SQL LIKE wildcards.
        """
        with self._cursor() as cur:
            like = self._like_pattern(query)
            cur.execute(
                """SELECT * FROM commands
                   WHERE command LIKE ? ESCAPE '\\'
                      OR description LIKE ? ESCAPE '\\'
                      OR aliases LIKE ? ESCAPE '\\'
                   ORDER BY module, command""",
                (like, like, like),
            )
            return [self._row_to_command(r) for r in cur.fetchall()]

    @staticmethod
    def _row_to_command(row: sqlite3.Row) -> Dict[str, Any]:
        """Decode a commands row back into a plain dict (JSON columns parsed)."""
        return {
            "module": row["module"],
            "command": row["command"],
            "syntax": row["syntax"],
            "description": row["description"],
            "examples": json.loads(row["examples"]),
            "aliases": json.loads(row["aliases"]),
            "token_cost_hint": row["token_cost_hint"],
            "mcp_exposed": bool(row["mcp_exposed"]),
        }

    # -- profile CRUD ---------------------------------------------------------

    def upsert_profile(self, profile: Dict[str, Any]) -> None:
        """Insert or update a profile keyed by ``profile['name']``."""
        with self._cursor() as cur:
            cur.execute(
                """INSERT INTO profiles (name, description, enabled_modules,
                                         default_settings)
                   VALUES (?, ?, ?, ?)
                   ON CONFLICT(name) DO UPDATE SET
                       description=excluded.description,
                       enabled_modules=excluded.enabled_modules,
                       default_settings=excluded.default_settings""",
                (
                    profile["name"],
                    profile.get("description", ""),
                    json.dumps(profile.get("enabled_modules", [])),
                    json.dumps(profile.get("default_settings", {})),
                ),
            )

    def get_profile(self, name: str) -> Optional[Dict[str, Any]]:
        """Return the profile with *name*, or None if absent."""
        with self._cursor() as cur:
            cur.execute("SELECT * FROM profiles WHERE name = ?", (name,))
            row = cur.fetchone()
            return self._row_to_profile(row) if row else None

    def list_profiles(self) -> List[Dict[str, Any]]:
        """Return all profiles ordered by name."""
        with self._cursor() as cur:
            cur.execute("SELECT * FROM profiles ORDER BY name")
            return [self._row_to_profile(r) for r in cur.fetchall()]

    @staticmethod
    def _row_to_profile(row: sqlite3.Row) -> Dict[str, Any]:
        """Decode a profiles row back into a plain dict (JSON columns parsed)."""
        return {
            "name": row["name"],
            "description": row["description"],
            "enabled_modules": json.loads(row["enabled_modules"]),
            "default_settings": json.loads(row["default_settings"]),
        }

    # -- catalog CRUD ---------------------------------------------------------

    def add_catalog_entry(self, entry: Dict[str, Any]) -> int:
        """Insert a catalog entry and return its autoincremented row id.

        Plain INSERT, not an upsert: calling twice with the same entry
        creates two rows.
        """
        with self._cursor() as cur:
            cur.execute(
                """INSERT INTO catalog (title, description, module_ref, tags,
                                        entry_type)
                   VALUES (?, ?, ?, ?, ?)""",
                (
                    entry["title"],
                    entry.get("description", ""),
                    entry.get("module_ref", ""),
                    json.dumps(entry.get("tags", [])),
                    entry.get("entry_type", "documentation"),
                ),
            )
            return cur.lastrowid

    def list_catalog(
        self,
        entry_type: Optional[str] = None,
        module_ref: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """Return catalog entries, optionally filtered by type and/or module.

        Filters are combined with AND; the WHERE clause is assembled from
        fixed fragments while all values go through bound parameters.
        """
        with self._cursor() as cur:
            conditions, params = [], []
            if entry_type:
                conditions.append("entry_type = ?")
                params.append(entry_type)
            if module_ref:
                conditions.append("module_ref = ?")
                params.append(module_ref)
            where = ("WHERE " + " AND ".join(conditions)) if conditions else ""
            cur.execute(
                f"SELECT * FROM catalog {where} ORDER BY id", params
            )
            return [self._row_to_catalog(r) for r in cur.fetchall()]

    def search_catalog(self, query: str) -> List[Dict[str, Any]]:
        """Substring search over catalog title, description and tags.

        *query* is treated literally (LIKE wildcards escaped), matching the
        behaviour of search_commands.
        """
        with self._cursor() as cur:
            like = self._like_pattern(query)
            cur.execute(
                """SELECT * FROM catalog
                   WHERE title LIKE ? ESCAPE '\\'
                      OR description LIKE ? ESCAPE '\\'
                      OR tags LIKE ? ESCAPE '\\'
                   ORDER BY id""",
                (like, like, like),
            )
            return [self._row_to_catalog(r) for r in cur.fetchall()]

    @staticmethod
    def _row_to_catalog(row: sqlite3.Row) -> Dict[str, Any]:
        """Decode a catalog row back into a plain dict (JSON tags parsed)."""
        return {
            "id": row["id"],
            "title": row["title"],
            "description": row["description"],
            "module_ref": row["module_ref"],
            "tags": json.loads(row["tags"]),
            "entry_type": row["entry_type"],
        }

    # -- export / import ------------------------------------------------------

    def export_bundle(self) -> Dict[str, Any]:
        """Export entire store as a JSON-serialisable dict (replaces build_bundle.py)."""
        return {
            "modules": self.list_modules(),
            "commands": self.list_commands(),
            "profiles": self.list_profiles(),
            "catalog": self.list_catalog(),
        }

    def import_bundle(self, bundle: Dict[str, Any]) -> Dict[str, int]:
        """Import a bundle dict into the store and return per-section counts.

        Modules, commands and profiles go through upserts and are idempotent;
        catalog entries are plain inserts, so re-importing duplicates them.
        """
        counts = {"modules": 0, "commands": 0, "profiles": 0, "catalog": 0}
        for m in bundle.get("modules", []):
            self.upsert_module(m)
            counts["modules"] += 1
        for c in bundle.get("commands", []):
            self.upsert_command(c)
            counts["commands"] += 1
        for p in bundle.get("profiles", []):
            self.upsert_profile(p)
            counts["profiles"] += 1
        for e in bundle.get("catalog", []):
            self.add_catalog_entry(e)
            counts["catalog"] += 1
        return counts

    # -- statistics -----------------------------------------------------------

    def stats(self) -> Dict[str, int]:
        """Return row counts per table.  Table names come from a fixed tuple,
        so the f-string SQL is safe."""
        with self._cursor() as cur:
            result = {}
            for table in ("modules", "commands", "profiles", "catalog"):
                cur.execute(f"SELECT COUNT(*) FROM {table}")
                result[table] = cur.fetchone()[0]
            return result
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""Token reduction measurement utilities for natural language and CompText commands."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Iterable, Sequence
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(frozen=True)
class TokenReductionCase:
    """Represents a single token reduction scenario."""

    # Human-readable label for the scenario (used as the report row title).
    name: str
    # Verbose natural-language prompt.
    original: str
    # Equivalent compact CompText command string.
    comptext: str
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Deterministic sample prompt/CompText pairs; the default input for the
# reduction report produced by main().
DEFAULT_CASES: Sequence[TokenReductionCase] = (
    TokenReductionCase(
        name="Code Optimization",
        original=(
            "Please analyze this Python code, identify performance bottlenecks, "
            "suggest optimizations with code examples, explain the reasoning behind "
            "each optimization, and provide benchmark comparisons showing expected "
            "improvements"
        ),
        comptext=(
            "@CODE_ANALYZE[perf_bottleneck] + "
            "@CODE_OPT[explain=detail, bench=compare]"
        ),
    ),
    TokenReductionCase(
        name="Marketing Plan",
        original=(
            "Generate a comprehensive marketing plan with budget breakdown, timeline, "
            "channels, KPIs, and risk mitigation steps."
        ),
        comptext="@PLAN[marketing, include=budget+timeline+channels+KPIs+risk]",
    ),
    TokenReductionCase(
        name="CI Pipeline",
        original=(
            "Explain how to set up a CI pipeline using GitHub Actions with linting, "
            "tests, and deployment steps."
        ),
        comptext="@CI_PIPELINE[gha, steps=lint+test+deploy]",
    ),
)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def token_count(text: str) -> int:
    """Return a simple whitespace token count.

    str.split() with no arguments already collapses runs of whitespace and
    never yields empty strings, so no extra filtering is needed.
    """
    return len(text.split())
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def calculate_reduction(case: TokenReductionCase) -> dict[str, Any]:
    """Compute token-count metrics comparing *case*'s original prompt to its CompText form."""
    before = token_count(case.original)
    after = token_count(case.comptext)
    # A CompText form longer than the original counts as zero savings.
    saved = max(before - after, 0)
    pct = round((saved / before) * 100, 1) if before else 0.0
    return {
        "name": case.name,
        "original_tokens": before,
        "comptext_tokens": after,
        "token_reduction": saved,
        "reduction_pct": pct,
    }
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def generate_markdown_report(cases: Iterable[TokenReductionCase]) -> str:
    """Render a markdown table summarizing token reductions for *cases*."""
    header = [
        "# Token Reduction Results",
        "",
        "| Case | Original Tokens | CompText Tokens | Reduction | Reduction % |",
        "| --- | ---: | ---: | ---: | ---: |",
    ]
    rows = [
        f"| {m['name']} | {m['original_tokens']} | "
        f"{m['comptext_tokens']} | {m['token_reduction']} | "
        f"{m['reduction_pct']} |"
        for m in map(calculate_reduction, cases)
    ]
    footer = [
        "",
        "Generated by the token reduction helper script "
        "using deterministic sample prompts.",
    ]
    return "\n".join(header + rows + footer)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def write_report(report_path: Path, content: str) -> None:
    """Write *content* to *report_path*, creating parent directories as needed."""
    directory = report_path.parent
    directory.mkdir(parents=True, exist_ok=True)
    report_path.write_text(content, encoding="utf-8")
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def main(output_path: Path | None = None) -> None:
    """Run the default token reduction suite and write a markdown report.

    Falls back to TOKEN_REDUCTION_RESULTS.md in the current directory when
    no (truthy) *output_path* is given.
    """
    destination = output_path or Path.cwd() / "TOKEN_REDUCTION_RESULTS.md"
    write_report(destination, generate_markdown_report(DEFAULT_CASES))
    print(f"✅ Token reduction report written to {destination}")
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Token reporting utilities for the CompText codex."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from statistics import mean
|
|
8
|
+
from typing import Any, Iterable, Mapping
|
|
9
|
+
|
|
10
|
+
import yaml
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _load_yaml(path: Path) -> dict[str, Any]:
    """Parse *path* as YAML, requiring the document root to be a mapping.

    Raises ValueError when the file is unparseable (chained from the
    underlying yaml.YAMLError) or when the root value is not a mapping.
    """
    try:
        with path.open("r", encoding="utf-8") as handle:
            loaded = yaml.safe_load(handle)
    except yaml.YAMLError as exc:
        raise ValueError(f"Failed to parse YAML file: {path}") from exc
    if not isinstance(loaded, dict):
        raise ValueError(
            f"Expected YAML mapping in {path}, got {type(loaded).__name__}"
        )
    return loaded
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _parse_token_hint(value: Any) -> int | None:
|
|
27
|
+
"""Return a normalized token hint as int, or None when invalid."""
|
|
28
|
+
if value is None or isinstance(value, bool):
|
|
29
|
+
return None
|
|
30
|
+
if isinstance(value, int):
|
|
31
|
+
return value
|
|
32
|
+
if isinstance(value, float):
|
|
33
|
+
return int(value) if value.is_integer() else None
|
|
34
|
+
if isinstance(value, str):
|
|
35
|
+
stripped = value.strip()
|
|
36
|
+
if not stripped:
|
|
37
|
+
return None
|
|
38
|
+
try:
|
|
39
|
+
return int(stripped)
|
|
40
|
+
except ValueError:
|
|
41
|
+
return None
|
|
42
|
+
return None
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def load_commands(codex_dir: Path) -> list[dict[str, Any]]:
    """Collect command definitions from a codex directory.

    Reads the aggregated commands.yaml first (when present), then every
    *.yaml under commands/ in sorted filename order.
    """
    sources: list[Path] = []
    aggregated = codex_dir / "commands.yaml"
    if aggregated.exists():
        sources.append(aggregated)
    per_file_dir = codex_dir / "commands"
    if per_file_dir.exists():
        sources.extend(sorted(per_file_dir.glob("*.yaml")))

    collected: list[dict[str, Any]] = []
    for source in sources:
        collected.extend(_load_yaml(source).get("commands", []))
    return collected
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def load_modules(codex_dir: Path) -> dict[str, dict[str, Any]]:
    """Load module definitions keyed by module code.

    The aggregated modules.yaml takes precedence: per-file definitions under
    modules/ are added via setdefault and never overwrite an existing code.
    Entries without a "code" key are skipped.
    """
    result: dict[str, dict[str, Any]] = {}

    aggregated = codex_dir / "modules.yaml"
    if aggregated.exists():
        for entry in _load_yaml(aggregated).get("modules", []):
            key = entry.get("code")
            if key:
                result[key] = entry

    per_file_dir = codex_dir / "modules"
    if per_file_dir.exists():
        for source in sorted(per_file_dir.glob("*.yaml")):
            for entry in _load_yaml(source).get("modules", []):
                key = entry.get("code")
                if key:
                    result.setdefault(key, entry)

    return result
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def build_token_report(
    commands: Iterable[dict[str, Any]], modules: Mapping[str, dict[str, Any]]
) -> dict[str, Any]:
    """Create an in-memory token report.

    The top-level averages count every command with a valid hint, including
    commands that lack a module; the per-module summary only covers commands
    whose "module" key is set.
    """
    all_commands = list(commands)

    # Global hints: every command with a parseable hint, module or not.
    global_hints = [
        hint
        for hint in (
            _parse_token_hint(cmd.get("token_cost_hint")) for cmd in all_commands
        )
        if hint is not None
    ]

    per_module: dict[str, dict[str, Any]] = {}
    for cmd in all_commands:
        code = cmd.get("module")
        if not code:
            continue
        meta = modules.get(code, {})
        bucket = per_module.setdefault(
            code,
            {
                "name": meta.get("name", ""),
                "token_priority": meta.get("token_priority", ""),
                "command_count": 0,
                "token_cost_hints": [],
            },
        )
        bucket["command_count"] += 1
        hint = _parse_token_hint(cmd.get("token_cost_hint"))
        if hint is not None:
            bucket["token_cost_hints"].append(hint)

    for bucket in per_module.values():
        hints = bucket["token_cost_hints"]
        bucket["avg_token_cost"] = float(mean(hints)) if hints else 0.0

    return {
        "total_commands": len(all_commands),
        "commands_with_token_hint": len(global_hints),
        "avg_token_cost": float(mean(global_hints)) if global_hints else 0.0,
        "modules": per_module,
    }
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def render_text_report(report: Mapping[str, Any]) -> str:
    """Render a human-friendly text report."""
    out = [
        f"Total commands: {report['total_commands']}",
        f"Commands with token hints: {report['commands_with_token_hint']}",
        f"Average token cost: {report['avg_token_cost']:.1f}",
        "",
        "Per-module summary:",
    ]
    per_module: dict[str, Any] = report.get("modules", {})
    # Sorting the items pairs gives the same per-code ordering as sorting keys.
    for code, info in sorted(per_module.items()):
        label = info.get("name", "").strip() or "Unknown"
        out.append(
            f"- {code} ({label}): "
            f"{info.get('command_count', 0)} commands, "
            f"avg token cost {info.get('avg_token_cost', 0.0):.1f} "
            f"(priority: {info.get('token_priority', 'n/a')})"
        )
    return "\n".join(out)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def report_as_json(report: Mapping[str, Any]) -> str:
    """Return *report* serialized as pretty JSON (2-space indent, sorted keys)."""
    serialized = json.dumps(report, sort_keys=True, indent=2)
    return serialized
|