simplicio-mapper 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ """Python package entrypoint for simplicio-mapper."""
2
+
3
+ from __future__ import annotations
4
+
5
+ __version__ = "0.5.0"
6
+
7
+ __all__ = ["__version__"]
@@ -0,0 +1,156 @@
1
+ """Command-line entry point for simplicio-mapper.
2
+
3
+ Mirrors ``bin/map.js``: generates or refreshes the machine-readable mapper
4
+ artifacts under ``.simplicio/``. Exposed as the ``simplicio-mapper`` and
5
+ ``llm-project-mapper`` console scripts (see ``pyproject.toml``).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import os
12
+ import sys
13
+ import time
14
+ from typing import Sequence
15
+
16
+ from . import __version__
17
+ from .mapper import write_mapping_artifacts
18
+
19
+ HELP_TEXT = """simplicio-mapper map
20
+
21
+ Generate or update machine-readable mapper artifacts.
22
+
23
+ USAGE
24
+ simplicio-mapper map [--root <dir>] [--incremental] [--watch]
25
+ simplicio-mapper update [--root <dir>] [--watch]
26
+
27
+ OPTIONS
28
+ --root <dir> Project root to map. Defaults to cwd.
29
+ --stack <name> Stack hint when .starter-meta.json is absent.
30
+ --product-name <name> Product name hint when .starter-meta.json is absent.
31
+ --out <dir> Artifact directory. Defaults to .simplicio.
32
+ --incremental Record changed files and update existing artifacts.
33
+ --watch Re-run mapping when local files change.
34
+ --silent Minimal output.
35
+ -V, --version Show version and exit.
36
+ -h, --help Show this help
37
+ """
38
+
39
+
40
+ def _read_json_safe(file: str) -> dict:
41
+ try:
42
+ with open(file, "r", encoding="utf-8") as handle:
43
+ return json.load(handle)
44
+ except (OSError, ValueError):
45
+ return {}
46
+
47
+
48
+ def _parse_args(argv: Sequence[str]) -> dict:
49
+ opts = {
50
+ "root": os.getcwd(),
51
+ "out": ".simplicio",
52
+ "stack": "",
53
+ "product_name": "",
54
+ "incremental": False,
55
+ "watch": False,
56
+ "silent": False,
57
+ }
58
+ command = "update" if argv and argv[0] == "update" else "map"
59
+ if command == "update":
60
+ opts["incremental"] = True
61
+ i = 1 if argv and argv[0] in ("map", "update") else 0
62
+ while i < len(argv):
63
+ arg = argv[i]
64
+ if arg == "--root":
65
+ i += 1
66
+ opts["root"] = argv[i]
67
+ elif arg == "--out":
68
+ i += 1
69
+ opts["out"] = argv[i]
70
+ elif arg == "--stack":
71
+ i += 1
72
+ opts["stack"] = argv[i]
73
+ elif arg == "--product-name":
74
+ i += 1
75
+ opts["product_name"] = argv[i]
76
+ elif arg == "--incremental":
77
+ opts["incremental"] = True
78
+ elif arg == "--watch":
79
+ opts["watch"] = True
80
+ elif arg == "--silent":
81
+ opts["silent"] = True
82
+ elif arg in ("-h", "--help"):
83
+ print(HELP_TEXT)
84
+ sys.exit(0)
85
+ elif arg in ("-V", "--version"):
86
+ print(__version__)
87
+ sys.exit(0)
88
+ else:
89
+ print(f"Unknown map option: {arg}", file=sys.stderr)
90
+ print("Run `simplicio-mapper map --help` for usage.", file=sys.stderr)
91
+ sys.exit(2)
92
+ i += 1
93
+ return opts
94
+
95
+
96
+ def _run_once(opts: dict) -> dict:
97
+ root = os.path.abspath(opts["root"])
98
+ meta = dict(_read_json_safe(os.path.join(root, ".starter-meta.json")))
99
+ if opts["stack"]:
100
+ meta["stack"] = opts["stack"]
101
+ if opts["product_name"]:
102
+ meta["product_name"] = opts["product_name"]
103
+ log = (lambda _line: None) if opts["silent"] else print
104
+ return write_mapping_artifacts(
105
+ cwd=root,
106
+ meta=meta,
107
+ incremental=opts["incremental"],
108
+ output_dir=opts["out"],
109
+ log=log,
110
+ )
111
+
112
+
113
+ def _signature(root: str, out: str) -> tuple:
114
+ abs_out = os.path.abspath(os.path.join(root, out))
115
+ entries = []
116
+ for current, dirs, files in os.walk(root):
117
+ dirs[:] = [d for d in dirs if d not in (".git", "node_modules") and os.path.abspath(os.path.join(current, d)) != abs_out]
118
+ for name in files:
119
+ path = os.path.join(current, name)
120
+ try:
121
+ stat = os.stat(path)
122
+ except OSError:
123
+ continue
124
+ entries.append((path, stat.st_mtime_ns, stat.st_size))
125
+ return tuple(sorted(entries))
126
+
127
+
128
+ def _watch(opts: dict) -> None:
129
+ root = os.path.abspath(opts["root"])
130
+ print(f"watching {root} for mapper updates...")
131
+ last = _signature(root, opts["out"])
132
+ try:
133
+ while True:
134
+ time.sleep(0.5)
135
+ current = _signature(root, opts["out"])
136
+ if current != last:
137
+ last = current
138
+ try:
139
+ _run_once({**opts, "incremental": True})
140
+ except Exception as error: # noqa: BLE001 - watch loop must not crash
141
+ print(f"map update failed: {error}", file=sys.stderr)
142
+ except KeyboardInterrupt:
143
+ pass
144
+
145
+
146
+ def main(argv: Sequence[str] | None = None) -> int:
147
+ argv = list(sys.argv[1:] if argv is None else argv)
148
+ opts = _parse_args(argv)
149
+ _run_once(opts)
150
+ if opts["watch"]:
151
+ _watch(opts)
152
+ return 0
153
+
154
+
155
+ if __name__ == "__main__":
156
+ sys.exit(main())
@@ -0,0 +1,536 @@
1
+ """Project mapper that emits the Simplicio machine-readable artifacts.
2
+
3
+ This is the Python port of ``bin/mapper-artifacts.js``. It produces
4
+ ``.simplicio/project-map.json`` (schema ``simplicio.project-map/v1``) and
5
+ ``.simplicio/precedent-index.json`` (schema ``simplicio.precedent-index/v1``)
6
+ as documented in ``SIMPLICIO_INTEGRATION.md``. Pure standard library, no
7
+ third-party dependencies.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import hashlib
13
+ import json
14
+ import os
15
+ import re
16
+ import subprocess
17
+ from datetime import datetime, timezone
18
+ from typing import Any, Callable
19
+
20
+ ARTIFACT_SCHEMA = "simplicio.project-map/v1"
21
+ PRECEDENT_SCHEMA = "simplicio.precedent-index/v1"
22
+ ARTIFACT_VERSION = 1
23
+
24
+ TEXT_EXTS = {
25
+ ".md", ".txt", ".json", ".jsonc", ".yml", ".yaml", ".toml",
26
+ ".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs",
27
+ ".py", ".go", ".rs", ".java", ".kt", ".php", ".rb", ".cs",
28
+ ".cshtml", ".razor", ".sh", ".ps1", ".env", "",
29
+ }
30
+
31
+ SKIP_DIRS = {
32
+ ".git", "node_modules", "dist", "build", "out", "coverage",
33
+ ".next", ".nuxt", "playwright-report", "test-results", ".turbo",
34
+ ".venv", "venv", "__pycache__", ".idea", ".vscode", ".simplicio",
35
+ ".catalog", ".receipts",
36
+ }
37
+
38
+ CONFIG_FILES = {
39
+ "package.json", "pyproject.toml", "requirements.txt", "go.mod", "Cargo.toml",
40
+ "pom.xml", "build.gradle", "settings.gradle", "tsconfig.json",
41
+ "vite.config.ts", "next.config.js", "angular.json", "Dockerfile",
42
+ }
43
+
44
+ LANGUAGE_BY_EXT = {
45
+ ".js": "javascript",
46
+ ".jsx": "javascript",
47
+ ".mjs": "javascript",
48
+ ".cjs": "javascript",
49
+ ".ts": "typescript",
50
+ ".tsx": "typescript",
51
+ ".py": "python",
52
+ ".go": "go",
53
+ ".rs": "rust",
54
+ ".java": "java",
55
+ ".kt": "kotlin",
56
+ ".php": "php",
57
+ ".rb": "ruby",
58
+ ".cs": "csharp",
59
+ ".cshtml": "razor",
60
+ ".razor": "razor",
61
+ ".md": "markdown",
62
+ ".json": "json",
63
+ ".yaml": "yaml",
64
+ ".yml": "yaml",
65
+ ".toml": "toml",
66
+ ".sh": "shell",
67
+ ".ps1": "powershell",
68
+ }
69
+
70
+ ENTRYPOINT_STEMS = {"index", "main", "server", "app", "program", "cli"}
71
+ TOKEN_STOPWORDS = {"src", "lib", "test", "tests", "index", "main"}
72
+
73
+
74
+ def _iso(dt: datetime) -> str:
75
+ return dt.astimezone(timezone.utc).isoformat(timespec="milliseconds").replace("+00:00", "Z")
76
+
77
+
78
+ def _now_iso() -> str:
79
+ return _iso(datetime.now(timezone.utc))
80
+
81
+
82
+ def _normalize_rel(file: str) -> str:
83
+ return file.replace(os.sep, "/")
84
+
85
+
86
+ def _read_safe(file: str) -> str:
87
+ try:
88
+ with open(file, "r", encoding="utf-8", errors="replace") as handle:
89
+ return handle.read()
90
+ except OSError:
91
+ return ""
92
+
93
+
94
+ def _sha256(text: str) -> str:
95
+ return hashlib.sha256(text.encode("utf-8")).hexdigest()
96
+
97
+
98
+ def _parse_json_safe(file: str) -> dict:
99
+ try:
100
+ return json.loads(_read_safe(file) or "{}")
101
+ except (ValueError, TypeError):
102
+ return {}
103
+
104
+
105
+ def _walk(root: str):
106
+ try:
107
+ entries = sorted(os.scandir(root), key=lambda e: e.name)
108
+ except OSError:
109
+ return
110
+ for entry in entries:
111
+ if entry.name in SKIP_DIRS:
112
+ continue
113
+ if entry.is_dir(follow_symlinks=False):
114
+ yield from _walk(entry.path)
115
+ elif entry.is_file(follow_symlinks=False):
116
+ yield entry.path
117
+
118
+
119
+ def _language_for(file: str) -> str:
120
+ base = os.path.basename(file)
121
+ if base == "Dockerfile":
122
+ return "dockerfile"
123
+ ext = os.path.splitext(file)[1].lower()
124
+ if ext in LANGUAGE_BY_EXT:
125
+ return LANGUAGE_BY_EXT[ext]
126
+ return ext[1:] if ext else "text"
127
+
128
+
129
+ def _git_status_map(cwd: str) -> dict[str, str]:
130
+ out: dict[str, str] = {}
131
+ try:
132
+ result = subprocess.run(
133
+ ["git", "status", "--porcelain"],
134
+ cwd=cwd,
135
+ capture_output=True,
136
+ text=True,
137
+ timeout=3,
138
+ )
139
+ except (OSError, subprocess.SubprocessError):
140
+ return out
141
+ if result.returncode != 0:
142
+ return out
143
+ for line in (result.stdout or "").split("\n"):
144
+ if not line.strip():
145
+ continue
146
+ status = line[:2].strip() or "modified"
147
+ raw = line[3:].strip()
148
+ file = raw.split(" -> ")[-1] if " -> " in raw else raw
149
+ out[_normalize_rel(file)] = status
150
+ return out
151
+
152
+
153
+ def _collect_text_files(cwd: str) -> list[str]:
154
+ files = []
155
+ for file in _walk(cwd):
156
+ ext = os.path.splitext(file)[1].lower()
157
+ if ext not in TEXT_EXTS:
158
+ continue
159
+ try:
160
+ if os.path.getsize(file) > 250_000:
161
+ continue
162
+ except OSError:
163
+ continue
164
+ files.append(file)
165
+ return sorted(files)
166
+
167
+
168
+ def _parse_imports(text: str, language: str) -> list[str]:
169
+ patterns: list[re.Pattern[str]] = []
170
+ if language in ("javascript", "typescript"):
171
+ patterns.append(re.compile(r"import\s+[^'\"]*['\"]([^'\"]+)['\"]"))
172
+ patterns.append(re.compile(r"require\(['\"]([^'\"]+)['\"]\)"))
173
+ elif language == "python":
174
+ patterns.append(re.compile(r"^\s*from\s+([A-Za-z0-9_.]+)\s+import\s+", re.MULTILINE))
175
+ patterns.append(re.compile(r"^\s*import\s+([A-Za-z0-9_.]+)", re.MULTILINE))
176
+ elif language in ("csharp", "razor"):
177
+ patterns.append(re.compile(r"^\s*using\s+([A-Za-z0-9_.]+)\s*;", re.MULTILINE))
178
+ elif language == "go":
179
+ patterns.append(re.compile(r'^\s*import\s+"([^"]+)"', re.MULTILINE))
180
+ found: list[str] = []
181
+ for pattern in patterns:
182
+ for match in pattern.finditer(text):
183
+ found.append(match.group(1))
184
+ uniq = list(dict.fromkeys(found))
185
+ return sorted(uniq[:20])
186
+
187
+
188
+ _SYMBOL_PATTERNS = [
189
+ re.compile(r"\bclass\s+([A-Z][A-Za-z0-9_]*)"),
190
+ re.compile(r"\bfunction\s+([A-Za-z0-9_]+)"),
191
+ re.compile(r"\bexport\s+(?:async\s+)?function\s+([A-Za-z0-9_]+)"),
192
+ re.compile(r"\bexport\s+const\s+([A-Za-z0-9_]+)"),
193
+ re.compile(r"\bdef\s+([A-Za-z0-9_]+)"),
194
+ re.compile(r"\bfunc\s+([A-Za-z0-9_]+)"),
195
+ ]
196
+
197
+
198
+ def _parse_symbols(text: str) -> list[str]:
199
+ found: list[str] = []
200
+ for pattern in _SYMBOL_PATTERNS:
201
+ for match in pattern.finditer(text):
202
+ found.append(match.group(1))
203
+ uniq = list(dict.fromkeys(found))
204
+ return sorted(uniq[:30])
205
+
206
+
207
+ _RE_TEST_PATH = re.compile(r"(\b|/)(__tests__|tests?|specs?)(/|\b)", re.IGNORECASE)
208
+ _RE_TEST_FILE = re.compile(r"\.(test|spec)\.[^.]+$", re.IGNORECASE)
209
+ _RE_CONFIG = re.compile(r"config|rc$|\.config\.", re.IGNORECASE)
210
+ _RE_ROUTE = re.compile(r"routes?|controllers?|pages?|app/", re.IGNORECASE)
211
+ _RE_UI = re.compile(r"components?|views?", re.IGNORECASE)
212
+ _RE_DOMAIN = re.compile(r"services?|repositories?|models?|entities?", re.IGNORECASE)
213
+
214
+
215
+ def _roles_for(rel: str, pkg: dict) -> list[str]:
216
+ roles: set[str] = set()
217
+ base = os.path.basename(rel)
218
+ no_ext = re.sub(r"\.[^.]+$", "", base).lower()
219
+ if _RE_TEST_PATH.search(rel) or _RE_TEST_FILE.search(base):
220
+ roles.add("test")
221
+ if base in CONFIG_FILES or _RE_CONFIG.search(base):
222
+ roles.add("config")
223
+ main_value = _normalize_rel(pkg["main"]) if isinstance(pkg.get("main"), str) else ""
224
+ bin_field = pkg.get("bin")
225
+ if isinstance(bin_field, str):
226
+ bin_values = [_normalize_rel(bin_field)]
227
+ elif isinstance(bin_field, dict):
228
+ bin_values = [_normalize_rel(v) for v in bin_field.values() if isinstance(v, str)]
229
+ else:
230
+ bin_values = []
231
+ if main_value == rel or rel in bin_values or no_ext in ENTRYPOINT_STEMS:
232
+ roles.add("entrypoint")
233
+ if _RE_ROUTE.search(rel):
234
+ roles.add("route")
235
+ if _RE_UI.search(rel):
236
+ roles.add("ui")
237
+ if _RE_DOMAIN.search(rel):
238
+ roles.add("domain")
239
+ return sorted(roles)
240
+
241
+
242
+ def _importance_for(meta: dict) -> float:
243
+ score = 0.12
244
+ roles = meta["roles"]
245
+ if "entrypoint" in roles:
246
+ score += 0.45
247
+ if "test" in roles:
248
+ score += 0.25
249
+ if "config" in roles:
250
+ score += 0.2
251
+ if "domain" in roles:
252
+ score += 0.2
253
+ if meta["imports"]:
254
+ score += 0.08
255
+ if meta["exports"]:
256
+ score += 0.08
257
+ if meta["git_status"] and meta["git_status"] != "clean":
258
+ score += 0.2
259
+ return min(1.0, round(score, 2))
260
+
261
+
262
+ _RE_CAMEL = re.compile(r"([a-z])([A-Z])")
263
+ _RE_NON_ALNUM = re.compile(r"[^A-Za-z0-9]+")
264
+
265
+
266
+ def _token_words(value: Any) -> list[str]:
267
+ spaced = _RE_CAMEL.sub(r"\1 \2", str(value or ""))
268
+ out = []
269
+ for part in _RE_NON_ALNUM.split(spaced):
270
+ token = part.lower()
271
+ if len(token) > 2 and token not in TOKEN_STOPWORDS:
272
+ out.append(token)
273
+ return out
274
+
275
+
276
+ def _collect_entities(files: list[dict]) -> list[dict]:
277
+ scores: dict[str, int] = {}
278
+ for file in files:
279
+ stem = os.path.basename(file["path"])
280
+ ext = os.path.splitext(file["path"])[1]
281
+ if ext and stem.endswith(ext):
282
+ stem = stem[: -len(ext)]
283
+ for token in _token_words(stem):
284
+ scores[token] = scores.get(token, 0) + 1
285
+ for symbol in file.get("exports", []):
286
+ for token in _token_words(symbol):
287
+ scores[token] = scores.get(token, 0) + 2
288
+ ordered = sorted(scores.items(), key=lambda kv: (-kv[1], kv[0]))
289
+ return [{"name": name, "score": score} for name, score in ordered[:30]]
290
+
291
+
292
+ _ARCH_CHECKS = [
293
+ ("nextjs", re.compile(r"next")),
294
+ ("react", re.compile(r"react")),
295
+ ("vue", re.compile(r"vue")),
296
+ ("angular", re.compile(r"angular|@angular")),
297
+ ("express", re.compile(r"express")),
298
+ ("nestjs", re.compile(r"nestjs|@nestjs")),
299
+ ("fastapi", re.compile(r"fastapi")),
300
+ ("django", re.compile(r"django")),
301
+ ("dotnet", re.compile(r"aspnetcore|\.csproj|dotnet")),
302
+ ("go", re.compile(r"\bgo\.mod\b|\bgin\b|\bfiber\b")),
303
+ ("rust", re.compile(r"cargo\.toml|actix|axum")),
304
+ ("playwright", re.compile(r"playwright")),
305
+ ("stripe", re.compile(r"stripe")),
306
+ ("prisma", re.compile(r"prisma")),
307
+ ]
308
+
309
+
310
+ def _collect_architecture_signals(pkg: dict, corpus: str, stack: str) -> list[str]:
311
+ text = f"{stack}\n{json.dumps(pkg)}\n{corpus}".lower()
312
+ return sorted(name for name, rx in _ARCH_CHECKS if rx.search(text))
313
+
314
+
315
+ def _group_modules(files: list[dict]) -> list[dict]:
316
+ groups: dict[str, dict] = {}
317
+ for file in files:
318
+ first = file["path"].split("/")[0] if "/" in file["path"] else "."
319
+ group = groups.setdefault(first, {"name": first, "files": [], "roles": set()})
320
+ group["files"].append(file["path"])
321
+ group["roles"].update(file["roles"])
322
+ result = []
323
+ for group in sorted(groups.values(), key=lambda g: g["name"]):
324
+ result.append({
325
+ "name": group["name"],
326
+ "files": group["files"][:20],
327
+ "roles": sorted(group["roles"]),
328
+ "file_count": len(group["files"]),
329
+ })
330
+ return result
331
+
332
+
333
+ def _detect_changed_files(files, previous_map, status_map, incremental) -> list[str]:
334
+ previous = {f["path"]: f for f in previous_map.get("files", [])}
335
+ changed = {file for file, status in status_map.items() if status != "clean"}
336
+ if incremental:
337
+ for file in files:
338
+ before = previous.get(file["path"])
339
+ if not before or before.get("file_hash") != file["file_hash"] or before.get("size_bytes") != file["size_bytes"]:
340
+ changed.add(file["path"])
341
+ present = {entry["path"] for entry in files}
342
+ return sorted(file for file in changed if file in present)
343
+
344
+
345
+ def _load_previous_map(output_dir: str) -> dict:
346
+ target = os.path.join(output_dir, "project-map.json")
347
+ try:
348
+ with open(target, "r", encoding="utf-8") as handle:
349
+ return json.load(handle)
350
+ except (OSError, ValueError):
351
+ return {}
352
+
353
+
354
+ def _build_file_inventory(cwd: str, pkg: dict, status_map: dict) -> list[dict]:
355
+ inventory = []
356
+ for abs_path in _collect_text_files(cwd):
357
+ rel = _normalize_rel(os.path.relpath(abs_path, cwd))
358
+ text = _read_safe(abs_path)
359
+ stat = os.stat(abs_path)
360
+ language = _language_for(rel)
361
+ roles = _roles_for(rel, pkg)
362
+ imports = _parse_imports(text, language)
363
+ exports = _parse_symbols(text)
364
+ entry = {
365
+ "path": rel,
366
+ "language": language,
367
+ "size_bytes": stat.st_size,
368
+ "last_modified": _iso(datetime.fromtimestamp(stat.st_mtime, tz=timezone.utc)),
369
+ "file_hash": _sha256(text),
370
+ "git_status": status_map.get(rel, "clean"),
371
+ "roles": roles,
372
+ "imports": imports,
373
+ "exports": exports,
374
+ }
375
+ entry["importance"] = _importance_for(entry)
376
+ inventory.append(entry)
377
+ return sorted(inventory, key=lambda e: e["path"])
378
+
379
+
380
+ _RE_PLACEHOLDER = re.compile(r"<[A-Z][A-Z0-9_]+>")
381
+ _PRECEDENT_PATTERNS = [
382
+ (re.compile(r"\btest\s*\(|\bit\s*\(|\bdescribe\s*\(|\bdef\s+test_", re.IGNORECASE), "test"),
383
+ (re.compile(r"\bclass\s+[A-Z]|\bfunction\s+\w+|\bdef\s+\w+|\bfunc\s+\w+", re.IGNORECASE), None),
384
+ (re.compile(r"\btry\b|\bcatch\b|\bexcept\b|\bthrow\b", re.IGNORECASE), "error-handling"),
385
+ (re.compile(r"\brouter\.|\bapp\.get\b|\bapp\.post\b|@app\.", re.IGNORECASE), "route"),
386
+ ]
387
+
388
+
389
+ def _extract_snippet(lines: list[str], line_index: int, radius: int = 2) -> str:
390
+ start = max(0, line_index - radius)
391
+ end = min(len(lines), line_index + radius + 1)
392
+ return "\n".join(lines[start:end])[:1200]
393
+
394
+
395
+ def _build_precedent_items(cwd: str, files: list[dict]) -> list[dict]:
396
+ items = []
397
+ for file in files:
398
+ abs_path = os.path.join(cwd, file["path"])
399
+ lines = _read_safe(abs_path).split("\n")
400
+ is_test = "test" in file["roles"]
401
+ for i, line in enumerate(lines):
402
+ change_type = None
403
+ for rx, fixed_type in _PRECEDENT_PATTERNS:
404
+ if rx.search(line):
405
+ change_type = fixed_type if fixed_type is not None else ("test" if is_test else "feature")
406
+ break
407
+ if change_type is None:
408
+ continue
409
+ snippet = _extract_snippet(lines, i)
410
+ if _RE_PLACEHOLDER.search(snippet):
411
+ break
412
+ tags = list(dict.fromkeys(
413
+ [r for r in file["roles"] if r]
414
+ + ([file["language"]] if file["language"] else [])
415
+ + _token_words(file["path"])
416
+ ))[:10]
417
+ items.append({
418
+ "id": _sha256(f"{file['path']}:{i + 1}:{line}")[:16],
419
+ "path": file["path"],
420
+ "line": i + 1,
421
+ "language": file["language"],
422
+ "change_type": change_type,
423
+ "tags": tags,
424
+ "summary": f"{change_type} precedent in {file['path']}",
425
+ "snippet": snippet,
426
+ })
427
+ break
428
+ items.sort(key=lambda item: (item["path"], item["line"]))
429
+ return items[:250]
430
+
431
+
432
+ def build_artifacts(cwd: str, meta: dict | None = None, incremental: bool = False,
433
+ output_dir: str = ".simplicio") -> dict:
434
+ meta = meta or {}
435
+ abs_cwd = os.path.abspath(cwd or os.getcwd())
436
+ abs_out = os.path.abspath(os.path.join(abs_cwd, output_dir))
437
+ pkg = _parse_json_safe(os.path.join(abs_cwd, "package.json"))
438
+ status_map = _git_status_map(abs_cwd)
439
+ previous_map = _load_previous_map(abs_out)
440
+ files = _build_file_inventory(abs_cwd, pkg, status_map)
441
+ corpus = "\n".join(_read_safe(os.path.join(abs_cwd, f["path"]))[:3000] for f in files[:80])
442
+ changed_files = _detect_changed_files(files, previous_map, status_map, incremental)
443
+ stack = meta.get("stack") or pkg.get("type") or "unknown"
444
+ product_name = meta.get("product_name") or pkg.get("name") or os.path.basename(abs_cwd)
445
+ architecture_signals = _collect_architecture_signals(pkg, corpus, stack)
446
+ generated_at = _now_iso()
447
+
448
+ if os.path.exists(os.path.join(abs_cwd, "pnpm-lock.yaml")):
449
+ package_manager = "pnpm"
450
+ elif os.path.exists(os.path.join(abs_cwd, "yarn.lock")):
451
+ package_manager = "yarn"
452
+ else:
453
+ package_manager = "npm"
454
+
455
+ web_signal = "react" in architecture_signals or "nextjs" in architecture_signals
456
+ if meta.get("project_mode") == "monorepo":
457
+ system_type = "monorepo"
458
+ else:
459
+ system_type = "web" if web_signal else "library-or-service"
460
+
461
+ project_map = {
462
+ "schema": ARTIFACT_SCHEMA,
463
+ "version": ARTIFACT_VERSION,
464
+ "generated_at": generated_at,
465
+ "update_mode": "incremental" if incremental else "full",
466
+ "product": {
467
+ "name": product_name,
468
+ "stack": stack,
469
+ "project_mode": meta.get("project_mode", "root"),
470
+ },
471
+ "files": files,
472
+ "entry_points": [f["path"] for f in files if "entrypoint" in f["roles"]],
473
+ "test_files": [f["path"] for f in files if "test" in f["roles"]],
474
+ "config_files": [f["path"] for f in files if "config" in f["roles"]],
475
+ "modules": _group_modules(files),
476
+ "entities": _collect_entities(files),
477
+ "architecture": {
478
+ "signals": architecture_signals,
479
+ "system_type": system_type,
480
+ },
481
+ "dependencies": {
482
+ "package_manager": package_manager,
483
+ "manifest": "package.json" if pkg.get("name") else None,
484
+ "runtime": sorted((pkg.get("dependencies") or {}).keys()),
485
+ "dev": sorted((pkg.get("devDependencies") or {}).keys()),
486
+ },
487
+ "recent_changes": [
488
+ {"path": file, "status": status_map.get(file, "modified")} for file in changed_files
489
+ ],
490
+ "changed_files": changed_files,
491
+ "integration": {
492
+ "dev_cli_mapper": "read .simplicio/project-map.json, then use .simplicio/precedent-index.json for task-specific examples",
493
+ "contract": "SIMPLICIO_INTEGRATION.md",
494
+ },
495
+ }
496
+
497
+ precedent_index = {
498
+ "schema": PRECEDENT_SCHEMA,
499
+ "version": ARTIFACT_VERSION,
500
+ "generated_at": generated_at,
501
+ "source_project_map": ".simplicio/project-map.json",
502
+ "items": _build_precedent_items(abs_cwd, files),
503
+ }
504
+
505
+ return {"project_map": project_map, "precedent_index": precedent_index}
506
+
507
+
508
+ def _write_json_stable(file: str, data: Any) -> None:
509
+ os.makedirs(os.path.dirname(file), exist_ok=True)
510
+ with open(file, "w", encoding="utf-8") as handle:
511
+ handle.write(json.dumps(data, indent=2, ensure_ascii=False) + "\n")
512
+
513
+
514
+ def write_mapping_artifacts(cwd: str, meta: dict | None = None, incremental: bool = False,
515
+ output_dir: str = ".simplicio",
516
+ log: Callable[[str], None] | None = None) -> dict:
517
+ log = log or (lambda _line: None)
518
+ abs_cwd = os.path.abspath(cwd or os.getcwd())
519
+ abs_out = os.path.abspath(os.path.join(abs_cwd, output_dir))
520
+ artifacts = build_artifacts(abs_cwd, meta, incremental, output_dir)
521
+ project_map = artifacts["project_map"]
522
+ precedent_index = artifacts["precedent_index"]
523
+ project_map_path = os.path.join(abs_out, "project-map.json")
524
+ precedent_path = os.path.join(abs_out, "precedent-index.json")
525
+ _write_json_stable(project_map_path, project_map)
526
+ _write_json_stable(precedent_path, precedent_index)
527
+ log(f"-> wrote {os.path.relpath(project_map_path, abs_cwd)} "
528
+ f"({len(project_map['files'])} files, {len(project_map['changed_files'])} changed)")
529
+ log(f"-> wrote {os.path.relpath(precedent_path, abs_cwd)} "
530
+ f"({len(precedent_index['items'])} precedents)")
531
+ return {
532
+ "project_map_path": project_map_path,
533
+ "precedent_path": precedent_path,
534
+ "project_map": project_map,
535
+ "precedent_index": precedent_index,
536
+ }
@@ -0,0 +1,118 @@
1
+ Metadata-Version: 2.4
2
+ Name: simplicio-mapper
3
+ Version: 0.5.0
4
+ Summary: Python-first project mapper that emits .simplicio/project-map.json and precedent-index.json for the Simplicio ecosystem.
5
+ Project-URL: Homepage, https://github.com/wesleysimplicio/simplicio-mapper
6
+ Project-URL: Repository, https://github.com/wesleysimplicio/simplicio-mapper
7
+ Project-URL: Issues, https://github.com/wesleysimplicio/simplicio-mapper/issues
8
+ Author-email: Wesley Simplicio <wesleybob4@gmail.com>
9
+ License: MIT License
10
+
11
+ Copyright (c) 2026 Wesley Simplicio
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE
31
+ Keywords: agent,llm,mapper,precedent,project-map,simplicio
32
+ Classifier: Development Status :: 4 - Beta
33
+ Classifier: Environment :: Console
34
+ Classifier: Intended Audience :: Developers
35
+ Classifier: License :: OSI Approved :: MIT License
36
+ Classifier: Operating System :: OS Independent
37
+ Classifier: Programming Language :: Python :: 3
38
+ Classifier: Programming Language :: Python :: 3 :: Only
39
+ Classifier: Programming Language :: Python :: 3.9
40
+ Classifier: Programming Language :: Python :: 3.10
41
+ Classifier: Programming Language :: Python :: 3.11
42
+ Classifier: Programming Language :: Python :: 3.12
43
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
44
+ Requires-Python: >=3.9
45
+ Description-Content-Type: text/markdown
46
+
47
+ # simplicio-mapper
48
+
49
+ Python-first project mapper for the Simplicio ecosystem. It scans a repository
50
+ and emits two machine-readable artifacts that agents and tooling can consume
51
+ without parsing the human-readable markdown docs:
52
+
53
+ - `.simplicio/project-map.json` (`simplicio.project-map/v1`) — file inventory,
54
+ architecture signals, entry points, tests, modules, entities, dependencies
55
+ and recent changes.
56
+ - `.simplicio/precedent-index.json` (`simplicio.precedent-index/v1`) —
57
+ high-signal code examples tagged by change type, file, language, roles and
58
+ snippet.
59
+
60
+ The full contract is documented in
61
+ [SIMPLICIO_INTEGRATION.md](https://github.com/wesleysimplicio/simplicio-mapper/blob/main/SIMPLICIO_INTEGRATION.md).
62
+
63
+ ## Install
64
+
65
+ ```bash
66
+ pip install simplicio-mapper
67
+ ```
68
+
69
+ ## Usage
70
+
71
+ ```bash
72
+ # Map the current directory into .simplicio/
73
+ simplicio-mapper map
74
+
75
+ # Refresh artifacts and record changed files since the last run
76
+ simplicio-mapper update
77
+
78
+ # Map another project root, with hints when .starter-meta.json is absent
79
+ simplicio-mapper map --root path/to/project --stack python --product-name "My App"
80
+
81
+ # Re-run automatically while files change locally
82
+ simplicio-mapper map --watch
83
+ ```
84
+
85
+ The `llm-project-mapper` console script is provided as an alias.
86
+
87
+ ### Options
88
+
89
+ | Option | Description |
90
+ |---|---|
91
+ | `--root <dir>` | Project root to map. Defaults to the current directory. |
92
+ | `--out <dir>` | Artifact directory. Defaults to `.simplicio`. |
93
+ | `--stack <name>` | Stack hint when `.starter-meta.json` is absent. |
94
+ | `--product-name <name>` | Product name hint when `.starter-meta.json` is absent. |
95
+ | `--incremental` | Record changed files and update existing artifacts. |
96
+ | `--watch` | Re-run mapping when local files change. |
97
+ | `--silent` | Minimal output. |
98
+ | `-V`, `--version` | Show version and exit. |
99
+ | `-h`, `--help` | Show help. |
100
+
101
+ ## Consuming the artifacts
102
+
103
+ ```python
104
+ from pathlib import Path
105
+ import json
106
+
107
+ base = Path(".simplicio")
108
+ project_map = json.loads((base / "project-map.json").read_text())
109
+ precedents = json.loads((base / "precedent-index.json").read_text())
110
+
111
+ top_files = sorted(
112
+ project_map["files"], key=lambda f: f.get("importance", 0), reverse=True
113
+ )[:8]
114
+ ```
115
+
116
+ ## License
117
+
118
+ MIT
@@ -0,0 +1,8 @@
1
+ simplicio_mapper/__init__.py,sha256=DfD3WnGOG--bjtnY_Iwp9Rlu8RdhuIXaIA6p_6EDwPE,140
2
+ simplicio_mapper/cli.py,sha256=_uFdDiIajEOLQjtRoy2yoRIScxE3550BoaptHaeGWfs,4857
3
+ simplicio_mapper/mapper.py,sha256=iDjhXyuaCjnyOfO1h_GbQuKlN38-F-4DrLwo20aD4L8,19405
4
+ simplicio_mapper-0.5.0.dist-info/METADATA,sha256=9T-hOweJHdRepwrvugRBrW2jk1ICUmd_Sn6n-mfJU8o,4671
5
+ simplicio_mapper-0.5.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
6
+ simplicio_mapper-0.5.0.dist-info/entry_points.txt,sha256=DhUiIkNnocupy9esTE_EibLc9HZ4v0LkAcTP0Dk_PJk,110
7
+ simplicio_mapper-0.5.0.dist-info/licenses/LICENSE,sha256=M9nybyYRmZgzRcII42mrIiSDj8Ay0E6XV1xGAsT7J7M,1073
8
+ simplicio_mapper-0.5.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ llm-project-mapper = simplicio_mapper.cli:main
3
+ simplicio-mapper = simplicio_mapper.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Wesley Simplicio
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.