loom-data 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. loom/__init__.py +16 -0
  2. loom/__main__.py +5 -0
  3. loom/chat.py +94 -0
  4. loom/cli.py +4 -0
  5. loom/cli_app/__init__.py +3 -0
  6. loom/cli_app/config.py +10 -0
  7. loom/cli_app/handlers.py +137 -0
  8. loom/cli_app/main.py +48 -0
  9. loom/cli_app/output.py +38 -0
  10. loom/cli_app/parser.py +79 -0
  11. loom/cli_app/routing.py +37 -0
  12. loom/cli_app/skill.py +33 -0
  13. loom/csv_profile.py +157 -0
  14. loom/data.py +44 -0
  15. loom/datacard.py +151 -0
  16. loom/datacard_parts/__init__.py +13 -0
  17. loom/datacard_parts/cards.py +66 -0
  18. loom/datacard_parts/source.py +72 -0
  19. loom/explore_repo.py +171 -0
  20. loom/raw_cache.py +1 -0
  21. loom/raw_cache_support/__init__.py +32 -0
  22. loom/raw_cache_support/conflicts.py +39 -0
  23. loom/raw_cache_support/ops.py +89 -0
  24. loom/raw_cache_support/paths.py +26 -0
  25. loom/raw_cache_support/state.py +38 -0
  26. loom/raw_snapshot.py +80 -0
  27. loom/scan_state.py +40 -0
  28. loom/scan_support/__init__.py +13 -0
  29. loom/scan_support/manifest.py +57 -0
  30. loom/scan_support/state.py +22 -0
  31. loom/scan_support/tree.py +17 -0
  32. loom/scanner.py +120 -0
  33. loom/sync_client.py +30 -0
  34. loom/sync_ops/__init__.py +13 -0
  35. loom/sync_ops/http.py +18 -0
  36. loom/sync_ops/models.py +40 -0
  37. loom/sync_ops/public.py +28 -0
  38. loom/sync_ops/raw.py +81 -0
  39. loom/sync_ops/state.py +24 -0
  40. loom/sync_ops/workspace.py +109 -0
  41. loom/sync_server.py +26 -0
  42. loom/sync_service.py +26 -0
  43. loom/sync_state.py +65 -0
  44. loom/workspace_merge.py +168 -0
  45. loom/workspace_snapshot.py +183 -0
  46. loom_data-0.1.0.dist-info/METADATA +330 -0
  47. loom_data-0.1.0.dist-info/RECORD +50 -0
  48. loom_data-0.1.0.dist-info/WHEEL +5 -0
  49. loom_data-0.1.0.dist-info/entry_points.txt +2 -0
  50. loom_data-0.1.0.dist-info/top_level.txt +1 -0
loom/__init__.py ADDED
@@ -0,0 +1,16 @@
1
+ from .chat import LoomCommandRequest, ScanRequest, is_fast_scan_command, parse_chat_request, parse_loom_command
2
+ from .data import get, pull
3
+ from .scanner import ScanResult, scan_topic_from_chat, scan_topic_to_explore
4
+
5
+ __all__ = [
6
+ "LoomCommandRequest",
7
+ "ScanRequest",
8
+ "ScanResult",
9
+ "get",
10
+ "is_fast_scan_command",
11
+ "parse_chat_request",
12
+ "parse_loom_command",
13
+ "pull",
14
+ "scan_topic_from_chat",
15
+ "scan_topic_to_explore",
16
+ ]
loom/__main__.py ADDED
@@ -0,0 +1,5 @@
1
+ from .cli import main
2
+
3
+
4
+ if __name__ == "__main__":
5
+ raise SystemExit(main())
loom/chat.py ADDED
@@ -0,0 +1,94 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ import re
5
+
6
+
7
@dataclass(frozen=True)
class LoomCommandRequest:
    """Immutable description of a Loom command recognised in a chat message."""

    # Command verb; the parsers in this module produce "scan", "confirm",
    # "push", "pull" or "status".
    command: str
    # Workspace named in the message, or None when the message gave none.
    workspace: str | None
    # The message as passed in by the caller, before any stripping.
    original_message: str
12
+
13
+
14
@dataclass(frozen=True)
class ScanRequest:
    """Immutable description of a scan request extracted from a chat message."""

    # Topic (workspace name) that should be scanned.
    topic: str
    # The message as passed in by the caller.
    original_message: str
18
+
19
+
20
+ _WORKSPACE_PATTERN = r"[a-z0-9][a-z0-9_./-]*"
21
+ _SCAN_PATTERNS = (
22
+ re.compile(rf"(?i)(?:^|\b)loom\s+scan\s+(?P<workspace>{_WORKSPACE_PATTERN})\b"),
23
+ re.compile(rf"(?i)(?:^|\b)(?:please\s+)?scan\s+(?P<workspace>{_WORKSPACE_PATTERN})\s+with\s+loom\b"),
24
+ )
25
+ _GENERIC_PATTERNS = (
26
+ re.compile(rf"(?i)(?:^|\b)loom\s+(?P<command>confirm|push|pull|status)(?:\s+(?P<workspace>{_WORKSPACE_PATTERN}))?\b"),
27
+ )
28
+
29
+
30
def is_fast_scan_command(message: str) -> bool:
    """Return True when *message* begins with ``loom scan`` followed by a topic.

    The first two whitespace-separated words must be ``loom`` and ``scan``
    (case-insensitive) and at least one more word must follow.  Any run of
    whitespace is accepted between the words, which is the same tokenisation
    the fast-path parser in this module uses, so both agree on inputs such as
    ``"loom  scan energy"``.

    Args:
        message: Raw chat message; surrounding whitespace is ignored.

    Returns:
        True for a leading ``loom scan <something>``; False otherwise,
        including for empty or whitespace-only messages.
    """
    # split(None, 2) ignores leading whitespace and yields at most three
    # pieces: "loom", "scan" and the non-empty remainder.
    words = message.split(None, 2)
    if len(words) < 3:
        return False
    return words[0].lower() == "loom" and words[1].lower() == "scan"
37
+
38
+
39
def parse_chat_request(message: str) -> ScanRequest | None:
    """Return a ScanRequest when *message* is a Loom scan command with a workspace.

    The message is parsed with ``parse_loom_command``; anything that is not a
    "scan" command, or that carries no workspace, yields None.
    """
    parsed = parse_loom_command(message)
    if parsed is not None and parsed.command == "scan" and parsed.workspace is not None:
        return ScanRequest(topic=parsed.workspace, original_message=message)
    return None
44
+
45
+
46
def parse_loom_command(message: str) -> LoomCommandRequest | None:
    """Recognise a Loom command anywhere in a chat message.

    Matching is attempted in this order, and the first hit wins:

    1. the fast path for messages that start with ``loom scan <workspace>``;
    2. the scan phrasings in ``_SCAN_PATTERNS``;
    3. the generic commands in ``_GENERIC_PATTERNS``.

    Args:
        message: Raw chat message.

    Returns:
        A LoomCommandRequest carrying the untouched *message*, or None when
        the message is blank or nothing matches.
    """
    text = message.strip()
    if not text:
        return None

    quick = _parse_fast_scan_command(text, message)
    if quick is not None:
        return quick

    for scan_re in _SCAN_PATTERNS:
        hit = scan_re.search(text)
        if hit is None:
            continue
        return LoomCommandRequest(
            command="scan",
            workspace=_normalize_workspace(hit.group("workspace")),
            original_message=message,
        )

    for generic_re in _GENERIC_PATTERNS:
        hit = generic_re.search(text)
        if hit is None:
            continue
        # The workspace group is optional, so it may be None here.
        return LoomCommandRequest(
            command=hit.group("command").lower(),
            workspace=_normalize_workspace(hit.group("workspace")),
            original_message=message,
        )

    return None
74
+
75
+
76
def _parse_fast_scan_command(normalized: str, original_message: str) -> LoomCommandRequest | None:
    """Parse a message that literally starts with ``loom scan <workspace>``.

    Args:
        normalized: The message with surrounding whitespace removed.
        original_message: The message as received; stored on the result.

    Returns:
        A "scan" LoomCommandRequest, or None when the message does not start
        with ``loom scan``, has no workspace word, or the workspace word is
        not a valid workspace token once surrounding slashes are removed.
    """
    words = normalized.split(None, 2)
    if len(words) < 3:
        return None

    if words[0].lower() != "loom" or words[1].lower() != "scan":
        return None

    # Take the first word of the remainder *before* stripping slashes.
    # Stripping first raised IndexError for "loom scan /" (nothing left to
    # split), left a trailing slash on "energy/ now", and let "/ foo" be read
    # as workspace "foo".  words[2] is never empty, so [0] is always safe.
    workspace = words[2].split(None, 1)[0].strip("/")
    if not re.fullmatch(_WORKSPACE_PATTERN, workspace, flags=re.IGNORECASE):
        return None

    return LoomCommandRequest(command="scan", workspace=workspace, original_message=original_message)
89
+
90
+
91
+ def _normalize_workspace(value: str | None) -> str | None:
92
+ if value is None:
93
+ return None
94
+ return value.strip().strip("/") or None
loom/cli.py ADDED
@@ -0,0 +1,4 @@
1
+ from .cli_app import main
2
+ from .cli_app.config import DEFAULT_DATABASE_URL
3
+
4
+ __all__ = ["DEFAULT_DATABASE_URL", "main"]
loom/cli_app/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .main import main
2
+
3
+ __all__ = ["main"]
loom/cli_app/config.py ADDED
@@ -0,0 +1,10 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+
5
+
6
# Base URL of the Loom sync server; the LOOM_SERVER_URL environment variable
# overrides the local default.
DEFAULT_SERVER_URL = os.getenv("LOOM_SERVER_URL", "http://127.0.0.1:8765")

# Database URL offered as the default for the server commands; the
# LOOM_SERVER_DATABASE_URL environment variable overrides it.
DEFAULT_DATABASE_URL = os.getenv(
    "LOOM_SERVER_DATABASE_URL",
    "postgresql+psycopg2://loom@127.0.0.1:5432/loom",
)
loom/cli_app/handlers.py ADDED
@@ -0,0 +1,137 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+
5
+ from ..data import get as get_resource
6
+ from ..explore_repo import confirm_changes, ensure_explore_repo
7
+ from ..scanner import scan_topic_to_explore
8
+ from ..sync_client import pull_raw_workspaces, pull_workspaces, push_workspaces
9
+ from .output import print_status_summary, print_workspace_status
10
+ from .skill import install_skill
11
+
12
+
13
def run_install(args: argparse.Namespace) -> int:
    """Handle ``loom install``: write the skill file and set up the explore repo.

    Reads ``args.codex_home`` and ``args.workspace_root`` and resolves both
    before use.  Always returns exit code 0.
    """
    codex_home = args.codex_home.resolve()
    workspace_root = args.workspace_root.resolve()

    skill_dir = install_skill(codex_home, workspace_root)
    repo_dir = ensure_explore_repo(workspace_root)

    print(f"Installed Loom scan skill to: {skill_dir}")
    print(f"Initialized loom_explore git repo at: {repo_dir}")
    print("Codex can now use a fast path for chat messages like `loom scan energy`.")
    return 0
20
+
21
+
22
def run_scan(args: argparse.Namespace) -> int:
    """Handle ``loom scan``: scan ``args.topic`` and print a report.

    The report lists the rebuilt and skipped dataset directories, any missing
    source datasets that were kept, and finally the pending-change summary
    for the topic.  Always returns exit code 0.
    """
    outcome = scan_topic_to_explore(args.topic, args.workspace_root)

    print(f"Scanned topic: {outcome.topic}")
    print(f"Raw directory: {outcome.raw_topic_dir}")
    print(f"Explore directory: {outcome.explore_topic_dir}")
    print(f"Datasets discovered: {outcome.dataset_count}")

    rebuilt = outcome.rebuilt_dataset_dirs
    print(f"Datasets rebuilt: {len(rebuilt)}")
    for rebuilt_dir in rebuilt:
        print(f"- rebuilt: {rebuilt_dir}")

    skipped = outcome.skipped_dataset_dirs
    print(f"Datasets skipped: {len(skipped)}")
    for skipped_dir in skipped:
        print(f"- skipped: {skipped_dir}")

    # The "missing" section is only printed when there is something to list.
    missing = outcome.missing_dataset_dirs
    if missing:
        print(f"Missing source datasets kept in explore: {len(missing)}")
        for kept_dir in missing:
            print(f"- kept: {kept_dir}")

    print_status_summary(args.workspace_root, args.topic)
    return 0
40
+
41
+
42
def run_get(args: argparse.Namespace) -> int:
    """Handle ``loom get``: fetch ``args.resource`` and print where it landed.

    Always returns exit code 0.
    """
    cached_at = get_resource(
        args.resource,
        workspace_root=args.workspace_root,
        server_url=args.server_url,
    )
    print(f"Cached resource: {args.resource}")
    print(f"Local path: {cached_at}")
    return 0
47
+
48
+
49
def run_status(args: argparse.Namespace) -> int:
    """Handle ``loom status``: print the status report for ``args.workspace``.

    Always returns exit code 0.
    """
    workspace_root, workspace = args.workspace_root, args.workspace
    print_workspace_status(workspace_root, workspace)
    return 0
52
+
53
+
54
def run_confirm(args: argparse.Namespace) -> int:
    """Handle ``loom confirm``: commit pending changes and report the commit.

    ``args.message`` is optional on the namespace; None is passed when it is
    absent.  Always returns exit code 0, including when nothing was pending.
    """
    message = getattr(args, "message", None)
    commit_hash = confirm_changes(args.workspace_root, args.workspace, message)

    if commit_hash is not None:
        print(f"Confirmed changes for {args.workspace or 'all workspaces'}.")
        print(f"Commit: {commit_hash}")
    else:
        print("No pending changes to confirm.")
    return 0
62
+
63
+
64
def run_push(args: argparse.Namespace) -> int:
    """Handle ``loom push``: push workspaces and return the printer's exit code."""
    results = push_workspaces(args.workspace_root, args.server_url, args.workspace, args.message)
    return _print_push_results(results)
66
+
67
+
68
def run_pull(args: argparse.Namespace) -> int:
    """Handle ``loom pull``: pull workspaces and return the printer's exit code."""
    results = pull_workspaces(args.workspace_root, args.server_url, args.workspace)
    return _print_pull_results(results)
70
+
71
+
72
def run_pull_raw(args: argparse.Namespace) -> int:
    """Handle ``loom pull-raw``: pull raw files and print per-workspace counts.

    Always returns exit code 0, including when no workspace was found.
    """
    pulled = pull_raw_workspaces(args.workspace_root, args.server_url, args.workspace)
    if not pulled:
        print("No remote workspaces found to pull raw files for.")
        return 0

    for item in pulled:
        for line in (
            f"Pulled raw workspace: {item.workspace}",
            f"Manifest files: {item.manifest_file_count}",
            f"Downloaded files: {item.downloaded_file_count}",
            f"Linked local files: {item.linked_file_count}",
            f"Deleted files: {item.deleted_file_count}",
        ):
            print(line)
        notice = item.raw_conflict_notice_path
        if notice:
            print(f"Raw conflict notice: {notice}")
    return 0
86
+
87
+
88
+ def _print_push_results(results: tuple[object, ...]) -> int:
89
+ if not results:
90
+ print("No local workspaces found to push.")
91
+ return 0
92
+ exit_code = 0
93
+ for result in results:
94
+ if result.conflicted:
95
+ exit_code = 1
96
+ print(f"Push paused for workspace: {result.workspace}")
97
+ print(f"Remote revision: {result.revision_id or '-'}")
98
+ print("Conflict detected during rebase. Resolve the files below, then run:")
99
+ print(f" loom confirm {result.workspace}")
100
+ print(f" loom push {result.workspace}")
101
+ for path in result.conflict_paths:
102
+ print(f"- conflict: {path}")
103
+ continue
104
+ print(f"Pushed workspace: {result.workspace}")
105
+ print(f"Revision: {result.revision_id}")
106
+ print(f"Tree hash: {result.tree_hash}")
107
+ print(f"Changed files: {result.changed_file_count}")
108
+ print(f"Deleted files: {result.deleted_file_count}")
109
+ print(f"Raw objects uploaded: {result.raw_uploaded_object_count}")
110
+ print(f"Raw path mappings changed: {result.raw_mapping_changed_count}")
111
+ print(f"Raw path mappings deleted: {result.raw_mapping_deleted_count}")
112
+ if result.raw_conflict_notice_path:
113
+ print(f"Raw conflict notice: {result.raw_conflict_notice_path}")
114
+ return exit_code
115
+
116
+
117
+ def _print_pull_results(results: tuple[object, ...]) -> int:
118
+ if not results:
119
+ print("No remote workspaces found to pull.")
120
+ return 0
121
+ exit_code = 0
122
+ for result in results:
123
+ print(f"Pulled workspace: {result.workspace}")
124
+ print(f"Revision: {result.revision_id}")
125
+ print(f"Changed: {'yes' if result.changed else 'no'}")
126
+ print(f"Changed files: {result.changed_file_count}")
127
+ print(f"Deleted files: {result.deleted_file_count}")
128
+ if result.conflicted:
129
+ exit_code = 1
130
+ print("Conflict detected during rebase. Resolve the files below, then run:")
131
+ print(f" loom confirm {result.workspace}")
132
+ print(f" loom push {result.workspace}")
133
+ for path in result.conflict_paths:
134
+ print(f"- conflict: {path}")
135
+ elif result.raw_conflict_notice_path:
136
+ print(f"Raw conflict notice: {result.raw_conflict_notice_path}")
137
+ return exit_code
loom/cli_app/main.py ADDED
@@ -0,0 +1,48 @@
1
+ from __future__ import annotations
2
+
3
+ import importlib
4
+ import sys
5
+
6
+ from .handlers import run_confirm, run_get, run_install, run_pull, run_pull_raw, run_push, run_scan, run_status
7
+ from .parser import build_parser
8
+ from .routing import run_route
9
+
10
+
11
def main(argv: list[str] | None = None) -> int:
    """Entry point of the ``loom`` command-line interface.

    Args:
        argv: Argument list to parse; None is handed to argparse unchanged,
            which then reads the process arguments.

    Returns:
        The exit code of the selected command, or 1 after printing the help
        text when no known command was given.
    """
    # Build the parser once and reuse it for both parsing and the help
    # fallback instead of constructing a second identical parser.
    parser = build_parser()
    args = parser.parse_args(argv)

    handlers = {
        "install": run_install,
        "scan": run_scan,
        "route": run_route,
        "get": run_get,
        "status": run_status,
        "confirm": run_confirm,
        "push": run_push,
        "pull": run_pull,
        "pull-raw": run_pull_raw,
    }
    handler = handlers.get(args.command)
    if handler is not None:
        return handler(args)

    # The server commands are resolved at call time through
    # _run_server_command, which imports "loom_server.cli" on demand.
    if args.command == "server-init-db":
        return _run_server_command("run_init_db", args.database_url, args.storage_root)
    if args.command == "server-run":
        return _run_server_command("run_server", args.database_url, args.storage_root, args.host, args.port)

    parser.print_help()
    return 1
32
+
33
+
34
+ def _run_server_command(function_name: str, *args: object) -> int:
35
+ try:
36
+ module = importlib.import_module("loom_server.cli")
37
+ except ModuleNotFoundError as exc:
38
+ if exc.name == "loom_server":
39
+ print(
40
+ "This command requires the separate 'loom-server' package. "
41
+ "Install it first, then rerun the command.",
42
+ file=sys.stderr,
43
+ )
44
+ return 1
45
+ raise
46
+
47
+ command = getattr(module, function_name)
48
+ return int(command(*args))
loom/cli_app/output.py ADDED
@@ -0,0 +1,38 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ from ..explore_repo import get_repo_status
6
+ from ..sync_state import load_workspace_sync_state
7
+
8
+
9
def print_status_summary(workspace_root: Path | str, workspace: str) -> None:
    """Print the pending changes for *workspace* plus the command to confirm them.

    When the repo status has no entries a single "nothing pending" line is
    printed instead.
    """
    entries = get_repo_status(workspace_root, workspace).entries
    if not entries:
        print("No pending changes detected after scan.")
        return

    print("Pending changes:")
    for change in entries:
        print(f"{change.code} {change.path}")

    launcher = Path(workspace_root).resolve() / "scripts" / "loom.py"
    print(f"Confirm with: uv run python {launcher} confirm {workspace}")
19
+
20
+
21
def print_workspace_status(workspace_root: Path | str, workspace: str | None) -> None:
    """Print the explore-repo status, sync state and pending changes.

    The sync-state section is printed only when the repo status reports a
    scope; the pending-changes section always follows.
    """
    status = get_repo_status(workspace_root, workspace)
    print(f"Explore repo: {status.repo_dir}")

    scope = status.scope
    if scope:
        sync = load_workspace_sync_state(workspace_root, scope)
        print(f"Workspace: {scope}")
        print(f"Last pulled revision: {sync.last_pulled_revision or '-'}")
        print(f"Last pushed revision: {sync.last_pushed_revision or '-'}")
        print(f"Last synced tree hash: {sync.last_synced_tree_hash or '-'}")
        print(f"Last sync commit: {sync.last_sync_commit or '-'}")
        if sync.pending_rebase_revision:
            print(f"Pending rebase revision: {sync.pending_rebase_revision}")

    if status.entries:
        print("Pending changes:")
        for change in status.entries:
            print(f"{change.code} {change.path}")
    else:
        print("No pending changes.")
loom/cli_app/parser.py ADDED
@@ -0,0 +1,79 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import os
5
+ from pathlib import Path
6
+
7
+ from .config import DEFAULT_DATABASE_URL, DEFAULT_SERVER_URL
8
+
9
+
10
def build_parser() -> argparse.ArgumentParser:
    """Create the ``loom`` argument parser with all subcommands registered.

    The chosen subcommand name is stored on the namespace as ``command``.
    """
    root = argparse.ArgumentParser(prog="loom")
    commands = root.add_subparsers(dest="command")
    # Registration order is kept as install, scan, data, sync.
    for register in (_add_install_parser, _add_scan_parsers, _add_data_parsers, _add_sync_parsers):
        register(commands)
    return root
18
+
19
+
20
+ def _add_install_parser(subparsers: argparse._SubParsersAction[argparse.ArgumentParser]) -> None:
21
+ install_parser = subparsers.add_parser("install", help="Install the Loom Codex skill and initialize loom_explore git tracking.")
22
+ install_parser.add_argument("--codex-home", type=Path, default=Path(os.environ.get("CODEX_HOME", Path.home() / ".codex")))
23
+ install_parser.add_argument("--workspace-root", type=Path, default=Path.cwd())
24
+
25
+
26
+ def _add_scan_parsers(subparsers: argparse._SubParsersAction[argparse.ArgumentParser]) -> None:
27
+ scan_parser = subparsers.add_parser("scan", help="Scan a Loom topic from loom_raw into loom_explore.")
28
+ scan_parser.add_argument("topic")
29
+ scan_parser.add_argument("--workspace-root", type=Path, default=Path.cwd())
30
+
31
+ route_parser = subparsers.add_parser("route", help="Parse a chat message and run loom scan if it matches.")
32
+ route_parser.add_argument("message")
33
+ route_parser.add_argument("--workspace-root", type=Path, default=Path.cwd())
34
+
35
+
36
def _add_data_parsers(subparsers: argparse._SubParsersAction[argparse.ArgumentParser]) -> None:
    """Register the ``get`` and ``pull-raw`` subcommands.

    ``get`` requires a ``resource``; ``pull-raw`` takes an optional
    ``workspace``.  Both accept ``--server-url`` and ``--workspace-root``.
    """
    fetch = subparsers.add_parser(
        "get",
        help="Ensure one raw file exists under test-project/loom/.loom/raw and print its local path.",
    )
    fetch.add_argument("resource")
    fetch.add_argument("--server-url", default=DEFAULT_SERVER_URL)
    fetch.add_argument("--workspace-root", type=Path, default=Path.cwd())

    pull_raw = subparsers.add_parser(
        "pull-raw",
        help="Pull latest raw files into test-project/loom/.loom/raw.",
    )
    pull_raw.add_argument("workspace", nargs="?")
    pull_raw.add_argument("--server-url", default=DEFAULT_SERVER_URL)
    pull_raw.add_argument("--workspace-root", type=Path, default=Path.cwd())
46
+
47
+
48
def _add_sync_parsers(subparsers: argparse._SubParsersAction[argparse.ArgumentParser]) -> None:
    """Register the workspace sync commands and the two server commands.

    ``status``, ``confirm``, ``push`` and ``pull`` go through
    ``_add_workspace_parser``.  ``server-init-db`` and ``server-run`` share
    ``--database-url`` and ``--storage-root``; the latter defaults to the
    LOOM_SERVER_STORAGE_ROOT environment variable, falling back to
    ``.loom-server-storage`` under the current working directory.
    """
    _add_workspace_parser(subparsers, "status", "Show pending loom_explore changes.", include_message=False)
    _add_workspace_parser(subparsers, "confirm", "Commit pending loom_explore changes.", include_message=True)
    _add_workspace_parser(
        subparsers,
        "push",
        "Push local workspaces to the Loom sync server.",
        include_message=True,
        include_server=True,
    )
    _add_workspace_parser(
        subparsers,
        "pull",
        "Pull remote workspaces from the Loom sync server.",
        include_server=True,
    )

    storage_root = Path(os.environ.get("LOOM_SERVER_STORAGE_ROOT", Path.cwd() / ".loom-server-storage"))

    init_db = subparsers.add_parser("server-init-db", help="Initialize the Loom sync server database schema.")
    init_db.add_argument("--database-url", default=DEFAULT_DATABASE_URL)
    init_db.add_argument("--storage-root", type=Path, default=storage_root)

    serve = subparsers.add_parser("server-run", help="Run the Loom sync FastAPI server.")
    serve.add_argument("--database-url", default=DEFAULT_DATABASE_URL)
    serve.add_argument("--storage-root", type=Path, default=storage_root)
    serve.add_argument("--host", default="127.0.0.1")
    serve.add_argument("--port", type=int, default=8765)
63
+
64
+
65
+ def _add_workspace_parser(
66
+ subparsers: argparse._SubParsersAction[argparse.ArgumentParser],
67
+ name: str,
68
+ help_text: str,
69
+ *,
70
+ include_message: bool = False,
71
+ include_server: bool = False,
72
+ ) -> None:
73
+ parser = subparsers.add_parser(name, help=help_text)
74
+ parser.add_argument("workspace", nargs="?")
75
+ if include_message:
76
+ parser.add_argument("--message")
77
+ if include_server:
78
+ parser.add_argument("--server-url", default=DEFAULT_SERVER_URL)
79
+ parser.add_argument("--workspace-root", type=Path, default=Path.cwd())
loom/cli_app/routing.py ADDED
@@ -0,0 +1,37 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+
5
+ from ..chat import parse_loom_command
6
+ from ..scanner import scan_topic_from_chat
7
+ from .config import DEFAULT_SERVER_URL
8
+ from .handlers import run_confirm, run_pull, run_push, run_status
9
+ from .output import print_status_summary
10
+
11
+
12
def run_route(args: argparse.Namespace) -> int:
    """Handle ``loom route``: parse ``args.message`` and run the matching command.

    A scan message is executed through ``scan_topic_from_chat`` and followed
    by the pending-change summary.  ``status``, ``confirm``, ``push`` and
    ``pull`` are forwarded to their handlers with a fresh namespace that uses
    the default server URL and no commit message.

    Returns:
        The delegated handler's exit code, 0 after a successful scan, or 1
        when no command could be detected.
    """
    parsed = parse_loom_command(args.message)
    if parsed is None:
        print("No loom command detected.")
        return 1

    if parsed.command != "scan":
        forwarded = argparse.Namespace(
            workspace=parsed.workspace,
            workspace_root=args.workspace_root,
            server_url=DEFAULT_SERVER_URL,
            message=None,
        )
        dispatch = {"status": run_status, "confirm": run_confirm, "push": run_push, "pull": run_pull}
        delegate = dispatch.get(parsed.command, _no_command)
        return delegate(forwarded)

    if scan_topic_from_chat(args.message, args.workspace_root) is None:
        print("No loom scan command detected.")
        return 1

    print(f"Detected loom scan chat command for topic: {parsed.workspace}")
    print_status_summary(args.workspace_root, parsed.workspace or "")
    return 0
33
+
34
+
35
+ def _no_command(_: argparse.Namespace) -> int:
36
+ print("No loom command detected.")
37
+ return 1
loom/cli_app/skill.py ADDED
@@ -0,0 +1,33 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+
6
def install_skill(codex_home: Path, workspace_root: Path) -> Path:
    """Write the loom-scan ``SKILL.md`` under *codex_home* and return its directory.

    The file is written to ``<codex_home>/skills/loom-scan/SKILL.md`` as
    UTF-8; missing directories are created.  The rendered text refers to the
    launcher at ``<workspace_root>/scripts/loom.py``.
    """
    target_dir = codex_home.joinpath("skills", "loom-scan")
    target_dir.mkdir(parents=True, exist_ok=True)

    markdown = render_skill_markdown(workspace_root, workspace_root / "scripts" / "loom.py")
    target_dir.joinpath("SKILL.md").write_text(markdown, encoding="utf-8")
    return target_dir
12
+
13
+
14
def render_skill_markdown(workspace_root: Path, launcher: Path) -> str:
    """Return the text of the loom-scan ``SKILL.md``.

    *launcher* and *workspace_root* are interpolated into the two example
    commands at the end of the document.  The result ends with a newline.
    """
    lines = [
        "# loom-scan",
        "",
        "Use this skill when the user types a Loom request in chat, especially commands like `loom scan energy` or `loom confirm energy`.",
        "",
        "## Purpose",
        "",
        "- Provide a fast path for Loom scan requests without doing broad repo exploration first.",
        "- Generate dataset summaries under `test-project/loom/loom_explore/<topic>`.",
        "- Track confirmed loom_explore changes in a dedicated git repository.",
        "- Route sync commands like `loom push energy` and `loom pull energy` into the local CLI fast path.",
        "- Prefer reading `loom_explore` outputs after the scan instead of reading raw CSV files directly.",
        "",
        "## Fast Path",
        "",
        "1. Do not browse unrelated files first.",
        "2. Do not read anything under `wiki/discard`.",
        f'3. Run `uv run python {launcher} route "loom scan <topic>" --workspace-root {workspace_root}`.',
        f"4. Or run `uv run python {launcher} scan <topic> --workspace-root {workspace_root}`.",
    ]
    return "\n".join(lines) + "\n"
loom/csv_profile.py ADDED
@@ -0,0 +1,157 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import Counter, deque
4
+ import csv
5
+ from dataclasses import dataclass, field
6
+ import math
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+
11
@dataclass
class NumericStats:
    """Running count, minimum, maximum and sum of a stream of numbers."""

    count: int = 0
    minimum: float | None = None
    maximum: float | None = None
    total: float = 0.0

    def update(self, value: float) -> None:
        """Fold one more value into the running statistics."""
        self.count += 1
        self.total += value
        self.minimum = value if self.minimum is None else min(self.minimum, value)
        self.maximum = value if self.maximum is None else max(self.maximum, value)

    def to_dict(self) -> dict[str, Any]:
        """Return count, min, max and mean; mean is None while count is 0."""
        summary: dict[str, Any] = {
            "count": self.count,
            "min": self.minimum,
            "max": self.maximum,
        }
        summary["mean"] = self.total / self.count if self.count else None
        return summary
34
+
35
+
36
@dataclass
class ColumnProfile:
    """Accumulated statistics for one CSV column."""

    name: str
    non_empty_count: int = 0
    empty_count: int = 0
    # How many values were classified as each inferred type.
    type_counts: Counter[str] = field(default_factory=Counter)
    # Occurrence counts for non-numeric values; at most 20 distinct values
    # are tracked.
    unique_examples: Counter[str] = field(default_factory=Counter)
    numeric_stats: NumericStats = field(default_factory=NumericStats)

    def update(self, raw_value: str) -> None:
        """Record one cell value after stripping surrounding whitespace."""
        text = raw_value.strip()
        if not text:
            self.empty_count += 1
            return

        self.non_empty_count += 1
        kind, parsed = _infer_value_type(text)
        self.type_counts[kind] += 1

        if kind == "number" and isinstance(parsed, float):
            self.numeric_stats.update(parsed)
            return

        # Count the value if it is already tracked or there is still room
        # for a new distinct value.
        tracked = self.unique_examples
        if text in tracked or len(tracked) < 20:
            tracked[text] += 1

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict summary of the column.

        ``numeric_stats`` is included only when numbers were seen, and
        ``top_values`` (up to 10 entries) only when values were tracked.
        """
        summary: dict[str, Any] = {
            "name": self.name,
            "non_empty_count": self.non_empty_count,
            "empty_count": self.empty_count,
            "type_counts": dict(self.type_counts),
        }

        if self.numeric_stats.count:
            summary["numeric_stats"] = self.numeric_stats.to_dict()

        if self.unique_examples:
            top_values = []
            for value, count in self.unique_examples.most_common(10):
                top_values.append({"value": value, "count": count})
            summary["top_values"] = top_values

        return summary
78
+
79
+
80
def profile_csv(csv_path: Path, sample_size: int = 10) -> dict[str, Any]:
    """Profile a CSV file: dialect, per-column statistics and sample rows.

    Args:
        csv_path: File to read.  It is decoded as ``utf-8-sig``, so a leading
            UTF-8 byte-order mark is dropped.
        sample_size: Maximum number of rows kept in each of ``head`` and
            ``tail``.

    Returns:
        A dict with the keys ``file_name``, ``file_size_bytes``, ``dialect``
        (the detected delimiter), ``columns``, ``row_count``, ``head``,
        ``tail`` and ``notes``.  A zero-byte file is reported without being
        opened.
    """
    file_size = csv_path.stat().st_size
    if file_size == 0:
        return {
            "file_name": csv_path.name,
            "file_size_bytes": 0,
            "dialect": {"delimiter": ","},
            "columns": [],
            "row_count": 0,
            "head": [],
            "tail": [],
            "notes": ["File is empty."],
        }

    with csv_path.open("r", encoding="utf-8-sig", newline="") as handle:
        # Detect the dialect from the first 4096 characters, then rewind so
        # the reader starts at the header line.
        sample = handle.read(4096)
        handle.seek(0)
        dialect = _sniff_dialect(sample)
        reader = csv.DictReader(handle, dialect=dialect)

        fieldnames = reader.fieldnames or []
        columns = [ColumnProfile(name=name) for name in fieldnames]
        head_rows: list[dict[str, str]] = []
        # A bounded deque keeps only the last `sample_size` rows.
        tail_rows: deque[dict[str, str]] = deque(maxlen=sample_size)
        row_count = 0

        for row in reader:
            # Short rows yield None for missing cells; normalise to "".
            normalized_row = {name: row.get(name, "") or "" for name in fieldnames}
            if len(head_rows) < sample_size:
                head_rows.append(normalized_row)
            tail_rows.append(normalized_row)
            row_count += 1

            for column in columns:
                column.update(normalized_row.get(column.name, ""))

    notes: list[str] = []
    if not fieldnames:
        notes.append("CSV file does not contain a header row.")
    elif row_count == 0:
        # Only claim "a header but no data rows" when a header was found;
        # a file of blank lines previously got both contradictory notes.
        notes.append("CSV file contains a header but no data rows.")

    return {
        "file_name": csv_path.name,
        "file_size_bytes": file_size,
        "dialect": {"delimiter": dialect.delimiter},
        "columns": [column.to_dict() for column in columns],
        "row_count": row_count,
        "head": head_rows,
        "tail": list(tail_rows),
        "notes": notes,
    }
132
+
133
+
134
+ def _sniff_dialect(sample: str) -> csv.Dialect:
135
+ if not sample.strip():
136
+ return csv.get_dialect("excel")
137
+
138
+ try:
139
+ return csv.Sniffer().sniff(sample, delimiters=",\t;|")
140
+ except csv.Error:
141
+ return csv.get_dialect("excel")
142
+
143
+
144
+ def _infer_value_type(value: str) -> tuple[str, Any]:
145
+ lowered = value.lower()
146
+ if lowered in {"true", "false", "yes", "no"}:
147
+ return "boolean", lowered in {"true", "yes"}
148
+
149
+ try:
150
+ number = float(value)
151
+ except ValueError:
152
+ return "string", value
153
+
154
+ if math.isfinite(number):
155
+ return "number", number
156
+
157
+ return "string", value