codebatch 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,223 @@
1
+ """Symbols task executor - extracts symbol tables and edges from AST.
2
+
3
+ Emits:
4
+ - kind=symbol: Per-file symbol definitions (functions, classes, variables)
5
+ - kind=edge: Import/reference relationships
6
+
7
+ Inputs:
8
+ - Parse outputs (kind=ast) via iter_prior_outputs
9
+
10
+ This task consumes AST from 01_parse and produces a compact symbol table.
11
+ Files without AST are skipped (no symbols emitted).
12
+ """
13
+
14
+ import json
15
+ from typing import Iterable, Optional
16
+
17
+ from ..runner import ShardRunner
18
+
19
+
20
def extract_python_symbols(ast_data: dict, path: str) -> tuple[list[dict], list[dict]]:
    """Extract symbols and edges from Python summary AST data.

    Only the nodes listed directly under ``ast_data["body"]`` are inspected.
    Emitted names and import targets are placeholders built from the node's
    line number, because the summary AST does not carry identifier names.

    Args:
        ast_data: Parsed AST dict from parse task.
        path: Source file path, copied into every emitted record.

    Returns:
        Tuple of (symbols, edges). Function, class and assignment nodes
        become ``kind="symbol"`` records; ``Import``/``ImportFrom`` nodes
        become ``kind="edge"`` records. Other node types are ignored.
    """
    # Node type -> symbol_type; the same word prefixes the placeholder name.
    symbol_types = {
        "FunctionDef": "function",
        "AsyncFunctionDef": "function",
        "ClassDef": "class",
        "Assign": "variable",
        "AnnAssign": "variable",
    }
    # Node type -> prefix of the placeholder import target.
    import_prefixes = {
        "Import": "module",
        "ImportFrom": "from_module",
    }

    symbols = []
    edges = []

    # `or []` also covers an explicit null "body"; the default argument of
    # dict.get() only applies when the key is missing altogether.
    for node in ast_data.get("body") or []:
        # Malformed entries are skipped so one bad node does not discard
        # every other symbol in the file.
        if not isinstance(node, dict):
            continue

        node_type = node.get("type", "")
        if not isinstance(node_type, str):
            continue
        lineno = node.get("lineno")

        if node_type in symbol_types:
            symbol_type = symbol_types[node_type]
            symbols.append({
                "kind": "symbol",
                "path": path,
                "name": f"{symbol_type}_{lineno}",  # Name not in summary AST
                "symbol_type": symbol_type,
                "line": lineno,
                "col": node.get("col_offset", 0),
                "scope": "module",
            })
        elif node_type in import_prefixes:
            edges.append({
                "kind": "edge",
                "path": path,
                "edge_type": "imports",
                "target": f"{import_prefixes[node_type]}_{lineno}",
                "line": lineno,
            })

    return symbols, edges
97
+
98
+
99
def extract_js_symbols(ast_data: dict, path: str) -> tuple[list[dict], list[dict]]:
    """Build a coarse symbol record for a JavaScript/TypeScript file.

    The parse task supplies only token counts for these files (no full AST),
    so at most one file-level "module" symbol is produced.

    Args:
        ast_data: Token info dict from parse task.
        path: Source file path.

    Returns:
        Tuple of (symbols, edges). ``symbols`` holds a single ``js_module``
        record when the ``keyword`` token count is positive and is empty
        otherwise; ``edges`` is always empty.
    """
    keyword_count = ast_data.get("tokens", {}).get("keyword", 0)

    # The "keyword" token count is the only signal used to decide whether
    # the file gets a summary symbol.
    if keyword_count > 0:
        module_symbol = {
            "kind": "symbol",
            "path": path,
            "name": "js_module",
            "symbol_type": "module",
            "line": 1,
            "col": 0,
            "scope": "file",
        }
        return [module_symbol], []

    return [], []
129
+
130
+
131
def extract_text_symbols(ast_data: dict, path: str) -> tuple[list[dict], list[dict]]:
    """Return the (empty) symbol information for a text file.

    Text files have no symbols in the traditional sense, so both arguments
    are accepted only to match the other extractors and are not used.

    Args:
        ast_data: Text info dict from parse task.
        path: Source file path.

    Returns:
        Tuple of two empty lists (no symbols, no edges).
    """
    symbols: list[dict] = []
    edges: list[dict] = []
    return symbols, edges
144
+
145
+
146
def symbols_executor(config: dict, files: Iterable[dict], runner: ShardRunner) -> list[dict]:
    """Run the symbols task for one shard.

    Reads the ``kind="ast"`` outputs that task ``01_parse`` produced for the
    shard, loads each AST object from the runner's object store, and turns
    it into symbol and edge records.

    Args:
        config: Task configuration. ``_batch_id`` and ``_shard_id`` are read
            from it to locate the prior outputs.
        files: Iterable of file records. Only consumed (and discarded) when
            the batch/shard context is missing from ``config``.
        runner: ShardRunner for CAS and prior output access.

    Returns:
        List of symbol and edge output records, plus one ``diagnostic``
        record for each AST that could not be loaded or processed. Empty
        when the batch or shard id is missing.
    """
    batch_id = config.get("_batch_id")
    shard_id = config.get("_shard_id")

    if not (batch_id and shard_id):
        # Prior outputs cannot be located without batch context. The input
        # iterable is still drained before giving up; this is not expected
        # to happen in normal execution.
        for _ in files:
            pass
        return []

    records = []
    parse_outputs = runner.iter_prior_outputs(batch_id, "01_parse", shard_id, kind="ast")

    for parse_record in parse_outputs:
        path = parse_record.get("path")
        object_ref = parse_record.get("object")
        if not (path and object_ref):
            continue

        # Chunked ASTs are skipped for simplicity (Phase 2).
        if parse_record.get("format", "json") == "json+chunks":
            continue

        try:
            # Load the AST payload from CAS.
            raw = runner.object_store.get_bytes(object_ref)
            ast_data = json.loads(raw.decode("utf-8"))

            ast_type = ast_data.get("type", "")
            ast_mode = ast_data.get("ast_mode", "")

            # Pick the extractor from the AST type; unknown types emit nothing.
            if ast_type == "Module" and ast_mode == "summary":
                symbols, edges = extract_python_symbols(ast_data, path)
            elif ast_type == "TokenInfo":
                symbols, edges = extract_js_symbols(ast_data, path)
            elif ast_type == "TextInfo":
                symbols, edges = extract_text_symbols(ast_data, path)
            else:
                symbols, edges = [], []

            records.extend(symbols)
            records.extend(edges)
        except Exception as exc:
            # A failure on one file becomes a diagnostic, not a shard abort.
            records.append({
                "kind": "diagnostic",
                "path": path,
                "severity": "warning",
                "code": "SYMBOLS_EXTRACT_ERROR",
                "message": f"Failed to extract symbols: {exc}",
                "line": 1,
            })

    return records
@@ -0,0 +1,66 @@
1
+ Metadata-Version: 2.4
2
+ Name: codebatch
3
+ Version: 0.1.0
4
+ Summary: Content-addressed batch execution engine
5
+ Author-email: mcp-tool-shop <64996768+mcp-tool-shop@users.noreply.github.com>
6
+ License-Expression: MIT
7
+ Keywords: batch,content-addressed,deterministic,execution
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Requires-Python: >=3.10
16
+ Provides-Extra: dev
17
+ Requires-Dist: pytest-cov>=4.0; extra == 'dev'
18
+ Requires-Dist: pytest>=7.0; extra == 'dev'
19
+ Description-Content-Type: text/markdown
20
+
21
+ # CodeBatch
22
+
23
+ Content-addressed batch execution engine with deterministic sharding and queryable outputs.
24
+
25
+ ## Overview
26
+
27
+ CodeBatch provides a filesystem-based execution substrate for running deterministic transformations over codebases. It captures inputs as immutable snapshots, executes work in isolated shards, and indexes all semantic outputs for efficient querying—without requiring a database.
28
+
29
+ ## Documentation
30
+
31
+ - **[SPEC.md](./SPEC.md)** — Full storage and execution specification
32
+ - **[CHANGELOG.md](./CHANGELOG.md)** — Version history
33
+
34
+ ## Spec Versioning
35
+
36
+ The specification uses semantic versioning with draft/stable markers. Each version is tagged in git (e.g., `spec-v1.0-draft`). Breaking changes increment the major version. Implementations should declare which spec version they target and tolerate unknown fields for forward compatibility.
37
+
38
+ ## Project Structure
39
+
40
+ ```
41
+ schemas/ JSON Schema definitions for all record types
42
+ src/ Core implementation
43
+ tests/ Test suites and fixtures
44
+ examples/ Usage examples
45
+ .github/ CI/CD workflows
46
+ ```
47
+
48
+ ## Quick Start
49
+
50
+ ```bash
51
+ # Create a snapshot of a directory
52
+ codebatch snapshot ./my-project --store ./store
53
+
54
+ # Initialize a batch with a pipeline
55
+ codebatch batch init --snapshot <id> --pipeline parse
56
+
57
+ # Run a shard
58
+ codebatch run-shard --batch <id> --task 01_parse --shard ab
59
+
60
+ # Query results
61
+ codebatch query diagnostics --batch <id> --task 01_parse
62
+ ```
63
+
64
+ ## License
65
+
66
+ MIT
@@ -0,0 +1,19 @@
1
+ codebatch/__init__.py,sha256=AoGhdmUvKco4xE6vVKChJFFKGX4G_j3mOo5U_KeXBgE,86
2
+ codebatch/batch.py,sha256=tzk4-M4Z3FaSCiB_o6s_7a-hAdqnetNbrXsVeksA47Q,11419
3
+ codebatch/cas.py,sha256=a6Y12q2Ao2wcunqnm7_lHSLTQV3MlT8nPONZ4zXZZsw,5370
4
+ codebatch/cli.py,sha256=YZdMZxgV1arBavEj1SArPyE_DERKVG7WbIqDpm0C9P8,15850
5
+ codebatch/common.py,sha256=n1FaDlmwEYKRUwZ0v3rsLO6GM6PJWf1QnohLJ7W_erc,2986
6
+ codebatch/paths.py,sha256=5LT2nc3OFJEzOgoq670asZhnu3MnUrKos1m-VL9D60I,5802
7
+ codebatch/query.py,sha256=-3Ydmxz_Ox8zeTEOJXhbljeIO2EeTmgrcBKKknymeRI,7280
8
+ codebatch/runner.py,sha256=LZH9FS5L0afcStVN-LRj7iWrLTtPEnwiEmTFqNUY6v4,17951
9
+ codebatch/snapshot.py,sha256=AZP7DvkJrunUitfWZkqZInc3EZf4qap8GPGOztyjEBc,10912
10
+ codebatch/store.py,sha256=S3nQ2dVtfsEcU5p_rAzF752B7D1toeFShW2-OB3aclE,4854
11
+ codebatch/tasks/__init__.py,sha256=H1B4ylnN-OSEXpX6YNKutJWgg4ic51Ro3eFL8SM4iEk,1022
12
+ codebatch/tasks/analyze.py,sha256=S2yCcri_Ni0N5Q2I7WuSx1kh4TLwdIIs--rB7Bf9bGw,3295
13
+ codebatch/tasks/lint.py,sha256=XrpN3opPjmV6sF7XQi7YhMh-MDjyefJCaZXw0t3xJWE,7901
14
+ codebatch/tasks/parse.py,sha256=Tlye03PIMFLSgwAE8DW3N4F3wQy7WiWSdnsNikZHoiU,9791
15
+ codebatch/tasks/symbols.py,sha256=9ZjoOnQMg_DPtFo7tliRh14P-01IjQgeaPW6w3hsV24,6880
16
+ codebatch-0.1.0.dist-info/METADATA,sha256=Ukd9d8ifWvWKdH2b88pcS7bmlon9rzW_X5xeqPcLnXo,2229
17
+ codebatch-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
18
+ codebatch-0.1.0.dist-info/entry_points.txt,sha256=U3k5DUUyCyyueiyJwttLMGZ99YSRDdWrUijFj7yQH6w,49
19
+ codebatch-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ codebatch = codebatch.cli:main