codebatch 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codebatch/__init__.py +3 -0
- codebatch/batch.py +366 -0
- codebatch/cas.py +170 -0
- codebatch/cli.py +432 -0
- codebatch/common.py +104 -0
- codebatch/paths.py +196 -0
- codebatch/query.py +242 -0
- codebatch/runner.py +495 -0
- codebatch/snapshot.py +340 -0
- codebatch/store.py +162 -0
- codebatch/tasks/__init__.py +37 -0
- codebatch/tasks/analyze.py +109 -0
- codebatch/tasks/lint.py +244 -0
- codebatch/tasks/parse.py +304 -0
- codebatch/tasks/symbols.py +223 -0
- codebatch-0.1.0.dist-info/METADATA +66 -0
- codebatch-0.1.0.dist-info/RECORD +19 -0
- codebatch-0.1.0.dist-info/WHEEL +4 -0
- codebatch-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
"""Symbols task executor - extracts symbol tables and edges from AST.
|
|
2
|
+
|
|
3
|
+
Emits:
|
|
4
|
+
- kind=symbol: Per-file symbol definitions (functions, classes, variables)
|
|
5
|
+
- kind=edge: Import/reference relationships
|
|
6
|
+
|
|
7
|
+
Inputs:
|
|
8
|
+
- Parse outputs (kind=ast) via iter_prior_outputs
|
|
9
|
+
|
|
10
|
+
This task consumes AST from 01_parse and produces a compact symbol table.
|
|
11
|
+
Files without AST are skipped (no symbols emitted).
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
from typing import Iterable, Optional
|
|
16
|
+
|
|
17
|
+
from ..runner import ShardRunner
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def extract_python_symbols(ast_data: dict, path: str) -> tuple[list[dict], list[dict]]:
    """Extract symbols and edges from Python AST data.

    Walks the top-level ``body`` nodes of a summary AST (produced by the
    parse task) and classifies each node:

    - FunctionDef / AsyncFunctionDef -> function symbol
    - ClassDef                       -> class symbol
    - Assign / AnnAssign             -> variable symbol (module-level)
    - Import / ImportFrom            -> import edge

    The summary AST does not carry identifier names, so synthetic names
    keyed by the line number (e.g. ``function_12``) are emitted instead.

    Args:
        ast_data: Parsed AST dict from parse task.
        path: Source file path.

    Returns:
        Tuple of (symbols, edges).
    """
    symbols: list[dict] = []
    edges: list[dict] = []

    def _make_symbol(symbol_type: str, lineno, col) -> dict:
        # Shared record shape for every symbol kind; the name is synthetic
        # (line-number based) because the summary AST drops identifiers.
        return {
            "kind": "symbol",
            "path": path,
            "name": f"{symbol_type}_{lineno}",
            "symbol_type": symbol_type,
            "line": lineno,
            "col": col,
            "scope": "module",
        }

    def _make_edge(target_prefix: str, lineno) -> dict:
        # Import relationships are emitted as edges, not symbols.
        return {
            "kind": "edge",
            "path": path,
            "edge_type": "imports",
            "target": f"{target_prefix}_{lineno}",
            "line": lineno,
        }

    for node in ast_data.get("body", []):
        node_type = node.get("type", "")
        lineno = node.get("lineno")
        col = node.get("col_offset", 0)

        if node_type in ("FunctionDef", "AsyncFunctionDef"):
            symbols.append(_make_symbol("function", lineno, col))
        elif node_type == "ClassDef":
            symbols.append(_make_symbol("class", lineno, col))
        elif node_type == "Import":
            edges.append(_make_edge("module", lineno))
        elif node_type == "ImportFrom":
            edges.append(_make_edge("from_module", lineno))
        elif node_type in ("Assign", "AnnAssign"):
            # Module-level assignments: potential exports/constants.
            symbols.append(_make_symbol("variable", lineno, col))

    return symbols, edges
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def extract_js_symbols(ast_data: dict, path: str) -> tuple[list[dict], list[dict]]:
    """Extract symbols from JavaScript/TypeScript token data.

    The parse task only records token counts for JS/TS (no full AST), so
    at most one file-scoped summary symbol is produced.

    Args:
        ast_data: Token info dict from parse task.
        path: Source file path.

    Returns:
        Tuple of (symbols, edges); edges is always empty.
    """
    token_counts = ast_data.get("tokens", {})

    # No keywords seen -> nothing worth summarizing.
    if token_counts.get("keyword", 0) <= 0:
        return [], []

    module_symbol = {
        "kind": "symbol",
        "path": path,
        "name": "js_module",
        "symbol_type": "module",
        "line": 1,
        "col": 0,
        "scope": "file",
    }
    return [module_symbol], []
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def extract_text_symbols(ast_data: dict, path: str) -> tuple[list[dict], list[dict]]:
    """Extract minimal info from text files.

    Text files carry no symbols or edges in the traditional sense, so the
    result is always a pair of empty lists.

    Args:
        ast_data: Text info dict from parse task (unused).
        path: Source file path (unused).

    Returns:
        Empty lists (no symbols for text files).
    """
    empty_symbols: list[dict] = []
    empty_edges: list[dict] = []
    return empty_symbols, empty_edges
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def symbols_executor(config: dict, files: Iterable[dict], runner: ShardRunner) -> list[dict]:
    """Execute the symbols task.

    Consumes AST outputs from 01_parse and produces symbol tables and edges.

    Args:
        config: Task configuration; the runner injects ``_batch_id`` and
            ``_shard_id`` here during execution.
        files: Iterable of file records (used to get batch/task context).
        runner: ShardRunner for CAS and prior output access.

    Returns:
        List of symbol and edge output records. Per-file extraction
        failures are reported as kind=diagnostic records instead of
        aborting the shard.
    """
    outputs: list[dict] = []

    # Context set by the runner during execution.
    batch_id = config.get("_batch_id")
    shard_id = config.get("_shard_id")

    if not batch_id or not shard_id:
        # Without batch context we cannot read prior outputs. Drain the
        # files iterable so upstream generators are still consumed, then
        # return empty — this shouldn't happen in normal execution.
        # (The original also checked whether the list was empty, but both
        # paths returned [] anyway; the check was dead code.)
        list(files)
        return []

    # Iterate over parse AST outputs for this shard.
    for ast_output in runner.iter_prior_outputs(batch_id, "01_parse", shard_id, kind="ast"):
        path = ast_output.get("path")
        object_ref = ast_output.get("object")
        fmt = ast_output.get("format", "json")

        if not path or not object_ref:
            continue

        # Skip chunked ASTs for simplicity (Phase 2).
        if fmt == "json+chunks":
            continue

        try:
            # Load the AST payload from content-addressed storage.
            ast_bytes = runner.object_store.get_bytes(object_ref)
            ast_data = json.loads(ast_bytes.decode("utf-8"))

            ast_type = ast_data.get("type", "")
            ast_mode = ast_data.get("ast_mode", "")

            symbols: list[dict] = []
            edges: list[dict] = []

            # Dispatch on the AST flavour produced by the parse task.
            if ast_type == "Module" and ast_mode == "summary":
                # Python summary AST
                symbols, edges = extract_python_symbols(ast_data, path)
            elif ast_type == "TokenInfo":
                # JavaScript/TypeScript tokens
                symbols, edges = extract_js_symbols(ast_data, path)
            elif ast_type == "TextInfo":
                # Text file stats
                symbols, edges = extract_text_symbols(ast_data, path)

            outputs.extend(symbols)
            outputs.extend(edges)

        except Exception as e:
            # Best-effort boundary: surface extraction failures as
            # diagnostics rather than failing the whole shard.
            outputs.append({
                "kind": "diagnostic",
                "path": path,
                "severity": "warning",
                "code": "SYMBOLS_EXTRACT_ERROR",
                "message": f"Failed to extract symbols: {e}",
                "line": 1,
            })

    return outputs
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codebatch
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Content-addressed batch execution engine
|
|
5
|
+
Author-email: mcp-tool-shop <64996768+mcp-tool-shop@users.noreply.github.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Keywords: batch,content-addressed,deterministic,execution
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Requires-Python: >=3.10
|
|
16
|
+
Provides-Extra: dev
|
|
17
|
+
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
18
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# CodeBatch
|
|
22
|
+
|
|
23
|
+
Content-addressed batch execution engine with deterministic sharding and queryable outputs.
|
|
24
|
+
|
|
25
|
+
## Overview
|
|
26
|
+
|
|
27
|
+
CodeBatch provides a filesystem-based execution substrate for running deterministic transformations over codebases. It captures inputs as immutable snapshots, executes work in isolated shards, and indexes all semantic outputs for efficient querying—without requiring a database.
|
|
28
|
+
|
|
29
|
+
## Documentation
|
|
30
|
+
|
|
31
|
+
- **[SPEC.md](./SPEC.md)** — Full storage and execution specification
|
|
32
|
+
- **[CHANGELOG.md](./CHANGELOG.md)** — Version history
|
|
33
|
+
|
|
34
|
+
## Spec Versioning
|
|
35
|
+
|
|
36
|
+
The specification uses semantic versioning with draft/stable markers. Each version is tagged in git (e.g., `spec-v1.0-draft`). Breaking changes increment the major version. Implementations should declare which spec version they target and tolerate unknown fields for forward compatibility.
|
|
37
|
+
|
|
38
|
+
## Project Structure
|
|
39
|
+
|
|
40
|
+
```
|
|
41
|
+
schemas/ JSON Schema definitions for all record types
|
|
42
|
+
src/ Core implementation
|
|
43
|
+
tests/ Test suites and fixtures
|
|
44
|
+
examples/ Usage examples
|
|
45
|
+
.github/ CI/CD workflows
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Quick Start
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
# Create a snapshot of a directory
|
|
52
|
+
codebatch snapshot ./my-project --store ./store
|
|
53
|
+
|
|
54
|
+
# Initialize a batch with a pipeline
|
|
55
|
+
codebatch batch init --snapshot <id> --pipeline parse
|
|
56
|
+
|
|
57
|
+
# Run a shard
|
|
58
|
+
codebatch run-shard --batch <id> --task 01_parse --shard ab
|
|
59
|
+
|
|
60
|
+
# Query results
|
|
61
|
+
codebatch query diagnostics --batch <id> --task 01_parse
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## License
|
|
65
|
+
|
|
66
|
+
MIT
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
codebatch/__init__.py,sha256=AoGhdmUvKco4xE6vVKChJFFKGX4G_j3mOo5U_KeXBgE,86
|
|
2
|
+
codebatch/batch.py,sha256=tzk4-M4Z3FaSCiB_o6s_7a-hAdqnetNbrXsVeksA47Q,11419
|
|
3
|
+
codebatch/cas.py,sha256=a6Y12q2Ao2wcunqnm7_lHSLTQV3MlT8nPONZ4zXZZsw,5370
|
|
4
|
+
codebatch/cli.py,sha256=YZdMZxgV1arBavEj1SArPyE_DERKVG7WbIqDpm0C9P8,15850
|
|
5
|
+
codebatch/common.py,sha256=n1FaDlmwEYKRUwZ0v3rsLO6GM6PJWf1QnohLJ7W_erc,2986
|
|
6
|
+
codebatch/paths.py,sha256=5LT2nc3OFJEzOgoq670asZhnu3MnUrKos1m-VL9D60I,5802
|
|
7
|
+
codebatch/query.py,sha256=-3Ydmxz_Ox8zeTEOJXhbljeIO2EeTmgrcBKKknymeRI,7280
|
|
8
|
+
codebatch/runner.py,sha256=LZH9FS5L0afcStVN-LRj7iWrLTtPEnwiEmTFqNUY6v4,17951
|
|
9
|
+
codebatch/snapshot.py,sha256=AZP7DvkJrunUitfWZkqZInc3EZf4qap8GPGOztyjEBc,10912
|
|
10
|
+
codebatch/store.py,sha256=S3nQ2dVtfsEcU5p_rAzF752B7D1toeFShW2-OB3aclE,4854
|
|
11
|
+
codebatch/tasks/__init__.py,sha256=H1B4ylnN-OSEXpX6YNKutJWgg4ic51Ro3eFL8SM4iEk,1022
|
|
12
|
+
codebatch/tasks/analyze.py,sha256=S2yCcri_Ni0N5Q2I7WuSx1kh4TLwdIIs--rB7Bf9bGw,3295
|
|
13
|
+
codebatch/tasks/lint.py,sha256=XrpN3opPjmV6sF7XQi7YhMh-MDjyefJCaZXw0t3xJWE,7901
|
|
14
|
+
codebatch/tasks/parse.py,sha256=Tlye03PIMFLSgwAE8DW3N4F3wQy7WiWSdnsNikZHoiU,9791
|
|
15
|
+
codebatch/tasks/symbols.py,sha256=9ZjoOnQMg_DPtFo7tliRh14P-01IjQgeaPW6w3hsV24,6880
|
|
16
|
+
codebatch-0.1.0.dist-info/METADATA,sha256=Ukd9d8ifWvWKdH2b88pcS7bmlon9rzW_X5xeqPcLnXo,2229
|
|
17
|
+
codebatch-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
18
|
+
codebatch-0.1.0.dist-info/entry_points.txt,sha256=U3k5DUUyCyyueiyJwttLMGZ99YSRDdWrUijFj7yQH6w,49
|
|
19
|
+
codebatch-0.1.0.dist-info/RECORD,,
|