deco-assaying 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deco_assaying-0.1.1/.github/workflows/release.yml +74 -0
- deco_assaying-0.1.1/.gitignore +12 -0
- deco_assaying-0.1.1/.python-version +1 -0
- deco_assaying-0.1.1/Dockerfile +32 -0
- deco_assaying-0.1.1/PKG-INFO +234 -0
- deco_assaying-0.1.1/README.md +219 -0
- deco_assaying-0.1.1/docker-compose.yml +22 -0
- deco_assaying-0.1.1/docs/deco-assaying-architecture.md +218 -0
- deco_assaying-0.1.1/humans_notes.md +24 -0
- deco_assaying-0.1.1/pyproject.toml +60 -0
- deco_assaying-0.1.1/src/deco_assaying/__init__.py +6 -0
- deco_assaying-0.1.1/src/deco_assaying/__main__.py +16 -0
- deco_assaying-0.1.1/src/deco_assaying/analyze.py +199 -0
- deco_assaying-0.1.1/src/deco_assaying/analyzers/__init__.py +56 -0
- deco_assaying-0.1.1/src/deco_assaying/analyzers/_base.py +55 -0
- deco_assaying-0.1.1/src/deco_assaying/analyzers/_fallback.py +20 -0
- deco_assaying-0.1.1/src/deco_assaying/analyzers/_ts_js.py +570 -0
- deco_assaying-0.1.1/src/deco_assaying/analyzers/bash.py +140 -0
- deco_assaying-0.1.1/src/deco_assaying/analyzers/c.py +5 -0
- deco_assaying-0.1.1/src/deco_assaying/analyzers/c_family.py +289 -0
- deco_assaying-0.1.1/src/deco_assaying/analyzers/cpp.py +5 -0
- deco_assaying-0.1.1/src/deco_assaying/analyzers/csharp.py +256 -0
- deco_assaying-0.1.1/src/deco_assaying/analyzers/go.py +405 -0
- deco_assaying-0.1.1/src/deco_assaying/analyzers/java.py +253 -0
- deco_assaying-0.1.1/src/deco_assaying/analyzers/javascript.py +5 -0
- deco_assaying-0.1.1/src/deco_assaying/analyzers/php.py +241 -0
- deco_assaying-0.1.1/src/deco_assaying/analyzers/python.py +577 -0
- deco_assaying-0.1.1/src/deco_assaying/analyzers/ruby.py +223 -0
- deco_assaying-0.1.1/src/deco_assaying/analyzers/rust.py +528 -0
- deco_assaying-0.1.1/src/deco_assaying/analyzers/typescript.py +5 -0
- deco_assaying-0.1.1/src/deco_assaying/app.py +41 -0
- deco_assaying-0.1.1/src/deco_assaying/chunks.py +126 -0
- deco_assaying-0.1.1/src/deco_assaying/config.py +59 -0
- deco_assaying-0.1.1/src/deco_assaying/detectors.py +148 -0
- deco_assaying-0.1.1/src/deco_assaying/github.py +172 -0
- deco_assaying-0.1.1/src/deco_assaying/gitlab.py +294 -0
- deco_assaying-0.1.1/src/deco_assaying/jobs.py +1023 -0
- deco_assaying-0.1.1/src/deco_assaying/languages.py +198 -0
- deco_assaying-0.1.1/src/deco_assaying/literals.py +106 -0
- deco_assaying-0.1.1/src/deco_assaying/manifest.py +190 -0
- deco_assaying-0.1.1/src/deco_assaying/outputs.py +225 -0
- deco_assaying-0.1.1/src/deco_assaying/providers.py +38 -0
- deco_assaying-0.1.1/src/deco_assaying/retention.py +92 -0
- deco_assaying-0.1.1/src/deco_assaying/routes.py +604 -0
- deco_assaying-0.1.1/src/deco_assaying/source.py +181 -0
- deco_assaying-0.1.1/src/deco_assaying/walker.py +407 -0
- deco_assaying-0.1.1/tests/__init__.py +0 -0
- deco_assaying-0.1.1/tests/test_analyze_fallback.py +29 -0
- deco_assaying-0.1.1/tests/test_analyze_other_langs.py +324 -0
- deco_assaying-0.1.1/tests/test_analyze_python.py +136 -0
- deco_assaying-0.1.1/tests/test_chunks.py +35 -0
- deco_assaying-0.1.1/tests/test_detectors.py +37 -0
- deco_assaying-0.1.1/tests/test_github.py +112 -0
- deco_assaying-0.1.1/tests/test_gitlab.py +274 -0
- deco_assaying-0.1.1/tests/test_http_routes.py +445 -0
- deco_assaying-0.1.1/tests/test_index_repo.py +134 -0
- deco_assaying-0.1.1/tests/test_languages.py +34 -0
- deco_assaying-0.1.1/tests/test_literals.py +55 -0
- deco_assaying-0.1.1/tests/test_outputs_http.py +288 -0
- deco_assaying-0.1.1/tests/test_retention.py +141 -0
- deco_assaying-0.1.1/tests/test_source.py +101 -0
- deco_assaying-0.1.1/tests/test_streaming.py +245 -0
- deco_assaying-0.1.1/tests/test_walker.py +66 -0
- deco_assaying-0.1.1/uv.lock +972 -0
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags: ['v*']
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
contents: read
|
|
9
|
+
packages: write # GHCR push
|
|
10
|
+
id-token: write # PyPI trusted publishing
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
docker:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
|
|
18
|
+
- name: Set up QEMU (arm64 emulation)
|
|
19
|
+
uses: docker/setup-qemu-action@v3
|
|
20
|
+
|
|
21
|
+
- name: Set up Docker Buildx
|
|
22
|
+
uses: docker/setup-buildx-action@v3
|
|
23
|
+
|
|
24
|
+
- name: Log in to GHCR
|
|
25
|
+
uses: docker/login-action@v3
|
|
26
|
+
with:
|
|
27
|
+
registry: ghcr.io
|
|
28
|
+
username: ${{ github.actor }}
|
|
29
|
+
password: ${{ secrets.GITHUB_TOKEN }}
|
|
30
|
+
|
|
31
|
+
- name: Extract metadata (tags + labels)
|
|
32
|
+
id: meta
|
|
33
|
+
uses: docker/metadata-action@v5
|
|
34
|
+
with:
|
|
35
|
+
images: ghcr.io/${{ github.repository }}
|
|
36
|
+
tags: |
|
|
37
|
+
type=semver,pattern={{version}}
|
|
38
|
+
type=semver,pattern={{major}}.{{minor}}
|
|
39
|
+
type=raw,value=latest
|
|
40
|
+
|
|
41
|
+
- name: Build and push (linux/amd64 + linux/arm64)
|
|
42
|
+
uses: docker/build-push-action@v5
|
|
43
|
+
with:
|
|
44
|
+
context: .
|
|
45
|
+
platforms: linux/amd64,linux/arm64
|
|
46
|
+
push: true
|
|
47
|
+
tags: ${{ steps.meta.outputs.tags }}
|
|
48
|
+
labels: ${{ steps.meta.outputs.labels }}
|
|
49
|
+
cache-from: type=gha
|
|
50
|
+
cache-to: type=gha,mode=max
|
|
51
|
+
|
|
52
|
+
pypi:
|
|
53
|
+
runs-on: ubuntu-latest
|
|
54
|
+
environment: pypi # matches the trusted-publisher config on PyPI
|
|
55
|
+
steps:
|
|
56
|
+
- uses: actions/checkout@v4
|
|
57
|
+
|
|
58
|
+
- name: Install uv
|
|
59
|
+
uses: astral-sh/setup-uv@v3
|
|
60
|
+
|
|
61
|
+
- name: Verify tag matches pyproject version
|
|
62
|
+
run: |
|
|
63
|
+
TAG="${GITHUB_REF#refs/tags/v}"
|
|
64
|
+
# --no-project: only stdlib tomllib is needed here, so skip syncing the project environment.
PROJECT_VERSION=$(uv run --no-project python -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")
|
|
65
|
+
if [ "$TAG" != "$PROJECT_VERSION" ]; then
|
|
66
|
+
echo "tag $TAG != pyproject $PROJECT_VERSION"
|
|
67
|
+
exit 1
|
|
68
|
+
fi
|
|
69
|
+
|
|
70
|
+
- name: Build wheel + sdist
|
|
71
|
+
run: uv build
|
|
72
|
+
|
|
73
|
+
- name: Publish to PyPI
|
|
74
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.13
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim
|
|
2
|
+
|
|
3
|
+
WORKDIR /app
|
|
4
|
+
|
|
5
|
+
# git is needed at runtime for `git clone` of GitHub/GitLab sources.
|
|
6
|
+
# (uv image is debian-slim; git isn't installed by default.)
|
|
7
|
+
RUN apt-get update \
|
|
8
|
+
&& apt-get install -y --no-install-recommends git ca-certificates \
|
|
9
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
10
|
+
|
|
11
|
+
# Resolve deps first so they cache across source changes.
|
|
12
|
+
COPY pyproject.toml uv.lock ./
|
|
13
|
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
|
14
|
+
uv sync --locked --no-dev --no-install-project
|
|
15
|
+
|
|
16
|
+
# README.md is part of the package metadata (pyproject.toml -> readme).
|
|
17
|
+
# uv sync --no-install-project skipped reading it; the second sync
|
|
18
|
+
# (which installs the project itself) does, so it must be present.
|
|
19
|
+
COPY README.md ./
|
|
20
|
+
COPY src/ src/
|
|
21
|
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
|
22
|
+
uv sync --locked --no-dev
|
|
23
|
+
|
|
24
|
+
ENV PYTHONUNBUFFERED=1 \
|
|
25
|
+
OUTPUT_ROOT=/data \
|
|
26
|
+
PORT=35832 \
|
|
27
|
+
HOST=0.0.0.0
|
|
28
|
+
|
|
29
|
+
EXPOSE 35832
|
|
30
|
+
VOLUME ["/data"]
|
|
31
|
+
|
|
32
|
+
# --no-sync: the environment is fully built at image build time; do not
# re-resolve or install anything (e.g. dev dependencies) at container start.
CMD ["uv", "run", "--no-sync", "python", "-m", "deco_assaying"]
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: deco-assaying
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: MCP server that performs tree-sitter-based source code analysis.
|
|
5
|
+
Requires-Python: >=3.13
|
|
6
|
+
Requires-Dist: fastapi
|
|
7
|
+
Requires-Dist: mcp[cli]>=1.27.0
|
|
8
|
+
Requires-Dist: pathspec
|
|
9
|
+
Requires-Dist: pydantic
|
|
10
|
+
Requires-Dist: starlette
|
|
11
|
+
Requires-Dist: tree-sitter
|
|
12
|
+
Requires-Dist: tree-sitter-language-pack<1.6
|
|
13
|
+
Requires-Dist: uvicorn[standard]
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
|
|
16
|
+
# deco-assaying
|
|
17
|
+
|
|
18
|
+
MCP server that performs tree-sitter-based source code analysis. Designed
|
|
19
|
+
to feed structural information about a repo (symbols, imports, references,
|
|
20
|
+
chunks, metrics) into a downstream consumer that maintains a knowledge
|
|
21
|
+
base over many codebases.
|
|
22
|
+
|
|
23
|
+
## Run
|
|
24
|
+
|
|
25
|
+
Pick the deployment that matches your situation:
|
|
26
|
+
|
|
27
|
+
| Mode | Command | When to use |
|
|
28
|
+
|---|---|---|
|
|
29
|
+
| Daemon — pinned install | [`uv tool install`](#1-daemon--uv-tool-install-pypi) | You'll run it across many sessions; want it on `$PATH`. |
|
|
30
|
+
| Daemon — ephemeral | [`uvx`](#2-daemon--uvx-no-install) | One-off run; don't want anything left on disk. |
|
|
31
|
+
| Container | [`docker run` from GHCR](#3-docker--ghcr) | Ops deployment, compose stack, or want filesystem isolation. |
|
|
32
|
+
| From source | [`uv run`](#4-from-source) | Hacking on the server itself. |
|
|
33
|
+
|
|
34
|
+
### Prereqs
|
|
35
|
+
|
|
36
|
+
- **uv-based modes** need [`uv`](https://docs.astral.sh/uv/) and `git`.
|
|
37
|
+
uv ships a portable Python 3.13, so no system Python install required.
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
- **Docker mode** needs `docker` (or compatible). The image bundles
|
|
44
|
+
Python 3.13 and git; nothing else on the host.
|
|
45
|
+
|
|
46
|
+
### 1. Daemon — `uv tool install` (PyPI)
|
|
47
|
+
|
|
48
|
+
Installs the `deco-assaying` command on your `$PATH`, isolated in its
|
|
49
|
+
own venv that uv manages.
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
uv tool install deco-assaying
|
|
53
|
+
deco-assaying # starts the server
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Update later with `uv tool upgrade deco-assaying`; remove with
|
|
57
|
+
`uv tool uninstall deco-assaying`.
|
|
58
|
+
|
|
59
|
+
### 2. Daemon — `uvx` (no install)
|
|
60
|
+
|
|
61
|
+
`uvx` resolves the package into a temporary venv and runs the entry
|
|
62
|
+
point in one shot. Nothing persists between runs.
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
uvx deco-assaying # latest release
|
|
66
|
+
uvx deco-assaying@0.1.0 # pin a specific version
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Good for kicking the tires or running on a CI box where you don't
|
|
70
|
+
want to touch `~/.local/share/uv`.
|
|
71
|
+
|
|
72
|
+
### 3. Docker / GHCR
|
|
73
|
+
|
|
74
|
+
Pull and run the published multi-arch image (linux/amd64 +
|
|
75
|
+
linux/arm64):
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
docker pull ghcr.io/garycoding/deco-assaying:latest
|
|
79
|
+
docker run --rm \
|
|
80
|
+
-p 35832:35832 \
|
|
81
|
+
-v deco-assaying-data:/data \
|
|
82
|
+
ghcr.io/garycoding/deco-assaying:latest
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Pin a specific version with a tag — `:0.1.0`, `:0.1`, or `:latest`
|
|
86
|
+
(see the [Releases](https://github.com/garycoding/deco-assaying/pkgs/container/deco-assaying)
|
|
87
|
+
page on GHCR for the available tags).
|
|
88
|
+
|
|
89
|
+
Or with compose (see [docker-compose.yml](docker-compose.yml) — pulls
|
|
90
|
+
the image, mounts a named volume at `/data`, restarts unless explicitly stopped):
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
docker compose up -d
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
The named volume `deco-assaying-data` persists job outputs across
|
|
97
|
+
container restarts. To pass auth tokens for private repos:
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
docker run --rm \
|
|
101
|
+
-e GITHUB_TOKEN=ghp_... \
|
|
102
|
+
-e GITLAB_TOKEN=glpat-... \
|
|
103
|
+
-p 35832:35832 \
|
|
104
|
+
-v deco-assaying-data:/data \
|
|
105
|
+
ghcr.io/garycoding/deco-assaying:latest
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### 4. From source
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
git clone https://github.com/garycoding/deco-assaying.git
|
|
112
|
+
cd deco-assaying
|
|
113
|
+
uv sync
|
|
114
|
+
uv run python -m deco_assaying
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### Endpoints
|
|
118
|
+
|
|
119
|
+
In every mode the server listens on `PORT` (default `35832`) with:
|
|
120
|
+
|
|
121
|
+
- `POST /sse` — MCP Streamable HTTP transport.
|
|
122
|
+
- `GET /health` — liveness probe.
|
|
123
|
+
- `GET /admin/*` — read-only JSON ops endpoints.
|
|
124
|
+
- `GET /outputs/{job_id}/...` — read-only download API for job artifacts.
|
|
125
|
+
- `GET /docs` — OpenAPI / Swagger UI for the HTTP API.
|
|
126
|
+
|
|
127
|
+
Sanity-check it's up:
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
curl http://127.0.0.1:35832/health
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
## MCP tools
|
|
134
|
+
|
|
135
|
+
- `analyze_file(content, filename?, language?, options?)` — parse a single
|
|
136
|
+
file passed inline; returns structural JSON.
|
|
137
|
+
- `index_repo(source, options?)` — start a job that indexes a whole repo
|
|
138
|
+
and writes per-file artifacts plus a manifest. The server allocates a
|
|
139
|
+
fresh output dir under `OUTPUT_ROOT` and returns `{ job_id, output_path }`.
|
|
140
|
+
`source` can be a local directory, a GitHub URL
|
|
141
|
+
(`https://github.com/owner/repo`), or a GitLab URL
|
|
142
|
+
(`https://gitlab.com/owner/repo`, including nested groups
|
|
143
|
+
`https://gitlab.com/group/sub/repo`). Pass `git_ref` to pick a specific
|
|
144
|
+
branch / tag / sha.
|
|
145
|
+
- `get_job_status(job_id)` — poll a running or completed job.
|
|
146
|
+
- `cancel_job(job_id)` — cooperative cancel.
|
|
147
|
+
- `list_supported_languages()` — capability discovery.
|
|
148
|
+
- `detect_language(path)` — extension/shebang detection helper.
|
|
149
|
+
|
|
150
|
+
## Output download API
|
|
151
|
+
|
|
152
|
+
Every job's artifacts land under `OUTPUT_ROOT/{job_id}/`. A consumer
|
|
153
|
+
sharing the volume can read them off disk; one without a shared volume
|
|
154
|
+
can pull them over HTTP:
|
|
155
|
+
|
|
156
|
+
| Endpoint | Returns |
|
|
157
|
+
|---|---|
|
|
158
|
+
| `GET /outputs/{job_id}` | `manifest.json` (convenience). |
|
|
159
|
+
| `GET /outputs/{job_id}/manifest.json` | Repo-level rollup. |
|
|
160
|
+
| `GET /outputs/{job_id}/tree.json` | Full path inventory (analyzed + skipped). |
|
|
161
|
+
| `GET /outputs/{job_id}/symbols.json` | Global qualified-name index. |
|
|
162
|
+
| `GET /outputs/{job_id}/languages.json` | Per-language counts. |
|
|
163
|
+
| `GET /outputs/{job_id}/errors.json` | Parse errors + skipped files. |
|
|
164
|
+
| `GET /outputs/{job_id}/log.jsonl?from_offset=N` | Tail the job's log. |
|
|
165
|
+
| `GET /outputs/{job_id}/ls?path=&recursive=` | Directory listing. |
|
|
166
|
+
| `GET /outputs/{job_id}/file/{path}` | Single file, **or** a streaming ZIP if any path segment contains `*?[`. E.g. `/file/files/**/*.py.json`. |
|
|
167
|
+
| `GET /outputs/{job_id}/zip?path=&match=` | Explicit-bulk-zip alias. Default = whole job dir. |
|
|
168
|
+
| `DELETE /outputs/{job_id}` | Remove the dir + drop the table entry. 409 if still running. |
|
|
169
|
+
| `GET /admin/outputs` | List every job_id present on disk under `OUTPUT_ROOT`. |
|
|
170
|
+
|
|
171
|
+
Path traversal (`..`, absolute paths, escape via symlink) is rejected.
|
|
172
|
+
|
|
173
|
+
## Resource requirements
|
|
174
|
+
|
|
175
|
+
When `index_repo` runs against a GitHub or GitLab URL, the server uses a partial
|
|
176
|
+
clone with bin-packed batched fetching. That gives a small, predictable
|
|
177
|
+
disk footprint regardless of how large the source repo is:
|
|
178
|
+
|
|
179
|
+
- **Source-side scratch space: ~100 MB peak** in `output_path/.source/`
|
|
180
|
+
during analysis. The server fetches each batch of source files
|
|
181
|
+
(totaling ≤ `max_partial_clone_bytes`, default 100 MB), analyzes
|
|
182
|
+
them, deletes them from the working tree, then fetches the next
|
|
183
|
+
batch. Even on a multi-GB monorepo, peak local-disk used for source
|
|
184
|
+
content stays at ~100 MB. Tunable via the `max_partial_clone_bytes`
|
|
185
|
+
option on `index_repo`.
|
|
186
|
+
|
|
187
|
+
- **Output artifacts: roughly 1-2× the analyzed-source size.** Each
|
|
188
|
+
analyzed file produces a JSON artifact under `output_path/files/`
|
|
189
|
+
containing symbols, imports, references, chunks, etc. These persist
|
|
190
|
+
past the job — the consumer reads them incrementally — and are
|
|
191
|
+
the largest *durable* footprint. The retention sweeper auto-purges
|
|
192
|
+
job dirs older than `OUTPUT_EXPIRY_DAYS`.
|
|
193
|
+
|
|
194
|
+
- **Memory: modest.** A `ProcessPoolExecutor` runs roughly
|
|
195
|
+
`2 × CPU count` workers, each holding one file's bytes plus its
|
|
196
|
+
tree-sitter parse tree in memory. Source files are capped at
|
|
197
|
+
`max_file_bytes` (default 2 MB), so worst case is ~16-32 MB of
|
|
198
|
+
resident source + parse trees on a typical 8-core box.
|
|
199
|
+
|
|
200
|
+
- **Network:** one provider-API pre-flight to plan the batches (GitHub
|
|
201
|
+
Trees REST or GitLab REST tree + GraphQL; free for public repos, set
|
|
202
|
+
`GITHUB_TOKEN` / `GITLAB_TOKEN` for higher quotas and private-repo
|
|
203
|
+
access), plus one `git fetch-pack` round-trip per batch. For a
|
|
204
|
+
typical sub-100 MB repo that's two HTTP hits total.
|
|
205
|
+
|
|
206
|
+
For local-path sources nothing is fetched and nothing is cloned —
|
|
207
|
+
the only on-disk cost is the output artifacts.
|
|
208
|
+
|
|
209
|
+
## Configuration
|
|
210
|
+
|
|
211
|
+
| Env var | Default (daemon) | Default (container) | Purpose |
|
|
212
|
+
|---|---|---|---|
|
|
213
|
+
| `PORT` | `35832` | `35832` | HTTP listen port. |
|
|
214
|
+
| `HOST` | `0.0.0.0` | `0.0.0.0` | HTTP bind address. |
|
|
215
|
+
| `OUTPUT_ROOT` | `./output` | `/data` | Where the server writes job dirs. |
|
|
216
|
+
| `OUTPUT_EXPIRY_DAYS` | `7` | `7` | Auto-purge job dirs older than this. `0` disables. |
|
|
217
|
+
| `JOB_HISTORY_MAX` | `100` | `100` | In-memory job-table cap. |
|
|
218
|
+
| `DEFAULT_MAX_FILE_BYTES` | `2097152` | `2097152` | Default per-file size cap. |
|
|
219
|
+
| `DEFAULT_CHUNK_MAX_TOKENS` | `800` | `800` | Default chunk size for cAST chunking. |
|
|
220
|
+
| `GITHUB_TOKEN` | unset | unset | Optional, raises GitHub Trees API quota from 60 to 5000 req/hr and unlocks private repos. |
|
|
221
|
+
| `GITLAB_TOKEN` | unset | unset | Optional, used for GitLab API auth and private-repo access. |
|
|
222
|
+
|
|
223
|
+
## Releasing
|
|
224
|
+
|
|
225
|
+
Tag-driven. Bump `version` in `pyproject.toml`, then:
|
|
226
|
+
|
|
227
|
+
```bash
|
|
228
|
+
git tag vX.Y.Z && git push --tags
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
The `Release` workflow builds a multi-arch image (linux/amd64 +
|
|
232
|
+
linux/arm64) and pushes it to GHCR with `X.Y.Z`, `X.Y`, and `latest`
|
|
233
|
+
tags, in parallel with publishing wheel + sdist to PyPI via trusted
|
|
234
|
+
publishing. ~3-5 minutes end-to-end.
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
# deco-assaying
|
|
2
|
+
|
|
3
|
+
MCP server that performs tree-sitter-based source code analysis. Designed
|
|
4
|
+
to feed structural information about a repo (symbols, imports, references,
|
|
5
|
+
chunks, metrics) into a downstream consumer that maintains a knowledge
|
|
6
|
+
base over many codebases.
|
|
7
|
+
|
|
8
|
+
## Run
|
|
9
|
+
|
|
10
|
+
Pick the deployment that matches your situation:
|
|
11
|
+
|
|
12
|
+
| Mode | Command | When to use |
|
|
13
|
+
|---|---|---|
|
|
14
|
+
| Daemon — pinned install | [`uv tool install`](#1-daemon--uv-tool-install-pypi) | You'll run it across many sessions; want it on `$PATH`. |
|
|
15
|
+
| Daemon — ephemeral | [`uvx`](#2-daemon--uvx-no-install) | One-off run; don't want anything left on disk. |
|
|
16
|
+
| Container | [`docker run` from GHCR](#3-docker--ghcr) | Ops deployment, compose stack, or want filesystem isolation. |
|
|
17
|
+
| From source | [`uv run`](#4-from-source) | Hacking on the server itself. |
|
|
18
|
+
|
|
19
|
+
### Prereqs
|
|
20
|
+
|
|
21
|
+
- **uv-based modes** need [`uv`](https://docs.astral.sh/uv/) and `git`.
|
|
22
|
+
uv ships a portable Python 3.13, so no system Python install required.
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
- **Docker mode** needs `docker` (or compatible). The image bundles
|
|
29
|
+
Python 3.13 and git; nothing else on the host.
|
|
30
|
+
|
|
31
|
+
### 1. Daemon — `uv tool install` (PyPI)
|
|
32
|
+
|
|
33
|
+
Installs the `deco-assaying` command on your `$PATH`, isolated in its
|
|
34
|
+
own venv that uv manages.
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
uv tool install deco-assaying
|
|
38
|
+
deco-assaying # starts the server
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Update later with `uv tool upgrade deco-assaying`; remove with
|
|
42
|
+
`uv tool uninstall deco-assaying`.
|
|
43
|
+
|
|
44
|
+
### 2. Daemon — `uvx` (no install)
|
|
45
|
+
|
|
46
|
+
`uvx` resolves the package into a temporary venv and runs the entry
|
|
47
|
+
point in one shot. Nothing persists between runs.
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
uvx deco-assaying # latest release
|
|
51
|
+
uvx deco-assaying@0.1.0 # pin a specific version
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Good for kicking the tires or running on a CI box where you don't
|
|
55
|
+
want to touch `~/.local/share/uv`.
|
|
56
|
+
|
|
57
|
+
### 3. Docker / GHCR
|
|
58
|
+
|
|
59
|
+
Pull and run the published multi-arch image (linux/amd64 +
|
|
60
|
+
linux/arm64):
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
docker pull ghcr.io/garycoding/deco-assaying:latest
|
|
64
|
+
docker run --rm \
|
|
65
|
+
-p 35832:35832 \
|
|
66
|
+
-v deco-assaying-data:/data \
|
|
67
|
+
ghcr.io/garycoding/deco-assaying:latest
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Pin a specific version with a tag — `:0.1.0`, `:0.1`, or `:latest`
|
|
71
|
+
(see the [Releases](https://github.com/garycoding/deco-assaying/pkgs/container/deco-assaying)
|
|
72
|
+
page on GHCR for the available tags).
|
|
73
|
+
|
|
74
|
+
Or with compose (see [docker-compose.yml](docker-compose.yml) — pulls
|
|
75
|
+
the image, mounts a named volume at `/data`, restarts unless explicitly stopped):
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
docker compose up -d
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
The named volume `deco-assaying-data` persists job outputs across
|
|
82
|
+
container restarts. To pass auth tokens for private repos:
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
docker run --rm \
|
|
86
|
+
-e GITHUB_TOKEN=ghp_... \
|
|
87
|
+
-e GITLAB_TOKEN=glpat-... \
|
|
88
|
+
-p 35832:35832 \
|
|
89
|
+
-v deco-assaying-data:/data \
|
|
90
|
+
ghcr.io/garycoding/deco-assaying:latest
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### 4. From source
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
git clone https://github.com/garycoding/deco-assaying.git
|
|
97
|
+
cd deco-assaying
|
|
98
|
+
uv sync
|
|
99
|
+
uv run python -m deco_assaying
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### Endpoints
|
|
103
|
+
|
|
104
|
+
In every mode the server listens on `PORT` (default `35832`) with:
|
|
105
|
+
|
|
106
|
+
- `POST /sse` — MCP Streamable HTTP transport.
|
|
107
|
+
- `GET /health` — liveness probe.
|
|
108
|
+
- `GET /admin/*` — read-only JSON ops endpoints.
|
|
109
|
+
- `GET /outputs/{job_id}/...` — read-only download API for job artifacts.
|
|
110
|
+
- `GET /docs` — OpenAPI / Swagger UI for the HTTP API.
|
|
111
|
+
|
|
112
|
+
Sanity-check it's up:
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
curl http://127.0.0.1:35832/health
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## MCP tools
|
|
119
|
+
|
|
120
|
+
- `analyze_file(content, filename?, language?, options?)` — parse a single
|
|
121
|
+
file passed inline; returns structural JSON.
|
|
122
|
+
- `index_repo(source, options?)` — start a job that indexes a whole repo
|
|
123
|
+
and writes per-file artifacts plus a manifest. The server allocates a
|
|
124
|
+
fresh output dir under `OUTPUT_ROOT` and returns `{ job_id, output_path }`.
|
|
125
|
+
`source` can be a local directory, a GitHub URL
|
|
126
|
+
(`https://github.com/owner/repo`), or a GitLab URL
|
|
127
|
+
(`https://gitlab.com/owner/repo`, including nested groups
|
|
128
|
+
`https://gitlab.com/group/sub/repo`). Pass `git_ref` to pick a specific
|
|
129
|
+
branch / tag / sha.
|
|
130
|
+
- `get_job_status(job_id)` — poll a running or completed job.
|
|
131
|
+
- `cancel_job(job_id)` — cooperative cancel.
|
|
132
|
+
- `list_supported_languages()` — capability discovery.
|
|
133
|
+
- `detect_language(path)` — extension/shebang detection helper.
|
|
134
|
+
|
|
135
|
+
## Output download API
|
|
136
|
+
|
|
137
|
+
Every job's artifacts land under `OUTPUT_ROOT/{job_id}/`. A consumer
|
|
138
|
+
sharing the volume can read them off disk; one without a shared volume
|
|
139
|
+
can pull them over HTTP:
|
|
140
|
+
|
|
141
|
+
| Endpoint | Returns |
|
|
142
|
+
|---|---|
|
|
143
|
+
| `GET /outputs/{job_id}` | `manifest.json` (convenience). |
|
|
144
|
+
| `GET /outputs/{job_id}/manifest.json` | Repo-level rollup. |
|
|
145
|
+
| `GET /outputs/{job_id}/tree.json` | Full path inventory (analyzed + skipped). |
|
|
146
|
+
| `GET /outputs/{job_id}/symbols.json` | Global qualified-name index. |
|
|
147
|
+
| `GET /outputs/{job_id}/languages.json` | Per-language counts. |
|
|
148
|
+
| `GET /outputs/{job_id}/errors.json` | Parse errors + skipped files. |
|
|
149
|
+
| `GET /outputs/{job_id}/log.jsonl?from_offset=N` | Tail the job's log. |
|
|
150
|
+
| `GET /outputs/{job_id}/ls?path=&recursive=` | Directory listing. |
|
|
151
|
+
| `GET /outputs/{job_id}/file/{path}` | Single file, **or** a streaming ZIP if any path segment contains `*?[`. E.g. `/file/files/**/*.py.json`. |
|
|
152
|
+
| `GET /outputs/{job_id}/zip?path=&match=` | Explicit-bulk-zip alias. Default = whole job dir. |
|
|
153
|
+
| `DELETE /outputs/{job_id}` | Remove the dir + drop the table entry. 409 if still running. |
|
|
154
|
+
| `GET /admin/outputs` | List every job_id present on disk under `OUTPUT_ROOT`. |
|
|
155
|
+
|
|
156
|
+
Path traversal (`..`, absolute paths, escape via symlink) is rejected.
|
|
157
|
+
|
|
158
|
+
## Resource requirements
|
|
159
|
+
|
|
160
|
+
When `index_repo` runs against a GitHub or GitLab URL, the server uses a partial
|
|
161
|
+
clone with bin-packed batched fetching. That gives a small, predictable
|
|
162
|
+
disk footprint regardless of how large the source repo is:
|
|
163
|
+
|
|
164
|
+
- **Source-side scratch space: ~100 MB peak** in `output_path/.source/`
|
|
165
|
+
during analysis. The server fetches each batch of source files
|
|
166
|
+
(totaling ≤ `max_partial_clone_bytes`, default 100 MB), analyzes
|
|
167
|
+
them, deletes them from the working tree, then fetches the next
|
|
168
|
+
batch. Even on a multi-GB monorepo, peak local-disk used for source
|
|
169
|
+
content stays at ~100 MB. Tunable via the `max_partial_clone_bytes`
|
|
170
|
+
option on `index_repo`.
|
|
171
|
+
|
|
172
|
+
- **Output artifacts: roughly 1-2× the analyzed-source size.** Each
|
|
173
|
+
analyzed file produces a JSON artifact under `output_path/files/`
|
|
174
|
+
containing symbols, imports, references, chunks, etc. These persist
|
|
175
|
+
past the job — the consumer reads them incrementally — and are
|
|
176
|
+
the largest *durable* footprint. The retention sweeper auto-purges
|
|
177
|
+
job dirs older than `OUTPUT_EXPIRY_DAYS`.
|
|
178
|
+
|
|
179
|
+
- **Memory: modest.** A `ProcessPoolExecutor` runs roughly
|
|
180
|
+
`2 × CPU count` workers, each holding one file's bytes plus its
|
|
181
|
+
tree-sitter parse tree in memory. Source files are capped at
|
|
182
|
+
`max_file_bytes` (default 2 MB), so worst case is ~16-32 MB of
|
|
183
|
+
resident source + parse trees on a typical 8-core box.
|
|
184
|
+
|
|
185
|
+
- **Network:** one provider-API pre-flight to plan the batches (GitHub
|
|
186
|
+
Trees REST or GitLab REST tree + GraphQL; free for public repos, set
|
|
187
|
+
`GITHUB_TOKEN` / `GITLAB_TOKEN` for higher quotas and private-repo
|
|
188
|
+
access), plus one `git fetch-pack` round-trip per batch. For a
|
|
189
|
+
typical sub-100 MB repo that's two HTTP hits total.
|
|
190
|
+
|
|
191
|
+
For local-path sources nothing is fetched and nothing is cloned —
|
|
192
|
+
the only on-disk cost is the output artifacts.
|
|
193
|
+
|
|
194
|
+
## Configuration
|
|
195
|
+
|
|
196
|
+
| Env var | Default (daemon) | Default (container) | Purpose |
|
|
197
|
+
|---|---|---|---|
|
|
198
|
+
| `PORT` | `35832` | `35832` | HTTP listen port. |
|
|
199
|
+
| `HOST` | `0.0.0.0` | `0.0.0.0` | HTTP bind address. |
|
|
200
|
+
| `OUTPUT_ROOT` | `./output` | `/data` | Where the server writes job dirs. |
|
|
201
|
+
| `OUTPUT_EXPIRY_DAYS` | `7` | `7` | Auto-purge job dirs older than this. `0` disables. |
|
|
202
|
+
| `JOB_HISTORY_MAX` | `100` | `100` | In-memory job-table cap. |
|
|
203
|
+
| `DEFAULT_MAX_FILE_BYTES` | `2097152` | `2097152` | Default per-file size cap. |
|
|
204
|
+
| `DEFAULT_CHUNK_MAX_TOKENS` | `800` | `800` | Default chunk size for cAST chunking. |
|
|
205
|
+
| `GITHUB_TOKEN` | unset | unset | Optional, raises GitHub Trees API quota from 60 to 5000 req/hr and unlocks private repos. |
|
|
206
|
+
| `GITLAB_TOKEN` | unset | unset | Optional, used for GitLab API auth and private-repo access. |
|
|
207
|
+
|
|
208
|
+
## Releasing
|
|
209
|
+
|
|
210
|
+
Tag-driven. Bump `version` in `pyproject.toml`, then:
|
|
211
|
+
|
|
212
|
+
```bash
|
|
213
|
+
git tag vX.Y.Z && git push --tags
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
The `Release` workflow builds a multi-arch image (linux/amd64 +
|
|
217
|
+
linux/arm64) and pushes it to GHCR with `X.Y.Z`, `X.Y`, and `latest`
|
|
218
|
+
tags, in parallel with publishing wheel + sdist to PyPI via trusted
|
|
219
|
+
publishing. ~3-5 minutes end-to-end.
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
services:
|
|
2
|
+
deco-assaying:
|
|
3
|
+
image: ghcr.io/garycoding/deco-assaying:latest
|
|
4
|
+
container_name: deco-assaying
|
|
5
|
+
restart: unless-stopped
|
|
6
|
+
ports:
|
|
7
|
+
- "35832:35832"
|
|
8
|
+
volumes:
|
|
9
|
+
- deco-assaying-data:/data
|
|
10
|
+
environment:
|
|
11
|
+
OUTPUT_ROOT: /data
|
|
12
|
+
OUTPUT_EXPIRY_DAYS: "7"
|
|
13
|
+
# Uncomment to authenticate to private repos / raise rate limits:
|
|
14
|
+
# GITHUB_TOKEN: ${GITHUB_TOKEN:-}
|
|
15
|
+
# GITLAB_TOKEN: ${GITLAB_TOKEN:-}
|
|
16
|
+
extra_hosts:
|
|
17
|
+
# Lets the server inside this container reach services running on the docker host
|
|
18
|
+
# under host.docker.internal on Linux too.
|
|
19
|
+
- "host.docker.internal:host-gateway"
|
|
20
|
+
|
|
21
|
+
volumes:
|
|
22
|
+
deco-assaying-data:
|