greploom 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- greploom-0.1.0/.claude/commands/create-release.md +17 -0
- greploom-0.1.0/.github/workflows/release.yml +115 -0
- greploom-0.1.0/.github/workflows/test.yml +79 -0
- greploom-0.1.0/.gitignore +23 -0
- greploom-0.1.0/CLAUDE.md +73 -0
- greploom-0.1.0/LICENSE +21 -0
- greploom-0.1.0/PKG-INFO +227 -0
- greploom-0.1.0/README.md +194 -0
- greploom-0.1.0/llms-full.txt +404 -0
- greploom-0.1.0/llms.txt +32 -0
- greploom-0.1.0/pyproject.toml +59 -0
- greploom-0.1.0/scripts/release.sh +104 -0
- greploom-0.1.0/src/greploom/__init__.py +5 -0
- greploom-0.1.0/src/greploom/cli/__init__.py +22 -0
- greploom-0.1.0/src/greploom/cli/index_cmd.py +60 -0
- greploom-0.1.0/src/greploom/cli/query_cmd.py +117 -0
- greploom-0.1.0/src/greploom/cli/serve_cmd.py +43 -0
- greploom-0.1.0/src/greploom/config.py +58 -0
- greploom-0.1.0/src/greploom/cpg_types.py +130 -0
- greploom-0.1.0/src/greploom/index/__init__.py +114 -0
- greploom-0.1.0/src/greploom/index/embedder.py +75 -0
- greploom-0.1.0/src/greploom/index/store.py +236 -0
- greploom-0.1.0/src/greploom/index/summarizer.py +116 -0
- greploom-0.1.0/src/greploom/mcp/__init__.py +0 -0
- greploom-0.1.0/src/greploom/mcp/server.py +104 -0
- greploom-0.1.0/src/greploom/search/__init__.py +0 -0
- greploom-0.1.0/src/greploom/search/budget.py +128 -0
- greploom-0.1.0/src/greploom/search/expand.py +159 -0
- greploom-0.1.0/src/greploom/search/hybrid.py +58 -0
- greploom-0.1.0/src/greploom/version.py +1 -0
- greploom-0.1.0/tests/__init__.py +0 -0
- greploom-0.1.0/tests/fixtures/medium_cpg.json +275 -0
- greploom-0.1.0/tests/fixtures/small_cpg.json +107 -0
- greploom-0.1.0/tests/test_cli/__init__.py +0 -0
- greploom-0.1.0/tests/test_cli/test_commands.py +32 -0
- greploom-0.1.0/tests/test_cli/test_integration.py +164 -0
- greploom-0.1.0/tests/test_config.py +127 -0
- greploom-0.1.0/tests/test_cpg_types.py +288 -0
- greploom-0.1.0/tests/test_index/__init__.py +0 -0
- greploom-0.1.0/tests/test_index/test_embedder.py +100 -0
- greploom-0.1.0/tests/test_index/test_orchestrator.py +170 -0
- greploom-0.1.0/tests/test_index/test_store.py +192 -0
- greploom-0.1.0/tests/test_index/test_summarizer.py +309 -0
- greploom-0.1.0/tests/test_mcp/__init__.py +0 -0
- greploom-0.1.0/tests/test_mcp/test_server.py +45 -0
- greploom-0.1.0/tests/test_search/__init__.py +0 -0
- greploom-0.1.0/tests/test_search/test_budget.py +165 -0
- greploom-0.1.0/tests/test_search/test_expand.py +181 -0
- greploom-0.1.0/tests/test_search/test_hybrid.py +136 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Create Release
|
|
2
|
+
|
|
3
|
+
Help me create a new release of greploom.
|
|
4
|
+
|
|
5
|
+
## Steps
|
|
6
|
+
|
|
7
|
+
1. Ask me for the version number (should follow semver: major.minor.patch)
|
|
8
|
+
2. Ask me for a brief description of what changed in this release
|
|
9
|
+
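3. Update the changelog section in README.md with the new version entry (newest first), under a `### Version <version>` heading — the release workflow looks for that heading to extract the release notes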
|
|
10
|
+
4. Run the release script: `./scripts/release.sh <version> "<description>"`
|
|
11
|
+
|
|
12
|
+
## Notes
|
|
13
|
+
|
|
14
|
+
- The release script handles updating version.py, pyproject.toml, committing, tagging, and pushing
|
|
15
|
+
- GitHub Actions will automatically create a GitHub Release and publish to PyPI
|
|
16
|
+
- Make sure all tests pass before releasing: `pytest`
|
|
17
|
+
- Make sure linting passes: `ruff check src tests`
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
name: Release and Publish
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- 'v*.*.*'
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: write
|
|
10
|
+
id-token: write
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
create-release:
|
|
14
|
+
name: Create GitHub Release
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
|
|
20
|
+
- name: Wait for tests to pass
|
|
21
|
+
uses: lewagon/wait-on-check-action@v1.3.1
|
|
22
|
+
with:
|
|
23
|
+
ref: ${{ github.ref }}
|
|
24
|
+
check-regexp: 'Test on Python'
|
|
25
|
+
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
|
26
|
+
wait-interval: 10
|
|
27
|
+
|
|
28
|
+
- name: Extract version from tag
|
|
29
|
+
id: version
|
|
30
|
+
run: |
|
|
31
|
+
VERSION=${GITHUB_REF#refs/tags/v}
|
|
32
|
+
echo "version=$VERSION" >> $GITHUB_OUTPUT
|
|
33
|
+
echo "tag=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
|
|
34
|
+
|
|
35
|
+
- name: Read version from version.py
|
|
36
|
+
id: read_version
|
|
37
|
+
run: |
|
|
38
|
+
VERSION_PY=$(grep -E '^__version__' src/greploom/version.py | cut -d'"' -f2)
|
|
39
|
+
echo "version_py=$VERSION_PY" >> $GITHUB_OUTPUT
|
|
40
|
+
|
|
41
|
+
- name: Verify version match
|
|
42
|
+
run: |
|
|
43
|
+
if [ "${{ steps.version.outputs.version }}" != "${{ steps.read_version.outputs.version_py }}" ]; then
|
|
44
|
+
echo "Error: Tag version (${{ steps.version.outputs.version }}) does not match version.py (${{ steps.read_version.outputs.version_py }})"
|
|
45
|
+
exit 1
|
|
46
|
+
fi
|
|
47
|
+
|
|
48
|
+
- name: Extract changelog for this version
|
|
49
|
+
id: changelog
|
|
50
|
+
run: |
|
|
51
|
+
VERSION="${{ steps.version.outputs.version }}"
|
|
52
|
+
# Collect the body of the "### Version $VERSION" section of README.md.
# A range pattern (/start/,/end/) cannot be used here: the end pattern
# "### Version [0-9]" also matches the start heading, so the range would open
# and close on the same line and nothing would ever be printed.
# The version is passed with -v and compared as a string, so its dots are
# literal rather than regex wildcards. Output stops at the next
# "### Version" heading or at the next "## " section heading.
CHANGELOG=$(awk -v ver="$VERSION" '
  /^### Version / { if (found) exit; if (($3 "") == ver) { found = 1; next } }
  /^## / { if (found) exit }
  found
' README.md | sed '/^$/d')
|
|
53
|
+
if [ -z "$CHANGELOG" ]; then
|
|
54
|
+
CHANGELOG="Release version $VERSION"
|
|
55
|
+
fi
|
|
56
|
+
echo "$CHANGELOG" > /tmp/changelog.txt
|
|
57
|
+
|
|
58
|
+
- name: Create GitHub Release
|
|
59
|
+
env:
|
|
60
|
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
61
|
+
run: |
|
|
62
|
+
CHANGELOG=$(cat /tmp/changelog.txt)
|
|
63
|
+
gh release create "${{ steps.version.outputs.tag }}" \
|
|
64
|
+
--title "Release ${{ steps.version.outputs.tag }}" \
|
|
65
|
+
--notes "$CHANGELOG" \
|
|
66
|
+
--verify-tag
|
|
67
|
+
|
|
68
|
+
build:
|
|
69
|
+
name: Build distribution
|
|
70
|
+
needs: [create-release]
|
|
71
|
+
runs-on: ubuntu-latest
|
|
72
|
+
|
|
73
|
+
steps:
|
|
74
|
+
- uses: actions/checkout@v4
|
|
75
|
+
|
|
76
|
+
- name: Set up Python
|
|
77
|
+
uses: actions/setup-python@v5
|
|
78
|
+
with:
|
|
79
|
+
python-version: "3.11"
|
|
80
|
+
|
|
81
|
+
- name: Install build dependencies
|
|
82
|
+
run: |
|
|
83
|
+
python -m pip install --upgrade pip
|
|
84
|
+
pip install build
|
|
85
|
+
|
|
86
|
+
- name: Build package
|
|
87
|
+
run: python -m build
|
|
88
|
+
|
|
89
|
+
- name: Store distribution packages
|
|
90
|
+
uses: actions/upload-artifact@v4
|
|
91
|
+
with:
|
|
92
|
+
name: python-package-distributions
|
|
93
|
+
path: dist/
|
|
94
|
+
|
|
95
|
+
publish-to-pypi:
|
|
96
|
+
name: Publish to PyPI
|
|
97
|
+
needs: [build]
|
|
98
|
+
runs-on: ubuntu-latest
|
|
99
|
+
|
|
100
|
+
environment:
|
|
101
|
+
name: pypi
|
|
102
|
+
url: https://pypi.org/p/greploom
|
|
103
|
+
|
|
104
|
+
permissions:
|
|
105
|
+
id-token: write
|
|
106
|
+
|
|
107
|
+
steps:
|
|
108
|
+
- name: Download distribution packages
|
|
109
|
+
uses: actions/download-artifact@v4
|
|
110
|
+
with:
|
|
111
|
+
name: python-package-distributions
|
|
112
|
+
path: dist/
|
|
113
|
+
|
|
114
|
+
- name: Publish to PyPI
|
|
115
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
name: Tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ main ]
|
|
6
|
+
tags:
|
|
7
|
+
- 'v*.*.*'
|
|
8
|
+
pull_request:
|
|
9
|
+
branches: [ main ]
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
test:
|
|
13
|
+
name: Test on Python ${{ matrix.python-version }}
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
strategy:
|
|
16
|
+
matrix:
|
|
17
|
+
python-version: ["3.10", "3.11", "3.12"]
|
|
18
|
+
|
|
19
|
+
steps:
|
|
20
|
+
- uses: actions/checkout@v4
|
|
21
|
+
|
|
22
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
23
|
+
uses: actions/setup-python@v5
|
|
24
|
+
with:
|
|
25
|
+
python-version: ${{ matrix.python-version }}
|
|
26
|
+
|
|
27
|
+
- name: Install dependencies
|
|
28
|
+
run: |
|
|
29
|
+
python -m pip install --upgrade pip
|
|
30
|
+
pip install -e .[dev,mcp]
|
|
31
|
+
|
|
32
|
+
- name: Run tests with pytest
|
|
33
|
+
run: |
|
|
34
|
+
pytest --cov=greploom --cov-report=xml --cov-report=term-missing
|
|
35
|
+
|
|
36
|
+
- name: Lint with ruff
|
|
37
|
+
run: |
|
|
38
|
+
ruff check src tests
|
|
39
|
+
|
|
40
|
+
build:
|
|
41
|
+
name: Build distribution
|
|
42
|
+
runs-on: ubuntu-latest
|
|
43
|
+
needs: [test]
|
|
44
|
+
|
|
45
|
+
steps:
|
|
46
|
+
- uses: actions/checkout@v4
|
|
47
|
+
|
|
48
|
+
- name: Set up Python
|
|
49
|
+
uses: actions/setup-python@v5
|
|
50
|
+
with:
|
|
51
|
+
python-version: "3.11"
|
|
52
|
+
|
|
53
|
+
- name: Install build dependencies
|
|
54
|
+
run: |
|
|
55
|
+
python -m pip install --upgrade pip
|
|
56
|
+
pip install build
|
|
57
|
+
|
|
58
|
+
- name: Build package
|
|
59
|
+
run: python -m build
|
|
60
|
+
|
|
61
|
+
- name: Check build with twine
|
|
62
|
+
run: |
|
|
63
|
+
pip install twine
|
|
64
|
+
twine check dist/*
|
|
65
|
+
|
|
66
|
+
status:
|
|
67
|
+
name: Tests Status
|
|
68
|
+
runs-on: ubuntu-latest
|
|
69
|
+
needs: [test, build]
|
|
70
|
+
if: always()
|
|
71
|
+
|
|
72
|
+
steps:
|
|
73
|
+
- name: Check test results
|
|
74
|
+
run: |
|
|
75
|
+
if [ "${{ needs.test.result }}" != "success" ] || [ "${{ needs.build.result }}" != "success" ]; then
|
|
76
|
+
echo "Tests or build failed"
|
|
77
|
+
exit 1
|
|
78
|
+
fi
|
|
79
|
+
echo "All tests passed successfully"
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
__pycache__/
|
|
2
|
+
*.pyc
|
|
3
|
+
*.pyo
|
|
4
|
+
*.egg-info/
|
|
5
|
+
dist/
|
|
6
|
+
build/
|
|
7
|
+
.eggs/
|
|
8
|
+
*.egg
|
|
9
|
+
.pytest_cache/
|
|
10
|
+
.mypy_cache/
|
|
11
|
+
.ruff_cache/
|
|
12
|
+
htmlcov/
|
|
13
|
+
.coverage
|
|
14
|
+
*.db
|
|
15
|
+
.greploom/
|
|
16
|
+
.DS_Store
|
|
17
|
+
*.swp
|
|
18
|
+
*~
|
|
19
|
+
venv/
|
|
20
|
+
.venv/
|
|
21
|
+
|
|
22
|
+
# Test output
|
|
23
|
+
/test-output/
|
greploom-0.1.0/CLAUDE.md
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# greploom -- Project Context
|
|
2
|
+
|
|
3
|
+
greploom is a semantic code search library with graph-aware context retrieval. It reads a treeloom Code Property Graph (JSON), indexes it for hybrid search (vector + BM25), and returns structurally-complete context neighborhoods suitable for LLM consumption.
|
|
4
|
+
|
|
5
|
+
See the [loom-research](https://github.com/rdwj/loom-research) repo for the full design document and architectural decisions.
|
|
6
|
+
|
|
7
|
+
## Tech Stack
|
|
8
|
+
|
|
9
|
+
- Python 3.10+, package name `greploom`
|
|
10
|
+
- Search: sqlite-vec (vector), SQLite FTS5 (BM25), reciprocal rank fusion
|
|
11
|
+
- Graph: reads treeloom CPG JSON format, walks edges for context expansion
|
|
12
|
+
- Embeddings: nomic-embed-text via ollama (default), any OpenAI-compatible endpoint
|
|
13
|
+
- CLI: click
|
|
14
|
+
- Token counting: tiktoken
|
|
15
|
+
- Build: Hatchling
|
|
16
|
+
- Testing: pytest, 80%+ coverage target
|
|
17
|
+
|
|
18
|
+
## Architecture
|
|
19
|
+
|
|
20
|
+
```
|
|
21
|
+
greploom/
|
|
22
|
+
├── src/greploom/
|
|
23
|
+
│ ├── index/ # Indexing pipeline: summarizer, embedder, storage
|
|
24
|
+
│ │ ├── summarizer.py # Generate text summaries from CPG nodes
|
|
25
|
+
│ │ ├── embedder.py # Embed summaries via ollama or API
|
|
26
|
+
│ │ └── store.py # SQLite storage (sqlite-vec + FTS5)
|
|
27
|
+
│ ├── search/ # Search engine: hybrid search, ranking, context assembly
|
|
28
|
+
│ │ ├── hybrid.py # BM25 + vector search with RRF
|
|
29
|
+
│ │ ├── expand.py # Graph walk to assemble context neighborhoods
|
|
30
|
+
│ │ └── budget.py # Token budget management
|
|
31
|
+
│ ├── cli/ # CLI commands
|
|
32
|
+
│ │ ├── __init__.py
|
|
33
|
+
│ │ ├── index_cmd.py
|
|
34
|
+
│ │ ├── query_cmd.py
|
|
35
|
+
│ │ └── serve_cmd.py
|
|
36
|
+
│ ├── mcp/ # MCP server mode
|
|
37
|
+
│ │ └── server.py
|
|
38
|
+
│ ├── version.py
|
|
39
|
+
│ └── __init__.py
|
|
40
|
+
├── tests/
|
|
41
|
+
│ ├── fixtures/
|
|
42
|
+
│ ├── test_index/
|
|
43
|
+
│ ├── test_search/
|
|
44
|
+
│ └── test_cli/
|
|
45
|
+
├── pyproject.toml
|
|
46
|
+
├── CLAUDE.md
|
|
47
|
+
└── README.md
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Design Principles
|
|
51
|
+
|
|
52
|
+
1. **treeloom is the graph, greploom is the search.** greploom reads treeloom's CPG JSON — it never parses source code or builds its own graph.
|
|
53
|
+
2. **SQLite-only storage.** sqlite-vec + FTS5. No servers, no Docker, one file. Portable and inspectable.
|
|
54
|
+
3. **Hybrid search.** Vector similarity for semantic queries ("where is authentication?") + BM25 for symbol queries ("find UserService"). Reciprocal rank fusion merges results.
|
|
55
|
+
4. **Graph expansion at query time.** Search finds candidate nodes. CPG walk adds callers, callees, imports, data flow sources. The agent gets code in structural context.
|
|
56
|
+
5. **Token budget management.** Returns exactly as much context as the LLM can use, ranked by structural relevance. Default: 8192 tokens (configurable via `--budget` or `GREPLOOM_TOKEN_BUDGET`).
|
|
57
|
+
6. **Incremental re-indexing.** Track content hash per node. On re-index, only re-embed changed nodes.
|
|
58
|
+
|
|
59
|
+
## Relationship to treeloom
|
|
60
|
+
|
|
61
|
+
greploom depends on treeloom but does not import treeloom at runtime for core search operations. The dependency is via the CPG JSON format:
|
|
62
|
+
|
|
63
|
+
- `greploom index` reads a treeloom CPG JSON file and builds its search index
|
|
64
|
+
- `greploom query` reads both the search index (SQLite) and the CPG JSON (for graph expansion)
|
|
65
|
+
- The CPG JSON format is documented in treeloom's CLAUDE.md
|
|
66
|
+
|
|
67
|
+
This means greploom can work with any tool that produces treeloom-compatible CPG JSON, not just treeloom itself.
|
|
68
|
+
|
|
69
|
+
## Releasing
|
|
70
|
+
|
|
71
|
+
The version lives in two files that must stay in sync:
|
|
72
|
+
- `src/greploom/version.py` — `__version__ = "x.y.z"`
|
|
73
|
+
- `pyproject.toml` — `version = "x.y.z"`
|
greploom-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 rdwj
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
greploom-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: greploom
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Semantic code search with graph-aware context retrieval, built on treeloom
|
|
5
|
+
Author: rdwj
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Classifier: Development Status :: 2 - Pre-Alpha
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Requires-Dist: apsw>=3.45
|
|
18
|
+
Requires-Dist: click>=8.0
|
|
19
|
+
Requires-Dist: httpx>=0.25.0
|
|
20
|
+
Requires-Dist: sqlite-vec>=0.1.0
|
|
21
|
+
Requires-Dist: tiktoken>=0.5.0
|
|
22
|
+
Requires-Dist: treeloom>=0.3.0
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: mypy>=1.10; extra == 'dev'
|
|
25
|
+
Requires-Dist: pytest-cov>=5.0; extra == 'dev'
|
|
26
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
27
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
28
|
+
Provides-Extra: mcp
|
|
29
|
+
Requires-Dist: fastmcp>=2; extra == 'mcp'
|
|
30
|
+
Provides-Extra: ollama
|
|
31
|
+
Requires-Dist: ollama>=0.3.0; extra == 'ollama'
|
|
32
|
+
Description-Content-Type: text/markdown
|
|
33
|
+
|
|
34
|
+
# greploom
|
|
35
|
+
|
|
36
|
+
Semantic code search with graph-aware context retrieval, built on [treeloom](https://github.com/rdwj/treeloom).
|
|
37
|
+
|
|
38
|
+
greploom reads a treeloom Code Property Graph (CPG JSON), indexes it for hybrid search (vector embeddings + BM25), and returns structurally-complete context neighborhoods for LLM consumption. Vector search finds the right neighborhood; graph traversal expands it to include callers, callees, imports, and data flow sources.
|
|
39
|
+
|
|
40
|
+
## Installation
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pip install greploom # Core — CLI and search engine
|
|
44
|
+
pip install greploom[mcp] # Adds MCP server (requires fastmcp)
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
The default embedding model is `nomic-embed-text` via a local [Ollama](https://ollama.com) instance. Any OpenAI-compatible embedding endpoint works via `GREPLOOM_EMBEDDING_URL`.
|
|
48
|
+
|
|
49
|
+
## Quick Start
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
# 1. Build a CPG with treeloom
|
|
53
|
+
treeloom build src/ -o cpg.json
|
|
54
|
+
|
|
55
|
+
# 2. Index for search (creates .greploom/index.db)
|
|
56
|
+
greploom index cpg.json
|
|
57
|
+
|
|
58
|
+
# 3. Search
|
|
59
|
+
greploom query "where is authentication handled?"
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## How It Works
|
|
63
|
+
|
|
64
|
+
```
|
|
65
|
+
Source code
|
|
66
|
+
|
|
|
67
|
+
v
|
|
68
|
+
treeloom build --> CPG (JSON)
|
|
69
|
+
|
|
|
70
|
+
v
|
|
71
|
+
greploom index --> vector store + BM25 index (SQLite)
|
|
72
|
+
|
|
|
73
|
+
v
|
|
74
|
+
greploom query "how is auth handled?" --> context bundle
|
|
75
|
+
|
|
|
76
|
+
v
|
|
77
|
+
LLM agent receives focused, graph-aware context
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Storage is a single SQLite file using sqlite-vec for vectors and FTS5 for BM25. No server required, no Docker, portable and inspectable.
|
|
81
|
+
|
|
82
|
+
## CLI Reference
|
|
83
|
+
|
|
84
|
+
### `greploom index`
|
|
85
|
+
|
|
86
|
+
Build or update the search index from a treeloom CPG JSON file.
|
|
87
|
+
|
|
88
|
+
```
|
|
89
|
+
greploom index CPG_JSON [OPTIONS]
|
|
90
|
+
|
|
91
|
+
Arguments:
|
|
92
|
+
CPG_JSON Path to the treeloom CPG JSON file
|
|
93
|
+
|
|
94
|
+
Options:
|
|
95
|
+
--db PATH SQLite database path (default: .greploom/index.db)
|
|
96
|
+
--tier [fast|enhanced] Summary tier (default: enhanced)
|
|
97
|
+
--model TEXT Embedding model name
|
|
98
|
+
--ollama-url URL Ollama server URL
|
|
99
|
+
--force Re-index all nodes, ignoring content hashes
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Re-indexing is incremental by default — only nodes whose content has changed are re-embedded. Use `--force` to rebuild from scratch.
|
|
103
|
+
|
|
104
|
+
Summary tiers:
|
|
105
|
+
- `fast` — function signatures only; fastest to build
|
|
106
|
+
- `enhanced` — signatures, docstrings, and callees; better recall
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
# Index with defaults
|
|
110
|
+
greploom index cpg.json
|
|
111
|
+
|
|
112
|
+
# Use a custom database path and force full re-index
|
|
113
|
+
greploom index cpg.json --db /tmp/myproject.db --force
|
|
114
|
+
|
|
115
|
+
# Point at a non-default Ollama instance
|
|
116
|
+
greploom index cpg.json --ollama-url http://gpu-box:11434
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### `greploom query`
|
|
120
|
+
|
|
121
|
+
Search the index and return graph-aware context.
|
|
122
|
+
|
|
123
|
+
```
|
|
124
|
+
greploom query QUERY_TEXT [OPTIONS]
|
|
125
|
+
|
|
126
|
+
Arguments:
|
|
127
|
+
QUERY_TEXT Natural language or symbol query
|
|
128
|
+
|
|
129
|
+
Options:
|
|
130
|
+
--db PATH SQLite database path (default: .greploom/index.db)
|
|
131
|
+
--cpg PATH CPG JSON path for graph expansion
|
|
132
|
+
--budget INT Token budget (default: 8192)
|
|
133
|
+
--top-k INT Number of search results (default: 5)
|
|
134
|
+
--format [context|json] Output format (default: context)
|
|
135
|
+
--model TEXT Embedding model name
|
|
136
|
+
--ollama-url URL Ollama server URL
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
Without `--cpg`, the query returns ranked search hits with scores and summaries. With `--cpg`, hits are expanded through the graph and assembled into a context bundle trimmed to the token budget.
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
# Simple search — ranked hits with summaries
|
|
143
|
+
greploom query "user authentication"
|
|
144
|
+
|
|
145
|
+
# Full graph-expanded context, ready for an LLM
|
|
146
|
+
greploom query "where is authentication handled?" --cpg cpg.json
|
|
147
|
+
|
|
148
|
+
# Pipe JSON output to jq
|
|
149
|
+
greploom query "UserService" --cpg cpg.json --format json | jq '.[].name'
|
|
150
|
+
|
|
151
|
+
# Narrow token budget for smaller context windows
|
|
152
|
+
greploom query "error handling" --cpg cpg.json --budget 4096
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### `greploom serve`
|
|
156
|
+
|
|
157
|
+
Start the MCP server.
|
|
158
|
+
|
|
159
|
+
```
|
|
160
|
+
greploom serve [OPTIONS]
|
|
161
|
+
|
|
162
|
+
Options:
|
|
163
|
+
--db PATH SQLite database path
|
|
164
|
+
--cpg PATH Default CPG JSON path
|
|
165
|
+
--host TEXT Host to bind (default: 0.0.0.0)
|
|
166
|
+
--port INT Port to listen on (default: 8901)
|
|
167
|
+
--transport [streamable-http|stdio] MCP transport (default: streamable-http)
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
```bash
|
|
171
|
+
# Start the MCP server on default port 8901
|
|
172
|
+
greploom serve --db .greploom/index.db --cpg cpg.json
|
|
173
|
+
|
|
174
|
+
# stdio transport for direct agent integration
|
|
175
|
+
greploom serve --transport stdio
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## MCP Server
|
|
179
|
+
|
|
180
|
+
The MCP server exposes two tools:
|
|
181
|
+
|
|
182
|
+
**`search_code`** — Search code semantically and return graph-aware context.
|
|
183
|
+
|
|
184
|
+
Parameters: `query` (required), `cpg_path` (required), `db_path`, `budget`, `top_k`
|
|
185
|
+
|
|
186
|
+
**`index_code`** — Build or update the search index from a CPG JSON file.
|
|
187
|
+
|
|
188
|
+
Parameters: `cpg_path` (required), `db_path`, `tier`
|
|
189
|
+
|
|
190
|
+
Example MCP server URL for agent configuration: `http://localhost:8901/mcp`
|
|
191
|
+
|
|
192
|
+
## Configuration
|
|
193
|
+
|
|
194
|
+
All settings can be provided via environment variables. CLI flags override environment variables for individual commands.
|
|
195
|
+
|
|
196
|
+
| Variable | Default | Description |
|
|
197
|
+
|---|---|---|
|
|
198
|
+
| `GREPLOOM_EMBEDDING_URL` | `http://localhost:11434` | Ollama or OpenAI-compatible endpoint |
|
|
199
|
+
| `GREPLOOM_EMBEDDING_MODEL` | `nomic-embed-text` | Embedding model name |
|
|
200
|
+
| `GREPLOOM_DB_PATH` | `.greploom/index.db` | SQLite database path |
|
|
201
|
+
| `GREPLOOM_TOKEN_BUDGET` | `8192` | Default token budget for context assembly |
|
|
202
|
+
| `GREPLOOM_SUMMARY_TIER` | `enhanced` | Summary tier (`fast` or `enhanced`) |
|
|
203
|
+
|
|
204
|
+
To use an OpenAI-compatible embedding API instead of Ollama:
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
export GREPLOOM_EMBEDDING_URL=https://api.openai.com/v1
|
|
208
|
+
export GREPLOOM_EMBEDDING_MODEL=text-embedding-3-small
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
## Relationship to treeloom
|
|
212
|
+
|
|
213
|
+
greploom reads treeloom's CPG JSON format but does not import treeloom at runtime. `greploom index` reads the CPG JSON to build the search index; `greploom query` reads both the index and the CPG JSON for graph expansion. Any tool that produces treeloom-compatible CPG JSON will work.
|
|
214
|
+
|
|
215
|
+
## Changelog
|
|
216
|
+
|
|
217
|
+
### Version 0.1.0
|
|
218
|
+
|
|
219
|
+
Initial release — full indexing pipeline (summarize, embed, store), hybrid search with RRF, graph expansion for context neighborhoods, token budget management, CLI (index/query/serve), and MCP server with search_code/index_code tools.
|
|
220
|
+
|
|
221
|
+
## LLM Documentation
|
|
222
|
+
|
|
223
|
+
This project provides [llms.txt](llms.txt) and [llms-full.txt](llms-full.txt) files following the [llmstxt.org](https://llmstxt.org/) specification for LLM-friendly documentation.
|
|
224
|
+
|
|
225
|
+
## License
|
|
226
|
+
|
|
227
|
+
MIT
|