greploom 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. greploom-0.1.0/.claude/commands/create-release.md +17 -0
  2. greploom-0.1.0/.github/workflows/release.yml +115 -0
  3. greploom-0.1.0/.github/workflows/test.yml +79 -0
  4. greploom-0.1.0/.gitignore +23 -0
  5. greploom-0.1.0/CLAUDE.md +73 -0
  6. greploom-0.1.0/LICENSE +21 -0
  7. greploom-0.1.0/PKG-INFO +227 -0
  8. greploom-0.1.0/README.md +194 -0
  9. greploom-0.1.0/llms-full.txt +404 -0
  10. greploom-0.1.0/llms.txt +32 -0
  11. greploom-0.1.0/pyproject.toml +59 -0
  12. greploom-0.1.0/scripts/release.sh +104 -0
  13. greploom-0.1.0/src/greploom/__init__.py +5 -0
  14. greploom-0.1.0/src/greploom/cli/__init__.py +22 -0
  15. greploom-0.1.0/src/greploom/cli/index_cmd.py +60 -0
  16. greploom-0.1.0/src/greploom/cli/query_cmd.py +117 -0
  17. greploom-0.1.0/src/greploom/cli/serve_cmd.py +43 -0
  18. greploom-0.1.0/src/greploom/config.py +58 -0
  19. greploom-0.1.0/src/greploom/cpg_types.py +130 -0
  20. greploom-0.1.0/src/greploom/index/__init__.py +114 -0
  21. greploom-0.1.0/src/greploom/index/embedder.py +75 -0
  22. greploom-0.1.0/src/greploom/index/store.py +236 -0
  23. greploom-0.1.0/src/greploom/index/summarizer.py +116 -0
  24. greploom-0.1.0/src/greploom/mcp/__init__.py +0 -0
  25. greploom-0.1.0/src/greploom/mcp/server.py +104 -0
  26. greploom-0.1.0/src/greploom/search/__init__.py +0 -0
  27. greploom-0.1.0/src/greploom/search/budget.py +128 -0
  28. greploom-0.1.0/src/greploom/search/expand.py +159 -0
  29. greploom-0.1.0/src/greploom/search/hybrid.py +58 -0
  30. greploom-0.1.0/src/greploom/version.py +1 -0
  31. greploom-0.1.0/tests/__init__.py +0 -0
  32. greploom-0.1.0/tests/fixtures/medium_cpg.json +275 -0
  33. greploom-0.1.0/tests/fixtures/small_cpg.json +107 -0
  34. greploom-0.1.0/tests/test_cli/__init__.py +0 -0
  35. greploom-0.1.0/tests/test_cli/test_commands.py +32 -0
  36. greploom-0.1.0/tests/test_cli/test_integration.py +164 -0
  37. greploom-0.1.0/tests/test_config.py +127 -0
  38. greploom-0.1.0/tests/test_cpg_types.py +288 -0
  39. greploom-0.1.0/tests/test_index/__init__.py +0 -0
  40. greploom-0.1.0/tests/test_index/test_embedder.py +100 -0
  41. greploom-0.1.0/tests/test_index/test_orchestrator.py +170 -0
  42. greploom-0.1.0/tests/test_index/test_store.py +192 -0
  43. greploom-0.1.0/tests/test_index/test_summarizer.py +309 -0
  44. greploom-0.1.0/tests/test_mcp/__init__.py +0 -0
  45. greploom-0.1.0/tests/test_mcp/test_server.py +45 -0
  46. greploom-0.1.0/tests/test_search/__init__.py +0 -0
  47. greploom-0.1.0/tests/test_search/test_budget.py +165 -0
  48. greploom-0.1.0/tests/test_search/test_expand.py +181 -0
  49. greploom-0.1.0/tests/test_search/test_hybrid.py +136 -0
@@ -0,0 +1,17 @@
1
+ # Create Release
2
+
3
+ Help me create a new release of greploom.
4
+
5
+ ## Steps
6
+
7
+ 1. Ask me for the version number (should follow semver: major.minor.patch)
8
+ 2. Ask me for a brief description of what changed in this release
9
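+ 3. Update the changelog section in README.md with the new version entry (newest first), under a `### Version <version>` heading — the release workflow extracts the release notes by matching that heading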
10
+ 4. Run the release script: `./scripts/release.sh <version> "<description>"`
11
+
12
+ ## Notes
13
+
14
+ - The release script handles updating version.py, pyproject.toml, committing, tagging, and pushing
15
+ - GitHub Actions will automatically create a GitHub Release and publish to PyPI
16
+ - Make sure all tests pass before releasing: `pytest`
17
+ - Make sure linting passes: `ruff check src tests`
@@ -0,0 +1,115 @@
1
+ name: Release and Publish  # Tag-triggered pipeline: create-release -> build -> publish-to-pypi
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*.*.*'  # Runs only for pushed tags shaped like v<a>.<b>.<c>
7
+
8
+ permissions:  # Workflow-level permissions for the GITHUB_TOKEN
9
+ contents: write  # The create-release job creates a GitHub Release with this token
10
+ id-token: write  # Presumably for OIDC-based PyPI publishing; the publish job sets it again -- TODO confirm
11
+
12
+ jobs:
13
+ create-release:  # First job in the chain; the build job lists it under `needs`
14
+ name: Create GitHub Release
15
+ runs-on: ubuntu-latest
16
+
17
+ steps:
18
+ - uses: actions/checkout@v4  # Later steps read src/greploom/version.py and README.md from the checkout
19
+
20
+ - name: Wait for tests to pass
21
+ uses: lewagon/wait-on-check-action@v1.3.1
22
+ with:
23
+ ref: ${{ github.ref }}  # The pushed tag ref (refs/tags/v...)
24
+ check-regexp: 'Test on Python'  # Matches the `Test on Python <version>` job names of the Tests workflow
25
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
26
+ wait-interval: 10  # Polling interval; presumably seconds -- TODO confirm against the action's docs
27
+
28
+ - name: Extract version from tag
29
+ id: version  # Outputs: `version` (tag without the leading v) and `tag` (full tag name)
30
+ run: |  # Strips the refs/tags/v and refs/tags/ prefixes from GITHUB_REF
31
+ VERSION=${GITHUB_REF#refs/tags/v}
32
+ echo "version=$VERSION" >> $GITHUB_OUTPUT
33
+ echo "tag=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
34
+
35
+ - name: Read version from version.py
36
+ id: read_version  # Output: `version_py`
37
+ run: |  # Takes the text between the first pair of double quotes on the __version__ line
38
+ VERSION_PY=$(grep -E '^__version__' src/greploom/version.py | cut -d'"' -f2)
39
+ echo "version_py=$VERSION_PY" >> $GITHUB_OUTPUT
40
+
41
+ - name: Verify version match
42
+ run: |  # Fails the job (exit 1) when the tag version and version.py disagree
43
+ if [ "${{ steps.version.outputs.version }}" != "${{ steps.read_version.outputs.version_py }}" ]; then
44
+ echo "Error: Tag version (${{ steps.version.outputs.version }}) does not match version.py (${{ steps.read_version.outputs.version_py }})"
45
+ exit 1
46
+ fi
47
+
48
+ - name: Extract changelog for this version
49
+ id: changelog  # Sets no step outputs; hands the notes to the release step via /tmp/changelog.txt
50
+ run: |  # awk tracks a `found` flag instead of a "start,end" range: the heading line matches both range patterns, so a range closes on the heading itself and yields no notes
51
+ VERSION="${{ steps.version.outputs.version }}"
52
+ CHANGELOG=$(awk -v ver="$VERSION" '/^### Version / { if (found) exit; found = (($3 "") == (ver "")); next } /^## / { if (found) exit } found' README.md | sed '/^$/d')
53
+ if [ -z "$CHANGELOG" ]; then
54
+ CHANGELOG="Release version $VERSION"  # Fallback when README.md has no entry for this version
55
+ fi
56
+ echo "$CHANGELOG" > /tmp/changelog.txt
57
+
58
+ - name: Create GitHub Release
59
+ env:
60
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}  # Exposed to the gh CLI call below
61
+ run: |  # Reads the notes written to /tmp/changelog.txt by the changelog step and creates the release for the pushed tag
62
+ CHANGELOG=$(cat /tmp/changelog.txt)
63
+ gh release create "${{ steps.version.outputs.tag }}" \
64
+ --title "Release ${{ steps.version.outputs.tag }}" \
65
+ --notes "$CHANGELOG" \
66
+ --verify-tag
67
+
68
+ build:  # Builds the distributions and hands them to publish-to-pypi as an artifact
69
+ name: Build distribution
70
+ needs: [create-release]  # Runs only after the GitHub Release job
71
+ runs-on: ubuntu-latest
72
+
73
+ steps:
74
+ - uses: actions/checkout@v4
75
+
76
+ - name: Set up Python
77
+ uses: actions/setup-python@v5
78
+ with:
79
+ python-version: "3.11"  # Quoted so the version is passed as a string
80
+
81
+ - name: Install build dependencies
82
+ run: |
83
+ python -m pip install --upgrade pip
84
+ pip install build
85
+
86
+ - name: Build package
87
+ run: python -m build  # Output lands in dist/, which the next step uploads
88
+
89
+ - name: Store distribution packages
90
+ uses: actions/upload-artifact@v4
91
+ with:
92
+ name: python-package-distributions  # Same artifact name the publish-to-pypi job downloads
93
+ path: dist/
94
+
95
+ publish-to-pypi:  # Final job: uploads the built distributions to PyPI
96
+ name: Publish to PyPI
97
+ needs: [build]  # Requires the artifact produced by the build job
98
+ runs-on: ubuntu-latest
99
+
100
+ environment:  # Deployment environment this job runs under
101
+ name: pypi
102
+ url: https://pypi.org/p/greploom
103
+
104
+ permissions:  # Job-level permissions; only id-token is listed here
105
+ id-token: write  # Presumably for PyPI trusted publishing (OIDC) -- TODO confirm
106
+
107
+ steps:
108
+ - name: Download distribution packages
109
+ uses: actions/download-artifact@v4
110
+ with:
111
+ name: python-package-distributions  # Artifact name used by the build job's upload step
112
+ path: dist/
113
+
114
+ - name: Publish to PyPI
115
+ uses: pypa/gh-action-pypi-publish@release/v1  # No token or password input is passed; presumably relies on the id-token permission -- TODO confirm
@@ -0,0 +1,79 @@
1
+ name: Tests  # CI workflow: test matrix, package build check, aggregate status job
2
+
3
+ on:
4
+ push:
5
+ branches: [ main ]
6
+ tags:
7
+ - 'v*.*.*'  # Same tag pattern as the release workflow, which waits on this workflow's test jobs
8
+ pull_request:
9
+ branches: [ main ]
10
+
11
+ jobs:
12
+ test:  # Runs pytest with coverage, then ruff, once per Python version in the matrix
13
+ name: Test on Python ${{ matrix.python-version }}  # The release workflow's check-regexp 'Test on Python' matches these names
14
+ runs-on: ubuntu-latest
15
+ strategy:
16
+ matrix:
17
+ python-version: ["3.10", "3.11", "3.12"]  # Quoted so "3.10" stays a string instead of parsing as the float 3.1
18
+
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+
22
+ - name: Set up Python ${{ matrix.python-version }}
23
+ uses: actions/setup-python@v5
24
+ with:
25
+ python-version: ${{ matrix.python-version }}
26
+
27
+ - name: Install dependencies
28
+ run: |  # Editable install with the dev and mcp extras
29
+ python -m pip install --upgrade pip
30
+ pip install -e .[dev,mcp]
31
+
32
+ - name: Run tests with pytest
33
+ run: |  # Coverage is measured for the greploom package; reports go to XML and the terminal
34
+ pytest --cov=greploom --cov-report=xml --cov-report=term-missing
35
+
36
+ - name: Lint with ruff
37
+ run: |  # Listed after the pytest step
38
+ ruff check src tests
39
+
40
+ build:  # Verifies that the package builds and that the built files pass `twine check`
41
+ name: Build distribution
42
+ runs-on: ubuntu-latest
43
+ needs: [test]  # Runs only after the test job
44
+
45
+ steps:
46
+ - uses: actions/checkout@v4
47
+
48
+ - name: Set up Python
49
+ uses: actions/setup-python@v5
50
+ with:
51
+ python-version: "3.11"  # Quoted so the version is passed as a string
52
+
53
+ - name: Install build dependencies
54
+ run: |
55
+ python -m pip install --upgrade pip
56
+ pip install build
57
+
58
+ - name: Build package
59
+ run: python -m build  # Output lands in dist/, which the twine step checks
60
+
61
+ - name: Check build with twine
62
+ run: |  # Nothing is uploaded here; the files in dist/ are only checked
63
+ pip install twine
64
+ twine check dist/*
65
+
66
+ status:  # Aggregate gate: succeeds only when both the test and build jobs succeeded
67
+ name: Tests Status
68
+ runs-on: ubuntu-latest
69
+ needs: [test, build]
70
+ if: always()  # Run even when test or build did not succeed, so the check below can report it
71
+
72
+ steps:
73
+ - name: Check test results
74
+ run: |  # Any result other than "success" for test or build fails this job
75
+ if [ "${{ needs.test.result }}" != "success" ] || [ "${{ needs.build.result }}" != "success" ]; then
76
+ echo "Tests or build failed"
77
+ exit 1
78
+ fi
79
+ echo "All tests passed successfully"
@@ -0,0 +1,23 @@
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .eggs/
8
+ *.egg
9
+ .pytest_cache/
10
+ .mypy_cache/
11
+ .ruff_cache/
12
+ htmlcov/
13
+ .coverage
14
+ *.db
15
+ .greploom/
16
+ .DS_Store
17
+ *.swp
18
+ *~
19
+ venv/
20
+ .venv/
21
+
22
+ # Test output
23
+ /test-output/
@@ -0,0 +1,73 @@
1
+ # greploom -- Project Context
2
+
3
+ greploom is a semantic code search library with graph-aware context retrieval. It reads a treeloom Code Property Graph (JSON), indexes it for hybrid search (vector + BM25), and returns structurally-complete context neighborhoods suitable for LLM consumption.
4
+
5
+ See the [loom-research](https://github.com/rdwj/loom-research) repo for the full design document and architectural decisions.
6
+
7
+ ## Tech Stack
8
+
9
+ - Python 3.10+, package name `greploom`
10
+ - Search: sqlite-vec (vector), SQLite FTS5 (BM25), reciprocal rank fusion
11
+ - Graph: reads treeloom CPG JSON format, walks edges for context expansion
12
+ - Embeddings: nomic-embed-text via ollama (default), any OpenAI-compatible endpoint
13
+ - CLI: click
14
+ - Token counting: tiktoken
15
+ - Build: Hatchling
16
+ - Testing: pytest, 80%+ coverage target
17
+
18
+ ## Architecture
19
+
20
+ ```
21
+ greploom/
22
+ ├── src/greploom/
23
+ │ ├── index/ # Indexing pipeline: summarizer, embedder, storage
24
+ │ │ ├── summarizer.py # Generate text summaries from CPG nodes
25
+ │ │ ├── embedder.py # Embed summaries via ollama or API
26
+ │ │ └── store.py # SQLite storage (sqlite-vec + FTS5)
27
+ │ ├── search/ # Search engine: hybrid search, ranking, context assembly
28
+ │ │ ├── hybrid.py # BM25 + vector search with RRF
29
+ │ │ ├── expand.py # Graph walk to assemble context neighborhoods
30
+ │ │ └── budget.py # Token budget management
31
+ │ ├── cli/ # CLI commands
32
+ │ │ ├── __init__.py
33
+ │ │ ├── index_cmd.py
34
+ │ │ ├── query_cmd.py
35
+ │ │ └── serve_cmd.py
36
+ │ ├── mcp/ # MCP server mode
37
+ │ │ └── server.py
38
+ │ ├── version.py
39
+ │ └── __init__.py
40
+ ├── tests/
41
+ │ ├── fixtures/
42
+ │ ├── test_index/
43
+ │ ├── test_search/
44
+ │ └── test_cli/
45
+ ├── pyproject.toml
46
+ ├── CLAUDE.md
47
+ └── README.md
48
+ ```
49
+
50
+ ## Design Principles
51
+
52
+ 1. **treeloom is the graph, greploom is the search.** greploom reads treeloom's CPG JSON — it never parses source code or builds its own graph.
53
+ 2. **SQLite-only storage.** sqlite-vec + FTS5. No servers, no Docker, one file. Portable and inspectable.
54
+ 3. **Hybrid search.** Vector similarity for semantic queries ("where is authentication?") + BM25 for symbol queries ("find UserService"). Reciprocal rank fusion merges results.
55
+ 4. **Graph expansion at query time.** Search finds candidate nodes. CPG walk adds callers, callees, imports, data flow sources. The agent gets code in structural context.
56
+ 5. **Token budget management.** Returns exactly as much context as the LLM can use, ranked by structural relevance. Default: 8192 tokens.
57
+ 6. **Incremental re-indexing.** Track content hash per node. On re-index, only re-embed changed nodes.
58
+
59
+ ## Relationship to treeloom
60
+
61
+ greploom depends on treeloom but does not import treeloom at runtime for core search operations. The dependency is via the CPG JSON format:
62
+
63
+ - `greploom index` reads a treeloom CPG JSON file and builds its search index
64
+ - `greploom query` reads both the search index (SQLite) and the CPG JSON (for graph expansion)
65
+ - The CPG JSON format is documented in treeloom's CLAUDE.md
66
+
67
+ This means greploom can work with any tool that produces treeloom-compatible CPG JSON, not just treeloom itself.
68
+
69
+ ## Releasing
70
+
71
+ The version lives in two files that must stay in sync:
72
+ - `src/greploom/version.py` — `__version__ = "x.y.z"`
73
+ - `pyproject.toml` — `version = "x.y.z"`
greploom-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 rdwj
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,227 @@
1
+ Metadata-Version: 2.4
2
+ Name: greploom
3
+ Version: 0.1.0
4
+ Summary: Semantic code search with graph-aware context retrieval, built on treeloom
5
+ Author: rdwj
6
+ License-Expression: MIT
7
+ License-File: LICENSE
8
+ Classifier: Development Status :: 2 - Pre-Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Topic :: Software Development :: Libraries
16
+ Requires-Python: >=3.10
17
+ Requires-Dist: apsw>=3.45
18
+ Requires-Dist: click>=8.0
19
+ Requires-Dist: httpx>=0.25.0
20
+ Requires-Dist: sqlite-vec>=0.1.0
21
+ Requires-Dist: tiktoken>=0.5.0
22
+ Requires-Dist: treeloom>=0.3.0
23
+ Provides-Extra: dev
24
+ Requires-Dist: mypy>=1.10; extra == 'dev'
25
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
26
+ Requires-Dist: pytest>=8.0; extra == 'dev'
27
+ Requires-Dist: ruff>=0.4; extra == 'dev'
28
+ Provides-Extra: mcp
29
+ Requires-Dist: fastmcp>=2; extra == 'mcp'
30
+ Provides-Extra: ollama
31
+ Requires-Dist: ollama>=0.3.0; extra == 'ollama'
32
+ Description-Content-Type: text/markdown
33
+
34
+ # greploom
35
+
36
+ Semantic code search with graph-aware context retrieval, built on [treeloom](https://github.com/rdwj/treeloom).
37
+
38
+ greploom reads a treeloom Code Property Graph (CPG JSON), indexes it for hybrid search (vector embeddings + BM25), and returns structurally-complete context neighborhoods for LLM consumption. Vector search finds the right neighborhood; graph traversal expands it to include callers, callees, imports, and data flow sources.
39
+
40
+ ## Installation
41
+
42
+ ```bash
43
+ pip install greploom # Core — CLI and search engine
44
+ pip install greploom[mcp] # Adds MCP server (requires fastmcp)
45
+ ```
46
+
47
+ The default embedding model is `nomic-embed-text` via a local [Ollama](https://ollama.com) instance. Any OpenAI-compatible embedding endpoint works via `GREPLOOM_EMBEDDING_URL`.
48
+
49
+ ## Quick Start
50
+
51
+ ```bash
52
+ # 1. Build a CPG with treeloom
53
+ treeloom build src/ -o cpg.json
54
+
55
+ # 2. Index for search (creates .greploom/index.db)
56
+ greploom index cpg.json
57
+
58
+ # 3. Search
59
+ greploom query "where is authentication handled?"
60
+ ```
61
+
62
+ ## How It Works
63
+
64
+ ```
65
+ Source code
66
+ |
67
+ v
68
+ treeloom build --> CPG (JSON)
69
+ |
70
+ v
71
+ greploom index --> vector store + BM25 index (SQLite)
72
+ |
73
+ v
74
+ greploom query "how is auth handled?" --> context bundle
75
+ |
76
+ v
77
+ LLM agent receives focused, graph-aware context
78
+ ```
79
+
80
+ Storage is a single SQLite file using sqlite-vec for vectors and FTS5 for BM25. No server required, no Docker, portable and inspectable.
81
+
82
+ ## CLI Reference
83
+
84
+ ### `greploom index`
85
+
86
+ Build or update the search index from a treeloom CPG JSON file.
87
+
88
+ ```
89
+ greploom index CPG_JSON [OPTIONS]
90
+
91
+ Arguments:
92
+ CPG_JSON Path to the treeloom CPG JSON file
93
+
94
+ Options:
95
+ --db PATH SQLite database path (default: .greploom/index.db)
96
+ --tier [fast|enhanced] Summary tier (default: enhanced)
97
+ --model TEXT Embedding model name
98
+ --ollama-url URL Ollama server URL
99
+ --force Re-index all nodes, ignoring content hashes
100
+ ```
101
+
102
+ Re-indexing is incremental by default — only nodes whose content has changed are re-embedded. Use `--force` to rebuild from scratch.
103
+
104
+ Summary tiers:
105
+ - `fast` — function signatures only; fastest to build
106
+ - `enhanced` — signatures, docstrings, and callees; better recall
107
+
108
+ ```bash
109
+ # Index with defaults
110
+ greploom index cpg.json
111
+
112
+ # Use a custom database path and force full re-index
113
+ greploom index cpg.json --db /tmp/myproject.db --force
114
+
115
+ # Point at a non-default Ollama instance
116
+ greploom index cpg.json --ollama-url http://gpu-box:11434
117
+ ```
118
+
119
+ ### `greploom query`
120
+
121
+ Search the index and return graph-aware context.
122
+
123
+ ```
124
+ greploom query QUERY_TEXT [OPTIONS]
125
+
126
+ Arguments:
127
+ QUERY_TEXT Natural language or symbol query
128
+
129
+ Options:
130
+ --db PATH SQLite database path (default: .greploom/index.db)
131
+ --cpg PATH CPG JSON path for graph expansion
132
+ --budget INT Token budget (default: 8192)
133
+ --top-k INT Number of search results (default: 5)
134
+ --format [context|json] Output format (default: context)
135
+ --model TEXT Embedding model name
136
+ --ollama-url URL Ollama server URL
137
+ ```
138
+
139
+ Without `--cpg`, the query returns ranked search hits with scores and summaries. With `--cpg`, hits are expanded through the graph and assembled into a context bundle trimmed to the token budget.
140
+
141
+ ```bash
142
+ # Simple search — ranked hits with summaries
143
+ greploom query "user authentication"
144
+
145
+ # Full graph-expanded context, ready for an LLM
146
+ greploom query "where is authentication handled?" --cpg cpg.json
147
+
148
+ # Pipe JSON output to jq
149
+ greploom query "UserService" --cpg cpg.json --format json | jq '.[].name'
150
+
151
+ # Narrow token budget for smaller context windows
152
+ greploom query "error handling" --cpg cpg.json --budget 4096
153
+ ```
154
+
155
+ ### `greploom serve`
156
+
157
+ Start the MCP server.
158
+
159
+ ```
160
+ greploom serve [OPTIONS]
161
+
162
+ Options:
163
+ --db PATH SQLite database path
164
+ --cpg PATH Default CPG JSON path
165
+ --host TEXT Host to bind (default: 0.0.0.0)
166
+ --port INT Port to listen on (default: 8901)
167
+ --transport [streamable-http|stdio] MCP transport (default: streamable-http)
168
+ ```
169
+
170
+ ```bash
171
+ # Start the MCP server on default port 8901
172
+ greploom serve --db .greploom/index.db --cpg cpg.json
173
+
174
+ # stdio transport for direct agent integration
175
+ greploom serve --transport stdio
176
+ ```
177
+
178
+ ## MCP Server
179
+
180
+ The MCP server exposes two tools:
181
+
182
+ **`search_code`** — Search code semantically and return graph-aware context.
183
+
184
+ Parameters: `query` (required), `cpg_path` (required), `db_path`, `budget`, `top_k`
185
+
186
+ **`index_code`** — Build or update the search index from a CPG JSON file.
187
+
188
+ Parameters: `cpg_path` (required), `db_path`, `tier`
189
+
190
+ Example MCP server URL for agent configuration: `http://localhost:8901/mcp`
191
+
192
+ ## Configuration
193
+
194
+ All settings can be provided via environment variables. CLI flags override environment variables for individual commands.
195
+
196
+ | Variable | Default | Description |
197
+ |---|---|---|
198
+ | `GREPLOOM_EMBEDDING_URL` | `http://localhost:11434` | Ollama or OpenAI-compatible endpoint |
199
+ | `GREPLOOM_EMBEDDING_MODEL` | `nomic-embed-text` | Embedding model name |
200
+ | `GREPLOOM_DB_PATH` | `.greploom/index.db` | SQLite database path |
201
+ | `GREPLOOM_TOKEN_BUDGET` | `8192` | Default token budget for context assembly |
202
+ | `GREPLOOM_SUMMARY_TIER` | `enhanced` | Summary tier (`fast` or `enhanced`) |
203
+
204
+ To use an OpenAI-compatible embedding API instead of Ollama:
205
+
206
+ ```bash
207
+ export GREPLOOM_EMBEDDING_URL=https://api.openai.com/v1
208
+ export GREPLOOM_EMBEDDING_MODEL=text-embedding-3-small
209
+ ```
210
+
211
+ ## Relationship to treeloom
212
+
213
+ greploom reads treeloom's CPG JSON format but does not import treeloom at runtime. `greploom index` reads the CPG JSON to build the search index; `greploom query` reads both the index and the CPG JSON for graph expansion. Any tool that produces treeloom-compatible CPG JSON will work.
214
+
215
+ ## Changelog
216
+
217
+ ### Version 0.1.0
218
+
219
+ Initial release — full indexing pipeline (summarize, embed, store), hybrid search with RRF, graph expansion for context neighborhoods, token budget management, CLI (index/query/serve), and MCP server with search_code/index_code tools.
220
+
221
+ ## LLM Documentation
222
+
223
+ This project provides [llms.txt](llms.txt) and [llms-full.txt](llms-full.txt) files following the [llmstxt.org](https://llmstxt.org/) specification for LLM-friendly documentation.
224
+
225
+ ## License
226
+
227
+ MIT