hedwig-cg 0.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hedwig_cg-0.9.0/.github/workflows/ci.yml +77 -0
- hedwig_cg-0.9.0/.github/workflows/release.yml +117 -0
- hedwig_cg-0.9.0/.gitignore +54 -0
- hedwig_cg-0.9.0/CHANGELOG.md +127 -0
- hedwig_cg-0.9.0/CONTRIBUTING.md +103 -0
- hedwig_cg-0.9.0/LICENSE +21 -0
- hedwig_cg-0.9.0/PKG-INFO +220 -0
- hedwig_cg-0.9.0/README.md +171 -0
- hedwig_cg-0.9.0/docs/README_de.md +124 -0
- hedwig_cg-0.9.0/docs/README_ja.md +124 -0
- hedwig_cg-0.9.0/docs/README_ko.md +124 -0
- hedwig_cg-0.9.0/docs/README_zh.md +124 -0
- hedwig_cg-0.9.0/hedwig_cg/__init__.py +5 -0
- hedwig_cg-0.9.0/hedwig_cg/__main__.py +5 -0
- hedwig_cg-0.9.0/hedwig_cg/cli/__init__.py +3 -0
- hedwig_cg-0.9.0/hedwig_cg/cli/d3.v7.min.js +2 -0
- hedwig_cg-0.9.0/hedwig_cg/cli/main.py +2004 -0
- hedwig_cg-0.9.0/hedwig_cg/cli/viz_template.html +140 -0
- hedwig_cg-0.9.0/hedwig_cg/core/__init__.py +3 -0
- hedwig_cg-0.9.0/hedwig_cg/core/analyze.py +134 -0
- hedwig_cg-0.9.0/hedwig_cg/core/build.py +245 -0
- hedwig_cg-0.9.0/hedwig_cg/core/cluster.py +245 -0
- hedwig_cg-0.9.0/hedwig_cg/core/detect.py +171 -0
- hedwig_cg-0.9.0/hedwig_cg/core/extract.py +473 -0
- hedwig_cg-0.9.0/hedwig_cg/core/lang_detect.py +94 -0
- hedwig_cg-0.9.0/hedwig_cg/core/pipeline.py +344 -0
- hedwig_cg-0.9.0/hedwig_cg/core/tags_extract.py +1015 -0
- hedwig_cg-0.9.0/hedwig_cg/core/ts_extract.py +646 -0
- hedwig_cg-0.9.0/hedwig_cg/mcp_server.py +355 -0
- hedwig_cg-0.9.0/hedwig_cg/py.typed +0 -0
- hedwig_cg-0.9.0/hedwig_cg/queries/c_sharp-tags.scm +17 -0
- hedwig_cg-0.9.0/hedwig_cg/queries/kotlin-tags.scm +17 -0
- hedwig_cg-0.9.0/hedwig_cg/queries/objc-tags.scm +23 -0
- hedwig_cg-0.9.0/hedwig_cg/query/__init__.py +3 -0
- hedwig_cg-0.9.0/hedwig_cg/query/embeddings.py +373 -0
- hedwig_cg-0.9.0/hedwig_cg/query/hybrid.py +476 -0
- hedwig_cg-0.9.0/hedwig_cg/scripts/auto_rebuild.sh +13 -0
- hedwig_cg-0.9.0/hedwig_cg/skill.md +103 -0
- hedwig_cg-0.9.0/hedwig_cg/storage/__init__.py +3 -0
- hedwig_cg-0.9.0/hedwig_cg/storage/store.py +616 -0
- hedwig_cg-0.9.0/hedwig_cg/utils/__init__.py +1 -0
- hedwig_cg-0.9.0/pyproject.toml +72 -0
- hedwig_cg-0.9.0/tests/__init__.py +0 -0
- hedwig_cg-0.9.0/tests/test_build.py +86 -0
- hedwig_cg-0.9.0/tests/test_cli.py +482 -0
- hedwig_cg-0.9.0/tests/test_community.py +135 -0
- hedwig_cg-0.9.0/tests/test_detect.py +57 -0
- hedwig_cg-0.9.0/tests/test_e2e_pipeline.py +267 -0
- hedwig_cg-0.9.0/tests/test_extract.py +144 -0
- hedwig_cg-0.9.0/tests/test_hybrid.py +62 -0
- hedwig_cg-0.9.0/tests/test_incremental.py +78 -0
- hedwig_cg-0.9.0/tests/test_lang_detect.py +104 -0
- hedwig_cg-0.9.0/tests/test_markdown_extract.py +97 -0
- hedwig_cg-0.9.0/tests/test_mcp_server.py +470 -0
- hedwig_cg-0.9.0/tests/test_pipeline.py +178 -0
- hedwig_cg-0.9.0/tests/test_store.py +103 -0
- hedwig_cg-0.9.0/tests/test_ts_extract_js.py +196 -0
- hedwig_cg-0.9.0/tests/test_ts_extract_typescript.py +259 -0
- hedwig_cg-0.9.0/tests/test_visualization.py +114 -0
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
paths:
|
|
7
|
+
- "hedwig_cg/**"
|
|
8
|
+
- "tests/**"
|
|
9
|
+
- "pyproject.toml"
|
|
10
|
+
- ".github/workflows/ci.yml"
|
|
11
|
+
pull_request:
|
|
12
|
+
branches: [main]
|
|
13
|
+
paths:
|
|
14
|
+
- "hedwig_cg/**"
|
|
15
|
+
- "tests/**"
|
|
16
|
+
- "pyproject.toml"
|
|
17
|
+
- ".github/workflows/ci.yml"
|
|
18
|
+
permissions:
|
|
19
|
+
contents: read
|
|
20
|
+
|
|
21
|
+
jobs:
|
|
22
|
+
test:
|
|
23
|
+
runs-on: ${{ matrix.os }}
|
|
24
|
+
strategy:
|
|
25
|
+
fail-fast: false
|
|
26
|
+
matrix:
|
|
27
|
+
os: [ubuntu-latest, macos-latest]
|
|
28
|
+
python-version: ["3.10", "3.11", "3.12"]
|
|
29
|
+
|
|
30
|
+
steps:
|
|
31
|
+
- uses: actions/checkout@v4
|
|
32
|
+
|
|
33
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
34
|
+
uses: actions/setup-python@v5
|
|
35
|
+
with:
|
|
36
|
+
python-version: ${{ matrix.python-version }}
|
|
37
|
+
cache: pip
|
|
38
|
+
|
|
39
|
+
- name: Install dependencies
|
|
40
|
+
run: |
|
|
41
|
+
python -m pip install --upgrade pip
|
|
42
|
+
pip install -e ".[dev]"
|
|
43
|
+
|
|
44
|
+
- name: Lint with ruff
|
|
45
|
+
run: ruff check hedwig_cg/
|
|
46
|
+
|
|
47
|
+
- name: Run tests
|
|
48
|
+
run: pytest --tb=short -q
|
|
49
|
+
|
|
50
|
+
build:
|
|
51
|
+
runs-on: ubuntu-latest
|
|
52
|
+
needs: test
|
|
53
|
+
steps:
|
|
54
|
+
- uses: actions/checkout@v4
|
|
55
|
+
|
|
56
|
+
- name: Set up Python
|
|
57
|
+
uses: actions/setup-python@v5
|
|
58
|
+
with:
|
|
59
|
+
python-version: "3.12"
|
|
60
|
+
cache: pip
|
|
61
|
+
|
|
62
|
+
- name: Build package
|
|
63
|
+
run: |
|
|
64
|
+
pip install build
|
|
65
|
+
python -m build
|
|
66
|
+
|
|
67
|
+
- name: Verify wheel contents
|
|
68
|
+
run: |
|
|
69
|
+
pip install dist/*.whl
|
|
70
|
+
hedwig-cg --version
|
|
71
|
+
|
|
72
|
+
- name: Upload build artifacts
|
|
73
|
+
uses: actions/upload-artifact@v4
|
|
74
|
+
with:
|
|
75
|
+
name: dist
|
|
76
|
+
path: dist/
|
|
77
|
+
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
workflow_dispatch:
|
|
5
|
+
inputs:
|
|
6
|
+
version:
|
|
7
|
+
description: "Release version (e.g. v0.1.0)"
|
|
8
|
+
required: true
|
|
9
|
+
type: string
|
|
10
|
+
|
|
11
|
+
permissions:
|
|
12
|
+
contents: write
|
|
13
|
+
|
|
14
|
+
jobs:
|
|
15
|
+
test:
|
|
16
|
+
runs-on: ubuntu-latest
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
|
|
20
|
+
- name: Set up Python
|
|
21
|
+
uses: actions/setup-python@v5
|
|
22
|
+
with:
|
|
23
|
+
python-version: "3.12"
|
|
24
|
+
cache: pip
|
|
25
|
+
|
|
26
|
+
- name: Install dependencies
|
|
27
|
+
run: |
|
|
28
|
+
python -m pip install --upgrade pip
|
|
29
|
+
pip install -e ".[dev]"
|
|
30
|
+
|
|
31
|
+
- name: Lint
|
|
32
|
+
run: ruff check hedwig_cg/
|
|
33
|
+
|
|
34
|
+
- name: Test
|
|
35
|
+
run: pytest --tb=short -q
|
|
36
|
+
|
|
37
|
+
release:
|
|
38
|
+
runs-on: ubuntu-latest
|
|
39
|
+
needs: test
|
|
40
|
+
environment: pypi
|
|
41
|
+
steps:
|
|
42
|
+
- uses: actions/checkout@v4
|
|
43
|
+
with:
|
|
44
|
+
fetch-depth: 0
|
|
45
|
+
|
|
46
|
+
- name: Set up Python
|
|
47
|
+
uses: actions/setup-python@v5
|
|
48
|
+
with:
|
|
49
|
+
python-version: "3.12"
|
|
50
|
+
cache: pip
|
|
51
|
+
|
|
52
|
+
- name: Validate version format
|
|
53
|
+
env:
|
|
54
|
+
VERSION: ${{ inputs.version }}
|
|
55
|
+
run: |
|
|
56
|
+
if [[ ! "$VERSION" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
|
|
57
|
+
echo "::error::Version must match vX.Y.Z format (e.g. v0.1.0)"
|
|
58
|
+
exit 1
|
|
59
|
+
fi
|
|
60
|
+
|
|
61
|
+
- name: Check tag does not already exist
|
|
62
|
+
env:
|
|
63
|
+
VERSION: ${{ inputs.version }}
|
|
64
|
+
run: |
|
|
65
|
+
if git rev-parse "$VERSION" >/dev/null 2>&1; then
|
|
66
|
+
echo "::error::Tag $VERSION already exists"
|
|
67
|
+
exit 1
|
|
68
|
+
fi
|
|
69
|
+
|
|
70
|
+
- name: Build package
|
|
71
|
+
run: |
|
|
72
|
+
pip install build
|
|
73
|
+
python -m build
|
|
74
|
+
|
|
75
|
+
- name: Verify wheel
|
|
76
|
+
run: |
|
|
77
|
+
pip install dist/*.whl
|
|
78
|
+
hedwig-cg --version
|
|
79
|
+
|
|
80
|
+
- name: Publish to PyPI
|
|
81
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
82
|
+
with:
|
|
83
|
+
password: ${{ secrets.PYPI_API_TOKEN }}
|
|
84
|
+
|
|
85
|
+
- name: Generate changelog
|
|
86
|
+
env:
|
|
87
|
+
VERSION: ${{ inputs.version }}
|
|
88
|
+
REPO: ${{ github.repository }}
|
|
89
|
+
run: |
|
|
90
|
+
PREV_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "")
|
|
91
|
+
|
|
92
|
+
if [ -z "$PREV_TAG" ]; then
|
|
93
|
+
COMMITS=$(git log --pretty=format:"- %s (%h)" --no-merges)
|
|
94
|
+
COMPARE_BASE=$(git rev-list --max-parents=0 HEAD | head -1)
|
|
95
|
+
else
|
|
96
|
+
COMMITS=$(git log "${PREV_TAG}..HEAD" --pretty=format:"- %s (%h)" --no-merges)
|
|
97
|
+
COMPARE_BASE="$PREV_TAG"
|
|
98
|
+
fi
|
|
99
|
+
|
|
100
|
+
{
|
|
101
|
+
echo "## What's Changed"
|
|
102
|
+
echo ""
|
|
103
|
+
echo "$COMMITS"
|
|
104
|
+
echo ""
|
|
105
|
+
echo "**Full Changelog**: https://github.com/${REPO}/compare/${COMPARE_BASE}...${VERSION}"
|
|
106
|
+
} > release_notes.md
|
|
107
|
+
|
|
108
|
+
- name: Create GitHub Release
|
|
109
|
+
env:
|
|
110
|
+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
111
|
+
VERSION: ${{ inputs.version }}
|
|
112
|
+
SHA: ${{ github.sha }}
|
|
113
|
+
run: |
|
|
114
|
+
gh release create "$VERSION" dist/* \
|
|
115
|
+
--title "$VERSION" \
|
|
116
|
+
--notes-file release_notes.md \
|
|
117
|
+
--target "$SHA"
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.egg-info/
|
|
6
|
+
dist/
|
|
7
|
+
build/
|
|
8
|
+
*.egg
|
|
9
|
+
.eggs/
|
|
10
|
+
|
|
11
|
+
# Virtual environments
|
|
12
|
+
.venv/
|
|
13
|
+
venv/
|
|
14
|
+
env/
|
|
15
|
+
|
|
16
|
+
# IDE
|
|
17
|
+
.idea/
|
|
18
|
+
.vscode/
|
|
19
|
+
*.swp
|
|
20
|
+
*.swo
|
|
21
|
+
|
|
22
|
+
# OS
|
|
23
|
+
.DS_Store
|
|
24
|
+
Thumbs.db
|
|
25
|
+
|
|
26
|
+
# hedwig-cg databases (generated)
|
|
27
|
+
.hedwig-cg/
|
|
28
|
+
.hedwig-cb/
|
|
29
|
+
|
|
30
|
+
# Claude Code
|
|
31
|
+
.claude/
|
|
32
|
+
CLAUDE.md
|
|
33
|
+
|
|
34
|
+
# Testing
|
|
35
|
+
.pytest_cache/
|
|
36
|
+
.coverage
|
|
37
|
+
htmlcov/
|
|
38
|
+
.tox/
|
|
39
|
+
.mypy_cache/
|
|
40
|
+
|
|
41
|
+
# Internal review feedback (not for public repo)
|
|
42
|
+
feedbacks/
|
|
43
|
+
|
|
44
|
+
# Generated integration files (from hedwig-cg install commands)
|
|
45
|
+
.cursor/
|
|
46
|
+
.windsurf/
|
|
47
|
+
.codex/
|
|
48
|
+
AGENTS.md
|
|
49
|
+
GEMINI.md
|
|
50
|
+
CONVENTIONS.md
|
|
51
|
+
.aider.conf.yml
|
|
52
|
+
|
|
53
|
+
# OMC state
|
|
54
|
+
.omc/
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [0.2.0] - 2026-04-11
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- **Cursor IDE integration** (`hedwig-cg cursor install/uninstall`): Creates `.cursor/rules/hedwig-cg.mdc` with alwaysApply rules
|
|
12
|
+
- **Windsurf IDE integration** (`hedwig-cg windsurf install/uninstall`): Creates `.windsurf/rules/hedwig-cg.md` for Cascade
|
|
13
|
+
- **Cline (VS Code extension) integration** as the 8th supported AI agent
|
|
14
|
+
- **`hedwig-cg doctor` command**: 21-point installation health check (Python version, deps, tree-sitter parsers, MCP, embedding models, DB integrity, FAISS indexes)
|
|
15
|
+
- **MCP tool descriptions optimized for AI agents**: `search` marked as PRIMARY tool, `communities` marked as "rarely needed", `instructions` guide agents to start with search
|
|
16
|
+
- **AI Agent Interface Design Principle** documented in CLAUDE.md: minimal interface philosophy to prevent hallucination
|
|
17
|
+
- **Weighted Reciprocal Rank Fusion**: Per-signal weights (code_vec=1.0, text_vec=1.0, graph=0.8, keyword=1.5, community=0.7) tuned for optimal search quality
|
|
18
|
+
- **Stopword filtering**: 80+ common English stopwords removed from keyword/community search terms for improved FTS5 precision
|
|
19
|
+
- **LRU search result cache** (128 entries): Instant return for repeated queries, auto-cleared on graph rebuild
|
|
20
|
+
- **Query embedding LRU cache** (256 entries): Eliminates re-encoding for identical queries (291ms → 0ms)
|
|
21
|
+
- `extract_search_terms()` public API for reusable stopword-filtered term extraction
|
|
22
|
+
- `clear_search_cache()` and `clear_query_cache()` public APIs
|
|
23
|
+
- `weights` parameter on `hybrid_search()` for runtime signal weight tuning
|
|
24
|
+
- **Weight-aware graph expansion**: BFS traversal now uses edge weights (semantic similarity + confidence + proximity) and relation-type weights (`calls`/`inherits`=1.0, `imports`=0.7, `defines`=0.5, `contains`=0.3) instead of uniform hop distance
|
|
25
|
+
- `RELATION_WEIGHTS` dictionary for configurable per-relation expansion priority
|
|
26
|
+
- **Parent class context in embeddings**: Method/constructor/property nodes now include "method of ClassName" in embedding text for better class-membership queries
|
|
27
|
+
- **Query-relevant snippets**: Search results now show the most query-term-dense region of source code instead of blind truncation from the start
|
|
28
|
+
- **MCP Server** (`hedwig-cg mcp`): Model Context Protocol server exposing 5 tools (search, node, stats, communities, build) over stdio transport for universal AI agent integration
|
|
29
|
+
- **Search signal explainability**: Each result now includes per-signal RRF contribution breakdown (code_vector, text_vector, graph, keyword, community) in CLI table and MCP output
|
|
30
|
+
- **JS/TS call graph extraction**: Tree-sitter now extracts function/method calls in JavaScript and TypeScript (previously only Python had call tracking), with JS builtin filtering
|
|
31
|
+
- **Pipeline stage timing**: Build command now displays per-stage wall-clock timing breakdown (detect, extract, build, pagerank, embed, cluster, analyze, store) with total elapsed time
|
|
32
|
+
- **Incremental embedding**: `--incremental` builds now skip re-embedding unchanged nodes by checking existing embeddings in DB, reducing rebuild time by up to 95% (8.7s → 0.4s when no files changed)
|
|
33
|
+
- **Fast search mode**: `--fast` flag uses text model only, skipping code model loading for lower cold-start latency; available in CLI, REPL, and MCP server
|
|
34
|
+
- **REPL model preloading**: `hedwig-cg query` REPL now preloads embedding models in a background thread so first search is faster
|
|
35
|
+
- **Python decorator extraction**: Decorators (`@dataclass`, `@cli.command()`, `@staticmethod`, etc.) are now extracted and stored as node attributes, enriching embeddings for decorator-aware search
|
|
36
|
+
- **Search result line numbers**: Results now include `start_line`/`end_line` in CLI (`file.py:42`), MCP server (`file.py:42-67`), and SearchResult API — enabling AI agents to navigate directly to code
|
|
37
|
+
|
|
38
|
+
### Changed
|
|
39
|
+
- README updated with real benchmarks (9.5s full build, 0.4s incremental, 0.08s warm search), new features (fast search, line numbers, decorator extraction, incremental embedding), and revised optimizations list
|
|
40
|
+
- FAISS index loading now uses `IO_FLAG_MMAP` for lower RSS and faster cold starts on large indices (with automatic fallback)
|
|
41
|
+
- Pipeline automatically clears search result and query embedding caches after rebuild
|
|
42
|
+
- RRF keyword weight boosted from 1.0 → 1.5 so exact-match code entities rank higher
|
|
43
|
+
- Graph expansion seeds increased from top-5 to top-8 for broader graph signal coverage
|
|
44
|
+
|
|
45
|
+
### Fixed
|
|
46
|
+
- **CI failure**: Added `mcp>=1.0` to dev dependencies and `pytest.importorskip("mcp")` guard for graceful skip
|
|
47
|
+
- **MCP stats tool**: Fixed `compute_god_nodes` (non-existent) → `analyze()` from analyze module returning `AnalysisResult.god_nodes`
|
|
48
|
+
- **Fast mode variable shadowing**: `code_vector_hits` was incorrectly overwritten with text-model results
|
|
49
|
+
|
|
50
|
+
### Performance
|
|
51
|
+
- Search performance improved ~46% (5.9s → 3.2s) via FAISS disk persistence and graph expansion caching
|
|
52
|
+
- Query embedding cache hit: 291ms → 0ms (3M+ speedup for repeated queries)
|
|
53
|
+
- FAISS mmap loading reduces memory footprint for large indices
|
|
54
|
+
- Warm search: 0.02s, cached search: 0.006s (986 nodes / 2091 edges)
|
|
55
|
+
|
|
56
|
+
## [0.1.2] - 2026-04-11
|
|
57
|
+
|
|
58
|
+
### Added
|
|
59
|
+
- **Chinese (简体中文) README** (`docs/README_zh.md`)
|
|
60
|
+
- **German (Deutsch) README** (`docs/README_de.md`)
|
|
61
|
+
- Cross-language navigation links across all 5 README variants (en, ko, ja, zh, de)
|
|
62
|
+
|
|
63
|
+
### Fixed
|
|
64
|
+
- Corrected HybridRAG signal count from "6-signal" to "5-signal" across all documentation, code comments, and CLAUDE.md (the actual RRF receives 5 ranked lists: code vector, text vector, graph, keyword, community)
|
|
65
|
+
- Clarified `hedwig-cg search` as the single primary HybridRAG entry point in skill rules and PreToolUse hook
|
|
66
|
+
|
|
67
|
+
## [Unreleased]
|
|
68
|
+
|
|
69
|
+
### Added
|
|
70
|
+
- **Community-aware HybridRAG**: 5-signal search (code vector + text vector + graph + keyword + community)
|
|
71
|
+
- **Community summaries**: Auto-generated keyword-rich text from node labels, kinds, docstrings, and file paths
|
|
72
|
+
- **`hedwig-cg communities` CLI command**: List, filter by level, and search communities
|
|
73
|
+
- **Markdown document extraction**: Headings become section nodes with hierarchy, internal links become reference edges
|
|
74
|
+
- **Incremental build** (`--incremental`): SHA-256 content hashing skips unchanged files for fast rebuilds
|
|
75
|
+
- **Embedding download UX**: Rich console message on first model download (~80MB)
|
|
76
|
+
- `community_search()` method in KnowledgeStore for summary-based community lookup
|
|
77
|
+
- **D3.js export format** (`--format d3`): Force-directed graph JSON with PageRank-based sizing and kind-based grouping
|
|
78
|
+
- **`hedwig-cg visualize` CLI command**: Self-contained interactive HTML visualization with zoom, search, tooltips, and drag
|
|
79
|
+
- **`hedwig-cg clean` CLI command**: Remove .hedwig-cg/ database directory with confirmation prompt
|
|
80
|
+
- **Graph quality metrics in `stats`**: Density, connected components, average clustering coefficient
|
|
81
|
+
- Comprehensive CLI command tests (communities, search, d3 export, visualize, clean)
|
|
82
|
+
- Comprehensive JavaScript tree-sitter extraction tests (17 tests)
|
|
83
|
+
- **`hedwig-cg query` REPL**: Interactive search session with `:node`, `:stats`, `:quit` commands
|
|
84
|
+
- **`--offline` flag for `visualize`**: Inlines D3.js (~280KB) for airgapped/offline environments
|
|
85
|
+
- **TypeScript-specific extraction**: Interfaces (with extends/method signatures), type aliases, enums with member extraction
|
|
86
|
+
- E2E integration tests for full pipeline (build → store → search → incremental → export → clean)
|
|
87
|
+
- TypeScript-specific tree-sitter extraction tests (12 tests)
|
|
88
|
+
- 160 tests with 87% code coverage (up from 61 tests)
|
|
89
|
+
- **PyPI classifiers expansion**: Python 3.10/3.11/3.12, AI/NLP topics, `Typing :: Typed`, OS Independent
|
|
90
|
+
- **GitHub Actions PyPI publish**: Automated deployment on GitHub Release via `pypa/gh-action-pypi-publish`
|
|
91
|
+
|
|
92
|
+
### Fixed
|
|
93
|
+
- **Critical**: `dependencies` in pyproject.toml was under the `[project.urls]` TOML section, causing the wheel to declare zero dependencies
|
|
94
|
+
- Resolved all 27 ruff lint errors (import sorting, unused variables, line length)
|
|
95
|
+
- Removed legacy ignore-file backward compatibility reference
|
|
96
|
+
- Removed stale `build_hnsw_index` backward-compat alias from store.py
|
|
97
|
+
- Fixed `try_to_load_from_cache` return value check in embeddings.py (operator precedence bug)
|
|
98
|
+
- **Critical**: The second run of an incremental build returned an empty graph — fixed by merging unchanged files from DB via `nx.compose()`
|
|
99
|
+
|
|
100
|
+
### Changed
|
|
101
|
+
- Updated CLAUDE.md and Claude Code skill docs with new commands and features
|
|
102
|
+
- Updated CHANGELOG.md to reflect all iterations
|
|
103
|
+
|
|
104
|
+
## [0.1.0] - 2026-04-11
|
|
105
|
+
|
|
106
|
+
### Added
|
|
107
|
+
- Core pipeline: detect → extract → build → embed → cluster → analyze → store
|
|
108
|
+
- HybridRAG search engine combining vector similarity, graph traversal, and FTS5 keyword matching with RRF fusion
|
|
109
|
+
- Tree-sitter AST extraction for Python, JavaScript, TypeScript with regex fallback
|
|
110
|
+
- Hierarchical Leiden community detection at multiple resolutions (0.25, 0.5, 1.0, 2.0)
|
|
111
|
+
- Local embeddings via sentence-transformers (nomic-ai/nomic-embed-code)
|
|
112
|
+
- FAISS vector index for cosine similarity search
|
|
113
|
+
- SQLite + FTS5 full-text search with BM25 ranking
|
|
114
|
+
- CLI commands: `build`, `search`, `stats`, `node`, `export`
|
|
115
|
+
- Graph analysis: PageRank, god node detection, hub analysis, quality metrics
|
|
116
|
+
- File detection for 20+ programming languages
|
|
117
|
+
- `.hedwig-cg-ignore` for excluding files from analysis
|
|
118
|
+
- Privacy-first design: 100% local, no cloud services
|
|
119
|
+
- Claude Code skill documentation for AI tool integration
|
|
120
|
+
- Multi-language README (English, Korean, Japanese)
|
|
121
|
+
- GitHub Actions CI (Python 3.10-3.12, Ubuntu + macOS)
|
|
122
|
+
- CONTRIBUTING.md with development guide
|
|
123
|
+
|
|
124
|
+
[Unreleased]: https://github.com/hedwig-ai/hedwig-code-graph/compare/v0.2.0...HEAD
|
|
125
|
+
[0.2.0]: https://github.com/hedwig-ai/hedwig-code-graph/compare/v0.1.2...v0.2.0
|
|
126
|
+
[0.1.2]: https://github.com/hedwig-ai/hedwig-code-graph/compare/v0.1.0...v0.1.2
|
|
127
|
+
[0.1.0]: https://github.com/hedwig-ai/hedwig-code-graph/releases/tag/v0.1.0
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# Contributing to hedwig-cg
|
|
2
|
+
|
|
3
|
+
Thank you for your interest in contributing to hedwig-cg! This guide will help you get started.
|
|
4
|
+
|
|
5
|
+
## Development Setup
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
# Clone the repository
|
|
9
|
+
git clone https://github.com/hedwig-ai/hedwig-code-graph.git
|
|
10
|
+
cd hedwig-code-graph
|
|
11
|
+
|
|
12
|
+
# Create a virtual environment
|
|
13
|
+
python -m venv .venv
|
|
14
|
+
source .venv/bin/activate # Linux/macOS
|
|
15
|
+
# .venv\Scripts\activate # Windows
|
|
16
|
+
|
|
17
|
+
# Install in development mode with dev dependencies
|
|
18
|
+
pip install -e ".[dev]"
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Running Tests
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
# Run all tests with coverage
|
|
25
|
+
pytest
|
|
26
|
+
|
|
27
|
+
# Run a specific test file
|
|
28
|
+
pytest tests/test_store.py
|
|
29
|
+
|
|
30
|
+
# Run with verbose output
|
|
31
|
+
pytest -v
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Code Style
|
|
35
|
+
|
|
36
|
+
We use [Ruff](https://docs.astral.sh/ruff/) for linting and formatting:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
# Check for issues
|
|
40
|
+
ruff check .
|
|
41
|
+
|
|
42
|
+
# Auto-fix issues
|
|
43
|
+
ruff check --fix .
|
|
44
|
+
|
|
45
|
+
# Format code
|
|
46
|
+
ruff format .
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
**Key conventions:**
|
|
50
|
+
- Line length: 100 characters
|
|
51
|
+
- Target Python: 3.10+
|
|
52
|
+
- Import sorting: isort-compatible (handled by Ruff)
|
|
53
|
+
|
|
54
|
+
## Project Structure
|
|
55
|
+
|
|
56
|
+
```
|
|
57
|
+
hedwig_cg/
|
|
58
|
+
├── cli/ # Click-based CLI interface
|
|
59
|
+
├── core/ # Pipeline stages (detect, extract, build, cluster, analyze)
|
|
60
|
+
├── query/ # Hybrid search engine (vector + graph + keyword + RRF)
|
|
61
|
+
└── storage/ # SQLite + FAISS storage layer
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Making Changes
|
|
65
|
+
|
|
66
|
+
1. **Fork** the repository and create a feature branch from `main`.
|
|
67
|
+
2. **Write tests** for any new functionality in `tests/`.
|
|
68
|
+
3. **Run the test suite** to ensure nothing is broken.
|
|
69
|
+
4. **Follow the existing code style** — Ruff will help enforce this.
|
|
70
|
+
5. **Keep commits focused** — one logical change per commit.
|
|
71
|
+
|
|
72
|
+
## Pull Request Guidelines
|
|
73
|
+
|
|
74
|
+
- Keep PRs focused on a single change.
|
|
75
|
+
- Include a clear description of what the PR does and why.
|
|
76
|
+
- Ensure all tests pass before submitting.
|
|
77
|
+
- Update documentation if you change public APIs or CLI commands.
|
|
78
|
+
|
|
79
|
+
## Architecture Notes
|
|
80
|
+
|
|
81
|
+
The pipeline follows a linear flow:
|
|
82
|
+
|
|
83
|
+
```
|
|
84
|
+
detect → extract → build → embed → cluster → analyze → store
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
- **detect**: Scans directories, classifies files by language.
|
|
88
|
+
- **extract**: Tree-sitter AST extraction with regex fallback.
|
|
89
|
+
- **build**: Assembles a NetworkX DiGraph with deduplication.
|
|
90
|
+
- **embed**: Generates sentence-transformer embeddings locally.
|
|
91
|
+
- **cluster**: Hierarchical Leiden community detection.
|
|
92
|
+
- **analyze**: Structural analysis (god nodes, hubs, quality metrics).
|
|
93
|
+
- **store**: SQLite + FTS5 + FAISS vector index, all in a single file.
|
|
94
|
+
|
|
95
|
+
## Reporting Issues
|
|
96
|
+
|
|
97
|
+
- Use [GitHub Issues](https://github.com/hedwig-ai/hedwig-code-graph/issues) for bug reports and feature requests.
|
|
98
|
+
- Include reproduction steps for bugs.
|
|
99
|
+
- Mention your Python version and OS.
|
|
100
|
+
|
|
101
|
+
## License
|
|
102
|
+
|
|
103
|
+
By contributing, you agree that your contributions will be licensed under the MIT License.
|
hedwig_cg-0.9.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Hedwig AI
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|