hedwig-cg 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. hedwig_cg-0.9.0/.github/workflows/ci.yml +77 -0
  2. hedwig_cg-0.9.0/.github/workflows/release.yml +117 -0
  3. hedwig_cg-0.9.0/.gitignore +54 -0
  4. hedwig_cg-0.9.0/CHANGELOG.md +127 -0
  5. hedwig_cg-0.9.0/CONTRIBUTING.md +103 -0
  6. hedwig_cg-0.9.0/LICENSE +21 -0
  7. hedwig_cg-0.9.0/PKG-INFO +220 -0
  8. hedwig_cg-0.9.0/README.md +171 -0
  9. hedwig_cg-0.9.0/docs/README_de.md +124 -0
  10. hedwig_cg-0.9.0/docs/README_ja.md +124 -0
  11. hedwig_cg-0.9.0/docs/README_ko.md +124 -0
  12. hedwig_cg-0.9.0/docs/README_zh.md +124 -0
  13. hedwig_cg-0.9.0/hedwig_cg/__init__.py +5 -0
  14. hedwig_cg-0.9.0/hedwig_cg/__main__.py +5 -0
  15. hedwig_cg-0.9.0/hedwig_cg/cli/__init__.py +3 -0
  16. hedwig_cg-0.9.0/hedwig_cg/cli/d3.v7.min.js +2 -0
  17. hedwig_cg-0.9.0/hedwig_cg/cli/main.py +2004 -0
  18. hedwig_cg-0.9.0/hedwig_cg/cli/viz_template.html +140 -0
  19. hedwig_cg-0.9.0/hedwig_cg/core/__init__.py +3 -0
  20. hedwig_cg-0.9.0/hedwig_cg/core/analyze.py +134 -0
  21. hedwig_cg-0.9.0/hedwig_cg/core/build.py +245 -0
  22. hedwig_cg-0.9.0/hedwig_cg/core/cluster.py +245 -0
  23. hedwig_cg-0.9.0/hedwig_cg/core/detect.py +171 -0
  24. hedwig_cg-0.9.0/hedwig_cg/core/extract.py +473 -0
  25. hedwig_cg-0.9.0/hedwig_cg/core/lang_detect.py +94 -0
  26. hedwig_cg-0.9.0/hedwig_cg/core/pipeline.py +344 -0
  27. hedwig_cg-0.9.0/hedwig_cg/core/tags_extract.py +1015 -0
  28. hedwig_cg-0.9.0/hedwig_cg/core/ts_extract.py +646 -0
  29. hedwig_cg-0.9.0/hedwig_cg/mcp_server.py +355 -0
  30. hedwig_cg-0.9.0/hedwig_cg/py.typed +0 -0
  31. hedwig_cg-0.9.0/hedwig_cg/queries/c_sharp-tags.scm +17 -0
  32. hedwig_cg-0.9.0/hedwig_cg/queries/kotlin-tags.scm +17 -0
  33. hedwig_cg-0.9.0/hedwig_cg/queries/objc-tags.scm +23 -0
  34. hedwig_cg-0.9.0/hedwig_cg/query/__init__.py +3 -0
  35. hedwig_cg-0.9.0/hedwig_cg/query/embeddings.py +373 -0
  36. hedwig_cg-0.9.0/hedwig_cg/query/hybrid.py +476 -0
  37. hedwig_cg-0.9.0/hedwig_cg/scripts/auto_rebuild.sh +13 -0
  38. hedwig_cg-0.9.0/hedwig_cg/skill.md +103 -0
  39. hedwig_cg-0.9.0/hedwig_cg/storage/__init__.py +3 -0
  40. hedwig_cg-0.9.0/hedwig_cg/storage/store.py +616 -0
  41. hedwig_cg-0.9.0/hedwig_cg/utils/__init__.py +1 -0
  42. hedwig_cg-0.9.0/pyproject.toml +72 -0
  43. hedwig_cg-0.9.0/tests/__init__.py +0 -0
  44. hedwig_cg-0.9.0/tests/test_build.py +86 -0
  45. hedwig_cg-0.9.0/tests/test_cli.py +482 -0
  46. hedwig_cg-0.9.0/tests/test_community.py +135 -0
  47. hedwig_cg-0.9.0/tests/test_detect.py +57 -0
  48. hedwig_cg-0.9.0/tests/test_e2e_pipeline.py +267 -0
  49. hedwig_cg-0.9.0/tests/test_extract.py +144 -0
  50. hedwig_cg-0.9.0/tests/test_hybrid.py +62 -0
  51. hedwig_cg-0.9.0/tests/test_incremental.py +78 -0
  52. hedwig_cg-0.9.0/tests/test_lang_detect.py +104 -0
  53. hedwig_cg-0.9.0/tests/test_markdown_extract.py +97 -0
  54. hedwig_cg-0.9.0/tests/test_mcp_server.py +470 -0
  55. hedwig_cg-0.9.0/tests/test_pipeline.py +178 -0
  56. hedwig_cg-0.9.0/tests/test_store.py +103 -0
  57. hedwig_cg-0.9.0/tests/test_ts_extract_js.py +196 -0
  58. hedwig_cg-0.9.0/tests/test_ts_extract_typescript.py +259 -0
  59. hedwig_cg-0.9.0/tests/test_visualization.py +114 -0
@@ -0,0 +1,77 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ paths:
7
+ - "hedwig_cg/**"
8
+ - "tests/**"
9
+ - "pyproject.toml"
10
+ - ".github/workflows/ci.yml"
11
+ pull_request:
12
+ branches: [main]
13
+ paths:
14
+ - "hedwig_cg/**"
15
+ - "tests/**"
16
+ - "pyproject.toml"
17
+ - ".github/workflows/ci.yml"
18
+ permissions:
19
+ contents: read
20
+
21
+ jobs:
22
+ test:
23
+ runs-on: ${{ matrix.os }}
24
+ strategy:
25
+ fail-fast: false
26
+ matrix:
27
+ os: [ubuntu-latest, macos-latest]
28
+ python-version: ["3.10", "3.11", "3.12"]
29
+
30
+ steps:
31
+ - uses: actions/checkout@v4
32
+
33
+ - name: Set up Python ${{ matrix.python-version }}
34
+ uses: actions/setup-python@v5
35
+ with:
36
+ python-version: ${{ matrix.python-version }}
37
+ cache: pip
38
+
39
+ - name: Install dependencies
40
+ run: |
41
+ python -m pip install --upgrade pip
42
+ pip install -e ".[dev]"
43
+
44
+ - name: Lint with ruff
45
+ run: ruff check hedwig_cg/
46
+
47
+ - name: Run tests
48
+ run: pytest --tb=short -q
49
+
50
+ build:
51
+ runs-on: ubuntu-latest
52
+ needs: test
53
+ steps:
54
+ - uses: actions/checkout@v4
55
+
56
+ - name: Set up Python
57
+ uses: actions/setup-python@v5
58
+ with:
59
+ python-version: "3.12"
60
+ cache: pip
61
+
62
+ - name: Build package
63
+ run: |
64
+ pip install build
65
+ python -m build
66
+
67
+ - name: Verify wheel contents
68
+ run: |
69
+ pip install dist/*.whl
70
+ hedwig-cg --version
71
+
72
+ - name: Upload build artifacts
73
+ uses: actions/upload-artifact@v4
74
+ with:
75
+ name: dist
76
+ path: dist/
77
+
@@ -0,0 +1,117 @@
1
+ name: Release
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ inputs:
6
+ version:
7
+ description: "Release version (e.g. v0.1.0)"
8
+ required: true
9
+ type: string
10
+
11
+ permissions:
12
+ contents: write
13
+
14
+ jobs:
15
+ test:
16
+ runs-on: ubuntu-latest
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - name: Set up Python
21
+ uses: actions/setup-python@v5
22
+ with:
23
+ python-version: "3.12"
24
+ cache: pip
25
+
26
+ - name: Install dependencies
27
+ run: |
28
+ python -m pip install --upgrade pip
29
+ pip install -e ".[dev]"
30
+
31
+ - name: Lint
32
+ run: ruff check hedwig_cg/
33
+
34
+ - name: Test
35
+ run: pytest --tb=short -q
36
+
37
+ release:
38
+ runs-on: ubuntu-latest
39
+ needs: test
40
+ environment: pypi
41
+ steps:
42
+ - uses: actions/checkout@v4
43
+ with:
44
+ fetch-depth: 0
45
+
46
+ - name: Set up Python
47
+ uses: actions/setup-python@v5
48
+ with:
49
+ python-version: "3.12"
50
+ cache: pip
51
+
52
+ - name: Validate version format
53
+ env:
54
+ VERSION: ${{ inputs.version }}
55
+ run: |
56
+ if [[ ! "$VERSION" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
57
+ echo "::error::Version must match vX.Y.Z format (e.g. v0.1.0)"
58
+ exit 1
59
+ fi
60
+
61
+ - name: Check tag does not already exist
62
+ env:
63
+ VERSION: ${{ inputs.version }}
64
+ run: |
65
+ if git rev-parse "$VERSION" >/dev/null 2>&1; then
66
+ echo "::error::Tag $VERSION already exists"
67
+ exit 1
68
+ fi
69
+
70
+ - name: Build package
71
+ run: |
72
+ pip install build
73
+ python -m build
74
+
75
+ - name: Verify wheel
76
+ run: |
77
+ pip install dist/*.whl
78
+ hedwig-cg --version
79
+
80
+ - name: Publish to PyPI
81
+ uses: pypa/gh-action-pypi-publish@release/v1
82
+ with:
83
+ password: ${{ secrets.PYPI_API_TOKEN }}
84
+
85
+ - name: Generate changelog
86
+ env:
87
+ VERSION: ${{ inputs.version }}
88
+ REPO: ${{ github.repository }}
89
+ run: |
90
+ PREV_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "")
91
+
92
+ if [ -z "$PREV_TAG" ]; then
93
+ COMMITS=$(git log --pretty=format:"- %s (%h)" --no-merges)
94
+ COMPARE_BASE=$(git rev-list --max-parents=0 HEAD | head -1)
95
+ else
96
+ COMMITS=$(git log "${PREV_TAG}..HEAD" --pretty=format:"- %s (%h)" --no-merges)
97
+ COMPARE_BASE="$PREV_TAG"
98
+ fi
99
+
100
+ {
101
+ echo "## What's Changed"
102
+ echo ""
103
+ echo "$COMMITS"
104
+ echo ""
105
+ echo "**Full Changelog**: https://github.com/${REPO}/compare/${COMPARE_BASE}...${VERSION}"
106
+ } > release_notes.md
107
+
108
+ - name: Create GitHub Release
109
+ env:
110
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
111
+ VERSION: ${{ inputs.version }}
112
+ SHA: ${{ github.sha }}
113
+ run: |
114
+ gh release create "$VERSION" dist/* \
115
+ --title "$VERSION" \
116
+ --notes-file release_notes.md \
117
+ --target "$SHA"
@@ -0,0 +1,54 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ dist/
7
+ build/
8
+ *.egg
9
+ .eggs/
10
+
11
+ # Virtual environments
12
+ .venv/
13
+ venv/
14
+ env/
15
+
16
+ # IDE
17
+ .idea/
18
+ .vscode/
19
+ *.swp
20
+ *.swo
21
+
22
+ # OS
23
+ .DS_Store
24
+ Thumbs.db
25
+
26
+ # hedwig-cg databases (generated)
27
+ .hedwig-cg/
28
+ .hedwig-cb/
29
+
30
+ # Claude Code
31
+ .claude/
32
+ CLAUDE.md
33
+
34
+ # Testing
35
+ .pytest_cache/
36
+ .coverage
37
+ htmlcov/
38
+ .tox/
39
+ .mypy_cache/
40
+
41
+ # Internal review feedback (not for public repo)
42
+ feedbacks/
43
+
44
+ # Generated integration files (from hedwig-cg install commands)
45
+ .cursor/
46
+ .windsurf/
47
+ .codex/
48
+ AGENTS.md
49
+ GEMINI.md
50
+ CONVENTIONS.md
51
+ .aider.conf.yml
52
+
53
+ # OMC state
54
+ .omc/
@@ -0,0 +1,127 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.2.0] - 2026-04-11
9
+
10
+ ### Added
11
+ - **Cursor IDE integration** (`hedwig-cg cursor install/uninstall`): Creates `.cursor/rules/hedwig-cg.mdc` with alwaysApply rules
12
+ - **Windsurf IDE integration** (`hedwig-cg windsurf install/uninstall`): Creates `.windsurf/rules/hedwig-cg.md` for Cascade
13
+ - **Cline (VS Code extension) integration** as 8th supported AI agent
14
+ - **`hedwig-cg doctor` command**: 21-point installation health check (Python version, deps, tree-sitter parsers, MCP, embedding models, DB integrity, FAISS indexes)
15
+ - **MCP tool descriptions optimized for AI agents**: `search` marked as PRIMARY tool, `communities` marked as "rarely needed", `instructions` guide agents to start with search
16
+ - **AI Agent Interface Design Principle** documented in CLAUDE.md: minimal interface philosophy to prevent hallucination
17
+ - **Weighted Reciprocal Rank Fusion**: Per-signal weights (code_vec=1.0, text_vec=1.0, graph=0.8, keyword=1.5, community=0.7) tuned for optimal search quality
18
+ - **Stopword filtering**: 80+ common English stopwords removed from keyword/community search terms for improved FTS5 precision
19
+ - **LRU search result cache** (128 entries): Instant return for repeated queries, auto-cleared on graph rebuild
20
+ - **Query embedding LRU cache** (256 entries): Eliminates re-encoding for identical queries (291ms → 0ms)
21
+ - `extract_search_terms()` public API for reusable stopword-filtered term extraction
22
+ - `clear_search_cache()` and `clear_query_cache()` public APIs
23
+ - `weights` parameter on `hybrid_search()` for runtime signal weight tuning
24
+ - **Weight-aware graph expansion**: BFS traversal now uses edge weights (semantic similarity + confidence + proximity) and relation-type weights (`calls`/`inherits`=1.0, `imports`=0.7, `defines`=0.5, `contains`=0.3) instead of uniform hop distance
25
+ - `RELATION_WEIGHTS` dictionary for configurable per-relation expansion priority
26
+ - **Parent class context in embeddings**: Method/constructor/property nodes now include "method of ClassName" in embedding text for better class-membership queries
27
+ - **Query-relevant snippets**: Search results now show the most query-term-dense region of source code instead of blind truncation from the start
28
+ - **MCP Server** (`hedwig-cg mcp`): Model Context Protocol server exposing 5 tools (search, node, stats, communities, build) over stdio transport for universal AI agent integration
29
+ - **Search signal explainability**: Each result now includes per-signal RRF contribution breakdown (code_vector, text_vector, graph, keyword, community) in CLI table and MCP output
30
+ - **JS/TS call graph extraction**: Tree-sitter now extracts function/method calls in JavaScript and TypeScript (previously only Python had call tracking), with JS builtin filtering
31
+ - **Pipeline stage timing**: Build command now displays per-stage wall-clock timing breakdown (detect, extract, build, pagerank, embed, cluster, analyze, store) with total elapsed time
32
+ - **Incremental embedding**: `--incremental` builds now skip re-embedding unchanged nodes by checking existing embeddings in DB, reducing rebuild time by up to 95% (8.7s → 0.4s when no files changed)
33
+ - **Fast search mode**: `--fast` flag uses text model only, skipping code model loading for lower cold-start latency; available in CLI, REPL, and MCP server
34
+ - **REPL model preloading**: `hedwig-cg query` REPL now preloads embedding models in a background thread so first search is faster
35
+ - **Python decorator extraction**: Decorators (`@dataclass`, `@cli.command()`, `@staticmethod`, etc.) are now extracted and stored as node attributes, enriching embeddings for decorator-aware search
36
+ - **Search result line numbers**: Results now include `start_line`/`end_line` in CLI (`file.py:42`), MCP server (`file.py:42-67`), and SearchResult API — enabling AI agents to navigate directly to code
37
+
38
+ ### Changed
39
+ - README updated with real benchmarks (9.5s full build, 0.4s incremental, 0.08s warm search), new features (fast search, line numbers, decorator extraction, incremental embedding), and revised optimizations list
40
+ - FAISS index loading now uses `IO_FLAG_MMAP` for lower RSS and faster cold starts on large indices (with automatic fallback)
41
+ - Pipeline automatically clears search result and query embedding caches after rebuild
42
+ - RRF keyword weight boosted from 1.0 → 1.5 so exact-match code entities rank higher
43
+ - Graph expansion seeds increased from top-5 to top-8 for broader graph signal coverage
44
+
45
+ ### Fixed
46
+ - **CI failure**: Added `mcp>=1.0` to dev dependencies and `pytest.importorskip("mcp")` guard for graceful skip
47
+ - **MCP stats tool**: Replaced the call to the non-existent `compute_god_nodes` with `analyze()` from the analyze module, using the `god_nodes` field of the returned `AnalysisResult`
48
+ - **Fast mode variable shadowing**: `code_vector_hits` was incorrectly overwritten with text-model results
49
+
50
+ ### Performance
51
+ - Search performance improved ~46% (5.9s → 3.2s) via FAISS disk persistence and graph expansion caching
52
+ - Query embedding cache hit: 291ms → 0ms (repeated queries skip re-encoding entirely)
53
+ - FAISS mmap loading reduces memory footprint for large indices
54
+ - Warm search: 0.02s, cached search: 0.006s (986 nodes / 2091 edges)
55
+
56
+ ## [0.1.2] - 2026-04-11
57
+
58
+ ### Added
59
+ - **Chinese (简体中文) README** (`docs/README_zh.md`)
60
+ - **German (Deutsch) README** (`docs/README_de.md`)
61
+ - Cross-language navigation links across all 5 README variants (en, ko, ja, zh, de)
62
+
63
+ ### Fixed
64
+ - Corrected HybridRAG signal count from "6-signal" to "5-signal" across all documentation, code comments, and CLAUDE.md (the actual RRF receives 5 ranked lists: code vector, text vector, graph, keyword, community)
65
+ - Clarified `hedwig-cg search` as the single primary HybridRAG entry point in skill rules and PreToolUse hook
66
+
67
+ ## [Unreleased]
68
+
69
+ ### Added
70
+ - **Community-aware HybridRAG**: 5-signal search (code vector + text vector + graph + keyword + community)
71
+ - **Community summaries**: Auto-generated keyword-rich text from node labels, kinds, docstrings, and file paths
72
+ - **`hedwig-cg communities` CLI command**: List, filter by level, and search communities
73
+ - **Markdown document extraction**: Headings become section nodes with hierarchy, internal links become reference edges
74
+ - **Incremental build** (`--incremental`): SHA-256 content hashing skips unchanged files for fast rebuilds
75
+ - **Embedding download UX**: Rich console message on first model download (~80MB)
76
+ - `community_search()` method in KnowledgeStore for summary-based community lookup
77
+ - **D3.js export format** (`--format d3`): Force-directed graph JSON with PageRank-based sizing and kind-based grouping
78
+ - **`hedwig-cg visualize` CLI command**: Self-contained interactive HTML visualization with zoom, search, tooltips, and drag
79
+ - **`hedwig-cg clean` CLI command**: Remove .hedwig-cg/ database directory with confirmation prompt
80
+ - **Graph quality metrics in `stats`**: Density, connected components, average clustering coefficient
81
+ - Comprehensive CLI command tests (communities, search, d3 export, visualize, clean)
82
+ - Comprehensive JavaScript tree-sitter extraction tests (17 tests)
83
+ - **`hedwig-cg query` REPL**: Interactive search session with `:node`, `:stats`, `:quit` commands
84
+ - **`--offline` flag for `visualize`**: Inlines D3.js (~280KB) for airgapped/offline environments
85
+ - **TypeScript-specific extraction**: Interfaces (with extends/method signatures), type aliases, enums with member extraction
86
+ - E2E integration tests for full pipeline (build → store → search → incremental → export → clean)
87
+ - TypeScript-specific tree-sitter extraction tests (12 tests)
88
+ - 160 tests with 87% code coverage (up from 61 tests)
89
+ - **PyPI classifiers expansion**: Python 3.10/3.11/3.12, AI/NLP topics, `Typing :: Typed`, OS Independent
90
+ - **GitHub Actions PyPI publish**: Automated deployment on GitHub Release via `pypa/gh-action-pypi-publish`
91
+
92
+ ### Fixed
93
+ - **Critical**: `dependencies` in pyproject.toml was under `[project.urls]` TOML section, causing wheel to declare zero dependencies
94
+ - Resolved all 27 ruff lint errors (import sorting, unused variables, line length)
95
+ - Removed legacy ignore-file backward compatibility reference
96
+ - Removed stale `build_hnsw_index` backward-compat alias from store.py
97
+ - Fixed `try_to_load_from_cache` return value check in embeddings.py (operator precedence bug)
98
+ - **Critical**: Incremental build second run returned empty graph — fixed by merging unchanged files from DB via `nx.compose()`
99
+
100
+ ### Changed
101
+ - Updated CLAUDE.md and Claude Code skill docs with new commands and features
102
+ - Updated CHANGELOG.md to reflect all iterations
103
+
104
+ ## [0.1.0] - 2026-04-11
105
+
106
+ ### Added
107
+ - Core pipeline: detect → extract → build → embed → cluster → analyze → store
108
+ - HybridRAG search engine combining vector similarity, graph traversal, and FTS5 keyword matching with RRF fusion
109
+ - Tree-sitter AST extraction for Python, JavaScript, TypeScript with regex fallback
110
+ - Hierarchical Leiden community detection at multiple resolutions (0.25, 0.5, 1.0, 2.0)
111
+ - Local embeddings via sentence-transformers (nomic-ai/nomic-embed-code)
112
+ - FAISS vector index for cosine similarity search
113
+ - SQLite + FTS5 full-text search with BM25 ranking
114
+ - CLI commands: `build`, `search`, `stats`, `node`, `export`
115
+ - Graph analysis: PageRank, god node detection, hub analysis, quality metrics
116
+ - File detection for 20+ programming languages
117
+ - `.hedwig-cg-ignore` for excluding files from analysis
118
+ - Privacy-first design: 100% local, no cloud services
119
+ - Claude Code skill documentation for AI tool integration
120
+ - Multi-language README (English, Korean, Japanese)
121
+ - GitHub Actions CI (Python 3.10-3.12, Ubuntu + macOS)
122
+ - CONTRIBUTING.md with development guide
123
+
124
+ [Unreleased]: https://github.com/hedwig-ai/hedwig-code-graph/compare/v0.2.0...HEAD
125
+ [0.2.0]: https://github.com/hedwig-ai/hedwig-code-graph/compare/v0.1.2...v0.2.0
126
+ [0.1.2]: https://github.com/hedwig-ai/hedwig-code-graph/compare/v0.1.0...v0.1.2
127
+ [0.1.0]: https://github.com/hedwig-ai/hedwig-code-graph/releases/tag/v0.1.0
@@ -0,0 +1,103 @@
1
+ # Contributing to hedwig-cg
2
+
3
+ Thank you for your interest in contributing to hedwig-cg! This guide will help you get started.
4
+
5
+ ## Development Setup
6
+
7
+ ```bash
8
+ # Clone the repository
9
+ git clone https://github.com/hedwig-ai/hedwig-code-graph.git
10
+ cd hedwig-code-graph
11
+
12
+ # Create a virtual environment
13
+ python -m venv .venv
14
+ source .venv/bin/activate # Linux/macOS
15
+ # .venv\Scripts\activate # Windows
16
+
17
+ # Install in development mode with dev dependencies
18
+ pip install -e ".[dev]"
19
+ ```
20
+
21
+ ## Running Tests
22
+
23
+ ```bash
24
+ # Run all tests with coverage
25
+ pytest
26
+
27
+ # Run a specific test file
28
+ pytest tests/test_store.py
29
+
30
+ # Run with verbose output
31
+ pytest -v
32
+ ```
33
+
34
+ ## Code Style
35
+
36
+ We use [Ruff](https://docs.astral.sh/ruff/) for linting and formatting:
37
+
38
+ ```bash
39
+ # Check for issues
40
+ ruff check .
41
+
42
+ # Auto-fix issues
43
+ ruff check --fix .
44
+
45
+ # Format code
46
+ ruff format .
47
+ ```
48
+
49
+ **Key conventions:**
50
+ - Line length: 100 characters
51
+ - Target Python: 3.10+
52
+ - Import sorting: isort-compatible (handled by Ruff)
53
+
54
+ ## Project Structure
55
+
56
+ ```
57
+ hedwig_cg/
58
+ ├── cli/ # Click-based CLI interface
59
+ ├── core/ # Pipeline stages (detect, extract, build, cluster, analyze)
60
+ ├── query/ # Hybrid search engine (vector + graph + keyword + RRF)
61
+ └── storage/ # SQLite + FAISS storage layer
62
+ ```
63
+
64
+ ## Making Changes
65
+
66
+ 1. **Fork** the repository and create a feature branch from `main`.
67
+ 2. **Write tests** for any new functionality in `tests/`.
68
+ 3. **Run the test suite** to ensure nothing is broken.
69
+ 4. **Follow the existing code style** — Ruff will help enforce this.
70
+ 5. **Keep commits focused** — one logical change per commit.
71
+
72
+ ## Pull Request Guidelines
73
+
74
+ - Keep PRs focused on a single change.
75
+ - Include a clear description of what the PR does and why.
76
+ - Ensure all tests pass before submitting.
77
+ - Update documentation if you change public APIs or CLI commands.
78
+
79
+ ## Architecture Notes
80
+
81
+ The pipeline follows a linear flow:
82
+
83
+ ```
84
+ detect → extract → build → embed → cluster → analyze → store
85
+ ```
86
+
87
+ - **detect**: Scans directories, classifies files by language.
88
+ - **extract**: Tree-sitter AST extraction with regex fallback.
89
+ - **build**: Assembles a NetworkX DiGraph with deduplication.
90
+ - **embed**: Generates sentence-transformer embeddings locally.
91
+ - **cluster**: Hierarchical Leiden community detection.
92
+ - **analyze**: Structural analysis (god nodes, hubs, quality metrics).
93
+ - **store**: SQLite + FTS5 + FAISS vector index, all in a single file.
94
+
95
+ ## Reporting Issues
96
+
97
+ - Use [GitHub Issues](https://github.com/hedwig-ai/hedwig-code-graph/issues) for bug reports and feature requests.
98
+ - Include reproduction steps for bugs.
99
+ - Mention your Python version and OS.
100
+
101
+ ## License
102
+
103
+ By contributing, you agree that your contributions will be licensed under the MIT License.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Hedwig AI
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.