sia-code 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. sia_code-0.2.1/PKG-INFO +182 -0
  2. sia_code-0.2.1/README.md +122 -0
  3. sia_code-0.2.1/pyproject.toml +116 -0
  4. sia_code-0.2.1/setup.cfg +4 -0
  5. sia_code-0.2.1/sia_code/__init__.py +7 -0
  6. sia_code-0.2.1/sia_code/cli.py +981 -0
  7. sia_code-0.2.1/sia_code/config.py +81 -0
  8. sia_code-0.2.1/sia_code/core/__init__.py +0 -0
  9. sia_code-0.2.1/sia_code/core/models.py +121 -0
  10. sia_code-0.2.1/sia_code/core/types.py +131 -0
  11. sia_code-0.2.1/sia_code/indexer/__init__.py +0 -0
  12. sia_code-0.2.1/sia_code/indexer/chunk_index.py +306 -0
  13. sia_code-0.2.1/sia_code/indexer/coordinator.py +644 -0
  14. sia_code-0.2.1/sia_code/indexer/embedder.py +0 -0
  15. sia_code-0.2.1/sia_code/indexer/hash_cache.py +194 -0
  16. sia_code-0.2.1/sia_code/indexer/metrics.py +68 -0
  17. sia_code-0.2.1/sia_code/parser/__init__.py +0 -0
  18. sia_code-0.2.1/sia_code/parser/chunker.py +221 -0
  19. sia_code-0.2.1/sia_code/parser/concepts.py +225 -0
  20. sia_code-0.2.1/sia_code/parser/engine.py +124 -0
  21. sia_code-0.2.1/sia_code/parser/languages/__init__.py +0 -0
  22. sia_code-0.2.1/sia_code/search/__init__.py +0 -0
  23. sia_code-0.2.1/sia_code/search/entity_extractor.py +222 -0
  24. sia_code-0.2.1/sia_code/search/multi_hop.py +196 -0
  25. sia_code-0.2.1/sia_code/search/service.py +0 -0
  26. sia_code-0.2.1/sia_code/search/single_hop.py +0 -0
  27. sia_code-0.2.1/sia_code/storage/__init__.py +0 -0
  28. sia_code-0.2.1/sia_code/storage/backend.py +297 -0
  29. sia_code-0.2.1/sia_code.egg-info/PKG-INFO +182 -0
  30. sia_code-0.2.1/sia_code.egg-info/SOURCES.txt +32 -0
  31. sia_code-0.2.1/sia_code.egg-info/dependency_links.txt +1 -0
  32. sia_code-0.2.1/sia_code.egg-info/entry_points.txt +2 -0
  33. sia_code-0.2.1/sia_code.egg-info/requires.txt +38 -0
  34. sia_code-0.2.1/sia_code.egg-info/top_level.txt +1 -0
@@ -0,0 +1,182 @@
1
+ Metadata-Version: 2.4
2
+ Name: sia-code
3
+ Version: 0.2.1
4
+ Summary: Local-first codebase intelligence with semantic search, multi-hop research, and 12-language AST support
5
+ Author: Sia Code Contributors
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/DxTa/sia-code
8
+ Project-URL: Documentation, https://github.com/DxTa/sia-code#readme
9
+ Project-URL: Repository, https://github.com/DxTa/sia-code
10
+ Project-URL: Issues, https://github.com/DxTa/sia-code/issues
11
+ Keywords: code-search,semantic-search,code-indexing,codebase-intelligence,tree-sitter,ast,code-analysis,developer-tools,cli
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Software Development
22
+ Classifier: Topic :: Software Development :: Documentation
23
+ Classifier: Topic :: Text Processing :: Indexing
24
+ Requires-Python: >=3.10
25
+ Description-Content-Type: text/markdown
26
+ Requires-Dist: memvid-sdk>=2.0
27
+ Requires-Dist: tree-sitter>=0.21.0
28
+ Requires-Dist: tree-sitter-python>=0.21.0
29
+ Requires-Dist: tree-sitter-javascript>=0.21.0
30
+ Requires-Dist: tree-sitter-typescript>=0.21.0
31
+ Requires-Dist: tree-sitter-go>=0.21.0
32
+ Requires-Dist: tree-sitter-rust>=0.21.0
33
+ Requires-Dist: tree-sitter-java>=0.21.0
34
+ Requires-Dist: tree-sitter-c>=0.21.0
35
+ Requires-Dist: tree-sitter-cpp>=0.21.0
36
+ Requires-Dist: tree-sitter-c-sharp>=0.21.0
37
+ Requires-Dist: tree-sitter-ruby>=0.21.0
38
+ Requires-Dist: tree-sitter-php>=0.21.0
39
+ Requires-Dist: click>=8.0
40
+ Requires-Dist: rich>=13.0
41
+ Requires-Dist: pathspec>=0.11
42
+ Requires-Dist: pydantic>=2.0
43
+ Requires-Dist: prompt-toolkit>=3.0
44
+ Requires-Dist: watchdog>=3.0
45
+ Provides-Extra: openai
46
+ Requires-Dist: openai>=1.0; extra == "openai"
47
+ Provides-Extra: pdf
48
+ Requires-Dist: pypdf>=3.0; extra == "pdf"
49
+ Provides-Extra: all
50
+ Requires-Dist: openai>=1.0; extra == "all"
51
+ Requires-Dist: pypdf>=3.0; extra == "all"
52
+ Provides-Extra: dev
53
+ Requires-Dist: pytest>=7.0; extra == "dev"
54
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
55
+ Requires-Dist: black>=23.0; extra == "dev"
56
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
57
+ Requires-Dist: build>=1.0; extra == "dev"
58
+ Requires-Dist: twine>=4.0; extra == "dev"
59
+ Requires-Dist: bump-my-version>=0.20; extra == "dev"
60
+
61
+ # Sia Code
62
+
63
+ **v0.2** - Local-first codebase search with semantic understanding and multi-hop code discovery.
64
+
65
+ ## Features
66
+
67
+ - **Semantic Search** - Natural language queries with OpenAI embeddings (auto-fallback to lexical)
68
+ - **Multi-Hop Research** - Automatically discover code relationships and call graphs
69
+ - **12 Languages** - Python, JS/TS, Go, Rust, Java, C/C++, C#, Ruby, PHP (full AST support)
70
+ - **Interactive Mode** - Live search with result navigation and export
71
+ - **Watch Mode** - Auto-reindex on file changes
72
+ - **Portable** - Single `.mv2` file storage, no database required
73
+
74
+ ## Installation
75
+
76
+ ```bash
77
+ # From PyPI (recommended)
78
+ pip install sia-code
79
+
80
+ # Or with uv
81
+ uv tool install sia-code
82
+
83
+ # Or from source
84
+ uv tool install git+https://github.com/DxTa/sia-code.git
85
+
86
+ # Verify installation
87
+ sia-code --version
88
+ ```
89
+
90
+ ## Quick Start
91
+
92
+ ```bash
93
+ # Initialize and index
94
+ sia-code init
95
+ sia-code index .
96
+
97
+ # Search
98
+ sia-code search "authentication logic" # Semantic search
99
+ sia-code search --regex "def.*login" # Regex search
100
+
101
+ # Multi-hop research (discover relationships)
102
+ sia-code research "how does the API handle errors?"
103
+
104
+ # Check index health
105
+ sia-code status
106
+ ```
107
+
108
+ ## Commands
109
+
110
+ | Command | Description |
111
+ |---------|-------------|
112
+ | `sia-code init` | Initialize index in current directory |
113
+ | `sia-code index .` | Index codebase (first time) |
114
+ | `sia-code index --update` | Re-index only changed files (10x faster) |
115
+ | `sia-code index --clean` | Full rebuild from scratch |
116
+ | `sia-code index --watch` | Auto-reindex on file changes |
117
+ | `sia-code search "query"` | Semantic or regex search |
118
+ | `sia-code research "question"` | Multi-hop code discovery with `--graph` |
119
+ | `sia-code interactive` | Live search mode with result navigation |
120
+ | `sia-code status` | Index health and staleness metrics |
121
+ | `sia-code compact` | Remove stale chunks when index grows |
122
+ | `sia-code config show` | View configuration |
123
+
124
+ ## Configuration
125
+
126
+ **Semantic search** requires an OpenAI API key (optional):
127
+
128
+ ```bash
129
+ export OPENAI_API_KEY=sk-your-key-here
130
+ sia-code init
131
+ sia-code index .
132
+ ```
133
+
134
+ **Without API key:** Searches automatically fall back to lexical/regex mode. No crashes.
135
+
136
+ **Edit config** at `.sia-code/config.json` to:
137
+ - Change embedding model (`openai-small`, `openai-large`, `bge-small`)
138
+ - Exclude patterns (`node_modules/`, `__pycache__/`, etc.)
139
+ - Adjust chunk sizes
140
+
141
+ View config: `sia-code config show`
142
+
143
+ ## Output Formats
144
+
145
+ ```bash
146
+ sia-code search "query" --format json # JSON output
147
+ sia-code search "query" --format table # Rich table
148
+ sia-code search "query" --format csv # CSV for Excel
149
+ sia-code search "query" --output results.json # Save to file
150
+ ```
151
+
152
+ ## Supported Languages
153
+
154
+ **Full AST Support (12):** Python, JavaScript, TypeScript, JSX, TSX, Go, Rust, Java, C, C++, C#, Ruby, PHP
155
+
156
+ **Recognized:** Kotlin, Groovy, Swift, Bash, Vue, Svelte, and more (indexed as text)
157
+
158
+ ## Troubleshooting
159
+
160
+ | Issue | Solution |
161
+ |-------|----------|
162
+ | No API key warning | Normal - searches fall back to lexical mode |
163
+ | Index growing large | Run `sia-code compact` to remove stale chunks |
164
+ | Slow indexing | Use `sia-code index --update` for incremental indexing |
165
+ | Stale search results | Run `sia-code index --clean` to rebuild |
166
+
167
+ ## How It Works
168
+
169
+ 1. **Parse** - Tree-sitter generates AST for each file
170
+ 2. **Chunk** - cAST algorithm creates semantic chunks (functions, classes)
171
+ 3. **Embed** - Optional OpenAI embeddings for semantic search
172
+ 4. **Store** - Single portable `.mv2` file with Memvid
173
+ 5. **Search** - Hybrid BM25 + vector similarity
174
+
175
+ ## Links
176
+
177
+ - [ROADMAP.md](ROADMAP.md) - Future development plans
178
+ - [KNOWN_LIMITATIONS.md](KNOWN_LIMITATIONS.md) - Current limitations and workarounds
179
+
180
+ ## License
181
+
182
+ MIT
@@ -0,0 +1,122 @@
1
+ # Sia Code
2
+
3
+ **v0.2** - Local-first codebase search with semantic understanding and multi-hop code discovery.
4
+
5
+ ## Features
6
+
7
+ - **Semantic Search** - Natural language queries with OpenAI embeddings (auto-fallback to lexical)
8
+ - **Multi-Hop Research** - Automatically discover code relationships and call graphs
9
+ - **12 Languages** - Python, JS/TS, Go, Rust, Java, C/C++, C#, Ruby, PHP (full AST support)
10
+ - **Interactive Mode** - Live search with result navigation and export
11
+ - **Watch Mode** - Auto-reindex on file changes
12
+ - **Portable** - Single `.mv2` file storage, no database required
13
+
14
+ ## Installation
15
+
16
+ ```bash
17
+ # From PyPI (recommended)
18
+ pip install sia-code
19
+
20
+ # Or with uv
21
+ uv tool install sia-code
22
+
23
+ # Or from source
24
+ uv tool install git+https://github.com/DxTa/sia-code.git
25
+
26
+ # Verify installation
27
+ sia-code --version
28
+ ```
29
+
30
+ ## Quick Start
31
+
32
+ ```bash
33
+ # Initialize and index
34
+ sia-code init
35
+ sia-code index .
36
+
37
+ # Search
38
+ sia-code search "authentication logic" # Semantic search
39
+ sia-code search --regex "def.*login" # Regex search
40
+
41
+ # Multi-hop research (discover relationships)
42
+ sia-code research "how does the API handle errors?"
43
+
44
+ # Check index health
45
+ sia-code status
46
+ ```
47
+
48
+ ## Commands
49
+
50
+ | Command | Description |
51
+ |---------|-------------|
52
+ | `sia-code init` | Initialize index in current directory |
53
+ | `sia-code index .` | Index codebase (first time) |
54
+ | `sia-code index --update` | Re-index only changed files (10x faster) |
55
+ | `sia-code index --clean` | Full rebuild from scratch |
56
+ | `sia-code index --watch` | Auto-reindex on file changes |
57
+ | `sia-code search "query"` | Semantic or regex search |
58
+ | `sia-code research "question"` | Multi-hop code discovery with `--graph` |
59
+ | `sia-code interactive` | Live search mode with result navigation |
60
+ | `sia-code status` | Index health and staleness metrics |
61
+ | `sia-code compact` | Remove stale chunks when index grows |
62
+ | `sia-code config show` | View configuration |
63
+
64
+ ## Configuration
65
+
66
+ **Semantic search** requires an OpenAI API key (optional):
67
+
68
+ ```bash
69
+ export OPENAI_API_KEY=sk-your-key-here
70
+ sia-code init
71
+ sia-code index .
72
+ ```
73
+
74
+ **Without API key:** Searches automatically fall back to lexical/regex mode. No crashes.
75
+
76
+ **Edit config** at `.sia-code/config.json` to:
77
+ - Change embedding model (`openai-small`, `openai-large`, `bge-small`)
78
+ - Exclude patterns (`node_modules/`, `__pycache__/`, etc.)
79
+ - Adjust chunk sizes
80
+
81
+ View config: `sia-code config show`
82
+
83
+ ## Output Formats
84
+
85
+ ```bash
86
+ sia-code search "query" --format json # JSON output
87
+ sia-code search "query" --format table # Rich table
88
+ sia-code search "query" --format csv # CSV for Excel
89
+ sia-code search "query" --output results.json # Save to file
90
+ ```
91
+
92
+ ## Supported Languages
93
+
94
+ **Full AST Support (12):** Python, JavaScript, TypeScript, JSX, TSX, Go, Rust, Java, C, C++, C#, Ruby, PHP
95
+
96
+ **Recognized:** Kotlin, Groovy, Swift, Bash, Vue, Svelte, and more (indexed as text)
97
+
98
+ ## Troubleshooting
99
+
100
+ | Issue | Solution |
101
+ |-------|----------|
102
+ | No API key warning | Normal - searches fall back to lexical mode |
103
+ | Index growing large | Run `sia-code compact` to remove stale chunks |
104
+ | Slow indexing | Use `sia-code index --update` for incremental indexing |
105
+ | Stale search results | Run `sia-code index --clean` to rebuild |
106
+
107
+ ## How It Works
108
+
109
+ 1. **Parse** - Tree-sitter generates AST for each file
110
+ 2. **Chunk** - cAST algorithm creates semantic chunks (functions, classes)
111
+ 3. **Embed** - Optional OpenAI embeddings for semantic search
112
+ 4. **Store** - Single portable `.mv2` file with Memvid
113
+ 5. **Search** - Hybrid BM25 + vector similarity
114
+
115
+ ## Links
116
+
117
+ - [ROADMAP.md](ROADMAP.md) - Future development plans
118
+ - [KNOWN_LIMITATIONS.md](KNOWN_LIMITATIONS.md) - Current limitations and workarounds
119
+
120
+ ## License
121
+
122
+ MIT
@@ -0,0 +1,116 @@
1
+ [project]
2
+ name = "sia-code"
3
+ version = "0.2.1"
4
+ description = "Local-first codebase intelligence with semantic search, multi-hop research, and 12-language AST support"
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ license = {text = "MIT"}
8
+ authors = [
9
+ {name = "Sia Code Contributors"}
10
+ ]
11
+ keywords = [
12
+ "code-search", "semantic-search", "code-indexing", "codebase-intelligence",
13
+ "tree-sitter", "ast", "code-analysis", "developer-tools", "cli"
14
+ ]
15
+ classifiers = [
16
+ "Development Status :: 4 - Beta",
17
+ "Environment :: Console",
18
+ "Intended Audience :: Developers",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Operating System :: OS Independent",
21
+ "Programming Language :: Python :: 3",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Topic :: Software Development",
26
+ "Topic :: Software Development :: Documentation",
27
+ "Topic :: Text Processing :: Indexing",
28
+ ]
29
+ dependencies = [
30
+ "memvid-sdk>=2.0",
31
+ "tree-sitter>=0.21.0",
32
+ "tree-sitter-python>=0.21.0",
33
+ "tree-sitter-javascript>=0.21.0",
34
+ "tree-sitter-typescript>=0.21.0",
35
+ "tree-sitter-go>=0.21.0",
36
+ "tree-sitter-rust>=0.21.0",
37
+ "tree-sitter-java>=0.21.0",
38
+ "tree-sitter-c>=0.21.0",
39
+ "tree-sitter-cpp>=0.21.0",
40
+ "tree-sitter-c-sharp>=0.21.0",
41
+ "tree-sitter-ruby>=0.21.0",
42
+ "tree-sitter-php>=0.21.0",
43
+ "click>=8.0",
44
+ "rich>=13.0",
45
+ "pathspec>=0.11",
46
+ "pydantic>=2.0",
47
+ "prompt-toolkit>=3.0",
48
+ "watchdog>=3.0",
49
+ ]
50
+
51
+ [project.urls]
52
+ Homepage = "https://github.com/DxTa/sia-code"
53
+ Documentation = "https://github.com/DxTa/sia-code#readme"
54
+ Repository = "https://github.com/DxTa/sia-code"
55
+ Issues = "https://github.com/DxTa/sia-code/issues"
56
+
57
+ [project.optional-dependencies]
58
+ openai = ["openai>=1.0"]
59
+ pdf = ["pypdf>=3.0"]
60
+ all = [
61
+ "openai>=1.0",
62
+ "pypdf>=3.0",
63
+ ]
64
+ dev = [
65
+ "pytest>=7.0",
66
+ "pytest-cov>=4.0",
67
+ "black>=23.0",
68
+ "ruff>=0.1.0",
69
+ "build>=1.0",
70
+ "twine>=4.0",
71
+ "bump-my-version>=0.20",
72
+ ]
73
+
74
+ [project.scripts]
75
+ sia-code = "sia_code.cli:main"
76
+
77
+ [build-system]
78
+ requires = ["setuptools>=68", "wheel"]
79
+ build-backend = "setuptools.build_meta"
80
+
81
+ [tool.setuptools.packages.find]
82
+ where = ["."]
83
+ include = ["sia_code*"]
84
+
85
+ [tool.black]
86
+ line-length = 100
87
+ target-version = ['py310']
88
+
89
+ [tool.ruff]
90
+ line-length = 100
91
+ target-version = "py310"
92
+
93
+ [tool.pytest.ini_options]
94
+ testpaths = ["tests"]
95
+ python_files = "test_*.py"
96
+ python_classes = "Test*"
97
+ python_functions = "test_*"
98
+
99
+ [tool.bumpversion]
100
+ current_version = "0.2.1"
101
+ parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)"
102
+ serialize = ["{major}.{minor}.{patch}"]
103
+ commit = true
104
+ tag = true
105
+ tag_name = "v{new_version}"
106
+ message = "Bump version: {current_version} → {new_version}"
107
+
108
+ [[tool.bumpversion.files]]
109
+ filename = "pyproject.toml"
110
+ search = 'version = "{current_version}"'
111
+ replace = 'version = "{new_version}"'
112
+
113
+ [[tool.bumpversion.files]]
114
+ filename = "sia_code/__init__.py"
115
+ search = '__version__ = "{current_version}"'
116
+ replace = '__version__ = "{new_version}"'
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,7 @@
1
+ """Sia Code - Local-first codebase intelligence.
2
+
3
+ Semantic search, multi-hop research, and 12-language AST support.
4
+ """
5
+
6
+ __version__ = "0.2.1"
7
+ __all__ = ["__version__"]