coderay 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. coderay-1.0.0/PKG-INFO +145 -0
  2. coderay-1.0.0/README.md +107 -0
  3. coderay-1.0.0/pyproject.toml +76 -0
  4. coderay-1.0.0/setup.cfg +4 -0
  5. coderay-1.0.0/src/coderay/__init__.py +1 -0
  6. coderay-1.0.0/src/coderay/chunking/__init__.py +0 -0
  7. coderay-1.0.0/src/coderay/chunking/chunker.py +127 -0
  8. coderay-1.0.0/src/coderay/chunking/registry.py +190 -0
  9. coderay-1.0.0/src/coderay/cli/__init__.py +3 -0
  10. coderay-1.0.0/src/coderay/cli/commands.py +475 -0
  11. coderay-1.0.0/src/coderay/core/__init__.py +0 -0
  12. coderay-1.0.0/src/coderay/core/config.py +73 -0
  13. coderay-1.0.0/src/coderay/core/lock.py +36 -0
  14. coderay-1.0.0/src/coderay/core/models.py +71 -0
  15. coderay-1.0.0/src/coderay/core/timing.py +45 -0
  16. coderay-1.0.0/src/coderay/core/utils.py +35 -0
  17. coderay-1.0.0/src/coderay/embedding/__init__.py +0 -0
  18. coderay-1.0.0/src/coderay/embedding/base.py +60 -0
  19. coderay-1.0.0/src/coderay/embedding/local.py +68 -0
  20. coderay-1.0.0/src/coderay/embedding/openai.py +87 -0
  21. coderay-1.0.0/src/coderay/graph/__init__.py +19 -0
  22. coderay-1.0.0/src/coderay/graph/builder.py +128 -0
  23. coderay-1.0.0/src/coderay/graph/code_graph.py +311 -0
  24. coderay-1.0.0/src/coderay/graph/extractor.py +315 -0
  25. coderay-1.0.0/src/coderay/mcp_server/__init__.py +0 -0
  26. coderay-1.0.0/src/coderay/mcp_server/server.py +178 -0
  27. coderay-1.0.0/src/coderay/pipeline/__init__.py +0 -0
  28. coderay-1.0.0/src/coderay/pipeline/indexer.py +417 -0
  29. coderay-1.0.0/src/coderay/pipeline/watcher.py +318 -0
  30. coderay-1.0.0/src/coderay/retrieval/__init__.py +3 -0
  31. coderay-1.0.0/src/coderay/retrieval/boosting.py +80 -0
  32. coderay-1.0.0/src/coderay/retrieval/search.py +121 -0
  33. coderay-1.0.0/src/coderay/skeleton/__init__.py +0 -0
  34. coderay-1.0.0/src/coderay/skeleton/extractor.py +140 -0
  35. coderay-1.0.0/src/coderay/state/__init__.py +8 -0
  36. coderay-1.0.0/src/coderay/state/machine.py +242 -0
  37. coderay-1.0.0/src/coderay/state/version.py +47 -0
  38. coderay-1.0.0/src/coderay/storage/__init__.py +0 -0
  39. coderay-1.0.0/src/coderay/storage/lancedb.py +268 -0
  40. coderay-1.0.0/src/coderay/vcs/__init__.py +0 -0
  41. coderay-1.0.0/src/coderay/vcs/git.py +193 -0
  42. coderay-1.0.0/src/coderay.egg-info/PKG-INFO +145 -0
  43. coderay-1.0.0/src/coderay.egg-info/SOURCES.txt +45 -0
  44. coderay-1.0.0/src/coderay.egg-info/dependency_links.txt +1 -0
  45. coderay-1.0.0/src/coderay.egg-info/entry_points.txt +3 -0
  46. coderay-1.0.0/src/coderay.egg-info/requires.txt +37 -0
  47. coderay-1.0.0/src/coderay.egg-info/top_level.txt +1 -0
coderay-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,145 @@
1
+ Metadata-Version: 2.4
2
+ Name: coderay
3
+ Version: 1.0.0
4
+ Summary: X-ray your codebase — semantic search, code graphs, file skeletons, and MCP server
5
+ Requires-Python: >=3.10
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: python-dotenv>=1.0.0
8
+ Requires-Dist: networkx>=3.0
9
+ Requires-Dist: tree-sitter>=0.24.0
10
+ Requires-Dist: tree-sitter-python>=0.25.0
11
+ Requires-Dist: lancedb>=0.5.0
12
+ Requires-Dist: pyyaml>=6.0
13
+ Requires-Dist: click>=8.0
14
+ Requires-Dist: filelock>=3.0
15
+ Requires-Dist: fastembed>=0.4.0
16
+ Requires-Dist: watchdog>=4.0.0
17
+ Requires-Dist: pathspec>=0.12.0
18
+ Provides-Extra: openai
19
+ Requires-Dist: openai>=1.0.0; extra == "openai"
20
+ Provides-Extra: languages
21
+ Requires-Dist: tree-sitter-javascript>=0.23.0; extra == "languages"
22
+ Requires-Dist: tree-sitter-typescript>=0.23.0; extra == "languages"
23
+ Requires-Dist: tree-sitter-go>=0.23.0; extra == "languages"
24
+ Provides-Extra: mcp
25
+ Requires-Dist: mcp>=1.0.0; extra == "mcp"
26
+ Provides-Extra: dev
27
+ Requires-Dist: pytest>=7.0; extra == "dev"
28
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
29
+ Requires-Dist: ruff>=0.8.0; extra == "dev"
30
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
31
+ Requires-Dist: openai>=1.0.0; extra == "dev"
32
+ Requires-Dist: httpx>=0.27.0; extra == "dev"
33
+ Requires-Dist: mcp>=1.0.0; extra == "dev"
34
+ Provides-Extra: maintain
35
+ Requires-Dist: pylance>=0.15.0; extra == "maintain"
36
+ Provides-Extra: all
37
+ Requires-Dist: coderay[dev,languages,maintain,mcp,openai]; extra == "all"
38
+
39
+ # CodeRay
40
+
41
+ A local, offline-first semantic code indexer. Builds a vector index,
42
+ call/import graph, and file skeletons — exposed as an MCP server for
43
+ AI coding assistants and a standalone CLI.
44
+
45
+ ## What you get
46
+
47
+ | Capability | What it does | Why it matters | AI assistant benefit |
48
+ |---|---|---|---|
49
+ | **Semantic search** | Find code by meaning, not keywords. "where do we handle auth errors" returns results even if the code never uses that phrase. | Grep finds text. This finds *intent*. | Better context retrieval for plan and edit modes |
50
+ | **Blast radius** (`get_impact_radius`) | Given a function or module, show every node reachable within N hops via calls, imports, and inheritance. | Before changing `UserService.save()`, see exactly what breaks. | Safer refactors — agent sees downstream impact before editing |
51
+ | **File skeleton** (`get_file_skeleton`) | Signatures, docstrings, imports — no function bodies. The API surface of a file at a glance. | Understand a 500-line file in 30 lines without reading the implementation. | Drastically fewer tokens than reading the full file |
52
+ | **Index status** | Chunk count, schema version, branch, last commit, store health. | Confirm the index is fresh before relying on results. | Agent self-checks before trusting search results |
53
+
54
+ ## Install
55
+
56
+ ```bash
57
+ pip install "coderay[all] @ git+https://github.com/bogdan-copocean/coderay.git"
58
+ ```
59
+
60
+ For development:
61
+
62
+ ```bash
63
+ git clone https://github.com/bogdan-copocean/coderay.git
64
+ cd coderay
65
+ pip install -e ".[all]"
66
+ ```
67
+
68
+ ## Quick start
69
+
70
+ ```bash
71
+ cd /path/to/your/project
72
+ coderay build --repo .
73
+ coderay search "how does authentication work"
74
+ coderay watch --repo .
75
+ coderay graph --kind calls
76
+ coderay skeleton src/app/main.py
77
+ ```
78
+
79
+ ## MCP server (Claude Code / Cursor)
80
+
81
+ Add to `~/.claude/claude_code_config.json` or Cursor MCP settings:
82
+
83
+ ```json
84
+ {
85
+ "mcpServers": {
86
+ "coderay": {
87
+ "command": "/path/to/your/.venv/bin/coderay-mcp",
88
+ "args": []
89
+ }
90
+ }
91
+ }
92
+ ```
93
+
94
+ ## CLI reference
95
+
96
+ | Command | Description |
97
+ |---|---|
98
+ | `coderay build [--full] --repo .` | Build index (incremental or full rebuild) |
99
+ | `coderay update --repo .` | Incremental update (changed files only) |
100
+ | `coderay watch --repo . [--debounce N]` | Watch for file changes, re-index automatically |
101
+ | `coderay search "query" [--top-k N]` | Semantic search |
102
+ | `coderay list [--by-file]` | List indexed chunks |
103
+ | `coderay status` | Index state, branch, commit, chunk count |
104
+ | `coderay maintain --repo .` | Compact index, reclaim space |
105
+ | `coderay skeleton FILE` | Print file skeleton |
106
+ | `coderay graph --kind calls\|imports` | List graph edges |
107
+
108
+ ## Configuration
109
+
110
+ Optional `config.yaml` in the index directory:
111
+
112
+ ```yaml
113
+ embedder:
114
+ provider: local # local | openai
115
+ model: all-MiniLM-L6-v2
116
+ dimensions: 384
117
+
118
+ search:
119
+ boost_rules:
120
+ "tests/": 0.5
121
+ "src/core/": 1.2
122
+
123
+ graph:
124
+ exclude_callees:
125
+ - "our_sdk_helper"
126
+ include_callees:
127
+ - "isinstance"
128
+
129
+ watch:
130
+ debounce_seconds: 2
131
+ branch_switch_threshold: 50
132
+ exclude_patterns:
133
+ - "*.log"
134
+ ```
135
+
136
+ ## Development
137
+
138
+ ```bash
139
+ pip install -e ".[dev]"
140
+ make test
141
+ make lint
142
+ make format
143
+ ```
144
+
145
+ Requires Python >= 3.10 and Git.
@@ -0,0 +1,107 @@
1
+ # CodeRay
2
+
3
+ A local, offline-first semantic code indexer. Builds a vector index,
4
+ call/import graph, and file skeletons — exposed as an MCP server for
5
+ AI coding assistants and a standalone CLI.
6
+
7
+ ## What you get
8
+
9
+ | Capability | What it does | Why it matters | AI assistant benefit |
10
+ |---|---|---|---|
11
+ | **Semantic search** | Find code by meaning, not keywords. "where do we handle auth errors" returns results even if the code never uses that phrase. | Grep finds text. This finds *intent*. | Better context retrieval for plan and edit modes |
12
+ | **Blast radius** (`get_impact_radius`) | Given a function or module, show every node reachable within N hops via calls, imports, and inheritance. | Before changing `UserService.save()`, see exactly what breaks. | Safer refactors — agent sees downstream impact before editing |
13
+ | **File skeleton** (`get_file_skeleton`) | Signatures, docstrings, imports — no function bodies. The API surface of a file at a glance. | Understand a 500-line file in 30 lines without reading the implementation. | Drastically fewer tokens than reading the full file |
14
+ | **Index status** | Chunk count, schema version, branch, last commit, store health. | Confirm the index is fresh before relying on results. | Agent self-checks before trusting search results |
15
+
16
+ ## Install
17
+
18
+ ```bash
19
+ pip install "coderay[all] @ git+https://github.com/bogdan-copocean/coderay.git"
20
+ ```
21
+
22
+ For development:
23
+
24
+ ```bash
25
+ git clone https://github.com/bogdan-copocean/coderay.git
26
+ cd coderay
27
+ pip install -e ".[all]"
28
+ ```
29
+
30
+ ## Quick start
31
+
32
+ ```bash
33
+ cd /path/to/your/project
34
+ coderay build --repo .
35
+ coderay search "how does authentication work"
36
+ coderay watch --repo .
37
+ coderay graph --kind calls
38
+ coderay skeleton src/app/main.py
39
+ ```
40
+
41
+ ## MCP server (Claude Code / Cursor)
42
+
43
+ Add to `~/.claude/claude_code_config.json` or Cursor MCP settings:
44
+
45
+ ```json
46
+ {
47
+ "mcpServers": {
48
+ "coderay": {
49
+ "command": "/path/to/your/.venv/bin/coderay-mcp",
50
+ "args": []
51
+ }
52
+ }
53
+ }
54
+ ```
55
+
56
+ ## CLI reference
57
+
58
+ | Command | Description |
59
+ |---|---|
60
+ | `coderay build [--full] --repo .` | Build index (incremental or full rebuild) |
61
+ | `coderay update --repo .` | Incremental update (changed files only) |
62
+ | `coderay watch --repo . [--debounce N]` | Watch for file changes, re-index automatically |
63
+ | `coderay search "query" [--top-k N]` | Semantic search |
64
+ | `coderay list [--by-file]` | List indexed chunks |
65
+ | `coderay status` | Index state, branch, commit, chunk count |
66
+ | `coderay maintain --repo .` | Compact index, reclaim space |
67
+ | `coderay skeleton FILE` | Print file skeleton |
68
+ | `coderay graph --kind calls\|imports` | List graph edges |
69
+
70
+ ## Configuration
71
+
72
+ Optional `config.yaml` in the index directory:
73
+
74
+ ```yaml
75
+ embedder:
76
+ provider: local # local | openai
77
+ model: all-MiniLM-L6-v2
78
+ dimensions: 384
79
+
80
+ search:
81
+ boost_rules:
82
+ "tests/": 0.5
83
+ "src/core/": 1.2
84
+
85
+ graph:
86
+ exclude_callees:
87
+ - "our_sdk_helper"
88
+ include_callees:
89
+ - "isinstance"
90
+
91
+ watch:
92
+ debounce_seconds: 2
93
+ branch_switch_threshold: 50
94
+ exclude_patterns:
95
+ - "*.log"
96
+ ```
97
+
98
+ ## Development
99
+
100
+ ```bash
101
+ pip install -e ".[dev]"
102
+ make test
103
+ make lint
104
+ make format
105
+ ```
106
+
107
+ Requires Python >= 3.10 and Git.
@@ -0,0 +1,76 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "coderay"
7
+ version = "1.0.0"
8
+ description = "X-ray your codebase — semantic search, code graphs, file skeletons, and MCP server"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ dependencies = [
12
+ "python-dotenv>=1.0.0",
13
+ "networkx>=3.0",
14
+ "tree-sitter>=0.24.0",
15
+ "tree-sitter-python>=0.25.0",
16
+ "lancedb>=0.5.0",
17
+ "pyyaml>=6.0",
18
+ "click>=8.0",
19
+ "filelock>=3.0",
20
+ "fastembed>=0.4.0",
21
+ "watchdog>=4.0.0",
22
+ "pathspec>=0.12.0",
23
+ ]
24
+
25
+ [project.optional-dependencies]
26
+ openai = ["openai>=1.0.0"]
27
+ languages = [
28
+ "tree-sitter-javascript>=0.23.0",
29
+ "tree-sitter-typescript>=0.23.0",
30
+ "tree-sitter-go>=0.23.0",
31
+ ]
32
+ mcp = ["mcp>=1.0.0"]
33
+ dev = [
34
+ "pytest>=7.0",
35
+ "pytest-cov>=4.0",
36
+ "ruff>=0.8.0",
37
+ "mypy>=1.0.0",
38
+ "openai>=1.0.0",
39
+ "httpx>=0.27.0",
40
+ "mcp>=1.0.0",
41
+ ]
42
+ maintain = [
43
+ "pylance>=0.15.0",
44
+ ]
45
+ all = [
46
+ "coderay[openai,languages,mcp,dev,maintain]",
47
+ ]
48
+
49
+ [project.scripts]
50
+ coderay = "coderay.cli.commands:main"
51
+ coderay-mcp = "coderay.mcp_server.server:main"
52
+
53
+ [tool.setuptools.packages.find]
54
+ where = ["src"]
55
+
56
+ [tool.ruff]
57
+ target-version = "py310"
58
+ line-length = 88
59
+ src = ["src", "tests"]
60
+
61
+ [tool.ruff.lint]
62
+ select = ["E", "F", "I", "N", "W", "UP"]
63
+ ignore = []
64
+
65
+ [tool.ruff.lint.isort]
66
+ known-first-party = ["coderay"]
67
+
68
+ [tool.mypy]
69
+ python_version = "3.10"
70
+ warn_return_any = true
71
+ warn_unused_configs = true
72
+ ignore_missing_imports = true
73
+
74
+ [tool.pytest.ini_options]
75
+ testpaths = ["tests"]
76
+ pythonpath = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1 @@
1
+ __version__ = "1.0.0"
File without changes
@@ -0,0 +1,127 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from pathlib import Path
5
+
6
+ from coderay.chunking.registry import LanguageConfig, get_language_for_file
7
+ from coderay.core.models import Chunk
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
def _get_symbol_name(node, source_bytes: bytes) -> str:
    """Return the declared name of a definition node, or "" if none is found.

    Args:
        node: A tree-sitter-style node exposing ``type``, ``children``,
            ``start_byte`` and ``end_byte``.
        source_bytes: UTF-8 encoded source that the node's byte offsets
            index into.

    Returns:
        The name decoded from ``source_bytes`` (undecodable bytes are
        replaced), or an empty string when no name node is present.
    """
    # Direct-child node types that carry a definition's name. "identifier"
    # covers Python/JS/Go functions; the others are the name nodes that
    # tree-sitter grammars use for methods, struct/receiver methods and
    # type-level declarations (classes, interfaces, aliases in TypeScript).
    name_child_types = (
        "identifier",
        "property_identifier",
        "field_identifier",
        "type_identifier",
    )

    def text_of(n) -> str:
        return source_bytes[n.start_byte : n.end_byte].decode(
            "utf-8", errors="replace"
        )

    if node.type == "decorated_definition":
        # The name belongs to the wrapped definition: the first child that
        # is not itself a decorator.
        for child in node.children:
            if child.type != "decorator":
                return _get_symbol_name(child, source_bytes)
        return ""

    # First name-like direct child wins; keyword tokens ("def", "class", ...)
    # and parameter lists are simply skipped.
    for child in node.children:
        if child.type in name_child_types:
            return text_of(child)

    # Backward compatibility: a bare property/field identifier passed in
    # directly is its own symbol.
    if node.type in ("property_identifier", "field_identifier"):
        return text_of(node)
    return ""
36
+
37
+
38
def _collect_preamble_lines(
    root, source_bytes: bytes, chunk_types: tuple[str, ...]
) -> list[str]:
    """Gather the text of every top-level node that is not a chunk definition.

    Args:
        root: Root node of the parsed file; only its direct children are
            inspected.
        source_bytes: UTF-8 encoded source that node byte offsets index into.
        chunk_types: Node types that are chunked separately and therefore
            excluded here.

    Returns:
        Stripped, non-empty source snippets in document order.
    """
    snippets = (
        source_bytes[top.start_byte : top.end_byte]
        .decode("utf-8", errors="replace")
        .strip()
        for top in root.children
        if top.type not in chunk_types
    )
    # Whitespace-only nodes strip down to "" and are dropped.
    return [snippet for snippet in snippets if snippet]
54
+
55
+
56
def _chunk_file_with_config(
    path: str,
    content: str,
    lang_cfg: LanguageConfig,
) -> list[Chunk]:
    """Split ``content`` into chunks using the grammar described by ``lang_cfg``.

    The result starts with an optional ``<module>`` chunk holding all
    top-level text outside chunk-type nodes, followed by one chunk per
    chunk-type node in depth-first, pre-order traversal.

    Args:
        path: File path recorded on every chunk (also used in log messages).
        content: Full text of the file.
        lang_cfg: Language configuration supplying the parser and chunk types.

    Returns:
        The chunks found, or an empty list when the parser cannot be loaded.
    """
    try:
        parser = lang_cfg.get_parser()
    except Exception as e:
        logger.warning("Could not load parser for %s (%s): %s", path, lang_cfg.name, e)
        return []

    raw = content.encode("utf-8")
    tree = parser.parse(raw)
    root = tree.root_node
    collected: list[Chunk] = []

    # Everything at module level that is not itself a chunk goes into a
    # single "<module>" chunk spanning the whole file.
    preamble = _collect_preamble_lines(root, raw, lang_cfg.chunk_types)
    if preamble:
        collected.append(
            Chunk(
                path=path,
                start_line=1,
                end_line=root.end_point[0] + 1,
                symbol="<module>",
                language=lang_cfg.name,
                content="\n".join(preamble),
            ),
        )

    def visit(node) -> None:
        # A chunk-type node directly under another chunk-type node (e.g. a
        # Python function inside its decorated_definition) is already covered
        # by the parent's content, so it is not emitted again.
        if node.type in lang_cfg.chunk_types and not (
            node.parent and node.parent.type in lang_cfg.chunk_types
        ):
            body = raw[node.start_byte : node.end_byte].decode(
                "utf-8", errors="replace"
            )
            collected.append(
                Chunk(
                    path=path,
                    # tree-sitter points are zero-based; chunks use 1-based lines.
                    start_line=node.start_point[0] + 1,
                    end_line=node.end_point[0] + 1,
                    symbol=_get_symbol_name(node, raw) or f"<{node.type}>",
                    language=lang_cfg.name,
                    content=body,
                )
            )
        # Always descend so nested definitions are found as well.
        for child in node.children:
            visit(child)

    visit(root)

    logger.debug("Chunked %s: %d chunks", path, len(collected))
    return collected
119
+
120
+
121
def chunk_file(path: str | Path, content: str, language: str = "python") -> list[Chunk]:
    """Chunk a source file into semantic units (definitions plus preamble).

    The language is chosen from the file extension; ``language`` is accepted
    but not consulted by this implementation.

    Args:
        path: Path of the file being chunked.
        content: Full text of the file.
        language: Currently unused.

    Returns:
        The file's chunks, or an empty list when the extension maps to no
        known language.
    """
    path_str = path if not isinstance(path, Path) else str(path)
    lang_cfg = get_language_for_file(path_str)
    if lang_cfg:
        return _chunk_file_with_config(path_str, content, lang_cfg)
    logger.warning("No language config for %s ", path_str)
    return []
@@ -0,0 +1,190 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from collections.abc import Callable
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ from tree_sitter import Language, Parser
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
@dataclass
class LanguageConfig:
    """Per-language tree-sitter settings: grammar loader plus node-type tables."""

    # Language name; also used as the key in the language registry.
    name: str
    # File suffixes, including the leading dot, handled by this language.
    extensions: tuple[str, ...]
    # Zero-argument callable returning the raw grammar handed to ``Language``.
    language_fn: Callable[[], Any]
    # Node types that the chunker emits as individual chunks.
    chunk_types: tuple[str, ...]
    # The node-type tables below are not read in this module; presumably they
    # drive graph/skeleton extraction -- confirm against those consumers.
    scope_types: tuple[str, ...] = ("function_definition", "class_definition")
    import_types: tuple[str, ...] = ("import_statement", "import_from_statement")
    call_types: tuple[str, ...] = ("call", "call_expression")
    function_scope_types: tuple[str, ...] = ("function_definition",)
    class_scope_types: tuple[str, ...] = ("class_definition",)
    # Extension-less names of init-style files (e.g. "__init__", "index").
    init_filenames: tuple[str, ...] = ()

    def get_parser(self) -> Parser:
        """Build a new tree-sitter ``Parser`` bound to this language's grammar."""
        return Parser(Language(self.language_fn()))
34
+
35
+
36
def _python_language():
    """Return the raw Python grammar from ``tree_sitter_python``.

    The import is function-scoped, so the grammar package is loaded only
    when this loader is actually called.
    """
    import tree_sitter_python

    return tree_sitter_python.language()
40
+
41
+
42
def _javascript_language():
    """Return the raw JavaScript grammar from ``tree_sitter_javascript``.

    The import is function-scoped, so the optional grammar package is needed
    only when this loader is actually called.
    """
    import tree_sitter_javascript

    return tree_sitter_javascript.language()
46
+
47
+
48
def _typescript_language():
    """Return the raw TypeScript grammar from ``tree_sitter_typescript``.

    The TypeScript bindings ship two grammars and expose them as
    ``language_typescript()`` and ``language_tsx()`` rather than a single
    ``language()``. Calling ``language()`` on such a package raises
    ``AttributeError``, which callers treat as "parser unavailable".

    Returns:
        The grammar object accepted by ``tree_sitter.Language``.

    Raises:
        ImportError: If ``tree_sitter_typescript`` is not installed.
        AttributeError: If the package exposes neither loader.
    """
    import tree_sitter_typescript as tsts

    loader = getattr(tsts, "language_typescript", None)
    if loader is None:
        # Fall back to the single-grammar entry point for bindings that
        # provide it, preserving the previous behaviour there.
        loader = tsts.language
    return loader()
52
+
53
+
54
def _go_language():
    """Return the raw Go grammar from ``tree_sitter_go``.

    The import is function-scoped, so the optional grammar package is needed
    only when this loader is actually called.
    """
    import tree_sitter_go

    return tree_sitter_go.language()
58
+
59
+
60
# Python: functions, classes and decorated definitions are chunked; the
# "__init__" stem marks init-style files.
PYTHON_CONFIG = LanguageConfig(
    name="python",
    extensions=(".py", ".pyi"),
    language_fn=_python_language,
    chunk_types=(
        "function_definition",
        "class_definition",
        "decorated_definition",
    ),
    scope_types=("function_definition", "class_definition"),
    import_types=("import_statement", "import_from_statement"),
    call_types=("call",),
    function_scope_types=("function_definition",),
    class_scope_types=("class_definition",),
    init_filenames=("__init__",),
)

# JavaScript: declarations, methods, arrow functions, exports and
# let/const declarations are chunked; "index" marks init-style files.
JAVASCRIPT_CONFIG = LanguageConfig(
    name="javascript",
    extensions=(".js", ".jsx", ".mjs", ".cjs"),
    language_fn=_javascript_language,
    chunk_types=(
        "function_declaration",
        "class_declaration",
        "method_definition",
        "arrow_function",
        "export_statement",
        "lexical_declaration",
    ),
    scope_types=("function_declaration", "class_declaration", "method_definition"),
    import_types=("import_statement",),
    call_types=("call_expression",),
    function_scope_types=("function_declaration", "method_definition"),
    class_scope_types=("class_declaration",),
    init_filenames=("index",),
)

# TypeScript: the JavaScript chunk types plus interfaces and type aliases.
# Both ".ts" and ".tsx" share the single grammar returned by language_fn.
TYPESCRIPT_CONFIG = LanguageConfig(
    name="typescript",
    extensions=(".ts", ".tsx"),
    language_fn=_typescript_language,
    chunk_types=(
        "function_declaration",
        "class_declaration",
        "method_definition",
        "arrow_function",
        "export_statement",
        "lexical_declaration",
        "interface_declaration",
        "type_alias_declaration",
    ),
    scope_types=(
        "function_declaration",
        "class_declaration",
        "method_definition",
        "interface_declaration",
    ),
    import_types=("import_statement",),
    call_types=("call_expression",),
    function_scope_types=("function_declaration", "method_definition"),
    class_scope_types=("class_declaration", "interface_declaration"),
    init_filenames=("index",),
)

# Go: functions, methods and type declarations are chunked; there are no
# class scopes and no init-style filenames.
GO_CONFIG = LanguageConfig(
    name="go",
    extensions=(".go",),
    language_fn=_go_language,
    chunk_types=(
        "function_declaration",
        "method_declaration",
        "type_declaration",
    ),
    scope_types=("function_declaration", "method_declaration"),
    import_types=("import_declaration",),
    call_types=("call_expression",),
    function_scope_types=("function_declaration", "method_declaration"),
    class_scope_types=(),
    init_filenames=(),
)

# Language name -> configuration. Insertion order matters: it fixes the
# iteration order of the helpers below that walk the registry.
LANGUAGE_REGISTRY: dict[str, LanguageConfig] = {
    "python": PYTHON_CONFIG,
    "javascript": JAVASCRIPT_CONFIG,
    "typescript": TYPESCRIPT_CONFIG,
    "go": GO_CONFIG,
}
147
+
148
# File suffix -> language name, derived from the registry. If two languages
# claimed the same suffix, the one registered later would win.
_EXTENSION_MAP: dict[str, str] = {
    _ext: _lang_name
    for _lang_name, _cfg in LANGUAGE_REGISTRY.items()
    for _ext in _cfg.extensions
}
152
+
153
+
154
def get_language_for_file(path: str | Path) -> LanguageConfig | None:
    """Look up the language configuration for ``path`` by its file suffix.

    The suffix is matched case-insensitively.

    Args:
        path: File path; only its final suffix is inspected.

    Returns:
        The matching ``LanguageConfig``, or ``None`` for unknown suffixes.
    """
    suffix = Path(path).suffix.lower()
    lang_name = _EXTENSION_MAP.get(suffix)
    return None if lang_name is None else LANGUAGE_REGISTRY.get(lang_name)
161
+
162
+
163
def get_supported_extensions() -> set[str]:
    """Return a new set containing every file suffix that maps to a language."""
    return set(_EXTENSION_MAP)
166
+
167
+
168
def get_init_filenames() -> set[str]:
    """Return the union of init-style filenames over all registered languages.

    Examples of such names are ``__init__`` and ``index``.
    """
    return {
        init_name
        for cfg in LANGUAGE_REGISTRY.values()
        for init_name in cfg.init_filenames
    }
174
+
175
+
176
def get_resolution_suffixes() -> list[str]:
    """Return file suffixes for resolving import targets.

    Walks the language registry in order and yields, without duplicates,
    each plain extension (e.g. ``.py``) and each ``/<init><ext>`` combination
    (e.g. ``/__init__.py``) built from a language's init filenames.

    Returns:
        Suffix strings in first-seen order.
    """
    # NOTE(review): the nesting below was reconstructed from a listing with
    # flattened indentation; confirm the position of the ``for init`` loop
    # against the upstream source.
    suffixes: list[str] = []
    # Tracks everything already emitted so the list preserves order without
    # repeating entries.
    seen: set[str] = set()
    for cfg in LANGUAGE_REGISTRY.values():
        for ext in cfg.extensions:
            if ext not in seen:
                suffixes.append(ext)
                seen.add(ext)
            for init in cfg.init_filenames:
                # Path-style suffix for an init file, e.g. "/index.ts".
                combo = f"/{init}{ext}"
                if combo not in seen:
                    suffixes.append(combo)
                    seen.add(combo)
    return suffixes
@@ -0,0 +1,3 @@
1
+ from coderay.cli.commands import cli, main
2
+
3
+ __all__ = ["cli", "main"]