codemap-bash 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codemap_bash/__init__.py
ADDED
codemap_bash/indexer.py
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
"""Bash indexer built on tree-sitter-bash.
|
|
2
|
+
|
|
3
|
+
Only script-level symbols are emitted: function definitions, top-level
|
|
4
|
+
variable assignments, and top-level ``readonly`` / ``declare`` /
|
|
5
|
+
``export`` / ``local`` / ``typeset`` declaration commands. Variables defined inside a
|
|
6
|
+
function body are intentionally not surfaced — they are private state,
|
|
7
|
+
not a stable interface.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from pathlib import Path, PurePosixPath
|
|
13
|
+
from typing import ClassVar
|
|
14
|
+
|
|
15
|
+
import tree_sitter
|
|
16
|
+
import tree_sitter_bash
|
|
17
|
+
|
|
18
|
+
from codemap.core.models import Diagnostic, Edge, IndexResult, Range, Symbol
|
|
19
|
+
from codemap.core.symbol import Descriptor, DescriptorKind, SymbolID
|
|
20
|
+
from codemap.indexers.base import IndexContext
|
|
21
|
+
|
|
22
|
+
# Symbol-ID scheme and language tag used for every symbol this module emits.
SCHEME = "scip-bash"
LANG = "bash"

# Grammar handle, built once at import time and passed to each new parser.
_BASH_LANG = tree_sitter.Language(tree_sitter_bash.language())

# Child node types that identify which builtin opened a ``declaration_command``.
_DECLARATION_KEYWORDS = frozenset(("readonly", "declare", "export", "local", "typeset"))
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class BashIndexer:
    """Indexer that extracts script-level symbols from Bash sources."""

    name: ClassVar[str] = "bash"
    version: ClassVar[str] = "0.1.0"
    file_patterns: ClassVar[list[str]] = ["*.sh", "*.bash", "*.bats"]
    languages: ClassVar[list[str]] = [LANG]

    def supports(self, path: Path) -> bool:
        """Return True when *path* should be handled by this indexer.

        Files ending in ``.sh``, ``.bash`` or ``.bats`` are accepted by name
        alone. A file with no suffix is accepted only if it exists as a
        regular file and its first 64 bytes pass the shebang check.
        Anything else, including unreadable files, is rejected.
        """
        suffix = path.suffix
        if suffix in {".sh", ".bash", ".bats"}:
            return True
        # Only extensionless regular files get the shebang sniff.
        if suffix or not path.is_file():
            return False
        try:
            with path.open("rb") as handle:
                prefix = handle.read(64)
        except OSError:
            return False
        return _looks_like_bash_shebang(prefix)

    def index_file(
        self,
        path: Path,
        source: bytes,
        ctx: IndexContext,
    ) -> IndexResult:
        """Parse *source* and return the symbols and diagnostics found in it.

        *path* is not consulted here; the file is identified in the output by
        ``ctx.relative_path``. If *source* is not valid UTF-8 the result holds
        a single ``SH002`` error and nothing is parsed. If the parsed tree
        reports errors, an ``SH001`` warning pinned to line 1 is appended
        after the visitor's own diagnostics.
        """
        # Decode purely as a validity check; the parser consumes the bytes.
        try:
            source.decode("utf-8")
        except UnicodeDecodeError as exc:
            invalid_encoding = Diagnostic(
                severity="error",
                file=ctx.relative_path,
                code="SH002",
                message=f"not valid UTF-8: {exc}",
                producer=self.name,
            )
            return IndexResult(diagnostics=[invalid_encoding])

        root = tree_sitter.Parser(_BASH_LANG).parse(source).root_node
        collector = _Visitor(ctx.relative_path)
        collector.visit_root(root)

        diagnostics = [*collector.diagnostics]
        if root.has_error:
            parse_warning = Diagnostic(
                severity="warning",
                file=ctx.relative_path,
                range=Range(start_line=1, end_line=1),
                code="SH001",
                message="tree-sitter reported parse errors; symbols may be incomplete",
                producer=self.name,
            )
            diagnostics.append(parse_warning)

        return IndexResult(
            symbols=collector.symbols,
            edges=collector.edges,
            diagnostics=diagnostics,
        )
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class _Visitor:
    """Collect script-level symbols from the root of a parsed Bash tree.

    Emitted symbols accumulate in ``symbols``. ``edges`` and ``diagnostics``
    are exposed for the caller, but no method of this class appends to them.
    """

    def __init__(self, relative_path: PurePosixPath) -> None:
        self.relative_path = relative_path
        self.symbols: list[Symbol] = []
        self.edges: list[Edge] = []
        self.diagnostics: list[Diagnostic] = []

    def visit_root(self, root: tree_sitter.Node) -> None:
        """Visit every direct child of *root*; nested statements are skipped."""
        # We only walk the top-level statement list — function bodies are
        # opaque from the symbol-table perspective.
        for child in root.children:
            self._visit_top_level(child)

    def _visit_top_level(self, node: tree_sitter.Node) -> None:
        """Dispatch one top-level statement to the matching emitter.

        Node types other than the ones listed below are ignored.
        """
        kind = node.type
        if kind == "function_definition":
            self._emit_function(node)
        elif kind == "variable_assignment":
            self._emit_variable(node, bash_kind=None)
        elif kind == "variable_assignments":
            # tree-sitter-bash groups several assignments on one line
            # (``A=1 B=2``) under this wrapper node; unwrap it so each
            # variable is still surfaced.
            for child in node.children:
                if child.type == "variable_assignment":
                    self._emit_variable(child, bash_kind=None)
        elif kind == "declaration_command":
            self._emit_declaration(node)

    def _emit_function(self, node: tree_sitter.Node) -> None:
        """Append a ``function`` symbol for *node*, or nothing if it has no name."""
        name = _function_name(node)
        if name is None:
            return
        sid = self._make_id(name, kind=DescriptorKind.METHOD)
        self.symbols.append(
            Symbol(
                id=sid,
                kind="function",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                signature=f"{name}()",
            )
        )

    def _emit_variable(
        self,
        node: tree_sitter.Node,
        *,
        bash_kind: str | None,
    ) -> None:
        """Append a ``variable`` symbol for the assignment *node*.

        *bash_kind* is the declaring keyword (e.g. ``"export"``) and is stored
        under ``extra["bash_kind"]``; pass ``None`` for a bare assignment, in
        which case ``extra`` stays empty. Nodes without a ``variable_name``
        child are skipped.
        """
        name = _variable_name(node)
        if name is None:
            return
        sid = self._make_id(name, kind=DescriptorKind.TERM)
        extra: dict[str, str] = {}
        if bash_kind is not None:
            extra["bash_kind"] = bash_kind
        self.symbols.append(
            Symbol(
                id=sid,
                kind="variable",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                extra=extra,
            )
        )

    def _emit_declaration(self, node: tree_sitter.Node) -> None:
        """Emit one variable per assignment inside a declaration command.

        The first child whose type is a known declaration keyword tags every
        emitted variable; children that are not assignments are ignored.
        """
        keyword: str | None = next(
            (child.type for child in node.children if child.type in _DECLARATION_KEYWORDS),
            None,
        )
        for child in node.children:
            if child.type == "variable_assignment":
                self._emit_variable(child, bash_kind=keyword)

    def _make_id(self, name: str, *, kind: DescriptorKind) -> SymbolID:
        """Build a symbol ID: one namespace per path component, then *name*."""
        descriptors = list(_path_namespaces(self.relative_path))
        descriptors.append(Descriptor(name=name, kind=kind))
        return SymbolID(scheme=SCHEME, descriptors=tuple(descriptors))
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
# ---------------------------------------------------------------------------
|
|
172
|
+
# Pure helpers
|
|
173
|
+
# ---------------------------------------------------------------------------
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _path_namespaces(path: PurePosixPath) -> list[Descriptor]:
    """Return one NAMESPACE descriptor per component of *path*, in order."""
    namespaces: list[Descriptor] = []
    for segment in path.parts:
        namespaces.append(Descriptor(name=segment, kind=DescriptorKind.NAMESPACE))
    return namespaces
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _node_range(node: tree_sitter.Node) -> Range:
    """Convert a node's start/end points into a ``Range``.

    Rows are shifted by one to give line numbers; columns are passed through
    unchanged. The end line is never allowed to precede the start line.
    """
    start_row, start_col = node.start_point
    end_row, end_col = node.end_point
    first_line = start_row + 1
    last_line = end_row + 1
    if last_line < first_line:
        last_line = first_line
    return Range(
        start_line=first_line,
        start_col=start_col,
        end_line=last_line,
        end_col=end_col,
    )
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _node_text(node: tree_sitter.Node) -> str:
    """Return the node's text decoded as UTF-8, or ``""`` when it has none."""
    raw = node.text
    if raw is None:
        return ""
    return raw.decode("utf-8")
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _function_name(node: tree_sitter.Node) -> str | None:
    """Return the text of the first ``word`` child of *node*, if any.

    Both ``function greet() { ... }`` and ``helper() { ... }`` carry the
    function name as a ``word`` child, either after the optional
    ``function`` keyword or as the first content child. ``None`` is returned
    when no such child exists.
    """
    first_word = next(
        (child for child in node.children if child.type == "word"),
        None,
    )
    if first_word is None:
        return None
    return _node_text(first_word)
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _variable_name(node: tree_sitter.Node) -> str | None:
    """Return the text of the first ``variable_name`` child, or ``None``."""
    name_node = next(
        (child for child in node.children if child.type == "variable_name"),
        None,
    )
    if name_node is None:
        return None
    return _node_text(name_node)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
# Canonical shebang spellings; kept for reference and backward compatibility.
_BASH_SHEBANGS = (b"#!/bin/bash", b"#!/usr/bin/env bash", b"#!/bin/sh", b"#!/usr/bin/env sh")

# Interpreter basenames accepted by ``_looks_like_bash_shebang``.
_SHELL_INTERPRETERS = frozenset({b"bash", b"sh"})


def _looks_like_bash_shebang(head: bytes) -> bool:
    """Return True when *head* opens with a shebang that runs ``bash`` or ``sh``.

    The interpreter is identified by the basename of the first token after
    ``#!``, so any install prefix (``/usr/local/bin/bash``) and optional
    whitespace after ``#!`` are accepted. When that token is ``env``, the
    first following argument that is neither an option (``-S``) nor a
    ``NAME=value`` assignment is taken as the interpreter instead.

    Matching whole tokens rather than raw prefixes keeps unrelated
    interpreters whose name merely starts with ``sh`` or ``bash`` from being
    treated as shell scripts.

    *head* may be any leading slice of the file; only its first line is read.
    """
    if not head.startswith(b"#!"):
        return False
    first_line = head[2:].split(b"\n", 1)[0]
    tokens = first_line.split()  # also discards a trailing "\r"
    if not tokens:
        return False
    interpreter = tokens[0].rsplit(b"/", 1)[-1]
    if interpreter == b"env":
        for token in tokens[1:]:
            if token.startswith(b"-") or b"=" in token:
                continue
            interpreter = token.rsplit(b"/", 1)[-1]
            break
        else:
            # ``env`` with no command to run.
            return False
    return interpreter in _SHELL_INTERPRETERS
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codemap-bash
|
|
3
|
+
Version: 0.1.0a1
|
|
4
|
+
Summary: Bash / shell-script indexer plugin for CodeMap
|
|
5
|
+
Project-URL: Homepage, https://github.com/qxbyte/codemap
|
|
6
|
+
Author: CodeMap Contributors
|
|
7
|
+
License: MIT
|
|
8
|
+
Keywords: bash,codemap,indexer,shell,tree-sitter
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Unix Shell
|
|
12
|
+
Classifier: Topic :: Software Development
|
|
13
|
+
Classifier: Topic :: System :: Shells
|
|
14
|
+
Requires-Python: >=3.11
|
|
15
|
+
Requires-Dist: codemap-core<0.2,>=0.1.0a1
|
|
16
|
+
Requires-Dist: tree-sitter-bash>=0.25
|
|
17
|
+
Requires-Dist: tree-sitter>=0.25
|
|
18
|
+
Provides-Extra: dev
|
|
19
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
|
|
22
|
+
# codemap-bash
|
|
23
|
+
|
|
24
|
+
> A Bash / shell-script indexer for [CodeMap](https://github.com/qxbyte/codemap),
|
|
25
|
+
> shipped as an independent PyPI package.
|
|
26
|
+
|
|
27
|
+
## What it captures
|
|
28
|
+
|
|
29
|
+
Backed by `tree-sitter-bash`:
|
|
30
|
+
|
|
31
|
+
| AST node | Symbol kind |
|
|
32
|
+
|---|---|
|
|
33
|
+
| `function_definition` | `function` |
|
|
34
|
+
| Top-level `variable_assignment` | `variable` |
|
|
35
|
+
| `declaration_command` (`readonly`/`declare`/`export`/`local`/`typeset`) at top level | `variable` (with `extra.bash_kind=<keyword>`) |
|
|
36
|
+
|
|
37
|
+
Function bodies are **not** walked for inner assignments — anything
|
|
38
|
+
declared inside a function is local state, not a script-level symbol.
|
|
39
|
+
|
|
40
|
+
## Install
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pip install "git+https://github.com/qxbyte/codemap.git#subdirectory=plugins/codemap-bash"
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## SymbolID encoding
|
|
47
|
+
|
|
48
|
+
```
|
|
49
|
+
scip-bash . . . scripts/deploy.sh/greet().
|
|
50
|
+
scip-bash . . . scripts/deploy.sh/MAX_RETRIES.
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## File patterns
|
|
54
|
+
|
|
55
|
+
* `*.sh`, `*.bash`, `*.bats`
|
|
56
|
+
* Files without an extension whose shebang starts with `#!/bin/bash` or
|
|
57
|
+
`#!/usr/bin/env bash` (or the `sh` equivalents, `#!/bin/sh` and `#!/usr/bin/env sh`) are matched via `supports()`.
|
|
58
|
+
|
|
59
|
+
## Limits
|
|
60
|
+
|
|
61
|
+
* `source` / `.` includes are not turned into edges.
|
|
62
|
+
* Aliases (`alias ll='ls -la'`) are not captured.
|
|
63
|
+
* `getopts` argument schemas aren't structured.
|
|
64
|
+
* POSIX sh / Zsh / Fish dialects parse, but constructs unique to them
|
|
65
|
+
may degrade to syntax-error diagnostics.
|
|
66
|
+
|
|
67
|
+
## License
|
|
68
|
+
|
|
69
|
+
MIT.
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
codemap_bash/__init__.py,sha256=3MDMvijfGWnI9p-3bUSo0K1yC9XKIq2jQ6i8-4f4pb8,170
|
|
2
|
+
codemap_bash/indexer.py,sha256=3YKGa-whdjch9w_tcTmTJzYyYSsmOOKzxBlU_Utui2I,7210
|
|
3
|
+
codemap_bash-0.1.0a1.dist-info/METADATA,sha256=I2NxBkRRsnhJATGXFim8TtdFNBNsc-A-8cVuj_yGMXU,2012
|
|
4
|
+
codemap_bash-0.1.0a1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
5
|
+
codemap_bash-0.1.0a1.dist-info/entry_points.txt,sha256=4IaYVUQuXXzZGhJzQzLBoxbtJWladj64IqYpnAQpY9E,51
|
|
6
|
+
codemap_bash-0.1.0a1.dist-info/RECORD,,
|