mcp-vector-search 0.7.6__py3-none-any.whl → 0.8.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcp-vector-search might be problematic. Click here for more details.
- mcp_vector_search/__init__.py +2 -2
- mcp_vector_search/cli/commands/index.py +5 -0
- mcp_vector_search/cli/commands/visualize.py +529 -0
- mcp_vector_search/cli/main.py +16 -11
- mcp_vector_search/core/indexer.py +84 -3
- mcp_vector_search/core/models.py +45 -1
- mcp_vector_search/parsers/base.py +83 -0
- mcp_vector_search/parsers/javascript.py +350 -2
- mcp_vector_search/parsers/python.py +79 -0
- {mcp_vector_search-0.7.6.dist-info → mcp_vector_search-0.8.2.dist-info}/METADATA +1 -1
- {mcp_vector_search-0.7.6.dist-info → mcp_vector_search-0.8.2.dist-info}/RECORD +14 -13
- {mcp_vector_search-0.7.6.dist-info → mcp_vector_search-0.8.2.dist-info}/WHEEL +0 -0
- {mcp_vector_search-0.7.6.dist-info → mcp_vector_search-0.8.2.dist-info}/entry_points.txt +0 -0
- {mcp_vector_search-0.7.6.dist-info → mcp_vector_search-0.8.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -295,8 +295,11 @@ class SemanticIndexer:
|
|
|
295
295
|
logger.debug(f"No chunks extracted from {file_path}")
|
|
296
296
|
return True # Not an error, just empty file
|
|
297
297
|
|
|
298
|
+
# Build hierarchical relationships between chunks
|
|
299
|
+
chunks_with_hierarchy = self._build_chunk_hierarchy(chunks)
|
|
300
|
+
|
|
298
301
|
# Add chunks to database
|
|
299
|
-
await self.database.add_chunks(
|
|
302
|
+
await self.database.add_chunks(chunks_with_hierarchy)
|
|
300
303
|
|
|
301
304
|
# Update metadata after successful indexing
|
|
302
305
|
metadata = self._load_index_metadata()
|
|
@@ -710,8 +713,11 @@ class SemanticIndexer:
|
|
|
710
713
|
chunks = await self._parse_file(file_path)
|
|
711
714
|
|
|
712
715
|
if chunks:
|
|
716
|
+
# Build hierarchical relationships
|
|
717
|
+
chunks_with_hierarchy = self._build_chunk_hierarchy(chunks)
|
|
718
|
+
|
|
713
719
|
# Add chunks to database
|
|
714
|
-
await self.database.add_chunks(
|
|
720
|
+
await self.database.add_chunks(chunks_with_hierarchy)
|
|
715
721
|
chunks_added = len(chunks)
|
|
716
722
|
logger.debug(f"Indexed {chunks_added} chunks from {file_path}")
|
|
717
723
|
|
|
@@ -721,11 +727,86 @@ class SemanticIndexer:
|
|
|
721
727
|
metadata[str(file_path)] = os.path.getmtime(file_path)
|
|
722
728
|
|
|
723
729
|
except Exception as e:
|
|
724
|
-
|
|
730
|
+
error_msg = f"Failed to index file {file_path}: {type(e).__name__}: {str(e)}"
|
|
731
|
+
logger.error(error_msg)
|
|
725
732
|
success = False
|
|
726
733
|
|
|
734
|
+
# Save error to error log file
|
|
735
|
+
try:
|
|
736
|
+
error_log_path = self.project_root / ".mcp-vector-search" / "indexing_errors.log"
|
|
737
|
+
with open(error_log_path, "a", encoding="utf-8") as f:
|
|
738
|
+
from datetime import datetime
|
|
739
|
+
timestamp = datetime.now().isoformat()
|
|
740
|
+
f.write(f"[{timestamp}] {error_msg}\n")
|
|
741
|
+
except Exception as log_err:
|
|
742
|
+
logger.debug(f"Failed to write error log: {log_err}")
|
|
743
|
+
|
|
727
744
|
# Yield progress update
|
|
728
745
|
yield (file_path, chunks_added, success)
|
|
729
746
|
|
|
730
747
|
# Save metadata at the end
|
|
731
748
|
self._save_index_metadata(metadata)
|
|
749
|
+
|
|
750
|
+
def _build_chunk_hierarchy(self, chunks: list[CodeChunk]) -> list[CodeChunk]:
|
|
751
|
+
"""Build parent-child relationships between chunks.
|
|
752
|
+
|
|
753
|
+
Logic:
|
|
754
|
+
- Module chunks (chunk_type="module") have depth 0
|
|
755
|
+
- Class chunks have depth 1, parent is module
|
|
756
|
+
- Method chunks have depth 2, parent is class
|
|
757
|
+
- Function chunks outside classes have depth 1, parent is module
|
|
758
|
+
- Nested classes increment depth
|
|
759
|
+
|
|
760
|
+
Args:
|
|
761
|
+
chunks: List of code chunks to process
|
|
762
|
+
|
|
763
|
+
Returns:
|
|
764
|
+
List of chunks with hierarchy relationships established
|
|
765
|
+
"""
|
|
766
|
+
if not chunks:
|
|
767
|
+
return chunks
|
|
768
|
+
|
|
769
|
+
# Group chunks by type and name
|
|
770
|
+
module_chunks = [c for c in chunks if c.chunk_type in ("module", "imports")]
|
|
771
|
+
class_chunks = [c for c in chunks if c.chunk_type in ("class", "interface", "mixin")]
|
|
772
|
+
function_chunks = [c for c in chunks if c.chunk_type in ("function", "method", "constructor")]
|
|
773
|
+
|
|
774
|
+
# Build relationships
|
|
775
|
+
for func in function_chunks:
|
|
776
|
+
if func.class_name:
|
|
777
|
+
# Find parent class
|
|
778
|
+
parent_class = next(
|
|
779
|
+
(c for c in class_chunks if c.class_name == func.class_name),
|
|
780
|
+
None
|
|
781
|
+
)
|
|
782
|
+
if parent_class:
|
|
783
|
+
func.parent_chunk_id = parent_class.chunk_id
|
|
784
|
+
func.chunk_depth = parent_class.chunk_depth + 1
|
|
785
|
+
if func.chunk_id not in parent_class.child_chunk_ids:
|
|
786
|
+
parent_class.child_chunk_ids.append(func.chunk_id)
|
|
787
|
+
else:
|
|
788
|
+
# Top-level function
|
|
789
|
+
if not func.chunk_depth:
|
|
790
|
+
func.chunk_depth = 1
|
|
791
|
+
# Link to module if exists
|
|
792
|
+
if module_chunks and not func.parent_chunk_id:
|
|
793
|
+
func.parent_chunk_id = module_chunks[0].chunk_id
|
|
794
|
+
if func.chunk_id not in module_chunks[0].child_chunk_ids:
|
|
795
|
+
module_chunks[0].child_chunk_ids.append(func.chunk_id)
|
|
796
|
+
|
|
797
|
+
for cls in class_chunks:
|
|
798
|
+
# Classes without parent are top-level (depth 1)
|
|
799
|
+
if not cls.chunk_depth:
|
|
800
|
+
cls.chunk_depth = 1
|
|
801
|
+
# Link to module if exists
|
|
802
|
+
if module_chunks and not cls.parent_chunk_id:
|
|
803
|
+
cls.parent_chunk_id = module_chunks[0].chunk_id
|
|
804
|
+
if cls.chunk_id not in module_chunks[0].child_chunk_ids:
|
|
805
|
+
module_chunks[0].child_chunk_ids.append(cls.chunk_id)
|
|
806
|
+
|
|
807
|
+
# Module chunks stay at depth 0
|
|
808
|
+
for mod in module_chunks:
|
|
809
|
+
if not mod.chunk_depth:
|
|
810
|
+
mod.chunk_depth = 0
|
|
811
|
+
|
|
812
|
+
return chunks
|
mcp_vector_search/core/models.py
CHANGED
|
@@ -21,12 +21,40 @@ class CodeChunk:
|
|
|
21
21
|
class_name: str | None = None
|
|
22
22
|
docstring: str | None = None
|
|
23
23
|
imports: list[str] = None
|
|
24
|
+
|
|
25
|
+
# Enhancement 1: Complexity scoring
|
|
24
26
|
complexity_score: float = 0.0
|
|
25
27
|
|
|
28
|
+
# Enhancement 3: Hierarchical relationships
|
|
29
|
+
chunk_id: str | None = None
|
|
30
|
+
parent_chunk_id: str | None = None
|
|
31
|
+
child_chunk_ids: list[str] = None
|
|
32
|
+
chunk_depth: int = 0
|
|
33
|
+
|
|
34
|
+
# Enhancement 4: Enhanced metadata
|
|
35
|
+
decorators: list[str] = None
|
|
36
|
+
parameters: list[dict] = None
|
|
37
|
+
return_type: str | None = None
|
|
38
|
+
type_annotations: dict[str, str] = None
|
|
39
|
+
|
|
26
40
|
def __post_init__(self) -> None:
|
|
27
|
-
"""Initialize default values."""
|
|
41
|
+
"""Initialize default values and generate chunk ID."""
|
|
28
42
|
if self.imports is None:
|
|
29
43
|
self.imports = []
|
|
44
|
+
if self.child_chunk_ids is None:
|
|
45
|
+
self.child_chunk_ids = []
|
|
46
|
+
if self.decorators is None:
|
|
47
|
+
self.decorators = []
|
|
48
|
+
if self.parameters is None:
|
|
49
|
+
self.parameters = []
|
|
50
|
+
if self.type_annotations is None:
|
|
51
|
+
self.type_annotations = {}
|
|
52
|
+
|
|
53
|
+
# Generate chunk ID if not provided
|
|
54
|
+
if self.chunk_id is None:
|
|
55
|
+
import hashlib
|
|
56
|
+
id_string = f"{self.file_path}:{self.chunk_type}:{self.start_line}:{self.end_line}"
|
|
57
|
+
self.chunk_id = hashlib.sha256(id_string.encode()).hexdigest()[:16]
|
|
30
58
|
|
|
31
59
|
@property
|
|
32
60
|
def id(self) -> str:
|
|
@@ -52,6 +80,14 @@ class CodeChunk:
|
|
|
52
80
|
"docstring": self.docstring,
|
|
53
81
|
"imports": self.imports,
|
|
54
82
|
"complexity_score": self.complexity_score,
|
|
83
|
+
"chunk_id": self.chunk_id,
|
|
84
|
+
"parent_chunk_id": self.parent_chunk_id,
|
|
85
|
+
"child_chunk_ids": self.child_chunk_ids,
|
|
86
|
+
"chunk_depth": self.chunk_depth,
|
|
87
|
+
"decorators": self.decorators,
|
|
88
|
+
"parameters": self.parameters,
|
|
89
|
+
"return_type": self.return_type,
|
|
90
|
+
"type_annotations": self.type_annotations,
|
|
55
91
|
}
|
|
56
92
|
|
|
57
93
|
@classmethod
|
|
@@ -69,6 +105,14 @@ class CodeChunk:
|
|
|
69
105
|
docstring=data.get("docstring"),
|
|
70
106
|
imports=data.get("imports", []),
|
|
71
107
|
complexity_score=data.get("complexity_score", 0.0),
|
|
108
|
+
chunk_id=data.get("chunk_id"),
|
|
109
|
+
parent_chunk_id=data.get("parent_chunk_id"),
|
|
110
|
+
child_chunk_ids=data.get("child_chunk_ids", []),
|
|
111
|
+
chunk_depth=data.get("chunk_depth", 0),
|
|
112
|
+
decorators=data.get("decorators", []),
|
|
113
|
+
parameters=data.get("parameters", []),
|
|
114
|
+
return_type=data.get("return_type"),
|
|
115
|
+
type_annotations=data.get("type_annotations", {}),
|
|
72
116
|
)
|
|
73
117
|
|
|
74
118
|
|
|
@@ -64,6 +64,68 @@ class BaseParser(ABC):
|
|
|
64
64
|
"""
|
|
65
65
|
...
|
|
66
66
|
|
|
67
|
+
def _calculate_complexity(self, node, language: str | None = None) -> float:
|
|
68
|
+
"""Calculate cyclomatic complexity from AST node.
|
|
69
|
+
|
|
70
|
+
Cyclomatic complexity = Number of decision points + 1
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
node: AST node (tree-sitter)
|
|
74
|
+
language: Programming language for language-specific patterns (defaults to self.language)
|
|
75
|
+
|
|
76
|
+
Returns:
|
|
77
|
+
Complexity score (1.0 = simple, 10+ = complex)
|
|
78
|
+
"""
|
|
79
|
+
if language is None:
|
|
80
|
+
language = self.language
|
|
81
|
+
|
|
82
|
+
if not hasattr(node, 'children'):
|
|
83
|
+
return 1.0
|
|
84
|
+
|
|
85
|
+
complexity = 1.0 # Base complexity
|
|
86
|
+
|
|
87
|
+
# Language-specific decision node types
|
|
88
|
+
decision_nodes = {
|
|
89
|
+
"python": {
|
|
90
|
+
"if_statement", "elif_clause", "while_statement", "for_statement",
|
|
91
|
+
"except_clause", "with_statement", "conditional_expression",
|
|
92
|
+
"boolean_operator" # and, or
|
|
93
|
+
},
|
|
94
|
+
"javascript": {
|
|
95
|
+
"if_statement", "while_statement", "for_statement", "for_in_statement",
|
|
96
|
+
"switch_case", "catch_clause", "conditional_expression", "ternary_expression"
|
|
97
|
+
},
|
|
98
|
+
"typescript": {
|
|
99
|
+
"if_statement", "while_statement", "for_statement", "for_in_statement",
|
|
100
|
+
"switch_case", "catch_clause", "conditional_expression", "ternary_expression"
|
|
101
|
+
},
|
|
102
|
+
"dart": {
|
|
103
|
+
"if_statement", "while_statement", "for_statement", "for_in_statement",
|
|
104
|
+
"switch_case", "catch_clause", "conditional_expression"
|
|
105
|
+
},
|
|
106
|
+
"php": {
|
|
107
|
+
"if_statement", "elseif_clause", "while_statement", "foreach_statement",
|
|
108
|
+
"for_statement", "switch_case", "catch_clause", "ternary_expression"
|
|
109
|
+
},
|
|
110
|
+
"ruby": {
|
|
111
|
+
"if", "unless", "while", "until", "for", "case", "rescue",
|
|
112
|
+
"conditional"
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
nodes_to_count = decision_nodes.get(language, decision_nodes.get("python", set()))
|
|
117
|
+
|
|
118
|
+
def count_decision_points(n):
|
|
119
|
+
nonlocal complexity
|
|
120
|
+
if hasattr(n, 'type') and n.type in nodes_to_count:
|
|
121
|
+
complexity += 1
|
|
122
|
+
if hasattr(n, 'children'):
|
|
123
|
+
for child in n.children:
|
|
124
|
+
count_decision_points(child)
|
|
125
|
+
|
|
126
|
+
count_decision_points(node)
|
|
127
|
+
return complexity
|
|
128
|
+
|
|
67
129
|
def _create_chunk(
|
|
68
130
|
self,
|
|
69
131
|
content: str,
|
|
@@ -74,6 +136,13 @@ class BaseParser(ABC):
|
|
|
74
136
|
function_name: str | None = None,
|
|
75
137
|
class_name: str | None = None,
|
|
76
138
|
docstring: str | None = None,
|
|
139
|
+
complexity_score: float = 0.0,
|
|
140
|
+
decorators: list[str] | None = None,
|
|
141
|
+
parameters: list[dict] | None = None,
|
|
142
|
+
return_type: str | None = None,
|
|
143
|
+
chunk_id: str | None = None,
|
|
144
|
+
parent_chunk_id: str | None = None,
|
|
145
|
+
chunk_depth: int = 0,
|
|
77
146
|
) -> CodeChunk:
|
|
78
147
|
"""Create a code chunk with metadata.
|
|
79
148
|
|
|
@@ -86,6 +155,13 @@ class BaseParser(ABC):
|
|
|
86
155
|
function_name: Function name if applicable
|
|
87
156
|
class_name: Class name if applicable
|
|
88
157
|
docstring: Docstring if applicable
|
|
158
|
+
complexity_score: Cyclomatic complexity score
|
|
159
|
+
decorators: List of decorators/annotations
|
|
160
|
+
parameters: List of function parameters with metadata
|
|
161
|
+
return_type: Return type annotation
|
|
162
|
+
chunk_id: Unique chunk identifier
|
|
163
|
+
parent_chunk_id: Parent chunk ID for hierarchical relationships
|
|
164
|
+
chunk_depth: Nesting level in code hierarchy
|
|
89
165
|
|
|
90
166
|
Returns:
|
|
91
167
|
CodeChunk instance
|
|
@@ -100,6 +176,13 @@ class BaseParser(ABC):
|
|
|
100
176
|
function_name=function_name,
|
|
101
177
|
class_name=class_name,
|
|
102
178
|
docstring=docstring,
|
|
179
|
+
complexity_score=complexity_score,
|
|
180
|
+
decorators=decorators or [],
|
|
181
|
+
parameters=parameters or [],
|
|
182
|
+
return_type=return_type,
|
|
183
|
+
chunk_id=chunk_id,
|
|
184
|
+
parent_chunk_id=parent_chunk_id,
|
|
185
|
+
chunk_depth=chunk_depth,
|
|
103
186
|
)
|
|
104
187
|
|
|
105
188
|
def _split_into_lines(self, content: str) -> list[str]:
|
|
@@ -10,11 +10,32 @@ from .base import BaseParser
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class JavaScriptParser(BaseParser):
|
|
13
|
-
"""JavaScript
|
|
13
|
+
"""JavaScript parser with tree-sitter AST support and fallback regex parsing."""
|
|
14
14
|
|
|
15
15
|
def __init__(self, language: str = "javascript") -> None:
    """Set up the parser for ``language`` and try to enable tree-sitter."""
    super().__init__(language)
    # Begin in the "no tree-sitter" state; _initialize_parser() fills these
    # in when it manages to load a parser.
    self._parser = self._language = None
    self._use_tree_sitter = False
    self._initialize_parser()
|
|
22
|
+
|
|
23
|
+
def _initialize_parser(self) -> None:
    """Try to load the JavaScript tree-sitter grammar and parser.

    On success ``_language`` and ``_parser`` are populated and
    ``_use_tree_sitter`` is set to True. Any exception (including a
    missing ``tree_sitter_language_pack``) is logged at debug level and
    leaves ``_use_tree_sitter`` False.
    """
    tree_sitter_ready = False
    try:
        from tree_sitter_language_pack import get_language, get_parser

        self._language = get_language("javascript")
        self._parser = get_parser("javascript")

        logger.debug(
            "JavaScript Tree-sitter parser initialized via tree-sitter-language-pack"
        )
        tree_sitter_ready = True
    except Exception as e:
        logger.debug(f"tree-sitter-language-pack failed: {e}, using regex fallback")

    self._use_tree_sitter = tree_sitter_ready
|
|
18
39
|
|
|
19
40
|
async def parse_file(self, file_path: Path) -> list[CodeChunk]:
|
|
20
41
|
"""Parse a JavaScript/TypeScript file and extract code chunks."""
|
|
@@ -31,7 +52,317 @@ class JavaScriptParser(BaseParser):
|
|
|
31
52
|
if not content.strip():
|
|
32
53
|
return []
|
|
33
54
|
|
|
34
|
-
|
|
55
|
+
if self._use_tree_sitter:
|
|
56
|
+
try:
|
|
57
|
+
tree = self._parser.parse(content.encode('utf-8'))
|
|
58
|
+
return self._extract_chunks_from_tree(tree, content, file_path)
|
|
59
|
+
except Exception as e:
|
|
60
|
+
logger.warning(f"Tree-sitter parsing failed for {file_path}: {e}")
|
|
61
|
+
return await self._regex_parse(content, file_path)
|
|
62
|
+
else:
|
|
63
|
+
return await self._regex_parse(content, file_path)
|
|
64
|
+
|
|
65
|
+
def _extract_chunks_from_tree(
|
|
66
|
+
self, tree, content: str, file_path: Path
|
|
67
|
+
) -> list[CodeChunk]:
|
|
68
|
+
"""Extract code chunks from JavaScript AST."""
|
|
69
|
+
chunks = []
|
|
70
|
+
lines = self._split_into_lines(content)
|
|
71
|
+
|
|
72
|
+
def visit_node(node, current_class=None):
|
|
73
|
+
"""Recursively visit AST nodes."""
|
|
74
|
+
node_type = node.type
|
|
75
|
+
|
|
76
|
+
if node_type == "function_declaration":
|
|
77
|
+
chunks.extend(self._extract_function(node, lines, file_path, current_class))
|
|
78
|
+
elif node_type == "arrow_function":
|
|
79
|
+
chunks.extend(self._extract_arrow_function(node, lines, file_path, current_class))
|
|
80
|
+
elif node_type == "class_declaration":
|
|
81
|
+
class_chunks = self._extract_class(node, lines, file_path)
|
|
82
|
+
chunks.extend(class_chunks)
|
|
83
|
+
|
|
84
|
+
# Visit class methods
|
|
85
|
+
class_name = self._get_node_name(node)
|
|
86
|
+
for child in node.children:
|
|
87
|
+
visit_node(child, class_name)
|
|
88
|
+
elif node_type == "method_definition":
|
|
89
|
+
chunks.extend(self._extract_method(node, lines, file_path, current_class))
|
|
90
|
+
elif node_type == "lexical_declaration":
|
|
91
|
+
# const/let declarations might be arrow functions
|
|
92
|
+
chunks.extend(self._extract_variable_function(node, lines, file_path, current_class))
|
|
93
|
+
|
|
94
|
+
# Recurse into children
|
|
95
|
+
if hasattr(node, 'children'):
|
|
96
|
+
for child in node.children:
|
|
97
|
+
if child.type not in ("class_declaration", "function_declaration"):
|
|
98
|
+
visit_node(child, current_class)
|
|
99
|
+
|
|
100
|
+
visit_node(tree.root_node)
|
|
101
|
+
|
|
102
|
+
# If no specific chunks found, create a single chunk for the whole file
|
|
103
|
+
if not chunks:
|
|
104
|
+
chunks.append(
|
|
105
|
+
self._create_chunk(
|
|
106
|
+
content=content,
|
|
107
|
+
file_path=file_path,
|
|
108
|
+
start_line=1,
|
|
109
|
+
end_line=len(lines),
|
|
110
|
+
chunk_type="module",
|
|
111
|
+
)
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
return chunks
|
|
115
|
+
|
|
116
|
+
def _extract_function(
|
|
117
|
+
self, node, lines: list[str], file_path: Path, class_name: str | None = None
|
|
118
|
+
) -> list[CodeChunk]:
|
|
119
|
+
"""Extract function declaration from AST."""
|
|
120
|
+
function_name = self._get_node_name(node)
|
|
121
|
+
if not function_name:
|
|
122
|
+
return []
|
|
123
|
+
|
|
124
|
+
start_line = node.start_point[0] + 1
|
|
125
|
+
end_line = node.end_point[0] + 1
|
|
126
|
+
|
|
127
|
+
content = self._get_line_range(lines, start_line, end_line)
|
|
128
|
+
docstring = self._extract_jsdoc_from_node(node, lines)
|
|
129
|
+
|
|
130
|
+
# Calculate complexity
|
|
131
|
+
complexity = self._calculate_complexity(node, "javascript")
|
|
132
|
+
|
|
133
|
+
# Extract parameters
|
|
134
|
+
parameters = self._extract_js_parameters(node)
|
|
135
|
+
|
|
136
|
+
chunk = self._create_chunk(
|
|
137
|
+
content=content,
|
|
138
|
+
file_path=file_path,
|
|
139
|
+
start_line=start_line,
|
|
140
|
+
end_line=end_line,
|
|
141
|
+
chunk_type="function",
|
|
142
|
+
function_name=function_name,
|
|
143
|
+
class_name=class_name,
|
|
144
|
+
docstring=docstring,
|
|
145
|
+
complexity_score=complexity,
|
|
146
|
+
parameters=parameters,
|
|
147
|
+
chunk_depth=2 if class_name else 1,
|
|
148
|
+
)
|
|
149
|
+
return [chunk]
|
|
150
|
+
|
|
151
|
+
def _extract_arrow_function(
|
|
152
|
+
self, node, lines: list[str], file_path: Path, class_name: str | None = None
|
|
153
|
+
) -> list[CodeChunk]:
|
|
154
|
+
"""Extract arrow function from AST."""
|
|
155
|
+
# Arrow functions often don't have explicit names, try to get from parent
|
|
156
|
+
parent = getattr(node, 'parent', None)
|
|
157
|
+
function_name = None
|
|
158
|
+
|
|
159
|
+
if parent and parent.type == "variable_declarator":
|
|
160
|
+
function_name = self._get_node_name(parent)
|
|
161
|
+
|
|
162
|
+
if not function_name:
|
|
163
|
+
return []
|
|
164
|
+
|
|
165
|
+
start_line = node.start_point[0] + 1
|
|
166
|
+
end_line = node.end_point[0] + 1
|
|
167
|
+
|
|
168
|
+
content = self._get_line_range(lines, start_line, end_line)
|
|
169
|
+
docstring = self._extract_jsdoc_from_node(node, lines)
|
|
170
|
+
|
|
171
|
+
# Calculate complexity
|
|
172
|
+
complexity = self._calculate_complexity(node, "javascript")
|
|
173
|
+
|
|
174
|
+
# Extract parameters
|
|
175
|
+
parameters = self._extract_js_parameters(node)
|
|
176
|
+
|
|
177
|
+
chunk = self._create_chunk(
|
|
178
|
+
content=content,
|
|
179
|
+
file_path=file_path,
|
|
180
|
+
start_line=start_line,
|
|
181
|
+
end_line=end_line,
|
|
182
|
+
chunk_type="function",
|
|
183
|
+
function_name=function_name,
|
|
184
|
+
class_name=class_name,
|
|
185
|
+
docstring=docstring,
|
|
186
|
+
complexity_score=complexity,
|
|
187
|
+
parameters=parameters,
|
|
188
|
+
chunk_depth=2 if class_name else 1,
|
|
189
|
+
)
|
|
190
|
+
return [chunk]
|
|
191
|
+
|
|
192
|
+
def _extract_variable_function(
|
|
193
|
+
self, node, lines: list[str], file_path: Path, class_name: str | None = None
|
|
194
|
+
) -> list[CodeChunk]:
|
|
195
|
+
"""Extract function from variable declaration (const func = ...)."""
|
|
196
|
+
chunks = []
|
|
197
|
+
|
|
198
|
+
for child in node.children:
|
|
199
|
+
if child.type == "variable_declarator":
|
|
200
|
+
# Check if it's a function assignment
|
|
201
|
+
for subchild in child.children:
|
|
202
|
+
if subchild.type in ("arrow_function", "function"):
|
|
203
|
+
func_name = self._get_node_name(child)
|
|
204
|
+
if func_name:
|
|
205
|
+
start_line = child.start_point[0] + 1
|
|
206
|
+
end_line = child.end_point[0] + 1
|
|
207
|
+
|
|
208
|
+
content = self._get_line_range(lines, start_line, end_line)
|
|
209
|
+
docstring = self._extract_jsdoc_from_node(child, lines)
|
|
210
|
+
|
|
211
|
+
# Calculate complexity
|
|
212
|
+
complexity = self._calculate_complexity(subchild, "javascript")
|
|
213
|
+
|
|
214
|
+
# Extract parameters
|
|
215
|
+
parameters = self._extract_js_parameters(subchild)
|
|
216
|
+
|
|
217
|
+
chunk = self._create_chunk(
|
|
218
|
+
content=content,
|
|
219
|
+
file_path=file_path,
|
|
220
|
+
start_line=start_line,
|
|
221
|
+
end_line=end_line,
|
|
222
|
+
chunk_type="function",
|
|
223
|
+
function_name=func_name,
|
|
224
|
+
class_name=class_name,
|
|
225
|
+
docstring=docstring,
|
|
226
|
+
complexity_score=complexity,
|
|
227
|
+
parameters=parameters,
|
|
228
|
+
chunk_depth=2 if class_name else 1,
|
|
229
|
+
)
|
|
230
|
+
chunks.append(chunk)
|
|
231
|
+
|
|
232
|
+
return chunks
|
|
233
|
+
|
|
234
|
+
def _extract_class(
|
|
235
|
+
self, node, lines: list[str], file_path: Path
|
|
236
|
+
) -> list[CodeChunk]:
|
|
237
|
+
"""Extract class declaration from AST."""
|
|
238
|
+
class_name = self._get_node_name(node)
|
|
239
|
+
if not class_name:
|
|
240
|
+
return []
|
|
241
|
+
|
|
242
|
+
start_line = node.start_point[0] + 1
|
|
243
|
+
end_line = node.end_point[0] + 1
|
|
244
|
+
|
|
245
|
+
content = self._get_line_range(lines, start_line, end_line)
|
|
246
|
+
docstring = self._extract_jsdoc_from_node(node, lines)
|
|
247
|
+
|
|
248
|
+
# Calculate complexity
|
|
249
|
+
complexity = self._calculate_complexity(node, "javascript")
|
|
250
|
+
|
|
251
|
+
chunk = self._create_chunk(
|
|
252
|
+
content=content,
|
|
253
|
+
file_path=file_path,
|
|
254
|
+
start_line=start_line,
|
|
255
|
+
end_line=end_line,
|
|
256
|
+
chunk_type="class",
|
|
257
|
+
class_name=class_name,
|
|
258
|
+
docstring=docstring,
|
|
259
|
+
complexity_score=complexity,
|
|
260
|
+
chunk_depth=1,
|
|
261
|
+
)
|
|
262
|
+
return [chunk]
|
|
263
|
+
|
|
264
|
+
def _extract_method(
|
|
265
|
+
self, node, lines: list[str], file_path: Path, class_name: str | None = None
|
|
266
|
+
) -> list[CodeChunk]:
|
|
267
|
+
"""Extract method definition from class."""
|
|
268
|
+
method_name = self._get_node_name(node)
|
|
269
|
+
if not method_name:
|
|
270
|
+
return []
|
|
271
|
+
|
|
272
|
+
start_line = node.start_point[0] + 1
|
|
273
|
+
end_line = node.end_point[0] + 1
|
|
274
|
+
|
|
275
|
+
content = self._get_line_range(lines, start_line, end_line)
|
|
276
|
+
docstring = self._extract_jsdoc_from_node(node, lines)
|
|
277
|
+
|
|
278
|
+
# Calculate complexity
|
|
279
|
+
complexity = self._calculate_complexity(node, "javascript")
|
|
280
|
+
|
|
281
|
+
# Extract parameters
|
|
282
|
+
parameters = self._extract_js_parameters(node)
|
|
283
|
+
|
|
284
|
+
# Check for decorators (TypeScript)
|
|
285
|
+
decorators = self._extract_decorators_from_node(node)
|
|
286
|
+
|
|
287
|
+
chunk = self._create_chunk(
|
|
288
|
+
content=content,
|
|
289
|
+
file_path=file_path,
|
|
290
|
+
start_line=start_line,
|
|
291
|
+
end_line=end_line,
|
|
292
|
+
chunk_type="method",
|
|
293
|
+
function_name=method_name,
|
|
294
|
+
class_name=class_name,
|
|
295
|
+
docstring=docstring,
|
|
296
|
+
complexity_score=complexity,
|
|
297
|
+
parameters=parameters,
|
|
298
|
+
decorators=decorators,
|
|
299
|
+
chunk_depth=2,
|
|
300
|
+
)
|
|
301
|
+
return [chunk]
|
|
302
|
+
|
|
303
|
+
def _get_node_name(self, node) -> str | None:
|
|
304
|
+
"""Extract name from a named node."""
|
|
305
|
+
for child in node.children:
|
|
306
|
+
if child.type in ("identifier", "property_identifier"):
|
|
307
|
+
return child.text.decode("utf-8")
|
|
308
|
+
return None
|
|
309
|
+
|
|
310
|
+
def _get_node_text(self, node) -> str:
|
|
311
|
+
"""Get text content of a node."""
|
|
312
|
+
if hasattr(node, 'text'):
|
|
313
|
+
return node.text.decode('utf-8')
|
|
314
|
+
return ""
|
|
315
|
+
|
|
316
|
+
def _extract_js_parameters(self, node) -> list[dict]:
|
|
317
|
+
"""Extract function parameters from JavaScript/TypeScript AST."""
|
|
318
|
+
parameters = []
|
|
319
|
+
|
|
320
|
+
for child in node.children:
|
|
321
|
+
if child.type == "formal_parameters":
|
|
322
|
+
for param_node in child.children:
|
|
323
|
+
if param_node.type in ("identifier", "required_parameter", "optional_parameter", "rest_parameter"):
|
|
324
|
+
param_info = {
|
|
325
|
+
"name": None,
|
|
326
|
+
"type": None,
|
|
327
|
+
"default": None
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
# Extract parameter details
|
|
331
|
+
if param_node.type == "identifier":
|
|
332
|
+
param_info["name"] = self._get_node_text(param_node)
|
|
333
|
+
else:
|
|
334
|
+
# TypeScript typed parameters
|
|
335
|
+
for subchild in param_node.children:
|
|
336
|
+
if subchild.type == "identifier":
|
|
337
|
+
param_info["name"] = self._get_node_text(subchild)
|
|
338
|
+
elif subchild.type == "type_annotation":
|
|
339
|
+
param_info["type"] = self._get_node_text(subchild)
|
|
340
|
+
elif "default" in subchild.type or subchild.type == "number":
|
|
341
|
+
param_info["default"] = self._get_node_text(subchild)
|
|
342
|
+
|
|
343
|
+
if param_info["name"] and param_info["name"] not in ("(", ")", ",", "..."):
|
|
344
|
+
# Clean up rest parameters
|
|
345
|
+
if param_info["name"].startswith("..."):
|
|
346
|
+
param_info["name"] = param_info["name"][3:]
|
|
347
|
+
param_info["rest"] = True
|
|
348
|
+
parameters.append(param_info)
|
|
349
|
+
|
|
350
|
+
return parameters
|
|
351
|
+
|
|
352
|
+
def _extract_decorators_from_node(self, node) -> list[str]:
|
|
353
|
+
"""Extract decorators from TypeScript node."""
|
|
354
|
+
decorators = []
|
|
355
|
+
|
|
356
|
+
for child in node.children:
|
|
357
|
+
if child.type == "decorator":
|
|
358
|
+
decorators.append(self._get_node_text(child))
|
|
359
|
+
|
|
360
|
+
return decorators
|
|
361
|
+
|
|
362
|
+
def _extract_jsdoc_from_node(self, node, lines: list[str]) -> str | None:
|
|
363
|
+
"""Extract JSDoc comment from before a node."""
|
|
364
|
+
start_line = node.start_point[0]
|
|
365
|
+
return self._extract_jsdoc(lines, start_line + 1)
|
|
35
366
|
|
|
36
367
|
async def _regex_parse(self, content: str, file_path: Path) -> list[CodeChunk]:
|
|
37
368
|
"""Parse JavaScript/TypeScript using regex patterns."""
|
|
@@ -262,3 +593,20 @@ class TypeScriptParser(JavaScriptParser):
|
|
|
262
593
|
def __init__(self) -> None:
    """Create the parser with its language fixed to "typescript"."""
    super().__init__(language="typescript")
|
|
596
|
+
|
|
597
|
+
def _initialize_parser(self) -> None:
    """Try to load the TypeScript tree-sitter grammar and parser.

    On success ``_language`` and ``_parser`` are populated and
    ``_use_tree_sitter`` is set to True. Any exception (including a
    missing ``tree_sitter_language_pack``) is logged at debug level and
    leaves ``_use_tree_sitter`` False.
    """
    tree_sitter_ready = False
    try:
        from tree_sitter_language_pack import get_language, get_parser

        self._language = get_language("typescript")
        self._parser = get_parser("typescript")

        logger.debug(
            "TypeScript Tree-sitter parser initialized via tree-sitter-language-pack"
        )
        tree_sitter_ready = True
    except Exception as e:
        logger.debug(f"tree-sitter-language-pack failed: {e}, using regex fallback")

    self._use_tree_sitter = tree_sitter_ready
|