codexa 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. codexa-0.4.0.dist-info/METADATA +650 -0
  2. codexa-0.4.0.dist-info/RECORD +189 -0
  3. codexa-0.4.0.dist-info/WHEEL +5 -0
  4. codexa-0.4.0.dist-info/entry_points.txt +2 -0
  5. codexa-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. codexa-0.4.0.dist-info/top_level.txt +1 -0
  7. semantic_code_intelligence/__init__.py +5 -0
  8. semantic_code_intelligence/analysis/__init__.py +21 -0
  9. semantic_code_intelligence/analysis/ai_features.py +351 -0
  10. semantic_code_intelligence/bridge/__init__.py +28 -0
  11. semantic_code_intelligence/bridge/context_provider.py +245 -0
  12. semantic_code_intelligence/bridge/protocol.py +167 -0
  13. semantic_code_intelligence/bridge/server.py +348 -0
  14. semantic_code_intelligence/bridge/vscode.py +271 -0
  15. semantic_code_intelligence/ci/__init__.py +13 -0
  16. semantic_code_intelligence/ci/hooks.py +98 -0
  17. semantic_code_intelligence/ci/hotspots.py +272 -0
  18. semantic_code_intelligence/ci/impact.py +246 -0
  19. semantic_code_intelligence/ci/metrics.py +591 -0
  20. semantic_code_intelligence/ci/pr.py +412 -0
  21. semantic_code_intelligence/ci/quality.py +557 -0
  22. semantic_code_intelligence/ci/templates.py +164 -0
  23. semantic_code_intelligence/ci/trace.py +224 -0
  24. semantic_code_intelligence/cli/__init__.py +0 -0
  25. semantic_code_intelligence/cli/commands/__init__.py +0 -0
  26. semantic_code_intelligence/cli/commands/ask_cmd.py +153 -0
  27. semantic_code_intelligence/cli/commands/benchmark_cmd.py +303 -0
  28. semantic_code_intelligence/cli/commands/chat_cmd.py +252 -0
  29. semantic_code_intelligence/cli/commands/ci_gen_cmd.py +74 -0
  30. semantic_code_intelligence/cli/commands/context_cmd.py +120 -0
  31. semantic_code_intelligence/cli/commands/cross_refactor_cmd.py +113 -0
  32. semantic_code_intelligence/cli/commands/deps_cmd.py +91 -0
  33. semantic_code_intelligence/cli/commands/docs_cmd.py +101 -0
  34. semantic_code_intelligence/cli/commands/doctor_cmd.py +147 -0
  35. semantic_code_intelligence/cli/commands/evolve_cmd.py +171 -0
  36. semantic_code_intelligence/cli/commands/explain_cmd.py +112 -0
  37. semantic_code_intelligence/cli/commands/gate_cmd.py +135 -0
  38. semantic_code_intelligence/cli/commands/grep_cmd.py +234 -0
  39. semantic_code_intelligence/cli/commands/hotspots_cmd.py +119 -0
  40. semantic_code_intelligence/cli/commands/impact_cmd.py +131 -0
  41. semantic_code_intelligence/cli/commands/index_cmd.py +138 -0
  42. semantic_code_intelligence/cli/commands/init_cmd.py +152 -0
  43. semantic_code_intelligence/cli/commands/investigate_cmd.py +163 -0
  44. semantic_code_intelligence/cli/commands/languages_cmd.py +101 -0
  45. semantic_code_intelligence/cli/commands/lsp_cmd.py +49 -0
  46. semantic_code_intelligence/cli/commands/mcp_cmd.py +50 -0
  47. semantic_code_intelligence/cli/commands/metrics_cmd.py +264 -0
  48. semantic_code_intelligence/cli/commands/models_cmd.py +157 -0
  49. semantic_code_intelligence/cli/commands/plugin_cmd.py +275 -0
  50. semantic_code_intelligence/cli/commands/pr_summary_cmd.py +178 -0
  51. semantic_code_intelligence/cli/commands/quality_cmd.py +208 -0
  52. semantic_code_intelligence/cli/commands/refactor_cmd.py +103 -0
  53. semantic_code_intelligence/cli/commands/review_cmd.py +88 -0
  54. semantic_code_intelligence/cli/commands/search_cmd.py +236 -0
  55. semantic_code_intelligence/cli/commands/serve_cmd.py +117 -0
  56. semantic_code_intelligence/cli/commands/suggest_cmd.py +100 -0
  57. semantic_code_intelligence/cli/commands/summary_cmd.py +78 -0
  58. semantic_code_intelligence/cli/commands/tool_cmd.py +282 -0
  59. semantic_code_intelligence/cli/commands/trace_cmd.py +123 -0
  60. semantic_code_intelligence/cli/commands/tui_cmd.py +58 -0
  61. semantic_code_intelligence/cli/commands/viz_cmd.py +127 -0
  62. semantic_code_intelligence/cli/commands/watch_cmd.py +72 -0
  63. semantic_code_intelligence/cli/commands/web_cmd.py +61 -0
  64. semantic_code_intelligence/cli/commands/workspace_cmd.py +250 -0
  65. semantic_code_intelligence/cli/main.py +65 -0
  66. semantic_code_intelligence/cli/router.py +92 -0
  67. semantic_code_intelligence/config/__init__.py +0 -0
  68. semantic_code_intelligence/config/settings.py +260 -0
  69. semantic_code_intelligence/context/__init__.py +19 -0
  70. semantic_code_intelligence/context/engine.py +429 -0
  71. semantic_code_intelligence/context/memory.py +253 -0
  72. semantic_code_intelligence/daemon/__init__.py +1 -0
  73. semantic_code_intelligence/daemon/watcher.py +515 -0
  74. semantic_code_intelligence/docs/__init__.py +1080 -0
  75. semantic_code_intelligence/embeddings/__init__.py +0 -0
  76. semantic_code_intelligence/embeddings/enhanced.py +131 -0
  77. semantic_code_intelligence/embeddings/generator.py +149 -0
  78. semantic_code_intelligence/embeddings/model_registry.py +100 -0
  79. semantic_code_intelligence/evolution/__init__.py +1 -0
  80. semantic_code_intelligence/evolution/budget_guard.py +111 -0
  81. semantic_code_intelligence/evolution/commit_manager.py +88 -0
  82. semantic_code_intelligence/evolution/context_builder.py +131 -0
  83. semantic_code_intelligence/evolution/engine.py +249 -0
  84. semantic_code_intelligence/evolution/patch_generator.py +229 -0
  85. semantic_code_intelligence/evolution/task_selector.py +214 -0
  86. semantic_code_intelligence/evolution/test_runner.py +111 -0
  87. semantic_code_intelligence/indexing/__init__.py +0 -0
  88. semantic_code_intelligence/indexing/chunker.py +174 -0
  89. semantic_code_intelligence/indexing/parallel.py +86 -0
  90. semantic_code_intelligence/indexing/scanner.py +146 -0
  91. semantic_code_intelligence/indexing/semantic_chunker.py +337 -0
  92. semantic_code_intelligence/llm/__init__.py +62 -0
  93. semantic_code_intelligence/llm/cache.py +219 -0
  94. semantic_code_intelligence/llm/cached_provider.py +145 -0
  95. semantic_code_intelligence/llm/conversation.py +190 -0
  96. semantic_code_intelligence/llm/cross_refactor.py +272 -0
  97. semantic_code_intelligence/llm/investigation.py +274 -0
  98. semantic_code_intelligence/llm/mock_provider.py +77 -0
  99. semantic_code_intelligence/llm/ollama_provider.py +122 -0
  100. semantic_code_intelligence/llm/openai_provider.py +100 -0
  101. semantic_code_intelligence/llm/provider.py +92 -0
  102. semantic_code_intelligence/llm/rate_limiter.py +164 -0
  103. semantic_code_intelligence/llm/reasoning.py +438 -0
  104. semantic_code_intelligence/llm/safety.py +110 -0
  105. semantic_code_intelligence/llm/streaming.py +251 -0
  106. semantic_code_intelligence/lsp/__init__.py +609 -0
  107. semantic_code_intelligence/mcp/__init__.py +393 -0
  108. semantic_code_intelligence/parsing/__init__.py +19 -0
  109. semantic_code_intelligence/parsing/parser.py +375 -0
  110. semantic_code_intelligence/plugins/__init__.py +255 -0
  111. semantic_code_intelligence/plugins/examples/__init__.py +1 -0
  112. semantic_code_intelligence/plugins/examples/code_quality.py +73 -0
  113. semantic_code_intelligence/plugins/examples/search_annotator.py +56 -0
  114. semantic_code_intelligence/scalability/__init__.py +205 -0
  115. semantic_code_intelligence/search/__init__.py +0 -0
  116. semantic_code_intelligence/search/formatter.py +123 -0
  117. semantic_code_intelligence/search/grep.py +361 -0
  118. semantic_code_intelligence/search/hybrid_search.py +170 -0
  119. semantic_code_intelligence/search/keyword_search.py +311 -0
  120. semantic_code_intelligence/search/section_expander.py +103 -0
  121. semantic_code_intelligence/services/__init__.py +0 -0
  122. semantic_code_intelligence/services/indexing_service.py +630 -0
  123. semantic_code_intelligence/services/search_service.py +269 -0
  124. semantic_code_intelligence/storage/__init__.py +0 -0
  125. semantic_code_intelligence/storage/chunk_hash_store.py +86 -0
  126. semantic_code_intelligence/storage/hash_store.py +66 -0
  127. semantic_code_intelligence/storage/index_manifest.py +85 -0
  128. semantic_code_intelligence/storage/index_stats.py +138 -0
  129. semantic_code_intelligence/storage/query_history.py +160 -0
  130. semantic_code_intelligence/storage/symbol_registry.py +209 -0
  131. semantic_code_intelligence/storage/vector_store.py +297 -0
  132. semantic_code_intelligence/tests/__init__.py +0 -0
  133. semantic_code_intelligence/tests/test_ai_features.py +351 -0
  134. semantic_code_intelligence/tests/test_chunker.py +119 -0
  135. semantic_code_intelligence/tests/test_cli.py +188 -0
  136. semantic_code_intelligence/tests/test_config.py +154 -0
  137. semantic_code_intelligence/tests/test_context.py +381 -0
  138. semantic_code_intelligence/tests/test_embeddings.py +73 -0
  139. semantic_code_intelligence/tests/test_endtoend.py +1142 -0
  140. semantic_code_intelligence/tests/test_enhanced_embeddings.py +92 -0
  141. semantic_code_intelligence/tests/test_hash_store.py +79 -0
  142. semantic_code_intelligence/tests/test_logging.py +55 -0
  143. semantic_code_intelligence/tests/test_new_cli.py +138 -0
  144. semantic_code_intelligence/tests/test_parser.py +495 -0
  145. semantic_code_intelligence/tests/test_phase10.py +355 -0
  146. semantic_code_intelligence/tests/test_phase11.py +593 -0
  147. semantic_code_intelligence/tests/test_phase12.py +375 -0
  148. semantic_code_intelligence/tests/test_phase13.py +663 -0
  149. semantic_code_intelligence/tests/test_phase14.py +568 -0
  150. semantic_code_intelligence/tests/test_phase15.py +814 -0
  151. semantic_code_intelligence/tests/test_phase16.py +792 -0
  152. semantic_code_intelligence/tests/test_phase17.py +815 -0
  153. semantic_code_intelligence/tests/test_phase18.py +934 -0
  154. semantic_code_intelligence/tests/test_phase19.py +986 -0
  155. semantic_code_intelligence/tests/test_phase20.py +2753 -0
  156. semantic_code_intelligence/tests/test_phase20b.py +2058 -0
  157. semantic_code_intelligence/tests/test_phase20c.py +962 -0
  158. semantic_code_intelligence/tests/test_phase21.py +428 -0
  159. semantic_code_intelligence/tests/test_phase22.py +799 -0
  160. semantic_code_intelligence/tests/test_phase23.py +783 -0
  161. semantic_code_intelligence/tests/test_phase24.py +715 -0
  162. semantic_code_intelligence/tests/test_phase25.py +496 -0
  163. semantic_code_intelligence/tests/test_phase26.py +251 -0
  164. semantic_code_intelligence/tests/test_phase27.py +531 -0
  165. semantic_code_intelligence/tests/test_phase8.py +592 -0
  166. semantic_code_intelligence/tests/test_phase9.py +643 -0
  167. semantic_code_intelligence/tests/test_plugins.py +293 -0
  168. semantic_code_intelligence/tests/test_priority_features.py +727 -0
  169. semantic_code_intelligence/tests/test_router.py +41 -0
  170. semantic_code_intelligence/tests/test_scalability.py +138 -0
  171. semantic_code_intelligence/tests/test_scanner.py +125 -0
  172. semantic_code_intelligence/tests/test_search.py +160 -0
  173. semantic_code_intelligence/tests/test_semantic_chunker.py +255 -0
  174. semantic_code_intelligence/tests/test_tools.py +182 -0
  175. semantic_code_intelligence/tests/test_vector_store.py +151 -0
  176. semantic_code_intelligence/tests/test_watcher.py +211 -0
  177. semantic_code_intelligence/tools/__init__.py +442 -0
  178. semantic_code_intelligence/tools/executor.py +232 -0
  179. semantic_code_intelligence/tools/protocol.py +200 -0
  180. semantic_code_intelligence/tui/__init__.py +454 -0
  181. semantic_code_intelligence/utils/__init__.py +0 -0
  182. semantic_code_intelligence/utils/logging.py +112 -0
  183. semantic_code_intelligence/version.py +3 -0
  184. semantic_code_intelligence/web/__init__.py +11 -0
  185. semantic_code_intelligence/web/api.py +289 -0
  186. semantic_code_intelligence/web/server.py +397 -0
  187. semantic_code_intelligence/web/ui.py +659 -0
  188. semantic_code_intelligence/web/visualize.py +226 -0
  189. semantic_code_intelligence/workspace/__init__.py +427 -0
@@ -0,0 +1,375 @@
1
+ """Code parser — uses tree-sitter to extract functions, classes, and symbols."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ import tree_sitter
10
+
11
+ from semantic_code_intelligence.utils.logging import get_logger
12
+
13
+ logger = get_logger("parsing")
14
+
15
# Importable grammar package for each supported language name.
# "typescript" and "tsx" are served by the same package; the entry point that
# distinguishes them is looked up in _LANGUAGE_FACTORY.
_LANGUAGE_MODULES: dict[str, str] = dict(
    python="tree_sitter_python",
    javascript="tree_sitter_javascript",
    typescript="tree_sitter_typescript",
    tsx="tree_sitter_typescript",
    java="tree_sitter_java",
    go="tree_sitter_go",
    rust="tree_sitter_rust",
    cpp="tree_sitter_cpp",
    csharp="tree_sitter_c_sharp",
    ruby="tree_sitter_ruby",
    php="tree_sitter_php",
)
29
+
30
# Name of the attribute on the grammar module that returns the language handle.
# Languages missing from this table fall back to the attribute `language`.
_LANGUAGE_FACTORY: dict[str, str] = dict(
    typescript="language_typescript",
    tsx="language_tsx",
    php="language_php",
)
36
+
37
# Lower-case file suffix (with the leading dot) -> language name.
# Declared per language and flattened so every suffix of a language sits on one
# row; the iteration order of the resulting dict follows the rows below.
EXTENSION_TO_LANGUAGE: dict[str, str] = {
    suffix: language
    for language, suffixes in (
        ("python", (".py",)),
        ("javascript", (".js", ".jsx")),
        ("typescript", (".ts",)),
        ("tsx", (".tsx",)),
        ("java", (".java",)),
        ("go", (".go",)),
        ("rust", (".rs",)),
        ("cpp", (".cpp", ".cc", ".hpp", ".h")),
        ("csharp", (".cs",)),
        ("ruby", (".rb",)),
        ("php", (".php",)),
    )
    for suffix in suffixes
}
55
+
56
# Tree-sitter node types treated as function/method definitions, per language.
# Each row lists the node types as one whitespace-separated string; every
# language gets its own independent set object.
FUNCTION_NODE_TYPES: dict[str, set[str]] = {
    language: set(node_types.split())
    for language, node_types in (
        ("python", "function_definition"),
        ("javascript", "function_declaration arrow_function method_definition"),
        ("typescript", "function_declaration arrow_function method_definition"),
        ("tsx", "function_declaration arrow_function method_definition"),
        ("java", "method_declaration constructor_declaration"),
        ("go", "function_declaration method_declaration"),
        ("rust", "function_item"),
        ("cpp", "function_definition"),
        ("csharp", "method_declaration constructor_declaration"),
        ("ruby", "method singleton_method"),
        ("php", "function_definition method_declaration"),
    )
}
70
+
71
# Tree-sitter node types treated as class-like definitions (classes, structs,
# interfaces, enums, traits, modules, ...), per language. Rows are
# whitespace-separated node type names; each language gets its own set.
CLASS_NODE_TYPES: dict[str, set[str]] = {
    language: set(node_types.split())
    for language, node_types in (
        ("python", "class_definition"),
        ("javascript", "class_declaration"),
        ("typescript", "class_declaration interface_declaration enum_declaration"),
        ("tsx", "class_declaration interface_declaration enum_declaration"),
        ("java", "class_declaration interface_declaration enum_declaration"),
        ("go", "type_declaration"),
        ("rust", "struct_item enum_item impl_item trait_item"),
        ("cpp", "class_specifier struct_specifier enum_specifier"),
        (
            "csharp",
            "class_declaration interface_declaration struct_declaration enum_declaration",
        ),
        ("ruby", "class module"),
        (
            "php",
            "class_declaration interface_declaration trait_declaration enum_declaration",
        ),
    )
}
85
+
86
# Tree-sitter node types treated as import statements, per language.
# Ruby has no dedicated import node here: the generic "call" node is listed and
# the symbol extractor keeps only calls named require / require_relative.
IMPORT_NODE_TYPES: dict[str, set[str]] = {
    language: set(node_types.split())
    for language, node_types in (
        ("python", "import_statement import_from_statement"),
        ("javascript", "import_statement"),
        ("typescript", "import_statement"),
        ("tsx", "import_statement"),
        ("java", "import_declaration"),
        ("go", "import_declaration"),
        ("rust", "use_declaration"),
        ("cpp", "preproc_include"),
        ("csharp", "using_directive"),
        ("ruby", "call"),
        ("php", "namespace_use_declaration"),
    )
}
100
+
101
# Successfully loaded grammars, keyed by language name. Filled lazily by
# get_language(); failed loads are not recorded here.
_language_cache: dict[str, tree_sitter.Language] = dict()
103
+
104
+
105
@dataclass
class Symbol:
    """One symbol extracted from a source file.

    Line numbers are 1-based; column numbers are stored as reported by the
    parser. ``body`` holds the full source text of the symbol's node.
    """

    name: str
    kind: str  # one of "function", "class", "method", "import"
    file_path: str
    start_line: int
    end_line: int
    start_col: int
    end_col: int
    body: str
    parent: str | None = None  # name of the enclosing class, when there is one
    parameters: list[str] = field(default_factory=list)
    decorators: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the symbol as a plain dictionary.

        The column fields (``start_col`` / ``end_col``) are not part of the
        exported dictionary; all other fields are, in declaration order.
        """
        exported = (
            "name",
            "kind",
            "file_path",
            "start_line",
            "end_line",
            "body",
            "parent",
            "parameters",
            "decorators",
        )
        return {attr: getattr(self, attr) for attr in exported}
134
+
135
+
136
def get_language(lang_name: str) -> tree_sitter.Language | None:
    """Return the tree-sitter Language for ``lang_name``, loading it on first use.

    The grammar package listed in ``_LANGUAGE_MODULES`` is imported, its
    factory attribute (``_LANGUAGE_FACTORY`` entry, or ``language`` by default)
    is called, and the result is wrapped in ``tree_sitter.Language`` and cached.

    Args:
        lang_name: Language name (e.g. 'python', 'javascript').

    Returns:
        A tree_sitter.Language instance, or None when the language is unknown
        or its grammar could not be loaded (a warning is logged in that case).
    """
    try:
        return _language_cache[lang_name]
    except KeyError:
        pass  # not loaded yet

    module_name = _LANGUAGE_MODULES.get(lang_name)
    if module_name is None:
        return None

    import importlib

    try:
        grammar = importlib.import_module(module_name)
        make_language = getattr(grammar, _LANGUAGE_FACTORY.get(lang_name, "language"))
        loaded = tree_sitter.Language(make_language())
    except Exception as exc:
        # Any failure (package missing, factory missing, incompatible binding)
        # is reported and treated as "language unavailable".
        logger.warning("Failed to load tree-sitter grammar for %s: %s", lang_name, exc)
        return None

    _language_cache[lang_name] = loaded
    return loaded
163
+
164
+
165
def detect_language(file_path: str) -> str | None:
    """Map a file path to a language name using its (case-insensitive) suffix.

    Returns:
        The language name from ``EXTENSION_TO_LANGUAGE``, or None when the
        suffix is not listed there.
    """
    suffix = Path(file_path).suffix
    return EXTENSION_TO_LANGUAGE.get(suffix.lower())
172
+
173
+
174
def _get_node_text(node: tree_sitter.Node, source: bytes) -> str:
    """Return the source text covered by ``node``.

    The node's byte range is sliced out of ``source`` and decoded as UTF-8;
    undecodable bytes are replaced rather than raising.
    """
    raw = source[node.start_byte:node.end_byte]
    return raw.decode("utf-8", errors="replace")
177
+
178
+
179
def _find_name(node: tree_sitter.Node, source: bytes) -> str:
    """Return the name of a definition node, or ``"<anonymous>"`` if none is found.

    The first direct child whose type is a known identifier-like node wins.
    Failing that, the children of each ``*_declarator`` child are searched in
    order (needed for C++, where the name sits inside e.g. function_declarator).
    """
    # "constant" is used by Ruby and "name" by PHP.
    name_types = {
        "identifier", "property_identifier", "type_identifier",
        "field_identifier", "constant", "name",
    }

    direct = next((c for c in node.children if c.type in name_types), None)
    if direct is not None:
        return _get_node_text(direct, source)

    for declarator in node.children:
        if not declarator.type.endswith("_declarator"):
            continue
        nested = next((g for g in declarator.children if g.type in name_types), None)
        if nested is not None:
            return _get_node_text(nested, source)

    return "<anonymous>"
195
+
196
+
197
def _find_parameters(node: tree_sitter.Node, source: bytes) -> list[str]:
    """Extract parameter names from a function definition node.

    Every direct child of ``node`` that is a parameter list
    (``parameters`` / ``formal_parameters`` / ``parameter_list``) is scanned and
    one name is collected per recognised parameter node:

    * a bare ``identifier``;
    * Python ``typed_parameter``, ``typed_default_parameter`` and
      ``default_parameter`` (the ``name`` field is used when present);
    * Python ``list_splat_pattern`` / ``dictionary_splat_pattern``
      (``*args`` / ``**kwargs``), reported without the stars;
    * a generic ``parameter`` node, via its first ``identifier`` child.

    Parameter nodes of any other type are skipped.

    Args:
        node: The function/method definition node.
        source: The encoded source the node was parsed from.

    Returns:
        Parameter names in declaration order (empty when none are recognised).
    """
    list_types = ("parameters", "formal_parameters", "parameter_list")
    # Node types whose name is either the node itself, its "name" field, or the
    # first identifier among its children. default_parameter and the splat
    # patterns were previously missing, so `x=1`, `*args` and `**kwargs` were
    # dropped from the result.
    named_types = (
        "identifier",
        "typed_parameter",
        "typed_default_parameter",
        "default_parameter",
        "list_splat_pattern",
        "dictionary_splat_pattern",
    )

    params: list[str] = []
    for child in node.children:
        if child.type not in list_types:
            continue
        for param in child.children:
            if param.type in named_types:
                name_node = param.child_by_field_name("name") or param
                if name_node.type == "identifier":
                    candidates = [name_node]
                else:
                    candidates = name_node.children
            elif param.type == "parameter":
                candidates = param.children
            else:
                continue
            # Only the first identifier is the parameter name; later ones may
            # belong to a type annotation or a default value.
            for sub in candidates:
                if sub.type == "identifier":
                    params.append(_get_node_text(sub, source))
                    break
    return params
216
+
217
+
218
def _find_decorators(node: tree_sitter.Node, source: bytes) -> list[str]:
    """Extract decorator texts for a definition node (Python).

    When the definition is wrapped in a ``decorated_definition`` node, the
    decorators are the ``decorator`` children of that wrapper and are returned
    in source order. Otherwise a ``decorator`` node that immediately precedes
    the definition as a named sibling is returned on its own.

    Previously both lookups always ran, so for a wrapped definition the last
    decorator (which is also the previous named sibling) was reported twice
    and ahead of the others.

    Args:
        node: The function or class definition node.
        source: The encoded source the node was parsed from.

    Returns:
        Stripped decorator source texts, without duplicates from the double
        lookup; empty when the definition is undecorated.
    """
    parent = node.parent
    if parent is not None and parent.type == "decorated_definition":
        return [
            _get_node_text(child, source).strip()
            for child in parent.children
            if child.type == "decorator"
        ]

    # Fallback for a decorator that is a plain sibling of the definition.
    previous = node.prev_named_sibling
    if previous is not None and previous.type == "decorator":
        return [_get_node_text(previous, source).strip()]
    return []
229
+
230
+
231
def _extract_symbols_recursive(
    node: tree_sitter.Node,
    source: bytes,
    file_path: str,
    language: str,
    parent_class: str | None = None,
) -> list[Symbol]:
    """Walk the subtree under ``node`` depth-first and collect its symbols.

    For each child of ``node``:

    * a function-like node yields a "function" or "method" symbol, after which
      the child is still descended into with the same ``parent_class``;
    * a class-like node yields a "class" symbol and is descended into with the
      class name as ``parent_class`` (and nothing else is done for that child);
    * an import-like node yields an "import" symbol whose name is the
      statement text, then the child is descended into;
    * any other child is simply descended into.

    A Python ``decorated_definition`` child is handled as the function/class it
    wraps, when it wraps one.

    Args:
        node: Node whose children are examined.
        source: Encoded source the tree was parsed from.
        file_path: Path recorded on every produced symbol.
        language: Language name used to select the node-type tables.
        parent_class: Name of the enclosing class, if any.

    Returns:
        Symbols in the order they are encountered.
    """
    func_types = FUNCTION_NODE_TYPES.get(language, set())
    class_types = CLASS_NODE_TYPES.get(language, set())
    import_types = IMPORT_NODE_TYPES.get(language, set())
    definition_types = func_types | class_types

    # Node types that denote a method even when no enclosing class is known
    # (e.g. Go method_declaration, JS method_definition).
    method_only_types = {
        "method_declaration", "method_definition", "constructor_declaration",
    }

    def make(target: tree_sitter.Node, name: str, kind: str, **extra: Any) -> Symbol:
        # Positions: rows are converted to 1-based lines, columns kept as-is.
        return Symbol(
            name=name,
            kind=kind,
            file_path=file_path,
            start_line=target.start_point[0] + 1,
            end_line=target.end_point[0] + 1,
            start_col=target.start_point[1],
            end_col=target.end_point[1],
            body=_get_node_text(target, source),
            **extra,
        )

    def descend(target: tree_sitter.Node, owner: str | None) -> list[Symbol]:
        return _extract_symbols_recursive(target, source, file_path, language, owner)

    found: list[Symbol] = []
    for child in node.children:
        # Look through a decorated_definition wrapper to the definition inside.
        target = child
        if child.type == "decorated_definition":
            target = next(
                (inner for inner in child.children if inner.type in definition_types),
                child,
            )
        node_type = target.type

        if node_type in func_types:
            is_method = parent_class or node_type in method_only_types
            found.append(
                make(
                    target,
                    _find_name(target, source),
                    "method" if is_method else "function",
                    parent=parent_class,
                    parameters=_find_parameters(target, source),
                    decorators=_find_decorators(target, source),
                )
            )

        elif node_type in class_types:
            class_name = _find_name(target, source)
            found.append(make(target, class_name, "class"))
            # Members are collected with this class as their parent; the
            # generic descent at the bottom of the loop must not run as well.
            found.extend(descend(target, class_name))
            continue

        elif node_type in import_types:
            # Ruby lists the generic "call" node as an import candidate; only
            # require / require_relative calls are actual imports.
            unrelated_ruby_call = (
                language == "ruby"
                and node_type == "call"
                and _find_name(target, source) not in ("require", "require_relative")
            )
            if not unrelated_ruby_call:
                found.append(make(target, _get_node_text(target, source).strip(), "import"))

        found.extend(descend(child, parent_class))

    return found
329
+
330
+
331
def parse_file(file_path: str | Path, content: str | None = None) -> list[Symbol]:
    """Parse one source file and return every symbol found in it.

    Args:
        file_path: Path to the source file; its suffix selects the language.
        content: Source text to parse. When None, the file is read from disk
            as UTF-8 with undecodable bytes replaced.

    Returns:
        The extracted Symbol objects. An empty list is returned when the
        suffix is not recognised, the grammar is unavailable, or the file
        cannot be read.
    """
    path_str = str(file_path)

    lang_name = detect_language(path_str)
    if lang_name is None:
        return []
    language = get_language(lang_name)
    if language is None:
        return []

    if content is None:
        try:
            content = Path(path_str).read_text(encoding="utf-8", errors="replace")
        except OSError:  # includes PermissionError
            return []

    source = content.encode("utf-8")
    tree = tree_sitter.Parser(language).parse(source)
    return _extract_symbols_recursive(tree.root_node, source, path_str, lang_name)
361
+
362
+
363
def extract_functions(file_path: str | Path, content: str | None = None) -> list[Symbol]:
    """Return the symbols of ``parse_file`` whose kind is "function" or "method"."""
    symbols = parse_file(file_path, content)
    return [symbol for symbol in symbols if symbol.kind in ("function", "method")]
366
+
367
+
368
def extract_classes(file_path: str | Path, content: str | None = None) -> list[Symbol]:
    """Return the symbols of ``parse_file`` whose kind is "class"."""
    symbols = parse_file(file_path, content)
    return [symbol for symbol in symbols if symbol.kind == "class"]
371
+
372
+
373
def extract_imports(file_path: str | Path, content: str | None = None) -> list[Symbol]:
    """Return the symbols of ``parse_file`` whose kind is "import"."""
    symbols = parse_file(file_path, content)
    return [symbol for symbol in symbols if symbol.kind == "import"]
@@ -0,0 +1,255 @@
1
+ """Plugin architecture SDK — extensible hook system for CodexA.
2
+
3
+ Provides:
4
+ - PluginBase: abstract base class for plugins
5
+ - PluginHook: enumeration of hook points
6
+ - PluginManager: discovery, registration, and lifecycle management
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import importlib
12
+ import importlib.util
13
+ from abc import ABC, abstractmethod
14
+ from dataclasses import dataclass, field
15
+ from enum import Enum
16
+ from pathlib import Path
17
+ from typing import Any
18
+
19
+ from semantic_code_intelligence.utils.logging import get_logger
20
+
21
+ logger = get_logger("plugins")
22
+
23
+
24
+ # ---------------------------------------------------------------------------
25
+ # Hook Points
26
+ # ---------------------------------------------------------------------------
27
+
28
class PluginHook(str, Enum):
    """Hook points a plugin can subscribe to.

    Members are strings, so a member compares equal to its value.
    """

    # --- Indexing ---
    PRE_INDEX = "pre_index"
    POST_INDEX = "post_index"
    ON_CHUNK = "on_chunk"

    # --- Search ---
    PRE_SEARCH = "pre_search"
    POST_SEARCH = "post_search"

    # --- Analysis ---
    PRE_ANALYSIS = "pre_analysis"
    POST_ANALYSIS = "post_analysis"

    # --- AI ---
    PRE_AI = "pre_ai"
    POST_AI = "post_ai"

    # --- File events ---
    ON_FILE_CHANGE = "on_file_change"

    # --- Streaming (Phase 12): fired for streaming LLM token chunks ---
    ON_STREAM = "on_stream"

    # --- Validation (Phase 12): user-defined code validation rules ---
    CUSTOM_VALIDATION = "custom_validation"

    # --- Workflow intelligence (Phase 18) ---
    PRE_HOTSPOT_ANALYSIS = "pre_hotspot_analysis"
    POST_HOTSPOT_ANALYSIS = "post_hotspot_analysis"
    PRE_IMPACT_ANALYSIS = "pre_impact_analysis"
    POST_IMPACT_ANALYSIS = "post_impact_analysis"
    PRE_TRACE = "pre_trace"
    POST_TRACE = "post_trace"

    # --- AI agent tool protocol (Phase 19) ---
    REGISTER_TOOL = "register_tool"
    PRE_TOOL_INVOKE = "pre_tool_invoke"
    POST_TOOL_INVOKE = "post_tool_invoke"

    # --- Catch-all for custom hooks ---
    CUSTOM = "custom"
72
+
73
+
74
+ # ---------------------------------------------------------------------------
75
+ # Plugin Metadata
76
+ # ---------------------------------------------------------------------------
77
+
78
@dataclass
class PluginMetadata:
    """Descriptive information a plugin reports about itself."""

    name: str
    version: str = "0.1.0"
    description: str = ""
    author: str = ""
    # Hook points the plugin wants to be called for.
    hooks: list[PluginHook] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the metadata as a dictionary, with hooks given by their values."""
        payload: dict[str, Any] = {}
        for attr in ("name", "version", "description", "author"):
            payload[attr] = getattr(self, attr)
        payload["hooks"] = [hook.value for hook in self.hooks]
        return payload
96
+
97
+
98
+ # ---------------------------------------------------------------------------
99
+ # Plugin Base Class
100
+ # ---------------------------------------------------------------------------
101
+
102
class PluginBase(ABC):
    """Base class every CodexA plugin derives from.

    Only ``metadata`` must be implemented; the lifecycle callbacks and
    ``on_hook`` have do-nothing defaults that subclasses override as needed.
    """

    @abstractmethod
    def metadata(self) -> PluginMetadata:
        """Return the PluginMetadata describing this plugin."""

    def activate(self, context: dict[str, Any]) -> None:
        """Set-up callback invoked on activation; the default does nothing."""
        return None

    def deactivate(self) -> None:
        """Clean-up callback invoked on deactivation; the default does nothing."""
        return None

    def on_hook(self, hook: PluginHook, data: dict[str, Any]) -> dict[str, Any]:
        """Handle a hook the plugin is subscribed to.

        Args:
            hook: The hook that fired.
            data: Hook-specific data dict; an implementation may modify it.

        Returns:
            The data dict to hand to the next plugin in the chain. The default
            implementation returns ``data`` unchanged.
        """
        return data
130
+
131
+
132
+ # ---------------------------------------------------------------------------
133
+ # Plugin Manager
134
+ # ---------------------------------------------------------------------------
135
+
136
class PluginManager:
    """Manages plugin discovery, registration, activation, and hook dispatch.

    Internal state:
        _plugins: plugin name -> plugin instance.
        _hook_registry: hook -> names of subscribed plugins, in registration order.
        _active: names of activated plugins; only these receive dispatched hooks.
    """

    def __init__(self) -> None:
        self._plugins: dict[str, PluginBase] = {}
        self._hook_registry: dict[PluginHook, list[str]] = {h: [] for h in PluginHook}
        self._active: set[str] = set()

    @property
    def registered_plugins(self) -> list[str]:
        """Names of all registered plugins, in registration order."""
        return list(self._plugins.keys())

    @property
    def active_plugins(self) -> list[str]:
        """Names of the currently active plugins (no particular order)."""
        return list(self._active)

    def register(self, plugin: PluginBase) -> None:
        """Register a plugin instance under the name from its metadata.

        Registering a name that already exists replaces the stored instance
        (a warning is logged). Hook subscriptions of the replaced plugin that
        the new plugin does not declare are removed, so the new instance is
        never called for hooks it did not ask for; subscriptions that both
        declare keep their position in the dispatch order.
        """
        meta = plugin.metadata()
        if meta.name in self._plugins:
            logger.warning("Plugin '%s' already registered; replacing.", meta.name)
            wanted = set(meta.hooks)
            for hook, subscribers in self._hook_registry.items():
                if hook not in wanted and meta.name in subscribers:
                    subscribers.remove(meta.name)
        self._plugins[meta.name] = plugin
        for hook in meta.hooks:
            if meta.name not in self._hook_registry[hook]:
                self._hook_registry[hook].append(meta.name)
        logger.info("Registered plugin: %s v%s", meta.name, meta.version)

    def unregister(self, name: str) -> None:
        """Unregister a plugin by name, deactivating it first if it is active.

        Unknown names are ignored.
        """
        if name in self._active:
            self.deactivate(name)
        if name in self._plugins:
            # Remove from hook registry
            for subscribers in self._hook_registry.values():
                if name in subscribers:
                    subscribers.remove(name)
            del self._plugins[name]
            logger.info("Unregistered plugin: %s", name)

    def activate(self, name: str, context: dict[str, Any] | None = None) -> None:
        """Activate a registered plugin.

        Args:
            name: Name of a registered plugin.
            context: Passed to the plugin's ``activate``; an empty dict is
                used when omitted.

        Raises:
            ValueError: If no plugin is registered under ``name``.
        """
        plugin = self._plugins.get(name)
        if plugin is None:
            raise ValueError(f"Plugin '{name}' is not registered.")
        plugin.activate(context or {})
        self._active.add(name)
        logger.info("Activated plugin: %s", name)

    def deactivate(self, name: str) -> None:
        """Deactivate a plugin; does nothing if it is unknown or not active."""
        plugin = self._plugins.get(name)
        if plugin and name in self._active:
            plugin.deactivate()
            self._active.discard(name)
            logger.info("Deactivated plugin: %s", name)

    def dispatch(self, hook: PluginHook, data: dict[str, Any]) -> dict[str, Any]:
        """Dispatch a hook to all active plugins registered for it.

        Plugins are called in registration order. Each plugin receives
        the data dict (possibly modified by the previous plugin). A plugin
        that raises is logged and skipped; a plugin that returns ``None``
        is logged and its result ignored, so the chain continues with the
        data it was given.

        Returns:
            The final data dict after all plugins have processed it.
        """
        # Iterate over a snapshot: a plugin may register/unregister plugins
        # from inside its hook, which would mutate the live list.
        for name in tuple(self._hook_registry.get(hook, [])):
            if name not in self._active:
                continue
            plugin = self._plugins.get(name)
            if plugin is None:
                # Removed by an earlier plugin during this dispatch.
                continue
            try:
                result = plugin.on_hook(hook, data)
            except Exception:
                logger.exception("Plugin '%s' error on hook '%s'", name, hook.value)
                continue
            if result is None:
                logger.warning(
                    "Plugin '%s' returned None on hook '%s'; keeping previous data",
                    name,
                    hook.value,
                )
                continue
            data = result
        return data

    def get_plugin_info(self, name: str) -> dict[str, Any] | None:
        """Return the plugin's metadata dict plus an ``active`` flag, or None if unknown."""
        plugin = self._plugins.get(name)
        if plugin is None:
            return None
        info = plugin.metadata().to_dict()
        info["active"] = name in self._active
        return info

    def discover_from_directory(self, directory: Path) -> int:
        """Discover and register plugins from a directory.

        Every ``*.py`` file directly inside ``directory`` whose name does not
        start with an underscore is executed as a module; if it exposes a
        callable ``create_plugin`` that returns a ``PluginBase`` instance,
        that instance is registered. Files that fail to load are logged and
        skipped.

        NOTE(security): this executes the code of every matching file. Only
        point it at directories whose contents are trusted.

        Returns:
            Number of plugins registered (0 if ``directory`` is not a directory).
        """
        count = 0
        if not directory.is_dir():
            return 0

        # Sorted so that discovery (and therefore dispatch) order is stable.
        for py_file in sorted(directory.glob("*.py")):
            if py_file.name.startswith("_"):
                continue
            try:
                spec = importlib.util.spec_from_file_location(
                    f"codex_plugin_{py_file.stem}", py_file
                )
                if spec is None or spec.loader is None:
                    continue
                module = importlib.util.module_from_spec(spec)
                spec.loader.exec_module(module)

                factory = getattr(module, "create_plugin", None)
                if callable(factory):
                    plugin = factory()
                    if isinstance(plugin, PluginBase):
                        self.register(plugin)
                        count += 1
            except Exception:
                logger.exception("Failed to load plugin from %s", py_file)

        logger.info("Discovered %d plugin(s) from %s", count, directory)
        return count
@@ -0,0 +1 @@
1
+ """Sample plugins demonstrating the CodexA Plugin SDK."""