emdash-core 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187)
  1. emdash_core/__init__.py +3 -0
  2. emdash_core/agent/__init__.py +37 -0
  3. emdash_core/agent/agents.py +225 -0
  4. emdash_core/agent/code_reviewer.py +476 -0
  5. emdash_core/agent/compaction.py +143 -0
  6. emdash_core/agent/context_manager.py +140 -0
  7. emdash_core/agent/events.py +338 -0
  8. emdash_core/agent/handlers.py +224 -0
  9. emdash_core/agent/inprocess_subagent.py +377 -0
  10. emdash_core/agent/mcp/__init__.py +50 -0
  11. emdash_core/agent/mcp/client.py +346 -0
  12. emdash_core/agent/mcp/config.py +302 -0
  13. emdash_core/agent/mcp/manager.py +496 -0
  14. emdash_core/agent/mcp/tool_factory.py +213 -0
  15. emdash_core/agent/prompts/__init__.py +38 -0
  16. emdash_core/agent/prompts/main_agent.py +104 -0
  17. emdash_core/agent/prompts/subagents.py +131 -0
  18. emdash_core/agent/prompts/workflow.py +136 -0
  19. emdash_core/agent/providers/__init__.py +34 -0
  20. emdash_core/agent/providers/base.py +143 -0
  21. emdash_core/agent/providers/factory.py +80 -0
  22. emdash_core/agent/providers/models.py +220 -0
  23. emdash_core/agent/providers/openai_provider.py +463 -0
  24. emdash_core/agent/providers/transformers_provider.py +217 -0
  25. emdash_core/agent/research/__init__.py +81 -0
  26. emdash_core/agent/research/agent.py +143 -0
  27. emdash_core/agent/research/controller.py +254 -0
  28. emdash_core/agent/research/critic.py +428 -0
  29. emdash_core/agent/research/macros.py +469 -0
  30. emdash_core/agent/research/planner.py +449 -0
  31. emdash_core/agent/research/researcher.py +436 -0
  32. emdash_core/agent/research/state.py +523 -0
  33. emdash_core/agent/research/synthesizer.py +594 -0
  34. emdash_core/agent/reviewer_profile.py +475 -0
  35. emdash_core/agent/rules.py +123 -0
  36. emdash_core/agent/runner.py +601 -0
  37. emdash_core/agent/session.py +262 -0
  38. emdash_core/agent/spec_schema.py +66 -0
  39. emdash_core/agent/specification.py +479 -0
  40. emdash_core/agent/subagent.py +397 -0
  41. emdash_core/agent/subagent_prompts.py +13 -0
  42. emdash_core/agent/toolkit.py +482 -0
  43. emdash_core/agent/toolkits/__init__.py +64 -0
  44. emdash_core/agent/toolkits/base.py +96 -0
  45. emdash_core/agent/toolkits/explore.py +47 -0
  46. emdash_core/agent/toolkits/plan.py +55 -0
  47. emdash_core/agent/tools/__init__.py +141 -0
  48. emdash_core/agent/tools/analytics.py +436 -0
  49. emdash_core/agent/tools/base.py +131 -0
  50. emdash_core/agent/tools/coding.py +484 -0
  51. emdash_core/agent/tools/github_mcp.py +592 -0
  52. emdash_core/agent/tools/history.py +13 -0
  53. emdash_core/agent/tools/modes.py +153 -0
  54. emdash_core/agent/tools/plan.py +206 -0
  55. emdash_core/agent/tools/plan_write.py +135 -0
  56. emdash_core/agent/tools/search.py +412 -0
  57. emdash_core/agent/tools/spec.py +341 -0
  58. emdash_core/agent/tools/task.py +262 -0
  59. emdash_core/agent/tools/task_output.py +204 -0
  60. emdash_core/agent/tools/tasks.py +454 -0
  61. emdash_core/agent/tools/traversal.py +588 -0
  62. emdash_core/agent/tools/web.py +179 -0
  63. emdash_core/analytics/__init__.py +5 -0
  64. emdash_core/analytics/engine.py +1286 -0
  65. emdash_core/api/__init__.py +5 -0
  66. emdash_core/api/agent.py +308 -0
  67. emdash_core/api/agents.py +154 -0
  68. emdash_core/api/analyze.py +264 -0
  69. emdash_core/api/auth.py +173 -0
  70. emdash_core/api/context.py +77 -0
  71. emdash_core/api/db.py +121 -0
  72. emdash_core/api/embed.py +131 -0
  73. emdash_core/api/feature.py +143 -0
  74. emdash_core/api/health.py +93 -0
  75. emdash_core/api/index.py +162 -0
  76. emdash_core/api/plan.py +110 -0
  77. emdash_core/api/projectmd.py +210 -0
  78. emdash_core/api/query.py +320 -0
  79. emdash_core/api/research.py +122 -0
  80. emdash_core/api/review.py +161 -0
  81. emdash_core/api/router.py +76 -0
  82. emdash_core/api/rules.py +116 -0
  83. emdash_core/api/search.py +119 -0
  84. emdash_core/api/spec.py +99 -0
  85. emdash_core/api/swarm.py +223 -0
  86. emdash_core/api/tasks.py +109 -0
  87. emdash_core/api/team.py +120 -0
  88. emdash_core/auth/__init__.py +17 -0
  89. emdash_core/auth/github.py +389 -0
  90. emdash_core/config.py +74 -0
  91. emdash_core/context/__init__.py +52 -0
  92. emdash_core/context/models.py +50 -0
  93. emdash_core/context/providers/__init__.py +11 -0
  94. emdash_core/context/providers/base.py +74 -0
  95. emdash_core/context/providers/explored_areas.py +183 -0
  96. emdash_core/context/providers/touched_areas.py +360 -0
  97. emdash_core/context/registry.py +73 -0
  98. emdash_core/context/reranker.py +199 -0
  99. emdash_core/context/service.py +260 -0
  100. emdash_core/context/session.py +352 -0
  101. emdash_core/core/__init__.py +104 -0
  102. emdash_core/core/config.py +454 -0
  103. emdash_core/core/exceptions.py +55 -0
  104. emdash_core/core/models.py +265 -0
  105. emdash_core/core/review_config.py +57 -0
  106. emdash_core/db/__init__.py +67 -0
  107. emdash_core/db/auth.py +134 -0
  108. emdash_core/db/models.py +91 -0
  109. emdash_core/db/provider.py +222 -0
  110. emdash_core/db/providers/__init__.py +5 -0
  111. emdash_core/db/providers/supabase.py +452 -0
  112. emdash_core/embeddings/__init__.py +24 -0
  113. emdash_core/embeddings/indexer.py +534 -0
  114. emdash_core/embeddings/models.py +192 -0
  115. emdash_core/embeddings/providers/__init__.py +7 -0
  116. emdash_core/embeddings/providers/base.py +112 -0
  117. emdash_core/embeddings/providers/fireworks.py +141 -0
  118. emdash_core/embeddings/providers/openai.py +104 -0
  119. emdash_core/embeddings/registry.py +146 -0
  120. emdash_core/embeddings/service.py +215 -0
  121. emdash_core/graph/__init__.py +26 -0
  122. emdash_core/graph/builder.py +134 -0
  123. emdash_core/graph/connection.py +692 -0
  124. emdash_core/graph/schema.py +416 -0
  125. emdash_core/graph/writer.py +667 -0
  126. emdash_core/ingestion/__init__.py +7 -0
  127. emdash_core/ingestion/change_detector.py +150 -0
  128. emdash_core/ingestion/git/__init__.py +5 -0
  129. emdash_core/ingestion/git/commit_analyzer.py +196 -0
  130. emdash_core/ingestion/github/__init__.py +6 -0
  131. emdash_core/ingestion/github/pr_fetcher.py +296 -0
  132. emdash_core/ingestion/github/task_extractor.py +100 -0
  133. emdash_core/ingestion/orchestrator.py +540 -0
  134. emdash_core/ingestion/parsers/__init__.py +10 -0
  135. emdash_core/ingestion/parsers/base_parser.py +66 -0
  136. emdash_core/ingestion/parsers/call_graph_builder.py +121 -0
  137. emdash_core/ingestion/parsers/class_extractor.py +154 -0
  138. emdash_core/ingestion/parsers/function_extractor.py +202 -0
  139. emdash_core/ingestion/parsers/import_analyzer.py +119 -0
  140. emdash_core/ingestion/parsers/python_parser.py +123 -0
  141. emdash_core/ingestion/parsers/registry.py +72 -0
  142. emdash_core/ingestion/parsers/ts_ast_parser.js +313 -0
  143. emdash_core/ingestion/parsers/typescript_parser.py +278 -0
  144. emdash_core/ingestion/repository.py +346 -0
  145. emdash_core/models/__init__.py +38 -0
  146. emdash_core/models/agent.py +68 -0
  147. emdash_core/models/index.py +77 -0
  148. emdash_core/models/query.py +113 -0
  149. emdash_core/planning/__init__.py +7 -0
  150. emdash_core/planning/agent_api.py +413 -0
  151. emdash_core/planning/context_builder.py +265 -0
  152. emdash_core/planning/feature_context.py +232 -0
  153. emdash_core/planning/feature_expander.py +646 -0
  154. emdash_core/planning/llm_explainer.py +198 -0
  155. emdash_core/planning/similarity.py +509 -0
  156. emdash_core/planning/team_focus.py +821 -0
  157. emdash_core/server.py +153 -0
  158. emdash_core/sse/__init__.py +5 -0
  159. emdash_core/sse/stream.py +196 -0
  160. emdash_core/swarm/__init__.py +17 -0
  161. emdash_core/swarm/merge_agent.py +383 -0
  162. emdash_core/swarm/session_manager.py +274 -0
  163. emdash_core/swarm/swarm_runner.py +226 -0
  164. emdash_core/swarm/task_definition.py +137 -0
  165. emdash_core/swarm/worker_spawner.py +319 -0
  166. emdash_core/swarm/worktree_manager.py +278 -0
  167. emdash_core/templates/__init__.py +10 -0
  168. emdash_core/templates/defaults/agent-builder.md.template +82 -0
  169. emdash_core/templates/defaults/focus.md.template +115 -0
  170. emdash_core/templates/defaults/pr-review-enhanced.md.template +309 -0
  171. emdash_core/templates/defaults/pr-review.md.template +80 -0
  172. emdash_core/templates/defaults/project.md.template +85 -0
  173. emdash_core/templates/defaults/research_critic.md.template +112 -0
  174. emdash_core/templates/defaults/research_planner.md.template +85 -0
  175. emdash_core/templates/defaults/research_synthesizer.md.template +128 -0
  176. emdash_core/templates/defaults/reviewer.md.template +81 -0
  177. emdash_core/templates/defaults/spec.md.template +41 -0
  178. emdash_core/templates/defaults/tasks.md.template +78 -0
  179. emdash_core/templates/loader.py +296 -0
  180. emdash_core/utils/__init__.py +45 -0
  181. emdash_core/utils/git.py +84 -0
  182. emdash_core/utils/image.py +502 -0
  183. emdash_core/utils/logger.py +51 -0
  184. emdash_core-0.1.7.dist-info/METADATA +35 -0
  185. emdash_core-0.1.7.dist-info/RECORD +187 -0
  186. emdash_core-0.1.7.dist-info/WHEEL +4 -0
  187. emdash_core-0.1.7.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,278 @@
1
+ """TypeScript/JavaScript parser using external TypeScript compiler."""
2
+
3
+ import json
4
+ import subprocess
5
+ import hashlib
6
+ from pathlib import Path
7
+ from typing import List, Dict, Any, Optional
8
+
9
+ from .base_parser import BaseLanguageParser
10
+ from ...core.models import (
11
+ FileEntity, FileEntities, ClassEntity, FunctionEntity,
12
+ ModuleEntity, ImportStatement
13
+ )
14
+ from ...core.exceptions import ParsingError
15
+ from ...utils.logger import log
16
+
17
+
18
class TypeScriptParser(BaseLanguageParser):
    """Parse TypeScript/JavaScript files via an external Node.js helper script.

    The helper (``ts_ast_parser.js``, expected next to this module) is run
    with ``node`` and prints a JSON object describing the file.  This class
    reads the ``classes``, ``functions`` and ``imports`` lists from that
    object and converts them into the project's entity models.
    """

    # Seconds to wait for the Node.js helper before giving up on a file.
    PARSE_TIMEOUT_SECONDS = 30

    def __init__(self, file_path: Path, repo_root: Optional[Path] = None):
        """Initialize TypeScript parser.

        Args:
            file_path: Path to TypeScript/JavaScript file
            repo_root: Root directory of repository (for resolving qualified names)
        """
        super().__init__(file_path, repo_root)
        # Populated by parse(); stays None until the file has been read.
        self.content: Optional[str] = None

    @classmethod
    def get_supported_extensions(cls) -> List[str]:
        """Return list of supported file extensions.

        Returns:
            List containing '.ts', '.tsx', '.js', '.jsx'
        """
        return ['.ts', '.tsx', '.js', '.jsx']

    def parse(self) -> FileEntities:
        """Parse TypeScript/JavaScript file and extract entities.

        This method is best-effort and does not raise: any failure (unreadable
        file, missing Node.js, helper error, malformed helper output) is
        logged as a warning and an empty ``FileEntities`` is returned.

        Returns:
            FileEntities containing all extracted entities, or an empty
            FileEntities if the file could not be parsed
        """
        try:
            # Undecodable bytes are dropped rather than failing the whole file.
            with open(self.file_path, 'r', encoding='utf-8', errors='ignore') as f:
                self.content = f.read()

            # Parse using TypeScript compiler (via Node.js)
            ast = self._parse_typescript_ast()
            if not ast:
                log.warning(f"Failed to parse TypeScript AST for {self.file_path}")
                return FileEntities()

            file_entity = self._extract_file_entity()

            classes, class_methods = self._extract_classes(ast)
            functions = self._extract_functions(ast)
            # Class methods are reported alongside free functions.
            functions.extend(class_methods)
            imports, modules = self._extract_imports(ast)

            return FileEntities(
                file=file_entity,
                classes=classes,
                functions=functions,
                modules=modules,
                imports=imports,
            )

        except Exception as e:
            # Top-level boundary for a single file: log and carry on so one
            # bad file does not abort a whole ingestion run.
            log.warning(f"Failed to parse TypeScript file {self.file_path}: {e}")
            return FileEntities()

    def _parse_typescript_ast(self) -> Optional[Dict]:
        """Run the Node.js helper script and decode its JSON output.

        Returns:
            Dictionary containing AST data, or None if the helper script is
            missing, Node.js is not installed, the helper fails or times out,
            or its output is not a JSON object
        """
        parser_script = Path(__file__).parent / "ts_ast_parser.js"

        if not parser_script.exists():
            log.warning(f"TypeScript parser script not found: {parser_script}")
            return None

        try:
            result = subprocess.run(
                ['node', str(parser_script), str(self.file_path)],
                capture_output=True,
                text=True,
                # Node.js writes UTF-8; do not depend on the locale encoding.
                encoding='utf-8',
                timeout=self.PARSE_TIMEOUT_SECONDS,
            )

            if result.returncode != 0:
                log.warning(f"TypeScript parser failed for {self.file_path}: {result.stderr}")
                return None

            ast = json.loads(result.stdout)

        except subprocess.TimeoutExpired:
            log.warning(f"TypeScript parsing timeout for {self.file_path}")
            return None
        except json.JSONDecodeError as e:
            log.warning(f"Failed to parse TypeScript AST JSON: {e}")
            return None
        except FileNotFoundError:
            log.warning("Node.js not found. Please install Node.js to parse TypeScript/JavaScript files.")
            return None

        # The extractors call ast.get(...); anything but a JSON object is unusable.
        if not isinstance(ast, dict):
            log.warning(f"Failed to parse TypeScript AST JSON: expected an object, got {type(ast).__name__}")
            return None

        return ast

    def _extract_file_entity(self) -> FileEntity:
        """Extract file metadata.

        Must be called after parse() has read the file into ``self.content``.

        Returns:
            FileEntity built from the file path and the SHA-256 of its content
        """
        content_hash = hashlib.sha256(self.content.encode()).hexdigest()
        return FileEntity.from_path(self.file_path, content_hash)

    def _qualify(self, name: str) -> str:
        """Prefix ``name`` with this file's module name when one is set."""
        return f"{self.module_name}.{name}" if self.module_name else name

    def _build_function_entity(
        self,
        data: Dict[str, Any],
        qualified_name: str,
        is_method: bool,
    ) -> FunctionEntity:
        """Build a FunctionEntity from one function/method record of the AST.

        Args:
            data: Function or method dictionary produced by the helper script
            qualified_name: Fully qualified name to assign to the entity
            is_method: Whether the entity is a class method

        Returns:
            FunctionEntity populated from ``data``

        Raises:
            KeyError: If ``data`` has no 'name' entry
        """
        return FunctionEntity(
            name=data['name'],
            qualified_name=qualified_name,
            file_path=str(self.file_path),
            line_start=data.get('line_start', 0),
            line_end=data.get('line_end', 0),
            docstring=data.get('docstring'),
            parameters=data.get('parameters', []),
            return_annotation=data.get('return_type'),
            is_async=data.get('is_async', False),
            is_method=is_method,
            is_static=data.get('is_static', False),
            is_classmethod=False,  # TypeScript doesn't have classmethods like Python
            decorators=data.get('decorators', []),
            cyclomatic_complexity=0,  # TODO: Calculate complexity
            calls=data.get('calls', []),
        )

    def _extract_classes(self, ast: Dict) -> tuple[List[ClassEntity], List[FunctionEntity]]:
        """Extract class/interface definitions from TypeScript AST.

        A malformed class or method record is logged and skipped; it does not
        prevent the remaining records from being extracted.

        Args:
            ast: Parsed AST dictionary

        Returns:
            Tuple of (ClassEntity list, FunctionEntity list for methods)
        """
        classes = []
        all_methods = []

        for cls_data in ast.get('classes', []):
            try:
                class_qualified_name = self._qualify(cls_data['name'])

                classes.append(ClassEntity(
                    name=cls_data['name'],
                    qualified_name=class_qualified_name,
                    file_path=str(self.file_path),
                    line_start=cls_data.get('line_start', 0),
                    line_end=cls_data.get('line_end', 0),
                    docstring=cls_data.get('docstring'),
                    is_abstract=cls_data.get('is_abstract', False),
                    decorators=cls_data.get('decorators', []),
                    base_classes=cls_data.get('base_classes', []),
                    attributes=cls_data.get('properties', []),
                    methods=cls_data.get('methods', []),
                ))

                # 'method_entities' carries the detailed per-method records
                # (including calls); 'methods' above is passed through as-is.
                for method_data in cls_data.get('method_entities', []):
                    try:
                        all_methods.append(self._build_function_entity(
                            method_data,
                            f"{class_qualified_name}.{method_data['name']}",
                            is_method=True,
                        ))
                    except Exception as e:
                        log.warning(f"Failed to extract method {method_data.get('name', 'unknown')}: {e}")

            except Exception as e:
                log.warning(f"Failed to extract class {cls_data.get('name', 'unknown')}: {e}")

        return classes, all_methods

    def _extract_functions(self, ast: Dict) -> List[FunctionEntity]:
        """Extract function/method definitions from TypeScript AST.

        A malformed function record is logged and skipped.

        Args:
            ast: Parsed AST dictionary

        Returns:
            List of FunctionEntity objects
        """
        functions = []

        for func_data in ast.get('functions', []):
            try:
                functions.append(self._build_function_entity(
                    func_data,
                    self._qualify(func_data['name']),
                    is_method=func_data.get('is_method', False),
                ))
            except Exception as e:
                log.warning(f"Failed to extract function {func_data.get('name', 'unknown')}: {e}")

        return functions

    @staticmethod
    def _package_name(module_name: str) -> str:
        """Return the package part of an import specifier.

        Scoped npm packages keep their scope: ``@scope/pkg/sub`` gives
        ``@scope/pkg``.  Every other specifier gives its first ``/``-separated
        segment (``lodash/fp`` gives ``lodash``, ``./a/b`` gives ``.``).

        Args:
            module_name: Import specifier as written in the source file

        Returns:
            Package name derived from the specifier
        """
        segments = module_name.split('/')
        # len(segments[0]) > 1 excludes a bare '@' such as the '@/...' path alias.
        if module_name.startswith('@') and len(segments[0]) > 1 and len(segments) >= 2:
            return '/'.join(segments[:2])
        return segments[0]

    def _extract_imports(self, ast: Dict) -> tuple[List[ImportStatement], List[ModuleEntity]]:
        """Extract import statements from TypeScript AST.

        One ImportStatement is produced per import record; one ModuleEntity is
        produced per distinct imported module.  A malformed record is logged
        and skipped.

        Args:
            ast: Parsed AST dictionary

        Returns:
            Tuple of (import_statements, module_entities)
        """
        imports = []
        modules = {}

        for import_data in ast.get('imports', []):
            try:
                module_name = import_data['module']

                imports.append(ImportStatement(
                    module=module_name,
                    file_path=str(self.file_path),
                    line_number=import_data.get('line_number', 0),
                    import_type='import',  # TypeScript uses 'import'
                    alias=import_data.get('alias'),
                    imported_names=import_data.get('imported_names', []),
                ))

                if module_name not in modules:
                    modules[module_name] = ModuleEntity(
                        name=module_name,
                        import_path=module_name,
                        # Relative ('.') and absolute ('/') specifiers are
                        # local files; everything else is treated as external.
                        is_external=not module_name.startswith(('.', '/')),
                        package=self._package_name(module_name),
                    )

            except Exception as e:
                log.warning(f"Failed to extract import: {e}")

        return imports, list(modules.values())
274
+
275
+
276
+ # Auto-register TypeScript parser with registry. This runs as a module-level side effect whenever this module is imported; the import sits here, after the class definition it registers.
277
+ from .registry import ParserRegistry
278
+ ParserRegistry.register(TypeScriptParser)
@@ -0,0 +1,346 @@
1
+ """Repository management - clone, fetch, and track Git repositories."""
2
+
3
+ import hashlib
4
+ import os
5
+ import shutil
6
+ from datetime import datetime
7
+ from pathlib import Path
8
+ from typing import Optional
9
+ from urllib.parse import urlparse
10
+
11
+ import git
12
+ from git import Repo, GitCommandError
13
+
14
+ from ..core.exceptions import RepositoryError
15
+ from ..core.models import RepositoryEntity
16
+ from ..utils.logger import log
17
+
18
+
19
+ class RepositoryManager:
20
+ """Manages Git repository operations (clone, fetch, status)."""
21
+
22
+ def __init__(self, cache_dir: Optional[Path] = None):
23
+ """Initialize repository manager.
24
+
25
+ Args:
26
+ cache_dir: Directory to cache cloned repositories.
27
+ Defaults to ~/.emdash/repos
28
+ """
29
+ if cache_dir is None:
30
+ cache_dir = Path.home() / ".emdash" / "repos"
31
+
32
+ self.cache_dir = cache_dir
33
+ self.cache_dir.mkdir(parents=True, exist_ok=True)
34
+
35
+ def get_or_clone(
36
+ self,
37
+ repo_path: str,
38
+ skip_commit_count: bool = False
39
+ ) -> tuple[Repo, RepositoryEntity]:
40
+ """Get a repository from cache or clone it.
41
+
42
+ Args:
43
+ repo_path: URL or local path to repository
44
+ skip_commit_count: Whether to skip counting commits
45
+
46
+ Returns:
47
+ Tuple of (git.Repo, RepositoryEntity)
48
+
49
+ Raises:
50
+ RepositoryError: If repository cannot be accessed
51
+ """
52
+ # Check if it's a local path
53
+ if Path(repo_path).exists():
54
+ return self._open_local_repo(repo_path)
55
+
56
+ # It's a URL - clone or fetch
57
+ return self._clone_or_fetch(repo_path, skip_commit_count)
58
+
59
+ def _open_local_repo(self, path: str) -> tuple[Repo, RepositoryEntity]:
60
+ """Open a local repository.
61
+
62
+ Args:
63
+ path: Local path to repository
64
+
65
+ Returns:
66
+ Tuple of (git.Repo, RepositoryEntity)
67
+ """
68
+ log.info(f"Opening local repository: {path}")
69
+
70
+ try:
71
+ repo = Repo(path)
72
+
73
+ # Get repository info
74
+ origin_url = self._get_origin_url(repo)
75
+ repo_name = Path(path).name
76
+
77
+ entity = RepositoryEntity(
78
+ url=origin_url or f"file://{path}",
79
+ name=repo_name,
80
+ owner=None,
81
+ default_branch=repo.active_branch.name,
82
+ last_ingested=None,
83
+ ingestion_status="pending",
84
+ )
85
+
86
+ return repo, entity
87
+
88
+ except Exception as e:
89
+ raise RepositoryError(f"Failed to open local repository {path}: {e}")
90
+
91
+ def _clone_or_fetch(
92
+ self,
93
+ url: str,
94
+ skip_commit_count: bool
95
+ ) -> tuple[Repo, RepositoryEntity]:
96
+ """Clone a repository or fetch updates if already cloned.
97
+
98
+ Args:
99
+ url: Repository URL
100
+ skip_commit_count: Whether to skip counting commits
101
+
102
+ Returns:
103
+ Tuple of (git.Repo, RepositoryEntity)
104
+ """
105
+ # Generate cache path from URL
106
+ cache_path = self._get_cache_path(url)
107
+
108
+ if cache_path.exists():
109
+ log.info(f"Repository already cached at {cache_path}")
110
+ return self._fetch_updates(cache_path, url, skip_commit_count)
111
+ else:
112
+ log.info(f"Cloning repository: {url}")
113
+ return self._clone_repo(url, cache_path, skip_commit_count)
114
+
115
+ def _clone_repo(
116
+ self,
117
+ url: str,
118
+ cache_path: Path,
119
+ skip_commit_count: bool
120
+ ) -> tuple[Repo, RepositoryEntity]:
121
+ """Clone a repository.
122
+
123
+ Args:
124
+ url: Repository URL
125
+ cache_path: Path to clone into
126
+ skip_commit_count: Whether to skip counting commits
127
+
128
+ Returns:
129
+ Tuple of (git.Repo, RepositoryEntity)
130
+ """
131
+ try:
132
+ repo = Repo.clone_from(url, cache_path, depth=None)
133
+ log.info(f"Successfully cloned {url}")
134
+
135
+ entity = self._create_repository_entity(
136
+ repo,
137
+ url,
138
+ skip_commit_count=skip_commit_count
139
+ )
140
+ return repo, entity
141
+
142
+ except GitCommandError as e:
143
+ raise RepositoryError(f"Failed to clone repository {url}: {e}")
144
+ except Exception as e:
145
+ raise RepositoryError(f"Unexpected error cloning {url}: {e}")
146
+
147
+ def _fetch_updates(
148
+ self,
149
+ cache_path: Path,
150
+ url: str,
151
+ skip_commit_count: bool
152
+ ) -> tuple[Repo, RepositoryEntity]:
153
+ """Fetch updates for an existing repository.
154
+
155
+ Args:
156
+ cache_path: Path to cached repository
157
+ url: Repository URL
158
+ skip_commit_count: Whether to skip counting commits
159
+
160
+ Returns:
161
+ Tuple of (git.Repo, RepositoryEntity)
162
+ """
163
+ try:
164
+ repo = Repo(cache_path)
165
+
166
+ log.info("Fetching updates from remote...")
167
+ repo.remotes.origin.fetch()
168
+
169
+ # Pull latest changes
170
+ repo.remotes.origin.pull()
171
+
172
+ log.info("Repository updated successfully")
173
+
174
+ entity = self._create_repository_entity(
175
+ repo,
176
+ url,
177
+ skip_commit_count=skip_commit_count
178
+ )
179
+ return repo, entity
180
+
181
+ except GitCommandError as e:
182
+ raise RepositoryError(f"Failed to fetch updates for {url}: {e}")
183
+ except Exception as e:
184
+ raise RepositoryError(f"Unexpected error fetching updates: {e}")
185
+
186
+ def _create_repository_entity(
187
+ self,
188
+ repo: Repo,
189
+ url: str,
190
+ skip_commit_count: bool = False
191
+ ) -> RepositoryEntity:
192
+ """Create a RepositoryEntity from a git.Repo.
193
+
194
+ Args:
195
+ repo: Git repository
196
+ url: Repository URL
197
+ skip_commit_count: Whether to skip counting commits
198
+
199
+ Returns:
200
+ RepositoryEntity
201
+ """
202
+ # Parse URL to extract owner and name
203
+ parsed = urlparse(url)
204
+ path_parts = parsed.path.strip("/").split("/")
205
+
206
+ if len(path_parts) >= 2:
207
+ owner = path_parts[-2]
208
+ repo_name = path_parts[-1].replace(".git", "")
209
+ else:
210
+ owner = None
211
+ repo_name = path_parts[-1].replace(".git", "") if path_parts else "unknown"
212
+
213
+ commit_count = 0
214
+ if not skip_commit_count:
215
+ try:
216
+ commit_count = sum(1 for _ in repo.iter_commits())
217
+ except Exception:
218
+ commit_count = 0
219
+
220
+ return RepositoryEntity(
221
+ url=url,
222
+ name=repo_name,
223
+ owner=owner,
224
+ default_branch=repo.active_branch.name if repo.active_branch else "main",
225
+ last_ingested=None,
226
+ ingestion_status="pending",
227
+ commit_count=commit_count,
228
+ )
229
+
230
+ def _get_cache_path(self, url: str) -> Path:
231
+ """Get the cache path for a repository URL.
232
+
233
+ Args:
234
+ url: Repository URL
235
+
236
+ Returns:
237
+ Path to cache directory
238
+ """
239
+ # Create a unique directory name from URL
240
+ url_hash = hashlib.md5(url.encode()).hexdigest()[:12]
241
+
242
+ # Extract repo name from URL
243
+ parsed = urlparse(url)
244
+ path_parts = parsed.path.strip("/").split("/")
245
+ repo_name = path_parts[-1].replace(".git", "")
246
+
247
+ return self.cache_dir / f"{repo_name}_{url_hash}"
248
+
249
+ def _get_origin_url(self, repo: Repo) -> Optional[str]:
250
+ """Get the origin URL of a repository.
251
+
252
+ Args:
253
+ repo: Git repository
254
+
255
+ Returns:
256
+ Origin URL or None
257
+ """
258
+ try:
259
+ if hasattr(repo.remotes, "origin"):
260
+ return repo.remotes.origin.url
261
+ except Exception:
262
+ pass
263
+ return None
264
+
265
+ def get_source_files(
266
+ self,
267
+ repo: Repo,
268
+ extensions: list[str],
269
+ ignore_patterns: list[str] = None
270
+ ) -> list[Path]:
271
+ """Get all source files matching given extensions.
272
+
273
+ Args:
274
+ repo: Git repository
275
+ extensions: List of file extensions (e.g., ['.py', '.ts', '.js'])
276
+ ignore_patterns: Patterns to ignore (e.g., "__pycache__", "venv")
277
+
278
+ Returns:
279
+ List of source file paths
280
+ """
281
+ if ignore_patterns is None:
282
+ ignore_patterns = [
283
+ "__pycache__",
284
+ "*.pyc",
285
+ "*.pyo",
286
+ ".git",
287
+ ".venv",
288
+ "venv",
289
+ "env",
290
+ "node_modules",
291
+ ".tox",
292
+ ".pytest_cache",
293
+ "*.egg-info",
294
+ "dist",
295
+ "build",
296
+ ]
297
+
298
+ repo_path = Path(repo.working_dir)
299
+ source_files = []
300
+
301
+ # Normalize extensions to lowercase
302
+ extensions = [ext.lower() for ext in extensions]
303
+
304
+ for source_file in repo_path.rglob("*"):
305
+ # Check if file (not directory)
306
+ if not source_file.is_file():
307
+ continue
308
+
309
+ # Check extension
310
+ if source_file.suffix.lower() not in extensions:
311
+ continue
312
+
313
+ # Check ignore patterns
314
+ relative_path = source_file.relative_to(repo_path)
315
+ if any(pattern in str(relative_path) for pattern in ignore_patterns):
316
+ continue
317
+
318
+ source_files.append(source_file)
319
+
320
+ log.info(f"Found {len(source_files)} source files with extensions {extensions}")
321
+ return source_files
322
+
323
+ def get_python_files(self, repo: Repo, ignore_patterns: list[str] = None) -> list[Path]:
324
+ """Get all Python files in a repository.
325
+
326
+ Args:
327
+ repo: Git repository
328
+ ignore_patterns: Patterns to ignore (e.g., "__pycache__", "venv")
329
+
330
+ Returns:
331
+ List of Python file paths
332
+
333
+ Note:
334
+ This is a convenience wrapper around get_source_files() for backward compatibility.
335
+ """
336
+ return self.get_source_files(repo, ['.py'], ignore_patterns)
337
+
338
+ def clear_cache(self):
339
+ """Clear all cached repositories."""
340
+ log.warning("Clearing repository cache...")
341
+
342
+ if self.cache_dir.exists():
343
+ shutil.rmtree(self.cache_dir)
344
+ self.cache_dir.mkdir(parents=True, exist_ok=True)
345
+
346
+ log.info("Cache cleared successfully")
@@ -0,0 +1,38 @@
1
+ """Pydantic models for API requests and responses."""
2
+
3
+ from .agent import (
4
+ AgentChatRequest,
5
+ AgentMode,
6
+ ImageData,
7
+ )
8
+ from .query import (
9
+ EntityType,
10
+ SearchRequest,
11
+ SearchResult,
12
+ SearchResponse,
13
+ ExpandRequest,
14
+ )
15
+ from .index import (
16
+ IndexRequest,
17
+ IndexOptions,
18
+ IndexStatus,
19
+ IndexStats,
20
+ )
21
+
22
+ __all__ = [
23
+ # Agent
24
+ "AgentChatRequest",
25
+ "AgentMode",
26
+ "ImageData",
27
+ # Query
28
+ "EntityType",
29
+ "SearchRequest",
30
+ "SearchResult",
31
+ "SearchResponse",
32
+ "ExpandRequest",
33
+ # Index
34
+ "IndexRequest",
35
+ "IndexOptions",
36
+ "IndexStatus",
37
+ "IndexStats",
38
+ ]