emdash-core 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emdash_core/__init__.py +3 -0
- emdash_core/agent/__init__.py +37 -0
- emdash_core/agent/agents.py +225 -0
- emdash_core/agent/code_reviewer.py +476 -0
- emdash_core/agent/compaction.py +143 -0
- emdash_core/agent/context_manager.py +140 -0
- emdash_core/agent/events.py +338 -0
- emdash_core/agent/handlers.py +224 -0
- emdash_core/agent/inprocess_subagent.py +377 -0
- emdash_core/agent/mcp/__init__.py +50 -0
- emdash_core/agent/mcp/client.py +346 -0
- emdash_core/agent/mcp/config.py +302 -0
- emdash_core/agent/mcp/manager.py +496 -0
- emdash_core/agent/mcp/tool_factory.py +213 -0
- emdash_core/agent/prompts/__init__.py +38 -0
- emdash_core/agent/prompts/main_agent.py +104 -0
- emdash_core/agent/prompts/subagents.py +131 -0
- emdash_core/agent/prompts/workflow.py +136 -0
- emdash_core/agent/providers/__init__.py +34 -0
- emdash_core/agent/providers/base.py +143 -0
- emdash_core/agent/providers/factory.py +80 -0
- emdash_core/agent/providers/models.py +220 -0
- emdash_core/agent/providers/openai_provider.py +463 -0
- emdash_core/agent/providers/transformers_provider.py +217 -0
- emdash_core/agent/research/__init__.py +81 -0
- emdash_core/agent/research/agent.py +143 -0
- emdash_core/agent/research/controller.py +254 -0
- emdash_core/agent/research/critic.py +428 -0
- emdash_core/agent/research/macros.py +469 -0
- emdash_core/agent/research/planner.py +449 -0
- emdash_core/agent/research/researcher.py +436 -0
- emdash_core/agent/research/state.py +523 -0
- emdash_core/agent/research/synthesizer.py +594 -0
- emdash_core/agent/reviewer_profile.py +475 -0
- emdash_core/agent/rules.py +123 -0
- emdash_core/agent/runner.py +601 -0
- emdash_core/agent/session.py +262 -0
- emdash_core/agent/spec_schema.py +66 -0
- emdash_core/agent/specification.py +479 -0
- emdash_core/agent/subagent.py +397 -0
- emdash_core/agent/subagent_prompts.py +13 -0
- emdash_core/agent/toolkit.py +482 -0
- emdash_core/agent/toolkits/__init__.py +64 -0
- emdash_core/agent/toolkits/base.py +96 -0
- emdash_core/agent/toolkits/explore.py +47 -0
- emdash_core/agent/toolkits/plan.py +55 -0
- emdash_core/agent/tools/__init__.py +141 -0
- emdash_core/agent/tools/analytics.py +436 -0
- emdash_core/agent/tools/base.py +131 -0
- emdash_core/agent/tools/coding.py +484 -0
- emdash_core/agent/tools/github_mcp.py +592 -0
- emdash_core/agent/tools/history.py +13 -0
- emdash_core/agent/tools/modes.py +153 -0
- emdash_core/agent/tools/plan.py +206 -0
- emdash_core/agent/tools/plan_write.py +135 -0
- emdash_core/agent/tools/search.py +412 -0
- emdash_core/agent/tools/spec.py +341 -0
- emdash_core/agent/tools/task.py +262 -0
- emdash_core/agent/tools/task_output.py +204 -0
- emdash_core/agent/tools/tasks.py +454 -0
- emdash_core/agent/tools/traversal.py +588 -0
- emdash_core/agent/tools/web.py +179 -0
- emdash_core/analytics/__init__.py +5 -0
- emdash_core/analytics/engine.py +1286 -0
- emdash_core/api/__init__.py +5 -0
- emdash_core/api/agent.py +308 -0
- emdash_core/api/agents.py +154 -0
- emdash_core/api/analyze.py +264 -0
- emdash_core/api/auth.py +173 -0
- emdash_core/api/context.py +77 -0
- emdash_core/api/db.py +121 -0
- emdash_core/api/embed.py +131 -0
- emdash_core/api/feature.py +143 -0
- emdash_core/api/health.py +93 -0
- emdash_core/api/index.py +162 -0
- emdash_core/api/plan.py +110 -0
- emdash_core/api/projectmd.py +210 -0
- emdash_core/api/query.py +320 -0
- emdash_core/api/research.py +122 -0
- emdash_core/api/review.py +161 -0
- emdash_core/api/router.py +76 -0
- emdash_core/api/rules.py +116 -0
- emdash_core/api/search.py +119 -0
- emdash_core/api/spec.py +99 -0
- emdash_core/api/swarm.py +223 -0
- emdash_core/api/tasks.py +109 -0
- emdash_core/api/team.py +120 -0
- emdash_core/auth/__init__.py +17 -0
- emdash_core/auth/github.py +389 -0
- emdash_core/config.py +74 -0
- emdash_core/context/__init__.py +52 -0
- emdash_core/context/models.py +50 -0
- emdash_core/context/providers/__init__.py +11 -0
- emdash_core/context/providers/base.py +74 -0
- emdash_core/context/providers/explored_areas.py +183 -0
- emdash_core/context/providers/touched_areas.py +360 -0
- emdash_core/context/registry.py +73 -0
- emdash_core/context/reranker.py +199 -0
- emdash_core/context/service.py +260 -0
- emdash_core/context/session.py +352 -0
- emdash_core/core/__init__.py +104 -0
- emdash_core/core/config.py +454 -0
- emdash_core/core/exceptions.py +55 -0
- emdash_core/core/models.py +265 -0
- emdash_core/core/review_config.py +57 -0
- emdash_core/db/__init__.py +67 -0
- emdash_core/db/auth.py +134 -0
- emdash_core/db/models.py +91 -0
- emdash_core/db/provider.py +222 -0
- emdash_core/db/providers/__init__.py +5 -0
- emdash_core/db/providers/supabase.py +452 -0
- emdash_core/embeddings/__init__.py +24 -0
- emdash_core/embeddings/indexer.py +534 -0
- emdash_core/embeddings/models.py +192 -0
- emdash_core/embeddings/providers/__init__.py +7 -0
- emdash_core/embeddings/providers/base.py +112 -0
- emdash_core/embeddings/providers/fireworks.py +141 -0
- emdash_core/embeddings/providers/openai.py +104 -0
- emdash_core/embeddings/registry.py +146 -0
- emdash_core/embeddings/service.py +215 -0
- emdash_core/graph/__init__.py +26 -0
- emdash_core/graph/builder.py +134 -0
- emdash_core/graph/connection.py +692 -0
- emdash_core/graph/schema.py +416 -0
- emdash_core/graph/writer.py +667 -0
- emdash_core/ingestion/__init__.py +7 -0
- emdash_core/ingestion/change_detector.py +150 -0
- emdash_core/ingestion/git/__init__.py +5 -0
- emdash_core/ingestion/git/commit_analyzer.py +196 -0
- emdash_core/ingestion/github/__init__.py +6 -0
- emdash_core/ingestion/github/pr_fetcher.py +296 -0
- emdash_core/ingestion/github/task_extractor.py +100 -0
- emdash_core/ingestion/orchestrator.py +540 -0
- emdash_core/ingestion/parsers/__init__.py +10 -0
- emdash_core/ingestion/parsers/base_parser.py +66 -0
- emdash_core/ingestion/parsers/call_graph_builder.py +121 -0
- emdash_core/ingestion/parsers/class_extractor.py +154 -0
- emdash_core/ingestion/parsers/function_extractor.py +202 -0
- emdash_core/ingestion/parsers/import_analyzer.py +119 -0
- emdash_core/ingestion/parsers/python_parser.py +123 -0
- emdash_core/ingestion/parsers/registry.py +72 -0
- emdash_core/ingestion/parsers/ts_ast_parser.js +313 -0
- emdash_core/ingestion/parsers/typescript_parser.py +278 -0
- emdash_core/ingestion/repository.py +346 -0
- emdash_core/models/__init__.py +38 -0
- emdash_core/models/agent.py +68 -0
- emdash_core/models/index.py +77 -0
- emdash_core/models/query.py +113 -0
- emdash_core/planning/__init__.py +7 -0
- emdash_core/planning/agent_api.py +413 -0
- emdash_core/planning/context_builder.py +265 -0
- emdash_core/planning/feature_context.py +232 -0
- emdash_core/planning/feature_expander.py +646 -0
- emdash_core/planning/llm_explainer.py +198 -0
- emdash_core/planning/similarity.py +509 -0
- emdash_core/planning/team_focus.py +821 -0
- emdash_core/server.py +153 -0
- emdash_core/sse/__init__.py +5 -0
- emdash_core/sse/stream.py +196 -0
- emdash_core/swarm/__init__.py +17 -0
- emdash_core/swarm/merge_agent.py +383 -0
- emdash_core/swarm/session_manager.py +274 -0
- emdash_core/swarm/swarm_runner.py +226 -0
- emdash_core/swarm/task_definition.py +137 -0
- emdash_core/swarm/worker_spawner.py +319 -0
- emdash_core/swarm/worktree_manager.py +278 -0
- emdash_core/templates/__init__.py +10 -0
- emdash_core/templates/defaults/agent-builder.md.template +82 -0
- emdash_core/templates/defaults/focus.md.template +115 -0
- emdash_core/templates/defaults/pr-review-enhanced.md.template +309 -0
- emdash_core/templates/defaults/pr-review.md.template +80 -0
- emdash_core/templates/defaults/project.md.template +85 -0
- emdash_core/templates/defaults/research_critic.md.template +112 -0
- emdash_core/templates/defaults/research_planner.md.template +85 -0
- emdash_core/templates/defaults/research_synthesizer.md.template +128 -0
- emdash_core/templates/defaults/reviewer.md.template +81 -0
- emdash_core/templates/defaults/spec.md.template +41 -0
- emdash_core/templates/defaults/tasks.md.template +78 -0
- emdash_core/templates/loader.py +296 -0
- emdash_core/utils/__init__.py +45 -0
- emdash_core/utils/git.py +84 -0
- emdash_core/utils/image.py +502 -0
- emdash_core/utils/logger.py +51 -0
- emdash_core-0.1.7.dist-info/METADATA +35 -0
- emdash_core-0.1.7.dist-info/RECORD +187 -0
- emdash_core-0.1.7.dist-info/WHEEL +4 -0
- emdash_core-0.1.7.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
"""Embedding service with multi-provider support."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional, Union
|
|
4
|
+
|
|
5
|
+
from .models import EmbeddingModel
|
|
6
|
+
from .registry import get_provider, get_available_model, ProviderRegistry
|
|
7
|
+
from .providers.base import EmbeddingProvider
|
|
8
|
+
from ..core.config import get_config
|
|
9
|
+
from ..core.models import PullRequestEntity, FunctionEntity, ClassEntity
|
|
10
|
+
from ..utils.logger import log
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class EmbeddingService:
    """
    Provider-agnostic entry point for generating embeddings.

    The service holds an embedding model and, once needed, the provider
    object that serves it. A provider can be injected directly; otherwise
    it is resolved from the model through the registry on first access.

    Usage:
        # Let the registry choose the first available model
        service = EmbeddingService()

        # Use specific model
        service = EmbeddingService(model=EmbeddingModel.FIREWORKS_NOMIC_EMBED_V1_5)

        # Use model from string
        service = EmbeddingService(model="fireworks:nomic-ai/nomic-embed-text-v1.5")

        # Generate embeddings
        embeddings = service.embed_texts(["hello", "world"])
    """

    def __init__(
        self,
        model: Optional[Union[EmbeddingModel, str]] = None,
        provider: Optional[EmbeddingProvider] = None,
    ):
        """
        Set up the service from an explicit provider, a model, or neither.

        Args:
            model: Model to embed with, as an EmbeddingModel or a string that
                EmbeddingModel.from_string can parse. When omitted (and no
                provider is given) the result of get_available_model() is used.
            provider: Ready-made provider instance. Takes precedence: when
                supplied, ``model`` is ignored and the provider's own model
                is adopted.

        Raises:
            ValueError: If ``model`` is a string that cannot be parsed.
        """
        # An injected provider wins outright; its model becomes ours.
        if provider is not None:
            self._provider = provider
            self._model = provider.model
            return

        # Every remaining path resolves the provider lazily.
        self._provider = None

        if model is None:
            self._model = get_available_model()
            return

        if isinstance(model, str):
            resolved = EmbeddingModel.from_string(model)
            if resolved is None:
                raise ValueError(f"Unknown embedding model: {model}")
        else:
            resolved = model
        self._model = resolved

    @property
    def model(self) -> Optional[EmbeddingModel]:
        """Return the model this service embeds with (may be None)."""
        return self._model

    @property
    def provider(self) -> Optional[EmbeddingProvider]:
        """Return the provider, resolving it from the model on first use."""
        current = self._provider
        if current is not None or self._model is None:
            return current
        self._provider = get_provider(self._model)
        return self._provider

    @property
    def is_available(self) -> bool:
        """Report whether a provider exists and says it is available.

        Any exception raised while resolving or querying the provider is
        treated as "not available" rather than propagated.
        """
        if self._model is None:
            return False
        try:
            backend = self.provider
            if backend is None:
                return False
            return backend.is_available
        except Exception:
            return False

    @property
    def dimensions(self) -> int:
        """Return the model's vector size, or 1536 when no model is set."""
        return 1536 if self._model is None else self._model.dimensions

    def embed_texts(self, texts: list[str]) -> list[Optional[list[float]]]:
        """
        Embed a batch of texts.

        Args:
            texts: Strings to embed.

        Returns:
            Whatever the provider returns for the batch. An empty input
            yields an empty list; with no usable provider the result is a
            list of None, one per input text.
        """
        if not texts:
            return []

        if self.is_available:
            return self.provider.embed_texts(texts)

        log.warning("No embedding provider available")
        return [None] * len(texts)

    def embed_text(self, text: str) -> Optional[list[float]]:
        """
        Embed one text by delegating to embed_texts.

        Args:
            text: String to embed.

        Returns:
            The first vector of the batch result, or None when the text is
            empty or the batch result is empty.
        """
        if not text:
            return None
        batch = self.embed_texts([text])
        if batch:
            return batch[0]
        return None

    def embed_query(self, query: str) -> Optional[list[float]]:
        """
        Embed a search query through the provider's embed_query.

        Args:
            query: Search query string.

        Returns:
            The provider's result, or None when the query is empty or no
            provider is available.
        """
        if not query:
            return None

        if self.is_available:
            return self.provider.embed_query(query)

        log.warning("No embedding provider available")
        return None

    @staticmethod
    def _title_and_body(title, body) -> str:
        """Join a title and an optional body with one blank line between."""
        return f"{title}\n\n{body or ''}"

    def embed_pr(self, pr: PullRequestEntity) -> Optional[list[float]]:
        """
        Embed a pull request from its title and description.

        Args:
            pr: Pull request whose ``title`` and ``description`` are read.

        Returns:
            Embedding vector or None.
        """
        return self.embed_text(self._title_and_body(pr.title, pr.description))

    def embed_function(self, func: FunctionEntity) -> Optional[list[float]]:
        """
        Embed a function from its name and docstring.

        Args:
            func: Function whose ``name`` and ``docstring`` are read.

        Returns:
            Embedding vector or None.
        """
        return self.embed_text(self._title_and_body(func.name, func.docstring))

    def embed_class(self, cls: ClassEntity) -> Optional[list[float]]:
        """
        Embed a class from its name and docstring.

        Args:
            cls: Class whose ``name`` and ``docstring`` are read.

        Returns:
            Embedding vector or None.
        """
        return self.embed_text(self._title_and_body(cls.name, cls.docstring))

    @staticmethod
    def list_models() -> list[dict]:
        """Return EmbeddingModel.list_all()."""
        return EmbeddingModel.list_all()

    @staticmethod
    def list_available_providers() -> list[str]:
        """Return the registered provider names the registry reports as available."""
        usable = []
        for name in ProviderRegistry.list_providers():
            if ProviderRegistry.is_provider_available(name):
                usable.append(name)
        return usable

    @staticmethod
    def get_model_info(model: Union[EmbeddingModel, str]) -> Optional[dict]:
        """Describe a model as a plain dict.

        Args:
            model: An EmbeddingModel, or a string parsed with
                EmbeddingModel.from_string.

        Returns:
            Dict with name, provider, model_id, dimensions, max_tokens,
            batch_size and description, or None when the model cannot be
            resolved.
        """
        resolved = EmbeddingModel.from_string(model) if isinstance(model, str) else model
        if resolved is None:
            return None

        # Top-level attributes first, then the fields taken from the spec.
        info = {
            "name": resolved.name,
            "provider": resolved.provider,
            "model_id": resolved.model_id,
            "dimensions": resolved.dimensions,
        }
        spec = resolved.spec
        info["max_tokens"] = spec.max_tokens
        info["batch_size"] = spec.batch_size
        info["description"] = spec.description
        return info
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Graph module for Kuzu database operations."""
|
|
2
|
+
|
|
3
|
+
from .connection import (
|
|
4
|
+
KuzuConnection,
|
|
5
|
+
get_connection,
|
|
6
|
+
set_connection,
|
|
7
|
+
close_connection,
|
|
8
|
+
)
|
|
9
|
+
from .schema import SchemaManager, initialize_database
|
|
10
|
+
from .builder import GraphBuilder
|
|
11
|
+
from .writer import GraphWriter
|
|
12
|
+
|
|
13
|
+
# Names re-exported by the graph package, one row per source submodule.
__all__ = [
    "KuzuConnection", "get_connection", "set_connection", "close_connection",  # connection
    "SchemaManager", "initialize_database",  # schema
    "GraphBuilder",  # builder
    "GraphWriter",  # writer
]
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""Graph construction - coordinates entity writing to Kuzu."""
|
|
2
|
+
|
|
3
|
+
from ..core.models import CodebaseEntities, GitData
|
|
4
|
+
from .connection import KuzuConnection
|
|
5
|
+
from .writer import GraphWriter
|
|
6
|
+
from ..utils.logger import log
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class GraphBuilder:
    """Coordinates writing extracted entities into the Kuzu knowledge graph."""

    def __init__(self, connection: KuzuConnection, batch_size: int = 1000):
        """Store the connection and batch size used for all writes.

        Args:
            connection: Kuzu connection that writes are issued against
            batch_size: Batch size handed to each GraphWriter
        """
        self.connection = connection
        self.batch_size = batch_size

    def _new_writer(self) -> GraphWriter:
        """Create a GraphWriter bound to this builder's connection and batch size."""
        # The connection is used directly for Kuzu (no session needed).
        return GraphWriter(self.connection, self.batch_size)

    @staticmethod
    def _write_code_nodes(writer: GraphWriter, entities: CodebaseEntities) -> None:
        """Write file, module, class and function nodes, in that order."""
        writer.write_files(entities.files)
        writer.write_modules(entities.modules)
        writer.write_classes(entities.classes)
        writer.write_functions(entities.functions)

    @staticmethod
    def _write_code_relationships(writer: GraphWriter, entities: CodebaseEntities) -> None:
        """Write import, inheritance and call relationships, in that order."""
        writer.write_imports(entities.imports)
        writer.write_inheritance(entities.classes)
        writer.write_calls(entities.functions)

    def build_code_graph(self, entities: CodebaseEntities):
        """Build Layer A: the code structure graph.

        Runs in two passes so that every node is written before any
        relationship is.

        Args:
            entities: Codebase entities to write
        """
        log.info("Building code structure graph (Layer A)...")
        graph_writer = self._new_writer()

        log.info("Pass 1: Creating nodes...")
        self._write_code_nodes(graph_writer, entities)

        log.info("Pass 2: Creating relationships...")
        self._write_code_relationships(graph_writer, entities)

        log.info("Code graph construction complete")

    def build_git_graph(self, git_data: GitData):
        """Build Layer B: the git history graph.

        Args:
            git_data: Git data to write
        """
        log.info("Building git history graph (Layer B)...")
        graph_writer = self._new_writer()

        # Nodes first: authors, then commits.
        graph_writer.write_authors(git_data.authors)
        graph_writer.write_commits(git_data.commits)

        # Then the relationships between them and to files.
        graph_writer.write_commit_authorship(git_data.commits)
        graph_writer.write_file_modifications(git_data.modifications)

        log.info("Git graph construction complete")

    def delete_files(self, file_paths: list[str]) -> int:
        """Remove files and the entities attached to them from the graph.

        For each path, three DETACH DELETE statements are issued in order:
            1. Class nodes whose file_path matches
            2. Function nodes whose file_path matches
            3. The File node with that path

        A failure on one path is logged as a warning and does not stop the
        remaining paths from being processed.

        Args:
            file_paths: List of file paths to remove

        Returns:
            Number of paths for which all three statements ran without error
        """
        if not file_paths:
            return 0

        log.info(f"Deleting {len(file_paths)} files from graph...")

        # Order matters: dependants of the file go first, the file node last.
        statements = (
            "MATCH (c:Class) WHERE c.file_path = $path DETACH DELETE c",
            "MATCH (f:Function) WHERE f.file_path = $path DETACH DELETE f",
            "MATCH (f:File {path: $path}) DETACH DELETE f",
        )

        removed = 0
        for target in file_paths:
            try:
                for cypher in statements:
                    self.connection.execute_write(cypher, {"path": target})
            except Exception as exc:
                log.warning(f"Failed to delete file {target}: {exc}")
            else:
                removed += 1

        log.info(f"Deleted {removed} files from graph")
        return removed

    def clear_repository_data(self, repo_url: str):
        """Clear all data for a specific repository.

        Issues a single write that matches the Repository node by URL,
        optionally matches everything connected to it, and detach-deletes
        both. The original author flags this as a simplified cleanup.

        Args:
            repo_url: Repository URL
        """
        log.warning(f"Clearing data for repository: {repo_url}")

        # The connection is used directly for Kuzu (no session needed).
        cypher = (
            "\n"
            "MATCH (r:Repository {url: $url})\n"
            "OPTIONAL MATCH (r)-[*]-(n)\n"
            "DETACH DELETE r, n\n"
        )
        self.connection.execute_write(cypher, {"url": repo_url})

        log.info("Repository data cleared")
|