luckyd-code 1.2.2 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. luckyd_code/__init__.py +54 -0
  2. luckyd_code/__main__.py +5 -0
  3. luckyd_code/_agent_loop.py +551 -0
  4. luckyd_code/_data_dir.py +73 -0
  5. luckyd_code/agent.py +38 -0
  6. luckyd_code/analytics/__init__.py +18 -0
  7. luckyd_code/analytics/reporter.py +195 -0
  8. luckyd_code/analytics/scanner.py +443 -0
  9. luckyd_code/analytics/smells.py +316 -0
  10. luckyd_code/analytics/trends.py +303 -0
  11. luckyd_code/api.py +473 -0
  12. luckyd_code/audit_daemon.py +845 -0
  13. luckyd_code/autonomous_fixer.py +473 -0
  14. luckyd_code/background.py +159 -0
  15. luckyd_code/backup.py +237 -0
  16. luckyd_code/brain/__init__.py +84 -0
  17. luckyd_code/brain/assembler.py +100 -0
  18. luckyd_code/brain/chunker.py +345 -0
  19. luckyd_code/brain/constants.py +73 -0
  20. luckyd_code/brain/embedder.py +163 -0
  21. luckyd_code/brain/graph.py +311 -0
  22. luckyd_code/brain/indexer.py +316 -0
  23. luckyd_code/brain/parser.py +140 -0
  24. luckyd_code/brain/retriever.py +234 -0
  25. luckyd_code/cli.py +894 -0
  26. luckyd_code/cli_commands/__init__.py +1 -0
  27. luckyd_code/cli_commands/audit.py +120 -0
  28. luckyd_code/cli_commands/background.py +83 -0
  29. luckyd_code/cli_commands/brain.py +87 -0
  30. luckyd_code/cli_commands/config.py +75 -0
  31. luckyd_code/cli_commands/dispatcher.py +695 -0
  32. luckyd_code/cli_commands/sessions.py +41 -0
  33. luckyd_code/cli_entry.py +147 -0
  34. luckyd_code/cli_utils.py +112 -0
  35. luckyd_code/config.py +205 -0
  36. luckyd_code/context.py +214 -0
  37. luckyd_code/cost_tracker.py +209 -0
  38. luckyd_code/error_reporter.py +508 -0
  39. luckyd_code/exceptions.py +39 -0
  40. luckyd_code/export.py +126 -0
  41. luckyd_code/feedback_analyzer.py +290 -0
  42. luckyd_code/file_watcher.py +258 -0
  43. luckyd_code/git/__init__.py +11 -0
  44. luckyd_code/git/auto_commit.py +157 -0
  45. luckyd_code/git/tools.py +85 -0
  46. luckyd_code/hooks.py +236 -0
  47. luckyd_code/indexer.py +280 -0
  48. luckyd_code/init.py +39 -0
  49. luckyd_code/keybindings.py +77 -0
  50. luckyd_code/log.py +55 -0
  51. luckyd_code/mcp/__init__.py +6 -0
  52. luckyd_code/mcp/client.py +184 -0
  53. luckyd_code/memory/__init__.py +19 -0
  54. luckyd_code/memory/manager.py +339 -0
  55. luckyd_code/metrics/__init__.py +5 -0
  56. luckyd_code/model_registry.py +131 -0
  57. luckyd_code/orchestrator.py +204 -0
  58. luckyd_code/permissions/__init__.py +1 -0
  59. luckyd_code/permissions/manager.py +103 -0
  60. luckyd_code/planner.py +361 -0
  61. luckyd_code/plugins.py +91 -0
  62. luckyd_code/py.typed +0 -0
  63. luckyd_code/retry.py +57 -0
  64. luckyd_code/router.py +417 -0
  65. luckyd_code/sandbox.py +156 -0
  66. luckyd_code/self_critique.py +2 -0
  67. luckyd_code/self_improve.py +274 -0
  68. luckyd_code/sessions.py +114 -0
  69. luckyd_code/settings.py +72 -0
  70. luckyd_code/skills/__init__.py +8 -0
  71. luckyd_code/skills/review.py +22 -0
  72. luckyd_code/skills/security.py +17 -0
  73. luckyd_code/tasks/__init__.py +1 -0
  74. luckyd_code/tasks/manager.py +102 -0
  75. luckyd_code/templates/icon-192.png +0 -0
  76. luckyd_code/templates/icon-512.png +0 -0
  77. luckyd_code/templates/index.html +1965 -0
  78. luckyd_code/templates/manifest.json +14 -0
  79. luckyd_code/templates/src/app.js +694 -0
  80. luckyd_code/templates/src/body.html +767 -0
  81. luckyd_code/templates/src/cdn.txt +2 -0
  82. luckyd_code/templates/src/style.css +474 -0
  83. luckyd_code/templates/sw.js +31 -0
  84. luckyd_code/templates/test.html +6 -0
  85. luckyd_code/themes.py +48 -0
  86. luckyd_code/tools/__init__.py +97 -0
  87. luckyd_code/tools/agent_tools.py +65 -0
  88. luckyd_code/tools/bash.py +360 -0
  89. luckyd_code/tools/brain_tools.py +137 -0
  90. luckyd_code/tools/browser.py +369 -0
  91. luckyd_code/tools/datetime_tool.py +34 -0
  92. luckyd_code/tools/dockerfile_gen.py +212 -0
  93. luckyd_code/tools/file_ops.py +381 -0
  94. luckyd_code/tools/game_gen.py +360 -0
  95. luckyd_code/tools/git_tools.py +130 -0
  96. luckyd_code/tools/git_worktree.py +63 -0
  97. luckyd_code/tools/path_validate.py +64 -0
  98. luckyd_code/tools/project_gen.py +187 -0
  99. luckyd_code/tools/readme_gen.py +227 -0
  100. luckyd_code/tools/registry.py +157 -0
  101. luckyd_code/tools/shell_detect.py +109 -0
  102. luckyd_code/tools/web.py +89 -0
  103. luckyd_code/tools/youtube.py +187 -0
  104. luckyd_code/tools_bridge.py +144 -0
  105. luckyd_code/undo.py +126 -0
  106. luckyd_code/update.py +60 -0
  107. luckyd_code/verify.py +360 -0
  108. luckyd_code/web_app.py +176 -0
  109. luckyd_code/web_routes/__init__.py +23 -0
  110. luckyd_code/web_routes/background.py +73 -0
  111. luckyd_code/web_routes/brain.py +109 -0
  112. luckyd_code/web_routes/cost.py +12 -0
  113. luckyd_code/web_routes/files.py +133 -0
  114. luckyd_code/web_routes/memories.py +94 -0
  115. luckyd_code/web_routes/misc.py +67 -0
  116. luckyd_code/web_routes/project.py +48 -0
  117. luckyd_code/web_routes/review.py +20 -0
  118. luckyd_code/web_routes/sessions.py +44 -0
  119. luckyd_code/web_routes/settings.py +43 -0
  120. luckyd_code/web_routes/static.py +70 -0
  121. luckyd_code/web_routes/update.py +19 -0
  122. luckyd_code/web_routes/ws.py +237 -0
  123. luckyd_code-1.2.2.dist-info/METADATA +297 -0
  124. luckyd_code-1.2.2.dist-info/RECORD +127 -0
  125. luckyd_code-1.2.2.dist-info/WHEEL +4 -0
  126. luckyd_code-1.2.2.dist-info/entry_points.txt +3 -0
  127. luckyd_code-1.2.2.dist-info/licenses/LICENSE +21 -0
luckyd_code/brain/chunker.py
@@ -0,0 +1,345 @@
+ """Code chunker — splits source files into overlapping chunks for embedding."""
+
+ import ast
+ import os
+ import re
+ from pathlib import Path
+
+ from ..log import get_logger
+ from .constants import LANGUAGE_MAP, Chunk, should_skip
+
+ # Regex patterns for non-Python language structure detection
+ STRUCTURE_PATTERNS: dict[str, list[tuple[str, str, str]]] = {
+     "javascript": [
+         (r"(?:^|\n)\s*function\s+\*?\s*(\w+)\s*\(", "function", "function"),
+         (r"(?:^|\n)\s*(?:async\s+)?function\s+\*?\s*(\w+)\s*\(", "function", "function"),
+         (r"(?:^|\n)\s*class\s+(\w+)", "class", "class"),
+         (r"(?:^|\n)\s*(?:export\s+)?(?:default\s+)?(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?\(?.*\)?\s*=>", "function", "arrow_function"),
+         (r"(?:^|\n)\s*(?:export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*function", "function", "function_expression"),
+     ],
+     "typescript": [
+         (r"(?:^|\n)\s*function\s+\*?\s*(\w+)\s*\(", "function", "function"),
+         (r"(?:^|\n)\s*(?:async\s+)?function\s+\*?\s*(\w+)\s*\(", "function", "function"),
+         (r"(?:^|\n)\s*class\s+(\w+)", "class", "class"),
+         (r"(?:^|\n)\s*interface\s+(\w+)", "class", "interface"),
+         (r"(?:^|\n)\s*type\s+(\w+)\s*=", "class", "type_alias"),
+         (r"(?:^|\n)\s*(?:export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?\(?.*\)?\s*=>", "function", "arrow_function"),
+         (r"(?:^|\n)\s*(?:export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*function", "function", "function_expression"),
+     ],
+     "go": [
+         (r"(?:^|\n)\s*func\s+(\w+)", "function", "function"),
+         (r"(?:^|\n)\s*type\s+(\w+)\s+struct", "class", "struct"),
+         (r"(?:^|\n)\s*type\s+(\w+)\s+interface", "class", "interface"),
+     ],
+     "rust": [
+         (r"(?:^|\n)\s*fn\s+(\w+)", "function", "function"),
+         (r"(?:^|\n)\s*struct\s+(\w+)", "class", "struct"),
+         (r"(?:^|\n)\s*enum\s+(\w+)", "class", "enum"),
+         (r"(?:^|\n)\s*trait\s+(\w+)", "class", "trait"),
+         (r"(?:^|\n)\s*impl\s+(\w+)", "class", "impl"),
+     ],
+ }
+
+
+ def _chunk_python(filepath: Path, content: str) -> list[Chunk]:
+     chunks: list[Chunk] = []
+     lines = content.split("\n")
+     rel_path = str(filepath)
+
+     try:
+         tree = ast.parse(content, filename=str(filepath))
+     except SyntaxError:
+         return _chunk_by_lines(filepath, content, "python")
+
+     header_end = 1
+     module_doc = ast.get_docstring(tree) or ""
+     for node in ast.iter_child_nodes(tree):
+         if isinstance(node, (ast.Import, ast.ImportFrom)):
+             header_end = node.end_lineno or node.lineno
+         elif isinstance(node, ast.Expr) and module_doc:
+             header_end = node.end_lineno or node.lineno
+         else:
+             break
+
+     header_content = "\n".join(lines[:header_end])
+     if header_content.strip():
+         chunks.append(Chunk(
+             file_path=rel_path,
+             chunk_id=f"{rel_path}:module",
+             start_line=1,
+             end_line=header_end,
+             type="module",
+             name=Path(rel_path).name,
+             language="python",
+             content=header_content,
+         ))
+
+     for node in ast.iter_child_nodes(tree):
+         if isinstance(node, ast.ClassDef):
+             end = node.end_lineno or node.lineno
+             cls_lines = lines[node.lineno - 1:end]
+             if end < len(lines):
+                 cls_lines.append(lines[end])
+
+             content = "\n".join(cls_lines)
+             chunks.append(Chunk(
+                 file_path=rel_path,
+                 chunk_id=f"{rel_path}:class:{node.name}",
+                 start_line=node.lineno,
+                 end_line=end,
+                 type="class",
+                 name=node.name,
+                 language="python",
+                 content=content,
+             ))
+
+             for child in ast.iter_child_nodes(node):
+                 if isinstance(child, ast.FunctionDef):
+                     m_end = child.end_lineno or child.lineno
+                     method_lines = lines[child.lineno - 1:m_end]
+                     if m_end < len(lines):
+                         method_lines.append(lines[m_end])
+                     chunks.append(Chunk(
+                         file_path=rel_path,
+                         chunk_id=f"{rel_path}:method:{child.name}",
+                         start_line=child.lineno,
+                         end_line=m_end,
+                         type="method",
+                         name=child.name,
+                         language="python",
+                         content="\n".join(method_lines),
+                     ))
+
+         elif isinstance(node, ast.FunctionDef):
+             end = node.end_lineno or node.lineno
+             func_lines = lines[node.lineno - 1:end]
+             if end < len(lines):
+                 func_lines.append(lines[end])
+
+             chunks.append(Chunk(
+                 file_path=rel_path,
+                 chunk_id=f"{rel_path}:function:{node.name}",
+                 start_line=node.lineno,
+                 end_line=end,
+                 type="function",
+                 name=node.name,
+                 language="python",
+                 content="\n".join(func_lines),
+             ))
+
+     return chunks
+
+
+ def _chunk_by_lines(filepath: Path, content: str, language: str) -> list[Chunk]:
+     chunks: list[Chunk] = []
+     lines = content.split("\n")
+     rel_path = str(filepath)
+
+     header_lines: list[str] = []
+     i = 0
+     while i < len(lines):
+         if lines[i].strip() == "" and any(l.strip() for l in lines[i + 1:i + 3]):
+             break
+         header_lines.append(lines[i])
+         i += 1
+     if header_lines:
+         chunks.append(Chunk(
+             file_path=rel_path,
+             chunk_id=f"{rel_path}:module",
+             start_line=1,
+             end_line=i,
+             type="module",
+             name=Path(rel_path).name,
+             language=language,
+             content="\n".join(header_lines).strip(),
+         ))
+
+     block_start = i + 1
+     for match in re.finditer(r"\n\s*\n", content):
+         block_end = match.start()
+         if block_end > block_start:
+             block_content = content[block_start:block_end].strip()
+             if block_content:
+                 start_line = content[:block_start].count("\n") + 1
+                 end_line = content[:block_end].count("\n") + 1
+                 chunks.append(Chunk(
+                     file_path=rel_path,
+                     chunk_id=f"{rel_path}:block:{start_line}",
+                     start_line=start_line,
+                     end_line=end_line,
+                     type="block",
+                     name="",
+                     language=language,
+                     content=block_content,
+                 ))
+         block_start = match.end()
+
+     if block_start < len(content):
+         last_block = content[block_start:].strip()
+         if last_block:
+             start_line = content[:block_start].count("\n") + 1
+             end_line = content.count("\n") + 1
+             chunks.append(Chunk(
+                 file_path=rel_path,
+                 chunk_id=f"{rel_path}:block:{start_line}",
+                 start_line=start_line,
+                 end_line=end_line,
+                 type="block",
+                 name="",
+                 language=language,
+                 content=last_block,
+             ))
+
+     return chunks
+
+
+ def _chunk_with_regex(filepath: Path, content: str, language: str) -> list[Chunk]:
+     chunks: list[Chunk] = []
+     lines = content.split("\n")
+     rel_path = str(filepath)
+     patterns = STRUCTURE_PATTERNS.get(language, [])
+
+     header_end = min(20, len(lines))
+     for pattern, _type, _subtype in patterns:
+         for m in re.finditer(pattern, content):
+             line_num = content[:m.start()].count("\n") + 1
+             header_end = min(header_end, line_num - 1)
+             break
+
+     header_content = "\n".join(lines[:header_end]).strip()
+     if header_content:
+         chunks.append(Chunk(
+             file_path=rel_path,
+             chunk_id=f"{rel_path}:module",
+             start_line=1,
+             end_line=header_end,
+             type="module",
+             name=Path(rel_path).name,
+             language=language,
+             content=header_content,
+         ))
+
+     finds: list[tuple[int, str, str, str]] = []
+     for pattern, chunk_type, subtype in patterns:
+         for m in re.finditer(pattern, content):
+             line_num = content[:m.start()].count("\n") + 1
+             name = m.group(1)
+             finds.append((line_num, name, chunk_type, subtype))
+
+     finds.sort(key=lambda x: x[0])
+
+     for idx, (start_line, name, chunk_type, subtype) in enumerate(finds):
+         byte_pos = 0
+         for _ in range(start_line - 1):
+             byte_pos = content.index("\n", byte_pos) + 1
+
+         end_byte = _find_block_end(content, byte_pos)
+         if end_byte <= byte_pos:
+             end_byte = len(content)
+
+         end_line = content[:end_byte].count("\n") + 1
+
+         chunk_content = content[byte_pos:end_byte].rstrip()
+         if not chunk_content:
+             continue
+
+         if idx + 1 < len(finds):
+             next_start = finds[idx + 1][0]
+             next_byte = 0
+             for _ in range(next_start - 1):
+                 next_byte = content.index("\n", next_byte) + 1
+             overlap_end = min(end_byte + 200, next_byte)
+             overlap_content = content[end_byte:overlap_end]
+             chunk_content += overlap_content
+
+         chunks.append(Chunk(
+             file_path=rel_path,
+             chunk_id=f"{rel_path}:{chunk_type}:{name}",
+             start_line=start_line,
+             end_line=end_line,
+             type=chunk_type,
+             name=name,
+             language=language,
+             content=chunk_content,
+         ))
+
+     return chunks
+
+
+ def _find_block_end(content: str, start_byte: int) -> int:
+     """Find the end of a brace-delimited block starting at start_byte."""
+     # Find the first '{' after start_byte
+     brace_start = content.find("{", start_byte)
+     if brace_start == -1:
+         return len(content)
+
+     depth = 0
+     in_string = False
+     string_char = None
+     i = brace_start
+
+     while i < len(content):
+         ch = content[i]
+         if not in_string:
+             if ch == '"' or ch == "'":
+                 in_string = True
+                 string_char = ch
+             elif ch == "{":
+                 depth += 1
+             elif ch == "}":
+                 depth -= 1
+                 if depth == 0:
+                     return i + 1
+         else:
+             if ch == "\\":
+                 i += 1  # skip escaped char
+             elif ch == string_char:
+                 in_string = False
+         i += 1
+
+     return len(content)
+
+
+ def chunk_file(filepath: Path) -> list[Chunk]:
+     suffix = filepath.suffix.lower()
+     language = LANGUAGE_MAP.get(suffix)
+     if not language:
+         return []
+
+     try:
+         content = filepath.read_text(encoding="utf-8", errors="replace")
+     except OSError:
+         get_logger().warning("Could not read file %s", filepath, exc_info=True)
+         return []
+
+     if not content.strip():
+         return []
+
+     if language == "python":
+         return _chunk_python(filepath, content)
+     elif language in STRUCTURE_PATTERNS:
+         return _chunk_with_regex(filepath, content, language)
+     else:
+         return _chunk_by_lines(filepath, content, language)
+
+
+ def chunk_project(project_root: str) -> list[Chunk]:
+     root = Path(project_root).resolve()
+     all_chunks: list[Chunk] = []
+
+     for dirpath, dirnames, filenames in os.walk(root):
+         dirnames[:] = [d for d in dirnames if not should_skip(d)]
+
+         for fname in filenames:
+             suffix = Path(fname).suffix.lower()
+             if suffix not in LANGUAGE_MAP:
+                 continue
+
+             fpath = Path(dirpath) / fname
+             try:
+                 chunks = chunk_file(fpath)
+                 all_chunks.extend(chunks)
+             except Exception:
+                 get_logger().warning("Error chunking %s", fpath, exc_info=True)
+
+     return all_chunks
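
Editor's note: a minimal usage sketch of the two public entry points above, assuming the wheel is installed so luckyd_code.brain.chunker is importable. The file path and project root are illustrative, not from the package.

    from pathlib import Path

    from luckyd_code.brain.chunker import chunk_file, chunk_project

    # Chunk one source file; returns [] for unsupported suffixes, unreadable
    # files, or empty files.
    chunks = chunk_file(Path("src/app.py"))  # hypothetical path
    for c in chunks:
        print(c["chunk_id"], c["type"], c["start_line"], c["end_line"])

    # Walk a project tree, pruning directories via should_skip() and keeping
    # only files whose extension appears in LANGUAGE_MAP.
    all_chunks = chunk_project(".")
    print(len(all_chunks), "chunks collected")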
luckyd_code/brain/constants.py
@@ -0,0 +1,73 @@
+ """Shared constants, types, and helpers for the brain module."""
+
+ from typing import TypedDict
+
+ from .._data_dir import data_path
+
+ # Directories to skip during project scanning
+ SKIP_DIRS = {
+     ".git", "__pycache__", "node_modules", ".venv", "venv", "env",
+     ".tox", ".eggs", "dist", "build", ".next", ".nuxt",
+     "target", "vendor", ".bundle", ".claude", ".deepseek-code", ".vscode", ".idea",
+     ".mypy_cache", ".pytest_cache", ".ruff_cache", ".ruff",
+     ".svn", ".hg", "egg-info",
+ }
+
+ # Supported file extensions and their languages
+ LANGUAGE_MAP = {
+     ".py": "python",
+     ".js": "javascript",
+     ".jsx": "javascript",
+     ".ts": "typescript",
+     ".tsx": "typescript",
+     ".go": "go",
+     ".rs": "rust",
+     ".java": "java",
+     ".cpp": "cpp",
+     ".cc": "cpp",
+     ".cxx": "cpp",
+     ".c": "c",
+     ".h": "c",
+     ".hpp": "cpp",
+     ".cs": "csharp",
+     ".rb": "ruby",
+     ".php": "php",
+     ".swift": "swift",
+     ".kt": "kotlin",
+     ".kts": "kotlin",
+     ".sh": "bash",
+     ".bash": "bash",
+     ".zsh": "bash",
+     ".lua": "lua",
+     ".r": "r",
+     ".R": "r",
+     ".scala": "scala",
+     ".ex": "elixir",
+     ".exs": "elixir",
+     ".md": "markdown",
+     ".toml": "toml",
+     ".yaml": "yaml",
+     ".yml": "yaml",
+     ".json": "json",
+     ".sql": "sql",
+ }
+
+ # Storage directory for index and cache files
+ BRAIN_DIR = data_path("brain")
+
+
+ class Chunk(TypedDict):
+     """A chunk of source code ready for embedding and search."""
+     file_path: str
+     chunk_id: str
+     start_line: int
+     end_line: int
+     type: str  # module, class, function, method, block
+     name: str
+     language: str
+     content: str
+     # score is added by the retriever at search time
+
+
+ def should_skip(name: str) -> bool:
+     return name in SKIP_DIRS or name.startswith(".")
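
Editor's note: since Chunk is a TypedDict, chunk records are plain dicts at runtime. A brief sketch of how the helpers above behave; the values are illustrative only.

    from luckyd_code.brain.constants import LANGUAGE_MAP, Chunk, should_skip

    # Directory filtering: names in SKIP_DIRS and any dot-prefixed name are pruned.
    assert should_skip(".git") and should_skip("node_modules")
    assert not should_skip("src")

    # Extension -> language lookup used by chunk_file (which lower-cases the suffix first).
    assert LANGUAGE_MAP[".py"] == "python"

    # A Chunk is built with keyword arguments and behaves like an ordinary dict.
    c = Chunk(
        file_path="pkg/mod.py", chunk_id="pkg/mod.py:module", start_line=1,
        end_line=3, type="module", name="mod.py", language="python",
        content="import os",
    )
    assert c["type"] == "module"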
luckyd_code/brain/embedder.py
@@ -0,0 +1,163 @@
+ """Embedder — generates embeddings for code chunks.
+
+ Supports multiple backends:
+ - sentence-transformers (local, no API key)
+ - OpenAI text-embedding-3-small (requires API key)
+
+ All dependencies are optional — embedder returns available=False gracefully.
+ """
+
+ import os
+ from typing import Optional
+
+ from ..log import get_logger
+
+ # Module-level singleton
+ _embedder: Optional["Embedder"] = None
+
+
+ def get_embedder() -> "Embedder":
+     """Get or create the shared embedder singleton."""
+     global _embedder
+     if _embedder is None:
+         _embedder = Embedder()
+         _embedder.load()
+     return _embedder
+
+
+ class Embedder:
+     """Generates embeddings for text/code using available backends."""
+
+     def __init__(self):
+         self.available = False
+         self.dimension = 0
+         self.model_name = "none"
+         self._model = None
+         self._openai_client = None
+
+     def load(self, model_type: Optional[str] = None) -> bool:
+         """Load the embedding model.
+
+         Args:
+             model_type: "local" for sentence-transformers, "openai" for OpenAI API.
+                 If None, reads from settings or tries local first.
+
+         Returns:
+             True if a backend was successfully loaded.
+         """
+         if model_type is None:
+             try:
+                 from .. import settings as cfg
+                 model_type = cfg.load_settings().get("embedding_model", "local")
+             except Exception:
+                 model_type = "local"
+
+         if model_type == "openai":
+             return self._load_openai()
+         return self._load_local()
+
+     def _load_local(self) -> bool:
+         """Load sentence-transformers for local embeddings."""
+         try:
+             import sentence_transformers
+
+             self._model = sentence_transformers.SentenceTransformer(
+                 "all-MiniLM-L6-v2"
+             )
+             self.dimension = 384
+             self.model_name = "sentence-transformers/all-MiniLM-L6-v2"
+             self.available = True
+             get_logger().info(
+                 "Loaded local embedding model: %s (dim=%d)",
+                 self.model_name, self.dimension,
+             )
+             return True
+         except ImportError:
+             get_logger().info(
+                 "sentence-transformers not installed. "
+                 "Install with: pip install sentence-transformers"
+             )
+         except Exception as exc:
+             get_logger().warning("Failed to load sentence-transformers: %s", exc)
+
+         self.available = False
+         return False
+
+     def _load_openai(self) -> bool:
+         """Load OpenAI embedding API client."""
+         api_key = os.environ.get("OPENAI_API_KEY")
+         if not api_key:
+             get_logger().warning(
+                 "OPENAI_API_KEY not set. Cannot use OpenAI embeddings."
+             )
+             self.available = False
+             return False
+
+         try:
+             from openai import OpenAI
+
+             self._openai_client = OpenAI(api_key=api_key)
+             self.dimension = 1536
+             self.model_name = "text-embedding-3-small"
+             self.available = True
+             get_logger().info("Loaded OpenAI embedding model: text-embedding-3-small")
+             return True
+         except ImportError:
+             get_logger().warning("openai package not installed.")
+         except Exception as exc:
+             get_logger().warning("Failed to load OpenAI embeddings: %s", exc)
+
+         self.available = False
+         return False
+
+     def embed(self, texts: list[str]) -> Optional[list[list[float]]]:
+         """Embed a list of texts into vectors.
+
+         Args:
+             texts: List of text strings to embed.
+
+         Returns:
+             List of embedding vectors (list of floats), or None if unavailable.
+         """
+         if not self.available or not texts:
+             return None
+
+         # Filter out empty texts
+         valid_texts = [t for t in texts if t and t.strip()]
+         if not valid_texts:
+             return None
+
+         # Truncate very long texts (sentence-transformers has 256/512 token limit)
+         truncated = [t[:8192] for t in valid_texts]
+
+         try:
+             if self._model is not None:
+                 # sentence-transformers
+                 import numpy as np
+
+                 embeddings = self._model.encode(truncated, show_progress_bar=False)
+                 return embeddings.tolist() if isinstance(embeddings, np.ndarray) else embeddings  # type: ignore[return-value]
+             elif self._openai_client is not None:
+                 resp = self._openai_client.embeddings.create(
+                     model="text-embedding-3-small",
+                     input=truncated,
+                 )
+                 return [item.embedding for item in resp.data]
+         except Exception as exc:
+             get_logger().warning("Embedding failed: %s", exc)
+
+         return None
+
+     def embed_query(self, query: str) -> Optional[list[float]]:
+         """Embed a single query string.
+
+         Args:
+             query: The search query.
+
+         Returns:
+             Single embedding vector, or None if unavailable.
+         """
+         result = self.embed([query])
+         if result:
+             return result[0]
+         return None
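
Editor's note: a short usage sketch for the embedder above. It assumes either sentence-transformers is installed or the openai package is installed with OPENAI_API_KEY set; otherwise available stays False and embed() returns None. The sample texts are illustrative.

    from luckyd_code.brain.embedder import get_embedder

    emb = get_embedder()  # loads the backend chosen in settings, defaulting to the local model
    if emb.available:
        vectors = emb.embed(["def add(a, b): return a + b"])
        query_vec = emb.embed_query("function that adds two numbers")
        print(emb.model_name, emb.dimension, len(vectors[0]) if vectors else None)
    else:
        print("No embedding backend available.")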