claude-evolve 1.8.51 → 1.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/claude-evolve-ideate-py +15 -0
- package/bin/claude-evolve-main +12 -0
- package/bin/claude-evolve-run-py +15 -0
- package/bin/claude-evolve-worker-py +15 -0
- package/lib/__pycache__/ai_cli.cpython-314.pyc +0 -0
- package/lib/__pycache__/embedding.cpython-314.pyc +0 -0
- package/lib/__pycache__/evolution_csv.cpython-314.pyc +0 -0
- package/lib/__pycache__/evolve_ideate.cpython-314.pyc +0 -0
- package/lib/__pycache__/evolve_run.cpython-314.pyc +0 -0
- package/lib/__pycache__/evolve_worker.cpython-314.pyc +0 -0
- package/lib/ai_cli.py +196 -0
- package/lib/embedding.py +200 -0
- package/lib/evolution_csv.py +325 -0
- package/lib/evolve_ideate.py +509 -0
- package/lib/evolve_run.py +402 -0
- package/lib/evolve_worker.py +518 -0
- package/package.json +4 -1
package/bin/claude-evolve-ideate-py
ADDED
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+"""
+Python implementation of claude-evolve-ideate.
+Test this alongside the shell version before switching.
+"""
+import sys
+import os
+
+# Add lib to path
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'lib'))
+
+from evolve_ideate import main
+
+if __name__ == '__main__':
+    sys.exit(main())
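
All three new bin shims share this pattern: prepend package/lib to sys.path, import the library module's main, and hand its return value to sys.exit. The only contract this imposes on the library side is that main() returns a shell-style integer status. A hypothetical sketch of that shape (evolve_ideate.py itself is 509 lines and not shown in this section, so everything below is illustrative):

```python
# Hypothetical sketch of the main() contract the bin shim relies on;
# the real evolve_ideate.main is not shown in this diff and will differ.
import sys


def main() -> int:
    """Run ideation and return a shell-style exit code."""
    args = sys.argv[1:]  # the dispatcher forwards "$@" to the shim untouched
    if not args:
        # usage text is an example, not the package's actual message
        print("usage: claude-evolve ideate-py [options]", file=sys.stderr)
        return 2  # non-zero propagates to the shell via sys.exit(main())
    # ... ideation work goes here ...
    return 0
```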
package/bin/claude-evolve-main
CHANGED
@@ -196,10 +196,18 @@ ideate)
   shift
   exec "$SCRIPT_DIR/claude-evolve-ideate" "$@"
   ;;
+ideate-py)
+  shift
+  exec "$SCRIPT_DIR/claude-evolve-ideate-py" "$@"
+  ;;
 run)
   shift
   exec "$SCRIPT_DIR/claude-evolve-run" "$@"
   ;;
+run-py)
+  shift
+  exec "$SCRIPT_DIR/claude-evolve-run-py" "$@"
+  ;;
 analyze)
   shift
   exec "$SCRIPT_DIR/claude-evolve-analyze" "$@"
@@ -232,6 +240,10 @@ killall)
   shift
   exec "$SCRIPT_DIR/claude-evolve-killall" "$@"
   ;;
+worker-py)
+  shift
+  exec "$SCRIPT_DIR/claude-evolve-worker-py" "$@"
+  ;;
 *)
   echo "Unknown command: ${1:-}"
   echo
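
With these cases added, claude-evolve ideate-py, run-py, and worker-py dispatch to the new Python shims, while the unchanged ideate, run, and worker commands continue to use the shell implementations, so both ports can be exercised side by side as the shim docstrings recommend.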
package/bin/claude-evolve-run-py
ADDED
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+"""
+Python implementation of claude-evolve-run.
+Test this alongside the shell version before switching.
+"""
+import sys
+import os
+
+# Add lib to path
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'lib'))
+
+from evolve_run import main
+
+if __name__ == '__main__':
+    sys.exit(main())
package/bin/claude-evolve-worker-py
ADDED
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+"""
+Python implementation of claude-evolve-worker.
+Test this alongside the shell version before switching.
+"""
+import sys
+import os
+
+# Add lib to path
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'lib'))
+
+from evolve_worker import main
+
+if __name__ == '__main__':
+    sys.exit(main())
package/lib/__pycache__/*.cpython-314.pyc
ADDED
Binary files (compiled bytecode caches for the six new lib modules) not shown.
package/lib/ai_cli.py
ADDED
@@ -0,0 +1,196 @@
+#!/usr/bin/env python3
+"""
+Python wrapper around ai-cli.sh for AI model invocation.
+AIDEV-NOTE: This keeps ai-cli.sh as the source of truth for model configs and timeouts.
+"""
+
+import os
+import subprocess
+import tempfile
+from pathlib import Path
+from typing import Optional, Tuple
+
+# Path to ai-cli.sh relative to this file
+SCRIPT_DIR = Path(__file__).parent
+AI_CLI_PATH = SCRIPT_DIR / "ai-cli.sh"
+
+
+class AIError(Exception):
+    """Base exception for AI errors."""
+    pass
+
+
+class RateLimitError(AIError):
+    """Rate limit hit - should retry later."""
+    pass
+
+
+class APIExhaustedError(AIError):
+    """API quota exhausted - stop processing."""
+    pass
+
+
+class TimeoutError(AIError):
+    """AI call timed out."""
+    pass
+
+
+def get_git_protection_warning() -> str:
+    """Get the git protection warning that must prefix all AI prompts."""
+    return '''!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!
+!!!            ⛔ ABSOLUTE PROHIBITION - READ THIS FIRST ⛔
+!!!
+!!! YOU ARE STRICTLY FORBIDDEN FROM USING ANY GIT COMMANDS WHATSOEVER
+!!!
+!!! ❌ FORBIDDEN: git commit, git add, git reset, git checkout, git revert,
+!!!              git branch, git merge, git stash, git clean, git push, git pull
+!!!              OR ANY OTHER COMMAND STARTING WITH 'git'
+!!!
+!!! ⚠️ WHY: This runs in production. Git operations have caused DATA LOSS.
+!!!        Multiple times AIs have corrupted evolution runs with git commands.
+!!!        Version control is ONLY managed by the human operator.
+!!!
+!!! ✅ WHAT YOU CAN DO: Edit files directly using file editing tools ONLY.
+!!!                    Never touch version control. Ever.
+!!!
+!!! 💀 IF YOU USE GIT: You will corrupt the entire evolution run and lose data.
+!!!                   This is an automated system. No git operations allowed.
+!!!
+!!! 🚨 CONSEQUENCES: If you execute ANY git command, the human operator will be
+!!!                 forced to SHUT DOWN ALL AI-BASED EVOLUTION WORK and switch
+!!!                 to manual-only mode. You will cause the termination of this
+!!!                 entire automated evolution system. DO NOT BE THAT AI.
+!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+'''
+
+
+def call_ai(
+    prompt: str,
+    command: str = "run",
+    working_dir: Optional[str] = None,
+    env_vars: Optional[dict] = None
+) -> Tuple[str, str]:
+    """
+    Call AI using the configured models via ai-cli.sh.
+
+    Args:
+        prompt: The prompt to send to the AI
+        command: Either "run" or "ideate" - determines which model pool to use
+        working_dir: Directory to run the command in (for file editing)
+        env_vars: Additional environment variables to pass
+
+    Returns:
+        Tuple of (output, model_name)
+
+    Raises:
+        TimeoutError: If the AI call times out
+        RateLimitError: If rate limited
+        APIExhaustedError: If API quota exhausted
+        AIError: For other AI errors
+    """
+    # Create temp file for model name (ai-cli.sh writes to /tmp/.claude-evolve-model-$$)
+    pid = os.getpid()
+    model_file = f"/tmp/.claude-evolve-model-{pid}"
+
+    # Build the bash command that sources config and calls the AI
+    # We need to source config.sh first to get LLM_RUN/LLM_IDEATE variables
+    bash_script = f'''
+source "{SCRIPT_DIR}/config.sh"
+source "{AI_CLI_PATH}"
+call_ai_random "$1" "$2"
+'''
+
+    # Setup environment
+    env = os.environ.copy()
+    if env_vars:
+        env.update(env_vars)
+
+    try:
+        result = subprocess.run(
+            ["bash", "-c", bash_script, "bash", prompt, command],
+            capture_output=True,
+            text=True,
+            cwd=working_dir,
+            env=env
+        )
+
+        output = result.stdout
+        stderr = result.stderr
+        exit_code = result.returncode
+
+        # Read model name from temp file
+        model_name = "unknown"
+        if os.path.exists(model_file):
+            with open(model_file) as f:
+                model_name = f.read().strip()
+            os.remove(model_file)
+
+        # Handle exit codes
+        if exit_code == 124:
+            raise TimeoutError(f"AI call timed out (model: {model_name})")
+        elif exit_code == 2:
+            raise RateLimitError(f"Rate limit hit (model: {model_name})")
+        elif exit_code == 3:
+            raise APIExhaustedError(f"API quota exhausted (model: {model_name})")
+        elif exit_code != 0:
+            raise AIError(f"AI call failed with exit code {exit_code}: {stderr}")
+
+        return output, model_name
+
+    except subprocess.SubprocessError as e:
+        raise AIError(f"Failed to call AI: {e}")
+
+
+def call_ai_for_file_edit(
+    prompt: str,
+    file_path: str,
+    command: str = "run",
+    working_dir: Optional[str] = None
+) -> Tuple[bool, str]:
+    """
+    Call AI to edit a specific file.
+
+    This is used when the AI needs to modify files directly (like CSV editing
+    during ideation). The file path is passed in the prompt context.
+
+    Args:
+        prompt: The prompt including file editing instructions
+        file_path: Path to the file being edited (for verification)
+        command: Either "run" or "ideate"
+        working_dir: Directory to run in
+
+    Returns:
+        Tuple of (success: bool, model_name: str)
+    """
+    # Get file mtime before
+    before_mtime = None
+    if os.path.exists(file_path):
+        before_mtime = os.path.getmtime(file_path)
+
+    try:
+        output, model_name = call_ai(prompt, command, working_dir)
+
+        # Verify file was modified
+        if os.path.exists(file_path):
+            after_mtime = os.path.getmtime(file_path)
+            if before_mtime is not None and after_mtime > before_mtime:
+                return True, model_name
+
+        # File not modified - might be an error
+        return False, model_name
+
+    except AIError:
+        raise
+
+
+if __name__ == "__main__":
+    # Quick test
+    print("Testing AI CLI wrapper...")
+    print(f"AI CLI path: {AI_CLI_PATH}")
+    print(f"AI CLI exists: {AI_CLI_PATH.exists()}")
+    print("\nGit protection warning:")
+    print(get_git_protection_warning()[:200] + "...")
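
A note for callers: ai_cli.py maps ai-cli.sh exit codes to typed exceptions (124 → TimeoutError, 2 → RateLimitError, 3 → APIExhaustedError), so consumers branch on exception type rather than parsing stderr. A minimal usage sketch, assuming ai_cli is importable from package/lib; the retry/backoff policy is illustrative, not part of the package:

```python
# Illustrative caller for ai_cli.call_ai; only the exception taxonomy and
# call_ai/get_git_protection_warning signatures come from the module above.
import time

from ai_cli import (
    AIError, APIExhaustedError, RateLimitError, TimeoutError,
    call_ai, get_git_protection_warning,
)

# Every prompt is expected to lead with the git protection banner
prompt = get_git_protection_warning() + "\nDescribe the next candidate idea."

for attempt in range(3):
    try:
        output, model = call_ai(prompt, command="ideate", working_dir=".")
        print(f"[{model}] {output[:120]}")
        break
    except RateLimitError:
        time.sleep(30 * (attempt + 1))  # exit code 2: back off, then retry
    except APIExhaustedError:
        raise  # exit code 3: quota gone, abort the whole run
    except (TimeoutError, AIError) as exc:
        print(f"attempt {attempt + 1} failed: {exc}")  # timeout is exit code 124
```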
package/lib/embedding.py
ADDED
@@ -0,0 +1,200 @@
+#!/usr/bin/env python3
+"""
+Embedding helper using Ollama's nomic-embed-text model.
+AIDEV-NOTE: Requires ollama with nomic-embed-text model pulled.
+AIDEV-NOTE: Embeddings are cached to disk for efficiency.
+"""
+
+import hashlib
+import json
+import math
+import os
+import urllib.request
+from pathlib import Path
+from typing import List, Optional, Tuple, Dict
+
+EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "nomic-embed-text")
+OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434")
+
+# Global cache - maps text hash to embedding
+_embedding_cache: Dict[str, List[float]] = {}
+_cache_file: Optional[Path] = None
+_cache_dirty = False
+
+
+def _text_hash(text: str) -> str:
+    """Create a short hash of text for cache key."""
+    return hashlib.sha256(text.encode('utf-8')).hexdigest()[:16]
+
+
+def set_cache_file(path: str) -> None:
+    """Set the file path for persistent embedding cache."""
+    global _cache_file, _embedding_cache
+    _cache_file = Path(path)
+    _load_cache()
+
+
+def _load_cache() -> None:
+    """Load cache from disk."""
+    global _embedding_cache
+    if _cache_file and _cache_file.exists():
+        try:
+            with open(_cache_file, 'r') as f:
+                _embedding_cache = json.load(f)
+            print(f"[EMBED] Loaded {len(_embedding_cache)} cached embeddings", file=__import__('sys').stderr)
+        except Exception as e:
+            print(f"[EMBED] Cache load error: {e}", file=__import__('sys').stderr)
+            _embedding_cache = {}
+
+
+def save_cache() -> None:
+    """Save cache to disk."""
+    global _cache_dirty
+    if _cache_file and _cache_dirty:
+        try:
+            _cache_file.parent.mkdir(parents=True, exist_ok=True)
+            with open(_cache_file, 'w') as f:
+                json.dump(_embedding_cache, f)
+            _cache_dirty = False
+        except Exception as e:
+            print(f"[EMBED] Cache save error: {e}", file=__import__('sys').stderr)
+
+
+def get_embedding(text: str, use_cache: bool = True) -> Optional[List[float]]:
+    """Get embedding vector for text using Ollama. Uses cache if available."""
+    global _cache_dirty
+
+    # Check cache first
+    if use_cache:
+        key = _text_hash(text)
+        if key in _embedding_cache:
+            return _embedding_cache[key]
+
+    try:
+        req_data = json.dumps({"model": EMBEDDING_MODEL, "input": text}).encode('utf-8')
+        req = urllib.request.Request(
+            f"{OLLAMA_URL}/api/embed",
+            data=req_data,
+            headers={"Content-Type": "application/json"}
+        )
+        with urllib.request.urlopen(req, timeout=30) as response:
+            data = json.loads(response.read().decode('utf-8'))
+            embedding = data.get("embeddings", [[]])[0]
+
+        # Store in cache
+        if use_cache and embedding:
+            key = _text_hash(text)
+            _embedding_cache[key] = embedding
+            _cache_dirty = True
+
+        return embedding
+    except Exception as e:
+        print(f"Embedding error: {e}")
+        return None
+
+
+def get_file_embedding(file_path: str) -> Optional[List[float]]:
+    """Get embedding for a file's contents."""
+    try:
+        with open(file_path, 'r') as f:
+            return get_embedding(f.read())
+    except Exception as e:
+        print(f"File read error: {e}")
+        return None
+
+
+def cosine_similarity(a: List[float], b: List[float]) -> float:
+    """Compute cosine similarity between two embedding vectors."""
+    if not a or not b or len(a) != len(b):
+        return 0.0
+
+    dot = sum(x * y for x, y in zip(a, b))
+    norm_a = math.sqrt(sum(x * x for x in a))
+    norm_b = math.sqrt(sum(x * x for x in b))
+
+    if norm_a == 0 or norm_b == 0:
+        return 0.0
+
+    return dot / (norm_a * norm_b)
+
+
+def is_similar(text1: str, text2: str, threshold: float = 0.9) -> bool:
+    """Check if two texts are semantically similar."""
+    emb1 = get_embedding(text1)
+    emb2 = get_embedding(text2)
+
+    if not emb1 or not emb2:
+        return False
+
+    return cosine_similarity(emb1, emb2) >= threshold
+
+
+def find_most_similar(
+    query: str,
+    candidates: List[str],
+    top_k: int = 5
+) -> List[Tuple[int, float, str]]:
+    """
+    Find most similar texts from candidates.
+    Returns list of (index, similarity, text) tuples.
+    """
+    query_emb = get_embedding(query)
+    if not query_emb:
+        return []
+
+    results = []
+    for i, candidate in enumerate(candidates):
+        cand_emb = get_embedding(candidate)
+        if cand_emb:
+            sim = cosine_similarity(query_emb, cand_emb)
+            results.append((i, sim, candidate))
+
+    results.sort(key=lambda x: x[1], reverse=True)
+    return results[:top_k]
+
+
+def check_novelty(
+    new_code: str,
+    existing_codes: List[str],
+    threshold: float = 0.95
+) -> Tuple[bool, float]:
+    """
+    Check if new code is novel enough compared to existing code.
+    Returns (is_novel, max_similarity).
+    Uses cache for efficiency - subsequent calls with same texts are instant.
+    """
+    new_emb = get_embedding(new_code)
+    if not new_emb:
+        return True, 0.0  # Can't check, assume novel
+
+    max_sim = 0.0
+    for existing in existing_codes:
+        existing_emb = get_embedding(existing)
+        if existing_emb:
+            sim = cosine_similarity(new_emb, existing_emb)
+            max_sim = max(max_sim, sim)
+
+    # Save cache after checking (batched save)
+    save_cache()
+
+    return max_sim < threshold, max_sim
+
+
+if __name__ == "__main__":
+    # Test
+    print("Testing embedding...")
+    emb = get_embedding("def hello(): print('hello world')")
+    if emb:
+        print(f"Embedding (first 5 dims): {emb[:5]}")
+        print(f"Full dimensions: {len(emb)}")
+
+    # Test similarity
+    code1 = "def add(a, b): return a + b"
+    code2 = "def sum(x, y): return x + y"
+    code3 = "def multiply(a, b): return a * b"
+
+    sim12 = cosine_similarity(get_embedding(code1), get_embedding(code2))
+    sim13 = cosine_similarity(get_embedding(code1), get_embedding(code3))
+
+    print(f"\nSimilarity (add vs sum): {sim12:.4f}")
+    print(f"Similarity (add vs multiply): {sim13:.4f}")