factorforge-cds 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. factorforge/__init__.py +19 -0
  2. factorforge/__main__.py +8 -0
  3. factorforge/cli/__init__.py +5 -0
  4. factorforge/cli/legacy_cli.py +157 -0
  5. factorforge/cli/main.py +305 -0
  6. factorforge/core/interfaces/__init__.py +7 -0
  7. factorforge/core/interfaces/exporter.py +13 -0
  8. factorforge/core/interfaces/optimizer.py +85 -0
  9. factorforge/core/interfaces/validator.py +9 -0
  10. factorforge/database.py +150 -0
  11. factorforge/engines/__init__.py +60 -0
  12. factorforge/engines/ml/__init__.py +0 -0
  13. factorforge/engines/ml/plant_optimizer.py +325 -0
  14. factorforge/engines/registry.py +141 -0
  15. factorforge/engines/v1_archived/__init__.py +15 -0
  16. factorforge/engines/v2/__init__.py +13 -0
  17. factorforge/engines/v2/codon_table_builder.py +107 -0
  18. factorforge/engines/v2/construct_builder.py +403 -0
  19. factorforge/engines/v2/exporter.py +455 -0
  20. factorforge/engines/v2/optimizer.py +190 -0
  21. factorforge/engines/v2/pipeline.py +275 -0
  22. factorforge/engines/v2/rules/__init__.py +3 -0
  23. factorforge/engines/v2/rules/domesticator.py +403 -0
  24. factorforge/engines/v2/rules/reverse_translator.py +765 -0
  25. factorforge/engines/v2/rules/rule_engine.py +867 -0
  26. factorforge/engines/v2/scoring.py +232 -0
  27. factorforge/engines/v2/utils.py +231 -0
  28. factorforge/engines/v2/validator.py +383 -0
  29. factorforge/engines/v3/__init__.py +12 -0
  30. factorforge/engines/v3/explain.py +119 -0
  31. factorforge/engines/v3/inference/__init__.py +6 -0
  32. factorforge/engines/v3/inference/constrained_decoder.py +80 -0
  33. factorforge/engines/v3/inference/v2_adapter.py +72 -0
  34. factorforge/engines/v3/metrics.py +145 -0
  35. factorforge/engines/v3/modeling_bart_decoder.py +127 -0
  36. factorforge/engines/v3/pipeline.py +192 -0
  37. factorforge/engines/v3/synonym_mask.py +61 -0
  38. factorforge/engines/v3/tokenizer.py +192 -0
  39. factorforge/ml/__init__.py +33 -0
  40. factorforge/ml/feasibility.py +199 -0
  41. factorforge/ml/metrics.py +295 -0
  42. factorforge/utils/__init__.py +31 -0
  43. factorforge/utils/construct_id.py +8 -0
  44. factorforge/utils/exceptions.py +32 -0
  45. factorforge/utils/sequence_validator.py +189 -0
  46. factorforge/utils/validation.py +104 -0
  47. factorforge_cds-3.0.0.dist-info/METADATA +475 -0
  48. factorforge_cds-3.0.0.dist-info/RECORD +52 -0
  49. factorforge_cds-3.0.0.dist-info/WHEEL +5 -0
  50. factorforge_cds-3.0.0.dist-info/entry_points.txt +2 -0
  51. factorforge_cds-3.0.0.dist-info/licenses/LICENSE +201 -0
  52. factorforge_cds-3.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,150 @@
1
+ """Database models and CRUD operations for FactorForge."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import uuid
7
+ from typing import Dict, Optional
8
+
9
+ from sqlalchemy import ARRAY, DECIMAL, TIMESTAMP, Column, ForeignKey, String, Text, create_engine, func
10
+ from sqlalchemy.dialects.postgresql import JSONB, UUID
11
+ from sqlalchemy.orm import DeclarativeBase, sessionmaker
12
+
13
# Connection string: the DATABASE_URL environment variable wins; otherwise the
# local fallback below is used.
# NOTE(review): the fallback embeds a username and password in source —
# confirm it is only ever valid for a local development database.
_FALLBACK_DATABASE_URL = (
    "postgresql://plantform:plantform_dev_2026@localhost:5432/factorforge_operational"
)
DATABASE_URL = os.getenv("DATABASE_URL", _FALLBACK_DATABASE_URL)

# pool_pre_ping makes the pool test each connection before handing it out.
engine = create_engine(DATABASE_URL, pool_pre_ping=True)

# Session factory bound to the engine; callers commit and flush explicitly.
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)
20
+
21
+
22
class Base(DeclarativeBase):
    """Declarative base class shared by every ORM model in this module."""
24
+
25
+
26
class Batch(Base):
    """ORM model for ``factorforge.batches``, identified by a unique study number."""

    __tablename__ = "batches"
    __table_args__ = dict(schema="factorforge")

    # Primary key; a random UUID is generated on the Python side when none is given.
    batch_id = Column(
        UUID(as_uuid=True),
        default=uuid.uuid4,
        primary_key=True,
    )
    study_number = Column(String(50), nullable=False, unique=True)
    organism = Column(String(100), nullable=False)
    target_protein = Column(String(255), nullable=False)
    # Both timestamps default to the database's now(); updated_at is also
    # refreshed on UPDATE.
    created_at = Column(TIMESTAMP, server_default=func.now())
    updated_at = Column(
        TIMESTAMP,
        onupdate=func.now(),
        server_default=func.now(),
    )
    status = Column(String(20), default="pending")
    created_by = Column(String(100))
38
+
39
+
40
class Sequence(Base):
    """ORM model for ``factorforge.sequences``: one stored sequence belonging to a batch."""

    __tablename__ = "sequences"
    __table_args__ = dict(schema="factorforge")

    sequence_id = Column(
        UUID(as_uuid=True),
        default=uuid.uuid4,
        primary_key=True,
    )
    # Owning batch; the foreign key is declared with ON DELETE CASCADE.
    batch_id = Column(
        UUID(as_uuid=True),
        ForeignKey("factorforge.batches.batch_id", ondelete="CASCADE"),
        nullable=False,
    )
    sequence_type = Column(String(20), nullable=False)
    sequence_data = Column(Text, nullable=False)
    # Optional per-sequence metrics stored as fixed-precision decimals.
    gc_content = Column(DECIMAL(5, 4))
    cai = Column(DECIMAL(5, 4))
    tm = Column(DECIMAL(5, 2))
    created_at = Column(TIMESTAMP, server_default=func.now())
    # The attribute is spelled ``metadata_`` while the database column is
    # named "metadata".
    metadata_ = Column("metadata", JSONB)
57
+
58
+
59
class OptimizationResult(Base):
    """ORM model for ``factorforge.optimization_results``: details of one optimization run."""

    __tablename__ = "optimization_results"
    __table_args__ = dict(schema="factorforge")

    result_id = Column(
        UUID(as_uuid=True),
        default=uuid.uuid4,
        primary_key=True,
    )
    # NOTE(review): unlike Sequence.batch_id, these foreign keys declare no
    # ``ondelete`` behaviour — confirm whether that difference is intended.
    batch_id = Column(
        UUID(as_uuid=True),
        ForeignKey("factorforge.batches.batch_id"),
    )
    sequence_id = Column(
        UUID(as_uuid=True),
        ForeignKey("factorforge.sequences.sequence_id"),
    )
    algorithm_version = Column(String(20), nullable=False)
    execution_time_sec = Column(DECIMAL(8, 3))
    # Text arrays (the file imports the PostgreSQL dialect types alongside ARRAY).
    avoided_motifs = Column(ARRAY(Text))
    warnings = Column(ARRAY(Text))
    created_at = Column(TIMESTAMP, server_default=func.now())
71
+
72
+
73
def save_optimization(
    study_number: str,
    protein_name: str,
    input_sequence: str,
    optimized_sequence: str,
    metrics: Dict,
    algorithm_version: str = "2.1.0",
    organism: str = "nicotiana_benthamiana",
) -> str:
    """Save optimization result to database.

    Writes, in one transaction, a ``Batch`` (status ``"completed"``), an
    ``"input"`` and an ``"optimized"`` ``Sequence``, and an
    ``OptimizationResult`` that points at the optimized sequence.

    Args:
        study_number: Value for the batch's unique ``study_number`` column.
        protein_name: Stored as the batch's ``target_protein``.
        input_sequence: Sequence text stored with type ``"input"``.
        optimized_sequence: Sequence text stored with type ``"optimized"``.
        metrics: Mapping read with ``.get`` for ``gc_content``, ``cai``,
            ``tm``, ``execution_time``, ``avoided_motifs`` and ``warnings``;
            the whole mapping is also stored in the optimized sequence's
            metadata column.
        algorithm_version: Stored on the optimization result.
        organism: Stored on the batch; defaults to the value that was
            previously hard-coded.

    Returns:
        The new batch's id as a string.
    """
    with SessionLocal() as session:
        batch = Batch(
            study_number=study_number,
            organism=organism,
            target_protein=protein_name,
            status="completed",
        )
        session.add(batch)
        # Flush so the batch's primary key is populated before it is referenced.
        session.flush()

        # Capture the key now: commit() expires loaded attributes by default,
        # so reading it from the ORM object afterwards would need another
        # round trip (or fail once the session has been closed).
        batch_id = batch.batch_id

        input_seq = Sequence(
            batch_id=batch_id,
            sequence_type="input",
            sequence_data=input_sequence,
        )
        output_seq = Sequence(
            batch_id=batch_id,
            sequence_type="optimized",
            sequence_data=optimized_sequence,
            gc_content=metrics.get("gc_content"),
            cai=metrics.get("cai"),
            tm=metrics.get("tm"),
            metadata_=metrics,
        )
        session.add_all([input_seq, output_seq])
        # Flush again so output_seq.sequence_id exists for the result row.
        session.flush()

        result = OptimizationResult(
            batch_id=batch_id,
            sequence_id=output_seq.sequence_id,
            algorithm_version=algorithm_version,
            execution_time_sec=metrics.get("execution_time"),
            avoided_motifs=metrics.get("avoided_motifs", []),
            warnings=metrics.get("warnings", []),
        )
        session.add(result)
        session.commit()

    return str(batch_id)
121
+
122
+
123
def get_batch(study_number: str) -> Optional[Dict]:
    """Retrieve batch by study number.

    Args:
        study_number: Value matched against ``Batch.study_number``.

    Returns:
        ``None`` when no batch matches. Otherwise a dict with ``batch_id``,
        ``study_number``, ``protein``, ``status`` and ``sequences``; each
        sequence entry carries its ``type``, a preview of at most 50
        characters in ``data``, and ``gc`` / ``cai`` as floats (or ``None``).
    """
    preview_length = 50

    def _preview(text: str) -> str:
        # Only mark the preview with an ellipsis when something was cut off;
        # a short sequence is returned unchanged.
        if len(text) > preview_length:
            return f"{text[:preview_length]}..."
        return text

    with SessionLocal() as session:
        batch = (
            session.query(Batch)
            .filter(Batch.study_number == study_number)
            .first()
        )
        if batch is None:
            return None

        sequences = (
            session.query(Sequence)
            .filter(Sequence.batch_id == batch.batch_id)
            .all()
        )

        return {
            "batch_id": str(batch.batch_id),
            "study_number": batch.study_number,
            "protein": batch.target_protein,
            "status": batch.status,
            "sequences": [
                {
                    "type": seq.sequence_type,
                    "data": _preview(seq.sequence_data),
                    "gc": float(seq.gc_content) if seq.gc_content is not None else None,
                    "cai": float(seq.cai) if seq.cai is not None else None,
                }
                for seq in sequences
            ],
        }
@@ -0,0 +1,60 @@
1
+ """Optimization Engines"""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Type
6
+
7
+ from factorforge.core.interfaces import OptimizerEngine
8
+
9
+ from .registry import EngineRegistry
10
+
11
+
12
def _load_v1() -> Type[OptimizerEngine]:
    """Lazy loader registered for the archived v1 engine.

    Raises:
        ImportError: Always; the message tells the user how to install v1.
    """
    message = "FactorForge v1 is archived. Install with: pip install factorforge[v1]"
    raise ImportError(message)
16
+
17
+
18
def _load_v3() -> Type[OptimizerEngine]:
    """Lazy loader for the v3 engine: import its class only when called."""
    # Imported here rather than at module top so that importing this package
    # does not import the v3 subpackage.
    from .v3 import V3Optimizer as engine_class

    return engine_class  # type: ignore[return-value]
22
+
23
+
24
def register_builtin_engines() -> None:
    """Register bundled engines with lazy loaders for archived/ML engines.

    The v2 engine class is imported and registered directly; v1 and v3 are
    registered through loader callables, in that order.
    """
    from .v2 import RuleBasedOptimizer

    v2_metadata = {
        "version": "3.0.0",
        "engine_type": "rule_based",
        "role": "legacy_fallback",
        "stable": True,
    }
    EngineRegistry.register("v2", RuleBasedOptimizer, metadata=v2_metadata)

    # (engine name, loader, metadata) for every engine that is loaded on demand.
    lazy_engines = (
        (
            "v1",
            _load_v1,
            {
                "version": "archived",
                "engine_type": "rule_based",
                "role": "archived",
                "stable": False,
            },
        ),
        (
            "v3",
            _load_v3,
            {
                "version": "alpha",
                "engine_type": "ml",
                "role": "experimental",
                "stable": False,
            },
        ),
    )
    for engine_name, loader, engine_metadata in lazy_engines:
        EngineRegistry.register_lazy(engine_name, loader, metadata=engine_metadata)
58
+
59
+
60
+ __all__ = ["EngineRegistry", "register_builtin_engines"]
File without changes
@@ -0,0 +1,325 @@
1
+ """
2
+ Machine-learning-based codon optimization for plants.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import json
8
+ import math
9
+ from pathlib import Path
10
+ from typing import Dict, List, Tuple
11
+
12
+ import pandas as pd
13
+ import torch
14
+ from transformers import BartForConditionalGeneration
15
+
16
# Codon -> one-letter amino acid ("*" marks a stop codon).
# The table is generated instead of spelled out: codons are enumerated with
# each position cycling through T, C, A, G (third position fastest), and
# _AMINO_ACIDS lists the translation of each codon in that same order.
_BASES = "TCAG"
_AMINO_ACIDS = (
    "FFLLSSSSYY**CC*W"  # first base T
    "LLLLPPPPHHQQRRRR"  # first base C
    "IIIMTTTTNNKKSSRR"  # first base A
    "VVVVAAAADDEEGGGG"  # first base G
)
CODON_TO_AA = {
    first + second + third: _AMINO_ACIDS[16 * i + 4 * j + k]
    for i, first in enumerate(_BASES)
    for j, second in enumerate(_BASES)
    for k, third in enumerate(_BASES)
}
82
+
83
+
84
class CodonTokenizer:
    """Translate between DNA codons and integer token ids using a fixed vocabulary.

    The vocabulary must contain the special tokens ``[PAD]``, ``[UNK]``,
    ``[MASK]``, ``[START]`` and ``[END]``.
    """

    def __init__(self, token_map: Dict[str, int]):
        """Store the vocabulary, its inverse, and the ids of the special tokens.

        Raises:
            KeyError: If any of the five special tokens is absent.
        """
        self.token_to_id = token_map
        self.id_to_token = {token_id: token for token, token_id in token_map.items()}
        self.pad_token_id = token_map["[PAD]"]
        self.unk_token_id = token_map["[UNK]"]
        self.mask_token_id = token_map["[MASK]"]
        self.start_token_id = token_map["[START]"]
        self.end_token_id = token_map["[END]"]

    @classmethod
    def from_json(cls, path: Path) -> "CodonTokenizer":
        """Build a tokenizer from a JSON file holding the token-to-id mapping.

        Raises:
            ValueError: If the file lacks any of the required special tokens.
        """
        with path.open("r", encoding="utf-8") as handle:
            vocabulary = json.load(handle)
        missing = [
            special
            for special in ("[PAD]", "[UNK]", "[MASK]", "[START]", "[END]")
            if special not in vocabulary
        ]
        if missing:
            raise ValueError(f"Tokenizer missing special tokens: {missing}")
        return cls(vocabulary)

    def encode_dna(self, dna_seq: str) -> List[int]:
        """Return ``[START] + one id per complete codon + [END]``.

        The sequence is upper-cased first. Codons absent from the vocabulary
        map to the ``[UNK]`` id; a trailing fragment shorter than three bases
        is ignored.
        """
        upper = dna_seq.upper()
        lookup = self.token_to_id.get
        codon_ids = [
            lookup(upper[3 * index : 3 * index + 3], self.unk_token_id)
            for index in range(len(upper) // 3)
        ]
        return [self.start_token_id, *codon_ids, self.end_token_id]

    def decode_dna(self, ids: List[int]) -> str:
        """Concatenate the codons named by ``ids``.

        Ids that are unknown, or whose token is not exactly three characters
        drawn from ``ACGT`` (for example the special tokens), are skipped.
        """
        pieces: List[str] = []
        for raw_id in ids:
            token = self.id_to_token.get(int(raw_id))
            if not token or len(token) != 3:
                continue
            if any(base not in "ACGT" for base in token):
                continue
            pieces.append(token)
        return "".join(pieces)
123
+
124
+
125
class PlantCodonOptimizer:
    """
    Optimize codon usage using a trained BART model and codon frequency table.

    Construction loads the model, the codon frequency CSV and the tokenizer
    vocabulary; the metric helpers (``calculate_cai``, ``compare_sequences``,
    ``generate_report``) only use the codon table derived state.
    """

    # Stop codons never receive a relative-adaptiveness weight (they are
    # excluded when the weights are built), so CAI skips them instead of
    # treating them as unknown codons.
    _STOP_CODONS = frozenset({"TAA", "TAG", "TGA"})

    def __init__(
        self,
        model_path: str,
        codon_table_path: str,
        tokenizer_path: str,
        organism: str = "N.benthamiana",
    ) -> None:
        """
        Load the model, codon table and tokenizer.

        Args:
            model_path: Passed to ``BartForConditionalGeneration.from_pretrained``.
            codon_table_path: CSV file with ``Codon`` and ``Frequency`` columns.
            tokenizer_path: JSON vocabulary for ``CodonTokenizer``.
            organism: Label written into generated reports.
        """
        self.organism = organism
        # Use the GPU when torch reports one, otherwise the CPU.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = BartForConditionalGeneration.from_pretrained(model_path).to(self.device)
        self.model.eval()

        self.codon_table = self._load_codon_table(Path(codon_table_path))
        self.tokenizer = CodonTokenizer.from_json(Path(tokenizer_path))
        self.codon_weights = self._build_codon_weights(self.codon_table)
        self.best_codon_for_aa = self._best_codon_map(self.codon_table)

    def optimize(self, protein_sequence: str, beam_size: int = 5) -> str:
        """
        Generate an optimized DNA sequence for a protein input.

        The protein is validated, reverse-translated with the most frequent
        codon per amino acid, tokenized, and passed to the model's beam
        search; the first returned hypothesis is decoded back to DNA.

        Raises:
            ValueError: If the protein is empty, contains invalid letters, or
                has an amino acid with no codon in the table.
        """
        protein_seq = self._normalize_protein(protein_sequence)
        baseline_dna = self._reverse_translate(protein_seq)
        input_ids = torch.tensor(
            [self.tokenizer.encode_dna(baseline_dna)],
            dtype=torch.long,
            device=self.device,
        )
        attention_mask = (input_ids != self.tokenizer.pad_token_id).long()
        # Output length is pinned to the input length (min == max).
        max_length = input_ids.shape[1]

        with torch.no_grad():
            outputs = self.model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                num_beams=beam_size,
                max_length=max_length,
                min_length=max_length,
                early_stopping=True,
            )
        return self.tokenizer.decode_dna(outputs[0].tolist())

    def calculate_cai(self, dna_sequence: str) -> float:
        """
        Calculate Codon Adaptation Index using codon frequencies.

        The result is the geometric mean of the per-codon weights over the
        complete codons of the sequence. Stop codons are skipped. A trailing
        partial codon is ignored.

        Returns:
            The CAI, or ``0.0`` when there is no scorable codon or when any
            scored codon has no positive weight.
        """
        seq = dna_sequence.upper()
        log_weights: List[float] = []
        for start in range(0, len(seq) - 2, 3):
            codon = seq[start : start + 3]
            if codon in self._STOP_CODONS:
                # Previously a stop codon forced the whole score to 0.0,
                # which made any CDS with a terminator unscorable.
                continue
            weight = self.codon_weights.get(codon, 0.0)
            if weight <= 0:
                return 0.0
            log_weights.append(math.log(weight))
        if not log_weights:
            return 0.0
        return math.exp(sum(log_weights) / len(log_weights))

    def compare_sequences(self, original_dna: str, optimized_dna: str) -> Dict[str, float]:
        """
        Compare codon-by-codon and return metrics.

        Only the codons present in both sequences (the shorter length) are
        compared. Returned keys: ``total_codons``, ``changed_codons``,
        ``change_rate`` (percent), ``original_cai``, ``optimized_cai``,
        ``cai_improvement``, ``original_gc`` and ``optimized_gc`` (percent).
        """
        original = original_dna.upper()
        optimized = optimized_dna.upper()
        total_codons = min(len(original), len(optimized)) // 3
        changed = sum(
            1
            for i in range(total_codons)
            if original[i * 3 : i * 3 + 3] != optimized[i * 3 : i * 3 + 3]
        )

        original_cai = self.calculate_cai(original)
        optimized_cai = self.calculate_cai(optimized)
        return {
            "total_codons": total_codons,
            "changed_codons": changed,
            "change_rate": (changed / total_codons * 100) if total_codons else 0.0,
            "original_cai": original_cai,
            "optimized_cai": optimized_cai,
            "cai_improvement": optimized_cai - original_cai,
            "original_gc": self._gc_content(original),
            "optimized_gc": self._gc_content(optimized),
        }

    def generate_report(
        self,
        protein_name: str,
        protein_seq: str,
        original_dna: str,
        optimized_dna: str,
        output_path: str,
    ) -> None:
        """
        Generate a formatted text report for the optimization.

        The report holds the organism, protein name and length, the metrics
        from ``compare_sequences``, and both DNA sequences. Missing parent
        directories of ``output_path`` are created; an existing file is
        overwritten.
        """
        metrics = self.compare_sequences(original_dna, optimized_dna)
        rule = "-" * 60 + "\n"
        lines = [
            "Codon Optimization Report\n",
            "=" * 60 + "\n",
            f"Organism: {self.organism}\n",
            f"Protein: {protein_name}\n",
            f"Protein length: {len(protein_seq)} aa\n\n",
            "Metrics\n",
            rule,
            f"Total codons: {metrics['total_codons']}\n",
            f"Changed codons: {metrics['changed_codons']}\n",
            f"Change rate: {metrics['change_rate']:.2f}%\n",
            f"Original CAI: {metrics['original_cai']:.4f}\n",
            f"Optimized CAI: {metrics['optimized_cai']:.4f}\n",
            f"CAI improvement: {metrics['cai_improvement']:.4f}\n",
            f"Original GC: {metrics['original_gc']:.2f}%\n",
            f"Optimized GC: {metrics['optimized_gc']:.2f}%\n\n",
            "Original DNA\n",
            rule,
            original_dna + "\n\n",
            "Optimized DNA\n",
            rule,
            optimized_dna + "\n",
        ]
        output = Path(output_path)
        output.parent.mkdir(parents=True, exist_ok=True)
        with output.open("w", encoding="utf-8") as handle:
            handle.writelines(lines)

    def _load_codon_table(self, path: Path) -> Dict[str, float]:
        """
        Read a codon frequency CSV into ``{codon: frequency}``.

        Column names are matched case-insensitively. Codons are stripped and
        upper-cased; rows whose codon is not three characters long are
        dropped. Frequencies that cannot be parsed, or that are NaN/infinite
        (for example a blank cell), become ``0.0``.

        Raises:
            FileNotFoundError: If ``path`` does not exist.
            ValueError: If the ``Codon`` or ``Frequency`` column is missing.
        """
        if not path.exists():
            raise FileNotFoundError(f"Codon table not found: {path}")
        df = pd.read_csv(path)
        columns = {col.lower(): col for col in df.columns}
        if "codon" not in columns or "frequency" not in columns:
            raise ValueError("Codon table must have columns: Codon, Frequency")

        codon_freq: Dict[str, float] = {}
        for raw_codon, raw_freq in zip(df[columns["codon"]], df[columns["frequency"]]):
            codon = str(raw_codon).strip().upper()
            if len(codon) != 3:
                continue
            try:
                freq = float(raw_freq)
            except (TypeError, ValueError):
                freq = 0.0
            # float() accepts NaN (how pandas represents a blank cell); a NaN
            # frequency would poison every later max()/comparison.
            if not math.isfinite(freq):
                freq = 0.0
            codon_freq[codon] = freq
        return codon_freq

    def _build_codon_weights(self, codon_freq: Dict[str, float]) -> Dict[str, float]:
        """
        Convert frequencies to weights relative to each amino acid's best codon.

        Each codon's weight is its frequency divided by the highest frequency
        among codons for the same amino acid (``0.0`` when that maximum is not
        positive). Stop codons and codons missing from ``CODON_TO_AA`` get no
        weight.
        """
        by_aa: Dict[str, List[Tuple[str, float]]] = {}
        for codon, freq in codon_freq.items():
            aa = CODON_TO_AA.get(codon, "*")
            if aa == "*":
                continue
            by_aa.setdefault(aa, []).append((codon, freq))

        weights: Dict[str, float] = {}
        for codons in by_aa.values():
            max_freq = max(freq for _, freq in codons)
            for codon, freq in codons:
                weights[codon] = freq / max_freq if max_freq > 0 else 0.0
        return weights

    def _best_codon_map(self, codon_freq: Dict[str, float]) -> Dict[str, str]:
        """
        Return ``{amino acid: most frequent codon}``.

        Stop codons and unknown codons are ignored; on equal frequencies the
        codon seen first in ``codon_freq`` is kept.
        """
        best: Dict[str, Tuple[str, float]] = {}
        for codon, freq in codon_freq.items():
            aa = CODON_TO_AA.get(codon, "*")
            if aa == "*":
                continue
            current = best.get(aa)
            if current is None or freq > current[1]:
                best[aa] = (codon, freq)
        return {aa: codon for aa, (codon, _) in best.items()}

    def _reverse_translate(self, protein_seq: str) -> str:
        """
        Build DNA from a protein using the most frequent codon per amino acid.

        Raises:
            ValueError: If an amino acid has no codon in ``best_codon_for_aa``.
        """
        codons = []
        for aa in protein_seq:
            codon = self.best_codon_for_aa.get(aa)
            if codon is None:
                raise ValueError(f"No codon mapping for amino acid: {aa}")
            codons.append(codon)
        return "".join(codons)

    def _normalize_protein(self, protein_sequence: str) -> str:
        """
        Remove whitespace, upper-case, and validate a protein sequence.

        All whitespace is removed (spaces, tabs, ``\\n`` and ``\\r``), so
        multi-line input with Windows line endings is accepted.

        Raises:
            ValueError: If the result is empty or contains characters other
                than the 20 standard amino-acid letters.
        """
        seq = "".join(protein_sequence.split()).upper()
        valid = set("ACDEFGHIKLMNPQRSTVWY")
        if not seq:
            raise ValueError("Protein sequence is empty.")
        invalid = {ch for ch in seq if ch not in valid}
        if invalid:
            raise ValueError(f"Invalid amino acids found: {''.join(sorted(invalid))}")
        return seq

    @staticmethod
    def _gc_content(dna_sequence: str) -> float:
        """Return the percentage of G and C characters (``0.0`` for empty input)."""
        seq = dna_sequence.upper()
        if not seq:
            return 0.0
        gc = seq.count("G") + seq.count("C")
        return (gc / len(seq)) * 100.0
@@ -0,0 +1,141 @@
1
+ """
2
+ Engine Registry
3
+
4
+ Registry that dynamically registers and manages engines
5
+ No changes needed to existing code when adding new engines (v3, v4, etc.)
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ from typing import Callable, Type
12
+
13
+ from factorforge.core.interfaces import OptimizerEngine
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class EngineRegistry:
    """Optimization engine registry

    All state lives in class-level dictionaries, so the registry is shared
    process-wide and every method is a classmethod:

    - ``_engines``: name -> engine class (registered or already lazy-loaded)
    - ``_instances``: name -> the single instance handed out by ``get``
    - ``_lazy_loaders``: name -> callable returning the engine class
    - ``_metadata``: name -> free-form metadata dict
    """

    _engines: dict[str, Type[OptimizerEngine]] = {}
    _instances: dict[str, OptimizerEngine] = {}
    _lazy_loaders: dict[str, Callable[[], Type[OptimizerEngine]]] = {}
    _metadata: dict[str, dict[str, object]] = {}

    @classmethod
    def register(
        cls,
        name: str,
        engine_class: Type[OptimizerEngine],
        metadata: dict[str, object] | None = None,
    ) -> None:
        """
        Register an engine

        Args:
            name: Engine identifier (e.g., "v1", "v2", "v3")
            engine_class: Class implementing OptimizerEngine
            metadata: Optional engine metadata. When omitted, metadata
                already stored for ``name`` is kept.
        """
        # Replacing the class behind a name must also drop the cached
        # singleton, otherwise get() keeps returning an instance of the old
        # class. Re-registering the same class leaves the instance alone.
        if cls._engines.get(name) is not engine_class:
            cls._instances.pop(name, None)
        cls._engines[name] = engine_class
        if metadata is not None:
            cls._metadata[name] = dict(metadata)
        else:
            cls._metadata.setdefault(name, {})
        logger.debug("Registered engine: %s (%s)", name, engine_class.__name__)

    @classmethod
    def register_lazy(
        cls,
        name: str,
        loader: Callable[[], Type[OptimizerEngine]],
        metadata: dict[str, object] | None = None,
    ) -> None:
        """
        Register a lazy engine loader.

        The loader is only called by ``get`` when ``name`` has no registered
        class yet.

        Args:
            name: Engine identifier (e.g., "v1", "v3")
            loader: Callable that returns the engine class on demand
            metadata: Optional engine metadata. When omitted, metadata
                already stored for ``name`` is kept.
        """
        cls._lazy_loaders[name] = loader
        if metadata is not None:
            cls._metadata[name] = dict(metadata)
        else:
            cls._metadata.setdefault(name, {})

    @classmethod
    def get(cls, name: str) -> OptimizerEngine:
        """
        Get engine instance (singleton)

        A lazily registered engine is loaded and registered on first request;
        any exception raised by its loader propagates to the caller.

        Args:
            name: Engine identifier

        Returns:
            OptimizerEngine instance

        Raises:
            ValueError: If ``name`` is neither registered nor lazily available.
        """
        if name not in cls._engines and name in cls._lazy_loaders:
            engine_class = cls._lazy_loaders[name]()
            cls.register(name, engine_class, metadata=cls._metadata.get(name))

        if name not in cls._engines:
            # List lazily registered engines too: they are valid names even
            # though their classes have not been loaded yet.
            known = dict.fromkeys([*cls._engines, *cls._lazy_loaders])
            available = ", ".join(known)
            raise ValueError(f"❌ Engine '{name}' not found. Available: {available}")

        # Singleton pattern
        if name not in cls._instances:
            cls._instances[name] = cls._engines[name]()

        return cls._instances[name]

    @classmethod
    def list_engines(cls) -> dict[str, dict[str, str]]:
        """
        List available engines

        Registered engines are instantiated (via ``get``) to read their
        ``version`` and ``name``; engines that are only lazily registered are
        reported with placeholder values and are not loaded.

        Returns:
            dict: {name: {version, name}}
        """
        result: dict[str, dict[str, str]] = {}
        for name in list(cls._engines):
            instance = cls.get(name)
            result[name] = {
                "version": instance.version,
                "name": instance.name,
            }
        for name in cls._lazy_loaders:
            if name in result:
                continue
            result[name] = {
                "version": "lazy",
                "name": "lazy (not loaded)",
            }
        return result

    @classmethod
    def list_with_metadata(cls) -> dict[str, dict[str, object]]:
        """List registered and lazy engines with metadata.

        Stored metadata takes precedence; ``version`` and ``name`` are only
        filled in when the metadata does not already provide them. Names are
        returned in sorted order.
        """
        result: dict[str, dict[str, object]] = {}
        all_names = set(cls._engines) | set(cls._lazy_loaders) | set(cls._metadata)
        for name in sorted(all_names):
            metadata = dict(cls._metadata.get(name, {}))
            if name in cls._engines:
                instance = cls.get(name)
                metadata.setdefault("version", instance.version)
                metadata.setdefault("name", instance.name)
            elif name in cls._lazy_loaders:
                metadata.setdefault("version", "lazy")
                metadata.setdefault("name", "lazy (not loaded)")
            result[name] = metadata
        return result

    @classmethod
    def clear(cls) -> None:
        """Reset registry (for tests)"""
        cls._engines.clear()
        cls._instances.clear()
        cls._lazy_loaders.clear()
        cls._metadata.clear()
@@ -0,0 +1,15 @@
1
+ """
2
+ FactorForge v1 — Archived / Frozen
3
+
4
+ v1 (BPE Tokenizer) is preserved for research reference only.
5
+ It is not maintained and not recommended for new work.
6
+
7
+ Use v2 (rule-based, production) instead:
8
+ from factorforge.engines.v2 import RuleBasedOptimizer
9
+ """
10
+
11
# Importing this package always fails with the guidance below.
_ARCHIVED_MESSAGE = (
    "FactorForge v1 is archived and not available as an installed package. "
    "Install with optional v1 extras to use: pip install factorforge[v1]. "
    "For production use, switch to v2: from factorforge.engines.v2 import RuleBasedOptimizer"
)
raise ImportError(_ARCHIVED_MESSAGE)