remdb 0.3.7__py3-none-any.whl → 0.3.14__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
Files changed (43)
  1. rem/__init__.py +129 -2
  2. rem/agentic/context.py +7 -5
  3. rem/agentic/providers/phoenix.py +32 -43
  4. rem/api/README.md +23 -0
  5. rem/api/main.py +27 -2
  6. rem/api/middleware/tracking.py +172 -0
  7. rem/api/routers/auth.py +54 -0
  8. rem/api/routers/chat/completions.py +1 -1
  9. rem/cli/commands/ask.py +13 -10
  10. rem/cli/commands/configure.py +4 -3
  11. rem/cli/commands/db.py +17 -3
  12. rem/cli/commands/experiments.py +76 -72
  13. rem/cli/commands/process.py +8 -7
  14. rem/cli/commands/scaffold.py +47 -0
  15. rem/cli/main.py +2 -0
  16. rem/models/entities/user.py +10 -3
  17. rem/registry.py +367 -0
  18. rem/services/content/providers.py +92 -133
  19. rem/services/dreaming/affinity_service.py +2 -16
  20. rem/services/dreaming/moment_service.py +2 -15
  21. rem/services/embeddings/api.py +20 -13
  22. rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
  23. rem/services/phoenix/client.py +148 -14
  24. rem/services/postgres/schema_generator.py +86 -5
  25. rem/services/rate_limit.py +113 -0
  26. rem/services/rem/README.md +14 -0
  27. rem/services/user_service.py +98 -0
  28. rem/settings.py +79 -10
  29. rem/sql/install_models.sql +13 -0
  30. rem/sql/migrations/003_seed_default_user.sql +48 -0
  31. rem/utils/constants.py +97 -0
  32. rem/utils/date_utils.py +228 -0
  33. rem/utils/embeddings.py +17 -4
  34. rem/utils/files.py +167 -0
  35. rem/utils/mime_types.py +158 -0
  36. rem/utils/schema_loader.py +63 -14
  37. rem/utils/vision.py +9 -14
  38. rem/workers/README.md +14 -14
  39. rem/workers/db_maintainer.py +74 -0
  40. {remdb-0.3.7.dist-info → remdb-0.3.14.dist-info}/METADATA +169 -121
  41. {remdb-0.3.7.dist-info → remdb-0.3.14.dist-info}/RECORD +43 -32
  42. {remdb-0.3.7.dist-info → remdb-0.3.14.dist-info}/WHEEL +0 -0
  43. {remdb-0.3.7.dist-info → remdb-0.3.14.dist-info}/entry_points.txt +0 -0
rem/services/content/providers.py

@@ -2,17 +2,27 @@
 
 import json
 import multiprocessing
-import os
 import random
 import subprocess
 import sys
-import tempfile
 from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import Any, Optional
 
 from loguru import logger
 
+from rem.utils.constants import (
+    AUDIO_CHUNK_TARGET_SECONDS,
+    AUDIO_CHUNK_WINDOW_SECONDS,
+    MIN_SILENCE_MS,
+    SILENCE_THRESHOLD_DB,
+    SUBPROCESS_TIMEOUT_SECONDS,
+    WAV_HEADER_MIN_BYTES,
+    WHISPER_COST_PER_MINUTE,
+)
+from rem.utils.files import temp_file_from_bytes
+from rem.utils.mime_types import get_extension
+
 
 class ContentProvider(ABC):
     """Base class for content extraction providers."""
@@ -132,7 +142,7 @@ import sys
 from pathlib import Path
 from kreuzberg import ExtractionConfig, extract_file_sync
 
-# Parse document with table extraction (requires PyTorch - Python <3.13 required)
+# Parse document with kreuzberg 3.x
 config = ExtractionConfig(
     extract_tables=True,
     chunk_content=False,
@@ -155,7 +165,7 @@ print(json.dumps(output))
             [sys.executable, "-c", script, str(file_path)],
             capture_output=True,
             text=True,
-            timeout=300,  # 5 minute timeout
+            timeout=SUBPROCESS_TIMEOUT_SECONDS,
         )
 
         if result.returncode != 0:
@@ -177,21 +187,9 @@ print(json.dumps(output))
         # Write bytes to temp file for kreuzberg
         # Detect extension from metadata
         content_type = metadata.get("content_type", "")
-        extension_map = {
-            "application/pdf": ".pdf",
-            "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
-            "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
-            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
-            "image/png": ".png",
-            "image/jpeg": ".jpg",
-        }
-        suffix = extension_map.get(content_type, ".pdf")  # Default to PDF
+        suffix = get_extension(content_type, default=".pdf")
 
-        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
-            tmp.write(content)
-            tmp_path = Path(tmp.name)
-
-        try:
+        with temp_file_from_bytes(content, suffix=suffix) as tmp_path:
             # Check if running in daemon process
             if self._is_daemon_process():
                 logger.info("Daemon process detected - using subprocess workaround for document parsing")
@@ -205,7 +203,7 @@ print(json.dumps(output))
                     }
                 except Exception as e:
                     logger.error(f"Subprocess parsing failed: {e}. Falling back to text-only.")
-                    # Fallback to simple text extraction
+                    # Fallback to simple text extraction (kreuzberg 3.x API)
                     from kreuzberg import ExtractionConfig, extract_file_sync
                     config = ExtractionConfig(extract_tables=False)
                     result = extract_file_sync(tmp_path, config=config)
@@ -215,13 +213,12 @@ print(json.dumps(output))
                         "file_extension": tmp_path.suffix,
                     }
             else:
-                # Normal execution (not in daemon)
+                # Normal execution (not in daemon) - kreuzberg 4.x with native ONNX/Rust
                 from kreuzberg import ExtractionConfig, extract_file_sync
-                # Table extraction with gmft (requires PyTorch - Python <3.13 required)
                 config = ExtractionConfig(
-                    extract_tables=True,
-                    chunk_content=False,
-                    extract_keywords=False,
+                    enable_quality_processing=True,  # Enables table extraction with native ONNX
+                    chunk_content=False,  # We handle chunking ourselves
+                    extract_tables=False,  # Disable table extraction to avoid PyTorch dependency
                 )
                 result = extract_file_sync(tmp_path, config=config)
                 text = result.content
@@ -236,10 +233,6 @@ print(json.dumps(output))
                 "metadata": extraction_metadata,
             }
 
-        finally:
-            # Clean up temp file
-            tmp_path.unlink(missing_ok=True)
-
 
 class AudioProvider(ContentProvider):
     """
@@ -284,19 +277,20 @@ class AudioProvider(ContentProvider):
             ValueError: If OpenAI API key missing
         """
         # Handle empty or invalid content
-        if not content or len(content) < 44:  # WAV header is minimum 44 bytes
+        if not content or len(content) < WAV_HEADER_MIN_BYTES:
             logger.warning("Audio content too small to be valid WAV file")
             return {
                 "text": "[Invalid or empty audio file]",
                 "metadata": {"error": "invalid_content", "size": len(content)},
             }
 
-        # Check for OpenAI API key
-        api_key = os.getenv("OPENAI_API_KEY")
+        # Check for OpenAI API key (use settings)
+        from rem.settings import settings
+        api_key = settings.llm.openai_api_key
         if not api_key:
-            logger.warning("No OPENAI_API_KEY found - audio transcription disabled")
+            logger.warning("No OpenAI API key found - audio transcription disabled")
             return {
-                "text": "[Audio transcription requires OPENAI_API_KEY environment variable]",
+                "text": "[Audio transcription requires LLM__OPENAI_API_KEY to be set]",
                 "metadata": {"error": "missing_api_key"},
             }
 
@@ -313,83 +307,74 @@ class AudioProvider(ContentProvider):
         # Write bytes to temp file
         # Detect extension from metadata or use .wav as fallback
         content_type = metadata.get("content_type", "audio/wav")
-        extension_map = {
-            "audio/wav": ".wav",
-            "audio/mpeg": ".mp3",
-            "audio/mp4": ".m4a",
-            "audio/x-m4a": ".m4a",
-            "audio/flac": ".flac",
-            "audio/ogg": ".ogg",
-        }
-        extension = extension_map.get(content_type, ".wav")
+        extension = get_extension(content_type, default=".wav")
 
-        with tempfile.NamedTemporaryFile(suffix=extension, delete=False) as tmp:
-            tmp.write(content)
-            tmp_path = Path(tmp.name)
+        chunker = None
+        chunks = None
 
-        try:
-            logger.info(f"Processing audio file: {tmp_path.name} ({len(content) / 1024 / 1024:.1f} MB)")
-
-            # Step 1: Chunk audio by silence
-            chunker = AudioChunker(
-                target_chunk_seconds=60.0,
-                chunk_window_seconds=2.0,
-                silence_threshold_db=-40.0,
-                min_silence_ms=500,
-            )
-
-            chunks = chunker.chunk_audio(tmp_path)
-            logger.info(f"Created {len(chunks)} audio chunks")
-
-            # Step 2: Transcribe chunks
-            transcriber = AudioTranscriber(api_key=api_key)
-            results = transcriber.transcribe_chunks(chunks)
-            logger.info(f"Transcribed {len(results)} chunks")
-
-            # Step 3: Combine into markdown format
-            # Format: Each chunk becomes a section with timestamp
-            markdown_parts = []
-            for result in results:
-                timestamp = f"{result.start_seconds:.1f}s - {result.end_seconds:.1f}s"
-                markdown_parts.append(f"## [{timestamp}]\n\n{result.text}\n")
-
-            markdown_text = "\n".join(markdown_parts)
-
-            # Calculate metadata
-            total_duration = sum(r.duration_seconds for r in results)
-            estimated_cost = (total_duration / 60) * 0.006  # $0.006 per minute
-            successful_chunks = sum(1 for r in results if r.confidence > 0)
-
-            extraction_metadata = {
-                "chunk_count": len(chunks),
-                "transcribed_chunks": successful_chunks,
-                "duration_seconds": total_duration,
-                "estimated_cost": estimated_cost,
-                "parser": "whisper_api",
-            }
+        with temp_file_from_bytes(content, suffix=extension) as tmp_path:
+            try:
+                logger.info(f"Processing audio file: {tmp_path.name} ({len(content) / 1024 / 1024:.1f} MB)")
+
+                # Step 1: Chunk audio by silence
+                chunker = AudioChunker(
+                    target_chunk_seconds=AUDIO_CHUNK_TARGET_SECONDS,
+                    chunk_window_seconds=AUDIO_CHUNK_WINDOW_SECONDS,
+                    silence_threshold_db=SILENCE_THRESHOLD_DB,
+                    min_silence_ms=MIN_SILENCE_MS,
+                )
 
-            logger.info(
-                f"Transcription complete: {successful_chunks}/{len(chunks)} chunks, "
-                f"${estimated_cost:.3f} cost"
-            )
+                chunks = chunker.chunk_audio(tmp_path)
+                logger.info(f"Created {len(chunks)} audio chunks")
 
-            return {
-                "text": markdown_text,
-                "metadata": extraction_metadata,
-            }
+                # Step 2: Transcribe chunks
+                transcriber = AudioTranscriber(api_key=api_key)
+                results = transcriber.transcribe_chunks(chunks)
+                logger.info(f"Transcribed {len(results)} chunks")
 
-        except Exception as e:
-            logger.error(f"Audio extraction failed: {e}")
-            raise RuntimeError(f"Audio transcription failed: {e}") from e
+                # Step 3: Combine into markdown format
+                # Format: Each chunk becomes a section with timestamp
+                markdown_parts = []
+                for result in results:
+                    timestamp = f"{result.start_seconds:.1f}s - {result.end_seconds:.1f}s"
+                    markdown_parts.append(f"## [{timestamp}]\n\n{result.text}\n")
+
+                markdown_text = "\n".join(markdown_parts)
+
+                # Calculate metadata
+                total_duration = sum(r.duration_seconds for r in results)
+                estimated_cost = (total_duration / 60) * WHISPER_COST_PER_MINUTE
+                successful_chunks = sum(1 for r in results if r.confidence > 0)
+
+                extraction_metadata = {
+                    "chunk_count": len(chunks),
+                    "transcribed_chunks": successful_chunks,
+                    "duration_seconds": total_duration,
+                    "estimated_cost": estimated_cost,
+                    "parser": "whisper_api",
+                }
+
+                logger.info(
+                    f"Transcription complete: {successful_chunks}/{len(chunks)} chunks, "
+                    f"${estimated_cost:.3f} cost"
+                )
+
+                return {
+                    "text": markdown_text,
+                    "metadata": extraction_metadata,
+                }
 
-        finally:
-            # Clean up temp file and chunks
-            try:
-                tmp_path.unlink(missing_ok=True)
-                if 'chunker' in locals() and 'chunks' in locals():
-                    chunker.cleanup_chunks(chunks)
             except Exception as e:
-                logger.warning(f"Cleanup failed: {e}")
+                logger.error(f"Audio extraction failed: {e}")
+                raise RuntimeError(f"Audio transcription failed: {e}") from e
+
+            finally:
+                # Clean up audio chunks (temp file cleanup handled by context manager)
+                if chunker is not None and chunks is not None:
+                    try:
+                        chunker.cleanup_chunks(chunks)
+                    except Exception as e:
+                        logger.warning(f"Chunk cleanup failed: {e}")
 
 
 class SchemaProvider(ContentProvider):
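`get_extension` absorbs the near-duplicate `extension_map` dicts deleted above and below (documents, audio, and twice for images). `rem/utils/mime_types.py` is new in this release (+158 lines) and not shown here; a sketch built from the union of the deleted maps:

```python
# Sketch of rem/utils/mime_types.py's get_extension. The table is the union
# of the extension_map dicts this diff removes; the real module likely
# covers more types.
_MIME_TO_EXTENSION = {
    "application/pdf": ".pdf",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
    "image/png": ".png",
    "image/jpeg": ".jpg",
    "image/gif": ".gif",
    "image/webp": ".webp",
    "audio/wav": ".wav",
    "audio/mpeg": ".mp3",
    "audio/mp4": ".m4a",
    "audio/x-m4a": ".m4a",
    "audio/flac": ".flac",
    "audio/ogg": ".ogg",
}


def get_extension(content_type: str, default: str = "") -> str:
    """Map a MIME content type to a file extension, else return default."""
    return _MIME_TO_EXTENSION.get(content_type, default)
```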
@@ -667,19 +652,9 @@ class ImageProvider(ContentProvider):
 
             # Write bytes to temp file for analysis
             content_type = metadata.get("content_type", "image/png")
-            extension_map = {
-                "image/png": ".png",
-                "image/jpeg": ".jpg",
-                "image/gif": ".gif",
-                "image/webp": ".webp",
-            }
-            extension = extension_map.get(content_type, ".png")
-
-            with tempfile.NamedTemporaryFile(suffix=extension, delete=False) as tmp:
-                tmp.write(content)
-                tmp_path = Path(tmp.name)
+            extension = get_extension(content_type, default=".png")
 
-            try:
+            with temp_file_from_bytes(content, suffix=extension) as tmp_path:
                 # Analyze image
                 result = analyzer.analyze_image(tmp_path)
                 vision_description = result.description
@@ -687,9 +662,6 @@ class ImageProvider(ContentProvider):
                 vision_model = result.model
 
                 logger.info(f"Vision analysis complete: {len(vision_description)} chars")
-            finally:
-                # Clean up temp file
-                tmp_path.unlink(missing_ok=True)
 
         except ImportError as e:
             logger.warning(f"Vision analysis not available: {e}")
@@ -732,19 +704,9 @@ class ImageProvider(ContentProvider):
             if embedder.is_available():
                 # Write bytes to temp file for CLIP embedding
                 content_type = metadata.get("content_type", "image/png")
-                extension_map = {
-                    "image/png": ".png",
-                    "image/jpeg": ".jpg",
-                    "image/gif": ".gif",
-                    "image/webp": ".webp",
-                }
-                extension = extension_map.get(content_type, ".png")
+                extension = get_extension(content_type, default=".png")
 
-                with tempfile.NamedTemporaryFile(suffix=extension, delete=False) as tmp:
-                    tmp.write(content)
-                    tmp_path = Path(tmp.name)
-
-                try:
+                with temp_file_from_bytes(content, suffix=extension) as tmp_path:
                     # Generate CLIP embedding
                     result = embedder.embed_image(tmp_path)
                     if result:
@@ -754,9 +716,6 @@ class ImageProvider(ContentProvider):
                         logger.info(
                             f"CLIP embedding generated: {clip_dimensions} dims, {clip_tokens} tokens"
                         )
-                finally:
-                    # Clean up temp file
-                    tmp_path.unlink(missing_ok=True)
             else:
                 logger.debug(
                     "CLIP embeddings disabled - set CONTENT__JINA_API_KEY to enable. "
rem/services/dreaming/affinity_service.py

@@ -8,12 +8,11 @@ vector similarity (fast) or LLM analysis (intelligent).
 import json
 from datetime import datetime, timedelta
 from enum import Enum
-from pathlib import Path
 from typing import Any, Optional
 
-import yaml
 from loguru import logger
 
+from ...utils.schema_loader import load_agent_schema
 from ...agentic.providers.pydantic_ai import create_agent
 from ...agentic.serialization import serialize_agent_result
 from ...models.core import QueryType, RemQuery, SearchParameters
@@ -125,20 +124,7 @@ async def build_affinity(
     # Load LLM agent for relationship assessment if needed
     affinity_agent = None
     if mode == AffinityMode.LLM:
-        schema_path = (
-            Path(__file__).parent.parent.parent
-            / "schemas"
-            / "agents"
-            / "resource-affinity-assessor.yaml"
-        )
-
-        if not schema_path.exists():
-            raise FileNotFoundError(
-                f"ResourceAffinityAssessor schema not found: {schema_path}"
-            )
-
-        with open(schema_path) as f:
-            agent_schema = yaml.safe_load(f)
+        agent_schema = load_agent_schema("resource-affinity-assessor")
 
         affinity_agent_runtime = await create_agent(
             agent_schema_override=agent_schema,
rem/services/dreaming/moment_service.py

@@ -8,13 +8,12 @@ with temporal boundaries and metadata.
 
 import json
 from datetime import datetime, timedelta
-from pathlib import Path
 from typing import Any, Optional
 from uuid import uuid4
 
-import yaml
 from loguru import logger
 
+from ...utils.schema_loader import load_agent_schema
 from ...agentic.providers.pydantic_ai import create_agent
 from ...agentic.serialization import serialize_agent_result
 from ...models.entities.moment import Moment, Person
@@ -101,19 +100,7 @@ async def construct_moments(
     }
 
     # Load MomentBuilder agent schema
-    schema_path = (
-        Path(__file__).parent.parent.parent
-        / "schemas"
-        / "agents"
-        / "core"
-        / "moment-builder.yaml"
-    )
-
-    if not schema_path.exists():
-        raise FileNotFoundError(f"MomentBuilder schema not found: {schema_path}")
-
-    with open(schema_path) as f:
-        agent_schema = yaml.safe_load(f)
+    agent_schema = load_agent_schema("moment-builder")
 
     # Prepare input data for agent
     input_data = {
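Both dreaming services now delegate schema loading to `load_agent_schema` from the expanded `rem/utils/schema_loader.py` (+63 -14). The deleted code looked in `schemas/agents/` for one schema but `schemas/agents/core/` for the other, so the shared loader presumably searches subdirectories; a hedged sketch of one way it could work:

```python
# Hypothetical sketch of load_agent_schema, inferred from the two deleted
# call-site blocks; the shipped loader may resolve paths differently.
from pathlib import Path
from typing import Any

import yaml

# rem/utils/ -> rem/ -> rem/schemas/agents/
SCHEMAS_DIR = Path(__file__).parent.parent / "schemas" / "agents"


def load_agent_schema(name: str) -> dict[str, Any]:
    """Find <name>.yaml anywhere under schemas/agents/ and parse it."""
    for path in SCHEMAS_DIR.rglob(f"{name}.yaml"):
        with open(path) as f:
            return yaml.safe_load(f)
    raise FileNotFoundError(f"Agent schema not found: {name} (searched {SCHEMAS_DIR})")
```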
rem/services/embeddings/api.py

@@ -5,13 +5,20 @@ Provides synchronous and async wrappers for embedding generation using
 raw HTTP requests (no OpenAI SDK dependency).
 """
 
-import os
 from typing import Optional, cast
 
 import httpx
 import requests
 from loguru import logger
 
+from rem.utils.constants import DEFAULT_EMBEDDING_DIMS, HTTP_TIMEOUT_DEFAULT
+
+
+def _get_openai_api_key() -> Optional[str]:
+    """Get OpenAI API key from settings."""
+    from rem.settings import settings
+    return settings.llm.openai_api_key
+
 
 def generate_embedding(
     text: str,
@@ -26,16 +33,16 @@ def generate_embedding(
         text: Text to embed
         model: Model name (default: text-embedding-3-small)
         provider: Provider name (default: openai)
-        api_key: API key (defaults to OPENAI_API_KEY env var)
+        api_key: API key (defaults to settings.llm.openai_api_key)
 
     Returns:
         Embedding vector (1536 dimensions for text-embedding-3-small)
     """
     if provider == "openai":
-        api_key = api_key or os.getenv("OPENAI_API_KEY")
+        api_key = api_key or _get_openai_api_key()
         if not api_key:
             logger.warning("No OpenAI API key - returning zero vector")
-            return [0.0] * 1536
+            return [0.0] * DEFAULT_EMBEDDING_DIMS
 
         try:
             logger.info(f"Generating OpenAI embedding for text using {model}")
@@ -47,7 +54,7 @@ def generate_embedding(
                     "Content-Type": "application/json",
                 },
                 json={"input": [text], "model": model},
-                timeout=30,
+                timeout=HTTP_TIMEOUT_DEFAULT,
             )
             response.raise_for_status()
 
@@ -58,11 +65,11 @@ def generate_embedding(
 
         except Exception as e:
             logger.error(f"Failed to generate embedding from OpenAI: {e}", exc_info=True)
-            return [0.0] * 1536
+            return [0.0] * DEFAULT_EMBEDDING_DIMS
 
     else:
         logger.warning(f"Unsupported provider '{provider}' - returning zero vector")
-        return [0.0] * 1536
+        return [0.0] * DEFAULT_EMBEDDING_DIMS
 
 
 async def generate_embedding_async(
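As before, `generate_embedding` degrades to a zero vector instead of raising when the key is missing or a request fails; only the dimension count now comes from `DEFAULT_EMBEDDING_DIMS` (1536, per the docstring). Callers therefore still need their own all-zeros check; a caller-side sketch, not from this package:

```python
# Caller-side sketch: the zero-vector fallback means "no embedding", so
# detect it before storing or indexing the result.
from rem.services.embeddings.api import generate_embedding

vector = generate_embedding("hello world")
if not any(vector):
    print("embedding unavailable (missing key or request failure); skipping")
else:
    print(f"got {len(vector)}-dim embedding")
```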
@@ -78,16 +85,16 @@ async def generate_embedding_async(
         text: Text to embed
         model: Model name (default: text-embedding-3-small)
         provider: Provider name (default: openai)
-        api_key: API key (defaults to OPENAI_API_KEY env var)
+        api_key: API key (defaults to settings.llm.openai_api_key)
 
     Returns:
         Embedding vector (1536 dimensions for text-embedding-3-small)
     """
     if provider == "openai":
-        api_key = api_key or os.getenv("OPENAI_API_KEY")
+        api_key = api_key or _get_openai_api_key()
         if not api_key:
             logger.warning("No OpenAI API key - returning zero vector")
-            return [0.0] * 1536
+            return [0.0] * DEFAULT_EMBEDDING_DIMS
 
         try:
             logger.info(f"Generating OpenAI embedding for text using {model}")
@@ -100,7 +107,7 @@ async def generate_embedding_async(
                     "Content-Type": "application/json",
                 },
                 json={"input": [text], "model": model},
-                timeout=30.0,
+                timeout=HTTP_TIMEOUT_DEFAULT,
             )
             response.raise_for_status()
 
@@ -113,8 +120,8 @@ async def generate_embedding_async(
 
         except Exception as e:
             logger.error(f"Failed to generate embedding from OpenAI: {e}", exc_info=True)
-            return [0.0] * 1536
+            return [0.0] * DEFAULT_EMBEDDING_DIMS
 
     else:
         logger.warning(f"Unsupported provider '{provider}' - returning zero vector")
-        return [0.0] * 1536
+        return [0.0] * DEFAULT_EMBEDDING_DIMS
rem/services/phoenix/EXPERIMENT_DESIGN.md

@@ -164,7 +164,7 @@ cp curated-queries.csv experiments/rem-001/validation/production/
 **Option C: Curated Engrams**
 ```bash
 # Generate engrams from REM data
-rem dreaming full --user-id test-user --tenant-id acme --generate-test-cases
+rem dreaming full --user-id test-user --generate-test-cases
 
 # Review and select high-quality engrams
 rem engram list --quality high --limit 100 --output engrams.csv
@@ -357,7 +357,7 @@ Level 4 (Mature): Multiple cycles, full query capabilities
 # Generate engrams from REM data
 rem dreaming full \
   --user-id test-user \
-  --tenant-id acme \
+  \
   --generate-test-cases \
   --quality-level 3
 
@@ -1027,7 +1027,7 @@ rem experiments experiment run rem-lookup-ask_rem-golden \
 
 ```bash
 # 1. Generate high-quality engrams
-rem dreaming full --tenant-id acme --generate-test-cases --quality-level 4
+rem dreaming full --generate-test-cases --quality-level 4
 
 # 2. Export engrams
 rem engram export rem-engrams-mature-mixed --output engrams.csv --format phoenix