remdb 0.3.103__py3-none-any.whl → 0.3.118__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as published.

Potentially problematic release: this version of remdb might be problematic.

Files changed (55)
  1. rem/agentic/context.py +28 -24
  2. rem/agentic/mcp/tool_wrapper.py +29 -3
  3. rem/agentic/otel/setup.py +92 -4
  4. rem/agentic/providers/pydantic_ai.py +88 -18
  5. rem/agentic/schema.py +358 -21
  6. rem/agentic/tools/rem_tools.py +3 -3
  7. rem/api/main.py +85 -16
  8. rem/api/mcp_router/resources.py +1 -1
  9. rem/api/mcp_router/server.py +18 -4
  10. rem/api/mcp_router/tools.py +383 -16
  11. rem/api/routers/admin.py +218 -1
  12. rem/api/routers/chat/completions.py +30 -3
  13. rem/api/routers/chat/streaming.py +143 -3
  14. rem/api/routers/feedback.py +12 -319
  15. rem/api/routers/query.py +360 -0
  16. rem/api/routers/shared_sessions.py +13 -13
  17. rem/cli/commands/README.md +237 -64
  18. rem/cli/commands/cluster.py +1300 -0
  19. rem/cli/commands/configure.py +1 -3
  20. rem/cli/commands/db.py +354 -143
  21. rem/cli/commands/process.py +14 -8
  22. rem/cli/commands/schema.py +92 -45
  23. rem/cli/main.py +27 -6
  24. rem/models/core/rem_query.py +5 -2
  25. rem/models/entities/shared_session.py +2 -28
  26. rem/registry.py +10 -4
  27. rem/services/content/service.py +30 -8
  28. rem/services/embeddings/api.py +4 -4
  29. rem/services/embeddings/worker.py +16 -16
  30. rem/services/postgres/README.md +151 -26
  31. rem/services/postgres/__init__.py +2 -1
  32. rem/services/postgres/diff_service.py +531 -0
  33. rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
  34. rem/services/postgres/schema_generator.py +205 -4
  35. rem/services/postgres/service.py +6 -6
  36. rem/services/rem/parser.py +44 -9
  37. rem/services/rem/service.py +36 -2
  38. rem/services/session/reload.py +1 -1
  39. rem/settings.py +56 -7
  40. rem/sql/background_indexes.sql +19 -24
  41. rem/sql/migrations/001_install.sql +252 -69
  42. rem/sql/migrations/002_install_models.sql +2171 -593
  43. rem/sql/migrations/003_optional_extensions.sql +326 -0
  44. rem/sql/migrations/004_cache_system.sql +548 -0
  45. rem/utils/__init__.py +18 -0
  46. rem/utils/date_utils.py +2 -2
  47. rem/utils/schema_loader.py +17 -13
  48. rem/utils/sql_paths.py +146 -0
  49. rem/workers/__init__.py +2 -1
  50. rem/workers/unlogged_maintainer.py +463 -0
  51. {remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/METADATA +149 -76
  52. {remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/RECORD +54 -48
  53. rem/sql/migrations/003_seed_default_user.sql +0 -48
  54. {remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/WHEEL +0 -0
  55. {remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/entry_points.txt +0 -0
rem/cli/commands/schema.py CHANGED
@@ -8,6 +8,7 @@ Usage:
 """
 
 import asyncio
+import importlib
 from pathlib import Path
 
 import click
@@ -15,16 +16,35 @@ from loguru import logger
 
 from ...settings import settings
 from ...services.postgres.schema_generator import SchemaGenerator
+from ...utils.sql_paths import get_package_sql_dir, get_package_migrations_dir
+
+
+def _import_model_modules() -> list[str]:
+    """
+    Import modules specified in MODELS__IMPORT_MODULES setting.
+
+    This ensures downstream models decorated with @rem.register_model
+    are registered before schema generation.
+
+    Returns:
+        List of successfully imported module names
+    """
+    imported = []
+    for module_name in settings.models.module_list:
+        try:
+            importlib.import_module(module_name)
+            imported.append(module_name)
+            logger.debug(f"Imported model module: {module_name}")
+        except ImportError as e:
+            logger.warning(f"Failed to import model module '{module_name}': {e}")
+            click.echo(
+                click.style(f" ⚠ Could not import '{module_name}': {e}", fg="yellow"),
+                err=True,
+            )
+    return imported
 
 
 @click.command()
-@click.option(
-    "--models",
-    "-m",
-    required=True,
-    type=click.Path(exists=True, path_type=Path),
-    help="Directory containing Pydantic models",
-)
 @click.option(
     "--output",
     "-o",
@@ -36,13 +56,13 @@ from ...services.postgres.schema_generator import SchemaGenerator
     "--output-dir",
     type=click.Path(path_type=Path),
     default=None,
-    help=f"Base output directory (default: {settings.sql_dir}/migrations)",
+    help="Base output directory (default: package sql/migrations)",
 )
-def generate(models: Path, output: Path, output_dir: Path | None):
+def generate(output: Path, output_dir: Path | None):
     """
-    Generate database schema from Pydantic models.
+    Generate database schema from registered Pydantic models.
 
-    Scans the specified directory for Pydantic models and generates:
+    Uses the model registry (core models + user-registered models) to generate:
     - CREATE TABLE statements
     - Embeddings tables (embeddings_<table>)
     - KV_STORE triggers for cache maintenance
@@ -51,24 +71,53 @@ def generate(models: Path, output: Path, output_dir: Path | None):
     Output is written to src/rem/sql/migrations/002_install_models.sql by default.
 
     Example:
-        rem db schema generate --models src/rem/models/entities
+        rem db schema generate
+
+    To register custom models in downstream apps:
+
+    1. Create models with @rem.register_model decorator:
+
+        # models/__init__.py
+        import rem
+        from rem.models.core import CoreModel
+
+        @rem.register_model
+        class MyEntity(CoreModel):
+            name: str
+
+    2. Set MODELS__IMPORT_MODULES in your .env:
+
+        MODELS__IMPORT_MODULES=models
+
+    3. Run schema generation:
+
+        rem db schema generate
 
     This creates:
     - src/rem/sql/migrations/002_install_models.sql - Entity tables and triggers
     - src/rem/sql/background_indexes.sql - HNSW indexes (apply after data load)
 
-    After generation, apply with:
-        rem db migrate
+    After generation, verify with:
+        rem db diff
     """
-    click.echo(f"Discovering models in {models}")
+    from ...registry import get_model_registry
+
+    # Import downstream model modules to trigger @rem.register_model decorators
+    imported_modules = _import_model_modules()
+    if imported_modules:
+        click.echo(f"Imported model modules: {', '.join(imported_modules)}")
+
+    registry = get_model_registry()
+    models = registry.get_models(include_core=True)
+    click.echo(f"Generating schema from {len(models)} registered models")
 
-    # Default to migrations directory
-    actual_output_dir = output_dir or Path(settings.sql_dir) / "migrations"
+    # Default to package migrations directory
+    actual_output_dir = output_dir or get_package_migrations_dir()
     generator = SchemaGenerator(output_dir=actual_output_dir)
 
-    # Generate schema
+    # Generate schema from registry
     try:
-        schema_sql = asyncio.run(generator.generate_from_directory(models, output_file=output.name))
+        schema_sql = asyncio.run(generator.generate_from_registry(output_file=output.name))
 
         click.echo(f"✓ Schema generated: {len(generator.schemas)} tables")
         click.echo(f"✓ Written to: {actual_output_dir / output.name}")
@@ -76,7 +125,7 @@ def generate(models: Path, output: Path, output_dir: Path | None):
         # Generate background indexes in parent sql dir
         background_indexes = generator.generate_background_indexes()
         if background_indexes:
-            bg_file = Path(settings.sql_dir) / "background_indexes.sql"
+            bg_file = get_package_sql_dir() / "background_indexes.sql"
            bg_file.write_text(background_indexes)
            click.echo(f"✓ Background indexes: {bg_file}")
 
@@ -94,48 +143,46 @@ def generate(models: Path, output: Path, output_dir: Path | None):
 
 
 @click.command()
-@click.option(
-    "--models",
-    "-m",
-    required=True,
-    type=click.Path(exists=True, path_type=Path),
-    help="Directory containing Pydantic models",
-)
-def validate(models: Path):
+def validate():
     """
-    Validate Pydantic models for schema generation.
+    Validate registered Pydantic models for schema generation.
 
     Checks:
-    - Models can be loaded
+    - Models can be loaded from registry
     - Models have suitable entity_key fields
     - Fields with embeddings are properly configured
+
+    Set MODELS__IMPORT_MODULES to include custom models from downstream apps.
     """
-    click.echo(f"Validating models in {models}")
+    from ...registry import get_model_registry
 
-    generator = SchemaGenerator()
-    discovered = generator.discover_models(models)
+    # Import downstream model modules to trigger @rem.register_model decorators
+    imported_modules = _import_model_modules()
+    if imported_modules:
+        click.echo(f"Imported model modules: {', '.join(imported_modules)}")
 
-    if not discovered:
-        click.echo("✗ No models found", err=True)
-        raise click.Abort()
+    registry = get_model_registry()
+    models = registry.get_models(include_core=True)
+
+    click.echo(f"Validating {len(models)} registered models")
 
-    click.echo(f"✓ Discovered {len(discovered)} models")
+    if not models:
+        click.echo("✗ No models found in registry", err=True)
+        raise click.Abort()
 
+    generator = SchemaGenerator()
     errors: list[str] = []
     warnings: list[str] = []
 
-    for model_name, model in discovered.items():
-        table_name = generator.infer_table_name(model)
-        entity_key = generator.infer_entity_key_field(model)
+    for model_name, ext in models.items():
+        model = ext.model
+        table_name = ext.table_name or generator.infer_table_name(model)
+        entity_key = ext.entity_key_field or generator.infer_entity_key_field(model)
 
         # Check for entity_key
         if entity_key == "id":
            warnings.append(f"{model_name}: No natural key field, using 'id'")
 
-        # Check for embeddable fields
-        # TODO: Implement should_embed_field check
-        embeddable: list[str] = []  # Placeholder - needs implementation
-
         click.echo(f" {model_name} -> {table_name} (key: {entity_key})")
 
     if warnings:
@@ -158,7 +205,7 @@ def validate(models: Path):
     "-o",
     type=click.Path(path_type=Path),
     default=None,
-    help=f"Output file for background indexes (default: {settings.sql_dir}/background_indexes.sql)",
+    help="Output file for background indexes (default: package sql/background_indexes.sql)",
 )
 def indexes(output: Path):
     """
rem/cli/main.py CHANGED
@@ -22,17 +22,30 @@ except Exception:
     __version__ = "unknown"
 
 
+def _configure_logger(level: str):
+    """Configure loguru with custom level icons."""
+    logger.remove()
+
+    # Configure level icons - only warnings and errors get visual indicators
+    logger.level("DEBUG", icon=" ")
+    logger.level("INFO", icon=" ")
+    logger.level("WARNING", icon="🟠")
+    logger.level("ERROR", icon="🔴")
+    logger.level("CRITICAL", icon="🔴")
+
+    logger.add(
+        sys.stderr,
+        level=level,
+        format="<green>{time:HH:mm:ss}</green> | {level.icon} <level>{level: <8}</level> | <level>{message}</level>",
+    )
+
+
 @click.group()
 @click.option("--verbose", "-v", is_flag=True, help="Enable verbose logging")
 @click.version_option(version=__version__, prog_name="rem")
 def cli(verbose: bool):
     """REM - Resources Entities Moments system CLI."""
-    if verbose:
-        logger.remove()
-        logger.add(sys.stderr, level="DEBUG")
-    else:
-        logger.remove()
-        logger.add(sys.stderr, level="INFO")
+    _configure_logger("DEBUG" if verbose else "INFO")
 
 
 @cli.group()
@@ -65,6 +78,12 @@ def dreaming():
     pass
 
 
+@cli.group()
+def cluster():
+    """Kubernetes cluster deployment and management."""
+    pass
+
+
 # Register commands
 from .commands.schema import register_commands as register_schema_commands
 from .commands.db import register_commands as register_db_commands
@@ -76,11 +95,13 @@ from .commands.configure import register_command as register_configure_command
 from .commands.serve import register_command as register_serve_command
 from .commands.mcp import register_command as register_mcp_command
 from .commands.scaffold import scaffold as scaffold_command
+from .commands.cluster import register_commands as register_cluster_commands
 
 register_schema_commands(schema)
 register_db_commands(db)
 register_process_commands(process)
 register_dreaming_commands(dreaming)
+register_cluster_commands(cluster)
 register_ask_command(cli)
 register_configure_command(cli)
 register_serve_command(cli)
rem/models/core/rem_query.py CHANGED
@@ -112,7 +112,7 @@ class SearchParameters(BaseModel):
     table_name: str = Field(..., description="Table to search (resources, moments, etc.)")
     limit: int = Field(default=10, gt=0, description="Maximum results")
     min_similarity: float = Field(
-        default=0.7, ge=0.0, le=1.0, description="Minimum similarity score"
+        default=0.3, ge=0.0, le=1.0, description="Minimum similarity score (0.3 recommended for general queries)"
     )
 
 
@@ -198,7 +198,10 @@ class RemQuery(BaseModel):
         | SQLParameters
         | TraverseParameters
     ) = Field(..., description="Query parameters")
-    user_id: str = Field(..., description="User identifier for isolation")
+    user_id: Optional[str] = Field(
+        default=None,
+        description="User identifier (UUID5 hash of email). None = anonymous (shared/public data only)"
+    )
 
 
 class TraverseStage(BaseModel):
rem/models/entities/shared_session.py CHANGED
@@ -111,28 +111,20 @@ To permanently delete, an admin can run:
 
 from datetime import datetime
 from typing import Optional
-from uuid import UUID
 
 from pydantic import BaseModel, Field
 
-from ...utils.date_utils import utc_now
+from ..core import CoreModel
 
 
-class SharedSession(BaseModel):
+class SharedSession(CoreModel):
     """
     Session sharing record between users.
 
     Links a session (identified by session_id from Message records) to a
     recipient user, enabling collaborative access to conversation history.
-
-    This is NOT a CoreModel - it's a lightweight linking table without
-    graph edges, metadata, or embeddings.
     """
 
-    id: Optional[UUID] = Field(
-        default=None,
-        description="Unique identifier (auto-generated)",
-    )
     session_id: str = Field(
         ...,
         description="The session being shared (matches Message.session_id)",
@@ -145,24 +137,6 @@ class SharedSession(BaseModel):
         ...,
         description="User ID of the recipient (who can now view the session)",
     )
-    tenant_id: str = Field(
-        default="default",
-        description="Tenant identifier for multi-tenancy isolation",
-    )
-    created_at: datetime = Field(
-        default_factory=utc_now,
-        description="When the share was created",
-    )
-    updated_at: datetime = Field(
-        default_factory=utc_now,
-        description="Last modification timestamp",
-    )
-    deleted_at: Optional[datetime] = Field(
-        default=None,
-        description="Soft delete timestamp (null = active share)",
-    )
-
-    model_config = {"from_attributes": True}
 
 
 class SharedSessionCreate(BaseModel):
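
Note: a short sketch of what the base-class switch implies for callers; that the removed columns (id, tenant_id, created_at, updated_at, deleted_at) are now inherited from `CoreModel` is an assumption drawn from this hunk:

from rem.models.entities import SharedSession

# SharedSession now extends CoreModel, so the identity and audit fields removed
# above are expected to be inherited rather than declared on the model itself.
fields = set(SharedSession.model_fields)
assert "session_id" in fields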
rem/registry.py CHANGED
@@ -123,6 +123,7 @@ class ModelRegistry:
             return
 
         from .models.entities import (
+            Feedback,
            File,
            ImageResource,
            Message,
@@ -131,19 +132,24 @@ class ModelRegistry:
            OntologyConfig,
            Resource,
            Schema,
+            Session,
+            SharedSession,
            User,
        )
 
        core_models = [
-            Resource,
+            Feedback,
+            File,
            ImageResource,
            Message,
-            User,
-            File,
            Moment,
-            Schema,
            Ontology,
            OntologyConfig,
+            Resource,
+            Schema,
+            Session,
+            SharedSession,
+            User,
        ]
 
        for model in core_models:
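
Note: a minimal sketch of how the new CLI code paths consume the registry (see the `generate`/`validate` hunks above); the attribute names on `ext` come from the `validate` loop, and the downstream `Note` model is illustrative:

import rem
from rem.registry import get_model_registry
from rem.models.core import CoreModel


@rem.register_model
class Note(CoreModel):  # illustrative downstream model
    title: str


registry = get_model_registry()
models = registry.get_models(include_core=True)  # name -> registered-model wrapper

for name, ext in models.items():
    # ext.model is the Pydantic class; table_name / entity_key_field may override inference
    print(name, ext.model, ext.table_name, ext.entity_key_field)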
rem/services/content/service.py CHANGED
@@ -370,11 +370,32 @@ class ContentService:
         file_size = len(file_content)
         logger.info(f"Read {file_size} bytes from {file_uri} (source: {source_type})")
 
-        # Step 2: Write to internal storage (user-scoped)
+        # Step 1.5: Early schema detection for YAML/JSON files
+        # Skip File entity creation for schemas (agents/evaluators)
+        file_suffix = Path(file_name).suffix.lower()
+        if file_suffix in ['.yaml', '.yml', '.json']:
+            import yaml
+            import json
+            try:
+                content_text = file_content.decode('utf-8') if isinstance(file_content, bytes) else file_content
+                data = yaml.safe_load(content_text) if file_suffix in ['.yaml', '.yml'] else json.loads(content_text)
+                if isinstance(data, dict):
+                    json_schema_extra = data.get('json_schema_extra', {})
+                    kind = json_schema_extra.get('kind', '')
+                    if kind in ['agent', 'evaluator']:
+                        # Route directly to schema processing, skip File entity
+                        logger.info(f"Detected {kind} schema: {file_name}, routing to _process_schema")
+                        result = self.process_uri(file_uri)
+                        return await self._process_schema(result, file_uri, user_id)
+            except Exception as e:
+                logger.debug(f"Early schema detection failed for {file_name}: {e}")
+                # Fall through to standard file processing
+
+        # Step 2: Write to internal storage (public or user-scoped)
         file_id = str(uuid4())
         storage_uri, internal_key, content_type, _ = await fs_service.write_to_internal_storage(
             content=file_content,
-            tenant_id=user_id,  # Using user_id for storage scoping
+            tenant_id=user_id or "public",  # Storage path: public/ or user_id/
             file_name=file_name,
             file_id=file_id,
         )
@@ -383,7 +404,7 @@ class ContentService:
         # Step 3: Create File entity
         file_entity = File(
             id=file_id,
-            tenant_id=user_id,  # Set tenant_id to user_id (application scoped to user)
+            tenant_id=user_id,  # None = public/shared
             user_id=user_id,
             name=file_name,
             uri=storage_uri,
@@ -538,7 +559,7 @@ class ContentService:
             size_bytes=result["metadata"].get("size"),
             mime_type=result["metadata"].get("content_type"),
             processing_status="completed",
-            tenant_id=user_id or "default",  # Required field
+            tenant_id=user_id,  # None = public/shared
             user_id=user_id,
         )
 
@@ -571,7 +592,7 @@ class ContentService:
             ordinal=i,
             content=chunk,
             category="document",
-            tenant_id=user_id or "default",  # Required field
+            tenant_id=user_id,  # None = public/shared
             user_id=user_id,
         )
         for i, chunk in enumerate(chunks)
@@ -645,9 +666,10 @@ class ContentService:
         # IMPORTANT: category field distinguishes agents from evaluators
         # - kind=agent → category="agent" (AI agents with tools/resources)
         # - kind=evaluator → category="evaluator" (LLM-as-a-Judge evaluators)
+        # Schemas (agents/evaluators) default to system tenant for shared access
         schema_entity = Schema(
-            tenant_id=user_id or "default",
-            user_id=user_id,
+            tenant_id="system",
+            user_id=None,
             name=name,
             spec=schema_data,
             category=kind,  # Maps kind → category for database filtering
@@ -717,7 +739,7 @@ class ContentService:
         processor = EngramProcessor(postgres)
         result = await processor.process_engram(
             data=data,
-            tenant_id=user_id or "default",
+            tenant_id=user_id,  # None = public/shared
             user_id=user_id,
         )
         logger.info(f"✅ Engram processed: {result.get('resource_id')} with {len(result.get('moment_ids', []))} moments")
rem/services/embeddings/api.py CHANGED
@@ -45,7 +45,7 @@ def generate_embedding(
         return [0.0] * DEFAULT_EMBEDDING_DIMS
 
     try:
-        logger.info(f"Generating OpenAI embedding for text using {model}")
+        logger.debug(f"Generating OpenAI embedding for text using {model}")
 
         response = requests.post(
             "https://api.openai.com/v1/embeddings",
@@ -60,7 +60,7 @@ def generate_embedding(
 
         data = response.json()
         embedding = data["data"][0]["embedding"]
-        logger.info(f"Successfully generated embedding (dimension: {len(embedding)})")
+        logger.debug(f"Successfully generated embedding (dimension: {len(embedding)})")
         return cast(list[float], embedding)
 
     except Exception as e:
@@ -97,7 +97,7 @@ async def generate_embedding_async(
         return [0.0] * DEFAULT_EMBEDDING_DIMS
 
     try:
-        logger.info(f"Generating OpenAI embedding for text using {model}")
+        logger.debug(f"Generating OpenAI embedding for text using {model}")
 
         async with httpx.AsyncClient() as client:
             response = await client.post(
@@ -113,7 +113,7 @@ async def generate_embedding_async(
 
         data = response.json()
         embedding = data["data"][0]["embedding"]
-        logger.info(
+        logger.debug(
             f"Successfully generated embedding (dimension: {len(embedding)})"
         )
         return cast(list[float], embedding)
rem/services/embeddings/worker.py CHANGED
@@ -69,7 +69,7 @@ def get_global_embedding_worker(postgres_service: Any = None) -> "EmbeddingWorker":
         if postgres_service is None:
             raise RuntimeError("Must provide postgres_service on first call to get_global_embedding_worker")
         _global_worker = EmbeddingWorker(postgres_service=postgres_service)
-        logger.info("Created global EmbeddingWorker singleton")
+        logger.debug("Created global EmbeddingWorker singleton")
 
     return _global_worker
 
@@ -117,7 +117,7 @@ class EmbeddingWorker:
                 "No OpenAI API key provided - embeddings will use zero vectors"
             )
 
-        logger.info(
+        logger.debug(
             f"Initialized EmbeddingWorker: {num_workers} workers, "
             f"batch_size={batch_size}, timeout={batch_timeout}s"
         )
@@ -125,17 +125,17 @@ class EmbeddingWorker:
     async def start(self) -> None:
         """Start worker pool."""
         if self.running:
-            logger.warning("EmbeddingWorker already running")
+            logger.debug("EmbeddingWorker already running")
             return
 
         self.running = True
-        logger.info(f"Starting {self.num_workers} embedding workers")
+        logger.debug(f"Starting {self.num_workers} embedding workers")
 
         for i in range(self.num_workers):
             worker = asyncio.create_task(self._worker_loop(i))
             self.workers.append(worker)
 
-        logger.info("EmbeddingWorker started")
+        logger.debug("EmbeddingWorker started")
 
     async def stop(self) -> None:
         """Stop worker pool gracefully - processes remaining queue before stopping."""
@@ -143,7 +143,7 @@ class EmbeddingWorker:
             return
 
         queue_size = self.task_queue.qsize()
-        logger.info(f"Stopping EmbeddingWorker (processing {queue_size} queued tasks first)")
+        logger.debug(f"Stopping EmbeddingWorker (processing {queue_size} queued tasks first)")
 
         # Wait for queue to drain (with timeout)
         max_wait = 30  # 30 seconds max
@@ -171,7 +171,7 @@ class EmbeddingWorker:
         await asyncio.gather(*self.workers, return_exceptions=True)
 
         self.workers.clear()
-        logger.info("EmbeddingWorker stopped")
+        logger.debug("EmbeddingWorker stopped")
 
     async def queue_task(self, task: EmbeddingTask) -> None:
         """
@@ -195,7 +195,7 @@ class EmbeddingWorker:
         Args:
             worker_id: Unique worker identifier
         """
-        logger.info(f"Worker {worker_id} started")
+        logger.debug(f"Worker {worker_id} started")
 
         while self.running:
             try:
@@ -205,7 +205,7 @@ class EmbeddingWorker:
                 if not batch:
                     continue
 
-                logger.info(f"Worker {worker_id} processing batch of {len(batch)} tasks")
+                logger.debug(f"Worker {worker_id} processing batch of {len(batch)} tasks")
 
                 # Generate embeddings for batch
                 await self._process_batch(batch)
@@ -213,14 +213,14 @@ class EmbeddingWorker:
                 logger.debug(f"Worker {worker_id} completed batch")
 
             except asyncio.CancelledError:
-                logger.info(f"Worker {worker_id} cancelled")
+                logger.debug(f"Worker {worker_id} cancelled")
                 break
             except Exception as e:
                 logger.error(f"Worker {worker_id} error: {e}", exc_info=True)
                 # Continue processing (don't crash worker on error)
                 await asyncio.sleep(1)
 
-        logger.info(f"Worker {worker_id} stopped")
+        logger.debug(f"Worker {worker_id} stopped")
 
     async def _collect_batch(self) -> list[EmbeddingTask]:
         """
@@ -284,10 +284,10 @@ class EmbeddingWorker:
         )
 
         # Upsert to database
-        logger.info(f"Upserting {len(embeddings)} embeddings to database...")
+        logger.debug(f"Upserting {len(embeddings)} embeddings to database...")
         await self._upsert_embeddings(batch, embeddings)
 
-        logger.info(
+        logger.debug(
             f"Successfully generated and stored {len(embeddings)} embeddings "
             f"(provider={provider}, model={model})"
         )
@@ -315,7 +315,7 @@ class EmbeddingWorker:
         """
         if provider == "openai" and self.openai_api_key:
             try:
-                logger.info(
+                logger.debug(
                     f"Generating OpenAI embeddings for {len(texts)} texts using {model}"
                 )
 
@@ -336,7 +336,7 @@ class EmbeddingWorker:
                 data = response.json()
                 embeddings = [item["embedding"] for item in data["data"]]
 
-                logger.info(
+                logger.debug(
                     f"Successfully generated {len(embeddings)} embeddings from OpenAI"
                 )
                 return embeddings
@@ -409,7 +409,7 @@ class EmbeddingWorker:
             ),
         )
 
-        logger.info(
+        logger.debug(
             f"Upserted embedding: {task.table_name}.{task.entity_id}.{task.field_name}"
         )