remdb 0.3.181__py3-none-any.whl → 0.3.200__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of remdb might be problematic.

rem/auth/middleware.py CHANGED
@@ -14,15 +14,14 @@ Design Pattern:
  - MCP paths always require authentication (protected service)

  Authentication Flow:
- 1. If API key enabled: Validate X-API-Key header (access gate)
- 2. Check JWT token for user identity (primary)
- 3. Check dev token for testing (non-production only)
- 4. Check session for user (backward compatibility)
- 5. If allow_anonymous=True: Allow as anonymous (rate-limited)
- 6. If allow_anonymous=False: Return 401 / redirect to login
+ 1. Check JWT/dev token/session for user identity first
+ 2. If user is admin: bypass API key check (admin privilege)
+ 3. If API key enabled and user is not admin: Validate X-API-Key header
+ 4. If allow_anonymous=True: Allow as anonymous (rate-limited)
+ 5. If allow_anonymous=False: Return 401 / redirect to login

  IMPORTANT: API key validates ACCESS, JWT identifies USER.
- Both can be required: API key for access + JWT for user identity.
+ Admin users bypass the API key requirement (trusted identity).

  Access Modes (configured in settings.auth):
  - enabled=true, allow_anonymous=true: Auth available, anonymous gets rate-limited access
@@ -195,6 +194,12 @@ class AuthMiddleware(BaseHTTPMiddleware):

  return None

+ def _is_admin(self, user: dict | None) -> bool:
+ """Check if user has admin role."""
+ if not user:
+ return False
+ return "admin" in user.get("roles", [])
+
  async def dispatch(self, request: Request, call_next):
  """
  Check authentication for protected paths.
@@ -219,8 +224,35 @@ class AuthMiddleware(BaseHTTPMiddleware):
  if not is_protected or is_excluded:
  return await call_next(request)

- # API key validation (access control, not user identity)
- # API key is a guardrail for access - JWT identifies the actual user
+ # Check for user identity FIRST (JWT, dev token, session)
+ # This allows admin users to bypass API key requirement
+ user = None
+
+ # Check for JWT token in Authorization header (primary user identity)
+ jwt_user = self._check_jwt_token(request)
+ if jwt_user:
+ user = jwt_user
+
+ # Check for dev token (non-production only)
+ if not user:
+ dev_user = self._check_dev_token(request)
+ if dev_user:
+ user = dev_user
+
+ # Check for valid session (backward compatibility)
+ if not user:
+ session_user = request.session.get("user")
+ if session_user:
+ user = session_user
+
+ # If user is admin, bypass API key check entirely
+ if self._is_admin(user):
+ logger.debug(f"Admin user {user.get('email')} bypassing API key check")
+ request.state.user = user
+ request.state.is_anonymous = False
+ return await call_next(request)
+
+ # API key validation for non-admin users (access control guardrail)
  if settings.api.api_key_enabled:
  api_key = request.headers.get("x-api-key")
  if not api_key:
@@ -238,27 +270,9 @@ class AuthMiddleware(BaseHTTPMiddleware):
  headers={"WWW-Authenticate": 'ApiKey realm="REM API"'},
  )
  logger.debug("X-API-Key validated for access")
- # API key valid - continue to check JWT for user identity
-
- # Check for JWT token in Authorization header (primary user identity)
- jwt_user = self._check_jwt_token(request)
- if jwt_user:
- request.state.user = jwt_user
- request.state.is_anonymous = False
- return await call_next(request)
-
- # Check for dev token (non-production only)
- dev_user = self._check_dev_token(request)
- if dev_user:
- request.state.user = dev_user
- request.state.is_anonymous = False
- return await call_next(request)
-
- # Check for valid session (backward compatibility)
- user = request.session.get("user")

+ # If we have a valid user (non-admin, but passed API key check), allow access
  if user:
- # Authenticated user - add to request state
  request.state.user = user
  request.state.is_anonymous = False
  return await call_next(request)
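
A client-side sketch of the reordered flow (identity first, API key only for non-admins). The base URL, path, and tokens below are placeholders; only the `Authorization` and `X-API-Key` headers come from the middleware shown above.

```python
import httpx

BASE_URL = "http://localhost:8000"           # placeholder deployment
PROTECTED = f"{BASE_URL}/api/some-resource"  # placeholder protected path

# Admin identity: a JWT whose "roles" claim includes "admin".
# Under the new flow this succeeds even when api_key_enabled is true and
# no X-API-Key header is sent, because admins bypass the access gate.
admin_resp = httpx.get(
    PROTECTED,
    headers={"Authorization": "Bearer <admin-jwt>"},  # placeholder token
)

# Regular user: identity alone is no longer enough when the gate is on;
# the request still needs the X-API-Key access header.
user_resp = httpx.get(
    PROTECTED,
    headers={
        "Authorization": "Bearer <user-jwt>",  # placeholder token
        "x-api-key": "<api-key>",              # access gate for non-admins
    },
)

print(admin_resp.status_code, user_resp.status_code)
```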
rem/cli/README.md CHANGED
@@ -434,6 +434,68 @@ Ensure you're using the correct model format:
  - OpenAI: `openai:gpt-4o-mini`, `openai:gpt-4o`
  - Anthropic: `anthropic:claude-sonnet-4-5-20250929`

+ ## Data Visibility: PUBLIC vs PRIVATE
+
+ **IMPORTANT: All ingested data is PUBLIC by default.** This is the correct behavior
+ for shared knowledge bases (ontologies, procedures, reference data).
+
+ ### Why PUBLIC by Default?
+
+ Most data in REM should be searchable by all users:
+ - Clinical ontologies (disorders, symptoms, drugs)
+ - Procedures and protocols (SCID-5, PHQ-9, etc.)
+ - Reference documentation
+ - Shared domain knowledge
+
+ The `rem_lookup()` function searches for data where `user_id IS NULL`, which means
+ public data. If you set `user_id` on data, it becomes invisible to other users.
+
+ ### Ingesting Public Data (Default)
+
+ ```bash
+ # Standard ingestion - data is PUBLIC
+ rem process ingest ontology/procedures/ --table ontologies
+
+ # From S3 - also PUBLIC
+ rem process ingest s3://bucket/docs/reference.pdf
+ ```
+
+ ### Ingesting Private Data (Rare)
+
+ Private data requires explicit `--make-private` flag:
+
+ ```bash
+ # Private user data - requires --make-private and --user-id
+ rem process ingest personal-notes.md --make-private --user-id user-123
+ ```
+
+ **When to use private data:**
+ - User-uploaded personal documents
+ - Session-specific content
+ - User notes and annotations
+
+ **NEVER use private data for:**
+ - Ontologies and reference material
+ - Clinical procedures and protocols
+ - Shared knowledge bases
+ - Anything that should be searchable by agents
+
+ ### Common Mistake
+
+ If agents can't find data via `search_rem`, the most common cause is that the data
+ was ingested with a `user_id` set. Check with:
+
+ ```sql
+ SELECT name, user_id FROM ontologies WHERE name = 'phq-9-procedure';
+ -- user_id should be NULL for public data
+ ```
+
+ Fix by setting user_id to NULL:
+ ```sql
+ UPDATE ontologies SET user_id = NULL WHERE user_id IS NOT NULL;
+ UPDATE kv_store SET user_id = NULL WHERE entity_type = 'ontologies' AND user_id IS NOT NULL;
+ ```
+
  ## Next Steps

  1. **Implement Schema Registry**
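
To make the visibility rule in the new README section concrete, here is a minimal Python sketch of the same check the SQL above performs. The DSN is a placeholder and the table and column names are taken from that section; it assumes direct access to the underlying PostgreSQL database.

```python
import asyncio
import asyncpg  # assumption: the REM database is reachable as plain PostgreSQL

async def check_visibility(name: str) -> None:
    """Report whether an ontology row is PUBLIC (user_id IS NULL) or private."""
    conn = await asyncpg.connect("postgresql://localhost/rem")  # placeholder DSN
    try:
        row = await conn.fetchrow(
            "SELECT name, user_id FROM ontologies WHERE name = $1", name
        )
        if row is None:
            print(f"{name}: not found")
        elif row["user_id"] is None:
            print(f"{name}: PUBLIC (visible to rem_lookup / search_rem)")
        else:
            print(f"{name}: PRIVATE to user {row['user_id']}")
    finally:
        await conn.close()

asyncio.run(check_visibility("phq-9-procedure"))
```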
rem/cli/commands/db.py CHANGED
@@ -469,8 +469,7 @@ async def _load_async(file_path: Path, table: str | None, user_id: str | None, d
  # Handle direct insert tables (non-CoreModel)
  if table_name in DIRECT_INSERT_TABLES:
  for row_data in rows:
- if "tenant_id" not in row_data:
- row_data["tenant_id"] = "default"
+ # tenant_id is optional - NULL means public/shared

  if table_name == "shared_sessions":
  await pg.fetch(
@@ -481,7 +480,7 @@ async def _load_async(file_path: Path, table: str | None, user_id: str | None, d
  row_data["session_id"],
  row_data["owner_user_id"],
  row_data["shared_with_user_id"],
- row_data["tenant_id"],
+ row_data.get("tenant_id"), # Optional - NULL means public
  )
  total_loaded += 1
  logger.success(f"Loaded shared_session: {row_data['owner_user_id']} -> {row_data['shared_with_user_id']}")
@@ -494,10 +493,8 @@ async def _load_async(file_path: Path, table: str | None, user_id: str | None, d
  model_class = MODEL_MAP[table_name]

  for row_idx, row_data in enumerate(rows):
- # user_id stays NULL for public data (accessible by any user)
- # Only set tenant_id for scoping - the --user-id flag controls tenant scope
- if "tenant_id" not in row_data and user_id is not None:
- row_data["tenant_id"] = user_id
+ # tenant_id and user_id are optional - NULL means public/shared data
+ # Data files can explicitly set tenant_id/user_id if needed

  # Convert graph_edges to InlineEdge format if present
  if "graph_edges" in row_data:
@@ -644,7 +641,7 @@ async def _diff_async(

  if not result.has_changes:
  click.secho("✓ No schema drift detected", fg="green")
- click.echo(" Database matches Pydantic models")
+ click.echo(" Database matches source (tables, functions, triggers, views)")
  if result.filtered_count > 0:
  click.echo()
  click.secho(f" ({result.filtered_count} destructive change(s) hidden by '{strategy}' strategy)", fg="yellow")
@@ -656,17 +653,34 @@ async def _diff_async(
  if result.filtered_count > 0:
  click.secho(f" ({result.filtered_count} destructive change(s) hidden by '{strategy}' strategy)", fg="yellow")
  click.echo()
- click.echo("Changes:")
- for line in result.summary:
- if line.startswith("+"):
- click.secho(f" {line}", fg="green")
- elif line.startswith("-"):
- click.secho(f" {line}", fg="red")
- elif line.startswith("~"):
- click.secho(f" {line}", fg="yellow")
- else:
- click.echo(f" {line}")
- click.echo()
+
+ # Table/column changes (Alembic)
+ if result.summary:
+ click.echo("Table Changes:")
+ for line in result.summary:
+ if line.startswith("+"):
+ click.secho(f" {line}", fg="green")
+ elif line.startswith("-"):
+ click.secho(f" {line}", fg="red")
+ elif line.startswith("~"):
+ click.secho(f" {line}", fg="yellow")
+ else:
+ click.echo(f" {line}")
+ click.echo()
+
+ # Programmable object changes (functions, triggers, views)
+ if result.programmable_summary:
+ click.echo("Programmable Objects (functions/triggers/views):")
+ for line in result.programmable_summary:
+ if line.startswith("+"):
+ click.secho(f" {line}", fg="green")
+ elif line.startswith("-"):
+ click.secho(f" {line}", fg="red")
+ elif line.startswith("~"):
+ click.secho(f" {line}", fg="yellow")
+ else:
+ click.echo(f" {line}")
+ click.echo()

  # Generate migration if requested
  if generate:
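
The two color-coded loops added above differ only in heading and source list. A small helper along these lines (hypothetical, not part of the package) captures the shared logic and keeps the two sections in sync:

```python
import click

def print_diff_lines(heading: str, lines: list[str]) -> None:
    """Color-code diff summary lines the way the CLI does: + green, - red, ~ yellow."""
    if not lines:
        return
    click.echo(heading)
    colors = {"+": "green", "-": "red", "~": "yellow"}
    for line in lines:
        color = colors.get(line[:1])
        if color:
            click.secho(f"  {line}", fg=color)
        else:
            click.echo(f"  {line}")
    click.echo()

# Usage mirroring the command above, given a diff result object:
# print_diff_lines("Table Changes:", result.summary)
# print_diff_lines("Programmable Objects (functions/triggers/views):", result.programmable_summary)
```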
@@ -11,39 +11,102 @@ from rem.services.content import ContentService


  @click.command(name="ingest")
- @click.argument("file_path", type=click.Path(exists=True))
- @click.option("--user-id", default=None, help="User ID to scope file privately (default: public/shared)")
+ @click.argument("path", type=click.Path(exists=True))
+ @click.option("--table", "-t", default=None, help="Target table (e.g., ontologies, resources). Auto-detected for schemas.")
+ @click.option("--make-private", is_flag=True, help="Make data private to a specific user. RARELY NEEDED - most data should be public/shared.")
+ @click.option("--user-id", default=None, help="User ID for private data. REQUIRES --make-private flag.")
  @click.option("--category", help="Optional file category")
  @click.option("--tags", help="Optional comma-separated tags")
+ @click.option("--pattern", "-p", default="**/*.md", help="Glob pattern for directory ingestion (default: **/*.md)")
+ @click.option("--dry-run", is_flag=True, help="Show what would be ingested without making changes")
  def process_ingest(
- file_path: str,
+ path: str,
+ table: str | None,
+ make_private: bool,
  user_id: str | None,
  category: str | None,
  tags: str | None,
+ pattern: str,
+ dry_run: bool,
  ):
  """
- Ingest a file into REM (storage + parsing + embedding).
+ Ingest files into REM (storage + parsing + embedding).

- This command performs the full ingestion pipeline:
- 1. Reads the file from the local path.
- 2. Stores it in the configured storage (local/S3).
- 3. Parses the content.
- 4. Chunks and embeds the content into Resources.
- 5. Creates a File entity record.
+ Supports both single files and directories. For directories, recursively
+ processes files matching the pattern (default: **/*.md).
+
+ **IMPORTANT: Data is PUBLIC by default.** This is the correct behavior for
+ shared knowledge bases (ontologies, procedures, reference data). Private
+ user-scoped data is rarely needed and requires explicit --make-private flag.
+
+ Target table is auto-detected for schemas (agent.yaml → schemas table).
+ Use --table to explicitly set the target (e.g., ontologies for clinical knowledge).

  Examples:
  rem process ingest sample.pdf
  rem process ingest contract.docx --category legal --tags contract,2023
  rem process ingest agent.yaml # Auto-detects kind=agent, saves to schemas table
+
+ # Directory ingestion into ontologies table (PUBLIC - no user-id needed)
+ rem process ingest ontology/procedures/scid-5/ --table ontologies
+ rem process ingest ontology/ --table ontologies --pattern "**/*.md"
+
+ # Preview what would be ingested
+ rem process ingest ontology/ --table ontologies --dry-run
+
+ # RARE: Private user-scoped data (requires --make-private)
+ rem process ingest private-notes.md --make-private --user-id user-123
  """
  import asyncio
+
+ # Validate: user_id requires --make-private flag
+ if user_id and not make_private:
+ raise click.UsageError(
+ "Setting --user-id requires the --make-private flag.\n\n"
+ "Data should be PUBLIC by default (no user-id). Private user-scoped data\n"
+ "is rarely needed - only use --make-private for truly personal content.\n\n"
+ "Example: rem process ingest file.md --make-private --user-id user-123"
+ )
+
+ # If --make-private is set, user_id is required
+ if make_private and not user_id:
+ raise click.UsageError(
+ "--make-private requires --user-id to specify which user owns the data.\n\n"
+ "Example: rem process ingest file.md --make-private --user-id user-123"
+ )
+
+ # Clear user_id if not making private (ensure None for public data)
+ effective_user_id = user_id if make_private else None
+ from pathlib import Path
  from ...services.content import ContentService

  async def _ingest():
- # Initialize ContentService with repositories for proper resource saving
  from rem.services.postgres import get_postgres_service
  from rem.services.postgres.repository import Repository
- from rem.models.entities import File, Resource
+ from rem.models.entities import File, Resource, Ontology
+
+ input_path = Path(path)
+ tag_list = tags.split(",") if tags else None
+
+ # Collect files to process
+ if input_path.is_dir():
+ files_to_process = list(input_path.glob(pattern))
+ if not files_to_process:
+ logger.error(f"No files matching '{pattern}' found in {input_path}")
+ sys.exit(1)
+ logger.info(f"Found {len(files_to_process)} files matching '{pattern}'")
+ else:
+ files_to_process = [input_path]
+
+ # Dry run: just show what would be processed
+ if dry_run:
+ logger.info("DRY RUN - Would ingest:")
+ for f in files_to_process[:20]:
+ entity_key = f.stem # filename without extension
+ logger.info(f" {f} → {table or 'auto-detect'} (key: {entity_key})")
+ if len(files_to_process) > 20:
+ logger.info(f" ... and {len(files_to_process) - 20} more files")
+ return

  db = get_postgres_service()
  if not db:
@@ -51,53 +114,118 @@ def process_ingest(
  await db.connect()

  try:
- file_repo = Repository(File, "files", db=db)
- resource_repo = Repository(Resource, "resources", db=db)
- service = ContentService(file_repo=file_repo, resource_repo=resource_repo)
-
- tag_list = tags.split(",") if tags else None
-
- scope_msg = f"user: {user_id}" if user_id else "public"
- logger.info(f"Ingesting file: {file_path} ({scope_msg})")
- result = await service.ingest_file(
- file_uri=file_path,
- user_id=user_id,
- category=category,
- tags=tag_list,
- is_local_server=True, # CLI is local
- )
-
- # Handle schema ingestion (agents/evaluators)
- if result.get("schema_name"):
- logger.success(f"Schema ingested: {result['schema_name']} (kind={result.get('kind', 'agent')})")
- logger.info(f"Version: {result.get('version', '1.0.0')}")
- # Handle file ingestion
- elif result.get("processing_status") == "completed":
- logger.success(f"File ingested: {result['file_name']}")
- logger.info(f"File ID: {result['file_id']}")
- logger.info(f"Resources created: {result['resources_created']}")
+ # Direct table ingestion (ontologies, etc.)
+ if table:
+ await _ingest_to_table(
+ db=db,
+ files=files_to_process,
+ table_name=table,
+ user_id=effective_user_id,
+ category=category,
+ tag_list=tag_list,
+ )
  else:
- logger.error(f"Ingestion failed: {result.get('message', 'Unknown error')}")
- sys.exit(1)
+ # Standard file ingestion via ContentService
+ file_repo = Repository(File, "files", db=db)
+ resource_repo = Repository(Resource, "resources", db=db)
+ service = ContentService(file_repo=file_repo, resource_repo=resource_repo)
+
+ for file_path in files_to_process:
+ scope_msg = f"user: {effective_user_id}" if effective_user_id else "public"
+ logger.info(f"Ingesting: {file_path} ({scope_msg})")
+
+ result = await service.ingest_file(
+ file_uri=str(file_path),
+ user_id=effective_user_id,
+ category=category,
+ tags=tag_list,
+ is_local_server=True,
+ )
+
+ # Handle schema ingestion (agents/evaluators)
+ if result.get("schema_name"):
+ logger.success(f"Schema: {result['schema_name']} (kind={result.get('kind', 'agent')})")
+ elif result.get("processing_status") == "completed":
+ logger.success(f"File: {result['file_name']} ({result['resources_created']} resources)")
+ else:
+ logger.error(f"Failed: {result.get('message', 'Unknown error')}")

  except Exception as e:
  logger.error(f"Error during ingestion: {e}")
  sys.exit(1)
  finally:
- # Wait for global embedding worker to finish queued tasks
+ # Wait for embedding worker to finish
  from rem.services.embeddings.worker import get_global_embedding_worker
  try:
  worker = get_global_embedding_worker()
  if worker and worker.running and not worker.task_queue.empty():
- logger.info(f"Waiting for {worker.task_queue.qsize()} embedding tasks to complete...")
- # Worker.stop() waits for queue to drain (see worker.py line ~148)
+ logger.info(f"Waiting for {worker.task_queue.qsize()} embedding tasks...")
  await worker.stop()
  except RuntimeError:
- # Worker doesn't exist yet - no tasks queued
  pass

  await db.disconnect()

+ async def _ingest_to_table(db, files, table_name, user_id, category, tag_list):
+ """Direct ingestion of files to a specific table (ontologies, etc.)."""
+ from rem.services.postgres.repository import Repository
+ from rem import get_model_registry
+ from rem.utils.model_helpers import get_table_name
+
+ # Get model class for table
+ registry = get_model_registry()
+ registry.register_core_models()
+ model_class = None
+ for model in registry.get_model_classes().values():
+ if get_table_name(model) == table_name:
+ model_class = model
+ break
+
+ if not model_class:
+ logger.error(f"Unknown table: {table_name}")
+ sys.exit(1)
+
+ repo = Repository(model_class, table_name, db=db)
+ processed = 0
+ failed = 0
+
+ for file_path in files:
+ try:
+ # Read file content
+ content = file_path.read_text(encoding="utf-8")
+ entity_key = file_path.stem # filename without extension
+
+ # Build entity based on table
+ entity_data = {
+ "name": entity_key,
+ "content": content,
+ "tags": tag_list or [],
+ }
+
+ # Add optional fields
+ if category:
+ entity_data["category"] = category
+
+ # Scoping: user_id for private data, None for public/shared
+ # tenant_id=None and user_id=None means PUBLIC data (visible to all)
+ entity_data["tenant_id"] = user_id # None = public/shared
+ entity_data["user_id"] = user_id # None = public/shared
+
+ # For ontologies, add URI
+ if table_name == "ontologies":
+ entity_data["uri"] = f"file://{file_path.absolute()}"
+
+ entity = model_class(**entity_data)
+ await repo.upsert(entity, embeddable_fields=["content"], generate_embeddings=True)
+ processed += 1
+ logger.success(f" ✓ {entity_key}")
+
+ except Exception as e:
+ failed += 1
+ logger.error(f" ✗ {file_path.name}: {e}")
+
+ logger.info(f"Completed: {processed} succeeded, {failed} failed")
+
  asyncio.run(_ingest())

  def register_commands(group: click.Group):
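
The public-by-default rule enforced by the new flag validation above reduces to a small pure function. This standalone sketch (hypothetical names, not the package's code) shows the three outcomes without touching the database:

```python
import click

def resolve_user_scope(user_id: str | None, make_private: bool) -> str | None:
    """Mirror the CLI validation above: public by default, private only with both flags."""
    if user_id and not make_private:
        raise click.UsageError("Setting --user-id requires the --make-private flag.")
    if make_private and not user_id:
        raise click.UsageError("--make-private requires --user-id.")
    # None means the row is stored with user_id/tenant_id NULL, i.e. public/shared.
    return user_id if make_private else None

assert resolve_user_scope(None, False) is None              # default: public
assert resolve_user_scope("user-123", True) == "user-123"   # explicit private
# resolve_user_scope("user-123", False) raises click.UsageError
```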
@@ -274,7 +274,7 @@ class ContentService:
  async def ingest_file(
  self,
  file_uri: str,
- user_id: str,
+ user_id: str | None = None,
  category: str | None = None,
  tags: list[str] | None = None,
  is_local_server: bool = False,
@@ -283,6 +283,10 @@ class ContentService:
  """
  Complete file ingestion pipeline: read → store → parse → chunk → embed.

+ **IMPORTANT: Data is PUBLIC by default (user_id=None).**
+ This is correct for shared knowledge bases (ontologies, procedures, reference data).
+ Private user-scoped data is rarely needed - only set user_id for truly personal content.
+
  **CENTRALIZED INGESTION**: This is the single entry point for all file ingestion
  in REM. It handles:

@@ -319,7 +323,9 @@ class ContentService:

  Args:
  file_uri: Source file location (local path, s3://, or https://)
- user_id: User identifier for data isolation and ownership
+ user_id: User identifier for PRIVATE data only. Default None = PUBLIC/shared.
+ Leave as None for shared knowledge bases, ontologies, reference data.
+ Only set for truly private user-specific content.
  category: Optional category tag (document, code, audio, etc.)
  tags: Optional list of tags
  is_local_server: True if running as local/stdio MCP server
@@ -347,12 +353,19 @@ class ContentService:

  Example:
  >>> service = ContentService()
+ >>> # PUBLIC data (default) - visible to all users
  >>> result = await service.ingest_file(
- ... file_uri="s3://bucket/contract.pdf",
- ... user_id="user-123",
- ... category="legal"
+ ... file_uri="s3://bucket/procedure.pdf",
+ ... category="medical"
  ... )
  >>> print(f"Created {result['resources_created']} searchable chunks")
+ >>>
+ >>> # PRIVATE data (rare) - only for user-specific content
+ >>> result = await service.ingest_file(
+ ... file_uri="s3://bucket/personal-notes.pdf",
+ ... user_id="user-123", # Only this user can access
+ ... category="personal"
+ ... )
  """
  from pathlib import Path
  from uuid import uuid4
@@ -3,22 +3,47 @@ PostgreSQL service for CloudNativePG database operations.
  """

  from .diff_service import DiffService, SchemaDiff
+ from .programmable_diff_service import (
+ DiffResult,
+ ObjectDiff,
+ ObjectType,
+ ProgrammableDiffService,
+ )
  from .repository import Repository
  from .service import PostgresService


+ _postgres_instance: PostgresService | None = None
+
+
  def get_postgres_service() -> PostgresService | None:
  """
- Get PostgresService instance.
+ Get PostgresService singleton instance.

  Returns None if Postgres is disabled.
+ Uses singleton pattern to prevent connection pool exhaustion.
  """
+ global _postgres_instance
+
  from ...settings import settings

  if not settings.postgres.enabled:
  return None

- return PostgresService()
+ if _postgres_instance is None:
+ _postgres_instance = PostgresService()
+
+ return _postgres_instance


- __all__ = ["PostgresService", "get_postgres_service", "Repository", "DiffService", "SchemaDiff"]
+ __all__ = [
+ "DiffResult",
+ "DiffService",
+ "ObjectDiff",
+ "ObjectType",
+ "PostgresService",
+ "ProgrammableDiffService",
+ "Repository",
+ "SchemaDiff",
+ "get_postgres_service",
+ ]
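
A minimal sketch of the singleton behaviour introduced above. It assumes a configuration where `settings.postgres.enabled` is true; the import path is the one used elsewhere in this diff.

```python
from rem.services.postgres import get_postgres_service

first = get_postgres_service()
second = get_postgres_service()

# Both calls now return the same PostgresService object, so repeated lookups
# share one connection pool instead of constructing a new service each time.
assert first is second
```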