remdb 0.3.14__py3-none-any.whl → 0.3.157__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their public registry. The information is provided for informational purposes only.
Files changed (112)
  1. rem/agentic/README.md +76 -0
  2. rem/agentic/__init__.py +15 -0
  3. rem/agentic/agents/__init__.py +32 -2
  4. rem/agentic/agents/agent_manager.py +310 -0
  5. rem/agentic/agents/sse_simulator.py +502 -0
  6. rem/agentic/context.py +51 -27
  7. rem/agentic/context_builder.py +5 -3
  8. rem/agentic/llm_provider_models.py +301 -0
  9. rem/agentic/mcp/tool_wrapper.py +155 -18
  10. rem/agentic/otel/setup.py +93 -4
  11. rem/agentic/providers/phoenix.py +371 -108
  12. rem/agentic/providers/pydantic_ai.py +280 -57
  13. rem/agentic/schema.py +361 -21
  14. rem/agentic/tools/rem_tools.py +3 -3
  15. rem/api/README.md +215 -1
  16. rem/api/deps.py +255 -0
  17. rem/api/main.py +132 -40
  18. rem/api/mcp_router/resources.py +1 -1
  19. rem/api/mcp_router/server.py +28 -5
  20. rem/api/mcp_router/tools.py +555 -7
  21. rem/api/routers/admin.py +494 -0
  22. rem/api/routers/auth.py +278 -4
  23. rem/api/routers/chat/completions.py +402 -20
  24. rem/api/routers/chat/models.py +88 -10
  25. rem/api/routers/chat/otel_utils.py +33 -0
  26. rem/api/routers/chat/sse_events.py +542 -0
  27. rem/api/routers/chat/streaming.py +697 -45
  28. rem/api/routers/dev.py +81 -0
  29. rem/api/routers/feedback.py +268 -0
  30. rem/api/routers/messages.py +473 -0
  31. rem/api/routers/models.py +78 -0
  32. rem/api/routers/query.py +360 -0
  33. rem/api/routers/shared_sessions.py +406 -0
  34. rem/auth/__init__.py +13 -3
  35. rem/auth/middleware.py +186 -22
  36. rem/auth/providers/__init__.py +4 -1
  37. rem/auth/providers/email.py +215 -0
  38. rem/cli/commands/README.md +237 -64
  39. rem/cli/commands/cluster.py +1808 -0
  40. rem/cli/commands/configure.py +4 -7
  41. rem/cli/commands/db.py +386 -143
  42. rem/cli/commands/experiments.py +468 -76
  43. rem/cli/commands/process.py +14 -8
  44. rem/cli/commands/schema.py +97 -50
  45. rem/cli/commands/session.py +336 -0
  46. rem/cli/dreaming.py +2 -2
  47. rem/cli/main.py +29 -6
  48. rem/config.py +10 -3
  49. rem/models/core/core_model.py +7 -1
  50. rem/models/core/experiment.py +58 -14
  51. rem/models/core/rem_query.py +5 -2
  52. rem/models/entities/__init__.py +25 -0
  53. rem/models/entities/domain_resource.py +38 -0
  54. rem/models/entities/feedback.py +123 -0
  55. rem/models/entities/message.py +30 -1
  56. rem/models/entities/ontology.py +1 -1
  57. rem/models/entities/ontology_config.py +1 -1
  58. rem/models/entities/session.py +83 -0
  59. rem/models/entities/shared_session.py +180 -0
  60. rem/models/entities/subscriber.py +175 -0
  61. rem/models/entities/user.py +1 -0
  62. rem/registry.py +10 -4
  63. rem/schemas/agents/core/agent-builder.yaml +134 -0
  64. rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
  65. rem/schemas/agents/examples/contract-extractor.yaml +1 -1
  66. rem/schemas/agents/examples/cv-parser.yaml +1 -1
  67. rem/schemas/agents/rem.yaml +7 -3
  68. rem/services/__init__.py +3 -1
  69. rem/services/content/service.py +92 -19
  70. rem/services/email/__init__.py +10 -0
  71. rem/services/email/service.py +459 -0
  72. rem/services/email/templates.py +360 -0
  73. rem/services/embeddings/api.py +4 -4
  74. rem/services/embeddings/worker.py +16 -16
  75. rem/services/phoenix/client.py +154 -14
  76. rem/services/postgres/README.md +197 -15
  77. rem/services/postgres/__init__.py +2 -1
  78. rem/services/postgres/diff_service.py +547 -0
  79. rem/services/postgres/pydantic_to_sqlalchemy.py +470 -140
  80. rem/services/postgres/repository.py +132 -0
  81. rem/services/postgres/schema_generator.py +205 -4
  82. rem/services/postgres/service.py +6 -6
  83. rem/services/rem/parser.py +44 -9
  84. rem/services/rem/service.py +36 -2
  85. rem/services/session/compression.py +137 -51
  86. rem/services/session/reload.py +15 -8
  87. rem/settings.py +515 -27
  88. rem/sql/background_indexes.sql +21 -16
  89. rem/sql/migrations/001_install.sql +387 -54
  90. rem/sql/migrations/002_install_models.sql +2304 -377
  91. rem/sql/migrations/003_optional_extensions.sql +326 -0
  92. rem/sql/migrations/004_cache_system.sql +548 -0
  93. rem/sql/migrations/005_schema_update.sql +145 -0
  94. rem/utils/README.md +45 -0
  95. rem/utils/__init__.py +18 -0
  96. rem/utils/date_utils.py +2 -2
  97. rem/utils/files.py +157 -1
  98. rem/utils/model_helpers.py +156 -1
  99. rem/utils/schema_loader.py +220 -22
  100. rem/utils/sql_paths.py +146 -0
  101. rem/utils/sql_types.py +3 -1
  102. rem/utils/vision.py +1 -1
  103. rem/workers/__init__.py +3 -1
  104. rem/workers/db_listener.py +579 -0
  105. rem/workers/unlogged_maintainer.py +463 -0
  106. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/METADATA +340 -229
  107. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/RECORD +109 -80
  108. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/WHEEL +1 -1
  109. rem/sql/002_install_models.sql +0 -1068
  110. rem/sql/install_models.sql +0 -1051
  111. rem/sql/migrations/003_seed_default_user.sql +0 -48
  112. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/entry_points.txt +0 -0
rem/utils/README.md CHANGED
@@ -4,6 +4,7 @@
 
 1. [SQL Types](#sql-types-sql_typespy) - Pydantic to PostgreSQL type mapping
 2. [Embeddings](#embeddings-embeddingspy) - Vector embeddings generation
+3. [Files](#files-filespy) - File utilities and DataFrame I/O
 
 ## SQL Types (`sql_types.py`)
 
@@ -581,3 +582,47 @@ This will demonstrate:
 - `sql_types.py` - Use `embedding_provider` in json_schema_extra for TEXT fields
 - OpenAI Embeddings API: https://platform.openai.com/docs/api-reference/embeddings
 - pgvector Documentation: https://github.com/pgvector/pgvector
+
+---
+
+## Files (`files.py`)
+
+File utilities including temporary file handling and DataFrame I/O with automatic format detection.
+
+### DataFrame I/O
+
+Read and write DataFrames with format auto-detected from file extension:
+
+```python
+from rem.utils.files import read_dataframe, write_dataframe
+
+# Read - format inferred from extension
+df = read_dataframe("data.csv")
+df = read_dataframe("data.parquet")
+df = read_dataframe("data.xlsx")
+
+# Read from bytes (e.g., from S3)
+df = read_dataframe(content_bytes, filename="data.csv")
+
+# Write - format inferred from extension
+write_dataframe(df, "output.parquet")
+```
+
+**Supported formats**: `.csv`, `.tsv`, `.parquet`, `.json`, `.jsonl`, `.avro`, `.xlsx`, `.xls`, `.ods`, `.ipc`, `.arrow`, `.feather`
+
+Note: Some formats require optional dependencies (e.g., `fastexcel` for Excel).
+
+### Temporary File Utilities
+
+```python
+from rem.utils.files import temp_file_from_bytes, temp_directory
+
+# Create temp file from bytes, auto-cleanup
+with temp_file_from_bytes(pdf_bytes, suffix=".pdf") as tmp_path:
+    result = process_pdf(tmp_path)
+
+# Create temp directory, auto-cleanup
+with temp_directory() as tmp_dir:
+    # Work with files in tmp_dir
+    pass
+```
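
The new README section documents the DataFrame I/O helpers and the temp-file context managers separately, but they compose naturally. A minimal round-trip sketch, assuming only the API shown above plus a Polars version that has `DataFrame.equals`; the sample data is illustrative:

```python
from pathlib import Path

import polars as pl

from rem.utils.files import read_dataframe, temp_directory, write_dataframe

df = pl.DataFrame({"id": [1, 2], "name": ["a", "b"]})

# Write into a self-cleaning temp directory; format inferred from .parquet
with temp_directory() as tmp_dir:
    out = Path(tmp_dir) / "sample.parquet"
    write_dataframe(df, out)
    assert read_dataframe(out).equals(df)
# tmp_dir (and sample.parquet) are cleaned up here
```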
rem/utils/__init__.py CHANGED
@@ -5,6 +5,7 @@ Utility functions and helpers for the REM system:
 - sql_types: Pydantic to PostgreSQL type mapping
 - embeddings: Vector embeddings generation using requests library
 - user_id: Deterministic UUID generation from email addresses
+- sql_paths: SQL file path resolution for packages and user migrations
 """
 
 from .embeddings import (
@@ -24,6 +25,15 @@ from .user_id import (
     is_valid_uuid,
     user_id_to_uuid,
 )
+from .sql_paths import (
+    USER_SQL_DIR_CONVENTION,
+    get_package_sql_dir,
+    get_package_migrations_dir,
+    get_user_sql_dir,
+    list_package_migrations,
+    list_user_migrations,
+    list_all_migrations,
+)
 
 __all__ = [
     # SQL Types
@@ -40,4 +50,12 @@ __all__ = [
     "email_to_user_id",
     "user_id_to_uuid",
     "is_valid_uuid",
+    # SQL Paths
+    "USER_SQL_DIR_CONVENTION",
+    "get_package_sql_dir",
+    "get_package_migrations_dir",
+    "get_user_sql_dir",
+    "list_package_migrations",
+    "list_user_migrations",
+    "list_all_migrations",
 ]
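
With these re-exports, the migration helpers become importable straight from `rem.utils`. A hedged sketch of usage: only the names are confirmed by this diff, so the zero-argument calls and the assumption that the `list_*` helpers return iterables of paths are illustrative, not the package's documented API:

```python
# Names are confirmed by the diff above; signatures are assumptions.
from rem.utils import USER_SQL_DIR_CONVENTION, list_all_migrations

print(USER_SQL_DIR_CONVENTION)           # conventional user SQL directory
for migration in list_all_migrations():  # assumed: iterable of migration paths
    print(migration)
```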
rem/utils/date_utils.py CHANGED
@@ -14,7 +14,7 @@ Convention:
 See CLAUDE.md Section 1 (Datetime Convention) for details.
 """
 
-from datetime import datetime, timedelta
+from datetime import UTC, datetime, timedelta
 from typing import Optional
 
 
@@ -30,7 +30,7 @@ def utc_now() -> datetime:
     >>> now.tzinfo is None
     True
     """
-    return datetime.utcnow()
+    return datetime.now(UTC).replace(tzinfo=None)
 
 
 def to_iso(dt: datetime) -> str:
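
This keeps `utc_now()` returning a naive UTC datetime (preserving the `tzinfo is None` doctest) while moving off `datetime.utcnow()`, which is deprecated as of Python 3.12. A quick equivalence check:

```python
from datetime import UTC, datetime

# Both yield a naive datetime carrying the current UTC wall-clock time;
# only the second avoids the DeprecationWarning on Python 3.12+.
old = datetime.utcnow()                       # deprecated
new = datetime.now(UTC).replace(tzinfo=None)  # replacement

assert new.tzinfo is None
assert abs((new - old).total_seconds()) < 1.0  # same instant, give or take
```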
rem/utils/files.py CHANGED
@@ -3,13 +3,18 @@ File utilities for consistent file handling throughout REM.
 
 Provides context managers and helpers for temporary file operations,
 ensuring proper cleanup and consistent patterns.
+
+Also provides DataFrame I/O utilities using Polars with automatic
+format detection based on file extension.
 """
 
 import tempfile
 from contextlib import contextmanager
+from io import BytesIO
 from pathlib import Path
-from typing import Generator, Optional
+from typing import Generator, Optional, Union
 
+import polars as pl
 from loguru import logger
 
 
@@ -165,3 +170,154 @@ def safe_delete(path: Path) -> bool:
     except Exception as e:
         logger.warning(f"Failed to delete {path}: {e}")
         return False
+
+
+# Extension to Polars reader mapping
+_EXTENSION_READERS = {
+    ".csv": pl.read_csv,
+    ".tsv": lambda p, **kw: pl.read_csv(p, separator="\t", **kw),
+    ".parquet": pl.read_parquet,
+    ".pq": pl.read_parquet,
+    ".json": pl.read_json,
+    ".jsonl": pl.read_ndjson,
+    ".ndjson": pl.read_ndjson,
+    ".avro": pl.read_avro,
+    ".xlsx": pl.read_excel,
+    ".xls": pl.read_excel,
+    ".ods": pl.read_ods,
+    ".ipc": pl.read_ipc,
+    ".arrow": pl.read_ipc,
+    ".feather": pl.read_ipc,
+}
+
+# Extension to Polars writer mapping
+_EXTENSION_WRITERS = {
+    ".csv": "write_csv",
+    ".tsv": "write_csv",  # with separator="\t"
+    ".parquet": "write_parquet",
+    ".pq": "write_parquet",
+    ".json": "write_json",
+    ".jsonl": "write_ndjson",
+    ".ndjson": "write_ndjson",
+    ".avro": "write_avro",
+    ".xlsx": "write_excel",
+    ".ipc": "write_ipc",
+    ".arrow": "write_ipc",
+    ".feather": "write_ipc",
+}
+
+
+def read_dataframe(
+    source: Union[str, Path, bytes],
+    filename: Optional[str] = None,
+    **kwargs,
+) -> pl.DataFrame:
+    """
+    Read a DataFrame from a file, inferring format from extension.
+
+    Supports all Polars-compatible formats:
+    - CSV (.csv), TSV (.tsv)
+    - Parquet (.parquet, .pq)
+    - JSON (.json), JSONL/NDJSON (.jsonl, .ndjson)
+    - Avro (.avro)
+    - Excel (.xlsx, .xls)
+    - OpenDocument (.ods)
+    - Arrow IPC (.ipc, .arrow, .feather)
+
+    Args:
+        source: File path (str/Path) or bytes content
+        filename: Required when source is bytes, to determine format
+        **kwargs: Additional arguments passed to the Polars reader
+
+    Returns:
+        Polars DataFrame
+
+    Raises:
+        ValueError: If format cannot be determined or is unsupported
+
+    Examples:
+        >>> df = read_dataframe("data.csv")
+        >>> df = read_dataframe("data.parquet")
+        >>> df = read_dataframe(csv_bytes, filename="data.csv")
+    """
+    # Determine the file extension
+    if isinstance(source, bytes):
+        if not filename:
+            raise ValueError("filename is required when source is bytes")
+        ext = Path(filename).suffix.lower()
+        # For bytes, we need to wrap in BytesIO
+        file_like = BytesIO(source)
+    else:
+        path = Path(source)
+        ext = path.suffix.lower()
+        file_like = path
+
+    # Get the appropriate reader
+    reader = _EXTENSION_READERS.get(ext)
+    if reader is None:
+        supported = ", ".join(sorted(_EXTENSION_READERS.keys()))
+        raise ValueError(
+            f"Unsupported file format: {ext}. "
+            f"Supported formats: {supported}"
+        )
+
+    try:
+        return reader(file_like, **kwargs)
+    except Exception as e:
+        logger.error(f"Failed to read DataFrame from {ext} format: {e}")
+        raise
+
+
+def write_dataframe(
+    df: pl.DataFrame,
+    dest: Union[str, Path],
+    **kwargs,
+) -> None:
+    """
+    Write a DataFrame to a file, inferring format from extension.
+
+    Supports most Polars-writable formats:
+    - CSV (.csv), TSV (.tsv)
+    - Parquet (.parquet, .pq)
+    - JSON (.json), JSONL/NDJSON (.jsonl, .ndjson)
+    - Avro (.avro)
+    - Excel (.xlsx)
+    - Arrow IPC (.ipc, .arrow, .feather)
+
+    Args:
+        df: Polars DataFrame to write
+        dest: Destination file path
+        **kwargs: Additional arguments passed to the Polars writer
+
+    Raises:
+        ValueError: If format cannot be determined or is unsupported
+
+    Examples:
+        >>> write_dataframe(df, "output.csv")
+        >>> write_dataframe(df, "output.parquet")
+        >>> write_dataframe(df, "output.jsonl")
+    """
+    path = Path(dest)
+    ext = path.suffix.lower()
+
+    writer_method = _EXTENSION_WRITERS.get(ext)
+    if writer_method is None:
+        supported = ", ".join(sorted(_EXTENSION_WRITERS.keys()))
+        raise ValueError(
+            f"Unsupported file format for writing: {ext}. "
+            f"Supported formats: {supported}"
+        )
+
+    # Ensure parent directory exists
+    ensure_parent_exists(path)
+
+    # Handle TSV special case
+    if ext == ".tsv":
+        kwargs.setdefault("separator", "\t")
+
+    try:
+        writer = getattr(df, writer_method)
+        writer(path, **kwargs)
+    except Exception as e:
+        logger.error(f"Failed to write DataFrame to {ext} format: {e}")
+        raise
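
Two branches worth noting in `read_dataframe`: bytes input is only resolvable when `filename` supplies an extension, and unknown extensions fail fast with the supported list in the error. A short exercise of both, using only the API shown above:

```python
from rem.utils.files import read_dataframe

csv_bytes = b"id,name\n1,a\n2,b\n"

# Bytes need a filename so the extension (and thus the reader) can be resolved
df = read_dataframe(csv_bytes, filename="data.csv")
assert df.shape == (2, 2)

# Unknown extensions raise immediately, listing the supported formats
try:
    read_dataframe(csv_bytes, filename="data.unknown")
except ValueError as e:
    print(e)  # Unsupported file format: .unknown. Supported formats: ...
```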
rem/utils/model_helpers.py CHANGED
@@ -16,8 +16,12 @@ Embedding Field Detection:
 Table Name Inference:
 1. model_config.json_schema_extra.table_name
 2. CamelCase → snake_case + pluralization
+
+Model Resolution:
+- model_from_arbitrary_casing: Resolve model class from flexible input casing
 """
 
+import re
 from typing import Any, Type
 
 from loguru import logger
@@ -94,7 +98,9 @@ def get_table_name(model: Type[BaseModel]) -> str:
     if isinstance(model_config, dict):
         json_extra = model_config.get("json_schema_extra", {})
         if isinstance(json_extra, dict) and "table_name" in json_extra:
-            return json_extra["table_name"]
+            table_name = json_extra["table_name"]
+            if isinstance(table_name, str):
+                return table_name
 
     # Infer from class name
     name = model.__name__
@@ -234,3 +240,152 @@ def get_model_metadata(model: Type[BaseModel]) -> dict[str, Any]:
         "entity_key_field": get_entity_key_field(model),
         "embeddable_fields": get_embeddable_fields(model),
     }
+
+
+def normalize_to_title_case(name: str) -> str:
+    """
+    Normalize arbitrary casing to TitleCase (PascalCase).
+
+    Handles various input formats:
+    - kebab-case: domain-resource → DomainResource
+    - snake_case: domain_resource → DomainResource
+    - lowercase: domainresource → Domainresource (single word)
+    - TitleCase: DomainResource → DomainResource (passthrough)
+    - Mixed: Domain-Resource, DOMAIN_RESOURCE → DomainResource
+
+    Args:
+        name: Input name in any casing format
+
+    Returns:
+        TitleCase (PascalCase) version of the name
+
+    Example:
+        >>> normalize_to_title_case("domain-resource")
+        'DomainResource'
+        >>> normalize_to_title_case("domain_resources")
+        'DomainResources'
+        >>> normalize_to_title_case("DomainResource")
+        'DomainResource'
+    """
+    # If already TitleCase (starts with uppercase, has no delimiters, and has
+    # at least one lowercase letter), return as-is
+    if (
+        name
+        and name[0].isupper()
+        and '-' not in name
+        and '_' not in name
+        and any(c.islower() for c in name)
+    ):
+        return name
+
+    # Split on common delimiters (hyphen, underscore)
+    parts = re.split(r'[-_]', name)
+
+    # Capitalize first letter of each part, lowercase the rest
+    normalized_parts = [part.capitalize() for part in parts if part]
+
+    return "".join(normalized_parts)
+
+
+def model_from_arbitrary_casing(
+    name: str,
+    registry: dict[str, Type[BaseModel]] | None = None,
+) -> Type[BaseModel]:
+    """
+    Resolve a model class from arbitrary casing input.
+
+    REM entity models use strict TitleCase (PascalCase) naming. This function
+    allows flexible input formats while maintaining consistency:
+
+    Input formats supported:
+    - kebab-case: domain-resource, domain-resources
+    - snake_case: domain_resource, domain_resources
+    - lowercase: resource, domainresource
+    - TitleCase: Resource, DomainResource
+
+    Args:
+        name: Model name in any supported casing format
+        registry: Optional dict mapping TitleCase names to model classes.
+            If not provided, uses rem.models.entities module.
+
+    Returns:
+        The resolved Pydantic model class
+
+    Raises:
+        ValueError: If no model matches the normalized name
+
+    Example:
+        >>> model = model_from_arbitrary_casing("domain-resources")
+        >>> model.__name__
+        'DomainResource'
+        >>> model = model_from_arbitrary_casing("Resource")
+        >>> model.__name__
+        'Resource'
+    """
+    # Build default registry from entities module if not provided
+    if registry is None:
+        from rem.models.entities import (
+            DomainResource,
+            Feedback,
+            File,
+            ImageResource,
+            Message,
+            Moment,
+            Ontology,
+            OntologyConfig,
+            Resource,
+            Schema,
+            Session,
+            User,
+        )
+
+        registry = {
+            "Resource": Resource,
+            "Resources": Resource,  # Plural alias
+            "DomainResource": DomainResource,
+            "DomainResources": DomainResource,  # Plural alias
+            "ImageResource": ImageResource,
+            "ImageResources": ImageResource,
+            "File": File,
+            "Files": File,
+            "Message": Message,
+            "Messages": Message,
+            "Moment": Moment,
+            "Moments": Moment,
+            "Session": Session,
+            "Sessions": Session,
+            "Feedback": Feedback,
+            "User": User,
+            "Users": User,
+            "Schema": Schema,
+            "Schemas": Schema,
+            "Ontology": Ontology,
+            "Ontologies": Ontology,
+            "OntologyConfig": OntologyConfig,
+            "OntologyConfigs": OntologyConfig,
+        }
+
+    # Normalize input to TitleCase
+    normalized = normalize_to_title_case(name)
+
+    # Look up in registry
+    if normalized in registry:
+        logger.debug(f"Resolved model '{name}' → {registry[normalized].__name__}")
+        return registry[normalized]
+
+    # Try without trailing 's' (singular form)
+    if normalized.endswith("s") and normalized[:-1] in registry:
+        logger.debug(f"Resolved model '{name}' → {registry[normalized[:-1]].__name__} (singular)")
+        return registry[normalized[:-1]]
+
+    # Try with trailing 's' (plural form)
+    plural = normalized + "s"
+    if plural in registry:
+        logger.debug(f"Resolved model '{name}' → {registry[plural].__name__} (plural)")
+        return registry[plural]
+
+    available = sorted(set(m.__name__ for m in registry.values()))
+    raise ValueError(
+        f"Unknown model: '{name}' (normalized: '{normalized}'). "
+        f"Available models: {', '.join(available)}"
+    )
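
The resolution order is: normalize casing, exact lookup, then singular/plural fallbacks. A sketch exercising it with an explicit registry; the `Widget` model is hypothetical so the example does not depend on `rem.models.entities`:

```python
from pydantic import BaseModel

from rem.utils.model_helpers import (
    model_from_arbitrary_casing,
    normalize_to_title_case,
)


class Widget(BaseModel):  # hypothetical model, for illustration only
    name: str


registry = {"Widget": Widget}

# Casing is normalized to TitleCase before lookup...
assert normalize_to_title_case("widget_factory") == "WidgetFactory"

# ...then exact match, with trailing-'s' fallbacks in both directions
assert model_from_arbitrary_casing("widget", registry) is Widget
assert model_from_arbitrary_casing("widgets", registry) is Widget  # singular fallback
assert model_from_arbitrary_casing("WIDGET", registry) is Widget
```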