remdb 0.3.141__py3-none-any.whl → 0.3.146__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of remdb might be problematic.

rem/auth/middleware.py CHANGED
@@ -6,6 +6,7 @@ Supports anonymous access with rate limiting when allow_anonymous=True.
 MCP endpoints are always protected unless explicitly disabled.
 
 Design Pattern:
+- Check X-API-Key header first (if API key auth enabled)
 - Check session for user on protected paths
 - Check Bearer token for dev token (non-production only)
 - MCP paths always require authentication (protected service)
@@ -20,6 +21,12 @@ Access Modes (configured in settings.auth):
 - mcp_requires_auth=true (default): MCP always requires login regardless of allow_anonymous
 - mcp_requires_auth=false: MCP follows normal allow_anonymous rules (dev only)
 
+API Key Authentication (configured in settings.api):
+- api_key_enabled=true: Require X-API-Key header for protected endpoints
+- api_key: The secret key to validate against
+- Provides simple programmatic access without OAuth flow
+- X-API-Key header takes precedence over session auth
+
 Dev Token Support (non-production only):
 - GET /api/auth/dev/token returns a Bearer token for test-user
 - Include as: Authorization: Bearer dev_<signature>
@@ -82,6 +89,39 @@ class AuthMiddleware(BaseHTTPMiddleware):
         self.mcp_requires_auth = mcp_requires_auth
         self.mcp_path = mcp_path
 
+    def _check_api_key(self, request: Request) -> dict | None:
+        """
+        Check for valid X-API-Key header.
+
+        Returns:
+            API key user dict if valid, None otherwise
+        """
+        # Only check if API key auth is enabled
+        if not settings.api.api_key_enabled:
+            return None
+
+        # Check for X-API-Key header
+        api_key = request.headers.get("x-api-key")
+        if not api_key:
+            return None
+
+        # Validate against configured API key
+        if settings.api.api_key and api_key == settings.api.api_key:
+            logger.debug("X-API-Key authenticated")
+            return {
+                "id": "api-key-user",
+                "email": "api@rem.local",
+                "name": "API Key User",
+                "provider": "api-key",
+                "tenant_id": "default",
+                "tier": "pro",  # API key users get full access
+                "roles": ["user"],
+            }
+
+        # Invalid API key
+        logger.warning("Invalid X-API-Key provided")
+        return None
+
     def _check_dev_token(self, request: Request) -> dict | None:
         """
         Check for valid dev token in Authorization header (non-production only).
@@ -105,7 +145,7 @@ class AuthMiddleware(BaseHTTPMiddleware):
         # Verify dev token
         from ..api.routers.dev import verify_dev_token
         if verify_dev_token(token):
-            logger.debug(f"Dev token authenticated as test-user")
+            logger.debug("Dev token authenticated as test-user")
             return {
                 "id": "test-user",
                 "email": "test@rem.local",
@@ -142,6 +182,31 @@ class AuthMiddleware(BaseHTTPMiddleware):
         if not is_protected or is_excluded:
             return await call_next(request)
 
+        # Check for X-API-Key header first (if enabled)
+        api_key_user = self._check_api_key(request)
+        if api_key_user:
+            request.state.user = api_key_user
+            request.state.is_anonymous = False
+            return await call_next(request)
+
+        # If API key auth is enabled but no valid key provided, reject immediately
+        if settings.api.api_key_enabled:
+            # Check if X-API-Key header was provided but invalid
+            if request.headers.get("x-api-key"):
+                logger.warning(f"Invalid X-API-Key for: {path}")
+                return JSONResponse(
+                    status_code=401,
+                    content={"detail": "Invalid API key"},
+                    headers={"WWW-Authenticate": 'ApiKey realm="REM API"'},
+                )
+            # No API key provided when required
+            logger.debug(f"Missing X-API-Key for: {path}")
+            return JSONResponse(
+                status_code=401,
+                content={"detail": "API key required. Include X-API-Key header."},
+                headers={"WWW-Authenticate": 'ApiKey realm="REM API"'},
+            )
+
         # Check for dev token (non-production only)
         dev_user = self._check_dev_token(request)
         if dev_user:
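For orientation, a minimal client-side sketch of the X-API-Key flow added above. The base URL and endpoint path are illustrative placeholders (not taken from this diff), and `requests` is just one possible HTTP client:

```python
# Hypothetical usage sketch of the new X-API-Key middleware behaviour.
# Assumes the server runs with API__API_KEY_ENABLED=true and API__API_KEY set.
import requests

API_KEY = "<your-generated-key>"  # must equal settings.api.api_key on the server

resp = requests.get(
    "http://localhost:8000/api/example",  # illustrative endpoint, not from the diff
    headers={"X-API-Key": API_KEY},
)

# Matching key: request proceeds as the synthetic "api-key-user" identity.
# Wrong key: 401 {"detail": "Invalid API key"}.
# Missing key while enabled: 401 {"detail": "API key required. Include X-API-Key header."}
print(resp.status_code)
```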
@@ -125,19 +125,17 @@ def create(
     # Resolve base path: CLI arg > EXPERIMENTS_HOME env var > default "experiments"
     if base_path is None:
         base_path = os.getenv("EXPERIMENTS_HOME", "experiments")
-    # Build dataset reference
+    # Build dataset reference (format auto-detected from file extension)
     if dataset_location == "git":
         dataset_ref = DatasetReference(
             location=DatasetLocation.GIT,
             path="ground-truth/dataset.csv",
-            format="csv",
             description="Ground truth Q&A dataset for evaluation"
         )
     else:  # s3 or hybrid
         dataset_ref = DatasetReference(
             location=DatasetLocation(dataset_location),
             path=f"s3://rem-experiments/{name}/datasets/ground_truth.parquet",
-            format="parquet",
             schema_path="datasets/schema.yaml" if dataset_location == "hybrid" else None,
             description="Ground truth dataset for evaluation"
         )
@@ -930,58 +928,46 @@ def run(
        raise click.Abort()
    click.echo("✓ Evaluator credentials validated")
 
-    # Load dataset using Polars
-    import polars as pl
+    # Load dataset using read_dataframe utility (auto-detects format from extension)
+    from rem.utils.files import read_dataframe
 
    click.echo(f"Loading dataset: {list(config.datasets.keys())[0]}")
    dataset_ref = list(config.datasets.values())[0]
 
-    if dataset_ref.location.value == "git":
-        # Load from Git (local filesystem)
-        dataset_path = Path(base_path) / name / dataset_ref.path
-        if not dataset_path.exists():
-            click.echo(f"Error: Dataset not found: {dataset_path}")
-            raise click.Abort()
-
-        if dataset_ref.format == "csv":
-            dataset_df = pl.read_csv(dataset_path)
-        elif dataset_ref.format == "parquet":
-            dataset_df = pl.read_parquet(dataset_path)
-        elif dataset_ref.format == "jsonl":
-            dataset_df = pl.read_ndjson(dataset_path)
-        else:
-            click.echo(f"Error: Format '{dataset_ref.format}' not yet supported")
-            raise click.Abort()
-    elif dataset_ref.location.value in ["s3", "hybrid"]:
-        # Load from S3 using FS provider
-        from rem.services.fs import FS
-        from io import BytesIO
+    try:
+        if dataset_ref.location.value == "git":
+            # Load from Git (local filesystem)
+            dataset_path = Path(base_path) / name / dataset_ref.path
+            if not dataset_path.exists():
+                click.echo(f"Error: Dataset not found: {dataset_path}")
+                raise click.Abort()
 
-        fs = FS()
+            dataset_df = read_dataframe(dataset_path)
 
-        try:
-            if dataset_ref.format == "csv":
-                content = fs.read(dataset_ref.path)
-                dataset_df = pl.read_csv(BytesIO(content.encode() if isinstance(content, str) else content))
-            elif dataset_ref.format == "parquet":
-                content_bytes = fs.read(dataset_ref.path)
-                dataset_df = pl.read_parquet(BytesIO(content_bytes if isinstance(content_bytes, bytes) else content_bytes.encode()))
-            elif dataset_ref.format == "jsonl":
-                content = fs.read(dataset_ref.path)
-                dataset_df = pl.read_ndjson(BytesIO(content.encode() if isinstance(content, str) else content))
-            else:
-                click.echo(f"Error: Format '{dataset_ref.format}' not yet supported")
-                raise click.Abort()
+        elif dataset_ref.location.value in ["s3", "hybrid"]:
+            # Load from S3 using FS provider
+            from rem.services.fs import FS
 
-            fs = FS()
+            fs = FS()
+            content = fs.read(dataset_ref.path)
+            # Ensure we have bytes
+            if isinstance(content, str):
+                content = content.encode()
+            dataset_df = read_dataframe(content, filename=dataset_ref.path)
            click.echo(f"✓ Loaded dataset from S3")
-        except Exception as e:
-            logger.error(f"Failed to load dataset from S3: {e}")
-            click.echo(f"Error: Could not load dataset from S3")
-            click.echo(f" Path: {dataset_ref.path}")
-            click.echo(f" Format: {dataset_ref.format}")
+
+        else:
+            click.echo(f"Error: Unknown dataset location: {dataset_ref.location.value}")
            raise click.Abort()
-    else:
-        click.echo(f"Error: Unknown dataset location: {dataset_ref.location.value}")
+
+    except ValueError as e:
+        # Unsupported format error from read_dataframe
+        click.echo(f"Error: {e}")
+        raise click.Abort()
+    except Exception as e:
+        logger.error(f"Failed to load dataset: {e}")
+        click.echo(f"Error: Could not load dataset")
+        click.echo(f" Path: {dataset_ref.path}")
        raise click.Abort()
 
    click.echo(f"✓ Loaded dataset: {len(dataset_df)} examples")
@@ -138,18 +138,14 @@ class DatasetReference(BaseModel):
 
     path: str = Field(
         description=(
-            "Path to dataset:\n"
+            "Path to dataset. Format is inferred from file extension.\n"
+            "Supported: .csv, .tsv, .parquet, .json, .jsonl, .xlsx, .ods, .avro, .ipc\n"
             "- Git: Relative path from experiment root (e.g., 'datasets/ground_truth.csv')\n"
-            "- S3: Full S3 URI (e.g., 's3://bucket/experiments/my-exp/datasets/ground_truth.csv')\n"
+            "- S3: Full S3 URI (e.g., 's3://bucket/experiments/my-exp/datasets/data.parquet')\n"
             "- Hybrid: S3 URI for data, Git path for schema"
         )
     )
 
-    format: Literal["csv", "jsonl", "parquet", "json"] = Field(
-        default="csv",
-        description="Dataset file format"
-    )
-
     schema_path: str | None = Field(
         default=None,
         description=(
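For illustration, a minimal sketch of declaring a `DatasetReference` after this change, with the format inferred from the path's extension; the import path is assumed from this wheel's `rem/models/core/experiment.py` and is not shown in the diff itself:

```python
# Sketch only: a DatasetReference without the removed `format` field.
# Import location assumed from rem/models/core/experiment.py in this package.
from rem.models.core.experiment import DatasetLocation, DatasetReference

ref = DatasetReference(
    location=DatasetLocation.GIT,
    path="datasets/ground_truth.parquet",  # format inferred from ".parquet"
    description="Ground truth dataset for evaluation",
)
```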
@@ -262,8 +258,7 @@ class ExperimentConfig(BaseModel):
 datasets:
   ground_truth:
     location: git
-    path: datasets/ground_truth.csv
-    format: csv
+    path: datasets/ground_truth.csv  # format inferred from extension
 results:
   location: git
   base_path: results/
@@ -288,12 +283,10 @@ class ExperimentConfig(BaseModel):
   ground_truth:
     location: s3
     path: s3://rem-prod/experiments/cv-parser-production/datasets/ground_truth.parquet
-    format: parquet
     schema_path: datasets/schema.yaml  # Schema in Git for documentation
   test_cases:
     location: s3
     path: s3://rem-prod/experiments/cv-parser-production/datasets/test_cases.jsonl
-    format: jsonl
 results:
   location: hybrid
   base_path: s3://rem-prod/experiments/cv-parser-production/results/
@@ -558,7 +551,6 @@ class ExperimentConfig(BaseModel):
 
 - **Location**: `{dataset.location.value}`
 - **Path**: `{dataset.path}`
-- **Format**: `{dataset.format}`
 """
         if dataset.description:
             readme += f"- **Description**: {dataset.description}\n"
@@ -629,7 +621,6 @@ EXAMPLE_SMALL_EXPERIMENT = ExperimentConfig(
         "ground_truth": DatasetReference(
             location=DatasetLocation.GIT,
             path="datasets/ground_truth.csv",
-            format="csv",
             description="10 manually curated test cases"
         )
     },
@@ -659,7 +650,6 @@ EXAMPLE_LARGE_EXPERIMENT = ExperimentConfig(
         "ground_truth": DatasetReference(
             location=DatasetLocation.S3,
             path="s3://rem-prod/experiments/cv-parser-production/datasets/ground_truth.parquet",
-            format="parquet",
             schema_path="datasets/schema.yaml",
             description="10,000 CV/resume pairs with ground truth extractions"
         )
@@ -513,18 +513,15 @@ def get_target_metadata() -> MetaData:
     """
     Get SQLAlchemy metadata for Alembic autogenerate.
 
-    This is the main entry point used by alembic/env.py.
+    This is the main entry point used by alembic/env.py and rem db diff.
+
+    Uses the model registry as the source of truth, which includes:
+    - Core REM models (Resource, Message, User, etc.)
+    - User-registered models via @rem.register_model decorator
 
     Returns:
-        SQLAlchemy MetaData object representing current Pydantic models
+        SQLAlchemy MetaData object representing all registered Pydantic models
     """
-    import rem
-
-    package_root = Path(rem.__file__).parent.parent.parent
-    models_dir = package_root / "src" / "rem" / "models" / "entities"
-
-    if not models_dir.exists():
-        logger.error(f"Models directory not found: {models_dir}")
-        return MetaData()
-
-    return build_sqlalchemy_metadata_from_pydantic(models_dir)
+    # build_sqlalchemy_metadata_from_pydantic uses the registry internally,
+    # so no directory path is needed (the parameter is kept for backwards compat)
+    return build_sqlalchemy_metadata_from_pydantic()
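As a rough usage sketch (not part of the diff): listing the tables that the registry-backed metadata now exposes. The import path is an assumption based on the RECORD entry for `rem/services/postgres/pydantic_to_sqlalchemy.py`:

```python
# Hypothetical sketch: inspect what Alembic autogenerate / `rem db diff` would see.
# The module path below is assumed from this wheel's file layout.
from rem.services.postgres.pydantic_to_sqlalchemy import get_target_metadata

metadata = get_target_metadata()  # built from the model registry, no models_dir scan
for table_name in sorted(metadata.tables):
    print(table_name)
```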
rem/settings.py CHANGED
@@ -21,8 +21,8 @@ Example .env file:
     LLM__OPENAI_API_KEY=sk-...
     LLM__ANTHROPIC_API_KEY=sk-ant-...
 
-    # Database (port 5050 for Docker Compose)
-    POSTGRES__CONNECTION_STRING=postgresql://rem:rem@localhost:5050/rem
+    # Database (port 5051 for Docker Compose prebuilt, 5050 for local dev)
+    POSTGRES__CONNECTION_STRING=postgresql://rem:rem@localhost:5051/rem
     POSTGRES__POOL_MIN_SIZE=5
     POSTGRES__POOL_MAX_SIZE=20
     POSTGRES__STATEMENT_TIMEOUT=30000
@@ -464,10 +464,11 @@ class PostgresSettings(BaseSettings):
     )
 
     connection_string: str = Field(
-        default="postgresql://rem:rem@localhost:5050/rem",
-        description="PostgreSQL connection string (default uses Docker Compose port 5050)",
+        default="postgresql://rem:rem@localhost:5051/rem",
+        description="PostgreSQL connection string (default uses Docker Compose prebuilt port 5051)",
     )
 
+
     pool_size: int = Field(
         default=10,
         description="Connection pool size (deprecated, use pool_min_size/pool_max_size)",
@@ -1060,6 +1061,8 @@ class APISettings(BaseSettings):
         API__RELOAD - Enable auto-reload for development
         API__WORKERS - Number of worker processes (production)
         API__LOG_LEVEL - Logging level (debug, info, warning, error)
+        API__API_KEY_ENABLED - Enable X-API-Key header authentication
+        API__API_KEY - API key for X-API-Key authentication
     """
 
     model_config = SettingsConfigDict(
@@ -1094,6 +1097,23 @@ class APISettings(BaseSettings):
         description="Logging level (debug, info, warning, error, critical)",
     )
 
+    api_key_enabled: bool = Field(
+        default=False,
+        description=(
+            "Enable X-API-Key header authentication for API endpoints. "
+            "When enabled, requests must include X-API-Key header with valid key. "
+            "This provides simple API key auth independent of OAuth."
+        ),
+    )
+
+    api_key: str | None = Field(
+        default=None,
+        description=(
+            "API key for X-API-Key authentication. Required when api_key_enabled=true. "
+            "Generate with: python -c \"import secrets; print(secrets.token_urlsafe(32))\""
+        ),
+    )
+
 
 class ModelsSettings(BaseSettings):
     """
rem/utils/README.md CHANGED
@@ -4,6 +4,7 @@
 
 1. [SQL Types](#sql-types-sql_typespy) - Pydantic to PostgreSQL type mapping
 2. [Embeddings](#embeddings-embeddingspy) - Vector embeddings generation
+3. [Files](#files-filespy) - File utilities and DataFrame I/O
 
 ## SQL Types (`sql_types.py`)
 
@@ -581,3 +582,47 @@ This will demonstrate:
 - `sql_types.py` - Use `embedding_provider` in json_schema_extra for TEXT fields
 - OpenAI Embeddings API: https://platform.openai.com/docs/api-reference/embeddings
 - pgvector Documentation: https://github.com/pgvector/pgvector
+
+---
+
+## Files (`files.py`)
+
+File utilities including temporary file handling and DataFrame I/O with automatic format detection.
+
+### DataFrame I/O
+
+Read and write DataFrames with format auto-detected from file extension:
+
+```python
+from rem.utils.files import read_dataframe, write_dataframe
+
+# Read - format inferred from extension
+df = read_dataframe("data.csv")
+df = read_dataframe("data.parquet")
+df = read_dataframe("data.xlsx")
+
+# Read from bytes (e.g., from S3)
+df = read_dataframe(content_bytes, filename="data.csv")
+
+# Write - format inferred from extension
+write_dataframe(df, "output.parquet")
+```
+
+**Supported formats**: `.csv`, `.tsv`, `.parquet`, `.json`, `.jsonl`, `.avro`, `.xlsx`, `.xls`, `.ods`, `.ipc`, `.arrow`, `.feather`
+
+Note: Some formats require optional dependencies (e.g., `fastexcel` for Excel).
+
+### Temporary File Utilities
+
+```python
+from rem.utils.files import temp_file_from_bytes, temp_directory
+
+# Create temp file from bytes, auto-cleanup
+with temp_file_from_bytes(pdf_bytes, suffix=".pdf") as tmp_path:
+    result = process_pdf(tmp_path)
+
+# Create temp directory, auto-cleanup
+with temp_directory() as tmp_dir:
+    # Work with files in tmp_dir
+    pass
+```
rem/utils/files.py CHANGED
@@ -3,13 +3,18 @@ File utilities for consistent file handling throughout REM.
 
 Provides context managers and helpers for temporary file operations,
 ensuring proper cleanup and consistent patterns.
+
+Also provides DataFrame I/O utilities using Polars with automatic
+format detection based on file extension.
 """
 
 import tempfile
 from contextlib import contextmanager
+from io import BytesIO
 from pathlib import Path
-from typing import Generator, Optional
+from typing import Generator, Optional, Union
 
+import polars as pl
 from loguru import logger
 
 
@@ -165,3 +170,154 @@ def safe_delete(path: Path) -> bool:
     except Exception as e:
         logger.warning(f"Failed to delete {path}: {e}")
         return False
+
+
+# Extension to Polars reader mapping
+_EXTENSION_READERS = {
+    ".csv": pl.read_csv,
+    ".tsv": lambda p, **kw: pl.read_csv(p, separator="\t", **kw),
+    ".parquet": pl.read_parquet,
+    ".pq": pl.read_parquet,
+    ".json": pl.read_json,
+    ".jsonl": pl.read_ndjson,
+    ".ndjson": pl.read_ndjson,
+    ".avro": pl.read_avro,
+    ".xlsx": pl.read_excel,
+    ".xls": pl.read_excel,
+    ".ods": pl.read_ods,
+    ".ipc": pl.read_ipc,
+    ".arrow": pl.read_ipc,
+    ".feather": pl.read_ipc,
+}
+
+# Extension to Polars writer mapping
+_EXTENSION_WRITERS = {
+    ".csv": "write_csv",
+    ".tsv": "write_csv",  # with separator="\t"
+    ".parquet": "write_parquet",
+    ".pq": "write_parquet",
+    ".json": "write_json",
+    ".jsonl": "write_ndjson",
+    ".ndjson": "write_ndjson",
+    ".avro": "write_avro",
+    ".xlsx": "write_excel",
+    ".ipc": "write_ipc",
+    ".arrow": "write_ipc",
+    ".feather": "write_ipc",
+}
+
+
+def read_dataframe(
+    source: Union[str, Path, bytes],
+    filename: Optional[str] = None,
+    **kwargs,
+) -> pl.DataFrame:
+    """
+    Read a DataFrame from a file, inferring format from extension.
+
+    Supports all Polars-compatible formats:
+    - CSV (.csv), TSV (.tsv)
+    - Parquet (.parquet, .pq)
+    - JSON (.json), JSONL/NDJSON (.jsonl, .ndjson)
+    - Avro (.avro)
+    - Excel (.xlsx, .xls)
+    - OpenDocument (.ods)
+    - Arrow IPC (.ipc, .arrow, .feather)
+
+    Args:
+        source: File path (str/Path) or bytes content
+        filename: Required when source is bytes, to determine format
+        **kwargs: Additional arguments passed to the Polars reader
+
+    Returns:
+        Polars DataFrame
+
+    Raises:
+        ValueError: If format cannot be determined or is unsupported
+
+    Examples:
+        >>> df = read_dataframe("data.csv")
+        >>> df = read_dataframe("data.parquet")
+        >>> df = read_dataframe(csv_bytes, filename="data.csv")
+    """
+    # Determine the file extension
+    if isinstance(source, bytes):
+        if not filename:
+            raise ValueError("filename is required when source is bytes")
+        ext = Path(filename).suffix.lower()
+        # For bytes, we need to wrap in BytesIO
+        file_like = BytesIO(source)
+    else:
+        path = Path(source)
+        ext = path.suffix.lower()
+        file_like = path
+
+    # Get the appropriate reader
+    reader = _EXTENSION_READERS.get(ext)
+    if reader is None:
+        supported = ", ".join(sorted(_EXTENSION_READERS.keys()))
+        raise ValueError(
+            f"Unsupported file format: {ext}. "
+            f"Supported formats: {supported}"
+        )
+
+    try:
+        return reader(file_like, **kwargs)
+    except Exception as e:
+        logger.error(f"Failed to read DataFrame from {ext} format: {e}")
+        raise
+
+
+def write_dataframe(
+    df: pl.DataFrame,
+    dest: Union[str, Path],
+    **kwargs,
+) -> None:
+    """
+    Write a DataFrame to a file, inferring format from extension.
+
+    Supports most Polars-writable formats:
+    - CSV (.csv), TSV (.tsv)
+    - Parquet (.parquet, .pq)
+    - JSON (.json), JSONL/NDJSON (.jsonl, .ndjson)
+    - Avro (.avro)
+    - Excel (.xlsx)
+    - Arrow IPC (.ipc, .arrow, .feather)
+
+    Args:
+        df: Polars DataFrame to write
+        dest: Destination file path
+        **kwargs: Additional arguments passed to the Polars writer
+
+    Raises:
+        ValueError: If format cannot be determined or is unsupported
+
+    Examples:
+        >>> write_dataframe(df, "output.csv")
+        >>> write_dataframe(df, "output.parquet")
+        >>> write_dataframe(df, "output.jsonl")
+    """
+    path = Path(dest)
+    ext = path.suffix.lower()
+
+    writer_method = _EXTENSION_WRITERS.get(ext)
+    if writer_method is None:
+        supported = ", ".join(sorted(_EXTENSION_WRITERS.keys()))
+        raise ValueError(
+            f"Unsupported file format for writing: {ext}. "
+            f"Supported formats: {supported}"
+        )
+
+    # Ensure parent directory exists
+    ensure_parent_exists(path)
+
+    # Handle TSV special case
+    if ext == ".tsv":
+        kwargs.setdefault("separator", "\t")
+
+    try:
+        writer = getattr(df, writer_method)
+        writer(path, **kwargs)
+    except Exception as e:
+        logger.error(f"Failed to write DataFrame to {ext} format: {e}")
+        raise
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: remdb
-Version: 0.3.141
+Version: 0.3.146
 Summary: Resources Entities Moments - Bio-inspired memory system for agentic AI workloads
 Project-URL: Homepage, https://github.com/Percolation-Labs/reminiscent
 Project-URL: Documentation, https://github.com/Percolation-Labs/reminiscent/blob/main/README.md
@@ -12,9 +12,11 @@ Keywords: agents,ai,mcp,memory,postgresql,vector-search
 Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Requires-Python: <3.13,>=3.12
+Requires-Python: <3.14,>=3.11
 Requires-Dist: aioboto3>=13.0.0
 Requires-Dist: arize-phoenix>=5.0.0
 Requires-Dist: asyncpg>=0.30.0
@@ -143,9 +145,9 @@ pip install "remdb[all]"
 git clone https://github.com/Percolation-Labs/remstack-lab.git
 cd remstack-lab
 
-# Start PostgreSQL with docker-compose
+# Start services (PostgreSQL, Phoenix observability)
 curl -O https://gist.githubusercontent.com/percolating-sirsh/d117b673bc0edfdef1a5068ccd3cf3e5/raw/docker-compose.prebuilt.yml
-docker compose -f docker-compose.prebuilt.yml up -d postgres
+docker compose -f docker-compose.prebuilt.yml up -d
 
 # Configure REM (creates ~/.rem/config.yaml and installs database schema)
 # Add --claude-desktop to register with Claude Desktop app
@@ -177,7 +179,7 @@ rem ask "What is the Bitcoin whitepaper about?"
 Once configured, you can also use the OpenAI-compatible chat completions API:
 
 ```bash
-# Start the API server (if not using docker-compose for API)
+# Start all services (PostgreSQL, Phoenix, API)
 docker compose -f docker-compose.prebuilt.yml up -d
 
 # Test the API
@@ -3,7 +3,7 @@ rem/config.py,sha256=cyXFpqgTvHeYeIriiQHGC1jSokp55BkJtMS1cVu-C1M,6769
 rem/mcp_server.py,sha256=OK0XaO2k_7BnVRozOfH_xRL51SkRN9kLoNNp_zrrGeA,1383
 rem/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 rem/registry.py,sha256=AAGcr7oRHiHsX2mu7TL4EgKw39IFea8F-YIgbX58CUM,10545
-rem/settings.py,sha256=BSV06mmQZxefSG2J8Uxwhe6VJsl7FsFhOODXdiVyj6k,53591
+rem/settings.py,sha256=fB9_J6S8XYXTWC4qlkhVDBsnmJc-VCedd-jVQ8ceajE,54361
 rem/agentic/README.md,sha256=brF1Z1V6s8z5TLoyNPQ3BC5mqDy648QRPOQmGu6Jkzw,21815
 rem/agentic/__init__.py,sha256=-UZiEYpodfD5xDns6L0nYSqK9owr3NxiWsq6vmK1tGk,1268
 rem/agentic/context.py,sha256=EBw2rQ85mygJZHaHfO2A59cu3rgI2vfy1A0Qhri99DE,6282
@@ -49,7 +49,7 @@ rem/api/routers/chat/sse_events.py,sha256=CS1yuor09Qq48bpJBODu7INS94S4GjS8YsPZkI
 rem/api/routers/chat/streaming.py,sha256=bHSDfTjpMS7wMjF1ovKWoH7mPngh7O5EHMzadOLBxKg,37287
 rem/auth/README.md,sha256=BpZUqEVYMUpQG4guykyuvmtzlH3_LsGzspuRZS20i8k,8631
 rem/auth/__init__.py,sha256=NuOzrlipfiRUM7lwuEJTzc-b8lYWCxDl_7V82FxElJA,700
-rem/auth/middleware.py,sha256=_55Va07HElp_fhB9ROyI8bhRXCSCVZ7bROw2vXCer1Y,7381
+rem/auth/middleware.py,sha256=3RaMTFTtu8o3VXfXGazFHSUlDG-wDJNi4msE83w_RwM,9923
 rem/auth/providers/__init__.py,sha256=NMdH84_UC2_9zX7yv-u26FxdGuDipz42S-YO1vc8w1M,318
 rem/auth/providers/base.py,sha256=wa7lrgM0AetZnITc45QFyiNHXgVVoWmZgW9tBpInDLw,11831
 rem/auth/providers/google.py,sha256=p3JAYOtyWwiN6T05rZI6sQJqrXhHaunaNOucHTBzWWc,5346
@@ -65,7 +65,7 @@ rem/cli/commands/cluster.py,sha256=MQThC3Da73ixVQ75UYxleQlB8AqPQLzEK73eaB8pNFI,6
 rem/cli/commands/configure.py,sha256=GQzlER8PkUCTaeYuLbXUYoeqlmEDafcjAcPJrDchC1I,16595
 rem/cli/commands/db.py,sha256=5Enkq7CG7fOpIwMGQ_yF4mTEP3wmoZK3ic3jxERi7Fk,24953
 rem/cli/commands/dreaming.py,sha256=2P8nyX9gnRgyCJZrDuyJt5_YAsFmjUGa6dg7OvoLA8k,13292
-rem/cli/commands/experiments.py,sha256=wAYyh3ti8QJkSS79mXkQ8zOAQQh3vNOwOc3g9xFlE4I,63319
+rem/cli/commands/experiments.py,sha256=dRJKtvrGQu0a9bQZaLnVfdWhaur2bKYviixqOnYQC-k,62472
 rem/cli/commands/mcp.py,sha256=PiP_zXflZ2lPVgmH3N8EGOWXDSfvNTQJD-MMW3ym3xo,1666
 rem/cli/commands/process.py,sha256=DCV7KuS3idRkJ7hsl4uxFqdt67RbxuCP3DL9VeqQuFQ,8630
 rem/cli/commands/scaffold.py,sha256=hv2-ozD1bbD8FEky7OdIDzcgu-KISs5ek6APqykdZ6I,1618
@@ -75,7 +75,7 @@ rem/cli/commands/session.py,sha256=rUDeLnBNwc6A1zSOOs-uHyfixDmBYlVtLToJpbPxo7I,9
 rem/models/core/__init__.py,sha256=BBbARx5_7uVz5FuuoPdFm3tbiWZWubs97i3smU0UxXg,1449
 rem/models/core/core_model.py,sha256=aYVx2905n0b6TGLnIiV5YMWO2O8pxHVtLEWljWvURxU,2617
 rem/models/core/engram.py,sha256=CuoilA74hmlgMT2mZCoEEh0jFsMrKUw9wdyFOygBa8E,10135
-rem/models/core/experiment.py,sha256=F3MEJSFnkZYSUDTIg2W1SYFP1D2cCnWED99CJZN_i7g,20909
+rem/models/core/experiment.py,sha256=Zc5kVtUa05mxEzvJHuXGvGMffa0Uk9Lo16XNsbIDfHw,20777
 rem/models/core/inline_edge.py,sha256=BVOYchWzb5vYRPTBJEKh06YDwzL_NKdvCRg4BtXRn7g,5059
 rem/models/core/rem_query.py,sha256=EpD1NdhxgptW81ID1_q2pnYhioM8J4Efx-JFJsNW_i0,8108
 rem/models/entities/__init__.py,sha256=p3eOFNZdlQ_dF0ZdXR9sHPHu_btMd5mZmw0v5iL8uik,1878
@@ -160,7 +160,7 @@ rem/services/postgres/README.md,sha256=qPimP6367CvfcuF1DjC0VSL0uoc_ThOVeZ0ioJy_z
 rem/services/postgres/__init__.py,sha256=hPOVs7Gi42qjz9ySu1y1Fmxcyo21UrhVycw_y4YAF-0,563
 rem/services/postgres/diff_service.py,sha256=vffYq6e5Ce_b8yuS01u9r27SYCUnmODa5mH8eeZpInM,19402
 rem/services/postgres/migration_service.py,sha256=2irsWfzczZ0_-wge6ReyCrFrE3HxvnlwKAEp8mWqtQo,14897
-rem/services/postgres/pydantic_to_sqlalchemy.py,sha256=Ao1Qh5epVetw8BfEiBmd-xSfUiBOzcROgrfq5BM_hTM,15950
+rem/services/postgres/pydantic_to_sqlalchemy.py,sha256=Gnphk41CPfgYNgEYlnGV1bkz7o56n8PQe1v47mWks-E,16036
 rem/services/postgres/register_type.py,sha256=KwttMTvCdtLvSyW2YICmZ71BBB4oomIoX9IJT-qyEn8,11169
 rem/services/postgres/repository.py,sha256=CYbAozL7iAjVnpV8HbQ1HNu8UUlgHzvyBds1qTICfQE,15788
 rem/services/postgres/schema_generator.py,sha256=TpINJtEPcrbISbz3HUo7zEI1C-542HEAsf2m5kOuGM8,23906
@@ -184,7 +184,7 @@ rem/sql/migrations/002_install_models.sql,sha256=SMhvFKp70ctaln63ghZq9ED34LWR0__
 rem/sql/migrations/003_optional_extensions.sql,sha256=QACy3J50ZgV_4BHNkkT3iswkE1ijc0oCAOgavv6KC5g,12443
 rem/sql/migrations/004_cache_system.sql,sha256=KBpU3hQY08So_MkMfcOwTZDngTMqa_3kA0ujQ98K33k,19672
 rem/utils/AGENTIC_CHUNKING.md,sha256=Z9IyL5yoFTlvamPE5KA7cs0Btoc_6bq8hh7Q_WARlw8,17230
-rem/utils/README.md,sha256=KMgT5nZT023nFWLZz_f3nHSZZkHmmoS7iTLIVRStk3Y,16830
+rem/utils/README.md,sha256=Osix2SVYAECMFTFnLhn8D8rsPrtNaCBWfkZfkn5RS60,18087
 rem/utils/__init__.py,sha256=ZGMTgR7g-V3fhfgKo791wGBhdxy72xTJSo7Q_xwkQRI,1417
 rem/utils/agentic_chunking.py,sha256=B7TIggqOSeHJ5clPPY7O712Wb1xz52Y_2gCPiEZlWqY,21841
 rem/utils/batch_ops.py,sha256=LgzttGV0O_a8Y70fnsX3XwlSZWZKRTnwBBwxP09BOvw,11689
@@ -194,7 +194,7 @@ rem/utils/constants.py,sha256=aX2GwgtaZx3wztsGNa8HFyKxAWNoZlZv9k45gQ7K3Qs,3283
 rem/utils/date_utils.py,sha256=LiqyiYcvfw8I-zvfDzPEs1PnwHOEXfmqn_6BDqefEBo,5542
 rem/utils/dict_utils.py,sha256=qp5myXSgGV2Daz9X-9SKzQDu2WeQeIBBcgFnqd8VhqY,2905
 rem/utils/embeddings.py,sha256=FnjZFHXgxf__dbubY2HknhDAngizr8j7P28-Sug4-f0,13150
-rem/utils/files.py,sha256=8vMiVljs7kDLfGMvcRtJFUH7_F_z4XYGd9cP0h1G78c,4353
+rem/utils/files.py,sha256=6ax-5vmk_4cI-IG55PT9sKj_DqXBl32RkTRSsxqvgGY,8759
 rem/utils/markdown.py,sha256=zhfSiSRX36vky1b2UOGKsuSr11L2l6Kl_O7iSfwQXBY,401
 rem/utils/mime_types.py,sha256=8KGEuPWVdQ8r1DFLsgiaAgEYqMaaQIk-6lCVOBB1z_A,5346
 rem/utils/model_helpers.py,sha256=Cvqeof9KlhkkBmAFxRLtfsh4m_MQ0N8WukI3IDJcTtw,11743
@@ -213,7 +213,7 @@ rem/workers/dreaming.py,sha256=UqCf-iBUhzBVBRFj7_DtR6q27oRo7EUoal9qqHLzlo4,17823
 rem/workers/engram_processor.py,sha256=Ws92kAILMLK_np3F1HRmhKKXiruLIvFn3o9MY3V2W8g,10779
 rem/workers/sqs_file_processor.py,sha256=tX8S0yo2n1XGvaZ7JUqeGmtTwxybQqz3wkHT2j6Ak7Y,6597
 rem/workers/unlogged_maintainer.py,sha256=KhebhXl3s6DwvHnXXEJ45r5tLK9PNj-0KclNIQVQ68s,15817
-remdb-0.3.141.dist-info/METADATA,sha256=DSHGsUASGuvaGKIViTYvA2fqPs9LmiIXYJa1ZDMd2NU,53247
-remdb-0.3.141.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-remdb-0.3.141.dist-info/entry_points.txt,sha256=gmmrz7tRC1WGUrCMJMg6p5pEP5h5mPYRvWIxp1FYdr0,42
-remdb-0.3.141.dist-info/RECORD,,
+remdb-0.3.146.dist-info/METADATA,sha256=5EN8fzDT5MKmkGWP4W6-bZSPQ6uG9j0rcsUsZdpK-_c,53341
+remdb-0.3.146.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+remdb-0.3.146.dist-info/entry_points.txt,sha256=gmmrz7tRC1WGUrCMJMg6p5pEP5h5mPYRvWIxp1FYdr0,42
+remdb-0.3.146.dist-info/RECORD,,