remdb 0.2.6__py3-none-any.whl → 0.3.118__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of remdb might be problematic.

Files changed (104)
  1. rem/__init__.py +129 -2
  2. rem/agentic/README.md +76 -0
  3. rem/agentic/__init__.py +15 -0
  4. rem/agentic/agents/__init__.py +16 -2
  5. rem/agentic/agents/sse_simulator.py +500 -0
  6. rem/agentic/context.py +28 -22
  7. rem/agentic/llm_provider_models.py +301 -0
  8. rem/agentic/mcp/tool_wrapper.py +29 -3
  9. rem/agentic/otel/setup.py +92 -4
  10. rem/agentic/providers/phoenix.py +32 -43
  11. rem/agentic/providers/pydantic_ai.py +168 -24
  12. rem/agentic/schema.py +358 -21
  13. rem/agentic/tools/rem_tools.py +3 -3
  14. rem/api/README.md +238 -1
  15. rem/api/deps.py +255 -0
  16. rem/api/main.py +154 -37
  17. rem/api/mcp_router/resources.py +1 -1
  18. rem/api/mcp_router/server.py +26 -5
  19. rem/api/mcp_router/tools.py +454 -7
  20. rem/api/middleware/tracking.py +172 -0
  21. rem/api/routers/admin.py +494 -0
  22. rem/api/routers/auth.py +124 -0
  23. rem/api/routers/chat/completions.py +152 -16
  24. rem/api/routers/chat/models.py +7 -3
  25. rem/api/routers/chat/sse_events.py +526 -0
  26. rem/api/routers/chat/streaming.py +608 -45
  27. rem/api/routers/dev.py +81 -0
  28. rem/api/routers/feedback.py +148 -0
  29. rem/api/routers/messages.py +473 -0
  30. rem/api/routers/models.py +78 -0
  31. rem/api/routers/query.py +360 -0
  32. rem/api/routers/shared_sessions.py +406 -0
  33. rem/auth/middleware.py +126 -27
  34. rem/cli/commands/README.md +237 -64
  35. rem/cli/commands/ask.py +15 -11
  36. rem/cli/commands/cluster.py +1300 -0
  37. rem/cli/commands/configure.py +170 -97
  38. rem/cli/commands/db.py +396 -139
  39. rem/cli/commands/experiments.py +278 -96
  40. rem/cli/commands/process.py +22 -15
  41. rem/cli/commands/scaffold.py +47 -0
  42. rem/cli/commands/schema.py +97 -50
  43. rem/cli/main.py +37 -6
  44. rem/config.py +2 -2
  45. rem/models/core/core_model.py +7 -1
  46. rem/models/core/rem_query.py +5 -2
  47. rem/models/entities/__init__.py +21 -0
  48. rem/models/entities/domain_resource.py +38 -0
  49. rem/models/entities/feedback.py +123 -0
  50. rem/models/entities/message.py +30 -1
  51. rem/models/entities/session.py +83 -0
  52. rem/models/entities/shared_session.py +180 -0
  53. rem/models/entities/user.py +10 -3
  54. rem/registry.py +373 -0
  55. rem/schemas/agents/rem.yaml +7 -3
  56. rem/services/content/providers.py +94 -140
  57. rem/services/content/service.py +115 -24
  58. rem/services/dreaming/affinity_service.py +2 -16
  59. rem/services/dreaming/moment_service.py +2 -15
  60. rem/services/embeddings/api.py +24 -17
  61. rem/services/embeddings/worker.py +16 -16
  62. rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
  63. rem/services/phoenix/client.py +252 -19
  64. rem/services/postgres/README.md +159 -15
  65. rem/services/postgres/__init__.py +2 -1
  66. rem/services/postgres/diff_service.py +531 -0
  67. rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
  68. rem/services/postgres/repository.py +132 -0
  69. rem/services/postgres/schema_generator.py +291 -9
  70. rem/services/postgres/service.py +6 -6
  71. rem/services/rate_limit.py +113 -0
  72. rem/services/rem/README.md +14 -0
  73. rem/services/rem/parser.py +44 -9
  74. rem/services/rem/service.py +36 -2
  75. rem/services/session/compression.py +17 -1
  76. rem/services/session/reload.py +1 -1
  77. rem/services/user_service.py +98 -0
  78. rem/settings.py +169 -22
  79. rem/sql/background_indexes.sql +21 -16
  80. rem/sql/migrations/001_install.sql +387 -54
  81. rem/sql/migrations/002_install_models.sql +2320 -393
  82. rem/sql/migrations/003_optional_extensions.sql +326 -0
  83. rem/sql/migrations/004_cache_system.sql +548 -0
  84. rem/utils/__init__.py +18 -0
  85. rem/utils/constants.py +97 -0
  86. rem/utils/date_utils.py +228 -0
  87. rem/utils/embeddings.py +17 -4
  88. rem/utils/files.py +167 -0
  89. rem/utils/mime_types.py +158 -0
  90. rem/utils/model_helpers.py +156 -1
  91. rem/utils/schema_loader.py +284 -21
  92. rem/utils/sql_paths.py +146 -0
  93. rem/utils/sql_types.py +3 -1
  94. rem/utils/vision.py +9 -14
  95. rem/workers/README.md +14 -14
  96. rem/workers/__init__.py +2 -1
  97. rem/workers/db_maintainer.py +74 -0
  98. rem/workers/unlogged_maintainer.py +463 -0
  99. {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/METADATA +598 -171
  100. {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/RECORD +102 -73
  101. {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/WHEEL +1 -1
  102. rem/sql/002_install_models.sql +0 -1068
  103. rem/sql/install_models.sql +0 -1038
  104. {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/entry_points.txt +0 -0
rem/services/phoenix/client.py

@@ -53,7 +53,7 @@ from datetime import datetime
  from pathlib import Path
  from typing import Any, Callable, TYPE_CHECKING, cast
 
- import pandas as pd
+ import polars as pl
  from loguru import logger
 
  from .config import PhoenixConfig
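
The import swap above drives most of the changes in this file: every pandas idiom in `client.py` is replaced by its Polars equivalent. A minimal side-by-side sketch of the correspondences the rest of the diff relies on (illustrative only, not code shipped in the package):

```python
import polars as pl

df = pl.read_csv("golden_set.csv")   # was: pd.read_csv(...)

# pandas df[cols].to_dict("records")  ->  Polars to_dicts()
records = df.to_dicts()              # list of {column: value} dicts

# pandas for _, row in df.iterrows()  ->  iterate the dicts directly
for row in records:
    value = row.get("query")

# Interop at boundaries where a dependency still returns pandas
# (pl.from_pandas requires the optional pandas/pyarrow extras):
# traces_df = pl.from_pandas(pandas_df)
```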
@@ -64,6 +64,95 @@ if TYPE_CHECKING:
      from phoenix.client.resources.experiments.types import RanExperiment
 
 
+ def dataframe_to_phoenix_dataset(
+     client: "PhoenixClient",
+     df: pl.DataFrame,
+     dataset_name: str,
+     input_keys: list[str] | None = None,
+     output_keys: list[str] | None = None,
+     metadata_keys: list[str] | None = None,
+     description: str | None = None,
+ ) -> "Dataset":
+     """Convert a Polars DataFrame to a Phoenix Dataset.
+ 
+     This function transforms a Polars DataFrame into a Phoenix Dataset by:
+     1. Extracting input columns (what agents receive)
+     2. Extracting output columns (ground truth/expected output)
+     3. Extracting metadata columns (optional labels, difficulty, etc.)
+ 
+     If column keys are not specified, uses smart defaults:
+     - input_keys: columns containing 'input', 'query', 'question', or 'prompt'
+     - output_keys: columns containing 'output', 'expected', 'answer', or 'response'
+     - metadata_keys: remaining columns
+ 
+     Args:
+         client: PhoenixClient instance
+         df: Polars DataFrame with experiment data
+         dataset_name: Name for the created Phoenix dataset
+         input_keys: Optional list of column names for inputs
+         output_keys: Optional list of column names for outputs (ground truth)
+         metadata_keys: Optional list of column names for metadata
+         description: Optional dataset description
+ 
+     Returns:
+         Phoenix Dataset instance
+ 
+     Example:
+         >>> df = pl.read_csv("golden_set.csv")
+         >>> dataset = dataframe_to_phoenix_dataset(
+         ...     client=phoenix_client,
+         ...     df=df,
+         ...     dataset_name="my-golden-set",
+         ...     input_keys=["query"],
+         ...     output_keys=["expected_output"],
+         ...     metadata_keys=["difficulty"]
+         ... )
+     """
+     columns = df.columns
+ 
+     # Smart defaults for column detection
+     if input_keys is None:
+         input_keys = [c for c in columns if any(
+             k in c.lower() for k in ["input", "query", "question", "prompt"]
+         )]
+         if not input_keys:
+             # Fallback: first column
+             input_keys = [columns[0]] if columns else []
+ 
+     if output_keys is None:
+         output_keys = [c for c in columns if any(
+             k in c.lower() for k in ["output", "expected", "answer", "response", "reference"]
+         )]
+         if not output_keys:
+             # Fallback: second column
+             output_keys = [columns[1]] if len(columns) > 1 else []
+ 
+     if metadata_keys is None:
+         used_keys = set(input_keys) | set(output_keys)
+         metadata_keys = [c for c in columns if c not in used_keys]
+ 
+     logger.debug(
+         f"DataFrame to Phoenix Dataset: inputs={input_keys}, "
+         f"outputs={output_keys}, metadata={metadata_keys}"
+     )
+ 
+     # Convert to list of dicts
+     records = df.to_dicts()
+ 
+     inputs = [{k: row.get(k) for k in input_keys} for row in records]
+     outputs = [{k: row.get(k) for k in output_keys} for row in records]
+     metadata = [{k: row.get(k) for k in metadata_keys} for row in records] if metadata_keys else None
+ 
+     # Create Phoenix dataset
+     return client.create_dataset_from_data(
+         name=dataset_name,
+         inputs=inputs,
+         outputs=outputs,
+         metadata=metadata,
+         description=description,
+     )
+ 
+ 
  class PhoenixClient:
      """High-level Phoenix client for REM evaluation workflows.
 
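The smart defaults in `dataframe_to_phoenix_dataset` are substring matches on lowercased column names. A small worked example of the detection logic (hypothetical column names, reproducing the heuristics above):

```python
import polars as pl

# Hypothetical golden set: "question" matches the input heuristics,
# "expected_answer" matches the output heuristics, the rest is metadata.
df = pl.DataFrame({
    "question": ["What is REM?"],
    "expected_answer": ["A memory database."],
    "difficulty": ["easy"],
})

input_keys = [c for c in df.columns if any(
    k in c.lower() for k in ["input", "query", "question", "prompt"]
)]
output_keys = [c for c in df.columns if any(
    k in c.lower() for k in ["output", "expected", "answer", "response", "reference"]
)]
metadata_keys = [c for c in df.columns if c not in set(input_keys) | set(output_keys)]

assert input_keys == ["question"]
assert output_keys == ["expected_answer"]
assert metadata_keys == ["difficulty"]
```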
@@ -260,19 +349,22 @@
              "SEARCH semantic AI engineer",sarah-chen,person,medium,SEARCH
          """
          try:
-             # Load CSV
-             df = pd.read_csv(csv_file_path)
+             # Load CSV with Polars
+             df = pl.read_csv(csv_file_path)
+ 
+             # Convert to list of dicts
+             records = df.to_dicts()
 
              # Extract inputs
-             inputs = cast(list[dict[str, Any]], df[input_keys].to_dict("records"))
+             inputs = [{k: row.get(k) for k in input_keys} for row in records]
 
              # Extract outputs
-             outputs = cast(list[dict[str, Any]], df[output_keys].to_dict("records"))
+             outputs = [{k: row.get(k) for k in output_keys} for row in records]
 
              # Extract metadata if specified
              metadata = None
              if metadata_keys:
-                 metadata = cast(list[dict[str, Any]], df[metadata_keys].to_dict("records"))
+                 metadata = [{k: row.get(k) for k in metadata_keys} for row in records]
 
              return self.create_dataset_from_data(
                  name=name,
@@ -331,13 +423,16 @@
 
      def run_experiment(
          self,
-         dataset: "Dataset" | str,
+         dataset: "Dataset" | str | pl.DataFrame,
          task: Callable[[Any], Any] | None = None,
          evaluators: list[Callable[[Any], Any]] | None = None,
          experiment_name: str | None = None,
          experiment_description: str | None = None,
          experiment_metadata: dict[str, Any] | None = None,
          experiment_config: Any | None = None,
+         input_keys: list[str] | None = None,
+         output_keys: list[str] | None = None,
+         metadata_keys: list[str] | None = None,
      ) -> "RanExperiment":
          """Run an evaluation experiment.
 
@@ -346,14 +441,22 @@
          2. Agent run: Provide task function to execute agents on dataset
          3. Evaluator run: Provide evaluators to score existing outputs
 
+         Dataset can be:
+         - Phoenix Dataset instance
+         - Dataset name (string) - will be loaded from Phoenix
+         - Polars DataFrame - will be converted to Phoenix Dataset
+ 
          Args:
-             dataset: Dataset instance or name (required unless experiment_config provided)
+             dataset: Dataset instance, name, or Polars DataFrame
              task: Optional task function to run on each example (agent execution)
              evaluators: Optional list of evaluator functions
              experiment_name: Optional experiment name
              experiment_description: Optional description
              experiment_metadata: Optional metadata dict
              experiment_config: Optional ExperimentConfig instance (overrides other params)
+             input_keys: Column names for inputs (required if dataset is DataFrame)
+             output_keys: Column names for outputs (required if dataset is DataFrame)
+             metadata_keys: Optional column names for metadata
 
          Returns:
              RanExperiment with results
@@ -369,6 +472,16 @@
          ...     experiment_name="rem-v1-baseline"
          ... )
 
+         Example - With Polars DataFrame:
+             >>> df = pl.read_csv("golden_set.csv")
+             >>> experiment = client.run_experiment(
+             ...     dataset=df,
+             ...     task=run_agent,
+             ...     experiment_name="rem-v1-baseline",
+             ...     input_keys=["query"],
+             ...     output_keys=["expected_output"]
+             ... )
+ 
          Example - Evaluator Run (Phase 2b):
              >>> experiment = client.run_experiment(
              ...     dataset=agent_results,
@@ -407,6 +520,21 @@
          else:
              dataset = dataset_ref.path
 
+         # Convert Polars DataFrame to Phoenix Dataset
+         if isinstance(dataset, pl.DataFrame):
+             dataset_name_for_phoenix = f"{experiment_name or 'experiment'}-dataset-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
+             logger.info(f"Converting Polars DataFrame to Phoenix Dataset: {dataset_name_for_phoenix}")
+             dataset = dataframe_to_phoenix_dataset(
+                 client=self,
+                 df=dataset,
+                 dataset_name=dataset_name_for_phoenix,
+                 input_keys=input_keys,
+                 output_keys=output_keys,
+                 metadata_keys=metadata_keys,
+                 description=f"Auto-created from DataFrame for experiment: {experiment_name}",
+             )
+             logger.info(f"✓ Created Phoenix Dataset: {dataset_name_for_phoenix}")
+ 
          # Load dataset if name provided
          if isinstance(dataset, str):
              dataset = self.get_dataset(dataset)
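
As an aside, the auto-generated dataset name above is just the experiment name plus a timestamp. A quick sketch of what that f-string produces (the experiment name is an example value taken from the docstring):

```python
from datetime import datetime

experiment_name = "rem-v1-baseline"  # example value
name = f"{experiment_name or 'experiment'}-dataset-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
# -> e.g. "rem-v1-baseline-dataset-20250101-093042"
```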
@@ -454,7 +582,7 @@
          root_spans_only: bool = True,
          trace_id: str | None = None,
          span_id: str | None = None,
-     ) -> pd.DataFrame:
+     ) -> pl.DataFrame:
          """Query traces from Phoenix.
 
          Args:
@@ -467,7 +595,7 @@
              span_id: Filter by specific span ID
 
          Returns:
-             DataFrame with trace data
+             Polars DataFrame with trace data
 
          Example:
              >>> traces = client.get_traces(
@@ -492,8 +620,11 @@
          if span_id:
              query_params["span_id"] = span_id
 
-         # Query traces
-         traces_df = self._client.query_spans(limit=limit, **query_params)  # type: ignore[attr-defined]
+         # Query traces (Phoenix returns pandas DataFrame)
+         pandas_df = self._client.query_spans(limit=limit, **query_params)  # type: ignore[attr-defined]
+ 
+         # Convert pandas to Polars
+         traces_df = pl.from_pandas(pandas_df)
 
          logger.debug(f"Retrieved {len(traces_df)} traces")
          return traces_df
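
`query_spans` still hands back a pandas DataFrame, so the Polars conversion happens only at this boundary. Callers that haven't migrated yet can round-trip at the edge; a sketch (assumes the optional pandas/pyarrow dependencies are installed, a connected `PhoenixClient` as `client`, and a made-up project name):

```python
# get_traces now yields a Polars DataFrame
traces = client.get_traces(project_name="rem-agents", limit=100)

# Polars-native inspection instead of pandas .shape/.columns idioms
print(traces.height, traces.columns)

# Escape hatch for downstream code that still expects pandas
legacy_df = traces.to_pandas()
```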
@@ -535,7 +666,7 @@
          ... )
          """
          try:
-             # Query traces
+             # Query traces (returns Polars DataFrame)
              traces_df = self.get_traces(
                  project_name=project_name,
                  start_time=start_time,
@@ -547,12 +678,15 @@
              if len(traces_df) == 0:
                  raise ValueError("No traces found matching criteria")
 
+             # Convert to list of dicts for iteration
+             records = traces_df.to_dicts()
+ 
              # Extract inputs and outputs from traces
              inputs = []
              outputs = []
              metadata = []
 
-             for _, row in traces_df.iterrows():
+             for row in records:
                  # Extract input
                  span_input = row.get("attributes.input")
                  if span_input:
658
792
  label: str | None = None,
659
793
  score: float | None = None,
660
794
  explanation: str | None = None,
661
- ) -> None:
795
+ metadata: dict[str, Any] | None = None,
796
+ ) -> str | None:
662
797
  """Add feedback annotation to a span.
663
798
 
664
799
  Args:
665
800
  span_id: Span ID to annotate
666
- annotation_name: Name of the annotation (e.g., "correctness")
801
+ annotation_name: Name of the annotation (e.g., "correctness", "user_feedback")
667
802
  annotator_kind: Type of annotator ("HUMAN", "LLM", "CODE")
668
- label: Optional label (e.g., "correct", "incorrect")
803
+ label: Optional label (e.g., "correct", "incorrect", "helpful")
669
804
  score: Optional numeric score (0.0-1.0)
670
805
  explanation: Optional explanation text
806
+ metadata: Optional additional metadata dict
807
+
808
+ Returns:
809
+ Annotation ID if successful, None otherwise
671
810
  """
672
811
  try:
673
- self._client.add_span_annotation( # type: ignore[attr-defined]
812
+ result = self._client.add_span_annotation( # type: ignore[attr-defined]
674
813
  span_id=span_id,
675
814
  name=annotation_name,
676
815
  annotator_kind=annotator_kind,
677
816
  label=label,
678
817
  score=score,
679
818
  explanation=explanation,
819
+ metadata=metadata,
680
820
  )
681
821
 
682
- logger.info(f"Added {annotator_kind} feedback to span {span_id}")
822
+ annotation_id = getattr(result, "id", None) if result else None
823
+ logger.info(f"Added {annotator_kind} feedback to span {span_id} -> {annotation_id}")
824
+
825
+ return annotation_id
683
826
 
684
827
  except Exception as e:
685
828
  logger.error(f"Failed to add span feedback: {e}")
686
829
  raise
830
+
831
+ def sync_user_feedback(
832
+ self,
833
+ span_id: str,
834
+ rating: int | None = None,
835
+ categories: list[str] | None = None,
836
+ comment: str | None = None,
837
+ feedback_id: str | None = None,
838
+ ) -> str | None:
839
+ """Sync user feedback to Phoenix as a span annotation.
840
+
841
+ Convenience method for syncing Feedback entities to Phoenix.
842
+ Converts REM feedback format to Phoenix annotation format.
843
+
844
+ Args:
845
+ span_id: OTEL span ID to annotate
846
+ rating: User rating (-1, 1-5 scale)
847
+ categories: List of feedback categories
848
+ comment: Free-text comment
849
+ feedback_id: Optional REM feedback ID for reference
850
+
851
+ Returns:
852
+ Phoenix annotation ID if successful
853
+
854
+ Example:
855
+ >>> client.sync_user_feedback(
856
+ ... span_id="abc123",
857
+ ... rating=4,
858
+ ... categories=["helpful", "accurate"],
859
+ ... comment="Great response!"
860
+ ... )
861
+ """
862
+ # Convert rating to 0-1 score
863
+ score = None
864
+ if rating is not None:
865
+ if rating == -1:
866
+ score = 0.0
867
+ elif 1 <= rating <= 5:
868
+ score = rating / 5.0
869
+
870
+ # Use primary category as label
871
+ label = categories[0] if categories else None
872
+
873
+ # Build explanation from comment and additional categories
874
+ explanation = comment
875
+ if categories and len(categories) > 1:
876
+ cats_str = ", ".join(categories[1:])
877
+ if explanation:
878
+ explanation = f"{explanation} [Categories: {cats_str}]"
879
+ else:
880
+ explanation = f"Categories: {cats_str}"
881
+
882
+ # Build metadata
883
+ metadata = {
884
+ "rating": rating,
885
+ "categories": categories or [],
886
+ }
887
+ if feedback_id:
888
+ metadata["rem_feedback_id"] = feedback_id
889
+
890
+ return self.add_span_feedback(
891
+ span_id=span_id,
892
+ annotation_name="user_feedback",
893
+ annotator_kind="HUMAN",
894
+ label=label,
895
+ score=score,
896
+ explanation=explanation,
897
+ metadata=metadata,
898
+ )
899
+
900
+ def get_span_annotations(
901
+ self,
902
+ span_id: str,
903
+ annotation_name: str | None = None,
904
+ ) -> list[dict[str, Any]]:
905
+ """Get annotations for a span.
906
+
907
+ Args:
908
+ span_id: Span ID to query
909
+ annotation_name: Optional filter by annotation name
910
+
911
+ Returns:
912
+ List of annotation dicts
913
+
914
+ TODO: Implement once Phoenix client exposes this method
915
+ """
916
+ # TODO: Phoenix client doesn't expose annotation query yet
917
+ # This is a stub for future implementation
918
+ logger.warning("get_span_annotations not yet implemented in Phoenix client")
919
+ return []
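
The rating conversion in `sync_user_feedback` folds two feedback shapes, a `-1` thumbs-down sentinel and a 1-5 star scale, into Phoenix's 0.0-1.0 score range. A standalone sketch of just that mapping, with the resulting values:

```python
def rating_to_score(rating: int | None) -> float | None:
    """Mirror of the conversion above: -1 (thumbs down) -> 0.0, 1-5 -> 0.2..1.0."""
    if rating is None:
        return None
    if rating == -1:
        return 0.0
    if 1 <= rating <= 5:
        return rating / 5.0
    return None  # out-of-range ratings produce no score

assert rating_to_score(-1) == 0.0
assert rating_to_score(3) == 0.6
assert rating_to_score(5) == 1.0
assert rating_to_score(None) is None
```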
rem/services/postgres/README.md

@@ -348,8 +348,27 @@ results = await service.vector_search(
 
  ### Initialize Service
 
+ There are two ways to initialize the PostgresService:
+ 
+ **Option 1: Factory function (recommended for apps using remdb as a library)**
+ 
+ ```python
+ from rem.services.postgres import get_postgres_service
+ 
+ # Uses POSTGRES__CONNECTION_STRING from environment
+ pg = get_postgres_service()
+ if pg is None:
+     raise RuntimeError("Database not configured - set POSTGRES__CONNECTION_STRING")
+ 
+ await pg.connect()
+ # ... use pg ...
+ await pg.disconnect()
+ ```
+ 
+ **Option 2: Direct instantiation**
+ 
  ```python
- from rem.services.postgres import PostgresService, Repository
+ from rem.services.postgres import PostgresService
 
  service = PostgresService(
      connection_string="postgresql://user:pass@localhost/remdb",
@@ -359,6 +378,9 @@ service = PostgresService(
  await service.connect()
  ```
 
+ > **Note**: `get_postgres_service()` returns the service directly. It does NOT support
+ > `async with` context manager syntax. Always call `connect()` and `disconnect()` explicitly.
+ 
  ### Using Repository Pattern
 
  **Generic Repository** for simple CRUD operations:
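
Since the note in this hunk rules out `async with`, a downstream app that wants guaranteed cleanup has to arrange it itself. A minimal sketch wrapping the documented `connect()`/`disconnect()` calls in try/finally (the wrapper function is illustrative, not part of the package):

```python
from rem.services.postgres import get_postgres_service

async def with_database() -> None:
    pg = get_postgres_service()
    if pg is None:
        raise RuntimeError("Database not configured - set POSTGRES__CONNECTION_STRING")

    await pg.connect()
    try:
        ...  # run queries here
    finally:
        # No context manager support, so disconnect explicitly even on errors
        await pg.disconnect()
```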
@@ -514,34 +536,156 @@
  - HNSW parameters: `m=16, ef_construction=64` (tunable)
  - Monitor shared_buffers and work_mem
 
- ## Migrations
+ ## Schema Management
 
- Run migrations in order:
+ REM uses a **code-as-source-of-truth** approach. Pydantic models define the schema, and the database is kept in sync via diff-based migrations.
 
- ```bash
- psql -d remdb -f sql/migrations/001_setup_extensions.sql
- psql -d remdb -f sql/migrations/002_kv_store_cache.sql
- psql -d remdb -f sql/generated_schema.sql
+ ### File Structure
+ 
+ ```
+ src/rem/sql/
+ ├── migrations/
+ │   ├── 001_install.sql          # Core infrastructure (manual)
+ │   └── 002_install_models.sql   # Entity tables (auto-generated)
+ └── background_indexes.sql       # HNSW vector indexes (optional)
  ```
 
- Background indexes (after data load):
+ **Key principle**: Only two migration files. No incremental `003_`, `004_` files.
+ 
+ ### CLI Commands
 
  ```bash
- psql -d remdb -f sql/background_indexes.sql
+ # Apply migrations (installs extensions, core tables, entity tables)
+ rem db migrate
+ 
+ # Check migration status
+ rem db status
+ 
+ # Generate schema SQL from models (for remdb development)
+ rem db schema generate --models src/rem/models/entities
+ 
+ # Validate models for schema generation
+ rem db schema validate --models src/rem/models/entities
  ```
 
- ## CLI Usage
+ ### Model Registry
 
- Generate schema from models:
+ Models are discovered via the registry:
 
- ```bash
- rem schema generate --models src/rem/models/entities --output sql/schema.sql
+ ```python
+ import rem
+ from rem.models.core import CoreModel
+ 
+ @rem.register_model
+ class MyEntity(CoreModel):
+     name: str
+     description: str  # Auto-embeds
+ ```
+ 
+ ## Using REM as a Library (Downstream Apps)
+ 
+ When building an application that **depends on remdb as a package** (e.g., `pip install remdb`),
+ there are important differences from developing remdb itself.
+ 
+ ### What Works Out of the Box
+ 
+ 1. **All core entity tables** - Resources, Messages, Users, Sessions, etc.
+ 2. **PostgresService** - Full database access via `get_postgres_service()`
+ 3. **Repository pattern** - CRUD operations for core entities
+ 4. **Migrations** - `rem db migrate` applies the bundled SQL files
+ 
+ ```python
+ # In your downstream app (e.g., myapp/main.py)
+ from rem.services.postgres import get_postgres_service
+ from rem.models.entities import Message, Resource
+ 
+ pg = get_postgres_service()
+ await pg.connect()
+ 
+ # Use core entities - tables already exist
+ messages = await pg.query(Message, {"session_id": "abc"})
  ```
 
- Validate models:
+ ### Custom Models in Downstream Apps
+ 
+ The `@rem.register_model` decorator registers models in the **runtime registry**, which is useful for:
+ - Schema introspection at runtime
+ - Future tooling that reads the registry
+ 
+ However, **`rem db migrate` only applies SQL files bundled in the remdb package**.
+ Custom models from downstream apps do NOT automatically get tables created.
+ 
+ **Options for custom model tables:**
+ 
+ **Option A: Use core entities with metadata**
+ 
+ Store custom data in the `metadata` JSONB field of existing entities:
+ 
+ ```python
+ resource = Resource(
+     name="my-custom-thing",
+     content="...",
+     metadata={"custom_field": "value", "another": 123}
+ )
+ ```
+ 
+ **Option B: Create tables manually**
+ 
+ Write and apply your own SQL:
+ 
+ ```sql
+ -- myapp/sql/custom_tables.sql
+ CREATE TABLE IF NOT EXISTS conversation_summaries (
+     id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+     session_ref TEXT NOT NULL,
+     summary TEXT NOT NULL,
+     -- ... include CoreModel fields for compatibility
+     user_id VARCHAR(256),
+     created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+     updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+ );
+ ```
 
  ```bash
- rem schema validate --models src/rem/models/entities
+ psql $DATABASE_URL -f myapp/sql/custom_tables.sql
+ ```
+ 
+ **Option C: Contribute upstream**
+ 
+ If your model is generally useful, contribute it to remdb so it's included in
+ the next release and `rem db migrate` creates it automatically.
+ 
+ ### Example: Downstream App Structure
+ 
+ ```
+ myapp/
+ ├── main.py              # Import models, start API
+ ├── models/
+ │   └── __init__.py      # @rem.register_model decorators
+ ├── sql/
+ │   └── custom.sql       # Manual migrations for custom tables
+ ├── .env                 # POSTGRES__CONNECTION_STRING, LLM keys
+ └── pyproject.toml       # dependencies = ["remdb>=0.3.110"]
+ ```
+ 
+ ```python
+ # myapp/models/__init__.py
+ import rem
+ from rem.models.core import CoreModel
+ 
+ @rem.register_model
+ class ConversationSummary(CoreModel):
+     """Registered for introspection, but table created via sql/custom.sql"""
+     session_ref: str
+     summary: str
+ ```
+ 
+ ```python
+ # myapp/main.py
+ import models  # Registers custom models
+ 
+ from rem.api.main import app  # Use REM's FastAPI app
+ # Or build your own app using rem.services
  ```
 
  ## Configuration
rem/services/postgres/__init__.py

@@ -2,6 +2,7 @@
  PostgreSQL service for CloudNativePG database operations.
  """
 
+ from .diff_service import DiffService, SchemaDiff
  from .repository import Repository
  from .service import PostgresService
 
@@ -20,4 +21,4 @@ def get_postgres_service() -> PostgresService | None:
      return PostgresService()
 
 
- __all__ = ["PostgresService", "get_postgres_service", "Repository"]
+ __all__ = ["PostgresService", "get_postgres_service", "Repository", "DiffService", "SchemaDiff"]