remdb 0.2.6__py3-none-any.whl → 0.3.103__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of remdb might be problematic.

Files changed (82):
  1. rem/__init__.py +129 -2
  2. rem/agentic/README.md +76 -0
  3. rem/agentic/__init__.py +15 -0
  4. rem/agentic/agents/__init__.py +16 -2
  5. rem/agentic/agents/sse_simulator.py +500 -0
  6. rem/agentic/context.py +7 -5
  7. rem/agentic/llm_provider_models.py +301 -0
  8. rem/agentic/providers/phoenix.py +32 -43
  9. rem/agentic/providers/pydantic_ai.py +84 -10
  10. rem/api/README.md +238 -1
  11. rem/api/deps.py +255 -0
  12. rem/api/main.py +70 -22
  13. rem/api/mcp_router/server.py +8 -1
  14. rem/api/mcp_router/tools.py +80 -0
  15. rem/api/middleware/tracking.py +172 -0
  16. rem/api/routers/admin.py +277 -0
  17. rem/api/routers/auth.py +124 -0
  18. rem/api/routers/chat/completions.py +123 -14
  19. rem/api/routers/chat/models.py +7 -3
  20. rem/api/routers/chat/sse_events.py +526 -0
  21. rem/api/routers/chat/streaming.py +468 -45
  22. rem/api/routers/dev.py +81 -0
  23. rem/api/routers/feedback.py +455 -0
  24. rem/api/routers/messages.py +473 -0
  25. rem/api/routers/models.py +78 -0
  26. rem/api/routers/shared_sessions.py +406 -0
  27. rem/auth/middleware.py +126 -27
  28. rem/cli/commands/ask.py +15 -11
  29. rem/cli/commands/configure.py +169 -94
  30. rem/cli/commands/db.py +53 -7
  31. rem/cli/commands/experiments.py +278 -96
  32. rem/cli/commands/process.py +8 -7
  33. rem/cli/commands/scaffold.py +47 -0
  34. rem/cli/commands/schema.py +9 -9
  35. rem/cli/main.py +10 -0
  36. rem/config.py +2 -2
  37. rem/models/core/core_model.py +7 -1
  38. rem/models/entities/__init__.py +21 -0
  39. rem/models/entities/domain_resource.py +38 -0
  40. rem/models/entities/feedback.py +123 -0
  41. rem/models/entities/message.py +30 -1
  42. rem/models/entities/session.py +83 -0
  43. rem/models/entities/shared_session.py +206 -0
  44. rem/models/entities/user.py +10 -3
  45. rem/registry.py +367 -0
  46. rem/schemas/agents/rem.yaml +7 -3
  47. rem/services/content/providers.py +94 -140
  48. rem/services/content/service.py +85 -16
  49. rem/services/dreaming/affinity_service.py +2 -16
  50. rem/services/dreaming/moment_service.py +2 -15
  51. rem/services/embeddings/api.py +20 -13
  52. rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
  53. rem/services/phoenix/client.py +252 -19
  54. rem/services/postgres/README.md +29 -10
  55. rem/services/postgres/repository.py +132 -0
  56. rem/services/postgres/schema_generator.py +86 -5
  57. rem/services/rate_limit.py +113 -0
  58. rem/services/rem/README.md +14 -0
  59. rem/services/session/compression.py +17 -1
  60. rem/services/user_service.py +98 -0
  61. rem/settings.py +115 -17
  62. rem/sql/background_indexes.sql +10 -0
  63. rem/sql/migrations/001_install.sql +152 -2
  64. rem/sql/migrations/002_install_models.sql +580 -231
  65. rem/sql/migrations/003_seed_default_user.sql +48 -0
  66. rem/utils/constants.py +97 -0
  67. rem/utils/date_utils.py +228 -0
  68. rem/utils/embeddings.py +17 -4
  69. rem/utils/files.py +167 -0
  70. rem/utils/mime_types.py +158 -0
  71. rem/utils/model_helpers.py +156 -1
  72. rem/utils/schema_loader.py +273 -14
  73. rem/utils/sql_types.py +3 -1
  74. rem/utils/vision.py +9 -14
  75. rem/workers/README.md +14 -14
  76. rem/workers/db_maintainer.py +74 -0
  77. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/METADATA +486 -132
  78. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/RECORD +80 -57
  79. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/WHEEL +1 -1
  80. rem/sql/002_install_models.sql +0 -1068
  81. rem/sql/install_models.sql +0 -1038
  82. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/entry_points.txt +0 -0
rem/settings.py CHANGED
@@ -15,7 +15,7 @@ Example .env file:
15
15
  API__LOG_LEVEL=info
16
16
 
17
17
  # LLM
18
- LLM__DEFAULT_MODEL=anthropic:claude-sonnet-4-5-20250929
18
+ LLM__DEFAULT_MODEL=openai:gpt-4.1
19
19
  LLM__DEFAULT_TEMPERATURE=0.5
20
20
  LLM__MAX_RETRIES=10
21
21
  LLM__OPENAI_API_KEY=sk-...
@@ -57,8 +57,10 @@ Example .env file:
57
57
  """
58
58
 
59
59
  import os
60
- from pydantic import Field, field_validator
60
+ import hashlib
61
+ from pydantic import Field, field_validator, FieldValidationInfo
61
62
  from pydantic_settings import BaseSettings, SettingsConfigDict
63
+ from loguru import logger
62
64
 
63
65
 
64
66
  class LLMSettings(BaseSettings):
@@ -72,7 +74,7 @@ class LLMSettings(BaseSettings):
72
74
  LLM__EVALUATOR_MODEL or EVALUATOR_MODEL - Model for LLM-as-judge evaluation
73
75
  LLM__OPENAI_API_KEY or OPENAI_API_KEY - OpenAI API key
74
76
  LLM__ANTHROPIC_API_KEY or ANTHROPIC_API_KEY - Anthropic API key
75
- LLM__EMBEDDING_PROVIDER or EMBEDDING_PROVIDER - Default embedding provider (openai, cohere, jina, etc.)
77
+ LLM__EMBEDDING_PROVIDER or EMBEDDING_PROVIDER - Default embedding provider (openai)
76
78
  LLM__EMBEDDING_MODEL or EMBEDDING_MODEL - Default embedding model name
77
79
  """
78
80
 
@@ -84,7 +86,7 @@ class LLMSettings(BaseSettings):
84
86
  )
85
87
 
86
88
  default_model: str = Field(
87
- default="anthropic:claude-sonnet-4-5-20250929",
89
+ default="openai:gpt-4.1",
88
90
  description="Default LLM model (format: provider:model-id)",
89
91
  )
90
92
 
@@ -127,7 +129,7 @@ class LLMSettings(BaseSettings):
127
129
 
128
130
  embedding_provider: str = Field(
129
131
  default="openai",
130
- description="Default embedding provider (openai, cohere, jina, etc.)",
132
+ description="Default embedding provider (currently only openai supported)",
131
133
  )
132
134
 
133
135
  embedding_model: str = Field(
@@ -359,10 +361,16 @@ class AuthSettings(BaseSettings):
359
361
  - Custom OIDC provider
360
362
 
361
363
  Environment variables:
362
- AUTH__ENABLED - Enable authentication (default: false)
364
+ AUTH__ENABLED - Enable authentication (default: true)
365
+ AUTH__ALLOW_ANONYMOUS - Allow rate-limited anonymous access (default: true)
363
366
  AUTH__SESSION_SECRET - Secret for session cookie signing
364
367
  AUTH__GOOGLE__* - Google OAuth settings
365
368
  AUTH__MICROSOFT__* - Microsoft OAuth settings
369
+
370
+ Access modes:
371
+ - enabled=true, allow_anonymous=true: Auth available, anonymous gets rate-limited access
372
+ - enabled=true, allow_anonymous=false: Auth required for all requests
373
+ - enabled=false: No auth, all requests treated as default user (dev mode)
366
374
  """
367
375
 
368
376
  model_config = SettingsConfigDict(
@@ -373,8 +381,26 @@ class AuthSettings(BaseSettings):
373
381
  )
374
382
 
375
383
  enabled: bool = Field(
376
- default=False,
377
- description="Enable authentication (disabled by default for testing)",
384
+ default=True,
385
+ description="Enable authentication (OAuth endpoints and middleware)",
386
+ )
387
+
388
+ allow_anonymous: bool = Field(
389
+ default=True,
390
+ description=(
391
+ "Allow anonymous (unauthenticated) access with rate limits. "
392
+ "When true, requests without auth get ANONYMOUS tier rate limits. "
393
+ "When false, all requests require authentication."
394
+ ),
395
+ )
396
+
397
+ mcp_requires_auth: bool = Field(
398
+ default=True,
399
+ description=(
400
+ "Require authentication for MCP endpoints. "
401
+ "MCP is a protected service and should always require login in production. "
402
+ "Set to false only for local development/testing."
403
+ ),
378
404
  )
379
405
 
380
406
  session_secret: str = Field(
@@ -386,6 +412,22 @@ class AuthSettings(BaseSettings):
386
412
  google: GoogleOAuthSettings = Field(default_factory=GoogleOAuthSettings)
387
413
  microsoft: MicrosoftOAuthSettings = Field(default_factory=MicrosoftOAuthSettings)
388
414
 
415
@field_validator("session_secret", mode="before")
@classmethod
def generate_dev_secret(cls, v: str | None, info: FieldValidationInfo) -> str:
    """
    Resolve the session signing secret, deriving a development fallback if unset.

    Args:
        v: Raw AUTH__SESSION_SECRET value; may be None or empty.
        info: Validation context. ``info.data`` holds the fields of this
            model that were validated before ``session_secret``.

    Returns:
        ``v`` unchanged when it is non-empty; otherwise the SHA-256 hex
        digest of a deterministic seed string (non-production only).

    Raises:
        ValueError: If no secret is configured and the deployment is
            identified as production.
    """
    # An explicitly configured secret always wins.
    if v:
        return v

    data = info.data

    # NOTE(review): ``info.data`` only exposes fields of the model being
    # validated. None of the auth fields visible in this class are named
    # "environment" or "team", so relying on it alone would make the
    # production guard unreachable and hand out a publicly derivable secret
    # in production. Fall back to the process environment so the guard can
    # fire. Assumes the deployment environment is exported as ENVIRONMENT --
    # TODO confirm against the top-level Settings class.
    environment = data.get("environment") or os.getenv("ENVIRONMENT", "")
    if str(environment).strip().lower() == "production":
        raise ValueError("AUTH__SESSION_SECRET must be set in production environment.")

    # Seed derivation is intentionally unchanged so previously generated
    # development secrets (and the sessions signed with them) stay valid.
    seed_string = f"{data.get('team', 'rem')}-{data.get('environment', 'development')}-auth-secret-salt"
    logger.warning(
        "AUTH__SESSION_SECRET not set. Generating deterministic secret for non-production environment. "
        "DO NOT use in production."
    )
    return hashlib.sha256(seed_string.encode()).hexdigest()
430
+
389
431
 
390
432
  class PostgresSettings(BaseSettings):
391
433
  """
@@ -962,6 +1004,54 @@ class APISettings(BaseSettings):
962
1004
  )
963
1005
 
964
1006
 
1007
class SchemaSettings(BaseSettings):
    """
    Extra search locations for agent and evaluator schema files.

    Lets a deployment add its own schema directories to REM's lookup.
    Directories configured here are consulted BEFORE the schemas that ship
    inside the package.

    Environment variables:
        SCHEMA__PATHS - Directories to search, separated by semicolons
                        Example: "/app/schemas;/shared/agents;./local-schemas"

    Resolution order:
        1. The exact path, when that file exists
        2. Each directory from SCHEMA__PATHS, in the order given
        3. Built-in package schemas (schemas/agents/, schemas/evaluators/, etc.)
        4. Database LOOKUP (if enabled)

    Example:
        # In .env or the process environment
        SCHEMA__PATHS=/app/custom-agents;/shared/evaluators

        # Then in code
        from rem.utils.schema_loader import load_agent_schema
        schema = load_agent_schema("my-custom-agent")  # Found in /app/custom-agents/
    """

    model_config = SettingsConfigDict(
        env_prefix="SCHEMA__",
        extra="ignore",
    )

    paths: str = Field(
        default="",
        description=(
            "Semicolon-separated list of directories to search for schemas. "
            "These paths are searched BEFORE built-in package schemas. "
            "Example: '/app/schemas;/shared/agents'"
        ),
    )

    @property
    def path_list(self) -> list[str]:
        """Return the configured directories as a list, dropping blank entries."""
        # Splitting an empty string yields [""], which the filter removes,
        # so an unset value naturally produces an empty list.
        trimmed = map(str.strip, self.paths.split(";"))
        return list(filter(None, trimmed))
1053
+
1054
+
965
1055
  class GitSettings(BaseSettings):
966
1056
  """
967
1057
  Git repository provider settings for versioned schema/experiment syncing.
@@ -1166,6 +1256,11 @@ class Settings(BaseSettings):
1166
1256
  extra="ignore",
1167
1257
  )
1168
1258
 
1259
+ app_name: str = Field(
1260
+ default="REM",
1261
+ description="Application/API name used in docs, titles, and user-facing text",
1262
+ )
1263
+
1169
1264
  team: str = Field(
1170
1265
  default="rem",
1171
1266
  description="Team or project name for observability",
@@ -1207,20 +1302,23 @@ class Settings(BaseSettings):
1207
1302
  sqs: SQSSettings = Field(default_factory=SQSSettings)
1208
1303
  chunking: ChunkingSettings = Field(default_factory=ChunkingSettings)
1209
1304
  content: ContentSettings = Field(default_factory=ContentSettings)
1305
+ schema_search: SchemaSettings = Field(default_factory=SchemaSettings)
1210
1306
  test: TestSettings = Field(default_factory=TestSettings)
1211
1307
 
1212
1308
 
1213
1309
  # Load configuration from ~/.rem/config.yaml before initializing settings
1214
1310
  # This allows user configuration to be merged with environment variables
1215
- try:
1216
- from rem.config import load_config, merge_config_to_env
1217
-
1218
- _config = load_config()
1219
- if _config:
1220
- merge_config_to_env(_config)
1221
- except ImportError:
1222
- # config module not available (e.g., during initial setup)
1223
- pass
1311
# Opt out by setting REM_SKIP_CONFIG_FILE=true (useful for development with .env)
if os.getenv("REM_SKIP_CONFIG_FILE", "").lower() not in ("true", "1", "yes"):
    try:
        from rem.config import load_config, merge_config_to_env

        _config = load_config()
        # Only push values into the environment when a config was actually loaded.
        if _config:
            merge_config_to_env(_config)
    except ImportError:
        # rem.config is not importable yet (e.g., during initial setup)
        pass
1224
1322
 
1225
1323
  # Global settings singleton
1226
1324
  settings = Settings()
@@ -16,6 +16,11 @@ CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_moments_vector_hnsw
16
16
  ON embeddings_moments
17
17
  USING hnsw (embedding vector_cosine_ops);
18
18
 
19
+ -- HNSW vector index for embeddings_sessions
20
+ CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_sessions_vector_hnsw
21
+ ON embeddings_sessions
22
+ USING hnsw (embedding vector_cosine_ops);
23
+
19
24
  -- HNSW vector index for embeddings_resources
20
25
  CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_resources_vector_hnsw
21
26
  ON embeddings_resources
@@ -36,6 +41,11 @@ CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_ontology_configs_vector_h
36
41
  ON embeddings_ontology_configs
37
42
  USING hnsw (embedding vector_cosine_ops);
38
43
 
44
+ -- HNSW vector index for embeddings_domain_resources
45
+ CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_domain_resources_vector_hnsw
46
+ ON embeddings_domain_resources
47
+ USING hnsw (embedding vector_cosine_ops);
48
+
39
49
  -- HNSW vector index for embeddings_schemas
40
50
  CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_schemas_vector_hnsw
41
51
  ON embeddings_schemas
@@ -381,8 +381,140 @@ $$ LANGUAGE plpgsql STABLE;
381
381
  COMMENT ON FUNCTION rem_fuzzy IS
382
382
  'REM FUZZY query: Fuzzy text search using pg_trgm. Returns raw entity data as JSONB for LLM consumption. tenant_id parameter exists for backward compatibility but filtering uses user_id.';
383
383
 
384
- -- REM TRAVERSE: Moved to 002_install_models.sql (after entity tables are created)
385
- -- See 002_install_models.sql for the full rem_traverse function with keys_only parameter
384
+ -- ============================================================================
385
+ -- REM TRAVERSE (Graph Traversal)
386
+ -- ============================================================================
387
+
388
+ -- REM TRAVERSE: Recursive graph traversal following edges
389
+ -- Explores graph_edges starting from entity_key up to max_depth
390
+ -- Uses cached kv_store.graph_edges for fast traversal (no polymorphic view!)
391
+ -- When keys_only=false, automatically fetches full entity records
392
CREATE OR REPLACE FUNCTION rem_traverse(
    p_entity_key VARCHAR(255),
    p_tenant_id VARCHAR(100),  -- Backward compat parameter (not used for filtering)
    p_user_id VARCHAR(100),
    p_max_depth INTEGER DEFAULT 1,
    p_rel_type VARCHAR(100) DEFAULT NULL,
    p_keys_only BOOLEAN DEFAULT FALSE
)
RETURNS TABLE(
    depth INTEGER,
    entity_key VARCHAR(255),
    entity_type VARCHAR(100),
    entity_id UUID,
    rel_type VARCHAR(100),
    rel_weight REAL,
    path TEXT[],
    entity_record JSONB
) AS $$
DECLARE
    hop RECORD;                              -- one reachable entity per iteration
    keys_by_table JSONB := '{}'::jsonb;      -- {entity_type: [entity_key, ...]} for the batch fetch
BEGIN
    -- Walk the graph using the edges cached on kv_store rows.
    FOR hop IN
        WITH RECURSIVE graph_traversal AS (
            -- Anchor: the starting entity at depth 0
            SELECT
                0 AS depth,
                kv.entity_key,
                kv.entity_type,
                kv.entity_id,
                NULL::VARCHAR(100) AS rel_type,
                NULL::REAL AS rel_weight,
                ARRAY[kv.entity_key]::TEXT[] AS path
            FROM kv_store kv
            WHERE kv.user_id = p_user_id
              AND kv.entity_key = p_entity_key

            UNION ALL

            -- Step: follow every outbound edge of an already-discovered entity
            SELECT
                gt.depth + 1,
                target_kv.entity_key,
                target_kv.entity_type,
                target_kv.entity_id,
                (edge->>'rel_type')::VARCHAR(100) AS rel_type,
                COALESCE((edge->>'weight')::REAL, 1.0) AS rel_weight,
                gt.path || target_kv.entity_key AS path
            FROM graph_traversal gt
            -- Source row carries the cached graph_edges array
            JOIN kv_store source_kv ON source_kv.entity_key = gt.entity_key
                AND source_kv.user_id = p_user_id
            -- Expand the cached edges; a missing array is treated as empty
            CROSS JOIN LATERAL jsonb_array_elements(COALESCE(source_kv.graph_edges, '[]'::jsonb)) AS edge
            -- Resolve the edge destination for the same user
            JOIN kv_store target_kv ON target_kv.entity_key = (edge->>'dst')::VARCHAR(255)
                AND target_kv.user_id = p_user_id
            WHERE gt.depth < p_max_depth
              -- Optional relationship-type filter
              AND (p_rel_type IS NULL OR (edge->>'rel_type')::VARCHAR(100) = p_rel_type)
              -- Never revisit a key already on the current path (cycle guard)
              AND NOT (target_kv.entity_key = ANY(gt.path))
        )
        -- One row per entity, keeping the shallowest occurrence
        SELECT DISTINCT ON (entity_key)
            gt.depth,
            gt.entity_key,
            gt.entity_type,
            gt.entity_id,
            gt.rel_type,
            gt.rel_weight,
            gt.path
        FROM graph_traversal gt
        WHERE gt.depth > 0  -- the starting entity itself is not reported
        ORDER BY gt.entity_key, gt.depth
    LOOP
        IF p_keys_only THEN
            -- Emit the graph structure only; no record payload
            depth := hop.depth;
            entity_key := hop.entity_key;
            entity_type := hop.entity_type;
            entity_id := hop.entity_id;
            rel_type := hop.rel_type;
            rel_weight := hop.rel_weight;
            path := hop.path;
            entity_record := NULL;
            RETURN NEXT;
            CONTINUE;
        END IF;

        -- Append this key to its table's list, starting a new list on first sight
        keys_by_table := jsonb_set(
            keys_by_table,
            ARRAY[hop.entity_type],
            COALESCE(keys_by_table->hop.entity_type, '[]'::jsonb)
                || jsonb_build_array(hop.entity_key)
        );
    END LOOP;

    -- Full-record mode: resolve everything collected above in one rem_fetch call.
    -- Graph columns (depth, entity_id, rel_type, rel_weight, path) are NULL in these rows.
    IF NOT p_keys_only AND keys_by_table != '{}'::jsonb THEN
        RETURN QUERY
        SELECT
            NULL::INTEGER AS depth,
            f.entity_key::VARCHAR(255),
            f.entity_type::VARCHAR(100),
            NULL::UUID AS entity_id,
            NULL::VARCHAR(100) AS rel_type,
            NULL::REAL AS rel_weight,
            NULL::TEXT[] AS path,
            f.entity_record
        FROM rem_fetch(keys_by_table, p_user_id) f;
    END IF;
END;
$$ LANGUAGE plpgsql STABLE;
515
+
516
+ COMMENT ON FUNCTION rem_traverse IS
517
+ 'REM TRAVERSE query: Recursive graph traversal using cached kv_store.graph_edges. When keys_only=false (default), automatically fetches full entity records via rem_fetch.';
386
518
 
387
519
  -- REM SEARCH: Vector similarity search using embeddings
388
520
  -- Joins to embeddings table for semantic search
@@ -464,6 +596,24 @@ $$ LANGUAGE plpgsql;
464
596
  COMMENT ON FUNCTION migration_status() IS
465
597
  'Get summary of applied migrations by type';
466
598
 
599
+ -- ============================================================================
600
+ -- RATE LIMITS (UNLOGGED for performance)
601
+ -- ============================================================================
602
+ -- High-performance rate limiting table. Uses UNLOGGED for speed - counts may
603
+ -- be lost on database crash/restart, which is acceptable (fail-open on error).
604
+
605
+ CREATE UNLOGGED TABLE IF NOT EXISTS rate_limits (
606
+ key VARCHAR(512) PRIMARY KEY,
607
+ count INTEGER NOT NULL DEFAULT 1,
608
+ expires_at TIMESTAMP NOT NULL,
609
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
610
+ );
611
+
612
+ CREATE INDEX IF NOT EXISTS idx_rate_limits_expires ON rate_limits (expires_at);
613
+
614
+ COMMENT ON TABLE rate_limits IS
615
+ 'UNLOGGED rate limiting table. Counts may be lost on crash (acceptable for rate limiting).';
616
+
467
617
  -- ============================================================================
468
618
  -- RECORD INSTALLATION
469
619
  -- ============================================================================