remdb 0.3.242__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -0
- rem/agentic/README.md +760 -0
- rem/agentic/__init__.py +54 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +38 -0
- rem/agentic/agents/agent_manager.py +311 -0
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +425 -0
- rem/agentic/context_builder.py +360 -0
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +273 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +240 -0
- rem/agentic/providers/phoenix.py +926 -0
- rem/agentic/providers/pydantic_ai.py +854 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +737 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +242 -0
- rem/api/README.md +657 -0
- rem/api/deps.py +253 -0
- rem/api/main.py +460 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +820 -0
- rem/api/mcp_router/server.py +243 -0
- rem/api/mcp_router/tools.py +1605 -0
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +520 -0
- rem/api/routers/auth.py +898 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/child_streaming.py +394 -0
- rem/api/routers/chat/completions.py +702 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +202 -0
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +546 -0
- rem/api/routers/chat/streaming.py +950 -0
- rem/api/routers/chat/streaming_utils.py +327 -0
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +87 -0
- rem/api/routers/feedback.py +276 -0
- rem/api/routers/messages.py +620 -0
- rem/api/routers/models.py +86 -0
- rem/api/routers/query.py +362 -0
- rem/api/routers/shared_sessions.py +422 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +36 -0
- rem/auth/jwt.py +367 -0
- rem/auth/middleware.py +318 -0
- rem/auth/providers/__init__.py +16 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/email.py +215 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +517 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +299 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +549 -0
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +495 -0
- rem/cli/commands/db.py +828 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1698 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +388 -0
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +230 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/commands/session.py +453 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +123 -0
- rem/config.py +244 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +70 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +672 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +246 -0
- rem/models/entities/__init__.py +68 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +64 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +181 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/session.py +84 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +93 -0
- rem/py.typed +0 -0
- rem/registry.py +373 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/agent-builder.yaml +235 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +132 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +18 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +760 -0
- rem/services/content/service.py +762 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +322 -0
- rem/services/dreaming/moment_service.py +251 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +522 -0
- rem/services/email/templates.py +360 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +127 -0
- rem/services/embeddings/worker.py +435 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +960 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +757 -0
- rem/services/postgres/__init__.py +49 -0
- rem/services/postgres/diff_service.py +599 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
- rem/services/postgres/register_type.py +353 -0
- rem/services/postgres/repository.py +481 -0
- rem/services/postgres/schema_generator.py +661 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +355 -0
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +318 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +180 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +608 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +13 -0
- rem/services/session/compression.py +488 -0
- rem/services/session/pydantic_messages.py +310 -0
- rem/services/session/reload.py +85 -0
- rem/services/user_service.py +130 -0
- rem/settings.py +1877 -0
- rem/sql/background_indexes.sql +52 -0
- rem/sql/migrations/001_install.sql +983 -0
- rem/sql/migrations/002_install_models.sql +3157 -0
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +282 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +628 -0
- rem/utils/__init__.py +61 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +436 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/files.py +323 -0
- rem/utils/markdown.py +16 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +492 -0
- rem/utils/schema_loader.py +649 -0
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +350 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +325 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +7 -0
- rem/workers/db_listener.py +579 -0
- rem/workers/db_maintainer.py +74 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- rem/workers/unlogged_maintainer.py +463 -0
- remdb-0.3.242.dist-info/METADATA +1632 -0
- remdb-0.3.242.dist-info/RECORD +235 -0
- remdb-0.3.242.dist-info/WHEEL +4 -0
- remdb-0.3.242.dist-info/entry_points.txt +2 -0
rem/utils/sql_paths.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"""Utilities for resolving SQL file paths.
|
|
2
|
+
|
|
3
|
+
Handles package SQL directory resolution and user migrations.
|
|
4
|
+
|
|
5
|
+
Convention for user migrations:
|
|
6
|
+
Place custom SQL files in `./sql/migrations/` relative to your project root.
|
|
7
|
+
Files should be numbered (e.g., `100_custom_table.sql`) to control execution order.
|
|
8
|
+
Package migrations (001-099) run first, then user migrations (100+).
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import List, Optional
|
|
13
|
+
import importlib.resources
|
|
14
|
+
|
|
15
|
+
# Convention: Default location for user-maintained migrations
|
|
16
|
+
USER_SQL_DIR_CONVENTION = "sql"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def get_package_sql_dir() -> Path:
    """Get the SQL directory from the installed rem package.

    Resolution order:
    1. ``importlib.resources`` lookup on the installed ``rem`` package.
    2. ``rem.__file__``-relative lookup (works for editable installs).
    3. ``src/rem/sql`` relative to the current working directory
       (development checkouts run from the repo root).

    Returns:
        Path to the package's sql directory.

    Raises:
        FileNotFoundError: If the SQL directory cannot be found by any strategy.
    """
    try:
        # Use importlib.resources for Python 3.9+.
        # Fix: also catch ModuleNotFoundError — previously, when "rem" was not
        # importable, files("rem") escaped with ModuleNotFoundError instead of
        # falling through to the documented FileNotFoundError.
        sql_ref = importlib.resources.files("rem") / "sql"
        package_sql = Path(str(sql_ref))
        if package_sql.exists():
            return package_sql
    except (AttributeError, TypeError, ModuleNotFoundError):
        pass

    # Fallback: use __file__ to find the package location.
    try:
        import rem

        package_sql = Path(rem.__file__).parent / "sql"
        if package_sql.exists():
            return package_sql
    except (ImportError, AttributeError):
        pass

    # Development fallback: check relative to the current working directory.
    dev_sql = Path("src/rem/sql")
    if dev_sql.exists():
        return dev_sql

    raise FileNotFoundError(
        "Could not locate rem SQL directory. "
        "Ensure remdb is properly installed or run from the source directory."
    )
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def get_package_migrations_dir() -> Path:
    """Return the ``migrations`` subdirectory of the packaged SQL tree.

    Returns:
        Path to the package's migrations directory.
    """
    sql_root = get_package_sql_dir()
    return sql_root / "migrations"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def get_user_sql_dir() -> Optional[Path]:
    """Return the conventional user SQL directory, if present.

    Looks for ``./sql/`` relative to the current working directory,
    following the convention for user-maintained migrations.

    Returns:
        Path to the user sql directory if it exists, None otherwise.
    """
    candidate = Path.cwd() / USER_SQL_DIR_CONVENTION
    # is_dir() implies existence, so one check covers both conditions.
    return candidate if candidate.is_dir() else None
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def list_package_migrations() -> List[Path]:
    """List all migration files shipped with the package.

    Returns:
        Sorted list of migration file paths; empty when the package SQL
        directory cannot be located or has no migrations directory.
    """
    try:
        migrations_dir = get_package_migrations_dir()
    except FileNotFoundError:
        return []

    if not migrations_dir.exists():
        return []

    # Only files whose names start with a digit count as numbered migrations.
    return sorted(p for p in migrations_dir.glob("*.sql") if p.name[0].isdigit())
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def list_user_migrations() -> List[Path]:
    """List all migration files under the user's ``sql/migrations`` directory.

    Returns:
        Sorted list of user migration file paths; empty when the user
        directory or its migrations subdirectory does not exist.
    """
    user_sql = get_user_sql_dir()
    if user_sql is None:
        return []

    migrations_dir = user_sql / "migrations"
    if not migrations_dir.exists():
        return []

    # Only files whose names start with a digit count as numbered migrations.
    return sorted(p for p in migrations_dir.glob("*.sql") if p.name[0].isdigit())
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def list_all_migrations() -> List[Path]:
    """List all migration files from package and user directories.

    Collects migrations from:
    1. Package migrations directory
    2. User directory (./sql/migrations/) if it exists

    When both sources provide a file with the same name, the package copy
    wins. Files are sorted by name, so use numbered prefixes to control order:
    - 001-099: Reserved for package migrations
    - 100+: Recommended for user migrations

    Returns:
        Sorted list of all migration file paths (by filename)
    """
    unique: dict = {}

    # Package migrations are visited first, so setdefault keeps the package
    # copy whenever a user migration shares its filename.
    for candidate in (*list_package_migrations(), *list_user_migrations()):
        unique.setdefault(candidate.name, candidate)

    return sorted(unique.values(), key=lambda p: p.name)
|
rem/utils/sql_types.py
ADDED
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pydantic to PostgreSQL Type Mapping Utility.
|
|
3
|
+
|
|
4
|
+
Maps Pydantic field types to PostgreSQL column types with intelligent defaults:
|
|
5
|
+
- Strings: VARCHAR(256) by default, TEXT for content/description fields
|
|
6
|
+
- Union types: Prefer UUID, JSONB over other types
|
|
7
|
+
- Lists of strings: TEXT[] (PostgreSQL arrays)
|
|
8
|
+
- Dicts and lists of dicts: JSONB
|
|
9
|
+
- Field metadata: Respect json_schema_extra for custom types and embeddings
|
|
10
|
+
|
|
11
|
+
Best Practices:
|
|
12
|
+
- VARCHAR(256) for most strings (indexes work well, prevents excessive data)
|
|
13
|
+
- TEXT for long-form content (descriptions, summaries, content fields)
|
|
14
|
+
- JSONB for structured data (better querying than JSON)
|
|
15
|
+
- Arrays for simple lists, JSONB for complex nested structures
|
|
16
|
+
- UUID for identifiers in Union types
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import types
|
|
20
|
+
from datetime import date, datetime, time
|
|
21
|
+
from typing import Any, Union, get_args, get_origin
|
|
22
|
+
from uuid import UUID
|
|
23
|
+
|
|
24
|
+
from pydantic import BaseModel
|
|
25
|
+
from pydantic.fields import FieldInfo
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Field names that should use TEXT instead of VARCHAR
|
|
29
|
+
LONG_TEXT_FIELD_NAMES = {
|
|
30
|
+
"content",
|
|
31
|
+
"description",
|
|
32
|
+
"summary",
|
|
33
|
+
"instructions",
|
|
34
|
+
"prompt",
|
|
35
|
+
"message",
|
|
36
|
+
"body",
|
|
37
|
+
"text",
|
|
38
|
+
"note",
|
|
39
|
+
"comment",
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def get_sql_type(field_info: FieldInfo, field_name: str) -> str:
    """
    Map Pydantic field to PostgreSQL type.

    Args:
        field_info: Pydantic FieldInfo object
        field_name: Name of the field (used for heuristics)

    Returns:
        PostgreSQL type string (e.g., "VARCHAR(256)", "JSONB", "TEXT[]")

    Examples:
        >>> from pydantic import Field
        >>> get_sql_type(Field(default="test"), "name")
        'VARCHAR(256)'
        >>> get_sql_type(Field(default=""), "content")
        'TEXT'
        >>> get_sql_type(Field(default_factory=dict), "metadata")
        'JSONB'
    """
    # json_schema_extra may be a dict OR a callable in Pydantic; only a dict
    # can carry our metadata keys.
    # Fix: the embedding_provider membership test previously sat outside the
    # isinstance(dict) guard, so a callable json_schema_extra raised TypeError.
    extra = field_info.json_schema_extra
    if isinstance(extra, dict):
        # Explicit override wins over all type inference.
        if "sql_type" in extra:
            return extra["sql_type"]

        # Fields with embedding_provider should be TEXT (for vector search
        # preprocessing). Format: "openai:text-embedding-3-small".
        if "embedding_provider" in extra:
            return "TEXT"

    # Get the annotation (type hint).
    annotation = field_info.annotation

    # Handle None annotation (shouldn't happen, but be safe).
    if annotation is None:
        return "TEXT"

    # Handle Union types (including Optional[T] which is Union[T, None]).
    # Also handles Python 3.10+ `X | None` syntax which uses types.UnionType.
    origin = get_origin(annotation)
    if origin is Union or isinstance(annotation, types.UnionType):
        non_none_args = [arg for arg in get_args(annotation) if arg is not type(None)]

        if not non_none_args:
            return "TEXT"

        # Preference order inside unions: UUID first, then dict/JSONB.
        if UUID in non_none_args:
            return "UUID"
        if dict in non_none_args:
            return "JSONB"

        # Otherwise, use the first non-None member.
        return _map_simple_type(non_none_args[0], field_name)

    # Handle simple (non-union) types.
    return _map_simple_type(annotation, field_name)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _map_simple_type(python_type: type, field_name: str) -> str:
|
|
108
|
+
"""
|
|
109
|
+
Map a simple Python type to PostgreSQL type.
|
|
110
|
+
|
|
111
|
+
Args:
|
|
112
|
+
python_type: Python type annotation
|
|
113
|
+
field_name: Field name for heuristics
|
|
114
|
+
|
|
115
|
+
Returns:
|
|
116
|
+
PostgreSQL type string
|
|
117
|
+
"""
|
|
118
|
+
# Check if it's a generic type (List, Dict, etc.)
|
|
119
|
+
origin = get_origin(python_type)
|
|
120
|
+
args = get_args(python_type)
|
|
121
|
+
|
|
122
|
+
# Handle list types
|
|
123
|
+
if origin is list:
|
|
124
|
+
if args:
|
|
125
|
+
inner_type = args[0]
|
|
126
|
+
|
|
127
|
+
# List of strings -> PostgreSQL array
|
|
128
|
+
if inner_type is str:
|
|
129
|
+
return "TEXT[]"
|
|
130
|
+
|
|
131
|
+
# List of dicts or other complex types -> JSONB
|
|
132
|
+
if inner_type is dict or get_origin(inner_type) is not None:
|
|
133
|
+
return "JSONB"
|
|
134
|
+
|
|
135
|
+
# List of primitives (int, float, bool) -> JSONB for simplicity
|
|
136
|
+
return "JSONB"
|
|
137
|
+
|
|
138
|
+
# Untyped list -> JSONB
|
|
139
|
+
return "JSONB"
|
|
140
|
+
|
|
141
|
+
# Handle dict types -> always JSONB
|
|
142
|
+
if origin is dict or python_type is dict:
|
|
143
|
+
return "JSONB"
|
|
144
|
+
|
|
145
|
+
# Handle primitive types
|
|
146
|
+
type_mapping = {
|
|
147
|
+
str: _get_string_type(field_name),
|
|
148
|
+
int: "INTEGER",
|
|
149
|
+
float: "DOUBLE PRECISION",
|
|
150
|
+
bool: "BOOLEAN",
|
|
151
|
+
UUID: "UUID",
|
|
152
|
+
datetime: "TIMESTAMP",
|
|
153
|
+
date: "DATE",
|
|
154
|
+
time: "TIME",
|
|
155
|
+
bytes: "BYTEA",
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
# Check direct type match
|
|
159
|
+
if python_type in type_mapping:
|
|
160
|
+
return type_mapping[python_type]
|
|
161
|
+
|
|
162
|
+
# Check if it's a Pydantic model -> JSONB
|
|
163
|
+
if isinstance(python_type, type) and issubclass(python_type, BaseModel):
|
|
164
|
+
return "JSONB"
|
|
165
|
+
|
|
166
|
+
# Default to TEXT for unknown types
|
|
167
|
+
return "TEXT"
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _get_string_type(field_name: str) -> str:
    """
    Determine string type based on field name.

    Args:
        field_name: Name of the field

    Returns:
        "TEXT" for long-form content, "VARCHAR(256)" for others
    """
    lowered = field_name.lower()

    # Exact match against the known long-form field names.
    if lowered in LONG_TEXT_FIELD_NAMES:
        return "TEXT"

    # Suffix heuristics, e.g. "review_summary" or "error_message".
    long_suffixes = ("_content", "_description", "_summary", "_text", "_message")
    if lowered.endswith(long_suffixes):
        return "TEXT"

    # Indexable default with a reasonable upper bound.
    return "VARCHAR(256)"
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def get_column_definition(
    field_info: FieldInfo,
    field_name: str,
    nullable: bool = True,
    primary_key: bool = False,
) -> str:
    """
    Generate complete PostgreSQL column definition.

    Args:
        field_info: Pydantic FieldInfo object
        field_name: Name of the column
        nullable: Whether column allows NULL (ignored when primary_key is True)
        primary_key: Whether this is a primary key

    Returns:
        Complete column definition SQL

    Examples:
        >>> from pydantic import Field
        >>> get_column_definition(Field(default=""), "name", nullable=False)
        'name VARCHAR(256) NOT NULL'
        >>> get_column_definition(Field(default_factory=dict), "metadata")
        "metadata JSONB DEFAULT '{}'::jsonb"
    """
    # NOTE: docstring example fixed — the previous version showed NOT NULL in
    # the output of a nullable call, which this function never emits.
    sql_type = get_sql_type(field_info, field_name)

    parts = [field_name, sql_type]

    if primary_key:
        parts.append("PRIMARY KEY")
    elif not nullable:
        parts.append("NOT NULL")

    # Fields declared with default_factory get a server-side default so rows
    # inserted without them still hold an empty container.
    if field_info.default_factory is not None:
        if sql_type == "JSONB":
            parts.append("DEFAULT '{}'::jsonb")
        elif sql_type.endswith("[]"):
            parts.append("DEFAULT ARRAY[]::TEXT[]")

    return " ".join(parts)
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def model_to_create_table(
    model: type[BaseModel],
    table_name: str,
    include_indexes: bool = True,
) -> str:
    """
    Generate CREATE TABLE statement from Pydantic model.

    Args:
        model: Pydantic model class
        table_name: Name of the table to create
        include_indexes: Whether to include index creation statements

    Returns:
        SQL CREATE TABLE statement

    Examples:
        >>> from pydantic import BaseModel, Field
        >>> class User(BaseModel):
        ...     id: str = Field(..., description="User ID")
        ...     name: str
        ...     metadata: dict = Field(default_factory=dict)
        >>> sql = model_to_create_table(User, "users")
        >>> "CREATE TABLE" in sql
        True
    """
    column_lines = []
    index_statements = []

    for field_name, field_info in model.model_fields.items():
        # A field is nullable when it is optional or carries an explicit default.
        is_nullable = not field_info.is_required() or field_info.default is not None

        # Convention: the 'id' column is the primary key.
        is_primary = field_name == "id"

        column_lines.append(
            f"  {get_column_definition(field_info, field_name, is_nullable, is_primary)}"
        )

        if not include_indexes or is_primary:
            continue

        sql_type = get_sql_type(field_info, field_name)
        index_prefix = f"CREATE INDEX IF NOT EXISTS idx_{table_name}_{field_name} "

        # B-tree index for foreign keys and frequently filtered columns.
        if field_name.endswith("_id") or field_name in {"tenant_id", "user_id", "session_id"}:
            index_statements.append(index_prefix + f"ON {table_name}({field_name});")

        # GIN indexes speed up JSONB containment and array membership queries.
        if sql_type == "JSONB" or sql_type.endswith("[]"):
            index_statements.append(index_prefix + f"ON {table_name} USING GIN({field_name});")

    statement = f"CREATE TABLE IF NOT EXISTS {table_name} (\n"
    statement += ",\n".join(column_lines)
    statement += "\n);"

    if index_statements:
        statement += "\n\n-- Indexes\n" + "\n".join(index_statements)

    return statement
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def model_to_upsert(
    model: type[BaseModel],
    table_name: str,
    conflict_column: str = "id",
) -> str:
    """
    Generate INSERT ... ON CONFLICT UPDATE (UPSERT) statement template.

    Placeholders use the numbered ``$1, $2, ...`` style (PostgreSQL wire
    format, e.g. asyncpg), one per model field in declaration order.

    Args:
        model: Pydantic model class
        table_name: Name of the table
        conflict_column: Column to use for conflict detection (usually 'id')

    Returns:
        SQL UPSERT statement with placeholders

    Examples:
        >>> from pydantic import BaseModel
        >>> class User(BaseModel):
        ...     id: str
        ...     name: str
        >>> sql = model_to_upsert(User, "users")
        >>> "ON CONFLICT" in sql
        True
    """
    field_names = list(model.model_fields.keys())
    placeholders = [f"${i}" for i in range(1, len(field_names) + 1)]

    # Exclude conflict column from UPDATE — it never changes on conflict.
    update_fields = [f for f in field_names if f != conflict_column]

    # Fix: a model whose only field is the conflict column previously produced
    # the invalid clause "DO UPDATE SET ;" — use DO NOTHING in that case.
    if update_fields:
        update_set = ", ".join(f"{field} = EXCLUDED.{field}" for field in update_fields)
        conflict_action = f"DO UPDATE SET {update_set}"
    else:
        conflict_action = "DO NOTHING"

    return f"""INSERT INTO {table_name} ({", ".join(field_names)})
VALUES ({", ".join(placeholders)})
ON CONFLICT ({conflict_column})
{conflict_action};"""
|
rem/utils/user_id.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utility functions for user ID generation and management.
|
|
3
|
+
|
|
4
|
+
Provides deterministic UUID generation from email addresses for consistent
|
|
5
|
+
user identification across the REM system.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import hashlib
|
|
9
|
+
import uuid
|
|
10
|
+
from typing import Union
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def email_to_user_id(email: str) -> str:
    """
    Generate a deterministic UUID from an email address.

    Uses UUID5 (SHA-1 based, RFC 4122) so that:
    - The same email always produces the same UUID
    - Different emails produce different UUIDs
    - Results are valid RFC 4122 UUID strings

    Args:
        email: Email address to convert. Whitespace is stripped and the
            address is lowercased before hashing.

    Returns:
        String representation of the UUID
        (e.g., "550e8400-e29b-41d4-a716-446655440000")
    """
    # NOTE(review): the original comment called this a "REM-specific
    # namespace", but the value is actually the standard RFC 4122 DNS
    # namespace (uuid.NAMESPACE_DNS). It must NOT be changed — existing
    # user IDs derived from it would no longer match.
    REM_NAMESPACE = uuid.UUID("6ba7b810-9dad-11d1-80b4-00c04fd430c8")

    # Normalize so capitalization/padding variants map to one identity.
    normalized_email = email.lower().strip()

    return str(uuid.uuid5(REM_NAMESPACE, normalized_email))
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def user_id_to_uuid(user_id: Union[str, uuid.UUID]) -> uuid.UUID:
    """
    Convert a user_id string to UUID object.

    Accepts either a UUID string or an already-parsed UUID object,
    which is returned unchanged.

    Args:
        user_id: User ID as string or UUID

    Returns:
        UUID object

    Raises:
        ValueError: If user_id is not a valid UUID format
    """
    return user_id if isinstance(user_id, uuid.UUID) else uuid.UUID(user_id)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def is_valid_uuid(value: str) -> bool:
    """
    Check if a string is a valid UUID.

    Args:
        value: String to check

    Returns:
        True if valid UUID, False otherwise (including non-string inputs,
        whose TypeError/AttributeError is treated as "not valid")
    """
    try:
        uuid.UUID(value)
    except (ValueError, AttributeError, TypeError):
        return False
    return True