remdb-0.2.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +2 -0
- rem/agentic/README.md +650 -0
- rem/agentic/__init__.py +39 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +8 -0
- rem/agentic/context.py +148 -0
- rem/agentic/context_builder.py +329 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +107 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +151 -0
- rem/agentic/providers/phoenix.py +674 -0
- rem/agentic/providers/pydantic_ai.py +572 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +396 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +231 -0
- rem/api/README.md +420 -0
- rem/api/main.py +324 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +536 -0
- rem/api/mcp_router/server.py +213 -0
- rem/api/mcp_router/tools.py +584 -0
- rem/api/routers/auth.py +229 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/completions.py +281 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +124 -0
- rem/api/routers/chat/streaming.py +185 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +26 -0
- rem/auth/middleware.py +100 -0
- rem/auth/providers/__init__.py +13 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +455 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +126 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +565 -0
- rem/cli/commands/configure.py +423 -0
- rem/cli/commands/db.py +493 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1124 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +245 -0
- rem/cli/commands/schema.py +183 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +88 -0
- rem/config.py +237 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +64 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +628 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +243 -0
- rem/models/entities/__init__.py +43 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +35 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +191 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/user.py +85 -0
- rem/py.typed +0 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +128 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +16 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +806 -0
- rem/services/content/service.py +657 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +336 -0
- rem/services/dreaming/moment_service.py +264 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +120 -0
- rem/services/embeddings/worker.py +421 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +686 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +575 -0
- rem/services/postgres/__init__.py +23 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
- rem/services/postgres/register_type.py +352 -0
- rem/services/postgres/repository.py +337 -0
- rem/services/postgres/schema_generator.py +379 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +354 -0
- rem/services/rem/README.md +304 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +145 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +527 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +6 -0
- rem/services/session/compression.py +360 -0
- rem/services/session/reload.py +77 -0
- rem/settings.py +1235 -0
- rem/sql/002_install_models.sql +1068 -0
- rem/sql/background_indexes.sql +42 -0
- rem/sql/install_models.sql +1038 -0
- rem/sql/migrations/001_install.sql +503 -0
- rem/sql/migrations/002_install_models.sql +1202 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +583 -0
- rem/utils/__init__.py +43 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +423 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/markdown.py +16 -0
- rem/utils/model_helpers.py +236 -0
- rem/utils/schema_loader.py +229 -0
- rem/utils/sql_types.py +348 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +330 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +5 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- remdb-0.2.6.dist-info/METADATA +1191 -0
- remdb-0.2.6.dist-info/RECORD +187 -0
- remdb-0.2.6.dist-info/WHEEL +4 -0
- remdb-0.2.6.dist-info/entry_points.txt +2 -0
rem/utils/sql_types.py
ADDED
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pydantic to PostgreSQL Type Mapping Utility.
|
|
3
|
+
|
|
4
|
+
Maps Pydantic field types to PostgreSQL column types with intelligent defaults:
|
|
5
|
+
- Strings: VARCHAR(256) by default, TEXT for content/description fields
|
|
6
|
+
- Union types: Prefer UUID, JSONB over other types
|
|
7
|
+
- Lists of strings: TEXT[] (PostgreSQL arrays)
|
|
8
|
+
- Dicts and lists of dicts: JSONB
|
|
9
|
+
- Field metadata: Respect json_schema_extra for custom types and embeddings
|
|
10
|
+
|
|
11
|
+
Best Practices:
|
|
12
|
+
- VARCHAR(256) for most strings (indexes work well, prevents excessive data)
|
|
13
|
+
- TEXT for long-form content (descriptions, summaries, content fields)
|
|
14
|
+
- JSONB for structured data (better querying than JSON)
|
|
15
|
+
- Arrays for simple lists, JSONB for complex nested structures
|
|
16
|
+
- UUID for identifiers in Union types
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from datetime import date, datetime, time
|
|
20
|
+
from typing import Any, Union, get_args, get_origin
|
|
21
|
+
from uuid import UUID
|
|
22
|
+
|
|
23
|
+
from pydantic import BaseModel
|
|
24
|
+
from pydantic.fields import FieldInfo
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Exact field names (compared lower-cased) that hold long-form prose.
# String fields with one of these names are stored as TEXT, not VARCHAR.
LONG_TEXT_FIELD_NAMES = {
    "content", "description", "summary", "instructions", "prompt",
    "message", "body", "text", "note", "comment",
}
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def get_sql_type(field_info: FieldInfo, field_name: str) -> str:
|
|
43
|
+
"""
|
|
44
|
+
Map Pydantic field to PostgreSQL type.
|
|
45
|
+
|
|
46
|
+
Args:
|
|
47
|
+
field_info: Pydantic FieldInfo object
|
|
48
|
+
field_name: Name of the field (used for heuristics)
|
|
49
|
+
|
|
50
|
+
Returns:
|
|
51
|
+
PostgreSQL type string (e.g., "VARCHAR(256)", "JSONB", "TEXT[]")
|
|
52
|
+
|
|
53
|
+
Examples:
|
|
54
|
+
>>> from pydantic import Field
|
|
55
|
+
>>> get_sql_type(Field(default="test"), "name")
|
|
56
|
+
'VARCHAR(256)'
|
|
57
|
+
>>> get_sql_type(Field(default=""), "content")
|
|
58
|
+
'TEXT'
|
|
59
|
+
>>> get_sql_type(Field(default_factory=dict), "metadata")
|
|
60
|
+
'JSONB'
|
|
61
|
+
"""
|
|
62
|
+
# Check for explicit sql_type in json_schema_extra
|
|
63
|
+
if field_info.json_schema_extra:
|
|
64
|
+
if isinstance(field_info.json_schema_extra, dict):
|
|
65
|
+
if "sql_type" in field_info.json_schema_extra:
|
|
66
|
+
return field_info.json_schema_extra["sql_type"]
|
|
67
|
+
|
|
68
|
+
# Fields with embedding_provider should be TEXT (for vector search preprocessing)
|
|
69
|
+
# Format: "openai:text-embedding-3-small" or "anthropic:voyage-2"
|
|
70
|
+
if "embedding_provider" in field_info.json_schema_extra:
|
|
71
|
+
return "TEXT"
|
|
72
|
+
|
|
73
|
+
# Get the annotation (type hint)
|
|
74
|
+
annotation = field_info.annotation
|
|
75
|
+
|
|
76
|
+
# Handle None annotation (shouldn't happen, but be safe)
|
|
77
|
+
if annotation is None:
|
|
78
|
+
return "TEXT"
|
|
79
|
+
|
|
80
|
+
# Handle Union types (including Optional[T] which is Union[T, None])
|
|
81
|
+
origin = get_origin(annotation)
|
|
82
|
+
if origin is Union:
|
|
83
|
+
args = get_args(annotation)
|
|
84
|
+
# Filter out NoneType
|
|
85
|
+
non_none_args = [arg for arg in args if arg is not type(None)]
|
|
86
|
+
|
|
87
|
+
if not non_none_args:
|
|
88
|
+
return "TEXT"
|
|
89
|
+
|
|
90
|
+
# Prefer UUID over other types in unions
|
|
91
|
+
if UUID in non_none_args:
|
|
92
|
+
return "UUID"
|
|
93
|
+
|
|
94
|
+
# Prefer dict/JSONB over other types in unions
|
|
95
|
+
if dict in non_none_args:
|
|
96
|
+
return "JSONB"
|
|
97
|
+
|
|
98
|
+
# Use the first non-None type
|
|
99
|
+
return _map_simple_type(non_none_args[0], field_name)
|
|
100
|
+
|
|
101
|
+
# Handle simple types
|
|
102
|
+
return _map_simple_type(annotation, field_name)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _map_simple_type(python_type: type, field_name: str) -> str:
    """
    Map a single (non-union) Python type to a PostgreSQL type.

    Rules, in order:

    - ``list[str]`` -> ``TEXT[]``; every other list (typed, untyped, or the
      bare ``list`` builtin) -> ``JSONB``
    - ``dict`` in any form -> ``JSONB``
    - ``str`` -> ``TEXT`` or ``VARCHAR(256)`` depending on the field name
    - int/float/bool/UUID/datetime/date/time/bytes -> matching scalar type
    - Pydantic model classes -> ``JSONB``
    - anything else -> ``TEXT``

    Args:
        python_type: Python type annotation
        field_name: Field name, used only for the string-length heuristic

    Returns:
        PostgreSQL type string
    """
    origin = get_origin(python_type)

    # Bare `list` has no origin, so test it explicitly (mirrors the dict check
    # below); otherwise it would fall through to the TEXT default.
    if origin is list or python_type is list:
        element_types = get_args(python_type)
        if element_types and element_types[0] is str:
            return "TEXT[]"
        # Lists of dicts, nested generics, primitives and untyped lists
        return "JSONB"

    if origin is dict or python_type is dict:
        return "JSONB"

    # Only consult the field-name heuristic when the type really is a string.
    if python_type is str:
        return _get_string_type(field_name)

    scalar_types = {
        int: "INTEGER",
        float: "DOUBLE PRECISION",
        bool: "BOOLEAN",
        UUID: "UUID",
        datetime: "TIMESTAMP",
        date: "DATE",
        time: "TIME",
        bytes: "BYTEA",
    }
    if python_type in scalar_types:
        return scalar_types[python_type]

    # Nested Pydantic models are stored as JSON documents
    if isinstance(python_type, type) and issubclass(python_type, BaseModel):
        return "JSONB"

    # Default to TEXT for unknown types
    return "TEXT"
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _get_string_type(field_name: str) -> str:
    """
    Choose the PostgreSQL string type for a field from its name alone.

    The name is compared case-insensitively. It counts as long-form when it
    is listed in ``LONG_TEXT_FIELD_NAMES`` or ends with one of a fixed set of
    suffixes (``_content``, ``_description``, ``_summary``, ``_text``,
    ``_message``).

    Args:
        field_name: Name of the field

    Returns:
        "TEXT" for long-form content, "VARCHAR(256)" for everything else
    """
    long_form_suffixes = ("_content", "_description", "_summary", "_text", "_message")
    lowered = field_name.lower()

    is_long_form = lowered in LONG_TEXT_FIELD_NAMES or lowered.endswith(long_form_suffixes)
    return "TEXT" if is_long_form else "VARCHAR(256)"
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def get_column_definition(
    field_info: FieldInfo,
    field_name: str,
    nullable: bool = True,
    primary_key: bool = False,
) -> str:
    """
    Generate a complete PostgreSQL column definition.

    The result is ``<name> <type>`` followed by ``PRIMARY KEY`` (when
    ``primary_key`` is set) or ``NOT NULL`` (when ``nullable`` is false), and
    a ``DEFAULT`` clause for JSONB and array columns whose field declares a
    ``default_factory``.

    Args:
        field_info: Pydantic FieldInfo object
        field_name: Name of the column (emitted verbatim, not quoted)
        nullable: Whether column allows NULL; ignored for primary keys
        primary_key: Whether this is a primary key

    Returns:
        Complete column definition SQL

    Examples:
        >>> from pydantic import BaseModel, Field
        >>> class Doc(BaseModel):
        ...     name: str
        ...     metadata: dict = Field(default_factory=dict)
        >>> get_column_definition(Doc.model_fields["name"], "name", nullable=False)
        'name VARCHAR(256) NOT NULL'
        >>> get_column_definition(Doc.model_fields["metadata"], "metadata")
        "metadata JSONB DEFAULT '{}'::jsonb"
    """
    sql_type = get_sql_type(field_info, field_name)

    parts = [field_name, sql_type]

    if primary_key:
        parts.append("PRIMARY KEY")
    elif not nullable:
        parts.append("NOT NULL")

    # Add defaults for JSONB and arrays
    factory = field_info.default_factory
    if factory is not None:
        if sql_type == "JSONB":
            # A list-valued field must default to a JSON array, not an object.
            annotation = field_info.annotation
            is_list = (
                factory is list
                or annotation is list
                or get_origin(annotation) is list
            )
            json_literal = "[]" if is_list else "{}"
            parts.append(f"DEFAULT '{json_literal}'::jsonb")
        elif sql_type.endswith("[]"):
            # Cast the empty array to the column's own type so that custom
            # array types (e.g. an explicit sql_type of INTEGER[]) stay valid.
            parts.append(f"DEFAULT ARRAY[]::{sql_type}")

    return " ".join(parts)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def model_to_create_table(
    model: type[BaseModel],
    table_name: str,
    include_indexes: bool = True,
) -> str:
    """
    Generate a CREATE TABLE statement from a Pydantic model.

    Every model field becomes one column. The field named ``id`` is the
    primary key. A column is nullable when the field is optional (has a
    default) or when its annotation admits ``None``; all other columns are
    emitted as NOT NULL.

    When ``include_indexes`` is true, one index per non-primary-key column is
    appended where applicable: a plain index for columns whose name ends in
    ``_id``, otherwise a GIN index for JSONB and array columns.

    Args:
        model: Pydantic model class
        table_name: Name of the table to create (may be schema-qualified)
        include_indexes: Whether to include index creation statements

    Returns:
        SQL CREATE TABLE statement, followed by any CREATE INDEX statements

    Examples:
        >>> from pydantic import BaseModel, Field
        >>> class User(BaseModel):
        ...     id: str = Field(..., description="User ID")
        ...     name: str
        ...     metadata: dict = Field(default_factory=dict)
        >>> sql = model_to_create_table(User, "users")
        >>> "CREATE TABLE" in sql
        True
        >>> "name VARCHAR(256) NOT NULL" in sql
        True
    """
    columns = []
    indexes = []

    # Index names cannot be schema-qualified, so flatten "schema.table".
    index_prefix = f"idx_{table_name.replace('.', '_')}"

    for field_name, field_info in model.model_fields.items():
        # A required field's default is Pydantic's "undefined" sentinel, never
        # None, so nullability has to come from is_required() and the type.
        accepts_none = type(None) in get_args(field_info.annotation)
        nullable = not field_info.is_required() or accepts_none

        # The primary key is the field named exactly 'id'
        is_pk = field_name == "id"

        column_def = get_column_definition(field_info, field_name, nullable, is_pk)
        columns.append(f" {column_def}")

        if not include_indexes or is_pk:
            continue

        sql_type = get_sql_type(field_info, field_name)
        index_name = f"{index_prefix}_{field_name}"

        # At most one index per column: both kinds share the same name, so a
        # second CREATE INDEX IF NOT EXISTS would be silently skipped anyway.
        if field_name.endswith("_id"):
            # Foreign keys and frequently queried identifiers
            indexes.append(
                f"CREATE INDEX IF NOT EXISTS {index_name} "
                f"ON {table_name}({field_name});"
            )
        elif sql_type == "JSONB" or sql_type.endswith("[]"):
            # GIN indexes for JSONB and arrays
            indexes.append(
                f"CREATE INDEX IF NOT EXISTS {index_name} "
                f"ON {table_name} USING GIN({field_name});"
            )

    # Build CREATE TABLE statement
    create_table = f"CREATE TABLE IF NOT EXISTS {table_name} (\n"
    create_table += ",\n".join(columns)
    create_table += "\n);"

    # Add indexes
    if indexes:
        create_table += "\n\n-- Indexes\n"
        create_table += "\n".join(indexes)

    return create_table
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def model_to_upsert(
    model: type[BaseModel],
    table_name: str,
    conflict_column: str = "id",
) -> str:
    """
    Generate an INSERT ... ON CONFLICT (UPSERT) statement template.

    Columns are listed in model field order and bound to positional
    placeholders ``$1 .. $n``. On conflict every column except
    ``conflict_column`` is overwritten with the incoming value. When there
    is nothing left to overwrite (the model only has the conflict column)
    the statement uses ``DO NOTHING`` instead.

    Args:
        model: Pydantic model class
        table_name: Name of the table
        conflict_column: Column to use for conflict detection (usually 'id')

    Returns:
        SQL UPSERT statement with placeholders

    Examples:
        >>> from pydantic import BaseModel
        >>> class User(BaseModel):
        ...     id: str
        ...     name: str
        >>> sql = model_to_upsert(User, "users")
        >>> "ON CONFLICT" in sql
        True
    """
    field_names = list(model.model_fields)
    placeholders = [f"${position}" for position in range(1, len(field_names) + 1)]

    # The conflict column identifies the row, so it is never updated.
    update_fields = [name for name in field_names if name != conflict_column]

    if update_fields:
        assignments = ", ".join(f"{name} = EXCLUDED.{name}" for name in update_fields)
        conflict_action = f"DO UPDATE SET {assignments}"
    else:
        # An empty SET list is a syntax error in PostgreSQL.
        conflict_action = "DO NOTHING"

    return "\n".join(
        [
            f"INSERT INTO {table_name} ({', '.join(field_names)})",
            f"VALUES ({', '.join(placeholders)})",
            f"ON CONFLICT ({conflict_column})",
            f"{conflict_action};",
        ]
    )
|
rem/utils/user_id.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utility functions for user ID generation and management.
|
|
3
|
+
|
|
4
|
+
Provides deterministic UUID generation from email addresses for consistent
|
|
5
|
+
user identification across the REM system.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import hashlib
|
|
9
|
+
import uuid
|
|
10
|
+
from typing import Union
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def email_to_user_id(email: str) -> str:
    """
    Generate a deterministic UUID from an email address.

    The email is lower-cased and stripped of surrounding whitespace, then
    hashed with UUID5 (SHA-1 based) under the standard RFC 4122 DNS
    namespace, so that:
    - Same email always produces same UUID
    - Different emails produce different UUIDs
    - UUIDs are valid RFC 4122 format

    Args:
        email: Email address to convert

    Returns:
        String representation of UUID (e.g., "550e8400-e29b-41d4-a716-446655440000")

    Examples:
        >>> email_to_user_id("alice@example.com") == email_to_user_id(" Alice@Example.com ")
        True
        >>> email_to_user_id("alice@example.com") == email_to_user_id("bob@example.com")
        False
    """
    # NOTE: this is the well-known DNS namespace
    # (6ba7b810-9dad-11d1-80b4-00c04fd430c8), not a REM-specific one.
    # It must never change: every previously issued user ID depends on it.
    namespace = uuid.NAMESPACE_DNS

    # Normalize email: lowercase and strip whitespace
    normalized_email = email.lower().strip()

    return str(uuid.uuid5(namespace, normalized_email))
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def user_id_to_uuid(user_id: Union[str, uuid.UUID]) -> uuid.UUID:
    """
    Convert a user_id to a UUID object.

    Handles both UUID strings and already-parsed UUID objects; a UUID
    instance is returned unchanged.

    Args:
        user_id: User ID as string or UUID

    Returns:
        UUID object

    Raises:
        ValueError: If user_id is not a valid UUID format, including values
            that are neither a string nor a UUID (e.g. None or an int)
    """
    if isinstance(user_id, uuid.UUID):
        return user_id
    try:
        return uuid.UUID(user_id)
    except (AttributeError, TypeError) as err:
        # uuid.UUID raises these for non-string input; surface them as the
        # documented ValueError so callers need a single except clause.
        raise ValueError(f"user_id is not a valid UUID: {user_id!r}") from err
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def is_valid_uuid(value: str) -> bool:
    """
    Check if a value is a valid UUID.

    Accepts any string form understood by ``uuid.UUID``. An existing
    ``uuid.UUID`` instance is also reported as valid, matching what
    ``user_id_to_uuid`` accepts. Never raises.

    Args:
        value: String to check

    Returns:
        True if valid UUID, False otherwise
    """
    # uuid.UUID() cannot take a UUID instance, so handle that case up front
    # instead of misreporting an actual UUID as invalid.
    if isinstance(value, uuid.UUID):
        return True
    try:
        uuid.UUID(value)
    except (ValueError, AttributeError, TypeError):
        # ValueError: malformed string; the other two: non-string input
        return False
    return True
|