remdb 0.3.242__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (235) hide show
  1. rem/__init__.py +129 -0
  2. rem/agentic/README.md +760 -0
  3. rem/agentic/__init__.py +54 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +38 -0
  6. rem/agentic/agents/agent_manager.py +311 -0
  7. rem/agentic/agents/sse_simulator.py +502 -0
  8. rem/agentic/context.py +425 -0
  9. rem/agentic/context_builder.py +360 -0
  10. rem/agentic/llm_provider_models.py +301 -0
  11. rem/agentic/mcp/__init__.py +0 -0
  12. rem/agentic/mcp/tool_wrapper.py +273 -0
  13. rem/agentic/otel/__init__.py +5 -0
  14. rem/agentic/otel/setup.py +240 -0
  15. rem/agentic/providers/phoenix.py +926 -0
  16. rem/agentic/providers/pydantic_ai.py +854 -0
  17. rem/agentic/query.py +117 -0
  18. rem/agentic/query_helper.py +89 -0
  19. rem/agentic/schema.py +737 -0
  20. rem/agentic/serialization.py +245 -0
  21. rem/agentic/tools/__init__.py +5 -0
  22. rem/agentic/tools/rem_tools.py +242 -0
  23. rem/api/README.md +657 -0
  24. rem/api/deps.py +253 -0
  25. rem/api/main.py +460 -0
  26. rem/api/mcp_router/prompts.py +182 -0
  27. rem/api/mcp_router/resources.py +820 -0
  28. rem/api/mcp_router/server.py +243 -0
  29. rem/api/mcp_router/tools.py +1605 -0
  30. rem/api/middleware/tracking.py +172 -0
  31. rem/api/routers/admin.py +520 -0
  32. rem/api/routers/auth.py +898 -0
  33. rem/api/routers/chat/__init__.py +5 -0
  34. rem/api/routers/chat/child_streaming.py +394 -0
  35. rem/api/routers/chat/completions.py +702 -0
  36. rem/api/routers/chat/json_utils.py +76 -0
  37. rem/api/routers/chat/models.py +202 -0
  38. rem/api/routers/chat/otel_utils.py +33 -0
  39. rem/api/routers/chat/sse_events.py +546 -0
  40. rem/api/routers/chat/streaming.py +950 -0
  41. rem/api/routers/chat/streaming_utils.py +327 -0
  42. rem/api/routers/common.py +18 -0
  43. rem/api/routers/dev.py +87 -0
  44. rem/api/routers/feedback.py +276 -0
  45. rem/api/routers/messages.py +620 -0
  46. rem/api/routers/models.py +86 -0
  47. rem/api/routers/query.py +362 -0
  48. rem/api/routers/shared_sessions.py +422 -0
  49. rem/auth/README.md +258 -0
  50. rem/auth/__init__.py +36 -0
  51. rem/auth/jwt.py +367 -0
  52. rem/auth/middleware.py +318 -0
  53. rem/auth/providers/__init__.py +16 -0
  54. rem/auth/providers/base.py +376 -0
  55. rem/auth/providers/email.py +215 -0
  56. rem/auth/providers/google.py +163 -0
  57. rem/auth/providers/microsoft.py +237 -0
  58. rem/cli/README.md +517 -0
  59. rem/cli/__init__.py +8 -0
  60. rem/cli/commands/README.md +299 -0
  61. rem/cli/commands/__init__.py +3 -0
  62. rem/cli/commands/ask.py +549 -0
  63. rem/cli/commands/cluster.py +1808 -0
  64. rem/cli/commands/configure.py +495 -0
  65. rem/cli/commands/db.py +828 -0
  66. rem/cli/commands/dreaming.py +324 -0
  67. rem/cli/commands/experiments.py +1698 -0
  68. rem/cli/commands/mcp.py +66 -0
  69. rem/cli/commands/process.py +388 -0
  70. rem/cli/commands/query.py +109 -0
  71. rem/cli/commands/scaffold.py +47 -0
  72. rem/cli/commands/schema.py +230 -0
  73. rem/cli/commands/serve.py +106 -0
  74. rem/cli/commands/session.py +453 -0
  75. rem/cli/dreaming.py +363 -0
  76. rem/cli/main.py +123 -0
  77. rem/config.py +244 -0
  78. rem/mcp_server.py +41 -0
  79. rem/models/core/__init__.py +49 -0
  80. rem/models/core/core_model.py +70 -0
  81. rem/models/core/engram.py +333 -0
  82. rem/models/core/experiment.py +672 -0
  83. rem/models/core/inline_edge.py +132 -0
  84. rem/models/core/rem_query.py +246 -0
  85. rem/models/entities/__init__.py +68 -0
  86. rem/models/entities/domain_resource.py +38 -0
  87. rem/models/entities/feedback.py +123 -0
  88. rem/models/entities/file.py +57 -0
  89. rem/models/entities/image_resource.py +88 -0
  90. rem/models/entities/message.py +64 -0
  91. rem/models/entities/moment.py +123 -0
  92. rem/models/entities/ontology.py +181 -0
  93. rem/models/entities/ontology_config.py +131 -0
  94. rem/models/entities/resource.py +95 -0
  95. rem/models/entities/schema.py +87 -0
  96. rem/models/entities/session.py +84 -0
  97. rem/models/entities/shared_session.py +180 -0
  98. rem/models/entities/subscriber.py +175 -0
  99. rem/models/entities/user.py +93 -0
  100. rem/py.typed +0 -0
  101. rem/registry.py +373 -0
  102. rem/schemas/README.md +507 -0
  103. rem/schemas/__init__.py +6 -0
  104. rem/schemas/agents/README.md +92 -0
  105. rem/schemas/agents/core/agent-builder.yaml +235 -0
  106. rem/schemas/agents/core/moment-builder.yaml +178 -0
  107. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  108. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  109. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  110. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  111. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  112. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  113. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  114. rem/schemas/agents/examples/hello-world.yaml +37 -0
  115. rem/schemas/agents/examples/query.yaml +54 -0
  116. rem/schemas/agents/examples/simple.yaml +21 -0
  117. rem/schemas/agents/examples/test.yaml +29 -0
  118. rem/schemas/agents/rem.yaml +132 -0
  119. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  120. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  121. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  122. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  123. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  124. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  125. rem/services/__init__.py +18 -0
  126. rem/services/audio/INTEGRATION.md +308 -0
  127. rem/services/audio/README.md +376 -0
  128. rem/services/audio/__init__.py +15 -0
  129. rem/services/audio/chunker.py +354 -0
  130. rem/services/audio/transcriber.py +259 -0
  131. rem/services/content/README.md +1269 -0
  132. rem/services/content/__init__.py +5 -0
  133. rem/services/content/providers.py +760 -0
  134. rem/services/content/service.py +762 -0
  135. rem/services/dreaming/README.md +230 -0
  136. rem/services/dreaming/__init__.py +53 -0
  137. rem/services/dreaming/affinity_service.py +322 -0
  138. rem/services/dreaming/moment_service.py +251 -0
  139. rem/services/dreaming/ontology_service.py +54 -0
  140. rem/services/dreaming/user_model_service.py +297 -0
  141. rem/services/dreaming/utils.py +39 -0
  142. rem/services/email/__init__.py +10 -0
  143. rem/services/email/service.py +522 -0
  144. rem/services/email/templates.py +360 -0
  145. rem/services/embeddings/__init__.py +11 -0
  146. rem/services/embeddings/api.py +127 -0
  147. rem/services/embeddings/worker.py +435 -0
  148. rem/services/fs/README.md +662 -0
  149. rem/services/fs/__init__.py +62 -0
  150. rem/services/fs/examples.py +206 -0
  151. rem/services/fs/examples_paths.py +204 -0
  152. rem/services/fs/git_provider.py +935 -0
  153. rem/services/fs/local_provider.py +760 -0
  154. rem/services/fs/parsing-hooks-examples.md +172 -0
  155. rem/services/fs/paths.py +276 -0
  156. rem/services/fs/provider.py +460 -0
  157. rem/services/fs/s3_provider.py +1042 -0
  158. rem/services/fs/service.py +186 -0
  159. rem/services/git/README.md +1075 -0
  160. rem/services/git/__init__.py +17 -0
  161. rem/services/git/service.py +469 -0
  162. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  163. rem/services/phoenix/README.md +453 -0
  164. rem/services/phoenix/__init__.py +46 -0
  165. rem/services/phoenix/client.py +960 -0
  166. rem/services/phoenix/config.py +88 -0
  167. rem/services/phoenix/prompt_labels.py +477 -0
  168. rem/services/postgres/README.md +757 -0
  169. rem/services/postgres/__init__.py +49 -0
  170. rem/services/postgres/diff_service.py +599 -0
  171. rem/services/postgres/migration_service.py +427 -0
  172. rem/services/postgres/programmable_diff_service.py +635 -0
  173. rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
  174. rem/services/postgres/register_type.py +353 -0
  175. rem/services/postgres/repository.py +481 -0
  176. rem/services/postgres/schema_generator.py +661 -0
  177. rem/services/postgres/service.py +802 -0
  178. rem/services/postgres/sql_builder.py +355 -0
  179. rem/services/rate_limit.py +113 -0
  180. rem/services/rem/README.md +318 -0
  181. rem/services/rem/__init__.py +23 -0
  182. rem/services/rem/exceptions.py +71 -0
  183. rem/services/rem/executor.py +293 -0
  184. rem/services/rem/parser.py +180 -0
  185. rem/services/rem/queries.py +196 -0
  186. rem/services/rem/query.py +371 -0
  187. rem/services/rem/service.py +608 -0
  188. rem/services/session/README.md +374 -0
  189. rem/services/session/__init__.py +13 -0
  190. rem/services/session/compression.py +488 -0
  191. rem/services/session/pydantic_messages.py +310 -0
  192. rem/services/session/reload.py +85 -0
  193. rem/services/user_service.py +130 -0
  194. rem/settings.py +1877 -0
  195. rem/sql/background_indexes.sql +52 -0
  196. rem/sql/migrations/001_install.sql +983 -0
  197. rem/sql/migrations/002_install_models.sql +3157 -0
  198. rem/sql/migrations/003_optional_extensions.sql +326 -0
  199. rem/sql/migrations/004_cache_system.sql +282 -0
  200. rem/sql/migrations/005_schema_update.sql +145 -0
  201. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  202. rem/utils/AGENTIC_CHUNKING.md +597 -0
  203. rem/utils/README.md +628 -0
  204. rem/utils/__init__.py +61 -0
  205. rem/utils/agentic_chunking.py +622 -0
  206. rem/utils/batch_ops.py +343 -0
  207. rem/utils/chunking.py +108 -0
  208. rem/utils/clip_embeddings.py +276 -0
  209. rem/utils/constants.py +97 -0
  210. rem/utils/date_utils.py +228 -0
  211. rem/utils/dict_utils.py +98 -0
  212. rem/utils/embeddings.py +436 -0
  213. rem/utils/examples/embeddings_example.py +305 -0
  214. rem/utils/examples/sql_types_example.py +202 -0
  215. rem/utils/files.py +323 -0
  216. rem/utils/markdown.py +16 -0
  217. rem/utils/mime_types.py +158 -0
  218. rem/utils/model_helpers.py +492 -0
  219. rem/utils/schema_loader.py +649 -0
  220. rem/utils/sql_paths.py +146 -0
  221. rem/utils/sql_types.py +350 -0
  222. rem/utils/user_id.py +81 -0
  223. rem/utils/vision.py +325 -0
  224. rem/workers/README.md +506 -0
  225. rem/workers/__init__.py +7 -0
  226. rem/workers/db_listener.py +579 -0
  227. rem/workers/db_maintainer.py +74 -0
  228. rem/workers/dreaming.py +502 -0
  229. rem/workers/engram_processor.py +312 -0
  230. rem/workers/sqs_file_processor.py +193 -0
  231. rem/workers/unlogged_maintainer.py +463 -0
  232. remdb-0.3.242.dist-info/METADATA +1632 -0
  233. remdb-0.3.242.dist-info/RECORD +235 -0
  234. remdb-0.3.242.dist-info/WHEEL +4 -0
  235. remdb-0.3.242.dist-info/entry_points.txt +2 -0
rem/utils/sql_paths.py ADDED
@@ -0,0 +1,146 @@
1
+ """Utilities for resolving SQL file paths.
2
+
3
+ Handles package SQL directory resolution and user migrations.
4
+
5
+ Convention for user migrations:
6
+ Place custom SQL files in `./sql/migrations/` relative to your project root.
7
+ Files should be numbered (e.g., `100_custom_table.sql`) to control execution order.
8
+ Package migrations (001-099) run first, then user migrations (100+).
9
+ """
10
+
11
+ from pathlib import Path
12
+ from typing import List, Optional
13
+ import importlib.resources
14
+
15
# Convention: default location for user-maintained migrations.
# This is a directory name, joined onto the current working directory
# by get_user_sql_dir().
USER_SQL_DIR_CONVENTION = "sql"
17
+
18
+
19
def get_package_sql_dir() -> Path:
    """Get the SQL directory from the installed rem package.

    Lookup order (the first existing directory wins):
        1. ``importlib.resources.files("rem") / "sql"``
        2. ``sql`` next to ``rem.__file__``
        3. ``src/rem/sql`` relative to the current working directory
           (development checkout)

    Returns:
        Path to the package's sql directory

    Raises:
        FileNotFoundError: If the SQL directory cannot be found
    """
    try:
        # Use importlib.resources for Python 3.9+
        sql_ref = importlib.resources.files("rem") / "sql"
        package_sql = Path(str(sql_ref))
        if package_sql.exists():
            return package_sql
    except (AttributeError, TypeError, ImportError):
        # files() imports the package, so it raises ImportError
        # (ModuleNotFoundError) when "rem" is not importable. That must not
        # escape: the fallbacks below and the documented FileNotFoundError
        # would otherwise be unreachable.
        pass

    # Fallback: use __file__ to find package location
    try:
        import rem

        # rem.__file__ can be None (e.g. namespace package) -> TypeError
        package_sql = Path(rem.__file__).parent / "sql"
        if package_sql.exists():
            return package_sql
    except (ImportError, AttributeError, TypeError):
        pass

    # Development fallback: check relative to cwd
    dev_sql = Path("src/rem/sql")
    if dev_sql.exists():
        return dev_sql

    raise FileNotFoundError(
        "Could not locate rem SQL directory. "
        "Ensure remdb is properly installed or run from the source directory."
    )
55
+
56
+
57
def get_package_migrations_dir() -> Path:
    """Return the ``migrations`` subdirectory of the package SQL directory.

    The result is built from ``get_package_sql_dir()``; this function does
    not itself check that the ``migrations`` directory exists.

    Returns:
        Path to the package's migrations directory
    """
    sql_root = get_package_sql_dir()
    return sql_root.joinpath("migrations")
64
+
65
+
66
def get_user_sql_dir() -> Optional[Path]:
    """Locate the user's SQL directory under the current working directory.

    The candidate is ``<cwd>/<USER_SQL_DIR_CONVENTION>``. It is returned only
    when it exists and is a directory.

    Returns:
        Path to user sql directory if it exists, None otherwise
    """
    candidate = Path.cwd().joinpath(USER_SQL_DIR_CONVENTION)
    if not candidate.exists():
        return None
    if not candidate.is_dir():
        return None
    return candidate
79
+
80
+
81
def list_package_migrations() -> List[Path]:
    """Collect the numbered ``*.sql`` files shipped in the package.

    Only files whose name starts with a digit are kept. If the package SQL
    directory cannot be located, or the migrations directory does not exist,
    an empty list is returned.

    Returns:
        Sorted list of migration file paths
    """
    try:
        package_dir = get_package_migrations_dir()
        if not package_dir.exists():
            return []
        # Only numbered migrations
        numbered = [
            path for path in package_dir.glob("*.sql") if path.name[0].isdigit()
        ]
    except FileNotFoundError:
        return []

    numbered.sort()
    return numbered
98
+
99
+
100
def list_user_migrations() -> List[Path]:
    """Collect the numbered ``*.sql`` files in the user's sql/migrations directory.

    Only files whose name starts with a digit are kept. Returns an empty list
    when there is no user SQL directory or it has no ``migrations`` folder.

    Returns:
        Sorted list of user migration file paths
    """
    sql_root = get_user_sql_dir()
    if not sql_root:
        return []

    user_dir = sql_root / "migrations"
    if not user_dir.exists():
        return []

    # Only numbered migrations
    numbered = [path for path in user_dir.glob("*.sql") if path.name[0].isdigit()]
    numbered.sort()
    return numbered
115
+
116
+
117
def list_all_migrations() -> List[Path]:
    """Merge package and user migrations into one list ordered by filename.

    Sources, in priority order:
        1. Package migrations directory
        2. User directory (./sql/migrations/) if it exists

    When two files share a filename, the first one seen is kept, so a package
    migration wins over a user migration of the same name.

    Ordering is by filename string, so use numbered prefixes to control it:
        - 001-099: Reserved for package migrations
        - 100+: Recommended for user migrations

    Returns:
        Sorted list of all migration file paths (by filename)
    """
    first_by_name = {}
    for source in (list_package_migrations(), list_user_migrations()):
        for path in source:
            # setdefault keeps the earliest path registered under a name
            first_by_name.setdefault(path.name, path)

    return sorted(first_by_name.values(), key=lambda p: p.name)
rem/utils/sql_types.py ADDED
@@ -0,0 +1,350 @@
1
+ """
2
+ Pydantic to PostgreSQL Type Mapping Utility.
3
+
4
+ Maps Pydantic field types to PostgreSQL column types with intelligent defaults:
5
+ - Strings: VARCHAR(256) by default, TEXT for content/description fields
6
+ - Union types: Prefer UUID, JSONB over other types
7
+ - Lists of strings: TEXT[] (PostgreSQL arrays)
8
+ - Dicts and lists of dicts: JSONB
9
+ - Field metadata: Respect json_schema_extra for custom types and embeddings
10
+
11
+ Best Practices:
12
+ - VARCHAR(256) for most strings (indexes work well, prevents excessive data)
13
+ - TEXT for long-form content (descriptions, summaries, content fields)
14
+ - JSONB for structured data (better querying than JSON)
15
+ - Arrays for simple lists, JSONB for complex nested structures
16
+ - UUID for identifiers in Union types
17
+ """
18
+
19
+ import types
20
+ from datetime import date, datetime, time
21
+ from typing import Any, Union, get_args, get_origin
22
+ from uuid import UUID
23
+
24
+ from pydantic import BaseModel
25
+ from pydantic.fields import FieldInfo
26
+
27
+
28
# Field names that should use TEXT instead of VARCHAR.
# _get_string_type() checks the lowercased field name for membership here.
LONG_TEXT_FIELD_NAMES = {
    "content",
    "description",
    "summary",
    "instructions",
    "prompt",
    "message",
    "body",
    "text",
    "note",
    "comment",
}
41
+
42
+
43
def get_sql_type(field_info: FieldInfo, field_name: str) -> str:
    """
    Map Pydantic field to PostgreSQL type.

    Resolution order:
        1. ``json_schema_extra["sql_type"]`` if present (returned as-is)
        2. ``"TEXT"`` if ``json_schema_extra`` has an ``embedding_provider`` key
        3. ``"TEXT"`` if the field has no annotation
        4. Union / Optional annotations: UUID wins, then dict (bare or
           parameterized) -> JSONB, otherwise the first non-None member
        5. Everything else via ``_map_simple_type``

    Args:
        field_info: Pydantic FieldInfo object
        field_name: Name of the field (used for heuristics)

    Returns:
        PostgreSQL type string (e.g., "VARCHAR(256)", "JSONB", "TEXT[]")

    Examples:
        >>> from pydantic import BaseModel, Field
        >>> class Doc(BaseModel):
        ...     name: str = "test"
        ...     content: str = ""
        ...     metadata: dict = Field(default_factory=dict)
        >>> get_sql_type(Doc.model_fields["name"], "name")
        'VARCHAR(256)'
        >>> get_sql_type(Doc.model_fields["content"], "content")
        'TEXT'
        >>> get_sql_type(Doc.model_fields["metadata"], "metadata")
        'JSONB'
    """
    # Explicit overrides only apply when json_schema_extra is a dict
    # (it may also be a callable, which is ignored here).
    extra = field_info.json_schema_extra
    if extra and isinstance(extra, dict):
        if "sql_type" in extra:
            return extra["sql_type"]

        # Fields with embedding_provider should be TEXT (for vector search preprocessing)
        # Format: "openai:text-embedding-3-small" or "anthropic:voyage-2"
        if "embedding_provider" in extra:
            return "TEXT"

    annotation = field_info.annotation

    # A FieldInfo built without a type hint carries annotation=None
    if annotation is None:
        return "TEXT"

    # Handle Union types (including Optional[T] which is Union[T, None])
    # Also handles Python 3.10+ `X | None` syntax which uses types.UnionType
    if get_origin(annotation) is Union or isinstance(annotation, types.UnionType):
        non_none_args = [arg for arg in get_args(annotation) if arg is not type(None)]

        if not non_none_args:
            return "TEXT"

        # Prefer UUID over other types in unions
        if UUID in non_none_args:
            return "UUID"

        # Prefer dict/JSONB over other types in unions. Parameterized forms
        # such as dict[str, Any] / Dict[str, Any] are not `dict` itself, so
        # they are recognized through get_origin().
        if any(arg is dict or get_origin(arg) is dict for arg in non_none_args):
            return "JSONB"

        # Use the first non-None type
        return _map_simple_type(non_none_args[0], field_name)

    # Handle simple types
    return _map_simple_type(annotation, field_name)
105
+
106
+
107
def _map_simple_type(python_type: type, field_name: str) -> str:
    """
    Map a simple Python type to PostgreSQL type.

    Mapping rules, in order:
        - ``list[str]``                       -> ``TEXT[]``
        - any other list (bare or typed)      -> ``JSONB``
        - dict (bare or typed)                -> ``JSONB``
        - ``str``                             -> result of ``_get_string_type``
        - int/float/bool/UUID/datetime/date/time/bytes -> fixed SQL type
        - Pydantic model classes              -> ``JSONB``
        - anything else                       -> ``TEXT``

    Args:
        python_type: Python type annotation
        field_name: Field name for heuristics

    Returns:
        PostgreSQL type string
    """
    origin = get_origin(python_type)

    # Handle list types. get_origin(list) is None, so the bare builtin must
    # be checked explicitly (same as the dict case below); otherwise an
    # untyped list would fall through to the TEXT default.
    if origin is list or python_type is list:
        inner = get_args(python_type)
        # List of strings -> PostgreSQL array
        if inner and inner[0] is str:
            return "TEXT[]"
        # Untyped lists, lists of dicts/complex types and lists of other
        # primitives are all stored as JSONB.
        return "JSONB"

    # Handle dict types -> always JSONB
    if origin is dict or python_type is dict:
        return "JSONB"

    # Strings depend on the field name; only evaluate the heuristic when needed
    if python_type is str:
        return _get_string_type(field_name)

    # Handle remaining primitive types
    type_mapping = {
        int: "INTEGER",
        float: "DOUBLE PRECISION",
        bool: "BOOLEAN",
        UUID: "UUID",
        datetime: "TIMESTAMP",
        date: "DATE",
        time: "TIME",
        bytes: "BYTEA",
    }
    if python_type in type_mapping:
        return type_mapping[python_type]

    # Check if it's a Pydantic model -> JSONB
    if isinstance(python_type, type) and issubclass(python_type, BaseModel):
        return "JSONB"

    # Default to TEXT for unknown types
    return "TEXT"
168
+
169
+
170
def _get_string_type(field_name: str) -> str:
    """
    Pick the SQL string type for a field from its name.

    The name is lowercased, then treated as long-form when it is listed in
    LONG_TEXT_FIELD_NAMES or ends with one of the long-form suffixes.

    Args:
        field_name: Name of the field

    Returns:
        "TEXT" for long-form content, "VARCHAR(256)" for others
    """
    lowered = field_name.lower()
    long_form_suffixes = ("_content", "_description", "_summary", "_text", "_message")

    is_long_form = lowered in LONG_TEXT_FIELD_NAMES or lowered.endswith(long_form_suffixes)

    # VARCHAR with a reasonable length is the default for short strings
    return "TEXT" if is_long_form else "VARCHAR(256)"
192
+
193
+
194
def get_column_definition(
    field_info: FieldInfo,
    field_name: str,
    nullable: bool = True,
    primary_key: bool = False,
) -> str:
    """
    Generate complete PostgreSQL column definition.

    The definition is ``<name> <type>`` followed by, where applicable:
        - ``PRIMARY KEY`` when ``primary_key`` is true, otherwise
          ``NOT NULL`` when ``nullable`` is false
        - a ``DEFAULT`` clause when the field has a ``default_factory`` and
          the column is JSONB or an array type

    Args:
        field_info: Pydantic FieldInfo object
        field_name: Name of the column
        nullable: Whether column allows NULL
        primary_key: Whether this is a primary key

    Returns:
        Complete column definition SQL

    Examples:
        >>> from pydantic import BaseModel, Field
        >>> class Doc(BaseModel):
        ...     name: str
        ...     metadata: dict = Field(default_factory=dict)
        >>> get_column_definition(Doc.model_fields["name"], "name", nullable=False)
        'name VARCHAR(256) NOT NULL'
        >>> get_column_definition(Doc.model_fields["metadata"], "metadata")
        "metadata JSONB DEFAULT '{}'::jsonb"
    """
    sql_type = get_sql_type(field_info, field_name)

    parts = [field_name, sql_type]

    if primary_key:
        parts.append("PRIMARY KEY")
    elif not nullable:
        parts.append("NOT NULL")

    # Add defaults for JSONB and arrays
    factory = field_info.default_factory
    if factory is not None:
        if sql_type == "JSONB":
            # A list factory means the Python default is [], so the SQL
            # default must be a JSON array rather than a JSON object.
            empty_json = "'[]'" if factory is list else "'{}'"
            parts.append(f"DEFAULT {empty_json}::jsonb")
        elif sql_type.endswith("[]"):
            # Cast the empty array to the column's own array type so a
            # custom sql_type such as INTEGER[] gets a matching default.
            parts.append(f"DEFAULT ARRAY[]::{sql_type}")

    return " ".join(parts)
236
+
237
+
238
def model_to_create_table(
    model: type[BaseModel],
    table_name: str,
    include_indexes: bool = True,
) -> str:
    """
    Generate CREATE TABLE statement from Pydantic model.

    Column rules:
        - the field named ``id`` becomes the PRIMARY KEY
        - a column is nullable when the field is not required or its
          annotation is a union that includes None; otherwise it is NOT NULL

    Index rules (skipped for the primary key and when ``include_indexes`` is
    false); at most one index is generated per column:
        - columns whose name ends with ``_id`` get a plain index
        - other JSONB and array columns get a GIN index

    Args:
        model: Pydantic model class
        table_name: Name of the table to create
        include_indexes: Whether to include index creation statements

    Returns:
        SQL CREATE TABLE statement

    Examples:
        >>> from pydantic import BaseModel, Field
        >>> class User(BaseModel):
        ...     id: str = Field(..., description="User ID")
        ...     name: str
        ...     metadata: dict = Field(default_factory=dict)
        >>> sql = model_to_create_table(User, "users")
        >>> "CREATE TABLE" in sql
        True
    """
    columns = []
    indexes = []

    for field_name, field_info in model.model_fields.items():
        # Comparing field_info.default against None does not work here:
        # required fields carry an "undefined" sentinel rather than None, so
        # that check made every column nullable. Use the annotation and
        # is_required() instead.
        annotation = field_info.annotation
        is_union = get_origin(annotation) is Union or isinstance(annotation, types.UnionType)
        allows_none = is_union and type(None) in get_args(annotation)
        nullable = allows_none or not field_info.is_required()

        # Check if this is the primary key (usually 'id')
        is_pk = field_name == "id"

        column_def = get_column_definition(field_info, field_name, nullable, is_pk)
        columns.append(f"    {column_def}")

        if not include_indexes or is_pk:
            continue

        # All indexes share one name per column, so only one statement per
        # column can ever take effect with IF NOT EXISTS; emit exactly one.
        index_name = f"idx_{table_name}_{field_name}"
        if field_name.endswith("_id"):
            # Index for foreign keys and frequently queried fields
            indexes.append(
                f"CREATE INDEX IF NOT EXISTS {index_name} "
                f"ON {table_name}({field_name});"
            )
            continue

        # GIN indexes for JSONB and arrays
        sql_type = get_sql_type(field_info, field_name)
        if sql_type == "JSONB" or sql_type.endswith("[]"):
            indexes.append(
                f"CREATE INDEX IF NOT EXISTS {index_name} "
                f"ON {table_name} USING GIN({field_name});"
            )

    # Build CREATE TABLE statement
    create_table = f"CREATE TABLE IF NOT EXISTS {table_name} (\n"
    create_table += ",\n".join(columns)
    create_table += "\n);"

    # Add indexes
    if indexes:
        create_table += "\n\n-- Indexes\n"
        create_table += "\n".join(indexes)

    return create_table
311
+
312
+
313
def model_to_upsert(
    model: type[BaseModel],
    table_name: str,
    conflict_column: str = "id",
) -> str:
    """
    Generate INSERT ... ON CONFLICT (UPSERT) statement template.

    Every model field becomes an inserted column with a positional
    placeholder (``$1``, ``$2``, ...) in field order. On conflict, every
    column except ``conflict_column`` is overwritten with the incoming value.
    If there is no other column to update, the statement uses ``DO NOTHING``
    instead of an empty (invalid) ``DO UPDATE SET`` clause.

    Args:
        model: Pydantic model class
        table_name: Name of the table
        conflict_column: Column to use for conflict detection (usually 'id')

    Returns:
        SQL UPSERT statement with placeholders

    Examples:
        >>> from pydantic import BaseModel
        >>> class User(BaseModel):
        ...     id: str
        ...     name: str
        >>> sql = model_to_upsert(User, "users")
        >>> "ON CONFLICT" in sql
        True
    """
    field_names = list(model.model_fields.keys())
    placeholders = [f"${position}" for position in range(1, len(field_names) + 1)]

    # Exclude conflict column from UPDATE
    update_set = ", ".join(
        f"{field} = EXCLUDED.{field}" for field in field_names if field != conflict_column
    )
    conflict_action = f"DO UPDATE SET {update_set}" if update_set else "DO NOTHING"

    return "\n".join(
        [
            f"INSERT INTO {table_name} ({', '.join(field_names)})",
            f"VALUES ({', '.join(placeholders)})",
            f"ON CONFLICT ({conflict_column})",
            f"{conflict_action};",
        ]
    )
rem/utils/user_id.py ADDED
@@ -0,0 +1,81 @@
1
+ """
2
+ Utility functions for user ID generation and management.
3
+
4
+ Provides deterministic UUID generation from email addresses for consistent
5
+ user identification across the REM system.
6
+ """
7
+
8
+ import hashlib
9
+ import uuid
10
+ from typing import Union
11
+
12
+
13
def email_to_user_id(email: str) -> str:
    """
    Derive a stable user ID (UUID string) from an email address.

    The email is lowercased and stripped of surrounding whitespace, then
    hashed with UUID5 (SHA-1 based) under a fixed namespace, so:
    - Same email always produces same UUID
    - Case and surrounding whitespace do not affect the result
    - UUIDs are valid RFC 4122 format

    Args:
        email: Email address to convert

    Returns:
        String representation of UUID (e.g., "550e8400-e29b-41d4-a716-446655440000")

    Examples:
        >>> email_to_user_id("alice@example.com") == email_to_user_id(" Alice@Example.com ")
        True
        >>> email_to_user_id("alice@example.com") == email_to_user_id("bob@example.com")
        False
    """
    # Fixed namespace for every derived user ID. NOTE: this literal is the
    # standard RFC 4122 DNS namespace (same value as uuid.NAMESPACE_DNS), not
    # a REM-private UUID. Changing it would change every generated user ID.
    namespace = uuid.UUID("6ba7b810-9dad-11d1-80b4-00c04fd430c8")

    canonical_email = email.lower().strip()

    return str(uuid.uuid5(namespace, canonical_email))
45
+
46
+
47
def user_id_to_uuid(user_id: Union[str, uuid.UUID]) -> uuid.UUID:
    """
    Return ``user_id`` as a UUID object.

    A value that is already a ``uuid.UUID`` is returned unchanged; anything
    else is passed to the ``uuid.UUID`` constructor.

    Args:
        user_id: User ID as string or UUID

    Returns:
        UUID object

    Raises:
        ValueError: If user_id is not a valid UUID format
    """
    already_parsed = isinstance(user_id, uuid.UUID)
    return user_id if already_parsed else uuid.UUID(user_id)
65
+
66
+
67
def is_valid_uuid(value: str) -> bool:
    """
    Report whether ``value`` can be parsed by ``uuid.UUID``.

    ValueError, AttributeError and TypeError from the constructor are all
    treated as "not a UUID", so non-string inputs yield False instead of
    raising.

    Args:
        value: String to check

    Returns:
        True if valid UUID, False otherwise
    """
    try:
        uuid.UUID(value)
    except (ValueError, AttributeError, TypeError):
        return False
    else:
        return True