remdb 0.3.242__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -0
- rem/agentic/README.md +760 -0
- rem/agentic/__init__.py +54 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +38 -0
- rem/agentic/agents/agent_manager.py +311 -0
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +425 -0
- rem/agentic/context_builder.py +360 -0
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +273 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +240 -0
- rem/agentic/providers/phoenix.py +926 -0
- rem/agentic/providers/pydantic_ai.py +854 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +737 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +242 -0
- rem/api/README.md +657 -0
- rem/api/deps.py +253 -0
- rem/api/main.py +460 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +820 -0
- rem/api/mcp_router/server.py +243 -0
- rem/api/mcp_router/tools.py +1605 -0
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +520 -0
- rem/api/routers/auth.py +898 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/child_streaming.py +394 -0
- rem/api/routers/chat/completions.py +702 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +202 -0
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +546 -0
- rem/api/routers/chat/streaming.py +950 -0
- rem/api/routers/chat/streaming_utils.py +327 -0
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +87 -0
- rem/api/routers/feedback.py +276 -0
- rem/api/routers/messages.py +620 -0
- rem/api/routers/models.py +86 -0
- rem/api/routers/query.py +362 -0
- rem/api/routers/shared_sessions.py +422 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +36 -0
- rem/auth/jwt.py +367 -0
- rem/auth/middleware.py +318 -0
- rem/auth/providers/__init__.py +16 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/email.py +215 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +517 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +299 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +549 -0
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +495 -0
- rem/cli/commands/db.py +828 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1698 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +388 -0
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +230 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/commands/session.py +453 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +123 -0
- rem/config.py +244 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +70 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +672 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +246 -0
- rem/models/entities/__init__.py +68 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +64 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +181 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/session.py +84 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +93 -0
- rem/py.typed +0 -0
- rem/registry.py +373 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/agent-builder.yaml +235 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +132 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +18 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +760 -0
- rem/services/content/service.py +762 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +322 -0
- rem/services/dreaming/moment_service.py +251 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +522 -0
- rem/services/email/templates.py +360 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +127 -0
- rem/services/embeddings/worker.py +435 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +960 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +757 -0
- rem/services/postgres/__init__.py +49 -0
- rem/services/postgres/diff_service.py +599 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
- rem/services/postgres/register_type.py +353 -0
- rem/services/postgres/repository.py +481 -0
- rem/services/postgres/schema_generator.py +661 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +355 -0
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +318 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +180 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +608 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +13 -0
- rem/services/session/compression.py +488 -0
- rem/services/session/pydantic_messages.py +310 -0
- rem/services/session/reload.py +85 -0
- rem/services/user_service.py +130 -0
- rem/settings.py +1877 -0
- rem/sql/background_indexes.sql +52 -0
- rem/sql/migrations/001_install.sql +983 -0
- rem/sql/migrations/002_install_models.sql +3157 -0
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +282 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +628 -0
- rem/utils/__init__.py +61 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +436 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/files.py +323 -0
- rem/utils/markdown.py +16 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +492 -0
- rem/utils/schema_loader.py +649 -0
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +350 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +325 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +7 -0
- rem/workers/db_listener.py +579 -0
- rem/workers/db_maintainer.py +74 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- rem/workers/unlogged_maintainer.py +463 -0
- remdb-0.3.242.dist-info/METADATA +1632 -0
- remdb-0.3.242.dist-info/RECORD +235 -0
- remdb-0.3.242.dist-info/WHEEL +4 -0
- remdb-0.3.242.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
"""SQL query builder for Pydantic models.
|
|
2
|
+
|
|
3
|
+
Generates INSERT, UPDATE, SELECT queries from Pydantic model instances.
|
|
4
|
+
Handles serialization and parameter binding automatically.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import hashlib
|
|
8
|
+
import json
|
|
9
|
+
import uuid
|
|
10
|
+
from typing import Any, Type
|
|
11
|
+
|
|
12
|
+
from pydantic import BaseModel
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def get_natural_key(model: BaseModel) -> str | None:
    """
    Pick the model's business key, trying uri, then key, then name.

    The result seeds deterministic ID generation, which is why 'id' itself
    is deliberately left out of the candidates.

    Args:
        model: Pydantic model instance

    Returns:
        The first truthy candidate converted to str, or None if none is set
    """
    # Lazy generator: attributes after the first truthy hit are never read.
    candidates = (getattr(model, attr, None) for attr in ("uri", "key", "name"))
    return next((str(found) for found in candidates if found), None)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def get_entity_key(model: BaseModel) -> str:
    """
    Resolve the KV-store key for a model, trying name, key, uri, then id.

    Human-readable identifiers win over URIs, and the id is only a last
    resort, so entities can be looked up by names such as "panic-disorder"
    rather than by UUID.

    Args:
        model: Pydantic model instance

    Returns:
        The first truthy candidate converted to str

    Raises:
        ValueError: If none of name, key, uri or id holds a truthy value
    """
    for attr in ("name", "key", "uri", "id"):
        candidate = getattr(model, attr, None)
        if not candidate:
            continue
        return str(candidate)

    # Defensive guard: the original author notes that CoreModel always has an id.
    raise ValueError(f"Model {type(model)} has no name, key, uri, or id field")
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def generate_deterministic_id(user_id: str | None, entity_key: str) -> uuid.UUID:
    """
    Derive a stable UUID from the (user_id, entity_key) pair.

    The same inputs always produce the same UUID. A missing or empty
    user_id is hashed as the literal owner "system".

    Args:
        user_id: User identifier (optional)
        entity_key: Entity key field value

    Returns:
        UUID built from the first 16 bytes of SHA-256("<owner>:<entity_key>")
    """
    owner = user_id or "system"
    seed = f"{owner}:{entity_key}"
    digest = hashlib.sha256(seed.encode()).digest()
    # A UUID is 128 bits, so only the leading 16 digest bytes are kept.
    return uuid.UUID(bytes=digest[:16])
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def model_to_dict(model: BaseModel, exclude_none: bool = True) -> dict[str, Any]:
    """
    Dump a Pydantic model into a field -> value dict ready for SQL binding.

    If the dump has no truthy "id", one is filled in: a deterministic UUID
    derived from (user_id, natural key) when the model exposes uri/key/name,
    otherwise a random uuid4.

    Dict and list values are returned untouched; per the original note,
    JSONB encoding is left to the asyncpg codec registered in PostgresService.

    Args:
        model: Pydantic model instance
        exclude_none: Exclude None values (default: True)

    Returns:
        Dict of field_name -> value, always containing an "id" entry
    """
    # Python mode keeps datetime (and similar) objects instead of strings.
    row = model.model_dump(mode="python", exclude_none=exclude_none)

    if row.get("id"):
        return row

    business_key = get_natural_key(model)
    if business_key:
        row["id"] = generate_deterministic_id(row.get("user_id"), business_key)
    else:
        # No uri/key/name to hash, so fall back to a random identifier.
        row["id"] = uuid.uuid4()

    return row
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def build_insert(
    model: BaseModel, table_name: str, return_id: bool = True
) -> tuple[str, list[Any]]:
    """
    Render a parameterized INSERT statement for a Pydantic model.

    Column names are double-quoted so reserved words are safe. The table
    name is interpolated as given.

    Args:
        model: Pydantic model instance
        table_name: Target table name
        return_id: Append RETURNING id to the statement (default: True)

    Returns:
        Tuple of (sql_query, parameters), with parameters in column order

    Example:
        sql, params = build_insert(message, "messages")
        # INSERT INTO messages ("id", "content", "created_at") VALUES ($1, $2, $3) RETURNING id
    """
    row = model_to_dict(model)

    columns = ", ".join(f'"{name}"' for name in row)
    slots = ", ".join(f"${position}" for position in range(1, len(row) + 1))

    statement = f"INSERT INTO {table_name} ({columns}) VALUES ({slots})"
    if return_id:
        statement = f"{statement} RETURNING id"

    return statement, list(row.values())
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def build_upsert(
    model: BaseModel,
    table_name: str,
    conflict_field: str = "id",
    return_id: bool = True,
) -> tuple[str, list[Any]]:
    """
    Build INSERT ... ON CONFLICT DO UPDATE (upsert) query from Pydantic model.

    On conflict, every inserted column except the conflict column is
    overwritten with the proposed value via EXCLUDED. If the model carries
    no other column, the conflict column is assigned to itself so the
    statement stays valid and RETURNING still yields the existing row.

    Args:
        model: Pydantic model instance
        table_name: Target table name
        conflict_field: Field to check for conflicts (default: "id")
        return_id: Return the inserted/updated ID (default: True)

    Returns:
        Tuple of (sql_query, parameters)

    Example:
        sql, params = build_upsert(message, "messages")
        # INSERT INTO messages (...) VALUES ($1, $2, ...)
        # ON CONFLICT ("id") DO UPDATE SET "field1" = EXCLUDED."field1", ...
        # RETURNING id
    """
    data = model_to_dict(model)

    fields = list(data)
    quoted_fields = [f'"{field}"' for field in fields]
    placeholders = [f"${i}" for i in range(1, len(fields) + 1)]
    values = [data[field] for field in fields]

    # Build update clause (exclude conflict field)
    update_clauses = [
        f'"{field}" = EXCLUDED."{field}"' for field in fields if field != conflict_field
    ]
    if not update_clauses:
        # An empty SET list is a syntax error; a self-assignment is a valid no-op.
        update_clauses = [f'"{conflict_field}" = EXCLUDED."{conflict_field}"']

    sql = f"""
        INSERT INTO {table_name} ({', '.join(quoted_fields)})
        VALUES ({', '.join(placeholders)})
        ON CONFLICT ("{conflict_field}") DO UPDATE
        SET {', '.join(update_clauses)}
    """

    if return_id:
        sql += " RETURNING id"

    return sql.strip(), values
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def build_select(
    model_class: Type[BaseModel],
    table_name: str,
    filters: dict[str, Any],
    order_by: str | None = None,
    limit: int | None = None,
    offset: int | None = None,
) -> tuple[str, list[Any]]:
    """
    Build SELECT query with filters.

    Soft-deleted rows are always excluded. A filter whose value is None is
    rendered as "field" IS NULL, because "field" = $n bound to NULL never
    matches any row in SQL.

    Args:
        model_class: Pydantic model class (accepted for API symmetry; not
            currently used for validation)
        table_name: Source table name
        filters: Dict of field -> value filters (AND-ed together)
        order_by: Optional ORDER BY clause. It is interpolated verbatim, so
            it must never come from untrusted input.
        limit: Optional LIMIT
        offset: Optional OFFSET

    Returns:
        Tuple of (sql_query, parameters)

    Example:
        sql, params = build_select(
            Message,
            "messages",
            {"session_id": "abc", "tenant_id": "xyz"},
            order_by="created_at DESC",
            limit=10
        )
        # SELECT * FROM messages
        # WHERE "deleted_at" IS NULL AND "session_id" = $1 AND "tenant_id" = $2
        # ORDER BY created_at DESC
        # LIMIT $3
    """
    where_clauses = ['"deleted_at" IS NULL']  # Soft delete filter
    params: list[Any] = []

    for field, value in filters.items():
        if value is None:
            # NULL needs IS NULL and consumes no placeholder.
            where_clauses.append(f'"{field}" IS NULL')
            continue
        params.append(value)
        where_clauses.append(f'"{field}" = ${len(params)}')

    sql = f"SELECT * FROM {table_name} WHERE {' AND '.join(where_clauses)}"

    if order_by:
        sql += f" ORDER BY {order_by}"

    # The placeholder number is the parameter's 1-based position in params.
    if limit is not None:
        params.append(limit)
        sql += f" LIMIT ${len(params)}"

    if offset is not None:
        params.append(offset)
        sql += f" OFFSET ${len(params)}"

    return sql, params
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def build_update(
    model: BaseModel, table_name: str, id_value: str, tenant_id: str
) -> tuple[str, list[Any]]:
    """
    Build UPDATE query from Pydantic model.

    Every dumped field, including None values, is written, except:
      * "id", which is the lookup key and must not change;
      * "updated_at", which is always set to NOW() by the statement.
        Binding the model's own value as well would assign the column
        twice, and PostgreSQL rejects that.

    Only rows that match both id and tenant and are not soft-deleted are
    updated.

    Args:
        model: Pydantic model instance with updated values
        table_name: Target table name
        id_value: ID of record to update
        tenant_id: Tenant ID for isolation

    Returns:
        Tuple of (sql_query, parameters)

    Example:
        sql, params = build_update(message, "messages", "msg-123", "tenant-1")
        # UPDATE messages SET "field1" = $1, "field2" = $2, "updated_at" = NOW()
        # WHERE "id" = $N AND "tenant_id" = $N+1 AND "deleted_at" IS NULL
    """
    data = model_to_dict(model, exclude_none=False)

    # Columns managed by the statement itself are never bound from the model.
    managed_columns = ("id", "updated_at")
    update_fields = [name for name in data if name not in managed_columns]
    params = [data[field] for field in update_fields]

    # Build SET clause
    set_clauses = [f'"{field}" = ${i}' for i, field in enumerate(update_fields, start=1)]
    set_clauses.append('"updated_at" = NOW()')

    # WHERE placeholders continue numbering after the SET parameters.
    param_idx = len(params) + 1
    sql = f"""
        UPDATE {table_name}
        SET {', '.join(set_clauses)}
        WHERE "id" = ${param_idx} AND "tenant_id" = ${param_idx+1} AND "deleted_at" IS NULL
        RETURNING "id"
    """

    params.extend([id_value, tenant_id])

    return sql.strip(), params
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def build_delete(
    table_name: str, id_value: str, tenant_id: str
) -> tuple[str, list[Any]]:
    """
    Render a soft-delete statement: the row is kept and stamped, not removed.

    The UPDATE sets deleted_at and updated_at to NOW() for the row matching
    both id and tenant, skipping rows that are already soft-deleted.

    Args:
        table_name: Target table name
        id_value: ID of record to delete
        tenant_id: Tenant ID for isolation

    Returns:
        Tuple of (sql_query, parameters), where parameters is [id_value, tenant_id]

    Example:
        sql, params = build_delete("messages", "msg-123", "tenant-1")
        # UPDATE messages SET "deleted_at" = NOW(), "updated_at" = NOW()
        # WHERE "id" = $1 AND "tenant_id" = $2 AND "deleted_at" IS NULL
    """
    bound_values = [id_value, tenant_id]
    statement = f"""
        UPDATE {table_name}
        SET "deleted_at" = NOW(), "updated_at" = NOW()
        WHERE "id" = $1 AND "tenant_id" = $2 AND "deleted_at" IS NULL
        RETURNING "id"
    """
    return statement.strip(), bound_values
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def build_count(
    table_name: str, filters: dict[str, Any]
) -> tuple[str, list[Any]]:
    """
    Build COUNT query with filters.

    Soft-deleted rows are always excluded. A filter whose value is None is
    rendered as "field" IS NULL, because "field" = $n bound to NULL never
    matches any row in SQL.

    Args:
        table_name: Source table name
        filters: Dict of field -> value filters (AND-ed together)

    Returns:
        Tuple of (sql_query, parameters)

    Example:
        sql, params = build_count("messages", {"session_id": "abc"})
        # SELECT COUNT(*) FROM messages
        # WHERE "deleted_at" IS NULL AND "session_id" = $1
    """
    where_clauses = ['"deleted_at" IS NULL']  # Soft delete filter
    params: list[Any] = []

    for field, value in filters.items():
        if value is None:
            # NULL needs IS NULL and consumes no placeholder.
            where_clauses.append(f'"{field}" IS NULL')
            continue
        params.append(value)
        where_clauses.append(f'"{field}" = ${len(params)}')

    sql = f"SELECT COUNT(*) FROM {table_name} WHERE {' AND '.join(where_clauses)}"

    return sql, params
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Rate Limit Service - Postgres-backed rate limiting.
|
|
3
|
+
|
|
4
|
+
Implements tenant-aware, tiered rate limiting using PostgreSQL UNLOGGED tables
|
|
5
|
+
for high performance. Supports monthly quotas and short-term burst limits.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import random
|
|
9
|
+
from datetime import datetime, timezone
|
|
10
|
+
from enum import Enum
|
|
11
|
+
from typing import Optional
|
|
12
|
+
|
|
13
|
+
from loguru import logger
|
|
14
|
+
|
|
15
|
+
from ..models.entities.user import UserTier
|
|
16
|
+
from .postgres.service import PostgresService
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class RateLimitService:
    """
    Service for tracking and enforcing API rate limits.

    Counters live in the `rate_limits` table, one row per
    (tenant, identifier, tier, time window).
    Note (from the original author): the table is UNLOGGED for performance,
    so counts may be lost on database crash/restart.
    """

    def __init__(self, db: PostgresService):
        self.db = db

        # Rate limits configuration
        # Format: {"limit": max requests, "period": window length in seconds}
        # This is a simple implementation. In production, move to settings.
        self.TIER_CONFIG = {
            UserTier.ANONYMOUS: {"limit": 1000, "period": 3600},  # 1000/hour (for testing)
            UserTier.FREE: {"limit": 50, "period": 2592000},  # 50/month (~30 days)
            UserTier.BASIC: {"limit": 10000, "period": 2592000},  # 10k/month
            UserTier.PRO: {"limit": 100000, "period": 2592000},  # 100k/month
        }

    @staticmethod
    def _window_key_and_end(now: datetime, period: int) -> tuple[str | int, float]:
        """
        Map a timestamp to its rate-limit window.

        Args:
            now: Current time (timezone-aware)
            period: Configured window length in seconds

        Returns:
            Tuple (time_key, window_end) where time_key identifies the window
            and window_end is the epoch second at which that window closes.
            Calendar windows (month/day/hour) end on the calendar boundary,
            not at first-request + period.
        """
        if period >= 2592000:  # Monthly
            month_start = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
            if month_start.month == 12:
                next_month = month_start.replace(year=month_start.year + 1, month=1)
            else:
                next_month = month_start.replace(month=month_start.month + 1)
            return now.strftime("%Y-%m"), next_month.timestamp()

        if period >= 86400:  # Daily
            day_start = now.replace(hour=0, minute=0, second=0, microsecond=0)
            return now.strftime("%Y-%m-%d"), day_start.timestamp() + 86400

        if period >= 3600:  # Hourly
            hour_start = now.replace(minute=0, second=0, microsecond=0)
            return now.strftime("%Y-%m-%d-%H"), hour_start.timestamp() + 3600

        # Minute/Second (fallback): fixed buckets of `period` seconds since epoch
        bucket = int(now.timestamp() / period)
        return bucket, float((bucket + 1) * period)

    async def check_rate_limit(
        self,
        tenant_id: str,
        identifier: str,
        tier: UserTier
    ) -> tuple[bool, int, int]:
        """
        Check if request is allowed under the rate limit.

        Each call counts as one request: the window counter is incremented
        before it is compared with the tier limit. If the database call
        fails, the request is allowed (fail open) and a count of 0 is reported.

        Args:
            tenant_id: Tenant identifier
            identifier: User ID or Anonymous ID
            tier: User subscription tier

        Returns:
            Tuple (is_allowed, current_count, limit)
        """
        config = self.TIER_CONFIG.get(tier)
        if config is None:
            # Unknown tiers are treated as the free tier.
            config = self.TIER_CONFIG[UserTier.FREE]
        limit = config["limit"]
        period = config["period"]

        # Construct time-window key
        now = datetime.now(timezone.utc)
        time_key, expires_at = self._window_key_and_end(now, period)

        key = f"{tenant_id}:{identifier}:{tier.value}:{time_key}"

        # expires_at is the end of the window, so cleanup can never remove a
        # counter that is still in use. With first-request + 30 days, a
        # counter created early in a 31-day month could be deleted before
        # the month ended.

        # Atomic UPSERT to increment counter
        # Returns the new count
        query = """
            INSERT INTO rate_limits (key, count, expires_at)
            VALUES ($1, 1, to_timestamp($2))
            ON CONFLICT (key) DO UPDATE
            SET count = rate_limits.count + 1
            RETURNING count;
        """

        try:
            count = await self.db.fetchval(query, key, expires_at)
        except Exception as e:
            logger.error(f"Rate limit check failed: {e}")
            # Fail open to avoid blocking users on DB error
            return True, 0, limit

        is_allowed = count <= limit

        # Probabilistic cleanup (1% chance)
        if random.random() < 0.01:
            await self.cleanup_expired()

        return is_allowed, count, limit

    async def cleanup_expired(self):
        """Remove expired rate limit keys; failures are logged, never raised."""
        try:
            # NOTE(review): this deletes every expired row in one statement;
            # there is no batching or LIMIT here.
            query = """
                DELETE FROM rate_limits
                WHERE expires_at < NOW()
            """
            await self.db.execute(query)
        except Exception as e:
            logger.warning(f"Rate limit cleanup failed: {e}")
|