remdb 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +2 -0
- rem/agentic/README.md +650 -0
- rem/agentic/__init__.py +39 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +8 -0
- rem/agentic/context.py +148 -0
- rem/agentic/context_builder.py +329 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +107 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +151 -0
- rem/agentic/providers/phoenix.py +674 -0
- rem/agentic/providers/pydantic_ai.py +572 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +396 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +231 -0
- rem/api/README.md +420 -0
- rem/api/main.py +324 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +536 -0
- rem/api/mcp_router/server.py +213 -0
- rem/api/mcp_router/tools.py +584 -0
- rem/api/routers/auth.py +229 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/completions.py +281 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +124 -0
- rem/api/routers/chat/streaming.py +185 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +26 -0
- rem/auth/middleware.py +100 -0
- rem/auth/providers/__init__.py +13 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +455 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +126 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +565 -0
- rem/cli/commands/configure.py +423 -0
- rem/cli/commands/db.py +493 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1124 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +245 -0
- rem/cli/commands/schema.py +183 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +88 -0
- rem/config.py +237 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +64 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +628 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +243 -0
- rem/models/entities/__init__.py +43 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +35 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +191 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/user.py +85 -0
- rem/py.typed +0 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +128 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +16 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +806 -0
- rem/services/content/service.py +657 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +336 -0
- rem/services/dreaming/moment_service.py +264 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +120 -0
- rem/services/embeddings/worker.py +421 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +686 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +575 -0
- rem/services/postgres/__init__.py +23 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
- rem/services/postgres/register_type.py +352 -0
- rem/services/postgres/repository.py +337 -0
- rem/services/postgres/schema_generator.py +379 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +354 -0
- rem/services/rem/README.md +304 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +145 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +527 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +6 -0
- rem/services/session/compression.py +360 -0
- rem/services/session/reload.py +77 -0
- rem/settings.py +1235 -0
- rem/sql/002_install_models.sql +1068 -0
- rem/sql/background_indexes.sql +42 -0
- rem/sql/install_models.sql +1038 -0
- rem/sql/migrations/001_install.sql +503 -0
- rem/sql/migrations/002_install_models.sql +1202 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +583 -0
- rem/utils/__init__.py +43 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +423 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/markdown.py +16 -0
- rem/utils/model_helpers.py +236 -0
- rem/utils/schema_loader.py +229 -0
- rem/utils/sql_types.py +348 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +330 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +5 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- remdb-0.2.6.dist-info/METADATA +1191 -0
- remdb-0.2.6.dist-info/RECORD +187 -0
- remdb-0.2.6.dist-info/WHEEL +4 -0
- remdb-0.2.6.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Filesystem Abstraction Layer for REM.
|
|
3
|
+
|
|
4
|
+
Provides a unified interface for reading files from different sources:
|
|
5
|
+
- Local filesystem paths
|
|
6
|
+
- S3 URIs (s3://bucket/key)
|
|
7
|
+
- HTTP/HTTPS URLs
|
|
8
|
+
|
|
9
|
+
**ARCHITECTURE NOTE - Service Usage**:
|
|
10
|
+
|
|
11
|
+
This service centralizes ALL file I/O operations for REM:
|
|
12
|
+
|
|
13
|
+
1. **read_uri()**: Reads files from any supported source (local/S3/HTTP)
|
|
14
|
+
- Used by: Local file providers, S3 providers
|
|
15
|
+
- Should be used by: MCP parse_and_ingest_file tool (currently duplicated)
|
|
16
|
+
|
|
17
|
+
2. **write_to_internal_storage()**: Writes to REM's internal storage
|
|
18
|
+
- Tenant-scoped paths: {tenant_id}/files/{file_id}/{filename}
|
|
19
|
+
- Auto-selects backend: S3 (production) or ~/.rem/fs/ (local dev)
|
|
20
|
+
- Used by: SQS file processor worker
|
|
21
|
+
- Should be used by: MCP parse_and_ingest_file tool (currently duplicated)
|
|
22
|
+
|
|
23
|
+
**CODE DUPLICATION WARNING**:
|
|
24
|
+
The MCP tool 'parse_and_ingest_file' (api/mcp_router/tools.py) duplicates
|
|
25
|
+
this service's logic (tools.py lines 561-636). This violates DRY and creates maintenance
|
|
26
|
+
burden. TODO: Refactor MCP tool to use this service.
|
|
27
|
+
|
|
28
|
+
**PATH CENTRALIZATION**:
|
|
29
|
+
All file paths use the SAME format:
|
|
30
|
+
- S3: s3://{bucket}/{tenant_id}/files/{file_id}/{filename}
|
|
31
|
+
- Local: file://{home}/.rem/fs/{tenant_id}/files/{file_id}/{filename}
|
|
32
|
+
|
|
33
|
+
This ensures consistent path handling across CLI, MCP, and workers.
|
|
34
|
+
"""
|
|
35
|
+
from pathlib import Path
|
|
36
|
+
from urllib.parse import urlparse
|
|
37
|
+
|
|
38
|
+
from loguru import logger
|
|
39
|
+
import aiohttp
|
|
40
|
+
import aioboto3
|
|
41
|
+
from botocore.exceptions import ClientError
|
|
42
|
+
|
|
43
|
+
from ...settings import settings
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class FileSystemService:
|
|
47
|
+
"""
|
|
48
|
+
A service for reading files from various sources.
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
async def read_uri(self, file_uri: str, is_local_server: bool = False) -> tuple[bytes, str, str]:
|
|
52
|
+
"""
|
|
53
|
+
Read content from a given URI.
|
|
54
|
+
|
|
55
|
+
Args:
|
|
56
|
+
file_uri: The URI of the file to read.
|
|
57
|
+
is_local_server: Whether the server is running locally.
|
|
58
|
+
|
|
59
|
+
Returns:
|
|
60
|
+
A tuple containing the file content, the filename, and the source type.
|
|
61
|
+
"""
|
|
62
|
+
parsed = urlparse(file_uri)
|
|
63
|
+
scheme = parsed.scheme
|
|
64
|
+
|
|
65
|
+
if scheme in ("http", "https"):
|
|
66
|
+
source_type = "url"
|
|
67
|
+
file_name = Path(parsed.path).name or "downloaded_file"
|
|
68
|
+
content = await self._read_from_url(file_uri)
|
|
69
|
+
elif scheme == "s3":
|
|
70
|
+
source_type = "s3"
|
|
71
|
+
s3_bucket = parsed.netloc
|
|
72
|
+
s3_key = parsed.path.lstrip("/")
|
|
73
|
+
file_name = Path(s3_key).name
|
|
74
|
+
content = await self._read_from_s3(s3_bucket, s3_key)
|
|
75
|
+
elif scheme == "" or scheme == "file":
|
|
76
|
+
if not is_local_server:
|
|
77
|
+
raise PermissionError(
|
|
78
|
+
"Local file paths are only allowed for local MCP servers."
|
|
79
|
+
)
|
|
80
|
+
source_type = "local"
|
|
81
|
+
file_path = Path(file_uri.replace("file://", ""))
|
|
82
|
+
if not file_path.exists():
|
|
83
|
+
raise FileNotFoundError(f"File not found: {file_uri}")
|
|
84
|
+
file_name = file_path.name
|
|
85
|
+
content = await self._read_from_local(file_path)
|
|
86
|
+
else:
|
|
87
|
+
raise ValueError(f"Unsupported URI scheme: {scheme}")
|
|
88
|
+
|
|
89
|
+
return content, file_name, source_type
|
|
90
|
+
|
|
91
|
+
async def _read_from_url(self, url: str) -> bytes:
|
|
92
|
+
"""Read content from a URL."""
|
|
93
|
+
logger.debug(f"Reading from URL: {url}")
|
|
94
|
+
async with aiohttp.ClientSession() as session:
|
|
95
|
+
async with session.get(url) as response:
|
|
96
|
+
response.raise_for_status()
|
|
97
|
+
return await response.read()
|
|
98
|
+
|
|
99
|
+
async def _read_from_s3(self, bucket: str, key: str) -> bytes:
|
|
100
|
+
"""Read content from S3."""
|
|
101
|
+
logger.debug(f"Reading from S3: s3://{bucket}/{key}")
|
|
102
|
+
session = aioboto3.Session()
|
|
103
|
+
async with session.client(
|
|
104
|
+
"s3",
|
|
105
|
+
endpoint_url=settings.s3.endpoint_url,
|
|
106
|
+
aws_access_key_id=settings.s3.access_key_id,
|
|
107
|
+
aws_secret_access_key=settings.s3.secret_access_key,
|
|
108
|
+
region_name=settings.s3.region,
|
|
109
|
+
) as s3_client:
|
|
110
|
+
try:
|
|
111
|
+
response = await s3_client.get_object(Bucket=bucket, Key=key)
|
|
112
|
+
return await response["Body"].read()
|
|
113
|
+
except ClientError as e:
|
|
114
|
+
logger.error(f"S3 download failed: {e}")
|
|
115
|
+
raise RuntimeError(f"S3 download failed: {e}")
|
|
116
|
+
|
|
117
|
+
async def _read_from_local(self, path: Path) -> bytes:
|
|
118
|
+
"""Read content from a local file."""
|
|
119
|
+
logger.debug(f"Reading from local path: {path}")
|
|
120
|
+
return path.read_bytes()
|
|
121
|
+
|
|
122
|
+
async def write_to_internal_storage(
|
|
123
|
+
self,
|
|
124
|
+
content: bytes,
|
|
125
|
+
tenant_id: str,
|
|
126
|
+
file_name: str,
|
|
127
|
+
file_id: str | None = None
|
|
128
|
+
) -> tuple[str, str, str, str]:
|
|
129
|
+
"""
|
|
130
|
+
Write content to REM's internal storage (S3 or local).
|
|
131
|
+
|
|
132
|
+
Args:
|
|
133
|
+
content: File content bytes
|
|
134
|
+
tenant_id: Tenant identifier
|
|
135
|
+
file_name: Name of the file
|
|
136
|
+
file_id: Optional file UUID string. If not provided, one will be generated.
|
|
137
|
+
|
|
138
|
+
Returns:
|
|
139
|
+
A tuple containing (storage_uri, internal_key, content_type, file_id).
|
|
140
|
+
"""
|
|
141
|
+
from uuid import uuid4
|
|
142
|
+
import mimetypes
|
|
143
|
+
|
|
144
|
+
if not file_id:
|
|
145
|
+
file_id = str(uuid4())
|
|
146
|
+
|
|
147
|
+
internal_key = f"{tenant_id}/files/{file_id}/{file_name}"
|
|
148
|
+
storage_uri = ""
|
|
149
|
+
|
|
150
|
+
# Use storage.provider setting to determine storage backend
|
|
151
|
+
if settings.storage.provider == "s3":
|
|
152
|
+
# S3 storage
|
|
153
|
+
if not settings.s3.bucket_name:
|
|
154
|
+
raise ValueError(
|
|
155
|
+
"STORAGE__PROVIDER is set to 's3' but S3__BUCKET_NAME is not configured. "
|
|
156
|
+
"Either set S3__BUCKET_NAME or change STORAGE__PROVIDER to 'local'."
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
session = aioboto3.Session()
|
|
160
|
+
async with session.client(
|
|
161
|
+
"s3",
|
|
162
|
+
endpoint_url=settings.s3.endpoint_url,
|
|
163
|
+
aws_access_key_id=settings.s3.access_key_id,
|
|
164
|
+
aws_secret_access_key=settings.s3.secret_access_key,
|
|
165
|
+
region_name=settings.s3.region,
|
|
166
|
+
) as s3_client:
|
|
167
|
+
await s3_client.put_object(
|
|
168
|
+
Bucket=settings.s3.bucket_name,
|
|
169
|
+
Key=internal_key,
|
|
170
|
+
Body=content,
|
|
171
|
+
)
|
|
172
|
+
storage_uri = f"s3://{settings.s3.bucket_name}/{internal_key}"
|
|
173
|
+
else:
|
|
174
|
+
# Local filesystem storage (default)
|
|
175
|
+
# Expand ~ to home directory
|
|
176
|
+
base_path = Path(settings.storage.base_path).expanduser()
|
|
177
|
+
base_path.mkdir(parents=True, exist_ok=True)
|
|
178
|
+
file_path = base_path / internal_key
|
|
179
|
+
file_path.parent.mkdir(parents=True, exist_ok=True)
|
|
180
|
+
file_path.write_bytes(content)
|
|
181
|
+
storage_uri = f"file://{file_path}"
|
|
182
|
+
|
|
183
|
+
content_type, _ = mimetypes.guess_type(file_name)
|
|
184
|
+
content_type = content_type or "application/octet-stream"
|
|
185
|
+
|
|
186
|
+
return storage_uri, internal_key, content_type, file_id
|