remdb 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (187) hide show
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +565 -0
  44. rem/cli/commands/configure.py +423 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1124 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +88 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +806 -0
  104. rem/services/content/service.py +657 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +229 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.2.6.dist-info/METADATA +1191 -0
  185. remdb-0.2.6.dist-info/RECORD +187 -0
  186. remdb-0.2.6.dist-info/WHEEL +4 -0
  187. remdb-0.2.6.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,172 @@
1
+ # fs parsing hooks - extended examples
2
+
3
+ Clean pattern for managing parsed file versions in REM filesystem abstraction.
4
+
5
+ ## convention
6
+
7
+ Separate `uploads/` and `parsed/` directories with deterministic path mapping:
8
+
9
+ **S3:**
10
+ - Uploads: `s3://rem-io-staging/v1/uploads/user-123/2025/01/19/report.pdf`
11
+ - Parsed: `s3://rem-io-staging/v1/parsed/user-123/2025/01/19/report.pdf/{resource}`
12
+
13
+ **Local:**
14
+ - Uploads: `~/.rem/fs/v1/uploads/user-123/2025/01/19/report.pdf`
15
+ - Parsed: `~/.rem/fs/v1/parsed/user-123/2025/01/19/report.pdf/{resource}`
16
+
17
+ **Resources:**
18
+ - `metadata.json` - parse metadata (provider, timestamp, etc.)
19
+ - `content.md` - primary parsed content (markdown)
20
+ - `images/` - extracted images
21
+ - `tables/` - extracted tables (parquet)
22
+
23
+ ## basic usage
24
+
25
+ ```python
26
+ from rem.services.fs import FS
27
+
28
+ fs = FS()
29
+ upload_uri = "s3://rem-io-staging/v1/uploads/user-123/2025/01/19/report.pdf"
30
+
31
+ # check and read
32
+ if fs.has_parsed(upload_uri):
33
+ markdown = fs.read_parsed(upload_uri)
34
+ else:
35
+ # trigger parsing
36
+ from rem.services.content import ContentService
37
+ service = ContentService()
38
+ await service.process_and_save(upload_uri)
39
+ ```
40
+
41
+ ## writing parsed content
42
+
43
+ ```python
44
+ # write markdown with metadata
45
+ fs.write_parsed(
46
+ uri,
47
+ markdown_content,
48
+ metadata={
49
+ "provider": "kreuzberg",
50
+ "page_count": 10,
51
+ "table_count": 2,
52
+ }
53
+ )
54
+
55
+ # write extracted image
56
+ fs.write_parsed(uri, image_data, resource="images/page_1.png")
57
+
58
+ # write extracted table
59
+ fs.write_parsed(uri, table_df, resource="tables/table_0.parquet")
60
+ ```
61
+
62
+ ## reading specific resources
63
+
64
+ ```python
65
+ # read metadata
66
+ metadata = fs.read_parsed(uri, "metadata.json")
67
+
68
+ # read image
69
+ image = fs.read_parsed(uri, "images/page_1.png")
70
+
71
+ # read table
72
+ table = fs.read_parsed(uri, "tables/table_0.parquet")
73
+ ```
74
+
75
+ ## discovering resources
76
+
77
+ ```python
78
+ # list all parsed resources
79
+ resources = fs.list_parsed_resources(uri)
80
+ # ['content.md', 'metadata.json', 'images/page_1.png', 'tables/table_0.parquet']
81
+
82
+ # iterate and read
83
+ for resource in resources:
84
+ if resource.endswith('.png'):
85
+ image = fs.read_parsed(uri, resource)
86
+ elif resource.endswith('.parquet'):
87
+ table = fs.read_parsed(uri, resource)
88
+ ```
89
+
90
+ ## integration with ContentService
91
+
92
+ ```python
93
+ class ContentService:
94
+ async def process_and_save(self, uri: str, user_id: str | None = None):
95
+ # check cache first
96
+ if self.fs.has_parsed(uri):
97
+ logger.info(f"using cached parse for {uri}")
98
+ return self.fs.read_parsed(uri, "metadata.json")
99
+
100
+ # extract and parse
101
+ result = self.process_uri(uri)
102
+ markdown = to_markdown(result["content"], Path(uri).name)
103
+
104
+ # write parsed version
105
+ self.fs.write_parsed(
106
+ uri,
107
+ markdown,
108
+ metadata={
109
+ "provider": result["provider"],
110
+ "timestamp": datetime.now().isoformat(),
111
+ "content_type": result["metadata"].get("content_type"),
112
+ }
113
+ )
114
+
115
+ # chunk and save to database...
116
+ ```
117
+
118
+ ## multi-resource parsing
119
+
120
+ For complex documents with many extracted resources:
121
+
122
+ ```python
123
+ # parse pdf and extract everything
124
+ result = parse_pdf_advanced(uri)
125
+
126
+ # write markdown
127
+ fs.write_parsed(uri, result.markdown)
128
+
129
+ # write images
130
+ for i, img in enumerate(result.images):
131
+ fs.write_parsed(uri, img, resource=f"images/page_{i}.png")
132
+
133
+ # write tables
134
+ for i, table in enumerate(result.tables):
135
+ fs.write_parsed(uri, table, resource=f"tables/table_{i}.parquet")
136
+
137
+ # write metadata (passed together with the markdown content)
138
+ fs.write_parsed(
139
+ uri,
140
+ result.markdown,
141
+ metadata={
142
+ "provider": "advanced_parser",
143
+ "page_count": len(result.images),
144
+ "table_count": len(result.tables),
145
+ }
146
+ )
147
+ ```
148
+
149
+ ## benefits
150
+
151
+ - **separation of concerns**: parsed files alongside originals, not in database
152
+ - **caching**: check `has_parsed()` before re-parsing expensive files
153
+ - **discoverability**: `list_parsed_resources()` shows what's available
154
+ - **flexibility**: store markdown, images, tables, any extracted content
155
+ - **convention over configuration**: deterministic `uploads/` → `parsed/` path mapping
156
+
157
+ ## local provider
158
+
159
+ Same interface for local files:
160
+
161
+ ```python
162
+ from rem.services.fs import LocalProvider
163
+
164
+ fs = LocalProvider()
165
+ uri = "/data/docs/report.pdf"
166
+
167
+ if fs.has_parsed(uri):
168
+ markdown = fs.read_parsed(uri)
169
+ else:
170
+ markdown = parse_pdf(uri)
171
+ fs.write_parsed(uri, markdown, metadata={"provider": "kreuzberg"})
172
+ ```
@@ -0,0 +1,276 @@
1
+ """
2
+ Filesystem path naming conventions for REM.
3
+
4
+ Standardized path structure:
5
+ - rem/v1/uploads/{system|user_id}/{yyyy}/{mm}/{dd}/{optional_hh_mm}/
6
+ - Local: $REM_HOME/fs/...
7
+ - S3: s3://{bucket}/...
8
+
9
+ Design principles:
10
+ - Consistent hierarchical structure
11
+ - Date-based partitioning for scalability
12
+ - User vs system separation
13
+ - Environment-aware (local vs cloud)
14
+ """
15
+
16
+ import os
17
+ from datetime import datetime, date
18
+ from pathlib import Path
19
+ from typing import Literal
20
+
21
+ from rem.settings import settings
22
+
23
+
24
def get_rem_home() -> str:
    """
    Get REM_HOME directory for local filesystem.

    Reads the REM_HOME environment variable and falls back to ~/.rem when the
    variable is unset or empty. A leading "~" is expanded, and a relative
    value is made absolute against the current working directory.

    Returns:
        Absolute path to REM home directory
    """
    # Use `or` instead of a getenv default so that REM_HOME="" also falls back
    # to ~/.rem; an empty value would otherwise resolve to the current
    # working directory.
    rem_home = os.getenv("REM_HOME") or str(Path.home() / ".rem")
    return str(Path(rem_home).expanduser().absolute())
35
+
36
+
37
def get_base_uri(use_s3: bool | None = None) -> str:
    """
    Resolve the root location under which REM files are stored.

    Args:
        use_s3: True selects S3, False selects the local filesystem. When
            None, S3 is selected only if settings.environment equals
            "production".

    Returns:
        "s3://{bucket}" built from settings.s3.bucket_name, or the "fs"
        directory under the REM home directory.
    """
    # An explicit argument wins; otherwise fall back to the environment check.
    s3_selected = (
        (settings.environment == "production") if use_s3 is None else use_s3
    )

    if not s3_selected:
        return str(Path(get_rem_home()) / "fs")

    return f"s3://{settings.s3.bucket_name}"
57
+
58
+
59
def get_uploads_path(
    user_id: str | None = None,
    dt: datetime | date | None = None,
    include_time: bool = False,
    use_s3: bool | None = None,
) -> str:
    """
    Build the date-partitioned uploads directory path.

    Layout:
        rem/v1/uploads/{system|user_id}/{yyyy}/{mm}/{dd}[/{hh_mm}]

    Args:
        user_id: Owner of the uploads; a falsy value maps to "system"
        dt: Date or datetime to partition by; None means datetime.now().
            A plain date is treated as midnight of that day
        include_time: Append an "HH_MM" component when True
        use_s3: Forwarded to get_base_uri to choose S3 or local storage

    Returns:
        base_uri/rem/v1/uploads/{system|user_id}/yyyy/mm/dd[/hh_mm]

    Examples:
        >>> get_uploads_path()
        '/Users/user/.rem/fs/rem/v1/uploads/system/2025/01/19'

        >>> get_uploads_path(user_id="user-123", include_time=True)
        '/Users/user/.rem/fs/rem/v1/uploads/user-123/2025/01/19/14_30'

        >>> get_uploads_path(use_s3=True)
        's3://rem-bucket/rem/v1/uploads/system/2025/01/19'
    """
    root = get_base_uri(use_s3=use_s3)

    # Normalise the input to something with both date and time fields.
    if dt is None:
        moment = datetime.now()
    elif isinstance(dt, datetime):
        moment = dt
    elif isinstance(dt, date):
        moment = datetime.combine(dt, datetime.min.time())
    else:
        moment = dt

    segments = [root, "rem", "v1", "uploads", user_id or "system"]
    segments.extend(moment.strftime(fmt) for fmt in ("%Y", "%m", "%d"))
    if include_time:
        segments.append(moment.strftime("%H_%M"))

    # S3 URIs always use "/"; local paths use the OS separator via Path.
    return "/".join(segments) if root.startswith("s3://") else str(Path(*segments))
120
+
121
+
122
def get_versioned_path(
    resource_type: Literal["schemas", "agents", "tools", "datasets"],
    name: str,
    version: str = "v1",
    use_s3: bool | None = None,
) -> str:
    """
    Build the storage path of a versioned resource.

    Layout:
        rem/{version}/{resource_type}/{name}

    Args:
        resource_type: One of "schemas", "agents", "tools", "datasets"
        name: Resource name
        version: Version path component (default: "v1")
        use_s3: Forwarded to get_base_uri to choose S3 or local storage

    Returns:
        base_uri/rem/{version}/{resource_type}/{name}

    Examples:
        >>> get_versioned_path("schemas", "user-schema")
        '/Users/user/.rem/fs/rem/v1/schemas/user-schema'

        >>> get_versioned_path("agents", "query-agent", version="v2")
        '/Users/user/.rem/fs/rem/v2/agents/query-agent'
    """
    root = get_base_uri(use_s3=use_s3)
    segments = (root, "rem", version, resource_type, name)

    # S3 URIs always use "/"; local paths use the OS separator via Path.
    return "/".join(segments) if root.startswith("s3://") else str(Path(*segments))
157
+
158
+
159
def get_user_path(
    user_id: str,
    subpath: str | None = None,
    use_s3: bool | None = None,
) -> str:
    """
    Build the storage path scoped to a single user.

    Layout:
        rem/v1/users/{user_id}[/{subpath}]

    Args:
        user_id: User ID
        subpath: Optional trailing component (e.g., "documents", "images");
            omitted when falsy
        use_s3: Forwarded to get_base_uri to choose S3 or local storage

    Returns:
        base_uri/rem/v1/users/{user_id}[/{subpath}]

    Examples:
        >>> get_user_path("user-123")
        '/Users/user/.rem/fs/rem/v1/users/user-123'

        >>> get_user_path("user-123", "documents")
        '/Users/user/.rem/fs/rem/v1/users/user-123/documents'
    """
    root = get_base_uri(use_s3=use_s3)
    segments = [root, "rem", "v1", "users", user_id]
    segments += [subpath] if subpath else []

    # S3 URIs always use "/"; local paths use the OS separator via Path.
    return "/".join(segments) if root.startswith("s3://") else str(Path(*segments))
195
+
196
+
197
def get_temp_path(
    prefix: str = "tmp",
    use_s3: bool | None = None,
) -> str:
    """
    Build a timestamped path for temporary files.

    Layout:
        rem/v1/temp/{prefix}/{timestamp}

    The timestamp is datetime.now() formatted as YYYYMMDD_HHMMSS.

    Args:
        prefix: Directory name placed before the timestamp (default: "tmp")
        use_s3: Forwarded to get_base_uri to choose S3 or local storage

    Returns:
        base_uri/rem/v1/temp/{prefix}/{timestamp}

    Examples:
        >>> get_temp_path()
        '/Users/user/.rem/fs/rem/v1/temp/tmp/20250119_143045'

        >>> get_temp_path("processing")
        '/Users/user/.rem/fs/rem/v1/temp/processing/20250119_143045'
    """
    root = get_base_uri(use_s3=use_s3)
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    segments = (root, "rem", "v1", "temp", prefix, stamp)

    # S3 URIs always use "/"; local paths use the OS separator via Path.
    return "/".join(segments) if root.startswith("s3://") else str(Path(*segments))
229
+
230
+
231
def ensure_dir_exists(path: str) -> str:
    """
    Create a local directory (and any missing parents) if needed.

    Paths starting with "s3://" are returned untouched; nothing is created
    for them.

    Args:
        path: Directory path

    Returns:
        The path that was passed in, so calls can be chained
    """
    if path.startswith("s3://"):
        return path

    Path(path).mkdir(parents=True, exist_ok=True)
    return path
244
+
245
+
246
+ def join_path(*parts: str, is_s3: bool | None = None) -> str:
247
+ """
248
+ Join path parts, handling S3 vs local paths correctly.
249
+
250
+ Args:
251
+ *parts: Path components to join
252
+ is_s3: Force S3 (/) or local (os-specific). Auto-detects if None.
253
+
254
+ Returns:
255
+ Joined path
256
+
257
+ Examples:
258
+ >>> join_path("s3://bucket", "rem", "v1", "uploads")
259
+ 's3://bucket/rem/v1/uploads'
260
+
261
+ >>> join_path("/home/user", "rem", "data")
262
+ '/home/user/rem/data'
263
+ """
264
+ if not parts:
265
+ return ""
266
+
267
+ # Auto-detect S3 from first part
268
+ if is_s3 is None:
269
+ is_s3 = parts[0].startswith("s3://")
270
+
271
+ if is_s3:
272
+ # S3: always use forward slash
273
+ return "/".join(str(p) for p in parts)
274
+ else:
275
+ # Local: use Path for OS-specific separators
276
+ return str(Path(*[str(p) for p in parts]))