remdb 0.3.242__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (235) hide show
  1. rem/__init__.py +129 -0
  2. rem/agentic/README.md +760 -0
  3. rem/agentic/__init__.py +54 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +38 -0
  6. rem/agentic/agents/agent_manager.py +311 -0
  7. rem/agentic/agents/sse_simulator.py +502 -0
  8. rem/agentic/context.py +425 -0
  9. rem/agentic/context_builder.py +360 -0
  10. rem/agentic/llm_provider_models.py +301 -0
  11. rem/agentic/mcp/__init__.py +0 -0
  12. rem/agentic/mcp/tool_wrapper.py +273 -0
  13. rem/agentic/otel/__init__.py +5 -0
  14. rem/agentic/otel/setup.py +240 -0
  15. rem/agentic/providers/phoenix.py +926 -0
  16. rem/agentic/providers/pydantic_ai.py +854 -0
  17. rem/agentic/query.py +117 -0
  18. rem/agentic/query_helper.py +89 -0
  19. rem/agentic/schema.py +737 -0
  20. rem/agentic/serialization.py +245 -0
  21. rem/agentic/tools/__init__.py +5 -0
  22. rem/agentic/tools/rem_tools.py +242 -0
  23. rem/api/README.md +657 -0
  24. rem/api/deps.py +253 -0
  25. rem/api/main.py +460 -0
  26. rem/api/mcp_router/prompts.py +182 -0
  27. rem/api/mcp_router/resources.py +820 -0
  28. rem/api/mcp_router/server.py +243 -0
  29. rem/api/mcp_router/tools.py +1605 -0
  30. rem/api/middleware/tracking.py +172 -0
  31. rem/api/routers/admin.py +520 -0
  32. rem/api/routers/auth.py +898 -0
  33. rem/api/routers/chat/__init__.py +5 -0
  34. rem/api/routers/chat/child_streaming.py +394 -0
  35. rem/api/routers/chat/completions.py +702 -0
  36. rem/api/routers/chat/json_utils.py +76 -0
  37. rem/api/routers/chat/models.py +202 -0
  38. rem/api/routers/chat/otel_utils.py +33 -0
  39. rem/api/routers/chat/sse_events.py +546 -0
  40. rem/api/routers/chat/streaming.py +950 -0
  41. rem/api/routers/chat/streaming_utils.py +327 -0
  42. rem/api/routers/common.py +18 -0
  43. rem/api/routers/dev.py +87 -0
  44. rem/api/routers/feedback.py +276 -0
  45. rem/api/routers/messages.py +620 -0
  46. rem/api/routers/models.py +86 -0
  47. rem/api/routers/query.py +362 -0
  48. rem/api/routers/shared_sessions.py +422 -0
  49. rem/auth/README.md +258 -0
  50. rem/auth/__init__.py +36 -0
  51. rem/auth/jwt.py +367 -0
  52. rem/auth/middleware.py +318 -0
  53. rem/auth/providers/__init__.py +16 -0
  54. rem/auth/providers/base.py +376 -0
  55. rem/auth/providers/email.py +215 -0
  56. rem/auth/providers/google.py +163 -0
  57. rem/auth/providers/microsoft.py +237 -0
  58. rem/cli/README.md +517 -0
  59. rem/cli/__init__.py +8 -0
  60. rem/cli/commands/README.md +299 -0
  61. rem/cli/commands/__init__.py +3 -0
  62. rem/cli/commands/ask.py +549 -0
  63. rem/cli/commands/cluster.py +1808 -0
  64. rem/cli/commands/configure.py +495 -0
  65. rem/cli/commands/db.py +828 -0
  66. rem/cli/commands/dreaming.py +324 -0
  67. rem/cli/commands/experiments.py +1698 -0
  68. rem/cli/commands/mcp.py +66 -0
  69. rem/cli/commands/process.py +388 -0
  70. rem/cli/commands/query.py +109 -0
  71. rem/cli/commands/scaffold.py +47 -0
  72. rem/cli/commands/schema.py +230 -0
  73. rem/cli/commands/serve.py +106 -0
  74. rem/cli/commands/session.py +453 -0
  75. rem/cli/dreaming.py +363 -0
  76. rem/cli/main.py +123 -0
  77. rem/config.py +244 -0
  78. rem/mcp_server.py +41 -0
  79. rem/models/core/__init__.py +49 -0
  80. rem/models/core/core_model.py +70 -0
  81. rem/models/core/engram.py +333 -0
  82. rem/models/core/experiment.py +672 -0
  83. rem/models/core/inline_edge.py +132 -0
  84. rem/models/core/rem_query.py +246 -0
  85. rem/models/entities/__init__.py +68 -0
  86. rem/models/entities/domain_resource.py +38 -0
  87. rem/models/entities/feedback.py +123 -0
  88. rem/models/entities/file.py +57 -0
  89. rem/models/entities/image_resource.py +88 -0
  90. rem/models/entities/message.py +64 -0
  91. rem/models/entities/moment.py +123 -0
  92. rem/models/entities/ontology.py +181 -0
  93. rem/models/entities/ontology_config.py +131 -0
  94. rem/models/entities/resource.py +95 -0
  95. rem/models/entities/schema.py +87 -0
  96. rem/models/entities/session.py +84 -0
  97. rem/models/entities/shared_session.py +180 -0
  98. rem/models/entities/subscriber.py +175 -0
  99. rem/models/entities/user.py +93 -0
  100. rem/py.typed +0 -0
  101. rem/registry.py +373 -0
  102. rem/schemas/README.md +507 -0
  103. rem/schemas/__init__.py +6 -0
  104. rem/schemas/agents/README.md +92 -0
  105. rem/schemas/agents/core/agent-builder.yaml +235 -0
  106. rem/schemas/agents/core/moment-builder.yaml +178 -0
  107. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  108. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  109. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  110. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  111. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  112. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  113. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  114. rem/schemas/agents/examples/hello-world.yaml +37 -0
  115. rem/schemas/agents/examples/query.yaml +54 -0
  116. rem/schemas/agents/examples/simple.yaml +21 -0
  117. rem/schemas/agents/examples/test.yaml +29 -0
  118. rem/schemas/agents/rem.yaml +132 -0
  119. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  120. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  121. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  122. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  123. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  124. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  125. rem/services/__init__.py +18 -0
  126. rem/services/audio/INTEGRATION.md +308 -0
  127. rem/services/audio/README.md +376 -0
  128. rem/services/audio/__init__.py +15 -0
  129. rem/services/audio/chunker.py +354 -0
  130. rem/services/audio/transcriber.py +259 -0
  131. rem/services/content/README.md +1269 -0
  132. rem/services/content/__init__.py +5 -0
  133. rem/services/content/providers.py +760 -0
  134. rem/services/content/service.py +762 -0
  135. rem/services/dreaming/README.md +230 -0
  136. rem/services/dreaming/__init__.py +53 -0
  137. rem/services/dreaming/affinity_service.py +322 -0
  138. rem/services/dreaming/moment_service.py +251 -0
  139. rem/services/dreaming/ontology_service.py +54 -0
  140. rem/services/dreaming/user_model_service.py +297 -0
  141. rem/services/dreaming/utils.py +39 -0
  142. rem/services/email/__init__.py +10 -0
  143. rem/services/email/service.py +522 -0
  144. rem/services/email/templates.py +360 -0
  145. rem/services/embeddings/__init__.py +11 -0
  146. rem/services/embeddings/api.py +127 -0
  147. rem/services/embeddings/worker.py +435 -0
  148. rem/services/fs/README.md +662 -0
  149. rem/services/fs/__init__.py +62 -0
  150. rem/services/fs/examples.py +206 -0
  151. rem/services/fs/examples_paths.py +204 -0
  152. rem/services/fs/git_provider.py +935 -0
  153. rem/services/fs/local_provider.py +760 -0
  154. rem/services/fs/parsing-hooks-examples.md +172 -0
  155. rem/services/fs/paths.py +276 -0
  156. rem/services/fs/provider.py +460 -0
  157. rem/services/fs/s3_provider.py +1042 -0
  158. rem/services/fs/service.py +186 -0
  159. rem/services/git/README.md +1075 -0
  160. rem/services/git/__init__.py +17 -0
  161. rem/services/git/service.py +469 -0
  162. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  163. rem/services/phoenix/README.md +453 -0
  164. rem/services/phoenix/__init__.py +46 -0
  165. rem/services/phoenix/client.py +960 -0
  166. rem/services/phoenix/config.py +88 -0
  167. rem/services/phoenix/prompt_labels.py +477 -0
  168. rem/services/postgres/README.md +757 -0
  169. rem/services/postgres/__init__.py +49 -0
  170. rem/services/postgres/diff_service.py +599 -0
  171. rem/services/postgres/migration_service.py +427 -0
  172. rem/services/postgres/programmable_diff_service.py +635 -0
  173. rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
  174. rem/services/postgres/register_type.py +353 -0
  175. rem/services/postgres/repository.py +481 -0
  176. rem/services/postgres/schema_generator.py +661 -0
  177. rem/services/postgres/service.py +802 -0
  178. rem/services/postgres/sql_builder.py +355 -0
  179. rem/services/rate_limit.py +113 -0
  180. rem/services/rem/README.md +318 -0
  181. rem/services/rem/__init__.py +23 -0
  182. rem/services/rem/exceptions.py +71 -0
  183. rem/services/rem/executor.py +293 -0
  184. rem/services/rem/parser.py +180 -0
  185. rem/services/rem/queries.py +196 -0
  186. rem/services/rem/query.py +371 -0
  187. rem/services/rem/service.py +608 -0
  188. rem/services/session/README.md +374 -0
  189. rem/services/session/__init__.py +13 -0
  190. rem/services/session/compression.py +488 -0
  191. rem/services/session/pydantic_messages.py +310 -0
  192. rem/services/session/reload.py +85 -0
  193. rem/services/user_service.py +130 -0
  194. rem/settings.py +1877 -0
  195. rem/sql/background_indexes.sql +52 -0
  196. rem/sql/migrations/001_install.sql +983 -0
  197. rem/sql/migrations/002_install_models.sql +3157 -0
  198. rem/sql/migrations/003_optional_extensions.sql +326 -0
  199. rem/sql/migrations/004_cache_system.sql +282 -0
  200. rem/sql/migrations/005_schema_update.sql +145 -0
  201. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  202. rem/utils/AGENTIC_CHUNKING.md +597 -0
  203. rem/utils/README.md +628 -0
  204. rem/utils/__init__.py +61 -0
  205. rem/utils/agentic_chunking.py +622 -0
  206. rem/utils/batch_ops.py +343 -0
  207. rem/utils/chunking.py +108 -0
  208. rem/utils/clip_embeddings.py +276 -0
  209. rem/utils/constants.py +97 -0
  210. rem/utils/date_utils.py +228 -0
  211. rem/utils/dict_utils.py +98 -0
  212. rem/utils/embeddings.py +436 -0
  213. rem/utils/examples/embeddings_example.py +305 -0
  214. rem/utils/examples/sql_types_example.py +202 -0
  215. rem/utils/files.py +323 -0
  216. rem/utils/markdown.py +16 -0
  217. rem/utils/mime_types.py +158 -0
  218. rem/utils/model_helpers.py +492 -0
  219. rem/utils/schema_loader.py +649 -0
  220. rem/utils/sql_paths.py +146 -0
  221. rem/utils/sql_types.py +350 -0
  222. rem/utils/user_id.py +81 -0
  223. rem/utils/vision.py +325 -0
  224. rem/workers/README.md +506 -0
  225. rem/workers/__init__.py +7 -0
  226. rem/workers/db_listener.py +579 -0
  227. rem/workers/db_maintainer.py +74 -0
  228. rem/workers/dreaming.py +502 -0
  229. rem/workers/engram_processor.py +312 -0
  230. rem/workers/sqs_file_processor.py +193 -0
  231. rem/workers/unlogged_maintainer.py +463 -0
  232. remdb-0.3.242.dist-info/METADATA +1632 -0
  233. remdb-0.3.242.dist-info/RECORD +235 -0
  234. remdb-0.3.242.dist-info/WHEEL +4 -0
  235. remdb-0.3.242.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,460 @@
1
+ """
2
+ Unified file system interface abstracting S3 and local storage.
3
+
4
+ Design principles:
5
+ - No upload/download methods - use copy(from, to) instead
6
+ - No zip/unzip - use archive formats in copy operations
7
+ - Extension-based format detection
8
+ - Polars for columnar data by default
9
+ - ContentService integration for special formats
10
+ """
11
+
12
+ from pathlib import Path
13
+ from typing import Any, Callable, BinaryIO, Iterator
14
+ import io
15
+
16
+ from rem.services.fs.s3_provider import S3Provider
17
+ from rem.services.fs.local_provider import LocalProvider
18
+ from rem.services.fs.git_provider import GitProvider, is_git
19
+ from rem.settings import settings
20
+
21
+
22
def is_s3(uri: str) -> bool:
    """Return True when *uri* begins with the ``s3://`` scheme prefix."""
    s3_scheme = "s3://"
    return uri.startswith(s3_scheme)
25
+
26
+
27
def is_archive(uri: str) -> bool:
    """
    Check if URI points to an archive file.

    Only the extensions of the final path component are inspected, so a
    bucket or directory name that merely contains ".zip", ".tar" or ".gz"
    (e.g. "s3://my.tar.bucket/readme.txt") is not mistaken for an archive.
    Compound extensions such as ".tar.gz" or ".tar.bz2" are still recognized,
    and matching is case-insensitive.

    Args:
        uri: File path or URI (s3://..., https://..., or local path)

    Returns:
        True if the file name carries a .zip, .tar or .gz extension
    """
    # Drop any URL query string / fragment before looking at the file name.
    path = uri.lower().split("?", 1)[0].split("#", 1)[0]
    # Last path segment, tolerating both "/" and "\" separators.
    filename = path.replace("\\", "/").rsplit("/", 1)[-1]
    # Everything after the first dot is treated as a chain of extensions.
    extensions = filename.split(".")[1:]
    return any(ext in ("zip", "tar", "gz") for ext in extensions)
30
+
31
+
32
class FS:
    """
    Entry point to file systems abstracting S3 and local storage.

    Every public method inspects the URI and forwards the call to the
    matching provider: ``s3://`` URIs go to the S3 provider, everything else
    goes to the local provider. ``exists``, ``read`` and ``ls`` additionally
    route Git URIs (as detected by ``is_git``) to the Git provider, which is
    only created when ``settings.git.enabled`` is true.
    """

    def __init__(self):
        """Initialize filesystem with S3, local, and Git providers."""
        self._s3_provider = S3Provider()
        self._local_provider = LocalProvider()
        # The Git provider is optional; None means Git access is disabled.
        self._git_provider = GitProvider() if settings.git.enabled else None

    # ========================================================================
    # Internal dispatch helpers
    # ========================================================================

    def _provider_for(self, uri: str):
        """Return the S3 provider for s3:// URIs, the local provider otherwise."""
        return self._s3_provider if is_s3(uri) else self._local_provider

    def _require_git_provider(self):
        """
        Return the Git provider.

        Raises:
            ValueError: If the Git provider was not enabled at construction
        """
        if not self._git_provider:
            raise ValueError("Git provider not enabled. Set GIT__ENABLED=true")
        return self._git_provider

    # ========================================================================
    # Core operations
    # ========================================================================

    def https_to_file(
        self,
        web_request_uri: str,
        target_uri: str,
        token: str | None = None,
        timeout: float | tuple[float, float] | None = 60.0,
    ):
        """
        Download a remote resource to the filesystem.

        Examples:
            - Download Airtable/Slack files to S3
            - Download public files to local storage

        Args:
            web_request_uri: HTTPS URL to download from
            target_uri: Destination (s3://... or local path)
            token: Optional bearer token for authorization
            timeout: Seconds to wait for the server (passed to requests.get).
                Pass None to wait indefinitely, which was the previous
                behaviour.

        Raises:
            requests.HTTPError: If the server answers with an error status
            requests.Timeout: If the server does not respond within timeout
        """
        import requests

        headers = {"Authorization": f"Bearer {token}"} if token else None
        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(web_request_uri, headers=headers, timeout=timeout)
        response.raise_for_status()

        with self.open(target_uri, "wb") as f:
            f.write(response.content)

    def open(self, uri: str, mode: str = "rb") -> BinaryIO:
        """
        Open file for read or write.

        Git URIs are not routed to the Git provider by this method; anything
        that is not s3:// is opened through the local provider.

        Args:
            uri: File path (s3://... or local path)
            mode: File mode (r, rb, w, wb, etc.)

        Returns:
            File-like object
        """
        return self._provider_for(uri).open(uri, mode=mode)

    def exists(self, uri: str) -> bool:
        """
        Check if file or folder exists.

        Args:
            uri: File or directory path (s3://, git://, or local)

        Returns:
            True if exists, False otherwise

        Raises:
            ValueError: If a Git URI is given but the Git provider is disabled
        """
        if is_git(uri):
            return self._require_git_provider().exists(uri)
        return self._provider_for(uri).exists(uri)

    def from_parent_dir(self, uri: str, file: str | None = None) -> str:
        """
        Construct path from parent directory.

        S3 URIs are handled with POSIX path rules on the part after the
        ``s3://`` scheme, so the result does not depend on the host OS and
        the scheme is never mangled. A bare bucket ("s3://bucket") has no
        parent and is returned unchanged.

        Args:
            uri: Current file path
            file: Optional file name to append

        Returns:
            Parent directory path (optionally with file appended)
        """
        if is_s3(uri):
            from pathlib import PurePosixPath

            location = PurePosixPath(uri[len("s3://"):])
            # Only step up when there is something below the bucket.
            if len(location.parts) > 1:
                location = location.parent
            # An empty remainder renders as "."; keep the bare scheme instead.
            tail = str(location) if location.parts else ""
            pth = f"s3://{tail}"
        else:
            pth = str(Path(uri).parent)
        return pth if not file else f"{pth}/{file}"

    def read(self, uri: str, use_polars: bool = True, **options) -> Any:
        """
        Read any data type - extensions determine the reader.

        Supports:
            - Columnar: .csv, .parquet, .feather, .avro (via Polars/Pandas)
            - Structured: .json, .yaml, .yml
            - Documents: .pdf, .docx, .md, .txt
            - Images: .png, .jpg, .jpeg, .tiff, .svg
            - Binary: .pickle
            - Audio: .wav, .mp3 (TODO)
            - Spreadsheets: .xlsx, .xls

        Args:
            uri: File path (s3://, git://, or local)
            use_polars: Use Polars for dataframes (default: True). Not
                forwarded to the Git provider.
            **options: Format-specific options

        Returns:
            Parsed data in appropriate format

        Raises:
            ValueError: If a Git URI is given but the Git provider is disabled
        """
        if is_git(uri):
            return self._require_git_provider().read(uri, **options)
        return self._provider_for(uri).read(uri, use_polars=use_polars, **options)

    def write(self, uri: str, data: Any, **options):
        """
        Write any data type - extensions determine the writer.

        Args:
            uri: File path
            data: Data to write
            **options: Format-specific options

        Returns:
            Whatever the selected provider's write returns
        """
        return self._provider_for(uri).write(uri, data, **options)

    def copy(self, uri_from: str, uri_to: str):
        """
        Copy files between filesystems.

        Supports:
            - s3 -> s3
            - local -> s3 (upload)
            - s3 -> local (download)
            - local -> local

        Args:
            uri_from: Source path
            uri_to: Destination path

        Returns:
            Whatever the selected provider's copy returns
        """
        from_s3 = is_s3(uri_from)
        to_s3 = is_s3(uri_to)

        # Any transfer touching S3 (s3->s3, upload, download) is handled by
        # the S3 provider; only pure local copies use the local provider.
        if from_s3 or to_s3:
            return self._s3_provider.copy(uri_from, uri_to)
        return self._local_provider.copy(uri_from, uri_to)

    def cache_data(self, data: Any, **kwargs) -> str:
        """
        Cache data to S3 storage.

        Currently supports images, can be extended for other types.

        Args:
            data: Data to cache
            **kwargs: Additional options (uri, suffix, etc.)

        Returns:
            URI of cached data
        """
        return self._s3_provider.cache_data(data, **kwargs)

    def ls(self, uri: str, **options) -> list[str]:
        """
        List files from a prefix recursively.

        Args:
            uri: Directory path or prefix (s3://, git://, or local)
            **options: Provider-specific options

        Returns:
            List of file URIs

        Raises:
            ValueError: If a Git URI is given but the Git provider is disabled
        """
        if is_git(uri):
            return self._require_git_provider().ls(uri, **options)
        return self._provider_for(uri).ls(uri, **options)

    def ls_dirs(self, uri: str, **options) -> list[str]:
        """
        List immediate child directories.

        Args:
            uri: Directory path or prefix
            **options: Provider-specific options

        Returns:
            List of directory URIs
        """
        return self._provider_for(uri).ls_dirs(uri, **options)

    def ls_iter(self, uri: str, **options) -> Iterator[str]:
        """
        Iterate over files from a prefix (for pagination).

        Args:
            uri: Directory path or prefix
            **options: Provider-specific options

        Yields:
            File URIs
        """
        yield from self._provider_for(uri).ls_iter(uri, **options)

    def delete(self, uri: str, limit: int = 100):
        """
        Delete objects in a folder/directory.

        Safety limit prevents accidental bulk deletions.

        Args:
            uri: File or directory path
            limit: Maximum number of files to delete

        Returns:
            List of deleted file URIs
        """
        return self._provider_for(uri).delete(uri, limit=limit)

    def read_dataset(self, uri: str):
        """
        Read data as PyArrow dataset.

        Useful for:
            - Lazy loading large datasets
            - Partitioned data
            - S3 Express use cases

        Args:
            uri: Dataset path (parquet, etc.)

        Returns:
            PyArrow Dataset
        """
        return self._provider_for(uri).read_dataset(uri)

    def read_image(self, uri: str):
        """
        Read image as PIL Image.

        Args:
            uri: Image file path

        Returns:
            PIL Image object
        """
        return self._provider_for(uri).read_image(uri)

    def apply(self, uri: str, fn: Callable[[str], Any]) -> Any:
        """
        Apply a function to a file.

        Downloads file to temporary location if needed, then passes
        local path to the function.

        Args:
            uri: File path
            fn: Function that takes a local file path

        Returns:
            Result of function call
        """
        return self._provider_for(uri).apply(uri, fn)

    def local_file(self, uri: str) -> str:
        """
        Get local file path, downloading from S3 if needed.

        Args:
            uri: File path (s3://... or local)

        Returns:
            Local file path; non-S3 URIs are returned unchanged
        """
        if is_s3(uri):
            return self._s3_provider.local_file(uri)
        return uri

    # ========================================================================
    # Parsing Hooks
    # ========================================================================

    def get_parsed_uri(self, uri: str, resource: str = "content.md") -> str:
        """
        Get URI for parsed version of a file.

        Args:
            uri: Original file URI
            resource: Resource within parsed directory (default: content.md)

        Returns:
            Parsed resource URI

        Example:
            fs = FS()
            parsed_uri = fs.get_parsed_uri("s3://bucket/file.pdf")
            metadata_uri = fs.get_parsed_uri("s3://bucket/file.pdf", "metadata.json")
        """
        return self._provider_for(uri).get_parsed_uri(uri, resource)

    def has_parsed(self, uri: str) -> bool:
        """
        Check if parsed version exists for a file.

        Args:
            uri: Original file URI

        Returns:
            True if metadata.json exists in .parsed/ directory

        Example:
            if fs.has_parsed("s3://bucket/file.pdf"):
                content = fs.read_parsed("s3://bucket/file.pdf")
        """
        return self._provider_for(uri).has_parsed(uri)

    def read_parsed(self, uri: str, resource: str = "content.md", **options) -> Any:
        """
        Read parsed content for a file.

        Args:
            uri: Original file URI
            resource: Resource to read (default: content.md)
            **options: Format-specific read options

        Returns:
            Parsed content (format depends on resource)

        Example:
            # Read parsed markdown
            markdown = fs.read_parsed("s3://bucket/file.pdf")

            # Read parse metadata
            metadata = fs.read_parsed("s3://bucket/file.pdf", "metadata.json")
        """
        return self._provider_for(uri).read_parsed(uri, resource, **options)

    def write_parsed(
        self,
        uri: str,
        content: Any,
        resource: str = "content.md",
        metadata: dict[str, Any] | None = None,
    ):
        """
        Write parsed content for a file.

        Args:
            uri: Original file URI
            content: Parsed content to write
            resource: Resource name (default: content.md)
            metadata: Optional parse metadata

        Example:
            fs.write_parsed(
                "s3://bucket/file.pdf",
                markdown_content,
                metadata={"provider": "kreuzberg", "page_count": 10}
            )
        """
        # The provider's return value is intentionally not propagated.
        self._provider_for(uri).write_parsed(uri, content, resource, metadata)

    def list_parsed_resources(self, uri: str) -> list[str]:
        """
        List all resources in parsed directory.

        Args:
            uri: Original file URI

        Returns:
            List of resource paths (relative to .parsed/ directory)

        Example:
            resources = fs.list_parsed_resources("s3://bucket/file.pdf")
            # ['content.md', 'metadata.json', 'images/page_1.png']
        """
        return self._provider_for(uri).list_parsed_resources(uri)