futurehouse-client 0.3.20.dev411__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,333 +0,0 @@
1
- import contextlib
2
- from datetime import datetime
3
- from enum import StrEnum, auto
4
- from os import PathLike
5
- from pathlib import Path
6
- from typing import Any
7
- from uuid import UUID
8
-
9
- from pydantic import BaseModel, Field, JsonValue
10
-
11
-
12
- class DataStorageEntry(BaseModel):
13
- """Model representing a data storage entry."""
14
-
15
- id: UUID = Field(description="Unique identifier for the data storage entry")
16
- name: str = Field(description="Name of the data storage entry")
17
- description: str | None = Field(
18
- default=None, description="Description of the data storage entry"
19
- )
20
- content: str | None = Field(
21
- default=None, description="Content of the data storage entry"
22
- )
23
- embedding: list[float] | None = Field(
24
- default=None, description="Embedding vector for the content"
25
- )
26
- is_collection: bool = Field(
27
- default=False, description="Whether this entry is a collection"
28
- )
29
- tags: list[str] | None = Field(
30
- default=None,
31
- description="List of tags associated with the data storage entry",
32
- )
33
- parent_id: UUID | None = Field(
34
- default=None,
35
- description="ID of the parent entry if this is a sub-entry for hierarchical storage",
36
- )
37
- dataset_id: UUID | None = Field(
38
- default=None,
39
- description="ID of the dataset this entry belongs to",
40
- )
41
- path: str | None = Field(
42
- default=None,
43
- description="Path in the storage system where this entry is located, if a file.",
44
- )
45
- bigquery_schema: Any | None = Field(
46
- default=None, description="Target BigQuery schema for the data storage entry"
47
- )
48
- user_id: str = Field(description="ID of the user who created this entry")
49
- created_at: datetime = Field(description="Timestamp when the entry was created")
50
- modified_at: datetime = Field(
51
- description="Timestamp when the entry was last updated"
52
- )
53
-
54
-
55
- class DataStorageType(StrEnum):
56
- BIGQUERY = auto()
57
- GCS = auto()
58
- PG_TABLE = auto()
59
- RAW_CONTENT = auto()
60
- ELASTIC_SEARCH = auto()
61
-
62
-
63
- class DataContentType(StrEnum):
64
- BQ_DATASET = auto()
65
- BQ_TABLE = auto()
66
- TEXT = auto()
67
- TEXT_W_EMBEDDINGS = auto()
68
- DIRECTORY = auto()
69
- FILE = auto()
70
- INDEX = auto()
71
- INDEX_W_EMBEDDINGS = auto()
72
-
73
-
74
- class DataStorageLocationPayload(BaseModel):
75
- storage_type: DataStorageType
76
- content_type: DataContentType
77
- content_schema: JsonValue | None = None
78
- metadata: JsonValue | None = None
79
- location: str | None = None
80
-
81
-
82
- class DataStorageLocationDetails(BaseModel):
83
- """Model representing the location details within a DataStorageLocations object."""
84
-
85
- storage_type: str = Field(description="Type of storage (e.g., 'gcs', 'pg_table')")
86
- content_type: str = Field(description="Type of content stored")
87
- content_schema: JsonValue | None = Field(default=None, description="Content schema")
88
- metadata: JsonValue | None = Field(default=None, description="Location metadata")
89
- location: str | None = Field(
90
- default=None, description="Location path or identifier"
91
- )
92
-
93
-
94
- class DataStorageLocations(BaseModel):
95
- """Model representing storage locations for a data storage entry."""
96
-
97
- id: UUID = Field(description="Unique identifier for the storage locations")
98
- data_storage_id: UUID = Field(description="ID of the associated data storage entry")
99
- storage_config: DataStorageLocationDetails = Field(
100
- description="Storage configuration details"
101
- )
102
- created_at: datetime = Field(description="Timestamp when the location was created")
103
-
104
-
105
- class DataStorageResponse(BaseModel):
106
- """Response model for data storage operations."""
107
-
108
- data_storage: DataStorageEntry = Field(description="The created data storage entry")
109
- storage_location: DataStorageLocations = Field(
110
- description="Storage location for this data entry"
111
- )
112
- signed_url: str | None = Field(
113
- default=None,
114
- description="Signed URL for uploading/downloading the file to/from GCS",
115
- )
116
-
117
-
118
- class DataStorageRequestPayload(BaseModel):
119
- """Payload for creating a data storage entry."""
120
-
121
- name: str = Field(description="Name of the data storage entry")
122
- description: str | None = Field(
123
- default=None, description="Description of the data storage entry"
124
- )
125
- content: str | None = Field(
126
- default=None, description="Content of the data storage entry"
127
- )
128
- is_collection: bool = Field(
129
- default=False, description="Whether this entry is a collection"
130
- )
131
- parent_id: UUID | None = Field(
132
- default=None, description="ID of the parent entry for hierarchical storage"
133
- )
134
- dataset_id: UUID | None = Field(
135
- default=None,
136
- description="ID of existing dataset to add entry to, or None to create new dataset",
137
- )
138
- path: PathLike | str | None = Field(
139
- default=None,
140
- description="Path to store in the GCS bucket.",
141
- )
142
- existing_location: DataStorageLocationPayload | None = Field(
143
- default=None, description="Target storage metadata"
144
- )
145
-
146
-
147
- class ManifestEntry(BaseModel):
148
- """Model representing a single entry in a manifest file."""
149
-
150
- description: str | None = Field(
151
- default=None, description="Description of the file or directory"
152
- )
153
- metadata: dict[str, Any] | None = Field(
154
- default=None, description="Additional metadata for the entry"
155
- )
156
-
157
-
158
- class DirectoryManifest(BaseModel):
159
- """Model representing the structure of a manifest file."""
160
-
161
- entries: dict[str, "ManifestEntry | DirectoryManifest"] = Field(
162
- default_factory=dict,
163
- description="Map of file/directory names to their manifest entries",
164
- )
165
-
166
- def get_entry_description(self, name: str) -> str | None:
167
- """Get description for a specific entry."""
168
- entry = self.entries.get(name)
169
- if isinstance(entry, ManifestEntry):
170
- return entry.description
171
- if isinstance(entry, DirectoryManifest):
172
- # For nested directories, could derive description from contents
173
- return None
174
- return None
175
-
176
- def get_entry_metadata(self, name: str) -> dict[str, Any] | None:
177
- """Get metadata for a specific entry."""
178
- entry = self.entries.get(name)
179
- if isinstance(entry, ManifestEntry):
180
- return entry.metadata
181
- return None
182
-
183
- @classmethod
184
- def from_dict(cls, data: dict[str, Any]) -> "DirectoryManifest":
185
- """Create DirectoryManifest from a dictionary (loaded from JSON/YAML)."""
186
- entries: dict[str, ManifestEntry | DirectoryManifest] = {}
187
- for name, value in data.items():
188
- if isinstance(value, dict):
189
- if "description" in value or "metadata" in value:
190
- # This looks like a ManifestEntry
191
- entries[name] = ManifestEntry(**value)
192
- else:
193
- # This looks like a nested directory
194
- entries[name] = cls.from_dict(value)
195
- else:
196
- # Simple string description
197
- entries[name] = ManifestEntry(description=str(value))
198
-
199
- return cls(entries=entries)
200
-
201
- def to_dict(self) -> dict[str, Any]:
202
- """Convert back to dictionary format."""
203
- result = {}
204
- for name, entry in self.entries.items():
205
- if isinstance(entry, ManifestEntry):
206
- if entry.description is not None or entry.metadata is not None:
207
- entry_dict = {}
208
- if entry.description is not None:
209
- entry_dict["description"] = entry.description
210
- if entry.metadata is not None:
211
- entry_dict.update(entry.metadata)
212
- result[name] = entry_dict
213
- elif isinstance(entry, DirectoryManifest):
214
- result[name] = entry.to_dict()
215
- return result
216
-
217
-
218
- class FileMetadata(BaseModel):
219
- """Model representing metadata for a file being processed."""
220
-
221
- path: Path = Field(description="Path to the file")
222
- name: str = Field(description="Name of the file")
223
- size: int | None = Field(default=None, description="Size of the file in bytes")
224
- description: str | None = Field(
225
- default=None, description="Description from manifest or generated"
226
- )
227
- is_directory: bool = Field(default=False, description="Whether this is a directory")
228
- parent_id: UUID | None = Field(
229
- default=None, description="Parent directory ID in the storage system"
230
- )
231
- dataset_id: UUID | None = Field(
232
- default=None, description="Dataset ID this file belongs to"
233
- )
234
-
235
- @classmethod
236
- def from_path(
237
- cls,
238
- path: Path,
239
- description: str | None = None,
240
- parent_id: UUID | None = None,
241
- dataset_id: UUID | None = None,
242
- ) -> "FileMetadata":
243
- """Create FileMetadata from a Path object."""
244
- size = None
245
- is_directory = path.is_dir()
246
-
247
- if not is_directory:
248
- with contextlib.suppress(OSError):
249
- size = path.stat().st_size
250
-
251
- return cls(
252
- path=path,
253
- name=path.name,
254
- size=size,
255
- description=description,
256
- is_directory=is_directory,
257
- parent_id=parent_id,
258
- dataset_id=dataset_id,
259
- )
260
-
261
-
262
- class UploadProgress(BaseModel):
263
- """Model for tracking upload progress."""
264
-
265
- total_files: int = Field(description="Total number of files to upload")
266
- uploaded_files: int = Field(default=0, description="Number of files uploaded")
267
- total_bytes: int | None = Field(default=None, description="Total bytes to upload")
268
- uploaded_bytes: int = Field(default=0, description="Number of bytes uploaded")
269
- current_file: str | None = Field(
270
- default=None, description="Currently uploading file"
271
- )
272
- errors: list[str] = Field(
273
- default_factory=list, description="List of error messages"
274
- )
275
-
276
- @property
277
- def progress_percentage(self) -> float:
278
- """Calculate progress percentage based on files."""
279
- if self.total_files == 0:
280
- return 0.0
281
- return (self.uploaded_files / self.total_files) * 100.0
282
-
283
- @property
284
- def bytes_percentage(self) -> float | None:
285
- """Calculate progress percentage based on bytes."""
286
- if not self.total_bytes or self.total_bytes == 0:
287
- return None
288
- return (self.uploaded_bytes / self.total_bytes) * 100.0
289
-
290
- def add_error(self, error: str) -> None:
291
- """Add an error message."""
292
- self.errors.append(error)
293
-
294
- def increment_files(self, bytes_uploaded: int = 0) -> None:
295
- """Increment the uploaded files counter."""
296
- self.uploaded_files += 1
297
- self.uploaded_bytes += bytes_uploaded
298
-
299
-
300
- class DirectoryUploadConfig(BaseModel):
301
- """Configuration for directory uploads."""
302
-
303
- name: str = Field(description="Name for the directory upload")
304
- description: str | None = Field(
305
- default=None, description="Description for the directory"
306
- )
307
- as_collection: bool = Field(
308
- default=False, description="Upload as single collection or hierarchically"
309
- )
310
- manifest_filename: str | None = Field(
311
- default=None, description="Name of manifest file to use"
312
- )
313
- ignore_patterns: list[str] = Field(
314
- default_factory=list, description="Patterns to ignore"
315
- )
316
- ignore_filename: str = Field(
317
- default=".gitignore", description="Name of ignore file to read"
318
- )
319
- base_path: str | None = Field(default=None, description="Base path for storage")
320
- parent_id: UUID | None = Field(default=None, description="Parent directory ID")
321
- dataset_id: UUID | None = Field(default=None, description="Dataset ID to use")
322
-
323
- def with_parent(
324
- self, parent_id: UUID, dataset_id: UUID | None = None
325
- ) -> "DirectoryUploadConfig":
326
- """Create a new config with parent and dataset IDs set."""
327
- return self.model_copy(
328
- update={"parent_id": parent_id, "dataset_id": dataset_id or self.dataset_id}
329
- )
330
-
331
-
332
- # Forward reference resolution for DirectoryManifest
333
- DirectoryManifest.model_rebuild()
@@ -1,23 +0,0 @@
1
- futurehouse_client/__init__.py,sha256=PvFTkocA-hobsWoDEBEdrUgLIbuVbDs_0nvMdImJmHk,707
2
- futurehouse_client/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- futurehouse_client/version.py,sha256=ATqNPJGbdMp-ifig01HgY_N9rods-3s_pdrLTqHW_ro,723
4
- futurehouse_client/clients/__init__.py,sha256=-HXNj-XJ3LRO5XM6MZ709iPs29YpApss0Q2YYg1qMZw,280
5
- futurehouse_client/clients/data_storage_methods.py,sha256=Ovmi72vW3qqgVJcVFDkMA4tykppsRBvU8kCbjIMY1Sk,68514
6
- futurehouse_client/clients/job_client.py,sha256=D51_qTxya6g5Wfg_ZfJdP031TV_YDJeXkGMiYAJ1qRc,11962
7
- futurehouse_client/clients/rest_client.py,sha256=hNDSN8amTXSEkiXRtNYcFM942UdvryUsf1q0D_dAOW4,100332
8
- futurehouse_client/models/__init__.py,sha256=kQ4R7VEuRxO0IQEW_sk9CndBL7zzl8rUKI24ddyYLM0,647
9
- futurehouse_client/models/app.py,sha256=TGoAeENNPc5mSBkMHjh-Z8VIlnaUNcoWUJLxUhRIkEE,31868
10
- futurehouse_client/models/client.py,sha256=n4HD0KStKLm6Ek9nL9ylP-bkK10yzAaD1uIDF83Qp_A,1828
11
- futurehouse_client/models/data_storage_methods.py,sha256=ff3l87fT379BKhaHcI3aR8O7p2Vfm0M_qP9cpnhq2Bs,12095
12
- futurehouse_client/models/rest.py,sha256=ybelLsyTsKYud7DYUCF0sFF6u81bl8WmS_wWAnbX-0M,3382
13
- futurehouse_client/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- futurehouse_client/utils/auth.py,sha256=tgWELjKfg8eWme_qdcRmc8TjQN9DVZuHHaVXZNHLchk,2960
15
- futurehouse_client/utils/general.py,sha256=Gxy8JJ2g6nO-gphf_kHAlkowb0eP_DqD4MSF58IXExE,1592
16
- futurehouse_client/utils/module_utils.py,sha256=aFyd-X-pDARXz9GWpn8SSViUVYdSbuy9vSkrzcVIaGI,4955
17
- futurehouse_client/utils/monitoring.py,sha256=UjRlufe67kI3VxRHOd5fLtJmlCbVA2Wqwpd4uZhXkQM,8728
18
- futurehouse_client/utils/world_model_tools.py,sha256=cybA82xD_UzmKN0rdQfhbB8SH4v6cFw8mRr4gFfQF5U,2208
19
- futurehouse_client-0.3.20.dev411.dist-info/licenses/LICENSE,sha256=oQ9ZHjUi-_6GfP3gs14FlPb0OlGwE1QCCKFGnJ4LD2I,11341
20
- futurehouse_client-0.3.20.dev411.dist-info/METADATA,sha256=Ay0u2c1E3UVy7gRG6j7Ww-eWqYLVeO3l8KkLONb7k0Q,26988
21
- futurehouse_client-0.3.20.dev411.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
22
- futurehouse_client-0.3.20.dev411.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
23
- futurehouse_client-0.3.20.dev411.dist-info/RECORD,,