nexo-schemas 0.0.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. nexo/schemas/__init__.py +0 -0
  2. nexo/schemas/application.py +292 -0
  3. nexo/schemas/connection.py +134 -0
  4. nexo/schemas/data.py +27 -0
  5. nexo/schemas/document.py +237 -0
  6. nexo/schemas/error/__init__.py +476 -0
  7. nexo/schemas/error/constants.py +50 -0
  8. nexo/schemas/error/descriptor.py +354 -0
  9. nexo/schemas/error/enums.py +40 -0
  10. nexo/schemas/error/metadata.py +15 -0
  11. nexo/schemas/error/spec.py +312 -0
  12. nexo/schemas/exception/__init__.py +0 -0
  13. nexo/schemas/exception/exc.py +911 -0
  14. nexo/schemas/exception/factory.py +1928 -0
  15. nexo/schemas/exception/handlers.py +110 -0
  16. nexo/schemas/google.py +14 -0
  17. nexo/schemas/key/__init__.py +0 -0
  18. nexo/schemas/key/rsa.py +131 -0
  19. nexo/schemas/metadata.py +21 -0
  20. nexo/schemas/mixins/__init__.py +0 -0
  21. nexo/schemas/mixins/filter.py +140 -0
  22. nexo/schemas/mixins/general.py +65 -0
  23. nexo/schemas/mixins/hierarchy.py +19 -0
  24. nexo/schemas/mixins/identity.py +387 -0
  25. nexo/schemas/mixins/parameter.py +50 -0
  26. nexo/schemas/mixins/service.py +40 -0
  27. nexo/schemas/mixins/sort.py +111 -0
  28. nexo/schemas/mixins/timestamp.py +192 -0
  29. nexo/schemas/model.py +240 -0
  30. nexo/schemas/operation/__init__.py +0 -0
  31. nexo/schemas/operation/action/__init__.py +9 -0
  32. nexo/schemas/operation/action/base.py +14 -0
  33. nexo/schemas/operation/action/resource.py +371 -0
  34. nexo/schemas/operation/action/status.py +8 -0
  35. nexo/schemas/operation/action/system.py +6 -0
  36. nexo/schemas/operation/action/websocket.py +6 -0
  37. nexo/schemas/operation/base.py +289 -0
  38. nexo/schemas/operation/constants.py +18 -0
  39. nexo/schemas/operation/context.py +68 -0
  40. nexo/schemas/operation/dependency.py +26 -0
  41. nexo/schemas/operation/enums.py +168 -0
  42. nexo/schemas/operation/extractor.py +36 -0
  43. nexo/schemas/operation/mixins.py +53 -0
  44. nexo/schemas/operation/request.py +1066 -0
  45. nexo/schemas/operation/resource.py +839 -0
  46. nexo/schemas/operation/system.py +55 -0
  47. nexo/schemas/operation/websocket.py +55 -0
  48. nexo/schemas/pagination.py +67 -0
  49. nexo/schemas/parameter.py +60 -0
  50. nexo/schemas/payload.py +116 -0
  51. nexo/schemas/resource.py +64 -0
  52. nexo/schemas/response.py +1041 -0
  53. nexo/schemas/security/__init__.py +0 -0
  54. nexo/schemas/security/api_key.py +63 -0
  55. nexo/schemas/security/authentication.py +848 -0
  56. nexo/schemas/security/authorization.py +922 -0
  57. nexo/schemas/security/enums.py +32 -0
  58. nexo/schemas/security/impersonation.py +179 -0
  59. nexo/schemas/security/token.py +402 -0
  60. nexo/schemas/security/types.py +17 -0
  61. nexo/schemas/success/__init__.py +0 -0
  62. nexo/schemas/success/descriptor.py +100 -0
  63. nexo/schemas/success/enums.py +23 -0
  64. nexo/schemas/user_agent.py +46 -0
  65. nexo_schemas-0.0.16.dist-info/METADATA +87 -0
  66. nexo_schemas-0.0.16.dist-info/RECORD +69 -0
  67. nexo_schemas-0.0.16.dist-info/WHEEL +5 -0
  68. nexo_schemas-0.0.16.dist-info/licenses/LICENSE +21 -0
  69. nexo_schemas-0.0.16.dist-info/top_level.txt +1 -0
File without changes
@@ -0,0 +1,292 @@
1
+ import socket
2
+ from Crypto.PublicKey.RSA import RsaKey
3
+ from enum import StrEnum
4
+ from functools import cached_property
5
+ from pathlib import Path
6
+ from pydantic import BaseModel, Field, computed_field, model_validator
7
+ from pydantic_settings import BaseSettings, SettingsConfigDict
8
+ from typing import Annotated, ClassVar, Self, TypeVar, overload
9
+ from uuid import UUID
10
+ from nexo.enums.environment import Environment
11
+ from nexo.enums.system import SystemRole, ListOfSystemRoles
12
+ from nexo.enums.user import UserType
13
+ from nexo.types.boolean import OptBool
14
+ from nexo.types.dict import StrToAnyDict
15
+ from nexo.types.misc import BytesOrStr
16
+ from nexo.types.string import ListOfStrs, OptStr
17
+ from .mixins.identity import EntityIdentifier
18
+ from .operation.enums import ListOfOperationTypes
19
+ from .security.api_key import validate
20
+ from .security.authentication import SystemCredentials, SystemUser, SystemAuthentication
21
+ from .security.authorization import APIKeyAuthorization
22
+ from .security.enums import Domain
23
+ from .security.token import SystemToken
24
+
25
+
26
class Execution(StrEnum):
    """Execution modes accepted by ``ApplicationSettings.EXECUTION``."""

    CONTAINER = "container"
    DIRECT = "direct"

    @classmethod
    def choices(cls) -> ListOfStrs:
        """Return the string value of every member, in declaration order."""
        values: ListOfStrs = []
        for member in cls:
            values.append(member.value)
        return values
33
+
34
+
35
class ApplicationContext(BaseSettings):
    """Minimal identity of the running application.

    The three fields are populated through the ``NAME``, ``ENVIRONMENT`` and
    ``SERVICE_KEY`` validation aliases; unknown inputs are ignored.
    """

    model_config: ClassVar[SettingsConfigDict] = SettingsConfigDict(extra="ignore")

    name: Annotated[str, Field(..., validation_alias="NAME")]
    environment: Annotated[Environment, Field(..., validation_alias="ENVIRONMENT")]
    service_key: Annotated[str, Field(..., validation_alias="SERVICE_KEY")]

    @computed_field
    @cached_property
    def instance_id(self) -> str:
        """Host name from ``socket.gethostname()``, cached after first access."""
        hostname = socket.gethostname()
        return hostname

    @classmethod
    def new(cls) -> Self:
        """Build an instance with no explicit arguments (usable as a default factory)."""
        instance = cls()  # type: ignore
        return instance
50
+
51
+
52
# Alias for "an ApplicationContext or None".
+ OptApplicationContext = ApplicationContext | None
53
+
54
+
55
class ApplicationContextMixin(BaseModel):
    """Mixin adding an ``application_context`` field.

    When the field is omitted it is built by calling ``ApplicationContext.new``.
    """

    application_context: Annotated[
        ApplicationContext,
        Field(
            description="Application's context",
            default_factory=ApplicationContext.new,
        ),
    ]
60
+
61
+
62
class ApplicationSettings(BaseSettings):
    """Settings for an application/service, loaded through pydantic-settings.

    Upper-case attributes are the raw settings fields. Lower-case
    properties (``reload``, ``port``, ``context``, ...) derive values from
    them. Several path-like fields default to locations under
    ``/etc/{NAME}/`` when they are not supplied.
    """

    model_config: ClassVar[SettingsConfigDict] = SettingsConfigDict(extra="ignore")

    @computed_field
    @cached_property
    def instance_id(self) -> str:
        """Host name from ``socket.gethostname()``, cached after first access."""
        return socket.gethostname()

    # Application related settings
    NAME: Annotated[str, Field(..., description="Application's name")]
    # Service related settings
    ENVIRONMENT: Annotated[Environment, Field(..., description="Environment")]
    SERVICE_KEY: Annotated[str, Field(..., description="Service's key")]
    SERVICE_NAME: Annotated[str, Field(..., description="Service's name")]

    @cached_property
    def context(self) -> ApplicationContext:
        """Return an ``ApplicationContext`` mirroring NAME, ENVIRONMENT and SERVICE_KEY.

        ``ApplicationContext`` declares validation aliases only, so the values
        are passed by alias. Field-name keyword arguments would be treated as
        extras and ignored, leaving the context to be re-read from the
        settings sources.
        """
        return ApplicationContext(  # type: ignore[call-arg]
            NAME=self.NAME,
            ENVIRONMENT=self.ENVIRONMENT,
            SERVICE_KEY=self.SERVICE_KEY,
        )

    CLIENT_ID: Annotated[UUID, Field(..., description="Client's ID")]
    CLIENT_SECRET: Annotated[UUID, Field(..., description="Client's Secret")]

    # Serving related settings
    EXECUTION: Annotated[
        Execution, Field(Execution.CONTAINER, description="Execution mode")
    ] = Execution.CONTAINER
    RELOAD: Annotated[OptBool, Field(None, description="Reload")] = None

    @cached_property
    def reload(self) -> bool:
        """Return RELOAD when set; otherwise True only for direct execution in LOCAL."""
        if self.RELOAD is not None:
            return self.RELOAD
        return (
            self.EXECUTION is Execution.DIRECT
            and self.ENVIRONMENT is Environment.LOCAL
        )

    HOST: Annotated[str, Field("127.0.0.1", description="Application's host")] = (
        "127.0.0.1"
    )
    PORT: Annotated[int, Field(8000, description="Application's port")] = 8000
    HOST_PORT: Annotated[int, Field(8000, description="Host's port")] = 8000

    @cached_property
    def port(self) -> int:
        """Return HOST_PORT for direct execution in LOCAL, otherwise PORT."""
        if self.EXECUTION is Execution.DIRECT and self.ENVIRONMENT is Environment.LOCAL:
            return self.HOST_PORT
        return self.PORT

    DOCKER_NETWORK: Annotated[str, Field(..., description="Docker's network")]

    @model_validator(mode="before")
    @classmethod
    def define_default_docker_network(cls, data: StrToAnyDict):
        """Default DOCKER_NETWORK to NAME when it was not supplied."""
        # If NAME is missing too, leave the data untouched so validation
        # reports the missing field instead of this hook raising KeyError.
        if "DOCKER_NETWORK" not in data and "NAME" in data:
            data["DOCKER_NETWORK"] = data["NAME"]
        return data

    ROOT_PATH: Annotated[str, Field("", description="Application's root path")] = ""

    # Configuration related settings
    USE_LOCAL_CONFIG: Annotated[
        bool, Field(False, description="Whether to use locally stored config")
    ] = False
    CONFIG_PATH: Annotated[OptStr, Field(None, description="Config path")] = None

    @model_validator(mode="after")
    def validate_config_path(self) -> Self:
        """When USE_LOCAL_CONFIG is set, default CONFIG_PATH and require it to be a file.

        Raises:
            ValueError: the resolved config path is missing or not a regular file.
        """
        if self.USE_LOCAL_CONFIG:
            if self.CONFIG_PATH is None:
                self.CONFIG_PATH = f"/etc/{self.NAME}/config/{self.SERVICE_KEY}/{self.ENVIRONMENT}.yaml"
            # is_file() is already False for a path that does not exist.
            if not Path(self.CONFIG_PATH).is_file():
                raise ValueError(
                    f"Config path '{self.CONFIG_PATH}' either did not exist or is not a file"
                )

        return self

    # Credential related settings
    GOOGLE_APPLICATION_CREDENTIALS: Annotated[
        str,
        Field(
            ...,
            description="Google application credential's file path",
        ),
    ]

    @model_validator(mode="before")
    @classmethod
    def define_default_google_aplication_credentials(cls, data: StrToAnyDict):
        """Default GOOGLE_APPLICATION_CREDENTIALS to a path under ``/etc/{NAME}/credentials``."""
        # Skip when NAME is absent; validation then reports the missing fields.
        if "GOOGLE_APPLICATION_CREDENTIALS" not in data and "NAME" in data:
            name: str = data["NAME"]
            credentials = f"/etc/{name}/credentials/google-service-account.json"
            data["GOOGLE_APPLICATION_CREDENTIALS"] = credentials
        return data

    API_KEY: Annotated[str, Field(..., description="Maleo's API Key")]

    @model_validator(mode="after")
    def validate_api_key(self) -> Self:
        """Run the imported ``validate`` helper on API_KEY, NAME and ENVIRONMENT."""
        # NOTE(review): presumably raises on an invalid key - confirm in security/api_key.py
        validate(self.API_KEY, self.NAME, self.ENVIRONMENT)
        return self

    @cached_property
    def authorization(self) -> APIKeyAuthorization:
        """Return an ``APIKeyAuthorization`` carrying API_KEY as its credentials."""
        return APIKeyAuthorization(credentials=self.API_KEY)

    SA_ID: Annotated[int, Field(..., description="SA's ID", ge=1)]
    SA_UUID: Annotated[UUID, Field(..., description="SA's UUID")]
    SA_ROLES: Annotated[
        ListOfSystemRoles,
        Field([SystemRole.ADMINISTRATOR], description="SA's Roles", min_length=1),
    ] = [SystemRole.ADMINISTRATOR]
    SA_USERNAME: Annotated[str, Field(..., description="SA's Username")]
    SA_EMAIL: Annotated[str, Field(..., description="SA's Email")]

    @cached_property
    def authentication(self) -> SystemAuthentication:
        """Build the ``SystemAuthentication`` described by the SA_* settings.

        Scopes are ``"authenticated"`` plus one ``"{Domain.SYSTEM}:{role}"``
        entry per role in SA_ROLES.
        """
        role_scopes = [f"{Domain.SYSTEM}:{role.value}" for role in self.SA_ROLES]
        return SystemAuthentication(
            credentials=SystemCredentials(
                user=EntityIdentifier[UserType](
                    id=self.SA_ID, uuid=self.SA_UUID, type=UserType.SERVICE
                ),
                domain_roles=self.SA_ROLES,
                scopes=["authenticated"] + role_scopes,
            ),
            user=SystemUser(display_name=self.SA_USERNAME, identity=self.SA_EMAIL),
        )

    @property
    def token(self) -> SystemToken:
        """Create a new ``SystemToken`` for SA_UUID on every access (not cached)."""
        return SystemToken.new(sub=self.SA_UUID)

    @overload
    def generate_token_string(
        self,
        key: RsaKey,
    ) -> str: ...
    @overload
    def generate_token_string(
        self,
        key: BytesOrStr,
        *,
        password: OptStr = None,
    ) -> str: ...
    def generate_token_string(
        self,
        key: BytesOrStr | RsaKey,
        *,
        password: OptStr = None,
    ) -> str:
        """Serialise a fresh token to a string with the given key.

        Args:
            key: An ``RsaKey`` object, or key material as bytes/str.
            password: Optional password, forwarded unchanged to ``to_string``.
        """
        return self.token.to_string(key, password=password)

    # Infra related settings
    PUBLISH_HEARTBEAT: Annotated[
        OptBool, Field(None, description="Whether to publish heartbeat")
    ] = None

    @cached_property
    def publish_heartbeat(self) -> bool:
        """Return PUBLISH_HEARTBEAT when set; otherwise True for STAGING/PRODUCTION."""
        if self.PUBLISH_HEARTBEAT is not None:
            return self.PUBLISH_HEARTBEAT
        return self.ENVIRONMENT in (Environment.STAGING, Environment.PRODUCTION)

    PUBLISH_RESOURCE_MEASUREMENT: Annotated[
        OptBool, Field(None, description="Whether to publish resource measurement")
    ] = None

    @cached_property
    def publish_resource_measurement(self) -> bool:
        """Return PUBLISH_RESOURCE_MEASUREMENT when set; otherwise True for STAGING/PRODUCTION."""
        if self.PUBLISH_RESOURCE_MEASUREMENT is not None:
            return self.PUBLISH_RESOURCE_MEASUREMENT
        return self.ENVIRONMENT in (Environment.STAGING, Environment.PRODUCTION)

    # Operation related settings
    PUBLISHABLE_OPERATIONS: Annotated[
        ListOfOperationTypes, Field([], description="Publishable operations")
    ] = []

    # Security related settings
    USE_LOCAL_KEY: Annotated[
        bool, Field(False, description="Whether to use locally stored key")
    ] = False
    PRIVATE_KEY_PASSWORD: Annotated[
        OptStr, Field(None, description="Private key's password")
    ] = None
    PRIVATE_KEY_PATH: Annotated[str, Field(..., description="Private key's path")]

    @model_validator(mode="before")
    @classmethod
    def define_default_private_key_path(cls, data: StrToAnyDict):
        """Default PRIVATE_KEY_PATH to ``/etc/{NAME}/keys/private.pem``."""
        # Skip when NAME is absent; validation then reports the missing fields.
        if "PRIVATE_KEY_PATH" not in data and "NAME" in data:
            name: str = data["NAME"]
            keys = f"/etc/{name}/keys/private.pem"
            data["PRIVATE_KEY_PATH"] = keys
        return data

    PUBLIC_KEY_PATH: Annotated[str, Field(..., description="Public key's path")]

    @model_validator(mode="before")
    @classmethod
    def define_default_public_key_path(cls, data: StrToAnyDict):
        """Default PUBLIC_KEY_PATH to ``/etc/{NAME}/keys/public.pem``."""
        # Skip when NAME is absent; validation then reports the missing fields.
        if "PUBLIC_KEY_PATH" not in data and "NAME" in data:
            name: str = data["NAME"]
            keys = f"/etc/{name}/keys/public.pem"
            data["PUBLIC_KEY_PATH"] = keys
        return data

    @model_validator(mode="after")
    def validate_keys_path(self) -> Self:
        """When USE_LOCAL_KEY is set, require both key paths to be existing files.

        Raises:
            ValueError: either key path is missing or not a regular file.
        """
        if self.USE_LOCAL_KEY:
            if not Path(self.PRIVATE_KEY_PATH).is_file():
                raise ValueError(
                    f"Private key path: '{self.PRIVATE_KEY_PATH}' either did not exist or is not a file"
                )

            if not Path(self.PUBLIC_KEY_PATH).is_file():
                raise ValueError(
                    f"Public key path: '{self.PUBLIC_KEY_PATH}' either did not exist or is not a file"
                )

        return self
290
+
291
+
292
# TypeVar for ApplicationSettings or any subclass of it.
+ ApplicationSettingsT = TypeVar("ApplicationSettingsT", bound=ApplicationSettings)
@@ -0,0 +1,134 @@
1
+ from datetime import datetime, timezone
2
+ from pydantic import BaseModel, Field
3
+ from starlette.requests import HTTPConnection
4
+ from typing import Annotated, Callable, Generic, TypeVar
5
+ from uuid import UUID, uuid4
6
+ from nexo.enums.connection import Scheme, Header
7
+ from nexo.types.boolean import OptBool
8
+ from nexo.types.dict import OptStrToStrDict
9
+ from nexo.types.string import OptStr, OptListOfDoubleStrs
10
+ from .user_agent import UserAgent, OptUserAgent
11
+
12
+
13
class ConnectionContext(BaseModel):
    """Snapshot of an incoming connection (HTTP request or WebSocket).

    ``id`` and ``executed_at`` are generated per instance when omitted.
    """

    # default_factory so each instance gets its own value. A plain default
    # of uuid4()/datetime.now() is evaluated once at class definition and
    # then shared by every instance.
    id: Annotated[UUID, Field(default_factory=uuid4, description="Connection's ID")]

    executed_at: Annotated[
        datetime,
        Field(
            default_factory=lambda: datetime.now(tz=timezone.utc),
            description="Executed At Timestamp",
        ),
    ]

    scheme: Annotated[Scheme, Field(..., description="Connection's scheme (http/ws)")]

    method: Annotated[
        OptStr, Field(None, description="HTTP method (None for WebSocket)")
    ] = None

    url: Annotated[str, Field(..., description="Connection URL")]
    ip_address: Annotated[str, Field("unknown", description="Client's IP address")] = (
        "unknown"
    )
    is_internal: Annotated[
        OptBool, Field(None, description="True if IP is internal")
    ] = None

    headers: Annotated[
        OptListOfDoubleStrs, Field(None, description="Connection's headers")
    ] = None

    path_params: Annotated[
        OptStrToStrDict, Field(None, description="Path parameters")
    ] = None

    query_params: Annotated[OptStr, Field(None, description="Query parameters")] = None

    user_agent: Annotated[OptUserAgent, Field(None, description="User agent")] = None

    referer: Annotated[OptStr, Field(None, description="Referrer URL")] = None

    origin: Annotated[OptStr, Field(None, description="Origin header")] = None

    host: Annotated[OptStr, Field(None, description="Host header")] = None

    forwarded_proto: Annotated[
        OptStr, Field(None, description="Forwarded protocol")
    ] = None

    language: Annotated[OptStr, Field(None, description="Accepted languages")] = None

    @staticmethod
    def _is_internal_ip(ip_address: str) -> bool:
        """Return True for IPv4 loopback and RFC 1918 private address prefixes."""
        if ip_address.startswith(("10.", "192.168.", "127.")):
            return True
        if ip_address.startswith("172."):
            # Only 172.16.0.0/12 (second octet 16-31) is private; the rest
            # of 172.0.0.0/8 is publicly routable.
            second_octet = ip_address.split(".")[1]
            return second_octet.isdecimal() and 16 <= int(second_octet) <= 31
        return False

    @classmethod
    def extract_client_ip(cls, conn: HTTPConnection) -> str:
        """Resolve the client IP of ``conn``.

        Checks, in order: the first non-empty entry of X-Forwarded-For,
        X-Real-IP, then the socket peer address. Returns ``"unknown"`` when
        none is available.
        """
        x_forwarded_for = conn.headers.get(Header.X_FORWARDED_FOR)
        if x_forwarded_for:
            # Skip blank entries (e.g. a header starting with a comma) so an
            # empty string is never reported as the client address.
            for candidate in x_forwarded_for.split(","):
                candidate = candidate.strip()
                if candidate:
                    return candidate

        x_real_ip = conn.headers.get(Header.X_REAL_IP)
        if x_real_ip:
            return x_real_ip

        return conn.client.host if conn.client else "unknown"

    @classmethod
    def from_connection(cls, conn: HTTPConnection) -> "ConnectionContext":
        """Build a context from ``conn``.

        The connection ID and execution timestamp are read from
        ``conn.state`` when present with the right type. Otherwise they are
        generated and stored back on ``conn.state``, so later calls for the
        same connection reuse them.
        """
        connection_id = getattr(conn.state, "connection_id", None)
        if not isinstance(connection_id, UUID):
            connection_id = uuid4()
            conn.state.connection_id = connection_id

        executed_at = getattr(conn.state, "executed_at", None)
        if not isinstance(executed_at, datetime):
            executed_at = datetime.now(tz=timezone.utc)
            conn.state.executed_at = executed_at

        ip_address = cls.extract_client_ip(conn)
        is_internal = (
            None if ip_address == "unknown" else cls._is_internal_ip(ip_address)
        )

        ua_string = conn.headers.get(Header.USER_AGENT, "")
        user_agent = (
            UserAgent.from_string(user_agent_string=ua_string) if ua_string else None
        )

        return cls(
            id=connection_id,
            executed_at=executed_at,
            scheme=Scheme(conn.url.scheme),
            method=getattr(conn, "method", None),  # WebSocket doesn't have method
            url=str(conn.url),
            ip_address=ip_address,
            is_internal=is_internal,
            headers=conn.headers.items(),
            path_params=conn.path_params,
            query_params=(None if not conn.query_params else str(conn.query_params)),
            user_agent=user_agent,
            referer=conn.headers.get(Header.REFERER),
            origin=conn.headers.get(Header.ORIGIN),
            host=conn.headers.get(Header.HOST),
            forwarded_proto=conn.headers.get(Header.X_FORWARDED_PROTO),
            language=conn.headers.get(Header.ACCEPT_LANGUAGE),
        )

    @classmethod
    def as_dependency(cls) -> Callable[[HTTPConnection], "ConnectionContext"]:
        """Return a callable that maps a connection to ``cls.from_connection(conn)``."""

        def dependency(conn: HTTPConnection) -> "ConnectionContext":
            return cls.from_connection(conn)

        return dependency
125
+
126
+
127
# Alias for "a ConnectionContext or None", and a TypeVar bounded by that alias.
+ OptConnectionContext = ConnectionContext | None
128
+ OptConnectionContextT = TypeVar("OptConnectionContextT", bound=OptConnectionContext)
129
+
130
+
131
class ConnectionContextMixin(BaseModel, Generic[OptConnectionContextT]):
    """Generic mixin adding a required ``connection_context`` field."""

    connection_context: Annotated[
        OptConnectionContextT, Field(..., description="Connection context")
    ]
nexo/schemas/data.py ADDED
@@ -0,0 +1,27 @@
1
+ from pydantic import BaseModel, Field
2
+ from typing import Annotated, Generic, Literal, TypeVar
3
+
4
+
5
# Unconstrained TypeVars for the "old" and "new" sides of a DataPair.
+ OldDataT = TypeVar("OldDataT")
6
+ NewDataT = TypeVar("NewDataT")
7
+
8
+
9
class DataPair(BaseModel, Generic[OldDataT, NewDataT]):
    """Generic model holding a required ``old`` value and a required ``new`` value."""

    old: Annotated[OldDataT, Field(..., description="Old data")]
    new: Annotated[NewDataT, Field(..., description="New data")]
12
+
13
+
14
# Unconstrained TypeVar for the payload type of DataMixin.
+ AnyDataT = TypeVar("AnyDataT")
15
+
16
+
17
class DataMixin(BaseModel, Generic[AnyDataT]):
    """Generic mixin adding a required ``data`` field."""

    data: Annotated[AnyDataT, Field(..., description="Data")]
19
+
20
+
21
# TypeVar restricted to pydantic BaseModel subclasses.
+ ModelDataT = TypeVar("ModelDataT", bound=BaseModel)
22
+
23
+
24
class PingData(BaseModel):
    """Payload whose only field, ``ping``, is always the literal ``"pong"``."""

    ping: Literal["pong"] = Field("pong", description="Ping response")
@@ -0,0 +1,237 @@
1
+ import csv
2
+ from io import StringIO
3
+ from fastapi import UploadFile
4
+ from pydantic import BaseModel, Field, PrivateAttr, computed_field, model_validator
5
+ from typing import Annotated, ClassVar, Generic, Self, TypeVar
6
+ from nexo.types.integer import OptInt
7
+ from nexo.types.string import OptStr, OptStrT, SeqOfStrs
8
+ from .error.enums import ErrorCode
9
+
10
+
11
class Document(BaseModel):
    """An uploaded file held in memory together with its metadata.

    ``content`` is excluded from serialisation. The originating
    ``UploadFile`` is kept in the private ``_raw`` attribute when the
    document is built through ``from_file``.
    """

    _raw: UploadFile | None = PrivateAttr(None)
    content: Annotated[bytes, Field(..., description="Content", exclude=True)]
    content_type: Annotated[str, Field(..., description="Content Type")]
    filename: Annotated[str, Field(..., description="Filename")]
    size: Annotated[int, Field(..., description="Size", gt=0)]

    @classmethod
    async def from_file(
        cls,
        file: UploadFile,
        *,
        max_size: OptInt = None,
        valid_content_types: SeqOfStrs | str | None = None,
        valid_extensions: SeqOfStrs | str | None = None,
    ) -> Self:
        """Read ``file`` and build a validated document from it.

        Args:
            file: The upload to read.
            max_size: Largest accepted ``file.size``; ``None`` disables the check.
            valid_content_types: A single accepted content type or a sequence
                of them; ``None`` disables the check.
            valid_extensions: A single accepted filename suffix or a sequence
                of them; ``None`` disables the check.

        Returns:
            The document, with spaces in the filename replaced by underscores.

        Raises:
            ValueError: Always ``(ErrorCode.BAD_REQUEST, message)``, raised
                when the content is empty, the size is missing/non-positive
                or too large, the content type or filename is missing, or the
                content type / extension is not accepted.
        """
        content = await file.read()
        if not content:
            raise ValueError(ErrorCode.BAD_REQUEST, "Content can not be empty")

        size = file.size
        if size is None or size <= 0:
            raise ValueError(
                ErrorCode.BAD_REQUEST, "Size can not be None and must be larger than 0"
            )
        if max_size is not None and size > max_size:
            raise ValueError(
                ErrorCode.BAD_REQUEST,
                f"Size of {size} exceeds set maximum of {max_size}",
            )

        content_type = file.content_type
        if content_type is None:
            # Carries the error code like every other failure in this method.
            raise ValueError(ErrorCode.BAD_REQUEST, "Content type can not be None")
        if isinstance(valid_content_types, str):
            if content_type != valid_content_types:
                raise ValueError(
                    ErrorCode.BAD_REQUEST,
                    f"Invalid content type of '{content_type}'. Must be '{valid_content_types}'",
                )
        elif valid_content_types is not None:
            if content_type not in valid_content_types:
                raise ValueError(
                    ErrorCode.BAD_REQUEST,
                    f"Invalid content type of '{content_type}'. Must be one of {valid_content_types}",
                )

        filename = file.filename
        if filename is None:
            # Carries the error code like every other failure in this method.
            raise ValueError(ErrorCode.BAD_REQUEST, "Filename can not be None")
        if isinstance(valid_extensions, str):
            if not filename.endswith(valid_extensions):
                raise ValueError(
                    ErrorCode.BAD_REQUEST,
                    f"Invalid extension. Must be '{valid_extensions}'",
                )
        elif valid_extensions is not None:
            # str.endswith accepts a tuple of suffixes; an empty tuple never matches.
            if not filename.endswith(tuple(valid_extensions)):
                raise ValueError(
                    ErrorCode.BAD_REQUEST,
                    f"Invalid extension. Must be one of {valid_extensions}",
                )

        filename = filename.replace(" ", "_")

        document = cls(
            content=content, content_type=content_type, filename=filename, size=size
        )
        document._raw = file

        return document
85
+
86
+
87
class CSVDocument(Document):
    """A ``Document`` whose content type must be ``text/csv``."""

    _content_type: ClassVar[str] = "text/csv"

    def _validate_content_type(self):
        """Raise ``TypeError`` unless ``content_type`` equals ``text/csv``."""
        if self.content_type != self._content_type:
            raise TypeError(
                ErrorCode.BAD_REQUEST,
                f"CSV Document content type must be {self._content_type}",
            )

    @model_validator(mode="after")
    def validate_content_type(self) -> Self:
        """Model validator enforcing the CSV content type."""
        self._validate_content_type()
        return self

    @classmethod
    def from_document(cls, document: Document) -> Self:
        """Copy a generic ``Document`` into a ``CSVDocument``, keeping ``_raw``."""
        csv_document = cls(
            content=document.content,
            content_type=document.content_type,
            filename=document.filename,
            size=document.size,
        )
        csv_document._raw = document._raw
        return csv_document

    @classmethod
    async def from_file(
        cls,
        file: UploadFile,
        *,
        max_size: OptInt = None,
        valid_content_types: SeqOfStrs | str | None = "text/csv",
        valid_extensions: SeqOfStrs | str | None = ".csv",
    ) -> Self:
        """Same as ``Document.from_file`` with CSV defaults for type and extension."""
        return await super().from_file(
            file,
            max_size=max_size,
            valid_content_types=valid_content_types,
            valid_extensions=valid_extensions,
        )

    def _validate_no_duplicate_rows(self, rows: list[dict[str, OptStr]]) -> None:
        """Raise ``ValueError`` at the first row that repeats an earlier row."""
        seen = set()
        for index, row in enumerate(rows):
            row_tuple = tuple(sorted(row.items()))
            if row_tuple in seen:
                raise ValueError(
                    ErrorCode.BAD_REQUEST,
                    f"Duplicate row found at index {index}: {row}",
                )
            seen.add(row_tuple)

    @computed_field
    @property
    def rows(self) -> list[dict[str, OptStr]]:
        """Parse the content as UTF-8 CSV (BOM tolerated) into cleaned rows.

        Each row maps header name to the stripped cell text. Empty,
        whitespace-only and missing cells become ``None``. The content is
        re-parsed on every access.

        Raises:
            TypeError: the content type is not ``text/csv``.
            ValueError: a row has more cells than the header, or two rows
                are identical after cleaning.
        """
        self._validate_content_type()
        text = self.content.decode(encoding="utf-8-sig")
        reader = csv.DictReader(StringIO(text), skipinitialspace=True)
        new_rows: list[dict[str, OptStr]] = []

        for index, row in enumerate(reader):
            # DictReader stores surplus cells as a list under the key None.
            # Reject the row explicitly instead of failing on list.strip().
            if None in row:
                raise ValueError(
                    ErrorCode.BAD_REQUEST,
                    f"Row at index {index} has more values than header columns",
                )
            new_rows.append(
                {key: ((value or "").strip() or None) for key, value in row.items()}
            )

        # Run duplicate validation here
        self._validate_no_duplicate_rows(new_rows)

        return new_rows
159
+
160
+
161
class PDFDocument(Document):
    """A ``Document`` whose content type must be ``application/pdf``."""

    _content_type: ClassVar[str] = "application/pdf"

    def _validate_content_type(self):
        """Raise ``TypeError`` unless ``content_type`` equals ``application/pdf``."""
        if self.content_type != self._content_type:
            # Carries the error code, matching CSVDocument's equivalent check.
            raise TypeError(
                ErrorCode.BAD_REQUEST,
                f"PDF Document content type must be {self._content_type}",
            )

    @model_validator(mode="after")
    def validate_content_type(self) -> Self:
        """Model validator enforcing the PDF content type."""
        self._validate_content_type()
        return self

    @classmethod
    def from_document(cls, document: Document) -> Self:
        """Copy a generic ``Document`` into a ``PDFDocument``, keeping ``_raw``."""
        pdf_document = cls(
            content=document.content,
            content_type=document.content_type,
            filename=document.filename,
            size=document.size,
        )
        pdf_document._raw = document._raw
        return pdf_document

    @classmethod
    async def from_file(
        cls,
        file: UploadFile,
        *,
        max_size: OptInt = None,
        valid_content_types: SeqOfStrs | str | None = "application/pdf",
        valid_extensions: SeqOfStrs | str | None = ".pdf",
    ) -> Self:
        """Same as ``Document.from_file`` with PDF defaults for type and extension."""
        return await super().from_file(
            file,
            max_size=max_size,
            valid_content_types=valid_content_types,
            valid_extensions=valid_extensions,
        )
199
+
200
+
201
# Aliases and TypeVars for Document: a single document, an optional document,
# a list of documents, and an optional list. Each TypeVar is bounded by the
# alias (or class) named in its `bound=` argument.
+ DocumentT = TypeVar("DocumentT", bound=Document)
202
+ OptDocument = Document | None
203
+ OptDocumentT = TypeVar("OptDocumentT", bound=OptDocument)
204
+ ListOfDocuments = list[Document]
205
+ ListOfDocumentsT = TypeVar("ListOfDocumentsT", bound=ListOfDocuments)
206
+ OptListOfDocuments = ListOfDocuments | None
207
+ OptListOfDocumentsT = TypeVar("OptListOfDocumentsT", bound=OptListOfDocuments)
208
+
209
+
210
class DocumentMixin(BaseModel, Generic[OptDocumentT]):
    """Generic mixin adding a required ``document`` field."""

    document: OptDocumentT = Field(..., description="Document")
212
+
213
+
214
class HeterogenousDocumentsMixin(BaseModel, Generic[OptListOfDocumentsT]):
    """Generic mixin adding a required ``documents`` field declared with ``min_length=1``."""

    documents: OptListOfDocumentsT = Field(
        ..., description="Documents", min_length=1
    )
218
+
219
+
220
class HomogenousDocumentsMixin(BaseModel, Generic[DocumentT]):
    """Generic mixin adding a required, non-empty ``documents`` list of one document type."""

    documents: list[DocumentT] = Field(..., description="Documents", min_length=1)
224
+
225
+
226
class OptHomogenousDocumentsMixin(BaseModel, Generic[DocumentT]):
    """Generic mixin adding an optional ``documents`` list (default ``None``) of one document type."""

    documents: list[DocumentT] | None = Field(
        None, description="Documents", min_length=1
    )
230
+
231
+
232
class DocumentName(BaseModel, Generic[OptStrT]):
    """Generic model with a single required ``document_name`` field."""

    document_name: OptStrT = Field(..., description="Document's name")
234
+
235
+
236
class DocumentURL(BaseModel, Generic[OptStrT]):
    """Generic model with a single required ``document_url`` field."""

    document_url: OptStrT = Field(..., description="Document's URL")