prefect-client 3.1.9__py3-none-any.whl → 3.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. prefect/_experimental/lineage.py +7 -8
  2. prefect/_internal/_logging.py +15 -3
  3. prefect/_internal/compatibility/async_dispatch.py +22 -16
  4. prefect/_internal/compatibility/deprecated.py +42 -18
  5. prefect/_internal/compatibility/migration.py +2 -2
  6. prefect/_internal/concurrency/inspection.py +12 -14
  7. prefect/_internal/concurrency/primitives.py +2 -2
  8. prefect/_internal/concurrency/services.py +154 -80
  9. prefect/_internal/concurrency/waiters.py +13 -9
  10. prefect/_internal/pydantic/annotations/pendulum.py +7 -7
  11. prefect/_internal/pytz.py +4 -3
  12. prefect/_internal/retries.py +10 -5
  13. prefect/_internal/schemas/bases.py +19 -10
  14. prefect/_internal/schemas/validators.py +227 -388
  15. prefect/_version.py +3 -3
  16. prefect/artifacts.py +61 -74
  17. prefect/automations.py +27 -7
  18. prefect/blocks/core.py +3 -3
  19. prefect/client/{orchestration.py → orchestration/__init__.py} +38 -701
  20. prefect/client/orchestration/_artifacts/__init__.py +0 -0
  21. prefect/client/orchestration/_artifacts/client.py +239 -0
  22. prefect/client/orchestration/_concurrency_limits/__init__.py +0 -0
  23. prefect/client/orchestration/_concurrency_limits/client.py +762 -0
  24. prefect/client/orchestration/_logs/__init__.py +0 -0
  25. prefect/client/orchestration/_logs/client.py +95 -0
  26. prefect/client/orchestration/_variables/__init__.py +0 -0
  27. prefect/client/orchestration/_variables/client.py +157 -0
  28. prefect/client/orchestration/base.py +46 -0
  29. prefect/client/orchestration/routes.py +145 -0
  30. prefect/client/schemas/actions.py +2 -2
  31. prefect/client/schemas/filters.py +5 -0
  32. prefect/client/schemas/objects.py +3 -10
  33. prefect/client/schemas/schedules.py +22 -10
  34. prefect/concurrency/_asyncio.py +87 -0
  35. prefect/concurrency/{events.py → _events.py} +10 -10
  36. prefect/concurrency/asyncio.py +20 -104
  37. prefect/concurrency/context.py +6 -4
  38. prefect/concurrency/services.py +26 -74
  39. prefect/concurrency/sync.py +23 -44
  40. prefect/concurrency/v1/_asyncio.py +63 -0
  41. prefect/concurrency/v1/{events.py → _events.py} +13 -15
  42. prefect/concurrency/v1/asyncio.py +27 -80
  43. prefect/concurrency/v1/context.py +6 -4
  44. prefect/concurrency/v1/services.py +33 -79
  45. prefect/concurrency/v1/sync.py +18 -37
  46. prefect/context.py +66 -70
  47. prefect/deployments/base.py +4 -144
  48. prefect/deployments/flow_runs.py +12 -2
  49. prefect/deployments/runner.py +11 -3
  50. prefect/deployments/steps/pull.py +13 -0
  51. prefect/events/clients.py +7 -1
  52. prefect/events/schemas/events.py +3 -2
  53. prefect/flow_engine.py +54 -47
  54. prefect/flows.py +2 -1
  55. prefect/futures.py +42 -27
  56. prefect/input/run_input.py +2 -1
  57. prefect/locking/filesystem.py +8 -7
  58. prefect/locking/memory.py +5 -3
  59. prefect/locking/protocol.py +1 -1
  60. prefect/main.py +1 -3
  61. prefect/plugins.py +12 -10
  62. prefect/results.py +3 -308
  63. prefect/runner/storage.py +87 -21
  64. prefect/serializers.py +32 -25
  65. prefect/settings/legacy.py +4 -4
  66. prefect/settings/models/api.py +3 -3
  67. prefect/settings/models/cli.py +3 -3
  68. prefect/settings/models/client.py +5 -3
  69. prefect/settings/models/cloud.py +3 -3
  70. prefect/settings/models/deployments.py +3 -3
  71. prefect/settings/models/experiments.py +4 -2
  72. prefect/settings/models/flows.py +3 -3
  73. prefect/settings/models/internal.py +4 -2
  74. prefect/settings/models/logging.py +4 -3
  75. prefect/settings/models/results.py +3 -3
  76. prefect/settings/models/root.py +3 -2
  77. prefect/settings/models/runner.py +4 -4
  78. prefect/settings/models/server/api.py +3 -3
  79. prefect/settings/models/server/database.py +11 -4
  80. prefect/settings/models/server/deployments.py +6 -2
  81. prefect/settings/models/server/ephemeral.py +4 -2
  82. prefect/settings/models/server/events.py +3 -2
  83. prefect/settings/models/server/flow_run_graph.py +6 -2
  84. prefect/settings/models/server/root.py +3 -3
  85. prefect/settings/models/server/services.py +26 -11
  86. prefect/settings/models/server/tasks.py +6 -3
  87. prefect/settings/models/server/ui.py +3 -3
  88. prefect/settings/models/tasks.py +5 -5
  89. prefect/settings/models/testing.py +3 -3
  90. prefect/settings/models/worker.py +5 -3
  91. prefect/settings/profiles.py +15 -2
  92. prefect/states.py +4 -7
  93. prefect/task_engine.py +54 -75
  94. prefect/tasks.py +84 -32
  95. prefect/telemetry/processors.py +6 -6
  96. prefect/telemetry/run_telemetry.py +13 -8
  97. prefect/telemetry/services.py +32 -31
  98. prefect/transactions.py +4 -15
  99. prefect/utilities/_git.py +34 -0
  100. prefect/utilities/asyncutils.py +1 -1
  101. prefect/utilities/engine.py +3 -19
  102. prefect/utilities/generics.py +18 -0
  103. prefect/workers/__init__.py +2 -0
  104. {prefect_client-3.1.9.dist-info → prefect_client-3.1.11.dist-info}/METADATA +1 -1
  105. {prefect_client-3.1.9.dist-info → prefect_client-3.1.11.dist-info}/RECORD +108 -99
  106. prefect/records/__init__.py +0 -1
  107. prefect/records/base.py +0 -235
  108. prefect/records/filesystem.py +0 -213
  109. prefect/records/memory.py +0 -184
  110. prefect/records/result_store.py +0 -70
  111. {prefect_client-3.1.9.dist-info → prefect_client-3.1.11.dist-info}/LICENSE +0 -0
  112. {prefect_client-3.1.9.dist-info → prefect_client-3.1.11.dist-info}/WHEEL +0 -0
  113. {prefect_client-3.1.9.dist-info → prefect_client-3.1.11.dist-info}/top_level.txt +0 -0
prefect/results.py CHANGED
@@ -1,4 +1,3 @@
1
- import abc
2
1
  import inspect
3
2
  import os
4
3
  import socket
@@ -11,11 +10,11 @@ from typing import (
11
10
  Annotated,
12
11
  Any,
13
12
  Callable,
13
+ ClassVar,
14
14
  Dict,
15
15
  Generic,
16
16
  Optional,
17
17
  Tuple,
18
- Type,
19
18
  TypeVar,
20
19
  Union,
21
20
  )
@@ -28,13 +27,10 @@ from pydantic import (
28
27
  ConfigDict,
29
28
  Discriminator,
30
29
  Field,
31
- PrivateAttr,
32
30
  Tag,
33
31
  ValidationError,
34
- model_serializer,
35
32
  model_validator,
36
33
  )
37
- from pydantic_core import PydanticUndefinedType
38
34
  from typing_extensions import ParamSpec, Self
39
35
 
40
36
  import prefect
@@ -42,10 +38,7 @@ from prefect._experimental.lineage import (
42
38
  emit_result_read_event,
43
39
  emit_result_write_event,
44
40
  )
45
- from prefect._internal.compatibility import deprecated
46
- from prefect._internal.compatibility.deprecated import deprecated_field
47
41
  from prefect.blocks.core import Block
48
- from prefect.client.utilities import inject_client
49
42
  from prefect.exceptions import (
50
43
  ConfigurationError,
51
44
  MissingContextError,
@@ -63,11 +56,9 @@ from prefect.settings.context import get_current_settings
63
56
  from prefect.types import DateTime
64
57
  from prefect.utilities.annotations import NotSet
65
58
  from prefect.utilities.asyncutils import sync_compatible
66
- from prefect.utilities.pydantic import get_dispatch_key, lookup_type, register_base_type
67
59
 
68
60
  if TYPE_CHECKING:
69
61
  from prefect import Flow, Task
70
- from prefect.client.orchestration import PrefectClient
71
62
  from prefect.transactions import IsolationLevel
72
63
 
73
64
 
@@ -245,7 +236,7 @@ async def _call_explicitly_async_block_method(
245
236
  see https://github.com/PrefectHQ/prefect/issues/15008
246
237
  """
247
238
  if hasattr(block, f"a{method}"): # explicit async method
248
- return await getattr(block.__class__.__name__, f"a{method}")(*args, **kwargs)
239
+ return await getattr(block, f"a{method}")(*args, **kwargs)
249
240
  elif hasattr(getattr(block, method, None), "aio"): # sync_compatible
250
241
  return await getattr(block, method).aio(block, *args, **kwargs)
251
242
  else: # should not happen in prefect, but users can override impls
@@ -276,13 +267,6 @@ def result_storage_discriminator(x: Any) -> str:
276
267
  return "None"
277
268
 
278
269
 
279
- @deprecated_field(
280
- "persist_result",
281
- when=lambda x: x is not None,
282
- when_message="use the `should_persist_result` utility function instead",
283
- start_date="Sep 2024",
284
- end_date="Nov 2024",
285
- )
286
270
  class ResultStore(BaseModel):
287
271
  """
288
272
  Manages the storage and retrieval of results.
@@ -294,13 +278,12 @@ class ResultStore(BaseModel):
294
278
  the metadata will be stored alongside the results.
295
279
  lock_manager: The lock manager to use for locking result records. If not provided,
296
280
  the store cannot be used in transactions with the SERIALIZABLE isolation level.
297
- persist_result: Whether to persist results.
298
281
  cache_result_in_memory: Whether to cache results in memory.
299
282
  serializer: The serializer to use for results.
300
283
  storage_key_fn: The function to generate storage keys.
301
284
  """
302
285
 
303
- model_config = ConfigDict(arbitrary_types_allowed=True)
286
+ model_config: ClassVar[ConfigDict] = ConfigDict(arbitrary_types_allowed=True)
304
287
 
305
288
  result_storage: Optional[WritableFileSystem] = Field(default=None)
306
289
  metadata_storage: Annotated[
@@ -317,9 +300,6 @@ class ResultStore(BaseModel):
317
300
  storage_key_fn: Callable[[], str] = Field(default=DEFAULT_STORAGE_KEY_FN)
318
301
  cache: LRUCache[str, "ResultRecord[Any]"] = Field(default_factory=default_cache)
319
302
 
320
- # Deprecated fields
321
- persist_result: Optional[bool] = Field(default=None)
322
-
323
303
  @property
324
304
  def result_storage_block_id(self) -> Optional[UUID]:
325
305
  if self.result_storage is None:
@@ -903,52 +883,6 @@ class ResultStore(BaseModel):
903
883
  )
904
884
  return await self.lock_manager.await_for_lock(key, timeout)
905
885
 
906
- @deprecated.deprecated_callable(
907
- start_date="Sep 2024",
908
- end_date="Nov 2024",
909
- help="Use `create_result_record` instead.",
910
- )
911
- @sync_compatible
912
- async def create_result(
913
- self,
914
- obj: R,
915
- key: Optional[str] = None,
916
- expiration: Optional[DateTime] = None,
917
- ) -> Union[R, "BaseResult[R]"]:
918
- """
919
- Create a `PersistedResult` for the given object.
920
- """
921
- # Null objects are "cached" in memory at no cost
922
- should_cache_object = self.cache_result_in_memory or obj is None
923
- should_persist_result = (
924
- self.persist_result
925
- if self.persist_result is not None
926
- else not should_cache_object
927
- )
928
-
929
- if key:
930
-
931
- def key_fn():
932
- return key
933
-
934
- storage_key_fn = key_fn
935
- else:
936
- storage_key_fn = self.storage_key_fn
937
-
938
- if self.result_storage is None:
939
- self.result_storage = await get_default_result_storage()
940
-
941
- return await PersistedResult.create(
942
- obj,
943
- storage_block=self.result_storage,
944
- storage_block_id=self.result_storage_block_id,
945
- storage_key_fn=storage_key_fn,
946
- serializer=self.serializer,
947
- cache_object=should_cache_object,
948
- expiration=expiration,
949
- serialize_to_none=not should_persist_result,
950
- )
951
-
952
886
  # TODO: These two methods need to find a new home
953
887
 
954
888
  @sync_compatible
@@ -1221,242 +1155,3 @@ class ResultRecord(BaseModel, Generic[R]):
1221
1155
  if not isinstance(other, ResultRecord):
1222
1156
  return False
1223
1157
  return self.metadata == other.metadata and self.result == other.result
1224
-
1225
-
1226
- @deprecated.deprecated_class(
1227
- start_date="Sep 2024", end_date="Nov 2024", help="Use `ResultRecord` instead."
1228
- )
1229
- @register_base_type
1230
- class BaseResult(BaseModel, abc.ABC, Generic[R]):
1231
- model_config = ConfigDict(extra="forbid")
1232
- type: str
1233
-
1234
- def __init__(self, **data: Any) -> None:
1235
- type_string = (
1236
- get_dispatch_key(self) if type(self) is not BaseResult else "__base__"
1237
- )
1238
- data.setdefault("type", type_string)
1239
- super().__init__(**data)
1240
-
1241
- def __new__(cls: Type[Self], **kwargs) -> Self:
1242
- if "type" in kwargs:
1243
- try:
1244
- subcls = lookup_type(cls, dispatch_key=kwargs["type"])
1245
- except KeyError as exc:
1246
- raise ValueError(f"Invalid type: {kwargs['type']}") from exc
1247
- return super().__new__(subcls)
1248
- else:
1249
- return super().__new__(cls)
1250
-
1251
- _cache: Any = PrivateAttr(NotSet)
1252
-
1253
- def _cache_object(self, obj: Any) -> None:
1254
- self._cache = obj
1255
-
1256
- def has_cached_object(self) -> bool:
1257
- return self._cache is not NotSet
1258
-
1259
- @abc.abstractmethod
1260
- @sync_compatible
1261
- async def get(self) -> R:
1262
- ...
1263
-
1264
- @abc.abstractclassmethod
1265
- @sync_compatible
1266
- async def create(
1267
- cls: "Type[BaseResult[R]]",
1268
- obj: R,
1269
- **kwargs: Any,
1270
- ) -> "BaseResult[R]":
1271
- ...
1272
-
1273
- @classmethod
1274
- def __dispatch_key__(cls, **kwargs):
1275
- default = cls.model_fields.get("type").get_default()
1276
- return cls.__name__ if isinstance(default, PydanticUndefinedType) else default
1277
-
1278
-
1279
- @deprecated.deprecated_class(
1280
- start_date="Sep 2024", end_date="Nov 2024", help="Use `ResultRecord` instead."
1281
- )
1282
- class PersistedResult(BaseResult):
1283
- """
1284
- Result type which stores a reference to a persisted result.
1285
-
1286
- When created, the user's object is serialized and stored. The format for the content
1287
- is defined by `ResultRecord`. This reference contains metadata necessary for retrieval
1288
- of the object, such as a reference to the storage block and the key where the
1289
- content was written.
1290
- """
1291
-
1292
- type: str = "reference"
1293
-
1294
- serializer_type: str
1295
- storage_key: str
1296
- storage_block_id: Optional[uuid.UUID] = None
1297
- expiration: Optional[DateTime] = None
1298
- serialize_to_none: bool = False
1299
-
1300
- _persisted: bool = PrivateAttr(default=False)
1301
- _should_cache_object: bool = PrivateAttr(default=True)
1302
- _storage_block: WritableFileSystem = PrivateAttr(default=None)
1303
- _serializer: Serializer = PrivateAttr(default=None)
1304
-
1305
- @model_serializer(mode="wrap")
1306
- def serialize_model(self, handler, info):
1307
- if self.serialize_to_none:
1308
- return None
1309
- return handler(self, info)
1310
-
1311
- def _cache_object(
1312
- self,
1313
- obj: Any,
1314
- storage_block: WritableFileSystem = None,
1315
- serializer: Serializer = None,
1316
- ) -> None:
1317
- self._cache = obj
1318
- self._storage_block = storage_block
1319
- self._serializer = serializer
1320
-
1321
- @inject_client
1322
- async def _get_storage_block(self, client: "PrefectClient") -> WritableFileSystem:
1323
- if self._storage_block is not None:
1324
- return self._storage_block
1325
- elif self.storage_block_id is not None:
1326
- block_document = await client.read_block_document(self.storage_block_id)
1327
- self._storage_block = Block._from_block_document(block_document)
1328
- else:
1329
- self._storage_block = await get_default_result_storage()
1330
- return self._storage_block
1331
-
1332
- @sync_compatible
1333
- @inject_client
1334
- async def get(
1335
- self, ignore_cache: bool = False, client: "PrefectClient" = None
1336
- ) -> R:
1337
- """
1338
- Retrieve the data and deserialize it into the original object.
1339
- """
1340
- if self.has_cached_object() and not ignore_cache:
1341
- return self._cache
1342
-
1343
- result_store_kwargs = {}
1344
- if self._serializer:
1345
- result_store_kwargs["serializer"] = resolve_serializer(self._serializer)
1346
- storage_block = await self._get_storage_block(client=client)
1347
- result_store = ResultStore(result_storage=storage_block, **result_store_kwargs)
1348
-
1349
- record = await result_store.aread(self.storage_key)
1350
- self.expiration = record.expiration
1351
-
1352
- if self._should_cache_object:
1353
- self._cache_object(record.result)
1354
-
1355
- return record.result
1356
-
1357
- @staticmethod
1358
- def _infer_path(storage_block, key) -> str:
1359
- """
1360
- Attempts to infer a path associated with a storage block key, this method will
1361
- defer to the block in the future
1362
- """
1363
-
1364
- if hasattr(storage_block, "_resolve_path"):
1365
- return storage_block._resolve_path(key)
1366
- if hasattr(storage_block, "_remote_file_system"):
1367
- return storage_block._remote_file_system._resolve_path(key)
1368
-
1369
- @sync_compatible
1370
- @inject_client
1371
- async def write(self, obj: R = NotSet, client: "PrefectClient" = None) -> None:
1372
- """
1373
- Write the result to the storage block.
1374
- """
1375
- if self._persisted or self.serialize_to_none:
1376
- # don't double write or overwrite
1377
- return
1378
-
1379
- # load objects from a cache
1380
-
1381
- # first the object itself
1382
- if obj is NotSet and not self.has_cached_object():
1383
- raise ValueError("Cannot write a result that has no object cached.")
1384
- obj = obj if obj is not NotSet else self._cache
1385
-
1386
- # next, the storage block
1387
- storage_block = await self._get_storage_block(client=client)
1388
-
1389
- # finally, the serializer
1390
- serializer = self._serializer
1391
- if serializer is None:
1392
- # this could error if the serializer requires kwargs
1393
- serializer = Serializer(type=self.serializer_type)
1394
-
1395
- result_store = ResultStore(
1396
- result_storage=storage_block,
1397
- serializer=serializer,
1398
- )
1399
- await result_store.awrite(
1400
- obj=obj, key=self.storage_key, expiration=self.expiration
1401
- )
1402
-
1403
- self._persisted = True
1404
-
1405
- if not self._should_cache_object:
1406
- self._cache = NotSet
1407
-
1408
- @classmethod
1409
- @sync_compatible
1410
- async def create(
1411
- cls: "Type[PersistedResult]",
1412
- obj: R,
1413
- storage_block: WritableFileSystem,
1414
- storage_key_fn: Callable[[], str],
1415
- serializer: Serializer,
1416
- storage_block_id: Optional[uuid.UUID] = None,
1417
- cache_object: bool = True,
1418
- expiration: Optional[DateTime] = None,
1419
- serialize_to_none: bool = False,
1420
- ) -> "PersistedResult[R]":
1421
- """
1422
- Create a new result reference from a user's object.
1423
-
1424
- The object will be serialized and written to the storage block under a unique
1425
- key. It will then be cached on the returned result.
1426
- """
1427
- key = storage_key_fn()
1428
- if not isinstance(key, str):
1429
- raise TypeError(
1430
- f"Expected type 'str' for result storage key; got value {key!r}"
1431
- )
1432
- uri = cls._infer_path(storage_block, key)
1433
-
1434
- # in this case we store an absolute path
1435
- if storage_block_id is None and uri is not None:
1436
- key = str(uri)
1437
-
1438
- result = cls(
1439
- serializer_type=serializer.type,
1440
- storage_block_id=storage_block_id,
1441
- storage_key=key,
1442
- expiration=expiration,
1443
- serialize_to_none=serialize_to_none,
1444
- )
1445
-
1446
- object.__setattr__(result, "_should_cache_object", cache_object)
1447
- # we must cache temporarily to allow for writing later
1448
- # the cache will be removed on write
1449
- result._cache_object(obj, storage_block=storage_block, serializer=serializer)
1450
-
1451
- return result
1452
-
1453
- def __eq__(self, other):
1454
- if not isinstance(other, PersistedResult):
1455
- return False
1456
- return (
1457
- self.type == other.type
1458
- and self.serializer_type == other.serializer_type
1459
- and self.storage_key == other.storage_key
1460
- and self.storage_block_id == other.storage_block_id
1461
- and self.expiration == other.expiration
1462
- )
prefect/runner/storage.py CHANGED
@@ -2,7 +2,15 @@ import shutil
2
2
  import subprocess
3
3
  from copy import deepcopy
4
4
  from pathlib import Path
5
- from typing import Any, Dict, Optional, Protocol, TypedDict, Union, runtime_checkable
5
+ from typing import (
6
+ Any,
7
+ Dict,
8
+ Optional,
9
+ Protocol,
10
+ TypedDict,
11
+ Union,
12
+ runtime_checkable,
13
+ )
6
14
  from urllib.parse import urlparse, urlsplit, urlunparse
7
15
  from uuid import uuid4
8
16
 
@@ -87,6 +95,7 @@ class GitRepository:
87
95
  pull_interval: The interval in seconds at which to pull contents from
88
96
  remote storage to local storage. If None, remote storage will perform
89
97
  a one-time sync.
98
+ directories: The directories to pull from the Git repository (uses git sparse-checkout)
90
99
 
91
100
  Examples:
92
101
  Pull the contents of a private git repository to the local filesystem:
@@ -111,6 +120,7 @@ class GitRepository:
111
120
  branch: Optional[str] = None,
112
121
  include_submodules: bool = False,
113
122
  pull_interval: Optional[int] = 60,
123
+ directories: Optional[str] = None,
114
124
  ):
115
125
  if credentials is None:
116
126
  credentials = {}
@@ -134,6 +144,7 @@ class GitRepository:
134
144
  self._logger = get_logger(f"runner.storage.git-repository.{self._name}")
135
145
  self._storage_base_path = Path.cwd()
136
146
  self._pull_interval = pull_interval
147
+ self._directories = directories
137
148
 
138
149
  @property
139
150
  def destination(self) -> Path:
@@ -147,11 +158,9 @@ class GitRepository:
147
158
  return self._pull_interval
148
159
 
149
160
  @property
150
- def _repository_url_with_credentials(self) -> str:
161
+ def _formatted_credentials(self) -> Optional[str]:
151
162
  if not self._credentials:
152
- return self._url
153
-
154
- url_components = urlparse(self._url)
163
+ return None
155
164
 
156
165
  credentials = (
157
166
  self._credentials.model_dump()
@@ -165,18 +174,52 @@ class GitRepository:
165
174
  elif isinstance(v, SecretStr):
166
175
  credentials[k] = v.get_secret_value()
167
176
 
168
- formatted_credentials = _format_token_from_credentials(
169
- urlparse(self._url).netloc, credentials
177
+ return _format_token_from_credentials(urlparse(self._url).netloc, credentials)
178
+
179
+ def _add_credentials_to_url(self, url: str) -> str:
180
+ """Add credentials to given url if possible."""
181
+ components = urlparse(url)
182
+ credentials = self._formatted_credentials
183
+
184
+ if components.scheme != "https" or not credentials:
185
+ return url
186
+
187
+ return urlunparse(
188
+ components._replace(netloc=f"{credentials}@{components.netloc}")
170
189
  )
171
- if url_components.scheme == "https" and formatted_credentials is not None:
172
- updated_components = url_components._replace(
173
- netloc=f"{formatted_credentials}@{url_components.netloc}"
174
- )
175
- repository_url = urlunparse(updated_components)
176
- else:
177
- repository_url = self._url
178
190
 
179
- return repository_url
191
+ @property
192
+ def _repository_url_with_credentials(self) -> str:
193
+ return self._add_credentials_to_url(self._url)
194
+
195
+ @property
196
+ def _git_config(self) -> list[str]:
197
+ """Build a git configuration to use when running git commands."""
198
+ config = {}
199
+
200
+ # Submodules can be private. The url in .gitmodules
201
+ # will not include the credentials, we need to
202
+ # propagate them down here if they exist.
203
+ if self._include_submodules and self._formatted_credentials:
204
+ base_url = urlparse(self._url)._replace(path="")
205
+ without_auth = urlunparse(base_url)
206
+ with_auth = self._add_credentials_to_url(without_auth)
207
+ config[f"url.{with_auth}.insteadOf"] = without_auth
208
+
209
+ return ["-c", " ".join(f"{k}={v}" for k, v in config.items())] if config else []
210
+
211
+ async def is_sparsely_checked_out(self) -> bool:
212
+ """
213
+ Check if existing repo is sparsely checked out
214
+ """
215
+
216
+ try:
217
+ result = await run_process(
218
+ ["git", "config", "--get", "core.sparseCheckout"], cwd=self.destination
219
+ )
220
+ return result.strip().lower() == "true"
221
+ except Exception:
222
+ return False
180
223
 
181
224
  async def pull_code(self):
182
225
  """
@@ -206,9 +249,20 @@ class GitRepository:
206
249
  f"does not match the configured repository {self._url}"
207
250
  )
208
251
 
252
+ # Sparsely checkout the repository if directories are specified and the repo is not in sparse-checkout mode already
253
+ if self._directories and not await self.is_sparsely_checked_out():
254
+ await run_process(
255
+ ["git", "sparse-checkout", "set"] + self._directories,
256
+ cwd=self.destination,
257
+ )
258
+
209
259
  self._logger.debug("Pulling latest changes from origin/%s", self._branch)
210
260
  # Update the existing repository
211
- cmd = ["git", "pull", "origin"]
261
+ cmd = ["git"]
262
+ # Add the git configuration, must be given after `git` and before the command
263
+ cmd += self._git_config
264
+ # Add the pull command and parameters
265
+ cmd += ["pull", "origin"]
212
266
  if self._branch:
213
267
  cmd += [self._branch]
214
268
  if self._include_submodules:
@@ -234,17 +288,21 @@ class GitRepository:
234
288
  self._logger.debug("Cloning repository %s", self._url)
235
289
 
236
290
  repository_url = self._repository_url_with_credentials
291
+ cmd = ["git"]
292
+ # Add the git configuration, must be given after `git` and before the command
293
+ cmd += self._git_config
294
+ # Add the clone command and its parameters
295
+ cmd += ["clone", repository_url]
237
296
 
238
- cmd = [
239
- "git",
240
- "clone",
241
- repository_url,
242
- ]
243
297
  if self._branch:
244
298
  cmd += ["--branch", self._branch]
245
299
  if self._include_submodules:
246
300
  cmd += ["--recurse-submodules"]
247
301
 
302
+ # This will only checkout the top-level directory
303
+ if self._directories:
304
+ cmd += ["--sparse"]
305
+
248
306
  # Limit git history and set path to clone to
249
307
  cmd += ["--depth", "1", str(self.destination)]
250
308
 
@@ -258,6 +316,14 @@ class GitRepository:
258
316
  f" {exc.returncode}."
259
317
  ) from exc_chain
260
318
 
319
+ # Once repository is cloned and the repo is in sparse-checkout mode then grow the working directory
320
+ if self._directories:
321
+ self._logger.debug("Will add %s", self._directories)
322
+ await run_process(
323
+ ["git", "sparse-checkout", "set"] + self._directories,
324
+ cwd=self.destination,
325
+ )
326
+
261
327
  def __eq__(self, __value) -> bool:
262
328
  if isinstance(__value, GitRepository):
263
329
  return (