ai-pipeline-core 0.1.12__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. ai_pipeline_core/__init__.py +83 -119
  2. ai_pipeline_core/deployment/__init__.py +34 -0
  3. ai_pipeline_core/deployment/base.py +861 -0
  4. ai_pipeline_core/deployment/contract.py +80 -0
  5. ai_pipeline_core/deployment/deploy.py +561 -0
  6. ai_pipeline_core/deployment/helpers.py +97 -0
  7. ai_pipeline_core/deployment/progress.py +126 -0
  8. ai_pipeline_core/deployment/remote.py +116 -0
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +14 -15
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +349 -1062
  30. ai_pipeline_core/documents/mime_type.py +40 -85
  31. ai_pipeline_core/documents/utils.py +62 -7
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +309 -0
  34. ai_pipeline_core/images/_processing.py +151 -0
  35. ai_pipeline_core/llm/__init__.py +5 -3
  36. ai_pipeline_core/llm/ai_messages.py +284 -73
  37. ai_pipeline_core/llm/client.py +462 -209
  38. ai_pipeline_core/llm/model_options.py +86 -53
  39. ai_pipeline_core/llm/model_response.py +187 -241
  40. ai_pipeline_core/llm/model_types.py +34 -54
  41. ai_pipeline_core/logging/__init__.py +2 -9
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -43
  44. ai_pipeline_core/logging/logging_mixin.py +17 -51
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/observability/_debug/_config.py +95 -0
  49. ai_pipeline_core/observability/_debug/_content.py +764 -0
  50. ai_pipeline_core/observability/_debug/_processor.py +98 -0
  51. ai_pipeline_core/observability/_debug/_summary.py +312 -0
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/observability/_debug/_writer.py +843 -0
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/observability/tracing.py +640 -0
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +26 -105
  70. ai_pipeline_core/settings.py +41 -32
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
  74. {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
  75. ai_pipeline_core/documents/document_list.py +0 -240
  76. ai_pipeline_core/documents/flow_document.py +0 -128
  77. ai_pipeline_core/documents/task_document.py +0 -133
  78. ai_pipeline_core/documents/temporary_document.py +0 -95
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -314
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -717
  83. ai_pipeline_core/prefect.py +0 -54
  84. ai_pipeline_core/simple_runner/__init__.py +0 -24
  85. ai_pipeline_core/simple_runner/cli.py +0 -255
  86. ai_pipeline_core/simple_runner/simple_runner.py +0 -385
  87. ai_pipeline_core/tracing.py +0 -475
  88. ai_pipeline_core-0.1.12.dist-info/METADATA +0 -450
  89. ai_pipeline_core-0.1.12.dist-info/RECORD +0 -36
  90. {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,76 @@
1
+ ai_pipeline_core/__init__.py,sha256=REZQInD3-LSShbonlXFpbe-IfjEtr90kkEx3JFxgfMA,3270
2
+ ai_pipeline_core/exceptions.py,sha256=csAl7vq6xjSFBF8-UM9WZODCbhsOdOG5zH6IbA8iteM,1280
3
+ ai_pipeline_core/prompt_manager.py,sha256=3wFkL5rrjtUT1cLInkgyhS8hKnO4MeD1cdXAEuLhgoE,9459
4
+ ai_pipeline_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ ai_pipeline_core/settings.py,sha256=BUz8JEFfJQrdE4rNOhQWwxnTrfekLjWkoy-3wDZQ7PY,5142
6
+ ai_pipeline_core/testing.py,sha256=jIRrLxNvTwdamucfJoHET2qMeRhhMZV9uEJXO5vAfis,279
7
+ ai_pipeline_core/deployment/__init__.py,sha256=wTkVK6gcEQvqBajFMTAuodRONpN25yHbR1jtcumf0WQ,900
8
+ ai_pipeline_core/deployment/base.py,sha256=ros0VzvkNCwbPgG9D49ceKSjTq857iRHzvW-uhiaNUE,34750
9
+ ai_pipeline_core/deployment/contract.py,sha256=a1qbHhneTGB27oSOUy79CUIhOIzOoq37M63XoIMzA4Y,1952
10
+ ai_pipeline_core/deployment/deploy.py,sha256=TCF4fH5f-K1ADODZHEyf-x7PJzDbv4qtWxlpoCe_mTs,22909
11
+ ai_pipeline_core/deployment/helpers.py,sha256=yVtGFUs4AFXkpLkiQ_ale0nXXt5btfWSb5PAbikQHNs,3312
12
+ ai_pipeline_core/deployment/progress.py,sha256=5tVD9nW0N-b8Z2BxazcWCWHFpLu6pJ-eqPmRyj68X6Y,3591
13
+ ai_pipeline_core/deployment/remote.py,sha256=tOexisKEeeBoHLGYZWqcjr2H-nqqYc6kvoDL72AW78w,4661
14
+ ai_pipeline_core/docs_generator/__init__.py,sha256=JbWbk-Lw5GgWrCMRuw8zvKNTZY2jXv7XqoMiBYudvRI,1255
15
+ ai_pipeline_core/docs_generator/__main__.py,sha256=CH4agiM2suFJ63MhTg5m0GuXdc40z-6o4ojR72JQWVA,145
16
+ ai_pipeline_core/docs_generator/cli.py,sha256=8OjdMtzQraPxWN3uPapSNJnKyPLPtnygKL0rF5JL2GY,7172
17
+ ai_pipeline_core/docs_generator/extractor.py,sha256=yHQfeb_LwgBZW5dBY65L6a4qvNvxnwWiFXhqeZV5y5w,10631
18
+ ai_pipeline_core/docs_generator/guide_builder.py,sha256=cxVEoYMfwOsFWDLbXddJ7IBBCRshbfSUoQ84ZMw_YQE,22232
19
+ ai_pipeline_core/docs_generator/trimmer.py,sha256=olsl4MSmMHqsIEeVu9HU7xjONmIbSU7NmPwWdhOH6AA,1052
20
+ ai_pipeline_core/docs_generator/validator.py,sha256=w-UdE6h6LLCwVy9Qqmv-TavIttA_1mcRoAhF9_HKszc,4460
21
+ ai_pipeline_core/document_store/__init__.py,sha256=5aHsCpRkfkaLhLo0sVaKXEjqCcRYgzkVJErULKUpDAk,366
22
+ ai_pipeline_core/document_store/_summary.py,sha256=qwy4kHEEwHwXGN7LVol09qzf7RjOJ2-6qTme-mtE8aM,377
23
+ ai_pipeline_core/document_store/_summary_worker.py,sha256=K4575wCot0EoKCEsOj8XMCS1O6aWC37S9L_3TZjONco,6659
24
+ ai_pipeline_core/document_store/clickhouse.py,sha256=vUlN2rIxCn5A8ceBFbpaHPS2O3tYEuR_UZkffWdy7E4,20636
25
+ ai_pipeline_core/document_store/factory.py,sha256=F56ZM8TxgzFNYUkdzZidbxTe-JDiIAqi_tlE30cdlp0,1499
26
+ ai_pipeline_core/document_store/local.py,sha256=r_dCJ46fto89yxZfKuGNQonpocQ1TwFFaCUntW-ZSQw,13396
27
+ ai_pipeline_core/document_store/memory.py,sha256=MlsWHLLaEK6MdHBPZUgsNkbkFPvd2d2gFcfdDMBYvXo,3679
28
+ ai_pipeline_core/document_store/protocol.py,sha256=UhA60PuSMBwpX9yVLOtUAsKqdPnU2synDos6cB-WQng,2407
29
+ ai_pipeline_core/documents/__init__.py,sha256=LphKH_CiN3BQ0gjtJps1Y1WF_Lt2Qg-75aq2U1_PvP8,723
30
+ ai_pipeline_core/documents/_context_vars.py,sha256=JbgQoCNaHPrOAMlEa6HYB8Ti7iw_jQfZOi5eNrwHYWg,2687
31
+ ai_pipeline_core/documents/_hashing.py,sha256=_u1P4z1bMNSREJ6GNf3sSqf0TCrDr9sVcAIG9bnORnU,1667
32
+ ai_pipeline_core/documents/attachment.py,sha256=eVpb27Qu8mLO2Bxv_JYd5JXhgViaJusVM8RBGcU1iQE,2951
33
+ ai_pipeline_core/documents/context.py,sha256=vlORnRk2klMTZk6X4jEJayeH4B2Xo6ZxZ-31mtdba6o,5482
34
+ ai_pipeline_core/documents/document.py,sha256=lU3hqbyYswRlWAiS9YGW9IRZnVA-3qmMpnAKPmi8Bws,26157
35
+ ai_pipeline_core/documents/mime_type.py,sha256=QeRX6GiQnTpqx3Fk8QLhi1lT0Z5uEs496dGc4_xqgsA,6530
36
+ ai_pipeline_core/documents/utils.py,sha256=9WOW3zvKYxQPnM8LjYFy3V9-yqc6hwgCaiog3kUH274,5413
37
+ ai_pipeline_core/images/__init__.py,sha256=Hc2QKR27Q2Q-h5nH-EbzfxdE3dHArBm-st5_xjOKFh0,8854
38
+ ai_pipeline_core/images/_processing.py,sha256=MrCuPGsyyEl9UlXYIPhZs0wN8CPTMZmejV2Lo2wyCZk,4362
39
+ ai_pipeline_core/llm/__init__.py,sha256=oyRvYD5DLDl7JIRTBUaiVz6jUC5dLLujkMNFpfRp2zc,795
40
+ ai_pipeline_core/llm/ai_messages.py,sha256=Ycmntk5d6NUFqVVsnNR_IDwJUFuHYEH7CPvmmDfYaJI,17424
41
+ ai_pipeline_core/llm/client.py,sha256=CjxOiniuy5CEsA_Xz0KPLCBthbnUfC43fTpuDcqkIUM,30276
42
+ ai_pipeline_core/llm/model_options.py,sha256=hg8xR0RJdJKp8QJNA4EbLnfFsnkE4HnxD85aYxc--hM,9164
43
+ ai_pipeline_core/llm/model_response.py,sha256=Ml9wcssSssqibReJxCc9EQu488pz69Cmq_XNBs_xmak,12219
44
+ ai_pipeline_core/llm/model_types.py,sha256=qHoUPPEkHu9B4kJ5xcIC09fk72v667ZxvzigxtgLpVo,2174
45
+ ai_pipeline_core/logging/__init__.py,sha256=H8G3bycxwNxc4e4Gjwi-al9e2ufTJbTV5iFKCF1Ticw,495
46
+ ai_pipeline_core/logging/logging.yml,sha256=qsf6vcxtWIHD5xwJGtylibiuy_0KF_Ji7-qb-xvFtaU,1357
47
+ ai_pipeline_core/logging/logging_config.py,sha256=JnTarGSSkpi7eqR7N13TLKeuwNCvZgwJUPlhObiwrHk,6095
48
+ ai_pipeline_core/logging/logging_mixin.py,sha256=Jn3x0xvSwSjbAMfWELMOEfffWBB1u4IeIr7M2-55CJs,7191
49
+ ai_pipeline_core/observability/__init__.py,sha256=km2nIiY3aYH13s2m4nR91erQG4qKnGuvQkrKDdVW3bw,720
50
+ ai_pipeline_core/observability/_document_tracking.py,sha256=tXv6rbGIuxOYdq22aVbyn9Ve5EhYHPnrYCE-kj2NGXI,5428
51
+ ai_pipeline_core/observability/_initialization.py,sha256=GfwRHpg90Og3PzmG1ZUilJVXoFx9BIWpbMgXxJ5Alqk,6747
52
+ ai_pipeline_core/observability/_logging_bridge.py,sha256=T3PpkgoI0YKN2vvBJEHzR5rFMFNHq9REHJs7PQX2VQk,2053
53
+ ai_pipeline_core/observability/_summary.py,sha256=GAZXzXVkwUcubSiGb5DgkHfO1gGwx6pYoDz6RUJmL5k,3390
54
+ ai_pipeline_core/observability/tracing.py,sha256=KhIXSl5fe39UE1Eokz9-1fe5biX6anKbwZDmXY_Z2LU,27050
55
+ ai_pipeline_core/observability/_debug/__init__.py,sha256=V8pbgdQOx-7oFKQ_sNzAZ1-oq5c73P4kVjEClZDXe8k,942
56
+ ai_pipeline_core/observability/_debug/_auto_summary.py,sha256=LMvETvx_RPKF8srewCKwjigTiWs3KfDmQAYYSuVybIM,2687
57
+ ai_pipeline_core/observability/_debug/_config.py,sha256=CWfnK-F3knUuOQ34y_CjmU3l67J85NIZ3siftYhevc0,3367
58
+ ai_pipeline_core/observability/_debug/_content.py,sha256=ECy2vR8wDHJq0RD2X10XS-ed8uCq9VD3K8fnGOjQxgs,30657
59
+ ai_pipeline_core/observability/_debug/_processor.py,sha256=FkX1xqeJds-Gctt5keYSTSaC85FM4QaeFIEevTn7Qh8,3875
60
+ ai_pipeline_core/observability/_debug/_summary.py,sha256=gD7MtWldBRs2VniQxUBjr6XbD2Z8HhbqZdnkcr4HdzE,11274
61
+ ai_pipeline_core/observability/_debug/_types.py,sha256=Cw80SWSVso02kkj6T7hICGU_vn3W2RUEv74h94V5ZfI,2220
62
+ ai_pipeline_core/observability/_debug/_writer.py,sha256=0JOmaQtg9OuhqcAf15C2AAlkicIOGEoyWerKTiOmuTw,32497
63
+ ai_pipeline_core/observability/_tracking/__init__.py,sha256=tiZhj_d0STL0ACq2mTktciGjXzpepfMtl5KA_OFElTE,245
64
+ ai_pipeline_core/observability/_tracking/_client.py,sha256=q3YhKJVm3jEWDXzKclQmndZ6RYIu7_F4Az_uY98uA-k,6634
65
+ ai_pipeline_core/observability/_tracking/_internal.py,sha256=zv4DI2a8pG3wM_QEuwTNxk2V_q0jEZe6HsL6P7eVO7Y,820
66
+ ai_pipeline_core/observability/_tracking/_models.py,sha256=p3nZucNGr-JsdscqnbJOy8JL1B-w6p43I-1bXvOkfc8,3237
67
+ ai_pipeline_core/observability/_tracking/_processor.py,sha256=H8D82gRs4JY6ya0ewojoVAg85FUQV-imi9bQO8M0kGU,5999
68
+ ai_pipeline_core/observability/_tracking/_service.py,sha256=diK-0qJg4HU-BHgpN1NTyFEbgPXq2e0gluRq21B8IbE,10357
69
+ ai_pipeline_core/observability/_tracking/_writer.py,sha256=xZjwYyIxDzzzPxqkKjYAYOyNP4uvKXZ-r_u-APSV_x8,9246
70
+ ai_pipeline_core/pipeline/__init__.py,sha256=uMv1jwSyq8Ym8Hbn5097twBJLdwN1iMeqnVM4EWyrhA,282
71
+ ai_pipeline_core/pipeline/decorators.py,sha256=CDJAeOjGLt5Ewc0Jc9zEuwLZwKyutOv89LSRS9dcXmI,37456
72
+ ai_pipeline_core/pipeline/options.py,sha256=KF4FcT085-IwX8r649v0a9ua5xnApM0qG2wJHWbq39A,438
73
+ ai_pipeline_core-0.4.1.dist-info/METADATA,sha256=s6YKwEgTfky-_y2CqKaYnU5yHcxOf_r7MxpdxfRL0OE,29947
74
+ ai_pipeline_core-0.4.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
75
+ ai_pipeline_core-0.4.1.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
76
+ ai_pipeline_core-0.4.1.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.27.0
2
+ Generator: hatchling 1.28.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,240 +0,0 @@
1
- """Type-safe list container for Document objects.
2
-
3
- @public
4
- """
5
-
6
- from typing import Any, Iterable, SupportsIndex, Union, overload
7
-
8
- from typing_extensions import Self
9
-
10
- from .document import Document
11
-
12
-
13
class DocumentList(list[Document]):
    """Type-safe container for Document objects.

    @public

    A ``list`` subclass that re-validates its contents after every mutation
    and offers lookup helpers (``filter_by`` and ``get_by``).

    The check for the reserved description-file suffix always runs. The
    same-type and duplicate-name checks are opt-in through constructor flags.
    A mutation that fails validation is rolled back, so the list never keeps
    the documents that caused the failure.

    Best Practice: Use default constructor in 90% of cases. Only enable
    validate_same_type or validate_duplicates when you explicitly need them.

    Example:
        >>> # RECOMMENDED - default constructor for most cases
        >>> docs = DocumentList([doc1, doc2])
        >>> # Or empty initialization
        >>> docs = DocumentList()
        >>> docs.append(MyDocument(name="file.txt", content=b"data"))
        >>>
        >>> # Only use validation flags when specifically needed:
        >>> docs = DocumentList(validate_same_type=True)  # Rare use case
        >>> doc = docs.get_by("file.txt")  # Get by name
    """

    # Class-level defaults keep validation usable on instances that were
    # created without running __init__ (for example through __new__), where
    # the instance attributes would otherwise be missing.
    _validate_same_type: bool = False
    _validate_duplicates: bool = False

    def __init__(
        self,
        documents: Iterable[Document] | None = None,
        validate_same_type: bool = False,
        validate_duplicates: bool = False,
    ) -> None:
        """Initialize DocumentList.

        @public

        Args:
            documents: Initial documents. Any iterable is accepted; None
                creates an empty list.
            validate_same_type: Enforce same document type.
            validate_duplicates: Prevent duplicate filenames.

        Raises:
            ValueError: If the initial documents fail validation.
        """
        super().__init__()
        # Flags must be set before extend(), which triggers validation.
        self._validate_same_type = validate_same_type
        self._validate_duplicates = validate_duplicates
        if documents is not None:
            self.extend(documents)

    def _validate_no_duplicates(self) -> None:
        """Check for duplicate document names (only when enabled).

        Raises:
            ValueError: If duplicate document names are found.
        """
        if not self._validate_duplicates:
            return

        seen: set[str] = set()
        duplicates: set[str] = set()
        for doc in self:
            if doc.name in seen:
                duplicates.add(doc.name)
            seen.add(doc.name)
        if duplicates:
            # Sorted so the error message is deterministic.
            raise ValueError(f"Duplicate document names found: {sorted(duplicates)}")

    def _validate_no_description_files(self) -> None:
        """Ensure no documents use reserved description file extension.

        Raises:
            ValueError: If any document uses the reserved description file extension.
        """
        description_files = [
            doc.name for doc in self if doc.name.endswith(Document.DESCRIPTION_EXTENSION)
        ]
        if description_files:
            raise ValueError(
                f"Documents with {Document.DESCRIPTION_EXTENSION} suffix are not allowed: "
                f"{description_files}"
            )

    def _validate_types(self) -> None:
        """Ensure all documents are of the same class type (only when enabled).

        The comparison is on the exact class: a subclass instance counts as
        a different type from its parent.

        Raises:
            ValueError: If documents have different class types.
        """
        if not self._validate_same_type or not self:
            return

        first_class = type(self[0])
        if any(type(doc) is not first_class for doc in self):
            # Sorted so the error message is deterministic.
            types = sorted({type(doc).__name__ for doc in self})
            raise ValueError(f"All documents must have the same type. Found types: {types}")

    def _validate(self) -> None:
        """Run all configured validation checks."""
        self._validate_no_duplicates()
        self._validate_no_description_files()
        self._validate_types()

    def _apply_checked(self, mutation: Any) -> Any:
        """Run a mutation, validate the result, and roll back on failure.

        Args:
            mutation: Zero-argument callable that mutates this list through
                the plain ``list`` methods (bypassing the overrides here).

        Returns:
            Whatever ``mutation`` returned.

        Raises:
            Exception: Re-raises whatever the mutation or validation raised,
                after restoring the previous contents.
        """
        snapshot = list(self)
        try:
            result = mutation()
            self._validate()
        except Exception:
            # Restore through list.__setitem__ so the rollback itself does
            # not trigger another validation pass.
            list.__setitem__(self, slice(None), snapshot)
            raise
        return result

    def append(self, document: Document) -> None:
        """Add a document to the end of the list.

        Raises:
            ValueError: If the resulting list fails validation; the list is
                left unchanged.
        """
        self._apply_checked(lambda: list.append(self, document))

    def extend(self, documents: Iterable[Document]) -> None:
        """Add multiple documents to the list.

        Raises:
            ValueError: If the resulting list fails validation; the list is
                left unchanged.
        """
        self._apply_checked(lambda: list.extend(self, documents))

    def insert(self, index: SupportsIndex, document: Document) -> None:
        """Insert a document at the specified position.

        Raises:
            ValueError: If the resulting list fails validation; the list is
                left unchanged.
        """
        self._apply_checked(lambda: list.insert(self, index, document))

    @overload
    def __setitem__(self, index: SupportsIndex, value: Document) -> None: ...

    @overload
    def __setitem__(self, index: slice, value: Iterable[Document]) -> None: ...

    def __setitem__(self, index: SupportsIndex | slice, value: Any) -> None:
        """Set item or slice with validation.

        Raises:
            ValueError: If the resulting list fails validation; the list is
                left unchanged.
        """
        self._apply_checked(lambda: list.__setitem__(self, index, value))

    def __iadd__(self, other: Any) -> "Self":
        """In-place addition (+=) with validation.

        Returns:
            Self: This DocumentList after modification.

        Raises:
            ValueError: If the resulting list fails validation; the list is
                left unchanged.
        """
        return self._apply_checked(lambda: list.__iadd__(self, other))

    @overload
    def filter_by(self, arg: str) -> "DocumentList": ...

    @overload
    def filter_by(self, arg: type[Document]) -> "DocumentList": ...

    @overload
    def filter_by(self, arg: list[type[Document]]) -> "DocumentList": ...

    def filter_by(self, arg: str | type[Document] | list[type[Document]]) -> "DocumentList":
        """Filter documents by name or type(s).

        @public

        Args:
            arg: Document name (str), single document type, or list of document types.

        Returns:
            New DocumentList with filtered documents, created with the
            default validation flags. For a list of types the result is
            grouped in the order the types are listed, and each entry of
            this list appears at most once even if it matches several types.

        Raises:
            TypeError: If arg is not a valid type (str, Document type, or list of Document types).

        Example:
            >>> docs.filter_by("file.txt")  # Filter by name
            >>> docs.filter_by(MyDocument)  # Filter by type
            >>> docs.filter_by([Doc1, Doc2])  # Filter by multiple types
        """
        if isinstance(arg, str):
            return DocumentList([doc for doc in self if doc.name == arg])
        if isinstance(arg, type):
            # isinstance also matches subclasses of the requested type.
            return DocumentList([doc for doc in self if isinstance(doc, arg)])
        if isinstance(arg, list):  # type: ignore[reportUnnecessaryIsInstance]
            matched: list[Document] = []
            # Track positions rather than identities so that a document
            # stored twice in this list is still returned twice.
            taken: set[int] = set()
            for document_type in arg:
                for position, doc in enumerate(self):
                    if position not in taken and isinstance(doc, document_type):
                        taken.add(position)
                        matched.append(doc)
            return DocumentList(matched)
        raise TypeError(f"Invalid argument type for filter_by: {type(arg)}")

    @overload
    def get_by(self, arg: str) -> Document: ...

    @overload
    def get_by(self, arg: type[Document]) -> Document: ...

    @overload
    def get_by(self, arg: str, required: bool = True) -> Document | None: ...

    @overload
    def get_by(self, arg: type[Document], required: bool = True) -> Document | None: ...

    def get_by(self, arg: str | type[Document], required: bool = True) -> Document | None:
        """Get a single document by name or type.

        @public

        Args:
            arg: Document name (str) or document type.
            required: If True, raises ValueError when not found. If False, returns None.

        Returns:
            The first matching document, or None if not found and required=False.

        Raises:
            ValueError: If required=True and document not found.
            TypeError: If arg is not a string or Document type.

        Example:
            >>> doc = docs.get_by("file.txt")  # Get by name, raises if not found
            >>> doc = docs.get_by(MyDocument, required=False)  # Returns None if not found
        """
        if isinstance(arg, str):
            found = next((doc for doc in self if doc.name == arg), None)
            not_found_message = f"Document with name '{arg}' not found"
        elif isinstance(arg, type):  # type: ignore[reportUnnecessaryIsInstance]
            # isinstance also matches subclasses of the requested type.
            found = next((doc for doc in self if isinstance(doc, arg)), None)
            not_found_message = f"Document of type '{arg.__name__}' not found"
        else:
            raise TypeError(f"Invalid argument type for get_by: {type(arg)}")

        if found is None and required:
            raise ValueError(not_found_message)
        return found
@@ -1,128 +0,0 @@
1
- """Flow-specific document base class for persistent pipeline data.
2
-
3
- @public
4
-
5
- This module provides the FlowDocument abstract base class for documents
6
- that need to persist across Prefect flow runs and between pipeline steps.
7
- """
8
-
9
- from typing import Literal, final
10
-
11
- from .document import Document
12
-
13
-
14
class FlowDocument(Document):
    """Abstract base class for documents that persist across flow runs.

    @public

    Use FlowDocument for data that must be kept between pipeline steps and
    across several flow executions. Such documents are typically written to
    the file system by the simple_runner utilities.

    Key characteristics:
        - Persisted to file system between pipeline steps
        - Survives across multiple flow runs
        - Used for flow inputs and outputs
        - Saved in directories named after the document's canonical name

    Creating FlowDocuments:
        **Use the `create` classmethod** for most use cases. It handles automatic
        conversion of various content types. Only use __init__ when you have bytes.

        >>> from enum import StrEnum
        >>>
        >>> # Simple document with pass:
        >>> class MyDoc(FlowDocument):
        ...     pass
        >>>
        >>> # Document with restricted file names:
        >>> class ConfigDoc(FlowDocument):
        ...     class FILES(StrEnum):
        ...         CONFIG = "config.yaml"
        ...         SETTINGS = "settings.json"
        >>>
        >>> # RECOMMENDED - automatic conversion:
        >>> doc = MyDoc.create(name="data.json", content={"key": "value"})
        >>> doc = ConfigDoc.create(name="config.yaml", content={"host": "localhost"})

    Persistence:
        Documents are saved to: {output_dir}/{canonical_name}/{filename}
        For example: output/my_doc/data.json

    Note:
        - Cannot instantiate FlowDocument directly - must subclass
        - Used with FlowConfig to define flow input/output types
        - No additional abstract methods to implement

    See Also:
        TaskDocument: For temporary documents within task execution
        TemporaryDocument: For documents that are never persisted
    """

    def __init__(
        self,
        *,
        name: str,
        content: bytes,
        description: str | None = None,
    ) -> None:
        """Initialize a FlowDocument with raw bytes content.

        Important:
            **Most users should use the `create` classmethod instead of __init__.**
            `create` converts various content types (str, dict, list, Pydantic
            models) automatically, whereas __init__ only accepts bytes.

        Rejects direct instantiation of FlowDocument itself; only concrete
        subclasses can be constructed.

        Args:
            name: Document filename (required, keyword-only)
            content: Document content as raw bytes (required, keyword-only)
            description: Optional human-readable description (keyword-only)

        Raises:
            TypeError: If attempting to instantiate FlowDocument directly
                instead of using a concrete subclass.

        Example:
            >>> from enum import StrEnum
            >>>
            >>> # Simple subclass:
            >>> class MyFlowDoc(FlowDocument):
            ...     pass
            >>>
            >>> # With FILES restriction:
            >>> class RestrictedDoc(FlowDocument):
            ...     class FILES(StrEnum):
            ...         DATA = "data.json"
            ...         METADATA = "metadata.yaml"
            >>>
            >>> # Direct constructor - only for bytes:
            >>> doc = MyFlowDoc(name="test.bin", content=b"raw data")
            >>>
            >>> # RECOMMENDED - use create for automatic conversion:
            >>> doc = RestrictedDoc.create(name="data.json", content={"key": "value"})
            >>> # This would raise DocumentNameError:
            >>> # doc = RestrictedDoc.create(name="other.json", content={})
        """
        concrete_type = type(self)
        # Exact-class check: subclasses pass, the abstract base does not.
        if concrete_type is FlowDocument:
            raise TypeError("Cannot instantiate abstract FlowDocument class directly")
        super().__init__(name=name, content=content, description=description)

    @final
    def get_base_type(self) -> Literal["flow"]:
        """Return the base type identifier for flow documents.

        Marked final, so subclasses must not override it. The value tags
        the document as one that persists across flow runs.

        Returns:
            "flow" - Indicates this document persists across flow runs.

        Note:
            This determines the document's lifecycle and persistence behavior
            in the pipeline system.
        """
        base_type: Literal["flow"] = "flow"
        return base_type
@@ -1,133 +0,0 @@
1
- """Task-specific document base class for temporary pipeline data.
2
-
3
- @public
4
-
5
- This module provides the TaskDocument abstract base class for documents
6
- that exist only during Prefect task execution and are not persisted.
7
- """
8
-
9
- from typing import Literal, final
10
-
11
- from .document import Document
12
-
13
-
14
class TaskDocument(Document):
    """Abstract base class for temporary documents within task execution.

    @public

    Use TaskDocument for intermediate data that lives only while a Prefect
    task runs and is not persisted to disk: temporary processing results,
    transformations, and anything that need not outlive the current task.

    Key characteristics:
        - Not persisted to file system
        - Exists only during task execution
        - Garbage collected after task completes
        - Used for intermediate processing results
        - Reduces persistent I/O for temporary data

    Creating TaskDocuments:
        **Use the `create` classmethod** for most use cases. It handles automatic
        conversion of various content types. Only use __init__ when you have bytes.

        >>> from enum import StrEnum
        >>>
        >>> # Simple task document:
        >>> class TempDoc(TaskDocument):
        ...     pass
        >>>
        >>> # With restricted files:
        >>> class CacheDoc(TaskDocument):
        ...     class FILES(StrEnum):
        ...         CACHE = "cache.json"
        ...         INDEX = "index.dat"
        >>>
        >>> # RECOMMENDED - automatic conversion:
        >>> doc = TempDoc.create(name="temp.json", content={"status": "processing"})
        >>> doc = CacheDoc.create(name="cache.json", content={"data": [1, 2, 3]})

    Use Cases:
        - Intermediate transformation results
        - Temporary buffers during processing
        - Task-local cache data
        - Processing status documents

    Note:
        - Cannot instantiate TaskDocument directly - must subclass
        - Not saved by simple_runner utilities
        - Reduces I/O overhead for temporary data
        - No additional abstract methods to implement

    See Also:
        FlowDocument: For documents that persist across flow runs
        TemporaryDocument: Alternative for non-persistent documents
    """

    def __init__(
        self,
        *,
        name: str,
        content: bytes,
        description: str | None = None,
    ) -> None:
        """Initialize a TaskDocument with raw bytes content.

        Important:
            **Most users should use the `create` classmethod instead of __init__.**
            `create` converts various content types (str, dict, list, Pydantic
            models) automatically, whereas __init__ only accepts bytes.

        Rejects direct instantiation of TaskDocument itself; only concrete
        subclasses can be constructed.

        Args:
            name: Document filename (required, keyword-only)
            content: Document content as raw bytes (required, keyword-only)
            description: Optional human-readable description (keyword-only)

        Raises:
            TypeError: If attempting to instantiate TaskDocument directly
                instead of using a concrete subclass.

        Example:
            >>> from enum import StrEnum
            >>>
            >>> # Simple subclass:
            >>> class MyTaskDoc(TaskDocument):
            ...     pass
            >>>
            >>> # With FILES restriction:
            >>> class TempProcessDoc(TaskDocument):
            ...     class FILES(StrEnum):
            ...         BUFFER = "buffer.bin"
            ...         STATUS = "status.json"
            >>>
            >>> # Direct constructor - only for bytes:
            >>> doc = MyTaskDoc(name="temp.bin", content=b"raw data")
            >>>
            >>> # RECOMMENDED - use create for automatic conversion:
            >>> doc = TempProcessDoc.create(name="status.json", content={"percent": 50})
            >>> # This would raise DocumentNameError:
            >>> # doc = TempProcessDoc.create(name="other.json", content={})
        """
        concrete_type = type(self)
        # Exact-class check: subclasses pass, the abstract base does not.
        if concrete_type is TaskDocument:
            raise TypeError("Cannot instantiate abstract TaskDocument class directly")
        super().__init__(name=name, content=content, description=description)

    @final
    def get_base_type(self) -> Literal["task"]:
        """Return the base type identifier for task documents.

        Marked final, so subclasses must not override it. The value tags
        the document as a task-scoped temporary document.

        Returns:
            "task" - Indicates this document is temporary within task execution.

        Note:
            This determines that the document will not be persisted and
            exists only during task execution.
        """
        base_type: Literal["task"] = "task"
        return base_type
@@ -1,95 +0,0 @@
1
- """Temporary document implementation for non-persistent data.
2
-
3
- @public
4
-
5
- This module provides the TemporaryDocument class for documents that
6
- are never persisted, regardless of context.
7
- """
8
-
9
- from typing import Any, Literal, final
10
-
11
- from .document import Document
12
-
13
-
14
@final
class TemporaryDocument(Document):
    r"""Concrete document class for data that is never persisted.

    @public

    TemporaryDocument is a final (non-subclassable) document type for data
    that must never be saved to disk, whether it is used in a flow or in a
    task. FlowDocument and TaskDocument are abstract; TemporaryDocument can
    be instantiated directly.

    Key characteristics:
        - Never persisted to file system
        - Can be instantiated directly (not abstract)
        - Cannot be subclassed (annotated with Python's @final decorator in code)
        - Useful for transient data like API responses or intermediate calculations
        - Ignored by simple_runner save operations

    Creating TemporaryDocuments:
        **Use the `create` classmethod** for most use cases. It handles automatic
        conversion of various content types. Only use __init__ when you have bytes.

        >>> # RECOMMENDED - automatic conversion:
        >>> doc = TemporaryDocument.create(
        ...     name="api_response.json",
        ...     content={"status": "ok", "data": [1, 2, 3]}
        ... )
        >>> doc = TemporaryDocument.create(
        ...     name="credentials.txt",
        ...     content="secret_token_xyz"
        ... )
        >>>
        >>> # Direct constructor - only for bytes:
        >>> doc = TemporaryDocument(
        ...     name="binary.dat",
        ...     content=b"\x00\x01\x02"
        ... )
        >>>
        >>> doc.is_temporary  # Always True

    Use Cases:
        - API responses that shouldn't be cached
        - Sensitive credentials or tokens
        - Intermediate calculations
        - Temporary transformations
        - Data explicitly marked as non-persistent

    Note:
        - This is a final class and cannot be subclassed
        - Use when you explicitly want to prevent persistence
        - Useful for sensitive data that shouldn't be written to disk

    See Also:
        FlowDocument: For documents that persist across flow runs
        TaskDocument: For documents temporary within task execution
    """

    def __init_subclass__(cls, **kwargs: Any) -> None:
        """Reject every attempt to subclass TemporaryDocument.

        The @final decorator is only a hint for type checkers; this hook
        enforces the restriction at class-creation time.

        Args:
            **kwargs: Additional keyword arguments (ignored).

        Raises:
            TypeError: Always raised to prevent subclassing of `TemporaryDocument`.
        """
        message = "TemporaryDocument is final and cannot be subclassed"
        raise TypeError(message)

    def get_base_type(self) -> Literal["temporary"]:
        """Return the base type identifier for temporary documents.

        The value tags the document as temporary, so the pipeline system
        never persists it.

        Returns:
            "temporary" - Indicates this document is never persisted.

        Note:
            Documents with this type are explicitly excluded from
            all persistence operations in the pipeline system.
        """
        base_type: Literal["temporary"] = "temporary"
        return base_type