ai-pipeline-core 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. ai_pipeline_core/__init__.py +64 -158
  2. ai_pipeline_core/deployment/__init__.py +6 -18
  3. ai_pipeline_core/deployment/base.py +392 -212
  4. ai_pipeline_core/deployment/contract.py +6 -10
  5. ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
  6. ai_pipeline_core/deployment/helpers.py +16 -17
  7. ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
  8. ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +12 -14
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +318 -1434
  30. ai_pipeline_core/documents/mime_type.py +11 -84
  31. ai_pipeline_core/documents/utils.py +4 -12
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +32 -85
  34. ai_pipeline_core/images/_processing.py +5 -11
  35. ai_pipeline_core/llm/__init__.py +6 -4
  36. ai_pipeline_core/llm/ai_messages.py +102 -90
  37. ai_pipeline_core/llm/client.py +229 -183
  38. ai_pipeline_core/llm/model_options.py +12 -84
  39. ai_pipeline_core/llm/model_response.py +53 -99
  40. ai_pipeline_core/llm/model_types.py +8 -23
  41. ai_pipeline_core/logging/__init__.py +2 -7
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -37
  44. ai_pipeline_core/logging/logging_mixin.py +15 -41
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
  49. ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +133 -75
  50. ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
  51. ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +16 -102
  70. ai_pipeline_core/settings.py +26 -31
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
  74. ai_pipeline_core/debug/__init__.py +0 -26
  75. ai_pipeline_core/documents/document_list.py +0 -420
  76. ai_pipeline_core/documents/flow_document.py +0 -112
  77. ai_pipeline_core/documents/task_document.py +0 -117
  78. ai_pipeline_core/documents/temporary_document.py +0 -74
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -494
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -718
  83. ai_pipeline_core/prefect.py +0 -63
  84. ai_pipeline_core/prompt_builder/__init__.py +0 -5
  85. ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
  86. ai_pipeline_core/prompt_builder/global_cache.py +0 -78
  87. ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
  88. ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
  89. ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
  90. ai_pipeline_core/storage/__init__.py +0 -8
  91. ai_pipeline_core/storage/storage.py +0 -628
  92. ai_pipeline_core/utils/__init__.py +0 -8
  93. ai_pipeline_core-0.3.4.dist-info/METADATA +0 -569
  94. ai_pipeline_core-0.3.4.dist-info/RECORD +0 -57
  95. {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
  96. {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,76 @@
1
+ ai_pipeline_core/__init__.py,sha256=_GM0O3dDQuCvXQD44dyXKvzLOZk2htwwFS0mXpvxJQU,3270
2
+ ai_pipeline_core/exceptions.py,sha256=csAl7vq6xjSFBF8-UM9WZODCbhsOdOG5zH6IbA8iteM,1280
3
+ ai_pipeline_core/prompt_manager.py,sha256=3wFkL5rrjtUT1cLInkgyhS8hKnO4MeD1cdXAEuLhgoE,9459
4
+ ai_pipeline_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ ai_pipeline_core/settings.py,sha256=BUz8JEFfJQrdE4rNOhQWwxnTrfekLjWkoy-3wDZQ7PY,5142
6
+ ai_pipeline_core/testing.py,sha256=jIRrLxNvTwdamucfJoHET2qMeRhhMZV9uEJXO5vAfis,279
7
+ ai_pipeline_core/deployment/__init__.py,sha256=wTkVK6gcEQvqBajFMTAuodRONpN25yHbR1jtcumf0WQ,900
8
+ ai_pipeline_core/deployment/base.py,sha256=ros0VzvkNCwbPgG9D49ceKSjTq857iRHzvW-uhiaNUE,34750
9
+ ai_pipeline_core/deployment/contract.py,sha256=a1qbHhneTGB27oSOUy79CUIhOIzOoq37M63XoIMzA4Y,1952
10
+ ai_pipeline_core/deployment/deploy.py,sha256=89W0w22cRkK_yMrn9iuH5L4dvnlMu31eojhJIKHtK2E,21991
11
+ ai_pipeline_core/deployment/helpers.py,sha256=yVtGFUs4AFXkpLkiQ_ale0nXXt5btfWSb5PAbikQHNs,3312
12
+ ai_pipeline_core/deployment/progress.py,sha256=5tVD9nW0N-b8Z2BxazcWCWHFpLu6pJ-eqPmRyj68X6Y,3591
13
+ ai_pipeline_core/deployment/remote.py,sha256=tOexisKEeeBoHLGYZWqcjr2H-nqqYc6kvoDL72AW78w,4661
14
+ ai_pipeline_core/docs_generator/__init__.py,sha256=JbWbk-Lw5GgWrCMRuw8zvKNTZY2jXv7XqoMiBYudvRI,1255
15
+ ai_pipeline_core/docs_generator/__main__.py,sha256=CH4agiM2suFJ63MhTg5m0GuXdc40z-6o4ojR72JQWVA,145
16
+ ai_pipeline_core/docs_generator/cli.py,sha256=8OjdMtzQraPxWN3uPapSNJnKyPLPtnygKL0rF5JL2GY,7172
17
+ ai_pipeline_core/docs_generator/extractor.py,sha256=yHQfeb_LwgBZW5dBY65L6a4qvNvxnwWiFXhqeZV5y5w,10631
18
+ ai_pipeline_core/docs_generator/guide_builder.py,sha256=cxVEoYMfwOsFWDLbXddJ7IBBCRshbfSUoQ84ZMw_YQE,22232
19
+ ai_pipeline_core/docs_generator/trimmer.py,sha256=olsl4MSmMHqsIEeVu9HU7xjONmIbSU7NmPwWdhOH6AA,1052
20
+ ai_pipeline_core/docs_generator/validator.py,sha256=w-UdE6h6LLCwVy9Qqmv-TavIttA_1mcRoAhF9_HKszc,4460
21
+ ai_pipeline_core/document_store/__init__.py,sha256=5aHsCpRkfkaLhLo0sVaKXEjqCcRYgzkVJErULKUpDAk,366
22
+ ai_pipeline_core/document_store/_summary.py,sha256=qwy4kHEEwHwXGN7LVol09qzf7RjOJ2-6qTme-mtE8aM,377
23
+ ai_pipeline_core/document_store/_summary_worker.py,sha256=K4575wCot0EoKCEsOj8XMCS1O6aWC37S9L_3TZjONco,6659
24
+ ai_pipeline_core/document_store/clickhouse.py,sha256=vUlN2rIxCn5A8ceBFbpaHPS2O3tYEuR_UZkffWdy7E4,20636
25
+ ai_pipeline_core/document_store/factory.py,sha256=F56ZM8TxgzFNYUkdzZidbxTe-JDiIAqi_tlE30cdlp0,1499
26
+ ai_pipeline_core/document_store/local.py,sha256=r_dCJ46fto89yxZfKuGNQonpocQ1TwFFaCUntW-ZSQw,13396
27
+ ai_pipeline_core/document_store/memory.py,sha256=MlsWHLLaEK6MdHBPZUgsNkbkFPvd2d2gFcfdDMBYvXo,3679
28
+ ai_pipeline_core/document_store/protocol.py,sha256=UhA60PuSMBwpX9yVLOtUAsKqdPnU2synDos6cB-WQng,2407
29
+ ai_pipeline_core/documents/__init__.py,sha256=LphKH_CiN3BQ0gjtJps1Y1WF_Lt2Qg-75aq2U1_PvP8,723
30
+ ai_pipeline_core/documents/_context_vars.py,sha256=JbgQoCNaHPrOAMlEa6HYB8Ti7iw_jQfZOi5eNrwHYWg,2687
31
+ ai_pipeline_core/documents/_hashing.py,sha256=_u1P4z1bMNSREJ6GNf3sSqf0TCrDr9sVcAIG9bnORnU,1667
32
+ ai_pipeline_core/documents/attachment.py,sha256=eVpb27Qu8mLO2Bxv_JYd5JXhgViaJusVM8RBGcU1iQE,2951
33
+ ai_pipeline_core/documents/context.py,sha256=vlORnRk2klMTZk6X4jEJayeH4B2Xo6ZxZ-31mtdba6o,5482
34
+ ai_pipeline_core/documents/document.py,sha256=lU3hqbyYswRlWAiS9YGW9IRZnVA-3qmMpnAKPmi8Bws,26157
35
+ ai_pipeline_core/documents/mime_type.py,sha256=QeRX6GiQnTpqx3Fk8QLhi1lT0Z5uEs496dGc4_xqgsA,6530
36
+ ai_pipeline_core/documents/utils.py,sha256=9WOW3zvKYxQPnM8LjYFy3V9-yqc6hwgCaiog3kUH274,5413
37
+ ai_pipeline_core/images/__init__.py,sha256=Hc2QKR27Q2Q-h5nH-EbzfxdE3dHArBm-st5_xjOKFh0,8854
38
+ ai_pipeline_core/images/_processing.py,sha256=MrCuPGsyyEl9UlXYIPhZs0wN8CPTMZmejV2Lo2wyCZk,4362
39
+ ai_pipeline_core/llm/__init__.py,sha256=oyRvYD5DLDl7JIRTBUaiVz6jUC5dLLujkMNFpfRp2zc,795
40
+ ai_pipeline_core/llm/ai_messages.py,sha256=Ycmntk5d6NUFqVVsnNR_IDwJUFuHYEH7CPvmmDfYaJI,17424
41
+ ai_pipeline_core/llm/client.py,sha256=CjxOiniuy5CEsA_Xz0KPLCBthbnUfC43fTpuDcqkIUM,30276
42
+ ai_pipeline_core/llm/model_options.py,sha256=hg8xR0RJdJKp8QJNA4EbLnfFsnkE4HnxD85aYxc--hM,9164
43
+ ai_pipeline_core/llm/model_response.py,sha256=Ml9wcssSssqibReJxCc9EQu488pz69Cmq_XNBs_xmak,12219
44
+ ai_pipeline_core/llm/model_types.py,sha256=qHoUPPEkHu9B4kJ5xcIC09fk72v667ZxvzigxtgLpVo,2174
45
+ ai_pipeline_core/logging/__init__.py,sha256=H8G3bycxwNxc4e4Gjwi-al9e2ufTJbTV5iFKCF1Ticw,495
46
+ ai_pipeline_core/logging/logging.yml,sha256=qsf6vcxtWIHD5xwJGtylibiuy_0KF_Ji7-qb-xvFtaU,1357
47
+ ai_pipeline_core/logging/logging_config.py,sha256=JnTarGSSkpi7eqR7N13TLKeuwNCvZgwJUPlhObiwrHk,6095
48
+ ai_pipeline_core/logging/logging_mixin.py,sha256=Jn3x0xvSwSjbAMfWELMOEfffWBB1u4IeIr7M2-55CJs,7191
49
+ ai_pipeline_core/observability/__init__.py,sha256=km2nIiY3aYH13s2m4nR91erQG4qKnGuvQkrKDdVW3bw,720
50
+ ai_pipeline_core/observability/_document_tracking.py,sha256=tXv6rbGIuxOYdq22aVbyn9Ve5EhYHPnrYCE-kj2NGXI,5428
51
+ ai_pipeline_core/observability/_initialization.py,sha256=GfwRHpg90Og3PzmG1ZUilJVXoFx9BIWpbMgXxJ5Alqk,6747
52
+ ai_pipeline_core/observability/_logging_bridge.py,sha256=T3PpkgoI0YKN2vvBJEHzR5rFMFNHq9REHJs7PQX2VQk,2053
53
+ ai_pipeline_core/observability/_summary.py,sha256=GAZXzXVkwUcubSiGb5DgkHfO1gGwx6pYoDz6RUJmL5k,3390
54
+ ai_pipeline_core/observability/tracing.py,sha256=KhIXSl5fe39UE1Eokz9-1fe5biX6anKbwZDmXY_Z2LU,27050
55
+ ai_pipeline_core/observability/_debug/__init__.py,sha256=V8pbgdQOx-7oFKQ_sNzAZ1-oq5c73P4kVjEClZDXe8k,942
56
+ ai_pipeline_core/observability/_debug/_auto_summary.py,sha256=LMvETvx_RPKF8srewCKwjigTiWs3KfDmQAYYSuVybIM,2687
57
+ ai_pipeline_core/observability/_debug/_config.py,sha256=CWfnK-F3knUuOQ34y_CjmU3l67J85NIZ3siftYhevc0,3367
58
+ ai_pipeline_core/observability/_debug/_content.py,sha256=ECy2vR8wDHJq0RD2X10XS-ed8uCq9VD3K8fnGOjQxgs,30657
59
+ ai_pipeline_core/observability/_debug/_processor.py,sha256=FkX1xqeJds-Gctt5keYSTSaC85FM4QaeFIEevTn7Qh8,3875
60
+ ai_pipeline_core/observability/_debug/_summary.py,sha256=gD7MtWldBRs2VniQxUBjr6XbD2Z8HhbqZdnkcr4HdzE,11274
61
+ ai_pipeline_core/observability/_debug/_types.py,sha256=Cw80SWSVso02kkj6T7hICGU_vn3W2RUEv74h94V5ZfI,2220
62
+ ai_pipeline_core/observability/_debug/_writer.py,sha256=0JOmaQtg9OuhqcAf15C2AAlkicIOGEoyWerKTiOmuTw,32497
63
+ ai_pipeline_core/observability/_tracking/__init__.py,sha256=tiZhj_d0STL0ACq2mTktciGjXzpepfMtl5KA_OFElTE,245
64
+ ai_pipeline_core/observability/_tracking/_client.py,sha256=q3YhKJVm3jEWDXzKclQmndZ6RYIu7_F4Az_uY98uA-k,6634
65
+ ai_pipeline_core/observability/_tracking/_internal.py,sha256=zv4DI2a8pG3wM_QEuwTNxk2V_q0jEZe6HsL6P7eVO7Y,820
66
+ ai_pipeline_core/observability/_tracking/_models.py,sha256=p3nZucNGr-JsdscqnbJOy8JL1B-w6p43I-1bXvOkfc8,3237
67
+ ai_pipeline_core/observability/_tracking/_processor.py,sha256=H8D82gRs4JY6ya0ewojoVAg85FUQV-imi9bQO8M0kGU,5999
68
+ ai_pipeline_core/observability/_tracking/_service.py,sha256=diK-0qJg4HU-BHgpN1NTyFEbgPXq2e0gluRq21B8IbE,10357
69
+ ai_pipeline_core/observability/_tracking/_writer.py,sha256=xZjwYyIxDzzzPxqkKjYAYOyNP4uvKXZ-r_u-APSV_x8,9246
70
+ ai_pipeline_core/pipeline/__init__.py,sha256=uMv1jwSyq8Ym8Hbn5097twBJLdwN1iMeqnVM4EWyrhA,282
71
+ ai_pipeline_core/pipeline/decorators.py,sha256=CDJAeOjGLt5Ewc0Jc9zEuwLZwKyutOv89LSRS9dcXmI,37456
72
+ ai_pipeline_core/pipeline/options.py,sha256=Y--5-DxzxR1Ul4GltGPP3JUIk8bw1GlUdZ3IDX8UIHQ,439
73
+ ai_pipeline_core-0.4.0.dist-info/METADATA,sha256=hH5B9XsY_NT4bCB1W-LvBEG5LYR7SyaXdt4Z75PWvEE,29947
74
+ ai_pipeline_core-0.4.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
75
+ ai_pipeline_core-0.4.0.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
76
+ ai_pipeline_core-0.4.0.dist-info/RECORD,,
@@ -1,26 +0,0 @@
1
- """Local trace debugging system for AI pipelines.
2
-
3
- This module provides filesystem-based trace debugging that saves all spans
4
- with their inputs/outputs for LLM-assisted debugging.
5
-
6
- Enable by setting TRACE_DEBUG_PATH environment variable.
7
- """
8
-
9
- from .config import TraceDebugConfig
10
- from .content import ArtifactStore, ContentRef, ContentWriter, reconstruct_span_content
11
- from .processor import LocalDebugSpanProcessor
12
- from .summary import generate_summary
13
- from .writer import LocalTraceWriter, TraceState, WriteJob
14
-
15
- __all__ = [
16
- "TraceDebugConfig",
17
- "ContentRef",
18
- "ContentWriter",
19
- "ArtifactStore",
20
- "reconstruct_span_content",
21
- "LocalDebugSpanProcessor",
22
- "LocalTraceWriter",
23
- "TraceState",
24
- "WriteJob",
25
- "generate_summary",
26
- ]
@@ -1,420 +0,0 @@
1
- """Type-safe list container for Document objects.
2
-
3
- @public
4
- """
5
-
6
- from copy import deepcopy
7
- from typing import Any, Callable, Iterable, SupportsIndex, Union, overload
8
-
9
- from typing_extensions import Self
10
-
11
- from .document import Document
12
-
13
-
14
- class DocumentList(list[Document]):
15
- """Type-safe container for Document objects.
16
-
17
- @public
18
-
19
- Specialized list with validation and filtering for documents.
20
-
21
- Best Practice: Use default constructor by default, unless instructed otherwise.
22
- Only enable validate_same_type or validate_duplicates when you explicitly need them.
23
-
24
- Example:
25
- >>> # RECOMMENDED - default constructor for most cases
26
- >>> docs = DocumentList([doc1, doc2])
27
- >>> # Or empty initialization
28
- >>> docs = DocumentList()
29
- >>> docs.append(MyDocument(name="file.txt", content=b"data"))
30
- >>>
31
- >>> # Only use validation flags when specifically needed:
32
- >>> docs = DocumentList(validate_same_type=True) # Rare use case
33
- >>> doc = docs.get_by("file.txt") # Get by name
34
- """
35
-
36
- def __init__(
37
- self,
38
- documents: list[Document] | None = None,
39
- validate_same_type: bool = False,
40
- validate_duplicates: bool = False,
41
- frozen: bool = False,
42
- ) -> None:
43
- """Initialize DocumentList.
44
-
45
- @public
46
-
47
- Args:
48
- documents: Initial list of documents.
49
- validate_same_type: Enforce same document type.
50
- validate_duplicates: Prevent duplicate filenames.
51
- frozen: If True, list is immutable from creation.
52
- """
53
- super().__init__()
54
- self._validate_same_type = validate_same_type
55
- self._validate_duplicates = validate_duplicates
56
- self._frozen = False # Initialize as unfrozen to allow initial population
57
- if documents:
58
- self.extend(documents)
59
- self._frozen = frozen # Set frozen state after initial population
60
-
61
- def _validate_no_duplicates(self) -> None:
62
- """Check for duplicate document names.
63
-
64
- Raises:
65
- ValueError: If duplicate document names are found.
66
- """
67
- if not self._validate_duplicates:
68
- return
69
-
70
- filenames = [doc.name for doc in self]
71
- seen: set[str] = set()
72
- duplicates: list[str] = []
73
- for name in filenames:
74
- if name in seen:
75
- duplicates.append(name)
76
- seen.add(name)
77
- if duplicates:
78
- unique_duplicates = list(set(duplicates))
79
- raise ValueError(f"Duplicate document names found: {unique_duplicates}")
80
-
81
- def _validate_no_description_files(self) -> None:
82
- """Ensure no documents use reserved description file extension.
83
-
84
- Raises:
85
- ValueError: If any document uses the reserved description file extension.
86
- """
87
- description_files = [
88
- doc.name for doc in self if doc.name.endswith(Document.DESCRIPTION_EXTENSION)
89
- ]
90
- if description_files:
91
- raise ValueError(
92
- f"Documents with {Document.DESCRIPTION_EXTENSION} suffix are not allowed: "
93
- f"{description_files}"
94
- )
95
-
96
- def _validate_types(self) -> None:
97
- """Ensure all documents are of the same class type.
98
-
99
- Raises:
100
- ValueError: If documents have different class types.
101
- """
102
- if not self._validate_same_type or not self:
103
- return
104
-
105
- first_class = type(self[0])
106
- different_types = [doc for doc in self if type(doc) is not first_class]
107
- if different_types:
108
- types = list({type(doc).__name__ for doc in self})
109
- raise ValueError(f"All documents must have the same type. Found types: {types}")
110
-
111
- def _validate(self) -> None:
112
- """Run all configured validation checks."""
113
- self._validate_no_duplicates()
114
- self._validate_no_description_files()
115
- self._validate_types()
116
-
117
- def freeze(self) -> None:
118
- """Permanently freeze the list, preventing modifications.
119
-
120
- Once frozen, the list cannot be unfrozen.
121
- """
122
- self._frozen = True
123
-
124
- def copy(self) -> "DocumentList":
125
- """Create an unfrozen deep copy of the list.
126
-
127
- Returns:
128
- New unfrozen DocumentList with deep-copied documents.
129
- """
130
- copied_docs = deepcopy(list(self))
131
- return DocumentList(
132
- documents=copied_docs,
133
- validate_same_type=self._validate_same_type,
134
- validate_duplicates=self._validate_duplicates,
135
- frozen=False, # Copies are always unfrozen
136
- )
137
-
138
- def _check_frozen(self) -> None:
139
- """Check if list is frozen and raise if it is.
140
-
141
- Raises:
142
- RuntimeError: If the list is frozen.
143
- """
144
- if self._frozen:
145
- raise RuntimeError("Cannot modify frozen DocumentList")
146
-
147
- def append(self, document: Document) -> None:
148
- """Add a document to the end of the list."""
149
- self._check_frozen()
150
- super().append(document)
151
- self._validate()
152
-
153
- def extend(self, documents: Iterable[Document]) -> None:
154
- """Add multiple documents to the list."""
155
- self._check_frozen()
156
- super().extend(documents)
157
- self._validate()
158
-
159
- def insert(self, index: SupportsIndex, document: Document) -> None:
160
- """Insert a document at the specified position."""
161
- self._check_frozen()
162
- super().insert(index, document)
163
- self._validate()
164
-
165
- @overload
166
- def __setitem__(self, index: SupportsIndex, value: Document) -> None: ...
167
-
168
- @overload
169
- def __setitem__(self, index: slice, value: Iterable[Document]) -> None: ...
170
-
171
- def __setitem__(self, index: Union[SupportsIndex, slice], value: Any) -> None:
172
- """Set item or slice with validation."""
173
- self._check_frozen()
174
- super().__setitem__(index, value)
175
- self._validate()
176
-
177
- def __iadd__(self, other: Any) -> "Self":
178
- """In-place addition (+=) with validation.
179
-
180
- Returns:
181
- Self: This DocumentList after modification.
182
- """
183
- self._check_frozen()
184
- result = super().__iadd__(other)
185
- self._validate()
186
- return result
187
-
188
- def __delitem__(self, index: Union[SupportsIndex, slice]) -> None:
189
- """Delete item or slice from list."""
190
- self._check_frozen()
191
- super().__delitem__(index)
192
-
193
- def pop(self, index: SupportsIndex = -1) -> Document:
194
- """Remove and return item at index.
195
-
196
- Returns:
197
- Document removed from the list.
198
- """
199
- self._check_frozen()
200
- return super().pop(index)
201
-
202
- def remove(self, document: Document) -> None:
203
- """Remove first occurrence of document."""
204
- self._check_frozen()
205
- super().remove(document)
206
-
207
- def clear(self) -> None:
208
- """Remove all items from list."""
209
- self._check_frozen()
210
- super().clear()
211
-
212
- def reverse(self) -> None:
213
- """Reverse list in place."""
214
- self._check_frozen()
215
- super().reverse()
216
-
217
- def sort(self, *, key: Callable[[Document], Any] | None = None, reverse: bool = False) -> None:
218
- """Sort list in place."""
219
- self._check_frozen()
220
- if key is None:
221
- super().sort(reverse=reverse) # type: ignore[call-arg]
222
- else:
223
- super().sort(key=key, reverse=reverse)
224
-
225
- @overload
226
- def filter_by(self, arg: str) -> "DocumentList": ...
227
-
228
- @overload
229
- def filter_by(self, arg: type[Document]) -> "DocumentList": ...
230
-
231
- @overload
232
- def filter_by(self, arg: Iterable[type[Document]]) -> "DocumentList": ...
233
-
234
- @overload
235
- def filter_by(self, arg: Iterable[str]) -> "DocumentList": ...
236
-
237
- def filter_by(
238
- self, arg: str | type[Document] | Iterable[type[Document]] | Iterable[str]
239
- ) -> "DocumentList":
240
- """Filter documents by name(s) or type(s).
241
-
242
- @public
243
-
244
- ALWAYS returns a DocumentList (which may be empty), never raises an exception
245
- for no matches. Use this when you want to process all matching documents.
246
-
247
- Args:
248
- arg: Can be one of:
249
- - str: Single document name to filter by
250
- - type[Document]: Single document type to filter by (includes subclasses)
251
- - Iterable[type[Document]]: Multiple document types to filter by
252
- (list, tuple, set, generator, or any iterable)
253
- - Iterable[str]: Multiple document names to filter by
254
- (list, tuple, set, generator, or any iterable)
255
-
256
- Returns:
257
- New DocumentList with filtered documents (may be empty).
258
- - Returns ALL matching documents
259
- - Empty DocumentList if no matches found
260
-
261
- Raises:
262
- TypeError: If arg is not a valid type (not str, type, or iterable),
263
- or if iterable contains mixed types (strings and types together).
264
- AttributeError: If arg is expected to be iterable but doesn't support iteration.
265
-
266
- Example:
267
- >>> # Returns list with all matching documents
268
- >>> matching_docs = docs.filter_by("file.txt") # May be empty
269
- >>> for doc in matching_docs:
270
- ... process(doc)
271
- >>>
272
- >>> # Filter by type - returns all instances
273
- >>> config_docs = docs.filter_by(ConfigDocument)
274
- >>> print(f"Found {len(config_docs)} config documents")
275
- >>>
276
- >>> # Filter by multiple names
277
- >>> important_docs = docs.filter_by(["config.yaml", "settings.json"])
278
- >>> if not important_docs: # Check if empty
279
- ... print("No important documents found")
280
- """
281
- if isinstance(arg, str):
282
- # Filter by single name
283
- return DocumentList([doc for doc in self if doc.name == arg])
284
- elif isinstance(arg, type):
285
- # Filter by single type (including subclasses)
286
- # The type system ensures arg is type[Document] due to overloads
287
- return DocumentList([doc for doc in self if isinstance(doc, arg)])
288
- else:
289
- # Try to consume as iterable
290
- try:
291
- # Convert to list to check the first element and allow reuse
292
- items = list(arg) # type: ignore[arg-type]
293
- if not items:
294
- return DocumentList()
295
-
296
- first_item = items[0]
297
- if isinstance(first_item, str):
298
- # Iterable of names - validate all items are strings
299
- for item in items:
300
- if not isinstance(item, str):
301
- raise TypeError(
302
- "Iterable must contain only strings or only Document types, "
303
- "not mixed types"
304
- )
305
- names_set = set(items)
306
- return DocumentList([doc for doc in self if doc.name in names_set])
307
- elif isinstance(first_item, type): # type: ignore[reportUnnecessaryIsInstance]
308
- # Iterable of document types - validate all items are types
309
- for item in items:
310
- if not isinstance(item, type):
311
- raise TypeError(
312
- "Iterable must contain only strings or only Document types, "
313
- "not mixed types"
314
- )
315
- # Convert to set for efficient lookup
316
- types_set = set(items)
317
- # Filter documents that match any of the requested types
318
- matching = [
319
- doc
320
- for doc in self
321
- if any(isinstance(doc, doc_type) for doc_type in types_set) # type: ignore[arg-type]
322
- ]
323
- return DocumentList(matching)
324
- else:
325
- raise TypeError(
326
- f"Iterable must contain strings or Document types, "
327
- f"got {type(first_item).__name__}"
328
- )
329
- except (TypeError, AttributeError) as e:
330
- # If the error message already mentions Iterable, re-raise it
331
- if "Iterable" in str(e) or "strings or Document types" in str(e):
332
- raise
333
- # Otherwise, provide a generic error message
334
- raise TypeError(f"Invalid argument type for filter_by: {type(arg).__name__}") from e
335
-
336
- @overload
337
- def get_by(self, arg: str) -> Document: ...
338
-
339
- @overload
340
- def get_by(self, arg: type[Document]) -> Document: ...
341
-
342
- @overload
343
- def get_by(self, arg: str, required: bool = True) -> Document | None: ...
344
-
345
- @overload
346
- def get_by(self, arg: type[Document], required: bool = True) -> Document | None: ...
347
-
348
- def get_by(self, arg: str | type[Document], required: bool = True) -> Document | None:
349
- """Get EXACTLY ONE document by name or type.
350
-
351
- @public
352
-
353
- IMPORTANT: This method expects to find exactly one matching document.
354
- - If no matches and required=True: raises ValueError
355
- - If no matches and required=False: returns None
356
- - If multiple matches: ALWAYS raises ValueError (ambiguous)
357
-
358
- When required=True (default), you do NOT need to check for None:
359
- >>> doc = docs.get_by("config.yaml") # Will raise if not found
360
- >>> # No need for: if doc is not None <- This is redundant!
361
- >>> print(doc.content) # Safe to use directly
362
-
363
- Args:
364
- arg: Document name (str) or document type.
365
- required: If True (default), raises ValueError when not found.
366
- If False, returns None when not found.
367
-
368
- Returns:
369
- The single matching document, or None if not found and required=False.
370
-
371
- Raises:
372
- ValueError: If required=True and document not found, OR if multiple
373
- documents match (ambiguous result).
374
- TypeError: If arg is not a string or Document type.
375
-
376
- Example:
377
- >>> # CORRECT - No need to check for None when required=True (default)
378
- >>> doc = docs.get_by("file.txt") # Raises if not found
379
- >>> print(doc.content) # Safe to use directly
380
- >>>
381
- >>> # When using required=False, check for None
382
- >>> doc = docs.get_by("optional.txt", required=False)
383
- >>> if doc is not None:
384
- ... print(doc.content)
385
- >>>
386
- >>> # Will raise if multiple documents have same type
387
- >>> # Use filter_by() instead if you want all matches
388
- >>> try:
389
- ... doc = docs.get_by(ConfigDocument) # Error if 2+ configs
390
- >>> except ValueError as e:
391
- ... configs = docs.filter_by(ConfigDocument) # Get all instead
392
- """
393
- if isinstance(arg, str):
394
- # Get by name - collect all matches to check for duplicates
395
- matches = [doc for doc in self if doc.name == arg]
396
- if len(matches) > 1:
397
- raise ValueError(
398
- f"Multiple documents found with name '{arg}'. "
399
- f"Found {len(matches)} matches. Use filter_by() to get all matches."
400
- )
401
- if matches:
402
- return matches[0]
403
- if required:
404
- raise ValueError(f"Document with name '{arg}' not found")
405
- return None
406
- elif isinstance(arg, type): # type: ignore[reportUnnecessaryIsInstance]
407
- # Get by type (including subclasses) - collect all matches
408
- matches = [doc for doc in self if isinstance(doc, arg)]
409
- if len(matches) > 1:
410
- raise ValueError(
411
- f"Multiple documents found of type '{arg.__name__}'. "
412
- f"Found {len(matches)} matches. Use filter_by() to get all matches."
413
- )
414
- if matches:
415
- return matches[0]
416
- if required:
417
- raise ValueError(f"Document of type '{arg.__name__}' not found")
418
- return None
419
- else:
420
- raise TypeError(f"Invalid argument type for get_by: {type(arg)}")
@@ -1,112 +0,0 @@
1
- """Flow-specific document base class for persistent pipeline data.
2
-
3
- @public
4
-
5
- This module provides the FlowDocument abstract base class for documents
6
- that need to persist across Prefect flow runs and between pipeline steps.
7
- """
8
-
9
- from typing import Literal, final
10
-
11
- from .document import Document
12
-
13
-
14
- class FlowDocument(Document):
15
- """Abstract base class for documents that persist across flow runs.
16
-
17
- @public
18
-
19
- FlowDocument is used for data that needs to be saved between pipeline
20
- steps and across multiple flow executions. These documents are typically
21
- written to the file system using the deployment utilities.
22
-
23
- Key characteristics:
24
- - Persisted to file system between pipeline steps
25
- - Survives across multiple flow runs
26
- - Used for flow inputs and outputs
27
- - Saved in directories organized by the document's type/name
28
-
29
- Creating FlowDocuments:
30
- Same as Document - use `create()` for automatic conversion, `__init__` for bytes.
31
- See Document.create() for detailed usage examples.
32
-
33
- Persistence:
34
- Documents are saved under an output directory path associated with the document's type/name.
35
- For example: output/my_doc/data.json
36
-
37
- Note:
38
- - Cannot instantiate FlowDocument directly - must subclass
39
- - Used with FlowConfig to define flow input/output types
40
- - No additional abstract methods to implement
41
- """
42
-
43
- def __init__(
44
- self,
45
- *,
46
- name: str,
47
- content: bytes,
48
- description: str | None = None,
49
- sources: list[str] | None = None,
50
- ) -> None:
51
- """Initialize a FlowDocument with raw bytes content.
52
-
53
- See Document.__init__() for parameter details and usage notes.
54
-
55
- Prevents direct instantiation of the abstract FlowDocument class.
56
- FlowDocument must be subclassed for specific document types.
57
-
58
- Args:
59
- name: Document filename (required, keyword-only)
60
- content: Document content as raw bytes (required, keyword-only)
61
- description: Optional human-readable description (keyword-only)
62
- sources: Optional list of strings for provenance tracking
63
-
64
- Raises:
65
- TypeError: If attempting to instantiate FlowDocument directly
66
- instead of using a concrete subclass.
67
-
68
- Example:
69
- >>> from enum import StrEnum
70
- >>>
71
- >>> # Simple subclass:
72
- >>> class MyFlowDoc(FlowDocument):
73
- ... pass
74
- >>>
75
- >>> # With FILES restriction:
76
- >>> class RestrictedDoc(FlowDocument):
77
- ... class FILES(StrEnum):
78
- ... DATA = "data.json"
79
- ... METADATA = "metadata.yaml"
80
- >>>
81
- >>> # Direct constructor - only for bytes:
82
- >>> doc = MyFlowDoc(name="test.bin", content=b"raw data")
83
- >>>
84
- >>> # RECOMMENDED - use create for automatic conversion:
85
- >>> doc = RestrictedDoc.create(name="data.json", content={"key": "value"})
86
- >>> # This would raise DocumentNameError:
87
- >>> # doc = RestrictedDoc.create(name="other.json", content={})
88
- """
89
- if type(self) is FlowDocument:
90
- raise TypeError("Cannot instantiate abstract FlowDocument class directly")
91
-
92
- # Only pass sources if not None to let Pydantic's default_factory handle it
93
- if sources is not None:
94
- super().__init__(name=name, content=content, description=description, sources=sources)
95
- else:
96
- super().__init__(name=name, content=content, description=description)
97
-
98
- @final
99
- def get_base_type(self) -> Literal["flow"]:
100
- """Return the base type identifier for flow documents.
101
-
102
- This method is final and cannot be overridden by subclasses.
103
- It identifies this document as a flow-persistent document.
104
-
105
- Returns:
106
- "flow" - Indicates this document persists across flow runs.
107
-
108
- Note:
109
- This determines the document's lifecycle and persistence behavior
110
- in the pipeline system.
111
- """
112
- return "flow"