proceed 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. proceed-0.0.1/.gitignore +140 -0
  2. proceed-0.0.1/LICENSE +24 -0
  3. proceed-0.0.1/PKG-INFO +83 -0
  4. proceed-0.0.1/README.md +66 -0
  5. proceed-0.0.1/dev-environment.yml +10 -0
  6. proceed-0.0.1/pyproject.toml +69 -0
  7. proceed-0.0.1/src/fizzbuzz/Dockerfile +7 -0
  8. proceed-0.0.1/src/fizzbuzz/__init__.py +0 -0
  9. proceed-0.0.1/src/fizzbuzz/fizzbuzz.py +61 -0
  10. proceed-0.0.1/src/proceed/__about__.py +1 -0
  11. proceed-0.0.1/src/proceed/__init__.py +0 -0
  12. proceed-0.0.1/src/proceed/aggregator.py +123 -0
  13. proceed-0.0.1/src/proceed/cli.py +161 -0
  14. proceed-0.0.1/src/proceed/config_options.py +265 -0
  15. proceed-0.0.1/src/proceed/docker_runner.py +325 -0
  16. proceed-0.0.1/src/proceed/file_matching.py +52 -0
  17. proceed-0.0.1/src/proceed/model.py +813 -0
  18. proceed-0.0.1/src/proceed/yaml_data.py +97 -0
  19. proceed-0.0.1/tests/fizzbuzz/fixture_files/classify_expected.txt +100 -0
  20. proceed-0.0.1/tests/fizzbuzz/fixture_files/classify_in.txt +100 -0
  21. proceed-0.0.1/tests/fizzbuzz/fixture_files/filter_buzz_expected.txt +6 -0
  22. proceed-0.0.1/tests/fizzbuzz/fixture_files/filter_fizz_expected.txt +33 -0
  23. proceed-0.0.1/tests/fizzbuzz/fixture_files/fizzbuzz_pipeline_spec.yaml +27 -0
  24. proceed-0.0.1/tests/fizzbuzz/test_fizzbuzz.py +114 -0
  25. proceed-0.0.1/tests/fizzbuzz/test_fizzbuzz_pipeline.py +188 -0
  26. proceed-0.0.1/tests/proceed/fixture_files/config_options/custom_options.yaml +1 -0
  27. proceed-0.0.1/tests/proceed/fixture_files/config_options/local_options.yaml +3 -0
  28. proceed-0.0.1/tests/proceed/fixture_files/config_options/user_options.yaml +3 -0
  29. proceed-0.0.1/tests/proceed/fixture_files/custom_columns/dictionary.json +6 -0
  30. proceed-0.0.1/tests/proceed/fixture_files/custom_columns/dictionary.yaml +4 -0
  31. proceed-0.0.1/tests/proceed/fixture_files/custom_columns/invalid.yaml +1 -0
  32. proceed-0.0.1/tests/proceed/fixture_files/custom_columns/list.yaml +1 -0
  33. proceed-0.0.1/tests/proceed/fixture_files/files_spec.yaml +19 -0
  34. proceed-0.0.1/tests/proceed/fixture_files/happy_spec.yaml +7 -0
  35. proceed-0.0.1/tests/proceed/fixture_files/sad_spec.yaml +5 -0
  36. proceed-0.0.1/tests/proceed/test_aggregator.py +414 -0
  37. proceed-0.0.1/tests/proceed/test_cli.py +347 -0
  38. proceed-0.0.1/tests/proceed/test_config_options.py +125 -0
  39. proceed-0.0.1/tests/proceed/test_docker_runner.py +811 -0
  40. proceed-0.0.1/tests/proceed/test_file_matching.py +71 -0
  41. proceed-0.0.1/tests/proceed/test_model.py +330 -0
@@ -0,0 +1,140 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/build/
73
+ docs/source/generated
74
+
75
+ # PyBuilder
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ .python-version
87
+
88
+ # pipenv
89
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
90
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
91
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
92
+ # install all needed dependencies.
93
+ #Pipfile.lock
94
+
95
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
96
+ __pypackages__/
97
+
98
+ # Celery stuff
99
+ celerybeat-schedule
100
+ celerybeat.pid
101
+
102
+ # SageMath parsed files
103
+ *.sage.py
104
+
105
+ # Environments
106
+ .env
107
+ .venv
108
+ env/
109
+ venv/
110
+ ENV/
111
+ env.bak/
112
+ venv.bak/
113
+
114
+ # Spyder project settings
115
+ .spyderproject
116
+ .spyproject
117
+
118
+ # Rope project settings
119
+ .ropeproject
120
+
121
+ # mkdocs documentation
122
+ /site
123
+
124
+ # mypy
125
+ .mypy_cache/
126
+ .dmypy.json
127
+ dmypy.json
128
+
129
+ # Pyre type checker
130
+ .pyre/
131
+
132
+ # Visual Studio Code
133
+ .vscode/
134
+
135
+ # Proceed
136
+ proceed_out/
137
+ hello_world.yaml
138
+ my/work/
139
+ fizzbuzz.yaml
140
+ summary.csv
proceed-0.0.1/LICENSE ADDED
@@ -0,0 +1,24 @@
1
+ This is free and unencumbered software released into the public domain.
2
+
3
+ Anyone is free to copy, modify, publish, use, compile, sell, or
4
+ distribute this software, either in source code form or as a compiled
5
+ binary, for any purpose, commercial or non-commercial, and by any
6
+ means.
7
+
8
+ In jurisdictions that recognize copyright laws, the author or authors
9
+ of this software dedicate any and all copyright interest in the
10
+ software to the public domain. We make this dedication for the benefit
11
+ of the public at large and to the detriment of our heirs and
12
+ successors. We intend this dedication to be an overt act of
13
+ relinquishment in perpetuity of all present and future rights to this
14
+ software under copyright law.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
+ IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
+ OTHER DEALINGS IN THE SOFTWARE.
23
+
24
+ For more information, please refer to <https://unlicense.org>
proceed-0.0.1/PKG-INFO ADDED
@@ -0,0 +1,83 @@
1
+ Metadata-Version: 2.3
2
+ Name: proceed
3
+ Version: 0.0.1
4
+ Summary: Execute pipelines based on files and containers.
5
+ Project-URL: Homepage, https://github.com/benjamin-heasly/proceed
6
+ Project-URL: Bug Tracker, https://github.com/benjamin-heasly/proceed/issues
7
+ Author-email: Ben Heasly <benjamin.heasly@gmail.com>
8
+ License-File: LICENSE
9
+ Classifier: License :: OSI Approved :: The Unlicense (Unlicense)
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Programming Language :: Python :: 3
12
+ Requires-Python: >=3.11
13
+ Requires-Dist: docker
14
+ Requires-Dist: pandas
15
+ Requires-Dist: pyyaml
16
+ Description-Content-Type: text/markdown
17
+
18
+ # Proceed
19
+ Declarative file processing with YAML and containers.
20
+
21
+ **Proceed** is a Python library and CLI tool for declarative batch processing.
22
+ It reads a **pipeline** specification declared in [YAML](https://yaml.org/).
23
+ A pipeline contains a list of **steps** that are based on [Docker](https://www.docker.com/) images and containers.
24
+
25
+ Each pipeline execution accepts values for declared **args**, allowing controlled, explicit configuration of steps at runtime.
26
+ Each execution produces an **execution record** that accounts for accepted arg values, step logs, and checksums of input and output files.
27
+
28
+ Hopefully, Proceed will allow you to express everything you need to know about your processing pipeline in a *"nothing up my sleeves"* way. The pipeline specification should be complete enough to share with others who have Proceed and Docker installed.
29
+ The execution record should allow for auditing of expected outcomes and reproducibility.
30
+
31
+ ## docs
32
+ Here are the [main docs](https://benjamin-heasly.github.io/proceed/index.html) for Proceed.
33
+
34
+ # Installation
35
+ Proceed requires [Python](https://www.python.org/) and [Docker](https://www.docker.com/) to be installed.
36
+ With those, it should be able to run a wide variety of pipelines and steps via containers.
37
+
38
+ ## pip
39
+ Proceed itself is not yet available on [PyPI](https://pypi.org/).
40
+ When it is, this will be the recommended way to install Proceed:
41
+
42
+ ```
43
+ $ pip install proceed # TODO
44
+ ```
45
+
46
+ ## git and pip
47
+ You can also install Proceed from source.
48
+
49
+ ```
50
+ $ pip install git+https://github.com/benjamin-heasly/proceed.git
51
+
52
+ # editable mode
53
+ $ git clone https://github.com/benjamin-heasly/proceed.git
54
+ $ pip install -e ./proceed
55
+ ```
56
+
57
+ ## check installation
58
+ You can check if Proceed installed correctly using the `proceed` command.
59
+
60
+ ```
61
+ $ proceed --version
62
+ Proceed x.y.z
63
+
64
+ $ proceed --help
65
+ usage etc...
66
+ ```
67
+
68
+ ## development and testing
69
+
70
+ You can set up a development environment with [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html) and [dev-environment.yml](./dev-environment.yml).
71
+
72
+ ```
73
+ conda env create -f dev-environment.yml
74
+ # or
75
+ conda env update -f dev-environment.yml
76
+ ```
77
+
78
+ With that, you should be able to run through the Proceed unit and integration tests.
79
+
80
+ ```
81
+ conda activate proceed-dev
82
+ hatch run test:cov
83
+ ```
@@ -0,0 +1,66 @@
1
+ # Proceed
2
+ Declarative file processing with YAML and containers.
3
+
4
+ **Proceed** is a Python library and CLI tool for declarative batch processing.
5
+ It reads a **pipeline** specification declared in [YAML](https://yaml.org/).
6
+ A pipeline contains a list of **steps** that are based on [Docker](https://www.docker.com/) images and containers.
7
+
8
+ Each pipeline execution accepts values for declared **args**, allowing controlled, explicit configuration of steps at runtime.
9
+ Each execution produces an **execution record** that accounts for accepted arg values, step logs, and checksums of input and output files.
10
+
11
+ Hopefully, Proceed will allow you to express everything you need to know about your processing pipeline in a *"nothing up my sleeves"* way. The pipeline specification should be complete enough to share with others who have Proceed and Docker installed.
12
+ The execution record should allow for auditing of expected outcomes and reproducibility.
13
+
14
+ ## docs
15
+ Here are the [main docs](https://benjamin-heasly.github.io/proceed/index.html) for Proceed.
16
+
17
+ # Installation
18
+ Proceed requires [Python](https://www.python.org/) and [Docker](https://www.docker.com/) to be installed.
19
+ With those, it should be able to run a wide variety of pipelines and steps via containers.
20
+
21
+ ## pip
22
+ Proceed itself is not yet available on [PyPI](https://pypi.org/).
23
+ When it is, this will be the recommended way to install Proceed:
24
+
25
+ ```
26
+ $ pip install proceed # TODO
27
+ ```
28
+
29
+ ## git and pip
30
+ You can also install Proceed from source.
31
+
32
+ ```
33
+ $ pip install git+https://github.com/benjamin-heasly/proceed.git
34
+
35
+ # editable mode
36
+ $ git clone https://github.com/benjamin-heasly/proceed.git
37
+ $ pip install -e ./proceed
38
+ ```
39
+
40
+ ## check installation
41
+ You can check if Proceed installed correctly using the `proceed` command.
42
+
43
+ ```
44
+ $ proceed --version
45
+ Proceed x.y.z
46
+
47
+ $ proceed --help
48
+ usage etc...
49
+ ```
50
+
51
+ ## development and testing
52
+
53
+ You can set up a development environment with [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html) and [dev-environment.yml](./dev-environment.yml).
54
+
55
+ ```
56
+ conda env create -f dev-environment.yml
57
+ # or
58
+ conda env update -f dev-environment.yml
59
+ ```
60
+
61
+ With that, you should be able to run through the Proceed unit and integration tests.
62
+
63
+ ```
64
+ conda activate proceed-dev
65
+ hatch run test:cov
66
+ ```
@@ -0,0 +1,10 @@
1
+ name: proceed-dev
2
+ dependencies:
3
+ - python=3.11
4
+ - pip=24.0
5
+ - pip:
6
+ - docker==7.1.0
7
+ - pyyaml==6.0.2
8
+ - pandas==2.2.2
9
+ - pytest==8.3.2
10
+ - hatch==1.12.0
@@ -0,0 +1,69 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "proceed"
7
+ description = "Execute pipelines based on files and containers."
8
+ keywords = []
9
+ authors = [
10
+ { name="Ben Heasly", email="benjamin.heasly@gmail.com" }
11
+ ]
12
+ readme = "README.md"
13
+ requires-python = ">=3.11"
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "License :: OSI Approved :: The Unlicense (Unlicense)",
17
+ "Operating System :: OS Independent",
18
+ ]
19
+ dependencies = ["docker", "PyYAML", "pandas"]
20
+ dynamic = ["version"]
21
+
22
+ [project.urls]
23
+ "Homepage" = "https://github.com/benjamin-heasly/proceed"
24
+ "Bug Tracker" = "https://github.com/benjamin-heasly/proceed/issues"
25
+
26
+ [project.scripts]
27
+ proceed = "proceed.cli:main"
28
+
29
+ [tool.pytest.ini_options]
30
+ addopts = [
31
+ "--import-mode=importlib",
32
+ ]
33
+
34
+ [tool.hatch]
35
+
36
+ [tool.hatch.version]
37
+ path = "src/proceed/__about__.py"
38
+
39
+ [tool.hatch.envs.test]
40
+ dependencies = [
41
+ "pytest",
42
+ "pytest-cov",
43
+ ]
44
+
45
+ [tool.hatch.envs.test.scripts]
46
+ cov = 'pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=proceed --cov=tests -vv {args}'
47
+
48
+ [tool.hatch.build.targets.sdist]
49
+ exclude = [
50
+ "/.github",
51
+ "/docs",
52
+ ]
53
+
54
+ [tool.hatch.build.targets.wheel]
55
+ packages = ["src/proceed"]
56
+
57
+ [tool.hatch.envs.docs]
58
+ dependencies = [
59
+ "sphinx",
60
+ ]
61
+
62
+ [tool.hatch.envs.docs.scripts]
63
+ # A long command!
64
+ # This gets sphinx under control of hatch and pyproject.toml and eliminates the clunky sphinx Makefile.
65
+ html = '''sphinx-build "docs/source" "docs/build" -b html -c docs/ -D project="$(hatch project metadata name)" -D release="$(hatch version)" -D author="Ben Heasly" {args}'''
66
+ clean = [
67
+ "rm -rf docs/build",
68
+ "rm -rf docs/source/generated"
69
+ ]
@@ -0,0 +1,7 @@
1
+ FROM python:3.7
2
+
3
+ WORKDIR /fizzbuzz
4
+ COPY ./fizzbuzz.py /fizzbuzz/fizzbuzz.py
5
+
6
+ ENTRYPOINT ["python3", "fizzbuzz.py"]
7
+ CMD ["--help"]
File without changes
@@ -0,0 +1,61 @@
1
+ import argparse
2
+ import sys
3
+ from typing import Optional, Sequence
4
+
5
+
6
def classify(number):
    """Return the fizzbuzz suffix for a number.

    Multiples of 3 contribute "fizz" and multiples of 5 contribute "buzz",
    in that order, so multiples of both yield "fizzbuzz".  Any other number
    yields the empty string.
    """
    fizz = "fizz" if number % 3 == 0 else ""
    buzz = "buzz" if number % 5 == 0 else ""
    return fizz + buzz
15
+
16
+
17
def append(line):
    """Return the line with its fizzbuzz suffix appended, if it has one.

    The line is parsed with int() and classified.  When the suffix is
    non-empty the result is "<line> <suffix>"; otherwise the line is
    returned unchanged.
    """
    suffix = classify(int(line))
    return f"{line} {suffix}" if suffix else line
24
+
25
+
26
def classify_lines(in_file, out_file):
    """Write a fizzbuzz-classified copy of in_file to out_file.

    Each input line is stripped of surrounding whitespace, passed through
    append(), and written out followed by a newline.
    """
    # The output file is opened (and truncated) before the input file is read.
    with open(out_file, 'w') as writer, open(in_file) as reader:
        writer.writelines(append(raw_line.strip()) + "\n" for raw_line in reader)
32
+
33
+
34
def filter_lines(in_file, out_file, substring):
    """Copy to out_file only the lines of in_file that contain substring.

    Matching lines are written exactly as read, including their line endings.
    """
    # The output file is opened (and truncated) before the input file is read.
    with open(out_file, 'w') as writer, open(in_file) as reader:
        writer.writelines(row for row in reader if substring in row)
40
+
41
+
42
def main(argv: Optional[Sequence[str]] = None) -> int:
    """Command line entry point for the fizzbuzz example.

    Args:
        argv: Command line arguments to parse.  When None, argparse reads
            them from sys.argv.

    Returns:
        0 once the chosen operation has completed.
    """
    cli = argparse.ArgumentParser(
        description="Classify and filter lines of text files, according to fizzbuzz."
    )
    cli.add_argument("in_file", type=str, help="input file to read")
    cli.add_argument("out_file", type=str, help="output file to write")
    cli.add_argument(
        "operation",
        type=str,
        help="operation to perform",
        choices=["classify", "filter"],
    )
    cli.add_argument(
        "--substring",
        type=str,
        help="filter substring for lines to keep",
        default="fizz",
    )
    options = cli.parse_args(argv)

    # argparse already restricts "operation" to these two choices.
    operation = options.operation
    if operation == "filter":
        filter_lines(options.in_file, options.out_file, options.substring)
    elif operation == "classify":
        classify_lines(options.in_file, options.out_file)

    print("OK.")
    return 0


if __name__ == '__main__':
    sys.exit(main())
@@ -0,0 +1 @@
1
+ __version__ = "0.0.1"
File without changes
@@ -0,0 +1,123 @@
1
+ import logging
2
+ from typing import Any
3
+ from pathlib import Path
4
+ from pandas import DataFrame
5
+ import yaml
6
+ from proceed.model import ExecutionRecord, Pipeline, Step, Timing, StepResult
7
+ from proceed.file_matching import flatten_matches, file_summary, hash_contents
8
+
9
+ def summarize_results(results_path: Path, columns: list[str] = None, sort_rows_by: list[str] = None) -> DataFrame:
10
+ summary_rows = []
11
+ group_paths = [path for path in results_path.iterdir() if path.is_dir()]
12
+ for group_path in group_paths:
13
+ id_paths = [path for path in group_path.iterdir() if path.is_dir()]
14
+ for id_path in id_paths:
15
+ for yaml_file in id_path.glob("execution_record.yaml"):
16
+ execution_record = safe_read_execution_record(yaml_file)
17
+ if execution_record:
18
+ execution_summary = summarize_execution(id_path.stem, group_path.stem, execution_record)
19
+ summary_rows = summary_rows + execution_summary
20
+
21
+ summary = DataFrame(summary_rows)
22
+
23
+ if columns:
24
+ summary_columns = list(summary.columns)
25
+ usable_columns = [column for column in columns if column in summary_columns]
26
+ summary = summary.filter(items=columns)
27
+
28
+ if sort_rows_by:
29
+ summary_columns = list(summary.columns)
30
+ usable_columns = [column for column in sort_rows_by if column in summary_columns]
31
+ summary = summary.sort_values(usable_columns)
32
+
33
+ return summary
34
+
35
+
36
def safe_read_execution_record(yaml_file: Path) -> "ExecutionRecord | None":
    """Read an execution record from a YAML file, or return None on failure.

    Args:
        yaml_file: Path of the candidate execution record file.

    Returns:
        The result of ExecutionRecord.from_yaml() on the file's text, or None
        when the file could not be opened, read, or parsed.
    """
    try:
        with open(yaml_file) as f:
            return ExecutionRecord.from_yaml(f.read())
    except Exception:
        # Deliberately best-effort: any ordinary failure means "skip this file".
        # Exception (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        logging.error(f"Skipping file that seems not to be a Proceed execution record: {yaml_file}")
        return None
43
+
44
+
45
def summarize_execution(results_id: str, group: str, execution_record: ExecutionRecord) -> list[dict[str, str]]:
    """Flatten one execution record into summary rows.

    Pipeline-level columns from summarize_pipeline() are repeated on every
    row.  Steps of the amended pipeline are paired positionally with the
    recorded step results, and each pair contributes the rows returned by
    summarize_step_and_result().

    Args:
        results_id: Identifier to record in each row.
        group: Results group to record in each row.
        execution_record: The record to summarize.

    Returns:
        One dictionary per summarized file, across all paired steps.
    """
    pipeline_columns = summarize_pipeline(
        results_id,
        group,
        execution_record.amended,
        execution_record.timing,
    )

    # zip() stops at the shorter sequence, so unpaired steps or results are left out.
    paired = zip(execution_record.amended.steps, execution_record.step_results)
    per_step_rows = [summarize_step_and_result(step, result) for step, result in paired]

    rows = []
    for step_rows in per_step_rows:
        for file_columns in step_rows:
            rows.append({**pipeline_columns, **file_columns})
    return rows
53
+
54
+
55
def summarize_pipeline(results_id: str, group: str, pipeline: Pipeline, timing: Timing) -> dict[str, str]:
    """Build the pipeline-level columns shared by every summary row.

    Args:
        results_id: Identifier stored under "results_id".
        group: Results group stored under "results_group".
        pipeline: Pipeline whose version, description and args are reported.
        timing: Timing whose start, finish and duration are reported.

    Returns:
        A dictionary of the fixed pipeline columns followed by one
        "arg_<name>" column per pipeline arg.
    """
    summary = dict(
        proceed_version=pipeline.version,
        results_id=results_id,
        results_group=group,
        pipeline_description=pipeline.description,
        pipeline_start=timing.start,
        pipeline_finish=timing.finish,
        pipeline_duration=timing.duration,
    )

    # Each pipeline arg gets its own column, prefixed to keep it apart from the fixed ones.
    for name, value in pipeline.args.items():
        summary[f"arg_{name}"] = value

    return summary
70
+
71
+
72
def summarize_step_and_result(step: Step, result: StepResult) -> list[dict[str, Any]]:
    """Produce one summary row per file associated with a step result.

    Every row carries the stringified step attributes, the stringified
    result attributes that are not flattened separately, the step timing,
    the columns of one file, and any custom columns collected from the
    step's summary files.

    Args:
        step: The step that was run.
        result: The recorded result of running that step.

    Returns:
        Rows for the log file first, then the "done", "in", "out" and
        "summary" files, in that order.
    """
    shared_columns = {f"step_{name}": str(value) for name, value in step.to_dict().items()}

    # These result attributes are reported through dedicated timing columns
    # or per-file rows below, rather than as one stringified column each.
    handled_separately = ("timing", "log_file", "files_done", "files_in", "files_out", "files_summary")
    for name, value in result.to_dict().items():
        if name not in handled_separately:
            shared_columns[f"step_{name}"] = str(value)

    step_timing = result.timing
    shared_columns["step_start"] = step_timing.start
    shared_columns["step_finish"] = step_timing.finish
    shared_columns["step_duration"] = step_timing.duration

    # A log row is always emitted; it is blank when the step recorded no log file.
    if result.log_file:
        log_path = Path(result.log_file)
        log_row = file_summary(
            volume=log_path.parent.as_posix(),
            path=log_path.name,
            digest=hash_contents(log_path),
            file_role="log",
        )
    else:
        log_row = file_summary(volume="", path="", digest="", file_role="log")

    file_rows = [log_row]
    file_rows += flatten_matches(result.files_done, file_role="done")
    file_rows += flatten_matches(result.files_in, file_role="in")
    file_rows += flatten_matches(result.files_out, file_role="out")
    summary_rows = flatten_matches(result.files_summary, file_role="summary")
    file_rows += summary_rows

    # Later summary files override earlier ones when they define the same column.
    custom_columns = {}
    for summary_row in summary_rows:
        custom_columns.update(collect_custom_columns(summary_row["file_volume"], summary_row["file_path"]))

    return [{**shared_columns, **file_row, **custom_columns} for file_row in file_rows]
103
+
104
+
105
def collect_custom_columns(file_volume: str, file_path: str) -> dict[str, str]:
    """Read custom summary columns from a step's summary file.

    Args:
        file_volume: Directory that contains the summary file.
        file_path: Path of the summary file within file_volume.

    Returns:
        An empty dictionary when the path is not an existing regular file.
        The parsed mapping when the file holds a non-empty YAML dictionary.
        Otherwise a single column named after the file stem whose value is
        the stripped file text.
    """
    path = Path(file_volume, file_path)
    # is_file() is already False for paths that do not exist.
    if not path.is_file():
        return {}

    with open(path) as f:
        content = f.read()

    try:
        parsed = yaml.safe_load(content)
    except yaml.YAMLError:
        # YAMLError is the base class of PyYAML's scanner, parser, composer
        # and constructor errors, so any text that fails to load as YAML
        # falls back to plain text instead of aborting the summary.
        logging.info(f"Treating non-YAML file as plain text: {path.as_posix()}")
    else:
        if parsed and isinstance(parsed, dict):
            return parsed

        logging.info(f"Treating non-dictionary YAML as plain text: {path.as_posix()}")

    return {path.stem: content.strip()}