pyantz 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. pyantz-0.1.0/.github/workflows/bandit.yml +21 -0
  2. pyantz-0.1.0/.github/workflows/coverage_badge.yml +34 -0
  3. pyantz-0.1.0/.github/workflows/mypy.yml +28 -0
  4. pyantz-0.1.0/.github/workflows/pylint.yml +31 -0
  5. pyantz-0.1.0/.github/workflows/test.yml +29 -0
  6. pyantz-0.1.0/.gitignore +16 -0
  7. pyantz-0.1.0/.python-version +1 -0
  8. pyantz-0.1.0/LICENSE.txt +19 -0
  9. pyantz-0.1.0/Makefile +23 -0
  10. pyantz-0.1.0/PKG-INFO +98 -0
  11. pyantz-0.1.0/README.md +79 -0
  12. pyantz-0.1.0/docs/examples/simple_runner.json +38 -0
  13. pyantz-0.1.0/pyproject.toml +54 -0
  14. pyantz-0.1.0/src/pyantz/__init__.py +0 -0
  15. pyantz-0.1.0/src/pyantz/infrastructure/__init__.py +4 -0
  16. pyantz-0.1.0/src/pyantz/infrastructure/config/__init__.py +0 -0
  17. pyantz-0.1.0/src/pyantz/infrastructure/config/base.py +332 -0
  18. pyantz-0.1.0/src/pyantz/infrastructure/config/get_functions.py +132 -0
  19. pyantz-0.1.0/src/pyantz/infrastructure/config/local_submitter.py +20 -0
  20. pyantz-0.1.0/src/pyantz/infrastructure/config/submitters/__init__.py +0 -0
  21. pyantz-0.1.0/src/pyantz/infrastructure/config/submitters/slurm_submitter.py +50 -0
  22. pyantz-0.1.0/src/pyantz/infrastructure/core/__init__.py +0 -0
  23. pyantz-0.1.0/src/pyantz/infrastructure/core/job.py +42 -0
  24. pyantz-0.1.0/src/pyantz/infrastructure/core/manager.py +20 -0
  25. pyantz-0.1.0/src/pyantz/infrastructure/core/mutable_job.py +44 -0
  26. pyantz-0.1.0/src/pyantz/infrastructure/core/pipeline.py +198 -0
  27. pyantz-0.1.0/src/pyantz/infrastructure/core/status.py +14 -0
  28. pyantz-0.1.0/src/pyantz/infrastructure/core/submitter_job.py +67 -0
  29. pyantz-0.1.0/src/pyantz/infrastructure/core/variables.py +358 -0
  30. pyantz-0.1.0/src/pyantz/infrastructure/distributed_queue/__init__.py +9 -0
  31. pyantz-0.1.0/src/pyantz/infrastructure/distributed_queue/relational/__init__.py +0 -0
  32. pyantz-0.1.0/src/pyantz/infrastructure/distributed_queue/relational/queue_orm.py +61 -0
  33. pyantz-0.1.0/src/pyantz/infrastructure/distributed_queue/relational/sqlite_queue.py +256 -0
  34. pyantz-0.1.0/src/pyantz/infrastructure/hpc/__init__.py +0 -0
  35. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/__init__.py +6 -0
  36. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/__init__.py +1 -0
  37. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/__init__.py +4 -0
  38. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/account.py +19 -0
  39. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/account_short.py +12 -0
  40. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/accounting.py +18 -0
  41. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/accounting_allocated.py +11 -0
  42. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/accounts_add_cond.py +15 -0
  43. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/acct_gather_energy.py +18 -0
  44. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/assoc.py +31 -0
  45. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/assoc_default.py +11 -0
  46. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/assoc_max.py +17 -0
  47. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/assoc_max_jobs.py +17 -0
  48. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/assoc_max_jobs_per.py +16 -0
  49. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/assoc_max_per.py +13 -0
  50. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/assoc_max_per_account.py +13 -0
  51. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/assoc_max_tres.py +19 -0
  52. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/assoc_max_tres_group.py +14 -0
  53. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/assoc_max_tres_minutes.py +15 -0
  54. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/assoc_max_tres_per.py +14 -0
  55. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/assoc_min.py +13 -0
  56. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/assoc_rec_set.py +36 -0
  57. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/assoc_shares_obj_wrap.py +28 -0
  58. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/assoc_shares_obj_wrap_fairshare.py +14 -0
  59. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/assoc_shares_obj_wrap_tres.py +16 -0
  60. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/assoc_short.py +15 -0
  61. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/bf_exit_fields.py +16 -0
  62. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/cluster_rec.py +22 -0
  63. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/cluster_rec_associations.py +13 -0
  64. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/cluster_rec_controller.py +12 -0
  65. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/controller_ping.py +16 -0
  66. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/coord.py +12 -0
  67. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/cron_entry.py +21 -0
  68. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/cron_entry_line.py +12 -0
  69. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/float64_no_val_struct.py +13 -0
  70. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/instance.py +18 -0
  71. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/instance_time.py +12 -0
  72. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/qos_limits_min_tres_per.py +13 -0
  73. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/shares_float128_tres.py +13 -0
  74. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/shares_uint64_tres.py +14 -0
  75. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/tres.py +14 -0
  76. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/uint32_no_val_struct.py +13 -0
  77. pyantz-0.1.0/src/pyantz/infrastructure/hpc/slurm/restful/models/uint64_no_val_struct.py +13 -0
  78. pyantz-0.1.0/src/pyantz/infrastructure/log/__init__.py +5 -0
  79. pyantz-0.1.0/src/pyantz/infrastructure/log/multiproc_logging.py +56 -0
  80. pyantz-0.1.0/src/pyantz/infrastructure/submitters/__init__.py +0 -0
  81. pyantz-0.1.0/src/pyantz/infrastructure/submitters/local.py +151 -0
  82. pyantz-0.1.0/src/pyantz/infrastructure/submitters/slurm/__init__.py +1 -0
  83. pyantz-0.1.0/src/pyantz/infrastructure/submitters/slurm/basic_slurm.py +137 -0
  84. pyantz-0.1.0/src/pyantz/jobs/__init__.py +51 -0
  85. pyantz-0.1.0/src/pyantz/jobs/analysis/__init__.py +0 -0
  86. pyantz-0.1.0/src/pyantz/jobs/analysis/count_dataframe.py +0 -0
  87. pyantz-0.1.0/src/pyantz/jobs/analysis/filter_dataframe.py +62 -0
  88. pyantz-0.1.0/src/pyantz/jobs/analysis/filter_parquet.py +57 -0
  89. pyantz-0.1.0/src/pyantz/jobs/branch/__init__.py +0 -0
  90. pyantz-0.1.0/src/pyantz/jobs/branch/compare.py +103 -0
  91. pyantz-0.1.0/src/pyantz/jobs/branch/create_pipelines_from_matrix.py +108 -0
  92. pyantz-0.1.0/src/pyantz/jobs/branch/explode_pipeline.py +57 -0
  93. pyantz-0.1.0/src/pyantz/jobs/branch/if_then.py +77 -0
  94. pyantz-0.1.0/src/pyantz/jobs/branch/parallel_pipelines.py +67 -0
  95. pyantz-0.1.0/src/pyantz/jobs/dispatch/__init__.py +0 -0
  96. pyantz-0.1.0/src/pyantz/jobs/dispatch/run_command.py +64 -0
  97. pyantz-0.1.0/src/pyantz/jobs/file/__init__.py +1 -0
  98. pyantz-0.1.0/src/pyantz/jobs/file/copy.py +102 -0
  99. pyantz-0.1.0/src/pyantz/jobs/file/delete.py +54 -0
  100. pyantz-0.1.0/src/pyantz/jobs/file/edit_json.py +118 -0
  101. pyantz-0.1.0/src/pyantz/jobs/file/make_dirs.py +37 -0
  102. pyantz-0.1.0/src/pyantz/jobs/nop.py +18 -0
  103. pyantz-0.1.0/src/pyantz/jobs/restart_pipeline.py +76 -0
  104. pyantz-0.1.0/src/pyantz/jobs/run_script.py +71 -0
  105. pyantz-0.1.0/src/pyantz/jobs/setup/__init__.py +1 -0
  106. pyantz-0.1.0/src/pyantz/jobs/variables/__init__.py +0 -0
  107. pyantz-0.1.0/src/pyantz/jobs/variables/assert_variable.py +45 -0
  108. pyantz-0.1.0/src/pyantz/jobs/variables/assign_environment_variable.py +44 -0
  109. pyantz-0.1.0/src/pyantz/jobs/variables/change_variable.py +71 -0
  110. pyantz-0.1.0/src/pyantz/jobs/variables/set_variable_from_function.py +62 -0
  111. pyantz-0.1.0/src/pyantz/py.typed +0 -0
  112. pyantz-0.1.0/src/pyantz/run.py +59 -0
  113. pyantz-0.1.0/tests/__init__.py +0 -0
  114. pyantz-0.1.0/tests/infrastructure/__init__.py +1 -0
  115. pyantz-0.1.0/tests/infrastructure/config/__init__.py +0 -0
  116. pyantz-0.1.0/tests/infrastructure/config/test_call_validation.py +99 -0
  117. pyantz-0.1.0/tests/infrastructure/config/test_job_configs.py +141 -0
  118. pyantz-0.1.0/tests/infrastructure/config/test_pipeline_configs.py +72 -0
  119. pyantz-0.1.0/tests/infrastructure/core/__init__.py +0 -0
  120. pyantz-0.1.0/tests/infrastructure/core/test_job.py +125 -0
  121. pyantz-0.1.0/tests/infrastructure/core/test_pipeline.py +177 -0
  122. pyantz-0.1.0/tests/infrastructure/core/test_variables.py +156 -0
  123. pyantz-0.1.0/tests/infrastructure/distributed_queue/__init__.py +0 -0
  124. pyantz-0.1.0/tests/infrastructure/distributed_queue/relational/test_sqlite_queue.py +171 -0
  125. pyantz-0.1.0/tests/infrastructure/hpc/__init__.py +0 -0
  126. pyantz-0.1.0/tests/infrastructure/hpc/test_slurm_basic.py +72 -0
  127. pyantz-0.1.0/tests/infrastructure/submitters/__init__.py +0 -0
  128. pyantz-0.1.0/tests/infrastructure/submitters/test_local_submitter.py +43 -0
  129. pyantz-0.1.0/tests/jobs/__init__.py +0 -0
  130. pyantz-0.1.0/tests/jobs/analysis/__init__.py +0 -0
  131. pyantz-0.1.0/tests/jobs/analysis/test_filter_dataframe.py +47 -0
  132. pyantz-0.1.0/tests/jobs/analysis/test_filter_parquet.py +53 -0
  133. pyantz-0.1.0/tests/jobs/branch/__init__.py +0 -0
  134. pyantz-0.1.0/tests/jobs/branch/test_compare.py +283 -0
  135. pyantz-0.1.0/tests/jobs/branch/test_create_pipelines_from_matrix.py +139 -0
  136. pyantz-0.1.0/tests/jobs/branch/test_explode_pipelines.py +148 -0
  137. pyantz-0.1.0/tests/jobs/branch/test_if_then.py +133 -0
  138. pyantz-0.1.0/tests/jobs/branch/test_parallel_pipelines.py +57 -0
  139. pyantz-0.1.0/tests/jobs/file/__init__.py +0 -0
  140. pyantz-0.1.0/tests/jobs/file/test_copy.py +148 -0
  141. pyantz-0.1.0/tests/jobs/file/test_delete.py +145 -0
  142. pyantz-0.1.0/tests/jobs/file/test_edit_json.py +211 -0
  143. pyantz-0.1.0/tests/jobs/file/test_make_dirs.py +135 -0
  144. pyantz-0.1.0/tests/jobs/test_job_infrastructure.py +118 -0
  145. pyantz-0.1.0/tests/jobs/test_nop.py +62 -0
  146. pyantz-0.1.0/tests/jobs/test_restart_pipeline.py +99 -0
  147. pyantz-0.1.0/tests/jobs/test_run_script.py +43 -0
  148. pyantz-0.1.0/tests/jobs/variables/__init__.py +0 -0
  149. pyantz-0.1.0/tests/jobs/variables/test_assert_variables.py +57 -0
  150. pyantz-0.1.0/tests/jobs/variables/test_change_variable.py +119 -0
  151. pyantz-0.1.0/tests/jobs/variables/test_set_variables_from_function.py +55 -0
  152. pyantz-0.1.0/tests/test_run.py +51 -0
  153. pyantz-0.1.0/uv.lock +1056 -0
@@ -0,0 +1,21 @@
1
+ name: Bandit Security
2
+ on: [push, pull_request]
3
+
4
+ jobs:
5
+ build:
6
+ name: Bandit
7
+ runs-on: ubuntu-latest
8
+
9
+ steps:
10
+ - name: Checkout the Repository
11
+ uses: actions/checkout@v4
12
+
13
+ - name: Set up Python 3.12
14
+ uses: actions/setup-python@v5
15
+ with:
16
+ python-version: "3.12"
17
+
18
+ - name: Run Bandit
19
+ env:
20
+ EXCLUDE_FILES: tests/*
21
+ run: bash <(curl -s https://raw.githubusercontent.com/CICDToolbox/bandit/master/pipeline.sh)
@@ -0,0 +1,34 @@
1
+ on:
2
+ push:
3
+ branches:
4
+ - master
5
+ name: CI
6
+
7
+ jobs:
8
+
9
+ test:
10
+ runs-on: ubuntu-latest
11
+ strategy:
12
+ matrix:
13
+ python-version:
14
+ - "3.11"
15
+ - "3.12"
16
+ - "3.13"
17
+ name: Python ${{ matrix.python-version }} on coverage
18
+ steps:
19
+ - uses: actions/checkout@v4
20
+
21
+ - name: Install uv and set the python version
22
+ uses: astral-sh/setup-uv@v5
23
+ with:
24
+ python-version: ${{ matrix.python-version }}
25
+
26
+ - name: Install the project
27
+ run: uv sync --all-extras --dev
28
+
29
+ - name: Run pytest with coverage
30
+ # For example, using `pytest`
31
+ run: uv run pytest tests --cov=src
32
+
33
+ - name: Run coverage-badge on own test coverage
34
+ run: uvx coverage-badge
@@ -0,0 +1,28 @@
1
+ name: Check Types
2
+
3
+ on: [push]
4
+
5
+ jobs:
6
+ build:
7
+ name: continuous-integration
8
+ runs-on: ubuntu-latest
9
+ strategy:
10
+ matrix:
11
+ python-version:
12
+ - "3.11"
13
+ - "3.12"
14
+ - "3.13"
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Install uv and set the python version
20
+ uses: astral-sh/setup-uv@v5
21
+ with:
22
+ python-version: ${{ matrix.python-version }}
23
+
24
+ - name: Install the project
25
+ run: uv sync --all-extras --dev
26
+
27
+ - name: Run mypy type checks
28
+ run: uv run mypy src
@@ -0,0 +1,31 @@
1
+ name: Lint
2
+
3
+ on: [push]
4
+
5
+ jobs:
6
+ build:
7
+ name: continuous-integration
8
+ runs-on: ubuntu-latest
9
+ strategy:
10
+ matrix:
11
+ python-version:
12
+ - "3.11"
13
+ - "3.12"
14
+ - "3.13"
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Install uv and set the python version
20
+ uses: astral-sh/setup-uv@v5
21
+ with:
22
+ python-version: ${{ matrix.python-version }}
23
+
24
+ - name: Install the project
25
+ run: uv sync --all-extras --dev
26
+
27
+ - name: Run ruff lint
28
+ run: uv run ruff check src
29
+
30
+ - name: Run pylint
31
+ run: uv run pylint src
@@ -0,0 +1,29 @@
1
+ name: Test
2
+
3
+ on: [push]
4
+
5
+ jobs:
6
+ build:
7
+ name: continuous-integration
8
+ runs-on: ubuntu-latest
9
+ strategy:
10
+ matrix:
11
+ python-version:
12
+ - "3.11"
13
+ - "3.12"
14
+ - "3.13"
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Install uv and set the python version
20
+ uses: astral-sh/setup-uv@v5
21
+ with:
22
+ python-version: ${{ matrix.python-version }}
23
+
24
+ - name: Install the project
25
+ run: uv sync --all-extras --dev
26
+
27
+ - name: Run pytest
28
+ # For example, using `pytest`
29
+ run: uv run pytest tests
@@ -0,0 +1,16 @@
1
+ .pytest*
2
+ .venv*
3
+ **__pycache__**
4
+ .coverage*
5
+ .mypy_cache
6
+ .vscode
7
+ .idea
8
+ *.egg-info
9
+ *.egg
10
+ *.egg-info/
11
+ *.egg-info/*
12
+ *.egg-info/**/*
13
+ .ruff_cache
14
+ dist
15
+ .hypothesis
16
+ dist/*
@@ -0,0 +1 @@
1
+ >=3.11
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2025
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
pyantz-0.1.0/Makefile ADDED
@@ -0,0 +1,23 @@
1
+ test:
2
+ uv run pytest -n 16 --cov=src
3
+
4
+ format:
5
+ uv run isort src
6
+ uv run ruff check src --fix
7
+ uv run ruff format src
8
+ uv run isort tests
9
+ uv run ruff check tests --fix
10
+ uv run ruff format tests
11
+
12
+ lint:
13
+ uv run ruff check src
14
+ uv run pylint src
15
+
16
+ type:
17
+ uv run mypy src
18
+
19
+ build:
20
+ uv build
21
+
22
+ sync:
23
+ uv sync --all-extras
pyantz-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,98 @@
1
+ Metadata-Version: 2.4
2
+ Name: pyantz
3
+ Version: 0.1.0
4
+ Summary: Add your description here
5
+ Author-email: igreen1 <dev@iangreen.dev>
6
+ License-File: LICENSE.txt
7
+ Requires-Python: >=3.11
8
+ Requires-Dist: pydantic>=2.10.6
9
+ Requires-Dist: rich>=13.9.4
10
+ Requires-Dist: sqlalchemy>=2.0.38
11
+ Provides-Extra: pandas
12
+ Requires-Dist: pandas>=2.2.3; extra == 'pandas'
13
+ Provides-Extra: pyarrow
14
+ Requires-Dist: pandas>=2.2.3; extra == 'pyarrow'
15
+ Requires-Dist: pyarrow>=19.0.1; extra == 'pyarrow'
16
+ Provides-Extra: slurm
17
+ Requires-Dist: requests>=2.32.3; extra == 'slurm'
18
+ Description-Content-Type: text/markdown
19
+
20
+ # PyAntz
21
+
22
+ Job runner and scheduling
23
+
24
+ Setup configurations to chain together different jobs quickly and with minimal code. The goal here
25
+ is to help those who need to quickly setup a local data pipeline for small projects. This spawned
26
+ off of a school project
27
+
28
+
29
+ ## Dev Notes
30
+
31
+ TODO
32
+ -> slurm
33
+ on failure of submission, retry with different node
34
+ batch submit
35
+ individual submit
36
+ -> windows hpc
37
+ on failure of submission, retry with different node
38
+ batch submit
39
+ -> GUI to create the jobs
40
+ -> check the arg types of the functions in the job
41
+ -> use pyarrow
42
+ --> support using dask, pandas or pyarrow as the backend
43
+
44
+ slurm
45
+
46
+
47
+ - edit yaml
48
+ edit fields (multiple!) set values
49
+ ---> accepts two lists. the first is a list of str (paths to edit) and the second is the values
50
+ ---> must be of the same length, checked at runtime :(
51
+
52
+ - edit json
53
+ ---> same as edit yaml above
54
+ - edit csv
55
+ ---> accepts column names, list of values
56
+ ---> only one column at a time
57
+
58
+ - convert csv to parquet
59
+ ---> read csv and output to parquet
60
+ - convert excel to parquet
61
+ accepts path to excel and optionally the sheet name
62
+ ---> read into memory and output to parquet
63
+ - convert hdf5 to parquet
64
+ accepts path and keys and reads into pandas
65
+ outputs to parquet
66
+
67
+ convert parquet to csv
68
+ convert parquet to excel
69
+ convert hdf5 to excel
70
+
71
+ # DB
72
+ CRUD postgres
73
+ CRUD mysql
74
+ CRUD minio
75
+ CRUD sql server
76
+
77
+ # Plot
78
+ - scatter x/y with color options
79
+ - hist
80
+ - stacked hist
81
+
82
+ # Aggregation
83
+ - merge parquets
84
+ - concatenate parquets
85
+ - filter parquet
86
+ - count parquet
87
+ - sum parquet
88
+ - min parquet
89
+ - max parquet
90
+ - xth percentile
91
+ - groupby and agg
92
+
93
+ # splitters
94
+ - split template into N pipelines
95
+ take one example pipeline and produce a PIPELINE ID variable for each in range
96
+ - generate simple case matrix
97
+ for one variable being modified, create pipelines for range of values for the variable (only integers)
98
+ -
pyantz-0.1.0/README.md ADDED
@@ -0,0 +1,79 @@
1
+ # PyAntz
2
+
3
+ Job runner and scheduling
4
+
5
+ Set up configurations to chain together different jobs quickly and with minimal code. The goal here
6
+ is to help those who need to quickly set up a local data pipeline for small projects. This spawned
7
+ off of a school project.
8
+
9
+
10
+ ## Dev Notes
11
+
12
+ TODO
13
+ -> slurm
14
+ on failure of submission, retry with different node
15
+ batch submit
16
+ individual submit
17
+ -> windows hpc
18
+ on failure of submission, retry with different node
19
+ batch submit
20
+ -> GUI to create the jobs
21
+ -> check the arg types of the functions in the job
22
+ -> use pyarrow
23
+ --> support using dask, pandas or pyarrow as the backend
24
+
25
+ slurm
26
+
27
+
28
+ - edit yaml
29
+ edit fields (multiple!) set values
30
+ ---> accepts two lists. the first is a list of str (paths to edit) and the second is the values
31
+ ---> must be of the same length, checked at runtime :(
32
+
33
+ - edit json
34
+ ---> same as edit yaml above
35
+ - edit csv
36
+ ---> accepts column names, list of values
37
+ ---> only one column at a time
38
+
39
+ - convert csv to parquet
40
+ ---> read csv and output to parquet
41
+ - convert excel to parquet
42
+ accepts path to excel and optionally the sheet name
43
+ ---> read into memory and output to parquet
44
+ - convert hdf5 to parquet
45
+ accepts path and keys and reads into pandas
46
+ outputs to parquet
47
+
48
+ convert parquet to csv
49
+ convert parquet to excel
50
+ convert hdf5 to excel
51
+
52
+ # DB
53
+ CRUD postgres
54
+ CRUD mysql
55
+ CRUD minio
56
+ CRUD sql server
57
+
58
+ # Plot
59
+ - scatter x/y with color options
60
+ - hist
61
+ - stacked hist
62
+
63
+ # Aggregation
64
+ - merge parquets
65
+ - concatenate parquets
66
+ - filter parquet
67
+ - count parquet
68
+ - sum parquet
69
+ - min parquet
70
+ - max parquet
71
+ - xth percentile
72
+ - groupby and agg
73
+
74
+ # splitters
75
+ - split template into N pipelines
76
+ take one example pipeline and produce a PIPELINE ID variable for each in range
77
+ - generate simple case matrix
78
+ for one variable being modified, create pipelines for range of values for the variable (only integers)
79
+ -
@@ -0,0 +1,38 @@
1
+ {
2
+ "submitter_config": {
3
+ "type": "local"
4
+ },
5
+ "config": {
6
+ "type": "pipeline",
7
+ "jobs": [
8
+ {
9
+ "type": "submitter_job",
10
+ "function": "pyantz.jobs.branch.create_pipelines_from_matrix.create_pipelines_from_matrix",
11
+ "parameters": {
12
+ "pipeline_config_template": {
13
+ "type": "pipeline",
14
+ "jobs": [
15
+ {
16
+ "type": "mutable_job",
17
+ "function": "pyantz.jobs.variables.assign_environment_variable.assign_environment_variable",
18
+ "parameters": {
19
+ "environmental_variables": {
20
+ "my_account": "%{my_account_from_csv}"
21
+ }
22
+ }
23
+ },
24
+ {
25
+ "type": "simple_job",
26
+ "function": "pyantz.jobs.dispatch.run_command.run_command",
27
+ "parameters": {
28
+ "cmd": ["sbatch", "--wckey=%{wckey}", "myscript", "$env_var", "%{arg1}", "arg2"]
29
+ }
30
+ }
31
+ ]
32
+ }
33
+ }
34
+
35
+ }
36
+ ]
37
+ }
38
+ }
@@ -0,0 +1,54 @@
1
+ [project]
2
+ name = "pyantz"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "igreen1", email = "dev@iangreen.dev" }
8
+ ]
9
+ requires-python = ">=3.11"
10
+ dependencies = [
11
+ "pydantic>=2.10.6",
12
+ "rich>=13.9.4",
13
+ "sqlalchemy>=2.0.38",
14
+ ]
15
+
16
+ [project.optional-dependencies]
17
+ pyarrow = [
18
+ "pyarrow>=19.0.1",
19
+ "pandas>=2.2.3",
20
+ ]
21
+ pandas = [
22
+ "pandas>=2.2.3",
23
+ ]
24
+ slurm = [
25
+ "requests>=2.32.3",
26
+ ]
27
+
28
+ [project.scripts]
29
+ pyantz = "pyantz:main"
30
+
31
+ [build-system]
32
+ requires = ["hatchling"]
33
+ build-backend = "hatchling.build"
34
+
35
+ [tool.isort]
36
+ profile = "black"
37
+
38
+ [tool.pylint.main]
39
+ disable = "W0401,W0614"
40
+
41
+ [dependency-groups]
42
+ dev = [
43
+ "bandit>=1.8.3",
44
+ "hypothesis>=6.127.6",
45
+ "isort>=6.0.1",
46
+ "mypy>=1.15.0",
47
+ "pandas-stubs>=2.2.3.241126",
48
+ "pyarrow-stubs>=17.17",
49
+ "pylint>=3.3.4",
50
+ "pytest>=8.3.5",
51
+ "pytest-cov>=6.0.0",
52
+ "pytest-xdist>=3.6.1",
53
+ "ruff>=0.9.9",
54
+ ]
File without changes
@@ -0,0 +1,4 @@
1
+ """
2
+ Infrastructure provides the backbone of this runner. This module contains the code
3
+ to run all the various components to set up and execute the jobs in the configuration.
4
+ """