metahq-core 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. metahq_core-0.1.1/.gitignore +213 -0
  2. metahq_core-0.1.1/PKG-INFO +34 -0
  3. metahq_core-0.1.1/README.md +0 -0
  4. metahq_core-0.1.1/pyproject.toml +83 -0
  5. metahq_core-0.1.1/src/metahq_core/README.md +0 -0
  6. metahq_core-0.1.1/src/metahq_core/__init__.py +1 -0
  7. metahq_core-0.1.1/src/metahq_core/curations/_multiprocess_propagator.py +121 -0
  8. metahq_core-0.1.1/src/metahq_core/curations/annotation_converter.py +285 -0
  9. metahq_core-0.1.1/src/metahq_core/curations/annotations.py +446 -0
  10. metahq_core-0.1.1/src/metahq_core/curations/base.py +73 -0
  11. metahq_core-0.1.1/src/metahq_core/curations/index.py +91 -0
  12. metahq_core-0.1.1/src/metahq_core/curations/labels.py +315 -0
  13. metahq_core-0.1.1/src/metahq_core/curations/propagator.py +296 -0
  14. metahq_core-0.1.1/src/metahq_core/export/annotations.py +414 -0
  15. metahq_core-0.1.1/src/metahq_core/export/base.py +63 -0
  16. metahq_core-0.1.1/src/metahq_core/export/labels.py +426 -0
  17. metahq_core-0.1.1/src/metahq_core/logger.py +69 -0
  18. metahq_core-0.1.1/src/metahq_core/ontology/__init__.py +0 -0
  19. metahq_core-0.1.1/src/metahq_core/ontology/base.py +376 -0
  20. metahq_core-0.1.1/src/metahq_core/ontology/graph.py +252 -0
  21. metahq_core-0.1.1/src/metahq_core/ontology/loader.py +153 -0
  22. metahq_core-0.1.1/src/metahq_core/query.py +549 -0
  23. metahq_core-0.1.1/src/metahq_core/search.py +257 -0
  24. metahq_core-0.1.1/src/metahq_core/util/__init__.py +0 -0
  25. metahq_core-0.1.1/src/metahq_core/util/alltypes.py +29 -0
  26. metahq_core-0.1.1/src/metahq_core/util/checkers.py +13 -0
  27. metahq_core-0.1.1/src/metahq_core/util/exceptions.py +6 -0
  28. metahq_core-0.1.1/src/metahq_core/util/helpers.py +36 -0
  29. metahq_core-0.1.1/src/metahq_core/util/io.py +127 -0
  30. metahq_core-0.1.1/src/metahq_core/util/progress.py +89 -0
  31. metahq_core-0.1.1/src/metahq_core/util/supported.py +402 -0
@@ -0,0 +1,213 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py.cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+ cover/
54
+
55
+ # Translations
56
+ *.mo
57
+ *.pot
58
+
59
+ # Django stuff:
60
+ *.log
61
+ local_settings.py
62
+ db.sqlite3
63
+ db.sqlite3-journal
64
+
65
+ # Flask stuff:
66
+ instance/
67
+ .webassets-cache
68
+
69
+ # Scrapy stuff:
70
+ .scrapy
71
+
72
+ # Sphinx documentation
73
+ docs/_build/
74
+
75
+ # PyBuilder
76
+ .pybuilder/
77
+ target/
78
+
79
+ # Jupyter Notebook
80
+ .ipynb_checkpoints
81
+
82
+ # IPython
83
+ profile_default/
84
+ ipython_config.py
85
+
86
+ # pyenv
87
+ # For a library or package, you might want to ignore these files since the code is
88
+ # intended to run in multiple environments; otherwise, check them in:
89
+ # .python-version
90
+
91
+ # pipenv
92
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
94
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
95
+ # install all needed dependencies.
96
+ #Pipfile.lock
97
+
98
+ # UV
99
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
100
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
101
+ # commonly ignored for libraries.
102
+ #uv.lock
103
+
104
+ # poetry
105
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
106
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
107
+ # commonly ignored for libraries.
108
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
109
+ #poetry.lock
110
+ #poetry.toml
111
+
112
+ # pdm
113
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
114
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
115
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
116
+ #pdm.lock
117
+ #pdm.toml
118
+ .pdm-python
119
+ .pdm-build/
120
+
121
+ # pixi
122
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
123
+ #pixi.lock
124
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
125
+ # in the .venv directory. It is recommended not to include this directory in version control.
126
+ .pixi
127
+
128
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
129
+ __pypackages__/
130
+
131
+ # Celery stuff
132
+ celerybeat-schedule
133
+ celerybeat.pid
134
+
135
+ # SageMath parsed files
136
+ *.sage.py
137
+
138
+ # Environments
139
+ .env
140
+ .envrc
141
+ .venv
142
+ env/
143
+ venv/
144
+ ENV/
145
+ env.bak/
146
+ venv.bak/
147
+
148
+ # Spyder project settings
149
+ .spyderproject
150
+ .spyproject
151
+
152
+ # Rope project settings
153
+ .ropeproject
154
+
155
+ # mkdocs documentation
156
+ /site
157
+
158
+ # mypy
159
+ .mypy_cache/
160
+ .dmypy.json
161
+ dmypy.json
162
+
163
+ # Pyre type checker
164
+ .pyre/
165
+
166
+ # pytype static type analyzer
167
+ .pytype/
168
+
169
+ # Cython debug symbols
170
+ cython_debug/
171
+
172
+ # PyCharm
173
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
174
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
175
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
176
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
177
+ #.idea/
178
+
179
+ # Abstra
180
+ # Abstra is an AI-powered process automation framework.
181
+ # Ignore directories containing user credentials, local state, and settings.
182
+ # Learn more at https://abstra.io/docs
183
+ .abstra/
184
+
185
+ # Visual Studio Code
186
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
187
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
188
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
189
+ # you could uncomment the following to ignore the entire vscode folder
190
+ # .vscode/
191
+
192
+ # Ruff stuff:
193
+ .ruff_cache/
194
+
195
+ # PyPI configuration file
196
+ .pypirc
197
+
198
+ # Cursor
199
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
200
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
201
+ # refer to https://docs.cursor.com/context/ignore-files
202
+ .cursorignore
203
+ .cursorindexingignore
204
+
205
+ # Marimo
206
+ marimo/_static/
207
+ marimo/_lsp/
208
+ __marimo__/
209
+
210
+ # Mac
211
+ .DS_Store
212
+
213
+
@@ -0,0 +1,34 @@
1
+ Metadata-Version: 2.4
2
+ Name: metahq-core
3
+ Version: 0.1.1
4
+ Summary: Core API for the meta-hq CLI.
5
+ Author-email: Parker Hicks <parker.hicks@cuanschutz.edu>, Faisal Alquaddoomi <faisal.alquaddoomi@cuanschutz.edu>
6
+ Keywords: CLI,Data Curation,Database,Public Biomedical Data
7
+ Classifier: Development Status :: 3 - Alpha
8
+ Classifier: Intended Audience :: Science/Research
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Requires-Python: >=3.11
15
+ Requires-Dist: duckdb>=1.4.0
16
+ Requires-Dist: networkx>=3.0
17
+ Requires-Dist: numpy>=2.3.0
18
+ Requires-Dist: polars>=1.27.0
19
+ Requires-Dist: pyarrow>=10.0.0
20
+ Requires-Dist: pydantic>=2.0.0
21
+ Requires-Dist: pymongo>=4.13.0
22
+ Requires-Dist: pyyaml>=5.1
23
+ Requires-Dist: rank-bm25>=0.2.2
24
+ Provides-Extra: dev
25
+ Requires-Dist: black; extra == 'dev'
26
+ Requires-Dist: flake8; extra == 'dev'
27
+ Requires-Dist: isort; extra == 'dev'
28
+ Requires-Dist: mypy; extra == 'dev'
29
+ Requires-Dist: pytest-cov; extra == 'dev'
30
+ Requires-Dist: pytest>=8.0; extra == 'dev'
31
+ Provides-Extra: test
32
+ Requires-Dist: pytest-benchmark; extra == 'test'
33
+ Requires-Dist: pytest-cov; extra == 'test'
34
+ Requires-Dist: pytest>=8.0; extra == 'test'
File without changes
@@ -0,0 +1,83 @@
1
+ [project]
2
+ name = "metahq-core"
3
+ dynamic = ["version"]
4
+ description = "Core API for the meta-hq CLI."
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ authors = [
8
+ {name = "Parker Hicks", email = "parker.hicks@cuanschutz.edu"},
9
+ {name = "Faisal Alquaddoomi", email = "faisal.alquaddoomi@cuanschutz.edu"},
10
+ ]
11
+ keywords = ["CLI", "Database", "Data Curation", "Public Biomedical Data"]
12
+ classifiers = [
13
+ "Development Status :: 3 - Alpha",
14
+ "Intended Audience :: Science/Research",
15
+ "Operating System :: OS Independent",
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.11",
18
+ "Programming Language :: Python :: 3.12",
19
+ "Programming Language :: Python :: 3.13",
20
+ ]
21
+
22
+ dependencies = [
23
+ "networkx>=3.0",
24
+ "numpy>=2.3.0",
25
+ "polars>=1.27.0",
26
+ "pymongo>=4.13.0",
27
+ "pyarrow>=10.0.0",
28
+ "pydantic>=2.0.0",
29
+ "pyyaml>=5.1",
30
+ "duckdb>=1.4.0",
31
+ "rank-bm25>=0.2.2",
32
+ ]
33
+
34
+ [project.optional-dependencies]
35
+ dev = [
36
+ "pytest>=8.0",
37
+ "pytest-cov",
38
+ "black",
39
+ "isort",
40
+ "flake8",
41
+ "mypy",
42
+ ]
43
+
44
+ test = [
45
+ "pytest>=8.0",
46
+ "pytest-cov",
47
+ "pytest-benchmark",
48
+ ]
49
+
50
+ [build-system]
51
+ requires = ["hatchling"]
52
+ build-backend = "hatchling.build"
53
+
54
+ [tool.hatch.version]
55
+ path = "src/metahq_core/__init__.py"
56
+
57
+ [tool.hatch.build.targets.sdist]
58
+ exclude = ["/.git", "/__pycache__", "*.pyc"]
59
+ include = ["src/metahq_core"]
60
+
61
+ [tool.hatch.build.targets.wheel]
62
+ packages = ["src/metahq_core"]
63
+
64
+ [tool.black]
65
+ line-length = 88
66
+ target-version = ['py38']
67
+ include = '\.pyi?$'
68
+
69
+ [tool.isort]
70
+ profile = "black"
71
+ multi_line_output = 3
72
+
73
+ [tool.mypy]
74
+ python_version = "3.11"
75
+ warn_return_any = true
76
+ warn_unused_configs = true
77
+ disallow_untyped_defs = true
78
+
79
+ [tool.pytest.ini_options]
80
+ testpaths = ["tests"]
81
+ python_files = ["test_*.py"]
82
+ python_classes = ["Test*"]
83
+ python_functions = ["test_*"]
File without changes
@@ -0,0 +1 @@
1
+ __version__ = "0.1.1"
@@ -0,0 +1,121 @@
1
+ """
2
+ Helper class to facilitate propagation of annotations by chunks.
3
+
4
+ Author: Parker Hicks
5
+ Date: 2025-09-26
6
+
7
+ Last updated: 2025-11-18 by Parker Hicks
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import multiprocessing as mp
13
+ import os
14
+ from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
15
+ from typing import TYPE_CHECKING
16
+
17
+ import numpy as np
18
+
19
+ from metahq_core.logger import setup_logger
20
+ from metahq_core.util.alltypes import NpIntMatrix
21
+ from metahq_core.util.progress import progress_bar
22
+
23
+ if TYPE_CHECKING:
24
+ import logging
25
+
26
+
27
+ PROCESS_POOL_THRESHOLD = os.environ.get("PROCESS_POOL_THRESHOLD", 40)
28
+
29
+ type Executor = ProcessPoolExecutor | ThreadPoolExecutor
30
+
31
class MultiprocessPropagator:
    """Exists to allow multiprocessing within the Propagator class.

    Each chunk of annotations is multiplied by the ontology relationship
    matrix in its own task, and the per-chunk products are written back into
    a single matrix in chunk order.
    """

    def __init__(
        self,
        logger: logging.Logger | None = None,
        loglevel: int = 20,
        verbose: bool = True,
    ):
        """
        Parameters
        ----------
        logger:
            Logger to use. When None, one is created with ``setup_logger``
            for this module at ``loglevel``.
        loglevel:
            Level passed to ``setup_logger``; only used when ``logger`` is None.
        verbose:
            If True, propagation reports progress through ``progress_bar``.
        """
        if logger is None:
            logger = setup_logger(__name__, level=loglevel)
        self.log: logging.Logger = logger
        self.verbose: bool = verbose

    @staticmethod
    def _default_n_processes(cpu_total: int) -> int:
        """Return the default worker count: all CPUs but one, never below 1.

        The lower bound matters on single-CPU hosts, where ``cpu_total - 1``
        is 0 and the executors reject ``max_workers=0`` with a ValueError.
        """
        return max(1, cpu_total - 1)

    @staticmethod
    def _process_chunk(args):
        """
        Worker function for matrix dot product between annotation chunk
        and ontology relationship matrix.

        This is the function split between workers.

        Parameters
        ----------
        args:
            Tuple of ``(chunk_idx, chunk, family)``. They are packed into one
            argument so a single object can be submitted per task.

        Returns
        -------
        tuple
            ``(chunk_idx, result)``, where ``result`` is the product of
            ``chunk`` and ``family``. The index travels with the result so the
            caller can restore chunk order after out-of-order completion.
        """
        chunk_idx, chunk, family = args
        result = np.einsum("ij,jk->ik", chunk, family)

        return chunk_idx, result

    def multiprocess_propagate(
        self,
        n_indices,
        split: list,
        family: NpIntMatrix,
        n_processes: int | None = None,
        desc="Propagating",
    ):
        """Propagate every chunk in ``split`` through ``family`` in parallel.

        Parameters
        ----------
        n_indices:
            Number of rows in the combined output matrix.
        split:
            Annotation chunks, in the order their rows should appear in the
            output.
        family:
            Ontology relationship matrix; its second dimension sets the
            number of output columns.
        n_processes:
            Worker count for the pool. Defaults to the CPU count minus one,
            with a minimum of one worker.
        desc:
            Progress description, used only when ``self.verbose`` is True.

        Returns
        -------
        numpy.ndarray
            ``int32`` matrix of shape ``(n_indices, family.shape[1])`` holding
            the chunk products stacked in chunk order. Rows not covered by
            any chunk are zero.
        """
        if n_processes is None:
            n_processes = self._default_n_processes(mp.cpu_count())

        final_shape = (
            n_indices,
            family.shape[1],
        )

        # Zero-initialized rather than np.empty so that rows not covered by a
        # chunk are never returned as uninitialized memory.
        propagated = np.zeros(final_shape, dtype=np.int32)
        args_list = [(i, chunk, family) for i, chunk in enumerate(split)]

        # Fewer chunks than the threshold run on threads; otherwise processes.
        executor = (
            ThreadPoolExecutor
            if len(args_list) < int(PROCESS_POOL_THRESHOLD)
            else ProcessPoolExecutor
        )

        with executor(max_workers=n_processes) as _executor:
            if self.verbose:
                results = self._execute_verbose(_executor, args_list, desc)
            else:
                results = self._execute_silent(_executor, args_list)

        # combine: tasks finish in arbitrary order, so sort by chunk index
        # before copying each product into its row range.
        start = 0
        for _, result in sorted(results, key=lambda x: x[0]):
            nsamples = result.shape[0]
            end = start + nsamples
            propagated[start:end] = result
            start = end

        return propagated

    def _execute_silent(self, executor: Executor, args_list: list) -> list:
        """Submit every chunk to ``executor`` and collect results silently.

        Returns the ``(chunk_idx, result)`` tuples in completion order, which
        is not necessarily chunk order.
        """
        futures = [executor.submit(self._process_chunk, args) for args in args_list]
        return [future.result() for future in as_completed(futures)]

    def _execute_verbose(
        self,
        executor: Executor,
        args_list: list,
        desc: str,
    ) -> list:
        """Submit every chunk to ``executor`` and advance a progress bar.

        Behaves like ``_execute_silent`` but advances the task created from
        ``progress_bar`` by one step each time a chunk completes. Returns the
        ``(chunk_idx, result)`` tuples in completion order.
        """
        with progress_bar(padding="    ") as progress:
            task = progress.add_task(desc, total=len(args_list))

            futures = [
                executor.submit(self._process_chunk, args) for args in args_list
            ]
            results = []
            for future in as_completed(futures):
                result = future.result()
                results.append(result)
                progress.update(task, description=desc, advance=1)
                progress.refresh()

        return results